(*) Replace the ip2location IP geolocation data with ASN information from apnic.net.
It doesn't really make sense to use ip2location as a middleman for information that is already freely available.
This commit is contained in:
parent
722b56c8ca
commit
d7bd540683
@ -22,6 +22,8 @@ public class DomainInformation {
|
||||
boolean unknownDomain;
|
||||
|
||||
String ip;
|
||||
Integer asn;
|
||||
String asnOrg;
|
||||
String ipCountry;
|
||||
String state;
|
||||
|
||||
|
@ -62,6 +62,15 @@ public class WmsaHome {
|
||||
|
||||
public static Path getIPLocationDatabse() {
|
||||
return getHomePath().resolve("data").resolve("IP2LOCATION-LITE-DB1.CSV");
|
||||
|
||||
}
|
||||
|
||||
public static Path getAsnMappingDatabase() {
|
||||
return getHomePath().resolve("data").resolve("asn-data-raw-table");
|
||||
}
|
||||
|
||||
public static Path getAsnInfoDatabase() {
|
||||
return getHomePath().resolve("data").resolve("asn-used-autnums");
|
||||
}
|
||||
|
||||
public static LanguageModels getLanguageModels() {
|
||||
@ -85,4 +94,6 @@ public class WmsaHome {
|
||||
public static boolean isDebug() {
|
||||
return debugMode;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ package nu.marginalia.ip_blocklist;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.geoip.AsnTable;
|
||||
import nu.marginalia.geoip.GeoIpDictionary;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import org.slf4j.Logger;
|
||||
@ -43,7 +44,9 @@ public class GeoIpBlocklist {
|
||||
|
||||
public String getCountry(EdgeDomain domain) {
|
||||
try {
|
||||
return ipDictionary.getCountry(InetAddressCache.getAddress(domain));
|
||||
return ipDictionary.getAsnInfo(InetAddressCache.getAddress(domain))
|
||||
.map(AsnTable.AsnInfo::country)
|
||||
.orElse("-");
|
||||
}
|
||||
catch (Throwable ex) {
|
||||
logger.debug("Failed to resolve {}", domain);
|
||||
|
@ -0,0 +1,84 @@
|
||||
package nu.marginalia.geoip;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.TreeMap;
|
||||
|
||||
public class AsnMapping {
|
||||
private static final Logger logger = LoggerFactory.getLogger(AsnMapping.class);
|
||||
private final TreeMap<Integer, AsnMappingRecord> asns = new TreeMap<>(Integer::compareUnsigned);
|
||||
|
||||
public record AsnMappingRecord(int ipStart, int ipEnd, int asn) {
|
||||
public boolean contains(int ip) {
|
||||
return Integer.compareUnsigned(ipStart, ip) <= 0
|
||||
&& Integer.compareUnsigned(ip, ipEnd) < 0;
|
||||
}
|
||||
}
|
||||
|
||||
public AsnMapping(Path databaseFile) {
|
||||
try (var reader = Files.lines(databaseFile)) {
|
||||
reader.map(AsnMapping::parseAsnMappingFileLine).filter(Objects::nonNull).forEach(asn -> asns.put(asn.ipStart(), asn));
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to load ASN mapping" + databaseFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
public Optional<Integer> getAsnNumber(int ip) {
|
||||
var entry = asns.floorEntry(ip);
|
||||
|
||||
if (null == entry) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
var asn = entry.getValue();
|
||||
if (asn.contains(ip)) {
|
||||
return Optional.of(asn.asn());
|
||||
}
|
||||
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
public static AsnMappingRecord parseAsnMappingFileLine(String s) {
|
||||
try {
|
||||
String[] parts = StringUtils.split(s, '\t');
|
||||
if (parts.length != 2) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Parse CIDR notation, e.g. 127.0.0.1/24 -> ["127.0.0.1", "24"]
|
||||
String[] cidrParts = StringUtils.split(parts[0], '/');
|
||||
if (cidrParts.length != 2) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Parse IP address and subnet mask
|
||||
String[] ipParts = StringUtils.split(cidrParts[0], '.');
|
||||
int ipMask = Integer.parseInt(cidrParts[1]);
|
||||
|
||||
// Convert subnet mask to integer start and end values
|
||||
int ipStart = 0;
|
||||
int ipEnd = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
int ipByte = Integer.parseInt(ipParts[i]);
|
||||
ipStart |= ipByte << (24 - 8 * i);
|
||||
ipEnd |= ipByte << (24 - 8 * i);
|
||||
}
|
||||
ipStart &= 0xFFFFFFFF << (32 - ipMask);
|
||||
ipEnd |= 0xFFFFFFFF >>> ipMask;
|
||||
|
||||
return new AsnMappingRecord(ipStart, ipEnd, Integer.parseInt(parts[1]));
|
||||
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.warn("Failed to parse ASN mapping line: {}", s);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,62 @@
|
||||
package nu.marginalia.geoip;
|
||||
|
||||
import nu.marginalia.WmsaHome;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.HashMap;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
public class AsnTable {
|
||||
public HashMap<Integer, AsnInfo> asns = new HashMap<>(65536);
|
||||
public record AsnInfo(int asn, String country, String org) {}
|
||||
private static final Logger logger = LoggerFactory.getLogger(AsnTable.class);
|
||||
|
||||
public AsnTable(Path asnFile) {
|
||||
try (var reader = Files.lines(WmsaHome.getAsnInfoDatabase())) {
|
||||
reader.map(AsnTable::parseAsnFileLine).filter(Objects::nonNull).forEach(asn -> asns.put(asn.asn(), asn));
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to load ASN database " + asnFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
public Optional<AsnInfo> getAsnInfo(int asn) {
|
||||
return Optional.ofNullable(asns.get(asn));
|
||||
}
|
||||
|
||||
static AsnInfo parseAsnFileLine(String line) {
|
||||
line = line.trim();
|
||||
|
||||
try {
|
||||
int numEnd = line.indexOf(' ');
|
||||
String num = line.substring(0, numEnd);
|
||||
|
||||
int asn = Integer.parseInt(num);
|
||||
|
||||
int orgStart = numEnd + 1;
|
||||
int orgEnd = line.lastIndexOf(',');
|
||||
if (orgEnd < 0 || orgEnd < orgStart + 1) {
|
||||
orgEnd = line.length();
|
||||
}
|
||||
|
||||
String org = line.substring(orgStart, orgEnd);
|
||||
String country = "";
|
||||
if (orgEnd + 1 < line.length()) {
|
||||
country = line.substring(orgEnd + 1).trim();
|
||||
}
|
||||
|
||||
if ("UNALLOCATED".equals(org)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return new AsnInfo(asn, country, org);
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.warn("Failed to parse ASN line: {}", line);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
@ -1,55 +1,33 @@
|
||||
package nu.marginalia.geoip;
|
||||
|
||||
import com.opencsv.CSVReader;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.FileReader;
|
||||
import java.net.InetAddress;
|
||||
import java.util.TreeMap;
|
||||
import java.util.Optional;
|
||||
|
||||
public class GeoIpDictionary {
|
||||
private volatile TreeMap<Long, IpRange> ranges = null;
|
||||
private volatile AsnTable asnTable = null;
|
||||
private volatile AsnMapping asnMapping = null;
|
||||
private static final Logger logger = LoggerFactory.getLogger(GeoIpDictionary.class);
|
||||
|
||||
record IpRange(long from, long to, String country) {}
|
||||
|
||||
public GeoIpDictionary() {
|
||||
Thread.ofPlatform().start(() -> {
|
||||
try (var reader = new CSVReader(new FileReader(WmsaHome.getIPLocationDatabse().toFile()))) {
|
||||
var dict = new TreeMap<Long, IpRange>();
|
||||
|
||||
for (;;) {
|
||||
String[] vals = reader.readNext();
|
||||
if (vals == null) {
|
||||
break;
|
||||
}
|
||||
var range = new IpRange(Long.parseLong(vals[0]),
|
||||
Long.parseLong(vals[1]),
|
||||
vals[2]);
|
||||
dict.put(range.from, range);
|
||||
}
|
||||
ranges = dict;
|
||||
logger.info("Loaded {} IP ranges", ranges.size());
|
||||
} catch (Exception e) {
|
||||
ranges = new TreeMap<>();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
finally {
|
||||
synchronized (this) {
|
||||
this.notifyAll();
|
||||
}
|
||||
}
|
||||
this.asnTable = new AsnTable(WmsaHome.getAsnInfoDatabase());
|
||||
logger.info("Loaded ASN table");
|
||||
this.asnMapping = new AsnMapping(WmsaHome.getAsnMappingDatabase());
|
||||
logger.info("Loaded ASN mapping");
|
||||
});
|
||||
}
|
||||
|
||||
public boolean isReady() {
|
||||
return null != ranges;
|
||||
return null != asnMapping;
|
||||
}
|
||||
|
||||
public boolean waitReady() {
|
||||
while (null == ranges) {
|
||||
while (null == asnMapping) {
|
||||
try {
|
||||
synchronized (this) {
|
||||
this.wait(1000);
|
||||
@ -61,32 +39,30 @@ public class GeoIpDictionary {
|
||||
return true;
|
||||
}
|
||||
|
||||
public String getCountry(String ip) {
|
||||
public Optional<AsnTable.AsnInfo> getAsnInfo(String ip) {
|
||||
try {
|
||||
return getCountry(InetAddress.getByName(ip));
|
||||
return getAsnInfo(InetAddress.getByName(ip));
|
||||
} catch (Exception e) {
|
||||
return "";
|
||||
e.printStackTrace();
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
public String getCountry(InetAddress address) {
|
||||
if (null == ranges) { // not loaded yet or failed to load
|
||||
return "";
|
||||
public Optional<AsnTable.AsnInfo> getAsnInfo(int ipAddress) {
|
||||
if (null == asnTable) { // not loaded yet or failed to load
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
return asnMapping
|
||||
.getAsnNumber(ipAddress)
|
||||
.flatMap(asn -> asnTable.getAsnInfo(asn));
|
||||
}
|
||||
|
||||
public Optional<AsnTable.AsnInfo> getAsnInfo(InetAddress address) {
|
||||
byte[] bytes = address.getAddress();
|
||||
long ival = ((long)bytes[0]&0xFF) << 24 | ((long)bytes[1]&0xFF) << 16 | ((long)bytes[2]&0xFF)<< 8 | ((long)bytes[3]&0xFF);
|
||||
|
||||
Long key = ranges.floorKey(ival);
|
||||
if (null == key) {
|
||||
return "";
|
||||
}
|
||||
int ival = (int) (((long)bytes[0]&0xFF) << 24 | ((long)bytes[1]&0xFF) << 16 | ((long)bytes[2]&0xFF)<< 8 | ((long)bytes[3]&0xFF));
|
||||
|
||||
var range = ranges.get(key);
|
||||
if (ival >= key && ival < range.to) {
|
||||
return range.country;
|
||||
}
|
||||
|
||||
return "";
|
||||
return getAsnInfo(ival);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,55 @@
|
||||
package nu.marginalia.geoip;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.net.InetAddress;
|
||||
import java.net.UnknownHostException;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
|
||||
public class AsnMappingTest {
|
||||
@Test
|
||||
public void testParseAsnMappingFileLine() throws UnknownHostException {
|
||||
// Test Case 1: Valid ASN Mapping Line
|
||||
String input1 = "192.0.2.0/24\t65536";
|
||||
AsnMapping.AsnMappingRecord result1 = AsnMapping.parseAsnMappingFileLine(input1);
|
||||
assertNotNull(result1, "The result should not be null for valid data");
|
||||
assertEquals(65536, result1.asn(), "The asn is not as expected");
|
||||
|
||||
// Test Case 2: Invalid ASN Mapping Line - Different format
|
||||
String input2 = "nah I am just a string, not an ASN Mapping Line...";
|
||||
AsnMapping.AsnMappingRecord result2 = AsnMapping.parseAsnMappingFileLine(input2);
|
||||
assertNull(result2, "The result should be null for invalid data");
|
||||
|
||||
// Test Case 3: Invalid ASN Mapping Line - Null input
|
||||
String input3 = null;
|
||||
AsnMapping.AsnMappingRecord result3 = AsnMapping.parseAsnMappingFileLine(input3);
|
||||
assertNull(result3, "The result should be null for null input");
|
||||
|
||||
// Test Case 4: Invalid ASN Mapping Line - Empty string
|
||||
String input4 = "";
|
||||
AsnMapping.AsnMappingRecord result4 = AsnMapping.parseAsnMappingFileLine(input4);
|
||||
assertNull(result4, "The result should be null for empty string");
|
||||
|
||||
// Test Case 5: Invalid ASN Mapping Line - One part
|
||||
String input5 = "192.0.2.0/24";
|
||||
AsnMapping.AsnMappingRecord result5 = AsnMapping.parseAsnMappingFileLine(input5);
|
||||
assertNull(result5, "The result should be null for a string with only one part");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIpBounds() throws UnknownHostException {
|
||||
String input7 = "193.183.0.0/24\t207825";
|
||||
AsnMapping.AsnMappingRecord result7 = AsnMapping.parseAsnMappingFileLine(input7);
|
||||
assertNotNull(result7, "The result should not be null for valid data");
|
||||
var ip = InetAddress.getAllByName("193.183.0.0");
|
||||
byte[] ipBytes = ip[0].getAddress();
|
||||
|
||||
int ipInt = (int) (((long)ipBytes[0]&0xFF) << 24 | ((long)ipBytes[1]&0xFF) << 16 | ((long)ipBytes[2]&0xFF)<< 8 | ((long)ipBytes[3]&0xFF));
|
||||
|
||||
assertTrue(result7.contains(ipInt));
|
||||
}
|
||||
}
|
@ -0,0 +1,55 @@
|
||||
package nu.marginalia.geoip;
|
||||
|
||||
import nu.marginalia.geoip.AsnTable;
|
||||
import nu.marginalia.geoip.AsnTable.AsnInfo;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
|
||||
public class AsnTableTest {
|
||||
|
||||
/**
|
||||
* This class is to test the static method parseAsnFileLine of the AsnTable class.
|
||||
* This method parses a line from an ASN table file into an AsnInfo instance,
|
||||
* which holds ASN number, country and organization string.
|
||||
*/
|
||||
|
||||
@Test
|
||||
public void testParseAsnFileLine_ShouldReturnNullWhenUnallocated() {
|
||||
String unallocatedLine = " 1 UNALLOCATED";
|
||||
AsnInfo result = AsnTable.parseAsnFileLine(unallocatedLine);
|
||||
assertNull(result, "Parse ASN File Line output should be null for unallocated ASN");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseAsnFileLine_ShouldReturnNullWhenInputIsNotParsable() {
|
||||
String unparsableLine = " NotParsable Line";
|
||||
AsnInfo result = AsnTable.parseAsnFileLine(unparsableLine);
|
||||
assertNull(result, "Parse ASN File Line output should be null for unparsable lines");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseAsnFileLine_AllFieldsParsedCorrectly() {
|
||||
String asnLineWithAllFields = "123456 Company,US ";
|
||||
AsnInfo expected = new AsnInfo(123456, "US", "Company");
|
||||
AsnInfo actual = AsnTable.parseAsnFileLine(asnLineWithAllFields);
|
||||
assertEquals(expected, actual, "Parse ASN File Line output should match expected AsnInfo when line is correctly formatted");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseAsnFileLine_MultipleCommasInOrg() {
|
||||
String asnLineWithAllFields = "123456 Company, Inc., US ";
|
||||
AsnInfo expected = new AsnInfo(123456, "US", "Company, Inc.");
|
||||
AsnInfo actual = AsnTable.parseAsnFileLine(asnLineWithAllFields);
|
||||
assertEquals(expected, actual, "Parse ASN File Line output should match expected AsnInfo when line is correctly formatted");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testParseAsnFileLine_NoCountry() {
|
||||
String asnLineWithoutCountry = "123456 Company";
|
||||
AsnInfo expected = new AsnInfo(123456, "", "Company");
|
||||
AsnInfo actual = AsnTable.parseAsnFileLine(asnLineWithoutCountry);
|
||||
assertEquals(expected, actual, "Parse ASN File Line output should match expected AsnInfo when line lacks country");
|
||||
}
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
package nu.marginalia.geoip;
|
||||
|
||||
import org.junit.jupiter.api.Tag;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@Tag("slow")
|
||||
class GeoIpDictionaryTest {
|
||||
|
||||
@Test
|
||||
public void testAsnResolution() {
|
||||
GeoIpDictionary geoIpDictionary = new GeoIpDictionary();
|
||||
geoIpDictionary.waitReady();
|
||||
System.out.println(geoIpDictionary.getAsnInfo("193.183.0.162"));
|
||||
}
|
||||
|
||||
}
|
@ -124,10 +124,10 @@ public class DomainProcessor {
|
||||
|
||||
terms.add("ip:"+ip);
|
||||
|
||||
String ipCountryCode = geoIpDictionary.getCountry(ip).toLowerCase();
|
||||
if (!ipCountryCode.isBlank()) {
|
||||
terms.add("ip:"+ipCountryCode);
|
||||
}
|
||||
geoIpDictionary.getAsnInfo(ip).ifPresent(asnInfo -> {
|
||||
terms.add("asn:"+asnInfo.asn());
|
||||
terms.add("ip:"+asnInfo.country());
|
||||
});
|
||||
|
||||
if (cookies) {
|
||||
terms.add(HtmlFeature.COOKIES.getKeyword());
|
||||
|
@ -108,11 +108,6 @@
|
||||
<h1> Open Source </h1>
|
||||
The search engine is open source with an AGPL license. The sources can be perused at
|
||||
<tt><a href="https://git.marginalia.nu/">https://git.marginalia.nu/</a></tt>.
|
||||
<h1>Data Sources</h1>
|
||||
IP geolocation is sourced from the IP2Location LITE data available from
|
||||
<a rel="external noopener nofollow" href="https://lite.ip2location.com/">https://lite.ip2location.com/</a>
|
||||
under
|
||||
<a rel="external noopener nofollow" href="https://creativecommons.org/licenses/by-sa/4.0/">CC-BY-SA 4.0</a>.
|
||||
</section>
|
||||
|
||||
</footer>
|
||||
|
@ -7,5 +7,6 @@
|
||||
Pages Crawled: {{pagesFetched}} <br/>
|
||||
Pages Indexed: {{pagesIndexed}} <br/>
|
||||
IP: {{ip}} {{#if ipCountry}}<span title="{{ipCountry}}">{{getIpFlag}}</span>{{/if}}<br/>
|
||||
ASN: {{asn}} {{asnOrg}} <br/>
|
||||
</fieldset>
|
||||
<br/>
|
@ -60,7 +60,12 @@ public class DomainInformationService {
|
||||
String ip = rs.getString("IP");
|
||||
|
||||
builder.ip(ip);
|
||||
builder.ipCountry(geoIpDictionary.getCountry(ip));
|
||||
var isnInfo = geoIpDictionary.getAsnInfo(ip);
|
||||
if (isnInfo.isPresent()) {
|
||||
builder.asn(isnInfo.get().asn());
|
||||
builder.ipCountry(isnInfo.get().country());
|
||||
builder.asnOrg(isnInfo.get().org());
|
||||
}
|
||||
|
||||
builder.nodeAffinity(rs.getInt("NODE_AFFINITY"));
|
||||
builder.domain(new EdgeDomain(rs.getString("DOMAIN_NAME")));
|
||||
|
@ -29,8 +29,8 @@ download_model model/ngrams.bin https://downloads.marginalia.nu/model/ngrams.bin
|
||||
download_model model/tfreq-new-algo3.bin https://downloads.marginalia.nu/model/tfreq-new-algo3.bin
|
||||
download_model model/lid.176.ftz https://downloads.marginalia.nu/model/lid.176.ftz
|
||||
|
||||
download_model data/IP2LOCATION-LITE-DB1.CSV.ZIP https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP
|
||||
unzip -qn -d data data/IP2LOCATION-LITE-DB1.CSV.ZIP
|
||||
download_model data/asn-data-raw-table https://thyme.apnic.net/current/data-raw-table
|
||||
download_model data/asn-used-autnums https://thyme.apnic.net/current/data-used-autnums
|
||||
|
||||
download_model data/adblock.txt https://downloads.marginalia.nu/data/adblock.txt
|
||||
if [ ! -f data/suggestions.txt ]; then
|
||||
|
Loading…
Reference in New Issue
Block a user