From d7bd540683909030846f0c447b6789bcc9a32dfe Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Sat, 16 Dec 2023 21:55:04 +0100 Subject: [PATCH 1/2] (*) Replace the ip2location IP geolocation data with ASN information from apnic.net. Doesn't really make sense to use ip2location as a middle man for information that is already freely available... --- .../client/model/DomainInformation.java | 2 + .../src/main/java/nu/marginalia/WmsaHome.java | 11 +++ .../ip_blocklist/GeoIpBlocklist.java | 5 +- .../java/nu/marginalia/geoip/AsnMapping.java | 84 +++++++++++++++++++ .../java/nu/marginalia/geoip/AsnTable.java | 62 ++++++++++++++ .../nu/marginalia/geoip/GeoIpDictionary.java | 72 ++++++---------- .../nu/marginalia/geoip/AsnMappingTest.java | 55 ++++++++++++ .../nu/marginalia/geoip/AsnTableTest.java | 55 ++++++++++++ .../marginalia/geoip/GeoIpDictionaryTest.java | 16 ++++ .../converting/processor/DomainProcessor.java | 8 +- .../templates/search/parts/search-footer.hdb | 5 -- .../site-info/site-info-index-indexed.hdb | 1 + .../domains/DomainInformationService.java | 7 +- run/setup.sh | 4 +- 14 files changed, 326 insertions(+), 61 deletions(-) create mode 100644 code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnMapping.java create mode 100644 code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnTable.java create mode 100644 code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnMappingTest.java create mode 100644 code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnTableTest.java create mode 100644 code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/GeoIpDictionaryTest.java diff --git a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DomainInformation.java b/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DomainInformation.java index 9e0489ee..625fd4be 100644 --- a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DomainInformation.java +++ b/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DomainInformation.java @@ -22,6 +22,8 @@ public class DomainInformation { boolean unknownDomain; String ip; + Integer asn; + String asnOrg; String ipCountry; String state; diff --git a/code/common/config/src/main/java/nu/marginalia/WmsaHome.java b/code/common/config/src/main/java/nu/marginalia/WmsaHome.java index b1bc3512..3c3352b5 100644 --- a/code/common/config/src/main/java/nu/marginalia/WmsaHome.java +++ b/code/common/config/src/main/java/nu/marginalia/WmsaHome.java @@ -62,6 +62,15 @@ public class WmsaHome { public static Path getIPLocationDatabse() { return getHomePath().resolve("data").resolve("IP2LOCATION-LITE-DB1.CSV"); + + } + + public static Path getAsnMappingDatabase() { + return getHomePath().resolve("data").resolve("asn-data-raw-table"); + } + + public static Path getAsnInfoDatabase() { + return getHomePath().resolve("data").resolve("asn-used-autnums"); } public static LanguageModels getLanguageModels() { @@ -85,4 +94,6 @@ public class WmsaHome { public static boolean isDebug() { return debugMode; } + + } diff --git a/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java b/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java index 79ca6847..c836966d 100644 --- a/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java +++ b/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java @@ -2,6 +2,7 @@ package nu.marginalia.ip_blocklist; import com.google.inject.Inject; import com.google.inject.Singleton; +import nu.marginalia.geoip.AsnTable; import nu.marginalia.geoip.GeoIpDictionary; import nu.marginalia.model.EdgeDomain; import org.slf4j.Logger; @@ -43,7 +44,9 @@ public class GeoIpBlocklist { public String getCountry(EdgeDomain domain) { try { - return ipDictionary.getCountry(InetAddressCache.getAddress(domain)); + return ipDictionary.getAsnInfo(InetAddressCache.getAddress(domain)) + .map(AsnTable.AsnInfo::country) + .orElse("-"); } catch (Throwable ex) { logger.debug("Failed to resolve {}", domain); diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnMapping.java b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnMapping.java new file mode 100644 index 00000000..5f37f5c9 --- /dev/null +++ b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnMapping.java @@ -0,0 +1,84 @@ +package nu.marginalia.geoip; + +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Objects; +import java.util.Optional; +import java.util.TreeMap; + +public class AsnMapping { + private static final Logger logger = LoggerFactory.getLogger(AsnMapping.class); + private final TreeMap asns = new TreeMap<>(Integer::compareUnsigned); + + public record AsnMappingRecord(int ipStart, int ipEnd, int asn) { + public boolean contains(int ip) { + return Integer.compareUnsigned(ipStart, ip) <= 0 + && Integer.compareUnsigned(ip, ipEnd) < 0; + } + } + + public AsnMapping(Path databaseFile) { + try (var reader = Files.lines(databaseFile)) { + reader.map(AsnMapping::parseAsnMappingFileLine).filter(Objects::nonNull).forEach(asn -> asns.put(asn.ipStart(), asn)); + } catch (Exception e) { + logger.error("Failed to load ASN mapping" + databaseFile, e); + } + } + + public Optional getAsnNumber(int ip) { + var entry = asns.floorEntry(ip); + + if (null == entry) { + return Optional.empty(); + } + + var asn = entry.getValue(); + if (asn.contains(ip)) { + return Optional.of(asn.asn()); + } + + return Optional.empty(); + } + + public static AsnMappingRecord parseAsnMappingFileLine(String s) { + try { + String[] parts = StringUtils.split(s, '\t'); + if (parts.length != 2) { + return null; + } + + // Parse CIDR notation, e.g. 127.0.0.1/24 -> ["127.0.0.1", "24"] + String[] cidrParts = StringUtils.split(parts[0], '/'); + if (cidrParts.length != 2) { + return null; + } + + // Parse IP address and subnet mask + String[] ipParts = StringUtils.split(cidrParts[0], '.'); + int ipMask = Integer.parseInt(cidrParts[1]); + + // Convert subnet mask to integer start and end values + int ipStart = 0; + int ipEnd = 0; + for (int i = 0; i < 4; i++) { + int ipByte = Integer.parseInt(ipParts[i]); + ipStart |= ipByte << (24 - 8 * i); + ipEnd |= ipByte << (24 - 8 * i); + } + ipStart &= 0xFFFFFFFF << (32 - ipMask); + ipEnd |= 0xFFFFFFFF >>> ipMask; + + return new AsnMappingRecord(ipStart, ipEnd, Integer.parseInt(parts[1])); + + } + catch (Exception ex) { + logger.warn("Failed to parse ASN mapping line: {}", s); + return null; + } + } + +} diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnTable.java b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnTable.java new file mode 100644 index 00000000..67b3966f --- /dev/null +++ b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnTable.java @@ -0,0 +1,62 @@ +package nu.marginalia.geoip; + +import nu.marginalia.WmsaHome; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Objects; +import java.util.Optional; + +public class AsnTable { + public HashMap asns = new HashMap<>(65536); + public record AsnInfo(int asn, String country, String org) {} + private static final Logger logger = LoggerFactory.getLogger(AsnTable.class); + + public AsnTable(Path asnFile) { + try (var reader = Files.lines(WmsaHome.getAsnInfoDatabase())) { + reader.map(AsnTable::parseAsnFileLine).filter(Objects::nonNull).forEach(asn -> asns.put(asn.asn(), asn)); + } catch (Exception e) { + logger.error("Failed to load ASN database " + asnFile, e); + } + } + + public Optional getAsnInfo(int asn) { + return Optional.ofNullable(asns.get(asn)); + } + + static AsnInfo parseAsnFileLine(String line) { + line = line.trim(); + + try { + int numEnd = line.indexOf(' '); + String num = line.substring(0, numEnd); + + int asn = Integer.parseInt(num); + + int orgStart = numEnd + 1; + int orgEnd = line.lastIndexOf(','); + if (orgEnd < 0 || orgEnd < orgStart + 1) { + orgEnd = line.length(); + } + + String org = line.substring(orgStart, orgEnd); + String country = ""; + if (orgEnd + 1 < line.length()) { + country = line.substring(orgEnd + 1).trim(); + } + + if ("UNALLOCATED".equals(org)) { + return null; + } + + return new AsnInfo(asn, country, org); + } + catch (Exception ex) { + logger.warn("Failed to parse ASN line: {}", line); + return null; + } + } +} diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/GeoIpDictionary.java b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/GeoIpDictionary.java index 67dd6366..acd2fdfd 100644 --- a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/GeoIpDictionary.java +++ b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/GeoIpDictionary.java @@ -1,55 +1,33 @@ package nu.marginalia.geoip; -import com.opencsv.CSVReader; import nu.marginalia.WmsaHome; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.FileReader; import java.net.InetAddress; -import java.util.TreeMap; +import java.util.Optional; public class GeoIpDictionary { - private volatile TreeMap ranges = null; + private volatile AsnTable asnTable = null; + private volatile AsnMapping asnMapping = null; private static final Logger logger = LoggerFactory.getLogger(GeoIpDictionary.class); - record IpRange(long from, long to, String country) {} public GeoIpDictionary() { Thread.ofPlatform().start(() -> { - try (var reader = new CSVReader(new FileReader(WmsaHome.getIPLocationDatabse().toFile()))) { - var dict = new TreeMap(); - - for (;;) { - String[] vals = reader.readNext(); - if (vals == null) { - break; - } - var range = new IpRange(Long.parseLong(vals[0]), - Long.parseLong(vals[1]), - vals[2]); - dict.put(range.from, range); - } - ranges = dict; - logger.info("Loaded {} IP ranges", ranges.size()); - } catch (Exception e) { - ranges = new TreeMap<>(); - throw new RuntimeException(e); - } - finally { - synchronized (this) { - this.notifyAll(); - } - } + this.asnTable = new AsnTable(WmsaHome.getAsnInfoDatabase()); + logger.info("Loaded ASN table"); + this.asnMapping = new AsnMapping(WmsaHome.getAsnMappingDatabase()); + logger.info("Loaded ASN mapping"); }); } public boolean isReady() { - return null != ranges; + return null != asnMapping; } public boolean waitReady() { - while (null == ranges) { + while (null == asnMapping) { try { synchronized (this) { this.wait(1000); @@ -61,32 +39,30 @@ public class GeoIpDictionary { return true; } - public String getCountry(String ip) { + public Optional getAsnInfo(String ip) { try { - return getCountry(InetAddress.getByName(ip)); + return getAsnInfo(InetAddress.getByName(ip)); } catch (Exception e) { - return ""; + e.printStackTrace(); + return Optional.empty(); } } - public String getCountry(InetAddress address) { - if (null == ranges) { // not loaded yet or failed to load - return ""; + public Optional getAsnInfo(int ipAddress) { + if (null == asnTable) { // not loaded yet or failed to load + return Optional.empty(); } + return asnMapping + .getAsnNumber(ipAddress) + .flatMap(asn -> asnTable.getAsnInfo(asn)); + } + + public Optional getAsnInfo(InetAddress address) { byte[] bytes = address.getAddress(); - long ival = ((long)bytes[0]&0xFF) << 24 | ((long)bytes[1]&0xFF) << 16 | ((long)bytes[2]&0xFF)<< 8 | ((long)bytes[3]&0xFF); - Long key = ranges.floorKey(ival); - if (null == key) { - return ""; - } + int ival = (int) (((long)bytes[0]&0xFF) << 24 | ((long)bytes[1]&0xFF) << 16 | ((long)bytes[2]&0xFF)<< 8 | ((long)bytes[3]&0xFF)); - var range = ranges.get(key); - if (ival >= key && ival < range.to) { - return range.country; - } - - return ""; + return getAsnInfo(ival); } } diff --git a/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnMappingTest.java b/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnMappingTest.java new file mode 100644 index 00000000..23dbee2e --- /dev/null +++ b/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnMappingTest.java @@ -0,0 +1,55 @@ +package nu.marginalia.geoip; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.net.InetAddress; +import java.net.UnknownHostException; + +import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +public class AsnMappingTest { + @Test + public void testParseAsnMappingFileLine() throws UnknownHostException { + // Test Case 1: Valid ASN Mapping Line + String input1 = "192.0.2.0/24\t65536"; + AsnMapping.AsnMappingRecord result1 = AsnMapping.parseAsnMappingFileLine(input1); + assertNotNull(result1, "The result should not be null for valid data"); + assertEquals(65536, result1.asn(), "The asn is not as expected"); + + // Test Case 2: Invalid ASN Mapping Line - Different format + String input2 = "nah I am just a string, not an ASN Mapping Line..."; + AsnMapping.AsnMappingRecord result2 = AsnMapping.parseAsnMappingFileLine(input2); + assertNull(result2, "The result should be null for invalid data"); + + // Test Case 3: Invalid ASN Mapping Line - Null input + String input3 = null; + AsnMapping.AsnMappingRecord result3 = AsnMapping.parseAsnMappingFileLine(input3); + assertNull(result3, "The result should be null for null input"); + + // Test Case 4: Invalid ASN Mapping Line - Empty string + String input4 = ""; + AsnMapping.AsnMappingRecord result4 = AsnMapping.parseAsnMappingFileLine(input4); + assertNull(result4, "The result should be null for empty string"); + + // Test Case 5: Invalid ASN Mapping Line - One part + String input5 = "192.0.2.0/24"; + AsnMapping.AsnMappingRecord result5 = AsnMapping.parseAsnMappingFileLine(input5); + assertNull(result5, "The result should be null for a string with only one part"); + + } + + @Test + public void testIpBounds() throws UnknownHostException { + String input7 = "193.183.0.0/24\t207825"; + AsnMapping.AsnMappingRecord result7 = AsnMapping.parseAsnMappingFileLine(input7); + assertNotNull(result7, "The result should not be null for valid data"); + var ip = InetAddress.getAllByName("193.183.0.0"); + byte[] ipBytes = ip[0].getAddress(); + + int ipInt = (int) (((long)ipBytes[0]&0xFF) << 24 | ((long)ipBytes[1]&0xFF) << 16 | ((long)ipBytes[2]&0xFF)<< 8 | ((long)ipBytes[3]&0xFF)); + + assertTrue(result7.contains(ipInt)); + } +} \ No newline at end of file diff --git a/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnTableTest.java b/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnTableTest.java new file mode 100644 index 00000000..c7c43f31 --- /dev/null +++ b/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnTableTest.java @@ -0,0 +1,55 @@ +package nu.marginalia.geoip; + +import nu.marginalia.geoip.AsnTable; +import nu.marginalia.geoip.AsnTable.AsnInfo; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +public class AsnTableTest { + + /** + * This class is to test the static method parseAsnFileLine of the AsnTable class. + * This method parses a line from an ASN table file into an AsnInfo instance, + * which holds ASN number, country and organization string. + */ + + @Test + public void testParseAsnFileLine_ShouldReturnNullWhenUnallocated() { + String unallocatedLine = " 1 UNALLOCATED"; + AsnInfo result = AsnTable.parseAsnFileLine(unallocatedLine); + assertNull(result, "Parse ASN File Line output should be null for unallocated ASN"); + } + + @Test + public void testParseAsnFileLine_ShouldReturnNullWhenInputIsNotParsable() { + String unparsableLine = " NotParsable Line"; + AsnInfo result = AsnTable.parseAsnFileLine(unparsableLine); + assertNull(result, "Parse ASN File Line output should be null for unparsable lines"); + } + + @Test + public void testParseAsnFileLine_AllFieldsParsedCorrectly() { + String asnLineWithAllFields = "123456 Company,US "; + AsnInfo expected = new AsnInfo(123456, "US", "Company"); + AsnInfo actual = AsnTable.parseAsnFileLine(asnLineWithAllFields); + assertEquals(expected, actual, "Parse ASN File Line output should match expected AsnInfo when line is correctly formatted"); + } + + @Test + public void testParseAsnFileLine_MultipleCommasInOrg() { + String asnLineWithAllFields = "123456 Company, Inc., US "; + AsnInfo expected = new AsnInfo(123456, "US", "Company, Inc."); + AsnInfo actual = AsnTable.parseAsnFileLine(asnLineWithAllFields); + assertEquals(expected, actual, "Parse ASN File Line output should match expected AsnInfo when line is correctly formatted"); + } + + + @Test + public void testParseAsnFileLine_NoCountry() { + String asnLineWithoutCountry = "123456 Company"; + AsnInfo expected = new AsnInfo(123456, "", "Company"); + AsnInfo actual = AsnTable.parseAsnFileLine(asnLineWithoutCountry); + assertEquals(expected, actual, "Parse ASN File Line output should match expected AsnInfo when line lacks country"); + } +} \ No newline at end of file diff --git a/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/GeoIpDictionaryTest.java b/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/GeoIpDictionaryTest.java new file mode 100644 index 00000000..08a7ea4d --- /dev/null +++ b/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/GeoIpDictionaryTest.java @@ -0,0 +1,16 @@ +package nu.marginalia.geoip; + +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +@Tag("slow") +class GeoIpDictionaryTest { + + @Test + public void testAsnResolution() { + GeoIpDictionary geoIpDictionary = new GeoIpDictionary(); + geoIpDictionary.waitReady(); + System.out.println(geoIpDictionary.getAsnInfo("193.183.0.162")); + } + +} \ No newline at end of file diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java index e9794aad..5187582a 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java +++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java @@ -124,10 +124,10 @@ public class DomainProcessor { terms.add("ip:"+ip); - String ipCountryCode = geoIpDictionary.getCountry(ip).toLowerCase(); - if (!ipCountryCode.isBlank()) { - terms.add("ip:"+ipCountryCode); - } + geoIpDictionary.getAsnInfo(ip).ifPresent(asnInfo -> { + terms.add("asn:"+asnInfo.asn()); + terms.add("ip:"+asnInfo.country()); + }); if (cookies) { terms.add(HtmlFeature.COOKIES.getKeyword()); diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb b/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb index 88b6ad84..5b71b59c 100644 --- a/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb +++ b/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb @@ -108,11 +108,6 @@

Open Source

The search engine is open source with an AGPL license. The sources can be perused at https://git.marginalia.nu/. -

Data Sources

- IP geolocation is sourced from the IP2Location LITE data available from - https://lite.ip2location.com/ - under - CC-BY-SA 4.0. diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-indexed.hdb b/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-indexed.hdb index 979226a1..30ed1720 100644 --- a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-indexed.hdb +++ b/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-indexed.hdb @@ -7,5 +7,6 @@ Pages Crawled: {{pagesFetched}}
Pages Indexed: {{pagesIndexed}}
IP: {{ip}} {{#if ipCountry}}{{getIpFlag}}{{/if}}
+ ASN: {{asn}} {{asnOrg}}

\ No newline at end of file diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/DomainInformationService.java b/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/DomainInformationService.java index 690509db..bac3f539 100644 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/DomainInformationService.java +++ b/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/DomainInformationService.java @@ -60,7 +60,12 @@ public class DomainInformationService { String ip = rs.getString("IP"); builder.ip(ip); - builder.ipCountry(geoIpDictionary.getCountry(ip)); + var isnInfo = geoIpDictionary.getAsnInfo(ip); + if (isnInfo.isPresent()) { + builder.asn(isnInfo.get().asn()); + builder.ipCountry(isnInfo.get().country()); + builder.asnOrg(isnInfo.get().org()); + } builder.nodeAffinity(rs.getInt("NODE_AFFINITY")); builder.domain(new EdgeDomain(rs.getString("DOMAIN_NAME"))); diff --git a/run/setup.sh b/run/setup.sh index f55817e3..e543650a 100755 --- a/run/setup.sh +++ b/run/setup.sh @@ -29,8 +29,8 @@ download_model model/ngrams.bin https://downloads.marginalia.nu/model/ngrams.bin download_model model/tfreq-new-algo3.bin https://downloads.marginalia.nu/model/tfreq-new-algo3.bin download_model model/lid.176.ftz https://downloads.marginalia.nu/model/lid.176.ftz -download_model data/IP2LOCATION-LITE-DB1.CSV.ZIP https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP -unzip -qn -d data data/IP2LOCATION-LITE-DB1.CSV.ZIP +download_model data/asn-data-raw-table https://thyme.apnic.net/current/data-raw-table +download_model data/asn-used-autnums https://thyme.apnic.net/current/data-used-autnums download_model data/adblock.txt https://downloads.marginalia.nu/data/adblock.txt if [ ! -f data/suggestions.txt ]; then From c92f1b8df876a0c82a1d5e927842db3c681327b7 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Sun, 17 Dec 2023 15:03:00 +0100 Subject: [PATCH 2/2] (geo-ip) Revert removal of ip2location logic We do both ip2location and ASN data. The change also adds some keywords based on autonomous system information, on a somewhat experimental basis. It would be neat to be able to e.g. exclude cloud services or just e.g. cloudflare from the search results. --- .../client/model/DomainInformation.java | 2 + .../ip_blocklist/GeoIpBlocklist.java | 5 +- .../nu/marginalia/geoip/GeoIpDictionary.java | 32 +++++++++- .../geoip/{ => sources}/AsnMapping.java | 38 +++--------- .../geoip/{ => sources}/AsnTable.java | 2 +- .../geoip/sources/IP2LocationMapping.java | 40 ++++++++++++ .../geoip/sources/IpRangeMapping.java | 41 ++++++++++++ .../nu/marginalia/geoip/AsnMappingTest.java | 55 ---------------- .../nu/marginalia/geoip/AsnTableTest.java | 55 ---------------- .../converting/processor/DomainProcessor.java | 62 +++++++++++++++++-- .../templates/search/parts/search-footer.hdb | 5 ++ .../site-info/site-info-index-indexed.hdb | 3 +- .../domains/DomainInformationService.java | 12 ++-- run/setup.sh | 3 + 14 files changed, 196 insertions(+), 159 deletions(-) rename code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/{ => sources}/AsnMapping.java (60%) rename code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/{ => sources}/AsnTable.java (98%) create mode 100644 code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IP2LocationMapping.java create mode 100644 code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IpRangeMapping.java delete mode 100644 code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnMappingTest.java delete mode 100644 code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnTableTest.java diff --git a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DomainInformation.java b/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DomainInformation.java index 625fd4be..5be64a97 100644 --- a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DomainInformation.java +++ b/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DomainInformation.java @@ -24,6 +24,8 @@ public class DomainInformation { String ip; Integer asn; String asnOrg; + String asnCountry; + String ipCountry; String state; diff --git a/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java b/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java index c836966d..79ca6847 100644 --- a/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java +++ b/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java @@ -2,7 +2,6 @@ package nu.marginalia.ip_blocklist; import com.google.inject.Inject; import com.google.inject.Singleton; -import nu.marginalia.geoip.AsnTable; import nu.marginalia.geoip.GeoIpDictionary; import nu.marginalia.model.EdgeDomain; import org.slf4j.Logger; @@ -44,9 +43,7 @@ public class GeoIpBlocklist { public String getCountry(EdgeDomain domain) { try { - return ipDictionary.getAsnInfo(InetAddressCache.getAddress(domain)) - .map(AsnTable.AsnInfo::country) - .orElse("-"); + return ipDictionary.getCountry(InetAddressCache.getAddress(domain)); } catch (Throwable ex) { logger.debug("Failed to resolve {}", domain); diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/GeoIpDictionary.java b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/GeoIpDictionary.java index acd2fdfd..3f68dd2b 100644 --- a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/GeoIpDictionary.java +++ b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/GeoIpDictionary.java @@ -1,6 +1,9 @@ package nu.marginalia.geoip; import nu.marginalia.WmsaHome; +import nu.marginalia.geoip.sources.AsnMapping; +import nu.marginalia.geoip.sources.AsnTable; +import nu.marginalia.geoip.sources.IP2LocationMapping; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -8,10 +11,12 @@ import java.net.InetAddress; import java.util.Optional; public class GeoIpDictionary { + private volatile IP2LocationMapping ip2locMapping = null; private volatile AsnTable asnTable = null; private volatile AsnMapping asnMapping = null; private static final Logger logger = LoggerFactory.getLogger(GeoIpDictionary.class); + volatile boolean ready = false; public GeoIpDictionary() { Thread.ofPlatform().start(() -> { @@ -19,15 +24,22 @@ public class GeoIpDictionary { logger.info("Loaded ASN table"); this.asnMapping = new AsnMapping(WmsaHome.getAsnMappingDatabase()); logger.info("Loaded ASN mapping"); + this.ip2locMapping = new IP2LocationMapping(WmsaHome.getIPLocationDatabse()); + + ready = true; + + synchronized (this) { + this.notifyAll(); + } }); } public boolean isReady() { - return null != asnMapping; + return ready; } public boolean waitReady() { - while (null == asnMapping) { + while (!ready) { try { synchronized (this) { this.wait(1000); @@ -39,6 +51,22 @@ public class GeoIpDictionary { return true; } + + public String getCountry(String ip) { + if (null == ip2locMapping) { + return ""; + } + return ip2locMapping.getCountry(ip); + } + + public String getCountry(InetAddress address) { + if (null == ip2locMapping) { + return ""; + } + + return ip2locMapping.getCountry(address); + } + public Optional getAsnInfo(String ip) { try { return getAsnInfo(InetAddress.getByName(ip)); diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnMapping.java b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/AsnMapping.java similarity index 60% rename from code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnMapping.java rename to code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/AsnMapping.java index 5f37f5c9..22a5b193 100644 --- a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnMapping.java +++ b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/AsnMapping.java @@ -1,4 +1,4 @@ -package nu.marginalia.geoip; +package nu.marginalia.geoip.sources; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -6,55 +6,35 @@ import org.slf4j.LoggerFactory; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Objects; import java.util.Optional; -import java.util.TreeMap; public class AsnMapping { private static final Logger logger = LoggerFactory.getLogger(AsnMapping.class); - private final TreeMap asns = new TreeMap<>(Integer::compareUnsigned); - - public record AsnMappingRecord(int ipStart, int ipEnd, int asn) { - public boolean contains(int ip) { - return Integer.compareUnsigned(ipStart, ip) <= 0 - && Integer.compareUnsigned(ip, ipEnd) < 0; - } - } + private final IpRangeMapping ranges = new IpRangeMapping<>(); public AsnMapping(Path databaseFile) { try (var reader = Files.lines(databaseFile)) { - reader.map(AsnMapping::parseAsnMappingFileLine).filter(Objects::nonNull).forEach(asn -> asns.put(asn.ipStart(), asn)); + reader.forEach(this::parseAsnMappingFileLine); } catch (Exception e) { logger.error("Failed to load ASN mapping" + databaseFile, e); } } public Optional getAsnNumber(int ip) { - var entry = asns.floorEntry(ip); - - if (null == entry) { - return Optional.empty(); - } - - var asn = entry.getValue(); - if (asn.contains(ip)) { - return Optional.of(asn.asn()); - } - - return Optional.empty(); + return ranges.get(ip); } - public static AsnMappingRecord parseAsnMappingFileLine(String s) { + private void parseAsnMappingFileLine(String s) { try { String[] parts = StringUtils.split(s, '\t'); if (parts.length != 2) { - return null; + return; } // Parse CIDR notation, e.g. 127.0.0.1/24 -> ["127.0.0.1", "24"] String[] cidrParts = StringUtils.split(parts[0], '/'); if (cidrParts.length != 2) { - return null; + return; } // Parse IP address and subnet mask @@ -72,12 +52,12 @@ public class AsnMapping { ipStart &= 0xFFFFFFFF << (32 - ipMask); ipEnd |= 0xFFFFFFFF >>> ipMask; - return new AsnMappingRecord(ipStart, ipEnd, Integer.parseInt(parts[1])); + + ranges.add(ipStart, ipEnd, Integer.parseInt(parts[1])); } catch (Exception ex) { logger.warn("Failed to parse ASN mapping line: {}", s); - return null; } } diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnTable.java b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/AsnTable.java similarity index 98% rename from code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnTable.java rename to code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/AsnTable.java index 67b3966f..fef14c5d 100644 --- a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/AsnTable.java +++ b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/AsnTable.java @@ -1,4 +1,4 @@ -package nu.marginalia.geoip; +package nu.marginalia.geoip.sources; import nu.marginalia.WmsaHome; import org.slf4j.Logger; diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IP2LocationMapping.java b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IP2LocationMapping.java new file mode 100644 index 00000000..90dd1991 --- /dev/null +++ b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IP2LocationMapping.java @@ -0,0 +1,40 @@ +package nu.marginalia.geoip.sources; + +import com.opencsv.CSVReader; + +import java.net.InetAddress; +import java.nio.file.Files; +import java.nio.file.Path; + +/** Load an IP2LOCATION LITE database file and provide a method to look up the country for an IP address. + */ +public class IP2LocationMapping { + private final IpRangeMapping ranges = new IpRangeMapping<>(); + + public IP2LocationMapping(Path filename) { + try (var reader = new CSVReader(Files.newBufferedReader(filename))) { + for (;;) { + String[] vals = reader.readNext(); + if (vals == null) { + break; + } + + ranges.add(Integer.parseUnsignedInt(vals[0]), Integer.parseUnsignedInt(vals[1]), vals[2]); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public String getCountry(String ip) { + try { + return getCountry(InetAddress.getByName(ip)); + } catch (Exception e) { + return ""; + } + } + + public String getCountry(InetAddress address) { + return ranges.get(address).orElse(""); + } +} diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IpRangeMapping.java b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IpRangeMapping.java new file mode 100644 index 00000000..444c023b --- /dev/null +++ b/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IpRangeMapping.java @@ -0,0 +1,41 @@ +package nu.marginalia.geoip.sources; + +import java.net.InetAddress; +import java.util.Optional; +import java.util.TreeMap; + +public class IpRangeMapping { + private final TreeMap> ranges = new TreeMap<>(Integer::compareUnsigned); + + public record IpRangeWithCountry(int ipStart, int ipEnd, T value) { + public boolean contains(int ip) { + return Integer.compareUnsigned(ipStart, ip) <= 0 + && Integer.compareUnsigned(ip, ipEnd) < 0; + } + } + + public void add(int ipStart, int ipEnd, T value) { + ranges.put(ipStart, new IpRangeWithCountry<>(ipStart, ipEnd, value)); + } + + public Optional get(InetAddress address) { + byte[] bytes = address.getAddress(); + int ival = (int) (((long) bytes[0] & 0xFF) << 24 | ((long) bytes[1] & 0xFF) << 16 | ((long) bytes[2] & 0xFF) << 8 | ((long) bytes[3] & 0xFF)); + + return get(ival); + } + + public Optional get(int ipUnsignedInt) { + Integer key = ranges.floorKey(ipUnsignedInt); + if (null == key) { + return Optional.empty(); + } + + var range = ranges.get(key); + if (range.contains(ipUnsignedInt)) { + return Optional.of(range.value); + } + + return Optional.empty(); + } +} diff --git a/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnMappingTest.java b/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnMappingTest.java deleted file mode 100644 index 23dbee2e..00000000 --- a/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnMappingTest.java +++ /dev/null @@ -1,55 +0,0 @@ -package nu.marginalia.geoip; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import java.net.InetAddress; -import java.net.UnknownHostException; - -import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -public class AsnMappingTest { - @Test - public void testParseAsnMappingFileLine() throws UnknownHostException { - // Test Case 1: Valid ASN Mapping Line - String input1 = "192.0.2.0/24\t65536"; - AsnMapping.AsnMappingRecord result1 = AsnMapping.parseAsnMappingFileLine(input1); - assertNotNull(result1, "The result should not be null for valid data"); - assertEquals(65536, result1.asn(), "The asn is not as expected"); - - // Test Case 2: Invalid ASN Mapping Line - Different format - String input2 = "nah I am just a string, not an ASN Mapping Line..."; - AsnMapping.AsnMappingRecord result2 = AsnMapping.parseAsnMappingFileLine(input2); - assertNull(result2, "The result should be null for invalid data"); - - // Test Case 3: Invalid ASN Mapping Line - Null input - String input3 = null; - AsnMapping.AsnMappingRecord result3 = AsnMapping.parseAsnMappingFileLine(input3); - assertNull(result3, "The result should be null for null input"); - - // Test Case 4: Invalid ASN Mapping Line - Empty string - String input4 = ""; - AsnMapping.AsnMappingRecord result4 = AsnMapping.parseAsnMappingFileLine(input4); - assertNull(result4, "The result should be null for empty string"); - - // Test Case 5: Invalid ASN Mapping Line - One part - String input5 = "192.0.2.0/24"; - AsnMapping.AsnMappingRecord result5 = AsnMapping.parseAsnMappingFileLine(input5); - assertNull(result5, "The result should be null for a string with only one part"); - - } - - @Test - public void testIpBounds() throws UnknownHostException { - String input7 = "193.183.0.0/24\t207825"; - AsnMapping.AsnMappingRecord result7 = AsnMapping.parseAsnMappingFileLine(input7); - assertNotNull(result7, "The result should not be null for valid data"); - var ip = InetAddress.getAllByName("193.183.0.0"); - byte[] ipBytes = ip[0].getAddress(); - - int ipInt = (int) (((long)ipBytes[0]&0xFF) << 24 | ((long)ipBytes[1]&0xFF) << 16 | ((long)ipBytes[2]&0xFF)<< 8 | ((long)ipBytes[3]&0xFF)); - - assertTrue(result7.contains(ipInt)); - } -} \ No newline at end of file diff --git a/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnTableTest.java b/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnTableTest.java deleted file mode 100644 index c7c43f31..00000000 --- a/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/AsnTableTest.java +++ /dev/null @@ -1,55 +0,0 @@ -package nu.marginalia.geoip; - -import nu.marginalia.geoip.AsnTable; -import nu.marginalia.geoip.AsnTable.AsnInfo; -import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; - -public class AsnTableTest { - - /** - * This class is to test the static method parseAsnFileLine of the AsnTable class. - * This method parses a line from an ASN table file into an AsnInfo instance, - * which holds ASN number, country and organization string. - */ - - @Test - public void testParseAsnFileLine_ShouldReturnNullWhenUnallocated() { - String unallocatedLine = " 1 UNALLOCATED"; - AsnInfo result = AsnTable.parseAsnFileLine(unallocatedLine); - assertNull(result, "Parse ASN File Line output should be null for unallocated ASN"); - } - - @Test - public void testParseAsnFileLine_ShouldReturnNullWhenInputIsNotParsable() { - String unparsableLine = " NotParsable Line"; - AsnInfo result = AsnTable.parseAsnFileLine(unparsableLine); - assertNull(result, "Parse ASN File Line output should be null for unparsable lines"); - } - - @Test - public void testParseAsnFileLine_AllFieldsParsedCorrectly() { - String asnLineWithAllFields = "123456 Company,US "; - AsnInfo expected = new AsnInfo(123456, "US", "Company"); - AsnInfo actual = AsnTable.parseAsnFileLine(asnLineWithAllFields); - assertEquals(expected, actual, "Parse ASN File Line output should match expected AsnInfo when line is correctly formatted"); - } - - @Test - public void testParseAsnFileLine_MultipleCommasInOrg() { - String asnLineWithAllFields = "123456 Company, Inc., US "; - AsnInfo expected = new AsnInfo(123456, "US", "Company, Inc."); - AsnInfo actual = AsnTable.parseAsnFileLine(asnLineWithAllFields); - assertEquals(expected, actual, "Parse ASN File Line output should match expected AsnInfo when line is correctly formatted"); - } - - - @Test - public void testParseAsnFileLine_NoCountry() { - String asnLineWithoutCountry = "123456 Company"; - AsnInfo expected = new AsnInfo(123456, "", "Company"); - AsnInfo actual = AsnTable.parseAsnFileLine(asnLineWithoutCountry); - assertEquals(expected, actual, "Parse ASN File Line output should match expected AsnInfo when line lacks country"); - } -} \ No newline at end of file diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java index 9abb350e..d12d27f8 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java +++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java @@ -11,12 +11,14 @@ import nu.marginalia.converting.processor.logic.links.LinkGraph; import nu.marginalia.crawling.io.SerializableCrawlDataStream; import nu.marginalia.crawling.model.*; import nu.marginalia.geoip.GeoIpDictionary; +import nu.marginalia.geoip.sources.AsnTable; import nu.marginalia.model.crawl.DomainIndexingState; import nu.marginalia.converting.model.ProcessedDomain; import nu.marginalia.model.EdgeDomain; import nu.marginalia.converting.processor.logic.links.TopKeywords; import nu.marginalia.converting.processor.logic.LshDocumentDeduplicator; import nu.marginalia.model.crawl.HtmlFeature; +import org.apache.commons.lang3.StringUtils; import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -121,12 +123,7 @@ public class DomainProcessor { List terms = new ArrayList<>(); - terms.add("ip:"+ip); - - geoIpDictionary.getAsnInfo(ip).ifPresent(asnInfo -> { - terms.add("asn:"+asnInfo.asn()); - terms.add("ip:"+asnInfo.country()); - }); + addIpInfo(terms, ip); if (cookies) { terms.add(HtmlFeature.COOKIES.getKeyword()); @@ -156,6 +153,59 @@ public class DomainProcessor { return ret; } + private void addIpInfo(List terms, String ip) { + terms.add("ip:"+ip); + + // Add IP location country as a term + String country = geoIpDictionary.getCountry(ip); + if (!country.isBlank()) { // use the ip:-prefix as there's no real confusion between e.g. ip:127.0.0.1 and ip:uk + terms.add("ip:"+country.toLowerCase()); + } + + // Add ASN as a term + geoIpDictionary.getAsnInfo(ip).ifPresent(asnInfo -> { + terms.add("as:"+asnInfo.asn()); + + for (var orgPart : StringUtils.split(asnInfo.org(), '-')) { + terms.add("as:"+orgPart.toLowerCase()); + } + + if (isCloudy(asnInfo)) { + terms.add("special:cloud"); + } + }); + + + } + + private boolean isCloudy(AsnTable.AsnInfo asnInfo) { + String org = asnInfo.org(); + + if (org.contains("MICROSOFT-AZURE")) { + return true; + } + if(org.contains("AMAZON")) { + return true; + } + if (org.contains("CLOUDFLARE")) { + return true; + } + if (org.contains("GOOGLE-CLOUD")) { + return true; + } + if (org.contains("DIGITALOCEAN")) { + return true; + } + if (org.contains("ALIBABA")) { + return true; + } + if (org.contains("CLOUDFLARE")) { + return true; + } + + return false; + } + private static final Pattern academicPattern = Pattern.compile(".*\\.(ac|edu)\\.[a-z]{2}$"); private boolean isAcademicDomain(EdgeDomain domain) { diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb b/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb index 5b71b59c..88b6ad84 100644 --- a/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb +++ b/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb @@ -108,6 +108,11 @@

Open Source

The search engine is open source with an AGPL license. The sources can be perused at https://git.marginalia.nu/. +

Data Sources

+ IP geolocation is sourced from the IP2Location LITE data available from + https://lite.ip2location.com/ + under + CC-BY-SA 4.0. diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-indexed.hdb b/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-indexed.hdb index 30ed1720..e36a896b 100644 --- a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-indexed.hdb +++ b/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-indexed.hdb @@ -6,7 +6,8 @@ Pages Known: {{pagesKnown}}
Pages Crawled: {{pagesFetched}}
Pages Indexed: {{pagesIndexed}}
+

IP: {{ip}} {{#if ipCountry}}{{getIpFlag}}{{/if}}
- ASN: {{asn}} {{asnOrg}}
+ AS: {{#if asn}}AS{{asn}} {{asnOrg}} {{asnCountry}}{{/if}}

\ No newline at end of file diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/DomainInformationService.java b/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/DomainInformationService.java index bac3f539..69c82bdd 100644 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/DomainInformationService.java +++ b/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/DomainInformationService.java @@ -60,12 +60,12 @@ public class DomainInformationService { String ip = rs.getString("IP"); builder.ip(ip); - var isnInfo = geoIpDictionary.getAsnInfo(ip); - if (isnInfo.isPresent()) { - builder.asn(isnInfo.get().asn()); - builder.ipCountry(isnInfo.get().country()); - builder.asnOrg(isnInfo.get().org()); - } + geoIpDictionary.getAsnInfo(ip).ifPresent(asnInfo -> { + builder.asn(asnInfo.asn()); + builder.asnOrg(asnInfo.org()); + builder.asnCountry(asnInfo.country()); + }); + builder.ipCountry(geoIpDictionary.getCountry(ip)); builder.nodeAffinity(rs.getInt("NODE_AFFINITY")); builder.domain(new EdgeDomain(rs.getString("DOMAIN_NAME"))); diff --git a/run/setup.sh b/run/setup.sh index e543650a..1163d4b4 100755 --- a/run/setup.sh +++ b/run/setup.sh @@ -29,6 +29,9 @@ download_model model/ngrams.bin https://downloads.marginalia.nu/model/ngrams.bin download_model model/tfreq-new-algo3.bin https://downloads.marginalia.nu/model/tfreq-new-algo3.bin download_model model/lid.176.ftz https://downloads.marginalia.nu/model/lid.176.ftz +download_model data/IP2LOCATION-LITE-DB1.CSV.ZIP https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP +unzip -qn -d data data/IP2LOCATION-LITE-DB1.CSV.ZIP + download_model data/asn-data-raw-table https://thyme.apnic.net/current/data-raw-table download_model data/asn-used-autnums https://thyme.apnic.net/current/data-used-autnums