diff --git a/code/common/db/src/main/resources/db/migration/V24_02_0_000__drop_domain_links.sql b/code/common/db/src/main/resources/db/migration/V24_02_0_000__drop_domain_links.sql new file mode 100644 index 00000000..f0f18df8 --- /dev/null +++ b/code/common/db/src/main/resources/db/migration/V24_02_0_000__drop_domain_links.sql @@ -0,0 +1 @@ +DROP TABLE EC_DOMAIN_LINK; \ No newline at end of file diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/SelectingDomainLinkDb.java b/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DelayingDomainLinkDb.java similarity index 73% rename from code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/SelectingDomainLinkDb.java rename to code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DelayingDomainLinkDb.java index d6220336..3d2c7270 100644 --- a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/SelectingDomainLinkDb.java +++ b/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DelayingDomainLinkDb.java @@ -1,9 +1,7 @@ package nu.marginalia.linkdb.dlinks; import com.google.inject.name.Named; -import com.zaxxer.hikari.HikariDataSource; import gnu.trove.list.array.TIntArrayList; -import nu.marginalia.service.module.ServiceConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -12,18 +10,17 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; -/** DomainLinkDb that delegates to either a FileDomainLinkDb or a SqlDomainLinkDb, - * depending on whether the file exists. This is part of the migration path to - * always using FileDomainLinkDb. +/** DomainLinkDb that delegates to a FileDomainLinkDb, but handles the case where the database + * is not yet loaded. This speeds up the startup of the index service, as the database is + * loaded in a separate thread. 
*/ -public class SelectingDomainLinkDb implements DomainLinkDb { - private final static Logger logger = LoggerFactory.getLogger(SelectingDomainLinkDb.class); +public class DelayingDomainLinkDb implements DomainLinkDb { + private final static Logger logger = LoggerFactory.getLogger(DelayingDomainLinkDb.class); private volatile DomainLinkDb currentDb; private final Path filename; - public SelectingDomainLinkDb(@Named("domain-linkdb-file") Path filename, - ServiceConfiguration serviceConfiguration, - HikariDataSource dataSource) { + + public DelayingDomainLinkDb(@Named("domain-linkdb-file") Path filename) { this.filename = filename; // Load the database in a separate thread, so that the constructor can return @@ -32,12 +29,7 @@ public class SelectingDomainLinkDb implements DomainLinkDb { Thread.ofPlatform().start(() -> { try { - if (Files.exists(filename)) { - currentDb = new FileDomainLinkDb(filename); - } - else { - currentDb = new SqlDomainLinkDb(filename, dataSource, serviceConfiguration); - } + currentDb = new FileDomainLinkDb(filename); logger.info("Loaded linkdb"); } catch (Exception e) { logger.error("Failed to load linkdb", e); diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/FileDomainLinkDb.java b/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/FileDomainLinkDb.java index c548ab81..0fda3467 100644 --- a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/FileDomainLinkDb.java +++ b/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/FileDomainLinkDb.java @@ -23,7 +23,9 @@ public class FileDomainLinkDb implements DomainLinkDb { public FileDomainLinkDb(@Named("domain-linkdb-file") Path filename) throws IOException { this.filename = filename; - loadInput(filename); + if (Files.exists(filename)) { + loadInput(filename); + } } @Override diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/SqlDomainLinkDb.java 
b/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/SqlDomainLinkDb.java deleted file mode 100644 index 883f8881..00000000 --- a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/SqlDomainLinkDb.java +++ /dev/null @@ -1,150 +0,0 @@ -package nu.marginalia.linkdb.dlinks; - -import com.google.inject.name.Named; -import com.zaxxer.hikari.HikariDataSource; -import gnu.trove.list.array.TIntArrayList; -import gnu.trove.list.array.TLongArrayList; -import nu.marginalia.service.module.ServiceConfiguration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardCopyOption; -import java.util.Arrays; - -/** DomainLinkDb implementation that goes through the motions of - * being a File-backed DomainLinkDb, but actually uses the legacy SQL database - * for loading the data. - *

- * This is part of the migration path to using FileDomainLinkDb. - */ -public class SqlDomainLinkDb implements DomainLinkDb { - private volatile long[] sourceToDest = new long[0]; - private volatile long[] destToSource = new long[0]; - private static final Logger logger = LoggerFactory.getLogger(SqlDomainLinkDb.class); - - private final Path filename; - private final HikariDataSource dataSource; - private final int node; - - public SqlDomainLinkDb(@Named("domain-linkdb-file") Path filename, - HikariDataSource dataSource, - ServiceConfiguration configuration) - { - this.filename = filename; - this.dataSource = dataSource; - - node = configuration.node(); - loadDb(); - } - - @Override - public void switchInput(Path newFilename) throws IOException { - throw new UnsupportedEncodingException(); - } - - public void loadDb() { - try (var conn = dataSource.getConnection(); - var stmt = conn.prepareStatement( - STR.""" - SELECT - SOURCE_DOMAIN_ID, - DEST_DOMAIN_ID - FROM EC_DOMAIN_LINK - INNER JOIN EC_DOMAIN - ON EC_DOMAIN.ID = EC_DOMAIN_LINK.SOURCE_DOMAIN_ID - WHERE NODE_AFFINITY=\{node} - """); - var rs = stmt.executeQuery()) - { - TLongArrayList sourceToDest = new TLongArrayList(10_000_000); - TLongArrayList destToSource = new TLongArrayList(10_000_000); - - while (rs.next()) { - long source = Integer.toUnsignedLong(rs.getInt(1)); - long dest = Integer.toUnsignedLong(rs.getInt(2)); - - sourceToDest.add((source << 32) | dest); - destToSource.add((dest << 32) | source); - } - - sourceToDest.sort(); - destToSource.sort(); - - this.sourceToDest = sourceToDest.toArray(); - this.destToSource = destToSource.toArray(); - } - catch (Exception ex) { - logger.error("Failed to load linkdb", ex); - } - - logger.info("LinkDB loaded, size = {}", sourceToDest.length); - } - - @Override - public TIntArrayList findDestinations(int source) { - return findRelated(sourceToDest, source); - } - - @Override - public TIntArrayList findSources(int dest) { - return findRelated(destToSource, 
dest); - } - - @Override - public int countDestinations(int source) { - return countRelated(sourceToDest, source); - } - - @Override - public int countSources(int dest) { - return countRelated(destToSource, dest); - } - - @Override - public void forEach(SourceDestConsumer consumer) { - for (long val : sourceToDest) { - consumer.accept((int) (val >>> 32), (int) (val & 0xFFFF_FFFFL)); - } - } - - private TIntArrayList findRelated(long[] range, int key) { - long keyLong = Integer.toUnsignedLong(key) << 32; - long nextKeyLong = Integer.toUnsignedLong(key + 1) << 32; - - int start = Arrays.binarySearch(range, keyLong); - - if (start < 0) { - // Key is not found, get the insertion point - start = -start - 1; - } - - TIntArrayList result = new TIntArrayList(); - - for (int i = start; i < range.length && range[i] < nextKeyLong; i++) { - result.add((int) (range[i] & 0xFFFF_FFFFL)); - } - - return result; - } - - private int countRelated(long[] range, int key) { - long keyLong = Integer.toUnsignedLong(key) << 32; - long nextKeyLong = Integer.toUnsignedLong(key + 1) << 32; - - int start = Arrays.binarySearch(range, keyLong); - - if (start < 0) { - // Key is not found, get the insertion point - start = -start - 1; - } - - int num = 0; - for (int i = start; i < range.length && range[i] < nextKeyLong; i++, num++); - return num; - } - -} diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcher.java b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcher.java index 1be9a6e2..ef0b83e6 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcher.java +++ b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcher.java @@ -38,10 +38,22 @@ public class RankingDomainFetcher { public void getDomains(Consumer consumer) { String query; if (getNames) { - query = "SELECT 
EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID WHERE NODE_AFFINITY>0 GROUP BY EC_DOMAIN.ID"; + query = """ + SELECT EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS + FROM EC_DOMAIN + INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID + WHERE NODE_AFFINITY>0 + GROUP BY EC_DOMAIN.ID + """; } else { - query = "SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID WHERE NODE_AFFINITY>0 GROUP BY EC_DOMAIN.ID"; + query = """ + SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS + FROM EC_DOMAIN + INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID + WHERE NODE_AFFINITY>0 + GROUP BY EC_DOMAIN.ID + """; } getDomains(query, consumer); @@ -51,10 +63,24 @@ public class RankingDomainFetcher { public void getPeripheralDomains(Consumer consumer) { String query; if (getNames) { - query = "SELECT EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID LEFT JOIN EC_DOMAIN_LINK ON SOURCE_DOMAIN_ID=EC_DOMAIN.ID WHERE ((INDEXED>1 AND IS_ALIVE) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND EC_DOMAIN_LINK.ID IS NULL GROUP BY EC_DOMAIN.ID"; + query = """ + SELECT EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS + FROM EC_DOMAIN + INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID + WHERE ((INDEXED>1 AND IS_ALIVE) + OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) + GROUP BY EC_DOMAIN.ID + """; } else { - query = "SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID LEFT JOIN EC_DOMAIN_LINK ON SOURCE_DOMAIN_ID=EC_DOMAIN.ID WHERE ((INDEXED>1 AND IS_ALIVE) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND EC_DOMAIN_LINK.ID IS NULL GROUP BY EC_DOMAIN.ID"; + query = """ + SELECT 
EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS + FROM EC_DOMAIN + INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID + WHERE ((INDEXED>1 AND IS_ALIVE) + OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) + GROUP BY EC_DOMAIN.ID + """; } getDomains(query, consumer); diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexModule.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexModule.java index dd5d87b1..526e34bd 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexModule.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexModule.java @@ -4,10 +4,8 @@ import com.google.inject.AbstractModule; import com.google.inject.Provides; import com.google.inject.Singleton; import com.google.inject.name.Named; -import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.linkdb.dlinks.DomainLinkDb; -import nu.marginalia.linkdb.dlinks.SelectingDomainLinkDb; -import nu.marginalia.service.module.ServiceConfiguration; +import nu.marginalia.linkdb.dlinks.DelayingDomainLinkDb; import nu.marginalia.storage.FileStorageService; import nu.marginalia.IndexLocations; import org.slf4j.Logger; @@ -29,14 +27,12 @@ public class IndexModule extends AbstractModule { @Provides @Singleton public DomainLinkDb domainLinkDb ( - FileStorageService storageService, - HikariDataSource dataSource, - ServiceConfiguration serviceConfiguration + FileStorageService storageService ) { Path path = IndexLocations.getLinkdbLivePath(storageService).resolve(DOMAIN_LINKS_FILE_NAME); - return new SelectingDomainLinkDb(path, serviceConfiguration, dataSource); + return new DelayingDomainLinkDb(path); } @Provides