(db) Retire the EC_DOMAIN_LINK table
Retire the EC_DOMAIN_LINK table, as its data has been migrated out of the database and into a file.
This commit is contained in:
parent
ef261cbbd7
commit
b15f47d80e
@ -0,0 +1 @@
|
||||
DROP TABLE EC_DOMAIN_LINK;
|
@ -1,9 +1,7 @@
|
||||
package nu.marginalia.linkdb.dlinks;
|
||||
|
||||
import com.google.inject.name.Named;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -12,18 +10,17 @@ import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
|
||||
/** DomainLinkDb that delegates to either a FileDomainLinkDb or a SqlDomainLinkDb,
|
||||
* depending on whether the file exists. This is part of the migration path to
|
||||
* always using FileDomainLinkDb.
|
||||
/** DomainLinkDb that delegates to a FileDomainLinkDb, but handles the case where the database
|
||||
* is not yet loaded. This speeds up the startup of the index service, as the database is
|
||||
* loaded in a separate thread.
|
||||
*/
|
||||
public class SelectingDomainLinkDb implements DomainLinkDb {
|
||||
private final static Logger logger = LoggerFactory.getLogger(SelectingDomainLinkDb.class);
|
||||
public class DelayingDomainLinkDb implements DomainLinkDb {
|
||||
private final static Logger logger = LoggerFactory.getLogger(DelayingDomainLinkDb.class);
|
||||
|
||||
private volatile DomainLinkDb currentDb;
|
||||
private final Path filename;
|
||||
public SelectingDomainLinkDb(@Named("domain-linkdb-file") Path filename,
|
||||
ServiceConfiguration serviceConfiguration,
|
||||
HikariDataSource dataSource) {
|
||||
|
||||
public DelayingDomainLinkDb(@Named("domain-linkdb-file") Path filename) {
|
||||
this.filename = filename;
|
||||
|
||||
// Load the database in a separate thread, so that the constructor can return
|
||||
@ -32,12 +29,7 @@ public class SelectingDomainLinkDb implements DomainLinkDb {
|
||||
|
||||
Thread.ofPlatform().start(() -> {
|
||||
try {
|
||||
if (Files.exists(filename)) {
|
||||
currentDb = new FileDomainLinkDb(filename);
|
||||
}
|
||||
else {
|
||||
currentDb = new SqlDomainLinkDb(filename, dataSource, serviceConfiguration);
|
||||
}
|
||||
logger.info("Loaded linkdb");
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to load linkdb", e);
|
@ -23,8 +23,10 @@ public class FileDomainLinkDb implements DomainLinkDb {
|
||||
public FileDomainLinkDb(@Named("domain-linkdb-file") Path filename) throws IOException {
|
||||
this.filename = filename;
|
||||
|
||||
if (Files.exists(filename)) {
|
||||
loadInput(filename);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void switchInput(Path newFilename) throws IOException {
|
||||
|
@ -1,150 +0,0 @@
|
||||
package nu.marginalia.linkdb.dlinks;
|
||||
|
||||
import com.google.inject.name.Named;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
import gnu.trove.list.array.TLongArrayList;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.Arrays;
|
||||
|
||||
/** DomainLinkDb implementation that goes through the motions of
|
||||
* being a File-backed DomainLinkDb, but actually uses the legacy SQL database
|
||||
* for loading the data.
|
||||
* <p>
|
||||
* This is part of the migration path to using FileDomainLinkDb.
|
||||
*/
|
||||
public class SqlDomainLinkDb implements DomainLinkDb {
|
||||
private volatile long[] sourceToDest = new long[0];
|
||||
private volatile long[] destToSource = new long[0];
|
||||
private static final Logger logger = LoggerFactory.getLogger(SqlDomainLinkDb.class);
|
||||
|
||||
private final Path filename;
|
||||
private final HikariDataSource dataSource;
|
||||
private final int node;
|
||||
|
||||
public SqlDomainLinkDb(@Named("domain-linkdb-file") Path filename,
|
||||
HikariDataSource dataSource,
|
||||
ServiceConfiguration configuration)
|
||||
{
|
||||
this.filename = filename;
|
||||
this.dataSource = dataSource;
|
||||
|
||||
node = configuration.node();
|
||||
loadDb();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void switchInput(Path newFilename) throws IOException {
|
||||
throw new UnsupportedEncodingException();
|
||||
}
|
||||
|
||||
public void loadDb() {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement(
|
||||
STR."""
|
||||
SELECT
|
||||
SOURCE_DOMAIN_ID,
|
||||
DEST_DOMAIN_ID
|
||||
FROM EC_DOMAIN_LINK
|
||||
INNER JOIN EC_DOMAIN
|
||||
ON EC_DOMAIN.ID = EC_DOMAIN_LINK.SOURCE_DOMAIN_ID
|
||||
WHERE NODE_AFFINITY=\{node}
|
||||
""");
|
||||
var rs = stmt.executeQuery())
|
||||
{
|
||||
TLongArrayList sourceToDest = new TLongArrayList(10_000_000);
|
||||
TLongArrayList destToSource = new TLongArrayList(10_000_000);
|
||||
|
||||
while (rs.next()) {
|
||||
long source = Integer.toUnsignedLong(rs.getInt(1));
|
||||
long dest = Integer.toUnsignedLong(rs.getInt(2));
|
||||
|
||||
sourceToDest.add((source << 32) | dest);
|
||||
destToSource.add((dest << 32) | source);
|
||||
}
|
||||
|
||||
sourceToDest.sort();
|
||||
destToSource.sort();
|
||||
|
||||
this.sourceToDest = sourceToDest.toArray();
|
||||
this.destToSource = destToSource.toArray();
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.error("Failed to load linkdb", ex);
|
||||
}
|
||||
|
||||
logger.info("LinkDB loaded, size = {}", sourceToDest.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TIntArrayList findDestinations(int source) {
|
||||
return findRelated(sourceToDest, source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TIntArrayList findSources(int dest) {
|
||||
return findRelated(destToSource, dest);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int countDestinations(int source) {
|
||||
return countRelated(sourceToDest, source);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int countSources(int dest) {
|
||||
return countRelated(destToSource, dest);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEach(SourceDestConsumer consumer) {
|
||||
for (long val : sourceToDest) {
|
||||
consumer.accept((int) (val >>> 32), (int) (val & 0xFFFF_FFFFL));
|
||||
}
|
||||
}
|
||||
|
||||
private TIntArrayList findRelated(long[] range, int key) {
|
||||
long keyLong = Integer.toUnsignedLong(key) << 32;
|
||||
long nextKeyLong = Integer.toUnsignedLong(key + 1) << 32;
|
||||
|
||||
int start = Arrays.binarySearch(range, keyLong);
|
||||
|
||||
if (start < 0) {
|
||||
// Key is not found, get the insertion point
|
||||
start = -start - 1;
|
||||
}
|
||||
|
||||
TIntArrayList result = new TIntArrayList();
|
||||
|
||||
for (int i = start; i < range.length && range[i] < nextKeyLong; i++) {
|
||||
result.add((int) (range[i] & 0xFFFF_FFFFL));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private int countRelated(long[] range, int key) {
|
||||
long keyLong = Integer.toUnsignedLong(key) << 32;
|
||||
long nextKeyLong = Integer.toUnsignedLong(key + 1) << 32;
|
||||
|
||||
int start = Arrays.binarySearch(range, keyLong);
|
||||
|
||||
if (start < 0) {
|
||||
// Key is not found, get the insertion point
|
||||
start = -start - 1;
|
||||
}
|
||||
|
||||
int num = 0;
|
||||
for (int i = start; i < range.length && range[i] < nextKeyLong; i++, num++);
|
||||
return num;
|
||||
}
|
||||
|
||||
}
|
@ -38,10 +38,22 @@ public class RankingDomainFetcher {
|
||||
public void getDomains(Consumer<RankingDomainData> consumer) {
|
||||
String query;
|
||||
if (getNames) {
|
||||
query = "SELECT EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID WHERE NODE_AFFINITY>0 GROUP BY EC_DOMAIN.ID";
|
||||
query = """
|
||||
SELECT EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS
|
||||
FROM EC_DOMAIN
|
||||
INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
||||
WHERE NODE_AFFINITY>0
|
||||
GROUP BY EC_DOMAIN.ID
|
||||
""";
|
||||
}
|
||||
else {
|
||||
query = "SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID WHERE NODE_AFFINITY>0 GROUP BY EC_DOMAIN.ID";
|
||||
query = """
|
||||
SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS
|
||||
FROM EC_DOMAIN
|
||||
INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
||||
WHERE NODE_AFFINITY>0
|
||||
GROUP BY EC_DOMAIN.ID
|
||||
""";
|
||||
}
|
||||
|
||||
getDomains(query, consumer);
|
||||
@ -51,10 +63,24 @@ public class RankingDomainFetcher {
|
||||
public void getPeripheralDomains(Consumer<RankingDomainData> consumer) {
|
||||
String query;
|
||||
if (getNames) {
|
||||
query = "SELECT EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID LEFT JOIN EC_DOMAIN_LINK ON SOURCE_DOMAIN_ID=EC_DOMAIN.ID WHERE ((INDEXED>1 AND IS_ALIVE) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND EC_DOMAIN_LINK.ID IS NULL GROUP BY EC_DOMAIN.ID";
|
||||
query = """
|
||||
SELECT EC_DOMAIN.ID,DOMAIN_NAME,DOMAIN_ALIAS,STATE,KNOWN_URLS
|
||||
FROM EC_DOMAIN
|
||||
INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
||||
WHERE ((INDEXED>1 AND IS_ALIVE)
|
||||
OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0))
|
||||
GROUP BY EC_DOMAIN.ID
|
||||
""";
|
||||
}
|
||||
else {
|
||||
query = "SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID LEFT JOIN EC_DOMAIN_LINK ON SOURCE_DOMAIN_ID=EC_DOMAIN.ID WHERE ((INDEXED>1 AND IS_ALIVE) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND EC_DOMAIN_LINK.ID IS NULL GROUP BY EC_DOMAIN.ID";
|
||||
query = """
|
||||
SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS
|
||||
FROM EC_DOMAIN
|
||||
INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
||||
WHERE ((INDEXED>1 AND IS_ALIVE)
|
||||
OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0))
|
||||
GROUP BY EC_DOMAIN.ID
|
||||
""";
|
||||
}
|
||||
|
||||
getDomains(query, consumer);
|
||||
|
@ -4,10 +4,8 @@ import com.google.inject.AbstractModule;
|
||||
import com.google.inject.Provides;
|
||||
import com.google.inject.Singleton;
|
||||
import com.google.inject.name.Named;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
|
||||
import nu.marginalia.linkdb.dlinks.SelectingDomainLinkDb;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import nu.marginalia.linkdb.dlinks.DelayingDomainLinkDb;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.IndexLocations;
|
||||
import org.slf4j.Logger;
|
||||
@ -29,14 +27,12 @@ public class IndexModule extends AbstractModule {
|
||||
@Provides
|
||||
@Singleton
|
||||
public DomainLinkDb domainLinkDb (
|
||||
FileStorageService storageService,
|
||||
HikariDataSource dataSource,
|
||||
ServiceConfiguration serviceConfiguration
|
||||
FileStorageService storageService
|
||||
)
|
||||
{
|
||||
Path path = IndexLocations.getLinkdbLivePath(storageService).resolve(DOMAIN_LINKS_FILE_NAME);
|
||||
|
||||
return new SelectingDomainLinkDb(path, serviceConfiguration, dataSource);
|
||||
return new DelayingDomainLinkDb(path);
|
||||
}
|
||||
|
||||
@Provides
|
||||
|
Loading…
Reference in New Issue
Block a user