diff --git a/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java b/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java index a977e0de..37ca2a58 100644 --- a/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java +++ b/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java @@ -25,7 +25,7 @@ public class DomainRankingSetsService { public Optional get(String name) throws SQLException { try (var conn = dataSource.getConnection(); var stmt = conn.prepareStatement(""" - SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION + SELECT NAME, DESCRIPTION, DEPTH, DEFINITION FROM CONF_DOMAIN_RANKING_SET WHERE NAME = ? """)) { @@ -39,7 +39,6 @@ public class DomainRankingSetsService { return Optional.of(new DomainRankingSet( rs.getString("NAME"), rs.getString("DESCRIPTION"), - DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")), rs.getInt("DEPTH"), rs.getString("DEFINITION") )); @@ -53,15 +52,14 @@ public class DomainRankingSetsService { public void upsert(DomainRankingSet domainRankingSet) { try (var conn = dataSource.getConnection(); var stmt = conn.prepareStatement(""" - REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) - VALUES (?, ?, ?, ?, ?) + REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, DEPTH, DEFINITION) + VALUES (?, ?, ?, ?) 
""")) { stmt.setString(1, domainRankingSet.name()); stmt.setString(2, domainRankingSet.description()); - stmt.setString(3, domainRankingSet.algorithm().name()); - stmt.setInt(4, domainRankingSet.depth()); - stmt.setString(5, domainRankingSet.definition()); + stmt.setInt(3, domainRankingSet.depth()); + stmt.setString(4, domainRankingSet.definition()); stmt.executeUpdate(); if (!conn.getAutoCommit()) @@ -94,7 +92,7 @@ public class DomainRankingSetsService { try (var conn = dataSource.getConnection(); var stmt = conn.prepareStatement(""" - SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION + SELECT NAME, DESCRIPTION, DEPTH, DEFINITION FROM CONF_DOMAIN_RANKING_SET """)) { var rs = stmt.executeQuery(); @@ -105,7 +103,6 @@ public class DomainRankingSetsService { new DomainRankingSet( rs.getString("NAME"), rs.getString("DESCRIPTION"), - DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")), rs.getInt("DEPTH"), rs.getString("DEFINITION")) ); @@ -118,31 +115,17 @@ public class DomainRankingSetsService { } } - public enum DomainSetAlgorithm { - /** Use link graph, do a pagerank */ - LINKS_PAGERANK, - /** Use link graph, do a cheirank */ - LINKS_CHEIRANK, - /** Use adjacency graph, do a pagerank */ - ADJACENCY_PAGERANK, - /** Use adjacency graph, do a cheirank */ - ADJACENCY_CHEIRANK, - /** For reserved names. Use special algorithm, function of name */ - SPECIAL - }; /** Defines a domain ranking set, parameters for the ranking algorithms. 
* * @param name Key and name of the set * @param description Human-readable description - * @param algorithm Algorithm to use * @param depth Depth of the algorithm * @param definition Definition of the set, typically a list of domains or globs for domain-names * */ @With public record DomainRankingSet(String name, String description, - DomainSetAlgorithm algorithm, int depth, String definition) { @@ -159,7 +142,7 @@ public class DomainRankingSetsService { } public boolean isSpecial() { - return algorithm() == DomainSetAlgorithm.SPECIAL; + return name().equals("BLOGS") || name().equals("NONE") || name().equals("RANK"); } } diff --git a/code/common/db/src/main/resources/db/migration/V24_02_0_001__drop_ranking_set_algo.sql b/code/common/db/src/main/resources/db/migration/V24_02_0_001__drop_ranking_set_algo.sql new file mode 100644 index 00000000..507f8932 --- /dev/null +++ b/code/common/db/src/main/resources/db/migration/V24_02_0_001__drop_ranking_set_algo.sql @@ -0,0 +1 @@ +ALTER TABLE CONF_DOMAIN_RANKING_SET DROP COLUMN ALGORITHM; \ No newline at end of file diff --git a/code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java b/code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java index 797e2b55..8618318f 100644 --- a/code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java +++ b/code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java @@ -56,14 +56,12 @@ class DomainRankingSetsServiceTest { var newValue = new DomainRankingSetsService.DomainRankingSet( "test", "Test domain set", - DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK, 10, "test\\.nu" ); var newValue2 = new DomainRankingSetsService.DomainRankingSet( "test2", "Test domain set 2", - DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK, 20, "test\\.nu 2" ); diff --git a/code/features-index/domain-ranking/build.gradle b/code/features-index/domain-ranking/build.gradle index 11b0cd0a..29b191ef 
100644 --- a/code/features-index/domain-ranking/build.gradle +++ b/code/features-index/domain-ranking/build.gradle @@ -32,13 +32,14 @@ dependencies { implementation libs.hll testImplementation project(':code:libraries:array') + testImplementation libs.commons.lang3 testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.junit testImplementation libs.mockito -} -test { - minHeapSize = "128m" // initial heap size - maxHeapSize = "20G" // maximum heap size + testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4') + testImplementation 'org.testcontainers:mariadb:1.17.4' + testImplementation 'org.testcontainers:junit-jupiter:1.17.4' + testImplementation project(':code:libraries:test-helpers') } \ No newline at end of file diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/PageRankDomainRanker.java b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/PageRankDomainRanker.java index d73342f6..e87352b0 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/PageRankDomainRanker.java +++ b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/PageRankDomainRanker.java @@ -25,6 +25,12 @@ public class PageRankDomainRanker implements RankingAlgorithm { this.graph = source.getGraph(); } + public static PageRankDomainRanker forDomainNames(GraphSource source, + List influenceSet) + { + return new PageRankDomainRanker(source, source.domainIds(influenceSet)); + } + @Override public T calculate(int resultCount, Supplier> accumulatorP) { VertexScoringAlgorithm pageRank; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/AbstractGraphSource.java b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/AbstractGraphSource.java index da9233c7..26b4f3d3 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/AbstractGraphSource.java +++ 
b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/AbstractGraphSource.java @@ -4,8 +4,7 @@ import com.zaxxer.hikari.HikariDataSource; import org.jgrapht.Graph; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; +import java.util.*; public abstract class AbstractGraphSource implements GraphSource { protected final HikariDataSource dataSource; @@ -39,17 +38,23 @@ public abstract class AbstractGraphSource implements GraphSource { var stmt = conn.prepareStatement(""" SELECT ID FROM EC_DOMAIN - WHERE DOMAIN_NAME IN (?) + WHERE DOMAIN_NAME LIKE ? """)) { - stmt.setArray(1, conn.createArrayOf("VARCHAR", domainNameList.toArray())); - try (var rs = stmt.executeQuery()) { - var result = new ArrayList(); - while (rs.next()) { - result.add(rs.getInt(1)); + Set retSet = new HashSet<>(); + + for (String domainName : domainNameList) { + stmt.setString(1, domainName); + try (var rs = stmt.executeQuery()) { + while (rs.next()) { + retSet.add(rs.getInt(1)); + } } - return result; } + + var ret = new ArrayList<>(retSet); + ret.sort(Comparator.naturalOrder()); + return ret; } catch (SQLException ex) { throw new RuntimeException(ex); diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/GraphSource.java b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/GraphSource.java index 26554428..b6e1daeb 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/GraphSource.java +++ b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/GraphSource.java @@ -11,5 +11,13 @@ public interface GraphSource { /** Construct the graph */ Graph getGraph(); + /** Return a list of domain ids for the given domain names. + * The function will also accept SQL-style wildcards, + * e.g. "%marginalia.nu" will match "marginalia.nu" and "memex.marginalia.nu". + *

+ * If multiple wildcards are provided, and overlapping domains are matched, + * they will be included only once. The returned list will be sorted in + * numerical order of the domain IDs. + */ List domainIds(List domainNameList); } diff --git a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmWithRealDataTest.java b/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmWithRealDataTest.java index cd80dede..88a8b5e8 100644 --- a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmWithRealDataTest.java +++ b/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmWithRealDataTest.java @@ -1,11 +1,14 @@ package nu.marginalia.ranking; import nu.marginalia.ranking.accumulator.RankingResultListAccumulator; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.util.List; -// Test the ranking algorithm with prod data. +// Test the ranking algorithm with prod data. Disabled by default; remove @Disabled to run it locally against that data. +// It's not feasible to include the data in the git repo, as it's ~6 GB of data. 
+@Disabled class RankingAlgorithmWithRealDataTest { @Test diff --git a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmsContainerTest.java b/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmsContainerTest.java new file mode 100644 index 00000000..7fdd2f82 --- /dev/null +++ b/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmsContainerTest.java @@ -0,0 +1,161 @@ +package nu.marginalia.ranking; + + +import com.zaxxer.hikari.HikariConfig; +import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.query.client.QueryClient; +import nu.marginalia.ranking.data.InvertedLinkGraphSource; +import nu.marginalia.ranking.data.LinkGraphSource; +import nu.marginalia.ranking.data.SimilarityGraphSource; +import nu.marginalia.test.TestMigrationLoader; +import org.jgrapht.Graph; +import org.jgrapht.graph.DefaultWeightedEdge; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.parallel.Execution; +import org.mockito.Mockito; +import org.testcontainers.containers.MariaDBContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; + +import java.sql.SQLException; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; +import static org.mockito.Mockito.when; + +@Tag("slow") +@Testcontainers +@Execution(SAME_THREAD) +public class RankingAlgorithmsContainerTest { + @Container + static MariaDBContainer mariaDBContainer = new MariaDBContainer<>("mariadb") + .withDatabaseName("WMSA_prod") + .withUsername("wmsa") + .withPassword("wmsa") + .withNetworkAliases("mariadb"); + + static HikariDataSource dataSource; + + QueryClient queryClient; + QueryClient.AllLinks allLinks; + @BeforeAll + public static void setup() { + HikariConfig config = new HikariConfig(); + config.setJdbcUrl(mariaDBContainer.getJdbcUrl()); + 
config.setUsername("wmsa"); + config.setPassword("wmsa"); + + dataSource = new HikariDataSource(config); + TestMigrationLoader.flywayMigration(dataSource); + + try (var conn = dataSource.getConnection(); + var stmt = conn.createStatement()) { + stmt.executeUpdate(""" + INSERT INTO EC_DOMAIN(DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) + VALUES ('memex.marginalia.nu', 'marginalia.nu', 1), + ('search.marginalia.nu', 'marginalia.nu', 1), + ('encyclopedia.marginalia.nu', 'marginalia.nu', 1), + ('marginalia.nu', 'marginalia.nu', 1); + """); + } catch (SQLException e) { + throw new RuntimeException(e); + } + + } + + @BeforeEach + public void setupQueryClient() { + queryClient = Mockito.mock(QueryClient.class); + allLinks = new QueryClient.AllLinks(); + when(queryClient.getAllDomainLinks()).thenReturn(allLinks); + + try (var conn = dataSource.getConnection(); + var stmt = conn.createStatement()) { + stmt.executeUpdate("TRUNCATE TABLE EC_DOMAIN_NEIGHBORS_2"); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + private void addSimilarity(int source, int dest, double similarity) { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement(""" + INSERT INTO EC_DOMAIN_NEIGHBORS_2(DOMAIN_ID, NEIGHBOR_ID, RELATEDNESS) + VALUES (?, ?, ?) 
+ """)) { + stmt.setInt(1, source); + stmt.setInt(2, dest); + stmt.setDouble(3, similarity); + stmt.executeUpdate(); + + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + @Test + public void testGetDomains() { + // should all be the same, doesn't matter which one we use + var source = new LinkGraphSource(dataSource, queryClient); + + Assertions.assertEquals(List.of(1), + source.domainIds(List.of("memex.marginalia.nu"))); + + // Verify globbing + Assertions.assertEquals(List.of(1,2,3), + source.domainIds(List.of("%.marginalia.nu"))); + } + + @Test + public void testLinkGraphSource() { + allLinks.add(1, 3); + + var graph = new LinkGraphSource(dataSource, queryClient).getGraph(); + + Assertions.assertTrue(graph.containsVertex(1)); + Assertions.assertTrue(graph.containsVertex(2)); + Assertions.assertTrue(graph.containsVertex(3)); + + Assertions.assertTrue(graph.containsEdge(1, 3)); + + Assertions.assertFalse(graph.containsEdge(3, 1)); + Assertions.assertFalse(graph.containsEdge(2, 3)); + Assertions.assertFalse(graph.containsEdge(3, 2)); + } + @Test + public void testInvertedLinkGraphSource() { + allLinks.add(1, 3); + + var graph = new InvertedLinkGraphSource(dataSource, queryClient).getGraph(); + + Assertions.assertTrue(graph.containsVertex(1)); + Assertions.assertTrue(graph.containsVertex(2)); + Assertions.assertTrue(graph.containsVertex(3)); + + Assertions.assertTrue(graph.containsEdge(3, 1)); + + Assertions.assertFalse(graph.containsEdge(1, 3)); + Assertions.assertFalse(graph.containsEdge(2, 3)); + Assertions.assertFalse(graph.containsEdge(3, 2)); + } + @Test + @SuppressWarnings("unchecked") + public void testSimilarityGraphSource() { + + addSimilarity(1, 3, 0.5); + + var graph = (Graph) new SimilarityGraphSource(dataSource).getGraph(); + + Assertions.assertTrue(graph.containsVertex(1)); + Assertions.assertTrue(graph.containsVertex(2)); + Assertions.assertTrue(graph.containsVertex(3)); + + Assertions.assertTrue(graph.containsEdge(3, 1)); + 
Assertions.assertTrue(graph.containsEdge(1, 3)); + Assertions.assertEquals(graph.getEdgeWeight(graph.getEdge(1, 3)), 0.5, 0.0001); + + Assertions.assertFalse(graph.containsEdge(1, 2)); + Assertions.assertFalse(graph.containsEdge(2, 3)); + } +} diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java index 73f19611..ca2c3bc7 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java @@ -15,15 +15,12 @@ import java.sql.SQLException; import java.util.Map; public class ControlDomainRankingSetsService { - private final HikariDataSource dataSource; private final ControlRendererFactory rendererFactory; private final DomainRankingSetsService domainRankingSetsService; @Inject - public ControlDomainRankingSetsService(HikariDataSource dataSource, - ControlRendererFactory rendererFactory, + public ControlDomainRankingSetsService(ControlRendererFactory rendererFactory, DomainRankingSetsService domainRankingSetsService) { - this.dataSource = dataSource; this.rendererFactory = rendererFactory; this.domainRankingSetsService = domainRankingSetsService; } @@ -47,7 +44,6 @@ public class ControlDomainRankingSetsService { domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet( id, request.queryParams("description"), - DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")), Integer.parseInt(request.queryParams("depth")), request.queryParams("definition") )); @@ -77,7 +73,6 @@ public class ControlDomainRankingSetsService { domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet( request.queryParams("name").toUpperCase(), 
request.queryParams("description"), - DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")), Integer.parseInt(request.queryParams("depth")), request.queryParams("definition") )); @@ -95,17 +90,6 @@ public class ControlDomainRankingSetsService { } private Object rankingSetModel(Request request, Response response) throws SQLException { var model = domainRankingSetsService.get(request.params("id")).orElseThrow(); - return Map.of("rankingSet", model, - "selectedAlgo", Map.of( - "special", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.SPECIAL, - "adjacency_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK, - "adjacency_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_PAGERANK, - "links_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_CHEIRANK, - "links_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK) - ); - - - - + return Map.of("rankingSet", model); } } diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb b/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb index ba395319..52c50091 100644 --- a/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb +++ b/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb @@ -16,14 +16,12 @@ Name Description - Algorithm Depth {{#each rankingSets}} {{name}} {{description}} - {{algorithm}} {{depth}} {{/each}} diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb b/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb index c8c7ae9d..0b784882 100644 --- 
a/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb +++ b/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb @@ -26,15 +26,6 @@ {{#if special}}{{/if}} -
The algorithm used to rank the domains. The LINKS algorithms use the link graph, and the ADJACENCY @@ -68,7 +59,8 @@
A list of domain names, one per line, possibly globbed with SQL-style '%' wildcards. These are used as the origin point for the Personalized PageRank algorithm, and will be considered - the central points of the link or adjacency graph. If no domains are specified, the entire domain space is used, as per the PageRank paper. + the central points of the adjacency graph. If no domains are specified, + the link graph is used instead and the entire domain space is considered, as per the PageRank paper.
diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java index 690a60bc..df597a4d 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java @@ -18,7 +18,6 @@ import nu.marginalia.ranking.data.GraphSource; import nu.marginalia.ranking.data.LinkGraphSource; import nu.marginalia.ranking.data.SimilarityGraphSource; import nu.marginalia.service.control.ServiceEventLog; -import nu.marginalia.service.control.ServiceHeartbeat; import nu.marginalia.service.module.ServiceConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,13 +121,13 @@ public class IndexSearchSetsService { } try { - if (DomainRankingSetsService.DomainSetAlgorithm.SPECIAL.equals(rankingSet.algorithm())) { + if (rankingSet.isSpecial()) { switch (rankingSet.name()) { case "BLOGS" -> recalculateBlogsSet(rankingSet); case "NONE" -> {} // No-op } } else { - recalculateNornal(rankingSet); + recalculateNormal(rankingSet); } } catch (Exception ex) { @@ -138,16 +137,17 @@ public class IndexSearchSetsService { } } - private void recalculateNornal(DomainRankingSetsService.DomainRankingSet rankingSet) { - String[] domains = rankingSet.domains(); + private void recalculateNormal(DomainRankingSetsService.DomainRankingSet rankingSet) { + List domains = List.of(rankingSet.domains()); - GraphSource graphSource = switch (rankingSet.algorithm()) { - case LINKS_PAGERANK, LINKS_CHEIRANK -> linksDomains; - case ADJACENCY_PAGERANK, ADJACENCY_CHEIRANK -> similarityDomains; - default -> throw new IllegalStateException("Unexpected value: " + rankingSet.algorithm()); - }; + GraphSource source; - var data = new PageRankDomainRanker(linksDomains, linksDomains.domainIds(List.of(domains))) + // 
Similarity ranking does not behave well with an empty set of domains + if (domains.isEmpty()) source = linksDomains; + else source = similarityDomains; + + var data = PageRankDomainRanker + .forDomainNames(source, domains) .calculate(rankingSet.depth(), RankingResultHashSetAccumulator::new); var set = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexServicesFactory.getSearchSetsBase()), data); @@ -180,9 +180,21 @@ public class IndexSearchSetsService { } private void updateDomainRankings(DomainRankingSetsService.DomainRankingSet rankingSet) { + List domains = List.of(rankingSet.domains()); - var ranks = new PageRankDomainRanker(similarityDomains, similarityDomains.domainIds(List.of(rankingSet.domains()))) - .calculate(rankingSet.depth(), () -> new RankingResultHashMapAccumulator(rankingSet.depth())); + final GraphSource source; + + if (domains.isEmpty()) { + // Similarity ranking does not behave well with an empty set of domains + source = linksDomains; + } + else { + source = similarityDomains; + } + + var ranks = PageRankDomainRanker + .forDomainNames(source, domains) + .calculate(rankingSet.depth(), () -> new RankingResultHashMapAccumulator(rankingSet.depth())); synchronized (this) { domainRankings = new DomainRankings(ranks);