(domain-ranking) Integrate new ranking logic

This change removes the 'algorithm' field from the domain ranking set configuration. Instead, the algorithm is chosen based on whether influence domains are provided and whether similarity data is present.
This commit is contained in:
Viktor Lofgren 2024-02-16 20:22:01 +01:00
parent 64acdb5f2a
commit 9ec262ae00
13 changed files with 235 additions and 83 deletions

View File

@ -25,7 +25,7 @@ public class DomainRankingSetsService {
public Optional<DomainRankingSet> get(String name) throws SQLException { public Optional<DomainRankingSet> get(String name) throws SQLException {
try (var conn = dataSource.getConnection(); try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement(""" var stmt = conn.prepareStatement("""
SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION SELECT NAME, DESCRIPTION, DEPTH, DEFINITION
FROM CONF_DOMAIN_RANKING_SET FROM CONF_DOMAIN_RANKING_SET
WHERE NAME = ? WHERE NAME = ?
""")) { """)) {
@ -39,7 +39,6 @@ public class DomainRankingSetsService {
return Optional.of(new DomainRankingSet( return Optional.of(new DomainRankingSet(
rs.getString("NAME"), rs.getString("NAME"),
rs.getString("DESCRIPTION"), rs.getString("DESCRIPTION"),
DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
rs.getInt("DEPTH"), rs.getInt("DEPTH"),
rs.getString("DEFINITION") rs.getString("DEFINITION")
)); ));
@ -53,15 +52,14 @@ public class DomainRankingSetsService {
public void upsert(DomainRankingSet domainRankingSet) { public void upsert(DomainRankingSet domainRankingSet) {
try (var conn = dataSource.getConnection(); try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement(""" var stmt = conn.prepareStatement("""
REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, DEPTH, DEFINITION)
VALUES (?, ?, ?, ?, ?) VALUES (?, ?, ?, ?)
""")) """))
{ {
stmt.setString(1, domainRankingSet.name()); stmt.setString(1, domainRankingSet.name());
stmt.setString(2, domainRankingSet.description()); stmt.setString(2, domainRankingSet.description());
stmt.setString(3, domainRankingSet.algorithm().name()); stmt.setInt(3, domainRankingSet.depth());
stmt.setInt(4, domainRankingSet.depth()); stmt.setString(4, domainRankingSet.definition());
stmt.setString(5, domainRankingSet.definition());
stmt.executeUpdate(); stmt.executeUpdate();
if (!conn.getAutoCommit()) if (!conn.getAutoCommit())
@ -94,7 +92,7 @@ public class DomainRankingSetsService {
try (var conn = dataSource.getConnection(); try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement(""" var stmt = conn.prepareStatement("""
SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION SELECT NAME, DESCRIPTION, DEPTH, DEFINITION
FROM CONF_DOMAIN_RANKING_SET FROM CONF_DOMAIN_RANKING_SET
""")) { """)) {
var rs = stmt.executeQuery(); var rs = stmt.executeQuery();
@ -105,7 +103,6 @@ public class DomainRankingSetsService {
new DomainRankingSet( new DomainRankingSet(
rs.getString("NAME"), rs.getString("NAME"),
rs.getString("DESCRIPTION"), rs.getString("DESCRIPTION"),
DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
rs.getInt("DEPTH"), rs.getInt("DEPTH"),
rs.getString("DEFINITION")) rs.getString("DEFINITION"))
); );
@ -118,31 +115,17 @@ public class DomainRankingSetsService {
} }
} }
public enum DomainSetAlgorithm {
/** Use link graph, do a pagerank */
LINKS_PAGERANK,
/** Use link graph, do a cheirank */
LINKS_CHEIRANK,
/** Use adjacency graph, do a pagerank */
ADJACENCY_PAGERANK,
/** Use adjacency graph, do a cheirank */
ADJACENCY_CHEIRANK,
/** For reserved names. Use special algorithm, function of name */
SPECIAL
};
/** Defines a domain ranking set, parameters for the ranking algorithms. /** Defines a domain ranking set, parameters for the ranking algorithms.
* *
* @param name Key and name of the set * @param name Key and name of the set
* @param description Human-readable description * @param description Human-readable description
* @param algorithm Algorithm to use
* @param depth Depth of the algorithm * @param depth Depth of the algorithm
* @param definition Definition of the set, typically a list of domains or globs for domain-names * @param definition Definition of the set, typically a list of domains or globs for domain-names
* */ * */
@With @With
public record DomainRankingSet(String name, public record DomainRankingSet(String name,
String description, String description,
DomainSetAlgorithm algorithm,
int depth, int depth,
String definition) String definition)
{ {
@ -159,7 +142,7 @@ public class DomainRankingSetsService {
} }
public boolean isSpecial() { public boolean isSpecial() {
return algorithm() == DomainSetAlgorithm.SPECIAL; return name().equals("BLOGS") || name().equals("NONE") || name().equals("RANK");
} }
} }

View File

@ -0,0 +1 @@
-- Drop the ALGORITHM column from the CONF_DOMAIN_RANKING_SET table.
ALTER TABLE CONF_DOMAIN_RANKING_SET DROP COLUMN ALGORITHM;

View File

@ -56,14 +56,12 @@ class DomainRankingSetsServiceTest {
var newValue = new DomainRankingSetsService.DomainRankingSet( var newValue = new DomainRankingSetsService.DomainRankingSet(
"test", "test",
"Test domain set", "Test domain set",
DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK,
10, 10,
"test\\.nu" "test\\.nu"
); );
var newValue2 = new DomainRankingSetsService.DomainRankingSet( var newValue2 = new DomainRankingSetsService.DomainRankingSet(
"test2", "test2",
"Test domain set 2", "Test domain set 2",
DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK,
20, 20,
"test\\.nu 2" "test\\.nu 2"
); );

View File

@ -32,13 +32,14 @@ dependencies {
implementation libs.hll implementation libs.hll
testImplementation project(':code:libraries:array') testImplementation project(':code:libraries:array')
testImplementation libs.commons.lang3 testImplementation libs.commons.lang3
testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit testImplementation libs.bundles.junit
testImplementation libs.mockito testImplementation libs.mockito
}
test { testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
minHeapSize = "128m" // initial heap size testImplementation 'org.testcontainers:mariadb:1.17.4'
maxHeapSize = "20G" // maximum heap size testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
testImplementation project(':code:libraries:test-helpers')
} }

View File

@ -25,6 +25,12 @@ public class PageRankDomainRanker implements RankingAlgorithm {
this.graph = source.getGraph(); this.graph = source.getGraph();
} }
/** Creates a ranker whose influence set is given as domain names rather than domain ids.
 * <p>
 * The names are resolved to ids with {@code source.domainIds(...)}, and the resulting
 * ids are handed to the regular constructor together with the same graph source.
 *
 * @param source       graph source used both to resolve the names and to build the ranker
 * @param influenceSet domain names making up the influence set
 * @return a ranker over {@code source} using the resolved domain ids
 */
public static PageRankDomainRanker forDomainNames(GraphSource source,
                                                  List<String> influenceSet)
{
    List<Integer> influenceIds = source.domainIds(influenceSet);

    return new PageRankDomainRanker(source, influenceIds);
}
@Override @Override
public <T> T calculate(int resultCount, Supplier<RankingResultAccumulator<T>> accumulatorP) { public <T> T calculate(int resultCount, Supplier<RankingResultAccumulator<T>> accumulatorP) {
VertexScoringAlgorithm<Integer, Double> pageRank; VertexScoringAlgorithm<Integer, Double> pageRank;

View File

@ -4,8 +4,7 @@ import com.zaxxer.hikari.HikariDataSource;
import org.jgrapht.Graph; import org.jgrapht.Graph;
import java.sql.SQLException; import java.sql.SQLException;
import java.util.ArrayList; import java.util.*;
import java.util.List;
public abstract class AbstractGraphSource implements GraphSource { public abstract class AbstractGraphSource implements GraphSource {
protected final HikariDataSource dataSource; protected final HikariDataSource dataSource;
@ -39,18 +38,24 @@ public abstract class AbstractGraphSource implements GraphSource {
var stmt = conn.prepareStatement(""" var stmt = conn.prepareStatement("""
SELECT ID SELECT ID
FROM EC_DOMAIN FROM EC_DOMAIN
WHERE DOMAIN_NAME IN (?) WHERE DOMAIN_NAME LIKE ?
""")) """))
{ {
stmt.setArray(1, conn.createArrayOf("VARCHAR", domainNameList.toArray())); Set<Integer> retSet = new HashSet<>();
for (String domainName : domainNameList) {
stmt.setString(1, domainName);
try (var rs = stmt.executeQuery()) { try (var rs = stmt.executeQuery()) {
var result = new ArrayList<Integer>();
while (rs.next()) { while (rs.next()) {
result.add(rs.getInt(1)); retSet.add(rs.getInt(1));
} }
return result;
} }
} }
var ret = new ArrayList<>(retSet);
ret.sort(Comparator.naturalOrder());
return ret;
}
catch (SQLException ex) { catch (SQLException ex) {
throw new RuntimeException(ex); throw new RuntimeException(ex);
} }

View File

@ -11,5 +11,13 @@ public interface GraphSource {
/** Construct the graph */ /** Construct the graph */
Graph<Integer, ?> getGraph(); Graph<Integer, ?> getGraph();
/** Return a list of domain ids for the given domain names.
* The function will also accept SQL-style wildcards,
* e.g. "%marginalia.nu" will match "marginalia.nu" and "memex.marginalia.nu".
* <p>
* If multiple wildcards are provided, and overlapping domains are matched,
* they will be included only once. The returned list will be sorted in
* numerical order of the domain IDs.
*/
List<Integer> domainIds(List<String> domainNameList); List<Integer> domainIds(List<String> domainNameList);
} }

View File

@ -1,11 +1,14 @@
package nu.marginalia.ranking; package nu.marginalia.ranking;
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator; import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import java.util.List; import java.util.List;
// Test the ranking algorithm with prod data. // Test the ranking algorithm with prod data. Will not run if the data is not available.
// It's not feasible to include the data in the git repo, as it's ~6 GB of data.
@Disabled
class RankingAlgorithmWithRealDataTest { class RankingAlgorithmWithRealDataTest {
@Test @Test

View File

@ -0,0 +1,161 @@
package nu.marginalia.ranking;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.query.client.QueryClient;
import nu.marginalia.ranking.data.InvertedLinkGraphSource;
import nu.marginalia.ranking.data.LinkGraphSource;
import nu.marginalia.ranking.data.SimilarityGraphSource;
import nu.marginalia.test.TestMigrationLoader;
import org.jgrapht.Graph;
import org.jgrapht.graph.DefaultWeightedEdge;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.parallel.Execution;
import org.mockito.Mockito;
import org.testcontainers.containers.MariaDBContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import java.sql.SQLException;
import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
import static org.mockito.Mockito.when;
@Tag("slow")
@Testcontainers
@Execution(SAME_THREAD)
public class RankingAlgorithmsContainerTest {
@Container
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
.withDatabaseName("WMSA_prod")
.withUsername("wmsa")
.withPassword("wmsa")
.withNetworkAliases("mariadb");
static HikariDataSource dataSource;
QueryClient queryClient;
QueryClient.AllLinks allLinks;
@BeforeAll
public static void setup() {
HikariConfig config = new HikariConfig();
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
config.setUsername("wmsa");
config.setPassword("wmsa");
dataSource = new HikariDataSource(config);
TestMigrationLoader.flywayMigration(dataSource);
try (var conn = dataSource.getConnection();
var stmt = conn.createStatement()) {
stmt.executeUpdate("""
INSERT INTO EC_DOMAIN(DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY)
VALUES ('memex.marginalia.nu', 'marginalia.nu', 1),
('search.marginalia.nu', 'marginalia.nu', 1),
('encyclopedia.marginalia.nu', 'marginalia.nu', 1),
('marginalia.nu', 'marginalia.nu', 1);
""");
} catch (SQLException e) {
throw new RuntimeException(e);
}
}
@BeforeEach
public void setupQueryClient() {
queryClient = Mockito.mock(QueryClient.class);
allLinks = new QueryClient.AllLinks();
when(queryClient.getAllDomainLinks()).thenReturn(allLinks);
try (var conn = dataSource.getConnection();
var stmt = conn.createStatement()) {
stmt.executeUpdate("TRUNCATE TABLE EC_DOMAIN_NEIGHBORS_2");
} catch (SQLException e) {
throw new RuntimeException(e);
}
}
private void addSimilarity(int source, int dest, double similarity) {
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""
INSERT INTO EC_DOMAIN_NEIGHBORS_2(DOMAIN_ID, NEIGHBOR_ID, RELATEDNESS)
VALUES (?, ?, ?)
""")) {
stmt.setInt(1, source);
stmt.setInt(2, dest);
stmt.setDouble(3, similarity);
stmt.executeUpdate();
} catch (SQLException e) {
throw new RuntimeException(e);
}
}
@Test
public void testGetDomains() {
// should all be the same, doesn't matter which one we use
var source = new LinkGraphSource(dataSource, queryClient);
Assertions.assertEquals(List.of(1),
source.domainIds(List.of("memex.marginalia.nu")));
// Verify globbing
Assertions.assertEquals(List.of(1,2,3),
source.domainIds(List.of("%.marginalia.nu")));
}
@Test
public void testLinkGraphSource() {
allLinks.add(1, 3);
var graph = new LinkGraphSource(dataSource, queryClient).getGraph();
Assertions.assertTrue(graph.containsVertex(1));
Assertions.assertTrue(graph.containsVertex(2));
Assertions.assertTrue(graph.containsVertex(3));
Assertions.assertTrue(graph.containsEdge(1, 3));
Assertions.assertFalse(graph.containsEdge(3, 1));
Assertions.assertFalse(graph.containsEdge(2, 3));
Assertions.assertFalse(graph.containsEdge(3, 2));
}
@Test
public void testInvertedLinkGraphSource() {
allLinks.add(1, 3);
var graph = new InvertedLinkGraphSource(dataSource, queryClient).getGraph();
Assertions.assertTrue(graph.containsVertex(1));
Assertions.assertTrue(graph.containsVertex(2));
Assertions.assertTrue(graph.containsVertex(3));
Assertions.assertTrue(graph.containsEdge(3, 1));
Assertions.assertFalse(graph.containsEdge(1, 3));
Assertions.assertFalse(graph.containsEdge(2, 3));
Assertions.assertFalse(graph.containsEdge(3, 2));
}
@Test
@SuppressWarnings("unchecked")
public void testSimilarityGraphSource() {
addSimilarity(1, 3, 0.5);
var graph = (Graph<Integer, DefaultWeightedEdge>) new SimilarityGraphSource(dataSource).getGraph();
Assertions.assertTrue(graph.containsVertex(1));
Assertions.assertTrue(graph.containsVertex(2));
Assertions.assertTrue(graph.containsVertex(3));
Assertions.assertTrue(graph.containsEdge(3, 1));
Assertions.assertTrue(graph.containsEdge(1, 3));
Assertions.assertEquals(graph.getEdgeWeight(graph.getEdge(1, 3)), 0.5, 0.0001);
Assertions.assertFalse(graph.containsEdge(1, 2));
Assertions.assertFalse(graph.containsEdge(2, 3));
}
}

View File

@ -15,15 +15,12 @@ import java.sql.SQLException;
import java.util.Map; import java.util.Map;
public class ControlDomainRankingSetsService { public class ControlDomainRankingSetsService {
private final HikariDataSource dataSource;
private final ControlRendererFactory rendererFactory; private final ControlRendererFactory rendererFactory;
private final DomainRankingSetsService domainRankingSetsService; private final DomainRankingSetsService domainRankingSetsService;
@Inject @Inject
public ControlDomainRankingSetsService(HikariDataSource dataSource, public ControlDomainRankingSetsService(ControlRendererFactory rendererFactory,
ControlRendererFactory rendererFactory,
DomainRankingSetsService domainRankingSetsService) { DomainRankingSetsService domainRankingSetsService) {
this.dataSource = dataSource;
this.rendererFactory = rendererFactory; this.rendererFactory = rendererFactory;
this.domainRankingSetsService = domainRankingSetsService; this.domainRankingSetsService = domainRankingSetsService;
} }
@ -47,7 +44,6 @@ public class ControlDomainRankingSetsService {
domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet( domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet(
id, id,
request.queryParams("description"), request.queryParams("description"),
DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")),
Integer.parseInt(request.queryParams("depth")), Integer.parseInt(request.queryParams("depth")),
request.queryParams("definition") request.queryParams("definition")
)); ));
@ -77,7 +73,6 @@ public class ControlDomainRankingSetsService {
domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet( domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet(
request.queryParams("name").toUpperCase(), request.queryParams("name").toUpperCase(),
request.queryParams("description"), request.queryParams("description"),
DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")),
Integer.parseInt(request.queryParams("depth")), Integer.parseInt(request.queryParams("depth")),
request.queryParams("definition") request.queryParams("definition")
)); ));
@ -95,17 +90,6 @@ public class ControlDomainRankingSetsService {
} }
private Object rankingSetModel(Request request, Response response) throws SQLException { private Object rankingSetModel(Request request, Response response) throws SQLException {
var model = domainRankingSetsService.get(request.params("id")).orElseThrow(); var model = domainRankingSetsService.get(request.params("id")).orElseThrow();
return Map.of("rankingSet", model, return Map.of("rankingSet", model);
"selectedAlgo", Map.of(
"special", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.SPECIAL,
"adjacency_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK,
"adjacency_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_PAGERANK,
"links_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_CHEIRANK,
"links_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK)
);
} }
} }

View File

@ -16,14 +16,12 @@
<tr> <tr>
<th>Name</th> <th>Name</th>
<th>Description</th> <th>Description</th>
<th>Algorithm</th>
<th>Depth</th> <th>Depth</th>
</tr> </tr>
{{#each rankingSets}} {{#each rankingSets}}
<tr> <tr>
<td><a href="/domain-ranking-sets/{{name}}">{{name}}</td></td> <td><a href="/domain-ranking-sets/{{name}}">{{name}}</td></td>
<td>{{description}}</td> <td>{{description}}</td>
<td>{{algorithm}}</td>
<td>{{depth}}</td> <td>{{depth}}</td>
</tr> </tr>
{{/each}} {{/each}}

View File

@ -26,15 +26,6 @@
<th><label for="algorithm">Algorithm</label></th> <th><label for="algorithm">Algorithm</label></th>
<td> <td>
{{#if special}}<input type="hidden" name="algorithm" value="{{algorithm}}" />{{/if}} {{#if special}}<input type="hidden" name="algorithm" value="{{algorithm}}" />{{/if}}
<select id="algorithm" name="algorithm" {{#if special}}disabled{{/if}}>
{{#with algorithm}}
<option value="SPECIAL" disabled {{#if selectedAlgo.special}}selected{{/if}}>SPECIAL</option>
<option value="LINKS_PAGERANK" {{#if selectedAlgo.links_pagerank}}selected{{/if}}>LINKS_PAGERANK</option>
<option value="LINKS_CHEIRANK" {{#if selectedAlgo.links_cheirank}}selected{{/if}}>LINKS_CHEIRANK</option>
<option value="ADJACENCY_PAGERANK" {{#if selectedAlgo.adjacency_pagerank}}selected{{/if}}>ADJACENCY_PAGERANK</option>
<option value="ADJACENCY_CHEIRANK" {{#if selectedAlgo.adjacency_cheirank}}selected{{/if}}>ADJACENCY_CHEIRANK</option>
{{/with}}
</select>
<div> <div>
<small class="text-muted"> <small class="text-muted">
The algorithm used to rank the domains. The LINKS algorithms use the link graph, and the ADJACENCY The algorithm used to rank the domains. The LINKS algorithms use the link graph, and the ADJACENCY
@ -68,7 +59,8 @@
<div> <div>
<small class="text-muted">A list of domain names, one per line, possibly globbed with SQL-style '%' wildcards. <small class="text-muted">A list of domain names, one per line, possibly globbed with SQL-style '%' wildcards.
These are used as the origin point for the Personalized PageRank algorithm, and will be considered These are used as the origin point for the Personalized PageRank algorithm, and will be considered
the central points of the link or adjacency graph. If no domains are specified, the entire domain space is used, as per the PageRank paper. the central points of the link or adjacency graph. If no domains are specified,
the entire domain space is used, as per the PageRank paper.
</small> </small>
</div> </div>
</td></tr> </td></tr>

View File

@ -18,7 +18,6 @@ import nu.marginalia.ranking.data.GraphSource;
import nu.marginalia.ranking.data.LinkGraphSource; import nu.marginalia.ranking.data.LinkGraphSource;
import nu.marginalia.ranking.data.SimilarityGraphSource; import nu.marginalia.ranking.data.SimilarityGraphSource;
import nu.marginalia.service.control.ServiceEventLog; import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.control.ServiceHeartbeat;
import nu.marginalia.service.module.ServiceConfiguration; import nu.marginalia.service.module.ServiceConfiguration;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -122,13 +121,13 @@ public class IndexSearchSetsService {
} }
try { try {
if (DomainRankingSetsService.DomainSetAlgorithm.SPECIAL.equals(rankingSet.algorithm())) { if (rankingSet.isSpecial()) {
switch (rankingSet.name()) { switch (rankingSet.name()) {
case "BLOGS" -> recalculateBlogsSet(rankingSet); case "BLOGS" -> recalculateBlogsSet(rankingSet);
case "NONE" -> {} // No-op case "NONE" -> {} // No-op
} }
} else { } else {
recalculateNornal(rankingSet); recalculateNormal(rankingSet);
} }
} }
catch (Exception ex) { catch (Exception ex) {
@ -138,16 +137,17 @@ public class IndexSearchSetsService {
} }
} }
private void recalculateNornal(DomainRankingSetsService.DomainRankingSet rankingSet) { private void recalculateNormal(DomainRankingSetsService.DomainRankingSet rankingSet) {
String[] domains = rankingSet.domains(); List<String> domains = List.of(rankingSet.domains());
GraphSource graphSource = switch (rankingSet.algorithm()) { GraphSource source;
case LINKS_PAGERANK, LINKS_CHEIRANK -> linksDomains;
case ADJACENCY_PAGERANK, ADJACENCY_CHEIRANK -> similarityDomains;
default -> throw new IllegalStateException("Unexpected value: " + rankingSet.algorithm());
};
var data = new PageRankDomainRanker(linksDomains, linksDomains.domainIds(List.of(domains))) // Similarity ranking does not behave well with an empty set of domains
if (domains.isEmpty()) source = linksDomains;
else source = similarityDomains;
var data = PageRankDomainRanker
.forDomainNames(source, domains)
.calculate(rankingSet.depth(), RankingResultHashSetAccumulator::new); .calculate(rankingSet.depth(), RankingResultHashSetAccumulator::new);
var set = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexServicesFactory.getSearchSetsBase()), data); var set = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexServicesFactory.getSearchSetsBase()), data);
@ -180,8 +180,20 @@ public class IndexSearchSetsService {
} }
private void updateDomainRankings(DomainRankingSetsService.DomainRankingSet rankingSet) { private void updateDomainRankings(DomainRankingSetsService.DomainRankingSet rankingSet) {
List<String> domains = List.of(rankingSet.domains());
var ranks = new PageRankDomainRanker(similarityDomains, similarityDomains.domainIds(List.of(rankingSet.domains()))) final GraphSource source;
if (domains.isEmpty()) {
// Similarity ranking does not behave well with an empty set of domains
source = linksDomains;
}
else {
source = similarityDomains;
}
var ranks = PageRankDomainRanker
.forDomainNames(source, domains)
.calculate(rankingSet.depth(), () -> new RankingResultHashMapAccumulator(rankingSet.depth())); .calculate(rankingSet.depth(), () -> new RankingResultHashMapAccumulator(rankingSet.depth()));
synchronized (this) { synchronized (this) {