(domain-ranking) Integrate new ranking logic
This change removes the 'algorithm' field from the domain ranking set configuration. Instead, the algorithm is chosen based on whether influence domains are provided, and whether similarity data is present.
This commit is contained in:
parent
64acdb5f2a
commit
9ec262ae00
13 changed files with 235 additions and 83 deletions
|
@ -25,7 +25,7 @@ public class DomainRankingSetsService {
|
|||
public Optional<DomainRankingSet> get(String name) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
|
||||
SELECT NAME, DESCRIPTION, DEPTH, DEFINITION
|
||||
FROM CONF_DOMAIN_RANKING_SET
|
||||
WHERE NAME = ?
|
||||
""")) {
|
||||
|
@ -39,7 +39,6 @@ public class DomainRankingSetsService {
|
|||
return Optional.of(new DomainRankingSet(
|
||||
rs.getString("NAME"),
|
||||
rs.getString("DESCRIPTION"),
|
||||
DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
|
||||
rs.getInt("DEPTH"),
|
||||
rs.getString("DEFINITION")
|
||||
));
|
||||
|
@ -53,15 +52,14 @@ public class DomainRankingSetsService {
|
|||
public void upsert(DomainRankingSet domainRankingSet) {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, DEPTH, DEFINITION)
|
||||
VALUES (?, ?, ?, ?)
|
||||
"""))
|
||||
{
|
||||
stmt.setString(1, domainRankingSet.name());
|
||||
stmt.setString(2, domainRankingSet.description());
|
||||
stmt.setString(3, domainRankingSet.algorithm().name());
|
||||
stmt.setInt(4, domainRankingSet.depth());
|
||||
stmt.setString(5, domainRankingSet.definition());
|
||||
stmt.setInt(3, domainRankingSet.depth());
|
||||
stmt.setString(4, domainRankingSet.definition());
|
||||
stmt.executeUpdate();
|
||||
|
||||
if (!conn.getAutoCommit())
|
||||
|
@ -94,7 +92,7 @@ public class DomainRankingSetsService {
|
|||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
|
||||
SELECT NAME, DESCRIPTION, DEPTH, DEFINITION
|
||||
FROM CONF_DOMAIN_RANKING_SET
|
||||
""")) {
|
||||
var rs = stmt.executeQuery();
|
||||
|
@ -105,7 +103,6 @@ public class DomainRankingSetsService {
|
|||
new DomainRankingSet(
|
||||
rs.getString("NAME"),
|
||||
rs.getString("DESCRIPTION"),
|
||||
DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
|
||||
rs.getInt("DEPTH"),
|
||||
rs.getString("DEFINITION"))
|
||||
);
|
||||
|
@ -118,31 +115,17 @@ public class DomainRankingSetsService {
|
|||
}
|
||||
}
|
||||
|
||||
public enum DomainSetAlgorithm {
|
||||
/** Use link graph, do a pagerank */
|
||||
LINKS_PAGERANK,
|
||||
/** Use link graph, do a cheirank */
|
||||
LINKS_CHEIRANK,
|
||||
/** Use adjacency graph, do a pagerank */
|
||||
ADJACENCY_PAGERANK,
|
||||
/** Use adjacency graph, do a cheirank */
|
||||
ADJACENCY_CHEIRANK,
|
||||
/** For reserved names. Use special algorithm, function of name */
|
||||
SPECIAL
|
||||
};
|
||||
|
||||
/** Defines a domain ranking set, parameters for the ranking algorithms.
|
||||
*
|
||||
* @param name Key and name of the set
|
||||
* @param description Human-readable description
|
||||
* @param algorithm Algorithm to use
|
||||
* @param depth Depth of the algorithm
|
||||
* @param definition Definition of the set, typically a list of domains or globs for domain-names
|
||||
* */
|
||||
@With
|
||||
public record DomainRankingSet(String name,
|
||||
String description,
|
||||
DomainSetAlgorithm algorithm,
|
||||
int depth,
|
||||
String definition)
|
||||
{
|
||||
|
@ -159,7 +142,7 @@ public class DomainRankingSetsService {
|
|||
}
|
||||
|
||||
public boolean isSpecial() {
|
||||
return algorithm() == DomainSetAlgorithm.SPECIAL;
|
||||
return name().equals("BLOGS") || name().equals("NONE") || name().equals("RANK");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
-- Drop the ALGORITHM column from the domain ranking set configuration table.
ALTER TABLE CONF_DOMAIN_RANKING_SET DROP COLUMN ALGORITHM;
|
|
@ -56,14 +56,12 @@ class DomainRankingSetsServiceTest {
|
|||
var newValue = new DomainRankingSetsService.DomainRankingSet(
|
||||
"test",
|
||||
"Test domain set",
|
||||
DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK,
|
||||
10,
|
||||
"test\\.nu"
|
||||
);
|
||||
var newValue2 = new DomainRankingSetsService.DomainRankingSet(
|
||||
"test2",
|
||||
"Test domain set 2",
|
||||
DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK,
|
||||
20,
|
||||
"test\\.nu 2"
|
||||
);
|
||||
|
|
|
@ -32,13 +32,14 @@ dependencies {
|
|||
implementation libs.hll
|
||||
|
||||
testImplementation project(':code:libraries:array')
|
||||
|
||||
testImplementation libs.commons.lang3
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
}
|
||||
|
||||
test {
|
||||
minHeapSize = "128m" // initial heap size
|
||||
maxHeapSize = "20G" // maximum heap size
|
||||
testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
|
||||
testImplementation 'org.testcontainers:mariadb:1.17.4'
|
||||
testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
|
||||
testImplementation project(':code:libraries:test-helpers')
|
||||
}
|
|
@ -25,6 +25,12 @@ public class PageRankDomainRanker implements RankingAlgorithm {
|
|||
this.graph = source.getGraph();
|
||||
}
|
||||
|
||||
public static PageRankDomainRanker forDomainNames(GraphSource source,
|
||||
List<String> influenceSet)
|
||||
{
|
||||
return new PageRankDomainRanker(source, source.domainIds(influenceSet));
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T calculate(int resultCount, Supplier<RankingResultAccumulator<T>> accumulatorP) {
|
||||
VertexScoringAlgorithm<Integer, Double> pageRank;
|
||||
|
|
|
@ -4,8 +4,7 @@ import com.zaxxer.hikari.HikariDataSource;
|
|||
import org.jgrapht.Graph;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
public abstract class AbstractGraphSource implements GraphSource {
|
||||
protected final HikariDataSource dataSource;
|
||||
|
@ -39,17 +38,23 @@ public abstract class AbstractGraphSource implements GraphSource {
|
|||
var stmt = conn.prepareStatement("""
|
||||
SELECT ID
|
||||
FROM EC_DOMAIN
|
||||
WHERE DOMAIN_NAME IN (?)
|
||||
WHERE DOMAIN_NAME LIKE ?
|
||||
"""))
|
||||
{
|
||||
stmt.setArray(1, conn.createArrayOf("VARCHAR", domainNameList.toArray()));
|
||||
try (var rs = stmt.executeQuery()) {
|
||||
var result = new ArrayList<Integer>();
|
||||
while (rs.next()) {
|
||||
result.add(rs.getInt(1));
|
||||
Set<Integer> retSet = new HashSet<>();
|
||||
|
||||
for (String domainName : domainNameList) {
|
||||
stmt.setString(1, domainName);
|
||||
try (var rs = stmt.executeQuery()) {
|
||||
while (rs.next()) {
|
||||
retSet.add(rs.getInt(1));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
var ret = new ArrayList<>(retSet);
|
||||
ret.sort(Comparator.naturalOrder());
|
||||
return ret;
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
|
|
|
@ -11,5 +11,13 @@ public interface GraphSource {
|
|||
/** Construct the graph */
|
||||
Graph<Integer, ?> getGraph();
|
||||
|
||||
/** Return a list of domain ids for the given domain names.
|
||||
* The function will also accept SQL-style wildcards,
|
||||
* e.g. "%marginalia.nu" will match "marginalia.nu" and "memex.marginalia.nu".
|
||||
* <p>
|
||||
* If multiple wildcards are provided, and overlapping domains are matched,
|
||||
* they will be included only once. The returned list will be sorted in
|
||||
* numerical order of the domain IDs.
|
||||
*/
|
||||
List<Integer> domainIds(List<String> domainNameList);
|
||||
}
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
package nu.marginalia.ranking;
|
||||
|
||||
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
// Test the ranking algorithm with prod data.
|
||||
// Test the ranking algorithm with prod data. Will not run if the data is not available.
|
||||
// It's not feasible to include the data in the git repo, as it's ~6 GB of data.
|
||||
@Disabled
|
||||
class RankingAlgorithmWithRealDataTest {
|
||||
|
||||
@Test
|
||||
|
|
|
@ -0,0 +1,161 @@
|
|||
package nu.marginalia.ranking;
|
||||
|
||||
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.query.client.QueryClient;
|
||||
import nu.marginalia.ranking.data.InvertedLinkGraphSource;
|
||||
import nu.marginalia.ranking.data.LinkGraphSource;
|
||||
import nu.marginalia.ranking.data.SimilarityGraphSource;
|
||||
import nu.marginalia.test.TestMigrationLoader;
|
||||
import org.jgrapht.Graph;
|
||||
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||
import org.junit.jupiter.api.*;
|
||||
import org.junit.jupiter.api.parallel.Execution;
|
||||
import org.mockito.Mockito;
|
||||
import org.testcontainers.containers.MariaDBContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@Tag("slow")
|
||||
@Testcontainers
|
||||
@Execution(SAME_THREAD)
|
||||
public class RankingAlgorithmsContainerTest {
|
||||
@Container
|
||||
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||
.withDatabaseName("WMSA_prod")
|
||||
.withUsername("wmsa")
|
||||
.withPassword("wmsa")
|
||||
.withNetworkAliases("mariadb");
|
||||
|
||||
static HikariDataSource dataSource;
|
||||
|
||||
QueryClient queryClient;
|
||||
QueryClient.AllLinks allLinks;
|
||||
@BeforeAll
|
||||
public static void setup() {
|
||||
HikariConfig config = new HikariConfig();
|
||||
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||
config.setUsername("wmsa");
|
||||
config.setPassword("wmsa");
|
||||
|
||||
dataSource = new HikariDataSource(config);
|
||||
TestMigrationLoader.flywayMigration(dataSource);
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate("""
|
||||
INSERT INTO EC_DOMAIN(DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY)
|
||||
VALUES ('memex.marginalia.nu', 'marginalia.nu', 1),
|
||||
('search.marginalia.nu', 'marginalia.nu', 1),
|
||||
('encyclopedia.marginalia.nu', 'marginalia.nu', 1),
|
||||
('marginalia.nu', 'marginalia.nu', 1);
|
||||
""");
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@BeforeEach
|
||||
public void setupQueryClient() {
|
||||
queryClient = Mockito.mock(QueryClient.class);
|
||||
allLinks = new QueryClient.AllLinks();
|
||||
when(queryClient.getAllDomainLinks()).thenReturn(allLinks);
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate("TRUNCATE TABLE EC_DOMAIN_NEIGHBORS_2");
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void addSimilarity(int source, int dest, double similarity) {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
INSERT INTO EC_DOMAIN_NEIGHBORS_2(DOMAIN_ID, NEIGHBOR_ID, RELATEDNESS)
|
||||
VALUES (?, ?, ?)
|
||||
""")) {
|
||||
stmt.setInt(1, source);
|
||||
stmt.setInt(2, dest);
|
||||
stmt.setDouble(3, similarity);
|
||||
stmt.executeUpdate();
|
||||
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetDomains() {
|
||||
// should all be the same, doesn't matter which one we use
|
||||
var source = new LinkGraphSource(dataSource, queryClient);
|
||||
|
||||
Assertions.assertEquals(List.of(1),
|
||||
source.domainIds(List.of("memex.marginalia.nu")));
|
||||
|
||||
// Verify globbing
|
||||
Assertions.assertEquals(List.of(1,2,3),
|
||||
source.domainIds(List.of("%.marginalia.nu")));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLinkGraphSource() {
|
||||
allLinks.add(1, 3);
|
||||
|
||||
var graph = new LinkGraphSource(dataSource, queryClient).getGraph();
|
||||
|
||||
Assertions.assertTrue(graph.containsVertex(1));
|
||||
Assertions.assertTrue(graph.containsVertex(2));
|
||||
Assertions.assertTrue(graph.containsVertex(3));
|
||||
|
||||
Assertions.assertTrue(graph.containsEdge(1, 3));
|
||||
|
||||
Assertions.assertFalse(graph.containsEdge(3, 1));
|
||||
Assertions.assertFalse(graph.containsEdge(2, 3));
|
||||
Assertions.assertFalse(graph.containsEdge(3, 2));
|
||||
}
|
||||
@Test
|
||||
public void testInvertedLinkGraphSource() {
|
||||
allLinks.add(1, 3);
|
||||
|
||||
var graph = new InvertedLinkGraphSource(dataSource, queryClient).getGraph();
|
||||
|
||||
Assertions.assertTrue(graph.containsVertex(1));
|
||||
Assertions.assertTrue(graph.containsVertex(2));
|
||||
Assertions.assertTrue(graph.containsVertex(3));
|
||||
|
||||
Assertions.assertTrue(graph.containsEdge(3, 1));
|
||||
|
||||
Assertions.assertFalse(graph.containsEdge(1, 3));
|
||||
Assertions.assertFalse(graph.containsEdge(2, 3));
|
||||
Assertions.assertFalse(graph.containsEdge(3, 2));
|
||||
}
|
||||
@Test
|
||||
@SuppressWarnings("unchecked")
|
||||
public void testSimilarityGraphSource() {
|
||||
|
||||
addSimilarity(1, 3, 0.5);
|
||||
|
||||
var graph = (Graph<Integer, DefaultWeightedEdge>) new SimilarityGraphSource(dataSource).getGraph();
|
||||
|
||||
Assertions.assertTrue(graph.containsVertex(1));
|
||||
Assertions.assertTrue(graph.containsVertex(2));
|
||||
Assertions.assertTrue(graph.containsVertex(3));
|
||||
|
||||
Assertions.assertTrue(graph.containsEdge(3, 1));
|
||||
Assertions.assertTrue(graph.containsEdge(1, 3));
|
||||
Assertions.assertEquals(graph.getEdgeWeight(graph.getEdge(1, 3)), 0.5, 0.0001);
|
||||
|
||||
Assertions.assertFalse(graph.containsEdge(1, 2));
|
||||
Assertions.assertFalse(graph.containsEdge(2, 3));
|
||||
}
|
||||
}
|
|
@ -15,15 +15,12 @@ import java.sql.SQLException;
|
|||
import java.util.Map;
|
||||
|
||||
public class ControlDomainRankingSetsService {
|
||||
private final HikariDataSource dataSource;
|
||||
private final ControlRendererFactory rendererFactory;
|
||||
private final DomainRankingSetsService domainRankingSetsService;
|
||||
|
||||
@Inject
|
||||
public ControlDomainRankingSetsService(HikariDataSource dataSource,
|
||||
ControlRendererFactory rendererFactory,
|
||||
public ControlDomainRankingSetsService(ControlRendererFactory rendererFactory,
|
||||
DomainRankingSetsService domainRankingSetsService) {
|
||||
this.dataSource = dataSource;
|
||||
this.rendererFactory = rendererFactory;
|
||||
this.domainRankingSetsService = domainRankingSetsService;
|
||||
}
|
||||
|
@ -47,7 +44,6 @@ public class ControlDomainRankingSetsService {
|
|||
domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet(
|
||||
id,
|
||||
request.queryParams("description"),
|
||||
DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")),
|
||||
Integer.parseInt(request.queryParams("depth")),
|
||||
request.queryParams("definition")
|
||||
));
|
||||
|
@ -77,7 +73,6 @@ public class ControlDomainRankingSetsService {
|
|||
domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet(
|
||||
request.queryParams("name").toUpperCase(),
|
||||
request.queryParams("description"),
|
||||
DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")),
|
||||
Integer.parseInt(request.queryParams("depth")),
|
||||
request.queryParams("definition")
|
||||
));
|
||||
|
@ -95,17 +90,6 @@ public class ControlDomainRankingSetsService {
|
|||
}
|
||||
private Object rankingSetModel(Request request, Response response) throws SQLException {
|
||||
var model = domainRankingSetsService.get(request.params("id")).orElseThrow();
|
||||
return Map.of("rankingSet", model,
|
||||
"selectedAlgo", Map.of(
|
||||
"special", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.SPECIAL,
|
||||
"adjacency_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK,
|
||||
"adjacency_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_PAGERANK,
|
||||
"links_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_CHEIRANK,
|
||||
"links_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK)
|
||||
);
|
||||
|
||||
|
||||
|
||||
|
||||
return Map.of("rankingSet", model);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,14 +16,12 @@
|
|||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Description</th>
|
||||
<th>Algorithm</th>
|
||||
<th>Depth</th>
|
||||
</tr>
|
||||
{{#each rankingSets}}
|
||||
<tr>
|
||||
<td><a href="/domain-ranking-sets/{{name}}">{{name}}</a></td>
|
||||
<td>{{description}}</td>
|
||||
<td>{{algorithm}}</td>
|
||||
<td>{{depth}}</td>
|
||||
</tr>
|
||||
{{/each}}
|
||||
|
|
|
@ -26,15 +26,6 @@
|
|||
<th><label for="algorithm">Algorithm</label></th>
|
||||
<td>
|
||||
{{#if special}}<input type="hidden" name="algorithm" value="{{algorithm}}" />{{/if}}
|
||||
<select id="algorithm" name="algorithm" {{#if special}}disabled{{/if}}>
|
||||
{{#with algorithm}}
|
||||
<option value="SPECIAL" disabled {{#if selectedAlgo.special}}selected{{/if}}>SPECIAL</option>
|
||||
<option value="LINKS_PAGERANK" {{#if selectedAlgo.links_pagerank}}selected{{/if}}>LINKS_PAGERANK</option>
|
||||
<option value="LINKS_CHEIRANK" {{#if selectedAlgo.links_cheirank}}selected{{/if}}>LINKS_CHEIRANK</option>
|
||||
<option value="ADJACENCY_PAGERANK" {{#if selectedAlgo.adjacency_pagerank}}selected{{/if}}>ADJACENCY_PAGERANK</option>
|
||||
<option value="ADJACENCY_CHEIRANK" {{#if selectedAlgo.adjacency_cheirank}}selected{{/if}}>ADJACENCY_CHEIRANK</option>
|
||||
{{/with}}
|
||||
</select>
|
||||
<div>
|
||||
<small class="text-muted">
|
||||
The algorithm used to rank the domains. The LINKS algorithms use the link graph, and the ADJACENCY
|
||||
|
@ -68,7 +59,8 @@
|
|||
<div>
|
||||
<small class="text-muted">A list of domain names, one per line, possibly globbed with SQL-style '%' wildcards.
|
||||
These are used as the origin point for the Personalized PageRank algorithm, and will be considered
|
||||
the central points of the link or adjacency graph. If no domains are specified, the entire domain space is used, as per the PageRank paper.
|
||||
the central points of the link or adjacency graph. If no domains are specified,
|
||||
the entire domain space is used, as per the PageRank paper.
|
||||
</small>
|
||||
</div>
|
||||
</td></tr>
|
||||
|
|
|
@ -18,7 +18,6 @@ import nu.marginalia.ranking.data.GraphSource;
|
|||
import nu.marginalia.ranking.data.LinkGraphSource;
|
||||
import nu.marginalia.ranking.data.SimilarityGraphSource;
|
||||
import nu.marginalia.service.control.ServiceEventLog;
|
||||
import nu.marginalia.service.control.ServiceHeartbeat;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -122,13 +121,13 @@ public class IndexSearchSetsService {
|
|||
}
|
||||
|
||||
try {
|
||||
if (DomainRankingSetsService.DomainSetAlgorithm.SPECIAL.equals(rankingSet.algorithm())) {
|
||||
if (rankingSet.isSpecial()) {
|
||||
switch (rankingSet.name()) {
|
||||
case "BLOGS" -> recalculateBlogsSet(rankingSet);
|
||||
case "NONE" -> {} // No-op
|
||||
}
|
||||
} else {
|
||||
recalculateNornal(rankingSet);
|
||||
recalculateNormal(rankingSet);
|
||||
}
|
||||
}
|
||||
catch (Exception ex) {
|
||||
|
@ -138,16 +137,17 @@ public class IndexSearchSetsService {
|
|||
}
|
||||
}
|
||||
|
||||
private void recalculateNornal(DomainRankingSetsService.DomainRankingSet rankingSet) {
|
||||
String[] domains = rankingSet.domains();
|
||||
private void recalculateNormal(DomainRankingSetsService.DomainRankingSet rankingSet) {
|
||||
List<String> domains = List.of(rankingSet.domains());
|
||||
|
||||
GraphSource graphSource = switch (rankingSet.algorithm()) {
|
||||
case LINKS_PAGERANK, LINKS_CHEIRANK -> linksDomains;
|
||||
case ADJACENCY_PAGERANK, ADJACENCY_CHEIRANK -> similarityDomains;
|
||||
default -> throw new IllegalStateException("Unexpected value: " + rankingSet.algorithm());
|
||||
};
|
||||
GraphSource source;
|
||||
|
||||
var data = new PageRankDomainRanker(linksDomains, linksDomains.domainIds(List.of(domains)))
|
||||
// Similarity ranking does not behave well with an empty set of domains
|
||||
if (domains.isEmpty()) source = linksDomains;
|
||||
else source = similarityDomains;
|
||||
|
||||
var data = PageRankDomainRanker
|
||||
.forDomainNames(source, domains)
|
||||
.calculate(rankingSet.depth(), RankingResultHashSetAccumulator::new);
|
||||
|
||||
var set = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexServicesFactory.getSearchSetsBase()), data);
|
||||
|
@ -180,9 +180,21 @@ public class IndexSearchSetsService {
|
|||
}
|
||||
|
||||
private void updateDomainRankings(DomainRankingSetsService.DomainRankingSet rankingSet) {
|
||||
List<String> domains = List.of(rankingSet.domains());
|
||||
|
||||
var ranks = new PageRankDomainRanker(similarityDomains, similarityDomains.domainIds(List.of(rankingSet.domains())))
|
||||
.calculate(rankingSet.depth(), () -> new RankingResultHashMapAccumulator(rankingSet.depth()));
|
||||
final GraphSource source;
|
||||
|
||||
if (domains.isEmpty()) {
|
||||
// Similarity ranking does not behave well with an empty set of domains
|
||||
source = linksDomains;
|
||||
}
|
||||
else {
|
||||
source = similarityDomains;
|
||||
}
|
||||
|
||||
var ranks = PageRankDomainRanker
|
||||
.forDomainNames(source, domains)
|
||||
.calculate(rankingSet.depth(), () -> new RankingResultHashMapAccumulator(rankingSet.depth()));
|
||||
|
||||
synchronized (this) {
|
||||
domainRankings = new DomainRankings(ranks);
|
||||
|
|
Loading…
Reference in a new issue