(domain-ranking) Integrate new ranking logic
This change removes the 'algorithm' field from the domain ranking set configuration. Instead, the algorithm will be chosen based on whether influence domains are provided, and whether similarity data is present.
This commit is contained in:
parent
64acdb5f2a
commit
9ec262ae00
@ -25,7 +25,7 @@ public class DomainRankingSetsService {
|
|||||||
public Optional<DomainRankingSet> get(String name) throws SQLException {
|
public Optional<DomainRankingSet> get(String name) throws SQLException {
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var stmt = conn.prepareStatement("""
|
var stmt = conn.prepareStatement("""
|
||||||
SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
|
SELECT NAME, DESCRIPTION, DEPTH, DEFINITION
|
||||||
FROM CONF_DOMAIN_RANKING_SET
|
FROM CONF_DOMAIN_RANKING_SET
|
||||||
WHERE NAME = ?
|
WHERE NAME = ?
|
||||||
""")) {
|
""")) {
|
||||||
@ -39,7 +39,6 @@ public class DomainRankingSetsService {
|
|||||||
return Optional.of(new DomainRankingSet(
|
return Optional.of(new DomainRankingSet(
|
||||||
rs.getString("NAME"),
|
rs.getString("NAME"),
|
||||||
rs.getString("DESCRIPTION"),
|
rs.getString("DESCRIPTION"),
|
||||||
DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
|
|
||||||
rs.getInt("DEPTH"),
|
rs.getInt("DEPTH"),
|
||||||
rs.getString("DEFINITION")
|
rs.getString("DEFINITION")
|
||||||
));
|
));
|
||||||
@ -53,15 +52,14 @@ public class DomainRankingSetsService {
|
|||||||
public void upsert(DomainRankingSet domainRankingSet) {
|
public void upsert(DomainRankingSet domainRankingSet) {
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var stmt = conn.prepareStatement("""
|
var stmt = conn.prepareStatement("""
|
||||||
REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION)
|
REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, DEPTH, DEFINITION)
|
||||||
VALUES (?, ?, ?, ?, ?)
|
VALUES (?, ?, ?, ?)
|
||||||
"""))
|
"""))
|
||||||
{
|
{
|
||||||
stmt.setString(1, domainRankingSet.name());
|
stmt.setString(1, domainRankingSet.name());
|
||||||
stmt.setString(2, domainRankingSet.description());
|
stmt.setString(2, domainRankingSet.description());
|
||||||
stmt.setString(3, domainRankingSet.algorithm().name());
|
stmt.setInt(3, domainRankingSet.depth());
|
||||||
stmt.setInt(4, domainRankingSet.depth());
|
stmt.setString(4, domainRankingSet.definition());
|
||||||
stmt.setString(5, domainRankingSet.definition());
|
|
||||||
stmt.executeUpdate();
|
stmt.executeUpdate();
|
||||||
|
|
||||||
if (!conn.getAutoCommit())
|
if (!conn.getAutoCommit())
|
||||||
@ -94,7 +92,7 @@ public class DomainRankingSetsService {
|
|||||||
|
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var stmt = conn.prepareStatement("""
|
var stmt = conn.prepareStatement("""
|
||||||
SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
|
SELECT NAME, DESCRIPTION, DEPTH, DEFINITION
|
||||||
FROM CONF_DOMAIN_RANKING_SET
|
FROM CONF_DOMAIN_RANKING_SET
|
||||||
""")) {
|
""")) {
|
||||||
var rs = stmt.executeQuery();
|
var rs = stmt.executeQuery();
|
||||||
@ -105,7 +103,6 @@ public class DomainRankingSetsService {
|
|||||||
new DomainRankingSet(
|
new DomainRankingSet(
|
||||||
rs.getString("NAME"),
|
rs.getString("NAME"),
|
||||||
rs.getString("DESCRIPTION"),
|
rs.getString("DESCRIPTION"),
|
||||||
DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
|
|
||||||
rs.getInt("DEPTH"),
|
rs.getInt("DEPTH"),
|
||||||
rs.getString("DEFINITION"))
|
rs.getString("DEFINITION"))
|
||||||
);
|
);
|
||||||
@ -118,31 +115,17 @@ public class DomainRankingSetsService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public enum DomainSetAlgorithm {
|
|
||||||
/** Use link graph, do a pagerank */
|
|
||||||
LINKS_PAGERANK,
|
|
||||||
/** Use link graph, do a cheirank */
|
|
||||||
LINKS_CHEIRANK,
|
|
||||||
/** Use adjacency graph, do a pagerank */
|
|
||||||
ADJACENCY_PAGERANK,
|
|
||||||
/** Use adjacency graph, do a cheirank */
|
|
||||||
ADJACENCY_CHEIRANK,
|
|
||||||
/** For reserved names. Use special algorithm, function of name */
|
|
||||||
SPECIAL
|
|
||||||
};
|
|
||||||
|
|
||||||
/** Defines a domain ranking set, parameters for the ranking algorithms.
|
/** Defines a domain ranking set, parameters for the ranking algorithms.
|
||||||
*
|
*
|
||||||
* @param name Key and name of the set
|
* @param name Key and name of the set
|
||||||
* @param description Human-readable description
|
* @param description Human-readable description
|
||||||
* @param algorithm Algorithm to use
|
|
||||||
* @param depth Depth of the algorithm
|
* @param depth Depth of the algorithm
|
||||||
* @param definition Definition of the set, typically a list of domains or globs for domain-names
|
* @param definition Definition of the set, typically a list of domains or globs for domain-names
|
||||||
* */
|
* */
|
||||||
@With
|
@With
|
||||||
public record DomainRankingSet(String name,
|
public record DomainRankingSet(String name,
|
||||||
String description,
|
String description,
|
||||||
DomainSetAlgorithm algorithm,
|
|
||||||
int depth,
|
int depth,
|
||||||
String definition)
|
String definition)
|
||||||
{
|
{
|
||||||
@ -159,7 +142,7 @@ public class DomainRankingSetsService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public boolean isSpecial() {
|
public boolean isSpecial() {
|
||||||
return algorithm() == DomainSetAlgorithm.SPECIAL;
|
return name().equals("BLOGS") || name().equals("NONE") || name().equals("RANK");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
ALTER TABLE CONF_DOMAIN_RANKING_SET DROP COLUMN ALGORITHM;
|
@ -56,14 +56,12 @@ class DomainRankingSetsServiceTest {
|
|||||||
var newValue = new DomainRankingSetsService.DomainRankingSet(
|
var newValue = new DomainRankingSetsService.DomainRankingSet(
|
||||||
"test",
|
"test",
|
||||||
"Test domain set",
|
"Test domain set",
|
||||||
DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK,
|
|
||||||
10,
|
10,
|
||||||
"test\\.nu"
|
"test\\.nu"
|
||||||
);
|
);
|
||||||
var newValue2 = new DomainRankingSetsService.DomainRankingSet(
|
var newValue2 = new DomainRankingSetsService.DomainRankingSet(
|
||||||
"test2",
|
"test2",
|
||||||
"Test domain set 2",
|
"Test domain set 2",
|
||||||
DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK,
|
|
||||||
20,
|
20,
|
||||||
"test\\.nu 2"
|
"test\\.nu 2"
|
||||||
);
|
);
|
||||||
|
@ -32,13 +32,14 @@ dependencies {
|
|||||||
implementation libs.hll
|
implementation libs.hll
|
||||||
|
|
||||||
testImplementation project(':code:libraries:array')
|
testImplementation project(':code:libraries:array')
|
||||||
|
|
||||||
testImplementation libs.commons.lang3
|
testImplementation libs.commons.lang3
|
||||||
testImplementation libs.bundles.slf4j.test
|
testImplementation libs.bundles.slf4j.test
|
||||||
testImplementation libs.bundles.junit
|
testImplementation libs.bundles.junit
|
||||||
testImplementation libs.mockito
|
testImplementation libs.mockito
|
||||||
}
|
|
||||||
|
|
||||||
test {
|
testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
|
||||||
minHeapSize = "128m" // initial heap size
|
testImplementation 'org.testcontainers:mariadb:1.17.4'
|
||||||
maxHeapSize = "20G" // maximum heap size
|
testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
|
||||||
|
testImplementation project(':code:libraries:test-helpers')
|
||||||
}
|
}
|
@ -25,6 +25,12 @@ public class PageRankDomainRanker implements RankingAlgorithm {
|
|||||||
this.graph = source.getGraph();
|
this.graph = source.getGraph();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static PageRankDomainRanker forDomainNames(GraphSource source,
|
||||||
|
List<String> influenceSet)
|
||||||
|
{
|
||||||
|
return new PageRankDomainRanker(source, source.domainIds(influenceSet));
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public <T> T calculate(int resultCount, Supplier<RankingResultAccumulator<T>> accumulatorP) {
|
public <T> T calculate(int resultCount, Supplier<RankingResultAccumulator<T>> accumulatorP) {
|
||||||
VertexScoringAlgorithm<Integer, Double> pageRank;
|
VertexScoringAlgorithm<Integer, Double> pageRank;
|
||||||
|
@ -4,8 +4,7 @@ import com.zaxxer.hikari.HikariDataSource;
|
|||||||
import org.jgrapht.Graph;
|
import org.jgrapht.Graph;
|
||||||
|
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public abstract class AbstractGraphSource implements GraphSource {
|
public abstract class AbstractGraphSource implements GraphSource {
|
||||||
protected final HikariDataSource dataSource;
|
protected final HikariDataSource dataSource;
|
||||||
@ -39,18 +38,24 @@ public abstract class AbstractGraphSource implements GraphSource {
|
|||||||
var stmt = conn.prepareStatement("""
|
var stmt = conn.prepareStatement("""
|
||||||
SELECT ID
|
SELECT ID
|
||||||
FROM EC_DOMAIN
|
FROM EC_DOMAIN
|
||||||
WHERE DOMAIN_NAME IN (?)
|
WHERE DOMAIN_NAME LIKE ?
|
||||||
"""))
|
"""))
|
||||||
{
|
{
|
||||||
stmt.setArray(1, conn.createArrayOf("VARCHAR", domainNameList.toArray()));
|
Set<Integer> retSet = new HashSet<>();
|
||||||
|
|
||||||
|
for (String domainName : domainNameList) {
|
||||||
|
stmt.setString(1, domainName);
|
||||||
try (var rs = stmt.executeQuery()) {
|
try (var rs = stmt.executeQuery()) {
|
||||||
var result = new ArrayList<Integer>();
|
|
||||||
while (rs.next()) {
|
while (rs.next()) {
|
||||||
result.add(rs.getInt(1));
|
retSet.add(rs.getInt(1));
|
||||||
}
|
}
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var ret = new ArrayList<>(retSet);
|
||||||
|
ret.sort(Comparator.naturalOrder());
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
catch (SQLException ex) {
|
catch (SQLException ex) {
|
||||||
throw new RuntimeException(ex);
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
|
@ -11,5 +11,13 @@ public interface GraphSource {
|
|||||||
/** Construct the graph */
|
/** Construct the graph */
|
||||||
Graph<Integer, ?> getGraph();
|
Graph<Integer, ?> getGraph();
|
||||||
|
|
||||||
|
/** Return a list of domain ids for the given domain names.
|
||||||
|
* The function will also accept SQL-style wildcards,
|
||||||
|
* e.g. "%marginalia.nu" will match "marginalia.nu" and "memex.marginalia.nu".
|
||||||
|
* <p></p>
|
||||||
|
* If multiple wildcards are provided, and overlapping domains are matched,
|
||||||
|
* they will be included only once. The returned list will be sorted in
|
||||||
|
* numerical order of the domain IDs.
|
||||||
|
*/
|
||||||
List<Integer> domainIds(List<String> domainNameList);
|
List<Integer> domainIds(List<String> domainNameList);
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
package nu.marginalia.ranking;
|
package nu.marginalia.ranking;
|
||||||
|
|
||||||
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
|
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
|
||||||
|
import org.junit.jupiter.api.Disabled;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
// Test the ranking algorithm with prod data.
|
// Test the ranking algorithm with prod data. Will not run if the data is not available.
|
||||||
|
// It's not feasible to include the data in the git repo, as it's ~6 GB of data.
|
||||||
|
@Disabled
|
||||||
class RankingAlgorithmWithRealDataTest {
|
class RankingAlgorithmWithRealDataTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -0,0 +1,161 @@
|
|||||||
|
package nu.marginalia.ranking;
|
||||||
|
|
||||||
|
|
||||||
|
import com.zaxxer.hikari.HikariConfig;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import nu.marginalia.query.client.QueryClient;
|
||||||
|
import nu.marginalia.ranking.data.InvertedLinkGraphSource;
|
||||||
|
import nu.marginalia.ranking.data.LinkGraphSource;
|
||||||
|
import nu.marginalia.ranking.data.SimilarityGraphSource;
|
||||||
|
import nu.marginalia.test.TestMigrationLoader;
|
||||||
|
import org.jgrapht.Graph;
|
||||||
|
import org.jgrapht.graph.DefaultWeightedEdge;
|
||||||
|
import org.junit.jupiter.api.*;
|
||||||
|
import org.junit.jupiter.api.parallel.Execution;
|
||||||
|
import org.mockito.Mockito;
|
||||||
|
import org.testcontainers.containers.MariaDBContainer;
|
||||||
|
import org.testcontainers.junit.jupiter.Container;
|
||||||
|
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||||
|
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
@Tag("slow")
|
||||||
|
@Testcontainers
|
||||||
|
@Execution(SAME_THREAD)
|
||||||
|
public class RankingAlgorithmsContainerTest {
|
||||||
|
@Container
|
||||||
|
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||||
|
.withDatabaseName("WMSA_prod")
|
||||||
|
.withUsername("wmsa")
|
||||||
|
.withPassword("wmsa")
|
||||||
|
.withNetworkAliases("mariadb");
|
||||||
|
|
||||||
|
static HikariDataSource dataSource;
|
||||||
|
|
||||||
|
QueryClient queryClient;
|
||||||
|
QueryClient.AllLinks allLinks;
|
||||||
|
@BeforeAll
|
||||||
|
public static void setup() {
|
||||||
|
HikariConfig config = new HikariConfig();
|
||||||
|
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||||
|
config.setUsername("wmsa");
|
||||||
|
config.setPassword("wmsa");
|
||||||
|
|
||||||
|
dataSource = new HikariDataSource(config);
|
||||||
|
TestMigrationLoader.flywayMigration(dataSource);
|
||||||
|
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate("""
|
||||||
|
INSERT INTO EC_DOMAIN(DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY)
|
||||||
|
VALUES ('memex.marginalia.nu', 'marginalia.nu', 1),
|
||||||
|
('search.marginalia.nu', 'marginalia.nu', 1),
|
||||||
|
('encyclopedia.marginalia.nu', 'marginalia.nu', 1),
|
||||||
|
('marginalia.nu', 'marginalia.nu', 1);
|
||||||
|
""");
|
||||||
|
} catch (SQLException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setupQueryClient() {
|
||||||
|
queryClient = Mockito.mock(QueryClient.class);
|
||||||
|
allLinks = new QueryClient.AllLinks();
|
||||||
|
when(queryClient.getAllDomainLinks()).thenReturn(allLinks);
|
||||||
|
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate("TRUNCATE TABLE EC_DOMAIN_NEIGHBORS_2");
|
||||||
|
} catch (SQLException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addSimilarity(int source, int dest, double similarity) {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
INSERT INTO EC_DOMAIN_NEIGHBORS_2(DOMAIN_ID, NEIGHBOR_ID, RELATEDNESS)
|
||||||
|
VALUES (?, ?, ?)
|
||||||
|
""")) {
|
||||||
|
stmt.setInt(1, source);
|
||||||
|
stmt.setInt(2, dest);
|
||||||
|
stmt.setDouble(3, similarity);
|
||||||
|
stmt.executeUpdate();
|
||||||
|
|
||||||
|
} catch (SQLException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetDomains() {
|
||||||
|
// should all be the same, doesn't matter which one we use
|
||||||
|
var source = new LinkGraphSource(dataSource, queryClient);
|
||||||
|
|
||||||
|
Assertions.assertEquals(List.of(1),
|
||||||
|
source.domainIds(List.of("memex.marginalia.nu")));
|
||||||
|
|
||||||
|
// Verify globbing
|
||||||
|
Assertions.assertEquals(List.of(1,2,3),
|
||||||
|
source.domainIds(List.of("%.marginalia.nu")));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLinkGraphSource() {
|
||||||
|
allLinks.add(1, 3);
|
||||||
|
|
||||||
|
var graph = new LinkGraphSource(dataSource, queryClient).getGraph();
|
||||||
|
|
||||||
|
Assertions.assertTrue(graph.containsVertex(1));
|
||||||
|
Assertions.assertTrue(graph.containsVertex(2));
|
||||||
|
Assertions.assertTrue(graph.containsVertex(3));
|
||||||
|
|
||||||
|
Assertions.assertTrue(graph.containsEdge(1, 3));
|
||||||
|
|
||||||
|
Assertions.assertFalse(graph.containsEdge(3, 1));
|
||||||
|
Assertions.assertFalse(graph.containsEdge(2, 3));
|
||||||
|
Assertions.assertFalse(graph.containsEdge(3, 2));
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
public void testInvertedLinkGraphSource() {
|
||||||
|
allLinks.add(1, 3);
|
||||||
|
|
||||||
|
var graph = new InvertedLinkGraphSource(dataSource, queryClient).getGraph();
|
||||||
|
|
||||||
|
Assertions.assertTrue(graph.containsVertex(1));
|
||||||
|
Assertions.assertTrue(graph.containsVertex(2));
|
||||||
|
Assertions.assertTrue(graph.containsVertex(3));
|
||||||
|
|
||||||
|
Assertions.assertTrue(graph.containsEdge(3, 1));
|
||||||
|
|
||||||
|
Assertions.assertFalse(graph.containsEdge(1, 3));
|
||||||
|
Assertions.assertFalse(graph.containsEdge(2, 3));
|
||||||
|
Assertions.assertFalse(graph.containsEdge(3, 2));
|
||||||
|
}
|
||||||
|
@Test
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public void testSimilarityGraphSource() {
|
||||||
|
|
||||||
|
addSimilarity(1, 3, 0.5);
|
||||||
|
|
||||||
|
var graph = (Graph<Integer, DefaultWeightedEdge>) new SimilarityGraphSource(dataSource).getGraph();
|
||||||
|
|
||||||
|
Assertions.assertTrue(graph.containsVertex(1));
|
||||||
|
Assertions.assertTrue(graph.containsVertex(2));
|
||||||
|
Assertions.assertTrue(graph.containsVertex(3));
|
||||||
|
|
||||||
|
Assertions.assertTrue(graph.containsEdge(3, 1));
|
||||||
|
Assertions.assertTrue(graph.containsEdge(1, 3));
|
||||||
|
Assertions.assertEquals(graph.getEdgeWeight(graph.getEdge(1, 3)), 0.5, 0.0001);
|
||||||
|
|
||||||
|
Assertions.assertFalse(graph.containsEdge(1, 2));
|
||||||
|
Assertions.assertFalse(graph.containsEdge(2, 3));
|
||||||
|
}
|
||||||
|
}
|
@ -15,15 +15,12 @@ import java.sql.SQLException;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
public class ControlDomainRankingSetsService {
|
public class ControlDomainRankingSetsService {
|
||||||
private final HikariDataSource dataSource;
|
|
||||||
private final ControlRendererFactory rendererFactory;
|
private final ControlRendererFactory rendererFactory;
|
||||||
private final DomainRankingSetsService domainRankingSetsService;
|
private final DomainRankingSetsService domainRankingSetsService;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public ControlDomainRankingSetsService(HikariDataSource dataSource,
|
public ControlDomainRankingSetsService(ControlRendererFactory rendererFactory,
|
||||||
ControlRendererFactory rendererFactory,
|
|
||||||
DomainRankingSetsService domainRankingSetsService) {
|
DomainRankingSetsService domainRankingSetsService) {
|
||||||
this.dataSource = dataSource;
|
|
||||||
this.rendererFactory = rendererFactory;
|
this.rendererFactory = rendererFactory;
|
||||||
this.domainRankingSetsService = domainRankingSetsService;
|
this.domainRankingSetsService = domainRankingSetsService;
|
||||||
}
|
}
|
||||||
@ -47,7 +44,6 @@ public class ControlDomainRankingSetsService {
|
|||||||
domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet(
|
domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet(
|
||||||
id,
|
id,
|
||||||
request.queryParams("description"),
|
request.queryParams("description"),
|
||||||
DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")),
|
|
||||||
Integer.parseInt(request.queryParams("depth")),
|
Integer.parseInt(request.queryParams("depth")),
|
||||||
request.queryParams("definition")
|
request.queryParams("definition")
|
||||||
));
|
));
|
||||||
@ -77,7 +73,6 @@ public class ControlDomainRankingSetsService {
|
|||||||
domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet(
|
domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet(
|
||||||
request.queryParams("name").toUpperCase(),
|
request.queryParams("name").toUpperCase(),
|
||||||
request.queryParams("description"),
|
request.queryParams("description"),
|
||||||
DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")),
|
|
||||||
Integer.parseInt(request.queryParams("depth")),
|
Integer.parseInt(request.queryParams("depth")),
|
||||||
request.queryParams("definition")
|
request.queryParams("definition")
|
||||||
));
|
));
|
||||||
@ -95,17 +90,6 @@ public class ControlDomainRankingSetsService {
|
|||||||
}
|
}
|
||||||
private Object rankingSetModel(Request request, Response response) throws SQLException {
|
private Object rankingSetModel(Request request, Response response) throws SQLException {
|
||||||
var model = domainRankingSetsService.get(request.params("id")).orElseThrow();
|
var model = domainRankingSetsService.get(request.params("id")).orElseThrow();
|
||||||
return Map.of("rankingSet", model,
|
return Map.of("rankingSet", model);
|
||||||
"selectedAlgo", Map.of(
|
|
||||||
"special", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.SPECIAL,
|
|
||||||
"adjacency_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK,
|
|
||||||
"adjacency_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_PAGERANK,
|
|
||||||
"links_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_CHEIRANK,
|
|
||||||
"links_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK)
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,14 +16,12 @@
|
|||||||
<tr>
|
<tr>
|
||||||
<th>Name</th>
|
<th>Name</th>
|
||||||
<th>Description</th>
|
<th>Description</th>
|
||||||
<th>Algorithm</th>
|
|
||||||
<th>Depth</th>
|
<th>Depth</th>
|
||||||
</tr>
|
</tr>
|
||||||
{{#each rankingSets}}
|
{{#each rankingSets}}
|
||||||
<tr>
|
<tr>
|
||||||
<td><a href="/domain-ranking-sets/{{name}}">{{name}}</td></td>
|
<td><a href="/domain-ranking-sets/{{name}}">{{name}}</td></td>
|
||||||
<td>{{description}}</td>
|
<td>{{description}}</td>
|
||||||
<td>{{algorithm}}</td>
|
|
||||||
<td>{{depth}}</td>
|
<td>{{depth}}</td>
|
||||||
</tr>
|
</tr>
|
||||||
{{/each}}
|
{{/each}}
|
||||||
|
@ -26,15 +26,6 @@
|
|||||||
<th><label for="algorithm">Algorithm</label></th>
|
<th><label for="algorithm">Algorithm</label></th>
|
||||||
<td>
|
<td>
|
||||||
{{#if special}}<input type="hidden" name="algorithm" value="{{algorithm}}" />{{/if}}
|
{{#if special}}<input type="hidden" name="algorithm" value="{{algorithm}}" />{{/if}}
|
||||||
<select id="algorithm" name="algorithm" {{#if special}}disabled{{/if}}>
|
|
||||||
{{#with algorithm}}
|
|
||||||
<option value="SPECIAL" disabled {{#if selectedAlgo.special}}selected{{/if}}>SPECIAL</option>
|
|
||||||
<option value="LINKS_PAGERANK" {{#if selectedAlgo.links_pagerank}}selected{{/if}}>LINKS_PAGERANK</option>
|
|
||||||
<option value="LINKS_CHEIRANK" {{#if selectedAlgo.links_cheirank}}selected{{/if}}>LINKS_CHEIRANK</option>
|
|
||||||
<option value="ADJACENCY_PAGERANK" {{#if selectedAlgo.adjacency_pagerank}}selected{{/if}}>ADJACENCY_PAGERANK</option>
|
|
||||||
<option value="ADJACENCY_CHEIRANK" {{#if selectedAlgo.adjacency_cheirank}}selected{{/if}}>ADJACENCY_CHEIRANK</option>
|
|
||||||
{{/with}}
|
|
||||||
</select>
|
|
||||||
<div>
|
<div>
|
||||||
<small class="text-muted">
|
<small class="text-muted">
|
||||||
The algorithm used to rank the domains. The LINKS algorithms use the link graph, and the ADJACENCY
|
The algorithm used to rank the domains. The LINKS algorithms use the link graph, and the ADJACENCY
|
||||||
@ -68,7 +59,8 @@
|
|||||||
<div>
|
<div>
|
||||||
<small class="text-muted">A list of domain names, one per line, possibly globbed with SQL-style '%' wildcards.
|
<small class="text-muted">A list of domain names, one per line, possibly globbed with SQL-style '%' wildcards.
|
||||||
These are used as the origin point for the Personalized PageRank algorithm, and will be considered
|
These are used as the origin point for the Personalized PageRank algorithm, and will be considered
|
||||||
the central points of the link or adjacency graph. If no domains are specified, the entire domain space is used, as per the PageRank paper.
|
the central points of the link or adjacency graph. If no domains are specified,
|
||||||
|
the entire domain space is used, as per the PageRank paper.
|
||||||
</small>
|
</small>
|
||||||
</div>
|
</div>
|
||||||
</td></tr>
|
</td></tr>
|
||||||
|
@ -18,7 +18,6 @@ import nu.marginalia.ranking.data.GraphSource;
|
|||||||
import nu.marginalia.ranking.data.LinkGraphSource;
|
import nu.marginalia.ranking.data.LinkGraphSource;
|
||||||
import nu.marginalia.ranking.data.SimilarityGraphSource;
|
import nu.marginalia.ranking.data.SimilarityGraphSource;
|
||||||
import nu.marginalia.service.control.ServiceEventLog;
|
import nu.marginalia.service.control.ServiceEventLog;
|
||||||
import nu.marginalia.service.control.ServiceHeartbeat;
|
|
||||||
import nu.marginalia.service.module.ServiceConfiguration;
|
import nu.marginalia.service.module.ServiceConfiguration;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@ -122,13 +121,13 @@ public class IndexSearchSetsService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (DomainRankingSetsService.DomainSetAlgorithm.SPECIAL.equals(rankingSet.algorithm())) {
|
if (rankingSet.isSpecial()) {
|
||||||
switch (rankingSet.name()) {
|
switch (rankingSet.name()) {
|
||||||
case "BLOGS" -> recalculateBlogsSet(rankingSet);
|
case "BLOGS" -> recalculateBlogsSet(rankingSet);
|
||||||
case "NONE" -> {} // No-op
|
case "NONE" -> {} // No-op
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
recalculateNornal(rankingSet);
|
recalculateNormal(rankingSet);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (Exception ex) {
|
catch (Exception ex) {
|
||||||
@ -138,16 +137,17 @@ public class IndexSearchSetsService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void recalculateNornal(DomainRankingSetsService.DomainRankingSet rankingSet) {
|
private void recalculateNormal(DomainRankingSetsService.DomainRankingSet rankingSet) {
|
||||||
String[] domains = rankingSet.domains();
|
List<String> domains = List.of(rankingSet.domains());
|
||||||
|
|
||||||
GraphSource graphSource = switch (rankingSet.algorithm()) {
|
GraphSource source;
|
||||||
case LINKS_PAGERANK, LINKS_CHEIRANK -> linksDomains;
|
|
||||||
case ADJACENCY_PAGERANK, ADJACENCY_CHEIRANK -> similarityDomains;
|
|
||||||
default -> throw new IllegalStateException("Unexpected value: " + rankingSet.algorithm());
|
|
||||||
};
|
|
||||||
|
|
||||||
var data = new PageRankDomainRanker(linksDomains, linksDomains.domainIds(List.of(domains)))
|
// Similarity ranking does not behave well with an empty set of domains
|
||||||
|
if (domains.isEmpty()) source = linksDomains;
|
||||||
|
else source = similarityDomains;
|
||||||
|
|
||||||
|
var data = PageRankDomainRanker
|
||||||
|
.forDomainNames(source, domains)
|
||||||
.calculate(rankingSet.depth(), RankingResultHashSetAccumulator::new);
|
.calculate(rankingSet.depth(), RankingResultHashSetAccumulator::new);
|
||||||
|
|
||||||
var set = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexServicesFactory.getSearchSetsBase()), data);
|
var set = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexServicesFactory.getSearchSetsBase()), data);
|
||||||
@ -180,8 +180,20 @@ public class IndexSearchSetsService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void updateDomainRankings(DomainRankingSetsService.DomainRankingSet rankingSet) {
|
private void updateDomainRankings(DomainRankingSetsService.DomainRankingSet rankingSet) {
|
||||||
|
List<String> domains = List.of(rankingSet.domains());
|
||||||
|
|
||||||
var ranks = new PageRankDomainRanker(similarityDomains, similarityDomains.domainIds(List.of(rankingSet.domains())))
|
final GraphSource source;
|
||||||
|
|
||||||
|
if (domains.isEmpty()) {
|
||||||
|
// Similarity ranking does not behave well with an empty set of domains
|
||||||
|
source = linksDomains;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
source = similarityDomains;
|
||||||
|
}
|
||||||
|
|
||||||
|
var ranks = PageRankDomainRanker
|
||||||
|
.forDomainNames(source, domains)
|
||||||
.calculate(rankingSet.depth(), () -> new RankingResultHashMapAccumulator(rankingSet.depth()));
|
.calculate(rankingSet.depth(), () -> new RankingResultHashMapAccumulator(rankingSet.depth()));
|
||||||
|
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
|
Loading…
Reference in New Issue
Block a user