From 5a62b3058f7d4a5840875233e5ade323a7c804ca Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 16 Jan 2024 10:55:24 +0100 Subject: [PATCH 1/7] (query-api) Make the search set identifier a string value in the API This will free the core marginalia search engine to use arbitrary search set definitions, while the app can use its hardcoded defaults. --- .../client/model/query/SearchSpecification.java | 2 +- .../nu/marginalia/query/QueryProtobufCodec.java | 8 ++++---- .../nu/marginalia/query/model/QueryParams.java | 4 ++-- .../java/nu/marginalia/api/ApiSearchOperator.java | 2 +- .../search/SearchQueryParamFactory.java | 8 ++++---- .../control/app/svc/SearchToBanService.java | 2 +- .../marginalia/index/svc/IndexQueryService.java | 4 +--- .../index/svc/IndexSearchSetsService.java | 15 +++++++++------ .../IndexQueryServiceIntegrationSmokeTest.java | 4 ++-- .../svc/IndexQueryServiceIntegrationTest.java | 2 +- .../IndexQueryServiceIntegrationTestModule.java | 2 +- .../nu/marginalia/query/QueryBasicInterface.java | 8 ++++++-- .../nu/marginalia/query/svc/QueryFactoryTest.java | 2 +- 13 files changed, 34 insertions(+), 29 deletions(-) diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java index 27d815e9..2ea743cf 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java @@ -15,7 +15,7 @@ public class SearchSpecification { /** If present and not empty, limit the search to these domain IDs */ public List domains; - public SearchSetIdentifier searchSetIdentifier; + public String searchSetIdentifier; public final String humanQuery; diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java 
b/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java index 48e8a94a..36b16a55 100644 --- a/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java +++ b/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java @@ -31,7 +31,7 @@ public class QueryProtobufCodec { builder.addSubqueries(IndexProtobufCodec.convertSearchSubquery(subquery)); } - builder.setSearchSetIdentifier(query.specs.searchSetIdentifier.name()); + builder.setSearchSetIdentifier(query.specs.searchSetIdentifier); builder.setHumanQuery(request.getHumanQuery()); builder.setQuality(convertSpecLimit(query.specs.quality)); @@ -62,7 +62,7 @@ public class QueryProtobufCodec { convertSpecLimit(request.getDomainCount()), request.getDomainIdsList(), IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()), - SearchSetIdentifier.valueOf(request.getSearchSetIdentifier())); + request.getSearchSetIdentifier()); } @@ -133,7 +133,7 @@ public class QueryProtobufCodec { return new SearchSpecification( subqueries, specs.getDomainsList(), - SearchSetIdentifier.valueOf(specs.getSearchSetIdentifier()), + specs.getSearchSetIdentifier(), specs.getHumanQuery(), IndexProtobufCodec.convertSpecLimit(specs.getQuality()), IndexProtobufCodec.convertSpecLimit(specs.getYear()), @@ -159,7 +159,7 @@ public class QueryProtobufCodec { .setYear(convertSpecLimit(params.year())) .setSize(convertSpecLimit(params.size())) .setRank(convertSpecLimit(params.rank())) - .setSearchSetIdentifier(params.identifier().name()); + .setSearchSetIdentifier(params.identifier()); if (params.nearDomain() != null) builder.setNearDomain(params.nearDomain()); diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java b/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java index 6b88dbc6..9d83e117 100644 --- a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java +++ 
b/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java @@ -23,10 +23,10 @@ public record QueryParams( SpecificationLimit domainCount, List domainIds, QueryLimits limits, - SearchSetIdentifier identifier + String identifier ) { - public QueryParams(String query, QueryLimits limits, SearchSetIdentifier identifier) { + public QueryParams(String query, QueryLimits limits, String identifier) { this(query, null, List.of(), List.of(), diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiSearchOperator.java b/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiSearchOperator.java index f60a1750..f02afedd 100644 --- a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiSearchOperator.java +++ b/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiSearchOperator.java @@ -55,7 +55,7 @@ public class ApiSearchOperator { Math.min(100, count), 150, 8192), - searchSet); + searchSet.name()); } private SearchSetIdentifier selectSearchSet(int index) { diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java index 03acc479..edb1b62f 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java @@ -35,7 +35,7 @@ public class SearchQueryParamFactory { SpecificationLimit.none(), List.of(), new QueryLimits(1, 25, 200, 8192), - profile.searchSetIdentifier + profile.searchSetIdentifier.name() ); } @@ -54,7 +54,7 @@ public class SearchQueryParamFactory { SpecificationLimit.none(), List.of(), new QueryLimits(count, count, 100, 512), - SearchSetIdentifier.NONE + SearchSetIdentifier.NONE.name() ); } @@ -72,7 +72,7 @@ public class 
SearchQueryParamFactory { SpecificationLimit.none(), List.of(), new QueryLimits(100, 100, 100, 512), - SearchSetIdentifier.NONE + SearchSetIdentifier.NONE.name() ); } @@ -90,7 +90,7 @@ public class SearchQueryParamFactory { SpecificationLimit.none(), List.of(), new QueryLimits(100, 100, 100, 512), - SearchSetIdentifier.NONE + SearchSetIdentifier.NONE.name() ); } } diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/SearchToBanService.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/SearchToBanService.java index 12118989..6ff198da 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/SearchToBanService.java +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/SearchToBanService.java @@ -79,7 +79,7 @@ public class SearchToBanService { private Object executeQuery(Context ctx, String query) { return queryClient.search(ctx, new QueryParams( query, new QueryLimits(2, 200, 250, 8192), - SearchSetIdentifier.NONE + "NONE" )); } } diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java index 8560b38a..cc9d8e04 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java @@ -261,9 +261,7 @@ public class IndexQueryService extends IndexApiImplBase { return new SmallSearchSet(request.getDomainsList()); } - return searchSetsService.getSearchSetByName( - SearchSetIdentifier.valueOf(request.getSearchSetIdentifier()) - ); + return searchSetsService.getSearchSetByName(request.getSearchSetIdentifier()); } private SearchResultSet executeSearch(SearchParameters params) throws SQLException { diff --git 
a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java index 47dcf5b2..d8f90d34 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java @@ -80,16 +80,19 @@ public class IndexSearchSetsService { return domainRankings; } - public SearchSet getSearchSetByName(SearchSetIdentifier searchSetIdentifier) { + public SearchSet getSearchSetByName(String searchSetIdentifier) { + if (null == searchSetIdentifier) { return anySet; } + return switch (searchSetIdentifier) { - case NONE -> anySet; - case POPULAR -> popularSet; - case ACADEMIA -> academiaSet; - case SMALLWEB -> smallWebSet; - case BLOGS -> blogsSet; + case "POPULAR" -> popularSet; + case "ACADEMIA" -> academiaSet; + case "SMALLWEB" -> smallWebSet; + case "BLOGS" -> blogsSet; + case "NONE", "" -> anySet; + default -> throw new IllegalArgumentException("Unknown search set"); }; } diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationSmokeTest.java b/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationSmokeTest.java index f8c95cf2..c9a07e60 100644 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationSmokeTest.java +++ b/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationSmokeTest.java @@ -129,7 +129,7 @@ public class IndexQueryServiceIntegrationSmokeTest { .domainCount(SpecificationLimit.none()) .rankingParams(ResultRankingParameters.sensibleDefaults()) .domains(new ArrayList<>()) - .searchSetIdentifier(SearchSetIdentifier.NONE) + .searchSetIdentifier("NONE") .subqueries(List.of(new SearchSubquery( List.of("3", "5", 
"2"), List.of("4"), Collections.emptyList(), Collections.emptyList(), Collections.emptyList()))).build()); @@ -207,7 +207,7 @@ public class IndexQueryServiceIntegrationSmokeTest { .rank(SpecificationLimit.none()) .domainCount(SpecificationLimit.none()) .queryStrategy(QueryStrategy.SENTENCE) - .searchSetIdentifier(SearchSetIdentifier.NONE) + .searchSetIdentifier("NONE") .rankingParams(ResultRankingParameters.sensibleDefaults()) .subqueries(List.of(new SearchSubquery( List.of("4"), Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java b/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java index ca5cafe0..55fea27f 100644 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java +++ b/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java @@ -431,7 +431,7 @@ public class IndexQueryServiceIntegrationTest { .domainCount(SpecificationLimit.none()) .rankingParams(ResultRankingParameters.sensibleDefaults()) .domains(new ArrayList<>()) - .searchSetIdentifier(SearchSetIdentifier.NONE) + .searchSetIdentifier("NONE") .subqueries(List.of()); return mutator.apply(builder).build(); diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTestModule.java b/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTestModule.java index 746657d8..5089bd5f 100644 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTestModule.java +++ b/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTestModule.java @@ -69,7 +69,7 @@ public class IndexQueryServiceIntegrationTestModule extends 
AbstractModule { bind(ProcessHeartbeat.class).toInstance(new FakeProcessHeartbeat()); IndexSearchSetsService setsServiceMock = Mockito.mock(IndexSearchSetsService.class); - when(setsServiceMock.getSearchSetByName(SearchSetIdentifier.NONE)).thenReturn(new SearchSetAny()); + when(setsServiceMock.getSearchSetByName("NONE")).thenReturn(new SearchSetAny()); when(setsServiceMock.getDomainRankings()).thenReturn(new DomainRankings()); bind(IndexSearchSetsService.class).toInstance(setsServiceMock); diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryBasicInterface.java b/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryBasicInterface.java index ddef20d6..363b493f 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryBasicInterface.java +++ b/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryBasicInterface.java @@ -44,9 +44,13 @@ public class QueryBasicInterface { if (queryParam == null) { return renderer.render(new Object()); } + + int count = request.queryParams("count") == null ? 10 : Integer.parseInt(request.queryParams("count")); + String set = request.queryParams("set") == null ? 
"" : request.queryParams("set"); + var query = queryFactory.createQuery(new QueryParams(queryParam, new QueryLimits( - 1, 10, 250, 8192 - ), SearchSetIdentifier.NONE)); + 1, count, 250, 8192 + ), set)); var rsp = indexClient.query( Context.fromRequest(request), diff --git a/code/services-core/query-service/src/test/java/nu/marginalia/query/svc/QueryFactoryTest.java b/code/services-core/query-service/src/test/java/nu/marginalia/query/svc/QueryFactoryTest.java index c5f2eb42..c0874acc 100644 --- a/code/services-core/query-service/src/test/java/nu/marginalia/query/svc/QueryFactoryTest.java +++ b/code/services-core/query-service/src/test/java/nu/marginalia/query/svc/QueryFactoryTest.java @@ -49,7 +49,7 @@ public class QueryFactoryTest { SpecificationLimit.none(), null, new QueryLimits(100, 100, 100, 100), - SearchSetIdentifier.BLOGS)).specs; + "NONE")).specs; } @Test From 36ad4c74663859743aa56934a9e456f4ef1b8200 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 16 Jan 2024 11:17:40 +0100 Subject: [PATCH 2/7] (db) Add a new configuration object 'domain ranking set' for storing ranking parameters --- .../db/DomainRankingSetsService.java | 156 ++++++++++++++++++ .../db/migration/V24_01_0_002__domain_set.sql | 12 ++ .../db/DomainRankingSetsServiceTest.java | 84 ++++++++++ 3 files changed, 252 insertions(+) create mode 100644 code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java create mode 100644 code/common/db/src/main/resources/db/migration/V24_01_0_002__domain_set.sql create mode 100644 code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java diff --git a/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java b/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java new file mode 100644 index 00000000..6045cb76 --- /dev/null +++ b/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java @@ -0,0 +1,156 @@ +package nu.marginalia.db; + +import 
com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; +import lombok.With; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.file.Path; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +public class DomainRankingSetsService { + private static final Logger logger = LoggerFactory.getLogger(DomainRankingSetsService.class); + private final HikariDataSource dataSource; + + @Inject + public DomainRankingSetsService(HikariDataSource dataSource) { + this.dataSource = dataSource; + } + + public Optional get(String name) throws SQLException { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement(""" + SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION + FROM CONF_DOMAIN_RANKING_SET + WHERE NAME = ? + """)) { + stmt.setString(1, name); + var rs = stmt.executeQuery(); + + if (!rs.next()) { + return Optional.empty(); + } + + return Optional.of(new DomainRankingSet( + rs.getString("NAME"), + rs.getString("DESCRIPTION"), + DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")), + rs.getInt("DEPTH"), + rs.getString("DEFINITION") + )); + } + catch (SQLException ex) { + logger.error("Failed to get domain set", ex); + return Optional.empty(); + } + } + + public void upsert(DomainRankingSet domainRankingSet) { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement(""" + REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) + VALUES (?, ?, ?, ?, ?) 
+ """)) + { + stmt.setString(1, domainRankingSet.name()); + stmt.setString(2, domainRankingSet.description()); + stmt.setString(3, domainRankingSet.algorithm().name()); + stmt.setInt(4, domainRankingSet.depth()); + stmt.setString(5, domainRankingSet.definition()); + stmt.executeUpdate(); + } + catch (SQLException ex) { + logger.error("Failed to update domain set", ex); + } + } + + public void delete(DomainRankingSet domainRankingSet) { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement(""" + DELETE FROM CONF_DOMAIN_RANKING_SET + WHERE NAME = ? + """)) + { + stmt.setString(1, domainRankingSet.name()); + stmt.executeUpdate(); + } + catch (SQLException ex) { + logger.error("Failed to delete domain set", ex); + } + } + + public List getAll() { + + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement(""" + SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION + FROM CONF_DOMAIN_RANKING_SET + """)) { + var rs = stmt.executeQuery(); + List ret = new ArrayList<>(); + + while (rs.next()) { + ret.add( + new DomainRankingSet( + rs.getString("NAME"), + rs.getString("DESCRIPTION"), + DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")), + rs.getInt("DEPTH"), + rs.getString("DEFINITION")) + ); + } + return ret; + } + catch (SQLException ex) { + logger.error("Failed to get domain set", ex); + return List.of(); + } + } + + public enum DomainSetAlgorithm { + /** Use link graph, do a pagerank */ + LINKS_PAGERANK, + /** Use link graph, do a cheirank */ + LINKS_CHEIRANK, + /** Use adjacency graph, do a pagerank */ + ADJACENCY_PAGERANK, + /** Use adjacency graph, do a cheirank */ + ADJACENCY_CHEIRANK, + /** For reserved names. Use special algorithm, function of name */ + SPECIAL + }; + + /** Defines a domain ranking set, parameters for the ranking algorithms. 
+ * + * @param name Key and name of the set + * @param description Human-readable description + * @param algorithm Algorithm to use + * @param depth Depth of the algorithm + * @param definition Definition of the set, typically a list of domains or globs for domain-names + * */ + @With + public record DomainRankingSet(String name, + String description, + DomainSetAlgorithm algorithm, + int depth, + String definition) + { + + public Path fileName(Path base) { + return base.resolve(name().toLowerCase() + ".dat"); + } + public String[] domains() { + return Arrays.stream(definition().split("\n+")) + .map(String::trim) + .filter(s -> !s.isBlank()) + .filter(s -> !s.startsWith("#")) + .toArray(String[]::new); + } + + } +} diff --git a/code/common/db/src/main/resources/db/migration/V24_01_0_002__domain_set.sql b/code/common/db/src/main/resources/db/migration/V24_01_0_002__domain_set.sql new file mode 100644 index 00000000..73912c6b --- /dev/null +++ b/code/common/db/src/main/resources/db/migration/V24_01_0_002__domain_set.sql @@ -0,0 +1,12 @@ + +CREATE TABLE IF NOT EXISTS CONF_DOMAIN_RANKING_SET ( + NAME VARCHAR(255) PRIMARY KEY COLLATE utf8mb4_unicode_ci, + DESCRIPTION VARCHAR(255) NOT NULL, + ALGORITHM VARCHAR(255) NOT NULL, + DEPTH INT NOT NULL, + DEFINITION LONGTEXT NOT NULL +) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; + +INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('NONE', 'Reserved: No Ranking Algorithm', 'SPECIAL', 50000, ''); +INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('BLOGS', 'Reserved: Blogs Set', 'SPECIAL', 50000, ''); +INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('RANK', 'Reserved: Main Domain Ranking', 'SPECIAL', 50000, ''); \ No newline at end of file diff --git a/code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java 
b/code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java new file mode 100644 index 00000000..999f1a7e --- /dev/null +++ b/code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java @@ -0,0 +1,84 @@ +package nu.marginalia.db; + +import com.zaxxer.hikari.HikariConfig; +import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.test.TestMigrationLoader; +import org.junit.jupiter.api.*; +import org.testcontainers.containers.MariaDBContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; + +import static org.junit.jupiter.api.Assertions.*; + +@Testcontainers +@Tag("slow") +class DomainRankingSetsServiceTest { + + @Container + static MariaDBContainer mariaDBContainer = new MariaDBContainer<>("mariadb") + .withDatabaseName("WMSA_prod") + .withUsername("wmsa") + .withPassword("wmsa") + .withNetworkAliases("mariadb"); + + static HikariDataSource dataSource; + + @BeforeAll + public static void setup() { + HikariConfig config = new HikariConfig(); + config.setJdbcUrl(mariaDBContainer.getJdbcUrl()); + config.setUsername("wmsa"); + config.setPassword("wmsa"); + + dataSource = new HikariDataSource(config); + + TestMigrationLoader.flywayMigration(dataSource); + } + + @AfterAll + static void tearDownAll() { + dataSource.close(); + mariaDBContainer.close(); + } + + @Test + public void testScenarios() throws Exception { + var service = new DomainRankingSetsService(dataSource); + + // Clean up default values + service.get("BLOGS").ifPresent(service::delete); + service.get("NONE").ifPresent(service::delete); + + var newValue = new DomainRankingSetsService.DomainRankingSet( + "test", + "Test domain set", + DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK, + 10, + "test\\.nu" + ); + var newValue2 = new DomainRankingSetsService.DomainRankingSet( + "test2", + "Test domain set 2", + DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK, + 20, + "test\\.nu 
2" + ); + service.upsert(newValue); + service.upsert(newValue2); + assertEquals(newValue, service.get("test").orElseThrow()); + + var allValues = service.getAll(); + assertEquals(2, allValues.size()); + assertTrue(allValues.contains(newValue)); + assertTrue(allValues.contains(newValue2)); + + service.delete(newValue); + assertFalse(service.get("test").isPresent()); + + service.delete(newValue2); + assertFalse(service.get("test2").isPresent()); + + allValues = service.getAll(); + assertEquals(0, allValues.size()); + } +} \ No newline at end of file From e968365858d31a72688150a53dbdc68d23a04fec Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 16 Jan 2024 12:42:51 +0100 Subject: [PATCH 3/7] (index) Use new DomainRankingSets to configure ranking algos in index svc --- .../java/nu/marginalia/index/IndexModule.java | 10 - .../index/config/RankingSettings.java | 26 --- .../index/config/RankingSettingsEntry.java | 11 - .../marginalia/index/svc/IndexOpsService.java | 3 + .../index/svc/IndexSearchSetsService.java | 203 ++++++++---------- .../index/svc/searchset/RankingSearchSet.java | 14 +- .../index/model/RankingSettingsTest.java | 61 ------ 7 files changed, 102 insertions(+), 226 deletions(-) delete mode 100644 code/services-core/index-service/src/main/java/nu/marginalia/index/config/RankingSettings.java delete mode 100644 code/services-core/index-service/src/main/java/nu/marginalia/index/config/RankingSettingsEntry.java delete mode 100644 code/services-core/index-service/src/test/java/nu/marginalia/index/model/RankingSettingsTest.java diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexModule.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexModule.java index d0b2dcf9..dd5d87b1 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexModule.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexModule.java @@ -6,14 +6,10 @@ import 
com.google.inject.Singleton; import com.google.inject.name.Named; import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.linkdb.dlinks.DomainLinkDb; -import nu.marginalia.linkdb.dlinks.FileDomainLinkDb; import nu.marginalia.linkdb.dlinks.SelectingDomainLinkDb; -import nu.marginalia.linkdb.dlinks.SqlDomainLinkDb; import nu.marginalia.service.module.ServiceConfiguration; import nu.marginalia.storage.FileStorageService; import nu.marginalia.IndexLocations; -import nu.marginalia.index.config.RankingSettings; -import nu.marginalia.WmsaHome; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,12 +26,6 @@ public class IndexModule extends AbstractModule { public void configure() { } - @Provides - public RankingSettings rankingSettings() { - Path dir = WmsaHome.getHomePath().resolve("conf/ranking-settings.yaml"); - return RankingSettings.from(dir); - } - @Provides @Singleton public DomainLinkDb domainLinkDb ( diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/config/RankingSettings.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/config/RankingSettings.java deleted file mode 100644 index a755a480..00000000 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/config/RankingSettings.java +++ /dev/null @@ -1,26 +0,0 @@ -package nu.marginalia.index.config; - -import lombok.ToString; -import org.yaml.snakeyaml.Yaml; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -@ToString -public class RankingSettings { - public RankingSettingsEntry small; - public RankingSettingsEntry retro; - public RankingSettingsEntry standard; - public RankingSettingsEntry academia; - public RankingSettingsEntry ranking; - - public static RankingSettings from(Path dir) { - try { - return new Yaml().loadAs(Files.readString(dir), RankingSettings.class); - } - catch (IOException ex) { - throw new RuntimeException("Failed to load " + dir, ex); - } - } -} diff --git 
a/code/services-core/index-service/src/main/java/nu/marginalia/index/config/RankingSettingsEntry.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/config/RankingSettingsEntry.java deleted file mode 100644 index 7723e3ff..00000000 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/config/RankingSettingsEntry.java +++ /dev/null @@ -1,11 +0,0 @@ -package nu.marginalia.index.config; - -import java.util.List; - -public class RankingSettingsEntry { - /** Bias the ranking toward these domains */ - public List domains; - - /** Number of domains to include in ranking */ - public int max; -} diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexOpsService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexOpsService.java index 37d52f2e..4f1830c8 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexOpsService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexOpsService.java @@ -43,13 +43,16 @@ public class IndexOpsService { if (!run(searchSetService::recalculateAll)) { Spark.halt(503, "Operations busy"); } + return "OK"; } public Object reindexEndpoint(Request request, Response response) throws Exception { + if (!run(index::switchIndex).isPresent()) { Spark.halt(503, "Operations busy"); } + return "OK"; } diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java index d8f90d34..0d95a087 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java @@ -4,10 +4,11 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import gnu.trove.list.TIntList; import 
it.unimi.dsi.fastutil.ints.IntOpenHashSet; -import lombok.SneakyThrows; +import nu.marginalia.db.DomainRankingSetsService; import nu.marginalia.db.DomainTypes; import nu.marginalia.index.IndexServicesFactory; import nu.marginalia.index.searchset.SearchSet; +import nu.marginalia.ranking.RankingAlgorithm; import nu.marginalia.ranking.ReversePageRank; import nu.marginalia.ranking.StandardPageRank; import nu.marginalia.ranking.accumulator.RankingResultHashMapAccumulator; @@ -16,31 +17,32 @@ import nu.marginalia.ranking.data.RankingDomainFetcher; import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData; import nu.marginalia.index.svc.searchset.RankingSearchSet; import nu.marginalia.index.svc.searchset.SearchSetAny; -import nu.marginalia.index.config.RankingSettings; import nu.marginalia.ranking.DomainRankings; -import nu.marginalia.index.client.model.query.SearchSetIdentifier; import nu.marginalia.index.db.DbUpdateRanks; +import nu.marginalia.service.control.ServiceEventLog; import nu.marginalia.service.control.ServiceHeartbeat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.sql.SQLException; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; @Singleton public class IndexSearchSetsService { private final Logger logger = LoggerFactory.getLogger(getClass()); private final DomainTypes domainTypes; private final ServiceHeartbeat heartbeat; + private final IndexServicesFactory indexServicesFactory; + private final ServiceEventLog eventLog; + private final DomainRankingSetsService domainRankingSetsService; private final DbUpdateRanks dbUpdateRanks; private final RankingDomainFetcher similarityDomains; - private final RankingSettings rankingSettings; - + private final RankingDomainFetcher linksDomains; + private final ConcurrentHashMap rankingSets = new ConcurrentHashMap<>(); // Below are binary indices that are used to constrain a search - private volatile RankingSearchSet popularSet; - 
private volatile RankingSearchSet smallWebSet; - private volatile RankingSearchSet academiaSet; - private volatile RankingSearchSet blogsSet; private final SearchSet anySet = new SearchSetAny(); // The ranking value of the domains used in sorting the domains @@ -51,29 +53,36 @@ public class IndexSearchSetsService { ServiceHeartbeat heartbeat, RankingDomainFetcher rankingDomains, RankingDomainFetcherForSimilarityData similarityDomains, - RankingSettings rankingSettings, - IndexServicesFactory servicesFactory, + IndexServicesFactory indexServicesFactory, + ServiceEventLog eventLog, + DomainRankingSetsService domainRankingSetsService, DbUpdateRanks dbUpdateRanks) throws IOException { this.domainTypes = domainTypes; this.heartbeat = heartbeat; + this.indexServicesFactory = indexServicesFactory; + this.eventLog = eventLog; + this.domainRankingSetsService = domainRankingSetsService; this.dbUpdateRanks = dbUpdateRanks; if (similarityDomains.hasData()) { this.similarityDomains = similarityDomains; + this.linksDomains = rankingDomains; } else { // on test environments the cosine similarity graph may not be present logger.info("Domain similarity is not present, falling back on link graph"); this.similarityDomains = rankingDomains; + this.linksDomains = rankingDomains; } - this.rankingSettings = rankingSettings; - - smallWebSet = new RankingSearchSet(SearchSetIdentifier.SMALLWEB, servicesFactory.getSearchSetsBase().resolve("small-web.dat")); - academiaSet = new RankingSearchSet(SearchSetIdentifier.ACADEMIA, servicesFactory.getSearchSetsBase().resolve("academia.dat")); - popularSet = new RankingSearchSet(SearchSetIdentifier.POPULAR, servicesFactory.getSearchSetsBase().resolve("popular.dat")); - blogsSet = new RankingSearchSet(SearchSetIdentifier.BLOGS, servicesFactory.getSearchSetsBase().resolve("blogs.dat")); + for (var rankingSet : domainRankingSetsService.getAll()) { + rankingSets.put(rankingSet.name(), + new RankingSearchSet(rankingSet.name(), + 
rankingSet.fileName(indexServicesFactory.getSearchSetsBase()) + ) + ); + } } public DomainRankings getDomainRankings() { @@ -86,51 +95,79 @@ public class IndexSearchSetsService { return anySet; } - return switch (searchSetIdentifier) { - case "POPULAR" -> popularSet; - case "ACADEMIA" -> academiaSet; - case "SMALLWEB" -> smallWebSet; - case "BLOGS" -> blogsSet; - case "NONE", "" -> anySet; - default -> throw new IllegalArgumentException("Unknown search set"); - }; + if ("NONE".equals(searchSetIdentifier) || "".equals(searchSetIdentifier)) { + return anySet; + } + + return Objects.requireNonNull(rankingSets.get(searchSetIdentifier), "Unknown search set"); } - enum RepartitionSteps { - UPDATE_ACADEMIA, - UPDATE_POPULAR, - UPDATE_SMALL_WEB, - UPDATE_BLOGS, - UPDATE_RANKINGS, - FINISHED - } public void recalculateAll() { - try (var processHeartbeat = heartbeat.createServiceTaskHeartbeat(RepartitionSteps.class, "repartitionAll")) { - - processHeartbeat.progress(RepartitionSteps.UPDATE_ACADEMIA); - updateAcademiaDomainsSet(); - - processHeartbeat.progress(RepartitionSteps.UPDATE_POPULAR); - updatePopularDomainsSet(); - - processHeartbeat.progress(RepartitionSteps.UPDATE_SMALL_WEB); - updateSmallWebDomainsSet(); - - processHeartbeat.progress(RepartitionSteps.UPDATE_BLOGS); - updateBlogsSet(); - - processHeartbeat.progress(RepartitionSteps.UPDATE_RANKINGS); - updateDomainRankings(); - - processHeartbeat.progress(RepartitionSteps.FINISHED); + for (var rankingSet : domainRankingSetsService.getAll()) { + try { + if (DomainRankingSetsService.DomainSetAlgorithm.SPECIAL.equals(rankingSet.algorithm())) { + switch (rankingSet.name()) { + case "BLOGS" -> recalculateBlogsSet(rankingSet); + case "RANK" -> updateDomainRankings(rankingSet); + case "NONE" -> {} + } + } else { + recalculateNornal(rankingSet); + } + } + catch (Exception ex) { + logger.warn("Failed to recalculate ranking set {}", rankingSet.name(), ex); + } + eventLog.logEvent("RANKING-SET-RECALCULATED", 
rankingSet.name()); } } - private void updateDomainRankings() { - var entry = rankingSettings.ranking; + private void recalculateNornal(DomainRankingSetsService.DomainRankingSet rankingSet) { + String[] domains = rankingSet.domains(); - var spr = new StandardPageRank(similarityDomains, entry.domains.toArray(String[]::new)); - var ranks = spr.pageRankWithPeripheralNodes(entry.max, () -> new RankingResultHashMapAccumulator(100_000)); + RankingAlgorithm rankingAlgorithm = switch (rankingSet.algorithm()) { + case LINKS_PAGERANK -> new StandardPageRank(linksDomains, domains); + case LINKS_CHEIRANK -> new ReversePageRank(linksDomains, domains); + case ADJACENCY_PAGERANK -> new StandardPageRank(similarityDomains, domains); + case ADJACENCY_CHEIRANK -> new ReversePageRank(similarityDomains, domains); + default -> throw new IllegalStateException("Unexpected value: " + rankingSet.algorithm()); + }; + + var data = rankingAlgorithm.pageRankWithPeripheralNodes(rankingSet.depth(), RankingResultHashSetAccumulator::new); + + var set = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexServicesFactory.getSearchSetsBase()), data); + rankingSets.put(rankingSet.name(), set); + + try { + set.write(); + } + catch (IOException ex) { + logger.warn("Failed to write search set", ex); + } + } + + + + private void recalculateBlogsSet(DomainRankingSetsService.DomainRankingSet rankingSet) throws SQLException, IOException { + TIntList knownDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG); + + if (knownDomains.isEmpty()) { + // FIXME: We don't want to reload the entire list every time, but we do want to do it sometimes. Actor maybe? 
+ domainTypes.reloadDomainsList(DomainTypes.Type.BLOG); + knownDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG); + } + + synchronized (this) { + var blogSet = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexServicesFactory.getSearchSetsBase()), new IntOpenHashSet(knownDomains.toArray())); + rankingSets.put(rankingSet.name(), blogSet); + blogSet.write(); + } + } + + private void updateDomainRankings(DomainRankingSetsService.DomainRankingSet rankingSet) { + + var spr = new StandardPageRank(similarityDomains, rankingSet.domains()); + var ranks = spr.pageRankWithPeripheralNodes(rankingSet.depth(), () -> new RankingResultHashMapAccumulator(rankingSet.depth())); synchronized (this) { domainRankings = new DomainRankings(ranks); @@ -141,60 +178,4 @@ public class IndexSearchSetsService { dbUpdateRanks.execute(ranks); } - @SneakyThrows - public void updatePopularDomainsSet() { - var entry = rankingSettings.retro; - - var spr = new StandardPageRank(similarityDomains, entry.domains.toArray(String[]::new)); - var data = spr.pageRankWithPeripheralNodes(entry.max, RankingResultHashSetAccumulator::new); - - synchronized (this) { - popularSet = new RankingSearchSet(SearchSetIdentifier.POPULAR, popularSet.source, data); - popularSet.write(); - } - } - - @SneakyThrows - public void updateSmallWebDomainsSet() { - var entry = rankingSettings.small; - - var rpr = new ReversePageRank(similarityDomains, entry.domains.toArray(String[]::new)); - rpr.setMaxKnownUrls(750); - var data = rpr.pageRankWithPeripheralNodes(entry.max, RankingResultHashSetAccumulator::new); - - synchronized (this) { - smallWebSet = new RankingSearchSet(SearchSetIdentifier.SMALLWEB, smallWebSet.source, data); - smallWebSet.write(); - } - } - - @SneakyThrows - public void updateBlogsSet() { - TIntList knownDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG); - - if (knownDomains.isEmpty()) { - // FIXME: We don't want to reload the entire list every time, but we do want 
to do it sometimes. Actor maybe? - domainTypes.reloadDomainsList(DomainTypes.Type.BLOG); - knownDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG); - } - - synchronized (this) { - blogsSet = new RankingSearchSet(SearchSetIdentifier.BLOGS, blogsSet.source, new IntOpenHashSet(knownDomains.toArray())); - blogsSet.write(); - } - } - - - @SneakyThrows - public void updateAcademiaDomainsSet() { - var entry = rankingSettings.academia; - - var spr = new StandardPageRank(similarityDomains, entry.domains.toArray(String[]::new)); - var data = spr.pageRankWithPeripheralNodes(entry.max, RankingResultHashSetAccumulator::new); - - synchronized (this) { - academiaSet = new RankingSearchSet(SearchSetIdentifier.ACADEMIA, academiaSet.source, data); - academiaSet.write(); - } - } } diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/RankingSearchSet.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/RankingSearchSet.java index d8dd9ca1..89018493 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/RankingSearchSet.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/RankingSearchSet.java @@ -24,17 +24,17 @@ public class RankingSearchSet implements SearchSet { private final Logger logger = LoggerFactory.getLogger(getClass()); private final IntOpenHashSet set; - public final SearchSetIdentifier identifier; + public final String name; public final Path source; - public RankingSearchSet(SearchSetIdentifier identifier, Path source, IntOpenHashSet set) { - this.identifier = identifier; + public RankingSearchSet(String name, Path source, IntOpenHashSet set) { + this.name = name; this.source = source; this.set = set; } - public RankingSearchSet(SearchSetIdentifier identifier, Path source) throws IOException { - this.identifier = identifier; + public RankingSearchSet(String name, Path source) throws IOException { + 
this.name = name; this.source = source; if (!Files.exists(source)) { @@ -45,7 +45,7 @@ public class RankingSearchSet implements SearchSet { } if (set.isEmpty()) { - logger.warn("Search set {} is empty", identifier); + logger.warn("Search set {} is empty", name); } } @@ -87,6 +87,6 @@ public class RankingSearchSet implements SearchSet { } public String toString() { - return identifier.toString(); + return name; } } diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/model/RankingSettingsTest.java b/code/services-core/index-service/src/test/java/nu/marginalia/index/model/RankingSettingsTest.java deleted file mode 100644 index f49d13d8..00000000 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/model/RankingSettingsTest.java +++ /dev/null @@ -1,61 +0,0 @@ -package nu.marginalia.index.model; - -import nu.marginalia.index.config.RankingSettings; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -class RankingSettingsTest { - - Path tempFile; - @BeforeEach - void setUp() throws IOException { - tempFile = Files.createTempFile(getClass().getSimpleName(), ".tmp"); - } - - @AfterEach - void tearDown() throws IOException { - Files.delete(tempFile); - } - - @Test - void testParseRankingSettings() throws IOException { - Files.writeString(tempFile, """ - retro: - max: 50 - domains: - - "www.rep.routledge.com" - - "www.personal.kent.edu" - small: - max: 10 - domains: - - "bikobatanari.art" - - "wiki.xxiivv.com" - academia: - max: 101 - domains: - - "%edu" - standard: - max: 23 - domains: - - "memex.marginalia.nu" - """); - - var settings = RankingSettings.from(tempFile); - assertEquals(List.of("www.rep.routledge.com","www.personal.kent.edu"), settings.retro.domains); - assertEquals(50, 
settings.retro.max); - assertEquals(List.of("bikobatanari.art","wiki.xxiivv.com"), settings.small.domains); - assertEquals(10, settings.small.max); - assertEquals(List.of("bikobatanari.art","wiki.xxiivv.com"), settings.small.domains); - assertEquals(List.of("%edu"), settings.academia.domains); - assertEquals(List.of("memex.marginalia.nu"), settings.standard.domains); - - } -} \ No newline at end of file From 2fe570554274b417662e0458e1a330c538445893 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 16 Jan 2024 17:10:09 +0100 Subject: [PATCH 4/7] (control) GUI for ranking sets Still missing is some polish, forms don't have proper labels, validation is inconsistent, no error messages, etc. --- .../db/DomainRankingSetsService.java | 10 ++ .../nu/marginalia/control/ControlService.java | 2 + .../java/nu/marginalia/control/Redirects.java | 1 + .../svc/ControlDomainRankingSetsService.java | 98 +++++++++++++++++++ .../templates/control/partials/nav.hdb | 3 +- .../control/sys/domain-ranking-sets.hdb | 38 +++++++ .../control/sys/new-domain-ranking-set.hdb | 74 ++++++++++++++ .../control/sys/update-domain-ranking-set.hdb | 88 +++++++++++++++++ 8 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java create mode 100644 code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb create mode 100644 code/services-core/control-service/src/main/resources/templates/control/sys/new-domain-ranking-set.hdb create mode 100644 code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb diff --git a/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java b/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java index 6045cb76..a977e0de 100644 --- a/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java +++ 
b/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java @@ -63,6 +63,9 @@ public class DomainRankingSetsService { stmt.setInt(4, domainRankingSet.depth()); stmt.setString(5, domainRankingSet.definition()); stmt.executeUpdate(); + + if (!conn.getAutoCommit()) + conn.commit(); } catch (SQLException ex) { logger.error("Failed to update domain set", ex); @@ -78,6 +81,9 @@ public class DomainRankingSetsService { { stmt.setString(1, domainRankingSet.name()); stmt.executeUpdate(); + + if (!conn.getAutoCommit()) + conn.commit(); } catch (SQLException ex) { logger.error("Failed to delete domain set", ex); @@ -152,5 +158,9 @@ public class DomainRankingSetsService { .toArray(String[]::new); } + public boolean isSpecial() { + return algorithm() == DomainSetAlgorithm.SPECIAL; + } + } } diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java index efe28e2f..c3a4e88f 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java @@ -53,6 +53,7 @@ public class ControlService extends Service { RandomExplorationService randomExplorationService, DataSetsService dataSetsService, ControlNodeService controlNodeService, + ControlDomainRankingSetsService controlDomainRankingSetsService, ControlActorService controlActorService ) throws IOException { @@ -66,6 +67,7 @@ public class ControlService extends Service { messageQueueService.register(); sysActionsService.register(); dataSetsService.register(); + controlDomainRankingSetsService.register(); // node controlFileStorageService.register(); diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/Redirects.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/Redirects.java index 
2ade2779..7750b998 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/Redirects.java +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/Redirects.java @@ -8,6 +8,7 @@ public class Redirects { public static final HtmlRedirect redirectToOverview = new HtmlRedirect("/"); public static final HtmlRedirect redirectToBlacklist = new HtmlRedirect("/blacklist"); public static final HtmlRedirect redirectToComplaints = new HtmlRedirect("/complaints"); + public static final HtmlRedirect redirectToRankingDataSets = new HtmlRedirect("/domain-ranking-sets"); public static final HtmlRedirect redirectToMessageQueue = new HtmlRedirect("/message-queue"); public static class HtmlRedirect implements ResponseTransformer { diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java new file mode 100644 index 00000000..362ef0f8 --- /dev/null +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java @@ -0,0 +1,98 @@ +package nu.marginalia.control.sys.svc; + +import com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.control.ControlRendererFactory; +import nu.marginalia.control.Redirects; +import nu.marginalia.db.DomainRankingSetsService; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Map; + +public class ControlDomainRankingSetsService { + private final HikariDataSource dataSource; + private final ControlRendererFactory rendererFactory; + private final DomainRankingSetsService domainRankingSetsService; + + @Inject + public ControlDomainRankingSetsService(HikariDataSource dataSource, + ControlRendererFactory rendererFactory, + 
DomainRankingSetsService domainRankingSetsService) { + this.dataSource = dataSource; + this.rendererFactory = rendererFactory; + this.domainRankingSetsService = domainRankingSetsService; + } + + public void register() throws IOException { + var datasetsRenderer = rendererFactory.renderer("control/sys/domain-ranking-sets"); + var updateDatasetRenderer = rendererFactory.renderer("control/sys/update-domain-ranking-set"); + var newDatasetRenderer = rendererFactory.renderer("control/sys/new-domain-ranking-set"); + + Spark.get("/public/domain-ranking-sets", this::rankingSetsModel, datasetsRenderer::render); + Spark.get("/public/domain-ranking-sets/new", (rq,rs) -> new Object(), newDatasetRenderer::render); + Spark.get("/public/domain-ranking-sets/:id", this::rankingSetModel, updateDatasetRenderer::render); + Spark.post("/public/domain-ranking-sets/:id", this::alterSetModel, Redirects.redirectToRankingDataSets); + } + + private Object alterSetModel(Request request, Response response) throws SQLException { + final String act = request.queryParams("act"); + final String id = request.params("id"); + if ("update".equals(act)) { + domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet( + id, + request.queryParams("description"), + DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")), + Integer.parseInt(request.queryParams("depth")), + request.queryParams("definition") + )); + return ""; + } + else if ("delete".equals(act)) { + var model = domainRankingSetsService.get(id).orElseThrow(); + if (model.isSpecial()) { + throw new IllegalArgumentException("Cannot delete special ranking set"); + } + domainRankingSetsService.delete(model); + return ""; + } + else if ("create".equals(act)) { + if (domainRankingSetsService.get(request.queryParams("name")).isPresent()) { + throw new IllegalArgumentException("Ranking set with that name already exists"); + } + + domainRankingSetsService.upsert(new 
DomainRankingSetsService.DomainRankingSet( + request.queryParams("name"), + request.queryParams("description"), + DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")), + Integer.parseInt(request.queryParams("depth")), + request.queryParams("definition") + )); + return ""; + } + + throw new UnsupportedOperationException(); + } + + private Object rankingSetsModel(Request request, Response response) { + return Map.of("rankingSets", domainRankingSetsService.getAll()); + } + private Object rankingSetModel(Request request, Response response) throws SQLException { + var model = domainRankingSetsService.get(request.params("id")).orElseThrow(); + return Map.of("rankingSet", model, + "selectedAlgo", Map.of( + "special", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.SPECIAL, + "adjacency_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK, + "adjacency_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_PAGERANK, + "links_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_CHEIRANK, + "links_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK) + ); + + + + + } +} diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/nav.hdb b/code/services-core/control-service/src/main/resources/templates/control/partials/nav.hdb index c72fafae..9cadd3be 100644 --- a/code/services-core/control-service/src/main/resources/templates/control/partials/nav.hdb +++ b/code/services-core/control-service/src/main/resources/templates/control/partials/nav.hdb @@ -34,7 +34,8 @@ diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb b/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb new file mode 100644 index 00000000..79fcfb32 --- /dev/null +++ 
b/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb @@ -0,0 +1,38 @@ + + + + Control Service + {{> control/partials/head-includes }} + + +{{> control/partials/nav}} +
+

Domain Ranking Sets

+
+ Domain ranking sets configure the ranking algorithms used to determine the importance of a domain. +
+ + + + + + + + + {{#each rankingSets}} + + + + + + + {{/each}} +
NameDescriptionAlgorithmDepth
{{name}}{{description}}{{algorithm}}{{depth}}
+ + +
+ +{{> control/partials/foot-includes }} + \ No newline at end of file diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/new-domain-ranking-set.hdb b/code/services-core/control-service/src/main/resources/templates/control/sys/new-domain-ranking-set.hdb new file mode 100644 index 00000000..7c086276 --- /dev/null +++ b/code/services-core/control-service/src/main/resources/templates/control/sys/new-domain-ranking-set.hdb @@ -0,0 +1,74 @@ + + + + Control Service + {{> control/partials/head-includes }} + + +{{> control/partials/nav}} +
+

Create Domain Ranking Set

+
+ + + + + + + + + + + + + + + + + + + +
Name + +
+ The name is how the ranking set is identified in the query parameters, + and also decides the file name of the persisted ranking set definition. Keep it simple. +
+
Algorithm + +
+ + The algorithm used to rank the domains. The LINKS algorithms use the link graph, and the ADJACENCY + algorithms use the adjacency graph. CheiRank is a variant of PageRank that uses the reversed graph. + +
+
Description + +
+ This is purely to help keep track of what this ranking set does. +
+
Depth + +
+ Up to this number of domains are ranked, the rest are excluded. +
+
Definition
+ +
+ A list of domain names, one per line, possibly globbed with SQL-style '%' wildcards. + These are used as the origin point for the Personalized PageRank algorithm, and will be considered + the central points of the link or adjacency graph. If no domains are specified, the entire domain space is used, as per the PageRank paper. + +
+
+ +
+
+ +{{> control/partials/foot-includes }} + \ No newline at end of file diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb b/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb new file mode 100644 index 00000000..3e228c67 --- /dev/null +++ b/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb @@ -0,0 +1,88 @@ + + + + Control Service + {{> control/partials/head-includes }} + + +{{> control/partials/nav}} +
+{{#with rankingSet}} +

Domain Ranking Set: {{name}}

+
+ + + + + + + + + + + + + + + + + + + +
Name + {{#if special}}{{/if}} + +
+ The name is how the ranking set is identified in the query parameters, + and also decides the file name of the persisted ranking set definition. Keep it simple. +
+
Algorithm + {{#if special}}{{/if}} + +
+ + The algorithm used to rank the domains. The LINKS algorithms use the link graph, and the ADJACENCY + algorithms use the adjacency graph. CheiRank is a variant of PageRank that uses the reversed graph. + +
+
Description + {{#if special}}{{/if}} + +
+ This is purely to help keep track of what this ranking set does. +
+
Depth + +
+ Up to this number of domains are ranked, the rest are excluded. +
+
Definition
+ +
+ A list of domain names, one per line, possibly globbed with SQL-style '%' wildcards. + These are used as the origin point for the Personalized PageRank algorithm, and will be considered + the central points of the link or adjacency graph. If no domains are specified, the entire domain space is used, as per the PageRank paper. + +
+
+ +
+
+ + + + + +{{/with}} +
+ +{{> control/partials/foot-includes }} + \ No newline at end of file From 7fd4c092e3e24b3e1ccf977f56b42af17288a7ad Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Wed, 17 Jan 2024 10:47:14 +0100 Subject: [PATCH 5/7] (control) Clean up UX and accessibility for new domain ranking sets. The change also adds basic support for error messages in the GUI. --- .../nu/marginalia/control/ControlService.java | 6 +++- .../control/ControlValidationError.java | 15 ++++++++ .../svc/ControlDomainRankingSetsService.java | 21 ++++++++--- .../control/sys/svc/ControlErrorHandler.java | 35 +++++++++++++++++++ .../resources/templates/control/error.hdb | 25 +++++++++++++ .../control/sys/domain-ranking-sets.hdb | 15 ++++++++ .../control/sys/new-domain-ranking-set.hdb | 17 ++++----- .../control/sys/update-domain-ranking-set.hdb | 12 +++---- 8 files changed, 127 insertions(+), 19 deletions(-) create mode 100644 code/services-core/control-service/src/main/java/nu/marginalia/control/ControlValidationError.java create mode 100644 code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlErrorHandler.java create mode 100644 code/services-core/control-service/src/main/resources/templates/control/error.hdb diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java index c3a4e88f..e4b5d359 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java @@ -54,7 +54,8 @@ public class ControlService extends Service { DataSetsService dataSetsService, ControlNodeService controlNodeService, ControlDomainRankingSetsService controlDomainRankingSetsService, - ControlActorService controlActorService + ControlActorService controlActorService, + ControlErrorHandler errorHandler ) throws IOException { 
super(params); @@ -81,6 +82,8 @@ public class ControlService extends Service { domainComplaintService.register(); randomExplorationService.register(); + errorHandler.register(); + var indexRenderer = rendererFactory.renderer("control/index"); var eventsRenderer = rendererFactory.renderer("control/sys/events"); var serviceByIdRenderer = rendererFactory.renderer("control/sys/service-by-id"); @@ -106,6 +109,7 @@ public class ControlService extends Service { Spark.get("/public/:resource", this::serveStatic); + monitors.subscribe(this::logMonitorStateChange); controlActorService.startDefaultActors(); diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlValidationError.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlValidationError.java new file mode 100644 index 00000000..29ff7317 --- /dev/null +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlValidationError.java @@ -0,0 +1,15 @@ +package nu.marginalia.control; + +public class ControlValidationError extends RuntimeException { + public final String title; + public final String messageLong; + public final String redirect; + + public ControlValidationError(String title, String messageLong, String redirect) { + super(title); + + this.title = title; + this.messageLong = messageLong; + this.redirect = redirect; + } +} diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java index 362ef0f8..73f19611 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java @@ -3,6 +3,7 @@ package nu.marginalia.control.sys.svc; import com.google.inject.Inject; 
import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.control.ControlRendererFactory; +import nu.marginalia.control.ControlValidationError; import nu.marginalia.control.Redirects; import nu.marginalia.db.DomainRankingSetsService; import spark.Request; @@ -41,6 +42,7 @@ public class ControlDomainRankingSetsService { private Object alterSetModel(Request request, Response response) throws SQLException { final String act = request.queryParams("act"); final String id = request.params("id"); + if ("update".equals(act)) { domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet( id, @@ -54,18 +56,26 @@ public class ControlDomainRankingSetsService { else if ("delete".equals(act)) { var model = domainRankingSetsService.get(id).orElseThrow(); if (model.isSpecial()) { - throw new IllegalArgumentException("Cannot delete special ranking set"); + throw new ControlValidationError("Cannot delete special ranking set", + """ + SPECIAL data sets are reserved by the system and can not be deleted. + """, + "/domain-ranking-sets"); } domainRankingSetsService.delete(model); return ""; } else if ("create".equals(act)) { if (domainRankingSetsService.get(request.queryParams("name")).isPresent()) { - throw new IllegalArgumentException("Ranking set with that name already exists"); + throw new ControlValidationError("Ranking set with that name already exists", + """ + Ensure the new data set has a unique name and try again. 
+ """, + "/domain-ranking-sets"); } domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet( - request.queryParams("name"), + request.queryParams("name").toUpperCase(), request.queryParams("description"), DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")), Integer.parseInt(request.queryParams("depth")), @@ -74,7 +84,10 @@ public class ControlDomainRankingSetsService { return ""; } - throw new UnsupportedOperationException(); + throw new ControlValidationError("Unknown action", """ + An unknown action was requested and the system does not understand how to act on it. + """, + "/domain-ranking-sets"); } private Object rankingSetsModel(Request request, Response response) { diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlErrorHandler.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlErrorHandler.java new file mode 100644 index 00000000..277d84fc --- /dev/null +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlErrorHandler.java @@ -0,0 +1,35 @@ +package nu.marginalia.control.sys.svc; + +import com.google.inject.Inject; +import nu.marginalia.control.ControlRendererFactory; +import nu.marginalia.control.ControlValidationError; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.util.Map; + +public class ControlErrorHandler { + private final ControlRendererFactory.Renderer renderer; + + @Inject + public ControlErrorHandler(ControlRendererFactory rendererFactory) { + this.renderer = rendererFactory.renderer("control/error"); + } + + public void render(ControlValidationError error, Request request, Response response) { + String text = renderer.render( + Map.of( + "title", error.title, + "messageLong", error.messageLong, + "redirect", error.redirect + ) + ); + + response.body(text); + } + + public void register() { + 
Spark.exception(ControlValidationError.class, this::render); + } +} diff --git a/code/services-core/control-service/src/main/resources/templates/control/error.hdb b/code/services-core/control-service/src/main/resources/templates/control/error.hdb new file mode 100644 index 00000000..d973163d --- /dev/null +++ b/code/services-core/control-service/src/main/resources/templates/control/error.hdb @@ -0,0 +1,25 @@ + + + + Control Service: Error + + + {{> control/partials/head-includes }} + + +{{> control/partials/nav}} +
+

Error: {{title}}

+
+

{{messageLong}}

+ Go back +
+
+ +{{> control/partials/foot-includes }} + + diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb b/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb index 79fcfb32..ba395319 100644 --- a/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb +++ b/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb @@ -32,6 +32,21 @@ + + +
+

Several reserved ranking sets are available for use in the query parameters.

+
+
NONE
Placeholder for no restriction on the domains returned. + Does nothing, and exists only to prevent a new ranking + set from being created with this name.
+
RANK
Used to calculate the domain ranking for a given domain. + This affects the order in which domains are stored in the index, and increases the odds they'll + even be considered within the time restrictions of the query.
+
BLOGS
Returns a fixed list of domains, configurable in Datasets. + Changes to this list will not be reflected in the index until the next time the index is rebuilt.
+
+
{{> control/partials/foot-includes }} diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/new-domain-ranking-set.hdb b/code/services-core/control-service/src/main/resources/templates/control/sys/new-domain-ranking-set.hdb index 7c086276..bd68e0c4 100644 --- a/code/services-core/control-service/src/main/resources/templates/control/sys/new-domain-ranking-set.hdb +++ b/code/services-core/control-service/src/main/resources/templates/control/sys/new-domain-ranking-set.hdb @@ -11,17 +11,18 @@
- + - + - + - + - +
Name - +
- The name is how the ranking set is identified in the query parameters, + Must be all letters. + The name is how the ranking set is identified in the query parameters, and also decides the file name of the persisted ranking set definition. Keep it simple.
Algorithm
Description
@@ -47,15 +48,15 @@
Depth
- Up to this number of domains are ranked, the rest are excluded. + Number. Up to this number of domains are ranked, the rest are excluded.
Definition
diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb b/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb index 3e228c67..c36b9a5f 100644 --- a/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb +++ b/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb @@ -12,7 +12,7 @@ - + - + - + - + - +
Name {{#if special}}{{/if}} @@ -23,7 +23,7 @@
Algorithm {{#if special}}{{/if}}
Description {{#if special}}{{/if}} @@ -54,15 +54,15 @@
Depth
- Up to this number of domains are ranked, the rest are excluded. + Number. Up to this number of domains are ranked, the rest are excluded.
Definition
From 304d4c9acf2f9b4ffeb2c6cb791f12253541ea89 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Wed, 17 Jan 2024 10:56:16 +0100 Subject: [PATCH 6/7] (control) Fix result ordering in the file storage listing view In some scenarios, such as when restoring storage items from json-manifest on db failure, the file storage view would present the items in a non-chronological order. Added a sort() operation to mitigate this. --- .../control/node/svc/ControlNodeService.java | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeService.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeService.java index 6883d70a..66a2b485 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeService.java +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeService.java @@ -360,22 +360,29 @@ public class ControlNodeService { } private List makeFileStorageBaseWithStorage(List storageIds) throws SQLException { - Map fileStorageBaseByBaseId = new HashMap<>(); - Map> fileStoragByBaseId = new HashMap<>(); + Map> fileStorageByBaseId = new HashMap<>(); for (var id : storageIds) { var storage = fileStorageService.getStorage(id); fileStorageBaseByBaseId.computeIfAbsent(storage.base().id(), k -> storage.base()); - fileStoragByBaseId.computeIfAbsent(storage.base().id(), k -> new ArrayList<>()).add(new FileStorageWithActions(storage)); + fileStorageByBaseId.computeIfAbsent(storage.base().id(), k -> new ArrayList<>()).add(new FileStorageWithActions(storage)); } List result = new ArrayList<>(); - for (var baseId : fileStorageBaseByBaseId.keySet()) { - result.add(new FileStorageBaseWithStorage(fileStorageBaseByBaseId.get(baseId), - fileStoragByBaseId.get(baseId) - )); + for (var baseId : fileStorageBaseByBaseId.keySet()) { + var base = 
fileStorageBaseByBaseId.get(baseId); + var items = fileStorageByBaseId.get(baseId); + + // Sort by timestamp, then by relPath + // this ensures that the newest file is listed last + items.sort(Comparator + .comparing(FileStorageWithActions::getTimestamp) + .thenComparing(FileStorageWithActions::getRelPath) + ); + + result.add(new FileStorageBaseWithStorage(base, items)); } return result; From 41cdb8f71bf1e48e73b2bfd74072975bb6a188b3 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Wed, 17 Jan 2024 18:21:09 +0100 Subject: [PATCH 7/7] (control) Fix broken update button in the update-domain-ranking-set form id property was on the wrong element. --- .../templates/control/sys/update-domain-ranking-set.hdb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb b/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb index c36b9a5f..c8c7ae9d 100644 --- a/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb +++ b/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb @@ -9,8 +9,8 @@
{{#with rankingSet}}

Domain Ranking Set: {{name}}

- - + +
@@ -56,7 +56,7 @@
- +
Number. Up to this number of domains are ranked, the rest are excluded.