From 36ad4c74663859743aa56934a9e456f4ef1b8200 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 16 Jan 2024 11:17:40 +0100 Subject: [PATCH] (db) Add a new configuration object 'domain ranking set' for storing ranking parameters --- .../db/DomainRankingSetsService.java | 156 ++++++++++++++++++ .../db/migration/V24_01_0_002__domain_set.sql | 12 ++ .../db/DomainRankingSetsServiceTest.java | 84 ++++++++++ 3 files changed, 252 insertions(+) create mode 100644 code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java create mode 100644 code/common/db/src/main/resources/db/migration/V24_01_0_002__domain_set.sql create mode 100644 code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java diff --git a/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java b/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java new file mode 100644 index 00000000..6045cb76 --- /dev/null +++ b/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java @@ -0,0 +1,156 @@ +package nu.marginalia.db; + +import com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; +import lombok.With; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.file.Path; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +public class DomainRankingSetsService { + private static final Logger logger = LoggerFactory.getLogger(DomainRankingSetsService.class); + private final HikariDataSource dataSource; + + @Inject + public DomainRankingSetsService(HikariDataSource dataSource) { + this.dataSource = dataSource; + } + + public Optional get(String name) throws SQLException { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement(""" + SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION + FROM CONF_DOMAIN_RANKING_SET + WHERE NAME = ? + """)) { + stmt.setString(1, name); + var rs = stmt.executeQuery(); + + if (!rs.next()) { + return Optional.empty(); + } + + return Optional.of(new DomainRankingSet( + rs.getString("NAME"), + rs.getString("DESCRIPTION"), + DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")), + rs.getInt("DEPTH"), + rs.getString("DEFINITION") + )); + } + catch (SQLException ex) { + logger.error("Failed to get domain set", ex); + return Optional.empty(); + } + } + + public void upsert(DomainRankingSet domainRankingSet) { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement(""" + REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) + VALUES (?, ?, ?, ?, ?) + """)) + { + stmt.setString(1, domainRankingSet.name()); + stmt.setString(2, domainRankingSet.description()); + stmt.setString(3, domainRankingSet.algorithm().name()); + stmt.setInt(4, domainRankingSet.depth()); + stmt.setString(5, domainRankingSet.definition()); + stmt.executeUpdate(); + } + catch (SQLException ex) { + logger.error("Failed to update domain set", ex); + } + } + + public void delete(DomainRankingSet domainRankingSet) { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement(""" + DELETE FROM CONF_DOMAIN_RANKING_SET + WHERE NAME = ? + """)) + { + stmt.setString(1, domainRankingSet.name()); + stmt.executeUpdate(); + } + catch (SQLException ex) { + logger.error("Failed to delete domain set", ex); + } + } + + public List getAll() { + + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement(""" + SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION + FROM CONF_DOMAIN_RANKING_SET + """)) { + var rs = stmt.executeQuery(); + List ret = new ArrayList<>(); + + while (rs.next()) { + ret.add( + new DomainRankingSet( + rs.getString("NAME"), + rs.getString("DESCRIPTION"), + DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")), + rs.getInt("DEPTH"), + rs.getString("DEFINITION")) + ); + } + return ret; + } + catch (SQLException ex) { + logger.error("Failed to get domain set", ex); + return List.of(); + } + } + + public enum DomainSetAlgorithm { + /** Use link graph, do a pagerank */ + LINKS_PAGERANK, + /** Use link graph, do a cheirank */ + LINKS_CHEIRANK, + /** Use adjacency graph, do a pagerank */ + ADJACENCY_PAGERANK, + /** Use adjacency graph, do a cheirank */ + ADJACENCY_CHEIRANK, + /** For reserved names. Use special algorithm, function of name */ + SPECIAL + }; + + /** Defines a domain ranking set, parameters for the ranking algorithms. + * + * @param name Key and name of the set + * @param description Human-readable description + * @param algorithm Algorithm to use + * @param depth Depth of the algorithm + * @param definition Definition of the set, typically a list of domains or globs for domain-names + * */ + @With + public record DomainRankingSet(String name, + String description, + DomainSetAlgorithm algorithm, + int depth, + String definition) + { + + public Path fileName(Path base) { + return base.resolve(name().toLowerCase() + ".dat"); + } + public String[] domains() { + return Arrays.stream(definition().split("\n+")) + .map(String::trim) + .filter(s -> !s.isBlank()) + .filter(s -> !s.startsWith("#")) + .toArray(String[]::new); + } + + } +} diff --git a/code/common/db/src/main/resources/db/migration/V24_01_0_002__domain_set.sql b/code/common/db/src/main/resources/db/migration/V24_01_0_002__domain_set.sql new file mode 100644 index 00000000..73912c6b --- /dev/null +++ b/code/common/db/src/main/resources/db/migration/V24_01_0_002__domain_set.sql @@ -0,0 +1,12 @@ + +CREATE TABLE IF NOT EXISTS CONF_DOMAIN_RANKING_SET ( + NAME VARCHAR(255) PRIMARY KEY COLLATE utf8mb4_unicode_ci, + DESCRIPTION VARCHAR(255) NOT NULL, + ALGORITHM VARCHAR(255) NOT NULL, + DEPTH INT NOT NULL, + DEFINITION LONGTEXT NOT NULL +) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; + +INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('NONE', 'Reserved: No Ranking Algorithm', 'SPECIAL', 50000, ''); +INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('BLOGS', 'Reserved: Blogs Set', 'SPECIAL', 50000, ''); +INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('RANK', 'Reserved: Main Domain Ranking', 'SPECIAL', 50000, ''); \ No newline at end of file diff --git a/code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java b/code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java new file mode 100644 index 00000000..999f1a7e --- /dev/null +++ b/code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java @@ -0,0 +1,84 @@ +package nu.marginalia.db; + +import com.zaxxer.hikari.HikariConfig; +import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.test.TestMigrationLoader; +import org.junit.jupiter.api.*; +import org.testcontainers.containers.MariaDBContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; + +import static org.junit.jupiter.api.Assertions.*; + +@Testcontainers +@Tag("slow") +class DomainRankingSetsServiceTest { + + @Container + static MariaDBContainer mariaDBContainer = new MariaDBContainer<>("mariadb") + .withDatabaseName("WMSA_prod") + .withUsername("wmsa") + .withPassword("wmsa") + .withNetworkAliases("mariadb"); + + static HikariDataSource dataSource; + + @BeforeAll + public static void setup() { + HikariConfig config = new HikariConfig(); + config.setJdbcUrl(mariaDBContainer.getJdbcUrl()); + config.setUsername("wmsa"); + config.setPassword("wmsa"); + + dataSource = new HikariDataSource(config); + + TestMigrationLoader.flywayMigration(dataSource); + } + + @AfterAll + static void tearDownAll() { + dataSource.close(); + mariaDBContainer.close(); + } + + @Test + public void testScenarios() throws Exception { + var service = new DomainRankingSetsService(dataSource); + + // Clean up default values + service.get("BLOGS").ifPresent(service::delete); + service.get("NONE").ifPresent(service::delete); + + var newValue = new DomainRankingSetsService.DomainRankingSet( + "test", + "Test domain set", + DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK, + 10, + "test\\.nu" + ); + var newValue2 = new DomainRankingSetsService.DomainRankingSet( + "test2", + "Test domain set 2", + DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK, + 20, + "test\\.nu 2" + ); + service.upsert(newValue); + service.upsert(newValue2); + assertEquals(newValue, service.get("test").orElseThrow()); + + var allValues = service.getAll(); + assertEquals(2, allValues.size()); + assertTrue(allValues.contains(newValue)); + assertTrue(allValues.contains(newValue2)); + + service.delete(newValue); + assertFalse(service.get("test").isPresent()); + + service.delete(newValue2); + assertFalse(service.get("test2").isPresent()); + + allValues = service.getAll(); + assertEquals(0, allValues.size()); + } +} \ No newline at end of file