(db) Add a new configuration object 'domain ranking set' for storing ranking parameters

This commit is contained in:
Viktor Lofgren 2024-01-16 11:17:40 +01:00
parent 5a62b3058f
commit 36ad4c7466
3 changed files with 252 additions and 0 deletions

View File

@ -0,0 +1,156 @@
package nu.marginalia.db;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import lombok.With;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.file.Path;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
/**
 * Data-access service for the {@code CONF_DOMAIN_RANKING_SET} table, which stores
 * named parameter sets ("domain ranking sets") for the domain ranking algorithms.
 *
 * <p>Every method obtains its own connection from the injected data source.
 * SQL failures are logged and reported to the caller as an empty result
 * (reads) or silently dropped (writes); they are never rethrown.
 */
public class DomainRankingSetsService {
    private static final Logger logger = LoggerFactory.getLogger(DomainRankingSetsService.class);

    private final HikariDataSource dataSource;

    @Inject
    public DomainRankingSetsService(HikariDataSource dataSource) {
        this.dataSource = dataSource;
    }

    /**
     * Looks up a single ranking set by its name.
     *
     * @param name key of the set to fetch
     * @return the matching set, or empty if no row matches or the query failed
     * @throws SQLException declared for backward compatibility with existing callers;
     *                      SQL errors are caught, logged and reported as an empty result
     * @throws IllegalArgumentException if the stored ALGORITHM value is not a
     *                                  {@link DomainSetAlgorithm} constant
     */
    public Optional<DomainRankingSet> get(String name) throws SQLException {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
                     FROM CONF_DOMAIN_RANKING_SET
                     WHERE NAME = ?
                     """)) {
            stmt.setString(1, name);

            // Scope the result set explicitly so it is released before the statement
            try (var rs = stmt.executeQuery()) {
                if (!rs.next()) {
                    return Optional.empty();
                }
                return Optional.of(readRow(rs));
            }
        }
        catch (SQLException ex) {
            logger.error("Failed to get domain set", ex);
            return Optional.empty();
        }
    }

    /**
     * Inserts the given set, or replaces the existing row with the same name.
     * SQL errors are logged and otherwise ignored.
     *
     * @param domainRankingSet the set to store
     */
    public void upsert(DomainRankingSet domainRankingSet) {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION)
                     VALUES (?, ?, ?, ?, ?)
                     """))
        {
            stmt.setString(1, domainRankingSet.name());
            stmt.setString(2, domainRankingSet.description());
            stmt.setString(3, domainRankingSet.algorithm().name());
            stmt.setInt(4, domainRankingSet.depth());
            stmt.setString(5, domainRankingSet.definition());
            stmt.executeUpdate();
        }
        catch (SQLException ex) {
            logger.error("Failed to update domain set", ex);
        }
    }

    /**
     * Deletes the row whose name matches the given set; only the name is consulted.
     * SQL errors are logged and otherwise ignored.
     *
     * @param domainRankingSet the set to remove
     */
    public void delete(DomainRankingSet domainRankingSet) {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     DELETE FROM CONF_DOMAIN_RANKING_SET
                     WHERE NAME = ?
                     """))
        {
            stmt.setString(1, domainRankingSet.name());
            stmt.executeUpdate();
        }
        catch (SQLException ex) {
            logger.error("Failed to delete domain set", ex);
        }
    }

    /**
     * Fetches every stored ranking set.
     *
     * @return all sets in the table; an empty immutable list if the query failed
     * @throws IllegalArgumentException if any stored ALGORITHM value is not a
     *                                  {@link DomainSetAlgorithm} constant
     */
    public List<DomainRankingSet> getAll() {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
                     FROM CONF_DOMAIN_RANKING_SET
                     """);
             var rs = stmt.executeQuery()) {

            List<DomainRankingSet> ret = new ArrayList<>();
            while (rs.next()) {
                ret.add(readRow(rs));
            }
            return ret;
        }
        catch (SQLException ex) {
            logger.error("Failed to get domain set", ex);
            return List.of();
        }
    }

    /** Maps the row the cursor is currently positioned on to a {@link DomainRankingSet}. */
    private static DomainRankingSet readRow(ResultSet rs) throws SQLException {
        return new DomainRankingSet(
                rs.getString("NAME"),
                rs.getString("DESCRIPTION"),
                DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
                rs.getInt("DEPTH"),
                rs.getString("DEFINITION")
        );
    }

    public enum DomainSetAlgorithm {
        /** Use link graph, do a pagerank */
        LINKS_PAGERANK,
        /** Use link graph, do a cheirank */
        LINKS_CHEIRANK,
        /** Use adjacency graph, do a pagerank */
        ADJACENCY_PAGERANK,
        /** Use adjacency graph, do a cheirank */
        ADJACENCY_CHEIRANK,
        /** For reserved names. Use special algorithm, function of name */
        SPECIAL
    }

    /** Defines a domain ranking set, parameters for the ranking algorithms.
     *
     * @param name Key and name of the set
     * @param description Human-readable description
     * @param algorithm Algorithm to use
     * @param depth Depth of the algorithm
     * @param definition Definition of the set, typically a list of domains or globs for domain-names
     * */
    @With
    public record DomainRankingSet(String name,
                                   String description,
                                   DomainSetAlgorithm algorithm,
                                   int depth,
                                   String definition)
    {
        /**
         * Resolves the data file for this set beneath {@code base}: the lower-cased
         * set name with a {@code .dat} suffix.
         *
         * @param base directory to resolve against
         * @return {@code base/<lower-cased name>.dat}
         */
        public Path fileName(Path base) {
            // Locale.ROOT keeps the file name stable regardless of the JVM's default
            // locale (e.g. 'I' must not become a dotless 'ı' under a Turkish locale)
            return base.resolve(name().toLowerCase(Locale.ROOT) + ".dat");
        }

        /**
         * Splits the definition into its individual entries, one per line.
         * Each line is trimmed; blank lines and lines starting with {@code #}
         * (after trimming) are dropped.
         *
         * @return the remaining entries, in definition order
         */
        public String[] domains() {
            return definition().lines()
                    .map(String::trim)
                    .filter(line -> !line.isBlank())
                    .filter(line -> !line.startsWith("#"))
                    .toArray(String[]::new);
        }
    }
}

View File

@ -0,0 +1,12 @@
-- Configuration table with one row per domain ranking set, keyed by NAME.
-- Created only if it does not already exist.
CREATE TABLE IF NOT EXISTS CONF_DOMAIN_RANKING_SET (
-- Key and name of the set. Declared with the utf8mb4_unicode_ci collation,
-- which differs from the table-wide utf8mb4_bin default set below.
NAME VARCHAR(255) PRIMARY KEY COLLATE utf8mb4_unicode_ci,
-- Human-readable description of the set.
DESCRIPTION VARCHAR(255) NOT NULL,
-- Algorithm identifier stored as text; the seed rows below all use 'SPECIAL'.
ALGORITHM VARCHAR(255) NOT NULL,
-- Integer depth parameter for the algorithm.
DEPTH INT NOT NULL,
-- Free-form definition text for the set; empty for the seed rows below.
DEFINITION LONGTEXT NOT NULL
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
-- Seed the reserved sets NONE, BLOGS and RANK. INSERT IGNORE is used so that
-- a row whose NAME already exists is left as-is instead of failing the script.
INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('NONE', 'Reserved: No Ranking Algorithm', 'SPECIAL', 50000, '');
INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('BLOGS', 'Reserved: Blogs Set', 'SPECIAL', 50000, '');
INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('RANK', 'Reserved: Main Domain Ranking', 'SPECIAL', 50000, '');

View File

@ -0,0 +1,84 @@
package nu.marginalia.db;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.test.TestMigrationLoader;
import org.junit.jupiter.api.*;
import org.testcontainers.containers.MariaDBContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Integration test for {@link DomainRankingSetsService}, run against a MariaDB
 * test container with the project's database migrations applied.
 */
@Testcontainers
@Tag("slow")
class DomainRankingSetsServiceTest {
    @Container
    static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
            .withDatabaseName("WMSA_prod")
            .withUsername("wmsa")
            .withPassword("wmsa")
            .withNetworkAliases("mariadb");

    static HikariDataSource dataSource;

    /** Connects a pool to the container and applies the schema migrations. */
    @BeforeAll
    public static void setup() {
        HikariConfig config = new HikariConfig();
        config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
        config.setUsername("wmsa");
        config.setPassword("wmsa");

        dataSource = new HikariDataSource(config);

        TestMigrationLoader.flywayMigration(dataSource);
    }

    @AfterAll
    static void tearDownAll() {
        dataSource.close();
        mariaDBContainer.close();
    }

    /** Exercises upsert, get, getAll and delete as one round-trip scenario. */
    @Test
    public void testScenarios() throws Exception {
        var service = new DomainRankingSetsService(dataSource);

        // Clean up default values. The migration seeds several reserved sets
        // (NONE, BLOGS, RANK); remove whatever is present rather than listing
        // names, so the exact row counts asserted below hold regardless of
        // which defaults were seeded.
        for (var preexisting : service.getAll()) {
            service.delete(preexisting);
        }
        assertTrue(service.getAll().isEmpty());

        var newValue = new DomainRankingSetsService.DomainRankingSet(
                "test",
                "Test domain set",
                DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK,
                10,
                "test\\.nu"
        );
        var newValue2 = new DomainRankingSetsService.DomainRankingSet(
                "test2",
                "Test domain set 2",
                DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK,
                20,
                "test\\.nu 2"
        );

        service.upsert(newValue);
        service.upsert(newValue2);

        assertEquals(newValue, service.get("test").orElseThrow());

        var allValues = service.getAll();
        assertEquals(2, allValues.size());
        assertTrue(allValues.contains(newValue));
        assertTrue(allValues.contains(newValue2));

        service.delete(newValue);
        assertFalse(service.get("test").isPresent());

        service.delete(newValue2);
        assertFalse(service.get("test2").isPresent());

        allValues = service.getAll();
        assertEquals(0, allValues.size());
    }
}