(db) Add a new configuration object 'domain ranking set' for storing ranking parameters
This commit is contained in:
parent
5a62b3058f
commit
36ad4c7466
3 changed files with 252 additions and 0 deletions
|
@ -0,0 +1,156 @@
|
|||
package nu.marginalia.db;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import lombok.With;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
public class DomainRankingSetsService {
|
||||
private static final Logger logger = LoggerFactory.getLogger(DomainRankingSetsService.class);
|
||||
private final HikariDataSource dataSource;
|
||||
|
||||
@Inject
|
||||
public DomainRankingSetsService(HikariDataSource dataSource) {
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
public Optional<DomainRankingSet> get(String name) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
|
||||
FROM CONF_DOMAIN_RANKING_SET
|
||||
WHERE NAME = ?
|
||||
""")) {
|
||||
stmt.setString(1, name);
|
||||
var rs = stmt.executeQuery();
|
||||
|
||||
if (!rs.next()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
return Optional.of(new DomainRankingSet(
|
||||
rs.getString("NAME"),
|
||||
rs.getString("DESCRIPTION"),
|
||||
DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
|
||||
rs.getInt("DEPTH"),
|
||||
rs.getString("DEFINITION")
|
||||
));
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.error("Failed to get domain set", ex);
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
public void upsert(DomainRankingSet domainRankingSet) {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
"""))
|
||||
{
|
||||
stmt.setString(1, domainRankingSet.name());
|
||||
stmt.setString(2, domainRankingSet.description());
|
||||
stmt.setString(3, domainRankingSet.algorithm().name());
|
||||
stmt.setInt(4, domainRankingSet.depth());
|
||||
stmt.setString(5, domainRankingSet.definition());
|
||||
stmt.executeUpdate();
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.error("Failed to update domain set", ex);
|
||||
}
|
||||
}
|
||||
|
||||
public void delete(DomainRankingSet domainRankingSet) {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
DELETE FROM CONF_DOMAIN_RANKING_SET
|
||||
WHERE NAME = ?
|
||||
"""))
|
||||
{
|
||||
stmt.setString(1, domainRankingSet.name());
|
||||
stmt.executeUpdate();
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.error("Failed to delete domain set", ex);
|
||||
}
|
||||
}
|
||||
|
||||
public List<DomainRankingSet> getAll() {
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
|
||||
FROM CONF_DOMAIN_RANKING_SET
|
||||
""")) {
|
||||
var rs = stmt.executeQuery();
|
||||
List<DomainRankingSet> ret = new ArrayList<>();
|
||||
|
||||
while (rs.next()) {
|
||||
ret.add(
|
||||
new DomainRankingSet(
|
||||
rs.getString("NAME"),
|
||||
rs.getString("DESCRIPTION"),
|
||||
DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
|
||||
rs.getInt("DEPTH"),
|
||||
rs.getString("DEFINITION"))
|
||||
);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.error("Failed to get domain set", ex);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
public enum DomainSetAlgorithm {
|
||||
/** Use link graph, do a pagerank */
|
||||
LINKS_PAGERANK,
|
||||
/** Use link graph, do a cheirank */
|
||||
LINKS_CHEIRANK,
|
||||
/** Use adjacency graph, do a pagerank */
|
||||
ADJACENCY_PAGERANK,
|
||||
/** Use adjacency graph, do a cheirank */
|
||||
ADJACENCY_CHEIRANK,
|
||||
/** For reserved names. Use special algorithm, function of name */
|
||||
SPECIAL
|
||||
};
|
||||
|
||||
/** Defines a domain ranking set, parameters for the ranking algorithms.
|
||||
*
|
||||
* @param name Key and name of the set
|
||||
* @param description Human-readable description
|
||||
* @param algorithm Algorithm to use
|
||||
* @param depth Depth of the algorithm
|
||||
* @param definition Definition of the set, typically a list of domains or globs for domain-names
|
||||
* */
|
||||
@With
|
||||
public record DomainRankingSet(String name,
|
||||
String description,
|
||||
DomainSetAlgorithm algorithm,
|
||||
int depth,
|
||||
String definition)
|
||||
{
|
||||
|
||||
public Path fileName(Path base) {
|
||||
return base.resolve(name().toLowerCase() + ".dat");
|
||||
}
|
||||
public String[] domains() {
|
||||
return Arrays.stream(definition().split("\n+"))
|
||||
.map(String::trim)
|
||||
.filter(s -> !s.isBlank())
|
||||
.filter(s -> !s.startsWith("#"))
|
||||
.toArray(String[]::new);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
-- Configuration table with one row per domain ranking set, keyed by NAME.
CREATE TABLE IF NOT EXISTS CONF_DOMAIN_RANKING_SET
(
    NAME        VARCHAR(255) PRIMARY KEY COLLATE utf8mb4_unicode_ci,
    DESCRIPTION VARCHAR(255) NOT NULL,
    ALGORITHM   VARCHAR(255) NOT NULL,
    DEPTH       INT          NOT NULL,
    DEFINITION  LONGTEXT     NOT NULL
)
    CHARACTER SET utf8mb4
    COLLATE utf8mb4_bin;

-- Seed the reserved sets; with IGNORE, a row whose NAME already exists is left as it is.
INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION)
VALUES ('NONE', 'Reserved: No Ranking Algorithm', 'SPECIAL', 50000, ''),
       ('BLOGS', 'Reserved: Blogs Set', 'SPECIAL', 50000, ''),
       ('RANK', 'Reserved: Main Domain Ranking', 'SPECIAL', 50000, '');
|
|
@ -0,0 +1,84 @@
|
|||
package nu.marginalia.db;
|
||||
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.test.TestMigrationLoader;
|
||||
import org.junit.jupiter.api.*;
|
||||
import org.testcontainers.containers.MariaDBContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
@Testcontainers
|
||||
@Tag("slow")
|
||||
class DomainRankingSetsServiceTest {
|
||||
|
||||
@Container
|
||||
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||
.withDatabaseName("WMSA_prod")
|
||||
.withUsername("wmsa")
|
||||
.withPassword("wmsa")
|
||||
.withNetworkAliases("mariadb");
|
||||
|
||||
static HikariDataSource dataSource;
|
||||
|
||||
@BeforeAll
|
||||
public static void setup() {
|
||||
HikariConfig config = new HikariConfig();
|
||||
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||
config.setUsername("wmsa");
|
||||
config.setPassword("wmsa");
|
||||
|
||||
dataSource = new HikariDataSource(config);
|
||||
|
||||
TestMigrationLoader.flywayMigration(dataSource);
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
static void tearDownAll() {
|
||||
dataSource.close();
|
||||
mariaDBContainer.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testScenarios() throws Exception {
|
||||
var service = new DomainRankingSetsService(dataSource);
|
||||
|
||||
// Clean up default values
|
||||
service.get("BLOGS").ifPresent(service::delete);
|
||||
service.get("NONE").ifPresent(service::delete);
|
||||
|
||||
var newValue = new DomainRankingSetsService.DomainRankingSet(
|
||||
"test",
|
||||
"Test domain set",
|
||||
DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK,
|
||||
10,
|
||||
"test\\.nu"
|
||||
);
|
||||
var newValue2 = new DomainRankingSetsService.DomainRankingSet(
|
||||
"test2",
|
||||
"Test domain set 2",
|
||||
DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK,
|
||||
20,
|
||||
"test\\.nu 2"
|
||||
);
|
||||
service.upsert(newValue);
|
||||
service.upsert(newValue2);
|
||||
assertEquals(newValue, service.get("test").orElseThrow());
|
||||
|
||||
var allValues = service.getAll();
|
||||
assertEquals(2, allValues.size());
|
||||
assertTrue(allValues.contains(newValue));
|
||||
assertTrue(allValues.contains(newValue2));
|
||||
|
||||
service.delete(newValue);
|
||||
assertFalse(service.get("test").isPresent());
|
||||
|
||||
service.delete(newValue2);
|
||||
assertFalse(service.get("test2").isPresent());
|
||||
|
||||
allValues = service.getAll();
|
||||
assertEquals(0, allValues.size());
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue