(db) Add a new configuration object 'domain ranking set' for storing ranking parameters
This commit is contained in:
parent
5a62b3058f
commit
36ad4c7466
@ -0,0 +1,156 @@
|
|||||||
|
package nu.marginalia.db;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import lombok.With;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.file.Path;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
|
||||||
|
|
||||||
|
public class DomainRankingSetsService {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(DomainRankingSetsService.class);
|
||||||
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public DomainRankingSetsService(HikariDataSource dataSource) {
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Optional<DomainRankingSet> get(String name) throws SQLException {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
|
||||||
|
FROM CONF_DOMAIN_RANKING_SET
|
||||||
|
WHERE NAME = ?
|
||||||
|
""")) {
|
||||||
|
stmt.setString(1, name);
|
||||||
|
var rs = stmt.executeQuery();
|
||||||
|
|
||||||
|
if (!rs.next()) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
return Optional.of(new DomainRankingSet(
|
||||||
|
rs.getString("NAME"),
|
||||||
|
rs.getString("DESCRIPTION"),
|
||||||
|
DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
|
||||||
|
rs.getInt("DEPTH"),
|
||||||
|
rs.getString("DEFINITION")
|
||||||
|
));
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
logger.error("Failed to get domain set", ex);
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void upsert(DomainRankingSet domainRankingSet) {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION)
|
||||||
|
VALUES (?, ?, ?, ?, ?)
|
||||||
|
"""))
|
||||||
|
{
|
||||||
|
stmt.setString(1, domainRankingSet.name());
|
||||||
|
stmt.setString(2, domainRankingSet.description());
|
||||||
|
stmt.setString(3, domainRankingSet.algorithm().name());
|
||||||
|
stmt.setInt(4, domainRankingSet.depth());
|
||||||
|
stmt.setString(5, domainRankingSet.definition());
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
logger.error("Failed to update domain set", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void delete(DomainRankingSet domainRankingSet) {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
DELETE FROM CONF_DOMAIN_RANKING_SET
|
||||||
|
WHERE NAME = ?
|
||||||
|
"""))
|
||||||
|
{
|
||||||
|
stmt.setString(1, domainRankingSet.name());
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
logger.error("Failed to delete domain set", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<DomainRankingSet> getAll() {
|
||||||
|
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
|
||||||
|
FROM CONF_DOMAIN_RANKING_SET
|
||||||
|
""")) {
|
||||||
|
var rs = stmt.executeQuery();
|
||||||
|
List<DomainRankingSet> ret = new ArrayList<>();
|
||||||
|
|
||||||
|
while (rs.next()) {
|
||||||
|
ret.add(
|
||||||
|
new DomainRankingSet(
|
||||||
|
rs.getString("NAME"),
|
||||||
|
rs.getString("DESCRIPTION"),
|
||||||
|
DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
|
||||||
|
rs.getInt("DEPTH"),
|
||||||
|
rs.getString("DEFINITION"))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
logger.error("Failed to get domain set", ex);
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public enum DomainSetAlgorithm {
|
||||||
|
/** Use link graph, do a pagerank */
|
||||||
|
LINKS_PAGERANK,
|
||||||
|
/** Use link graph, do a cheirank */
|
||||||
|
LINKS_CHEIRANK,
|
||||||
|
/** Use adjacency graph, do a pagerank */
|
||||||
|
ADJACENCY_PAGERANK,
|
||||||
|
/** Use adjacency graph, do a cheirank */
|
||||||
|
ADJACENCY_CHEIRANK,
|
||||||
|
/** For reserved names. Use special algorithm, function of name */
|
||||||
|
SPECIAL
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Defines a domain ranking set, parameters for the ranking algorithms.
|
||||||
|
*
|
||||||
|
* @param name Key and name of the set
|
||||||
|
* @param description Human-readable description
|
||||||
|
* @param algorithm Algorithm to use
|
||||||
|
* @param depth Depth of the algorithm
|
||||||
|
* @param definition Definition of the set, typically a list of domains or globs for domain-names
|
||||||
|
* */
|
||||||
|
@With
|
||||||
|
public record DomainRankingSet(String name,
|
||||||
|
String description,
|
||||||
|
DomainSetAlgorithm algorithm,
|
||||||
|
int depth,
|
||||||
|
String definition)
|
||||||
|
{
|
||||||
|
|
||||||
|
public Path fileName(Path base) {
|
||||||
|
return base.resolve(name().toLowerCase() + ".dat");
|
||||||
|
}
|
||||||
|
public String[] domains() {
|
||||||
|
return Arrays.stream(definition().split("\n+"))
|
||||||
|
.map(String::trim)
|
||||||
|
.filter(s -> !s.isBlank())
|
||||||
|
.filter(s -> !s.startsWith("#"))
|
||||||
|
.toArray(String[]::new);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
-- Stores one row per domain ranking set; NAME is the primary key.
CREATE TABLE IF NOT EXISTS CONF_DOMAIN_RANKING_SET (
    NAME        VARCHAR(255) PRIMARY KEY COLLATE utf8mb4_unicode_ci,
    DESCRIPTION VARCHAR(255) NOT NULL,
    ALGORITHM   VARCHAR(255) NOT NULL,
    DEPTH       INT NOT NULL,
    DEFINITION  LONGTEXT NOT NULL
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;

-- Seed the reserved sets; INSERT IGNORE leaves rows that already exist untouched.
INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION)
VALUES
    ('NONE', 'Reserved: No Ranking Algorithm', 'SPECIAL', 50000, ''),
    ('BLOGS', 'Reserved: Blogs Set', 'SPECIAL', 50000, ''),
    ('RANK', 'Reserved: Main Domain Ranking', 'SPECIAL', 50000, '');
|
@ -0,0 +1,84 @@
|
|||||||
|
package nu.marginalia.db;
|
||||||
|
|
||||||
|
import com.zaxxer.hikari.HikariConfig;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import nu.marginalia.test.TestMigrationLoader;
|
||||||
|
import org.junit.jupiter.api.*;
|
||||||
|
import org.testcontainers.containers.MariaDBContainer;
|
||||||
|
import org.testcontainers.junit.jupiter.Container;
|
||||||
|
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
@Testcontainers
|
||||||
|
@Tag("slow")
|
||||||
|
class DomainRankingSetsServiceTest {
|
||||||
|
|
||||||
|
@Container
|
||||||
|
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||||
|
.withDatabaseName("WMSA_prod")
|
||||||
|
.withUsername("wmsa")
|
||||||
|
.withPassword("wmsa")
|
||||||
|
.withNetworkAliases("mariadb");
|
||||||
|
|
||||||
|
static HikariDataSource dataSource;
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void setup() {
|
||||||
|
HikariConfig config = new HikariConfig();
|
||||||
|
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||||
|
config.setUsername("wmsa");
|
||||||
|
config.setPassword("wmsa");
|
||||||
|
|
||||||
|
dataSource = new HikariDataSource(config);
|
||||||
|
|
||||||
|
TestMigrationLoader.flywayMigration(dataSource);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterAll
|
||||||
|
static void tearDownAll() {
|
||||||
|
dataSource.close();
|
||||||
|
mariaDBContainer.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testScenarios() throws Exception {
|
||||||
|
var service = new DomainRankingSetsService(dataSource);
|
||||||
|
|
||||||
|
// Clean up default values
|
||||||
|
service.get("BLOGS").ifPresent(service::delete);
|
||||||
|
service.get("NONE").ifPresent(service::delete);
|
||||||
|
|
||||||
|
var newValue = new DomainRankingSetsService.DomainRankingSet(
|
||||||
|
"test",
|
||||||
|
"Test domain set",
|
||||||
|
DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK,
|
||||||
|
10,
|
||||||
|
"test\\.nu"
|
||||||
|
);
|
||||||
|
var newValue2 = new DomainRankingSetsService.DomainRankingSet(
|
||||||
|
"test2",
|
||||||
|
"Test domain set 2",
|
||||||
|
DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK,
|
||||||
|
20,
|
||||||
|
"test\\.nu 2"
|
||||||
|
);
|
||||||
|
service.upsert(newValue);
|
||||||
|
service.upsert(newValue2);
|
||||||
|
assertEquals(newValue, service.get("test").orElseThrow());
|
||||||
|
|
||||||
|
var allValues = service.getAll();
|
||||||
|
assertEquals(2, allValues.size());
|
||||||
|
assertTrue(allValues.contains(newValue));
|
||||||
|
assertTrue(allValues.contains(newValue2));
|
||||||
|
|
||||||
|
service.delete(newValue);
|
||||||
|
assertFalse(service.get("test").isPresent());
|
||||||
|
|
||||||
|
service.delete(newValue2);
|
||||||
|
assertFalse(service.get("test2").isPresent());
|
||||||
|
|
||||||
|
allValues = service.getAll();
|
||||||
|
assertEquals(0, allValues.size());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user