Merge pull request #73 from MarginaliaSearch/configurable-search-sets

(WIP) Configurable domain ranking sets
This commit is contained in:
Viktor 2024-01-17 21:12:20 +01:00 committed by GitHub
commit ca80957143
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
33 changed files with 818 additions and 258 deletions

View file

@ -15,7 +15,7 @@ public class SearchSpecification {
/** If present and not empty, limit the search to these domain IDs */
public List<Integer> domains;
public SearchSetIdentifier searchSetIdentifier;
public String searchSetIdentifier;
public final String humanQuery;

View file

@ -31,7 +31,7 @@ public class QueryProtobufCodec {
builder.addSubqueries(IndexProtobufCodec.convertSearchSubquery(subquery));
}
builder.setSearchSetIdentifier(query.specs.searchSetIdentifier.name());
builder.setSearchSetIdentifier(query.specs.searchSetIdentifier);
builder.setHumanQuery(request.getHumanQuery());
builder.setQuality(convertSpecLimit(query.specs.quality));
@ -62,7 +62,7 @@ public class QueryProtobufCodec {
convertSpecLimit(request.getDomainCount()),
request.getDomainIdsList(),
IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()),
SearchSetIdentifier.valueOf(request.getSearchSetIdentifier()));
request.getSearchSetIdentifier());
}
@ -133,7 +133,7 @@ public class QueryProtobufCodec {
return new SearchSpecification(
subqueries,
specs.getDomainsList(),
SearchSetIdentifier.valueOf(specs.getSearchSetIdentifier()),
specs.getSearchSetIdentifier(),
specs.getHumanQuery(),
IndexProtobufCodec.convertSpecLimit(specs.getQuality()),
IndexProtobufCodec.convertSpecLimit(specs.getYear()),
@ -159,7 +159,7 @@ public class QueryProtobufCodec {
.setYear(convertSpecLimit(params.year()))
.setSize(convertSpecLimit(params.size()))
.setRank(convertSpecLimit(params.rank()))
.setSearchSetIdentifier(params.identifier().name());
.setSearchSetIdentifier(params.identifier());
if (params.nearDomain() != null)
builder.setNearDomain(params.nearDomain());

View file

@ -23,10 +23,10 @@ public record QueryParams(
SpecificationLimit domainCount,
List<Integer> domainIds,
QueryLimits limits,
SearchSetIdentifier identifier
String identifier
)
{
public QueryParams(String query, QueryLimits limits, SearchSetIdentifier identifier) {
public QueryParams(String query, QueryLimits limits, String identifier) {
this(query, null,
List.of(),
List.of(),

View file

@ -0,0 +1,166 @@
package nu.marginalia.db;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import lombok.With;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
/** Reads and writes domain ranking set definitions stored in the
 * CONF_DOMAIN_RANKING_SET table.
 * <p>
 * SQL failures are logged and swallowed by every method in this class:
 * reads fall back to an empty result, and writes become no-ops.
 */
public class DomainRankingSetsService {
    private static final Logger logger = LoggerFactory.getLogger(DomainRankingSetsService.class);

    private final HikariDataSource dataSource;

    @Inject
    public DomainRankingSetsService(HikariDataSource dataSource) {
        this.dataSource = dataSource;
    }

    /** Looks up a single ranking set by its name.
     *
     * @param name key of the ranking set
     * @return the ranking set, or empty if no row matched or the query failed
     * @throws SQLException declared for backward compatibility with existing callers;
     *                      SQL errors raised inside this method are caught and logged
     */
    public Optional<DomainRankingSet> get(String name) throws SQLException {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
                     FROM CONF_DOMAIN_RANKING_SET
                     WHERE NAME = ?
                     """)) {
            stmt.setString(1, name);

            try (var rs = stmt.executeQuery()) {
                if (!rs.next()) {
                    return Optional.empty();
                }
                return Optional.of(readRankingSet(rs));
            }
        }
        catch (SQLException ex) {
            logger.error("Failed to get domain set", ex);
            return Optional.empty();
        }
    }

    /** Inserts the ranking set, replacing any existing row with the same name.
     *
     * @param domainRankingSet the definition to persist
     */
    public void upsert(DomainRankingSet domainRankingSet) {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION)
                     VALUES (?, ?, ?, ?, ?)
                     """))
        {
            stmt.setString(1, domainRankingSet.name());
            stmt.setString(2, domainRankingSet.description());
            stmt.setString(3, domainRankingSet.algorithm().name());
            stmt.setInt(4, domainRankingSet.depth());
            stmt.setString(5, domainRankingSet.definition());
            stmt.executeUpdate();

            // Only commit explicitly when the connection isn't already doing it for us
            if (!conn.getAutoCommit())
                conn.commit();
        }
        catch (SQLException ex) {
            logger.error("Failed to update domain set", ex);
        }
    }

    /** Deletes the row whose name matches that of the provided ranking set.
     *
     * @param domainRankingSet the set to remove; only its name is used
     */
    public void delete(DomainRankingSet domainRankingSet) {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     DELETE FROM CONF_DOMAIN_RANKING_SET
                     WHERE NAME = ?
                     """))
        {
            stmt.setString(1, domainRankingSet.name());
            stmt.executeUpdate();

            // Only commit explicitly when the connection isn't already doing it for us
            if (!conn.getAutoCommit())
                conn.commit();
        }
        catch (SQLException ex) {
            logger.error("Failed to delete domain set", ex);
        }
    }

    /** Fetches every ranking set in the table.
     *
     * @return all stored ranking sets, or an empty list if the query failed
     */
    public List<DomainRankingSet> getAll() {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     SELECT NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION
                     FROM CONF_DOMAIN_RANKING_SET
                     """);
             var rs = stmt.executeQuery())
        {
            List<DomainRankingSet> ret = new ArrayList<>();
            while (rs.next()) {
                ret.add(readRankingSet(rs));
            }
            return ret;
        }
        catch (SQLException ex) {
            logger.error("Failed to get domain set", ex);
            return List.of();
        }
    }

    /** Maps the current row of the result set to a DomainRankingSet.
     * <p>
     * An ALGORITHM value that is not a DomainSetAlgorithm constant makes
     * valueOf throw IllegalArgumentException, which is not handled here.
     */
    private static DomainRankingSet readRankingSet(java.sql.ResultSet rs) throws SQLException {
        return new DomainRankingSet(
                rs.getString("NAME"),
                rs.getString("DESCRIPTION"),
                DomainSetAlgorithm.valueOf(rs.getString("ALGORITHM")),
                rs.getInt("DEPTH"),
                rs.getString("DEFINITION")
        );
    }

    public enum DomainSetAlgorithm {
        /** Use link graph, do a pagerank */
        LINKS_PAGERANK,
        /** Use link graph, do a cheirank */
        LINKS_CHEIRANK,
        /** Use adjacency graph, do a pagerank */
        ADJACENCY_PAGERANK,
        /** Use adjacency graph, do a cheirank */
        ADJACENCY_CHEIRANK,
        /** For reserved names.  Use special algorithm, function of name */
        SPECIAL
    }

    /** Defines a domain ranking set, parameters for the ranking algorithms.
     *
     * @param name Key and name of the set
     * @param description Human-readable description
     * @param algorithm Algorithm to use
     * @param depth Depth of the algorithm
     * @param definition Definition of the set, typically a list of domains or globs for domain-names
     * */
    @With
    public record DomainRankingSet(String name,
                                   String description,
                                   DomainSetAlgorithm algorithm,
                                   int depth,
                                   String definition)
    {
        /** Resolves the file for this set under the given base directory:
         * the lower-cased name with a ".dat" suffix.
         */
        public Path fileName(Path base) {
            // Locale.ROOT keeps the file name stable regardless of the JVM's default
            // locale (e.g. a Turkish locale would otherwise map 'I' to a dotless 'ı')
            return base.resolve(name().toLowerCase(java.util.Locale.ROOT) + ".dat");
        }

        /** Splits the definition into its entries, one per line.
         * Lines are trimmed; blank lines and lines starting with '#' are dropped.
         */
        public String[] domains() {
            return Arrays.stream(definition().split("\n+"))
                    .map(String::trim)
                    .filter(s -> !s.isBlank())
                    .filter(s -> !s.startsWith("#"))
                    .toArray(String[]::new);
        }

        /** @return true if this set uses the SPECIAL algorithm */
        public boolean isSpecial() {
            return algorithm() == DomainSetAlgorithm.SPECIAL;
        }
    }
}

View file

@ -0,0 +1,12 @@
-- Configuration table for domain ranking sets, one row per set.
-- NAME is the primary key and is compared with the utf8mb4_unicode_ci collation,
-- while the remaining text columns use the table default utf8mb4_bin.
CREATE TABLE IF NOT EXISTS CONF_DOMAIN_RANKING_SET (
NAME VARCHAR(255) PRIMARY KEY COLLATE utf8mb4_unicode_ci,
DESCRIPTION VARCHAR(255) NOT NULL,
ALGORITHM VARCHAR(255) NOT NULL,
DEPTH INT NOT NULL,
DEFINITION LONGTEXT NOT NULL
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
-- Seed the reserved sets. They use the SPECIAL algorithm and an empty definition.
-- INSERT IGNORE leaves a row untouched if one with the same NAME already exists.
INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('NONE', 'Reserved: No Ranking Algorithm', 'SPECIAL', 50000, '');
INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('BLOGS', 'Reserved: Blogs Set', 'SPECIAL', 50000, '');
INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('RANK', 'Reserved: Main Domain Ranking', 'SPECIAL', 50000, '');

View file

@ -0,0 +1,84 @@
package nu.marginalia.db;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.test.TestMigrationLoader;
import org.junit.jupiter.api.*;
import org.testcontainers.containers.MariaDBContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import static org.junit.jupiter.api.Assertions.*;
/** Integration test running DomainRankingSetsService against a MariaDB test container
 * with the Flyway migrations applied.
 */
@Testcontainers
@Tag("slow")
class DomainRankingSetsServiceTest {
    @Container
    static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
            .withDatabaseName("WMSA_prod")
            .withUsername("wmsa")
            .withPassword("wmsa")
            .withNetworkAliases("mariadb");

    static HikariDataSource dataSource;

    @BeforeAll
    public static void setup() {
        HikariConfig config = new HikariConfig();
        config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
        config.setUsername("wmsa");
        config.setPassword("wmsa");

        dataSource = new HikariDataSource(config);

        TestMigrationLoader.flywayMigration(dataSource);
    }

    @AfterAll
    static void tearDownAll() {
        dataSource.close();
        mariaDBContainer.close();
    }

    /** Exercises upsert, get, getAll and delete in sequence on an initially empty table. */
    @Test
    public void testScenarios() throws Exception {
        var service = new DomainRankingSetsService(dataSource);

        // Clean up default values.  Every pre-existing row is removed rather than a
        // hard-coded list of names: the size assertions below require an empty table,
        // and the migration seeds more reserved sets than just BLOGS and NONE.
        service.getAll().forEach(service::delete);

        var newValue = new DomainRankingSetsService.DomainRankingSet(
                "test",
                "Test domain set",
                DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK,
                10,
                "test\\.nu"
        );
        var newValue2 = new DomainRankingSetsService.DomainRankingSet(
                "test2",
                "Test domain set 2",
                DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK,
                20,
                "test\\.nu 2"
        );

        service.upsert(newValue);
        service.upsert(newValue2);

        assertEquals(newValue, service.get("test").orElseThrow());

        var allValues = service.getAll();
        assertEquals(2, allValues.size());
        assertTrue(allValues.contains(newValue));
        assertTrue(allValues.contains(newValue2));

        service.delete(newValue);
        assertFalse(service.get("test").isPresent());

        service.delete(newValue2);
        assertFalse(service.get("test2").isPresent());

        allValues = service.getAll();
        assertEquals(0, allValues.size());
    }
}

View file

@ -55,7 +55,7 @@ public class ApiSearchOperator {
Math.min(100, count),
150,
8192),
searchSet);
searchSet.name());
}
private SearchSetIdentifier selectSearchSet(int index) {

View file

@ -35,7 +35,7 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(),
List.of(),
new QueryLimits(1, 25, 200, 8192),
profile.searchSetIdentifier
profile.searchSetIdentifier.name()
);
}
@ -54,7 +54,7 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(),
List.of(),
new QueryLimits(count, count, 100, 512),
SearchSetIdentifier.NONE
SearchSetIdentifier.NONE.name()
);
}
@ -72,7 +72,7 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(),
List.of(),
new QueryLimits(100, 100, 100, 512),
SearchSetIdentifier.NONE
SearchSetIdentifier.NONE.name()
);
}
@ -90,7 +90,7 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(),
List.of(),
new QueryLimits(100, 100, 100, 512),
SearchSetIdentifier.NONE
SearchSetIdentifier.NONE.name()
);
}
}

View file

@ -53,7 +53,9 @@ public class ControlService extends Service {
RandomExplorationService randomExplorationService,
DataSetsService dataSetsService,
ControlNodeService controlNodeService,
ControlActorService controlActorService
ControlDomainRankingSetsService controlDomainRankingSetsService,
ControlActorService controlActorService,
ControlErrorHandler errorHandler
) throws IOException {
super(params);
@ -66,6 +68,7 @@ public class ControlService extends Service {
messageQueueService.register();
sysActionsService.register();
dataSetsService.register();
controlDomainRankingSetsService.register();
// node
controlFileStorageService.register();
@ -79,6 +82,8 @@ public class ControlService extends Service {
domainComplaintService.register();
randomExplorationService.register();
errorHandler.register();
var indexRenderer = rendererFactory.renderer("control/index");
var eventsRenderer = rendererFactory.renderer("control/sys/events");
var serviceByIdRenderer = rendererFactory.renderer("control/sys/service-by-id");
@ -104,6 +109,7 @@ public class ControlService extends Service {
Spark.get("/public/:resource", this::serveStatic);
monitors.subscribe(this::logMonitorStateChange);
controlActorService.startDefaultActors();

View file

@ -0,0 +1,15 @@
package nu.marginalia.control;
/** Unchecked exception carrying the pieces of a user-facing validation error:
 * a short title, a longer explanation, and a link target to go back to.
 */
public class ControlValidationError extends RuntimeException {
    /** Short summary; also passed on as the exception message */
    public final String title;

    /** Longer explanation of what went wrong */
    public final String messageLong;

    /** Path the user is offered to navigate back to */
    public final String redirect;

    /**
     * @param title       short summary, doubles as the exception message
     * @param messageLong longer explanation
     * @param redirect    path to link back to
     */
    public ControlValidationError(String title,
                                  String messageLong,
                                  String redirect)
    {
        super(title);

        this.redirect = redirect;
        this.messageLong = messageLong;
        this.title = title;
    }
}

View file

@ -8,6 +8,7 @@ public class Redirects {
public static final HtmlRedirect redirectToOverview = new HtmlRedirect("/");
public static final HtmlRedirect redirectToBlacklist = new HtmlRedirect("/blacklist");
public static final HtmlRedirect redirectToComplaints = new HtmlRedirect("/complaints");
public static final HtmlRedirect redirectToRankingDataSets = new HtmlRedirect("/domain-ranking-sets");
public static final HtmlRedirect redirectToMessageQueue = new HtmlRedirect("/message-queue");
public static class HtmlRedirect implements ResponseTransformer {

View file

@ -79,7 +79,7 @@ public class SearchToBanService {
private Object executeQuery(Context ctx, String query) {
return queryClient.search(ctx, new QueryParams(
query, new QueryLimits(2, 200, 250, 8192),
SearchSetIdentifier.NONE
"NONE"
));
}
}

View file

@ -360,22 +360,29 @@ public class ControlNodeService {
}
private List<FileStorageBaseWithStorage> makeFileStorageBaseWithStorage(List<FileStorageId> storageIds) throws SQLException {
Map<FileStorageBaseId, FileStorageBase> fileStorageBaseByBaseId = new HashMap<>();
Map<FileStorageBaseId, List<FileStorageWithActions>> fileStoragByBaseId = new HashMap<>();
Map<FileStorageBaseId, List<FileStorageWithActions>> fileStorageByBaseId = new HashMap<>();
for (var id : storageIds) {
var storage = fileStorageService.getStorage(id);
fileStorageBaseByBaseId.computeIfAbsent(storage.base().id(), k -> storage.base());
fileStoragByBaseId.computeIfAbsent(storage.base().id(), k -> new ArrayList<>()).add(new FileStorageWithActions(storage));
fileStorageByBaseId.computeIfAbsent(storage.base().id(), k -> new ArrayList<>()).add(new FileStorageWithActions(storage));
}
List<FileStorageBaseWithStorage> result = new ArrayList<>();
for (var baseId : fileStorageBaseByBaseId.keySet()) {
result.add(new FileStorageBaseWithStorage(fileStorageBaseByBaseId.get(baseId),
fileStoragByBaseId.get(baseId)
));
for (var baseId : fileStorageBaseByBaseId.keySet()) {
var base = fileStorageBaseByBaseId.get(baseId);
var items = fileStorageByBaseId.get(baseId);
// Sort by timestamp, then by relPath
// this ensures that the newest file is listed last
items.sort(Comparator
.comparing(FileStorageWithActions::getTimestamp)
.thenComparing(FileStorageWithActions::getRelPath)
);
result.add(new FileStorageBaseWithStorage(base, items));
}
return result;

View file

@ -0,0 +1,111 @@
package nu.marginalia.control.sys.svc;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.control.ControlValidationError;
import nu.marginalia.control.Redirects;
import nu.marginalia.db.DomainRankingSetsService;
import spark.Request;
import spark.Response;
import spark.Spark;
import java.io.IOException;
import java.sql.SQLException;
import java.util.Map;
public class ControlDomainRankingSetsService {
private final HikariDataSource dataSource;
private final ControlRendererFactory rendererFactory;
private final DomainRankingSetsService domainRankingSetsService;
@Inject
public ControlDomainRankingSetsService(HikariDataSource dataSource,
ControlRendererFactory rendererFactory,
DomainRankingSetsService domainRankingSetsService) {
this.dataSource = dataSource;
this.rendererFactory = rendererFactory;
this.domainRankingSetsService = domainRankingSetsService;
}
public void register() throws IOException {
var datasetsRenderer = rendererFactory.renderer("control/sys/domain-ranking-sets");
var updateDatasetRenderer = rendererFactory.renderer("control/sys/update-domain-ranking-set");
var newDatasetRenderer = rendererFactory.renderer("control/sys/new-domain-ranking-set");
Spark.get("/public/domain-ranking-sets", this::rankingSetsModel, datasetsRenderer::render);
Spark.get("/public/domain-ranking-sets/new", (rq,rs) -> new Object(), newDatasetRenderer::render);
Spark.get("/public/domain-ranking-sets/:id", this::rankingSetModel, updateDatasetRenderer::render);
Spark.post("/public/domain-ranking-sets/:id", this::alterSetModel, Redirects.redirectToRankingDataSets);
}
private Object alterSetModel(Request request, Response response) throws SQLException {
final String act = request.queryParams("act");
final String id = request.params("id");
if ("update".equals(act)) {
domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet(
id,
request.queryParams("description"),
DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")),
Integer.parseInt(request.queryParams("depth")),
request.queryParams("definition")
));
return "";
}
else if ("delete".equals(act)) {
var model = domainRankingSetsService.get(id).orElseThrow();
if (model.isSpecial()) {
throw new ControlValidationError("Cannot delete special ranking set",
"""
SPECIAL data sets are reserved by the system and can not be deleted.
""",
"/domain-ranking-sets");
}
domainRankingSetsService.delete(model);
return "";
}
else if ("create".equals(act)) {
if (domainRankingSetsService.get(request.queryParams("name")).isPresent()) {
throw new ControlValidationError("Ranking set with that name already exists",
"""
Ensure the new data set has a unique name and try again.
""",
"/domain-ranking-sets");
}
domainRankingSetsService.upsert(new DomainRankingSetsService.DomainRankingSet(
request.queryParams("name").toUpperCase(),
request.queryParams("description"),
DomainRankingSetsService.DomainSetAlgorithm.valueOf(request.queryParams("algorithm")),
Integer.parseInt(request.queryParams("depth")),
request.queryParams("definition")
));
return "";
}
throw new ControlValidationError("Unknown action", """
An unknown action was requested and the system does not understand how to act on it.
""",
"/domain-ranking-sets");
}
private Object rankingSetsModel(Request request, Response response) {
return Map.of("rankingSets", domainRankingSetsService.getAll());
}
private Object rankingSetModel(Request request, Response response) throws SQLException {
var model = domainRankingSetsService.get(request.params("id")).orElseThrow();
return Map.of("rankingSet", model,
"selectedAlgo", Map.of(
"special", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.SPECIAL,
"adjacency_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_CHEIRANK,
"adjacency_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.ADJACENCY_PAGERANK,
"links_cheirank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_CHEIRANK,
"links_pagerank", model.algorithm() == DomainRankingSetsService.DomainSetAlgorithm.LINKS_PAGERANK)
);
}
}

View file

@ -0,0 +1,35 @@
package nu.marginalia.control.sys.svc;
import com.google.inject.Inject;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.control.ControlValidationError;
import spark.Request;
import spark.Response;
import spark.Spark;
import java.util.Map;
/** Turns ControlValidationError exceptions into a rendered error page
 * using the "control/error" template.
 */
public class ControlErrorHandler {
    private final ControlRendererFactory.Renderer renderer;

    @Inject
    public ControlErrorHandler(ControlRendererFactory rendererFactory) {
        this.renderer = rendererFactory.renderer("control/error");
    }

    /** Renders the error's title, long message and redirect target through the
     * error template and sets the outcome as the response body.
     */
    public void render(ControlValidationError error, Request request, Response response) {
        Map<String, Object> model = Map.of(
                "title", error.title,
                "messageLong", error.messageLong,
                "redirect", error.redirect
        );

        response.body(renderer.render(model));
    }

    /** Installs {@link #render} as the Spark exception handler for ControlValidationError. */
    public void register() {
        Spark.exception(ControlValidationError.class,
                (error, request, response) -> render(error, request, response));
    }
}

View file

@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<title>Control Service: Error</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="stylesheet" href="/style.css" />
{{> control/partials/head-includes }}
</head>
<body>
{{> control/partials/nav}}
<div class="container">
<h1 class="my-3">Error: {{title}}</h1>
<div class="my-3 p-3 border bg-light">
<p>{{messageLong}}</p>
<a href="{{redirect}}">Go back</a>
</div>
</div>
</body>
{{> control/partials/foot-includes }}
<script>
window.setInterval(() => {
refresh(["processes", "services", "jobs", "events"]);
}, 2000);
</script>
</html>

View file

@ -34,7 +34,8 @@
<a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false">System</a>
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="/actions" title="System actions">Actions</a></li>
<li><a class="dropdown-item" href="/datasets" title="View and update the data sets">Datasets</a></li>
<li><a class="dropdown-item" href="/datasets" title="View and update the data sets">Data Sets</a></li>
<li><a class="dropdown-item" href="/domain-ranking-sets" title="View and update domain ranking sets">Domain Ranking Sets</a></li>
<li><a class="dropdown-item" href="/events" title="View the event log">Events</a></li>
<li><a class="dropdown-item" href="/message-queue" title="View or manipulate the system message queue">Message Queue</a></li>
</ul>

View file

@ -0,0 +1,53 @@
<!DOCTYPE html>
<html>
<head>
<title>Control Service</title>
{{> control/partials/head-includes }}
</head>
<body>
{{> control/partials/nav}}
<div class="container">
<h1 class="my-3">Domain Ranking Sets</h1>
<div class="border my-3 p-3 bg-light">
Domain ranking sets configure the ranking algorithms used to determine the importance of a domain.
</div>
<table class="table">
<tr>
<th>Name</th>
<th>Description</th>
<th>Algorithm</th>
<th>Depth</th>
</tr>
{{#each rankingSets}}
<tr>
<td><a href="/domain-ranking-sets/{{name}}">{{name}}</a></td>
<td>{{description}}</td>
<td>{{algorithm}}</td>
<td>{{depth}}</td>
</tr>
{{/each}}
</table>
<div class="my-3">
<a href="/domain-ranking-sets/new" class="btn btn-primary">New Domain Ranking Set</a>
</div>
<div class="border my-3 p-3 bg-light">
<p>Several reserved ranking sets are available for use in the query parameters.</p>
<dl>
<dt>NONE</dt><dd>Placeholder for no restriction on the domains returned.
Does nothing, and exists only to prevent a new ranking
set from being created with this name.</dd>
<dt>RANK</dt><dd>Used to calculate the domain ranking for a given domain.
This affects the order they are stored in the index, and increases the odds they'll
even be considered within the time restrictions of the query.</dd>
<dt>BLOGS</dt><dd>Returns a fixed list of domains, configurable in <a href="/datasets">Datasets</a>.
Changes to this list will not be reflected in the index until the next time the index is rebuilt.</dd>
</dl>
</div>
</div>
</body>
{{> control/partials/foot-includes }}
</html>

View file

@ -0,0 +1,75 @@
<!DOCTYPE html>
<html>
<head>
<title>Control Service</title>
{{> control/partials/head-includes }}
</head>
<body>
{{> control/partials/nav}}
<div class="container">
<h1 class="my-3">Create Domain Ranking Set</h1>
<form method="post" action="?act=create">
<table class="table">
<tr>
<th><label for="name">Name</label></th>
<td>
<input pattern="\w+" type="text" value="{{name}}" id="name" name="name" style="text-transform: uppercase" />
<div>
<small class="text-muted">May only contain letters, digits and underscores.
The name is how the ranking set is identified in the query parameters,
and also decides the file name of the persisted ranking set definition. Keep it simple.</small>
</div>
</td>
</tr>
<tr>
<th><label for="algorithm">Algorithm</label></th>
<td>
<select id="algorithm" name="algorithm">
<option value="LINKS_PAGERANK">LINKS_PAGERANK</option>
<option value="LINKS_CHEIRANK">LINKS_CHEIRANK</option>
<option value="ADJACENCY_PAGERANK">ADJACENCY_PAGERANK</option>
<option value="ADJACENCY_CHEIRANK">ADJACENCY_CHEIRANK</option>
</select>
<div>
<small class="text-muted">
The algorithm used to rank the domains. The LINKS algorithms use the link graph, and the ADJACENCY
algorithms use the adjacency graph. CheiRank is a variant of PageRank that uses the reversed graph.
</small>
</div>
</td>
</tr>
<tr>
<th><label for="description">Description</label></th>
<td>
<input type="text" value="{{description}}" id="description" name="description" {{#if special}}disabled{{/if}} />
<div>
<small class="text-muted">This is purely to help keep track of what this ranking set does.</small>
</div>
</td>
</tr>
<tr>
<th><label for="depth">Depth</label></th>
<td>
<input pattern="\d+" type="text" value="{{depth}}" id="depth" name="depth" />
<div>
<small class="text-muted">Number. Up to this number of domains are ranked, the rest are excluded.</small>
</div>
</td>
</tr>
<tr><th colspan="2"><label for="definition">Definition</label></th></tr>
<tr><td colspan="2">
<textarea name="definition" id="definition" rows="10" style="width: 100%">{{definition}}</textarea>
<div>
<small class="text-muted">A list of domain names, one per line, possibly globbed with SQL-style '%' wildcards.
These are used as the origin point for the Personalized PageRank algorithm, and will be considered
the central points of the link or adjacency graph. If no domains are specified, the entire domain space is used, as per the PageRank paper.
</small>
</div>
</td></tr>
</table>
<button type="submit" class="btn btn-primary">Create</button>
</form>
</div>
</body>
{{> control/partials/foot-includes }}
</html>

View file

@ -0,0 +1,88 @@
<!DOCTYPE html>
<html>
<head>
<title>Control Service</title>
{{> control/partials/head-includes }}
</head>
<body>
{{> control/partials/nav}}
<div class="container">
{{#with rankingSet}}
<h1 class="my-3">Domain Ranking Set: {{name}}</h1>
<form method="post" action="?act=update" id="update-form">
<table class="table">
<tr>
<th><label for="name">Name</label></th>
<td>
{{#if special}}<input type="hidden" name="name" value="{{name}}" />{{/if}}
<input type="text" value="{{name}}" id="name" name="name" {{#if special}}disabled{{/if}} />
<div>
<small class="text-muted">The name is how the ranking set is identified in the query parameters,
and also decides the file name of the persisted ranking set definition. Keep it simple.</small>
</div>
</td>
</tr>
<tr>
<th><label for="algorithm">Algorithm</label></th>
<td>
{{#if special}}<input type="hidden" name="algorithm" value="{{algorithm}}" />{{/if}}
<select id="algorithm" name="algorithm" {{#if special}}disabled{{/if}}>
{{#with algorithm}}
<option value="SPECIAL" disabled {{#if selectedAlgo.special}}selected{{/if}}>SPECIAL</option>
<option value="LINKS_PAGERANK" {{#if selectedAlgo.links_pagerank}}selected{{/if}}>LINKS_PAGERANK</option>
<option value="LINKS_CHEIRANK" {{#if selectedAlgo.links_cheirank}}selected{{/if}}>LINKS_CHEIRANK</option>
<option value="ADJACENCY_PAGERANK" {{#if selectedAlgo.adjacency_pagerank}}selected{{/if}}>ADJACENCY_PAGERANK</option>
<option value="ADJACENCY_CHEIRANK" {{#if selectedAlgo.adjacency_cheirank}}selected{{/if}}>ADJACENCY_CHEIRANK</option>
{{/with}}
</select>
<div>
<small class="text-muted">
The algorithm used to rank the domains. The LINKS algorithms use the link graph, and the ADJACENCY
algorithms use the adjacency graph. CheiRank is a variant of PageRank that uses the reversed graph.
</small>
</div>
</td>
</tr>
<tr>
<th><label for="description">Description</label></th>
<td>
{{#if special}}<input type="hidden" name="description" value="{{description}}" />{{/if}}
<input type="text" value="{{description}}" id="description" name="description" {{#if special}}disabled{{/if}} />
<div>
<small class="text-muted">This is purely to help keep track of what this ranking set does.</small>
</div>
</td>
</tr>
<tr>
<th><label for="depth">Depth</label></th>
<td>
<input pattern="\d+" type="text" value="{{depth}}" id="depth" name="depth" />
<div>
<small class="text-muted">Number. Up to this number of domains are ranked, the rest are excluded.</small>
</div>
</td>
</tr>
<tr><th colspan="2"><label for="definition">Definition</label></th></tr>
<tr><td colspan="2">
<textarea name="definition" id="definition" rows="10" style="width: 100%">{{definition}}</textarea>
<div>
<small class="text-muted">A list of domain names, one per line, possibly globbed with SQL-style '%' wildcards.
These are used as the origin point for the Personalized PageRank algorithm, and will be considered
the central points of the link or adjacency graph. If no domains are specified, the entire domain space is used, as per the PageRank paper.
</small>
</div>
</td></tr>
</table>
</form>
<form method="post" action="?act=delete" id="delete-form"></form>
<button type="submit" class="btn btn-danger" form="delete-form" style="float:right" {{#if special}}disabled title="Cannot delete special sets!"{{/if}} onclick="return confirm('Confirm deletion of ranking set')">Delete</button>
<button type="submit" class="btn btn-primary" form="update-form">Update</button>
{{/with}}
</div>
</body>
{{> control/partials/foot-includes }}
</html>

View file

@ -6,14 +6,10 @@ import com.google.inject.Singleton;
import com.google.inject.name.Named;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.linkdb.dlinks.DomainLinkDb;
import nu.marginalia.linkdb.dlinks.FileDomainLinkDb;
import nu.marginalia.linkdb.dlinks.SelectingDomainLinkDb;
import nu.marginalia.linkdb.dlinks.SqlDomainLinkDb;
import nu.marginalia.service.module.ServiceConfiguration;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.IndexLocations;
import nu.marginalia.index.config.RankingSettings;
import nu.marginalia.WmsaHome;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -30,12 +26,6 @@ public class IndexModule extends AbstractModule {
public void configure() {
}
@Provides
public RankingSettings rankingSettings() {
Path dir = WmsaHome.getHomePath().resolve("conf/ranking-settings.yaml");
return RankingSettings.from(dir);
}
@Provides
@Singleton
public DomainLinkDb domainLinkDb (

View file

@ -1,26 +0,0 @@
package nu.marginalia.index.config;
import lombok.ToString;
import org.yaml.snakeyaml.Yaml;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
/** Ranking settings, deserialized from a YAML document with one entry per named ranking. */
@ToString
public class RankingSettings {
    public RankingSettingsEntry small;
    public RankingSettingsEntry retro;
    public RankingSettingsEntry standard;
    public RankingSettingsEntry academia;
    public RankingSettingsEntry ranking;

    /** Reads the file at the given path and maps its YAML content onto a RankingSettings.
     *
     * @param dir path of the YAML file to read
     * @throws RuntimeException wrapping the IOException if the file can not be read
     */
    public static RankingSettings from(Path dir) {
        final Yaml yaml = new Yaml();
        final String yamlText;

        try {
            yamlText = Files.readString(dir);
        }
        catch (IOException ex) {
            throw new RuntimeException("Failed to load " + dir, ex);
        }

        return yaml.loadAs(yamlText, RankingSettings.class);
    }
}

View file

@ -1,11 +0,0 @@
package nu.marginalia.index.config;
import java.util.List;
/** Settings for a single ranking: the domains it is biased toward,
 * and how many domains it includes.
 * Public mutable fields; presumably populated by the YAML loader that
 * reads RankingSettings -- confirm against the caller.
 */
public class RankingSettingsEntry {
/** Bias the ranking toward these domains */
public List<String> domains;
/** Number of domains to include in ranking */
public int max;
}

View file

@ -43,13 +43,16 @@ public class IndexOpsService {
if (!run(searchSetService::recalculateAll)) {
Spark.halt(503, "Operations busy");
}
return "OK";
}
public Object reindexEndpoint(Request request, Response response) throws Exception {
if (!run(index::switchIndex).isPresent()) {
Spark.halt(503, "Operations busy");
}
return "OK";
}

View file

@ -261,9 +261,7 @@ public class IndexQueryService extends IndexApiImplBase {
return new SmallSearchSet(request.getDomainsList());
}
return searchSetsService.getSearchSetByName(
SearchSetIdentifier.valueOf(request.getSearchSetIdentifier())
);
return searchSetsService.getSearchSetByName(request.getSearchSetIdentifier());
}
private SearchResultSet executeSearch(SearchParameters params) throws SQLException {

View file

@ -4,10 +4,11 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import gnu.trove.list.TIntList;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import lombok.SneakyThrows;
import nu.marginalia.db.DomainRankingSetsService;
import nu.marginalia.db.DomainTypes;
import nu.marginalia.index.IndexServicesFactory;
import nu.marginalia.index.searchset.SearchSet;
import nu.marginalia.ranking.RankingAlgorithm;
import nu.marginalia.ranking.ReversePageRank;
import nu.marginalia.ranking.StandardPageRank;
import nu.marginalia.ranking.accumulator.RankingResultHashMapAccumulator;
@ -16,31 +17,32 @@ import nu.marginalia.ranking.data.RankingDomainFetcher;
import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData;
import nu.marginalia.index.svc.searchset.RankingSearchSet;
import nu.marginalia.index.svc.searchset.SearchSetAny;
import nu.marginalia.index.config.RankingSettings;
import nu.marginalia.ranking.DomainRankings;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.db.DbUpdateRanks;
import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.control.ServiceHeartbeat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.sql.SQLException;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
@Singleton
public class IndexSearchSetsService {
private final Logger logger = LoggerFactory.getLogger(getClass());
private final DomainTypes domainTypes;
private final ServiceHeartbeat heartbeat;
private final IndexServicesFactory indexServicesFactory;
private final ServiceEventLog eventLog;
private final DomainRankingSetsService domainRankingSetsService;
private final DbUpdateRanks dbUpdateRanks;
private final RankingDomainFetcher similarityDomains;
private final RankingSettings rankingSettings;
private final RankingDomainFetcher linksDomains;
private final ConcurrentHashMap<String, SearchSet> rankingSets = new ConcurrentHashMap<>();
// Below are binary indices that are used to constrain a search
private volatile RankingSearchSet popularSet;
private volatile RankingSearchSet smallWebSet;
private volatile RankingSearchSet academiaSet;
private volatile RankingSearchSet blogsSet;
private final SearchSet anySet = new SearchSetAny();
// The ranking value of the domains used in sorting the domains
@ -51,83 +53,121 @@ public class IndexSearchSetsService {
ServiceHeartbeat heartbeat,
RankingDomainFetcher rankingDomains,
RankingDomainFetcherForSimilarityData similarityDomains,
RankingSettings rankingSettings,
IndexServicesFactory servicesFactory,
IndexServicesFactory indexServicesFactory,
ServiceEventLog eventLog,
DomainRankingSetsService domainRankingSetsService,
DbUpdateRanks dbUpdateRanks) throws IOException {
this.domainTypes = domainTypes;
this.heartbeat = heartbeat;
this.indexServicesFactory = indexServicesFactory;
this.eventLog = eventLog;
this.domainRankingSetsService = domainRankingSetsService;
this.dbUpdateRanks = dbUpdateRanks;
if (similarityDomains.hasData()) {
this.similarityDomains = similarityDomains;
this.linksDomains = rankingDomains;
}
else {
// on test environments the cosine similarity graph may not be present
logger.info("Domain similarity is not present, falling back on link graph");
this.similarityDomains = rankingDomains;
this.linksDomains = rankingDomains;
}
this.rankingSettings = rankingSettings;
smallWebSet = new RankingSearchSet(SearchSetIdentifier.SMALLWEB, servicesFactory.getSearchSetsBase().resolve("small-web.dat"));
academiaSet = new RankingSearchSet(SearchSetIdentifier.ACADEMIA, servicesFactory.getSearchSetsBase().resolve("academia.dat"));
popularSet = new RankingSearchSet(SearchSetIdentifier.POPULAR, servicesFactory.getSearchSetsBase().resolve("popular.dat"));
blogsSet = new RankingSearchSet(SearchSetIdentifier.BLOGS, servicesFactory.getSearchSetsBase().resolve("blogs.dat"));
for (var rankingSet : domainRankingSetsService.getAll()) {
rankingSets.put(rankingSet.name(),
new RankingSearchSet(rankingSet.name(),
rankingSet.fileName(indexServicesFactory.getSearchSetsBase())
)
);
}
}
/**
 * Returns the domain rankings currently held by this service.
 *
 * <p>The field is read without taking this object's monitor.
 * NOTE(review): the recalculation path replaces the field inside
 * {@code synchronized (this)}; confirm the field is declared volatile so
 * readers here observe the replacement.
 *
 * @return the current {@link DomainRankings} instance
 */
public DomainRankings getDomainRankings() {
return domainRankings;
}
public SearchSet getSearchSetByName(SearchSetIdentifier searchSetIdentifier) {
public SearchSet getSearchSetByName(String searchSetIdentifier) {
if (null == searchSetIdentifier) {
return anySet;
}
return switch (searchSetIdentifier) {
case NONE -> anySet;
case POPULAR -> popularSet;
case ACADEMIA -> academiaSet;
case SMALLWEB -> smallWebSet;
case BLOGS -> blogsSet;
};
if ("NONE".equals(searchSetIdentifier) || "".equals(searchSetIdentifier)) {
return anySet;
}
enum RepartitionSteps {
UPDATE_ACADEMIA,
UPDATE_POPULAR,
UPDATE_SMALL_WEB,
UPDATE_BLOGS,
UPDATE_RANKINGS,
FINISHED
return Objects.requireNonNull(rankingSets.get(searchSetIdentifier), "Unknown search set");
}
public void recalculateAll() {
try (var processHeartbeat = heartbeat.createServiceTaskHeartbeat(RepartitionSteps.class, "repartitionAll")) {
processHeartbeat.progress(RepartitionSteps.UPDATE_ACADEMIA);
updateAcademiaDomainsSet();
processHeartbeat.progress(RepartitionSteps.UPDATE_POPULAR);
updatePopularDomainsSet();
processHeartbeat.progress(RepartitionSteps.UPDATE_SMALL_WEB);
updateSmallWebDomainsSet();
processHeartbeat.progress(RepartitionSteps.UPDATE_BLOGS);
updateBlogsSet();
processHeartbeat.progress(RepartitionSteps.UPDATE_RANKINGS);
updateDomainRankings();
processHeartbeat.progress(RepartitionSteps.FINISHED);
for (var rankingSet : domainRankingSetsService.getAll()) {
try {
if (DomainRankingSetsService.DomainSetAlgorithm.SPECIAL.equals(rankingSet.algorithm())) {
switch (rankingSet.name()) {
case "BLOGS" -> recalculateBlogsSet(rankingSet);
case "RANK" -> updateDomainRankings(rankingSet);
case "NONE" -> {}
}
} else {
recalculateNornal(rankingSet);
}
}
catch (Exception ex) {
logger.warn("Failed to recalculate ranking set {}", rankingSet.name(), ex);
}
eventLog.logEvent("RANKING-SET-RECALCULATED", rankingSet.name());
}
}
private void updateDomainRankings() {
var entry = rankingSettings.ranking;
private void recalculateNornal(DomainRankingSetsService.DomainRankingSet rankingSet) {
String[] domains = rankingSet.domains();
var spr = new StandardPageRank(similarityDomains, entry.domains.toArray(String[]::new));
var ranks = spr.pageRankWithPeripheralNodes(entry.max, () -> new RankingResultHashMapAccumulator(100_000));
RankingAlgorithm rankingAlgorithm = switch (rankingSet.algorithm()) {
case LINKS_PAGERANK -> new StandardPageRank(linksDomains, domains);
case LINKS_CHEIRANK -> new ReversePageRank(linksDomains, domains);
case ADJACENCY_PAGERANK -> new StandardPageRank(similarityDomains, domains);
case ADJACENCY_CHEIRANK -> new ReversePageRank(similarityDomains, domains);
default -> throw new IllegalStateException("Unexpected value: " + rankingSet.algorithm());
};
var data = rankingAlgorithm.pageRankWithPeripheralNodes(rankingSet.depth(), RankingResultHashSetAccumulator::new);
var set = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexServicesFactory.getSearchSetsBase()), data);
rankingSets.put(rankingSet.name(), set);
try {
set.write();
}
catch (IOException ex) {
logger.warn("Failed to write search set", ex);
}
}
/**
 * Rebuilds the search set for the given ranking set from the domains that
 * are known to be of type {@code BLOG}, registers it under the ranking
 * set's name and writes it out.
 *
 * <p>If no blog domains are known yet, the domain list is reloaded once and
 * queried again before the set is built.
 *
 * @param rankingSet the ranking set definition supplying the name and file location
 * @throws SQLException declared for the domain type lookups/reload
 * @throws IOException  declared for writing the search set
 */
private void recalculateBlogsSet(DomainRankingSetsService.DomainRankingSet rankingSet) throws SQLException, IOException {
    TIntList blogDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG);

    if (blogDomains.isEmpty()) {
        // FIXME: We don't want to reload the entire list every time, but we do want to do it sometimes. Actor maybe?
        domainTypes.reloadDomainsList(DomainTypes.Type.BLOG);
        blogDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG);
    }

    // Construction, registration and writing all happen under this object's monitor
    synchronized (this) {
        final var setFile = rankingSet.fileName(indexServicesFactory.getSearchSetsBase());
        final var members = new IntOpenHashSet(blogDomains.toArray());
        final var blogSet = new RankingSearchSet(rankingSet.name(), setFile, members);

        rankingSets.put(rankingSet.name(), blogSet);
        blogSet.write();
    }
}
private void updateDomainRankings(DomainRankingSetsService.DomainRankingSet rankingSet) {
var spr = new StandardPageRank(similarityDomains, rankingSet.domains());
var ranks = spr.pageRankWithPeripheralNodes(rankingSet.depth(), () -> new RankingResultHashMapAccumulator(rankingSet.depth()));
synchronized (this) {
domainRankings = new DomainRankings(ranks);
@ -138,60 +178,4 @@ public class IndexSearchSetsService {
dbUpdateRanks.execute(ranks);
}
/**
 * Recomputes the POPULAR search set.
 *
 * <p>Runs a {@link StandardPageRank} over {@code similarityDomains}, seeded
 * with the domains of the {@code retro} ranking settings and limited to its
 * {@code max}, then replaces {@code popularSet} (keeping its existing source
 * path) and writes it out while holding this object's monitor.
 *
 * <p>Checked exceptions are rethrown undeclared via {@code @SneakyThrows}.
 */
@SneakyThrows
public void updatePopularDomainsSet() {
    final var settings = rankingSettings.retro;
    final String[] seedDomains = settings.domains.toArray(String[]::new);

    final var pageRank = new StandardPageRank(similarityDomains, seedDomains);
    final var rankedDomains = pageRank.pageRankWithPeripheralNodes(settings.max, RankingResultHashSetAccumulator::new);

    synchronized (this) {
        final var sourceFile = popularSet.source;
        popularSet = new RankingSearchSet(SearchSetIdentifier.POPULAR, sourceFile, rankedDomains);
        popularSet.write();
    }
}
/**
 * Recomputes the SMALLWEB search set.
 *
 * <p>Runs a {@link ReversePageRank} over {@code similarityDomains}, seeded
 * with the domains of the {@code small} ranking settings, with the
 * algorithm's max-known-urls setting at 750 and the result limited to the
 * settings' {@code max}. The result replaces {@code smallWebSet} (keeping its
 * existing source path) and is written out while holding this object's monitor.
 *
 * <p>Checked exceptions are rethrown undeclared via {@code @SneakyThrows}.
 */
@SneakyThrows
public void updateSmallWebDomainsSet() {
    final var settings = rankingSettings.small;
    final String[] seedDomains = settings.domains.toArray(String[]::new);

    final var reverseRank = new ReversePageRank(similarityDomains, seedDomains);
    reverseRank.setMaxKnownUrls(750);

    final var rankedDomains = reverseRank.pageRankWithPeripheralNodes(settings.max, RankingResultHashSetAccumulator::new);

    synchronized (this) {
        final var sourceFile = smallWebSet.source;
        smallWebSet = new RankingSearchSet(SearchSetIdentifier.SMALLWEB, sourceFile, rankedDomains);
        smallWebSet.write();
    }
}
/**
 * Recomputes the BLOGS search set from the domains known to be of type
 * {@code BLOG}.
 *
 * <p>If no blog domains are known yet, the domain list is reloaded once and
 * queried again. The resulting set replaces {@code blogsSet} (keeping its
 * existing source path) and is written out while holding this object's monitor.
 *
 * <p>Checked exceptions are rethrown undeclared via {@code @SneakyThrows}.
 */
@SneakyThrows
public void updateBlogsSet() {
    TIntList blogDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG);

    if (blogDomains.isEmpty()) {
        // FIXME: We don't want to reload the entire list every time, but we do want to do it sometimes. Actor maybe?
        domainTypes.reloadDomainsList(DomainTypes.Type.BLOG);
        blogDomains = domainTypes.getKnownDomainsByType(DomainTypes.Type.BLOG);
    }

    synchronized (this) {
        final var sourceFile = blogsSet.source;
        final var members = new IntOpenHashSet(blogDomains.toArray());
        blogsSet = new RankingSearchSet(SearchSetIdentifier.BLOGS, sourceFile, members);
        blogsSet.write();
    }
}
/**
 * Recomputes the ACADEMIA search set.
 *
 * <p>Runs a {@link StandardPageRank} over {@code similarityDomains}, seeded
 * with the domains of the {@code academia} ranking settings and limited to
 * its {@code max}, then replaces {@code academiaSet} (keeping its existing
 * source path) and writes it out while holding this object's monitor.
 *
 * <p>Checked exceptions are rethrown undeclared via {@code @SneakyThrows}.
 */
@SneakyThrows
public void updateAcademiaDomainsSet() {
    final var settings = rankingSettings.academia;
    final String[] seedDomains = settings.domains.toArray(String[]::new);

    final var pageRank = new StandardPageRank(similarityDomains, seedDomains);
    final var rankedDomains = pageRank.pageRankWithPeripheralNodes(settings.max, RankingResultHashSetAccumulator::new);

    synchronized (this) {
        final var sourceFile = academiaSet.source;
        academiaSet = new RankingSearchSet(SearchSetIdentifier.ACADEMIA, sourceFile, rankedDomains);
        academiaSet.write();
    }
}
}

View file

@ -24,17 +24,17 @@ public class RankingSearchSet implements SearchSet {
private final Logger logger = LoggerFactory.getLogger(getClass());
private final IntOpenHashSet set;
public final SearchSetIdentifier identifier;
public final String name;
public final Path source;
public RankingSearchSet(SearchSetIdentifier identifier, Path source, IntOpenHashSet set) {
this.identifier = identifier;
public RankingSearchSet(String name, Path source, IntOpenHashSet set) {
this.name = name;
this.source = source;
this.set = set;
}
public RankingSearchSet(SearchSetIdentifier identifier, Path source) throws IOException {
this.identifier = identifier;
public RankingSearchSet(String name, Path source) throws IOException {
this.name = name;
this.source = source;
if (!Files.exists(source)) {
@ -45,7 +45,7 @@ public class RankingSearchSet implements SearchSet {
}
if (set.isEmpty()) {
logger.warn("Search set {} is empty", identifier);
logger.warn("Search set {} is empty", name);
}
}
@ -87,6 +87,6 @@ public class RankingSearchSet implements SearchSet {
}
public String toString() {
return identifier.toString();
return name;
}
}

View file

@ -1,61 +0,0 @@
package nu.marginalia.index.model;
import nu.marginalia.index.config.RankingSettings;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Checks that {@code RankingSettings.from} parses a YAML ranking
 * configuration into per-section {@code domains} lists and {@code max} values.
 */
class RankingSettingsTest {

    /** Scratch file holding the YAML under test; created before and deleted after each test. */
    Path tempFile;

    @BeforeEach
    void setUp() throws IOException {
        tempFile = Files.createTempFile(getClass().getSimpleName(), ".tmp");
    }

    @AfterEach
    void tearDown() throws IOException {
        Files.delete(tempFile);
    }

    /**
     * Writes a four-section configuration and verifies both the domain list
     * and the {@code max} value of every section, so that a section whose
     * values are mixed up with another's is detected.
     */
    @Test
    void testParseRankingSettings() throws IOException {
        Files.writeString(tempFile, """
                retro:
                  max: 50
                  domains:
                  - "www.rep.routledge.com"
                  - "www.personal.kent.edu"
                small:
                  max: 10
                  domains:
                  - "bikobatanari.art"
                  - "wiki.xxiivv.com"
                academia:
                  max: 101
                  domains:
                  - "%edu"
                standard:
                  max: 23
                  domains:
                  - "memex.marginalia.nu"
                """);

        var settings = RankingSettings.from(tempFile);

        assertEquals(List.of("www.rep.routledge.com", "www.personal.kent.edu"), settings.retro.domains);
        assertEquals(50, settings.retro.max);

        assertEquals(List.of("bikobatanari.art", "wiki.xxiivv.com"), settings.small.domains);
        assertEquals(10, settings.small.max);

        assertEquals(List.of("%edu"), settings.academia.domains);
        assertEquals(101, settings.academia.max);

        assertEquals(List.of("memex.marginalia.nu"), settings.standard.domains);
        assertEquals(23, settings.standard.max);
    }
}

View file

@ -129,7 +129,7 @@ public class IndexQueryServiceIntegrationSmokeTest {
.domainCount(SpecificationLimit.none())
.rankingParams(ResultRankingParameters.sensibleDefaults())
.domains(new ArrayList<>())
.searchSetIdentifier(SearchSetIdentifier.NONE)
.searchSetIdentifier("NONE")
.subqueries(List.of(new SearchSubquery(
List.of("3", "5", "2"), List.of("4"), Collections.emptyList(), Collections.emptyList(),
Collections.emptyList()))).build());
@ -207,7 +207,7 @@ public class IndexQueryServiceIntegrationSmokeTest {
.rank(SpecificationLimit.none())
.domainCount(SpecificationLimit.none())
.queryStrategy(QueryStrategy.SENTENCE)
.searchSetIdentifier(SearchSetIdentifier.NONE)
.searchSetIdentifier("NONE")
.rankingParams(ResultRankingParameters.sensibleDefaults())
.subqueries(List.of(new SearchSubquery(
List.of("4"), Collections.emptyList(), Collections.emptyList(), Collections.emptyList(),

View file

@ -431,7 +431,7 @@ public class IndexQueryServiceIntegrationTest {
.domainCount(SpecificationLimit.none())
.rankingParams(ResultRankingParameters.sensibleDefaults())
.domains(new ArrayList<>())
.searchSetIdentifier(SearchSetIdentifier.NONE)
.searchSetIdentifier("NONE")
.subqueries(List.of());
return mutator.apply(builder).build();

View file

@ -69,7 +69,7 @@ public class IndexQueryServiceIntegrationTestModule extends AbstractModule {
bind(ProcessHeartbeat.class).toInstance(new FakeProcessHeartbeat());
IndexSearchSetsService setsServiceMock = Mockito.mock(IndexSearchSetsService.class);
when(setsServiceMock.getSearchSetByName(SearchSetIdentifier.NONE)).thenReturn(new SearchSetAny());
when(setsServiceMock.getSearchSetByName("NONE")).thenReturn(new SearchSetAny());
when(setsServiceMock.getDomainRankings()).thenReturn(new DomainRankings());
bind(IndexSearchSetsService.class).toInstance(setsServiceMock);

View file

@ -44,9 +44,13 @@ public class QueryBasicInterface {
if (queryParam == null) {
return renderer.render(new Object());
}
int count = request.queryParams("count") == null ? 10 : Integer.parseInt(request.queryParams("count"));
String set = request.queryParams("set") == null ? "" : request.queryParams("set");
var query = queryFactory.createQuery(new QueryParams(queryParam, new QueryLimits(
1, 10, 250, 8192
), SearchSetIdentifier.NONE));
1, count, 250, 8192
), set));
var rsp = indexClient.query(
Context.fromRequest(request),

View file

@ -49,7 +49,7 @@ public class QueryFactoryTest {
SpecificationLimit.none(),
null,
new QueryLimits(100, 100, 100, 100),
SearchSetIdentifier.BLOGS)).specs;
"NONE")).specs;
}
@Test