Merge pull request #53 from MarginaliaSearch/standalone-index

Move ranking to the index-service, and query parsing to a new query-service; separate out the search-service
This commit is contained in:
Viktor 2023-10-09 15:42:06 +02:00 committed by GitHub
commit c8d820c17b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
192 changed files with 1242 additions and 818 deletions

View File

@ -5,4 +5,6 @@ public class IndexMqEndpoints {
public static final String INDEX_REPARTITION = "INDEX-REPARTITION"; public static final String INDEX_REPARTITION = "INDEX-REPARTITION";
public static final String SWITCH_INDEX = "SWITCH-INDEX"; public static final String SWITCH_INDEX = "SWITCH-INDEX";
public static final String SWITCH_LINKDB = "SWITCH_LINKDB";
} }

View File

@ -0,0 +1,82 @@
package nu.marginalia.index.client.model.results;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.model.EdgeUrl;
import org.jetbrains.annotations.NotNull;
import javax.annotation.Nullable;
import java.util.List;
/**
 * A raw index result paired with document-level details such as the URL, title,
 * description and ranking score.
 * <p>
 * All fields are assigned once in the constructor.  The no-argument accessor methods
 * ({@link #documentId()}, {@link #domainId()}, ...) forward to {@link #rawIndexResult}.
 */
@Getter
@ToString
public class DecoratedSearchResultItem {

    /** The underlying index result that the forwarding accessors read from */
    public final SearchResultItem rawIndexResult;

    @NotNull public final EdgeUrl url;
    @NotNull public final String title;
    @NotNull public final String description;

    public final double urlQuality;

    @NotNull public final String format;

    /** Document features bitmask, see HtmlFeature */
    public final int features;

    /** Publication year; may be null */
    @Nullable public final Integer pubYear;

    public final long dataHash;
    public final int wordsTotal;
    public final double rankingScore;

    public DecoratedSearchResultItem(SearchResultItem rawIndexResult,
                                     @NotNull EdgeUrl url,
                                     @NotNull String title,
                                     @NotNull String description,
                                     double urlQuality,
                                     @NotNull String format,
                                     int features,
                                     @Nullable Integer pubYear,
                                     long dataHash,
                                     int wordsTotal,
                                     double rankingScore)
    {
        this.rawIndexResult = rawIndexResult;

        this.url = url;
        this.title = title;
        this.description = description;
        this.urlQuality = urlQuality;
        this.format = format;
        this.features = features;
        this.pubYear = pubYear;
        this.dataHash = dataHash;
        this.wordsTotal = wordsTotal;
        this.rankingScore = rankingScore;
    }

    /** @return the document id of the raw index result */
    public long documentId() {
        return rawIndexResult.getDocumentId();
    }

    /** @return the domain id of the raw index result */
    public int domainId() {
        return rawIndexResult.getDomainId();
    }

    /** @return the results-from-domain count of the raw index result */
    public int resultsFromDomain() {
        return rawIndexResult.getResultsFromDomain();
    }

    /** @return the keyword scores of the raw index result */
    public List<SearchResultKeywordScore> keywordScores() {
        return rawIndexResult.getKeywordScores();
    }

    /** @return the ranking value of the raw index result */
    public long rankingId() {
        return rawIndexResult.getRanking();
    }
}

View File

@ -8,8 +8,7 @@ import java.util.List;
@AllArgsConstructor @Getter @ToString @AllArgsConstructor @Getter @ToString
public class SearchResultSet { public class SearchResultSet {
public List<SearchResultItem> results; public List<DecoratedSearchResultItem> results;
public ResultRankingContext rankingContext;
public int size() { public int size() {
return results.size(); return results.size();
} }

View File

@ -1,7 +1,5 @@
plugins { plugins {
id 'java' id 'java'
id 'jvm-test-suite' id 'jvm-test-suite'
} }
@ -13,8 +11,10 @@ java {
dependencies { dependencies {
implementation project(':code:common:model') implementation project(':code:common:model')
implementation project(':code:api:index-api')
implementation project(':code:common:config') implementation project(':code:common:config')
implementation project(':code:libraries:message-queue') implementation project(':code:libraries:message-queue')
implementation project(':code:features-index:index-query')
implementation project(':code:common:service-discovery') implementation project(':code:common:service-discovery')
implementation project(':code:common:service-client') implementation project(':code:common:service-client')

View File

@ -0,0 +1,65 @@
package nu.marginalia.query.client;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import io.prometheus.client.Summary;
import io.reactivex.rxjava3.core.Observable;
import nu.marginalia.WmsaHome;
import nu.marginalia.client.AbstractDynamicClient;
import nu.marginalia.client.Context;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.results.SearchResultSet;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.service.descriptor.ServiceDescriptors;
import nu.marginalia.service.id.ServiceId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.CheckReturnValue;
import java.util.UUID;
/**
 * Client for the service registered under {@link ServiceId#Query}.
 * <p>
 * Offers two blocking request methods, each timed with its own Prometheus
 * {@link Summary}, and exposes a message queue outbox addressed to the
 * query service's inbox.
 */
@Singleton
public class QueryClient extends AbstractDynamicClient {

    private static final Summary wmsa_search_index_api_delegate_time = Summary.build()
            .name("wmsa_search_index_api_delegate_time")
            .help("-")
            .register();

    private static final Summary wmsa_search_index_api_search_time = Summary.build()
            .name("wmsa_search_index_api_search_time")
            .help("-")
            .register();

    private final Logger logger = LoggerFactory.getLogger(getClass());

    private final MqOutbox outbox;

    @Inject
    public QueryClient(ServiceDescriptors descriptors,
                       MessageQueueFactory messageQueueFactory)
    {
        super(descriptors.forId(ServiceId.Query), WmsaHome.getHostsFile(), GsonFactory::get);

        // The recipient is instance "0" of the query service; the sender name is taken from
        // the "service-name" system property, with a random UUID as the fallback
        final String recipientInbox = ServiceId.Query.name + ":" + "0";
        final String senderName = System.getProperty("service-name", UUID.randomUUID().toString());

        this.outbox = messageQueueFactory.createOutbox(recipientInbox, senderName, UUID.randomUUID());
    }

    /** Delegate an Index API style query by posting it to the "/delegate/" endpoint,
     * blocking until the first response item is available. */
    @CheckReturnValue
    public SearchResultSet delegate(Context ctx, SearchSpecification specs) {
        return wmsa_search_index_api_delegate_time.time(() -> {
            return this.postGet(ctx, "/delegate/", specs, SearchResultSet.class).blockingFirst();
        });
    }

    /** Post the query parameters to the "/search/" endpoint,
     * blocking until the first response item is available. */
    @CheckReturnValue
    public QueryResponse search(Context ctx, QueryParams params) {
        return wmsa_search_index_api_search_time.time(() -> {
            return this.postGet(ctx, "/search/", params, QueryResponse.class).blockingFirst();
        });
    }

    /** @return the message queue outbox created in the constructor */
    public MqOutbox outbox() {
        return outbox;
    }
}

View File

@ -0,0 +1,21 @@
package nu.marginalia.query.model;
import nu.marginalia.index.client.model.query.SearchSpecification;
import java.util.*;
/**
 * Immutable holder for a search specification together with the human-readable
 * search terms and a domain.
 */
public class ProcessedQuery {

    public final SearchSpecification specs;
    public final List<String> searchTermsHuman;
    public final String domain;

    /** Creates a processed query that carries only a specification:
     * the human search terms are an empty list and the domain is null. */
    public ProcessedQuery(SearchSpecification justSpecs) {
        this(justSpecs, List.of(), null);
    }

    /** Creates a processed query from all three parts; the arguments are stored as given. */
    public ProcessedQuery(SearchSpecification specs,
                          List<String> searchTermsHuman,
                          String domain)
    {
        this.specs = specs;
        this.searchTermsHuman = searchTermsHuman;
        this.domain = domain;
    }
}

View File

@ -0,0 +1,41 @@
package nu.marginalia.query.model;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.SpecificationLimit;
import java.util.List;
/**
 * Parameters for a search request.
 * <p>
 * NOTE(review): the precise meaning of the individual components is defined by the
 * consumer of this record, which is not visible here; the names suggest a human-written
 * query string, an optional domain to search near, implicit term lists, and
 * quality/year/size/rank limits.  Confirm against the query service.
 */
public record QueryParams(String humanQuery,
                          String nearDomain,
                          List<String> tacitIncludes,
                          List<String> tacitExcludes,
                          List<String> tacitPriority,
                          List<String> tacitAdvice,
                          SpecificationLimit quality,
                          SpecificationLimit year,
                          SpecificationLimit size,
                          SpecificationLimit rank,
                          List<Integer> domainIds,
                          QueryLimits limits,
                          SearchSetIdentifier identifier)
{
    /**
     * Convenience constructor for a plain query: nearDomain is null, every list
     * is empty, and every specification limit is {@code SpecificationLimit.none()}.
     */
    public QueryParams(String query, QueryLimits limits, SearchSetIdentifier identifier) {
        this(query,
             null,
             List.of(), List.of(), List.of(), List.of(),
             SpecificationLimit.none(),
             SpecificationLimit.none(),
             SpecificationLimit.none(),
             SpecificationLimit.none(),
             List.of(),
             limits,
             identifier);
    }
}

View File

@ -0,0 +1,23 @@
package nu.marginalia.query.model;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Response to a search request: the specification, the decorated results,
 * the human-readable search terms, a list of problems, and a domain.
 */
public record QueryResponse(SearchSpecification specs,
                            List<DecoratedSearchResultItem> results,
                            List<String> searchTermsHuman,
                            List<String> problems,
                            String domain)
{
    /**
     * Gathers the include-terms of every subquery in {@code specs}.
     *
     * @return a new mutable set holding each distinct include-term once
     */
    public Set<String> getAllKeywords() {
        final Set<String> allKeywords = new HashSet<>(100);

        for (var subquery : specs.subqueries) {
            allKeywords.addAll(subquery.searchTermsInclude);
        }

        return allKeywords;
    }
}

View File

@ -3,7 +3,7 @@
## Core Services ## Core Services
* [assistant-api](assistant-api/) * [assistant-api](assistant-api/)
* [search-api](search-api/) * [query-api](query-api/)
* [index-api](index-api/) * [index-api](index-api/)
These are clients for the [core services](../services-core/), along with what models These are clients for the [core services](../services-core/), along with what models
@ -13,11 +13,11 @@ are necessary for speaking to them. They each implement the abstract client clas
All that is necessary is to `@Inject` them into the constructor and then All that is necessary is to `@Inject` them into the constructor and then
requests can be sent. requests can be sent.
**Note:** If you are looking for the public API, it's handled by the api service in [services-satellite/api-service](../services-satellite/api-service). **Note:** If you are looking for the public API, it's handled by the api service in [services-application/api-service](../services-application/api-service).
## MQ-API Process API ## MQ-API Process API
[process-mqapi](process-mqapi/) defines requests and inboxes for the message queue based API used [process-mqapi](process-mqapi/) defines requests and inboxes for the message queue based API used
for interacting with processes. for interacting with processes.
See [libraries/message-queue](../libraries/message-queue) and [services-satellite/control-service](../services-satellite/control-service). See [libraries/message-queue](../libraries/message-queue) and [services-core/control-service](../services-core/control-service).

View File

@ -1,8 +0,0 @@
# Search API
Client and models for talking to the [search-service](../../services-core/search-service),
implemented with the base client from [service-client](../../common/service-client).
## Central Classes
* [SearchClient](src/main/java/nu/marginalia/search/client/SearchClient.java)

View File

@ -1,52 +0,0 @@
package nu.marginalia.search.client;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import io.reactivex.rxjava3.core.Observable;
import nu.marginalia.client.AbstractDynamicClient;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.search.client.model.ApiSearchResults;
import nu.marginalia.service.descriptor.ServiceDescriptors;
import nu.marginalia.service.id.ServiceId;
import nu.marginalia.WmsaHome;
import nu.marginalia.client.Context;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.CheckReturnValue;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.UUID;
/**
 * Client for the service registered under {@link ServiceId#Search}.
 * <p>
 * Provides the "/api/search" query call and a message queue outbox addressed
 * to the search service's inbox.
 */
@Singleton
public class SearchClient extends AbstractDynamicClient {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    private final MqOutbox outbox;

    @Inject
    public SearchClient(ServiceDescriptors descriptors,
                        MessageQueueFactory messageQueueFactory)
    {
        super(descriptors.forId(ServiceId.Search), WmsaHome.getHostsFile(), GsonFactory::get);

        // The recipient is instance "0" of the search service; the sender name is taken from
        // the "service-name" system property, with a random UUID as the fallback
        final String recipientInbox = ServiceId.Search.name + ":" + "0";
        final String senderName = System.getProperty("service-name", UUID.randomUUID().toString());

        this.outbox = messageQueueFactory.createOutbox(recipientInbox, senderName, UUID.randomUUID());
    }

    /** @return the message queue outbox created in the constructor */
    public MqOutbox outbox() {
        return outbox;
    }

    /**
     * Issues a GET request against "/api/search".
     *
     * @param queryString the query; URL-encoded as UTF-8 before being placed in the request
     * @param count       sent as the "count" request parameter
     * @param profile     sent as the "index" request parameter
     * @return an observable of the deserialized search results
     */
    @CheckReturnValue
    public Observable<ApiSearchResults> query(Context ctx, String queryString, int count, int profile) {
        final String encodedQuery = URLEncoder.encode(queryString, StandardCharsets.UTF_8);
        final String endpoint = String.format("/api/search?query=%s&count=%d&index=%d", encodedQuery, count, profile);

        return this.get(ctx, endpoint, ApiSearchResults.class);
    }
}

View File

@ -1,7 +0,0 @@
package nu.marginalia.search.client;
/** String constants naming the search client's message queue endpoints. */
public class SearchMqEndpoints {

    /**
     * Flushes the URL caches.
     * Run this if significant changes have occurred in the URLs database.
     */
    public static final String FLUSH_CACHES = "FLUSH_CACHES";

    // NOTE(review): presumably asks the receiver to switch to a new link database -- confirm
    public static final String SWITCH_LINKDB = "SWITCH_LINKDB";
}

View File

@ -8,4 +8,4 @@ this information take effect in production immediately, even before
the information was searchable. the information was searchable.
It is constructed by the [loading-process](../../processes/loading-process), and consumed It is constructed by the [loading-process](../../processes/loading-process), and consumed
by the [search-service](../../services-core/search-service). by the [index-service](../../services-core/index-service).

View File

@ -62,6 +62,15 @@ public class LinkdbReader {
connection = createConnection(); connection = createConnection();
} }
/**
 * Re-establishes the connection: closes the current one if it is set, then
 * creates a new one.  Useful in tests and not much else.
 *
 * @throws SQLException if closing the old connection or creating the new one fails
 */
public void reconnect() throws SQLException {
    if (connection != null) {
        connection.close();
    }

    connection = createConnection();
}
public List<String> getUrlsFromDomain(int domainId) throws SQLException { public List<String> getUrlsFromDomain(int domainId) throws SQLException {
if (connection == null || if (connection == null ||
connection.isClosed()) connection.isClosed())

View File

@ -6,7 +6,6 @@ This package contains common models to the search engine
* [EdgeDomain](src/main/java/nu/marginalia/model/EdgeDomain.java) * [EdgeDomain](src/main/java/nu/marginalia/model/EdgeDomain.java)
* [EdgeUrl](src/main/java/nu/marginalia/model/EdgeUrl.java) * [EdgeUrl](src/main/java/nu/marginalia/model/EdgeUrl.java)
* [EdgeId](src/main/java/nu/marginalia/model/id/EdgeId.java)
* [DocumentMetadata](src/main/java/nu/marginalia/model/idx/DocumentMetadata.java) * [DocumentMetadata](src/main/java/nu/marginalia/model/idx/DocumentMetadata.java)
* [DocumentFlags](src/main/java/nu/marginalia/model/idx/DocumentFlags.java) * [DocumentFlags](src/main/java/nu/marginalia/model/idx/DocumentFlags.java)
* [WordMetadata](src/main/java/nu/marginalia/model/idx/WordMetadata.java) * [WordMetadata](src/main/java/nu/marginalia/model/idx/WordMetadata.java)

View File

@ -10,6 +10,7 @@ public class SearchServiceDescriptors {
public static ServiceDescriptors descriptors = new ServiceDescriptors( public static ServiceDescriptors descriptors = new ServiceDescriptors(
List.of(new ServiceDescriptor(ServiceId.Api, 5004), List.of(new ServiceDescriptor(ServiceId.Api, 5004),
new ServiceDescriptor(ServiceId.Index, 5021), new ServiceDescriptor(ServiceId.Index, 5021),
new ServiceDescriptor(ServiceId.Query, 5022),
new ServiceDescriptor(ServiceId.Search, 5023), new ServiceDescriptor(ServiceId.Search, 5023),
new ServiceDescriptor(ServiceId.Assistant, 5025), new ServiceDescriptor(ServiceId.Assistant, 5025),
new ServiceDescriptor(ServiceId.Dating, 5070), new ServiceDescriptor(ServiceId.Dating, 5070),

View File

@ -6,6 +6,7 @@ public enum ServiceId {
Api("api-service"), Api("api-service"),
Search("search-service"), Search("search-service"),
Index("index-service"), Index("index-service"),
Query("query-service"),
Control("control-service"), Control("control-service"),

View File

@ -17,7 +17,7 @@ Contains domain ranking algorithms.
## See Also ## See Also
* [features-search/result-ranking](../../features-search/result-ranking) - Ranks search results * [result-ranking](../result-ranking) - Ranks search results
## Useful Resources ## Useful Resources

View File

@ -1,4 +1,7 @@
package nu.marginalia.index.query.limit; package nu.marginalia.index.query.limit;
public record QueryLimits(int resultsByDomain, int resultsTotal, int timeoutMs, int fetchSize) { public record QueryLimits(int resultsByDomain, int resultsTotal, int timeoutMs, int fetchSize) {
/**
 * Returns a copy of these limits in which the per-domain result limit is
 * raised to the total result limit; the timeout and fetch size are kept.
 */
public QueryLimits forSingleDomain() {
    final int perDomainLimit = resultsTotal;

    return new QueryLimits(perDomainLimit, resultsTotal, timeoutMs, fetchSize);
}
} }

View File

@ -14,11 +14,11 @@ These indices rely heavily on the [libraries/btree](../libraries/btree) and [lib
## Algorithms ## Algorithms
* [domain-ranking](domain-ranking/) contains ranking algorithms. * [domain-ranking](domain-ranking/) contains domain ranking algorithms.
* [result-ranking](result-ranking/) contains logic for ranking search results by relevance.
# Libraries # Libraries
* [index-query](index-query/) contains structures for evaluating search queries. * [index-query](index-query/) contains structures for evaluating search queries.
* [index-journal](index-journal/) contains tools for writing and reading index data. * [index-journal](index-journal/) contains tools for writing and reading index data.
* [lexicon](lexicon/) contains a mapping between words' string representation and a unique integer identifier.

View File

@ -11,4 +11,4 @@ results higher.
## See Also ## See Also
* [features-index/domain-ranking](../../features-index/domain-ranking) - Ranks domains * [features-index/domain-ranking](../domain-ranking) - Ranks domains

View File

@ -1,6 +1,6 @@
# Query Parser # Query Parser
End-user search query parsing tools used by the [search-service](../../services-core/search-service). End-user search query parsing tools used by the [query-service](../../services-core/query-service).
## Central Classes ## Central Classes

View File

@ -36,7 +36,6 @@ public class Token {
case EXCLUDE_TERM: visitor.onExcludeTerm(this); break; case EXCLUDE_TERM: visitor.onExcludeTerm(this); break;
case PRIORTY_TERM: visitor.onPriorityTerm(this); break; case PRIORTY_TERM: visitor.onPriorityTerm(this); break;
case ADVICE_TERM: visitor.onAdviceTerm(this); break; case ADVICE_TERM: visitor.onAdviceTerm(this); break;
case NEAR_TERM: visitor.onNearTerm(this); break;
case LITERAL_TERM: visitor.onLiteralTerm(this); break; case LITERAL_TERM: visitor.onLiteralTerm(this); break;
case YEAR_TERM: visitor.onYearTerm(this); break; case YEAR_TERM: visitor.onYearTerm(this); break;

View File

@ -6,8 +6,6 @@ public interface TokenVisitor {
void onExcludeTerm(Token token); void onExcludeTerm(Token token);
void onPriorityTerm(Token token); void onPriorityTerm(Token token);
void onAdviceTerm(Token token); void onAdviceTerm(Token token);
void onNearTerm(Token token);
void onYearTerm(Token token); void onYearTerm(Token token);
void onSizeTerm(Token token); void onSizeTerm(Token token);
void onRankTerm(Token token); void onRankTerm(Token token);

View File

@ -0,0 +1,7 @@
**Note**: This package is called `features-qs` rather than `features-query` because the latter,
though more consistent with other packages like features-index, would be very confusing
as there are other packages elsewhere with the 'query' name (e.g. features-index/index-query).
## Contents
* [query-parser](query-parser/) contains code for parsing the user-facing query grammar.

View File

@ -3,8 +3,6 @@
These are bits of search-engine related code that are relatively isolated pieces of business logic, These are bits of search-engine related code that are relatively isolated pieces of business logic,
that benefit from the clarity of being kept separate from the rest of the that benefit from the clarity of being kept separate from the rest of the
search engine code. search engine code.
*
* [query-parser](query-parser/) contains code for parsing the user-facing query grammar.
* [result-ranking](result-ranking/) contains logic for ranking search results by relevance.
* [screenshots](screenshots/) and [random-websites](random-websites/) contain SQL queries for the random * [screenshots](screenshots/) and [random-websites](random-websites/) contain SQL queries for the random
exploration mode. exploration mode.

View File

@ -13,4 +13,4 @@ its words, how they stem, POS tags, and so on.
[features-convert/keyword-extraction](../../features-convert/keyword-extraction) uses this code to identify which keywords [features-convert/keyword-extraction](../../features-convert/keyword-extraction) uses this code to identify which keywords
are important. are important.
[features-search/query-parser](../../features-search/query-parser) also does some language processing. [features-qs/query-parser](../../features-qs/query-parser) also does some language processing.

View File

@ -7,7 +7,6 @@ Contains models shared by the [crawling-process](../../processes/crawling-proces
* [CrawledDocument](src/main/java/nu/marginalia/crawling/model/CrawledDocument.java) * [CrawledDocument](src/main/java/nu/marginalia/crawling/model/CrawledDocument.java)
* [CrawledDomain](src/main/java/nu/marginalia/crawling/model/CrawledDomain.java) * [CrawledDomain](src/main/java/nu/marginalia/crawling/model/CrawledDomain.java)
* [CrawlingSpecification](src/main/java/nu/marginalia/crawling/model/spec/CrawlingSpecification.java)
### Serialization ### Serialization
* [CrawledDomainReader](src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java) * [CrawledDomainReader](src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java)

View File

@ -8,7 +8,7 @@ into per-domain snapshots.
* [CrawlerMain](src/main/java/nu/marginalia/crawl/CrawlerMain.java) orchestrates the crawling. * [CrawlerMain](src/main/java/nu/marginalia/crawl/CrawlerMain.java) orchestrates the crawling.
* [CrawlerRetreiver](src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java) * [CrawlerRetreiver](src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java)
visits known addresses from a domain and downloads each document. visits known addresses from a domain and downloads each document.
* [HttpFetcher](src/main/java/nu/marginalia/crawl/retreival/HttpFetcher.java) * [HttpFetcher](src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java)
fetches a URL. fetches a URL.
## See Also ## See Also

View File

@ -32,7 +32,7 @@ dependencies {
implementation project(':code:libraries:message-queue') implementation project(':code:libraries:message-queue')
implementation project(':code:libraries:language-processing') implementation project(':code:libraries:language-processing')
implementation project(':third-party:commons-codec') implementation project(':third-party:commons-codec')
testImplementation project(':code:services-core:search-service') testImplementation project(':code:services-application:search-service')
implementation project(':code:process-models:crawling-model') implementation project(':code:process-models:crawling-model')
implementation project(':code:process-models:processed-data') implementation project(':code:process-models:processed-data')

View File

@ -6,5 +6,4 @@ the index-service.
## Central Classes ## Central Classes
* [LoaderMain](src/main/java/nu/marginalia/loading/LoaderMain.java) main class. * [LoaderMain](src/main/java/nu/marginalia/loading/LoaderMain.java) main class.
* [Loader](src/main/java/nu/marginalia/loading/loader/Loader.java) evaluates loading instructions.

View File

@ -5,8 +5,7 @@
The [crawling-process](crawling-process/) fetches website contents and saves them The [crawling-process](crawling-process/) fetches website contents and saves them
as compressed JSON models described in [crawling-model](../process-models/crawling-model/). as compressed JSON models described in [crawling-model](../process-models/crawling-model/).
The operation is specified by a crawl job specification. This is generated by [tools/crawl-job-extractor](../tools/crawl-job-extractor/) The operation is specified by a [crawl specification](../process-models/crawl-spec), which can be created in the control GUI.
based on the content in the database.
## 2. Converting Process ## 2. Converting Process

View File

@ -14,14 +14,15 @@ A map of the most important components and how they relate can be found below.
### Services ### Services
* [core services](services-core/) "macroservices", stateful, memory hungry doing heavy lifting. * [core services](services-core/) "macroservices", stateful, memory hungry doing heavy lifting.
* * [control-service](services-core/control-service) * * [control](services-core/control-service)
* * [search](services-core/search-service) * * [query](services-core/query-service)
* * [index](services-core/index-service) * * [index](services-core/index-service)
* * [assistant](services-core/assistant-service) * * [assistant](services-core/assistant-service)
* [satellite services](services-satellite/) "microservices", stateless providing additional functionality. * [application services](services-application/) "microservices", stateless providing additional functionality and making an application out of the search engine.
* * [api](services-satellite/api-service) - public API * * [api](services-application/api-service) - public API
* * [dating](services-satellite/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/) * * [search](services-application/search-service) - marginalia search application
* * [explorer](services-satellite/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/) * * [dating](services-application/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/)
* * [explorer](services-application/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/)
* an [internal API](api/) * an [internal API](api/)
### Processes ### Processes

View File

@ -28,7 +28,9 @@ dependencies {
implementation project(':code:common:config') implementation project(':code:common:config')
implementation project(':code:common:service-discovery') implementation project(':code:common:service-discovery')
implementation project(':code:common:service-client') implementation project(':code:common:service-client')
implementation project(':code:api:search-api') implementation project(':code:api:query-api')
implementation project(':code:api:index-api')
implementation project(':code:features-index:index-query')
implementation libs.bundles.slf4j implementation libs.bundles.slf4j

View File

@ -0,0 +1,111 @@
package nu.marginalia.api;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.api.model.ApiSearchResult;
import nu.marginalia.api.model.ApiSearchResultQueryDetails;
import nu.marginalia.api.model.ApiSearchResults;
import nu.marginalia.client.Context;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.index.searchset.SearchSet;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.query.client.QueryClient;
import nu.marginalia.query.model.QueryParams;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Runs searches on behalf of the public API: builds the query parameters, sends
 * them through the {@link QueryClient}, and converts the decorated index results
 * into the API's result model.
 */
@Singleton
public class ApiSearchOperator {

    private final QueryClient queryClient;

    @Inject
    public ApiSearchOperator(QueryClient queryClient) {
        this.queryClient = queryClient;
    }

    /**
     * Performs a search and returns at most {@code count} results, ordered by
     * descending quality.
     *
     * @param context request context passed on to the query client
     * @param query   the query string, also echoed back in the response
     * @param count   maximum number of results to return
     * @param index   numeric search set selector, see {@link #selectSearchSet(int)}
     */
    public ApiSearchResults query(Context context,
                                  String query,
                                  int count,
                                  int index)
    {
        var params = createParams(query, count, index);
        var response = queryClient.search(context, params);

        var converted = response.results()
                .stream()
                .map(this::convert)
                .sorted(Comparator.comparing(ApiSearchResult::getQuality).reversed())
                .limit(count)
                .collect(Collectors.toList());

        return new ApiSearchResults("RESTRICTED", query, converted);
    }

    /** Builds query parameters with fixed limits; the total result limit is capped at 100. */
    private QueryParams createParams(String query, int count, int index) {
        SearchSetIdentifier searchSet = selectSearchSet(index);

        var limits = new QueryLimits(2, Math.min(100, count), 150, 8192);

        return new QueryParams(query, limits, searchSet);
    }

    /** Maps the numeric index parameter to a search set: 1 is SMALLWEB, 2 is RETRO,
     * and every other value is NONE. */
    private SearchSetIdentifier selectSearchSet(int index) {
        return switch (index) {
            case 1 -> SearchSetIdentifier.SMALLWEB;
            case 2 -> SearchSetIdentifier.RETRO;
            default -> SearchSetIdentifier.NONE;
        };
    }

    /**
     * Converts a decorated index result into the API's result model.  A ranking
     * score that is not finite is reported as -100.
     */
    ApiSearchResult convert(DecoratedSearchResultItem url) {
        List<List<ApiSearchResultQueryDetails>> details = collectDetails(url);

        return new ApiSearchResult(
                url.url.toString(),
                url.getTitle(),
                url.getDescription(),
                sanitizeNaN(url.rankingScore, -100),
                details
        );
    }

    /**
     * Produces one list of keyword details per subquery of the raw index result.
     * A subquery is left out entirely if any of its keywords has empty word metadata.
     * Returns an empty list if there is no raw index result.
     */
    private List<List<ApiSearchResultQueryDetails>> collectDetails(DecoratedSearchResultItem url) {
        List<List<ApiSearchResultQueryDetails>> details = new ArrayList<>();

        if (url.rawIndexResult == null) {
            return details;
        }

        var scoresBySubquery = url.rawIndexResult.keywordScores
                .stream()
                .collect(Collectors.groupingBy(SearchResultKeywordScore::subquery));

        for (var scores : scoresBySubquery.values()) {
            List<ApiSearchResultQueryDetails> subqueryDetails = new ArrayList<>();
            boolean allKeywordsHaveMetadata = true;

            for (var score : scores) {
                var metadata = new WordMetadata(score.encodedWordMetadata());

                if (metadata.isEmpty()) {
                    // One keyword without metadata disqualifies the whole subquery
                    allKeywordsHaveMetadata = false;
                    break;
                }

                Set<String> flags = metadata.flagSet()
                        .stream()
                        .map(Object::toString)
                        .collect(Collectors.toSet());

                int positionCount = Long.bitCount(metadata.positions());

                subqueryDetails.add(new ApiSearchResultQueryDetails(score.keyword, positionCount, flags));
            }

            if (allKeywordsHaveMetadata) {
                details.add(subqueryDetails);
            }
        }

        return details;
    }

    /** Returns {@code value} if it is finite, otherwise {@code alternative}. */
    private double sanitizeNaN(double value, double alternative) {
        return Double.isFinite(value) ? value : alternative;
    }
}

View File

@ -3,13 +3,13 @@ package nu.marginalia.api;
import com.google.gson.Gson; import com.google.gson.Gson;
import com.google.inject.Inject; import com.google.inject.Inject;
import nu.marginalia.api.model.ApiLicense; import nu.marginalia.api.model.ApiLicense;
import nu.marginalia.api.model.ApiSearchResults;
import nu.marginalia.api.svc.LicenseService; import nu.marginalia.api.svc.LicenseService;
import nu.marginalia.api.svc.RateLimiterService; import nu.marginalia.api.svc.RateLimiterService;
import nu.marginalia.api.svc.ResponseCache; import nu.marginalia.api.svc.ResponseCache;
import nu.marginalia.client.Context; import nu.marginalia.client.Context;
import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.search.client.SearchClient; import nu.marginalia.query.client.QueryClient;
import nu.marginalia.search.client.model.ApiSearchResults;
import nu.marginalia.service.server.*; import nu.marginalia.service.server.*;
import nu.marginalia.service.server.mq.MqNotification; import nu.marginalia.service.server.mq.MqNotification;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -24,29 +24,32 @@ public class ApiService extends Service {
private final Logger logger = LoggerFactory.getLogger(getClass()); private final Logger logger = LoggerFactory.getLogger(getClass());
private final Gson gson = GsonFactory.get(); private final Gson gson = GsonFactory.get();
private final SearchClient searchClient; private final QueryClient queryClient;
private final ResponseCache responseCache; private final ResponseCache responseCache;
private final LicenseService licenseService; private final LicenseService licenseService;
private final RateLimiterService rateLimiterService; private final RateLimiterService rateLimiterService;
private final ApiSearchOperator searchOperator;
// Marker for filtering out sensitive content from the persistent logs // Marker for filtering out sensitive content from the persistent logs
private final Marker queryMarker = MarkerFactory.getMarker("QUERY"); private final Marker queryMarker = MarkerFactory.getMarker("QUERY");
@Inject @Inject
public ApiService(BaseServiceParams params, public ApiService(BaseServiceParams params,
SearchClient searchClient, QueryClient queryClient,
ResponseCache responseCache, ResponseCache responseCache,
LicenseService licenseService, LicenseService licenseService,
RateLimiterService rateLimiterService RateLimiterService rateLimiterService,
ApiSearchOperator searchOperator
) { ) {
super(params); super(params);
this.searchClient = searchClient; this.queryClient = queryClient;
this.responseCache = responseCache; this.responseCache = responseCache;
this.licenseService = licenseService; this.licenseService = licenseService;
this.rateLimiterService = rateLimiterService; this.rateLimiterService = rateLimiterService;
this.searchOperator = searchOperator;
Spark.get("/public/api/", (rq, rsp) -> { Spark.get("/public/api/", (rq, rsp) -> {
rsp.redirect("https://memex.marginalia.nu/projects/edge/api.gmi"); rsp.redirect("https://memex.marginalia.nu/projects/edge/api.gmi");
@ -76,6 +79,8 @@ public class ApiService extends Service {
var license = licenseService.getLicense(request.params("key")); var license = licenseService.getLicense(request.params("key"));
response.type("application/json");
var cachedResponse = responseCache.getResults(license, args[0], request.queryString()); var cachedResponse = responseCache.getResults(license, args[0], request.queryString());
if (cachedResponse.isPresent()) { if (cachedResponse.isPresent()) {
return cachedResponse.get(); return cachedResponse.get();
@ -87,7 +92,6 @@ public class ApiService extends Service {
// We set content type late because in the case of error, we don't want to tell the client // We set content type late because in the case of error, we don't want to tell the client
// that the error message is JSON when it is plain text. // that the error message is JSON when it is plain text.
response.type("application/json");
return result; return result;
} }
@ -102,8 +106,9 @@ public class ApiService extends Service {
logger.info(queryMarker, "{} Search {}", license.key, query); logger.info(queryMarker, "{} Search {}", license.key, query);
return searchClient.query(Context.fromRequest(request), query, count, index) return searchOperator
.blockingFirst().withLicense(license.getLicense()); .query(Context.fromRequest(request), query, count, index)
.withLicense(license.getLicense());
} }
private int intParam(Request request, String name, int defaultValue) { private int intParam(Request request, String name, int defaultValue) {

View File

@ -1,4 +1,4 @@
package nu.marginalia.search.client.model; package nu.marginalia.api.model;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Getter; import lombok.Getter;

View File

@ -1,4 +1,4 @@
package nu.marginalia.search.client.model; package nu.marginalia.api.model;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Getter; import lombok.Getter;

View File

@ -1,4 +1,4 @@
package nu.marginalia.search.client.model; package nu.marginalia.api.model;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Getter; import lombok.Getter;

View File

@ -3,20 +3,12 @@ package nu.marginalia.api.svc;
import com.google.common.cache.Cache; import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheBuilder;
import com.google.inject.Singleton; import com.google.inject.Singleton;
import nu.marginalia.api.model.ApiLicense; import nu.marginalia.api.model.*;
import nu.marginalia.search.client.model.ApiSearchResults;
import java.time.Duration; import java.time.Duration;
import java.util.Optional; import java.util.Optional;
/** This response cache exists entirely to help SearXNG with its rate limiting. /** This response cache exists entirely to help clients with their rate limiting.
* For some reason they're hitting the API with like 5-12 identical requests.
* <p/>
* I've submitted an issue, they were like nah mang it works fine must
* be something else ¯\_()_/¯.
* <p/>
* So we're going to cache the API responses for a short while to mitigate the
* impact of such shotgun queries on the ratelimit.
*/ */
@Singleton @Singleton
public class ResponseCache { public class ResponseCache {

View File

@ -1,7 +1,6 @@
package nu.marginalia.api.svc; package nu.marginalia.api.svc;
import nu.marginalia.api.model.ApiLicense; import nu.marginalia.api.model.*;
import nu.marginalia.search.client.model.ApiSearchResults;
import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;

View File

@ -0,0 +1,9 @@
# Application Services
The application services offer user interfaces/applications around
interacting with the [core services](../services-core).
* The [api-service](api-service/) offers a public API
* The [dating-service](dating-service/) is [explore.marginalia.nu](https://explore.marginalia.nu/)
* The [explorer-service](explorer-service/) is [explore2.marginalia.nu](https://explore2.marginalia.nu/)
* The [search-service](search-service/) is the main application for [search.marginalia.nu](https://search.marginalia.nu/)

View File

@ -25,7 +25,6 @@ dependencies {
implementation project(':code:common:model') implementation project(':code:common:model')
implementation project(':code:common:service') implementation project(':code:common:service')
implementation project(':code:common:config') implementation project(':code:common:config')
implementation project(':code:common:linkdb')
implementation project(':code:features-index:index-query') implementation project(':code:features-index:index-query')
implementation project(':code:libraries:easy-lsh') implementation project(':code:libraries:easy-lsh')
@ -34,16 +33,16 @@ dependencies {
implementation project(':code:libraries:term-frequency-dict') implementation project(':code:libraries:term-frequency-dict')
implementation project(':code:api:assistant-api') implementation project(':code:api:assistant-api')
implementation project(':code:api:query-api')
implementation project(':code:api:index-api') implementation project(':code:api:index-api')
implementation project(':code:api:search-api')
implementation project(':code:common:service-discovery') implementation project(':code:common:service-discovery')
implementation project(':code:common:service-client') implementation project(':code:common:service-client')
implementation project(':code:common:renderer') implementation project(':code:common:renderer')
implementation project(':code:features-search:screenshots') implementation project(':code:features-search:screenshots')
implementation project(':code:features-search:random-websites') implementation project(':code:features-search:random-websites')
implementation project(':code:features-search:query-parser') implementation project(':code:features-qs:query-parser')
implementation project(':code:features-search:result-ranking') implementation project(':code:features-index:result-ranking')
implementation libs.bundles.slf4j implementation libs.bundles.slf4j

View File

@ -20,8 +20,6 @@ appropriate services.
* [CommandEvaluator](src/main/java/nu/marginalia/search/command/CommandEvaluator.java) interprets a user query and acts * [CommandEvaluator](src/main/java/nu/marginalia/search/command/CommandEvaluator.java) interprets a user query and acts
upon it, dealing with special operations like `browse:` or `site:`. upon it, dealing with special operations like `browse:` or `site:`.
* [QueryFactory](src/main/java/nu/marginalia/search/query/QueryFactory.java) parses a search query into a machine readable query specification.
* [SearchQueryIndexService](src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java) passes a parsed search query to the index service, and * [SearchQueryIndexService](src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java) passes a parsed search query to the index service, and
then decorates the search results so that they can be rendered. then decorates the search results so that they can be rendered.

View File

@ -0,0 +1,15 @@
package nu.marginalia.search;
import com.google.inject.AbstractModule;
import nu.marginalia.LanguageModels;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.WmsaHome;
public class SearchModule extends AbstractModule {
public void configure() {
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
bind(WebsiteUrl.class).toInstance(new WebsiteUrl(System.getProperty("website-url", "https://search.marginalia.nu/")));
}
}

View File

@ -7,12 +7,12 @@ import io.reactivex.rxjava3.schedulers.Schedulers;
import nu.marginalia.assistant.client.AssistantClient; import nu.marginalia.assistant.client.AssistantClient;
import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DbDomainQueries; import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.query.client.QueryClient;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.client.Context; import nu.marginalia.client.Context;
import nu.marginalia.search.model.DecoratedSearchResults; import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.query.QueryFactory; import nu.marginalia.search.model.UserSearchParameters;
import nu.marginalia.search.query.model.SearchQuery;
import nu.marginalia.search.query.model.UserSearchParameters;
import nu.marginalia.search.svc.SearchQueryIndexService; import nu.marginalia.search.svc.SearchQueryIndexService;
import nu.marginalia.search.svc.SearchUnitConversionService; import nu.marginalia.search.svc.SearchUnitConversionService;
import org.apache.logging.log4j.util.Strings; import org.apache.logging.log4j.util.Strings;
@ -37,58 +37,59 @@ public class SearchOperator {
private final AssistantClient assistantClient; private final AssistantClient assistantClient;
private final DbDomainQueries domainQueries; private final DbDomainQueries domainQueries;
private final QueryFactory queryFactory; private final QueryClient queryClient;
private final SearchQueryIndexService searchQueryService; private final SearchQueryIndexService searchQueryService;
private final SearchQueryParamFactory paramFactory;
private final SearchUnitConversionService searchUnitConversionService; private final SearchUnitConversionService searchUnitConversionService;
@Inject @Inject
public SearchOperator(AssistantClient assistantClient, public SearchOperator(AssistantClient assistantClient,
DbDomainQueries domainQueries, DbDomainQueries domainQueries,
QueryFactory queryFactory, QueryClient queryClient,
SearchQueryIndexService searchQueryService, SearchQueryIndexService searchQueryService,
SearchUnitConversionService searchUnitConversionService) { SearchQueryParamFactory paramFactory,
SearchUnitConversionService searchUnitConversionService)
{
this.assistantClient = assistantClient; this.assistantClient = assistantClient;
this.domainQueries = domainQueries; this.domainQueries = domainQueries;
this.queryFactory = queryFactory; this.queryClient = queryClient;
this.searchQueryService = searchQueryService; this.searchQueryService = searchQueryService;
this.paramFactory = paramFactory;
this.searchUnitConversionService = searchUnitConversionService; this.searchUnitConversionService = searchUnitConversionService;
} }
public List<UrlDetails> doApiSearch(Context ctx, public List<UrlDetails> doSiteSearch(Context ctx,
UserSearchParameters params) { String domain) {
var queryParams = paramFactory.forSiteSearch(domain);
var queryResponse = queryClient.search(ctx, queryParams);
SearchQuery processedQuery = queryFactory.createQuery(params); return searchQueryService.getResultsFromQuery(queryResponse);
logger.info(queryMarker, "Human terms (API): {}", Strings.join(processedQuery.searchTermsHuman, ','));
return searchQueryService.executeQuery(ctx, processedQuery);
} }
public DecoratedSearchResults doSearch(Context ctx, UserSearchParameters params) { public DecoratedSearchResults doSearch(Context ctx, UserSearchParameters userParams) {
Future<String> eval = searchUnitConversionService.tryEval(ctx, params.humanQuery()); Future<String> eval = searchUnitConversionService.tryEval(ctx, userParams.humanQuery());
SearchQuery processedQuery = queryFactory.createQuery(params); var queryParams = paramFactory.forRegularSearch(userParams);
var queryResponse = queryClient.search(ctx, queryParams);
logger.info(queryMarker, "Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ',')); List<UrlDetails> queryResults = searchQueryService.getResultsFromQuery(queryResponse);
List<UrlDetails> queryResults = searchQueryService.executeQuery(ctx, processedQuery);
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
logger.info(queryMarker, "Search Result Count: {}", queryResults.size()); logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
String evalResult = getFutureOrDefault(eval, ""); String evalResult = getFutureOrDefault(eval, "");
return DecoratedSearchResults.builder() return DecoratedSearchResults.builder()
.params(params) .params(userParams)
.problems(getProblems(ctx, evalResult, queryResults, processedQuery)) .problems(getProblems(ctx, evalResult, queryResults, queryResponse))
.evalResult(evalResult) .evalResult(evalResult)
.results(queryResults) .results(queryResults)
.focusDomain(processedQuery.domain) .focusDomain(queryResponse.domain())
.focusDomainId(getDomainId(processedQuery.domain)) .focusDomainId(getDomainId(queryResponse.domain()))
.build(); .build();
} }
@ -113,20 +114,20 @@ public class SearchOperator {
return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1); return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1);
} }
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, SearchQuery processedQuery) { private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, QueryResponse response) {
final List<String> problems = new ArrayList<>(processedQuery.problems); final List<String> problems = new ArrayList<>(response.problems());
boolean siteSearch = processedQuery.domain != null; boolean siteSearch = response.domain() != null;
if (!siteSearch) { if (!siteSearch) {
if (queryResults.size() <= 5 && null == evalResult) { if (queryResults.size() <= 5 && null == evalResult) {
spellCheckTerms(ctx, processedQuery).forEach(problems::add); spellCheckTerms(ctx, response).forEach(problems::add);
} }
if (queryResults.size() <= 5) { if (queryResults.size() <= 5) {
problems.add("Try rephrasing the query, changing the word order or using synonyms to get different results. <a href=\"https://memex.marginalia.nu/projects/edge/search-tips.gmi\">Tips</a>."); problems.add("Try rephrasing the query, changing the word order or using synonyms to get different results. <a href=\"https://memex.marginalia.nu/projects/edge/search-tips.gmi\">Tips</a>.");
} }
Set<String> representativeKeywords = processedQuery.getAllKeywords(); Set<String> representativeKeywords = response.getAllKeywords();
if (representativeKeywords.size()>1 && (representativeKeywords.contains("definition") || representativeKeywords.contains("define") || representativeKeywords.contains("meaning"))) if (representativeKeywords.size()>1 && (representativeKeywords.contains("definition") || representativeKeywords.contains("define") || representativeKeywords.contains("meaning")))
{ {
problems.add("Tip: Try using a query that looks like <tt>define:word</tt> if you want a dictionary definition"); problems.add("Tip: Try using a query that looks like <tt>define:word</tt> if you want a dictionary definition");
@ -137,8 +138,8 @@ public class SearchOperator {
} }
private Iterable<String> spellCheckTerms(Context ctx, SearchQuery disjointedQuery) { private Iterable<String> spellCheckTerms(Context ctx, QueryResponse response) {
return Observable.fromIterable(disjointedQuery.searchTermsHuman) return Observable.fromIterable(response.searchTermsHuman())
.subscribeOn(Schedulers.io()) .subscribeOn(Schedulers.io())
.flatMap(term -> assistantClient.spellCheck(ctx, term) .flatMap(term -> assistantClient.spellCheck(ctx, term)
.onErrorReturn(e -> Collections.emptyList()) .onErrorReturn(e -> Collections.emptyList())

View File

@ -0,0 +1,53 @@
package nu.marginalia.search;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.search.model.UserSearchParameters;
import java.util.List;
/**
 * Factory for the {@link QueryParams} objects built by this package, one
 * method per kind of search.
 *
 * <p>NOTE(review): {@link QueryParams} is constructed positionally here; the
 * meaning of arguments that are not derived from a named accessor (the
 * {@code null} second argument, the fourth limit and the trailing empty list)
 * is not visible from this class — confirm against the {@code QueryParams}
 * declaration before changing them.
 */
public class SearchQueryParamFactory {

    /**
     * Builds query parameters for a regular search.
     *
     * <p>The search profile taken from {@code userParams} contributes its
     * tacit terms (via {@code addTacitTerms}), its quality/year/size limits
     * and its search set identifier; the human query string is passed through
     * unchanged.
     *
     * @param userParams the user's search parameters
     * @return the parameters for the query
     */
    public QueryParams forRegularSearch(UserSearchParameters userParams) {
        // Scratch subquery that only serves to collect the profile's tacit terms.
        final SearchSubquery tacitTerms = new SearchSubquery();

        final var searchProfile = userParams.profile();
        searchProfile.addTacitTerms(tacitTerms);

        final QueryLimits limits = new QueryLimits(2, 100, 200, 8192);

        return new QueryParams(
                userParams.humanQuery(),
                null,
                tacitTerms.searchTermsInclude,
                tacitTerms.searchTermsExclude,
                tacitTerms.searchTermsPriority,
                tacitTerms.searchTermsAdvice,
                searchProfile.getQualityLimit(),
                searchProfile.getYearLimit(),
                searchProfile.getSizeLimit(),
                SpecificationLimit.none(),
                List.of(),
                limits,
                searchProfile.searchSetIdentifier
        );
    }

    /**
     * Builds query parameters for a search restricted to a single site.
     *
     * <p>The query string is {@code "site:"} followed by {@code domain}. No
     * extra terms are supplied, every specification limit is
     * {@code SpecificationLimit.none()}, and the search set is
     * {@link SearchSetIdentifier#NONE}.
     *
     * @param domain the domain to search within
     * @return the parameters for the query
     */
    public QueryParams forSiteSearch(String domain) {
        final String siteQuery = "site:" + domain;
        final QueryLimits limits = new QueryLimits(100, 100, 100, 512);

        return new QueryParams(
                siteQuery,
                null,
                List.of(),
                List.of(),
                List.of(),
                List.of(),
                SpecificationLimit.none(),
                SpecificationLimit.none(),
                SpecificationLimit.none(),
                SpecificationLimit.none(),
                List.of(),
                limits,
                SearchSetIdentifier.NONE
        );
    }
}

View File

@ -6,15 +6,11 @@ import lombok.SneakyThrows;
import nu.marginalia.WebsiteUrl; import nu.marginalia.WebsiteUrl;
import nu.marginalia.client.Context; import nu.marginalia.client.Context;
import nu.marginalia.db.storage.FileStorageService; import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.linkdb.LinkdbReader;
import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.search.client.SearchMqEndpoints;
import nu.marginalia.search.svc.SearchFrontPageService; import nu.marginalia.search.svc.SearchFrontPageService;
import nu.marginalia.search.svc.*; import nu.marginalia.search.svc.*;
import nu.marginalia.service.control.ServiceEventLog; import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.server.*; import nu.marginalia.service.server.*;
import nu.marginalia.service.server.mq.MqNotification;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import spark.Request; import spark.Request;
@ -23,18 +19,13 @@ import spark.Spark;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
public class SearchService extends Service { public class SearchService extends Service {
private final WebsiteUrl websiteUrl; private final WebsiteUrl websiteUrl;
private final StaticResources staticResources; private final StaticResources staticResources;
private final FileStorageService fileStorageService;
private final LinkdbReader linkdbReader;
private static final Logger logger = LoggerFactory.getLogger(SearchService.class); private static final Logger logger = LoggerFactory.getLogger(SearchService.class);
private final ServiceEventLog eventLog;
@SneakyThrows @SneakyThrows
@Inject @Inject
@ -45,18 +36,12 @@ public class SearchService extends Service {
SearchErrorPageService errorPageService, SearchErrorPageService errorPageService,
SearchAddToCrawlQueueService addToCrawlQueueService, SearchAddToCrawlQueueService addToCrawlQueueService,
SearchFlagSiteService flagSiteService, SearchFlagSiteService flagSiteService,
SearchQueryService searchQueryService, SearchQueryService searchQueryService
SearchApiQueryService apiQueryService,
FileStorageService fileStorageService,
LinkdbReader linkdbReader
) { ) {
super(params); super(params);
this.eventLog = params.eventLog;
this.websiteUrl = websiteUrl; this.websiteUrl = websiteUrl;
this.staticResources = staticResources; this.staticResources = staticResources;
this.fileStorageService = fileStorageService;
this.linkdbReader = linkdbReader;
Spark.staticFiles.expireTime(600); Spark.staticFiles.expireTime(600);
@ -64,7 +49,6 @@ public class SearchService extends Service {
Gson gson = GsonFactory.get(); Gson gson = GsonFactory.get();
Spark.get("/api/search", apiQueryService::apiSearch, gson::toJson);
Spark.get("/public/search", searchQueryService::pathSearch); Spark.get("/public/search", searchQueryService::pathSearch);
Spark.get("/public/site-search/:site/*", this::siteSearchRedir); Spark.get("/public/site-search/:site/*", this::siteSearchRedir);
Spark.get("/public/", frontPageService::render); Spark.get("/public/", frontPageService::render);
@ -87,21 +71,6 @@ public class SearchService extends Service {
Spark.awaitInitialization(); Spark.awaitInitialization();
} }
@SneakyThrows
@MqNotification(endpoint = SearchMqEndpoints.SWITCH_LINKDB)
public void switchLinkdb(String unusedArg) {
logger.info("Switching link database");
Path newPath = fileStorageService.getStorageByType(FileStorageType.LINKDB_STAGING)
.asPath()
.resolve("links.db");
if (Files.exists(newPath)) {
eventLog.logEvent("SEARCH-SWITCH-LINKDB", "");
linkdbReader.switchInput(newPath);
}
}
private Object serveStatic(Request request, Response response) { private Object serveStatic(Request request, Response response) {
String resource = request.params("resource"); String resource = request.params("resource");
staticResources.serveStatic("search", resource, request, response); staticResources.serveStatic("search", resource, request, response);

Some files were not shown because too many files have changed in this diff Show More