Merge pull request #53 from MarginaliaSearch/standalone-index
Move ranking to the index-service, and query parsing to a new query-service; separate out the search-service
This commit is contained in:
commit
c8d820c17b
@ -5,4 +5,6 @@ public class IndexMqEndpoints {
|
||||
public static final String INDEX_REPARTITION = "INDEX-REPARTITION";
|
||||
public static final String SWITCH_INDEX = "SWITCH-INDEX";
|
||||
|
||||
public static final String SWITCH_LINKDB = "SWITCH_LINKDB";
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,82 @@
|
||||
package nu.marginalia.index.client.model.results;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.List;
|
||||
|
||||
@Getter
|
||||
@ToString
|
||||
public class DecoratedSearchResultItem {
|
||||
public final SearchResultItem rawIndexResult;
|
||||
|
||||
@NotNull
|
||||
public final EdgeUrl url;
|
||||
@NotNull
|
||||
public final String title;
|
||||
@NotNull
|
||||
public final String description;
|
||||
public final double urlQuality;
|
||||
@NotNull
|
||||
public final String format;
|
||||
|
||||
/** Document features bitmask, see HtmlFeature */
|
||||
public final int features;
|
||||
|
||||
@Nullable
|
||||
public final Integer pubYear;
|
||||
public final long dataHash;
|
||||
public final int wordsTotal;
|
||||
public final double rankingScore;
|
||||
|
||||
public long documentId() {
|
||||
return rawIndexResult.getDocumentId();
|
||||
}
|
||||
public int domainId() {
|
||||
return rawIndexResult.getDomainId();
|
||||
}
|
||||
public int resultsFromDomain() {
|
||||
return rawIndexResult.getResultsFromDomain();
|
||||
}
|
||||
|
||||
public List<SearchResultKeywordScore> keywordScores() {
|
||||
return rawIndexResult.getKeywordScores();
|
||||
}
|
||||
|
||||
public long rankingId() {
|
||||
return rawIndexResult.getRanking();
|
||||
}
|
||||
|
||||
public DecoratedSearchResultItem(SearchResultItem rawIndexResult,
|
||||
@NotNull
|
||||
EdgeUrl url,
|
||||
@NotNull
|
||||
String title,
|
||||
@NotNull
|
||||
String description,
|
||||
double urlQuality,
|
||||
@NotNull
|
||||
String format,
|
||||
int features,
|
||||
@Nullable
|
||||
Integer pubYear,
|
||||
long dataHash,
|
||||
int wordsTotal,
|
||||
double rankingScore)
|
||||
{
|
||||
this.rawIndexResult = rawIndexResult;
|
||||
this.url = url;
|
||||
this.title = title;
|
||||
this.description = description;
|
||||
this.urlQuality = urlQuality;
|
||||
this.format = format;
|
||||
this.features = features;
|
||||
this.pubYear = pubYear;
|
||||
this.dataHash = dataHash;
|
||||
this.wordsTotal = wordsTotal;
|
||||
this.rankingScore = rankingScore;
|
||||
}
|
||||
}
|
@ -8,8 +8,7 @@ import java.util.List;
|
||||
|
||||
@AllArgsConstructor @Getter @ToString
|
||||
public class SearchResultSet {
|
||||
public List<SearchResultItem> results;
|
||||
public ResultRankingContext rankingContext;
|
||||
public List<DecoratedSearchResultItem> results;
|
||||
public int size() {
|
||||
return results.size();
|
||||
}
|
||||
|
@ -1,7 +1,5 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
|
||||
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
@ -13,8 +11,10 @@ java {
|
||||
|
||||
dependencies {
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:libraries:message-queue')
|
||||
implementation project(':code:features-index:index-query')
|
||||
implementation project(':code:common:service-discovery')
|
||||
implementation project(':code:common:service-client')
|
||||
|
@ -0,0 +1,65 @@
|
||||
package nu.marginalia.query.client;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import io.prometheus.client.Summary;
|
||||
import io.reactivex.rxjava3.core.Observable;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.client.AbstractDynamicClient;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.results.SearchResultSet;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.mq.MessageQueueFactory;
|
||||
import nu.marginalia.mq.outbox.MqOutbox;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.query.model.QueryResponse;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.CheckReturnValue;
|
||||
import java.util.UUID;
|
||||
|
||||
@Singleton
|
||||
public class QueryClient extends AbstractDynamicClient {
|
||||
|
||||
private static final Summary wmsa_search_index_api_delegate_time = Summary.build().name("wmsa_search_index_api_delegate_time").help("-").register();
|
||||
private static final Summary wmsa_search_index_api_search_time = Summary.build().name("wmsa_search_index_api_search_time").help("-").register();
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private final MqOutbox outbox;
|
||||
|
||||
@Inject
|
||||
public QueryClient(ServiceDescriptors descriptors,
|
||||
MessageQueueFactory messageQueueFactory) {
|
||||
|
||||
super(descriptors.forId(ServiceId.Query), WmsaHome.getHostsFile(), GsonFactory::get);
|
||||
|
||||
String inboxName = ServiceId.Query.name + ":" + "0";
|
||||
String outboxName = System.getProperty("service-name", UUID.randomUUID().toString());
|
||||
|
||||
outbox = messageQueueFactory.createOutbox(inboxName, outboxName, UUID.randomUUID());
|
||||
|
||||
}
|
||||
|
||||
/** Delegate an Index API style query directly to the index service */
|
||||
@CheckReturnValue
|
||||
public SearchResultSet delegate(Context ctx, SearchSpecification specs) {
|
||||
return wmsa_search_index_api_delegate_time.time(
|
||||
() -> this.postGet(ctx, "/delegate/", specs, SearchResultSet.class).blockingFirst()
|
||||
);
|
||||
}
|
||||
@CheckReturnValue
|
||||
public QueryResponse search(Context ctx, QueryParams params) {
|
||||
return wmsa_search_index_api_search_time.time(
|
||||
() -> this.postGet(ctx, "/search/", params, QueryResponse.class).blockingFirst()
|
||||
);
|
||||
}
|
||||
public MqOutbox outbox() {
|
||||
return outbox;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,21 @@
|
||||
package nu.marginalia.query.model;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class ProcessedQuery {
|
||||
public final SearchSpecification specs;
|
||||
public final List<String> searchTermsHuman;
|
||||
public final String domain;
|
||||
|
||||
public ProcessedQuery(SearchSpecification specs, List<String> searchTermsHuman, String domain) {
|
||||
this.specs = specs;
|
||||
this.searchTermsHuman = searchTermsHuman;
|
||||
this.domain = domain;
|
||||
}
|
||||
|
||||
public ProcessedQuery(SearchSpecification justSpecs) {
|
||||
this(justSpecs, List.of(), null);
|
||||
}
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
package nu.marginalia.query.model;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public record QueryParams(
|
||||
String humanQuery,
|
||||
String nearDomain,
|
||||
List<String> tacitIncludes,
|
||||
List<String> tacitExcludes,
|
||||
List<String> tacitPriority,
|
||||
List<String> tacitAdvice,
|
||||
SpecificationLimit quality,
|
||||
SpecificationLimit year,
|
||||
SpecificationLimit size,
|
||||
SpecificationLimit rank,
|
||||
List<Integer> domainIds,
|
||||
QueryLimits limits,
|
||||
SearchSetIdentifier identifier
|
||||
)
|
||||
{
|
||||
public QueryParams(String query, QueryLimits limits, SearchSetIdentifier identifier) {
|
||||
this(query, null,
|
||||
List.of(),
|
||||
List.of(),
|
||||
List.of(),
|
||||
List.of(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
List.of(),
|
||||
limits,
|
||||
identifier
|
||||
);
|
||||
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
package nu.marginalia.query.model;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
public record QueryResponse(SearchSpecification specs,
|
||||
List<DecoratedSearchResultItem> results,
|
||||
List<String> searchTermsHuman,
|
||||
List<String> problems,
|
||||
String domain)
|
||||
{
|
||||
public Set<String> getAllKeywords() {
|
||||
Set<String> keywords = new HashSet<>(100);
|
||||
for (var sq : specs.subqueries) {
|
||||
keywords.addAll(sq.searchTermsInclude);
|
||||
}
|
||||
return keywords;
|
||||
}
|
||||
}
|
@ -3,7 +3,7 @@
|
||||
## Core Services
|
||||
|
||||
* [assistant-api](assistant-api/)
|
||||
* [search-api](search-api/)
|
||||
* [query-api](query-api/)
|
||||
* [index-api](index-api/)
|
||||
|
||||
These are clients for the [core services](../services-core/), along with what models
|
||||
@ -13,11 +13,11 @@ are necessary for speaking to them. They each implement the abstract client clas
|
||||
All that is necessary is to `@Inject` them into the constructor and then
|
||||
requests can be sent.
|
||||
|
||||
**Note:** If you are looking for the public API, it's handled by the api service in [services-satellite/api-service](../services-satellite/api-service).
|
||||
**Note:** If you are looking for the public API, it's handled by the api service in [services-application/api-service](../services-application/api-service).
|
||||
|
||||
## MQ-API Process API
|
||||
|
||||
[process-mqapi](process-mqapi/) defines requests and inboxes for the message queue based API used
|
||||
for interacting with processes.
|
||||
|
||||
See [libraries/message-queue](../libraries/message-queue) and [services-satellite/control-service](../services-satellite/control-service).
|
||||
See [libraries/message-queue](../libraries/message-queue) and [services-core/control-service](../services-core/control-service).
|
@ -1,8 +0,0 @@
|
||||
# Search API
|
||||
|
||||
Client and models for talking to the [search-service](../../services-core/search-service),
|
||||
implemented with the base client from [service-client](../../common/service-client).
|
||||
|
||||
## Central Classes
|
||||
|
||||
* [SearchClient](src/main/java/nu/marginalia/search/client/SearchClient.java)
|
@ -1,52 +0,0 @@
|
||||
package nu.marginalia.search.client;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import io.reactivex.rxjava3.core.Observable;
|
||||
import nu.marginalia.client.AbstractDynamicClient;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.mq.MessageQueueFactory;
|
||||
import nu.marginalia.mq.outbox.MqOutbox;
|
||||
import nu.marginalia.search.client.model.ApiSearchResults;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.client.Context;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.CheckReturnValue;
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.UUID;
|
||||
|
||||
@Singleton
|
||||
public class SearchClient extends AbstractDynamicClient {
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private final MqOutbox outbox;
|
||||
|
||||
@Inject
|
||||
public SearchClient(ServiceDescriptors descriptors,
|
||||
MessageQueueFactory messageQueueFactory) {
|
||||
|
||||
super(descriptors.forId(ServiceId.Search), WmsaHome.getHostsFile(), GsonFactory::get);
|
||||
|
||||
String inboxName = ServiceId.Search.name + ":" + "0";
|
||||
String outboxName = System.getProperty("service-name", UUID.randomUUID().toString());
|
||||
|
||||
outbox = messageQueueFactory.createOutbox(inboxName, outboxName, UUID.randomUUID());
|
||||
|
||||
}
|
||||
|
||||
|
||||
public MqOutbox outbox() {
|
||||
return outbox;
|
||||
}
|
||||
|
||||
@CheckReturnValue
|
||||
public Observable<ApiSearchResults> query(Context ctx, String queryString, int count, int profile) {
|
||||
return this.get(ctx, String.format("/api/search?query=%s&count=%d&index=%d", URLEncoder.encode(queryString, StandardCharsets.UTF_8), count, profile), ApiSearchResults.class);
|
||||
}
|
||||
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
package nu.marginalia.search.client;
|
||||
|
||||
/** String constants naming the message queue endpoints declared for the search client. */
public class SearchMqEndpoints {

    /** Flushes the URL caches, run if significant changes have occurred in the URLs database */
    public static final String FLUSH_CACHES = "FLUSH_CACHES";

    // NOTE(review): presumably requests a switch of the link database; confirm against the handler
    public static final String SWITCH_LINKDB = "SWITCH_LINKDB";

}
|
@ -8,4 +8,4 @@ this information take effect in production immediately, even before
|
||||
the information was searchable.
|
||||
|
||||
It is constructed by the [loading-process](../../processes/loading-process), and consumed
|
||||
by the [search-service](../../services-core/search-service).
|
||||
by the [index-service](../../services-core/index-service).
|
@ -62,6 +62,15 @@ public class LinkdbReader {
|
||||
connection = createConnection();
|
||||
}
|
||||
|
||||
/** Re-establishes the connection, useful in tests and not
|
||||
* much else */
|
||||
public void reconnect() throws SQLException {
|
||||
if (connection != null)
|
||||
connection.close();
|
||||
|
||||
connection = createConnection();
|
||||
}
|
||||
|
||||
public List<String> getUrlsFromDomain(int domainId) throws SQLException {
|
||||
if (connection == null ||
|
||||
connection.isClosed())
|
||||
|
@ -6,7 +6,6 @@ This package contains common models to the search engine
|
||||
|
||||
* [EdgeDomain](src/main/java/nu/marginalia/model/EdgeDomain.java)
|
||||
* [EdgeUrl](src/main/java/nu/marginalia/model/EdgeUrl.java)
|
||||
* [EdgeId](src/main/java/nu/marginalia/model/id/EdgeId.java)
|
||||
* [DocumentMetadata](src/main/java/nu/marginalia/model/idx/DocumentMetadata.java)
|
||||
* [DocumentFlags](src/main/java/nu/marginalia/model/idx/DocumentFlags.java)
|
||||
* [WordMetadata](src/main/java/nu/marginalia/model/idx/WordMetadata.java)
|
||||
|
@ -10,6 +10,7 @@ public class SearchServiceDescriptors {
|
||||
public static ServiceDescriptors descriptors = new ServiceDescriptors(
|
||||
List.of(new ServiceDescriptor(ServiceId.Api, 5004),
|
||||
new ServiceDescriptor(ServiceId.Index, 5021),
|
||||
new ServiceDescriptor(ServiceId.Query, 5022),
|
||||
new ServiceDescriptor(ServiceId.Search, 5023),
|
||||
new ServiceDescriptor(ServiceId.Assistant, 5025),
|
||||
new ServiceDescriptor(ServiceId.Dating, 5070),
|
||||
|
@ -6,6 +6,7 @@ public enum ServiceId {
|
||||
Api("api-service"),
|
||||
Search("search-service"),
|
||||
Index("index-service"),
|
||||
Query("query-service"),
|
||||
|
||||
Control("control-service"),
|
||||
|
||||
|
@ -17,7 +17,7 @@ Contains domain ranking algorithms.
|
||||
|
||||
## See Also
|
||||
|
||||
* [features-search/result-ranking](../../features-search/result-ranking) - Ranks search results
|
||||
* [result-ranking](../result-ranking) - Ranks search results
|
||||
|
||||
## Useful Resources
|
||||
|
||||
|
@ -1,4 +1,7 @@
|
||||
package nu.marginalia.index.query.limit;
|
||||
|
||||
/** Numeric limits attached to a query. */
public record QueryLimits(int resultsByDomain, int resultsTotal, int timeoutMs, int fetchSize) {

    /** Returns a copy of these limits where {@code resultsByDomain} is set to
     * {@code resultsTotal}; the remaining values are carried over unchanged.
     */
    public QueryLimits forSingleDomain() {
        final int perDomain = resultsTotal;

        return new QueryLimits(perDomain, resultsTotal, timeoutMs, fetchSize);
    }
}
|
||||
|
@ -14,11 +14,11 @@ These indices rely heavily on the [libraries/btree](../libraries/btree) and [lib
|
||||
|
||||
## Algorithms
|
||||
|
||||
* [domain-ranking](domain-ranking/) contains ranking algorithms.
|
||||
* [domain-ranking](domain-ranking/) contains domain ranking algorithms.
|
||||
* [result-ranking](result-ranking/) contains logic for ranking search results by relevance.
|
||||
|
||||
# Libraries
|
||||
|
||||
* [index-query](index-query/) contains structures for evaluating search queries.
|
||||
* [index-journal](index-journal/) contains tools for writing and reading index data.
|
||||
* [lexicon](lexicon/) contains a mapping between words' string representation and a unique integer identifier.
|
||||
|
||||
|
@ -11,4 +11,4 @@ results higher.
|
||||
|
||||
## See Also
|
||||
|
||||
* [features-index/domain-ranking](../../features-index/domain-ranking) - Ranks domains
|
||||
* [features-index/domain-ranking](../domain-ranking) - Ranks domains
|
@ -1,6 +1,6 @@
|
||||
# Query Parser
|
||||
|
||||
End-user search query parsing tools used by the [search-service](../../services-core/search-service).
|
||||
End-user search query parsing tools used by the [query-service](../../services-core/query-service).
|
||||
|
||||
## Central Classes
|
||||
|
@ -36,7 +36,6 @@ public class Token {
|
||||
case EXCLUDE_TERM: visitor.onExcludeTerm(this); break;
|
||||
case PRIORTY_TERM: visitor.onPriorityTerm(this); break;
|
||||
case ADVICE_TERM: visitor.onAdviceTerm(this); break;
|
||||
case NEAR_TERM: visitor.onNearTerm(this); break;
|
||||
case LITERAL_TERM: visitor.onLiteralTerm(this); break;
|
||||
|
||||
case YEAR_TERM: visitor.onYearTerm(this); break;
|
@ -6,8 +6,6 @@ public interface TokenVisitor {
|
||||
void onExcludeTerm(Token token);
|
||||
void onPriorityTerm(Token token);
|
||||
void onAdviceTerm(Token token);
|
||||
void onNearTerm(Token token);
|
||||
|
||||
void onYearTerm(Token token);
|
||||
void onSizeTerm(Token token);
|
||||
void onRankTerm(Token token);
|
7
code/features-qs/readme.md
Normal file
7
code/features-qs/readme.md
Normal file
@ -0,0 +1,7 @@
|
||||
**Note**: This package is called `features-qs` rather than `features-query` because the latter,
|
||||
though more consistent with other packages like features-index, would be very confusing
|
||||
as there are other packages elsewhere with the 'query' name (e.g. features-index/index-query).
|
||||
|
||||
## Contents
|
||||
|
||||
* [query-parser](query-parser/) contains code for parsing the user-facing query grammar.
|
@ -3,8 +3,6 @@
|
||||
These are bits of search-engine related code that are relatively isolated pieces of business logic,
|
||||
that benefit from the clarity of being kept separate from the rest of the
|
||||
search engine code.
|
||||
|
||||
* [query-parser](query-parser/) contains code for parsing the user-facing query grammar.
|
||||
* [result-ranking](result-ranking/) contains logic for ranking search results by relevance.
|
||||
*
|
||||
* [screenshots](screenshots/) and [random-websites](random-websites/) contain SQL queries for the random
|
||||
exploration mode.
|
||||
|
@ -13,4 +13,4 @@ its words, how they stem, POS tags, and so on.
|
||||
[features-convert/keyword-extraction](../../features-convert/keyword-extraction) uses this code to identify which keywords
|
||||
are important.
|
||||
|
||||
[features-search/query-parser](../../features-search/query-parser) also does some language processing.
|
||||
[features-qs/query-parser](../../features-qs/query-parser) also does some language processing.
|
@ -7,7 +7,6 @@ Contains models shared by the [crawling-process](../../processes/crawling-proces
|
||||
|
||||
* [CrawledDocument](src/main/java/nu/marginalia/crawling/model/CrawledDocument.java)
|
||||
* [CrawledDomain](src/main/java/nu/marginalia/crawling/model/CrawledDomain.java)
|
||||
* [CrawlingSpecification](src/main/java/nu/marginalia/crawling/model/spec/CrawlingSpecification.java)
|
||||
|
||||
### Serialization
|
||||
* [CrawledDomainReader](src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java)
|
||||
|
@ -8,7 +8,7 @@ into per-domain snapshots.
|
||||
* [CrawlerMain](src/main/java/nu/marginalia/crawl/CrawlerMain.java) orchestrates the crawling.
|
||||
* [CrawlerRetreiver](src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java)
|
||||
visits known addresses from a domain and downloads each document.
|
||||
* [HttpFetcher](src/main/java/nu/marginalia/crawl/retreival/HttpFetcher.java)
|
||||
* [HttpFetcher](src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java)
|
||||
fetches a URL.
|
||||
|
||||
## See Also
|
||||
|
@ -32,7 +32,7 @@ dependencies {
|
||||
implementation project(':code:libraries:message-queue')
|
||||
implementation project(':code:libraries:language-processing')
|
||||
implementation project(':third-party:commons-codec')
|
||||
testImplementation project(':code:services-core:search-service')
|
||||
testImplementation project(':code:services-application:search-service')
|
||||
|
||||
implementation project(':code:process-models:crawling-model')
|
||||
implementation project(':code:process-models:processed-data')
|
||||
|
@ -6,5 +6,4 @@ the index-service.
|
||||
|
||||
## Central Classes
|
||||
|
||||
* [LoaderMain](src/main/java/nu/marginalia/loading/LoaderMain.java) main class.
|
||||
* [Loader](src/main/java/nu/marginalia/loading/loader/Loader.java) evaluates loading instructions.
|
||||
* [LoaderMain](src/main/java/nu/marginalia/loading/LoaderMain.java) main class.
|
@ -5,8 +5,7 @@
|
||||
The [crawling-process](crawling-process/) fetches website contents and saves them
|
||||
as compressed JSON models described in [crawling-model](../process-models/crawling-model/).
|
||||
|
||||
The operation is specified by a crawl job specification. This is generated by [tools/crawl-job-extractor](../tools/crawl-job-extractor/)
|
||||
based on the content in the database.
|
||||
The operation is specified by a [crawl specification](../process-models/crawl-spec), which can be created in the control GUI.
|
||||
|
||||
## 2. Converting Process
|
||||
|
||||
|
@ -14,14 +14,15 @@ A map of the most important components and how they relate can be found below.
|
||||
|
||||
### Services
|
||||
* [core services](services-core/) "macroservices", stateful, memory hungry doing heavy lifting.
|
||||
* * [control-service](services-core/control-service)
|
||||
* * [search](services-core/search-service)
|
||||
* * [control](services-core/control-service)
|
||||
* * [query](services-core/query-service)
|
||||
* * [index](services-core/index-service)
|
||||
* * [assistant](services-core/assistant-service)
|
||||
* [satellite services](services-satellite/) "microservices", stateless providing additional functionality.
|
||||
* * [api](services-satellite/api-service) - public API
|
||||
* * [dating](services-satellite/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/)
|
||||
* * [explorer](services-satellite/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/)
|
||||
* [application services](services-application/) "microservices", stateless providing additional functionality and making an application out of the search engine.
|
||||
* * [api](services-application/api-service) - public API
|
||||
* * [search](services-application/search-service) - marginalia search application
|
||||
* * [dating](services-application/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/)
|
||||
* * [explorer](services-application/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/)
|
||||
* an [internal API](api/)
|
||||
|
||||
### Processes
|
||||
|
@ -28,7 +28,9 @@ dependencies {
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:common:service-discovery')
|
||||
implementation project(':code:common:service-client')
|
||||
implementation project(':code:api:search-api')
|
||||
implementation project(':code:api:query-api')
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:features-index:index-query')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
@ -0,0 +1,111 @@
|
||||
package nu.marginalia.api;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.api.model.ApiSearchResult;
|
||||
import nu.marginalia.api.model.ApiSearchResultQueryDetails;
|
||||
import nu.marginalia.api.model.ApiSearchResults;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.index.searchset.SearchSet;
|
||||
import nu.marginalia.model.idx.WordMetadata;
|
||||
import nu.marginalia.query.client.QueryClient;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Singleton
|
||||
public class ApiSearchOperator {
|
||||
private final QueryClient queryClient;
|
||||
|
||||
@Inject
|
||||
public ApiSearchOperator(QueryClient queryClient) {
|
||||
this.queryClient = queryClient;
|
||||
}
|
||||
|
||||
public ApiSearchResults query(Context context,
|
||||
String query,
|
||||
int count,
|
||||
int index)
|
||||
{
|
||||
var rsp = queryClient.search(context, createParams(query, count, index));
|
||||
|
||||
return new ApiSearchResults("RESTRICTED", query,
|
||||
rsp.results()
|
||||
.stream()
|
||||
.map(this::convert)
|
||||
.sorted(Comparator.comparing(ApiSearchResult::getQuality).reversed())
|
||||
.limit(count)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
private QueryParams createParams(String query, int count, int index) {
|
||||
SearchSetIdentifier searchSet = selectSearchSet(index);
|
||||
|
||||
return new QueryParams(
|
||||
query,
|
||||
new QueryLimits(
|
||||
2,
|
||||
Math.min(100, count),
|
||||
150,
|
||||
8192),
|
||||
searchSet);
|
||||
}
|
||||
|
||||
private SearchSetIdentifier selectSearchSet(int index) {
|
||||
return switch (index) {
|
||||
case 0 -> SearchSetIdentifier.NONE;
|
||||
case 1 -> SearchSetIdentifier.SMALLWEB;
|
||||
case 2 -> SearchSetIdentifier.RETRO;
|
||||
case 3 -> SearchSetIdentifier.NONE;
|
||||
case 5 -> SearchSetIdentifier.NONE;
|
||||
default -> SearchSetIdentifier.NONE;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
ApiSearchResult convert(DecoratedSearchResultItem url) {
|
||||
List<List<ApiSearchResultQueryDetails>> details = new ArrayList<>();
|
||||
if (url.rawIndexResult != null) {
|
||||
var bySet = url.rawIndexResult.keywordScores.stream().collect(Collectors.groupingBy(SearchResultKeywordScore::subquery));
|
||||
|
||||
outer:
|
||||
for (var entries : bySet.values()) {
|
||||
List<ApiSearchResultQueryDetails> lst = new ArrayList<>();
|
||||
for (var entry : entries) {
|
||||
var metadata = new WordMetadata(entry.encodedWordMetadata());
|
||||
if (metadata.isEmpty())
|
||||
continue outer;
|
||||
|
||||
Set<String> flags = metadata.flagSet().stream().map(Object::toString).collect(Collectors.toSet());
|
||||
lst.add(new ApiSearchResultQueryDetails(entry.keyword, Long.bitCount(metadata.positions()), flags));
|
||||
}
|
||||
details.add(lst);
|
||||
}
|
||||
}
|
||||
|
||||
return new ApiSearchResult(
|
||||
url.url.toString(),
|
||||
url.getTitle(),
|
||||
url.getDescription(),
|
||||
sanitizeNaN(url.rankingScore, -100),
|
||||
details
|
||||
);
|
||||
}
|
||||
|
||||
private double sanitizeNaN(double value, double alternative) {
|
||||
if (!Double.isFinite(value)) {
|
||||
return alternative;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
}
|
@ -3,13 +3,13 @@ package nu.marginalia.api;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.api.model.ApiLicense;
|
||||
import nu.marginalia.api.model.ApiSearchResults;
|
||||
import nu.marginalia.api.svc.LicenseService;
|
||||
import nu.marginalia.api.svc.RateLimiterService;
|
||||
import nu.marginalia.api.svc.ResponseCache;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.search.client.SearchClient;
|
||||
import nu.marginalia.search.client.model.ApiSearchResults;
|
||||
import nu.marginalia.query.client.QueryClient;
|
||||
import nu.marginalia.service.server.*;
|
||||
import nu.marginalia.service.server.mq.MqNotification;
|
||||
import org.slf4j.Logger;
|
||||
@ -24,29 +24,32 @@ public class ApiService extends Service {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private final Gson gson = GsonFactory.get();
|
||||
private final SearchClient searchClient;
|
||||
private final QueryClient queryClient;
|
||||
|
||||
private final ResponseCache responseCache;
|
||||
private final LicenseService licenseService;
|
||||
private final RateLimiterService rateLimiterService;
|
||||
private final ApiSearchOperator searchOperator;
|
||||
|
||||
// Marker for filtering out sensitive content from the persistent logs
|
||||
private final Marker queryMarker = MarkerFactory.getMarker("QUERY");
|
||||
|
||||
@Inject
|
||||
public ApiService(BaseServiceParams params,
|
||||
SearchClient searchClient,
|
||||
QueryClient queryClient,
|
||||
ResponseCache responseCache,
|
||||
LicenseService licenseService,
|
||||
RateLimiterService rateLimiterService
|
||||
RateLimiterService rateLimiterService,
|
||||
ApiSearchOperator searchOperator
|
||||
) {
|
||||
|
||||
super(params);
|
||||
|
||||
this.searchClient = searchClient;
|
||||
this.queryClient = queryClient;
|
||||
this.responseCache = responseCache;
|
||||
this.licenseService = licenseService;
|
||||
this.rateLimiterService = rateLimiterService;
|
||||
this.searchOperator = searchOperator;
|
||||
|
||||
Spark.get("/public/api/", (rq, rsp) -> {
|
||||
rsp.redirect("https://memex.marginalia.nu/projects/edge/api.gmi");
|
||||
@ -76,6 +79,8 @@ public class ApiService extends Service {
|
||||
|
||||
var license = licenseService.getLicense(request.params("key"));
|
||||
|
||||
response.type("application/json");
|
||||
|
||||
var cachedResponse = responseCache.getResults(license, args[0], request.queryString());
|
||||
if (cachedResponse.isPresent()) {
|
||||
return cachedResponse.get();
|
||||
@ -87,7 +92,6 @@ public class ApiService extends Service {
|
||||
// We set content type late because in the case of error, we don't want to tell the client
|
||||
// that the error message is JSON when it is plain text.
|
||||
|
||||
response.type("application/json");
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -102,8 +106,9 @@ public class ApiService extends Service {
|
||||
|
||||
logger.info(queryMarker, "{} Search {}", license.key, query);
|
||||
|
||||
return searchClient.query(Context.fromRequest(request), query, count, index)
|
||||
.blockingFirst().withLicense(license.getLicense());
|
||||
return searchOperator
|
||||
.query(Context.fromRequest(request), query, count, index)
|
||||
.withLicense(license.getLicense());
|
||||
}
|
||||
|
||||
private int intParam(Request request, String name, int defaultValue) {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.search.client.model;
|
||||
package nu.marginalia.api.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.search.client.model;
|
||||
package nu.marginalia.api.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.search.client.model;
|
||||
package nu.marginalia.api.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -3,20 +3,12 @@ package nu.marginalia.api.svc;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.api.model.ApiLicense;
|
||||
import nu.marginalia.search.client.model.ApiSearchResults;
|
||||
import nu.marginalia.api.model.*;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Optional;
|
||||
|
||||
/** This response cache exists entirely to help SearXNG with its rate limiting.
|
||||
* For some reason they're hitting the API with like 5-12 identical requests.
|
||||
* <p/>
|
||||
* I've submitted an issue, they were like nah mang it works fine must
|
||||
* be something else ¯\_(ツ)_/¯.
|
||||
* <p/>
|
||||
* So we're going to cache the API responses for a short while to mitigate the
|
||||
* impact of such shotgun queries on the ratelimit.
|
||||
/** This response cache exists entirely to help clients cope with the API's rate limiting.
|
||||
*/
|
||||
@Singleton
|
||||
public class ResponseCache {
|
@ -1,7 +1,6 @@
|
||||
package nu.marginalia.api.svc;
|
||||
|
||||
import nu.marginalia.api.model.ApiLicense;
|
||||
import nu.marginalia.search.client.model.ApiSearchResults;
|
||||
import nu.marginalia.api.model.*;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
9
code/services-application/readme.md
Normal file
9
code/services-application/readme.md
Normal file
@ -0,0 +1,9 @@
|
||||
# Application Services
|
||||
|
||||
The application services offer user interfaces/applications around
|
||||
interacting with the [core services](../services-core).
|
||||
|
||||
* The [api-service](api-service/) offers a public API
|
||||
* The [dating-service](dating-service/) is [explore.marginalia.nu](https://explore.marginalia.nu/)
|
||||
* The [explorer-service](explorer-service/) is [explore2.marginalia.nu](https://explore2.marginalia.nu/)
|
||||
* The [search-service](search-service/) is the main application for [search.marginalia.nu](https://search.marginalia.nu/)
|
@ -25,7 +25,6 @@ dependencies {
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:common:service')
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:common:linkdb')
|
||||
implementation project(':code:features-index:index-query')
|
||||
|
||||
implementation project(':code:libraries:easy-lsh')
|
||||
@ -34,16 +33,16 @@ dependencies {
|
||||
implementation project(':code:libraries:term-frequency-dict')
|
||||
|
||||
implementation project(':code:api:assistant-api')
|
||||
implementation project(':code:api:query-api')
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:api:search-api')
|
||||
implementation project(':code:common:service-discovery')
|
||||
implementation project(':code:common:service-client')
|
||||
implementation project(':code:common:renderer')
|
||||
|
||||
implementation project(':code:features-search:screenshots')
|
||||
implementation project(':code:features-search:random-websites')
|
||||
implementation project(':code:features-search:query-parser')
|
||||
implementation project(':code:features-search:result-ranking')
|
||||
implementation project(':code:features-qs:query-parser')
|
||||
implementation project(':code:features-index:result-ranking')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
@ -20,8 +20,6 @@ appropriate services.
|
||||
* [CommandEvaluator](src/main/java/nu/marginalia/search/command/CommandEvaluator.java) interprets a user query and acts
|
||||
upon it, dealing with special operations like `browse:` or `site:`.
|
||||
|
||||
* [QueryFactory](src/main/java/nu/marginalia/search/query/QueryFactory.java) parses a search query into a machine readable query specification.
|
||||
|
||||
* [SearchQueryIndexService](src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java) passes a parsed search query to the index service, and
|
||||
then decorates the search results so that they can be rendered.
|
||||
|
@ -0,0 +1,15 @@
|
||||
package nu.marginalia.search;
|
||||
|
||||
import com.google.inject.AbstractModule;
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.WebsiteUrl;
|
||||
import nu.marginalia.WmsaHome;
|
||||
|
||||
/**
 * Guice module for the search service.
 * <p>
 * Registers instance bindings for {@link LanguageModels} and {@link WebsiteUrl}.
 */
public class SearchModule extends AbstractModule {
|
||||
|
||||
/**
 * Binds {@link LanguageModels} to the instance returned by
 * {@code WmsaHome.getLanguageModels()}, and {@link WebsiteUrl} to a value built from
 * the {@code website-url} system property, falling back to
 * {@code https://search.marginalia.nu/} when the property is not set.
 */
public void configure() {
|
||||
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
|
||||
bind(WebsiteUrl.class).toInstance(new WebsiteUrl(System.getProperty("website-url", "https://search.marginalia.nu/")));
|
||||
}
|
||||
|
||||
}
|
@ -7,12 +7,12 @@ import io.reactivex.rxjava3.schedulers.Schedulers;
|
||||
import nu.marginalia.assistant.client.AssistantClient;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.query.client.QueryClient;
|
||||
import nu.marginalia.query.model.QueryResponse;
|
||||
import nu.marginalia.search.model.UrlDetails;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.search.model.DecoratedSearchResults;
|
||||
import nu.marginalia.search.query.QueryFactory;
|
||||
import nu.marginalia.search.query.model.SearchQuery;
|
||||
import nu.marginalia.search.query.model.UserSearchParameters;
|
||||
import nu.marginalia.search.model.UserSearchParameters;
|
||||
import nu.marginalia.search.svc.SearchQueryIndexService;
|
||||
import nu.marginalia.search.svc.SearchUnitConversionService;
|
||||
import org.apache.logging.log4j.util.Strings;
|
||||
@ -37,58 +37,59 @@ public class SearchOperator {
|
||||
|
||||
private final AssistantClient assistantClient;
|
||||
private final DbDomainQueries domainQueries;
|
||||
private final QueryFactory queryFactory;
|
||||
|
||||
private final QueryClient queryClient;
|
||||
private final SearchQueryIndexService searchQueryService;
|
||||
private final SearchQueryParamFactory paramFactory;
|
||||
private final SearchUnitConversionService searchUnitConversionService;
|
||||
|
||||
|
||||
@Inject
|
||||
public SearchOperator(AssistantClient assistantClient,
|
||||
DbDomainQueries domainQueries,
|
||||
QueryFactory queryFactory,
|
||||
QueryClient queryClient,
|
||||
SearchQueryIndexService searchQueryService,
|
||||
SearchUnitConversionService searchUnitConversionService) {
|
||||
SearchQueryParamFactory paramFactory,
|
||||
SearchUnitConversionService searchUnitConversionService)
|
||||
{
|
||||
|
||||
this.assistantClient = assistantClient;
|
||||
this.domainQueries = domainQueries;
|
||||
this.queryFactory = queryFactory;
|
||||
this.queryClient = queryClient;
|
||||
|
||||
this.searchQueryService = searchQueryService;
|
||||
this.paramFactory = paramFactory;
|
||||
this.searchUnitConversionService = searchUnitConversionService;
|
||||
}
|
||||
|
||||
public List<UrlDetails> doApiSearch(Context ctx,
|
||||
UserSearchParameters params) {
|
||||
public List<UrlDetails> doSiteSearch(Context ctx,
|
||||
String domain) {
|
||||
|
||||
var queryParams = paramFactory.forSiteSearch(domain);
|
||||
var queryResponse = queryClient.search(ctx, queryParams);
|
||||
|
||||
SearchQuery processedQuery = queryFactory.createQuery(params);
|
||||
|
||||
logger.info(queryMarker, "Human terms (API): {}", Strings.join(processedQuery.searchTermsHuman, ','));
|
||||
|
||||
return searchQueryService.executeQuery(ctx, processedQuery);
|
||||
return searchQueryService.getResultsFromQuery(queryResponse);
|
||||
}
|
||||
|
||||
public DecoratedSearchResults doSearch(Context ctx, UserSearchParameters params) {
|
||||
public DecoratedSearchResults doSearch(Context ctx, UserSearchParameters userParams) {
|
||||
|
||||
Future<String> eval = searchUnitConversionService.tryEval(ctx, params.humanQuery());
|
||||
SearchQuery processedQuery = queryFactory.createQuery(params);
|
||||
Future<String> eval = searchUnitConversionService.tryEval(ctx, userParams.humanQuery());
|
||||
var queryParams = paramFactory.forRegularSearch(userParams);
|
||||
var queryResponse = queryClient.search(ctx, queryParams);
|
||||
|
||||
logger.info(queryMarker, "Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ','));
|
||||
|
||||
List<UrlDetails> queryResults = searchQueryService.executeQuery(ctx, processedQuery);
|
||||
List<UrlDetails> queryResults = searchQueryService.getResultsFromQuery(queryResponse);
|
||||
|
||||
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
||||
logger.info(queryMarker, "Search Result Count: {}", queryResults.size());
|
||||
|
||||
String evalResult = getFutureOrDefault(eval, "");
|
||||
|
||||
return DecoratedSearchResults.builder()
|
||||
.params(params)
|
||||
.problems(getProblems(ctx, evalResult, queryResults, processedQuery))
|
||||
.params(userParams)
|
||||
.problems(getProblems(ctx, evalResult, queryResults, queryResponse))
|
||||
.evalResult(evalResult)
|
||||
.results(queryResults)
|
||||
.focusDomain(processedQuery.domain)
|
||||
.focusDomainId(getDomainId(processedQuery.domain))
|
||||
.focusDomain(queryResponse.domain())
|
||||
.focusDomainId(getDomainId(queryResponse.domain()))
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -113,20 +114,20 @@ public class SearchOperator {
|
||||
return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1);
|
||||
}
|
||||
|
||||
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, SearchQuery processedQuery) {
|
||||
final List<String> problems = new ArrayList<>(processedQuery.problems);
|
||||
boolean siteSearch = processedQuery.domain != null;
|
||||
private List<String> getProblems(Context ctx, String evalResult, List<UrlDetails> queryResults, QueryResponse response) {
|
||||
final List<String> problems = new ArrayList<>(response.problems());
|
||||
boolean siteSearch = response.domain() != null;
|
||||
|
||||
if (!siteSearch) {
|
||||
if (queryResults.size() <= 5 && null == evalResult) {
|
||||
spellCheckTerms(ctx, processedQuery).forEach(problems::add);
|
||||
spellCheckTerms(ctx, response).forEach(problems::add);
|
||||
}
|
||||
|
||||
if (queryResults.size() <= 5) {
|
||||
problems.add("Try rephrasing the query, changing the word order or using synonyms to get different results. <a href=\"https://memex.marginalia.nu/projects/edge/search-tips.gmi\">Tips</a>.");
|
||||
}
|
||||
|
||||
Set<String> representativeKeywords = processedQuery.getAllKeywords();
|
||||
Set<String> representativeKeywords = response.getAllKeywords();
|
||||
if (representativeKeywords.size()>1 && (representativeKeywords.contains("definition") || representativeKeywords.contains("define") || representativeKeywords.contains("meaning")))
|
||||
{
|
||||
problems.add("Tip: Try using a query that looks like <tt>define:word</tt> if you want a dictionary definition");
|
||||
@ -137,8 +138,8 @@ public class SearchOperator {
|
||||
}
|
||||
|
||||
|
||||
private Iterable<String> spellCheckTerms(Context ctx, SearchQuery disjointedQuery) {
|
||||
return Observable.fromIterable(disjointedQuery.searchTermsHuman)
|
||||
private Iterable<String> spellCheckTerms(Context ctx, QueryResponse response) {
|
||||
return Observable.fromIterable(response.searchTermsHuman())
|
||||
.subscribeOn(Schedulers.io())
|
||||
.flatMap(term -> assistantClient.spellCheck(ctx, term)
|
||||
.onErrorReturn(e -> Collections.emptyList())
|
@ -0,0 +1,53 @@
|
||||
package nu.marginalia.search;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.search.model.UserSearchParameters;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Builds the {@link QueryParams} objects used for a regular user search and for a
 * site-restricted search.
 * <p>
 * NOTE(review): the meaning of each positional {@code QueryParams} / {@code QueryLimits}
 * argument is defined by those types, which are not visible here; confirm against
 * their declarations before reordering anything.
 */
public class SearchQueryParamFactory {
|
||||
|
||||
/**
 * Creates query parameters for a regular search.
 * <p>
 * The user's search profile is asked to add its tacit terms to an empty
 * {@link SearchSubquery}; the resulting include/exclude/priority/advice term lists are
 * passed along together with the profile's quality, year and size limits and its
 * search set identifier.
 *
 * @param userParams the user's search parameters (human query and search profile)
 * @return the assembled query parameters
 */
public QueryParams forRegularSearch(UserSearchParameters userParams) {
|
||||
// Scratch subquery that only serves to collect the profile's tacit terms
SearchSubquery prototype = new SearchSubquery();
|
||||
var profile = userParams.profile();
|
||||
profile.addTacitTerms(prototype);
|
||||
|
||||
return new QueryParams(
|
||||
userParams.humanQuery(),
|
||||
null,
|
||||
prototype.searchTermsInclude,
|
||||
prototype.searchTermsExclude,
|
||||
prototype.searchTermsPriority,
|
||||
prototype.searchTermsAdvice,
|
||||
profile.getQualityLimit(),
|
||||
profile.getYearLimit(),
|
||||
profile.getSizeLimit(),
|
||||
// Fourth limit is left unrestricted for regular searches
SpecificationLimit.none(),
|
||||
List.of(),
|
||||
new QueryLimits(2, 100, 200, 8192),
|
||||
profile.searchSetIdentifier
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
/**
 * Creates query parameters for a site search.
 * <p>
 * The query string is {@code "site:"} followed by the domain; no tacit terms are
 * supplied, every specification limit is {@code SpecificationLimit.none()}, and the
 * search set is {@code SearchSetIdentifier.NONE}.
 *
 * @param domain the domain name appended to the {@code site:} prefix
 * @return the assembled query parameters
 */
public QueryParams forSiteSearch(String domain) {
|
||||
return new QueryParams("site:"+domain,
|
||||
null,
|
||||
List.of(),
|
||||
List.of(),
|
||||
List.of(),
|
||||
List.of(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
List.of(),
|
||||
new QueryLimits(100, 100, 100, 512),
|
||||
SearchSetIdentifier.NONE
|
||||
);
|
||||
}
|
||||
}
|
@ -6,15 +6,11 @@ import lombok.SneakyThrows;
|
||||
import nu.marginalia.WebsiteUrl;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.db.storage.FileStorageService;
|
||||
import nu.marginalia.db.storage.model.FileStorageType;
|
||||
import nu.marginalia.linkdb.LinkdbReader;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.search.client.SearchMqEndpoints;
|
||||
import nu.marginalia.search.svc.SearchFrontPageService;
|
||||
import nu.marginalia.search.svc.*;
|
||||
import nu.marginalia.service.control.ServiceEventLog;
|
||||
import nu.marginalia.service.server.*;
|
||||
import nu.marginalia.service.server.mq.MqNotification;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Request;
|
||||
@ -23,18 +19,13 @@ import spark.Spark;
|
||||
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class SearchService extends Service {
|
||||
|
||||
private final WebsiteUrl websiteUrl;
|
||||
private final StaticResources staticResources;
|
||||
private final FileStorageService fileStorageService;
|
||||
private final LinkdbReader linkdbReader;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(SearchService.class);
|
||||
private final ServiceEventLog eventLog;
|
||||
|
||||
@SneakyThrows
|
||||
@Inject
|
||||
@ -45,18 +36,12 @@ public class SearchService extends Service {
|
||||
SearchErrorPageService errorPageService,
|
||||
SearchAddToCrawlQueueService addToCrawlQueueService,
|
||||
SearchFlagSiteService flagSiteService,
|
||||
SearchQueryService searchQueryService,
|
||||
SearchApiQueryService apiQueryService,
|
||||
FileStorageService fileStorageService,
|
||||
LinkdbReader linkdbReader
|
||||
SearchQueryService searchQueryService
|
||||
) {
|
||||
super(params);
|
||||
|
||||
this.eventLog = params.eventLog;
|
||||
this.websiteUrl = websiteUrl;
|
||||
this.staticResources = staticResources;
|
||||
this.fileStorageService = fileStorageService;
|
||||
this.linkdbReader = linkdbReader;
|
||||
|
||||
Spark.staticFiles.expireTime(600);
|
||||
|
||||
@ -64,7 +49,6 @@ public class SearchService extends Service {
|
||||
|
||||
Gson gson = GsonFactory.get();
|
||||
|
||||
Spark.get("/api/search", apiQueryService::apiSearch, gson::toJson);
|
||||
Spark.get("/public/search", searchQueryService::pathSearch);
|
||||
Spark.get("/public/site-search/:site/*", this::siteSearchRedir);
|
||||
Spark.get("/public/", frontPageService::render);
|
||||
@ -87,21 +71,6 @@ public class SearchService extends Service {
|
||||
Spark.awaitInitialization();
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
@MqNotification(endpoint = SearchMqEndpoints.SWITCH_LINKDB)
|
||||
public void switchLinkdb(String unusedArg) {
|
||||
logger.info("Switching link database");
|
||||
|
||||
Path newPath = fileStorageService.getStorageByType(FileStorageType.LINKDB_STAGING)
|
||||
.asPath()
|
||||
.resolve("links.db");
|
||||
|
||||
if (Files.exists(newPath)) {
|
||||
eventLog.logEvent("SEARCH-SWITCH-LINKDB", "");
|
||||
linkdbReader.switchInput(newPath);
|
||||
}
|
||||
}
|
||||
|
||||
private Object serveStatic(Request request, Response response) {
|
||||
String resource = request.params("resource");
|
||||
staticResources.serveStatic("search", resource, request, response);
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user