* Extract the search-query function

This commit is contained in:
Viktor Lofgren 2024-02-22 15:27:39 +01:00
parent 66c1281301
commit 3fd2a83184
83 changed files with 261 additions and 766 deletions

View File

@ -1,7 +1,5 @@
plugins {
id 'java'
id "com.google.protobuf" version "0.9.4"
id 'jvm-test-suite'
}
@ -11,16 +9,6 @@ java {
}
}
sourceSets {
main {
proto {
srcDir 'src/main/protobuf'
}
}
}
apply from: "$rootProject.projectDir/protobuf.gradle"
dependencies {
implementation project(':code:common:model')
implementation project(':code:common:config')
@ -28,6 +16,8 @@ dependencies {
implementation project(':code:libraries:message-queue')
implementation project(':code:features-index:index-query')
implementation project(':code:functions:search-query:api')
implementation libs.bundles.slf4j
implementation libs.prometheus

View File

@ -1,7 +1,8 @@
package nu.marginalia.index.client;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.IndexProtobufCodec;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.SpecificationLimit;
import org.junit.jupiter.api.Test;

View File

@ -14,6 +14,7 @@ dependencies {
implementation project(':code:common:model')
implementation project(':code:common:service')
implementation project(':code:api:index-api')
implementation project(':code:functions:search-query:api')
implementation libs.bundles.slf4j
implementation libs.guice

View File

@ -1,6 +1,7 @@
package nu.marginalia.ranking;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import java.util.List;

View File

@ -1,8 +1,8 @@
package nu.marginalia.ranking;
import nu.marginalia.index.client.model.results.ResultRankingContext;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.model.crawl.HtmlFeature;
import nu.marginalia.model.crawl.PubDate;
import nu.marginalia.model.idx.DocumentFlags;

View File

@ -1,8 +1,8 @@
package nu.marginalia.ranking.factors;
import nu.marginalia.index.client.model.results.Bm25Parameters;
import nu.marginalia.index.client.model.results.ResultRankingContext;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.api.searchquery.model.results.Bm25Parameters;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.ranking.ResultKeywordSet;

View File

@ -1,6 +1,7 @@
package nu.marginalia.ranking.factors;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import java.util.List;

View File

@ -1,10 +0,0 @@
# Query Parser
End-user search query parsing tools used by the [query-service](../../services-core/query-service).
## Central Classes
* [QueryTokenizer](src/main/java/nu/marginalia/query_parser/QueryTokenizer.java)
* [QueryParser](src/main/java/nu/marginalia/query_parser/QueryParser.java)
* [QueryPermutation](src/main/java/nu/marginalia/query_parser/QueryPermutation.java) - here be dragons
* [QueryVariants](src/main/java/nu/marginalia/query_parser/QueryVariants.java) - here be dragons

View File

@ -1,57 +0,0 @@
package nu.marginalia.ngrams;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Tests for {@code DenseBitMap}: positions written through {@code set} and
 * {@code clear} must read back accordingly through {@code get}.
 */
class DenseBitMapTest {

    /** Sets every position below {@code cardinality} and expects each one to read back as set. */
    @Test
    public void testSetAll() {
        var bitmap = new DenseBitMap(129);
        setEvery(bitmap, 0, 1);

        int pos = 0;
        while (pos < bitmap.cardinality) {
            assertTrue(bitmap.get(pos));
            pos++;
        }
    }

    /** Sets only the even positions; even positions must be set and odd positions must not be. */
    @Test
    public void testSetEven() {
        var bitmap = new DenseBitMap(131);
        setEvery(bitmap, 0, 2);

        int even = 0;
        while (even < bitmap.cardinality) {
            assertTrue(bitmap.get(even));
            even += 2;
        }

        int odd = 1;
        while (odd < bitmap.cardinality) {
            assertFalse(bitmap.get(odd));
            odd += 2;
        }
    }

    /** Sets every position, clears the odd ones, then checks that exactly the even ones remain set. */
    @Test
    public void testSetAllClearSome() {
        var bitmap = new DenseBitMap(129);
        setEvery(bitmap, 0, 1);

        int cleared = 1;
        while (cleared < bitmap.cardinality) {
            bitmap.clear(cleared);
            cleared += 2;
        }

        int even = 0;
        while (even < bitmap.cardinality) {
            assertTrue(bitmap.get(even), "Expected " + even + " to be set");
            even += 2;
        }

        int odd = 1;
        while (odd < bitmap.cardinality) {
            assertFalse(bitmap.get(odd), "Expected " + odd + " to be clear");
            odd += 2;
        }
    }

    /** Calls {@code set} on {@code first}, {@code first + stride}, ... while below {@code cardinality}. */
    private static void setEvery(DenseBitMap bitmap, int first, int stride) {
        for (int pos = first; pos < bitmap.cardinality; pos += stride) {
            bitmap.set(pos);
        }
    }
}

View File

@ -1,116 +0,0 @@
package nu.marginalia.query_parser;
import nu.marginalia.LanguageModels;
import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import nu.marginalia.query_parser.token.TokenType;
import nu.marginalia.util.TestLanguageModels;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Tests for {@code QueryParser} token classification, plus print-only runs of
 * {@code QueryPermutation} over a few multi-word queries (those runs assert nothing).
 */
class BodyQueryParserTest {
    private static final LanguageModels lm = TestLanguageModels.getLanguageModels();

    private static TermFrequencyDict dict;
    private static EnglishDictionary englishDictionary;
    private static NGramBloomFilter nGramBloomFilter;

    private QueryParser parser;
    private QueryPermutation permutation;

    /** Loads the shared dictionaries once for the whole class. */
    @BeforeAll
    public static void init() throws IOException {
        dict = new TermFrequencyDict(lm);
        nGramBloomFilter = new NGramBloomFilter(lm);
        englishDictionary = new EnglishDictionary(dict);
    }

    /** Creates a fresh parser and permutation helper before each test. */
    @BeforeEach
    public void setUp() {
        parser = new QueryParser();

        var queryVariants = new QueryVariants(lm, dict, nGramBloomFilter, englishDictionary);
        permutation = new QueryPermutation(queryVariants);
    }

    /** Both lower-cased terms must occur in the lower-cased title string. */
    @Test
    public void testTitleMatcher() {
        String title = "3D Realms Site: Forums".toLowerCase();

        long hits = List.of("3d", "realms").stream()
                .map(String::toLowerCase)
                .filter(title::contains)
                .count();

        assertEquals(2, hits);
    }

    /** A single bare word parses to exactly one LITERAL_TERM token. */
    @Test
    void parseSimple() {
        var tokens = parser.parse("hello");
        tokens.forEach(System.out::println);

        assertEquals(1, tokens.size());

        var first = tokens.get(0);
        assertEquals(TokenType.LITERAL_TERM, first.type);
        assertEquals("hello", first.str);
    }

    /** A phrase wrapped in typographic quotes parses to a QUOT_TERM. */
    @Test
    void parseQuotes() {
        var tokens = parser.parse("\u201Chello world\u201D");
        tokens.forEach(System.out::println);

        var first = tokens.get(0);
        assertEquals(TokenType.QUOT_TERM, first.type);
        assertEquals("hello_world", first.str);
        assertEquals("\"hello world\"", first.displayStr);
    }

    /** A leading minus yields an EXCLUDE_TERM with a lower-cased term. */
    @Test
    void parseExclude() {
        var tokens = parser.parse("-Hello");
        tokens.forEach(System.out::println);

        var first = tokens.get(0);
        assertEquals(TokenType.EXCLUDE_TERM, first.type);
        assertEquals("hello", first.str);
        assertEquals("-hello", first.displayStr);
    }

    /** The {@code near:} prefix yields a NEAR_TERM carrying the domain. */
    @Test
    void parseNear() {
        var tokens = parser.parse("near:memex.marginalia.nu");
        tokens.forEach(System.out::println);

        var first = tokens.get(0);
        assertEquals(TokenType.NEAR_TERM, first.type);
        assertEquals("memex.marginalia.nu", first.str);
        assertEquals("near:memex.marginalia.nu", first.displayStr);
    }

    @Test
    void parseCombined() {
        printPermutations("dune 2 remake");
    }

    @Test
    void parseCombinedDOS() {
        printPermutations("ab ba baa abba baba ab ba");
    }

    @Test
    void parseCombinedSuperman() {
        printPermutations("wizardry proving grounds of the mad overlord");
    }

    /** Parses a few awkward inputs and prints the tokens; nothing is asserted. */
    @Test
    void testEdgeCases() {
        String[] queries = {
                "site:localhost 3D",
                "-wolfenstein 3D",
                "-wolfenstein 3D \""
        };

        for (String query : queries) {
            parser.parse(query).forEach(System.out::println);
        }
    }

    /** Prints every permutation of the parsed query, one permutation per line. */
    private void printPermutations(String query) {
        for (var variant : permutation.permuteQueries(parser.parse(query))) {
            for (var token : variant) {
                System.out.printf("%s ", token.str);
            }
            System.out.println();
        }
    }
}

View File

@ -1,93 +0,0 @@
package nu.marginalia.query_parser;
import nu.marginalia.query_parser.token.Token;
import nu.marginalia.query_parser.token.TokenType;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Tests for {@code QueryParser}: whitespace splitting, token type detection
 * and the token strings produced for names with non-ASCII letters.
 */
class QueryParserTest {
    private static QueryParser parser;

    @BeforeAll
    public static void setUp() {
        parser = new QueryParser();
    }

    /** Spaces, tabs and newlines all separate terms. */
    @Test
    public void testTabHandling() {
        var tokens = parser.parse(" lorem\tipsum\ndolor sit");

        assertEquals(4, tokens.size());

        String[] expected = { "lorem", "ipsum", "dolor", "sit" };
        for (int idx = 0; idx < expected.length; idx++) {
            var token = tokens.get(idx);
            assertEquals(expected[idx], token.str);
            assertEquals(expected[idx], token.displayStr);
        }
    }

    /** Literal, parenthesised, quoted and negated terms each get their own token type. */
    @Test
    public void testAdviceString() {
        var tokens = parser.parse("alcibiades (plato) \"my query\" -cars");

        assertEquals(4, tokens.size());

        assertToken(tokens.get(0), TokenType.LITERAL_TERM, "alcibiades", "alcibiades");
        assertToken(tokens.get(1), TokenType.ADVICE_TERM, "plato", "(plato)");
        assertToken(tokens.get(2), TokenType.QUOT_TERM, "my_query", "\"my query\"");
        assertToken(tokens.get(3), TokenType.EXCLUDE_TERM, "cars", "-cars");
    }

    /** Prints the parse of the three {@code year} comparison forms; nothing is asserted. */
    @Test
    public void testParseYear() {
        for (String query : new String[] { "year>2000", "year=2000", "year<2000" }) {
            System.out.println(parser.parse(query));
        }
    }

    /** Checks the expected ASCII token strings for names containing non-ASCII letters. */
    @Test
    public void testNonAsciiNames() {
        verifyParseResult("André the Giant", "andre", "the", "giant");
        verifyParseResult("Stanisław Lem", "stanislaw", "lem");
        verifyParseResult("Nicolae Ceaușescu", "nicolae", "ceausescu");
        verifyParseResult("Þorrablót", "thorrablot");
        verifyParseResult("Karolis Koncevičius", "karolis", "koncevicius");
    }

    /** Asserts type, term and display string of one token, in that order. */
    private static void assertToken(Token actual, TokenType type, String str, String displayStr) {
        assertEquals(type, actual.type);
        assertEquals(str, actual.str);
        assertEquals(displayStr, actual.displayStr);
    }

    private void verifyParseResult(String query, String... expectedTokens) {
        String[] actualTokens = getTokenStrings(parser.parse(query));
        assertArrayEquals(expectedTokens, actualTokens);
    }

    /** Collects the {@code str} field of every token, preserving order. */
    private String[] getTokenStrings(List<Token> tokens) {
        String[] strings = new String[tokens.size()];
        int next = 0;
        for (Token token : tokens) {
            strings[next++] = token.str;
        }
        return strings;
    }
}

View File

@ -1,75 +0,0 @@
package nu.marginalia.query_parser;
import nu.marginalia.LanguageModels;
import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import nu.marginalia.util.TestLanguageModels;
import nu.marginalia.language.sentence.SentenceExtractor;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import java.io.IOException;
/**
 * Print-only tests for {@code QueryVariants}: each test parses queries, runs them
 * through {@code getQueryVariants} and prints the result. Nothing is asserted.
 */
class QueryVariantsTest {
    static QueryVariants variants;
    static QueryParser parser;
    static SentenceExtractor se;

    /** Loads the language models and builds the shared extractor, variants generator and parser. */
    @BeforeAll
    public static void setUp() throws IOException {
        LanguageModels models = TestLanguageModels.getLanguageModels();

        se = new SentenceExtractor(models);

        var termFrequencies = new TermFrequencyDict(models);
        var ngramFilter = new NGramBloomFilter(models);
        var englishDictionary = new EnglishDictionary(termFrequencies);

        variants = new QueryVariants(models, termFrequencies, ngramFilter, englishDictionary);
        parser = new QueryParser();
    }

    @Test
    void getQueryFood() {
        System.out.println(se.extractSentence("we are alone"));
        testCase("Omelet recipe");
    }

    @Test
    void queryNegation() {
        System.out.println(se.extractSentence("salt lake -city"));
        testCase("salt lake -city");
    }

    @Test
    void getQueryVariants() {
        System.out.println(se.extractSentence("we are alone"));

        // Order and repetition are kept as-is so the printed output is unchanged.
        String[] inputs = {
                "inside job reviews",
                "plato apology",
                "mechanical keyboard",
                "DOS",
                "dos",
                "we are alone",
                "3D Realms",
                "I am alone",
                "plato cave",
                "The internet is dead",
                "TRS80",
                "TRS-80",
                "TRS-80",
                "Raspberry Pi 2",
                "Duke Nukem 3D",
                "The Man of Tomorrow",
                "Computer Manual",
                "Knitting",
                "capcom",
                "the man of tomorrow"
        };

        for (String input : inputs) {
            testCase(input);
        }
    }

    /** Parses {@code input}, computes its query variants and prints them. */
    private void testCase(String input) {
        var parsed = parser.parse(input);
        var result = variants.getQueryVariants(parsed);
        System.out.println(result);
    }
}

View File

@ -1,120 +0,0 @@
package nu.marginalia.transform_list;
import org.junit.jupiter.api.Test;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Tests for {@code TransformList}: each test wraps a mutable list, applies
 * {@code transformEach}, {@code transformEachPair} or {@code scanAndTransform},
 * and checks the resulting contents of the backing list.
 */
class TransformListTest {

    /** Removing the values 1 and 4 and replacing 2 with 5 leaves [5, 3]. */
    @Test
    void transformEach() {
        List<Integer> values = mutableList(1, 2, 3, 4);

        new TransformList<>(values).transformEach(entry -> {
            int v = entry.value();
            switch (v) {
                case 1, 4 -> entry.remove();
                case 2 -> entry.replace(5);
                default -> { }
            }
        });

        assertEquals(List.of(5, 3), values);
    }

    /** On pairs led by 1, 3 or 5: remove the first element, negate the second. */
    @Test
    void transformEachPairRemoveReplace() {
        List<Integer> values = oneToSix();

        new TransformList<>(values).transformEachPair((first, second) -> {
            System.out.println(first.value() + ":" + second.value());
            int v = first.value();
            if (isOneThreeOrFive(v)) {
                first.remove();
                second.replace(-second.value());
            }
        });

        assertEquals(List.of(-2, -4, -6), values);
    }

    /** On pairs led by 1, 3 or 5: remove both elements. */
    @Test
    void transformEachPairRemoveRemove() {
        List<Integer> values = oneToSix();

        new TransformList<>(values).transformEachPair((first, second) -> {
            System.out.println(first.value() + ":" + second.value());
            int v = first.value();
            if (isOneThreeOrFive(v)) {
                first.remove();
                second.remove();
            }
        });

        assertEquals(List.of(), values);
    }

    /** On pairs led by 1, 3 or 5: negate the first element, remove the second. */
    @Test
    void transformEachPairReplaceRemove() {
        List<Integer> values = oneToSix();

        new TransformList<>(values).transformEachPair((first, second) -> {
            System.out.println(first.value() + ":" + second.value());
            int v = first.value();
            if (isOneThreeOrFive(v)) {
                first.replace(-first.value());
                second.remove();
            }
        });

        assertEquals(List.of(-1, -3, -5), values);
    }

    /** On pairs led by 1, 3 or 5: negate both elements. */
    @Test
    void transformEachPairReplaceReplace() {
        List<Integer> values = oneToSix();

        new TransformList<>(values).transformEachPair((first, second) -> {
            System.out.println(first.value() + ":" + second.value());
            int v = first.value();
            if (isOneThreeOrFive(v)) {
                first.replace(-first.value());
                second.replace(-second.value());
            }
        });

        assertEquals(List.of(-1, -2, -3, -4, -5, -6), values);
    }

    /** With start marker 3 and end marker 7, the values 3 through 7 are expected to be doubled. */
    @Test
    void scanAndTransform() {
        List<Integer> values = mutableList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);

        new TransformList<>(values).scanAndTransform(
                Integer.valueOf(3)::equals,
                Integer.valueOf(7)::equals,
                entry -> {
                    entry.replace(entry.value() * 2);
                });

        assertEquals(List.of(1, 2, 6, 8, 10, 12, 14, 8, 9, 10), values);
    }

    /** Same as above, but the end marker is the last element of the list. */
    @Test
    void scanAndTransformEndsAtEnd() {
        List<Integer> values = mutableList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);

        new TransformList<>(values).scanAndTransform(
                Integer.valueOf(3)::equals,
                Integer.valueOf(10)::equals,
                entry -> {
                    entry.replace(entry.value() * 2);
                });

        assertEquals(List.of(1, 2, 6, 8, 10, 12, 14, 16, 18, 20), values);
    }

    /** Input with a repeated start marker (3, 3) and a repeated end marker (7, 7). */
    @Test
    void scanAndTransformOverlap() {
        List<Integer> values = mutableList(1, 2, 3, 3, 5, 7, 7, 8, 9, 10);

        new TransformList<>(values).scanAndTransform(
                Integer.valueOf(3)::equals,
                Integer.valueOf(7)::equals,
                entry -> {
                    entry.replace(entry.value() * 2);
                });

        assertEquals(List.of(1, 2, 6, 6, 10, 14, 7, 8, 9, 10), values);
    }

    /** Builds the list through {@code Collectors.toList()} so the transform can modify it in place. */
    private static List<Integer> mutableList(Integer... items) {
        return Stream.of(items).collect(Collectors.toList());
    }

    private static List<Integer> oneToSix() {
        return mutableList(1, 2, 3, 4, 5, 6);
    }

    /** True for exactly the values 1, 3 and 5. */
    private static boolean isOneThreeOrFive(int v) {
        return v == 1 || v == 3 || v == 5;
    }
}

View File

@ -1,38 +0,0 @@
package nu.marginalia.util;
import nu.marginalia.LanguageModels;
import nu.marginalia.WmsaHome;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Optional;
/**
 * Test helper that locates the language model directory and builds a
 * {@code LanguageModels} instance from the files inside it.
 */
public class TestLanguageModels {
    /** Fallback location: the {@code model} directory under the WMSA home path. */
    private static final Path LANGUAGE_MODELS_DEFAULT = WmsaHome.getHomePath().resolve("model");

    /**
     * Resolves the language model directory.
     *
     * <p>The {@code LANGUAGE_MODELS_HOME} environment variable is used when it is set;
     * otherwise the default under the WMSA home path is used.
     *
     * @return the resolved directory
     * @throws IllegalStateException if the resolved path is not a directory
     */
    public static Path getLanguageModelsPath() {
        final String configuredHome = System.getenv("LANGUAGE_MODELS_HOME");

        final Path languageModelsHome;
        if (configuredHome != null) {
            languageModelsHome = Path.of(configuredHome);
        } else {
            languageModelsHome = LANGUAGE_MODELS_DEFAULT;
        }

        if (Files.isDirectory(languageModelsHome)) {
            return languageModelsHome;
        }

        throw new IllegalStateException("Could not find $LANGUAGE_MODELS_HOME, see doc/language-models.md");
    }

    /**
     * Builds a {@code LanguageModels} from seven fixed file names resolved against
     * {@link #getLanguageModelsPath()}.
     *
     * @throws IllegalStateException if the language model directory cannot be found
     */
    public static LanguageModels getLanguageModels() {
        Path home = getLanguageModelsPath();

        Path ngrams = home.resolve("ngrams.bin");
        Path termFrequencies = home.resolve("tfreq-new-algo3.bin");
        Path sentenceModel = home.resolve("opennlp-sentence.bin");
        Path rdrModel = home.resolve("English.RDR");
        Path rdrDictionary = home.resolve("English.DICT");
        Path tokenModel = home.resolve("opennlp-tokens.bin");
        Path languageIdModel = home.resolve("lid.176.ftz");

        return new LanguageModels(
                ngrams,
                termFrequencies,
                sentenceModel,
                rdrModel,
                rdrDictionary,
                tokenModel,
                languageIdModel
        );
    }
}

View File

@ -1,7 +0,0 @@
**Note**: This package is called `features-qs` rather than `features-query` because the latter,
though more consistent with other packages like features-index, would be very confusing
as there are other packages elsewhere with the 'query' name (e.g. features-index/index-query).
## Contents
* [query-parser](query-parser/) contains code for parsing the user-facing query grammar.

View File

@ -19,7 +19,6 @@ dependencies {
implementation libs.bundles.grpc
implementation libs.notnull
implementation libs.guice
implementation libs.spark
implementation libs.opencsv
implementation libs.trove
implementation libs.fastutil

View File

@ -1,5 +1,7 @@
plugins {
id 'java'
id "com.google.protobuf" version "0.9.4"
id 'jvm-test-suite'
}
@ -9,24 +11,34 @@ java {
}
}
jar.archiveBaseName = 'search-query-api'
sourceSets {
main {
proto {
srcDir 'src/main/protobuf'
}
}
}
apply from: "$rootProject.projectDir/protobuf.gradle"
dependencies {
implementation project(':code:common:model')
implementation project(':code:api:index-api')
implementation project(':code:common:config')
implementation project(':code:libraries:message-queue')
implementation project(':code:features-index:index-query')
implementation project(':code:common:service-discovery')
implementation libs.bundles.slf4j
implementation libs.roaringbitmap
implementation libs.prometheus
implementation libs.notnull
implementation libs.trove
implementation libs.guice
implementation libs.gson
implementation libs.bundles.grpc
implementation libs.protobuf
implementation libs.javax.annotation
implementation libs.bundles.grpc
implementation libs.fastutil
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit

View File

@ -1,9 +1,9 @@
package nu.marginalia.index.client;
package nu.marginalia.api.searchquery;
import nu.marginalia.index.api.*;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.results.Bm25Parameters;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.*;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.results.Bm25Parameters;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.index.query.limit.SpecificationLimitType;

View File

@ -1,12 +1,10 @@
package nu.marginalia.query.client;
package nu.marginalia.api.searchquery;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import io.prometheus.client.Summary;
import nu.marginalia.index.api.*;
import nu.marginalia.query.QueryProtobufCodec;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.service.client.GrpcChannelPoolFactory;
import nu.marginalia.service.client.GrpcSingleNodeChannelPool;
import nu.marginalia.service.discovery.property.ServiceKey;

View File

@ -1,25 +1,21 @@
package nu.marginalia.query;
package nu.marginalia.api.searchquery;
import lombok.SneakyThrows;
import nu.marginalia.index.api.*;
import nu.marginalia.index.client.IndexProtobufCodec;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.client.model.results.SearchResultItem;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.query.model.ProcessedQuery;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.query.QueryResponse;
import java.util.ArrayList;
import java.util.List;
import static nu.marginalia.index.client.IndexProtobufCodec.*;
public class QueryProtobufCodec {
public static RpcIndexQuery convertQuery(RpcQsQuery request, ProcessedQuery query) {
@ -34,11 +30,11 @@ public class QueryProtobufCodec {
builder.setSearchSetIdentifier(query.specs.searchSetIdentifier);
builder.setHumanQuery(request.getHumanQuery());
builder.setQuality(convertSpecLimit(query.specs.quality));
builder.setYear(convertSpecLimit(query.specs.year));
builder.setSize(convertSpecLimit(query.specs.size));
builder.setRank(convertSpecLimit(query.specs.rank));
builder.setDomainCount(convertSpecLimit(query.specs.domainCount));
builder.setQuality(IndexProtobufCodec.convertSpecLimit(query.specs.quality));
builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank));
builder.setDomainCount(IndexProtobufCodec.convertSpecLimit(query.specs.domainCount));
builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits));
@ -63,11 +59,11 @@ public class QueryProtobufCodec {
builder.setSearchSetIdentifier(query.specs.searchSetIdentifier);
builder.setHumanQuery(humanQuery);
builder.setQuality(convertSpecLimit(query.specs.quality));
builder.setYear(convertSpecLimit(query.specs.year));
builder.setSize(convertSpecLimit(query.specs.size));
builder.setRank(convertSpecLimit(query.specs.rank));
builder.setDomainCount(convertSpecLimit(query.specs.domainCount));
builder.setQuality(IndexProtobufCodec.convertSpecLimit(query.specs.quality));
builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank));
builder.setDomainCount(IndexProtobufCodec.convertSpecLimit(query.specs.domainCount));
builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits));
@ -92,11 +88,11 @@ public class QueryProtobufCodec {
request.getTacitExcludesList(),
request.getTacitPriorityList(),
request.getTacitAdviceList(),
convertSpecLimit(request.getQuality()),
convertSpecLimit(request.getYear()),
convertSpecLimit(request.getSize()),
convertSpecLimit(request.getRank()),
convertSpecLimit(request.getDomainCount()),
IndexProtobufCodec.convertSpecLimit(request.getQuality()),
IndexProtobufCodec.convertSpecLimit(request.getYear()),
IndexProtobufCodec.convertSpecLimit(request.getSize()),
IndexProtobufCodec.convertSpecLimit(request.getRank()),
IndexProtobufCodec.convertSpecLimit(request.getDomainCount()),
request.getDomainIdsList(),
IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()),
request.getSearchSetIdentifier(),
@ -167,7 +163,7 @@ public class QueryProtobufCodec {
List<SearchSubquery> subqueries = new ArrayList<>(specs.getSubqueriesCount());
for (int i = 0; i < specs.getSubqueriesCount(); i++) {
subqueries.add(convertSearchSubquery(specs.getSubqueries(i)));
subqueries.add(IndexProtobufCodec.convertSearchSubquery(specs.getSubqueries(i)));
}
return new SearchSpecification(
@ -182,7 +178,7 @@ public class QueryProtobufCodec {
IndexProtobufCodec.convertSpecLimit(specs.getDomainCount()),
IndexProtobufCodec.convertQueryLimits(specs.getQueryLimits()),
QueryStrategy.valueOf(specs.getQueryStrategy()),
convertRankingParameterss(specs.getParameters())
IndexProtobufCodec.convertRankingParameterss(specs.getParameters())
);
}
@ -194,11 +190,11 @@ public class QueryProtobufCodec {
.addAllTacitIncludes(params.tacitIncludes())
.addAllTacitPriority(params.tacitPriority())
.setHumanQuery(params.humanQuery())
.setQueryLimits(convertQueryLimits(params.limits()))
.setQuality(convertSpecLimit(params.quality()))
.setYear(convertSpecLimit(params.year()))
.setSize(convertSpecLimit(params.size()))
.setRank(convertSpecLimit(params.rank()))
.setQueryLimits(IndexProtobufCodec.convertQueryLimits(params.limits()))
.setQuality(IndexProtobufCodec.convertSpecLimit(params.quality()))
.setYear(IndexProtobufCodec.convertSpecLimit(params.year()))
.setSize(IndexProtobufCodec.convertSpecLimit(params.size()))
.setRank(IndexProtobufCodec.convertSpecLimit(params.rank()))
.setSearchSetIdentifier(params.identifier())
.setQueryStrategy(params.queryStrategy().name())
.setTemporalBias(RpcTemporalBias.newBuilder()
@ -227,4 +223,5 @@ public class QueryProtobufCodec {
rpcDecoratedResultItem.getRankingScore()
);
}
}

View File

@ -1,6 +1,4 @@
package nu.marginalia.query.model;
import nu.marginalia.index.client.model.query.SearchSpecification;
package nu.marginalia.api.searchquery.model.query;
import java.util.*;

View File

@ -1,10 +1,9 @@
package nu.marginalia.query.model;
package nu.marginalia.api.searchquery.model.query;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import javax.annotation.Nullable;
import java.util.List;

View File

@ -1,7 +1,6 @@
package nu.marginalia.query.model;
package nu.marginalia.api.searchquery.model.query;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import java.util.HashSet;
import java.util.List;

View File

@ -1,4 +1,4 @@
package nu.marginalia.index.client.model.query;
package nu.marginalia.api.searchquery.model.query;
/** Identifies a RankingSearchSet, associated with an EdgeSearchProfile
*

View File

@ -1,7 +1,7 @@
package nu.marginalia.index.client.model.query;
package nu.marginalia.api.searchquery.model.query;
import lombok.*;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;

View File

@ -1,4 +1,4 @@
package nu.marginalia.index.client.model.query;
package nu.marginalia.api.searchquery.model.query;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;

View File

@ -1,4 +1,4 @@
package nu.marginalia.index.client.model.results;
package nu.marginalia.api.searchquery.model.results;
/** Tuning parameters for BM25.
*

View File

@ -1,4 +1,4 @@
package nu.marginalia.index.client.model.results;
package nu.marginalia.api.searchquery.model.results;
import lombok.Getter;
import lombok.ToString;

View File

@ -1,4 +1,4 @@
package nu.marginalia.index.client.model.results;
package nu.marginalia.api.searchquery.model.results;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import lombok.ToString;

View File

@ -1,4 +1,4 @@
package nu.marginalia.index.client.model.results;
package nu.marginalia.api.searchquery.model.results;
import lombok.AllArgsConstructor;
import lombok.Builder;

View File

@ -1,4 +1,4 @@
package nu.marginalia.index.client.model.results;
package nu.marginalia.api.searchquery.model.results;
import lombok.AllArgsConstructor;
import lombok.Getter;

View File

@ -1,4 +1,4 @@
package nu.marginalia.index.client.model.results;
package nu.marginalia.api.searchquery.model.results;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.idx.WordMetadata;

View File

@ -1,4 +1,4 @@
package nu.marginalia.index.client.model.results;
package nu.marginalia.api.searchquery.model.results;
import org.jetbrains.annotations.NotNull;

View File

@ -1,4 +1,4 @@
package nu.marginalia.index.client.model.results;
package nu.marginalia.api.searchquery.model.results;
import lombok.AllArgsConstructor;
import lombok.Getter;

View File

@ -1,7 +1,7 @@
syntax="proto3";
package actorapi;
package nu.marginalia.api.searchquery;
option java_package="nu.marginalia.index.api";
option java_package="nu.marginalia.api.searchquery";
option java_multiple_files=true;
service QueryApi {
@ -35,11 +35,11 @@ message RpcQsQuery {
/* Query service query response */
message RpcQsResponse {
RpcIndexQuery specs = 1;
repeated RpcDecoratedResultItem results = 2;
repeated string searchTermsHuman = 3;
repeated string problems = 4;
string domain = 5;
RpcIndexQuery specs = 1;
repeated RpcDecoratedResultItem results = 2;
repeated string searchTermsHuman = 3;
repeated string problems = 4;
string domain = 5;
}
message RpcTemporalBias {

View File

@ -1,34 +1,46 @@
plugins {
id 'java'
id 'jvm-test-suite'
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(21))
}
}
dependencies {
implementation project(':code:api:index-api')
implementation project(':code:common:db')
implementation project(':code:common:config')
implementation project(':code:common:model')
implementation project(':code:common:service-discovery')
implementation project(':code:functions:search-query:api')
implementation project(':code:features-index:index-query')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:term-frequency-dict')
implementation project(':third-party:porterstemmer')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:term-frequency-dict')
implementation project(':code:features-convert:keyword-extraction')
implementation project(':code:common:config')
implementation project(':code:common:model')
implementation project(':third-party:porterstemmer')
implementation libs.bundles.slf4j
implementation libs.bundles.nlp
implementation libs.bundles.handlebars
implementation libs.trove
implementation libs.prometheus
implementation libs.bundles.grpc
implementation libs.notnull
implementation libs.guice
implementation libs.trove
implementation libs.fastutil
implementation libs.bundles.gson
implementation libs.bundles.mariadb
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}

View File

@ -1,15 +1,18 @@
package nu.marginalia.query;
package nu.marginalia.functions.searchquery;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import io.grpc.stub.StreamObserver;
import io.prometheus.client.Histogram;
import lombok.SneakyThrows;
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
import nu.marginalia.api.searchquery.*;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.index.api.*;
import nu.marginalia.functions.searchquery.svc.QueryFactory;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.query.svc.QueryFactory;
import nu.marginalia.service.client.GrpcChannelPoolFactory;
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
import nu.marginalia.service.discovery.property.ServiceKey;
import nu.marginalia.service.discovery.property.ServicePartition;
import org.slf4j.Logger;
@ -48,8 +51,7 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
IndexApiGrpc::newBlockingStub);
}
public void query(nu.marginalia.index.api.RpcQsQuery request,
io.grpc.stub.StreamObserver<nu.marginalia.index.api.RpcQsResponse> responseObserver)
public void query(RpcQsQuery request, StreamObserver<RpcQsResponse> responseObserver)
{
try {
wmsa_qs_query_time_grpc
@ -105,4 +107,14 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
/**
 * Checks a decorated result item against the domain blacklist.
 *
 * @param item the result item to check
 * @return whatever {@code blacklist.isBlacklisted} reports for the domain id
 *         extracted from the item's combined id
 */
private boolean isBlacklisted(RpcDecoratedResultItem item) {
    // The raw item carries a combined id; UrlIdCodec extracts the domain id from it.
    var combinedId = item.getRawItem().getCombinedId();
    var domainId = UrlIdCodec.getDomainId(combinedId);

    return blacklist.isBlacklisted(domainId);
}
/**
 * Builds a query from {@code params}, runs it through {@code executeQueries},
 * and returns the results converted to model objects.
 *
 * @param originalQuery the query string handed to {@code QueryProtobufCodec.convertQuery}
 *                      alongside the query created from {@code params}
 * @param params        parameters from which {@code queryFactory} creates the query
 * @param count         passed through unchanged to {@code executeQueries}
 * @return the results of {@code executeQueries}, each converted with
 *         {@code QueryProtobufCodec.convertQueryResult}; the list comes from
 *         {@code Stream.toList()} and is therefore unmodifiable
 */
public List<DecoratedSearchResultItem> executeDirect(String originalQuery, QueryParams params, int count) {
    var processedQuery = queryFactory.createQuery(params);
    var rpcQuery = QueryProtobufCodec.convertQuery(originalQuery, processedQuery);
    var rpcResults = executeQueries(rpcQuery, count);

    return rpcResults.stream()
            .map(QueryProtobufCodec::convertQueryResult)
            .toList();
}
}

View File

@ -1,9 +1,9 @@
package nu.marginalia.query_parser;
package nu.marginalia.functions.searchquery.query_parser;
import nu.marginalia.language.WordPatterns;
import nu.marginalia.query_parser.token.Token;
import nu.marginalia.query_parser.token.TokenType;
import nu.marginalia.transform_list.TransformList;
import nu.marginalia.functions.searchquery.query_parser.token.Token;
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
import nu.marginalia.util.transform_list.TransformList;
import java.util.List;

View File

@ -1,8 +1,8 @@
package nu.marginalia.query_parser;
package nu.marginalia.functions.searchquery.query_parser;
import nu.marginalia.functions.searchquery.query_parser.token.Token;
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
import nu.marginalia.language.WordPatterns;
import nu.marginalia.query_parser.token.Token;
import nu.marginalia.query_parser.token.TokenType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -1,8 +1,8 @@
package nu.marginalia.query_parser;
package nu.marginalia.functions.searchquery.query_parser;
import nu.marginalia.functions.searchquery.query_parser.token.Token;
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
import nu.marginalia.language.encoding.AsciiFlattener;
import nu.marginalia.query_parser.token.Token;
import nu.marginalia.query_parser.token.TokenType;
import java.util.ArrayList;
import java.util.List;

View File

@ -1,20 +1,20 @@
package nu.marginalia.query_parser;
package nu.marginalia.functions.searchquery.query_parser;
import ca.rmen.porterstemmer.PorterStemmer;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.functions.searchquery.query_parser.token.Token;
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
import nu.marginalia.util.language.EnglishDictionary;
import nu.marginalia.LanguageModels;
import nu.marginalia.keyword.KeywordExtractor;
import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.language.sentence.SentenceExtractor;
import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.util.ngrams.NGramBloomFilter;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import nu.marginalia.language.model.DocumentSentence;
import nu.marginalia.language.model.WordSpan;
import nu.marginalia.query_parser.token.Token;
import nu.marginalia.query_parser.token.TokenType;
import opennlp.tools.stemmer.PorterStemmer;
import java.util.*;
import java.util.regex.Pattern;
@ -251,7 +251,7 @@ public class QueryVariants {
for (var span : ls) {
var matcher = dashBoundary.matcher(span.word);
if (matcher.find() && nGramBloomFilter.isKnownNGram(ps.stem(dashBoundary.matcher(span.word).replaceAll("")))) {
if (matcher.find() && nGramBloomFilter.isKnownNGram(ps.stemWord(dashBoundary.matcher(span.word).replaceAll("")))) {
dash = true;
String combined = dashBoundary.matcher(span.word).replaceAll("");
asTokens2.add(combined);
@ -332,7 +332,7 @@ public class QueryVariants {
var a = span.get(i);
var b = span.get(i+1);
var stemmed = ps.stem(a.word + b.word);
var stemmed = ps.stemWord(a.word + b.word);
double scoreCombo = dict.getTermFreqStemmed(stemmed);
if (scoreCombo > 10000) {

View File

@ -1,4 +1,4 @@
package nu.marginalia.query_parser.token;
package nu.marginalia.functions.searchquery.query_parser.token;
import lombok.EqualsAndHashCode;
import lombok.ToString;

View File

@ -1,4 +1,4 @@
package nu.marginalia.query_parser.token;
package nu.marginalia.functions.searchquery.query_parser.token;
import java.util.function.Predicate;

View File

@ -1,4 +1,4 @@
package nu.marginalia.query_parser.token;
package nu.marginalia.functions.searchquery.query_parser.token;
public interface TokenVisitor {
void onLiteralTerm(Token token);

View File

@ -1,21 +1,21 @@
package nu.marginalia.query.svc;
package nu.marginalia.functions.searchquery.svc;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.LanguageModels;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.util.language.EnglishDictionary;
import nu.marginalia.language.WordPatterns;
import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query.model.ProcessedQuery;
import nu.marginalia.query_parser.QueryParser;
import nu.marginalia.query_parser.QueryPermutation;
import nu.marginalia.query_parser.QueryVariants;
import nu.marginalia.query_parser.token.Token;
import nu.marginalia.query_parser.token.TokenType;
import nu.marginalia.util.ngrams.NGramBloomFilter;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
import nu.marginalia.functions.searchquery.query_parser.QueryParser;
import nu.marginalia.functions.searchquery.query_parser.QueryPermutation;
import nu.marginalia.functions.searchquery.query_parser.QueryVariants;
import nu.marginalia.functions.searchquery.query_parser.token.Token;
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -1,10 +1,10 @@
package nu.marginalia.query.svc;
package nu.marginalia.functions.searchquery.svc;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.query_parser.token.Token;
import nu.marginalia.query_parser.token.TokenVisitor;
import nu.marginalia.functions.searchquery.query_parser.token.Token;
import nu.marginalia.functions.searchquery.query_parser.token.TokenVisitor;
public class QueryLimitsAccumulator implements TokenVisitor {
public SpecificationLimit qualityLimit;

View File

@ -1,9 +1,9 @@
package nu.marginalia.query.svc;
package nu.marginalia.functions.searchquery.svc;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.language.WordPatterns;
import nu.marginalia.query_parser.token.Token;
import nu.marginalia.query_parser.token.TokenVisitor;
import nu.marginalia.functions.searchquery.query_parser.token.Token;
import nu.marginalia.functions.searchquery.query_parser.token.TokenVisitor;
import java.util.ArrayList;
import java.util.Arrays;

View File

@ -1,4 +1,4 @@
package nu.marginalia.language;
package nu.marginalia.util.language;
import com.google.inject.Inject;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;

View File

@ -1,4 +1,4 @@
package nu.marginalia.ngrams;
package nu.marginalia.util.ngrams;
import java.io.IOException;
import java.nio.ByteBuffer;

View File

@ -1,4 +1,4 @@
package nu.marginalia.ngrams;
package nu.marginalia.util.ngrams;
import ca.rmen.porterstemmer.PorterStemmer;
import com.google.common.hash.HashFunction;

View File

@ -1,4 +1,4 @@
package nu.marginalia.transform_list;
package nu.marginalia.util.transform_list;
import java.util.List;
import java.util.function.BiConsumer;

View File

@ -41,8 +41,7 @@ dependencies {
implementation project(':code:common:service')
implementation project(':code:common:config')
implementation project(':code:common:service-discovery')
implementation project(':code:api:query-api')
implementation project(':code:api:index-api')
implementation project(':code:functions:search-query:api')
implementation project(':code:features-index:index-query')
implementation libs.bundles.slf4j

View File

@ -5,13 +5,12 @@ import com.google.inject.Singleton;
import nu.marginalia.api.model.ApiSearchResult;
import nu.marginalia.api.model.ApiSearchResultQueryDetails;
import nu.marginalia.api.model.ApiSearchResults;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.api.searchquery.QueryClient;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
import nu.marginalia.api.searchquery.model.results.*;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.query.client.QueryClient;
import nu.marginalia.query.model.QueryParams;
import java.util.ArrayList;
import java.util.Comparator;

View File

@ -56,7 +56,9 @@ dependencies {
implementation project(':code:functions:math:api')
implementation project(':code:functions:domain-info:api')
implementation project(':code:api:query-api')
implementation project(':code:functions:search-query:api')
implementation project(':code:api:index-api')
implementation project(':code:common:service-discovery')
implementation project(':code:common:renderer')

View File

@ -5,10 +5,10 @@ import com.google.inject.Singleton;
import lombok.SneakyThrows;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.api.math.MathClient;
import nu.marginalia.api.searchquery.QueryClient;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.query.client.QueryClient;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.*;
import nu.marginalia.search.svc.SearchQueryIndexService;

View File

@ -1,12 +1,12 @@
package nu.marginalia.search;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.search.command.SearchParameters;
import java.util.List;

View File

@ -1,6 +1,6 @@
package nu.marginalia.search;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.search.model.ClusteredUrlDetails;
import nu.marginalia.search.model.UrlDetails;

View File

@ -1,6 +1,6 @@
package nu.marginalia.search.command;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import javax.annotation.Nullable;
import java.util.Arrays;

View File

@ -1,6 +1,6 @@
package nu.marginalia.search.command;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import javax.annotation.Nullable;
import java.util.Arrays;

View File

@ -1,7 +1,7 @@
package nu.marginalia.search.command;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.search.model.SearchProfile;

View File

@ -2,8 +2,8 @@ package nu.marginalia.search.model;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.model.crawl.HtmlFeature;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
import java.util.Objects;

View File

@ -1,8 +1,8 @@
package nu.marginalia.search.model;
import lombok.*;
import nu.marginalia.index.client.model.results.SearchResultItem;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.crawl.DomainIndexingState;
import nu.marginalia.model.crawl.HtmlFeature;

View File

@ -4,7 +4,7 @@ import gnu.trove.list.TLongList;
import gnu.trove.list.array.TLongArrayList;
import gnu.trove.map.hash.TObjectIntHashMap;
import gnu.trove.set.hash.TIntHashSet;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.lsh.EasyLSH;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -5,11 +5,11 @@ import com.google.inject.Singleton;
import it.unimi.dsi.fastutil.ints.Int2LongArrayMap;
import lombok.SneakyThrows;
import nu.marginalia.bbpc.BrailleBlockPunchCards;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import nu.marginalia.index.client.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.model.crawl.DomainIndexingState;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.results.UrlDeduplicator;
import org.slf4j.Logger;

View File

@ -44,7 +44,6 @@ dependencies {
implementation project(':code:functions:domain-info')
implementation project(':code:functions:domain-info:api')
implementation project(':code:api:query-api')
implementation project(':code:common:config')
implementation project(':code:common:service')
implementation project(':code:common:model')

View File

@ -45,9 +45,9 @@ dependencies {
implementation project(':code:common:renderer')
implementation project(':code:libraries:message-queue')
implementation project(':code:common:service-discovery')
implementation project(':code:api:index-api')
implementation project(':code:api:query-api')
implementation project(':code:functions:search-query:api')
implementation project(':code:api:executor-api')
implementation project(':code:api:index-api')
implementation project(':code:api:process-mqapi')
implementation project(':code:features-search:screenshots')
implementation project(':code:features-index:index-journal')

View File

@ -1,12 +1,12 @@
package nu.marginalia.control.app.svc;
import com.google.inject.Inject;
import nu.marginalia.api.searchquery.QueryClient;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.nodecfg.NodeConfigurationService;
import nu.marginalia.query.client.QueryClient;
import nu.marginalia.query.model.QueryParams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;

View File

@ -71,7 +71,6 @@ dependencies {
implementation project(':code:features-convert:reddit-json')
implementation project(':code:features-index:index-journal')
implementation project(':code:api:index-api')
implementation project(':code:api:query-api')
implementation project(':code:api:process-mqapi')
implementation project(':code:api:executor-api')
implementation project(':third-party:encyclopedia-marginalia-nu')

View File

@ -7,7 +7,6 @@ import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
import nu.marginalia.query.client.QueryClient;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;

View File

@ -43,6 +43,7 @@ dependencies {
implementation project(':code:functions:domain-links:partition')
implementation project(':code:functions:domain-links:api')
implementation project(':code:functions:search-query:api')
implementation project(':code:common:service')
implementation project(':code:api:index-api')

View File

@ -6,7 +6,7 @@ import gnu.trove.set.hash.TLongHashSet;
import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
import it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.index.index.SearchIndex;
import nu.marginalia.index.svc.SearchTermsService;
import nu.marginalia.model.idx.WordMetadata;

View File

@ -4,9 +4,9 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import gnu.trove.list.TLongList;
import gnu.trove.list.array.TLongArrayList;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import nu.marginalia.index.client.model.results.ResultRankingContext;
import nu.marginalia.index.client.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.linkdb.model.DocdbUrlDetail;
import nu.marginalia.ranking.ResultValuator;

View File

@ -2,7 +2,7 @@ package nu.marginalia.index.results;
import gnu.trove.map.TLongIntMap;
import gnu.trove.map.hash.TLongIntHashMap;
import nu.marginalia.index.client.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
public class IndexResultDomainDeduplicator {
final TLongIntMap resultsByDomainId = CachedObjects.getMap();

View File

@ -2,15 +2,15 @@ package nu.marginalia.index.results;
import gnu.trove.list.TLongList;
import gnu.trove.set.hash.TLongHashSet;
import nu.marginalia.index.client.model.results.SearchResultPreliminaryScore;
import nu.marginalia.index.client.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.api.searchquery.model.results.SearchResultPreliminaryScore;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.client.model.results.SearchResultItem;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.query.IndexQueryParams;
import nu.marginalia.ranking.ResultValuator;

View File

@ -4,18 +4,18 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import gnu.trove.list.TLongList;
import gnu.trove.list.array.TLongArrayList;
import io.grpc.stub.StreamObserver;
import io.prometheus.client.Counter;
import io.prometheus.client.Gauge;
import io.prometheus.client.Histogram;
import lombok.SneakyThrows;
import nu.marginalia.index.api.*;
import nu.marginalia.index.api.IndexApiGrpc.IndexApiImplBase;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.client.model.results.SearchResultItem;
import nu.marginalia.index.client.model.results.ResultRankingContext;
import nu.marginalia.index.client.model.results.SearchResultSet;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.*;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.SearchResultSet;
import nu.marginalia.index.index.SearchIndex;
import nu.marginalia.index.index.SearchIndexSearchTerms;
import nu.marginalia.index.query.IndexQueryPriority;
@ -41,7 +41,7 @@ import java.util.*;
import java.util.stream.Collectors;
@Singleton
public class IndexQueryService extends IndexApiImplBase {
public class IndexQueryService extends IndexApiGrpc.IndexApiImplBase {
private final Logger logger = LoggerFactory.getLogger(getClass());
@ -122,8 +122,8 @@ public class IndexQueryService extends IndexApiImplBase {
// GRPC endpoint
@SneakyThrows
public void query(nu.marginalia.index.api.RpcIndexQuery request,
io.grpc.stub.StreamObserver<nu.marginalia.index.api.RpcDecoratedResultItem> responseObserver) {
public void query(RpcIndexQuery request,
StreamObserver<RpcDecoratedResultItem> responseObserver) {
try {
var params = new SearchParameters(request, getSearchSet(request));

View File

@ -1,11 +1,11 @@
package nu.marginalia.index.svc;
import gnu.trove.set.hash.TLongHashSet;
import nu.marginalia.index.api.RpcIndexQuery;
import nu.marginalia.index.client.IndexProtobufCodec;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.RpcIndexQuery;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.IndexProtobufCodec;
import nu.marginalia.index.index.SearchIndex;
import nu.marginalia.index.index.SearchIndexSearchTerms;
import nu.marginalia.index.query.IndexQuery;

View File

@ -3,8 +3,8 @@ package nu.marginalia.index.svc;
import com.google.inject.Singleton;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.index.SearchIndexSearchTerms;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -1,7 +1,6 @@
package nu.marginalia.index.svc.searchset;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.searchset.SearchSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -43,13 +43,11 @@ dependencies {
implementation project(':code:common:service')
implementation project(':code:common:renderer')
implementation project(':code:api:index-api')
implementation project(':code:api:query-api')
implementation project(':code:common:service-discovery')
implementation project(':code:features-qs:query-parser')
implementation project(':code:features-index:index-query')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:term-frequency-dict')
implementation project(':code:functions:search-query')
implementation project(':code:functions:search-query:api')
implementation project(':code:functions:domain-links:api')
implementation project(':code:functions:domain-links:aggregate')

View File

@ -2,12 +2,12 @@ package nu.marginalia.query;
import com.google.gson.Gson;
import com.google.inject.Inject;
import nu.marginalia.functions.searchquery.QueryGRPCService;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.query.svc.QueryFactory;
import spark.Request;
import spark.Response;
@ -16,20 +16,16 @@ import java.util.Map;
public class QueryBasicInterface {
private final MustacheRenderer<Object> renderer;
private final QueryFactory queryFactory;
private final Gson gson = GsonFactory.get();
private final QueryGRPCService queryGRPCService;
@Inject
public QueryBasicInterface(RendererFactory rendererFactory,
QueryFactory queryFactory,
QueryGRPCService queryGRPCService
) throws IOException
{
this.renderer = rendererFactory.renderer("search");
this.queryFactory = queryFactory;
this.queryGRPCService = queryGRPCService;
}
@ -43,17 +39,15 @@ public class QueryBasicInterface {
int domainCount = request.queryParams("domainCount") == null ? 5 : Integer.parseInt(request.queryParams("domainCount"));
String set = request.queryParams("set") == null ? "" : request.queryParams("set");
var query = queryFactory.createQuery(new QueryParams(queryParam, new QueryLimits(
var params = new QueryParams(queryParam, new QueryLimits(
domainCount, count, 250, 8192
), set));
), set);
var rsp = queryGRPCService.executeQueries(QueryProtobufCodec.convertQuery(queryParam, query), count);
var results = rsp.stream().map(QueryProtobufCodec::convertQueryResult).toList();
var results = queryGRPCService.executeDirect(queryParam, params, count);
if (request.headers("Accept").contains("application/json")) {
response.type("application/json");
return gson.toJson(rsp);
return gson.toJson(results);
}
else {
return renderer.render(

View File

@ -4,6 +4,7 @@ import com.google.inject.Inject;
import io.prometheus.client.Histogram;
import lombok.SneakyThrows;
import nu.marginalia.functions.domainlinks.AggregateDomainLinksService;
import nu.marginalia.functions.searchquery.QueryGRPCService;
import nu.marginalia.service.discovery.property.ServicePartition;
import nu.marginalia.service.server.BaseServiceParams;
import nu.marginalia.service.server.Service;

View File

@ -1,15 +1,15 @@
package nu.marginalia.query.svc;
import nu.marginalia.WmsaHome;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.index.query.limit.SpecificationLimitType;
import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.util.language.EnglishDictionary;
import nu.marginalia.util.ngrams.NGramBloomFilter;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

View File

@ -21,6 +21,9 @@ include 'code:functions:domain-links:partition'
include 'code:functions:domain-links:aggregate'
include 'code:functions:domain-links:api'
include 'code:functions:search-query'
include 'code:functions:search-query:api'
include 'code:libraries:array'
include 'code:libraries:geo-ip'
include 'code:libraries:btree'
@ -40,7 +43,6 @@ include 'code:libraries:message-queue'
include 'code:features-search:screenshots'
include 'code:features-search:random-websites'
include 'code:features-search:feedlot-client'
include 'code:features-qs:query-parser'
include 'code:features-index:result-ranking'
include 'code:features-convert:adblock'
@ -63,7 +65,6 @@ include 'code:features-index:index-forward'
include 'code:features-index:index-reverse'
include 'code:features-index:domain-ranking'
include 'code:api:query-api'
include 'code:api:index-api'
include 'code:api:process-mqapi'
include 'code:api:executor-api'