* Extract the search-query function
This commit is contained in:
parent
66c1281301
commit
3fd2a83184
@ -1,7 +1,5 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
|
||||
id "com.google.protobuf" version "0.9.4"
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
@ -11,16 +9,6 @@ java {
|
||||
}
|
||||
}
|
||||
|
||||
sourceSets {
|
||||
main {
|
||||
proto {
|
||||
srcDir 'src/main/protobuf'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
apply from: "$rootProject.projectDir/protobuf.gradle"
|
||||
|
||||
dependencies {
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:common:config')
|
||||
@ -28,6 +16,8 @@ dependencies {
|
||||
implementation project(':code:libraries:message-queue')
|
||||
implementation project(':code:features-index:index-query')
|
||||
|
||||
implementation project(':code:functions:search-query:api')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.prometheus
|
||||
|
@ -1,7 +1,8 @@
|
||||
package nu.marginalia.index.client;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.IndexProtobufCodec;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
@ -14,6 +14,7 @@ dependencies {
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:common:service')
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:functions:search-query:api')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
implementation libs.guice
|
||||
|
@ -1,6 +1,7 @@
|
||||
package nu.marginalia.ranking;
|
||||
|
||||
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
|
||||
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
package nu.marginalia.ranking;
|
||||
|
||||
import nu.marginalia.index.client.model.results.ResultRankingContext;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.model.crawl.HtmlFeature;
|
||||
import nu.marginalia.model.crawl.PubDate;
|
||||
import nu.marginalia.model.idx.DocumentFlags;
|
||||
|
@ -1,8 +1,8 @@
|
||||
package nu.marginalia.ranking.factors;
|
||||
|
||||
import nu.marginalia.index.client.model.results.Bm25Parameters;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingContext;
|
||||
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.api.searchquery.model.results.Bm25Parameters;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.model.idx.WordFlags;
|
||||
import nu.marginalia.ranking.ResultKeywordSet;
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
package nu.marginalia.ranking.factors;
|
||||
|
||||
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
|
||||
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
@ -1,10 +0,0 @@
|
||||
# Query Parser
|
||||
|
||||
End-user search query parsing tools used by the [query-service](../../services-core/query-service).
|
||||
|
||||
## Central Classes
|
||||
|
||||
* [QueryTokenizer](src/main/java/nu/marginalia/query_parser/QueryTokenizer.java)
|
||||
* [QueryParser](src/main/java/nu/marginalia/query_parser/QueryParser.java)
|
||||
* [QueryPermutation](src/main/java/nu/marginalia/query_parser/QueryPermutation.java) - here be dragons
|
||||
* [QueryVariants](src/main/java/nu/marginalia/query_parser/QueryVariants.java) - here be dragons
|
@ -1,57 +0,0 @@
|
||||
package nu.marginalia.ngrams;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
class DenseBitMapTest {
|
||||
|
||||
@Test
|
||||
public void testSetAll() {
|
||||
var dbm = new DenseBitMap(129);
|
||||
for (int i = 0; i < dbm.cardinality; i++) {
|
||||
dbm.set(i);
|
||||
}
|
||||
|
||||
for (int i = 0; i < dbm.cardinality; i++) {
|
||||
assertTrue(dbm.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetEven() {
|
||||
var dbm = new DenseBitMap(131);
|
||||
for (int i = 0; i < dbm.cardinality; i+=2) {
|
||||
dbm.set(i);
|
||||
}
|
||||
|
||||
for (int i = 0; i < dbm.cardinality; i+=2) {
|
||||
assertTrue(dbm.get(i));
|
||||
}
|
||||
|
||||
for (int i = 1; i < dbm.cardinality; i+=2) {
|
||||
assertFalse(dbm.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetAllClearSome() {
|
||||
var dbm = new DenseBitMap(129);
|
||||
|
||||
for (int i = 0; i < dbm.cardinality; i++) {
|
||||
dbm.set(i);
|
||||
}
|
||||
for (int i = 1; i < dbm.cardinality; i+=2) {
|
||||
dbm.clear(i);
|
||||
}
|
||||
|
||||
for (int i = 0; i < dbm.cardinality; i+=2) {
|
||||
assertTrue(dbm.get(i), "Expected " + i + " to be set");
|
||||
}
|
||||
|
||||
for (int i = 1; i < dbm.cardinality; i+=2) {
|
||||
assertFalse(dbm.get(i), "Expected " + i + " to be clear");
|
||||
}
|
||||
}
|
||||
}
|
@ -1,116 +0,0 @@
|
||||
package nu.marginalia.query_parser;
|
||||
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import nu.marginalia.query_parser.token.TokenType;
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
class BodyQueryParserTest {
|
||||
private QueryParser parser;
|
||||
private static TermFrequencyDict dict;
|
||||
private static EnglishDictionary englishDictionary;
|
||||
private static NGramBloomFilter nGramBloomFilter;
|
||||
private static final LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
private QueryPermutation permutation;
|
||||
|
||||
@BeforeAll
|
||||
public static void init() throws IOException {
|
||||
dict = new TermFrequencyDict(lm);
|
||||
nGramBloomFilter = new NGramBloomFilter(lm);
|
||||
englishDictionary = new EnglishDictionary(dict);
|
||||
}
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() {
|
||||
parser = new QueryParser();
|
||||
permutation = new QueryPermutation(new QueryVariants(lm, dict, nGramBloomFilter, englishDictionary));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTitleMatcher() {
|
||||
List<String> terms = List.of("3d", "realms");
|
||||
assertEquals(2, terms.stream().map(String::toLowerCase).filter("3D Realms Site: Forums".toLowerCase()::contains).count());
|
||||
}
|
||||
@Test
|
||||
void parseSimple() {
|
||||
var results = parser.parse("hello");
|
||||
results.forEach(System.out::println);
|
||||
assertEquals(1, results.size());
|
||||
assertEquals(TokenType.LITERAL_TERM, results.get(0).type);
|
||||
assertEquals("hello", results.get(0).str);
|
||||
}
|
||||
|
||||
@Test
|
||||
void parseQuotes() {
|
||||
var results = parser.parse("\u201Chello world\u201D");
|
||||
results.forEach(System.out::println);
|
||||
assertEquals(TokenType.QUOT_TERM, results.get(0).type);
|
||||
assertEquals("hello_world", results.get(0).str);
|
||||
assertEquals("\"hello world\"", results.get(0).displayStr);
|
||||
}
|
||||
|
||||
@Test
|
||||
void parseExclude() {
|
||||
var results = parser.parse("-Hello");
|
||||
results.forEach(System.out::println);
|
||||
assertEquals(TokenType.EXCLUDE_TERM, results.get(0).type);
|
||||
assertEquals("hello", results.get(0).str);
|
||||
assertEquals("-hello", results.get(0).displayStr);
|
||||
}
|
||||
|
||||
@Test
|
||||
void parseNear() {
|
||||
var results = parser.parse("near:memex.marginalia.nu");
|
||||
results.forEach(System.out::println);
|
||||
assertEquals(TokenType.NEAR_TERM, results.get(0).type);
|
||||
assertEquals("memex.marginalia.nu", results.get(0).str);
|
||||
assertEquals("near:memex.marginalia.nu", results.get(0).displayStr);
|
||||
}
|
||||
|
||||
@Test
|
||||
void parseCombined() {
|
||||
for (var list : permutation.permuteQueries(parser.parse("dune 2 remake"))) {
|
||||
for (var t: list) {
|
||||
System.out.printf("%s ", t.str);
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
@Test
|
||||
void parseCombinedDOS() {
|
||||
for (var list : permutation.permuteQueries(parser.parse("ab ba baa abba baba ab ba"))) {
|
||||
for (var t: list) {
|
||||
System.out.printf("%s ", t.str);
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void parseCombinedSuperman() {
|
||||
for (var list : permutation.permuteQueries(parser.parse("wizardry proving grounds of the mad overlord"))) {
|
||||
for (var t: list) {
|
||||
System.out.printf("%s ", t.str);
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
@Test
|
||||
void testEdgeCases() {
|
||||
parser.parse("site:localhost 3D").forEach(System.out::println);
|
||||
parser.parse("-wolfenstein 3D").forEach(System.out::println);
|
||||
parser.parse("-wolfenstein 3D \"").forEach(System.out::println);
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,93 +0,0 @@
|
||||
package nu.marginalia.query_parser;
|
||||
|
||||
import nu.marginalia.query_parser.token.Token;
|
||||
import nu.marginalia.query_parser.token.TokenType;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
class QueryParserTest {
|
||||
private static QueryParser parser;
|
||||
|
||||
@BeforeAll
|
||||
public static void setUp() {
|
||||
parser = new QueryParser();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTabHandling() {
|
||||
var query = " lorem\tipsum\ndolor sit";
|
||||
var ret = parser.parse(query);
|
||||
assertEquals(4, ret.size());
|
||||
|
||||
var lorem = ret.get(0);
|
||||
assertEquals("lorem", lorem.str);
|
||||
assertEquals("lorem", lorem.displayStr);
|
||||
|
||||
var ipsum = ret.get(1);
|
||||
assertEquals("ipsum", ipsum.str);
|
||||
assertEquals("ipsum", ipsum.displayStr);
|
||||
|
||||
var dolor = ret.get(2);
|
||||
assertEquals("dolor", dolor.str);
|
||||
assertEquals("dolor", dolor.displayStr);
|
||||
|
||||
var sit = ret.get(3);
|
||||
assertEquals("sit", sit.str);
|
||||
assertEquals("sit", sit.displayStr);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAdviceString() {
|
||||
var ret = parser.parse("alcibiades (plato) \"my query\" -cars");
|
||||
assertEquals(4, ret.size());
|
||||
|
||||
var alcibiades = ret.get(0);
|
||||
assertEquals(TokenType.LITERAL_TERM, alcibiades.type);
|
||||
assertEquals("alcibiades", alcibiades.str);
|
||||
assertEquals("alcibiades", alcibiades.displayStr);
|
||||
|
||||
var plato = ret.get(1);
|
||||
assertEquals(TokenType.ADVICE_TERM, plato.type);
|
||||
assertEquals("plato", plato.str);
|
||||
assertEquals("(plato)", plato.displayStr);
|
||||
|
||||
var my_query = ret.get(2);
|
||||
assertEquals(TokenType.QUOT_TERM, my_query.type);
|
||||
assertEquals("my_query", my_query.str);
|
||||
assertEquals("\"my query\"", my_query.displayStr);
|
||||
|
||||
var not_cars = ret.get(3);
|
||||
assertEquals(TokenType.EXCLUDE_TERM, not_cars.type);
|
||||
assertEquals("cars", not_cars.str);
|
||||
assertEquals("-cars", not_cars.displayStr);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseYear() {
|
||||
System.out.println(parser.parse("year>2000"));
|
||||
System.out.println(parser.parse("year=2000"));
|
||||
System.out.println(parser.parse("year<2000"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonAsciiNames() {
|
||||
verifyParseResult("André the Giant", "andre", "the", "giant");
|
||||
verifyParseResult("Stanisław Lem", "stanislaw", "lem");
|
||||
verifyParseResult("Nicolae Ceaușescu", "nicolae", "ceausescu");
|
||||
verifyParseResult("Þorrablót", "thorrablot");
|
||||
verifyParseResult("Karolis Koncevičius", "karolis", "koncevicius");
|
||||
}
|
||||
|
||||
private void verifyParseResult(String query, String... expectedTokens) {
|
||||
assertArrayEquals(expectedTokens, getTokenStrings(parser.parse(query)));
|
||||
}
|
||||
private String[] getTokenStrings(List<Token> tokens) {
|
||||
return tokens.stream().map(t -> t.str).toArray(String[]::new);
|
||||
}
|
||||
|
||||
}
|
@ -1,75 +0,0 @@
|
||||
package nu.marginalia.query_parser;
|
||||
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.language.sentence.SentenceExtractor;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
class QueryVariantsTest {
|
||||
static QueryVariants variants;
|
||||
static QueryParser parser;
|
||||
static SentenceExtractor se;
|
||||
|
||||
@BeforeAll
|
||||
public static void setUp() throws IOException {
|
||||
LanguageModels lm = TestLanguageModels.getLanguageModels();
|
||||
|
||||
se = new SentenceExtractor(lm);
|
||||
|
||||
var dict = new TermFrequencyDict(lm);
|
||||
var ngrams = new NGramBloomFilter(lm);
|
||||
variants = new QueryVariants(lm, dict, ngrams, new EnglishDictionary(dict));
|
||||
parser = new QueryParser();
|
||||
}
|
||||
|
||||
@Test
|
||||
void getQueryFood() {
|
||||
System.out.println(se.extractSentence("we are alone"));
|
||||
testCase("Omelet recipe");
|
||||
}
|
||||
|
||||
@Test
|
||||
void queryNegation() {
|
||||
System.out.println(se.extractSentence("salt lake -city"));
|
||||
testCase("salt lake -city");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void getQueryVariants() {
|
||||
System.out.println(se.extractSentence("we are alone"));
|
||||
testCase("inside job reviews");
|
||||
testCase("plato apology");
|
||||
testCase("mechanical keyboard");
|
||||
testCase("DOS");
|
||||
testCase("dos");
|
||||
testCase("we are alone");
|
||||
testCase("3D Realms");
|
||||
testCase("I am alone");
|
||||
testCase("plato cave");
|
||||
testCase("The internet is dead");
|
||||
|
||||
testCase("TRS80");
|
||||
testCase("TRS-80");
|
||||
testCase("TRS-80");
|
||||
testCase("Raspberry Pi 2");
|
||||
testCase("Duke Nukem 3D");
|
||||
testCase("The Man of Tomorrow");
|
||||
testCase("Computer Manual");
|
||||
testCase("Knitting");
|
||||
testCase("capcom");
|
||||
testCase("the man of tomorrow");
|
||||
|
||||
}
|
||||
|
||||
private void testCase(String input) {
|
||||
var tokens = variants.getQueryVariants(parser.parse(input));
|
||||
System.out.println(tokens);
|
||||
}
|
||||
}
|
@ -1,120 +0,0 @@
|
||||
package nu.marginalia.transform_list;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
class TransformListTest {
|
||||
|
||||
@Test
|
||||
void transformEach() {
|
||||
|
||||
List<Integer> values = Stream.of(1,2,3,4).collect(Collectors.toList());
|
||||
new TransformList<>(values).transformEach(e -> {
|
||||
int v = e.value();
|
||||
if (v == 1) e.remove();
|
||||
if (v == 2) e.replace(5);
|
||||
if (v == 4) e.remove();
|
||||
});
|
||||
|
||||
assertEquals(List.of(5,3), values);
|
||||
}
|
||||
|
||||
@Test
|
||||
void transformEachPairRemoveReplace() {
|
||||
List<Integer> values = Stream.of(1,2,3,4,5,6).collect(Collectors.toList());
|
||||
new TransformList<>(values).transformEachPair((a,b) -> {
|
||||
System.out.println(a.value() + ":" + b.value());
|
||||
int v = a.value();
|
||||
if (v == 1 || v == 3 || v == 5) {
|
||||
a.remove();
|
||||
b.replace(-b.value());
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
assertEquals(List.of(-2, -4, -6), values);
|
||||
}
|
||||
|
||||
@Test
|
||||
void transformEachPairRemoveRemove() {
|
||||
List<Integer> values = Stream.of(1,2,3,4,5,6).collect(Collectors.toList());
|
||||
new TransformList<>(values).transformEachPair((a,b) -> {
|
||||
System.out.println(a.value() + ":" + b.value());
|
||||
int v = a.value();
|
||||
if (v == 1 || v == 3 || v == 5) {
|
||||
a.remove();
|
||||
b.remove();
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
assertEquals(List.of(), values);
|
||||
}
|
||||
|
||||
@Test
|
||||
void transformEachPairReplaceRemove() {
|
||||
List<Integer> values = Stream.of(1,2,3,4,5,6).collect(Collectors.toList());
|
||||
new TransformList<>(values).transformEachPair((a,b) -> {
|
||||
System.out.println(a.value() + ":" + b.value());
|
||||
int v = a.value();
|
||||
if (v == 1 || v == 3 || v == 5) {
|
||||
a.replace(-a.value());
|
||||
b.remove();
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
assertEquals(List.of(-1, -3, -5), values);
|
||||
}
|
||||
|
||||
@Test
|
||||
void transformEachPairReplaceReplace() {
|
||||
List<Integer> values = Stream.of(1,2,3,4,5,6).collect(Collectors.toList());
|
||||
new TransformList<>(values).transformEachPair((a,b) -> {
|
||||
System.out.println(a.value() + ":" + b.value());
|
||||
int v = a.value();
|
||||
if (v == 1 || v == 3 || v == 5) {
|
||||
a.replace(-a.value());
|
||||
b.replace(-b.value());
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
assertEquals(List.of(-1, -2, -3, -4, -5, -6), values);
|
||||
}
|
||||
|
||||
@Test
|
||||
void scanAndTransform() {
|
||||
List<Integer> values = Stream.of(1,2,3,4,5,6,7,8,9,10).collect(Collectors.toList());
|
||||
new TransformList<>(values).scanAndTransform(Integer.valueOf(3)::equals, Integer.valueOf(7)::equals, entity -> {
|
||||
entity.replace(entity.value() * 2);
|
||||
});
|
||||
|
||||
assertEquals(List.of(1,2,6,8,10,12,14,8,9,10), values);
|
||||
}
|
||||
|
||||
@Test
|
||||
void scanAndTransformEndsAtEnd() {
|
||||
List<Integer> values = Stream.of(1,2,3,4,5,6,7,8,9,10).collect(Collectors.toList());
|
||||
new TransformList<>(values).scanAndTransform(Integer.valueOf(3)::equals, Integer.valueOf(10)::equals, entity -> {
|
||||
entity.replace(entity.value() * 2);
|
||||
});
|
||||
|
||||
assertEquals(List.of(1,2,6,8,10,12,14,16,18,20), values);
|
||||
}
|
||||
|
||||
@Test
|
||||
void scanAndTransformOverlap() {
|
||||
List<Integer> values = Stream.of(1,2,3,3,5,7,7,8,9,10).collect(Collectors.toList());
|
||||
new TransformList<>(values).scanAndTransform(Integer.valueOf(3)::equals, Integer.valueOf(7)::equals, entity -> {
|
||||
entity.replace(entity.value() * 2);
|
||||
});
|
||||
|
||||
assertEquals(List.of(1, 2, 6, 6, 10, 14, 7, 8, 9, 10), values);
|
||||
}
|
||||
}
|
@ -1,38 +0,0 @@
|
||||
package nu.marginalia.util;
|
||||
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.WmsaHome;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Optional;
|
||||
|
||||
public class TestLanguageModels {
|
||||
private static final Path LANGUAGE_MODELS_DEFAULT = WmsaHome.getHomePath().resolve("model");
|
||||
|
||||
public static Path getLanguageModelsPath() {
|
||||
final Path languageModelsHome = Optional.ofNullable(System.getenv("LANGUAGE_MODELS_HOME"))
|
||||
.map(Path::of)
|
||||
.orElse(LANGUAGE_MODELS_DEFAULT);
|
||||
|
||||
if (!Files.isDirectory(languageModelsHome)) {
|
||||
throw new IllegalStateException("Could not find $LANGUAGE_MODELS_HOME, see doc/language-models.md");
|
||||
}
|
||||
return languageModelsHome;
|
||||
}
|
||||
|
||||
public static LanguageModels getLanguageModels() {
|
||||
|
||||
var languageModelsHome = getLanguageModelsPath();
|
||||
|
||||
return new LanguageModels(
|
||||
languageModelsHome.resolve("ngrams.bin"),
|
||||
languageModelsHome.resolve("tfreq-new-algo3.bin"),
|
||||
languageModelsHome.resolve("opennlp-sentence.bin"),
|
||||
languageModelsHome.resolve("English.RDR"),
|
||||
languageModelsHome.resolve("English.DICT"),
|
||||
languageModelsHome.resolve("opennlp-tokens.bin"),
|
||||
languageModelsHome.resolve("lid.176.ftz")
|
||||
);
|
||||
}
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
**Note**: This package is called `features-qs` rather than `features-query` because the latter,
|
||||
though more consistent with other packages like features-index, would be very confusing
|
||||
as there are other packages elsewhere with the 'query' name (e.g. features-index/index-query).
|
||||
|
||||
## Contents
|
||||
|
||||
* [query-parser](query-parser/) contains code for parsing the user-facing query grammar.
|
@ -19,7 +19,6 @@ dependencies {
|
||||
implementation libs.bundles.grpc
|
||||
implementation libs.notnull
|
||||
implementation libs.guice
|
||||
implementation libs.spark
|
||||
implementation libs.opencsv
|
||||
implementation libs.trove
|
||||
implementation libs.fastutil
|
||||
|
@ -1,5 +1,7 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
|
||||
id "com.google.protobuf" version "0.9.4"
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
@ -9,24 +11,34 @@ java {
|
||||
}
|
||||
}
|
||||
|
||||
jar.archiveBaseName = 'search-query-api'
|
||||
|
||||
sourceSets {
|
||||
main {
|
||||
proto {
|
||||
srcDir 'src/main/protobuf'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
apply from: "$rootProject.projectDir/protobuf.gradle"
|
||||
|
||||
dependencies {
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:libraries:message-queue')
|
||||
implementation project(':code:features-index:index-query')
|
||||
implementation project(':code:common:service-discovery')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.roaringbitmap
|
||||
implementation libs.prometheus
|
||||
implementation libs.notnull
|
||||
implementation libs.trove
|
||||
implementation libs.guice
|
||||
implementation libs.gson
|
||||
implementation libs.bundles.grpc
|
||||
implementation libs.protobuf
|
||||
implementation libs.javax.annotation
|
||||
implementation libs.bundles.grpc
|
||||
implementation libs.fastutil
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
@ -1,9 +1,9 @@
|
||||
package nu.marginalia.index.client;
|
||||
package nu.marginalia.api.searchquery;
|
||||
|
||||
import nu.marginalia.index.api.*;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.client.model.results.Bm25Parameters;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.*;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.results.Bm25Parameters;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimitType;
|
@ -1,12 +1,10 @@
|
||||
package nu.marginalia.query.client;
|
||||
package nu.marginalia.api.searchquery;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import io.prometheus.client.Summary;
|
||||
import nu.marginalia.index.api.*;
|
||||
import nu.marginalia.query.QueryProtobufCodec;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.query.model.QueryResponse;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
||||
import nu.marginalia.service.client.GrpcChannelPoolFactory;
|
||||
import nu.marginalia.service.client.GrpcSingleNodeChannelPool;
|
||||
import nu.marginalia.service.discovery.property.ServiceKey;
|
@ -1,25 +1,21 @@
|
||||
package nu.marginalia.query;
|
||||
package nu.marginalia.api.searchquery;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.index.api.*;
|
||||
import nu.marginalia.index.client.IndexProtobufCodec;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.client.model.results.SearchResultItem;
|
||||
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.query.model.ProcessedQuery;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.query.model.QueryResponse;
|
||||
import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static nu.marginalia.index.client.IndexProtobufCodec.*;
|
||||
|
||||
public class QueryProtobufCodec {
|
||||
|
||||
public static RpcIndexQuery convertQuery(RpcQsQuery request, ProcessedQuery query) {
|
||||
@ -34,11 +30,11 @@ public class QueryProtobufCodec {
|
||||
builder.setSearchSetIdentifier(query.specs.searchSetIdentifier);
|
||||
builder.setHumanQuery(request.getHumanQuery());
|
||||
|
||||
builder.setQuality(convertSpecLimit(query.specs.quality));
|
||||
builder.setYear(convertSpecLimit(query.specs.year));
|
||||
builder.setSize(convertSpecLimit(query.specs.size));
|
||||
builder.setRank(convertSpecLimit(query.specs.rank));
|
||||
builder.setDomainCount(convertSpecLimit(query.specs.domainCount));
|
||||
builder.setQuality(IndexProtobufCodec.convertSpecLimit(query.specs.quality));
|
||||
builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
|
||||
builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
|
||||
builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank));
|
||||
builder.setDomainCount(IndexProtobufCodec.convertSpecLimit(query.specs.domainCount));
|
||||
|
||||
builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits));
|
||||
|
||||
@ -63,11 +59,11 @@ public class QueryProtobufCodec {
|
||||
builder.setSearchSetIdentifier(query.specs.searchSetIdentifier);
|
||||
builder.setHumanQuery(humanQuery);
|
||||
|
||||
builder.setQuality(convertSpecLimit(query.specs.quality));
|
||||
builder.setYear(convertSpecLimit(query.specs.year));
|
||||
builder.setSize(convertSpecLimit(query.specs.size));
|
||||
builder.setRank(convertSpecLimit(query.specs.rank));
|
||||
builder.setDomainCount(convertSpecLimit(query.specs.domainCount));
|
||||
builder.setQuality(IndexProtobufCodec.convertSpecLimit(query.specs.quality));
|
||||
builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
|
||||
builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
|
||||
builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank));
|
||||
builder.setDomainCount(IndexProtobufCodec.convertSpecLimit(query.specs.domainCount));
|
||||
|
||||
builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits));
|
||||
|
||||
@ -92,11 +88,11 @@ public class QueryProtobufCodec {
|
||||
request.getTacitExcludesList(),
|
||||
request.getTacitPriorityList(),
|
||||
request.getTacitAdviceList(),
|
||||
convertSpecLimit(request.getQuality()),
|
||||
convertSpecLimit(request.getYear()),
|
||||
convertSpecLimit(request.getSize()),
|
||||
convertSpecLimit(request.getRank()),
|
||||
convertSpecLimit(request.getDomainCount()),
|
||||
IndexProtobufCodec.convertSpecLimit(request.getQuality()),
|
||||
IndexProtobufCodec.convertSpecLimit(request.getYear()),
|
||||
IndexProtobufCodec.convertSpecLimit(request.getSize()),
|
||||
IndexProtobufCodec.convertSpecLimit(request.getRank()),
|
||||
IndexProtobufCodec.convertSpecLimit(request.getDomainCount()),
|
||||
request.getDomainIdsList(),
|
||||
IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()),
|
||||
request.getSearchSetIdentifier(),
|
||||
@ -167,7 +163,7 @@ public class QueryProtobufCodec {
|
||||
List<SearchSubquery> subqueries = new ArrayList<>(specs.getSubqueriesCount());
|
||||
|
||||
for (int i = 0; i < specs.getSubqueriesCount(); i++) {
|
||||
subqueries.add(convertSearchSubquery(specs.getSubqueries(i)));
|
||||
subqueries.add(IndexProtobufCodec.convertSearchSubquery(specs.getSubqueries(i)));
|
||||
}
|
||||
|
||||
return new SearchSpecification(
|
||||
@ -182,7 +178,7 @@ public class QueryProtobufCodec {
|
||||
IndexProtobufCodec.convertSpecLimit(specs.getDomainCount()),
|
||||
IndexProtobufCodec.convertQueryLimits(specs.getQueryLimits()),
|
||||
QueryStrategy.valueOf(specs.getQueryStrategy()),
|
||||
convertRankingParameterss(specs.getParameters())
|
||||
IndexProtobufCodec.convertRankingParameterss(specs.getParameters())
|
||||
);
|
||||
}
|
||||
|
||||
@ -194,11 +190,11 @@ public class QueryProtobufCodec {
|
||||
.addAllTacitIncludes(params.tacitIncludes())
|
||||
.addAllTacitPriority(params.tacitPriority())
|
||||
.setHumanQuery(params.humanQuery())
|
||||
.setQueryLimits(convertQueryLimits(params.limits()))
|
||||
.setQuality(convertSpecLimit(params.quality()))
|
||||
.setYear(convertSpecLimit(params.year()))
|
||||
.setSize(convertSpecLimit(params.size()))
|
||||
.setRank(convertSpecLimit(params.rank()))
|
||||
.setQueryLimits(IndexProtobufCodec.convertQueryLimits(params.limits()))
|
||||
.setQuality(IndexProtobufCodec.convertSpecLimit(params.quality()))
|
||||
.setYear(IndexProtobufCodec.convertSpecLimit(params.year()))
|
||||
.setSize(IndexProtobufCodec.convertSpecLimit(params.size()))
|
||||
.setRank(IndexProtobufCodec.convertSpecLimit(params.rank()))
|
||||
.setSearchSetIdentifier(params.identifier())
|
||||
.setQueryStrategy(params.queryStrategy().name())
|
||||
.setTemporalBias(RpcTemporalBias.newBuilder()
|
||||
@ -227,4 +223,5 @@ public class QueryProtobufCodec {
|
||||
rpcDecoratedResultItem.getRankingScore()
|
||||
);
|
||||
}
|
||||
|
||||
}
|
@ -1,6 +1,4 @@
|
||||
package nu.marginalia.query.model;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
package nu.marginalia.api.searchquery.model.query;
|
||||
|
||||
import java.util.*;
|
||||
|
@ -1,10 +1,9 @@
|
||||
package nu.marginalia.query.model;
|
||||
package nu.marginalia.api.searchquery.model.query;
|
||||
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.List;
|
||||
|
@ -1,7 +1,6 @@
|
||||
package nu.marginalia.query.model;
|
||||
package nu.marginalia.api.searchquery.model.query;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.index.client.model.query;
|
||||
package nu.marginalia.api.searchquery.model.query;
|
||||
|
||||
/** Identifies a RankingSearchSet, associated with an EdgeSearchProfile
|
||||
*
|
@ -1,7 +1,7 @@
|
||||
package nu.marginalia.index.client.model.query;
|
||||
package nu.marginalia.api.searchquery.model.query;
|
||||
|
||||
import lombok.*;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.index.client.model.query;
|
||||
package nu.marginalia.api.searchquery.model.query;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.EqualsAndHashCode;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.index.client.model.results;
|
||||
package nu.marginalia.api.searchquery.model.results;
|
||||
|
||||
/** Tuning parameters for BM25.
|
||||
*
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.index.client.model.results;
|
||||
package nu.marginalia.api.searchquery.model.results;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.index.client.model.results;
|
||||
package nu.marginalia.api.searchquery.model.results;
|
||||
|
||||
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
|
||||
import lombok.ToString;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.index.client.model.results;
|
||||
package nu.marginalia.api.searchquery.model.results;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.index.client.model.results;
|
||||
package nu.marginalia.api.searchquery.model.results;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.index.client.model.results;
|
||||
package nu.marginalia.api.searchquery.model.results;
|
||||
|
||||
import nu.marginalia.model.idx.WordFlags;
|
||||
import nu.marginalia.model.idx.WordMetadata;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.index.client.model.results;
|
||||
package nu.marginalia.api.searchquery.model.results;
|
||||
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.index.client.model.results;
|
||||
package nu.marginalia.api.searchquery.model.results;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -1,7 +1,7 @@
|
||||
syntax="proto3";
|
||||
package actorapi;
|
||||
package nu.marginalia.api.searchquery;
|
||||
|
||||
option java_package="nu.marginalia.index.api";
|
||||
option java_package="nu.marginalia.api.searchquery";
|
||||
option java_multiple_files=true;
|
||||
|
||||
service QueryApi {
|
||||
@ -35,11 +35,11 @@ message RpcQsQuery {
|
||||
|
||||
/* Query service query response */
|
||||
message RpcQsResponse {
|
||||
RpcIndexQuery specs = 1;
|
||||
repeated RpcDecoratedResultItem results = 2;
|
||||
repeated string searchTermsHuman = 3;
|
||||
repeated string problems = 4;
|
||||
string domain = 5;
|
||||
RpcIndexQuery specs = 1;
|
||||
repeated RpcDecoratedResultItem results = 2;
|
||||
repeated string searchTermsHuman = 3;
|
||||
repeated string problems = 4;
|
||||
string domain = 5;
|
||||
}
|
||||
|
||||
message RpcTemporalBias {
|
@ -1,34 +1,46 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
|
||||
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(21))
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation project(':code:api:index-api')
|
||||
|
||||
implementation project(':code:common:db')
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:common:service-discovery')
|
||||
|
||||
implementation project(':code:functions:search-query:api')
|
||||
|
||||
implementation project(':code:features-index:index-query')
|
||||
|
||||
implementation project(':code:libraries:language-processing')
|
||||
implementation project(':code:libraries:term-frequency-dict')
|
||||
|
||||
implementation project(':third-party:porterstemmer')
|
||||
implementation project(':code:libraries:language-processing')
|
||||
implementation project(':code:libraries:term-frequency-dict')
|
||||
implementation project(':code:features-convert:keyword-extraction')
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:common:model')
|
||||
|
||||
implementation project(':third-party:porterstemmer')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
implementation libs.bundles.nlp
|
||||
|
||||
implementation libs.bundles.handlebars
|
||||
implementation libs.trove
|
||||
implementation libs.prometheus
|
||||
implementation libs.bundles.grpc
|
||||
implementation libs.notnull
|
||||
implementation libs.guice
|
||||
implementation libs.trove
|
||||
implementation libs.fastutil
|
||||
implementation libs.bundles.gson
|
||||
implementation libs.bundles.mariadb
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
}
|
||||
|
@ -1,15 +1,18 @@
|
||||
package nu.marginalia.query;
|
||||
package nu.marginalia.functions.searchquery;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import io.grpc.stub.StreamObserver;
|
||||
import io.prometheus.client.Histogram;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
|
||||
import nu.marginalia.api.searchquery.*;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||
import nu.marginalia.db.DomainBlacklist;
|
||||
import nu.marginalia.index.api.*;
|
||||
import nu.marginalia.functions.searchquery.svc.QueryFactory;
|
||||
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.model.id.UrlIdCodec;
|
||||
import nu.marginalia.query.svc.QueryFactory;
|
||||
import nu.marginalia.service.client.GrpcChannelPoolFactory;
|
||||
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
|
||||
import nu.marginalia.service.discovery.property.ServiceKey;
|
||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||
import org.slf4j.Logger;
|
||||
@ -48,8 +51,7 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
|
||||
IndexApiGrpc::newBlockingStub);
|
||||
}
|
||||
|
||||
public void query(nu.marginalia.index.api.RpcQsQuery request,
|
||||
io.grpc.stub.StreamObserver<nu.marginalia.index.api.RpcQsResponse> responseObserver)
|
||||
public void query(RpcQsQuery request, StreamObserver<RpcQsResponse> responseObserver)
|
||||
{
|
||||
try {
|
||||
wmsa_qs_query_time_grpc
|
||||
@ -105,4 +107,14 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase {
|
||||
/**
 * Checks a single decorated result item against the domain blacklist.
 *
 * The domain id is extracted from the combined id of the item's raw result
 * via {@code UrlIdCodec.getDomainId} and handed to {@code blacklist.isBlacklisted}.
 *
 * @param item the RPC result item to test
 * @return whatever {@code blacklist.isBlacklisted} reports for the item's domain id
 */
private boolean isBlacklisted(RpcDecoratedResultItem item) {
|
||||
return blacklist.isBlacklisted(UrlIdCodec.getDomainId(item.getRawItem().getCombinedId()));
|
||||
}
|
||||
|
||||
/**
 * Runs a query from plain Java arguments and returns model objects instead of
 * protobuf messages.
 *
 * The parameters are turned into a processed query by {@code queryFactory.createQuery},
 * encoded with {@code QueryProtobufCodec.convertQuery}, passed to {@code executeQueries},
 * and each returned item is decoded again with {@code QueryProtobufCodec.convertQueryResult}.
 *
 * @param originalQuery passed through unchanged to {@code QueryProtobufCodec.convertQuery}
 *                      alongside the processed query
 * @param params        query parameters used to build the processed query
 * @param count         forwarded as-is to {@code executeQueries}
 *                      (presumably a result limit; confirm against that method)
 * @return the decoded results, collected with {@code Stream.toList()} and therefore unmodifiable
 */
public List<DecoratedSearchResultItem> executeDirect(String originalQuery, QueryParams params, int count) {
|
||||
var query = queryFactory.createQuery(params);
|
||||
|
||||
return executeQueries(
|
||||
QueryProtobufCodec.convertQuery(originalQuery, query),
|
||||
count)
|
||||
.stream().map(QueryProtobufCodec::convertQueryResult)
|
||||
.toList();
|
||||
}
|
||||
}
|
@ -1,9 +1,9 @@
|
||||
package nu.marginalia.query_parser;
|
||||
package nu.marginalia.functions.searchquery.query_parser;
|
||||
|
||||
import nu.marginalia.language.WordPatterns;
|
||||
import nu.marginalia.query_parser.token.Token;
|
||||
import nu.marginalia.query_parser.token.TokenType;
|
||||
import nu.marginalia.transform_list.TransformList;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.Token;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
|
||||
import nu.marginalia.util.transform_list.TransformList;
|
||||
|
||||
import java.util.List;
|
||||
|
@ -1,8 +1,8 @@
|
||||
package nu.marginalia.query_parser;
|
||||
package nu.marginalia.functions.searchquery.query_parser;
|
||||
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.Token;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
|
||||
import nu.marginalia.language.WordPatterns;
|
||||
import nu.marginalia.query_parser.token.Token;
|
||||
import nu.marginalia.query_parser.token.TokenType;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -1,8 +1,8 @@
|
||||
package nu.marginalia.query_parser;
|
||||
package nu.marginalia.functions.searchquery.query_parser;
|
||||
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.Token;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
|
||||
import nu.marginalia.language.encoding.AsciiFlattener;
|
||||
import nu.marginalia.query_parser.token.Token;
|
||||
import nu.marginalia.query_parser.token.TokenType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
@ -1,20 +1,20 @@
|
||||
package nu.marginalia.query_parser;
|
||||
package nu.marginalia.functions.searchquery.query_parser;
|
||||
|
||||
import ca.rmen.porterstemmer.PorterStemmer;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.Token;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
|
||||
import nu.marginalia.util.language.EnglishDictionary;
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.keyword.KeywordExtractor;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.language.sentence.SentenceExtractor;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.util.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import nu.marginalia.language.model.DocumentSentence;
|
||||
import nu.marginalia.language.model.WordSpan;
|
||||
import nu.marginalia.query_parser.token.Token;
|
||||
import nu.marginalia.query_parser.token.TokenType;
|
||||
import opennlp.tools.stemmer.PorterStemmer;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
@ -251,7 +251,7 @@ public class QueryVariants {
|
||||
|
||||
for (var span : ls) {
|
||||
var matcher = dashBoundary.matcher(span.word);
|
||||
if (matcher.find() && nGramBloomFilter.isKnownNGram(ps.stem(dashBoundary.matcher(span.word).replaceAll("")))) {
|
||||
if (matcher.find() && nGramBloomFilter.isKnownNGram(ps.stemWord(dashBoundary.matcher(span.word).replaceAll("")))) {
|
||||
dash = true;
|
||||
String combined = dashBoundary.matcher(span.word).replaceAll("");
|
||||
asTokens2.add(combined);
|
||||
@ -332,7 +332,7 @@ public class QueryVariants {
|
||||
var a = span.get(i);
|
||||
var b = span.get(i+1);
|
||||
|
||||
var stemmed = ps.stem(a.word + b.word);
|
||||
var stemmed = ps.stemWord(a.word + b.word);
|
||||
|
||||
double scoreCombo = dict.getTermFreqStemmed(stemmed);
|
||||
if (scoreCombo > 10000) {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.query_parser.token;
|
||||
package nu.marginalia.functions.searchquery.query_parser.token;
|
||||
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.ToString;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.query_parser.token;
|
||||
package nu.marginalia.functions.searchquery.query_parser.token;
|
||||
|
||||
import java.util.function.Predicate;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.query_parser.token;
|
||||
package nu.marginalia.functions.searchquery.query_parser.token;
|
||||
|
||||
public interface TokenVisitor {
|
||||
void onLiteralTerm(Token token);
|
@ -1,21 +1,21 @@
|
||||
package nu.marginalia.query.svc;
|
||||
package nu.marginalia.functions.searchquery.svc;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.util.language.EnglishDictionary;
|
||||
import nu.marginalia.language.WordPatterns;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.query.model.ProcessedQuery;
|
||||
import nu.marginalia.query_parser.QueryParser;
|
||||
import nu.marginalia.query_parser.QueryPermutation;
|
||||
import nu.marginalia.query_parser.QueryVariants;
|
||||
import nu.marginalia.query_parser.token.Token;
|
||||
import nu.marginalia.query_parser.token.TokenType;
|
||||
import nu.marginalia.util.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||
import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
|
||||
import nu.marginalia.functions.searchquery.query_parser.QueryParser;
|
||||
import nu.marginalia.functions.searchquery.query_parser.QueryPermutation;
|
||||
import nu.marginalia.functions.searchquery.query_parser.QueryVariants;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.Token;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.TokenType;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
@ -1,10 +1,10 @@
|
||||
package nu.marginalia.query.svc;
|
||||
package nu.marginalia.functions.searchquery.svc;
|
||||
|
||||
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.query_parser.token.Token;
|
||||
import nu.marginalia.query_parser.token.TokenVisitor;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.Token;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.TokenVisitor;
|
||||
|
||||
public class QueryLimitsAccumulator implements TokenVisitor {
|
||||
public SpecificationLimit qualityLimit;
|
@ -1,9 +1,9 @@
|
||||
package nu.marginalia.query.svc;
|
||||
package nu.marginalia.functions.searchquery.svc;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.language.WordPatterns;
|
||||
import nu.marginalia.query_parser.token.Token;
|
||||
import nu.marginalia.query_parser.token.TokenVisitor;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.Token;
|
||||
import nu.marginalia.functions.searchquery.query_parser.token.TokenVisitor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.language;
|
||||
package nu.marginalia.util.language;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.ngrams;
|
||||
package nu.marginalia.util.ngrams;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.ngrams;
|
||||
package nu.marginalia.util.ngrams;
|
||||
|
||||
import ca.rmen.porterstemmer.PorterStemmer;
|
||||
import com.google.common.hash.HashFunction;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.transform_list;
|
||||
package nu.marginalia.util.transform_list;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.function.BiConsumer;
|
@ -41,8 +41,7 @@ dependencies {
|
||||
implementation project(':code:common:service')
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:common:service-discovery')
|
||||
implementation project(':code:api:query-api')
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:functions:search-query:api')
|
||||
implementation project(':code:features-index:index-query')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
@ -5,13 +5,12 @@ import com.google.inject.Singleton;
|
||||
import nu.marginalia.api.model.ApiSearchResult;
|
||||
import nu.marginalia.api.model.ApiSearchResultQueryDetails;
|
||||
import nu.marginalia.api.model.ApiSearchResults;
|
||||
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.api.searchquery.QueryClient;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.api.searchquery.model.results.*;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.model.idx.WordMetadata;
|
||||
import nu.marginalia.query.client.QueryClient;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
|
@ -56,7 +56,9 @@ dependencies {
|
||||
|
||||
implementation project(':code:functions:math:api')
|
||||
implementation project(':code:functions:domain-info:api')
|
||||
implementation project(':code:api:query-api')
|
||||
implementation project(':code:functions:search-query:api')
|
||||
|
||||
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:common:service-discovery')
|
||||
implementation project(':code:common:renderer')
|
||||
|
@ -5,10 +5,10 @@ import com.google.inject.Singleton;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.WebsiteUrl;
|
||||
import nu.marginalia.api.math.MathClient;
|
||||
import nu.marginalia.api.searchquery.QueryClient;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.query.client.QueryClient;
|
||||
import nu.marginalia.query.model.QueryResponse;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
||||
import nu.marginalia.search.command.SearchParameters;
|
||||
import nu.marginalia.search.model.*;
|
||||
import nu.marginalia.search.svc.SearchQueryIndexService;
|
||||
|
@ -1,12 +1,12 @@
|
||||
package nu.marginalia.search;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||
import nu.marginalia.search.command.SearchParameters;
|
||||
|
||||
import java.util.List;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package nu.marginalia.search;
|
||||
|
||||
import nu.marginalia.query.model.QueryResponse;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
||||
import nu.marginalia.search.model.ClusteredUrlDetails;
|
||||
import nu.marginalia.search.model.UrlDetails;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
package nu.marginalia.search.command;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Arrays;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package nu.marginalia.search.command;
|
||||
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Arrays;
|
||||
|
@ -1,7 +1,7 @@
|
||||
package nu.marginalia.search.command;
|
||||
|
||||
import nu.marginalia.WebsiteUrl;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.search.model.SearchProfile;
|
||||
|
@ -2,8 +2,8 @@ package nu.marginalia.search.model;
|
||||
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.model.crawl.HtmlFeature;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
package nu.marginalia.search.model;
|
||||
|
||||
import lombok.*;
|
||||
import nu.marginalia.index.client.model.results.SearchResultItem;
|
||||
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||
import nu.marginalia.model.crawl.HtmlFeature;
|
||||
|
@ -4,7 +4,7 @@ import gnu.trove.list.TLongList;
|
||||
import gnu.trove.list.array.TLongArrayList;
|
||||
import gnu.trove.map.hash.TObjectIntHashMap;
|
||||
import gnu.trove.set.hash.TIntHashSet;
|
||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.lsh.EasyLSH;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -5,11 +5,11 @@ import com.google.inject.Singleton;
|
||||
import it.unimi.dsi.fastutil.ints.Int2LongArrayMap;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.bbpc.BrailleBlockPunchCards;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.index.client.model.results.SearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||
import nu.marginalia.query.model.QueryResponse;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
||||
import nu.marginalia.search.model.UrlDetails;
|
||||
import nu.marginalia.search.results.UrlDeduplicator;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -44,7 +44,6 @@ dependencies {
|
||||
implementation project(':code:functions:domain-info')
|
||||
implementation project(':code:functions:domain-info:api')
|
||||
|
||||
implementation project(':code:api:query-api')
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:common:service')
|
||||
implementation project(':code:common:model')
|
||||
|
@ -45,9 +45,9 @@ dependencies {
|
||||
implementation project(':code:common:renderer')
|
||||
implementation project(':code:libraries:message-queue')
|
||||
implementation project(':code:common:service-discovery')
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:api:query-api')
|
||||
implementation project(':code:functions:search-query:api')
|
||||
implementation project(':code:api:executor-api')
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:api:process-mqapi')
|
||||
implementation project(':code:features-search:screenshots')
|
||||
implementation project(':code:features-index:index-journal')
|
||||
|
@ -1,12 +1,12 @@
|
||||
package nu.marginalia.control.app.svc;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.api.searchquery.QueryClient;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||
import nu.marginalia.control.ControlRendererFactory;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.nodecfg.NodeConfigurationService;
|
||||
import nu.marginalia.query.client.QueryClient;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Request;
|
||||
|
@ -71,7 +71,6 @@ dependencies {
|
||||
implementation project(':code:features-convert:reddit-json')
|
||||
implementation project(':code:features-index:index-journal')
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:api:query-api')
|
||||
implementation project(':code:api:process-mqapi')
|
||||
implementation project(':code:api:executor-api')
|
||||
implementation project(':third-party:encyclopedia-marginalia-nu')
|
||||
|
@ -7,7 +7,6 @@ import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.actor.prototype.RecordActorPrototype;
|
||||
import nu.marginalia.actor.state.ActorStep;
|
||||
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
|
||||
import nu.marginalia.query.client.QueryClient;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.storage.model.FileStorageId;
|
||||
import nu.marginalia.storage.model.FileStorageType;
|
||||
|
@ -43,6 +43,7 @@ dependencies {
|
||||
|
||||
implementation project(':code:functions:domain-links:partition')
|
||||
implementation project(':code:functions:domain-links:api')
|
||||
implementation project(':code:functions:search-query:api')
|
||||
|
||||
implementation project(':code:common:service')
|
||||
implementation project(':code:api:index-api')
|
||||
|
@ -6,7 +6,7 @@ import gnu.trove.set.hash.TLongHashSet;
|
||||
import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
|
||||
import it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap;
|
||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.index.SearchIndex;
|
||||
import nu.marginalia.index.svc.SearchTermsService;
|
||||
import nu.marginalia.model.idx.WordMetadata;
|
||||
|
@ -4,9 +4,9 @@ import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import gnu.trove.list.TLongList;
|
||||
import gnu.trove.list.array.TLongArrayList;
|
||||
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingContext;
|
||||
import nu.marginalia.index.client.model.results.SearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbReader;
|
||||
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
||||
import nu.marginalia.ranking.ResultValuator;
|
||||
|
@ -2,7 +2,7 @@ package nu.marginalia.index.results;
|
||||
|
||||
import gnu.trove.map.TLongIntMap;
|
||||
import gnu.trove.map.hash.TLongIntHashMap;
|
||||
import nu.marginalia.index.client.model.results.SearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||
|
||||
public class IndexResultDomainDeduplicator {
|
||||
final TLongIntMap resultsByDomainId = CachedObjects.getMap();
|
||||
|
@ -2,15 +2,15 @@ package nu.marginalia.index.results;
|
||||
|
||||
import gnu.trove.list.TLongList;
|
||||
import gnu.trove.set.hash.TLongHashSet;
|
||||
import nu.marginalia.index.client.model.results.SearchResultPreliminaryScore;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingContext;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultPreliminaryScore;
|
||||
import nu.marginalia.model.id.UrlIdCodec;
|
||||
import nu.marginalia.model.idx.WordFlags;
|
||||
import nu.marginalia.model.idx.WordMetadata;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.client.model.results.SearchResultItem;
|
||||
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.query.IndexQueryParams;
|
||||
import nu.marginalia.ranking.ResultValuator;
|
||||
|
||||
|
@ -4,18 +4,18 @@ import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import gnu.trove.list.TLongList;
|
||||
import gnu.trove.list.array.TLongArrayList;
|
||||
import io.grpc.stub.StreamObserver;
|
||||
import io.prometheus.client.Counter;
|
||||
import io.prometheus.client.Gauge;
|
||||
import io.prometheus.client.Histogram;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.index.api.*;
|
||||
import nu.marginalia.index.api.IndexApiGrpc.IndexApiImplBase;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.client.model.results.SearchResultItem;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingContext;
|
||||
import nu.marginalia.index.client.model.results.SearchResultSet;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.*;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultSet;
|
||||
import nu.marginalia.index.index.SearchIndex;
|
||||
import nu.marginalia.index.index.SearchIndexSearchTerms;
|
||||
import nu.marginalia.index.query.IndexQueryPriority;
|
||||
@ -41,7 +41,7 @@ import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Singleton
|
||||
public class IndexQueryService extends IndexApiImplBase {
|
||||
public class IndexQueryService extends IndexApiGrpc.IndexApiImplBase {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
@ -122,8 +122,8 @@ public class IndexQueryService extends IndexApiImplBase {
|
||||
|
||||
// GRPC endpoint
|
||||
@SneakyThrows
|
||||
public void query(nu.marginalia.index.api.RpcIndexQuery request,
|
||||
io.grpc.stub.StreamObserver<nu.marginalia.index.api.RpcDecoratedResultItem> responseObserver) {
|
||||
public void query(RpcIndexQuery request,
|
||||
StreamObserver<RpcDecoratedResultItem> responseObserver) {
|
||||
|
||||
try {
|
||||
var params = new SearchParameters(request, getSearchSet(request));
|
||||
|
@ -1,11 +1,11 @@
|
||||
package nu.marginalia.index.svc;
|
||||
|
||||
import gnu.trove.set.hash.TLongHashSet;
|
||||
import nu.marginalia.index.api.RpcIndexQuery;
|
||||
import nu.marginalia.index.client.IndexProtobufCodec;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.RpcIndexQuery;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.IndexProtobufCodec;
|
||||
import nu.marginalia.index.index.SearchIndex;
|
||||
import nu.marginalia.index.index.SearchIndexSearchTerms;
|
||||
import nu.marginalia.index.query.IndexQuery;
|
||||
|
@ -3,8 +3,8 @@ package nu.marginalia.index.svc;
|
||||
import com.google.inject.Singleton;
|
||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
||||
import it.unimi.dsi.fastutil.longs.LongList;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSubquery;
|
||||
import nu.marginalia.hash.MurmurHash3_128;
|
||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.index.SearchIndexSearchTerms;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -1,7 +1,6 @@
|
||||
package nu.marginalia.index.svc.searchset;
|
||||
|
||||
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
|
||||
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||
import nu.marginalia.index.searchset.SearchSet;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -43,13 +43,11 @@ dependencies {
|
||||
implementation project(':code:common:service')
|
||||
implementation project(':code:common:renderer')
|
||||
implementation project(':code:api:index-api')
|
||||
implementation project(':code:api:query-api')
|
||||
implementation project(':code:common:service-discovery')
|
||||
implementation project(':code:features-qs:query-parser')
|
||||
implementation project(':code:features-index:index-query')
|
||||
implementation project(':code:libraries:language-processing')
|
||||
implementation project(':code:libraries:term-frequency-dict')
|
||||
|
||||
implementation project(':code:functions:search-query')
|
||||
implementation project(':code:functions:search-query:api')
|
||||
implementation project(':code:functions:domain-links:api')
|
||||
implementation project(':code:functions:domain-links:aggregate')
|
||||
|
||||
|
@ -2,12 +2,12 @@ package nu.marginalia.query;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.functions.searchquery.QueryGRPCService;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||
import nu.marginalia.renderer.MustacheRenderer;
|
||||
import nu.marginalia.renderer.RendererFactory;
|
||||
import nu.marginalia.query.svc.QueryFactory;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
|
||||
@ -16,20 +16,16 @@ import java.util.Map;
|
||||
|
||||
public class QueryBasicInterface {
|
||||
private final MustacheRenderer<Object> renderer;
|
||||
private final QueryFactory queryFactory;
|
||||
private final Gson gson = GsonFactory.get();
|
||||
|
||||
private final QueryGRPCService queryGRPCService;
|
||||
|
||||
@Inject
|
||||
public QueryBasicInterface(RendererFactory rendererFactory,
|
||||
QueryFactory queryFactory,
|
||||
QueryGRPCService queryGRPCService
|
||||
) throws IOException
|
||||
{
|
||||
this.renderer = rendererFactory.renderer("search");
|
||||
|
||||
this.queryFactory = queryFactory;
|
||||
this.queryGRPCService = queryGRPCService;
|
||||
}
|
||||
|
||||
@ -43,17 +39,15 @@ public class QueryBasicInterface {
|
||||
int domainCount = request.queryParams("domainCount") == null ? 5 : Integer.parseInt(request.queryParams("domainCount"));
|
||||
String set = request.queryParams("set") == null ? "" : request.queryParams("set");
|
||||
|
||||
var query = queryFactory.createQuery(new QueryParams(queryParam, new QueryLimits(
|
||||
var params = new QueryParams(queryParam, new QueryLimits(
|
||||
domainCount, count, 250, 8192
|
||||
), set));
|
||||
), set);
|
||||
|
||||
var rsp = queryGRPCService.executeQueries(QueryProtobufCodec.convertQuery(queryParam, query), count);
|
||||
|
||||
var results = rsp.stream().map(QueryProtobufCodec::convertQueryResult).toList();
|
||||
var results = queryGRPCService.executeDirect(queryParam, params, count);
|
||||
|
||||
if (request.headers("Accept").contains("application/json")) {
|
||||
response.type("application/json");
|
||||
return gson.toJson(rsp);
|
||||
return gson.toJson(results);
|
||||
}
|
||||
else {
|
||||
return renderer.render(
|
||||
|
@ -4,6 +4,7 @@ import com.google.inject.Inject;
|
||||
import io.prometheus.client.Histogram;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.functions.domainlinks.AggregateDomainLinksService;
|
||||
import nu.marginalia.functions.searchquery.QueryGRPCService;
|
||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||
import nu.marginalia.service.server.BaseServiceParams;
|
||||
import nu.marginalia.service.server.Service;
|
||||
|
@ -1,15 +1,15 @@
|
||||
package nu.marginalia.query.svc;
|
||||
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.index.client.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimitType;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.query.model.QueryParams;
|
||||
import nu.marginalia.util.language.EnglishDictionary;
|
||||
import nu.marginalia.util.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryParams;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
@ -21,6 +21,9 @@ include 'code:functions:domain-links:partition'
|
||||
include 'code:functions:domain-links:aggregate'
|
||||
include 'code:functions:domain-links:api'
|
||||
|
||||
include 'code:functions:search-query'
|
||||
include 'code:functions:search-query:api'
|
||||
|
||||
include 'code:libraries:array'
|
||||
include 'code:libraries:geo-ip'
|
||||
include 'code:libraries:btree'
|
||||
@ -40,7 +43,6 @@ include 'code:libraries:message-queue'
|
||||
include 'code:features-search:screenshots'
|
||||
include 'code:features-search:random-websites'
|
||||
include 'code:features-search:feedlot-client'
|
||||
include 'code:features-qs:query-parser'
|
||||
include 'code:features-index:result-ranking'
|
||||
|
||||
include 'code:features-convert:adblock'
|
||||
@ -63,7 +65,6 @@ include 'code:features-index:index-forward'
|
||||
include 'code:features-index:index-reverse'
|
||||
include 'code:features-index:domain-ranking'
|
||||
|
||||
include 'code:api:query-api'
|
||||
include 'code:api:index-api'
|
||||
include 'code:api:process-mqapi'
|
||||
include 'code:api:executor-api'
|
||||
|
Loading…
Reference in New Issue
Block a user