NPE fix for index buckets that aren't loaded, experimental new query mode for domains.
This commit is contained in:
parent
514074112e
commit
23b7a5fc22
@ -241,7 +241,6 @@ public class EdgeSearchE2ETest extends E2ETestBase {
|
|||||||
Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("site-search"));
|
Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("site-search"));
|
||||||
|
|
||||||
assertEquals(List.of("Frog", "Binomial nomenclature", "Mantis", "Amphibian"), getTitlesFromSearchResults(html));
|
assertEquals(List.of("Frog", "Binomial nomenclature", "Mantis", "Amphibian"), getTitlesFromSearchResults(html));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -12,7 +12,6 @@ public class RankingDomainData {
|
|||||||
private int alias;
|
private int alias;
|
||||||
private EdgeDomainIndexingState state;
|
private EdgeDomainIndexingState state;
|
||||||
public final int knownUrls;
|
public final int knownUrls;
|
||||||
public boolean peripheral;
|
|
||||||
|
|
||||||
public int resolveAlias() {
|
public int resolveAlias() {
|
||||||
if (alias == 0) return id;
|
if (alias == 0) return id;
|
||||||
|
@ -56,7 +56,7 @@ public class RankingDomainFetcher {
|
|||||||
while (rsp.next()) {
|
while (rsp.next()) {
|
||||||
int id = rsp.getInt(1);
|
int id = rsp.getInt(1);
|
||||||
if (!blacklist.isBlacklisted(id)) {
|
if (!blacklist.isBlacklisted(id)) {
|
||||||
consumer.accept(new RankingDomainData(id, rsp.getString(2), rsp.getInt(3), EdgeDomainIndexingState.valueOf(rsp.getString(4)), rsp.getInt(5), false));
|
consumer.accept(new RankingDomainData(id, rsp.getString(2), rsp.getInt(3), EdgeDomainIndexingState.valueOf(rsp.getString(4)), rsp.getInt(5)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -108,6 +108,14 @@ public class PerusePageRankV2 {
|
|||||||
domainIndexToId.put(domainIndexToId.size(), id);
|
domainIndexToId.put(domainIndexToId.size(), id);
|
||||||
domainIdToIndex.put(id, domainIdToIndex.size());
|
domainIdToIndex.put(id, domainIdToIndex.size());
|
||||||
});
|
});
|
||||||
|
domainFetcher.getPeripheralDomains(domainData -> {
|
||||||
|
int id = domainData.id;
|
||||||
|
|
||||||
|
domainsById.put(id, domainData);
|
||||||
|
|
||||||
|
domainIndexToId.put(domainIndexToId.size(), id);
|
||||||
|
domainIdToIndex.put(id, domainIdToIndex.size());
|
||||||
|
});
|
||||||
|
|
||||||
linkDataSrc2Dest = new TIntArrayList[domainIndexToId.size()];
|
linkDataSrc2Dest = new TIntArrayList[domainIndexToId.size()];
|
||||||
linkDataDest2Src = new TIntArrayList[domainIndexToId.size()];
|
linkDataDest2Src = new TIntArrayList[domainIndexToId.size()];
|
||||||
|
@ -1,21 +1,20 @@
|
|||||||
package nu.marginalia.wmsa.edge.index;
|
package nu.marginalia.wmsa.edge.index;
|
||||||
|
|
||||||
|
import nu.marginalia.wmsa.edge.index.journal.SearchIndexJournalWriter;
|
||||||
import nu.marginalia.wmsa.edge.index.model.EdgeIndexSearchTerms;
|
import nu.marginalia.wmsa.edge.index.model.EdgeIndexSearchTerms;
|
||||||
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
||||||
import nu.marginalia.wmsa.edge.index.reader.SearchIndexReader;
|
import nu.marginalia.wmsa.edge.index.reader.SearchIndexReader;
|
||||||
import nu.marginalia.wmsa.edge.index.journal.SearchIndexJournalWriter;
|
|
||||||
import nu.marginalia.wmsa.edge.index.reader.query.IndexSearchBudget;
|
import nu.marginalia.wmsa.edge.index.reader.query.IndexSearchBudget;
|
||||||
import nu.marginalia.wmsa.edge.index.reader.query.Query;
|
import nu.marginalia.wmsa.edge.index.reader.query.Query;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.Comparator;
|
||||||
import java.util.concurrent.locks.Lock;
|
import java.util.concurrent.locks.Lock;
|
||||||
import java.util.concurrent.locks.ReadWriteLock;
|
import java.util.concurrent.locks.ReadWriteLock;
|
||||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||||
import java.util.function.LongPredicate;
|
import java.util.function.LongPredicate;
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.LongStream;
|
import java.util.stream.LongStream;
|
||||||
|
|
||||||
public class EdgeIndexBucket {
|
public class EdgeIndexBucket {
|
||||||
@ -101,6 +100,10 @@ public class EdgeIndexBucket {
|
|||||||
return indexReader != null;
|
return indexReader != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public LongStream findHotDomainsForKeyword(IndexBlock block, int wordId, int queryDepth, int minHitCount, int maxResults) {
|
||||||
|
return indexReader.findHotDomainsForKeyword(block, wordId, queryDepth, minHitCount, maxResults);
|
||||||
|
}
|
||||||
|
|
||||||
public LongStream getQuery(IndexBlock block, LongPredicate filter, IndexSearchBudget budget, EdgeIndexSearchTerms searchTerms) {
|
public LongStream getQuery(IndexBlock block, LongPredicate filter, IndexSearchBudget budget, EdgeIndexSearchTerms searchTerms) {
|
||||||
if (null == indexReader) {
|
if (null == indexReader) {
|
||||||
logger.warn("Index reader not neady {}", block);
|
logger.warn("Index reader not neady {}", block);
|
||||||
@ -114,15 +117,8 @@ public class EdgeIndexBucket {
|
|||||||
.mapToInt(Integer::intValue)
|
.mapToInt(Integer::intValue)
|
||||||
.toArray();
|
.toArray();
|
||||||
|
|
||||||
|
|
||||||
if (logger.isDebugEnabled()) {
|
|
||||||
logger.debug("Includes: ({}); excludes: ({})", Arrays.
|
|
||||||
stream(orderedIncludes)
|
|
||||||
.mapToObj(String::valueOf)
|
|
||||||
.collect(Collectors.joining(",")),
|
|
||||||
searchTerms.excludes.stream().map(String::valueOf).collect(Collectors.joining(",")));
|
|
||||||
}
|
|
||||||
Query query;
|
Query query;
|
||||||
|
|
||||||
if (orderedIncludes.length == 1) {
|
if (orderedIncludes.length == 1) {
|
||||||
query = indexReader.findUnderspecified(block, budget, filter, orderedIncludes[0]);
|
query = indexReader.findUnderspecified(block, budget, filter, orderedIncludes[0]);
|
||||||
}
|
}
|
||||||
@ -136,6 +132,7 @@ public class EdgeIndexBucket {
|
|||||||
for (int term : searchTerms.excludes) {
|
for (int term : searchTerms.excludes) {
|
||||||
query = query.not(term);
|
query = query.not(term);
|
||||||
}
|
}
|
||||||
|
|
||||||
return query.stream();
|
return query.stream();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,6 +32,8 @@ import nu.marginalia.wmsa.edge.model.EdgeUrl;
|
|||||||
import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet;
|
import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet;
|
||||||
import nu.marginalia.wmsa.edge.model.crawl.EdgePageWords;
|
import nu.marginalia.wmsa.edge.model.crawl.EdgePageWords;
|
||||||
import nu.marginalia.wmsa.edge.model.search.*;
|
import nu.marginalia.wmsa.edge.model.search.*;
|
||||||
|
import nu.marginalia.wmsa.edge.model.search.domain.EdgeDomainSearchResults;
|
||||||
|
import nu.marginalia.wmsa.edge.model.search.domain.EdgeDomainSearchSpecification;
|
||||||
import org.apache.http.HttpStatus;
|
import org.apache.http.HttpStatus;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -88,6 +90,7 @@ public class EdgeIndexService extends Service {
|
|||||||
|
|
||||||
Spark.post("/words/", this::putWords);
|
Spark.post("/words/", this::putWords);
|
||||||
Spark.post("/search/", this::search, gson::toJson);
|
Spark.post("/search/", this::search, gson::toJson);
|
||||||
|
Spark.post("/search-domain/", this::searchDomain, gson::toJson);
|
||||||
|
|
||||||
Spark.post("/dictionary/*", this::getWordId, gson::toJson);
|
Spark.post("/dictionary/*", this::getWordId, gson::toJson);
|
||||||
|
|
||||||
@ -204,6 +207,26 @@ public class EdgeIndexService extends Service {
|
|||||||
.toArray();
|
.toArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Object searchDomain(Request request, Response response) {
|
||||||
|
if (indexes.getDictionaryReader() == null) {
|
||||||
|
logger.warn("Dictionary reader not yet initialized");
|
||||||
|
halt(HttpStatus.SC_SERVICE_UNAVAILABLE, "Come back in a few minutes");
|
||||||
|
}
|
||||||
|
|
||||||
|
String json = request.body();
|
||||||
|
EdgeDomainSearchSpecification specsSet = gson.fromJson(json, EdgeDomainSearchSpecification.class);
|
||||||
|
|
||||||
|
final int wordId = keywordLexicon.getOrInsert(specsSet.keyword);
|
||||||
|
|
||||||
|
List<EdgeId<EdgeUrl>> urlIds = indexes
|
||||||
|
.getBucket(specsSet.bucket)
|
||||||
|
.findHotDomainsForKeyword(specsSet.block, wordId, specsSet.queryDepth, specsSet.minHitCount, specsSet.maxResults)
|
||||||
|
.mapToObj(lv -> new EdgeId<EdgeUrl>((int)(lv & 0xFFFF_FFFFL)))
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
return new EdgeDomainSearchResults(specsSet.keyword, urlIds);
|
||||||
|
}
|
||||||
|
|
||||||
private Object search(Request request, Response response) {
|
private Object search(Request request, Response response) {
|
||||||
if (indexes.getDictionaryReader() == null) {
|
if (indexes.getDictionaryReader() == null) {
|
||||||
logger.warn("Dictionary reader not yet initialized");
|
logger.warn("Dictionary reader not yet initialized");
|
||||||
@ -387,6 +410,16 @@ public class EdgeIndexService extends Service {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public LongStream getHotDomainsQuery(int bucket, IndexBlock block, int wordId,
|
||||||
|
int queryDepth, int minHitCount, int maxResults) {
|
||||||
|
if (!indexes.isValidBucket(bucket)) {
|
||||||
|
logger.warn("Invalid bucket {}", bucket);
|
||||||
|
return LongStream.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
return indexes.getBucket(bucket).findHotDomainsForKeyword(block, wordId, queryDepth, minHitCount, maxResults);
|
||||||
|
}
|
||||||
|
|
||||||
private LongStream getQuery(int bucket, IndexSearchBudget budget, IndexBlock block,
|
private LongStream getQuery(int bucket, IndexSearchBudget budget, IndexBlock block,
|
||||||
LongPredicate filter, EdgeIndexSearchTerms searchTerms) {
|
LongPredicate filter, EdgeIndexSearchTerms searchTerms) {
|
||||||
if (!indexes.isValidBucket(bucket)) {
|
if (!indexes.isValidBucket(bucket)) {
|
||||||
|
@ -10,10 +10,14 @@ import nu.marginalia.wmsa.client.HttpStatusCode;
|
|||||||
import nu.marginalia.wmsa.configuration.ServiceDescriptor;
|
import nu.marginalia.wmsa.configuration.ServiceDescriptor;
|
||||||
import nu.marginalia.wmsa.configuration.server.Context;
|
import nu.marginalia.wmsa.configuration.server.Context;
|
||||||
import nu.marginalia.wmsa.edge.index.model.EdgePutWordsRequest;
|
import nu.marginalia.wmsa.edge.index.model.EdgePutWordsRequest;
|
||||||
import nu.marginalia.wmsa.edge.model.*;
|
import nu.marginalia.wmsa.edge.model.EdgeDomain;
|
||||||
|
import nu.marginalia.wmsa.edge.model.EdgeId;
|
||||||
|
import nu.marginalia.wmsa.edge.model.EdgeUrl;
|
||||||
import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet;
|
import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet;
|
||||||
import nu.marginalia.wmsa.edge.model.search.EdgeSearchResultSet;
|
import nu.marginalia.wmsa.edge.model.search.EdgeSearchResultSet;
|
||||||
import nu.marginalia.wmsa.edge.model.search.EdgeSearchSpecification;
|
import nu.marginalia.wmsa.edge.model.search.EdgeSearchSpecification;
|
||||||
|
import nu.marginalia.wmsa.edge.model.search.domain.EdgeDomainSearchResults;
|
||||||
|
import nu.marginalia.wmsa.edge.model.search.domain.EdgeDomainSearchSpecification;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -60,6 +64,11 @@ public class EdgeIndexClient extends AbstractDynamicClient {
|
|||||||
.blockingGet();
|
.blockingGet();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@CheckReturnValue
|
||||||
|
public EdgeDomainSearchResults queryDomains(Context ctx, EdgeDomainSearchSpecification specs) {
|
||||||
|
return this.postGet(ctx, "/search-domain/", specs, EdgeDomainSearchResults.class).blockingFirst();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@CheckReturnValue
|
@CheckReturnValue
|
||||||
public Observable<Boolean> isBlocked(Context ctx) {
|
public Observable<Boolean> isBlocked(Context ctx) {
|
||||||
|
@ -13,8 +13,10 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.EnumMap;
|
import java.util.EnumMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.function.LongPredicate;
|
import java.util.function.LongPredicate;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.LongStream;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
public class SearchIndexReader implements AutoCloseable {
|
public class SearchIndexReader implements AutoCloseable {
|
||||||
@ -55,18 +57,53 @@ public class SearchIndexReader implements AutoCloseable {
|
|||||||
queryBuilders = new EnumMap<>(IndexBlock.class);
|
queryBuilders = new EnumMap<>(IndexBlock.class);
|
||||||
underspecifiedQueryBuilders = new EnumMap<>(IndexBlock.class);
|
underspecifiedQueryBuilders = new EnumMap<>(IndexBlock.class);
|
||||||
|
|
||||||
queryBuilders.put(IndexBlock.Words, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex, midIndex, lowIndex, namesIndex, wordsIndex).collect(Collectors.toList()), wordsIndex));
|
queryBuilders.put(IndexBlock.Words, new IndexQueryBuilder(listOfNonNulls(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex, midIndex, lowIndex, namesIndex, wordsIndex), wordsIndex));
|
||||||
queryBuilders.put(IndexBlock.Low, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex, midIndex, lowIndex, namesIndex).collect(Collectors.toList()), wordsIndex));
|
queryBuilders.put(IndexBlock.Low, new IndexQueryBuilder(listOfNonNulls(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex, midIndex, lowIndex, namesIndex), wordsIndex));
|
||||||
queryBuilders.put(IndexBlock.Middle, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex, midIndex).collect(Collectors.toList()), wordsIndex));
|
queryBuilders.put(IndexBlock.Middle, new IndexQueryBuilder(listOfNonNulls(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex, midIndex), wordsIndex));
|
||||||
queryBuilders.put(IndexBlock.Top, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex).collect(Collectors.toList()), wordsIndex));
|
queryBuilders.put(IndexBlock.Top, new IndexQueryBuilder(listOfNonNulls(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex), wordsIndex));
|
||||||
queryBuilders.put(IndexBlock.PositionWords, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, namesIndex, positionIndex).collect(Collectors.toList()), wordsIndex));
|
queryBuilders.put(IndexBlock.PositionWords, new IndexQueryBuilder(listOfNonNulls(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, namesIndex, positionIndex), wordsIndex));
|
||||||
queryBuilders.put(IndexBlock.NamesWords, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, namesIndex).collect(Collectors.toList()), wordsIndex));
|
queryBuilders.put(IndexBlock.NamesWords, new IndexQueryBuilder(listOfNonNulls(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, namesIndex), wordsIndex));
|
||||||
queryBuilders.put(IndexBlock.Link, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, linkIndex).collect(Collectors.toList()), wordsIndex));
|
queryBuilders.put(IndexBlock.Link, new IndexQueryBuilder(listOfNonNulls(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, linkIndex), wordsIndex));
|
||||||
queryBuilders.put(IndexBlock.Title, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex).collect(Collectors.toList()), wordsIndex));
|
queryBuilders.put(IndexBlock.Title, new IndexQueryBuilder(listOfNonNulls(metaIndex, titleKeywordsIndex, topicIndex, titleIndex), wordsIndex));
|
||||||
queryBuilders.put(IndexBlock.TitleKeywords, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex).collect(Collectors.toList()), wordsIndex));
|
queryBuilders.put(IndexBlock.TitleKeywords, new IndexQueryBuilder(listOfNonNulls(metaIndex, titleKeywordsIndex), wordsIndex));
|
||||||
|
|
||||||
underspecifiedQueryBuilders.put(IndexBlock.TitleKeywords, new IndexQueryBuilder(Stream.of(titleKeywordsIndex, linkIndex, topicIndex, topIndex, midIndex, lowIndex, namesIndex, positionIndex, metaIndex).collect(Collectors.toList()), wordsIndex));
|
underspecifiedQueryBuilders.put(IndexBlock.TitleKeywords, new IndexQueryBuilder(listOfNonNulls(titleKeywordsIndex, linkIndex, topicIndex, topIndex, midIndex, lowIndex, namesIndex, positionIndex, metaIndex), wordsIndex));
|
||||||
underspecifiedQueryBuilders.put(IndexBlock.Link, new IndexQueryBuilder(Stream.of(linkIndex, topicIndex, topIndex, midIndex, lowIndex, namesIndex, positionIndex, metaIndex).collect(Collectors.toList()), wordsIndex));
|
underspecifiedQueryBuilders.put(IndexBlock.Link, new IndexQueryBuilder(listOfNonNulls(linkIndex, topicIndex, topIndex, midIndex, lowIndex, namesIndex, positionIndex, metaIndex), wordsIndex));
|
||||||
|
}
|
||||||
|
|
||||||
|
@SafeVarargs
|
||||||
|
public final <T> List<T> listOfNonNulls(T... vals) {
|
||||||
|
return Stream.of(vals).filter(Objects::nonNull).toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public LongStream findHotDomainsForKeyword(IndexBlock block, int wordId, int queryDepth, int minHitCount, int maxResults) {
|
||||||
|
var index = indices.get(block);
|
||||||
|
|
||||||
|
if (index == null)
|
||||||
|
return LongStream.empty();
|
||||||
|
|
||||||
|
return index.rangeForWord(wordId)
|
||||||
|
.stream()
|
||||||
|
.limit(queryDepth)
|
||||||
|
.filter(new LongPredicate() {
|
||||||
|
long last = Long.MIN_VALUE;
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean test(long value) {
|
||||||
|
if ((last >>> 32L) == (value >>> 32L)) {
|
||||||
|
return count++ == minHitCount;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
last = value;
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.limit(maxResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Query findUnderspecified(
|
public Query findUnderspecified(
|
||||||
@ -116,6 +153,7 @@ public class SearchIndexReader implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var range = index.rangeForWord(searchTerm);
|
var range = index.rangeForWord(searchTerm);
|
||||||
|
|
||||||
if (index.hasUrl(urlId, range)) {
|
if (index.hasUrl(urlId, range)) {
|
||||||
return block;
|
return block;
|
||||||
}
|
}
|
||||||
|
@ -112,6 +112,11 @@ public class IndexQueryBuilder {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Query not(int wordId) {
|
public Query not(int wordId) {
|
||||||
|
// Happens when an index simply isn't present, won't find data anyway
|
||||||
|
// so it's safe to no-op the query
|
||||||
|
if (excludeIndex == null)
|
||||||
|
return new QueryForIndices(budget, LongStream::empty);
|
||||||
|
|
||||||
return new QueryForIndices(budget, () -> notStream(wordId));
|
return new QueryForIndices(budget, () -> notStream(wordId));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,12 +15,12 @@ import java.util.List;
|
|||||||
public class EdgeSearchResultItem {
|
public class EdgeSearchResultItem {
|
||||||
public final int blockId;
|
public final int blockId;
|
||||||
public final int queryLength;
|
public final int queryLength;
|
||||||
public final EdgeId<EdgeDomain> domain;
|
public final EdgeId<EdgeDomain> domain; // this isn't the external domain ID, but a ranking
|
||||||
public final EdgeId<EdgeUrl> url;
|
public final EdgeId<EdgeUrl> url;
|
||||||
public final List<EdgeSearchResultKeywordScore> scores;
|
public final List<EdgeSearchResultKeywordScore> scores;
|
||||||
|
|
||||||
public EdgeSearchResultItem(int blockId, int queryLength, long val) {
|
public EdgeSearchResultItem(int blockId, int queryLength, long val) {
|
||||||
int urlId = (int) (val & 0xFFFFFFFFL);
|
int urlId = (int) (val & 0xFFFF_FFFFL);
|
||||||
int domainId = (int) (val >>> 32);
|
int domainId = (int) (val >>> 32);
|
||||||
|
|
||||||
this.queryLength = queryLength;
|
this.queryLength = queryLength;
|
||||||
|
@ -0,0 +1,15 @@
|
|||||||
|
package nu.marginalia.wmsa.edge.model.search.domain;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.ToString;
|
||||||
|
import nu.marginalia.wmsa.edge.model.EdgeId;
|
||||||
|
import nu.marginalia.wmsa.edge.model.EdgeUrl;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
 * Response payload of the index service's domain search.
 *
 * keyword -- the keyword the search was run for, echoed from the request
 *            specification.
 * results -- the ids found; the service builds each one from the lower
 *            32 bits of an index entry.
 *
 * The constructor, getters and toString are generated by Lombok.
 */
@AllArgsConstructor @Getter @ToString
|
||||||
|
public class EdgeDomainSearchResults {
|
||||||
|
public final String keyword;
|
||||||
|
public final List<EdgeId<EdgeUrl>> results;
|
||||||
|
}
|
@ -0,0 +1,15 @@
|
|||||||
|
package nu.marginalia.wmsa.edge.model.search.domain;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.ToString;
|
||||||
|
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
||||||
|
|
||||||
|
/**
 * Request payload for the index service's domain search; the service
 * deserializes it from the JSON request body.
 *
 * bucket      -- id of the index bucket to query.
 * block       -- index block to search within that bucket.
 * keyword     -- search keyword; the service resolves it to a word id.
 * queryDepth  -- passed to the index reader, which uses it to cap how many
 *                index entries are examined.
 * minHitCount -- passed to the index reader as the repeat threshold of its
 *                hot-domain filter.
 * maxResults  -- passed to the index reader, which uses it to cap the number
 *                of results.
 *
 * The all-args constructor and toString are generated by Lombok.
 */
@ToString @AllArgsConstructor
|
||||||
|
public class EdgeDomainSearchSpecification {
|
||||||
|
public final int bucket;
|
||||||
|
public final IndexBlock block;
|
||||||
|
public final String keyword;
|
||||||
|
public final int queryDepth;
|
||||||
|
public final int minHitCount;
|
||||||
|
public final int maxResults;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user