(index) Retire count operation, clean up index code.
This commit is contained in:
parent
823ca73a3f
commit
427f3e922f
@ -34,7 +34,6 @@ public class QueryProtobufCodec {
|
||||
builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
|
||||
builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
|
||||
builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank));
|
||||
builder.setDomainCount(IndexProtobufCodec.convertSpecLimit(query.specs.domainCount));
|
||||
|
||||
builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits));
|
||||
|
||||
@ -63,7 +62,6 @@ public class QueryProtobufCodec {
|
||||
builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
|
||||
builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
|
||||
builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank));
|
||||
builder.setDomainCount(IndexProtobufCodec.convertSpecLimit(query.specs.domainCount));
|
||||
|
||||
builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits));
|
||||
|
||||
@ -92,7 +90,6 @@ public class QueryProtobufCodec {
|
||||
IndexProtobufCodec.convertSpecLimit(request.getYear()),
|
||||
IndexProtobufCodec.convertSpecLimit(request.getSize()),
|
||||
IndexProtobufCodec.convertSpecLimit(request.getRank()),
|
||||
IndexProtobufCodec.convertSpecLimit(request.getDomainCount()),
|
||||
request.getDomainIdsList(),
|
||||
IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()),
|
||||
request.getSearchSetIdentifier(),
|
||||
@ -174,7 +171,6 @@ public class QueryProtobufCodec {
|
||||
IndexProtobufCodec.convertSpecLimit(specs.getYear()),
|
||||
IndexProtobufCodec.convertSpecLimit(specs.getSize()),
|
||||
IndexProtobufCodec.convertSpecLimit(specs.getRank()),
|
||||
IndexProtobufCodec.convertSpecLimit(specs.getDomainCount()),
|
||||
IndexProtobufCodec.convertQueryLimits(specs.getQueryLimits()),
|
||||
QueryStrategy.valueOf(specs.getQueryStrategy()),
|
||||
IndexProtobufCodec.convertRankingParameterss(specs.getParameters())
|
||||
|
@ -19,7 +19,6 @@ public record QueryParams(
|
||||
SpecificationLimit year,
|
||||
SpecificationLimit size,
|
||||
SpecificationLimit rank,
|
||||
SpecificationLimit domainCount,
|
||||
List<Integer> domainIds,
|
||||
QueryLimits limits,
|
||||
String identifier,
|
||||
@ -37,7 +36,6 @@ public record QueryParams(
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
List.of(),
|
||||
limits,
|
||||
identifier,
|
||||
|
@ -24,8 +24,6 @@ public class SearchSpecification {
|
||||
public final SpecificationLimit size;
|
||||
public final SpecificationLimit rank;
|
||||
|
||||
public final SpecificationLimit domainCount;
|
||||
|
||||
public final QueryLimits queryLimits;
|
||||
|
||||
public final QueryStrategy queryStrategy;
|
||||
|
@ -25,7 +25,6 @@ message RpcQsQuery {
|
||||
RpcSpecLimit year = 8;
|
||||
RpcSpecLimit size = 9;
|
||||
RpcSpecLimit rank = 10;
|
||||
RpcSpecLimit domainCount = 11;
|
||||
repeated int32 domainIds = 12;
|
||||
RpcQueryLimits queryLimits = 13;
|
||||
string searchSetIdentifier = 14;
|
||||
@ -61,7 +60,6 @@ message RpcIndexQuery {
|
||||
RpcSpecLimit year = 6;
|
||||
RpcSpecLimit size = 7;
|
||||
RpcSpecLimit rank = 8;
|
||||
RpcSpecLimit domainCount = 9;
|
||||
RpcQueryLimits queryLimits = 10;
|
||||
string queryStrategy = 11; // Named query configuration
|
||||
RpcResultRankingParameters parameters = 12;
|
||||
|
@ -82,8 +82,6 @@ public class QueryParser {
|
||||
entity.replace(new Token(TokenType.SIZE_TERM, t.str.substring(4), t.displayStr));
|
||||
} else if (t.str.startsWith("rank") && t.str.matches("rank[=><]\\d+")) {
|
||||
entity.replace(new Token(TokenType.RANK_TERM, t.str.substring(4), t.displayStr));
|
||||
} else if (t.str.startsWith("count") && t.str.matches("count[=><]\\d+")) {
|
||||
entity.replace(new Token(TokenType.DOMAIN_COUNT_TERM, t.str.substring(5), t.displayStr));
|
||||
} else if (t.str.startsWith("qs=")) {
|
||||
entity.replace(new Token(TokenType.QS_TERM, t.str.substring(3), t.displayStr));
|
||||
} else if (t.str.contains(":")) {
|
||||
|
@ -40,7 +40,6 @@ public class Token {
|
||||
|
||||
case YEAR_TERM: visitor.onYearTerm(this); break;
|
||||
case RANK_TERM: visitor.onRankTerm(this); break;
|
||||
case DOMAIN_COUNT_TERM: visitor.onDomainCountTerm(this); break;
|
||||
case SIZE_TERM: visitor.onSizeTerm(this); break;
|
||||
case QS_TERM: visitor.onQsTerm(this); break;
|
||||
|
||||
|
@ -16,7 +16,6 @@ public enum TokenType implements Predicate<Token> {
|
||||
YEAR_TERM,
|
||||
SIZE_TERM,
|
||||
RANK_TERM,
|
||||
DOMAIN_COUNT_TERM,
|
||||
NEAR_TERM,
|
||||
|
||||
QS_TERM,
|
||||
|
@ -9,7 +9,6 @@ public interface TokenVisitor {
|
||||
void onYearTerm(Token token);
|
||||
void onSizeTerm(Token token);
|
||||
void onRankTerm(Token token);
|
||||
void onDomainCountTerm(Token token);
|
||||
void onQualityTerm(Token token);
|
||||
void onQsTerm(Token token);
|
||||
}
|
||||
|
@ -127,7 +127,6 @@ public class QueryFactory {
|
||||
.subqueries(subqueries)
|
||||
.humanQuery(query)
|
||||
.quality(qualityLimits.qualityLimit)
|
||||
.domainCount(qualityLimits.domainCount)
|
||||
.year(qualityLimits.year)
|
||||
.size(qualityLimits.size)
|
||||
.rank(qualityLimits.rank)
|
||||
|
@ -11,7 +11,6 @@ public class QueryLimitsAccumulator implements TokenVisitor {
|
||||
public SpecificationLimit year;
|
||||
public SpecificationLimit size;
|
||||
public SpecificationLimit rank;
|
||||
public SpecificationLimit domainCount;
|
||||
|
||||
public QueryStrategy queryStrategy = QueryStrategy.AUTO;
|
||||
|
||||
@ -20,7 +19,6 @@ public class QueryLimitsAccumulator implements TokenVisitor {
|
||||
year = params.year();
|
||||
size = params.size();
|
||||
rank = params.rank();
|
||||
domainCount = params.domainCount();
|
||||
}
|
||||
|
||||
private SpecificationLimit parseSpecificationLimit(String str) {
|
||||
@ -67,11 +65,6 @@ public class QueryLimitsAccumulator implements TokenVisitor {
|
||||
rank = parseSpecificationLimit(token.str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onDomainCountTerm(Token token) {
|
||||
domainCount = parseSpecificationLimit(token.str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onQualityTerm(Token token) {
|
||||
qualityLimit = parseSpecificationLimit(token.str);
|
||||
|
@ -103,8 +103,6 @@ public class QuerySearchTermsAccumulator implements TokenVisitor {
|
||||
@Override
|
||||
public void onRankTerm(Token token) {}
|
||||
@Override
|
||||
public void onDomainCountTerm(Token token) {}
|
||||
@Override
|
||||
public void onQualityTerm(Token token) {}
|
||||
@Override
|
||||
public void onQsTerm(Token token) {}
|
||||
|
@ -48,7 +48,6 @@ public class QueryFactoryTest {
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
null,
|
||||
new QueryLimits(100, 100, 100, 100),
|
||||
"NONE",
|
||||
|
@ -79,7 +79,6 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
private final IndexQueryService indexQueryService;
|
||||
private final IndexResultValuatorService resultValuator;
|
||||
|
||||
private final int nodeId;
|
||||
private final String nodeName;
|
||||
|
||||
private final int indexValuationThreads = Integer.getInteger("index.valuationThreads", 8);
|
||||
@ -91,7 +90,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
IndexQueryService indexQueryService,
|
||||
IndexResultValuatorService resultValuator)
|
||||
{
|
||||
this.nodeId = serviceConfiguration.node();
|
||||
var nodeId = serviceConfiguration.node();
|
||||
this.nodeName = Integer.toString(nodeId);
|
||||
this.index = index;
|
||||
this.searchSetsService = searchSetsService;
|
||||
@ -107,6 +106,8 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
try {
|
||||
var params = new SearchParameters(request, getSearchSet(request));
|
||||
|
||||
long endTime = System.currentTimeMillis() + request.getQueryLimits().getTimeoutMs();
|
||||
|
||||
SearchResultSet results = wmsa_query_time
|
||||
.labels(nodeName, "GRPC")
|
||||
.time(() -> {
|
||||
@ -119,7 +120,7 @@ public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase {
|
||||
.labels(nodeName, "GRPC")
|
||||
.set(params.getDataCost());
|
||||
|
||||
if (!params.hasTimeLeft()) {
|
||||
if (System.currentTimeMillis() >= endTime) {
|
||||
wmsa_query_timeouts
|
||||
.labels(nodeName, "GRPC")
|
||||
.inc();
|
||||
|
@ -10,8 +10,6 @@ import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
* @param year The year limit.
|
||||
* @param size The size limit. Eliminates results from domains that do not satisfy the size criteria.
|
||||
* @param rank The rank limit. Eliminates results from domains that do not satisfy the domain rank criteria.
|
||||
* @param domainCount The domain count limit. Filters out results from domains that do not contain enough
|
||||
* documents that match the query.
|
||||
* @param searchSet The search set. Limits the search to a set of domains.
|
||||
* @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring
|
||||
* the keywords to appear in the title, or in the domain.
|
||||
@ -20,7 +18,6 @@ public record QueryParams(SpecificationLimit qualityLimit,
|
||||
SpecificationLimit year,
|
||||
SpecificationLimit size,
|
||||
SpecificationLimit rank,
|
||||
SpecificationLimit domainCount,
|
||||
SearchSet searchSet,
|
||||
QueryStrategy queryStrategy
|
||||
)
|
||||
|
@ -52,7 +52,6 @@ public class SearchParameters {
|
||||
specsSet.year,
|
||||
specsSet.size,
|
||||
specsSet.rank,
|
||||
specsSet.domainCount,
|
||||
searchSet,
|
||||
specsSet.queryStrategy);
|
||||
|
||||
@ -80,7 +79,6 @@ public class SearchParameters {
|
||||
convertSpecLimit(request.getYear()),
|
||||
convertSpecLimit(request.getSize()),
|
||||
convertSpecLimit(request.getRank()),
|
||||
convertSpecLimit(request.getDomainCount()),
|
||||
searchSet,
|
||||
QueryStrategy.valueOf(request.getQueryStrategy()));
|
||||
|
||||
|
@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.*;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Singleton
|
||||
@ -77,33 +78,15 @@ public class IndexResultValuatorService {
|
||||
|
||||
for (var item : results) {
|
||||
if (domainCountFilter.test(item)) {
|
||||
resultsList.add(item);
|
||||
// It's important that this filter runs across all results, not just the top N
|
||||
if (resultsList.size() < params.limitTotal) {
|
||||
resultsList.add(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!params.queryParams.domainCount().isNone()) {
|
||||
// Remove items that don't meet the domain count requirement
|
||||
// This isn't perfect because the domain count is calculated
|
||||
// after the results are sorted
|
||||
resultsList.removeIf(item -> !params.queryParams.domainCount().test(domainCountFilter.getCount(item)));
|
||||
}
|
||||
|
||||
if (resultsList.size() > params.limitTotal) {
|
||||
// This can't be made a stream limit() operation because we need domainCountFilter
|
||||
// to run over the entire list to provide accurate statistics
|
||||
|
||||
resultsList.subList(params.limitTotal, resultsList.size()).clear();
|
||||
}
|
||||
|
||||
// populate results with the total number of results encountered from
|
||||
// the same domain so this information can be presented to the user
|
||||
for (var result : resultsList) {
|
||||
result.resultsFromDomain = domainCountFilter.getCount(result);
|
||||
}
|
||||
|
||||
LongArrayList idsList = new LongArrayList(resultsList.size());
|
||||
for (var result : resultsList) {
|
||||
idsList.add(result.getCombinedId());
|
||||
for (var item : resultsList) {
|
||||
item.resultsFromDomain = domainCountFilter.getCount(item);
|
||||
}
|
||||
|
||||
return decorateAndRerank(resultsList, rankingContext);
|
||||
@ -125,23 +108,19 @@ public class IndexResultValuatorService {
|
||||
for (var item : documentDbReader.getUrlDetails(idsList))
|
||||
urlDetailsById.put(item.urlId(), item);
|
||||
|
||||
List<DecoratedSearchResultItem> decoratedItems = new ArrayList<>();
|
||||
List<DecoratedSearchResultItem> resultItems = new ArrayList<>(rawResults.size());
|
||||
for (var result : rawResults) {
|
||||
var docData = urlDetailsById.get(result.getDocumentId());
|
||||
var id = result.getDocumentId();
|
||||
var docData = urlDetailsById.get(id);
|
||||
|
||||
if (null == docData) {
|
||||
logger.warn("No data for document id {}", result.getDocumentId());
|
||||
if (docData == null) {
|
||||
logger.warn("No document data for id {}", id);
|
||||
continue;
|
||||
}
|
||||
|
||||
decoratedItems.add(createCombinedItem(result, docData, rankingContext));
|
||||
resultItems.add(createCombinedItem(result, docData, rankingContext));
|
||||
}
|
||||
|
||||
if (decoratedItems.size() != rawResults.size())
|
||||
logger.warn("Result list shrunk during decoration?");
|
||||
|
||||
decoratedItems.sort(Comparator.naturalOrder());
|
||||
return decoratedItems;
|
||||
return resultItems;
|
||||
}
|
||||
|
||||
private DecoratedSearchResultItem createCombinedItem(SearchResultItem result,
|
||||
|
@ -120,7 +120,6 @@ public class IndexQueryServiceIntegrationSmokeTest {
|
||||
.quality(SpecificationLimit.none())
|
||||
.size(SpecificationLimit.none())
|
||||
.rank(SpecificationLimit.none())
|
||||
.domainCount(SpecificationLimit.none())
|
||||
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
||||
.domains(new ArrayList<>())
|
||||
.searchSetIdentifier("NONE")
|
||||
@ -164,7 +163,6 @@ public class IndexQueryServiceIntegrationSmokeTest {
|
||||
.quality(SpecificationLimit.none())
|
||||
.size(SpecificationLimit.none())
|
||||
.rank(SpecificationLimit.none())
|
||||
.domainCount(SpecificationLimit.none())
|
||||
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
||||
.queryStrategy(QueryStrategy.SENTENCE)
|
||||
.domains(List.of(2))
|
||||
@ -201,7 +199,6 @@ public class IndexQueryServiceIntegrationSmokeTest {
|
||||
.year(SpecificationLimit.equals(1998))
|
||||
.size(SpecificationLimit.none())
|
||||
.rank(SpecificationLimit.none())
|
||||
.domainCount(SpecificationLimit.none())
|
||||
.queryStrategy(QueryStrategy.SENTENCE)
|
||||
.searchSetIdentifier("NONE")
|
||||
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
||||
|
@ -422,7 +422,6 @@ public class IndexQueryServiceIntegrationTest {
|
||||
.quality(SpecificationLimit.none())
|
||||
.size(SpecificationLimit.none())
|
||||
.rank(SpecificationLimit.none())
|
||||
.domainCount(SpecificationLimit.none())
|
||||
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
||||
.domains(new ArrayList<>())
|
||||
.searchSetIdentifier("NONE")
|
||||
|
@ -33,7 +33,6 @@ public class SearchQueryParamFactory {
|
||||
profile.getYearLimit(),
|
||||
profile.getSizeLimit(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
List.of(),
|
||||
new QueryLimits(5, 100, 200, 8192),
|
||||
profile.searchSetIdentifier.name(),
|
||||
@ -54,7 +53,6 @@ public class SearchQueryParamFactory {
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
List.of(),
|
||||
new QueryLimits(count, count, 100, 512),
|
||||
SearchSetIdentifier.NONE.name(),
|
||||
@ -74,7 +72,6 @@ public class SearchQueryParamFactory {
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
List.of(),
|
||||
new QueryLimits(100, 100, 100, 512),
|
||||
SearchSetIdentifier.NONE.name(),
|
||||
@ -94,7 +91,6 @@ public class SearchQueryParamFactory {
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
List.of(),
|
||||
new QueryLimits(100, 100, 100, 512),
|
||||
SearchSetIdentifier.NONE.name(),
|
||||
|
Loading…
Reference in New Issue
Block a user