Added instrumentation for search queries

This commit is contained in:
vlofgren 2022-05-30 20:59:04 +02:00
parent 730e964475
commit dc963d3e44

View File

@@ -2,6 +2,7 @@ package nu.marginalia.wmsa.edge.search;
import com.google.inject.Inject; import com.google.inject.Inject;
import com.google.inject.Singleton; import com.google.inject.Singleton;
import io.prometheus.client.Summary;
import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.core.Observable;
import io.reactivex.rxjava3.schedulers.Schedulers; import io.reactivex.rxjava3.schedulers.Schedulers;
import nu.marginalia.wmsa.configuration.server.Context; import nu.marginalia.wmsa.configuration.server.Context;
@@ -47,6 +48,9 @@ public class EdgeSearchOperator {
private final SearchResultValuator valuator; private final SearchResultValuator valuator;
private final Comparator<EdgeUrlDetails> resultListComparator; private final Comparator<EdgeUrlDetails> resultListComparator;
private static final Summary wmsa_search_index_api_time = Summary.build().name("wmsa_search_index_api_time").help("-").register();
private static final Summary wmsa_search_result_decoration_time = Summary.build().name("wmsa_search_result_decoration_time").help("-").register();
@Inject @Inject
public EdgeSearchOperator(AssistantClient assistantClient, public EdgeSearchOperator(AssistantClient assistantClient,
EncyclopediaClient encyclopediaClient, EncyclopediaClient encyclopediaClient,
@@ -141,16 +145,17 @@ public class EdgeSearchOperator {
AccumulatedQueryResults queryResults = new AccumulatedQueryResults(); AccumulatedQueryResults queryResults = new AccumulatedQueryResults();
UrlDeduplicator deduplicator = new UrlDeduplicator(processedQuery.specs.limitByDomain); UrlDeduplicator deduplicator = new UrlDeduplicator(processedQuery.specs.limitByDomain);
if (processedQuery.searchTermsHuman.size()<=4 && !asFastAsPossible) {
fetchResultsMulti(ctx, processedQuery, queryResults, deduplicator);
}
else {
fetchResultsSimple(ctx, processedQuery, queryResults, deduplicator);
}
List<EdgeUrlDetails> resultList = new ArrayList<>(queryResults.size()); List<EdgeUrlDetails> resultList = new ArrayList<>(queryResults.size());
wmsa_search_index_api_time.time(() -> {
if (processedQuery.searchTermsHuman.size() <= 4 && !asFastAsPossible) {
fetchResultsMulti(ctx, processedQuery, queryResults, deduplicator);
} else {
fetchResultsSimple(ctx, processedQuery, queryResults, deduplicator);
}
});
wmsa_search_result_decoration_time.time(() -> {
for (var details : queryResults.results) { for (var details : queryResults.results) {
if (details.getUrlQuality() < -100) { if (details.getUrlQuality() < -100) {
continue; continue;
@@ -162,6 +167,8 @@ public class EdgeSearchOperator {
} }
resultList.sort(resultListComparator); resultList.sort(resultListComparator);
}
);
return new DecoratedSearchResultSet(resultList); return new DecoratedSearchResultSet(resultList);
} }
@@ -254,31 +261,14 @@ public class EdgeSearchOperator {
var blocksOrder = processedQuery.specs.subqueries.stream().map(sq -> sq.block).distinct().sorted(Comparator.comparing(block -> block.sortOrder)).toList(); var blocksOrder = processedQuery.specs.subqueries.stream().map(sq -> sq.block).distinct().sorted(Comparator.comparing(block -> block.sortOrder)).toList();
EdgeSearchSpecification[] specsArray = EdgeSearchSpecification[] specsArray =
processedQuery.specs.subqueries.stream() processedQuery.specs.subqueries.stream()
.filter(sq -> sq.block == IndexBlock.TitleKeywords) .filter(sq -> sq.block == IndexBlock.TitleKeywords)
.map(sq -> processedQuery.specs.withSubqueries(blocksOrder.stream().map(sq::withBlock).collect(Collectors.toList()))) .map(sq -> processedQuery.specs.withSubqueries(blocksOrder.stream().map(sq::withBlock).collect(Collectors.toList())))
//.flatMap(specs -> processedQuery.specs.buckets.stream().map(bucket -> specs.withBuckets(List.of(bucket))))
.toArray(EdgeSearchSpecification[]::new); .toArray(EdgeSearchSpecification[]::new);
var resultSets = indexClient.multiQuery(ctx, specsArray); var resultSets = indexClient.multiQuery(ctx, specsArray);
if (debug) {
for (var s : specsArray) {
logger.info("{}", s);
}
for (IndexBlock block : indexBlockSearchOrder) {
resultSets.forEach(res -> {
res.resultsList.getOrDefault(block, Collections.emptyList()).forEach(b2 -> {
b2.results.forEach((idx,items) -> {
items.forEach(i ->
logger.info("{} {} - {}", block, idx, i)
);
});
});
});
}
}
Set<EdgeId<EdgeUrl>> seenUrls = new HashSet<>(); Set<EdgeId<EdgeUrl>> seenUrls = new HashSet<>();
for (IndexBlock block : indexBlockSearchOrder) { for (IndexBlock block : indexBlockSearchOrder) {
var resultsJoined = resultSets.stream().flatMap(rs -> rs.resultsList.getOrDefault(block, Collections.emptyList()).stream()) var resultsJoined = resultSets.stream().flatMap(rs -> rs.resultsList.getOrDefault(block, Collections.emptyList()).stream())