Actually add the commit with the previously mentioned instrumentation (#18)

Co-authored-by: vlofgren <vlofgren@gmail.com>
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/18
This commit is contained in:
Viktor Lofgren 2022-05-30 21:12:15 +02:00
parent c201201c2d
commit 44bee371e6

View File

@ -2,6 +2,7 @@ package nu.marginalia.wmsa.edge.search;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import io.prometheus.client.Summary;
import io.reactivex.rxjava3.core.Observable;
import io.reactivex.rxjava3.schedulers.Schedulers;
import nu.marginalia.wmsa.configuration.server.Context;
@ -47,6 +48,9 @@ public class EdgeSearchOperator {
private final SearchResultValuator valuator;
private final Comparator<EdgeUrlDetails> resultListComparator;
// Prometheus Summary used to time the index lookup phase of a search: it wraps
// the fetchResultsMulti / fetchResultsSimple call. The help text is a "-" placeholder.
private static final Summary wmsa_search_index_api_time = Summary.build().name("wmsa_search_index_api_time").help("-").register();
// Prometheus Summary used to time the result decoration phase: filtering out
// low-quality results, applying the query-based score adjustment and sorting.
// The help text is a "-" placeholder.
private static final Summary wmsa_search_result_decoration_time = Summary.build().name("wmsa_search_result_decoration_time").help("-").register();
@Inject
public EdgeSearchOperator(AssistantClient assistantClient,
EncyclopediaClient encyclopediaClient,
@ -141,27 +145,30 @@ public class EdgeSearchOperator {
AccumulatedQueryResults queryResults = new AccumulatedQueryResults();
UrlDeduplicator deduplicator = new UrlDeduplicator(processedQuery.specs.limitByDomain);
if (processedQuery.searchTermsHuman.size()<=4 && !asFastAsPossible) {
fetchResultsMulti(ctx, processedQuery, queryResults, deduplicator);
}
else {
fetchResultsSimple(ctx, processedQuery, queryResults, deduplicator);
}
List<EdgeUrlDetails> resultList = new ArrayList<>(queryResults.size());
for (var details : queryResults.results) {
if (details.getUrlQuality() < -100) {
continue;
wmsa_search_index_api_time.time(() -> {
if (processedQuery.searchTermsHuman.size() <= 4 && !asFastAsPossible) {
fetchResultsMulti(ctx, processedQuery, queryResults, deduplicator);
} else {
fetchResultsSimple(ctx, processedQuery, queryResults, deduplicator);
}
});
wmsa_search_result_decoration_time.time(() -> {
for (var details : queryResults.results) {
if (details.getUrlQuality() < -100) {
continue;
}
var scoreAdjustment = adjustScoreBasedOnQuery(details, processedQuery.specs);
details = details.withUrlQualityAdjustment(scoreAdjustment);
resultList.add(details);
}
var scoreAdjustment = adjustScoreBasedOnQuery(details, processedQuery.specs);
details = details.withUrlQualityAdjustment(scoreAdjustment);
resultList.add(details);
resultList.sort(resultListComparator);
}
resultList.sort(resultListComparator);
);
return new DecoratedSearchResultSet(resultList);
}
@ -254,31 +261,14 @@ public class EdgeSearchOperator {
var blocksOrder = processedQuery.specs.subqueries.stream().map(sq -> sq.block).distinct().sorted(Comparator.comparing(block -> block.sortOrder)).toList();
EdgeSearchSpecification[] specsArray =
processedQuery.specs.subqueries.stream()
.filter(sq -> sq.block == IndexBlock.TitleKeywords)
.map(sq -> processedQuery.specs.withSubqueries(blocksOrder.stream().map(sq::withBlock).collect(Collectors.toList())))
//.flatMap(specs -> processedQuery.specs.buckets.stream().map(bucket -> specs.withBuckets(List.of(bucket))))
.toArray(EdgeSearchSpecification[]::new);
var resultSets = indexClient.multiQuery(ctx, specsArray);
if (debug) {
for (var s : specsArray) {
logger.info("{}", s);
}
for (IndexBlock block : indexBlockSearchOrder) {
resultSets.forEach(res -> {
res.resultsList.getOrDefault(block, Collections.emptyList()).forEach(b2 -> {
b2.results.forEach((idx,items) -> {
items.forEach(i ->
logger.info("{} {} - {}", block, idx, i)
);
});
});
});
}
}
Set<EdgeId<EdgeUrl>> seenUrls = new HashSet<>();
for (IndexBlock block : indexBlockSearchOrder) {
var resultsJoined = resultSets.stream().flatMap(rs -> rs.resultsList.getOrDefault(block, Collections.emptyList()).stream())