Merge pull request 'Better index metrics, fix bug where domain result show up with advisory search terms.' (#117) from master into release

Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/117
This commit is contained in:
Viktor Lofgren 2022-09-15 17:04:49 +02:00
commit 9f106e61ad
3 changed files with 36 additions and 8 deletions

View File

@ -5,6 +5,7 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import gnu.trove.set.hash.TIntHashSet;
import io.prometheus.client.Counter;
import io.prometheus.client.Gauge;
import io.prometheus.client.Histogram;
import nu.marginalia.util.dict.DictionaryHashMap;
import nu.marginalia.wmsa.client.GsonFactory;
@ -47,8 +48,9 @@ public class EdgeIndexQueryService {
// Prometheus metrics registered by the index query service.
//
// NOTE(review): both histograms below are declared twice, once with
// linearBuckets(50, 50, 15) and once with linearBuckets(25/1000., 25/1000., 15).
// This listing looks like a diff view that shows the removed and the added
// declarations together; a compilable class can keep only one pair — confirm which.

// Counter named "wmsa_edge_index_query_timeouts".
private static final Counter wmsa_edge_index_query_timeouts = Counter.build().name("wmsa_edge_index_query_timeouts").help("-").register();
// linearBuckets(start, width, count): 15 buckets with upper bounds 50, 100, ..., 750.
private static final Histogram wmsa_edge_index_query_time = Histogram.build().name("wmsa_edge_index_query_time").linearBuckets(50, 50, 15).help("-").register();
private static final Histogram wmsa_edge_index_domain_query_time = Histogram.build().name("wmsa_edge_index_domain_query_time").linearBuckets(50, 50, 15).help("-").register();
// Gauge written by query() with the data cost reported by the executed SearchQuery.
private static final Gauge wmsa_edge_index_query_cost = Gauge.build().name("wmsa_edge_index_query_cost").help("-").register();
// 25/1000. is the double 0.025: 15 buckets with upper bounds 0.025, 0.050, ..., 0.375.
// NOTE(review): presumably a switch from millisecond-scale to second-scale
// observations — verify against the code that observes these histograms.
private static final Histogram wmsa_edge_index_query_time = Histogram.build().name("wmsa_edge_index_query_time").linearBuckets(25/1000., 25/1000., 15).help("-").register();
private static final Histogram wmsa_edge_index_domain_query_time = Histogram.build().name("wmsa_edge_index_domain_query_time").linearBuckets(25/1000., 25/1000., 15).help("-").register();
private final Gson gson = GsonFactory.get();
@ -109,7 +111,12 @@ public class EdgeIndexQueryService {
/**
 * Runs a {@code SearchQuery} built from the given specification, publishes the
 * query's data cost to the {@code wmsa_edge_index_query_cost} gauge, and wraps
 * the resulting items in an {@code EdgeSearchResultSet}.
 *
 * @param specsSet the search specification to execute
 * @return a result set containing the items produced by the query
 */
public EdgeSearchResultSet query(EdgeSearchSpecification specsSet) {
// NOTE(review): `results` is declared twice below. The first declaration looks
// like the pre-change line kept by a diff view; the two lines after it are its
// replacement, which keeps the SearchQuery instance so its cost can be read.
List<EdgeSearchResultItem> results = new SearchQuery(specsSet).execute();
SearchQuery searchQuery = new SearchQuery(specsSet);
List<EdgeSearchResultItem> results = searchQuery.execute();
// A gauge holds a single value, so each call overwrites the previous query's cost.
// NOTE(review): if queries can run concurrently the exported value is whichever
// call wrote last — confirm that is the intended semantics for this metric.
wmsa_edge_index_query_cost.set(searchQuery.getDataCost());
return new EdgeSearchResultSet(results);
}
@ -155,6 +162,8 @@ public class EdgeIndexQueryService {
private final IndexSearchBudget budget;
private final IndexQueryCachePool cachePool = new IndexQueryCachePool();
private long dataCost = 0;
public SearchQuery(EdgeSearchSpecification specsSet) {
this.specsSet = specsSet;
this.budget = new IndexSearchBudget(specsSet.timeoutMs);
@ -245,6 +254,8 @@ public class EdgeIndexQueryService {
}
}
dataCost += query.dataCost();
}
return results;
@ -309,6 +320,10 @@ public class EdgeIndexQueryService {
);
}
/**
 * Returns the data cost accumulated by this search query, i.e. the running
 * total of the {@code dataCost()} values added to the {@code dataCost} field
 * while the query executed.
 *
 * @return the accumulated data cost
 */
public long getDataCost() {
    return this.dataCost;
}
record ResultTerm (int bucket, int termId, long combinedUrlId) {}
record ResultTermData (IndexBlock index,
boolean title,

View File

@ -21,6 +21,7 @@ public class IndexQuery {
}
private int si = 0;
// Running total of the buffer lengths handled by this query. Declared as long so
// it matches the long-returning dataCost() accessor and cannot wrap around at
// Integer.MAX_VALUE before being widened.
private long dataCost;
public boolean hasMore() {
return si < sources.size();
@ -36,9 +37,13 @@ public class IndexQuery {
return 0;
}
dataCost += bufferUtilizedLength;
for (var filter : inclusionFilter) {
bufferUtilizedLength = filter.retainDestructive(dest, bufferUtilizedLength);
dataCost += bufferUtilizedLength;
if (bufferUtilizedLength <= 0) {
si++;
return 0;
@ -47,9 +52,13 @@ public class IndexQuery {
int count = min(bufferUtilizedLength, dest.length);
System.arraycopy(dest, 0, dest, 0, count);
return count;
}
/**
 * Returns the data cost accumulated by this query so far. The counter is
 * increased by the utilized buffer length each time data is read and again
 * after each inclusion filter has been applied to the buffer.
 *
 * @return the accumulated data cost
 */
public long dataCost() {
    return this.dataCost;
}
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Sources:\n");

View File

@ -27,13 +27,10 @@ public class EdgeSearchDomainSearchService {
this.edgeDataStoreDao = edgeDataStoreDao;
}
public List<BrowseResult> getDomainResults(Context ctx, EdgeSearchSpecification specs) {
List<String> keywords = specs.subqueries.stream()
.filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1)
.map(sq -> sq.searchTermsInclude.get(0))
.distinct()
.toList();
List<String> keywords = getKeywordsFromSpecs(specs);
if (keywords.isEmpty())
return Collections.emptyList();
@ -61,4 +58,11 @@ public class EdgeSearchDomainSearchService {
}
/**
 * Collects the distinct keywords usable for a domain search from the given
 * specification.
 *
 * <p>A subquery contributes its keyword only when it has no excluded terms,
 * exactly one included term, and no advisory terms; the single included term
 * is then taken as the keyword.
 *
 * @param specs the search specification whose subqueries are inspected
 * @return the distinct keywords in encounter order, possibly empty
 */
private List<String> getKeywordsFromSpecs(EdgeSearchSpecification specs) {
    // The three checks run in the same order as a short-circuiting && would.
    var singleTermKeywords = specs.subqueries.stream()
            .filter(subquery -> subquery.searchTermsExclude.isEmpty())
            .filter(subquery -> subquery.searchTermsInclude.size() == 1)
            .filter(subquery -> subquery.searchTermsAdvice.isEmpty())
            .map(subquery -> subquery.searchTermsInclude.get(0))
            .distinct();
    return singleTermKeywords.toList();
}
}