Merge pull request 'Better index metrics, fix bug where domain results show up with advisory search terms.' (#117) from master into release

Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/117
This commit is contained in:
Viktor Lofgren 2022-09-15 17:04:49 +02:00
commit 9f106e61ad
3 changed files with 36 additions and 8 deletions

View File

@ -5,6 +5,7 @@ import com.google.inject.Inject;
import com.google.inject.Singleton; import com.google.inject.Singleton;
import gnu.trove.set.hash.TIntHashSet; import gnu.trove.set.hash.TIntHashSet;
import io.prometheus.client.Counter; import io.prometheus.client.Counter;
import io.prometheus.client.Gauge;
import io.prometheus.client.Histogram; import io.prometheus.client.Histogram;
import nu.marginalia.util.dict.DictionaryHashMap; import nu.marginalia.util.dict.DictionaryHashMap;
import nu.marginalia.wmsa.client.GsonFactory; import nu.marginalia.wmsa.client.GsonFactory;
@ -47,8 +48,9 @@ public class EdgeIndexQueryService {
private static final Counter wmsa_edge_index_query_timeouts = Counter.build().name("wmsa_edge_index_query_timeouts").help("-").register(); private static final Counter wmsa_edge_index_query_timeouts = Counter.build().name("wmsa_edge_index_query_timeouts").help("-").register();
private static final Histogram wmsa_edge_index_query_time = Histogram.build().name("wmsa_edge_index_query_time").linearBuckets(50, 50, 15).help("-").register(); private static final Gauge wmsa_edge_index_query_cost = Gauge.build().name("wmsa_edge_index_query_cost").help("-").register();
private static final Histogram wmsa_edge_index_domain_query_time = Histogram.build().name("wmsa_edge_index_domain_query_time").linearBuckets(50, 50, 15).help("-").register(); private static final Histogram wmsa_edge_index_query_time = Histogram.build().name("wmsa_edge_index_query_time").linearBuckets(25/1000., 25/1000., 15).help("-").register();
private static final Histogram wmsa_edge_index_domain_query_time = Histogram.build().name("wmsa_edge_index_domain_query_time").linearBuckets(25/1000., 25/1000., 15).help("-").register();
private final Gson gson = GsonFactory.get(); private final Gson gson = GsonFactory.get();
@ -109,7 +111,12 @@ public class EdgeIndexQueryService {
public EdgeSearchResultSet query(EdgeSearchSpecification specsSet) { public EdgeSearchResultSet query(EdgeSearchSpecification specsSet) {
List<EdgeSearchResultItem> results = new SearchQuery(specsSet).execute(); SearchQuery searchQuery = new SearchQuery(specsSet);
List<EdgeSearchResultItem> results = searchQuery.execute();
wmsa_edge_index_query_cost.set(searchQuery.getDataCost());
return new EdgeSearchResultSet(results); return new EdgeSearchResultSet(results);
} }
@ -155,6 +162,8 @@ public class EdgeIndexQueryService {
private final IndexSearchBudget budget; private final IndexSearchBudget budget;
private final IndexQueryCachePool cachePool = new IndexQueryCachePool(); private final IndexQueryCachePool cachePool = new IndexQueryCachePool();
private long dataCost = 0;
public SearchQuery(EdgeSearchSpecification specsSet) { public SearchQuery(EdgeSearchSpecification specsSet) {
this.specsSet = specsSet; this.specsSet = specsSet;
this.budget = new IndexSearchBudget(specsSet.timeoutMs); this.budget = new IndexSearchBudget(specsSet.timeoutMs);
@ -245,6 +254,8 @@ public class EdgeIndexQueryService {
} }
} }
dataCost += query.dataCost();
} }
return results; return results;
@ -309,6 +320,10 @@ public class EdgeIndexQueryService {
); );
} }
/**
 * Returns the data cost accumulated by this search query so far.
 *
 * The counter starts at zero and is increased by {@code query.dataCost()}
 * each time a query has been processed while gathering results.
 *
 * @return the summed data cost recorded so far
 */
public long getDataCost() {
return dataCost;
}
record ResultTerm (int bucket, int termId, long combinedUrlId) {} record ResultTerm (int bucket, int termId, long combinedUrlId) {}
record ResultTermData (IndexBlock index, record ResultTermData (IndexBlock index,
boolean title, boolean title,

View File

@ -21,6 +21,7 @@ public class IndexQuery {
} }
private int si = 0; private int si = 0;
// Running total of the buffer lengths this query has handled. Kept as a long so it
// matches the long returned by dataCost() and cannot silently wrap around the way
// an int accumulator would.
private long dataCost;
public boolean hasMore() { public boolean hasMore() {
return si < sources.size(); return si < sources.size();
@ -36,9 +37,13 @@ public class IndexQuery {
return 0; return 0;
} }
dataCost += bufferUtilizedLength;
for (var filter : inclusionFilter) { for (var filter : inclusionFilter) {
bufferUtilizedLength = filter.retainDestructive(dest, bufferUtilizedLength); bufferUtilizedLength = filter.retainDestructive(dest, bufferUtilizedLength);
dataCost += bufferUtilizedLength;
if (bufferUtilizedLength <= 0) { if (bufferUtilizedLength <= 0) {
si++; si++;
return 0; return 0;
@ -47,9 +52,13 @@ public class IndexQuery {
int count = min(bufferUtilizedLength, dest.length); int count = min(bufferUtilizedLength, dest.length);
System.arraycopy(dest, 0, dest, 0, count); System.arraycopy(dest, 0, dest, 0, count);
return count; return count;
} }
/**
 * Returns the data cost accumulated by this query so far.
 *
 * The counter grows by the current {@code bufferUtilizedLength} once before the
 * inclusion filters run, and once more after each inclusion filter is applied.
 * NOTE(review): the unit appears to be buffer entries rather than bytes — confirm.
 *
 * @return the accumulated data cost
 */
public long dataCost() {
return dataCost;
}
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append("Sources:\n"); sb.append("Sources:\n");

View File

@ -27,13 +27,10 @@ public class EdgeSearchDomainSearchService {
this.edgeDataStoreDao = edgeDataStoreDao; this.edgeDataStoreDao = edgeDataStoreDao;
} }
public List<BrowseResult> getDomainResults(Context ctx, EdgeSearchSpecification specs) { public List<BrowseResult> getDomainResults(Context ctx, EdgeSearchSpecification specs) {
List<String> keywords = specs.subqueries.stream() List<String> keywords = getKeywordsFromSpecs(specs);
.filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1)
.map(sq -> sq.searchTermsInclude.get(0))
.distinct()
.toList();
if (keywords.isEmpty()) if (keywords.isEmpty())
return Collections.emptyList(); return Collections.emptyList();
@ -61,4 +58,11 @@ public class EdgeSearchDomainSearchService {
} }
/**
 * Collects the keywords from a search specification that qualify for a domain lookup.
 *
 * A subquery contributes its keyword only when it has no excluded terms, exactly one
 * included term, and no advice terms. Duplicate keywords are dropped.
 *
 * @param specs the search specification whose subqueries are inspected
 * @return the distinct qualifying include terms, as an unmodifiable list
 */
private List<String> getKeywordsFromSpecs(EdgeSearchSpecification specs) {
    return specs.subqueries.stream()
            // The three checks run in the same order as a single short-circuiting && chain.
            .filter(subquery -> subquery.searchTermsExclude.isEmpty())
            .filter(subquery -> subquery.searchTermsInclude.size() == 1)
            .filter(subquery -> subquery.searchTermsAdvice.isEmpty())
            .map(subquery -> subquery.searchTermsInclude.get(0))
            .distinct()
            .toList();
}
} }