Merge pull request 'Better index metrics, fix bug where domain results show up with advisory search terms.' (#117) from master into release
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/117
This commit is contained in:
commit
9f106e61ad
@ -5,6 +5,7 @@ import com.google.inject.Inject;
|
|||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import gnu.trove.set.hash.TIntHashSet;
|
import gnu.trove.set.hash.TIntHashSet;
|
||||||
import io.prometheus.client.Counter;
|
import io.prometheus.client.Counter;
|
||||||
|
import io.prometheus.client.Gauge;
|
||||||
import io.prometheus.client.Histogram;
|
import io.prometheus.client.Histogram;
|
||||||
import nu.marginalia.util.dict.DictionaryHashMap;
|
import nu.marginalia.util.dict.DictionaryHashMap;
|
||||||
import nu.marginalia.wmsa.client.GsonFactory;
|
import nu.marginalia.wmsa.client.GsonFactory;
|
||||||
@ -47,8 +48,9 @@ public class EdgeIndexQueryService {
|
|||||||
|
|
||||||
private static final Counter wmsa_edge_index_query_timeouts = Counter.build().name("wmsa_edge_index_query_timeouts").help("-").register();
|
private static final Counter wmsa_edge_index_query_timeouts = Counter.build().name("wmsa_edge_index_query_timeouts").help("-").register();
|
||||||
|
|
||||||
private static final Histogram wmsa_edge_index_query_time = Histogram.build().name("wmsa_edge_index_query_time").linearBuckets(50, 50, 15).help("-").register();
|
private static final Gauge wmsa_edge_index_query_cost = Gauge.build().name("wmsa_edge_index_query_cost").help("-").register();
|
||||||
private static final Histogram wmsa_edge_index_domain_query_time = Histogram.build().name("wmsa_edge_index_domain_query_time").linearBuckets(50, 50, 15).help("-").register();
|
private static final Histogram wmsa_edge_index_query_time = Histogram.build().name("wmsa_edge_index_query_time").linearBuckets(25/1000., 25/1000., 15).help("-").register();
|
||||||
|
private static final Histogram wmsa_edge_index_domain_query_time = Histogram.build().name("wmsa_edge_index_domain_query_time").linearBuckets(25/1000., 25/1000., 15).help("-").register();
|
||||||
|
|
||||||
private final Gson gson = GsonFactory.get();
|
private final Gson gson = GsonFactory.get();
|
||||||
|
|
||||||
@ -109,7 +111,12 @@ public class EdgeIndexQueryService {
|
|||||||
|
|
||||||
|
|
||||||
public EdgeSearchResultSet query(EdgeSearchSpecification specsSet) {
|
public EdgeSearchResultSet query(EdgeSearchSpecification specsSet) {
|
||||||
List<EdgeSearchResultItem> results = new SearchQuery(specsSet).execute();
|
SearchQuery searchQuery = new SearchQuery(specsSet);
|
||||||
|
|
||||||
|
List<EdgeSearchResultItem> results = searchQuery.execute();
|
||||||
|
|
||||||
|
wmsa_edge_index_query_cost.set(searchQuery.getDataCost());
|
||||||
|
|
||||||
return new EdgeSearchResultSet(results);
|
return new EdgeSearchResultSet(results);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -155,6 +162,8 @@ public class EdgeIndexQueryService {
|
|||||||
private final IndexSearchBudget budget;
|
private final IndexSearchBudget budget;
|
||||||
private final IndexQueryCachePool cachePool = new IndexQueryCachePool();
|
private final IndexQueryCachePool cachePool = new IndexQueryCachePool();
|
||||||
|
|
||||||
|
private long dataCost = 0;
|
||||||
|
|
||||||
public SearchQuery(EdgeSearchSpecification specsSet) {
|
public SearchQuery(EdgeSearchSpecification specsSet) {
|
||||||
this.specsSet = specsSet;
|
this.specsSet = specsSet;
|
||||||
this.budget = new IndexSearchBudget(specsSet.timeoutMs);
|
this.budget = new IndexSearchBudget(specsSet.timeoutMs);
|
||||||
@ -245,6 +254,8 @@ public class EdgeIndexQueryService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dataCost += query.dataCost();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
@ -309,6 +320,10 @@ public class EdgeIndexQueryService {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long getDataCost() {
|
||||||
|
return dataCost;
|
||||||
|
}
|
||||||
|
|
||||||
record ResultTerm (int bucket, int termId, long combinedUrlId) {}
|
record ResultTerm (int bucket, int termId, long combinedUrlId) {}
|
||||||
record ResultTermData (IndexBlock index,
|
record ResultTermData (IndexBlock index,
|
||||||
boolean title,
|
boolean title,
|
||||||
|
@ -21,6 +21,7 @@ public class IndexQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private int si = 0;
|
private int si = 0;
|
||||||
|
private int dataCost;
|
||||||
|
|
||||||
public boolean hasMore() {
|
public boolean hasMore() {
|
||||||
return si < sources.size();
|
return si < sources.size();
|
||||||
@ -36,9 +37,13 @@ public class IndexQuery {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dataCost += bufferUtilizedLength;
|
||||||
|
|
||||||
for (var filter : inclusionFilter) {
|
for (var filter : inclusionFilter) {
|
||||||
bufferUtilizedLength = filter.retainDestructive(dest, bufferUtilizedLength);
|
bufferUtilizedLength = filter.retainDestructive(dest, bufferUtilizedLength);
|
||||||
|
|
||||||
|
dataCost += bufferUtilizedLength;
|
||||||
|
|
||||||
if (bufferUtilizedLength <= 0) {
|
if (bufferUtilizedLength <= 0) {
|
||||||
si++;
|
si++;
|
||||||
return 0;
|
return 0;
|
||||||
@ -47,9 +52,13 @@ public class IndexQuery {
|
|||||||
|
|
||||||
int count = min(bufferUtilizedLength, dest.length);
|
int count = min(bufferUtilizedLength, dest.length);
|
||||||
System.arraycopy(dest, 0, dest, 0, count);
|
System.arraycopy(dest, 0, dest, 0, count);
|
||||||
|
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long dataCost() {
|
||||||
|
return dataCost;
|
||||||
|
}
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
sb.append("Sources:\n");
|
sb.append("Sources:\n");
|
||||||
|
@ -27,13 +27,10 @@ public class EdgeSearchDomainSearchService {
|
|||||||
this.edgeDataStoreDao = edgeDataStoreDao;
|
this.edgeDataStoreDao = edgeDataStoreDao;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<BrowseResult> getDomainResults(Context ctx, EdgeSearchSpecification specs) {
|
public List<BrowseResult> getDomainResults(Context ctx, EdgeSearchSpecification specs) {
|
||||||
|
|
||||||
List<String> keywords = specs.subqueries.stream()
|
List<String> keywords = getKeywordsFromSpecs(specs);
|
||||||
.filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1)
|
|
||||||
.map(sq -> sq.searchTermsInclude.get(0))
|
|
||||||
.distinct()
|
|
||||||
.toList();
|
|
||||||
|
|
||||||
if (keywords.isEmpty())
|
if (keywords.isEmpty())
|
||||||
return Collections.emptyList();
|
return Collections.emptyList();
|
||||||
@ -61,4 +58,11 @@ public class EdgeSearchDomainSearchService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private List<String> getKeywordsFromSpecs(EdgeSearchSpecification specs) {
|
||||||
|
return specs.subqueries.stream()
|
||||||
|
.filter(sq -> sq.searchTermsExclude.isEmpty() && sq.searchTermsInclude.size() == 1 && sq.searchTermsAdvice.isEmpty())
|
||||||
|
.map(sq -> sq.searchTermsInclude.get(0))
|
||||||
|
.distinct()
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user