From 6c3b49417f75723c47940ccfea364ef3a531a4af Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Thu, 15 Feb 2024 11:33:50 +0100 Subject: [PATCH] (index-query) Improve documentation and code quality --- code/features-index/index-query/readme.md | 11 ++- .../index/query/EmptyEntrySource.java | 1 + .../marginalia/index/query/EntrySource.java | 7 ++ .../nu/marginalia/index/query/IndexQuery.java | 39 ++++++++++- .../index/query/IndexQueryParams.java | 12 ++++ .../index/query/IndexSearchBudget.java | 1 + .../index/query/filter/QueryFilterAnyOf.java | 5 +- .../array/buffer/LongQueryBuffer.java | 70 ++++++++++++++----- 8 files changed, 124 insertions(+), 22 deletions(-) diff --git a/code/features-index/index-query/readme.md b/code/features-index/index-query/readme.md index 991f80e1..3334cada 100644 --- a/code/features-index/index-query/readme.md +++ b/code/features-index/index-query/readme.md @@ -2,6 +2,14 @@ Contains interfaces and primitives for creating and evaluating queries against the indices. +Central to interacting with the query interface is the `IndexQuery` class. This class is used +to create and evaluate queries against the index. The class will fill a `LongQueryBuffer` with +the results of the query. + +This is a relatively light library consisting of a few classes and interfaces. Many of the +interfaces are implemented within the index-service module. + + ## Central Classes * [IndexQuery](src/main/java/nu/marginalia/index/query/IndexQuery.java) @@ -10,4 +18,5 @@ Contains interfaces and primitives for creating and evaluating queries against t ## See Also * [index/index-reverse](../index-reverse) implements many of these interfaces. 
-* [libraries/array](../../libraries/array) \ No newline at end of file +* [libraries/array](../../libraries/array) +* [libraries/array/.../LongQueryBuffer](../../libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java) \ No newline at end of file diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EmptyEntrySource.java b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EmptyEntrySource.java index ce515fab..62e06f5f 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EmptyEntrySource.java +++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EmptyEntrySource.java @@ -2,6 +2,7 @@ package nu.marginalia.index.query; import nu.marginalia.array.buffer.LongQueryBuffer; +/** Dummy EntrySource that returns no entries. */ public class EmptyEntrySource implements EntrySource { @Override public void skip(int n) { diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EntrySource.java b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EntrySource.java index c3a35e90..9c643e2b 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EntrySource.java +++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EntrySource.java @@ -2,11 +2,18 @@ package nu.marginalia.index.query; import nu.marginalia.array.buffer.LongQueryBuffer; +/** An EntrySource is a source of entries for a query. + */ public interface EntrySource { + /** Skip n entries. */ void skip(int n); + + /** Fill the buffer with entries, updating its data and length appropriately. */ void read(LongQueryBuffer buffer); + /** Returns true if there are more entries to read. */ boolean hasMore(); + /** Returns the name of the index, for debugging purposes. 
*/ String indexName(); } diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQuery.java b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQuery.java index aa4102c8..b55a8bef 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQuery.java +++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQuery.java @@ -7,19 +7,42 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; +/** A query to the index. The query is composed of a list of sources + * and a list of filters. + *

+ * The sources are read in order, and the filters are applied to the results. + *

+ * The query is executed by providing it with a buffer to fill with results, + * and then calling getMoreResults() repeatedly for as long as hasMore() returns true. + */ public class IndexQuery { private final List sources; private final List inclusionFilter = new ArrayList<>(10); - public final IndexQueryPriority queryPriority; + public final IndexQueryPriority queryPriority; public final int fetchSizeMultiplier; - public IndexQuery(List sources, IndexQueryPriority priority, int fetchSizeMultiplier) { + /** + * Creates an IndexQuery object with the given sources, priority, and fetchSizeMultiplier. + * + * @param sources List of EntrySource objects representing the sources to query from + * @param priority IndexQueryPriority of the query, determining how many results to fetch before stopping + * @param fetchSizeMultiplier Affects the fetch size of the query, determining how deep the query should go + */ + public IndexQuery(List sources, + IndexQueryPriority priority, + int fetchSizeMultiplier) + { this.sources = sources; this.queryPriority = priority; this.fetchSizeMultiplier = fetchSizeMultiplier; } + /** Adds a filter to the query. The filter will be applied to the results + * after they are read from the sources. + * + * @param filter The filter to add + */ public void addInclusionFilter(QueryFilterStepIf filter) { inclusionFilter.add(filter); } @@ -27,10 +50,22 @@ public class IndexQuery { private int si = 0; private int dataCost; + /** Returns true if there are more results to read from the sources. + * May return true even if there are no more results, but will eventually + * return false. + */ public boolean hasMore() { return si < sources.size(); } + /** Fills the given buffer with more results from the sources. + * The results are filtered by the inclusion filters. + *

+ * The method will advance the sources and filters as needed + * to fill the buffer. + * + * @param dest The buffer to fill with results + */ public void getMoreResults(LongQueryBuffer dest) { if (!fillBuffer(dest)) return; diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java index 03060cb6..46224638 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java +++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java @@ -4,6 +4,18 @@ import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.searchset.SearchSet; import nu.marginalia.index.query.limit.SpecificationLimit; +/** IndexQueryParams is a set of parameters for a query. + * + * @param qualityLimit The quality limit. + * @param year The year limit. + * @param size The size limit. Eliminates results from domains that do not satisfy the size criteria. + * @param rank The rank limit. Eliminates results from domains that do not satisfy the domain rank criteria. + * @param domainCount The domain count limit. Filters out results from domains that do not contain enough + * documents that match the query. + * @param searchSet The search set. Limits the search to a set of domains. + * @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring + * the keywords to appear in the title, or in the domain. 
+ */ public record IndexQueryParams(SpecificationLimit qualityLimit, SpecificationLimit year, SpecificationLimit size, diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexSearchBudget.java b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexSearchBudget.java index 5551839e..1ed211d1 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexSearchBudget.java +++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexSearchBudget.java @@ -1,6 +1,7 @@ package nu.marginalia.index.query; +/** An execution time budget for index search operations. */ public class IndexSearchBudget { private final long timeout; diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterAnyOf.java b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterAnyOf.java index 2569ec42..c9ee2c6e 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterAnyOf.java +++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterAnyOf.java @@ -28,10 +28,13 @@ public class QueryFilterAnyOf implements QueryFilterStepIf { public void apply(LongQueryBuffer buffer) { + if (steps.isEmpty()) + return; + int start; int end = buffer.end; - steps.get(0).apply(buffer); + steps.getFirst().apply(buffer); // The filter functions will partition the data in the buffer from 0 to END, // and update END to the length of the retained items, keeping the retained diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java b/code/libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java index ed0f36fb..68de658f 100644 --- a/code/libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java +++ b/code/libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java @@ 
-2,8 +2,24 @@ package nu.marginalia.array.buffer; import java.util.Arrays; +/** A buffer for long values that can be used to filter and manipulate the data. + * It is central to the query processing in the index service. + *

+ * The class contains a read pointer, a write pointer, and a buffer end pointer. + *

+ * The read and write pointers are used for filtering the data in the buffer, and + * the end pointer is used to keep track of the length of the data in the buffer. + *

+ * Filtering is done via the methods {@link #rejectAndAdvance()}, {@link #retainAndAdvance()}, + * and {@link #finalizeFiltering()}. + * + */ public class LongQueryBuffer { + /** Direct access to the data in the buffer, + * guaranteed to be populated until `end` */ public final long[] data; + + /** Number of items in the data buffer */ public int end; private int read = 0; @@ -14,15 +30,11 @@ public class LongQueryBuffer { this.end = size; } - public LongQueryBuffer(long [] data, int size) { + public LongQueryBuffer(long[] data, int size) { this.data = data; this.end = size; } - public boolean hasRetainedData() { - return write > 0; - } - public long[] copyData() { return Arrays.copyOf(data, end); } @@ -35,14 +47,36 @@ public class LongQueryBuffer { return end; } + public void reset() { + end = data.length; + read = 0; + write = 0; + } + + public void zero() { + end = 0; + read = 0; + write = 0; + } + + /* == Filtering methods == */ + + /** Returns the current value at the read pointer. + */ public long currentValue() { return data[read]; } + /** Advances the read pointer and returns true if there are more values to read. */ public boolean rejectAndAdvance() { return ++read < end; } + /** Retains the current value at the read pointer and advances the read and write pointers. + * Returns true if there are more values to read. + *

To enable "or" style criteria, the method swaps the current value with the value + * at the write pointer, so that it's retained at the end of the buffer. + */ public boolean retainAndAdvance() { if (read != write) { long tmp = data[write]; @@ -59,6 +93,15 @@ public class LongQueryBuffer { return read < end; } + public boolean hasRetainedData() { + return write > 0; + } + + /** Finalizes the filtering by setting the end pointer to the write pointer, + * and resetting the read and write pointers to zero. + *

+ * At this point the buffer can either be read, or additional filtering can be applied. + */ public void finalizeFiltering() { end = write; read = 0; @@ -70,19 +113,10 @@ public class LongQueryBuffer { this.end = end; } - public void reset() { - end = data.length; - read = 0; - write = 0; - } - - public void zero() { - end = 0; - read = 0; - write = 0; - Arrays.fill(data, 0); - } - + /** Retain only unique values in the buffer, and update the end pointer to the new length. + *

+ * The buffer is assumed to be sorted up until the end pointer. + */ public void uniq() { if (end <= 1) return;