(index-query) Improve documentation and code quality

2024-02-15 11:33:50 +01:00 · 2024-02-15 11:33:50 +01:00 · 6c3b49417f
commit 6c3b49417f
parent dcc5cfb7c0
8 changed files with 124 additions and 22 deletions
--- a/code/features-index/index-query/readme.md
+++ b/code/features-index/index-query/readme.md
@ -2,6 +2,14 @@

 Contains interfaces and primitives for creating and evaluating queries against the indices.

+Central to interacting with the query interface is the `IndexQuery` class. This class is used 
+to create and evaluate queries against the index. The class will fill a `LongQueryBuffer` with
+the results of the query. 
+
+This is a relatively light library consisting of a few classes and interfaces. Many of the
+interfaces are implemented within the index-service module.
+
+
 ## Central Classes

 * [IndexQuery](src/main/java/nu/marginalia/index/query/IndexQuery.java)
@ -10,4 +18,5 @@ Contains interfaces and primitives for creating and evaluating queries against t
 ## See Also

 * [index/index-reverse](../index-reverse) implements many of these interfaces.
-* [libraries/array](../../libraries/array)
+* [libraries/array](../../libraries/array)
+* [libraries/array/.../LongQueryBuffer](../../libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java)
--- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EmptyEntrySource.java
+++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EmptyEntrySource.java
@ -2,6 +2,7 @@ package nu.marginalia.index.query;

 import nu.marginalia.array.buffer.LongQueryBuffer;

+/** Dummy EntrySource that returns no entries. */
 public class EmptyEntrySource implements EntrySource {
    @Override
    public void skip(int n) {
--- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EntrySource.java
+++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EntrySource.java
@ -2,11 +2,18 @@ package nu.marginalia.index.query;

 import nu.marginalia.array.buffer.LongQueryBuffer;

+/** An EntrySource is a source of entries for a query.
+ */
 public interface EntrySource {
+    /** Skip n entries. */
    void skip(int n);
+
+    /** Fill the buffer with entries, updating its data and length appropriately. */
    void read(LongQueryBuffer buffer);

+    /** Returns true if there are more entries to read. */
    boolean hasMore();

+    /** Returns the name of the index, for debugging purposes. */
    String indexName();
 }
--- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQuery.java
+++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQuery.java
@ -7,19 +7,42 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.stream.Collectors;

+/** A query to the index.  The query is composed of a list of sources
+ * and a list of filters.
+ * <p></p>
+ * The sources are read in order, and the filters are applied to the results.
+ * <p></p>
+ * The query is executed by providing it with a buffer to fill with results;
+ * see {@link #getMoreResults(LongQueryBuffer)} and {@link #hasMore()}.
+ */
 public class IndexQuery {
    private final List<EntrySource> sources;
    private final List<QueryFilterStepIf> inclusionFilter = new ArrayList<>(10);
-    public final IndexQueryPriority queryPriority;

+    public final IndexQueryPriority queryPriority;
    public final int fetchSizeMultiplier;

-    public IndexQuery(List<EntrySource> sources, IndexQueryPriority priority, int fetchSizeMultiplier) {
+    /**
+     * Creates an IndexQuery object with the given sources, priority, and fetchSizeMultiplier.
+     *
+     * @param sources              List of EntrySource objects representing the sources to query from
+     * @param priority             IndexQueryPriority of the query, determining how many results to fetch before stopping
+     * @param fetchSizeMultiplier  Affects the fetch size of the query, determining how deep the query should go
+     */
+    public IndexQuery(List<EntrySource> sources,
+                      IndexQueryPriority priority,
+                      int fetchSizeMultiplier)
+    {
        this.sources = sources;
        this.queryPriority = priority;
        this.fetchSizeMultiplier = fetchSizeMultiplier;
    }

+    /** Adds a filter to the query.  The filter will be applied to the results
+     * after they are read from the sources.
+     *
+     * @param filter  The filter to add
+     */
    public void addInclusionFilter(QueryFilterStepIf filter) {
        inclusionFilter.add(filter);
    }
@ -27,10 +50,22 @@ public class IndexQuery {
    private int si = 0;
    private int dataCost;

+    /** Returns true if there are more results to read from the sources.
+     *  May return true even if there are no more results, but will eventually
+     *  return false.
+     */
    public boolean hasMore() {
        return si < sources.size();
    }

+    /** Fills the given buffer with more results from the sources.
+     *  The results are filtered by the inclusion filters.
+     *  <p></p>
+     *  The method will advance the sources and filters as needed
+     *  to fill the buffer.
+     *
+     * @param dest  The buffer to fill with results
+     */
    public void getMoreResults(LongQueryBuffer dest) {
        if (!fillBuffer(dest))
            return;
--- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java
+++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java
@ -4,6 +4,18 @@ import nu.marginalia.index.query.limit.QueryStrategy;
 import nu.marginalia.index.searchset.SearchSet;
 import nu.marginalia.index.query.limit.SpecificationLimit;

+/** IndexQueryParams is a set of parameters for a query.
+ *
+ * @param qualityLimit The quality limit.
+ * @param year The year limit.
+ * @param size The size limit.  Eliminates results from domains that do not satisfy the size criteria.
+ * @param rank The rank limit.  Eliminates results from domains that do not satisfy the domain rank criteria.
+ * @param domainCount The domain count limit.  Filters out results from domains that do not contain enough
+ *                    documents that match the query.
+ * @param searchSet The search set.  Limits the search to a set of domains.
+ * @param queryStrategy The query strategy.  May impose additional constraints on the query, such as requiring
+ *                      the keywords to appear in the title, or in the domain.
+ */
 public record IndexQueryParams(SpecificationLimit qualityLimit,
                               SpecificationLimit year,
                               SpecificationLimit size,
--- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexSearchBudget.java
+++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexSearchBudget.java
@ -1,6 +1,7 @@
 package nu.marginalia.index.query;


+/** An execution time budget for index search operations. */
 public class IndexSearchBudget {
    private final long timeout;

--- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterAnyOf.java
+++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterAnyOf.java
@ -28,10 +28,13 @@ public class QueryFilterAnyOf implements QueryFilterStepIf {


    public void apply(LongQueryBuffer buffer) {
+        if (steps.isEmpty())
+            return;
+
        int start;
        int end = buffer.end;

-        steps.get(0).apply(buffer);
+        steps.getFirst().apply(buffer);

        // The filter functions will partition the data in the buffer from 0 to END,
        // and update END to the length of the retained items, keeping the retained
--- a/code/libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java
+++ b/code/libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java
@ -2,8 +2,24 @@ package nu.marginalia.array.buffer;

 import java.util.Arrays;

+/** A buffer for long values that can be used to filter and manipulate the data.
+ * It is central to the query processing in the index service.
+ * <p></p>
+ * The class contains a read pointer, a write pointer, and a buffer end pointer.
+ * <p></p>
+ * The read and write pointers are used for filtering the data in the buffer, and
+ * the end pointer is used to keep track of the length of the data in the buffer.
+ * <p></p>
+ * Filtering is done via the methods {@link #rejectAndAdvance()}, {@link #retainAndAdvance()},
+ * and {@link #finalizeFiltering()}.
+ *
+ */
 public class LongQueryBuffer {
+    /** Direct access to the data in the buffer,
+     * guaranteed to be populated until {@code end} */
    public final long[] data;
+
+    /** Number of items in the data buffer */
    public int end;

    private int read = 0;
@ -14,15 +30,11 @@ public class LongQueryBuffer {
        this.end = size;
    }

-    public LongQueryBuffer(long [] data, int size) {
+    public LongQueryBuffer(long[] data, int size) {
        this.data = data;
        this.end = size;
    }

-    public boolean hasRetainedData() {
-        return write > 0;
-    }
-
    public long[] copyData() {
        return Arrays.copyOf(data, end);
    }
@ -35,14 +47,36 @@ public class LongQueryBuffer {
        return end;
    }

+    public void reset() {
+        end = data.length;
+        read = 0;
+        write = 0;
+    }
+
+    public void zero() {
+        end = 0;
+        read = 0;
+        write = 0;
+    }
+
+    /* ==  Filtering methods == */
+
+    /** Returns the current value at the read pointer.
+     */
    public long currentValue() {
        return data[read];
    }

+    /** Advances the read pointer and returns true if there are more values to read. */
    public boolean rejectAndAdvance() {
        return ++read < end;
    }

+    /** Retains the current value at the read pointer and advances the read and write pointers.
+     *  Returns true if there are more values to read.
+     *  <p></p> To enable "or" style criteria, the method swaps the current value with the value
+     *  at the write pointer, so that it's retained at the end of the buffer.
+     */
    public boolean retainAndAdvance() {
        if (read != write) {
            long tmp = data[write];
@ -59,6 +93,15 @@ public class LongQueryBuffer {
        return read < end;
    }

+    public boolean hasRetainedData() {
+        return write > 0;
+    }
+
+    /** Finalizes the filtering by setting the end pointer to the write pointer,
+     * and resetting the read and write pointers to zero.
+     * <p></p>
+     * At this point the buffer can either be read, or additional filtering can be applied.
+     */
    public void finalizeFiltering() {
        end = write;
        read = 0;
@ -70,19 +113,10 @@ public class LongQueryBuffer {
        this.end = end;
    }

-    public void reset() {
-        end = data.length;
-        read = 0;
-        write = 0;
-    }
-
-    public void zero() {
-        end = 0;
-        read = 0;
-        write = 0;
-        Arrays.fill(data, 0);
-    }
-
+    /**  Retain only unique values in the buffer, and update the end pointer to the new length.
+     * <p></p>
+     *   The buffer is assumed to be sorted up until the end pointer.
+     */
    public void uniq() {
        if (end <= 1) return;