(index-query) Improve documentation and code quality

This commit is contained in:
Viktor Lofgren 2024-02-15 11:33:50 +01:00
parent dcc5cfb7c0
commit 6c3b49417f
8 changed files with 124 additions and 22 deletions

View File

@ -2,6 +2,14 @@
Contains interfaces and primitives for creating and evaluating queries against the indices. Contains interfaces and primitives for creating and evaluating queries against the indices.
Central to interacting with the query interface is the `IndexQuery` class. This class is used
to create and evaluate queries against the index. The class will fill a `LongQueryBuffer` with
the results of the query.
This is a relatively light library consisting of a few classes and interfaces. Many of the
interfaces are implemented within the index-service module.
## Central Classes ## Central Classes
* [IndexQuery](src/main/java/nu/marginalia/index/query/IndexQuery.java) * [IndexQuery](src/main/java/nu/marginalia/index/query/IndexQuery.java)
@ -10,4 +18,5 @@ Contains interfaces and primitives for creating and evaluating queries against t
## See Also ## See Also
* [index/index-reverse](../index-reverse) implements many of these interfaces. * [index/index-reverse](../index-reverse) implements many of these interfaces.
* [libraries/array](../../libraries/array) * [libraries/array](../../libraries/array)
* [libraries/array/.../LongQueryBuffer](../../libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java)

View File

@ -2,6 +2,7 @@ package nu.marginalia.index.query;
import nu.marginalia.array.buffer.LongQueryBuffer; import nu.marginalia.array.buffer.LongQueryBuffer;
/** Dummy EntrySource that returns no entries. */
public class EmptyEntrySource implements EntrySource { public class EmptyEntrySource implements EntrySource {
@Override @Override
public void skip(int n) { public void skip(int n) {

View File

@ -2,11 +2,18 @@ package nu.marginalia.index.query;
import nu.marginalia.array.buffer.LongQueryBuffer; import nu.marginalia.array.buffer.LongQueryBuffer;
/** An EntrySource is a source of entries for a query.
*/
public interface EntrySource { public interface EntrySource {
/** Skip n entries. */
void skip(int n); void skip(int n);
/** Fill the buffer with entries, updating its data and length appropriately. */
void read(LongQueryBuffer buffer); void read(LongQueryBuffer buffer);
/** Returns true if there are more entries to read. */
boolean hasMore(); boolean hasMore();
/** Returns the name of the index, for debugging purposes. */
String indexName(); String indexName();
} }

View File

@ -7,19 +7,42 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
/** A query to the index. The query is composed of a list of sources
* and a list of filters.
* <p></p>
* The sources are read in order, and the filters are applied to the results.
* <p></p>
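* The query is executed by providing it with a buffer to fill with results
* via {@link #getMoreResults(LongQueryBuffer)}, which can be called repeatedly
* for as long as {@link #hasMore()} returns true.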
*/
public class IndexQuery { public class IndexQuery {
private final List<EntrySource> sources; private final List<EntrySource> sources;
private final List<QueryFilterStepIf> inclusionFilter = new ArrayList<>(10); private final List<QueryFilterStepIf> inclusionFilter = new ArrayList<>(10);
public final IndexQueryPriority queryPriority;
public final IndexQueryPriority queryPriority;
public final int fetchSizeMultiplier; public final int fetchSizeMultiplier;
public IndexQuery(List<EntrySource> sources, IndexQueryPriority priority, int fetchSizeMultiplier) { /**
* Creates an IndexQuery object with the given sources, priority, and fetchSizeMultiplier.
*
* @param sources List of EntrySource objects representing the sources to query from
* @param priority IndexQueryPriority of the query, determining how many results to fetch before stopping
* @param fetchSizeMultiplier Affects the fetch size of the query, determining how deep the query should go
*/
public IndexQuery(List<EntrySource> sources,
IndexQueryPriority priority,
int fetchSizeMultiplier)
{
this.sources = sources; this.sources = sources;
this.queryPriority = priority; this.queryPriority = priority;
this.fetchSizeMultiplier = fetchSizeMultiplier; this.fetchSizeMultiplier = fetchSizeMultiplier;
} }
/** Adds a filter to the query. The filter will be applied to the results
* after they are read from the sources.
*
* @param filter The filter to add
*/
public void addInclusionFilter(QueryFilterStepIf filter) { public void addInclusionFilter(QueryFilterStepIf filter) {
inclusionFilter.add(filter); inclusionFilter.add(filter);
} }
@ -27,10 +50,22 @@ public class IndexQuery {
private int si = 0; private int si = 0;
private int dataCost; private int dataCost;
/** Returns true if there are more results to read from the sources.
* May return true even if there are no more results, but will eventually
* return false.
*/
public boolean hasMore() { public boolean hasMore() {
return si < sources.size(); return si < sources.size();
} }
/** Fills the given buffer with more results from the sources.
* The results are filtered by the inclusion filters.
* <p></p>
* The method will advance the sources and filters as needed
* to fill the buffer.
*
* @param dest The buffer to fill with results
*/
public void getMoreResults(LongQueryBuffer dest) { public void getMoreResults(LongQueryBuffer dest) {
if (!fillBuffer(dest)) if (!fillBuffer(dest))
return; return;

View File

@ -4,6 +4,18 @@ import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.searchset.SearchSet; import nu.marginalia.index.searchset.SearchSet;
import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.index.query.limit.SpecificationLimit;
/** IndexQueryParams is a set of parameters for a query.
*
* @param qualityLimit The quality limit.
* @param year The year limit.
* @param size The size limit. Eliminates results from domains that do not satisfy the size criteria.
* @param rank The rank limit. Eliminates results from domains that do not satisfy the domain rank criteria.
* @param domainCount The domain count limit. Filters out results from domains that do not contain enough
* documents that match the query.
* @param searchSet The search set. Limits the search to a set of domains.
* @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring
* the keywords to appear in the title, or in the domain.
*/
public record IndexQueryParams(SpecificationLimit qualityLimit, public record IndexQueryParams(SpecificationLimit qualityLimit,
SpecificationLimit year, SpecificationLimit year,
SpecificationLimit size, SpecificationLimit size,

View File

@ -1,6 +1,7 @@
package nu.marginalia.index.query; package nu.marginalia.index.query;
/** An execution time budget for index search operations. */
public class IndexSearchBudget { public class IndexSearchBudget {
private final long timeout; private final long timeout;

View File

@ -28,10 +28,13 @@ public class QueryFilterAnyOf implements QueryFilterStepIf {
public void apply(LongQueryBuffer buffer) { public void apply(LongQueryBuffer buffer) {
if (steps.isEmpty())
return;
int start; int start;
int end = buffer.end; int end = buffer.end;
steps.get(0).apply(buffer); steps.getFirst().apply(buffer);
// The filter functions will partition the data in the buffer from 0 to END, // The filter functions will partition the data in the buffer from 0 to END,
// and update END to the length of the retained items, keeping the retained // and update END to the length of the retained items, keeping the retained

View File

@ -2,8 +2,24 @@ package nu.marginalia.array.buffer;
import java.util.Arrays; import java.util.Arrays;
/** A buffer for long values that can be used to filter and manipulate the data.
* It is central to the query processing in the index service.
* <p></p>
* The class contains a read pointer, a write pointer, and a buffer end pointer.
* <p></p>
* The read and write pointers are used for filtering the data in the buffer, and
* the end pointer is used to keep track of the length of the data in the buffer.
* <p></p>
* Filtering is done via the methods {@link #rejectAndAdvance()}, {@link #retainAndAdvance()},
* and {@link #finalizeFiltering()}.
*
*/
public class LongQueryBuffer { public class LongQueryBuffer {
/** Direct access to the data in the buffer,
* guaranteed to be populated until {@code end} */
public final long[] data; public final long[] data;
/** Number of items in the data buffer */
public int end; public int end;
private int read = 0; private int read = 0;
@ -14,15 +30,11 @@ public class LongQueryBuffer {
this.end = size; this.end = size;
} }
public LongQueryBuffer(long [] data, int size) { public LongQueryBuffer(long[] data, int size) {
this.data = data; this.data = data;
this.end = size; this.end = size;
} }
public boolean hasRetainedData() {
return write > 0;
}
public long[] copyData() { public long[] copyData() {
return Arrays.copyOf(data, end); return Arrays.copyOf(data, end);
} }
@ -35,14 +47,36 @@ public class LongQueryBuffer {
return end; return end;
} }
public void reset() {
end = data.length;
read = 0;
write = 0;
}
public void zero() {
end = 0;
read = 0;
write = 0;
}
/* == Filtering methods == */
/** Returns the current value at the read pointer.
*/
public long currentValue() { public long currentValue() {
return data[read]; return data[read];
} }
/** Advances the read pointer and returns true if there are more values to read. */
public boolean rejectAndAdvance() { public boolean rejectAndAdvance() {
return ++read < end; return ++read < end;
} }
/** Retains the current value at the read pointer and advances the read and write pointers.
* Returns true if there are more values to read.
* <p></p> To enable "or" style criteria, the method swaps the current value with the value
* at the write pointer instead of overwriting it, so that the displaced value is kept later in the buffer.
*/
public boolean retainAndAdvance() { public boolean retainAndAdvance() {
if (read != write) { if (read != write) {
long tmp = data[write]; long tmp = data[write];
@ -59,6 +93,15 @@ public class LongQueryBuffer {
return read < end; return read < end;
} }
public boolean hasRetainedData() {
return write > 0;
}
/** Finalizes the filtering by setting the end pointer to the write pointer,
* and resetting the read and write pointers to zero.
* <p></p>
* At this point the buffer can either be read, or additional filtering can be applied.
*/
public void finalizeFiltering() { public void finalizeFiltering() {
end = write; end = write;
read = 0; read = 0;
@ -70,19 +113,10 @@ public class LongQueryBuffer {
this.end = end; this.end = end;
} }
public void reset() { /** Retain only unique values in the buffer, and update the end pointer to the new length.
end = data.length; * <p></p>
read = 0; * The buffer is assumed to be sorted up until the end pointer.
write = 0; */
}
public void zero() {
end = 0;
read = 0;
write = 0;
Arrays.fill(data, 0);
}
public void uniq() { public void uniq() {
if (end <= 1) return; if (end <= 1) return;