(index-query) Improve documentation and code quality
This commit is contained in:
parent
dcc5cfb7c0
commit
6c3b49417f
@ -2,6 +2,14 @@
|
|||||||
|
|
||||||
Contains interfaces and primitives for creating and evaluating queries against the indices.
|
Contains interfaces and primitives for creating and evaluating queries against the indices.
|
||||||
|
|
||||||
|
Central to interacting with the query interface is the `IndexQuery` class. This class is used
|
||||||
|
to create and evaluate queries against the index. The class will fill a `LongQueryBuffer` with
|
||||||
|
the results of the query.
|
||||||
|
|
||||||
|
This is a relatively light library consisting of a few classes and interfaces. Many of the
|
||||||
|
interfaces are implemented within the index-service module.
|
||||||
|
|
||||||
|
|
||||||
## Central Classes
|
## Central Classes
|
||||||
|
|
||||||
* [IndexQuery](src/main/java/nu/marginalia/index/query/IndexQuery.java)
|
* [IndexQuery](src/main/java/nu/marginalia/index/query/IndexQuery.java)
|
||||||
@ -10,4 +18,5 @@ Contains interfaces and primitives for creating and evaluating queries against t
|
|||||||
## See Also
|
## See Also
|
||||||
|
|
||||||
* [index/index-reverse](../index-reverse) implements many of these interfaces.
|
* [index/index-reverse](../index-reverse) implements many of these interfaces.
|
||||||
* [libraries/array](../../libraries/array)
|
* [libraries/array](../../libraries/array)
|
||||||
|
* [libraries/array/.../LongQueryBuffer](../../libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java)
|
@ -2,6 +2,7 @@ package nu.marginalia.index.query;
|
|||||||
|
|
||||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
import nu.marginalia.array.buffer.LongQueryBuffer;
|
||||||
|
|
||||||
|
/** Dummy EntrySource that returns no entries. */
|
||||||
public class EmptyEntrySource implements EntrySource {
|
public class EmptyEntrySource implements EntrySource {
|
||||||
@Override
|
@Override
|
||||||
public void skip(int n) {
|
public void skip(int n) {
|
||||||
|
@ -2,11 +2,18 @@ package nu.marginalia.index.query;
|
|||||||
|
|
||||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
import nu.marginalia.array.buffer.LongQueryBuffer;
|
||||||
|
|
||||||
|
/** An EntrySource is a source of entries for a query.
|
||||||
|
*/
|
||||||
public interface EntrySource {
|
public interface EntrySource {
|
||||||
|
/** Skip n entries. */
|
||||||
void skip(int n);
|
void skip(int n);
|
||||||
|
|
||||||
|
/** Fill the buffer with entries, updating its data and length appropriately. */
|
||||||
void read(LongQueryBuffer buffer);
|
void read(LongQueryBuffer buffer);
|
||||||
|
|
||||||
|
/** Returns true if there are more entries to read. */
|
||||||
boolean hasMore();
|
boolean hasMore();
|
||||||
|
|
||||||
|
/** Returns the name of the index, for debugging purposes. */
|
||||||
String indexName();
|
String indexName();
|
||||||
}
|
}
|
||||||
|
@ -7,19 +7,42 @@ import java.util.ArrayList;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
/** A query to the index. The query is composed of a list of sources
|
||||||
|
* and a list of filters.
|
||||||
|
* <p></p>
|
||||||
|
* The sources are read in order, and the filters are applied to the results.
|
||||||
|
* <p></p>
|
||||||
|
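* The query is executed by passing a buffer to {@link #getMoreResults(LongQueryBuffer)},
|
||||||
|
* which fills it with results; this is repeated for as long as {@link #hasMore()} returns true.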
|
||||||
|
*/
|
||||||
public class IndexQuery {
|
public class IndexQuery {
|
||||||
private final List<EntrySource> sources;
|
private final List<EntrySource> sources;
|
||||||
private final List<QueryFilterStepIf> inclusionFilter = new ArrayList<>(10);
|
private final List<QueryFilterStepIf> inclusionFilter = new ArrayList<>(10);
|
||||||
public final IndexQueryPriority queryPriority;
|
|
||||||
|
|
||||||
|
public final IndexQueryPriority queryPriority;
|
||||||
public final int fetchSizeMultiplier;
|
public final int fetchSizeMultiplier;
|
||||||
|
|
||||||
public IndexQuery(List<EntrySource> sources, IndexQueryPriority priority, int fetchSizeMultiplier) {
|
/**
|
||||||
|
* Creates an IndexQuery object with the given sources, priority, and fetchSizeMultiplier.
|
||||||
|
*
|
||||||
|
* @param sources List of EntrySource objects representing the sources to query from
|
||||||
|
* @param priority IndexQueryPriority of the query, determining how many results to fetch before stopping
|
||||||
|
* @param fetchSizeMultiplier Affects the fetch size of the query, determining how deep the query should go
|
||||||
|
*/
|
||||||
|
public IndexQuery(List<EntrySource> sources,
|
||||||
|
IndexQueryPriority priority,
|
||||||
|
int fetchSizeMultiplier)
|
||||||
|
{
|
||||||
this.sources = sources;
|
this.sources = sources;
|
||||||
this.queryPriority = priority;
|
this.queryPriority = priority;
|
||||||
this.fetchSizeMultiplier = fetchSizeMultiplier;
|
this.fetchSizeMultiplier = fetchSizeMultiplier;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Adds a filter to the query. The filter will be applied to the results
|
||||||
|
* after they are read from the sources.
|
||||||
|
*
|
||||||
|
* @param filter The filter to add
|
||||||
|
*/
|
||||||
public void addInclusionFilter(QueryFilterStepIf filter) {
|
public void addInclusionFilter(QueryFilterStepIf filter) {
|
||||||
inclusionFilter.add(filter);
|
inclusionFilter.add(filter);
|
||||||
}
|
}
|
||||||
@ -27,10 +50,22 @@ public class IndexQuery {
|
|||||||
private int si = 0;
|
private int si = 0;
|
||||||
private int dataCost;
|
private int dataCost;
|
||||||
|
|
||||||
|
/** Returns true if there are more results to read from the sources.
|
||||||
|
* May return true even if there are no more results, but will eventually
|
||||||
|
* return false.
|
||||||
|
*/
|
||||||
public boolean hasMore() {
|
public boolean hasMore() {
|
||||||
return si < sources.size();
|
return si < sources.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Fills the given buffer with more results from the sources.
|
||||||
|
* The results are filtered by the inclusion filters.
|
||||||
|
* <p></p>
|
||||||
|
* The method will advance the sources and filters as needed
|
||||||
|
* to fill the buffer.
|
||||||
|
*
|
||||||
|
* @param dest The buffer to fill with results
|
||||||
|
*/
|
||||||
public void getMoreResults(LongQueryBuffer dest) {
|
public void getMoreResults(LongQueryBuffer dest) {
|
||||||
if (!fillBuffer(dest))
|
if (!fillBuffer(dest))
|
||||||
return;
|
return;
|
||||||
|
@ -4,6 +4,18 @@ import nu.marginalia.index.query.limit.QueryStrategy;
|
|||||||
import nu.marginalia.index.searchset.SearchSet;
|
import nu.marginalia.index.searchset.SearchSet;
|
||||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||||
|
|
||||||
|
/** IndexQueryParams is a set of parameters for a query.
|
||||||
|
*
|
||||||
|
* @param qualityLimit The quality limit.
|
||||||
|
* @param year The year limit.
|
||||||
|
* @param size The size limit. Eliminates results from domains that do not satisfy the size criteria.
|
||||||
|
* @param rank The rank limit. Eliminates results from domains that do not satisfy the domain rank criteria.
|
||||||
|
* @param domainCount The domain count limit. Filters out results from domains that do not contain enough
|
||||||
|
* documents that match the query.
|
||||||
|
* @param searchSet The search set. Limits the search to a set of domains.
|
||||||
|
* @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring
|
||||||
|
* the keywords to appear in the title, or in the domain.
|
||||||
|
*/
|
||||||
public record IndexQueryParams(SpecificationLimit qualityLimit,
|
public record IndexQueryParams(SpecificationLimit qualityLimit,
|
||||||
SpecificationLimit year,
|
SpecificationLimit year,
|
||||||
SpecificationLimit size,
|
SpecificationLimit size,
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package nu.marginalia.index.query;
|
package nu.marginalia.index.query;
|
||||||
|
|
||||||
|
|
||||||
|
/** An execution time budget for index search operations. */
|
||||||
public class IndexSearchBudget {
|
public class IndexSearchBudget {
|
||||||
private final long timeout;
|
private final long timeout;
|
||||||
|
|
||||||
|
@ -28,10 +28,13 @@ public class QueryFilterAnyOf implements QueryFilterStepIf {
|
|||||||
|
|
||||||
|
|
||||||
public void apply(LongQueryBuffer buffer) {
|
public void apply(LongQueryBuffer buffer) {
|
||||||
|
if (steps.isEmpty())
|
||||||
|
return;
|
||||||
|
|
||||||
int start;
|
int start;
|
||||||
int end = buffer.end;
|
int end = buffer.end;
|
||||||
|
|
||||||
steps.get(0).apply(buffer);
|
steps.getFirst().apply(buffer);
|
||||||
|
|
||||||
// The filter functions will partition the data in the buffer from 0 to END,
|
// The filter functions will partition the data in the buffer from 0 to END,
|
||||||
// and update END to the length of the retained items, keeping the retained
|
// and update END to the length of the retained items, keeping the retained
|
||||||
|
@ -2,8 +2,24 @@ package nu.marginalia.array.buffer;
|
|||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/** A buffer for long values that can be used to filter and manipulate the data.
|
||||||
|
* It is central to the query processing in the index service.
|
||||||
|
* <p></p>
|
||||||
|
* The class contains a read pointer, a write pointer, and a buffer end pointer.
|
||||||
|
* <p></p>
|
||||||
|
* The read and write pointers are used for filtering the data in the buffer, and
|
||||||
|
* the end pointer is used to keep track of the length of the data in the buffer.
|
||||||
|
* <p></p>
|
||||||
|
* Filtering is done via the methods {@link #rejectAndAdvance()}, {@link #retainAndAdvance()},
|
||||||
|
* and {@link #finalizeFiltering()}.
|
||||||
|
*
|
||||||
|
*/
|
||||||
public class LongQueryBuffer {
|
public class LongQueryBuffer {
|
||||||
|
/** Direct access to the data in the buffer,
|
||||||
|
* guaranteed to be populated until {@code end} */
|
||||||
public final long[] data;
|
public final long[] data;
|
||||||
|
|
||||||
|
/** Number of items in the data buffer */
|
||||||
public int end;
|
public int end;
|
||||||
|
|
||||||
private int read = 0;
|
private int read = 0;
|
||||||
@ -14,15 +30,11 @@ public class LongQueryBuffer {
|
|||||||
this.end = size;
|
this.end = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
public LongQueryBuffer(long [] data, int size) {
|
public LongQueryBuffer(long[] data, int size) {
|
||||||
this.data = data;
|
this.data = data;
|
||||||
this.end = size;
|
this.end = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasRetainedData() {
|
|
||||||
return write > 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long[] copyData() {
|
public long[] copyData() {
|
||||||
return Arrays.copyOf(data, end);
|
return Arrays.copyOf(data, end);
|
||||||
}
|
}
|
||||||
@ -35,14 +47,36 @@ public class LongQueryBuffer {
|
|||||||
return end;
|
return end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
end = data.length;
|
||||||
|
read = 0;
|
||||||
|
write = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void zero() {
|
||||||
|
end = 0;
|
||||||
|
read = 0;
|
||||||
|
write = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* == Filtering methods == */
|
||||||
|
|
||||||
|
/** Returns the current value at the read pointer.
|
||||||
|
*/
|
||||||
public long currentValue() {
|
public long currentValue() {
|
||||||
return data[read];
|
return data[read];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Advances the read pointer and returns true if there are more values to read. */
|
||||||
public boolean rejectAndAdvance() {
|
public boolean rejectAndAdvance() {
|
||||||
return ++read < end;
|
return ++read < end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Retains the current value at the read pointer and advances the read and write pointers.
|
||||||
|
* Returns true if there are more values to read.
|
||||||
|
* <p></p> To enable "or" style criteria, the method swaps the current value with the value
|
||||||
|
* at the write pointer, so that the displaced value is kept later in the buffer instead of being overwritten.
|
||||||
|
*/
|
||||||
public boolean retainAndAdvance() {
|
public boolean retainAndAdvance() {
|
||||||
if (read != write) {
|
if (read != write) {
|
||||||
long tmp = data[write];
|
long tmp = data[write];
|
||||||
@ -59,6 +93,15 @@ public class LongQueryBuffer {
|
|||||||
return read < end;
|
return read < end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean hasRetainedData() {
|
||||||
|
return write > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Finalizes the filtering by setting the end pointer to the write pointer,
|
||||||
|
* and resetting the read and write pointers to zero.
|
||||||
|
* <p></p>
|
||||||
|
* At this point the buffer can either be read, or additional filtering can be applied.
|
||||||
|
*/
|
||||||
public void finalizeFiltering() {
|
public void finalizeFiltering() {
|
||||||
end = write;
|
end = write;
|
||||||
read = 0;
|
read = 0;
|
||||||
@ -70,19 +113,10 @@ public class LongQueryBuffer {
|
|||||||
this.end = end;
|
this.end = end;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset() {
|
/** Retain only unique values in the buffer, and update the end pointer to the new length.
|
||||||
end = data.length;
|
* <p></p>
|
||||||
read = 0;
|
* The buffer is assumed to be sorted up until the end pointer.
|
||||||
write = 0;
|
*/
|
||||||
}
|
|
||||||
|
|
||||||
public void zero() {
|
|
||||||
end = 0;
|
|
||||||
read = 0;
|
|
||||||
write = 0;
|
|
||||||
Arrays.fill(data, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void uniq() {
|
public void uniq() {
|
||||||
if (end <= 1) return;
|
if (end <= 1) return;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user