Index optimizations that should reduce small object churn and IOPS a bit.
parent 460dd098b0
commit beafdfda9c
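Several of the changes below share one idiom: rather than fetching longs from the memory-mapped index one get() call at a time, a block of them is read into a reusable long[] and streamed from memory, reducing per-element overhead and pulling the data in larger sequential chunks. A minimal sketch of that idiom, assuming an already-populated MultimapFileLong; the helper name streamInBlocks and the block size of 1024 are illustrative, not part of the commit:

    import java.util.Arrays;
    import java.util.stream.LongStream;
    import nu.marginalia.util.multimap.MultimapFileLong;

    class BlockReadSketch {
        // Stream `count` longs starting at `offset`, pulling them from the mapped file
        // in blocks of up to 1024 instead of one MultimapFileLong.get() per element.
        static LongStream streamInBlocks(MultimapFileLong mmf, long offset, long count) {
            long end = offset + count;
            long[] block = new long[1024];                    // reusable read buffer
            return LongStream
                    .iterate(offset, pos -> pos < end, pos -> pos + block.length)
                    .flatMap(pos -> {
                        int n = (int) (Math.min(pos + block.length, end) - pos); // clamp the last block
                        mmf.read(block, n, pos);              // one bulk read per block
                        return Arrays.stream(block, 0, n);
                    });
        }
    }

The JMH benchmark added below compares this block-read approach against per-element reads at block sizes of 128, 1024 and 8192.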
@@ -36,6 +36,16 @@ sourceSets {
         resources.srcDir file('src/e2e/resources')
         }
     }
+    jmh {
+        java {
+            compileClasspath += main.output + test.output
+            runtimeClasspath += main.output + test.output
+            srcDir file('src/jmh/java')
+        }
+        resources.srcDir file('src/jmh/resources')
+    }
 }
 
 java {
@@ -43,7 +53,9 @@ java {
         languageVersion.set(JavaLanguageVersion.of(17))
     }
 }
+jmhJar {
+    zip64 true
+}
 dependencies {
     implementation project(':third_party')
 
@@ -142,6 +154,9 @@ dependencies {
     implementation 'org.seleniumhq.selenium:selenium-chrome-driver:4.1.4'
     implementation 'org.seleniumhq.selenium:selenium-java:4.3.0'
     implementation 'org.sejda.imageio:webp-imageio:0.1.6'
 
+    jmh 'org.openjdk.jmh:jmh-core:1.35'
+    jmh 'org.openjdk.jmh:jmh-generator-annprocess:1.35'
 }
 
 configurations {
@@ -0,0 +1,85 @@
package nu.marginalia;

import lombok.SneakyThrows;
import nu.marginalia.util.multimap.MultimapFileLong;
import org.openjdk.jmh.annotations.*;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.stream.IntStream;
import java.util.stream.LongStream;

public class ByteBufferBlockReadVsIndividualRead {

    @State(Scope.Benchmark)
    public static class ByteBufferState {
        private MultimapFileLong mmf;
        private Path file;
        private static final int size = 800*1024*1024;

        @Setup(Level.Iteration)
        @SneakyThrows
        public void setUp() {
            file = Files.createTempFile("jmh", ".dat");
            mmf = MultimapFileLong.forOutput(file, size);
            for (int i = 0; i < size; i++) {
                mmf.put(i, i);
            }
        }

        @TearDown(Level.Iteration)
        @SneakyThrows
        public void tearDown() {
            mmf.close();
            Files.delete(file);
        }

        LongStream basicStream() {
            return IntStream.range(0, size).mapToLong(mmf::get);
        }

        LongStream blockStream(int blockSize) {
            long urlOffset = 0;
            long endOffset = size;

            long[] arry = new long[blockSize];

            return LongStream
                    .iterate(urlOffset, i -> i < endOffset, i -> i + blockSize)
                    .flatMap(pos -> {
                        int sz = (int)(Math.min(pos + blockSize, endOffset) - pos);
                        mmf.read(arry, sz, pos);
                        return Arrays.stream(arry, 0, sz);
                    });
        }
    }

//    @Benchmark @BenchmarkMode(Mode.Throughput)
//    @Fork(value = 1, warmups = 1)
//    @Warmup(iterations = 1)
    public long testBasic(ByteBufferState state) {
        return state.basicStream().sum();
    }

    @Benchmark @BenchmarkMode(Mode.Throughput)
    @Fork(value = 1, warmups = 1)
    @Warmup(iterations = 0)
    public long testBlock128(ByteBufferState state) {
        return state.blockStream(128).sum();
    }

    @Benchmark @BenchmarkMode(Mode.Throughput)
    @Fork(value = 1, warmups = 1)
    @Warmup(iterations = 0)
    public long testBlock1024(ByteBufferState state) {
        return state.blockStream(1024).sum();
    }

    @Benchmark @BenchmarkMode(Mode.Throughput)
    @Fork(value = 1, warmups = 1)
    @Warmup(iterations = 0)
    public long testBlock8192(ByteBufferState state) {
        return state.blockStream(8192).sum();
    }
}
@@ -0,0 +1,111 @@
package nu.marginalia.util.btree;

import nu.marginalia.util.btree.model.BTreeContext;
import nu.marginalia.util.btree.model.BTreeHeader;
import nu.marginalia.util.multimap.MultimapFileLong;
import nu.marginalia.util.multimap.MultimapSearcher;

import static java.lang.Math.min;

public class CachingBTreeReader {

    private final MultimapFileLong file;
    public final BTreeContext ctx;

    private final MultimapSearcher dataSearcher;

    public CachingBTreeReader(MultimapFileLong file, BTreeContext ctx) {
        this.file = file;
        this.dataSearcher = MultimapSearcher.forContext(file, ctx.equalityMask(), ctx.entrySize());

        this.ctx = ctx;
    }

    public BTreeHeader getHeader(long fileOffset) {
        return new BTreeHeader(file.get(fileOffset), file.get(fileOffset+1), file.get(fileOffset+2));
    }

    public Cache prepareCache() {
        return new Cache();
    }

    /**
     * @return file offset of entry matching keyRaw, negative if absent
     */
    public long findEntry(BTreeHeader header, Cache cache, final long keyRaw) {
        final int blockSize = ctx.BLOCK_SIZE_WORDS();

        final long key = keyRaw & ctx.equalityMask();
        final long dataAddress = header.dataOffsetLongs();

        final long searchStart;
        final long numEntries;

        if (header.layers() == 0) { // For small data, there is no index block, only a flat data block
            searchStart = dataAddress;
            numEntries = header.numEntries();
        }
        else {
            cache.load(header);

            long dataLayerOffset = searchIndex(header, cache, key);
            if (dataLayerOffset < 0) {
                return dataLayerOffset;
            }

            searchStart = dataAddress + dataLayerOffset * ctx.entrySize();
            numEntries = min(header.numEntries() - dataLayerOffset, blockSize);
        }

        return dataSearcher.binarySearch(key, searchStart, numEntries);
    }

    private long searchIndex(BTreeHeader header, Cache cache, long key) {
        final int blockSize = ctx.BLOCK_SIZE_WORDS();
        long layerOffset = 0;

        for (int i = header.layers() - 1; i >= 0; --i) {
            final long indexLayerBlockOffset = header.relativeIndexLayerOffset(ctx, i) + layerOffset;

            final long nextLayerOffset = cache.relativePositionInIndex(key, (int) indexLayerBlockOffset, blockSize);
            if (nextLayerOffset < 0)
                return nextLayerOffset;

            layerOffset = blockSize * (nextLayerOffset + layerOffset);
        }

        return layerOffset;
    }

    public class Cache {
        long[] indexData;

        public void load(BTreeHeader header) {
            if (indexData != null)
                return;

            int size = (int)(header.dataOffsetLongs() - header.indexOffsetLongs());
            indexData = new long[size];
            file.read(indexData, header.indexOffsetLongs());
        }

        long relativePositionInIndex(long key, int fromIndex, int n) {
            int low = 0;
            int high = n - 1;

            while (low <= high) {
                int mid = (low + high) >>> 1;
                long midVal = indexData[fromIndex + mid];

                if (midVal < key)
                    low = mid + 1;
                else if (midVal > key)
                    high = mid - 1;
                else
                    return mid;
            }
            return low;
        }
    }
}
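A usage sketch for the new CachingBTreeReader, mirroring the BTreeWriterTestCachedReader test added further down; mmf and keysToProbe stand in for an existing MultimapFileLong holding a B-tree written at offset 0 and for the keys to look up. One Cache is prepared per batch of lookups; the first findEntry call copies the tree's index layers into a long[], and every later probe binary-searches that in-memory copy instead of re-reading index blocks from the mapped file.

    // Inside a method; mmf and keysToProbe are assumed to exist (illustrative names).
    var ctx    = new BTreeContext(4, 2, 0xFFFF_FFFF_FFFF_FFFFL, 3);  // same context the tree was written with
    var reader = new CachingBTreeReader(mmf, ctx);

    var cache  = reader.prepareCache();      // one cache per batch of lookups
    var header = reader.getHeader(0);        // tree starts at file offset 0 here

    for (long key : keysToProbe) {
        long offset = reader.findEntry(header, cache, key);
        if (offset >= 0) {
            long value = mmf.get(offset);    // entry found at this file offset
        }
    }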
@@ -93,7 +93,7 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice {
         channel = file.getChannel();
         mappedSize = 0;
 
-        logger.debug("Creating multimap file size = {} / buffer size = {}, mode = {}",
+        logger.trace("Creating multimap file size = {} / buffer size = {}, mode = {}",
                 readableSize(mapSizeBytes), readableSize(8L*bufferSizeWords), mode);
     }
 
@@ -149,7 +149,7 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice {
     }
 
     public void force() {
-        logger.debug("Forcing");
+        logger.trace("Forcing");
 
         for (MappedByteBuffer buffer: mappedByteBuffers) {
             buffer.force();
@@ -125,8 +125,11 @@ public class EdgeIndexBucket {
         else {
             query = indexReader.findWord(block, budget, filter, orderedIncludes[0]);
         }
-        for (int i = 1; i < orderedIncludes.length; i++) {
+        int i;
+        for (i = 1; (i < 2 && i < orderedIncludes.length) || i < orderedIncludes.length-1; i++) {
+            query = query.alsoCached(orderedIncludes[i]);
+        }
+        for (; i < orderedIncludes.length; i++) {
             query = query.also(orderedIncludes[i]);
         }
         for (int term : searchTerms.excludes) {
@@ -5,6 +5,8 @@ import com.google.inject.name.Named;
 import com.upserve.uppend.blobs.NativeIO;
 import io.reactivex.rxjava3.schedulers.Schedulers;
 import nu.marginalia.util.btree.BTreeReader;
+import nu.marginalia.util.btree.CachingBTreeReader;
+import nu.marginalia.util.btree.model.BTreeHeader;
 import nu.marginalia.util.multimap.MultimapFileLong;
 import nu.marginalia.wmsa.edge.index.conversion.SearchIndexConverter;
 import org.slf4j.Logger;
@@ -13,6 +15,7 @@ import org.slf4j.LoggerFactory;
 import java.io.File;
 import java.io.IOException;
 import java.io.RandomAccessFile;
+import java.util.Arrays;
 import java.util.stream.LongStream;
 
 public class SearchIndex implements AutoCloseable {
@@ -21,6 +24,8 @@ public class SearchIndex implements AutoCloseable {
     private final IndexWordsTable words;
     private final RandomAccessFile wordsFile;
     private final BTreeReader bTreeReader;
+    private final CachingBTreeReader cachingBTreeReader;
+
     private final Logger logger;
 
     @Inject
@@ -40,6 +45,7 @@ public class SearchIndex implements AutoCloseable {
         words = IndexWordsTable.ofFile(wordsFile);
 
         bTreeReader = new BTreeReader(urls, SearchIndexConverter.urlsBTreeContext);
+        cachingBTreeReader = new CachingBTreeReader(urls, SearchIndexConverter.urlsBTreeContext);
 
         Schedulers.io().scheduleDirect(() -> madvise(urls, bTreeReader));
     }
@@ -64,27 +70,16 @@ public class SearchIndex implements AutoCloseable {
         if (length < 0) return 0;
         if (length > 0) return length;
 
-        var range = rangeForWord(wordId);
-        if (range.isPresent()) {
-            return bTreeReader.getHeader(range.dataOffset).numEntries();
-        }
-        return 0;
+        return rangeForWord(wordId).numEntries();
     }
 
     public UrlIndexTree rangeForWord(int wordId) {
         return new UrlIndexTree(words.positionForWord(wordId));
     }
 
-    public boolean hasUrl(long url, UrlIndexTree range) {
-        if (!range.isPresent())
-            return false;
-
-        return bTreeReader.findEntry(bTreeReader.getHeader(range.dataOffset), url) >= 0;
-    }
-
     public class UrlIndexTree {
         final long dataOffset;
+        private BTreeHeader header;
         public UrlIndexTree(long dataOffset) {
             this.dataOffset = dataOffset;
         }
@@ -93,15 +88,65 @@ public class SearchIndex implements AutoCloseable {
             if (dataOffset < 0) {
                 return LongStream.empty();
             }
-            var header = bTreeReader.getHeader(dataOffset);
+            if (header == null) {
+                header = bTreeReader.getHeader(dataOffset);
+            }
+
             long urlOffset = header.dataOffsetLongs();
-            return LongStream.range(urlOffset, urlOffset + header.numEntries()).map(urls::get);
+            long endOffset = header.dataOffsetLongs() + header.numEntries();
+            int stepSize = Math.min(1024, header.numEntries());
+
+            long[] buffer = new long[stepSize];
+
+            return LongStream
+                    .iterate(urlOffset, i -> i < endOffset, i -> i + stepSize)
+                    .flatMap(pos -> {
+                        int sz = (int)(Math.min(pos + stepSize, endOffset) - pos);
+                        urls.read(buffer, sz, pos);
+                        return Arrays.stream(buffer, 0, sz);
+                    });
         }
 
         public boolean isPresent() {
             return dataOffset >= 0;
         }
+
+        public long numEntries() {
+            if (header != null) {
+                return header.numEntries();
+            }
+            else if (dataOffset < 0) return 0L;
+            else {
+                header = bTreeReader.getHeader(dataOffset);
+                return header.numEntries();
+            }
+        }
+
+        public boolean hasUrl(long url) {
+            if (header != null) {
+                return bTreeReader.findEntry(header, url) >= 0;
+            }
+            else if (dataOffset < 0) return false;
+            else {
+                header = bTreeReader.getHeader(dataOffset);
+                return bTreeReader.findEntry(header, url) >= 0;
+            }
+        }
+
+        public boolean hasUrl(CachingBTreeReader.Cache cache, long url) {
+            if (header != null) {
+                return cachingBTreeReader.findEntry(header, cache, url) >= 0;
+            }
+            else if (dataOffset < 0) return false;
+            else {
+                header = bTreeReader.getHeader(dataOffset);
+                return cachingBTreeReader.findEntry(header, cache, url) >= 0;
+            }
+        }
+
+        public CachingBTreeReader.Cache createIndexCache() {
+            return cachingBTreeReader.prepareCache();
+        }
     }
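The SearchIndex changes above give each word range an optional index cache; the IndexQueryBuilder and Query changes below consume it in roughly this shape (idx, wordId and candidates are illustrative names): the cache is created once per range and shared by every hasUrl probe against it, so a range's B-tree index layers are read from disk at most once per query instead of once per candidate URL.

    // Illustrative fragment: idx is a SearchIndex, candidates a LongStream of URL ids.
    var range = idx.rangeForWord(wordId);
    var cache = range.createIndexCache();   // shared across all probes of this range

    LongStream filtered = candidates.filter(url -> range.hasUrl(cache, url));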
@@ -1,14 +1,11 @@
 package nu.marginalia.wmsa.edge.index.reader;
 
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
 import com.google.inject.Inject;
 import lombok.SneakyThrows;
 import nu.marginalia.wmsa.edge.index.model.IndexBlock;
 import nu.marginalia.wmsa.edge.index.reader.query.IndexQueryBuilder;
 import nu.marginalia.wmsa.edge.index.reader.query.IndexSearchBudget;
 import nu.marginalia.wmsa.edge.index.reader.query.Query;
-import org.apache.commons.lang3.tuple.Pair;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -27,7 +24,6 @@ public class SearchIndexReader implements AutoCloseable {
     private final EnumMap<IndexBlock, IndexQueryBuilder> underspecifiedQueryBuilders;
 
     private final Logger logger = LoggerFactory.getLogger(getClass());
-    private final Cache<Pair<IndexBlock, Integer>, Long> numHitsCache = CacheBuilder.newBuilder().maximumSize(1000).build();
 
     private static final IndexBlock[] indicesBySearchOrder = new IndexBlock[] {
             IndexBlock.Top,
@@ -131,26 +127,20 @@ public class SearchIndexReader implements AutoCloseable {
         for (var idx : indices.values()) {
             idx.close();
         }
-        numHitsCache.invalidateAll();
-        numHitsCache.cleanUp();
     }
 
     @SneakyThrows
     public long numHits(IndexBlock block, int word) {
-        return numHitsCache.get(Pair.of(block, word), () -> numHitsForBlockWord(block, word));
-    }
-
-    private long numHitsForBlockWord(IndexBlock block, int word) {
         IndexQueryBuilder builder = queryBuilders.get(block);
 
         if (builder == null)
             return 0L;
 
-        return builder
-                .getIndicies()
-                .stream()
-                .mapToLong(idx -> idx.numUrls(word))
-                .sum();
+        long hits = 0;
+        for (var index : builder.getIndicies()) {
+            hits += index.numUrls(word);
+        }
+        return hits;
     }
 
     public IndexBlock getBlockForResult(int searchTerm, long urlId) {
@@ -163,7 +153,7 @@ public class SearchIndexReader implements AutoCloseable {
 
             var range = index.rangeForWord(searchTerm);
 
-            if (index.hasUrl(urlId, range)) {
+            if (range.hasUrl(urlId)) {
                 return block;
             }
         }
@@ -174,8 +164,8 @@ public class SearchIndexReader implements AutoCloseable {
         final var index = indices.get(block);
         if (null == index) return false;
 
-        final var range = index.rangeForWord(searchTerm);
-
-        return index.hasUrl(urlId, range);
+        return index
+                .rangeForWord(searchTerm)
+                .hasUrl(urlId);
     }
 }
@@ -6,7 +6,6 @@ import nu.marginalia.wmsa.edge.index.reader.SearchIndex;
 import java.util.Collection;
 import java.util.List;
 import java.util.Objects;
-import java.util.function.Function;
 import java.util.function.LongPredicate;
 import java.util.function.Supplier;
 import java.util.stream.Collectors;
@@ -51,13 +50,13 @@ public class IndexQueryBuilder {
 
         var fstRange = requiredIndices.get(relevantIndices[0]).rangeForWord(wordId);
 
-        return new QueryForIndices(budget, () ->
-                Streams.concat(IntStream.range(1, relevantIndices.length)
-                                .mapToObj(i -> underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[i], wordId))
-                                .flatMapToLong(Function.identity()),
-                        fstRange.stream().takeWhile(budget::take))
-                .filter(filter)
-        );
+        LongStream priorityStream = underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[0], wordId);
+        for (int i = 1; i < relevantIndices.length; i++) {
+            priorityStream = Streams.concat(priorityStream, underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[i], wordId));
+        }
+        LongStream stream = LongStream.concat(priorityStream, fstRange.stream().takeWhile(budget::take)).filter(filter);
+
+        return new QueryForIndices(budget, () -> stream);
     }
 
     private LongStream underspecifiedPairStream(IndexSearchBudget budget, int limit, int firstIdx, int otherIdx, int wordId) {
@@ -77,10 +76,9 @@ public class IndexQueryBuilder {
         }
 
         var sndRange = snd.rangeForWord(wordId);
+        var cache = sndRange.createIndexCache();
 
-        return fst.rangeForWord(wordId).stream().takeWhile(budget::take).limit(limit).filter(
-                url -> snd.hasUrl(url, sndRange)
-        );
+        return fst.rangeForWord(wordId).stream().takeWhile(budget::take).limit(limit).filter(data -> sndRange.hasUrl(cache, data));
     }
 
@@ -110,6 +108,12 @@ public class IndexQueryBuilder {
                     () -> requiredIndices.stream().flatMapToLong(idx -> alsoStream(idx, wordId)));
         }
 
+        @Override
+        public Query alsoCached(int wordId) {
+            return new QueryForIndices(budget,
+                    () -> requiredIndices.stream().flatMapToLong(idx -> alsoStreamCached(idx, wordId)));
+        }
+
         @Override
         public Query not(int wordId) {
             // Happens when an index simply isn't present, won't find data anyway
@@ -123,12 +127,21 @@ public class IndexQueryBuilder {
         private LongStream alsoStream(SearchIndex idx, int wordId) {
             var range = idx.rangeForWord(wordId);
 
-            return stream().filter(url -> idx.hasUrl(url, range)).takeWhile(budget::take);
+            return stream().filter(range::hasUrl).takeWhile(budget::take);
+        }
+
+        private LongStream alsoStreamCached(SearchIndex idx, int wordId) {
+            var range = idx.rangeForWord(wordId);
+            var cache = range.createIndexCache();
+
+            return stream().filter(data -> range.hasUrl(cache, data)).takeWhile(budget::take);
         }
 
         private LongStream notStream(int wordId) {
             var bodyRange = excludeIndex.rangeForWord(wordId);
-            return stream().filter(url -> !excludeIndex.hasUrl(url, bodyRange)).takeWhile(budget::take);
+            var cache = bodyRange.createIndexCache();
+
+            return stream().filter(url -> !bodyRange.hasUrl(cache, url)).takeWhile(budget::take);
         }
 
         public LongStream stream() {
@@ -7,6 +7,9 @@ public interface Query {
         @Override
         public Query also(int wordId) { return this; }
 
+        @Override
+        public Query alsoCached(int wordId) { return this; }
+
         @Override
         public Query not(int wordId) { return this; }
 
@@ -15,6 +18,8 @@ public interface Query {
     };
 
     Query also(int wordId);
+    Query alsoCached(int wordId);
+
     Query not(int wordId);
 
     LongStream stream();
@@ -0,0 +1,335 @@
package nu.marginalia.util.btree;

import nu.marginalia.util.btree.model.BTreeContext;
import nu.marginalia.util.btree.model.BTreeHeader;
import nu.marginalia.util.multimap.MultimapFileLong;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashSet;
import java.util.Set;
import java.util.StringJoiner;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

class BTreeWriterTestCachedReader {

    final BTreeContext ctx = new BTreeContext(4, 2, 0xFFFF_FFFF_FFFF_FFFFL, 3);
    final BTreeWriter writer = new BTreeWriter(null, ctx);

    Logger logger = LoggerFactory.getLogger(getClass());

    @Test
    void testSmallDataBlock() {
        var header = writer.makeHeader(1024, ctx.BLOCK_SIZE_WORDS()/2);
        assertEquals(1024 + BTreeHeader.BTreeHeaderSizeLongs, header.dataOffsetLongs());
        assertEquals(header.dataOffsetLongs(), header.indexOffsetLongs());
    }

    @Test
    void testLayerCount() {
        int wsq = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS();
        int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS();

        assertEquals(2, writer.makeHeader(1024, wsq-1).layers());
        assertEquals(2, writer.makeHeader(1024, wsq).layers());
        assertEquals(3, writer.makeHeader(1024, wsq+1).layers());

        assertEquals(3, writer.makeHeader(1024, wcub-1).layers());
        assertEquals(3, writer.makeHeader(1024, wcub).layers());
        assertEquals(4, writer.makeHeader(1024, wcub+1).layers());
    }

    @Test
    void testLayerOffset() {
        int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS();
        System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 0));
        System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 1));
        System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 2));

        for (int i = 0; i < 1024; i++) {
            var header = writer.makeHeader(0, i);

            printTreeLayout(i, header, ctx);

            if (header.layers() >= 1) {
                assertEquals(1, ctx.indexLayerSize(i, header.layers() - 1) / ctx.BLOCK_SIZE_WORDS());
            }
        }
    }

    private void printTreeLayout(int numEntries, BTreeHeader header, BTreeContext ctx) {
        StringJoiner sj = new StringJoiner(",");
        for (int l = 0; l < header.layers(); l++) {
            sj.add(""+ctx.indexLayerSize(numEntries, l)/ctx.BLOCK_SIZE_WORDS());
        }
        System.out.println(numEntries + ":" + sj);
    }

    @Test
    public void testWriteEntrySize2() throws IOException {

        var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
        Set<Integer> toPut = new HashSet<>();

        for (int i = 0; i < 500; i++) {
            while (!toPut.add((int)(Integer.MAX_VALUE * Math.random())));
        }

        int[] data = toPut.stream().mapToInt(Integer::valueOf).sorted().toArray();

        try {
            RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw");
            MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000);

            {
                var writer = new BTreeWriter(mmf, ctx);
                writer.write(0, toPut.size(), (slice) -> {
                    for (int i = 0; i < data.length; i++) {
                        slice.put(2L*i, data[i]);
                        slice.put(2L*i + 1, i);
                    }
                });
                mmf.force();
            }

            {
                var reader = new CachingBTreeReader(mmf, ctx);
                var cache = reader.prepareCache();
                var header = reader.getHeader(0);
                for (int i = 0; i < data.length; i++) {
                    long offset = reader.findEntry(header, cache, data[i]);
                    assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                    assertEquals(i, mmf.get(offset+1));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            Files.delete(tempFile);
        }
    }

    @Test
    public void testWriteEntrySize2Small() throws IOException {

        var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
        Set<Integer> toPut = new HashSet<>();

        for (int i = 0; i < 5; i++) {
            while (!toPut.add((int)(Integer.MAX_VALUE * Math.random())));
        }

        int[] data = toPut.stream().mapToInt(Integer::valueOf).sorted().toArray();

        try {
            RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw");
            MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000);

            {
                var writer = new BTreeWriter(mmf, ctx);
                writer.write(0, toPut.size(), (slice) -> {
                    for (int i = 0; i < data.length; i++) {
                        slice.put(2L*i, data[i]);
                        slice.put(2L*i + 1, i);
                    }
                });
                mmf.force();
            }

            {
                var reader = new CachingBTreeReader(mmf, ctx);
                var cache = reader.prepareCache();
                var header = reader.getHeader(0);
                for (int i = 0; i < data.length; i++) {
                    long offset = reader.findEntry(header, cache, data[i]);
                    assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                    assertEquals(i, mmf.get(offset+1));
                }

                for (int i = 0; i < 500; i++) {
                    long val = (long)(Long.MAX_VALUE * Math.random());
                    while (toPut.contains((int)val)) val = (long)(Long.MAX_VALUE * Math.random());
                    assertEquals(-1, reader.findEntry(header, cache, val));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            Files.delete(tempFile);
        }
    }


    @Test
    public void testWriteEqualityNotMasked() throws IOException {
        for (int bs = 2; bs <= 4; bs++) {
            var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
            Set<Long> toPut = new HashSet<>();

            var ctx = new BTreeContext(5, 1, ~0, bs);

            for (int i = 0; i < 500; i++) {
                while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ;
            }

            long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray();

            try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) {
                {
                    var writer = new BTreeWriter(mmf, ctx);
                    writer.write(0, toPut.size(), (slice) -> {
                        for (int i = 0; i < data.length; i++) {
                            slice.put(i, data[i]);
                        }
                    });
                    mmf.force();
                }

                {
                    var reader = new CachingBTreeReader(mmf, ctx);
                    var cache = reader.prepareCache();
                    var header = reader.getHeader(0);

                    printTreeLayout(toPut.size(), header, ctx);

                    for (int i = 0; i < data.length; i++) {
                        long offset = reader.findEntry(header, cache, data[i]);
                        assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                        assertEquals(data[i], mmf.get(offset));
                    }

                    for (int i = 0; i < 500; i++) {
                        long val = (long) (Long.MAX_VALUE * Math.random());
                        while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
                        assertEquals(-1, reader.findEntry(header, cache, val));
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                Files.delete(tempFile);
            }
        }
    }

    @Test
    public void testWriteEqualityMasked() throws IOException {

        for (int bs = 2; bs <= 4; bs++) {
            var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
            Set<Long> toPut = new HashSet<>();

            long mask = 0xFFFF_FFFF_0000_0000L;
            var ctx = new BTreeContext(5, 1, mask, bs);

            for (int i = 0; i < 500; i++) {
                while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ;
            }

            long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray();

            try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) {
                {
                    var writer = new BTreeWriter(mmf, ctx);
                    writer.write(0, toPut.size(), (slice) -> {
                        for (int i = 0; i < data.length; i++) {
                            slice.put(i, data[i]);
                        }
                    });
                    mmf.force();
                }

                {
                    var reader = new CachingBTreeReader(mmf, ctx);
                    var cache = reader.prepareCache();
                    var header = reader.getHeader(0);

                    printTreeLayout(toPut.size(), header, ctx);

                    for (int i = 0; i < data.length; i++) {
                        long offset = reader.findEntry(header, cache, data[i] & mask);
                        assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                        assertEquals(data[i], mmf.get(offset));
                    }

                    for (int i = 0; i < 500; i++) {
                        long val = (long) (Long.MAX_VALUE * Math.random());
                        while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
                        assertEquals(-1, reader.findEntry(header, cache, val & mask));
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                Files.delete(tempFile);
            }
        }
    }

    @Test
    public void testWriteTwoEqualityMasked() throws IOException {

        for (int bs = 2; bs <= 4; bs++) {
            var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
            Set<Long> toPut = new HashSet<>();

            long mask = 0xFFFF_FFFF_0000_0000L;
            var ctx = new BTreeContext(5, 2, mask, bs);

            for (int i = 0; i < 500; i++) {
                while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ;
            }

            long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray();

            try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) {
                {
                    var writer = new BTreeWriter(mmf, ctx);
                    writer.write(0, toPut.size(), (slice) -> {
                        for (int i = 0; i < data.length; i++) {
                            slice.put(i*2L, data[i]);
                            slice.put(i*2L+1, i);
                        }
                    });
                    mmf.force();
                }

                {
                    var reader = new CachingBTreeReader(mmf, ctx);
                    var cache = reader.prepareCache();
                    var header = reader.getHeader(0);

                    printTreeLayout(toPut.size(), header, ctx);

                    for (int i = 0; i < data.length; i++) {
                        long offset = reader.findEntry(header, cache, data[i] & mask);
                        assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                        assertEquals(data[i], mmf.get(offset));
                        assertEquals(i, mmf.get(offset+1));
                    }

                    for (int i = 0; i < 500; i++) {
                        long val = (long) (Long.MAX_VALUE * Math.random());
                        while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
                        assertEquals(-1, reader.findEntry(header, cache, val & mask));
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                Files.delete(tempFile);
            }
        }
    }
}