Index optimizations that should reduce small object churn and IOPS a bit.

vlofgren 2022-08-15 13:58:18 +02:00
parent 460dd098b0
commit beafdfda9c
10 changed files with 654 additions and 52 deletions
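Two techniques recur in the changes below: reading memory-mapped longs blockwise into a reusable heap buffer instead of issuing one get() per entry, and copying a B-tree's index layers into a heap array so repeated lookups against the same tree stop touching mapped pages. As a minimal, self-contained sketch of the block-read pattern (a plain java.nio.LongBuffer stands in for MultimapFileLong; all names here are illustrative, not part of the commit):

import java.nio.LongBuffer;
import java.util.Arrays;
import java.util.stream.LongStream;

class BlockReadSketch {
    // Stream the longs in [start, end) in chunks of blockSize,
    // amortizing per-read overhead over a whole block.
    static LongStream blockStream(LongBuffer src, int start, int end, int blockSize) {
        long[] block = new long[blockSize];
        return LongStream.iterate(start, i -> i < end, i -> i + blockSize)
                .flatMap(pos -> {
                    int sz = (int) Math.min(blockSize, end - pos);
                    src.get((int) pos, block, 0, sz); // one bulk copy instead of sz single reads (absolute bulk get, JDK 13+)
                    return Arrays.stream(block, 0, sz);
                });
    }
}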

View File

@@ -36,6 +36,16 @@ sourceSets {
         resources.srcDir file('src/e2e/resources')
         }
     }
+    jmh {
+        java {
+            compileClasspath += main.output + test.output
+            runtimeClasspath += main.output + test.output
+            srcDir file('src/jmh/java')
+        }
+        resources.srcDir file('src/jmh/resources')
+    }
 }
 java {
@@ -43,7 +53,9 @@ java {
         languageVersion.set(JavaLanguageVersion.of(17))
     }
 }
+jmhJar {
+    zip64 true
+}
 dependencies {
     implementation project(':third_party')
@@ -142,6 +154,9 @@ dependencies {
     implementation 'org.seleniumhq.selenium:selenium-chrome-driver:4.1.4'
     implementation 'org.seleniumhq.selenium:selenium-java:4.3.0'
     implementation 'org.sejda.imageio:webp-imageio:0.1.6'
+    jmh 'org.openjdk.jmh:jmh-core:1.35'
+    jmh 'org.openjdk.jmh:jmh-generator-annprocess:1.35'
 }
 configurations {
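The jmh source set, jmh dependency configuration, and jmhJar task above match the conventions of the me.champeau.jmh Gradle plugin; assuming that plugin is applied elsewhere in the build, the benchmarks run with ./gradlew jmh. The zip64 flag is presumably needed because the fat benchmark jar ends up with more entries than the classic 65,535-entry ZIP limit allows.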

View File

@@ -0,0 +1,85 @@
package nu.marginalia;

import lombok.SneakyThrows;
import nu.marginalia.util.multimap.MultimapFileLong;
import org.openjdk.jmh.annotations.*;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.stream.IntStream;
import java.util.stream.LongStream;

public class ByteBufferBlockReadVsIndividualRead {

    @State(Scope.Benchmark)
    public static class ByteBufferState {
        private MultimapFileLong mmf;
        private Path file;
        private static final int size = 800*1024*1024;

        @Setup(Level.Iteration)
        @SneakyThrows
        public void setUp() {
            file = Files.createTempFile("jmh", ".dat");
            mmf = MultimapFileLong.forOutput(file, size);
            for (int i = 0; i < size; i++) {
                mmf.put(i, i);
            }
        }

        @TearDown(Level.Iteration)
        @SneakyThrows
        public void tearDown() {
            mmf.close();
            Files.delete(file);
        }

        LongStream basicStream() {
            return IntStream.range(0, size).mapToLong(mmf::get);
        }

        LongStream blockStream(int blockSize) {
            long urlOffset = 0;
            long endOffset = size;
            long[] arry = new long[blockSize];

            return LongStream
                    .iterate(urlOffset, i -> i < endOffset, i -> i + blockSize)
                    .flatMap(pos -> {
                        int sz = (int) (Math.min(pos + blockSize, endOffset) - pos);
                        mmf.read(arry, sz, pos);
                        return Arrays.stream(arry, 0, sz);
                    });
        }
    }

//    @Benchmark @BenchmarkMode(Mode.Throughput)
//    @Fork(value = 1, warmups = 1)
//    @Warmup(iterations = 1)
    public long testBasic(ByteBufferState state) {
        return state.basicStream().sum();
    }

    @Benchmark @BenchmarkMode(Mode.Throughput)
    @Fork(value = 1, warmups = 1)
    @Warmup(iterations = 0)
    public long testBlock128(ByteBufferState state) {
        return state.blockStream(128).sum();
    }

    @Benchmark @BenchmarkMode(Mode.Throughput)
    @Fork(value = 1, warmups = 1)
    @Warmup(iterations = 0)
    public long testBlock1024(ByteBufferState state) {
        return state.blockStream(1024).sum();
    }

    @Benchmark @BenchmarkMode(Mode.Throughput)
    @Fork(value = 1, warmups = 1)
    @Warmup(iterations = 0)
    public long testBlock8192(ByteBufferState state) {
        return state.blockStream(8192).sum();
    }
}
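The benchmark fills a memory-mapped file with 800M longs, then sums them either one mmf.get() per element (testBasic, checked in commented out) or via mmf.read() into a reusable long[] block of 128, 1024, or 8192 entries. The 1024-long block variant has the same shape SearchIndex.stream() adopts further down in this commit.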

View File

@@ -0,0 +1,111 @@
package nu.marginalia.util.btree;

import nu.marginalia.util.btree.model.BTreeContext;
import nu.marginalia.util.btree.model.BTreeHeader;
import nu.marginalia.util.multimap.MultimapFileLong;
import nu.marginalia.util.multimap.MultimapSearcher;

import static java.lang.Math.min;

public class CachingBTreeReader {
    private final MultimapFileLong file;
    public final BTreeContext ctx;

    private final MultimapSearcher dataSearcher;

    public CachingBTreeReader(MultimapFileLong file, BTreeContext ctx) {
        this.file = file;
        this.dataSearcher = MultimapSearcher.forContext(file, ctx.equalityMask(), ctx.entrySize());
        this.ctx = ctx;
    }

    public BTreeHeader getHeader(long fileOffset) {
        return new BTreeHeader(file.get(fileOffset), file.get(fileOffset+1), file.get(fileOffset+2));
    }

    public Cache prepareCache() {
        return new Cache();
    }

    /**
     * @return file offset of entry matching keyRaw, negative if absent
     */
    public long findEntry(BTreeHeader header, Cache cache, final long keyRaw) {
        final int blockSize = ctx.BLOCK_SIZE_WORDS();

        final long key = keyRaw & ctx.equalityMask();
        final long dataAddress = header.dataOffsetLongs();

        final long searchStart;
        final long numEntries;

        if (header.layers() == 0) { // For small data, there is no index block, only a flat data block
            searchStart = dataAddress;
            numEntries = header.numEntries();
        }
        else {
            cache.load(header);

            long dataLayerOffset = searchIndex(header, cache, key);
            if (dataLayerOffset < 0) {
                return dataLayerOffset;
            }

            searchStart = dataAddress + dataLayerOffset * ctx.entrySize();
            numEntries = min(header.numEntries() - dataLayerOffset, blockSize);
        }

        return dataSearcher.binarySearch(key, searchStart, numEntries);
    }

    private long searchIndex(BTreeHeader header, Cache cache, long key) {
        final int blockSize = ctx.BLOCK_SIZE_WORDS();
        long layerOffset = 0;

        for (int i = header.layers() - 1; i >= 0; --i) {
            final long indexLayerBlockOffset = header.relativeIndexLayerOffset(ctx, i) + layerOffset;

            final long nextLayerOffset = cache.relativePositionInIndex(key, (int) indexLayerBlockOffset, blockSize);
            if (nextLayerOffset < 0)
                return nextLayerOffset;

            layerOffset = blockSize * (nextLayerOffset + layerOffset);
        }

        return layerOffset;
    }

    public class Cache {
        long[] indexData;

        public void load(BTreeHeader header) {
            if (indexData != null)
                return;

            int size = (int) (header.dataOffsetLongs() - header.indexOffsetLongs());
            indexData = new long[size];
            file.read(indexData, header.indexOffsetLongs());
        }

        long relativePositionInIndex(long key, int fromIndex, int n) {
            int low = 0;
            int high = n - 1;

            while (low <= high) {
                int mid = (low + high) >>> 1;
                long midVal = indexData[fromIndex + mid];

                if (midVal < key)
                    low = mid + 1;
                else if (midVal > key)
                    high = mid - 1;
                else
                    return mid;
            }
            return low;
        }
    }
}
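Cache.load() copies everything between the tree's index offset and data offset, i.e. all index layers, into a heap long[] exactly once per Cache instance; findEntry() then binary-searches that array and only goes back to the mapped file for the final data-block search. Since load() no-ops once indexData is set, a Cache must not be shared between trees; the query code below creates one per word range. A sketch of the intended call sequence, with hypothetical variables (the test class at the bottom of this commit exercises the same steps):

// Hypothetical driver: one Cache per batch of lookups against the same tree.
CachingBTreeReader reader = new CachingBTreeReader(mmf, ctx);
CachingBTreeReader.Cache cache = reader.prepareCache();
BTreeHeader header = reader.getHeader(0);  // assuming a tree written at file offset 0

for (long key : keysToProbe) {
    long offset = reader.findEntry(header, cache, key); // first call populates the cache
    if (offset >= 0) {
        long value = mmf.get(offset + 1); // with entrySize 2: key at offset, payload after
    }
}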

View File

@@ -93,7 +93,7 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice {
         channel = file.getChannel();
         mappedSize = 0;
-        logger.debug("Creating multimap file size = {} / buffer size = {}, mode = {}",
+        logger.trace("Creating multimap file size = {} / buffer size = {}, mode = {}",
                 readableSize(mapSizeBytes), readableSize(8L*bufferSizeWords), mode);
     }
@@ -149,7 +149,7 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice {
     }

     public void force() {
-        logger.debug("Forcing");
+        logger.trace("Forcing");
         for (MappedByteBuffer buffer: mappedByteBuffers) {
             buffer.force();
View File

@@ -125,8 +125,11 @@ public class EdgeIndexBucket {
         else {
             query = indexReader.findWord(block, budget, filter, orderedIncludes[0]);
         }
-        for (int i = 1; i < orderedIncludes.length; i++) {
+        int i;
+        for (i = 1; (i < 2 && i < orderedIncludes.length) || i < orderedIncludes.length-1; i++) {
+            query = query.alsoCached(orderedIncludes[i]);
+        }
+        for (; i < orderedIncludes.length; i++) {
             query = query.also(orderedIncludes[i]);
         }
         for (int term : searchTerms.excludes) {
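Unpacking the new loop bounds: with exactly two include terms the second is intersected via alsoCached(); with three or more, every term except the last goes through alsoCached() and only the final intersection falls through to the plain also(). The cached path pays a one-time heap copy of the B-tree index layers, which should amortize well over the many candidate URLs probed mid-chain, where the result set is still large.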

View File

@@ -5,6 +5,8 @@ import com.google.inject.name.Named;
 import com.upserve.uppend.blobs.NativeIO;
 import io.reactivex.rxjava3.schedulers.Schedulers;
 import nu.marginalia.util.btree.BTreeReader;
+import nu.marginalia.util.btree.CachingBTreeReader;
+import nu.marginalia.util.btree.model.BTreeHeader;
 import nu.marginalia.util.multimap.MultimapFileLong;
 import nu.marginalia.wmsa.edge.index.conversion.SearchIndexConverter;
 import org.slf4j.Logger;
@@ -13,6 +15,7 @@ import org.slf4j.LoggerFactory;
 import java.io.File;
 import java.io.IOException;
 import java.io.RandomAccessFile;
+import java.util.Arrays;
 import java.util.stream.LongStream;

 public class SearchIndex implements AutoCloseable {
@@ -21,6 +24,8 @@ public class SearchIndex implements AutoCloseable {
     private final IndexWordsTable words;
     private final RandomAccessFile wordsFile;
     private final BTreeReader bTreeReader;
+    private final CachingBTreeReader cachingBTreeReader;
+
     private final Logger logger;

     @Inject
@@ -40,6 +45,7 @@ public class SearchIndex implements AutoCloseable {
         words = IndexWordsTable.ofFile(wordsFile);
         bTreeReader = new BTreeReader(urls, SearchIndexConverter.urlsBTreeContext);
+        cachingBTreeReader = new CachingBTreeReader(urls, SearchIndexConverter.urlsBTreeContext);

         Schedulers.io().scheduleDirect(() -> madvise(urls, bTreeReader));
     }
@@ -64,27 +70,16 @@ public class SearchIndex implements AutoCloseable {
         if (length < 0) return 0;
         if (length > 0) return length;

-        var range = rangeForWord(wordId);
-        if (range.isPresent()) {
-            return bTreeReader.getHeader(range.dataOffset).numEntries();
-        }
-        return 0;
+        return rangeForWord(wordId).numEntries();
     }

     public UrlIndexTree rangeForWord(int wordId) {
         return new UrlIndexTree(words.positionForWord(wordId));
     }

-    public boolean hasUrl(long url, UrlIndexTree range) {
-        if (!range.isPresent())
-            return false;
-
-        return bTreeReader.findEntry(bTreeReader.getHeader(range.dataOffset), url) >= 0;
-    }
-
     public class UrlIndexTree {
         final long dataOffset;
+        private BTreeHeader header;

         public UrlIndexTree(long dataOffset) {
             this.dataOffset = dataOffset;
         }
@@ -93,15 +88,65 @@ public class SearchIndex implements AutoCloseable {
             if (dataOffset < 0) {
                 return LongStream.empty();
             }
-            var header = bTreeReader.getHeader(dataOffset);
+            if (header == null) {
+                header = bTreeReader.getHeader(dataOffset);
+            }

             long urlOffset = header.dataOffsetLongs();
-            return LongStream.range(urlOffset, urlOffset + header.numEntries()).map(urls::get);
+            long endOffset = header.dataOffsetLongs() + header.numEntries();
+            int stepSize = Math.min(1024, header.numEntries());
+
+            long[] buffer = new long[stepSize];
+
+            return LongStream
+                    .iterate(urlOffset, i -> i < endOffset, i -> i + stepSize)
+                    .flatMap(pos -> {
+                        int sz = (int) (Math.min(pos + stepSize, endOffset) - pos);
+                        urls.read(buffer, sz, pos);
+                        return Arrays.stream(buffer, 0, sz);
+                    });
         }

         public boolean isPresent() {
             return dataOffset >= 0;
         }

+        public long numEntries() {
+            if (header != null) {
+                return header.numEntries();
+            }
+            else if (dataOffset < 0) return 0L;
+            else {
+                header = bTreeReader.getHeader(dataOffset);
+                return header.numEntries();
+            }
+        }
+
+        public boolean hasUrl(long url) {
+            if (header != null) {
+                return bTreeReader.findEntry(header, url) >= 0;
+            }
+            else if (dataOffset < 0) return false;
+            else {
+                header = bTreeReader.getHeader(dataOffset);
+                return bTreeReader.findEntry(header, url) >= 0;
+            }
+        }
+
+        public boolean hasUrl(CachingBTreeReader.Cache cache, long url) {
+            if (header != null) {
+                return cachingBTreeReader.findEntry(header, cache, url) >= 0;
+            }
+            else if (dataOffset < 0) return false;
+            else {
+                header = bTreeReader.getHeader(dataOffset);
+                return cachingBTreeReader.findEntry(header, cache, url) >= 0;
+            }
+        }
+
+        public CachingBTreeReader.Cache createIndexCache() {
+            return cachingBTreeReader.prepareCache();
+        }
     }
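UrlIndexTree now fetches its BTreeHeader lazily and memoizes it, so numEntries(), hasUrl(), and stream() on the same range touch the header words at most once, and stream() walks the URL range through a reusable buffer of up to 1024 longs instead of one mapped get per entry. A hedged usage sketch (index, wordId, and someUrlId are placeholders, not names from the commit):

var range = index.rangeForWord(wordId);
if (range.isPresent()) {
    long n = range.numEntries();           // reads and memoizes the header
    var cache = range.createIndexCache();  // heap-side index for repeated probes
    boolean hit = range.hasUrl(cache, someUrlId);
    long sum = range.stream().sum();       // block-buffered scan
}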

View File

@@ -1,14 +1,11 @@
 package nu.marginalia.wmsa.edge.index.reader;

-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
 import com.google.inject.Inject;
 import lombok.SneakyThrows;
 import nu.marginalia.wmsa.edge.index.model.IndexBlock;
 import nu.marginalia.wmsa.edge.index.reader.query.IndexQueryBuilder;
 import nu.marginalia.wmsa.edge.index.reader.query.IndexSearchBudget;
 import nu.marginalia.wmsa.edge.index.reader.query.Query;
-import org.apache.commons.lang3.tuple.Pair;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -27,7 +24,6 @@ public class SearchIndexReader implements AutoCloseable {
     private final EnumMap<IndexBlock, IndexQueryBuilder> underspecifiedQueryBuilders;
     private final Logger logger = LoggerFactory.getLogger(getClass());

-    private final Cache<Pair<IndexBlock, Integer>, Long> numHitsCache = CacheBuilder.newBuilder().maximumSize(1000).build();

     private static final IndexBlock[] indicesBySearchOrder = new IndexBlock[] {
         IndexBlock.Top,
@@ -131,26 +127,20 @@ public class SearchIndexReader implements AutoCloseable {
         for (var idx : indices.values()) {
             idx.close();
         }
-
-        numHitsCache.invalidateAll();
-        numHitsCache.cleanUp();
     }

     @SneakyThrows
     public long numHits(IndexBlock block, int word) {
-        return numHitsCache.get(Pair.of(block, word), () -> numHitsForBlockWord(block, word));
-    }
-
-    private long numHitsForBlockWord(IndexBlock block, int word) {
         IndexQueryBuilder builder = queryBuilders.get(block);

         if (builder == null)
             return 0L;

-        return builder
-                .getIndicies()
-                .stream()
-                .mapToLong(idx -> idx.numUrls(word))
-                .sum();
+        long hits = 0;
+        for (var index : builder.getIndicies()) {
+            hits += index.numUrls(word);
+        }
+        return hits;
     }

     public IndexBlock getBlockForResult(int searchTerm, long urlId) {
@@ -163,7 +153,7 @@ public class SearchIndexReader implements AutoCloseable {
         var range = index.rangeForWord(searchTerm);

-        if (index.hasUrl(urlId, range)) {
+        if (range.hasUrl(urlId)) {
             return block;
         }
     }
@@ -174,8 +164,8 @@ public class SearchIndexReader implements AutoCloseable {
         final var index = indices.get(block);
         if (null == index) return false;

-        final var range = index.rangeForWord(searchTerm);
-
-        return index.hasUrl(urlId, range);
+        return index
+                .rangeForWord(searchTerm)
+                .hasUrl(urlId);
     }
 }
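Worth noting against the commit message: numHits() previously went through a Guava Cache keyed on Pair.of(block, word), so every count allocated a boxed pair, and the stream pipeline behind it allocated more. Summing idx.numUrls(word) in a plain loop removes both the cache bookkeeping and the per-call garbage, which is the "small object churn" half of the optimization.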

View File

@@ -6,7 +6,6 @@ import nu.marginalia.wmsa.edge.index.reader.SearchIndex;
 import java.util.Collection;
 import java.util.List;
 import java.util.Objects;
-import java.util.function.Function;
 import java.util.function.LongPredicate;
 import java.util.function.Supplier;
 import java.util.stream.Collectors;
@@ -51,13 +50,13 @@ public class IndexQueryBuilder {
         var fstRange = requiredIndices.get(relevantIndices[0]).rangeForWord(wordId);

-        return new QueryForIndices(budget, () ->
-                Streams.concat(IntStream.range(1, relevantIndices.length)
-                        .mapToObj(i -> underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[i], wordId))
-                        .flatMapToLong(Function.identity()),
-                    fstRange.stream().takeWhile(budget::take))
-                .filter(filter)
-        );
+        LongStream priorityStream = underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[0], wordId);
+        for (int i = 1; i < relevantIndices.length; i++) {
+            priorityStream = Streams.concat(priorityStream, underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[i], wordId));
+        }
+        LongStream stream = LongStream.concat(priorityStream, fstRange.stream().takeWhile(budget::take)).filter(filter);
+
+        return new QueryForIndices(budget, () -> stream);
     }

     private LongStream underspecifiedPairStream(IndexSearchBudget budget, int limit, int firstIdx, int otherIdx, int wordId) {
@@ -77,10 +76,9 @@ public class IndexQueryBuilder {
         }

         var sndRange = snd.rangeForWord(wordId);
+        var cache = sndRange.createIndexCache();

-        return fst.rangeForWord(wordId).stream().takeWhile(budget::take).limit(limit).filter(
-                url -> snd.hasUrl(url, sndRange)
-        );
+        return fst.rangeForWord(wordId).stream().takeWhile(budget::take).limit(limit).filter(data -> sndRange.hasUrl(cache, data));
     }
@@ -110,6 +108,12 @@ public class IndexQueryBuilder {
                 () -> requiredIndices.stream().flatMapToLong(idx -> alsoStream(idx, wordId)));
     }

+    @Override
+    public Query alsoCached(int wordId) {
+        return new QueryForIndices(budget,
+                () -> requiredIndices.stream().flatMapToLong(idx -> alsoStreamCached(idx, wordId)));
+    }
+
     @Override
     public Query not(int wordId) {
         // Happens when an index simply isn't present, won't find data anyway
@@ -123,12 +127,21 @@ public class IndexQueryBuilder {
         private LongStream alsoStream(SearchIndex idx, int wordId) {
             var range = idx.rangeForWord(wordId);

-            return stream().filter(url -> idx.hasUrl(url, range)).takeWhile(budget::take);
+            return stream().filter(range::hasUrl).takeWhile(budget::take);
+        }
+
+        private LongStream alsoStreamCached(SearchIndex idx, int wordId) {
+            var range = idx.rangeForWord(wordId);
+            var cache = range.createIndexCache();
+
+            return stream().filter(data -> range.hasUrl(cache, data)).takeWhile(budget::take);
         }

         private LongStream notStream(int wordId) {
             var bodyRange = excludeIndex.rangeForWord(wordId);
-            return stream().filter(url -> !excludeIndex.hasUrl(url, bodyRange)).takeWhile(budget::take);
+            var cache = bodyRange.createIndexCache();
+
+            return stream().filter(url -> !bodyRange.hasUrl(cache, url)).takeWhile(budget::take);
         }

         public LongStream stream() {

View File

@@ -7,6 +7,9 @@ public interface Query {
         @Override
         public Query also(int wordId) { return this; }

+        @Override
+        public Query alsoCached(int wordId) { return this; }
+
         @Override
         public Query not(int wordId) { return this; }
@@ -15,6 +18,8 @@ public interface Query {
     };

     Query also(int wordId);
+    Query alsoCached(int wordId);
+
     Query not(int wordId);

     LongStream stream();

View File

@@ -0,0 +1,335 @@
package nu.marginalia.util.btree;

import nu.marginalia.util.btree.model.BTreeContext;
import nu.marginalia.util.btree.model.BTreeHeader;
import nu.marginalia.util.multimap.MultimapFileLong;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashSet;
import java.util.Set;
import java.util.StringJoiner;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

class BTreeWriterTestCachedReader {

    final BTreeContext ctx = new BTreeContext(4, 2, 0xFFFF_FFFF_FFFF_FFFFL, 3);
    final BTreeWriter writer = new BTreeWriter(null, ctx);

    Logger logger = LoggerFactory.getLogger(getClass());

    @Test
    void testSmallDataBlock() {
        var header = writer.makeHeader(1024, ctx.BLOCK_SIZE_WORDS()/2);
        assertEquals(1024 + BTreeHeader.BTreeHeaderSizeLongs, header.dataOffsetLongs());
        assertEquals(header.dataOffsetLongs(), header.indexOffsetLongs());
    }

    @Test
    void testLayerCount() {
        int wsq = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS();
        int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS();

        assertEquals(2, writer.makeHeader(1024, wsq-1).layers());
        assertEquals(2, writer.makeHeader(1024, wsq).layers());
        assertEquals(3, writer.makeHeader(1024, wsq+1).layers());

        assertEquals(3, writer.makeHeader(1024, wcub-1).layers());
        assertEquals(3, writer.makeHeader(1024, wcub).layers());
        assertEquals(4, writer.makeHeader(1024, wcub+1).layers());
    }

    @Test
    void testLayerOffset() {
        int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS();
        System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 0));
        System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 1));
        System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 2));

        for (int i = 0; i < 1024; i++) {
            var header = writer.makeHeader(0, i);

            printTreeLayout(i, header, ctx);

            if (header.layers() >= 1) {
                assertEquals(1, ctx.indexLayerSize(i, header.layers() - 1) / ctx.BLOCK_SIZE_WORDS());
            }
        }
    }

    private void printTreeLayout(int numEntries, BTreeHeader header, BTreeContext ctx) {
        StringJoiner sj = new StringJoiner(",");
        for (int l = 0; l < header.layers(); l++) {
            sj.add("" + ctx.indexLayerSize(numEntries, l)/ctx.BLOCK_SIZE_WORDS());
        }
        System.out.println(numEntries + ":" + sj);
    }

    @Test
    public void testWriteEntrySize2() throws IOException {
        var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");

        Set<Integer> toPut = new HashSet<>();
        for (int i = 0; i < 500; i++) {
            while (!toPut.add((int)(Integer.MAX_VALUE * Math.random())));
        }
        int[] data = toPut.stream().mapToInt(Integer::valueOf).sorted().toArray();

        try {
            RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw");
            MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000);

            {
                var writer = new BTreeWriter(mmf, ctx);
                writer.write(0, toPut.size(), (slice) -> {
                    for (int i = 0; i < data.length; i++) {
                        slice.put(2L*i, data[i]);
                        slice.put(2L*i + 1, i);
                    }
                });
                mmf.force();
            }

            {
                var reader = new CachingBTreeReader(mmf, ctx);
                var cache = reader.prepareCache();
                var header = reader.getHeader(0);
                for (int i = 0; i < data.length; i++) {
                    long offset = reader.findEntry(header, cache, data[i]);
                    assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                    assertEquals(i, mmf.get(offset+1));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            Files.delete(tempFile);
        }
    }

    @Test
    public void testWriteEntrySize2Small() throws IOException {
        var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");

        Set<Integer> toPut = new HashSet<>();
        for (int i = 0; i < 5; i++) {
            while (!toPut.add((int)(Integer.MAX_VALUE * Math.random())));
        }
        int[] data = toPut.stream().mapToInt(Integer::valueOf).sorted().toArray();

        try {
            RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw");
            MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000);

            {
                var writer = new BTreeWriter(mmf, ctx);
                writer.write(0, toPut.size(), (slice) -> {
                    for (int i = 0; i < data.length; i++) {
                        slice.put(2L*i, data[i]);
                        slice.put(2L*i + 1, i);
                    }
                });
                mmf.force();
            }

            {
                var reader = new CachingBTreeReader(mmf, ctx);
                var cache = reader.prepareCache();
                var header = reader.getHeader(0);

                for (int i = 0; i < data.length; i++) {
                    long offset = reader.findEntry(header, cache, data[i]);
                    assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                    assertEquals(i, mmf.get(offset+1));
                }

                for (int i = 0; i < 500; i++) {
                    long val = (long)(Long.MAX_VALUE * Math.random());
                    while (toPut.contains((int)val)) val = (long)(Long.MAX_VALUE * Math.random());
                    assertEquals(-1, reader.findEntry(header, cache, val));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            Files.delete(tempFile);
        }
    }

    @Test
    public void testWriteEqualityNotMasked() throws IOException {
        for (int bs = 2; bs <= 4; bs++) {
            var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
            Set<Long> toPut = new HashSet<>();

            var ctx = new BTreeContext(5, 1, ~0, bs);

            for (int i = 0; i < 500; i++) {
                while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ;
            }
            long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray();

            try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) {
                {
                    var writer = new BTreeWriter(mmf, ctx);
                    writer.write(0, toPut.size(), (slice) -> {
                        for (int i = 0; i < data.length; i++) {
                            slice.put(i, data[i]);
                        }
                    });
                    mmf.force();
                }

                {
                    var reader = new CachingBTreeReader(mmf, ctx);
                    var cache = reader.prepareCache();
                    var header = reader.getHeader(0);

                    printTreeLayout(toPut.size(), header, ctx);

                    for (int i = 0; i < data.length; i++) {
                        long offset = reader.findEntry(header, cache, data[i]);
                        assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                        assertEquals(data[i], mmf.get(offset));
                    }

                    for (int i = 0; i < 500; i++) {
                        long val = (long) (Long.MAX_VALUE * Math.random());
                        while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
                        assertEquals(-1, reader.findEntry(header, cache, val));
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                Files.delete(tempFile);
            }
        }
    }

    @Test
    public void testWriteEqualityMasked() throws IOException {
        for (int bs = 2; bs <= 4; bs++) {
            var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
            Set<Long> toPut = new HashSet<>();

            long mask = 0xFFFF_FFFF_0000_0000L;
            var ctx = new BTreeContext(5, 1, mask, bs);

            for (int i = 0; i < 500; i++) {
                while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ;
            }
            long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray();

            try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) {
                {
                    var writer = new BTreeWriter(mmf, ctx);
                    writer.write(0, toPut.size(), (slice) -> {
                        for (int i = 0; i < data.length; i++) {
                            slice.put(i, data[i]);
                        }
                    });
                    mmf.force();
                }

                {
                    var reader = new CachingBTreeReader(mmf, ctx);
                    var cache = reader.prepareCache();
                    var header = reader.getHeader(0);

                    printTreeLayout(toPut.size(), header, ctx);

                    for (int i = 0; i < data.length; i++) {
                        long offset = reader.findEntry(header, cache, data[i] & mask);
                        assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                        assertEquals(data[i], mmf.get(offset));
                    }

                    for (int i = 0; i < 500; i++) {
                        long val = (long) (Long.MAX_VALUE * Math.random());
                        while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
                        assertEquals(-1, reader.findEntry(header, cache, val & mask));
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                Files.delete(tempFile);
            }
        }
    }

    @Test
    public void testWriteTwoEqualityMasked() throws IOException {
        for (int bs = 2; bs <= 4; bs++) {
            var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
            Set<Long> toPut = new HashSet<>();

            long mask = 0xFFFF_FFFF_0000_0000L;
            var ctx = new BTreeContext(5, 2, mask, bs);

            for (int i = 0; i < 500; i++) {
                while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ;
            }
            long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray();

            try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) {
                {
                    var writer = new BTreeWriter(mmf, ctx);
                    writer.write(0, toPut.size(), (slice) -> {
                        for (int i = 0; i < data.length; i++) {
                            slice.put(i*2L, data[i]);
                            slice.put(i*2L+1, i);
                        }
                    });
                    mmf.force();
                }

                {
                    var reader = new CachingBTreeReader(mmf, ctx);
                    var cache = reader.prepareCache();
                    var header = reader.getHeader(0);

                    printTreeLayout(toPut.size(), header, ctx);

                    for (int i = 0; i < data.length; i++) {
                        long offset = reader.findEntry(header, cache, data[i] & mask);
                        assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                        assertEquals(data[i], mmf.get(offset));
                        assertEquals(i, mmf.get(offset+1));
                    }

                    for (int i = 0; i < 500; i++) {
                        long val = (long) (Long.MAX_VALUE * Math.random());
                        while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
                        assertEquals(-1, reader.findEntry(header, cache, val & mask));
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                Files.delete(tempFile);
            }
        }
    }
}