diff --git a/marginalia_nu/build.gradle b/marginalia_nu/build.gradle index 94e240fe..304af5c8 100644 --- a/marginalia_nu/build.gradle +++ b/marginalia_nu/build.gradle @@ -36,6 +36,14 @@ sourceSets { resources.srcDir file('src/e2e/resources') } } + jmh { + java { + compileClasspath += main.output + test.output + runtimeClasspath += main.output + test.output + srcDir file('src/jmh/java') + } + resources.srcDir file('src/jmh/resources') + } } java { @@ -43,7 +51,9 @@ java { languageVersion.set(JavaLanguageVersion.of(17)) } } - +jmhJar { + zip64 true +} dependencies { implementation project(':third_party') @@ -142,6 +152,9 @@ dependencies { implementation 'org.seleniumhq.selenium:selenium-chrome-driver:4.1.4' implementation 'org.seleniumhq.selenium:selenium-java:4.3.0' implementation 'org.sejda.imageio:webp-imageio:0.1.6' + + jmh 'org.openjdk.jmh:jmh-core:1.35' + jmh 'org.openjdk.jmh:jmh-generator-annprocess:1.35' } configurations { diff --git a/marginalia_nu/src/jmh/java/nu/marginalia/ByteBufferBlockReadVsIndividualRead.java b/marginalia_nu/src/jmh/java/nu/marginalia/ByteBufferBlockReadVsIndividualRead.java new file mode 100644 index 00000000..097e1408 --- /dev/null +++ b/marginalia_nu/src/jmh/java/nu/marginalia/ByteBufferBlockReadVsIndividualRead.java @@ -0,0 +1,85 @@ +package nu.marginalia; + +import lombok.SneakyThrows; +import nu.marginalia.util.multimap.MultimapFileLong; +import org.openjdk.jmh.annotations.*; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.stream.IntStream; +import java.util.stream.LongStream; + +public class ByteBufferBlockReadVsIndividualRead { + + @State(Scope.Benchmark) + public static class ByteBufferState { + private MultimapFileLong mmf; + private Path file; + private static final int size = 800*1024*1024; + @Setup(Level.Iteration) + @SneakyThrows + public void setUp() { + file = Files.createTempFile("jmh", ".dat"); + mmf = MultimapFileLong.forOutput(file, size); + for
(int i = 0; i < size; i++) { + mmf.put(i, i); + } + } + + @TearDown(Level.Iteration) + @SneakyThrows + public void tearDown() { + mmf.close(); + Files.delete(file); + } + + LongStream basicStream() { + return IntStream.range(0, size).mapToLong(mmf::get); + } + + LongStream blockStream(int blockSize) { + long urlOffset = 0; + long endOffset = size; + + long[] arry = new long[blockSize]; + + return LongStream + .iterate(urlOffset, i -> i< endOffset, i->i+blockSize) + .flatMap(pos -> { + int sz = (int)(Math.min(pos+blockSize, endOffset) - pos); + mmf.read(arry, sz, pos); + return Arrays.stream(arry, 0, sz); + }); + } + } + + + + // @Benchmark @BenchmarkMode(Mode.Throughput) + // @Fork(value = 1, warmups = 1) + // @Warmup(iterations = 1) + public long testBasic(ByteBufferState state) { + return state.basicStream().sum(); + } + + + @Benchmark @BenchmarkMode(Mode.Throughput) + @Fork(value = 1, warmups = 1) + @Warmup(iterations = 0) + public long testBlock128(ByteBufferState state) { + return state.blockStream(128).sum(); + } + @Benchmark @BenchmarkMode(Mode.Throughput) + @Fork(value = 1, warmups = 1) + @Warmup(iterations = 0) + public long testBlock1024(ByteBufferState state) { + return state.blockStream(1024).sum(); + } + @Benchmark @BenchmarkMode(Mode.Throughput) + @Fork(value = 1, warmups = 1) + @Warmup(iterations = 0) + public long testBlock8192(ByteBufferState state) { + return state.blockStream(8192).sum(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/btree/CachingBTreeReader.java b/marginalia_nu/src/main/java/nu/marginalia/util/btree/CachingBTreeReader.java new file mode 100644 index 00000000..c3c79cd3 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/btree/CachingBTreeReader.java @@ -0,0 +1,111 @@ +package nu.marginalia.util.btree; + +import nu.marginalia.util.btree.model.BTreeContext; +import nu.marginalia.util.btree.model.BTreeHeader; +import nu.marginalia.util.multimap.MultimapFileLong; +import 
nu.marginalia.util.multimap.MultimapSearcher; + +import static java.lang.Math.min; + +public class CachingBTreeReader { + + private final MultimapFileLong file; + public final BTreeContext ctx; + + private final MultimapSearcher dataSearcher; + + public CachingBTreeReader(MultimapFileLong file, BTreeContext ctx) { + this.file = file; + this.dataSearcher = MultimapSearcher.forContext(file, ctx.equalityMask(), ctx.entrySize()); + + this.ctx = ctx; + } + + public BTreeHeader getHeader(long fileOffset) { + return new BTreeHeader(file.get(fileOffset), file.get(fileOffset+1), file.get(fileOffset+2)); + } + + public Cache prepareCache() { + return new Cache(); + } + /** + * + * @return file offset of entry matching keyRaw, negative if absent + */ + public long findEntry(BTreeHeader header, Cache cache, final long keyRaw) { + final int blockSize = ctx.BLOCK_SIZE_WORDS(); + + final long key = keyRaw & ctx.equalityMask(); + final long dataAddress = header.dataOffsetLongs(); + + final long searchStart; + final long numEntries; + + if (header.layers() == 0) { // For small data, there is no index block, only a flat data block + searchStart = dataAddress; + numEntries = header.numEntries(); + } + else { + cache.load(header); + + long dataLayerOffset = searchIndex(header, cache, key); + if (dataLayerOffset < 0) { + return dataLayerOffset; + } + + searchStart = dataAddress + dataLayerOffset * ctx.entrySize(); + numEntries = min(header.numEntries() - dataLayerOffset, blockSize); + } + + return dataSearcher.binarySearch(key, searchStart, numEntries); + } + + private long searchIndex(BTreeHeader header, Cache cache, long key) { + final int blockSize = ctx.BLOCK_SIZE_WORDS(); + long layerOffset = 0; + + for (int i = header.layers() - 1; i >= 0; --i) { + final long indexLayerBlockOffset = header.relativeIndexLayerOffset(ctx, i) + layerOffset; + + final long nextLayerOffset = cache.relativePositionInIndex(key, (int) indexLayerBlockOffset, blockSize); + if (nextLayerOffset < 0) + return 
nextLayerOffset; + + layerOffset = blockSize * (nextLayerOffset + layerOffset); + } + + return layerOffset; + } + + + public class Cache { + long[] indexData; + + public void load(BTreeHeader header) { + if (indexData != null) + return; + + int size = (int)(header.dataOffsetLongs() - header.indexOffsetLongs()); + indexData = new long[size]; + file.read(indexData, header.indexOffsetLongs()); + } + + long relativePositionInIndex(long key, int fromIndex, int n) { + int low = 0; + int high = n - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + long midVal = indexData[fromIndex + mid]; + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return mid; + } + return low; + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java index 251b2c30..6423d18a 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java @@ -93,7 +93,7 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice { channel = file.getChannel(); mappedSize = 0; - logger.debug("Creating multimap file size = {} / buffer size = {}, mode = {}", + logger.trace("Creating multimap file size = {} / buffer size = {}, mode = {}", readableSize(mapSizeBytes), readableSize(8L*bufferSizeWords), mode); } @@ -149,7 +149,7 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice { } public void force() { - logger.debug("Forcing"); + logger.trace("Forcing"); for (MappedByteBuffer buffer: mappedByteBuffers) { buffer.force(); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexBucket.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexBucket.java index c883b513..84cca0a0 100644 --- 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexBucket.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexBucket.java @@ -125,8 +125,11 @@ public class EdgeIndexBucket { else { query = indexReader.findWord(block, budget, filter, orderedIncludes[0]); } - - for (int i = 1; i < orderedIncludes.length; i++) { + int i; + for (i = 1; (i < 2 && i < orderedIncludes.length) || i < orderedIncludes.length-1; i++) { + query = query.alsoCached(orderedIncludes[i]); + } + for (; i < orderedIncludes.length; i++) { query = query.also(orderedIncludes[i]); } for (int term : searchTerms.excludes) { diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java index c2888f7a..80e45a07 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java @@ -5,6 +5,8 @@ import com.google.inject.name.Named; import com.upserve.uppend.blobs.NativeIO; import io.reactivex.rxjava3.schedulers.Schedulers; import nu.marginalia.util.btree.BTreeReader; +import nu.marginalia.util.btree.CachingBTreeReader; +import nu.marginalia.util.btree.model.BTreeHeader; import nu.marginalia.util.multimap.MultimapFileLong; import nu.marginalia.wmsa.edge.index.conversion.SearchIndexConverter; import org.slf4j.Logger; @@ -13,6 +15,7 @@ import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; +import java.util.Arrays; import java.util.stream.LongStream; public class SearchIndex implements AutoCloseable { @@ -21,6 +24,8 @@ public class SearchIndex implements AutoCloseable { private final IndexWordsTable words; private final RandomAccessFile wordsFile; private final BTreeReader bTreeReader; + private final CachingBTreeReader cachingBTreeReader; + private final Logger logger; @Inject @@ 
-40,6 +45,7 @@ public class SearchIndex implements AutoCloseable { words = IndexWordsTable.ofFile(wordsFile); bTreeReader = new BTreeReader(urls, SearchIndexConverter.urlsBTreeContext); + cachingBTreeReader = new CachingBTreeReader(urls, SearchIndexConverter.urlsBTreeContext); Schedulers.io().scheduleDirect(() -> madvise(urls, bTreeReader)); } @@ -64,27 +70,16 @@ public class SearchIndex implements AutoCloseable { if (length < 0) return 0; if (length > 0) return length; - var range = rangeForWord(wordId); - if (range.isPresent()) { - return bTreeReader.getHeader(range.dataOffset).numEntries(); - } - return 0; + return rangeForWord(wordId).numEntries(); } public UrlIndexTree rangeForWord(int wordId) { return new UrlIndexTree(words.positionForWord(wordId)); } - public boolean hasUrl(long url, UrlIndexTree range) { - if (!range.isPresent()) - return false; - - return bTreeReader.findEntry(bTreeReader.getHeader(range.dataOffset), url) >= 0; - } - public class UrlIndexTree { final long dataOffset; - + private BTreeHeader header; public UrlIndexTree(long dataOffset) { this.dataOffset = dataOffset; } @@ -93,15 +88,65 @@ public class SearchIndex implements AutoCloseable { if (dataOffset < 0) { return LongStream.empty(); } - var header = bTreeReader.getHeader(dataOffset); + if (header == null) { + header = bTreeReader.getHeader(dataOffset); + } long urlOffset = header.dataOffsetLongs(); - return LongStream.range(urlOffset, urlOffset + header.numEntries()).map(urls::get); + long endOffset = header.dataOffsetLongs() + header.numEntries(); + int stepSize = Math.min(1024, header.numEntries()); + + long[] buffer = new long[stepSize]; + + return LongStream + .iterate(urlOffset, i -> i< endOffset, i->i+stepSize) + .flatMap(pos -> { + int sz = (int)(Math.min(pos+stepSize, endOffset) - pos); + urls.read(buffer, sz, pos); + return Arrays.stream(buffer, 0, sz); + }); } public boolean isPresent() { return dataOffset >= 0; } + + public long numEntries() { + if (header != null) { + 
return header.numEntries(); + } + else if (dataOffset < 0) return 0L; + else { + header = bTreeReader.getHeader(dataOffset); + return header.numEntries(); + } + } + + public boolean hasUrl(long url) { + if (header != null) { + return bTreeReader.findEntry(header, url) >= 0; + } + else if (dataOffset < 0) return false; + else { + header = bTreeReader.getHeader(dataOffset); + return bTreeReader.findEntry(header, url) >= 0; + } + } + + public boolean hasUrl(CachingBTreeReader.Cache cache, long url) { + if (header != null) { + return cachingBTreeReader.findEntry(header, cache, url) >= 0; + } + else if (dataOffset < 0) return false; + else { + header = bTreeReader.getHeader(dataOffset); + return cachingBTreeReader.findEntry(header, cache, url) >= 0; + } + } + + public CachingBTreeReader.Cache createIndexCache() { + return cachingBTreeReader.prepareCache(); + } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndexReader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndexReader.java index 74bc8e18..65a4dafe 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndexReader.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndexReader.java @@ -1,14 +1,11 @@ package nu.marginalia.wmsa.edge.index.reader; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; import com.google.inject.Inject; import lombok.SneakyThrows; import nu.marginalia.wmsa.edge.index.model.IndexBlock; import nu.marginalia.wmsa.edge.index.reader.query.IndexQueryBuilder; import nu.marginalia.wmsa.edge.index.reader.query.IndexSearchBudget; import nu.marginalia.wmsa.edge.index.reader.query.Query; -import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -27,7 +24,6 @@ public class SearchIndexReader implements AutoCloseable { private final EnumMap underspecifiedQueryBuilders; private final Logger logger = 
LoggerFactory.getLogger(getClass()); - private final Cache, Long> numHitsCache = CacheBuilder.newBuilder().maximumSize(1000).build(); private static final IndexBlock[] indicesBySearchOrder = new IndexBlock[] { IndexBlock.Top, @@ -131,26 +127,20 @@ public class SearchIndexReader implements AutoCloseable { for (var idx : indices.values()) { idx.close(); } - numHitsCache.invalidateAll(); - numHitsCache.cleanUp(); } @SneakyThrows public long numHits(IndexBlock block, int word) { - return numHitsCache.get(Pair.of(block, word), () -> numHitsForBlockWord(block, word)); - } - - private long numHitsForBlockWord(IndexBlock block, int word) { IndexQueryBuilder builder = queryBuilders.get(block); if (builder == null) return 0L; - return builder - .getIndicies() - .stream() - .mapToLong(idx -> idx.numUrls(word)) - .sum(); + long hits = 0; + for (var index : builder.getIndicies()) { + hits += index.numUrls(word); + } + return hits; } public IndexBlock getBlockForResult(int searchTerm, long urlId) { @@ -163,7 +153,7 @@ public class SearchIndexReader implements AutoCloseable { var range = index.rangeForWord(searchTerm); - if (index.hasUrl(urlId, range)) { + if (range.hasUrl(urlId)) { return block; } } @@ -174,8 +164,8 @@ public class SearchIndexReader implements AutoCloseable { final var index = indices.get(block); if (null == index) return false; - final var range = index.rangeForWord(searchTerm); - - return index.hasUrl(urlId, range); + return index + .rangeForWord(searchTerm) + .hasUrl(urlId); } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/query/IndexQueryBuilder.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/query/IndexQueryBuilder.java index 82b4dbf3..1b27ddd0 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/query/IndexQueryBuilder.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/query/IndexQueryBuilder.java @@ -6,7 +6,6 @@ import 
nu.marginalia.wmsa.edge.index.reader.SearchIndex; import java.util.Collection; import java.util.List; import java.util.Objects; -import java.util.function.Function; import java.util.function.LongPredicate; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -51,13 +50,13 @@ public class IndexQueryBuilder { var fstRange = requiredIndices.get(relevantIndices[0]).rangeForWord(wordId); - return new QueryForIndices(budget, () -> - Streams.concat(IntStream.range(1, relevantIndices.length) - .mapToObj(i -> underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[i], wordId)) - .flatMapToLong(Function.identity()), - fstRange.stream().takeWhile(budget::take)) - .filter(filter) - ); + return new QueryForIndices(budget, () -> { + LongStream priorityStream = LongStream.empty(); + for (int i = 1; i < relevantIndices.length; i++) { + priorityStream = Streams.concat(priorityStream, underspecifiedPairStream(budget, 1000, relevantIndices[0], relevantIndices[i], wordId)); + } + return LongStream.concat(priorityStream, fstRange.stream().takeWhile(budget::take)).filter(filter); + }); } private LongStream underspecifiedPairStream(IndexSearchBudget budget, int limit, int firstIdx, int otherIdx, int wordId) { @@ -77,10 +76,9 @@ public class IndexQueryBuilder { } var sndRange = snd.rangeForWord(wordId); + var cache = sndRange.createIndexCache(); - return fst.rangeForWord(wordId).stream().takeWhile(budget::take).limit(limit).filter( - url -> snd.hasUrl(url, sndRange) - ); + return fst.rangeForWord(wordId).stream().takeWhile(budget::take).limit(limit).filter(data -> sndRange.hasUrl(cache, data)); } @@ -110,6 +108,12 @@ public class IndexQueryBuilder { () -> requiredIndices.stream().flatMapToLong(idx -> alsoStream(idx, wordId))); } + @Override + public Query alsoCached(int wordId) { + return new QueryForIndices(budget, + () -> requiredIndices.stream().flatMapToLong(idx
-> alsoStreamCached(idx, wordId))); + } + @Override public Query not(int wordId) { // Happens when an index simply isn't present, won't find data anyway @@ -123,12 +127,21 @@ public class IndexQueryBuilder { private LongStream alsoStream(SearchIndex idx, int wordId) { var range = idx.rangeForWord(wordId); - return stream().filter(url -> idx.hasUrl(url, range)).takeWhile(budget::take); + return stream().filter(range::hasUrl).takeWhile(budget::take); + } + + private LongStream alsoStreamCached(SearchIndex idx, int wordId) { + var range = idx.rangeForWord(wordId); + var cache = range.createIndexCache(); + + return stream().filter(data -> range.hasUrl(cache, data)).takeWhile(budget::take); } private LongStream notStream(int wordId) { var bodyRange = excludeIndex.rangeForWord(wordId); - return stream().filter(url -> !excludeIndex.hasUrl(url, bodyRange)).takeWhile(budget::take); + var cache = bodyRange.createIndexCache(); + + return stream().filter(url -> !bodyRange.hasUrl(cache, url)).takeWhile(budget::take); } public LongStream stream() { diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/query/Query.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/query/Query.java index ac941b83..149be164 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/query/Query.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/query/Query.java @@ -7,6 +7,9 @@ public interface Query { @Override public Query also(int wordId) { return this; } + @Override + public Query alsoCached(int wordId) { return this; } + @Override public Query not(int wordId) { return this; } @@ -15,6 +18,8 @@ public interface Query { }; Query also(int wordId); + Query alsoCached(int wordId); + Query not(int wordId); LongStream stream(); diff --git a/marginalia_nu/src/test/java/nu/marginalia/util/btree/BTreeWriterTestCachedReader.java b/marginalia_nu/src/test/java/nu/marginalia/util/btree/BTreeWriterTestCachedReader.java new 
file mode 100644 index 00000000..037055dc --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/util/btree/BTreeWriterTestCachedReader.java @@ -0,0 +1,335 @@ +package nu.marginalia.util.btree; + +import nu.marginalia.util.btree.model.BTreeContext; +import nu.marginalia.util.btree.model.BTreeHeader; +import nu.marginalia.util.multimap.MultimapFileLong; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashSet; +import java.util.Set; +import java.util.StringJoiner; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class BTreeWriterTestCachedReader { + + final BTreeContext ctx = new BTreeContext(4, 2, 0xFFFF_FFFF_FFFF_FFFFL, 3); + final BTreeWriter writer = new BTreeWriter(null, ctx); + + Logger logger = LoggerFactory.getLogger(getClass()); + @Test + void testSmallDataBlock() { + var header = writer.makeHeader(1024, ctx.BLOCK_SIZE_WORDS()/2); + assertEquals(1024 + BTreeHeader.BTreeHeaderSizeLongs, header.dataOffsetLongs()); + assertEquals(header.dataOffsetLongs(), header.indexOffsetLongs()); + } + + @Test + void testLayerCount() { + int wsq = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS(); + int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS(); + + assertEquals(2, writer.makeHeader(1024, wsq-1).layers()); + assertEquals(2, writer.makeHeader(1024, wsq).layers()); + assertEquals(3, writer.makeHeader(1024, wsq+1).layers()); + + assertEquals(3, writer.makeHeader(1024, wcub-1).layers()); + assertEquals(3, writer.makeHeader(1024, wcub).layers()); + assertEquals(4, writer.makeHeader(1024, wcub+1).layers()); + } + + @Test + void testLayerOffset() { + int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS(); + 
System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 0)); + System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 1)); + System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 2)); + + for (int i = 0; i < 1024; i++) { + var header = writer.makeHeader(0, i); + + + printTreeLayout(i, header, ctx); + + if (header.layers() >= 1) { + assertEquals(1, ctx.indexLayerSize(i, header.layers() - 1) / ctx.BLOCK_SIZE_WORDS()); + } + } + } + + private void printTreeLayout(int numEntries, BTreeHeader header, BTreeContext ctx) { + StringJoiner sj = new StringJoiner(","); + for (int l = 0; l < header.layers(); l++) { + sj.add(""+ctx.indexLayerSize(numEntries, l)/ctx.BLOCK_SIZE_WORDS()); + } + System.out.println(numEntries + ":" + sj); + } + + @Test + public void testWriteEntrySize2() throws IOException { + + var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat"); + Set toPut = new HashSet<>(); + + for (int i = 0; i < 500; i++) { + while (!toPut.add((int)(Integer.MAX_VALUE * Math.random()))); + } + + int[] data = toPut.stream().mapToInt(Integer::valueOf).sorted().toArray(); + + try { + RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw"); + MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000); + + { + var writer = new BTreeWriter(mmf, ctx); + writer.write(0, toPut.size(), (slice) -> { + for (int i = 0; i < data.length; i++) { + slice.put(2L*i, data[i]); + slice.put( 2L*i + 1, i); + } + }); + mmf.force(); + } + + { + var reader = new CachingBTreeReader(mmf, ctx); + var cache = reader.prepareCache(); + var header = reader.getHeader(0); + for (int i = 0; i < data.length; i++) { + long offset = reader.findEntry(header, cache, data[i]); + assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); + assertEquals(i, mmf.get(offset+1)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + Files.delete(tempFile); + } + 
} + + @Test + public void testWriteEntrySize2Small() throws IOException { + + var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat"); + Set toPut = new HashSet<>(); + + for (int i = 0; i < 5; i++) { + while (!toPut.add((int)(Integer.MAX_VALUE * Math.random()))); + } + + int[] data = toPut.stream().mapToInt(Integer::valueOf).sorted().toArray(); + + try { + RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw"); + MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000); + + { + var writer = new BTreeWriter(mmf, ctx); + writer.write( 0, toPut.size(), (slice) -> { + for (int i = 0; i < data.length; i++) { + slice.put(2L*i, data[i]); + slice.put(2L*i + 1, i); + } + }); + mmf.force(); + } + + { + var reader = new CachingBTreeReader(mmf, ctx); + var cache = reader.prepareCache(); + var header = reader.getHeader(0); + for (int i = 0; i < data.length; i++) { + long offset = reader.findEntry(header, cache, data[i]); + assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); + assertEquals(i, mmf.get(offset+1)); + } + + for (int i = 0; i < 500; i++) { + long val = (long)(Long.MAX_VALUE * Math.random()); + while (toPut.contains((int)val)) val = (long)(Long.MAX_VALUE * Math.random()); + assertEquals(-1, reader.findEntry(header, cache, val)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + Files.delete(tempFile); + } + } + + + @Test + public void testWriteEqualityNotMasked() throws IOException { + for (int bs = 2; bs <= 4; bs++) { + var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat"); + Set toPut = new HashSet<>(); + + var ctx = new BTreeContext(5, 1, ~0, bs); + + for (int i = 0; i < 500; i++) { + while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ; + } + + long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray(); + + try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) { + { + var writer = new BTreeWriter(mmf, ctx); + 
writer.write(0, toPut.size(), (slice) -> { + for (int i = 0; i < data.length; i++) { + slice.put(i, data[i]); + } + }); + mmf.force(); + } + + { + var reader = new CachingBTreeReader(mmf, ctx); + var cache = reader.prepareCache(); + var header = reader.getHeader(0); + + printTreeLayout(toPut.size(), header, ctx); + + for (int i = 0; i < data.length; i++) { + long offset = reader.findEntry(header, cache, data[i]); + assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); + assertEquals(data[i], mmf.get(offset)); + } + + for (int i = 0; i < 500; i++) { + long val = (long) (Long.MAX_VALUE * Math.random()); + while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); + assertEquals(-1, reader.findEntry(header, cache, val)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + Files.delete(tempFile); + } + } + } + + @Test + public void testWriteEqualityMasked() throws IOException { + + for (int bs = 2; bs <= 4; bs++) { + var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat"); + Set toPut = new HashSet<>(); + + long mask = 0xFFFF_FFFF_0000_0000L; + var ctx = new BTreeContext(5, 1, mask, bs); + + for (int i = 0; i < 500; i++) { + while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ; + } + + long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray(); + + try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) { + { + var writer = new BTreeWriter(mmf, ctx); + writer.write(0, toPut.size(), (slice) -> { + for (int i = 0; i < data.length; i++) { + slice.put(i, data[i]); + } + }); + mmf.force(); + } + + { + var reader = new CachingBTreeReader(mmf, ctx); + var cache = reader.prepareCache(); + var header = reader.getHeader(0); + + printTreeLayout(toPut.size(), header, ctx); + + for (int i = 0; i < data.length; i++) { + long offset = reader.findEntry(header, cache,data[i] & mask); + assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); + assertEquals(data[i], 
mmf.get(offset)); + } + + for (int i = 0; i < 500; i++) { + long val = (long) (Long.MAX_VALUE * Math.random()); + while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); + assertEquals(-1, reader.findEntry(header, cache, val & mask)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + Files.delete(tempFile); + } + } + } + + @Test + public void testWriteTwoEqualityMasked() throws IOException { + + for (int bs = 2; bs <= 4; bs++) { + var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat"); + Set toPut = new HashSet<>(); + + long mask = 0xFFFF_FFFF_0000_0000L; + var ctx = new BTreeContext(5, 2, mask, bs); + + for (int i = 0; i < 500; i++) { + while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ; + } + + long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray(); + + try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) { + { + var writer = new BTreeWriter(mmf, ctx); + writer.write(0, toPut.size(), (slice) -> { + for (int i = 0; i < data.length; i++) { + slice.put(i*2L, data[i]); + slice.put(i*2L+1, i); + } + }); + mmf.force(); + } + + { + var reader = new CachingBTreeReader(mmf, ctx); + var cache = reader.prepareCache(); + var header = reader.getHeader(0); + + printTreeLayout(toPut.size(), header, ctx); + + for (int i = 0; i < data.length; i++) { + long offset = reader.findEntry(header, cache, data[i] & mask); + assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); + assertEquals(data[i], mmf.get(offset)); + assertEquals(i, mmf.get(offset+1)); + } + + for (int i = 0; i < 500; i++) { + long val = (long) (Long.MAX_VALUE * Math.random()); + while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); + assertEquals(-1, reader.findEntry(header, cache,val & mask)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + Files.delete(tempFile); + } + } + } + + + +} \ No newline at end of file