Cleaning up the BTree library a bit.

Viktor Lofgren 2023-03-05 11:27:56 +01:00
parent 4464055715
commit cf00963e57
8 changed files with 111 additions and 52 deletions

ReverseIndexParameters.java

@@ -1,9 +1,16 @@
 package nu.marginalia.index.reverse;

+import nu.marginalia.btree.model.BTreeBlockSize;
 import nu.marginalia.btree.model.BTreeContext;

 class ReverseIndexParameters {
     public static final int ENTRY_SIZE = 2;
-    public static final BTreeContext bTreeContext = new BTreeContext(5, ENTRY_SIZE, 8);
+
+    // This is the byte size per index page on disk, the data pages are twice as large due to ENTRY_SIZE = 2.
+    //
+    // Given a hardware limit of 4k reads, 2k block size should be optimal.
+    public static final BTreeBlockSize blockSize = BTreeBlockSize.BS_2048;
+
+    public static final BTreeContext bTreeContext = new BTreeContext(5, ENTRY_SIZE, blockSize);
 }
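A quick sanity check of the sizes this configuration implies, using the (1 << blockSizeBits) * sizeof(long) formula documented in BTreeBlockSize below (the local variable names here are just for illustration):

    int blockSizeBits  = 8;                           // BS_2048
    int wordsPerPage   = 1 << blockSizeBits;          // 256 longs per index page
    int indexPageBytes = wordsPerPage * Long.BYTES;   // 256 * 8 = 2048 bytes
    int dataPageBytes  = indexPageBytes * ENTRY_SIZE; // 2 * 2048 = 4096 bytes, exactly one 4k read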

README.md

@@ -14,7 +14,7 @@ read with a [BTreeReader](src/main/java/nu/marginalia/btree/BTreeReader.java).
 BTreeContext ctx = new BTreeContext(
         4,  // num layers max
         1,  // entry size
-        8); // block size bits, in practice this should be 8
+        BTreeBlockSize.BS_4096); // page size

 // Allocate a memory area to work in, see the array library for how to do this with files
 LongArray array = LongArray.allocate(8192);
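(BS_4096 corresponds to blockSizeBits = 9 in the new BTreeBlockSize enum, i.e. 1 << 9 = 512 longs = 4096 bytes per index page.)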

BTreeDogEar.java

@@ -22,8 +22,8 @@ class BTreeDogEar {
     if (header.numEntries() > 3) {
         var sentinelSlice = base.range(
-                (long) header.numEntries() * ctx.entrySize() - 3,
-                (long) header.numEntries() * ctx.entrySize());
+                (long) header.numEntries() * ctx.entrySize - 3,
+                (long) header.numEntries() * ctx.entrySize);
         sentinelSlice.set(0, 4L);
         sentinelSlice.set(1, 5L);
         sentinelSlice.set(2, 1L);

BTreeReader.java

@@ -23,7 +23,7 @@ public class BTreeReader {
     this.ctx = ctx;
     this.header = readHeader(file, offset);

-    dataBlockEnd = (long) ctx.entrySize() * header.numEntries();
+    dataBlockEnd = (long) ctx.entrySize * header.numEntries();
     index = file.range(header.indexOffsetLongs(), header.dataOffsetLongs());
     data = file.range(header.dataOffsetLongs(), header.dataOffsetLongs() + dataBlockEnd);
@@ -153,7 +153,7 @@ public class BTreeReader {
     long searchStart = 0;
     for (int i = 0; i < keys.length; i++) {
         long key = keys[i];
-        searchStart = data.binarySearchN(ctx.entrySize(), key, searchStart, data.size);
+        searchStart = data.binarySearchN(ctx.entrySize, key, searchStart, data.size);
         if (searchStart < 0) {
             searchStart = LongArraySearch.decodeSearchMiss(searchStart);
         }
@@ -218,11 +218,11 @@ public class BTreeReader {
         final long searchStart = layerOffsets[layer] + offset;

-        final long nextLayerOffset = (int) index.binarySearchUpperBound(key, searchStart, searchStart + ctx.blockSizeWords()) - searchStart;
+        final long nextLayerOffset = (int) index.binarySearchUpperBound(key, searchStart, searchStart + ctx.pageSize()) - searchStart;

         layer --;
         boundary = index.get(searchStart + nextLayerOffset);
-        offset = ctx.blockSizeWords() * (offset + nextLayerOffset);
+        offset = ctx.pageSize() * (offset + nextLayerOffset);

         return true;
     }
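To illustrate the descent arithmetic with made-up numbers: with ctx.pageSize() = 256, if the upper-bound search lands on slot 3 of the root page, the reader drops one layer and continues at offset = 256 * (0 + 3) = 768 within that layer, so every step narrows the candidate range by a factor of pageSize().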
@@ -249,17 +249,17 @@ public class BTreeReader {
             throw new IllegalStateException("Looking for data in an index layer");
         }

-        long searchStart = offset * ctx.entrySize();
-        long remainingTotal = dataBlockEnd - offset * ctx.entrySize();
+        long searchStart = offset * ctx.entrySize;
+        long remainingTotal = dataBlockEnd - offset * ctx.entrySize;
         long remainingBlock;

         remainingBlock = (layerOffsets.length == 0)
                 ? remainingTotal
-                : (long) ctx.blockSizeWords() * ctx.entrySize();
+                : (long) ctx.pageSize() * ctx.entrySize;

         long searchEnd = searchStart + (int) min(remainingTotal, remainingBlock);

-        return data.binarySearchN(ctx.entrySize(), key, searchStart, searchEnd);
+        return data.binarySearchN(ctx.entrySize, key, searchStart, searchEnd);
     }

     public void retainData(LongQueryBuffer buffer) {
@@ -269,15 +269,15 @@ public class BTreeReader {
             buffer.retainAndAdvance();

             if (buffer.hasMore() && buffer.currentValue() <= boundary) {
-                long blockBase = offset * ctx.entrySize();
+                long blockBase = offset * ctx.entrySize;
                 long relOffset = dataOffset - blockBase;

                 long remainingTotal = dataBlockEnd - dataOffset;
-                long remainingBlock = ctx.blockSizeWords() - relOffset;
+                long remainingBlock = ctx.pageSize() - relOffset;

                 long searchEnd = dataOffset + (int) min(remainingTotal, remainingBlock);

-                data.range(dataOffset, searchEnd).retainN(buffer, ctx.entrySize(), boundary);
+                data.range(dataOffset, searchEnd).retainN(buffer, ctx.entrySize, boundary);
             }
         }
         else {
@@ -293,15 +293,15 @@ public class BTreeReader {
             buffer.rejectAndAdvance();

             if (buffer.hasMore() && buffer.currentValue() <= boundary) {
-                long blockBase = offset * ctx.entrySize();
+                long blockBase = offset * ctx.entrySize;
                 long relOffset = dataOffset - blockBase;

                 long remainingTotal = dataBlockEnd - dataOffset;
-                long remainingBlock = ctx.blockSizeWords() - relOffset;
+                long remainingBlock = ctx.pageSize() - relOffset;

                 long searchEnd = dataOffset + (int) min(remainingTotal, remainingBlock);

-                data.range(dataOffset, searchEnd).rejectN(buffer, ctx.entrySize(), boundary);
+                data.range(dataOffset, searchEnd).rejectN(buffer, ctx.entrySize, boundary);
             }
         }
         else {

BTreeWriter.java

@@ -41,7 +41,7 @@ public class BTreeWriter {
     writeHeader(header, map, offset);

     final long startRange = header.dataOffsetLongs();
-    final long endRange = startRange + (long) numEntries * ctx.entrySize();
+    final long endRange = startRange + (long) numEntries * ctx.entrySize;

     var slice = map.range(startRange, endRange);
@@ -97,7 +97,7 @@ public class BTreeWriter {
      * a sorted list, there needs to be padding between the header and the index
      * in order to get aligned blocks
      */
-        padding = (int) (ctx.blockSizeWords() - ((offset + BTreeHeader.BTreeHeaderSizeLongs) % ctx.blockSizeWords()));
+        padding = (int) (ctx.pageSize() - ((offset + BTreeHeader.BTreeHeaderSizeLongs) % ctx.pageSize()));
     }
     return padding;
 }
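A worked example of the padding arithmetic, with illustrative numbers: if ctx.pageSize() = 8 and offset + BTreeHeader.BTreeHeaderSizeLongs = 1027, then padding = 8 - (1027 % 8) = 5 words, which rounds the index start up to the next page boundary at 1032.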
@@ -110,7 +110,7 @@ public class BTreeWriter {
     private void writeIndex(BTreeHeader header) {
         var layerOffsets = header.getRelativeLayerOffsets(ctx);

-        long indexedDataStepSize = ctx.blockSizeWords();
+        long indexedDataStepSize = ctx.pageSize();

         /* Index layer 0 indexes the data itself
            Index layer 1 indexes layer 0
@@ -118,7 +118,7 @@ public class BTreeWriter {
            And so on
         */
         for (int layer = 0; layer < header.layers(); layer++,
-                indexedDataStepSize*=ctx.blockSizeWords()) {
+                indexedDataStepSize*=ctx.pageSize()) {

             writeIndexLayer(header, layerOffsets, indexedDataStepSize, layer);
         }
@@ -134,7 +134,7 @@ public class BTreeWriter {
         final long dataOffsetBase = header.dataOffsetLongs();
         final long dataEntriesMax = header.numEntries();
-        final int entrySize = ctx.entrySize();
+        final int entrySize = ctx.entrySize;

         final long lastDataEntryOffset = indexedDataStepSize - 1;
@@ -153,8 +153,8 @@ public class BTreeWriter {
         final long trailerStart = indexOffsetBase + indexWord;
         final long trailerEnd = trailerStart
-                + ctx.blockSizeWords()
-                - (int) (indexWord % ctx.blockSizeWords());
+                + ctx.pageSize()
+                - (int) (indexWord % ctx.pageSize());

         if (trailerStart < trailerEnd) {
             map.fill(trailerStart, trailerEnd, Long.MAX_VALUE);
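A note on the Long.MAX_VALUE fill, as I read it (the commit itself doesn't spell this out): padding the unused tail of the last index page with the largest possible key lets the upper-bound binary search treat every index page as full, since any real key sorts before the padding.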

BTreeBlockSize.java (new file)

@@ -0,0 +1,36 @@
+package nu.marginalia.btree.model;
+
+public enum BTreeBlockSize {
+    BS_16(1),
+    BS_32(2),
+    BS_64(3),
+    BS_128(4),
+    BS_256(5),
+    BS_512(6),
+    BS_1024(7),
+    BS_2048(8),
+    BS_4096(9),
+    BS_8192(10);
+
+    // blockSizeBits can be viewed as the log2 of the number of logical branches at each
+    // index layer, where 1 is the world's most over-engineered binary tree. In practice
+    // you want as many branches as possible while staying below the page size limit.
+    //
+    // The formula for converting between the blockSizeBits as used in BTreeContext and
+    // the byte size on disk is (1<<blockSizeBits) * sizeof(long) [ * entrySize ]
+    final int blockSizeBits;
+
+    BTreeBlockSize(int blockSizeBits) {
+        this.blockSizeBits = blockSizeBits;
+    }
+
+    public static BTreeBlockSize fromBitCount(int blockSizeBits) {
+        for (var size : values()) {
+            if (size.blockSizeBits == blockSizeBits)
+                return size;
+        }
+
+        throw new IllegalArgumentException();
+    }
+}
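A usage sketch for the conversion helper (values taken straight from the declarations above):

    BTreeBlockSize bs = BTreeBlockSize.fromBitCount(8); // yields BS_2048
    // on-disk index page size: (1 << 8) * sizeof(long) = 256 * 8 = 2048 bytes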

BTreeContext.java

@@ -2,26 +2,37 @@ package nu.marginalia.btree.model;

 import nu.marginalia.btree.BTreeWriter;

-/**
- *
- * @param maxLayers The maximum number of index layers
- * @param entrySize The entry size, for size 1 the key is the data. For sizes larger than 1,
- *                  the data will be expected to sit in the successive position to the key
- *                  in the data layer
- * @param blockSizeBits Bits per data block
- * @param blockSizeWords Words per data block
- */
-public record BTreeContext(int maxLayers, int entrySize, int blockSizeBits, int blockSizeWords) {
+/** Specifies the parameters of a BTree. */
+public class BTreeContext {
+    public final int maxLayers;
+    public final int entrySize;
+
+    private final int blockSizeBits;
+    private final int pageSize;

+    // Below this number of data pages, a b-tree will not be constructed.
+    //
+    // 8 pages is the breaking point where using a B-tree is actually advantageous
+    // over just binary searching in a sorted list. Above 8 pages, binary search will
+    // incur a worst case of four page faults. A b-tree will incur three page faults
+    // up until ~100k-200k entries with typical configurations.
     private static final int MIN_PAGES_FOR_BTREE = 8;

-    public BTreeContext(int maxLayers, int entrySize, int blockSizeBits) {
-        this(maxLayers, entrySize, blockSizeBits, 1 << blockSizeBits);
-    }
+    /**
+     * @param maxLayers The maximum number of index layers
+     * @param entrySize The entry size; for size 1 the key is the data. For sizes larger than 1,
+     *                  the data is expected to sit in the positions following the key
+     *                  in the data layer
+     * @param blockSize Specifies the size of each index layer. The data layers' size will be
+     *                  entrySize times the blockSize. For on-disk BTrees the ideal is anywhere
+     *                  below a 4096b data size. When testing the BTree you probably want as
+     *                  small a value as you can get away with, to reduce the need for RAM.
+     */
+    public BTreeContext(int maxLayers, int entrySize, BTreeBlockSize blockSize) {
+        this.maxLayers = maxLayers;
+        this.entrySize = entrySize;
+        this.blockSizeBits = blockSize.blockSizeBits;
+        this.pageSize = 1 << blockSizeBits;
+    }

     public long calculateSize(int numEntries) {
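A minimal sketch of constructing the new class, mirroring the call sites elsewhere in this commit:

    // BS_64 has blockSizeBits = 3, so pageSize() == 1 << 3 == 8 longs
    BTreeContext ctx = new BTreeContext(
            4,                     // maxLayers
            2,                     // entrySize: a key plus one data word
            BTreeBlockSize.BS_64);
    // each data page then spans entrySize * pageSize() = 16 longs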
@@ -31,7 +42,7 @@ public record BTreeContext(int maxLayers, int entrySize, int blockSizeBits, int
     }

     public int numIndexLayers(int numEntries) {
-        if (numEntries <= blockSizeWords *MIN_PAGES_FOR_BTREE/entrySize) {
+        if (entrySize * numEntries <= pageSize * MIN_PAGES_FOR_BTREE) {
             return 0;
         }
         for (int i = 1; i < maxLayers; i++) {
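Plugging the ReverseIndexParameters configuration into this condition as an example: with BS_2048 (pageSize() = 256) and entrySize = 2, the test 2 * numEntries <= 256 * 8 holds for up to 1024 entries, so trees that small get zero index layers and fall back to a plain binary search of the data.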
@@ -50,7 +61,11 @@ public record BTreeContext(int maxLayers, int entrySize, int blockSizeBits, int
     public long indexLayerSize(int numWords, int level) {
         final long layerSize = 1L<<(blockSizeBits *(level+1));

-        return blockSizeWords * (numWords / layerSize + Long.signum(numWords % layerSize));
+        return pageSize * (numWords / layerSize + Long.signum(numWords % layerSize));
     }
+
+    public int pageSize() {
+        return pageSize;
+    }
 }
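For example, with blockSizeBits = 3 (pageSize() = 8) and numWords = 100: at level 0, layerSize = 8 and the method returns 8 * (12 + 1) = 104 words, i.e. 13 pages rounded up to whole pages; at level 1, layerSize = 64 and it returns 8 * (1 + 1) = 16 words.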

BTreeWriterTest.java

@@ -1,6 +1,7 @@
 package nu.marginalia.btree;

 import nu.marginalia.array.LongArray;
+import nu.marginalia.btree.model.BTreeBlockSize;
 import nu.marginalia.btree.model.BTreeContext;
 import nu.marginalia.btree.model.BTreeHeader;
 import org.junit.jupiter.api.Test;
@@ -23,21 +24,21 @@ class BTreeWriterTest {

     @Test
     void testSmallDataBlock() {
-        BTreeContext ctx = new BTreeContext(4, 2, 3);
+        BTreeContext ctx = new BTreeContext(4, 2, BTreeBlockSize.BS_64);
         BTreeWriter writer = new BTreeWriter(null, ctx);

-        var header = writer.makeHeader(1024, ctx.blockSizeWords()/2);
+        var header = writer.makeHeader(1024, ctx.pageSize()/2);

         assertEquals(1024 + BTreeHeader.BTreeHeaderSizeLongs, header.dataOffsetLongs());
         assertEquals(header.dataOffsetLongs(), header.indexOffsetLongs());
     }

     @Test
     void testLayerCount() {
-        BTreeContext ctx = new BTreeContext(4, 2, 3);
+        BTreeContext ctx = new BTreeContext(4, 2, BTreeBlockSize.BS_64);
         BTreeWriter writer = new BTreeWriter(null, ctx);

-        int wsq = ctx.blockSizeWords()*ctx.blockSizeWords();
-        int wcub = ctx.blockSizeWords()*ctx.blockSizeWords()*ctx.blockSizeWords();
+        int wsq = ctx.pageSize()*ctx.pageSize();
+        int wcub = ctx.pageSize()*ctx.pageSize()*ctx.pageSize();

         assertEquals(2, writer.makeHeader(1024, wsq-1).layers());
         assertEquals(2, writer.makeHeader(1024, wsq).layers());
@@ -50,10 +51,10 @@ class BTreeWriterTest {

     @Test
     void testLayerOffset() {
-        BTreeContext ctx = new BTreeContext(4, 2, 3);
+        BTreeContext ctx = new BTreeContext(4, 2, BTreeBlockSize.BS_64);
         BTreeWriter writer = new BTreeWriter(null, ctx);

-        int wcub = ctx.blockSizeWords()*ctx.blockSizeWords()*ctx.blockSizeWords();
+        int wcub = ctx.pageSize()*ctx.pageSize()*ctx.pageSize();
         System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 0));
         System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 1));
         System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 2));
@@ -65,7 +66,7 @@ class BTreeWriterTest {
         printTreeLayout(i, header, ctx);

         if (header.layers() >= 1) {
-            assertEquals(1, ctx.indexLayerSize(i, header.layers() - 1) / ctx.blockSizeWords());
+            assertEquals(1, ctx.indexLayerSize(i, header.layers() - 1) / ctx.pageSize());
         }
     }
 }
@@ -73,14 +74,14 @@ class BTreeWriterTest {
     private void printTreeLayout(int numEntries, BTreeHeader header, BTreeContext ctx) {
         StringJoiner sj = new StringJoiner(",");
         for (int l = 0; l < header.layers(); l++) {
-            sj.add(""+ctx.indexLayerSize(numEntries, l)/ctx.blockSizeWords());
+            sj.add(""+ctx.indexLayerSize(numEntries, l)/ctx.pageSize());
         }
         System.out.println(numEntries + ":" + sj);
     }

     @Test
     public void testWriteEntrySize2() throws IOException {
-        BTreeContext ctx = new BTreeContext(4, 2, 3);
+        BTreeContext ctx = new BTreeContext(4, 2, BTreeBlockSize.BS_64);

         var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
@@ -107,7 +108,7 @@ class BTreeWriterTest {

     @Test
     public void testWriteEntrySize2Small() throws IOException {
-        BTreeContext ctx = new BTreeContext(4, 2, 3);
+        BTreeContext ctx = new BTreeContext(4, 2, BTreeBlockSize.BS_64);

         int[] data = generateItems32(5);
         Set<Integer> items = IntStream.of(data).boxed().collect(Collectors.toSet());
@@ -135,7 +136,7 @@ class BTreeWriterTest {
     @Test
     public void testWriteEqualityNotMasked() throws IOException {
         for (int bs = 2; bs <= 4; bs++) {
-            var ctx = new BTreeContext(5, 1, bs);
+            var ctx = new BTreeContext(5, 1, BTreeBlockSize.fromBitCount(bs));

             long[] data = generateItems64(500);
             Set<Long> items = LongStream.of(data).boxed().collect(Collectors.toSet());