(array) Fix spurious search results

This was caused by a bug in the binary search algorithm, which caused it to sometimes return positive values when encoding a search miss.

It was also necessary to get rid of the vestiges of the old LongArray and IntArray classes to make this fix doable.
This commit is contained in:
Viktor Lofgren 2023-10-26 15:27:02 +02:00
parent a497e4c920
commit f613f4f2df
32 changed files with 438 additions and 1652 deletions

View File

@ -11,6 +11,7 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.Executors;
import static nu.marginalia.index.forward.ForwardIndexParameters.*;
@ -52,6 +53,9 @@ public class ForwardIndexReader {
data = loadData(dataFile);
}
// Intentionally a no-op: the forward index currently has no self-test
// diagnostics. Kept as a hook so callers can invoke selfTest() uniformly
// across index readers (the reverse index reader has a working one).
public void selfTest() {
}
private static TLongIntHashMap loadIds(Path idsFile) throws IOException {
try (var idsArray = LongArrayFactory.mmapForReadingShared(idsFile)) {
assert idsArray.size() < Integer.MAX_VALUE;

View File

@ -53,67 +53,18 @@ public class ReverseIndexReader {
logger.info("Running self test program");
long wordsDataSize = wordsBTreeReader.getHeader().numEntries() * 2L;
var wordsDataRange = words.range(wordsDataOffset, wordsDataOffset + wordsDataSize);
if (!wordsDataRange.isSortedN(2, 0, wordsDataSize))
logger.error("Failed test 1: Words data is not sorted");
else
logger.info("Passed test 1");
boolean failed2 = false;
for (long i = 1; i < wordsDataRange.size(); i+=2) {
var docsBTreeReader = new BTreeReader(this.documents, ReverseIndexParameters.docsBTreeContext, wordsDataRange.get(i));
var header = docsBTreeReader.getHeader();
var docRange = documents.range(header.dataOffsetLongs(), header.dataOffsetLongs() + header.numEntries() * 2L);
if (!docRange.isSortedN(2, 0, header.numEntries() * 2L)) {
logger.error("Failed test 2: numEntries={}, offset={}", header.numEntries(), header.dataOffsetLongs());
failed2 = true;
break;
}
}
if (!failed2)
logger.info("Passed test 2");
boolean failed3 = false;
for (long i = 0; i < wordsDataRange.size(); i+=2) {
if (wordOffset(wordsDataRange.get(i)) < 0) {
failed3 = true;
logger.error("Failed test 3");
if (wordsBTreeReader.findEntry(wordsDataRange.get(i)) < 0) {
logger.error("Scenario A");
}
else {
logger.error("Scenario B");
}
break;
}
}
if (!failed3) {
logger.info("Passed test 3");
}
boolean failed4 = false;
outer:
for (long i = 1; i < wordsDataRange.size(); i+=2) {
var docsBTreeReader = new BTreeReader(this.documents, ReverseIndexParameters.docsBTreeContext, wordsDataRange.get(i));
var header = docsBTreeReader.getHeader();
var docRange = documents.range(header.dataOffsetLongs(), header.dataOffsetLongs() + header.numEntries() * 2L);
for (int j = 0; j < docRange.size(); j+=2) {
if (docsBTreeReader.findEntry(docRange.get(j)) < 0) {
logger.info("Failed test 4");
break outer;
}
}
}
if (!failed4) {
logger.info("Passed test 4");
}
// ReverseIndexSelfTest.runSelfTest1(wordsDataRange, wordsDataSize);
// ReverseIndexSelfTest.runSelfTest2(wordsDataRange, documents);
// ReverseIndexSelfTest.runSelfTest3(wordsDataRange, wordsBTreeReader);
// ReverseIndexSelfTest.runSelfTest4(wordsDataRange, documents);
ReverseIndexSelfTest.runSelfTest5(wordsDataRange, wordsBTreeReader);
ReverseIndexSelfTest.runSelfTest6(wordsDataRange, documents);
}
private long wordOffset(long wordId) {
long wordOffset(long wordId) {
long idx = wordsBTreeReader.findEntry(wordId);
if (idx < 0)

View File

@ -0,0 +1,111 @@
package nu.marginalia.index;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import nu.marginalia.array.LongArray;
import nu.marginalia.btree.BTreeReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Random;
/**
 * Manually-invoked consistency checks for the reverse index files.
 * <p>
 * Each test logs "Passed test N" on success, or an error describing the
 * failure. Nothing is thrown, so the tests are safe to run against a live
 * index. Several of the tests (5 and 6 in particular) are very expensive
 * and intended for offline debugging only.
 */
public class ReverseIndexSelfTest {
    private static final Logger logger = LoggerFactory.getLogger(ReverseIndexSelfTest.class);

    /** Test 1: the (wordId, offset) pairs must be sorted on wordId. */
    public static void runSelfTest1(LongArray wordsDataRange, long wordsDataSize) {
        logger.info("Starting test 1");

        if (!wordsDataRange.isSortedN(2, 0, wordsDataSize))
            logger.error("Failed test 1: Words data is not sorted");
        else
            logger.info("Passed test 1");
    }

    /** Test 2: each word's document data range must itself be sorted. */
    public static void runSelfTest2(LongArray wordsDataRange, LongArray documents) {
        logger.info("Starting test 2");

        for (long i = 1; i < wordsDataRange.size(); i+=2) {
            var docsBTreeReader = new BTreeReader(documents, ReverseIndexParameters.docsBTreeContext, wordsDataRange.get(i));
            var header = docsBTreeReader.getHeader();
            var docRange = documents.range(header.dataOffsetLongs(), header.dataOffsetLongs() + header.numEntries() * 2L);

            if (!docRange.isSortedN(2, 0, header.numEntries() * 2L)) {
                logger.error("Failed test 2: numEntries={}, offset={}", header.numEntries(), header.dataOffsetLongs());
                return;
            }
        }

        logger.info("Passed test 2");
    }

    /** Test 3: every wordId present in the data must be findable in the words b-tree. */
    public static void runSelfTest3(LongArray wordsDataRange, BTreeReader reader) {
        logger.info("Starting test 3");

        for (long i = 0; i < wordsDataRange.size(); i+=2) {
            if (reader.findEntry(wordsDataRange.get(i)) < 0) {
                // was "Failed Test 3" at error level with odd casing; made consistent
                logger.error("Failed test 3");
                return;
            }
        }

        logger.info("Passed test 3");
    }

    /** Test 4: every docId stored in a word's range must be findable in that word's docs b-tree. */
    public static void runSelfTest4(LongArray wordsDataRange, LongArray documents) {
        logger.info("Starting test 4");

        for (long i = 1; i < wordsDataRange.size(); i+=2) {
            var docsBTreeReader = new BTreeReader(documents, ReverseIndexParameters.docsBTreeContext, wordsDataRange.get(i));
            var header = docsBTreeReader.getHeader();
            var docRange = documents.range(header.dataOffsetLongs(), header.dataOffsetLongs() + header.numEntries() * 2L);

            for (int j = 0; j < docRange.size(); j+=2) {
                if (docsBTreeReader.findEntry(docRange.get(j)) < 0) {
                    // was logged at info level; failures belong at error level
                    logger.error("Failed test 4");
                    return;
                }
            }
        }

        logger.info("Passed test 4");
    }

    /**
     * Test 5: random wordIds that are NOT in the index must come back
     * as search misses (negative return from findEntry).  This is the
     * scenario behind the spurious-results bug this class was written for.
     */
    public static void runSelfTest5(LongArray wordsDataRange, BTreeReader wordsBTreeReader) {
        logger.info("Starting test 5");

        // Cast AFTER dividing: (int)size/2 truncated the size before halving,
        // which is wrong for arrays larger than Integer.MAX_VALUE entries.
        LongOpenHashSet words = new LongOpenHashSet((int) (wordsDataRange.size() / 2));

        // long loop index to match the long-valued size(), as in the other tests
        for (long i = 0; i < wordsDataRange.size(); i+=2) {
            words.add(wordsDataRange.get(i));
        }

        var random = new Random();
        for (int i = 0; i < 100_000_000; i++) {
            long v;
            do {
                v = random.nextLong();
            } while (words.contains(v));

            if (wordsBTreeReader.findEntry(v) >= 0) {
                logger.error("Failed test 5 @ W{}", v);
                return;
            }
        }

        logger.info("Passed test 5");
    }

    /**
     * Test 6: every docId in the gaps BETWEEN stored docIds must be a
     * search miss in the docs b-tree.  Exhaustive and extremely slow.
     */
    public static void runSelfTest6(LongArray wordsDataRange, LongArray documents) {
        logger.info("Starting test 6");

        for (long i = 1; i < wordsDataRange.size(); i+=2) {
            var docsBTreeReader = new BTreeReader(documents, ReverseIndexParameters.docsBTreeContext, wordsDataRange.get(i));
            var header = docsBTreeReader.getHeader();
            var docRange = documents.range(header.dataOffsetLongs(), header.dataOffsetLongs() + header.numEntries() * 2L);

            Long prev = null;
            for (int j = 0; j < docRange.size(); j+=2) {
                if (prev == null) {
                    prev = docRange.get(j);
                    continue;
                }

                // probe every value strictly between two consecutive stored docIds
                long thisVal = prev + 1;
                long nextVal = docRange.get(j);

                while (thisVal < nextVal) {
                    if (docsBTreeReader.findEntry(thisVal) >= 0) {
                        // was logged at info level; failures belong at error level
                        logger.error("Failed test 6 @ W{}:D{}", wordsDataRange.get(i-1), thisVal);
                        return;
                    }
                    thisVal++;
                }
            }
        }

        logger.info("Passed test 6");
    }
}

View File

@ -0,0 +1,47 @@
package nu.marginalia.index;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.btree.BTreeReader;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Manually-run debug reproduction for a binary search bug where
 * {@code findEntry()} reported a hit for a document id that is not present
 * in the b-tree's data.
 * <p>
 * NOTE: relies on machine-specific index files under a hard-coded path;
 * this is a debugging aid, not a CI test.
 */
public class ReverseIndexDebugTest {
    @Test
    public void debug() throws IOException {
        long problemWord = -7909917549851025932L;
        long problemDoc = 9079256848846028801L;

        // try-with-resources so the mmapped arrays are unmapped even when an
        // assertion fails (the originals were never closed)
        try (var words = LongArrayFactory.mmapForReadingConfined(Path.of("/home/vlofgren/Code/MarginaliaSearch/run/node-1/index/ir/rev-words.dat"));
             var documents = LongArrayFactory.mmapForReadingConfined(Path.of("/home/vlofgren/Code/MarginaliaSearch/run/node-1/index/ir/rev-docs.dat")))
        {
            var wordsBTreeReader = new BTreeReader(words, ReverseIndexParameters.wordsBTreeContext, 0);

            long wordOffset = wordsBTreeReader.findEntry(problemWord);
            assertTrue(wordOffset >= 0);

            var docsReader = new BTreeReader(documents, ReverseIndexParameters.docsBTreeContext, wordOffset);

            // We find problemDoc even though it doesn't exist in the document range
            long docOffset = docsReader.findEntry(problemDoc);
            assertTrue(docOffset < 0);

            // We know it doesn't exist because when we check, we can't find it,
            // either by iterating...
            var dataRange = docsReader.data();
            System.out.println(dataRange.size());
            for (int i = 0; i < dataRange.size(); i+=2) {
                assertNotEquals(problemDoc, dataRange.get(i));
            }

            // or by binary searching
            assertTrue(dataRange.binarySearchN(2, problemDoc, 0, dataRange.size()) < 0);
        }
    }
}

View File

@ -17,7 +17,6 @@ public class FoldBenchmark {
array.transformEach(0, size, (pos,old) -> ~pos);
array2.transformEach(0, size, (pos,old) -> ~pos);
array3.transformEach(0, size, (pos,old) -> ~pos);
pagingArray.transformEach(0, size, (pos,old) -> ~pos);
simulateNaiveApproach.transformEach(0, size, (pos,old) -> ~pos);
}
@ -26,17 +25,7 @@ public class FoldBenchmark {
LongArray array = LongArray.allocate(size);
LongArray array2 = SegmentLongArray.onHeap(Arena.ofShared(), size);
LongArray array3 = SegmentLongArray.onHeap(Arena.ofConfined(), size);
LongArray pagingArray;
LongArray simulateNaiveApproach = new SimulatedNaiveArray(size, pageSize);
{
// Artificially create an unnaturally small PagingLongArray to compare with SimulatedNaiveArray above
LongArrayPage[] pages = new LongArrayPage[size / pageSize];
for (int i = 0; i < pages.length; i++) {
pages[i] = LongArrayPage.onHeap(pageSize);
}
pagingArray = new PagingLongArray(ArrayPartitioningScheme.forPartitionSize(pageSize), pages, size);
}
}
@Fork(value = 1, warmups = 1)
@ -73,12 +62,4 @@ public class FoldBenchmark {
return state.simulateNaiveApproach.fold(0, 0, state.size, Long::sum);
}
@Fork(value = 1, warmups = 1)
@Warmup(iterations = 1)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public long benchArrayPaged(BenchState state) {
return state.pagingArray.fold(0, 0, state.size, Long::sum);
}
}

View File

@ -18,7 +18,6 @@ public class QuicksortBenchmark {
array.transformEach(0, size, (pos,old) -> ~pos);
array2.transformEach(0, size, (pos,old) -> ~pos);
array3.transformEach(0, size, (pos,old) -> ~pos);
pagingArray.transformEach(0, size, (pos,old) -> ~pos);
simulateNaiveApproach.transformEach(0, size, (pos,old) -> ~pos);
}
@ -27,17 +26,7 @@ public class QuicksortBenchmark {
LongArray array = LongArray.allocate(size);
LongArray array2 = SegmentLongArray.onHeap(Arena.ofShared(), size);
LongArray array3 = SegmentLongArray.onHeap(Arena.ofConfined(), size);
LongArray pagingArray;
LongArray simulateNaiveApproach = new SimulatedNaiveArray(size, pageSize);
{
// Artificially create an unnaturally small PagingLongArray to compare with SimulatedNaiveArray above
LongArrayPage[] pages = new LongArrayPage[size / pageSize];
for (int i = 0; i < pages.length; i++) {
pages[i] = LongArrayPage.onHeap(pageSize);
}
pagingArray = new PagingLongArray(ArrayPartitioningScheme.forPartitionSize(pageSize), pages, size);
}
}
@Fork(value = 1, warmups = 1)
@ -94,18 +83,4 @@ public class QuicksortBenchmark {
return array;
}
@Fork(value = 1, warmups = 1)
@Warmup(iterations = 1)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public LongArray benchArrayPaged(BenchState state) {
var array = state.pagingArray;
for (int i = 0; i + 100 < state.size; i+=100) {
array.quickSort(i, i + 100);
}
return array;
}
}

View File

@ -6,11 +6,13 @@ import nu.marginalia.array.algo.IntArraySearch;
import nu.marginalia.array.algo.IntArraySort;
import nu.marginalia.array.algo.IntArrayTransformations;
import nu.marginalia.array.delegate.ShiftedIntArray;
import nu.marginalia.array.page.IntArrayPage;
import nu.marginalia.array.page.PagingIntArray;
import nu.marginalia.array.delegate.ShiftedLongArray;
import nu.marginalia.array.page.SegmentIntArray;
import nu.marginalia.array.page.SegmentLongArray;
import nu.marginalia.array.scheme.ArrayPartitioningScheme;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.file.Files;
import java.nio.file.Path;
@ -23,35 +25,13 @@ public interface IntArray extends IntArrayBase, IntArrayTransformations, IntArra
int MAX_CONTINUOUS_SIZE = Integer.MAX_VALUE/WORD_SIZE - 16;
static IntArray allocate(long size) {
if (size < MAX_CONTINUOUS_SIZE) {
return IntArrayPage.onHeap((int) size);
}
return PagingIntArray.newOnHeap(DEFAULT_PARTITIONING_SCHEME, size);
return SegmentIntArray.onHeap(Arena.ofShared(), size);
}
static IntArray mmapRead(Path path) throws IOException {
long sizeBytes = Files.size(path);
if (sizeBytes < MAX_CONTINUOUS_SIZE) {
return IntArrayPage.fromMmapReadOnly(path, 0, (int) sizeBytes / 4);
}
return PagingIntArray.mapFileReadOnly(DEFAULT_PARTITIONING_SCHEME, path);
}
static IntArray mmapForWriting(Path path) throws IOException {
return PagingIntArray.mapFileReadWrite(DEFAULT_PARTITIONING_SCHEME, path);
}
static IntArray mmapForWriting(Path path, long size) throws IOException {
return PagingIntArray.mapFileReadWrite(DEFAULT_PARTITIONING_SCHEME, path, size);
}
default ShiftedIntArray shifted(long offset) {
default IntArray shifted(long offset) {
return new ShiftedIntArray(offset, this);
}
default ShiftedIntArray range(long start, long end) {
default IntArray range(long start, long end) {
return new ShiftedIntArray(start, end, this);
}

View File

@ -16,7 +16,7 @@ public interface IntArraySearch extends IntArrayBase {
if (val > key) break;
}
return LongArraySearch.encodeSearchMiss(pos - 1);
return LongArraySearch.encodeSearchMiss(1, pos - 1);
}
default long binarySearch(int key, long fromIndex, long toIndex) {

View File

@ -16,7 +16,7 @@ public interface LongArraySearch extends LongArrayBase {
if (val > key) break;
}
return encodeSearchMiss(pos - 1);
return encodeSearchMiss(1, pos - 1);
}
default long linearSearchUpperBound(long key, long fromIndex, long toIndex) {
@ -35,10 +35,10 @@ public interface LongArraySearch extends LongArrayBase {
long val = get(pos);
if (val == key) return pos;
if (val > key) return encodeSearchMiss(pos);
if (val > key) return encodeSearchMiss(sz, pos);
}
return encodeSearchMiss(toIndex - sz);
return encodeSearchMiss(sz, toIndex - sz);
}
default long binarySearch(long key, long fromIndex, long toIndex) {
@ -80,10 +80,10 @@ public interface LongArraySearch extends LongArrayBase {
long val = get(fromIndex);
if (val == key) return fromIndex;
if (val > key) return encodeSearchMiss(fromIndex);
if (val > key) return encodeSearchMiss(sz, fromIndex);
}
return encodeSearchMiss(toIndex - sz);
return encodeSearchMiss(sz, toIndex - sz);
}
@ -253,11 +253,11 @@ public interface LongArraySearch extends LongArrayBase {
}
static long encodeSearchMiss(long value) {
return -1 - value;
static long encodeSearchMiss(int entrySize, long value) {
return -entrySize - Math.max(0, value);
}
static long decodeSearchMiss(long value) {
return -value - 1;
static long decodeSearchMiss(int entrySize, long value) {
return -value - entrySize;
}
}

View File

@ -199,37 +199,37 @@ public class ShiftedLongArray implements LongArray {
@Override
public long linearSearch(long key, long fromIndex, long toIndex) {
return translateSearchResult(delegate.linearSearch(key, fromIndex + shift, toIndex+shift));
return translateSearchResult(1, delegate.linearSearch(key, fromIndex + shift, toIndex+shift));
}
@Override
public long linearSearchN(int sz, long key, long fromIndex, long toIndex) {
return translateSearchResult(delegate.linearSearch(key, fromIndex + shift, toIndex+shift));
return translateSearchResult(sz, delegate.linearSearch(key, fromIndex + shift, toIndex+shift));
}
@Override
public long binarySearch(long key, long fromIndex, long toIndex) {
return translateSearchResult(delegate.binarySearch(key, fromIndex + shift, toIndex+shift));
return translateSearchResult(1, delegate.binarySearch(key, fromIndex + shift, toIndex+shift));
}
@Override
public long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
return translateSearchResult(delegate.binarySearchN(sz, key, fromIndex + shift, toIndex+shift));
return translateSearchResult(sz, delegate.binarySearchN(sz, key, fromIndex + shift, toIndex+shift));
}
@Override
public long binarySearchUpperBound(long key, long fromIndex, long toIndex) {
return translateSearchResult(delegate.binarySearchUpperBound(key, fromIndex + shift, toIndex+shift));
return translateSearchResult(1, delegate.binarySearchUpperBound(key, fromIndex + shift, toIndex+shift));
}
@Override
public long linearSearchUpperBound(long key, long fromIndex, long toIndex) {
return translateSearchResult(delegate.linearSearchUpperBound(key, fromIndex + shift, toIndex+shift));
return translateSearchResult(1, delegate.linearSearchUpperBound(key, fromIndex + shift, toIndex+shift));
}
@Override
public long binarySearchUpperBoundN(int sz, long key, long fromIndex, long toIndex) {
return translateSearchResult(delegate.binarySearchUpperBoundN(sz, key, fromIndex + shift, toIndex+shift));
return translateSearchResult(sz, delegate.binarySearchUpperBoundN(sz, key, fromIndex + shift, toIndex+shift));
}
private long translateSearchResult(long delegatedIdx) {
private long translateSearchResult(int sz, long delegatedIdx) {
long ret;
if (delegatedIdx >= 0) ret = delegatedIdx - shift;
else ret = LongArraySearch.encodeSearchMiss(Math.max(0, LongArraySearch.decodeSearchMiss(delegatedIdx) - shift));
else ret = LongArraySearch.encodeSearchMiss(sz, Math.max(0, LongArraySearch.decodeSearchMiss(sz, delegatedIdx) - shift));
return ret;
}

View File

@ -1,21 +0,0 @@
package nu.marginalia.array.functor;
import nu.marginalia.array.functional.AddressRangeCall;
import nu.marginalia.array.functional.IntBinaryOperation;
import nu.marginalia.array.page.IntArrayPage;
import java.io.IOException;
public class IntFolder implements AddressRangeCall<IntArrayPage> {
public int acc;
private final IntBinaryOperation operator;
public IntFolder(int zero, IntBinaryOperation operator) {
this.acc = zero;
this.operator = operator;
}
public void apply(IntArrayPage array, int start, int end) {
acc = array.fold(acc, start, end, operator);
}
}

View File

@ -1,21 +0,0 @@
package nu.marginalia.array.functor;
import nu.marginalia.array.functional.AddressRangeCallIO;
import nu.marginalia.array.functional.IntBinaryIOOperation;
import nu.marginalia.array.page.IntArrayPage;
import java.io.IOException;
public class IntIOFolder implements AddressRangeCallIO<IntArrayPage> {
public int acc;
private final IntBinaryIOOperation operator;
public IntIOFolder(int zero, IntBinaryIOOperation operator) {
this.acc = zero;
this.operator = operator;
}
public void apply(IntArrayPage array, int start, int end) throws IOException {
acc = array.foldIO(acc, start, end, operator);
}
}

View File

@ -1,19 +0,0 @@
package nu.marginalia.array.functor;
import nu.marginalia.array.functional.AddressRangeCall;
import nu.marginalia.array.functional.LongBinaryOperation;
import nu.marginalia.array.page.LongArrayPage;
public class LongFolder implements AddressRangeCall<LongArrayPage> {
public long acc;
private final LongBinaryOperation operator;
public LongFolder(long zero, LongBinaryOperation operator) {
this.acc = zero;
this.operator = operator;
}
public void apply(LongArrayPage array, int start, int end) {
acc = array.fold(acc, start, end, operator);
}
}

View File

@ -1,21 +0,0 @@
package nu.marginalia.array.functor;
import nu.marginalia.array.functional.AddressRangeCallIO;
import nu.marginalia.array.functional.LongBinaryIOOperation;
import nu.marginalia.array.page.LongArrayPage;
import java.io.IOException;
public class LongIOFolder implements AddressRangeCallIO<LongArrayPage> {
public long acc;
private final LongBinaryIOOperation operator;
public LongIOFolder(long zero, LongBinaryIOOperation operator) {
this.acc = zero;
this.operator = operator;
}
public void apply(LongArrayPage array, int start, int end) throws IOException {
acc = array.foldIO(acc, start, end, operator);
}
}

View File

@ -61,9 +61,9 @@ public class AbstractPagingArray<T extends BulkTransferArray<B>, B> {
if (ret >= 0) {
return partitioningScheme.toRealIndex(page, (int) ret);
} else {
ret = decodeSearchMiss(ret);
ret = decodeSearchMiss(1, ret);
ret = partitioningScheme.toRealIndex(page, (int) ret);
return encodeSearchMiss(ret);
return encodeSearchMiss(1, ret);
}
}

View File

@ -1,126 +0,0 @@
package nu.marginalia.array.page;
import com.upserve.uppend.blobs.NativeIO;
import nu.marginalia.array.ArrayRangeReference;
import nu.marginalia.array.IntArray;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.IntBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
/**
 * A fixed-size IntArray backed by a single ByteBuffer — either a direct
 * buffer or a memory-mapped file region.  Indices are longs for interface
 * compatibility but must fit in an int (single-page capacity limit).
 */
public class IntArrayPage implements PartitionPage, IntArray {
    // int view sharing storage with byteBuffer; element access goes through this
    final IntBuffer intBuffer;
    final ByteBuffer byteBuffer;

    private IntArrayPage(ByteBuffer byteBuffer) {
        this.byteBuffer = byteBuffer;
        this.intBuffer = byteBuffer.asIntBuffer();
    }

    // NOTE(review): despite the name, this allocates a DIRECT (off-heap) buffer
    public static IntArrayPage onHeap(int size) {
        return new IntArrayPage(ByteBuffer.allocateDirect(WORD_SIZE*size));
    }

    /** Maps {@code size} ints starting at word offset {@code offset} of {@code file}, read-only. */
    public static IntArrayPage fromMmapReadOnly(Path file, long offset, int size) throws IOException {
        return new IntArrayPage(mmapFile(file, offset, size, FileChannel.MapMode.READ_ONLY, StandardOpenOption.READ));
    }

    /** Maps {@code size} ints starting at word offset {@code offset} of {@code file}, read-write, creating the file if needed. */
    public static IntArrayPage fromMmapReadWrite(Path file, long offset, int size) throws IOException {
        return new IntArrayPage(mmapFile(file, offset, size, FileChannel.MapMode.READ_WRITE, StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE));
    }

    // offset and size are in words (ints), not bytes; the channel may be closed
    // after mapping — the mapping remains valid
    private static ByteBuffer mmapFile(Path file, long offset, int size, FileChannel.MapMode mode, OpenOption... openOptions) throws IOException {
        try (var channel = (FileChannel) Files.newByteChannel(file, openOptions)) {
            return channel.map(mode, WORD_SIZE*offset, (long) size*WORD_SIZE);
        }
        catch (IOException ex) {
            throw new IOException("Failed to map file " + file + " (" + offset + ":" + size + ")", ex);
        }
    }

    @Override
    public int get(long at) {
        return intBuffer.get((int) at);
    }

    @Override
    public void get(long start, long end, int[] buffer) {
        intBuffer.get((int) start, buffer, 0, (int) (end - start));
    }

    @Override
    public void set(long at, int val) {
        intBuffer.put((int) at, val);
    }

    @Override
    public void set(long start, long end, IntBuffer buffer, int bufferStart) {
        intBuffer.put((int) start, buffer, bufferStart, (int) (end-start));
    }

    @Override
    public long size() {
        // capacity in ints, i.e. number of addressable elements
        return intBuffer.capacity();
    }

    public void increment(int at) {
        set(at, get(at) + 1);
    }

    @Override
    public ByteBuffer getByteBuffer() {
        return byteBuffer;
    }

    /** Writes the entire page to {@code filename}, creating it if necessary. */
    @Override
    public void write(Path filename) throws IOException {
        try (var channel = (FileChannel) Files.newByteChannel(filename, StandardOpenOption.WRITE, StandardOpenOption.CREATE)) {
            write(channel);
        }
    }

    @Override
    public ArrayRangeReference<IntArray> directRangeIfPossible(long start, long end) {
        // single page: the whole range is always directly addressable
        return new ArrayRangeReference<>(this, start, end);
    }

    /** Flushes changes to disk when memory-mapped; no-op for direct buffers. */
    @Override
    public void force() {
        if (byteBuffer instanceof MappedByteBuffer mb) {
            mb.force();
        }
    }

    /** Reads (arrayEnd - arrayStart) words from {@code source} at word offset {@code sourceStart} into this page. */
    @Override
    public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {
        int index = (int) (arrayStart * WORD_SIZE);
        int length = (int) ((arrayEnd - arrayStart) * WORD_SIZE);

        var slice = byteBuffer.slice(index, length);

        long startPos = sourceStart * WORD_SIZE;
        // loop until the slice is full; read() may return short counts
        while (slice.position() < slice.capacity()) {
            source.read(slice, startPos + slice.position());
        }
    }

    // madvise hints; cast will fail for non-mapped (direct) buffers — only call on mmapped pages
    @Override
    public void advice(NativeIO.Advice advice) throws IOException {
        NativeIO.madvise((MappedByteBuffer) byteBuffer, advice);
    }

    @Override
    public void advice(NativeIO.Advice advice, long start, long end) throws IOException {
        NativeIO.madviseRange((MappedByteBuffer) byteBuffer, advice, (int) start, (int) (end-start));
    }
}

View File

@ -1,166 +0,0 @@
package nu.marginalia.array.page;
import com.upserve.uppend.blobs.NativeIO;
import nu.marginalia.array.ArrayRangeReference;
import nu.marginalia.array.LongArray;
import javax.annotation.Nullable;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.ByteBuffer;
import java.nio.LongBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
/**
 * A fixed-size LongArray backed by a single ByteBuffer, either arena-allocated
 * off-heap memory or an arena-scoped memory-mapped file region.  Closing the
 * page closes its arena, releasing the memory/mapping.
 */
public class LongArrayPage implements PartitionPage, LongArray {
    // long view sharing storage with byteBuffer; element access goes through this
    final LongBuffer longBuffer;

    @Nullable
    private final Arena arena;

    final ByteBuffer byteBuffer;
    private boolean closed;

    LongArrayPage(ByteBuffer byteBuffer,
                  @Nullable Arena arena) {
        this.byteBuffer = byteBuffer;
        this.longBuffer = byteBuffer.asLongBuffer();
        this.arena = arena;
    }

    // NOTE(review): despite the name, this allocates off-heap memory via a shared Arena
    public static LongArrayPage onHeap(int size) {
        var arena = Arena.ofShared();
        return new LongArrayPage(arena.allocate((long) WORD_SIZE*size, 8).asByteBuffer(), arena);
    }

    /** Maps {@code size} longs at word offset {@code offset} of {@code file}, read-only. */
    public static LongArrayPage fromMmapReadOnly(Path file, long offset, int size) throws IOException {
        var arena = Arena.ofShared();

        return new LongArrayPage(
                mmapFile(arena, file, offset, size, FileChannel.MapMode.READ_ONLY, StandardOpenOption.READ),
                arena);
    }

    /** Maps {@code size} longs at word offset {@code offset} of {@code file}, read-write, creating the file if needed. */
    public static LongArrayPage fromMmapReadWrite(Path file, long offset, int size) throws IOException {
        var arena = Arena.ofShared();

        return new LongArrayPage(
                mmapFile(arena, file, offset, size, FileChannel.MapMode.READ_WRITE,
                        StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE),
                arena);
    }

    // offset and size are in words (longs); the mapping's lifetime is tied to
    // the arena, so the channel can be closed immediately after mapping
    private static ByteBuffer mmapFile(Arena arena,
                                       Path file,
                                       long offset,
                                       int size,
                                       FileChannel.MapMode mode,
                                       OpenOption... openOptions) throws IOException
    {
        try (var channel = (FileChannel) Files.newByteChannel(file, openOptions)) {
            return channel.map(mode,
                            WORD_SIZE*offset,
                            (long) size*WORD_SIZE,
                            arena)
                    .asByteBuffer();
        }
        catch (IOException ex) {
            throw new IOException("Failed to map file " + file + " (" + offset + ":" + size + ")", ex);
        }
    }

    @Override
    public long get(long at) {
        try {
            return longBuffer.get((int) at);
        }
        catch (IndexOutOfBoundsException ex) {
            // rethrow with the offending index and valid range for easier debugging
            throw new IndexOutOfBoundsException("@" + at + "(" + 0 + ":" + longBuffer.capacity() + ")");
        }
    }

    @Override
    public void get(long start, long end, long[] buffer) {
        longBuffer.get((int) start, buffer, 0, (int) (end - start));
    }

    @Override
    public void set(long at, long val) {
        longBuffer.put((int) at, val);
    }

    @Override
    public void set(long start, long end, LongBuffer buffer, int bufferStart) {
        longBuffer.put((int) start, buffer, bufferStart, (int) (end-start));
    }

    /** Idempotent; synchronized to make double-close from multiple threads safe. */
    @Override
    public synchronized void close() {
        if (arena != null) {
            if (!closed) {
                arena.close();
                closed = true;
            }
        }
    }

    @Override
    public long size() {
        // capacity in longs, i.e. number of addressable elements
        return longBuffer.capacity();
    }

    public void increment(int at) {
        set(at, get(at) + 1);
    }

    @Override
    public ByteBuffer getByteBuffer() {
        return byteBuffer;
    }

    /** Writes the entire page to {@code filename}, creating it if necessary. */
    @Override
    public void write(Path filename) throws IOException {
        try (var channel = (FileChannel) Files.newByteChannel(filename, StandardOpenOption.WRITE, StandardOpenOption.CREATE)) {
            write(channel);
        }
    }

    /** Flushes changes to disk when memory-mapped; no-op otherwise. */
    @Override
    public void force() {
        if (byteBuffer instanceof MappedByteBuffer mb) {
            mb.force();
        }
    }

    // single page: the whole range is always directly addressable
    public ArrayRangeReference<LongArray> directRangeIfPossible(long start, long end) {
        return new ArrayRangeReference<>(this, start, end);
    }

    /** Reads (arrayEnd - arrayStart) words from {@code source} at word offset {@code sourceStart} into this page. */
    @Override
    public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {
        int index = (int) (arrayStart * WORD_SIZE);
        int length = (int) ((arrayEnd - arrayStart) * WORD_SIZE);

        var slice = byteBuffer.slice(index, length);

        long startPos = sourceStart * WORD_SIZE;
        // loop until the slice is full; read() may return short counts
        while (slice.position() < slice.capacity()) {
            source.read(slice, startPos + slice.position());
        }
    }

    // madvise hints; cast will fail for non-mapped buffers — only call on mmapped pages
    @Override
    public void advice(NativeIO.Advice advice) throws IOException {
        NativeIO.madvise((MappedByteBuffer) byteBuffer, advice);
    }

    @Override
    public void advice(NativeIO.Advice advice, long start, long end) throws IOException {
        NativeIO.madviseRange((MappedByteBuffer) byteBuffer, advice, (int) start, (int) (end-start));
    }
}

View File

@ -1,438 +0,0 @@
package nu.marginalia.array.page;
import com.upserve.uppend.blobs.NativeIO;
import nu.marginalia.array.ArrayRangeReference;
import nu.marginalia.array.IntArray;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.array.buffer.IntQueryBuffer;
import nu.marginalia.array.delegate.ReferenceImplIntArrayDelegate;
import nu.marginalia.array.delegate.ShiftedIntArray;
import nu.marginalia.array.functional.*;
import nu.marginalia.array.functor.IntFolder;
import nu.marginalia.array.functor.IntIOFolder;
import nu.marginalia.array.scheme.ArrayPartitioningScheme;
import java.io.IOException;
import java.nio.IntBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
public class PagingIntArray extends AbstractPagingArray<IntArrayPage, IntBuffer> implements IntArray {
private final ReferenceImplIntArrayDelegate defaults;
private PagingIntArray(ArrayPartitioningScheme partitioningScheme,
IntArrayPage[] pages,
long size) {
super(partitioningScheme, pages, size);
defaults = new ReferenceImplIntArrayDelegate(this);
}
public static IntArray newOnHeap(ArrayPartitioningScheme partitioningScheme, long cardinality) {
if (cardinality < MAX_CONTINUOUS_SIZE) {
return IntArrayPage.onHeap((int) cardinality);
}
return newPartitionedOnHeap(partitioningScheme, cardinality);
}
public static IntArray newPartitionedOnHeap(ArrayPartitioningScheme partitioningScheme, long cardinality) {
IntArrayPage[] pages = new IntArrayPage[partitioningScheme.getPartitions(cardinality)];
for (int i = 0; i < pages.length; i++) {
pages[i] = IntArrayPage.onHeap(partitioningScheme.getRequiredPageSize(i, cardinality));
}
return new PagingIntArray(partitioningScheme, pages, cardinality);
}
public static PagingIntArray mapFileReadOnly(ArrayPartitioningScheme partitioningScheme, Path file)
throws IOException
{
long sizeBytes = Files.size(file);
assert sizeBytes % WORD_SIZE == 0;
long size = sizeBytes / WORD_SIZE;
IntArrayPage[] pages = new IntArrayPage[partitioningScheme.getPartitions(size)];
long offset = 0;
for (int i = 0; i < pages.length; i++) {
int partitionSize = partitioningScheme.getRequiredPageSize(i, size);
pages[i] = IntArrayPage.fromMmapReadOnly(file, offset, partitionSize);
offset += partitionSize;
}
return new PagingIntArray(partitioningScheme, pages, size);
}
public static PagingIntArray mapFileReadWrite(ArrayPartitioningScheme partitioningScheme, Path file)
throws IOException
{
long sizeBytes = Files.size(file);
assert sizeBytes % LongArrayPage.WORD_SIZE == 0;
long size = sizeBytes / LongArrayPage.WORD_SIZE;
IntArrayPage[] pages = new IntArrayPage[partitioningScheme.getPartitions(size)];
long offset = 0;
for (int i = 0; i < pages.length; i++) {
int partitionSize = partitioningScheme.getRequiredPageSize(i, size);
pages[i] = IntArrayPage.fromMmapReadWrite(file, offset, partitionSize);
offset += partitionSize;
}
return new PagingIntArray(partitioningScheme, pages, size);
}
public static PagingIntArray mapFileReadWrite(ArrayPartitioningScheme partitioningScheme, Path file, long size)
throws IOException
{
IntArrayPage[] pages = new IntArrayPage[partitioningScheme.getPartitions(size)];
long offset = 0;
for (int i = 0; i < pages.length; i++) {
int partitionSize = partitioningScheme.getRequiredPageSize(i, size);
pages[i] = IntArrayPage.fromMmapReadWrite(file, offset, partitionSize);
offset += partitionSize;
}
return new PagingIntArray(partitioningScheme, pages, size);
}
public int get(long pos) {
int page = partitioningScheme.getPage(pos);
int offset = partitioningScheme.getOffset(pos);
try {
return pages[page].get(partitioningScheme.getOffset(pos));
}
catch (IndexOutOfBoundsException ex) {
throw new IndexOutOfBoundsException("Index out of bounds for " + pos + " => (" + page + ":" + offset + ")");
}
}
@Override
public int getAndIncrement(long pos) {
return pages[partitioningScheme.getPage(pos)].getAndIncrement(partitioningScheme.getOffset(pos));
}
/** Copy the ints in [start, end) into {@code buffer}, starting at buffer index 0. */
@Override
public void get(long start, long end, int[] buffer) {
    if (!partitioningScheme.isSamePage(start, end)) {
        // The span straddles a page boundary; fall back to the generic
        // element-by-element reference implementation
        defaults.get(start, end, buffer);
        return;
    }

    int page = partitioningScheme.getPage(start);
    pages[page].get(partitioningScheme.getOffset(start),
                    partitioningScheme.getEndOffset(start, end),
                    buffer);
}
/**
 * Write {@code value} at global position {@code pos}.
 *
 * @throws IndexOutOfBoundsException carrying the translated (page:offset) address
 */
@Override
public void set(long pos, int value) {
    // Resolve (page, offset) up front so the error message can echo the
    // translated address if the write lands out of bounds
    final int page = partitioningScheme.getPage(pos);
    final int offset = partitioningScheme.getOffset(pos);

    try {
        pages[page].set(offset, value);
    }
    catch (IndexOutOfBoundsException ex) {
        throw new IndexOutOfBoundsException("Index out of bounds for " + pos + " => (" + page + ":" + offset + ")");
    }
}
/** Logical length of the array in int elements. */
@Override
public long size() {
    return size;
}
/**
 * Increment the int at global position {@code pos} by one.
 *
 * @throws IndexOutOfBoundsException carrying the translated (page:offset) address
 */
@Override
public void increment(long pos) {
    int page = partitioningScheme.getPage(pos);
    int offset = partitioningScheme.getOffset(pos);

    try {
        // Use the offset computed above rather than re-deriving it, consistent
        // with set() (the previous code called getOffset(pos) a second time)
        pages[page].increment(offset);
    }
    catch (IndexOutOfBoundsException ex) {
        throw new IndexOutOfBoundsException("Index out of bounds for " + pos + " => (" + page + ":" + offset + ")");
    }
}
/**
 * Invoke {@code consumer} with each (global index, value) pair in [start, end),
 * walking the underlying pages in order.
 */
@Override
public void forEach(long start, long end, LongIntConsumer consumer) {
    assert end >= start;
    int page = partitioningScheme.getPage(start);
    long endPos;
    // Advance one page-sized chunk per outer iteration; endPos is the global
    // position where the current page's slice of the span ends
    for (long pos = start; pos < end; pos = endPos) {
        endPos = partitioningScheme.getPageEnd(pos, end);
        int sOff = partitioningScheme.getOffset(pos);
        // NOTE(review): first argument is 'start' here, but the analogous call in
        // transferFrom() passes 'pos'. Confirm getEndOffset() yields the same end
        // offset for chunks after the first page, or later pages may be mis-sized.
        int eOff = partitioningScheme.getEndOffset(start, endPos);
        var p = pages[page++];
        for (long i = sOff; i < eOff; i++) {
            // (pos - sOff) rebases the page-local index i to the global index space
            consumer.accept(i + (pos - sOff), p.get(i));
        }
    }
}
/** Set every element of [fromIndex, toIndex) to {@code value}. Empty/inverted ranges are a no-op. */
@Override
public void fill(long fromIndex, long toIndex, int value) {
    if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
        // Fast path: the whole span lives in one page
        int page = partitioningScheme.getPage(fromIndex);
        pages[page].fill(partitioningScheme.getOffset(fromIndex),
                         partitioningScheme.getEndOffset(fromIndex, toIndex),
                         value);
    }
    else if (toIndex >= fromIndex) {
        delegateToEachPage(fromIndex, toIndex, (page, s, e) -> page.fill(s, e, value));
    }
}
/**
 * Replace each element in [start, end) with {@code transformer.transform(globalIndex, value)},
 * walking the underlying pages in order.
 */
@Override
public void transformEach(long start, long end, IntTransformer transformer) {
    assert end >= start;
    int page = partitioningScheme.getPage(start);
    long endPos;
    for (long pos = start; pos < end; pos = endPos) {
        endPos = partitioningScheme.getPageEnd(pos, end);
        int sOff = partitioningScheme.getOffset(pos);
        // NOTE(review): 'start' vs 'pos' — see the matching comment in forEach();
        // transferFrom() passes 'pos' for the equivalent computation
        int eOff = partitioningScheme.getEndOffset(start, endPos);
        var p = pages[page++];
        for (long i = sOff; i < eOff; i++) {
            // (pos - sOff) rebases the page-local index i to the global index space
            p.set(i, transformer.transform(i + (pos - sOff), p.get(i)));
        }
    }
}
/**
 * Like transformEach, but the transformer may perform I/O and throw; the
 * exception propagates and leaves already-visited elements transformed.
 *
 * @throws IOException from the transformer
 */
@Override
public void transformEachIO(long start, long end, IntIOTransformer transformer) throws IOException {
    assert end >= start;
    int page = partitioningScheme.getPage(start);
    long endPos;
    for (long pos = start; pos < end; pos = endPos) {
        endPos = partitioningScheme.getPageEnd(pos, end);
        int sOff = partitioningScheme.getOffset(pos);
        // NOTE(review): 'start' vs 'pos' — see the matching comment in forEach()
        int eOff = partitioningScheme.getEndOffset(start, endPos);
        var p = pages[page++];
        for (long i = sOff; i < eOff; i++) {
            p.set(i, transformer.transform(i + (pos - sOff), p.get(i)));
        }
    }
}
/** Left-fold [start, end) with an I/O-capable operator, starting from {@code zero}. */
@Override
public int foldIO(int zero, long start, long end, IntBinaryIOOperation operator) throws IOException {
    IntIOFolder accumulator = new IntIOFolder(zero, operator);
    delegateToEachPageIO(start, end, accumulator);
    return accumulator.acc;
}
/** Left-fold [start, end) with {@code operator}, starting from {@code zero}. */
@Override
public int fold(int zero, long start, long end, IntBinaryOperation operator) {
    IntFolder accumulator = new IntFolder(zero, operator);
    delegateToEachPage(start, end, accumulator);
    return accumulator.acc;
}
/**
 * Linear search for {@code key} in [fromIndex, toIndex).
 *
 * When the range fits in a single page, the search is delegated to that page and
 * the page-local result is rebased to the global index space; otherwise the
 * generic reference implementation scans element by element.
 *
 * NOTE(review): on a miss the page presumably returns an encoded (negative)
 * value — confirm translateSearchResultsFromPage preserves the miss encoding
 * instead of producing a spurious positive hit.
 */
@Override
public long linearSearch(int key, long fromIndex, long toIndex) {
    if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
        int sOff = partitioningScheme.getOffset(fromIndex);
        int eOff = partitioningScheme.getEndOffset(fromIndex, toIndex);

        long ret = pages[partitioningScheme.getPage(fromIndex)].linearSearch(key, sOff, eOff);

        return translateSearchResultsFromPage(fromIndex, ret);
    }
    else {
        return defaults.linearSearch(key, fromIndex, toIndex);
    }
}
/**
 * Binary search for {@code key} in [fromIndex, toIndex); the range is assumed
 * to be sorted.
 *
 * NOTE(review): on a miss the page presumably returns an encoded (negative)
 * value — confirm translateSearchResultsFromPage preserves the miss encoding
 * instead of producing a spurious positive hit.
 */
@Override
public long binarySearch(int key, long fromIndex, long toIndex) {
    if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
        int sOff = partitioningScheme.getOffset(fromIndex);
        int eOff = partitioningScheme.getEndOffset(fromIndex, toIndex);

        long ret = pages[partitioningScheme.getPage(fromIndex)].binarySearch(key, sOff, eOff);

        return translateSearchResultsFromPage(fromIndex, ret);
    }
    else {
        return defaults.binarySearch(key, fromIndex, toIndex);
    }
}
/**
 * Binary search in sorted [fromIndex, toIndex) returning the index of the first
 * element not less than {@code key} (insertion point on a miss).
 *
 * NOTE(review): verify translateSearchResultsFromPage handles the upper-bound
 * return convention the same way it handles plain search results.
 */
@Override
public long binarySearchUpperBound(int key, long fromIndex, long toIndex) {
    if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
        int sOff = partitioningScheme.getOffset(fromIndex);
        int eOff = partitioningScheme.getEndOffset(fromIndex, toIndex);

        long ret = pages[partitioningScheme.getPage(fromIndex)].binarySearchUpperBound(key, sOff, eOff);

        return translateSearchResultsFromPage(fromIndex, ret);
    }
    else {
        return defaults.binarySearchUpperBound(key, fromIndex, toIndex);
    }
}
/**
 * Keep only the buffer entries that are present in the sorted span
 * [searchStart, searchEnd), limited by {@code boundary}.
 */
@Override
public void retain(IntQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
    if (!partitioningScheme.isSamePage(searchStart, searchEnd)) {
        defaults.retain(buffer, boundary, searchStart, searchEnd);
        return;
    }

    int sOff = partitioningScheme.getOffset(searchStart);
    int eOff = partitioningScheme.getEndOffset(searchStart, searchEnd);

    // An empty page-local span means nothing can match; skip the page call
    if (eOff > sOff) {
        int page = partitioningScheme.getPage(searchStart);
        pages[page].retain(buffer, boundary, sOff, eOff);
    }
}
/**
 * Remove the buffer entries that are present in the sorted span
 * [searchStart, searchEnd), limited by {@code boundary}.
 */
@Override
public void reject(IntQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
    if (!partitioningScheme.isSamePage(searchStart, searchEnd)) {
        defaults.reject(buffer, boundary, searchStart, searchEnd);
        return;
    }

    int sOff = partitioningScheme.getOffset(searchStart);
    int eOff = partitioningScheme.getEndOffset(searchStart, searchEnd);

    // An empty page-local span means nothing can match; skip the page call
    if (eOff > sOff) {
        int page = partitioningScheme.getPage(searchStart);
        pages[page].reject(buffer, boundary, sOff, eOff);
    }
}
/** Sort [start, end), using the sorting context's external-sort facilities for big spans. */
public void sortLargeSpan(SortingContext ctx, long start, long end) throws IOException {
    if (!partitioningScheme.isSamePage(start, end)) {
        defaults.sortLargeSpan(ctx, start, end);
        return;
    }

    int sOff = partitioningScheme.getOffset(start);
    int eOff = partitioningScheme.getEndOffset(start, end);

    if (eOff > sOff) {
        int page = partitioningScheme.getPage(start);
        pages[page].sortLargeSpan(ctx, sOff, eOff);
    }
}
/**
 * Write every page's contents, in partition order, to {@code fileName},
 * creating the file if necessary and forcing it to disk before returning.
 */
public void write(Path fileName) throws IOException {
    try (FileChannel channel = (FileChannel) Files.newByteChannel(fileName, StandardOpenOption.CREATE, StandardOpenOption.WRITE)) {
        for (IntArrayPage page : pages) {
            page.write(channel);
        }
        channel.force(false);
    }
}
/**
 * The declared size of the array.
 *
 * @throws UnsupportedOperationException when no size was declared (size &lt; 0)
 */
public long getSize() {
    if (size >= 0) {
        return size;
    }
    throw new UnsupportedOperationException();
}
/**
 * A view of [start, end). When the span fits in one page the view is backed
 * directly by that page; otherwise a shifted wrapper over this array is returned.
 */
public ShiftedIntArray range(long start, long end) {
    if (!partitioningScheme.isSamePage(start, end)) {
        return new ShiftedIntArray(start, end, this);
    }

    int page = partitioningScheme.getPage(start);
    return pages[page].range(partitioningScheme.getOffset(start),
                             partitioningScheme.getEndOffset(start, end));
}
/**
 * A reference to [start, end) that points straight at the backing page when the
 * span does not cross a page boundary, avoiding per-access translation.
 */
@Override
public ArrayRangeReference<IntArray> directRangeIfPossible(long start, long end) {
    if (!partitioningScheme.isSamePage(start, end)) {
        return new ArrayRangeReference<>(this, start, end);
    }

    return new ArrayRangeReference<>(
            pages[partitioningScheme.getPage(start)],
            partitioningScheme.getOffset(start),
            partitioningScheme.getEndOffset(start, end));
}
/** Flush every page's mapped contents to storage. */
@Override
public void force() {
    for (int i = 0; i < pages.length; i++) {
        pages[i].force();
    }
}
/** Apply the given madvise-style hint to every page. */
@Override
public void advice(NativeIO.Advice advice) throws IOException {
    for (int i = 0; i < pages.length; i++) {
        pages[i].advice(advice);
    }
}
/** Apply the given madvise-style hint to the pages overlapping [start, end). */
@Override
public void advice(NativeIO.Advice advice, long start, long end) throws IOException {
    delegateToEachPageIO(start, end, (page, s, e) -> page.advice(advice, s, e));
}
/**
 * Bulk-load ints from {@code source} into the span [arrayStart, arrayEnd),
 * reading sequentially from {@code sourceStart}. All positions are element
 * counts, not byte offsets. The span may cross page boundaries; each page
 * receives its slice in turn.
 *
 * @throws IOException from the underlying channel reads
 */
public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {
    assert arrayEnd >= arrayStart;
    int page = partitioningScheme.getPage(arrayStart);
    long endPos;
    for (long pos = arrayStart; pos < arrayEnd; pos = endPos) {
        endPos = partitioningScheme.getPageEnd(pos, arrayEnd);
        int sOff = partitioningScheme.getOffset(pos);
        // Note: unlike forEach/transformEach, the first argument here is 'pos'
        int eOff = partitioningScheme.getEndOffset(pos, endPos);
        pages[page++].transferFrom(source, sourceStart, sOff, eOff);
        // Advance the read position by the number of elements just copied
        sourceStart+=(endPos - pos);
    }
}
}

View File

@ -1,619 +0,0 @@
package nu.marginalia.array.page;
import com.upserve.uppend.blobs.NativeIO;
import nu.marginalia.array.ArrayRangeReference;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.delegate.ReferenceImplLongArrayDelegate;
import nu.marginalia.array.delegate.ShiftedLongArray;
import nu.marginalia.array.functional.LongBinaryIOOperation;
import nu.marginalia.array.functional.LongIOTransformer;
import nu.marginalia.array.functional.LongLongConsumer;
import nu.marginalia.array.functional.LongTransformer;
import nu.marginalia.array.functor.LongIOFolder;
import nu.marginalia.array.scheme.ArrayPartitioningScheme;
import java.io.IOException;
import java.nio.LongBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
/**
 * LongArray implementation that stitches several LongArrayPage partitions into
 * one large logical array, translating global positions into (page, offset)
 * pairs via an ArrayPartitioningScheme.
 *
 * The recurring pattern throughout this class: when an operation's range fits
 * within a single page, delegate directly to that page with page-local offsets;
 * otherwise fall back to the generic reference implementation ('defaults') or
 * iterate page by page.
 */
public class PagingLongArray extends AbstractPagingArray<LongArrayPage, LongBuffer> implements LongArray {
    // Generic element-by-element implementation used for ranges that straddle pages
    private final ReferenceImplLongArrayDelegate defaults;

    PagingLongArray(ArrayPartitioningScheme partitioningScheme, LongArrayPage[] pages, long size) {
        super(partitioningScheme, pages, size);
        defaults = new ReferenceImplLongArrayDelegate(this);
    }

    /** Heap-allocated array of {@code cardinality} longs, split per the scheme. */
    public static LongArray newOnHeap(ArrayPartitioningScheme partitioningScheme, long cardinality) {
        return newPartitionedOnHeap(partitioningScheme, cardinality);
    }

    public static LongArray newPartitionedOnHeap(ArrayPartitioningScheme partitioningScheme, long cardinality) {
        LongArrayPage[] pages = new LongArrayPage[partitioningScheme.getPartitions(cardinality)];

        for (int i = 0; i < pages.length; i++) {
            pages[i] = LongArrayPage.onHeap(partitioningScheme.getRequiredPageSize(i, cardinality));
        }

        return new PagingLongArray(partitioningScheme, pages, cardinality);
    }

    /** Map an existing file read-only; element count is derived from the file length. */
    public static PagingLongArray mapFileReadOnly(ArrayPartitioningScheme partitioningScheme, Path file)
            throws IOException
    {
        long sizeBytes = Files.size(file);
        assert sizeBytes % WORD_SIZE == 0;

        long size = sizeBytes / WORD_SIZE;

        LongArrayPage[] pages = new LongArrayPage[partitioningScheme.getPartitions(size)];
        long offset = 0;
        for (int i = 0; i < pages.length; i++) {
            int partitionSize = partitioningScheme.getRequiredPageSize(i, size);
            pages[i] = LongArrayPage.fromMmapReadOnly(file, offset, partitionSize);
            offset += partitionSize;
        }

        return new PagingLongArray(partitioningScheme, pages, size);
    }

    /** Map the file read-write as exactly {@code size} longs, one mmapped page per partition. */
    public static PagingLongArray mapFileReadWrite(ArrayPartitioningScheme partitioningScheme, Path file, long size)
            throws IOException
    {
        LongArrayPage[] pages = new LongArrayPage[partitioningScheme.getPartitions(size)];
        long offset = 0;
        for (int i = 0; i < pages.length; i++) {
            int partitionSize = partitioningScheme.getRequiredPageSize(i, size);
            pages[i] = LongArrayPage.fromMmapReadWrite(file, offset, partitionSize);
            offset += partitionSize;
        }

        return new PagingLongArray(partitioningScheme, pages, size);
    }

    /**
     * Read the long at global position {@code pos}.
     *
     * @throws IndexOutOfBoundsException carrying the translated (page:offset) address
     */
    @Override
    public long get(long pos) {
        int page = partitioningScheme.getPage(pos);
        int offset = partitioningScheme.getOffset(pos);

        try {
            return pages[page].get(offset);
        }
        catch (IndexOutOfBoundsException ex) {
            throw new IndexOutOfBoundsException("Index out of bounds for " + pos + " => (" + page + ":" + offset + ")");
        }
    }

    /** Copy [start, end) into {@code buffer}, starting at buffer index 0. */
    @Override
    public void get(long start, long end, long[] buffer) {
        if (partitioningScheme.isSamePage(start, end)) {
            int sOff = partitioningScheme.getOffset(start);
            int eOff = partitioningScheme.getEndOffset(start, end);

            pages[partitioningScheme.getPage(start)].get(sOff, eOff, buffer);
        }
        else {
            defaults.get(start, end, buffer);
        }
    }

    @Override
    public long getAndIncrement(long pos) {
        return pages[partitioningScheme.getPage(pos)].getAndIncrement(partitioningScheme.getOffset(pos));
    }

    @Override
    public void set(long pos, long value) {
        int page = partitioningScheme.getPage(pos);
        int offset = partitioningScheme.getOffset(pos);
        try {
            pages[page].set(offset, value);
        }
        catch (IndexOutOfBoundsException ex) {
            throw new IndexOutOfBoundsException("Index out of bounds for " + pos + " => (" + page + ":" + offset + ")");
        }
    }

    /** Logical length of the array in long elements. */
    @Override
    public long size() {
        return size;
    }

    @Override
    public void increment(long pos) {
        int page = partitioningScheme.getPage(pos);
        int offset = partitioningScheme.getOffset(pos);

        try {
            // NOTE(review): re-derives the offset instead of using the local above
            pages[page].increment(partitioningScheme.getOffset(pos));
        }
        catch (IndexOutOfBoundsException ex) {
            throw new IndexOutOfBoundsException("Index out of bounds for " + pos + " => (" + page + ":" + offset + ")");
        }
    }

    /** Invoke consumer with each (global index, value) pair in [start, end), page by page. */
    @Override
    public void forEach(long start, long end, LongLongConsumer consumer) {
        assert end >= start;
        int page = partitioningScheme.getPage(start);
        long endPos;
        for (long pos = start; pos < end; pos = endPos) {
            endPos = partitioningScheme.getPageEnd(pos, end);
            int sOff = partitioningScheme.getOffset(pos);
            // NOTE(review): first argument is 'start' here but 'pos' in
            // transferFrom(); confirm later pages are sized correctly
            int eOff = partitioningScheme.getEndOffset(start, endPos);
            var p = pages[page++];
            for (long i = sOff; i < eOff; i++) {
                // (pos - sOff) rebases the page-local index to the global index space
                consumer.accept(i + (pos - sOff), p.get(i));
            }
        }
    }

    @Override
    public void fill(long fromIndex, long toIndex, long value) {
        if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
            int sOff = partitioningScheme.getOffset(fromIndex);
            int eOff = partitioningScheme.getEndOffset(fromIndex, toIndex);

            pages[partitioningScheme.getPage(fromIndex)].fill(sOff, eOff, value);
        }
        else {
            // NOTE(review): unlike PagingIntArray.fill, this branch has no
            // toIndex >= fromIndex guard before delegating
            delegateToEachPage(fromIndex, toIndex, (page, s, e) -> page.fill(s, e, value));
        }
    }

    /** Replace each element in [start, end) with transformer(globalIndex, value). */
    @Override
    public void transformEach(long start, long end, LongTransformer transformer) {
        assert end >= start;
        int page = partitioningScheme.getPage(start);
        long endPos;
        for (long pos = start; pos < end; pos = endPos) {
            endPos = partitioningScheme.getPageEnd(pos, end);
            int sOff = partitioningScheme.getOffset(pos);
            // NOTE(review): 'start' vs 'pos' — see forEach()
            int eOff = partitioningScheme.getEndOffset(start, endPos);
            var p = pages[page++];
            for (long i = sOff; i < eOff; i++) {
                p.set(i, transformer.transform(i + (pos - sOff), p.get(i)));
            }
        }
    }

    /** Like transformEach, but the transformer may perform I/O and throw. */
    @Override
    public void transformEachIO(long start, long end, LongIOTransformer transformer) throws IOException {
        assert end >= start;
        int page = partitioningScheme.getPage(start);
        long endPos;
        for (long pos = start; pos < end; pos = endPos) {
            endPos = partitioningScheme.getPageEnd(pos, end);
            int sOff = partitioningScheme.getOffset(pos);
            // NOTE(review): 'start' vs 'pos' — see forEach()
            int eOff = partitioningScheme.getEndOffset(start, endPos);
            var p = pages[page++];
            for (long i = sOff; i < eOff; i++) {
                p.set(i, transformer.transform(i + (pos - sOff), p.get(i)));
            }
        }
    }

    /** Left-fold [start, end) with an I/O-capable operator, starting from zero. */
    @Override
    public long foldIO(long zero, long start, long end, LongBinaryIOOperation operator) throws IOException {
        var folder = new LongIOFolder(zero, operator);

        delegateToEachPageIO(start, end, folder);

        return folder.acc;
    }

    /**
     * Linear search for key in [fromIndex, toIndex).
     * NOTE(review): misses are presumably returned as an encoded negative value;
     * confirm translateSearchResultsFromPage preserves that encoding.
     */
    @Override
    public long linearSearch(long key, long fromIndex, long toIndex) {
        if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
            int sOff = partitioningScheme.getOffset(fromIndex);
            int eOff = partitioningScheme.getEndOffset(fromIndex, toIndex);

            long ret = pages[partitioningScheme.getPage(fromIndex)].linearSearch(key, sOff, eOff);

            return translateSearchResultsFromPage(fromIndex, ret);
        }
        else {
            return defaults.linearSearch(key, fromIndex, toIndex);
        }
    }

    /** Linear search over {@code sz}-long records keyed on the first word of each. */
    @Override
    public long linearSearchN(int sz, long key, long fromIndex, long toIndex) {
        if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
            int sOff = partitioningScheme.getOffset(fromIndex);
            int eOff = partitioningScheme.getEndOffset(fromIndex, toIndex);

            long ret = pages[partitioningScheme.getPage(fromIndex)].linearSearchN(sz, key, sOff, eOff);

            return translateSearchResultsFromPage(fromIndex, ret);
        }
        else {
            return defaults.linearSearchN(sz, key, fromIndex, toIndex);
        }
    }

    /** Binary search in sorted [fromIndex, toIndex); see linearSearch for miss-encoding note. */
    @Override
    public long binarySearch(long key, long fromIndex, long toIndex) {
        if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
            int sOff = partitioningScheme.getOffset(fromIndex);
            int eOff = partitioningScheme.getEndOffset(fromIndex, toIndex);

            long ret = pages[partitioningScheme.getPage(fromIndex)].binarySearch(key, sOff, eOff);

            return translateSearchResultsFromPage(fromIndex, ret);
        }
        else {
            return defaults.binarySearch(key, fromIndex, toIndex);
        }
    }

    /** Binary search over {@code sz}-long records keyed on the first word of each. */
    @Override
    public long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
        if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
            int sOff = partitioningScheme.getOffset(fromIndex);
            int eOff = partitioningScheme.getEndOffset(fromIndex, toIndex);

            long ret = pages[partitioningScheme.getPage(fromIndex)].binarySearchN(sz, key, sOff, eOff);

            return translateSearchResultsFromPage(fromIndex, ret);
        }
        else {
            return defaults.binarySearchN(sz, key, fromIndex, toIndex);
        }
    }

    /** Binary search returning the first index whose element is not less than key. */
    @Override
    public long binarySearchUpperBound(long key, long fromIndex, long toIndex) {
        if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
            int sOff = partitioningScheme.getOffset(fromIndex);
            int eOff = partitioningScheme.getEndOffset(fromIndex, toIndex);

            long ret = pages[partitioningScheme.getPage(fromIndex)].binarySearchUpperBound(key, sOff, eOff);

            return translateSearchResultsFromPage(fromIndex, ret);
        }
        else {
            return defaults.binarySearchUpperBound(key, fromIndex, toIndex);
        }
    }

    /** Linear-scan variant of the upper-bound search. */
    @Override
    public long linearSearchUpperBound(long key, long fromIndex, long toIndex) {
        if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
            int sOff = partitioningScheme.getOffset(fromIndex);
            int eOff = partitioningScheme.getEndOffset(fromIndex, toIndex);

            long ret = pages[partitioningScheme.getPage(fromIndex)].linearSearchUpperBound(key, sOff, eOff);

            return translateSearchResultsFromPage(fromIndex, ret);
        }
        else {
            return defaults.linearSearchUpperBound(key, fromIndex, toIndex);
        }
    }

    /** Upper-bound search over {@code sz}-long records keyed on the first word of each. */
    @Override
    public long binarySearchUpperBoundN(int sz, long key, long fromIndex, long toIndex) {
        if (partitioningScheme.isSamePage(fromIndex, toIndex)) {
            int sOff = partitioningScheme.getOffset(fromIndex);
            int eOff = partitioningScheme.getEndOffset(fromIndex, toIndex);

            long ret = pages[partitioningScheme.getPage(fromIndex)].binarySearchUpperBoundN(sz, key, sOff, eOff);

            return translateSearchResultsFromPage(fromIndex, ret);
        }
        else {
            return defaults.binarySearchUpperBoundN(sz, key, fromIndex, toIndex);
        }
    }

    /** Keep only buffer entries present in the sorted span [searchStart, searchEnd). */
    @Override
    public void retain(LongQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
        if (partitioningScheme.isSamePage(searchStart, searchEnd)) {
            int sOff = partitioningScheme.getOffset(searchStart);
            int eOff = partitioningScheme.getEndOffset(searchStart, searchEnd);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(searchStart)].retain(buffer, boundary, sOff, eOff);
            }
        }
        else {
            defaults.retain(buffer, boundary, searchStart, searchEnd);
        }
    }

    /** retain() over {@code sz}-long records. */
    @Override
    public void retainN(LongQueryBuffer buffer, int sz, long boundary, long searchStart, long searchEnd) {
        if (partitioningScheme.isSamePage(searchStart, searchEnd)) {
            int sOff = partitioningScheme.getOffset(searchStart);
            int eOff = partitioningScheme.getEndOffset(searchStart, searchEnd);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(searchStart)].retainN(buffer, sz, boundary, sOff, eOff);
            }
        }
        else {
            defaults.retainN(buffer, sz, boundary, searchStart, searchEnd);
        }
    }

    /** Remove buffer entries present in the sorted span [searchStart, searchEnd). */
    @Override
    public void reject(LongQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
        if (partitioningScheme.isSamePage(searchStart, searchEnd)) {
            int sOff = partitioningScheme.getOffset(searchStart);
            int eOff = partitioningScheme.getEndOffset(searchStart, searchEnd);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(searchStart)].reject(buffer, boundary, sOff, eOff);
            }
        }
        else {
            defaults.reject(buffer, boundary, searchStart, searchEnd);
        }
    }

    /** reject() over {@code sz}-long records. */
    @Override
    public void rejectN(LongQueryBuffer buffer, int sz, long boundary, long searchStart, long searchEnd) {
        if (partitioningScheme.isSamePage(searchStart, searchEnd)) {
            int sOff = partitioningScheme.getOffset(searchStart);
            int eOff = partitioningScheme.getEndOffset(searchStart, searchEnd);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(searchStart)].rejectN(buffer, sz, boundary, sOff, eOff);
            }
        }
        else {
            defaults.rejectN(buffer, sz, boundary, searchStart, searchEnd);
        }
    }

    @Override
    public void insertionSort(long start, long end) {
        if (partitioningScheme.isSamePage(start, end)) {
            int sOff = partitioningScheme.getOffset(start);
            int eOff = partitioningScheme.getEndOffset(start, end);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(start)].insertionSort(sOff, eOff);
            }
        }
        else {
            defaults.insertionSort(start, end);
        }
    }

    @Override
    public void insertionSortN(int sz, long start, long end) {
        if (partitioningScheme.isSamePage(start, end)) {
            int sOff = partitioningScheme.getOffset(start);
            int eOff = partitioningScheme.getEndOffset(start, end);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(start)].insertionSortN(sz, sOff, eOff);
            }
        }
        else {
            defaults.insertionSortN(sz, start, end);
        }
    }

    @Override
    public void quickSort(long start, long end) {
        if (partitioningScheme.isSamePage(start, end)) {
            int sOff = partitioningScheme.getOffset(start);
            int eOff = partitioningScheme.getEndOffset(start, end);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(start)].quickSort(sOff, eOff);
            }
        }
        else {
            defaults.quickSort(start, end);
        }
    }

    @Override
    public void quickSortN(int sz, long start, long end) {
        if (partitioningScheme.isSamePage(start, end)) {
            int sOff = partitioningScheme.getOffset(start);
            int eOff = partitioningScheme.getEndOffset(start, end);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(start)].quickSortN(sz, sOff, eOff);
            }
        }
        else {
            defaults.quickSortN(sz, start, end);
        }
    }

    /** External merge sort using {@code tempDir} for scratch files. */
    @Override
    public void mergeSort(long start, long end, Path tempDir) throws IOException {
        if (partitioningScheme.isSamePage(start, end)) {
            int sOff = partitioningScheme.getOffset(start);
            int eOff = partitioningScheme.getEndOffset(start, end);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(start)].mergeSort(sOff, eOff, tempDir);
            }
        }
        else {
            defaults.mergeSort(start, end, tempDir);
        }
    }

    @Override
    public void mergeSortN(int sz, long start, long end, Path tempDir) throws IOException {
        if (partitioningScheme.isSamePage(start, end)) {
            int sOff = partitioningScheme.getOffset(start);
            int eOff = partitioningScheme.getEndOffset(start, end);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(start)].mergeSortN(sz, sOff, eOff, tempDir);
            }
        }
        else {
            defaults.mergeSortN(sz, start, end, tempDir);
        }
    }

    public void sortLargeSpanN(SortingContext ctx, int sz, long start, long end) throws IOException {
        if (partitioningScheme.isSamePage(start, end)) {
            int sOff = partitioningScheme.getOffset(start);
            int eOff = partitioningScheme.getEndOffset(start, end);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(start)].sortLargeSpanN(ctx, sz, sOff, eOff);
            }
        }
        else {
            defaults.sortLargeSpanN(ctx, sz, start, end);
        }
    }

    public boolean isSorted(long start, long end) {
        if (partitioningScheme.isSamePage(start, end)) {
            int sOff = partitioningScheme.getOffset(start);
            int eOff = partitioningScheme.getEndOffset(start, end);

            return pages[partitioningScheme.getPage(start)].isSorted(sOff, eOff);
        }
        else {
            return defaults.isSorted(start, end);
        }
    }

    /** True when [start, end) is sorted viewed as {@code sz}-long records keyed on the first word. */
    public boolean isSortedN(int sz, long start, long end) {
        if (partitioningScheme.isSamePage(start, end)) {
            int sOff = partitioningScheme.getOffset(start);
            int eOff = partitioningScheme.getEndOffset(start, end);

            return pages[partitioningScheme.getPage(start)].isSortedN(sz, sOff, eOff);
        }
        else {
            return defaults.isSortedN(sz, start, end);
        }
    }

    public void sortLargeSpan(SortingContext ctx, long start, long end) throws IOException {
        if (partitioningScheme.isSamePage(start, end)) {
            int sOff = partitioningScheme.getOffset(start);
            int eOff = partitioningScheme.getEndOffset(start, end);

            if (eOff > sOff) {
                pages[partitioningScheme.getPage(start)].sortLargeSpan(ctx, sOff, eOff);
            }
        }
        else {
            defaults.sortLargeSpan(ctx, start, end);
        }
    }

    /** Write every page's contents, in partition order, to {@code fileName} and force to disk. */
    public void write(Path fileName) throws IOException {
        try (var channel = (FileChannel) Files.newByteChannel(fileName, StandardOpenOption.CREATE, StandardOpenOption.WRITE)) {
            for (int i = 0; i < pages.length; i++) {
                pages[i].write(channel);
            }
            channel.force(false);
        }
    }

    public ArrayPartitioningScheme getPartitioningScheme() {
        return partitioningScheme;
    }

    public LongArrayPage getPage(int forOffset) {
        return pages[partitioningScheme.getPage(forOffset)];
    }

    /** View of [start, end): page-backed when possible, otherwise a shifted wrapper. */
    public LongArray range(long start, long end) {
        if (partitioningScheme.isSamePage(start, end)) {
            return pages[partitioningScheme.getPage(start)]
                    .range(partitioningScheme.getOffset(start),
                            partitioningScheme.getEndOffset(start, end)
                    );
        }
        else {
            return new ShiftedLongArray(start, end, this);
        }
    }

    /** Direct page reference for single-page spans, avoiding per-access translation. */
    public ArrayRangeReference<LongArray> directRangeIfPossible(long start, long end) {
        if (partitioningScheme.isSamePage(start, end)) {
            return new ArrayRangeReference<>(
                    pages[partitioningScheme.getPage(start)],
                    partitioningScheme.getOffset(start),
                    partitioningScheme.getEndOffset(start, end));
        }
        else {
            return new ArrayRangeReference<>(this, start, end);
        }
    }

    /** Declared size; throws when no size was declared (size < 0). */
    public long getSize() {
        if (size < 0) {
            throw new UnsupportedOperationException();
        }
        return size;
    }

    @Override
    public void force() {
        for (var page : pages) {
            page.force();
        }
    }

    @Override
    public void advice(NativeIO.Advice advice) throws IOException {
        for (var page : pages) {
            page.advice(advice);
        }
    }

    @Override
    public void advice(NativeIO.Advice advice, long start, long end) throws IOException {
        delegateToEachPageIO(start, end, (a,s,e) -> a.advice(advice, s, e));
    }

    /**
     * Bulk-load longs from {@code source} into [arrayStart, arrayEnd), reading
     * from element position sourceStart; the span may cross page boundaries.
     */
    public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {
        assert arrayEnd >= arrayStart;
        int page = partitioningScheme.getPage(arrayStart);
        long endPos;
        for (long pos = arrayStart; pos < arrayEnd; pos = endPos) {
            endPos = partitioningScheme.getPageEnd(pos, arrayEnd);
            int sOff = partitioningScheme.getOffset(pos);
            int eOff = partitioningScheme.getEndOffset(pos, endPos);
            pages[page++].transferFrom(source, sourceStart, sOff, eOff);
            // Advance the read position by the number of elements just copied
            sourceStart+=(endPos - pos);
        }
    }
}

View File

@ -0,0 +1,189 @@
package nu.marginalia.array.page;
import com.upserve.uppend.blobs.NativeIO;
import nu.marginalia.array.ArrayRangeReference;
import nu.marginalia.array.IntArray;
import nu.marginalia.array.IntArray;
import javax.annotation.Nullable;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.nio.ByteBuffer;
import java.nio.IntBuffer;
import java.nio.LongBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import static java.lang.foreign.ValueLayout.JAVA_INT;
/**
 * IntArray backed by a single java.lang.foreign MemorySegment, either allocated
 * on heap from an Arena or mmapped from a file.
 * <p>
 * When constructed with an Arena, closing this array closes that arena and
 * invalidates the segment. Slices produced by range()/shifted() carry no arena
 * and share the parent's lifetime; they must not outlive it.
 */
public class SegmentIntArray implements PartitionPage, IntArray {

    @Nullable
    private final Arena arena;
    private final MemorySegment segment;
    private boolean closed;

    SegmentIntArray(MemorySegment segment,
                    @Nullable Arena arena) {
        this.segment = segment;
        this.arena = arena;
    }

    /** Allocate a non-mapped array of {@code size} ints from the given arena, 8-byte aligned. */
    public static SegmentIntArray onHeap(Arena arena, long size) {
        return new SegmentIntArray(arena.allocate(WORD_SIZE*size, 8), arena);
    }

    /** Map {@code size} ints of {@code file} starting at element {@code offset}, read-only. */
    public static SegmentIntArray fromMmapReadOnly(Arena arena, Path file, long offset, long size) throws IOException {
        return new SegmentIntArray(
                mmapFile(arena, file, offset, size, FileChannel.MapMode.READ_ONLY, StandardOpenOption.READ),
                arena);
    }

    /** Map {@code size} ints of {@code file} starting at element {@code offset}, read-write, creating the file if absent. */
    public static SegmentIntArray fromMmapReadWrite(Arena arena, Path file, long offset, long size) throws IOException {
        return new SegmentIntArray(
                mmapFile(arena, file, offset, size, FileChannel.MapMode.READ_WRITE,
                        StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE),
                arena);
    }

    private static MemorySegment mmapFile(Arena arena,
                                          Path file,
                                          long offset,
                                          long size,
                                          FileChannel.MapMode mode,
                                          OpenOption... openOptions) throws IOException
    {
        try (var channel = (FileChannel) Files.newByteChannel(file, openOptions)) {
            // offset and size are element counts; FileChannel.map() wants bytes
            return channel.map(mode,
                    JAVA_INT.byteSize() * offset,
                    JAVA_INT.byteSize() * size,
                    arena);
        }
        catch (IOException ex) {
            throw new IOException("Failed to map file " + file + " (" + offset + ":" + size + ")", ex);
        }
    }

    /** A view of [start, end); shares the backing segment and has no arena of its own. */
    @Override
    public IntArray range(long start, long end) {
        return new SegmentIntArray(
                segment.asSlice(
                        start * JAVA_INT.byteSize(),
                        (end-start) * JAVA_INT.byteSize()),
                null);
    }

    /** A view starting at element {@code start} and extending to the end of the segment. */
    @Override
    public IntArray shifted(long start) {
        return new SegmentIntArray(
                segment.asSlice(start * JAVA_INT.byteSize()),
                null);
    }

    @Override
    public int get(long at) {
        try {
            return segment.getAtIndex(JAVA_INT, at);
        }
        catch (IndexOutOfBoundsException ex) {
            // Report the valid range in int elements. (Bug fix: this previously
            // printed byteSize()/8, the element count for longs, not ints.)
            throw new IndexOutOfBoundsException("@" + at + "(" + 0 + ":" + segment.byteSize() / JAVA_INT.byteSize() + ")");
        }
    }

    @Override
    public void get(long start, long end, int[] buffer) {
        for (int i = 0; i < end - start; i++) {
            buffer[i] = segment.getAtIndex(JAVA_INT, start + i);
        }
    }

    @Override
    public void set(long at, int val) {
        segment.setAtIndex(JAVA_INT, at, val);
    }

    @Override
    public void set(long start, long end, IntBuffer buffer, int bufferStart) {
        for (int i = 0; i < end - start; i++) {
            set(start + i, buffer.get(bufferStart + i));
        }
    }

    /** Close the owning arena, if any; idempotent. Arena-less slices are unaffected. */
    @Override
    public synchronized void close() {
        if (arena != null && !closed) {
            arena.close();
        }

        closed = true;
    }

    /** Length in int elements. */
    @Override
    public long size() {
        return segment.byteSize() / JAVA_INT.byteSize();
    }

    @Override
    public ByteBuffer getByteBuffer() {
        return segment.asByteBuffer();
    }

    /** Write the array's full contents to {@code filename} via a temporary read-write mapping. */
    @Override
    public void write(Path filename) throws IOException {
        try (var arena = Arena.ofConfined()) {
            // fromMmapReadWrite takes a size in *elements*; passing byteSize()
            // directly (as before) mapped and grew the file to 4x the needed length
            var destSegment = SegmentIntArray.fromMmapReadWrite(arena, filename, 0,
                    segment.byteSize() / JAVA_INT.byteSize());

            destSegment.segment.copyFrom(segment);
            destSegment.force();
        }
    }

    /** Flush mapped contents to storage; no-op for heap segments. */
    @Override
    public void force() {
        if (segment.isMapped()) {
            segment.force();
        }
    }

    public ArrayRangeReference<IntArray> directRangeIfPossible(long start, long end) {
        return new ArrayRangeReference<>(this, start, end);
    }

    /**
     * Bulk-read ints from {@code source} into [arrayStart, arrayEnd), starting at
     * element position {@code sourceStart}. All positions are element counts.
     *
     * @throws IOException on channel errors, or if the channel ends before the span is filled
     */
    @Override
    public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {
        // Copy in bounded chunks, since a single ByteBuffer view tops out at 2 GB.
        // (Bug fix: the stride was 1024*1204*128 -- '1204' was a typo for '1024'.)
        final int stride = 1024*1024*128;
        long ss = sourceStart;
        for (long as = arrayStart; as < arrayEnd; as += stride, ss += stride) {
            long ae = Math.min(as + stride, arrayEnd);

            long index = as * JAVA_INT.byteSize();
            long length = (ae - as) * JAVA_INT.byteSize();

            var bufferSlice = segment.asSlice(index, length).asByteBuffer();

            long startPos = ss * JAVA_INT.byteSize();
            while (bufferSlice.position() < bufferSlice.capacity()) {
                // read() returns -1 at end-of-file; without this check the loop
                // would spin forever on a truncated source
                if (source.read(bufferSlice, startPos + bufferSlice.position()) < 0) {
                    throw new IOException("Unexpected end of channel while transferring data into " + this);
                }
            }
        }
    }

    @Override
    public void advice(NativeIO.Advice advice) throws IOException {
        // NativeIO.madvise((MappedByteBuffer) byteBuffer, advice);
    }

    @Override
    public void advice(NativeIO.Advice advice, long start, long end) throws IOException {
        // NativeIO.madviseRange((MappedByteBuffer) byteBuffer, advice, (int) start, (int) (end-start));
    }

}

View File

@ -2,18 +2,20 @@ package nu.marginalia.array.algo;
import nu.marginalia.array.IntArray;
import nu.marginalia.array.buffer.IntQueryBuffer;
import nu.marginalia.array.page.PagingIntArray;
import nu.marginalia.array.page.SegmentIntArray;
import nu.marginalia.array.scheme.PowerOf2PartitioningScheme;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.lang.foreign.Arena;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
class IntArraySearchTest {
IntArray basicArray = IntArray.allocate(1024);
IntArray pagingArray = PagingIntArray.newOnHeap(new PowerOf2PartitioningScheme(64), 1024);
IntArray pagingArray = SegmentIntArray.onHeap(Arena.global(), 1024);
IntArray shiftedArray = IntArray.allocate(1054).range(30, 1054);
@ -56,7 +58,7 @@ class IntArraySearchTest {
assertEquals(i, array.get(ret));
}
else {
long higher = LongArraySearch.decodeSearchMiss(ret);
long higher = LongArraySearch.decodeSearchMiss(1, ret);
if (i > 0 && higher < array.size()) {
assertTrue(array.get(higher) < i);
}
@ -73,7 +75,7 @@ class IntArraySearchTest {
assertEquals(i, array.get(ret));
}
else {
long higher = LongArraySearch.decodeSearchMiss(ret);
long higher = LongArraySearch.decodeSearchMiss(1, ret);
if (i > 0 && higher+1 < array.size()) {
assertTrue(array.get(higher) < i);
}

View File

@ -1,12 +1,9 @@
package nu.marginalia.array.algo;
import nu.marginalia.array.IntArray;
import nu.marginalia.array.page.IntArrayPage;
import nu.marginalia.array.page.PagingIntArray;
import nu.marginalia.array.scheme.PowerOf2PartitioningScheme;
import nu.marginalia.util.test.TestUtil;
import org.apache.commons.lang3.ArrayUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
@ -30,9 +27,9 @@ class IntArraySortTest {
@BeforeEach
public void setUp() {
basic = IntArrayPage.onHeap(size);
paged = PagingIntArray.newOnHeap(new PowerOf2PartitioningScheme(32), size);
shifted = IntArrayPage.onHeap(size + 30).shifted(30);
basic = IntArray.allocate(size);
paged = IntArray.allocate(size);
shifted = IntArray.allocate(size+30).shifted(30);
var random = new Random();
int[] values = new int[size];

View File

@ -1,11 +1,6 @@
package nu.marginalia.array.algo;
import nu.marginalia.array.IntArray;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.page.IntArrayPage;
import nu.marginalia.array.page.LongArrayPage;
import nu.marginalia.array.page.PagingIntArray;
import nu.marginalia.array.page.PagingLongArray;
import nu.marginalia.array.scheme.PowerOf2PartitioningScheme;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@ -24,9 +19,9 @@ class IntArrayTransformations2Test {
@BeforeEach
public void setUp() {
basic = IntArrayPage.onHeap(size);
paged = PagingIntArray.newOnHeap(new PowerOf2PartitioningScheme(32), size);
shifted = IntArrayPage.onHeap(size + 30).shifted(30);
basic = IntArray.allocate(size);
paged = IntArray.allocate(size);
shifted = IntArray.allocate(size+30).shifted(30);
int[] vals = new int[size];
for (int i = 0; i < vals.length; i++) {

View File

@ -1,8 +1,7 @@
package nu.marginalia.array.algo;
import nu.marginalia.array.IntArray;
import nu.marginalia.array.page.IntArrayPage;
import nu.marginalia.array.page.PagingIntArray;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.scheme.PowerOf2PartitioningScheme;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@ -20,9 +19,9 @@ class IntArrayTransformationsTest {
@BeforeEach
public void setUp() {
basic = IntArrayPage.onHeap(size);
paged = PagingIntArray.newOnHeap(new PowerOf2PartitioningScheme(32), size);
shifted = IntArrayPage.onHeap(size + 30).shifted(30);
basic = IntArray.allocate(size);
paged = IntArray.allocate(size);
shifted = IntArray.allocate(size+30).shifted(30);
for (int i = 0; i < basic.size(); i++) {
basic.set(i, 3*i);

View File

@ -3,7 +3,7 @@ package nu.marginalia.array.algo;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.PagingLongArray;
import nu.marginalia.array.page.SegmentLongArray;
import nu.marginalia.array.scheme.PowerOf2PartitioningScheme;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@ -14,7 +14,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
class LongArraySearchTest {
LongArray basicArray = LongArray.allocate(1024);
LongArray pagingArray = PagingLongArray.newOnHeap(new PowerOf2PartitioningScheme(64), 1024);
LongArray segmentArray = LongArrayFactory.onHeapConfined(1024);
LongArray shiftedArray = LongArray.allocate(1054).range(30, 1054);
@ -23,7 +22,6 @@ class LongArraySearchTest {
public void setUp() {
for (int i = 0; i < basicArray.size(); i++) {
basicArray.set(i, 3L*i);
pagingArray.set(i, 3L*i);
shiftedArray.set(i, 3L*i);
segmentArray.set(i, 3L*i);
}
@ -32,7 +30,6 @@ class LongArraySearchTest {
@Test
void linearSearch() {
linearSearchTester(basicArray);
linearSearchTester(pagingArray);
linearSearchTester(shiftedArray);
linearSearchTester(segmentArray);
}
@ -40,7 +37,6 @@ class LongArraySearchTest {
@Test
void binarySearch() {
binarySearchTester(basicArray);
binarySearchTester(pagingArray);
binarySearchTester(shiftedArray);
binarySearchTester(segmentArray);
}
@ -48,7 +44,6 @@ class LongArraySearchTest {
@Test
void binarySearchUpperBound() {
binarySearchUpperBoundTester(basicArray);
binarySearchUpperBoundTester(pagingArray);
binarySearchUpperBoundTester(shiftedArray);
binarySearchUpperBoundTester(segmentArray);
}
@ -56,11 +51,20 @@ class LongArraySearchTest {
@Test
void linearSearchUpperBound() {
linearSearchUpperBoundTester(basicArray);
linearSearchUpperBoundTester(pagingArray);
linearSearchUpperBoundTester(shiftedArray);
linearSearchUpperBoundTester(segmentArray);
}
@Test
public void testEmptyRange() {
assertTrue(segmentArray.binarySearchN(2, 0, 0, 0) < 0);
assertTrue(segmentArray.linearSearchN(2, 0, 0, 0) < 0);
assertTrue(segmentArray.binarySearch(0, 0, 0) < 0);
assertTrue(segmentArray.linearSearch(0, 0, 0) < 0);
assertEquals(0, segmentArray.linearSearchUpperBound(0, 0, 0));
}
void linearSearchTester(LongArray array) {
for (int i = 0; i < array.size() * 3; i++) {
long ret = array.linearSearch(i, 0, array.size());
@ -70,7 +74,7 @@ class LongArraySearchTest {
assertEquals(i, array.get(ret));
}
else {
long higher = LongArraySearch.decodeSearchMiss(ret);
long higher = LongArraySearch.decodeSearchMiss(1, ret);
if (i > 0 && higher < array.size()) {
assertTrue(array.get(higher) < i);
}
@ -87,7 +91,7 @@ class LongArraySearchTest {
assertEquals(i, array.get(ret));
}
else {
long higher = LongArraySearch.decodeSearchMiss(ret);
long higher = LongArraySearch.decodeSearchMiss(1, ret);
if (i > 0 && higher+1 < array.size()) {
assertTrue(array.get(higher) < i);
}

View File

@ -4,8 +4,6 @@ import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.array.page.LongArrayPage;
import nu.marginalia.array.page.PagingLongArray;
import nu.marginalia.array.scheme.PowerOf2PartitioningScheme;
import nu.marginalia.util.test.TestUtil;
import org.apache.commons.lang3.ArrayUtils;
@ -35,9 +33,9 @@ class LongArraySortNTest {
@BeforeEach
public void setUp() {
basic = LongArrayPage.onHeap(size);
paged = PagingLongArray.newOnHeap(new PowerOf2PartitioningScheme(32), size);
shifted = LongArrayPage.onHeap(size + 30).shifted(30);
basic = LongArray.allocate(size);
paged = LongArray.allocate(size);
shifted = LongArray.allocate(size+30).shifted(30);
segment = LongArrayFactory.onHeapShared(size + 30).shifted(30);
var random = new Random();

View File

@ -3,8 +3,6 @@ package nu.marginalia.array.algo;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.array.page.LongArrayPage;
import nu.marginalia.array.page.PagingLongArray;
import nu.marginalia.array.scheme.PowerOf2PartitioningScheme;
import nu.marginalia.util.test.TestUtil;
import org.apache.commons.lang3.ArrayUtils;
@ -34,9 +32,9 @@ class LongArraySortTest {
@BeforeEach
public void setUp() {
basic = LongArrayPage.onHeap(size);
paged = PagingLongArray.newOnHeap(new PowerOf2PartitioningScheme(32), size);
shifted = LongArrayPage.onHeap(size + 30).shifted(30);
basic = LongArray.allocate(size);
paged = LongArray.allocate(size);
shifted = LongArray.allocate(size+30).shifted(30);
segment = LongArrayFactory.onHeapConfined(size + 30).shifted(30);
valueSet = new LongOpenHashSet();

View File

@ -2,8 +2,6 @@ package nu.marginalia.array.algo;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.array.page.LongArrayPage;
import nu.marginalia.array.page.PagingLongArray;
import nu.marginalia.array.scheme.PowerOf2PartitioningScheme;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@ -17,16 +15,14 @@ class LongArrayTransformations2Test {
LongArray basic;
LongArray paged;
LongArray shifted;
LongArray segment;
final int size = 1026;
@BeforeEach
public void setUp() {
basic = LongArrayPage.onHeap(size);
paged = PagingLongArray.newOnHeap(new PowerOf2PartitioningScheme(32), size);
shifted = LongArrayPage.onHeap(size + 30).shifted(30);
segment = LongArrayFactory.onHeapShared(size);
basic = LongArray.allocate(size);
paged = LongArray.allocate(size);
shifted = LongArray.allocate(size+30).shifted(30);
long[] vals = new long[size];
for (int i = 0; i < vals.length; i++) {
@ -35,21 +31,18 @@ class LongArrayTransformations2Test {
basic.set(0, vals);
paged.set(0, vals);
shifted.set(0, vals);
segment.set(0, vals);
}
@Test
void forEach() {
foreachTester(basic);
foreachTester(paged);
foreachTester(shifted);
foreachTester(segment);
}
@Test
void transformEach() {
transformTester(basic);
transformTester(paged);
transformTester(shifted);
transformTester(segment);
}
@Test
@ -57,7 +50,6 @@ class LongArrayTransformations2Test {
transformTesterIO(basic);
transformTesterIO(paged);
transformTesterIO(shifted);
transformTesterIO(segment);
}
private void transformTester(LongArray array) {

View File

@ -2,8 +2,6 @@ package nu.marginalia.array.algo;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.array.page.LongArrayPage;
import nu.marginalia.array.page.PagingLongArray;
import nu.marginalia.array.scheme.PowerOf2PartitioningScheme;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@ -16,20 +14,17 @@ class LongArrayTransformationsTest {
LongArray basic;
LongArray paged;
LongArray shifted;
LongArray segment;
final int size = 1026;
@BeforeEach
public void setUp() {
basic = LongArrayPage.onHeap(size);
segment = LongArrayFactory.onHeapConfined(size);
paged = PagingLongArray.newOnHeap(new PowerOf2PartitioningScheme(32), size);
shifted = LongArrayPage.onHeap(size + 30).shifted(30);
basic = LongArray.allocate(size);
paged = LongArray.allocate(size);
shifted = LongArray.allocate(size+30).shifted(30);
for (int i = 0; i < basic.size(); i++) {
basic.set(i, 3L*i);
segment.set(i, 3L*i);
paged.set(i, 3L*i);
shifted.set(i, 3L*i);
}
@ -40,7 +35,6 @@ class LongArrayTransformationsTest {
transformTester(basic);
transformTester(paged);
transformTester(shifted);
transformTester(segment);
}
@Test
@ -48,7 +42,6 @@ class LongArrayTransformationsTest {
transformTesterIO(basic);
transformTesterIO(paged);
transformTesterIO(shifted);
transformTesterIO(segment);
}
@Test
@ -56,7 +49,6 @@ class LongArrayTransformationsTest {
assertEquals(3*(5+6+7+8+9), basic.fold(0, 5, 10, Long::sum));
assertEquals(3*(5+6+7+8+9), paged.fold(0, 5, 10, Long::sum));
assertEquals(3*(5+6+7+8+9), shifted.fold(0, 5, 10, Long::sum));
assertEquals(3*(5+6+7+8+9), segment.fold(0, 5, 10, Long::sum));
}
@Test
@ -64,7 +56,6 @@ class LongArrayTransformationsTest {
assertEquals(3*(5+6+7+8+9), basic.foldIO(0, 5, 10, Long::sum));
assertEquals(3*(5+6+7+8+9), paged.foldIO(0, 5, 10, Long::sum));
assertEquals(3*(5+6+7+8+9), shifted.foldIO(0, 5, 10, Long::sum));
assertEquals(3*(5+6+7+8+9), segment.foldIO(0, 5, 10, Long::sum));
}
private void transformTester(LongArray array) {

View File

@ -30,10 +30,10 @@ public class BTreeReader {
assert file.size() >= header.dataOffsetLongs() + dataBlockEnd;
}
LongArray data() {
public LongArray data() {
return data;
}
LongArray index() {
public LongArray index() {
return index;
}
@ -101,9 +101,11 @@ public class BTreeReader {
public long findEntry(final long key) {
BTreePointer ip = new BTreePointer(header);
while (!ip.isDataLayer())
if (!ip.walkToChild(key))
while (!ip.isDataLayer()) {
if (!ip.walkToChild(key)) {
return -1;
}
}
return ip.findData(key);
}
@ -151,7 +153,7 @@ public class BTreeReader {
long key = keys[i];
searchStart = data.binarySearchN(ctx.entrySize, key, searchStart, data.size());
if (searchStart < 0) {
searchStart = LongArraySearch.decodeSearchMiss(searchStart);
searchStart = LongArraySearch.decodeSearchMiss(ctx.entrySize, searchStart);
}
else {
ret[i] = data.get(searchStart + offset);

View File

@ -80,6 +80,7 @@ class EncyclopediaMarginaliaNuSideloaderTest {
var sideloader = new EncyclopediaMarginaliaNuSideloader(
pathToDbFile,
"https://en.wikipedia.org/wiki/",
GsonFactory.get(),
processing
);

View File

@ -53,9 +53,9 @@ public class IndexQueryServiceIntegrationTestModule extends AbstractModule {
try {
var fileStorageServiceMock = Mockito.mock(FileStorageService.class);
Mockito.when(fileStorageServiceMock.getStorageBase(FileStorageBaseType.WORK)).thenReturn(new FileStorageBase(null, null, null, slowDir.toString()));
Mockito.when(fileStorageServiceMock.getStorageBase(FileStorageBaseType.CURRENT)).thenReturn(new FileStorageBase(null, null, null, fastDir.toString()));
Mockito.when(fileStorageServiceMock.getStorageBase(FileStorageBaseType.STORAGE)).thenReturn(new FileStorageBase(null, null, null, fastDir.toString()));
Mockito.when(fileStorageServiceMock.getStorageBase(FileStorageBaseType.WORK)).thenReturn(new FileStorageBase(null, null, 0,null, slowDir.toString()));
Mockito.when(fileStorageServiceMock.getStorageBase(FileStorageBaseType.CURRENT)).thenReturn(new FileStorageBase(null, null, 0,null, fastDir.toString()));
Mockito.when(fileStorageServiceMock.getStorageBase(FileStorageBaseType.STORAGE)).thenReturn(new FileStorageBase(null, null, 0, null, fastDir.toString()));
bind(LinkdbReader.class).toInstance(new LinkdbReader(
IndexLocations.getLinkdbLivePath(fileStorageServiceMock)