(reverse-index) Fix over-allocation of the count array in merging

This commit is contained in:
Viktor Lofgren 2023-08-28 14:36:28 +02:00
parent 3101b74580
commit 00c4686ef0
3 changed files with 14 additions and 3 deletions

View File

@ -56,10 +56,11 @@ public class ReverseIndexConstructor {
List<ReversePreindex> merged = new ArrayList<>();
while (toMerge.size() != 1) {
for (int i = 0; i < toMerge.size(); i+=2) {
for (int i = 0; i + 1< toMerge.size(); i+=2) {
var left = toMerge.get(i);
var right = toMerge.get(i+1);
logger.info("Merge {}, {}", i, i+1);
merged.add(ReversePreindex.merge(workDir, left, right));
left.delete();

View File

@ -38,7 +38,6 @@ public class ReversePreindex {
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(ReverseIndexParameters.docsBTreeContext, 2);
offsets.fold(0, 0, offsets.size(), sizeEstimator);
System.out.println("size estimate = " + sizeEstimator.size);
// Write the docs file
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
try (var intermediateDocChannel = documents.createDocumentsFileChannel()) {
@ -48,6 +47,8 @@ public class ReversePreindex {
LongArray wordIds = segments.wordIds;
assert offsets.size() == wordIds.size() : "Offsets and word-ids of different size";
// Estimate the size of the words index data
long wordsSize = ReverseIndexParameters.wordsBTreeContext.calculateSize((int) offsets.size());
@ -108,7 +109,7 @@ public class ReversePreindex {
0, left.wordIds.size(),
0, right.wordIds.size());
LongArray counts = LongArray.mmapForWriting(segmentCountsFile, 8*segmentsSize);
LongArray counts = LongArray.mmapForWriting(segmentCountsFile, segmentsSize);
return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
}
@ -177,6 +178,8 @@ public class ReversePreindex {
mergedDocuments = shrinkMergedDocuments(mergedDocuments, docsFile, 2 * mergingSegment.totalSize());
mergingSegment.force();
return new ReversePreindex(
mergingSegment,
new ReversePreindexDocuments(mergedDocuments, docsFile)

View File

@ -28,6 +28,8 @@ public class ReversePreindexWordSegments {
Path wordsFile,
Path countsFile)
{
assert wordIds.size() == counts.size();
this.wordIds = wordIds;
this.counts = counts;
this.wordsFile = wordsFile;
@ -97,6 +99,11 @@ public class ReversePreindexWordSegments {
Files.delete(wordsFile);
}
/**
 * Calls {@code force()} on both arrays held by this word-segments object:
 * first {@code counts}, then {@code wordIds}.
 *
 * NOTE(review): presumably this flushes the memory-mapped contents of both
 * arrays to their backing files so they are durable before the segments are
 * handed on — confirm against the LongArray implementation.
 */
public void force() {
counts.force();
wordIds.force();
}
public class SegmentIterator {
private final int recordSize;
private final long fileSize;