(loader) Update the size of the keyword files created by the loader
Previously these ended up being about 200 MB each, which is wastefully small. Increasing the size of these files makes index construction faster.
This commit is contained in:
parent
f44222ce53
commit
14b7680328
@ -6,7 +6,12 @@ import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface IndexJournalWriter {
|
||||
/** Responsible for writing to the index journal.
|
||||
* <p></p>
|
||||
* @see IndexJournalWriterSingleFileImpl
|
||||
* @see IndexJournalWriterPagingImpl
|
||||
*/
|
||||
public interface IndexJournalWriter extends AutoCloseable {
|
||||
void put(IndexJournalEntryHeader header, IndexJournalEntryData entry);
|
||||
default void put(IndexJournalEntry entry) {
|
||||
put(entry.header(), entry.data());
|
||||
|
@ -10,10 +10,25 @@ import org.slf4j.LoggerFactory;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
/** IndexJournalWriter implementation that creates a sequence of journal files,
|
||||
* delegating to IndexJournalWriterSingleFileImpl to write the individual files.
|
||||
*
|
||||
*/
|
||||
public class IndexJournalWriterPagingImpl implements IndexJournalWriter {
|
||||
private final Path outputDir;
|
||||
private int fileNumber = 0;
|
||||
|
||||
/* Number of entries to write to each file before switching to the next.
|
||||
*
|
||||
* A large limit increases the memory footprint of the index, but reduces
|
||||
* the construction time. A small limit reduces the memory footprint, but
|
||||
* increases the construction time.
|
||||
*
|
||||
* The limit is set to 1,000,000, which amounts to about 1 GB on disk.
|
||||
*/
|
||||
private static final int SWITCH_LIMIT = 1_000_000;
|
||||
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private IndexJournalWriter currentWriter = null;
|
||||
private int inputsForFile = 0;
|
||||
@ -35,7 +50,7 @@ public class IndexJournalWriterPagingImpl implements IndexJournalWriter {
|
||||
@Override
|
||||
@SneakyThrows
|
||||
public void put(IndexJournalEntryHeader header, IndexJournalEntryData entry) {
|
||||
if (++inputsForFile > 100_000) {
|
||||
if (++inputsForFile > SWITCH_LIMIT) {
|
||||
inputsForFile = 0;
|
||||
switchToNextWriter();
|
||||
}
|
||||
|
@ -17,6 +17,7 @@ import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.nio.file.attribute.PosixFilePermissions;
|
||||
|
||||
/** IndexJournalWriter implementation that creates a single journal file */
|
||||
public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
|
||||
|
||||
private static final int ZSTD_BUFFER_SIZE = 8192;
|
||||
|
@ -3,7 +3,6 @@ package nu.marginalia.loading.documents;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.io.processed.DocumentRecordParquetFileReader;
|
||||
import nu.marginalia.io.processed.ProcessedDataFileNames;
|
||||
import nu.marginalia.keyword.model.DocumentKeywords;
|
||||
import nu.marginalia.loading.LoaderIndexJournalWriter;
|
||||
import nu.marginalia.loading.LoaderInputData;
|
||||
|
Loading…
Reference in New Issue
Block a user