(keyword lexicon) Use three hash tables to increase the possible number of keywords to 2^31 from 0.75 x 2^30.
This commit is contained in:
parent
bf92c270dc
commit
1a05cba60a
@ -3,27 +3,59 @@ package nu.marginalia.dict;
|
|||||||
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
|
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
|
||||||
|
|
||||||
public class OnHeapDictionaryMap implements DictionaryMap {
|
public class OnHeapDictionaryMap implements DictionaryMap {
|
||||||
private static final int DEFAULT_SIZE = Integer.getInteger("lexiconSizeHint", 100_000);
|
/* Use three different hash tables to get around the limitations of Java's array sizes.
|
||||||
private final Long2IntOpenHashMap entries = new Long2IntOpenHashMap(DEFAULT_SIZE, 0.75f);
|
*
|
||||||
|
* Each map fits 0.75 * 2^30 entries (~800mn); the three maps together fit a bit over 2^31 entries.
|
||||||
|
* We're happy with 2^31.
|
||||||
|
*
|
||||||
|
* We'll assign each term to one of the three maps based on its key modulo 3. We'll pray each
|
||||||
|
* night that Long2IntOpenHashMap's hash function is good enough to cope with this. The keys we are
|
||||||
|
* inserting are 64 bit hashes already, so odds are the rest of the bits have very good entropy.
|
||||||
|
*/
|
||||||
|
private static final int DEFAULT_SIZE = Integer.getInteger("lexiconSizeHint", 100_000)/3;
|
||||||
|
private final Long2IntOpenHashMap[] entries = new Long2IntOpenHashMap[3];
|
||||||
|
|
||||||
|
public OnHeapDictionaryMap() {
|
||||||
|
for (int i = 0; i < entries.length; i++) {
|
||||||
|
entries[i] = new Long2IntOpenHashMap(DEFAULT_SIZE, 0.75f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void clear() {
|
public void clear() {
|
||||||
entries.clear();
|
for (var map : entries) {
|
||||||
|
map.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int size() {
|
public int size() {
|
||||||
return entries.size();
|
int totalSize = 0;
|
||||||
|
for (var map : entries) {
|
||||||
|
totalSize += map.size();
|
||||||
|
}
|
||||||
|
return totalSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int put(long key) {
|
public int put(long key) {
|
||||||
entries.putIfAbsent(key, entries.size());
|
int shardIdx = (int) Long.remainderUnsigned(key, 3);
|
||||||
|
var shard = entries[shardIdx];
|
||||||
|
int size = size();
|
||||||
|
|
||||||
|
if (size == Integer.MAX_VALUE)
|
||||||
|
throw new IllegalStateException("DictionaryMap is full");
|
||||||
|
|
||||||
|
shard.putIfAbsent(key, size);
|
||||||
|
|
||||||
return get(key);
|
return get(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int get(long key) {
|
public int get(long key) {
|
||||||
return entries.getOrDefault(key, NO_VALUE);
|
int shardIdx = (int) Long.remainderUnsigned(key, 3);
|
||||||
|
var shard = entries[shardIdx];
|
||||||
|
|
||||||
|
return shard.getOrDefault(key, NO_VALUE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -145,7 +145,7 @@ public class KeywordLexicon implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public int size() {
|
public long size() {
|
||||||
Lock lock = memoryLock.readLock();
|
Lock lock = memoryLock.readLock();
|
||||||
try {
|
try {
|
||||||
lock.lock();
|
lock.lock();
|
||||||
|
Loading…
Reference in New Issue
Block a user