(index) Clean up result domain deduplicator
This commit is contained in:
parent
1e6800565a
commit
56eb83319d
@ -11,7 +11,8 @@ import java.util.List;
|
||||
/** Represents a document matching a search query */
|
||||
@AllArgsConstructor @Getter
|
||||
public class SearchResultItem implements Comparable<SearchResultItem> {
|
||||
/** Encoded ID that contains both the URL id and its ranking */
|
||||
/** Encoded ID that contains both the URL id and its ranking. This is
|
||||
* probably not what you want, use getDocumentId() instead */
|
||||
public final long combinedId;
|
||||
|
||||
/** How did the subqueries match against the document? */
|
||||
@ -20,8 +21,8 @@ public class SearchResultItem implements Comparable<SearchResultItem> {
|
||||
/** How many other potential results existed in the same domain */
|
||||
public int resultsFromDomain;
|
||||
|
||||
public SearchResultItem(long val) {
|
||||
this.combinedId = val;
|
||||
public SearchResultItem(long combinedId) {
|
||||
this.combinedId = combinedId;
|
||||
this.keywordScores = new ArrayList<>(16);
|
||||
}
|
||||
|
||||
@ -66,16 +67,6 @@ public class SearchResultItem implements Comparable<SearchResultItem> {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Returns the key used to group results for per-domain deduplication.
 *
 * @return the value of getDomainId(), or 0 when that value is
 *         Integer.MAX_VALUE or Integer.MIN_VALUE
 */
public long deduplicationKey() {
|
||||
final int domainId = getDomainId();
|
||||
|
||||
// The two extreme int values are mapped to 0 rather than used as keys.
// NOTE(review): presumably these are sentinel "no domain" ids -- confirm.
if (domainId == Integer.MAX_VALUE || domainId == Integer.MIN_VALUE) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return domainId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(@NotNull SearchResultItem o) {
|
||||
// this looks like a bug, but we actually want this in a reversed order
|
||||
|
@ -5,7 +5,7 @@ import gnu.trove.map.hash.TLongIntHashMap;
|
||||
import nu.marginalia.index.client.model.results.SearchResultItem;
|
||||
|
||||
public class IndexResultDomainDeduplicator {
|
||||
final TLongIntMap resultsByRankingId = CachedObjects.getMap();
|
||||
final TLongIntMap resultsByDomainId = CachedObjects.getMap();
|
||||
final int limitByDomain;
|
||||
|
||||
public IndexResultDomainDeduplicator(int limitByDomain) {
|
||||
@ -13,19 +13,15 @@ public class IndexResultDomainDeduplicator {
|
||||
}
|
||||
|
||||
public boolean test(SearchResultItem item) {
|
||||
final long key = item.deduplicationKey();
|
||||
if (key == 0)
|
||||
return true;
|
||||
final long key = item.getDomainId();
|
||||
|
||||
return resultsByRankingId.adjustOrPutValue(key, 1, 1) <= limitByDomain;
|
||||
return resultsByDomainId.adjustOrPutValue(key, 1, 1) <= limitByDomain;
|
||||
}
|
||||
|
||||
public int getCount(SearchResultItem item) {
|
||||
final long key = item.deduplicationKey();
|
||||
if (key == 0)
|
||||
return 1;
|
||||
final long key = item.getDomainId();
|
||||
|
||||
return resultsByRankingId.get(key);
|
||||
return resultsByDomainId.get(key);
|
||||
}
|
||||
|
||||
private static class CachedObjects {
|
||||
@ -38,7 +34,14 @@ public class IndexResultDomainDeduplicator {
|
||||
ret.clear();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Calls remove() on mapCache.
 * NOTE(review): mapCache is declared outside this view; presumably a
 * ThreadLocal, in which case this discards the current thread's cached
 * map -- confirm against the declaration.
 */
public static void clear() {
|
||||
mapCache.remove();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Package-private hook that delegates to CachedObjects.clear().
 * Invoked by IndexResultDomainDeduplicatorTest after each test.
 */
static void clearCachedObjects() {
|
||||
CachedObjects.clear();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,35 @@
|
||||
package nu.marginalia.index.results;
|
||||
|
||||
import nu.marginalia.index.client.model.results.SearchResultItem;
|
||||
import nu.marginalia.model.id.UrlIdCodec;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/** Unit test for the per-domain result limit in IndexResultDomainDeduplicator. */
class IndexResultDomainDeduplicatorTest {
|
||||
|
||||
/** Clears the deduplicator's cached objects after every test. */
@AfterEach
|
||||
public void clear() {
|
||||
IndexResultDomainDeduplicator.clearCachedObjects();
|
||||
}
|
||||
|
||||
/**
 * With a limit of 3, the first three items from one domain are accepted
 * and later ones are rejected, while getCount still reports all five
 * items that were tested.
 */
@Test
|
||||
public void testDeduplicator() {
|
||||
|
||||
IndexResultDomainDeduplicator deduplicator = new IndexResultDomainDeduplicator(3);
|
||||
|
||||
// Items 1-3 from domain 3 are within the limit.
assertTrue(deduplicator.test(forId(3, 0)));
|
||||
assertTrue(deduplicator.test(forId(3, 1)));
|
||||
assertTrue(deduplicator.test(forId(3, 2)));
|
||||
// Items 4 and 5 exceed the limit and are rejected.
assertFalse(deduplicator.test(forId(3, 3)));
|
||||
assertFalse(deduplicator.test(forId(3, 4)));
|
||||
|
||||
// Rejected items are still counted: five calls to test() in total.
assertEquals(5, deduplicator.getCount(forId(3, 3)));
|
||||
}
|
||||
|
||||
/** Builds a SearchResultItem whose combined id is UrlIdCodec.encodeId(domain, ordinal). */
SearchResultItem forId(int domain, int ordinal) {
|
||||
return new SearchResultItem(UrlIdCodec.encodeId(domain, ordinal));
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user