diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultItem.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultItem.java index 23b1f14a..19f879ca 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultItem.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultItem.java @@ -11,7 +11,8 @@ import java.util.List; /** Represents a document matching a search query */ @AllArgsConstructor @Getter public class SearchResultItem implements Comparable { - /** Encoded ID that contains both the URL id and its ranking */ + /** Encoded ID that contains both the URL id and its ranking. This is + * probably not what you want, use getDocumentId() instead */ public final long combinedId; /** How did the subqueries match against the document ? */ @@ -20,8 +21,8 @@ public class SearchResultItem implements Comparable { /** How many other potential results existed in the same domain */ public int resultsFromDomain; - public SearchResultItem(long val) { - this.combinedId = val; + public SearchResultItem(long combinedId) { + this.combinedId = combinedId; this.keywordScores = new ArrayList<>(16); } @@ -66,16 +67,6 @@ public class SearchResultItem implements Comparable { return false; } - public long deduplicationKey() { - final int domainId = getDomainId(); - - if (domainId == Integer.MAX_VALUE || domainId == Integer.MIN_VALUE) { - return 0; - } - - return domainId; - } - @Override public int compareTo(@NotNull SearchResultItem o) { // this looks like a bug, but we actually want this in a reversed order diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java index 8dbf740f..4c29886a 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java @@ -5,7 +5,7 @@ import gnu.trove.map.hash.TLongIntHashMap; import nu.marginalia.index.client.model.results.SearchResultItem; public class IndexResultDomainDeduplicator { - final TLongIntMap resultsByRankingId = CachedObjects.getMap(); + final TLongIntMap resultsByDomainId = CachedObjects.getMap(); final int limitByDomain; public IndexResultDomainDeduplicator(int limitByDomain) { @@ -13,19 +13,15 @@ public class IndexResultDomainDeduplicator { } public boolean test(SearchResultItem item) { - final long key = item.deduplicationKey(); - if (key == 0) - return true; + final long key = item.getDomainId(); - return resultsByRankingId.adjustOrPutValue(key, 1, 1) <= limitByDomain; + return resultsByDomainId.adjustOrPutValue(key, 1, 1) <= limitByDomain; } public int getCount(SearchResultItem item) { - final long key = item.deduplicationKey(); - if (key == 0) - return 1; + final long key = item.getDomainId(); - return resultsByRankingId.get(key); + return resultsByDomainId.get(key); } private static class CachedObjects { @@ -38,7 +34,14 @@ public class IndexResultDomainDeduplicator { ret.clear(); return ret; } + + public static void clear() { + mapCache.remove(); + } } + static void clearCachedObjects() { + CachedObjects.clear(); + } } diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java b/code/services-core/index-service/src/test/java/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java new file mode 100644 index 00000000..a2960a67 --- /dev/null +++ b/code/services-core/index-service/src/test/java/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java @@ -0,0 +1,35 @@ +package nu.marginalia.index.results; + +import nu.marginalia.index.client.model.results.SearchResultItem; +import nu.marginalia.model.id.UrlIdCodec; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class IndexResultDomainDeduplicatorTest { + + @AfterEach + public void clear() { + IndexResultDomainDeduplicator.clearCachedObjects(); + } + + @Test + public void testDeduplicator() { + + IndexResultDomainDeduplicator deduplicator = new IndexResultDomainDeduplicator(3); + + assertTrue(deduplicator.test(forId(3, 0))); + assertTrue(deduplicator.test(forId(3, 1))); + assertTrue(deduplicator.test(forId(3, 2))); + assertFalse(deduplicator.test(forId(3, 3))); + assertFalse(deduplicator.test(forId(3, 4))); + + assertEquals(5, deduplicator.getCount(forId(3, 3))); + } + + SearchResultItem forId(int domain, int ordinal) { + return new SearchResultItem(UrlIdCodec.encodeId(domain, ordinal)); + } + +} \ No newline at end of file