From 34d4834ff6e8393ebfad22e17424396228748547 Mon Sep 17 00:00:00 2001
From: Viktor Lofgren
Date: Sat, 16 Dec 2023 18:27:24 +0100
Subject: [PATCH] (assistant) Clean up the site info related domains view by filtering viable domains

---
 .../domains/SimilarDomainsService.java        | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/SimilarDomainsService.java b/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/SimilarDomainsService.java
index 9156408c..0ef04649 100644
--- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/SimilarDomainsService.java
+++ b/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/SimilarDomainsService.java
@@ -246,11 +246,26 @@ public class SimilarDomainsService {
             ));
         }
 
-        domains.removeIf(d -> d.url().domain.toString().length() > 32);
+        domains.removeIf(this::shouldRemove);
 
         return domains;
     }
 
+    boolean shouldRemove(SimilarDomain domainResult) {
+        if (domainResult.url().domain.toString().length() > 32)
+            return true;
+
+        // Remove domains that have a relatively high likelihood of being dead links
+        // or not very interesting
+        if (!domainResult.indexed()
+                && !domainResult.active()
+                && domainResult.relatedness() < 0.5)
+        {
+            return true;
+        }
+        return false;
+    }
+
     private TIntSet getLinkingIdsDToS(int domainIdx) {
         var items = linkDtoS[domainIdx];
         if (items == null)
@@ -332,7 +347,7 @@ public class SimilarDomainsService {
             ));
         }
 
-        domains.removeIf(d -> d.url().domain.toString().length() > 32);
+        domains.removeIf(this::shouldRemove);
 
         return domains;
     }