(assistant) Clean up the site info related domains view by filtering viable domains

This commit is contained in:
Viktor Lofgren 2023-12-16 18:27:24 +01:00
parent 117ddd17d7
commit 34d4834ff6

View File

@ -246,11 +246,26 @@ public class SimilarDomainsService {
));
}
domains.removeIf(d -> d.url().domain.toString().length() > 32);
domains.removeIf(this::shouldRemove);
return domains;
}
/**
 * Predicate deciding whether a similar-domain entry should be dropped
 * from the result list.
 *
 * An entry is dropped when either of the following holds:
 * <ul>
 *   <li>the string form of its domain is longer than 32 characters;</li>
 *   <li>it is neither indexed nor active, and its relatedness is
 *       below 0.5 -- such entries have a relatively high likelihood of
 *       being dead links, or of not being very interesting.</li>
 * </ul>
 *
 * @param domainResult the candidate entry to evaluate
 * @return true if the entry should be removed, false if it should be kept
 */
boolean shouldRemove(SimilarDomain domainResult) {
    // Length check comes first and short-circuits everything else
    final int domainNameLength = domainResult.url().domain.toString().length();
    if (domainNameLength > 32) {
        return true;
    }

    // Neither indexed nor active, and only weakly related: likely a dead
    // link or an uninteresting domain
    return !(domainResult.indexed() || domainResult.active())
            && domainResult.relatedness() < 0.5;
}
private TIntSet getLinkingIdsDToS(int domainIdx) {
var items = linkDtoS[domainIdx];
if (items == null)
@ -332,7 +347,7 @@ public class SimilarDomainsService {
));
}
domains.removeIf(d -> d.url().domain.toString().length() > 32);
domains.removeIf(this::shouldRemove);
return domains;
}