(converter) Adjust which flags are set by anchor text keywords
It's a mistake to let anchor text keywords bleed into the Title flag, as Title is a high-quality signal. We'll co-opt the Site and SiteAdjacent flags instead to reinforce ExternalLink when the keyword count is high.
This commit is contained in:
parent
3fff7f6878
commit
0081328aca
@ -46,7 +46,7 @@ public class AnchorTextKeywords {
|
||||
return ret;
|
||||
}
|
||||
|
||||
public List<String> getAnchorTextKeywords(DomainLinks links, EdgeUrl url) {
|
||||
public Map<String, Integer> getAnchorTextKeywords(DomainLinks links, EdgeUrl url) {
|
||||
var keywordsRaw = links.forUrl(url);
|
||||
|
||||
// Extract and count keywords from anchor text
|
||||
@ -62,10 +62,10 @@ public class AnchorTextKeywords {
|
||||
}
|
||||
|
||||
// Filter out keywords that appear infrequently
|
||||
final List<String> keywords = new ArrayList<>(wordsWithCount.size());
|
||||
final Map<String, Integer> keywords = new HashMap<>(wordsWithCount.size());
|
||||
for (var wordEntry : wordsWithCount.entrySet()) {
|
||||
if (wordEntry.getValue() > 2) {
|
||||
keywords.add(wordEntry.getKey());
|
||||
keywords.put(wordEntry.getKey(), wordEntry.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -75,12 +75,20 @@ public class DocumentKeywordsBuilder {
|
||||
newWords.forEach(word -> words.putIfAbsent(word, meta));
|
||||
}
|
||||
|
||||
public void addAnchorTerms(List<String> keywords) {
|
||||
long meta = WordFlags.Title.asBit()
|
||||
| WordFlags.ExternalLink.asBit()
|
||||
| (1L << WordMetadata.POSITIONS_SHIFT);
|
||||
public void addAnchorTerms(Map<String, Integer> keywords) {
|
||||
long flagA = WordFlags.ExternalLink.asBit();
|
||||
long flagB = flagA | WordFlags.Site.asBit();
|
||||
long flagC = flagB | WordFlags.SiteAdjacent.asBit();
|
||||
|
||||
keywords.forEach(word -> words.mergeLong(word, meta, (a, b) -> a|b));
|
||||
keywords.forEach((word, count) -> {
|
||||
if (count > 5) {
|
||||
words.mergeLong(word, flagC, (a, b) -> a|b);
|
||||
} else if (count > 2) {
|
||||
words.mergeLong(word, flagB, (a, b) -> a|b);
|
||||
} else {
|
||||
words.mergeLong(word, flagA, (a, b) -> a|b);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public List<String> getWordsWithAnyFlag(long flags) {
|
||||
|
Loading…
Reference in New Issue
Block a user