Fix a bug where results would sometimes be presented solely because the keyword is important on the site in general, regardless of whether it is important to the document itself.
This commit is contained in:
parent
2e2916cebe
commit
4cec89da91
@ -53,13 +53,16 @@ public final class SearchResultKeywordScore {
|
||||
public double termValue() {
|
||||
double sum = 0;
|
||||
|
||||
double tfIdf = WordMetadata.decodeTfidf(encodedWordMetadata);
|
||||
int positionBits = WordMetadata.decodePositions(encodedWordMetadata);
|
||||
|
||||
if (hasTermFlag(WordFlags.Title)) {
|
||||
sum -= 15;
|
||||
}
|
||||
|
||||
if (hasTermFlag(WordFlags.Site)) {
|
||||
if (hasTermFlag(WordFlags.Site) && positionBits != 0) {
|
||||
sum -= 10;
|
||||
} else if (hasTermFlag(WordFlags.SiteAdjacent)) {
|
||||
} else if (hasTermFlag(WordFlags.SiteAdjacent) && positionBits != 0) {
|
||||
sum -= 5;
|
||||
}
|
||||
|
||||
@ -78,8 +81,6 @@ public final class SearchResultKeywordScore {
|
||||
sum -= 5;
|
||||
}
|
||||
|
||||
double tfIdf = WordMetadata.decodeTfidf(encodedWordMetadata);
|
||||
int positionBits = WordMetadata.decodePositions(encodedWordMetadata);
|
||||
|
||||
sum -= tfIdf / 10.;
|
||||
sum -= Integer.bitCount(positionBits) / 3.;
|
||||
|
@ -73,7 +73,7 @@ public record WordMetadata(int tfIdf,
|
||||
sb.append('[')
|
||||
.append("tfidf=").append(tfIdf).append(", ")
|
||||
.append("positions=[").append(BrailleBlockPunchCards.printBits(positions, 32)).append(']');
|
||||
sb.append(", flags=").append(flags).append(']');
|
||||
sb.append(", flags=").append(flagSet()).append(']');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@ -101,4 +101,8 @@ public record WordMetadata(int tfIdf,
|
||||
public EnumSet<WordFlags> flagSet() {
|
||||
return WordFlags.decode(flags);
|
||||
}
|
||||
|
||||
public int positionCount() {
|
||||
return Integer.bitCount(positions);
|
||||
}
|
||||
}
|
||||
|
@ -138,25 +138,29 @@ public class SearchResultValuator {
|
||||
}
|
||||
|
||||
private double calculateSingleTermBonus(SearchResultsKeywordSet set, double totalFactor) {
|
||||
var theKeyword = set.iterator().next();
|
||||
final var theKeyword = set.iterator().next();
|
||||
|
||||
if (theKeyword.wordMetadata.hasFlag(WordFlags.Title)) {
|
||||
final var wordMetadata = theKeyword.wordMetadata;
|
||||
final int posCount = wordMetadata.positionCount();
|
||||
|
||||
if (wordMetadata.hasFlag(WordFlags.Title)) {
|
||||
return totalFactor * 0.5;
|
||||
}
|
||||
else if (theKeyword.wordMetadata.hasFlag(WordFlags.Subjects)) {
|
||||
else if (wordMetadata.hasFlag(WordFlags.Subjects)) {
|
||||
return totalFactor * 0.6;
|
||||
}
|
||||
else if (theKeyword.wordMetadata.hasFlag(WordFlags.SiteAdjacent)) {
|
||||
else if (wordMetadata.hasFlag(WordFlags.SiteAdjacent) && posCount > 0) {
|
||||
return totalFactor * 0.65;
|
||||
}
|
||||
else if (theKeyword.wordMetadata.hasFlag(WordFlags.Site)) {
|
||||
else if (wordMetadata.hasFlag(WordFlags.Site) && posCount > 0) {
|
||||
return totalFactor * 0.7;
|
||||
}
|
||||
|
||||
if (theKeyword.wordMetadata.hasFlag(WordFlags.UrlDomain)) {
|
||||
if (wordMetadata.hasFlag(WordFlags.UrlDomain)) {
|
||||
return totalFactor * 0.8;
|
||||
}
|
||||
else if (theKeyword.wordMetadata.hasFlag(WordFlags.UrlPath)) {
|
||||
else if (wordMetadata.hasFlag(WordFlags.UrlPath) && posCount > 2)
|
||||
{
|
||||
return totalFactor * 0.9;
|
||||
}
|
||||
|
||||
@ -213,6 +217,8 @@ public class SearchResultValuator {
|
||||
|
||||
final double k = keyword.weight() / totalWeight;
|
||||
|
||||
int posCount = keyword.wordMetadata.positionCount();
|
||||
|
||||
EnumSet<WordFlags> flags = keyword.flags();
|
||||
|
||||
final boolean title = flags.contains(WordFlags.Title);
|
||||
@ -235,11 +241,12 @@ public class SearchResultValuator {
|
||||
}
|
||||
}
|
||||
|
||||
if (site) {
|
||||
f *= Math.pow(0.75, k);
|
||||
}
|
||||
else if (siteAdjacent) {
|
||||
f *= Math.pow(0.8, k);
|
||||
if (posCount != 0) {
|
||||
if (site) {
|
||||
f *= Math.pow(0.75, k);
|
||||
} else if (siteAdjacent) {
|
||||
f *= Math.pow(0.8, k);
|
||||
}
|
||||
}
|
||||
|
||||
if (subject) {
|
||||
@ -249,7 +256,7 @@ public class SearchResultValuator {
|
||||
if (urlDomain) {
|
||||
f *= Math.pow(0.8, k);
|
||||
}
|
||||
else if (urlPath) {
|
||||
else if (urlPath && posCount > 1) {
|
||||
f *= Math.pow(0.9, k);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user