diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java index 9e08ba35..ef286613 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java @@ -53,13 +53,16 @@ public final class SearchResultKeywordScore { public double termValue() { double sum = 0; + double tfIdf = WordMetadata.decodeTfidf(encodedWordMetadata); + int positionBits = WordMetadata.decodePositions(encodedWordMetadata); + if (hasTermFlag(WordFlags.Title)) { sum -= 15; } - if (hasTermFlag(WordFlags.Site)) { + if (hasTermFlag(WordFlags.Site) && positionBits != 0) { sum -= 10; - } else if (hasTermFlag(WordFlags.SiteAdjacent)) { + } else if (hasTermFlag(WordFlags.SiteAdjacent) && positionBits != 0) { sum -= 5; } @@ -78,8 +81,6 @@ public final class SearchResultKeywordScore { sum -= 5; } - double tfIdf = WordMetadata.decodeTfidf(encodedWordMetadata); - int positionBits = WordMetadata.decodePositions(encodedWordMetadata); sum -= tfIdf / 10.; sum -= Integer.bitCount(positionBits) / 3.; diff --git a/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java b/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java index 511563f3..e07cbcbb 100644 --- a/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java +++ b/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java @@ -73,7 +73,7 @@ public record WordMetadata(int tfIdf, sb.append('[') .append("tfidf=").append(tfIdf).append(", ") .append("positions=[").append(BrailleBlockPunchCards.printBits(positions, 32)).append(']'); - sb.append(", flags=").append(flags).append(']'); + sb.append(", flags=").append(flagSet()).append(']'); return sb.toString(); } @@ -101,4 +101,8 @@ public record WordMetadata(int tfIdf, public EnumSet flagSet() { return WordFlags.decode(flags); } + + public int positionCount() { + return Integer.bitCount(positions); + } } diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/valuation/SearchResultValuator.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/valuation/SearchResultValuator.java index 46fb0cb5..8055dcf8 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/valuation/SearchResultValuator.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/valuation/SearchResultValuator.java @@ -138,25 +138,29 @@ public class SearchResultValuator { } private double calculateSingleTermBonus(SearchResultsKeywordSet set, double totalFactor) { - var theKeyword = set.iterator().next(); + final var theKeyword = set.iterator().next(); - if (theKeyword.wordMetadata.hasFlag(WordFlags.Title)) { + final var wordMetadata = theKeyword.wordMetadata; + final int posCount = wordMetadata.positionCount(); + + if (wordMetadata.hasFlag(WordFlags.Title)) { return totalFactor * 0.5; } - else if (theKeyword.wordMetadata.hasFlag(WordFlags.Subjects)) { + else if (wordMetadata.hasFlag(WordFlags.Subjects)) { return totalFactor * 0.6; } - else if (theKeyword.wordMetadata.hasFlag(WordFlags.SiteAdjacent)) { + else if (wordMetadata.hasFlag(WordFlags.SiteAdjacent) && posCount > 0) { return totalFactor * 0.65; } - else if (theKeyword.wordMetadata.hasFlag(WordFlags.Site)) { + else if (wordMetadata.hasFlag(WordFlags.Site) && posCount > 0) { return totalFactor * 0.7; } - if (theKeyword.wordMetadata.hasFlag(WordFlags.UrlDomain)) { + if (wordMetadata.hasFlag(WordFlags.UrlDomain)) { return totalFactor * 0.8; } - else if (theKeyword.wordMetadata.hasFlag(WordFlags.UrlPath)) { + else if (wordMetadata.hasFlag(WordFlags.UrlPath) && posCount > 2) + { return totalFactor * 0.9; } @@ -213,6 +217,8 @@ public class SearchResultValuator { final double k = keyword.weight() / totalWeight; + int posCount = keyword.wordMetadata.positionCount(); + EnumSet flags = keyword.flags(); final boolean title = flags.contains(WordFlags.Title); @@ -235,11 +241,12 @@ public class SearchResultValuator { } } - if (site) { - f *= Math.pow(0.75, k); - } - else if (siteAdjacent) { - f *= Math.pow(0.8, k); + if (posCount != 0) { + if (site) { + f *= Math.pow(0.75, k); + } else if (siteAdjacent) { + f *= Math.pow(0.8, k); + } } if (subject) { @@ -249,7 +256,7 @@ public class SearchResultValuator { if (urlDomain) { f *= Math.pow(0.8, k); } - else if (urlPath) { + else if (urlPath && posCount > 1) { f *= Math.pow(0.9, k); }