From 1e06aee6a24cd6ebf869397783ff295362b93adf Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Wed, 3 Jan 2024 16:30:46 +0100 Subject: [PATCH] (index) Adjust BM25 parameters --- .../ranking/factors/Bm25Factor.java | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java b/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java index a11281db..43a63ab6 100644 --- a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java +++ b/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java @@ -56,16 +56,27 @@ public class Bm25Factor { int pcount = keyword.positionCount(); double qcount = 0.; + if ((keyword.encodedWordMetadata() & WordFlags.Site.asBit()) != 0) qcount += 0.5; if ((keyword.encodedWordMetadata() & WordFlags.SiteAdjacent.asBit()) != 0) qcount += 0.5; - if ((keyword.encodedWordMetadata() & WordFlags.UrlPath.asBit()) != 0) - qcount += 1.25; - if ((keyword.encodedWordMetadata() & WordFlags.UrlDomain.asBit()) != 0) - qcount += 1.25; - if ((keyword.encodedWordMetadata() & WordFlags.ExternalLink.asBit()) != 0) + + if ((keyword.encodedWordMetadata() & WordFlags.ExternalLink.asBit()) != 0) { qcount += 2.5; + + if ((keyword.encodedWordMetadata() & WordFlags.UrlDomain.asBit()) != 0) + qcount += 2.5; + else if ((keyword.encodedWordMetadata() & WordFlags.UrlPath.asBit()) != 0) + qcount += 1; + } + else { + if ((keyword.encodedWordMetadata() & WordFlags.UrlPath.asBit()) != 0) + qcount += 1; + if ((keyword.encodedWordMetadata() & WordFlags.UrlDomain.asBit()) != 0) + qcount += 1.5; + } + if ((keyword.encodedWordMetadata() & WordFlags.Title.asBit()) != 0) qcount += 1.5;