CatgirlIntelligenceAgency/code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java
Viktor Lofgren 1d34224416 (refac) Remove src/main from all source code paths.
Look, this will make the git history look funny, but trimming unnecessary depth from the source tree is a very necessary sanity-preserving measure when dealing with a super-modularized codebase like this one.

While it makes the project configuration a bit less conventional, it will save you several clicks every time you jump between modules.  Which you'll do a lot, because it's *modul*ar.  The src/main/java convention makes a lot of sense for a non-modular project though.  This ain't that.
2024-02-23 16:13:40 +01:00
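
To make the flatter layout concrete: once src/main is gone, each module's build script has to point Gradle at source directories sitting directly under the module root. The fragment below is a minimal, hypothetical build.gradle.kts sketch of that idea, not the repository's actual build script; the test/ directory name is taken from the file path above, while the java/, resources/ and test-resources/ names and the Kotlin DSL flavour are assumptions.

    plugins {
        `java-library`
    }

    // Redirect the standard source sets from src/main/** and src/test/**
    // to directories directly under the module root.
    sourceSets {
        named("main") {
            java.setSrcDirs(listOf("java"))                 // assumed main source dir
            resources.setSrcDirs(listOf("resources"))       // assumed resources dir
        }
        named("test") {
            java.setSrcDirs(listOf("test"))                 // matches code/index/test/... above
            resources.setSrcDirs(listOf("test-resources"))  // assumed test resources dir
        }
    }

With something like that in place, a test such as the one below runs through the normal test task; only its on-disk location changes.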

package nu.marginalia.ranking.results;

import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.model.idx.DocumentFlags;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.crawl.PubDate;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.ranking.results.factors.*;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

import java.util.*;

import static org.mockito.Mockito.when;
class ResultValuatorTest {

    TermFrequencyDict dict;
    ResultValuator valuator;

    @BeforeEach
    public void setUp() {
        // Mock the term frequency dictionary so the test doesn't depend on real corpus statistics
        dict = Mockito.mock(TermFrequencyDict.class);
        when(dict.docCount()).thenReturn(100_000);

        valuator = new ResultValuator(
                new Bm25Factor(),
                new TermCoherenceFactor(),
                new PriorityTermBonus()
        );
    }
    // "bob" appears at a single position and is flagged as a title word
    List<SearchResultKeywordScore> titleOnlyLowCountSet = List.of(
            new SearchResultKeywordScore(0, "bob",
                    wordMetadata(Set.of(1), EnumSet.of(WordFlags.Title)),
                    docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class)),
                    0, false)
    );

    // "bob" appears at many positions and is flagged as high tf-idf, but not as a title word
    List<SearchResultKeywordScore> highCountNoTitleSet = List.of(
            new SearchResultKeywordScore(0, "bob",
                    wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh)),
                    docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class)),
                    0, false)
    );

    // As above, but additionally flagged as a subject term
    List<SearchResultKeywordScore> highCountSubjectSet = List.of(
            new SearchResultKeywordScore(0, "bob",
                    wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh, WordFlags.Subjects)),
                    docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class)),
                    0, false)
    );
    @Test
    void evaluateTerms() {
        when(dict.getTermFreq("bob")).thenReturn(10);

        ResultRankingContext context = new ResultRankingContext(100000,
                ResultRankingParameters.sensibleDefaults(),
                Map.of("bob", 10), Collections.emptyMap());

        double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, 10_000, context);
        double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, 10_000, context);
        double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, 10_000, context);
        double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, 10_000, context);

        // The scores are printed for manual inspection rather than asserted against fixed values
        System.out.println(titleOnlyLowCount);
        System.out.println(titleLongOnlyLowCount);
        System.out.println(highCountNoTitle);
        System.out.println(highCountSubject);
    }
    // Encode topology, publication year, quality and flags into a packed document metadata long
    private long docMetadata(int topology, int year, int quality, EnumSet<DocumentFlags> flags) {
        return new DocumentMetadata(topology, PubDate.toYearByte(year), quality, flags).encode();
    }

    // Encode the given positions and flags into a packed word metadata long
    private long wordMetadata(Set<Integer> positions, Set<WordFlags> wordFlags) {
        // OR the position bits together, masked to the bit range WordMetadata stores
        long posBits = positions.stream()
                .mapToLong(i -> ((1L << i) & 0xFF_FFFF_FFFF_FFFFL))
                .reduce((a,b) -> a|b)
                .orElse(0L);

        return new WordMetadata(posBits, wordFlags).encode();
    }
}