(search) Expose more search options

This change set updates the query APIs to enable the search service to add additional criteria, such as QueryStrategy and TemporalBias.

The QueryStrategy makes it possible to, e.g., require that a match be in the title of a result, and TemporalBias enables penalizing results that are not within a particular time period.

These options are added to the search interface. The old 'recent results' filter is modified to use TemporalBias, and a new filter 'Search In Title' is added as well.

The vintage filter is modified to add a temporal bias for the past.
This commit is contained in:
Viktor Lofgren 2024-02-15 13:39:51 +01:00
parent 652d151373
commit 66b3e71e56
16 changed files with 176 additions and 58 deletions

View File

@ -88,14 +88,15 @@ public class IndexProtobufCodec {
params.getBm25FullWeight(),
params.getBm25PrioWeight(),
params.getTcfWeight(),
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().name()),
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
params.getTemporalBiasWeight()
);
};
public static RpcResultRankingParameters convertRankingParameterss(ResultRankingParameters rankingParams) {
return
RpcResultRankingParameters.newBuilder()
public static RpcResultRankingParameters convertRankingParameterss(ResultRankingParameters rankingParams,
RpcTemporalBias temporalBias)
{
var builder = RpcResultRankingParameters.newBuilder()
.setFullB(rankingParams.fullParams.b())
.setFullK(rankingParams.fullParams.k())
.setPrioB(rankingParams.prioParams.b())
@ -109,9 +110,16 @@ public class IndexProtobufCodec {
.setBm25FullWeight(rankingParams.bm25FullWeight)
.setBm25PrioWeight(rankingParams.bm25PrioWeight)
.setTcfWeight(rankingParams.tcfWeight)
.setTemporalBias(RpcResultRankingParameters.TEMPORAL_BIAS.valueOf(rankingParams.temporalBias.name()))
.setTemporalBiasWeight(rankingParams.temporalBiasWeight)
.build();
.setTemporalBiasWeight(rankingParams.temporalBiasWeight);
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
builder.setTemporalBias(temporalBias);
}
else {
builder.setTemporalBias(RpcTemporalBias.newBuilder()
.setBias(RpcTemporalBias.Bias.valueOf(rankingParams.temporalBias.name())));
}
return builder.build();
}
}

View File

@ -52,7 +52,7 @@ public class ResultRankingParameters {
.bm25PrioWeight(1.)
.tcfWeight(2.)
.temporalBias(TemporalBias.NONE)
.temporalBiasWeight(1. / (10.))
.temporalBiasWeight(1. / (5.))
.build();
}

View File

@ -51,7 +51,8 @@ message RpcQsQuery {
repeated int32 domainIds = 12;
RpcQueryLimits queryLimits = 13;
string searchSetIdentifier = 14;
string queryStrategy = 15; // Named query configuration
RpcTemporalBias temporalBias = 16;
}
/* Query service query response */
@ -63,6 +64,15 @@ message RpcQsResponse {
string domain = 5;
}
// Temporal ranking bias; used as a field type by both RpcQsQuery and
// RpcResultRankingParameters.
message RpcTemporalBias {
enum Bias {
NONE = 0; // No temporal preference
RECENT = 1; // Prefer recent documents
OLD = 2; // Prefer older documents
}
Bias bias = 1;
}
/* Index service query request */
message RpcIndexQuery {
repeated RpcSubquery subqueries = 1;
@ -146,14 +156,8 @@ message RpcResultRankingParameters {
double bm25FullWeight = 11;
double bm25PrioWeight = 12;
double tcfWeight = 13;
TEMPORAL_BIAS temporalBias = 14;
RpcTemporalBias temporalBias = 14;
double temporalBiasWeight = 15;
enum TEMPORAL_BIAS {
NONE = 0;
RECENT = 1; // Prefer recent documents
OLD = 2; // Prefer older documents
}
}
/* Defines a single subquery */

View File

@ -1,5 +1,6 @@
package nu.marginalia.index.client;
import nu.marginalia.index.api.RpcTemporalBias;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
@ -23,7 +24,7 @@ class IndexProtobufCodecTest {
@Test
public void testRankingParameters() {
verifyIsIdentityTransformation(ResultRankingParameters.sensibleDefaults(),
p -> IndexProtobufCodec.convertRankingParameterss(IndexProtobufCodec.convertRankingParameterss(p)));
p -> IndexProtobufCodec.convertRankingParameterss(IndexProtobufCodec.convertRankingParameterss(p, null)));
}
@Test

View File

@ -3,10 +3,10 @@ package nu.marginalia.query;
import lombok.SneakyThrows;
import nu.marginalia.index.api.*;
import nu.marginalia.index.client.IndexProtobufCodec;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.results.DecoratedSearchResultItem;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.client.model.results.SearchResultItem;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.index.query.limit.QueryStrategy;
@ -41,8 +41,14 @@ public class QueryProtobufCodec {
builder.setDomainCount(convertSpecLimit(query.specs.domainCount));
builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits));
builder.setQueryStrategy(query.specs.queryStrategy.name());
builder.setParameters(IndexProtobufCodec.convertRankingParameterss(query.specs.rankingParams));
// Query strategy may be overridden by the query, but if not, use the one from the request
if (query.specs.queryStrategy != null && query.specs.queryStrategy != QueryStrategy.AUTO)
builder.setQueryStrategy(query.specs.queryStrategy.name());
else
builder.setQueryStrategy(request.getQueryStrategy());
builder.setParameters(IndexProtobufCodec.convertRankingParameterss(query.specs.rankingParams, request.getTemporalBias()));
return builder.build();
}
@ -62,7 +68,10 @@ public class QueryProtobufCodec {
convertSpecLimit(request.getDomainCount()),
request.getDomainIdsList(),
IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()),
request.getSearchSetIdentifier());
request.getSearchSetIdentifier(),
QueryStrategy.valueOf(request.getQueryStrategy()),
ResultRankingParameters.TemporalBias.valueOf(request.getTemporalBias().getBias().name())
);
}
@ -159,7 +168,11 @@ public class QueryProtobufCodec {
.setYear(convertSpecLimit(params.year()))
.setSize(convertSpecLimit(params.size()))
.setRank(convertSpecLimit(params.rank()))
.setSearchSetIdentifier(params.identifier());
.setSearchSetIdentifier(params.identifier())
.setQueryStrategy(params.queryStrategy().name())
.setTemporalBias(RpcTemporalBias.newBuilder()
.setBias(RpcTemporalBias.Bias.valueOf(params.temporalBias().name()))
.build());
if (params.nearDomain() != null)
builder.setNearDomain(params.nearDomain());

View File

@ -1,6 +1,8 @@
package nu.marginalia.query.model;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import javax.annotation.Nullable;
@ -21,7 +23,9 @@ public record QueryParams(
SpecificationLimit domainCount,
List<Integer> domainIds,
QueryLimits limits,
String identifier
String identifier,
QueryStrategy queryStrategy,
ResultRankingParameters.TemporalBias temporalBias
)
{
public QueryParams(String query, QueryLimits limits, String identifier) {
@ -37,7 +41,9 @@ public record QueryParams(
SpecificationLimit.none(),
List.of(),
limits,
identifier
identifier,
QueryStrategy.AUTO,
ResultRankingParameters.TemporalBias.NONE
);
}
}

View File

@ -2,7 +2,9 @@ package nu.marginalia.search;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.search.command.SearchParameters;
@ -19,8 +21,6 @@ public class SearchQueryParamFactory {
userParams.js().addTacitTerms(prototype);
userParams.adtech().addTacitTerms(prototype);
SpecificationLimit yearLimit = userParams.recent().yearLimit();
return new QueryParams(
userParams.query(),
null,
@ -29,13 +29,15 @@ public class SearchQueryParamFactory {
prototype.searchTermsPriority,
prototype.searchTermsAdvice,
profile.getQualityLimit(),
yearLimit,
SpecificationLimit.none(),
profile.getSizeLimit(),
SpecificationLimit.none(),
SpecificationLimit.none(),
List.of(),
new QueryLimits(5, 100, 200, 8192),
profile.searchSetIdentifier.name()
profile.searchSetIdentifier.name(),
userParams.strategy(),
userParams.temporalBias()
);
}
@ -54,7 +56,9 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(),
List.of(),
new QueryLimits(count, count, 100, 512),
SearchSetIdentifier.NONE.name()
SearchSetIdentifier.NONE.name(),
QueryStrategy.AUTO,
ResultRankingParameters.TemporalBias.NONE
);
}
@ -72,7 +76,9 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(),
List.of(),
new QueryLimits(100, 100, 100, 512),
SearchSetIdentifier.NONE.name()
SearchSetIdentifier.NONE.name(),
QueryStrategy.AUTO,
ResultRankingParameters.TemporalBias.NONE
);
}
@ -90,7 +96,9 @@ public class SearchQueryParamFactory {
SpecificationLimit.none(),
List.of(),
new QueryLimits(100, 100, 100, 512),
SearchSetIdentifier.NONE.name()
SearchSetIdentifier.NONE.name(),
QueryStrategy.AUTO,
ResultRankingParameters.TemporalBias.NONE
);
}
}

View File

@ -1,15 +1,20 @@
package nu.marginalia.search.command;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.search.model.SearchProfile;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import static nu.marginalia.search.command.SearchRecentParameter.RECENT;
public record SearchParameters(String query,
SearchProfile profile,
SearchJsParameter js,
SearchRecentParameter recent,
SearchTitleParameter title,
SearchAdtechParameter adtech
) {
public String profileStr() {
@ -17,29 +22,53 @@ public record SearchParameters(String query,
}
public SearchParameters withProfile(SearchProfile profile) {
return new SearchParameters(query, profile, js, recent, adtech);
return new SearchParameters(query, profile, js, recent, title, adtech);
}
public SearchParameters withJs(SearchJsParameter js) {
return new SearchParameters(query, profile, js, recent, adtech);
return new SearchParameters(query, profile, js, recent, title, adtech);
}
public SearchParameters withAdtech(SearchAdtechParameter adtech) {
return new SearchParameters(query, profile, js, recent, adtech);
return new SearchParameters(query, profile, js, recent, title, adtech);
}
public SearchParameters withRecent(SearchRecentParameter recent) {
return new SearchParameters(query, profile, js, recent, adtech);
return new SearchParameters(query, profile, js, recent, title, adtech);
}
/** Returns a copy of these parameters in which only the title filter is replaced. */
public SearchParameters withTitle(SearchTitleParameter title) {
    final SearchParameters updated =
            new SearchParameters(this.query, this.profile, this.js, this.recent, title, this.adtech);
    return updated;
}
public String renderUrl(WebsiteUrl baseUrl) {
String path = String.format("/search?query=%s&profile=%s&js=%s&adtech=%s&recent=%s",
String path = String.format("/search?query=%s&profile=%s&js=%s&adtech=%s&recent=%s&searchTitle=%s",
URLEncoder.encode(query, StandardCharsets.UTF_8),
URLEncoder.encode(profile.filterId, StandardCharsets.UTF_8),
URLEncoder.encode(js.value, StandardCharsets.UTF_8),
URLEncoder.encode(adtech.value, StandardCharsets.UTF_8),
URLEncoder.encode(recent.value, StandardCharsets.UTF_8)
URLEncoder.encode(recent.value, StandardCharsets.UTF_8),
URLEncoder.encode(title.value, StandardCharsets.UTF_8)
);
return baseUrl.withPath(path);
}
/**
 * Derives the temporal ranking bias from the active filters.
 * The 'recent' filter is checked first and wins over the vintage profile;
 * when neither applies, no bias is requested.
 */
public ResultRankingParameters.TemporalBias temporalBias() {
    if (recent == RECENT) {
        return ResultRankingParameters.TemporalBias.RECENT;
    }

    return (profile == SearchProfile.VINTAGE)
            ? ResultRankingParameters.TemporalBias.OLD
            : ResultRankingParameters.TemporalBias.NONE;
}
/**
 * Maps the title filter onto a query strategy: a title-only search requires
 * the match to be in the title field, anything else leaves the choice on AUTO.
 */
public QueryStrategy strategy() {
    return (title == SearchTitleParameter.TITLE)
            ? QueryStrategy.REQUIRE_FIELD_TITLE
            : QueryStrategy.AUTO;
}
}

View File

@ -1,11 +1,6 @@
package nu.marginalia.search.command;
import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.query.limit.SpecificationLimit;
import javax.annotation.Nullable;
import java.time.LocalDateTime;
import java.util.Arrays;
public enum SearchRecentParameter {
DEFAULT("default"),
@ -23,11 +18,4 @@ public enum SearchRecentParameter {
return DEFAULT;
}
public SpecificationLimit yearLimit() {
if (this == RECENT) {
return SpecificationLimit.greaterThan(LocalDateTime.now().getYear() - 1);
} else {
return SpecificationLimit.none();
}
}
}

View File

@ -0,0 +1,21 @@
package nu.marginalia.search.command;
import javax.annotation.Nullable;
/**
 * Two-state option selecting whether a search is limited to title matches.
 */
public enum SearchTitleParameter {
    DEFAULT("default"),
    TITLE("title");

    /** String form of this option, as compared against by {@link #parse(String)}. */
    public final String value;

    SearchTitleParameter(String value) {
        this.value = value;
    }

    /**
     * Resolves a raw string to an option.
     *
     * @param value raw value, may be {@code null}
     * @return {@link #TITLE} when {@code value} equals TITLE's string form,
     *         otherwise (including for {@code null}) {@link #DEFAULT}
     */
    public static SearchTitleParameter parse(@Nullable String value) {
        return TITLE.value.equals(value) ? TITLE : DEFAULT;
    }
}

View File

@ -2,10 +2,7 @@ package nu.marginalia.search.model;
import lombok.Getter;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.command.SearchAdtechParameter;
import nu.marginalia.search.command.SearchJsParameter;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.command.SearchRecentParameter;
import nu.marginalia.search.command.*;
import java.util.List;
@ -23,6 +20,8 @@ public class SearchFilters {
public final ReduceAdtechOption reduceAdtechOption;
@Getter
public final ShowRecentOption showRecentOption;
@Getter
public final SearchTitleOption searchTitleOption;
@Getter
public final List<List<Filter>> filterGroups;
@ -35,6 +34,7 @@ public class SearchFilters {
removeJsOption = new RemoveJsOption(parameters);
reduceAdtechOption = new ReduceAdtechOption(parameters);
showRecentOption = new ShowRecentOption(parameters);
searchTitleOption = new SearchTitleOption(parameters);
currentFilter = parameters.profile().filterId;
@ -141,6 +141,32 @@ public class SearchFilters {
}
}
/**
 * State of the "Search In Title" filter together with the URL that toggles it.
 */
public class SearchTitleOption {
    private final SearchTitleParameter value;

    @Getter
    public final String url;

    public SearchTitleOption(SearchParameters parameters) {
        this.value = parameters.title();

        // The link always points at the opposite of the current state.
        // As before, a null title fails here with a NullPointerException.
        final SearchTitleParameter flipped = this.value.equals(SearchTitleParameter.TITLE)
                ? SearchTitleParameter.DEFAULT
                : SearchTitleParameter.TITLE;

        this.url = parameters.withTitle(flipped).renderUrl(SearchFilters.this.url);
    }

    /** @return true when the title filter is currently active */
    public boolean isSet() {
        return value == SearchTitleParameter.TITLE;
    }

    /** @return the label shown for this filter */
    public String name() {
        return "Search In Title";
    }
}
public class Filter {
@Getter
public final String displayName;

View File

@ -1,7 +1,6 @@
package nu.marginalia.search.svc;
import com.google.inject.Inject;
import io.prometheus.client.Histogram;
import lombok.SneakyThrows;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.command.*;
@ -61,7 +60,9 @@ public class SearchQueryService {
SearchProfile.getSearchProfile(request.queryParams("profile")),
SearchJsParameter.parse(request.queryParams("js")),
SearchRecentParameter.parse(request.queryParams("recent")),
SearchAdtechParameter.parse(request.queryParams("adtech")));
SearchTitleParameter.parse(request.queryParams("searchTitle")),
SearchAdtechParameter.parse(request.queryParams("adtech"))
);
}
catch (Exception ex) {
// Bots keep sending bad requests, suppress the error otherwise it will

View File

@ -17,7 +17,15 @@
</li>
{{/with}}
{{#with showRecentOption}}
<li title="Show Recent Options"
<li title="Prefer recent results"
{{#if set}}aria-checked="true" class="current"{{/if}}
{{#unless set}}aria-checked="false"{{/unless}}
role="checkbox">
<a href="{{url}}">{{name}}</a>
</li>
{{/with}}
{{#with searchTitleOption}}
<li title="Require title match"
{{#if set}}aria-checked="true" class="current"{{/if}}
{{#unless set}}aria-checked="false"{{/unless}}
role="checkbox">

View File

@ -7,6 +7,7 @@
<input type="text" id="query" name="query" placeholder="Search..." value="{{query}}">
<input type="hidden" name="js" value="{{js}}">
<input type="hidden" name="adtech" value="{{adtech}}">
<input type="hidden" name="searchTitle" value="{{searchTitle}}">
<input type="hidden" name="profile" value="{{profile}}">
<input type="hidden" name="recent" value="{{recent}}">

View File

@ -17,7 +17,7 @@ class BangCommandTest {
bangCommand.process(Context.internal(),
null,
new SearchParameters(" !g test",
null, null, null, null)
null, null, null, null, null)
);
Assertions.fail("Should have thrown RedirectException");
}

View File

@ -3,7 +3,9 @@ package nu.marginalia.query.svc;
import nu.marginalia.WmsaHome;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.index.query.limit.SpecificationLimitType;
import nu.marginalia.language.EnglishDictionary;
@ -49,7 +51,9 @@ public class QueryFactoryTest {
SpecificationLimit.none(),
null,
new QueryLimits(100, 100, 100, 100),
"NONE")).specs;
"NONE",
QueryStrategy.AUTO,
ResultRankingParameters.TemporalBias.NONE)).specs;
}
@Test