diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java index a5f7390f..27d815e9 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java @@ -24,6 +24,8 @@ public class SearchSpecification { public final SpecificationLimit size; public final SpecificationLimit rank; + public final SpecificationLimit domainCount; + public final QueryLimits queryLimits; public final QueryStrategy queryStrategy; diff --git a/code/api/index-api/src/main/protobuf/index-api.proto b/code/api/index-api/src/main/protobuf/index-api.proto index 6862857f..a53842d4 100644 --- a/code/api/index-api/src/main/protobuf/index-api.proto +++ b/code/api/index-api/src/main/protobuf/index-api.proto @@ -25,9 +25,11 @@ message RpcQsQuery { RpcSpecLimit year = 8; RpcSpecLimit size = 9; RpcSpecLimit rank = 10; repeated int32 domainIds = 11; RpcQueryLimits queryLimits = 12; string searchSetIdentifier = 13; + RpcSpecLimit domainCount = 14; /* appended with the next unused tag; existing tags are left unchanged for wire compatibility */ + } /* Query service query response */ @@ -49,9 +51,10 @@ message RpcIndexQuery { RpcSpecLimit year = 6; RpcSpecLimit size = 7; RpcSpecLimit rank = 8; RpcQueryLimits queryLimits = 9; string queryStrategy = 10; // Named query configuration RpcResultRankingParameters parameters = 11; + RpcSpecLimit domainCount = 12; /* appended with the next unused tag; existing tags are left unchanged for wire compatibility */ } /* A tagged union encoding some limit on a field */ diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java 
b/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java index b8cd4fec..48e8a94a 100644 --- a/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java +++ b/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java @@ -38,6 +38,7 @@ public class QueryProtobufCodec { builder.setYear(convertSpecLimit(query.specs.year)); builder.setSize(convertSpecLimit(query.specs.size)); builder.setRank(convertSpecLimit(query.specs.rank)); + builder.setDomainCount(convertSpecLimit(query.specs.domainCount)); builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits)); builder.setQueryStrategy(query.specs.queryStrategy.name()); @@ -58,6 +59,7 @@ public class QueryProtobufCodec { convertSpecLimit(request.getYear()), convertSpecLimit(request.getSize()), convertSpecLimit(request.getRank()), + convertSpecLimit(request.getDomainCount()), request.getDomainIdsList(), IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()), SearchSetIdentifier.valueOf(request.getSearchSetIdentifier())); @@ -137,6 +139,7 @@ public class QueryProtobufCodec { IndexProtobufCodec.convertSpecLimit(specs.getYear()), IndexProtobufCodec.convertSpecLimit(specs.getSize()), IndexProtobufCodec.convertSpecLimit(specs.getRank()), + IndexProtobufCodec.convertSpecLimit(specs.getDomainCount()), IndexProtobufCodec.convertQueryLimits(specs.getQueryLimits()), QueryStrategy.valueOf(specs.getQueryStrategy()), convertRankingParameterss(specs.getParameters()) diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java b/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java index 6e74d90c..6b88dbc6 100644 --- a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java +++ b/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java @@ -20,6 +20,7 @@ public record QueryParams( SpecificationLimit year, SpecificationLimit size, SpecificationLimit rank, + 
SpecificationLimit domainCount, List domainIds, QueryLimits limits, SearchSetIdentifier identifier @@ -35,6 +36,7 @@ public record QueryParams( SpecificationLimit.none(), SpecificationLimit.none(), SpecificationLimit.none(), + SpecificationLimit.none(), List.of(), limits, identifier diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java index 1b840815..03060cb6 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java +++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java @@ -8,6 +8,7 @@ public record IndexQueryParams(SpecificationLimit qualityLimit, SpecificationLimit year, SpecificationLimit size, SpecificationLimit rank, + SpecificationLimit domainCount, SearchSet searchSet, QueryStrategy queryStrategy ) diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimit.java b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimit.java index 1c4a6aef..62a368c0 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimit.java +++ b/code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimit.java @@ -1,8 +1,10 @@ package nu.marginalia.index.query.limit; -import lombok.ToString; - public record SpecificationLimit(SpecificationLimitType type, int value) { + public boolean isNone() { + return type == SpecificationLimitType.NONE; + } + public static SpecificationLimit none() { return new SpecificationLimit(SpecificationLimitType.NONE, 0); } diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryParser.java b/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryParser.java index 5b01fc7b..2abda918 100644 --- 
a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryParser.java +++ b/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryParser.java @@ -82,6 +82,8 @@ public class QueryParser { entity.replace(new Token(TokenType.SIZE_TERM, t.str.substring(4), t.displayStr)); } else if (t.str.startsWith("rank") && t.str.matches("rank[=><]\\d+")) { entity.replace(new Token(TokenType.RANK_TERM, t.str.substring(4), t.displayStr)); + } else if (t.str.startsWith("count") && t.str.matches("count[=><]\\d+")) { + entity.replace(new Token(TokenType.DOMAIN_COUNT_TERM, t.str.substring(5), t.displayStr)); } else if (t.str.startsWith("qs=")) { entity.replace(new Token(TokenType.QS_TERM, t.str.substring(3), t.displayStr)); } else if (t.str.contains(":")) { diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/Token.java b/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/Token.java index b8576a51..31aa4e22 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/Token.java +++ b/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/Token.java @@ -40,6 +40,7 @@ public class Token { case YEAR_TERM: visitor.onYearTerm(this); break; case RANK_TERM: visitor.onRankTerm(this); break; + case DOMAIN_COUNT_TERM: visitor.onDomainCountTerm(this); break; case SIZE_TERM: visitor.onSizeTerm(this); break; case QS_TERM: visitor.onQsTerm(this); break; diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenType.java b/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenType.java index dc25b332..d929665f 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenType.java +++ b/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenType.java @@ -16,6 +16,7 @@ public enum TokenType implements Predicate { YEAR_TERM, 
SIZE_TERM, RANK_TERM, + DOMAIN_COUNT_TERM, NEAR_TERM, QS_TERM, diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenVisitor.java b/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenVisitor.java index d653eaf6..a5c53e4e 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenVisitor.java +++ b/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenVisitor.java @@ -9,6 +9,7 @@ public interface TokenVisitor { void onYearTerm(Token token); void onSizeTerm(Token token); void onRankTerm(Token token); + void onDomainCountTerm(Token token); void onQualityTerm(Token token); void onQsTerm(Token token); } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java index e2efb9d5..6b913402 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java @@ -29,6 +29,7 @@ public class SearchQueryParamFactory { profile.getYearLimit(), profile.getSizeLimit(), SpecificationLimit.none(), + SpecificationLimit.none(), List.of(), new QueryLimits(1, 100, 200, 8192), profile.searchSetIdentifier @@ -47,6 +48,7 @@ public class SearchQueryParamFactory { SpecificationLimit.none(), SpecificationLimit.none(), SpecificationLimit.none(), + SpecificationLimit.none(), List.of(), new QueryLimits(count, count, 100, 512), SearchSetIdentifier.NONE @@ -64,6 +66,7 @@ public class SearchQueryParamFactory { SpecificationLimit.none(), SpecificationLimit.none(), SpecificationLimit.none(), + SpecificationLimit.none(), List.of(), new QueryLimits(100, 100, 100, 512), SearchSetIdentifier.NONE @@ -81,6 +84,7 @@ public class SearchQueryParamFactory { 
SpecificationLimit.none(), SpecificationLimit.none(), SpecificationLimit.none(), + SpecificationLimit.none(), List.of(), new QueryLimits(100, 100, 100, 512), SearchSetIdentifier.NONE diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb b/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb index 88b6ad84..771ebd91 100644 --- a/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb +++ b/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb @@ -56,8 +56,12 @@ year=2005(beta) The document was ostensibly published in 2005 year<2005(beta) The document was ostensibly published in or before 2005 - rank>50(beta) The ranking of the website is at least 50 in a span of 1 - 255 - rank<50(beta) The ranking of the website is at most 50 in a span of 1 - 255 + rank>50The ranking of the website is at least 50 in a span of 1 - 255 + rank<50The ranking of the website is at most 50 in a span of 1 - 255 + + count>10 The search term must appear in at least 10 results from the domain + count<10 The search term must appear in at most 10 results from the domain + format:html5Filter documents using the HTML5 standard. This is typically modern websites. 
format:xhtmlFilter documents using the XHTML standard diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java index f00bace2..a912beee 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java @@ -415,6 +415,13 @@ public class IndexQueryService extends IndexApiImplBase { } } + if (!params.queryParams.domainCount().isNone()) { + // Remove items that don't meet the domain count requirement + // This isn't perfect because the domain count is calculated + // after the results are sorted + resultsList.removeIf(item -> !params.queryParams.domainCount().test(domainCountFilter.getCount(item))); + } + if (resultsList.size() > params.limitTotal) { // This can't be made a stream limit() operation because we need domainCountFilter // to run over the entire list to provide accurate statistics diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchParameters.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchParameters.java index 141dc32d..52195a6b 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchParameters.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchParameters.java @@ -65,6 +65,7 @@ public class SearchParameters { specsSet.year, specsSet.size, specsSet.rank, + specsSet.domainCount, searchSet, specsSet.queryStrategy); @@ -90,6 +91,7 @@ public class SearchParameters { IndexProtobufCodec.convertSpecLimit(request.getYear()), IndexProtobufCodec.convertSpecLimit(request.getSize()), IndexProtobufCodec.convertSpecLimit(request.getRank()), + IndexProtobufCodec.convertSpecLimit(request.getDomainCount()), searchSet, 
QueryStrategy.valueOf(request.getQueryStrategy())); diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryFactory.java b/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryFactory.java index 9ece8551..d0234326 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryFactory.java +++ b/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryFactory.java @@ -127,6 +127,7 @@ public class QueryFactory { .subqueries(subqueries) .humanQuery(query) .quality(qualityLimits.qualityLimit) + .domainCount(qualityLimits.domainCount) .year(qualityLimits.year) .size(qualityLimits.size) .rank(qualityLimits.rank) diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryLimitsAccumulator.java b/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryLimitsAccumulator.java index c56ddf25..663d4cfc 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryLimitsAccumulator.java +++ b/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryLimitsAccumulator.java @@ -11,6 +11,7 @@ public class QueryLimitsAccumulator implements TokenVisitor { public SpecificationLimit year; public SpecificationLimit size; public SpecificationLimit rank; + public SpecificationLimit domainCount; public QueryStrategy queryStrategy = QueryStrategy.AUTO; @@ -19,6 +20,7 @@ public class QueryLimitsAccumulator implements TokenVisitor { year = params.year(); size = params.size(); rank = params.rank(); + domainCount = params.domainCount(); } private SpecificationLimit parseSpecificationLimit(String str) { @@ -64,6 +66,11 @@ public class QueryLimitsAccumulator implements TokenVisitor { rank = parseSpecificationLimit(token.str); } + @Override + public void onDomainCountTerm(Token token) { + domainCount = parseSpecificationLimit(token.str); + } + @Override public void onQualityTerm(Token token) { 
qualityLimit = parseSpecificationLimit(token.str); diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QuerySearchTermsAccumulator.java b/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QuerySearchTermsAccumulator.java index e20b9b69..1d5588bc 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QuerySearchTermsAccumulator.java +++ b/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QuerySearchTermsAccumulator.java @@ -97,27 +97,15 @@ public class QuerySearchTermsAccumulator implements TokenVisitor { } @Override - public void onYearTerm(Token token) { - - } - + public void onYearTerm(Token token) {} @Override - public void onSizeTerm(Token token) { - - } - + public void onSizeTerm(Token token) {} @Override - public void onRankTerm(Token token) { - - } - + public void onRankTerm(Token token) {} @Override - public void onQualityTerm(Token token) { - - } - + public void onDomainCountTerm(Token token) {} @Override - public void onQsTerm(Token token) { - - } + public void onQualityTerm(Token token) {} + @Override + public void onQsTerm(Token token) {} } diff --git a/code/services-core/query-service/src/test/java/nu/marginalia/query/svc/QueryFactoryTest.java b/code/services-core/query-service/src/test/java/nu/marginalia/query/svc/QueryFactoryTest.java index 64ca1116..c5f2eb42 100644 --- a/code/services-core/query-service/src/test/java/nu/marginalia/query/svc/QueryFactoryTest.java +++ b/code/services-core/query-service/src/test/java/nu/marginalia/query/svc/QueryFactoryTest.java @@ -46,6 +46,7 @@ public class QueryFactoryTest { SpecificationLimit.none(), SpecificationLimit.none(), SpecificationLimit.none(), + SpecificationLimit.none(), null, new QueryLimits(100, 100, 100, 100), SearchSetIdentifier.BLOGS)).specs;