Fix a bug in query variant creation that sometimes caused the search engine to drop important words from a query.
This commit is contained in:
parent
8c24ac761a
commit
10d1307dd6
@ -3,6 +3,7 @@ package nu.marginalia.wmsa.edge.search.query;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.EqualsAndHashCode;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.ToString;
|
import lombok.ToString;
|
||||||
import nu.marginalia.util.language.conf.LanguageModels;
|
import nu.marginalia.util.language.conf.LanguageModels;
|
||||||
@ -54,7 +55,7 @@ public class QueryVariants {
|
|||||||
public final String wordOriginal;
|
public final String wordOriginal;
|
||||||
}
|
}
|
||||||
|
|
||||||
@AllArgsConstructor @Getter @ToString
|
@AllArgsConstructor @Getter @ToString @EqualsAndHashCode
|
||||||
public static class QueryVariant {
|
public static class QueryVariant {
|
||||||
public final List<String> terms;
|
public final List<String> terms;
|
||||||
public final double value;
|
public final double value;
|
||||||
@ -97,12 +98,14 @@ public class QueryVariants {
|
|||||||
|
|
||||||
var first = byStart.firstEntry();
|
var first = byStart.firstEntry();
|
||||||
if (first == null) {
|
if (first == null) {
|
||||||
byStart.put(0, List.of(new WordSpan(0, sentence.length())));
|
var span = new WordSpan(0, sentence.length());
|
||||||
|
byStart.put(0, List.of(span));
|
||||||
}
|
}
|
||||||
else if (first.getKey() > 0) {
|
else if (first.getKey() > 0) {
|
||||||
List<WordSpan> elongatedFirstWords = new ArrayList<>(first.getValue().size());
|
List<WordSpan> elongatedFirstWords = new ArrayList<>(first.getValue().size());
|
||||||
|
|
||||||
first.getValue().forEach(span -> {
|
first.getValue().forEach(span -> {
|
||||||
|
elongatedFirstWords.add(new WordSpan(0, span.start));
|
||||||
elongatedFirstWords.add(new WordSpan(0, span.end));
|
elongatedFirstWords.add(new WordSpan(0, span.end));
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -142,8 +145,7 @@ public class QueryVariants {
|
|||||||
QueryVariantSet returnValue = new QueryVariantSet();
|
QueryVariantSet returnValue = new QueryVariantSet();
|
||||||
|
|
||||||
returnValue.faithful.addAll(evaluateQueries(faithfulQueries));
|
returnValue.faithful.addAll(evaluateQueries(faithfulQueries));
|
||||||
|
returnValue.alternative.addAll(evaluateQueries(alternativeQueries));
|
||||||
returnValue.faithful.addAll(evaluateQueries(alternativeQueries));
|
|
||||||
|
|
||||||
returnValue.faithful.sort(Comparator.comparing(QueryVariant::getValue));
|
returnValue.faithful.sort(Comparator.comparing(QueryVariant::getValue));
|
||||||
returnValue.alternative.sort(Comparator.comparing(QueryVariant::getValue));
|
returnValue.alternative.sort(Comparator.comparing(QueryVariant::getValue));
|
||||||
@ -154,6 +156,7 @@ public class QueryVariants {
|
|||||||
final Pattern underscore = Pattern.compile("_");
|
final Pattern underscore = Pattern.compile("_");
|
||||||
|
|
||||||
private List<QueryVariant> evaluateQueries(List<List<String>> queryStrings) {
|
private List<QueryVariant> evaluateQueries(List<List<String>> queryStrings) {
|
||||||
|
Set<QueryVariant> variantsSet = new HashSet<>();
|
||||||
List<QueryVariant> ret = new ArrayList<>();
|
List<QueryVariant> ret = new ArrayList<>();
|
||||||
for (var lst : queryStrings) {
|
for (var lst : queryStrings) {
|
||||||
double q = 0;
|
double q = 0;
|
||||||
@ -165,7 +168,10 @@ public class QueryVariants {
|
|||||||
}
|
}
|
||||||
q += 1.0 / qp;
|
q += 1.0 / qp;
|
||||||
}
|
}
|
||||||
ret.add(new QueryVariant(lst, q));
|
var qv = new QueryVariant(lst, q);
|
||||||
|
if (variantsSet.add(qv)) {
|
||||||
|
ret.add(qv);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -269,7 +275,7 @@ public class QueryVariants {
|
|||||||
|
|
||||||
private List<List<Word>> getWordSpans(TreeMap<Integer, List<WordSpan>> byStart, DocumentSentence sentence, List<ArrayList<WordSpan>> livingSpans) {
|
private List<List<Word>> getWordSpans(TreeMap<Integer, List<WordSpan>> byStart, DocumentSentence sentence, List<ArrayList<WordSpan>> livingSpans) {
|
||||||
List<List<Word>> goodSpans = new ArrayList<>();
|
List<List<Word>> goodSpans = new ArrayList<>();
|
||||||
for (int i = 0; i < sentence.length(); i++) {
|
for (int i = 0; i < 1; i++) {
|
||||||
var spans = byStart.get(i);
|
var spans = byStart.get(i);
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user