(search) Fix a bug where space-like characters weren't normalized in query processing.

This commit is contained in:
Viktor Lofgren 2023-07-07 20:02:05 +02:00
parent 2619d196bb
commit ae9537b68e
2 changed files with 24 additions and 1 deletions

View File

@ -9,7 +9,7 @@ import java.util.List;
import java.util.regex.Pattern;
public class QueryTokenizer {
private static final Pattern noisePattern = Pattern.compile("[,]");
private static final Pattern noisePattern = Pattern.compile("[,\\s]");
public List<Token> tokenizeQuery(String rawQuery) {
List<Token> tokens = new ArrayList<>();

View File

@ -18,6 +18,29 @@ class QueryParserTest {
parser = new QueryParser();
}
/**
 * Parses a query whose words are separated by a tab, a newline and a plain
 * space (with one leading space) and checks that exactly four tokens come
 * back, each with {@code str} and {@code displayStr} equal to the bare word.
 */
@Test
public void testTabHandling() {
    var tokens = parser.parse(" lorem\tipsum\ndolor sit");
    String[] expectedWords = { "lorem", "ipsum", "dolor", "sit" };

    // Check the count first so a wrong token count fails here rather than in get(i).
    assertEquals(4, tokens.size());

    for (int i = 0; i < expectedWords.length; i++) {
        var token = tokens.get(i);
        assertEquals(expectedWords[i], token.str);
        assertEquals(expectedWords[i], token.displayStr);
    }
}
@Test
public void testAdviceString() {
var ret = parser.parse("alcibiades (plato) \"my query\" -cars");