Refactored EdgeSearchService, splitting the handling of query commands such as define:, browse:, and site: out into separate classes.

This commit is contained in:
vlofgren 2022-05-30 16:40:59 +02:00
parent 41b686955f
commit 25776a9718
27 changed files with 580 additions and 236 deletions

View File

@ -15,9 +15,9 @@ import java.nio.file.Path;
import java.time.Duration;
public abstract class E2ETestBase {
public Network network = Network.newNetwork();
public static Network network = Network.newNetwork();
public MariaDBContainer<?> getMariaDBContainer() {
public static MariaDBContainer<?> getMariaDBContainer() {
return new MariaDBContainer<>("mariadb")
.withDatabaseName("WMSA_prod")
.withUsername("wmsa")
@ -27,7 +27,7 @@ public abstract class E2ETestBase {
.withNetworkAliases("mariadb");
}
public GenericContainer<?> forService(ServiceDescriptor service, GenericContainer<?> mariaDB) {
public static GenericContainer<?> forService(ServiceDescriptor service, GenericContainer<?> mariaDB) {
return new GenericContainer<>("openjdk:17-alpine")
.dependsOn(mariaDB)
.withCopyFileToContainer(jarFile(), "/WMSA.jar")

View File

@ -7,6 +7,7 @@ import org.jsoup.Jsoup;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.openqa.selenium.By;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openzim.ZIMTypes.ZIMFile;
import org.openzim.ZIMTypes.ZIMReader;
@ -22,6 +23,7 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
@ -31,31 +33,33 @@ import static nu.marginalia.wmsa.configuration.ServiceDescriptor.*;
@Testcontainers
public class EdgeSearchE2ETest extends E2ETestBase {
@Container
public GenericContainer<?> mariaDB = getMariaDBContainer();
public static GenericContainer<?> mariaDB = getMariaDBContainer();
@Container
public GenericContainer<?> searchContainer = forService(EDGE_SEARCH, mariaDB);
public static GenericContainer<?> searchContainer = forService(EDGE_SEARCH, mariaDB);
@Container
public GenericContainer<?> assistantContainer = forService(EDGE_ASSISTANT, mariaDB);
public static GenericContainer<?> assistantContainer = forService(EDGE_ASSISTANT, mariaDB);
@Container
public GenericContainer<?> indexContainer = forService(EDGE_INDEX, mariaDB);
public static GenericContainer<?> encyclopediaContainer = forService(ENCYCLOPEDIA, mariaDB);
@Container
public static GenericContainer<?> indexContainer = forService(EDGE_INDEX, mariaDB);
@Container
public NginxContainer<?> mockWikipedia = new NginxContainer<>("nginx:stable")
public static NginxContainer<?> mockWikipedia = new NginxContainer<>("nginx:stable")
.dependsOn(searchContainer)
.withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger("wikipedia")))
.withFileSystemBind(getWikipediaFiles(), "/usr/share/nginx/html/", BindMode.READ_ONLY)
.withNetwork(network)
.withNetworkAliases("wikipedia");
.withNetworkAliases("wikipedia.local");
@Container
public BrowserWebDriverContainer<?> chrome = new BrowserWebDriverContainer<>()
public static BrowserWebDriverContainer<?> chrome = new BrowserWebDriverContainer<>()
.withNetwork(network)
.withCapabilities(new ChromeOptions());
@Container
public GenericContainer<?> crawlerContainer = new GenericContainer<>("openjdk:17-alpine")
public static GenericContainer<?> crawlerContainer = new GenericContainer<>("openjdk:17-alpine")
.dependsOn(mockWikipedia)
.dependsOn(indexContainer)
.withNetwork(network)
@ -69,14 +73,13 @@ public class EdgeSearchE2ETest extends E2ETestBase {
.waitingFor(Wait.forLogMessage(".*ALL DONE.*", 1).withStartupTimeout(Duration.ofMinutes(10)));
@Container
public NginxContainer<?> proxyNginx = new NginxContainer<>("nginx:stable")
public static NginxContainer<?> proxyNginx = new NginxContainer<>("nginx:stable")
.dependsOn(searchContainer)
.dependsOn(crawlerContainer)
.withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger("nginx")))
.withCopyFileToContainer(MountableFile.forClasspathResource("nginx/search.conf"), "/etc/nginx/conf.d/default.conf")
.withNetwork(network)
.withNetworkAliases("proxyNginx");
;
public static MountableFile ipDatabasePath() {
Path modelsPath = Path.of(System.getProperty("user.dir")).resolve("data/models/IP2LOC/IP2LOCATION-LITE-DB1.CSV");
@ -87,11 +90,22 @@ public class EdgeSearchE2ETest extends E2ETestBase {
return MountableFile.forHostPath(modelsPath.toString());
}
private Path getCrawlPath() {
/** Resolves the crawl working directory, {@code build/tmp/crawl}, beneath the JVM's {@code user.dir}. */
private static Path getCrawlPath() {
    final Path workingDir = Path.of(System.getProperty("user.dir"));
    return workingDir.resolve("build/tmp/crawl");
}
private String getWikipediaFiles() {
/**
 * Builds the destination path for a screenshot taken during the given test operation.
 *
 * <p>Creates {@code build/test/e2e/} beneath the JVM's {@code user.dir} if it does not exist,
 * prints the chosen path to stdout, and returns it.
 *
 * @param operation short label embedded in the file name, e.g. {@code "frontpage"}
 * @return a path of the form {@code test-<operation>-<timestamp>.png} inside the screenshot directory
 * @throws IOException if the screenshot directory cannot be created
 */
private static Path screenshotFilename(String operation) throws IOException {
    var path = Path.of(System.getProperty("user.dir")).resolve("build/test/e2e/");
    Files.createDirectories(path);

    // LocalDateTime#toString separates the time fields with ':' which is not a legal
    // file name character on Windows; swap it for '-' so the name is portable.
    String timestamp = LocalDateTime.now().toString().replace(':', '-');
    String name = String.format("test-%s-%s.png", operation, timestamp);

    path = path.resolve(name);
    System.out.println("Screenshot in " + path);
    return path;
}
private static String getWikipediaFiles() {
Path wikipediaFiles = Path.of(System.getProperty("user.dir")).resolve("build/tmp/wikipedia");
Path crawlFiles = getCrawlPath();
Path zimFile = Path.of(System.getProperty("user.dir")).resolve("data/test/wikipedia_en_100_nopic.zim");
@ -120,7 +134,7 @@ public class EdgeSearchE2ETest extends E2ETestBase {
var zr = new ZIMReader(new ZIMFile(zimFile.toString()));
zr.forEachArticles((url, art) -> {
urls.add("http://wikipedia/" + url + ".html");
urls.add("http://wikipedia.local/" + url + ".html");
if (art != null) {
try {
@ -134,7 +148,7 @@ public class EdgeSearchE2ETest extends E2ETestBase {
}, pred -> true);
urls.forEach(System.out::println);
Files.writeString(wikipediaFiles.resolve("index.html"), "<html/>");
CrawlJobExtractorMain.writeSpec(crawlFiles.resolve("crawl.spec"), "wikipedia", urls);
CrawlJobExtractorMain.writeSpec(crawlFiles.resolve("crawl.spec"), "wikipedia.local", urls);
}
catch (IOException ex) {
ex.printStackTrace();
@ -143,19 +157,80 @@ public class EdgeSearchE2ETest extends E2ETestBase {
}
@Test
public void run() {
public void testFrontPage() throws IOException {
    // Load the front page through the nginx proxy alias.
    final var browser = chrome.getWebDriver();
    browser.get("http://proxyNginx/");

    // Dump the title and the full DOM to stdout for manual inspection; nothing is asserted.
    final String title = browser.getTitle();
    System.out.println(title);
    final var rootElement = browser.findElement(new By.ByXPath("//*"));
    System.out.println(rootElement.getAttribute("outerHTML"));

    // Move the browser's temporary screenshot file to its final location.
    final var capture = browser.getScreenshotAs(OutputType.FILE).toPath();
    Files.move(capture, screenshotFilename("frontpage"));
}
/**
 * Runs a plain keyword query ({@code bird}, profile {@code corpo}) through the nginx proxy,
 * dumps the page title and DOM to stdout, and stores a screenshot labelled {@code query}.
 * No assertions are made on the page content.
 */
@Test
public void testQuery() throws IOException {
    var driver = chrome.getWebDriver();

    driver.get("http://proxyNginx/search?query=bird&profile=corpo");
    System.out.println(driver.getTitle());
    System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));

    // The screenshot must be taken while the browser is still on the query results page;
    // navigating anywhere else first would capture the wrong page under the "query" label.
    Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("query"));
}
/**
 * Opens the {@code site:wikipedia.local} query through the nginx proxy, dumps the page title
 * and DOM to stdout, and stores a screenshot labelled {@code site-info}. Nothing is asserted.
 */
@Test
public void testSiteInfo() throws IOException {
    final var browser = chrome.getWebDriver();
    browser.get("http://proxyNginx/search?query=site:wikipedia.local");

    final String title = browser.getTitle();
    System.out.println(title);
    final var rootElement = browser.findElement(new By.ByXPath("//*"));
    System.out.println(rootElement.getAttribute("outerHTML"));

    final var capture = browser.getScreenshotAs(OutputType.FILE).toPath();
    Files.move(capture, screenshotFilename("site-info"));
}
/**
 * Opens the {@code site:wikipedia.local frog} query through the nginx proxy, dumps the page
 * title and DOM to stdout, and stores a screenshot labelled {@code site-search}. Nothing is asserted.
 */
@Test
public void testSiteSearch() throws IOException {
    final var browser = chrome.getWebDriver();
    browser.get("http://proxyNginx/search?query=site:wikipedia.local%20frog");

    final String title = browser.getTitle();
    System.out.println(title);
    final var rootElement = browser.findElement(new By.ByXPath("//*"));
    System.out.println(rootElement.getAttribute("outerHTML"));

    final var capture = browser.getScreenshotAs(OutputType.FILE).toPath();
    Files.move(capture, screenshotFilename("site-search"));
}
/**
 * Opens the {@code browse:wikipedia.local} query through the nginx proxy, dumps the page title
 * and DOM to stdout, and stores a screenshot labelled {@code browse}. Nothing is asserted.
 */
@Test
public void testBrowse() throws IOException {
    final var browser = chrome.getWebDriver();
    browser.get("http://proxyNginx/search?query=browse:wikipedia.local");

    final String title = browser.getTitle();
    System.out.println(title);
    final var rootElement = browser.findElement(new By.ByXPath("//*"));
    System.out.println(rootElement.getAttribute("outerHTML"));

    final var capture = browser.getScreenshotAs(OutputType.FILE).toPath();
    Files.move(capture, screenshotFilename("browse"));
}
/**
 * Opens the {@code define:adiabatic} query through the nginx proxy, dumps the page title
 * and DOM to stdout, and stores a screenshot labelled {@code define}. Nothing is asserted.
 */
@Test
public void testDefine() throws IOException {
    final var browser = chrome.getWebDriver();
    browser.get("http://proxyNginx/search?query=define:adiabatic");

    final String title = browser.getTitle();
    System.out.println(title);
    final var rootElement = browser.findElement(new By.ByXPath("//*"));
    System.out.println(rootElement.getAttribute("outerHTML"));

    final var capture = browser.getScreenshotAs(OutputType.FILE).toPath();
    Files.move(capture, screenshotFilename("define"));
}
/**
 * Opens the URL-encoded arithmetic query {@code 3%2B3} through the nginx proxy, dumps the page
 * title and DOM to stdout, and stores a screenshot labelled {@code eval}. Nothing is asserted.
 */
@Test
public void testEval() throws IOException {
    final var browser = chrome.getWebDriver();
    browser.get("http://proxyNginx/search?query=3%2B3");

    final String title = browser.getTitle();
    System.out.println(title);
    final var rootElement = browser.findElement(new By.ByXPath("//*"));
    System.out.println(rootElement.getAttribute("outerHTML"));

    final var capture = browser.getScreenshotAs(OutputType.FILE).toPath();
    Files.move(capture, screenshotFilename("eval"));
}
}

View File

@ -4,10 +4,8 @@ import com.google.inject.ImplementedBy;
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.model.*;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink;
import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit;
import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails;
import nu.marginalia.wmsa.edge.search.BrowseResult;
import nu.marginalia.wmsa.edge.search.model.BrowseResult;
import java.util.Collection;
import java.util.List;

View File

@ -13,7 +13,7 @@ import nu.marginalia.wmsa.edge.model.EdgeUrl;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
import nu.marginalia.wmsa.edge.model.search.EdgePageScoreAdjustment;
import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails;
import nu.marginalia.wmsa.edge.search.BrowseResult;
import nu.marginalia.wmsa.edge.search.model.BrowseResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -10,7 +10,7 @@ import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService;
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.model.EdgeId;
import nu.marginalia.wmsa.edge.search.BrowseResult;
import nu.marginalia.wmsa.edge.search.model.BrowseResult;
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
import org.jetbrains.annotations.NotNull;

View File

@ -4,7 +4,7 @@ import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
import nu.marginalia.wmsa.edge.model.EdgeId;
import nu.marginalia.wmsa.edge.search.BrowseResult;
import nu.marginalia.wmsa.edge.search.model.BrowseResult;
import java.util.LinkedList;

View File

@ -4,7 +4,7 @@ import lombok.*;
import nu.marginalia.wmsa.edge.converting.processor.logic.HtmlFeature;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
import nu.marginalia.wmsa.edge.model.EdgeUrl;
import nu.marginalia.wmsa.edge.search.EdgeSearchRankingSymbols;
import nu.marginalia.wmsa.edge.search.model.EdgeSearchRankingSymbols;
import java.util.Objects;

View File

@ -13,6 +13,8 @@ import nu.marginalia.wmsa.edge.index.model.IndexBlock;
import nu.marginalia.wmsa.edge.index.service.SearchOrder;
import nu.marginalia.wmsa.edge.model.*;
import nu.marginalia.wmsa.edge.model.search.*;
import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResultSet;
import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResults;
import nu.marginalia.wmsa.edge.search.query.model.EdgeSearchQuery;
import nu.marginalia.wmsa.edge.search.query.QueryFactory;
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
@ -26,7 +28,10 @@ import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.util.*;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@Singleton
@ -77,17 +82,16 @@ public class EdgeSearchOperator {
return queryResults.resultSet;
}
public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, String evalResult) {
public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, @Nullable Future<String> eval) {
Observable<WikiArticles> definitions = getWikiArticle(ctx, params.getHumanQuery());
var processedQuery = queryFactory.createQuery(params);
logger.info("Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ','));
DecoratedSearchResultSet queryResults = performQuery(ctx, processedQuery, false);
String evalResult = getEvalResult(eval);
return new DecoratedSearchResults(params,
getProblems(ctx, params.getHumanQuery(), evalResult, queryResults, processedQuery),
evalResult,
@ -97,6 +101,19 @@ public class EdgeSearchOperator {
getDomainId(processedQuery.domain));
}
/**
 * Collects the result of an asynchronous eval request, waiting at most 50 ms for it.
 *
 * @param eval the pending eval, or {@code null} when no eval was started
 * @return the value produced by the future, or {@code ""} when {@code eval} is {@code null},
 *         already cancelled, not finished within the wait, or failed
 */
private String getEvalResult(@Nullable Future<String> eval) {
    if (eval == null || eval.isCancelled()) {
        return "";
    }
    try {
        // Bounded wait: a slow eval must not hold up the search response.
        return eval.get(50, TimeUnit.MILLISECONDS);
    }
    catch (InterruptedException ex) {
        // Restore the interrupt flag so callers further up the stack can still observe it.
        Thread.currentThread().interrupt();
        logger.warn("Error fetching eval result", ex);
        return "";
    }
    catch (Exception ex) {
        logger.warn("Error fetching eval result", ex);
        return "";
    }
}
private int getDomainId(String domain) {
int domainId = -1;
try {

View File

@ -13,20 +13,11 @@ import nu.marginalia.wmsa.configuration.server.Context;
import nu.marginalia.wmsa.configuration.server.Initialization;
import nu.marginalia.wmsa.configuration.server.MetricsServer;
import nu.marginalia.wmsa.configuration.server.Service;
import nu.marginalia.wmsa.edge.assistant.client.AssistantClient;
import nu.marginalia.wmsa.edge.assistant.dict.DictionaryResponse;
import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService;
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient;
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
import nu.marginalia.wmsa.edge.model.EdgeId;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
import nu.marginalia.wmsa.edge.search.command.CommandEvaluator;
import nu.marginalia.wmsa.edge.search.command.ResponseType;
import nu.marginalia.wmsa.edge.search.command.SearchParameters;
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
import nu.marginalia.wmsa.edge.search.siteinfo.DomainInformationService;
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
@ -35,84 +26,39 @@ import spark.Spark;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.Collectors;
public class EdgeSearchService extends Service {
private final EdgeDataStoreDao edgeDataStoreDao;
private final EdgeIndexClient indexClient;
private final AssistantClient assistantClient;
private final UnitConversion unitConversion;
private final EdgeSearchOperator searchOperator;
private final EdgeDomainBlacklist blacklist;
private final ScreenshotService screenshotService;
private DomainInformationService domainInformationService;
private final MustacheRenderer<BrowseResultSet> browseResultsRenderer;
private final MustacheRenderer<DecoratedSearchResults> searchResultsRenderer;
private final MustacheRenderer<DecoratedSearchResults> searchResultsRendererGmi;
private final MustacheRenderer<DictionaryResponse> dictionaryRenderer;
private final MustacheRenderer<DictionaryResponse> dictionaryRendererGmi;
private final MustacheRenderer<Map<String, String>> conversionRenderer;
private final MustacheRenderer<Map<String, String>> conversionRendererGmi;
private final MustacheRenderer<DomainInformation> siteInfoRenderer;
private final MustacheRenderer<DomainInformation> siteInfoRendererGmi;
private final Gson gson = new GsonBuilder().create();
private final CommandEvaluator searchCommandEvaulator;
private static final Logger logger = LoggerFactory.getLogger(EdgeSearchService.class);
private final int indexSize = 0;
private final String maintenanceMessage = null;
@SneakyThrows
@Inject
public EdgeSearchService(@Named("service-host") String ip,
@Named("service-port") Integer port,
EdgeDataStoreDao edgeDataStoreDao,
EdgeIndexClient indexClient,
RendererFactory rendererFactory,
Initialization initialization,
MetricsServer metricsServer,
AssistantClient assistantClient,
UnitConversion unitConversion,
EdgeSearchOperator searchOperator,
EdgeDomainBlacklist blacklist,
ScreenshotService screenshotService,
DomainInformationService domainInformationService
CommandEvaluator searchCommandEvaulator
) {
super(ip, port, initialization, metricsServer);
this.edgeDataStoreDao = edgeDataStoreDao;
this.indexClient = indexClient;
browseResultsRenderer = rendererFactory.renderer("edge/browse-results");
searchResultsRenderer = rendererFactory.renderer("edge/search-results");
searchResultsRendererGmi = rendererFactory.renderer("edge/search-results-gmi");
dictionaryRenderer = rendererFactory.renderer("edge/dictionary-results");
dictionaryRendererGmi = rendererFactory.renderer("edge/dictionary-results-gmi");
siteInfoRenderer = rendererFactory.renderer("edge/site-info");
siteInfoRendererGmi = rendererFactory.renderer("edge/site-info-gmi");
conversionRenderer = rendererFactory.renderer("edge/conversion-results");
conversionRendererGmi = rendererFactory.renderer("edge/conversion-results-gmi");
this.assistantClient = assistantClient;
this.unitConversion = unitConversion;
this.searchOperator = searchOperator;
this.blacklist = blacklist;
this.screenshotService = screenshotService;
this.domainInformationService = domainInformationService;
this.searchCommandEvaulator = searchCommandEvaulator;
Spark.staticFiles.expireTime(600);
Spark.get("/search", this::pathSearch);
Gson gson = new GsonBuilder().create();
Spark.get("/api/search", this::apiSearch, gson::toJson);
Spark.get("/public/search", this::pathSearch);
Spark.get("/site-search/:site/*", this::siteSearchRedir);
@ -200,144 +146,32 @@ public class EdgeSearchService extends Service {
}
final String profileStr = Optional.ofNullable(request.queryParams("profile")).orElse("yolo");
final String humanQuery = queryParam.trim();
final String format = request.queryParams("format");
ResponseType responseType;
try {
final String humanQuery = queryParam.trim();
final String format = request.queryParams("format");
var eval = unitConversion.tryEval(ctx, humanQuery);
var conversion = unitConversion.tryConversion(ctx, humanQuery);
if (conversion.isPresent()) {
if ("gmi".equals(format)) {
response.type("text/gemini");
return conversionRendererGmi.render(Map.of("query", humanQuery, "result", conversion.get()));
} else {
return conversionRenderer.render(Map.of("query", humanQuery, "result", conversion.get(), "profile", profileStr));
}
}
if (humanQuery.matches("define:[A-Za-z\\s-0-9]+")) {
var results = lookupDefinition(ctx, humanQuery);
if ("gmi".equals(format)) {
response.type("text/gemini");
return dictionaryRendererGmi.render(results, Map.of("query", humanQuery));
} else {
return dictionaryRenderer.render(results, Map.of("query", humanQuery, "profile", profileStr));
}
} else if (humanQuery.matches("site:[.A-Za-z\\-0-9]+")) {
var results = siteInfo(ctx, humanQuery);
var domain = results.getDomain();
logger.info("Domain: {}", domain);
DecoratedSearchResultSet resultSet;
Path screenshotPath = null;
if (null != domain) {
resultSet = searchOperator.performDumbQuery(ctx, EdgeSearchProfile.CORPO, IndexBlock.Words, 100, 100, "site:"+domain);
screenshotPath = Path.of("/screenshot/" + edgeDataStoreDao.getDomainId(domain).getId());
}
else {
resultSet = new DecoratedSearchResultSet(Collections.emptyList());
}
if ("gmi".equals(format)) {
response.type("text/gemini");
return siteInfoRendererGmi.render(results, Map.of("query", humanQuery));
} else {
return siteInfoRenderer.render(results, Map.of("query", humanQuery, "focusDomain", Objects.requireNonNullElse(domain, ""), "profile", profileStr, "results", resultSet.resultSet, "screenshot", screenshotPath == null ? "" : screenshotPath.toString()));
}
} else if (humanQuery.matches("browse:[.A-Za-z\\-0-9]+")) {
var results = browseSite(ctx, humanQuery);
if (null != results) {
return browseResultsRenderer.render(results, Map.of("query", humanQuery, "profile", profileStr));
}
}
final var jsSetting = Optional.ofNullable(request.queryParams("js")).orElse("default");
var results = searchOperator.doSearch(ctx, new EdgeUserSearchParameters(humanQuery,
EdgeSearchProfile.getSearchProfile(profileStr), jsSetting), eval.orElse(null)
);
results.getResults().removeIf(detail -> blacklist.isBlacklisted(edgeDataStoreDao.getDomainId(detail.url.domain)));
if ("gmi".equals(format)) {
response.type("text/gemini");
return searchResultsRendererGmi.render(results);
} else {
if (maintenanceMessage != null) {
return searchResultsRenderer.render(results, Map.of("maintenanceMessage", maintenanceMessage));
}
else {
return searchResultsRenderer.render(results);
}
}
if ("gmi".equals(format)) {
response.type("text/gemini");
responseType = ResponseType.GEMINI;
}
catch (TimeoutException te) {
serveError(ctx, response);
return null;
else {
responseType = ResponseType.HTML;
}
var params = new SearchParameters(
EdgeSearchProfile.getSearchProfile(profileStr),
Optional.ofNullable(request.queryParams("js")).orElse("default"),
responseType);
try {
return searchCommandEvaulator.eval(ctx, params, humanQuery);
}
catch (Exception ex) {
logger.error("Error", ex);
serveError(ctx, response);
return null;
}
return "";
}
private DomainInformation siteInfo(Context ctx, String humanQuery) {
String definePrefix = "site:";
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
logger.info("Fetching Site Info: {}", word);
var results = domainInformationService.domainInfo(word)
.orElseGet(() -> new DomainInformation(null, false, 0, 0, 0, 0, 0, 0, 0, EdgeDomainIndexingState.UNKNOWN, Collections.emptyList()));
logger.debug("Results = {}", results);
return results;
}
private BrowseResultSet browseSite(Context ctx, String humanQuery) {
String definePrefix = "browse:";
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
try {
if ("random".equals(word)) {
var results = edgeDataStoreDao.getRandomDomains(25, blacklist);
results.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId)));
return new BrowseResultSet(results);
}
else {
var domain = edgeDataStoreDao.getDomainId(new EdgeDomain(word));
var neighbors = edgeDataStoreDao.getDomainNeighborsAdjacent(domain, blacklist, 45);
neighbors.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId)));
return new BrowseResultSet(neighbors);
}
}
catch (Exception ex) {
logger.info("No Results");
return null;
}
}
@SneakyThrows
private DictionaryResponse lookupDefinition(Context ctx, String humanQuery) {
String definePrefix = "define:";
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
logger.info("Defining: {}", word);
var results = assistantClient
.dictionaryLookup(ctx, word)
.blockingFirst();
logger.debug("Results = {}", results);
return results;
}
}

View File

@ -6,9 +6,13 @@ import nu.marginalia.wmsa.edge.assistant.client.AssistantClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.CheckForNull;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.function.Predicate;
import java.util.regex.Pattern;
@ -54,24 +58,24 @@ public class UnitConversion {
}
}
public Optional<String> tryEval(Context context, String query) {
public @CheckForNull Future<String> tryEval(Context context, String query) {
if (!evalPredicate.test(query)) {
return Optional.empty();
return null;
}
var expr = query.toLowerCase().trim();
if (expr.chars().allMatch(Character::isDigit)) {
return Optional.empty();
return null;
}
logger.info("eval({})", expr);
try {
return Optional.of(assistantClient.evalMath(context, expr).blockingFirst());
return assistantClient.evalMath(context, expr).toFuture();
}
catch (RemoteException ex) {
return Optional.empty();
return null;
}
}
}

View File

@ -0,0 +1,80 @@
package nu.marginalia.wmsa.edge.search.command;
import com.google.inject.Inject;
import nu.marginalia.wmsa.configuration.server.Context;
import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService;
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
import nu.marginalia.wmsa.edge.model.EdgeId;
import nu.marginalia.wmsa.edge.search.model.BrowseResultSet;
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Map;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;
/**
 * Search command for queries of the form {@code browse:<domain>} or {@code browse:random}.
 *
 * <p>Renders the {@code edge/browse-results} template over a set of domains that have a
 * screenshot available. Queries that do not match the pattern, or whose lookup fails,
 * yield {@link Optional#empty()}.
 */
public class BrowseCommand implements SearchCommandInterface {
    private static final String BROWSE_PREFIX = "browse:";

    private final EdgeDataStoreDao edgeDataStoreDao;
    private final ScreenshotService screenshotService;
    private final EdgeDomainBlacklist blacklist;
    private final MustacheRenderer<BrowseResultSet> browseResultsRenderer;
    private final Logger logger = LoggerFactory.getLogger(getClass());

    private final Predicate<String> queryPatternPredicate = Pattern.compile("^browse:[.A-Za-z\\-0-9]+$").asPredicate();

    @Inject
    public BrowseCommand(EdgeDataStoreDao edgeDataStoreDao,
                         ScreenshotService screenshotService,
                         EdgeDomainBlacklist blacklist,
                         RendererFactory rendererFactory)
            throws IOException
    {
        this.edgeDataStoreDao = edgeDataStoreDao;
        this.screenshotService = screenshotService;
        this.blacklist = blacklist;

        browseResultsRenderer = rendererFactory.renderer("edge/browse-results");
    }

    /**
     * Handles the query if it is a {@code browse:} command.
     *
     * @return the rendered browse page, or empty when the query is not a browse command
     *         or no result set could be produced
     */
    @Override
    public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
        if (!queryPatternPredicate.test(query)) {
            return Optional.empty();
        }

        return Optional.ofNullable(browseSite(ctx, query))
                .map(results -> browseResultsRenderer.render(results, Map.of("query", query, "profile", parameters.profileStr())));
    }

    /**
     * Looks up the domains to show for a browse query.
     *
     * <p>{@code browse:random} fetches 25 random domains; any other value fetches up to 45
     * neighbors of the named domain. In both cases domains without a screenshot are removed.
     *
     * @return the result set, or {@code null} if the lookup threw
     */
    private BrowseResultSet browseSite(Context ctx, String humanQuery) {
        String word = humanQuery.substring(BROWSE_PREFIX.length()).toLowerCase();

        try {
            if ("random".equals(word)) {
                var results = edgeDataStoreDao.getRandomDomains(25, blacklist);
                results.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId)));
                return new BrowseResultSet(results);
            }
            else {
                var domain = edgeDataStoreDao.getDomainId(new EdgeDomain(word));
                var neighbors = edgeDataStoreDao.getDomainNeighborsAdjacent(domain, blacklist, 45);
                neighbors.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId)));
                return new BrowseResultSet(neighbors);
            }
        }
        catch (Exception ex) {
            logger.info("No Results");
            // Keep the cause available at debug level so genuine failures can be told
            // apart from a domain that simply is not known.
            logger.debug("Browse lookup for '{}' failed", word, ex);
            return null;
        }
    }
}

View File

@ -0,0 +1,40 @@
package nu.marginalia.wmsa.edge.search.command;
import com.google.inject.Inject;
import nu.marginalia.wmsa.configuration.server.Context;
import java.util.ArrayList;
import java.util.List;
/**
 * Dispatches a query to the first search command that produces a result.
 *
 * <p>Commands are tried in registration order: browse, convert, define, site, search.
 */
public class CommandEvaluator {

    List<SearchCommandInterface> commands = new ArrayList<>();

    @Inject
    public CommandEvaluator(
            BrowseCommand browse,
            ConvertCommand convert,
            DefinitionCommand define,
            SiteSearchCommand site,
            SearchCommand search
    ) {
        // Registration order is the evaluation order.
        for (var command : new SearchCommandInterface[] { browse, convert, define, site, search }) {
            commands.add(command);
        }
    }

    /**
     * Evaluates the query against each command in turn and returns the first non-empty result.
     *
     * @throws IllegalStateException if every command returned an empty result
     */
    public Object eval(Context ctx, SearchParameters parameters, String query) {
        // The stream is lazy, so commands after the first hit are never invoked.
        return commands.stream()
                .map(command -> command.process(ctx, parameters, query))
                .filter(outcome -> outcome.isPresent())
                .map(outcome -> outcome.get())
                .findFirst()
                // Search command *should* always evaluate
                .orElseThrow(() -> new IllegalStateException("Search Command returned Optional.empty()"));
    }
}

View File

@ -0,0 +1,40 @@
package nu.marginalia.wmsa.edge.search.command;
import com.google.inject.Inject;
import nu.marginalia.wmsa.configuration.server.Context;
import nu.marginalia.wmsa.edge.search.UnitConversion;
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
import java.io.IOException;
import java.util.Map;
import java.util.Optional;
/**
 * Search command that renders a unit conversion when {@link UnitConversion#tryConversion}
 * yields a value for the query; otherwise it produces no result.
 */
public class ConvertCommand implements SearchCommandInterface {
    private final UnitConversion unitConversion;
    private final MustacheRenderer<Map<String, String>> conversionRenderer;
    private final MustacheRenderer<Map<String, String>> conversionRendererGmi;

    @Inject
    public ConvertCommand(UnitConversion unitConversion, RendererFactory rendererFactory) throws IOException {
        this.unitConversion = unitConversion;

        conversionRenderer = rendererFactory.renderer("edge/conversion-results");
        conversionRendererGmi = rendererFactory.renderer("edge/conversion-results-gmi");
    }

    /**
     * Attempts the conversion and renders it with the template matching the response type.
     *
     * @return the rendered conversion page, or empty if the query is not a conversion
     */
    @Override
    public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
        var conversion = unitConversion.tryConversion(ctx, query);

        if (conversion.isPresent()) {
            final Object page;
            if (parameters.responseType() == ResponseType.GEMINI) {
                page = conversionRendererGmi.render(Map.of("query", query, "result", conversion.get()));
            }
            else {
                page = conversionRenderer.render(Map.of("query", query, "result", conversion.get(), "profile", parameters.profileStr()));
            }
            return Optional.of(page);
        }

        return Optional.empty();
    }
}

View File

@ -0,0 +1,69 @@
package nu.marginalia.wmsa.edge.search.command;
import com.google.inject.Inject;
import lombok.SneakyThrows;
import nu.marginalia.wmsa.configuration.server.Context;
import nu.marginalia.wmsa.edge.assistant.client.AssistantClient;
import nu.marginalia.wmsa.edge.assistant.dict.DictionaryResponse;
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Map;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;
/**
 * Search command for queries of the form {@code define:<word>}.
 *
 * <p>Looks the word up through the {@link AssistantClient} dictionary endpoint and renders the
 * response with the HTML or Gemini dictionary template depending on the requested response type.
 */
public class DefinitionCommand implements SearchCommandInterface {
    private static final String DEFINE_PREFIX = "define:";

    private final Logger logger = LoggerFactory.getLogger(getClass());

    private final MustacheRenderer<DictionaryResponse> dictionaryRenderer;
    private final MustacheRenderer<DictionaryResponse> dictionaryRendererGmi;
    private final AssistantClient assistantClient;

    private final Predicate<String> queryPatternPredicate = Pattern.compile("^define:[A-Za-z\\s-0-9]+$").asPredicate();

    @Inject
    public DefinitionCommand(RendererFactory rendererFactory, AssistantClient assistantClient)
            throws IOException
    {
        dictionaryRenderer = rendererFactory.renderer("edge/dictionary-results");
        dictionaryRendererGmi = rendererFactory.renderer("edge/dictionary-results-gmi");
        this.assistantClient = assistantClient;
    }

    /**
     * Handles the query if it is a {@code define:} command.
     *
     * @return the rendered dictionary page, or empty when the query is not a define command
     */
    @Override
    public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
        // Trim once and use the same string for matching, lookup and rendering; slicing the
        // prefix off an untrimmed query would cut at the wrong offset.
        final String trimmedQuery = query.trim();

        if (!queryPatternPredicate.test(trimmedQuery)) {
            return Optional.empty();
        }

        var results = lookupDefinition(ctx, trimmedQuery);

        if (parameters.responseType() == ResponseType.GEMINI) {
            // The template's "query" value is the user's query, not the search profile.
            return Optional.of(dictionaryRendererGmi.render(results, Map.of("query", trimmedQuery)));
        } else {
            return Optional.of(dictionaryRenderer.render(results, Map.of("query", trimmedQuery, "profile", parameters.profileStr())));
        }
    }

    /**
     * Strips the {@code define:} prefix, lower-cases the remainder and performs a blocking
     * dictionary lookup for it.
     */
    @SneakyThrows
    private DictionaryResponse lookupDefinition(Context ctx, String humanQuery) {
        String word = humanQuery.substring(DEFINE_PREFIX.length()).toLowerCase();

        logger.info("Defining: {}", word);

        var results = assistantClient
                .dictionaryLookup(ctx, word)
                .blockingFirst();

        logger.debug("Results = {}", results);

        return results;
    }
}

View File

@ -0,0 +1,5 @@
package nu.marginalia.wmsa.edge.search.command;
/** Output format requested for a search response. */
public enum ResponseType {
    /** Regular HTML page. */
    HTML,

    /** Gemini ("gmi") text rendition. */
    GEMINI
}

View File

@ -0,0 +1,58 @@
package nu.marginalia.wmsa.edge.search.command;
import com.google.inject.Inject;
import nu.marginalia.wmsa.configuration.server.Context;
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.search.EdgeSearchOperator;
import nu.marginalia.wmsa.edge.search.UnitConversion;
import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResults;
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
import javax.annotation.CheckForNull;
import java.io.IOException;
import java.util.Optional;
import java.util.concurrent.Future;
/**
 * Search command that runs the query through the search operator, drops results
 * from blacklisted domains and renders what remains.
 *
 * <p>This command never declines a query: {@link #process} always returns a
 * populated {@link Optional}.
 */
public class SearchCommand implements SearchCommandInterface {
    // Injected collaborators are assigned exactly once in the constructor.
    private final EdgeDomainBlacklist blacklist;
    private final EdgeDataStoreDao dataStoreDao;
    private final EdgeSearchOperator searchOperator;
    private final UnitConversion unitConversion;

    private final MustacheRenderer<DecoratedSearchResults> searchResultsRenderer;
    private final MustacheRenderer<DecoratedSearchResults> searchResultsRendererGmi;

    /**
     * @param blacklist       decides whether a domain id is blacklisted
     * @param dataStoreDao    resolves a result's domain to its id
     * @param searchOperator  executes the search
     * @param unitConversion  attempts to evaluate the query before the search runs
     * @param rendererFactory factory used to load the two search-result templates
     * @throws IOException propagated from {@code rendererFactory.renderer(...)}
     */
    @Inject
    public SearchCommand(EdgeDomainBlacklist blacklist,
                         EdgeDataStoreDao dataStoreDao,
                         EdgeSearchOperator searchOperator,
                         UnitConversion unitConversion,
                         RendererFactory rendererFactory) throws IOException {
        this.blacklist = blacklist;
        this.dataStoreDao = dataStoreDao;
        this.searchOperator = searchOperator;
        this.unitConversion = unitConversion;

        searchResultsRenderer = rendererFactory.renderer("edge/search-results");
        searchResultsRendererGmi = rendererFactory.renderer("edge/search-results-gmi");
    }

    /**
     * Performs the search and renders the filtered results.
     *
     * @param ctx        request context forwarded to the collaborators
     * @param parameters per-request options (profile, js setting, response type)
     * @param query      the raw user query
     * @return the rendered result page; never empty
     */
    @Override
    public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
        // Started before the search; the resulting future (possibly null, per the
        // annotation) is handed to the search operator.
        @CheckForNull Future<String> eval = unitConversion.tryEval(ctx, query);

        var userSearchParameters = new EdgeUserSearchParameters(query, parameters.profile(), parameters.js());
        var results = searchOperator.doSearch(ctx, userSearchParameters, eval);

        // Filter in place: removes every result whose domain id is blacklisted.
        results.getResults().removeIf(detail -> blacklist.isBlacklisted(dataStoreDao.getDomainId(detail.url.domain)));

        if (parameters.responseType() == ResponseType.GEMINI) {
            return Optional.of(searchResultsRendererGmi.render(results));
        }

        return Optional.of(searchResultsRenderer.render(results));
    }
}

View File

@ -0,0 +1,9 @@
package nu.marginalia.wmsa.edge.search.command;
import nu.marginalia.wmsa.configuration.server.Context;
import java.util.Optional;
/**
 * Contract for a handler that may produce the response for a search query.
 */
public interface SearchCommandInterface {
/**
 * Attempts to handle the given query.
 *
 * @param ctx        request context passed through to the implementation
 * @param parameters per-request options (profile, js setting, response type)
 * @param query      the query string to handle
 * @return the response wrapped in an {@link Optional}.
 *         NOTE(review): presumably an empty Optional means the command does not
 *         apply to this query and another command should be tried; confirm
 *         against the dispatching code.
 */
Optional<Object> process(Context ctx, SearchParameters parameters, String query);
}

View File

@ -0,0 +1,9 @@
package nu.marginalia.wmsa.edge.search.command;
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
/**
 * Immutable bundle of per-request options handed to a search command.
 *
 * @param profile      the search profile selected for the request
 * @param js           the request's js setting (NOTE(review): exact semantics are not
 *                     visible here; presumably a JavaScript filter option, confirm)
 * @param responseType the output format the response should be rendered in
 */
public record SearchParameters(EdgeSearchProfile profile, String js, ResponseType responseType) {

    /**
     * Convenience accessor for the selected profile's {@code name} field.
     *
     * @return the name of the search profile
     */
    public String profileStr() {
        return profile().name;
    }
}

View File

@ -0,0 +1,105 @@
package nu.marginalia.wmsa.edge.search.command;
import com.google.inject.Inject;
import nu.marginalia.wmsa.configuration.server.Context;
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
import nu.marginalia.wmsa.edge.search.EdgeSearchOperator;
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResultSet;
import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResults;
import nu.marginalia.wmsa.edge.search.model.DomainInformation;
import nu.marginalia.wmsa.edge.search.siteinfo.DomainInformationService;
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;
/**
 * Search command that answers {@code site:<domain>} queries with information
 * about the domain.
 *
 * <p>For HTML responses the page additionally carries search results restricted
 * to the domain and a screenshot path; the Gemini template receives only the
 * domain information and the query. Queries that do not match the {@code site:}
 * pattern are declined with an empty {@link Optional}.
 */
public class SiteSearchCommand implements SearchCommandInterface {
    /** Prefix that introduces a site query. */
    private static final String SITE_PREFIX = "site:";

    /**
     * Accepts {@code site:} followed by one or more of: dot, ASCII letter, hyphen, digit.
     * Compiled once, since {@link Pattern} instances are immutable and thread-safe.
     */
    private static final Predicate<String> QUERY_PATTERN =
            Pattern.compile("^site:[.A-Za-z\\-0-9]+$").asPredicate();

    private static final Logger logger = LoggerFactory.getLogger(SiteSearchCommand.class);

    // Stored from the constructor but not consulted anywhere in this class.
    private final EdgeDomainBlacklist blacklist;
    private final EdgeDataStoreDao dataStoreDao;
    private final EdgeSearchOperator searchOperator;
    private final DomainInformationService domainInformationService;

    private final MustacheRenderer<DomainInformation> siteInfoRenderer;
    private final MustacheRenderer<DomainInformation> siteInfoRendererGmi;

    /**
     * @param blacklist                domain blacklist (retained, currently unused here)
     * @param dataStoreDao             resolves a domain to its id for the screenshot path
     * @param rendererFactory          factory used to load the two site-info templates
     * @param searchOperator           executes the domain-restricted query
     * @param domainInformationService source of the domain information
     * @throws IOException propagated from {@code rendererFactory.renderer(...)}
     */
    @Inject
    public SiteSearchCommand(
            EdgeDomainBlacklist blacklist,
            EdgeDataStoreDao dataStoreDao,
            RendererFactory rendererFactory,
            EdgeSearchOperator searchOperator,
            DomainInformationService domainInformationService)
            throws IOException
    {
        this.blacklist = blacklist;
        this.dataStoreDao = dataStoreDao;

        siteInfoRenderer = rendererFactory.renderer("edge/site-info");
        siteInfoRendererGmi = rendererFactory.renderer("edge/site-info-gmi");

        this.searchOperator = searchOperator;
        this.domainInformationService = domainInformationService;
    }

    /**
     * Handles the query if it is a site query.
     *
     * @param ctx        request context forwarded to the search operator
     * @param parameters per-request options; selects the template and supplies the profile name
     * @param query      the raw user query
     * @return the rendered site-info page, or {@link Optional#empty()} when the query
     *         is not a {@code site:} query
     */
    @Override
    public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
        if (!QUERY_PATTERN.test(query)) {
            return Optional.empty();
        }

        var results = siteInfo(ctx, query);
        var domain = results.getDomain();
        logger.info("Domain: {}", domain);

        if (parameters.responseType() == ResponseType.GEMINI) {
            // The Gemini template only receives the domain info and the query, so the
            // index query and the domain-id lookup below are skipped for it.
            return Optional.of(siteInfoRendererGmi.render(results, Map.of("query", query)));
        }

        DecoratedSearchResultSet resultSet;
        Path screenshotPath = null;
        if (null != domain) {
            resultSet = searchOperator.performDumbQuery(ctx, EdgeSearchProfile.CORPO, IndexBlock.Words, 100, 100, "site:" + domain);
            screenshotPath = Path.of("/screenshot/" + dataStoreDao.getDomainId(domain).getId());
        }
        else {
            // Unknown domain: render the page with no results and no screenshot.
            resultSet = new DecoratedSearchResultSet(Collections.emptyList());
        }

        return Optional.of(siteInfoRenderer.render(results, Map.of(
                "query", query,
                "focusDomain", Objects.requireNonNullElse(domain, ""),
                "profile", parameters.profileStr(),
                "results", resultSet.resultSet,
                "screenshot", screenshotPath == null ? "" : screenshotPath.toString())));
    }

    /**
     * Extracts the domain name from an already validated {@code site:} query and
     * fetches its information.
     *
     * @param ctx        request context (not used by the lookup itself)
     * @param humanQuery query starting with {@code site:}
     * @return the domain information, or a placeholder with a {@code null} domain and
     *         {@link EdgeDomainIndexingState#UNKNOWN} state when the service has none
     */
    private DomainInformation siteInfo(Context ctx, String humanQuery) {
        // Locale.ROOT keeps lower-casing independent of the JVM default locale
        // (e.g. "I" would otherwise become a dotless i under a Turkish locale).
        String domainName = humanQuery.substring(SITE_PREFIX.length()).toLowerCase(java.util.Locale.ROOT);

        logger.info("Fetching Site Info: {}", domainName);

        var results = domainInformationService.domainInfo(domainName)
                .orElseGet(() -> new DomainInformation(null, false, 0, 0, 0, 0, 0, 0, 0, EdgeDomainIndexingState.UNKNOWN, Collections.emptyList()));

        logger.debug("Results = {}", results);

        return results;
    }
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.wmsa.edge.search;
package nu.marginalia.wmsa.edge.search.model;
import lombok.Data;
import lombok.EqualsAndHashCode;

View File

@ -1,4 +1,4 @@
package nu.marginalia.wmsa.edge.search;
package nu.marginalia.wmsa.edge.search.model;
import lombok.AllArgsConstructor;
import lombok.Getter;

View File

@ -1,4 +1,4 @@
package nu.marginalia.wmsa.edge.search;
package nu.marginalia.wmsa.edge.search.model;
import lombok.Getter;
import lombok.ToString;

View File

@ -1,4 +1,4 @@
package nu.marginalia.wmsa.edge.search;
package nu.marginalia.wmsa.edge.search.model;
import lombok.AllArgsConstructor;
import lombok.Getter;

View File

@ -1,4 +1,4 @@
package nu.marginalia.wmsa.edge.search;
package nu.marginalia.wmsa.edge.search.model;
import lombok.AllArgsConstructor;
import lombok.Getter;

View File

@ -1,4 +1,4 @@
package nu.marginalia.wmsa.edge.search;
package nu.marginalia.wmsa.edge.search.model;
import java.util.TreeMap;

View File

@ -4,11 +4,10 @@ import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
import nu.marginalia.wmsa.edge.model.EdgeId;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
import nu.marginalia.wmsa.edge.search.DomainInformation;
import nu.marginalia.wmsa.edge.search.model.DomainInformation;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Optional;

View File

@ -21,6 +21,8 @@ import org.junit.jupiter.api.parallel.ResourceAccessMode;
import org.junit.jupiter.api.parallel.ResourceLock;
import spark.Spark;
import java.util.concurrent.ExecutionException;
import static nu.marginalia.util.TestUtil.getConnection;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
@ -123,7 +125,7 @@ class AssistantTest {
}
@Test
public void testEvalWithParser() {
public void testEvalWithParser() throws ExecutionException, InterruptedException {
var conversion = new UnitConversion(client);
assertEquals("305", conversion.tryEval(Context.internal(), "300+5").get());
assertEquals("1.772", conversion.tryEval(Context.internal(), "sqrt(pi)").get());