Refactored EdgeSearchService and broke functions like define:, browse:, site: etc. into separate classes.
This commit is contained in:
parent
41b686955f
commit
25776a9718
@ -15,9 +15,9 @@ import java.nio.file.Path;
|
||||
import java.time.Duration;
|
||||
|
||||
public abstract class E2ETestBase {
|
||||
public Network network = Network.newNetwork();
|
||||
public static Network network = Network.newNetwork();
|
||||
|
||||
public MariaDBContainer<?> getMariaDBContainer() {
|
||||
public static MariaDBContainer<?> getMariaDBContainer() {
|
||||
return new MariaDBContainer<>("mariadb")
|
||||
.withDatabaseName("WMSA_prod")
|
||||
.withUsername("wmsa")
|
||||
@ -27,7 +27,7 @@ public abstract class E2ETestBase {
|
||||
.withNetworkAliases("mariadb");
|
||||
}
|
||||
|
||||
public GenericContainer<?> forService(ServiceDescriptor service, GenericContainer<?> mariaDB) {
|
||||
public static GenericContainer<?> forService(ServiceDescriptor service, GenericContainer<?> mariaDB) {
|
||||
return new GenericContainer<>("openjdk:17-alpine")
|
||||
.dependsOn(mariaDB)
|
||||
.withCopyFileToContainer(jarFile(), "/WMSA.jar")
|
||||
|
@ -7,6 +7,7 @@ import org.jsoup.Jsoup;
|
||||
import org.junit.jupiter.api.Tag;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.OutputType;
|
||||
import org.openqa.selenium.chrome.ChromeOptions;
|
||||
import org.openzim.ZIMTypes.ZIMFile;
|
||||
import org.openzim.ZIMTypes.ZIMReader;
|
||||
@ -22,6 +23,7 @@ import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.Duration;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@ -31,31 +33,33 @@ import static nu.marginalia.wmsa.configuration.ServiceDescriptor.*;
|
||||
@Testcontainers
|
||||
public class EdgeSearchE2ETest extends E2ETestBase {
|
||||
@Container
|
||||
public GenericContainer<?> mariaDB = getMariaDBContainer();
|
||||
public static GenericContainer<?> mariaDB = getMariaDBContainer();
|
||||
|
||||
@Container
|
||||
public GenericContainer<?> searchContainer = forService(EDGE_SEARCH, mariaDB);
|
||||
public static GenericContainer<?> searchContainer = forService(EDGE_SEARCH, mariaDB);
|
||||
@Container
|
||||
public GenericContainer<?> assistantContainer = forService(EDGE_ASSISTANT, mariaDB);
|
||||
public static GenericContainer<?> assistantContainer = forService(EDGE_ASSISTANT, mariaDB);
|
||||
@Container
|
||||
public GenericContainer<?> indexContainer = forService(EDGE_INDEX, mariaDB);
|
||||
public static GenericContainer<?> encyclopediaContainer = forService(ENCYCLOPEDIA, mariaDB);
|
||||
@Container
|
||||
public static GenericContainer<?> indexContainer = forService(EDGE_INDEX, mariaDB);
|
||||
|
||||
@Container
|
||||
public NginxContainer<?> mockWikipedia = new NginxContainer<>("nginx:stable")
|
||||
public static NginxContainer<?> mockWikipedia = new NginxContainer<>("nginx:stable")
|
||||
.dependsOn(searchContainer)
|
||||
.withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger("wikipedia")))
|
||||
.withFileSystemBind(getWikipediaFiles(), "/usr/share/nginx/html/", BindMode.READ_ONLY)
|
||||
.withNetwork(network)
|
||||
.withNetworkAliases("wikipedia");
|
||||
.withNetworkAliases("wikipedia.local");
|
||||
|
||||
|
||||
@Container
|
||||
public BrowserWebDriverContainer<?> chrome = new BrowserWebDriverContainer<>()
|
||||
public static BrowserWebDriverContainer<?> chrome = new BrowserWebDriverContainer<>()
|
||||
.withNetwork(network)
|
||||
.withCapabilities(new ChromeOptions());
|
||||
|
||||
@Container
|
||||
public GenericContainer<?> crawlerContainer = new GenericContainer<>("openjdk:17-alpine")
|
||||
public static GenericContainer<?> crawlerContainer = new GenericContainer<>("openjdk:17-alpine")
|
||||
.dependsOn(mockWikipedia)
|
||||
.dependsOn(indexContainer)
|
||||
.withNetwork(network)
|
||||
@ -69,14 +73,13 @@ public class EdgeSearchE2ETest extends E2ETestBase {
|
||||
.waitingFor(Wait.forLogMessage(".*ALL DONE.*", 1).withStartupTimeout(Duration.ofMinutes(10)));
|
||||
|
||||
@Container
|
||||
public NginxContainer<?> proxyNginx = new NginxContainer<>("nginx:stable")
|
||||
public static NginxContainer<?> proxyNginx = new NginxContainer<>("nginx:stable")
|
||||
.dependsOn(searchContainer)
|
||||
.dependsOn(crawlerContainer)
|
||||
.withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger("nginx")))
|
||||
.withCopyFileToContainer(MountableFile.forClasspathResource("nginx/search.conf"), "/etc/nginx/conf.d/default.conf")
|
||||
.withNetwork(network)
|
||||
.withNetworkAliases("proxyNginx");
|
||||
;
|
||||
|
||||
public static MountableFile ipDatabasePath() {
|
||||
Path modelsPath = Path.of(System.getProperty("user.dir")).resolve("data/models/IP2LOC/IP2LOCATION-LITE-DB1.CSV");
|
||||
@ -87,11 +90,22 @@ public class EdgeSearchE2ETest extends E2ETestBase {
|
||||
return MountableFile.forHostPath(modelsPath.toString());
|
||||
}
|
||||
|
||||
private Path getCrawlPath() {
|
||||
private static Path getCrawlPath() {
|
||||
return Path.of(System.getProperty("user.dir")).resolve("build/tmp/crawl");
|
||||
}
|
||||
|
||||
private String getWikipediaFiles() {
|
||||
private static Path screenshotFilename(String operation) throws IOException {
|
||||
var path = Path.of(System.getProperty("user.dir")).resolve("build/test/e2e/");
|
||||
Files.createDirectories(path);
|
||||
|
||||
String name = String.format("test-%s-%s.png", operation, LocalDateTime.now());
|
||||
path = path.resolve(name);
|
||||
|
||||
System.out.println("Screenshot in " + path);
|
||||
return path;
|
||||
}
|
||||
|
||||
private static String getWikipediaFiles() {
|
||||
Path wikipediaFiles = Path.of(System.getProperty("user.dir")).resolve("build/tmp/wikipedia");
|
||||
Path crawlFiles = getCrawlPath();
|
||||
Path zimFile = Path.of(System.getProperty("user.dir")).resolve("data/test/wikipedia_en_100_nopic.zim");
|
||||
@ -120,7 +134,7 @@ public class EdgeSearchE2ETest extends E2ETestBase {
|
||||
|
||||
var zr = new ZIMReader(new ZIMFile(zimFile.toString()));
|
||||
zr.forEachArticles((url, art) -> {
|
||||
urls.add("http://wikipedia/" + url + ".html");
|
||||
urls.add("http://wikipedia.local/" + url + ".html");
|
||||
|
||||
if (art != null) {
|
||||
try {
|
||||
@ -134,7 +148,7 @@ public class EdgeSearchE2ETest extends E2ETestBase {
|
||||
}, pred -> true);
|
||||
urls.forEach(System.out::println);
|
||||
Files.writeString(wikipediaFiles.resolve("index.html"), "<html/>");
|
||||
CrawlJobExtractorMain.writeSpec(crawlFiles.resolve("crawl.spec"), "wikipedia", urls);
|
||||
CrawlJobExtractorMain.writeSpec(crawlFiles.resolve("crawl.spec"), "wikipedia.local", urls);
|
||||
}
|
||||
catch (IOException ex) {
|
||||
ex.printStackTrace();
|
||||
@ -143,19 +157,80 @@ public class EdgeSearchE2ETest extends E2ETestBase {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void run() {
|
||||
public void testFrontPage() throws IOException {
|
||||
var driver = chrome.getWebDriver();
|
||||
|
||||
driver.get("http://proxyNginx/");
|
||||
System.out.println(driver.getTitle());
|
||||
System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));
|
||||
|
||||
Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("frontpage"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuery() throws IOException {
|
||||
var driver = chrome.getWebDriver();
|
||||
|
||||
driver.get("http://proxyNginx/search?query=bird&profile=corpo");
|
||||
System.out.println(driver.getTitle());
|
||||
System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));
|
||||
|
||||
driver.get("http://proxyNginx/search?query=site:wikipedia");
|
||||
|
||||
Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("query"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSiteInfo() throws IOException {
|
||||
var driver = chrome.getWebDriver();
|
||||
|
||||
driver.get("http://proxyNginx/search?query=site:wikipedia.local");
|
||||
System.out.println(driver.getTitle());
|
||||
System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));
|
||||
|
||||
|
||||
Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("site-info"));
|
||||
}
|
||||
@Test
|
||||
public void testSiteSearch() throws IOException {
|
||||
var driver = chrome.getWebDriver();
|
||||
|
||||
driver.get("http://proxyNginx/search?query=site:wikipedia.local%20frog");
|
||||
System.out.println(driver.getTitle());
|
||||
System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));
|
||||
|
||||
|
||||
Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("site-search"));
|
||||
}
|
||||
@Test
|
||||
public void testBrowse() throws IOException {
|
||||
var driver = chrome.getWebDriver();
|
||||
|
||||
driver.get("http://proxyNginx/search?query=browse:wikipedia.local");
|
||||
System.out.println(driver.getTitle());
|
||||
System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));
|
||||
|
||||
|
||||
Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("browse"));
|
||||
}
|
||||
@Test
|
||||
public void testDefine() throws IOException {
|
||||
var driver = chrome.getWebDriver();
|
||||
|
||||
driver.get("http://proxyNginx/search?query=define:adiabatic");
|
||||
System.out.println(driver.getTitle());
|
||||
System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));
|
||||
|
||||
|
||||
Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("define"));
|
||||
}
|
||||
@Test
|
||||
public void testEval() throws IOException {
|
||||
var driver = chrome.getWebDriver();
|
||||
|
||||
driver.get("http://proxyNginx/search?query=3%2B3");
|
||||
System.out.println(driver.getTitle());
|
||||
System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));
|
||||
|
||||
Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("eval"));
|
||||
}
|
||||
}
|
||||
|
@ -4,10 +4,8 @@ import com.google.inject.ImplementedBy;
|
||||
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
|
||||
import nu.marginalia.wmsa.edge.model.*;
|
||||
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
|
||||
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink;
|
||||
import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit;
|
||||
import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails;
|
||||
import nu.marginalia.wmsa.edge.search.BrowseResult;
|
||||
import nu.marginalia.wmsa.edge.search.model.BrowseResult;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
@ -13,7 +13,7 @@ import nu.marginalia.wmsa.edge.model.EdgeUrl;
|
||||
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
|
||||
import nu.marginalia.wmsa.edge.model.search.EdgePageScoreAdjustment;
|
||||
import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails;
|
||||
import nu.marginalia.wmsa.edge.search.BrowseResult;
|
||||
import nu.marginalia.wmsa.edge.search.model.BrowseResult;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -10,7 +10,7 @@ import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService;
|
||||
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
|
||||
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeId;
|
||||
import nu.marginalia.wmsa.edge.search.BrowseResult;
|
||||
import nu.marginalia.wmsa.edge.search.model.BrowseResult;
|
||||
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
|
||||
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
@ -4,7 +4,7 @@ import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
|
||||
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeDomain;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeId;
|
||||
import nu.marginalia.wmsa.edge.search.BrowseResult;
|
||||
import nu.marginalia.wmsa.edge.search.model.BrowseResult;
|
||||
|
||||
import java.util.LinkedList;
|
||||
|
||||
|
@ -4,7 +4,7 @@ import lombok.*;
|
||||
import nu.marginalia.wmsa.edge.converting.processor.logic.HtmlFeature;
|
||||
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeUrl;
|
||||
import nu.marginalia.wmsa.edge.search.EdgeSearchRankingSymbols;
|
||||
import nu.marginalia.wmsa.edge.search.model.EdgeSearchRankingSymbols;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
|
@ -13,6 +13,8 @@ import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
||||
import nu.marginalia.wmsa.edge.index.service.SearchOrder;
|
||||
import nu.marginalia.wmsa.edge.model.*;
|
||||
import nu.marginalia.wmsa.edge.model.search.*;
|
||||
import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResultSet;
|
||||
import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResults;
|
||||
import nu.marginalia.wmsa.edge.search.query.model.EdgeSearchQuery;
|
||||
import nu.marginalia.wmsa.edge.search.query.QueryFactory;
|
||||
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
|
||||
@ -26,7 +28,10 @@ import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Singleton
|
||||
@ -77,17 +82,16 @@ public class EdgeSearchOperator {
|
||||
return queryResults.resultSet;
|
||||
}
|
||||
|
||||
public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, String evalResult) {
|
||||
|
||||
|
||||
public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, @Nullable Future<String> eval) {
|
||||
Observable<WikiArticles> definitions = getWikiArticle(ctx, params.getHumanQuery());
|
||||
|
||||
var processedQuery = queryFactory.createQuery(params);
|
||||
|
||||
logger.info("Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ','));
|
||||
|
||||
DecoratedSearchResultSet queryResults = performQuery(ctx, processedQuery, false);
|
||||
|
||||
String evalResult = getEvalResult(eval);
|
||||
|
||||
return new DecoratedSearchResults(params,
|
||||
getProblems(ctx, params.getHumanQuery(), evalResult, queryResults, processedQuery),
|
||||
evalResult,
|
||||
@ -97,6 +101,19 @@ public class EdgeSearchOperator {
|
||||
getDomainId(processedQuery.domain));
|
||||
}
|
||||
|
||||
private String getEvalResult(@Nullable Future<String> eval) {
|
||||
if (eval == null || eval.isCancelled()) {
|
||||
return "";
|
||||
}
|
||||
try {
|
||||
return eval.get(50, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.warn("Error fetching eval result", ex);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
private int getDomainId(String domain) {
|
||||
int domainId = -1;
|
||||
try {
|
||||
|
@ -13,20 +13,11 @@ import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.wmsa.configuration.server.Initialization;
|
||||
import nu.marginalia.wmsa.configuration.server.MetricsServer;
|
||||
import nu.marginalia.wmsa.configuration.server.Service;
|
||||
import nu.marginalia.wmsa.edge.assistant.client.AssistantClient;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.DictionaryResponse;
|
||||
import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService;
|
||||
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
|
||||
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
|
||||
import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient;
|
||||
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeDomain;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeId;
|
||||
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
|
||||
import nu.marginalia.wmsa.edge.search.command.CommandEvaluator;
|
||||
import nu.marginalia.wmsa.edge.search.command.ResponseType;
|
||||
import nu.marginalia.wmsa.edge.search.command.SearchParameters;
|
||||
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
|
||||
import nu.marginalia.wmsa.edge.search.siteinfo.DomainInformationService;
|
||||
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
|
||||
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Request;
|
||||
@ -35,84 +26,39 @@ import spark.Spark;
|
||||
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class EdgeSearchService extends Service {
|
||||
|
||||
private final EdgeDataStoreDao edgeDataStoreDao;
|
||||
private final EdgeIndexClient indexClient;
|
||||
private final AssistantClient assistantClient;
|
||||
private final UnitConversion unitConversion;
|
||||
private final EdgeSearchOperator searchOperator;
|
||||
private final EdgeDomainBlacklist blacklist;
|
||||
private final ScreenshotService screenshotService;
|
||||
private DomainInformationService domainInformationService;
|
||||
|
||||
private final MustacheRenderer<BrowseResultSet> browseResultsRenderer;
|
||||
private final MustacheRenderer<DecoratedSearchResults> searchResultsRenderer;
|
||||
private final MustacheRenderer<DecoratedSearchResults> searchResultsRendererGmi;
|
||||
private final MustacheRenderer<DictionaryResponse> dictionaryRenderer;
|
||||
private final MustacheRenderer<DictionaryResponse> dictionaryRendererGmi;
|
||||
private final MustacheRenderer<Map<String, String>> conversionRenderer;
|
||||
private final MustacheRenderer<Map<String, String>> conversionRendererGmi;
|
||||
|
||||
private final MustacheRenderer<DomainInformation> siteInfoRenderer;
|
||||
private final MustacheRenderer<DomainInformation> siteInfoRendererGmi;
|
||||
|
||||
private final Gson gson = new GsonBuilder().create();
|
||||
private final CommandEvaluator searchCommandEvaulator;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(EdgeSearchService.class);
|
||||
private final int indexSize = 0;
|
||||
|
||||
private final String maintenanceMessage = null;
|
||||
|
||||
@SneakyThrows
|
||||
@Inject
|
||||
public EdgeSearchService(@Named("service-host") String ip,
|
||||
@Named("service-port") Integer port,
|
||||
EdgeDataStoreDao edgeDataStoreDao,
|
||||
EdgeIndexClient indexClient,
|
||||
RendererFactory rendererFactory,
|
||||
Initialization initialization,
|
||||
MetricsServer metricsServer,
|
||||
AssistantClient assistantClient,
|
||||
UnitConversion unitConversion,
|
||||
EdgeSearchOperator searchOperator,
|
||||
EdgeDomainBlacklist blacklist,
|
||||
ScreenshotService screenshotService,
|
||||
DomainInformationService domainInformationService
|
||||
CommandEvaluator searchCommandEvaulator
|
||||
) {
|
||||
super(ip, port, initialization, metricsServer);
|
||||
this.edgeDataStoreDao = edgeDataStoreDao;
|
||||
this.indexClient = indexClient;
|
||||
|
||||
browseResultsRenderer = rendererFactory.renderer("edge/browse-results");
|
||||
|
||||
searchResultsRenderer = rendererFactory.renderer("edge/search-results");
|
||||
searchResultsRendererGmi = rendererFactory.renderer("edge/search-results-gmi");
|
||||
|
||||
dictionaryRenderer = rendererFactory.renderer("edge/dictionary-results");
|
||||
dictionaryRendererGmi = rendererFactory.renderer("edge/dictionary-results-gmi");
|
||||
|
||||
siteInfoRenderer = rendererFactory.renderer("edge/site-info");
|
||||
siteInfoRendererGmi = rendererFactory.renderer("edge/site-info-gmi");
|
||||
|
||||
conversionRenderer = rendererFactory.renderer("edge/conversion-results");
|
||||
conversionRendererGmi = rendererFactory.renderer("edge/conversion-results-gmi");
|
||||
|
||||
this.assistantClient = assistantClient;
|
||||
this.unitConversion = unitConversion;
|
||||
this.searchOperator = searchOperator;
|
||||
this.blacklist = blacklist;
|
||||
this.screenshotService = screenshotService;
|
||||
this.domainInformationService = domainInformationService;
|
||||
this.searchCommandEvaulator = searchCommandEvaulator;
|
||||
|
||||
Spark.staticFiles.expireTime(600);
|
||||
|
||||
Spark.get("/search", this::pathSearch);
|
||||
|
||||
Gson gson = new GsonBuilder().create();
|
||||
|
||||
Spark.get("/api/search", this::apiSearch, gson::toJson);
|
||||
Spark.get("/public/search", this::pathSearch);
|
||||
Spark.get("/site-search/:site/*", this::siteSearchRedir);
|
||||
@ -200,144 +146,32 @@ public class EdgeSearchService extends Service {
|
||||
}
|
||||
|
||||
final String profileStr = Optional.ofNullable(request.queryParams("profile")).orElse("yolo");
|
||||
final String humanQuery = queryParam.trim();
|
||||
final String format = request.queryParams("format");
|
||||
ResponseType responseType;
|
||||
|
||||
try {
|
||||
final String humanQuery = queryParam.trim();
|
||||
final String format = request.queryParams("format");
|
||||
|
||||
var eval = unitConversion.tryEval(ctx, humanQuery);
|
||||
var conversion = unitConversion.tryConversion(ctx, humanQuery);
|
||||
if (conversion.isPresent()) {
|
||||
if ("gmi".equals(format)) {
|
||||
response.type("text/gemini");
|
||||
return conversionRendererGmi.render(Map.of("query", humanQuery, "result", conversion.get()));
|
||||
} else {
|
||||
return conversionRenderer.render(Map.of("query", humanQuery, "result", conversion.get(), "profile", profileStr));
|
||||
}
|
||||
}
|
||||
if (humanQuery.matches("define:[A-Za-z\\s-0-9]+")) {
|
||||
var results = lookupDefinition(ctx, humanQuery);
|
||||
|
||||
if ("gmi".equals(format)) {
|
||||
response.type("text/gemini");
|
||||
return dictionaryRendererGmi.render(results, Map.of("query", humanQuery));
|
||||
} else {
|
||||
return dictionaryRenderer.render(results, Map.of("query", humanQuery, "profile", profileStr));
|
||||
}
|
||||
} else if (humanQuery.matches("site:[.A-Za-z\\-0-9]+")) {
|
||||
var results = siteInfo(ctx, humanQuery);
|
||||
|
||||
|
||||
var domain = results.getDomain();
|
||||
logger.info("Domain: {}", domain);
|
||||
|
||||
DecoratedSearchResultSet resultSet;
|
||||
Path screenshotPath = null;
|
||||
if (null != domain) {
|
||||
resultSet = searchOperator.performDumbQuery(ctx, EdgeSearchProfile.CORPO, IndexBlock.Words, 100, 100, "site:"+domain);
|
||||
|
||||
screenshotPath = Path.of("/screenshot/" + edgeDataStoreDao.getDomainId(domain).getId());
|
||||
}
|
||||
else {
|
||||
resultSet = new DecoratedSearchResultSet(Collections.emptyList());
|
||||
}
|
||||
|
||||
if ("gmi".equals(format)) {
|
||||
response.type("text/gemini");
|
||||
return siteInfoRendererGmi.render(results, Map.of("query", humanQuery));
|
||||
} else {
|
||||
return siteInfoRenderer.render(results, Map.of("query", humanQuery, "focusDomain", Objects.requireNonNullElse(domain, ""), "profile", profileStr, "results", resultSet.resultSet, "screenshot", screenshotPath == null ? "" : screenshotPath.toString()));
|
||||
}
|
||||
} else if (humanQuery.matches("browse:[.A-Za-z\\-0-9]+")) {
|
||||
var results = browseSite(ctx, humanQuery);
|
||||
|
||||
if (null != results) {
|
||||
return browseResultsRenderer.render(results, Map.of("query", humanQuery, "profile", profileStr));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
final var jsSetting = Optional.ofNullable(request.queryParams("js")).orElse("default");
|
||||
var results = searchOperator.doSearch(ctx, new EdgeUserSearchParameters(humanQuery,
|
||||
EdgeSearchProfile.getSearchProfile(profileStr), jsSetting), eval.orElse(null)
|
||||
);
|
||||
|
||||
results.getResults().removeIf(detail -> blacklist.isBlacklisted(edgeDataStoreDao.getDomainId(detail.url.domain)));
|
||||
|
||||
if ("gmi".equals(format)) {
|
||||
response.type("text/gemini");
|
||||
return searchResultsRendererGmi.render(results);
|
||||
} else {
|
||||
if (maintenanceMessage != null) {
|
||||
return searchResultsRenderer.render(results, Map.of("maintenanceMessage", maintenanceMessage));
|
||||
}
|
||||
else {
|
||||
return searchResultsRenderer.render(results);
|
||||
}
|
||||
}
|
||||
if ("gmi".equals(format)) {
|
||||
response.type("text/gemini");
|
||||
responseType = ResponseType.GEMINI;
|
||||
}
|
||||
catch (TimeoutException te) {
|
||||
serveError(ctx, response);
|
||||
return null;
|
||||
else {
|
||||
responseType = ResponseType.HTML;
|
||||
}
|
||||
|
||||
var params = new SearchParameters(
|
||||
EdgeSearchProfile.getSearchProfile(profileStr),
|
||||
Optional.ofNullable(request.queryParams("js")).orElse("default"),
|
||||
responseType);
|
||||
try {
|
||||
return searchCommandEvaulator.eval(ctx, params, humanQuery);
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.error("Error", ex);
|
||||
serveError(ctx, response);
|
||||
return null;
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
private DomainInformation siteInfo(Context ctx, String humanQuery) {
|
||||
String definePrefix = "site:";
|
||||
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
|
||||
|
||||
logger.info("Fetching Site Info: {}", word);
|
||||
var results = domainInformationService.domainInfo(word)
|
||||
.orElseGet(() -> new DomainInformation(null, false, 0, 0, 0, 0, 0, 0, 0, EdgeDomainIndexingState.UNKNOWN, Collections.emptyList()));
|
||||
|
||||
logger.debug("Results = {}", results);
|
||||
|
||||
return results;
|
||||
|
||||
}
|
||||
|
||||
private BrowseResultSet browseSite(Context ctx, String humanQuery) {
|
||||
String definePrefix = "browse:";
|
||||
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
|
||||
|
||||
try {
|
||||
if ("random".equals(word)) {
|
||||
var results = edgeDataStoreDao.getRandomDomains(25, blacklist);
|
||||
results.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId)));
|
||||
return new BrowseResultSet(results);
|
||||
}
|
||||
else {
|
||||
var domain = edgeDataStoreDao.getDomainId(new EdgeDomain(word));
|
||||
var neighbors = edgeDataStoreDao.getDomainNeighborsAdjacent(domain, blacklist, 45);
|
||||
|
||||
neighbors.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId)));
|
||||
|
||||
return new BrowseResultSet(neighbors);
|
||||
}
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.info("No Results");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private DictionaryResponse lookupDefinition(Context ctx, String humanQuery) {
|
||||
String definePrefix = "define:";
|
||||
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
|
||||
|
||||
logger.info("Defining: {}", word);
|
||||
var results = assistantClient
|
||||
.dictionaryLookup(ctx, word)
|
||||
.blockingFirst();
|
||||
logger.debug("Results = {}", results);
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -6,9 +6,13 @@ import nu.marginalia.wmsa.edge.assistant.client.AssistantClient;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.CheckForNull;
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@ -54,24 +58,24 @@ public class UnitConversion {
|
||||
}
|
||||
}
|
||||
|
||||
public Optional<String> tryEval(Context context, String query) {
|
||||
public @CheckForNull Future<String> tryEval(Context context, String query) {
|
||||
if (!evalPredicate.test(query)) {
|
||||
return Optional.empty();
|
||||
return null;
|
||||
}
|
||||
|
||||
var expr = query.toLowerCase().trim();
|
||||
|
||||
if (expr.chars().allMatch(Character::isDigit)) {
|
||||
return Optional.empty();
|
||||
return null;
|
||||
}
|
||||
|
||||
logger.info("eval({})", expr);
|
||||
|
||||
try {
|
||||
return Optional.of(assistantClient.evalMath(context, expr).blockingFirst());
|
||||
return assistantClient.evalMath(context, expr).toFuture();
|
||||
}
|
||||
catch (RemoteException ex) {
|
||||
return Optional.empty();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,80 @@
|
||||
package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService;
|
||||
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
|
||||
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeDomain;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeId;
|
||||
import nu.marginalia.wmsa.edge.search.model.BrowseResultSet;
|
||||
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
|
||||
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class BrowseCommand implements SearchCommandInterface {
|
||||
private final EdgeDataStoreDao edgeDataStoreDao;
|
||||
private final ScreenshotService screenshotService;
|
||||
private final EdgeDomainBlacklist blacklist;
|
||||
private final MustacheRenderer<BrowseResultSet> browseResultsRenderer;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private final Predicate<String> queryPatternPredicate = Pattern.compile("^browse:[.A-Za-z\\-0-9]+$").asPredicate();
|
||||
|
||||
@Inject
|
||||
public BrowseCommand(EdgeDataStoreDao edgeDataStoreDao,
|
||||
ScreenshotService screenshotService,
|
||||
EdgeDomainBlacklist blacklist,
|
||||
RendererFactory rendererFactory)
|
||||
throws IOException
|
||||
{
|
||||
this.edgeDataStoreDao = edgeDataStoreDao;
|
||||
this.screenshotService = screenshotService;
|
||||
this.blacklist = blacklist;
|
||||
|
||||
browseResultsRenderer = rendererFactory.renderer("edge/browse-results");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
|
||||
if (!queryPatternPredicate.test(query)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
return Optional.ofNullable(browseSite(ctx, query))
|
||||
.map(results -> browseResultsRenderer.render(results, Map.of("query", query, "profile", parameters.profileStr())));
|
||||
}
|
||||
|
||||
|
||||
private BrowseResultSet browseSite(Context ctx, String humanQuery) {
|
||||
String definePrefix = "browse:";
|
||||
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
|
||||
|
||||
try {
|
||||
if ("random".equals(word)) {
|
||||
var results = edgeDataStoreDao.getRandomDomains(25, blacklist);
|
||||
results.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId)));
|
||||
return new BrowseResultSet(results);
|
||||
}
|
||||
else {
|
||||
var domain = edgeDataStoreDao.getDomainId(new EdgeDomain(word));
|
||||
var neighbors = edgeDataStoreDao.getDomainNeighborsAdjacent(domain, blacklist, 45);
|
||||
|
||||
neighbors.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId)));
|
||||
|
||||
return new BrowseResultSet(neighbors);
|
||||
}
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.info("No Results");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class CommandEvaluator {
|
||||
|
||||
List<SearchCommandInterface> commands = new ArrayList<>();
|
||||
|
||||
|
||||
@Inject
|
||||
public CommandEvaluator(
|
||||
BrowseCommand browse,
|
||||
ConvertCommand convert,
|
||||
DefinitionCommand define,
|
||||
SiteSearchCommand site,
|
||||
SearchCommand search
|
||||
) {
|
||||
commands.add(browse);
|
||||
commands.add(convert);
|
||||
commands.add(define);
|
||||
commands.add(site);
|
||||
commands.add(search);
|
||||
}
|
||||
|
||||
public Object eval(Context ctx, SearchParameters parameters, String query) {
|
||||
for (var cmd : commands) {
|
||||
var ret = cmd.process(ctx, parameters, query);
|
||||
if (ret.isPresent()) {
|
||||
return ret.get();
|
||||
}
|
||||
}
|
||||
// Search command *should* always evaluate
|
||||
throw new IllegalStateException("Search Command returned Optional.empty()");
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.wmsa.edge.search.UnitConversion;
|
||||
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
|
||||
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
public class ConvertCommand implements SearchCommandInterface {
|
||||
private final UnitConversion unitConversion;
|
||||
private final MustacheRenderer<Map<String, String>> conversionRenderer;
|
||||
private final MustacheRenderer<Map<String, String>> conversionRendererGmi;
|
||||
|
||||
@Inject
|
||||
public ConvertCommand(UnitConversion unitConversion, RendererFactory rendererFactory) throws IOException {
|
||||
this.unitConversion = unitConversion;
|
||||
|
||||
conversionRenderer = rendererFactory.renderer("edge/conversion-results");
|
||||
conversionRendererGmi = rendererFactory.renderer("edge/conversion-results-gmi");
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
|
||||
var conversion = unitConversion.tryConversion(ctx, query);
|
||||
if (conversion.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
if (parameters.responseType() == ResponseType.GEMINI) {
|
||||
return Optional.of(conversionRendererGmi.render(Map.of("query", query, "result", conversion.get())));
|
||||
} else {
|
||||
return Optional.of(conversionRenderer.render(Map.of("query", query, "result", conversion.get(), "profile", parameters.profileStr())));
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,69 @@
|
||||
|
||||
package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.wmsa.edge.assistant.client.AssistantClient;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.DictionaryResponse;
|
||||
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
|
||||
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class DefinitionCommand implements SearchCommandInterface {
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private final MustacheRenderer<DictionaryResponse> dictionaryRenderer;
|
||||
private final MustacheRenderer<DictionaryResponse> dictionaryRendererGmi;
|
||||
private final AssistantClient assistantClient;
|
||||
|
||||
|
||||
private final Predicate<String> queryPatternPredicate = Pattern.compile("^define:[A-Za-z\\s-0-9]+$").asPredicate();
|
||||
|
||||
@Inject
|
||||
public DefinitionCommand(RendererFactory rendererFactory, AssistantClient assistantClient)
|
||||
throws IOException
|
||||
{
|
||||
|
||||
dictionaryRenderer = rendererFactory.renderer("edge/dictionary-results");
|
||||
dictionaryRendererGmi = rendererFactory.renderer("edge/dictionary-results-gmi");
|
||||
this.assistantClient = assistantClient;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
|
||||
if (!queryPatternPredicate.test(query.trim())) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
var results = lookupDefinition(ctx, query);
|
||||
|
||||
if (parameters.responseType() == ResponseType.GEMINI) {
|
||||
return Optional.of(dictionaryRendererGmi.render(results, Map.of("query", parameters.profileStr())));
|
||||
} else {
|
||||
return Optional.of(dictionaryRenderer.render(results, Map.of("query", query, "profile", parameters.profileStr())));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private DictionaryResponse lookupDefinition(Context ctx, String humanQuery) {
|
||||
String definePrefix = "define:";
|
||||
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
|
||||
|
||||
logger.info("Defining: {}", word);
|
||||
var results = assistantClient
|
||||
.dictionaryLookup(ctx, word)
|
||||
.blockingFirst();
|
||||
logger.debug("Results = {}", results);
|
||||
|
||||
return results;
|
||||
}
|
||||
}
|
@ -0,0 +1,5 @@
|
||||
package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
/** Output format a search command renders its response in. */
public enum ResponseType {
    /** Rendered with the regular HTML templates. */
    HTML,
    /** Rendered with the "-gmi" (Gemini) templates. */
    GEMINI
}
|
@ -0,0 +1,58 @@
|
||||
package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
|
||||
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
|
||||
import nu.marginalia.wmsa.edge.search.EdgeSearchOperator;
|
||||
import nu.marginalia.wmsa.edge.search.UnitConversion;
|
||||
import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResults;
|
||||
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
|
||||
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
|
||||
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
|
||||
|
||||
import javax.annotation.CheckForNull;
|
||||
import java.io.IOException;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
public class SearchCommand implements SearchCommandInterface {
|
||||
private EdgeDomainBlacklist blacklist;
|
||||
private EdgeDataStoreDao dataStoreDao;
|
||||
private EdgeSearchOperator searchOperator;
|
||||
private UnitConversion unitConversion;
|
||||
private final MustacheRenderer<DecoratedSearchResults> searchResultsRenderer;
|
||||
private final MustacheRenderer<DecoratedSearchResults> searchResultsRendererGmi;
|
||||
|
||||
@Inject
|
||||
public SearchCommand(EdgeDomainBlacklist blacklist,
|
||||
EdgeDataStoreDao dataStoreDao,
|
||||
EdgeSearchOperator searchOperator,
|
||||
UnitConversion unitConversion,
|
||||
RendererFactory rendererFactory) throws IOException {
|
||||
this.blacklist = blacklist;
|
||||
this.dataStoreDao = dataStoreDao;
|
||||
this.searchOperator = searchOperator;
|
||||
this.unitConversion = unitConversion;
|
||||
|
||||
searchResultsRenderer = rendererFactory.renderer("edge/search-results");
|
||||
searchResultsRendererGmi = rendererFactory.renderer("edge/search-results-gmi");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
|
||||
@CheckForNull Future<String> eval = unitConversion.tryEval(ctx, query);
|
||||
|
||||
var results = searchOperator.doSearch(ctx, new EdgeUserSearchParameters(query,
|
||||
parameters.profile(), parameters.js()), eval
|
||||
);
|
||||
|
||||
results.getResults().removeIf(detail -> blacklist.isBlacklisted(dataStoreDao.getDomainId(detail.url.domain)));
|
||||
|
||||
if (parameters.responseType() == ResponseType.GEMINI) {
|
||||
return Optional.of(searchResultsRendererGmi.render(results));
|
||||
} else {
|
||||
return Optional.of(searchResultsRenderer.render(results));
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
/**
 * A handler for one kind of search query (e.g. browse, convert, define, site, plain search).
 */
public interface SearchCommandInterface {
|
||||
/**
 * Attempts to handle the query.
 *
 * @param ctx        request context
 * @param parameters profile, js setting and response format of the request
 * @param query      raw user query
 * @return the response object, or an empty Optional when this command does not handle the query
 */
Optional<Object> process(Context ctx, SearchParameters parameters, String query);
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
|
||||
|
||||
public record SearchParameters(EdgeSearchProfile profile, String js, ResponseType responseType) {
|
||||
public String profileStr() {
|
||||
return profile.name;
|
||||
}
|
||||
}
|
@ -0,0 +1,105 @@
|
||||
package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
|
||||
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
|
||||
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
|
||||
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
|
||||
import nu.marginalia.wmsa.edge.search.EdgeSearchOperator;
|
||||
import nu.marginalia.wmsa.edge.search.EdgeSearchProfile;
|
||||
import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResultSet;
|
||||
import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResults;
|
||||
import nu.marginalia.wmsa.edge.search.model.DomainInformation;
|
||||
import nu.marginalia.wmsa.edge.search.siteinfo.DomainInformationService;
|
||||
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
|
||||
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class SiteSearchCommand implements SearchCommandInterface {
|
||||
private EdgeDomainBlacklist blacklist;
|
||||
private final EdgeDataStoreDao dataStoreDao;
|
||||
private final EdgeSearchOperator searchOperator;
|
||||
private DomainInformationService domainInformationService;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private final MustacheRenderer<DomainInformation> siteInfoRenderer;
|
||||
private final MustacheRenderer<DomainInformation> siteInfoRendererGmi;
|
||||
|
||||
private final Predicate<String> queryPatternPredicate = Pattern.compile("^site:[.A-Za-z\\-0-9]+$").asPredicate();
|
||||
@Inject
|
||||
public SiteSearchCommand(
|
||||
EdgeDomainBlacklist blacklist,
|
||||
EdgeDataStoreDao dataStoreDao,
|
||||
RendererFactory rendererFactory,
|
||||
EdgeSearchOperator searchOperator,
|
||||
DomainInformationService domainInformationService)
|
||||
throws IOException
|
||||
{
|
||||
this.blacklist = blacklist;
|
||||
this.dataStoreDao = dataStoreDao;
|
||||
|
||||
siteInfoRenderer = rendererFactory.renderer("edge/site-info");
|
||||
siteInfoRendererGmi = rendererFactory.renderer("edge/site-info-gmi");
|
||||
|
||||
this.searchOperator = searchOperator;
|
||||
this.domainInformationService = domainInformationService;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
|
||||
if (!queryPatternPredicate.test(query)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
var results = siteInfo(ctx, query);
|
||||
|
||||
var domain = results.getDomain();
|
||||
logger.info("Domain: {}", domain);
|
||||
|
||||
DecoratedSearchResultSet resultSet;
|
||||
Path screenshotPath = null;
|
||||
if (null != domain) {
|
||||
resultSet = searchOperator.performDumbQuery(ctx, EdgeSearchProfile.CORPO, IndexBlock.Words, 100, 100, "site:"+domain);
|
||||
|
||||
screenshotPath = Path.of("/screenshot/" + dataStoreDao.getDomainId(domain).getId());
|
||||
}
|
||||
else {
|
||||
resultSet = new DecoratedSearchResultSet(Collections.emptyList());
|
||||
}
|
||||
|
||||
if (parameters.responseType() == ResponseType.GEMINI) {
|
||||
return Optional.of(siteInfoRendererGmi.render(results, Map.of("query", query)));
|
||||
} else {
|
||||
return Optional.of(siteInfoRenderer.render(results, Map.of("query", query, "focusDomain", Objects.requireNonNullElse(domain, ""), "profile", parameters.profileStr(), "results", resultSet.resultSet, "screenshot", screenshotPath == null ? "" : screenshotPath.toString())));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
private DomainInformation siteInfo(Context ctx, String humanQuery) {
|
||||
String definePrefix = "site:";
|
||||
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
|
||||
|
||||
logger.info("Fetching Site Info: {}", word);
|
||||
var results = domainInformationService.domainInfo(word)
|
||||
.orElseGet(() -> new DomainInformation(null, false, 0, 0, 0, 0, 0, 0, 0, EdgeDomainIndexingState.UNKNOWN, Collections.emptyList()));
|
||||
|
||||
logger.debug("Results = {}", results);
|
||||
|
||||
return results;
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.search;
|
||||
package nu.marginalia.wmsa.edge.search.model;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.search;
|
||||
package nu.marginalia.wmsa.edge.search.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.search;
|
||||
package nu.marginalia.wmsa.edge.search.model;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.search;
|
||||
package nu.marginalia.wmsa.edge.search.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.search;
|
||||
package nu.marginalia.wmsa.edge.search.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.search;
|
||||
package nu.marginalia.wmsa.edge.search.model;
|
||||
|
||||
import java.util.TreeMap;
|
||||
|
@ -4,11 +4,10 @@ import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeDomain;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeId;
|
||||
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
|
||||
import nu.marginalia.wmsa.edge.search.DomainInformation;
|
||||
import nu.marginalia.wmsa.edge.search.model.DomainInformation;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
|
@ -21,6 +21,8 @@ import org.junit.jupiter.api.parallel.ResourceAccessMode;
|
||||
import org.junit.jupiter.api.parallel.ResourceLock;
|
||||
import spark.Spark;
|
||||
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import static nu.marginalia.util.TestUtil.getConnection;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
@ -123,7 +125,7 @@ class AssistantTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEvalWithParser() {
|
||||
public void testEvalWithParser() throws ExecutionException, InterruptedException {
|
||||
var conversion = new UnitConversion(client);
|
||||
assertEquals("305", conversion.tryEval(Context.internal(), "300+5").get());
|
||||
assertEquals("1.772", conversion.tryEval(Context.internal(), "sqrt(pi)").get());
|
||||
|
Loading…
Reference in New Issue
Block a user