Merge pull request #61 from MarginaliaSearch/new-look

Design Revamp For search.marginalia.nu
This commit is contained in:
Viktor 2023-12-05 13:28:54 +01:00 committed by GitHub
commit 21abfc6424
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
84 changed files with 2525 additions and 1039 deletions

View File

@ -15,6 +15,8 @@ dependencies {
implementation libs.bundles.slf4j
implementation libs.bundles.handlebars
implementation libs.guice
implementation libs.spark
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit

View File

@ -5,11 +5,13 @@ import com.github.jknack.handlebars.helper.ConditionalHelpers;
import com.github.jknack.handlebars.io.ClassPathTemplateLoader;
import com.github.jknack.handlebars.io.TemplateLoader;
import lombok.SneakyThrows;
import nu.marginalia.renderer.config.HandlebarsConfigurator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Response;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
@ -17,38 +19,18 @@ public class MustacheRenderer<T> {
private final Template template;
private final Logger logger = LoggerFactory.getLogger(getClass());
MustacheRenderer(String templateFile) throws IOException {
MustacheRenderer(HandlebarsConfigurator configurator, String templateFile) throws IOException {
TemplateLoader loader = new ClassPathTemplateLoader();
loader.setPrefix("/templates");
loader.setSuffix(".hdb");
var handlebars = new Handlebars(loader);
handlebars.registerHelpers(ConditionalHelpers.class);
handlebars.registerHelper("md", new MarkdownHelper());
handlebars.registerHelper("readableUUID", (context, options) -> {
if (context == null) return "";
String instance = context.toString();
if (instance.length() < 31) return "";
instance = instance.replace("-", "");
String color1 = "#"+instance.substring(0, 6);
String color2 = "#"+instance.substring(6, 12);
String color3 = "#"+instance.substring(12, 18);
String color4 = "#"+instance.substring(18, 24);
String shortName1 = instance.substring(0, 2);
String shortName2 = instance.substring(2, 4);
String shortName3 = instance.substring(4, 6);
String shortName4 = instance.substring(6, 8);
String ret = "<span title=\"%s\">".formatted(context.toString()) +
"<span style=\"text-shadow: 0 0 0.2ch %s; font-family: monospace;\">%s</span>".formatted(color1, shortName1) +
"<span style=\"text-shadow: 0 0 0.2ch %s; font-family: monospace;\">%s</span>".formatted(color2, shortName2) +
"<span style=\"text-shadow: 0 0 0.2ch %s; font-family: monospace;\">%s</span>".formatted(color3, shortName3) +
"<span style=\"text-shadow :0 0 0.2ch %s; font-family: monospace;\">%s</span>".formatted(color4, shortName4);
return ret;
});
configurator.configure(handlebars);
try {
template = handlebars.compile(templateFile);
@ -67,6 +49,14 @@ public class MustacheRenderer<T> {
return template.apply(model);
}
/**
 * Applies the compiled template to {@code model} and writes the result,
 * encoded as UTF-8, to the raw output stream of {@code response}.
 *
 * @param response response whose raw output stream receives the rendered bytes
 * @param model    object the template is applied to
 * @return always the empty string
 */
@SneakyThrows
public Object renderInto(Response response, T model) {
    // Obtain the stream before rendering, keeping the original call order
    var sink = response.raw().getOutputStream();
    byte[] payload = template.apply(model).getBytes(StandardCharsets.UTF_8);
    sink.write(payload);
    return "";
}
@SneakyThrows
public <T2> String render(T model, String name, List<T2> children) {
Context ctx = Context.newBuilder(model).combine(name, children).build();
@ -75,9 +65,14 @@ public class MustacheRenderer<T> {
}
@SneakyThrows
public <T2> String render(T model, Map<String, ?> children) {
public String render(T model, Map<String, ?> children) {
Context ctx = Context.newBuilder(model).combine(children).build();
return template.apply(ctx);
}
/**
 * Builds a context from {@code model} combined with {@code children}, applies
 * the compiled template to it and writes the result, encoded as UTF-8, to the
 * raw output stream of {@code response}.
 *
 * @param response response whose raw output stream receives the rendered bytes
 * @param model    object the context is built from
 * @param children map combined into the context
 */
@SneakyThrows
public void renderInto(Response response, T model, Map<String, ?> children) {
    Context combined = Context.newBuilder(model)
            .combine(children)
            .build();

    var sink = response.raw().getOutputStream();
    byte[] payload = template.apply(combined).getBytes(StandardCharsets.UTF_8);
    sink.write(payload);
}
}

View File

@ -1,13 +1,21 @@
package nu.marginalia.renderer;
import com.google.inject.Inject;
import nu.marginalia.renderer.config.HandlebarsConfigurator;
import java.io.IOException;
public class RendererFactory {
public RendererFactory() {
private final HandlebarsConfigurator configurator;
@Inject
public RendererFactory(HandlebarsConfigurator configurator) {
this.configurator = configurator;
}
/** Create a renderer for the given template */
public <T> MustacheRenderer<T> renderer(String template) throws IOException {
return new MustacheRenderer<>(template);
return new MustacheRenderer<>(configurator, template);
}
}

View File

@ -0,0 +1,8 @@
package nu.marginalia.renderer.config;
import com.github.jknack.handlebars.Handlebars;
/** {@link HandlebarsConfigurator} whose {@link #configure} has an empty body,
 * so the Handlebars instance it is given is left exactly as it was passed in. */
public class DefaultHandlebarsConfigurator implements HandlebarsConfigurator {
/** Does nothing.
 *
 * @param handlebars the instance to configure; not touched by this implementation
 */
@Override
public void configure(Handlebars handlebars) {}
}

View File

@ -0,0 +1,11 @@
package nu.marginalia.renderer.config;
import com.github.jknack.handlebars.Handlebars;
/** Configure handlebars rendering by injecting helper methods
 * into the setup process.
 * <p>
 * Implementations receive the {@link Handlebars} instance and may
 * modify it, e.g. by registering helpers on it. */
public interface HandlebarsConfigurator {
/** Set up helpers for this handlebars instance.
 *
 * @param handlebars the instance to configure
 */
void configure(Handlebars handlebars);
}

View File

@ -109,12 +109,14 @@ public class AbstractClientTest {
assertError(client.post(Context.internal(), 0,"/post", "test"));
}
/** Hands {@code this::error404} to {@code testServer.get} and then passes the
 * outcome of a client GET of {@code /get} to {@code assertError}.
 * NOTE(review): presumably this installs a handler answering 404 and expects the
 * client call to surface as an error; confirm against error404 and assertError. */
@Test
public void testGet404() {
testServer.get(this::error404);
assertError(client.get(Context.internal(), 0,"/get"));
}
@Test
public void testDelete404() {
testServer.delete(this::error404);

View File

@ -26,7 +26,7 @@ public class DbBrowseDomainsRandom {
public List<BrowseResult> getRandomDomains(int count, DomainBlacklist blacklist, int set) {
final String q = """
SELECT DOMAIN_ID, DOMAIN_NAME
SELECT DOMAIN_ID, DOMAIN_NAME, INDEXED
FROM EC_RANDOM_DOMAINS
INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID
WHERE STATE<2
@ -44,9 +44,10 @@ public class DbBrowseDomainsRandom {
while (rsp.next()) {
int id = rsp.getInt(1);
String domain = rsp.getString(2);
boolean indexed = rsp.getBoolean("INDEXED");
if (!blacklist.isBlacklisted(id)) {
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0, indexed));
}
}
}

View File

@ -3,6 +3,7 @@ package nu.marginalia.browse;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import gnu.trove.set.hash.TIntHashSet;
import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DomainBlacklist;
@ -23,14 +24,15 @@ public class DbBrowseDomainsSimilarCosine {
this.dataSource = dataSource;
}
public List<BrowseResult> getDomainNeighborsAdjacentCosine(int domainId, DomainBlacklist blacklist, int count) {
public List<BrowseResult> getDomainNeighborsAdjacentCosineRequireScreenshot(int domainId, DomainBlacklist blacklist, int count) {
List<BrowseResult> domains = new ArrayList<>(count);
String q = """
SELECT
EC_DOMAIN.ID,
NV.NEIGHBOR_NAME,
NV.RELATEDNESS
NV.RELATEDNESS,
EC_DOMAIN.INDEXED
FROM EC_NEIGHBORS_VIEW NV
INNER JOIN DATA_DOMAIN_SCREENSHOT ON DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME=NV.NEIGHBOR_NAME
INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=NV.NEIGHBOR_ID
@ -49,9 +51,10 @@ public class DbBrowseDomainsSimilarCosine {
int id = rsp.getInt(1);
String domain = rsp.getString(2);
double relatedness = rsp.getDouble(3);
boolean indexed = rsp.getBoolean("INDEXED");
if (!blacklist.isBlacklisted(id)) {
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, relatedness));
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, relatedness, indexed));
}
}
}

View File

@ -27,7 +27,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
final Set<BrowseResult> domains = new HashSet<>(count*3);
final String q = """
SELECT EC_DOMAIN.ID AS NEIGHBOR_ID, DOMAIN_NAME, COUNT(*) AS CNT
SELECT EC_DOMAIN.ID AS NEIGHBOR_ID, DOMAIN_NAME, COUNT(*) AS CNT, INDEXED
FROM EC_DOMAIN_NEIGHBORS
INNER JOIN EC_DOMAIN ON NEIGHBOR_ID=EC_DOMAIN.ID
INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
@ -54,14 +54,14 @@ public class DbBrowseDomainsSimilarOldAlgo {
String domain = rsp.getString(2);
if (!blacklist.isBlacklisted(id)) {
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0, rsp.getBoolean("INDEXED")));
}
}
}
if (domains.size() < count/2) {
final String q2 = """
SELECT EC_DOMAIN.ID, DOMAIN_NAME
SELECT EC_DOMAIN.ID, DOMAIN_NAME, INDEXED
FROM EC_DOMAIN
INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
INNER JOIN EC_DOMAIN_LINK B ON DEST_DOMAIN_ID=EC_DOMAIN.ID
@ -83,7 +83,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
String domain = rsp.getString(2);
if (!blacklist.isBlacklisted(id)) {
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0, rsp.getBoolean("INDEXED")));
}
}
}
@ -91,7 +91,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
if (domains.size() < count/2) {
final String q3 = """
SELECT EC_DOMAIN.ID, DOMAIN_NAME
SELECT EC_DOMAIN.ID, DOMAIN_NAME, INDEXED
FROM EC_DOMAIN
INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
INNER JOIN EC_DOMAIN_LINK B ON B.SOURCE_DOMAIN_ID=EC_DOMAIN.ID
@ -115,7 +115,7 @@ public class DbBrowseDomainsSimilarOldAlgo {
String domain = rsp.getString(2);
if (!blacklist.isBlacklisted(id)) {
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0, rsp.getBoolean("INDEXED")));
}
}
}
@ -128,38 +128,5 @@ public class DbBrowseDomainsSimilarOldAlgo {
return new ArrayList<>(domains);
}
public List<BrowseResult> getRandomDomains(int count, DomainBlacklist blacklist, int set) {
final String q = """
SELECT DOMAIN_ID, DOMAIN_NAME
FROM EC_RANDOM_DOMAINS
INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID
WHERE STATE<2
AND DOMAIN_SET=?
AND DOMAIN_ALIAS IS NULL
ORDER BY RAND()
LIMIT ?
""";
List<BrowseResult> domains = new ArrayList<>(count);
try (var conn = dataSource.getConnection()) {
try (var stmt = conn.prepareStatement(q)) {
stmt.setInt(1, set);;
stmt.setInt(2, count);
var rsp = stmt.executeQuery();
while (rsp.next()) {
int id = rsp.getInt(1);
String domain = rsp.getString(2);
if (!blacklist.isBlacklisted(id)) {
domains.add(new BrowseResult(new EdgeDomain(domain).toRootUrl(), id, 0));
}
}
}
}
catch (SQLException ex) {
logger.error("SQL error", ex);
}
return domains;
}
}

View File

@ -2,7 +2,10 @@ package nu.marginalia.browse.model;
import nu.marginalia.model.EdgeUrl;
public record BrowseResult (EdgeUrl url, int domainId, double relatedness) {
public record BrowseResult (EdgeUrl url,
int domainId,
double relatedness,
boolean indexed) {
public String domainHash() {
var domain = url.domain;
@ -11,4 +14,20 @@ public record BrowseResult (EdgeUrl url, int domainId, double relatedness) {
}
return domain.toString();
}
/**
 * Returns a shortened form of this result's domain for display.
 * <p>
 * When the subdomain is exactly {@code "www"} only the {@code domain} field is
 * used, otherwise the domain's {@code toString()} form. A name longer than 25
 * characters is cut to its first 22 characters followed by {@code "..."}.
 *
 * @return the display name, at most 25 characters long
 */
public String displayDomain() {
    final var edgeDomain = url.domain;

    final String name = "www".equals(edgeDomain.subDomain)
            ? edgeDomain.domain
            : edgeDomain.toString();

    if (name.length() <= 25) {
        return name;
    }
    return name.substring(0, 22) + "...";
}
}

View File

@ -1,6 +1,11 @@
package nu.marginalia.dating;
import com.google.inject.AbstractModule;
import nu.marginalia.renderer.config.DefaultHandlebarsConfigurator;
import nu.marginalia.renderer.config.HandlebarsConfigurator;
public class DatingModule extends AbstractModule {
public void configure() {
bind(HandlebarsConfigurator.class).to(DefaultHandlebarsConfigurator.class);
}
}

View File

@ -14,9 +14,7 @@ import org.jetbrains.annotations.NotNull;
import spark.Request;
import spark.Response;
import spark.Spark;
import spark.resource.ClassPathResource;
import java.io.FileNotFoundException;
import java.util.Map;
import java.util.Optional;

View File

@ -28,7 +28,7 @@ public class DatingSessionObject {
}
public BrowseResult nextSimilar(int domainId, DbBrowseDomainsSimilarCosine adjacent, DomainBlacklist blacklist) {
adjacent.getDomainNeighborsAdjacentCosine(domainId, blacklist, 25).forEach(queue::addFirst);
adjacent.getDomainNeighborsAdjacentCosineRequireScreenshot(domainId, blacklist, 25).forEach(queue::addFirst);
while (queue.size() > MAX_QUEUE_SIZE) {
queue.removeLast();

View File

@ -26,6 +26,7 @@ public class ExplorerMain extends MainClass {
Injector injector = Guice.createInjector(
new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Explorer),
new ExplorerModule(),
new DatabaseModule()
);

View File

@ -0,0 +1,11 @@
package nu.marginalia.explorer;
import com.google.inject.AbstractModule;
import nu.marginalia.renderer.config.DefaultHandlebarsConfigurator;
import nu.marginalia.renderer.config.HandlebarsConfigurator;
public class ExplorerModule extends AbstractModule {
public void configure() {
bind(HandlebarsConfigurator.class).to(DefaultHandlebarsConfigurator.class);
}
}

View File

@ -1,6 +1,7 @@
plugins {
id 'java'
id 'io.freefair.sass-base' version '8.4'
id 'io.freefair.sass-java' version '8.4'
id 'com.palantir.docker' version '0.35.0'
id 'application'
id 'jvm-test-suite'
@ -20,6 +21,11 @@ java {
languageVersion.set(JavaLanguageVersion.of(21))
}
}
sass {
sourceMapEnabled = true
sourceMapEmbed = true
outputStyle = EXPANDED
}
dependencies {
implementation project(':code:common:db')
implementation project(':code:common:model')
@ -48,6 +54,7 @@ dependencies {
implementation libs.notnull
implementation libs.guice
implementation libs.rxjava
implementation libs.handlebars
implementation libs.spark
implementation libs.opencsv
implementation libs.trove

View File

@ -0,0 +1,12 @@
package nu.marginalia.search;
import com.github.jknack.handlebars.Handlebars;
import nu.marginalia.renderer.config.HandlebarsConfigurator;
/** {@link HandlebarsConfigurator} of the {@code nu.marginalia.search} package.
 * Its {@link #configure} body is currently empty, so no helpers are registered here. */
public class SearchHandlebarsConfigurator implements HandlebarsConfigurator {
/** Does nothing at present.
 *
 * @param handlebars the instance to configure; not touched by this implementation
 */
@Override
public void configure(Handlebars handlebars) {
}
}

View File

@ -4,12 +4,17 @@ import com.google.inject.AbstractModule;
import nu.marginalia.LanguageModels;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.WmsaHome;
import nu.marginalia.renderer.config.HandlebarsConfigurator;
public class SearchModule extends AbstractModule {
public void configure() {
bind(HandlebarsConfigurator.class).to(SearchHandlebarsConfigurator.class);
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
bind(WebsiteUrl.class).toInstance(new WebsiteUrl(System.getProperty("website-url", "https://search.marginalia.nu/")));
bind(WebsiteUrl.class).toInstance(new WebsiteUrl(
System.getProperty("website-url", "https://search.marginalia.nu/")));
}
}

View File

@ -4,15 +4,18 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import io.reactivex.rxjava3.core.Observable;
import io.reactivex.rxjava3.schedulers.Schedulers;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.assistant.client.AssistantClient;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.query.client.QueryClient;
import nu.marginalia.query.model.QueryResponse;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.SearchFilters;
import nu.marginalia.search.model.SearchProfile;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.client.Context;
import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.model.UserSearchParameters;
import nu.marginalia.search.svc.SearchQueryIndexService;
import nu.marginalia.search.svc.SearchUnitConversionService;
import org.apache.logging.log4j.util.Strings;
@ -40,6 +43,7 @@ public class SearchOperator {
private final QueryClient queryClient;
private final SearchQueryIndexService searchQueryService;
private final SearchQueryParamFactory paramFactory;
private final WebsiteUrl websiteUrl;
private final SearchUnitConversionService searchUnitConversionService;
@ -49,6 +53,7 @@ public class SearchOperator {
QueryClient queryClient,
SearchQueryIndexService searchQueryService,
SearchQueryParamFactory paramFactory,
WebsiteUrl websiteUrl,
SearchUnitConversionService searchUnitConversionService)
{
@ -58,6 +63,7 @@ public class SearchOperator {
this.searchQueryService = searchQueryService;
this.paramFactory = paramFactory;
this.websiteUrl = websiteUrl;
this.searchUnitConversionService = searchUnitConversionService;
}
@ -69,10 +75,17 @@ public class SearchOperator {
return searchQueryService.getResultsFromQuery(queryResponse);
}
public List<UrlDetails> doBacklinkSearch(Context ctx,
String domain) {
public DecoratedSearchResults doSearch(Context ctx, UserSearchParameters userParams) {
var queryParams = paramFactory.forBacklinkSearch(domain);
var queryResponse = queryClient.search(ctx, queryParams);
Future<String> eval = searchUnitConversionService.tryEval(ctx, userParams.humanQuery());
return searchQueryService.getResultsFromQuery(queryResponse);
}
public DecoratedSearchResults doSearch(Context ctx, SearchParameters userParams) {
Future<String> eval = searchUnitConversionService.tryEval(ctx, userParams.query());
var queryParams = paramFactory.forRegularSearch(userParams);
var queryResponse = queryClient.search(ctx, queryParams);
@ -88,6 +101,7 @@ public class SearchOperator {
.problems(getProblems(ctx, evalResult, queryResults, queryResponse))
.evalResult(evalResult)
.results(queryResults)
.filters(new SearchFilters(websiteUrl, userParams))
.focusDomain(queryResponse.domain())
.focusDomainId(getDomainId(queryResponse.domain()))
.build();

View File

@ -5,20 +5,21 @@ import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.search.model.UserSearchParameters;
import nu.marginalia.search.command.SearchParameters;
import java.util.List;
public class SearchQueryParamFactory {
public QueryParams forRegularSearch(UserSearchParameters userParams) {
public QueryParams forRegularSearch(SearchParameters userParams) {
SearchSubquery prototype = new SearchSubquery();
var profile = userParams.profile();
profile.addTacitTerms(prototype);
userParams.jsSetting().addTacitTerms(prototype);
userParams.js().addTacitTerms(prototype);
userParams.adtech().addTacitTerms(prototype);
return new QueryParams(
userParams.humanQuery(),
userParams.query(),
null,
prototype.searchTermsInclude,
prototype.searchTermsExclude,
@ -51,4 +52,22 @@ public class SearchQueryParamFactory {
SearchSetIdentifier.NONE
);
}
/**
 * Builds the query parameters for a backlink search on {@code domain}.
 * <p>
 * The query string is {@code "links:"} followed by the domain. The second
 * argument is {@code null}, all list arguments are empty, every specification
 * limit is {@code SpecificationLimit.none()}, and the search set is
 * {@code SearchSetIdentifier.NONE}.
 *
 * @param domain domain name appended to the {@code links:} prefix
 * @return the assembled query parameters
 */
public QueryParams forBacklinkSearch(String domain) {
    final String backlinkQuery = "links:" + domain;
    final var limits = new QueryLimits(100, 100, 100, 512);

    return new QueryParams(backlinkQuery,
            null,
            List.of(), List.of(), List.of(), List.of(),
            SpecificationLimit.none(), SpecificationLimit.none(),
            SpecificationLimit.none(), SpecificationLimit.none(),
            List.of(),
            limits,
            SearchSetIdentifier.NONE);
}
}

View File

@ -32,6 +32,7 @@ public class SearchService extends Service {
SearchErrorPageService errorPageService,
SearchAddToCrawlQueueService addToCrawlQueueService,
SearchFlagSiteService flagSiteService,
SearchSiteInfoService siteInfoService,
SearchQueryService searchQueryService
) {
super(params);
@ -50,10 +51,10 @@ public class SearchService extends Service {
Spark.post("/public/site/suggest/", addToCrawlQueueService::suggestCrawling);
Spark.get("/public/site/flag-site/:domainId", flagSiteService::flagSiteForm);
Spark.post("/public/site/flag-site/:domainId", flagSiteService::flagSiteAction);
Spark.get("/public/site-search/:site/*", this::siteSearchRedir);
Spark.get("/public/site/:site", this::siteSearchRedir);
Spark.get("/public/site/:site", siteInfoService::handle);
Spark.post("/public/site/:site", siteInfoService::handlePost);
Spark.exception(Exception.class, (e,p,q) -> {
logger.error("Error during processing", e);

View File

@ -3,6 +3,7 @@ package nu.marginalia.search.command;
import com.google.inject.Inject;
import nu.marginalia.search.command.commands.*;
import nu.marginalia.client.Context;
import spark.Response;
import java.util.ArrayList;
import java.util.List;
@ -17,30 +18,32 @@ public class CommandEvaluator {
BrowseCommand browse,
ConvertCommand convert,
DefinitionCommand define,
SiteListCommand site,
BangCommand bang,
SiteRedirectCommand siteRedirect,
SearchCommand search
) {
specialCommands.add(browse);
specialCommands.add(convert);
specialCommands.add(define);
specialCommands.add(site);
specialCommands.add(bang);
specialCommands.add(siteRedirect);
defaultCommand = search;
}
public Object eval(Context ctx, SearchParameters parameters, String query) {
public Object eval(Context ctx, Response response, SearchParameters parameters) {
for (var cmd : specialCommands) {
var ret = cmd.process(ctx, parameters, query);
if (ret.isPresent()) {
return ret.get();
if (cmd.process(ctx, response, parameters)) {
// The commands will write directly to the response, so we don't need to do anything else
// but it's important we don't return null, as this signals to Spark that we haven't handled
// the request.
return "";
}
}
// Always process the search command last
return defaultCommand.process(ctx, parameters, query)
.orElseThrow(() -> new IllegalStateException("Search Command returned Optional.empty()!") /* This Should Not be Possible™ */ );
defaultCommand.process(ctx, response, parameters);
return "";
}
}

View File

@ -0,0 +1,29 @@
package nu.marginalia.search.command;
import nu.marginalia.index.client.model.query.SearchSubquery;
import javax.annotation.Nullable;
import java.util.Arrays;
/**
 * Ad-tech setting of a search. Each constant carries the string {@link #value}
 * it is parsed from and the search terms that {@link #addTacitTerms} appends to
 * a subquery's exclude list.
 */
public enum SearchAdtechParameter {
    DEFAULT("default"),
    REDUCE("reduce", "special:ads", "special:affiliate");

    /** String form of this setting, compared against in {@link #parse}. */
    public final String value;
    /** Terms added to the exclude list by {@link #addTacitTerms}; empty for {@link #DEFAULT}. */
    public final String[] implictExcludeSearchTerms;

    SearchAdtechParameter(String value, String... excludeTerms) {
        this.value = value;
        this.implictExcludeSearchTerms = excludeTerms;
    }

    /**
     * Maps a string to a setting.
     *
     * @param value string to interpret, may be {@code null}
     * @return {@link #REDUCE} when {@code value} equals its value, otherwise {@link #DEFAULT}
     */
    public static SearchAdtechParameter parse(@Nullable String value) {
        return REDUCE.value.equals(value) ? REDUCE : DEFAULT;
    }

    /**
     * Appends this setting's exclude terms to {@code subquery.searchTermsExclude}.
     *
     * @param subquery subquery whose exclude list is extended
     */
    public void addTacitTerms(SearchSubquery subquery) {
        var extraExcludes = Arrays.asList(implictExcludeSearchTerms);
        subquery.searchTermsExclude.addAll(extraExcludes);
    }
}

View File

@ -2,9 +2,8 @@ package nu.marginalia.search.command;
import nu.marginalia.client.Context;
import java.util.Optional;
import spark.Response;
public interface SearchCommandInterface {
Optional<Object> process(Context ctx, SearchParameters parameters, String query);
boolean process(Context ctx, Response response, SearchParameters parameters);
}

View File

@ -1,9 +1,39 @@
package nu.marginalia.search.command;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.model.SearchProfile;
public record SearchParameters(SearchProfile profile, SearchJsParameter js, boolean detailedResults) {
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
public record SearchParameters(String query,
SearchProfile profile,
SearchJsParameter js,
SearchAdtechParameter adtech
) {
public String profileStr() {
return profile.name;
return profile.filterId;
}
/**
 * Returns a copy of these parameters using the given profile; the query, js
 * and adtech components are carried over unchanged.
 *
 * @param profile profile of the returned parameters
 */
public SearchParameters withProfile(SearchProfile profile) {
    return new SearchParameters(this.query, profile, this.js, this.adtech);
}
/**
 * Returns a copy of these parameters using the given js setting; the query,
 * profile and adtech components are carried over unchanged.
 *
 * @param js js setting of the returned parameters
 */
public SearchParameters withJs(SearchJsParameter js) {
    return new SearchParameters(this.query, this.profile, js, this.adtech);
}
/**
 * Returns a copy of these parameters using the given adtech setting; the
 * query, profile and js components are carried over unchanged.
 *
 * @param adtech adtech setting of the returned parameters
 */
public SearchParameters withAdtech(SearchAdtechParameter adtech) {
    return new SearchParameters(this.query, this.profile, this.js, adtech);
}
/**
 * Renders these parameters as a {@code /search} path with the query string
 * {@code query}, {@code profile}, {@code js} and {@code adtech}, each value
 * URL-encoded as UTF-8, and passes that path to {@code baseUrl.withPath}.
 *
 * @param baseUrl website URL the path is handed to
 * @return whatever {@code baseUrl.withPath} returns for the rendered path
 */
public String renderUrl(WebsiteUrl baseUrl) {
    final var utf8 = StandardCharsets.UTF_8;

    final String encodedQuery = URLEncoder.encode(query, utf8);
    final String encodedProfile = URLEncoder.encode(profile.filterId, utf8);
    final String encodedJs = URLEncoder.encode(js.value, utf8);
    final String encodedAdtech = URLEncoder.encode(adtech.value, utf8);

    return baseUrl.withPath(
            "/search?query=%s&profile=%s&js=%s&adtech=%s".formatted(
                    encodedQuery, encodedProfile, encodedJs, encodedAdtech));
}
}

View File

@ -5,6 +5,7 @@ import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.client.Context;
import nu.marginalia.search.exceptions.RedirectException;
import spark.Response;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
@ -23,36 +24,75 @@ public class BangCommand implements SearchCommandInterface {
}
@Override
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
public boolean process(Context ctx, Response response, SearchParameters parameters) {
for (var entry : bangsToPattern.entrySet()) {
matchBangPattern(query, entry.getKey(), entry.getValue());
String bangPattern = entry.getKey();
String redirectPattern = entry.getValue();
var match = matchBangPattern(parameters.query(), bangPattern);
if (match.isPresent()) {
var url = String.format(redirectPattern, URLEncoder.encode(match.get(), StandardCharsets.UTF_8));
throw new RedirectException(url);
}
}
return false;
}
/**
 * Looks for a stand-alone occurrence of {@code bangKey} in {@code query}.
 * <p>
 * An occurrence counts only when it is delimited on both sides by a space
 * character or by the start/end of the query. So {@code "!b term"} and
 * {@code "search term !b"} match, while {@code "search term!b"} and
 * {@code "!bsearch term"} do not.
 *
 * @param query   the full query string
 * @param bangKey the bang to look for
 * @return the query with the first stand-alone bang removed and surrounding
 *         whitespace trimmed, or empty if there is no such occurrence
 */
private Optional<String> matchBangPattern(String query, String bangKey) {
    var bm = new BangMatcher(query);

    while (bm.findNext(bangKey)) {
        // Skip matches glued to preceding text, e.g. "search term!b"
        if (!bm.isRelativeSpaceOrInvalid(-1)) {
            continue;
        }
        // Skip matches glued to following text, e.g. "!bsearch term" for !b
        if (!bm.isRelativeSpaceOrInvalid(bangKey.length())) {
            continue;
        }

        String prefix = bm.prefix().trim();
        String suffix = bm.suffix(bangKey.length()).trim();

        // Trim the joined result so a bang at either end of the query
        // does not leave a stray leading or trailing space
        return Optional.of((prefix + " " + suffix).trim());
    }

    return Optional.empty();
}
private void matchBangPattern(String query, String bangKey, String urlPattern) {
for (int idx = query.indexOf(bangKey); idx >= 0; idx = query.indexOf(bangKey, idx + 1)) {
private static class BangMatcher {
private final String str;
private int pos;
if (idx > 0) { // Don't match "search term!b", require either "!b term" or "search term !b"
if (!Character.isSpaceChar(query.charAt(idx-1))) {
continue;
}
}
int nextIdx = idx + bangKey.length();
if (nextIdx >= query.length()) { // allow "search term !b"
redirect(urlPattern, query.substring(0, idx));
}
else if (Character.isSpaceChar(query.charAt(nextIdx))) { // skip matches on pattern "!bsearch term" for !b
redirect(urlPattern, query.substring(0, idx).stripTrailing() + " " + query.substring(nextIdx).stripLeading());
}
/** Returns the part of the string that precedes the current position. */
public String prefix() {
    final int end = pos;
    return str.substring(0, end);
}
/**
 * Returns the part of the string starting {@code offset} characters after the
 * current position, or the empty string when that index is at or past the end.
 *
 * @param offset number of characters to skip from the current position
 */
public String suffix(int offset) {
    final int from = pos + offset;
    return from < str.length() ? str.substring(from) : "";
}
/**
 * Creates a matcher over {@code str}. The position starts at -1, so the first
 * {@code findNext} call searches from index 0.
 *
 * @param str string to search in
 */
public BangMatcher(String str) {
    this.pos = -1;
    this.str = str;
}
/**
 * Moves the position to the next occurrence of {@code pattern} located after
 * the current position.
 *
 * @param pattern text to search for
 * @return {@code true} if an occurrence was found; {@code false} if the search
 *         start is already at or past the end of the string, or nothing was found
 */
public boolean findNext(String pattern) {
    final int searchFrom = pos + 1;
    if (searchFrom >= str.length()) {
        return false;
    }

    pos = str.indexOf(pattern, searchFrom);
    return pos >= 0;
}
/**
 * Tests the character {@code offset} positions away from the current position.
 *
 * @param offset distance from the current position; may be negative
 * @return {@code true} if that index lies outside the string, or if the
 *         character there is a space per {@link Character#isSpaceChar(char)}
 */
public boolean isRelativeSpaceOrInvalid(int offset) {
    final int index = pos + offset;
    final boolean outOfBounds = index < 0 || index >= str.length();

    return outOfBounds || Character.isSpaceChar(str.charAt(index));
}
}
private void redirect(String pattern, String terms) {
var url = String.format(pattern, URLEncoder.encode(terms.trim(), StandardCharsets.UTF_8));
throw new RedirectException(url);
}
}

View File

@ -1,71 +1,55 @@
package nu.marginalia.search.command.commands;
import com.google.inject.Inject;
import nu.marginalia.browse.DbBrowseDomainsRandom;
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
import nu.marginalia.browse.DbBrowseDomainsSimilarOldAlgo;
import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.browse.model.BrowseResultSet;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.results.BrowseResultCleaner;
import nu.marginalia.client.Context;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.svc.SearchBrowseService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Response;
import java.io.IOException;
import java.util.*;
import java.util.Map;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import static java.util.Collections.shuffle;
public class BrowseCommand implements SearchCommandInterface {
private final DbBrowseDomainsRandom randomDomains;
private final DbBrowseDomainsSimilarCosine similarDomains;
private final DbBrowseDomainsSimilarOldAlgo similarDomainsOld;
private final DbDomainQueries domainQueries;
private final DomainBlacklist blacklist;
private final SearchBrowseService browseService;
private final MustacheRenderer<BrowseResultSet> browseResultsRenderer;
private final BrowseResultCleaner browseResultCleaner;
private final Logger logger = LoggerFactory.getLogger(getClass());
private final Predicate<String> queryPatternPredicate = Pattern.compile("^browse:[.A-Za-z\\-0-9:]+$").asPredicate();
@Inject
public BrowseCommand(DbBrowseDomainsRandom randomDomains,
DbBrowseDomainsSimilarCosine similarDomains,
DbBrowseDomainsSimilarOldAlgo similarDomainsOld, DbDomainQueries domainQueries,
DomainBlacklist blacklist,
RendererFactory rendererFactory,
BrowseResultCleaner browseResultCleaner)
public BrowseCommand(SearchBrowseService browseService,
RendererFactory rendererFactory)
throws IOException
{
this.randomDomains = randomDomains;
this.similarDomains = similarDomains;
this.similarDomainsOld = similarDomainsOld;
this.domainQueries = domainQueries;
this.blacklist = blacklist;
this.browseResultCleaner = browseResultCleaner;
this.browseService = browseService;
browseResultsRenderer = rendererFactory.renderer("search/browse-results");
}
@Override
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
if (!queryPatternPredicate.test(query)) {
return Optional.empty();
public boolean process(Context ctx, Response response, SearchParameters parameters) {
if (!queryPatternPredicate.test(parameters.query())) {
return false;
}
return Optional.ofNullable(browseSite(ctx, query))
.map(results -> browseResultsRenderer.render(results,
Map.of("query", query,
var model = browseSite(ctx, parameters.query());
if (null == model)
return false;
browseResultsRenderer.renderInto(response, model,
Map.of("query", parameters.query(),
"profile", parameters.profileStr(),
"focusDomain", results.focusDomain())));
"focusDomain", model.focusDomain())
);
return true;
}
@ -75,14 +59,14 @@ public class BrowseCommand implements SearchCommandInterface {
try {
if ("random".equals(word)) {
return getRandomEntries(0);
return browseService.getRandomEntries(0);
}
if (word.startsWith("random:")) {
int set = Integer.parseInt(word.split(":")[1]);
return getRandomEntries(set);
return browseService.getRandomEntries(set);
}
else {
return getRelatedEntries(word);
return browseService.getRelatedEntries(word);
}
}
catch (Exception ex) {
@ -91,34 +75,5 @@ public class BrowseCommand implements SearchCommandInterface {
}
}
private BrowseResultSet getRandomEntries(int set) {
List<BrowseResult> results = randomDomains.getRandomDomains(25, blacklist, set);
results.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
return new BrowseResultSet(results);
}
private BrowseResultSet getRelatedEntries(String word) {
var domain = domainQueries.getDomainId(new EdgeDomain(word));
var neighbors = similarDomains.getDomainNeighborsAdjacentCosine(domain, blacklist, 256);
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
// If the results are very few, supplement with the alternative shitty algorithm
if (neighbors.size() < 25) {
Set<BrowseResult> allNeighbors = new HashSet<>(neighbors);
allNeighbors.addAll(similarDomainsOld.getDomainNeighborsAdjacent(domain, blacklist, 50));
neighbors.clear();
neighbors.addAll(allNeighbors);
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
}
// shuffle the items for a less repetitive experience
shuffle(neighbors);
return new BrowseResultSet(neighbors, word);
}
}

View File

@ -1,16 +1,17 @@
package nu.marginalia.search.command.commands;
import com.google.inject.Inject;
import lombok.SneakyThrows;
import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.svc.SearchUnitConversionService;
import nu.marginalia.client.Context;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import spark.Response;
import java.io.IOException;
import java.util.Map;
import java.util.Optional;
public class ConvertCommand implements SearchCommandInterface {
private final SearchUnitConversionService searchUnitConversionService;
@ -24,12 +25,19 @@ public class ConvertCommand implements SearchCommandInterface {
}
@Override
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
var conversion = searchUnitConversionService.tryConversion(ctx, query);
@SneakyThrows
public boolean process(Context ctx, Response response, SearchParameters parameters) {
var conversion = searchUnitConversionService.tryConversion(ctx, parameters.query());
if (conversion.isEmpty()) {
return Optional.empty();
return false;
}
return Optional.of(conversionRenderer.render(Map.of("query", query, "result", conversion.get(), "profile", parameters.profileStr())));
conversionRenderer.renderInto(response, Map.of(
"query", parameters.query(),
"result", conversion.get(),
"profile", parameters.profileStr())
);
return true;
}
}

View File

@ -12,10 +12,10 @@ import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Response;
import java.io.IOException;
import java.util.Map;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;
@ -38,14 +38,19 @@ public class DefinitionCommand implements SearchCommandInterface {
}
@Override
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
if (!queryPatternPredicate.test(query.trim())) {
return Optional.empty();
public boolean process(Context ctx, Response response, SearchParameters parameters) {
if (!queryPatternPredicate.test(parameters.query())) {
return false;
}
var results = lookupDefinition(ctx, query);
var results = lookupDefinition(ctx, parameters.query());
return Optional.of(dictionaryRenderer.render(results, Map.of("query", query, "profile", parameters.profileStr())));
dictionaryRenderer.renderInto(response, results,
Map.of("query", parameters.query(),
"profile", parameters.profileStr())
);
return true;
}

View File

@ -8,12 +8,11 @@ import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.model.UserSearchParameters;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import spark.Response;
import java.io.IOException;
import java.util.Optional;
public class SearchCommand implements SearchCommandInterface {
private final DomainBlacklist blacklist;
@ -33,12 +32,12 @@ public class SearchCommand implements SearchCommandInterface {
}
@Override
public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
UserSearchParameters params = new UserSearchParameters(query, parameters.profile(), parameters.js());
public boolean process(Context ctx, Response response, SearchParameters parameters) {
DecoratedSearchResults results = searchOperator.doSearch(ctx, parameters);
DecoratedSearchResults results = searchOperator.doSearch(ctx, params);
searchResultsRenderer.renderInto(response, results);
return Optional.of(searchResultsRenderer.render(results));
return true;
}
private boolean isBlacklisted(UrlDetails details) {

View File

@ -1,119 +0,0 @@
package nu.marginalia.search.command.commands;
import com.google.inject.Inject;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.search.SearchOperator;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.DomainInformation;
import nu.marginalia.search.model.SearchProfile;
import nu.marginalia.search.siteinfo.DomainInformationService;
import nu.marginalia.search.svc.SearchQueryIndexService;
import nu.marginalia.client.Context;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Path;
import java.util.*;
import java.util.function.Predicate;
import java.util.regex.Pattern;
/**
 * Search command handling {@code site:<domain>} queries: looks up information about
 * the domain, runs a site search for it and renders the site-info template.
 */
public class SiteListCommand implements SearchCommandInterface {
    private final DbDomainQueries domainQueries;
    private final DomainInformationService domainInformationService;
    private final SearchQueryIndexService searchQueryIndexService;
    private final SearchOperator searchOperator;
    private final Logger logger = LoggerFactory.getLogger(getClass());

    private final MustacheRenderer<DomainInformation> siteInfoRenderer;

    private final Predicate<String> queryPatternPredicate = Pattern.compile("^site:[.A-Za-z\\-0-9]+$").asPredicate();

    @Inject
    public SiteListCommand(
            DomainInformationService domainInformationService,
            DbDomainQueries domainQueries,
            RendererFactory rendererFactory,
            SearchQueryIndexService searchQueryIndexService, SearchOperator searchOperator)
            throws IOException
    {
        this.domainQueries = domainQueries;
        this.domainInformationService = domainInformationService;

        siteInfoRenderer = rendererFactory.renderer("search/site-info");
        this.searchQueryIndexService = searchQueryIndexService;
        this.searchOperator = searchOperator;
    }

    /**
     * Renders the site-info page for a {@code site:} query.
     *
     * @param ctx        request context forwarded to the search operator
     * @param parameters search parameters; only the profile string is used here
     * @param query      the raw query text
     * @return the rendered page, or an empty Optional if the query is not a {@code site:} query
     */
    @Override
    public Optional<Object> process(Context ctx, SearchParameters parameters, String query) {
        if (!queryPatternPredicate.test(query)) {
            return Optional.empty();
        }

        var results = siteInfo(ctx, query);
        var domain = results.getDomain();

        List<UrlDetails> resultSet = Collections.emptyList();

        // The screenshot location is a URL path, so it is assembled as a plain string;
        // going through java.nio.file.Path would render it with the platform's
        // separator (backslashes on Windows).
        String screenshot = "";
        int domainId = -1;

        if (null != domain) {
            resultSet = searchOperator.doSiteSearch(ctx, domain.toString());

            var maybeId = domainQueries.tryGetDomainId(domain);
            if (maybeId.isPresent()) {
                domainId = maybeId.getAsInt();
                screenshot = "/screenshot/" + domainId;
            }
            else {
                // domainId stays -1; fall back to the screenshot stored under id 0
                screenshot = "/screenshot/0";
            }
        }

        Map<String, Object> renderObject = new HashMap<>(10);

        renderObject.put("query", query);
        renderObject.put("hideRanking", true);
        renderObject.put("profile", parameters.profileStr());
        renderObject.put("results", resultSet);
        renderObject.put("screenshot", screenshot);
        renderObject.put("domainId", domainId);
        renderObject.put("focusDomain", domain);

        return Optional.of(siteInfoRenderer.render(results, renderObject));
    }

    /**
     * Strips the {@code site:} prefix from the query and fetches the domain information,
     * substituting a placeholder record when the domain information service has none.
     */
    private DomainInformation siteInfo(Context ctx, String humanQuery) {
        String definePrefix = "site:";
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
        String word = humanQuery.substring(definePrefix.length()).toLowerCase(java.util.Locale.ROOT);

        logger.info("Fetching Site Info: {}", word);

        var results = domainInformationService
                .domainInfo(word)
                .orElseGet(() -> unknownSite(word));

        logger.debug("Results = {}", results);

        return results;
    }

    /** Builds the placeholder record used for a domain with no stored information. */
    private DomainInformation unknownSite(String url) {
        return DomainInformation.builder()
                .domain(new EdgeDomain(url))
                .suggestForCrawling(true)
                .unknownDomain(true)
                .build();
    }
}

View File

@ -0,0 +1,55 @@
package nu.marginalia.search.command.commands;
import com.google.inject.Inject;
import lombok.SneakyThrows;
import nu.marginalia.client.Context;
import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Response;
import java.io.IOException;
import java.util.function.Predicate;
import java.util.regex.Pattern;
/**
 * Search command that intercepts {@code site:}, {@code links:} and {@code similar:}
 * queries and, instead of running a search, answers with a small HTML page that
 * redirects the browser to the corresponding {@code /site/<domain>?view=...} URL.
 */
public class SiteRedirectCommand implements SearchCommandInterface {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Accepts exactly {@code <prefix>:<domain>}. A match predicate is used so that the
     * whole query has to match: with a find-style predicate the trailing {@code $}
     * also tolerates a final line terminator, which would then be copied into the
     * generated redirect markup.
     */
    private final Predicate<String> queryPatternPredicate =
            Pattern.compile("^(site|links|similar):[.A-Za-z\\-0-9]+$").asMatchPredicate();

    @Inject
    public SiteRedirectCommand() {
    }

    /**
     * Writes a redirect page to the response if the query is a site/links/similar query.
     *
     * @param ctx        request context (not used by this command)
     * @param response   response whose raw output stream receives the redirect page
     * @param parameters search parameters; only the query text is inspected
     * @return {@code true} if the query was handled here, {@code false} if it did not match
     */
    @SneakyThrows
    @Override
    public boolean process(Context ctx, Response response, SearchParameters parameters) {
        final String query = parameters.query();

        if (!queryPatternPredicate.test(query)) {
            return false;
        }

        final int separator = query.indexOf(':');
        final String prefix = query.substring(0, separator);

        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
        final String domain = query.substring(separator + 1).toLowerCase(java.util.Locale.ROOT);

        final String view = switch (prefix) {
            case "links" -> "links";
            case "similar" -> "similar";
            default -> "info";
        };

        // Use an HTML redirect here, so we can use relative URLs
        response.raw().getOutputStream().println("""
                <!DOCTYPE html>
                <html lang="en">
                <meta charset="UTF-8">
                <title>Redirecting...</title>
                <meta http-equiv="refresh" content="0; url=/site/%s?view=%s">
                """.formatted(domain, view));

        return true;
    }
}

View File

@ -3,12 +3,13 @@ package nu.marginalia.search.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import nu.marginalia.search.command.SearchParameters;
import java.util.List;
@AllArgsConstructor @Getter @Builder
public class DecoratedSearchResults {
private final UserSearchParameters params;
private final SearchParameters params;
private final List<String> problems;
private final String evalResult;
@ -16,14 +17,15 @@ public class DecoratedSearchResults {
private final String focusDomain;
private final int focusDomainId;
private final SearchFilters filters;
public String getQuery() {
return params.humanQuery();
return params.query();
}
public String getProfile() {
return params.profile().name;
return params.profile().filterId;
}
public String getJs() {
return params.jsSetting().value;
return params.js().value;
}
public String getAdtech() { return params.adtech().value; }
}

View File

@ -0,0 +1,132 @@
package nu.marginalia.search.model;
import lombok.Getter;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.command.SearchAdtechParameter;
import nu.marginalia.search.command.SearchJsParameter;
import nu.marginalia.search.command.SearchParameters;
import org.apache.regexp.RE;
import java.util.List;
/**
 * View model for the filter controls shown beside the search results.
 * <p>
 * Every option and filter carries a pre-rendered URL that repeats the current
 * search with that one setting changed.
 */
public class SearchFilters {
    private final WebsiteUrl url;

    @Getter
    public final String currentFilter;

    // The getters below exist so that the renderer can reach the data

    @Getter
    public final RemoveJsOption removeJsOption;

    @Getter
    public final ReduceAdtechOption reduceAdtechOption;

    @Getter
    public final List<List<Filter>> filterGroups;

    /**
     * @param url        website URL handed to {@code SearchParameters.renderUrl} for every link
     * @param parameters the parameters of the current search
     */
    public SearchFilters(WebsiteUrl url, SearchParameters parameters) {
        // Assigned first: the nested option and filter constructors read SearchFilters.this.url
        this.url = url;

        removeJsOption = new RemoveJsOption(parameters);
        reduceAdtechOption = new ReduceAdtechOption(parameters);
        currentFilter = parameters.profile().filterId;

        final List<Filter> firstGroup = List.of(
                new Filter("No Filter", SearchProfile.NO_FILTER, parameters),
                new Filter("Popular", SearchProfile.DEFAULT, parameters),
                new Filter("Small Web", SearchProfile.SMALLWEB, parameters),
                new Filter("Blogosphere", SearchProfile.BLOGOSPHERE, parameters),
                new Filter("Academia", SearchProfile.ACADEMIA, parameters)
        );
        final List<Filter> secondGroup = List.of(
                new Filter("Vintage", SearchProfile.VINTAGE, parameters),
                new Filter("Plain Text", SearchProfile.PLAIN_TEXT, parameters),
                new Filter("~tilde", SearchProfile.TILDE, parameters)
        );
        final List<Filter> thirdGroup = List.of(
                new Filter("Wiki", SearchProfile.WIKI, parameters),
                new Filter("Forum", SearchProfile.FORUM, parameters),
                new Filter("Docs", SearchProfile.DOCS, parameters),
                new Filter("Recipes", SearchProfile.FOOD, parameters)
        );

        filterGroups = List.of(firstGroup, secondGroup, thirdGroup);
    }

    /** Toggle for the JavaScript setting; {@link #url} points at the search with the setting flipped. */
    public class RemoveJsOption {
        private final SearchJsParameter value;

        @Getter
        public final String url;

        public RemoveJsOption(SearchParameters parameters) {
            this.value = parameters.js();

            // The link leads to the opposite of the current state
            final SearchJsParameter flipped;
            switch (parameters.js()) {
                case DENY_JS -> flipped = SearchJsParameter.DEFAULT;
                default -> flipped = SearchJsParameter.DENY_JS;
            }

            this.url = parameters.withJs(flipped).renderUrl(SearchFilters.this.url);
        }

        public String name() {
            return "Remove Javascript";
        }

        public boolean isSet() {
            return value.equals(SearchJsParameter.DENY_JS);
        }
    }

    /** Toggle for the adtech setting; {@link #url} points at the search with the setting flipped. */
    public class ReduceAdtechOption {
        private final SearchAdtechParameter value;

        @Getter
        public final String url;

        public ReduceAdtechOption(SearchParameters parameters) {
            this.value = parameters.adtech();

            // The link leads to the opposite of the current state
            final SearchAdtechParameter flipped;
            switch (parameters.adtech()) {
                case REDUCE -> flipped = SearchAdtechParameter.DEFAULT;
                default -> flipped = SearchAdtechParameter.REDUCE;
            }

            this.url = parameters.withAdtech(flipped).renderUrl(SearchFilters.this.url);
        }

        public String name() {
            return "Reduce Adtech";
        }

        public boolean isSet() {
            return value.equals(SearchAdtechParameter.REDUCE);
        }
    }

    /** One selectable search profile, with the URL that repeats the search under that profile. */
    public class Filter {
        @Getter
        public final String displayName;

        public final SearchProfile profile;

        /** True when this filter's profile is the one used by the current search. */
        @Getter
        public final boolean current;

        @Getter
        public final String url;

        public Filter(String displayName, SearchProfile profile, SearchParameters parameters) {
            this.displayName = displayName;
            this.profile = profile;
            this.current = profile.equals(parameters.profile());
            this.url = parameters.withProfile(profile).renderUrl(SearchFilters.this.url);
        }
    }
}

View File

@ -9,9 +9,9 @@ import java.util.Objects;
public enum SearchProfile {
DEFAULT("default", SearchSetIdentifier.RETRO),
MODERN("modern", SearchSetIdentifier.SMALLWEB),
SMALLWEB("modern", SearchSetIdentifier.SMALLWEB),
BLOGOSPHERE("blogosphere", SearchSetIdentifier.BLOGS),
CORPO("corpo", SearchSetIdentifier.NONE),
NO_FILTER("corpo", SearchSetIdentifier.NONE),
YOLO("yolo", SearchSetIdentifier.NONE),
VINTAGE("vintage", SearchSetIdentifier.NONE),
TILDE("tilde", SearchSetIdentifier.NONE),
@ -27,27 +27,27 @@ public enum SearchProfile {
;
public final String name;
public final String filterId;
public final SearchSetIdentifier searchSetIdentifier;
SearchProfile(String name, SearchSetIdentifier searchSetIdentifier) {
this.name = name;
SearchProfile(String filterId, SearchSetIdentifier searchSetIdentifier) {
this.filterId = filterId;
this.searchSetIdentifier = searchSetIdentifier;
}
private final static SearchProfile[] values = values();
public static SearchProfile getSearchProfile(String param) {
if (null == param) {
return YOLO;
return NO_FILTER;
}
for (var profile : values) {
if (Objects.equals(profile.name, param)) {
if (Objects.equals(profile.filterId, param)) {
return profile;
}
}
return YOLO;
return NO_FILTER;
}
public void addTacitTerms(SearchSubquery subquery) {
@ -82,7 +82,7 @@ public enum SearchProfile {
}
public SpecificationLimit getYearLimit() {
if (this == MODERN) {
if (this == SMALLWEB) {
return SpecificationLimit.greaterThan(2015);
}
if (this == VINTAGE) {
@ -92,7 +92,7 @@ public enum SearchProfile {
}
public SpecificationLimit getSizeLimit() {
if (this == MODERN) {
if (this == SMALLWEB) {
return SpecificationLimit.lessThan(500);
}
else return SpecificationLimit.none();
@ -100,7 +100,7 @@ public enum SearchProfile {
public SpecificationLimit getQualityLimit() {
if (this == MODERN) {
if (this == SMALLWEB) {
return SpecificationLimit.lessThan(5);
}
else return SpecificationLimit.none();

View File

@ -1,7 +0,0 @@
package nu.marginalia.search.model;
import nu.marginalia.search.command.SearchJsParameter;
import nu.marginalia.search.model.SearchProfile;
/**
 * Immutable bundle of the values describing one search request: the query text,
 * the search profile and the JavaScript parameter.
 */
public record UserSearchParameters(String humanQuery, SearchProfile profile, SearchJsParameter jsSetting) {
}

View File

@ -76,7 +76,7 @@ public class DomainInformationService {
.linkingDomains(linkingDomains)
.inCrawlQueue(inCrawlQueue)
.nodeAffinity(nodeAffinity)
.suggestForCrawling((pagesVisited == 0 && !inCrawlQueue))
.suggestForCrawling((pagesVisited == 0 && outboundLinks == 0 && !inCrawlQueue))
.build();
return Optional.of(di);
@ -218,23 +218,6 @@ public class DomainInformationService {
}
}
@SneakyThrows
public double getDomainQuality(int domainId) {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement("SELECT QUALITY FROM EC_DOMAIN WHERE ID=?")) {
stmt.setInt(1, domainId);
var rsp = stmt.executeQuery();
if (rsp.next()) {
return rsp.getDouble(1);
}
} catch (Exception ex) {
logger.error("DB error", ex);
}
return -5;
}
}
public DomainIndexingState getDomainState(int domainId) {
try (var connection = dataSource.getConnection()) {

View File

@ -0,0 +1,73 @@
package nu.marginalia.search.svc;
import com.google.inject.Inject;
import nu.marginalia.browse.DbBrowseDomainsRandom;
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
import nu.marginalia.browse.DbBrowseDomainsSimilarOldAlgo;
import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.browse.model.BrowseResultSet;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.search.results.BrowseResultCleaner;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static java.util.Collections.shuffle;
/**
 * Produces the result sets for the browse views: a random selection of domains,
 * or the domains related to a given one.
 */
public class SearchBrowseService {
    private final DbBrowseDomainsRandom randomDomains;
    private final DbBrowseDomainsSimilarCosine similarDomains;
    private final DbBrowseDomainsSimilarOldAlgo similarDomainsOld;
    private final DbDomainQueries domainQueries;
    private final DomainBlacklist blacklist;
    private final BrowseResultCleaner browseResultCleaner;

    @Inject
    public SearchBrowseService(DbBrowseDomainsRandom randomDomains,
                               DbBrowseDomainsSimilarCosine similarDomains,
                               DbBrowseDomainsSimilarOldAlgo similarDomainsOld,
                               DbDomainQueries domainQueries,
                               DomainBlacklist blacklist,
                               BrowseResultCleaner browseResultCleaner)
    {
        this.randomDomains = randomDomains;
        this.similarDomains = similarDomains;
        this.similarDomainsOld = similarDomainsOld;
        this.domainQueries = domainQueries;
        this.blacklist = blacklist;
        this.browseResultCleaner = browseResultCleaner;
    }

    /**
     * Fetches up to 25 random domains from the given set and drops the ones
     * rejected by the result cleaner.
     *
     * @param set identifier of the random set, passed through to the database query
     */
    public BrowseResultSet getRandomEntries(int set) {
        List<BrowseResult> picks = randomDomains.getRandomDomains(25, blacklist, set);

        picks.removeIf(browseResultCleaner.shouldRemoveResultPredicate());

        return new BrowseResultSet(picks);
    }

    /**
     * Fetches the domains similar to the given one, cleaned and in shuffled order.
     *
     * @param word the domain name to find neighbors for
     */
    public BrowseResultSet getRelatedEntries(String word) {
        var domainId = domainQueries.getDomainId(new EdgeDomain(word));

        var related = similarDomains.getDomainNeighborsAdjacentCosineRequireScreenshot(domainId, blacklist, 256);
        related.removeIf(browseResultCleaner.shouldRemoveResultPredicate());

        if (related.size() < 25) {
            // Too few hits: merge in what the older algorithm finds. The set removes
            // entries both algorithms agree on before the list is refilled.
            Set<BrowseResult> merged = new HashSet<>(related);
            merged.addAll(similarDomainsOld.getDomainNeighborsAdjacent(domainId, blacklist, 50));

            related.clear();
            related.addAll(merged);
            related.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
        }

        // Randomize the order so repeated visits look less repetitive
        shuffle(related);

        return new BrowseResultSet(related, word);
    }
}

View File

@ -2,13 +2,7 @@ package nu.marginalia.search.svc;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import spark.Request;
import spark.Response;
import spark.Spark;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
@ -21,7 +15,6 @@ import java.util.stream.Collectors;
* DomainComplaintService in control-service
*/
public class SearchFlagSiteService {
private final MustacheRenderer<FlagSiteViewModel> formTemplate;
private final HikariDataSource dataSource;
private final CategoryItem unknownCategory = new CategoryItem("unknown", "Unknown");
@ -39,62 +32,21 @@ public class SearchFlagSiteService {
private final Map<String, CategoryItem> categoryItemMap =
categories.stream().collect(Collectors.toMap(CategoryItem::categoryName, Function.identity()));
@Inject
public SearchFlagSiteService(RendererFactory rendererFactory,
HikariDataSource dataSource) throws IOException {
formTemplate = rendererFactory.renderer("search/indict/indict-form");
public SearchFlagSiteService(HikariDataSource dataSource) {
this.dataSource = dataSource;
}
public Object flagSiteForm(Request request, Response response) throws SQLException {
final int domainId = Integer.parseInt(request.params("domainId"));
var model = getModel(domainId, false);
return formTemplate.render(model);
public List<CategoryItem> getCategories() {
return categories;
}
public Object flagSiteAction(Request request, Response response) throws SQLException {
int domainId = Integer.parseInt(request.params("domainId"));
var formData = new FlagSiteFormData(
domainId,
request.queryParams("category"),
request.queryParams("description"),
request.queryParams("samplequery")
);
insertComplaint(formData);
return formTemplate.render(getModel(domainId, true));
}
private void insertComplaint(FlagSiteFormData formData) throws SQLException {
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement(
"""
INSERT INTO DOMAIN_COMPLAINT(DOMAIN_ID, CATEGORY, DESCRIPTION, SAMPLE) VALUES (?, ?, ?, ?)
""")) {
stmt.setInt(1, formData.domainId);
stmt.setString(2, formData.category);
stmt.setString(3, formData.description);
stmt.setString(4, formData.sampleQuery);
stmt.executeUpdate();
}
}
private FlagSiteViewModel getModel(int id, boolean isSubmitted) throws SQLException {
public List<FlagSiteComplaintModel> getExistingComplaints(int id) throws SQLException {
try (var conn = dataSource.getConnection();
var complaintsStmt = conn.prepareStatement("""
SELECT CATEGORY, FILE_DATE, REVIEWED, DECISION
FROM DOMAIN_COMPLAINT
WHERE DOMAIN_ID=?
""");
var stmt = conn.prepareStatement(
"""
SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE EC_DOMAIN.ID=?
"""))
"""))
{
List<FlagSiteComplaintModel> complaints = new ArrayList<>();
@ -109,21 +61,25 @@ public class SearchFlagSiteService {
rs.getString(4)));
}
stmt.setInt(1, id);
rs = stmt.executeQuery();
if (!rs.next()) {
Spark.halt(404);
}
return new FlagSiteViewModel(id,
rs.getString(1),
categories,
complaints,
isSubmitted);
return complaints;
}
}
public void insertComplaint(FlagSiteFormData formData) throws SQLException {
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement(
"""
INSERT INTO DOMAIN_COMPLAINT(DOMAIN_ID, CATEGORY, DESCRIPTION, SAMPLE) VALUES (?, ?, ?, ?)
""")) {
stmt.setInt(1, formData.domainId);
stmt.setString(2, formData.category);
stmt.setString(3, formData.description);
stmt.setString(4, formData.sampleQuery);
stmt.executeUpdate();
}
}
public record CategoryItem(String categoryName, String categoryDesc) {}
public record FlagSiteViewModel(int domainId, String domain, List<CategoryItem> category, List<FlagSiteComplaintModel> complaints, boolean isSubmitted) {}
public record FlagSiteComplaintModel(String category, String submitTime, boolean isReviewed, String decision) {}
public record FlagSiteFormData(int domainId, String category, String description, String sampleQuery) {};
}

View File

@ -3,6 +3,7 @@ package nu.marginalia.search.svc;
import com.google.inject.Inject;
import lombok.SneakyThrows;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.command.SearchAdtechParameter;
import nu.marginalia.search.model.SearchProfile;
import nu.marginalia.client.Context;
import nu.marginalia.search.command.CommandEvaluator;
@ -14,8 +15,6 @@ import org.slf4j.LoggerFactory;
import spark.Request;
import spark.Response;
import java.util.Optional;
public class SearchQueryService {
private final WebsiteUrl websiteUrl;
@ -38,23 +37,8 @@ public class SearchQueryService {
final var ctx = Context.fromRequest(request);
final String queryParam = request.queryParams("query");
if (null == queryParam || queryParam.isBlank()) {
response.redirect(websiteUrl.url());
return null;
}
final String profileStr = Optional.ofNullable(request.queryParams("profile")).orElse(SearchProfile.YOLO.name);
final String humanQuery = queryParam.trim();
var params = new SearchParameters(
SearchProfile.getSearchProfile(profileStr),
SearchJsParameter.parse(request.queryParams("js")),
Boolean.parseBoolean(request.queryParams("detailed"))
);
try {
return searchCommandEvaulator.eval(ctx, params, humanQuery);
return searchCommandEvaulator.eval(ctx, response, parseParameters(request));
}
catch (RedirectException ex) {
response.redirect(ex.newUrl);
@ -67,4 +51,16 @@ public class SearchQueryService {
return "";
}
private SearchParameters parseParameters(Request request) {
final String queryParam = request.queryParams("query");
if (null == queryParam || queryParam.isBlank()) {
throw new RedirectException(websiteUrl.url());
}
return new SearchParameters(queryParam.trim(),
SearchProfile.getSearchProfile(request.queryParams("profile")),
SearchJsParameter.parse(request.queryParams("js")),
SearchAdtechParameter.parse(request.queryParams("adtech")));
}
}

View File

@ -0,0 +1,258 @@
package nu.marginalia.search.svc;
import com.google.inject.Inject;
import nu.marginalia.client.Context;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.search.SearchOperator;
import nu.marginalia.search.model.DomainInformation;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.siteinfo.DomainInformationService;
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
import spark.Request;
import spark.Response;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
/**
 * Renders the site-info template for a domain. The {@code view} query parameter
 * selects which model is built: general information, documents, backlinks or the
 * complaint report form.
 */
public class SearchSiteInfoService {

    private final SearchOperator searchOperator;
    private final SimilarDomainsService similarDomains;
    private final DomainInformationService domainInformationService;
    private final SearchFlagSiteService flagSiteService;
    private final DbDomainQueries domainQueries;
    private final MustacheRenderer<Object> renderer;

    @Inject
    public SearchSiteInfoService(SearchOperator searchOperator,
                                 SimilarDomainsService similarDomains,
                                 DomainInformationService domainInformationService,
                                 RendererFactory rendererFactory,
                                 SearchFlagSiteService flagSiteService,
                                 DbDomainQueries domainQueries) throws IOException {
        this.searchOperator = searchOperator;
        this.similarDomains = similarDomains;
        this.domainInformationService = domainInformationService;
        this.flagSiteService = flagSiteService;
        this.domainQueries = domainQueries;

        this.renderer = rendererFactory.renderer("search/site-info/site-info");
    }

    /**
     * Builds the model for the requested view and renders it into the response.
     *
     * @return whatever the renderer returns, or {@code null} when the {@code site}
     *         path parameter is missing or blank
     */
    public Object handle(Request request, Response response) throws SQLException {
        final String domainName = request.params("site");
        final String view = request.queryParamOrDefault("view", "info");

        if (domainName == null || domainName.isBlank()) {
            return null;
        }

        final var ctx = Context.fromRequest(request);

        final Object model;
        switch (view) {
            case "links" -> model = listLinks(ctx, domainName);
            case "docs" -> model = listDocs(ctx, domainName);
            case "report" -> model = reportSite(ctx, domainName);
            // "info" as well as any unrecognized view
            default -> model = listInfo(ctx, domainName);
        }

        return renderer.renderInto(response, model);
    }

    /**
     * Stores a complaint submitted from the report view and renders the report
     * view again in its submitted state.
     *
     * @return whatever the renderer returns, or {@code null} when the domain is
     *         missing or blank, or the view is anything other than {@code report}
     */
    public Object handlePost(Request request, Response response) throws SQLException {
        final String domainName = request.params("site");
        final String view = request.queryParamOrDefault("view", "info");

        final boolean missingDomain = domainName == null || domainName.isBlank();
        if (missingDomain || !view.equals("report")) {
            return null;
        }

        final int domainId = domainQueries.getDomainId(new EdgeDomain(domainName));

        flagSiteService.insertComplaint(new FlagSiteFormData(
                domainId,
                request.queryParams("category"),
                request.queryParams("description"),
                request.queryParams("sampleQuery")));

        final var complaints = flagSiteService.getExistingComplaints(domainId);

        return renderer.renderInto(response,
                new ReportDomain(domainName, domainId, complaints, List.of(), true));
    }

    /** Model for the not-yet-submitted report form, including the complaints already on file. */
    private Object reportSite(Context ctx, String domainName) throws SQLException {
        final int domainId = domainQueries.getDomainId(new EdgeDomain(domainName));

        return new ReportDomain(domainName,
                domainId,
                flagSiteService.getExistingComplaints(domainId),
                flagSiteService.getCategories(),
                false);
    }

    /** Placeholder information used when the domain information service has no record. */
    private DomainInformation dummyInformation(String domainName) {
        return DomainInformation.builder()
                .domain(new EdgeDomain(domainName))
                .suggestForCrawling(true)
                .unknownDomain(true)
                .build();
    }

    /** Model for the backlinks view; the domain id is -1 when the lookup finds nothing. */
    private Backlinks listLinks(Context ctx, String domainName) {
        final int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);

        return new Backlinks(domainName, domainId, searchOperator.doBacklinkSearch(ctx, domainName));
    }

    /** Model for the general information view, including similar and linking domains. */
    private SiteInfoWithContext listInfo(Context ctx, String domainName) {
        final int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);

        final DomainInformation info = domainInformationService
                .domainInfo(domainName)
                .orElseGet(() -> dummyInformation(domainName));

        final List<SimilarDomainsService.SimilarDomain> similarSet = similarDomains.getSimilarDomains(domainId, 100);
        final List<SimilarDomainsService.SimilarDomain> linkingSet = similarDomains.getLinkingDomains(domainId, 100);

        return new SiteInfoWithContext(domainName, domainId, info, similarSet, linkingSet);
    }

    /** Model for the documents view; the domain id is -1 when the lookup finds nothing. */
    private Docs listDocs(Context ctx, String domainName) {
        final int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);

        return new Docs(domainName, domainId, searchOperator.doSiteSearch(ctx, domainName));
    }

    /** Model of the documents view; {@code view} is a single-entry map flagging "docs". */
    public record Docs(Map<String, Boolean> view,
                       String domain,
                       long domainId,
                       List<UrlDetails> results) {

        public Docs(String domain, long domainId, List<UrlDetails> results) {
            this(Map.of("docs", true), domain, domainId, results);
        }

        public boolean isKnown() {
            return domainId > 0;
        }

        public String query() {
            return "site:" + domain;
        }

        public String focusDomain() {
            return domain;
        }
    }

    /** Model of the backlinks view; {@code view} is a single-entry map flagging "links". */
    public record Backlinks(Map<String, Boolean> view,
                            String domain,
                            long domainId,
                            List<UrlDetails> results) {

        public Backlinks(String domain, long domainId, List<UrlDetails> results) {
            this(Map.of("links", true), domain, domainId, results);
        }

        public boolean isKnown() {
            return domainId > 0;
        }

        public String query() {
            return "links:" + domain;
        }
    }

    /**
     * Model of the general information view. {@code view} flags "info" and
     * {@code domainState} flags exactly one state name derived from the domain information.
     */
    public record SiteInfoWithContext(Map<String, Boolean> view,
                                      Map<String, Boolean> domainState,
                                      String domain,
                                      long domainId,
                                      DomainInformation domainInformation,
                                      List<SimilarDomainsService.SimilarDomain> similar,
                                      List<SimilarDomainsService.SimilarDomain> linking) {

        public SiteInfoWithContext(String domain,
                                   long domainId,
                                   DomainInformation domainInformation,
                                   List<SimilarDomainsService.SimilarDomain> similar,
                                   List<SimilarDomainsService.SimilarDomain> linking)
        {
            this(Map.of("info", true),
                 Map.of(domainInfoState(domainInformation), true),
                 domain,
                 domainId,
                 domainInformation,
                 similar,
                 linking);
        }

        /** Picks the state name; the checks are ordered, the first one that applies wins. */
        private static String domainInfoState(DomainInformation info) {
            if (info.isBlacklisted()) {
                return "blacklisted";
            }

            final boolean unknown = info.isUnknownDomain();

            if (!unknown && info.isSuggestForCrawling()) {
                return "suggestForCrawling";
            }
            if (info.isInCrawlQueue()) {
                return "inCrawlQueue";
            }

            return unknown ? "unknownDomain" : "indexed";
        }

        public boolean isKnown() {
            return domainId > 0;
        }

        public String query() {
            return "site:" + domain;
        }

        /** Layout name for the page; done here because it was not solved in CSS alone. */
        public String getLayout() {
            return similar.size() < 25 ? "lopsided" : "balanced";
        }
    }

    /** Model of the report view; {@code view} is a single-entry map flagging "report". */
    public record ReportDomain(
            Map<String, Boolean> view,
            String domain,
            int domainId,
            List<SearchFlagSiteService.FlagSiteComplaintModel> complaints,
            List<SearchFlagSiteService.CategoryItem> category,
            boolean submitted)
    {
        public ReportDomain(String domain,
                            int domainId,
                            List<SearchFlagSiteService.FlagSiteComplaintModel> complaints,
                            List<SearchFlagSiteService.CategoryItem> category,
                            boolean submitted) {
            this(Map.of("report", true), domain, domainId, complaints, category, submitted);
        }

        public boolean isKnown() {
            return domainId > 0;
        }

        public String query() {
            return "site:" + domain;
        }
    }
}

View File

@ -0,0 +1,239 @@
package nu.marginalia.search.svc;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import gnu.trove.set.hash.TIntHashSet;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
public class SimilarDomainsService {
private static final Logger logger = LoggerFactory.getLogger(SimilarDomainsService.class);
private final HikariDataSource dataSource;
/**
 * Creates the service.
 *
 * @param dataSource connection pool used for every query this service runs
 */
@Inject
public SimilarDomainsService(HikariDataSource dataSource) {
    this.dataSource = dataSource;
}
/**
 * Fetches the domains recorded as neighbors of the given domain in EC_DOMAIN_NEIGHBORS_2.
 * <p>
 * The table is queried in both directions: once with the domain in the DOMAIN_ID column
 * (the neighbor is then NEIGHBOR_ID), and once with the domain in the NEIGHBOR_ID column
 * (the neighbor is then DOMAIN_ID). Each query is limited to {@code count} rows.
 *
 * @param domainId id of the domain to find neighbors for
 * @param count    row limit passed to each of the two queries
 * @return the combined results, sorted by relatedness (descending) and then by domain id
 */
public List<SimilarDomain> getSimilarDomains(int domainId, int count) {
    // Direction 1: the domain of interest is in DOMAIN_ID.
    String neighborsOfDomain = """
            SELECT
            NEIGHBOR.ID AS ID,
            NEIGHBOR.DOMAIN_NAME AS DOMAIN_NAME,
            SCREENSHOT.DOMAIN_NAME IS NOT NULL AS HAS_SCREENSHOT,
            NODE_AFFINITY > 0 AS INDEXED,
            STATE='ACTIVE' AS ACTIVE,
            RELATEDNESS,
            RANK,
            STOD.ID IS NOT NULL AS LINK_STOD,
            DTOS.ID IS NOT NULL AS LINK_DTOS
            FROM EC_DOMAIN_NEIGHBORS_2
            INNER JOIN EC_DOMAIN AS NEIGHBOR ON EC_DOMAIN_NEIGHBORS_2.NEIGHBOR_ID = NEIGHBOR.ID
            LEFT JOIN DATA_DOMAIN_SCREENSHOT AS SCREENSHOT ON NEIGHBOR.DOMAIN_NAME = SCREENSHOT.DOMAIN_NAME
            LEFT JOIN EC_DOMAIN_LINK STOD ON STOD.SOURCE_DOMAIN_ID = NEIGHBOR.ID AND STOD.DEST_DOMAIN_ID = EC_DOMAIN_NEIGHBORS_2.DOMAIN_ID
            LEFT JOIN EC_DOMAIN_LINK DTOS ON DTOS.DEST_DOMAIN_ID = NEIGHBOR.ID AND DTOS.SOURCE_DOMAIN_ID = EC_DOMAIN_NEIGHBORS_2.DOMAIN_ID
            WHERE DOMAIN_ID = ?
            ORDER BY RELATEDNESS DESC, RANK ASC
            LIMIT ?
            """;

    // Direction 2: the domain of interest is in NEIGHBOR_ID.
    String domainsWithNeighbor = """
            SELECT
            NEIGHBOR.ID AS ID,
            NEIGHBOR.DOMAIN_NAME AS DOMAIN_NAME,
            SCREENSHOT.DOMAIN_NAME IS NOT NULL AS HAS_SCREENSHOT,
            NODE_AFFINITY > 0 AS INDEXED,
            STATE='ACTIVE' AS ACTIVE,
            RELATEDNESS,
            RANK,
            STOD.ID IS NOT NULL AS LINK_STOD,
            DTOS.ID IS NOT NULL AS LINK_DTOS
            FROM EC_DOMAIN_NEIGHBORS_2
            INNER JOIN EC_DOMAIN AS NEIGHBOR ON EC_DOMAIN_NEIGHBORS_2.DOMAIN_ID = NEIGHBOR.ID
            LEFT JOIN DATA_DOMAIN_SCREENSHOT AS SCREENSHOT ON NEIGHBOR.DOMAIN_NAME = SCREENSHOT.DOMAIN_NAME
            LEFT JOIN EC_DOMAIN_LINK STOD ON STOD.SOURCE_DOMAIN_ID = NEIGHBOR.ID AND STOD.DEST_DOMAIN_ID = EC_DOMAIN_NEIGHBORS_2.NEIGHBOR_ID
            LEFT JOIN EC_DOMAIN_LINK DTOS ON DTOS.DEST_DOMAIN_ID = NEIGHBOR.ID AND DTOS.SOURCE_DOMAIN_ID = EC_DOMAIN_NEIGHBORS_2.NEIGHBOR_ID
            WHERE NEIGHBOR_ID = ?
            ORDER BY RELATEDNESS DESC, RANK ASC
            LIMIT ?
            """;

    List<SimilarDomain> domains =
            executeSimilarDomainsQueries(domainId, count, neighborsOfDomain, domainsWithNeighbor);

    // The two result sets are merged, so they are re-sorted as one list.
    Comparator<SimilarDomain> mostRelatedFirst =
            Comparator.comparing(SimilarDomain::relatedness).reversed();
    domains.sort(mostRelatedFirst.thenComparing(SimilarDomain::domainId));

    return domains;
}
/**
 * Fetches the domains that are linked with the given domain in EC_DOMAIN_LINK.
 * <p>
 * Two queries are run: the first finds domains that link to the given domain
 * (the given domain is the link destination), the second finds domains the given
 * domain links to (the given domain is the link source). Relatedness is looked up
 * in EC_DOMAIN_NEIGHBORS_2 in either direction (aliases NA and NB) and defaults
 * to 0 when no neighbor row exists.
 *
 * @param domainId id of the domain to find linked domains for
 * @param count    row limit passed to each of the two queries
 * @return the combined results; rank, relatedness and indexed are compared in
 *         descending order, with domain id (ascending) as the final tie-breaker
 */
public List<SimilarDomain> getLinkingDomains(int domainId, int count) {
    // Domains linking TO the given domain.
    // NB is the mirror image of NA (source/dest swapped), so both of its join
    // conditions must refer to NB's own columns.
    String q1 = """
            SELECT
            NEIGHBOR.ID AS ID,
            NEIGHBOR.DOMAIN_NAME AS DOMAIN_NAME,
            SCREENSHOT.DOMAIN_NAME IS NOT NULL AS HAS_SCREENSHOT,
            NODE_AFFINITY > 0 AS INDEXED,
            STATE='ACTIVE' AS ACTIVE,
            COALESCE(COALESCE(NA.RELATEDNESS, NB.RELATEDNESS), 0) AS RELATEDNESS,
            RANK,
            TRUE AS LINK_STOD,
            DTOS.ID IS NOT NULL AS LINK_DTOS
            FROM EC_DOMAIN_LINK STOD
            INNER JOIN EC_DOMAIN AS NEIGHBOR ON STOD.SOURCE_DOMAIN_ID = NEIGHBOR.ID
            LEFT JOIN EC_DOMAIN_NEIGHBORS_2 NA ON STOD.SOURCE_DOMAIN_ID = NA.DOMAIN_ID AND STOD.DEST_DOMAIN_ID = NA.NEIGHBOR_ID
            LEFT JOIN EC_DOMAIN_NEIGHBORS_2 NB ON STOD.SOURCE_DOMAIN_ID = NB.NEIGHBOR_ID AND STOD.DEST_DOMAIN_ID = NB.DOMAIN_ID
            LEFT JOIN DATA_DOMAIN_SCREENSHOT AS SCREENSHOT ON NEIGHBOR.DOMAIN_NAME = SCREENSHOT.DOMAIN_NAME
            LEFT JOIN EC_DOMAIN_LINK DTOS ON DTOS.DEST_DOMAIN_ID = STOD.SOURCE_DOMAIN_ID AND DTOS.SOURCE_DOMAIN_ID = STOD.DEST_DOMAIN_ID
            WHERE STOD.DEST_DOMAIN_ID = ?
            GROUP BY NEIGHBOR.ID
            ORDER BY RELATEDNESS DESC, RANK ASC
            LIMIT ?
            """;

    // Domains the given domain links TO.
    String q2 = """
            SELECT
            NEIGHBOR.ID AS ID,
            NEIGHBOR.DOMAIN_NAME AS DOMAIN_NAME,
            SCREENSHOT.DOMAIN_NAME IS NOT NULL AS HAS_SCREENSHOT,
            NODE_AFFINITY > 0 AS INDEXED,
            STATE='ACTIVE' AS ACTIVE,
            COALESCE(COALESCE(NA.RELATEDNESS, NB.RELATEDNESS), 0) AS RELATEDNESS,
            RANK,
            STOD.ID IS NOT NULL AS LINK_STOD,
            TRUE AS LINK_DTOS
            FROM EC_DOMAIN_LINK DTOS
            INNER JOIN EC_DOMAIN AS NEIGHBOR ON DTOS.DEST_DOMAIN_ID = NEIGHBOR.ID
            LEFT JOIN EC_DOMAIN_NEIGHBORS_2 NA ON DTOS.DEST_DOMAIN_ID = NA.DOMAIN_ID AND DTOS.SOURCE_DOMAIN_ID = NA.NEIGHBOR_ID
            LEFT JOIN EC_DOMAIN_NEIGHBORS_2 NB ON DTOS.DEST_DOMAIN_ID = NB.NEIGHBOR_ID AND DTOS.SOURCE_DOMAIN_ID = NB.DOMAIN_ID
            LEFT JOIN DATA_DOMAIN_SCREENSHOT AS SCREENSHOT ON NEIGHBOR.DOMAIN_NAME = SCREENSHOT.DOMAIN_NAME
            LEFT JOIN EC_DOMAIN_LINK STOD ON STOD.DEST_DOMAIN_ID = DTOS.SOURCE_DOMAIN_ID AND STOD.SOURCE_DOMAIN_ID = DTOS.DEST_DOMAIN_ID
            WHERE DTOS.SOURCE_DOMAIN_ID = ?
            GROUP BY NEIGHBOR.ID
            ORDER BY RELATEDNESS DESC, RANK ASC
            LIMIT ?
            """;

    var domains = executeSimilarDomainsQueries(domainId, count, q1, q2);

    // reversed() applies to the whole chain built before it, so rank, relatedness
    // and indexed are all compared descending; domainId is then compared ascending.
    domains.sort(Comparator.comparing(SimilarDomain::rank)
            .thenComparing(SimilarDomain::relatedness)
            .thenComparing(SimilarDomain::indexed).reversed()
            .thenComparing(SimilarDomain::domainId));

    return domains;
}
/**
 * Runs each query with {@code domainId} bound to parameter 1 and {@code count} bound
 * to parameter 2, and collects the rows as {@link SimilarDomain} entries.
 * <p>
 * A domain id is only added the first time it is seen, across all queries, and
 * collection stops once {@code count * 2} entries have been gathered. If a
 * {@link SQLException} occurs it is logged and whatever was collected so far is
 * returned; the method never throws it to the caller.
 *
 * @param domainId the domain id bound as the first query parameter
 * @param count    the row limit bound as the second query parameter
 * @param queries  SQL statements selecting the columns ID, DOMAIN_NAME, RELATEDNESS,
 *                 RANK, INDEXED, ACTIVE, HAS_SCREENSHOT, LINK_STOD and LINK_DTOS
 * @return a mutable list of the collected domains, in the order they were read
 */
private List<SimilarDomain> executeSimilarDomainsQueries(int domainId, int count, String... queries) {
    List<SimilarDomain> domains = new ArrayList<>(count);
    TIntHashSet seen = new TIntHashSet();

    try (var connection = dataSource.getConnection()) {
        for (var query : queries) {
            try (var stmt = connection.prepareStatement(query)) {
                stmt.setFetchSize(count);
                stmt.setInt(1, domainId);
                stmt.setInt(2, count);

                // Close the result set explicitly rather than relying on the statement closing it.
                try (var rsp = stmt.executeQuery()) {
                    while (rsp.next() && domains.size() < count * 2) {
                        int id = rsp.getInt("ID");

                        // Skip domains already produced by an earlier row or query.
                        if (!seen.add(id)) {
                            continue;
                        }

                        boolean linkStod = rsp.getBoolean("LINK_STOD");
                        boolean linkDtos = rsp.getBoolean("LINK_DTOS");
                        LinkType linkType = LinkType.find(linkStod, linkDtos);

                        domains.add(new SimilarDomain(
                                new EdgeDomain(rsp.getString("DOMAIN_NAME")).toRootUrl(),
                                id,
                                // Both values are scaled by 100 and rounded; RANK is inverted first
                                // (presumably because a lower stored rank is better -- confirm).
                                Math.round(100 * rsp.getDouble("RELATEDNESS")),
                                Math.round(100 * (1. - rsp.getDouble("RANK"))),
                                rsp.getBoolean("INDEXED"),
                                rsp.getBoolean("ACTIVE"),
                                rsp.getBoolean("HAS_SCREENSHOT"),
                                linkType
                        ));
                    }
                }
            }
        }
    } catch (SQLException throwables) {
        // Best effort: log which domain failed and return what has been collected so far.
        logger.warn("Failed to get domain neighbors for domain {}", domainId, throwables);
    }

    return domains;
}
/**
 * One row of a similar-domains or linking-domains listing.
 *
 * @param url         root URL of the domain
 * @param domainId    id of the domain
 * @param relatedness relatedness value of the domain
 * @param rank        rank value of the domain; drives {@link #getRankSymbols()}
 * @param indexed     value of the INDEXED column for the domain
 * @param active      value of the ACTIVE column for the domain
 * @param screenshot  value of the HAS_SCREENSHOT column for the domain
 * @param linkType    link relationship found for the domain
 */
public record SimilarDomain(EdgeUrl url,
                            int domainId,
                            double relatedness,
                            double rank,
                            boolean indexed,
                            boolean active,
                            boolean screenshot,
                            LinkType linkType)
{
    /**
     * Renders the rank as zero to five HTML star entities ({@code &#9733;}).
     * One star is awarded for each threshold the rank exceeds: 10, 30, 50, 70 and 90.
     *
     * @return the star entities, or an empty string when the rank is not above 10
     */
    public String getRankSymbols() {
        // Highest threshold first; its position determines how many stars it is worth.
        final int[] thresholds = { 90, 70, 50, 30, 10 };

        for (int i = 0; i < thresholds.length; i++) {
            if (rank > thresholds[i]) {
                return "&#9733;".repeat(thresholds.length - i);
            }
        }

        return "";
    }
}
/**
 * Direction of the link relationship between two domains, together with the
 * HTML symbol and the human-readable description used to display it.
 */
enum LinkType {
    BACKWARD("&#8592;", "Backward Link"),
    // (sic) the spelling is part of the constant's name and is kept as-is
    FOWARD("&#8594;", "Forward Link"),
    BIDIRECTIONAL("&#8646;", "Mutual Link"),
    NONE("-", "No Link");

    private final String symbol;
    private final String description;

    LinkType(String symbol, String description) {
        this.symbol = symbol;
        this.description = description;
    }

    /**
     * Picks the link type from the two link flags.
     *
     * @param linkStod value of the LINK_STOD flag; on its own it yields {@code BACKWARD}
     * @param linkDtos value of the LINK_DTOS flag; on its own it yields {@code FOWARD}
     * @return {@code BIDIRECTIONAL} when both flags are set, {@code NONE} when neither is
     */
    public static LinkType find(boolean linkStod,
                                boolean linkDtos)
    {
        if (linkStod) {
            return linkDtos ? BIDIRECTIONAL : BACKWARD;
        }
        return linkDtos ? FOWARD : NONE;
    }

    /** Returns the symbol for this link type: an HTML arrow entity, or "-" for no link. */
    @Override
    public String toString() {
        return symbol;
    }

    /** Returns the human-readable description of this link type. */
    public String getDescription() {
        return description;
    }
}
}

View File

@ -0,0 +1,13 @@
// Flag the <body> with data-has-js="true" so the stylesheet can apply rules that
// assume JavaScript is available. This is a progressive enhancement: without JS
// the attribute is simply never set and the page keeps working.
document.getElementsByTagName('body')[0].setAttribute('data-has-js', 'true');

// Hitting Enter in the search box would otherwise open the filter menu. To avoid
// that, Enter is handled here: the search form is submitted explicitly and the
// key's default action is cancelled. Janky, but it works.
document.getElementById('query').addEventListener('keydown', (event) => {
    if (event.key !== "Enter") {
        return;
    }

    document.getElementById('search-form').submit();
    event.preventDefault();
});

View File

@ -0,0 +1,34 @@
/**
 * Creates the "close" button for the filters menu and appends it to the first
 * <h2> inside #filters. Clicking it hides #filters.
 *
 * The button is created from JS so that text-only browsers never see it in the DOM.
 */
const registerCloseButton = () => {
    const closeButton = document.createElement('button');
    closeButton.setAttribute('id', 'menu-close');
    closeButton.setAttribute('title', 'Close the menu');
    // aria-controls takes the id of the controlled element, not a CSS selector,
    // so there must be no leading '#'.
    closeButton.setAttribute('aria-controls', 'filters');
    closeButton.innerHTML = 'X';
    closeButton.onclick = (event) => {
        document.getElementById('filters').style.display = 'none';
        event.stopPropagation();
        // Returning false from an onclick property handler cancels the default action.
        return false;
    }
    document.getElementById('filters').getElementsByTagName('h2')[0].append(closeButton);
}
// Add a button that opens the filters menu on mobile. It is created from JS so
// that text-only browsers never see it in the DOM.
const filtersButton = document.createElement('button');
filtersButton.setAttribute('id', 'mcfeast');
// aria-controls takes the id of the controlled element, not a CSS selector,
// so there must be no leading '#'.
filtersButton.setAttribute('aria-controls', 'filters');
filtersButton.innerHTML = '&Xi;';
filtersButton.setAttribute('title', 'Open the filters menu');
filtersButton.onclick = (event) => {
    // Defer creation of the close button until the menu is first opened. This is
    // needed because this script runs early, to avoid layout shifts.
    if (document.getElementById('menu-close') === null) {
        registerCloseButton();
    }
    document.getElementById('filters').style.display = 'block';
    event.stopPropagation();
    // Returning false from an onclick property handler cancels the default action.
    return false;
}
document.getElementById('search-box').getElementsByTagName('h1')[0].append(filtersButton);

View File

@ -0,0 +1,677 @@
$nicotine-dark: #acae89;
$nicotine-light: #f8f8ee;
$fg-dark: #000;
$fg-light: #fff;
$highlight-dark: #2f4858;
$highlight-light: #3F5F6F;
$highlight-light2: #eee;
$border-color: #ccc;
$heading-fonts: serif;
$visited: #fcc;
* {
box-sizing: border-box;
}
h1 a, h2 a {
color: $fg-light;
}
h1 a:visited, h2 a:visited {
color: $visited;
}
progress {
width: 10ch;
}
body {
background-color: $nicotine-light;
color: $fg-dark;
font-family: sans-serif;
font-size: 14px;
line-height: 1.6;
margin-left: 2ch;
margin-right: 4ch;
max-width: 120ch;
padding: 0;
}
#frontpage {
display: grid;
grid-template-columns: 1fr auto;
grid-template-rows: auto 1fr;
grid-gap: 1ch;
align-items: start;
justify-content: start;
margin-top: 1ch;
margin-bottom: 1ch;
// named grid areas
grid-template-areas:
"frontpage-about frontpage-news"
"frontpage-tips frontpage-news";
@media (max-device-width: 624px) {
grid-template-columns: 1fr;
grid-template-rows: auto auto auto;
grid-gap: 1ch;
align-items: start;
justify-content: start;
margin-top: 1ch;
margin-bottom: 1ch;
// named grid areas
grid-template-areas:
"frontpage-about"
"frontpage-tips"
"frontpage-news";
* { max-width: unset !important; min-width: unset !important; }
}
#frontpage-news {
grid-area: frontpage-news;
max-width: 40ch;
@extend .dialog;
}
#frontpage-about {
grid-area: frontpage-about;
min-width: 40ch;
@extend .dialog;
}
#frontpage-tips {
grid-area: frontpage-tips;
min-width: 40ch;
@extend .dialog;
}
}
#siteinfo-nav {
display: block;
width: 100%;
@extend .dialog;
padding: 0.25ch !important;
margin-top: 1.5ch;
ul {
list-style: none;
padding: 0;
margin: 1ch;
li {
display: inline;
padding: 1ch;
background-color: $highlight-light2;
a {
text-decoration: none;
display: inline-block;
color: #000;
}
}
li.current {
background-color: $highlight-light;
a {
color: #fff;
}
}
}
}
.dialog {
border: 1px solid $border-color;
box-shadow: 0 0 1ch $border-color;
background-color: #fff;
padding: 1ch;
h2 {
margin: 0;
font-family: sans-serif;
font-weight: normal;
padding: 0.5ch;
font-size: 12pt;
background-color: $highlight-light;
color: #fff;
}
}
// Page header bar and the navigation links inside it.
header {
  background-color: $nicotine-dark;
  color: #fff;
  border: 1px solid #888;
  box-shadow: 0 0 0.5ch #888;
  margin-bottom: 1ch;

  nav a {
    text-decoration: none;
    color: #000;
    padding: .5ch;
    display: inline-block;

    // Links marked "extra" get a rainbow gradient background
    &.extra {
      background: #ccc linear-gradient(45deg,
              rgba(255,100,100,1) 0%,
              rgba(100,255,100,1) 50%,
              rgba(100,100,255,1) 100%);
      color: black;
      text-shadow: 0 0 0.25ch #ccc;
    }

    // Hovered or focused links invert to the dark highlight colour; !important
    // keeps the text white on .extra links as well
    &:hover, &:focus {
      background: $highlight-dark;
      color: #fff !important;
    }
  }
}
#complaint {
@extend .dialog;
max-width: 60ch;
margin-left: auto;
margin-right: auto;
margin-top: 2ch;
textarea {
width: 100%;
height: 10ch;
}
}
// Site info page: a wrapping row of dialog boxes (index info, link info, screenshot).
#siteinfo {
  margin-top: 1ch;
  display: flex;
  gap: 1ch;
  flex-grow: 0.5;
  flex-shrink: 0.5;
  // NOTE: a two-value "flex-basis: 10ch 10ch" used to be declared here. flex-basis
  // accepts a single value, so browsers discarded the declaration; it has been
  // removed, which leaves the rendered result unchanged.
  flex-direction: row;
  flex-wrap: wrap;
  align-content: stretch;
  align-items: stretch;
  justify-content: stretch;

  #index-info, #link-info {
    width: 32ch;
    @extend .dialog;
  }

  #screenshot {
    @extend .dialog;
  }

  #screenshot img {
    width: 30ch;
    height: 22.5ch;
  }
}
.infobox {
h2 {
@extend .heading;
}
background-color: #fff;
padding: 1ch;
margin: 1ch;
border: 1px solid $border-color;
box-shadow: 0 0 1ch $border-color;
}
section.cards {
display: flex;
flex-direction: row;
flex-wrap: wrap;
padding-top: 1ch;
gap: 2ch;
justify-content: flex-start;
.card {
border: 2px #ccc;
background-color: #fff;
border-left: 1px solid #ecb;
border-top: 1px solid #ecb;
box-shadow: #0008 0 0 5px;
h2 {
@extend .heading;
word-break: break-word;
}
h2 a {
display: block !important;
color: #fff;
text-decoration: none;
}
a:focus img {
filter: sepia(100%);
box-shadow: #444 0px 0px 20px;
}
a:focus:not(.nofocus) {
background-color: black;
color: white;
}
.description {
padding-left: 1ch;
padding-right: 1ch;
overflow: auto;
-webkit-hyphens: auto;
-moz-hyphens: auto;
-ms-hyphens: auto;
hyphens: auto;
}
img {
width: 28ch;
height: auto;
}
.info {
padding-left: 1ch;
padding-right: 1ch;
line-height: 1.6;
}
}
}
.positions {
box-shadow: 0 0 2px #888;
background-color: #e4e4e4;
padding: 2px;
margin-right: -1ch;
margin-left: 1ch;
}
footer {
clear: both;
padding: 2ch;
margin: 16ch 0 0 0;
font-size: 12pt;
display: flex;
flex-direction: row;
flex-wrap: wrap;
justify-content: flex-start;
h1 {
font-weight: normal;
border-bottom: 4px solid $highlight-light;
}
h2 {
font-size: 14pt;
font-weight: normal;
border-bottom: 2px solid $highlight-dark;
width: 80%;
}
section {
line-height: 1.5;
flex-basis: 40ch;
flex-grow: 1.1;
background-color: #fff;
border-left: 1px solid $border-color;
box-shadow: -1px -1px 5px $border-color;
padding-left: 1ch;
padding-right: 1ch;
margin-left: 1ch;
padding-bottom: 1ch;
margin-bottom: 1ch;
}
}
#mcfeast, #menu-close {
display: none;
}
.shadowbox {
box-shadow: 0 0 1ch $border-color;
border: 1px solid $border-color;
}
.heading {
margin: 0;
padding: 0.5ch;
background-color: $highlight-light;
border-bottom: 1px solid $border-color;
font-family: $heading-fonts;
font-weight: normal;
color: $fg-light;
font-size: 12pt;
word-break: break-word;
}
.sidebar-narrow {
display: grid;
grid-template-columns: auto max-content;
grid-gap: 1ch;
align-items: start;
}
#similar-view {
display: grid;
grid-template-columns: 1fr 1fr;
grid-template-rows: auto 1fr;
grid-gap: 1ch;
align-content: start;
justify-content: start;
align-items: start;
table {
th {
text-align: left;
}
}
.screenshot {
width: 100%;
height: auto;
}
}
#similar-view[data-layout="lopsided"] {
#similar-info {
@extend .dialog;
grid-column: 1;
grid-row: 1 / span 2;
}
#similar-domains {
@extend .dialog;
grid-column: 2;
grid-row: 1;
}
#similar-links {
@extend .dialog;
grid-row: 2;
grid-column: 2;
}
}
#similar-view[data-layout="balanced"] {
#similar-info {
@extend .dialog;
}
#similar-domains {
grid-row: span 2;
@extend .dialog;
}
#similar-links {
@extend .dialog;
}
}
@media (max-device-width: 900px) {
#similar-view {
display: block;
* {
margin-bottom: 1ch;
}
}
}
#search-box {
@extend .shadowbox;
padding: 0.5ch;
background-color: $fg-light;
display: grid;
grid-template-columns: max-content 0 auto max-content;
grid-gap: 0.5ch;
grid-auto-rows: minmax(1ch, auto);
width: 100%;
h1 {
margin: 0;
padding: 0.5ch;
font-size: 14pt;
word-break: keep-all;
background-color: $highlight-light;
color: $fg-light;
font-family: $heading-fonts;
font-weight: normal;
border: 1px solid;
text-align: center;
}
#suggestions-anchor {
margin: -0.5ch; // We need this anchor for the typeahead suggestions, but we don't want it to affect the layout
padding: 0;
}
input[type="text"] {
font-family: monospace;
font-size: 12pt;
padding: 0.5ch;
border: 1px solid $border-color;
background-color: $fg-light;
color: $fg-dark;
}
input[type="submit"] {
font-size: 12pt;
border: 1px solid $border-color;
background-color: $fg-light;
color: $fg-dark;
}
.suggestions {
background-color: #fff;
padding: .5ch;
margin-top: 5.5ch;
margin-left: 1ch;
position: absolute;
display: inline-block;
width: 300px;
border-left: 1px solid #ccc;
border-top: 1px solid #ccc;
box-shadow: 5px 5px 5px #888;
z-index: 10;
a {
display: block;
color: #000;
font-size: 12pt;
font-family: 'fixedsys', monospace, serif;
text-decoration: none;
outline: none;
}
a:focus {
display: block;
background-color: #000;
color: #eee;
}
}
}
.filter-toggle-on {
a:before {
content: '';
margin-right: 1.5ch;
}
}
.filter-toggle-off {
a:before {
content: '';
margin-right: 1.5ch;
}
}
#filters {
@extend .shadowbox;
margin-top: 1ch;
background-color: $fg-light;
h2 {
@extend .heading;
background-color: $highlight-light;
}
h3 {
@extend .heading;
background-color: $highlight-light2;
font-family: sans-serif;
color: #000;
border-bottom: 1px solid #000;
}
hr {
border-top: 0.5px solid $border-color;
border-bottom: none;
}
ul {
list-style-type: none;
padding-left: 0;
li {
padding: 1ch;
a {
color: $fg-dark;
text-decoration: none;
}
a:hover, a:focus {
border-bottom: 1px solid $highlight-light;
}
}
li.current {
border-left: 4px solid $highlight-light;
background-color: $highlight-light2;
a {
margin-left: -4px;
}
}
}
}
.search-result {
@extend .shadowbox;
margin: 1ch 0 2ch 0;
.url {
background-color: $highlight-light;
padding-left: 0.5ch;
a {
word-break: break-all;
font-family: monospace;
font-size: 8pt;
color: $fg-light;
text-shadow: 0 0 1ch #000; // guarantee decent contrast across background colors
}
a:visited {
color: $visited;
}
}
h2 {
a {
word-break: break-all;
color: $fg-dark;
text-decoration: none;
}
font-size: 12pt;
@extend .heading;
background-color: $highlight-light2;
}
.description {
background-color: $fg-light;
word-break: break-word;
padding: 1ch;
margin: 0;
}
}
.search-result[data-ms-rank="1"] { .url, h2 { filter: grayscale(0%); } }
.search-result[data-ms-rank="2"] { .url, h2 { filter: grayscale(5%); } }
.search-result[data-ms-rank="3"] { .url, h2 { filter: grayscale(15%); } }
.search-result[data-ms-rank="4"] { .url, h2 { filter: grayscale(20%); } }
.search-result[data-ms-rank="5"] { .url, h2 { filter: grayscale(30%); } }
.search-result[data-ms-rank="10"] { .url, h2 { filter: grayscale(60%); } }
.utils {
display: flex;
font-size: 10pt;
padding: 1ch;
background-color: #eee;
> * {
margin-right: 1ch;
margin-left: 1ch;
}
.meta {
flex-grow: 2;
text-align: right;
}
.meta > * {
padding-left: 4px;
}
a {
color: #000;
}
}
// Mobile layout. These rules only apply when the body carries data-has-js="true",
// which is set via JS: mobile navigation is JS-driven, so if JS is disabled a
// squished GUI is better than having no working UI.
@media (max-device-width: 624px) {
  body[data-has-js="true"] {
    margin: 0 !important;
    padding: 0 !important;
    max-width: 100%;

    // suggestions are not useful on mobile
    #suggestions-anchor { display: none; }

    .sidebar-narrow {
      display: block; // fix for bizarre chrome rendering issue
      grid-template-columns: auto;
    }

    // The buttons that open and close the filters menu are only shown here
    #mcfeast {
      display: inline;
      float: right;
    }

    #menu-close {
      float: right;
      display: inline;
    }

    // The filters menu starts hidden and covers the whole page when it is displayed
    #filters {
      display: none;
      position: absolute;
      top: 0;
      left: 0;
      width: 100%;
      height: 100%;
      margin: 0;
      padding: 0;
      z-index: 100;
    }

    #search-box {
      grid-template-columns: auto;
    }

    .search-result {
      margin-left: 0;
      margin-right: 0;
    }
  }
}
// The search results page is very confusing on text-based browsers, so we add a hr to separate the search results. This is
// hidden on modern browsers via CSS.
hr.w3m-helper { display: none; }

View File

@ -0,0 +1,115 @@
/**
 * Wires up typeahead suggestions for the search box (#query).
 *
 * While the user types, suggestions are fetched from /suggest/ (debounced by
 * 250 ms) and shown as a list of links in a #suggestions element that is
 * prepended to #suggestions-anchor. The list can be navigated with the arrow
 * keys, dismissed with Escape, and a suggestion is chosen by click or Enter.
 */
function setupTypeahead() {
    const query = document.getElementById('query');
    query.setAttribute('autocomplete', 'off');
    const queryBox = document.getElementById('suggestions-anchor');
    let timer = null;

    // Removes the suggestions popup, if it is currently present.
    function removeSuggestions() {
        const current = document.getElementById('suggestions');
        if (current != null) {
            current.remove();
        }
    }

    // Copies the chosen suggestion into the search box and closes the popup.
    function suggestionClickHandler(e) {
        query.value = e.currentTarget.text;
        query.focus();
        removeSuggestions();
        e.preventDefault();
    }

    // Keyboard navigation between the suggestion links.
    function suggestionKeydownHandler(e) {
        if (e.key === "ArrowDown") {
            if (e.target.nextElementSibling != null) {
                e.target.nextElementSibling.focus();
            }
            e.preventDefault();
        }
        else if (e.key === "ArrowUp") {
            if (e.target.previousElementSibling != null) {
                e.target.previousElementSibling.focus();
            }
            else {
                // Moving up from the first suggestion returns to the search box
                query.focus();
            }
            e.preventDefault();
        }
        else if (e.key === "Escape") {
            removeSuggestions();
            query.focus();
            e.preventDefault();
        }
    }

    // Builds the popup for the given suggestion strings and shows it.
    function showSuggestions(items) {
        const suggestions = document.createElement('div');
        suggestions.setAttribute('id', 'suggestions');
        suggestions.setAttribute('class', 'suggestions');

        for (const text of items) {
            const item = document.createElement('a');
            // textContent, not innerHTML: the server response must never be parsed as markup
            item.textContent = text;
            item.setAttribute('href', '#');

            item.addEventListener('click', suggestionClickHandler);
            item.addEventListener('keydown', suggestionKeydownHandler);
            item.addEventListener('keypress', e => {
                if (e.key === "Enter") {
                    suggestionClickHandler(e);
                }
            });

            suggestions.appendChild(item);
        }

        queryBox.prepend(suggestions);
    }

    // Debounced fetch: each call restarts the 250 ms timer, so a request is only
    // sent once the user has paused typing.
    function fetchSuggestions(e) {
        if (timer != null) {
            clearTimeout(timer);
        }

        timer = setTimeout(() => {
            const req = new XMLHttpRequest();

            req.onload = () => {
                let items;
                try {
                    items = JSON.parse(req.responseText);
                }
                catch (err) {
                    // Not JSON (e.g. an error page); leave the current popup untouched
                    return;
                }

                removeSuggestions();

                if (!Array.isArray(items) || items.length === 0) return;

                showSuggestions(items);
            };

            req.open("GET", "/suggest/?partial="+encodeURIComponent(query.value));
            req.send();
        }, 250);
    }

    query.addEventListener("input", fetchSuggestions);

    // Clicking into the search box dismisses the popup
    query.addEventListener("click", removeSuggestions);

    query.addEventListener("keydown", e => {
        if (e.key === "ArrowDown") {
            const suggestions = document.getElementById('suggestions');
            if (suggestions != null) {
                // Move focus into the list
                if (suggestions.firstElementChild != null) {
                    suggestions.firstElementChild.focus();
                }
            }
            else {
                // No popup yet: ArrowDown requests suggestions for the current text
                fetchSuggestions(e);
            }
            e.preventDefault();
        }
        else if (e.key === "Escape") {
            removeSuggestions();
            query.focus();
            e.preventDefault();
        }
    });
}
// Only set up typeahead when the "(pointer: coarse)" media query does not match.
if (window.matchMedia("(pointer: coarse)").matches === false) {
    setupTypeahead();
}

View File

@ -1,101 +0,0 @@
if(!window.matchMedia("(pointer: coarse)").matches) {
query = document.getElementById('query');
query.setAttribute('autocomplete', 'off');
timer = null;
function fetchSuggestions(e) {
if (timer != null) {
clearTimeout(timer);
}
timer = setTimeout(() => {
req = new XMLHttpRequest();
req.onload = rsp => {
items = JSON.parse(req.responseText);
var old = document.getElementById('suggestions');
if (old != null) old.remove();
if (items.length == 0) return;
suggestions = document.createElement('div');
suggestions.setAttribute('id', 'suggestions');
suggestions.setAttribute('class', 'suggestions');
for (i=0;i<items.length;i++) {
item = document.createElement('a');
item.innerHTML=items[i];
item.setAttribute('href', '#')
function suggestionClickHandler(e) {
query.value = e.target.text;
query.focus();
document.getElementById('suggestions').remove();
e.preventDefault()
}
item.addEventListener('click', suggestionClickHandler);
item.addEventListener('keydown', e=> {
if (e.key === "ArrowDown") {
if (e.target.nextElementSibling != null) {
e.target.nextElementSibling.focus();
}
e.preventDefault()
}
else if (e.key === "ArrowUp") {
if (e.target.previousElementSibling != null) {
e.target.previousElementSibling.focus();
}
else {
query.focus();
}
e.preventDefault()
}
else if (e.key === "Escape") {
var suggestions = document.getElementById('suggestions');
if (suggestions != null) {
suggestions.remove();
}
query.focus();
e.preventDefault();
}
});
item.addEventListener('keypress', e=> {
if (e.key === "Enter") {
suggestionClickHandler(e);
}
});
suggestions.appendChild(item);
}
document.getElementsByClassName('input')[0].appendChild(suggestions);
}
req.open("GET", "/suggest/?partial="+encodeURIComponent(query.value));
req.send();
}, 250);
}
query.addEventListener("input", fetchSuggestions);
query.addEventListener("click", e=> { var suggestions = document.getElementById('suggestions'); if (suggestions != null) suggestions.remove(); });
query.addEventListener("keydown", e => {
if (e.key === "ArrowDown") {
var suggestions = document.getElementById('suggestions');
if (suggestions != null) {
suggestions.childNodes[0].focus();
}
else {
fetchSuggestions(e);
}
e.preventDefault()
}
else if (e.key === "Escape") {
var suggestions = document.getElementById('suggestions');
if (suggestions != null) {
suggestions.remove();
}
query.focus();
e.preventDefault();
}
});
}

View File

@ -1,5 +1,5 @@
<section class="card browse-result">
<h2>{{url.domain}}</h2>
<h2 title="{{url.domain}}">{{displayDomain}}</h2>
<a href="{{url.proto}}://{{url.domain}}/">
<img src="/screenshot/{{domainId}}" title="{{description}}" loading="lazy"/>

View File

@ -4,7 +4,7 @@
<meta charset="UTF-8">
<title>Marginalia Search - {{query}}</title>
<link rel="stylesheet" href="/style-new.css" />
<link rel="stylesheet" href="/serp.css" />
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="robots" content="noindex" />
@ -13,38 +13,22 @@
<body>
{{>search/parts/search-header}}
{{>search/parts/search-form}}
<article>
{{>search/parts/search-form}}
<div class="infobox">
{{#if focusDomain}}
Showing domains similar to <tt>{{focusDomain}}</tt>.
{{/if}}
{{#unless focusDomain}}
This list of domains is random. <a href="https://search.marginalia.nu/explore/random">Refresh</a> to get
new domains, or click <b>Similar Domains</b> to
take the helm.
{{/unless}}
</div>
<section class="cards">
{{#if focusDomain}}
<section class="card semantic">
<h2>Similar Domains</h2>
<p class="description">
Showing domains similar to <tt>{{focusDomain}}</tt>.
</p>
</section>
{{/if}}
{{#each results}}{{>search/browse-result}}{{/each}}
{{#unless focusDomain}}
<section class="card semantic">
<h2>Random Domains</h2>
<p class="description">
This list of domains is random. <a href="/explore/random">Refresh</a> to get
new domains, or click <b>Similar Domains</b> to
take the helm.
</p>
</section>
{{/unless}}
</section>
</article>
{{>search/parts/search-footer}}
</body>

View File

@ -4,7 +4,7 @@
<meta charset="UTF-8">
<title>Marginalia Search - {{query}}</title>
<link rel="stylesheet" href="/style-new.css" />
<link rel="stylesheet" href="/serp.css" />
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="robots" content="noindex" />
@ -12,26 +12,12 @@
<body>
{{>search/parts/search-header}}
{{>search/parts/search-form}}
<article>
{{>search/parts/search-form}}
<div class="infobox">
{{query}} = {{result}}
</div>
<section class="cards">
<div class="card semantic">
<h2>{{query}}</h2>
<p class="description">{{result}}</p>
</div>
<div class="card problem">
<h2>Warning</h2>
<p class="description">
These results use floating point calculations, and may not be accurate
for very large or very small numbers. Do not use for orbital calculations,
thesis projects, or other sensitive work.
</p>
</div>
</section>
</article>
{{>search/parts/search-footer}}
</body>

View File

@ -4,7 +4,7 @@
<meta charset="UTF-8">
<title>Marginalia Search - {{query}}</title>
<link rel="stylesheet" href="/style-new.css" />
<link rel="stylesheet" href="/serp.css" />
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="robots" content="noindex" />
@ -12,37 +12,29 @@
<body>
{{>search/parts/search-header}}
{{>search/parts/search-form}}
<article>
{{>search/parts/search-form}}
<div class="infobox">
{{#unless entries}}
No definitions were found for that word
{{/unless}}
<section class="cards">
{{#unless entries}}
<div class="card problems">
<h2>No Results</h2>
<div class="description">No definitions were found for that word</div>
</div>
{{/unless}}
{{#if entries}}
<ul>
{{#each entries}}
<li>{{word}}, {{type}}: {{definition}}<br></li>
{{/each}}
</ul>
{{/if}}
</div>
{{#each entries}}
<div class="card definition">
<h2>{{type}} - {{word}}</h2>
<div class="description">{{definition}}</div>
</div>
{{/each}}
{{#if entries}}
<div class="card problem">
<h2>Legal</h2>
<p class="description">
This data is derived from <a href="https://en.wiktionary.org/">wiktionary</a>,
available under GFDL and CC BY-SA 3.0. <a href="https://dumps.wikimedia.org/legal.html">More Information</a>.
</p>
</div>
{{/if}}
</section>
</article>
{{#if entries}}
<div class="infobox">
<h2>Legal</h2>
This data is derived from <a href="https://en.wiktionary.org/">wiktionary</a>,
available under GFDL and CC BY-SA 3.0. <a href="https://dumps.wikimedia.org/legal.html">More Information</a>.
</div>
{{/if}}
{{>search/parts/search-footer}}
</body>

View File

@ -4,7 +4,7 @@
<meta charset="UTF-8">
<title>Marginalia Search - {{title}}</title>
<link rel="stylesheet" href="/style-new.css" />
<link rel="stylesheet" href="/serp.css" />
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="robots" content="noindex" />
@ -13,17 +13,12 @@
<body>
{{>search/parts/search-header}}
<article>
{{>search/parts/search-form}}
<div class="cards big">
<div class="card problems">
<h2> {{ title }} </h2>
<div class="info"> {{{message}}} </div>
</div>
<div class="infobox">
<h2> {{ title }} </h2>
<div class="info"> {{{message}}} </div>
</div>
</article>
{{>search/parts/search-footer}}
</body>

View File

@ -1,11 +1,11 @@
<html>
<head>
<title>Error</title>
<link rel="stylesheet" href="https://www.marginalia.nu/style.css">
<link rel="stylesheet" href="/serp.css">
<meta http-equiv="refresh" content="5">
</head>
<body>
<article>
<div class="infobox">
<h1>Error</h1>
<p>Oops! It appears the index server is <span class="headline">{{indexState}}</span>.</p>
<p>The server was probably restarted to bring online some changes. Restarting the index typically takes
@ -15,6 +15,6 @@
on Twitter may have details, otherwise you can always send me an email at <tt>kontakt@marginalia.nu</tt>.</p>
<p>This page will attempt to refresh automatically every few seconds.</p>
</article>
</div>
</body>
</html>

View File

@ -1,4 +1,4 @@
<section class="card">
<section id="frontpage-about">
<h2>About</h2>
<div class="info">
<p>This is an independent DIY search engine that focuses on non-commercial content, and attempts to

View File

@ -1,17 +1,17 @@
{{#if news}}
<section class="card rightbox">
<h2>Publicity, Discussion and Events</h2>
<div class="info">
<dl>
{{#each news}}
<dt><a href="{{url}}" rel="nofollow">{{title}}</a></dt>
<dd>{{date}} {{source}} </dd>
{{/each}}
</dl>
</div>
<div class="utils">
<a href="/news.xml">📡 RSS Feed</a>
</div>
</section>
<section id="frontpage-news">
<h2>Publicity, Discussion and Events</h2>
<div class="info">
<dl>
{{#each news}}
<dt><a href="{{url}}" rel="nofollow">{{title}}</a></dt>
<dd>{{date}} {{source}} </dd>
{{/each}}
</dl>
</div>
<div class="utils">
<a href="/news.xml">📡 RSS Feed</a>
</div>
</section>
{{/if}}

View File

@ -1,23 +1,21 @@
<section class="card">
<h2>Tips</h2>
<div class="info">
<p>
This search engine isn't particularly well equipped to answering queries
posed like questions, instead try to imagine some text that might appear
in the website you are looking for, and search for that.</p>
<p>
Where this search engine really shines is finding small, old and obscure websites about some
given topic, perhaps
<a href="/search?query=commander+keen&profile=yolo&js=default">old video games</a>,
<a href="/search?query=voynich+&profile=yolo&js=default">a mystery</a>,
<a href="/search?query=augustine+confessions&profile=yolo&js=default">theology</a>,
<a href="/search?query=Hermes+Trismegistus&profile=yolo&js=default">the occult</a>,
<a href="/search?query=knitting&profile=yolo&js=default">knitting</a>,
<a href="/search?query=scc+graph+algorithm&profile=yolo&js=default">computer science</a>,
or <a href="/search?query=salvador+dali&profile=yolo&js=default">art</a>.
</p>
<section id="frontpage-tips">
<h2>Tips</h2>
<div class="info">
<p>
This search engine isn't particularly well equipped to answer queries
posed like questions. Instead, try to imagine some text that might appear
in the website you are looking for, and search for that.</p>
<p>
Where this search engine really shines is finding small, old and obscure websites about some
given topic, perhaps
<a href="/search?query=commander+keen&profile=yolo&js=default">old video games</a>,
<a href="/search?query=voynich+&profile=yolo&js=default">a mystery</a>,
<a href="/search?query=augustine+confessions&profile=yolo&js=default">theology</a>,
<a href="/search?query=Hermes+Trismegistus&profile=yolo&js=default">the occult</a>,
<a href="/search?query=knitting&profile=yolo&js=default">knitting</a>,
<a href="/search?query=scc+graph+algorithm&profile=yolo&js=default">computer science</a>,
or <a href="/search?query=salvador+dali&profile=yolo&js=default">art</a>.
</p>
</div>
<div class="utils">
</div>
</section>
</div>
</section>

View File

@ -1,10 +0,0 @@
<section class="card">
<h2>Updates</h2>
<div class="info">
<p>☛ It is possible to search by website generator.</p>
<p>☛ New filters for forums and wikis are added.</p>
</div>
<div class="utils">
<a href="https://www.marginalia.nu/release-notes/">Release Notes</a>
</div>
</section>

View File

@ -4,7 +4,7 @@
<meta charset="UTF-8">
<title>Marginalia Search</title>
<link rel="stylesheet" href="/style-new.css" />
<link rel="stylesheet" href="/serp.css" />
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
@ -20,18 +20,12 @@
<!-- Hi there, fellow human being :-) -->
{{>search/parts/search-header}}
<article>
{{>search/parts/search-form}}
<section id="frontpage">
{{>search/index/index-news}}
<div class="cards big">
{{>search/index/index-about}}
{{>search/index/index-tips}}
{{>search/index/index-updates}}
</div>
</article>
</section>
{{>search/parts/search-footer}}
</body>

View File

@ -1,80 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Marginalia Search - File complaint against {{domain}}</title>
<link rel="stylesheet" href="/style-new.css" />
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="robots" content="noindex" />
</head>
<body>
{{>search/parts/search-header}}
<article>
{{>search/parts/search-form}}
<section class="form">
{{#if isSubmitted}}
<h1>Your complaint against {{domain}} has been submitted</h1>
<p>The review process is manual and may take a while.</p>
{{/if}}
{{#unless isSubmitted}}
<h1>Flag {{domain}} for review</h1>
Note, this is not intended to police acceptable thoughts or ideas.
<p>
That said, offensive content in obvious bad faith is not tolerated, especially when designed
to crop up when you didn't go looking for it. How and where it is said is more
important than what is said.
<p>
This form can also be used to appeal unfairly blacklisted sites.
<p>
<form method="POST" action="/site/flag-site/{{domainId}}">
<fieldset>
<legend>Flag for Review</legend>
<label for="category">Category</label><br>
<select name="category" id="category">
{{#each category}} <option value="{{categoryName}}">{{categoryDesc}}</option> {{/each}}
</select>
<br>
<br>
<label for="description">Description</label><br>
<textarea type="text" name="description" id="description" rows=4></textarea><br>
<br>
<label for="samplequery">(Optional) Search Query </label><br>
<input type="text" name="samplequery" id="samplequery" length=255 /><br>
<br>
<br/>
<input type="submit" value="File complaint" />
</fieldset>
</form>
<p>
Communicating through forms and tables is a bit impersonal,
you may also reach a human being through email at <tt>kontakt@marginalia.nu</tt>.
{{/unless}}
{{#if complaints}}
<hr>
<h2> Complaints against {{domain}} </h2>
<table border width=100%>
<tr><th>Category</th><th>Submitted</th><th>Reviewed</th></tr>
{{#each complaints}}
<tr>
<td>{{category}}</td>
<td>{{submitTime}}</td>
<td>{{#if reviewed}}&check;{{/if}}</td>
</tr>
{{/each}}
</table>
{{/if}}
</section>
{{>search/parts/search-footer}}
</body>

View File

@ -0,0 +1,30 @@
<h2>Filters</h2>
<ul>
{{#with removeJsOption}}
<li title="Exclude results with javascript"
{{#if set}}aria-checked="true" class="current"{{/if}}
{{#unless set}}aria-checked="false"{{/unless}}
role="checkbox">
<a href="{{url}}">{{name}}</a>
</li>
{{/with}}
{{#with reduceAdtechOption}}
<li title="Exclude results with tracking or likely affiliate links"
{{#if set}}aria-checked="true" class="current"{{/if}}
{{#unless set}}aria-checked="false"{{/unless}}
role="checkbox">
<a href="{{url}}">{{name}}</a>
</li>
{{/with}}
</ul>
<h3>Domains</h3>
<ul>
{{#each filterGroups}}
{{#each .}}
<li {{#if current}}aria-selected="true" class="current"{{/if}}><a href="{{url}}">{{displayName}}</a></li>
{{/each}}
<hr>
{{/each}}
</ul>
<!-- load this ASAP to avoid flicker -->
<script src="/menu.js"></script>

View File

@ -112,4 +112,5 @@
</section>
</footer>
<script src="/tts.js" rel="javascript"></script>
<script src="/typeahead.js"></script>

View File

@ -1,41 +1,16 @@
<form method="get" action="/search">
<section class="search-box">
<h1>Search the Internet</h1>
<div class="input">
<input id="query" name="query" placeholder="Search terms" value="{{query}}">
<input value="Go" type="submit">
</div>
<div class="settings">
<select name="profile" id="profile">
<optgroup label="General Search">
<option {{#eq profile "default"}}selected{{/eq}} value="default">Popular Sites</option>
<option {{#eq profile "modern"}}selected{{/eq}} value="modern">Personal Websites</option>
<option {{#eq profile "academia"}}selected{{/eq}} value="academia">Academia</option>
<option {{#eq profile "corpo"}}selected{{/eq}} value="corpo">No Domain Ranking</option>
</optgroup>
<optgroup label="Vintage">
<option {{#eq profile "vintage"}}selected{{/eq}} value="vintage">Web 1.0</option>
<option {{#eq profile "tilde"}}selected{{/eq}} value="tilde">~tilde/</option>
<option {{#eq profile "plain-text"}}selected{{/eq}} value="plain-text">Text Files</option>
</optgroup>
<optgroup label="Category">
<option {{#eq profile "blogosphere"}}selected{{/eq}} value="blogosphere">Blogosphere (NEW)</option>
<option {{#eq profile "wiki"}}selected{{/eq}} value="wiki">Wiki</option>
<option {{#eq profile "forum"}}selected{{/eq}} value="forum">Forum</option>
<option {{#eq profile "docs"}}selected{{/eq}} value="docs">Docs (experimental)</option>
</optgroup>
<optgroup label="Topics Search">
<option {{#eq profile "food"}}selected{{/eq}} value="food">Recipes &#127859;</option>
<option {{#eq profile "crafts"}}selected{{/eq}} value="crafts">Crafts &#129697;&#128296; (WIP; mostly textile-craft)</option>
<option {{#eq profile "classics"}}selected{{/eq}} value="classics">Classics and Antiquity &#128220;</option>
</optgroup>
<form action="/search" method="get" id="search-form">
<div id="search-box">
<h1>
Search The Internet
</h1>
<div id="suggestions-anchor"></div>
<input type="text" id="query" name="query" placeholder="Search..." value="{{query}}">
<input type="hidden" name="js" value="{{js}}">
<input type="hidden" name="adtech" value="{{adtech}}">
<input type="hidden" name="profile" value="{{profile}}">
</select>
<select name="js" id="js">
<option {{#eq js "default"}}selected{{/eq}} value="default">Allow JS</option>
<option {{#eq js "no-js"}}selected{{/eq}} value="no-js">Deny JS</option>
<option {{#eq js "yes-js"}}selected{{/eq}} value="yes-js">Require JS</option>
</select>
</div>
</section>
</form>
<input type="submit" form="search-form" title="Execute Search" value="Search">
</div>
</form>
<!-- load the first stage mobile customizations script early to avoid flicker -->
<script src="/main.js"></script>

View File

@ -0,0 +1,19 @@
<section {{#unless hideRanking}} data-rs-rank="{{logRank}}" data-ms-rank="{{matchRank}}"{{/unless}}
class="card search-result {{#if specialDomain}}special-domain{{/if}}" >
<div class="url"><a rel="nofollow external" href="{{url}}">{{url}}</a></div>
<h2> <a tabindex="-1" class="title" rel="nofollow external" href="{{url}}">{{title}}</a> </h2>
<p class="description">{{description}}</p>
<div class="utils">
{{#unless focusDomain}}
<a href="/site/{{url.domain}}" title="Domain Information">Info</a>
{{#if hasMoreResults}}<a href="/site-search/{{url.domain}}/{{query}}?profile={{profile}}" title="More results from this domain">{{resultsFromSameDomain}}+</a>{{/if}}
{{/unless}}
<div class="meta">
{{#if problems}} <span class="problems" title="{{problems}}"> ⚠ {{problemCount}} </span> {{/if}}
<span aria-hidden="true" class="meta positions"
title="Positions where keywords were found within the document">{{positions}}</span>
</div>
</div>
</section>
<hr class="w3m-helper" />

View File

@ -1,66 +0,0 @@
<h2>Indexing Information</h2>
<div class="description">
<br>
{{#if blacklisted}}
This website is <em>blacklisted</em>. This excludes it from crawling and indexing.
<p>This is usually because of some form of misbehavior on the webmaster's end.
Either annoying search engine spam, or tasteless content bad faith content.
<p>Occasionally this is done hastily and in error. If you would like the decision
reviewed, you may use <a href="/site/flag-site/{{domainId}}">this form</a> to file a report.</tt>
{{/if}}
{{#unless blacklisted}}
<fieldset>
<legend>Index</legend>
State: {{state}}<br/>
Node Affinity: {{nodeAffinity}} </br>
Pages Known: {{pagesKnown}} <br/>
Pages Crawled: {{pagesFetched}} <br/>
Pages Indexed: {{pagesIndexed}} <br/>
</fieldset>
<br/>
{{#if inCrawlQueue}}
This website is in the queue for crawling.
It may take up to a month before it is indexed.
{{/if}}
{{#if suggestForCrawling}}
{{#if unknownDomain}}
<fieldset>
<legend>Crawling</legend>
This website is not known to the search engine.
To submit the website for crawling, follow <a
rel="noopener noreferrer"
target="_blank"
href="https://github.com/MarginaliaSearch/submit-site-to-marginalia-search">these instructions</a>.
</fieldset>
{{/if}}
{{#unless unknownDomain}}
<form method="POST" action="/site/suggest/">
<fieldset>
<legend>Crawling</legend>
This website is not queued for crawling. If you would like it to be crawled,
use the checkbox and button below.<p/>
<input type="hidden" name="id" value="{{domainId}}" />
<input type="checkbox" id="nomisclick" name="nomisclick" /> <label for="nomisclick"> This is not a mis-click </label>
<br/>
<br/>
<input type="submit" value="Add {{domain}} to queue" />
</fieldset>
</form>
{{/unless}}
{{/if}}
{{#if pagesFetched}}
<p>
If you've found a reason why this website should not be indexed,
you may use <a href="/site/flag-site/{{domainId}}">this form</a> to file a report.<p>
{{/if}}
{{/unless}}
</div>
</p>
</div>

View File

@ -1,18 +0,0 @@
<div class="card info">
<h2>Links</h2>
<div class="description">
<br>
<fieldset>
<legend>Link Graph</legend>
Ranking: {{ranking}}%<br/>
Incoming Links: {{incomingLinks}} <br/>
Outbound Links: {{outboundLinks}} <br/>
</fieldset>
<br>
<fieldset>
<legend>Explore</legend>
<a href="/links/{{domain.domain}}">Which pages link here?</a><br/>
<a href="/explore/{{domain}}">Explore similar domains</a><br/>
</fieldset>
</div>
</div>

View File

@ -1,8 +0,0 @@
{{#if problems}} <span class="problems" title="{{problems}}"> ⚠ {{problemCount}} </span> {{/if}}
<span
aria-hidden="true"
class="meta positions"
title="Positions where keywords were found within the document">
{{positions}}
</span>

View File

@ -1,23 +0,0 @@
<!-- RankingID: {{rankingId}}
ID: {{id}} - {{combinedId}}
Ranking: {{ranking}}
TermScore: {{termScore}}
Quality: {{urlQuality}}
-->
<!--
{{#each keywordScores}} {{{.}}} {{/each}}
-->
<section class="card search-result {{#unless hideRanking}}rs-rank-{{logRank}} ms-rank-{{matchRank}}{{/unless}} {{#if specialDomain}}special-domain{{/if}}" >
<div class="url"><a rel="nofollow external" href="{{url}}">{{url}}</a></div>
<h2> <a tabindex="-1" class="title" rel="nofollow external" href="{{url}}">{{title}}</a> </h2>
<p class="description">{{description}}</p>
<div class="utils">
{{#unless focusDomain}}
<a href="/site/{{url.domain}}" title="Domain Information">Info</a>
{{#if hasMoreResults}}<a href="/site-search/{{url.domain}}/{{query}}?profile={{profile}}" title="More results from this domain">{{resultsFromSameDomain}}+</a>{{/if}}
{{/unless}}
<div class="meta">{{>search/search-result-metadata}}</div>
</div>
<hr class="w3m-helper" />
</section>

View File

@ -4,49 +4,37 @@
<meta charset="UTF-8">
<title>Marginalia Search - {{query}}</title>
<link rel="stylesheet" href="/style-new.css" />
<link rel="stylesheet" href="/serp.css" />
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="robots" content="noindex" />
</head>
<body>
<body data-filter="{{filters.currentFilter}}">
<!-- Hi there, fellow human being :-) -->
{{>search/parts/search-header}}
<article>
{{>search/parts/search-form}}
<section class="sidebar-narrow">
<section id="results" class="sb-left">
{{#if focusDomain}}
<div class="infobox">
Showing search results from <a href="/site/{{focusDomain}}">{{focusDomain}}</a>.
</div>
<hr class="w3m-helper" />
<section class="cards">
{{#if maintenanceMessage}}<section class="card problems onlyscreen"><h2>Maintenance</h2><p class="description">{{maintenanceMessage}}</p></section>{{/if}}
{{#if evalResult}}<section class="card semantic onlyscreen"><h2>Evaluation</h2><p class="description">{{query}} = {{evalResult}}</p><hr class="w3m-helper" /></section>{{/if}}
{{#each wiki.entries}}<section class="card semantic onlyscreen"><h2>Encyclopedia</h2><p class="description"><a href="https://encyclopedia.marginalia.nu/wiki/{{.}}"><em>{{.}}</em> Encyclopedia Page</a></p><hr class="w3m-helper" /></section>{{/each}}
{{#if focusDomain}}
<section class="card semantic">
<h2>{{focusDomain}}</h2>
<p class="description">
Showing results from <tt>{{focusDomain}}</tt>
</p>
<div class="utils">
<a href="/site/{{focusDomain}}">Info</a>
<a href="/explore/{{focusDomain}}">Similar Domains</a>
</div>
{{/if}}
{{#each results}}{{>search/parts/search-result}}{{/each}}
</section>
{{/if}}
{{#unless evalResult}}{{#if problems}}<section class="card problems onlyscreen"><h2>Suggestions</h2><ul class="onlyscreen search-problems">{{#each problems}}<li>{{{.}}}</li>{{/each}}</ul></section> {{/if}}{{/unless}}
{{#each domainResults}}{{>search/browse-result}}{{/each}}
{{#each results}}{{>search/search-result}}{{/each}}
{{#with filters}}
<section id="filters" class="sb-right">
{{>search/parts/search-filters}}
</section>
{{/with}}
</section>
</article>
{{>search/parts/search-footer}}
</body>
</html>

View File

@ -1,37 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Marginalia Search - {{query}}</title>
<link rel="stylesheet" href="/style-new.css" />
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="robots" content="noindex" />
</head>
<body>
{{>search/parts/search-header}}
<article>
{{>search/parts/search-form}}
<section class="cards">
<div class="card">
<h2>{{domain}}</h2>
<a href="http://{{domain}}/"><img src="{{screenshot}}" alt="Thumbnail image of {{domain}}"/></a>
</div>
<div class="card info">
{{>search/parts/site-info-index}}
{{>search/parts/site-info-links}}
{{#each results}}{{>search/search-result}}{{/each}}
</section>
</article>
{{>search/parts/search-footer}}
</body>

View File

@ -0,0 +1,8 @@
{{!-- Notice shown in the site-info index panel when the domain is blacklisted.
      The appeal link switches the current site-info page to its report view. --}}
<p>This website is <em>blacklisted</em>. This excludes it from crawling and indexing.</p>
<p>This is usually because of some form of misbehavior on the webmaster's end:
either annoying search engine spam, or tasteless, bad-faith content.</p>
<p>Occasionally this is done hastily and in error. If you would like the decision
reviewed, you may use the <a href="?view=report">report form</a> to file an appeal.
</p>

View File

@ -0,0 +1,10 @@
<fieldset>
<legend>Index</legend>
State: {{state}}<br/>
Domain ID: {{domainId}} <br/>
Node Affinity: {{nodeAffinity}} <br/>
Pages Known: {{pagesKnown}} <br/>
Pages Crawled: {{pagesFetched}} <br/>
Pages Indexed: {{pagesIndexed}} <br/>
</fieldset>
<br/>

View File

@ -0,0 +1,12 @@
<form method="POST" action="/site/suggest/">
<fieldset>
<legend>Crawling</legend>
This website is not queued for crawling. If you would like it to be crawled,
use the checkbox and button below.<p/>
<input type="hidden" name="id" value="{{domainId}}" />
<input type="checkbox" id="nomisclick" name="nomisclick" /> <label for="nomisclick"> This is not a mis-click </label>
<br/>
<br/>
<input type="submit" value="Add {{domain}} to queue" />
</fieldset>
</form>

View File

@ -0,0 +1,9 @@
<fieldset>
<legend>Crawling</legend>
This website is not known to the search engine.
To submit the website for crawling, follow <a
rel="noopener noreferrer"
target="_blank"
href="https://github.com/MarginaliaSearch/submit-site-to-marginalia-search">these instructions</a>.
</fieldset>

View File

@ -0,0 +1,23 @@
<h2>Indexing Information</h2>
{{#if domainState.blacklisted}}
{{>search/site-info/site-info-index-blacklisted}}
{{/if}}
{{#if domainState.unknownDomain}}
{{>search/site-info/site-info-index-unknown}}
{{/if}}
{{#if domainState.inCrawlQueue}}
<p>
This website is in the queue for crawling.
It may take up to a month before it is indexed.
</p>
{{/if}}
{{#if domainState.suggestForCrawling}}
{{>search/site-info/site-info-index-suggest}}
{{/if}}
{{#if domainState.indexed}}
{{>search/site-info/site-info-index-indexed}}
{{/if}}

View File

@ -0,0 +1,7 @@
<h2>Links</h2>
<fieldset>
<legend>Link Graph</legend>
Ranking: {{ranking}}%<br/>
Incoming Links: {{incomingLinks}} <br/>
Outbound Links: {{outboundLinks}} <br/>
</fieldset>

View File

@ -0,0 +1,60 @@
<section id="complaint">
{{#if submitted}}
<h2>Your complaint against {{domain}} has been submitted</h2>
<p>The review process is manual and may take a while. If urgent action is necessary,
reach me at kontakt@marginalia.nu!
</p>
{{/if}}
{{#unless submitted}}
<h2>Flag {{domain}} for review</h2>
<p>
Note, this is not intended to police acceptable thoughts or ideas.
<p>
That said, offensive content in obvious bad faith is not tolerated, especially when designed
to crop up when you didn't go looking for it. How and where it is said is more
important than what is said.
<p>
This form can also be used to appeal unfairly blacklisted sites.
<p>
<form method="POST">
<fieldset>
<legend>Flag for Review</legend>
<label for="category">Category</label><br>
<select name="category" id="category">
{{#each category}} <option value="{{categoryName}}">{{categoryDesc}}</option> {{/each}}
</select>
<br>
<br>
<label for="description">Description</label><br>
<textarea type="text" name="description" id="description" rows=4></textarea><br>
<br>
<label for="samplequery">(Optional) Search Query </label><br>
<input type="text" name="samplequery" id="samplequery" maxlength="255" /><br>
<br>
<br/>
<input type="submit" value="File complaint" />
</fieldset>
</form>
<p>
Communicating through forms and tables is a bit impersonal,
you may also reach a human being through email at <tt>kontakt@marginalia.nu</tt>.
{{/unless}}
{{#if complaints}}
<hr>
<h2> Complaints against {{domain}} </h2>
<table border width=100%>
<tr><th>Category</th><th>Submitted</th><th>Reviewed</th></tr>
{{#each complaints}}
<tr>
<td>{{category}}</td>
<td>{{submitTime}}</td>
<td>{{#if reviewed}}&check;{{/if}}</td>
</tr>
{{/each}}
</table>
{{/if}}
</section>

View File

@ -0,0 +1,110 @@
<div class="infobox">
A <a href="/explore/{{domain}}">visual exploration</a> mode is also available.
</div>
<div id="similar-view" data-layout="{{layout}}">
<div id="similar-info">
<h2><span title="External Link">&#x1F30E;</span>&nbsp;<a rel="external noopener" href="https://{{domain}}/">{{domain}}</a></h2>
<a rel="external noopener" href="https://{{domain}}/">
<img class="screenshot" width="300" height="225" src="/screenshot/{{domainId}}" alt="Screenshot of {{domain}}" />
</a>
{{#with domainInformation}}
{{> search/site-info/site-info-index}}
{{> search/site-info/site-info-links}}
{{/with}}
</div>
{{#if similar}}
<div id="similar-domains">
<h2>Similar Domains</h2>
<table class="similarity-table">
<tr>
<th colspan="3">Meta</th>
<th>Rank</th>
<th>Domain</th>
<th>Similarity</th>
</tr>
{{#each similar}}
<tr>
<td>
{{#if indexed}}
{{#if active}}
<span title="Indexed">&#128064;</span>
{{/if}}
{{#unless active}}
<span title="Problem">&#128293;</span>
{{/unless}}
{{/if}}
</td>
<td>
{{#if screenshot}}&#x1f4f7;{{/if}}
</td>
<td>
<span title="{{linkType.description}}">{{{linkType}}}</span>
</td>
<td>
<span title="{{rank}}%">{{{rankSymbols}}}</span>
</td>
<td>
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
<td>
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
</td>
</tr>
{{/each}}
</table>
<p><b>Note</b>: Because two domains are considered similar does not always mean they're in
cahoots. Similarity is a measure of how often they appear in the same contexts,
which may be an association like peas and carrots, but some pairings are also defined by their
contrasting opposition, like Sparta and Athens.</p>
</div>
{{/if}}
{{#if linking}}
<div id="similar-links">
<h2>Linking Domains</h2>
<table class="similarity-table">
<tr>
<th colspan="3">Meta</th>
<th>Rank</th>
<th>Domain</th>
<th>Similarity</th>
</tr>
{{#each linking}}
<tr>
<td>
{{#if indexed}}
{{#if active}}
<span title="Indexed">&#128064;</span>
{{/if}}
{{#unless active}}
<span title="Problem">&#128293;</span>
{{/unless}}
{{/if}}
</td>
<td>
{{#if screenshot}}&#x1f4f7;{{/if}}
</td>
<td>
<span title="{{linkType.description}}">{{{linkType}}}</span>
</td>
<td>
<span title="{{rank}}%">{{{rankSymbols}}}</span>
</td>
<td>
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
<td>
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
</td>
</tr>
{{/each}}
</table>
</div>
{{/if}}
</div>

View File

@ -0,0 +1,57 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Marginalia Search - {{domain}}</title>
<link rel="stylesheet" href="/serp.css" />
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="robots" content="noindex" />
</head>
<body>
{{>search/parts/search-header}}
{{>search/parts/search-form}}
{{#with view}}
<nav id="siteinfo-nav">
<h2>{{domain}}</h2>
<ul>
<li {{#if info}}class="current"{{/if}}><a href="?view=info">Info</a></li>
<li {{#if docs}}class="current"{{/if}}>{{#if known}}<a href="?view=docs">Docs</a>{{/if}}{{#unless known}}<a class="link-unavailable" title="This domain is not known by the search engine">Docs</a>{{/unless}}</li>
<li {{#if links}}class="current"{{/if}}><a href="?view=links">Backlinks</a></li>
<li {{#if report}}class="current"{{/if}}>{{#if known}}<a href="?view=report">Report</a>{{/if}}{{#unless known}}<a class="link-unavailable" title="This domain is not known by the search engine">Report</a>{{/unless}}</li>
</ul>
</nav>
{{/with}}
{{#if view.links}}
<div class="infobox">
Showing search results with links to {{domain}}.
</div>
{{#each results}}{{>search/parts/search-result}}{{/each}}
{{/if}}
{{#if view.docs}}
<div class="infobox">
Showing documents found in {{domain}}.
</div>
{{#each results}}{{>search/parts/search-result}}{{/each}}
{{/if}}
{{#if view.report}}
{{>search/site-info/site-info-report}}
{{/if}}
{{#if view.info}}
{{>search/site-info/site-info-summary}}
{{/if}}
{{>search/parts/search-footer}}
</body>

View File

@ -1,38 +0,0 @@
package nu.marginalia.search.command.commands;
import nu.marginalia.search.exceptions.RedirectException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.fail;
/**
 * Verifies that {@code BangCommand} only redirects when the bang token
 * ({@code !g}) is a separate word at the start or end of the query.
 */
class BangCommandTest {

    /**
     * Runs {@code op} and asserts that it throws a {@link RedirectException}
     * pointing at {@code expectedUrl}.
     */
    void expectRedirectUrl(String expectedUrl, Runnable op) {
        RedirectException intercepted = null;

        try {
            op.run();
        }
        catch (RedirectException ex) {
            intercepted = ex;
        }

        if (intercepted == null) {
            fail("Didn't intercept exception");
        }

        Assertions.assertEquals(expectedUrl, intercepted.newUrl, "Unexpected redirect");
    }

    /** Runs {@code op} and fails the test if it throws a {@link RedirectException}. */
    void expectNoRedirect(Runnable op) {
        try {
            op.run();
        }
        catch (RedirectException redirect) {
            fail("Expected no redirection, but got " + redirect.newUrl);
        }
    }

    @Test
    public void testBang() {
        var command = new BangCommand();

        // Trailing bang, separated by whitespace: redirect expected.
        expectRedirectUrl(
                "https://www.google.com/search?q=search+terms",
                () -> command.process(null, null, "search terms !g"));

        // Bang glued to a neighbouring word: must not redirect.
        expectNoRedirect(() -> command.process(null, null, "search terms!g"));
        expectNoRedirect(() -> command.process(null, null, "!gsearch terms"));

        // Leading bang, separated by whitespace: redirect expected.
        expectRedirectUrl(
                "https://www.google.com/search?q=search+terms",
                () -> command.process(null, null, "!g search terms"));
    }
}

View File

@ -49,6 +49,7 @@ dependencies {
implementation libs.guice
implementation libs.zstd
implementation libs.rxjava
implementation libs.handlebars
implementation libs.trove
implementation libs.spark

View File

@ -0,0 +1,41 @@
package nu.marginalia.control;
import com.github.jknack.handlebars.Handlebars;
import com.github.jknack.handlebars.Helper;
import com.github.jknack.handlebars.Options;
import nu.marginalia.renderer.config.HandlebarsConfigurator;
/**
 * {@link HandlebarsConfigurator} implementation in the control package.
 * Registers the {@code readableUUID} helper on the supplied
 * {@link Handlebars} instance.
 */
public class ControlHandlebarsConfigurator implements HandlebarsConfigurator {

    /** Name under which {@link UUIDHelper} is registered. */
    private static final String READABLE_UUID_HELPER = "readableUUID";

    /**
     * Registers the helpers provided by this configurator.
     *
     * @param handlebars the instance to register helpers on
     */
    @Override
    public void configure(Handlebars handlebars) {
        final UUIDHelper uuidHelper = new UUIDHelper();
        handlebars.registerHelper(READABLE_UUID_HELPER, uuidHelper);
    }
}
/**
 * Helper for rendering UUIDs in a more readable way.
 *
 * <p>The value is rendered as four two-character fragments (the first eight
 * characters of the dash-stripped string). Each fragment is wrapped in a
 * monospace {@code <span>} whose text-shadow colour is taken from successive
 * six-character slices of the same string. An outer {@code <span>} carries
 * the full original value as its {@code title}.
 */
class UUIDHelper implements Helper<Object> {

    /** Minimum length of the raw input; anything shorter renders as the empty string. */
    private static final int MIN_RAW_LENGTH = 31;

    /** Number of fragments rendered. */
    private static final int SEGMENTS = 4;

    /** Characters consumed per colour value (the part after the {@code #}). */
    private static final int COLOR_WIDTH = 6;

    /** Characters shown per fragment. */
    private static final int NAME_WIDTH = 2;

    /**
     * Renders {@code context} as an HTML snippet.
     *
     * @param context the value to render; its {@code toString()} is used
     * @param options unused
     * @return the HTML snippet, or the empty string when {@code context} is
     *         {@code null} or too short to produce four colours
     */
    @Override
    public Object apply(Object context, Options options) {
        if (context == null) return "";

        String raw = context.toString();
        if (raw.length() < MIN_RAW_LENGTH) return "";

        String instance = raw.replace("-", "");

        // The raw-length check alone is not sufficient: an input with many
        // dashes can shrink below the 24 characters the slicing below needs,
        // which would make substring() throw.
        if (instance.length() < SEGMENTS * COLOR_WIDTH) return "";

        // NOTE(review): raw is inserted into the title attribute verbatim;
        // this assumes callers only pass UUID-like values. Confirm.
        StringBuilder html = new StringBuilder("<span title=\"%s\">".formatted(raw));

        for (int i = 0; i < SEGMENTS; i++) {
            String color = "#" + instance.substring(COLOR_WIDTH * i, COLOR_WIDTH * (i + 1));
            String shortName = instance.substring(NAME_WIDTH * i, NAME_WIDTH * (i + 1));

            html.append("<span style=\"text-shadow: 0 0 0.2ch %s; font-family: monospace;\">%s</span>"
                    .formatted(color, shortName));
        }

        // Close the outer title span so the emitted markup is balanced.
        html.append("</span>");

        return html.toString();
    }
}

View File

@ -3,10 +3,13 @@ package nu.marginalia.control;
import com.google.inject.AbstractModule;
import com.google.inject.Module;
import com.google.inject.name.Names;
import nu.marginalia.renderer.config.HandlebarsConfigurator;
import java.nio.file.Path;
/** Guice module in the control package. */
public class ControlProcessModule extends AbstractModule {
@Override
protected void configure() {}
protected void configure() {
// Resolve HandlebarsConfigurator to the control package's implementation.
bind(HandlebarsConfigurator.class).to(ControlHandlebarsConfigurator.class);
}
}

View File

@ -4,7 +4,6 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.db.DomainTypes;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import spark.Request;
import spark.Response;

1
run/env/service.env vendored
View File

@ -6,3 +6,4 @@ CONVERTER_PROCESS_OPTS="-Dservice-name=converter -Dservice-host=0.0.0.0"
CRAWLER_PROCESS_OPTS="-Dservice-name=crawler -Dservice-host=0.0.0.0"
LOADER_PROCESS_OPTS="-Dservice-name=loader -Dservice-host=0.0.0.0"
INDEX_CONSTRUCTION_PROCESS_OPTS="-Dservice-name=index-constructor -Djava.util.concurrent.ForkJoinPool.common.parallelism=4"
SEARCH_SERVICE_OPTS="-Dwebsite-url=http://localhost:8080"