Serve assets from search service instead of resource-store,
dynamically render index for future goodies, css tweaks.
This commit is contained in:
parent
db4cf70784
commit
ee0580273e
@ -3,7 +3,7 @@ server {
|
||||
listen [::]:80;
|
||||
server_name nginx;
|
||||
|
||||
location /search {
|
||||
location / {
|
||||
if ( $request_method = POST ) {
|
||||
return 444;
|
||||
}
|
||||
@ -14,12 +14,7 @@ server {
|
||||
proxy_set_header X-Extern-Domain $scheme://$host;
|
||||
proxy_set_header X-User-Agent $http_user_agent;
|
||||
|
||||
proxy_pass http://edge-search:5023/public/search;
|
||||
tcp_nodelay on;
|
||||
}
|
||||
|
||||
location / {
|
||||
proxy_pass http://edge-search:5023/;
|
||||
proxy_pass http://edge-search:5023/public/;
|
||||
tcp_nodelay on;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,40 @@
|
||||
package nu.marginalia.wmsa.edge.search.command;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
|
||||
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
|
||||
import nu.marginalia.wmsa.edge.search.model.BrowseResultSet;
|
||||
import nu.marginalia.wmsa.edge.search.results.BrowseResultCleaner;
|
||||
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
|
||||
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@Singleton
|
||||
public class IndexCommand {
|
||||
|
||||
private final EdgeDataStoreDao dataStoreDao;
|
||||
private final BrowseResultCleaner browseResultCleaner;
|
||||
private final MustacheRenderer<BrowseResultSet> template;
|
||||
private final EdgeDomainBlacklist blacklist;
|
||||
@Inject
|
||||
public IndexCommand(EdgeDataStoreDao dataStoreDao, RendererFactory rendererFactory, BrowseResultCleaner browseResultCleaner, EdgeDomainBlacklist blacklist) throws IOException {
|
||||
this.dataStoreDao = dataStoreDao;
|
||||
this.browseResultCleaner = browseResultCleaner;
|
||||
|
||||
template = rendererFactory.renderer("edge/index");
|
||||
this.blacklist = blacklist;
|
||||
}
|
||||
|
||||
public String render(Request request, Response response) {
|
||||
response.header("Cache-control", "public,max-age=3600");
|
||||
|
||||
var results = dataStoreDao.getRandomDomains(5, blacklist, 0);
|
||||
results.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
||||
|
||||
return template.render(new BrowseResultSet(results.stream().limit(1).toList()));
|
||||
}
|
||||
}
|
@ -22,7 +22,6 @@ import spark.Spark;
|
||||
import spark.resource.ClassPathResource;
|
||||
import spark.staticfiles.MimeType;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.net.URLEncoder;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
@ -35,6 +34,7 @@ public class ResourceStoreService extends Service {
|
||||
|
||||
private final AuthClient authClient;
|
||||
private final ResourceEntityStore resourceStore;
|
||||
private StaticResources staticResources;
|
||||
|
||||
@Inject
|
||||
public ResourceStoreService(@Named("service-host") String ip,
|
||||
@ -42,11 +42,13 @@ public class ResourceStoreService extends Service {
|
||||
AuthClient authClient,
|
||||
ResourceEntityStore resourceStore,
|
||||
Initialization initialization,
|
||||
MetricsServer metricsServer
|
||||
MetricsServer metricsServer,
|
||||
StaticResources staticResources
|
||||
) {
|
||||
super(ip, port, initialization, metricsServer);
|
||||
this.authClient = authClient;
|
||||
this.resourceStore = resourceStore;
|
||||
this.staticResources = staticResources;
|
||||
|
||||
Schedulers.io().schedulePeriodicallyDirect(resourceStore::reapStaleResources,
|
||||
5, 5, TimeUnit.MINUTES);
|
||||
@ -109,12 +111,9 @@ public class ResourceStoreService extends Service {
|
||||
|
||||
return serveDynamic(data, request, response);
|
||||
}
|
||||
else if (serveStatic(domain + "/" + resource, request, response)) {
|
||||
logger.info("getResource({}/{}, static)", domain, resource);
|
||||
}
|
||||
else {
|
||||
logger.info("Could not serve {}/{}", domain, resource);
|
||||
Spark.halt(404, "Not Found");
|
||||
logger.info("getResource({}/{}, static)", domain, resource);
|
||||
staticResources.serveStatic(domain, resource, request, response);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
@ -138,19 +137,7 @@ public class ResourceStoreService extends Service {
|
||||
return data.data;
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private boolean serveStatic(String path, Request req, Response rsp) {
|
||||
try {
|
||||
ClassPathResource resource = new ClassPathResource("static/" + path);
|
||||
handleEtagStatic(resource, req, rsp);
|
||||
resource.getInputStream().transferTo(rsp.raw().getOutputStream());
|
||||
}
|
||||
catch (IllegalArgumentException|FileNotFoundException ex) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private void handleEtag(RenderedResource page, Request req, Response rsp) {
|
||||
|
@ -0,0 +1,46 @@
|
||||
package nu.marginalia.wmsa.resource_store;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
import spark.Spark;
|
||||
import spark.resource.ClassPathResource;
|
||||
import spark.staticfiles.MimeType;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
|
||||
public class StaticResources {
|
||||
private final long startTime = LocalDateTime.now().toEpochSecond(ZoneOffset.UTC);
|
||||
|
||||
@SneakyThrows
|
||||
public void serveStatic(String domain, String path, Request req, Response rsp) {
|
||||
try {
|
||||
ClassPathResource resource = new ClassPathResource("static/" + domain + "/" + path);
|
||||
handleEtagStatic(resource, req, rsp);
|
||||
resource.getInputStream().transferTo(rsp.raw().getOutputStream());
|
||||
}
|
||||
catch (IllegalArgumentException | FileNotFoundException ex) {
|
||||
Spark.halt(404);
|
||||
}
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private void handleEtagStatic(ClassPathResource resource, Request req, Response rsp) {
|
||||
rsp.header("Cache-Control", "public,max-age=3600");
|
||||
rsp.type(MimeType.fromResource(resource));
|
||||
|
||||
final String etag = staticResourceEtag(resource.getFilename());
|
||||
|
||||
if (etag.equals(req.headers("If-None-Match"))) {
|
||||
Spark.halt(304);
|
||||
}
|
||||
|
||||
rsp.header("ETag", etag);
|
||||
}
|
||||
|
||||
private String staticResourceEtag(String resource) {
|
||||
return "\"" + resource.hashCode() + "-" + startTime + "\"";
|
||||
}
|
||||
}
|
@ -7,6 +7,12 @@ body {
|
||||
background-color: #f8f8ee;
|
||||
}
|
||||
|
||||
.rightbox {
|
||||
float: right;
|
||||
display: block;
|
||||
max-width: 40ch;
|
||||
clear: both;
|
||||
}
|
||||
|
||||
.sticker {
|
||||
ruby-position: under;
|
||||
@ -70,6 +76,9 @@ ul.semantic-results a {
|
||||
|
||||
article > section > p { display: none; }
|
||||
|
||||
.cards.big .card { flex-grow: 1 }
|
||||
.cards.big { padding-right: 1ch; }
|
||||
|
||||
.w3m-helper {
|
||||
display: none;
|
||||
}
|
||||
@ -296,6 +305,7 @@ select {
|
||||
}
|
||||
|
||||
footer {
|
||||
clear: both;
|
||||
padding: 2ch;
|
||||
margin: 16ch 0px 0px 0px;
|
||||
background-color: #acae89;
|
||||
@ -337,7 +347,7 @@ a.underline {
|
||||
}
|
||||
|
||||
@media only screen and (max-device-width: 1024px) {
|
||||
|
||||
.rightbox { width: 30ch !important; }
|
||||
.card {
|
||||
margin-right: 2ch;
|
||||
}
|
||||
@ -355,6 +365,7 @@ a.underline {
|
||||
}
|
||||
|
||||
@media only screen and (max-device-width: 800px) {
|
||||
.rightbox { display: none; }
|
||||
.search-box {
|
||||
flex-direction: column;
|
||||
}
|
||||
|
@ -0,0 +1,12 @@
|
||||
<section class="card browse-result rightbox">
|
||||
<h2>{{url.domain}}</h2>
|
||||
|
||||
<a href="{{url.proto}}://{{url.domain}}/">
|
||||
<img src="/screenshot/{{domainId}}" title="{{description}}" loading="lazy"/>
|
||||
</a>
|
||||
|
||||
<div class="utils">
|
||||
<a href="/site/{{url.domain}}">Info</a>
|
||||
<a href="/explore/{{url.domain}}">Similar Domains</a>
|
||||
</div>
|
||||
</section>
|
130
marginalia_nu/src/main/resources/templates/edge/index.hdb
Normal file
130
marginalia_nu/src/main/resources/templates/edge/index.hdb
Normal file
@ -0,0 +1,130 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Marginalia Search</title>
|
||||
|
||||
<link rel="stylesheet" href="/style-new.css" />
|
||||
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="robots" content="noindex" />
|
||||
|
||||
<meta property="og:description" content="search.marginalia.nu is a small independent do-it-yourself search engine for surprising but content-rich websites that never ask you to accept cookies or subscribe to newsletters. The goal is to bring you the sort of grass fed, free range HTML your grandma used to write. " />
|
||||
<meta property="og:locale" content="en_US" />
|
||||
<meta property="og:site_name" content="search.marginalia.nu" />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://search.marginalia.nu/" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
<!-- Hi there, fellow human being :-) -->
|
||||
|
||||
{{>edge/parts/search-header}}
|
||||
|
||||
<article>
|
||||
{{>edge/parts/search-form}}
|
||||
|
||||
<section class="card rightbox">
|
||||
<h2>Publicity, Discussion and Events</h2>
|
||||
<div class="info">
|
||||
<dl>
|
||||
<dt><a href="https://www.deutschlandfunkkultur.de/google-suche-100.html">Kritik an Googles Suche - Platzhirsch auf dem Nebenschauplatz</a></dt>
|
||||
<dd>Deutschlandfunk Kultur 🇩🇪, 2022-08-18</dd>
|
||||
<dt><a href="https://news.ycombinator.com/item?id=31536626" rel="nofollow">Marginalia Goes Open Source</a></dt>
|
||||
<dd>Hacker News, 2022-05-28</dd>
|
||||
<dt><a href="https://www.youtube.com/watch?v=rTSEr0cRJY8" rel="nofollow">You Should Check Out the Indie Web</a> 🎞️</dt>
|
||||
<dd>YouTube, You've Got Kat, 2022-03-15 </dd>
|
||||
<dt>
|
||||
<a href="https://www.newyorker.com/culture/infinite-scroll/what-google-search-isnt-showing-you" rel="nofollow">What Google Search Isn't Showing You</a>
|
||||
</dt>
|
||||
<dd>The New Yorker 🎩, 2022-03-10</dd>
|
||||
<dt>
|
||||
<a href="https://www.metafilter.com/194653/Marginalia-Search-Serendipity-Engineering" rel="nofollow">Marginalia Search - Serendipity Engineering</a>
|
||||
</dt>
|
||||
<dd>MetaFilter, 2022-03-09</dd>
|
||||
<dt>
|
||||
🎂 <a href="https://memex.marginalia.nu/log/49-marginalia-1-year.gmi">First anniversary</a>! 🎊
|
||||
</dt>
|
||||
<dd>
|
||||
2022-02-26
|
||||
</dd>
|
||||
<dt>
|
||||
<a href="https://onezero.medium.com/a-search-engine-designed-to-surprise-you-b81944ed5c06" rel="nofollow">A Search Engine Designed To Surprise You</a>
|
||||
</dt>
|
||||
<dd>Clive Thompson OneZero, 2021-09-16</dd>
|
||||
<dt>
|
||||
<a href="https://news.ycombinator.com/item?id=28550764" rel="nofollow"> A search engine that favors text-heavy sites and punishes modern web design</a>
|
||||
</dt>
|
||||
<dd>
|
||||
Hacker News, 2021-09-16
|
||||
</dd>
|
||||
</dl>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<div class="cards big">
|
||||
<section class="card">
|
||||
<h2>About</h2>
|
||||
<div class="info">
|
||||
<p>This is an independent DIY search engine that focuses on non-commercial content, and attempts to
|
||||
show you sites you perhaps weren't aware of in favor of the sort of sites you probably already knew
|
||||
existed. </p>
|
||||
<p>
|
||||
The software for this search engine is all custom-built, and all crawling and indexing is
|
||||
done in-house. The project is open source. Feel free to poke about in the <a
|
||||
href="https://git.marginalia.nu/marginalia/marginalia.nu">source code</a> or contribute
|
||||
to the development!
|
||||
</p>
|
||||
<p>Consider <a href="https://memex.marginalia.nu/projects/edge/supporting.gmi">supporting the
|
||||
project</a>!</p>
|
||||
</div>
|
||||
<div class="utils">
|
||||
<a href="https://memex.marginalia.nu/projects/edge/about.gmi">Read More</a>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="card">
|
||||
<h2>Tips</h2>
|
||||
<div class="info">
|
||||
<p>
|
||||
This search engine isn't particularly well equipped to answer queries
|
||||
posed like questions, instead try to imagine some text that might appear
|
||||
in the website you are looking for, and search for that.</p>
|
||||
<p>
|
||||
Where this search engine really shines is finding small, old and obscure websites about some
|
||||
given topic, perhaps
|
||||
<a href="https://search.marginalia.nu/search?query=commander+keen&profile=yolo&js=default">old video games</a>,
|
||||
<a href="https://search.marginalia.nu/search?query=voynich+&profile=yolo&js=default">a mystery</a>,
|
||||
<a href="https://search.marginalia.nu/search?query=augustine+confessions&profile=yolo&js=default">theology</a>,
|
||||
<a href="https://search.marginalia.nu/search?query=Hermes+Trismegistus&profile=yolo&js=default">the occult</a>,
|
||||
<a href="https://search.marginalia.nu/search?query=knitting&profile=yolo&js=default">knitting</a>,
|
||||
<a href="https://search.marginalia.nu/search?query=scc+graph+algorithm&profile=yolo&js=default">computer science</a>,
|
||||
or <a href="https://search.marginalia.nu/search?query=salvador+dali&profile=yolo&js=default">art</a>.
|
||||
</p>
|
||||
|
||||
</div>
|
||||
<div class="utils">
|
||||
<a href="https://memex.marginalia.nu/projects/edge/search-tips.gmi">Additional Tips</a>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
|
||||
<section class="card">
|
||||
<h2>Updates</h2>
|
||||
<div class="info">
|
||||
<p>☛ A recipe filter has been added to the algorithm selector.</p>
|
||||
<p>☛ The <a href="https://search.marginalia.nu/explore/random">Random Mode</a> has been overhauled, and is
|
||||
quite entertaining. I encourage you to give it a spin. </p>
|
||||
<p>☛ A simple <a href="https://api.marginalia.nu/">public API</a> is now available.</p>
|
||||
</div>
|
||||
<div class="utils">
|
||||
<a href="https://memex.marginalia.nu/projects/edge/changelog.gmi">Change Log</a>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
</div>
|
||||
</article>
|
||||
|
||||
{{>edge/parts/search-footer}}
|
||||
</body>
|
@ -18,7 +18,9 @@
|
||||
|
||||
<article>
|
||||
{{>edge/parts/search-form}}
|
||||
<hr class="w3m-helper" />
|
||||
|
||||
{{#each domainResults}}{{>edge/browse-result-rb}}{{/each}}
|
||||
|
||||
<section class="cards">
|
||||
{{#if maintenanceMessage}}<section class="card problems onlyscreen"><h2>Maintenance</h2><p class="description">{{maintenanceMessage}}</p></section>{{/if}}
|
||||
{{#if evalResult}}<section class="card semantic onlyscreen"><h2>Evaluation</h2><p class="description">{{query}} = {{evalResult}}</p><hr class="w3m-helper" /></section>{{/if}}
|
||||
@ -37,7 +39,6 @@
|
||||
</section>
|
||||
{{/if}}
|
||||
|
||||
{{#each domainResults}}{{>edge/browse-result}}{{/each}}
|
||||
{{#each results}}{{>edge/search-result}}{{/each}}
|
||||
|
||||
{{#unless evalResult}}{{#if problems}}<section class="card problems onlyscreen"><h2>Suggestions</h2><ul class="onlyscreen search-problems">{{#each problems}}<li>{{{.}}}</li>{{/each}}</ul></section> {{/if}}{{/unless}}
|
||||
|
@ -3,6 +3,7 @@ package nu.marginalia.wmsa.edge.crawling;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.wmsa.edge.crawling.retreival.HttpFetcher;
|
||||
import nu.marginalia.wmsa.edge.crawling.retreival.HttpRedirectResolver;
|
||||
import nu.marginalia.wmsa.edge.crawling.retreival.RateLimitException;
|
||||
import nu.marginalia.wmsa.edge.crawling.retreival.logic.ContentTypeLogic;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeUrl;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
@ -27,14 +28,14 @@ class HttpFetcherTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
void fetchUTF8() throws URISyntaxException {
|
||||
void fetchUTF8() throws URISyntaxException, RateLimitException {
|
||||
var fetcher = new HttpFetcher("nu.marginalia.edge-crawler");
|
||||
var str = fetcher.fetchContent(new EdgeUrl("https://www.marginalia.nu"));
|
||||
System.out.println(str.contentType);
|
||||
}
|
||||
|
||||
@Test
|
||||
void fetchText() throws URISyntaxException {
|
||||
void fetchText() throws URISyntaxException, RateLimitException {
|
||||
var fetcher = new HttpFetcher("nu.marginalia.edge-crawler");
|
||||
var str = fetcher.fetchContent(new EdgeUrl("https://www.marginalia.nu/robots.txt"));
|
||||
System.out.println(str);
|
||||
|
@ -14,7 +14,6 @@ import org.slf4j.LoggerFactory;
|
||||
import spark.Spark;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.LocalDateTime;
|
||||
@ -41,7 +40,7 @@ class ResourceStoreServiceTest {
|
||||
tempDir = Files.createTempDirectory("ResourceStoreServiceTest");
|
||||
resourceStore = new ResourceEntityStore(tempDir);
|
||||
service = new ResourceStoreService("127.0.0.1", testPort, null,
|
||||
resourceStore, new Initialization(), null);
|
||||
resourceStore, new Initialization(), null, new StaticResources());
|
||||
|
||||
Spark.awaitInitialization();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user