(search) Add RSS Feeds to site info

This change integrates the Feedlot RSS Bot with Marginalia's site info view to offer a preview of the latest updates.

The change also introduces a small new feature module, feedlot-client, built on Java's HttpClient.
This commit is contained in:
Viktor Lofgren 2023-12-26 16:21:40 +01:00
parent 4763077b76
commit 1694e9c78c
12 changed files with 237 additions and 49 deletions

View File

@ -0,0 +1,22 @@
// Build script for the feedlot-client feature module.
plugins {
id 'java'
id 'jvm-test-suite'
}
// Compile and run this module with a Java 21 toolchain.
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(21))
}
}
// All coordinates are referenced through the `libs` accessor, which is defined outside this file.
dependencies {
implementation libs.bundles.slf4j
implementation libs.notnull
// Gson is used by FeedlotClient to deserialize the feed service's JSON responses.
implementation libs.gson
// Test-only dependencies.
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}

View File

@ -0,0 +1,20 @@
Client for [FeedlotTheFeedBot](https://github.com/MarginaliaSearch/FeedLotTheFeedBot),
the RSS/Atom feed fetcher and cache for Marginalia Search.
This service is external to the Marginalia Search codebase,
as it is not a core part of the search engine and has other
utilities.
## Example
```java
import java.time.Duration;
var client = new FeedlotClient("localhost", 8080,
gson,
Duration.ofMillis(100), // connect timeout
Duration.ofMillis(100)); // request timeout
CompletableFuture<FeedItems> items = client.getFeedItems("www.marginalia.nu");
```

View File

@ -0,0 +1,58 @@
package nu.marginalia.feedlot;
import com.google.gson.Gson;
import nu.marginalia.feedlot.model.FeedItems;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.concurrent.Executors;
import java.util.concurrent.CompletableFuture;
/**
 * Asynchronous HTTP client for the Feedlot feed service.
 *
 * <p>Requests are issued through a {@link HttpClient} that runs its tasks on a
 * virtual-thread-per-task executor. Every failure (malformed domain name,
 * connection problem, timeout, non-success HTTP status, unparsable body) is
 * reported through the returned {@link CompletableFuture}; {@link #getFeedItems}
 * itself does not throw for bad input.
 */
public class FeedlotClient {
    private final String feedlotHost;
    private final int feedlotPort;
    private final Gson gson;
    private final HttpClient httpClient;
    private final Duration requestTimeout;

    /**
     * Creates a client that talks to {@code http://feedlotHost:feedlotPort}.
     *
     * @param feedlotHost    host name of the feed service
     * @param feedlotPort    TCP port of the feed service
     * @param gson           Gson instance used to deserialize responses
     * @param connectTimeout timeout applied when establishing connections
     * @param requestTimeout timeout applied to each individual request
     */
    public FeedlotClient(String feedlotHost,
                         int feedlotPort,
                         Gson gson,
                         Duration connectTimeout,
                         Duration requestTimeout
                         )
    {
        this.feedlotHost = feedlotHost;
        this.feedlotPort = feedlotPort;
        this.gson = gson;
        this.requestTimeout = requestTimeout;

        httpClient = HttpClient.newBuilder()
                .executor(Executors.newVirtualThreadPerTaskExecutor())
                .connectTimeout(connectTimeout)
                .build();
    }

    /**
     * Fetches the feed items for a domain from {@code /feed/<domainName>}.
     *
     * @param domainName domain whose feed should be fetched
     * @return a future holding the parsed feed items; it completes exceptionally
     *         if the request cannot be built or sent, if the service answers with
     *         a non-2xx status, or if the body cannot be parsed
     */
    public CompletableFuture<FeedItems> getFeedItems(String domainName) {
        final HttpRequest request;

        try {
            request = HttpRequest.newBuilder()
                    .uri(URI.create("http://%s:%d/feed/%s".formatted(feedlotHost, feedlotPort, domainName)))
                    .GET()
                    .timeout(requestTimeout)
                    .build();
        }
        catch (IllegalArgumentException e) {
            // URI.create() rejects malformed input synchronously; surface that through
            // the future so callers only need one error-handling path.
            return CompletableFuture.failedFuture(e);
        }

        return httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString())
                .thenApply(this::requireSuccess)
                .thenApply(this::parseFeedItems);
    }

    /** Returns the response body, or throws if the status code is outside the 2xx range. */
    private String requireSuccess(HttpResponse<String> response) {
        int status = response.statusCode();
        if (status < 200 || status >= 300) {
            throw new IllegalStateException(
                    "Feedlot returned HTTP " + status + " for " + response.uri());
        }
        return response.body();
    }

    /** Deserializes a JSON response body into {@link FeedItems}. */
    private FeedItems parseFeedItems(String s) {
        return gson.fromJson(s, FeedItems.class);
    }

    /** Closes the underlying {@link HttpClient}. */
    public void stop() {
        httpClient.close();
    }
}

View File

@ -0,0 +1,12 @@
package nu.marginalia.feedlot.model;
/**
 * A single entry of a feed.
 *
 * @param title       title of the entry
 * @param date        publication date as a string; {@link #pubDay()} treats it as an
 *                    ISO-style timestamp whose first ten characters are the day
 * @param description description text of the entry
 * @param url         link to the entry
 */
public record FeedItem(String title, String date, String description, String url) {
    /** Length of the {@code yyyy-MM-dd} prefix of an ISO style date string. */
    private static final int ISO_DAY_LENGTH = "yyyy-MM-dd".length();

    /**
     * Extracts the day portion from an ISO style date string.
     *
     * @return the first ten characters of {@code date} when it is longer than that,
     *         {@code date} itself when it is shorter or equal, and an empty string
     *         when {@code date} is {@code null} (e.g. absent from the source JSON)
     */
    public String pubDay() {
        if (date == null) {
            return "";
        }
        if (date.length() > ISO_DAY_LENGTH) {
            return date.substring(0, ISO_DAY_LENGTH);
        }
        return date;
    }
}

View File

@ -0,0 +1,6 @@
package nu.marginalia.feedlot.model;
import java.util.List;
/**
 * Response model for a feed lookup: feed-level fields plus the list of entries.
 *
 * @param domain  domain the feed belongs to (presumably the one that was requested)
 * @param feedUrl URL of the feed itself
 * @param updated last-updated value of the feed, kept as a raw string
 * @param items   entries of the feed
 */
public record FeedItems(
        String domain,
        String feedUrl,
        String updated,
        List<FeedItem> items
) {}

View File

@ -47,6 +47,7 @@ dependencies {
implementation project(':code:features-search:screenshots')
implementation project(':code:features-search:random-websites')
implementation project(':code:features-search:feedlot-client')
implementation libs.bundles.slf4j

View File

@ -1,10 +1,15 @@
package nu.marginalia.search;
import com.google.inject.AbstractModule;
import com.google.inject.Provides;
import nu.marginalia.LanguageModels;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.WmsaHome;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.renderer.config.HandlebarsConfigurator;
import nu.marginalia.feedlot.FeedlotClient;
import java.time.Duration;
public class SearchModule extends AbstractModule {
@ -17,4 +22,14 @@ public class SearchModule extends AbstractModule {
System.getProperty("website-url", "https://search.marginalia.nu/")));
}
/**
 * Builds the {@link FeedlotClient} from system properties.
 *
 * <p>{@code ext-svc-feedlot-host} (default {@code feedlot}) and
 * {@code ext-svc-feedlot-port} (default {@code 80}) select the endpoint;
 * the connect timeout is 250 ms and the request timeout is 100 ms.
 */
@Provides
public FeedlotClient provideFeedlotClient() {
    final String host = System.getProperty("ext-svc-feedlot-host", "feedlot");
    final int port = Integer.getInteger("ext-svc-feedlot-port", 80);
    final var gson = GsonFactory.get();
    final Duration connectTimeout = Duration.ofMillis(250);
    final Duration requestTimeout = Duration.ofMillis(100);

    return new FeedlotClient(host, port, gson, connectTimeout, requestTimeout);
}
}

View File

@ -5,13 +5,17 @@ import nu.marginalia.assistant.client.AssistantClient;
import nu.marginalia.assistant.client.model.SimilarDomain;
import nu.marginalia.client.Context;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.feedlot.model.FeedItems;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.search.SearchOperator;
import nu.marginalia.assistant.client.model.DomainInformation;
import nu.marginalia.feedlot.FeedlotClient;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
import spark.Response;
@ -21,19 +25,23 @@ import java.util.List;
import java.util.Map;
public class SearchSiteInfoService {
private static final Logger logger = LoggerFactory.getLogger(SearchSiteInfoService.class);
private final SearchOperator searchOperator;
private final AssistantClient assistantClient;
private final SearchFlagSiteService flagSiteService;
private final DbDomainQueries domainQueries;
private final MustacheRenderer<Object> renderer;
private final FeedlotClient feedlotClient;
@Inject
public SearchSiteInfoService(SearchOperator searchOperator,
AssistantClient assistantClient,
RendererFactory rendererFactory,
SearchFlagSiteService flagSiteService,
DbDomainQueries domainQueries) throws IOException {
DbDomainQueries domainQueries,
FeedlotClient feedlotClient) throws IOException
{
this.searchOperator = searchOperator;
this.assistantClient = assistantClient;
this.flagSiteService = flagSiteService;
@ -41,6 +49,7 @@ public class SearchSiteInfoService {
this.renderer = rendererFactory.renderer("search/site-info/site-info");
this.feedlotClient = feedlotClient;
}
public Object handle(Request request, Response response) throws SQLException {
@ -121,6 +130,8 @@ public class SearchSiteInfoService {
final List<SimilarDomain> linkingDomains;
String url = "https://" + domainName + "/";;
var feedItemsFuture = feedlotClient.getFeedItems(domainName);
if (domainId < 0 || !assistantClient.isAccepting()) {
domainInfo = createDummySiteInfo(domainName);
similarSet = List.of();
@ -141,12 +152,20 @@ public class SearchSiteInfoService {
}
}
FeedItems feedItems = null;
try {
feedItems = feedItemsFuture.get();
} catch (Exception e) {
logger.debug("Failed to get feed items for {}: {}", domainName, e.getMessage());
}
return new SiteInfoWithContext(domainName,
domainId,
url,
domainInfo,
similarSet,
linkingDomains
linkingDomains,
feedItems
);
}
@ -200,13 +219,16 @@ public class SearchSiteInfoService {
String siteUrl,
DomainInformation domainInformation,
List<SimilarDomain> similar,
List<SimilarDomain> linking) {
List<SimilarDomain> linking,
FeedItems feed
) {
public SiteInfoWithContext(String domain,
long domainId,
String siteUrl,
DomainInformation domainInformation,
List<SimilarDomain> similar,
List<SimilarDomain> linking
List<SimilarDomain> linking,
FeedItems feedInfo
)
{
this(Map.of("info", true),
@ -216,7 +238,8 @@ public class SearchSiteInfoService {
siteUrl,
domainInformation,
similar,
linking);
linking,
feedInfo);
}
public String getLayout() {

View File

@ -0,0 +1,17 @@
<?xml version="1.0"?>
<!-- CC0 -->
<!-- Feed icon: an orange square with two white arcs and a white dot. -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="Capa_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
viewBox="0 0 455.731 455.731" xml:space="preserve">
<g>
<!-- Orange background covering the whole view box -->
<rect x="0" y="0" style="fill:#F78422;" width="455.731" height="455.731"/>
<g>
<!-- Larger white arc -->
<path style="fill:#FFFFFF;" d="M296.208,159.16C234.445,97.397,152.266,63.382,64.81,63.382v64.348
c70.268,0,136.288,27.321,185.898,76.931c49.609,49.61,76.931,115.63,76.931,185.898h64.348
C391.986,303.103,357.971,220.923,296.208,159.16z"/>
<!-- Smaller white arc -->
<path style="fill:#FFFFFF;" d="M64.143,172.273v64.348c84.881,0,153.938,69.056,153.938,153.939h64.348
C282.429,270.196,184.507,172.273,64.143,172.273z"/>
<!-- White dot -->
<circle style="fill:#FFFFFF;" cx="109.833" cy="346.26" r="46.088"/>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 891 B

View File

@ -0,0 +1,10 @@
{{!-- Feed preview: rendered only when a `feed` value is present in the context. --}}
{{#with feed}}
<h2><a title="Atom/RSS feed" target="external" rel="external noopener" href="{{feedUrl}}"><img width="16" height="16" src="/rss.svg" alt="Atom/RSS feed"></a> Feed (Experimental)</h2>
<dl>
{{#each items}}
<dt><a href="{{url}}" rel="external noopener ugc">{{title}}</a></dt>
{{!-- NOTE(review): description is emitted unescaped (triple-stash). This is only safe if the
      feed service delivers sanitized HTML; confirm, otherwise switch to an escaped expression. --}}
<dd><date>{{pubDay}}</date><br>{{{description}}}</dd>
{{/each}}
</dl>
{{/with}}

View File

@ -12,11 +12,58 @@
<img class="screenshot" width="300" height="225" src="/screenshot/{{domainId}}" alt="Screenshot of {{domain}}" />
</a>
{{#with domainInformation}}
{{> search/site-info/site-info-feed}}
{{> search/site-info/site-info-index}}
{{> search/site-info/site-info-links}}
{{/with}}
</div>
{{#if linking}}
<div id="similar-links">
<h2>Linking Domains</h2>
<table class="similarity-table">
<tr>
<th colspan="3">Meta</th>
<th>Rank</th>
<th>Domain</th>
<th>Similarity</th>
</tr>
{{#each linking}}
<tr>
<td>
{{#if indexed}}
{{#if active}}
<span title="Indexed">&#128064;</span>
{{/if}}
{{#unless active}}
<span title="Problem">&#128293;</span>
{{/unless}}
{{/if}}
</td>
<td>
{{#if screenshot}}&#x1f4f7;{{/if}}
</td>
<td>
{{#if linkType.isLinked}}
<span title="{{linkType.description}}"><a href="/crosstalk/?domains={{domain}},{{url.domain}}">{{{linkType}}}</a></span>
{{/if}}
</td>
<td>
<span title="{{rank}}%">{{{rankSymbols}}}</span>
</td>
<td>
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
<td>
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
</td>
</tr>
{{/each}}
</table>
</div>
{{/if}}
{{#if similar}}
<div id="similar-domains">
<h2>Similar Domains</h2>
@ -67,48 +114,4 @@
</div>
{{/if}}
{{#if linking}}
<div id="similar-links">
<h2>Linking Domains</h2>
<table class="similarity-table">
<tr>
<th colspan="3">Meta</th>
<th>Rank</th>
<th>Domain</th>
<th>Similarity</th>
</tr>
{{#each linking}}
<tr>
<td>
{{#if indexed}}
{{#if active}}
<span title="Indexed">&#128064;</span>
{{/if}}
{{#unless active}}
<span title="Problem">&#128293;</span>
{{/unless}}
{{/if}}
</td>
<td>
{{#if screenshot}}&#x1f4f7;{{/if}}
</td>
<td>
{{#if linkType.isLinked}}
<span title="{{linkType.description}}"><a href="/crosstalk/?domains={{domain}},{{url.domain}}">{{{linkType}}}</a></span>
{{/if}}
</td>
<td>
<span title="{{rank}}%">{{{rankSymbols}}}</span>
</td>
<td>
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
<td>
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
</td>
</tr>
{{/each}}
</table>
</div>
{{/if}}
</div>

View File

@ -28,6 +28,7 @@ include 'code:libraries:message-queue'
include 'code:features-search:screenshots'
include 'code:features-search:random-websites'
include 'code:features-search:feedlot-client'
include 'code:features-qs:query-parser'
include 'code:features-index:result-ranking'