Merge pull request #67 from MarginaliaSearch/rss-feeds-in-site-info
Add RSS Feeds to site info (WIP)
This commit is contained in:
commit
775974d5ec
22
code/features-search/feedlot-client/build.gradle
Normal file
22
code/features-search/feedlot-client/build.gradle
Normal file
@ -0,0 +1,22 @@
|
||||
// Build definition for the feedlot-client module.
plugins {
    id 'java'
    id 'jvm-test-suite'
}

// Compile and run with a Java 21 toolchain.
java {
    toolchain {
        languageVersion = JavaLanguageVersion.of(21)
    }
}

dependencies {
    // Main source set
    implementation libs.bundles.slf4j
    implementation libs.notnull
    implementation libs.gson

    // Test source set
    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito
}
|
20
code/features-search/feedlot-client/readme.md
Normal file
20
code/features-search/feedlot-client/readme.md
Normal file
@ -0,0 +1,20 @@
|
||||
Client for [FeedLotTheFeedBot](https://github.com/MarginaliaSearch/FeedLotTheFeedBot),
|
||||
the RSS/Atom feed fetcher and cache for Marginalia Search.
|
||||
|
||||
This service is external to the Marginalia Search codebase,
|
||||
as it is not a core part of the search engine and has other
|
||||
utilities.
|
||||
|
||||
## Example
|
||||
|
||||
```java
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
var client = new FeedlotClient("localhost", 8080,
|
||||
gson,
|
||||
Duration.ofMillis(100), // connect timeout
|
||||
Duration.ofMillis(100)); // request timeout
|
||||
|
||||
CompletableFuture<FeedItems> items = client.getFeedItems("www.marginalia.nu");
|
||||
```
|
@ -0,0 +1,58 @@
|
||||
package nu.marginalia.feedlot;
|
||||
|
||||
import com.google.gson.Gson;
import nu.marginalia.feedlot.model.FeedItems;

import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executors;
|
||||
|
||||
public class FeedlotClient {
|
||||
private final String feedlotHost;
|
||||
private final int feedlotPort;
|
||||
private final Gson gson;
|
||||
private final HttpClient httpClient;
|
||||
private final Duration requestTimeout;
|
||||
|
||||
public FeedlotClient(String feedlotHost,
|
||||
int feedlotPort,
|
||||
Gson gson,
|
||||
Duration connectTimeout,
|
||||
Duration requestTimeout
|
||||
)
|
||||
{
|
||||
this.feedlotHost = feedlotHost;
|
||||
this.feedlotPort = feedlotPort;
|
||||
this.gson = gson;
|
||||
|
||||
httpClient = HttpClient.newBuilder()
|
||||
.executor(Executors.newVirtualThreadPerTaskExecutor())
|
||||
.connectTimeout(connectTimeout)
|
||||
.build();
|
||||
this.requestTimeout = requestTimeout;
|
||||
}
|
||||
|
||||
public CompletableFuture<FeedItems> getFeedItems(String domainName) {
|
||||
return httpClient.sendAsync(
|
||||
HttpRequest.newBuilder()
|
||||
.uri(URI.create("http://%s:%d/feed/%s".formatted(feedlotHost, feedlotPort, domainName)))
|
||||
.GET()
|
||||
.timeout(requestTimeout)
|
||||
.build(),
|
||||
HttpResponse.BodyHandlers.ofString()
|
||||
).thenApply(HttpResponse::body)
|
||||
.thenApply(this::parseFeedItems);
|
||||
}
|
||||
|
||||
private FeedItems parseFeedItems(String s) {
|
||||
return gson.fromJson(s, FeedItems.class);
|
||||
}
|
||||
|
||||
public void stop() {
|
||||
httpClient.close();
|
||||
}
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
package nu.marginalia.feedlot.model;
|
||||
|
||||
/**
 * A single entry of a feed.
 *
 * @param title       entry title
 * @param date        publication date string; {@link #pubDay()} treats it as an
 *                    ISO style date and keeps its first ten characters
 * @param description entry description, possibly containing markup
 * @param url         link to the entry
 */
public record FeedItem(String title, String date, String description, String url) {

    /**
     * Extracts the day part from an ISO style date string.
     *
     * @return the first ten characters of {@code date}, the whole string when
     *         it is ten characters or shorter, or an empty string when no date
     *         was provided
     */
    public String pubDay() {
        if (date == null) {
            return "";
        }
        return date.length() > 10 ? date.substring(0, 10) : date;
    }

    /**
     * Returns the description with angle brackets replaced by HTML entities,
     * so that no tags from the feed survive when the value is emitted
     * unescaped into element content.
     *
     * @return the escaped description, or an empty string when there is none
     */
    public String descriptionSafe() {
        if (description == null) {
            return "";
        }
        // Only '<' and '>' are rewritten; '&' is left untouched, presumably so
        // entities already present in the feed text are not double-escaped.
        return description
                .replace("<", "&lt;")
                .replace(">", "&gt;");
    }
}
|
@ -0,0 +1,6 @@
|
||||
package nu.marginalia.feedlot.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public record FeedItems(String domain, String feedUrl, String updated, List<FeedItem> items) {
|
||||
}
|
@ -47,6 +47,7 @@ dependencies {
|
||||
|
||||
implementation project(':code:features-search:screenshots')
|
||||
implementation project(':code:features-search:random-websites')
|
||||
implementation project(':code:features-search:feedlot-client')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
|
@ -1,10 +1,15 @@
|
||||
package nu.marginalia.search;
|
||||
|
||||
import com.google.inject.AbstractModule;
|
||||
import com.google.inject.Provides;
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.WebsiteUrl;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.renderer.config.HandlebarsConfigurator;
|
||||
import nu.marginalia.feedlot.FeedlotClient;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
public class SearchModule extends AbstractModule {
|
||||
|
||||
@ -17,4 +22,14 @@ public class SearchModule extends AbstractModule {
|
||||
System.getProperty("website-url", "https://search.marginalia.nu/")));
|
||||
}
|
||||
|
||||
@Provides
|
||||
public FeedlotClient provideFeedlotClient() {
|
||||
return new FeedlotClient(
|
||||
System.getProperty("ext-svc-feedlot-host", "feedlot"),
|
||||
Integer.getInteger("ext-svc-feedlot-port", 80),
|
||||
GsonFactory.get(),
|
||||
Duration.ofMillis(250),
|
||||
Duration.ofMillis(100)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -5,13 +5,17 @@ import nu.marginalia.assistant.client.AssistantClient;
|
||||
import nu.marginalia.assistant.client.model.SimilarDomain;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.feedlot.model.FeedItems;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.renderer.MustacheRenderer;
|
||||
import nu.marginalia.renderer.RendererFactory;
|
||||
import nu.marginalia.search.SearchOperator;
|
||||
import nu.marginalia.assistant.client.model.DomainInformation;
|
||||
import nu.marginalia.feedlot.FeedlotClient;
|
||||
import nu.marginalia.search.model.UrlDetails;
|
||||
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
|
||||
@ -21,19 +25,23 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class SearchSiteInfoService {
|
||||
private static final Logger logger = LoggerFactory.getLogger(SearchSiteInfoService.class);
|
||||
|
||||
private final SearchOperator searchOperator;
|
||||
private final AssistantClient assistantClient;
|
||||
private final SearchFlagSiteService flagSiteService;
|
||||
private final DbDomainQueries domainQueries;
|
||||
private final MustacheRenderer<Object> renderer;
|
||||
private final FeedlotClient feedlotClient;
|
||||
|
||||
@Inject
|
||||
public SearchSiteInfoService(SearchOperator searchOperator,
|
||||
AssistantClient assistantClient,
|
||||
RendererFactory rendererFactory,
|
||||
SearchFlagSiteService flagSiteService,
|
||||
DbDomainQueries domainQueries) throws IOException {
|
||||
DbDomainQueries domainQueries,
|
||||
FeedlotClient feedlotClient) throws IOException
|
||||
{
|
||||
this.searchOperator = searchOperator;
|
||||
this.assistantClient = assistantClient;
|
||||
this.flagSiteService = flagSiteService;
|
||||
@ -41,6 +49,7 @@ public class SearchSiteInfoService {
|
||||
|
||||
this.renderer = rendererFactory.renderer("search/site-info/site-info");
|
||||
|
||||
this.feedlotClient = feedlotClient;
|
||||
}
|
||||
|
||||
public Object handle(Request request, Response response) throws SQLException {
|
||||
@ -121,6 +130,7 @@ public class SearchSiteInfoService {
|
||||
final List<SimilarDomain> linkingDomains;
|
||||
String url = "https://" + domainName + "/";;
|
||||
|
||||
var feedItemsFuture = feedlotClient.getFeedItems(domainName);
|
||||
if (domainId < 0 || !assistantClient.isAccepting()) {
|
||||
domainInfo = createDummySiteInfo(domainName);
|
||||
similarSet = List.of();
|
||||
@ -134,11 +144,18 @@ public class SearchSiteInfoService {
|
||||
linkingDomains = assistantClient
|
||||
.linkedDomains(ctx, domainId, 100)
|
||||
.blockingFirst();
|
||||
}
|
||||
|
||||
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(ctx, domainName, 1);
|
||||
if (!sampleResults.isEmpty()) {
|
||||
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
|
||||
}
|
||||
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(ctx, domainName, 5);
|
||||
if (!sampleResults.isEmpty()) {
|
||||
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
|
||||
}
|
||||
|
||||
FeedItems feedItems = null;
|
||||
try {
|
||||
feedItems = feedItemsFuture.get();
|
||||
} catch (Exception e) {
|
||||
logger.debug("Failed to get feed items for {}: {}", domainName, e.getMessage());
|
||||
}
|
||||
|
||||
return new SiteInfoWithContext(domainName,
|
||||
@ -146,7 +163,9 @@ public class SearchSiteInfoService {
|
||||
url,
|
||||
domainInfo,
|
||||
similarSet,
|
||||
linkingDomains
|
||||
linkingDomains,
|
||||
feedItems,
|
||||
sampleResults
|
||||
);
|
||||
}
|
||||
|
||||
@ -200,13 +219,18 @@ public class SearchSiteInfoService {
|
||||
String siteUrl,
|
||||
DomainInformation domainInformation,
|
||||
List<SimilarDomain> similar,
|
||||
List<SimilarDomain> linking) {
|
||||
List<SimilarDomain> linking,
|
||||
FeedItems feed,
|
||||
List<UrlDetails> samples
|
||||
) {
|
||||
public SiteInfoWithContext(String domain,
|
||||
long domainId,
|
||||
String siteUrl,
|
||||
DomainInformation domainInformation,
|
||||
List<SimilarDomain> similar,
|
||||
List<SimilarDomain> linking
|
||||
List<SimilarDomain> linking,
|
||||
FeedItems feedInfo,
|
||||
List<UrlDetails> samples
|
||||
)
|
||||
{
|
||||
this(Map.of("info", true),
|
||||
@ -216,7 +240,9 @@ public class SearchSiteInfoService {
|
||||
siteUrl,
|
||||
domainInformation,
|
||||
similar,
|
||||
linking);
|
||||
linking,
|
||||
feedInfo,
|
||||
samples);
|
||||
}
|
||||
|
||||
public String getLayout() {
|
||||
@ -224,6 +250,12 @@ public class SearchSiteInfoService {
|
||||
if (similar.size() < 25) {
|
||||
return "lopsided";
|
||||
}
|
||||
else if (!feed.items().isEmpty()) {
|
||||
return "lopsided";
|
||||
}
|
||||
else if (!samples.isEmpty()) {
|
||||
return "lopsided";
|
||||
}
|
||||
else {
|
||||
return "balanced";
|
||||
}
|
||||
|
@ -0,0 +1,17 @@
|
||||
<?xml version="1.0"?>
|
||||
<!-- CC0 -->
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||
<svg version="1.1" id="Capa_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
viewBox="0 0 455.731 455.731" xml:space="preserve">
|
||||
<g>
|
||||
<rect x="0" y="0" style="fill:#F78422;" width="455.731" height="455.731"/>
|
||||
<g>
|
||||
<path style="fill:#FFFFFF;" d="M296.208,159.16C234.445,97.397,152.266,63.382,64.81,63.382v64.348
|
||||
c70.268,0,136.288,27.321,185.898,76.931c49.609,49.61,76.931,115.63,76.931,185.898h64.348
|
||||
C391.986,303.103,357.971,220.923,296.208,159.16z"/>
|
||||
<path style="fill:#FFFFFF;" d="M64.143,172.273v64.348c84.881,0,153.938,69.056,153.938,153.939h64.348
|
||||
C282.429,270.196,184.507,172.273,64.143,172.273z"/>
|
||||
<circle style="fill:#FFFFFF;" cx="109.833" cy="346.26" r="46.088"/>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 891 B |
@ -0,0 +1,20 @@
|
||||
{{#with feed}}
|
||||
<h2><a title="Atom/RSS feed" target="external" href="{{feedUrl}}"><img width="16" height="16" src="/rss.svg"></a> Feed (Experimental)</h2>
|
||||
|
||||
<dl>
|
||||
{{#each items}}
|
||||
<dt><a href="{{url}}" rel="external noopener ugc">{{title}}</a></dt>
|
||||
<dd><date>{{pubDay}}</date><br>{{{descriptionSafe}}}</dd>
|
||||
{{/each}}
|
||||
</dl>
|
||||
{{/with}}
|
||||
|
||||
{{#unless feed}}{{#if samples}}
|
||||
<h2>Sample</h2>
|
||||
<dl>
|
||||
{{#each samples}}
|
||||
<dt><a href="{{url}}" rel="external noopener ugc">{{title}}</a></dt>
|
||||
<dd>{{{description}}}</dd>
|
||||
{{/each}}
|
||||
</dl>
|
||||
{{/if}}{{/unless}}
|
@ -12,11 +12,58 @@
|
||||
<img class="screenshot" width="300" height="225" src="/screenshot/{{domainId}}" alt="Screenshot of {{domain}}" />
|
||||
</a>
|
||||
{{#with domainInformation}}
|
||||
{{> search/site-info/site-info-feed}}
|
||||
{{> search/site-info/site-info-index}}
|
||||
{{> search/site-info/site-info-links}}
|
||||
{{/with}}
|
||||
</div>
|
||||
|
||||
{{#if linking}}
|
||||
<div id="similar-links">
|
||||
<h2>Linking Domains</h2>
|
||||
|
||||
<table class="similarity-table">
|
||||
<tr>
|
||||
<th colspan="3">Meta</th>
|
||||
<th>Rank</th>
|
||||
<th>Domain</th>
|
||||
<th>Similarity</th>
|
||||
</tr>
|
||||
{{#each linking}}
|
||||
<tr>
|
||||
<td>
|
||||
{{#if indexed}}
|
||||
{{#if active}}
|
||||
<span title="Indexed">👀</span>
|
||||
{{/if}}
|
||||
{{#unless active}}
|
||||
<span title="Problem">🔥</span>
|
||||
{{/unless}}
|
||||
{{/if}}
|
||||
</td>
|
||||
<td>
|
||||
{{#if screenshot}}📷{{/if}}
|
||||
</td>
|
||||
<td>
|
||||
{{#if linkType.isLinked}}
|
||||
<span title="{{linkType.description}}"><a href="/crosstalk/?domains={{domain}},{{url.domain}}">{{{linkType}}}</a></span>
|
||||
{{/if}}
|
||||
</td>
|
||||
<td>
|
||||
<span title="{{rank}}%">{{{rankSymbols}}}</span>
|
||||
</td>
|
||||
<td>
|
||||
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
|
||||
<td>
|
||||
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
|
||||
</td>
|
||||
</tr>
|
||||
{{/each}}
|
||||
</table>
|
||||
</div>
|
||||
{{/if}}
|
||||
|
||||
|
||||
{{#if similar}}
|
||||
<div id="similar-domains">
|
||||
<h2>Similar Domains</h2>
|
||||
@ -67,48 +114,4 @@
|
||||
</div>
|
||||
{{/if}}
|
||||
|
||||
{{#if linking}}
|
||||
<div id="similar-links">
|
||||
<h2>Linking Domains</h2>
|
||||
|
||||
<table class="similarity-table">
|
||||
<tr>
|
||||
<th colspan="3">Meta</th>
|
||||
<th>Rank</th>
|
||||
<th>Domain</th>
|
||||
<th>Similarity</th>
|
||||
</tr>
|
||||
{{#each linking}}
|
||||
<tr>
|
||||
<td>
|
||||
{{#if indexed}}
|
||||
{{#if active}}
|
||||
<span title="Indexed">👀</span>
|
||||
{{/if}}
|
||||
{{#unless active}}
|
||||
<span title="Problem">🔥</span>
|
||||
{{/unless}}
|
||||
{{/if}}
|
||||
</td>
|
||||
<td>
|
||||
{{#if screenshot}}📷{{/if}}
|
||||
</td>
|
||||
<td>
|
||||
{{#if linkType.isLinked}}
|
||||
<span title="{{linkType.description}}"><a href="/crosstalk/?domains={{domain}},{{url.domain}}">{{{linkType}}}</a></span>
|
||||
{{/if}}
|
||||
</td>
|
||||
<td>
|
||||
<span title="{{rank}}%">{{{rankSymbols}}}</span>
|
||||
</td>
|
||||
<td>
|
||||
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
|
||||
<td>
|
||||
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
|
||||
</td>
|
||||
</tr>
|
||||
{{/each}}
|
||||
</table>
|
||||
</div>
|
||||
{{/if}}
|
||||
</div>
|
@ -28,6 +28,7 @@ include 'code:libraries:message-queue'
|
||||
|
||||
include 'code:features-search:screenshots'
|
||||
include 'code:features-search:random-websites'
|
||||
include 'code:features-search:feedlot-client'
|
||||
include 'code:features-qs:query-parser'
|
||||
include 'code:features-index:result-ranking'
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user