Merge branch 'master' into converter-optimizations

Viktor Lofgren 2023-12-28 13:26:19 +01:00
commit 5ce46a61d4
15 changed files with 457 additions and 125 deletions

View File

@ -0,0 +1,22 @@
plugins {
id 'java'
id 'jvm-test-suite'
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(21))
}
}
dependencies {
implementation libs.bundles.slf4j
implementation libs.notnull
implementation libs.gson
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}

View File

@ -0,0 +1,20 @@
Client for [FeedlotTheFeedBot](https://github.com/MarginaliaSearch/FeedLotTheFeedBot),
the RSS/Atom feed fetcher and cache for Marginalia Search.
This service is external to the Marginalia Search codebase,
as it is not a core part of the search engine and has uses
beyond it.
## Example
```java
import java.time.Duration;
var client = new FeedlotClient("localhost", 8080,
gson,
Duration.ofMillis(100), // connect timeout
Duration.ofMillis(100)); // request timeout
CompletableFuture<FeedItems> items = client.getFeedItems("www.marginalia.nu");
```
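The returned `CompletableFuture` can be blocked on or consumed asynchronously. A minimal sketch, assuming the `client` and `items` from the example above and a reachable feed service:
```java
// Attach a callback rather than blocking the calling thread.
items.thenAccept(feed ->
        feed.items().forEach(item -> System.out.println(item.title())));

// Release the client's HTTP resources when done.
client.stop();
```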

View File

@ -0,0 +1,58 @@
package nu.marginalia.feedlot;
import com.google.gson.Gson;
import nu.marginalia.feedlot.model.FeedItems;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.concurrent.Executors;
import java.util.concurrent.CompletableFuture;
public class FeedlotClient {
private final String feedlotHost;
private final int feedlotPort;
private final Gson gson;
private final HttpClient httpClient;
private final Duration requestTimeout;
public FeedlotClient(String feedlotHost,
int feedlotPort,
Gson gson,
Duration connectTimeout,
Duration requestTimeout
)
{
this.feedlotHost = feedlotHost;
this.feedlotPort = feedlotPort;
this.gson = gson;
httpClient = HttpClient.newBuilder()
.executor(Executors.newVirtualThreadPerTaskExecutor())
.connectTimeout(connectTimeout)
.build();
this.requestTimeout = requestTimeout;
}
public CompletableFuture<FeedItems> getFeedItems(String domainName) {
return httpClient.sendAsync(
HttpRequest.newBuilder()
.uri(URI.create("http://%s:%d/feed/%s".formatted(feedlotHost, feedlotPort, domainName)))
.GET()
.timeout(requestTimeout)
.build(),
HttpResponse.BodyHandlers.ofString()
).thenApply(HttpResponse::body)
.thenApply(this::parseFeedItems);
}
private FeedItems parseFeedItems(String s) {
return gson.fromJson(s, FeedItems.class);
}
public void stop() {
httpClient.close();
}
}

View File

@ -0,0 +1,17 @@
package nu.marginalia.feedlot.model;
public record FeedItem(String title, String date, String description, String url) {
public String pubDay() { // Extract the date from an ISO style date string
if (date.length() > 10) {
return date.substring(0, 10);
}
return date;
}
public String descriptionSafe() {
return description
.replace("<", "&lt;")
.replace(">", "&gt;");
}
}

View File

@ -0,0 +1,6 @@
package nu.marginalia.feedlot.model;
import java.util.List;
public record FeedItems(String domain, String feedUrl, String updated, List<FeedItem> items) {
}

View File

@ -40,8 +40,8 @@ public class CrawlerWarcResynchronizer {
for (var item : reader) {
accept(item);
}
} catch (IOException e) {
logger.info(STR."Failed to read full warc file \{tempFile}", e);
} catch (Exception e) {
logger.info(STR."(Expected) Failed to read full warc file \{tempFile}: \{e.getClass().getSimpleName()} \{e.getMessage()}");
}
// Second pass, copy records to the new warc file
@ -49,8 +49,8 @@ public class CrawlerWarcResynchronizer {
for (var item : reader) {
recorder.resync(item);
}
} catch (IOException e) {
logger.info(STR."Failed to read full warc file \{tempFile}", e);
} catch (Exception e) {
logger.info(STR."(Expected) Failed to read full warc file \{tempFile}: \{e.getClass().getSimpleName()} \{e.getMessage()}");
}
}

View File

@ -37,7 +37,7 @@ public class WarcRecorder implements AutoCloseable {
private final Path warcFile;
private static final Logger logger = LoggerFactory.getLogger(WarcRecorder.class);
private final ThreadLocal<byte[]> bufferThreadLocal = ThreadLocal.withInitial(() -> new byte[MAX_SIZE]);
private final static ThreadLocal<byte[]> bufferThreadLocal = ThreadLocal.withInitial(() -> new byte[MAX_SIZE]);
private boolean temporaryFile = false;

View File

@ -4,23 +4,24 @@ import lombok.SneakyThrows;
import nu.marginalia.UserAgent;
import nu.marginalia.WmsaHome;
import nu.marginalia.atags.model.DomainLinks;
import nu.marginalia.crawl.retreival.CrawlDataReference;
import nu.marginalia.crawl.retreival.CrawlerRetreiver;
import nu.marginalia.crawl.retreival.DomainProber;
import nu.marginalia.crawl.retreival.*;
import nu.marginalia.crawl.retreival.fetcher.HttpFetcher;
import nu.marginalia.crawl.retreival.fetcher.HttpFetcherImpl;
import nu.marginalia.crawl.retreival.fetcher.warc.WarcRecorder;
import nu.marginalia.crawling.io.CrawledDomainReader;
import nu.marginalia.crawling.io.SerializableCrawlDataStream;
import nu.marginalia.crawling.model.CrawledDocument;
import nu.marginalia.crawling.model.CrawledDomain;
import nu.marginalia.crawling.model.SerializableCrawlData;
import nu.marginalia.crawling.parquet.CrawledDocumentParquetRecordFileWriter;
import nu.marginalia.io.crawlspec.CrawlSpecRecordParquetFileWriter;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.crawlspec.CrawlSpecRecord;
import org.junit.jupiter.api.*;
import org.netpreserve.jwarc.*;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
@ -37,6 +38,7 @@ class CrawlerRetreiverTest {
Path tempFileParquet1;
Path tempFileWarc2;
Path tempFileParquet2;
Path tempFileWarc3;
@BeforeEach
public void setUp() throws IOException {
httpFetcher = new HttpFetcherImpl("search.marginalia.nu; testing a bit :D");
@ -66,7 +68,11 @@ class CrawlerRetreiverTest {
if (tempFileParquet2 != null) {
Files.deleteIfExists(tempFileParquet2);
}
if (tempFileWarc3 != null) {
Files.deleteIfExists(tempFileWarc3);
}
}
@Test
public void testWarcOutput() throws IOException {
var specs = CrawlSpecRecord
@ -79,11 +85,7 @@ class CrawlerRetreiverTest {
try {
tempFile = Files.createTempFile("crawling-process", "warc");
try (var recorder = new WarcRecorder(tempFile)) {
new CrawlerRetreiver(httpFetcher, new DomainProber(d -> true), specs, recorder).fetch();
} catch (IOException ex) {
Assertions.fail(ex);
}
doCrawl(tempFile, specs);
Set<String> requests = new HashSet<>();
Set<String> responses = new HashSet<>();
@ -112,6 +114,57 @@ class CrawlerRetreiverTest {
Files.deleteIfExists(tempFile);
}
}
@SneakyThrows
@Test
public void testResync() throws IOException {
var specs = CrawlSpecRecord
.builder()
.crawlDepth(5)
.domain("www.marginalia.nu")
.urls(List.of("https://www.marginalia.nu/misc/debian-laptop-install-log/"))
.build();
tempFileWarc1 = Files.createTempFile("crawling-process", "warc");
tempFileWarc2 = Files.createTempFile("crawling-process", "warc");
doCrawl(tempFileWarc1, specs);
Set<String> requests = new HashSet<>();
Set<String> responses = new HashSet<>();
var revisitCrawlFrontier = new DomainCrawlFrontier(
new EdgeDomain("www.marginalia.nu"),
List.of(), 100);
var resync = new CrawlerWarcResynchronizer(revisitCrawlFrontier,
new WarcRecorder(tempFileWarc2)
);
// truncate the size of the file to simulate a crash
simulatePartialWrite(tempFileWarc1);
resync.run(tempFileWarc1);
assertTrue(revisitCrawlFrontier.addKnown(new EdgeUrl("https://www.marginalia.nu/misc/debian-laptop-install-log/")));
try (var reader = new WarcReader(tempFileWarc2)) {
reader.forEach(record -> {
if (record instanceof WarcRequest req) {
requests.add(req.target());
System.out.println(req.type() + ":" + req.target());
}
else if (record instanceof WarcResponse rsp) {
responses.add(rsp.target());
System.out.println(rsp.type() + ":" + rsp.target());
}
else {
System.out.println(record.type());
}
});
}
assertTrue(requests.contains("https://www.marginalia.nu/misc/debian-laptop-install-log/"));
assertEquals(requests, responses);
}
@Test
public void testWithKnownDomains() throws IOException {
var specs = CrawlSpecRecord
@ -125,15 +178,9 @@ class CrawlerRetreiverTest {
tempFileWarc1 = Files.createTempFile("crawling-process", ".warc");
try (var recorder = new WarcRecorder(tempFileWarc1)) {
new CrawlerRetreiver(httpFetcher, new DomainProber(d -> true), specs, recorder).fetch();
}
catch (IOException ex) {
Assertions.fail(ex);
}
doCrawl(tempFileWarc1, specs);
CrawledDocumentParquetRecordFileWriter.convertWarc("www.marginalia.nu",
new UserAgent("test"), tempFileWarc1, tempFileParquet1);
convertToParquet(tempFileWarc1, tempFileParquet1);
try (var stream = CrawledDomainReader.createDataStream(tempFileParquet1)) {
while (stream.hasNext()) {
@ -177,16 +224,8 @@ class CrawlerRetreiverTest {
tempFileWarc1 = Files.createTempFile("crawling-process", ".warc");
try (var recorder = new WarcRecorder(tempFileWarc1)) {
new CrawlerRetreiver(httpFetcher, new DomainProber(d -> true), specs, recorder).fetch();
}
catch (IOException ex) {
Assertions.fail(ex);
}
CrawledDocumentParquetRecordFileWriter.convertWarc("www.marginalia.nu",
new UserAgent("test"), tempFileWarc1, tempFileParquet1);
doCrawl(tempFileWarc1, specs);
convertToParquet(tempFileWarc1, tempFileParquet1);
try (var stream = CrawledDomainReader.createDataStream(tempFileParquet1)) {
while (stream.hasNext()) {
@ -232,46 +271,11 @@ class CrawlerRetreiverTest {
tempFileWarc1 = Files.createTempFile("crawling-process", ".warc.gz");
tempFileWarc2 = Files.createTempFile("crawling-process", ".warc.gz");
Map<Class<? extends SerializableCrawlData>, List<SerializableCrawlData>> data = new HashMap<>();
try (var recorder = new WarcRecorder(tempFileWarc1)) {
new CrawlerRetreiver(httpFetcher, new DomainProber(d -> true), specs, recorder).fetch();
}
catch (IOException ex) {
Assertions.fail(ex);
}
CrawledDocumentParquetRecordFileWriter.convertWarc("www.marginalia.nu",
new UserAgent("test"), tempFileWarc1, tempFileParquet1);
try (var stream = CrawledDomainReader.createDataStream(tempFileParquet1)) {
while (stream.hasNext()) {
var doc = stream.next();
data.computeIfAbsent(doc.getClass(), c -> new ArrayList<>()).add(doc);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
var stream = CrawledDomainReader.createDataStream(tempFileParquet1);
System.out.println("---");
CrawledDomain domain = (CrawledDomain) data.get(CrawledDomain.class).get(0);
domain.doc = data.get(CrawledDocument.class).stream().map(CrawledDocument.class::cast).collect(Collectors.toList());
try (var recorder = new WarcRecorder(tempFileWarc2)) {
new CrawlerRetreiver(httpFetcher, new DomainProber(d -> true), specs, recorder).fetch(new DomainLinks(),
new CrawlDataReference(stream));
}
catch (IOException ex) {
Assertions.fail(ex);
}
CrawledDocumentParquetRecordFileWriter.convertWarc("www.marginalia.nu",
new UserAgent("test"), tempFileWarc2, tempFileParquet2);
doCrawl(tempFileWarc1, specs);
doCrawlWithReferenceStream(specs,
CrawledDomainReader.createDataStream(tempFileParquet1)
);
convertToParquet(tempFileWarc2, tempFileParquet2);
try (var reader = new WarcReader(tempFileWarc2)) {
WarcXResponseReference.register(reader);
@ -304,4 +308,120 @@ class CrawlerRetreiverTest {
throw new RuntimeException(e);
}
}
private void convertToParquet(Path tempFileWarc2, Path tempFileParquet2) {
CrawledDocumentParquetRecordFileWriter.convertWarc("www.marginalia.nu",
new UserAgent("test"), tempFileWarc2, tempFileParquet2);
}
@SneakyThrows
@Test
public void testRecrawlWithResync() throws IOException {
var specs = CrawlSpecRecord
.builder()
.crawlDepth(12)
.domain("www.marginalia.nu")
.urls(List.of("https://www.marginalia.nu/some-dead-link"))
.build();
tempFileWarc1 = Files.createTempFile("crawling-process", ".warc.gz");
tempFileWarc2 = Files.createTempFile("crawling-process", ".warc.gz");
tempFileWarc3 = Files.createTempFile("crawling-process", ".warc.gz");
Map<Class<? extends SerializableCrawlData>, List<SerializableCrawlData>> data = new HashMap<>();
doCrawl(tempFileWarc1, specs);
convertToParquet(tempFileWarc1, tempFileParquet1);
try (var stream = CrawledDomainReader.createDataStream(tempFileParquet1)) {
while (stream.hasNext()) {
var doc = stream.next();
data.computeIfAbsent(doc.getClass(), c -> new ArrayList<>()).add(doc);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
var stream = CrawledDomainReader.createDataStream(tempFileParquet1);
System.out.println("---");
doCrawlWithReferenceStream(specs, stream);
var revisitCrawlFrontier = new DomainCrawlFrontier(
new EdgeDomain("www.marginalia.nu"),
List.of(), 100);
var resync = new CrawlerWarcResynchronizer(revisitCrawlFrontier,
new WarcRecorder(tempFileWarc3)
);
// truncate the size of the file to simulate a crash
simulatePartialWrite(tempFileWarc2);
resync.run(tempFileWarc2);
assertTrue(revisitCrawlFrontier.addKnown(new EdgeUrl("https://www.marginalia.nu/")));
convertToParquet(tempFileWarc3, tempFileParquet2);
try (var reader = new WarcReader(tempFileWarc3)) {
WarcXResponseReference.register(reader);
reader.forEach(record -> {
if (record instanceof WarcResponse rsp) {
try {
System.out.println(rsp.type() + ":" + rsp.target() + "/" + rsp.http().status());
} catch (IOException e) {
throw new RuntimeException(e);
}
}
if (record instanceof WarcMetadata rsp) {
System.out.println("meta:" + rsp.target());
}
});
}
try (var ds = CrawledDomainReader.createDataStream(tempFileParquet2)) {
while (ds.hasNext()) {
var doc = ds.next();
if (doc instanceof CrawledDomain dr) {
System.out.println(dr.domain + "/" + dr.crawlerStatus);
}
else if (doc instanceof CrawledDocument dc) {
System.out.println(dc.url + "/" + dc.crawlerStatus + "/" + dc.httpStatus + "/" + dc.timestamp);
}
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private void simulatePartialWrite(Path tempFileWarc2) throws IOException {
try (var raf = new RandomAccessFile(tempFileWarc2.toFile(), "rw")) {
raf.setLength(raf.length() - 10);
}
}
private void doCrawlWithReferenceStream(CrawlSpecRecord specs, SerializableCrawlDataStream stream) {
try (var recorder = new WarcRecorder(tempFileWarc2)) {
new CrawlerRetreiver(httpFetcher, new DomainProber(d -> true), specs, recorder).fetch(new DomainLinks(),
new CrawlDataReference(stream));
}
catch (IOException ex) {
Assertions.fail(ex);
}
}
private void doCrawl(Path tempFileWarc1, CrawlSpecRecord specs) {
try (var recorder = new WarcRecorder(tempFileWarc1)) {
new CrawlerRetreiver(httpFetcher, new DomainProber(d -> true), specs, recorder).fetch();
} catch (IOException ex) {
Assertions.fail(ex);
}
}
}

View File

@ -47,6 +47,7 @@ dependencies {
implementation project(':code:features-search:screenshots')
implementation project(':code:features-search:random-websites')
implementation project(':code:features-search:feedlot-client')
implementation libs.bundles.slf4j

View File

@ -1,10 +1,15 @@
package nu.marginalia.search;
import com.google.inject.AbstractModule;
import com.google.inject.Provides;
import nu.marginalia.LanguageModels;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.WmsaHome;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.renderer.config.HandlebarsConfigurator;
import nu.marginalia.feedlot.FeedlotClient;
import java.time.Duration;
public class SearchModule extends AbstractModule {
@ -17,4 +22,14 @@ public class SearchModule extends AbstractModule {
System.getProperty("website-url", "https://search.marginalia.nu/")));
}
@Provides
public FeedlotClient provideFeedlotClient() {
return new FeedlotClient(
System.getProperty("ext-svc-feedlot-host", "feedlot"),
Integer.getInteger("ext-svc-feedlot-port", 80),
GsonFactory.get(),
Duration.ofMillis(250),
Duration.ofMillis(100)
);
}
}

View File

@ -5,13 +5,17 @@ import nu.marginalia.assistant.client.AssistantClient;
import nu.marginalia.assistant.client.model.SimilarDomain;
import nu.marginalia.client.Context;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.feedlot.model.FeedItems;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.search.SearchOperator;
import nu.marginalia.assistant.client.model.DomainInformation;
import nu.marginalia.feedlot.FeedlotClient;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
import spark.Response;
@ -21,19 +25,23 @@ import java.util.List;
import java.util.Map;
public class SearchSiteInfoService {
private static final Logger logger = LoggerFactory.getLogger(SearchSiteInfoService.class);
private final SearchOperator searchOperator;
private final AssistantClient assistantClient;
private final SearchFlagSiteService flagSiteService;
private final DbDomainQueries domainQueries;
private final MustacheRenderer<Object> renderer;
private final FeedlotClient feedlotClient;
@Inject
public SearchSiteInfoService(SearchOperator searchOperator,
AssistantClient assistantClient,
RendererFactory rendererFactory,
SearchFlagSiteService flagSiteService,
DbDomainQueries domainQueries) throws IOException {
DbDomainQueries domainQueries,
FeedlotClient feedlotClient) throws IOException
{
this.searchOperator = searchOperator;
this.assistantClient = assistantClient;
this.flagSiteService = flagSiteService;
@ -41,6 +49,7 @@ public class SearchSiteInfoService {
this.renderer = rendererFactory.renderer("search/site-info/site-info");
this.feedlotClient = feedlotClient;
}
public Object handle(Request request, Response response) throws SQLException {
@ -121,6 +130,7 @@ public class SearchSiteInfoService {
final List<SimilarDomain> linkingDomains;
String url = "https://" + domainName + "/";
var feedItemsFuture = feedlotClient.getFeedItems(domainName);
if (domainId < 0 || !assistantClient.isAccepting()) {
domainInfo = createDummySiteInfo(domainName);
similarSet = List.of();
@ -134,11 +144,18 @@ public class SearchSiteInfoService {
linkingDomains = assistantClient
.linkedDomains(ctx, domainId, 100)
.blockingFirst();
}
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(ctx, domainName, 1);
if (!sampleResults.isEmpty()) {
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
}
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(ctx, domainName, 5);
if (!sampleResults.isEmpty()) {
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
}
FeedItems feedItems = null;
try {
feedItems = feedItemsFuture.get();
} catch (Exception e) {
logger.debug("Failed to get feed items for {}: {}", domainName, e.getMessage());
}
return new SiteInfoWithContext(domainName,
@ -146,7 +163,9 @@ public class SearchSiteInfoService {
url,
domainInfo,
similarSet,
linkingDomains
linkingDomains,
feedItems,
sampleResults
);
}
@ -200,13 +219,18 @@ public class SearchSiteInfoService {
String siteUrl,
DomainInformation domainInformation,
List<SimilarDomain> similar,
List<SimilarDomain> linking) {
List<SimilarDomain> linking,
FeedItems feed,
List<UrlDetails> samples
) {
public SiteInfoWithContext(String domain,
long domainId,
String siteUrl,
DomainInformation domainInformation,
List<SimilarDomain> similar,
List<SimilarDomain> linking
List<SimilarDomain> linking,
FeedItems feedInfo,
List<UrlDetails> samples
)
{
this(Map.of("info", true),
@ -216,7 +240,9 @@ public class SearchSiteInfoService {
siteUrl,
domainInformation,
similar,
linking);
linking,
feedInfo,
samples);
}
public String getLayout() {
@ -224,6 +250,12 @@ public class SearchSiteInfoService {
if (similar.size() < 25) {
return "lopsided";
}
else if (!feed.items().isEmpty()) {
return "lopsided";
}
else if (!samples.isEmpty()) {
return "lopsided";
}
else {
return "balanced";
}

View File

@ -0,0 +1,17 @@
<?xml version="1.0"?>
<!-- CC0 -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="Capa_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
viewBox="0 0 455.731 455.731" xml:space="preserve">
<g>
<rect x="0" y="0" style="fill:#F78422;" width="455.731" height="455.731"/>
<g>
<path style="fill:#FFFFFF;" d="M296.208,159.16C234.445,97.397,152.266,63.382,64.81,63.382v64.348
c70.268,0,136.288,27.321,185.898,76.931c49.609,49.61,76.931,115.63,76.931,185.898h64.348
C391.986,303.103,357.971,220.923,296.208,159.16z"/>
<path style="fill:#FFFFFF;" d="M64.143,172.273v64.348c84.881,0,153.938,69.056,153.938,153.939h64.348
C282.429,270.196,184.507,172.273,64.143,172.273z"/>
<circle style="fill:#FFFFFF;" cx="109.833" cy="346.26" r="46.088"/>
</g>
</g>
</svg>


View File

@ -0,0 +1,20 @@
{{#with feed}}
<h2><a title="Atom/RSS feed" target="external" href="{{feedUrl}}"><img width="16" height="16" src="/rss.svg"></a> Feed (Experimental)</h2>
<dl>
{{#each items}}
<dt><a href="{{url}}" rel="external noopener ugc">{{title}}</a></dt>
<dd><date>{{pubDay}}</date><br>{{{descriptionSafe}}}</dd>
{{/each}}
</dl>
{{/with}}
{{#unless feed}}{{#if samples}}
<h2>Sample</h2>
<dl>
{{#each samples}}
<dt><a href="{{url}}" rel="external noopener ugc">{{title}}</a></dt>
<dd>{{{description}}}</dd>
{{/each}}
</dl>
{{/if}}{{/unless}}

View File

@ -12,11 +12,58 @@
<img class="screenshot" width="300" height="225" src="/screenshot/{{domainId}}" alt="Screenshot of {{domain}}" />
</a>
{{#with domainInformation}}
{{> search/site-info/site-info-feed}}
{{> search/site-info/site-info-index}}
{{> search/site-info/site-info-links}}
{{/with}}
</div>
{{#if linking}}
<div id="similar-links">
<h2>Linking Domains</h2>
<table class="similarity-table">
<tr>
<th colspan="3">Meta</th>
<th>Rank</th>
<th>Domain</th>
<th>Similarity</th>
</tr>
{{#each linking}}
<tr>
<td>
{{#if indexed}}
{{#if active}}
<span title="Indexed">&#128064;</span>
{{/if}}
{{#unless active}}
<span title="Problem">&#128293;</span>
{{/unless}}
{{/if}}
</td>
<td>
{{#if screenshot}}&#x1f4f7;{{/if}}
</td>
<td>
{{#if linkType.isLinked}}
<span title="{{linkType.description}}"><a href="/crosstalk/?domains={{domain}},{{url.domain}}">{{{linkType}}}</a></span>
{{/if}}
</td>
<td>
<span title="{{rank}}%">{{{rankSymbols}}}</span>
</td>
<td>
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
<td>
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
</td>
</tr>
{{/each}}
</table>
</div>
{{/if}}
{{#if similar}}
<div id="similar-domains">
<h2>Similar Domains</h2>
@ -67,48 +114,4 @@
</div>
{{/if}}
{{#if linking}}
<div id="similar-links">
<h2>Linking Domains</h2>
<table class="similarity-table">
<tr>
<th colspan="3">Meta</th>
<th>Rank</th>
<th>Domain</th>
<th>Similarity</th>
</tr>
{{#each linking}}
<tr>
<td>
{{#if indexed}}
{{#if active}}
<span title="Indexed">&#128064;</span>
{{/if}}
{{#unless active}}
<span title="Problem">&#128293;</span>
{{/unless}}
{{/if}}
</td>
<td>
{{#if screenshot}}&#x1f4f7;{{/if}}
</td>
<td>
{{#if linkType.isLinked}}
<span title="{{linkType.description}}"><a href="/crosstalk/?domains={{domain}},{{url.domain}}">{{{linkType}}}</a></span>
{{/if}}
</td>
<td>
<span title="{{rank}}%">{{{rankSymbols}}}</span>
</td>
<td>
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
<td>
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
</td>
</tr>
{{/each}}
</table>
</div>
{{/if}}
</div>

View File

@ -28,6 +28,7 @@ include 'code:libraries:message-queue'
include 'code:features-search:screenshots'
include 'code:features-search:random-websites'
include 'code:features-search:feedlot-client'
include 'code:features-qs:query-parser'
include 'code:features-index:result-ranking'