(control) Fix incorrect storage base and clean up GUI for data

This commit is contained in:
Viktor Lofgren 2023-10-16 13:30:26 +02:00
parent 3d1c15ef99
commit 39911e3acd
16 changed files with 122 additions and 26 deletions

View File

@ -10,7 +10,6 @@ import nu.marginalia.client.exception.LocalException;
import nu.marginalia.client.exception.NetworkException;
import nu.marginalia.client.exception.RemoteException;
import nu.marginalia.client.exception.RouteNotConfiguredException;
import nu.marginalia.client.route.ServiceRoute;
import nu.marginalia.client.route.RouteProvider;
import nu.marginalia.client.route.ServiceRoutes;
import nu.marginalia.service.descriptor.ServiceDescriptor;
@ -20,7 +19,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.ConnectException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

View File

@ -31,6 +31,7 @@ public class EndpointLivenessMonitor {
@SneakyThrows
public void run() {
Thread.sleep(100); // Wait for initialization
try {
while (!Thread.interrupted()) {
if (updateLivenessMap()) {

View File

@ -92,7 +92,7 @@ public class ConvertActor extends AbstractActorPrototype {
// Create processed data area
var toProcess = storageService.getStorage(sourceStorageId);
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var processedArea = storageService.allocateTemporaryStorage(base,
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Data; " + toProcess.description());
@ -125,7 +125,7 @@ public class ConvertActor extends AbstractActorPrototype {
String fileName = sourcePath.toFile().getName();
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var processedArea = storageService.allocateTemporaryStorage(base,
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Encylopedia Data; " + fileName);
@ -157,7 +157,7 @@ public class ConvertActor extends AbstractActorPrototype {
String fileName = sourcePath.toFile().getName();
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var processedArea = storageService.allocateTemporaryStorage(base,
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Dirtree Data; " + fileName);
@ -188,7 +188,7 @@ public class ConvertActor extends AbstractActorPrototype {
String fileName = sourcePath.toFile().getName();
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var processedArea = storageService.allocateTemporaryStorage(base,
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Stackexchange Data; " + fileName);

View File

@ -126,7 +126,7 @@ public class ConvertAndLoadActor extends AbstractActorPrototype {
var toProcess = storageService.getStorage(message.crawlStorageId);
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var processedArea = storageService.allocateTemporaryStorage(base, FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Data; " + toProcess.description());

View File

@ -98,7 +98,7 @@ public class CrawlActor extends AbstractActorPrototype {
var toCrawl = storageService.getStorage(message.crawlSpecId);
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var dataArea = storageService.allocateTemporaryStorage(
base,
FileStorageType.CRAWL_DATA,

View File

@ -65,7 +65,7 @@ public class CrawlJobExtractorActor extends AbstractActorPrototype {
error("This actor requires a CrawlJobExtractorArgumentsWithURL argument");
}
var base = fileStorageService.getStorageBase(FileStorageBaseType.WORK);
var base = fileStorageService.getStorageBase(FileStorageBaseType.STORAGE);
var storage = fileStorageService.allocateTemporaryStorage(base, FileStorageType.CRAWL_SPEC, "crawl-spec", arg.description());
Path urlsTxt = storage.asPath().resolve("urls.txt");
@ -102,7 +102,7 @@ public class CrawlJobExtractorActor extends AbstractActorPrototype {
error("This actor requires a CrawlJobExtractorArguments argument");
}
var base = fileStorageService.getStorageBase(FileStorageBaseType.WORK);
var base = fileStorageService.getStorageBase(FileStorageBaseType.STORAGE);
var storage = fileStorageService.allocateTemporaryStorage(base, FileStorageType.CRAWL_SPEC, "crawl-spec", arg.description());
final Path path = CrawlSpecFileNames.resolve(storage);

View File

@ -156,12 +156,13 @@ public class ControlNodeService {
final String description = request.queryParams("description");
final String url = request.queryParams("url");
final String source = request.queryParams("source");
int nodeId = Integer.parseInt(request.params("id"));
if ("db".equals(source)) {
executorClient.createCrawlSpecFromDb(Context.fromRequest(request), 0, description);
executorClient.createCrawlSpecFromDb(Context.fromRequest(request), nodeId, description);
}
else if ("download".equals(source)) {
executorClient.createCrawlSpecFromDownload(Context.fromRequest(request), 0, description, url);
executorClient.createCrawlSpecFromDownload(Context.fromRequest(request), nodeId, description, url);
}
else {
throw new IllegalArgumentException("Unknown source: " + source);

View File

@ -6,7 +6,13 @@
{{> control/partials/nav}}
<div class="container">
<h1 class="my-5">Index Node {{node.id}}</h1>
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">

View File

@ -6,7 +6,13 @@
{{> control/partials/nav}}
<div class="container">
<h1 class="my-5">Index Node {{node.id}}</h1>
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">

View File

@ -5,10 +5,14 @@
<body>
{{> control/partials/nav}}
<nav aria-label="breadcrumb">
<div class="container">
<h1 class="my-5">Index Node {{node.id}}</h1>
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">

View File

@ -6,7 +6,13 @@
{{> control/partials/nav}}
<div class="container">
<h1 class="my-5">Index Node {{node.id}}</h1>
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">

View File

@ -6,7 +6,13 @@
{{> control/partials/nav}}
<div class="container">
<h1 class="my-5">Index Node {{node.id}}</h1>
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">

View File

@ -6,7 +6,13 @@
{{> control/partials/nav}}
<div class="container">
<h1 class="my-5">Index Node {{node.id}}</h1>
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">

View File

@ -6,7 +6,13 @@
{{> control/partials/nav}}
<div class="container">
<h1 class="my-5">Index Node {{node.id}}</h1>
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">

View File

@ -7,7 +7,13 @@
<div class="container">
<h1 class="my-5">Index Node {{node.id}}</h1>
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">
@ -139,10 +145,60 @@
xhr.send();
}
</script>
{{#if view.specs}}
<div class="m-3 p-3 border bg-light">
<h3 class="my-3">About Specs</h3>
<p>Crawl specifications are sets of domains to be crawled. A specification is necessary for the crawler
to understand which domains to visit.</p>
<p>Setting a spec as active means it will be used in <em>recrawls</em>. A fresh crawl can be triggered
from within the details view of a specification.</p>
</div>
{{/if}}
{{#if view.crawl}}
<div class="m-3 p-3 border bg-light">
<h3 class="my-3">About Crawls</h3>
<p>Crawl data is the downloaded and compressed raw HTML of a website.</p>
<p>To start a new crawl, first a <em>crawl specification</em> must be created.</p>
<p>Unlike other data sets, only a single crawl set may be <em>ACTIVE</em> at any given time.</p>
<h4 class="my-3">Recrawl</h4>
<p>A set of existing crawl data can be <em>recrawled</em> and amended with new domains
and a fresh look at the existing domains. This is an operation that uses the currently <em>ACTIVE</em>
crawl data and crawl set(s).
</p>
<h4 class="my-3">Processing</h4>
<p>
Before the data can be loaded into the search engine, it needs to be processed. The process operation
uses the currently <em>ACTIVE</em> crawl set.
</p>
</div>
{{/if}}
{{#if view.processed}}
<div class="m-3 p-3 border bg-light">
<h3 class="my-3">Processed Data</h3>
<p>
Processed data is HTML that has been analyzed and had its properties and keywords extracted. If one or more
processed data sets are <em>ACTIVE</em>, they can be loaded into the system to make them searchable. Loading processed
data also creates a backup of the Index Journal and Link Database that can be quickly restored in the event
of a problem.
</p>
<p>
Processed data may come from crawl data, but it may also be sideloaded into the system from e.g.
stackexchange data dumps through the Node Actions view.
</p>
</div>
{{/if}}
{{#if view.backup}}
<div class="m-3 p-3 border bg-light">
<h3 class="my-3">Backups</h3>
<p>
Backups are snapshots of the Index Journal and Link Database immediately before a searchable index
is constructed. They are relatively quick to restore and allow you to roll back a problematic deployment
or recover from disk failures (assuming they are on a different drive).
</p>
</div>
{{/if}}
</div>
</div>
</body>
{{> control/partials/foot-includes }}

View File

@ -6,7 +6,7 @@
{{> control/partials/nav}}
<div class="container">
<h1 class="my-5">Index Nodes</h1>
<h1 class="my-3">Nodes</h1>
{{#unless nodes}}
It appears no nodes have been configured! This is necessary before any index or executor services