(control) Fix incorrect storage base and clean up GUI for data
This commit is contained in:
parent
3d1c15ef99
commit
39911e3acd
@ -10,7 +10,6 @@ import nu.marginalia.client.exception.LocalException;
|
||||
import nu.marginalia.client.exception.NetworkException;
|
||||
import nu.marginalia.client.exception.RemoteException;
|
||||
import nu.marginalia.client.exception.RouteNotConfiguredException;
|
||||
import nu.marginalia.client.route.ServiceRoute;
|
||||
import nu.marginalia.client.route.RouteProvider;
|
||||
import nu.marginalia.client.route.ServiceRoutes;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptor;
|
||||
@ -20,7 +19,6 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.net.ConnectException;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
|
@ -31,6 +31,7 @@ public class EndpointLivenessMonitor {
|
||||
@SneakyThrows
|
||||
public void run() {
|
||||
Thread.sleep(100); // Wait for initialization
|
||||
|
||||
try {
|
||||
while (!Thread.interrupted()) {
|
||||
if (updateLivenessMap()) {
|
||||
|
@ -92,7 +92,7 @@ public class ConvertActor extends AbstractActorPrototype {
|
||||
// Create processed data area
|
||||
|
||||
var toProcess = storageService.getStorage(sourceStorageId);
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
||||
var processedArea = storageService.allocateTemporaryStorage(base,
|
||||
FileStorageType.PROCESSED_DATA, "processed-data",
|
||||
"Processed Data; " + toProcess.description());
|
||||
@ -125,7 +125,7 @@ public class ConvertActor extends AbstractActorPrototype {
|
||||
|
||||
String fileName = sourcePath.toFile().getName();
|
||||
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
||||
var processedArea = storageService.allocateTemporaryStorage(base,
|
||||
FileStorageType.PROCESSED_DATA, "processed-data",
|
||||
"Processed Encylopedia Data; " + fileName);
|
||||
@ -157,7 +157,7 @@ public class ConvertActor extends AbstractActorPrototype {
|
||||
|
||||
String fileName = sourcePath.toFile().getName();
|
||||
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
||||
var processedArea = storageService.allocateTemporaryStorage(base,
|
||||
FileStorageType.PROCESSED_DATA, "processed-data",
|
||||
"Processed Dirtree Data; " + fileName);
|
||||
@ -188,7 +188,7 @@ public class ConvertActor extends AbstractActorPrototype {
|
||||
|
||||
String fileName = sourcePath.toFile().getName();
|
||||
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
||||
var processedArea = storageService.allocateTemporaryStorage(base,
|
||||
FileStorageType.PROCESSED_DATA, "processed-data",
|
||||
"Processed Stackexchange Data; " + fileName);
|
||||
|
@ -126,7 +126,7 @@ public class ConvertAndLoadActor extends AbstractActorPrototype {
|
||||
|
||||
var toProcess = storageService.getStorage(message.crawlStorageId);
|
||||
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
||||
var processedArea = storageService.allocateTemporaryStorage(base, FileStorageType.PROCESSED_DATA, "processed-data",
|
||||
"Processed Data; " + toProcess.description());
|
||||
|
||||
|
@ -98,7 +98,7 @@ public class CrawlActor extends AbstractActorPrototype {
|
||||
|
||||
var toCrawl = storageService.getStorage(message.crawlSpecId);
|
||||
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
|
||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
||||
var dataArea = storageService.allocateTemporaryStorage(
|
||||
base,
|
||||
FileStorageType.CRAWL_DATA,
|
||||
|
@ -65,7 +65,7 @@ public class CrawlJobExtractorActor extends AbstractActorPrototype {
|
||||
error("This actor requires a CrawlJobExtractorArgumentsWithURL argument");
|
||||
}
|
||||
|
||||
var base = fileStorageService.getStorageBase(FileStorageBaseType.WORK);
|
||||
var base = fileStorageService.getStorageBase(FileStorageBaseType.STORAGE);
|
||||
var storage = fileStorageService.allocateTemporaryStorage(base, FileStorageType.CRAWL_SPEC, "crawl-spec", arg.description());
|
||||
|
||||
Path urlsTxt = storage.asPath().resolve("urls.txt");
|
||||
@ -102,7 +102,7 @@ public class CrawlJobExtractorActor extends AbstractActorPrototype {
|
||||
error("This actor requires a CrawlJobExtractorArguments argument");
|
||||
}
|
||||
|
||||
var base = fileStorageService.getStorageBase(FileStorageBaseType.WORK);
|
||||
var base = fileStorageService.getStorageBase(FileStorageBaseType.STORAGE);
|
||||
var storage = fileStorageService.allocateTemporaryStorage(base, FileStorageType.CRAWL_SPEC, "crawl-spec", arg.description());
|
||||
|
||||
final Path path = CrawlSpecFileNames.resolve(storage);
|
||||
|
@ -156,12 +156,13 @@ public class ControlNodeService {
|
||||
final String description = request.queryParams("description");
|
||||
final String url = request.queryParams("url");
|
||||
final String source = request.queryParams("source");
|
||||
int nodeId = Integer.parseInt(request.params("id"));
|
||||
|
||||
if ("db".equals(source)) {
|
||||
executorClient.createCrawlSpecFromDb(Context.fromRequest(request), 0, description);
|
||||
executorClient.createCrawlSpecFromDb(Context.fromRequest(request), nodeId, description);
|
||||
}
|
||||
else if ("download".equals(source)) {
|
||||
executorClient.createCrawlSpecFromDownload(Context.fromRequest(request), 0, description, url);
|
||||
executorClient.createCrawlSpecFromDownload(Context.fromRequest(request), nodeId, description, url);
|
||||
}
|
||||
else {
|
||||
throw new IllegalArgumentException("Unknown source: " + source);
|
||||
|
@ -6,7 +6,13 @@
|
||||
{{> control/partials/nav}}
|
||||
|
||||
<div class="container">
|
||||
<h1 class="my-5">Index Node {{node.id}}</h1>
|
||||
|
||||
<nav aria-label="breadcrumb">
|
||||
<ol class="breadcrumb">
|
||||
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
|
||||
<li class="breadcrumb-item">node-{{node.id}}</li>
|
||||
</ol>
|
||||
</nav>
|
||||
|
||||
<nav class="nav nav-tabs">
|
||||
<li class="nav-item">
|
||||
|
@ -6,7 +6,13 @@
|
||||
{{> control/partials/nav}}
|
||||
|
||||
<div class="container">
|
||||
<h1 class="my-5">Index Node {{node.id}}</h1>
|
||||
|
||||
<nav aria-label="breadcrumb">
|
||||
<ol class="breadcrumb">
|
||||
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
|
||||
<li class="breadcrumb-item">node-{{node.id}}</li>
|
||||
</ol>
|
||||
</nav>
|
||||
|
||||
<nav class="nav nav-tabs">
|
||||
<li class="nav-item">
|
||||
|
@ -5,10 +5,14 @@
|
||||
<body>
|
||||
{{> control/partials/nav}}
|
||||
|
||||
<nav aria-label="breadcrumb">
|
||||
|
||||
<div class="container">
|
||||
<h1 class="my-5">Index Node {{node.id}}</h1>
|
||||
|
||||
<nav aria-label="breadcrumb">
|
||||
<ol class="breadcrumb">
|
||||
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
|
||||
<li class="breadcrumb-item">node-{{node.id}}</li>
|
||||
</ol>
|
||||
</nav>
|
||||
|
||||
<nav class="nav nav-tabs">
|
||||
<li class="nav-item">
|
||||
|
@ -6,7 +6,13 @@
|
||||
{{> control/partials/nav}}
|
||||
|
||||
<div class="container">
|
||||
<h1 class="my-5">Index Node {{node.id}}</h1>
|
||||
|
||||
<nav aria-label="breadcrumb">
|
||||
<ol class="breadcrumb">
|
||||
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
|
||||
<li class="breadcrumb-item">node-{{node.id}}</li>
|
||||
</ol>
|
||||
</nav>
|
||||
|
||||
<nav class="nav nav-tabs">
|
||||
<li class="nav-item">
|
||||
|
@ -6,7 +6,13 @@
|
||||
{{> control/partials/nav}}
|
||||
|
||||
<div class="container">
|
||||
<h1 class="my-5">Index Node {{node.id}}</h1>
|
||||
|
||||
<nav aria-label="breadcrumb">
|
||||
<ol class="breadcrumb">
|
||||
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
|
||||
<li class="breadcrumb-item">node-{{node.id}}</li>
|
||||
</ol>
|
||||
</nav>
|
||||
|
||||
<nav class="nav nav-tabs">
|
||||
<li class="nav-item">
|
||||
|
@ -6,7 +6,13 @@
|
||||
{{> control/partials/nav}}
|
||||
|
||||
<div class="container">
|
||||
<h1 class="my-5">Index Node {{node.id}}</h1>
|
||||
|
||||
<nav aria-label="breadcrumb">
|
||||
<ol class="breadcrumb">
|
||||
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
|
||||
<li class="breadcrumb-item">node-{{node.id}}</li>
|
||||
</ol>
|
||||
</nav>
|
||||
|
||||
<nav class="nav nav-tabs">
|
||||
<li class="nav-item">
|
||||
|
@ -6,7 +6,13 @@
|
||||
{{> control/partials/nav}}
|
||||
|
||||
<div class="container">
|
||||
<h1 class="my-5">Index Node {{node.id}}</h1>
|
||||
|
||||
<nav aria-label="breadcrumb">
|
||||
<ol class="breadcrumb">
|
||||
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
|
||||
<li class="breadcrumb-item">node-{{node.id}}</li>
|
||||
</ol>
|
||||
</nav>
|
||||
|
||||
<nav class="nav nav-tabs">
|
||||
<li class="nav-item">
|
||||
|
@ -7,7 +7,13 @@
|
||||
|
||||
|
||||
<div class="container">
|
||||
<h1 class="my-5">Index Node {{node.id}}</h1>
|
||||
|
||||
<nav aria-label="breadcrumb">
|
||||
<ol class="breadcrumb">
|
||||
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
|
||||
<li class="breadcrumb-item">node-{{node.id}}</li>
|
||||
</ol>
|
||||
</nav>
|
||||
|
||||
<nav class="nav nav-tabs">
|
||||
<li class="nav-item">
|
||||
@ -139,10 +145,60 @@
|
||||
xhr.send();
|
||||
}
|
||||
</script>
|
||||
|
||||
{{#if view.specs}}
|
||||
<div class="m-3 p-3 border bg-light">
|
||||
<h3 class="my-3">About Specs</h3>
|
||||
<p>A crawl specification is a set of domains to be crawled. A specification is necessary for the crawler
|
||||
to understand which domains to visit.</p>
|
||||
<p>Setting a spec as active means it will be used in <em>recrawls</em>. A fresh crawl can be triggered
|
||||
from within the details view of a specification.</p>
|
||||
</div>
|
||||
{{/if}}
|
||||
{{#if view.crawl}}
|
||||
<div class="m-3 p-3 border bg-light">
|
||||
<h3 class="my-3">About Crawls</h3>
|
||||
<p>Crawl data is the downloaded and compressed raw HTML of a website.</p>
|
||||
<p>To start a new crawl, first a <em>crawl specification</em> must be created.</p>
|
||||
<p>Unlike other data sets, only a single crawl set may be <em>ACTIVE</em> at any given time.</p>
|
||||
<h4 class="my-3">Recrawl</h4>
|
||||
<p>A set of existing crawl data can be <em>recrawled</em> and amended with new domains
|
||||
and a fresh look at the existing domains. This is an operation that uses the currently <em>ACTIVE</em>
|
||||
crawl data and crawl set(s).
|
||||
</p>
|
||||
<h4 class="my-3">Processing</h4>
|
||||
<p>
|
||||
Before the data can be loaded into the search engine, it needs to be processed. The process operation
|
||||
uses the currently <em>ACTIVE</em> crawl set.
|
||||
</p>
|
||||
</div>
|
||||
{{/if}}
|
||||
{{#if view.processed}}
|
||||
<div class="m-3 p-3 border bg-light">
|
||||
<h3 class="my-3">Processed Data</h3>
|
||||
<p>
|
||||
Processed data is HTML that has been analyzed and had its properties and keywords extracted. If one or more
|
||||
processed data sets are <em>ACTIVE</em>, they can be Loaded into the system to make them searchable. Loading processed
|
||||
data also creates a backup of the Index Journal and Link Database that can be quickly restored in the event
|
||||
of a problem.
|
||||
</p>
|
||||
<p>
|
||||
Processed data may come from crawl data, but it may also be sideloaded into the system from e.g.
|
||||
stackexchange data dumps through the Node Actions view.
|
||||
</p>
|
||||
</div>
|
||||
{{/if}}
|
||||
{{#if view.backup}}
|
||||
<div class="m-3 p-3 border bg-light">
|
||||
<h3 class="my-3">Backups</h3>
|
||||
<p>
|
||||
Backups are snapshots of the Index Journal and Link Database immediately before a searchable index
|
||||
is constructed. They are relatively quick to restore and allow you to roll back a problematic deployment
|
||||
or recover from disk failures (assuming they are on a different drive).
|
||||
</p>
|
||||
</div>
|
||||
{{/if}}
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
{{> control/partials/foot-includes }}
|
||||
|
@ -6,7 +6,7 @@
|
||||
{{> control/partials/nav}}
|
||||
|
||||
<div class="container">
|
||||
<h1 class="my-5">Index Nodes</h1>
|
||||
<h1 class="my-3">Nodes</h1>
|
||||
|
||||
{{#unless nodes}}
|
||||
It appears no nodes have been configured! This is necessary before any index or executor services
|
||||
|
Loading…
Reference in New Issue
Block a user