(control) Add toggle for automatic loading of processed data

This commit is contained in:
Viktor Lofgren 2024-01-18 11:51:31 +01:00
parent 19e781b104
commit 753d000788
5 changed files with 32 additions and 13 deletions

View file

@ -68,7 +68,7 @@ public class ControlNodeActionsService {
Spark.post("/public/nodes/:id/actions/recrawl", this::triggerAutoRecrawl,
redirectControl.renderRedirectAcknowledgement("Recrawling", "..")
);
Spark.post("/public/nodes/:id/actions/process", this::triggerAutoProcess,
Spark.post("/public/nodes/:id/actions/process", this::triggerProcess,
redirectControl.renderRedirectAcknowledgement("Processing", "..")
);
Spark.post("/public/nodes/:id/actions/load", this::triggerLoadSelected,
@ -183,16 +183,23 @@ public class ControlNodeActionsService {
return "";
}
private Object triggerAutoProcess(Request request, Response response) throws SQLException {
private Object triggerProcess(Request request, Response response) throws SQLException {
int nodeId = Integer.parseInt(request.params("id"));
boolean isAutoload = "on".equalsIgnoreCase(request.queryParams("autoload"));
var toProcess = parseSourceFileStorageId(request.queryParams("source"));
changeActiveStorage(nodeId, FileStorageType.PROCESSED_DATA, toProcess);
if (isAutoload) {
executorClient.triggerConvertAndLoad(Context.fromRequest(request),
nodeId,
toProcess);
}
else {
executorClient.triggerConvert(Context.fromRequest(request),
nodeId,
toProcess);
}
return "";
}

View file

@ -7,7 +7,7 @@
{{#if allProcessedData}}
<div class="my-3 p-3 border bg-light">
This will load <a href="storage/processed">processed data</a> on Node {{node.id}}. This isn't typically necessary for crawled data,
but is needed for sideloaded data. Select the processed data to load below.
but is necessary for sideloaded data. Select the processed data to be loaded below.
</div>
<form method="post" action="actions/load">

View file

@ -6,9 +6,7 @@
</p>
<p>
The reprocessing will extract keywords, titles, and other metadata from the raw crawl data
and generate parquet files that can be loaded into the index. This loading will be done automatically,
and unless the node is <a href="/nodes/{{node.id}}/configuration">configured</a> to keep the processed data,
the processed data will be deleted after loading to save disk space.
and generate parquet files that can be loaded into the index.
</p>
</div>
@ -33,6 +31,18 @@
</tr>
{{/each}}
</table>
<div class="form-group mt-3 mb-5">
<div class="form-check form-switch mb-3">
<input class="form-check-input" type="checkbox" name="autoload" id="autoload" checked>
<label for="autoload">Load Automatically</label>
</div>
<small class="text-muted mt-3">
If <em>enabled</em>, the data will be loaded into the search engine automatically after it is created. Depending on how the
node is <a href="/nodes/{{node.id}}/configuration">configured</a>, the intermediate processed data may be deleted
after loading. If <em>disabled</em>, the data will be processed, but loading needs to be <a href="?view=load">triggered manually</a>.
This is useful if multiple data sets are to be loaded, such as when intermingling crawled data with sideloaded data.
</small>
</div>
<button
class="btn btn-primary me-md-2"

View file

@ -1,8 +1,8 @@
<h1 class="my-3">Load</h1>
{{#unless allBackups}}
No <a href="storage/backups">processed data</a> exist on this node. Backups are created when loading data into
the index.
No <a href="storage/backups">backups</a> exist on this node. Backups are created when loading data into
the index. Either no data has ever been loaded, or the backups have been deleted.
{{/unless}}
{{#if allBackups}}

View file

@ -15,8 +15,9 @@
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=new-crawl">New Crawl</a></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=re-crawl">Re-crawl</a></li>
<li><hr class="dropdown-divider"></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=process">Process Crawl Data</a></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=load">Load Crawl Data</a></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=load">Load Processed Data</a></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=repartition">Repartition Index</a></li>
<li><hr class="dropdown-divider"></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-encyclopedia">Sideload Encyclopedia</a></li>
@ -25,6 +26,7 @@
<li><hr class="dropdown-divider"></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=export-db-data">Export Database Data</a></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=export-from-crawl-data">Export From Crawl Data</a></li>
<li><hr class="dropdown-divider"></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=restore-backup">Restore Index Backup</a></li>
</ul>
</li>