(control) UX-improvements for control service

This commit overhauls much of the UX for the control service, adding a new actions menu to the node views.  It also includes many small tweaks that improve the workflow.

It also adds a new /uploads directory in each index node, from which sideloaded data can be selected.  This is a breaking change, as the /uploads directory must exist in every index node.
This commit is contained in:
Viktor Lofgren 2024-01-12 12:33:05 +01:00
parent 734996002c
commit 264e2db539
46 changed files with 806 additions and 754 deletions

View File

@ -8,6 +8,7 @@ import nu.marginalia.executor.model.load.LoadParameters;
import nu.marginalia.executor.model.transfer.TransferItem;
import nu.marginalia.executor.model.transfer.TransferSpec;
import nu.marginalia.executor.storage.FileStorageContent;
import nu.marginalia.executor.upload.UploadDirContents;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.service.descriptor.ServiceDescriptors;
import nu.marginalia.service.id.ServiceId;
@ -38,7 +39,7 @@ public class ExecutorClient extends AbstractDynamicClient {
}
public void triggerCrawl(Context ctx, int node, String fid) {
public void triggerCrawl(Context ctx, int node, FileStorageId fid) {
post(ctx, node, "/process/crawl/" + fid, "").blockingSubscribe();
}
@ -112,6 +113,10 @@ public class ExecutorClient extends AbstractDynamicClient {
return get(context, node, "/actor", ActorRunStates.class).blockingFirst();
}
/** Requests the "/sideload/" listing from the given node and returns the first
 * result, blocking the calling thread until it is available.
 *
 * @param context the client context to issue the request with
 * @param node    the node to query
 * @return the directory listing reported by the node
 */
public UploadDirContents listSideloadDir(Context context, int node) {
    var listing = get(context, node, "/sideload/", UploadDirContents.class);

    return listing.blockingFirst();
}
/** Requests "/storage/{id}" from the given node for the provided file storage id
 * and returns the first result, blocking the calling thread until it is available.
 *
 * @param context the client context to issue the request with
 * @param node    the node to query
 * @param fileId  the file storage whose contents should be listed
 * @return the storage contents reported by the node
 */
public FileStorageContent listFileStorage(Context context, int node, FileStorageId fileId) {
    final String endpoint = "/storage/" + fileId.id();
    var contents = get(context, node, endpoint, FileStorageContent.class);

    return contents.blockingFirst();
}

View File

@ -0,0 +1,6 @@
package nu.marginalia.executor.upload;
import java.util.List;
/** A directory listing: the directory's path together with the items found in it.
 *
 * @param path  the path of the listed directory, as a string
 * @param items one entry per item in the directory
 */
public record UploadDirContents(String path, List<UploadDirItem> items) {
}

View File

@ -0,0 +1,29 @@
package nu.marginalia.executor.upload;
import lombok.SneakyThrows;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
/** A single entry of a directory listing.
 *
 * @param name             the entry's path rendered as a string
 * @param lastModifiedTime the modification time, formatted with ISO_DATE_TIME
 * @param isDirectory      true if the entry is a directory
 * @param size             the file size; always 0 for directories
 */
public record UploadDirItem (
        String name,
        String lastModifiedTime,
        boolean isDirectory,
        long size
) {
    /** Builds an item by inspecting the given path on the file system.
     * <p>
     * The modification time is converted using the system default time zone.
     * Any exception raised while reading the file attributes is rethrown
     * as-is via {@code @SneakyThrows}.
     */
    @SneakyThrows
    public static UploadDirItem fromPath(Path path) {
        final boolean directory = Files.isDirectory(path);

        // Directories are reported with a size of zero rather than being measured
        final long fileSize;
        if (directory) {
            fileSize = 0;
        } else {
            fileSize = Files.size(path);
        }

        final var modified = Files.getLastModifiedTime(path).toInstant();
        final String formattedModified = LocalDateTime
                .ofInstant(modified, ZoneId.systemDefault())
                .format(DateTimeFormatter.ISO_DATE_TIME);

        return new UploadDirItem(path.toString(), formattedModified, directory, fileSize);
    }
}

View File

@ -23,6 +23,10 @@ public class WmsaHome {
}
/** Returns the upload directory, which is the fixed path {@code /uploads}.
 * The location is hard-coded and is not derived from WMSA_HOME.
 */
public static Path getUploadDir() {
return Path.of("/uploads");
}
public static Path getHomePath() {
var retStr = Optional.ofNullable(System.getenv("WMSA_HOME")).orElseGet(WmsaHome::findDefaultHomePath);

View File

@ -455,6 +455,57 @@ public class FileStorageService {
return ret;
}
/** Lists every file storage of the given type, using this service's own {@code node}
 * value as the node; delegates to {@link #getEachFileStorage(int, FileStorageType)}.
 */
public List<FileStorage> getEachFileStorage(FileStorageType type) {
return getEachFileStorage(node, type);
}
/** Lists every file storage of the given type registered for the given node,
 * as found in FILE_STORAGE_VIEW.  No filtering on storage state is performed.
 * <p>
 * This is best-effort: if an SQLException occurs, its stack trace is printed
 * and whatever was collected up to that point (possibly nothing) is returned.
 *
 * @param node the node whose storages are requested
 * @param type the storage type to select
 * @return the matching storages, never null
 */
public List<FileStorage> getEachFileStorage(int node, FileStorageType type) {
    final List<FileStorage> storages = new ArrayList<>();

    try (var conn = dataSource.getConnection();
         var query = conn.prepareStatement("""
                 SELECT PATH, STATE, TYPE, DESCRIPTION, CREATE_DATE, ID, BASE_ID
                 FROM FILE_STORAGE_VIEW
                 WHERE NODE=? AND TYPE=?
                 """)) {

        query.setInt(1, node);
        query.setString(2, type.name());

        try (var rows = query.executeQuery()) {
            while (rows.next()) {
                final long baseId = rows.getLong("BASE_ID");
                final long storageId = rows.getLong("ID");
                final String path = rows.getString("PATH");
                final String state = rows.getString("STATE");
                final String description = rows.getString("DESCRIPTION");
                final LocalDateTime createDateTime = rows.getTimestamp("CREATE_DATE").toLocalDateTime();

                // Each row only carries the base's id; the base itself is looked up separately
                final var base = getStorageBase(new FileStorageBaseId(baseId));

                storages.add(new FileStorage(
                        new FileStorageId(storageId),
                        base,
                        type,
                        createDateTime,
                        path,
                        FileStorageState.parse(state),
                        description
                ));
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }

    return storages;
}
/** Sets the state of the given file storage to {@link FileStorageState#DELETE}.
 * This only updates the state; no file is removed by this method itself.
 *
 * @throws SQLException if the state update fails
 */
public void flagFileForDeletion(FileStorageId id) throws SQLException {
setFileStorageState(id, FileStorageState.DELETE);
}

View File

@ -1,10 +1,10 @@
package nu.marginalia.control;
import com.github.jknack.handlebars.Handlebars;
import com.github.jknack.handlebars.Helper;
import com.github.jknack.handlebars.Options;
import com.github.jknack.handlebars.*;
import nu.marginalia.renderer.config.HandlebarsConfigurator;
import java.io.IOException;
public class ControlHandlebarsConfigurator implements HandlebarsConfigurator {
@Override
public void configure(Handlebars handlebars) {

View File

@ -0,0 +1,38 @@
package nu.marginalia.control;
import com.google.inject.Inject;
import lombok.SneakyThrows;
import nu.marginalia.nodecfg.NodeConfigurationService;
import nu.marginalia.renderer.RendererFactory;
import java.util.Map;
/** Decorates {@link RendererFactory} so that each renderer it produces is also
 * handed a "global-context" map containing the nodes listing.
 */
public class ControlRendererFactory {
    private final RendererFactory rendererFactory;
    private final NodeConfigurationService nodeConfigurationService;

    @Inject
    public ControlRendererFactory(RendererFactory rendererFactory,
                                  NodeConfigurationService nodeConfigurationService)
    {
        this.rendererFactory = rendererFactory;
        this.nodeConfigurationService = nodeConfigurationService;
    }

    /** Creates a renderer for the named template.
     * <p>
     * Note that the nodes listing is fetched here, once, when the renderer is
     * created; the same listing is then supplied on every subsequent render call
     * of the returned renderer.
     *
     * @param template the name of the template to render
     * @return a renderer that injects the global context on each render
     */
    @SneakyThrows
    public Renderer renderer(String template) {
        final var nodes = nodeConfigurationService.getAll();
        final Map<String, Object> globalContext = Map.of("nodes", nodes);

        final var delegate = rendererFactory.renderer(template);

        return model -> delegate.render(model, Map.of("global-context", globalContext));
    }

    /** Renders a model object to a string. */
    public interface Renderer {
        String render(Object context);
    }
}

View File

@ -10,7 +10,6 @@ import nu.marginalia.control.node.svc.ControlFileStorageService;
import nu.marginalia.control.node.svc.ControlNodeService;
import nu.marginalia.control.sys.svc.*;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.screenshot.ScreenshotService;
import nu.marginalia.service.server.*;
import org.slf4j.Logger;
@ -40,7 +39,7 @@ public class ControlService extends Service {
ServiceMonitors monitors,
HeartbeatService heartbeatService,
EventLogService eventLogService,
RendererFactory rendererFactory,
ControlRendererFactory rendererFactory,
StaticResources staticResources,
MessageQueueService messageQueueService,
ControlFileStorageService controlFileStorageService,

View File

@ -1,18 +1,16 @@
package nu.marginalia.control;
import jakarta.inject.Inject;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import spark.ResponseTransformer;
import java.io.IOException;
import java.util.Map;
public class RedirectControl {
private final MustacheRenderer<Object> renderer;
private final ControlRendererFactory.Renderer renderer;
@Inject
public RedirectControl(RendererFactory rendererFactory) throws IOException {
public RedirectControl(ControlRendererFactory rendererFactory) throws IOException {
renderer = rendererFactory.renderer("control/redirect-ok");
}

View File

@ -2,9 +2,9 @@ package nu.marginalia.control.app.svc;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.control.Redirects;
import nu.marginalia.control.app.model.ApiKeyModel;
import nu.marginalia.renderer.RendererFactory;
import org.eclipse.jetty.util.StringUtil;
import spark.Request;
import spark.Response;
@ -20,11 +20,11 @@ import java.util.UUID;
public class ApiKeyService {
private final HikariDataSource dataSource;
private final RendererFactory rendererFactory;
private final ControlRendererFactory rendererFactory;
@Inject
public ApiKeyService(HikariDataSource dataSource,
RendererFactory rendererFactory
ControlRendererFactory rendererFactory
) {
this.dataSource = dataSource;
this.rendererFactory = rendererFactory;

View File

@ -2,10 +2,10 @@ package nu.marginalia.control.app.svc;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.control.Redirects;
import nu.marginalia.control.app.model.BlacklistedDomainModel;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.renderer.RendererFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
@ -22,12 +22,12 @@ import java.util.Objects;
public class ControlBlacklistService {
private final HikariDataSource dataSource;
private final RendererFactory rendererFactory;
private final ControlRendererFactory rendererFactory;
private final Logger logger = LoggerFactory.getLogger(getClass());
@Inject
public ControlBlacklistService(HikariDataSource dataSource,
RendererFactory rendererFactory) {
ControlRendererFactory rendererFactory) {
this.dataSource = dataSource;
this.rendererFactory = rendererFactory;
}

View File

@ -3,11 +3,11 @@ package nu.marginalia.control.app.svc;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.control.Redirects;
import nu.marginalia.control.app.model.DomainComplaintCategory;
import nu.marginalia.control.app.model.DomainComplaintModel;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.renderer.RendererFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
@ -26,14 +26,14 @@ import java.util.stream.Collectors;
*/
public class DomainComplaintService {
private final HikariDataSource dataSource;
private final RendererFactory rendererFactory;
private final ControlRendererFactory rendererFactory;
private final ControlBlacklistService blacklistService;
private final RandomExplorationService randomExplorationService;
private final Logger logger = LoggerFactory.getLogger(getClass());
@Inject
public DomainComplaintService(HikariDataSource dataSource,
RendererFactory rendererFactory,
ControlRendererFactory rendererFactory,
ControlBlacklistService blacklistService,
RandomExplorationService randomExplorationService
) {

View File

@ -3,8 +3,8 @@ package nu.marginalia.control.app.svc;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import gnu.trove.list.array.TIntArrayList;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.renderer.RendererFactory;
import spark.Request;
import spark.Response;
import spark.Spark;
@ -19,12 +19,12 @@ import java.util.Objects;
public class RandomExplorationService {
private final HikariDataSource dataSource;
private final RendererFactory rendererFactory;
private final ControlRendererFactory rendererFactory;
@Inject
public RandomExplorationService(HikariDataSource dataSource,
RendererFactory rendererFactory
) {
ControlRendererFactory rendererFactory
) {
this.dataSource = dataSource;
this.rendererFactory = rendererFactory;
}
@ -33,6 +33,7 @@ public class RandomExplorationService {
var reviewRandomDomainsRenderer = rendererFactory.renderer("control/app/review-random-domains");
Spark.get("/public/review-random-domains", this::reviewRandomDomainsModel, reviewRandomDomainsRenderer::render);
Spark.post("/public/review-random-domains", this::reviewRandomDomainsAction);
}

View File

@ -2,12 +2,13 @@ package nu.marginalia.control.app.svc;
import com.google.inject.Inject;
import nu.marginalia.client.Context;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.nodecfg.NodeConfigurationService;
import nu.marginalia.query.client.QueryClient;
import nu.marginalia.query.model.QueryParams;
import nu.marginalia.renderer.RendererFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
@ -20,19 +21,21 @@ import java.util.Objects;
public class SearchToBanService {
private final ControlBlacklistService blacklistService;
private final RendererFactory rendererFactory;
private final ControlRendererFactory rendererFactory;
private final QueryClient queryClient;
private final Logger logger = LoggerFactory.getLogger(getClass());
private final NodeConfigurationService nodeConfigurationService;
@Inject
public SearchToBanService(ControlBlacklistService blacklistService,
RendererFactory rendererFactory,
QueryClient queryClient)
ControlRendererFactory rendererFactory,
QueryClient queryClient, NodeConfigurationService nodeConfigurationService)
{
this.blacklistService = blacklistService;
this.rendererFactory = rendererFactory;
this.queryClient = queryClient;
this.nodeConfigurationService = nodeConfigurationService;
}
public void register() throws IOException {

View File

@ -5,20 +5,27 @@ import com.google.inject.Singleton;
import nu.marginalia.client.Context;
import nu.marginalia.control.RedirectControl;
import nu.marginalia.executor.client.ExecutorClient;
import nu.marginalia.executor.model.load.LoadParameters;
import nu.marginalia.index.client.IndexClient;
import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageState;
import nu.marginalia.storage.model.FileStorageType;
import spark.Request;
import spark.Response;
import spark.Spark;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.List;
@Singleton
public class ControlNodeActionsService {
private final IndexClient indexClient;
private final RedirectControl redirectControl;
private final FileStorageService fileStorageService;
private final ServiceEventLog eventLog;
private final ExecutorClient executorClient;
@ -26,12 +33,14 @@ public class ControlNodeActionsService {
public ControlNodeActionsService(ExecutorClient executorClient,
IndexClient indexClient,
RedirectControl redirectControl,
FileStorageService fileStorageService,
ServiceEventLog eventLog)
{
this.executorClient = executorClient;
this.indexClient = indexClient;
this.redirectControl = redirectControl;
this.fileStorageService = fileStorageService;
this.eventLog = eventLog;
}
@ -52,15 +61,24 @@ public class ControlNodeActionsService {
Spark.post("/public/nodes/:node/actions/sideload-stackexchange", this::sideloadStackexchange,
redirectControl.renderRedirectAcknowledgement("Sideloading", "..")
);
Spark.post("/public/nodes/:id/actions/new-crawl", this::triggerNewCrawl,
redirectControl.renderRedirectAcknowledgement("Crawling", ".."));
Spark.post("/public/nodes/:id/actions/recrawl", this::triggerAutoRecrawl,
redirectControl.renderRedirectAcknowledgement("Recrawling", ".."));
Spark.post("/public/nodes/:id/actions/process", this::triggerAutoProcess,
redirectControl.renderRedirectAcknowledgement("Processing", "..")
);
Spark.post("/public/nodes/:id/actions/load", this::triggerLoadSelected,
redirectControl.renderRedirectAcknowledgement("Loading", "..")
);
Spark.post("/public/nodes/:id/actions/restore-backup", this::triggerRestoreBackup,
redirectControl.renderRedirectAcknowledgement("Restoring", "..")
);
}
public Object sideloadEncyclopedia(Request request, Response response) throws Exception {
Path sourcePath = Path.of(request.queryParams("source"));
if (!Files.exists(sourcePath)) {
Spark.halt(404);
return "No such file " + sourcePath;
}
String baseUrl = request.queryParams("baseUrl");
final int nodeId = Integer.parseInt(request.params("node"));
@ -75,11 +93,6 @@ public class ControlNodeActionsService {
public Object sideloadDirtree(Request request, Response response) throws Exception {
Path sourcePath = Path.of(request.queryParams("source"));
if (!Files.exists(sourcePath)) {
Spark.halt(404);
return "No such file " + sourcePath;
}
final int nodeId = Integer.parseInt(request.params("node"));
eventLog.logEvent("USER-ACTION", "SIDELOAD DIRTREE " + nodeId);
@ -92,11 +105,6 @@ public class ControlNodeActionsService {
public Object sideloadWarc(Request request, Response response) throws Exception {
Path sourcePath = Path.of(request.queryParams("source"));
if (!Files.exists(sourcePath)) {
Spark.halt(404);
return "No such file " + sourcePath;
}
final int nodeId = Integer.parseInt(request.params("node"));
eventLog.logEvent("USER-ACTION", "SIDELOAD WARC " + nodeId);
@ -108,11 +116,6 @@ public class ControlNodeActionsService {
public Object sideloadStackexchange(Request request, Response response) throws Exception {
Path sourcePath = Path.of(request.queryParams("source"));
if (!Files.exists(sourcePath)) {
Spark.halt(404);
return "No such file " + sourcePath;
}
final int nodeId = Integer.parseInt(request.params("node"));
eventLog.logEvent("USER-ACTION", "SIDELOAD STACKEXCHANGE " + nodeId);
@ -127,5 +130,89 @@ public class ControlNodeActionsService {
return "";
}
/** Handles a recrawl request.  The crawl data identified by the "source" query
 * parameter is made the node's active CRAWL_DATA storage, after which the
 * executor is asked to recrawl it.
 *
 * @return an empty string; the response body is produced by the route's transformer
 */
private Object triggerAutoRecrawl(Request request, Response response) throws SQLException {
    final int nodeId = Integer.parseInt(request.params("id"));
    final var crawlDataId = FileStorageId.parse(request.queryParams("source"));

    changeActiveStorage(nodeId, FileStorageType.CRAWL_DATA, crawlDataId);

    final var ctx = Context.fromRequest(request);
    executorClient.triggerRecrawl(ctx, nodeId, crawlDataId);

    return "";
}
/** Handles a new-crawl request.  The crawl spec identified by the "source" query
 * parameter is made the node's active CRAWL_SPEC storage, after which the
 * executor is asked to start a crawl from it.
 *
 * @return an empty string; the response body is produced by the route's transformer
 */
private Object triggerNewCrawl(Request request, Response response) throws SQLException {
    final int nodeId = Integer.parseInt(request.params("id"));
    final var crawlSpecId = FileStorageId.parse(request.queryParams("source"));

    changeActiveStorage(nodeId, FileStorageType.CRAWL_SPEC, crawlSpecId);

    final var ctx = Context.fromRequest(request);
    executorClient.triggerCrawl(ctx, nodeId, crawlSpecId);

    return "";
}
/** Handles a process request.  The storage identified by the "source" query
 * parameter is marked as the node's active storage, after which the executor
 * is asked to convert and load it.
 *
 * @return an empty string; the response body is produced by the route's transformer
 */
private Object triggerAutoProcess(Request request, Response response) throws SQLException {
    int nodeId = Integer.parseInt(request.params("id"));
    var toProcess = FileStorageId.parse(request.queryParams("source"));

    // The input to convert-and-load is crawl data, so the active flag must be
    // updated among the CRAWL_DATA storages.  Using PROCESSED_DATA here would
    // clear the node's active processed data and leave any previously active
    // crawl data still flagged as active.
    changeActiveStorage(nodeId, FileStorageType.CRAWL_DATA, toProcess);

    executorClient.triggerConvertAndLoad(Context.fromRequest(request),
            nodeId,
            toProcess);

    return "";
}
/** Handles a load request.  Every storage named by a "source" query parameter
 * is marked as active PROCESSED_DATA for the node, after which the executor is
 * asked to load exactly those storages.
 * <p>
 * If no "source" value was submitted, the request is rejected with a 400
 * before any storage state is touched.
 *
 * @return an empty string; the response body is produced by the route's transformer
 */
private Object triggerLoadSelected(Request request, Response response) throws SQLException {
    int nodeId = Integer.parseInt(request.params("id"));
    String[] values = request.queryParamsValues("source");

    // queryParamsValues yields null when the parameter is absent, which is what
    // happens when the form is submitted with no checkbox ticked.  Bail out
    // early instead of failing with a NullPointerException below.
    if (values == null || values.length == 0) {
        Spark.halt(400, "No processed data was selected for loading");
        return "";
    }

    List<FileStorageId> ids = Arrays.stream(values).map(FileStorageId::parse).toList();

    changeActiveStorage(nodeId, FileStorageType.PROCESSED_DATA, ids.toArray(new FileStorageId[0]));

    executorClient.loadProcessedData(Context.fromRequest(request),
            nodeId,
            new LoadParameters(ids)
    );

    return "";
}
/** Handles a restore-backup request by asking the executor of the node to
 * restore the backup named by the "source" query parameter.
 *
 * @return an empty string; the response body is produced by the route's transformer
 */
private Object triggerRestoreBackup(Request request, Response response) {
    final int nodeId = Integer.parseInt(request.params("id"));

    final var ctx = Context.fromRequest(request);
    final String backupId = request.queryParams("source");

    executorClient.restoreBackup(ctx, nodeId, backupId);

    return "";
}
/** Replaces the set of active storages of one type on a node.
 * <p>
 * All storages currently active for the node and type are reset to UNSET, and
 * each of the provided storages is then set to ACTIVE.
 *
 * @param nodeId           the node whose active storage is changed
 * @param type             the storage type whose current active entries are cleared
 * @param newActiveStorage the storages to mark as active afterwards
 * @throws SQLException if any of the state updates fails
 */
private void changeActiveStorage(int nodeId, FileStorageType type, FileStorageId... newActiveStorage) throws SQLException {
    // The active flag records which storage was most recently used on the node.
    // It serves as a reminder for the user and is not consulted by any actual
    // processing, which is why the two steps below are not wrapped in a
    // transaction even though that would be the stricter approach.

    final var previouslyActive = fileStorageService.getActiveFileStorages(nodeId, type);
    for (var staleId : previouslyActive) {
        fileStorageService.setFileStorageState(staleId, FileStorageState.UNSET);
    }

    for (var activeId : newActiveStorage) {
        fileStorageService.setFileStorageState(activeId, FileStorageState.ACTIVE);
    }
}
}

View File

@ -5,6 +5,7 @@ import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.client.Context;
import nu.marginalia.client.ServiceMonitors;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.control.RedirectControl;
import nu.marginalia.control.Redirects;
import nu.marginalia.control.node.model.*;
@ -15,8 +16,6 @@ import nu.marginalia.nodecfg.NodeConfigurationService;
import nu.marginalia.nodecfg.model.NodeConfiguration;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.executor.client.ExecutorClient;
import nu.marginalia.executor.model.load.LoadParameters;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.service.id.ServiceId;
import nu.marginalia.storage.model.*;
import org.slf4j.Logger;
@ -32,7 +31,7 @@ import java.util.*;
public class ControlNodeService {
private final FileStorageService fileStorageService;
private final RendererFactory rendererFactory;
private final ControlRendererFactory rendererFactory;
private final EventLogService eventLogService;
private final HeartbeatService heartbeatService;
private final ExecutorClient executorClient;
@ -46,7 +45,7 @@ public class ControlNodeService {
@Inject
public ControlNodeService(
FileStorageService fileStorageService,
RendererFactory rendererFactory,
ControlRendererFactory rendererFactory,
EventLogService eventLogService,
HeartbeatService heartbeatService,
ExecutorClient executorClient,
@ -100,23 +99,9 @@ public class ControlNodeService {
Spark.get("/public/nodes/:id/configuration", this::nodeConfigModel, configRenderer::render);
Spark.post("/public/nodes/:id/configuration", this::updateConfigModel, configRenderer::render);
Spark.post("/public/nodes/:id/storage/recrawl-auto", this::triggerAutoRecrawl,
redirectControl.renderRedirectAcknowledgement("Recrawling", ".."));
Spark.post("/public/nodes/:id/storage/process-auto", this::triggerAutoProcess,
redirectControl.renderRedirectAcknowledgement("Processing", "..")
);
Spark.post("/public/nodes/:id/storage/load-selected", this::triggerLoadSelected,
redirectControl.renderRedirectAcknowledgement("Loading", "..")
);
Spark.post("/public/nodes/:id/storage/crawl/:fid", this::triggerCrawl,
redirectControl.renderRedirectAcknowledgement("Crawling", "..")
);
Spark.post("/public/nodes/:id/storage/reset-state/:fid", this::resetState,
redirectControl.renderRedirectAcknowledgement("Restoring", "..")
);
Spark.post("/public/nodes/:id/storage/backup-restore/:fid", this::triggerRestoreBackup,
redirectControl.renderRedirectAcknowledgement("Restoring", "..")
);
Spark.post("/public/nodes/:id/actions/export-data", this::exportData,
redirectControl.renderRedirectAcknowledgement("Exporting", "../storage/exports")
);
@ -159,16 +144,9 @@ public class ControlNodeService {
int nextId = configs.stream().mapToInt(NodeConfiguration::node).map(i -> i+1).max().orElse(1);
return Map.of("nodes", nodeConfigurationService.getAll(),
"nextNodeId", nextId);
}
private Object triggerCrawl(Request request, Response response) {
int nodeId = Integer.parseInt(request.params("id"));
executorClient.triggerCrawl(Context.fromRequest(request), nodeId, request.params("fid"));
return "";
return Map.of(
"nodes", nodeConfigurationService.getAll(),
"nextNodeId", nextId);
}
private Object stopProcess(Request request, Response response) {
@ -180,14 +158,6 @@ public class ControlNodeService {
return "";
}
private Object triggerRestoreBackup(Request request, Response response) {
int nodeId = Integer.parseInt(request.params("id"));
executorClient.restoreBackup(Context.fromRequest(request), nodeId, request.params("fid"));
return "";
}
@SneakyThrows
public String redirectToOverview(int nodeId) {
return new Redirects.HtmlRedirect("/nodes/"+nodeId).render(null);
@ -208,59 +178,21 @@ public class ControlNodeService {
return "";
}
private Object newSpecsModel(Request request, Response response) {
private Object newSpecsModel(Request request, Response response) throws SQLException {
int nodeId = Integer.parseInt(request.params("id"));
return Map.of(
"tab", Map.of("storage", true),
"node", new IndexNode(nodeId),
"view", Map.of("specs", true)
);
}
private Object triggerAutoRecrawl(Request request, Response response) throws SQLException {
int nodeId = Integer.parseInt(request.params("id"));
var toCrawl = fileStorageService.getOnlyActiveFileStorage(nodeId, FileStorageType.CRAWL_DATA);
executorClient.triggerRecrawl(
Context.fromRequest(request),
nodeId,
toCrawl.orElseThrow(AssertionError::new)
);
return "";
}
private Object triggerAutoProcess(Request request, Response response) throws SQLException {
int nodeId = Integer.parseInt(request.params("id"));
var toConvert = fileStorageService.getOnlyActiveFileStorage(nodeId, FileStorageType.CRAWL_DATA);
executorClient.triggerConvertAndLoad(Context.fromRequest(request),
nodeId,
toConvert.orElseThrow(AssertionError::new));
return "";
}
private Object triggerLoadSelected(Request request, Response response) throws SQLException {
int nodeId = Integer.parseInt(request.params("id"));
var toLoadStorages = fileStorageService.getActiveFileStorages(nodeId, FileStorageType.PROCESSED_DATA);
executorClient.loadProcessedData(Context.fromRequest(request),
nodeId,
new LoadParameters(toLoadStorages)
);
return "";
}
private Object nodeActorsModel(Request request, Response response) {
private Object nodeActorsModel(Request request, Response response) throws SQLException {
int nodeId = Integer.parseInt(request.params("id"));
return Map.of(
"tab", Map.of("actors", true),
"node", new IndexNode(nodeId),
"actors", executorClient.getActorStates(Context.fromRequest(request), nodeId).states()
);
@ -270,11 +202,18 @@ public class ControlNodeService {
int nodeId = Integer.parseInt(request.params("id"));
return Map.of(
"tab", Map.of("actions", true),
"node", new IndexNode(nodeId),
"currentCrawlData",
fileStorageService.getStorage(fileStorageService.getActiveFileStorages(nodeId, FileStorageType.CRAWL_DATA)),
"currentProcessData",
fileStorageService.getStorage(fileStorageService.getActiveFileStorages(nodeId, FileStorageType.PROCESSED_DATA))
"view", Map.of(request.queryParams("view"), true),
"uploadDirContents", executorClient.listSideloadDir(Context.fromRequest(request), nodeId),
"allBackups",
fileStorageService.getEachFileStorage(nodeId, FileStorageType.BACKUP),
"allCrawlData",
fileStorageService.getEachFileStorage(nodeId, FileStorageType.CRAWL_DATA),
"allProcessedData",
fileStorageService.getEachFileStorage(nodeId, FileStorageType.PROCESSED_DATA),
"allCrawlSpecs",
fileStorageService.getEachFileStorage(nodeId, FileStorageType.CRAWL_SPEC)
);
}
@ -282,6 +221,7 @@ public class ControlNodeService {
int nodeId = Integer.parseInt(request.params("id"));
return Map.of(
"tab", Map.of("storage", true),
"view", Map.of("conf", true),
"node", new IndexNode(nodeId),
"storagebase", getStorageBaseList(nodeId)
@ -303,6 +243,7 @@ public class ControlNodeService {
};
return Map.of(
"tab", Map.of("storage", true),
"view", Map.of(view, true),
"node", new IndexNode(nodeId),
"storage", makeFileStorageBaseWithStorage(getFileStorageIds(type, nodeId))
@ -323,9 +264,11 @@ public class ControlNodeService {
};
return Map.of(
"tab", Map.of("storage", true),
"view", Map.of(view, true),
"node", new IndexNode(nodeId),
"storage", storage);
"storage", storage
);
}
private Object nodeConfigModel(Request request, Response response) throws SQLException {
@ -340,6 +283,7 @@ public class ControlNodeService {
}
return Map.of(
"tab", Map.of("config", true),
"node", new IndexNode(nodeId),
"config", Objects.requireNonNull(nodeConfigurationService.get(nodeId), "Failed to fetch configuration"),
"storage", storage);
@ -379,7 +323,8 @@ public class ControlNodeService {
"status", getStatus(config),
"events", getEvents(nodeId),
"processes", heartbeatService.getProcessHeartbeatsForNode(nodeId),
"jobs", heartbeatService.getTaskHeartbeatsForNode(nodeId)
"jobs", heartbeatService.getTaskHeartbeatsForNode(nodeId),
"tab", Map.of("overview", true)
);
}

View File

@ -3,6 +3,7 @@ package nu.marginalia.control.sys.svc;
import com.google.inject.Inject;
import lombok.SneakyThrows;
import nu.marginalia.client.Context;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.control.Redirects;
import nu.marginalia.control.actor.ControlActor;
import nu.marginalia.control.actor.ControlActorService;
@ -11,8 +12,6 @@ import nu.marginalia.executor.client.ExecutorClient;
import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.nodecfg.NodeConfigurationService;
import nu.marginalia.nodecfg.model.NodeConfiguration;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.id.ServiceId;
import nu.marginalia.storage.FileStorageService;
@ -27,7 +26,7 @@ public class ControlSysActionsService {
private final MqOutbox apiOutbox;
private final DomainTypes domainTypes;
private final ServiceEventLog eventLog;
private final RendererFactory rendererFactory;
private final ControlRendererFactory rendererFactory;
private final ControlActorService controlActorService;
private final NodeConfigurationService nodeConfigurationService;
private final FileStorageService fileStorageService;
@ -37,7 +36,7 @@ public class ControlSysActionsService {
public ControlSysActionsService(MessageQueueFactory mqFactory,
DomainTypes domainTypes,
ServiceEventLog eventLog,
RendererFactory rendererFactory,
ControlRendererFactory rendererFactory,
ControlActorService controlActorService,
NodeConfigurationService nodeConfigurationService,
FileStorageService fileStorageService,

View File

@ -3,8 +3,8 @@ package nu.marginalia.control.sys.svc;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.db.DomainTypes;
import nu.marginalia.renderer.RendererFactory;
import spark.Request;
import spark.Response;
import spark.Spark;
@ -17,12 +17,12 @@ import java.util.Map;
public class DataSetsService {
private final HikariDataSource dataSource;
private final RendererFactory rendererFactory;
private final ControlRendererFactory rendererFactory;
private final DomainTypes domainTypes;
@Inject
public DataSetsService(HikariDataSource dataSource,
RendererFactory rendererFactory,
ControlRendererFactory rendererFactory,
DomainTypes domainTypes) {
this.dataSource = dataSource;
this.rendererFactory = rendererFactory;

View File

@ -3,11 +3,11 @@ package nu.marginalia.control.sys.svc;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.control.ControlRendererFactory;
import nu.marginalia.control.Redirects;
import nu.marginalia.control.sys.model.MessageQueueEntry;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.renderer.RendererFactory;
import spark.Request;
import spark.Response;
import spark.Spark;
@ -24,12 +24,12 @@ import java.util.Optional;
public class MessageQueueService {
private final HikariDataSource dataSource;
private final RendererFactory rendererFactory;
private final ControlRendererFactory rendererFactory;
private final MqPersistence persistence;
@Inject
public MessageQueueService(HikariDataSource dataSource,
RendererFactory rendererFactory,
ControlRendererFactory rendererFactory,
MqPersistence persistence) {
this.dataSource = dataSource;
this.rendererFactory = rendererFactory;

View File

@ -0,0 +1,13 @@
<h1 class="my-3">Export Data</h1>
This will export database data: Domains, blacklist and domain links.
<form method="post" action="actions/export-data" onsubmit="return confirm('Confirm export')">
<div class="my-3 py-3">
<div class="row">
<div class="col">
<button type="submit" class="btn btn-primary">Export</button>
</div>
</div>
</div>
</form>

View File

@ -0,0 +1,38 @@
<h1 class="my-3">Load</h1>
{{#unless allProcessedData}}
No <a href="storage/processed">processed data</a> currently exists on this node. Loading is not possible.
{{/unless}}
{{#if allProcessedData}}
<div class="my-3 p-3 border bg-light">
This will load <a href="storage/processed">processed data</a> on Node {{node.id}}. This isn't typically necessary for crawled data,
but is needed for sideloaded data. Select the processed data to load below.
</div>
<form method="post" action="actions/load">
<table class="table">
<tr>
<th>Use</th>
<th>Path</th>
<th>Description</th>
<th>Details</th>
</tr>
{{#each allProcessedData}}
<tr>
<td><input {{#if active}}checked{{/if}} class="form-check-input" type="checkbox" name="source" id="{{id}}" value="{{id}}"></td>
<td><label for="{{id}}" class="form-check-label" >{{path}}</label></td>
<td>{{description}}</td>
<td><a href="/nodes/{{node}}/storage/details?fid={{id}}">[Details]</a></td>
</tr>
{{/each}}
</table>
<button
class="btn btn-primary me-md-2"
onclick="return confirm('Confirm loading of the selected data on node {{node.id}}');"
type="submit">
Trigger Loading</button>
</form>
{{/if}}

View File

@ -0,0 +1,53 @@
<h1 class="my-3">New Crawl</h1>
{{#unless allCrawlSpecs}}
No crawling specifications have been created. You must
<a href="/nodes/{{node.id}}/storage/new-specs">create a crawl spec</a> before you can crawl.
{{/unless}}
{{#if allCrawlSpecs}}
<div class="my-3 p-3 border bg-light">
This will perform a new crawl on node {{node.id}} based on the crawl spec you select below.
Additional specifications can be created <a href="/nodes/{{node.id}}/storage/new-specs">with this form</a>.
</div>
<div class="my-3 p-3 border text-danger">
<p><strong>IMPORTANT!</strong> Be sure you've read and understood the
<a href="https://github.com/MarginaliaSearch/MarginaliaSearch/blob/master/doc/crawling.md">crawling documentation</a>
before you begin a crawl. You will be accessing real servers from your connection, and you may end up on IP greylists
that temporarily block your access to those servers for up to a few weeks; on rare occasions permanently. The crawler
is generally polite and well-behaved, but it is still a crawler, and it will be accessing a lot of pages.
</p>
<p>
You run the risk of getting in trouble with your ISP, the law, your partner, your landlord, and/or your parents;
depending on terms of service, jurisdiction, marital status, living conditions and age.
</p>
<p>
This action is performed at your own risk.
</p>
</div>
<form method="post" action="actions/new-crawl">
<table class="table">
<tr>
<th>Use</th>
<th>Path</th>
<th>Description</th>
<th>Details</th>
</tr>
{{#each allCrawlSpecs}}
<tr>
<td><input {{#if active}}checked{{/if}} class="form-check-input" type="checkbox" name="source" id="{{id}}" value="{{id}}"></td>
<td><label for="{{id}}" class="form-check-label" >{{path}}</label></td>
<td>{{description}}</td>
<td><a href="/nodes/{{node}}/storage/details?fid={{id}}">[Details]</a></td>
</tr>
{{/each}}
</table>
<button
class="btn btn-primary me-md-2"
onclick="return confirm('Confirm crawling on node {{node.id}}');"
type="submit">
Trigger New Crawl</button>
</form>
{{/if}}

View File

@ -0,0 +1,39 @@
<h1 class="my-3">Process Data</h1>
<div class="my-3 p-3 border bg-light">
<p>
This will reprocess a set of crawl data on Node {{node.id}}.
</p>
<p>
The reprocessing will extract keywords, titles, and other metadata from the raw crawl data
and generate parquet files that can be loaded into the index. This loading will be done automatically,
and unless the node is <a href="/nodes/{{node.id}}/configuration">configured</a> to keep the processed data,
the processed data will be deleted after loading to save disk space.
</p>
</div>
<form method="post" action="actions/process">
<p>Select the crawl data to use:</p>
<table class="table">
<tr>
<th>Use</th>
<th>Path</th>
<th>Description</th>
<th>Details</th>
</tr>
{{#each allCrawlData}}
<tr>
<td><input {{#if active}}checked{{/if}} class="form-check-input" type="radio" name="source" id="{{id}}" value="{{id}}"></td>
<td><label for="{{id}}" class="form-check-label" >{{path}}</label></td>
<td>{{description}}</td>
<td><a href="/nodes/{{node}}/storage/details?fid={{id}}">[Details]</a></td>
</tr>
{{/each}}
</table>
<button
class="btn btn-primary me-md-2"
onclick="return confirm('Confirm reprocessing of the selected data on node {{node.id}}');"
type="submit">
Trigger Reprocessing</button>
</form>

View File

@ -0,0 +1,48 @@
<h1 class="my-3">Recrawl</h1>
{{#unless allCrawlData}}
<p>
No <a href="storage/crawl">crawl data</a> currently exists on this node. Recrawling is not possible.
First, create a crawl spec and do an initial crawl. Then, you can recrawl.
</p>
{{/unless}}
{{#if allCrawlData}}
<div class="my-3 p-3 border bg-light">
<p>
This will perform a re-crawl on node {{node.id}}.
</p><p>
A re-crawl is a smart crawl that uses the existing crawl data
as a starting point. It will use the existing crawl data to perform conditional GETs on the domains in the
crawl spec. If the document has changed, it will be re-crawled. If it has not changed, it will be skipped,
and the previous data will be retained. This is both faster and easier on the target server.
</p>
</div>
<form method="post" action="actions/recrawl">
<p>Select the crawl data to use:</p>
<table class="table">
<tr>
<th>Use</th>
<th>Path</th>
<th>Description</th>
<th>Details</th>
</tr>
{{#each allCrawlData}}
<tr>
<td><input {{#if active}}checked{{/if}} class="form-check-input" type="radio" name="source" id="{{id}}" value="{{id}}"></td>
<td><label for="{{id}}" class="form-check-label" >{{path}}</label></td>
<td>{{description}}</td>
<td><a href="/nodes/{{node}}/storage/details?fid={{id}}">[Details]</a></td>
</tr>
{{/each}}
</table>
<button
class="btn btn-primary me-md-2"
onclick="return confirm('Confirm recrawl of the selected data on node {{node.id}}');"
type="submit">
Trigger Recrawl</button>
</form>
{{/if}}

View File

@ -0,0 +1,7 @@
<h1 class="my-3">Repartition</h1>
<p>This will recalculate the rankings and search sets for the index</p>
<form method="post" action="actions/repartition-index" onsubmit="return confirm('Confirm repartition')">
<button type="submit" class="btn btn-primary">Trigger Repartitioning</button>
</form>

View File

@ -0,0 +1,38 @@
<h1 class="my-3">Restore Backup</h1>
{{#unless allBackups}}
No <a href="storage/backups">backups</a> exist on this node. Backups are created when loading data into
the index.
{{/unless}}
{{#if allBackups}}
<div class="my-3 p-3 border bg-light">
This will restore an index backup on Node {{node.id}}. The index will need to be re-generated after this operation,
this happens automatically.
</div>
<form method="post" action="actions/restore-backup">
<table class="table">
<tr>
<th>Use</th>
<th>Path</th>
<th>Description</th>
<th>Details</th>
</tr>
{{#each allBackups}}
<tr>
<td><input {{#if active}}checked{{/if}} class="form-check-input" type="radio" name="source" id="{{id}}" value="{{id}}"></td>
<td><label for="{{id}}" class="form-check-label" >{{path}}</label></td>
<td>{{description}}</td>
<td><a href="/nodes/{{node}}/storage/details?fid={{id}}">[Details]</a></td>
</tr>
{{/each}}
</table>
<button
class="btn btn-primary me-md-2"
onclick="return confirm('Confirm restoring selected data on node {{node.id}}');"
type="submit">
Trigger Restoration</button>
</form>
{{/if}}

View File

@ -0,0 +1,55 @@
<h1 class="my-3">Sideload Encyclopedia</h1>
<div class="my-3 p-3 border bg-light">
<p>This will sideload a pre-converted MediaWiki-style OpenZim data set.
See the <a href="https://github.com/MarginaliaSearch/MarginaliaSearch/blob/master/doc/sideloading-howto.md">sideloading howto</a>
for instructions on how to produce this file. </p>
<p>Place an articles.db file in the upload directory on the server, and select it from the list
below. </p>
</div>
<form method="post" action="actions/sideload-encyclopedia" onsubmit="return confirm('Confirm sideloading')">
<div class="my-3 py-3">
<table class="table">
<tr><th></th><th>Filename</th><th>Size</th><th>Last Modified</th></tr>
{{#each uploadDirContents.items}}
<tr>
<td><input {{#if directory}}disabled{{/if}} class="form-check-input" type="radio" name="source" id="{{name}}" value="{{name}}"></td>
<td {{#if directory}}class="text-muted"{{/if}}>
<label class="form-check-label" for="{{name}}">{{name}}{{#if directory}}/{{/if}}</label>
</td>
<td>{{#unless directory}}{{size}}{{/unless}}</td>
<td>{{lastModifiedTime}}</td>
</tr>
{{/each}}
{{#unless uploadDirContents.items}}
<tr>
<td colspan="4">Nothing found in upload directory</td>
</tr>
{{/unless}}
</table>
<p>
<small class="text-muted">
The upload directory is typically mounted to /uploads on the server. The external
directory is typically something like index-{{node.id}}/uploads.
</small>
</p>
<label for="baseUrl" class="form-label">Base URL</label>
<div class="col mb-3">
<input id="baseUrl" name="baseUrl" class="form-control" value="https://en.wikipedia.org/wiki/">
</div>
<small class="text-muted">
The base URL is used to construct the URL for each article. The base URL is prepended to the article path,
and the result is used as the URL for the article. The default value will point to English Wikipedia.
</small>
<div class="row mt-3">
<div class="col">
<button {{#unless uploadDirContents.items}}disabled{{/unless}} type="submit" class="btn btn-primary">Sideload Encyclopedia</button>
</div>
</div>
</div>
</form>

View File

@ -0,0 +1,41 @@
<h1 class="my-3">Sideload Stackexchange</h1>
<div class="my-3 p-3 border bg-light">
This will load a set of pre-converted stackexchange .db files
<p>
See the <a href="https://github.com/MarginaliaSearch/MarginaliaSearch/blob/master/doc/sideloading-howto.md">sideloading howto</a>
for instructions on how to produce this file. </p>
</div>
<form method="post" action="actions/sideload-stackexchange" onsubmit="return confirm('Confirm sideloading')">
<div class="my-3 py-3">
<table class="table">
<tr><th></th><th>Filename</th><th>Size</th><th>Last Modified</th></tr>
{{#each uploadDirContents.items}}
<tr>
<td><input {{#unless directory}}disabled{{/unless}} class="form-check-input" type="radio" name="source" id="{{name}}" value="{{name}}"></td>
<td {{#unless directory}}class="text-muted"{{/unless}}>
<label class="form-check-label" for="{{name}}">{{name}}{{#if directory}}/{{/if}}</label>
</td>
<td>{{#unless directory}}{{size}}{{/unless}}</td>
<td>{{lastModifiedTime}}</td>
</tr>
{{/each}}
{{#unless uploadDirContents.items}}
<tr>
<td colspan="4">Nothing found in upload directory</td>
</tr>
{{/unless}}
</table>
<p>
<small class="text-muted">
The upload directory is typically mounted to /uploads on the server. The external
directory is typically something like index-{{node.id}}/uploads.
</small>
</p>
<button type="submit" {{#unless uploadDirContents.items}}disabled{{/unless}} class="btn btn-primary">Sideload Stackexchange</button>
</div>
</form>

View File

@ -0,0 +1,39 @@
<h1 class="my-3">Sideload WARC</h1>
<div class="my-3 p-3 border bg-light">
This will load HTML from a directory structure containing WARC files, one per domain.
A WARC export can be created using e.g. wget:
<p><code>$ wget -r --warc-file=www.marginalia.nu www.marginalia.nu</code></p>
</div>
<form method="post" action="actions/sideload-warc" onsubmit="return confirm('Confirm sideloading')">
<div class="my-3 py-3">
<table class="table">
<tr><th></th><th>Filename</th><th>Size</th><th>Last Modified</th></tr>
{{#each uploadDirContents.items}}
<tr>
<td><input {{#unless directory}}disabled{{/unless}} class="form-check-input" type="radio" name="source" id="{{name}}" value="{{name}}"></td>
<td {{#unless directory}}class="text-muted"{{/unless}}>
<label class="form-check-label" for="{{name}}">{{name}}{{#if directory}}/{{/if}}</label>
</td>
<td>{{#unless directory}}{{size}}{{/unless}}</td>
<td>{{lastModifiedTime}}</td>
</tr>
{{/each}}
{{#unless uploadDirContents.items}}
<tr>
<td colspan="4">Nothing found in upload directory</td>
</tr>
{{/unless}}
</table>
<p>
<small class="text-muted">
The upload directory is typically mounted to /uploads on the server. The external
directory is typically something like index-{{node.id}}/uploads.
</small>
</p>
<button type="submit" {{#unless uploadDirContents.items}}disabled{{/unless}} class="btn btn-primary">Sideload WARCs</button>
</div>
</form>

View File

@ -7,352 +7,20 @@
<div class="container">
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
{{> control/node/partial-node-nav }}
<nav class="nav nav-tabs">
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/">Overview</a>
</li>
<li class="nav-item">
<a class="nav-link active" href="/nodes/{{node.id}}/actions">Actions</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actors">Actors</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/storage/">Storage</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/configuration">Configuration</a>
</li>
</nav>
<div class="accordion mt-1" id="accordionActions">
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed"
type="button"
data-bs-toggle="collapse"
data-bs-target="#collapseTriggerCrawl"
aria-expanded="false"
aria-controls="collapseTriggerCrawl">
Trigger Crawl
</button>
</h2>
<div id="collapseTriggerCrawl" class="accordion-collapse collapse p-3" data-bs-parent="#accordionActions">
{{#unless currentCrawlData}}
No <a href="storage/crawl">crawl data</a> is currently set as active for this node. A crawl is not possible.
{{/unless}}
{{#if currentCrawlData}}
This will trigger a crawl on Node {{node.id}}. This <a href="storage/crawl">crawl data</a> will be loaded:
<table class="table">
<tr>
<th>Date</th>
<th>Path</th>
<th>Description</th>
</tr>
{{#each currentCrawlData}}
<tr>
<td>{{date}}</td>
<td><a href="storage/details?fid={{id}}">{{path}}</a></td>
<td>{{description}}</td>
</tr>
{{/each}}
</table>
<form method="post" action="storage/recrawl-auto">
<button
class="btn btn-primary me-md-2"
onclick="return confirm('Confirm recrawl of the current ACTIVE crawl set on node {{node.id}}');"
type="submit">
Trigger Recrawl</button>
</form>
{{/if}}
</div>
</div>
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed"
type="button"
data-bs-toggle="collapse"
data-bs-target="#collapseProcessCrawlData"
aria-expanded="false"
aria-controls="collapseProcessCrawlData">
Process Crawl Data
</button>
</h2>
<div id="collapseProcessCrawlData" class="accordion-collapse collapse p-3" data-bs-parent="#accordionActions">
{{#unless currentCrawlData}}
No <a href="storage/crawl">crawl data</a> is currently set as active for this node. Processing is not possible.
{{/unless}}
{{#if currentCrawlData}}
This will reprocess crawl data on Node {{node.id}}. This <a href="storage/crawl">crawl data</a> will be loaded:
<table class="table">
<tr>
<th>Date</th>
<th>Path</th>
<th>Description</th>
</tr>
{{#each currentCrawlData}}
<tr>
<td>{{date}}</td>
<td><a href="storage/details?fid={{id}}">{{path}}</a></td>
<td>{{description}}</td>
</tr>
{{/each}}
</table>
<form method="post" action="storage/process-auto">
<button
class="btn btn-primary me-md-2"
onclick="return confirm('Confirm reprocessing of the current ACTIVE data on node {{node.id}}');"
type="submit">
Trigger Reprocessing</button>
</form>
{{/if}}
</div>
</div>
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed"
type="button"
data-bs-toggle="collapse"
data-bs-target="#collapseLoadData"
aria-expanded="false"
aria-controls="collapseLoadData">
Load Processed Data
</button>
</h2>
<div id="collapseLoadData" class="accordion-collapse collapse p-3" data-bs-parent="#accordionActions">
{{#unless currentProcessData}}
No <a href="storage/processed">processed data</a> is currently set as active for this node. Loading is not possible.
{{/unless}}
{{#if currentProcessData}}
This will load processed data on Node {{node.id}}. This <a href="storage/processed">processed data</a> will be loaded:
<table class="table">
<tr>
<th>Date</th>
<th>Path</th>
<th>Description</th>
</tr>
{{#each currentProcessData}}
<tr>
<td>{{date}}</td>
<td><a href="storage/details?fid={{id}}">{{path}}</a></td>
<td>{{description}}</td>
</tr>
{{/each}}
</table>
<form method="post" action="storage/load-selected">
<button
class="btn btn-primary me-md-2"
onclick="return confirm('Confirm loading of the current ACTIVE data on node {{node.id}}');"
type="submit">
Trigger Loading</button>
</form>
{{/if}}
</div>
</div>
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed"
type="button"
data-bs-toggle="collapse"
data-bs-target="#collapseRepartition"
aria-expanded="true"
aria-controls="collapseRepartition">
Repartition Index
</button>
</h2>
<div id="collapseRepartition" class="accordion-collapse collapse p-3" data-bs-parent="#accordionActions">
<p>This will recalculate the rankings and search sets for the index</p>
<form method="post" action="actions/repartition-index" onsubmit="return confirm('Confirm repartition')">
<button type="submit" class="btn btn-primary">Trigger Repartitioning</button>
</form>
</div>
</div>
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed"
type="button"
data-bs-toggle="collapse"
data-bs-target="#collapseSideloadEncyclopedia"
aria-expanded="false"
aria-controls="collapseSideloadEncyclopedia">
Sideload Encyclopedia
</button>
</h2>
<div id="collapseSideloadEncyclopedia" class="accordion-collapse collapse p-3" data-bs-parent="#accordionActions">
This will load pre-digested encyclopedia data from an <tt>encyclopedia.marginalia.nu</tt>-style database.
<form method="post" action="actions/sideload-encyclopedia" onsubmit="return confirm('Confirm sideloading')">
<div class="my-3 py-3">
<label for="baseUrl" class="form-label">Base URL</label>
<div class="col mb-3">
<input id="baseUrl" name="baseUrl" class="form-control" value="https://en.wikipedia.org/wiki/">
</div>
<label for="source" class="form-label">Path to articles.db accessible from the node on the server</label>
<div class="row">
<div class="col">
<input id="source" name="source" class="form-control" value="">
</div>
<div class="col">
<button type="submit" class="btn btn-primary">Sideload Encyclopedia</button>
</div>
</div>
</div>
</form>
</div>
</div>
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed"
type="button"
data-bs-toggle="collapse"
data-bs-target="#collapseSideloadStackexchange"
aria-expanded="false"
aria-controls="collapseSideloadStackexchange">
Sideload Stackexchange
</button>
</h2>
<div id="collapseSideloadStackexchange" class="accordion-collapse collapse p-3" data-bs-parent="#accordionActions">
This will load a set of pre-converted stackexchange .db files
<form method="post" action="actions/sideload-stackexchange" onsubmit="return confirm('Confirm sideloading')">
<div class="my-3 py-3">
<label for="source" class="form-label">Directory with .db files accessible from the node on the server</label>
<div class="row">
<div class="col">
<input id="source" name="source" class="form-control" value="">
</div>
<div class="col">
<button type="submit" class="btn btn-primary">Sideload Stackexchange</button>
</div>
</div>
</div>
</form>
</div>
</div>
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed"
type="button"
data-bs-toggle="collapse"
data-bs-target="#collapseSideloadDirtree"
aria-expanded="false"
aria-controls="collapseSideloadDirtree">
Sideload Dirtree
</button>
</h2>
<div id="collapseSideloadDirtree" class="accordion-collapse collapse p-3" data-bs-parent="#accordionActions">
This will load HTML from a directory structure as specified by a YAML file.
<form method="post" action="actions/sideload-dirtree" onsubmit="return confirm('Confirm sideloading')">
<div class="my-3 py-3">
<label for="source" class="form-label">Path yaml file location on server</label>
<div class="row">
<div class="col">
<input id="source" name="source" class="form-control" value="">
</div>
<div class="col">
<button type="submit" class="btn btn-primary">Sideload Dirtree</button>
</div>
</div>
</div>
</form>
</div>
</div>
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed"
type="button"
data-bs-toggle="collapse"
data-bs-target="#collapseSideloadWarc"
aria-expanded="false"
aria-controls="collapseSideloadWarc">
Sideload WARC Files
</button>
</h2>
<div id="collapseSideloadWarc" class="accordion-collapse collapse p-3" data-bs-parent="#accordionActions">
This will load HTML from a directory structure containing WARC files, one per domain.
<form method="post" action="actions/sideload-warc" onsubmit="return confirm('Confirm sideloading')">
<div class="my-3 py-3">
<label for="source" class="form-label">Path WARC file location on server</label>
<div class="row">
<div class="col">
<input id="source" name="source" class="form-control" value="">
</div>
<div class="col">
<button type="submit" class="btn btn-primary">Sideload WARC Files</button>
</div>
</div>
</div>
</form>
</div>
</div>
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed"
type="button"
data-bs-toggle="collapse"
data-bs-target="#collapseExportData"
aria-expanded="false"
aria-controls="collapseExportData">
Export Database Data
</button>
</h2>
<div id="collapseExportData" class="accordion-collapse collapse p-3" data-bs-parent="#accordionActions">
This will export database data: Domains, blacklist and domain links.
<form method="post" action="actions/export-data" onsubmit="return confirm('Confirm export')">
<div class="my-3 py-3">
<div class="row">
<div class="col">
<button type="submit" class="btn btn-primary">Export</button>
</div>
</div>
</div>
</form>
</div>
</div>
</div>
{{#if view.new-crawl}} {{> control/node/actions/partial-new-crawl }} {{/if}}
{{#if view.re-crawl}} {{> control/node/actions/partial-recrawl }} {{/if}}
{{#if view.process}} {{> control/node/actions/partial-process }} {{/if}}
{{#if view.load}} {{> control/node/actions/partial-load }} {{/if}}
{{#if view.repartition}} {{> control/node/actions/partial-repartition }} {{/if}}
{{#if view.sideload-encyclopedia}} {{> control/node/actions/partial-sideload-encyclopedia }} {{/if}}
{{#if view.sideload-stackexchange}} {{> control/node/actions/partial-sideload-stackexchange }} {{/if}}
{{#if view.sideload-warc}} {{> control/node/actions/partial-sideload-warc }} {{/if}}
{{#if view.export-data}} {{> control/node/actions/partial-export-data }} {{/if}}
{{#if view.restore-backup}} {{> control/node/actions/partial-restore-backup }} {{/if}}
<div class="mt-10">&nbsp;</div>
</div>
</body>
{{> control/partials/foot-includes }}
</html>

View File

@ -7,31 +7,7 @@
<div class="container">
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/">Overview</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actions">Actions</a>
</li>
<li class="nav-item">
<a class="nav-link active" href="/nodes/{{node.id}}/actors">Actors</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/storage/">Storage</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/configuration">Configuration</a>
</li>
</nav>
{{> control/node/partial-node-nav }}
<div class="mt-2">
{{> control/partials/actors-table }}

View File

@ -7,31 +7,7 @@
<div class="container">
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/">Overview</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actions">Actions</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actors">Actors</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/storage/">Storage</a>
</li>
<li class="nav-item">
<a class="nav-link active" href="/nodes/{{node.id}}/configuration">Configuration</a>
</li>
</nav>
{{> control/node/partial-node-nav }}
<h1 class="my-5">Node Configuration</h1>

View File

@ -7,39 +7,21 @@
<div class="container">
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/">Overview</a>
</li>
<li class="nav-item">
<a class="nav-link active" href="/nodes/{{node.id}}/actions">Actions</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actors">Actors</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/storage/">Storage</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/configuration">Configuration</a>
</li>
</nav>
{{> control/node/partial-node-nav }}
<div class="container">
{{>control/partials/storage-types}}
<div class="col-4">
<div>
<h2>Create New Specification</h2>
<p>To create a new specification fill out the form below. </p>
<div class="my-3 p-3 border bg-light">
<p>A crawling specification is a list of domain names to crawl.</p>
<p>
If you are just looking to test the software, feel free to use <a href="https://downloads.marginalia.nu/domain-list-test.txt">this
short list of marginalia-related websites</a>, that are safe to crawl repeatedly without causing any problems.
</p>
</div>
<form method="post" action="new-specs">
<div class="form">

View File

@ -7,31 +7,7 @@
<div class="container">
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">
<a class="nav-link active" href="/nodes/{{node.id}}/">Overview</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actions">Actions</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actors">Actors</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/storage/">Storage</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/configuration">Configuration</a>
</li>
</nav>
{{> control/node/partial-node-nav }}
<div class="mt-2">
{{> control/partials/processes-table }}

View File

@ -7,31 +7,7 @@
<div class="container">
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/">Overview</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actions">Actions</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actors">Actors</a>
</li>
<li class="nav-item">
<a class="nav-link active" aria-current="page" href="/nodes/{{node.id}}/storage/">Storage</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/configuration">Configuration</a>
</li>
</nav>
{{> control/node/partial-node-nav }}
<div class="container">
{{>control/partials/storage-types}}

View File

@ -14,24 +14,7 @@
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/">Overview</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actions">Actions</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actors">Actors</a>
</li>
<li class="nav-item">
<a class="nav-link active" aria-current="page" href="/nodes/{{node.id}}/storage/">Storage</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/configuration">Configuration</a>
</li>
</nav>
{{> control/node/partial-node-nav }}
<div class="container">
{{>control/partials/storage-types}}
@ -59,14 +42,6 @@
<th>Trigger</th>
</tr>
{{#with storage.self}}
{{#if isCrawlable}}
<form method="post" action="/nodes/{{node.id}}/storage/crawl/{{storage.id}}" onsubmit="return confirm('Confirm crawling of {{storage.path}}')">
<tr>
<td>Perform a full crawl of this data into a new file storage area</td>
<td><button class="btn btn-primary" type="submit">Crawl</button></td>
</tr>
</form>
{{/if}}
{{#if isStatusNew}}
<form method="post" action="/nodes/{{node.id}}/storage/reset-state/{{storage.id}}" onsubmit="return confirm('Confirm removal of \'NEW\' state for {{storage.path}}')">
<tr>
@ -75,6 +50,7 @@
</tr>
</form>
{{/if}}
{{#if isAtagsExportable}}
<form method="post" action="/nodes/{{node.id}}/storage/{{storage.id}}/export-atags" onsubmit="return confirm('Confirm export of anchor tags from {{storage.path}}')">
<tr>
@ -83,14 +59,6 @@
</tr>
</form>
{{/if}}
{{#if isRestorable}}
<form method="post" action="/nodes/{{node.id}}/storage/backup-restore/{{storage.id}}" onsubmit="return confirm('Confirm restoring backup {{storage.path}}')">
<tr>
<td>Restore into live index</td>
<td><button class="btn btn-primary" type="submit">Restore</button></td>
</tr>
</form>
{{/if}}
{{#if isDeletable}}
<form method="post" action="/nodes/{{node.id}}/storage/{{storage.id}}/delete" onsubmit="return confirm('Confirm deletion of {{storage.path}}')">

View File

@ -8,31 +8,7 @@
<div class="container">
<nav aria-label="breadcrumb">
<ol class="breadcrumb">
<li class="breadcrumb-item"><a href="/nodes">nodes</a></li>
<li class="breadcrumb-item">node-{{node.id}}</li>
</ol>
</nav>
<nav class="nav nav-tabs">
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/">Overview</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actions">Actions</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/actors">Actors</a>
</li>
<li class="nav-item">
<a class="nav-link active" aria-current="page" href="/nodes/{{node.id}}/storage/">Storage</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/nodes/{{node.id}}/configuration">Configuration</a>
</li>
</nav>
{{> control/node/partial-node-nav }}
<div class="container">
{{>control/partials/storage-types}}
@ -72,14 +48,14 @@
{{#if storage.noState}}
<div class="form-check form-switch">
<label class="form-check-label" for="flexSwitchCheckDefault">Archived</label>
<input class="form-check-input" type="checkbox" id="flexSwitchCheckDefault" onclick="setActive({{storage.id}})">
<input class="form-check-input" type="checkbox" id="flexSwitchCheckDefault">
</div>
{{/if}}
{{#if storage.active}}
<div class="form-check form-switch">
<label class="form-check-label" for="flexSwitchCheckDefault">Active</label>
<input class="form-check-input" type="checkbox" id="flexSwitchCheckDefault" checked onclick="setArchived({{storage.id}})">
<input class="form-check-input" type="checkbox" id="flexSwitchCheckDefault">
</div>
{{/if}}
</td>
@ -96,80 +72,7 @@
</tr>
{{/unless}}
</table>
<script language="javascript">
function setActive(fileId) {
var xhr = new XMLHttpRequest();
xhr.open("POST", "/nodes/{{node.id}}/storage/"+fileId+"/enable");
xhr.onload = () => {
window.location.href=window.location.href;
}
xhr.send();
}
function setArchived(fileId) {
var xhr = new XMLHttpRequest();
xhr.open("POST", "/nodes/{{node.id}}/storage/"+fileId+"/disable");
xhr.onload = () => {
window.location.href=window.location.href;
}
xhr.send();
}
</script>
{{#if view.specs}}
<div class="m-3 p-3 border bg-light">
<h3 class="my-3">About Specs</h3>
<p>Crawl specifications are a set of domains to be crawled. Normally the crawler uses the domain database
to find domains to crawl, but sometimes, when bootstrapping a system or during testing, it's useful
to be able to specify which domains to crawl with more control.
</p>
<p>The <em>ACTIVE</em> status does nothing for crawl specs.</p>
<p><a href="new-specs">Create New Crawl Spec</a></p>
</div>
{{/if}}
{{#if view.crawl}}
<div class="m-3 p-3 border bg-light">
<h3 class="my-3">About Crawls</h3>
<p>Crawl data is the downloaded and compressed raw HTML of a website.</p>
<p>A new crawl may be bootstrapped from a Crawl Set.</p>
<p>Unlike other data sets, only a single crawl set may be <em>ACTIVE</em> at any given time.</p>
<h4 class="my-3">Recrawl</h4>
<p>A set of existing crawl data can be <em>recrawled</em> and amended with new domains
and a fresh look at the existing domains. This is an operation that uses the currently <em>ACTIVE</em>
crawl data.
</p>
<h4 class="my-3">Processing</h4>
<p>
Before the data can be loaded into the search engine, it needs to be processed. The process operation
uses the currently <em>ACTIVE</em> crawl set.
</p>
</div>
{{/if}}
{{#if view.processed}}
<div class="m-3 p-3 border bg-light">
<h3 class="my-3">Processed Data</h3>
<p>
Processed data is HTML that has been analyzed and had its properties and keywords extracted. If one or more
processed data sets is <em>ACTIVE</em>, it can be Loaded into the system to make it searchable. Loading processed
data also creates a backup of the Index Journal and Link Database that can be quickly restored in the event
of a problem.
</p>
<p>
Processed data may come from crawl data, but it may also be sideloaded into the system from e.g.
stackexchange data dumps through the Node Actions view.
</p>
</div>
{{/if}}
{{#if view.backup}}
<div class="m-3 p-3 border bg-light">
<h3 class="my-3">Backups</h3>
<p>
Backups are snapshots of the Index Journal and Link Database immediately before a searchable index
is constructed. They are relatively quick to restore and allow you to roll back a problematic deployment
or recover from disk failures (assuming they are on a different drive).
</p>
</div>
{{/if}}
</div>
</div>
</body>
{{> control/partials/foot-includes }}

View File

@ -0,0 +1,33 @@
{{!-- Heading and tab bar shared by the per-node pages.
      Reads `node.id` for the heading and every link target, and the `tab.overview`,
      `tab.actions`, `tab.actors`, `tab.storage` and `tab.config` flags to decide
      which tab gets the `active` class. --}}
<h1 class="my-3">Index Node {{node.id}}</h1>
{{!-- The tab entries are <li> elements, which are only valid inside a list element,
      so the Bootstrap nav classes sit on a <ul> and the <nav> is kept as the landmark. --}}
<nav aria-label="Index node sections">
    <ul class="nav nav-tabs">
        <li class="nav-item">
            <a class="nav-link {{#if tab.overview}}active{{/if}}" href="/nodes/{{node.id}}/">Overview</a>
        </li>
        <li class="nav-item dropdown">
            <a class="nav-link dropdown-toggle {{#if tab.actions}}active{{/if}}" data-bs-toggle="dropdown" href="#" role="button" aria-expanded="false">Actions</a>
            {{!-- Each entry opens the actions page with the form chosen by the `view` query parameter. --}}
            <ul class="dropdown-menu">
                <li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=new-crawl">New Crawl</a></li>
                <li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=re-crawl">Re-crawl</a></li>
                <li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=process">Process Crawl Data</a></li>
                <li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=load">Load Crawl Data</a></li>
                <li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=repartition">Repartition Index</a></li>
                <li><hr class="dropdown-divider"></li>
                <li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-encyclopedia">Sideload Encyclopedia</a></li>
                <li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-stackexchange">Sideload Stackexchange</a></li>
                <li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-warc">Sideload WARC Files</a></li>
                <li><hr class="dropdown-divider"></li>
                <li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=export-data">Export Database Data</a></li>
                <li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=restore-backup">Restore Index Backup</a></li>
            </ul>
        </li>
        <li class="nav-item">
            <a class="nav-link {{#if tab.actors}}active{{/if}}" href="/nodes/{{node.id}}/actors">Actors</a>
        </li>
        <li class="nav-item">
            <a class="nav-link {{#if tab.storage}}active{{/if}}" href="/nodes/{{node.id}}/storage/">Storage</a>
        </li>
        <li class="nav-item">
            <a class="nav-link {{#if tab.config}}active{{/if}}" href="/nodes/{{node.id}}/configuration">Configuration</a>
        </li>
    </ul>
</nav>

View File

@ -18,11 +18,20 @@
<li><a class="dropdown-item" href="/review-random-domains" title="Review random domains list">Random Exploration</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false">Index Nodes</a>
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="/nodes" title="Node Overview">Overview</a></li>
<li><hr class="dropdown-divider"></li>
{{#each global-context.nodes}}
<li><a class="dropdown-item" href="/nodes/{{node}}" title="Manage Node {{node}}">Node {{node}}</a></li>
{{/each}}
</ul>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false">System</a>
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="/actions" title="System actions">Actions</a></li>
<li><a class="dropdown-item" href="/nodes" title="View and configure index nodes">Nodes</a></li>
<li><a class="dropdown-item" href="/datasets" title="View and update the data sets">Datasets</a></li>
<li><a class="dropdown-item" href="/events" title="View the event log">Events</a></li>
<li><a class="dropdown-item" href="/message-queue" title="View or manipulate the system message queue">Message Queue</a></li>

View File

@ -65,6 +65,7 @@ public class ExecutorSvc extends Service {
Spark.post("/process/load", processingService::startLoad);
Spark.post("/process/adjacency-calculation", processingService::startAdjacencyCalculation);
Spark.get("/sideload/", sideloadService::listUploadDir, gson::toJson);
Spark.post("/sideload/dirtree", sideloadService::sideloadDirtree);
Spark.post("/sideload/warc", sideloadService::sideloadWarc);
Spark.post("/sideload/stackexchange", sideloadService::sideloadStackexchange);

View File

@ -1,12 +1,21 @@
package nu.marginalia.executor.svc;
import com.google.inject.Inject;
import nu.marginalia.WmsaHome;
import nu.marginalia.actor.ExecutorActor;
import nu.marginalia.actor.ExecutorActorControlService;
import nu.marginalia.actor.task.ConvertActor;
import nu.marginalia.executor.upload.UploadDirContents;
import nu.marginalia.executor.upload.UploadDirItem;
import spark.Request;
import spark.Response;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
public class SideloadService {
private final ExecutorActorControlService actorControlService;
@ -36,4 +45,15 @@ public class SideloadService {
actorControlService.startFrom(ExecutorActor.CONVERT, new ConvertActor.ConvertStackexchange(request.queryParams("path")));
return "";
}
/**
 * Lists the entries directly inside the directory returned by
 * {@code WmsaHome.getUploadDir()}.
 *
 * <p>Registered as the handler for {@code GET /sideload/}; neither the request
 * nor the response is read or modified here.
 *
 * @param request  the incoming request (unused)
 * @param response the outgoing response (unused)
 * @return the upload directory's path as a string, together with one
 *         {@code UploadDirItem} per directory entry, ordered by path
 * @throws IOException if the upload directory cannot be opened for listing,
 *                     e.g. because it does not exist
 */
public UploadDirContents listUploadDir(Request request, Response response) throws IOException {
    Path uploadDir = WmsaHome.getUploadDir();

    // The stream holds an open directory handle, so it must be closed when done.
    try (var items = Files.list(uploadDir)) {
        // Files.list gives no ordering guarantee; sort the paths so that
        // repeated listings of an unchanged directory come back in the same order.
        return new UploadDirContents(uploadDir.toString(),
                items.sorted().map(UploadDirItem::fromPath).toList());
    }
}
}

View File

@ -80,7 +80,7 @@ public class ExecutorSvcApiIntegrationTest {
/**
 * Checks that calling {@code triggerCrawl} on the executor client causes the
 * actor control service to be asked to start the {@code CRAWL} actor.
 */
@Test
public void triggerCrawl() throws Exception {
testInstances.client.triggerCrawl(Context.internal(), 0, "1");
testInstances.client.triggerCrawl(Context.internal(), 0, FileStorageId.of(1));
// Only the actor type is pinned; the second startFrom argument may be anything.
Mockito.verify(testInstances.actorControlService).startFrom(eq(ExecutorActor.CRAWL), any());
}

View File

@ -35,6 +35,7 @@ x-p1: &partition-1
- work-1:/work
- backup-1:/backup
- samples-1:/storage
- uploads-1:/uploads
networks:
- wmsa
depends_on:
@ -171,4 +172,10 @@ volumes:
driver_opts:
type: none
o: bind
device: run/node-1/backup
device: run/node-1/backup
uploads-1:
driver: local
driver_opts:
type: none
o: bind
device: run/node-1/uploads

View File

@ -29,6 +29,7 @@ x-p1: &partition-1
- work-1:/work
- backup-1:/backup
- samples-1:/storage
- uploads-1:/uploads
networks:
- wmsa
depends_on:
@ -47,6 +48,7 @@ x-p2: &partition-2
- work-2:/work
- backup-2:/backup
- samples-2:/storage
- uploads-2:/uploads
networks:
- wmsa
depends_on:
@ -275,6 +277,12 @@ volumes:
type: none
o: bind
device: run/node-1/backup
uploads-1:
driver: local
driver_opts:
type: none
o: bind
device: run/node-1/uploads
samples-2:
driver: local
driver_opts:
@ -298,4 +306,10 @@ volumes:
driver_opts:
type: none
o: bind
device: run/node-2/backup
device: run/node-2/backup
uploads-2:
driver: local
driver_opts:
type: none
o: bind
device: run/node-2/uploads

View File

@ -31,7 +31,8 @@ x-p1: &partition-1
- index-1:/idx
- work-1:/work
- backup-1:/backup
- samples-1:/storage
- storage-1:/storage
- uploads-1:/uploads
networks:
- wmsa
environment:
@ -200,12 +201,18 @@ volumes:
type: none
o: bind
device: ${INSTALL_DIR}/data
samples-1:
storage-1:
driver: local
driver_opts:
type: none
o: bind
device: ${INSTALL_DIR}/index-1/storage
uploads-1:
driver: local
driver_opts:
type: none
o: bind
device: ${INSTALL_DIR}/index-1/uploads
index-1:
driver: local
driver_opts:

View File

@ -20,7 +20,7 @@ function download_model {
pushd $(dirname $0)
mkdir -p model logs db install data samples
mkdir -p {node-1,node-2}/{work,index,backup,samples/export}
mkdir -p {node-1,node-2}/{work,index,backup,samples/export,uploads}
download_model model/English.DICT https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.DICT
download_model model/English.RDR https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.RDR