(*) WIP Control GUI redesign, executor-service, multi-node mq

This turned out to be very difficult to do in small isolated steps.

* Design overhaul of the control gui using bootstrap
* Move the actors out of control-service into a new executor-service that can be run on multiple nodes
* Add node-affinity to message queue
This commit is contained in:
Viktor Lofgren 2023-10-14 12:07:40 +02:00
parent 199c459697
commit 4baf9527d7
209 changed files with 4878 additions and 2672 deletions

View File

@ -0,0 +1,34 @@
// Gradle build script for a plain Java module that also applies the JVM test-suite plugin.
plugins {
id 'java'
id 'jvm-test-suite'
}
// Build with a Java 21 toolchain.
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(21))
}
}
dependencies {
// Other modules of this multi-project build
implementation project(':code:common:model')
implementation project(':code:api:index-api')
implementation project(':code:common:config')
implementation project(':code:common:db')
implementation project(':code:libraries:message-queue')
implementation project(':code:common:service-discovery')
implementation project(':code:common:service-client')
// External libraries, referenced through the `libs` accessor
implementation libs.bundles.slf4j
implementation libs.prometheus
implementation libs.notnull
implementation libs.guice
implementation libs.rxjava
implementation libs.protobuf
implementation libs.gson
// Test-only dependencies
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}

View File

@ -0,0 +1,108 @@
package nu.marginalia.executor.client;
import com.google.inject.Inject;
import nu.marginalia.WmsaHome;
import nu.marginalia.client.AbstractDynamicClient;
import nu.marginalia.client.Context;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.executor.model.ActorRunStates;
import nu.marginalia.executor.model.crawl.RecrawlParameters;
import nu.marginalia.executor.model.load.LoadParameters;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.service.descriptor.ServiceDescriptors;
import nu.marginalia.service.id.ServiceId;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.List;
/**
 * Client for the executor service.
 *
 * <p>Each method issues a single request against the given {@code node} and
 * waits for it to finish ({@code blockingSubscribe()} / {@code blockingFirst()}).
 * Most endpoints take their arguments in the URL and are posted with an empty
 * string body.
 */
public class ExecutorClient extends AbstractDynamicClient {

    @Inject
    public ExecutorClient(ServiceDescriptors descriptors) {
        super(descriptors.forId(ServiceId.Executor), WmsaHome.getHostsFile(), GsonFactory::get);
    }

    /** POSTs to {@code /actor/<actorName>/start}. */
    public void startFsm(Context ctx, int node, String actorName) {
        postWithEmptyBody(ctx, node, "/actor/" + actorName + "/start");
    }

    /** POSTs to {@code /actor/<actorName>/stop}. */
    public void stopFsm(Context ctx, int node, String actorName) {
        postWithEmptyBody(ctx, node, "/actor/" + actorName + "/stop");
    }

    /** POSTs to {@code /process/crawl/<fid>}. */
    public void triggerCrawl(Context ctx, int node, String fid) {
        postWithEmptyBody(ctx, node, "/process/crawl/" + fid);
    }

    /** POSTs the recrawl parameters as the request body to {@code /process/recrawl}. */
    public void triggerRecrawl(Context ctx, int node, RecrawlParameters parameters) {
        post(ctx, node, "/process/recrawl", parameters).blockingSubscribe();
    }

    /** POSTs to {@code /process/convert/<id>} using the numeric id of {@code fid}. */
    public void triggerConvert(Context ctx, int node, FileStorageId fid) {
        postWithEmptyBody(ctx, node, "/process/convert/" + fid.id());
    }

    /** String-id variant of {@link #triggerConvert(Context, int, FileStorageId)}. */
    @Deprecated
    public void triggerConvert(Context ctx, int node, String fid) {
        postWithEmptyBody(ctx, node, "/process/convert/" + fid);
    }

    /** POSTs to {@code /process/convert-load/<fid>}. */
    public void triggerProcessAndLoad(Context ctx, int node, String fid) {
        postWithEmptyBody(ctx, node, "/process/convert-load/" + fid);
    }

    /**
     * String-id variant of {@link #loadProcessedData(Context, int, LoadParameters)};
     * {@code fid} is parsed with {@link Long#parseLong(String)} and wrapped in a
     * single-element {@link LoadParameters}.
     */
    @Deprecated
    public void loadProcessedData(Context ctx, int node, String fid) {
        FileStorageId storageId = new FileStorageId(Long.parseLong(fid));
        LoadParameters singleId = new LoadParameters(List.of(storageId));
        loadProcessedData(ctx, node, singleId);
    }

    /** POSTs the load parameters as the request body to {@code /process/load}. */
    public void loadProcessedData(Context ctx, int node, LoadParameters ids) {
        post(ctx, node, "/process/load", ids).blockingSubscribe();
    }

    /** POSTs to {@code /process/adjacency-calculation}. */
    public void calculateAdjacencies(Context ctx, int node) {
        postWithEmptyBody(ctx, node, "/process/adjacency-calculation");
    }

    /** Currently a no-op: no request is sent. */
    public void exportData(Context ctx) {
        // FIXME: not implemented yet
    }

    /** POSTs to {@code /sideload/encyclopedia} with the URL-encoded source path as {@code path}. */
    public void sideloadEncyclopedia(Context ctx, int node, Path sourcePath) {
        sideload(ctx, node, "encyclopedia", sourcePath);
    }

    /** POSTs to {@code /sideload/dirtree} with the URL-encoded source path as {@code path}. */
    public void sideloadDirtree(Context ctx, int node, Path sourcePath) {
        sideload(ctx, node, "dirtree", sourcePath);
    }

    /** POSTs to {@code /sideload/stackexchange} with the URL-encoded source path as {@code path}. */
    public void sideloadStackexchange(Context ctx, int node, Path sourcePath) {
        sideload(ctx, node, "stackexchange", sourcePath);
    }

    /** POSTs to {@code /process/crawl-spec/from-db} with the URL-encoded description. */
    public void createCrawlSpecFromDb(Context context, int node, String description) {
        String endpoint = "/process/crawl-spec/from-db?description=" + encodeQueryValue(description);
        postWithEmptyBody(context, node, endpoint);
    }

    /** POSTs to {@code /process/crawl-spec/from-download} with the URL-encoded description and url. */
    public void createCrawlSpecFromDownload(Context context, int node, String description, String url) {
        String endpoint = "/process/crawl-spec/from-download?description=" + encodeQueryValue(description)
                + "&url=" + encodeQueryValue(url);
        postWithEmptyBody(context, node, endpoint);
    }

    /** POSTs to {@code /backup/<fid>/restore}. */
    public void restoreBackup(Context context, int node, String fid) {
        postWithEmptyBody(context, node, "/backup/" + fid + "/restore");
    }

    /** GETs {@code /actor} and returns the first emitted {@link ActorRunStates}. */
    public ActorRunStates getActorStates(Context context, int node) {
        return get(context, node, "/actor", ActorRunStates.class).blockingFirst();
    }

    /** Shared implementation of the {@code /sideload/<kind>?path=...} requests. */
    private void sideload(Context ctx, int node, String kind, Path sourcePath) {
        String endpoint = "/sideload/" + kind + "?path=" + encodeQueryValue(sourcePath.toString());
        postWithEmptyBody(ctx, node, endpoint);
    }

    /** POSTs an empty string body to {@code endpoint} and blocks until the call completes. */
    private void postWithEmptyBody(Context ctx, int node, String endpoint) {
        post(ctx, node, endpoint, "").blockingSubscribe();
    }

    /** URL-encodes a query parameter value as UTF-8. */
    private static String encodeQueryValue(String value) {
        return URLEncoder.encode(value, StandardCharsets.UTF_8);
    }
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.control.model;
package nu.marginalia.executor.model;
public record ActorRunState(String name,
String state,

View File

@ -0,0 +1,5 @@
package nu.marginalia.executor.model;
import java.util.List;
/**
 * A node id paired with a list of actor run states.
 *
 * @param node   the node id
 * @param states the actor run states
 */
public record ActorRunStates(
        int node,
        List<ActorRunState> states
) {
}

View File

@ -0,0 +1,11 @@
package nu.marginalia.executor.model.crawl;
import nu.marginalia.storage.model.FileStorageId;
import java.util.List;
/**
 * Request parameters for a recrawl.
 *
 * @param crawlDataId  a single file storage id (named for crawl data)
 * @param crawlSpecIds a list of file storage ids (named for crawl specs)
 */
public record RecrawlParameters(FileStorageId crawlDataId, List<FileStorageId> crawlSpecIds) {}

View File

@ -0,0 +1,10 @@
package nu.marginalia.executor.model.load;
import nu.marginalia.storage.model.FileStorageId;
import java.util.List;
/**
 * Request parameters for a load operation.
 *
 * @param ids the file storage ids to load
 */
public record LoadParameters(List<FileStorageId> ids) {}

View File

@ -2,6 +2,7 @@ package nu.marginalia.index.client;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import io.prometheus.client.Summary;
import io.reactivex.rxjava3.core.Observable;
import nu.marginalia.WmsaHome;
@ -23,23 +24,21 @@ public class IndexClient extends AbstractDynamicClient {
private static final Summary wmsa_search_index_api_time = Summary.build().name("wmsa_search_index_api_time").help("-").register();
private final MqOutbox outbox;
MqOutbox outbox;
@Inject
public IndexClient(ServiceDescriptors descriptors,
MessageQueueFactory messageQueueFactory)
MessageQueueFactory messageQueueFactory,
@Named("wmsa-system-node") Integer nodeId)
{
super(descriptors.forId(ServiceId.Index), WmsaHome.getHostsFile(), GsonFactory::get);
String inboxName = ServiceId.Index.name + ":" + "0";
String outboxName = System.getProperty("service-name", UUID.randomUUID().toString());
outbox = messageQueueFactory.createOutbox(inboxName, outboxName, UUID.randomUUID());
String inboxName = ServiceId.Index.name;
String outboxName = System.getProperty("service-name:"+nodeId, UUID.randomUUID().toString());
outbox = messageQueueFactory.createOutbox(inboxName, nodeId, outboxName, nodeId, UUID.randomUUID());
setTimeout(30);
}
public MqOutbox outbox() {
return outbox;
}

View File

@ -12,7 +12,7 @@ java {
}
dependencies {
implementation project(':code:common:db')
implementation project(':code:common:config')
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit

View File

@ -1,7 +1,7 @@
package nu.marginalia.mqapi.converting;
import lombok.AllArgsConstructor;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageId;
@AllArgsConstructor
public class ConvertRequest {

View File

@ -1,11 +1,13 @@
package nu.marginalia.mqapi.crawling;
import lombok.AllArgsConstructor;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageId;
import java.util.List;
/** A request to start a crawl */
@AllArgsConstructor
public class CrawlRequest {
public FileStorageId specStorage;
public List<FileStorageId> specStorage;
public FileStorageId crawlStorage;
}

View File

@ -1,7 +1,7 @@
package nu.marginalia.mqapi.loading;
import lombok.AllArgsConstructor;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageId;
import java.util.List;

View File

@ -29,19 +29,11 @@ public class QueryClient extends AbstractDynamicClient {
private final Logger logger = LoggerFactory.getLogger(getClass());
private final MqOutbox outbox;
@Inject
public QueryClient(ServiceDescriptors descriptors,
MessageQueueFactory messageQueueFactory) {
super(descriptors.forId(ServiceId.Query), WmsaHome.getHostsFile(), GsonFactory::get);
String inboxName = ServiceId.Query.name + ":" + "0";
String outboxName = System.getProperty("service-name", UUID.randomUUID().toString());
outbox = messageQueueFactory.createOutbox(inboxName, outboxName, UUID.randomUUID());
}
/** Delegate an Index API style query directly to the index service */
@ -57,8 +49,5 @@ public class QueryClient extends AbstractDynamicClient {
() -> this.postGet(ctx, 0, "/search/", params, QueryResponse.class).blockingFirst()
);
}
public MqOutbox outbox() {
return outbox;
}
}

View File

@ -14,4 +14,22 @@ java {
dependencies {
implementation project(':code:common:service-discovery')
implementation project(':code:common:service-client')
implementation project(':code:common:db')
implementation project(':code:common:model')
implementation libs.bundles.slf4j
implementation libs.bundles.mariadb
implementation libs.mockito
implementation libs.guice
implementation libs.gson
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
testImplementation 'org.testcontainers:mariadb:1.17.4'
testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
}

View File

@ -0,0 +1,67 @@
package nu.marginalia;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
/** Central place for resolving the important system directories.
 * <p>
 * Every lookup goes through a {@link FileStorageService}, because the
 * resulting paths are node-dependent. Each directory is resolved beneath the
 * {@link FileStorageBaseType#CURRENT} storage base and created if missing.
 */
public class IndexLocations {
    private static final Logger logger = LoggerFactory.getLogger(IndexLocations.class);

    // Directory names beneath the CURRENT storage base
    private static final String LINKDB_LIVE_DIR = "ldbr";
    private static final String LINKDB_WRITE_DIR = "ldbw";
    private static final String INDEX_LIVE_DIR = "ir";
    private static final String INDEX_CONSTRUCTION_DIR = "iw";
    private static final String SEARCH_SETS_DIR = "ss";

    /** Return the path to the current link database */
    public static Path getLinkdbLivePath(FileStorageService fileStorage) {
        return getStorage(fileStorage, FileStorageBaseType.CURRENT, LINKDB_LIVE_DIR);
    }

    /** Return the path to the next link database */
    public static Path getLinkdbWritePath(FileStorageService fileStorage) {
        return getStorage(fileStorage, FileStorageBaseType.CURRENT, LINKDB_WRITE_DIR);
    }

    /** Return the path to the current live index */
    public static Path getCurrentIndex(FileStorageService fileStorage) {
        return getStorage(fileStorage, FileStorageBaseType.CURRENT, INDEX_LIVE_DIR);
    }

    /** Return the path to the designated index construction area */
    public static Path getIndexConstructionArea(FileStorageService fileStorage) {
        return getStorage(fileStorage, FileStorageBaseType.CURRENT, INDEX_CONSTRUCTION_DIR);
    }

    /** Return the path to the search sets */
    public static Path getSearchSetsPath(FileStorageService fileStorage) {
        return getStorage(fileStorage, FileStorageBaseType.CURRENT, SEARCH_SETS_DIR);
    }

    /** Resolve {@code pathPart} beneath the storage base of the given type,
     * creating the directory (and logging that fact) when it does not exist.
     *
     * @throws IllegalStateException if no base of that type is configured, or
     *         if the lookup or directory creation fails
     */
    private static Path getStorage(FileStorageService service, FileStorageBaseType baseType, String pathPart) {
        try {
            var storageBase = service.getStorageBase(baseType);
            if (null == storageBase) {
                throw new IllegalStateException("File storage base " + baseType + " is not configured!");
            }

            Path directory = storageBase.asPath().resolve(pathPart);
            if (Files.exists(directory)) {
                return directory;
            }

            // Ensure the directory exists
            logger.info("Creating system directory {}", directory);
            Files.createDirectories(directory);
            return directory;
        }
        catch (SQLException | IOException ex) {
            throw new IllegalStateException("Error fetching storage " + baseType + " / " + pathPart, ex);
        }
    }
}

View File

@ -1,5 +1,3 @@
package nu.marginalia;
public record UserAgent(String uaString) {
}
public record UserAgent(String uaString) {}

View File

@ -0,0 +1,104 @@
package nu.marginalia.nodecfg;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.nodecfg.model.NodeConfiguration;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads and writes rows of the {@code NODE_CONFIGURATION} table.
 */
public class NodeConfigurationService {

    private final HikariDataSource dataSource;

    public NodeConfigurationService(HikariDataSource dataSource) {
        this.dataSource = dataSource;
    }

    /**
     * Inserts a new node configuration and returns it as read back from the database.
     *
     * @param description   free-text description of the node
     * @param acceptQueries value for the ACCEPT_QUERIES column
     * @return the stored configuration, including its generated id
     * @throws SQLException          on database errors
     * @throws IllegalStateException if the insert affected no rows
     */
    public NodeConfiguration create(String description, boolean acceptQueries) throws SQLException {
        int newId;

        try (var conn = dataSource.getConnection();
             var is = conn.prepareStatement("""
                     INSERT INTO NODE_CONFIGURATION(DESCRIPTION, ACCEPT_QUERIES) VALUES(?, ?)
                     """);
             var qs = conn.prepareStatement("""
                     SELECT LAST_INSERT_ID()
                     """))
        {
            is.setString(1, description);
            is.setBoolean(2, acceptQueries);

            if (is.executeUpdate() <= 0) {
                throw new IllegalStateException("Failed to insert configuration");
            }

            // LAST_INSERT_ID() must be read on the same connection as the insert
            try (var rs = qs.executeQuery()) {
                if (!rs.next()) {
                    throw new AssertionError("No LAST_INSERT_ID()");
                }
                newId = rs.getInt(1);
            }
        }

        // Read the row back only after the connection above has been returned to
        // the pool, so this method never holds two pooled connections at once.
        return get(newId);
    }

    /**
     * Lists every node configuration, ordered by node id.
     *
     * @return all configurations; empty if the table has no rows
     * @throws SQLException on database errors
     */
    public List<NodeConfiguration> getAll() throws SQLException {
        // Without an explicit ORDER BY the row order is unspecified
        try (var conn = dataSource.getConnection();
             var qs = conn.prepareStatement("""
                     SELECT ID, DESCRIPTION, ACCEPT_QUERIES, DISABLED
                     FROM NODE_CONFIGURATION
                     ORDER BY ID
                     """);
             var rs = qs.executeQuery())
        {
            List<NodeConfiguration> ret = new ArrayList<>();
            while (rs.next()) {
                ret.add(fromRow(rs));
            }
            return ret;
        }
    }

    /**
     * Fetches the configuration of a single node.
     *
     * @param nodeId the node id to look up
     * @return the configuration, or {@code null} if no row has that id
     * @throws SQLException on database errors
     */
    public NodeConfiguration get(int nodeId) throws SQLException {
        try (var conn = dataSource.getConnection();
             var qs = conn.prepareStatement("""
                     SELECT ID, DESCRIPTION, ACCEPT_QUERIES, DISABLED
                     FROM NODE_CONFIGURATION
                     WHERE ID=?
                     """))
        {
            qs.setInt(1, nodeId);

            try (var rs = qs.executeQuery()) {
                if (rs.next()) {
                    return fromRow(rs);
                }
            }
        }

        return null;
    }

    /**
     * Updates description, accept-queries and disabled flags of an existing node.
     *
     * @param config the new values; {@code config.node()} selects the row
     * @throws SQLException          on database errors
     * @throws IllegalStateException if the update affected no rows
     */
    public void save(NodeConfiguration config) throws SQLException {
        try (var conn = dataSource.getConnection();
             var us = conn.prepareStatement("""
                     UPDATE NODE_CONFIGURATION
                     SET DESCRIPTION=?, ACCEPT_QUERIES=?, DISABLED=?
                     WHERE ID=?
                     """))
        {
            us.setString(1, config.description());
            us.setBoolean(2, config.acceptQueries());
            us.setBoolean(3, config.disabled());
            us.setInt(4, config.node());

            if (us.executeUpdate() <= 0)
                throw new IllegalStateException("Failed to update configuration");
        }
    }

    /** Maps the current row of a NODE_CONFIGURATION result set to a record. */
    private static NodeConfiguration fromRow(java.sql.ResultSet rs) throws SQLException {
        return new NodeConfiguration(
                rs.getInt("ID"),
                rs.getString("DESCRIPTION"),
                rs.getBoolean("ACCEPT_QUERIES"),
                rs.getBoolean("DISABLED")
        );
    }
}

View File

@ -0,0 +1,9 @@
package nu.marginalia.nodecfg.model;
/**
 * Configuration values for a single node.
 *
 * @param node          the node id
 * @param description   free-text description
 * @param acceptQueries the accept-queries flag
 * @param disabled      the disabled flag
 */
public record NodeConfiguration(int node, String description, boolean acceptQueries, boolean disabled) {}

View File

@ -1,9 +1,9 @@
package nu.marginalia.db.storage;
package nu.marginalia.storage;
import com.google.gson.Gson;
import nu.marginalia.db.storage.model.FileStorage;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.storage.model.FileStorage;
import nu.marginalia.storage.model.FileStorageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -1,9 +1,9 @@
package nu.marginalia.db.storage;
package nu.marginalia.storage;
import com.google.inject.name.Named;
import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.db.storage.model.*;
import nu.marginalia.storage.model.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -19,7 +19,6 @@ import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
/** Manages file storage for processes and services
*/
@ -34,7 +33,7 @@ public class FileStorageService {
public Optional<FileStorage> findFileStorageToDelete() {
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""
SELECT ID FROM FILE_STORAGE WHERE DO_PURGE LIMIT 1
SELECT ID FROM FILE_STORAGE WHERE STATE='DELETE' LIMIT 1
""")) {
var rs = stmt.executeQuery();
if (rs.next()) {
@ -46,6 +45,24 @@ public class FileStorageService {
return Optional.empty();
}
/** Collects the distinct NODE values found in FILE_STORAGE_BASE.
 * <p>
 * SQL errors are logged as warnings rather than propagated; whatever was
 * collected up to that point (possibly nothing) is returned.
 */
public Set<Integer> getConfiguredNodes() {
    final Set<Integer> nodes = new HashSet<>();

    try (var conn = dataSource.getConnection();
         var stmt = conn.prepareStatement("""
                 SELECT DISTINCT(NODE) FROM FILE_STORAGE_BASE
                 """))
    {
        for (var rs = stmt.executeQuery(); rs.next(); ) {
            nodes.add(rs.getInt(1));
        }
    }
    catch (SQLException e) {
        logger.warn("SQL error getting nodes", e);
    }

    return nodes;
}
@Inject
public FileStorageService(HikariDataSource dataSource, @Named("wmsa-system-node") Integer node) {
this.dataSource = dataSource;
@ -58,7 +75,7 @@ public class FileStorageService {
continue;
logger.info("FileStorage override present: {} -> {}", type,
FileStorage.createOverrideStorage(type, overrideProperty).asPath());
FileStorage.createOverrideStorage(type, FileStorageBaseType.CURRENT, overrideProperty).asPath());
}
}
@ -66,7 +83,7 @@ public class FileStorageService {
public FileStorageBase getStorageBase(FileStorageBaseId type) throws SQLException {
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""
SELECT ID, NAME, PATH, TYPE, PERMIT_TEMP
SELECT ID, NAME, PATH, TYPE
FROM FILE_STORAGE_BASE WHERE ID = ?
""")) {
stmt.setLong(1, type.id());
@ -76,8 +93,7 @@ public class FileStorageService {
new FileStorageBaseId(rs.getLong(1)),
FileStorageBaseType.valueOf(rs.getString(4)),
rs.getString(2),
rs.getString(3),
rs.getBoolean(5)
rs.getString(3)
);
}
}
@ -128,6 +144,7 @@ public class FileStorageService {
}
}
}
public void relateFileStorages(FileStorageId source, FileStorageId target) {
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""
@ -173,9 +190,13 @@ public class FileStorageService {
/** Looks up the storage base of the given type by delegating to
 * {@code getStorageBase(type, node)} with this service's {@code node} field.
 *
 * @return the storage base with the given type, or null if it does not exist */
public FileStorageBase getStorageBase(FileStorageBaseType type) throws SQLException {
return getStorageBase(type, node);
}
public FileStorageBase getStorageBase(FileStorageBaseType type, int node) throws SQLException {
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""
SELECT ID, NAME, PATH, TYPE, PERMIT_TEMP
SELECT ID, NAME, PATH, TYPE
FROM FILE_STORAGE_BASE WHERE TYPE = ? AND NODE = ?
""")) {
stmt.setString(1, type.name());
@ -186,16 +207,14 @@ public class FileStorageService {
new FileStorageBaseId(rs.getLong(1)),
FileStorageBaseType.valueOf(rs.getString(4)),
rs.getString(2),
rs.getString(3),
rs.getBoolean(5)
rs.getString(3)
);
}
}
}
return null;
}
public FileStorageBase createStorageBase(String name, Path path, FileStorageBaseType type, boolean permitTemp) throws SQLException, FileNotFoundException {
public FileStorageBase createStorageBase(String name, Path path, FileStorageBaseType type) throws SQLException, FileNotFoundException {
if (!Files.exists(path)) {
throw new FileNotFoundException("Storage base path does not exist: " + path);
@ -203,14 +222,13 @@ public class FileStorageService {
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""
INSERT INTO FILE_STORAGE_BASE(NAME, PATH, TYPE, PERMIT_TEMP, NODE)
VALUES (?, ?, ?, ?, ?)
INSERT INTO FILE_STORAGE_BASE(NAME, PATH, TYPE, NODE)
VALUES (?, ?, ?, ?)
""")) {
stmt.setString(1, name);
stmt.setString(2, path.toString());
stmt.setString(3, type.name());
stmt.setBoolean(4, permitTemp);
stmt.setInt(5, node);
stmt.setInt(4, node);
int update = stmt.executeUpdate();
if (update < 0) {
@ -250,10 +268,6 @@ public class FileStorageService {
String prefix,
String description) throws IOException, SQLException
{
if (!base.permitTemp()) {
throw new IllegalArgumentException("Temporary storage not permitted in base " + base.name());
}
Path newDir = allocateDirectory(base.asPath(), prefix);
String relDir = base.asPath().relativize(newDir).normalize().toString();
@ -299,7 +313,11 @@ public class FileStorageService {
/** Allocate permanent storage in base */
public FileStorage allocatePermanentStorage(FileStorageBase base, String relativePath, FileStorageType type, String description) throws IOException, SQLException {
public FileStorage allocatePermanentStorage(FileStorageBase base,
String relativePath,
FileStorageType type,
String description) throws IOException, SQLException
{
Path newDir = base.asPath().resolve(relativePath);
@ -338,6 +356,7 @@ public class FileStorageService {
type,
LocalDateTime.now(),
newDir.toString(),
"",
description
);
}
@ -359,12 +378,12 @@ public class FileStorageService {
throw new IllegalStateException("FileStorageType " + type.name() + " was overridden, but location '" + override + "' does not exist!");
}
return FileStorage.createOverrideStorage(type, override);
return FileStorage.createOverrideStorage(type, FileStorageBaseType.CURRENT, override);
}
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""
SELECT PATH, DESCRIPTION, ID, BASE_ID, CREATE_DATE
SELECT PATH, STATE, DESCRIPTION, ID, BASE_ID, CREATE_DATE
FROM FILE_STORAGE_VIEW WHERE TYPE = ? AND NODE = ?
""")) {
stmt.setString(1, type.name());
@ -373,6 +392,7 @@ public class FileStorageService {
long storageId;
long baseId;
String path;
String state;
String description;
LocalDateTime createDateTime;
@ -382,6 +402,7 @@ public class FileStorageService {
storageId = rs.getLong("ID");
createDateTime = rs.getTimestamp("CREATE_DATE").toLocalDateTime();
path = rs.getString("PATH");
state = rs.getString("STATE");
description = rs.getString("DESCRIPTION");
}
else {
@ -396,18 +417,27 @@ public class FileStorageService {
type,
createDateTime,
path,
state,
description
);
}
}
}
/** Looks up each id in turn via {@code getStorage(FileStorageId)} and returns
 * the results in the same order as {@code ids}. One query is issued per id.
 */
public List<FileStorage> getStorage(List<FileStorageId> ids) throws SQLException {
    final List<FileStorage> storages = new ArrayList<>();

    for (FileStorageId storageId : ids) {
        FileStorage storage = getStorage(storageId);
        storages.add(storage);
    }

    return storages;
}
/** @return the storage with the given id, or null if it does not exist */
public FileStorage getStorage(FileStorageId id) throws SQLException {
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""
SELECT PATH, TYPE, DESCRIPTION, CREATE_DATE, ID, BASE_ID
SELECT PATH, TYPE, STATE, DESCRIPTION, CREATE_DATE, ID, BASE_ID
FROM FILE_STORAGE_VIEW WHERE ID = ?
""")) {
stmt.setLong(1, id.id());
@ -415,6 +445,7 @@ public class FileStorageService {
long storageId;
long baseId;
String path;
String state;
String description;
FileStorageType type;
LocalDateTime createDateTime;
@ -425,6 +456,7 @@ public class FileStorageService {
storageId = rs.getLong("ID");
type = FileStorageType.valueOf(rs.getString("TYPE"));
path = rs.getString("PATH");
state = rs.getString("STATE");
description = rs.getString("DESCRIPTION");
createDateTime = rs.getTimestamp("CREATE_DATE").toLocalDateTime();
}
@ -440,6 +472,7 @@ public class FileStorageService {
type,
createDateTime,
path,
state,
description
);
}
@ -460,13 +493,14 @@ public class FileStorageService {
List<FileStorage> ret = new ArrayList<>();
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""
SELECT PATH, TYPE, DESCRIPTION, CREATE_DATE, ID, BASE_ID
SELECT PATH, STATE, TYPE, DESCRIPTION, CREATE_DATE, ID, BASE_ID
FROM FILE_STORAGE_VIEW
""")) {
long storageId;
long baseId;
String path;
String state;
String description;
LocalDateTime createDateTime;
FileStorageType type;
@ -476,6 +510,7 @@ public class FileStorageService {
baseId = rs.getLong("BASE_ID");
storageId = rs.getLong("ID");
path = rs.getString("PATH");
state = rs.getString("STATE");
type = FileStorageType.valueOf(rs.getString("TYPE"));
description = rs.getString("DESCRIPTION");
createDateTime = rs.getTimestamp("CREATE_DATE").toLocalDateTime();
@ -487,6 +522,7 @@ public class FileStorageService {
type,
createDateTime,
path,
state,
description
));
}
@ -497,4 +533,62 @@ public class FileStorageService {
return ret;
}
/** Sets the state of the file storage with the given id to {@code "DELETE"}. */
public void flagFileForDeletion(FileStorageId id) throws SQLException {
setFileStorageState(id, "DELETE");
}
/** Sets the state of the file storage with the given id to {@code "ACTIVE"}. */
public void enableFileStorage(FileStorageId id) throws SQLException {
setFileStorageState(id, "ACTIVE");
}
/** Sets the state of the file storage with the given id to the empty string. */
public void disableFileStorage(FileStorageId id) throws SQLException {
setFileStorageState(id, "");
}
/** Writes {@code state} into the STATE column of the FILE_STORAGE row with the given id. */
private void setFileStorageState(FileStorageId id, String state) throws SQLException {
    final String updateSql = "UPDATE FILE_STORAGE SET STATE = ? WHERE ID = ?";

    try (var conn = dataSource.getConnection();
         var update = conn.prepareStatement(updateSql))
    {
        update.setString(1, state);
        update.setLong(2, id.id());
        update.executeUpdate();
    }
}
/** Sets STATE to the empty string on every FILE_STORAGE row of the given
 * type whose storage base belongs to the given node.
 */
public void disableFileStorageOfType(int nodeId, FileStorageType type) throws SQLException {
    try (var conn = dataSource.getConnection();
         var update = conn.prepareStatement("""
                 UPDATE FILE_STORAGE
                 INNER JOIN FILE_STORAGE_BASE ON BASE_ID=FILE_STORAGE_BASE.ID
                 SET FILE_STORAGE.STATE = ''
                 WHERE FILE_STORAGE.TYPE = ?
                 AND FILE_STORAGE_BASE.NODE=?
                 """))
    {
        final String typeName = type.name();

        update.setString(1, typeName);
        update.setInt(2, nodeId);
        update.executeUpdate();
    }
}
/** Lists the ids of all file storages of the given type on the given node
 * whose STATE is {@code 'ACTIVE'}.
 *
 * @param nodeId the node whose storage bases are searched
 * @param type   the file storage type to match
 * @return the matching ids; empty if there are none
 * @throws SQLException on database errors
 */
public List<FileStorageId> getActiveFileStorages(int nodeId, FileStorageType type) throws SQLException
{
    try (var conn = dataSource.getConnection();
         var queryStmt = conn.prepareStatement("""
                 SELECT FILE_STORAGE.ID FROM FILE_STORAGE
                 INNER JOIN FILE_STORAGE_BASE ON BASE_ID=FILE_STORAGE_BASE.ID
                 WHERE FILE_STORAGE.TYPE = ?
                 AND STATE='ACTIVE'
                 AND FILE_STORAGE_BASE.NODE=?
                 """))
    {
        queryStmt.setString(1, type.name());
        queryStmt.setInt(2, nodeId);

        List<FileStorageId> ids = new ArrayList<>();
        try (var rs = queryStmt.executeQuery()) {
            while (rs.next()) {
                // FileStorageId wraps a long; read the column as a long, like the
                // other lookups in this class, instead of narrowing it to int.
                ids.add(new FileStorageId(rs.getLong(1)));
            }
        }
        return ids;
    }
}
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.db.storage.model;
package nu.marginalia.storage.model;
import java.nio.file.Path;
import java.time.LocalDateTime;
@ -19,19 +19,19 @@ public record FileStorage(
FileStorageType type,
LocalDateTime createDateTime,
String path,
String state,
String description)
{
/** It is sometimes desirable to be able to create an override that isn't
* backed by the database. This constructor permits this.
*/
public static FileStorage createOverrideStorage(FileStorageType type, String override) {
public static FileStorage createOverrideStorage(FileStorageType type, FileStorageBaseType baseType, String override) {
var mockBase = new FileStorageBase(
new FileStorageBaseId(-1),
FileStorageBaseType.SSD_INDEX,
baseType,
"OVERRIDE:" + type.name(),
"INVALIDINVALIDINVALID",
false
"INVALIDINVALIDINVALID"
);
return new FileStorage(
@ -40,6 +40,7 @@ public record FileStorage(
type,
LocalDateTime.now(),
override,
"OVERRIDE",
"OVERRIDE:" + type.name()
);
}
@ -48,6 +49,9 @@ public record FileStorage(
return Path.of(path);
}
/** @return true if this storage's state is exactly {@code "ACTIVE"}; false for any other value, including null */
public boolean isActive() {
return "ACTIVE".equals(state);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;

View File

@ -1,4 +1,4 @@
package nu.marginalia.db.storage.model;
package nu.marginalia.storage.model;
import java.nio.file.Path;
@ -9,15 +9,16 @@ import java.nio.file.Path;
* @param type the type of the storage base
* @param name the name of the storage base
* @param path the path of the storage base
* @param permitTemp if true, the storage may be used for temporary files
*/
public record FileStorageBase(FileStorageBaseId id,
FileStorageBaseType type,
String name,
String path,
boolean permitTemp
String path
) {
public Path asPath() {
return Path.of(path);
}
/** @return true if the numeric id of this base is non-negative */
public boolean isValid() {
return id.id() >= 0;
}
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.db.storage.model;
package nu.marginalia.storage.model;
public record FileStorageBaseId(long id) {

View File

@ -0,0 +1,12 @@
package nu.marginalia.storage.model;
/** The kinds of file storage base. */
public enum FileStorageBaseType {
    CURRENT, WORK, STORAGE, BACKUP;

    private static final String OVERRIDE_PREFIX = "FS_BASE_OVERRIDE:";

    /** @return the constant's name prefixed with {@code "FS_BASE_OVERRIDE:"} */
    public String overrideName() {
        return OVERRIDE_PREFIX + name();
    }
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.db.storage.model;
package nu.marginalia.storage.model;
public record FileStorageId(long id) {
public static FileStorageId parse(String str) {

View File

@ -1,17 +1,11 @@
package nu.marginalia.db.storage.model;
package nu.marginalia.storage.model;
public enum FileStorageType {
CRAWL_SPEC,
CRAWL_DATA,
PROCESSED_DATA,
INDEX_STAGING,
LINKDB_STAGING,
LINKDB_LIVE,
INDEX_LIVE,
BACKUP,
EXPORT,
SEARCH_SETS;
EXPORT;
public String overrideName() {
return "FS_OVERRIDE:"+name();
}

View File

@ -0,0 +1,70 @@
package nu.marginalia.nodecfg;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.storage.FileStorageService;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;
import org.testcontainers.containers.MariaDBContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import java.io.IOException;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Exercises {@link NodeConfigurationService} against a MariaDB test container
 * initialised with the node-config migration script.
 */
@Testcontainers
@Execution(ExecutionMode.SAME_THREAD)
@Tag("slow")
public class NodeConfigurationServiceTest {

    @Container
    static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
            .withDatabaseName("WMSA_prod")
            .withUsername("wmsa")
            .withPassword("wmsa")
            .withInitScript("db/migration/V23_11_0_005__node_config.sql")
            .withNetworkAliases("mariadb");

    static HikariDataSource dataSource;
    static NodeConfigurationService nodeConfigurationService;

    /** Opens a connection pool against the container and builds the service under test. */
    @BeforeAll
    public static void setup() {
        var hikariConfig = new HikariConfig();
        hikariConfig.setJdbcUrl(mariaDBContainer.getJdbcUrl());
        hikariConfig.setUsername("wmsa");
        hikariConfig.setPassword("wmsa");

        dataSource = new HikariDataSource(hikariConfig);
        nodeConfigurationService = new NodeConfigurationService(dataSource);
    }

    /** Creates two nodes, then checks their fields and that getAll() returns both in creation order. */
    @Test
    public void test() throws SQLException {
        var first = nodeConfigurationService.create("Test", false);
        var second = nodeConfigurationService.create("Foo", true);

        assertEquals(1, first.node());
        assertEquals("Test", first.description());
        assertFalse(first.acceptQueries());

        assertEquals(2, second.node());
        assertEquals("Foo", second.description());
        assertTrue(second.acceptQueries());

        var allNodes = nodeConfigurationService.getAll();
        assertEquals(2, allNodes.size());
        assertEquals(first, allNodes.get(0));
        assertEquals(second, allNodes.get(1));
    }
}

View File

@ -1,20 +1,19 @@
package nu.marginalia.db.storage;
package nu.marginalia.storage;
import com.google.common.collect.Lists;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.db.storage.model.FileStorageBaseType;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageType;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;
import org.testcontainers.containers.MariaDBContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
@ -23,11 +22,10 @@ import java.util.List;
import java.util.Objects;
import java.util.UUID;
import static org.junit.Assert.*;
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
@Testcontainers
@Execution(SAME_THREAD)
@Execution(ExecutionMode.SAME_THREAD)
@Tag("slow")
public class FileStorageServiceTest {
@Container
@ -54,7 +52,11 @@ public class FileStorageServiceTest {
// apply migrations
List<String> migrations = List.of("db/migration/V23_11_0_000__file_storage_node.sql");
List<String> migrations = List.of(
"db/migration/V23_11_0_000__file_storage_node.sql",
"db/migration/V23_11_0_002__file_storage_state.sql",
"db/migration/V23_11_0_004__file_storage_base_type.sql"
);
for (String migration : migrations) {
try (var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream(migration),
"Could not load migration script " + migration);
@ -135,38 +137,19 @@ public class FileStorageServiceTest {
String name = "test-" + UUID.randomUUID();
var storage = new FileStorageService(dataSource, 0);
var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, false);
var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.WORK);
Assertions.assertEquals(name, base.name());
Assertions.assertEquals(FileStorageBaseType.SLOW, base.type());
Assertions.assertFalse(base.permitTemp());
Assertions.assertEquals(FileStorageBaseType.WORK, base.type());
}
@Test
public void testAllocateTempInNonPermitted() throws SQLException, FileNotFoundException {
public void testAllocatePermanent() throws SQLException, IOException {
String name = "test-" + UUID.randomUUID();
var storage = new FileStorageService(dataSource, 0);
var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, false);
try {
storage.allocateTemporaryStorage(base, FileStorageType.CRAWL_DATA, "xyz", "thisShouldFail");
fail();
}
catch (IllegalArgumentException ex) {} // ok
catch (Exception ex) {
ex.printStackTrace();
fail();
}
}
@Test
public void testAllocatePermanentInNonPermitted() throws SQLException, IOException {
String name = "test-" + UUID.randomUUID();
var storage = new FileStorageService(dataSource, 0);
var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, false);
var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.WORK);
var created = storage.allocatePermanentStorage(base, "xyz", FileStorageType.CRAWL_DATA, "thisShouldSucceed");
tempDirs.add(created.asPath());
@ -176,12 +159,12 @@ public class FileStorageServiceTest {
}
@Test
public void testAllocateTempInPermitted() throws IOException, SQLException {
public void testAllocateTemp() throws IOException, SQLException {
String name = "test-" + UUID.randomUUID();
var storage = new FileStorageService(dataSource, 0);
var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, true);
var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.WORK);
var fileStorage = storage.allocateTemporaryStorage(base, FileStorageType.CRAWL_DATA, "xyz", "thisShouldSucceed");
System.out.println("Allocated " + fileStorage.asPath());
Assertions.assertTrue(Files.exists(fileStorage.asPath()));

View File

@ -1,8 +0,0 @@
package nu.marginalia.db.storage.model;
/**
 * Enumerates the types of file storage base.
 *
 * NOTE(review): the constant names look like they are persisted as strings in the
 * database (a migration compares FILE_STORAGE_BASE.TYPE against them) -- confirm
 * before renaming or removing a constant.
 */
public enum FileStorageBaseType {
SSD_INDEX,
SSD_WORK,
SLOW,
BACKUP
}

View File

@ -0,0 +1,3 @@
-- Adds a NODE column to each of the three heartbeat tables.
-- The column is NOT NULL with a default of -1.
ALTER TABLE TASK_HEARTBEAT ADD COLUMN NODE INT NOT NULL DEFAULT -1;
ALTER TABLE PROCESS_HEARTBEAT ADD COLUMN NODE INT NOT NULL DEFAULT -1;
ALTER TABLE SERVICE_HEARTBEAT ADD COLUMN NODE INT NOT NULL DEFAULT -1;

View File

@ -0,0 +1,17 @@
-- Adds a STATE string column (empty by default) to FILE_STORAGE and removes DO_PURGE.
ALTER TABLE FILE_STORAGE ADD COLUMN STATE VARCHAR(255) NOT NULL DEFAULT '';
ALTER TABLE FILE_STORAGE DROP COLUMN DO_PURGE;
-- Recreates FILE_STORAGE_VIEW with STATE among its columns.
-- The view joins each storage row with its base and exposes the full path as
-- '<base path>/<storage path>'.
DROP VIEW FILE_STORAGE_VIEW;
CREATE VIEW FILE_STORAGE_VIEW
AS SELECT
CONCAT(BASE.PATH, '/', STORAGE.PATH) AS PATH,
STORAGE.TYPE AS TYPE,
STATE AS STATE,
NODE AS NODE,
DESCRIPTION AS DESCRIPTION,
CREATE_DATE AS CREATE_DATE,
STORAGE.ID AS ID,
BASE.ID AS BASE_ID
FROM FILE_STORAGE STORAGE
INNER JOIN FILE_STORAGE_BASE BASE ON STORAGE.BASE_ID=BASE.ID;

View File

@ -0,0 +1,4 @@
-- Creates the NODE_CONFIGURATION table: one row per node ID, with a description.
-- NOTE(review): another script in this change set also creates NODE_CONFIGURATION
-- (with AUTO_INCREMENT and extra columns) -- confirm only one of them is applied
-- to a given database.
CREATE TABLE NODE_CONFIGURATION(
ID INT PRIMARY KEY,
DESCRIPTION VARCHAR(255)
);

View File

@ -0,0 +1,10 @@
-- Removes the PERMIT_TEMP flag and renames the storage base types.
ALTER TABLE FILE_STORAGE_BASE DROP COLUMN PERMIT_TEMP;
-- The new type names are written to a temporary TYPE_NEW column, which then
-- replaces the old TYPE column:
--   SSD_INDEX -> CURRENT, SSD_WORK -> WORK, SLOW -> STORAGE, BACKUP -> BACKUP
-- NOTE(review): a row whose TYPE matches none of the four values is left untouched
-- by the UPDATEs and keeps whatever TYPE_NEW was filled with when the column was
-- added (presumably an empty string) -- confirm no such rows exist.
ALTER TABLE FILE_STORAGE_BASE ADD COLUMN TYPE_NEW VARCHAR(255) NOT NULL;
UPDATE FILE_STORAGE_BASE SET TYPE_NEW = 'CURRENT' WHERE TYPE='SSD_INDEX';
UPDATE FILE_STORAGE_BASE SET TYPE_NEW = 'WORK' WHERE TYPE='SSD_WORK';
UPDATE FILE_STORAGE_BASE SET TYPE_NEW = 'STORAGE' WHERE TYPE='SLOW';
UPDATE FILE_STORAGE_BASE SET TYPE_NEW = 'BACKUP' WHERE TYPE='BACKUP';
ALTER TABLE FILE_STORAGE_BASE DROP COLUMN TYPE;
ALTER TABLE FILE_STORAGE_BASE CHANGE COLUMN TYPE_NEW TYPE VARCHAR(255) NOT NULL;

View File

@ -0,0 +1,6 @@
-- Creates the NODE_CONFIGURATION table. IDs are assigned by AUTO_INCREMENT;
-- each node has a description, an ACCEPT_QUERIES flag and a DISABLED flag
-- (FALSE by default).
-- NOTE(review): another script in this change set also creates NODE_CONFIGURATION
-- (ID and DESCRIPTION only) -- confirm only one of them is applied to a given database.
CREATE TABLE NODE_CONFIGURATION (
ID INT PRIMARY KEY AUTO_INCREMENT,
DESCRIPTION VARCHAR(255),
ACCEPT_QUERIES BOOLEAN,
DISABLED BOOLEAN DEFAULT FALSE
);

View File

@ -24,7 +24,7 @@ import java.util.List;
@Singleton
public class LinkdbReader {
private Path dbFile;
private final Path dbFile;
private volatile Connection connection;
private final Logger logger = LoggerFactory.getLogger(getClass());
@ -34,29 +34,36 @@ public class LinkdbReader {
this.dbFile = dbFile;
if (Files.exists(dbFile)) {
try {
connection = createConnection();
}
catch (SQLException ex) {
connection = null;
logger.error("Failed to load linkdb file", ex);
}
}
else {
logger.warn("No linkdb file {}", dbFile);
}
}
/**
 * Opens a JDBC connection to the SQLite database stored in {@code dbFile}.
 *
 * @return the new connection, or {@code null} if connecting failed; the failure
 *         is logged here rather than propagated
 * @throws SQLException declared in the signature, although a failure from the
 *         driver is caught and logged inside this method
 */
private Connection createConnection() throws SQLException {
    final String jdbcUrl = "jdbc:sqlite:" + dbFile.toString();

    try {
        return DriverManager.getConnection(jdbcUrl);
    }
    catch (SQLException ex) {
        logger.error("Failed to connect to link database " + dbFile, ex);
    }

    // Only reached when the connection attempt failed
    return null;
}
public void switchInput(Path newDbFile) throws IOException, SQLException {
if (!Files.isRegularFile(newDbFile)) {
logger.error("Source is not a file, refusing switch-over {}", newDbFile);
return;
}
if (connection != null) {
connection.close();
}
logger.info("Moving {} to {}", newDbFile, dbFile);
Files.move(newDbFile, dbFile, StandardCopyOption.REPLACE_EXISTING);
connection = createConnection();

View File

@ -19,6 +19,7 @@ public class ProcessAdHocTaskHeartbeatImpl implements AutoCloseable, ProcessAdHo
private final Logger logger = LoggerFactory.getLogger(ProcessAdHocTaskHeartbeatImpl.class);
private final String taskName;
private final String taskBase;
private final int node;
private final String instanceUUID;
private final HikariDataSource dataSource;
@ -37,6 +38,7 @@ public class ProcessAdHocTaskHeartbeatImpl implements AutoCloseable, ProcessAdHo
{
this.taskName = configuration.processName() + "." + taskName + ":" + configuration.node();
this.taskBase = configuration.processName() + "." + taskName;
this.node = configuration.node();
this.dataSource = dataSource;
this.instanceUUID = UUID.randomUUID().toString();
@ -110,8 +112,8 @@ public class ProcessAdHocTaskHeartbeatImpl implements AutoCloseable, ProcessAdHo
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, NODE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
SERVICE_INSTANCE = ?,
@ -122,10 +124,11 @@ public class ProcessAdHocTaskHeartbeatImpl implements AutoCloseable, ProcessAdHo
{
stmt.setString(1, taskName);
stmt.setString(2, taskBase);
stmt.setString(3, instanceUUID);
stmt.setString(4, serviceInstanceUUID);
stmt.setString(5, instanceUUID);
stmt.setString(6, serviceInstanceUUID);
stmt.setInt(3, node);
stmt.setString(4, instanceUUID);
stmt.setString(5, serviceInstanceUUID);
stmt.setString(6, instanceUUID);
stmt.setString(7, serviceInstanceUUID);
stmt.executeUpdate();
}
}

View File

@ -18,6 +18,7 @@ public class ProcessHeartbeatImpl implements ProcessHeartbeat {
private final Logger logger = LoggerFactory.getLogger(ProcessHeartbeatImpl.class);
private final String processName;
private final String processBase;
private final int node;
private final String instanceUUID;
@org.jetbrains.annotations.NotNull
private final ProcessConfiguration configuration;
@ -37,6 +38,7 @@ public class ProcessHeartbeatImpl implements ProcessHeartbeat {
{
this.processName = configuration.processName() + ":" + configuration.node();
this.processBase = configuration.processName();
this.node = configuration.node();
this.configuration = configuration;
this.dataSource = dataSource;
@ -115,8 +117,8 @@ public class ProcessHeartbeatImpl implements ProcessHeartbeat {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO PROCESS_HEARTBEAT (PROCESS_NAME, PROCESS_BASE, INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
INSERT INTO PROCESS_HEARTBEAT (PROCESS_NAME, PROCESS_BASE, NODE, INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
@ -126,8 +128,9 @@ public class ProcessHeartbeatImpl implements ProcessHeartbeat {
{
stmt.setString(1, processName);
stmt.setString(2, processBase);
stmt.setString(3, instanceUUID);
stmt.setInt(3, node);
stmt.setString(4, instanceUUID);
stmt.setString(5, instanceUUID);
stmt.executeUpdate();
}
}

View File

@ -19,6 +19,8 @@ public class ProcessTaskHeartbeatImpl<T extends Enum<T>> implements AutoCloseabl
private final Logger logger = LoggerFactory.getLogger(ProcessTaskHeartbeatImpl.class);
private final String taskName;
private final String taskBase;
private final int node;
private final String instanceUUID;
private final HikariDataSource dataSource;
@ -39,6 +41,7 @@ public class ProcessTaskHeartbeatImpl<T extends Enum<T>> implements AutoCloseabl
{
this.taskName = configuration.processName() + "." + taskName + ":" + configuration.node();
this.taskBase = configuration.processName() + "." + taskName;
this.node = configuration.node();
this.dataSource = dataSource;
this.instanceUUID = UUID.randomUUID().toString();
@ -115,8 +118,8 @@ public class ProcessTaskHeartbeatImpl<T extends Enum<T>> implements AutoCloseabl
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, NODE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
SERVICE_INSTANCE = ?,
@ -127,10 +130,11 @@ public class ProcessTaskHeartbeatImpl<T extends Enum<T>> implements AutoCloseabl
{
stmt.setString(1, taskName);
stmt.setString(2, taskBase);
stmt.setString(3, instanceUUID);
stmt.setString(4, serviceInstanceUUID);
stmt.setString(5, instanceUUID);
stmt.setString(6, serviceInstanceUUID);
stmt.setInt(3, node);
stmt.setString(4, instanceUUID);
stmt.setString(5, serviceInstanceUUID);
stmt.setString(6, instanceUUID);
stmt.setString(7, serviceInstanceUUID);
stmt.executeUpdate();
}
}

View File

@ -14,12 +14,11 @@ import java.util.concurrent.TimeUnit;
@Singleton
public class ServiceMonitors {
private final HikariDataSource dataSource;
private final Logger logger = LoggerFactory.getLogger(getClass());
private static final Logger logger = LoggerFactory.getLogger(ServiceMonitors.class);
private final Set<String> runningServices = new HashSet<>();
private final Set<ServiceNode> runningServices = new HashSet<>();
private final Set<Runnable> callbacks = new HashSet<>();
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 5);
private volatile boolean running;
@ -80,14 +79,14 @@ public class ServiceMonitors {
private boolean updateRunningServices() {
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""
SELECT SERVICE_BASE, TIMESTAMPDIFF(SECOND, HEARTBEAT_TIME, CURRENT_TIMESTAMP(6))
SELECT SERVICE_NAME, TIMESTAMPDIFF(SECOND, HEARTBEAT_TIME, CURRENT_TIMESTAMP(6))
FROM SERVICE_HEARTBEAT
WHERE ALIVE=1
""")) {
try (var rs = stmt.executeQuery()) {
Set<String> newRunningServices = new HashSet<>(10);
Set<ServiceNode> newRunningServices = new HashSet<>(10);
while (rs.next()) {
String svc = rs.getString(1);
ServiceNode svc = ServiceNode.parse(rs.getString(1));
int dtime = rs.getInt(2);
if (dtime < 2.5 * heartbeatInterval) {
newRunningServices.add(svc);
@ -113,21 +112,37 @@ public class ServiceMonitors {
return false;
}
public boolean isServiceUp(ServiceId serviceId) {
public boolean isServiceUp(ServiceId serviceId, int node) {
synchronized (runningServices) {
return runningServices.contains(serviceId.name);
return runningServices.contains(new ServiceNode(serviceId.name, node));
}
}
public List<ServiceId> getRunningServices() {
List<ServiceId> ret = new ArrayList<>(ServiceId.values().length);
public List<ServiceNode> getRunningServices() {
List<ServiceNode> ret = new ArrayList<>(ServiceId.values().length);
synchronized (runningServices) {
for (var runningService : runningServices) {
ret.add(ServiceId.byName(runningService));
}
ret.addAll(runningServices);
}
return ret;
}
/**
 * A service name together with the node it belongs to.
 *
 * @param service the service name without the node suffix
 * @param node    the node number, or -1 when the name carried no parsable node
 */
public record ServiceNode(String service, int node) {

    /**
     * Parses a name of the form {@code "service:node"}.
     * <p>
     * Everything before the first ':' is taken as the service name and everything
     * after it as the node number. If there is no ':' at all, or the part after it
     * is not an integer, the entire input is used as the service name and the node
     * is set to -1.
     *
     * @param serviceName the name to parse
     * @return the parsed service/node pair
     */
    public static ServiceNode parse(String serviceName) {
        final int separatorPos = serviceName.indexOf(':');

        if (separatorPos >= 0) {
            final String servicePart = serviceName.substring(0, separatorPos);
            final String nodePart = serviceName.substring(separatorPos + 1);

            try {
                return new ServiceNode(servicePart, Integer.parseInt(nodePart));
            }
            catch (NumberFormatException ex) {
                logger.warn("Failed to parse serviceName '" + serviceName + "'", ex);
                // continue to the fallback below
            }
        }

        // No node suffix, or the suffix was not a number
        return new ServiceNode(serviceName, -1);
    }
}
}

View File

@ -12,6 +12,7 @@ public class SearchServiceDescriptors {
new ServiceDescriptor(ServiceId.Index, 5021),
new ServiceDescriptor(ServiceId.Query, 5022),
new ServiceDescriptor(ServiceId.Search, 5023),
new ServiceDescriptor(ServiceId.Executor, 5024),
new ServiceDescriptor(ServiceId.Assistant, 5025),
new ServiceDescriptor(ServiceId.Dating, 5070),
new ServiceDescriptor(ServiceId.Explorer, 5071),

View File

@ -12,7 +12,11 @@ public class ServiceDescriptor {
this.name = id.name;
this.port = port;
}
/**
 * Creates a descriptor with an explicitly provided host.
 * The descriptor's name is set to {@code host}.
 *
 * @param id   the service this descriptor refers to
 * @param host the value to use as the descriptor's name
 * @param port the port of the service
 */
public ServiceDescriptor(ServiceId id, String host, int port) {
    this.port = port;
    this.name = host;
    this.id = id;
}
/** Returns the name of this descriptor. */
@Override
public String toString() {
    return name;
}

View File

@ -7,6 +7,7 @@ public enum ServiceId {
Search("search-service"),
Index("index-service"),
Query("query-service"),
Executor("executor-service"),
Control("control-service"),

View File

@ -18,6 +18,7 @@ public class ServiceHeartbeatImpl implements ServiceHeartbeat {
private final Logger logger = LoggerFactory.getLogger(ServiceHeartbeatImpl.class);
private final String serviceName;
private final String serviceBase;
private final int node;
private final String instanceUUID;
private final ServiceConfiguration configuration;
private final ServiceEventLog eventLog;
@ -36,6 +37,7 @@ public class ServiceHeartbeatImpl implements ServiceHeartbeat {
{
this.serviceName = configuration.serviceName() + ":" + configuration.node();
this.serviceBase = configuration.serviceName();
this.node = configuration.node();
this.configuration = configuration;
this.eventLog = eventLog;
this.dataSource = dataSource;
@ -105,8 +107,8 @@ public class ServiceHeartbeatImpl implements ServiceHeartbeat {
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO SERVICE_HEARTBEAT (SERVICE_NAME, SERVICE_BASE, INSTANCE, HEARTBEAT_TIME, ALIVE)
VALUES (?, ?, ?, CURRENT_TIMESTAMP(6), 1)
INSERT INTO SERVICE_HEARTBEAT (SERVICE_NAME, SERVICE_BASE, NODE, INSTANCE, HEARTBEAT_TIME, ALIVE)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP(6), 1)
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
@ -116,8 +118,9 @@ public class ServiceHeartbeatImpl implements ServiceHeartbeat {
{
stmt.setString(1, serviceName);
stmt.setString(2, serviceBase);
stmt.setString(3, instanceUUID);
stmt.setInt(3, node);
stmt.setString(4, instanceUUID);
stmt.setString(5, instanceUUID);
stmt.executeUpdate();
}
}

View File

@ -19,6 +19,7 @@ public class ServiceTaskHeartbeatImpl<T extends Enum<T>> implements ServiceTaskH
private final Logger logger = LoggerFactory.getLogger(ServiceTaskHeartbeatImpl.class);
private final String taskName;
private final String taskBase;
private final int node;
private final String instanceUUID;
private final HikariDataSource dataSource;
@ -27,6 +28,7 @@ public class ServiceTaskHeartbeatImpl<T extends Enum<T>> implements ServiceTaskH
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 1);
private final String serviceInstanceUUID;
private final int stepCount;
private final ServiceEventLog eventLog;
private volatile boolean running = false;
@ -42,6 +44,7 @@ public class ServiceTaskHeartbeatImpl<T extends Enum<T>> implements ServiceTaskH
this.eventLog = eventLog;
this.taskName = configuration.serviceName() + "." + taskName + ":" + configuration.node();
this.taskBase = configuration.serviceName() + "." + taskName;
this.node = configuration.node();
this.dataSource = dataSource;
this.instanceUUID = UUID.randomUUID().toString();
@ -118,8 +121,8 @@ public class ServiceTaskHeartbeatImpl<T extends Enum<T>> implements ServiceTaskH
try (var connection = dataSource.getConnection()) {
try (var stmt = connection.prepareStatement(
"""
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, NODE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
ON DUPLICATE KEY UPDATE
INSTANCE = ?,
SERVICE_INSTANCE = ?,
@ -130,10 +133,11 @@ public class ServiceTaskHeartbeatImpl<T extends Enum<T>> implements ServiceTaskH
{
stmt.setString(1, taskName);
stmt.setString(2, taskBase);
stmt.setString(3, instanceUUID);
stmt.setString(4, serviceInstanceUUID);
stmt.setString(5, instanceUUID);
stmt.setString(6, serviceInstanceUUID);
stmt.setInt(3, node);
stmt.setString(4, instanceUUID);
stmt.setString(5, serviceInstanceUUID);
stmt.setString(6, instanceUUID);
stmt.setString(7, serviceInstanceUUID);
stmt.executeUpdate();
}
}

View File

@ -47,11 +47,11 @@ public class Service {
this.initialization = params.initialization;
var config = params.configuration;
String inboxName = config.serviceName() + ":" + config.node();
String inboxName = config.serviceName();
logger.info("Inbox name: {}", inboxName);
var mqInboxFactory = params.messageQueueInboxFactory;
messageQueueInbox = mqInboxFactory.createAsynchronousInbox(inboxName, config.instanceUuid());
messageQueueInbox = mqInboxFactory.createAsynchronousInbox(inboxName, config.node(), config.instanceUuid());
messageQueueInbox.subscribe(new ServiceMqSubscription(this));
serviceName = System.getProperty("service-name");

View File

@ -0,0 +1,41 @@
// Gradle build script: Java plugin with the JVM test suite plugin,
// a Java 21 toolchain, and the dependencies of this module.
plugins {
id 'java'
id 'jvm-test-suite'
}
// Build with a Java 21 toolchain
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(21))
}
}
dependencies {
// Other modules of this project
implementation project(':code:libraries:message-queue')
implementation project(':code:common:service')
implementation project(':code:common:process')
implementation project(':code:common:model')
implementation project(':code:common:service-client')
implementation project(':code:common:db')
implementation project(':code:common:config')
implementation project(':code:api:process-mqapi')
implementation project(':code:api:index-api')
implementation project(':code:features-control:process-execution')
implementation project(':code:features-index:index-journal')
implementation project(':code:process-models:crawl-spec')
// External libraries, referenced through `libs`
implementation libs.bundles.slf4j
implementation libs.guice
implementation libs.notnull
implementation libs.spark
implementation libs.jsoup
implementation libs.zstd
implementation libs.bundles.mariadb
implementation libs.commons.io
implementation libs.bundles.gson
// Test-only dependencies
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.control.actor;
package nu.marginalia.actor;
public enum Actor {
CRAWL,

View File

@ -0,0 +1,55 @@
package nu.marginalia.actor;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
import spark.Response;
import spark.Spark;
/**
 * Request handlers for starting and stopping actors.
 * <p>
 * Each handler reads the actor name from the {@code id} request parameter,
 * translates it into an {@link Actor} and delegates to {@link ActorControlService}.
 */
@Singleton
public class ActorApi {
    private final ActorControlService actors;
    private final Logger logger = LoggerFactory.getLogger(getClass());

    @Inject
    public ActorApi(ActorControlService actors) {
        this.actors = actors;
    }

    /**
     * Starts the actor named by the {@code id} parameter from the state named by the
     * {@code state} parameter, passing the request body on as the JSON payload.
     *
     * @return an empty string
     */
    public Object startActorFromState(Request request, Response response) throws Exception {
        Actor actor = translateActor(request.params("id"));
        String state = request.params("state");

        actors.startFromJSON(actor, state, request.body());

        return "";
    }

    /**
     * Starts the actor named by the {@code id} parameter, passing the request body
     * on as the JSON payload.
     *
     * @return an empty string
     */
    public Object startActor(Request request, Response response) throws Exception {
        Actor actor = translateActor(request.params("id"));

        actors.startJSON(actor, request.body());

        return "";
    }

    /**
     * Stops the actor named by the {@code id} parameter.
     *
     * @return the string "OK"
     */
    public Object stopActor(Request request, Response response) {
        Actor actor = translateActor(request.params("id"));

        actors.stop(actor);

        return "OK";
    }

    /**
     * Translates an actor name into the corresponding {@link Actor} constant,
     * ignoring case. A missing or unknown name halts the request with status 400.
     *
     * @param name the actor name, may be null if the parameter was absent
     * @return the matching actor
     */
    public Actor translateActor(String name) {
        if (name == null) {
            // Without this guard a missing parameter would surface as a NullPointerException
            logger.error("Unknown actor {}", name);
            Spark.halt(400, "Unknown actor name provided");
            return null;
        }

        try {
            // Locale.ROOT keeps the upper-casing independent of the JVM's default locale;
            // e.g. a Turkish default locale would otherwise map 'i' to a dotted capital I
            // and fail to match the enum constant.
            return Actor.valueOf(name.toUpperCase(java.util.Locale.ROOT));
        }
        catch (IllegalArgumentException ex) {
            logger.error("Unknown actor {}", name);
            Spark.halt(400, "Unknown actor name provided");
            // halt() aborts the request; the return is only here to satisfy the compiler
            return null;
        }
    }
}

View File

@ -1,18 +1,15 @@
package nu.marginalia.control.actor;
package nu.marginalia.actor;
import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import lombok.SneakyThrows;
import nu.marginalia.control.actor.task.*;
import nu.marginalia.control.actor.monitor.*;
import nu.marginalia.control.actor.monitor.ConverterMonitorActor;
import nu.marginalia.control.actor.monitor.LoaderMonitorActor;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.actor.ActorStateMachine;
import nu.marginalia.actor.monitor.*;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorStateInstance;
import nu.marginalia.actor.task.*;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.server.BaseServiceParams;
@ -21,17 +18,17 @@ import java.util.Map;
import java.util.UUID;
import java.util.stream.Collectors;
/** This class is responsible for starting and stopping the various actors in the controller service */
/** This class is responsible for starting and stopping the various actors in the responsible service */
@Singleton
public class ControlActors {
public class ActorControlService {
private final ServiceEventLog eventLog;
private final Gson gson;
private final MessageQueueFactory messageQueueFactory;
public Map<Actor, ActorStateMachine> stateMachines = new HashMap<>();
public Map<Actor, AbstractActorPrototype> actorDefinitions = new HashMap<>();
private final int node;
@Inject
public ControlActors(MessageQueueFactory messageQueueFactory,
public ActorControlService(MessageQueueFactory messageQueueFactory,
GsonFactory gsonFactory,
BaseServiceParams baseServiceParams,
ConvertActor convertActor,
@ -54,6 +51,7 @@ public class ControlActors {
this.messageQueueFactory = messageQueueFactory;
this.eventLog = baseServiceParams.eventLog;
this.gson = gsonFactory.get();
this.node = baseServiceParams.configuration.node();
register(Actor.CRAWL, crawlActor);
register(Actor.RECRAWL, recrawlActor);
@ -76,7 +74,7 @@ public class ControlActors {
}
private void register(Actor process, AbstractActorPrototype graph) {
var sm = new ActorStateMachine(messageQueueFactory, process.id(), UUID.randomUUID(), graph);
var sm = new ActorStateMachine(messageQueueFactory, process.id(), node, UUID.randomUUID(), graph);
sm.listen((function, param) -> logStateChange(process, function));
stateMachines.put(process, sm);
@ -105,12 +103,22 @@ public class ControlActors {
stateMachines.get(process).initFrom(state, gson.toJson(arg));
}
/**
 * Logs an FSM-START event for the actor and initializes its state machine from
 * the given state, passing the JSON string on as-is.
 *
 * @param process the actor to start
 * @param state   the name of the state to start from
 * @param json    the already serialized argument
 */
public <T> void startFromJSON(Actor process, String state, String json) throws Exception {
    eventLog.logEvent("FSM-START", process.id());

    var machine = stateMachines.get(process);
    machine.initFrom(state, json);
}
/**
 * Logs an FSM-START event for the actor and initializes its state machine with
 * the argument serialized to JSON.
 *
 * @param process the actor to start
 * @param arg     the argument object, serialized with gson before it is passed on
 */
public <T> void start(Actor process, Object arg) throws Exception {
    eventLog.logEvent("FSM-START", process.id());

    var machine = stateMachines.get(process);
    String serializedArg = gson.toJson(arg);
    machine.init(serializedArg);
}
/**
 * Logs an FSM-START event for the actor and initializes its state machine,
 * passing the JSON string on as-is.
 *
 * @param process the actor to start
 * @param json    the already serialized argument
 */
public <T> void startJSON(Actor process, String json) throws Exception {
    eventLog.logEvent("FSM-START", process.id());

    var machine = stateMachines.get(process);
    machine.init(json);
}
@SneakyThrows
public void stop(Actor process) {
eventLog.logEvent("FSM-STOP", process.id());

View File

@ -1,15 +1,16 @@
package nu.marginalia.control.actor.monitor;
package nu.marginalia.actor.monitor;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorTerminalState;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorTerminalState;
import nu.marginalia.service.module.ServiceConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -39,6 +40,7 @@ public class AbstractProcessSpawnerActor extends AbstractActorPrototype {
private final String inboxName;
private final ProcessService.ProcessId processId;
private final ExecutorService executorService = Executors.newSingleThreadExecutor();
private final int node;
public String describe() {
return "Spawns a(n) " + processId + " process and monitors its inbox for messages";
@ -46,14 +48,16 @@ public class AbstractProcessSpawnerActor extends AbstractActorPrototype {
@Inject
public AbstractProcessSpawnerActor(ActorStateFactory stateFactory,
ServiceConfiguration configuration,
MqPersistence persistence,
ProcessService processService,
String inboxName,
ProcessService.ProcessId processId) {
super(stateFactory);
this.node = configuration.node();
this.persistence = persistence;
this.processService = processService;
this.inboxName = inboxName;
this.inboxName = inboxName + ":" + node;
this.processId = processId;
}

View File

@ -1,11 +1,12 @@
package nu.marginalia.control.actor.monitor;
package nu.marginalia.actor.monitor;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.service.module.ServiceConfiguration;
@Singleton
public class ConverterMonitorActor extends AbstractProcessSpawnerActor {
@ -13,9 +14,15 @@ public class ConverterMonitorActor extends AbstractProcessSpawnerActor {
@Inject
public ConverterMonitorActor(ActorStateFactory stateFactory,
ServiceConfiguration configuration,
MqPersistence persistence,
ProcessService processService) {
super(stateFactory, persistence, processService, ProcessInboxNames.CONVERTER_INBOX, ProcessService.ProcessId.CONVERTER);
super(stateFactory,
configuration,
persistence,
processService,
ProcessInboxNames.CONVERTER_INBOX,
ProcessService.ProcessId.CONVERTER);
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.control.actor.monitor;
package nu.marginalia.actor.monitor;
import com.google.inject.Inject;
import com.google.inject.Singleton;
@ -6,15 +6,18 @@ import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.service.module.ServiceConfiguration;
@Singleton
public class CrawlerMonitorActor extends AbstractProcessSpawnerActor {
@Inject
public CrawlerMonitorActor(ActorStateFactory stateFactory,
ServiceConfiguration configuration,
MqPersistence persistence,
ProcessService processService) {
super(stateFactory,
configuration,
persistence,
processService,
ProcessInboxNames.CRAWLER_INBOX,

View File

@ -1,15 +1,15 @@
package nu.marginalia.control.actor.monitor;
package nu.marginalia.actor.monitor;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorage;
import nu.marginalia.db.storage.model.FileStorageBaseType;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorage;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -74,7 +74,7 @@ public class FileStorageMonitorActor extends AbstractActorPrototype {
transition(REMOVE_STALE, missing.get().id());
}
fileStorageService.synchronizeStorageManifests(fileStorageService.getStorageBase(FileStorageBaseType.SLOW));
fileStorageService.synchronizeStorageManifests(fileStorageService.getStorageBase(FileStorageBaseType.WORK));
TimeUnit.SECONDS.sleep(10);
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.control.actor.monitor;
package nu.marginalia.actor.monitor;
import com.google.inject.Inject;
import com.google.inject.Singleton;
@ -6,6 +6,7 @@ import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.service.module.ServiceConfiguration;
@Singleton
public class IndexConstructorMonitorActor extends AbstractProcessSpawnerActor {
@ -13,9 +14,15 @@ public class IndexConstructorMonitorActor extends AbstractProcessSpawnerActor {
@Inject
public IndexConstructorMonitorActor(ActorStateFactory stateFactory,
ServiceConfiguration configuration,
MqPersistence persistence,
ProcessService processService) {
super(stateFactory, persistence, processService, ProcessInboxNames.INDEX_CONSTRUCTOR_INBOX, ProcessService.ProcessId.INDEX_CONSTRUCTOR);
super(stateFactory,
configuration,
persistence,
processService,
ProcessInboxNames.INDEX_CONSTRUCTOR_INBOX,
ProcessService.ProcessId.INDEX_CONSTRUCTOR);
}

View File

@ -1,11 +1,12 @@
package nu.marginalia.control.actor.monitor;
package nu.marginalia.actor.monitor;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.service.module.ServiceConfiguration;
@Singleton
public class LoaderMonitorActor extends AbstractProcessSpawnerActor {
@ -13,10 +14,13 @@ public class LoaderMonitorActor extends AbstractProcessSpawnerActor {
@Inject
public LoaderMonitorActor(ActorStateFactory stateFactory,
ServiceConfiguration configuration,
MqPersistence persistence,
ProcessService processService) {
super(stateFactory, persistence, processService,
super(stateFactory,
configuration,
persistence, processService,
ProcessInboxNames.LOADER_INBOX,
ProcessService.ProcessId.LOADER);
}

View File

@ -1,12 +1,12 @@
package nu.marginalia.control.actor.monitor;
package nu.marginalia.actor.monitor;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.mq.persistence.MqPersistence;
import java.util.concurrent.TimeUnit;

View File

@ -0,0 +1,217 @@
package nu.marginalia.actor.monitor;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.service.control.ServiceEventLog;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * Actor that periodically reconciles the heartbeat tables with what is actually alive.
 *
 * <p>Each sweep does two things:
 * <ul>
 *   <li>Every {@code PROCESS_HEARTBEAT} row with status {@code RUNNING} and a recognised
 *       process base is checked against {@link ProcessService#isRunning}; if the process
 *       is not running, or its heartbeat is older than the staleness threshold, the row
 *       is updated to {@code STOPPED} and a {@code PROCESS-MISSING} event is logged.</li>
 *   <li>Every {@code TASK_HEARTBEAT} row whose heartbeat is older than the staleness
 *       threshold is deleted.</li>
 * </ul>
 *
 * <p>NOTE(review): {@code processService.isRunning} presumably only knows about processes
 * spawned by this service instance. If several instances run this actor against the same
 * tables they may flag each other's processes as stopped; confirm before relying on this
 * in a multi-node deployment.
 */
@Singleton
public class ProcessLivenessMonitorActor extends AbstractActorPrototype {

    // STATES
    private static final String INITIAL = "INITIAL";
    private static final String MONITOR = "MONITOR";
    private static final String END = "END";

    /** A heartbeat at least this many milliseconds old is considered stale. */
    private static final double STALE_HEARTBEAT_MILLIS = 10_000;

    /** Pause between two consecutive sweeps of the heartbeat tables, in seconds. */
    private static final long POLL_INTERVAL_SECONDS = 60;

    private final ServiceEventLog eventLogService;
    private final ProcessService processService;
    private final HikariDataSource dataSource;

    @Inject
    public ProcessLivenessMonitorActor(ActorStateFactory stateFactory,
                                       ServiceEventLog eventLogService,
                                       ProcessService processService,
                                       HikariDataSource dataSource) {
        super(stateFactory);

        this.eventLogService = eventLogService;
        this.processService = processService;
        this.dataSource = dataSource;
    }

    @Override
    public String describe() {
        return "Periodically check to ensure that the control service's view of running processes is agreement with the process heartbeats table.";
    }

    /** Entry state; does nothing and hands over to {@code MONITOR}. */
    @ActorState(name = INITIAL, next = MONITOR)
    public void init() {
    }

    /**
     * Runs the reconciliation loop forever: sweep process heartbeats, sweep task
     * heartbeats, sleep, repeat.
     *
     * @throws Exception if the sleep is interrupted; database failures surface as
     *                   {@link RuntimeException} wrapping the {@link SQLException}
     */
    @ActorState(name = MONITOR, next = MONITOR, resume = ActorResumeBehavior.RETRY, description = """
            Periodically check to ensure that the control service's view of
            running processes is agreement with the process heartbeats table.
            If the process is not running, mark the process as stopped in the table.
            """)
    public void monitor() throws Exception {
        for (;;) {
            reapStaleProcessHeartbeats();
            reapStaleTaskHeartbeats();

            TimeUnit.SECONDS.sleep(POLL_INTERVAL_SECONDS);
        }
    }

    /** Marks RUNNING process heartbeats as stopped when the process is gone or silent. */
    private void reapStaleProcessHeartbeats() {
        for (var heartbeat : getProcessHeartbeats()) {
            if (!heartbeat.isRunning()) {
                continue;
            }

            var processId = heartbeat.getProcessId();
            if (processId == null) {
                // Unknown (or missing) process base: nothing we can check it against.
                continue;
            }

            boolean alive = processService.isRunning(processId)
                    && heartbeat.lastSeenMillis() < STALE_HEARTBEAT_MILLIS;
            if (!alive) {
                flagProcessAsStopped(heartbeat);
            }
        }
    }

    /** Deletes task heartbeats that have not been refreshed within the staleness threshold. */
    private void reapStaleTaskHeartbeats() {
        for (var heartbeat : getTaskHeartbeats()) {
            if (heartbeat.lastSeenMillis() < STALE_HEARTBEAT_MILLIS) {
                continue;
            }

            removeTaskHeartbeat(heartbeat);
        }
    }

    /**
     * Reads every row of {@code PROCESS_HEARTBEAT}.
     *
     * @return one record per row; the heartbeat age is converted from the
     *         microseconds reported by the query to milliseconds
     */
    private List<ProcessHeartbeat> getProcessHeartbeats() {
        List<ProcessHeartbeat> heartbeats = new ArrayList<>();

        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     SELECT PROCESS_NAME, PROCESS_BASE, INSTANCE, STATUS, PROGRESS,
                     TIMESTAMPDIFF(MICROSECOND, HEARTBEAT_TIME, CURRENT_TIMESTAMP(6)) AS TSDIFF
                     FROM PROCESS_HEARTBEAT
                     """);
             var rs = stmt.executeQuery()) {
            while (rs.next()) {
                // A negative progress value is mapped to "no progress" (null).
                int progress = rs.getInt("PROGRESS");
                heartbeats.add(new ProcessHeartbeat(
                        rs.getString("PROCESS_NAME"),
                        rs.getString("PROCESS_BASE"),
                        rs.getString("INSTANCE"),
                        rs.getLong("TSDIFF") / 1000.,
                        progress < 0 ? null : progress,
                        rs.getString("STATUS")
                ));
            }
        }
        catch (SQLException ex) {
            throw new RuntimeException(ex);
        }

        return heartbeats;
    }

    /** Logs a {@code PROCESS-MISSING} event and sets the heartbeat row's status to STOPPED. */
    private void flagProcessAsStopped(ProcessHeartbeat processHeartbeat) {
        eventLogService.logEvent("PROCESS-MISSING", "Marking stale process heartbeat "
                + processHeartbeat.processId() + " / " + processHeartbeat.uuidFull() + " as stopped");

        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     UPDATE PROCESS_HEARTBEAT
                     SET STATUS = 'STOPPED'
                     WHERE INSTANCE = ?
                     """)) {
            stmt.setString(1, processHeartbeat.uuidFull());
            stmt.executeUpdate();
        }
        catch (SQLException ex) {
            throw new RuntimeException(ex);
        }
    }

    /**
     * Reads every row of {@code TASK_HEARTBEAT}.
     *
     * @return one record per row; the heartbeat age is converted from the
     *         microseconds reported by the query to milliseconds
     */
    private List<TaskHeartbeat> getTaskHeartbeats() {
        List<TaskHeartbeat> heartbeats = new ArrayList<>();

        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     SELECT TASK_NAME, TASK_BASE, INSTANCE, SERVICE_INSTANCE, STATUS, STAGE_NAME, PROGRESS, TIMESTAMPDIFF(MICROSECOND, TASK_HEARTBEAT.HEARTBEAT_TIME, CURRENT_TIMESTAMP(6)) AS TSDIFF
                     FROM TASK_HEARTBEAT
                     """);
             var rs = stmt.executeQuery()) {
            while (rs.next()) {
                // A negative progress value is mapped to "no progress" (null).
                int progress = rs.getInt("PROGRESS");
                heartbeats.add(new TaskHeartbeat(
                        rs.getString("TASK_NAME"),
                        rs.getString("TASK_BASE"),
                        rs.getString("INSTANCE"),
                        rs.getString("SERVICE_INSTANCE"),
                        rs.getLong("TSDIFF") / 1000.,
                        progress < 0 ? null : progress,
                        rs.getString("STAGE_NAME"),
                        rs.getString("STATUS")
                ));
            }
        }
        catch (SQLException ex) {
            throw new RuntimeException(ex);
        }

        return heartbeats;
    }

    /** Deletes the {@code TASK_HEARTBEAT} row(s) belonging to the given task instance. */
    private void removeTaskHeartbeat(TaskHeartbeat heartbeat) {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     DELETE FROM TASK_HEARTBEAT
                     WHERE INSTANCE = ?
                     """)) {
            stmt.setString(1, heartbeat.instanceUuidFull());
            stmt.executeUpdate();
        }
        catch (SQLException ex) {
            throw new RuntimeException(ex);
        }
    }

    /** One row of {@code PROCESS_HEARTBEAT}; {@code lastSeenMillis} is the heartbeat's age. */
    private record ProcessHeartbeat(
            String processId,
            String processBase,
            String uuidFull,
            double lastSeenMillis,
            Integer progress,
            String status
    ) {
        public boolean isRunning() {
            return "RUNNING".equals(status);
        }

        /**
         * Maps the row's process base onto a {@link ProcessService.ProcessId}.
         *
         * @return the matching id, or {@code null} when the base is missing or unrecognised
         */
        public ProcessService.ProcessId getProcessId() {
            if (processBase == null) {
                // Switching on a null String throws NullPointerException; treat it as unknown.
                return null;
            }

            return switch (processBase) {
                case "converter" -> ProcessService.ProcessId.CONVERTER;
                case "crawler" -> ProcessService.ProcessId.CRAWLER;
                case "loader" -> ProcessService.ProcessId.LOADER;
                case "website-adjacencies-calculator" -> ProcessService.ProcessId.ADJACENCIES_CALCULATOR;
                case "index-constructor" -> ProcessService.ProcessId.INDEX_CONSTRUCTOR;
                default -> null;
            };
        }
    }

    /** One row of {@code TASK_HEARTBEAT}; {@code lastSeenMillis} is the heartbeat's age. */
    private record TaskHeartbeat(
            String taskName,
            String taskBase,
            String instanceUuidFull,
            String serviceUuuidFull,
            double lastSeenMillis,
            Integer progress,
            String stage,
            String status
    ) { }
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.control.actor.task;
package nu.marginalia.actor.task;
import com.google.inject.Inject;
import com.google.inject.Singleton;

View File

@ -1,4 +1,4 @@
package nu.marginalia.control.actor.task;
package nu.marginalia.actor.task;
import com.google.gson.Gson;
import com.google.inject.Inject;
@ -6,20 +6,20 @@ import com.google.inject.Singleton;
import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
import lombok.With;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.control.process.ProcessOutboxes;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorageBaseType;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.mqapi.converting.ConvertAction;
import nu.marginalia.mqapi.converting.ConvertRequest;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -92,7 +92,7 @@ public class ConvertActor extends AbstractActorPrototype {
// Create processed data area
var toProcess = storageService.getStorage(sourceStorageId);
var base = storageService.getStorageBase(FileStorageBaseType.SLOW);
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var processedArea = storageService.allocateTemporaryStorage(base,
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Data; " + toProcess.description());
@ -125,7 +125,7 @@ public class ConvertActor extends AbstractActorPrototype {
String fileName = sourcePath.toFile().getName();
var base = storageService.getStorageBase(FileStorageBaseType.SLOW);
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var processedArea = storageService.allocateTemporaryStorage(base,
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Encylopedia Data; " + fileName);
@ -157,7 +157,7 @@ public class ConvertActor extends AbstractActorPrototype {
String fileName = sourcePath.toFile().getName();
var base = storageService.getStorageBase(FileStorageBaseType.SLOW);
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var processedArea = storageService.allocateTemporaryStorage(base,
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Dirtree Data; " + fileName);
@ -188,7 +188,7 @@ public class ConvertActor extends AbstractActorPrototype {
String fileName = sourcePath.toFile().getName();
var base = storageService.getStorageBase(FileStorageBaseType.SLOW);
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var processedArea = storageService.allocateTemporaryStorage(base,
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Stackexchange Data; " + fileName);

View File

@ -1,4 +1,4 @@
package nu.marginalia.control.actor.task;
package nu.marginalia.actor.task;
import com.google.gson.Gson;
import com.google.inject.Inject;
@ -7,25 +7,25 @@ import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
import lombok.With;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.control.process.ProcessOutboxes;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.control.svc.BackupService;
import nu.marginalia.svc.BackupService;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
import nu.marginalia.index.client.IndexClient;
import nu.marginalia.index.client.IndexMqEndpoints;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.mqapi.converting.ConvertAction;
import nu.marginalia.mqapi.converting.ConvertRequest;
import nu.marginalia.mqapi.index.CreateIndexRequest;
import nu.marginalia.mqapi.index.IndexName;
import nu.marginalia.mqapi.loading.LoadRequest;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorageBaseType;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -64,7 +64,7 @@ public class ConvertAndLoadActor extends AbstractActorPrototype {
@AllArgsConstructor @With @NoArgsConstructor
public static class Message {
public FileStorageId crawlStorageId = null;
public FileStorageId processedStorageId = null;
public List<FileStorageId> processedStorageId = null;
public long converterMsgId = 0L;
public long loaderMsgId = 0L;
};
@ -126,7 +126,7 @@ public class ConvertAndLoadActor extends AbstractActorPrototype {
var toProcess = storageService.getStorage(message.crawlStorageId);
var base = storageService.getStorageBase(FileStorageBaseType.SLOW);
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var processedArea = storageService.allocateTemporaryStorage(base, FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Data; " + toProcess.description());
@ -140,7 +140,7 @@ public class ConvertAndLoadActor extends AbstractActorPrototype {
long id = mqConverterOutbox.sendAsync(ConvertRequest.class.getSimpleName(), gson.toJson(request));
return message
.withProcessedStorageId(processedArea.id())
.withProcessedStorageId(List.of(processedArea.id()))
.withConverterMsgId(id);
}
@ -171,7 +171,7 @@ public class ConvertAndLoadActor extends AbstractActorPrototype {
""")
public Message load(Message message) throws Exception {
if (message.loaderMsgId <= 0) {
var request = new LoadRequest(List.of(message.processedStorageId));
var request = new LoadRequest(message.processedStorageId);
long id = mqLoaderOutbox.sendAsync(LoadRequest.class.getSimpleName(), gson.toJson(request));
transition(LOAD, message.withLoaderMsgId(id));
@ -192,7 +192,7 @@ public class ConvertAndLoadActor extends AbstractActorPrototype {
Create a backup snapshot of the new data
""")
public void createBackup(Message message) throws SQLException, IOException {
backupService.createBackupFromStaging(List.of(message.processedStorageId));
backupService.createBackupFromStaging(message.processedStorageId);
}
@ActorState(

View File

@ -1,4 +1,4 @@
package nu.marginalia.control.actor.task;
package nu.marginalia.actor.task;
import com.google.gson.Gson;
import com.google.inject.Inject;
@ -7,21 +7,23 @@ import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
import lombok.With;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.control.process.ProcessOutboxes;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorageBaseType;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.mqapi.crawling.CrawlRequest;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
@Singleton
public class CrawlActor extends AbstractActorPrototype {
@ -96,7 +98,7 @@ public class CrawlActor extends AbstractActorPrototype {
var toCrawl = storageService.getStorage(message.crawlSpecId);
var base = storageService.getStorageBase(FileStorageBaseType.SLOW);
var base = storageService.getStorageBase(FileStorageBaseType.WORK);
var dataArea = storageService.allocateTemporaryStorage(
base,
FileStorageType.CRAWL_DATA,
@ -106,7 +108,7 @@ public class CrawlActor extends AbstractActorPrototype {
storageService.relateFileStorages(toCrawl.id(), dataArea.id());
// Pre-send crawl request
var request = new CrawlRequest(message.crawlSpecId, dataArea.id());
var request = new CrawlRequest(List.of(message.crawlSpecId), dataArea.id());
long id = mqCrawlerOutbox.sendAsync(CrawlRequest.class.getSimpleName(), gson.toJson(request));
return message

View File

@ -1,19 +1,17 @@
package nu.marginalia.control.actor.task;
package nu.marginalia.actor.task;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.control.svc.ControlFileStorageService;
import nu.marginalia.crawlspec.CrawlSpecFileNames;
import nu.marginalia.crawlspec.CrawlSpecGenerator;
import nu.marginalia.db.DbDomainStatsExportMultitool;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorageBaseType;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.crawlspec.CrawlSpecFileNames;
import nu.marginalia.db.DbDomainStatsExportMultitool;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -34,18 +32,15 @@ public class CrawlJobExtractorActor extends AbstractActorPrototype {
public static final String CREATE_FROM_LINK = "CREATE_FROM_LINK";
public static final String END = "END";
private final FileStorageService fileStorageService;
private final ControlFileStorageService controlFileStorageService;
private final HikariDataSource dataSource;
@Inject
public CrawlJobExtractorActor(ActorStateFactory stateFactory,
FileStorageService fileStorageService,
ControlFileStorageService controlFileStorageService,
HikariDataSource dataSource
) {
super(stateFactory);
this.fileStorageService = fileStorageService;
this.controlFileStorageService = controlFileStorageService;
this.dataSource = dataSource;
}
@ -70,7 +65,7 @@ public class CrawlJobExtractorActor extends AbstractActorPrototype {
error("This actor requires a CrawlJobExtractorArgumentsWithURL argument");
}
var base = fileStorageService.getStorageBase(FileStorageBaseType.SLOW);
var base = fileStorageService.getStorageBase(FileStorageBaseType.WORK);
var storage = fileStorageService.allocateTemporaryStorage(base, FileStorageType.CRAWL_SPEC, "crawl-spec", arg.description());
Path urlsTxt = storage.asPath().resolve("urls.txt");
@ -81,7 +76,7 @@ public class CrawlJobExtractorActor extends AbstractActorPrototype {
is.transferTo(os);
}
catch (Exception ex) {
controlFileStorageService.flagFileForDeletion(storage.id());
fileStorageService.flagFileForDeletion(storage.id());
error("Error downloading " + arg.url());
}
@ -107,7 +102,7 @@ public class CrawlJobExtractorActor extends AbstractActorPrototype {
error("This actor requires a CrawlJobExtractorArguments argument");
}
var base = fileStorageService.getStorageBase(FileStorageBaseType.SLOW);
var base = fileStorageService.getStorageBase(FileStorageBaseType.WORK);
var storage = fileStorageService.allocateTemporaryStorage(base, FileStorageType.CRAWL_SPEC, "crawl-spec", arg.description());
final Path path = CrawlSpecFileNames.resolve(storage);

View File

@ -1,4 +1,4 @@
package nu.marginalia.control.actor.task;
package nu.marginalia.actor.task;
import com.google.inject.Inject;
import com.google.inject.Singleton;
@ -7,12 +7,12 @@ import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
import lombok.With;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -1,4 +1,4 @@
package nu.marginalia.control.actor.task;
package nu.marginalia.actor.task;
import com.google.gson.Gson;
import com.google.inject.Inject;
@ -7,21 +7,22 @@ import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
import lombok.With;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.control.process.ProcessOutboxes;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorage;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorage;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.mqapi.crawling.CrawlRequest;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import java.nio.file.Files;
import java.sql.SQLException;
import java.util.List;
import java.util.Optional;
@Singleton
@ -41,7 +42,7 @@ public class RecrawlActor extends AbstractActorPrototype {
@AllArgsConstructor @With @NoArgsConstructor
public static class RecrawlMessage {
public FileStorageId crawlSpecId = null;
public List<FileStorageId> crawlSpecId = null;
public FileStorageId crawlStorageId = null;
public long crawlerMsgId = 0L;
};
@ -50,10 +51,8 @@ public class RecrawlActor extends AbstractActorPrototype {
public String describe() {
return "Run the crawler with the given crawl spec using previous crawl data for a reference";
}
public static RecrawlMessage recrawlFromCrawlData(FileStorageId crawlData) {
return new RecrawlMessage(null, crawlData, 0L);
}
public static RecrawlMessage recrawlFromCrawlDataAndCralSpec(FileStorageId crawlData, FileStorageId crawlSpec) {
public static RecrawlMessage recrawlFromCrawlDataAndCralSpec(FileStorageId crawlData, List<FileStorageId> crawlSpec) {
return new RecrawlMessage(crawlSpec, crawlData, 0L);
}
@ -83,24 +82,22 @@ public class RecrawlActor extends AbstractActorPrototype {
}
var crawlStorage = storageService.getStorage(recrawlMessage.crawlStorageId);
FileStorage specStorage;
if (recrawlMessage.crawlSpecId != null) {
specStorage = storageService.getStorage(recrawlMessage.crawlSpecId);
}
else {
specStorage = getSpec(crawlStorage).orElse(null);
}
for (var specs : recrawlMessage.crawlSpecId) {
FileStorage specStorage = storageService.getStorage(specs);
if (specStorage == null) error("Bad storage id");
if (specStorage.type() != FileStorageType.CRAWL_SPEC) error("Bad storage type " + specStorage.type());
}
if (crawlStorage == null) error("Bad storage id");
if (crawlStorage.type() != FileStorageType.CRAWL_DATA) error("Bad storage type " + specStorage.type());
if (crawlStorage.type() != FileStorageType.CRAWL_DATA) error("Bad storage type " + crawlStorage.type());
Files.deleteIfExists(crawlStorage.asPath().resolve("crawler.log"));
return recrawlMessage
.withCrawlSpecId(specStorage.id());
.withCrawlSpecId(recrawlMessage.crawlSpecId);
}
private Optional<FileStorage> getSpec(FileStorage crawlStorage) throws SQLException {
@ -119,6 +116,7 @@ public class RecrawlActor extends AbstractActorPrototype {
)
public RecrawlMessage crawl(RecrawlMessage recrawlMessage) throws Exception {
// Pre-send crawl request
var request = new CrawlRequest(recrawlMessage.crawlSpecId, recrawlMessage.crawlStorageId);
long id = mqCrawlerOutbox.sendAsync(CrawlRequest.class.getSimpleName(), gson.toJson(request));

View File

@ -1,13 +1,14 @@
package nu.marginalia.control.actor.task;
package nu.marginalia.actor.task;
import com.google.inject.Inject;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.control.actor.Actor;
import nu.marginalia.control.svc.BackupService;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.actor.Actor;
import nu.marginalia.service.module.ServiceConfiguration;
import nu.marginalia.svc.BackupService;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.mq.persistence.MqPersistence;
@ -18,6 +19,7 @@ public class RestoreBackupActor extends AbstractActorPrototype {
public static final String END = "END";
private final BackupService backupService;
private final int node;
private final MqPersistence mqPersistence;
@Override
@ -27,11 +29,13 @@ public class RestoreBackupActor extends AbstractActorPrototype {
@Inject
public RestoreBackupActor(ActorStateFactory stateFactory,
MqPersistence mqPersistence,
BackupService backupService
BackupService backupService,
ServiceConfiguration configuration
) {
super(stateFactory);
this.mqPersistence = mqPersistence;
this.backupService = backupService;
this.node = configuration.node();
}
@ActorState(name=RESTORE, next = END, resume = ActorResumeBehavior.ERROR)
@ -39,7 +43,7 @@ public class RestoreBackupActor extends AbstractActorPrototype {
backupService.restoreBackup(id);
mqPersistence.sendNewMessage(
Actor.CONVERT_AND_LOAD.id(),
Actor.CONVERT_AND_LOAD.id() + ":" + node,
null,
null,
ConvertAndLoadActor.REPARTITION,

View File

@ -1,12 +1,12 @@
package nu.marginalia.control.actor.task;
package nu.marginalia.actor.task;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.control.process.ProcessService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -1,4 +1,4 @@
package nu.marginalia.control.actor.task;
package nu.marginalia.actor.task;
import com.google.inject.Inject;
import com.google.inject.Singleton;
@ -7,10 +7,10 @@ import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
import lombok.With;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.storage.model.FileStorageId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -1,18 +1,19 @@
package nu.marginalia.control.svc;
package nu.marginalia.svc;
import com.github.luben.zstd.ZstdInputStream;
import com.github.luben.zstd.ZstdOutputStream;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorage;
import nu.marginalia.db.storage.model.FileStorageBaseType;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.IndexLocations;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
import nu.marginallia.index.journal.IndexJournalFileNames;
import org.apache.commons.io.IOUtils;
import com.google.inject.Inject;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.time.LocalDateTime;
import java.util.List;
@ -34,37 +35,39 @@ public class BackupService {
String desc = "Pre-load backup snapshot " + LocalDateTime.now();
var backupStorage = storageService.allocateTemporaryStorage(backupBase, FileStorageType.BACKUP, "snapshot", desc);
var backupStorage = storageService.allocateTemporaryStorage(backupBase,
FileStorageType.BACKUP, "snapshot", desc);
for (var associatedId : associatedIds) {
storageService.relateFileStorages(associatedId, backupStorage.id());
}
var indexStagingStorage = storageService.getStorageByType(FileStorageType.INDEX_STAGING);
var linkdbStagingStorage = storageService.getStorageByType(FileStorageType.LINKDB_STAGING);
backupFileCompressed("links.db", linkdbStagingStorage, backupStorage);
var indexStagingStorage = IndexLocations.getIndexConstructionArea(storageService);
var linkdbStagingStorage = IndexLocations.getLinkdbWritePath(storageService);
backupFileCompressed("links.db", linkdbStagingStorage, backupStorage.asPath());
// This file format is already compressed
backupJournal(indexStagingStorage, backupStorage);
backupJournal(indexStagingStorage, backupStorage.asPath());
}
/** Read back a backup into _STAGING */
public void restoreBackup(FileStorageId backupId) throws SQLException, IOException {
var backupStorage = storageService.getStorage(backupId);
var backupStorage = storageService.getStorage(backupId).asPath();
var indexStagingStorage = storageService.getStorageByType(FileStorageType.INDEX_STAGING);
var linkdbStagingStorage = storageService.getStorageByType(FileStorageType.LINKDB_STAGING);
var indexStagingStorage = IndexLocations.getIndexConstructionArea(storageService);
var linkdbStagingStorage = IndexLocations.getLinkdbWritePath(storageService);
restoreBackupCompressed("links.db", linkdbStagingStorage, backupStorage);
restoreJournal(indexStagingStorage, backupStorage);
}
private void backupJournal(FileStorage inputStorage, FileStorage backupStorage) throws IOException
private void backupJournal(Path inputStorage, Path backupStorage) throws IOException
{
for (var source : IndexJournalFileNames.findJournalFiles(inputStorage.asPath())) {
var dest = backupStorage.asPath().resolve(source.toFile().getName());
for (var source : IndexJournalFileNames.findJournalFiles(inputStorage)) {
var dest = backupStorage.resolve(source.toFile().getName());
try (var is = Files.newInputStream(source);
var os = Files.newOutputStream(dest)
@ -75,15 +78,15 @@ public class BackupService {
}
private void restoreJournal(FileStorage destStorage, FileStorage backupStorage) throws IOException {
private void restoreJournal(Path destStorage, Path backupStorage) throws IOException {
// Remove any old journal files first to avoid them getting loaded
for (var garbage : IndexJournalFileNames.findJournalFiles(destStorage.asPath())) {
for (var garbage : IndexJournalFileNames.findJournalFiles(destStorage)) {
Files.delete(garbage);
}
for (var source : IndexJournalFileNames.findJournalFiles(backupStorage.asPath())) {
var dest = destStorage.asPath().resolve(source.toFile().getName());
for (var source : IndexJournalFileNames.findJournalFiles(backupStorage)) {
var dest = destStorage.resolve(source.toFile().getName());
try (var is = Files.newInputStream(source);
var os = Files.newOutputStream(dest)
@ -94,18 +97,18 @@ public class BackupService {
}
private void backupFileCompressed(String fileName, FileStorage inputStorage, FileStorage backupStorage) throws IOException
private void backupFileCompressed(String fileName, Path inputStorage, Path backupStorage) throws IOException
{
try (var is = Files.newInputStream(inputStorage.asPath().resolve(fileName));
var os = new ZstdOutputStream(Files.newOutputStream(backupStorage.asPath().resolve(fileName)))
try (var is = Files.newInputStream(inputStorage.resolve(fileName));
var os = new ZstdOutputStream(Files.newOutputStream(backupStorage.resolve(fileName)))
) {
IOUtils.copyLarge(is, os);
}
}
private void restoreBackupCompressed(String fileName, FileStorage destStorage, FileStorage backupStorage) throws IOException
private void restoreBackupCompressed(String fileName, Path destStorage, Path backupStorage) throws IOException
{
try (var is = new ZstdInputStream(Files.newInputStream(backupStorage.asPath().resolve(fileName)));
var os = Files.newOutputStream(destStorage.asPath().resolve(fileName))
try (var is = new ZstdInputStream(Files.newInputStream(backupStorage.resolve(fileName)));
var os = Files.newOutputStream(destStorage.resolve(fileName))
) {
IOUtils.copyLarge(is, os);
}

View File

@ -0,0 +1,28 @@
// Gradle plugins: plain Java compilation plus the JVM test suite plugin.
plugins {
id 'java'
id 'jvm-test-suite'
}
// Build and run with a Java 21 toolchain.
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(21))
}
}
dependencies {
// Sibling project modules this module compiles against.
implementation project(':code:libraries:message-queue')
implementation project(':code:common:service')
implementation project(':code:common:process')
implementation project(':code:api:process-mqapi')
// Third-party libraries resolved through the version catalog (libs.*).
implementation libs.bundles.slf4j
implementation libs.guice
implementation libs.notnull
implementation libs.jsoup
// Test-only dependencies.
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}

View File

@ -2,9 +2,9 @@ package nu.marginalia.control.process;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.service.server.BaseServiceParams;
@Singleton
@ -18,22 +18,30 @@ public class ProcessOutboxes {
public ProcessOutboxes(BaseServiceParams params, MqPersistence persistence) {
    // One outbox per process inbox; construction order is converter, loader,
    // crawler, index constructor.
    converterOutbox = outboxTo(ProcessInboxNames.CONVERTER_INBOX, params, persistence);
    loaderOutbox = outboxTo(ProcessInboxNames.LOADER_INBOX, params, persistence);
    crawlerOutbox = outboxTo(ProcessInboxNames.CRAWLER_INBOX, params, persistence);
    indexConstructorOutbox = outboxTo(ProcessInboxNames.INDEX_CONSTRUCTOR_INBOX, params, persistence);
}

/**
 * Builds an outbox that sends to {@code inboxName}.
 * <p>
 * The service's configured node is passed as both the inbox node and the
 * outbox node, the service name is used as the outbox name, and the
 * service's instance UUID identifies the sender.
 *
 * @param inboxName   name of the process inbox to send to
 * @param params      service parameters supplying node, service name and instance UUID
 * @param persistence message queue persistence layer backing the outbox
 * @return the newly constructed outbox
 */
private static MqOutbox outboxTo(String inboxName, BaseServiceParams params, MqPersistence persistence) {
    return new MqOutbox(persistence,
            inboxName,
            params.configuration.node(),
            params.configuration.serviceName(),
            params.configuration.node(),
            params.configuration.instanceUuid());
}

View File

@ -1,5 +1,7 @@
package nu.marginalia.control.process;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.server.BaseServiceParams;
@ -8,14 +10,14 @@ import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import org.slf4j.MarkerFactory;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
@Singleton

View File

@ -2,6 +2,8 @@ package nu.marginalia.index.journal.reader;
import nu.marginalia.index.journal.reader.pointer.IndexJournalPointer;
import nu.marginallia.index.journal.IndexJournalFileNames;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Path;
@ -10,10 +12,16 @@ import java.util.List;
public class IndexJournalReaderPagingImpl implements IndexJournalReader {
private static final Logger logger = LoggerFactory.getLogger(IndexJournalReaderPagingImpl.class);
private final List<IndexJournalReader> readers;
public IndexJournalReaderPagingImpl(Path baseDir) throws IOException {
var inputFiles = IndexJournalFileNames.findJournalFiles(baseDir);
if (inputFiles.isEmpty())
logger.warn("Creating paging index journal file in {}, found no inputs!", baseDir);
else
logger.info("Creating paging index journal reader for {} inputs", inputFiles.size());
this.readers = new ArrayList<>(inputFiles.size());
for (var inputFile : inputFiles) {

View File

@ -49,13 +49,14 @@ public class SimpleBlockingThreadPool {
/**
 * Requests an immediate shutdown of the pool: raises the {@code shutDown}
 * flag, discards every task still sitting in the queue, and interrupts
 * each worker thread.
 */
public void shutDownNow() {
this.shutDown = true;
// Discard tasks that have not been picked up by a worker yet
tasks.clear();
for (Thread worker : workers) {
worker.interrupt();
}
}
private void worker() {
while (!shutDown) {
while (!tasks.isEmpty() || !shutDown) {
try {
Task task = tasks.poll(1, TimeUnit.SECONDS);
if (task == null) {
@ -89,6 +90,14 @@ public class SimpleBlockingThreadPool {
final long start = System.currentTimeMillis();
final long deadline = start + timeUnit.toMillis(i);
// Drain the queue
while (!tasks.isEmpty()) {
long timeRemaining = deadline - System.currentTimeMillis();
if (timeRemaining <= 0)
return false;
}
// Wait for termination
for (var thread : workers) {
if (!thread.isAlive())
continue;

View File

@ -44,14 +44,15 @@ public class ActorStateMachine {
private final boolean isDirectlyInitializable;
public ActorStateMachine(MessageQueueFactory messageQueueFactory,
String queueName,
String fsmName,
int node,
UUID instanceUUID,
ActorPrototype statePrototype)
{
this.queueName = queueName;
this.queueName = fsmName;
smInbox = messageQueueFactory.createSynchronousInbox(queueName, instanceUUID);
smOutbox = messageQueueFactory.createOutbox(queueName, queueName+"//out", instanceUUID);
smInbox = messageQueueFactory.createSynchronousInbox(queueName, node, instanceUUID);
smOutbox = messageQueueFactory.createOutbox(queueName, node, queueName+"//out", node, instanceUUID);
smInbox.subscribe(new StateEventSubscription());

View File

@ -20,25 +20,25 @@ public class MessageQueueFactory {
this.persistence = persistence;
}
public MqSingleShotInbox createSingleShotInbox(String inboxName, UUID instanceUUID)
public MqSingleShotInbox createSingleShotInbox(String inboxName, int node, UUID instanceUUID)
{
return new MqSingleShotInbox(persistence, inboxName, instanceUUID);
return new MqSingleShotInbox(persistence, inboxName + ":" + node, instanceUUID);
}
public MqAsynchronousInbox createAsynchronousInbox(String inboxName, UUID instanceUUID)
public MqAsynchronousInbox createAsynchronousInbox(String inboxName, int node, UUID instanceUUID)
{
return new MqAsynchronousInbox(persistence, inboxName, instanceUUID);
return new MqAsynchronousInbox(persistence, inboxName + ":" + node, instanceUUID);
}
public MqSynchronousInbox createSynchronousInbox(String inboxName, UUID instanceUUID)
public MqSynchronousInbox createSynchronousInbox(String inboxName, int node, UUID instanceUUID)
{
return new MqSynchronousInbox(persistence, inboxName, instanceUUID);
return new MqSynchronousInbox(persistence, inboxName + ":" + node, instanceUUID);
}
public MqOutbox createOutbox(String inboxName, String outboxName, UUID instanceUUID)
public MqOutbox createOutbox(String inboxName, int inboxNode, String outboxName, int outboxNode, UUID instanceUUID)
{
return new MqOutbox(persistence, inboxName, outboxName, instanceUUID);
return new MqOutbox(persistence, inboxName, inboxNode, outboxName, outboxNode, instanceUUID);
}
}

View File

@ -30,12 +30,14 @@ public class MqOutbox {
public MqOutbox(MqPersistence persistence,
String inboxName,
int inboxNode,
String outboxName,
int outboxNode,
UUID instanceUUID) {
this.persistence = persistence;
this.inboxName = inboxName;
this.replyInboxName = outboxName + "//" + inboxName;
this.inboxName = inboxName + ":" + inboxNode;
this.replyInboxName = String.format("%s:%d//%s:%d", outboxName, outboxNode, inboxName, inboxNode);
this.instanceUUID = instanceUUID.toString();
pollThread = new Thread(this::poll, "mq-outbox-poll-thread:" + inboxName);

View File

@ -88,14 +88,14 @@ public class ActorStateMachineErrorTest {
@Test
public void smResumeResumableFromNew() throws Exception {
var stateFactory = new ActorStateFactory(new GsonBuilder().create());
var sm = new ActorStateMachine(messageQueueFactory, inboxId, UUID.randomUUID(), new ErrorHurdles(stateFactory));
var sm = new ActorStateMachine(messageQueueFactory, inboxId, 0, UUID.randomUUID(), new ErrorHurdles(stateFactory));
sm.init();
sm.join(2, TimeUnit.SECONDS);
sm.stop();
List<String> states = MqTestUtil.getMessages(dataSource, inboxId)
List<String> states = MqTestUtil.getMessages(dataSource, inboxId, 0)
.stream()
.peek(System.out::println)
.map(MqMessageRow::function)

View File

@ -86,7 +86,7 @@ public class ActorStateMachineNullTest {
var graph = new TestPrototypeActor(stateFactory);
var sm = new ActorStateMachine(messageQueueFactory, inboxId, UUID.randomUUID(), graph);
var sm = new ActorStateMachine(messageQueueFactory, inboxId, 0, UUID.randomUUID(), graph);
sm.registerStates(graph);
sm.init();
@ -94,7 +94,7 @@ public class ActorStateMachineNullTest {
sm.join(2, TimeUnit.SECONDS);
sm.stop();
MqTestUtil.getMessages(dataSource, inboxId).forEach(System.out::println);
MqTestUtil.getMessages(dataSource, inboxId, 0).forEach(System.out::println);
}

View File

@ -87,14 +87,14 @@ public class ActorStateMachineResumeTest {
public void smResumeResumableFromNew() throws Exception {
var stateFactory = new ActorStateFactory(new GsonBuilder().create());
sendMessage(inboxId, 0, "RESUMABLE");
persistence.sendNewMessage(inboxId, null, -1L, "RESUMABLE", "", null);
var sm = new ActorStateMachine(messageQueueFactory, inboxId, UUID.randomUUID(), new ResumeTrialsPrototypeActor(stateFactory));
var sm = new ActorStateMachine(messageQueueFactory, inboxId, 0, UUID.randomUUID(), new ResumeTrialsPrototypeActor(stateFactory));
sm.join(2, TimeUnit.SECONDS);
sm.stop();
List<String> states = MqTestUtil.getMessages(dataSource, inboxId)
List<String> states = MqTestUtil.getMessages(dataSource, inboxId, 0)
.stream()
.peek(System.out::println)
.map(MqMessageRow::function)
@ -103,19 +103,23 @@ public class ActorStateMachineResumeTest {
assertEquals(List.of("RESUMABLE", "NON-RESUMABLE", "OK", "END"), states);
}
/**
 * Posts a new message to the node-qualified inbox {@code inboxId:node}.
 * The message carries the given function name and an empty payload.
 *
 * @param inboxId  base inbox name, without the node suffix
 * @param node     node number appended to the inbox name
 * @param function function name of the message
 * @return the id reported by the persistence layer for the new message
 */
private long sendMessage(String inboxId, int node, String function) throws Exception {
    final String recipientInbox = inboxId + ":" + node;

    return persistence.sendNewMessage(recipientInbox, null, -1L, function, "", null);
}
@Test
public void smResumeFromAck() throws Exception {
var stateFactory = new ActorStateFactory(new GsonBuilder().create());
long id = persistence.sendNewMessage(inboxId, null, -1L, "RESUMABLE", "", null);
long id = sendMessage(inboxId, 0, "RESUMABLE");
persistence.updateMessageState(id, MqMessageState.ACK);
var sm = new ActorStateMachine(messageQueueFactory, inboxId, UUID.randomUUID(), new ResumeTrialsPrototypeActor(stateFactory));
var sm = new ActorStateMachine(messageQueueFactory, inboxId, 0, UUID.randomUUID(), new ResumeTrialsPrototypeActor(stateFactory));
sm.join(4, TimeUnit.SECONDS);
sm.stop();
List<String> states = MqTestUtil.getMessages(dataSource, inboxId)
List<String> states = MqTestUtil.getMessages(dataSource, inboxId, 0)
.stream()
.peek(System.out::println)
.map(MqMessageRow::function)
@ -129,15 +133,14 @@ public class ActorStateMachineResumeTest {
public void smResumeNonResumableFromNew() throws Exception {
var stateFactory = new ActorStateFactory(new GsonBuilder().create());
sendMessage(inboxId, 0, "NON-RESUMABLE");
persistence.sendNewMessage(inboxId, null, -1L, "NON-RESUMABLE", "", null);
var sm = new ActorStateMachine(messageQueueFactory, inboxId, UUID.randomUUID(), new ResumeTrialsPrototypeActor(stateFactory));
var sm = new ActorStateMachine(messageQueueFactory, inboxId, 0, UUID.randomUUID(), new ResumeTrialsPrototypeActor(stateFactory));
sm.join(2, TimeUnit.SECONDS);
sm.stop();
List<String> states = MqTestUtil.getMessages(dataSource, inboxId)
List<String> states = MqTestUtil.getMessages(dataSource, inboxId, 0)
.stream()
.peek(System.out::println)
.map(MqMessageRow::function)
@ -151,15 +154,15 @@ public class ActorStateMachineResumeTest {
var stateFactory = new ActorStateFactory(new GsonBuilder().create());
long id = persistence.sendNewMessage(inboxId, null, null, "NON-RESUMABLE", "", null);
long id = sendMessage(inboxId, 0, "NON-RESUMABLE");
persistence.updateMessageState(id, MqMessageState.ACK);
var sm = new ActorStateMachine(messageQueueFactory, inboxId, UUID.randomUUID(), new ResumeTrialsPrototypeActor(stateFactory));
var sm = new ActorStateMachine(messageQueueFactory, inboxId, 0, UUID.randomUUID(), new ResumeTrialsPrototypeActor(stateFactory));
sm.join(2, TimeUnit.SECONDS);
sm.stop();
List<String> states = MqTestUtil.getMessages(dataSource, inboxId)
List<String> states = MqTestUtil.getMessages(dataSource, inboxId, 0)
.stream()
.peek(System.out::println)
.map(MqMessageRow::function)
@ -172,13 +175,12 @@ public class ActorStateMachineResumeTest {
public void smResumeEmptyQueue() throws Exception {
var stateFactory = new ActorStateFactory(new GsonBuilder().create());
var sm = new ActorStateMachine(messageQueueFactory, inboxId, UUID.randomUUID(), new ResumeTrialsPrototypeActor(stateFactory));
var sm = new ActorStateMachine(messageQueueFactory, inboxId, 0, UUID.randomUUID(), new ResumeTrialsPrototypeActor(stateFactory));
sm.join(2, TimeUnit.SECONDS);
sm.stop();
List<String> states = MqTestUtil.getMessages(dataSource, inboxId)
List<String> states = MqTestUtil.getMessages(dataSource, inboxId, 0)
.stream()
.peek(System.out::println)
.map(MqMessageRow::function)

View File

@ -93,7 +93,7 @@ public class ActorStateMachineTest {
var graph = new TestPrototypeActor(stateFactory);
var sm = new ActorStateMachine(messageQueueFactory, inboxId, UUID.randomUUID(), graph);
var sm = new ActorStateMachine(messageQueueFactory, inboxId, 0, UUID.randomUUID(), graph);
sm.registerStates(graph);
sm.init();
@ -101,14 +101,14 @@ public class ActorStateMachineTest {
sm.join(2, TimeUnit.SECONDS);
sm.stop();
MqTestUtil.getMessages(dataSource, inboxId).forEach(System.out::println);
MqTestUtil.getMessages(dataSource, inboxId, 0).forEach(System.out::println);
}
@Test
public void testStartStopStartStop() throws Exception {
var stateFactory = new ActorStateFactory(new GsonBuilder().create());
var sm = new ActorStateMachine(messageQueueFactory, inboxId, UUID.randomUUID(), new TestPrototypeActor(stateFactory));
var sm = new ActorStateMachine(messageQueueFactory, inboxId, 0, UUID.randomUUID(), new TestPrototypeActor(stateFactory));
sm.init();
@ -117,11 +117,11 @@ public class ActorStateMachineTest {
System.out.println("-------------------- ");
var sm2 = new ActorStateMachine(messageQueueFactory, inboxId, UUID.randomUUID(), new TestPrototypeActor(stateFactory));
var sm2 = new ActorStateMachine(messageQueueFactory, inboxId, 0, UUID.randomUUID(), new TestPrototypeActor(stateFactory));
sm2.join(2, TimeUnit.SECONDS);
sm2.stop();
MqTestUtil.getMessages(dataSource, inboxId).forEach(System.out::println);
MqTestUtil.getMessages(dataSource, inboxId, 0).forEach(System.out::println);
}
@Test
@ -134,14 +134,14 @@ public class ActorStateMachineTest {
persistence.sendNewMessage(inboxId, null, null, "INITIAL", "", null);
persistence.sendNewMessage(inboxId, null, null, "INITIAL", "", null);
var sm = new ActorStateMachine(messageQueueFactory, inboxId, UUID.randomUUID(), new TestPrototypeActor(stateFactory));
var sm = new ActorStateMachine(messageQueueFactory, inboxId, 0, UUID.randomUUID(), new TestPrototypeActor(stateFactory));
Thread.sleep(50);
sm.join(2, TimeUnit.SECONDS);
sm.stop();
MqTestUtil.getMessages(dataSource, inboxId).forEach(System.out::println);
MqTestUtil.getMessages(dataSource, inboxId, 0).forEach(System.out::println);
}
}

View File

@ -8,7 +8,7 @@ import java.util.ArrayList;
import java.util.List;
public class MqTestUtil {
public static List<MqMessageRow> getMessages(HikariDataSource dataSource, String inbox) {
public static List<MqMessageRow> getMessages(HikariDataSource dataSource, String inbox, int node) {
List<MqMessageRow> messages = new ArrayList<>();
try (var conn = dataSource.getConnection();
@ -24,7 +24,7 @@ public class MqTestUtil {
WHERE RECIPIENT_INBOX = ?
"""))
{
stmt.setString(1, inbox);
stmt.setString(1, inbox+":"+node);
var rsp = stmt.executeQuery();
while (rsp.next()) {
messages.add(new MqMessageRow(

View File

@ -54,7 +54,7 @@ public class MqOutboxTest {
@Test
public void testOpenClose() throws InterruptedException {
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, inboxId+"/reply", UUID.randomUUID());
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
outbox.stop();
}
@ -67,7 +67,7 @@ public class MqOutboxTest {
@Test
public void testOutboxTimeout() throws Exception {
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, inboxId+"/reply", UUID.randomUUID());
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
long id = outbox.sendAsync("test", "Hello World");
try {
outbox.waitResponse(id, 100, TimeUnit.MILLISECONDS);
@ -84,11 +84,11 @@ public class MqOutboxTest {
@Test
public void testSingleShotInbox() throws Exception {
// Send a message to the inbox
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId,inboxId+"/reply", UUID.randomUUID());
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
long id = outbox.sendAsync("test", "Hello World");
// Create a single-shot inbox
var inbox = new MqSingleShotInbox(new MqPersistence(dataSource), inboxId, UUID.randomUUID());
var inbox = new MqSingleShotInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
// Wait for the message to arrive
var message = inbox.waitForMessage(1, TimeUnit.SECONDS);
@ -110,12 +110,12 @@ public class MqOutboxTest {
@Test
public void testSend() throws Exception {
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId,inboxId+"/reply", UUID.randomUUID());
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
Executors.newSingleThreadExecutor().submit(() -> outbox.send("test", "Hello World"));
TimeUnit.MILLISECONDS.sleep(100);
var messages = MqTestUtil.getMessages(dataSource, inboxId);
var messages = MqTestUtil.getMessages(dataSource, inboxId, 0);
assertEquals(1, messages.size());
System.out.println(messages.get(0));
@ -125,9 +125,9 @@ public class MqOutboxTest {
@Test
public void testSendAndRespondAsyncInbox() throws Exception {
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId,inboxId+"/reply", UUID.randomUUID());
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId, UUID.randomUUID());
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
inbox.subscribe(justRespond("Alright then"));
inbox.start();
@ -136,7 +136,7 @@ public class MqOutboxTest {
assertEquals(MqMessageState.OK, rsp.state());
assertEquals("Alright then", rsp.payload());
var messages = MqTestUtil.getMessages(dataSource, inboxId);
var messages = MqTestUtil.getMessages(dataSource, inboxId, 0);
assertEquals(1, messages.size());
assertEquals(MqMessageState.OK, messages.get(0).state());
@ -146,9 +146,9 @@ public class MqOutboxTest {
@Test
public void testSendAndRespondSyncInbox() throws Exception {
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId,inboxId+"/reply", UUID.randomUUID());
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId, UUID.randomUUID());
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
inbox.subscribe(justRespond("Alright then"));
inbox.start();
@ -157,7 +157,7 @@ public class MqOutboxTest {
assertEquals(MqMessageState.OK, rsp.state());
assertEquals("Alright then", rsp.payload());
var messages = MqTestUtil.getMessages(dataSource, inboxId);
var messages = MqTestUtil.getMessages(dataSource, inboxId, 0);
assertEquals(1, messages.size());
assertEquals(MqMessageState.OK, messages.get(0).state());
@ -167,9 +167,9 @@ public class MqOutboxTest {
@Test
public void testSendMultipleAsyncInbox() throws Exception {
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId,inboxId+"/reply", UUID.randomUUID());
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId, UUID.randomUUID());
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
inbox.subscribe(echo());
inbox.start();
@ -189,7 +189,7 @@ public class MqOutboxTest {
assertEquals(MqMessageState.OK, rsp4.state());
assertEquals("four", rsp4.payload());
var messages = MqTestUtil.getMessages(dataSource, inboxId);
var messages = MqTestUtil.getMessages(dataSource, inboxId, 0);
assertEquals(4, messages.size());
for (var message : messages) {
assertEquals(MqMessageState.OK, message.state());
@ -201,9 +201,9 @@ public class MqOutboxTest {
@Test
public void testSendMultipleSyncInbox() throws Exception {
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId,inboxId+"/reply", UUID.randomUUID());
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId, UUID.randomUUID());
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
inbox.subscribe(echo());
inbox.start();
@ -223,7 +223,7 @@ public class MqOutboxTest {
assertEquals(MqMessageState.OK, rsp4.state());
assertEquals("four", rsp4.payload());
var messages = MqTestUtil.getMessages(dataSource, inboxId);
var messages = MqTestUtil.getMessages(dataSource, inboxId, 0);
assertEquals(4, messages.size());
for (var message : messages) {
assertEquals(MqMessageState.OK, message.state());
@ -235,8 +235,8 @@ public class MqOutboxTest {
@Test
public void testSendAndRespondWithErrorHandlerAsyncInbox() throws Exception {
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId,inboxId+"/reply", UUID.randomUUID());
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId, UUID.randomUUID());
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
inbox.start();
@ -244,7 +244,7 @@ public class MqOutboxTest {
assertEquals(MqMessageState.ERR, rsp.state());
var messages = MqTestUtil.getMessages(dataSource, inboxId);
var messages = MqTestUtil.getMessages(dataSource, inboxId, 0);
assertEquals(1, messages.size());
assertEquals(MqMessageState.ERR, messages.get(0).state());
@ -254,8 +254,8 @@ public class MqOutboxTest {
@Test
public void testSendAndRespondWithErrorHandlerSyncInbox() throws Exception {
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId,inboxId+"/reply", UUID.randomUUID());
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId, UUID.randomUUID());
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
inbox.start();
@ -263,7 +263,7 @@ public class MqOutboxTest {
assertEquals(MqMessageState.ERR, rsp.state());
var messages = MqTestUtil.getMessages(dataSource, inboxId);
var messages = MqTestUtil.getMessages(dataSource, inboxId, 0);
assertEquals(1, messages.size());
assertEquals(MqMessageState.ERR, messages.get(0).state());

View File

@ -54,13 +54,18 @@ public class MqPersistenceTest {
dataSource.close();
}
/**
 * Sends a new message via the persistence layer, addressing both the
 * recipient and (when present) the sender inbox on node 0 by appending
 * {@code ":0"} to their names.
 *
 * @param recipient base name of the recipient inbox
 * @param sender    base name of the sender inbox, or {@code null} for no sender
 * @param function  function name of the message
 * @param payload   message payload
 * @param ttl       time-to-live handed to the persistence layer
 * @return the id reported by the persistence layer for the new message
 */
public long sendMessage(String recipient, String sender, String function, String payload, Duration ttl) throws Exception {
    final String recipientInbox = recipient + ":0";

    String senderInbox = null;
    if (sender != null) {
        senderInbox = sender + ":0";
    }

    return persistence.sendNewMessage(recipientInbox, senderInbox, null, function, payload, ttl);
}
@Test
public void testReaper() throws Exception {
long id = persistence.sendNewMessage(recipientId, senderId, null, "function", "payload", Duration.ofSeconds(2));
sendMessage(recipientId, senderId, "function", "payload", Duration.ofSeconds(2));
persistence.reapDeadMessages();
var messages = MqTestUtil.getMessages(dataSource, recipientId);
var messages = MqTestUtil.getMessages(dataSource, recipientId, 0);
assertEquals(1, messages.size());
assertEquals(MqMessageState.NEW, messages.get(0).state());
System.out.println(messages);
@ -69,7 +74,7 @@ public class MqPersistenceTest {
persistence.reapDeadMessages();
messages = MqTestUtil.getMessages(dataSource, recipientId);
messages = MqTestUtil.getMessages(dataSource, recipientId, 0);
assertEquals(1, messages.size());
assertEquals(MqMessageState.DEAD, messages.get(0).state());
}
@ -77,9 +82,9 @@ public class MqPersistenceTest {
@Test
public void sendWithReplyAddress() throws Exception {
long id = persistence.sendNewMessage(recipientId, senderId, null, "function", "payload", Duration.ofSeconds(30));
long id = sendMessage(recipientId, senderId, "function", "payload", Duration.ofSeconds(30));
var messages = MqTestUtil.getMessages(dataSource, recipientId);
var messages = MqTestUtil.getMessages(dataSource, recipientId, 0);
assertEquals(1, messages.size());
var message = messages.get(0);
@ -95,9 +100,9 @@ public class MqPersistenceTest {
@Test
public void sendNoReplyAddress() throws Exception {
long id = persistence.sendNewMessage(recipientId, null, null, "function", "payload", Duration.ofSeconds(30));
long id = sendMessage(recipientId, null, "function", "payload", Duration.ofSeconds(30));
var messages = MqTestUtil.getMessages(dataSource, recipientId);
var messages = MqTestUtil.getMessages(dataSource, recipientId, 0);
assertEquals(1, messages.size());
var message = messages.get(0);
@ -114,11 +119,13 @@ public class MqPersistenceTest {
@Test
public void updateState() throws Exception {
long id = persistence.sendNewMessage(recipientId, senderId, null, "function", "payload", Duration.ofSeconds(30));
long id = sendMessage(recipientId, senderId, "function", "payload", Duration.ofSeconds(30));
persistence.updateMessageState(id, MqMessageState.OK);
System.out.println(id);
var messages = MqTestUtil.getMessages(dataSource, recipientId);
var messages = MqTestUtil.getMessages(dataSource, recipientId, 0);
assertEquals(1, messages.size());
var message = messages.get(0);
@ -131,10 +138,10 @@ public class MqPersistenceTest {
@Test
public void testReply() throws Exception {
long request = persistence.sendNewMessage(recipientId, senderId, null, "function", "payload", Duration.ofSeconds(30));
long request = sendMessage(recipientId, senderId, "function", "payload", Duration.ofSeconds(30));
long response = persistence.sendResponse(request, MqMessageState.OK, "response");
var sentMessages = MqTestUtil.getMessages(dataSource, recipientId);
var sentMessages = MqTestUtil.getMessages(dataSource, recipientId, 0);
System.out.println(sentMessages);
assertEquals(1, sentMessages.size());
@ -143,7 +150,7 @@ public class MqPersistenceTest {
assertEquals(MqMessageState.OK, requestMessage.state());
var replies = MqTestUtil.getMessages(dataSource, senderId);
var replies = MqTestUtil.getMessages(dataSource, senderId, 0);
System.out.println(replies);
assertEquals(1, replies.size());
@ -159,9 +166,9 @@ public class MqPersistenceTest {
String instanceId = "BATMAN";
long tick = 1234L;
long id = persistence.sendNewMessage(recipientId, null, null, "function", "payload", Duration.ofSeconds(30));
long id = sendMessage(recipientId, null, "function", "payload", Duration.ofSeconds(30));
var messagesPollFirstTime = persistence.pollInbox(recipientId, instanceId , tick, 10);
var messagesPollFirstTime = persistence.pollInbox(recipientId+":0", instanceId , tick, 10);
/** CHECK POLL RESULT */
assertEquals(1, messagesPollFirstTime.size());
@ -171,7 +178,7 @@ public class MqPersistenceTest {
assertEquals("payload", firstPollMessage.payload());
/** CHECK DB TABLE */
var messages = MqTestUtil.getMessages(dataSource, recipientId);
var messages = MqTestUtil.getMessages(dataSource, recipientId, 0);
assertEquals(1, messages.size());
var message = messages.get(0);
@ -184,7 +191,7 @@ public class MqPersistenceTest {
assertEquals(tick, message.ownerTick());
/** VERIFY SECOND POLL IS EMPTY */
var messagePollSecondTime = persistence.pollInbox(recipientId, instanceId , 1, 10);
var messagePollSecondTime = persistence.pollInbox(recipientId+":0", instanceId , 1, 10);
assertEquals(0, messagePollSecondTime.size());
}
}

View File

@ -14,6 +14,7 @@ dependencies {
implementation libs.bundles.slf4j
implementation project(':third-party:parquet-floor')
implementation project(':code:common:config')
implementation project(':code:common:db')
implementation project(':code:common:linkdb')

View File

@ -1,9 +1,11 @@
package nu.marginalia.crawlspec;
import nu.marginalia.db.storage.model.FileStorage;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.storage.model.FileStorage;
import nu.marginalia.storage.model.FileStorageType;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
public class CrawlSpecFileNames {
public static Path resolve(Path base) {
@ -17,4 +19,16 @@ public class CrawlSpecFileNames {
return resolve(storage.asPath());
}
/**
 * Resolves the crawl spec path for each of the provided storages, in order.
 *
 * @param storageList storages to resolve; each must be of type CRAWL_SPEC
 * @return a new list holding one resolved path per storage
 * @throws IllegalArgumentException if any storage has a type other than CRAWL_SPEC
 */
public static List<Path> resolve(List<FileStorage> storageList) {
    final List<Path> resolved = new ArrayList<>();

    for (FileStorage item : storageList) {
        if (item.type() == FileStorageType.CRAWL_SPEC) {
            resolved.add(resolve(item));
            continue;
        }

        // Reject the whole batch as soon as one storage has the wrong type
        throw new IllegalArgumentException("Provided file storage is of unexpected type " +
                item.type() + ", expected CRAWL_SPEC");
    }

    return resolved;
}
}

View File

@ -84,8 +84,12 @@ public class CrawlSpecGenerator {
static DomainSource fromFile(Path file) {
return () -> {
var lines = Files.readAllLines(file);
lines.replaceAll(s -> s.trim().toLowerCase());
lines.removeIf(line -> line.isBlank() || line.startsWith("#"));
lines.replaceAll(s ->
s.split("#", 2)[0]
.trim()
.toLowerCase()
);
lines.removeIf(String::isBlank);
return lines;
};
}

View File

@ -4,13 +4,14 @@ import com.google.gson.Gson;
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import nu.marginalia.ProcessConfiguration;
import nu.marginalia.ProcessConfigurationModule;
import nu.marginalia.converting.model.ProcessedDomain;
import nu.marginalia.converting.sideload.SideloadSource;
import nu.marginalia.converting.sideload.SideloadSourceFactory;
import nu.marginalia.converting.writer.ConverterBatchWriter;
import nu.marginalia.converting.writer.ConverterWriter;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.mq.MqMessage;
import nu.marginalia.mq.inbox.MqInboxResponse;
@ -46,6 +47,8 @@ public class ConverterMain {
private final FileStorageService fileStorageService;
private final SideloadSourceFactory sideloadSourceFactory;
private final int node;
public static void main(String... args) throws Exception {
Injector injector = Guice.createInjector(
new ConverterModule(),
@ -73,7 +76,8 @@ public class ConverterMain {
ProcessHeartbeatImpl heartbeat,
MessageQueueFactory messageQueueFactory,
FileStorageService fileStorageService,
SideloadSourceFactory sideloadSourceFactory
SideloadSourceFactory sideloadSourceFactory,
ProcessConfiguration processConfiguration
)
{
this.processor = processor;
@ -82,6 +86,7 @@ public class ConverterMain {
this.messageQueueFactory = messageQueueFactory;
this.fileStorageService = fileStorageService;
this.sideloadSourceFactory = sideloadSourceFactory;
this.node = processConfiguration.node();
heartbeat.start();
}
@ -214,7 +219,7 @@ public class ConverterMain {
private ConvertRequest fetchInstructions() throws Exception {
var inbox = messageQueueFactory.createSingleShotInbox(CONVERTER_INBOX, UUID.randomUUID());
var inbox = messageQueueFactory.createSingleShotInbox(CONVERTER_INBOX, node, UUID.randomUUID());
var msgOpt = getMessage(inbox, nu.marginalia.mqapi.converting.ConvertRequest.class.getSimpleName());
var msg = msgOpt.orElseThrow(() -> new RuntimeException("No message received"));

View File

@ -4,6 +4,7 @@ import com.google.gson.Gson;
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import nu.marginalia.ProcessConfiguration;
import nu.marginalia.ProcessConfigurationModule;
import nu.marginalia.UserAgent;
import nu.marginalia.WmsaHome;
@ -11,7 +12,7 @@ import nu.marginalia.crawl.retreival.CrawlDataReference;
import nu.marginalia.crawl.retreival.fetcher.HttpFetcherImpl;
import nu.marginalia.crawling.io.CrawledDomainReader;
import nu.marginalia.crawlspec.CrawlSpecFileNames;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.io.crawlspec.CrawlSpecRecordParquetFileReader;
import nu.marginalia.model.crawlspec.CrawlSpecRecord;
import nu.marginalia.mq.MessageQueueFactory;
@ -43,8 +44,6 @@ import static nu.marginalia.mqapi.ProcessInboxNames.CRAWLER_INBOX;
public class CrawlerMain {
private final Logger logger = LoggerFactory.getLogger(getClass());
private Path crawlDataDir;
private final ProcessHeartbeatImpl heartbeat;
private final ConnectionPool connectionPool = new ConnectionPool(5, 10, TimeUnit.SECONDS);
@ -55,6 +54,7 @@ public class CrawlerMain {
private final MessageQueueFactory messageQueueFactory;
private final FileStorageService fileStorageService;
private final Gson gson;
private final int node;
private final SimpleBlockingThreadPool pool;
private final Map<String, String> processingIds = new ConcurrentHashMap<>();
@ -71,12 +71,14 @@ public class CrawlerMain {
ProcessHeartbeatImpl heartbeat,
MessageQueueFactory messageQueueFactory,
FileStorageService fileStorageService,
ProcessConfiguration processConfiguration,
Gson gson) {
this.heartbeat = heartbeat;
this.userAgent = userAgent;
this.messageQueueFactory = messageQueueFactory;
this.fileStorageService = fileStorageService;
this.gson = gson;
this.node = processConfiguration.node();
// maybe need to set -Xss for JVM to deal with this?
pool = new SimpleBlockingThreadPool("CrawlerPool", CrawlLimiter.maxPoolSize, 1);
@ -121,25 +123,31 @@ public class CrawlerMain {
System.exit(0);
}
public void run(Path crawlSpec, Path outputDir) throws InterruptedException, IOException {
public void run(List<Path> crawlSpec, Path outputDir) throws InterruptedException, IOException {
heartbeat.start();
try (WorkLog workLog = new WorkLog(outputDir.resolve("crawler.log"))) {
// First a validation run to ensure the file is all good to parse
logger.info("Validating JSON");
totalTasks = CrawlSpecRecordParquetFileReader.count(crawlSpec);
int taskCount = 0;
for (var specs : crawlSpec) {
taskCount += CrawlSpecRecordParquetFileReader.count(specs);
}
totalTasks = taskCount;
logger.info("Let's go");
logger.info("Queued {} crawl tasks, let's go", taskCount);
try (var specStream = CrawlSpecRecordParquetFileReader.stream(crawlSpec)) {
for (var specs : crawlSpec) {
try (var specStream = CrawlSpecRecordParquetFileReader.stream(specs)) {
specStream
.takeWhile((e) -> abortMonitor.isAlive())
.filter(e -> workLog.isJobFinished(e.domain))
.filter(e -> !workLog.isJobFinished(e.domain))
.filter(e -> processingIds.put(e.domain, "") == null)
.map(e -> new CrawlTask(e, workLog))
.map(e -> new CrawlTask(e, outputDir, workLog))
.forEach(pool::submitQuietly);
}
}
logger.info("Shutting down the pool, waiting for tasks to complete...");
@ -160,10 +168,14 @@ public class CrawlerMain {
private final String domain;
private final String id;
private final Path outputDir;
private final WorkLog workLog;
CrawlTask(CrawlSpecRecord specification, WorkLog workLog) {
CrawlTask(CrawlSpecRecord specification,
Path outputDir,
WorkLog workLog) {
this.specification = specification;
this.outputDir = outputDir;
this.workLog = workLog;
this.domain = specification.domain;
@ -177,7 +189,7 @@ public class CrawlerMain {
HttpFetcher fetcher = new HttpFetcherImpl(userAgent.uaString(), dispatcher, connectionPool);
try (CrawledDomainWriter writer = new CrawledDomainWriter(crawlDataDir, domain, id);
try (CrawledDomainWriter writer = new CrawledDomainWriter(outputDir, domain, id);
CrawlDataReference reference = getReference())
{
Thread.currentThread().setName("crawling:" + specification.domain);
@ -202,7 +214,7 @@ public class CrawlerMain {
private CrawlDataReference getReference() {
try {
var dataStream = reader.createDataStream(crawlDataDir, domain, id);
var dataStream = reader.createDataStream(outputDir, domain, id);
return new CrawlDataReference(dataStream);
} catch (IOException e) {
logger.debug("Failed to read previous crawl data for {}", specification.domain);
@ -215,12 +227,12 @@ public class CrawlerMain {
private static class CrawlRequest {
private final Path crawlSpec;
private final List<Path> crawlSpec;
private final Path outputDir;
private final MqMessage message;
private final MqSingleShotInbox inbox;
CrawlRequest(Path crawlSpec, Path outputDir, MqMessage message, MqSingleShotInbox inbox) {
CrawlRequest(List<Path> crawlSpec, Path outputDir, MqMessage message, MqSingleShotInbox inbox) {
this.message = message;
this.inbox = inbox;
this.crawlSpec = crawlSpec;
@ -239,7 +251,7 @@ public class CrawlerMain {
private CrawlRequest fetchInstructions() throws Exception {
var inbox = messageQueueFactory.createSingleShotInbox(CRAWLER_INBOX, UUID.randomUUID());
var inbox = messageQueueFactory.createSingleShotInbox(CRAWLER_INBOX, node, UUID.randomUUID());
logger.info("Waiting for instructions");
var msgOpt = getMessage(inbox, nu.marginalia.mqapi.crawling.CrawlRequest.class.getSimpleName());

View File

@ -23,6 +23,7 @@ dependencies {
implementation project(':code:common:process')
implementation project(':code:common:service')
implementation project(':code:common:db')
implementation project(':code:common:config')
implementation project(':code:common:model')
implementation project(':code:libraries:message-queue')
@ -31,6 +32,8 @@ dependencies {
implementation project(':code:features-index:index-journal')
implementation project(':code:features-index:domain-ranking')
implementation project(':code:services-core:index-service')
implementation libs.bundles.slf4j
implementation libs.guice
implementation libs.bundles.mariadb

View File

@ -3,10 +3,10 @@ package nu.marginalia.index;
import com.google.gson.Gson;
import com.google.inject.Guice;
import com.google.inject.Inject;
import nu.marginalia.IndexLocations;
import nu.marginalia.ProcessConfiguration;
import nu.marginalia.ProcessConfigurationModule;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorage;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.index.construction.ReverseIndexConstructor;
import nu.marginalia.index.forward.ForwardIndexConverter;
import nu.marginalia.index.forward.ForwardIndexFileNames;
@ -43,6 +43,8 @@ public class IndexConstructorMain {
private final ProcessHeartbeatImpl heartbeat;
private final MessageQueueFactory messageQueueFactory;
private final DomainRankings domainRankings;
private final int node;
private static final Logger logger = LoggerFactory.getLogger(IndexConstructorMain.class);
private final Gson gson = GsonFactory.get();
public static void main(String[] args) throws Exception {
@ -74,12 +76,14 @@ public class IndexConstructorMain {
public IndexConstructorMain(FileStorageService fileStorageService,
ProcessHeartbeatImpl heartbeat,
MessageQueueFactory messageQueueFactory,
ProcessConfiguration processConfiguration,
DomainRankings domainRankings) {
this.fileStorageService = fileStorageService;
this.heartbeat = heartbeat;
this.messageQueueFactory = messageQueueFactory;
this.domainRankings = domainRankings;
this.node = processConfiguration.node();
}
private void run(CreateIndexInstructions instructions) throws SQLException, IOException {
@ -96,33 +100,27 @@ public class IndexConstructorMain {
private void createFullReverseIndex() throws SQLException, IOException {
FileStorage indexLive = fileStorageService.getStorageByType(FileStorageType.INDEX_LIVE);
FileStorage indexStaging = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING);
Path outputFileDocs = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path outputFileWords = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path tmpDir = workDir.resolve("tmp");
Path outputFileDocs = ReverseIndexFullFileNames.resolve(indexLive.asPath(), ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path outputFileWords = ReverseIndexFullFileNames.resolve(indexLive.asPath(), ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path tmpDir = indexStaging.asPath().resolve("tmp");
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
new ReverseIndexConstructor(outputFileDocs, outputFileWords,
IndexJournalReader::singleFile,
this::addRankToIdEncoding, tmpDir)
.createReverseIndex(heartbeat, indexStaging.asPath());
.createReverseIndex(heartbeat, workDir);
}
private void createPrioReverseIndex() throws SQLException, IOException {
FileStorage indexLive = fileStorageService.getStorageByType(FileStorageType.INDEX_LIVE);
FileStorage indexStaging = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING);
Path outputFileDocs = ReverseIndexPrioFileNames.resolve(indexLive.asPath(), ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.NEXT);
Path outputFileWords = ReverseIndexPrioFileNames.resolve(indexLive.asPath(), ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.NEXT);
Path tmpDir = indexStaging.asPath().resolve("tmp");
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
Path outputFileDocs = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path outputFileWords = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path tmpDir = workDir.resolve("tmp");
// The priority index only includes words that have bits indicating they are
// important to the document. This filter will act on the encoded {@see WordMetadata}
@ -131,7 +129,7 @@ public class IndexConstructorMain {
new ReverseIndexConstructor(outputFileDocs, outputFileWords,
(path) -> IndexJournalReader.singleFile(path).filtering(wordMetaFilter),
this::addRankToIdEncoding, tmpDir)
.createReverseIndex(heartbeat, indexStaging.asPath());
.createReverseIndex(heartbeat, workDir);
}
private static LongPredicate getPriorityIndexWordMetaFilter() {
@ -149,16 +147,14 @@ public class IndexConstructorMain {
return r -> WordMetadata.hasAnyFlags(r, highPriorityFlags);
}
private void createForwardIndex() throws SQLException, IOException {
private void createForwardIndex() throws IOException {
FileStorage indexLive = fileStorageService.getStorageByType(FileStorageType.INDEX_LIVE);
FileStorage indexStaging = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING);
Path outputFileDocsId = ForwardIndexFileNames.resolve(indexLive.asPath(), ForwardIndexFileNames.FileIdentifier.DOC_ID, ForwardIndexFileNames.FileVersion.NEXT);
Path outputFileDocsData = ForwardIndexFileNames.resolve(indexLive.asPath(), ForwardIndexFileNames.FileIdentifier.DOC_DATA, ForwardIndexFileNames.FileVersion.NEXT);
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path outputFileDocsId = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_ID, ForwardIndexFileNames.FileVersion.NEXT);
Path outputFileDocsData = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_DATA, ForwardIndexFileNames.FileVersion.NEXT);
ForwardIndexConverter converter = new ForwardIndexConverter(heartbeat,
IndexJournalReader.paging(indexStaging.asPath()),
IndexJournalReader.paging(workDir),
outputFileDocsId,
outputFileDocsData,
domainRankings
@ -198,7 +194,7 @@ public class IndexConstructorMain {
private CreateIndexInstructions fetchInstructions() throws Exception {
var inbox = messageQueueFactory.createSingleShotInbox(INDEX_CONSTRUCTOR_INBOX, UUID.randomUUID());
var inbox = messageQueueFactory.createSingleShotInbox(INDEX_CONSTRUCTOR_INBOX, node, UUID.randomUUID());
logger.info("Waiting for instructions");
var msgOpt = getMessage(inbox, CreateIndexRequest.class.getSimpleName());

View File

@ -3,8 +3,8 @@ package nu.marginalia.loading;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import lombok.SneakyThrows;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.IndexLocations;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
@ -34,14 +34,14 @@ public class LoaderIndexJournalWriter {
@Inject
public LoaderIndexJournalWriter(FileStorageService fileStorageService) throws IOException, SQLException {
var indexArea = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING);
var indexArea = IndexLocations.getIndexConstructionArea(fileStorageService);
var existingIndexFiles = IndexJournalFileNames.findJournalFiles(indexArea.asPath());
var existingIndexFiles = IndexJournalFileNames.findJournalFiles(indexArea);
for (var existingFile : existingIndexFiles) {
Files.delete(existingFile);
}
indexWriter = new IndexJournalWriterPagingImpl(indexArea.asPath());
indexWriter = new IndexJournalWriterPagingImpl(indexArea);
}
public void putWords(long combinedId,

View File

@ -6,8 +6,9 @@ import com.google.inject.Inject;
import com.google.inject.Injector;
import lombok.Getter;
import lombok.SneakyThrows;
import nu.marginalia.ProcessConfiguration;
import nu.marginalia.ProcessConfigurationModule;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.linkdb.LinkdbWriter;
import nu.marginalia.loading.documents.DocumentLoaderService;
import nu.marginalia.loading.documents.KeywordLoaderService;
@ -16,11 +17,10 @@ import nu.marginalia.loading.domains.DomainLoaderService;
import nu.marginalia.loading.links.DomainLinksLoaderService;
import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.mq.MqMessage;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.inbox.MqInboxResponse;
import nu.marginalia.mq.inbox.MqSingleShotInbox;
import nu.marginalia.process.control.ProcessHeartbeatImpl;
import nu.marginalia.worklog.BatchingWorkLogInspector;
import plan.CrawlPlan;
import nu.marginalia.service.module.DatabaseModule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -49,6 +49,7 @@ public class LoaderMain {
private final DomainLinksLoaderService linksService;
private final KeywordLoaderService keywordLoaderService;
private final DocumentLoaderService documentLoaderService;
private final int node;
private final Gson gson;
public static void main(String... args) throws Exception {
@ -81,9 +82,10 @@ public class LoaderMain {
DomainLinksLoaderService linksService,
KeywordLoaderService keywordLoaderService,
DocumentLoaderService documentLoaderService,
ProcessConfiguration processConfiguration,
Gson gson
) {
this.node = processConfiguration.node();
this.heartbeat = heartbeat;
this.messageQueueFactory = messageQueueFactory;
this.fileStorageService = fileStorageService;
@ -157,7 +159,7 @@ public class LoaderMain {
private LoadRequest fetchInstructions() throws Exception {
var inbox = messageQueueFactory.createSingleShotInbox(LOADER_INBOX, UUID.randomUUID());
var inbox = messageQueueFactory.createSingleShotInbox(LOADER_INBOX, node, UUID.randomUUID());
var msgOpt = getMessage(inbox, nu.marginalia.mqapi.loading.LoadRequest.class.getSimpleName());
if (msgOpt.isEmpty())
@ -168,6 +170,7 @@ public class LoaderMain {
throw new RuntimeException("Unexpected message in inbox: " + msg);
}
try {
var request = gson.fromJson(msg.payload(), nu.marginalia.mqapi.loading.LoadRequest.class);
List<Path> inputSources = new ArrayList<>();
@ -177,6 +180,11 @@ public class LoaderMain {
return new LoadRequest(new LoaderInputData(inputSources), msg, inbox);
}
catch (Exception ex) {
inbox.sendResponse(msg, new MqInboxResponse("FAILED", MqMessageState.ERR));
throw ex;
}
}
private Optional<MqMessage> getMessage(MqSingleShotInbox inbox, String expectedFunction) throws SQLException, InterruptedException {
var opt = inbox.waitForMessage(30, TimeUnit.SECONDS);

View File

@ -7,10 +7,9 @@ import com.google.inject.Provides;
import com.google.inject.Singleton;
import com.google.inject.name.Names;
import nu.marginalia.LanguageModels;
import nu.marginalia.ProcessConfiguration;
import nu.marginalia.WmsaHome;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.IndexLocations;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.linkdb.LinkdbStatusWriter;
import nu.marginalia.linkdb.LinkdbWriter;
import nu.marginalia.model.gson.GsonFactory;
@ -21,7 +20,6 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.UUID;
public class LoaderModule extends AbstractModule {
@ -38,8 +36,8 @@ public class LoaderModule extends AbstractModule {
@Inject @Provides @Singleton
private LinkdbWriter createLinkdbWriter(FileStorageService service) throws SQLException, IOException {
var storage = service.getStorageByType(FileStorageType.LINKDB_STAGING);
Path dbPath = storage.asPath().resolve("links.db");
Path dbPath = IndexLocations.getLinkdbWritePath(service).resolve("links.db");
if (Files.exists(dbPath)) {
Files.delete(dbPath);
@ -49,8 +47,7 @@ public class LoaderModule extends AbstractModule {
@Inject @Provides @Singleton
private LinkdbStatusWriter createLinkdbStatusWriter(FileStorageService service) throws SQLException, IOException {
var storage = service.getStorageByType(FileStorageType.LINKDB_STAGING);
Path dbPath = storage.asPath().resolve("urlstatus.db");
Path dbPath = IndexLocations.getLinkdbWritePath(service).resolve("urlstatus.db");
if (Files.exists(dbPath)) {
Files.delete(dbPath);

View File

@ -1,8 +1,8 @@
package nu.marginalia.loading.loader;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorage;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBase;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleFile;
import nu.marginalia.keyword.model.DocumentKeywords;
import nu.marginalia.loading.LoaderIndexJournalWriter;
@ -31,18 +31,19 @@ class LoaderIndexJournalWriterTest {
public void setUp() throws IOException, SQLException {
tempDir = Files.createTempDirectory(getClass().getSimpleName());
FileStorageService storageService = Mockito.mock(FileStorageService.class);
Mockito.when(storageService.getStorageByType(FileStorageType.INDEX_STAGING)).
thenReturn(new FileStorage(null, null, null, null, tempDir.toString(),
"test"));
Mockito.when(storageService.getStorageBase(FileStorageBaseType.CURRENT)).thenReturn(new FileStorageBase(null, null, null, tempDir.toString()));
writer = new LoaderIndexJournalWriter(storageService);
}
@AfterEach
public void tearDown() throws Exception {
writer.close();
List<Path> junk = Files.list(tempDir).toList();
List<Path> junk = Files.list(tempDir.resolve("iw")).toList();
for (var item : junk)
Files.delete(item);
Files.delete(tempDir.resolve("iw"));
Files.delete(tempDir);
}
@ -60,7 +61,7 @@ class LoaderIndexJournalWriterTest {
writer.close();
List<Path> journalFiles =IndexJournalFileNames.findJournalFiles(tempDir);
List<Path> journalFiles = IndexJournalFileNames.findJournalFiles(tempDir.resolve("iw"));
assertEquals(1, journalFiles.size());
var reader = new IndexJournalReaderSingleFile(journalFiles.get(0));

View File

@ -5,11 +5,9 @@ import com.google.inject.Inject;
import lombok.SneakyThrows;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.client.Context;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.search.svc.SearchFrontPageService;
import nu.marginalia.search.svc.*;
import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.server.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -34,10 +34,12 @@ dependencies {
implementation project(':code:common:service-client')
implementation project(':code:api:index-api')
implementation project(':code:api:query-api')
implementation project(':code:api:executor-api')
implementation project(':code:api:process-mqapi')
implementation project(':code:features-search:screenshots')
implementation project(':code:features-index:index-journal')
implementation project(':code:features-index:index-query')
implementation project(':code:process-models:crawl-spec')
implementation libs.bundles.slf4j

View File

@ -8,8 +8,5 @@ import java.nio.file.Path;
public class ControlProcessModule extends AbstractModule {
@Override
protected void configure() {
String dist = System.getProperty("distPath", System.getProperty("WMSA_HOME", "/var/lib/wmsa") + "/dist/current");
bind(Path.class).annotatedWith(Names.named("distPath")).toInstance(Path.of(dist));
}
protected void configure() {}
}

View File

@ -2,19 +2,20 @@ package nu.marginalia.control;
import com.google.gson.Gson;
import com.google.inject.Inject;
import gnu.trove.list.array.TIntArrayList;
import nu.marginalia.client.ServiceMonitors;
import nu.marginalia.control.actor.Actor;
import nu.marginalia.control.model.*;
import nu.marginalia.control.svc.*;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.control.app.svc.*;
import nu.marginalia.control.node.svc.ControlNodeActionsService;
import nu.marginalia.control.node.svc.ControlActorService;
import nu.marginalia.control.node.svc.ControlFileStorageService;
import nu.marginalia.control.node.svc.ControlNodeService;
import nu.marginalia.control.sys.svc.ControlSysActionsService;
import nu.marginalia.control.sys.svc.EventLogService;
import nu.marginalia.control.sys.svc.HeartbeatService;
import nu.marginalia.control.sys.svc.MessageQueueService;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.screenshot.ScreenshotService;
import nu.marginalia.service.server.*;
import org.eclipse.jetty.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
@ -22,9 +23,7 @@ import spark.Response;
import spark.Spark;
import java.io.IOException;
import java.sql.SQLException;
import java.util.*;
import java.util.stream.Collectors;
public class ControlService extends Service {
@ -34,15 +33,10 @@ public class ControlService extends Service {
private final ServiceMonitors monitors;
private final HeartbeatService heartbeatService;
private final EventLogService eventLogService;
private final ApiKeyService apiKeyService;
private final DomainComplaintService domainComplaintService;
private final ControlBlacklistService blacklistService;
private final SearchToBanService searchToBanService;
private final RandomExplorationService randomExplorationService;
private final ControlNodeService controlNodeService;
private final ControlActorService controlActorService;
private final StaticResources staticResources;
private final MessageQueueService messageQueueService;
private final ControlFileStorageService controlFileStorageService;
@Inject
@ -58,54 +52,48 @@ public class ControlService extends Service {
ApiKeyService apiKeyService,
DomainComplaintService domainComplaintService,
ControlBlacklistService blacklistService,
ControlActionsService controlActionsService,
ControlNodeActionsService nodeActionsService,
ControlSysActionsService sysActionsService,
ScreenshotService screenshotService,
SearchToBanService searchToBanService,
RandomExplorationService randomExplorationService
RandomExplorationService randomExplorationService,
ControlNodeService controlNodeService
) throws IOException {
super(params);
this.monitors = monitors;
this.heartbeatService = heartbeatService;
this.eventLogService = eventLogService;
this.apiKeyService = apiKeyService;
this.domainComplaintService = domainComplaintService;
this.blacklistService = blacklistService;
this.searchToBanService = searchToBanService;
this.randomExplorationService = randomExplorationService;
this.controlNodeService = controlNodeService;
// sys
messageQueueService.register();
sysActionsService.register();
// node
controlFileStorageService.register();
controlActorService.register();
nodeActionsService.register();
controlNodeService.register();
// app
blacklistService.register();
searchToBanService.register();
apiKeyService.register();
domainComplaintService.register();
randomExplorationService.register();
var indexRenderer = rendererFactory.renderer("control/index");
var eventsRenderer = rendererFactory.renderer("control/events");
var servicesRenderer = rendererFactory.renderer("control/services");
var serviceByIdRenderer = rendererFactory.renderer("control/service-by-id");
var actorsRenderer = rendererFactory.renderer("control/actors");
var actorDetailsRenderer = rendererFactory.renderer("control/actor-details");
var storageRenderer = rendererFactory.renderer("control/storage-overview");
var storageSpecsRenderer = rendererFactory.renderer("control/storage-specs");
var storageCrawlsRenderer = rendererFactory.renderer("control/storage-crawls");
var storageBackupsRenderer = rendererFactory.renderer("control/storage-backups");
var storageProcessedRenderer = rendererFactory.renderer("control/storage-processed");
var reviewRandomDomainsRenderer = rendererFactory.renderer("control/review-random-domains");
var apiKeysRenderer = rendererFactory.renderer("control/api-keys");
var domainComplaintsRenderer = rendererFactory.renderer("control/domain-complaints");
var messageQueueRenderer = rendererFactory.renderer("control/message-queue");
var storageDetailsRenderer = rendererFactory.renderer("control/storage-details");
var updateMessageStateRenderer = rendererFactory.renderer("control/update-message-state");
var newMessageRenderer = rendererFactory.renderer("control/new-message");
var viewMessageRenderer = rendererFactory.renderer("control/view-message");
var eventsRenderer = rendererFactory.renderer("control/sys/events");
var servicesRenderer = rendererFactory.renderer("control/sys/services");
var serviceByIdRenderer = rendererFactory.renderer("control/sys/service-by-id");
var actionsViewRenderer = rendererFactory.renderer("control/actions");
var blacklistRenderer = rendererFactory.renderer("control/blacklist");
var searchToBanRenderer = rendererFactory.renderer("control/search-to-ban");
this.controlActorService = controlActorService;
this.staticResources = staticResources;
this.messageQueueService = messageQueueService;
this.controlFileStorageService = controlFileStorageService;
Spark.get("/public/heartbeats", (req, res) -> {
res.type("application/json");
@ -114,224 +102,30 @@ public class ControlService extends Service {
Spark.get("/public/", this::overviewModel, indexRenderer::render);
Spark.get("/public/actions", (rq,rsp) -> new Object() , actionsViewRenderer::render);
Spark.get("/public/actions", (req,rs) -> new Object() , actionsViewRenderer::render);
Spark.get("/public/events", eventLogService::eventsListModel , eventsRenderer::render);
Spark.get("/public/services", this::servicesModel, servicesRenderer::render);
Spark.get("/public/services/:id", this::serviceModel, serviceByIdRenderer::render);
Spark.get("/public/actors", this::processesModel, actorsRenderer::render);
Spark.get("/public/actors/:fsm", this::actorDetailsModel, actorDetailsRenderer::render);
final HtmlRedirect redirectToServices = new HtmlRedirect("/services");
final HtmlRedirect redirectToActors = new HtmlRedirect("/actors");
final HtmlRedirect redirectToApiKeys = new HtmlRedirect("/api-keys");
final HtmlRedirect redirectToStorage = new HtmlRedirect("/storage");
final HtmlRedirect redirectToBlacklist = new HtmlRedirect("/blacklist");
final HtmlRedirect redirectToComplaints = new HtmlRedirect("/complaints");
final HtmlRedirect redirectToMessageQueue = new HtmlRedirect("/message-queue");
// Needed to be able to show website screenshots
Spark.get("/public/screenshot/:id", screenshotService::serveScreenshotRequest);
// FSMs
Spark.post("/public/fsms/:fsm/start", controlActorService::startFsm, redirectToActors);
Spark.post("/public/fsms/:fsm/stop", controlActorService::stopFsm, redirectToActors);
// Message Queue
Spark.get("/public/message-queue", messageQueueService::listMessageQueueModel, messageQueueRenderer::render);
Spark.post("/public/message-queue/", messageQueueService::createMessage, redirectToMessageQueue);
Spark.get("/public/message-queue/new", messageQueueService::newMessageModel, newMessageRenderer::render);
Spark.get("/public/message-queue/:id", messageQueueService::viewMessageModel, viewMessageRenderer::render);
Spark.get("/public/message-queue/:id/reply", messageQueueService::replyMessageModel, newMessageRenderer::render);
Spark.get("/public/message-queue/:id/edit", messageQueueService::viewMessageForEditStateModel, updateMessageStateRenderer::render);
Spark.post("/public/message-queue/:id/edit", messageQueueService::editMessageState, redirectToMessageQueue);
// Storage
Spark.get("/public/storage", this::storageModel, storageRenderer::render);
Spark.get("/public/storage/specs", this::storageModelSpecs, storageSpecsRenderer::render);
Spark.get("/public/storage/crawls", this::storageModelCrawls, storageCrawlsRenderer::render);
Spark.get("/public/storage/backups", this::storageModelBackups, storageBackupsRenderer::render);
Spark.get("/public/storage/processed", this::storageModelProcessed, storageProcessedRenderer::render);
Spark.get("/public/storage/:id", this::storageDetailsModel, storageDetailsRenderer::render);
Spark.get("/public/storage/:id/file", controlFileStorageService::downloadFileFromStorage);
// Storage Actions
Spark.post("/public/storage/:fid/crawl", controlActorService::triggerCrawling, redirectToActors);
Spark.post("/public/storage/:fid/recrawl", controlActorService::triggerRecrawling, redirectToActors);
Spark.post("/public/storage/:fid/process", controlActorService::triggerProcessing, redirectToActors);
Spark.post("/public/storage/:fid/process-and-load", controlActorService::triggerProcessingWithLoad, redirectToActors);
Spark.post("/public/storage/:fid/load", controlActorService::loadProcessedData, redirectToActors);
Spark.post("/public/storage/:fid/restore-backup", controlActorService::restoreBackup, redirectToActors);
Spark.post("/public/storage/specs", controlActorService::createCrawlSpecification, redirectToStorage);
Spark.post("/public/storage/:fid/delete", controlFileStorageService::flagFileForDeletionRequest, redirectToStorage);
// Blacklist
Spark.get("/public/blacklist", this::blacklistModel, blacklistRenderer::render);
Spark.post("/public/blacklist", this::updateBlacklist, redirectToBlacklist);
Spark.get("/public/search-to-ban", searchToBanService::handle, searchToBanRenderer::render);
Spark.post("/public/search-to-ban", searchToBanService::handle, searchToBanRenderer::render);
// API Keys
Spark.get("/public/api-keys", this::apiKeysModel, apiKeysRenderer::render);
Spark.post("/public/api-keys", this::createApiKey, redirectToApiKeys);
Spark.delete("/public/api-keys/:key", this::deleteApiKey, redirectToApiKeys);
// HTML forms don't support the DELETE verb :-(
Spark.post("/public/api-keys/:key/delete", this::deleteApiKey, redirectToApiKeys);
Spark.get("/public/complaints", this::complaintsModel, domainComplaintsRenderer::render);
Spark.post("/public/complaints/:domain", this::reviewComplaint, redirectToComplaints);
// Actions
Spark.post("/public/actions/calculate-adjacencies", controlActionsService::calculateAdjacencies, redirectToActors);
Spark.post("/public/actions/reload-blogs-list", controlActionsService::reloadBlogsList, redirectToActors);
Spark.post("/public/actions/repartition-index", controlActionsService::triggerRepartition, redirectToActors);
Spark.post("/public/actions/trigger-data-exports", controlActionsService::triggerDataExports, redirectToActors);
Spark.post("/public/actions/flush-api-caches", controlActionsService::flushApiCaches, redirectToActors);
Spark.post("/public/actions/truncate-links-database", controlActionsService::truncateLinkDatabase, redirectToActors);
Spark.post("/public/actions/sideload-encyclopedia", controlActionsService::sideloadEncyclopedia, redirectToActors);
Spark.post("/public/actions/sideload-dirtree", controlActionsService::sideloadDirtree, redirectToActors);
Spark.post("/public/actions/sideload-stackexchange", controlActionsService::sideloadStackexchange, redirectToActors);
// Review Random Domains
Spark.get("/public/review-random-domains", this::reviewRandomDomainsModel, reviewRandomDomainsRenderer::render);
Spark.post("/public/review-random-domains", this::reviewRandomDomainsAction);
Spark.get("/public/:resource", this::serveStatic);
monitors.subscribe(this::logMonitorStateChange);
}
/**
 * Builds the model for the random-domain review listing.
 *
 * The optional "after" query parameter is parsed as an integer (a missing value is
 * read as "0") and passed on to the random exploration service together with the
 * constant 25 (presumably the page size -- confirm against RandomExplorationService).
 *
 * @return a map with the fetched "domains" and the "after" value to use for the next request
 * @throws NumberFormatException if "after" is present but is not a valid integer
 */
private Object reviewRandomDomainsModel(Request request, Response response) throws SQLException {
    final String cursorParam = request.queryParams("after");
    final int after = Integer.parseInt(cursorParam != null ? cursorParam : "0");

    final var domains = randomExplorationService.getDomains(after, 25);

    // The next cursor is the largest id in this batch; an empty batch yields Integer.MAX_VALUE
    final int nextAfter = domains.stream()
            .mapToInt(RandomExplorationService.RandomDomainResult::id)
            .max()
            .orElse(Integer.MAX_VALUE);

    return Map.of(
            "domains", domains,
            "after", nextAfter);
}
/**
 * Handles the form submission from the random-domain review page.
 *
 * Every request parameter named "domain-&lt;id&gt;" whose value is "on" (case-insensitive)
 * contributes its numeric id to the list handed to
 * {@code randomExplorationService.removeRandomDomains}. The response is a small HTML
 * document that meta-refreshes back to the review listing.
 *
 * The "after" parameter is request-controlled, so it is parsed as an integer before it
 * is written into the HTML: this keeps arbitrary markup out of the response and avoids
 * emitting the literal "after=null" (which the listing cannot parse) when the
 * parameter is missing. A missing or malformed value falls back to 0.
 *
 * @throws NumberFormatException if a checked "domain-" parameter has a non-numeric suffix
 */
private Object reviewRandomDomainsAction(Request request, Response response) throws SQLException {
    final String domainPrefix = "domain-";
    TIntArrayList idList = new TIntArrayList();

    request.queryParams().forEach(key -> {
        if (key.startsWith(domainPrefix)) {
            String value = request.queryParams(key);
            if ("on".equalsIgnoreCase(value)) {
                int id = Integer.parseInt(key.substring(domainPrefix.length()));
                idList.add(id);
            }
        }
    });

    randomExplorationService.removeRandomDomains(idList.toArray());

    int after;
    try {
        after = Integer.parseInt(Objects.requireNonNullElse(request.queryParams("after"), "0"));
    } catch (NumberFormatException ex) {
        // Never echo an unparseable cursor back into the page; restart from the beginning instead
        after = 0;
    }

    return """
            <!DOCTYPE html>
            <html><head><meta http-equiv="refresh" content="0;URL='/review-random-domains?after=%d'" /></head></html>
            """.formatted(after);
}
/**
 * Builds the blacklist page model: the result of
 * {@code blacklistService.lastNAdditions(100)} under the "blacklist" key.
 */
private Object blacklistModel(Request request, Response response) {
    final var recentAdditions = blacklistService.lastNAdditions(100);
    return Map.of("blacklist", recentAdditions);
}
/**
 * Adds or removes a domain from the blacklist depending on the "act" request parameter.
 *
 * "add" blacklists the domain given in the "domain" parameter with the optional
 * "comment" (empty string when absent); "del" removes it. Any other value, or a
 * missing "act", leaves the blacklist untouched.
 *
 * @return always the empty string
 */
private Object updateBlacklist(Request request, Response response) {
    final EdgeDomain domain = new EdgeDomain(request.queryParams("domain"));
    final String act = request.queryParams("act");

    if (act == null) {
        return "";
    }

    switch (act) {
        case "add" -> {
            final String comment = request.queryParams("comment");
            blacklistService.addToBlacklist(domain, comment == null ? "" : comment);
        }
        case "del" -> blacklistService.removeFromBlacklist(domain);
        default -> {
            // Unrecognised action: nothing to do
        }
    }

    return "";
}
/**
 * Gathers heartbeat, node, actor and event-log data into one model map with the keys
 * "processes", "nodes", "jobs", "actors", "services" and "events".
 *
 * The event log is queried with {@code getLastEntries(Long.MAX_VALUE, 20)}
 * (presumably "the 20 most recent entries" -- confirm against EventLogService).
 */
private Object overviewModel(Request request, Response response) {
    // Fetched in the same order as the keys appear in the resulting map
    final var processes = heartbeatService.getProcessHeartbeats();
    final var nodes = controlNodeService.getNodeStatusList();
    final var jobs = heartbeatService.getTaskHeartbeats();
    final var actors = controlActorService.getActorStates();
    final var services = heartbeatService.getServiceHeartbeats();
    final var events = eventLogService.getLastEntries(Long.MAX_VALUE, 20);

    return Map.of(
            "processes", processes,
            "nodes", nodes,
            "jobs", jobs,
            "actors", actors,
            "services", services,
            "events", events);
}
/**
 * Builds the complaints page model by splitting all complaints into reviewed and
 * unreviewed groups.
 *
 * Reviewed complaints are ordered by review date, unreviewed ones by file date,
 * both in descending order.
 *
 * @return a map with "complaintsNew" (unreviewed) and "complaintsReviewed"
 */
private Object complaintsModel(Request request, Response response) {
    final Comparator<DomainComplaintModel> byReviewDateDesc =
            Comparator.comparing(DomainComplaintModel::reviewDate).reversed();
    final Comparator<DomainComplaintModel> byFileDateDesc =
            Comparator.comparing(DomainComplaintModel::fileDate).reversed();

    final Map<Boolean, List<DomainComplaintModel>> partitioned = domainComplaintService
            .getComplaints()
            .stream()
            .collect(Collectors.partitioningBy(DomainComplaintModel::reviewed));

    final List<DomainComplaintModel> reviewed = partitioned.get(true);
    final List<DomainComplaintModel> unreviewed = partitioned.get(false);

    // Both lists are sorted in place before being handed to the model
    reviewed.sort(byReviewDateDesc);
    unreviewed.sort(byFileDateDesc);

    return Map.of(
            "complaintsNew", unreviewed,
            "complaintsReviewed", reviewed);
}
/**
 * Applies a review decision to the complaint filed against the domain named in the
 * ":domain" path parameter.
 *
 * The "action" request parameter selects the decision: "noop", "appeal" or
 * "blacklist", each delegating to the matching method on the domain complaint service.
 *
 * @return always the empty string
 * @throws UnsupportedOperationException if "action" is missing or not one of the
 *         supported values (a missing action previously surfaced as a bare
 *         NullPointerException from the switch)
 */
private Object reviewComplaint(Request request, Response response) {
    var domain = new EdgeDomain(request.params("domain"));
    String action = request.queryParams("action");

    logger.info("Reviewing complaint for domain {} with action {}", domain, action);

    // switch on a null String throws NullPointerException; fail with a clear message instead
    if (action == null) {
        throw new UnsupportedOperationException("Missing complaint review action for domain " + domain);
    }

    switch (action) {
        case "noop" -> domainComplaintService.reviewNoAction(domain);
        case "appeal" -> domainComplaintService.approveAppealBlacklisting(domain);
        case "blacklist" -> domainComplaintService.blacklistDomain(domain);
        default -> throw new UnsupportedOperationException("Unknown complaint review action: " + action);
    }

    return "";
}
/**
 * Creates a new API key from the "license", "name", "email" and "rate" request parameters.
 *
 * The request is rejected with HTTP status 400 when any text field is blank or when
 * "rate" is missing, not a valid integer, or not strictly positive. A missing or
 * non-numeric rate used to escape as a NumberFormatException before validation ran.
 *
 * @return always the empty string
 */
private Object createApiKey(Request request, Response response) {
    String license = request.queryParams("license");
    String name = request.queryParams("name");
    String email = request.queryParams("email");

    int rate;
    try {
        // Integer.parseInt(null) also throws NumberFormatException, so this covers a missing parameter
        rate = Integer.parseInt(request.queryParams("rate"));
    } catch (NumberFormatException ex) {
        // Map an unparseable rate onto a value the validation below rejects
        rate = 0;
    }

    if (StringUtil.isBlank(license) ||
        StringUtil.isBlank(name) ||
        StringUtil.isBlank(email) ||
        rate <= 0)
    {
        response.status(400);
        return "";
    }

    apiKeyService.addApiKey(license, name, email, rate);

    return "";
}
/**
 * Deletes the API key identified by the ":key" path parameter.
 *
 * @return always the empty string
 */
private Object deleteApiKey(Request request, Response response) {
    apiKeyService.deleteApiKey(request.params("key"));
    return "";
}
/** Builds the API keys page model: everything {@code apiKeyService.getApiKeys()} returns, under "apikeys". */
private Object apiKeysModel(Request request, Response response) {
    final var keys = apiKeyService.getApiKeys();
    return Map.of("apikeys", keys);
}
@Override
public void logRequest(Request request) {
if ("GET".equals(request.requestMethod()))
@ -358,25 +152,6 @@ public class ControlService extends Service {
"events", eventLogService.getLastEntriesForService(serviceName, Long.MAX_VALUE, 20));
}
/** Builds the storage page model: the unfiltered storage list under the "storage" key. */
private Object storageModel(Request request, Response response) {
    final var storageList = controlFileStorageService.getStorageList();
    return Map.of("storage", storageList);
}
/**
 * Builds the model for a single file storage item.
 *
 * The ":id" path parameter is parsed into a {@code FileStorageId}, and the storage
 * together with its related entries is placed under the "storage" key.
 */
private Object storageDetailsModel(Request request, Response response) throws SQLException {
    final var storageId = FileStorageId.parse(request.params("id"));
    final var storageWithRelated = controlFileStorageService.getFileStorageWithRelatedEntries(storageId);

    return Map.of("storage", storageWithRelated);
}
/** Builds the storage model restricted to {@code FileStorageType.CRAWL_SPEC} entries. */
private Object storageModelSpecs(Request request, Response response) {
    final var specs = controlFileStorageService.getStorageList(FileStorageType.CRAWL_SPEC);
    return Map.of("storage", specs);
}
/** Builds the storage model restricted to {@code FileStorageType.CRAWL_DATA} entries. */
private Object storageModelCrawls(Request request, Response response) {
    final var crawls = controlFileStorageService.getStorageList(FileStorageType.CRAWL_DATA);
    return Map.of("storage", crawls);
}
/** Builds the storage model restricted to {@code FileStorageType.BACKUP} entries. */
private Object storageModelBackups(Request request, Response response) {
    final var backups = controlFileStorageService.getStorageList(FileStorageType.BACKUP);
    return Map.of("storage", backups);
}
/** Builds the storage model restricted to {@code FileStorageType.PROCESSED_DATA} entries. */
private Object storageModelProcessed(Request request, Response response) {
    final var processed = controlFileStorageService.getStorageList(FileStorageType.PROCESSED_DATA);
    return Map.of("storage", processed);
}
private Object servicesModel(Request request, Response response) {
return Map.of("services", heartbeatService.getServiceHeartbeats(),
"events", eventLogService.getLastEntries(Long.MAX_VALUE, 20));
@ -388,18 +163,20 @@ public class ControlService extends Service {
return Map.of("processes", processes,
"jobs", jobs,
"actors", controlActorService.getActorStates(),
"actors", controlActorService.getActorStates(request),
"messages", messageQueueService.getLastEntries(20));
}
/**
 * Builds the detail model for a single actor.
 *
 * The ":fsm" path parameter is upper-cased and resolved to an {@code Actor} constant.
 * The model holds the actor, its state graph, and the result of
 * {@code getLastEntriesForInbox(inbox, 20)} for the actor's inbox id.
 *
 * @throws IllegalArgumentException if ":fsm" does not name an {@code Actor} constant
 */
private Object actorDetailsModel(Request request, Response response) {
    // Locale.ROOT keeps the enum lookup independent of the JVM default locale
    // (e.g. under a Turkish locale "i" would otherwise upper-case to a dotted capital I).
    final String actorName = request.params("fsm").toUpperCase(java.util.Locale.ROOT);
    final Actor actor = Actor.valueOf(actorName);
    final String inbox = actor.id();

    return Map.of(
            "actor", actor,
            "state-graph", controlActorService.getActorStateGraph(actor),
            "messages", messageQueueService.getLastEntriesForInbox(inbox, 20));
}
// private Object actorDetailsModel(Request request, Response response) {
// final Actor actor = Actor.valueOf(request.params("fsm").toUpperCase());
// final String inbox = actor.id();
//
// return Map.of(
// "actor", actor,
// "state-graph", controlActorService.getActorStateGraph(actor),
// "messages", messageQueueService.getLastEntriesForInbox(inbox, 20));
// }
private Object serveStatic(Request request, Response response) {
String resource = request.params("resource");

View File

@ -1,22 +0,0 @@
package nu.marginalia.control;
import spark.ResponseTransformer;
/**
 * Response transformer that ignores the handler's return value and always renders a
 * small HTML page which meta-refreshes the browser to a fixed destination.
 */
public class HtmlRedirect implements ResponseTransformer {
    private final String html;

    /** Because Spark doesn't have a redirect method that works with relative URLs
     * (without explicitly providing the external address), we use HTML and let the
     * browser resolve the relative redirect instead.
     *
     * @param destination the URL placed verbatim in the meta refresh tag; it is not
     *                    escaped, so it must not come from untrusted input
     */
    public HtmlRedirect(String destination) {
        // "<!DOCTYPE html>" is the valid HTML doctype; the former "<?doctype html>"
        // is parsed by browsers as a bogus comment rather than a doctype.
        this.html = """
                <!DOCTYPE html>
                <html><head><meta http-equiv="refresh" content="0;URL='%s'" /></head></html>
                """.formatted(destination);
    }

    /**
     * @param any ignored
     * @return the pre-rendered redirect page
     */
    @Override
    public String render(Object any) throws Exception {
        return html;
    }
}

View File

@ -0,0 +1,32 @@
package nu.marginalia.control;
import spark.ResponseTransformer;
/**
 * Shared, pre-built redirect transformers for the control GUI's pages.
 */
public class Redirects {
    public static final HtmlRedirect redirectToServices = new HtmlRedirect("/services");
    public static final HtmlRedirect redirectToActors = new HtmlRedirect("/actors");
    public static final HtmlRedirect redirectToApiKeys = new HtmlRedirect("/api-keys");
    public static final HtmlRedirect redirectToStorage = new HtmlRedirect("/storage");
    public static final HtmlRedirect redirectToBlacklist = new HtmlRedirect("/blacklist");
    public static final HtmlRedirect redirectToComplaints = new HtmlRedirect("/complaints");
    public static final HtmlRedirect redirectToMessageQueue = new HtmlRedirect("/message-queue");

    /**
     * Response transformer that ignores the handler's return value and always renders
     * a small HTML page which meta-refreshes the browser to a fixed destination.
     */
    public static class HtmlRedirect implements ResponseTransformer {
        private final String html;

        /** Because Spark doesn't have a redirect method that works with relative URLs
         * (without explicitly providing the external address), we use HTML and let the
         * browser resolve the relative redirect instead.
         *
         * @param destination the URL placed verbatim in the meta refresh tag; it is not
         *                    escaped, so it must not come from untrusted input
         */
        public HtmlRedirect(String destination) {
            // "<!DOCTYPE html>" is the valid HTML doctype; the former "<?doctype html>"
            // is parsed by browsers as a bogus comment rather than a doctype.
            this.html = """
                    <!DOCTYPE html>
                    <html><head><meta http-equiv="refresh" content="0;URL='%s'" /></head></html>
                    """.formatted(destination);
        }

        /**
         * @param any ignored
         * @return the pre-rendered redirect page
         */
        @Override
        public String render(Object any) throws Exception {
            return html;
        }
    }
}

View File

@ -1,84 +0,0 @@
package nu.marginalia.control.actor.monitor;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.ActorStateFactory;
import nu.marginalia.control.model.ServiceHeartbeat;
import nu.marginalia.control.svc.HeartbeatService;
import nu.marginalia.control.process.ProcessService;
import nu.marginalia.actor.prototype.AbstractActorPrototype;
import nu.marginalia.actor.state.ActorState;
import nu.marginalia.actor.state.ActorResumeBehavior;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
 * Actor that loops forever, reconciling the heartbeat tables with what the
 * {@code ProcessService} reports as actually running.
 */
@Singleton
public class ProcessLivenessMonitorActor extends AbstractActorPrototype {

    // STATES
    private static final String INITIAL = "INITIAL";
    private static final String MONITOR = "MONITOR";
    private static final String END = "END";

    /** Upper bound on lastSeenMillis() for a heartbeat to count as fresh (presumably milliseconds). */
    private static final int HEARTBEAT_FRESHNESS_LIMIT = 10_000;

    /** Pause between two reconciliation passes, in seconds. */
    private static final int PASS_INTERVAL_SECONDS = 60;

    private final ProcessService processService;
    private final HeartbeatService heartbeatService;

    @Inject
    public ProcessLivenessMonitorActor(ActorStateFactory stateFactory,
                                       ProcessService processService,
                                       HeartbeatService heartbeatService) {
        super(stateFactory);

        this.heartbeatService = heartbeatService;
        this.processService = processService;
    }

    @Override
    public String describe() {
        return "Periodically check to ensure that the control service's view of running processes is agreement with the process heartbeats table.";
    }

    /** Entry state; does nothing itself and is declared to transition to MONITOR. */
    @ActorState(name = INITIAL, next = MONITOR)
    public void init() {
    }

    /**
     * Runs reconciliation passes in an endless loop, sleeping between passes.
     *
     * Each pass flags process heartbeats as stopped when they claim to be running but
     * the process is not running or the heartbeat is stale, and removes task heartbeats
     * that are stale.
     */
    @ActorState(name = MONITOR, next = MONITOR, resume = ActorResumeBehavior.RETRY, description = """
            Periodically check to ensure that the control service's view of
            running processes is agreement with the process heartbeats table.
            If the process is not running, mark the process as stopped in the table.
            """)
    public void monitor() throws Exception {
        while (true) {
            for (var processHeartbeat : heartbeatService.getProcessHeartbeats()) {
                // Only heartbeats that still claim to be running need checking
                if (!processHeartbeat.isRunning()) {
                    continue;
                }

                var pid = processHeartbeat.getProcessId();
                if (pid == null) {
                    continue;
                }

                boolean alive = processService.isRunning(pid)
                        && processHeartbeat.lastSeenMillis() < HEARTBEAT_FRESHNESS_LIMIT;
                if (!alive) {
                    heartbeatService.flagProcessAsStopped(processHeartbeat);
                }
            }

            for (var taskHeartbeat : heartbeatService.getTaskHeartbeats()) {
                boolean stale = !(taskHeartbeat.lastSeenMillis() < HEARTBEAT_FRESHNESS_LIMIT);
                if (stale) {
                    heartbeatService.removeTaskHeartbeat(taskHeartbeat);
                }
            }

            TimeUnit.SECONDS.sleep(PASS_INTERVAL_SECONDS);
        }
    }
}

Some files were not shown because too many files have changed in this diff Show More