From dd26819d66834811b22900ec2aca5131c61f99bb Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 22 Jan 2024 21:22:38 +0100 Subject: [PATCH] (actor) Try to fix rare data race where a finished job is considered dead. --- .../nu/marginalia/actor/task/ActorProcessWatcher.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ActorProcessWatcher.java b/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ActorProcessWatcher.java index 8f0445bd..4bae8674 100644 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ActorProcessWatcher.java +++ b/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ActorProcessWatcher.java @@ -73,6 +73,16 @@ public class ActorProcessWatcher { // Maybe the process died, wait a moment for it to restart if (!waitForProcess(processId, TimeUnit.SECONDS, 30)) { + + // Check if the process has already responded, but we missed it + // This infrequently happens if we get unlucky with the timing of the process terminating + // and the polling thread... + + var maybeResponse = outbox.pollResponse(msgId); + if (maybeResponse.isPresent()) { + return maybeResponse.get(); + } + throw new ActorControlFlowException("Process " + processId + " died and did not re-launch"); } }