fix: detect hung agent processes via defensive signal.json polling

Claude CLI occasionally writes signal.json but then hangs and never exits.
Add an optional signal check to pollForCompletion: after a 60s grace
period, check signal.json every 30s. If a valid completion signal is
found while the process is still alive, SIGTERM it and proceed to
normal completion handling.
This commit is contained in:
Lukas May
2026-03-06 21:23:19 +01:00
parent 388befd7c3
commit 56efc0bad6
3 changed files with 63 additions and 7 deletions

View File

@@ -369,6 +369,7 @@ export class MultiProviderAgentManager implements AgentManager {
agentId, pid,
() => this.handleDetachedAgentCompletion(agentId),
() => this.activeAgents.get(agentId)?.tailer,
this.createEarlyCompletionChecker(agentId),
);
activeEntry.cancelPoll = cancel;
@@ -406,6 +407,20 @@ export class MultiProviderAgentManager implements AgentManager {
return this.toAgentInfo(agent);
}
/**
 * Build the probe that pollForCompletion uses to detect hung processes.
 *
 * The returned function looks the agent up by id on every call, resolves
 * its working directory from the agent's worktree id, and asks the output
 * handler to read the completion signal (signal.json) from that directory.
 *
 * @param agentId - Id of the agent whose signal should be checked.
 * @returns An async predicate that resolves to `true` when
 *   `readSignalCompletion` yields a non-null result, and to `false` when the
 *   agent cannot be found, has no worktree id, or the read yields `null`.
 */
private createEarlyCompletionChecker(agentId: string): () => Promise<boolean> {
  const hasCompletionSignal = async (): Promise<boolean> => {
    // Re-fetch on each invocation rather than capturing the record once.
    const record = await this.repository.findById(agentId);
    const worktreeId = record?.worktreeId;
    if (!worktreeId) {
      // Unknown agent or no worktree: there is no directory to inspect.
      return false;
    }

    const workdir = this.processManager.getAgentWorkdir(worktreeId);
    const completion = await this.outputHandler.readSignalCompletion(workdir);
    return completion !== null;
  };

  return hasCompletionSignal;
}
/**
* Handle completion of a detached agent.
*/
@@ -525,6 +540,7 @@ export class MultiProviderAgentManager implements AgentManager {
agentId, pid,
() => this.handleDetachedAgentCompletion(agentId),
() => this.activeAgents.get(agentId)?.tailer,
this.createEarlyCompletionChecker(agentId),
);
commitActiveEntry.cancelPoll = commitCancel;
@@ -633,6 +649,7 @@ export class MultiProviderAgentManager implements AgentManager {
agentId, pid,
() => this.handleDetachedAgentCompletion(agentId),
() => this.activeAgents.get(agentId)?.tailer,
this.createEarlyCompletionChecker(agentId),
);
activeEntry.cancelPoll = cancel;
@@ -704,6 +721,7 @@ export class MultiProviderAgentManager implements AgentManager {
agentId, pid,
() => this.handleDetachedAgentCompletion(agentId),
() => this.activeAgents.get(agentId)?.tailer,
this.createEarlyCompletionChecker(agentId),
);
activeEntry.cancelPoll = cancel;
@@ -890,6 +908,7 @@ export class MultiProviderAgentManager implements AgentManager {
agentId, pid,
() => this.handleDetachedAgentCompletion(agentId),
() => this.activeAgents.get(agentId)?.tailer,
this.createEarlyCompletionChecker(agentId),
);
resumeActiveEntry.cancelPoll = resumeCancel;
}
@@ -1013,6 +1032,7 @@ export class MultiProviderAgentManager implements AgentManager {
agentId, pid,
() => this.handleDetachedAgentCompletion(agentId),
() => this.activeAgents.get(agentId)?.tailer,
this.createEarlyCompletionChecker(agentId),
);
const active = this.activeAgents.get(agentId);
if (active) active.cancelPoll = cancel;