fix: detect hung agent processes via defensive signal.json polling
The Claude CLI occasionally hangs after writing signal.json and never exits. Add an optional signal check to pollForCompletion: after a 60s grace period, check signal.json every 30s. If a valid completion signal is found while the process is still alive, send the process SIGTERM and proceed to normal completion handling.
This commit is contained in:
@@ -369,6 +369,7 @@ export class MultiProviderAgentManager implements AgentManager {
|
||||
agentId, pid,
|
||||
() => this.handleDetachedAgentCompletion(agentId),
|
||||
() => this.activeAgents.get(agentId)?.tailer,
|
||||
this.createEarlyCompletionChecker(agentId),
|
||||
);
|
||||
activeEntry.cancelPoll = cancel;
|
||||
|
||||
@@ -406,6 +407,20 @@ export class MultiProviderAgentManager implements AgentManager {
|
||||
return this.toAgentInfo(agent);
|
||||
}
|
||||
|
||||
/**
 * Build the signal.json probe that pollForCompletion uses to detect a hung
 * agent process.
 *
 * The returned callback looks the agent up in the repository, resolves its
 * working directory from its worktree ID, and asks the output handler to
 * read a completion signal from that directory.
 *
 * @param agentId - ID of the agent whose completion signal should be probed.
 * @returns An async callback that resolves to `false` when the lookup yields
 *   no agent or the agent has no worktreeId, and otherwise to whether
 *   readSignalCompletion returned something other than `null`.
 */
private createEarlyCompletionChecker(agentId: string): () => Promise<boolean> {
  const hasCompletionSignal = async (): Promise<boolean> => {
    const record = await this.repository.findById(agentId);
    const worktreeId = record?.worktreeId;
    if (!worktreeId) {
      // Without a worktree there is no directory to look for a signal in.
      return false;
    }

    const workdir = this.processManager.getAgentWorkdir(worktreeId);
    const completion = await this.outputHandler.readSignalCompletion(workdir);
    return completion !== null;
  };

  return hasCompletionSignal;
}
|
||||
|
||||
/**
|
||||
* Handle completion of a detached agent.
|
||||
*/
|
||||
@@ -525,6 +540,7 @@ export class MultiProviderAgentManager implements AgentManager {
|
||||
agentId, pid,
|
||||
() => this.handleDetachedAgentCompletion(agentId),
|
||||
() => this.activeAgents.get(agentId)?.tailer,
|
||||
this.createEarlyCompletionChecker(agentId),
|
||||
);
|
||||
commitActiveEntry.cancelPoll = commitCancel;
|
||||
|
||||
@@ -633,6 +649,7 @@ export class MultiProviderAgentManager implements AgentManager {
|
||||
agentId, pid,
|
||||
() => this.handleDetachedAgentCompletion(agentId),
|
||||
() => this.activeAgents.get(agentId)?.tailer,
|
||||
this.createEarlyCompletionChecker(agentId),
|
||||
);
|
||||
activeEntry.cancelPoll = cancel;
|
||||
|
||||
@@ -704,6 +721,7 @@ export class MultiProviderAgentManager implements AgentManager {
|
||||
agentId, pid,
|
||||
() => this.handleDetachedAgentCompletion(agentId),
|
||||
() => this.activeAgents.get(agentId)?.tailer,
|
||||
this.createEarlyCompletionChecker(agentId),
|
||||
);
|
||||
activeEntry.cancelPoll = cancel;
|
||||
|
||||
@@ -890,6 +908,7 @@ export class MultiProviderAgentManager implements AgentManager {
|
||||
agentId, pid,
|
||||
() => this.handleDetachedAgentCompletion(agentId),
|
||||
() => this.activeAgents.get(agentId)?.tailer,
|
||||
this.createEarlyCompletionChecker(agentId),
|
||||
);
|
||||
resumeActiveEntry.cancelPoll = resumeCancel;
|
||||
}
|
||||
@@ -1013,6 +1032,7 @@ export class MultiProviderAgentManager implements AgentManager {
|
||||
agentId, pid,
|
||||
() => this.handleDetachedAgentCompletion(agentId),
|
||||
() => this.activeAgents.get(agentId)?.tailer,
|
||||
this.createEarlyCompletionChecker(agentId),
|
||||
);
|
||||
const active = this.activeAgents.get(agentId);
|
||||
if (active) active.cancelPoll = cancel;
|
||||
|
||||
Reference in New Issue
Block a user