diff --git a/apps/server/execution/orchestrator.ts b/apps/server/execution/orchestrator.ts index 138aaa3..e87e9d4 100644 --- a/apps/server/execution/orchestrator.ts +++ b/apps/server/execution/orchestrator.ts @@ -20,7 +20,7 @@ import type { ProjectRepository } from '../db/repositories/project-repository.js import type { AgentRepository } from '../db/repositories/agent-repository.js'; import type { DispatchManager, PhaseDispatchManager } from '../dispatch/types.js'; import type { ConflictResolutionService } from '../coordination/conflict-resolution-service.js'; -import { phaseBranchName, taskBranchName } from '../git/branch-naming.js'; +import { phaseBranchName, taskBranchName, isPlanningCategory } from '../git/branch-naming.js'; import { ensureProjectClone } from '../git/project-clones.js'; import { createModuleLogger } from '../logger/index.js'; import { phaseMetaCache, fileDiffCache } from '../review/diff-cache.js'; @@ -637,6 +637,23 @@ export class ExecutionOrchestrator { } } + // Clean up stale duplicate planning tasks (e.g. a crashed detail task + // that was reset to pending, then a new detail task was created and completed). + const tasksAfterRecovery = await this.taskRepository.findByPhaseId(phase.id); + const completedPlanningNames = new Set(); + for (const t of tasksAfterRecovery) { + if (isPlanningCategory(t.category) && t.status === 'completed') { + completedPlanningNames.add(`${t.category}:${t.phaseId}`); + } + } + for (const t of tasksAfterRecovery) { + if (isPlanningCategory(t.category) && t.status === 'pending' && completedPlanningNames.has(`${t.category}:${t.phaseId}`)) { + await this.taskRepository.update(t.id, { status: 'completed', summary: 'Superseded by retry' }); + tasksRecovered++; + log.info({ taskId: t.id, category: t.category }, 'recovered stale duplicate planning task'); + } + } + // Re-read tasks after recovery updates and check if phase is now fully done const updatedTasks = await this.taskRepository.findByPhaseId(phase.id); const allDone = updatedTasks.every((t) => t.status === 'completed'); diff --git a/apps/server/git/simple-git-branch-manager.ts b/apps/server/git/simple-git-branch-manager.ts index bfe969d..841b18c 100644 --- a/apps/server/git/simple-git-branch-manager.ts +++ b/apps/server/git/simple-git-branch-manager.ts @@ -46,8 +46,17 @@ export class SimpleGitBranchManager implements BranchManager { return; } - await git.branch([branch, baseBranch]); - log.info({ repoPath, branch, baseBranch }, 'branch created'); + try { + await git.branch([branch, baseBranch]); + log.info({ repoPath, branch, baseBranch }, 'branch created'); + } catch (err) { + // Handle TOCTOU race: branch may have been created between the exists check and now + if (err instanceof Error && err.message.includes('already exists')) { + log.debug({ repoPath, branch }, 'branch created by concurrent process'); + return; + } + throw err; + } } async mergeBranch(repoPath: string, sourceBranch: string, targetBranch: string): Promise { diff --git a/apps/server/trpc/routers/architect.ts b/apps/server/trpc/routers/architect.ts index 24bc3e9..b1f0354 100644 --- a/apps/server/trpc/routers/architect.ts +++ b/apps/server/trpc/routers/architect.ts @@ -337,6 +337,14 @@ export function architectProcedures(publicProcedure: ProcedureBuilder) { }); } + // Clean up orphaned pending/in_progress detail tasks from previous failed attempts + const phaseTasks = await taskRepo.findByPhaseId(input.phaseId); + for (const t of phaseTasks) { + if (t.category === 'detail' && (t.status === 'pending' || t.status === 'in_progress')) { + await taskRepo.update(t.id, { status: 'completed', summary: 'Superseded by retry' }); + } + } + const detailTaskName = input.taskName ?? `Detail: ${phase.name}`; const task = await taskRepo.create({ phaseId: phase.id, diff --git a/docs/dispatch-events.md b/docs/dispatch-events.md index 9fe59fe..3ce7d08 100644 --- a/docs/dispatch-events.md +++ b/docs/dispatch-events.md @@ -149,8 +149,11 @@ When an agent crashes (`agent:crashed` event), the orchestrator automatically re On server restart, `recoverDispatchQueues()` also recovers: - Stuck `in_progress` tasks whose agents are dead (status is not `running` or `waiting_for_input`) — reset to `pending` and re-queued - Erroneously `blocked` tasks whose agents completed successfully (status is `idle` or `stopped`) — marked `completed` so the phase can progress. This handles the legacy case where conflict resolution incorrectly blocked already-completed tasks. +- Stale duplicate planning tasks — if a phase has both a completed and a pending task of the same planning category (e.g. two `detail` tasks from a crash-and-retry), the pending one is marked `completed` with summary "Superseded by retry" - Fully-completed `in_progress` phases — after task recovery, if all tasks in an `in_progress` phase are completed, triggers `handlePhaseAllTasksDone` to complete/review the phase +The `detailPhase` mutation in `architect.ts` also cleans up orphaned pending/in_progress detail tasks before creating new ones, preventing duplicates at the source. + Manual retry via `retryBlockedTask()` resets `retryCount` to 0, giving the task a fresh set of automatic retries. ### Coalesced Scheduling