From 3168b301858193eb896694eeebdff3ab519bbbde Mon Sep 17 00:00:00 2001 From: Lukas May Date: Sat, 31 Jan 2026 15:19:14 +0100 Subject: [PATCH] docs(08.1, 09): insert agent output schema phase, update phase 9 Phase 8.1: Agent Output Schema (INSERTED) - 2 plans in 2 waves (sequential) - Defines discriminated union schema (done/question/error) - Updates ClaudeAgentManager to use --json-schema flag - Aligns MockAgentManager with new schema Phase 9: Extended Scenarios (updated) - 2 plans in 1 wave (parallel) - Now depends on Phase 8.1 - Updated scenario format references --- .planning/ROADMAP.md | 27 +- .../08.1-agent-output-schema/08.1-01-PLAN.md | 269 +++++++++++++++++ .../08.1-agent-output-schema/08.1-02-PLAN.md | 272 ++++++++++++++++++ .../09-extended-scenarios/09-01-PLAN.md | 4 +- .../09-extended-scenarios/09-02-PLAN.md | 8 +- 5 files changed, 571 insertions(+), 9 deletions(-) create mode 100644 .planning/phases/08.1-agent-output-schema/08.1-01-PLAN.md create mode 100644 .planning/phases/08.1-agent-output-schema/08.1-02-PLAN.md diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md index 60a1c0b..2118e31 100644 --- a/.planning/ROADMAP.md +++ b/.planning/ROADMAP.md @@ -141,20 +141,32 @@ Plans: - [x] 08-01: Happy Path E2E Tests - [x] 08-02: Edge Case E2E Tests -#### Phase 9: Extended Scenarios & CI +#### Phase 8.1: Agent Output Schema (INSERTED) -**Goal**: Additional scenario coverage + CI pipeline integration for automated test runs +**Goal**: Define structured agent output schema (done/question/error discriminated union) and update ClaudeAgentManager to use `--json-schema` flag for validated output parsing **Depends on**: Phase 8 -**Research**: Unlikely (standard CI patterns) -**Plans**: TBD +**Research**: Unlikely (Zod schemas, Claude CLI flags documented) +**Plans**: 2 plans Plans: -- [ ] 09-01: TBD (run /gsd:plan-phase 9 to break down) +- [ ] 08.1-01: Agent Output Schema & ClaudeAgentManager +- [ ] 08.1-02: MockAgentManager Schema Alignment + +#### Phase 9: 
Extended Scenarios + +**Goal**: Extended E2E scenario coverage — conflict hand-back round-trip, multi-agent parallel work, recovery/resume flows +**Depends on**: Phase 8.1 +**Research**: Unlikely (testing existing functionality) +**Plans**: 2 plans + +Plans: +- [ ] 09-01: Conflict & Parallel E2E Tests +- [ ] 09-02: Recovery & Resume E2E Tests ## Progress **Execution Order:** -Phases execute in numeric order: 1 → 1.1 → 2 → 3 → 4 → 5 → 6 → 7 → 8 → 9 +Phases execute in numeric order: 1 → 1.1 → 2 → 3 → 4 → 5 → 6 → 7 → 8 → 8.1 → 9 | Phase | Milestone | Plans Complete | Status | Completed | |-------|-----------|----------------|--------|-----------| @@ -167,7 +179,8 @@ Phases execute in numeric order: 1 → 1.1 → 2 → 3 → 4 → 5 → 6 → 7 | 6. Coordination | v1.0 | 3/3 | Complete | 2026-01-30 | | 7. Mock Agent & Test Harness | v1.1 | 2/2 | Complete | 2026-01-31 | | 8. E2E Scenario Tests | v1.1 | 2/2 | Complete | 2026-01-31 | -| 9. Extended Scenarios & CI | v1.1 | 0/? | Not started | - | +| 8.1. Agent Output Schema | v1.1 | 0/2 | Not started | - | +| 9. Extended Scenarios | v1.1 | 0/2 | Not started | - | --- *Roadmap created: 2026-01-30* diff --git a/.planning/phases/08.1-agent-output-schema/08.1-01-PLAN.md b/.planning/phases/08.1-agent-output-schema/08.1-01-PLAN.md new file mode 100644 index 0000000..96dfdda --- /dev/null +++ b/.planning/phases/08.1-agent-output-schema/08.1-01-PLAN.md @@ -0,0 +1,269 @@ +--- +phase: 08.1-agent-output-schema +plan: 01 +type: execute +wave: 1 +depends_on: [] +files_modified: [src/agent/schema.ts, src/agent/manager.ts, src/agent/types.ts] +autonomous: true +--- + + +Define structured agent output schema and update ClaudeAgentManager to use `--json-schema` flag for validated output parsing. + +Purpose: Replace broken AskUserQuestion detection with explicit agent status signaling via discriminated union schema. +Output: Zod schema for agent output, updated ClaudeAgentManager with proper question/resume flow. 
+ + +@~/.claude/get-shit-done/workflows/execute-plan.md +@~/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md + +@src/agent/manager.ts +@src/agent/types.ts +@src/events/types.ts + + + + + + Task 1: Define agent output schema with Zod + src/agent/schema.ts + +Create new file `src/agent/schema.ts` with discriminated union schema: + +```typescript +import { z } from 'zod'; + +// Option for questions +const optionSchema = z.object({ + label: z.string(), + description: z.string().optional(), +}); + +// Discriminated union for agent output +export const agentOutputSchema = z.discriminatedUnion('status', [ + // Agent completed successfully + z.object({ + status: z.literal('done'), + result: z.string(), + filesModified: z.array(z.string()).optional(), + }), + + // Agent needs user input to continue + z.object({ + status: z.literal('question'), + question: z.string(), + options: z.array(optionSchema).optional(), + multiSelect: z.boolean().optional(), + }), + + // Agent hit unrecoverable error + z.object({ + status: z.literal('unrecoverable_error'), + error: z.string(), + attempted: z.string().optional(), + }), +]); + +export type AgentOutput = z.infer<typeof agentOutputSchema>; + +// JSON Schema for --json-schema flag (convert Zod to JSON Schema) +export const agentOutputJsonSchema = { + type: 'object', + oneOf: [ + { + properties: { + status: { const: 'done' }, + result: { type: 'string' }, + filesModified: { type: 'array', items: { type: 'string' } }, + }, + required: ['status', 'result'], + }, + { + properties: { + status: { const: 'question' }, + question: { type: 'string' }, + options: { + type: 'array', + items: { + type: 'object', + properties: { + label: { type: 'string' }, + description: { type: 'string' }, + }, + required: ['label'], + }, + }, + multiSelect: { type: 'boolean' }, + }, + required: ['status', 'question'], + }, + { + properties: { + status: { const: 'unrecoverable_error' }, + error: { type: 'string' }, + 
attempted: { type: 'string' }, + }, + required: ['status', 'error'], + }, + ], +}; +``` + +Export both the Zod schema (for runtime validation) and the JSON Schema object (serialized with `JSON.stringify` when passed to the `--json-schema` CLI flag). + + npx tsc --noEmit src/agent/schema.ts + Schema file created with discriminated union and JSON schema export + + + + Task 2: Update ClaudeAgentManager to use schema and handle question flow + src/agent/manager.ts, src/agent/types.ts + +Update ClaudeAgentManager: + +1. **Import schema:** + ```typescript + import { agentOutputSchema, agentOutputJsonSchema } from './schema.js'; + ``` + +2. **Update spawn() to pass --json-schema:** + ```typescript + const subprocess = execa( + 'claude', + [ + '-p', prompt, + '--output-format', 'json', + '--json-schema', JSON.stringify(agentOutputJsonSchema), + ], + { cwd: cwd ?? worktree.path, detached: true, stdio: ['ignore', 'pipe', 'pipe'] } + ); + ``` + +3. **Update handleAgentCompletion() to parse discriminated union:** + + Replace the current ClaudeCliResult parsing with: + ```typescript + const cliResult = JSON.parse(stdout as string); + + // Store session_id for resume capability + if (cliResult.session_id) { + await this.repository.updateSessionId(agentId, cliResult.session_id); + } + + // Parse the agent's structured output from result field + const agentOutput = agentOutputSchema.parse(JSON.parse(cliResult.result)); + + switch (agentOutput.status) { + case 'done': + // Success path - existing logic + active.result = { success: true, message: agentOutput.result, filesModified: agentOutput.filesModified }; + await this.repository.updateStatus(agentId, 'idle'); + // Emit agent:stopped event + break; + + case 'question': + // Question path - agent needs input + await this.repository.updateStatus(agentId, 'waiting_for_input'); + // Store question metadata for later retrieval + active.pendingQuestion = agentOutput; + // Emit agent:waiting event with structured question + break; + + case 'unrecoverable_error': + // Error path + active.result = { 
success: false, message: agentOutput.error }; + await this.repository.updateStatus(agentId, 'crashed'); + // Emit agent:crashed event + break; + } + ``` + +4. **Update ActiveAgent interface:** + ```typescript + interface ActiveAgent { + subprocess: ResultPromise; + result?: AgentResult; + pendingQuestion?: { question: string; options?: Array<{label: string; description?: string}>; multiSelect?: boolean }; + } + ``` + +5. **Update resume() to use same session_id:** + - Already uses `--resume` flag with stored session_id ✓ + - Ensure prompt passed is the user's answer to the question + +6. **Add method to get pending question:** + ```typescript + async getPendingQuestion(agentId: string): Promise<{question: string; options?: ...} | null> { + const active = this.activeAgents.get(agentId); + return active?.pendingQuestion ?? null; + } + ``` + +7. **Update AgentManager interface in types.ts:** + Add getPendingQuestion method signature. + +8. **Remove the hacky string matching** in handleAgentError() for 'waiting for input' detection - no longer needed. + + npm run build && npm run typecheck + ClaudeAgentManager uses --json-schema, parses discriminated union, handles question/resume flow correctly + + + + Task 3: Update AgentWaitingEvent to include structured question data + src/events/types.ts + +Update AgentWaitingEvent payload to include structured question data: + +```typescript +export interface AgentWaitingEvent extends BaseEvent { + type: 'agent:waiting'; + payload: { + agentId: string; + name: string; + taskId: string; + sessionId: string; + question: string; + options?: Array<{ label: string; description?: string }>; + multiSelect?: boolean; + }; +} +``` + +This allows event consumers to receive the full question structure, not just a plain string. 
+ + npm run typecheck + AgentWaitingEvent includes structured question metadata + + + + + +Before declaring plan complete: +- [ ] `npm run build` succeeds +- [ ] `npm run typecheck` passes +- [ ] Schema exports both Zod and JSON Schema formats +- [ ] ClaudeAgentManager passes --json-schema to CLI +- [ ] Question status triggers waiting_for_input with stored question +- [ ] Resume uses same session_id + + + + +- All tasks completed +- All verification checks pass +- Agent output is validated against schema +- Question -> answer -> resume flow uses same session_id +- No breaking changes to existing tests (MockAgentManager updated separately) + + + +After completion, create `.planning/phases/08.1-agent-output-schema/08.1-01-SUMMARY.md` + diff --git a/.planning/phases/08.1-agent-output-schema/08.1-02-PLAN.md b/.planning/phases/08.1-agent-output-schema/08.1-02-PLAN.md new file mode 100644 index 0000000..516dc9c --- /dev/null +++ b/.planning/phases/08.1-agent-output-schema/08.1-02-PLAN.md @@ -0,0 +1,272 @@ +--- +phase: 08.1-agent-output-schema +plan: 02 +type: execute +wave: 2 +depends_on: ["08.1-01"] +files_modified: [src/agent/mock-manager.ts, src/test/harness.ts, src/agent/mock-manager.test.ts] +autonomous: true +--- + + +Update MockAgentManager to use the new agent output schema, enabling proper testing of question/resume flows. + +Purpose: Align mock implementation with real ClaudeAgentManager behavior for accurate E2E testing. +Output: MockAgentManager that simulates structured output schema with question/resume support. 
+ + + +@~/.claude/get-shit-done/workflows/execute-plan.md +@~/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@.planning/phases/08.1-agent-output-schema/08.1-01-SUMMARY.md + +@src/agent/schema.ts +@src/agent/mock-manager.ts +@src/agent/types.ts +@src/test/harness.ts + + + + + + Task 1: Update MockAgentManager to use schema-aligned scenarios + src/agent/mock-manager.ts + +Update MockAgentManager to match the new schema behavior: + +1. **Update MockAgentScenario to align with schema:** + ```typescript + export type MockAgentScenario = + | { status: 'done'; result?: string; filesModified?: string[]; delay?: number } + | { status: 'question'; question: string; options?: Array<{label: string; description?: string}>; multiSelect?: boolean; delay?: number } + | { status: 'unrecoverable_error'; error: string; attempted?: string; delay?: number }; + ``` + +2. **Update DEFAULT_SCENARIO:** + ```typescript + const DEFAULT_SCENARIO: MockAgentScenario = { + status: 'done', + result: 'Task completed successfully', + filesModified: [], + delay: 0, + }; + ``` + +3. **Update MockAgentRecord to store pending question:** + ```typescript + interface MockAgentRecord { + info: AgentInfo; + scenario: MockAgentScenario; + result?: AgentResult; + pendingQuestion?: { question: string; options?: Array<{label: string; description?: string}>; multiSelect?: boolean }; + completionTimer?: ReturnType<typeof setTimeout>; + } + ``` + +4. **Update completeAgent() to handle new schema:** + ```typescript + private completeAgent(agentId: string, scenario: MockAgentScenario): void { + const record = this.agents.get(agentId); + if (!record) return; + + switch (scenario.status) { + case 'done': + record.result = { + success: true, + message: scenario.result ?? 
'Task completed successfully', + filesModified: scenario.filesModified, + }; + record.info.status = 'idle'; + // Emit agent:stopped event + break; + + case 'question': + record.info.status = 'waiting_for_input'; + record.pendingQuestion = { + question: scenario.question, + options: scenario.options, + multiSelect: scenario.multiSelect, + }; + // Emit agent:waiting event with full question data + if (this.eventBus) { + const event: AgentWaitingEvent = { + type: 'agent:waiting', + timestamp: new Date(), + payload: { + agentId, + name: record.info.name, + taskId: record.info.taskId, + sessionId: record.info.sessionId ?? '', + question: scenario.question, + options: scenario.options, + multiSelect: scenario.multiSelect, + }, + }; + this.eventBus.emit(event); + } + break; + + case 'unrecoverable_error': + record.result = { + success: false, + message: scenario.error, + }; + record.info.status = 'crashed'; + // Emit agent:crashed event + break; + } + } + ``` + +5. **Add getPendingQuestion() method:** + ```typescript + async getPendingQuestion(agentId: string): Promise<{question: string; options?: Array<{label: string; description?: string}>; multiSelect?: boolean} | null> { + const record = this.agents.get(agentId); + return record?.pendingQuestion ?? null; + } + ``` + +6. **Update resume() to clear pending question:** + After successful resume, clear `record.pendingQuestion = undefined`. + + npm run typecheck + MockAgentManager uses schema-aligned scenarios with structured question support + + + + Task 2: Update TestHarness setAgentScenario helper + src/test/harness.ts + +Update TestHarness to use new scenario format: + +1. **Update setAgentScenario type:** + ```typescript + import type { MockAgentScenario } from '../agent/mock-manager.js'; + + // In TestHarness interface: + setAgentScenario(agentName: string, scenario: MockAgentScenario): void; + ``` + +2. 
**Add helper for common scenarios:** + ```typescript + // In TestHarness interface, add convenience methods: + setAgentDone(agentName: string, result?: string): void; + setAgentQuestion(agentName: string, question: string, options?: Array<{label: string; description?: string}>): void; + setAgentError(agentName: string, error: string): void; + + // Implementation: + setAgentDone: (name, result) => agentManager.setScenario(name, { status: 'done', result }), + setAgentQuestion: (name, question, options) => agentManager.setScenario(name, { status: 'question', question, options }), + setAgentError: (name, error) => agentManager.setScenario(name, { status: 'unrecoverable_error', error }), + ``` + +3. **Add getPendingQuestion to harness:** + ```typescript + getPendingQuestion(agentId: string): Promise<{question: string; options?: ...} | null>; + ``` + + npm run typecheck + TestHarness updated with new scenario helpers + + + + Task 3: Update MockAgentManager tests + src/agent/mock-manager.test.ts + +Update existing MockAgentManager tests to use new scenario format: + +1. **Update scenario declarations:** + Change from: + ```typescript + { outcome: 'success', message: '...' } + { outcome: 'crash', message: '...' } + { outcome: 'waiting_for_input', question: '...' } + ``` + To: + ```typescript + { status: 'done', result: '...' } + { status: 'unrecoverable_error', error: '...' } + { status: 'question', question: '...', options: [...] } + ``` + +2. 
**Add test for structured question data:** + ```typescript + it('emits agent:waiting with structured question data', async () => { + vi.useFakeTimers(); + manager.setScenario('test-agent', { + status: 'question', + question: 'Which database?', + options: [ + { label: 'PostgreSQL', description: 'Full-featured' }, + { label: 'SQLite', description: 'Lightweight' }, + ], + multiSelect: false, + }); + + await manager.spawn({ name: 'test-agent', taskId: 'task-1', prompt: 'test' }); + await vi.runAllTimersAsync(); + + const events = eventBus.getEventsByType('agent:waiting'); + expect(events[0].payload.options).toHaveLength(2); + expect(events[0].payload.options[0].label).toBe('PostgreSQL'); + }); + ``` + +3. **Add test for getPendingQuestion:** + ```typescript + it('stores pending question for retrieval', async () => { + vi.useFakeTimers(); + manager.setScenario('test-agent', { + status: 'question', + question: 'Which database?', + options: [{ label: 'PostgreSQL' }], + }); + + const agent = await manager.spawn({ name: 'test-agent', taskId: 'task-1', prompt: 'test' }); + await vi.runAllTimersAsync(); + + const pending = await manager.getPendingQuestion(agent.id); + expect(pending?.question).toBe('Which database?'); + expect(pending?.options).toHaveLength(1); + }); + ``` + +4. 
**Add test for resume clearing pending question:** + ```typescript + it('clears pending question after resume', async () => { + // Setup question scenario, resume, verify pendingQuestion is null + }); + ``` + + npm test src/agent/mock-manager.test.ts -- --run + MockAgentManager tests updated and passing with new schema + + + + + +Before declaring plan complete: +- [ ] `npm run build` succeeds +- [ ] `npm run typecheck` passes +- [ ] `npm test src/agent/mock-manager.test.ts -- --run` passes +- [ ] Existing E2E tests still pass (may need scenario format updates) + + + + +- All tasks completed +- All verification checks pass +- MockAgentManager matches ClaudeAgentManager behavior +- TestHarness provides convenient scenario helpers +- All mock-manager tests pass with new schema + + + +After completion, create `.planning/phases/08.1-agent-output-schema/08.1-02-SUMMARY.md` + diff --git a/.planning/phases/09-extended-scenarios/09-01-PLAN.md b/.planning/phases/09-extended-scenarios/09-01-PLAN.md index 50315ee..12baf64 100644 --- a/.planning/phases/09-extended-scenarios/09-01-PLAN.md +++ b/.planning/phases/09-extended-scenarios/09-01-PLAN.md @@ -6,6 +6,7 @@ wave: 1 depends_on: [] files_modified: [src/test/e2e/extended-scenarios.test.ts] autonomous: true +phase_depends_on: [08.1-agent-output-schema] --- @@ -27,6 +28,7 @@ Output: Extended E2E test file with conflict round-trip and parallel completion @.planning/phases/09-extended-scenarios/09-CONTEXT.md @.planning/phases/08-e2e-scenario-tests/08-01-SUMMARY.md @.planning/phases/08-e2e-scenario-tests/08-02-SUMMARY.md +@.planning/phases/08.1-agent-output-schema/08.1-02-SUMMARY.md @src/test/harness.ts @src/test/fixtures.ts @@ -71,7 +73,7 @@ Create new test file with describe block "Conflict hand-back round-trip". 
Test t Use same patterns as edge-cases.test.ts: - vi.useFakeTimers() for async control - Pre-seed idle agent before dispatch -- harness.setAgentScenario for agent behavior +- harness.setAgentScenario with new schema format: `{ status: 'done' | 'question' | 'unrecoverable_error', ... }` - harness.worktreeManager.setMergeResult for conflict injection - Manual agentRepository.create for coordination tests diff --git a/.planning/phases/09-extended-scenarios/09-02-PLAN.md b/.planning/phases/09-extended-scenarios/09-02-PLAN.md index 4ac8f72..a81b54c 100644 --- a/.planning/phases/09-extended-scenarios/09-02-PLAN.md +++ b/.planning/phases/09-extended-scenarios/09-02-PLAN.md @@ -6,6 +6,7 @@ wave: 1 depends_on: [] files_modified: [src/test/e2e/recovery-scenarios.test.ts] autonomous: true +phase_depends_on: [08.1-agent-output-schema] --- @@ -26,6 +27,7 @@ Output: Recovery scenarios test file with state persistence and Q&A flow tests. @.planning/STATE.md @.planning/phases/09-extended-scenarios/09-CONTEXT.md @.planning/phases/08-e2e-scenario-tests/08-02-SUMMARY.md +@.planning/phases/08.1-agent-output-schema/08.1-02-SUMMARY.md @src/test/harness.ts @src/test/fixtures.ts @@ -117,7 +119,11 @@ Add describe block "Agent Q&A extended scenarios" to the test file. Test scenari - Now complete task - Verify: proper state transitions -Use edge-cases.test.ts patterns for waiting/resume flow. +Use new schema format for scenarios: +- `{ status: 'question', question: '...', options: [...] }` for questions +- `{ status: 'done', result: '...' }` for success +- `{ status: 'unrecoverable_error', error: '...' }` for failures +- harness.getPendingQuestion() to retrieve structured question data npm test src/test/e2e/recovery-scenarios.test.ts -- --run passes 4 Q&A tests passing, proving extended question flows work