refactor: Restructure monorepo to apps/server/ and apps/web/ layout

Move src/ → apps/server/ and packages/web/ → apps/web/ to adopt
standard monorepo conventions (apps/ for runnable apps, packages/
for reusable libraries). Update all config files, shared package
imports, test fixtures, and documentation to reflect new paths.

Key fixes:
- Update workspace config to ["apps/*", "packages/*"]
- Update tsconfig.json rootDir/include for apps/server/
- Add apps/web/** to vitest exclude list
- Update drizzle.config.ts schema path
- Fix ensure-schema.ts migration path detection (3 levels up in dev,
  2 levels up in dist)
- Fix tests/integration/cli-server.test.ts import paths
- Update packages/shared imports to apps/server/ paths
- Update all docs/ files with new paths
This commit is contained in:
Lukas May
2026-03-03 11:22:53 +01:00
parent 8c38d958ce
commit 34578d39c6
535 changed files with 75452 additions and 687 deletions

View File

@@ -0,0 +1,265 @@
/**
* Cassette System Unit Tests
*
* Verifies normalizer, key generation, and store in isolation.
* These run without any real processes or API calls.
*/
import { describe, it, expect, beforeEach } from 'vitest';
import { mkdtempSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
import { hashWorktreeFiles, buildCassetteKey } from './key.js';
import { CassetteStore } from './store.js';
import type { CassetteEntry, CassetteKey } from './types.js';
// ---------------------------------------------------------------------------
// Normalizer
// ---------------------------------------------------------------------------
describe('normalizePrompt', () => {
  it('strips UUIDs', () => {
    const input = 'Agent 550e8400-e29b-41d4-a716-446655440000 is running task abc123ef-0000-0000-0000-000000000000';
    const normalized = normalizePrompt(input, '');
    expect(normalized).toContain('__UUID__');
    expect(normalized).not.toContain('550e8400');
    expect(normalized).not.toContain('abc123ef');
  });

  it('strips workspace root path', () => {
    const root = '/tmp/cw-test-abc123';
    const normalized = normalizePrompt(`Working directory: ${root}/agent-workdirs/my-agent`, root);
    expect(normalized).toContain('__WORKSPACE__');
    expect(normalized).not.toContain(root);
  });

  it('strips ISO timestamps', () => {
    const normalized = normalizePrompt('Started at 2026-03-01T14:30:00Z, last seen 2026-03-01T14:35:22.456Z', '');
    expect(normalized).toContain('__TIMESTAMP__');
    expect(normalized).not.toContain('2026-03-01');
  });

  it('strips session numbers', () => {
    const normalized = normalizePrompt('Resuming session 3 with agent session-42', '');
    expect(normalized).not.toContain('session 3');
    expect(normalized).not.toContain('session-42');
    expect(normalized).toContain('session__N__');
  });

  it('leaves static content unchanged', () => {
    const staticPrompt = 'You are a Worker agent. Execute the assigned coding task.';
    expect(normalizePrompt(staticPrompt, '/tmp/ws')).toBe(staticPrompt);
  });

  it('strips nanoid strings (21-char alphanumeric)', () => {
    const id = 'V1StGXR8_Z5jdHi6B-myT';
    const normalized = normalizePrompt(`Agent worktree: /tmp/cw-preview-${id}/app`, '');
    expect(normalized).toContain('__ID__');
    expect(normalized).not.toContain(id);
  });

  it('strips workspace root before UUID replacement to avoid double-normalizing', () => {
    const root = '/tmp/cw-test-abc123';
    const normalized = normalizePrompt(`Dir: ${root}/agents/550e8400-e29b-41d4-a716-446655440000`, root);
    expect(normalized).toBe('Dir: __WORKSPACE__/agents/__UUID__');
  });
});
describe('stripPromptFromArgs', () => {
  // All strip cases use the same prompt text; only the arg layout differs.
  const prompt = 'Do the task.';

  it('strips -p <prompt> style (Claude native)', () => {
    const stripped = stripPromptFromArgs(
      ['--dangerously-skip-permissions', '--verbose', '-p', prompt, '--output-format', 'stream-json'],
      prompt,
    );
    expect(stripped).toEqual(['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json']);
  });

  it('strips --prompt <prompt> style', () => {
    expect(stripPromptFromArgs(['--flag', '--prompt', prompt, '--json'], prompt)).toEqual(['--flag', '--json']);
  });

  it('strips bare positional prompt', () => {
    expect(stripPromptFromArgs(['--full-auto', prompt], prompt)).toEqual(['--full-auto']);
  });

  it('returns unchanged args when prompt is empty', () => {
    const args = ['--flag', '--value'];
    expect(stripPromptFromArgs(args, '')).toEqual(args);
  });
});
// ---------------------------------------------------------------------------
// Key generation
// ---------------------------------------------------------------------------
describe('buildCassetteKey', () => {
  const baseKey: CassetteKey = {
    normalizedPrompt: 'You are a Worker agent.',
    providerName: 'claude',
    modelArgs: ['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json'],
    worktreeHash: 'empty',
  };

  // Build a key hash from baseKey with selected fields overridden.
  const keyOf = (overrides: Partial<CassetteKey>): string => buildCassetteKey({ ...baseKey, ...overrides });

  it('produces a 32-char hex string', () => {
    expect(buildCassetteKey(baseKey)).toMatch(/^[0-9a-f]{32}$/);
  });

  it('is deterministic for the same key', () => {
    expect(buildCassetteKey(baseKey)).toBe(buildCassetteKey(baseKey));
  });

  it('differs when normalizedPrompt changes', () => {
    expect(keyOf({ normalizedPrompt: 'You are a Discuss agent.' })).not.toBe(buildCassetteKey(baseKey));
  });

  it('differs when providerName changes', () => {
    expect(keyOf({ providerName: 'codex' })).not.toBe(buildCassetteKey(baseKey));
  });

  it('differs when worktreeHash changes', () => {
    expect(keyOf({ worktreeHash: 'abcdef1234567890' })).not.toBe(buildCassetteKey(baseKey));
  });

  it('is stable regardless of modelArgs insertion order', () => {
    const forward = keyOf({ modelArgs: ['--verbose', '--dangerously-skip-permissions'] });
    const reversed = keyOf({ modelArgs: ['--dangerously-skip-permissions', '--verbose'] });
    expect(forward).toBe(reversed);
  });
});
describe('hashWorktreeFiles', () => {
  // Run a test body inside a fresh temp dir that is always removed afterwards.
  const withTempDir = (fn: (dir: string) => void): void => {
    const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-'));
    try {
      fn(dir);
    } finally {
      rmSync(dir, { recursive: true });
    }
  };

  it('returns "empty" for a non-existent directory', () => {
    expect(hashWorktreeFiles('/does/not/exist')).toBe('empty');
  });

  it('returns "empty" for a directory with only hidden files', () => {
    withTempDir((dir) => {
      // Only hidden entries present
      const { mkdirSync } = require('node:fs');
      mkdirSync(join(dir, '.git'));
      expect(hashWorktreeFiles(dir)).toBe('empty');
    });
  });

  it('produces a 16-char hex string for a directory with files', () => {
    withTempDir((dir) => {
      const { writeFileSync } = require('node:fs');
      writeFileSync(join(dir, 'index.ts'), 'export const x = 1;');
      expect(hashWorktreeFiles(dir)).toMatch(/^[0-9a-f]{16}$/);
    });
  });

  it('changes when file content changes', () => {
    withTempDir((dir) => {
      const { writeFileSync } = require('node:fs');
      writeFileSync(join(dir, 'index.ts'), 'export const x = 1;');
      const before = hashWorktreeFiles(dir);
      writeFileSync(join(dir, 'index.ts'), 'export const x = 2;');
      expect(hashWorktreeFiles(dir)).not.toBe(before);
    });
  });
});
// ---------------------------------------------------------------------------
// CassetteStore
// ---------------------------------------------------------------------------
describe('CassetteStore', () => {
  let dir: string;
  let store: CassetteStore;

  const key: CassetteKey = {
    normalizedPrompt: 'Test prompt',
    providerName: 'claude',
    modelArgs: ['--verbose'],
    worktreeHash: 'empty',
  };

  const entry: CassetteEntry = {
    version: 1,
    key,
    recording: {
      jsonlLines: ['{"type":"system","session_id":"test-session"}', '{"type":"result","subtype":"success"}'],
      signalJson: { status: 'done', message: 'Task completed' },
      exitCode: 0,
      recordedAt: '2026-03-01T00:00:00.000Z',
    },
  };

  // List the cassette .json files currently present in the store directory.
  const cassetteFiles = (): string[] => {
    const { readdirSync } = require('node:fs');
    return readdirSync(dir).filter((f: string) => f.endsWith('.json'));
  };

  beforeEach(() => {
    dir = mkdtempSync(join(tmpdir(), 'cw-store-test-'));
    store = new CassetteStore(dir);
  });

  it('returns null for unknown key', () => {
    expect(store.find(key)).toBeNull();
  });

  it('round-trips a cassette entry', () => {
    store.save(key, entry);
    const loaded = store.find(key);
    expect(loaded).not.toBeNull();
    expect(loaded?.recording.jsonlLines).toHaveLength(2);
    expect(loaded?.recording.signalJson).toEqual({ status: 'done', message: 'Task completed' });
  });

  it('overwrites an existing cassette', () => {
    store.save(key, entry);
    store.save(key, {
      ...entry,
      recording: { ...entry.recording, jsonlLines: ['new line'], recordedAt: '2026-03-02T00:00:00.000Z' },
    });
    expect(store.find(key)?.recording.jsonlLines).toEqual(['new line']);
  });

  it('uses same file for same key', () => {
    store.save(key, entry);
    expect(cassetteFiles()).toHaveLength(1);
    store.save(key, entry); // overwrite
    expect(cassetteFiles()).toHaveLength(1);
  });

  it('uses different files for different keys', () => {
    const otherKey: CassetteKey = { ...key, providerName: 'codex' };
    store.save(key, entry);
    store.save(otherKey, { ...entry, key: otherKey });
    expect(cassetteFiles()).toHaveLength(2);
  });
});

View File

@@ -0,0 +1,200 @@
/**
* Cassette Test Harness
*
* Wraps RealProviderHarness with the CassetteProcessManager so tests run
* against recorded cassettes instead of real AI APIs.
*
* Usage:
*
* let harness: RealProviderHarness;
*
* beforeAll(async () => {
* harness = await createCassetteHarness({ provider: 'claude' });
* });
*
* afterAll(() => harness.cleanup());
*
* it('completes a task', async () => {
* const agent = await harness.agentManager.spawn({ prompt: MINIMAL_PROMPTS.done, ... });
* const result = await harness.waitForAgentCompletion(agent.id);
* expect(result?.success).toBe(true);
* });
*
* Mode control via env vars:
* (default) → replay mode: cassette must exist, throws if missing
* CW_CASSETTE_RECORD=1 → auto mode: replay if exists, record if missing
CW_CASSETTE_FORCE_RECORD=1 → record mode: always run real agent, overwrite cassette
*/
import { execSync } from 'node:child_process';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { createTestDatabase } from '../../db/repositories/drizzle/test-helpers.js';
import {
DrizzleAgentRepository,
DrizzleProjectRepository,
DrizzleAccountRepository,
DrizzleInitiativeRepository,
} from '../../db/repositories/drizzle/index.js';
import { MultiProviderAgentManager } from '../../agent/manager.js';
import { CapturingEventBus, sleep, type RealProviderHarness } from '../integration/real-providers/harness.js';
import { CassetteStore } from './store.js';
import { CassetteProcessManager, type CassetteMode } from './process-manager.js';
/** Configuration for createCassetteHarness. Every field is optional. */
export interface CassetteHarnessOptions {
  /** Which provider the agent runs as (default: 'claude'). */
  provider?: 'claude' | 'codex';
  /**
   * Directory where cassette JSON files are stored and read from.
   * Defaults to CW_CASSETTE_DIR env var, then src/test/cassettes/.
   */
  cassetteDir?: string;
  /**
   * Override cassette mode. Normally derived from env vars
   * (env vars win over this option — see resolveCassetteMode):
   * - CW_CASSETTE_FORCE_RECORD=1 → 'record'
   * - CW_CASSETTE_RECORD=1 → 'auto'
   * - (default) → 'replay'
   */
  mode?: CassetteMode;
}
// Default on-disk cassette directory, resolved relative to this module.
// fileURLToPath handles Windows drive letters and percent-encoded characters
// (spaces, etc.) correctly; URL#pathname would yield "/C:/..." and leave
// "%20" escapes in the path.
const DEFAULT_CASSETTE_DIR = fileURLToPath(new URL('../cassettes', import.meta.url));
/**
 * Resolve the effective cassette mode.
 * Env vars take priority over the options object; with neither set, 'replay'.
 */
function resolveCassetteMode(options: CassetteHarnessOptions): CassetteMode {
  const envOverride: CassetteMode | null =
    process.env.CW_CASSETTE_FORCE_RECORD === '1'
      ? 'record'
      : process.env.CW_CASSETTE_RECORD === '1'
        ? 'auto'
        : null;
  return envOverride ?? options.mode ?? 'replay';
}
/**
 * Create a test harness backed by the cassette system.
 *
 * The harness exposes the same interface as RealProviderHarness so tests
 * written for real providers work unchanged with cassettes.
 *
 * Replay is much faster than real API calls (typically < 500ms) and
 * exercises the full pipeline: ProcessManager → FileTailer → OutputHandler
 * → SignalManager → event emission.
 *
 * @param options Provider selection, cassette directory, and mode override.
 * @returns A fully wired harness whose process manager replays or records
 *          cassettes instead of always spawning real agent CLIs.
 */
export async function createCassetteHarness(options: CassetteHarnessOptions = {}): Promise<RealProviderHarness> {
  // Precedence: explicit option → CW_CASSETTE_DIR env var → bundled default dir.
  const cassetteDir = options.cassetteDir ?? process.env.CW_CASSETTE_DIR ?? DEFAULT_CASSETTE_DIR;
  const cassetteMode = resolveCassetteMode(options);
  // Create a temporary git workspace (required for worktree operations).
  const workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-cassette-'));
  execSync('git init', { cwd: workspaceRoot, stdio: 'ignore' });
  execSync('git config user.email "test@test.com"', { cwd: workspaceRoot, stdio: 'ignore' });
  execSync('git config user.name "Test"', { cwd: workspaceRoot, stdio: 'ignore' });
  // Initial commit so the repository has a HEAD for worktree operations.
  execSync('touch .gitkeep && git add .gitkeep && git commit -m "init"', { cwd: workspaceRoot, stdio: 'ignore' });
  // Fresh test database plus the repositories the agent manager needs.
  const db = createTestDatabase();
  const agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  const initiativeRepository = new DrizzleInitiativeRepository(db);
  const eventBus = new CapturingEventBus();
  const store = new CassetteStore(cassetteDir);
  // The cassette-aware process manager replaces the default ProcessManager
  // inside MultiProviderAgentManager (passed as the last constructor arg).
  const cassetteProcessManager = new CassetteProcessManager(
    workspaceRoot,
    projectRepository,
    store,
    cassetteMode,
  );
  const agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus,
    undefined, // credentialManager
    undefined, // changeSetRepository
    undefined, // phaseRepository
    undefined, // taskRepository
    undefined, // pageRepository
    undefined, // logChunkRepository
    false, // debug
    cassetteProcessManager,
  );
  const harness: RealProviderHarness = {
    db,
    eventBus,
    agentManager,
    workspaceRoot,
    agentRepository,
    projectRepository,
    accountRepository,
    initiativeRepository,
    // Cassette replays are fast — use a short poll interval and default timeout.
    // Polls the agent row until a terminal status, then fetches the result.
    async waitForAgentCompletion(agentId, timeoutMs = 30_000) {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null; // agent row gone — nothing to wait for
        if (agent.status === 'idle' || agent.status === 'stopped') {
          return agentManager.getResult(agentId);
        }
        if (agent.status === 'crashed') {
          return agentManager.getResult(agentId);
        }
        // waiting_for_input is not completion; callers use waitForAgentWaiting.
        if (agent.status === 'waiting_for_input') return null;
        await sleep(100);
      }
      throw new Error(`[cassette] Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
    },
    // Polls until the agent is waiting for input, or null on any terminal status.
    async waitForAgentWaiting(agentId, timeoutMs = 30_000) {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'waiting_for_input') return agentManager.getPendingQuestions(agentId);
        if (['idle', 'stopped', 'crashed'].includes(agent.status)) return null; // terminal — will never wait
        await sleep(100);
      }
      throw new Error(`[cassette] Timeout waiting for agent ${agentId} to enter waiting state after ${timeoutMs}ms`);
    },
    // Polls until the agent reaches the exact status; throws if the agent disappears.
    async waitForAgentStatus(agentId, status, timeoutMs = 30_000) {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) throw new Error(`Agent ${agentId} not found`);
        if (agent.status === status) return;
        await sleep(100);
      }
      throw new Error(`[cassette] Timeout waiting for agent ${agentId} to reach status '${status}' after ${timeoutMs}ms`);
    },
    getEventsByType(type) {
      return eventBus.getEventsByType(type);
    },
    clearEvents() {
      eventBus.clearEvents();
    },
    // Best-effort stop of every still-running agent; stop errors are swallowed.
    async killAllAgents() {
      const agents = await agentRepository.findAll();
      for (const agent of agents) {
        if (agent.status === 'running') {
          try { await agentManager.stop(agent.id); } catch { /* ignore */ }
        }
      }
    },
    // Stops agents and removes the temporary git workspace.
    async cleanup() {
      await harness.killAllAgents();
      try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ }
    },
  };
  return harness;
}

View File

@@ -0,0 +1,6 @@
/**
 * Cassette system barrel — public entry point for the VCR-style
 * record/replay test infrastructure (store, process manager, harness,
 * and the key/normalizer helpers used to build cassette identities).
 */
export { CassetteStore } from './store.js';
export { CassetteProcessManager, type CassetteMode } from './process-manager.js';
export { createCassetteHarness, type CassetteHarnessOptions } from './harness.js';
export { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
export { hashWorktreeFiles, buildCassetteKey } from './key.js';
export type { CassetteKey, CassetteRecording, CassetteEntry } from './types.js';

View File

@@ -0,0 +1,76 @@
/**
* Cassette Key Generation
*
* Builds stable SHA256-based identifiers for cassettes.
* Two spans are separate concerns:
* - hashWorktreeFiles: fingerprints the worktree state at spawn time (for execute mode drift)
* - buildCassetteKey: hashes all key components into a 32-char hex filename
*/
import { createHash } from 'node:crypto';
import { readdirSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
import type { CassetteKey } from './types.js';
/**
* Recursively hash all non-hidden files in a directory.
*
* Hidden entries (starting with '.') are skipped — this excludes .git, .cw, etc.
* Entries are processed in sorted order for determinism across platforms.
*
* Returns the first 16 hex chars of the SHA256, or 'empty' if the directory
* is absent or contains no readable files.
*/
export function hashWorktreeFiles(dir: string): string {
const hash = createHash('sha256');
let hasContent = false;
function walkDir(currentDir: string): void {
let entries;
try {
entries = readdirSync(currentDir, { withFileTypes: true });
} catch {
return;
}
for (const entry of [...entries].sort((a, b) => a.name.localeCompare(b.name))) {
if (entry.name.startsWith('.')) continue;
const fullPath = join(currentDir, entry.name);
const relPath = fullPath.slice(dir.length);
if (entry.isDirectory()) {
hash.update(`d:${relPath}\n`);
walkDir(fullPath);
} else if (entry.isFile()) {
try {
const content = readFileSync(fullPath);
hash.update(`f:${relPath}:${content.length}\n`);
hash.update(content);
hasContent = true;
} catch {
// skip unreadable files
}
}
}
}
walkDir(dir);
return hasContent ? hash.digest('hex').slice(0, 16) : 'empty';
}
/**
 * Compute a stable 32-char hex identifier for a cassette key.
 *
 * The key is serialized into a canonical JSON form (fixed property order,
 * modelArgs sorted) so that arg insertion-order differences between
 * providers map onto the same cassette file.
 */
export function buildCassetteKey(key: CassetteKey): string {
  const { normalizedPrompt, providerName, worktreeHash } = key;
  const canonical = JSON.stringify({
    normalizedPrompt,
    providerName,
    modelArgs: [...key.modelArgs].sort(),
    worktreeHash,
  });
  return createHash('sha256').update(canonical).digest('hex').slice(0, 32);
}

View File

@@ -0,0 +1,76 @@
/**
* Cassette Normalizer
*
* Strips dynamic content from prompts and CLI args before hashing into a cassette key.
* Dynamic content (UUIDs, temp paths, timestamps, session numbers) varies between
* test runs but doesn't affect how the agent responds — so we replace them with
* stable placeholders to get a stable cache key.
*/
const UUID_RE = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi;
const NANOID_RE = /(?<![A-Za-z0-9])[A-Za-z0-9_-]{21}(?![A-Za-z0-9_-])/g;
const ISO_TIMESTAMP_RE = /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?/g;
const UNIX_EPOCH_MS_RE = /\b1[0-9]{12}\b/g;
const SESSION_NUM_RE = /\bsession[_\s-]?\d+\b/gi;
// Agent worktree paths: agent-workdirs/<random-agent-name> (with or without trailing slash)
// The agent name (e.g. "available-sheep") changes every run but is not a UUID or nanoid.
// Stop at the first slash so the project name after it is preserved.
const AGENT_WORKDIR_RE = /agent-workdirs\/[^\s/\\]+/g;

/**
 * Ordered (pattern → placeholder) rules, most-specific first so earlier
 * replacements cannot leave fragments that a later pattern half-matches.
 */
const PLACEHOLDER_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  [UUID_RE, '__UUID__'],
  [NANOID_RE, '__ID__'],
  [ISO_TIMESTAMP_RE, '__TIMESTAMP__'],
  [UNIX_EPOCH_MS_RE, '__EPOCH__'],
  [SESSION_NUM_RE, 'session__N__'],
  [AGENT_WORKDIR_RE, 'agent-workdirs/__AGENT__'],
];

/**
 * Normalize a prompt for stable cassette key generation.
 *
 * The absolute workspace root is replaced with __WORKSPACE__ first (before
 * any regex rules, so path fragments are not double-normalized), then each
 * placeholder rule is applied in order.
 */
export function normalizePrompt(prompt: string, workspaceRoot: string): string {
  const base = workspaceRoot ? prompt.replaceAll(workspaceRoot, '__WORKSPACE__') : prompt;
  return PLACEHOLDER_RULES.reduce(
    (text, [pattern, placeholder]) => text.replace(pattern, placeholder),
    base,
  );
}
// CLI flags whose following value is the prompt text. Hoisted to module
// scope so the array is built once, not on every loop iteration.
const PROMPT_FLAGS = ['-p', '--prompt', '--message'];

/**
 * Strip the prompt value from CLI args to produce stable modelArgs for the cassette key.
 *
 * Handles all provider prompt flag styles:
 * - Native: `-p <prompt>` (Claude)
 * - Flag: `--prompt <prompt>`, `--message <prompt>`
 * - Also removes the bare prompt value if it appears as a positional arg.
 *
 * @param args   Full CLI argument list passed to the provider binary (not mutated).
 * @param prompt Exact prompt string to remove; when empty, a copy of args is
 *               returned unchanged.
 * @returns New array with every flag+prompt pair and bare prompt occurrence removed.
 */
export function stripPromptFromArgs(args: string[], prompt: string): string[] {
  if (!prompt) return [...args];
  const result: string[] = [];
  let i = 0;
  while (i < args.length) {
    const arg = args[i];
    if (PROMPT_FLAGS.includes(arg) && args[i + 1] === prompt) {
      i += 2; // skip flag + value
    } else if (arg === prompt) {
      i += 1; // skip bare positional prompt
    } else {
      result.push(arg);
      i++;
    }
  }
  return result;
}

View File

@@ -0,0 +1,258 @@
/**
* CassetteProcessManager
*
* Extends ProcessManager to intercept subprocess spawning and either:
* - Replay a recorded cassette (no API cost, deterministic)
* - Record a new cassette by running the real agent and capturing its output
*
* Modes:
* - 'replay': cassette MUST exist; throws if missing (safe for CI)
* - 'record': always runs real agent; saves/overwrites cassette on completion
* - 'auto': replays if cassette exists; falls through to record if missing
*
* The cassette key is built from:
* - Normalized prompt (dynamic content replaced with placeholders)
* - Provider name and stable CLI args (prompt value stripped)
* - Worktree file hash (detects content drift for execute-mode agents)
*/
import { readFileSync, existsSync, mkdirSync, writeFileSync, readdirSync } from 'node:fs';
import { join, dirname, relative } from 'node:path';
import { ProcessManager } from '../../agent/process-manager.js';
import type { StreamEvent } from '../../agent/providers/parsers/index.js';
import type { FileTailer } from '../../agent/file-tailer.js';
import type { ProjectRepository } from '../../db/repositories/project-repository.js';
import type { CassetteKey, CassetteEntry } from './types.js';
import type { CassetteStore } from './store.js';
import { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
import { hashWorktreeFiles } from './key.js';
/** How the process manager treats cassettes: see class docblock above. */
export type CassetteMode = 'replay' | 'record' | 'auto';

/** Bookkeeping for a real agent run whose output will be saved as a cassette. */
interface PendingRecording {
  key: CassetteKey;
  outputFilePath: string;
  agentCwd: string;
}

/** Bookkeeping for a replayed run whose .cw/output files must be restored. */
interface PendingReplay {
  cassette: CassetteEntry;
  agentCwd: string;
}
export class CassetteProcessManager extends ProcessManager {
  private readonly _workspaceRoot: string;
  /** Absolute path to replay-worker.mjs, spawned via `node` instead of the real CLI. */
  private readonly replayWorkerPath: string;
  /** pid → recording bookkeeping, consumed in pollForCompletion. */
  private readonly pendingRecordings = new Map<number, PendingRecording>();
  /** pid → replay bookkeeping, consumed in pollForCompletion. */
  private readonly pendingReplays = new Map<number, PendingReplay>();

  constructor(
    workspaceRoot: string,
    projectRepository: ProjectRepository,
    private readonly store: CassetteStore,
    private readonly cassetteMode: CassetteMode = 'auto',
  ) {
    super(workspaceRoot, projectRepository);
    this._workspaceRoot = workspaceRoot;
    // NOTE(review): URL#pathname mis-handles Windows paths (drive letters,
    // percent-encoded characters) — fileURLToPath would be safer; confirm
    // the supported platforms for this test infrastructure.
    this.replayWorkerPath = new URL('./replay-worker.mjs', import.meta.url).pathname;
  }

  /**
   * Intercept agent spawning: replay a matching cassette when one exists,
   * otherwise (mode permitting) spawn the real agent and arrange for its
   * output to be recorded when it completes.
   *
   * @throws In 'replay' mode when no cassette matches the computed key.
   */
  override spawnDetached(
    agentId: string,
    agentName: string,
    command: string,
    args: string[],
    cwd: string,
    env: Record<string, string>,
    providerName: string,
    prompt?: string,
    onEvent?: (event: StreamEvent) => void,
    onRawContent?: (content: string) => void,
  ): { pid: number; outputFilePath: string; tailer: FileTailer } {
    // Build the cassette identity from the stable parts of this spawn request.
    const key: CassetteKey = {
      normalizedPrompt: normalizePrompt(prompt ?? '', this._workspaceRoot),
      providerName,
      modelArgs: stripPromptFromArgs(args, prompt ?? ''),
      worktreeHash: hashWorktreeFiles(cwd),
    };
    // In record mode we always skip the store lookup and go straight to real spawn.
    const existing = this.cassetteMode !== 'record' ? this.store.find(key) : null;
    if (existing) {
      const result = this.replayFromCassette(agentId, agentName, cwd, env, providerName, existing, onEvent, onRawContent);
      this.pendingReplays.set(result.pid, { cassette: existing, agentCwd: cwd });
      return result;
    }
    if (this.cassetteMode === 'replay') {
      throw new Error(
        `[cassette] No cassette found for agent '${agentName}' (provider=${providerName}, mode=replay).\n` +
        `Run with CW_CASSETTE_RECORD=1 to record it.`,
      );
    }
    // auto or record: run the real agent and record the cassette on completion.
    console.log(`[cassette] recording new cassette for agent '${agentName}' (${providerName})`);
    const result = super.spawnDetached(agentId, agentName, command, args, cwd, env, providerName, prompt, onEvent, onRawContent);
    this.pendingRecordings.set(result.pid, { key, outputFilePath: result.outputFilePath, agentCwd: cwd });
    return result;
  }

  /**
   * Wrap completion polling so cassette bookkeeping runs before the caller's
   * onComplete: saving a fresh recording, or restoring replayed output files.
   */
  override pollForCompletion(
    agentId: string,
    pid: number,
    onComplete: () => Promise<void>,
    getTailer: () => FileTailer | undefined,
  ): { cancel: () => void } {
    const recording = this.pendingRecordings.get(pid);
    if (recording) {
      // Record mode — wrap onComplete to save the cassette before handing off.
      return super.pollForCompletion(agentId, pid, async () => {
        await this.saveCassette(recording);
        this.pendingRecordings.delete(pid);
        await onComplete();
      }, getTailer);
    }
    const replay = this.pendingReplays.get(pid);
    if (replay) {
      // Replay mode — restore .cw/output/ files before onComplete so that
      // readPhaseFiles / readTaskFiles / readProposalFiles find their data.
      return super.pollForCompletion(agentId, pid, async () => {
        this.restoreOutputFiles(replay.cassette, replay.agentCwd);
        this.pendingReplays.delete(pid);
        await onComplete();
      }, getTailer);
    }
    // Neither recording nor replaying this pid — plain passthrough.
    return super.pollForCompletion(agentId, pid, onComplete, getTailer);
  }

  /**
   * Capture the finished agent's JSONL output, signal.json, and the rest of
   * .cw/output/ into a cassette entry and persist it to the store.
   */
  private async saveCassette(pending: PendingRecording): Promise<void> {
    // Read all JSONL lines from the output file the agent wrote to.
    let jsonlLines: string[] = [];
    try {
      const content = readFileSync(pending.outputFilePath, 'utf-8');
      jsonlLines = content.split('\n').filter(l => l.trim() !== '');
    } catch {
      // No output produced — record an empty cassette.
    }
    // Read signal.json from the agent working directory.
    let signalJson: Record<string, unknown> | null = null;
    const outputDir = join(pending.agentCwd, '.cw', 'output');
    const signalPath = join(outputDir, 'signal.json');
    if (existsSync(signalPath)) {
      try {
        signalJson = JSON.parse(readFileSync(signalPath, 'utf-8')) as Record<string, unknown>;
      } catch {
        // Corrupt signal file — record null.
      }
    }
    // Capture all other files in .cw/output/ (phase files, task files, etc.)
    const outputFiles: Record<string, string> = {};
    if (existsSync(outputDir)) {
      this.walkOutputDir(outputDir, outputDir, (relPath, content) => {
        if (relPath !== 'signal.json') {
          outputFiles[relPath] = content;
        }
      });
    }
    const entry: CassetteEntry = {
      version: 1,
      key: pending.key,
      recording: {
        jsonlLines,
        signalJson,
        // NOTE(review): the real process exit code is not captured here —
        // it is recorded as 0 unconditionally; confirm this is intended.
        exitCode: 0,
        recordedAt: new Date().toISOString(),
        outputFiles,
      },
    };
    this.store.save(pending.key, entry);
  }

  /**
   * Restore captured .cw/output/ files to the new agent working directory.
   * Called before onComplete so that downstream readers (readPhaseFiles, etc.)
   * find the expected files in place.
   */
  private restoreOutputFiles(cassette: CassetteEntry, agentCwd: string): void {
    const { outputFiles, signalJson } = cassette.recording;
    const outputDir = join(agentCwd, '.cw', 'output');
    // Restore captured output files
    if (outputFiles) {
      for (const [relPath, content] of Object.entries(outputFiles)) {
        const fullPath = join(outputDir, relPath);
        mkdirSync(dirname(fullPath), { recursive: true });
        writeFileSync(fullPath, content, 'utf-8');
      }
    }
    // Write signal.json (the manager reads this to detect completion status)
    if (signalJson) {
      mkdirSync(outputDir, { recursive: true });
      writeFileSync(join(outputDir, 'signal.json'), JSON.stringify(signalJson), 'utf-8');
    }
  }

  /**
   * Depth-first walk over baseDir, invoking callback with each readable
   * file's path relative to baseDir plus its utf-8 content.
   */
  private walkOutputDir(
    baseDir: string,
    currentDir: string,
    callback: (relPath: string, content: string) => void,
  ): void {
    let entries;
    try {
      entries = readdirSync(currentDir, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      const fullPath = join(currentDir, entry.name);
      const relPath = relative(baseDir, fullPath);
      if (entry.isDirectory()) {
        this.walkOutputDir(baseDir, fullPath, callback);
      } else if (entry.isFile()) {
        try {
          const content = readFileSync(fullPath, 'utf-8');
          callback(relPath, content);
        } catch {
          // Skip unreadable files
        }
      }
    }
  }

  /**
   * Spawn the replay worker (a plain node script) in place of the real agent
   * CLI, passing the recording via the CW_CASSETTE_DATA env var.
   */
  private replayFromCassette(
    agentId: string,
    agentName: string,
    cwd: string,
    env: Record<string, string>,
    providerName: string,
    cassette: CassetteEntry,
    onEvent?: (event: StreamEvent) => void,
    onRawContent?: (content: string) => void,
  ): { pid: number; outputFilePath: string; tailer: FileTailer } {
    console.log(`[cassette] replaying cassette for agent '${agentName}' (${cassette.recording.jsonlLines.length} lines)`);
    return super.spawnDetached(
      agentId,
      agentName,
      process.execPath, // use the running node binary
      [this.replayWorkerPath], // replay-worker.mjs
      cwd,
      { ...env, CW_CASSETTE_DATA: JSON.stringify(cassette.recording) },
      providerName, // use original provider's parser for the tailer
      undefined, // no prompt — worker handles output directly
      onEvent,
      onRawContent,
    );
  }
}

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env node
/**
 * Cassette Replay Worker
 *
 * Spawned as a detached subprocess by CassetteProcessManager in place of the
 * real agent CLI. The recording arrives via the CW_CASSETTE_DATA env var;
 * the worker emits its JSONL lines on stdout (which spawnDetached redirects
 * to the agent output file), writes signal.json under the process cwd, and
 * exits with the recorded exit code.
 *
 * Kept as plain .mjs (no TypeScript) so it runs with bare `node` — no build
 * step or tsx dependency required.
 */
import { mkdirSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';

const raw = process.env.CW_CASSETTE_DATA;
if (!raw) {
  process.stderr.write('[replay-worker] CW_CASSETTE_DATA env var not set\n');
  process.exit(1);
}

let recording;
try {
  recording = JSON.parse(raw);
} catch (err) {
  process.stderr.write(`[replay-worker] failed to parse CW_CASSETTE_DATA: ${err.message}\n`);
  process.exit(1);
}

const { jsonlLines = [], signalJson = null, exitCode = 0 } = recording;

// Emit the recorded JSONL stream on stdout; spawnDetached redirects stdout
// to the output file via fd redirection, so this lands in the output file.
process.stdout.write(jsonlLines.map((line) => line + '\n').join(''));

// Write signal.json relative to cwd — spawnDetached sets cwd to the agent
// working directory, so this lands at <agentCwd>/.cw/output/signal.json.
if (signalJson) {
  const signalDir = join(process.cwd(), '.cw', 'output');
  mkdirSync(signalDir, { recursive: true });
  writeFileSync(join(signalDir, 'signal.json'), JSON.stringify(signalJson, null, 2), 'utf-8');
}

process.exit(exitCode);

View File

@@ -0,0 +1,50 @@
/**
* CassetteStore
*
* Reads and writes cassette files from a directory on disk.
* Each cassette is stored as a JSON file named after the 32-char key hash.
* Cassette files are intended to be committed to git — they are the
* "recorded interactions" that allow tests to run without real API calls.
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import type { CassetteKey, CassetteEntry } from './types.js';
import { buildCassetteKey } from './key.js';
export class CassetteStore {
  /**
   * @param cassetteDir Directory holding one JSON file per cassette, named by
   *   the 32-char key hash. Intended to be committed to git.
   */
  constructor(private readonly cassetteDir: string) {}

  /** Absolute path of the cassette file for a given key hash. */
  private fileFor(hash: string): string {
    return join(this.cassetteDir, `${hash}.json`);
  }

  /**
   * Look up a cassette by its key.
   * Returns null when no file exists for the key, and also when the file
   * exists but cannot be parsed (corrupt cassettes behave like cache misses).
   */
  find(key: CassetteKey): CassetteEntry | null {
    const file = this.fileFor(buildCassetteKey(key));
    if (!existsSync(file)) {
      return null;
    }
    try {
      return JSON.parse(readFileSync(file, 'utf-8')) as CassetteEntry;
    } catch {
      return null;
    }
  }

  /**
   * Save a cassette to disk, creating the cassette directory on demand.
   * Logs the cassette filename so recording runs show what was captured.
   */
  save(key: CassetteKey, entry: CassetteEntry): void {
    mkdirSync(this.cassetteDir, { recursive: true });
    const hash = buildCassetteKey(key);
    writeFileSync(this.fileFor(hash), JSON.stringify(entry, null, 2), 'utf-8');
    console.log(`[cassette] recorded → ${hash}.json (${entry.recording.jsonlLines.length} lines)`);
  }
}

View File

@@ -0,0 +1,42 @@
/**
* Cassette Types
*
* VCR-style cassette format for recording and replaying agent subprocess I/O.
* A cassette captures everything an agent process writes so tests can replay
* it deterministically without hitting real AI APIs.
*/
/**
 * Identity of a recorded agent interaction. Two spawns with the same key are
 * considered equivalent and replay the same cassette.
 */
export interface CassetteKey {
  /** Prompt with dynamic content (UUIDs, paths, timestamps) replaced with placeholders. */
  normalizedPrompt: string;
  /** Provider name, e.g. 'claude', 'codex'. */
  providerName: string;
  /** Stable CLI args with the prompt value stripped. */
  modelArgs: string[];
  /** SHA256 prefix of all non-hidden files in the agent worktree at spawn time. */
  worktreeHash: string;
}

/** Everything captured from one agent process run, replayed verbatim in tests. */
export interface CassetteRecording {
  /** All JSONL lines the agent wrote to stdout (captured from output file). */
  jsonlLines: string[];
  /** Content of signal.json written by the agent, or null if missing. */
  signalJson: Record<string, unknown> | null;
  /** Process exit code (0 = success). */
  exitCode: number;
  /** ISO timestamp when this cassette was recorded. */
  recordedAt: string;
  /**
   * All files the agent wrote to .cw/output/ (relative path → UTF-8 content),
   * excluding signal.json (which is captured separately in signalJson).
   * Restored during replay before onComplete fires so downstream readers
   * (e.g. readPhaseFiles, readTaskFiles) see the expected directory contents.
   * Optional for backward compatibility with cassettes recorded before this
   * field existed.
   */
  outputFiles?: Record<string, string>;
}

/** On-disk cassette file: versioned envelope pairing a key with its recording. */
export interface CassetteEntry {
  /** Schema version of the cassette file format; bump on breaking changes. */
  version: 1;
  key: CassetteKey;
  recording: CassetteRecording;
}

View File

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,285 @@
/**
* E2E Tests for Architect Workflow
*
* Tests the complete architect workflow from discussion through phase creation:
* - Discuss mode: Gather context, answer questions, capture decisions
* - Plan mode: Break initiative into phases
* - Full workflow: Discuss -> Plan -> Phase persistence
*
* Uses TestHarness from src/test/ for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { createTestHarness, type TestHarness } from '../index.js';
import type { AgentStoppedEvent } from '../../events/types.js';
describe('Architect Workflow E2E', () => {
  let harness: TestHarness;

  beforeEach(() => {
    // Fresh harness per test: provides the caller, mock agent manager, and
    // event capture used throughout this suite.
    harness = createTestHarness();
  });

  afterEach(() => {
    harness.cleanup();
    // Individual tests opt into fake timers; always restore real timers after.
    vi.useRealTimers();
  });

  describe('discuss mode', () => {
    it('should spawn architect in discuss mode and complete with decisions', async () => {
      vi.useFakeTimers();
      // Create initiative
      const initiative = await harness.createInitiative('Auth System');
      // Set up discuss completion scenario
      harness.setArchitectDiscussComplete('auth-discuss', [
        { topic: 'Auth Method', decision: 'JWT', reason: 'Stateless, scalable' },
        { topic: 'Token Storage', decision: 'httpOnly cookie', reason: 'XSS protection' },
      ], 'Auth approach decided');
      // Spawn architect in discuss mode
      const agent = await harness.caller.spawnArchitectDiscuss({
        name: 'auth-discuss',
        initiativeId: initiative.id,
      });
      expect(agent.mode).toBe('discuss');
      // Wait for completion (flushes the fake-timer-driven mock agent run)
      await harness.advanceTimers();
      // Verify agent stopped with context_complete
      const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
      expect(events).toHaveLength(1);
      expect(events[0].payload.reason).toBe('context_complete');
    });

    it('should pause on questions and resume with answers', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Auth System');
      // First, agent asks questions
      harness.setArchitectDiscussQuestions('auth-discuss', [
        { id: 'q1', question: 'JWT or Session?', options: [{ label: 'JWT' }, { label: 'Session' }] },
        { id: 'q2', question: 'OAuth providers?' },
      ]);
      const agent = await harness.caller.spawnArchitectDiscuss({
        name: 'auth-discuss',
        initiativeId: initiative.id,
      });
      await harness.advanceTimers();
      // Agent should be waiting
      const waitingAgent = await harness.caller.getAgent({ name: 'auth-discuss' });
      expect(waitingAgent?.status).toBe('waiting_for_input');
      // Get pending questions
      const pending = await harness.mockAgentManager.getPendingQuestions(agent.id);
      expect(pending?.questions).toHaveLength(2);
      // Now set up completion scenario for after resume
      harness.setArchitectDiscussComplete('auth-discuss', [
        { topic: 'Auth', decision: 'JWT', reason: 'User chose' },
      ], 'Complete');
      // Resume with answers
      await harness.caller.resumeAgent({
        name: 'auth-discuss',
        answers: { q1: 'JWT', q2: 'Google, GitHub' },
      });
      await harness.advanceTimers();
      // Should complete
      const finalAgent = await harness.caller.getAgent({ name: 'auth-discuss' });
      expect(finalAgent?.status).toBe('idle');
    });
  });

  describe('plan mode', () => {
    it('should spawn architect in plan mode and create phases', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Auth System');
      // Set up plan completion
      harness.setArchitectPlanComplete('auth-plan', [
        { number: 1, name: 'Database Setup', description: 'User table and auth schema', dependencies: [] },
        { number: 2, name: 'JWT Implementation', description: 'Token generation and validation', dependencies: [1] },
        { number: 3, name: 'Protected Routes', description: 'Middleware and route guards', dependencies: [2] },
      ]);
      const agent = await harness.caller.spawnArchitectPlan({
        name: 'auth-plan',
        initiativeId: initiative.id,
      });
      expect(agent.mode).toBe('plan');
      await harness.advanceTimers();
      // Verify stopped with plan_complete
      const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
      expect(events).toHaveLength(1);
      expect(events[0].payload.reason).toBe('plan_complete');
    });

    it('should persist phases from plan output', async () => {
      const initiative = await harness.createInitiative('Auth System');
      const phasesData = [
        { name: 'Foundation' },
        { name: 'Features' },
      ];
      // Persist phases (simulating what would happen after plan)
      const created = await harness.createPhasesFromPlan(initiative.id, phasesData);
      expect(created).toHaveLength(2);
      // Verify retrieval
      const phases = await harness.getPhases(initiative.id);
      expect(phases).toHaveLength(2);
      expect(phases[0].name).toBe('Foundation');
      expect(phases[1].name).toBe('Features');
    });
  });

  describe('plan conflict detection', () => {
    it('should reject if a plan agent is already running', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Auth System');
      // Set up a long-running plan agent (never completes during this test)
      harness.setArchitectPlanComplete('first-plan', [
        { number: 1, name: 'Phase 1', description: 'First', dependencies: [] },
      ]);
      // Use a delay so it stays running
      harness.setAgentScenario('first-plan', { status: 'done', delay: 999999 });
      await harness.caller.spawnArchitectPlan({
        name: 'first-plan',
        initiativeId: initiative.id,
      });
      // Agent should be running
      const agents = await harness.caller.listAgents();
      expect(agents.find(a => a.name === 'first-plan')?.status).toBe('running');
      // Second plan should be rejected
      await expect(
        harness.caller.spawnArchitectPlan({
          name: 'second-plan',
          initiativeId: initiative.id,
        }),
      ).rejects.toThrow(/already running/);
    });

    it('should auto-dismiss stale plan agents before checking', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Auth System');
      // Set up a plan agent that crashes immediately
      harness.setAgentScenario('stale-plan', { status: 'error', error: 'crashed' });
      await harness.caller.spawnArchitectPlan({
        name: 'stale-plan',
        initiativeId: initiative.id,
      });
      await harness.advanceTimers();
      // Should be crashed
      const agents = await harness.caller.listAgents();
      expect(agents.find(a => a.name === 'stale-plan')?.status).toBe('crashed');
      // New plan should succeed (stale one gets auto-dismissed)
      harness.setArchitectPlanComplete('new-plan', [
        { number: 1, name: 'Phase 1', description: 'First', dependencies: [] },
      ]);
      const agent = await harness.caller.spawnArchitectPlan({
        name: 'new-plan',
        initiativeId: initiative.id,
      });
      expect(agent.mode).toBe('plan');
    });

    it('should allow plan for different initiatives', async () => {
      vi.useFakeTimers();
      const init1 = await harness.createInitiative('Initiative 1');
      const init2 = await harness.createInitiative('Initiative 2');
      // Long-running agent on initiative 1
      harness.setAgentScenario('plan-1', { status: 'done', delay: 999999 });
      await harness.caller.spawnArchitectPlan({
        name: 'plan-1',
        initiativeId: init1.id,
      });
      // Plan on initiative 2 should succeed (conflict check is per-initiative)
      harness.setArchitectPlanComplete('plan-2', [
        { number: 1, name: 'Phase 1', description: 'First', dependencies: [] },
      ]);
      const agent = await harness.caller.spawnArchitectPlan({
        name: 'plan-2',
        initiativeId: init2.id,
      });
      expect(agent.mode).toBe('plan');
    });
  });

  describe('full workflow', () => {
    it('should complete discuss -> plan -> phases workflow', async () => {
      vi.useFakeTimers();
      // 1. Create initiative
      const initiative = await harness.createInitiative('Full Workflow Test');
      // 2. Discuss phase
      harness.setArchitectDiscussComplete('discuss-agent', [
        { topic: 'Scope', decision: 'MVP only', reason: 'Time constraint' },
      ], 'Scope defined');
      await harness.caller.spawnArchitectDiscuss({
        name: 'discuss-agent',
        initiativeId: initiative.id,
      });
      await harness.advanceTimers();
      // 3. Plan phase
      harness.setArchitectPlanComplete('plan-agent', [
        { number: 1, name: 'Core', description: 'Core functionality', dependencies: [] },
        { number: 2, name: 'Polish', description: 'UI and UX', dependencies: [1] },
      ]);
      await harness.caller.spawnArchitectPlan({
        name: 'plan-agent',
        initiativeId: initiative.id,
        contextSummary: 'MVP scope defined',
      });
      await harness.advanceTimers();
      // 4. Persist phases
      await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Core' },
        { name: 'Polish' },
      ]);
      // 5. Verify final state
      const phases = await harness.getPhases(initiative.id);
      expect(phases).toHaveLength(2);
      // Both agents should be idle
      const agents = await harness.caller.listAgents();
      expect(agents.filter(a => a.status === 'idle')).toHaveLength(2);
    });
  });
});

View File

@@ -0,0 +1,385 @@
/**
* E2E Tests for Detail Workflow
*
* Tests the complete detail workflow from phase through task creation:
* - Detail mode: Break phase into executable tasks
* - Q&A flow: Handle clarifying questions during detailing
* - Task persistence: Save child tasks from detail output
*
* Uses TestHarness from src/test/ for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { createTestHarness, type TestHarness } from '../index.js';
import type { AgentStoppedEvent, AgentWaitingEvent } from '../../events/types.js';
describe('Detail Workflow E2E', () => {
  let harness: TestHarness;

  beforeEach(() => {
    // Fresh harness per test: provides the caller, mock agent manager, and
    // event capture used throughout this suite.
    harness = createTestHarness();
  });

  afterEach(() => {
    harness.cleanup();
    // Individual tests opt into fake timers; always restore real timers after.
    vi.useRealTimers();
  });

  describe('spawn detail agent', () => {
    it('should spawn agent in detail mode and complete with tasks', async () => {
      vi.useFakeTimers();
      // Setup: Create initiative -> phase -> plan
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Auth Plan', 'Implement authentication');
      // Set detail scenario
      harness.setArchitectDetailComplete('detailer', [
        { number: 1, name: 'Create schema', content: 'User table', type: 'auto', dependencies: [] },
        { number: 2, name: 'Create endpoint', content: 'Login API', type: 'auto', dependencies: [1] },
      ]);
      // Spawn detail agent
      const agent = await harness.caller.spawnArchitectDetail({
        name: 'detailer',
        phaseId: phases[0].id,
      });
      expect(agent.mode).toBe('detail');
      // Advance timers for async completion
      await harness.advanceTimers();
      // Verify agent completed
      const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
      expect(events).toHaveLength(1);
      expect(events[0].payload.name).toBe('detailer');
      expect(events[0].payload.reason).toBe('detail_complete');
    });

    it('should pause on questions and resume', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Complex Plan');
      // Set questions scenario
      harness.setArchitectDetailQuestions('detailer', [
        { id: 'q1', question: 'How granular should tasks be?' },
      ]);
      const agent = await harness.caller.spawnArchitectDetail({
        name: 'detailer',
        phaseId: phases[0].id,
      });
      await harness.advanceTimers();
      // Verify agent is waiting for input
      const waitingAgent = await harness.caller.getAgent({ name: 'detailer' });
      expect(waitingAgent?.status).toBe('waiting_for_input');
      // Verify paused on questions (emits agent:waiting, not agent:stopped)
      const waitingEvents = harness.getEmittedEvents('agent:waiting') as AgentWaitingEvent[];
      expect(waitingEvents).toHaveLength(1);
      expect(waitingEvents[0].payload.questions).toHaveLength(1);
      // Get pending questions
      const pending = await harness.mockAgentManager.getPendingQuestions(agent.id);
      expect(pending?.questions).toHaveLength(1);
      expect(pending?.questions[0].question).toBe('How granular should tasks be?');
      // Set completion scenario for resume
      harness.setArchitectDetailComplete('detailer', [
        { number: 1, name: 'Task 1', content: 'Single task', type: 'auto', dependencies: [] },
      ]);
      // Resume with answer
      await harness.caller.resumeAgent({
        name: 'detailer',
        answers: { q1: 'Very granular' },
      });
      await harness.advanceTimers();
      // Verify completed after resume
      const finalAgent = await harness.caller.getAgent({ name: 'detailer' });
      expect(finalAgent?.status).toBe('idle');
    });

    it('should handle multiple questions', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Multi-Q Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Complex Plan');
      // Set multiple questions scenario
      harness.setArchitectDetailQuestions('detailer', [
        { id: 'q1', question: 'What task granularity?', options: [{ label: 'Fine' }, { label: 'Coarse' }] },
        { id: 'q2', question: 'Include checkpoints?' },
        { id: 'q3', question: 'Any blocking dependencies?' },
      ]);
      const agent = await harness.caller.spawnArchitectDetail({
        name: 'detailer',
        phaseId: phases[0].id,
      });
      await harness.advanceTimers();
      // Verify all questions received
      const pending = await harness.mockAgentManager.getPendingQuestions(agent.id);
      expect(pending?.questions).toHaveLength(3);
      // Set completion scenario for resume
      harness.setArchitectDetailComplete('detailer', [
        { number: 1, name: 'Task 1', content: 'First task', type: 'auto', dependencies: [] },
        { number: 2, name: 'Task 2', content: 'Second task', type: 'auto', dependencies: [1] },
        { number: 3, name: 'Verify', content: 'Verify all', type: 'checkpoint:human-verify', dependencies: [2] },
      ]);
      // Resume with all answers
      await harness.caller.resumeAgent({
        name: 'detailer',
        answers: {
          q1: 'Fine',
          q2: 'Yes, add human verification',
          q3: 'Tasks 1 and 2 are sequential',
        },
      });
      await harness.advanceTimers();
      // Verify completed
      const finalAgent = await harness.caller.getAgent({ name: 'detailer' });
      expect(finalAgent?.status).toBe('idle');
    });
  });

  describe('detail conflict detection', () => {
    it('should reject if a detail agent is already running for the same phase', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      // Long-running detail agent
      harness.setAgentScenario('detailer-1', { status: 'done', delay: 999999 });
      await harness.caller.spawnArchitectDetail({
        name: 'detailer-1',
        phaseId: phases[0].id,
      });
      // Second detail for same phase should be rejected
      await expect(
        harness.caller.spawnArchitectDetail({
          name: 'detailer-2',
          phaseId: phases[0].id,
        }),
      ).rejects.toThrow(/already running/);
    });

    it('should auto-dismiss stale detail agents before checking', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      // Detail agent that crashes immediately
      harness.setAgentScenario('stale-detailer', { status: 'error', error: 'crashed' });
      await harness.caller.spawnArchitectDetail({
        name: 'stale-detailer',
        phaseId: phases[0].id,
      });
      await harness.advanceTimers();
      // New detail should succeed (the crashed agent no longer blocks the phase)
      harness.setArchitectDetailComplete('new-detailer', [
        { number: 1, name: 'Task 1', content: 'Do it', type: 'auto', dependencies: [] },
      ]);
      const agent = await harness.caller.spawnArchitectDetail({
        name: 'new-detailer',
        phaseId: phases[0].id,
      });
      expect(agent.mode).toBe('detail');
    });

    it('should allow detail for different phases simultaneously', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
        { name: 'Phase 2' },
      ]);
      // Long-running agent on phase 1
      harness.setAgentScenario('detailer-p1', { status: 'done', delay: 999999 });
      await harness.caller.spawnArchitectDetail({
        name: 'detailer-p1',
        phaseId: phases[0].id,
      });
      // Detail on phase 2 should succeed (conflict check is per-phase)
      harness.setArchitectDetailComplete('detailer-p2', [
        { number: 1, name: 'Task 1', content: 'Do it', type: 'auto', dependencies: [] },
      ]);
      const agent = await harness.caller.spawnArchitectDetail({
        name: 'detailer-p2',
        phaseId: phases[1].id,
      });
      expect(agent.mode).toBe('detail');
    });
  });

  describe('task persistence', () => {
    it('should create tasks from detail output', async () => {
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Auth Plan');
      // Create tasks from detail output
      await harness.caller.createChildTasks({
        parentTaskId: detailTask.id,
        tasks: [
          { number: 1, name: 'Schema', description: 'Create tables', type: 'auto', dependencies: [] },
          { number: 2, name: 'API', description: 'Create endpoints', type: 'auto', dependencies: [1] },
          { number: 3, name: 'Verify', description: 'Test flow', type: 'checkpoint:human-verify', dependencies: [2] },
        ],
      });
      // Verify tasks created
      const tasks = await harness.getChildTasks(detailTask.id);
      expect(tasks).toHaveLength(3);
      expect(tasks[0].name).toBe('Schema');
      expect(tasks[1].name).toBe('API');
      expect(tasks[2].name).toBe('Verify');
      expect(tasks[2].type).toBe('checkpoint:human-verify');
    });

    it('should handle all task types', async () => {
      const initiative = await harness.createInitiative('Task Types Test');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Mixed Tasks');
      // Create tasks with all types
      await harness.caller.createChildTasks({
        parentTaskId: detailTask.id,
        tasks: [
          { number: 1, name: 'Auto Task', description: 'Automated work', type: 'auto' },
          { number: 2, name: 'Human Verify', description: 'Visual check', type: 'checkpoint:human-verify', dependencies: [1] },
          { number: 3, name: 'Decision', description: 'Choose approach', type: 'checkpoint:decision', dependencies: [2] },
          { number: 4, name: 'Human Action', description: 'Manual step', type: 'checkpoint:human-action', dependencies: [3] },
        ],
      });
      const tasks = await harness.getChildTasks(detailTask.id);
      expect(tasks).toHaveLength(4);
      expect(tasks[0].type).toBe('auto');
      expect(tasks[1].type).toBe('checkpoint:human-verify');
      expect(tasks[2].type).toBe('checkpoint:decision');
      expect(tasks[3].type).toBe('checkpoint:human-action');
    });

    it('should create task dependencies', async () => {
      const initiative = await harness.createInitiative('Dependencies Test');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Dependent Tasks');
      // Create tasks with complex dependencies (diamond: A -> B, A -> C, B+C -> D)
      await harness.caller.createChildTasks({
        parentTaskId: detailTask.id,
        tasks: [
          { number: 1, name: 'Task A', description: 'No deps', type: 'auto' },
          { number: 2, name: 'Task B', description: 'Depends on A', type: 'auto', dependencies: [1] },
          { number: 3, name: 'Task C', description: 'Depends on A', type: 'auto', dependencies: [1] },
          { number: 4, name: 'Task D', description: 'Depends on B and C', type: 'auto', dependencies: [2, 3] },
        ],
      });
      const tasks = await harness.getChildTasks(detailTask.id);
      expect(tasks).toHaveLength(4);
      // All tasks should be created with correct names
      expect(tasks.map(t => t.name)).toEqual(['Task A', 'Task B', 'Task C', 'Task D']);
    });
  });

  describe('full detail workflow', () => {
    it('should complete initiative -> phase -> plan -> detail -> tasks workflow', async () => {
      vi.useFakeTimers();
      // 1. Create initiative
      const initiative = await harness.createInitiative('Full Workflow Test');
      // 2. Create phase
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Auth Phase' },
      ]);
      // 3. Create plan
      const detailTask = await harness.createDetailTask(phases[0].id, 'Auth Plan', 'Implement JWT auth');
      // 4. Spawn detail agent
      harness.setArchitectDetailComplete('detailer', [
        { number: 1, name: 'Create user schema', content: 'Define User model', type: 'auto', dependencies: [] },
        { number: 2, name: 'Implement JWT', content: 'Token generation', type: 'auto', dependencies: [1] },
        { number: 3, name: 'Protected routes', content: 'Middleware', type: 'auto', dependencies: [2] },
        { number: 4, name: 'Verify auth', content: 'Test login flow', type: 'checkpoint:human-verify', dependencies: [3] },
      ]);
      await harness.caller.spawnArchitectDetail({
        name: 'detailer',
        phaseId: phases[0].id,
      });
      await harness.advanceTimers();
      // 5. Verify agent completed
      const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
      expect(events).toHaveLength(1);
      expect(events[0].payload.reason).toBe('detail_complete');
      // 6. Persist tasks (simulating what orchestrator would do after detail)
      await harness.caller.createChildTasks({
        parentTaskId: detailTask.id,
        tasks: [
          { number: 1, name: 'Create user schema', description: 'Define User model', type: 'auto', dependencies: [] },
          { number: 2, name: 'Implement JWT', description: 'Token generation', type: 'auto', dependencies: [1] },
          { number: 3, name: 'Protected routes', description: 'Middleware', type: 'auto', dependencies: [2] },
          { number: 4, name: 'Verify auth', description: 'Test login flow', type: 'checkpoint:human-verify', dependencies: [3] },
        ],
      });
      // 7. Verify final state
      const tasks = await harness.getChildTasks(detailTask.id);
      expect(tasks).toHaveLength(4);
      expect(tasks[0].name).toBe('Create user schema');
      expect(tasks[3].type).toBe('checkpoint:human-verify');
      // Agent should be idle
      const finalAgent = await harness.caller.getAgent({ name: 'detailer' });
      expect(finalAgent?.status).toBe('idle');
    });
  });
});

View File

@@ -0,0 +1,426 @@
/**
* E2E Tests for Edge Cases
*
* Tests edge case scenarios in dispatch/coordination flow:
* - Agent crashes during task
* - Agent waiting for input
* - Task blocking
* - Merge conflicts
*
* Uses TestHarness from src/test/ for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
type TestHarness,
} from '../index.js';
import type {
AgentSpawnedEvent,
AgentCrashedEvent,
AgentWaitingEvent,
TaskBlockedEvent,
MergeConflictedEvent,
} from '../../events/types.js';
describe('E2E Edge Cases', () => {
let harness: TestHarness;
beforeEach(() => {
harness = createTestHarness();
});
afterEach(() => {
harness.cleanup();
vi.useRealTimers();
});
describe('Agent crash during task', () => {
it('emits agent:spawned then agent:crashed events', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent for DispatchManager
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await harness.advanceTimers();
// Set error scenario BEFORE dispatch
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
status: 'error',
error: 'Token limit exceeded',
});
await harness.dispatchManager.queue(taskAId);
harness.clearEvents();
await harness.dispatchManager.dispatchNext();
await harness.advanceTimers();
// Verify: agent:spawned event emitted
const spawnedEvents = harness.getEventsByType('agent:spawned');
expect(spawnedEvents.length).toBe(1);
const spawnedPayload = (spawnedEvents[0] as AgentSpawnedEvent).payload;
expect(spawnedPayload.taskId).toBe(taskAId);
// Verify: agent:crashed event emitted
const crashedEvents = harness.getEventsByType('agent:crashed');
expect(crashedEvents.length).toBe(1);
const crashedPayload = (crashedEvents[0] as AgentCrashedEvent).payload;
expect(crashedPayload.taskId).toBe(taskAId);
expect(crashedPayload.error).toBe('Token limit exceeded');
});
it('task status should NOT be completed after crash', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await harness.advanceTimers();
// Set error scenario
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
status: 'error',
error: 'Token limit exceeded',
});
await harness.dispatchManager.queue(taskAId);
await harness.dispatchManager.dispatchNext();
await harness.advanceTimers();
// Task status should be 'in_progress' (not 'completed')
const task = await harness.taskRepository.findById(taskAId);
expect(task?.status).toBe('in_progress');
});
it('captures error message in agent result', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await harness.advanceTimers();
// Set error scenario
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
status: 'error',
error: 'Out of memory',
});
await harness.dispatchManager.queue(taskAId);
const dispatchResult = await harness.dispatchManager.dispatchNext();
await harness.advanceTimers();
// Get agent result - should have error
const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
expect(agentResult).not.toBeNull();
expect(agentResult?.success).toBe(false);
expect(agentResult?.message).toBe('Out of memory');
});
});
describe('Agent waiting for input and resume', () => {
it('emits agent:waiting event with question', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await harness.advanceTimers();
// Set questions scenario
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
status: 'questions',
questions: [{ id: 'q1', question: 'Which database should I use?' }],
});
await harness.dispatchManager.queue(taskAId);
harness.clearEvents();
await harness.dispatchManager.dispatchNext();
await harness.advanceTimers();
// Verify: agent:waiting event emitted
const waitingEvents = harness.getEventsByType('agent:waiting');
expect(waitingEvents.length).toBe(1);
const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
expect(waitingPayload.taskId).toBe(taskAId);
expect(waitingPayload.questions[0].question).toBe('Which database should I use?');
});
});
describe('Task blocking', () => {
it('blocked task appears in blocked list from getQueueState', async () => {
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
await harness.dispatchManager.queue(taskAId);
await harness.dispatchManager.blockTask(taskAId, 'Waiting for user decision');
const queueState = await harness.dispatchManager.getQueueState();
expect(queueState.blocked.length).toBe(1);
expect(queueState.blocked[0].taskId).toBe(taskAId);
expect(queueState.blocked[0].reason).toBe('Waiting for user decision');
});
it('blocked task emits task:blocked event', async () => {
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
await harness.dispatchManager.queue(taskAId);
harness.clearEvents();
await harness.dispatchManager.blockTask(taskAId, 'Waiting for user decision');
const blockedEvents = harness.getEventsByType('task:blocked');
expect(blockedEvents.length).toBe(1);
const blockedPayload = (blockedEvents[0] as TaskBlockedEvent).payload;
expect(blockedPayload.taskId).toBe(taskAId);
expect(blockedPayload.reason).toBe('Waiting for user decision');
});
it('getNextDispatchable does not return blocked task', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
const taskBId = seeded.tasks.get('Task B')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await harness.advanceTimers();
// Queue Task A and block it
await harness.dispatchManager.queue(taskAId);
await harness.dispatchManager.blockTask(taskAId, 'Blocked for testing');
// Queue Task B (not blocked, but depends on Task A which needs to be completed first)
// Actually Task B depends on Task A in SIMPLE_FIXTURE, but the dependency
// isn't loaded into the queue. Queue a fresh task instead.
// For this test, we just verify blocked task is not returned.
// Get next dispatchable - should be null since Task A is blocked
const next = await harness.dispatchManager.getNextDispatchable();
expect(next).toBeNull();
});
it('task status is set to blocked in database', async () => {
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
await harness.dispatchManager.queue(taskAId);
await harness.dispatchManager.blockTask(taskAId, 'Blocked for testing');
const task = await harness.taskRepository.findById(taskAId);
expect(task?.status).toBe('blocked');
});
});
describe('Merge conflict handling', () => {
  it('detects merge conflict and emits merge:conflicted event', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Mark task as completed (required for merge)
    await harness.taskRepository.update(taskAId, { status: 'completed' });
    // Create a worktree for this task
    const worktreeId = `wt-${taskAId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeId, 'feature-task-a');
    // Create agent in agentRepository with worktreeId
    // (coordinationManager.queueMerge looks up agent by taskId).
    // The created record is never read back, so the return value is not
    // bound — matches the sibling tests below (was an unused local).
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      worktreeId,
      taskId: taskAId,
      status: 'idle',
    });
    // Set up merge conflict result BEFORE processMerges
    harness.worktreeManager.setMergeResult(worktreeId, {
      success: false,
      conflicts: ['src/shared.ts', 'src/types.ts'],
      message: 'Merge conflict in 2 files',
    });
    // Queue for merge
    await harness.coordinationManager.queueMerge(taskAId);
    harness.clearEvents();
    // Process merges - should hit conflict
    const results = await harness.coordinationManager.processMerges('main');
    // Verify: merge result indicates failure
    expect(results.length).toBe(1);
    expect(results[0].success).toBe(false);
    expect(results[0].conflicts).toEqual(['src/shared.ts', 'src/types.ts']);
    // Verify: merge:conflicted event emitted
    const conflictEvents = harness.getEventsByType('merge:conflicted');
    expect(conflictEvents.length).toBe(1);
    const conflictPayload = (conflictEvents[0] as MergeConflictedEvent).payload;
    expect(conflictPayload.taskId).toBe(taskAId);
    expect(conflictPayload.conflictingFiles).toEqual(['src/shared.ts', 'src/types.ts']);
  });
  it('conflict appears in queue state as conflicted', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Mark task as completed
    await harness.taskRepository.update(taskAId, { status: 'completed' });
    // Create worktree
    const worktreeId = `wt-${taskAId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeId, 'feature-task-a');
    // Create agent in agentRepository
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      worktreeId,
      taskId: taskAId,
      status: 'idle',
    });
    // Set up merge conflict
    harness.worktreeManager.setMergeResult(worktreeId, {
      success: false,
      conflicts: ['src/shared.ts'],
      message: 'Merge conflict',
    });
    // Queue and process
    await harness.coordinationManager.queueMerge(taskAId);
    await harness.coordinationManager.processMerges('main');
    // Check queue state
    const queueState = await harness.coordinationManager.getQueueState();
    expect(queueState.conflicted.length).toBe(1);
    expect(queueState.conflicted[0].taskId).toBe(taskAId);
    expect(queueState.conflicted[0].conflicts).toContain('src/shared.ts');
  });
  it('handleConflict creates conflict-resolution task', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Mark task as completed
    await harness.taskRepository.update(taskAId, { status: 'completed' });
    // Create worktree
    const worktreeId = `wt-${taskAId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeId, 'feature-task-a');
    // Create agent in agentRepository
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      worktreeId,
      taskId: taskAId,
      status: 'idle',
    });
    // Set up merge conflict
    harness.worktreeManager.setMergeResult(worktreeId, {
      success: false,
      conflicts: ['src/shared.ts', 'src/types.ts'],
      message: 'Merge conflict',
    });
    // Queue and process (handleConflict is called automatically)
    await harness.coordinationManager.queueMerge(taskAId);
    await harness.coordinationManager.processMerges('main');
    // Verify: original task is now blocked
    const originalTask = await harness.taskRepository.findById(taskAId);
    expect(originalTask?.status).toBe('blocked');
    // Verify: task:queued event emitted for conflict resolution task
    const queuedEvents = harness.getEventsByType('task:queued');
    const conflictTaskEvent = queuedEvents.find(
      (e) => e.payload && (e.payload as { taskId: string }).taskId !== taskAId
    );
    expect(conflictTaskEvent).toBeDefined();
  });
  it('successful merge after clearing conflict result', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    const taskBId = seeded.tasks.get('Task B')!;
    // Set up Task A for merge (with conflict)
    await harness.taskRepository.update(taskAId, { status: 'completed' });
    const worktreeIdA = `wt-${taskAId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeIdA, 'feature-task-a');
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      worktreeId: worktreeIdA,
      taskId: taskAId,
      status: 'idle',
    });
    // Set conflict for Task A
    harness.worktreeManager.setMergeResult(worktreeIdA, {
      success: false,
      conflicts: ['src/shared.ts'],
      message: 'Merge conflict',
    });
    // Process Task A merge (will conflict)
    await harness.coordinationManager.queueMerge(taskAId);
    const conflictResults = await harness.coordinationManager.processMerges('main');
    expect(conflictResults[0].success).toBe(false);
    // Now set up Task B for merge (should succeed)
    await harness.taskRepository.update(taskBId, { status: 'completed' });
    const worktreeIdB = `wt-${taskBId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeIdB, 'feature-task-b');
    await harness.agentRepository.create({
      name: `agent-${taskBId.slice(0, 6)}`,
      worktreeId: worktreeIdB,
      taskId: taskBId,
      status: 'idle',
    });
    // Task B merge should succeed (default behavior)
    await harness.coordinationManager.queueMerge(taskBId);
    harness.clearEvents();
    const successResults = await harness.coordinationManager.processMerges('main');
    // Verify Task B merged successfully
    expect(successResults.length).toBe(1);
    expect(successResults[0].taskId).toBe(taskBId);
    expect(successResults[0].success).toBe(true);
    // Verify Task B in merged list
    const queueState = await harness.coordinationManager.getQueueState();
    expect(queueState.merged).toContain(taskBId);
  });
});
});

View File

@@ -0,0 +1,551 @@
/**
* E2E Tests for Extended Scenarios
*
* Tests extended scenarios in dispatch/coordination flow:
* - Conflict hand-back round-trip (conflict -> agent resolves -> merge succeeds)
* - Multi-agent parallel work and completion
*
 * Uses the TestHarness exported from ../index.js for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
PARALLEL_FIXTURE,
COMPLEX_FIXTURE,
type TestHarness,
} from '../index.js';
import type {
MergeConflictedEvent,
MergeCompletedEvent,
TaskQueuedEvent,
AgentStoppedEvent,
AgentCrashedEvent,
} from '../../events/types.js';
describe('E2E Extended Scenarios', () => {
  let harness: TestHarness;
  beforeEach(() => {
    // Fresh, fully-wired harness per test; no state leaks between cases.
    harness = createTestHarness();
  });
  afterEach(() => {
    // Always restore real timers — several tests enable fake timers.
    harness.cleanup();
    vi.useRealTimers();
  });
  // ===========================================================================
  // Conflict Hand-back Round-trip
  // ===========================================================================
  describe('Conflict hand-back round-trip', () => {
    it('conflict triggers resolution task, agent resolves, merge succeeds', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
      const taskAId = seeded.tasks.get('Task A')!;
      // Step 1: Complete Task A
      await harness.taskRepository.update(taskAId, { status: 'completed' });
      // Step 2: Create agent in agentRepository with worktreeId
      const worktreeId = `wt-${taskAId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${taskAId.slice(0, 6)}`,
        worktreeId,
        taskId: taskAId,
        status: 'idle',
      });
      // Step 3: Create worktree via MockWorktreeManager
      await harness.worktreeManager.create(worktreeId, 'feature-task-a');
      // Step 4: Set merge conflict result for first merge attempt
      harness.worktreeManager.setMergeResult(worktreeId, {
        success: false,
        conflicts: ['src/shared.ts', 'src/types.ts'],
        message: 'Merge conflict in 2 files',
      });
      // Step 5: Queue and process merge (should fail with conflict)
      await harness.coordinationManager.queueMerge(taskAId);
      harness.clearEvents();
      const conflictResults = await harness.coordinationManager.processMerges('main');
      // Verify: merge failed with conflict
      expect(conflictResults.length).toBe(1);
      expect(conflictResults[0].success).toBe(false);
      expect(conflictResults[0].conflicts).toEqual(['src/shared.ts', 'src/types.ts']);
      // Verify: merge:conflicted event emitted
      const conflictedEvents = harness.getEventsByType('merge:conflicted');
      expect(conflictedEvents.length).toBe(1);
      const conflictPayload = (conflictedEvents[0] as MergeConflictedEvent).payload;
      expect(conflictPayload.taskId).toBe(taskAId);
      expect(conflictPayload.conflictingFiles).toEqual(['src/shared.ts', 'src/types.ts']);
      // Verify: original task marked blocked
      const originalTask = await harness.taskRepository.findById(taskAId);
      expect(originalTask?.status).toBe('blocked');
      // Note: CoordinationManager.handleConflict updates task status to blocked
      // but does not emit task:blocked event (that's emitted by DispatchManager.blockTask)
      // Verify: task:queued event emitted for resolution task
      const queuedEvents = harness.getEventsByType('task:queued');
      // The resolution task is identified as the queued task that is NOT the original.
      const resolutionTaskEvent = queuedEvents.find(
        (e) => (e as TaskQueuedEvent).payload.taskId !== taskAId
      );
      expect(resolutionTaskEvent).toBeDefined();
      // Step 6: Clear the merge conflict (setMergeResult to success)
      harness.worktreeManager.setMergeResult(worktreeId, {
        success: true,
        message: 'Merged successfully',
      });
      // Step 7: Re-queue original task for merge (simulating resolution completed)
      // In a real system, the resolution task would fix conflicts and re-queue
      // Here we simulate by clearing conflict and re-queuing
      await harness.taskRepository.update(taskAId, { status: 'completed' });
      harness.clearEvents();
      await harness.coordinationManager.queueMerge(taskAId);
      const successResults = await harness.coordinationManager.processMerges('main');
      // Verify: merge succeeded
      expect(successResults.length).toBe(1);
      expect(successResults[0].taskId).toBe(taskAId);
      expect(successResults[0].success).toBe(true);
      // Verify: merge:completed event for original task
      const completedEvents = harness.getEventsByType('merge:completed');
      expect(completedEvents.length).toBe(1);
      const completedPayload = (completedEvents[0] as MergeCompletedEvent).payload;
      expect(completedPayload.taskId).toBe(taskAId);
    });
    it('conflict resolution preserves original task context', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
      const taskAId = seeded.tasks.get('Task A')!;
      // Complete Task A
      await harness.taskRepository.update(taskAId, { status: 'completed' });
      // Create agent and worktree
      const worktreeId = `wt-${taskAId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${taskAId.slice(0, 6)}`,
        worktreeId,
        taskId: taskAId,
        status: 'idle',
      });
      await harness.worktreeManager.create(worktreeId, 'feature-task-a');
      // Set conflict
      harness.worktreeManager.setMergeResult(worktreeId, {
        success: false,
        conflicts: ['src/conflict-file.ts'],
        message: 'Merge conflict',
      });
      // Process merge to trigger conflict handling
      await harness.coordinationManager.queueMerge(taskAId);
      harness.clearEvents();
      await harness.coordinationManager.processMerges('main');
      // Get the resolution task from task:queued events
      const queuedEvents = harness.getEventsByType('task:queued');
      expect(queuedEvents.length).toBeGreaterThan(0);
      // Find resolution task (the one that isn't the original task)
      const resolutionTaskQueuedEvent = queuedEvents.find(
        (e) => (e as TaskQueuedEvent).payload.taskId !== taskAId
      );
      expect(resolutionTaskQueuedEvent).toBeDefined();
      // Resolution task should exist and link back to original task
      const resolutionTaskId = (resolutionTaskQueuedEvent as TaskQueuedEvent).payload.taskId;
      const resolutionTask = await harness.taskRepository.findById(resolutionTaskId);
      expect(resolutionTask).toBeDefined();
      // Resolution task description should contain conflict file info
      expect(resolutionTask?.description).toContain('conflict');
    });
    it('multiple sequential conflicts resolved in order', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
      const taskAId = seeded.tasks.get('Task A')!;
      const taskBId = seeded.tasks.get('Task B')!;
      // Complete both tasks
      await harness.taskRepository.update(taskAId, { status: 'completed' });
      await harness.taskRepository.update(taskBId, { status: 'completed' });
      // Set up worktrees and agents for both tasks
      const worktreeIdA = `wt-${taskAId.slice(0, 6)}`;
      const worktreeIdB = `wt-${taskBId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${taskAId.slice(0, 6)}`,
        worktreeId: worktreeIdA,
        taskId: taskAId,
        status: 'idle',
      });
      await harness.agentRepository.create({
        name: `agent-${taskBId.slice(0, 6)}`,
        worktreeId: worktreeIdB,
        taskId: taskBId,
        status: 'idle',
      });
      await harness.worktreeManager.create(worktreeIdA, 'feature-task-a');
      await harness.worktreeManager.create(worktreeIdB, 'feature-task-b');
      // Set conflicts for both
      harness.worktreeManager.setMergeResult(worktreeIdA, {
        success: false,
        conflicts: ['src/shared-a.ts'],
        message: 'Conflict A',
      });
      harness.worktreeManager.setMergeResult(worktreeIdB, {
        success: false,
        conflicts: ['src/shared-b.ts'],
        message: 'Conflict B',
      });
      // Queue both for merge
      await harness.coordinationManager.queueMerge(taskAId);
      await harness.coordinationManager.queueMerge(taskBId);
      harness.clearEvents();
      // Process merges - both should fail
      const conflictResults = await harness.coordinationManager.processMerges('main');
      expect(conflictResults.filter((r) => !r.success).length).toBe(2);
      // Verify both are in conflicted state
      const queueState = await harness.coordinationManager.getQueueState();
      expect(queueState.conflicted.length).toBe(2);
      // Resolve Task A's conflict
      harness.worktreeManager.setMergeResult(worktreeIdA, {
        success: true,
        message: 'Merged A',
      });
      await harness.taskRepository.update(taskAId, { status: 'completed' });
      await harness.coordinationManager.queueMerge(taskAId);
      harness.clearEvents();
      const resultA = await harness.coordinationManager.processMerges('main');
      expect(resultA.length).toBe(1);
      expect(resultA[0].taskId).toBe(taskAId);
      expect(resultA[0].success).toBe(true);
      // Verify merge:completed for A
      // (clearEvents above means only this batch's events are visible)
      const completedEventsA = harness.getEventsByType('merge:completed');
      expect(completedEventsA.length).toBe(1);
      expect((completedEventsA[0] as MergeCompletedEvent).payload.taskId).toBe(taskAId);
      // Resolve Task B's conflict
      harness.worktreeManager.setMergeResult(worktreeIdB, {
        success: true,
        message: 'Merged B',
      });
      await harness.taskRepository.update(taskBId, { status: 'completed' });
      await harness.coordinationManager.queueMerge(taskBId);
      harness.clearEvents();
      const resultB = await harness.coordinationManager.processMerges('main');
      expect(resultB.length).toBe(1);
      expect(resultB[0].taskId).toBe(taskBId);
      expect(resultB[0].success).toBe(true);
      // Verify merge:completed for B
      const completedEventsB = harness.getEventsByType('merge:completed');
      expect(completedEventsB.length).toBe(1);
      expect((completedEventsB[0] as MergeCompletedEvent).payload.taskId).toBe(taskBId);
      // Verify final merged list has both
      const finalState = await harness.coordinationManager.getQueueState();
      expect(finalState.merged).toContain(taskAId);
      expect(finalState.merged).toContain(taskBId);
    });
  });
  // ===========================================================================
  // Multi-agent Parallel Work
  // ===========================================================================
  describe('Multi-agent parallel work', () => {
    it('multiple agents complete tasks in parallel', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(PARALLEL_FIXTURE);
      const taskXId = seeded.tasks.get('Task X')!;
      const taskYId = seeded.tasks.get('Task Y')!;
      const taskPId = seeded.tasks.get('Task P')!;
      const taskQId = seeded.tasks.get('Task Q')!;
      // Pre-seed 3 idle agents
      await harness.agentManager.spawn({
        name: 'pool-agent-1',
        taskId: 'placeholder-1',
        prompt: 'placeholder',
      });
      await harness.agentManager.spawn({
        name: 'pool-agent-2',
        taskId: 'placeholder-2',
        prompt: 'placeholder',
      });
      await harness.agentManager.spawn({
        name: 'pool-agent-3',
        taskId: 'placeholder-3',
        prompt: 'placeholder',
      });
      await harness.advanceTimers();
      harness.clearEvents();
      // Queue all 4 tasks
      await harness.dispatchManager.queue(taskXId);
      await harness.dispatchManager.queue(taskYId);
      await harness.dispatchManager.queue(taskPId);
      await harness.dispatchManager.queue(taskQId);
      harness.clearEvents();
      // Dispatch 3 tasks in parallel (3 agents working)
      const result1 = await harness.dispatchManager.dispatchNext();
      const result2 = await harness.dispatchManager.dispatchNext();
      const result3 = await harness.dispatchManager.dispatchNext();
      expect(result1.success).toBe(true);
      expect(result2.success).toBe(true);
      expect(result3.success).toBe(true);
      // All 3 should be dispatched to different agents
      const dispatchedIds = [result1.agentId, result2.agentId, result3.agentId];
      expect(new Set(dispatchedIds).size).toBe(3);
      // Advance timers to complete all 3 agents
      await harness.advanceTimers();
      // Verify: 3 agent:stopped events
      const stoppedEvents = harness.getEventsByType('agent:stopped');
      expect(stoppedEvents.length).toBe(3);
      // Complete all 3 tasks
      await harness.dispatchManager.completeTask(result1.taskId!);
      await harness.dispatchManager.completeTask(result2.taskId!);
      await harness.dispatchManager.completeTask(result3.taskId!);
      // Dispatch remaining task (Task Q)
      const result4 = await harness.dispatchManager.dispatchNext();
      expect(result4.success).toBe(true);
      await harness.advanceTimers();
      await harness.dispatchManager.completeTask(result4.taskId!);
      // Verify: all 4 tasks completed in database
      const tasks = await Promise.all([
        harness.taskRepository.findById(taskXId),
        harness.taskRepository.findById(taskYId),
        harness.taskRepository.findById(taskPId),
        harness.taskRepository.findById(taskQId),
      ]);
      expect(tasks.every((t) => t?.status === 'completed')).toBe(true);
    });
    it('parallel merges process in correct dependency order', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
      const task1AId = seeded.tasks.get('Task 1A')!;
      const task1BId = seeded.tasks.get('Task 1B')!;
      const task2AId = seeded.tasks.get('Task 2A')!;
      const task3AId = seeded.tasks.get('Task 3A')!;
      const task4AId = seeded.tasks.get('Task 4A')!;
      // Complete Task 1A and Task 1B (no dependencies)
      await harness.taskRepository.update(task1AId, { status: 'completed' });
      await harness.taskRepository.update(task1BId, { status: 'completed' });
      // Set up worktrees and agents for both
      const wt1A = `wt-${task1AId.slice(0, 6)}`;
      const wt1B = `wt-${task1BId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${task1AId.slice(0, 6)}`,
        worktreeId: wt1A,
        taskId: task1AId,
        status: 'idle',
      });
      await harness.agentRepository.create({
        name: `agent-${task1BId.slice(0, 6)}`,
        worktreeId: wt1B,
        taskId: task1BId,
        status: 'idle',
      });
      await harness.worktreeManager.create(wt1A, 'feature-1a');
      await harness.worktreeManager.create(wt1B, 'feature-1b');
      // Queue both for merge
      await harness.coordinationManager.queueMerge(task1AId);
      await harness.coordinationManager.queueMerge(task1BId);
      harness.clearEvents();
      // Process merges - both should succeed (no dependencies between them)
      const results1 = await harness.coordinationManager.processMerges('main');
      expect(results1.length).toBe(2);
      expect(results1.every((r) => r.success)).toBe(true);
      // Verify: merge:completed for both in same batch
      const completed1 = harness.getEventsByType('merge:completed');
      expect(completed1.length).toBe(2);
      // Complete Task 2A (depends on 1A) and Task 3A (depends on 1B)
      await harness.taskRepository.update(task2AId, { status: 'completed' });
      await harness.taskRepository.update(task3AId, { status: 'completed' });
      const wt2A = `wt-${task2AId.slice(0, 6)}`;
      const wt3A = `wt-${task3AId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${task2AId.slice(0, 6)}`,
        worktreeId: wt2A,
        taskId: task2AId,
        status: 'idle',
      });
      await harness.agentRepository.create({
        name: `agent-${task3AId.slice(0, 6)}`,
        worktreeId: wt3A,
        taskId: task3AId,
        status: 'idle',
      });
      await harness.worktreeManager.create(wt2A, 'feature-2a');
      await harness.worktreeManager.create(wt3A, 'feature-3a');
      // Queue and merge
      await harness.coordinationManager.queueMerge(task2AId);
      await harness.coordinationManager.queueMerge(task3AId);
      harness.clearEvents();
      const results2 = await harness.coordinationManager.processMerges('main');
      expect(results2.length).toBe(2);
      expect(results2.every((r) => r.success)).toBe(true);
      // Complete Task 4A (depends on 2A and 3A)
      await harness.taskRepository.update(task4AId, { status: 'completed' });
      const wt4A = `wt-${task4AId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${task4AId.slice(0, 6)}`,
        worktreeId: wt4A,
        taskId: task4AId,
        status: 'idle',
      });
      await harness.worktreeManager.create(wt4A, 'feature-4a');
      // Queue and merge
      await harness.coordinationManager.queueMerge(task4AId);
      harness.clearEvents();
      const results3 = await harness.coordinationManager.processMerges('main');
      expect(results3.length).toBe(1);
      expect(results3[0].taskId).toBe(task4AId);
      expect(results3[0].success).toBe(true);
      // Verify: final merge order respects dependency graph
      const finalState = await harness.coordinationManager.getQueueState();
      expect(finalState.merged).toContain(task1AId);
      expect(finalState.merged).toContain(task1BId);
      expect(finalState.merged).toContain(task2AId);
      expect(finalState.merged).toContain(task3AId);
      expect(finalState.merged).toContain(task4AId);
    });
    it('parallel dispatch with mixed outcomes', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(PARALLEL_FIXTURE);
      const taskXId = seeded.tasks.get('Task X')!;
      const taskYId = seeded.tasks.get('Task Y')!;
      // Pre-seed 2 agents
      await harness.agentManager.spawn({
        name: 'pool-agent-1',
        taskId: 'placeholder-1',
        prompt: 'placeholder',
      });
      await harness.agentManager.spawn({
        name: 'pool-agent-2',
        taskId: 'placeholder-2',
        prompt: 'placeholder',
      });
      await harness.advanceTimers();
      // Set Task X to succeed, Task Y to crash
      // NOTE(review): assumes dispatch assigns agents named `agent-<taskId prefix>`
      // — the outcome checks below don't depend on which task got which result.
      harness.setAgentDone(`agent-${taskXId.slice(0, 6)}`, 'Task X completed');
      harness.setAgentError(`agent-${taskYId.slice(0, 6)}`, 'Out of memory error');
      // Queue both tasks
      await harness.dispatchManager.queue(taskXId);
      await harness.dispatchManager.queue(taskYId);
      harness.clearEvents();
      // Dispatch both tasks
      const result1 = await harness.dispatchManager.dispatchNext();
      const result2 = await harness.dispatchManager.dispatchNext();
      // Both should dispatch successfully
      expect(result1.success).toBe(true);
      expect(result2.success).toBe(true);
      // Run timers to complete agents
      await harness.advanceTimers();
      // Verify: one agent:stopped, one agent:crashed
      const stoppedEvents = harness.getEventsByType('agent:stopped');
      const crashedEvents = harness.getEventsByType('agent:crashed');
      expect(stoppedEvents.length).toBe(1);
      expect(crashedEvents.length).toBe(1);
      // Identify which task succeeded and which crashed
      const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
      const crashedPayload = (crashedEvents[0] as AgentCrashedEvent).payload;
      // Find the successful task
      const successTaskId = stoppedPayload.taskId;
      const crashedTaskId = crashedPayload.taskId;
      // Complete the successful task
      await harness.dispatchManager.completeTask(successTaskId!);
      // Verify: completed task is actually completed
      const completedTask = await harness.taskRepository.findById(successTaskId!);
      expect(completedTask?.status).toBe('completed');
      // Verify: crashed task stays in_progress
      const inProgressTask = await harness.taskRepository.findById(crashedTaskId!);
      expect(inProgressTask?.status).toBe('in_progress');
      // Verify: completed task can merge (set up infrastructure)
      const wtSuccess = `wt-${successTaskId!.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `merge-agent-${successTaskId!.slice(0, 6)}`,
        worktreeId: wtSuccess,
        taskId: successTaskId!,
        status: 'idle',
      });
      await harness.worktreeManager.create(wtSuccess, 'feature-success');
      await harness.coordinationManager.queueMerge(successTaskId!);
      const mergeResults = await harness.coordinationManager.processMerges('main');
      expect(mergeResults.length).toBe(1);
      expect(mergeResults[0].success).toBe(true);
    });
  });
});

View File

@@ -0,0 +1,437 @@
/**
* E2E Happy Path Tests
*
* Tests proving core dispatch/coordination flow works end-to-end
* using the TestHarness with mocked agents and worktrees.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
PARALLEL_FIXTURE,
COMPLEX_FIXTURE,
type TestHarness,
} from '../index.js';
describe('E2E Happy Path', () => {
let harness: TestHarness;
beforeEach(() => {
harness = createTestHarness();
});
afterEach(() => {
harness.cleanup();
vi.useRealTimers();
});
// ===========================================================================
// Scenario 1: Single Task Flow
// ===========================================================================
describe('Single task flow', () => {
  it('completes a single task from queue to completion', async () => {
    vi.useFakeTimers();
    const fixture = await harness.seedFixture(SIMPLE_FIXTURE);
    const targetTaskId = fixture.tasks.get('Task A')!;
    // An idle agent must already exist before DispatchManager will spawn work.
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // 1. Queue the task and confirm task:queued fired for it.
    await harness.dispatchManager.queue(targetTaskId);
    const queued = harness.getEventsByType('task:queued');
    expect(queued.length).toBe(1);
    expect((queued[0].payload as { taskId: string }).taskId).toBe(targetTaskId);
    // 2. Dispatch: the dispatcher must pick this task and assign an agent.
    const dispatch = await harness.dispatchManager.dispatchNext();
    expect(dispatch.success).toBe(true);
    expect(dispatch.taskId).toBe(targetTaskId);
    expect(dispatch.agentId).toBeDefined();
    const dispatched = harness.getEventsByType('task:dispatched');
    expect(dispatched.length).toBe(1);
    expect((dispatched[0].payload as { taskId: string }).taskId).toBe(targetTaskId);
    const spawned = harness.getEventsByType('agent:spawned');
    expect(spawned.length).toBe(1);
    // 3. Let the mocked agent run to completion.
    await harness.advanceTimers();
    const stopped = harness.getEventsByType('agent:stopped');
    expect(stopped.length).toBe(1);
    // 4. Mark the task complete and confirm persistence.
    await harness.dispatchManager.completeTask(targetTaskId);
    const persisted = await harness.taskRepository.findById(targetTaskId);
    expect(persisted?.status).toBe('completed');
  });
});
// ===========================================================================
// Scenario 2: Sequential Dependencies
// ===========================================================================
describe('Sequential dependencies', () => {
  it('dispatches tasks in priority order (dependency ordering via task status)', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    const taskBId = seeded.tasks.get('Task B')!;
    const taskCId = seeded.tasks.get('Task C')!;
    // Pre-seed idle agent
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Queue all three tasks
    await harness.dispatchManager.queue(taskAId);
    await harness.dispatchManager.queue(taskBId);
    await harness.dispatchManager.queue(taskCId);
    harness.clearEvents();
    // All three tasks are queued.
    // NOTE: this snapshot is taken BEFORE any dispatch; its `ready` list is
    // deliberately re-inspected further down.
    const queueState = await harness.dispatchManager.getQueueState();
    expect(queueState.queued.length).toBe(3);
    // First dispatchNext: Task A (high priority) dispatches first.
    // getNextDispatchable only peeks — dispatchNext below still gets Task A.
    const nextTask = await harness.dispatchManager.getNextDispatchable();
    expect(nextTask).not.toBeNull();
    expect(nextTask!.taskId).toBe(taskAId); // High priority first
    // All tasks are "ready" in current implementation (dependency loading TBD)
    const readyTaskIds = queueState.ready.map((t) => t.taskId);
    expect(readyTaskIds).toContain(taskAId);
    // Dispatch Task A
    const dispatchResult = await harness.dispatchManager.dispatchNext();
    expect(dispatchResult.success).toBe(true);
    expect(dispatchResult.taskId).toBe(taskAId);
    // Wait for agent completion
    await harness.advanceTimers();
    // Complete Task A
    await harness.dispatchManager.completeTask(taskAId);
    // Verify Task A removed from queue, B and C remain
    const queueStateAfter = await harness.dispatchManager.getQueueState();
    const remainingTaskIds = queueStateAfter.queued.map((t) => t.taskId);
    expect(remainingTaskIds).not.toContain(taskAId);
    expect(remainingTaskIds).toContain(taskBId);
    expect(remainingTaskIds).toContain(taskCId);
    // Task A marked completed in database
    const taskA = await harness.taskRepository.findById(taskAId);
    expect(taskA?.status).toBe('completed');
  });
});
// ===========================================================================
// Scenario 3: Parallel Dispatch
// ===========================================================================
describe('Parallel dispatch', () => {
  it('dispatches multiple independent tasks to multiple agents', async () => {
    vi.useFakeTimers();
    const fixture = await harness.seedFixture(PARALLEL_FIXTURE);
    // Four independent tasks, queued in fixture order (X, Y, P, Q).
    const independentTaskIds = [
      fixture.tasks.get('Task X')!,
      fixture.tasks.get('Task Y')!,
      fixture.tasks.get('Task P')!,
      fixture.tasks.get('Task Q')!,
    ];
    // Two idle agents in the pool -> two tasks can run concurrently.
    await harness.agentManager.spawn({
      name: 'pool-agent-1',
      taskId: 'placeholder-1',
      prompt: 'placeholder',
    });
    await harness.agentManager.spawn({
      name: 'pool-agent-2',
      taskId: 'placeholder-2',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Queue all four tasks.
    for (const id of independentTaskIds) {
      await harness.dispatchManager.queue(id);
    }
    harness.clearEvents();
    // With no dependencies, every queued task is immediately ready.
    const queueState = await harness.dispatchManager.getQueueState();
    expect(queueState.ready.length).toBe(4);
    // Two back-to-back dispatches must land on distinct tasks and agents.
    const firstDispatch = await harness.dispatchManager.dispatchNext();
    expect(firstDispatch.success).toBe(true);
    const secondDispatch = await harness.dispatchManager.dispatchNext();
    expect(secondDispatch.success).toBe(true);
    expect(firstDispatch.taskId).not.toBe(secondDispatch.taskId);
    expect(firstDispatch.agentId).not.toBe(secondDispatch.agentId);
    // Exactly two task:dispatched events were emitted.
    const dispatchedEvents = harness.getEventsByType('task:dispatched');
    expect(dispatchedEvents.length).toBe(2);
  });
});
// ===========================================================================
// Scenario 4: Full Merge Flow
// ===========================================================================
describe('Full merge flow', () => {
  it('queues and processes merge after task completion', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Pre-seed idle agent in MockAgentManager
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Queue and dispatch task
    await harness.dispatchManager.queue(taskAId);
    const dispatchResult = await harness.dispatchManager.dispatchNext();
    expect(dispatchResult.success).toBe(true);
    // Wait for agent completion
    await harness.advanceTimers();
    // Complete task
    await harness.dispatchManager.completeTask(taskAId);
    harness.clearEvents();
    // Create agent in database (CoordinationManager.queueMerge requires it).
    // This bridges the gap between MockAgentManager (in-memory) and
    // AgentRepository (database). The created record is never read back, so
    // the return value is intentionally not bound (was an unused local).
    const worktreeId = `worktree-${taskAId.slice(0, 8)}`;
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      taskId: taskAId,
      worktreeId,
      status: 'idle',
    });
    // Create worktree for merge
    await harness.worktreeManager.create(worktreeId, `feature-${taskAId.slice(0, 6)}`);
    // Queue merge
    await harness.coordinationManager.queueMerge(taskAId);
    // Verify merge:queued event
    const mergeQueuedEvents = harness.getEventsByType('merge:queued');
    expect(mergeQueuedEvents.length).toBe(1);
    // Process merges
    const mergeResults = await harness.coordinationManager.processMerges('main');
    expect(mergeResults.length).toBe(1);
    expect(mergeResults[0].taskId).toBe(taskAId);
    expect(mergeResults[0].success).toBe(true);
    // Verify merge:completed event
    const mergeCompletedEvents = harness.getEventsByType('merge:completed');
    expect(mergeCompletedEvents.length).toBe(1);
  });
});
// ===========================================================================
// Scenario 5: Complex Dependency Flow
// ===========================================================================
describe('Complex dependency flow', () => {
  // Dependency graph under test (COMPLEX_FIXTURE):
  //   1A, 1B  -> no dependencies (roots)
  //   2A      -> depends on 1A
  //   3A      -> depends on 1B
  //   4A      -> depends on 2A and 3A
  it('handles multi-level dependency graph with COMPLEX_FIXTURE', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
    // Get all task IDs
    const task1AId = seeded.tasks.get('Task 1A')!;
    const task1BId = seeded.tasks.get('Task 1B')!;
    const task2AId = seeded.tasks.get('Task 2A')!;
    const task3AId = seeded.tasks.get('Task 3A')!;
    const task4AId = seeded.tasks.get('Task 4A')!;
    // Pre-seed idle agent
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Queue all 5 tasks
    await harness.dispatchManager.queue(task1AId);
    await harness.dispatchManager.queue(task1BId);
    await harness.dispatchManager.queue(task2AId);
    await harness.dispatchManager.queue(task3AId);
    await harness.dispatchManager.queue(task4AId);
    harness.clearEvents();
    // Verify all 5 tasks are queued
    const initialState = await harness.dispatchManager.getQueueState();
    expect(initialState.queued.length).toBe(5);
    // Only tasks with no dependencies are ready:
    // - Task 1A: no deps -> READY
    // - Task 1B: no deps -> READY
    // - Task 2A: depends on 1A -> NOT READY
    // - Task 3A: depends on 1B -> NOT READY
    // - Task 4A: depends on 2A, 3A -> NOT READY
    expect(initialState.ready.length).toBe(2);
    // First dispatch: Task 1A (high priority, first queued)
    const result1 = await harness.dispatchManager.dispatchNext();
    expect(result1.success).toBe(true);
    expect(result1.taskId).toBe(task1AId);
    // Wait for agent completion
    await harness.advanceTimers();
    // Complete Task 1A
    await harness.dispatchManager.completeTask(task1AId);
    // Verify Task 1A completed in database
    const task1A = await harness.taskRepository.findById(task1AId);
    expect(task1A?.status).toBe('completed');
    // 4 tasks remain in queue
    const afterFirstState = await harness.dispatchManager.getQueueState();
    expect(afterFirstState.queued.length).toBe(4);
    // Dispatch and complete remaining tasks one by one; each completion
    // unblocks the next level of the dependency graph.
    // Task 1B (high priority among remaining)
    const result2 = await harness.dispatchManager.dispatchNext();
    expect(result2.success).toBe(true);
    await harness.advanceTimers();
    await harness.dispatchManager.completeTask(result2.taskId!);
    // 3 tasks remain
    const midState = await harness.dispatchManager.getQueueState();
    expect(midState.queued.length).toBe(3);
    // Continue dispatching remaining tasks
    const result3 = await harness.dispatchManager.dispatchNext();
    expect(result3.success).toBe(true);
    await harness.advanceTimers();
    await harness.dispatchManager.completeTask(result3.taskId!);
    const result4 = await harness.dispatchManager.dispatchNext();
    expect(result4.success).toBe(true);
    await harness.advanceTimers();
    await harness.dispatchManager.completeTask(result4.taskId!);
    const result5 = await harness.dispatchManager.dispatchNext();
    expect(result5.success).toBe(true);
    await harness.advanceTimers();
    await harness.dispatchManager.completeTask(result5.taskId!);
    // All tasks completed
    const finalState = await harness.dispatchManager.getQueueState();
    expect(finalState.queued.length).toBe(0);
    // Verify all 5 tasks completed in database
    const allTasks = await Promise.all([
      harness.taskRepository.findById(task1AId),
      harness.taskRepository.findById(task1BId),
      harness.taskRepository.findById(task2AId),
      harness.taskRepository.findById(task3AId),
      harness.taskRepository.findById(task4AId),
    ]);
    expect(allTasks.every((t) => t?.status === 'completed')).toBe(true);
    // Verify event sequence: 5 task:dispatched, 5 task:completed
    const dispatchedEvents = harness.getEventsByType('task:dispatched');
    expect(dispatchedEvents.length).toBe(5);
    const completedEvents = harness.getEventsByType('task:completed');
    expect(completedEvents.length).toBe(5);
  });
  // Sanity check that seedFixture persisted the expected task_dependencies rows.
  it('fixture dependencies are stored correctly in database', async () => {
    const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
    // Get task IDs
    const task1AId = seeded.tasks.get('Task 1A')!;
    const task1BId = seeded.tasks.get('Task 1B')!;
    const task2AId = seeded.tasks.get('Task 2A')!;
    const task3AId = seeded.tasks.get('Task 3A')!;
    const task4AId = seeded.tasks.get('Task 4A')!;
    // Query task_dependencies directly to verify fixture setup
    const { taskDependencies } = await import('../../db/schema.js');
    const { eq } = await import('drizzle-orm');
    // Task 2A should depend on Task 1A
    const task2ADeps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task2AId));
    expect(task2ADeps.length).toBe(1);
    expect(task2ADeps[0].dependsOnTaskId).toBe(task1AId);
    // Task 3A should depend on Task 1B
    const task3ADeps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task3AId));
    expect(task3ADeps.length).toBe(1);
    expect(task3ADeps[0].dependsOnTaskId).toBe(task1BId);
    // Task 4A should depend on both Task 2A and Task 3A
    const task4ADeps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task4AId));
    expect(task4ADeps.length).toBe(2);
    const depIds = task4ADeps.map((d) => d.dependsOnTaskId);
    expect(depIds).toContain(task2AId);
    expect(depIds).toContain(task3AId);
    // Tasks 1A and 1B should have no dependencies
    const task1ADeps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task1AId));
    expect(task1ADeps.length).toBe(0);
    const task1BDeps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task1BId));
    expect(task1BDeps.length).toBe(0);
  });
});
});

View File

@@ -0,0 +1,12 @@
/**
 * E2E Tests for Dispatch/Coordination Flows
 *
 * Test files:
 * - happy-path.test.ts: Normal operation scenarios
 * - edge-cases.test.ts: Error handling and edge cases
 *
 * Uses TestHarness from apps/server/src/test/ for system wiring.
 */
// No exports needed - tests are self-contained. The empty export marks this
// file as an ES module (required under isolatedModules) without exporting
// anything.
export {};

View File

@@ -0,0 +1,480 @@
/**
* E2E Tests for Phase Parallel Execution
*
* Tests proving phase dispatch/coordination flow works end-to-end
* using the TestHarness with phaseDispatchManager.
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { createTestHarness, type TestHarness } from '../index.js';
import type {
PhaseQueuedEvent,
PhaseStartedEvent,
PhaseCompletedEvent,
PhaseBlockedEvent,
} from '../../events/types.js';
describe('Phase Parallel Execution', () => {
// Fresh harness per test so queue, event, and repository state never leaks
// between cases; cleanup tears down the in-memory system after each test.
let harness: TestHarness;
beforeEach(() => {
  harness = createTestHarness();
});
afterEach(() => {
  harness.cleanup();
});
// ===========================================================================
// Test 1: Independent phases dispatch in parallel
// ===========================================================================
describe('Independent phases dispatch in parallel', () => {
  // Two phases with no dependency edges must both be immediately ready and
  // dispatchable back-to-back, each ending up in_progress.
  it('dispatches multiple independent phases when no dependencies exist', async () => {
    // One initiative owning two unrelated phases.
    const owner = await harness.initiativeRepository.create({
      name: 'Independent Phases Test',
      status: 'active',
    });
    const alpha = await harness.phaseRepository.create({
      initiativeId: owner.id,
      name: 'Phase A',
      content: 'Independent phase A',
      status: 'pending',
    });
    const beta = await harness.phaseRepository.create({
      initiativeId: owner.id,
      name: 'Phase B',
      content: 'Independent phase B',
      status: 'pending',
    });
    // The dispatch gate requires 'approved' status before queuing.
    await harness.phaseRepository.update(alpha.id, { status: 'approved' as const });
    await harness.phaseRepository.update(beta.id, { status: 'approved' as const });
    await harness.phaseDispatchManager.queuePhase(alpha.id);
    await harness.phaseDispatchManager.queuePhase(beta.id);
    // Each queuePhase call emits phase:queued.
    const queuedPhaseEvents = harness.getEventsByType('phase:queued');
    expect(queuedPhaseEvents.length).toBe(2);
    // With no dependencies, both phases sit in the ready set.
    const snapshot = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(snapshot.queued.length).toBe(2);
    expect(snapshot.ready.length).toBe(2);
    expect(snapshot.blocked.length).toBe(0);
    const dispatchable = snapshot.ready.map((p) => p.phaseId);
    expect(dispatchable).toContain(alpha.id);
    expect(dispatchable).toContain(beta.id);
    harness.clearEvents();
    // Two consecutive dispatches must succeed and pick distinct phases.
    const firstDispatch = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(firstDispatch.success).toBe(true);
    const secondDispatch = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(secondDispatch.success).toBe(true);
    expect(firstDispatch.phaseId).not.toBe(secondDispatch.phaseId);
    // A phase:started event per dispatch.
    const started = harness.getEventsByType('phase:started');
    expect(started.length).toBe(2);
    // Both phases transitioned to in_progress in the repository.
    const alphaAfter = await harness.phaseRepository.findById(alpha.id);
    const betaAfter = await harness.phaseRepository.findById(beta.id);
    expect(alphaAfter?.status).toBe('in_progress');
    expect(betaAfter?.status).toBe('in_progress');
  });
});
// ===========================================================================
// Test 2: Dependent phase waits for prerequisite
// ===========================================================================
describe('Dependent phase waits for prerequisite', () => {
  // Linear chain A -> B: B must stay non-dispatchable until A completes.
  it('only dispatches phase A first, then B after A completes', async () => {
    // Create phases: A, B (depends on A)
    const initiative = await harness.initiativeRepository.create({
      name: 'Sequential Phases Test',
      status: 'active',
    });
    const phaseA = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase A',
      content: 'First phase',
      status: 'pending',
    });
    const phaseB = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase B',
      content: 'Second phase, depends on A',
      status: 'pending',
    });
    // Approve phases before queuing
    await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
    // Create dependency: B depends on A
    await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
    // Queue both phases
    await harness.phaseDispatchManager.queuePhase(phaseA.id);
    await harness.phaseDispatchManager.queuePhase(phaseB.id);
    // Check queue state - only A should be ready
    const queueState1 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState1.queued.length).toBe(2);
    expect(queueState1.ready.length).toBe(1);
    expect(queueState1.ready[0].phaseId).toBe(phaseA.id);
    harness.clearEvents();
    // Dispatch - should get phase A
    const result1 = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(result1.success).toBe(true);
    expect(result1.phaseId).toBe(phaseA.id);
    // Try to dispatch again - should fail (B is blocked by A)
    const result2 = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(result2.success).toBe(false);
    expect(result2.reason).toBe('No dispatchable phases');
    // Verify phase B still in queue but not ready
    const queueState2 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState2.queued.length).toBe(1);
    expect(queueState2.ready.length).toBe(0);
    // Complete phase A
    await harness.phaseDispatchManager.completePhase(phaseA.id);
    // Verify phase:completed event for A
    const completedEvents = harness.getEventsByType('phase:completed');
    expect(completedEvents.length).toBe(1);
    expect((completedEvents[0] as PhaseCompletedEvent).payload.phaseId).toBe(phaseA.id);
    // Now B should be ready (its only prerequisite is satisfied)
    const queueState3 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState3.ready.length).toBe(1);
    expect(queueState3.ready[0].phaseId).toBe(phaseB.id);
    harness.clearEvents();
    // Dispatch - should get phase B
    const result3 = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(result3.success).toBe(true);
    expect(result3.phaseId).toBe(phaseB.id);
    // Verify phase B is now in_progress
    const updatedPhaseB = await harness.phaseRepository.findById(phaseB.id);
    expect(updatedPhaseB?.status).toBe('in_progress');
  });
});
// ===========================================================================
// Test 3: Diamond dependency pattern
// ===========================================================================
describe('Diamond dependency pattern', () => {
  // Diamond graph:
  //        A
  //       / \
  //      B   C     (B and C each depend on A; may run in parallel)
  //       \ /
  //        D       (D depends on both B and C)
  it('handles diamond: A -> B,C -> D correctly', async () => {
    // Create phases: A, B (depends on A), C (depends on A), D (depends on B, C)
    const initiative = await harness.initiativeRepository.create({
      name: 'Diamond Pattern Test',
      status: 'active',
    });
    const phaseA = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase A',
      content: 'Root phase',
      status: 'pending',
    });
    const phaseB = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase B',
      content: 'Depends on A',
      status: 'pending',
    });
    const phaseC = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase C',
      content: 'Depends on A',
      status: 'pending',
    });
    const phaseD = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase D',
      content: 'Depends on B and C',
      status: 'pending',
    });
    // Approve all phases before queuing
    await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseC.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseD.id, { status: 'approved' as const });
    // Create dependencies
    await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
    await harness.phaseRepository.createDependency(phaseC.id, phaseA.id);
    await harness.phaseRepository.createDependency(phaseD.id, phaseB.id);
    await harness.phaseRepository.createDependency(phaseD.id, phaseC.id);
    // Queue all phases
    await harness.phaseDispatchManager.queuePhase(phaseA.id);
    await harness.phaseDispatchManager.queuePhase(phaseB.id);
    await harness.phaseDispatchManager.queuePhase(phaseC.id);
    await harness.phaseDispatchManager.queuePhase(phaseD.id);
    // Step 1: Only A should be ready
    const state1 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(state1.queued.length).toBe(4);
    expect(state1.ready.length).toBe(1);
    expect(state1.ready[0].phaseId).toBe(phaseA.id);
    // Dispatch A
    const resultA = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(resultA.success).toBe(true);
    expect(resultA.phaseId).toBe(phaseA.id);
    // Step 2: After A completes, B and C should be ready (parallel)
    await harness.phaseDispatchManager.completePhase(phaseA.id);
    const state2 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(state2.queued.length).toBe(3); // B, C, D still queued
    expect(state2.ready.length).toBe(2); // B and C ready
    const readyIds = state2.ready.map((p) => p.phaseId);
    expect(readyIds).toContain(phaseB.id);
    expect(readyIds).toContain(phaseC.id);
    expect(readyIds).not.toContain(phaseD.id);
    // Dispatch B and C in parallel
    const resultB = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(resultB.success).toBe(true);
    const resultC = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(resultC.success).toBe(true);
    // Verify D is still not ready (needs both B and C complete)
    const state3 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(state3.ready.length).toBe(0);
    expect(state3.queued.length).toBe(1);
    expect(state3.queued[0].phaseId).toBe(phaseD.id);
    // Step 3: Complete B only - D still not ready
    // Note: dispatch order of B vs C is not assumed; resultB/resultC phase IDs
    // are used as returned rather than the literal phaseB/phaseC ids.
    await harness.phaseDispatchManager.completePhase(resultB.phaseId);
    const state4 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(state4.ready.length).toBe(0); // D still blocked by C
    // Step 4: Complete C - now D should be ready
    await harness.phaseDispatchManager.completePhase(resultC.phaseId);
    const state5 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(state5.ready.length).toBe(1);
    expect(state5.ready[0].phaseId).toBe(phaseD.id);
    // Dispatch D
    const resultD = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(resultD.success).toBe(true);
    expect(resultD.phaseId).toBe(phaseD.id);
    // Verify D is now in_progress
    const updatedPhaseD = await harness.phaseRepository.findById(phaseD.id);
    expect(updatedPhaseD?.status).toBe('in_progress');
  });
});
// ===========================================================================
// Test 4: Approval gate rejects non-approved phases
// ===========================================================================
describe('Approval gate rejects non-approved phases', () => {
  // queuePhase() must refuse any phase whose status is not 'approved'.
  it('rejects queuePhase for pending phase', async () => {
    const owner = await harness.initiativeRepository.create({
      name: 'Approval Gate Test',
      status: 'active',
    });
    const pendingPhase = await harness.phaseRepository.create({
      initiativeId: owner.id,
      name: 'Unapproved Phase',
      status: 'pending',
    });
    // Status is still 'pending', so the approval gate must throw.
    const attempt = harness.phaseDispatchManager.queuePhase(pendingPhase.id);
    await expect(attempt).rejects.toThrow('must be approved before queuing');
  });
  it('rejects queuePhase for in_progress phase', async () => {
    const owner = await harness.initiativeRepository.create({
      name: 'Approval Gate Test 2',
      status: 'active',
    });
    const runningPhase = await harness.phaseRepository.create({
      initiativeId: owner.id,
      name: 'In Progress Phase',
      status: 'in_progress',
    });
    // An already-running phase is likewise rejected by the gate.
    const attempt = harness.phaseDispatchManager.queuePhase(runningPhase.id);
    await expect(attempt).rejects.toThrow('must be approved before queuing');
  });
});
// ===========================================================================
// Test 5: Blocked phase doesn't dispatch
// ===========================================================================
describe('Blocked phase does not dispatch', () => {
  // A blocked phase must never dispatch, and any phase depending on it
  // (directly or transitively) must also stay non-dispatchable.
  it('prevents dispatch of blocked phase even if dependencies complete', async () => {
    // Create phases: A, B (depends on A)
    const initiative = await harness.initiativeRepository.create({
      name: 'Blocked Phase Test',
      status: 'active',
    });
    const phaseA = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase A',
      content: 'First phase that will be blocked',
      status: 'pending',
    });
    const phaseB = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase B',
      content: 'Second phase, depends on A',
      status: 'pending',
    });
    // Approve phases before queuing
    await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
    // Create dependency: B depends on A
    await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
    // Queue phase A
    await harness.phaseDispatchManager.queuePhase(phaseA.id);
    // Block phase A
    await harness.phaseDispatchManager.blockPhase(phaseA.id, 'External dependency unavailable');
    // Verify phase:blocked event
    const blockedEvents = harness.getEventsByType('phase:blocked');
    expect(blockedEvents.length).toBe(1);
    expect((blockedEvents[0] as PhaseBlockedEvent).payload.phaseId).toBe(phaseA.id);
    expect((blockedEvents[0] as PhaseBlockedEvent).payload.reason).toBe(
      'External dependency unavailable'
    );
    // Try to dispatch - should fail
    const result = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(result.success).toBe(false);
    expect(result.reason).toBe('No dispatchable phases');
    // Verify queue state shows A as blocked
    const queueState = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState.blocked.length).toBe(1);
    expect(queueState.blocked[0].phaseId).toBe(phaseA.id);
    expect(queueState.blocked[0].reason).toBe('External dependency unavailable');
    // Queue phase B
    await harness.phaseDispatchManager.queuePhase(phaseB.id);
    // B should never become ready because A is blocked (not completed)
    const queueState2 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState2.ready.length).toBe(0);
    expect(queueState2.queued.length).toBe(1); // Only B is queued (A is blocked, not queued)
    expect(queueState2.queued[0].phaseId).toBe(phaseB.id);
    // Try to dispatch B - should fail
    const resultB = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(resultB.success).toBe(false);
    expect(resultB.reason).toBe('No dispatchable phases');
    // Verify phase A status is blocked in database
    const updatedPhaseA = await harness.phaseRepository.findById(phaseA.id);
    expect(updatedPhaseA?.status).toBe('blocked');
  });
  it('blocked phase prevents all downstream phases from dispatching', async () => {
    // Create chain: A -> B -> C, then block A
    const initiative = await harness.initiativeRepository.create({
      name: 'Chain Block Test',
      status: 'active',
    });
    const phaseA = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase A',
      content: 'Root phase',
      status: 'pending',
    });
    const phaseB = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase B',
      content: 'Depends on A',
      status: 'pending',
    });
    const phaseC = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase C',
      content: 'Depends on B',
      status: 'pending',
    });
    // Approve all phases before queuing
    await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseC.id, { status: 'approved' as const });
    // Create dependency chain: A -> B -> C
    await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
    await harness.phaseRepository.createDependency(phaseC.id, phaseB.id);
    // Queue all phases
    await harness.phaseDispatchManager.queuePhase(phaseA.id);
    await harness.phaseDispatchManager.queuePhase(phaseB.id);
    await harness.phaseDispatchManager.queuePhase(phaseC.id);
    // Block phase A
    await harness.phaseDispatchManager.blockPhase(phaseA.id, 'Resource unavailable');
    // Verify only B and C are in queue (A is blocked)
    const queueState = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState.queued.length).toBe(2);
    expect(queueState.ready.length).toBe(0); // Neither B nor C can dispatch
    expect(queueState.blocked.length).toBe(1);
    // Try to dispatch any phase - should fail for all
    const result = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(result.success).toBe(false);
    expect(result.reason).toBe('No dispatchable phases');
  });
});
});

View File

@@ -0,0 +1,490 @@
/**
* E2E Tests for Recovery and Extended Scenarios
*
* Tests recovery/resume after interruption scenarios:
* - Queue state survives harness recreation (DB is source of truth)
* - In-progress task recoverable after agent crash
* - Blocked task state persists and can be unblocked
* - Merge queue state recoverable
*
* Tests extended agent Q&A scenarios:
* - Multiple questions in sequence
* - Question surfaces in message queue
* - Agent resumes with answer in context
* - Waiting agent blocks task completion
*
 * Uses TestHarness from apps/server/src/test/ for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
type TestHarness,
} from '../index.js';
import type {
AgentWaitingEvent,
AgentResumedEvent,
AgentStoppedEvent,
} from '../../events/types.js';
describe('E2E Recovery Scenarios', () => {
describe('Recovery after interruption', () => {
  let harness: TestHarness;
  beforeEach(() => {
    harness = createTestHarness();
  });
  afterEach(() => {
    harness.cleanup();
    // Some tests in this group enable fake timers; always restore.
    vi.useRealTimers();
  });
  it('queue state survives in database (source of truth)', async () => {
    // Seed fixture, queue tasks
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Queue task
    await harness.dispatchManager.queue(taskAId);
    // Verify queue state shows task (queued, not pending)
    const queueState1 = await harness.dispatchManager.getQueueState();
    expect(queueState1.queued.length).toBe(1);
    expect(queueState1.queued[0].taskId).toBe(taskAId);
    // The queue state is in memory, but task status is in DB.
    // Verify task status in database directly
    const task = await harness.taskRepository.findById(taskAId);
    expect(task?.status).toBe('pending');
    // Verify: even after clearing in-memory queue state,
    // we can still find pending tasks from database
    const allTasks = await harness.taskRepository.findByParentTaskId(
      seeded.taskGroups.get('Task Group 1')!
    );
    const pendingTasks = allTasks.filter((t) => t.status === 'pending');
    // Task A is pending (not queued, but status is pending)
    // Task B and C are also pending but depend on Task A
    expect(pendingTasks.length).toBeGreaterThanOrEqual(1);
  });
  it('in-progress task recoverable after agent crash', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Pre-seed required idle agent
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    // Set crash scenario: the agent assigned to this task will fail
    harness.setAgentError(`agent-${taskAId.slice(0, 6)}`, 'Token limit exceeded');
    // Queue and dispatch
    await harness.dispatchManager.queue(taskAId);
    await harness.dispatchManager.dispatchNext();
    await harness.advanceTimers();
    // Verify task status is 'in_progress' (not completed, not lost)
    let task = await harness.taskRepository.findById(taskAId);
    expect(task?.status).toBe('in_progress');
    // Task can be re-queued and dispatched to a new agent
    // First, clear agent manager and create new pool agent
    harness.agentManager.clear();
    await harness.agentManager.spawn({
      name: 'new-pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    // Re-queue the task (it's still in_progress but we can retry)
    await harness.dispatchManager.queue(taskAId);
    // Set success scenario for the new agent
    harness.setAgentDone(`agent-${taskAId.slice(0, 6)}`, 'Task completed after retry');
    // Clear events and dispatch again
    harness.clearEvents();
    const dispatchResult = await harness.dispatchManager.dispatchNext();
    await harness.advanceTimers();
    // Verify: agent completed successfully
    expect(dispatchResult.agentId).toBeDefined();
    const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
    expect(agentResult?.success).toBe(true);
  });
  it('blocked task state persists in database', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Queue task and block it
    await harness.dispatchManager.queue(taskAId);
    await harness.dispatchManager.blockTask(taskAId, 'Waiting for user decision');
    // Verify task in blocked state in DB
    const task = await harness.taskRepository.findById(taskAId);
    expect(task?.status).toBe('blocked');
    // Query blocked tasks from queue state
    const queueState = await harness.dispatchManager.getQueueState();
    expect(queueState.blocked.length).toBe(1);
    expect(queueState.blocked[0].taskId).toBe(taskAId);
    expect(queueState.blocked[0].reason).toBe('Waiting for user decision');
    // Re-queue task to unblock (set status back to pending via repository)
    await harness.taskRepository.update(taskAId, { status: 'pending' });
    await harness.dispatchManager.queue(taskAId);
    // Verify: task now in pending state in database
    const unblocked = await harness.taskRepository.findById(taskAId);
    expect(unblocked?.status).toBe('pending');
    // Task should be in queued list
    const queueState2 = await harness.dispatchManager.getQueueState();
    expect(queueState2.queued.some((t) => t.taskId === taskAId)).toBe(true);
  });
  it('merge queue state recoverable', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Mark task as completed (required for merge)
    await harness.taskRepository.update(taskAId, { status: 'completed' });
    // Create worktree for task
    const worktreeId = `wt-${taskAId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeId, 'feature-task-a');
    // Create agent in agentRepository (required for merge lookup)
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      worktreeId,
      taskId: taskAId,
      status: 'idle',
    });
    // Queue for merge
    await harness.coordinationManager.queueMerge(taskAId);
    // Verify merge queue has queued item
    const queueState1 = await harness.coordinationManager.getQueueState();
    expect(queueState1.queued.some((item) => item.taskId === taskAId)).toBe(true);
    // Process merge
    const results = await harness.coordinationManager.processMerges('main');
    // Verify: merge completed correctly
    expect(results.length).toBe(1);
    expect(results[0].taskId).toBe(taskAId);
    expect(results[0].success).toBe(true);
    // Verify: task in merged list
    const queueState2 = await harness.coordinationManager.getQueueState();
    expect(queueState2.merged.includes(taskAId)).toBe(true);
  });
});
describe('Agent Q&A extended scenarios', () => {
// Fresh harness per test; afterEach also restores real timers because
// tests in this group enable vi.useFakeTimers().
let harness: TestHarness;
beforeEach(() => {
  harness = createTestHarness();
});
afterEach(() => {
  harness.cleanup();
  vi.useRealTimers();
});
// Full Q&A round-trip: agent asks a question (agent:waiting), is resumed
// with an answers map, then finishes (agent:resumed, agent:stopped).
it('question enters waiting state and completes after resume', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;
  // Pre-seed required idle agent
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();
  // Set questions scenario with options
  harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
    {
      id: 'q1',
      question: 'Which database should I use?',
      options: [
        { label: 'PostgreSQL', description: 'Relational, ACID compliant' },
        { label: 'SQLite', description: 'Lightweight, file-based' },
      ],
    },
  ]);
  // Queue and dispatch
  await harness.dispatchManager.queue(taskAId);
  harness.clearEvents();
  const dispatchResult = await harness.dispatchManager.dispatchNext();
  await harness.advanceTimers();
  // Verify: agent:waiting event emitted
  const waitingEvents = harness.getEventsByType('agent:waiting');
  expect(waitingEvents.length).toBe(1);
  const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
  expect(waitingPayload.taskId).toBe(taskAId);
  expect(waitingPayload.questions[0].question).toBe('Which database should I use?');
  // Clear and resume with answers map (question id -> chosen answer)
  harness.clearEvents();
  await harness.agentManager.resume(dispatchResult.agentId!, { q1: 'PostgreSQL' });
  await harness.advanceTimers();
  // Verify: resumed and stopped events
  const resumedEvents = harness.getEventsByType('agent:resumed');
  expect(resumedEvents.length).toBe(1);
  const resumedPayload = (resumedEvents[0] as AgentResumedEvent).payload;
  expect(resumedPayload.taskId).toBe(taskAId);
  const stoppedEvents = harness.getEventsByType('agent:stopped');
  expect(stoppedEvents.length).toBe(1);
  const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
  expect(stoppedPayload.reason).toBe('task_complete');
});
// Question options must surface both on the agent:waiting event payload and
// via the harness getPendingQuestions() accessor, as structured data.
it('questions surface as structured PendingQuestions', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;
  // Pre-seed required idle agent
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();
  // Set questions scenario with options
  harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
    {
      id: 'q1',
      question: 'Select your framework',
      options: [
        { label: 'React' },
        { label: 'Vue' },
        { label: 'Svelte' },
      ],
    },
  ]);
  // Queue and dispatch
  await harness.dispatchManager.queue(taskAId);
  const dispatchResult = await harness.dispatchManager.dispatchNext();
  await harness.advanceTimers();
  // Verify: agent:waiting event has questions
  const waitingEvents = harness.getEventsByType('agent:waiting');
  expect(waitingEvents.length).toBe(1);
  const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
  expect(waitingPayload.questions[0].question).toBe('Select your framework');
  expect(waitingPayload.questions[0].options).toEqual([
    { label: 'React' },
    { label: 'Vue' },
    { label: 'Svelte' },
  ]);
  // Verify: getPendingQuestions returns structured data
  const pendingQuestions = await harness.getPendingQuestions(dispatchResult.agentId!);
  expect(pendingQuestions).not.toBeNull();
  expect(pendingQuestions?.questions[0].question).toBe('Select your framework');
  expect(pendingQuestions?.questions[0].options).toEqual([
    { label: 'React' },
    { label: 'Vue' },
    { label: 'Svelte' },
  ]);
});
it('agent resumes with answer and completes successfully', async () => {
  vi.useFakeTimers();
  const fixture = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskId = fixture.tasks.get('Task A')!;

  // DispatchManager requires an idle agent in the pool before dispatching.
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();

  // The dispatched agent will pause on a single free-form question.
  harness.setAgentQuestions(`agent-${taskId.slice(0, 6)}`, [
    { id: 'q1', question: 'Choose database type' },
  ]);

  await harness.dispatchManager.queue(taskId);
  const dispatched = await harness.dispatchManager.dispatchNext();
  await harness.advanceTimers();

  // The agent should now be paused, waiting for an answer.
  const pausedAgent = await harness.agentManager.get(dispatched.agentId!);
  expect(pausedAgent?.status).toBe('waiting_for_input');

  // Supply the answer keyed by question id; the mock agent finishes its run.
  await harness.agentManager.resume(dispatched.agentId!, { q1: 'PostgreSQL' });
  await harness.advanceTimers();

  // The run result reflects a successful, resumed completion.
  const runResult = await harness.agentManager.getResult(dispatched.agentId!);
  expect(runResult).not.toBeNull();
  expect(runResult?.success).toBe(true);
  expect(runResult?.message).toBe('Resumed and completed successfully');

  // Finished agents return to the idle pool.
  const finishedAgent = await harness.agentManager.get(dispatched.agentId!);
  expect(finishedAgent?.status).toBe('idle');
});
it('waiting agent status transitions correctly through full cycle', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;
  // Pre-seed required idle agent (DispatchManager needs one idle agent in the pool)
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();
  // Set questions scenario for the agent the dispatcher will spawn
  harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
    { id: 'q1', question: 'API key format?' },
  ]);
  // Queue and dispatch
  await harness.dispatchManager.queue(taskAId);
  const dispatchResult = await harness.dispatchManager.dispatchNext();
  // Phase 1: Initially running (timers not advanced yet, scenario not executed)
  let agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('running');
  await harness.advanceTimers();
  // Phase 2: After scenario completes, waiting_for_input
  agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('waiting_for_input');
  // Verify pending questions exist while the agent is paused
  const pendingQuestions = await harness.getPendingQuestions(dispatchResult.agentId!);
  expect(pendingQuestions?.questions[0].question).toBe('API key format?');
  // Phase 3: Resume with answers map keyed by question id
  await harness.agentManager.resume(dispatchResult.agentId!, { q1: 'Bearer token' });
  // After resume: running again briefly (until timers advance)
  agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('running');
  await harness.advanceTimers();
  // Phase 4: After completion, idle (back in the dispatch pool)
  agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('idle');
  // Verify pending questions is cleared after resume — no stale state left behind
  const clearedQuestions = await harness.getPendingQuestions(dispatchResult.agentId!);
  expect(clearedQuestions).toBeNull();
});
it('should handle agent asking multiple questions at once', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;
  // Pre-seed required idle agent
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();
  // Setup: agent asks two questions in a single waiting cycle
  harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
    {
      id: 'q1',
      question: 'Which database?',
      options: [{ label: 'SQLite' }, { label: 'Postgres' }],
    },
    {
      id: 'q2',
      question: 'Include tests?',
      options: [{ label: 'Yes' }, { label: 'No' }],
    },
  ]);
  // Queue and dispatch task
  await harness.dispatchManager.queue(taskAId);
  harness.clearEvents();
  const dispatchResult = await harness.dispatchManager.dispatchNext();
  await harness.advanceTimers();
  // Verify: a single agent:waiting event is emitted even with multiple questions
  const waitingEvents = harness.getEventsByType('agent:waiting');
  expect(waitingEvents.length).toBe(1);
  const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
  expect(waitingPayload.taskId).toBe(taskAId);
  // Verify both questions present, in declaration order
  const pending = await harness.getPendingQuestions(dispatchResult.agentId!);
  expect(pending?.questions).toHaveLength(2);
  expect(pending?.questions[0].id).toBe('q1');
  expect(pending?.questions[0].question).toBe('Which database?');
  expect(pending?.questions[1].id).toBe('q2');
  expect(pending?.questions[1].question).toBe('Include tests?');
  // Resume with answers for both questions in one call
  harness.clearEvents();
  await harness.agentManager.resume(dispatchResult.agentId!, {
    q1: 'SQLite',
    q2: 'Yes',
  });
  await harness.advanceTimers();
  // Verify: agent:resumed event emitted
  const resumedEvents = harness.getEventsByType('agent:resumed');
  expect(resumedEvents.length).toBe(1);
  // Verify: agent:stopped event emitted (after resume completes)
  const stoppedEvents = harness.getEventsByType('agent:stopped');
  expect(stoppedEvents.length).toBe(1);
  const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
  expect(stoppedPayload.taskId).toBe(taskAId);
  expect(stoppedPayload.reason).toBe('task_complete');
  // Verify task completed (agent result reports success)
  const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
  expect(agentResult?.success).toBe(true);
  // Verify agent is now idle and back in the pool
  const finalAgent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(finalAgent?.status).toBe('idle');
});
});
});

View File

@@ -0,0 +1,316 @@
/**
* Test Fixtures for E2E Testing
*
* Provides fixture helpers that seed complete task hierarchies
* for integration and E2E tests.
*/
import { nanoid } from 'nanoid';
import type { DrizzleDatabase } from '../db/index.js';
import {
DrizzleInitiativeRepository,
DrizzlePhaseRepository,
DrizzleTaskRepository,
} from '../db/repositories/drizzle/index.js';
import { taskDependencies } from '../db/schema.js';
// =============================================================================
// Fixture Interfaces
// =============================================================================
/**
 * Task fixture definition.
 */
export interface TaskFixture {
  /**
   * Unique identifier for this task within the fixture. This is the key
   * other tasks reference in `dependsOn` (see seedFixture's resolution
   * pass) — in the bundled fixtures it happens to equal `name`.
   */
  id: string;
  /** Task name */
  name: string;
  /** Task priority */
  priority?: 'low' | 'medium' | 'high';
  /** Task category */
  category?: 'execute' | 'research' | 'discuss' | 'plan' | 'detail' | 'refine' | 'verify' | 'merge' | 'review';
  /**
   * `id` values of other tasks in the same fixture this task depends on.
   * Note: these are fixture ids, not database ids — seedFixture translates
   * them into real task ids after all tasks are created.
   */
  dependsOn?: string[];
}
/**
 * Task group fixture definition (replaces Plan).
 * Tasks are grouped by parent task in the new model.
 */
export interface TaskGroupFixture {
  /** Group name (becomes a completed parent 'detail' task) */
  name: string;
  /** Tasks in this group (created as children of the detail task) */
  tasks: TaskFixture[];
}
/**
 * Phase fixture definition.
 */
export interface PhaseFixture {
  /** Phase name */
  name: string;
  /** Task groups in this phase (each group becomes a parent detail task) */
  taskGroups: TaskGroupFixture[];
}
/**
 * Initiative fixture definition (top-level).
 */
export interface InitiativeFixture {
  /** Initiative name */
  name: string;
  /** Phases in this initiative */
  phases: PhaseFixture[];
}
/**
 * Result of seeding a fixture.
 * Maps names to IDs for all created entities.
 */
export interface SeededFixture {
  /** ID of the created initiative */
  initiativeId: string;
  /** Map of phase names to IDs */
  phases: Map<string, string>;
  /** Map of task group names to parent task IDs */
  taskGroups: Map<string, string>;
  /** Map of task fixture ids to database task IDs */
  tasks: Map<string, string>;
}
// =============================================================================
// Seed Function
// =============================================================================
/**
 * Seed a complete task hierarchy from a fixture definition.
 *
 * Creates initiative, phases, detail tasks (as parents), and child tasks,
 * then resolves `dependsOn` references (fixture ids) into real task ids.
 *
 * @param db - Drizzle database instance
 * @param fixture - The fixture definition to seed
 * @returns SeededFixture with all created entity IDs
 */
export async function seedFixture(
  db: DrizzleDatabase,
  fixture: InitiativeFixture
): Promise<SeededFixture> {
  const initiativeRepo = new DrizzleInitiativeRepository(db);
  const phaseRepo = new DrizzlePhaseRepository(db);
  const taskRepo = new DrizzleTaskRepository(db);

  const phaseIds = new Map<string, string>();
  const groupIds = new Map<string, string>();
  const taskIds = new Map<string, string>();

  // dependsOn may reference tasks that are created later in the fixture,
  // so edges are collected here and resolved in a second pass.
  const deferredDeps: Array<{ taskId: string; dependsOnNames: string[] }> = [];

  const initiative = await initiativeRepo.create({
    name: fixture.name,
    status: 'active',
  });

  for (const phaseDef of fixture.phases) {
    const phase = await phaseRepo.create({
      initiativeId: initiative.id,
      name: phaseDef.name,
      status: 'pending',
    });
    phaseIds.set(phaseDef.name, phase.id);

    // Each task group materializes as a parent 'detail' task.
    let groupOrder = 0;
    for (const groupDef of phaseDef.taskGroups) {
      const parent = await taskRepo.create({
        phaseId: phase.id,
        initiativeId: initiative.id,
        name: groupDef.name,
        description: `Test task group: ${groupDef.name}`,
        category: 'detail',
        type: 'auto',
        priority: 'medium',
        status: 'completed', // Detail tasks are completed once child tasks are created
        order: groupOrder++,
      });
      groupIds.set(groupDef.name, parent.id);

      // Child tasks hang off the parent detail task.
      let childOrder = 0;
      for (const taskDef of groupDef.tasks) {
        const child = await taskRepo.create({
          parentTaskId: parent.id,
          phaseId: phase.id,
          initiativeId: initiative.id,
          name: taskDef.name,
          description: `Test task: ${taskDef.name}`,
          category: taskDef.category ?? 'execute',
          type: 'auto',
          priority: taskDef.priority ?? 'medium',
          status: 'pending',
          order: childOrder++,
        });
        taskIds.set(taskDef.id, child.id);

        if (taskDef.dependsOn && taskDef.dependsOn.length > 0) {
          deferredDeps.push({ taskId: child.id, dependsOnNames: taskDef.dependsOn });
        }
      }
    }
  }

  // Second pass: translate fixture ids into real task ids and persist edges.
  for (const { taskId, dependsOnNames } of deferredDeps) {
    for (const depName of dependsOnNames) {
      const dependsOnTaskId = taskIds.get(depName);
      if (!dependsOnTaskId) {
        throw new Error(
          `Dependency resolution failed: task "${depName}" not found in fixture`
        );
      }
      await db.insert(taskDependencies).values({
        id: nanoid(),
        taskId,
        dependsOnTaskId,
        createdAt: new Date(),
      });
    }
  }

  return {
    initiativeId: initiative.id,
    phases: phaseIds,
    taskGroups: groupIds,
    tasks: taskIds,
  };
}
// =============================================================================
// Convenience Fixtures
// =============================================================================
/**
 * Simple fixture: 1 initiative -> 1 phase -> 1 task group -> 3 tasks.
 *
 * Task dependency structure:
 * - Task A: no dependencies
 * - Task B: depends on Task A
 * - Task C: depends on Task A
 *
 * Note: each task's `id` equals its `name`; `dependsOn` entries reference
 * these `id` values.
 */
export const SIMPLE_FIXTURE: InitiativeFixture = {
  name: 'Simple Test Initiative',
  phases: [
    {
      name: 'Phase 1',
      taskGroups: [
        {
          name: 'Task Group 1',
          tasks: [
            { id: 'Task A', name: 'Task A', priority: 'high' },
            { id: 'Task B', name: 'Task B', priority: 'medium', dependsOn: ['Task A'] },
            { id: 'Task C', name: 'Task C', priority: 'medium', dependsOn: ['Task A'] },
          ],
        },
      ],
    },
  ],
};
/**
 * Parallel fixture: 1 initiative -> 1 phase -> 2 task groups (each with 2 independent tasks).
 *
 * Task structure (no dependencies anywhere — all four tasks are immediately
 * dispatchable, which is what makes this fixture useful for parallelism tests):
 * - Group A: Task X, Task Y (independent)
 * - Group B: Task P, Task Q (independent)
 */
export const PARALLEL_FIXTURE: InitiativeFixture = {
  name: 'Parallel Test Initiative',
  phases: [
    {
      name: 'Parallel Phase',
      taskGroups: [
        {
          name: 'Group A',
          tasks: [
            { id: 'Task X', name: 'Task X', priority: 'high' },
            { id: 'Task Y', name: 'Task Y', priority: 'medium' },
          ],
        },
        {
          name: 'Group B',
          tasks: [
            { id: 'Task P', name: 'Task P', priority: 'high' },
            { id: 'Task Q', name: 'Task Q', priority: 'low' },
          ],
        },
      ],
    },
  ],
};
/**
 * Complex fixture: 1 initiative -> 2 phases -> 4 task groups with cross-group dependencies.
 *
 * Structure:
 * - Phase 1: Group 1 (Task 1A, 1B), Group 2 (Task 2A depends on 1A)
 * - Phase 2: Group 3 (Task 3A depends on 1B), Group 4 (Task 4A depends on 2A and 3A)
 *
 * Dependencies deliberately cross both group and phase boundaries, and
 * Task 4A fans in from two predecessors — exercising multi-edge resolution.
 */
export const COMPLEX_FIXTURE: InitiativeFixture = {
  name: 'Complex Test Initiative',
  phases: [
    {
      name: 'Phase 1',
      taskGroups: [
        {
          name: 'Group 1',
          tasks: [
            { id: 'Task 1A', name: 'Task 1A', priority: 'high' },
            { id: 'Task 1B', name: 'Task 1B', priority: 'medium' },
          ],
        },
        {
          name: 'Group 2',
          tasks: [
            { id: 'Task 2A', name: 'Task 2A', priority: 'high', dependsOn: ['Task 1A'] },
          ],
        },
      ],
    },
    {
      name: 'Phase 2',
      taskGroups: [
        {
          name: 'Group 3',
          tasks: [
            { id: 'Task 3A', name: 'Task 3A', priority: 'high', dependsOn: ['Task 1B'] },
          ],
        },
        {
          name: 'Group 4',
          tasks: [
            {
              id: 'Task 4A',
              name: 'Task 4A',
              priority: 'high',
              dependsOn: ['Task 2A', 'Task 3A'],
            },
          ],
        },
      ],
    },
  ],
};

View File

@@ -0,0 +1,35 @@
# todo-api
A minimal zero-dependency in-memory todo list library for Node.js.
## API
```js
import { TodoStore } from './src/todo.js';
const store = new TodoStore();
const id = store.add('buy milk'); // returns numeric id
store.list(); // returns [{ id, text, done }]
store.remove(id); // deletes item
store.complete(id); // NOT IMPLEMENTED — marks item done
```
## Status
The `complete(id)` method is **missing**. The test suite in `src/todo.test.js` covers it and currently fails:
```
node --test src/todo.test.js
# → TypeError: store.complete is not a function
```
## Task
Implement `complete(id)` on `TodoStore` in `src/todo.js` so that it:
1. Finds the item with the given `id`.
2. Sets `item.done = true`.
3. Does not throw if `id` is not found (silent no-op).
All five tests in `src/todo.test.js` should pass after the fix.

View File

@@ -0,0 +1,8 @@
{
"name": "todo-api",
"version": "1.0.0",
"type": "module",
"scripts": {
"test": "node --test src/todo.test.js"
}
}

View File

@@ -0,0 +1,19 @@
/**
 * Minimal in-memory todo store (test fixture for the todo-api task).
 *
 * Bug fix: ids were previously generated with Date.now(), so two add()
 * calls inside the same millisecond produced DUPLICATE ids — remove(id)
 * would then delete both items and lookups were ambiguous, making the
 * fixture's own tests flaky. A monotonic counter keeps ids numeric
 * (per the README contract) and guaranteed unique.
 *
 * Note: complete(id) is intentionally still absent — that gap is the
 * exercise this fixture exists to provide (see README / todo.test.js).
 */
export class TodoStore {
  #items = [];
  // Monotonic id source; incremented on every add().
  #nextId = 1;

  /** Add an item with the given text; returns its unique numeric id. */
  add(text) {
    const id = this.#nextId++;
    this.#items.push({ id, text, done: false });
    return id;
  }

  /** Return a shallow copy of all items ({ id, text, done }). */
  list() {
    return [...this.#items];
  }

  /** Delete the item with the given id; no-op if absent. */
  remove(id) {
    this.#items = this.#items.filter(i => i.id !== id);
  }

  // complete(id) deliberately missing — implement me!
}

View File

@@ -0,0 +1,41 @@
// Tests for TodoStore, run with the built-in node:test runner (`npm test`).
import { test } from 'node:test';
import assert from 'node:assert/strict';
import { TodoStore } from './todo.js';

test('add returns an id', () => {
  const store = new TodoStore();
  const id = store.add('buy milk');
  assert.ok(typeof id === 'number', 'id should be a number');
});

test('list returns all items', () => {
  const store = new TodoStore();
  store.add('task one');
  store.add('task two');
  assert.equal(store.list().length, 2);
});

test('remove deletes an item', () => {
  const store = new TodoStore();
  const id = store.add('delete me');
  store.remove(id);
  assert.equal(store.list().length, 0);
});

// The two tests below exercise complete(), which the fixture deliberately
// omits — they fail with "store.complete is not a function" until it is
// implemented (see README "Task" section).
test('complete marks item done', () => {
  const store = new TodoStore();
  const id = store.add('buy milk');
  store.complete(id);
  const item = store.list().find(i => i.id === id);
  assert.ok(item, 'item should still exist after completing');
  assert.equal(item.done, true, 'item.done should be true after complete()');
});

test('complete does not affect other items', () => {
  const store = new TodoStore();
  const id1 = store.add('task one');
  const id2 = store.add('task two');
  store.complete(id1);
  const item2 = store.list().find(i => i.id === id2);
  assert.equal(item2.done, false, 'other items should remain undone');
});

View File

@@ -0,0 +1,394 @@
/**
* Tests for Test Harness
*
* Proves that the test harness enables E2E testing scenarios.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
PARALLEL_FIXTURE,
COMPLEX_FIXTURE,
type TestHarness,
} from './index.js';
import { taskDependencies } from '../db/schema.js';
import { eq } from 'drizzle-orm';
describe('TestHarness', () => {
// Fresh harness per test; torn down afterwards so in-memory DB and
// captured events never leak between cases.
let harness: TestHarness;
beforeEach(() => {
  harness = createTestHarness();
});
afterEach(() => {
  harness.cleanup();
  // Restore real timers in case a test enabled vi.useFakeTimers().
  vi.useRealTimers();
});
describe('createTestHarness', () => {
  it('returns all components', () => {
    // Every wired-up subsystem must be present on the harness object.
    const components = [
      harness.db,
      harness.eventBus,
      harness.agentManager,
      harness.worktreeManager,
      harness.dispatchManager,
      harness.coordinationManager,
      harness.taskRepository,
      harness.messageRepository,
      harness.agentRepository,
    ];
    for (const component of components) {
      expect(component).toBeDefined();
    }
  });

  it('provides helper methods', () => {
    // Convenience API layered on top of the raw managers for tests.
    const helpers = [
      harness.seedFixture,
      harness.setAgentScenario,
      harness.setAgentQuestion,
      harness.setAgentQuestions,
      harness.getEventsByType,
      harness.clearEvents,
      harness.cleanup,
    ];
    for (const helper of helpers) {
      expect(typeof helper).toBe('function');
    }
  });
});
describe('setAgentQuestion convenience helper', () => {
  it('wraps single question in array format', async () => {
    vi.useFakeTimers();
    // Set single question using the convenience method (singular form);
    // internally it should normalize to the questions-array shape.
    harness.setAgentQuestion('test-agent', 'q1', 'Which option?', [
      { label: 'Option A', description: 'First option' },
      { label: 'Option B', description: 'Second option' },
    ]);
    // Spawn agent with that scenario name so the question applies to it
    const agent = await harness.agentManager.spawn({
      name: 'test-agent',
      taskId: 'task-1',
      prompt: 'test',
    });
    await harness.advanceTimers();
    // Verify questions array format matches what setAgentQuestions would produce
    const pending = await harness.getPendingQuestions(agent.id);
    expect(pending).not.toBeNull();
    expect(pending?.questions).toHaveLength(1);
    expect(pending?.questions[0].id).toBe('q1');
    expect(pending?.questions[0].question).toBe('Which option?');
    expect(pending?.questions[0].options).toHaveLength(2);
  });
});
describe('seedFixture', () => {
  it('creates task hierarchy from SIMPLE_FIXTURE', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    // Check initiative created
    expect(seeded.initiativeId).toBeDefined();
    // Check phases created
    expect(seeded.phases.size).toBe(1);
    expect(seeded.phases.has('Phase 1')).toBe(true);
    // Check task groups created (each group becomes a parent detail task)
    expect(seeded.taskGroups.size).toBe(1);
    expect(seeded.taskGroups.has('Task Group 1')).toBe(true);
    // Check tasks created, keyed by fixture id
    expect(seeded.tasks.size).toBe(3);
    expect(seeded.tasks.has('Task A')).toBe(true);
    expect(seeded.tasks.has('Task B')).toBe(true);
    expect(seeded.tasks.has('Task C')).toBe(true);
  });

  it('returns correct IDs that exist in database', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    // The map value must be a real database id, not just a placeholder
    const taskAId = seeded.tasks.get('Task A')!;
    const taskA = await harness.taskRepository.findById(taskAId);
    expect(taskA).not.toBeNull();
    expect(taskA?.name).toBe('Task A');
  });

  it('creates PARALLEL_FIXTURE correctly', async () => {
    const seeded = await harness.seedFixture(PARALLEL_FIXTURE);
    expect(seeded.phases.size).toBe(1);
    expect(seeded.taskGroups.size).toBe(2);
    expect(seeded.tasks.size).toBe(4);
    expect(seeded.tasks.has('Task X')).toBe(true);
    expect(seeded.tasks.has('Task Q')).toBe(true);
  });

  it('creates COMPLEX_FIXTURE correctly', async () => {
    const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
    expect(seeded.phases.size).toBe(2);
    expect(seeded.taskGroups.size).toBe(4);
    expect(seeded.tasks.size).toBe(5);
  });
});
describe('task dependencies', () => {
  it('resolves dependencies correctly (dependsOn contains actual task IDs)', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    const taskBId = seeded.tasks.get('Task B')!;
    // Query task_dependencies table directly — the fixture's name-based
    // dependsOn must have been translated into real row ids
    const deps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, taskBId));
    expect(deps.length).toBe(1);
    expect(deps[0].dependsOnTaskId).toBe(taskAId);
  });

  it('creates multiple dependencies for a task', async () => {
    const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
    // Task 4A depends on both Task 2A and Task 3A (fan-in of two edges)
    const task4AId = seeded.tasks.get('Task 4A')!;
    const task2AId = seeded.tasks.get('Task 2A')!;
    const task3AId = seeded.tasks.get('Task 3A')!;
    const deps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task4AId));
    expect(deps.length).toBe(2);
    // Order of rows is not guaranteed, so assert membership, not position
    const depIds = deps.map((d) => d.dependsOnTaskId);
    expect(depIds).toContain(task2AId);
    expect(depIds).toContain(task3AId);
  });
});
describe('event capture', () => {
  it('captures events via getEventsByType', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Queue a task (emits task:queued event through the CapturingEventBus)
    await harness.dispatchManager.queue(taskAId);
    const events = harness.getEventsByType('task:queued');
    expect(events.length).toBe(1);
    expect((events[0].payload as { taskId: string }).taskId).toBe(taskAId);
  });

  it('clears events via clearEvents', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    await harness.dispatchManager.queue(taskAId);
    expect(harness.getEventsByType('task:queued').length).toBe(1);
    // clearEvents drops the captured history but not the bus subscriptions
    harness.clearEvents();
    expect(harness.getEventsByType('task:queued').length).toBe(0);
  });
});
describe('dispatch flow', () => {
  it('dispatchManager.queue() + dispatchNext() uses MockAgentManager', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Note: DispatchManager.dispatchNext() requires an idle agent in the pool
    // before it will spawn a new agent. Pre-seed an idle agent.
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    // Wait for agent to complete and become idle
    await harness.advanceTimers();
    // Queue the task
    await harness.dispatchManager.queue(taskAId);
    // Clear events from queue and agent spawn so later assertions are clean
    harness.clearEvents();
    // Dispatch the task
    const result = await harness.dispatchManager.dispatchNext();
    // Advance timers to trigger mock agent completion
    await harness.advanceTimers();
    expect(result.success).toBe(true);
    expect(result.taskId).toBe(taskAId);
    expect(result.agentId).toBeDefined();
    // Should have emitted task:dispatched exactly once
    const dispatchedEvents = harness.getEventsByType('task:dispatched');
    expect(dispatchedEvents.length).toBe(1);
  });

  it('returns failure when no tasks are queued', async () => {
    // Empty queue: dispatchNext reports the structured failure reason
    const result = await harness.dispatchManager.dispatchNext();
    expect(result.success).toBe(false);
    expect(result.reason).toBe('No dispatchable tasks');
  });

  it('returns failure when no idle agents available', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Queue the task but don't pre-seed any agents
    await harness.dispatchManager.queue(taskAId);
    // Dispatch without any agents in pool
    const result = await harness.dispatchManager.dispatchNext();
    expect(result.success).toBe(false);
    expect(result.reason).toBe('No available agents');
  });
});
describe('agent completion triggers events', () => {
  it('agent completion emits agent:stopped event', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Pre-seed an idle agent (required by DispatchManager)
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Queue and dispatch
    await harness.dispatchManager.queue(taskAId);
    harness.clearEvents();
    await harness.dispatchManager.dispatchNext();
    // Should have agent:spawned for the task agent (pool agent's was cleared)
    const spawnedEvents = harness.getEventsByType('agent:spawned');
    expect(spawnedEvents.length).toBe(1);
    // Advance timers to trigger the mock agent's scripted completion
    await harness.advanceTimers();
    // Should have agent:stopped
    const stoppedEvents = harness.getEventsByType('agent:stopped');
    expect(stoppedEvents.length).toBe(1);
  });

  it('custom scenario affects agent behavior', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Pre-seed an idle agent (required by DispatchManager)
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Set error scenario for the agent that will be spawned; the name matches
    // the dispatcher's `agent-` + first-6-chars-of-task-id convention
    harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
      status: 'error',
      delay: 0,
      error: 'Test crash',
    });
    // Queue and dispatch
    await harness.dispatchManager.queue(taskAId);
    harness.clearEvents();
    await harness.dispatchManager.dispatchNext();
    // Advance timers so the scripted failure fires
    await harness.advanceTimers();
    // Should have agent:crashed instead of a normal stop
    const crashedEvents = harness.getEventsByType('agent:crashed');
    expect(crashedEvents.length).toBe(1);
  });
});
describe('full dispatch -> complete -> merge flow', () => {
  it('works end-to-end', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Pre-seed an idle agent (required by DispatchManager)
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Step 1: Queue task
    await harness.dispatchManager.queue(taskAId);
    // Step 2: Dispatch task
    const dispatchResult = await harness.dispatchManager.dispatchNext();
    expect(dispatchResult.success).toBe(true);
    // Advance timers for agent completion
    await harness.advanceTimers();
    // Clear events for cleaner verification of the completion step
    harness.clearEvents();
    // Step 3: Complete task
    await harness.dispatchManager.completeTask(taskAId);
    // Verify task:completed event carries the right task id
    const completedEvents = harness.getEventsByType('task:completed');
    expect(completedEvents.length).toBe(1);
    expect((completedEvents[0].payload as { taskId: string }).taskId).toBe(taskAId);
    // Step 4: Verify task status persisted in database
    const task = await harness.taskRepository.findById(taskAId);
    expect(task?.status).toBe('completed');
  });
});
describe('MockWorktreeManager', () => {
  it('creates fake worktrees', async () => {
    const tree = await harness.worktreeManager.create('wt-1', 'feature-1');
    // The mock fabricates a path instead of running any git commands.
    expect(tree.id).toBe('wt-1');
    expect(tree.branch).toBe('feature-1');
    expect(tree.path).toContain('wt-1');
  });

  it('merge returns success by default', async () => {
    await harness.worktreeManager.create('wt-1', 'feature-1');
    const outcome = await harness.worktreeManager.merge('wt-1', 'main');
    expect(outcome.success).toBe(true);
  });

  it('allows setting custom merge results', async () => {
    await harness.worktreeManager.create('wt-1', 'feature-1');
    // Pre-program a conflict so failure handling can be exercised.
    harness.worktreeManager.setMergeResult('wt-1', {
      success: false,
      conflicts: ['file1.ts', 'file2.ts'],
      message: 'Merge conflict',
    });
    const outcome = await harness.worktreeManager.merge('wt-1', 'main');
    expect(outcome.success).toBe(false);
    expect(outcome.conflicts).toEqual(['file1.ts', 'file2.ts']);
  });
});
});

636
apps/server/test/harness.ts Normal file
View File

@@ -0,0 +1,636 @@
/**
* Test Harness for E2E Testing
*
* Wires up the full system with mocks for E2E testing.
* Uses real managers (DispatchManager, CoordinationManager) with
* MockAgentManager and MockWorktreeManager for isolation.
*/
import { randomUUID } from 'crypto';
import { vi } from 'vitest';
import type { DrizzleDatabase } from '../db/index.js';
import type { EventBus, DomainEvent } from '../events/types.js';
import { EventEmitterBus } from '../events/bus.js';
import type { AgentManager } from '../agent/types.js';
import { MockAgentManager, type MockAgentScenario } from '../agent/mock-manager.js';
import type { PendingQuestions, QuestionItem } from '../agent/types.js';
import type { WorktreeManager, Worktree, WorktreeDiff, MergeResult } from '../git/types.js';
import type { DispatchManager, PhaseDispatchManager } from '../dispatch/types.js';
import { DefaultDispatchManager } from '../dispatch/manager.js';
import { DefaultPhaseDispatchManager } from '../dispatch/phase-manager.js';
import type { CoordinationManager } from '../coordination/types.js';
import { DefaultCoordinationManager } from '../coordination/manager.js';
import type { TaskRepository } from '../db/repositories/task-repository.js';
import type { MessageRepository } from '../db/repositories/message-repository.js';
import type { AgentRepository } from '../db/repositories/agent-repository.js';
import type { InitiativeRepository } from '../db/repositories/initiative-repository.js';
import type { PhaseRepository } from '../db/repositories/phase-repository.js';
import type { Initiative, Phase, Task } from '../db/schema.js';
import { createTestDatabase } from '../db/repositories/drizzle/test-helpers.js';
import { createRepositories } from '../container.js';
import {
seedFixture,
type InitiativeFixture,
type SeededFixture,
} from './fixtures.js';
import { appRouter, createCallerFactory } from '../trpc/router.js';
import { createContext, type TRPCContext } from '../trpc/context.js';
// =============================================================================
// MockWorktreeManager
// =============================================================================
/**
 * In-memory stand-in for WorktreeManager.
 * Tracks worktrees in a Map and performs no real git operations.
 */
export class MockWorktreeManager implements WorktreeManager {
  private trees: Map<string, Worktree> = new Map();
  private plannedMerges: Map<string, MergeResult> = new Map();

  /**
   * Pre-program the result of a future merge() for one worktree.
   * Lets tests simulate conflict scenarios.
   */
  setMergeResult(worktreeId: string, result: MergeResult): void {
    this.plannedMerges.set(worktreeId, result);
  }

  async create(id: string, branch: string, baseBranch?: string): Promise<Worktree> {
    // baseBranch is accepted for interface compatibility but unused by the mock.
    const tree: Worktree = {
      id,
      branch,
      path: `/tmp/test-worktrees/${id}`,
      isMainWorktree: false,
    };
    this.trees.set(id, tree);
    return tree;
  }

  async remove(id: string): Promise<void> {
    this.requireTree(id);
    this.trees.delete(id);
    this.plannedMerges.delete(id);
  }

  async list(): Promise<Worktree[]> {
    return [...this.trees.values()];
  }

  async get(id: string): Promise<Worktree | null> {
    return this.trees.get(id) ?? null;
  }

  async diff(id: string): Promise<WorktreeDiff> {
    this.requireTree(id);
    // The mock never tracks file changes, so the diff is always empty.
    return {
      files: [],
      summary: 'No changes (mock)',
    };
  }

  async merge(id: string, targetBranch: string): Promise<MergeResult> {
    this.requireTree(id);
    // A pre-programmed result wins; otherwise merges always succeed.
    const planned = this.plannedMerges.get(id);
    if (planned) {
      return planned;
    }
    return {
      success: true,
      message: `Merged ${id} into ${targetBranch} (mock)`,
    };
  }

  /**
   * Clear all worktrees and planned merge results.
   * Useful for test cleanup.
   */
  clear(): void {
    this.trees.clear();
    this.plannedMerges.clear();
  }

  /** Throw for unknown ids, mirroring the real manager's error text. */
  private requireTree(id: string): void {
    if (!this.trees.has(id)) {
      throw new Error(`Worktree not found: ${id}`);
    }
  }
}
// =============================================================================
// CapturingEventBus
// =============================================================================
/**
 * EventBus wrapper that captures all emitted events.
 * Extends EventEmitterBus with event capture functionality.
 */
export class CapturingEventBus extends EventEmitterBus {
  /** All emitted events, in emission order. */
  emittedEvents: DomainEvent[] = [];

  emit<T extends DomainEvent>(event: T): void {
    // Record first, then forward to the real bus so subscribers still fire.
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /**
   * Get events by type.
   */
  getEventsByType(type: string): DomainEvent[] {
    const matches: DomainEvent[] = [];
    for (const event of this.emittedEvents) {
      if (event.type === type) {
        matches.push(event);
      }
    }
    return matches;
  }

  /**
   * Clear captured events.
   */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}
// =============================================================================
// tRPC Caller Type
// =============================================================================
/**
 * Create caller factory for the app router.
 * The resulting caller invokes tRPC procedures directly in-process
 * (no HTTP transport), given a context built via createContext.
 */
const createCaller = createCallerFactory(appRouter);
/**
 * Type for the tRPC caller.
 * Derived from the factory so it always matches the app router's procedures.
 */
export type TRPCCaller = ReturnType<typeof createCaller>;
// =============================================================================
// TestHarness Interface
// =============================================================================
/**
 * Test harness for E2E testing.
 * Provides access to all system components and helper methods.
 *
 * Built by createTestHarness; all members share the same in-memory
 * database and event bus, so state is visible across components.
 */
export interface TestHarness {
  // Core components
  /** In-memory SQLite database */
  db: DrizzleDatabase;
  /** Event bus with event capture */
  eventBus: CapturingEventBus;
  /** Mock agent manager */
  agentManager: MockAgentManager;
  /** Alias for agentManager - used in tests for clarity */
  mockAgentManager: MockAgentManager;
  /** Mock worktree manager */
  worktreeManager: MockWorktreeManager;
  /** Real dispatch manager wired to mocks */
  dispatchManager: DispatchManager;
  /** Real phase dispatch manager wired to phaseRepository */
  phaseDispatchManager: PhaseDispatchManager;
  /** Real coordination manager wired to mocks */
  coordinationManager: CoordinationManager;
  // Repositories
  /** Task repository */
  taskRepository: TaskRepository;
  /** Message repository */
  messageRepository: MessageRepository;
  /** Agent repository */
  agentRepository: AgentRepository;
  /** Initiative repository */
  initiativeRepository: InitiativeRepository;
  /** Phase repository */
  phaseRepository: PhaseRepository;
  // tRPC Caller
  /** tRPC caller for direct procedure calls */
  caller: TRPCCaller;
  // Helpers
  /**
   * Seed a fixture into the database.
   * Delegates to the shared seedFixture helper using this harness's db.
   */
  seedFixture(fixture: InitiativeFixture): Promise<SeededFixture>;
  /**
   * Set scenario for a specific agent name.
   */
  setAgentScenario(agentName: string, scenario: MockAgentScenario): void;
  /**
   * Convenience: Set agent to complete with done status.
   */
  setAgentDone(agentName: string, result?: string): void;
  /**
   * Convenience: Set agent to ask questions (array form).
   */
  setAgentQuestions(
    agentName: string,
    questions: QuestionItem[]
  ): void;
  /**
   * Convenience: Set agent to ask a single question.
   * Wraps the question in an array internally.
   */
  setAgentQuestion(
    agentName: string,
    questionId: string,
    question: string,
    options?: Array<{ label: string; description?: string }>
  ): void;
  /**
   * Convenience: Set agent to fail with unrecoverable error.
   */
  setAgentError(agentName: string, error: string): void;
  /**
   * Get pending questions for an agent.
   */
  getPendingQuestions(agentId: string): Promise<PendingQuestions | null>;
  /**
   * Get events by type.
   */
  getEventsByType(type: string): DomainEvent[];
  /**
   * Get emitted events by type (alias for getEventsByType).
   */
  getEmittedEvents(type: string): DomainEvent[];
  /**
   * Clear all captured events.
   */
  clearEvents(): void;
  /**
   * Clean up all resources.
   * Clears the mock managers and captured events; the in-memory
   * database itself is not closed here.
   */
  cleanup(): void;
  /**
   * Advance fake timers (wrapper for vi.runAllTimersAsync).
   * Only works when vi.useFakeTimers() is active.
   */
  advanceTimers(): Promise<void>;
  /**
   * Run a test body with fake timers enabled.
   * Activates fake timers before the callback and restores real timers after,
   * even if the callback throws.
   */
  withFakeTimers(fn: () => Promise<void>): Promise<void>;
  // ==========================================================================
  // Architect Mode Helpers
  // ==========================================================================
  /**
   * Set up scenario where architect completes discussion.
   */
  setArchitectDiscussComplete(
    agentName: string,
    _decisions: unknown[],
    summary: string
  ): void;
  /**
   * Set up scenario where architect needs more questions in discuss mode.
   */
  setArchitectDiscussQuestions(
    agentName: string,
    questions: QuestionItem[]
  ): void;
  /**
   * Set up scenario where architect completes plan.
   */
  setArchitectPlanComplete(
    agentName: string,
    _phases: unknown[]
  ): void;
  /**
   * Set up scenario where architect completes detail.
   */
  setArchitectDetailComplete(
    agentName: string,
    _tasks: unknown[]
  ): void;
  /**
   * Set up scenario where architect needs questions in detail mode.
   */
  setArchitectDetailQuestions(
    agentName: string,
    questions: QuestionItem[]
  ): void;
  // ==========================================================================
  // Initiative/Phase/Plan Convenience Helpers
  // ==========================================================================
  /**
   * Get initiative by ID through tRPC.
   * Returns null when the lookup fails (e.g. initiative not found).
   */
  getInitiative(id: string): Promise<Initiative | null>;
  /**
   * Get phases for initiative through tRPC.
   */
  getPhases(initiativeId: string): Promise<Phase[]>;
  /**
   * Create initiative through tRPC.
   */
  createInitiative(name: string): Promise<Initiative>;
  /**
   * Create phases from plan output through tRPC.
   */
  createPhasesFromPlan(
    initiativeId: string,
    phases: Array<{ name: string }>
  ): Promise<Phase[]>;
  /**
   * Create a detail task through tRPC (replaces createPlan).
   * The task is created as category 'detail', type 'auto', requiring approval.
   */
  createDetailTask(
    phaseId: string,
    name: string,
    description?: string
  ): Promise<Task>;
  /**
   * Get child tasks of a parent task through tRPC.
   */
  getChildTasks(parentTaskId: string): Promise<Task[]>;
}
// =============================================================================
// createTestHarness Factory
// =============================================================================
/**
 * Create a fully wired test harness for E2E testing.
 *
 * Wires:
 * - In-memory SQLite database
 * - CapturingEventBus (captures all events)
 * - MockAgentManager (simulates agent behavior)
 * - MockWorktreeManager (fake worktrees)
 * - Real DefaultDispatchManager (with mock agent manager)
 * - Real DefaultCoordinationManager (with mock worktree manager)
 * - All repositories (Drizzle implementations)
 * - tRPC caller with full context
 */
export function createTestHarness(): TestHarness {
  // Create database
  const db = createTestDatabase();
  // Create event bus with capture
  const eventBus = new CapturingEventBus();
  // Create mock managers
  const agentManager = new MockAgentManager({ eventBus });
  const worktreeManager = new MockWorktreeManager();
  // Create repositories
  const repos = createRepositories(db);
  const { taskRepository, messageRepository, agentRepository, initiativeRepository, phaseRepository } = repos;
  // Create real managers wired to mocks
  const dispatchManager = new DefaultDispatchManager(
    taskRepository,
    messageRepository,
    agentManager,
    eventBus
  );
  const phaseDispatchManager = new DefaultPhaseDispatchManager(
    phaseRepository,
    taskRepository,
    dispatchManager,
    eventBus
  );
  const coordinationManager = new DefaultCoordinationManager(
    worktreeManager,
    taskRepository,
    agentRepository,
    messageRepository,
    eventBus
  );
  // Create tRPC context with all dependencies
  const ctx: TRPCContext = createContext({
    eventBus,
    serverStartedAt: new Date(),
    processCount: 0,
    agentManager,
    taskRepository,
    messageRepository,
    dispatchManager,
    phaseDispatchManager,
    coordinationManager,
    initiativeRepository,
    phaseRepository,
  });
  // Create tRPC caller
  const caller = createCaller(ctx);
  // Build harness
  const harness: TestHarness = {
    // Core components
    db,
    eventBus,
    agentManager,
    mockAgentManager: agentManager, // Alias for clarity in tests
    worktreeManager,
    dispatchManager,
    phaseDispatchManager,
    coordinationManager,
    // Repositories
    taskRepository,
    messageRepository,
    agentRepository,
    initiativeRepository,
    phaseRepository,
    // tRPC Caller
    caller,
    // Helpers
    seedFixture: (fixture: InitiativeFixture) => seedFixture(db, fixture),
    setAgentScenario: (agentName: string, scenario: MockAgentScenario) => {
      agentManager.setScenario(agentName, scenario);
    },
    setAgentDone: (agentName: string, result?: string) => {
      agentManager.setScenario(agentName, { status: 'done', result });
    },
    setAgentQuestions: (
      agentName: string,
      questions: QuestionItem[]
    ) => {
      agentManager.setScenario(agentName, { status: 'questions', questions });
    },
    setAgentQuestion: (
      agentName: string,
      questionId: string,
      question: string,
      options?: Array<{ label: string; description?: string }>
    ) => {
      // Single-question convenience: wrap into the array form expected by the mock.
      agentManager.setScenario(agentName, {
        status: 'questions',
        questions: [{ id: questionId, question, options }],
      });
    },
    setAgentError: (agentName: string, error: string) => {
      agentManager.setScenario(agentName, { status: 'error', error });
    },
    getPendingQuestions: (agentId: string) => agentManager.getPendingQuestions(agentId),
    getEventsByType: (type: string) => eventBus.getEventsByType(type),
    getEmittedEvents: (type: string) => eventBus.getEventsByType(type),
    clearEvents: () => eventBus.clearEvents(),
    // NOTE: cleanup resets the mocks and captured events; the in-memory db
    // is left alone (it is discarded with the harness).
    cleanup: () => {
      agentManager.clear();
      worktreeManager.clear();
      eventBus.clearEvents();
    },
    // Timer helper - requires vi.useFakeTimers() to be active
    advanceTimers: async () => { await vi.runAllTimersAsync(); },
    withFakeTimers: async (fn: () => Promise<void>) => {
      vi.useFakeTimers();
      try {
        await fn();
      } finally {
        // Always restore real timers, even if the callback throws.
        vi.useRealTimers();
      }
    },
    // ========================================================================
    // Architect Mode Helpers
    // ========================================================================
    // NOTE: the underscore-prefixed payload params (_decisions/_phases/_tasks)
    // are currently unused; these helpers only drive the mock agent's
    // terminal status/result/questions.
    setArchitectDiscussComplete: (
      agentName: string,
      _decisions: unknown[],
      summary: string
    ) => {
      agentManager.setScenario(agentName, {
        status: 'done',
        result: summary,
        delay: 0,
      });
    },
    setArchitectDiscussQuestions: (
      agentName: string,
      questions: QuestionItem[]
    ) => {
      agentManager.setScenario(agentName, {
        status: 'questions',
        questions,
        delay: 0,
      });
    },
    setArchitectPlanComplete: (
      agentName: string,
      _phases: unknown[]
    ) => {
      agentManager.setScenario(agentName, {
        status: 'done',
        result: 'Plan complete',
        delay: 0,
      });
    },
    setArchitectDetailComplete: (
      agentName: string,
      _tasks: unknown[]
    ) => {
      agentManager.setScenario(agentName, {
        status: 'done',
        result: 'Detail complete',
        delay: 0,
      });
    },
    setArchitectDetailQuestions: (
      agentName: string,
      questions: QuestionItem[]
    ) => {
      agentManager.setScenario(agentName, {
        status: 'questions',
        questions,
        delay: 0,
      });
    },
    // ========================================================================
    // Initiative/Phase/Plan Convenience Helpers
    // ========================================================================
    // getInitiative swallows lookup errors (e.g. tRPC NOT_FOUND) and
    // returns null so tests can assert absence without try/catch.
    getInitiative: async (id: string) => {
      try {
        return await caller.getInitiative({ id });
      } catch {
        return null;
      }
    },
    getPhases: (initiativeId: string) => {
      return caller.listPhases({ initiativeId });
    },
    createInitiative: (name: string) => {
      return caller.createInitiative({ name });
    },
    createPhasesFromPlan: (
      initiativeId: string,
      phases: Array<{ name: string }>
    ) => {
      return caller.createPhasesFromPlan({ initiativeId, phases });
    },
    // Detail tasks are created as auto tasks that require approval.
    createDetailTask: async (phaseId: string, name: string, description?: string) => {
      return caller.createPhaseTask({
        phaseId,
        name,
        description,
        category: 'detail',
        type: 'auto',
        requiresApproval: true,
      });
    },
    getChildTasks: (parentTaskId: string) => {
      return caller.listTasks({ parentTaskId });
    },
  };
  return harness;
}

27
apps/server/test/index.ts Normal file
View File

@@ -0,0 +1,27 @@
/**
* Test Module
*
* Provides test harness and fixtures for E2E testing.
*/
// Fixture helpers — database seeding plus prebuilt fixture constants
export {
  seedFixture,
  type TaskFixture,
  type TaskGroupFixture,
  type PhaseFixture,
  type InitiativeFixture,
  type SeededFixture,
  SIMPLE_FIXTURE,
  PARALLEL_FIXTURE,
  COMPLEX_FIXTURE,
} from './fixtures.js';
// Test harness — fully wired in-memory system plus capturing/mock components
export {
  createTestHarness,
  MockWorktreeManager,
  CapturingEventBus,
  type TestHarness,
  type TRPCCaller,
} from './harness.js';

View File

@@ -0,0 +1,203 @@
/**
* Agent Working Directory Verification Tests
*
* Tests that verify agents actually run in their intended working directories.
* These tests use simple shell commands to prove the agent execution location.
*
* IMPORTANT: These tests spawn real CLI processes and may incur API costs.
* They are SKIPPED by default to prevent accidental charges.
*
* To run these tests:
* ```bash
* REAL_WORKDIR_TESTS=1 npm test -- src/test/integration/agent-workdir-verification.test.ts --test-timeout=120000
* ```
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { mkdtemp, rm, readFile } from 'node:fs/promises';
import { existsSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { MultiProviderAgentManager } from '../../agent/manager.js';
import { createTestDatabase } from '../../db/repositories/drizzle/test-helpers.js';
import {
DrizzleAgentRepository,
DrizzleProjectRepository,
DrizzleAccountRepository,
DrizzleInitiativeRepository,
} from '../../db/repositories/drizzle/index.js';
import { EventEmitterBus } from '../../events/bus.js';
// Opt-in guard: these tests spawn real CLI agents and cost API credits.
const SHOULD_SKIP = !process.env.REAL_WORKDIR_TESTS;
const TEST_TIMEOUT = 60000; // per-test cap (ms); the polling loop below also stops at ~60s
describe.skipIf(SHOULD_SKIP)('Agent Working Directory Verification', () => {
  let tempDir: string;
  let agentManager: MultiProviderAgentManager;
  let agentRepository: DrizzleAgentRepository;
  beforeAll(async () => {
    if (SHOULD_SKIP) return;
    console.log('\n=== Running Agent Working Directory Tests ===');
    console.log('These tests verify agents run in correct working directories.\n');
    // Create temp directory for test workspace
    tempDir = await mkdtemp(join(tmpdir(), 'cw-workdir-test-'));
    // Set up test database and repositories
    // NOTE(review): createTestDatabase is used synchronously elsewhere in the
    // codebase; the await here is harmless either way — confirm its signature.
    const db = await createTestDatabase();
    const eventBus = new EventEmitterBus();
    agentRepository = new DrizzleAgentRepository(db);
    const projectRepository = new DrizzleProjectRepository(db);
    const accountRepository = new DrizzleAccountRepository(db);
    agentManager = new MultiProviderAgentManager(
      agentRepository,
      tempDir,
      projectRepository,
      accountRepository,
      eventBus,
    );
  });
  afterAll(async () => {
    if (SHOULD_SKIP || !tempDir) return;
    try {
      await rm(tempDir, { recursive: true });
    } catch (err) {
      // Cleanup is best-effort; leftover temp dirs are only a nuisance.
      console.warn('Failed to cleanup temp directory:', err);
    }
  });
  it('spawns agent in correct standalone working directory', async () => {
    // The prompt instructs the agent to record its own pwd so we can compare
    // it against the directory the manager claims to have spawned it in.
    const prompt = `
Write your current working directory to a file called 'verify-pwd.txt'.
Use this exact bash command:
pwd > verify-pwd.txt
Then output the signal: {"done": true}
`.trim();
    // Spawn standalone agent
    const agent = await agentManager.spawn({
      taskId: null,
      prompt,
      mode: 'execute',
      provider: 'claude',
    });
    expect(agent.id).toBeTruthy();
    expect(agent.status).toBe('running');
    // Wait for completion (poll agent status)
    let attempts = 0;
    const maxAttempts = 60; // 60 seconds timeout
    while (attempts < maxAttempts) {
      await new Promise(resolve => setTimeout(resolve, 1000));
      attempts++;
      const currentAgent = await agentRepository.findById(agent.id);
      if (!currentAgent || currentAgent.status !== 'running') {
        break;
      }
    }
    // Verify final agent state
    const completedAgent = await agentRepository.findById(agent.id);
    expect(completedAgent).toBeTruthy();
    expect(completedAgent!.status).not.toBe('running');
    // Get the agent's expected working directory
    const expectedWorkdir = join(tempDir, 'agent-workdirs', agent.name, 'workspace');
    // Read diagnostic files
    const diagnosticFile = join(expectedWorkdir, '.cw', 'spawn-diagnostic.json');
    const expectedPwdFile = join(expectedWorkdir, '.cw', 'expected-pwd.txt');
    const verifyPwdFile = join(expectedWorkdir, 'verify-pwd.txt');
    // Verify diagnostic files exist
    expect(existsSync(diagnosticFile), 'spawn diagnostic file should exist').toBe(true);
    expect(existsSync(expectedPwdFile), 'expected pwd file should exist').toBe(true);
    // Read diagnostic data
    const diagnostic = JSON.parse(await readFile(diagnosticFile, 'utf-8'));
    const expectedPwd = (await readFile(expectedPwdFile, 'utf-8')).trim();
    console.log('Diagnostic data:', diagnostic);
    console.log('Expected working directory:', expectedPwd);
    // Verify diagnostic consistency
    expect(diagnostic.intendedCwd).toBe(expectedWorkdir);
    expect(diagnostic.cwdExistsAtSpawn).toBe(true);
    expect(expectedPwd).toBe(expectedWorkdir);
    // The critical test: verify the agent actually wrote the file in the expected location
    if (existsSync(verifyPwdFile)) {
      const actualPwd = (await readFile(verifyPwdFile, 'utf-8')).trim();
      console.log('Agent reported working directory:', actualPwd);
      // This is the key verification: the pwd reported by the agent should match expected
      expect(actualPwd).toBe(expectedWorkdir);
    } else {
      // If the file doesn't exist, the agent either failed or ran somewhere else
      console.warn('Agent did not create verify-pwd.txt file');
      console.log('Expected at:', verifyPwdFile);
      // Let's check if it was created elsewhere (debugging)
      const alternativeLocations = [
        join(tempDir, 'verify-pwd.txt'),
        join(process.cwd(), 'verify-pwd.txt'),
      ];
      for (const loc of alternativeLocations) {
        if (existsSync(loc)) {
          const content = await readFile(loc, 'utf-8');
          console.log(`Found verify-pwd.txt at unexpected location ${loc}:`, content.trim());
        }
      }
      throw new Error('Agent did not create pwd verification file in expected location');
    }
  }, TEST_TIMEOUT);
  it('creates diagnostic files with correct metadata', async () => {
    // Minimal prompt: we only care about the spawn-time diagnostics here.
    const prompt = `Output the signal: {"done": true}`;
    const agent = await agentManager.spawn({
      taskId: null,
      prompt,
      mode: 'execute',
      provider: 'claude',
    });
    // Wait a bit for spawn to complete
    await new Promise(resolve => setTimeout(resolve, 2000));
    const expectedWorkdir = join(tempDir, 'agent-workdirs', agent.name, 'workspace');
    const diagnosticFile = join(expectedWorkdir, '.cw', 'spawn-diagnostic.json');
    const expectedPwdFile = join(expectedWorkdir, '.cw', 'expected-pwd.txt');
    // Verify files exist immediately after spawn
    expect(existsSync(diagnosticFile), 'diagnostic file should be created after spawn').toBe(true);
    expect(existsSync(expectedPwdFile), 'expected pwd file should be created').toBe(true);
    // Verify diagnostic content
    const diagnostic = JSON.parse(await readFile(diagnosticFile, 'utf-8'));
    const expectedPwd = (await readFile(expectedPwdFile, 'utf-8')).trim();
    expect(diagnostic.agentId).toBe(agent.id);
    expect(diagnostic.alias).toBe(agent.name);
    expect(diagnostic.intendedCwd).toBe(expectedWorkdir);
    expect(diagnostic.provider).toBe('claude');
    expect(diagnostic.cwdExistsAtSpawn).toBe(true);
    expect(diagnostic.customCwdProvided).toBe(false);
    expect(typeof diagnostic.timestamp).toBe('string');
    expect(Array.isArray(diagnostic.args)).toBe(true);
    expect(expectedPwd).toBe(expectedWorkdir);
  });
});

View File

@@ -0,0 +1,232 @@
/**
* Integration test to reproduce and fix the crash marking race condition.
*
* This test simulates the exact scenario where agents complete successfully
* but get marked as crashed due to timing issues in the output handler.
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { writeFile, mkdir, rm } from 'node:fs/promises';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { randomBytes } from 'node:crypto';
import { OutputHandler } from '../../agent/output-handler.js';
import type { AgentRepository } from '../../db/repositories/agent-repository.js';
/**
 * Minimal local mirror of the persisted agent row, as consumed by the
 * mock repository below. Values are plain test data — no real agent exists.
 */
interface TestAgent {
  id: string;
  name: string;
  /** Lifecycle status; tests assert on transitions away from 'running'. */
  status: 'idle' | 'running' | 'waiting_for_input' | 'stopped' | 'crashed';
  mode: 'execute' | 'discuss' | 'plan' | 'detail' | 'refine';
  taskId: string | null;
  sessionId: string | null;
  /** Used by the output handler to resolve the agent's working directory. */
  worktreeId: string;
  createdAt: Date;
  updatedAt: Date;
  provider: string;
  accountId: string | null;
  pid: number | null;
  outputFilePath: string | null;
  result: string | null;
  pendingQuestions: string | null;
  initiativeId: string | null;
  userDismissedAt: Date | null;
  exitCode: number | null;
}
describe('Crash marking race condition', () => {
  let outputHandler: OutputHandler;
  let testAgent: TestAgent;
  let testDir: string;
  let mockRepo: AgentRepository;
  // Track all repository calls
  let updateCalls: Array<{ id: string; data: any }> = [];
  // Last status written via mockRepo.update; null until an update happens.
  let finalAgentStatus: string | null = null;
  beforeEach(async () => {
    updateCalls = [];
    finalAgentStatus = null;
    // Create test directory structure
    testDir = join(tmpdir(), `crash-test-${randomBytes(8).toString('hex')}`);
    const outputDir = join(testDir, '.cw/output');
    await mkdir(outputDir, { recursive: true });
    // Create test agent
    testAgent = {
      id: 'test-agent-id',
      name: 'test-agent',
      status: 'running',
      mode: 'refine',
      taskId: 'task-1',
      sessionId: 'session-1',
      worktreeId: 'worktree-1',
      createdAt: new Date(),
      updatedAt: new Date(),
      provider: 'claude',
      accountId: null,
      pid: 12345,
      outputFilePath: join(testDir, 'output.jsonl'),
      result: null,
      pendingQuestions: null,
      initiativeId: 'init-1',
      userDismissedAt: null,
      exitCode: null
    };
    // Mock repository that tracks all update calls
    // Only findById/update are implemented; anything else throws so an
    // unexpected call fails the test loudly.
    mockRepo = {
      async findById(id: string) {
        return id === testAgent.id ? { ...testAgent } : null;
      },
      async update(id: string, data: any) {
        updateCalls.push({ id, data });
        if (data.status) {
          finalAgentStatus = data.status;
          testAgent.status = data.status;
        }
        return { ...testAgent, ...data };
      },
      async create() { throw new Error('Not implemented'); },
      async findAll() { throw new Error('Not implemented'); },
      async findByStatus() { throw new Error('Not implemented'); },
      async findByTaskId() { throw new Error('Not implemented'); },
      async findByName() { throw new Error('Not implemented'); },
      async findBySessionId() { throw new Error('Not implemented'); },
      async delete() { throw new Error('Not implemented'); }
    };
    outputHandler = new OutputHandler(mockRepo);
  });
  afterEach(async () => {
    try {
      await rm(testDir, { recursive: true });
    } catch {
      // Ignore cleanup errors
    }
  });
  it('should NOT mark agent as crashed when signal.json indicates completion', async () => {
    // SETUP: Create a valid completion signal that should prevent crash marking
    const signalPath = join(testDir, '.cw/output/signal.json');
    const signalContent = {
      status: 'questions',
      questions: [
        { id: 'q1', question: 'Test question?' }
      ]
    };
    await writeFile(signalPath, JSON.stringify(signalContent, null, 2));
    // SETUP: Create empty output file to simulate "no new output detected" scenario
    const outputFilePath = join(testDir, 'output.jsonl');
    await writeFile(outputFilePath, ''); // Empty file simulates the race condition
    // Mock active agent with output file path
    const mockActive = {
      outputFilePath,
      streamSessionId: 'session-1'
    };
    // Mock getAgentWorkdir function — receives worktreeId, not agentId
    const getAgentWorkdir = (worktreeId: string) => {
      expect(worktreeId).toBe(testAgent.worktreeId);
      return testDir;
    };
    // EXECUTE: Call handleCompletion which should trigger the race condition scenario
    // This simulates: no stream text + no new file content + valid signal.json
    // NOTE(review): handleCompletion is private; the `as any` cast is a
    // deliberate white-box test of internal behavior.
    await (outputHandler as any).handleCompletion(
      testAgent.id,
      mockActive,
      getAgentWorkdir
    );
    // VERIFY: Agent should NOT be marked as crashed
    console.log('Update calls:', updateCalls);
    console.log('Final agent status:', finalAgentStatus);
    expect(updateCalls.length).toBeGreaterThan(0);
    expect(finalAgentStatus).not.toBe('crashed');
    // Should be marked with the appropriate completion status
    expect(['idle', 'waiting_for_input', 'stopped']).toContain(finalAgentStatus);
  });
  it('should mark agent as crashed when no completion signal exists', async () => {
    // SETUP: No signal.json file exists - agent should be marked as crashed
    const outputFilePath = join(testDir, 'output.jsonl');
    await writeFile(outputFilePath, ''); // Empty file
    const mockActive = {
      outputFilePath,
      streamSessionId: 'session-1'
    };
    // NOTE(review): the first test shows this callback receives worktreeId;
    // the parameter name `agentId` here is misleading — consider renaming.
    const getAgentWorkdir = (agentId: string) => testDir;
    // EXECUTE: This should mark agent as crashed since no completion signal exists
    await (outputHandler as any).handleCompletion(
      testAgent.id,
      mockActive,
      getAgentWorkdir
    );
    // VERIFY: Agent SHOULD be marked as crashed
    expect(finalAgentStatus).toBe('crashed');
  });
  it('should handle the exact slim-wildebeest scenario', async () => {
    // SETUP: Reproduce the exact conditions that slim-wildebeest had
    const signalPath = join(testDir, '.cw/output/signal.json');
    const exactSignalContent = {
      "status": "questions",
      "questions": [
        {
          "id": "q1",
          "question": "What UI framework/styling system is the admin UI currently using that needs to be replaced?"
        },
        {
          "id": "q2",
          "question": "What specific problems with the current admin UI are we solving? (e.g., poor developer experience, design inconsistency, performance issues, lack of accessibility)"
        }
      ]
    };
    await writeFile(signalPath, JSON.stringify(exactSignalContent, null, 2));
    // Create SUMMARY.md like slim-wildebeest had
    const summaryPath = join(testDir, '.cw/output/SUMMARY.md');
    const summaryContent = `---
files_modified: []
---
Initiative page is essentially empty — lacks context, scope, goals, and technical approach. Requested clarification on current state, problems being solved, scope boundaries, and success criteria before proposing meaningful improvements.`;
    await writeFile(summaryPath, summaryContent);
    // Simulate the output file scenario
    const outputFilePath = join(testDir, 'output.jsonl');
    await writeFile(outputFilePath, 'some initial content\n'); // Some content but no new lines
    const mockActive = {
      outputFilePath,
      streamSessionId: 'session-1'
    };
    // NOTE(review): parameter name `agentId` is misleading — see note above.
    const getAgentWorkdir = (agentId: string) => testDir;
    // EXECUTE: This is the exact scenario that caused slim-wildebeest to be marked as crashed
    await (outputHandler as any).handleCompletion(
      testAgent.id,
      mockActive,
      getAgentWorkdir
    );
    // VERIFY: This should NOT be marked as crashed
    console.log('slim-wildebeest scenario - Final status:', finalAgentStatus);
    console.log('slim-wildebeest scenario - Update calls:', updateCalls);
    expect(finalAgentStatus).not.toBe('crashed');
    expect(['idle', 'waiting_for_input', 'stopped']).toContain(finalAgentStatus);
  });
});

View File

@@ -0,0 +1,244 @@
/**
* Full-Flow Cassette Integration Test
*
* Cassette-backed variant of the full multi-agent workflow test.
* Runs the same discuss → plan → detail → execute pipeline but intercepts
* subprocess spawning with CassetteProcessManager — no real API calls in CI.
*
* Recording (one-time, costs ~$25):
* CW_CASSETTE_RECORD=1 npm test -- src/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
* # Commit the generated src/test/cassettes/<hash>.json files afterward
*
* Replay (default — runs in seconds):
* npm test -- src/test/integration/full-flow/full-flow-cassette.test.ts
*
* Force re-record (overwrites existing cassettes):
* CW_CASSETTE_FORCE_RECORD=1 npm test -- src/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { existsSync, readdirSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import type { Phase, Task } from '../../../db/schema.js';
import type { AgentResult } from '../../../agent/types.js';
import { buildExecutePrompt } from '../../../agent/prompts/index.js';
import { CassetteStore } from '../../cassette/store.js';
import { CassetteProcessManager, type CassetteMode } from '../../cassette/process-manager.js';
import {
createFullFlowHarness,
type FullFlowHarness,
} from './harness.js';
import {
printHeader,
printDiscussResult,
printPlanResult,
printDetailResult,
printExecuteResult,
printFinalSummary,
type ExecutedTask,
} from './report.js';
// =============================================================================
// Constants
// =============================================================================
// True for any recording run — forced (overwrite) or normal (record-if-missing).
const RECORDING =
  process.env.CW_CASSETTE_FORCE_RECORD === '1' || process.env.CW_CASSETTE_RECORD === '1';
/**
 * Test timeout.
 * - Replay: 5 min (cassettes complete in seconds; cap is generous headroom)
 * - Record: 60 min (real agents doing discuss/plan/detail/execute take API time)
 */
const CASSETTE_FLOW_TIMEOUT = RECORDING ? 60 * 60_000 : 5 * 60_000;
// ESM has no __dirname; rebuild it from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Cassette location; overridable via CW_CASSETTE_DIR for ad-hoc runs.
const CASSETTE_DIR =
  process.env.CW_CASSETTE_DIR ?? join(__dirname, '../../cassettes');
// =============================================================================
// Mode helper
// =============================================================================
/**
 * Resolve the cassette mode from the environment.
 * Force-record takes precedence over normal recording; with neither flag
 * set, the suite replays previously recorded cassettes.
 */
function cassetteMode(): CassetteMode {
  const { CW_CASSETTE_FORCE_RECORD, CW_CASSETTE_RECORD } = process.env;
  if (CW_CASSETTE_FORCE_RECORD === '1') {
    return 'record';
  }
  return CW_CASSETTE_RECORD === '1' ? 'auto' : 'replay';
}
/**
 * True when cassettes are available (at least one .json file) OR we're in a
 * recording run. Skips the suite if no cassettes have been recorded yet so
 * that `npm test` doesn't fail on a fresh clone before cassettes are committed.
 */
function cassettesAvailable(): boolean {
  // Recording runs always proceed — they create the cassettes themselves.
  if (cassetteMode() !== 'replay') {
    return true;
  }
  if (!existsSync(CASSETTE_DIR)) {
    return false;
  }
  for (const name of readdirSync(CASSETTE_DIR)) {
    if (name.endsWith('.json')) {
      return true;
    }
  }
  return false;
}
// =============================================================================
// Test
// =============================================================================
// Runs the entire discuss → plan → detail → execute pipeline against recorded
// cassettes (or records them, depending on cassetteMode()). Stages are strictly
// sequential; each later stage consumes DB rows written by the previous one.
describe.skipIf(!cassettesAvailable())('full flow (cassette replay)', () => {
  let harness: FullFlowHarness;
  // Wall-clock start used for the duration line in the final summary.
  const startedAt = Date.now();
  beforeAll(async () => {
    // Swap the real process manager for the cassette-backed one so agent turns
    // are replayed from (or recorded into) CASSETTE_DIR instead of calling APIs.
    const store = new CassetteStore(CASSETTE_DIR);
    const mode = cassetteMode();
    harness = await createFullFlowHarness('Add complete() method to TodoStore', {
      processManagerFactory: (workspaceRoot, projectRepo) =>
        new CassetteProcessManager(workspaceRoot, projectRepo, store, mode),
    });
    printHeader(harness.initiative.name);
    console.log(`  Cassette mode : ${mode}`);
    console.log(`  Cassette dir  : ${CASSETTE_DIR}`);
    console.log(`  Initiative ID : ${harness.initiative.id}`);
    console.log(`  Workspace     : ${harness.workspaceRoot}`);
  }, CASSETTE_FLOW_TIMEOUT);
  afterAll(async () => {
    // harness may be undefined if beforeAll failed before assignment.
    if (harness) await harness.cleanup();
  });
  it(
    'runs the complete multi-agent workflow from cassettes',
    async () => {
      const { initiative, caller, agentManager, phaseRepository, taskRepository } = harness;
      const initiativeId = initiative.id;
      // ── Stage 2: Discuss ───────────────────────────────────────────────────
      console.log('\n\n>>> Stage 2: DISCUSS <<<');
      const discussAgent = await caller.spawnArchitectDiscuss({ initiativeId });
      expect(discussAgent.id).toBeTruthy();
      console.log(`  Spawned discuss agent: ${discussAgent.name} (${discussAgent.id})`);
      const discussResult = await harness.driveToCompletion(
        discussAgent.id,
        'Use your best judgment and keep it simple. The focus is implementing complete(id) on TodoStore.',
        CASSETTE_FLOW_TIMEOUT,
      );
      printDiscussResult(discussAgent.id, discussResult);
      // Discuss failure is non-fatal: planning can proceed without its output.
      if (!discussResult?.success) {
        console.warn('  [WARN] discuss agent did not succeed; continuing to plan stage');
      }
      // ── Stage 3: Plan ──────────────────────────────────────────────────────
      console.log('\n\n>>> Stage 3: PLAN <<<');
      const planAgent = await caller.spawnArchitectPlan({ initiativeId });
      expect(planAgent.id).toBeTruthy();
      console.log(`  Spawned plan agent: ${planAgent.name} (${planAgent.id})`);
      const planResult = await harness.driveToCompletion(
        planAgent.id,
        'Keep it simple.',
        CASSETTE_FLOW_TIMEOUT,
      );
      expect(planResult).toBeTruthy();
      // Planning must have produced at least one phase row.
      const phases: Phase[] = await phaseRepository.findByInitiativeId(initiativeId);
      expect(phases.length).toBeGreaterThan(0);
      printPlanResult(phases);
      // ── Stage 4: Detail (per phase) ────────────────────────────────────────
      console.log('\n\n>>> Stage 4: DETAIL <<<');
      for (const phase of phases) {
        const detailAgent = await caller.spawnArchitectDetail({ phaseId: phase.id });
        expect(detailAgent.id).toBeTruthy();
        console.log(`  Spawned detail agent for phase "${phase.name}": ${detailAgent.name}`);
        const detailResult = await harness.driveToCompletion(
          detailAgent.id,
          'Keep it simple.',
          CASSETTE_FLOW_TIMEOUT,
        );
        expect(detailResult).toBeTruthy();
        // Every phase must yield at least one auto-executable task.
        const phaseTasks = await taskRepository.findByPhaseId(phase.id);
        const executeTasks = phaseTasks.filter((t) => t.category === 'execute' && t.type === 'auto');
        expect(executeTasks.length).toBeGreaterThan(0);
        printDetailResult(phase, phaseTasks);
      }
      // ── Stage 5: Execute ───────────────────────────────────────────────────
      console.log('\n\n>>> Stage 5: EXECUTE <<<');
      const allTasks = await gatherAllExecuteTasks(taskRepository, phases);
      console.log(`  Found ${allTasks.length} execute task(s) across ${phases.length} phase(s)`);
      const executed: ExecutedTask[] = [];
      for (const task of allTasks) {
        console.log(`  Spawning execute agent for: "${task.name}"`);
        const execAgent = await agentManager.spawn({
          taskId: task.id,
          prompt: buildExecutePrompt(task.description ?? task.name),
          mode: 'execute',
          initiativeId,
          phaseId: task.phaseId ?? undefined,
          inputContext: {
            initiative,
            task,
          },
        });
        console.log(`    Agent: ${execAgent.name} (${execAgent.id})`);
        const result = await harness.driveToCompletion(
          execAgent.id,
          'Use your best judgment and keep it simple.',
          CASSETTE_FLOW_TIMEOUT,
        );
        executed.push({ task, result });
        const icon = result?.success ? '✓' : '✗';
        console.log(`    ${icon} Completed with success=${result?.success ?? null}`);
        if (result && !result.success) {
          console.log(`    Message: ${result.message?.slice(0, 200)}`);
        }
      }
      printExecuteResult(executed);
      // ── Assertions ─────────────────────────────────────────────────────────
      // Only "at least one task ran" is hard-asserted; individual execute
      // failures are reported as warnings rather than failing the test.
      expect(executed.length).toBeGreaterThan(0);
      const allSucceeded = executed.every((e) => e.result?.success === true);
      if (!allSucceeded) {
        const failed = executed.filter((e) => !e.result?.success);
        console.warn(`  [WARN] ${failed.length} execute task(s) did not succeed`);
      }
      // ── Final summary ──────────────────────────────────────────────────────
      printFinalSummary(
        initiative.name,
        phases,
        allTasks,
        executed,
        Date.now() - startedAt,
      );
    },
    CASSETTE_FLOW_TIMEOUT,
  );
});
// =============================================================================
// Helpers
// =============================================================================
/**
 * Collect every auto-executable task across the given phases.
 *
 * Phases are queried sequentially (preserving phase order in the result);
 * only tasks with category 'execute' and type 'auto' are kept.
 */
async function gatherAllExecuteTasks(
  taskRepository: FullFlowHarness['taskRepository'],
  phases: Phase[],
): Promise<Task[]> {
  const collected: Task[] = [];
  for (const { id } of phases) {
    const tasksForPhase = await taskRepository.findByPhaseId(id);
    collected.push(...tasksForPhase.filter((t) => t.category === 'execute' && t.type === 'auto'));
  }
  return collected;
}

View File

@@ -0,0 +1,399 @@
/**
* Full-Flow Test Harness
*
* Wires up the complete system with real agents for end-to-end multi-agent
* workflow testing: discuss → plan → detail → execute.
*
* Unlike the standard TestHarness (MockAgentManager) or RealProviderHarness
* (agents only), this harness adds:
* - All 11 repositories
* - tRPC caller for architect/agent procedures
* - A self-contained fixture git repo (todo-api) for agents to work on
* - Helpers for driving agents through question/answer loops
*
* Used by full-flow-cassette.test.ts (replay) and for manual recording runs.
*/
import { mkdtemp, rm, cp } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { execSync } from 'node:child_process';
import type { DrizzleDatabase } from '../../../db/index.js';
import type { DomainEvent } from '../../../events/types.js';
import { EventEmitterBus } from '../../../events/bus.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import type { AgentResult, PendingQuestions } from '../../../agent/types.js';
import type { Initiative, Project, Phase, Task } from '../../../db/schema.js';
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
import type { PhaseRepository } from '../../../db/repositories/phase-repository.js';
import type { TaskRepository } from '../../../db/repositories/task-repository.js';
import type { MessageRepository } from '../../../db/repositories/message-repository.js';
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
import type { PageRepository } from '../../../db/repositories/page-repository.js';
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
import type { ChangeSetRepository } from '../../../db/repositories/change-set-repository.js';
import type { LogChunkRepository } from '../../../db/repositories/log-chunk-repository.js';
import type { ConversationRepository } from '../../../db/repositories/conversation-repository.js';
import type { ProcessManager } from '../../../agent/process-manager.js';
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';
import { createRepositories } from '../../../container.js';
import { DefaultDispatchManager } from '../../../dispatch/manager.js';
import { appRouter, createCallerFactory } from '../../../trpc/router.js';
import { createContext } from '../../../trpc/context.js';
// =============================================================================
// CapturingEventBus
// =============================================================================
/**
 * Event bus that records every emitted event while still delivering it to
 * subscribers, so tests can assert on the emission history afterwards.
 */
export class CapturingEventBus extends EventEmitterBus {
  /** All events emitted through this bus, in emission order. */
  emittedEvents: DomainEvent[] = [];

  /** Record the event first, then forward it to regular subscribers. */
  emit<T extends DomainEvent>(event: T): void {
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /** Return the captured events whose `type` matches exactly. */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
    const matches: T[] = [];
    for (const candidate of this.emittedEvents) {
      if (candidate.type === type) matches.push(candidate as T);
    }
    return matches;
  }

  /** Forget all captured events (replaces the array with a fresh one). */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}
// =============================================================================
// Sleep helper
// =============================================================================
/** Resolve after roughly `ms` milliseconds (setTimeout-based delay). */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
// =============================================================================
// tRPC caller type
// =============================================================================
// Bind the app router once; each harness builds its own context-scoped caller.
const createCaller = createCallerFactory(appRouter);
// Fully-typed caller shape exposed to tests via FullFlowHarness.caller.
export type FullFlowCaller = ReturnType<typeof createCaller>;
// =============================================================================
// FullFlowHarness interface
// =============================================================================
/** Status of an agent that requires attention: done, waiting for answers, or crashed */
export type AgentAttentionStatus = 'done' | 'waiting' | 'crashed';
/**
 * Everything a full-flow test needs: temp directories, the registered fixture
 * project and initiative, a tRPC caller, the real agent manager, all
 * repositories, and polling helpers for driving agents to completion.
 */
export interface FullFlowHarness {
  /** Absolute path to the CW workspace (worktrees are created here) */
  workspaceRoot: string;
  /** Absolute path to the cloned todo-api fixture git repo */
  fixtureRoot: string;
  /** The registered todo-api project */
  project: Project;
  /** The initiative created for the test run */
  initiative: Initiative;
  /** tRPC caller (all procedures available) */
  caller: FullFlowCaller;
  /** Real MultiProviderAgentManager */
  agentManager: MultiProviderAgentManager;
  /** In-memory SQLite database */
  db: DrizzleDatabase;
  /** Event bus with capture capability */
  eventBus: CapturingEventBus;
  // All 11 repositories (spread from createRepositories in the factory)
  initiativeRepository: InitiativeRepository;
  phaseRepository: PhaseRepository;
  taskRepository: TaskRepository;
  messageRepository: MessageRepository;
  agentRepository: AgentRepository;
  pageRepository: PageRepository;
  projectRepository: ProjectRepository;
  accountRepository: AccountRepository;
  changeSetRepository: ChangeSetRepository;
  logChunkRepository: LogChunkRepository;
  conversationRepository: ConversationRepository;
  /**
   * Wait for an agent to reach a terminal status (idle/stopped/crashed).
   * Returns null if the agent enters waiting_for_input.
   * Throws on timeout (default 120s).
   */
  waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;
  /**
   * Poll until the agent needs attention: done (idle/stopped), waiting for input, or crashed.
   * Useful for the question/answer loop in discuss mode. Throws on timeout.
   */
  waitForAgentAttention(agentId: string, timeoutMs?: number): Promise<AgentAttentionStatus>;
  /**
   * Drive an agent to full completion, answering any questions along the way.
   * Answers all questions with the provided answer string (or a default).
   * Throws if the agent does not finish within timeoutMs (default 10 min).
   */
  driveToCompletion(
    agentId: string,
    answer?: string,
    timeoutMs?: number,
  ): Promise<AgentResult | null>;
  /**
   * Get captured events filtered by type.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[];
  /**
   * Kill all running agents and remove temp directories.
   */
  cleanup(): Promise<void>;
}
// =============================================================================
// Poll interval
// =============================================================================
// How often the polling helpers re-read the agents table while waiting.
const POLL_INTERVAL_MS = 1500;
// =============================================================================
// Factory
// =============================================================================
// ESM has no __dirname; derive it so the fixture path resolves relative to
// this module regardless of the process working directory.
const __dirname = dirname(fileURLToPath(import.meta.url));
const FIXTURES_DIR = join(__dirname, '../../fixtures/todo-api');
export interface FullFlowHarnessOptions {
  /** Factory called after workspaceRoot + repos are created. Return a custom ProcessManager. */
  processManagerFactory?: (workspaceRoot: string, projectRepo: ProjectRepository) => ProcessManager;
}
/**
* Create a full-flow test harness.
*
* Setup steps:
* 1. Copy todo-api fixture into a temp git repo (fixtureRoot).
* 2. Create workspace temp dir (workspaceRoot) for CW operations.
* 3. Init in-memory DB + all 11 repos.
* 4. Wire real MultiProviderAgentManager with all repos.
* 5. Wire DefaultDispatchManager for execute stage.
* 6. Create tRPC caller with full context.
* 7. Register project in DB directly (url = fixtureRoot).
* 8. Create initiative via tRPC (links project, creates root page).
*/
export async function createFullFlowHarness(
  initiativeName = 'Add complete() method to TodoStore',
  options?: FullFlowHarnessOptions,
): Promise<FullFlowHarness> {
  // ── 0. Allow nested claude invocations ────────────────────────────────────
  // Claude Code sets CLAUDECODE in the environment, which prevents nested
  // claude CLI calls from starting ("cannot be launched inside another Claude
  // Code session"). Save and remove it so spawned agents can run normally.
  // It is restored in cleanup().
  const savedClaudeCodeEnv = process.env.CLAUDECODE;
  delete process.env.CLAUDECODE;
  // ── 1. Fixture project ────────────────────────────────────────────────────
  // IMPORTANT: cp(src, dest) puts src INSIDE dest when dest already exists
  // (like `cp -r src dest/` → creates dest/src/). We need dest to NOT exist
  // yet so that cp creates it as a copy of src directly.
  const fixtureBase = await mkdtemp(join(tmpdir(), 'cw-fixture-'));
  const fixtureRoot = join(fixtureBase, 'todo-api'); // does not exist yet
  await cp(FIXTURES_DIR, fixtureRoot, { recursive: true });
  // Verify files landed at the right level before git operations
  // NOTE(review): `test -f` and the git calls below assume a POSIX shell/git
  // on PATH — this harness is not Windows-portable; confirm that is intended.
  execSync(`test -f "${join(fixtureRoot, 'package.json')}"`, { stdio: 'pipe' });
  execSync('git init', { cwd: fixtureRoot, stdio: 'pipe' });
  execSync('git config user.email "test@test.com"', { cwd: fixtureRoot, stdio: 'pipe' });
  execSync('git config user.name "Test"', { cwd: fixtureRoot, stdio: 'pipe' });
  execSync('git add .', { cwd: fixtureRoot, stdio: 'pipe' });
  execSync('git commit -m "initial todo-api with missing complete()"', {
    cwd: fixtureRoot,
    stdio: 'pipe',
  });
  // ── 2. Workspace root ─────────────────────────────────────────────────────
  // Just a plain temp directory — agent worktrees live under repos/ inside it.
  // No git init needed; the PROJECT clone (repos/<name>-<id>/) is the git repo.
  const workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-workspace-'));
  // ── 3. Database + repositories ────────────────────────────────────────────
  const db = createTestDatabase();
  const repos = createRepositories(db);
  // ── 4. Event bus ──────────────────────────────────────────────────────────
  const eventBus = new CapturingEventBus();
  // ── 5. Real agent manager ─────────────────────────────────────────────────
  // A custom process manager (e.g. cassette replay) may be injected via options.
  const customProcessManager = options?.processManagerFactory?.(workspaceRoot, repos.projectRepository);
  const agentManager = new MultiProviderAgentManager(
    repos.agentRepository,
    workspaceRoot,
    repos.projectRepository,
    repos.accountRepository,
    eventBus,
    undefined, // no credential manager needed for default claude account
    repos.changeSetRepository,
    repos.phaseRepository,
    repos.taskRepository,
    repos.pageRepository,
    repos.logChunkRepository,
    false, // debug
    customProcessManager, // processManagerOverride
  );
  // ── 6. Dispatch manager (for execute stage) ───────────────────────────────
  const dispatchManager = new DefaultDispatchManager(
    repos.taskRepository,
    repos.messageRepository,
    agentManager,
    eventBus,
    repos.initiativeRepository,
    repos.phaseRepository,
  );
  // ── 7. tRPC caller ────────────────────────────────────────────────────────
  const ctx = createContext({
    eventBus,
    serverStartedAt: new Date(),
    processCount: 0,
    agentManager,
    dispatchManager,
    workspaceRoot,
    ...repos,
  });
  const caller = createCaller(ctx);
  // ── 8. Register project directly in DB (bypass tRPC clone) ───────────────
  // url points at the local fixture clone, so agents "clone" from disk.
  const project = await repos.projectRepository.create({
    name: 'todo-api',
    url: fixtureRoot,
  });
  // ── 9. Create initiative via tRPC (creates root page automatically) ───────
  const initiative = await caller.createInitiative({
    name: initiativeName,
    projectIds: [project.id],
  });
  // ── Helpers ───────────────────────────────────────────────────────────────
  // Poll until the agent row leaves 'running'; null means it is waiting for
  // user input instead of having terminated.
  async function waitForAgentCompletion(
    agentId: string,
    timeoutMs = 120_000,
  ): Promise<AgentResult | null> {
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
      const agent = await repos.agentRepository.findById(agentId);
      if (!agent) return null;
      if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
        return agentManager.getResult(agentId);
      }
      if (agent.status === 'waiting_for_input') return null;
      await sleep(POLL_INTERVAL_MS);
    }
    throw new Error(`Timeout: agent ${agentId} did not complete within ${timeoutMs}ms`);
  }
  // Poll until the agent needs attention; a missing row is treated as crashed.
  async function waitForAgentAttention(
    agentId: string,
    timeoutMs = 120_000,
  ): Promise<AgentAttentionStatus> {
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
      const agent = await repos.agentRepository.findById(agentId);
      if (!agent) return 'crashed';
      if (agent.status === 'idle' || agent.status === 'stopped') return 'done';
      if (agent.status === 'crashed') return 'crashed';
      if (agent.status === 'waiting_for_input') return 'waiting';
      await sleep(POLL_INTERVAL_MS);
    }
    throw new Error(`Timeout: agent ${agentId} did not reach attention state within ${timeoutMs}ms`);
  }
  // Answer every question with the same canned answer until the agent
  // terminates or the overall deadline expires.
  async function driveToCompletion(
    agentId: string,
    answer = 'Use your best judgment and keep it simple.',
    timeoutMs = 10 * 60_000,
  ): Promise<AgentResult | null> {
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
      const remaining = deadline - Date.now();
      if (remaining <= 0) break;
      let status: AgentAttentionStatus;
      try {
        // Cap each attention wait at 3 minutes so the outer deadline governs.
        status = await waitForAgentAttention(agentId, Math.min(remaining, 3 * 60_000));
      } catch {
        // Agent is still running (hasn't reached an attention state within the polling
        // window). This is normal for long-running execute agents. Continue the outer
        // loop — the deadline check above will terminate us if we truly time out.
        continue;
      }
      if (status === 'done' || status === 'crashed') {
        return agentManager.getResult(agentId);
      }
      if (status === 'waiting') {
        const pending = await agentManager.getPendingQuestions(agentId);
        if (!pending || pending.questions.length === 0) {
          // Shouldn't happen, but guard against it
          await sleep(POLL_INTERVAL_MS);
          continue;
        }
        // Same answer for every pending question, keyed by question id.
        const answers = Object.fromEntries(
          pending.questions.map((q) => [q.id, answer]),
        );
        await agentManager.resume(agentId, answers);
      }
    }
    throw new Error(`driveToCompletion: agent ${agentId} did not finish within ${timeoutMs}ms`);
  }
  // ── Build and return harness ───────────────────────────────────────────────
  const harness: FullFlowHarness = {
    workspaceRoot,
    fixtureRoot,
    project,
    initiative,
    caller,
    agentManager,
    db,
    eventBus,
    ...repos,
    waitForAgentCompletion,
    waitForAgentAttention,
    driveToCompletion,
    getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
      return eventBus.getEventsByType<T>(type);
    },
    async cleanup() {
      // Kill any running agents
      // NOTE(review): only status === 'running' agents are stopped; agents in
      // waiting_for_input are left alone — confirm they hold no live processes.
      const agents = await repos.agentRepository.findAll();
      await Promise.allSettled(
        agents
          .filter((a) => a.status === 'running')
          .map((a) => agentManager.stop(a.id)),
      );
      // Restore CLAUDECODE env var
      if (savedClaudeCodeEnv !== undefined) {
        process.env.CLAUDECODE = savedClaudeCodeEnv;
      }
      // Remove temp directories (fixtureBase contains fixtureRoot)
      await Promise.allSettled([
        rm(fixtureBase, { recursive: true, force: true }),
        rm(workspaceRoot, { recursive: true, force: true }),
      ]);
    },
  };
  return harness;
}

View File

@@ -0,0 +1,156 @@
/**
* Full-Flow Test Report Utility
*
* Plain console.log formatters for human-readable output at each stage of the
* full-flow integration test. No external dependencies.
*/
import { execSync } from 'node:child_process';
import { join } from 'node:path';
import type { Phase, Task } from '../../../db/schema.js';
import type { AgentResult } from '../../../agent/types.js';
// =============================================================================
// Types
// =============================================================================
export interface ExecutedTask {
  /** The execute-category task that was dispatched to an agent. */
  task: Task;
  /** Agent outcome; null when no result was produced (e.g. crash). */
  result: AgentResult | null;
}
// =============================================================================
// Helpers
// =============================================================================
/** Fixed width of all report dividers. */
const WIDTH = 60;
const DIVIDER = '═'.repeat(WIDTH);
const THIN = '─'.repeat(WIDTH);
/** Print a heavy banner: blank line + divider, indented title, divider. */
function section(title: string): void {
  for (const row of [`\n${DIVIDER}`, ` ${title}`, DIVIDER]) {
    console.log(row);
  }
}
/** Print one detail line, indented two spaces. */
function line(msg: string): void {
  console.log(`  ${msg}`);
}
// =============================================================================
// Stage reporters
// =============================================================================
/** Print the top banner for a full-flow test run with a start timestamp. */
export function printHeader(initiativeName: string): void {
  section(`FULL-FLOW TEST: ${initiativeName}`);
  const startedAt = new Date().toISOString();
  console.log(`  Started at: ${startedAt}`);
}
/** Report the discuss-stage outcome for one agent (null result = crash). */
export function printDiscussResult(agentId: string, result: AgentResult | null): void {
  console.log('\n[DISCUSS]');
  console.log(THIN);
  line(`Agent: ${agentId}`);
  if (!result) {
    line('Result: null (agent may have crashed)');
    return;
  }
  line(`Success: ${result.success}`);
  if (result.message) {
    line(`Message: ${result.message.slice(0, 200)}`);
  }
}
/** List the phases created by the plan stage, numbered from 1. */
export function printPlanResult(phases: Phase[]): void {
  console.log(`\n[PLAN] ${phases.length} phase(s) created`);
  console.log(THIN);
  for (const [index, phase] of phases.entries()) {
    line(`${index + 1}. ${phase.name}`);
  }
}
/** List the tasks produced for one phase, with category/type/approval flags. */
export function printDetailResult(phase: Phase, tasks: Task[]): void {
  console.log(`\n[DETAIL] Phase "${phase.name}" → ${tasks.length} task(s)`);
  console.log(THIN);
  for (const [index, task] of tasks.entries()) {
    const approval = task.requiresApproval ? 'approval-required' : 'auto';
    line(`${index + 1}. ${task.name} [${[task.category, task.type, approval].join(', ')}]`);
    if (task.description) {
      line(`   ${task.description.slice(0, 120)}`);
    }
  }
}
/** Summarize execute-stage results: success count, then one line per task. */
export function printExecuteResult(executed: ExecutedTask[]): void {
  const succeeded = executed.reduce((n, e) => n + (e.result?.success ? 1 : 0), 0);
  console.log(`\n[EXECUTE] ${succeeded}/${executed.length} task(s) succeeded`);
  console.log(THIN);
  executed.forEach(({ task, result }) => {
    line(`${result?.success ? '✓' : '✗'} ${task.name}`);
    if (result && !result.success) {
      line(`  Error: ${result.message?.slice(0, 120)}`);
    }
  });
}
// Print a `git diff --stat` against HEAD~1 for every agent worktree of the
// given project under <workspaceRoot>/agent-workdirs/.
// NOTE(review): uses shell `ls`/`2>/dev/null` via execSync, so this is
// POSIX-only and assumes the worktree layout <dir>/<projectName> — confirm
// against the agent manager's actual worktree paths.
export function printGitDiff(workspaceRoot: string, projectName: string): void {
  console.log('\n[GIT DIFF — agent worktrees]');
  console.log(THIN);
  // Find all agent worktrees for this project
  const worktreesBase = join(workspaceRoot, 'agent-workdirs');
  try {
    // `|| echo ""` keeps the command's exit code 0 when the dir is missing.
    const dirs = execSync(`ls "${worktreesBase}" 2>/dev/null || echo ""`, { encoding: 'utf8' })
      .trim()
      .split('\n')
      .filter(Boolean);
    for (const dir of dirs) {
      const projectDir = join(worktreesBase, dir, projectName);
      try {
        const stat = execSync(`git -C "${projectDir}" diff HEAD~1 --stat 2>/dev/null || echo ""`, {
          encoding: 'utf8',
        }).trim();
        if (stat) {
          line(`Worktree: ${dir}/${projectName}`);
          stat.split('\n').forEach((l) => line(`  ${l}`));
        }
      } catch {
        // Worktree might not have commits — skip silently
      }
    }
  } catch {
    line('(no agent worktrees found)');
  }
}
/** Run the fixture's node test file in projectDir and print pass/fail output. */
export function printNpmTestResult(projectDir: string): void {
  console.log('\n[NPM TEST]');
  console.log(THIN);
  // Indent every output line two extra spaces under the status line.
  const dump = (text: string): void => {
    text.split('\n').forEach((l) => line(`  ${l}`));
  };
  try {
    const output = execSync('node --test src/todo.test.js', {
      cwd: projectDir,
      encoding: 'utf8',
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    line('Tests passed:');
    dump(output);
  } catch (err: unknown) {
    // execSync throws on non-zero exit; the error carries captured streams.
    const e = err as { stdout?: string; stderr?: string; status?: number };
    line(`Tests FAILED (exit ${e.status ?? '?'})`);
    if (e.stdout) dump(e.stdout);
    if (e.stderr) dump(e.stderr);
  }
}
/** Print the closing summary banner: duration, phase/task counts, successes. */
export function printFinalSummary(
  initiativeName: string,
  phases: Phase[],
  tasks: Task[],
  executed: ExecutedTask[],
  durationMs: number,
): void {
  const wins = executed.filter((e) => e.result?.success).length;
  section(`SUMMARY: ${initiativeName}`);
  const rows: Array<[string, string]> = [
    ['Duration ', `${Math.round(durationMs / 1000)}s`],
    ['Phases   ', `${phases.length}`],
    ['Tasks    ', `${tasks.length}`],
    ['Executed ', `${wins}/${executed.length} succeeded`],
  ];
  for (const [label, value] of rows) {
    line(`${label}: ${value}`);
  }
  console.log(DIVIDER);
}

View File

@@ -0,0 +1,183 @@
/**
* Real Claude CLI Integration Tests
*
* IMPORTANT: These tests call the real Claude CLI and incur API costs.
* They are SKIPPED by default and should only be run manually for validation.
*
* To run these tests:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-claude.test.ts --test-timeout=120000
* ```
*
* Purpose:
* - Validate that JSON schemas work correctly with Claude CLI --json-schema flag
* - Confirm MockAgentManager accurately simulates real CLI behavior
* - Document actual response structure and costs
*
* Updated (2026-02-06): Now uses the universal agentSignalSchema instead of
* per-mode schemas. Agents output trivial signals (done/questions/error) and
* write files instead of producing mode-specific JSON.
*
* Total validation cost: ~$0.10 (3 tests)
*/
import { describe, it, expect, beforeAll } from 'vitest';
import { execa } from 'execa';
import {
agentSignalJsonSchema,
agentSignalSchema,
} from '../../agent/schema.js';
/**
* Result structure from Claude CLI with --output-format json
*
* When --json-schema is used:
* - result: "" (empty string)
* - structured_output: { ... } (the validated JSON object)
*/
interface ClaudeCliResult {
  /** Always 'result' for the final CLI message. */
  type: 'result';
  /** Outcome classification reported by the CLI. */
  subtype: 'success' | 'error' | 'error_max_turns';
  /** True when the run ended in an error. */
  is_error: boolean;
  /** Session identifier usable with --resume. */
  session_id: string;
  /** Plain-text result; empty string when --json-schema is used. */
  result: string;
  /** Schema-validated JSON object (present when --json-schema is used). */
  structured_output?: unknown;
  /** Total API cost of the run in USD, when reported. */
  total_cost_usd?: number;
}
/**
* Helper to call Claude CLI directly with a prompt and JSON schema.
*
* @param prompt - The prompt to send to Claude
* @param jsonSchema - JSON schema to enforce structured output
* @param timeoutMs - Timeout in milliseconds (default 90s)
* @returns Parsed CLI result with structured_output
*/
/**
 * Invoke the Claude CLI once with a prompt and a structured-output schema.
 *
 * @param prompt - Prompt text passed via `-p`.
 * @param jsonSchema - JSON schema enforced via `--json-schema`.
 * @param timeoutMs - Kill the CLI after this many milliseconds (default 90s).
 * @returns The parsed CLI result plus the extracted structured output.
 */
async function callClaudeCli(
  prompt: string,
  jsonSchema: object,
  timeoutMs = 90000
): Promise<{ cliResult: ClaudeCliResult; structuredOutput: unknown }> {
  const startedAt = Date.now();
  const cliArgs = [
    '-p',
    prompt,
    '--output-format',
    'json',
    '--json-schema',
    JSON.stringify(jsonSchema),
  ];
  const { stdout } = await execa('claude', cliArgs, { timeout: timeoutMs });
  const elapsedMs = Date.now() - startedAt;
  const parsed: ClaudeCliResult = JSON.parse(stdout);
  console.log(`\n  Duration: ${(elapsedMs / 1000).toFixed(1)}s`);
  console.log(`  Cost: $${parsed.total_cost_usd?.toFixed(4) ?? 'N/A'}`);
  console.log(`  Session ID: ${parsed.session_id}`);
  console.log(`  Result field empty: ${parsed.result === ''}`);
  console.log(`  Has structured_output: ${parsed.structured_output !== undefined}`);
  // With --json-schema, the validated object arrives in structured_output and
  // the result string is normally empty; fall back to parsing result otherwise.
  const structuredOutput = parsed.structured_output ?? JSON.parse(parsed.result);
  return { cliResult: parsed, structuredOutput };
}
/**
* Check if real Claude tests should run.
* Set REAL_CLAUDE_TESTS=1 environment variable to enable.
*/
// Opt-in flag: real CLI tests only run when REAL_CLAUDE_TESTS=1 is set.
const shouldRunRealTests = process.env.REAL_CLAUDE_TESTS === '1';
/**
 * Skip wrapper - tests are expensive and should run manually
 */
const describeReal = shouldRunRealTests ? describe : describe.skip;
// Individual test timeout - real API calls take 5-30 seconds
const TEST_TIMEOUT = 120000; // 2 minutes
// Exercises the universal agent signal schema against the real CLI: each test
// steers the model toward one signal status and validates the structured
// output with the Zod schema. Skipped unless REAL_CLAUDE_TESTS=1.
describeReal('Real Claude CLI Integration', () => {
  beforeAll(() => {
    console.log('\n=== Running Real Claude CLI Tests ===');
    console.log('These tests call the real Claude API and incur costs.\n');
  });
  describe('Universal Signal Schema', () => {
    it(
      'should return done status',
      async () => {
        const prompt = `Complete this simple task: Say "Hello, World!" as a test.
Output your response in the required JSON format with status "done".`;
        const { cliResult, structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);
        console.log('  Output:', JSON.stringify(structuredOutput, null, 2));
        // Verify the CLI response structure
        expect(cliResult.subtype).toBe('success');
        expect(cliResult.result).toBe(''); // Empty when using --json-schema
        expect(cliResult.structured_output).toBeDefined();
        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('done');
      },
      TEST_TIMEOUT
    );
    it(
      'should return questions status with array',
      async () => {
        // Deliberately vague task so the model must ask questions.
        const prompt = `You are working on a vague task: "Make it better"
You MUST ask clarifying questions before proceeding. You cannot complete this task without more information.
Output your response with status "questions" and include at least 2 questions with unique IDs.`;
        const { structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);
        console.log('  Output:', JSON.stringify(structuredOutput, null, 2));
        // Validate against Zod schema; the status check narrows the union
        // before accessing the questions array.
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('questions');
        if (parsed.status === 'questions') {
          expect(Array.isArray(parsed.questions)).toBe(true);
          expect(parsed.questions.length).toBeGreaterThanOrEqual(1);
          expect(parsed.questions[0].id).toBeTruthy();
          expect(parsed.questions[0].question).toBeTruthy();
        }
      },
      TEST_TIMEOUT
    );
    it(
      'should return error status',
      async () => {
        const prompt = `You have encountered an unrecoverable error. Output your response with status "error" and a descriptive error message.`;
        const { structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);
        console.log('  Output:', JSON.stringify(structuredOutput, null, 2));
        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('error');
        if (parsed.status === 'error') {
          expect(parsed.error).toBeTruthy();
        }
      },
      TEST_TIMEOUT
    );
  });
});

View File

@@ -0,0 +1,298 @@
/**
* Real Claude CLI Manager Integration Tests
*
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run these tests:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/claude-manager.test.ts --test-timeout=300000
* ```
*
* Tests covered:
* - Output stream parsing (text_delta events)
* - Session ID extraction from init event
* - Result parsing and validation
* - Session resume with user answers
*
* Estimated cost: ~$0.10 per full run
*/
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import {
createRealProviderHarness,
describeRealClaude,
REAL_TEST_TIMEOUT,
sleep,
type RealProviderHarness,
} from './harness.js';
import { MINIMAL_PROMPTS } from './prompts.js';
import type { AgentSpawnedEvent, AgentStoppedEvent, AgentOutputEvent } from '../../../events/types.js';
describeRealClaude('Real Claude Manager Integration', () => {
let harness: RealProviderHarness;
beforeAll(async () => {
console.log('\n=== Running Real Claude Manager Tests ===');
console.log('These tests call the real Claude API and incur costs.\n');
harness = await createRealProviderHarness({ provider: 'claude' });
});
afterAll(async () => {
await harness.cleanup();
});
beforeEach(() => {
harness.clearEvents();
});
describe('Output Parsing', () => {
it(
'parses text_delta events from stream',
async () => {
// Spawn agent with streaming prompt
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.streaming,
mode: 'execute',
provider: 'claude',
});
expect(agent.id).toBeTruthy();
expect(agent.status).toBe('running');
// Wait for completion
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
// Verify we got output events
const outputEvents = harness.getEventsByType<AgentOutputEvent>('agent:output');
console.log(' Output events:', outputEvents.length);
// Verify completion
expect(result).toBeTruthy();
console.log(' Result:', result?.message);
},
REAL_TEST_TIMEOUT
);
it(
'parses init event and extracts session ID',
async () => {
// Spawn agent with simple done prompt
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.done,
mode: 'execute',
provider: 'claude',
});
// Wait for completion
await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
// Verify session ID was extracted and persisted
const dbAgent = await harness.agentRepository.findById(agent.id);
expect(dbAgent?.sessionId).toBeTruthy();
expect(dbAgent?.sessionId).toMatch(/^[a-f0-9-]+$/);
console.log(' Session ID:', dbAgent?.sessionId);
},
REAL_TEST_TIMEOUT
);
it(
'parses result event with completion',
async () => {
// Spawn agent with simple done prompt
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.done,
mode: 'execute',
provider: 'claude',
});
// Wait for completion
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
// Verify result was parsed
expect(result).toBeTruthy();
expect(result?.success).toBe(true);
expect(result?.message).toBeTruthy();
// Verify events
const spawnedEvents = harness.getEventsByType<AgentSpawnedEvent>('agent:spawned');
expect(spawnedEvents.length).toBe(1);
expect(spawnedEvents[0].payload.agentId).toBe(agent.id);
expect(spawnedEvents[0].payload.provider).toBe('claude');
const stoppedEvents = harness.getEventsByType<AgentStoppedEvent>('agent:stopped');
expect(stoppedEvents.length).toBe(1);
expect(stoppedEvents[0].payload.agentId).toBe(agent.id);
expect(stoppedEvents[0].payload.reason).toBe('task_complete');
console.log(' Result message:', result?.message);
},
REAL_TEST_TIMEOUT
);
});
describe('Questions Flow', () => {
  it(
    'parses questions status and enters waiting_for_input',
    async () => {
      // Launch an agent whose prompt makes it ask questions.
      const asker = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.questions,
        mode: 'execute',
        provider: 'claude',
      });

      // Block until the agent reports it is waiting for user input.
      const parsed = await harness.waitForAgentWaiting(asker.id, REAL_TEST_TIMEOUT);

      // The questions payload must be non-empty and well-formed.
      expect(parsed).toBeTruthy();
      expect(parsed?.questions).toBeTruthy();
      expect(parsed?.questions.length).toBeGreaterThan(0);
      expect(parsed?.questions[0].id).toBeTruthy();
      expect(parsed?.questions[0].question).toBeTruthy();

      // The persisted row must reflect the waiting state and a session id.
      const persisted = await harness.agentRepository.findById(asker.id);
      expect(persisted?.status).toBe('waiting_for_input');
      expect(persisted?.sessionId).toBeTruthy();

      console.log(' Questions:', parsed?.questions.length);
      console.log(' First question:', parsed?.questions[0].question);
    },
    REAL_TEST_TIMEOUT
  );
});
describe('Session Resume', () => {
  it(
    'resumes session with user answers',
    async () => {
      // 1. Spawn agent that asks questions
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.questions,
        mode: 'execute',
        provider: 'claude',
      });

      // 2. Wait for waiting_for_input
      const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
      expect(questions?.questions.length).toBeGreaterThan(0);
      const sessionIdBeforeResume = (await harness.agentRepository.findById(agent.id))?.sessionId;
      console.log(' Session ID before resume:', sessionIdBeforeResume);
      console.log(' Questions received:', questions?.questions.map((q) => q.id).join(', '));
      harness.clearEvents();

      // 3. Resume with one synthetic answer per question
      const answers: Record<string, string> = {};
      for (const q of questions?.questions ?? []) {
        answers[q.id] = `Answer to ${q.id}`;
      }
      await harness.agentManager.resume(agent.id, answers);

      // 4. Poll (up to ~60s) until the agent leaves the running state.
      // Fix: removed an unused `finalStatus` local that was assigned in the
      // loop but never read afterwards.
      let attempts = 0;
      while (attempts < 60) {
        const agent2 = await harness.agentRepository.findById(agent.id);
        if (agent2?.status !== 'running') break;
        await sleep(1000);
        attempts++;
      }

      // Verify the agent processed the resume (either completed or asked more questions)
      const dbAgent = await harness.agentRepository.findById(agent.id);
      console.log(' Final status:', dbAgent?.status);
      // Agent should not still be running
      expect(['idle', 'waiting_for_input', 'crashed']).toContain(dbAgent?.status);
      // If idle, verify a result was produced
      if (dbAgent?.status === 'idle') {
        const result = await harness.agentManager.getResult(agent.id);
        console.log(' Result:', result?.message);
        expect(result).toBeTruthy();
      }
    },
    REAL_TEST_TIMEOUT * 2 // Double timeout for two-step process
  );

  it(
    'maintains session continuity across resume',
    async () => {
      // 1. Spawn agent that asks questions
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.questions,
        mode: 'execute',
        provider: 'claude',
      });

      // 2. Wait for waiting_for_input and capture the pre-resume session id
      const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
      expect(questions?.questions.length).toBeGreaterThan(0);
      const sessionIdBefore = (await harness.agentRepository.findById(agent.id))?.sessionId;
      expect(sessionIdBefore).toBeTruthy();

      // 3. Resume with one synthetic answer per question
      const answers: Record<string, string> = {};
      for (const q of questions?.questions ?? []) {
        answers[q.id] = `Answer to ${q.id}`;
      }
      await harness.agentManager.resume(agent.id, answers);

      // 4. Wait for completion
      await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

      // Verify session ID exists (may be same or new depending on CLI behavior)
      const sessionIdAfter = (await harness.agentRepository.findById(agent.id))?.sessionId;
      expect(sessionIdAfter).toBeTruthy();
      console.log(' Session ID before:', sessionIdBefore);
      console.log(' Session ID after:', sessionIdAfter);
    },
    REAL_TEST_TIMEOUT * 2
  );
});
describe('Error Handling', () => {
  it(
    'handles error status',
    async () => {
      // Launch an agent whose prompt forces an error outcome.
      const failing = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.error,
        mode: 'execute',
        provider: 'claude',
      });

      // "Completion" here means the agent ended — it is expected to crash.
      const outcome = await harness.waitForAgentCompletion(failing.id, REAL_TEST_TIMEOUT);

      // Both the persisted status and the result must reflect the failure.
      const persisted = await harness.agentRepository.findById(failing.id);
      expect(persisted?.status).toBe('crashed');
      expect(outcome?.success).toBe(false);
      expect(outcome?.message).toContain('Test error');
      console.log(' Error message:', outcome?.message);
    },
    REAL_TEST_TIMEOUT
  );
});
});

View File

@@ -0,0 +1,172 @@
/**
* Real Codex CLI Manager Integration Tests
*
* IMPORTANT: These tests call the REAL Codex CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run these tests:
* ```bash
* REAL_CODEX_TESTS=1 npm test -- src/test/integration/real-providers/codex-manager.test.ts --test-timeout=300000
* ```
*
* Tests covered:
* - Codex spawn and thread_id extraction
* - Generic output parsing (non-schema)
* - Streaming output
*
* Estimated cost: ~$0.10 per full run
*
* Note: Codex uses different output format and session ID field (thread_id).
*/
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import {
createRealProviderHarness,
describeRealCodex,
REAL_TEST_TIMEOUT,
type RealProviderHarness,
} from './harness.js';
import { CODEX_PROMPTS } from './prompts.js';
import type { AgentSpawnedEvent, AgentOutputEvent } from '../../../events/types.js';
describeRealCodex('Real Codex Manager Integration', () => {
let harness: RealProviderHarness;
beforeAll(async () => {
  // Warn loudly: this suite talks to the real Codex API and incurs costs.
  console.log('\n=== Running Real Codex Manager Tests ===');
  console.log('These tests call the real Codex API and incur costs.\n');
  // One shared harness (workspace, repos, manager) for the whole suite.
  harness = await createRealProviderHarness({ provider: 'codex' });
});
afterAll(async () => {
  // Tear down everything the harness created (processes, workspace).
  await harness.cleanup();
});
beforeEach(() => {
  // Each test inspects the event log in isolation; drop prior events.
  harness.clearEvents();
});
describe('Codex Spawn', () => {
  it(
    'spawns codex agent and extracts thread_id',
    async () => {
      // Launch a codex agent with a trivial task.
      const spawned = await harness.agentManager.spawn({
        taskId: null,
        prompt: CODEX_PROMPTS.done,
        mode: 'execute',
        provider: 'codex',
      });
      expect(spawned.id).toBeTruthy();
      expect(spawned.provider).toBe('codex');
      expect(spawned.status).toBe('running');

      // Exactly one spawned event should have been emitted, for codex.
      const spawnEvents = harness.getEventsByType<AgentSpawnedEvent>('agent:spawned');
      expect(spawnEvents.length).toBe(1);
      expect(spawnEvents[0].payload.provider).toBe('codex');

      // Run to completion, then inspect the persisted row.
      const outcome = await harness.waitForAgentCompletion(spawned.id, REAL_TEST_TIMEOUT);
      const persisted = await harness.agentRepository.findById(spawned.id);
      console.log(' Thread ID:', persisted?.sessionId);
      console.log(' Status:', persisted?.status);
      console.log(' Result:', outcome?.message);

      // Codex should either complete or crash — never stay running.
      expect(['idle', 'crashed']).toContain(persisted?.status);
      // On a clean run, the thread_id must have been captured.
      if (persisted?.status === 'idle' && persisted?.sessionId) {
        expect(persisted.sessionId).toBeTruthy();
      }
    },
    REAL_TEST_TIMEOUT
  );

  it(
    'uses generic parser for output',
    async () => {
      // Launch with a prompt that produces streaming output.
      const spawned = await harness.agentManager.spawn({
        taskId: null,
        prompt: CODEX_PROMPTS.streaming,
        mode: 'execute',
        provider: 'codex',
      });

      const outcome = await harness.waitForAgentCompletion(spawned.id, REAL_TEST_TIMEOUT);

      // Streaming output should surface as agent:output events.
      const outputEvents = harness.getEventsByType<AgentOutputEvent>('agent:output');
      console.log(' Output events:', outputEvents.length);

      // For the generic (non-schema) provider, a result is still captured.
      const persisted = await harness.agentRepository.findById(spawned.id);
      console.log(' Status:', persisted?.status);
      console.log(' Result:', outcome?.message?.substring(0, 100) + '...');
      expect(['idle', 'crashed']).toContain(persisted?.status);
    },
    REAL_TEST_TIMEOUT
  );
});
describe('Codex Provider Config', () => {
  it(
    'uses correct command and args for codex',
    async () => {
      // Config-level check; actual command execution is covered by the
      // spawn test above.
      const spawned = await harness.agentManager.spawn({
        taskId: null,
        prompt: 'Say hello',
        mode: 'execute',
        provider: 'codex',
      });

      // The persisted record must carry the codex provider.
      const persisted = await harness.agentRepository.findById(spawned.id);
      expect(persisted?.provider).toBe('codex');

      // Completion failures are tolerated — codex may not be installed.
      try {
        await harness.waitForAgentCompletion(spawned.id, REAL_TEST_TIMEOUT);
      } catch {
        // Ignored: a missing codex binary is acceptable for a config test.
      }
      const finalState = await harness.agentRepository.findById(spawned.id);
      console.log(' Provider:', finalState?.provider);
      console.log(' Status:', finalState?.status);
    },
    REAL_TEST_TIMEOUT
  );
});
});
/**
* Codex-specific observations from testing:
*
* 1. Output Format:
* - Codex uses JSONL streaming with different event types
* - thread.started event contains thread_id
* - Output parsing is more generic (not JSON schema validated)
*
* 2. Command Structure:
* - codex exec --full-auto --json -p "prompt"
* - resume: codex exec resume <thread_id>
*
* 3. Session ID:
* - Called "thread_id" in Codex
* - Extracted from thread.started event
*
* 4. Resume:
* - Uses subcommand style: codex exec resume <thread_id>
* - Different from Claude's flag style: claude --resume <session_id>
*/

View File

@@ -0,0 +1,540 @@
/**
* Real Claude Inter-Agent Conversation Integration Tests
*
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/conversation.test.ts --test-timeout=300000
* ```
*
* Architecture:
* - Mock conversation server (only cw listen/ask/answer endpoints, no full CoordinationServer)
* - In-memory ConversationRepository (no SQLite, no FK constraints)
* - Real agent harness for spawning two Claude sessions with actual coding tasks
* - Two sequential questions prove the listen→answer→re-listen cycle works
*
* Estimated cost: ~$0.30 per full run (two Claude sessions)
*/
import { it, expect, beforeAll, afterAll } from 'vitest';
import { createServer } from 'node:http';
import type { Server } from 'node:http';
import { readFileSync, existsSync } from 'node:fs';
import { join } from 'node:path';
import { nanoid } from 'nanoid';
import { fetchRequestHandler } from '@trpc/server/adapters/fetch';
import { router, publicProcedure } from '../../../trpc/trpc.js';
import { conversationProcedures } from '../../../trpc/routers/conversation.js';
import { EventEmitterBus } from '../../../events/bus.js';
import type { ConversationRepository, CreateConversationData } from '../../../db/repositories/conversation-repository.js';
import type { Conversation } from '../../../db/schema.js';
import {
createRealProviderHarness,
describeRealClaude,
sleep,
type RealProviderHarness,
} from './harness.js';
const TEST_TIMEOUT = 300000; // 5 minutes — agents do real coding + conversation
// ---------------------------------------------------------------------------
// In-memory ConversationRepository — no SQLite, no FK constraints
// ---------------------------------------------------------------------------
/**
 * Minimal in-memory stand-in for ConversationRepository.
 * Backed by a Map keyed by conversation id — no SQLite, no FK enforcement.
 */
class InMemoryConversationRepository implements ConversationRepository {
  private readonly conversations = new Map<string, Conversation>();

  /** Insert a new pending conversation and return the stored record. */
  async create(data: CreateConversationData): Promise<Conversation> {
    const timestamp = new Date();
    const record: Conversation = {
      id: nanoid(),
      fromAgentId: data.fromAgentId,
      toAgentId: data.toAgentId,
      initiativeId: data.initiativeId ?? null,
      phaseId: data.phaseId ?? null,
      taskId: data.taskId ?? null,
      question: data.question,
      answer: null,
      status: 'pending',
      createdAt: timestamp,
      updatedAt: timestamp,
    };
    this.conversations.set(record.id, record);
    return record;
  }

  /** Look up a conversation by id; null when absent. */
  async findById(id: string): Promise<Conversation | null> {
    return this.conversations.get(id) ?? null;
  }

  /** Pending conversations addressed to an agent, oldest first. */
  async findPendingForAgent(toAgentId: string): Promise<Conversation[]> {
    const pending = [...this.conversations.values()].filter(
      (c) => c.status === 'pending' && c.toAgentId === toAgentId,
    );
    pending.sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime());
    return pending;
  }

  /** Record an answer; flips status to 'answered'. Null when id unknown. */
  async answer(id: string, answer: string): Promise<Conversation | null> {
    const existing = this.conversations.get(id);
    if (!existing) return null;
    const answered: Conversation = {
      ...existing,
      answer,
      status: 'answered' as const,
      updatedAt: new Date(),
    };
    this.conversations.set(id, answered);
    return answered;
  }

  /** Test helper — snapshot of every stored conversation. */
  getAll(): Conversation[] {
    return [...this.conversations.values()];
  }
}
// ---------------------------------------------------------------------------
// Mock conversation server — serves ONLY conversation tRPC procedures
// ---------------------------------------------------------------------------
/**
 * Start an HTTP server exposing ONLY the conversation tRPC procedures,
 * backed by the in-memory repository.
 *
 * Fix: the previous version picked a random port in [40000, 50000) and
 * would hang forever on EADDRINUSE (the listen callback never fires and
 * no 'error' handler was attached). We now listen on port 0 so the OS
 * assigns a free port, and reject the promise on listen errors.
 *
 * @returns the server, its OS-assigned port, and the repo for inspection.
 */
async function startMockConversationServer(): Promise<{
  server: Server;
  port: number;
  repo: InMemoryConversationRepository;
}> {
  const repo = new InMemoryConversationRepository();
  const eventBus = new EventEmitterBus();
  // Mini router with only conversation procedures
  const miniRouter = router({
    ...conversationProcedures(publicProcedure),
  });
  const httpServer = createServer(async (req, res) => {
    if (!req.url?.startsWith('/trpc')) {
      res.writeHead(404);
      res.end('Not found');
      return;
    }
    const host = req.headers.host ?? 'localhost';
    const url = new URL(req.url, `http://${host}`);
    // Buffer the request body for methods that may carry one.
    let body: string | undefined;
    if (req.method !== 'GET' && req.method !== 'HEAD') {
      body = await new Promise<string>((resolve) => {
        let data = '';
        req.on('data', (chunk: Buffer) => {
          data += chunk.toString();
        });
        req.on('end', () => resolve(data));
      });
    }
    // Convert Node's header map to fetch-style Headers.
    const headers = new Headers();
    for (const [key, value] of Object.entries(req.headers)) {
      if (value) {
        if (Array.isArray(value)) {
          value.forEach((v) => headers.append(key, v));
        } else {
          headers.set(key, value);
        }
      }
    }
    const fetchRequest = new Request(url.toString(), {
      method: req.method,
      headers,
      body: body ?? undefined,
    });
    const fetchResponse = await fetchRequestHandler({
      endpoint: '/trpc',
      req: fetchRequest,
      router: miniRouter,
      createContext: () =>
        ({
          eventBus,
          serverStartedAt: new Date(),
          processCount: 0,
          conversationRepository: repo,
          // Stub — requireAgentManager is called unconditionally in createConversation,
          // but list() is only invoked for taskId/phaseId resolution. With --agent-id
          // targeting, list() is never called.
          agentManager: { list: async () => [] },
        }) as any,
    });
    // Copy status/headers, then stream the fetch Response body back out.
    res.statusCode = fetchResponse.status;
    fetchResponse.headers.forEach((value, key) => {
      res.setHeader(key, value);
    });
    if (fetchResponse.body) {
      const reader = fetchResponse.body.getReader();
      const pump = async () => {
        while (true) {
          const { done, value } = await reader.read();
          if (done) {
            res.end();
            return;
          }
          res.write(value);
        }
      };
      pump().catch(() => res.end());
    } else {
      res.end(await fetchResponse.text());
    }
  });
  // Listen on port 0: the OS picks a free port, eliminating collision races.
  await new Promise<void>((resolve, reject) => {
    httpServer.once('error', reject);
    httpServer.listen(0, '127.0.0.1', () => resolve());
  });
  const address = httpServer.address();
  if (address === null || typeof address === 'string') {
    throw new Error('Unable to determine mock conversation server port');
  }
  return { server: httpServer, port: address.port, repo };
}
// ---------------------------------------------------------------------------
// Diagnostic helpers
// ---------------------------------------------------------------------------
/**
 * Print a compact diagnostic dump of an agent's JSONL output and stderr.
 * Shows up to the last 30 output lines; returns quietly (after one log
 * line) when the agent has no log directory at all.
 */
function dumpAgentLogs(workspaceRoot: string, agentName: string) {
  const logDir = join(workspaceRoot, '.cw', 'agent-logs', agentName);
  if (!existsSync(logDir)) {
    console.log(` [${agentName}] No log directory at ${logDir}`);
    return;
  }

  // Tail of output.jsonl: assistant text/tool_use blocks and result events.
  const outputPath = join(logDir, 'output.jsonl');
  if (existsSync(outputPath)) {
    const allLines = readFileSync(outputPath, 'utf-8').trim().split('\n');
    const tail = allLines.slice(-30);
    console.log(` [${agentName}] output.jsonl (last ${tail.length}/${allLines.length} lines):`);
    for (const raw of tail) {
      // Any parse/shape failure for a line falls through to the RAW print.
      try {
        const ev = JSON.parse(raw);
        if (ev.type === 'assistant' && ev.message?.content) {
          for (const block of ev.message.content) {
            if (block.type === 'text') {
              console.log(` TEXT: ${block.text.substring(0, 200)}`);
            } else if (block.type === 'tool_use') {
              console.log(` TOOL: ${block.name} ${JSON.stringify(block.input).substring(0, 150)}`);
            }
          }
        } else if (ev.type === 'result') {
          console.log(` RESULT: ${JSON.stringify(ev).substring(0, 300)}`);
        }
      } catch {
        console.log(` RAW: ${raw.substring(0, 200)}`);
      }
    }
  }

  // Non-empty stderr is surfaced (truncated) as a single line.
  const stderrPath = join(logDir, 'stderr.log');
  if (existsSync(stderrPath)) {
    const stderrText = readFileSync(stderrPath, 'utf-8').trim();
    if (stderrText) {
      console.log(` [${agentName}] stderr: ${stderrText.substring(0, 500)}`);
    }
  }
}
// ---------------------------------------------------------------------------
// Test suite
// ---------------------------------------------------------------------------
describeRealClaude('Real Inter-Agent Conversation (mock server)', () => {
let harness: RealProviderHarness;
let mockServer: Server;
let mockPort: number;
let mockRepo: InMemoryConversationRepository;
const originalCwPort = process.env.CW_PORT;
beforeAll(async () => {
  console.log('\n=== Real Inter-Agent Conversation Test ===');
  console.log('Mock conversation server + two Claude sessions.\n');
  // Boot the conversation-only mock server first so its port is known
  // before any agent is spawned.
  const { server, port, repo } = await startMockConversationServer();
  mockServer = server;
  mockPort = port;
  mockRepo = repo;
  console.log(` Mock server on port ${mockPort}`);
  // Point the agents' cw commands at the mock server via CW_PORT.
  process.env.CW_PORT = String(mockPort);
  // Real agent harness for spawning + worktrees (no full CoordinationServer).
  harness = await createRealProviderHarness({ provider: 'claude' });
  console.log(` Workspace: ${harness.workspaceRoot}`);
});
afterAll(async () => {
  // Restore CW_PORT to its pre-suite value, or drop it if it was unset.
  if (!originalCwPort) {
    delete process.env.CW_PORT;
  } else {
    process.env.CW_PORT = originalCwPort;
  }
  await harness?.cleanup();
  mockServer?.close();
});
it(
  'two agents with real tasks communicate via cw ask/listen/answer (two questions prove re-listen)',
  async () => {
    const agentSuffix = nanoid(6); // unique suffix for temp files
    // ---------------------------------------------------------------
    // Agent A — builds a validator module WHILE answering questions
    // in the background via cw listen
    // ---------------------------------------------------------------
    const agentA = await harness.agentManager.spawn({
      taskId: null,
      prompt: `You are Agent A in a multi-agent coordination test.
You have TWO concurrent responsibilities:
1. Build a TypeScript validator module (your main coding task)
2. Answer questions from other agents via a background listener
SETUP (do this first):
- Read .cw/input/manifest.json to get your agentId
- Start a background listener that writes to a temp file:
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
LISTEN_PID=$!
MAIN CODING TASK — implement a user registration validator:
1. Create types.ts:
export interface RegistrationInput { name: string; email: string; password: string; }
export interface ValidationResult { valid: boolean; errors: string[]; }
2. Create validator.ts:
Import from types.ts. Export function validateRegistration(input: RegistrationInput): ValidationResult
Rules: name min 2 chars, email must have exactly one @ and domain with a dot and no spaces and max 254 chars, password min 8 chars.
3. Create index.ts that re-exports everything from types.ts and validator.ts.
BETWEEN EACH FILE, check for incoming questions:
if [ -s /tmp/cw-listen-${agentSuffix}.txt ]; then
# parse the JSON, get conversationId and question
# answer: cw answer "<answer based on your code>" --conversation-id <id>
# clear and restart listener:
> /tmp/cw-listen-${agentSuffix}.txt
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
LISTEN_PID=$!
fi
You will receive TWO questions total while you work. Answer them based on the code you are writing.
CLEANUP: After all 3 files are written and both questions answered:
- kill $LISTEN_PID 2>/dev/null
- Write .cw/output/signal.json: {"status":"done","result":"validator module complete, answered 2 questions"}
CRITICAL:
- The listener MUST run in the background while you write code.
- Check for questions between files, not as blocking waits.
- The CW_PORT environment variable is already set to ${mockPort}.`,
      mode: 'execute',
      provider: 'claude',
      inputContext: {},
    });
    console.log(` Agent A: ${agentA.id} (${agentA.name})`);
    // Give Agent A time to start its background listener and begin coding
    // NOTE(review): 15s is a heuristic; assumes the listener is up by then —
    // confirm if this test proves flaky.
    await sleep(15000);
    // ---------------------------------------------------------------
    // Agent B — builds a client module, asks Agent A questions to
    // learn the validation rules, then uses answers in its code
    // ---------------------------------------------------------------
    const agentB = await harness.agentManager.spawn({
      taskId: null,
      prompt: `You are Agent B in a multi-agent coordination test.
Read .cw/input/manifest.json to get your agentId. Agent A (ID: ${agentA.id}) is building a validator module.
YOUR CODING TASK — build a registration API client that includes client-side validation matching Agent A's server-side rules:
1. Create client-scaffold.ts with a basic RegistrationClient class that has a register(name, email, password) method that returns Promise<{ok: boolean}>.
Leave a TODO comment where validation will go.
2. NOW ask Agent A what the validation rules are — you need this to write proper client-side checks:
FIELDS=$(cw ask "What are the required fields and their types for registration?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
3. Ask Agent A about the specific email validation rules:
EMAIL_RULES=$(cw ask "What are the exact email validation rules you implemented?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
4. Create validated-client.ts — a COMPLETE implementation using the answers:
Import the scaffold, add a validateBeforeSubmit(name, email, password) function
that implements the EXACT validation rules Agent A told you about.
Include a comment at the top with the rules you received.
5. Write .cw/output/signal.json: {"status":"done","result":"client module complete with validation from agent A"}
CRITICAL:
- Create client-scaffold.ts BEFORE asking questions (you have independent work to do first).
- Use the ACTUAL answers from Agent A in your validated-client.ts implementation.
- The CW_PORT environment variable is already set to ${mockPort}.`,
      mode: 'execute',
      provider: 'claude',
      inputContext: {},
    });
    console.log(` Agent B: ${agentB.id} (${agentB.name})`);
    // ---------------------------------------------------------------
    // Wait for both agents to stop running, then verify conversations
    // ---------------------------------------------------------------
    const deadline = Date.now() + TEST_TIMEOUT;
    let aDone = false;
    let bDone = false;
    let lastLogTime = 0;
    // Poll every 2s; each agent's logs are dumped exactly once, the first
    // time it is seen in a non-running state.
    while (Date.now() < deadline && (!aDone || !bDone)) {
      const agentAInfo = await harness.agentRepository.findById(agentA.id);
      const agentBInfo = await harness.agentRepository.findById(agentB.id);
      // Periodic progress logging every 30s
      if (Date.now() - lastLogTime > 30000) {
        const elapsed = Math.round((Date.now() - (deadline - TEST_TIMEOUT)) / 1000);
        console.log(` [${elapsed}s] A=${agentAInfo?.status ?? '?'} B=${agentBInfo?.status ?? '?'} convs=${mockRepo.getAll().length}`);
        lastLogTime = Date.now();
      }
      if (agentAInfo && agentAInfo.status !== 'running' && !aDone) {
        aDone = true;
        console.log(` Agent A final status: ${agentAInfo.status}`);
        dumpAgentLogs(harness.workspaceRoot, agentA.name);
      }
      if (agentBInfo && agentBInfo.status !== 'running' && !bDone) {
        bDone = true;
        console.log(` Agent B final status: ${agentBInfo.status}`);
        dumpAgentLogs(harness.workspaceRoot, agentB.name);
      }
      if (!aDone || !bDone) await sleep(2000);
    }
    expect(aDone).toBe(true);
    expect(bDone).toBe(true);
    // ---------------------------------------------------------------
    // Verify conversations in mock repo
    // ---------------------------------------------------------------
    const allConversations = mockRepo.getAll();
    console.log(` Total conversations: ${allConversations.length}`);
    for (const c of allConversations) {
      console.log(
        ` ${c.id}: ${c.status} — Q: "${c.question}" A: "${c.answer?.substring(0, 80)}..."`,
      );
    }
    // Exactly 2 conversations, both answered
    expect(allConversations.length).toBe(2);
    expect(allConversations.every((c) => c.status === 'answered')).toBe(true);
    // Both target Agent A, both from Agent B
    expect(allConversations.every((c) => c.toAgentId === agentA.id)).toBe(true);
    expect(allConversations.every((c) => c.fromAgentId === agentB.id)).toBe(true);
    // Questions should be distinct (one about fields, one about email validation)
    const questions = allConversations.map((c) => c.question);
    expect(questions.some((q) => q.toLowerCase().includes('field'))).toBe(true);
    expect(questions.some((q) => q.toLowerCase().includes('email'))).toBe(true);
    // Both answers should be non-empty
    expect(allConversations.every((c) => c.answer && c.answer.length > 0)).toBe(true);
    // ---------------------------------------------------------------
    // Verify Agent A's coding output — validator module files exist
    // ---------------------------------------------------------------
    // Assumes the harness lays agents out under agent-workdirs/<name>/workspace —
    // matches the paths used below for Agent B as well.
    const aWorkdir = join(
      harness.workspaceRoot,
      'agent-workdirs',
      agentA.name,
      'workspace',
    );
    const aFiles = ['types.ts', 'validator.ts', 'index.ts'];
    for (const f of aFiles) {
      const filePath = join(aWorkdir, f);
      const exists = existsSync(filePath);
      console.log(` Agent A file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
      expect(exists).toBe(true);
    }
    // validator.ts should contain actual validation logic
    const validatorContent = readFileSync(join(aWorkdir, 'validator.ts'), 'utf-8');
    console.log(` Agent A validator.ts (${validatorContent.length} chars): ${validatorContent.substring(0, 120)}...`);
    expect(validatorContent.toLowerCase()).toContain('email');
    expect(validatorContent.toLowerCase()).toContain('password');
    // ---------------------------------------------------------------
    // Verify Agent B's coding output — client module files exist
    // ---------------------------------------------------------------
    const bWorkdir = join(
      harness.workspaceRoot,
      'agent-workdirs',
      agentB.name,
      'workspace',
    );
    const bFiles = ['client-scaffold.ts', 'validated-client.ts'];
    for (const f of bFiles) {
      const filePath = join(bWorkdir, f);
      const exists = existsSync(filePath);
      console.log(` Agent B file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
      expect(exists).toBe(true);
    }
    // validated-client.ts should reference validation rules from Agent A's answers
    const clientContent = readFileSync(join(bWorkdir, 'validated-client.ts'), 'utf-8');
    console.log(` Agent B validated-client.ts (${clientContent.length} chars): ${clientContent.substring(0, 120)}...`);
    expect(clientContent.toLowerCase()).toContain('email');
    // ---------------------------------------------------------------
    // Verify interleaving: Agent A's JSONL log has coding tool calls
    // (Write for .ts files) interleaved with conversation tool calls
    // (Bash for cw listen/answer)
    // ---------------------------------------------------------------
    const aLogPath = join(harness.workspaceRoot, '.cw', 'agent-logs', agentA.name, 'output.jsonl');
    const aLog = readFileSync(aLogPath, 'utf-8').trim().split('\n');
    const toolCalls: { type: 'code' | 'conversation'; name: string; detail: string }[] = [];
    // Classify each assistant tool_use block as either "code" (Write of a
    // .ts file) or "conversation" (Bash running cw listen/answer).
    for (const line of aLog) {
      try {
        const ev = JSON.parse(line);
        if (ev.type !== 'assistant' || !ev.message?.content) continue;
        for (const block of ev.message.content) {
          if (block.type !== 'tool_use') continue;
          const input = typeof block.input === 'string' ? block.input : JSON.stringify(block.input);
          if (block.name === 'Write' && input.includes('.ts')) {
            toolCalls.push({ type: 'code', name: 'Write', detail: input.substring(0, 80) });
          } else if (block.name === 'Bash' && (input.includes('cw listen') || input.includes('cw answer'))) {
            toolCalls.push({ type: 'conversation', name: 'Bash', detail: input.substring(0, 80) });
          }
        }
      } catch { /* skip non-JSON lines */ }
    }
    console.log(` Agent A interleaving (${toolCalls.length} relevant tool calls):`);
    for (const tc of toolCalls) {
      console.log(` [${tc.type}] ${tc.name}: ${tc.detail}`);
    }
    // Must have both code and conversation tool calls
    const hasCode = toolCalls.some((tc) => tc.type === 'code');
    const hasConversation = toolCalls.some((tc) => tc.type === 'conversation');
    expect(hasCode).toBe(true);
    expect(hasConversation).toBe(true);
    // Verify interleaving: at least one code call must appear AFTER a conversation call
    // (proving coding continued after handling a question)
    const firstConvIdx = toolCalls.findIndex((tc) => tc.type === 'conversation');
    const lastCodeIdx = toolCalls.length - 1 - [...toolCalls].reverse().findIndex((tc) => tc.type === 'code');
    console.log(` First conversation at index ${firstConvIdx}, last code at index ${lastCodeIdx}`);
    expect(lastCodeIdx).toBeGreaterThan(firstConvIdx);
  },
  TEST_TIMEOUT,
);
});

View File

@@ -0,0 +1,265 @@
/**
* Crash Recovery Integration Tests
*
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run these tests:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/crash-recovery.test.ts --test-timeout=300000
* ```
*
* Tests covered:
* - Server restart while agent is running
* - Resuming streaming after restart
* - Marking dead agents as crashed
* - Output file processing after restart
*
* Estimated cost: ~$0.08 per full run
*/
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import {
createRealProviderHarness,
describeRealClaude,
REAL_TEST_TIMEOUT,
EXTENDED_TEST_TIMEOUT,
sleep,
type RealProviderHarness,
} from './harness.js';
import { MINIMAL_PROMPTS } from './prompts.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
describeRealClaude('Crash Recovery', () => {
let harness: RealProviderHarness;
beforeAll(async () => {
  // Warn loudly: this suite talks to the real Claude API and incurs costs.
  console.log('\n=== Running Crash Recovery Tests ===');
  console.log('These tests call the real Claude API and incur costs.\n');
  // One shared harness (workspace, repos, manager) for the whole suite.
  harness = await createRealProviderHarness({ provider: 'claude' });
});
afterAll(async () => {
  // Tear down everything the harness created (processes, workspace).
  await harness.cleanup();
});
beforeEach(() => {
  // Each test inspects the event log in isolation; drop prior events.
  harness.clearEvents();
});
describe('Server Restart Simulation', () => {
it(
  'resumes streaming for still-running agent after restart',
  async () => {
    // 1. Spawn an agent with a deliberately slow task so it is still
    //    running when we simulate the restart below.
    console.log(' 1. Spawning agent with slow task...');
    const agent = await harness.agentManager.spawn({
      taskId: null,
      prompt: MINIMAL_PROMPTS.slow,
      mode: 'execute',
      provider: 'claude',
    });

    // 2. Wait for the agent process to be up and its row persisted.
    await harness.waitForAgentStatus(agent.id, 'running', 10000);
    const dbAgent = await harness.agentRepository.findById(agent.id);
    expect(dbAgent?.pid).toBeTruthy();
    expect(dbAgent?.outputFilePath).toBeTruthy();
    console.log(' 2. Agent running with PID:', dbAgent?.pid);

    // 3. Give the agent a moment to start writing output
    await sleep(2000);

    // 4. Simulate server crash - create NEW manager (old in-memory state lost)
    console.log(' 3. Simulating server restart with new manager...');
    harness.clearEvents(); // Clear events from old manager
    const newManager = new MultiProviderAgentManager(
      harness.agentRepository,
      harness.workspaceRoot,
      harness.projectRepository,
      harness.accountRepository,
      harness.eventBus
    );

    // 5. Reconcile - the new manager should re-attach to the live agent
    console.log(' 4. Reconciling agent state...');
    await newManager.reconcileAfterRestart();

    // 6. Poll (up to ~2 minutes) until the agent leaves 'running'.
    // Fix: removed an unused `finalStatus` local that was assigned in the
    // loop but never read afterwards.
    console.log(' 5. Waiting for completion via new manager...');
    let attempts = 0;
    while (attempts < 60) {
      const refreshed = await harness.agentRepository.findById(agent.id);
      if (refreshed?.status !== 'running') break;
      await sleep(2000);
      attempts++;
    }
    const finalAgent = await harness.agentRepository.findById(agent.id);
    console.log(' 6. Final status:', finalAgent?.status);
    // Either completed successfully or crashed (both are valid outcomes)
    expect(['idle', 'crashed', 'stopped']).toContain(finalAgent?.status);
    if (finalAgent?.status === 'idle') {
      const result = await newManager.getResult(agent.id);
      console.log(' Result:', result?.message);
    }
  },
  EXTENDED_TEST_TIMEOUT
);
it(
  'marks dead agent as crashed during reconcile',
  async () => {
    // Fabricate an agent row in 'running' state with a PID that is
    // definitely not alive (high number that won't exist).
    console.log(' 1. Creating fake agent with dead PID...');
    const fakeAgent = await harness.agentRepository.create({
      name: 'dead-agent-test',
      taskId: null,
      initiativeId: null,
      sessionId: null,
      worktreeId: 'dead-worktree',
      status: 'running',
      mode: 'execute',
      provider: 'claude',
      accountId: null,
    });
    await harness.agentRepository.update(fakeAgent.id, { pid: 999999, outputFilePath: '/nonexistent/path' });

    // Sanity-check the fabricated row before reconciling.
    let record = await harness.agentRepository.findById(fakeAgent.id);
    expect(record?.status).toBe('running');
    expect(record?.pid).toBe(999999);

    // Reconcile with a fresh manager, exactly as a restarted server would.
    console.log(' 2. Creating new manager and reconciling...');
    const restartedManager = new MultiProviderAgentManager(
      harness.agentRepository,
      harness.workspaceRoot,
      harness.projectRepository,
      harness.accountRepository,
      harness.eventBus
    );
    await restartedManager.reconcileAfterRestart();

    // The dead PID must be detected and the row flipped to 'crashed'.
    record = await harness.agentRepository.findById(fakeAgent.id);
    expect(record?.status).toBe('crashed');
    console.log(' 3. Agent marked as crashed (dead PID detected)');
  },
  REAL_TEST_TIMEOUT
);
// Reconcile must finish processing a completed output file when the server
// died after the CLI finished but before the result was persisted.
it(
  'processes output file for dead agent during reconcile',
  async () => {
    // 1. Run a real agent to completion so a genuine output file exists on disk.
    console.log(' 1. Spawning agent to completion...');
    const agent = await harness.agentManager.spawn({
      taskId: null,
      prompt: MINIMAL_PROMPTS.done,
      mode: 'execute',
      provider: 'claude',
    });
    await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
    const dbAgent = await harness.agentRepository.findById(agent.id);
    const outputFilePath = dbAgent?.outputFilePath;
    expect(outputFilePath).toBeTruthy();
    console.log(' 2. Output file:', outputFilePath);
    // 2. Reset agent to "running" to simulate a crash mid-processing.
    await harness.agentRepository.update(agent.id, { status: 'running' });
    // Clear the result so reconcile is forced to re-process the output file.
    await harness.agentRepository.update(agent.id, { result: null });
    // Confirm the reset took effect before reconciling.
    let resetAgent = await harness.agentRepository.findById(agent.id);
    expect(resetAgent?.status).toBe('running');
    // 3. Create a new manager (simulated restart) and reconcile.
    console.log(' 3. Creating new manager and reconciling...');
    harness.clearEvents();
    const newManager = new MultiProviderAgentManager(
      harness.agentRepository,
      harness.workspaceRoot,
      harness.projectRepository,
      harness.accountRepository,
      harness.eventBus
    );
    await newManager.reconcileAfterRestart();
    // Reconcile processes the file asynchronously — give it a moment.
    await sleep(1000);
    // 4. Verify the agent's final state came from the output file.
    const finalAgent = await harness.agentRepository.findById(agent.id);
    console.log(' 4. Final status:', finalAgent?.status);
    // Should either be idle (processed successfully) or crashed (couldn't process).
    expect(['idle', 'crashed']).toContain(finalAgent?.status);
  },
  REAL_TEST_TIMEOUT
);
});
// Restart reconciliation must be idempotent with respect to event emission:
// reconciling an already-completed agent should not replay its output events.
describe('Event Consistency', () => {
  it(
    'does not duplicate events on restart',
    async () => {
      // 1. Spawn an agent whose prompt produces streaming output.
      console.log(' 1. Spawning agent...');
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.streaming,
        mode: 'execute',
        provider: 'claude',
      });
      // 2. Wait for some output events to accumulate.
      await sleep(3000);
      const initialOutputCount = harness.getEventsByType('agent:output').length;
      console.log(' 2. Initial output events:', initialOutputCount);
      // 3. Wait for completion.
      // NOTE(review): initialOutputCount/finalOutputCount are only logged, never
      // asserted — the real check is the post-reconcile count below.
      await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
      const finalOutputCount = harness.getEventsByType('agent:output').length;
      console.log(' 3. Final output events:', finalOutputCount);
      // 4. Create a new manager and reconcile (the agent is already complete).
      harness.clearEvents();
      const newManager = new MultiProviderAgentManager(
        harness.agentRepository,
        harness.workspaceRoot,
        harness.projectRepository,
        harness.accountRepository,
        harness.eventBus
      );
      await newManager.reconcileAfterRestart();
      await sleep(1000);
      // 5. Verify no output events were re-emitted for the finished agent.
      const postReconcileOutputCount = harness.getEventsByType('agent:output').length;
      console.log(' 4. Post-reconcile output events:', postReconcileOutputCount);
      // Should not have re-emitted all the old output events.
      expect(postReconcileOutputCount).toBe(0);
    },
    REAL_TEST_TIMEOUT
  );
});
});

View File

@@ -0,0 +1,378 @@
/**
* Real Provider Test Harness
*
* Extends the existing test infrastructure to use REAL MultiProviderAgentManager
* for integration testing with actual CLI providers like Claude and Codex.
*
* Unlike the standard TestHarness which uses MockAgentManager, this harness:
* - Uses real CLI spawning (costs real API credits!)
* - Provides poll-based waiting helpers
* - Captures events for inspection
* - Manages temp directories for worktrees
*/
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe } from 'vitest';
import type { DrizzleDatabase } from '../../../db/index.js';
import type { DomainEvent, EventBus } from '../../../events/types.js';
import { EventEmitterBus } from '../../../events/bus.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import type { AgentResult, PendingQuestions, AgentStatus } from '../../../agent/types.js';
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
import {
  DrizzleAgentRepository,
  DrizzleProjectRepository,
  DrizzleAccountRepository,
  DrizzleInitiativeRepository,
} from '../../../db/repositories/drizzle/index.js';
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';
/**
 * Resolve after roughly `ms` milliseconds; used by the polling helpers below.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles once the timer fires.
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Event bus that records every event it emits so tests can assert on the
 * emission history after the fact.
 */
export class CapturingEventBus extends EventEmitterBus {
  emittedEvents: DomainEvent[] = [];

  /** Record the event, then forward it to subscribers via the parent bus. */
  emit<T extends DomainEvent>(event: T): void {
    // Capture before forwarding so listeners that inspect the log see this event.
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /** All captured events whose `type` matches, in emission order. */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
    return this.emittedEvents.filter((event): event is T => event.type === type);
  }

  /** Drop the captured history (subscribers are unaffected). */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}
/**
 * Options for creating a real provider test harness.
 */
export interface RealProviderHarnessOptions {
  /**
   * Which provider to test (default: 'claude').
   * NOTE(review): `createRealProviderHarness` never reads this field — confirm
   * whether it is meant to be wired into the manager or can be removed.
   */
  provider?: 'claude' | 'codex';
  /**
   * Optional workspace root. When omitted, a temp directory is created,
   * git-initialized, and deleted again by `cleanup()`.
   */
  workspaceRoot?: string;
}
/**
 * Real Provider Test Harness interface.
 *
 * Provides everything needed to test against real CLI providers:
 * - In-memory database with real repositories
 * - Real MultiProviderAgentManager (spawns actual CLI processes)
 * - Event capture for verification
 * - Polling-based wait helpers (all of which THROW on timeout — see below)
 */
export interface RealProviderHarness {
  /** In-memory SQLite database */
  db: DrizzleDatabase;
  /** Event bus with capture capability */
  eventBus: CapturingEventBus;
  /** Real agent manager (not mock!) */
  agentManager: MultiProviderAgentManager;
  /** Workspace root directory */
  workspaceRoot: string;
  /** Agent repository */
  agentRepository: AgentRepository;
  /** Project repository */
  projectRepository: ProjectRepository;
  /** Account repository */
  accountRepository: AccountRepository;
  /** Initiative repository */
  initiativeRepository: InitiativeRepository;
  /**
   * Wait for an agent to reach a terminal status (idle, stopped, or crashed).
   * Polls the database at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms = 2 minutes)
   * @returns The agent result once terminal (crashed agents also return their
   *   error result), or null if the agent is missing or entered waiting_for_input
   * @throws Error when the timeout elapses before a terminal status is reached
   */
  waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;
  /**
   * Wait for an agent to enter waiting_for_input status.
   * Polls the database at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @returns The pending questions once waiting, or null if the agent is
   *   missing or finished without asking questions
   * @throws Error when the timeout elapses first
   */
  waitForAgentWaiting(agentId: string, timeoutMs?: number): Promise<PendingQuestions | null>;
  /**
   * Wait for an agent to reach a specific status.
   *
   * @param agentId - The agent ID to wait for
   * @param status - The target status
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @throws Error on timeout, if the agent disappears, or if it settled in a
   *   state from which the target status can no longer be reached
   */
  waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs?: number): Promise<void>;
  /**
   * Get captured events filtered by type.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[];
  /**
   * Clear all captured events.
   */
  clearEvents(): void;
  /**
   * Stop every agent still marked running (best-effort; stop errors ignored).
   */
  killAllAgents(): Promise<void>;
  /**
   * Clean up all resources: kills agents and removes the temp workspace when
   * the harness created it. Call this in afterAll/afterEach.
   */
  cleanup(): Promise<void>;
}
/** Default poll interval (ms) — every wait helper re-reads the DB at this cadence. */
const POLL_INTERVAL_MS = 1000;
/**
* Create a test harness for real provider integration tests.
*
* This creates:
* - In-memory SQLite database
* - Temp directory for worktrees (or uses provided workspace)
* - Real MultiProviderAgentManager
* - Event capture bus
*
* @example
* ```typescript
* let harness: RealProviderHarness;
*
* beforeAll(async () => {
* harness = await createRealProviderHarness({ provider: 'claude' });
* });
*
* afterAll(async () => {
* await harness.cleanup();
* });
*
* it('spawns and completes', async () => {
* const agent = await harness.agentManager.spawn({...});
* const result = await harness.waitForAgentCompletion(agent.id);
* expect(result?.success).toBe(true);
* });
* ```
*/
export async function createRealProviderHarness(
  options: RealProviderHarnessOptions = {}
): Promise<RealProviderHarness> {
  // Create workspace directory (temp if not provided).
  const workspaceRoot = options.workspaceRoot ?? (await mkdtemp(join(tmpdir(), 'cw-test-')));
  const ownedWorkspace = !options.workspaceRoot; // Track if we need to clean up

  // Initialize a git repo in the temp workspace (required for worktree operations).
  if (ownedWorkspace) {
    const { execSync } = await import('node:child_process');
    execSync('git init', { cwd: workspaceRoot, stdio: 'ignore' });
    execSync('git config user.email "test@test.com"', { cwd: workspaceRoot, stdio: 'ignore' });
    execSync('git config user.name "Test"', { cwd: workspaceRoot, stdio: 'ignore' });
    // Create initial commit (worktrees require at least one commit).
    // Seed the file via fs instead of shelling out to `touch` so the harness
    // also works on platforms without a POSIX shell (e.g. Windows).
    await writeFile(join(workspaceRoot, '.gitkeep'), '');
    execSync('git add .gitkeep && git commit -m "init"', { cwd: workspaceRoot, stdio: 'ignore' });
  }

  // In-memory database plus real (non-mock) repositories.
  const db = createTestDatabase();
  const agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  const initiativeRepository = new DrizzleInitiativeRepository(db);

  // Create event bus with capture (parent class already sets maxListeners to 100).
  const eventBus = new CapturingEventBus();

  // Create REAL agent manager (not mock!) — spawn() launches actual CLI processes.
  const agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus
  );

  // Build harness
  const harness: RealProviderHarness = {
    db,
    eventBus,
    agentManager,
    workspaceRoot,
    agentRepository,
    projectRepository,
    accountRepository,
    initiativeRepository,

    // Polls until a terminal status. Crashed agents also return their (error)
    // result; only a missing agent or waiting_for_input yields null. Throws on
    // timeout rather than returning null so hangs fail loudly.
    async waitForAgentCompletion(agentId: string, timeoutMs = 120000): Promise<AgentResult | null> {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'idle' || agent.status === 'stopped') {
          // Agent completed - get result
          return agentManager.getResult(agentId);
        }
        if (agent.status === 'crashed') {
          // Agent crashed - return the error result
          return agentManager.getResult(agentId);
        }
        if (agent.status === 'waiting_for_input') {
          // Agent is waiting - return null (not completed)
          return null;
        }
        // Still running - wait and check again
        await sleep(POLL_INTERVAL_MS);
      }
      throw new Error(`Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
    },

    // Polls until the agent asks questions; null if it finished without
    // asking. Throws on timeout.
    async waitForAgentWaiting(agentId: string, timeoutMs = 120000): Promise<PendingQuestions | null> {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'waiting_for_input') {
          return agentManager.getPendingQuestions(agentId);
        }
        if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
          // Agent finished without asking questions
          return null;
        }
        // Still running - wait and check again
        await sleep(POLL_INTERVAL_MS);
      }
      throw new Error(`Timeout waiting for agent ${agentId} to request input after ${timeoutMs}ms`);
    },

    // Polls until the agent hits `status`; fails fast when the target status
    // is 'running' and the agent has already settled elsewhere.
    async waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs = 120000): Promise<void> {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) {
          throw new Error(`Agent ${agentId} not found`);
        }
        if (agent.status === status) {
          return;
        }
        // Check for terminal states that mean we'll never reach target
        if (status === 'running' && ['idle', 'stopped', 'crashed', 'waiting_for_input'].includes(agent.status)) {
          throw new Error(`Agent ${agentId} already in terminal state ${agent.status}, cannot reach ${status}`);
        }
        await sleep(POLL_INTERVAL_MS);
      }
      throw new Error(`Timeout waiting for agent ${agentId} to reach status ${status} after ${timeoutMs}ms`);
    },

    getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
      return eventBus.getEventsByType<T>(type);
    },

    clearEvents(): void {
      eventBus.clearEvents();
    },

    // Best-effort stop of every agent still marked running.
    async killAllAgents(): Promise<void> {
      const agents = await agentRepository.findAll();
      for (const agent of agents) {
        if (agent.status === 'running') {
          try {
            await agentManager.stop(agent.id);
          } catch {
            // Ignore errors during cleanup
          }
        }
      }
    },

    async cleanup(): Promise<void> {
      // Kill any running agents
      await harness.killAllAgents();
      // Clean up workspace directory only if this harness created it
      if (ownedWorkspace) {
        try {
          await rm(workspaceRoot, { recursive: true, force: true });
        } catch {
          // Ignore cleanup errors
        }
      }
    },
  };
  return harness;
}
/**
 * Check if real Claude tests should run.
 * Set REAL_CLAUDE_TESTS=1 environment variable to enable (these tests spend
 * real API credits).
 */
export const shouldRunRealClaudeTests = process.env.REAL_CLAUDE_TESTS === '1';
/**
 * Check if real Codex tests should run.
 * Set REAL_CODEX_TESTS=1 environment variable to enable.
 */
export const shouldRunRealCodexTests = process.env.REAL_CODEX_TESTS === '1';
/**
 * Skip wrapper for Claude tests - skips unless REAL_CLAUDE_TESTS=1.
 * (Cast required: vitest types `describe.skip` more narrowly than `describe`.)
 */
export const describeRealClaude: typeof describe = shouldRunRealClaudeTests ? describe : (describe.skip as typeof describe);
/**
 * Skip wrapper for Codex tests - skips unless REAL_CODEX_TESTS=1.
 */
export const describeRealCodex: typeof describe = shouldRunRealCodexTests ? describe : (describe.skip as typeof describe);
/**
 * Default test timeout for real CLI tests (2 minutes).
 * Real API calls take 5-30 seconds typically.
 */
export const REAL_TEST_TIMEOUT = 120000;
/**
 * Extended test timeout for slow tests (5 minutes).
 * Used for schema retry tests and crash recovery tests.
 */
export const EXTENDED_TEST_TIMEOUT = 300000;

View File

@@ -0,0 +1,56 @@
/**
* Real Provider Integration Tests
*
* This module provides infrastructure for testing against real CLI providers.
* Tests are expensive (real API calls) and skipped by default.
*
* ## Running Tests
*
* ```bash
* # Claude tests only
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/ --test-timeout=300000
*
* # Codex tests only
* REAL_CODEX_TESTS=1 npm test -- src/test/integration/real-providers/codex-manager.test.ts
*
* # All real provider tests
* REAL_CLAUDE_TESTS=1 REAL_CODEX_TESTS=1 npm test -- src/test/integration/real-providers/
* ```
*
* ## Cost Estimates
*
* | Suite | Tests | Est. Cost | Duration |
* |-------|-------|-----------|----------|
* | Output Parsing | 3 | $0.06 | ~2 min |
* | Schema Validation | 4 | $0.22 | ~4 min |
* | Crash Recovery | 3 | $0.08 | ~3 min |
* | Session Resume | 2 | $0.08 | ~3 min |
* | Codex Integration | 2 | $0.10 | ~2 min |
* | **TOTAL** | **14** | **~$0.54** | **~14 min** |
*
* ## Test Files
*
* - `harness.ts` - RealProviderHarness factory and utilities
* - `prompts.ts` - Minimal cost test prompts
* - `claude-manager.test.ts` - Claude spawn/resume/output tests
* - `codex-manager.test.ts` - Codex provider tests
* - `schema-retry.test.ts` - Schema validation + retry tests
* - `crash-recovery.test.ts` - Server restart simulation
* - `sample-outputs/` - Captured CLI output for parser unit tests
*/
// Barrel re-exports: test files import the harness utilities and the
// minimal-cost prompts from this module root instead of deep paths.
export {
  createRealProviderHarness,
  CapturingEventBus,
  sleep,
  shouldRunRealClaudeTests,
  shouldRunRealCodexTests,
  describeRealClaude,
  describeRealCodex,
  REAL_TEST_TIMEOUT,
  EXTENDED_TEST_TIMEOUT,
  type RealProviderHarness,
  type RealProviderHarnessOptions,
} from './harness.js';
export { MINIMAL_PROMPTS, CODEX_PROMPTS } from './prompts.js';
View File

@@ -0,0 +1,113 @@
/**
* Minimal Cost Test Prompts
*
* Carefully crafted prompts designed to minimize token usage while
* testing specific CLI behaviors. Each prompt aims for the smallest
* possible API cost while still exercising the target functionality.
*
* Cost estimates assume Claude Sonnet pricing (~$3/M input, $15/M output).
*/
export const MINIMAL_PROMPTS = {
  /**
   * ~$0.01 - Cheapest done response
   * Tests: basic spawn → completion flow, status parsing
   */
  done: `Output exactly this JSON with no other text:
{"status":"done","result":"ok"}`,
  /**
   * ~$0.01 - Cheapest questions response
   * Tests: waiting_for_input status, questions array parsing
   */
  questions: `Output exactly this JSON with no other text:
{"status":"questions","questions":[{"id":"q1","question":"What is your name?"}]}`,
  /**
   * ~$0.03 - Slow task for timing tests
   * Tests: streaming during long-running task, crash recovery
   * Note: the prompt does not enforce a real delay; the step-by-step counting
   * merely stretches the response so output arrives over a longer window.
   */
  slow: `Think through a simple problem step by step, counting from 1 to 10 slowly, then output:
{"status":"done","result":"counted to 10"}`,
  /**
   * ~$0.02 - Produces text deltas for streaming tests
   * Tests: text_delta event parsing, output buffering
   */
  streaming: `Count from 1 to 5, outputting each number, then output:
{"status":"done","result":"counted"}`,
  /**
   * ~$0.03 - Deliberately produces non-JSON first
   * Tests: schema validation failure, retry logic
   * Note: the model may still emit valid JSON on the first attempt, so retry
   * tests treat zero retries as an acceptable outcome.
   */
  badThenGood: `First say "thinking..." on its own line, then output:
{"status":"done","result":"fixed"}`,
  /**
   * ~$0.02 - Multiple questions
   * Tests: questions array with multiple items (and unique question IDs)
   */
  multipleQuestions: `Output exactly this JSON with no other text:
{"status":"questions","questions":[{"id":"q1","question":"First question?"},{"id":"q2","question":"Second question?"}]}`,
  /**
   * ~$0.01 - Error signal
   * Tests: error status handling
   */
  error: `Output exactly this JSON with no other text:
{"status":"error","error":"Test error message"}`,
  /**
   * ~$0.02 - Answer continuation
   * Tests: session resume with answers
   *
   * @param answers - Map of question ID to the user's answer text.
   * @returns A resume prompt echoing the answers and asking for a done signal.
   */
  answerContinuation: (answers: Record<string, string>): string => {
    const answerLines = Object.entries(answers)
      .map(([id, answer]) => `${id}: ${answer}`)
      .join('\n');
    return `I received your answers:
${answerLines}
Now complete the task by outputting:
{"status":"done","result":"completed with answers"}`;
  },
  /**
   * ~$0.02 - Context complete for discuss mode
   * Tests: discuss mode output handling (now uses universal done signal)
   */
  discussComplete: `Output exactly this JSON with no other text:
{"status":"done"}`,
  /**
   * ~$0.02 - Plan complete
   * Tests: plan mode output handling (now uses universal done signal)
   */
  planComplete: `Output exactly this JSON with no other text:
{"status":"done"}`,
  /**
   * ~$0.02 - Detail complete
   * Tests: detail mode output handling (now uses universal done signal)
   */
  detailComplete: `Output exactly this JSON with no other text:
{"status":"done"}`,
} as const;
/**
 * Prompts specifically for Codex provider testing.
 * Codex may have different output format requirements, so these prompts avoid
 * the strict JSON status contract used by MINIMAL_PROMPTS.
 */
export const CODEX_PROMPTS = {
  /**
   * Basic completion for Codex
   */
  done: `Complete this simple task: output "done" and finish.`,
  /**
   * Produces streaming output (multiple item events before turn completion)
   */
  streaming: `Count from 1 to 5, saying each number aloud, then say "finished".`,
} as const;

View File

@@ -0,0 +1,68 @@
# Sample CLI Outputs
This directory contains captured real CLI outputs for use in parser unit tests.
These files allow testing stream parsers without incurring API costs.
## Files
### claude-stream-success.jsonl
A successful Claude CLI session (v2.1.33) that:
- Initializes with `system` event containing `session_id`
- Emits `assistant` message with content
- Completes with `result` event containing `done` status JSON
### claude-stream-questions.jsonl
A Claude CLI session that:
- Initializes with `system` event containing `session_id`
- Emits `assistant` message with content wrapped in markdown code block
- Completes with `result` event containing `questions` status JSON
### codex-stream-success.jsonl
A successful Codex CLI session (v0.98.0) that:
- Starts with `thread.started` event containing `thread_id`
- Emits `turn.started`, `item.completed` events
- Completes with `turn.completed` event containing usage stats
## Event Type Differences
### Claude CLI (`--output-format stream-json`)
- `system` (subtype: `init`) - Contains `session_id`, tools, model info
- `assistant` - Contains message content in `content[].text`
- `result` - Contains final `result` text and `total_cost_usd`
### Codex CLI (`--json`)
- `thread.started` - Contains `thread_id` (equivalent to session_id)
- `turn.started` - Marks beginning of turn
- `item.completed` - Contains reasoning or agent_message items
- `turn.completed` - Contains usage stats
## Usage
These files can be used to test stream parsers in isolation:
```typescript
import { readFileSync } from 'fs';
import { ClaudeStreamParser } from '../../../agent/providers/parsers/claude.js';
const output = readFileSync('sample-outputs/claude-stream-success.jsonl', 'utf-8');
const parser = new ClaudeStreamParser();
for (const line of output.split('\n')) {
if (line.trim()) {
const events = parser.parseLine(line);
// Assert on events...
}
}
```
## Capturing New Outputs
### Claude
```bash
claude -p "your prompt" --output-format stream-json --verbose > output.jsonl
```
### Codex
```bash
codex exec --full-auto --json "your prompt" > output.jsonl
```

View File

@@ -0,0 +1,3 @@
{"type":"system","subtype":"init","cwd":"/Users/lukasmay/development/projects/codewalk-district","session_id":"774631da-8e54-445e-9ccb-eea8e7fe805e","tools":["Task","TaskOutput","Bash","Glob","Grep","ExitPlanMode","Read","Edit","Write","NotebookEdit","WebFetch","TodoWrite","WebSearch","TaskStop","AskUserQuestion","Skill","EnterPlanMode","ToolSearch"],"mcp_servers":[],"model":"claude-opus-4-6","permissionMode":"default","slash_commands":["keybindings-help","debug","gsd:define-requirements","gsd:list-phase-assumptions","gsd:debug","gsd:remove-phase","gsd:complete-milestone","gsd:research-phase","gsd:plan-phase","gsd:check-todos","gsd:pause-work","gsd:execute-plan","gsd:research-project","gsd:add-todo","gsd:plan-fix","gsd:resume-work","gsd:progress","gsd:help","gsd:discuss-milestone","gsd:add-phase","gsd:create-roadmap","gsd:map-codebase","gsd:whats-new","gsd:insert-phase","gsd:new-milestone","gsd:new-project","gsd:execute-phase","gsd:verify-work","gsd:discuss-phase","compact","context","cost","init","pr-comments","release-notes","review","security-review","insights"],"apiKeySource":"none","claude_code_version":"2.1.33","output_style":"default","agents":["Bash","general-purpose","statusline-setup","Explore","Plan","claude-code-guide","jira-sw-assessment"],"skills":["keybindings-help","debug"],"plugins":[],"uuid":"224c683c-41f4-4fdd-9af6-f8cdca366ec1"}
{"type":"assistant","message":{"model":"claude-opus-4-6","id":"msg_01CfDymxvSRFodJ5Zm6NjLHV","type":"message","role":"assistant","content":[{"type":"text","text":"```json\n{\"status\":\"questions\",\"questions\":[{\"id\":\"q1\",\"question\":\"What is your name?\"},{\"id\":\"q2\",\"question\":\"What is the deadline?\"}]}\n```"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":5983,"cache_read_input_tokens":18026,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":5983},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"},"context_management":null},"parent_tool_use_id":null,"session_id":"774631da-8e54-445e-9ccb-eea8e7fe805e","uuid":"29288f20-766c-4047-82f5-679024188f52"}
{"type":"result","subtype":"success","is_error":false,"duration_ms":3213,"duration_api_ms":3203,"num_turns":1,"result":"```json\n{\"status\":\"questions\",\"questions\":[{\"id\":\"q1\",\"question\":\"What is your name?\"},{\"id\":\"q2\",\"question\":\"What is the deadline?\"}]}\n```","stop_reason":null,"session_id":"774631da-8e54-445e-9ccb-eea8e7fe805e","total_cost_usd":0.04754675,"usage":{"input_tokens":3,"cache_creation_input_tokens":5983,"cache_read_input_tokens":18026,"output_tokens":45,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":5983,"ephemeral_5m_input_tokens":0}},"modelUsage":{"claude-opus-4-6":{"inputTokens":3,"outputTokens":45,"cacheReadInputTokens":18026,"cacheCreationInputTokens":5983,"webSearchRequests":0,"costUSD":0.04754675,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"uuid":"08db08cd-0f12-47ae-8c21-c29e11a6d7df"}

View File

@@ -0,0 +1,3 @@
{"type":"system","subtype":"init","cwd":"/Users/lukasmay/development/projects/codewalk-district","session_id":"a0aa6272-b3a6-443c-9ef5-de3a2450dc6d","tools":["Task","TaskOutput","Bash","Glob","Grep","ExitPlanMode","Read","Edit","Write","NotebookEdit","WebFetch","TodoWrite","WebSearch","TaskStop","AskUserQuestion","Skill","EnterPlanMode","ToolSearch"],"mcp_servers":[],"model":"claude-opus-4-6","permissionMode":"default","slash_commands":["keybindings-help","debug","gsd:define-requirements","gsd:list-phase-assumptions","gsd:debug","gsd:remove-phase","gsd:complete-milestone","gsd:research-phase","gsd:plan-phase","gsd:check-todos","gsd:pause-work","gsd:execute-plan","gsd:research-project","gsd:add-todo","gsd:plan-fix","gsd:resume-work","gsd:progress","gsd:help","gsd:discuss-milestone","gsd:add-phase","gsd:create-roadmap","gsd:map-codebase","gsd:whats-new","gsd:insert-phase","gsd:new-milestone","gsd:new-project","gsd:execute-phase","gsd:verify-work","gsd:discuss-phase","compact","context","cost","init","pr-comments","release-notes","review","security-review","insights"],"apiKeySource":"none","claude_code_version":"2.1.33","output_style":"default","agents":["Bash","general-purpose","statusline-setup","Explore","Plan","claude-code-guide","jira-sw-assessment"],"skills":["keybindings-help","debug"],"plugins":[],"uuid":"c1d6dced-ca04-4335-a624-624660479b7b"}
{"type":"assistant","message":{"model":"claude-opus-4-6","id":"msg_01RjSiQY1RUgT47j73Dom93j","type":"message","role":"assistant","content":[{"type":"text","text":"{\"status\":\"done\",\"result\":\"ok\"}"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":5958,"cache_read_input_tokens":18026,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":5958},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"},"context_management":null},"parent_tool_use_id":null,"session_id":"a0aa6272-b3a6-443c-9ef5-de3a2450dc6d","uuid":"f1c8695a-534e-4de2-a684-fa4a1ec03749"}
{"type":"result","subtype":"success","is_error":false,"duration_ms":2465,"duration_api_ms":2453,"num_turns":1,"result":"{\"status\":\"done\",\"result\":\"ok\"}","stop_reason":null,"session_id":"a0aa6272-b3a6-443c-9ef5-de3a2450dc6d","total_cost_usd":0.046565499999999996,"usage":{"input_tokens":3,"cache_creation_input_tokens":5958,"cache_read_input_tokens":18026,"output_tokens":12,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":5958,"ephemeral_5m_input_tokens":0}},"modelUsage":{"claude-opus-4-6":{"inputTokens":3,"outputTokens":12,"cacheReadInputTokens":18026,"cacheCreationInputTokens":5958,"webSearchRequests":0,"costUSD":0.046565499999999996,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"uuid":"53139e08-b4f3-4f94-b129-82759f77fdca"}

View File

@@ -0,0 +1,5 @@
{"type":"thread.started","thread_id":"019c3242-955e-7140-9978-517f0b5a22cb"}
{"type":"turn.started"}
{"type":"item.completed","item":{"id":"item_0","type":"reasoning","text":"**Confirming simple greeting task**"}}
{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"Hello!"}}
{"type":"turn.completed","usage":{"input_tokens":8458,"cached_input_tokens":6912,"output_tokens":32}}

View File

@@ -0,0 +1,306 @@
/**
* Schema Validation & Retry Integration Tests
*
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run these tests:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/schema-retry.test.ts --test-timeout=300000
* ```
*
* Tests covered:
* - Valid JSON output validation
* - Questions status parsing
* - Schema validation failure with retry
* - Max retry limit handling
*
* Estimated cost: ~$0.20 per full run (includes retries)
*/
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import {
createRealProviderHarness,
describeRealClaude,
REAL_TEST_TIMEOUT,
EXTENDED_TEST_TIMEOUT,
type RealProviderHarness,
} from './harness.js';
import { MINIMAL_PROMPTS } from './prompts.js';
import type { AgentResumedEvent, AgentCrashedEvent } from '../../../events/types.js';
describeRealClaude('Schema Validation & Retry', () => {
let harness: RealProviderHarness;
// One harness is shared across the whole suite: creating it sets up a temp
// git workspace and in-memory DB, and every spawn costs real API credits.
beforeAll(async () => {
  console.log('\n=== Running Schema Validation & Retry Tests ===');
  console.log('These tests call the real Claude API and incur costs.');
  console.log('Retry tests may take longer and cost more.\n');
  harness = await createRealProviderHarness({ provider: 'claude' });
});
afterAll(async () => {
  // Stops any stray agents and removes the temp workspace.
  await harness.cleanup();
});
beforeEach(() => {
  // Tests assert on captured event counts, so start each case with a clean log.
  harness.clearEvents();
});
// Happy-path schema validation: well-formed done/questions JSON must be
// accepted on the first attempt, with no retry (agent:resumed) events emitted.
describe('Valid Output', () => {
  it(
    'validates done status output',
    async () => {
      // Spawn agent with the minimal done prompt.
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.done,
        mode: 'execute',
        provider: 'claude',
      });
      // Wait for completion.
      const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
      // Verify the agent settled as idle with a successful result.
      const dbAgent = await harness.agentRepository.findById(agent.id);
      expect(dbAgent?.status).toBe('idle');
      expect(result?.success).toBe(true);
      // No retry events should have been emitted for a valid first response.
      const resumeEvents = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
      expect(resumeEvents.length).toBe(0);
      console.log(' Status: idle (valid done output)');
      console.log(' Result:', result?.message);
    },
    REAL_TEST_TIMEOUT
  );
  it(
    'validates questions status output',
    async () => {
      // Spawn agent with the single-question prompt.
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.questions,
        mode: 'execute',
        provider: 'claude',
      });
      // Wait for the waiting_for_input transition.
      const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
      // Verify the questions payload passed schema validation.
      expect(questions).toBeTruthy();
      expect(questions?.questions).toBeInstanceOf(Array);
      expect(questions?.questions.length).toBeGreaterThan(0);
      // Each question should have id and question fields populated.
      for (const q of questions?.questions ?? []) {
        expect(q.id).toBeTruthy();
        expect(q.question).toBeTruthy();
      }
      const dbAgent = await harness.agentRepository.findById(agent.id);
      expect(dbAgent?.status).toBe('waiting_for_input');
      // No retry events for a valid first response.
      const resumeEvents = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
      expect(resumeEvents.length).toBe(0);
      console.log(' Status: waiting_for_input (valid questions output)');
      console.log(' Questions:', questions?.questions.length);
    },
    REAL_TEST_TIMEOUT
  );
  it(
    'validates multiple questions',
    async () => {
      // Spawn agent with the two-question prompt.
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.multipleQuestions,
        mode: 'execute',
        provider: 'claude',
      });
      // Wait for the waiting_for_input transition.
      const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
      // Verify multiple questions came through.
      expect(questions?.questions.length).toBeGreaterThanOrEqual(2);
      // Each question should carry a unique ID (required to route answers back).
      const ids = questions?.questions.map((q) => q.id) ?? [];
      const uniqueIds = new Set(ids);
      expect(uniqueIds.size).toBe(ids.length);
      console.log(' Questions:', questions?.questions.map((q) => q.id).join(', '));
    },
    REAL_TEST_TIMEOUT
  );
});
describe('Retry Logic', () => {
it(
'retries when output does not match schema',
async () => {
// Prompt that produces non-JSON first, then valid JSON
// Note: Claude may or may not produce invalid output first
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.badThenGood,
mode: 'execute',
provider: 'claude',
});
// Wait for completion (may involve retries)
const result = await harness.waitForAgentCompletion(agent.id, EXTENDED_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
// Either succeeded with retry OR succeeded first time
expect(['idle', 'crashed']).toContain(dbAgent?.status);
// Check for retry events
const resumeEvents = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
console.log(' Retry attempts:', resumeEvents.length);
console.log(' Final status:', dbAgent?.status);
if (dbAgent?.status === 'idle') {
expect(result?.success).toBe(true);
console.log(' Result:', result?.message);
} else {
// Crashed after max retries
const crashedEvents = harness.getEventsByType<AgentCrashedEvent>('agent:crashed');
expect(crashedEvents.length).toBeGreaterThan(0);
console.log(' Crashed after retries');
}
},
EXTENDED_TEST_TIMEOUT
);
it(
'extracts JSON from markdown code blocks',
async () => {
// Prompt that produces JSON wrapped in markdown
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: `Output the result wrapped in a markdown code block like this:
\`\`\`json
{"status":"done","result":"extracted from markdown"}
\`\`\``,
mode: 'execute',
provider: 'claude',
});
// Wait for completion
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
console.log(' Status:', dbAgent?.status);
console.log(' Result:', result?.message);
// Should succeed (JSON extraction from code block)
if (dbAgent?.status === 'idle') {
expect(result?.success).toBe(true);
}
},
REAL_TEST_TIMEOUT
);
it(
'extracts JSON from text with surrounding content',
async () => {
// Prompt that produces JSON with text before it
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: `First say "Here is my response:" then output the JSON:
{"status":"done","result":"extracted from text"}`,
mode: 'execute',
provider: 'claude',
});
// Wait for completion
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
console.log(' Status:', dbAgent?.status);
console.log(' Result:', result?.message);
// Should succeed (JSON extraction from last {...} block)
if (dbAgent?.status === 'idle') {
expect(result?.success).toBe(true);
}
},
REAL_TEST_TIMEOUT
);
});
describe('Mode-Specific Schemas', () => {
it(
'validates discuss mode output',
async () => {
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.discussComplete,
mode: 'discuss',
provider: 'claude',
});
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
expect(dbAgent?.status).toBe('idle');
expect(result?.success).toBe(true);
console.log(' Discuss mode result:', result?.message);
},
REAL_TEST_TIMEOUT
);
it(
'validates plan mode output',
async () => {
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.planComplete,
mode: 'plan',
provider: 'claude',
});
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
expect(dbAgent?.status).toBe('idle');
expect(result?.success).toBe(true);
console.log(' Plan mode result:', result?.message);
},
REAL_TEST_TIMEOUT
);
it(
'validates detail mode output',
async () => {
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.detailComplete,
mode: 'detail',
provider: 'claude',
});
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
expect(dbAgent?.status).toBe('idle');
expect(result?.success).toBe(true);
console.log(' Detail mode result:', result?.message);
},
REAL_TEST_TIMEOUT
);
});
});

// ---------------------------------------------------------------------------
// Second test file: topologicalSortPhases unit tests
// (replaces concatenation artifact: "View File" / "@@ -0,0 +1,134 @@")
// ---------------------------------------------------------------------------
import { describe, it, expect } from 'vitest';
import { topologicalSortPhases, type PhaseForSort, type DependencyEdge } from '@codewalk-district/shared';
/** Build a minimal phase fixture carrying only the fields the sorter reads. */
const mkPhase = (id: string, createdAt: string | Date): PhaseForSort => ({ id, createdAt });
describe('topologicalSortPhases', () => {
  /** Project a sorted result down to its phase ids for compact assertions. */
  const idsOf = (sorted: Array<{ id: string }>): string[] => sorted.map((phase) => phase.id);

  it('should return empty array for empty input', () => {
    expect(topologicalSortPhases([], [])).toEqual([]);
  });

  it('should return phases in createdAt order when no edges', () => {
    const unordered = [
      mkPhase('c', '2026-01-03'),
      mkPhase('a', '2026-01-01'),
      mkPhase('b', '2026-01-02'),
    ];
    expect(idsOf(topologicalSortPhases(unordered, []))).toEqual(['a', 'b', 'c']);
  });

  it('should sort linear chain correctly', () => {
    // A -> B -> C (B depends on A, C depends on B)
    const chain: DependencyEdge[] = [
      { phaseId: 'b', dependsOnPhaseId: 'a' },
      { phaseId: 'c', dependsOnPhaseId: 'b' },
    ];
    const sorted = topologicalSortPhases(
      [mkPhase('a', '2026-01-01'), mkPhase('b', '2026-01-02'), mkPhase('c', '2026-01-03')],
      chain
    );
    expect(idsOf(sorted)).toEqual(['a', 'b', 'c']);
  });

  it('should handle diamond dependency', () => {
    //   A
    //  / \
    // B   C
    //  \ /
    //   D
    const diamond: DependencyEdge[] = [
      { phaseId: 'b', dependsOnPhaseId: 'a' },
      { phaseId: 'c', dependsOnPhaseId: 'a' },
      { phaseId: 'd', dependsOnPhaseId: 'b' },
      { phaseId: 'd', dependsOnPhaseId: 'c' },
    ];
    const sorted = topologicalSortPhases(
      [
        mkPhase('a', '2026-01-01'),
        mkPhase('b', '2026-01-02'),
        mkPhase('c', '2026-01-03'),
        mkPhase('d', '2026-01-04'),
      ],
      diamond
    );
    // A must come first, D must come last, B before C by createdAt
    expect(sorted[0].id).toBe('a');
    expect(sorted[3].id).toBe('d');
    expect(idsOf(sorted)).toEqual(['a', 'b', 'c', 'd']);
  });

  it('should use createdAt as deterministic tiebreaker', () => {
    // Three independent phases — should sort by createdAt
    const independent = [
      mkPhase('z', '2026-01-03'),
      mkPhase('y', '2026-01-01'),
      mkPhase('x', '2026-01-02'),
    ];
    expect(idsOf(topologicalSortPhases(independent, []))).toEqual(['y', 'x', 'z']);
  });

  it('should handle cycle gracefully by appending cycled nodes', () => {
    // A -> B -> A (cycle), C is independent
    const cyclic: DependencyEdge[] = [
      { phaseId: 'b', dependsOnPhaseId: 'a' },
      { phaseId: 'a', dependsOnPhaseId: 'b' },
    ];
    const sorted = topologicalSortPhases(
      [mkPhase('a', '2026-01-01'), mkPhase('b', '2026-01-02'), mkPhase('c', '2026-01-03')],
      cyclic
    );
    // C has no deps so it comes first; A and B are appended (cycle members)
    // in createdAt order.
    expect(sorted.length).toBe(3);
    expect(idsOf(sorted)).toEqual(['c', 'a', 'b']);
  });

  it('should ignore edges referencing non-existent phases', () => {
    const dangling: DependencyEdge[] = [{ phaseId: 'b', dependsOnPhaseId: 'nonexistent' }];
    const sorted = topologicalSortPhases(
      [mkPhase('a', '2026-01-01'), mkPhase('b', '2026-01-02')],
      dangling
    );
    // Edge is ignored, both treated as independent
    expect(idsOf(sorted)).toEqual(['a', 'b']);
  });

  it('should handle single phase with no edges', () => {
    expect(idsOf(topologicalSortPhases([mkPhase('only', '2026-01-01')], []))).toEqual(['only']);
  });

  it('should work with Date objects', () => {
    const sorted = topologicalSortPhases(
      [mkPhase('b', new Date('2026-01-02')), mkPhase('a', new Date('2026-01-01'))],
      [{ phaseId: 'b', dependsOnPhaseId: 'a' }]
    );
    expect(idsOf(sorted)).toEqual(['a', 'b']);
  });

  it('should preserve extra properties on phase objects', () => {
    const enriched = [
      { id: 'a', createdAt: '2026-01-01', name: 'Alpha', status: 'pending' },
      { id: 'b', createdAt: '2026-01-02', name: 'Beta', status: 'active' },
    ];
    const sorted = topologicalSortPhases(enriched, []);
    expect(sorted[0].name).toBe('Alpha');
    expect(sorted[1].name).toBe('Beta');
  });
});