refactor: Restructure monorepo to apps/server/ and apps/web/ layout

Move src/ → apps/server/ and packages/web/ → apps/web/ to adopt
standard monorepo conventions (apps/ for runnable apps, packages/
for reusable libraries). Update all config files, shared package
imports, test fixtures, and documentation to reflect new paths.

Key fixes:
- Update workspace config to ["apps/*", "packages/*"]
- Update tsconfig.json rootDir/include for apps/server/
- Add apps/web/** to vitest exclude list
- Update drizzle.config.ts schema path
- Fix ensure-schema.ts migration path detection (3 levels up in dev,
  2 levels up in dist)
- Fix tests/integration/cli-server.test.ts import paths
- Update packages/shared imports to apps/server/ paths
- Update all docs/ files with new paths
This commit is contained in:
Lukas May
2026-03-03 11:22:53 +01:00
parent 8c38d958ce
commit 34578d39c6
535 changed files with 75452 additions and 687 deletions

View File

@@ -0,0 +1,265 @@
/**
* Cassette System Unit Tests
*
* Verifies normalizer, key generation, and store in isolation.
* These run without any real processes or API calls.
*/
import { describe, it, expect, beforeEach } from 'vitest';
import { mkdtempSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
import { hashWorktreeFiles, buildCassetteKey } from './key.js';
import { CassetteStore } from './store.js';
import type { CassetteEntry, CassetteKey } from './types.js';
// ---------------------------------------------------------------------------
// Normalizer
// ---------------------------------------------------------------------------
describe('normalizePrompt', () => {
  it('strips UUIDs', () => {
    const input = 'Agent 550e8400-e29b-41d4-a716-446655440000 is running task abc123ef-0000-0000-0000-000000000000';
    const normalized = normalizePrompt(input, '');
    expect(normalized).toContain('__UUID__');
    expect(normalized).not.toContain('550e8400');
    expect(normalized).not.toContain('abc123ef');
  });

  it('strips workspace root path', () => {
    const root = '/tmp/cw-test-abc123';
    const normalized = normalizePrompt(`Working directory: ${root}/agent-workdirs/my-agent`, root);
    expect(normalized).toContain('__WORKSPACE__');
    expect(normalized).not.toContain(root);
  });

  it('strips ISO timestamps', () => {
    const normalized = normalizePrompt('Started at 2026-03-01T14:30:00Z, last seen 2026-03-01T14:35:22.456Z', '');
    expect(normalized).toContain('__TIMESTAMP__');
    expect(normalized).not.toContain('2026-03-01');
  });

  it('strips session numbers', () => {
    const normalized = normalizePrompt('Resuming session 3 with agent session-42', '');
    expect(normalized).not.toContain('session 3');
    expect(normalized).not.toContain('session-42');
    expect(normalized).toContain('session__N__');
  });

  it('leaves static content unchanged', () => {
    const staticPrompt = 'You are a Worker agent. Execute the assigned coding task.';
    expect(normalizePrompt(staticPrompt, '/tmp/ws')).toBe(staticPrompt);
  });

  it('strips nanoid strings (21-char alphanumeric)', () => {
    const id = 'V1StGXR8_Z5jdHi6B-myT';
    const normalized = normalizePrompt(`Agent worktree: /tmp/cw-preview-${id}/app`, '');
    expect(normalized).toContain('__ID__');
    expect(normalized).not.toContain(id);
  });

  it('strips workspace root before UUID replacement to avoid double-normalizing', () => {
    const root = '/tmp/cw-test-abc123';
    const normalized = normalizePrompt(`Dir: ${root}/agents/550e8400-e29b-41d4-a716-446655440000`, root);
    expect(normalized).toBe('Dir: __WORKSPACE__/agents/__UUID__');
  });
});
describe('stripPromptFromArgs', () => {
  // All strip cases use the same prompt text; only the arg layout differs.
  const prompt = 'Do the task.';

  it('strips -p <prompt> style (Claude native)', () => {
    const stripped = stripPromptFromArgs(
      ['--dangerously-skip-permissions', '--verbose', '-p', prompt, '--output-format', 'stream-json'],
      prompt,
    );
    expect(stripped).toEqual(['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json']);
  });

  it('strips --prompt <prompt> style', () => {
    expect(stripPromptFromArgs(['--flag', '--prompt', prompt, '--json'], prompt)).toEqual(['--flag', '--json']);
  });

  it('strips bare positional prompt', () => {
    expect(stripPromptFromArgs(['--full-auto', prompt], prompt)).toEqual(['--full-auto']);
  });

  it('returns unchanged args when prompt is empty', () => {
    const args = ['--flag', '--value'];
    expect(stripPromptFromArgs(args, '')).toEqual(args);
  });
});
// ---------------------------------------------------------------------------
// Key generation
// ---------------------------------------------------------------------------
describe('buildCassetteKey', () => {
  const baseKey: CassetteKey = {
    normalizedPrompt: 'You are a Worker agent.',
    providerName: 'claude',
    modelArgs: ['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json'],
    worktreeHash: 'empty',
  };

  // Build a key hash from baseKey with selected fields overridden.
  const keyOf = (overrides: Partial<CassetteKey>): string => buildCassetteKey({ ...baseKey, ...overrides });

  it('produces a 32-char hex string', () => {
    expect(buildCassetteKey(baseKey)).toMatch(/^[0-9a-f]{32}$/);
  });

  it('is deterministic for the same key', () => {
    expect(buildCassetteKey(baseKey)).toBe(buildCassetteKey(baseKey));
  });

  it('differs when normalizedPrompt changes', () => {
    expect(keyOf({ normalizedPrompt: 'You are a Discuss agent.' })).not.toBe(buildCassetteKey(baseKey));
  });

  it('differs when providerName changes', () => {
    expect(keyOf({ providerName: 'codex' })).not.toBe(buildCassetteKey(baseKey));
  });

  it('differs when worktreeHash changes', () => {
    expect(keyOf({ worktreeHash: 'abcdef1234567890' })).not.toBe(buildCassetteKey(baseKey));
  });

  it('is stable regardless of modelArgs insertion order', () => {
    const forward = keyOf({ modelArgs: ['--verbose', '--dangerously-skip-permissions'] });
    const reversed = keyOf({ modelArgs: ['--dangerously-skip-permissions', '--verbose'] });
    expect(forward).toBe(reversed);
  });
});
describe('hashWorktreeFiles', () => {
  // Run a test body inside a fresh temp dir that is always removed afterwards.
  const withTempDir = (fn: (dir: string) => void): void => {
    const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-'));
    try {
      fn(dir);
    } finally {
      rmSync(dir, { recursive: true });
    }
  };

  it('returns "empty" for a non-existent directory', () => {
    expect(hashWorktreeFiles('/does/not/exist')).toBe('empty');
  });

  it('returns "empty" for a directory with only hidden files', () => {
    withTempDir((dir) => {
      // Only hidden entries present
      const { mkdirSync } = require('node:fs');
      mkdirSync(join(dir, '.git'));
      expect(hashWorktreeFiles(dir)).toBe('empty');
    });
  });

  it('produces a 16-char hex string for a directory with files', () => {
    withTempDir((dir) => {
      const { writeFileSync } = require('node:fs');
      writeFileSync(join(dir, 'index.ts'), 'export const x = 1;');
      expect(hashWorktreeFiles(dir)).toMatch(/^[0-9a-f]{16}$/);
    });
  });

  it('changes when file content changes', () => {
    withTempDir((dir) => {
      const { writeFileSync } = require('node:fs');
      writeFileSync(join(dir, 'index.ts'), 'export const x = 1;');
      const before = hashWorktreeFiles(dir);
      writeFileSync(join(dir, 'index.ts'), 'export const x = 2;');
      expect(hashWorktreeFiles(dir)).not.toBe(before);
    });
  });
});
// ---------------------------------------------------------------------------
// CassetteStore
// ---------------------------------------------------------------------------
describe('CassetteStore', () => {
  let dir: string;
  let store: CassetteStore;

  const key: CassetteKey = {
    normalizedPrompt: 'Test prompt',
    providerName: 'claude',
    modelArgs: ['--verbose'],
    worktreeHash: 'empty',
  };

  const entry: CassetteEntry = {
    version: 1,
    key,
    recording: {
      jsonlLines: ['{"type":"system","session_id":"test-session"}', '{"type":"result","subtype":"success"}'],
      signalJson: { status: 'done', message: 'Task completed' },
      exitCode: 0,
      recordedAt: '2026-03-01T00:00:00.000Z',
    },
  };

  // List the cassette .json files currently present in the store directory.
  const cassetteFiles = (): string[] => {
    const { readdirSync } = require('node:fs');
    return readdirSync(dir).filter((f: string) => f.endsWith('.json'));
  };

  beforeEach(() => {
    dir = mkdtempSync(join(tmpdir(), 'cw-store-test-'));
    store = new CassetteStore(dir);
  });

  it('returns null for unknown key', () => {
    expect(store.find(key)).toBeNull();
  });

  it('round-trips a cassette entry', () => {
    store.save(key, entry);
    const loaded = store.find(key);
    expect(loaded).not.toBeNull();
    expect(loaded?.recording.jsonlLines).toHaveLength(2);
    expect(loaded?.recording.signalJson).toEqual({ status: 'done', message: 'Task completed' });
  });

  it('overwrites an existing cassette', () => {
    store.save(key, entry);
    store.save(key, {
      ...entry,
      recording: { ...entry.recording, jsonlLines: ['new line'], recordedAt: '2026-03-02T00:00:00.000Z' },
    });
    expect(store.find(key)?.recording.jsonlLines).toEqual(['new line']);
  });

  it('uses same file for same key', () => {
    store.save(key, entry);
    expect(cassetteFiles()).toHaveLength(1);
    store.save(key, entry); // overwrite
    expect(cassetteFiles()).toHaveLength(1);
  });

  it('uses different files for different keys', () => {
    const otherKey: CassetteKey = { ...key, providerName: 'codex' };
    store.save(key, entry);
    store.save(otherKey, { ...entry, key: otherKey });
    expect(cassetteFiles()).toHaveLength(2);
  });
});

View File

@@ -0,0 +1,200 @@
/**
* Cassette Test Harness
*
* Wraps RealProviderHarness with the CassetteProcessManager so tests run
* against recorded cassettes instead of real AI APIs.
*
* Usage:
*
* let harness: RealProviderHarness;
*
* beforeAll(async () => {
* harness = await createCassetteHarness({ provider: 'claude' });
* });
*
* afterAll(() => harness.cleanup());
*
* it('completes a task', async () => {
* const agent = await harness.agentManager.spawn({ prompt: MINIMAL_PROMPTS.done, ... });
* const result = await harness.waitForAgentCompletion(agent.id);
* expect(result?.success).toBe(true);
* });
*
* Mode control via env vars:
* (default) → replay mode: cassette must exist, throws if missing
* CW_CASSETTE_RECORD=1 → auto mode: replay if exists, record if missing
CW_CASSETTE_FORCE_RECORD=1 → record mode: always run real agent, overwrite cassette
*/
import { execSync } from 'node:child_process';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { createTestDatabase } from '../../db/repositories/drizzle/test-helpers.js';
import {
DrizzleAgentRepository,
DrizzleProjectRepository,
DrizzleAccountRepository,
DrizzleInitiativeRepository,
} from '../../db/repositories/drizzle/index.js';
import { MultiProviderAgentManager } from '../../agent/manager.js';
import { CapturingEventBus, sleep, type RealProviderHarness } from '../integration/real-providers/harness.js';
import { CassetteStore } from './store.js';
import { CassetteProcessManager, type CassetteMode } from './process-manager.js';
/** Configuration for createCassetteHarness. Every field is optional. */
export interface CassetteHarnessOptions {
  /** Which provider the agent runs as (default: 'claude'). */
  provider?: 'claude' | 'codex';
  /**
   * Directory where cassette JSON files are stored and read from.
   * Defaults to CW_CASSETTE_DIR env var, then src/test/cassettes/.
   */
  cassetteDir?: string;
  /**
   * Override cassette mode. Normally derived from env vars
   * (env vars win over this option — see resolveCassetteMode):
   * - CW_CASSETTE_FORCE_RECORD=1 → 'record'
   * - CW_CASSETTE_RECORD=1 → 'auto'
   * - (default) → 'replay'
   */
  mode?: CassetteMode;
}
// Default on-disk cassette directory, resolved relative to this module.
// fileURLToPath handles Windows drive letters and percent-encoded characters
// (spaces, etc.) correctly; URL#pathname would yield "/C:/..." and leave
// "%20" escapes in the path.
const DEFAULT_CASSETTE_DIR = fileURLToPath(new URL('../cassettes', import.meta.url));
/**
 * Resolve the effective cassette mode.
 * Env vars take priority over the options object; with neither set, 'replay'.
 */
function resolveCassetteMode(options: CassetteHarnessOptions): CassetteMode {
  const envOverride: CassetteMode | null =
    process.env.CW_CASSETTE_FORCE_RECORD === '1'
      ? 'record'
      : process.env.CW_CASSETTE_RECORD === '1'
        ? 'auto'
        : null;
  return envOverride ?? options.mode ?? 'replay';
}
/**
 * Create a test harness backed by the cassette system.
 *
 * The harness exposes the same interface as RealProviderHarness so tests
 * written for real providers work unchanged with cassettes.
 *
 * Replay is much faster than real API calls (typically < 500ms) and
 * exercises the full pipeline: ProcessManager → FileTailer → OutputHandler
 * → SignalManager → event emission.
 *
 * @param options Provider selection, cassette directory, and mode override.
 * @returns A fully wired harness whose process manager replays or records
 *          cassettes instead of always spawning real agent CLIs.
 */
export async function createCassetteHarness(options: CassetteHarnessOptions = {}): Promise<RealProviderHarness> {
  // Precedence: explicit option → CW_CASSETTE_DIR env var → bundled default dir.
  const cassetteDir = options.cassetteDir ?? process.env.CW_CASSETTE_DIR ?? DEFAULT_CASSETTE_DIR;
  const cassetteMode = resolveCassetteMode(options);
  // Create a temporary git workspace (required for worktree operations).
  const workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-cassette-'));
  execSync('git init', { cwd: workspaceRoot, stdio: 'ignore' });
  execSync('git config user.email "test@test.com"', { cwd: workspaceRoot, stdio: 'ignore' });
  execSync('git config user.name "Test"', { cwd: workspaceRoot, stdio: 'ignore' });
  // Initial commit so the repository has a HEAD for worktree operations.
  execSync('touch .gitkeep && git add .gitkeep && git commit -m "init"', { cwd: workspaceRoot, stdio: 'ignore' });
  // Fresh test database plus the repositories the agent manager needs.
  const db = createTestDatabase();
  const agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  const initiativeRepository = new DrizzleInitiativeRepository(db);
  const eventBus = new CapturingEventBus();
  const store = new CassetteStore(cassetteDir);
  // The cassette-aware process manager replaces the default ProcessManager
  // inside MultiProviderAgentManager (passed as the last constructor arg).
  const cassetteProcessManager = new CassetteProcessManager(
    workspaceRoot,
    projectRepository,
    store,
    cassetteMode,
  );
  const agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus,
    undefined, // credentialManager
    undefined, // changeSetRepository
    undefined, // phaseRepository
    undefined, // taskRepository
    undefined, // pageRepository
    undefined, // logChunkRepository
    false, // debug
    cassetteProcessManager,
  );
  const harness: RealProviderHarness = {
    db,
    eventBus,
    agentManager,
    workspaceRoot,
    agentRepository,
    projectRepository,
    accountRepository,
    initiativeRepository,
    // Cassette replays are fast — use a short poll interval and default timeout.
    // Polls the agent row until a terminal status, then fetches the result.
    async waitForAgentCompletion(agentId, timeoutMs = 30_000) {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null; // agent row gone — nothing to wait for
        if (agent.status === 'idle' || agent.status === 'stopped') {
          return agentManager.getResult(agentId);
        }
        if (agent.status === 'crashed') {
          return agentManager.getResult(agentId);
        }
        // waiting_for_input is not completion; callers use waitForAgentWaiting.
        if (agent.status === 'waiting_for_input') return null;
        await sleep(100);
      }
      throw new Error(`[cassette] Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
    },
    // Polls until the agent is waiting for input, or null on any terminal status.
    async waitForAgentWaiting(agentId, timeoutMs = 30_000) {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'waiting_for_input') return agentManager.getPendingQuestions(agentId);
        if (['idle', 'stopped', 'crashed'].includes(agent.status)) return null; // terminal — will never wait
        await sleep(100);
      }
      throw new Error(`[cassette] Timeout waiting for agent ${agentId} to enter waiting state after ${timeoutMs}ms`);
    },
    // Polls until the agent reaches the exact status; throws if the agent disappears.
    async waitForAgentStatus(agentId, status, timeoutMs = 30_000) {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) throw new Error(`Agent ${agentId} not found`);
        if (agent.status === status) return;
        await sleep(100);
      }
      throw new Error(`[cassette] Timeout waiting for agent ${agentId} to reach status '${status}' after ${timeoutMs}ms`);
    },
    getEventsByType(type) {
      return eventBus.getEventsByType(type);
    },
    clearEvents() {
      eventBus.clearEvents();
    },
    // Best-effort stop of every still-running agent; stop errors are swallowed.
    async killAllAgents() {
      const agents = await agentRepository.findAll();
      for (const agent of agents) {
        if (agent.status === 'running') {
          try { await agentManager.stop(agent.id); } catch { /* ignore */ }
        }
      }
    },
    // Stops agents and removes the temporary git workspace.
    async cleanup() {
      await harness.killAllAgents();
      try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ }
    },
  };
  return harness;
}

View File

@@ -0,0 +1,6 @@
/**
 * Cassette system barrel — public entry point for the VCR-style
 * record/replay test infrastructure (store, process manager, harness,
 * and the key/normalizer helpers used to build cassette identities).
 */
export { CassetteStore } from './store.js';
export { CassetteProcessManager, type CassetteMode } from './process-manager.js';
export { createCassetteHarness, type CassetteHarnessOptions } from './harness.js';
export { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
export { hashWorktreeFiles, buildCassetteKey } from './key.js';
export type { CassetteKey, CassetteRecording, CassetteEntry } from './types.js';

View File

@@ -0,0 +1,76 @@
/**
* Cassette Key Generation
*
* Builds stable SHA256-based identifiers for cassettes.
* Two spans are separate concerns:
* - hashWorktreeFiles: fingerprints the worktree state at spawn time (for execute mode drift)
* - buildCassetteKey: hashes all key components into a 32-char hex filename
*/
import { createHash } from 'node:crypto';
import { readdirSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
import type { CassetteKey } from './types.js';
/**
* Recursively hash all non-hidden files in a directory.
*
* Hidden entries (starting with '.') are skipped — this excludes .git, .cw, etc.
* Entries are processed in sorted order for determinism across platforms.
*
* Returns the first 16 hex chars of the SHA256, or 'empty' if the directory
* is absent or contains no readable files.
*/
export function hashWorktreeFiles(dir: string): string {
const hash = createHash('sha256');
let hasContent = false;
function walkDir(currentDir: string): void {
let entries;
try {
entries = readdirSync(currentDir, { withFileTypes: true });
} catch {
return;
}
for (const entry of [...entries].sort((a, b) => a.name.localeCompare(b.name))) {
if (entry.name.startsWith('.')) continue;
const fullPath = join(currentDir, entry.name);
const relPath = fullPath.slice(dir.length);
if (entry.isDirectory()) {
hash.update(`d:${relPath}\n`);
walkDir(fullPath);
} else if (entry.isFile()) {
try {
const content = readFileSync(fullPath);
hash.update(`f:${relPath}:${content.length}\n`);
hash.update(content);
hasContent = true;
} catch {
// skip unreadable files
}
}
}
}
walkDir(dir);
return hasContent ? hash.digest('hex').slice(0, 16) : 'empty';
}
/**
 * Compute a stable 32-char hex identifier for a cassette key.
 *
 * The key is serialized into a canonical JSON form (fixed property order,
 * modelArgs sorted) so that arg insertion-order differences between
 * providers map onto the same cassette file.
 */
export function buildCassetteKey(key: CassetteKey): string {
  const { normalizedPrompt, providerName, worktreeHash } = key;
  const canonical = JSON.stringify({
    normalizedPrompt,
    providerName,
    modelArgs: [...key.modelArgs].sort(),
    worktreeHash,
  });
  return createHash('sha256').update(canonical).digest('hex').slice(0, 32);
}

View File

@@ -0,0 +1,76 @@
/**
* Cassette Normalizer
*
* Strips dynamic content from prompts and CLI args before hashing into a cassette key.
* Dynamic content (UUIDs, temp paths, timestamps, session numbers) varies between
* test runs but doesn't affect how the agent responds — so we replace them with
* stable placeholders to get a stable cache key.
*/
const UUID_RE = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi;
const NANOID_RE = /(?<![A-Za-z0-9])[A-Za-z0-9_-]{21}(?![A-Za-z0-9_-])/g;
const ISO_TIMESTAMP_RE = /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?/g;
const UNIX_EPOCH_MS_RE = /\b1[0-9]{12}\b/g;
const SESSION_NUM_RE = /\bsession[_\s-]?\d+\b/gi;
// Agent worktree paths: agent-workdirs/<random-agent-name> (with or without trailing slash)
// The agent name (e.g. "available-sheep") changes every run but is not a UUID or nanoid.
// Stop at the first slash so the project name after it is preserved.
const AGENT_WORKDIR_RE = /agent-workdirs\/[^\s/\\]+/g;

/**
 * Ordered (pattern → placeholder) rules, most-specific first so earlier
 * replacements cannot leave fragments that a later pattern half-matches.
 */
const PLACEHOLDER_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  [UUID_RE, '__UUID__'],
  [NANOID_RE, '__ID__'],
  [ISO_TIMESTAMP_RE, '__TIMESTAMP__'],
  [UNIX_EPOCH_MS_RE, '__EPOCH__'],
  [SESSION_NUM_RE, 'session__N__'],
  [AGENT_WORKDIR_RE, 'agent-workdirs/__AGENT__'],
];

/**
 * Normalize a prompt for stable cassette key generation.
 *
 * The absolute workspace root is replaced with __WORKSPACE__ first (before
 * any regex rules, so path fragments are not double-normalized), then each
 * placeholder rule is applied in order.
 */
export function normalizePrompt(prompt: string, workspaceRoot: string): string {
  const base = workspaceRoot ? prompt.replaceAll(workspaceRoot, '__WORKSPACE__') : prompt;
  return PLACEHOLDER_RULES.reduce(
    (text, [pattern, placeholder]) => text.replace(pattern, placeholder),
    base,
  );
}
// CLI flags whose following value is the prompt text. Hoisted to module
// scope so the array is built once, not on every loop iteration.
const PROMPT_FLAGS = ['-p', '--prompt', '--message'];

/**
 * Strip the prompt value from CLI args to produce stable modelArgs for the cassette key.
 *
 * Handles all provider prompt flag styles:
 * - Native: `-p <prompt>` (Claude)
 * - Flag: `--prompt <prompt>`, `--message <prompt>`
 * - Also removes the bare prompt value if it appears as a positional arg.
 *
 * @param args   Full CLI argument list passed to the provider binary (not mutated).
 * @param prompt Exact prompt string to remove; when empty, a copy of args is
 *               returned unchanged.
 * @returns New array with every flag+prompt pair and bare prompt occurrence removed.
 */
export function stripPromptFromArgs(args: string[], prompt: string): string[] {
  if (!prompt) return [...args];
  const result: string[] = [];
  let i = 0;
  while (i < args.length) {
    const arg = args[i];
    if (PROMPT_FLAGS.includes(arg) && args[i + 1] === prompt) {
      i += 2; // skip flag + value
    } else if (arg === prompt) {
      i += 1; // skip bare positional prompt
    } else {
      result.push(arg);
      i++;
    }
  }
  return result;
}

View File

@@ -0,0 +1,258 @@
/**
* CassetteProcessManager
*
* Extends ProcessManager to intercept subprocess spawning and either:
* - Replay a recorded cassette (no API cost, deterministic)
* - Record a new cassette by running the real agent and capturing its output
*
* Modes:
* - 'replay': cassette MUST exist; throws if missing (safe for CI)
* - 'record': always runs real agent; saves/overwrites cassette on completion
* - 'auto': replays if cassette exists; falls through to record if missing
*
* The cassette key is built from:
* - Normalized prompt (dynamic content replaced with placeholders)
* - Provider name and stable CLI args (prompt value stripped)
* - Worktree file hash (detects content drift for execute-mode agents)
*/
import { readFileSync, existsSync, mkdirSync, writeFileSync, readdirSync } from 'node:fs';
import { join, dirname, relative } from 'node:path';
import { ProcessManager } from '../../agent/process-manager.js';
import type { StreamEvent } from '../../agent/providers/parsers/index.js';
import type { FileTailer } from '../../agent/file-tailer.js';
import type { ProjectRepository } from '../../db/repositories/project-repository.js';
import type { CassetteKey, CassetteEntry } from './types.js';
import type { CassetteStore } from './store.js';
import { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
import { hashWorktreeFiles } from './key.js';
/** How the process manager treats cassettes: see class docblock above. */
export type CassetteMode = 'replay' | 'record' | 'auto';

/** Bookkeeping for a real agent run whose output will be saved as a cassette. */
interface PendingRecording {
  key: CassetteKey;
  outputFilePath: string;
  agentCwd: string;
}

/** Bookkeeping for a replayed run whose .cw/output files must be restored. */
interface PendingReplay {
  cassette: CassetteEntry;
  agentCwd: string;
}
export class CassetteProcessManager extends ProcessManager {
  private readonly _workspaceRoot: string;
  /** Absolute path to replay-worker.mjs, spawned via `node` instead of the real CLI. */
  private readonly replayWorkerPath: string;
  /** pid → recording bookkeeping, consumed in pollForCompletion. */
  private readonly pendingRecordings = new Map<number, PendingRecording>();
  /** pid → replay bookkeeping, consumed in pollForCompletion. */
  private readonly pendingReplays = new Map<number, PendingReplay>();

  constructor(
    workspaceRoot: string,
    projectRepository: ProjectRepository,
    private readonly store: CassetteStore,
    private readonly cassetteMode: CassetteMode = 'auto',
  ) {
    super(workspaceRoot, projectRepository);
    this._workspaceRoot = workspaceRoot;
    // NOTE(review): URL#pathname mis-handles Windows paths (drive letters,
    // percent-encoded characters) — fileURLToPath would be safer; confirm
    // the supported platforms for this test infrastructure.
    this.replayWorkerPath = new URL('./replay-worker.mjs', import.meta.url).pathname;
  }

  /**
   * Intercept agent spawning: replay a matching cassette when one exists,
   * otherwise (mode permitting) spawn the real agent and arrange for its
   * output to be recorded when it completes.
   *
   * @throws In 'replay' mode when no cassette matches the computed key.
   */
  override spawnDetached(
    agentId: string,
    agentName: string,
    command: string,
    args: string[],
    cwd: string,
    env: Record<string, string>,
    providerName: string,
    prompt?: string,
    onEvent?: (event: StreamEvent) => void,
    onRawContent?: (content: string) => void,
  ): { pid: number; outputFilePath: string; tailer: FileTailer } {
    // Build the cassette identity from the stable parts of this spawn request.
    const key: CassetteKey = {
      normalizedPrompt: normalizePrompt(prompt ?? '', this._workspaceRoot),
      providerName,
      modelArgs: stripPromptFromArgs(args, prompt ?? ''),
      worktreeHash: hashWorktreeFiles(cwd),
    };
    // In record mode we always skip the store lookup and go straight to real spawn.
    const existing = this.cassetteMode !== 'record' ? this.store.find(key) : null;
    if (existing) {
      const result = this.replayFromCassette(agentId, agentName, cwd, env, providerName, existing, onEvent, onRawContent);
      this.pendingReplays.set(result.pid, { cassette: existing, agentCwd: cwd });
      return result;
    }
    if (this.cassetteMode === 'replay') {
      throw new Error(
        `[cassette] No cassette found for agent '${agentName}' (provider=${providerName}, mode=replay).\n` +
        `Run with CW_CASSETTE_RECORD=1 to record it.`,
      );
    }
    // auto or record: run the real agent and record the cassette on completion.
    console.log(`[cassette] recording new cassette for agent '${agentName}' (${providerName})`);
    const result = super.spawnDetached(agentId, agentName, command, args, cwd, env, providerName, prompt, onEvent, onRawContent);
    this.pendingRecordings.set(result.pid, { key, outputFilePath: result.outputFilePath, agentCwd: cwd });
    return result;
  }

  /**
   * Wrap completion polling so cassette bookkeeping runs before the caller's
   * onComplete: saving a fresh recording, or restoring replayed output files.
   */
  override pollForCompletion(
    agentId: string,
    pid: number,
    onComplete: () => Promise<void>,
    getTailer: () => FileTailer | undefined,
  ): { cancel: () => void } {
    const recording = this.pendingRecordings.get(pid);
    if (recording) {
      // Record mode — wrap onComplete to save the cassette before handing off.
      return super.pollForCompletion(agentId, pid, async () => {
        await this.saveCassette(recording);
        this.pendingRecordings.delete(pid);
        await onComplete();
      }, getTailer);
    }
    const replay = this.pendingReplays.get(pid);
    if (replay) {
      // Replay mode — restore .cw/output/ files before onComplete so that
      // readPhaseFiles / readTaskFiles / readProposalFiles find their data.
      return super.pollForCompletion(agentId, pid, async () => {
        this.restoreOutputFiles(replay.cassette, replay.agentCwd);
        this.pendingReplays.delete(pid);
        await onComplete();
      }, getTailer);
    }
    // Neither recording nor replaying this pid — plain passthrough.
    return super.pollForCompletion(agentId, pid, onComplete, getTailer);
  }

  /**
   * Capture the finished agent's JSONL output, signal.json, and the rest of
   * .cw/output/ into a cassette entry and persist it to the store.
   */
  private async saveCassette(pending: PendingRecording): Promise<void> {
    // Read all JSONL lines from the output file the agent wrote to.
    let jsonlLines: string[] = [];
    try {
      const content = readFileSync(pending.outputFilePath, 'utf-8');
      jsonlLines = content.split('\n').filter(l => l.trim() !== '');
    } catch {
      // No output produced — record an empty cassette.
    }
    // Read signal.json from the agent working directory.
    let signalJson: Record<string, unknown> | null = null;
    const outputDir = join(pending.agentCwd, '.cw', 'output');
    const signalPath = join(outputDir, 'signal.json');
    if (existsSync(signalPath)) {
      try {
        signalJson = JSON.parse(readFileSync(signalPath, 'utf-8')) as Record<string, unknown>;
      } catch {
        // Corrupt signal file — record null.
      }
    }
    // Capture all other files in .cw/output/ (phase files, task files, etc.)
    const outputFiles: Record<string, string> = {};
    if (existsSync(outputDir)) {
      this.walkOutputDir(outputDir, outputDir, (relPath, content) => {
        if (relPath !== 'signal.json') {
          outputFiles[relPath] = content;
        }
      });
    }
    const entry: CassetteEntry = {
      version: 1,
      key: pending.key,
      recording: {
        jsonlLines,
        signalJson,
        // NOTE(review): the real process exit code is not captured here —
        // it is recorded as 0 unconditionally; confirm this is intended.
        exitCode: 0,
        recordedAt: new Date().toISOString(),
        outputFiles,
      },
    };
    this.store.save(pending.key, entry);
  }

  /**
   * Restore captured .cw/output/ files to the new agent working directory.
   * Called before onComplete so that downstream readers (readPhaseFiles, etc.)
   * find the expected files in place.
   */
  private restoreOutputFiles(cassette: CassetteEntry, agentCwd: string): void {
    const { outputFiles, signalJson } = cassette.recording;
    const outputDir = join(agentCwd, '.cw', 'output');
    // Restore captured output files
    if (outputFiles) {
      for (const [relPath, content] of Object.entries(outputFiles)) {
        const fullPath = join(outputDir, relPath);
        mkdirSync(dirname(fullPath), { recursive: true });
        writeFileSync(fullPath, content, 'utf-8');
      }
    }
    // Write signal.json (the manager reads this to detect completion status)
    if (signalJson) {
      mkdirSync(outputDir, { recursive: true });
      writeFileSync(join(outputDir, 'signal.json'), JSON.stringify(signalJson), 'utf-8');
    }
  }

  /**
   * Depth-first walk over baseDir, invoking callback with each readable
   * file's path relative to baseDir plus its utf-8 content.
   */
  private walkOutputDir(
    baseDir: string,
    currentDir: string,
    callback: (relPath: string, content: string) => void,
  ): void {
    let entries;
    try {
      entries = readdirSync(currentDir, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      const fullPath = join(currentDir, entry.name);
      const relPath = relative(baseDir, fullPath);
      if (entry.isDirectory()) {
        this.walkOutputDir(baseDir, fullPath, callback);
      } else if (entry.isFile()) {
        try {
          const content = readFileSync(fullPath, 'utf-8');
          callback(relPath, content);
        } catch {
          // Skip unreadable files
        }
      }
    }
  }

  /**
   * Spawn the replay worker (a plain node script) in place of the real agent
   * CLI, passing the recording via the CW_CASSETTE_DATA env var.
   */
  private replayFromCassette(
    agentId: string,
    agentName: string,
    cwd: string,
    env: Record<string, string>,
    providerName: string,
    cassette: CassetteEntry,
    onEvent?: (event: StreamEvent) => void,
    onRawContent?: (content: string) => void,
  ): { pid: number; outputFilePath: string; tailer: FileTailer } {
    console.log(`[cassette] replaying cassette for agent '${agentName}' (${cassette.recording.jsonlLines.length} lines)`);
    return super.spawnDetached(
      agentId,
      agentName,
      process.execPath, // use the running node binary
      [this.replayWorkerPath], // replay-worker.mjs
      cwd,
      { ...env, CW_CASSETTE_DATA: JSON.stringify(cassette.recording) },
      providerName, // use original provider's parser for the tailer
      undefined, // no prompt — worker handles output directly
      onEvent,
      onRawContent,
    );
  }
}

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env node
/**
 * Cassette Replay Worker
 *
 * Spawned as a detached subprocess by CassetteProcessManager in place of the
 * real agent CLI. The recording arrives via the CW_CASSETTE_DATA env var;
 * the worker emits its JSONL lines on stdout (which spawnDetached redirects
 * to the agent output file), writes signal.json under the process cwd, and
 * exits with the recorded exit code.
 *
 * Kept as plain .mjs (no TypeScript) so it runs with bare `node` — no build
 * step or tsx dependency required.
 */
import { mkdirSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';

const raw = process.env.CW_CASSETTE_DATA;
if (!raw) {
  process.stderr.write('[replay-worker] CW_CASSETTE_DATA env var not set\n');
  process.exit(1);
}

let recording;
try {
  recording = JSON.parse(raw);
} catch (err) {
  process.stderr.write(`[replay-worker] failed to parse CW_CASSETTE_DATA: ${err.message}\n`);
  process.exit(1);
}

const { jsonlLines = [], signalJson = null, exitCode = 0 } = recording;

// Emit the recorded JSONL stream on stdout; spawnDetached redirects stdout
// to the output file via fd redirection, so this lands in the output file.
process.stdout.write(jsonlLines.map((line) => line + '\n').join(''));

// Write signal.json relative to cwd — spawnDetached sets cwd to the agent
// working directory, so this lands at <agentCwd>/.cw/output/signal.json.
if (signalJson) {
  const signalDir = join(process.cwd(), '.cw', 'output');
  mkdirSync(signalDir, { recursive: true });
  writeFileSync(join(signalDir, 'signal.json'), JSON.stringify(signalJson, null, 2), 'utf-8');
}

process.exit(exitCode);

View File

@@ -0,0 +1,50 @@
/**
* CassetteStore
*
* Reads and writes cassette files from a directory on disk.
* Each cassette is stored as a JSON file named after the 32-char key hash.
* Cassette files are intended to be committed to git — they are the
* "recorded interactions" that allow tests to run without real API calls.
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import type { CassetteKey, CassetteEntry } from './types.js';
import { buildCassetteKey } from './key.js';
export class CassetteStore {
  /**
   * @param cassetteDir Directory holding one JSON file per cassette, named by
   *   the 32-char key hash. Intended to be committed to git.
   */
  constructor(private readonly cassetteDir: string) {}

  /** Absolute path of the cassette file for a given key hash. */
  private fileFor(hash: string): string {
    return join(this.cassetteDir, `${hash}.json`);
  }

  /**
   * Look up a cassette by its key.
   * Returns null when no file exists for the key, and also when the file
   * exists but cannot be parsed (corrupt cassettes behave like cache misses).
   */
  find(key: CassetteKey): CassetteEntry | null {
    const file = this.fileFor(buildCassetteKey(key));
    if (!existsSync(file)) {
      return null;
    }
    try {
      return JSON.parse(readFileSync(file, 'utf-8')) as CassetteEntry;
    } catch {
      return null;
    }
  }

  /**
   * Save a cassette to disk, creating the cassette directory on demand.
   * Logs the cassette filename so recording runs show what was captured.
   */
  save(key: CassetteKey, entry: CassetteEntry): void {
    mkdirSync(this.cassetteDir, { recursive: true });
    const hash = buildCassetteKey(key);
    writeFileSync(this.fileFor(hash), JSON.stringify(entry, null, 2), 'utf-8');
    console.log(`[cassette] recorded → ${hash}.json (${entry.recording.jsonlLines.length} lines)`);
  }
}

View File

@@ -0,0 +1,42 @@
/**
* Cassette Types
*
* VCR-style cassette format for recording and replaying agent subprocess I/O.
* A cassette captures everything an agent process writes so tests can replay
* it deterministically without hitting real AI APIs.
*/
/**
 * Identity of a recorded agent interaction. Two spawns with the same key are
 * considered equivalent and replay the same cassette.
 */
export interface CassetteKey {
  /** Prompt with dynamic content (UUIDs, paths, timestamps) replaced with placeholders. */
  normalizedPrompt: string;
  /** Provider name, e.g. 'claude', 'codex'. */
  providerName: string;
  /** Stable CLI args with the prompt value stripped. */
  modelArgs: string[];
  /** SHA256 prefix of all non-hidden files in the agent worktree at spawn time. */
  worktreeHash: string;
}

/** Everything captured from one agent process run, replayed verbatim in tests. */
export interface CassetteRecording {
  /** All JSONL lines the agent wrote to stdout (captured from output file). */
  jsonlLines: string[];
  /** Content of signal.json written by the agent, or null if missing. */
  signalJson: Record<string, unknown> | null;
  /** Process exit code (0 = success). */
  exitCode: number;
  /** ISO timestamp when this cassette was recorded. */
  recordedAt: string;
  /**
   * All files the agent wrote to .cw/output/ (relative path → UTF-8 content),
   * excluding signal.json (which is captured separately in signalJson).
   * Restored during replay before onComplete fires so downstream readers
   * (e.g. readPhaseFiles, readTaskFiles) see the expected directory contents.
   * Optional for backward compatibility with cassettes recorded before this
   * field existed.
   */
  outputFiles?: Record<string, string>;
}

/** On-disk cassette file: versioned envelope pairing a key with its recording. */
export interface CassetteEntry {
  /** Schema version of the cassette file format; bump on breaking changes. */
  version: 1;
  key: CassetteKey;
  recording: CassetteRecording;
}

View File

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,285 @@
/**
* E2E Tests for Architect Workflow
*
* Tests the complete architect workflow from discussion through phase creation:
* - Discuss mode: Gather context, answer questions, capture decisions
* - Plan mode: Break initiative into phases
* - Full workflow: Discuss -> Plan -> Phase persistence
*
* Uses TestHarness from src/test/ for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { createTestHarness, type TestHarness } from '../index.js';
import type { AgentStoppedEvent } from '../../events/types.js';
describe('Architect Workflow E2E', () => {
  let harness: TestHarness;

  beforeEach(() => {
    // Fresh harness per test: provides the caller, mock agent manager, and
    // event capture used throughout this suite.
    harness = createTestHarness();
  });

  afterEach(() => {
    harness.cleanup();
    // Individual tests opt into fake timers; always restore real timers after.
    vi.useRealTimers();
  });

  describe('discuss mode', () => {
    it('should spawn architect in discuss mode and complete with decisions', async () => {
      vi.useFakeTimers();
      // Create initiative
      const initiative = await harness.createInitiative('Auth System');
      // Set up discuss completion scenario
      harness.setArchitectDiscussComplete('auth-discuss', [
        { topic: 'Auth Method', decision: 'JWT', reason: 'Stateless, scalable' },
        { topic: 'Token Storage', decision: 'httpOnly cookie', reason: 'XSS protection' },
      ], 'Auth approach decided');
      // Spawn architect in discuss mode
      const agent = await harness.caller.spawnArchitectDiscuss({
        name: 'auth-discuss',
        initiativeId: initiative.id,
      });
      expect(agent.mode).toBe('discuss');
      // Wait for completion (flushes the fake-timer-driven mock agent run)
      await harness.advanceTimers();
      // Verify agent stopped with context_complete
      const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
      expect(events).toHaveLength(1);
      expect(events[0].payload.reason).toBe('context_complete');
    });

    it('should pause on questions and resume with answers', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Auth System');
      // First, agent asks questions
      harness.setArchitectDiscussQuestions('auth-discuss', [
        { id: 'q1', question: 'JWT or Session?', options: [{ label: 'JWT' }, { label: 'Session' }] },
        { id: 'q2', question: 'OAuth providers?' },
      ]);
      const agent = await harness.caller.spawnArchitectDiscuss({
        name: 'auth-discuss',
        initiativeId: initiative.id,
      });
      await harness.advanceTimers();
      // Agent should be waiting
      const waitingAgent = await harness.caller.getAgent({ name: 'auth-discuss' });
      expect(waitingAgent?.status).toBe('waiting_for_input');
      // Get pending questions
      const pending = await harness.mockAgentManager.getPendingQuestions(agent.id);
      expect(pending?.questions).toHaveLength(2);
      // Now set up completion scenario for after resume
      harness.setArchitectDiscussComplete('auth-discuss', [
        { topic: 'Auth', decision: 'JWT', reason: 'User chose' },
      ], 'Complete');
      // Resume with answers
      await harness.caller.resumeAgent({
        name: 'auth-discuss',
        answers: { q1: 'JWT', q2: 'Google, GitHub' },
      });
      await harness.advanceTimers();
      // Should complete
      const finalAgent = await harness.caller.getAgent({ name: 'auth-discuss' });
      expect(finalAgent?.status).toBe('idle');
    });
  });

  describe('plan mode', () => {
    it('should spawn architect in plan mode and create phases', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Auth System');
      // Set up plan completion
      harness.setArchitectPlanComplete('auth-plan', [
        { number: 1, name: 'Database Setup', description: 'User table and auth schema', dependencies: [] },
        { number: 2, name: 'JWT Implementation', description: 'Token generation and validation', dependencies: [1] },
        { number: 3, name: 'Protected Routes', description: 'Middleware and route guards', dependencies: [2] },
      ]);
      const agent = await harness.caller.spawnArchitectPlan({
        name: 'auth-plan',
        initiativeId: initiative.id,
      });
      expect(agent.mode).toBe('plan');
      await harness.advanceTimers();
      // Verify stopped with plan_complete
      const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
      expect(events).toHaveLength(1);
      expect(events[0].payload.reason).toBe('plan_complete');
    });

    it('should persist phases from plan output', async () => {
      const initiative = await harness.createInitiative('Auth System');
      const phasesData = [
        { name: 'Foundation' },
        { name: 'Features' },
      ];
      // Persist phases (simulating what would happen after plan)
      const created = await harness.createPhasesFromPlan(initiative.id, phasesData);
      expect(created).toHaveLength(2);
      // Verify retrieval
      const phases = await harness.getPhases(initiative.id);
      expect(phases).toHaveLength(2);
      expect(phases[0].name).toBe('Foundation');
      expect(phases[1].name).toBe('Features');
    });
  });

  describe('plan conflict detection', () => {
    it('should reject if a plan agent is already running', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Auth System');
      // Set up a long-running plan agent (never completes during this test)
      harness.setArchitectPlanComplete('first-plan', [
        { number: 1, name: 'Phase 1', description: 'First', dependencies: [] },
      ]);
      // Use a delay so it stays running
      harness.setAgentScenario('first-plan', { status: 'done', delay: 999999 });
      await harness.caller.spawnArchitectPlan({
        name: 'first-plan',
        initiativeId: initiative.id,
      });
      // Agent should be running
      const agents = await harness.caller.listAgents();
      expect(agents.find(a => a.name === 'first-plan')?.status).toBe('running');
      // Second plan should be rejected
      await expect(
        harness.caller.spawnArchitectPlan({
          name: 'second-plan',
          initiativeId: initiative.id,
        }),
      ).rejects.toThrow(/already running/);
    });

    it('should auto-dismiss stale plan agents before checking', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Auth System');
      // Set up a plan agent that crashes immediately
      harness.setAgentScenario('stale-plan', { status: 'error', error: 'crashed' });
      await harness.caller.spawnArchitectPlan({
        name: 'stale-plan',
        initiativeId: initiative.id,
      });
      await harness.advanceTimers();
      // Should be crashed
      const agents = await harness.caller.listAgents();
      expect(agents.find(a => a.name === 'stale-plan')?.status).toBe('crashed');
      // New plan should succeed (stale one gets auto-dismissed)
      harness.setArchitectPlanComplete('new-plan', [
        { number: 1, name: 'Phase 1', description: 'First', dependencies: [] },
      ]);
      const agent = await harness.caller.spawnArchitectPlan({
        name: 'new-plan',
        initiativeId: initiative.id,
      });
      expect(agent.mode).toBe('plan');
    });

    it('should allow plan for different initiatives', async () => {
      vi.useFakeTimers();
      const init1 = await harness.createInitiative('Initiative 1');
      const init2 = await harness.createInitiative('Initiative 2');
      // Long-running agent on initiative 1
      harness.setAgentScenario('plan-1', { status: 'done', delay: 999999 });
      await harness.caller.spawnArchitectPlan({
        name: 'plan-1',
        initiativeId: init1.id,
      });
      // Plan on initiative 2 should succeed (conflict check is per-initiative)
      harness.setArchitectPlanComplete('plan-2', [
        { number: 1, name: 'Phase 1', description: 'First', dependencies: [] },
      ]);
      const agent = await harness.caller.spawnArchitectPlan({
        name: 'plan-2',
        initiativeId: init2.id,
      });
      expect(agent.mode).toBe('plan');
    });
  });

  describe('full workflow', () => {
    it('should complete discuss -> plan -> phases workflow', async () => {
      vi.useFakeTimers();
      // 1. Create initiative
      const initiative = await harness.createInitiative('Full Workflow Test');
      // 2. Discuss phase
      harness.setArchitectDiscussComplete('discuss-agent', [
        { topic: 'Scope', decision: 'MVP only', reason: 'Time constraint' },
      ], 'Scope defined');
      await harness.caller.spawnArchitectDiscuss({
        name: 'discuss-agent',
        initiativeId: initiative.id,
      });
      await harness.advanceTimers();
      // 3. Plan phase
      harness.setArchitectPlanComplete('plan-agent', [
        { number: 1, name: 'Core', description: 'Core functionality', dependencies: [] },
        { number: 2, name: 'Polish', description: 'UI and UX', dependencies: [1] },
      ]);
      await harness.caller.spawnArchitectPlan({
        name: 'plan-agent',
        initiativeId: initiative.id,
        contextSummary: 'MVP scope defined',
      });
      await harness.advanceTimers();
      // 4. Persist phases
      await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Core' },
        { name: 'Polish' },
      ]);
      // 5. Verify final state
      const phases = await harness.getPhases(initiative.id);
      expect(phases).toHaveLength(2);
      // Both agents should be idle
      const agents = await harness.caller.listAgents();
      expect(agents.filter(a => a.status === 'idle')).toHaveLength(2);
    });
  });
});

View File

@@ -0,0 +1,385 @@
/**
* E2E Tests for Detail Workflow
*
* Tests the complete detail workflow from phase through task creation:
* - Detail mode: Break phase into executable tasks
* - Q&A flow: Handle clarifying questions during detailing
* - Task persistence: Save child tasks from detail output
*
* Uses TestHarness from src/test/ for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { createTestHarness, type TestHarness } from '../index.js';
import type { AgentStoppedEvent, AgentWaitingEvent } from '../../events/types.js';
describe('Detail Workflow E2E', () => {
  let harness: TestHarness;

  beforeEach(() => {
    // Fresh harness per test: provides the caller, mock agent manager, and
    // event capture used throughout this suite.
    harness = createTestHarness();
  });

  afterEach(() => {
    harness.cleanup();
    // Individual tests opt into fake timers; always restore real timers after.
    vi.useRealTimers();
  });

  describe('spawn detail agent', () => {
    it('should spawn agent in detail mode and complete with tasks', async () => {
      vi.useFakeTimers();
      // Setup: Create initiative -> phase -> plan
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Auth Plan', 'Implement authentication');
      // Set detail scenario
      harness.setArchitectDetailComplete('detailer', [
        { number: 1, name: 'Create schema', content: 'User table', type: 'auto', dependencies: [] },
        { number: 2, name: 'Create endpoint', content: 'Login API', type: 'auto', dependencies: [1] },
      ]);
      // Spawn detail agent
      const agent = await harness.caller.spawnArchitectDetail({
        name: 'detailer',
        phaseId: phases[0].id,
      });
      expect(agent.mode).toBe('detail');
      // Advance timers for async completion
      await harness.advanceTimers();
      // Verify agent completed
      const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
      expect(events).toHaveLength(1);
      expect(events[0].payload.name).toBe('detailer');
      expect(events[0].payload.reason).toBe('detail_complete');
    });

    it('should pause on questions and resume', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Complex Plan');
      // Set questions scenario
      harness.setArchitectDetailQuestions('detailer', [
        { id: 'q1', question: 'How granular should tasks be?' },
      ]);
      const agent = await harness.caller.spawnArchitectDetail({
        name: 'detailer',
        phaseId: phases[0].id,
      });
      await harness.advanceTimers();
      // Verify agent is waiting for input
      const waitingAgent = await harness.caller.getAgent({ name: 'detailer' });
      expect(waitingAgent?.status).toBe('waiting_for_input');
      // Verify paused on questions (emits agent:waiting, not agent:stopped)
      const waitingEvents = harness.getEmittedEvents('agent:waiting') as AgentWaitingEvent[];
      expect(waitingEvents).toHaveLength(1);
      expect(waitingEvents[0].payload.questions).toHaveLength(1);
      // Get pending questions
      const pending = await harness.mockAgentManager.getPendingQuestions(agent.id);
      expect(pending?.questions).toHaveLength(1);
      expect(pending?.questions[0].question).toBe('How granular should tasks be?');
      // Set completion scenario for resume
      harness.setArchitectDetailComplete('detailer', [
        { number: 1, name: 'Task 1', content: 'Single task', type: 'auto', dependencies: [] },
      ]);
      // Resume with answer
      await harness.caller.resumeAgent({
        name: 'detailer',
        answers: { q1: 'Very granular' },
      });
      await harness.advanceTimers();
      // Verify completed after resume
      const finalAgent = await harness.caller.getAgent({ name: 'detailer' });
      expect(finalAgent?.status).toBe('idle');
    });

    it('should handle multiple questions', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Multi-Q Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Complex Plan');
      // Set multiple questions scenario
      harness.setArchitectDetailQuestions('detailer', [
        { id: 'q1', question: 'What task granularity?', options: [{ label: 'Fine' }, { label: 'Coarse' }] },
        { id: 'q2', question: 'Include checkpoints?' },
        { id: 'q3', question: 'Any blocking dependencies?' },
      ]);
      const agent = await harness.caller.spawnArchitectDetail({
        name: 'detailer',
        phaseId: phases[0].id,
      });
      await harness.advanceTimers();
      // Verify all questions received
      const pending = await harness.mockAgentManager.getPendingQuestions(agent.id);
      expect(pending?.questions).toHaveLength(3);
      // Set completion scenario for resume
      harness.setArchitectDetailComplete('detailer', [
        { number: 1, name: 'Task 1', content: 'First task', type: 'auto', dependencies: [] },
        { number: 2, name: 'Task 2', content: 'Second task', type: 'auto', dependencies: [1] },
        { number: 3, name: 'Verify', content: 'Verify all', type: 'checkpoint:human-verify', dependencies: [2] },
      ]);
      // Resume with all answers
      await harness.caller.resumeAgent({
        name: 'detailer',
        answers: {
          q1: 'Fine',
          q2: 'Yes, add human verification',
          q3: 'Tasks 1 and 2 are sequential',
        },
      });
      await harness.advanceTimers();
      // Verify completed
      const finalAgent = await harness.caller.getAgent({ name: 'detailer' });
      expect(finalAgent?.status).toBe('idle');
    });
  });

  describe('detail conflict detection', () => {
    it('should reject if a detail agent is already running for the same phase', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      // Long-running detail agent
      harness.setAgentScenario('detailer-1', { status: 'done', delay: 999999 });
      await harness.caller.spawnArchitectDetail({
        name: 'detailer-1',
        phaseId: phases[0].id,
      });
      // Second detail for same phase should be rejected
      await expect(
        harness.caller.spawnArchitectDetail({
          name: 'detailer-2',
          phaseId: phases[0].id,
        }),
      ).rejects.toThrow(/already running/);
    });

    it('should auto-dismiss stale detail agents before checking', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      // Detail agent that crashes immediately
      harness.setAgentScenario('stale-detailer', { status: 'error', error: 'crashed' });
      await harness.caller.spawnArchitectDetail({
        name: 'stale-detailer',
        phaseId: phases[0].id,
      });
      await harness.advanceTimers();
      // New detail should succeed (the crashed agent no longer blocks the phase)
      harness.setArchitectDetailComplete('new-detailer', [
        { number: 1, name: 'Task 1', content: 'Do it', type: 'auto', dependencies: [] },
      ]);
      const agent = await harness.caller.spawnArchitectDetail({
        name: 'new-detailer',
        phaseId: phases[0].id,
      });
      expect(agent.mode).toBe('detail');
    });

    it('should allow detail for different phases simultaneously', async () => {
      vi.useFakeTimers();
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
        { name: 'Phase 2' },
      ]);
      // Long-running agent on phase 1
      harness.setAgentScenario('detailer-p1', { status: 'done', delay: 999999 });
      await harness.caller.spawnArchitectDetail({
        name: 'detailer-p1',
        phaseId: phases[0].id,
      });
      // Detail on phase 2 should succeed (conflict check is per-phase)
      harness.setArchitectDetailComplete('detailer-p2', [
        { number: 1, name: 'Task 1', content: 'Do it', type: 'auto', dependencies: [] },
      ]);
      const agent = await harness.caller.spawnArchitectDetail({
        name: 'detailer-p2',
        phaseId: phases[1].id,
      });
      expect(agent.mode).toBe('detail');
    });
  });

  describe('task persistence', () => {
    it('should create tasks from detail output', async () => {
      const initiative = await harness.createInitiative('Test Project');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Auth Plan');
      // Create tasks from detail output
      await harness.caller.createChildTasks({
        parentTaskId: detailTask.id,
        tasks: [
          { number: 1, name: 'Schema', description: 'Create tables', type: 'auto', dependencies: [] },
          { number: 2, name: 'API', description: 'Create endpoints', type: 'auto', dependencies: [1] },
          { number: 3, name: 'Verify', description: 'Test flow', type: 'checkpoint:human-verify', dependencies: [2] },
        ],
      });
      // Verify tasks created
      const tasks = await harness.getChildTasks(detailTask.id);
      expect(tasks).toHaveLength(3);
      expect(tasks[0].name).toBe('Schema');
      expect(tasks[1].name).toBe('API');
      expect(tasks[2].name).toBe('Verify');
      expect(tasks[2].type).toBe('checkpoint:human-verify');
    });

    it('should handle all task types', async () => {
      const initiative = await harness.createInitiative('Task Types Test');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Mixed Tasks');
      // Create tasks with all types
      await harness.caller.createChildTasks({
        parentTaskId: detailTask.id,
        tasks: [
          { number: 1, name: 'Auto Task', description: 'Automated work', type: 'auto' },
          { number: 2, name: 'Human Verify', description: 'Visual check', type: 'checkpoint:human-verify', dependencies: [1] },
          { number: 3, name: 'Decision', description: 'Choose approach', type: 'checkpoint:decision', dependencies: [2] },
          { number: 4, name: 'Human Action', description: 'Manual step', type: 'checkpoint:human-action', dependencies: [3] },
        ],
      });
      const tasks = await harness.getChildTasks(detailTask.id);
      expect(tasks).toHaveLength(4);
      expect(tasks[0].type).toBe('auto');
      expect(tasks[1].type).toBe('checkpoint:human-verify');
      expect(tasks[2].type).toBe('checkpoint:decision');
      expect(tasks[3].type).toBe('checkpoint:human-action');
    });

    it('should create task dependencies', async () => {
      const initiative = await harness.createInitiative('Dependencies Test');
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Phase 1' },
      ]);
      const detailTask = await harness.createDetailTask(phases[0].id, 'Dependent Tasks');
      // Create tasks with complex dependencies (diamond: A -> B, A -> C, B+C -> D)
      await harness.caller.createChildTasks({
        parentTaskId: detailTask.id,
        tasks: [
          { number: 1, name: 'Task A', description: 'No deps', type: 'auto' },
          { number: 2, name: 'Task B', description: 'Depends on A', type: 'auto', dependencies: [1] },
          { number: 3, name: 'Task C', description: 'Depends on A', type: 'auto', dependencies: [1] },
          { number: 4, name: 'Task D', description: 'Depends on B and C', type: 'auto', dependencies: [2, 3] },
        ],
      });
      const tasks = await harness.getChildTasks(detailTask.id);
      expect(tasks).toHaveLength(4);
      // All tasks should be created with correct names
      expect(tasks.map(t => t.name)).toEqual(['Task A', 'Task B', 'Task C', 'Task D']);
    });
  });

  describe('full detail workflow', () => {
    it('should complete initiative -> phase -> plan -> detail -> tasks workflow', async () => {
      vi.useFakeTimers();
      // 1. Create initiative
      const initiative = await harness.createInitiative('Full Workflow Test');
      // 2. Create phase
      const phases = await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Auth Phase' },
      ]);
      // 3. Create plan
      const detailTask = await harness.createDetailTask(phases[0].id, 'Auth Plan', 'Implement JWT auth');
      // 4. Spawn detail agent
      harness.setArchitectDetailComplete('detailer', [
        { number: 1, name: 'Create user schema', content: 'Define User model', type: 'auto', dependencies: [] },
        { number: 2, name: 'Implement JWT', content: 'Token generation', type: 'auto', dependencies: [1] },
        { number: 3, name: 'Protected routes', content: 'Middleware', type: 'auto', dependencies: [2] },
        { number: 4, name: 'Verify auth', content: 'Test login flow', type: 'checkpoint:human-verify', dependencies: [3] },
      ]);
      await harness.caller.spawnArchitectDetail({
        name: 'detailer',
        phaseId: phases[0].id,
      });
      await harness.advanceTimers();
      // 5. Verify agent completed
      const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
      expect(events).toHaveLength(1);
      expect(events[0].payload.reason).toBe('detail_complete');
      // 6. Persist tasks (simulating what orchestrator would do after detail)
      await harness.caller.createChildTasks({
        parentTaskId: detailTask.id,
        tasks: [
          { number: 1, name: 'Create user schema', description: 'Define User model', type: 'auto', dependencies: [] },
          { number: 2, name: 'Implement JWT', description: 'Token generation', type: 'auto', dependencies: [1] },
          { number: 3, name: 'Protected routes', description: 'Middleware', type: 'auto', dependencies: [2] },
          { number: 4, name: 'Verify auth', description: 'Test login flow', type: 'checkpoint:human-verify', dependencies: [3] },
        ],
      });
      // 7. Verify final state
      const tasks = await harness.getChildTasks(detailTask.id);
      expect(tasks).toHaveLength(4);
      expect(tasks[0].name).toBe('Create user schema');
      expect(tasks[3].type).toBe('checkpoint:human-verify');
      // Agent should be idle
      const finalAgent = await harness.caller.getAgent({ name: 'detailer' });
      expect(finalAgent?.status).toBe('idle');
    });
  });
});

View File

@@ -0,0 +1,426 @@
/**
* E2E Tests for Edge Cases
*
* Tests edge case scenarios in dispatch/coordination flow:
* - Agent crashes during task
* - Agent waiting for input
* - Task blocking
* - Merge conflicts
*
* Uses TestHarness from src/test/ for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
type TestHarness,
} from '../index.js';
import type {
AgentSpawnedEvent,
AgentCrashedEvent,
AgentWaitingEvent,
TaskBlockedEvent,
MergeConflictedEvent,
} from '../../events/types.js';
describe('E2E Edge Cases', () => {
let harness: TestHarness;
beforeEach(() => {
harness = createTestHarness();
});
afterEach(() => {
harness.cleanup();
vi.useRealTimers();
});
describe('Agent crash during task', () => {
it('emits agent:spawned then agent:crashed events', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent for DispatchManager
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await harness.advanceTimers();
// Set error scenario BEFORE dispatch
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
status: 'error',
error: 'Token limit exceeded',
});
await harness.dispatchManager.queue(taskAId);
harness.clearEvents();
await harness.dispatchManager.dispatchNext();
await harness.advanceTimers();
// Verify: agent:spawned event emitted
const spawnedEvents = harness.getEventsByType('agent:spawned');
expect(spawnedEvents.length).toBe(1);
const spawnedPayload = (spawnedEvents[0] as AgentSpawnedEvent).payload;
expect(spawnedPayload.taskId).toBe(taskAId);
// Verify: agent:crashed event emitted
const crashedEvents = harness.getEventsByType('agent:crashed');
expect(crashedEvents.length).toBe(1);
const crashedPayload = (crashedEvents[0] as AgentCrashedEvent).payload;
expect(crashedPayload.taskId).toBe(taskAId);
expect(crashedPayload.error).toBe('Token limit exceeded');
});
it('task status should NOT be completed after crash', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await harness.advanceTimers();
// Set error scenario
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
status: 'error',
error: 'Token limit exceeded',
});
await harness.dispatchManager.queue(taskAId);
await harness.dispatchManager.dispatchNext();
await harness.advanceTimers();
// Task status should be 'in_progress' (not 'completed')
const task = await harness.taskRepository.findById(taskAId);
expect(task?.status).toBe('in_progress');
});
it('captures error message in agent result', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await harness.advanceTimers();
// Set error scenario
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
status: 'error',
error: 'Out of memory',
});
await harness.dispatchManager.queue(taskAId);
const dispatchResult = await harness.dispatchManager.dispatchNext();
await harness.advanceTimers();
// Get agent result - should have error
const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
expect(agentResult).not.toBeNull();
expect(agentResult?.success).toBe(false);
expect(agentResult?.message).toBe('Out of memory');
});
});
describe('Agent waiting for input and resume', () => {
it('emits agent:waiting event with question', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await harness.advanceTimers();
// Set questions scenario
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
status: 'questions',
questions: [{ id: 'q1', question: 'Which database should I use?' }],
});
await harness.dispatchManager.queue(taskAId);
harness.clearEvents();
await harness.dispatchManager.dispatchNext();
await harness.advanceTimers();
// Verify: agent:waiting event emitted
const waitingEvents = harness.getEventsByType('agent:waiting');
expect(waitingEvents.length).toBe(1);
const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
expect(waitingPayload.taskId).toBe(taskAId);
expect(waitingPayload.questions[0].question).toBe('Which database should I use?');
});
});
describe('Task blocking', () => {
it('blocked task appears in blocked list from getQueueState', async () => {
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
await harness.dispatchManager.queue(taskAId);
await harness.dispatchManager.blockTask(taskAId, 'Waiting for user decision');
const queueState = await harness.dispatchManager.getQueueState();
expect(queueState.blocked.length).toBe(1);
expect(queueState.blocked[0].taskId).toBe(taskAId);
expect(queueState.blocked[0].reason).toBe('Waiting for user decision');
});
it('blocked task emits task:blocked event', async () => {
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
await harness.dispatchManager.queue(taskAId);
harness.clearEvents();
await harness.dispatchManager.blockTask(taskAId, 'Waiting for user decision');
const blockedEvents = harness.getEventsByType('task:blocked');
expect(blockedEvents.length).toBe(1);
const blockedPayload = (blockedEvents[0] as TaskBlockedEvent).payload;
expect(blockedPayload.taskId).toBe(taskAId);
expect(blockedPayload.reason).toBe('Waiting for user decision');
});
it('getNextDispatchable does not return blocked task', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
const taskBId = seeded.tasks.get('Task B')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await harness.advanceTimers();
// Queue Task A and block it
await harness.dispatchManager.queue(taskAId);
await harness.dispatchManager.blockTask(taskAId, 'Blocked for testing');
// Queue Task B (not blocked, but depends on Task A which needs to be completed first)
// Actually Task B depends on Task A in SIMPLE_FIXTURE, but the dependency
// isn't loaded into the queue. Queue a fresh task instead.
// For this test, we just verify blocked task is not returned.
// Get next dispatchable - should be null since Task A is blocked
const next = await harness.dispatchManager.getNextDispatchable();
expect(next).toBeNull();
});
it('task status is set to blocked in database', async () => {
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
await harness.dispatchManager.queue(taskAId);
await harness.dispatchManager.blockTask(taskAId, 'Blocked for testing');
const task = await harness.taskRepository.findById(taskAId);
expect(task?.status).toBe('blocked');
});
});
describe('Merge conflict handling', () => {
  it('detects merge conflict and emits merge:conflicted event', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Mark task as completed (required for merge)
    await harness.taskRepository.update(taskAId, { status: 'completed' });
    // Create a worktree for this task
    const worktreeId = `wt-${taskAId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeId, 'feature-task-a');
    // Create agent in agentRepository with worktreeId
    // (coordinationManager.queueMerge looks up agent by taskId).
    // The created record is never read back, so the return value is not
    // bound — matches the sibling tests below (was an unused local).
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      worktreeId,
      taskId: taskAId,
      status: 'idle',
    });
    // Set up merge conflict result BEFORE processMerges
    harness.worktreeManager.setMergeResult(worktreeId, {
      success: false,
      conflicts: ['src/shared.ts', 'src/types.ts'],
      message: 'Merge conflict in 2 files',
    });
    // Queue for merge
    await harness.coordinationManager.queueMerge(taskAId);
    harness.clearEvents();
    // Process merges - should hit conflict
    const results = await harness.coordinationManager.processMerges('main');
    // Verify: merge result indicates failure
    expect(results.length).toBe(1);
    expect(results[0].success).toBe(false);
    expect(results[0].conflicts).toEqual(['src/shared.ts', 'src/types.ts']);
    // Verify: merge:conflicted event emitted
    const conflictEvents = harness.getEventsByType('merge:conflicted');
    expect(conflictEvents.length).toBe(1);
    const conflictPayload = (conflictEvents[0] as MergeConflictedEvent).payload;
    expect(conflictPayload.taskId).toBe(taskAId);
    expect(conflictPayload.conflictingFiles).toEqual(['src/shared.ts', 'src/types.ts']);
  });
  it('conflict appears in queue state as conflicted', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Mark task as completed
    await harness.taskRepository.update(taskAId, { status: 'completed' });
    // Create worktree
    const worktreeId = `wt-${taskAId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeId, 'feature-task-a');
    // Create agent in agentRepository
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      worktreeId,
      taskId: taskAId,
      status: 'idle',
    });
    // Set up merge conflict
    harness.worktreeManager.setMergeResult(worktreeId, {
      success: false,
      conflicts: ['src/shared.ts'],
      message: 'Merge conflict',
    });
    // Queue and process
    await harness.coordinationManager.queueMerge(taskAId);
    await harness.coordinationManager.processMerges('main');
    // Check queue state
    const queueState = await harness.coordinationManager.getQueueState();
    expect(queueState.conflicted.length).toBe(1);
    expect(queueState.conflicted[0].taskId).toBe(taskAId);
    expect(queueState.conflicted[0].conflicts).toContain('src/shared.ts');
  });
  it('handleConflict creates conflict-resolution task', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Mark task as completed
    await harness.taskRepository.update(taskAId, { status: 'completed' });
    // Create worktree
    const worktreeId = `wt-${taskAId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeId, 'feature-task-a');
    // Create agent in agentRepository
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      worktreeId,
      taskId: taskAId,
      status: 'idle',
    });
    // Set up merge conflict
    harness.worktreeManager.setMergeResult(worktreeId, {
      success: false,
      conflicts: ['src/shared.ts', 'src/types.ts'],
      message: 'Merge conflict',
    });
    // Queue and process (handleConflict is called automatically)
    await harness.coordinationManager.queueMerge(taskAId);
    await harness.coordinationManager.processMerges('main');
    // Verify: original task is now blocked
    const originalTask = await harness.taskRepository.findById(taskAId);
    expect(originalTask?.status).toBe('blocked');
    // Verify: task:queued event emitted for conflict resolution task
    const queuedEvents = harness.getEventsByType('task:queued');
    const conflictTaskEvent = queuedEvents.find(
      (e) => e.payload && (e.payload as { taskId: string }).taskId !== taskAId
    );
    expect(conflictTaskEvent).toBeDefined();
  });
  it('successful merge after clearing conflict result', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    const taskBId = seeded.tasks.get('Task B')!;
    // Set up Task A for merge (with conflict)
    await harness.taskRepository.update(taskAId, { status: 'completed' });
    const worktreeIdA = `wt-${taskAId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeIdA, 'feature-task-a');
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      worktreeId: worktreeIdA,
      taskId: taskAId,
      status: 'idle',
    });
    // Set conflict for Task A
    harness.worktreeManager.setMergeResult(worktreeIdA, {
      success: false,
      conflicts: ['src/shared.ts'],
      message: 'Merge conflict',
    });
    // Process Task A merge (will conflict)
    await harness.coordinationManager.queueMerge(taskAId);
    const conflictResults = await harness.coordinationManager.processMerges('main');
    expect(conflictResults[0].success).toBe(false);
    // Now set up Task B for merge (should succeed)
    await harness.taskRepository.update(taskBId, { status: 'completed' });
    const worktreeIdB = `wt-${taskBId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeIdB, 'feature-task-b');
    await harness.agentRepository.create({
      name: `agent-${taskBId.slice(0, 6)}`,
      worktreeId: worktreeIdB,
      taskId: taskBId,
      status: 'idle',
    });
    // Task B merge should succeed (default behavior)
    await harness.coordinationManager.queueMerge(taskBId);
    harness.clearEvents();
    const successResults = await harness.coordinationManager.processMerges('main');
    // Verify Task B merged successfully
    expect(successResults.length).toBe(1);
    expect(successResults[0].taskId).toBe(taskBId);
    expect(successResults[0].success).toBe(true);
    // Verify Task B in merged list
    const queueState = await harness.coordinationManager.getQueueState();
    expect(queueState.merged).toContain(taskBId);
  });
});
});

View File

@@ -0,0 +1,551 @@
/**
* E2E Tests for Extended Scenarios
*
* Tests extended scenarios in dispatch/coordination flow:
* - Conflict hand-back round-trip (conflict -> agent resolves -> merge succeeds)
* - Multi-agent parallel work and completion
*
 * Uses the TestHarness exported from ../index.js for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
PARALLEL_FIXTURE,
COMPLEX_FIXTURE,
type TestHarness,
} from '../index.js';
import type {
MergeConflictedEvent,
MergeCompletedEvent,
TaskQueuedEvent,
AgentStoppedEvent,
AgentCrashedEvent,
} from '../../events/types.js';
describe('E2E Extended Scenarios', () => {
  let harness: TestHarness;
  beforeEach(() => {
    // Fresh, fully-wired harness per test; no state leaks between cases.
    harness = createTestHarness();
  });
  afterEach(() => {
    // Always restore real timers — several tests enable fake timers.
    harness.cleanup();
    vi.useRealTimers();
  });
  // ===========================================================================
  // Conflict Hand-back Round-trip
  // ===========================================================================
  describe('Conflict hand-back round-trip', () => {
    it('conflict triggers resolution task, agent resolves, merge succeeds', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
      const taskAId = seeded.tasks.get('Task A')!;
      // Step 1: Complete Task A
      await harness.taskRepository.update(taskAId, { status: 'completed' });
      // Step 2: Create agent in agentRepository with worktreeId
      const worktreeId = `wt-${taskAId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${taskAId.slice(0, 6)}`,
        worktreeId,
        taskId: taskAId,
        status: 'idle',
      });
      // Step 3: Create worktree via MockWorktreeManager
      await harness.worktreeManager.create(worktreeId, 'feature-task-a');
      // Step 4: Set merge conflict result for first merge attempt
      harness.worktreeManager.setMergeResult(worktreeId, {
        success: false,
        conflicts: ['src/shared.ts', 'src/types.ts'],
        message: 'Merge conflict in 2 files',
      });
      // Step 5: Queue and process merge (should fail with conflict)
      await harness.coordinationManager.queueMerge(taskAId);
      harness.clearEvents();
      const conflictResults = await harness.coordinationManager.processMerges('main');
      // Verify: merge failed with conflict
      expect(conflictResults.length).toBe(1);
      expect(conflictResults[0].success).toBe(false);
      expect(conflictResults[0].conflicts).toEqual(['src/shared.ts', 'src/types.ts']);
      // Verify: merge:conflicted event emitted
      const conflictedEvents = harness.getEventsByType('merge:conflicted');
      expect(conflictedEvents.length).toBe(1);
      const conflictPayload = (conflictedEvents[0] as MergeConflictedEvent).payload;
      expect(conflictPayload.taskId).toBe(taskAId);
      expect(conflictPayload.conflictingFiles).toEqual(['src/shared.ts', 'src/types.ts']);
      // Verify: original task marked blocked
      const originalTask = await harness.taskRepository.findById(taskAId);
      expect(originalTask?.status).toBe('blocked');
      // Note: CoordinationManager.handleConflict updates task status to blocked
      // but does not emit task:blocked event (that's emitted by DispatchManager.blockTask)
      // Verify: task:queued event emitted for resolution task
      const queuedEvents = harness.getEventsByType('task:queued');
      // The resolution task is identified as the queued task that is NOT the original.
      const resolutionTaskEvent = queuedEvents.find(
        (e) => (e as TaskQueuedEvent).payload.taskId !== taskAId
      );
      expect(resolutionTaskEvent).toBeDefined();
      // Step 6: Clear the merge conflict (setMergeResult to success)
      harness.worktreeManager.setMergeResult(worktreeId, {
        success: true,
        message: 'Merged successfully',
      });
      // Step 7: Re-queue original task for merge (simulating resolution completed)
      // In a real system, the resolution task would fix conflicts and re-queue
      // Here we simulate by clearing conflict and re-queuing
      await harness.taskRepository.update(taskAId, { status: 'completed' });
      harness.clearEvents();
      await harness.coordinationManager.queueMerge(taskAId);
      const successResults = await harness.coordinationManager.processMerges('main');
      // Verify: merge succeeded
      expect(successResults.length).toBe(1);
      expect(successResults[0].taskId).toBe(taskAId);
      expect(successResults[0].success).toBe(true);
      // Verify: merge:completed event for original task
      const completedEvents = harness.getEventsByType('merge:completed');
      expect(completedEvents.length).toBe(1);
      const completedPayload = (completedEvents[0] as MergeCompletedEvent).payload;
      expect(completedPayload.taskId).toBe(taskAId);
    });
    it('conflict resolution preserves original task context', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
      const taskAId = seeded.tasks.get('Task A')!;
      // Complete Task A
      await harness.taskRepository.update(taskAId, { status: 'completed' });
      // Create agent and worktree
      const worktreeId = `wt-${taskAId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${taskAId.slice(0, 6)}`,
        worktreeId,
        taskId: taskAId,
        status: 'idle',
      });
      await harness.worktreeManager.create(worktreeId, 'feature-task-a');
      // Set conflict
      harness.worktreeManager.setMergeResult(worktreeId, {
        success: false,
        conflicts: ['src/conflict-file.ts'],
        message: 'Merge conflict',
      });
      // Process merge to trigger conflict handling
      await harness.coordinationManager.queueMerge(taskAId);
      harness.clearEvents();
      await harness.coordinationManager.processMerges('main');
      // Get the resolution task from task:queued events
      const queuedEvents = harness.getEventsByType('task:queued');
      expect(queuedEvents.length).toBeGreaterThan(0);
      // Find resolution task (the one that isn't the original task)
      const resolutionTaskQueuedEvent = queuedEvents.find(
        (e) => (e as TaskQueuedEvent).payload.taskId !== taskAId
      );
      expect(resolutionTaskQueuedEvent).toBeDefined();
      // Resolution task should exist and link back to original task
      const resolutionTaskId = (resolutionTaskQueuedEvent as TaskQueuedEvent).payload.taskId;
      const resolutionTask = await harness.taskRepository.findById(resolutionTaskId);
      expect(resolutionTask).toBeDefined();
      // Resolution task description should contain conflict file info
      expect(resolutionTask?.description).toContain('conflict');
    });
    it('multiple sequential conflicts resolved in order', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
      const taskAId = seeded.tasks.get('Task A')!;
      const taskBId = seeded.tasks.get('Task B')!;
      // Complete both tasks
      await harness.taskRepository.update(taskAId, { status: 'completed' });
      await harness.taskRepository.update(taskBId, { status: 'completed' });
      // Set up worktrees and agents for both tasks
      const worktreeIdA = `wt-${taskAId.slice(0, 6)}`;
      const worktreeIdB = `wt-${taskBId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${taskAId.slice(0, 6)}`,
        worktreeId: worktreeIdA,
        taskId: taskAId,
        status: 'idle',
      });
      await harness.agentRepository.create({
        name: `agent-${taskBId.slice(0, 6)}`,
        worktreeId: worktreeIdB,
        taskId: taskBId,
        status: 'idle',
      });
      await harness.worktreeManager.create(worktreeIdA, 'feature-task-a');
      await harness.worktreeManager.create(worktreeIdB, 'feature-task-b');
      // Set conflicts for both
      harness.worktreeManager.setMergeResult(worktreeIdA, {
        success: false,
        conflicts: ['src/shared-a.ts'],
        message: 'Conflict A',
      });
      harness.worktreeManager.setMergeResult(worktreeIdB, {
        success: false,
        conflicts: ['src/shared-b.ts'],
        message: 'Conflict B',
      });
      // Queue both for merge
      await harness.coordinationManager.queueMerge(taskAId);
      await harness.coordinationManager.queueMerge(taskBId);
      harness.clearEvents();
      // Process merges - both should fail
      const conflictResults = await harness.coordinationManager.processMerges('main');
      expect(conflictResults.filter((r) => !r.success).length).toBe(2);
      // Verify both are in conflicted state
      const queueState = await harness.coordinationManager.getQueueState();
      expect(queueState.conflicted.length).toBe(2);
      // Resolve Task A's conflict
      harness.worktreeManager.setMergeResult(worktreeIdA, {
        success: true,
        message: 'Merged A',
      });
      await harness.taskRepository.update(taskAId, { status: 'completed' });
      await harness.coordinationManager.queueMerge(taskAId);
      harness.clearEvents();
      const resultA = await harness.coordinationManager.processMerges('main');
      expect(resultA.length).toBe(1);
      expect(resultA[0].taskId).toBe(taskAId);
      expect(resultA[0].success).toBe(true);
      // Verify merge:completed for A
      // (clearEvents above means only this batch's events are visible)
      const completedEventsA = harness.getEventsByType('merge:completed');
      expect(completedEventsA.length).toBe(1);
      expect((completedEventsA[0] as MergeCompletedEvent).payload.taskId).toBe(taskAId);
      // Resolve Task B's conflict
      harness.worktreeManager.setMergeResult(worktreeIdB, {
        success: true,
        message: 'Merged B',
      });
      await harness.taskRepository.update(taskBId, { status: 'completed' });
      await harness.coordinationManager.queueMerge(taskBId);
      harness.clearEvents();
      const resultB = await harness.coordinationManager.processMerges('main');
      expect(resultB.length).toBe(1);
      expect(resultB[0].taskId).toBe(taskBId);
      expect(resultB[0].success).toBe(true);
      // Verify merge:completed for B
      const completedEventsB = harness.getEventsByType('merge:completed');
      expect(completedEventsB.length).toBe(1);
      expect((completedEventsB[0] as MergeCompletedEvent).payload.taskId).toBe(taskBId);
      // Verify final merged list has both
      const finalState = await harness.coordinationManager.getQueueState();
      expect(finalState.merged).toContain(taskAId);
      expect(finalState.merged).toContain(taskBId);
    });
  });
  // ===========================================================================
  // Multi-agent Parallel Work
  // ===========================================================================
  describe('Multi-agent parallel work', () => {
    it('multiple agents complete tasks in parallel', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(PARALLEL_FIXTURE);
      const taskXId = seeded.tasks.get('Task X')!;
      const taskYId = seeded.tasks.get('Task Y')!;
      const taskPId = seeded.tasks.get('Task P')!;
      const taskQId = seeded.tasks.get('Task Q')!;
      // Pre-seed 3 idle agents
      await harness.agentManager.spawn({
        name: 'pool-agent-1',
        taskId: 'placeholder-1',
        prompt: 'placeholder',
      });
      await harness.agentManager.spawn({
        name: 'pool-agent-2',
        taskId: 'placeholder-2',
        prompt: 'placeholder',
      });
      await harness.agentManager.spawn({
        name: 'pool-agent-3',
        taskId: 'placeholder-3',
        prompt: 'placeholder',
      });
      await harness.advanceTimers();
      harness.clearEvents();
      // Queue all 4 tasks
      await harness.dispatchManager.queue(taskXId);
      await harness.dispatchManager.queue(taskYId);
      await harness.dispatchManager.queue(taskPId);
      await harness.dispatchManager.queue(taskQId);
      harness.clearEvents();
      // Dispatch 3 tasks in parallel (3 agents working)
      const result1 = await harness.dispatchManager.dispatchNext();
      const result2 = await harness.dispatchManager.dispatchNext();
      const result3 = await harness.dispatchManager.dispatchNext();
      expect(result1.success).toBe(true);
      expect(result2.success).toBe(true);
      expect(result3.success).toBe(true);
      // All 3 should be dispatched to different agents
      const dispatchedIds = [result1.agentId, result2.agentId, result3.agentId];
      expect(new Set(dispatchedIds).size).toBe(3);
      // Advance timers to complete all 3 agents
      await harness.advanceTimers();
      // Verify: 3 agent:stopped events
      const stoppedEvents = harness.getEventsByType('agent:stopped');
      expect(stoppedEvents.length).toBe(3);
      // Complete all 3 tasks
      await harness.dispatchManager.completeTask(result1.taskId!);
      await harness.dispatchManager.completeTask(result2.taskId!);
      await harness.dispatchManager.completeTask(result3.taskId!);
      // Dispatch remaining task (Task Q)
      const result4 = await harness.dispatchManager.dispatchNext();
      expect(result4.success).toBe(true);
      await harness.advanceTimers();
      await harness.dispatchManager.completeTask(result4.taskId!);
      // Verify: all 4 tasks completed in database
      const tasks = await Promise.all([
        harness.taskRepository.findById(taskXId),
        harness.taskRepository.findById(taskYId),
        harness.taskRepository.findById(taskPId),
        harness.taskRepository.findById(taskQId),
      ]);
      expect(tasks.every((t) => t?.status === 'completed')).toBe(true);
    });
    it('parallel merges process in correct dependency order', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
      const task1AId = seeded.tasks.get('Task 1A')!;
      const task1BId = seeded.tasks.get('Task 1B')!;
      const task2AId = seeded.tasks.get('Task 2A')!;
      const task3AId = seeded.tasks.get('Task 3A')!;
      const task4AId = seeded.tasks.get('Task 4A')!;
      // Complete Task 1A and Task 1B (no dependencies)
      await harness.taskRepository.update(task1AId, { status: 'completed' });
      await harness.taskRepository.update(task1BId, { status: 'completed' });
      // Set up worktrees and agents for both
      const wt1A = `wt-${task1AId.slice(0, 6)}`;
      const wt1B = `wt-${task1BId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${task1AId.slice(0, 6)}`,
        worktreeId: wt1A,
        taskId: task1AId,
        status: 'idle',
      });
      await harness.agentRepository.create({
        name: `agent-${task1BId.slice(0, 6)}`,
        worktreeId: wt1B,
        taskId: task1BId,
        status: 'idle',
      });
      await harness.worktreeManager.create(wt1A, 'feature-1a');
      await harness.worktreeManager.create(wt1B, 'feature-1b');
      // Queue both for merge
      await harness.coordinationManager.queueMerge(task1AId);
      await harness.coordinationManager.queueMerge(task1BId);
      harness.clearEvents();
      // Process merges - both should succeed (no dependencies between them)
      const results1 = await harness.coordinationManager.processMerges('main');
      expect(results1.length).toBe(2);
      expect(results1.every((r) => r.success)).toBe(true);
      // Verify: merge:completed for both in same batch
      const completed1 = harness.getEventsByType('merge:completed');
      expect(completed1.length).toBe(2);
      // Complete Task 2A (depends on 1A) and Task 3A (depends on 1B)
      await harness.taskRepository.update(task2AId, { status: 'completed' });
      await harness.taskRepository.update(task3AId, { status: 'completed' });
      const wt2A = `wt-${task2AId.slice(0, 6)}`;
      const wt3A = `wt-${task3AId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${task2AId.slice(0, 6)}`,
        worktreeId: wt2A,
        taskId: task2AId,
        status: 'idle',
      });
      await harness.agentRepository.create({
        name: `agent-${task3AId.slice(0, 6)}`,
        worktreeId: wt3A,
        taskId: task3AId,
        status: 'idle',
      });
      await harness.worktreeManager.create(wt2A, 'feature-2a');
      await harness.worktreeManager.create(wt3A, 'feature-3a');
      // Queue and merge
      await harness.coordinationManager.queueMerge(task2AId);
      await harness.coordinationManager.queueMerge(task3AId);
      harness.clearEvents();
      const results2 = await harness.coordinationManager.processMerges('main');
      expect(results2.length).toBe(2);
      expect(results2.every((r) => r.success)).toBe(true);
      // Complete Task 4A (depends on 2A and 3A)
      await harness.taskRepository.update(task4AId, { status: 'completed' });
      const wt4A = `wt-${task4AId.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `agent-${task4AId.slice(0, 6)}`,
        worktreeId: wt4A,
        taskId: task4AId,
        status: 'idle',
      });
      await harness.worktreeManager.create(wt4A, 'feature-4a');
      // Queue and merge
      await harness.coordinationManager.queueMerge(task4AId);
      harness.clearEvents();
      const results3 = await harness.coordinationManager.processMerges('main');
      expect(results3.length).toBe(1);
      expect(results3[0].taskId).toBe(task4AId);
      expect(results3[0].success).toBe(true);
      // Verify: final merge order respects dependency graph
      const finalState = await harness.coordinationManager.getQueueState();
      expect(finalState.merged).toContain(task1AId);
      expect(finalState.merged).toContain(task1BId);
      expect(finalState.merged).toContain(task2AId);
      expect(finalState.merged).toContain(task3AId);
      expect(finalState.merged).toContain(task4AId);
    });
    it('parallel dispatch with mixed outcomes', async () => {
      vi.useFakeTimers();
      const seeded = await harness.seedFixture(PARALLEL_FIXTURE);
      const taskXId = seeded.tasks.get('Task X')!;
      const taskYId = seeded.tasks.get('Task Y')!;
      // Pre-seed 2 agents
      await harness.agentManager.spawn({
        name: 'pool-agent-1',
        taskId: 'placeholder-1',
        prompt: 'placeholder',
      });
      await harness.agentManager.spawn({
        name: 'pool-agent-2',
        taskId: 'placeholder-2',
        prompt: 'placeholder',
      });
      await harness.advanceTimers();
      // Set Task X to succeed, Task Y to crash
      // NOTE(review): assumes dispatch assigns agents named `agent-<taskId prefix>`
      // — the outcome checks below don't depend on which task got which result.
      harness.setAgentDone(`agent-${taskXId.slice(0, 6)}`, 'Task X completed');
      harness.setAgentError(`agent-${taskYId.slice(0, 6)}`, 'Out of memory error');
      // Queue both tasks
      await harness.dispatchManager.queue(taskXId);
      await harness.dispatchManager.queue(taskYId);
      harness.clearEvents();
      // Dispatch both tasks
      const result1 = await harness.dispatchManager.dispatchNext();
      const result2 = await harness.dispatchManager.dispatchNext();
      // Both should dispatch successfully
      expect(result1.success).toBe(true);
      expect(result2.success).toBe(true);
      // Run timers to complete agents
      await harness.advanceTimers();
      // Verify: one agent:stopped, one agent:crashed
      const stoppedEvents = harness.getEventsByType('agent:stopped');
      const crashedEvents = harness.getEventsByType('agent:crashed');
      expect(stoppedEvents.length).toBe(1);
      expect(crashedEvents.length).toBe(1);
      // Identify which task succeeded and which crashed
      const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
      const crashedPayload = (crashedEvents[0] as AgentCrashedEvent).payload;
      // Find the successful task
      const successTaskId = stoppedPayload.taskId;
      const crashedTaskId = crashedPayload.taskId;
      // Complete the successful task
      await harness.dispatchManager.completeTask(successTaskId!);
      // Verify: completed task is actually completed
      const completedTask = await harness.taskRepository.findById(successTaskId!);
      expect(completedTask?.status).toBe('completed');
      // Verify: crashed task stays in_progress
      const inProgressTask = await harness.taskRepository.findById(crashedTaskId!);
      expect(inProgressTask?.status).toBe('in_progress');
      // Verify: completed task can merge (set up infrastructure)
      const wtSuccess = `wt-${successTaskId!.slice(0, 6)}`;
      await harness.agentRepository.create({
        name: `merge-agent-${successTaskId!.slice(0, 6)}`,
        worktreeId: wtSuccess,
        taskId: successTaskId!,
        status: 'idle',
      });
      await harness.worktreeManager.create(wtSuccess, 'feature-success');
      await harness.coordinationManager.queueMerge(successTaskId!);
      const mergeResults = await harness.coordinationManager.processMerges('main');
      expect(mergeResults.length).toBe(1);
      expect(mergeResults[0].success).toBe(true);
    });
  });
});

View File

@@ -0,0 +1,437 @@
/**
* E2E Happy Path Tests
*
* Tests proving core dispatch/coordination flow works end-to-end
* using the TestHarness with mocked agents and worktrees.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
PARALLEL_FIXTURE,
COMPLEX_FIXTURE,
type TestHarness,
} from '../index.js';
describe('E2E Happy Path', () => {
let harness: TestHarness;
beforeEach(() => {
harness = createTestHarness();
});
afterEach(() => {
harness.cleanup();
vi.useRealTimers();
});
// ===========================================================================
// Scenario 1: Single Task Flow
// ===========================================================================
describe('Single task flow', () => {
  it('completes a single task from queue to completion', async () => {
    vi.useFakeTimers();
    const fixture = await harness.seedFixture(SIMPLE_FIXTURE);
    const targetTaskId = fixture.tasks.get('Task A')!;
    // An idle agent must already exist before DispatchManager will spawn work.
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // 1. Queue the task and confirm task:queued fired for it.
    await harness.dispatchManager.queue(targetTaskId);
    const queued = harness.getEventsByType('task:queued');
    expect(queued.length).toBe(1);
    expect((queued[0].payload as { taskId: string }).taskId).toBe(targetTaskId);
    // 2. Dispatch: the dispatcher must pick this task and assign an agent.
    const dispatch = await harness.dispatchManager.dispatchNext();
    expect(dispatch.success).toBe(true);
    expect(dispatch.taskId).toBe(targetTaskId);
    expect(dispatch.agentId).toBeDefined();
    const dispatched = harness.getEventsByType('task:dispatched');
    expect(dispatched.length).toBe(1);
    expect((dispatched[0].payload as { taskId: string }).taskId).toBe(targetTaskId);
    const spawned = harness.getEventsByType('agent:spawned');
    expect(spawned.length).toBe(1);
    // 3. Let the mocked agent run to completion.
    await harness.advanceTimers();
    const stopped = harness.getEventsByType('agent:stopped');
    expect(stopped.length).toBe(1);
    // 4. Mark the task complete and confirm persistence.
    await harness.dispatchManager.completeTask(targetTaskId);
    const persisted = await harness.taskRepository.findById(targetTaskId);
    expect(persisted?.status).toBe('completed');
  });
});
// ===========================================================================
// Scenario 2: Sequential Dependencies
// ===========================================================================
describe('Sequential dependencies', () => {
  it('dispatches tasks in priority order (dependency ordering via task status)', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    const taskBId = seeded.tasks.get('Task B')!;
    const taskCId = seeded.tasks.get('Task C')!;
    // Pre-seed idle agent
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Queue all three tasks
    await harness.dispatchManager.queue(taskAId);
    await harness.dispatchManager.queue(taskBId);
    await harness.dispatchManager.queue(taskCId);
    harness.clearEvents();
    // All three tasks are queued.
    // NOTE: this snapshot is taken BEFORE any dispatch; its `ready` list is
    // deliberately re-inspected further down.
    const queueState = await harness.dispatchManager.getQueueState();
    expect(queueState.queued.length).toBe(3);
    // First dispatchNext: Task A (high priority) dispatches first.
    // getNextDispatchable only peeks — dispatchNext below still gets Task A.
    const nextTask = await harness.dispatchManager.getNextDispatchable();
    expect(nextTask).not.toBeNull();
    expect(nextTask!.taskId).toBe(taskAId); // High priority first
    // All tasks are "ready" in current implementation (dependency loading TBD)
    const readyTaskIds = queueState.ready.map((t) => t.taskId);
    expect(readyTaskIds).toContain(taskAId);
    // Dispatch Task A
    const dispatchResult = await harness.dispatchManager.dispatchNext();
    expect(dispatchResult.success).toBe(true);
    expect(dispatchResult.taskId).toBe(taskAId);
    // Wait for agent completion
    await harness.advanceTimers();
    // Complete Task A
    await harness.dispatchManager.completeTask(taskAId);
    // Verify Task A removed from queue, B and C remain
    const queueStateAfter = await harness.dispatchManager.getQueueState();
    const remainingTaskIds = queueStateAfter.queued.map((t) => t.taskId);
    expect(remainingTaskIds).not.toContain(taskAId);
    expect(remainingTaskIds).toContain(taskBId);
    expect(remainingTaskIds).toContain(taskCId);
    // Task A marked completed in database
    const taskA = await harness.taskRepository.findById(taskAId);
    expect(taskA?.status).toBe('completed');
  });
});
// ===========================================================================
// Scenario 3: Parallel Dispatch
// ===========================================================================
describe('Parallel dispatch', () => {
  it('dispatches multiple independent tasks to multiple agents', async () => {
    vi.useFakeTimers();
    const fixture = await harness.seedFixture(PARALLEL_FIXTURE);
    // Four independent tasks, queued in fixture order (X, Y, P, Q).
    const independentTaskIds = [
      fixture.tasks.get('Task X')!,
      fixture.tasks.get('Task Y')!,
      fixture.tasks.get('Task P')!,
      fixture.tasks.get('Task Q')!,
    ];
    // Two idle agents in the pool -> two tasks can run concurrently.
    await harness.agentManager.spawn({
      name: 'pool-agent-1',
      taskId: 'placeholder-1',
      prompt: 'placeholder',
    });
    await harness.agentManager.spawn({
      name: 'pool-agent-2',
      taskId: 'placeholder-2',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Queue all four tasks.
    for (const id of independentTaskIds) {
      await harness.dispatchManager.queue(id);
    }
    harness.clearEvents();
    // With no dependencies, every queued task is immediately ready.
    const queueState = await harness.dispatchManager.getQueueState();
    expect(queueState.ready.length).toBe(4);
    // Two back-to-back dispatches must land on distinct tasks and agents.
    const firstDispatch = await harness.dispatchManager.dispatchNext();
    expect(firstDispatch.success).toBe(true);
    const secondDispatch = await harness.dispatchManager.dispatchNext();
    expect(secondDispatch.success).toBe(true);
    expect(firstDispatch.taskId).not.toBe(secondDispatch.taskId);
    expect(firstDispatch.agentId).not.toBe(secondDispatch.agentId);
    // Exactly two task:dispatched events were emitted.
    const dispatchedEvents = harness.getEventsByType('task:dispatched');
    expect(dispatchedEvents.length).toBe(2);
  });
});
// ===========================================================================
// Scenario 4: Full Merge Flow
// ===========================================================================
describe('Full merge flow', () => {
  it('queues and processes merge after task completion', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Pre-seed idle agent in MockAgentManager
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Queue and dispatch task
    await harness.dispatchManager.queue(taskAId);
    const dispatchResult = await harness.dispatchManager.dispatchNext();
    expect(dispatchResult.success).toBe(true);
    // Wait for agent completion
    await harness.advanceTimers();
    // Complete task
    await harness.dispatchManager.completeTask(taskAId);
    harness.clearEvents();
    // Create agent in database (CoordinationManager.queueMerge requires it).
    // This bridges the gap between MockAgentManager (in-memory) and
    // AgentRepository (database). The created record is never read back, so
    // the return value is intentionally not bound (was an unused local).
    const worktreeId = `worktree-${taskAId.slice(0, 8)}`;
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      taskId: taskAId,
      worktreeId,
      status: 'idle',
    });
    // Create worktree for merge
    await harness.worktreeManager.create(worktreeId, `feature-${taskAId.slice(0, 6)}`);
    // Queue merge
    await harness.coordinationManager.queueMerge(taskAId);
    // Verify merge:queued event
    const mergeQueuedEvents = harness.getEventsByType('merge:queued');
    expect(mergeQueuedEvents.length).toBe(1);
    // Process merges
    const mergeResults = await harness.coordinationManager.processMerges('main');
    expect(mergeResults.length).toBe(1);
    expect(mergeResults[0].taskId).toBe(taskAId);
    expect(mergeResults[0].success).toBe(true);
    // Verify merge:completed event
    const mergeCompletedEvents = harness.getEventsByType('merge:completed');
    expect(mergeCompletedEvents.length).toBe(1);
  });
});
// ===========================================================================
// Scenario 5: Complex Dependency Flow
// ===========================================================================
describe('Complex dependency flow', () => {
  // Dependency graph under test (COMPLEX_FIXTURE):
  //   1A, 1B  -> no dependencies (roots)
  //   2A      -> depends on 1A
  //   3A      -> depends on 1B
  //   4A      -> depends on 2A and 3A
  it('handles multi-level dependency graph with COMPLEX_FIXTURE', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
    // Get all task IDs
    const task1AId = seeded.tasks.get('Task 1A')!;
    const task1BId = seeded.tasks.get('Task 1B')!;
    const task2AId = seeded.tasks.get('Task 2A')!;
    const task3AId = seeded.tasks.get('Task 3A')!;
    const task4AId = seeded.tasks.get('Task 4A')!;
    // Pre-seed idle agent
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Queue all 5 tasks
    await harness.dispatchManager.queue(task1AId);
    await harness.dispatchManager.queue(task1BId);
    await harness.dispatchManager.queue(task2AId);
    await harness.dispatchManager.queue(task3AId);
    await harness.dispatchManager.queue(task4AId);
    harness.clearEvents();
    // Verify all 5 tasks are queued
    const initialState = await harness.dispatchManager.getQueueState();
    expect(initialState.queued.length).toBe(5);
    // Only tasks with no dependencies are ready:
    // - Task 1A: no deps -> READY
    // - Task 1B: no deps -> READY
    // - Task 2A: depends on 1A -> NOT READY
    // - Task 3A: depends on 1B -> NOT READY
    // - Task 4A: depends on 2A, 3A -> NOT READY
    expect(initialState.ready.length).toBe(2);
    // First dispatch: Task 1A (high priority, first queued)
    const result1 = await harness.dispatchManager.dispatchNext();
    expect(result1.success).toBe(true);
    expect(result1.taskId).toBe(task1AId);
    // Wait for agent completion
    await harness.advanceTimers();
    // Complete Task 1A
    await harness.dispatchManager.completeTask(task1AId);
    // Verify Task 1A completed in database
    const task1A = await harness.taskRepository.findById(task1AId);
    expect(task1A?.status).toBe('completed');
    // 4 tasks remain in queue
    const afterFirstState = await harness.dispatchManager.getQueueState();
    expect(afterFirstState.queued.length).toBe(4);
    // Dispatch and complete remaining tasks one by one; each completion
    // unblocks the next level of the dependency graph.
    // Task 1B (high priority among remaining)
    const result2 = await harness.dispatchManager.dispatchNext();
    expect(result2.success).toBe(true);
    await harness.advanceTimers();
    await harness.dispatchManager.completeTask(result2.taskId!);
    // 3 tasks remain
    const midState = await harness.dispatchManager.getQueueState();
    expect(midState.queued.length).toBe(3);
    // Continue dispatching remaining tasks
    const result3 = await harness.dispatchManager.dispatchNext();
    expect(result3.success).toBe(true);
    await harness.advanceTimers();
    await harness.dispatchManager.completeTask(result3.taskId!);
    const result4 = await harness.dispatchManager.dispatchNext();
    expect(result4.success).toBe(true);
    await harness.advanceTimers();
    await harness.dispatchManager.completeTask(result4.taskId!);
    const result5 = await harness.dispatchManager.dispatchNext();
    expect(result5.success).toBe(true);
    await harness.advanceTimers();
    await harness.dispatchManager.completeTask(result5.taskId!);
    // All tasks completed
    const finalState = await harness.dispatchManager.getQueueState();
    expect(finalState.queued.length).toBe(0);
    // Verify all 5 tasks completed in database
    const allTasks = await Promise.all([
      harness.taskRepository.findById(task1AId),
      harness.taskRepository.findById(task1BId),
      harness.taskRepository.findById(task2AId),
      harness.taskRepository.findById(task3AId),
      harness.taskRepository.findById(task4AId),
    ]);
    expect(allTasks.every((t) => t?.status === 'completed')).toBe(true);
    // Verify event sequence: 5 task:dispatched, 5 task:completed
    const dispatchedEvents = harness.getEventsByType('task:dispatched');
    expect(dispatchedEvents.length).toBe(5);
    const completedEvents = harness.getEventsByType('task:completed');
    expect(completedEvents.length).toBe(5);
  });
  // Sanity check that seedFixture persisted the expected task_dependencies rows.
  it('fixture dependencies are stored correctly in database', async () => {
    const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
    // Get task IDs
    const task1AId = seeded.tasks.get('Task 1A')!;
    const task1BId = seeded.tasks.get('Task 1B')!;
    const task2AId = seeded.tasks.get('Task 2A')!;
    const task3AId = seeded.tasks.get('Task 3A')!;
    const task4AId = seeded.tasks.get('Task 4A')!;
    // Query task_dependencies directly to verify fixture setup
    const { taskDependencies } = await import('../../db/schema.js');
    const { eq } = await import('drizzle-orm');
    // Task 2A should depend on Task 1A
    const task2ADeps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task2AId));
    expect(task2ADeps.length).toBe(1);
    expect(task2ADeps[0].dependsOnTaskId).toBe(task1AId);
    // Task 3A should depend on Task 1B
    const task3ADeps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task3AId));
    expect(task3ADeps.length).toBe(1);
    expect(task3ADeps[0].dependsOnTaskId).toBe(task1BId);
    // Task 4A should depend on both Task 2A and Task 3A
    const task4ADeps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task4AId));
    expect(task4ADeps.length).toBe(2);
    const depIds = task4ADeps.map((d) => d.dependsOnTaskId);
    expect(depIds).toContain(task2AId);
    expect(depIds).toContain(task3AId);
    // Tasks 1A and 1B should have no dependencies
    const task1ADeps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task1AId));
    expect(task1ADeps.length).toBe(0);
    const task1BDeps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task1BId));
    expect(task1BDeps.length).toBe(0);
  });
});
});

View File

@@ -0,0 +1,12 @@
/**
 * E2E Tests for Dispatch/Coordination Flows
 *
 * Test files:
 * - happy-path.test.ts: Normal operation scenarios
 * - edge-cases.test.ts: Error handling and edge cases
 *
 * Uses TestHarness from apps/server/src/test/ for system wiring.
 */
// No exports needed - tests are self-contained. The empty export marks this
// file as an ES module (required under isolatedModules) without exporting
// anything.
export {};

View File

@@ -0,0 +1,480 @@
/**
* E2E Tests for Phase Parallel Execution
*
* Tests proving phase dispatch/coordination flow works end-to-end
* using the TestHarness with phaseDispatchManager.
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { createTestHarness, type TestHarness } from '../index.js';
import type {
PhaseQueuedEvent,
PhaseStartedEvent,
PhaseCompletedEvent,
PhaseBlockedEvent,
} from '../../events/types.js';
describe('Phase Parallel Execution', () => {
// Fresh harness per test so queue, event, and repository state never leaks
// between cases; cleanup tears down the in-memory system after each test.
let harness: TestHarness;
beforeEach(() => {
  harness = createTestHarness();
});
afterEach(() => {
  harness.cleanup();
});
// ===========================================================================
// Test 1: Independent phases dispatch in parallel
// ===========================================================================
describe('Independent phases dispatch in parallel', () => {
  // Two phases with no dependency edges must both be immediately ready and
  // dispatchable back-to-back, each ending up in_progress.
  it('dispatches multiple independent phases when no dependencies exist', async () => {
    // One initiative owning two unrelated phases.
    const owner = await harness.initiativeRepository.create({
      name: 'Independent Phases Test',
      status: 'active',
    });
    const alpha = await harness.phaseRepository.create({
      initiativeId: owner.id,
      name: 'Phase A',
      content: 'Independent phase A',
      status: 'pending',
    });
    const beta = await harness.phaseRepository.create({
      initiativeId: owner.id,
      name: 'Phase B',
      content: 'Independent phase B',
      status: 'pending',
    });
    // The dispatch gate requires 'approved' status before queuing.
    await harness.phaseRepository.update(alpha.id, { status: 'approved' as const });
    await harness.phaseRepository.update(beta.id, { status: 'approved' as const });
    await harness.phaseDispatchManager.queuePhase(alpha.id);
    await harness.phaseDispatchManager.queuePhase(beta.id);
    // Each queuePhase call emits phase:queued.
    const queuedPhaseEvents = harness.getEventsByType('phase:queued');
    expect(queuedPhaseEvents.length).toBe(2);
    // With no dependencies, both phases sit in the ready set.
    const snapshot = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(snapshot.queued.length).toBe(2);
    expect(snapshot.ready.length).toBe(2);
    expect(snapshot.blocked.length).toBe(0);
    const dispatchable = snapshot.ready.map((p) => p.phaseId);
    expect(dispatchable).toContain(alpha.id);
    expect(dispatchable).toContain(beta.id);
    harness.clearEvents();
    // Two consecutive dispatches must succeed and pick distinct phases.
    const firstDispatch = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(firstDispatch.success).toBe(true);
    const secondDispatch = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(secondDispatch.success).toBe(true);
    expect(firstDispatch.phaseId).not.toBe(secondDispatch.phaseId);
    // A phase:started event per dispatch.
    const started = harness.getEventsByType('phase:started');
    expect(started.length).toBe(2);
    // Both phases transitioned to in_progress in the repository.
    const alphaAfter = await harness.phaseRepository.findById(alpha.id);
    const betaAfter = await harness.phaseRepository.findById(beta.id);
    expect(alphaAfter?.status).toBe('in_progress');
    expect(betaAfter?.status).toBe('in_progress');
  });
});
// ===========================================================================
// Test 2: Dependent phase waits for prerequisite
// ===========================================================================
describe('Dependent phase waits for prerequisite', () => {
  // Linear chain A -> B: B must stay non-dispatchable until A completes.
  it('only dispatches phase A first, then B after A completes', async () => {
    // Create phases: A, B (depends on A)
    const initiative = await harness.initiativeRepository.create({
      name: 'Sequential Phases Test',
      status: 'active',
    });
    const phaseA = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase A',
      content: 'First phase',
      status: 'pending',
    });
    const phaseB = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase B',
      content: 'Second phase, depends on A',
      status: 'pending',
    });
    // Approve phases before queuing
    await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
    // Create dependency: B depends on A
    await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
    // Queue both phases
    await harness.phaseDispatchManager.queuePhase(phaseA.id);
    await harness.phaseDispatchManager.queuePhase(phaseB.id);
    // Check queue state - only A should be ready
    const queueState1 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState1.queued.length).toBe(2);
    expect(queueState1.ready.length).toBe(1);
    expect(queueState1.ready[0].phaseId).toBe(phaseA.id);
    harness.clearEvents();
    // Dispatch - should get phase A
    const result1 = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(result1.success).toBe(true);
    expect(result1.phaseId).toBe(phaseA.id);
    // Try to dispatch again - should fail (B is blocked by A)
    const result2 = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(result2.success).toBe(false);
    expect(result2.reason).toBe('No dispatchable phases');
    // Verify phase B still in queue but not ready
    const queueState2 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState2.queued.length).toBe(1);
    expect(queueState2.ready.length).toBe(0);
    // Complete phase A
    await harness.phaseDispatchManager.completePhase(phaseA.id);
    // Verify phase:completed event for A
    const completedEvents = harness.getEventsByType('phase:completed');
    expect(completedEvents.length).toBe(1);
    expect((completedEvents[0] as PhaseCompletedEvent).payload.phaseId).toBe(phaseA.id);
    // Now B should be ready (its only prerequisite is satisfied)
    const queueState3 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState3.ready.length).toBe(1);
    expect(queueState3.ready[0].phaseId).toBe(phaseB.id);
    harness.clearEvents();
    // Dispatch - should get phase B
    const result3 = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(result3.success).toBe(true);
    expect(result3.phaseId).toBe(phaseB.id);
    // Verify phase B is now in_progress
    const updatedPhaseB = await harness.phaseRepository.findById(phaseB.id);
    expect(updatedPhaseB?.status).toBe('in_progress');
  });
});
// ===========================================================================
// Test 3: Diamond dependency pattern
// ===========================================================================
describe('Diamond dependency pattern', () => {
  // Diamond graph:
  //        A
  //       / \
  //      B   C     (B and C each depend on A; may run in parallel)
  //       \ /
  //        D       (D depends on both B and C)
  it('handles diamond: A -> B,C -> D correctly', async () => {
    // Create phases: A, B (depends on A), C (depends on A), D (depends on B, C)
    const initiative = await harness.initiativeRepository.create({
      name: 'Diamond Pattern Test',
      status: 'active',
    });
    const phaseA = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase A',
      content: 'Root phase',
      status: 'pending',
    });
    const phaseB = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase B',
      content: 'Depends on A',
      status: 'pending',
    });
    const phaseC = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase C',
      content: 'Depends on A',
      status: 'pending',
    });
    const phaseD = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase D',
      content: 'Depends on B and C',
      status: 'pending',
    });
    // Approve all phases before queuing
    await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseC.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseD.id, { status: 'approved' as const });
    // Create dependencies
    await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
    await harness.phaseRepository.createDependency(phaseC.id, phaseA.id);
    await harness.phaseRepository.createDependency(phaseD.id, phaseB.id);
    await harness.phaseRepository.createDependency(phaseD.id, phaseC.id);
    // Queue all phases
    await harness.phaseDispatchManager.queuePhase(phaseA.id);
    await harness.phaseDispatchManager.queuePhase(phaseB.id);
    await harness.phaseDispatchManager.queuePhase(phaseC.id);
    await harness.phaseDispatchManager.queuePhase(phaseD.id);
    // Step 1: Only A should be ready
    const state1 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(state1.queued.length).toBe(4);
    expect(state1.ready.length).toBe(1);
    expect(state1.ready[0].phaseId).toBe(phaseA.id);
    // Dispatch A
    const resultA = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(resultA.success).toBe(true);
    expect(resultA.phaseId).toBe(phaseA.id);
    // Step 2: After A completes, B and C should be ready (parallel)
    await harness.phaseDispatchManager.completePhase(phaseA.id);
    const state2 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(state2.queued.length).toBe(3); // B, C, D still queued
    expect(state2.ready.length).toBe(2); // B and C ready
    const readyIds = state2.ready.map((p) => p.phaseId);
    expect(readyIds).toContain(phaseB.id);
    expect(readyIds).toContain(phaseC.id);
    expect(readyIds).not.toContain(phaseD.id);
    // Dispatch B and C in parallel
    const resultB = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(resultB.success).toBe(true);
    const resultC = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(resultC.success).toBe(true);
    // Verify D is still not ready (needs both B and C complete)
    const state3 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(state3.ready.length).toBe(0);
    expect(state3.queued.length).toBe(1);
    expect(state3.queued[0].phaseId).toBe(phaseD.id);
    // Step 3: Complete B only - D still not ready
    // Note: dispatch order of B vs C is not assumed; resultB/resultC phase IDs
    // are used as returned rather than the literal phaseB/phaseC ids.
    await harness.phaseDispatchManager.completePhase(resultB.phaseId);
    const state4 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(state4.ready.length).toBe(0); // D still blocked by C
    // Step 4: Complete C - now D should be ready
    await harness.phaseDispatchManager.completePhase(resultC.phaseId);
    const state5 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(state5.ready.length).toBe(1);
    expect(state5.ready[0].phaseId).toBe(phaseD.id);
    // Dispatch D
    const resultD = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(resultD.success).toBe(true);
    expect(resultD.phaseId).toBe(phaseD.id);
    // Verify D is now in_progress
    const updatedPhaseD = await harness.phaseRepository.findById(phaseD.id);
    expect(updatedPhaseD?.status).toBe('in_progress');
  });
});
// ===========================================================================
// Test 4: Approval gate rejects non-approved phases
// ===========================================================================
describe('Approval gate rejects non-approved phases', () => {
  // queuePhase() must refuse any phase whose status is not 'approved'.
  it('rejects queuePhase for pending phase', async () => {
    const owner = await harness.initiativeRepository.create({
      name: 'Approval Gate Test',
      status: 'active',
    });
    const pendingPhase = await harness.phaseRepository.create({
      initiativeId: owner.id,
      name: 'Unapproved Phase',
      status: 'pending',
    });
    // Status is still 'pending', so the approval gate must throw.
    const attempt = harness.phaseDispatchManager.queuePhase(pendingPhase.id);
    await expect(attempt).rejects.toThrow('must be approved before queuing');
  });
  it('rejects queuePhase for in_progress phase', async () => {
    const owner = await harness.initiativeRepository.create({
      name: 'Approval Gate Test 2',
      status: 'active',
    });
    const runningPhase = await harness.phaseRepository.create({
      initiativeId: owner.id,
      name: 'In Progress Phase',
      status: 'in_progress',
    });
    // An already-running phase is likewise rejected by the gate.
    const attempt = harness.phaseDispatchManager.queuePhase(runningPhase.id);
    await expect(attempt).rejects.toThrow('must be approved before queuing');
  });
});
// ===========================================================================
// Test 5: Blocked phase doesn't dispatch
// ===========================================================================
describe('Blocked phase does not dispatch', () => {
  // A blocked phase must never dispatch, and any phase depending on it
  // (directly or transitively) must also stay non-dispatchable.
  it('prevents dispatch of blocked phase even if dependencies complete', async () => {
    // Create phases: A, B (depends on A)
    const initiative = await harness.initiativeRepository.create({
      name: 'Blocked Phase Test',
      status: 'active',
    });
    const phaseA = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase A',
      content: 'First phase that will be blocked',
      status: 'pending',
    });
    const phaseB = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase B',
      content: 'Second phase, depends on A',
      status: 'pending',
    });
    // Approve phases before queuing
    await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
    // Create dependency: B depends on A
    await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
    // Queue phase A
    await harness.phaseDispatchManager.queuePhase(phaseA.id);
    // Block phase A
    await harness.phaseDispatchManager.blockPhase(phaseA.id, 'External dependency unavailable');
    // Verify phase:blocked event
    const blockedEvents = harness.getEventsByType('phase:blocked');
    expect(blockedEvents.length).toBe(1);
    expect((blockedEvents[0] as PhaseBlockedEvent).payload.phaseId).toBe(phaseA.id);
    expect((blockedEvents[0] as PhaseBlockedEvent).payload.reason).toBe(
      'External dependency unavailable'
    );
    // Try to dispatch - should fail
    const result = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(result.success).toBe(false);
    expect(result.reason).toBe('No dispatchable phases');
    // Verify queue state shows A as blocked
    const queueState = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState.blocked.length).toBe(1);
    expect(queueState.blocked[0].phaseId).toBe(phaseA.id);
    expect(queueState.blocked[0].reason).toBe('External dependency unavailable');
    // Queue phase B
    await harness.phaseDispatchManager.queuePhase(phaseB.id);
    // B should never become ready because A is blocked (not completed)
    const queueState2 = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState2.ready.length).toBe(0);
    expect(queueState2.queued.length).toBe(1); // Only B is queued (A is blocked, not queued)
    expect(queueState2.queued[0].phaseId).toBe(phaseB.id);
    // Try to dispatch B - should fail
    const resultB = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(resultB.success).toBe(false);
    expect(resultB.reason).toBe('No dispatchable phases');
    // Verify phase A status is blocked in database
    const updatedPhaseA = await harness.phaseRepository.findById(phaseA.id);
    expect(updatedPhaseA?.status).toBe('blocked');
  });
  it('blocked phase prevents all downstream phases from dispatching', async () => {
    // Create chain: A -> B -> C, then block A
    const initiative = await harness.initiativeRepository.create({
      name: 'Chain Block Test',
      status: 'active',
    });
    const phaseA = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase A',
      content: 'Root phase',
      status: 'pending',
    });
    const phaseB = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase B',
      content: 'Depends on A',
      status: 'pending',
    });
    const phaseC = await harness.phaseRepository.create({
      initiativeId: initiative.id,
      name: 'Phase C',
      content: 'Depends on B',
      status: 'pending',
    });
    // Approve all phases before queuing
    await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
    await harness.phaseRepository.update(phaseC.id, { status: 'approved' as const });
    // Create dependency chain: A -> B -> C
    await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
    await harness.phaseRepository.createDependency(phaseC.id, phaseB.id);
    // Queue all phases
    await harness.phaseDispatchManager.queuePhase(phaseA.id);
    await harness.phaseDispatchManager.queuePhase(phaseB.id);
    await harness.phaseDispatchManager.queuePhase(phaseC.id);
    // Block phase A
    await harness.phaseDispatchManager.blockPhase(phaseA.id, 'Resource unavailable');
    // Verify only B and C are in queue (A is blocked)
    const queueState = await harness.phaseDispatchManager.getPhaseQueueState();
    expect(queueState.queued.length).toBe(2);
    expect(queueState.ready.length).toBe(0); // Neither B nor C can dispatch
    expect(queueState.blocked.length).toBe(1);
    // Try to dispatch any phase - should fail for all
    const result = await harness.phaseDispatchManager.dispatchNextPhase();
    expect(result.success).toBe(false);
    expect(result.reason).toBe('No dispatchable phases');
  });
});
});

View File

@@ -0,0 +1,490 @@
/**
* E2E Tests for Recovery and Extended Scenarios
*
* Tests recovery/resume after interruption scenarios:
* - Queue state survives harness recreation (DB is source of truth)
* - In-progress task recoverable after agent crash
* - Blocked task state persists and can be unblocked
* - Merge queue state recoverable
*
* Tests extended agent Q&A scenarios:
* - Multiple questions in sequence
* - Question surfaces in message queue
* - Agent resumes with answer in context
* - Waiting agent blocks task completion
*
 * Uses TestHarness from apps/server/src/test/ for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
type TestHarness,
} from '../index.js';
import type {
AgentWaitingEvent,
AgentResumedEvent,
AgentStoppedEvent,
} from '../../events/types.js';
describe('E2E Recovery Scenarios', () => {
describe('Recovery after interruption', () => {
  let harness: TestHarness;
  beforeEach(() => {
    harness = createTestHarness();
  });
  afterEach(() => {
    harness.cleanup();
    // Some tests in this group enable fake timers; always restore.
    vi.useRealTimers();
  });
  it('queue state survives in database (source of truth)', async () => {
    // Seed fixture, queue tasks
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Queue task
    await harness.dispatchManager.queue(taskAId);
    // Verify queue state shows task (queued, not pending)
    const queueState1 = await harness.dispatchManager.getQueueState();
    expect(queueState1.queued.length).toBe(1);
    expect(queueState1.queued[0].taskId).toBe(taskAId);
    // The queue state is in memory, but task status is in DB.
    // Verify task status in database directly
    const task = await harness.taskRepository.findById(taskAId);
    expect(task?.status).toBe('pending');
    // Verify: even after clearing in-memory queue state,
    // we can still find pending tasks from database
    const allTasks = await harness.taskRepository.findByParentTaskId(
      seeded.taskGroups.get('Task Group 1')!
    );
    const pendingTasks = allTasks.filter((t) => t.status === 'pending');
    // Task A is pending (not queued, but status is pending)
    // Task B and C are also pending but depend on Task A
    expect(pendingTasks.length).toBeGreaterThanOrEqual(1);
  });
  it('in-progress task recoverable after agent crash', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Pre-seed required idle agent
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    // Set crash scenario: the agent assigned to this task will fail
    harness.setAgentError(`agent-${taskAId.slice(0, 6)}`, 'Token limit exceeded');
    // Queue and dispatch
    await harness.dispatchManager.queue(taskAId);
    await harness.dispatchManager.dispatchNext();
    await harness.advanceTimers();
    // Verify task status is 'in_progress' (not completed, not lost)
    let task = await harness.taskRepository.findById(taskAId);
    expect(task?.status).toBe('in_progress');
    // Task can be re-queued and dispatched to a new agent
    // First, clear agent manager and create new pool agent
    harness.agentManager.clear();
    await harness.agentManager.spawn({
      name: 'new-pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    // Re-queue the task (it's still in_progress but we can retry)
    await harness.dispatchManager.queue(taskAId);
    // Set success scenario for the new agent
    harness.setAgentDone(`agent-${taskAId.slice(0, 6)}`, 'Task completed after retry');
    // Clear events and dispatch again
    harness.clearEvents();
    const dispatchResult = await harness.dispatchManager.dispatchNext();
    await harness.advanceTimers();
    // Verify: agent completed successfully
    expect(dispatchResult.agentId).toBeDefined();
    const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
    expect(agentResult?.success).toBe(true);
  });
  it('blocked task state persists in database', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Queue task and block it
    await harness.dispatchManager.queue(taskAId);
    await harness.dispatchManager.blockTask(taskAId, 'Waiting for user decision');
    // Verify task in blocked state in DB
    const task = await harness.taskRepository.findById(taskAId);
    expect(task?.status).toBe('blocked');
    // Query blocked tasks from queue state
    const queueState = await harness.dispatchManager.getQueueState();
    expect(queueState.blocked.length).toBe(1);
    expect(queueState.blocked[0].taskId).toBe(taskAId);
    expect(queueState.blocked[0].reason).toBe('Waiting for user decision');
    // Re-queue task to unblock (set status back to pending via repository)
    await harness.taskRepository.update(taskAId, { status: 'pending' });
    await harness.dispatchManager.queue(taskAId);
    // Verify: task now in pending state in database
    const unblocked = await harness.taskRepository.findById(taskAId);
    expect(unblocked?.status).toBe('pending');
    // Task should be in queued list
    const queueState2 = await harness.dispatchManager.getQueueState();
    expect(queueState2.queued.some((t) => t.taskId === taskAId)).toBe(true);
  });
  it('merge queue state recoverable', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Mark task as completed (required for merge)
    await harness.taskRepository.update(taskAId, { status: 'completed' });
    // Create worktree for task
    const worktreeId = `wt-${taskAId.slice(0, 6)}`;
    await harness.worktreeManager.create(worktreeId, 'feature-task-a');
    // Create agent in agentRepository (required for merge lookup)
    await harness.agentRepository.create({
      name: `agent-${taskAId.slice(0, 6)}`,
      worktreeId,
      taskId: taskAId,
      status: 'idle',
    });
    // Queue for merge
    await harness.coordinationManager.queueMerge(taskAId);
    // Verify merge queue has queued item
    const queueState1 = await harness.coordinationManager.getQueueState();
    expect(queueState1.queued.some((item) => item.taskId === taskAId)).toBe(true);
    // Process merge
    const results = await harness.coordinationManager.processMerges('main');
    // Verify: merge completed correctly
    expect(results.length).toBe(1);
    expect(results[0].taskId).toBe(taskAId);
    expect(results[0].success).toBe(true);
    // Verify: task in merged list
    const queueState2 = await harness.coordinationManager.getQueueState();
    expect(queueState2.merged.includes(taskAId)).toBe(true);
  });
});
describe('Agent Q&A extended scenarios', () => {
// Fresh harness per test; afterEach also restores real timers because
// tests in this group enable vi.useFakeTimers().
let harness: TestHarness;
beforeEach(() => {
  harness = createTestHarness();
});
afterEach(() => {
  harness.cleanup();
  vi.useRealTimers();
});
// Full Q&A round-trip: agent asks a question (agent:waiting), is resumed
// with an answers map, then finishes (agent:resumed, agent:stopped).
it('question enters waiting state and completes after resume', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;
  // Pre-seed required idle agent
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();
  // Set questions scenario with options
  harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
    {
      id: 'q1',
      question: 'Which database should I use?',
      options: [
        { label: 'PostgreSQL', description: 'Relational, ACID compliant' },
        { label: 'SQLite', description: 'Lightweight, file-based' },
      ],
    },
  ]);
  // Queue and dispatch
  await harness.dispatchManager.queue(taskAId);
  harness.clearEvents();
  const dispatchResult = await harness.dispatchManager.dispatchNext();
  await harness.advanceTimers();
  // Verify: agent:waiting event emitted
  const waitingEvents = harness.getEventsByType('agent:waiting');
  expect(waitingEvents.length).toBe(1);
  const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
  expect(waitingPayload.taskId).toBe(taskAId);
  expect(waitingPayload.questions[0].question).toBe('Which database should I use?');
  // Clear and resume with answers map (question id -> chosen answer)
  harness.clearEvents();
  await harness.agentManager.resume(dispatchResult.agentId!, { q1: 'PostgreSQL' });
  await harness.advanceTimers();
  // Verify: resumed and stopped events
  const resumedEvents = harness.getEventsByType('agent:resumed');
  expect(resumedEvents.length).toBe(1);
  const resumedPayload = (resumedEvents[0] as AgentResumedEvent).payload;
  expect(resumedPayload.taskId).toBe(taskAId);
  const stoppedEvents = harness.getEventsByType('agent:stopped');
  expect(stoppedEvents.length).toBe(1);
  const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
  expect(stoppedPayload.reason).toBe('task_complete');
});
// Question options must surface both on the agent:waiting event payload and
// via the harness getPendingQuestions() accessor, as structured data.
it('questions surface as structured PendingQuestions', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;
  // Pre-seed required idle agent
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();
  // Set questions scenario with options
  harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
    {
      id: 'q1',
      question: 'Select your framework',
      options: [
        { label: 'React' },
        { label: 'Vue' },
        { label: 'Svelte' },
      ],
    },
  ]);
  // Queue and dispatch
  await harness.dispatchManager.queue(taskAId);
  const dispatchResult = await harness.dispatchManager.dispatchNext();
  await harness.advanceTimers();
  // Verify: agent:waiting event has questions
  const waitingEvents = harness.getEventsByType('agent:waiting');
  expect(waitingEvents.length).toBe(1);
  const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
  expect(waitingPayload.questions[0].question).toBe('Select your framework');
  expect(waitingPayload.questions[0].options).toEqual([
    { label: 'React' },
    { label: 'Vue' },
    { label: 'Svelte' },
  ]);
  // Verify: getPendingQuestions returns structured data
  const pendingQuestions = await harness.getPendingQuestions(dispatchResult.agentId!);
  expect(pendingQuestions).not.toBeNull();
  expect(pendingQuestions?.questions[0].question).toBe('Select your framework');
  expect(pendingQuestions?.questions[0].options).toEqual([
    { label: 'React' },
    { label: 'Vue' },
    { label: 'Svelte' },
  ]);
});
it('agent resumes with answer and completes successfully', async () => {
  vi.useFakeTimers();
  const fixture = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskId = fixture.tasks.get('Task A')!;

  // DispatchManager requires an idle agent in the pool before dispatching.
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();

  // The dispatched agent will pause on a single free-form question.
  harness.setAgentQuestions(`agent-${taskId.slice(0, 6)}`, [
    { id: 'q1', question: 'Choose database type' },
  ]);

  await harness.dispatchManager.queue(taskId);
  const dispatched = await harness.dispatchManager.dispatchNext();
  await harness.advanceTimers();

  // The agent should now be paused, waiting for an answer.
  const pausedAgent = await harness.agentManager.get(dispatched.agentId!);
  expect(pausedAgent?.status).toBe('waiting_for_input');

  // Supply the answer keyed by question id; the mock agent finishes its run.
  await harness.agentManager.resume(dispatched.agentId!, { q1: 'PostgreSQL' });
  await harness.advanceTimers();

  // The run result reflects a successful, resumed completion.
  const runResult = await harness.agentManager.getResult(dispatched.agentId!);
  expect(runResult).not.toBeNull();
  expect(runResult?.success).toBe(true);
  expect(runResult?.message).toBe('Resumed and completed successfully');

  // Finished agents return to the idle pool.
  const finishedAgent = await harness.agentManager.get(dispatched.agentId!);
  expect(finishedAgent?.status).toBe('idle');
});
it('waiting agent status transitions correctly through full cycle', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;
  // Pre-seed required idle agent (DispatchManager needs one idle agent in the pool)
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();
  // Set questions scenario for the agent the dispatcher will spawn
  harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
    { id: 'q1', question: 'API key format?' },
  ]);
  // Queue and dispatch
  await harness.dispatchManager.queue(taskAId);
  const dispatchResult = await harness.dispatchManager.dispatchNext();
  // Phase 1: Initially running (timers not advanced yet, scenario not executed)
  let agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('running');
  await harness.advanceTimers();
  // Phase 2: After scenario completes, waiting_for_input
  agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('waiting_for_input');
  // Verify pending questions exist while the agent is paused
  const pendingQuestions = await harness.getPendingQuestions(dispatchResult.agentId!);
  expect(pendingQuestions?.questions[0].question).toBe('API key format?');
  // Phase 3: Resume with answers map keyed by question id
  await harness.agentManager.resume(dispatchResult.agentId!, { q1: 'Bearer token' });
  // After resume: running again briefly (until timers advance)
  agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('running');
  await harness.advanceTimers();
  // Phase 4: After completion, idle (back in the dispatch pool)
  agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('idle');
  // Verify pending questions is cleared after resume — no stale state left behind
  const clearedQuestions = await harness.getPendingQuestions(dispatchResult.agentId!);
  expect(clearedQuestions).toBeNull();
});
it('should handle agent asking multiple questions at once', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;
  // Pre-seed required idle agent
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();
  // Setup: agent asks two questions in a single waiting cycle
  harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
    {
      id: 'q1',
      question: 'Which database?',
      options: [{ label: 'SQLite' }, { label: 'Postgres' }],
    },
    {
      id: 'q2',
      question: 'Include tests?',
      options: [{ label: 'Yes' }, { label: 'No' }],
    },
  ]);
  // Queue and dispatch task
  await harness.dispatchManager.queue(taskAId);
  harness.clearEvents();
  const dispatchResult = await harness.dispatchManager.dispatchNext();
  await harness.advanceTimers();
  // Verify: a single agent:waiting event is emitted even with multiple questions
  const waitingEvents = harness.getEventsByType('agent:waiting');
  expect(waitingEvents.length).toBe(1);
  const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
  expect(waitingPayload.taskId).toBe(taskAId);
  // Verify both questions present, in declaration order
  const pending = await harness.getPendingQuestions(dispatchResult.agentId!);
  expect(pending?.questions).toHaveLength(2);
  expect(pending?.questions[0].id).toBe('q1');
  expect(pending?.questions[0].question).toBe('Which database?');
  expect(pending?.questions[1].id).toBe('q2');
  expect(pending?.questions[1].question).toBe('Include tests?');
  // Resume with answers for both questions in one call
  harness.clearEvents();
  await harness.agentManager.resume(dispatchResult.agentId!, {
    q1: 'SQLite',
    q2: 'Yes',
  });
  await harness.advanceTimers();
  // Verify: agent:resumed event emitted
  const resumedEvents = harness.getEventsByType('agent:resumed');
  expect(resumedEvents.length).toBe(1);
  // Verify: agent:stopped event emitted (after resume completes)
  const stoppedEvents = harness.getEventsByType('agent:stopped');
  expect(stoppedEvents.length).toBe(1);
  const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
  expect(stoppedPayload.taskId).toBe(taskAId);
  expect(stoppedPayload.reason).toBe('task_complete');
  // Verify task completed (agent result reports success)
  const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
  expect(agentResult?.success).toBe(true);
  // Verify agent is now idle and back in the pool
  const finalAgent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(finalAgent?.status).toBe('idle');
});
});
});

View File

@@ -0,0 +1,316 @@
/**
* Test Fixtures for E2E Testing
*
* Provides fixture helpers that seed complete task hierarchies
* for integration and E2E tests.
*/
import { nanoid } from 'nanoid';
import type { DrizzleDatabase } from '../db/index.js';
import {
DrizzleInitiativeRepository,
DrizzlePhaseRepository,
DrizzleTaskRepository,
} from '../db/repositories/drizzle/index.js';
import { taskDependencies } from '../db/schema.js';
// =============================================================================
// Fixture Interfaces
// =============================================================================
/**
 * Task fixture definition.
 */
export interface TaskFixture {
  /**
   * Unique identifier for this task within the fixture. This is the key
   * other tasks reference in `dependsOn` (see seedFixture's resolution
   * pass) — in the bundled fixtures it happens to equal `name`.
   */
  id: string;
  /** Task name */
  name: string;
  /** Task priority */
  priority?: 'low' | 'medium' | 'high';
  /** Task category */
  category?: 'execute' | 'research' | 'discuss' | 'plan' | 'detail' | 'refine' | 'verify' | 'merge' | 'review';
  /**
   * `id` values of other tasks in the same fixture this task depends on.
   * Note: these are fixture ids, not database ids — seedFixture translates
   * them into real task ids after all tasks are created.
   */
  dependsOn?: string[];
}
/**
 * Task group fixture definition (replaces Plan).
 * Tasks are grouped by parent task in the new model.
 */
export interface TaskGroupFixture {
  /** Group name (becomes a completed parent 'detail' task) */
  name: string;
  /** Tasks in this group (created as children of the detail task) */
  tasks: TaskFixture[];
}
/**
 * Phase fixture definition.
 */
export interface PhaseFixture {
  /** Phase name */
  name: string;
  /** Task groups in this phase (each group becomes a parent detail task) */
  taskGroups: TaskGroupFixture[];
}
/**
 * Initiative fixture definition (top-level).
 */
export interface InitiativeFixture {
  /** Initiative name */
  name: string;
  /** Phases in this initiative */
  phases: PhaseFixture[];
}
/**
 * Result of seeding a fixture.
 * Maps names to IDs for all created entities.
 */
export interface SeededFixture {
  /** ID of the created initiative */
  initiativeId: string;
  /** Map of phase names to IDs */
  phases: Map<string, string>;
  /** Map of task group names to parent task IDs */
  taskGroups: Map<string, string>;
  /** Map of task fixture ids to database task IDs */
  tasks: Map<string, string>;
}
// =============================================================================
// Seed Function
// =============================================================================
/**
 * Seed a complete task hierarchy from a fixture definition.
 *
 * Creates initiative, phases, detail tasks (as parents), and child tasks,
 * then resolves `dependsOn` references (fixture ids) into real task ids.
 *
 * @param db - Drizzle database instance
 * @param fixture - The fixture definition to seed
 * @returns SeededFixture with all created entity IDs
 */
export async function seedFixture(
  db: DrizzleDatabase,
  fixture: InitiativeFixture
): Promise<SeededFixture> {
  const initiativeRepo = new DrizzleInitiativeRepository(db);
  const phaseRepo = new DrizzlePhaseRepository(db);
  const taskRepo = new DrizzleTaskRepository(db);

  const phaseIds = new Map<string, string>();
  const groupIds = new Map<string, string>();
  const taskIds = new Map<string, string>();

  // dependsOn may reference tasks that are created later in the fixture,
  // so edges are collected here and resolved in a second pass.
  const deferredDeps: Array<{ taskId: string; dependsOnNames: string[] }> = [];

  const initiative = await initiativeRepo.create({
    name: fixture.name,
    status: 'active',
  });

  for (const phaseDef of fixture.phases) {
    const phase = await phaseRepo.create({
      initiativeId: initiative.id,
      name: phaseDef.name,
      status: 'pending',
    });
    phaseIds.set(phaseDef.name, phase.id);

    // Each task group materializes as a parent 'detail' task.
    let groupOrder = 0;
    for (const groupDef of phaseDef.taskGroups) {
      const parent = await taskRepo.create({
        phaseId: phase.id,
        initiativeId: initiative.id,
        name: groupDef.name,
        description: `Test task group: ${groupDef.name}`,
        category: 'detail',
        type: 'auto',
        priority: 'medium',
        status: 'completed', // Detail tasks are completed once child tasks are created
        order: groupOrder++,
      });
      groupIds.set(groupDef.name, parent.id);

      // Child tasks hang off the parent detail task.
      let childOrder = 0;
      for (const taskDef of groupDef.tasks) {
        const child = await taskRepo.create({
          parentTaskId: parent.id,
          phaseId: phase.id,
          initiativeId: initiative.id,
          name: taskDef.name,
          description: `Test task: ${taskDef.name}`,
          category: taskDef.category ?? 'execute',
          type: 'auto',
          priority: taskDef.priority ?? 'medium',
          status: 'pending',
          order: childOrder++,
        });
        taskIds.set(taskDef.id, child.id);

        if (taskDef.dependsOn && taskDef.dependsOn.length > 0) {
          deferredDeps.push({ taskId: child.id, dependsOnNames: taskDef.dependsOn });
        }
      }
    }
  }

  // Second pass: translate fixture ids into real task ids and persist edges.
  for (const { taskId, dependsOnNames } of deferredDeps) {
    for (const depName of dependsOnNames) {
      const dependsOnTaskId = taskIds.get(depName);
      if (!dependsOnTaskId) {
        throw new Error(
          `Dependency resolution failed: task "${depName}" not found in fixture`
        );
      }
      await db.insert(taskDependencies).values({
        id: nanoid(),
        taskId,
        dependsOnTaskId,
        createdAt: new Date(),
      });
    }
  }

  return {
    initiativeId: initiative.id,
    phases: phaseIds,
    taskGroups: groupIds,
    tasks: taskIds,
  };
}
// =============================================================================
// Convenience Fixtures
// =============================================================================
/**
 * Simple fixture: 1 initiative -> 1 phase -> 1 task group -> 3 tasks.
 *
 * Task dependency structure:
 * - Task A: no dependencies
 * - Task B: depends on Task A
 * - Task C: depends on Task A
 *
 * Note: each task's `id` equals its `name`; `dependsOn` entries reference
 * these `id` values.
 */
export const SIMPLE_FIXTURE: InitiativeFixture = {
  name: 'Simple Test Initiative',
  phases: [
    {
      name: 'Phase 1',
      taskGroups: [
        {
          name: 'Task Group 1',
          tasks: [
            { id: 'Task A', name: 'Task A', priority: 'high' },
            { id: 'Task B', name: 'Task B', priority: 'medium', dependsOn: ['Task A'] },
            { id: 'Task C', name: 'Task C', priority: 'medium', dependsOn: ['Task A'] },
          ],
        },
      ],
    },
  ],
};
/**
 * Parallel fixture: 1 initiative -> 1 phase -> 2 task groups (each with 2 independent tasks).
 *
 * Task structure (no dependencies anywhere — all four tasks are immediately
 * dispatchable, which is what makes this fixture useful for parallelism tests):
 * - Group A: Task X, Task Y (independent)
 * - Group B: Task P, Task Q (independent)
 */
export const PARALLEL_FIXTURE: InitiativeFixture = {
  name: 'Parallel Test Initiative',
  phases: [
    {
      name: 'Parallel Phase',
      taskGroups: [
        {
          name: 'Group A',
          tasks: [
            { id: 'Task X', name: 'Task X', priority: 'high' },
            { id: 'Task Y', name: 'Task Y', priority: 'medium' },
          ],
        },
        {
          name: 'Group B',
          tasks: [
            { id: 'Task P', name: 'Task P', priority: 'high' },
            { id: 'Task Q', name: 'Task Q', priority: 'low' },
          ],
        },
      ],
    },
  ],
};
/**
 * Complex fixture: 1 initiative -> 2 phases -> 4 task groups with cross-group dependencies.
 *
 * Structure:
 * - Phase 1: Group 1 (Task 1A, 1B), Group 2 (Task 2A depends on 1A)
 * - Phase 2: Group 3 (Task 3A depends on 1B), Group 4 (Task 4A depends on 2A and 3A)
 *
 * Dependencies deliberately cross both group and phase boundaries, and
 * Task 4A fans in from two predecessors — exercising multi-edge resolution.
 */
export const COMPLEX_FIXTURE: InitiativeFixture = {
  name: 'Complex Test Initiative',
  phases: [
    {
      name: 'Phase 1',
      taskGroups: [
        {
          name: 'Group 1',
          tasks: [
            { id: 'Task 1A', name: 'Task 1A', priority: 'high' },
            { id: 'Task 1B', name: 'Task 1B', priority: 'medium' },
          ],
        },
        {
          name: 'Group 2',
          tasks: [
            { id: 'Task 2A', name: 'Task 2A', priority: 'high', dependsOn: ['Task 1A'] },
          ],
        },
      ],
    },
    {
      name: 'Phase 2',
      taskGroups: [
        {
          name: 'Group 3',
          tasks: [
            { id: 'Task 3A', name: 'Task 3A', priority: 'high', dependsOn: ['Task 1B'] },
          ],
        },
        {
          name: 'Group 4',
          tasks: [
            {
              id: 'Task 4A',
              name: 'Task 4A',
              priority: 'high',
              dependsOn: ['Task 2A', 'Task 3A'],
            },
          ],
        },
      ],
    },
  ],
};

View File

@@ -0,0 +1,35 @@
# todo-api
A minimal zero-dependency in-memory todo list library for Node.js.
## API
```js
import { TodoStore } from './src/todo.js';
const store = new TodoStore();
const id = store.add('buy milk'); // returns numeric id
store.list(); // returns [{ id, text, done }]
store.remove(id); // deletes item
store.complete(id); // NOT IMPLEMENTED — marks item done
```
## Status
The `complete(id)` method is **missing**. The test suite in `src/todo.test.js` covers it and currently fails:
```
node --test src/todo.test.js
# → TypeError: store.complete is not a function
```
## Task
Implement `complete(id)` on `TodoStore` in `src/todo.js` so that it:
1. Finds the item with the given `id`.
2. Sets `item.done = true`.
3. Does not throw if `id` is not found (silent no-op).
All five tests in `src/todo.test.js` should pass after the fix.

View File

@@ -0,0 +1,8 @@
{
"name": "todo-api",
"version": "1.0.0",
"type": "module",
"scripts": {
"test": "node --test src/todo.test.js"
}
}

View File

@@ -0,0 +1,19 @@
/**
 * Minimal in-memory todo store (test fixture for the todo-api task).
 *
 * Bug fix: ids were previously generated with Date.now(), so two add()
 * calls inside the same millisecond produced DUPLICATE ids — remove(id)
 * would then delete both items and lookups were ambiguous, making the
 * fixture's own tests flaky. A monotonic counter keeps ids numeric
 * (per the README contract) and guaranteed unique.
 *
 * Note: complete(id) is intentionally still absent — that gap is the
 * exercise this fixture exists to provide (see README / todo.test.js).
 */
export class TodoStore {
  #items = [];
  // Monotonic id source; incremented on every add().
  #nextId = 1;

  /** Add an item with the given text; returns its unique numeric id. */
  add(text) {
    const id = this.#nextId++;
    this.#items.push({ id, text, done: false });
    return id;
  }

  /** Return a shallow copy of all items ({ id, text, done }). */
  list() {
    return [...this.#items];
  }

  /** Delete the item with the given id; no-op if absent. */
  remove(id) {
    this.#items = this.#items.filter(i => i.id !== id);
  }

  // complete(id) deliberately missing — implement me!
}

View File

@@ -0,0 +1,41 @@
// Tests for TodoStore, run with the built-in node:test runner (`npm test`).
import { test } from 'node:test';
import assert from 'node:assert/strict';
import { TodoStore } from './todo.js';

test('add returns an id', () => {
  const store = new TodoStore();
  const id = store.add('buy milk');
  assert.ok(typeof id === 'number', 'id should be a number');
});

test('list returns all items', () => {
  const store = new TodoStore();
  store.add('task one');
  store.add('task two');
  assert.equal(store.list().length, 2);
});

test('remove deletes an item', () => {
  const store = new TodoStore();
  const id = store.add('delete me');
  store.remove(id);
  assert.equal(store.list().length, 0);
});

// The two tests below exercise complete(), which the fixture deliberately
// omits — they fail with "store.complete is not a function" until it is
// implemented (see README "Task" section).
test('complete marks item done', () => {
  const store = new TodoStore();
  const id = store.add('buy milk');
  store.complete(id);
  const item = store.list().find(i => i.id === id);
  assert.ok(item, 'item should still exist after completing');
  assert.equal(item.done, true, 'item.done should be true after complete()');
});

test('complete does not affect other items', () => {
  const store = new TodoStore();
  const id1 = store.add('task one');
  const id2 = store.add('task two');
  store.complete(id1);
  const item2 = store.list().find(i => i.id === id2);
  assert.equal(item2.done, false, 'other items should remain undone');
});

View File

@@ -0,0 +1,394 @@
/**
* Tests for Test Harness
*
* Proves that the test harness enables E2E testing scenarios.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
PARALLEL_FIXTURE,
COMPLEX_FIXTURE,
type TestHarness,
} from './index.js';
import { taskDependencies } from '../db/schema.js';
import { eq } from 'drizzle-orm';
describe('TestHarness', () => {
// Fresh harness per test; torn down afterwards so in-memory DB and
// captured events never leak between cases.
let harness: TestHarness;
beforeEach(() => {
  harness = createTestHarness();
});
afterEach(() => {
  harness.cleanup();
  // Restore real timers in case a test enabled vi.useFakeTimers().
  vi.useRealTimers();
});
describe('createTestHarness', () => {
  it('returns all components', () => {
    // Every wired-up subsystem must be present on the harness object.
    const components = [
      harness.db,
      harness.eventBus,
      harness.agentManager,
      harness.worktreeManager,
      harness.dispatchManager,
      harness.coordinationManager,
      harness.taskRepository,
      harness.messageRepository,
      harness.agentRepository,
    ];
    for (const component of components) {
      expect(component).toBeDefined();
    }
  });

  it('provides helper methods', () => {
    // Convenience API layered on top of the raw managers for tests.
    const helpers = [
      harness.seedFixture,
      harness.setAgentScenario,
      harness.setAgentQuestion,
      harness.setAgentQuestions,
      harness.getEventsByType,
      harness.clearEvents,
      harness.cleanup,
    ];
    for (const helper of helpers) {
      expect(typeof helper).toBe('function');
    }
  });
});
describe('setAgentQuestion convenience helper', () => {
  it('wraps single question in array format', async () => {
    vi.useFakeTimers();
    // Set single question using the convenience method (singular form);
    // internally it should normalize to the questions-array shape.
    harness.setAgentQuestion('test-agent', 'q1', 'Which option?', [
      { label: 'Option A', description: 'First option' },
      { label: 'Option B', description: 'Second option' },
    ]);
    // Spawn agent with that scenario name so the question applies to it
    const agent = await harness.agentManager.spawn({
      name: 'test-agent',
      taskId: 'task-1',
      prompt: 'test',
    });
    await harness.advanceTimers();
    // Verify questions array format matches what setAgentQuestions would produce
    const pending = await harness.getPendingQuestions(agent.id);
    expect(pending).not.toBeNull();
    expect(pending?.questions).toHaveLength(1);
    expect(pending?.questions[0].id).toBe('q1');
    expect(pending?.questions[0].question).toBe('Which option?');
    expect(pending?.questions[0].options).toHaveLength(2);
  });
});
describe('seedFixture', () => {
  it('creates task hierarchy from SIMPLE_FIXTURE', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    // Check initiative created
    expect(seeded.initiativeId).toBeDefined();
    // Check phases created
    expect(seeded.phases.size).toBe(1);
    expect(seeded.phases.has('Phase 1')).toBe(true);
    // Check task groups created (each group becomes a parent detail task)
    expect(seeded.taskGroups.size).toBe(1);
    expect(seeded.taskGroups.has('Task Group 1')).toBe(true);
    // Check tasks created, keyed by fixture id
    expect(seeded.tasks.size).toBe(3);
    expect(seeded.tasks.has('Task A')).toBe(true);
    expect(seeded.tasks.has('Task B')).toBe(true);
    expect(seeded.tasks.has('Task C')).toBe(true);
  });

  it('returns correct IDs that exist in database', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    // The map value must be a real database id, not just a placeholder
    const taskAId = seeded.tasks.get('Task A')!;
    const taskA = await harness.taskRepository.findById(taskAId);
    expect(taskA).not.toBeNull();
    expect(taskA?.name).toBe('Task A');
  });

  it('creates PARALLEL_FIXTURE correctly', async () => {
    const seeded = await harness.seedFixture(PARALLEL_FIXTURE);
    expect(seeded.phases.size).toBe(1);
    expect(seeded.taskGroups.size).toBe(2);
    expect(seeded.tasks.size).toBe(4);
    expect(seeded.tasks.has('Task X')).toBe(true);
    expect(seeded.tasks.has('Task Q')).toBe(true);
  });

  it('creates COMPLEX_FIXTURE correctly', async () => {
    const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
    expect(seeded.phases.size).toBe(2);
    expect(seeded.taskGroups.size).toBe(4);
    expect(seeded.tasks.size).toBe(5);
  });
});
describe('task dependencies', () => {
  it('resolves dependencies correctly (dependsOn contains actual task IDs)', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    const taskBId = seeded.tasks.get('Task B')!;
    // Query task_dependencies table directly — the fixture's name-based
    // dependsOn must have been translated into real row ids
    const deps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, taskBId));
    expect(deps.length).toBe(1);
    expect(deps[0].dependsOnTaskId).toBe(taskAId);
  });

  it('creates multiple dependencies for a task', async () => {
    const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
    // Task 4A depends on both Task 2A and Task 3A (fan-in of two edges)
    const task4AId = seeded.tasks.get('Task 4A')!;
    const task2AId = seeded.tasks.get('Task 2A')!;
    const task3AId = seeded.tasks.get('Task 3A')!;
    const deps = await harness.db
      .select()
      .from(taskDependencies)
      .where(eq(taskDependencies.taskId, task4AId));
    expect(deps.length).toBe(2);
    // Order of rows is not guaranteed, so assert membership, not position
    const depIds = deps.map((d) => d.dependsOnTaskId);
    expect(depIds).toContain(task2AId);
    expect(depIds).toContain(task3AId);
  });
});
describe('event capture', () => {
  it('captures events via getEventsByType', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Queue a task (emits task:queued event through the CapturingEventBus)
    await harness.dispatchManager.queue(taskAId);
    const events = harness.getEventsByType('task:queued');
    expect(events.length).toBe(1);
    expect((events[0].payload as { taskId: string }).taskId).toBe(taskAId);
  });

  it('clears events via clearEvents', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    await harness.dispatchManager.queue(taskAId);
    expect(harness.getEventsByType('task:queued').length).toBe(1);
    // clearEvents drops the captured history but not the bus subscriptions
    harness.clearEvents();
    expect(harness.getEventsByType('task:queued').length).toBe(0);
  });
});
describe('dispatch flow', () => {
  it('dispatchManager.queue() + dispatchNext() uses MockAgentManager', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Note: DispatchManager.dispatchNext() requires an idle agent in the pool
    // before it will spawn a new agent. Pre-seed an idle agent.
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    // Wait for agent to complete and become idle
    await harness.advanceTimers();
    // Queue the task
    await harness.dispatchManager.queue(taskAId);
    // Clear events from queue and agent spawn so later assertions are clean
    harness.clearEvents();
    // Dispatch the task
    const result = await harness.dispatchManager.dispatchNext();
    // Advance timers to trigger mock agent completion
    await harness.advanceTimers();
    expect(result.success).toBe(true);
    expect(result.taskId).toBe(taskAId);
    expect(result.agentId).toBeDefined();
    // Should have emitted task:dispatched exactly once
    const dispatchedEvents = harness.getEventsByType('task:dispatched');
    expect(dispatchedEvents.length).toBe(1);
  });

  it('returns failure when no tasks are queued', async () => {
    // Empty queue: dispatchNext reports the structured failure reason
    const result = await harness.dispatchManager.dispatchNext();
    expect(result.success).toBe(false);
    expect(result.reason).toBe('No dispatchable tasks');
  });

  it('returns failure when no idle agents available', async () => {
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Queue the task but don't pre-seed any agents
    await harness.dispatchManager.queue(taskAId);
    // Dispatch without any agents in pool
    const result = await harness.dispatchManager.dispatchNext();
    expect(result.success).toBe(false);
    expect(result.reason).toBe('No available agents');
  });
});
describe('agent completion triggers events', () => {
  it('agent completion emits agent:stopped event', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Pre-seed an idle agent (required by DispatchManager)
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Queue and dispatch
    await harness.dispatchManager.queue(taskAId);
    harness.clearEvents();
    await harness.dispatchManager.dispatchNext();
    // Should have agent:spawned for the task agent (pool agent's was cleared)
    const spawnedEvents = harness.getEventsByType('agent:spawned');
    expect(spawnedEvents.length).toBe(1);
    // Advance timers to trigger the mock agent's scripted completion
    await harness.advanceTimers();
    // Should have agent:stopped
    const stoppedEvents = harness.getEventsByType('agent:stopped');
    expect(stoppedEvents.length).toBe(1);
  });

  it('custom scenario affects agent behavior', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Pre-seed an idle agent (required by DispatchManager)
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Set error scenario for the agent that will be spawned; the name matches
    // the dispatcher's `agent-` + first-6-chars-of-task-id convention
    harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
      status: 'error',
      delay: 0,
      error: 'Test crash',
    });
    // Queue and dispatch
    await harness.dispatchManager.queue(taskAId);
    harness.clearEvents();
    await harness.dispatchManager.dispatchNext();
    // Advance timers so the scripted failure fires
    await harness.advanceTimers();
    // Should have agent:crashed instead of a normal stop
    const crashedEvents = harness.getEventsByType('agent:crashed');
    expect(crashedEvents.length).toBe(1);
  });
});
describe('full dispatch -> complete -> merge flow', () => {
  it('works end-to-end', async () => {
    vi.useFakeTimers();
    const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
    const taskAId = seeded.tasks.get('Task A')!;
    // Pre-seed an idle agent (required by DispatchManager)
    await harness.agentManager.spawn({
      name: 'pool-agent',
      taskId: 'placeholder',
      prompt: 'placeholder',
    });
    await harness.advanceTimers();
    harness.clearEvents();
    // Step 1: Queue task
    await harness.dispatchManager.queue(taskAId);
    // Step 2: Dispatch task
    const dispatchResult = await harness.dispatchManager.dispatchNext();
    expect(dispatchResult.success).toBe(true);
    // Advance timers for agent completion
    await harness.advanceTimers();
    // Clear events for cleaner verification of the completion step
    harness.clearEvents();
    // Step 3: Complete task
    await harness.dispatchManager.completeTask(taskAId);
    // Verify task:completed event carries the right task id
    const completedEvents = harness.getEventsByType('task:completed');
    expect(completedEvents.length).toBe(1);
    expect((completedEvents[0].payload as { taskId: string }).taskId).toBe(taskAId);
    // Step 4: Verify task status persisted in database
    const task = await harness.taskRepository.findById(taskAId);
    expect(task?.status).toBe('completed');
  });
});
describe('MockWorktreeManager', () => {
  it('creates fake worktrees', async () => {
    const tree = await harness.worktreeManager.create('wt-1', 'feature-1');
    // The mock fabricates a path instead of running any git commands.
    expect(tree.id).toBe('wt-1');
    expect(tree.branch).toBe('feature-1');
    expect(tree.path).toContain('wt-1');
  });

  it('merge returns success by default', async () => {
    await harness.worktreeManager.create('wt-1', 'feature-1');
    const outcome = await harness.worktreeManager.merge('wt-1', 'main');
    expect(outcome.success).toBe(true);
  });

  it('allows setting custom merge results', async () => {
    await harness.worktreeManager.create('wt-1', 'feature-1');
    // Pre-program a conflict so failure handling can be exercised.
    harness.worktreeManager.setMergeResult('wt-1', {
      success: false,
      conflicts: ['file1.ts', 'file2.ts'],
      message: 'Merge conflict',
    });
    const outcome = await harness.worktreeManager.merge('wt-1', 'main');
    expect(outcome.success).toBe(false);
    expect(outcome.conflicts).toEqual(['file1.ts', 'file2.ts']);
  });
});
});

636
apps/server/test/harness.ts Normal file
View File

@@ -0,0 +1,636 @@
/**
* Test Harness for E2E Testing
*
* Wires up the full system with mocks for E2E testing.
* Uses real managers (DispatchManager, CoordinationManager) with
* MockAgentManager and MockWorktreeManager for isolation.
*/
import { randomUUID } from 'crypto';
import { vi } from 'vitest';
import type { DrizzleDatabase } from '../db/index.js';
import type { EventBus, DomainEvent } from '../events/types.js';
import { EventEmitterBus } from '../events/bus.js';
import type { AgentManager } from '../agent/types.js';
import { MockAgentManager, type MockAgentScenario } from '../agent/mock-manager.js';
import type { PendingQuestions, QuestionItem } from '../agent/types.js';
import type { WorktreeManager, Worktree, WorktreeDiff, MergeResult } from '../git/types.js';
import type { DispatchManager, PhaseDispatchManager } from '../dispatch/types.js';
import { DefaultDispatchManager } from '../dispatch/manager.js';
import { DefaultPhaseDispatchManager } from '../dispatch/phase-manager.js';
import type { CoordinationManager } from '../coordination/types.js';
import { DefaultCoordinationManager } from '../coordination/manager.js';
import type { TaskRepository } from '../db/repositories/task-repository.js';
import type { MessageRepository } from '../db/repositories/message-repository.js';
import type { AgentRepository } from '../db/repositories/agent-repository.js';
import type { InitiativeRepository } from '../db/repositories/initiative-repository.js';
import type { PhaseRepository } from '../db/repositories/phase-repository.js';
import type { Initiative, Phase, Task } from '../db/schema.js';
import { createTestDatabase } from '../db/repositories/drizzle/test-helpers.js';
import { createRepositories } from '../container.js';
import {
seedFixture,
type InitiativeFixture,
type SeededFixture,
} from './fixtures.js';
import { appRouter, createCallerFactory } from '../trpc/router.js';
import { createContext, type TRPCContext } from '../trpc/context.js';
// =============================================================================
// MockWorktreeManager
// =============================================================================
/**
 * In-memory stand-in for WorktreeManager.
 * Tracks worktrees in a Map and performs no real git operations.
 */
export class MockWorktreeManager implements WorktreeManager {
  private trees: Map<string, Worktree> = new Map();
  private plannedMerges: Map<string, MergeResult> = new Map();

  /**
   * Pre-program the result of a future merge() for one worktree.
   * Lets tests simulate conflict scenarios.
   */
  setMergeResult(worktreeId: string, result: MergeResult): void {
    this.plannedMerges.set(worktreeId, result);
  }

  async create(id: string, branch: string, baseBranch?: string): Promise<Worktree> {
    // baseBranch is accepted for interface compatibility but unused by the mock.
    const tree: Worktree = {
      id,
      branch,
      path: `/tmp/test-worktrees/${id}`,
      isMainWorktree: false,
    };
    this.trees.set(id, tree);
    return tree;
  }

  async remove(id: string): Promise<void> {
    this.requireTree(id);
    this.trees.delete(id);
    this.plannedMerges.delete(id);
  }

  async list(): Promise<Worktree[]> {
    return [...this.trees.values()];
  }

  async get(id: string): Promise<Worktree | null> {
    return this.trees.get(id) ?? null;
  }

  async diff(id: string): Promise<WorktreeDiff> {
    this.requireTree(id);
    // The mock never tracks file changes, so the diff is always empty.
    return {
      files: [],
      summary: 'No changes (mock)',
    };
  }

  async merge(id: string, targetBranch: string): Promise<MergeResult> {
    this.requireTree(id);
    // A pre-programmed result wins; otherwise merges always succeed.
    const planned = this.plannedMerges.get(id);
    if (planned) {
      return planned;
    }
    return {
      success: true,
      message: `Merged ${id} into ${targetBranch} (mock)`,
    };
  }

  /**
   * Clear all worktrees and planned merge results.
   * Useful for test cleanup.
   */
  clear(): void {
    this.trees.clear();
    this.plannedMerges.clear();
  }

  /** Throw for unknown ids, mirroring the real manager's error text. */
  private requireTree(id: string): void {
    if (!this.trees.has(id)) {
      throw new Error(`Worktree not found: ${id}`);
    }
  }
}
// =============================================================================
// CapturingEventBus
// =============================================================================
/**
 * EventBus wrapper that captures all emitted events.
 * Extends EventEmitterBus with event capture functionality.
 */
export class CapturingEventBus extends EventEmitterBus {
  /** All emitted events, in emission order. */
  emittedEvents: DomainEvent[] = [];

  emit<T extends DomainEvent>(event: T): void {
    // Record first, then forward to the real bus so subscribers still fire.
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /**
   * Get events by type.
   */
  getEventsByType(type: string): DomainEvent[] {
    const matches: DomainEvent[] = [];
    for (const event of this.emittedEvents) {
      if (event.type === type) {
        matches.push(event);
      }
    }
    return matches;
  }

  /**
   * Clear captured events.
   */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}
// =============================================================================
// tRPC Caller Type
// =============================================================================
/**
 * Create caller factory for the app router.
 * The resulting caller invokes tRPC procedures directly in-process
 * (no HTTP transport), given a context built via createContext.
 */
const createCaller = createCallerFactory(appRouter);
/**
 * Type for the tRPC caller.
 * Derived from the factory so it always matches the app router's procedures.
 */
export type TRPCCaller = ReturnType<typeof createCaller>;
// =============================================================================
// TestHarness Interface
// =============================================================================
/**
 * Test harness for E2E testing.
 * Provides access to all system components and helper methods.
 *
 * Built by createTestHarness; all members share the same in-memory
 * database and event bus, so state is visible across components.
 */
export interface TestHarness {
  // Core components
  /** In-memory SQLite database */
  db: DrizzleDatabase;
  /** Event bus with event capture */
  eventBus: CapturingEventBus;
  /** Mock agent manager */
  agentManager: MockAgentManager;
  /** Alias for agentManager - used in tests for clarity */
  mockAgentManager: MockAgentManager;
  /** Mock worktree manager */
  worktreeManager: MockWorktreeManager;
  /** Real dispatch manager wired to mocks */
  dispatchManager: DispatchManager;
  /** Real phase dispatch manager wired to phaseRepository */
  phaseDispatchManager: PhaseDispatchManager;
  /** Real coordination manager wired to mocks */
  coordinationManager: CoordinationManager;
  // Repositories
  /** Task repository */
  taskRepository: TaskRepository;
  /** Message repository */
  messageRepository: MessageRepository;
  /** Agent repository */
  agentRepository: AgentRepository;
  /** Initiative repository */
  initiativeRepository: InitiativeRepository;
  /** Phase repository */
  phaseRepository: PhaseRepository;
  // tRPC Caller
  /** tRPC caller for direct procedure calls */
  caller: TRPCCaller;
  // Helpers
  /**
   * Seed a fixture into the database.
   * Delegates to the shared seedFixture helper using this harness's db.
   */
  seedFixture(fixture: InitiativeFixture): Promise<SeededFixture>;
  /**
   * Set scenario for a specific agent name.
   */
  setAgentScenario(agentName: string, scenario: MockAgentScenario): void;
  /**
   * Convenience: Set agent to complete with done status.
   */
  setAgentDone(agentName: string, result?: string): void;
  /**
   * Convenience: Set agent to ask questions (array form).
   */
  setAgentQuestions(
    agentName: string,
    questions: QuestionItem[]
  ): void;
  /**
   * Convenience: Set agent to ask a single question.
   * Wraps the question in an array internally.
   */
  setAgentQuestion(
    agentName: string,
    questionId: string,
    question: string,
    options?: Array<{ label: string; description?: string }>
  ): void;
  /**
   * Convenience: Set agent to fail with unrecoverable error.
   */
  setAgentError(agentName: string, error: string): void;
  /**
   * Get pending questions for an agent.
   */
  getPendingQuestions(agentId: string): Promise<PendingQuestions | null>;
  /**
   * Get events by type.
   */
  getEventsByType(type: string): DomainEvent[];
  /**
   * Get emitted events by type (alias for getEventsByType).
   */
  getEmittedEvents(type: string): DomainEvent[];
  /**
   * Clear all captured events.
   */
  clearEvents(): void;
  /**
   * Clean up all resources.
   * Clears the mock managers and captured events; the in-memory
   * database itself is not closed here.
   */
  cleanup(): void;
  /**
   * Advance fake timers (wrapper for vi.runAllTimersAsync).
   * Only works when vi.useFakeTimers() is active.
   */
  advanceTimers(): Promise<void>;
  /**
   * Run a test body with fake timers enabled.
   * Activates fake timers before the callback and restores real timers after,
   * even if the callback throws.
   */
  withFakeTimers(fn: () => Promise<void>): Promise<void>;
  // ==========================================================================
  // Architect Mode Helpers
  // ==========================================================================
  /**
   * Set up scenario where architect completes discussion.
   */
  setArchitectDiscussComplete(
    agentName: string,
    _decisions: unknown[],
    summary: string
  ): void;
  /**
   * Set up scenario where architect needs more questions in discuss mode.
   */
  setArchitectDiscussQuestions(
    agentName: string,
    questions: QuestionItem[]
  ): void;
  /**
   * Set up scenario where architect completes plan.
   */
  setArchitectPlanComplete(
    agentName: string,
    _phases: unknown[]
  ): void;
  /**
   * Set up scenario where architect completes detail.
   */
  setArchitectDetailComplete(
    agentName: string,
    _tasks: unknown[]
  ): void;
  /**
   * Set up scenario where architect needs questions in detail mode.
   */
  setArchitectDetailQuestions(
    agentName: string,
    questions: QuestionItem[]
  ): void;
  // ==========================================================================
  // Initiative/Phase/Plan Convenience Helpers
  // ==========================================================================
  /**
   * Get initiative by ID through tRPC.
   * Returns null when the lookup fails (e.g. initiative not found).
   */
  getInitiative(id: string): Promise<Initiative | null>;
  /**
   * Get phases for initiative through tRPC.
   */
  getPhases(initiativeId: string): Promise<Phase[]>;
  /**
   * Create initiative through tRPC.
   */
  createInitiative(name: string): Promise<Initiative>;
  /**
   * Create phases from plan output through tRPC.
   */
  createPhasesFromPlan(
    initiativeId: string,
    phases: Array<{ name: string }>
  ): Promise<Phase[]>;
  /**
   * Create a detail task through tRPC (replaces createPlan).
   * The task is created as category 'detail', type 'auto', requiring approval.
   */
  createDetailTask(
    phaseId: string,
    name: string,
    description?: string
  ): Promise<Task>;
  /**
   * Get child tasks of a parent task through tRPC.
   */
  getChildTasks(parentTaskId: string): Promise<Task[]>;
}
// =============================================================================
// createTestHarness Factory
// =============================================================================
/**
 * Create a fully wired test harness for E2E testing.
 *
 * Wires:
 * - In-memory SQLite database
 * - CapturingEventBus (captures all events)
 * - MockAgentManager (simulates agent behavior)
 * - MockWorktreeManager (fake worktrees)
 * - Real DefaultDispatchManager (with mock agent manager)
 * - Real DefaultCoordinationManager (with mock worktree manager)
 * - All repositories (Drizzle implementations)
 * - tRPC caller with full context
 */
export function createTestHarness(): TestHarness {
  // Create database
  const db = createTestDatabase();
  // Create event bus with capture
  const eventBus = new CapturingEventBus();
  // Create mock managers
  const agentManager = new MockAgentManager({ eventBus });
  const worktreeManager = new MockWorktreeManager();
  // Create repositories
  const repos = createRepositories(db);
  const { taskRepository, messageRepository, agentRepository, initiativeRepository, phaseRepository } = repos;
  // Create real managers wired to mocks
  const dispatchManager = new DefaultDispatchManager(
    taskRepository,
    messageRepository,
    agentManager,
    eventBus
  );
  const phaseDispatchManager = new DefaultPhaseDispatchManager(
    phaseRepository,
    taskRepository,
    dispatchManager,
    eventBus
  );
  const coordinationManager = new DefaultCoordinationManager(
    worktreeManager,
    taskRepository,
    agentRepository,
    messageRepository,
    eventBus
  );
  // Create tRPC context with all dependencies
  const ctx: TRPCContext = createContext({
    eventBus,
    serverStartedAt: new Date(),
    processCount: 0,
    agentManager,
    taskRepository,
    messageRepository,
    dispatchManager,
    phaseDispatchManager,
    coordinationManager,
    initiativeRepository,
    phaseRepository,
  });
  // Create tRPC caller
  const caller = createCaller(ctx);
  // Build harness
  const harness: TestHarness = {
    // Core components
    db,
    eventBus,
    agentManager,
    mockAgentManager: agentManager, // Alias for clarity in tests
    worktreeManager,
    dispatchManager,
    phaseDispatchManager,
    coordinationManager,
    // Repositories
    taskRepository,
    messageRepository,
    agentRepository,
    initiativeRepository,
    phaseRepository,
    // tRPC Caller
    caller,
    // Helpers
    seedFixture: (fixture: InitiativeFixture) => seedFixture(db, fixture),
    setAgentScenario: (agentName: string, scenario: MockAgentScenario) => {
      agentManager.setScenario(agentName, scenario);
    },
    setAgentDone: (agentName: string, result?: string) => {
      agentManager.setScenario(agentName, { status: 'done', result });
    },
    setAgentQuestions: (
      agentName: string,
      questions: QuestionItem[]
    ) => {
      agentManager.setScenario(agentName, { status: 'questions', questions });
    },
    setAgentQuestion: (
      agentName: string,
      questionId: string,
      question: string,
      options?: Array<{ label: string; description?: string }>
    ) => {
      // Single-question convenience: wrap into the array form expected by the mock.
      agentManager.setScenario(agentName, {
        status: 'questions',
        questions: [{ id: questionId, question, options }],
      });
    },
    setAgentError: (agentName: string, error: string) => {
      agentManager.setScenario(agentName, { status: 'error', error });
    },
    getPendingQuestions: (agentId: string) => agentManager.getPendingQuestions(agentId),
    getEventsByType: (type: string) => eventBus.getEventsByType(type),
    getEmittedEvents: (type: string) => eventBus.getEventsByType(type),
    clearEvents: () => eventBus.clearEvents(),
    // NOTE: cleanup resets the mocks and captured events; the in-memory db
    // is left alone (it is discarded with the harness).
    cleanup: () => {
      agentManager.clear();
      worktreeManager.clear();
      eventBus.clearEvents();
    },
    // Timer helper - requires vi.useFakeTimers() to be active
    advanceTimers: async () => { await vi.runAllTimersAsync(); },
    withFakeTimers: async (fn: () => Promise<void>) => {
      vi.useFakeTimers();
      try {
        await fn();
      } finally {
        // Always restore real timers, even if the callback throws.
        vi.useRealTimers();
      }
    },
    // ========================================================================
    // Architect Mode Helpers
    // ========================================================================
    // NOTE: the underscore-prefixed payload params (_decisions/_phases/_tasks)
    // are currently unused; these helpers only drive the mock agent's
    // terminal status/result/questions.
    setArchitectDiscussComplete: (
      agentName: string,
      _decisions: unknown[],
      summary: string
    ) => {
      agentManager.setScenario(agentName, {
        status: 'done',
        result: summary,
        delay: 0,
      });
    },
    setArchitectDiscussQuestions: (
      agentName: string,
      questions: QuestionItem[]
    ) => {
      agentManager.setScenario(agentName, {
        status: 'questions',
        questions,
        delay: 0,
      });
    },
    setArchitectPlanComplete: (
      agentName: string,
      _phases: unknown[]
    ) => {
      agentManager.setScenario(agentName, {
        status: 'done',
        result: 'Plan complete',
        delay: 0,
      });
    },
    setArchitectDetailComplete: (
      agentName: string,
      _tasks: unknown[]
    ) => {
      agentManager.setScenario(agentName, {
        status: 'done',
        result: 'Detail complete',
        delay: 0,
      });
    },
    setArchitectDetailQuestions: (
      agentName: string,
      questions: QuestionItem[]
    ) => {
      agentManager.setScenario(agentName, {
        status: 'questions',
        questions,
        delay: 0,
      });
    },
    // ========================================================================
    // Initiative/Phase/Plan Convenience Helpers
    // ========================================================================
    // getInitiative swallows lookup errors (e.g. tRPC NOT_FOUND) and
    // returns null so tests can assert absence without try/catch.
    getInitiative: async (id: string) => {
      try {
        return await caller.getInitiative({ id });
      } catch {
        return null;
      }
    },
    getPhases: (initiativeId: string) => {
      return caller.listPhases({ initiativeId });
    },
    createInitiative: (name: string) => {
      return caller.createInitiative({ name });
    },
    createPhasesFromPlan: (
      initiativeId: string,
      phases: Array<{ name: string }>
    ) => {
      return caller.createPhasesFromPlan({ initiativeId, phases });
    },
    // Detail tasks are created as auto tasks that require approval.
    createDetailTask: async (phaseId: string, name: string, description?: string) => {
      return caller.createPhaseTask({
        phaseId,
        name,
        description,
        category: 'detail',
        type: 'auto',
        requiresApproval: true,
      });
    },
    getChildTasks: (parentTaskId: string) => {
      return caller.listTasks({ parentTaskId });
    },
  };
  return harness;
}

27
apps/server/test/index.ts Normal file
View File

@@ -0,0 +1,27 @@
/**
* Test Module
*
* Provides test harness and fixtures for E2E testing.
*/
// Fixture helpers — database seeding plus prebuilt fixture constants
export {
  seedFixture,
  type TaskFixture,
  type TaskGroupFixture,
  type PhaseFixture,
  type InitiativeFixture,
  type SeededFixture,
  SIMPLE_FIXTURE,
  PARALLEL_FIXTURE,
  COMPLEX_FIXTURE,
} from './fixtures.js';
// Test harness — fully wired in-memory system plus capturing/mock components
export {
  createTestHarness,
  MockWorktreeManager,
  CapturingEventBus,
  type TestHarness,
  type TRPCCaller,
} from './harness.js';

View File

@@ -0,0 +1,203 @@
/**
* Agent Working Directory Verification Tests
*
* Tests that verify agents actually run in their intended working directories.
* These tests use simple shell commands to prove the agent execution location.
*
* IMPORTANT: These tests spawn real CLI processes and may incur API costs.
* They are SKIPPED by default to prevent accidental charges.
*
* To run these tests:
* ```bash
* REAL_WORKDIR_TESTS=1 npm test -- src/test/integration/agent-workdir-verification.test.ts --test-timeout=120000
* ```
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { mkdtemp, rm, readFile } from 'node:fs/promises';
import { existsSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { MultiProviderAgentManager } from '../../agent/manager.js';
import { createTestDatabase } from '../../db/repositories/drizzle/test-helpers.js';
import {
DrizzleAgentRepository,
DrizzleProjectRepository,
DrizzleAccountRepository,
DrizzleInitiativeRepository,
} from '../../db/repositories/drizzle/index.js';
import { EventEmitterBus } from '../../events/bus.js';
// Opt-in guard: these tests spawn real CLI agents and cost API credits.
const SHOULD_SKIP = !process.env.REAL_WORKDIR_TESTS;
const TEST_TIMEOUT = 60000; // per-test cap (ms); the polling loop below also stops at ~60s
describe.skipIf(SHOULD_SKIP)('Agent Working Directory Verification', () => {
  let tempDir: string;
  let agentManager: MultiProviderAgentManager;
  let agentRepository: DrizzleAgentRepository;
  beforeAll(async () => {
    if (SHOULD_SKIP) return;
    console.log('\n=== Running Agent Working Directory Tests ===');
    console.log('These tests verify agents run in correct working directories.\n');
    // Create temp directory for test workspace
    tempDir = await mkdtemp(join(tmpdir(), 'cw-workdir-test-'));
    // Set up test database and repositories
    // NOTE(review): createTestDatabase is used synchronously elsewhere in the
    // codebase; the await here is harmless either way — confirm its signature.
    const db = await createTestDatabase();
    const eventBus = new EventEmitterBus();
    agentRepository = new DrizzleAgentRepository(db);
    const projectRepository = new DrizzleProjectRepository(db);
    const accountRepository = new DrizzleAccountRepository(db);
    agentManager = new MultiProviderAgentManager(
      agentRepository,
      tempDir,
      projectRepository,
      accountRepository,
      eventBus,
    );
  });
  afterAll(async () => {
    if (SHOULD_SKIP || !tempDir) return;
    try {
      await rm(tempDir, { recursive: true });
    } catch (err) {
      // Cleanup is best-effort; leftover temp dirs are only a nuisance.
      console.warn('Failed to cleanup temp directory:', err);
    }
  });
  it('spawns agent in correct standalone working directory', async () => {
    // The prompt instructs the agent to record its own pwd so we can compare
    // it against the directory the manager claims to have spawned it in.
    const prompt = `
Write your current working directory to a file called 'verify-pwd.txt'.
Use this exact bash command:
pwd > verify-pwd.txt
Then output the signal: {"done": true}
`.trim();
    // Spawn standalone agent
    const agent = await agentManager.spawn({
      taskId: null,
      prompt,
      mode: 'execute',
      provider: 'claude',
    });
    expect(agent.id).toBeTruthy();
    expect(agent.status).toBe('running');
    // Wait for completion (poll agent status)
    let attempts = 0;
    const maxAttempts = 60; // 60 seconds timeout
    while (attempts < maxAttempts) {
      await new Promise(resolve => setTimeout(resolve, 1000));
      attempts++;
      const currentAgent = await agentRepository.findById(agent.id);
      if (!currentAgent || currentAgent.status !== 'running') {
        break;
      }
    }
    // Verify final agent state
    const completedAgent = await agentRepository.findById(agent.id);
    expect(completedAgent).toBeTruthy();
    expect(completedAgent!.status).not.toBe('running');
    // Get the agent's expected working directory
    const expectedWorkdir = join(tempDir, 'agent-workdirs', agent.name, 'workspace');
    // Read diagnostic files
    const diagnosticFile = join(expectedWorkdir, '.cw', 'spawn-diagnostic.json');
    const expectedPwdFile = join(expectedWorkdir, '.cw', 'expected-pwd.txt');
    const verifyPwdFile = join(expectedWorkdir, 'verify-pwd.txt');
    // Verify diagnostic files exist
    expect(existsSync(diagnosticFile), 'spawn diagnostic file should exist').toBe(true);
    expect(existsSync(expectedPwdFile), 'expected pwd file should exist').toBe(true);
    // Read diagnostic data
    const diagnostic = JSON.parse(await readFile(diagnosticFile, 'utf-8'));
    const expectedPwd = (await readFile(expectedPwdFile, 'utf-8')).trim();
    console.log('Diagnostic data:', diagnostic);
    console.log('Expected working directory:', expectedPwd);
    // Verify diagnostic consistency
    expect(diagnostic.intendedCwd).toBe(expectedWorkdir);
    expect(diagnostic.cwdExistsAtSpawn).toBe(true);
    expect(expectedPwd).toBe(expectedWorkdir);
    // The critical test: verify the agent actually wrote the file in the expected location
    if (existsSync(verifyPwdFile)) {
      const actualPwd = (await readFile(verifyPwdFile, 'utf-8')).trim();
      console.log('Agent reported working directory:', actualPwd);
      // This is the key verification: the pwd reported by the agent should match expected
      expect(actualPwd).toBe(expectedWorkdir);
    } else {
      // If the file doesn't exist, the agent either failed or ran somewhere else
      console.warn('Agent did not create verify-pwd.txt file');
      console.log('Expected at:', verifyPwdFile);
      // Let's check if it was created elsewhere (debugging)
      const alternativeLocations = [
        join(tempDir, 'verify-pwd.txt'),
        join(process.cwd(), 'verify-pwd.txt'),
      ];
      for (const loc of alternativeLocations) {
        if (existsSync(loc)) {
          const content = await readFile(loc, 'utf-8');
          console.log(`Found verify-pwd.txt at unexpected location ${loc}:`, content.trim());
        }
      }
      throw new Error('Agent did not create pwd verification file in expected location');
    }
  }, TEST_TIMEOUT);
  it('creates diagnostic files with correct metadata', async () => {
    // Minimal prompt: we only care about the spawn-time diagnostics here.
    const prompt = `Output the signal: {"done": true}`;
    const agent = await agentManager.spawn({
      taskId: null,
      prompt,
      mode: 'execute',
      provider: 'claude',
    });
    // Wait a bit for spawn to complete
    await new Promise(resolve => setTimeout(resolve, 2000));
    const expectedWorkdir = join(tempDir, 'agent-workdirs', agent.name, 'workspace');
    const diagnosticFile = join(expectedWorkdir, '.cw', 'spawn-diagnostic.json');
    const expectedPwdFile = join(expectedWorkdir, '.cw', 'expected-pwd.txt');
    // Verify files exist immediately after spawn
    expect(existsSync(diagnosticFile), 'diagnostic file should be created after spawn').toBe(true);
    expect(existsSync(expectedPwdFile), 'expected pwd file should be created').toBe(true);
    // Verify diagnostic content
    const diagnostic = JSON.parse(await readFile(diagnosticFile, 'utf-8'));
    const expectedPwd = (await readFile(expectedPwdFile, 'utf-8')).trim();
    expect(diagnostic.agentId).toBe(agent.id);
    expect(diagnostic.alias).toBe(agent.name);
    expect(diagnostic.intendedCwd).toBe(expectedWorkdir);
    expect(diagnostic.provider).toBe('claude');
    expect(diagnostic.cwdExistsAtSpawn).toBe(true);
    expect(diagnostic.customCwdProvided).toBe(false);
    expect(typeof diagnostic.timestamp).toBe('string');
    expect(Array.isArray(diagnostic.args)).toBe(true);
    expect(expectedPwd).toBe(expectedWorkdir);
  });
});

View File

@@ -0,0 +1,232 @@
/**
* Integration test to reproduce and fix the crash marking race condition.
*
* This test simulates the exact scenario where agents complete successfully
* but get marked as crashed due to timing issues in the output handler.
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { writeFile, mkdir, rm } from 'node:fs/promises';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { randomBytes } from 'node:crypto';
import { OutputHandler } from '../../agent/output-handler.js';
import type { AgentRepository } from '../../db/repositories/agent-repository.js';
/**
 * Minimal local mirror of the persisted agent row, as consumed by the
 * mock repository below. Values are plain test data — no real agent exists.
 */
interface TestAgent {
  id: string;
  name: string;
  /** Lifecycle status; tests assert on transitions away from 'running'. */
  status: 'idle' | 'running' | 'waiting_for_input' | 'stopped' | 'crashed';
  mode: 'execute' | 'discuss' | 'plan' | 'detail' | 'refine';
  taskId: string | null;
  sessionId: string | null;
  /** Used by the output handler to resolve the agent's working directory. */
  worktreeId: string;
  createdAt: Date;
  updatedAt: Date;
  provider: string;
  accountId: string | null;
  pid: number | null;
  outputFilePath: string | null;
  result: string | null;
  pendingQuestions: string | null;
  initiativeId: string | null;
  userDismissedAt: Date | null;
  exitCode: number | null;
}
describe('Crash marking race condition', () => {
  let outputHandler: OutputHandler;
  let testAgent: TestAgent;
  let testDir: string;
  let mockRepo: AgentRepository;
  // Track all repository calls
  let updateCalls: Array<{ id: string; data: any }> = [];
  // Last status written via mockRepo.update; null until an update happens.
  let finalAgentStatus: string | null = null;
  beforeEach(async () => {
    updateCalls = [];
    finalAgentStatus = null;
    // Create test directory structure
    testDir = join(tmpdir(), `crash-test-${randomBytes(8).toString('hex')}`);
    const outputDir = join(testDir, '.cw/output');
    await mkdir(outputDir, { recursive: true });
    // Create test agent
    testAgent = {
      id: 'test-agent-id',
      name: 'test-agent',
      status: 'running',
      mode: 'refine',
      taskId: 'task-1',
      sessionId: 'session-1',
      worktreeId: 'worktree-1',
      createdAt: new Date(),
      updatedAt: new Date(),
      provider: 'claude',
      accountId: null,
      pid: 12345,
      outputFilePath: join(testDir, 'output.jsonl'),
      result: null,
      pendingQuestions: null,
      initiativeId: 'init-1',
      userDismissedAt: null,
      exitCode: null
    };
    // Mock repository that tracks all update calls
    // Only findById/update are implemented; anything else throws so an
    // unexpected call fails the test loudly.
    mockRepo = {
      async findById(id: string) {
        return id === testAgent.id ? { ...testAgent } : null;
      },
      async update(id: string, data: any) {
        updateCalls.push({ id, data });
        if (data.status) {
          finalAgentStatus = data.status;
          testAgent.status = data.status;
        }
        return { ...testAgent, ...data };
      },
      async create() { throw new Error('Not implemented'); },
      async findAll() { throw new Error('Not implemented'); },
      async findByStatus() { throw new Error('Not implemented'); },
      async findByTaskId() { throw new Error('Not implemented'); },
      async findByName() { throw new Error('Not implemented'); },
      async findBySessionId() { throw new Error('Not implemented'); },
      async delete() { throw new Error('Not implemented'); }
    };
    outputHandler = new OutputHandler(mockRepo);
  });
  afterEach(async () => {
    try {
      await rm(testDir, { recursive: true });
    } catch {
      // Ignore cleanup errors
    }
  });
  it('should NOT mark agent as crashed when signal.json indicates completion', async () => {
    // SETUP: Create a valid completion signal that should prevent crash marking
    const signalPath = join(testDir, '.cw/output/signal.json');
    const signalContent = {
      status: 'questions',
      questions: [
        { id: 'q1', question: 'Test question?' }
      ]
    };
    await writeFile(signalPath, JSON.stringify(signalContent, null, 2));
    // SETUP: Create empty output file to simulate "no new output detected" scenario
    const outputFilePath = join(testDir, 'output.jsonl');
    await writeFile(outputFilePath, ''); // Empty file simulates the race condition
    // Mock active agent with output file path
    const mockActive = {
      outputFilePath,
      streamSessionId: 'session-1'
    };
    // Mock getAgentWorkdir function — receives worktreeId, not agentId
    const getAgentWorkdir = (worktreeId: string) => {
      expect(worktreeId).toBe(testAgent.worktreeId);
      return testDir;
    };
    // EXECUTE: Call handleCompletion which should trigger the race condition scenario
    // This simulates: no stream text + no new file content + valid signal.json
    // NOTE(review): handleCompletion is private; the `as any` cast is a
    // deliberate white-box test of internal behavior.
    await (outputHandler as any).handleCompletion(
      testAgent.id,
      mockActive,
      getAgentWorkdir
    );
    // VERIFY: Agent should NOT be marked as crashed
    console.log('Update calls:', updateCalls);
    console.log('Final agent status:', finalAgentStatus);
    expect(updateCalls.length).toBeGreaterThan(0);
    expect(finalAgentStatus).not.toBe('crashed');
    // Should be marked with the appropriate completion status
    expect(['idle', 'waiting_for_input', 'stopped']).toContain(finalAgentStatus);
  });
  it('should mark agent as crashed when no completion signal exists', async () => {
    // SETUP: No signal.json file exists - agent should be marked as crashed
    const outputFilePath = join(testDir, 'output.jsonl');
    await writeFile(outputFilePath, ''); // Empty file
    const mockActive = {
      outputFilePath,
      streamSessionId: 'session-1'
    };
    // NOTE(review): the first test shows this callback receives worktreeId;
    // the parameter name `agentId` here is misleading — consider renaming.
    const getAgentWorkdir = (agentId: string) => testDir;
    // EXECUTE: This should mark agent as crashed since no completion signal exists
    await (outputHandler as any).handleCompletion(
      testAgent.id,
      mockActive,
      getAgentWorkdir
    );
    // VERIFY: Agent SHOULD be marked as crashed
    expect(finalAgentStatus).toBe('crashed');
  });
  it('should handle the exact slim-wildebeest scenario', async () => {
    // SETUP: Reproduce the exact conditions that slim-wildebeest had
    const signalPath = join(testDir, '.cw/output/signal.json');
    const exactSignalContent = {
      "status": "questions",
      "questions": [
        {
          "id": "q1",
          "question": "What UI framework/styling system is the admin UI currently using that needs to be replaced?"
        },
        {
          "id": "q2",
          "question": "What specific problems with the current admin UI are we solving? (e.g., poor developer experience, design inconsistency, performance issues, lack of accessibility)"
        }
      ]
    };
    await writeFile(signalPath, JSON.stringify(exactSignalContent, null, 2));
    // Create SUMMARY.md like slim-wildebeest had
    const summaryPath = join(testDir, '.cw/output/SUMMARY.md');
    const summaryContent = `---
files_modified: []
---
Initiative page is essentially empty — lacks context, scope, goals, and technical approach. Requested clarification on current state, problems being solved, scope boundaries, and success criteria before proposing meaningful improvements.`;
    await writeFile(summaryPath, summaryContent);
    // Simulate the output file scenario
    const outputFilePath = join(testDir, 'output.jsonl');
    await writeFile(outputFilePath, 'some initial content\n'); // Some content but no new lines
    const mockActive = {
      outputFilePath,
      streamSessionId: 'session-1'
    };
    // NOTE(review): parameter name `agentId` is misleading — see note above.
    const getAgentWorkdir = (agentId: string) => testDir;
    // EXECUTE: This is the exact scenario that caused slim-wildebeest to be marked as crashed
    await (outputHandler as any).handleCompletion(
      testAgent.id,
      mockActive,
      getAgentWorkdir
    );
    // VERIFY: This should NOT be marked as crashed
    console.log('slim-wildebeest scenario - Final status:', finalAgentStatus);
    console.log('slim-wildebeest scenario - Update calls:', updateCalls);
    expect(finalAgentStatus).not.toBe('crashed');
    expect(['idle', 'waiting_for_input', 'stopped']).toContain(finalAgentStatus);
  });
});

View File

@@ -0,0 +1,244 @@
/**
* Full-Flow Cassette Integration Test
*
* Cassette-backed variant of the full multi-agent workflow test.
* Runs the same discuss → plan → detail → execute pipeline but intercepts
* subprocess spawning with CassetteProcessManager — no real API calls in CI.
*
* Recording (one-time, costs ~$25):
* CW_CASSETTE_RECORD=1 npm test -- src/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
* # Commit the generated src/test/cassettes/<hash>.json files afterward
*
* Replay (default — runs in seconds):
* npm test -- src/test/integration/full-flow/full-flow-cassette.test.ts
*
* Force re-record (overwrites existing cassettes):
* CW_CASSETTE_FORCE_RECORD=1 npm test -- src/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { existsSync, readdirSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import type { Phase, Task } from '../../../db/schema.js';
import type { AgentResult } from '../../../agent/types.js';
import { buildExecutePrompt } from '../../../agent/prompts/index.js';
import { CassetteStore } from '../../cassette/store.js';
import { CassetteProcessManager, type CassetteMode } from '../../cassette/process-manager.js';
import {
createFullFlowHarness,
type FullFlowHarness,
} from './harness.js';
import {
printHeader,
printDiscussResult,
printPlanResult,
printDetailResult,
printExecuteResult,
printFinalSummary,
type ExecutedTask,
} from './report.js';
// =============================================================================
// Constants
// =============================================================================
// True for any recording run — forced (overwrite) or normal (record-if-missing).
const RECORDING =
  process.env.CW_CASSETTE_FORCE_RECORD === '1' || process.env.CW_CASSETTE_RECORD === '1';
/**
 * Test timeout.
 * - Replay: 5 min (cassettes complete in seconds; cap is generous headroom)
 * - Record: 60 min (real agents doing discuss/plan/detail/execute take API time)
 */
const CASSETTE_FLOW_TIMEOUT = RECORDING ? 60 * 60_000 : 5 * 60_000;
// ESM has no __dirname; rebuild it from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Cassette location; overridable via CW_CASSETTE_DIR for ad-hoc runs.
const CASSETTE_DIR =
  process.env.CW_CASSETTE_DIR ?? join(__dirname, '../../cassettes');
// =============================================================================
// Mode helper
// =============================================================================
/**
 * Resolve the cassette mode from the environment.
 * Force-record takes precedence over normal recording; with neither flag
 * set, the suite replays previously recorded cassettes.
 */
function cassetteMode(): CassetteMode {
  const { CW_CASSETTE_FORCE_RECORD, CW_CASSETTE_RECORD } = process.env;
  if (CW_CASSETTE_FORCE_RECORD === '1') {
    return 'record';
  }
  return CW_CASSETTE_RECORD === '1' ? 'auto' : 'replay';
}
/**
 * True when cassettes are available (at least one .json file) OR we're in a
 * recording run. Skips the suite if no cassettes have been recorded yet so
 * that `npm test` doesn't fail on a fresh clone before cassettes are committed.
 */
function cassettesAvailable(): boolean {
  // Recording runs always proceed — they create the cassettes themselves.
  if (cassetteMode() !== 'replay') {
    return true;
  }
  if (!existsSync(CASSETTE_DIR)) {
    return false;
  }
  for (const name of readdirSync(CASSETTE_DIR)) {
    if (name.endsWith('.json')) {
      return true;
    }
  }
  return false;
}
// =============================================================================
// Test
// =============================================================================
// Runs the entire discuss → plan → detail → execute pipeline against recorded
// cassettes (or records them, depending on cassetteMode()). Stages are strictly
// sequential; each later stage consumes DB rows written by the previous one.
describe.skipIf(!cassettesAvailable())('full flow (cassette replay)', () => {
  let harness: FullFlowHarness;
  // Wall-clock start used for the duration line in the final summary.
  const startedAt = Date.now();
  beforeAll(async () => {
    // Swap the real process manager for the cassette-backed one so agent turns
    // are replayed from (or recorded into) CASSETTE_DIR instead of calling APIs.
    const store = new CassetteStore(CASSETTE_DIR);
    const mode = cassetteMode();
    harness = await createFullFlowHarness('Add complete() method to TodoStore', {
      processManagerFactory: (workspaceRoot, projectRepo) =>
        new CassetteProcessManager(workspaceRoot, projectRepo, store, mode),
    });
    printHeader(harness.initiative.name);
    console.log(`  Cassette mode : ${mode}`);
    console.log(`  Cassette dir  : ${CASSETTE_DIR}`);
    console.log(`  Initiative ID : ${harness.initiative.id}`);
    console.log(`  Workspace     : ${harness.workspaceRoot}`);
  }, CASSETTE_FLOW_TIMEOUT);
  afterAll(async () => {
    // harness may be undefined if beforeAll failed before assignment.
    if (harness) await harness.cleanup();
  });
  it(
    'runs the complete multi-agent workflow from cassettes',
    async () => {
      const { initiative, caller, agentManager, phaseRepository, taskRepository } = harness;
      const initiativeId = initiative.id;
      // ── Stage 2: Discuss ───────────────────────────────────────────────────
      console.log('\n\n>>> Stage 2: DISCUSS <<<');
      const discussAgent = await caller.spawnArchitectDiscuss({ initiativeId });
      expect(discussAgent.id).toBeTruthy();
      console.log(`  Spawned discuss agent: ${discussAgent.name} (${discussAgent.id})`);
      const discussResult = await harness.driveToCompletion(
        discussAgent.id,
        'Use your best judgment and keep it simple. The focus is implementing complete(id) on TodoStore.',
        CASSETTE_FLOW_TIMEOUT,
      );
      printDiscussResult(discussAgent.id, discussResult);
      // Discuss failure is non-fatal: planning can proceed without its output.
      if (!discussResult?.success) {
        console.warn('  [WARN] discuss agent did not succeed; continuing to plan stage');
      }
      // ── Stage 3: Plan ──────────────────────────────────────────────────────
      console.log('\n\n>>> Stage 3: PLAN <<<');
      const planAgent = await caller.spawnArchitectPlan({ initiativeId });
      expect(planAgent.id).toBeTruthy();
      console.log(`  Spawned plan agent: ${planAgent.name} (${planAgent.id})`);
      const planResult = await harness.driveToCompletion(
        planAgent.id,
        'Keep it simple.',
        CASSETTE_FLOW_TIMEOUT,
      );
      expect(planResult).toBeTruthy();
      // Planning must have produced at least one phase row.
      const phases: Phase[] = await phaseRepository.findByInitiativeId(initiativeId);
      expect(phases.length).toBeGreaterThan(0);
      printPlanResult(phases);
      // ── Stage 4: Detail (per phase) ────────────────────────────────────────
      console.log('\n\n>>> Stage 4: DETAIL <<<');
      for (const phase of phases) {
        const detailAgent = await caller.spawnArchitectDetail({ phaseId: phase.id });
        expect(detailAgent.id).toBeTruthy();
        console.log(`  Spawned detail agent for phase "${phase.name}": ${detailAgent.name}`);
        const detailResult = await harness.driveToCompletion(
          detailAgent.id,
          'Keep it simple.',
          CASSETTE_FLOW_TIMEOUT,
        );
        expect(detailResult).toBeTruthy();
        // Every phase must yield at least one auto-executable task.
        const phaseTasks = await taskRepository.findByPhaseId(phase.id);
        const executeTasks = phaseTasks.filter((t) => t.category === 'execute' && t.type === 'auto');
        expect(executeTasks.length).toBeGreaterThan(0);
        printDetailResult(phase, phaseTasks);
      }
      // ── Stage 5: Execute ───────────────────────────────────────────────────
      console.log('\n\n>>> Stage 5: EXECUTE <<<');
      const allTasks = await gatherAllExecuteTasks(taskRepository, phases);
      console.log(`  Found ${allTasks.length} execute task(s) across ${phases.length} phase(s)`);
      const executed: ExecutedTask[] = [];
      for (const task of allTasks) {
        console.log(`  Spawning execute agent for: "${task.name}"`);
        const execAgent = await agentManager.spawn({
          taskId: task.id,
          prompt: buildExecutePrompt(task.description ?? task.name),
          mode: 'execute',
          initiativeId,
          phaseId: task.phaseId ?? undefined,
          inputContext: {
            initiative,
            task,
          },
        });
        console.log(`    Agent: ${execAgent.name} (${execAgent.id})`);
        const result = await harness.driveToCompletion(
          execAgent.id,
          'Use your best judgment and keep it simple.',
          CASSETTE_FLOW_TIMEOUT,
        );
        executed.push({ task, result });
        const icon = result?.success ? '✓' : '✗';
        console.log(`    ${icon} Completed with success=${result?.success ?? null}`);
        if (result && !result.success) {
          console.log(`    Message: ${result.message?.slice(0, 200)}`);
        }
      }
      printExecuteResult(executed);
      // ── Assertions ─────────────────────────────────────────────────────────
      // Only "at least one task ran" is hard-asserted; individual execute
      // failures are reported as warnings rather than failing the test.
      expect(executed.length).toBeGreaterThan(0);
      const allSucceeded = executed.every((e) => e.result?.success === true);
      if (!allSucceeded) {
        const failed = executed.filter((e) => !e.result?.success);
        console.warn(`  [WARN] ${failed.length} execute task(s) did not succeed`);
      }
      // ── Final summary ──────────────────────────────────────────────────────
      printFinalSummary(
        initiative.name,
        phases,
        allTasks,
        executed,
        Date.now() - startedAt,
      );
    },
    CASSETTE_FLOW_TIMEOUT,
  );
});
// =============================================================================
// Helpers
// =============================================================================
/**
 * Collect every auto-executable task across the given phases.
 *
 * Phases are queried sequentially (preserving phase order in the result);
 * only tasks with category 'execute' and type 'auto' are kept.
 */
async function gatherAllExecuteTasks(
  taskRepository: FullFlowHarness['taskRepository'],
  phases: Phase[],
): Promise<Task[]> {
  const collected: Task[] = [];
  for (const { id } of phases) {
    const tasksForPhase = await taskRepository.findByPhaseId(id);
    collected.push(...tasksForPhase.filter((t) => t.category === 'execute' && t.type === 'auto'));
  }
  return collected;
}

View File

@@ -0,0 +1,399 @@
/**
* Full-Flow Test Harness
*
* Wires up the complete system with real agents for end-to-end multi-agent
* workflow testing: discuss → plan → detail → execute.
*
* Unlike the standard TestHarness (MockAgentManager) or RealProviderHarness
* (agents only), this harness adds:
* - All 11 repositories
* - tRPC caller for architect/agent procedures
* - A self-contained fixture git repo (todo-api) for agents to work on
* - Helpers for driving agents through question/answer loops
*
* Used by full-flow-cassette.test.ts (replay) and for manual recording runs.
*/
import { mkdtemp, rm, cp } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { execSync } from 'node:child_process';
import type { DrizzleDatabase } from '../../../db/index.js';
import type { DomainEvent } from '../../../events/types.js';
import { EventEmitterBus } from '../../../events/bus.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import type { AgentResult, PendingQuestions } from '../../../agent/types.js';
import type { Initiative, Project, Phase, Task } from '../../../db/schema.js';
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
import type { PhaseRepository } from '../../../db/repositories/phase-repository.js';
import type { TaskRepository } from '../../../db/repositories/task-repository.js';
import type { MessageRepository } from '../../../db/repositories/message-repository.js';
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
import type { PageRepository } from '../../../db/repositories/page-repository.js';
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
import type { ChangeSetRepository } from '../../../db/repositories/change-set-repository.js';
import type { LogChunkRepository } from '../../../db/repositories/log-chunk-repository.js';
import type { ConversationRepository } from '../../../db/repositories/conversation-repository.js';
import type { ProcessManager } from '../../../agent/process-manager.js';
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';
import { createRepositories } from '../../../container.js';
import { DefaultDispatchManager } from '../../../dispatch/manager.js';
import { appRouter, createCallerFactory } from '../../../trpc/router.js';
import { createContext } from '../../../trpc/context.js';
// =============================================================================
// CapturingEventBus
// =============================================================================
/**
 * Event bus that records every emitted event while still delivering it to
 * subscribers, so tests can assert on the emission history afterwards.
 */
export class CapturingEventBus extends EventEmitterBus {
  /** All events emitted through this bus, in emission order. */
  emittedEvents: DomainEvent[] = [];

  /** Record the event first, then forward it to regular subscribers. */
  emit<T extends DomainEvent>(event: T): void {
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /** Return the captured events whose `type` matches exactly. */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
    const matches: T[] = [];
    for (const candidate of this.emittedEvents) {
      if (candidate.type === type) matches.push(candidate as T);
    }
    return matches;
  }

  /** Forget all captured events (replaces the array with a fresh one). */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}
// =============================================================================
// Sleep helper
// =============================================================================
/** Resolve after roughly `ms` milliseconds (setTimeout-based delay). */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
// =============================================================================
// tRPC caller type
// =============================================================================
// Bind the app router once; each harness builds its own context-scoped caller.
const createCaller = createCallerFactory(appRouter);
// Fully-typed caller shape exposed to tests via FullFlowHarness.caller.
export type FullFlowCaller = ReturnType<typeof createCaller>;
// =============================================================================
// FullFlowHarness interface
// =============================================================================
/** Status of an agent that requires attention: done, waiting for answers, or crashed */
export type AgentAttentionStatus = 'done' | 'waiting' | 'crashed';
/**
 * Everything a full-flow test needs: temp directories, the registered fixture
 * project and initiative, a tRPC caller, the real agent manager, all
 * repositories, and polling helpers for driving agents to completion.
 */
export interface FullFlowHarness {
  /** Absolute path to the CW workspace (worktrees are created here) */
  workspaceRoot: string;
  /** Absolute path to the cloned todo-api fixture git repo */
  fixtureRoot: string;
  /** The registered todo-api project */
  project: Project;
  /** The initiative created for the test run */
  initiative: Initiative;
  /** tRPC caller (all procedures available) */
  caller: FullFlowCaller;
  /** Real MultiProviderAgentManager */
  agentManager: MultiProviderAgentManager;
  /** In-memory SQLite database */
  db: DrizzleDatabase;
  /** Event bus with capture capability */
  eventBus: CapturingEventBus;
  // All 11 repositories (spread from createRepositories in the factory)
  initiativeRepository: InitiativeRepository;
  phaseRepository: PhaseRepository;
  taskRepository: TaskRepository;
  messageRepository: MessageRepository;
  agentRepository: AgentRepository;
  pageRepository: PageRepository;
  projectRepository: ProjectRepository;
  accountRepository: AccountRepository;
  changeSetRepository: ChangeSetRepository;
  logChunkRepository: LogChunkRepository;
  conversationRepository: ConversationRepository;
  /**
   * Wait for an agent to reach a terminal status (idle/stopped/crashed).
   * Returns null if the agent enters waiting_for_input.
   * Throws on timeout (default 120s).
   */
  waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;
  /**
   * Poll until the agent needs attention: done (idle/stopped), waiting for input, or crashed.
   * Useful for the question/answer loop in discuss mode. Throws on timeout.
   */
  waitForAgentAttention(agentId: string, timeoutMs?: number): Promise<AgentAttentionStatus>;
  /**
   * Drive an agent to full completion, answering any questions along the way.
   * Answers all questions with the provided answer string (or a default).
   * Throws if the agent does not finish within timeoutMs (default 10 min).
   */
  driveToCompletion(
    agentId: string,
    answer?: string,
    timeoutMs?: number,
  ): Promise<AgentResult | null>;
  /**
   * Get captured events filtered by type.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[];
  /**
   * Kill all running agents and remove temp directories.
   */
  cleanup(): Promise<void>;
}
// =============================================================================
// Poll interval
// =============================================================================
// How often the polling helpers re-read the agents table while waiting.
const POLL_INTERVAL_MS = 1500;
// =============================================================================
// Factory
// =============================================================================
// ESM has no __dirname; derive it so the fixture path resolves relative to
// this module regardless of the process working directory.
const __dirname = dirname(fileURLToPath(import.meta.url));
const FIXTURES_DIR = join(__dirname, '../../fixtures/todo-api');
export interface FullFlowHarnessOptions {
  /** Factory called after workspaceRoot + repos are created. Return a custom ProcessManager. */
  processManagerFactory?: (workspaceRoot: string, projectRepo: ProjectRepository) => ProcessManager;
}
/**
* Create a full-flow test harness.
*
* Setup steps:
* 1. Copy todo-api fixture into a temp git repo (fixtureRoot).
* 2. Create workspace temp dir (workspaceRoot) for CW operations.
* 3. Init in-memory DB + all 11 repos.
* 4. Wire real MultiProviderAgentManager with all repos.
* 5. Wire DefaultDispatchManager for execute stage.
* 6. Create tRPC caller with full context.
* 7. Register project in DB directly (url = fixtureRoot).
* 8. Create initiative via tRPC (links project, creates root page).
*/
export async function createFullFlowHarness(
  initiativeName = 'Add complete() method to TodoStore',
  options?: FullFlowHarnessOptions,
): Promise<FullFlowHarness> {
  // ── 0. Allow nested claude invocations ────────────────────────────────────
  // Claude Code sets CLAUDECODE in the environment, which prevents nested
  // claude CLI calls from starting ("cannot be launched inside another Claude
  // Code session"). Save and remove it so spawned agents can run normally.
  // It is restored in cleanup().
  const savedClaudeCodeEnv = process.env.CLAUDECODE;
  delete process.env.CLAUDECODE;
  // ── 1. Fixture project ────────────────────────────────────────────────────
  // IMPORTANT: cp(src, dest) puts src INSIDE dest when dest already exists
  // (like `cp -r src dest/` → creates dest/src/). We need dest to NOT exist
  // yet so that cp creates it as a copy of src directly.
  const fixtureBase = await mkdtemp(join(tmpdir(), 'cw-fixture-'));
  const fixtureRoot = join(fixtureBase, 'todo-api'); // does not exist yet
  await cp(FIXTURES_DIR, fixtureRoot, { recursive: true });
  // Verify files landed at the right level before git operations
  // NOTE(review): `test -f` and the git calls below assume a POSIX shell/git
  // on PATH — this harness is not Windows-portable; confirm that is intended.
  execSync(`test -f "${join(fixtureRoot, 'package.json')}"`, { stdio: 'pipe' });
  execSync('git init', { cwd: fixtureRoot, stdio: 'pipe' });
  execSync('git config user.email "test@test.com"', { cwd: fixtureRoot, stdio: 'pipe' });
  execSync('git config user.name "Test"', { cwd: fixtureRoot, stdio: 'pipe' });
  execSync('git add .', { cwd: fixtureRoot, stdio: 'pipe' });
  execSync('git commit -m "initial todo-api with missing complete()"', {
    cwd: fixtureRoot,
    stdio: 'pipe',
  });
  // ── 2. Workspace root ─────────────────────────────────────────────────────
  // Just a plain temp directory — agent worktrees live under repos/ inside it.
  // No git init needed; the PROJECT clone (repos/<name>-<id>/) is the git repo.
  const workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-workspace-'));
  // ── 3. Database + repositories ────────────────────────────────────────────
  const db = createTestDatabase();
  const repos = createRepositories(db);
  // ── 4. Event bus ──────────────────────────────────────────────────────────
  const eventBus = new CapturingEventBus();
  // ── 5. Real agent manager ─────────────────────────────────────────────────
  // A custom process manager (e.g. cassette replay) may be injected via options.
  const customProcessManager = options?.processManagerFactory?.(workspaceRoot, repos.projectRepository);
  const agentManager = new MultiProviderAgentManager(
    repos.agentRepository,
    workspaceRoot,
    repos.projectRepository,
    repos.accountRepository,
    eventBus,
    undefined, // no credential manager needed for default claude account
    repos.changeSetRepository,
    repos.phaseRepository,
    repos.taskRepository,
    repos.pageRepository,
    repos.logChunkRepository,
    false, // debug
    customProcessManager, // processManagerOverride
  );
  // ── 6. Dispatch manager (for execute stage) ───────────────────────────────
  const dispatchManager = new DefaultDispatchManager(
    repos.taskRepository,
    repos.messageRepository,
    agentManager,
    eventBus,
    repos.initiativeRepository,
    repos.phaseRepository,
  );
  // ── 7. tRPC caller ────────────────────────────────────────────────────────
  const ctx = createContext({
    eventBus,
    serverStartedAt: new Date(),
    processCount: 0,
    agentManager,
    dispatchManager,
    workspaceRoot,
    ...repos,
  });
  const caller = createCaller(ctx);
  // ── 8. Register project directly in DB (bypass tRPC clone) ───────────────
  // url points at the local fixture clone, so agents "clone" from disk.
  const project = await repos.projectRepository.create({
    name: 'todo-api',
    url: fixtureRoot,
  });
  // ── 9. Create initiative via tRPC (creates root page automatically) ───────
  const initiative = await caller.createInitiative({
    name: initiativeName,
    projectIds: [project.id],
  });
  // ── Helpers ───────────────────────────────────────────────────────────────
  // Poll until the agent row leaves 'running'; null means it is waiting for
  // user input instead of having terminated.
  async function waitForAgentCompletion(
    agentId: string,
    timeoutMs = 120_000,
  ): Promise<AgentResult | null> {
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
      const agent = await repos.agentRepository.findById(agentId);
      if (!agent) return null;
      if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
        return agentManager.getResult(agentId);
      }
      if (agent.status === 'waiting_for_input') return null;
      await sleep(POLL_INTERVAL_MS);
    }
    throw new Error(`Timeout: agent ${agentId} did not complete within ${timeoutMs}ms`);
  }
  // Poll until the agent needs attention; a missing row is treated as crashed.
  async function waitForAgentAttention(
    agentId: string,
    timeoutMs = 120_000,
  ): Promise<AgentAttentionStatus> {
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
      const agent = await repos.agentRepository.findById(agentId);
      if (!agent) return 'crashed';
      if (agent.status === 'idle' || agent.status === 'stopped') return 'done';
      if (agent.status === 'crashed') return 'crashed';
      if (agent.status === 'waiting_for_input') return 'waiting';
      await sleep(POLL_INTERVAL_MS);
    }
    throw new Error(`Timeout: agent ${agentId} did not reach attention state within ${timeoutMs}ms`);
  }
  // Answer every question with the same canned answer until the agent
  // terminates or the overall deadline expires.
  async function driveToCompletion(
    agentId: string,
    answer = 'Use your best judgment and keep it simple.',
    timeoutMs = 10 * 60_000,
  ): Promise<AgentResult | null> {
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
      const remaining = deadline - Date.now();
      if (remaining <= 0) break;
      let status: AgentAttentionStatus;
      try {
        // Cap each attention wait at 3 minutes so the outer deadline governs.
        status = await waitForAgentAttention(agentId, Math.min(remaining, 3 * 60_000));
      } catch {
        // Agent is still running (hasn't reached an attention state within the polling
        // window). This is normal for long-running execute agents. Continue the outer
        // loop — the deadline check above will terminate us if we truly time out.
        continue;
      }
      if (status === 'done' || status === 'crashed') {
        return agentManager.getResult(agentId);
      }
      if (status === 'waiting') {
        const pending = await agentManager.getPendingQuestions(agentId);
        if (!pending || pending.questions.length === 0) {
          // Shouldn't happen, but guard against it
          await sleep(POLL_INTERVAL_MS);
          continue;
        }
        // Same answer for every pending question, keyed by question id.
        const answers = Object.fromEntries(
          pending.questions.map((q) => [q.id, answer]),
        );
        await agentManager.resume(agentId, answers);
      }
    }
    throw new Error(`driveToCompletion: agent ${agentId} did not finish within ${timeoutMs}ms`);
  }
  // ── Build and return harness ───────────────────────────────────────────────
  const harness: FullFlowHarness = {
    workspaceRoot,
    fixtureRoot,
    project,
    initiative,
    caller,
    agentManager,
    db,
    eventBus,
    ...repos,
    waitForAgentCompletion,
    waitForAgentAttention,
    driveToCompletion,
    getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
      return eventBus.getEventsByType<T>(type);
    },
    async cleanup() {
      // Kill any running agents
      // NOTE(review): only status === 'running' agents are stopped; agents in
      // waiting_for_input are left alone — confirm they hold no live processes.
      const agents = await repos.agentRepository.findAll();
      await Promise.allSettled(
        agents
          .filter((a) => a.status === 'running')
          .map((a) => agentManager.stop(a.id)),
      );
      // Restore CLAUDECODE env var
      if (savedClaudeCodeEnv !== undefined) {
        process.env.CLAUDECODE = savedClaudeCodeEnv;
      }
      // Remove temp directories (fixtureBase contains fixtureRoot)
      await Promise.allSettled([
        rm(fixtureBase, { recursive: true, force: true }),
        rm(workspaceRoot, { recursive: true, force: true }),
      ]);
    },
  };
  return harness;
}

View File

@@ -0,0 +1,156 @@
/**
* Full-Flow Test Report Utility
*
* Plain console.log formatters for human-readable output at each stage of the
* full-flow integration test. No external dependencies.
*/
import { execSync } from 'node:child_process';
import { join } from 'node:path';
import type { Phase, Task } from '../../../db/schema.js';
import type { AgentResult } from '../../../agent/types.js';
// =============================================================================
// Types
// =============================================================================
export interface ExecutedTask {
  /** The execute-category task that was dispatched to an agent. */
  task: Task;
  /** Agent outcome; null when no result was produced (e.g. crash). */
  result: AgentResult | null;
}
// =============================================================================
// Helpers
// =============================================================================
/** Fixed width of all report dividers. */
const WIDTH = 60;
const DIVIDER = '═'.repeat(WIDTH);
const THIN = '─'.repeat(WIDTH);
/** Print a heavy banner: blank line + divider, indented title, divider. */
function section(title: string): void {
  for (const row of [`\n${DIVIDER}`, ` ${title}`, DIVIDER]) {
    console.log(row);
  }
}
/** Print one detail line, indented two spaces. */
function line(msg: string): void {
  console.log(`  ${msg}`);
}
// =============================================================================
// Stage reporters
// =============================================================================
/** Print the top banner for a full-flow test run with a start timestamp. */
export function printHeader(initiativeName: string): void {
  section(`FULL-FLOW TEST: ${initiativeName}`);
  const startedAt = new Date().toISOString();
  console.log(`  Started at: ${startedAt}`);
}
/** Report the discuss-stage outcome for one agent (null result = crash). */
export function printDiscussResult(agentId: string, result: AgentResult | null): void {
  console.log('\n[DISCUSS]');
  console.log(THIN);
  line(`Agent: ${agentId}`);
  if (!result) {
    line('Result: null (agent may have crashed)');
    return;
  }
  line(`Success: ${result.success}`);
  if (result.message) {
    line(`Message: ${result.message.slice(0, 200)}`);
  }
}
/** List the phases created by the plan stage, numbered from 1. */
export function printPlanResult(phases: Phase[]): void {
  console.log(`\n[PLAN] ${phases.length} phase(s) created`);
  console.log(THIN);
  for (const [index, phase] of phases.entries()) {
    line(`${index + 1}. ${phase.name}`);
  }
}
/** List the tasks produced for one phase, with category/type/approval flags. */
export function printDetailResult(phase: Phase, tasks: Task[]): void {
  console.log(`\n[DETAIL] Phase "${phase.name}" → ${tasks.length} task(s)`);
  console.log(THIN);
  for (const [index, task] of tasks.entries()) {
    const approval = task.requiresApproval ? 'approval-required' : 'auto';
    line(`${index + 1}. ${task.name} [${[task.category, task.type, approval].join(', ')}]`);
    if (task.description) {
      line(`   ${task.description.slice(0, 120)}`);
    }
  }
}
/** Summarize execute-stage results: success count, then one line per task. */
export function printExecuteResult(executed: ExecutedTask[]): void {
  const succeeded = executed.reduce((n, e) => n + (e.result?.success ? 1 : 0), 0);
  console.log(`\n[EXECUTE] ${succeeded}/${executed.length} task(s) succeeded`);
  console.log(THIN);
  executed.forEach(({ task, result }) => {
    line(`${result?.success ? '✓' : '✗'} ${task.name}`);
    if (result && !result.success) {
      line(`  Error: ${result.message?.slice(0, 120)}`);
    }
  });
}
// Print a `git diff --stat` against HEAD~1 for every agent worktree of the
// given project under <workspaceRoot>/agent-workdirs/.
// NOTE(review): uses shell `ls`/`2>/dev/null` via execSync, so this is
// POSIX-only and assumes the worktree layout <dir>/<projectName> — confirm
// against the agent manager's actual worktree paths.
export function printGitDiff(workspaceRoot: string, projectName: string): void {
  console.log('\n[GIT DIFF — agent worktrees]');
  console.log(THIN);
  // Find all agent worktrees for this project
  const worktreesBase = join(workspaceRoot, 'agent-workdirs');
  try {
    // `|| echo ""` keeps the command's exit code 0 when the dir is missing.
    const dirs = execSync(`ls "${worktreesBase}" 2>/dev/null || echo ""`, { encoding: 'utf8' })
      .trim()
      .split('\n')
      .filter(Boolean);
    for (const dir of dirs) {
      const projectDir = join(worktreesBase, dir, projectName);
      try {
        const stat = execSync(`git -C "${projectDir}" diff HEAD~1 --stat 2>/dev/null || echo ""`, {
          encoding: 'utf8',
        }).trim();
        if (stat) {
          line(`Worktree: ${dir}/${projectName}`);
          stat.split('\n').forEach((l) => line(`  ${l}`));
        }
      } catch {
        // Worktree might not have commits — skip silently
      }
    }
  } catch {
    line('(no agent worktrees found)');
  }
}
/** Run the fixture's node test file in projectDir and print pass/fail output. */
export function printNpmTestResult(projectDir: string): void {
  console.log('\n[NPM TEST]');
  console.log(THIN);
  // Indent every output line two extra spaces under the status line.
  const dump = (text: string): void => {
    text.split('\n').forEach((l) => line(`  ${l}`));
  };
  try {
    const output = execSync('node --test src/todo.test.js', {
      cwd: projectDir,
      encoding: 'utf8',
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    line('Tests passed:');
    dump(output);
  } catch (err: unknown) {
    // execSync throws on non-zero exit; the error carries captured streams.
    const e = err as { stdout?: string; stderr?: string; status?: number };
    line(`Tests FAILED (exit ${e.status ?? '?'})`);
    if (e.stdout) dump(e.stdout);
    if (e.stderr) dump(e.stderr);
  }
}
/** Print the closing summary banner: duration, phase/task counts, successes. */
export function printFinalSummary(
  initiativeName: string,
  phases: Phase[],
  tasks: Task[],
  executed: ExecutedTask[],
  durationMs: number,
): void {
  const wins = executed.filter((e) => e.result?.success).length;
  section(`SUMMARY: ${initiativeName}`);
  const rows: Array<[string, string]> = [
    ['Duration ', `${Math.round(durationMs / 1000)}s`],
    ['Phases   ', `${phases.length}`],
    ['Tasks    ', `${tasks.length}`],
    ['Executed ', `${wins}/${executed.length} succeeded`],
  ];
  for (const [label, value] of rows) {
    line(`${label}: ${value}`);
  }
  console.log(DIVIDER);
}

View File

@@ -0,0 +1,183 @@
/**
* Real Claude CLI Integration Tests
*
* IMPORTANT: These tests call the real Claude CLI and incur API costs.
* They are SKIPPED by default and should only be run manually for validation.
*
* To run these tests:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-claude.test.ts --test-timeout=120000
* ```
*
* Purpose:
* - Validate that JSON schemas work correctly with Claude CLI --json-schema flag
* - Confirm MockAgentManager accurately simulates real CLI behavior
* - Document actual response structure and costs
*
* Updated (2026-02-06): Now uses the universal agentSignalSchema instead of
* per-mode schemas. Agents output trivial signals (done/questions/error) and
* write files instead of producing mode-specific JSON.
*
* Total validation cost: ~$0.10 (3 tests)
*/
import { describe, it, expect, beforeAll } from 'vitest';
import { execa } from 'execa';
import {
agentSignalJsonSchema,
agentSignalSchema,
} from '../../agent/schema.js';
/**
* Result structure from Claude CLI with --output-format json
*
* When --json-schema is used:
* - result: "" (empty string)
* - structured_output: { ... } (the validated JSON object)
*/
interface ClaudeCliResult {
  /** Always 'result' for the final CLI message. */
  type: 'result';
  /** Outcome classification reported by the CLI. */
  subtype: 'success' | 'error' | 'error_max_turns';
  /** True when the run ended in an error. */
  is_error: boolean;
  /** Session identifier usable with --resume. */
  session_id: string;
  /** Plain-text result; empty string when --json-schema is used. */
  result: string;
  /** Schema-validated JSON object (present when --json-schema is used). */
  structured_output?: unknown;
  /** Total API cost of the run in USD, when reported. */
  total_cost_usd?: number;
}
/**
* Helper to call Claude CLI directly with a prompt and JSON schema.
*
* @param prompt - The prompt to send to Claude
* @param jsonSchema - JSON schema to enforce structured output
* @param timeoutMs - Timeout in milliseconds (default 90s)
* @returns Parsed CLI result with structured_output
*/
/**
 * Invoke the Claude CLI once with a prompt and a structured-output schema.
 *
 * @param prompt - Prompt text passed via `-p`.
 * @param jsonSchema - JSON schema enforced via `--json-schema`.
 * @param timeoutMs - Kill the CLI after this many milliseconds (default 90s).
 * @returns The parsed CLI result plus the extracted structured output.
 */
async function callClaudeCli(
  prompt: string,
  jsonSchema: object,
  timeoutMs = 90000
): Promise<{ cliResult: ClaudeCliResult; structuredOutput: unknown }> {
  const startedAt = Date.now();
  const cliArgs = [
    '-p',
    prompt,
    '--output-format',
    'json',
    '--json-schema',
    JSON.stringify(jsonSchema),
  ];
  const { stdout } = await execa('claude', cliArgs, { timeout: timeoutMs });
  const elapsedMs = Date.now() - startedAt;
  const parsed: ClaudeCliResult = JSON.parse(stdout);
  console.log(`\n  Duration: ${(elapsedMs / 1000).toFixed(1)}s`);
  console.log(`  Cost: $${parsed.total_cost_usd?.toFixed(4) ?? 'N/A'}`);
  console.log(`  Session ID: ${parsed.session_id}`);
  console.log(`  Result field empty: ${parsed.result === ''}`);
  console.log(`  Has structured_output: ${parsed.structured_output !== undefined}`);
  // With --json-schema, the validated object arrives in structured_output and
  // the result string is normally empty; fall back to parsing result otherwise.
  const structuredOutput = parsed.structured_output ?? JSON.parse(parsed.result);
  return { cliResult: parsed, structuredOutput };
}
/**
* Check if real Claude tests should run.
* Set REAL_CLAUDE_TESTS=1 environment variable to enable.
*/
// Opt-in flag: real CLI tests only run when REAL_CLAUDE_TESTS=1 is set.
const shouldRunRealTests = process.env.REAL_CLAUDE_TESTS === '1';
/**
 * Skip wrapper - tests are expensive and should run manually
 */
const describeReal = shouldRunRealTests ? describe : describe.skip;
// Individual test timeout - real API calls take 5-30 seconds
const TEST_TIMEOUT = 120000; // 2 minutes
// Exercises the universal agent signal schema against the real CLI: each test
// steers the model toward one signal status and validates the structured
// output with the Zod schema. Skipped unless REAL_CLAUDE_TESTS=1.
describeReal('Real Claude CLI Integration', () => {
  beforeAll(() => {
    console.log('\n=== Running Real Claude CLI Tests ===');
    console.log('These tests call the real Claude API and incur costs.\n');
  });
  describe('Universal Signal Schema', () => {
    it(
      'should return done status',
      async () => {
        const prompt = `Complete this simple task: Say "Hello, World!" as a test.
Output your response in the required JSON format with status "done".`;
        const { cliResult, structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);
        console.log('  Output:', JSON.stringify(structuredOutput, null, 2));
        // Verify the CLI response structure
        expect(cliResult.subtype).toBe('success');
        expect(cliResult.result).toBe(''); // Empty when using --json-schema
        expect(cliResult.structured_output).toBeDefined();
        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('done');
      },
      TEST_TIMEOUT
    );
    it(
      'should return questions status with array',
      async () => {
        // Deliberately vague task so the model must ask questions.
        const prompt = `You are working on a vague task: "Make it better"
You MUST ask clarifying questions before proceeding. You cannot complete this task without more information.
Output your response with status "questions" and include at least 2 questions with unique IDs.`;
        const { structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);
        console.log('  Output:', JSON.stringify(structuredOutput, null, 2));
        // Validate against Zod schema; the status check narrows the union
        // before accessing the questions array.
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('questions');
        if (parsed.status === 'questions') {
          expect(Array.isArray(parsed.questions)).toBe(true);
          expect(parsed.questions.length).toBeGreaterThanOrEqual(1);
          expect(parsed.questions[0].id).toBeTruthy();
          expect(parsed.questions[0].question).toBeTruthy();
        }
      },
      TEST_TIMEOUT
    );
    it(
      'should return error status',
      async () => {
        const prompt = `You have encountered an unrecoverable error. Output your response with status "error" and a descriptive error message.`;
        const { structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);
        console.log('  Output:', JSON.stringify(structuredOutput, null, 2));
        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('error');
        if (parsed.status === 'error') {
          expect(parsed.error).toBeTruthy();
        }
      },
      TEST_TIMEOUT
    );
  });
});

View File

@@ -0,0 +1,298 @@
/**
* Real Claude CLI Manager Integration Tests
*
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run these tests:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/claude-manager.test.ts --test-timeout=300000
* ```
*
* Tests covered:
* - Output stream parsing (text_delta events)
* - Session ID extraction from init event
* - Result parsing and validation
* - Session resume with user answers
*
* Estimated cost: ~$0.10 per full run
*/
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import {
createRealProviderHarness,
describeRealClaude,
REAL_TEST_TIMEOUT,
sleep,
type RealProviderHarness,
} from './harness.js';
import { MINIMAL_PROMPTS } from './prompts.js';
import type { AgentSpawnedEvent, AgentStoppedEvent, AgentOutputEvent } from '../../../events/types.js';
describeRealClaude('Real Claude Manager Integration', () => {
let harness: RealProviderHarness;
beforeAll(async () => {
console.log('\n=== Running Real Claude Manager Tests ===');
console.log('These tests call the real Claude API and incur costs.\n');
harness = await createRealProviderHarness({ provider: 'claude' });
});
afterAll(async () => {
await harness.cleanup();
});
beforeEach(() => {
harness.clearEvents();
});
describe('Output Parsing', () => {
it(
'parses text_delta events from stream',
async () => {
// Spawn agent with streaming prompt
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.streaming,
mode: 'execute',
provider: 'claude',
});
expect(agent.id).toBeTruthy();
expect(agent.status).toBe('running');
// Wait for completion
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
// Verify we got output events
const outputEvents = harness.getEventsByType<AgentOutputEvent>('agent:output');
console.log(' Output events:', outputEvents.length);
// Verify completion
expect(result).toBeTruthy();
console.log(' Result:', result?.message);
},
REAL_TEST_TIMEOUT
);
it(
'parses init event and extracts session ID',
async () => {
// Spawn agent with simple done prompt
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.done,
mode: 'execute',
provider: 'claude',
});
// Wait for completion
await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
// Verify session ID was extracted and persisted
const dbAgent = await harness.agentRepository.findById(agent.id);
expect(dbAgent?.sessionId).toBeTruthy();
expect(dbAgent?.sessionId).toMatch(/^[a-f0-9-]+$/);
console.log(' Session ID:', dbAgent?.sessionId);
},
REAL_TEST_TIMEOUT
);
it(
'parses result event with completion',
async () => {
// Spawn agent with simple done prompt
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.done,
mode: 'execute',
provider: 'claude',
});
// Wait for completion
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
// Verify result was parsed
expect(result).toBeTruthy();
expect(result?.success).toBe(true);
expect(result?.message).toBeTruthy();
// Verify events
const spawnedEvents = harness.getEventsByType<AgentSpawnedEvent>('agent:spawned');
expect(spawnedEvents.length).toBe(1);
expect(spawnedEvents[0].payload.agentId).toBe(agent.id);
expect(spawnedEvents[0].payload.provider).toBe('claude');
const stoppedEvents = harness.getEventsByType<AgentStoppedEvent>('agent:stopped');
expect(stoppedEvents.length).toBe(1);
expect(stoppedEvents[0].payload.agentId).toBe(agent.id);
expect(stoppedEvents[0].payload.reason).toBe('task_complete');
console.log(' Result message:', result?.message);
},
REAL_TEST_TIMEOUT
);
});
describe('Questions Flow', () => {
  it(
    'parses questions status and enters waiting_for_input',
    async () => {
      // Launch an agent whose prompt makes it ask questions.
      const asker = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.questions,
        mode: 'execute',
        provider: 'claude',
      });

      // Block until the agent reports it is waiting for user input.
      const parsed = await harness.waitForAgentWaiting(asker.id, REAL_TEST_TIMEOUT);

      // The questions payload must be non-empty and well-formed.
      expect(parsed).toBeTruthy();
      expect(parsed?.questions).toBeTruthy();
      expect(parsed?.questions.length).toBeGreaterThan(0);
      expect(parsed?.questions[0].id).toBeTruthy();
      expect(parsed?.questions[0].question).toBeTruthy();

      // The persisted row must reflect the waiting state and a session id.
      const persisted = await harness.agentRepository.findById(asker.id);
      expect(persisted?.status).toBe('waiting_for_input');
      expect(persisted?.sessionId).toBeTruthy();

      console.log(' Questions:', parsed?.questions.length);
      console.log(' First question:', parsed?.questions[0].question);
    },
    REAL_TEST_TIMEOUT
  );
});
describe('Session Resume', () => {
  it(
    'resumes session with user answers',
    async () => {
      // 1. Spawn agent that asks questions
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.questions,
        mode: 'execute',
        provider: 'claude',
      });

      // 2. Wait for waiting_for_input
      const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
      expect(questions?.questions.length).toBeGreaterThan(0);
      const sessionIdBeforeResume = (await harness.agentRepository.findById(agent.id))?.sessionId;
      console.log(' Session ID before resume:', sessionIdBeforeResume);
      console.log(' Questions received:', questions?.questions.map((q) => q.id).join(', '));
      harness.clearEvents();

      // 3. Resume with one synthetic answer per question
      const answers: Record<string, string> = {};
      for (const q of questions?.questions ?? []) {
        answers[q.id] = `Answer to ${q.id}`;
      }
      await harness.agentManager.resume(agent.id, answers);

      // 4. Poll (up to ~60s) until the agent leaves the running state.
      // Fix: removed an unused `finalStatus` local that was assigned in the
      // loop but never read afterwards.
      let attempts = 0;
      while (attempts < 60) {
        const agent2 = await harness.agentRepository.findById(agent.id);
        if (agent2?.status !== 'running') break;
        await sleep(1000);
        attempts++;
      }

      // Verify the agent processed the resume (either completed or asked more questions)
      const dbAgent = await harness.agentRepository.findById(agent.id);
      console.log(' Final status:', dbAgent?.status);
      // Agent should not still be running
      expect(['idle', 'waiting_for_input', 'crashed']).toContain(dbAgent?.status);
      // If idle, verify a result was produced
      if (dbAgent?.status === 'idle') {
        const result = await harness.agentManager.getResult(agent.id);
        console.log(' Result:', result?.message);
        expect(result).toBeTruthy();
      }
    },
    REAL_TEST_TIMEOUT * 2 // Double timeout for two-step process
  );

  it(
    'maintains session continuity across resume',
    async () => {
      // 1. Spawn agent that asks questions
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.questions,
        mode: 'execute',
        provider: 'claude',
      });

      // 2. Wait for waiting_for_input and capture the pre-resume session id
      const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
      expect(questions?.questions.length).toBeGreaterThan(0);
      const sessionIdBefore = (await harness.agentRepository.findById(agent.id))?.sessionId;
      expect(sessionIdBefore).toBeTruthy();

      // 3. Resume with one synthetic answer per question
      const answers: Record<string, string> = {};
      for (const q of questions?.questions ?? []) {
        answers[q.id] = `Answer to ${q.id}`;
      }
      await harness.agentManager.resume(agent.id, answers);

      // 4. Wait for completion
      await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

      // Verify session ID exists (may be same or new depending on CLI behavior)
      const sessionIdAfter = (await harness.agentRepository.findById(agent.id))?.sessionId;
      expect(sessionIdAfter).toBeTruthy();
      console.log(' Session ID before:', sessionIdBefore);
      console.log(' Session ID after:', sessionIdAfter);
    },
    REAL_TEST_TIMEOUT * 2
  );
});
describe('Error Handling', () => {
  it(
    'handles error status',
    async () => {
      // Launch an agent whose prompt forces an error outcome.
      const failing = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.error,
        mode: 'execute',
        provider: 'claude',
      });

      // "Completion" here means the agent ended — it is expected to crash.
      const outcome = await harness.waitForAgentCompletion(failing.id, REAL_TEST_TIMEOUT);

      // Both the persisted status and the result must reflect the failure.
      const persisted = await harness.agentRepository.findById(failing.id);
      expect(persisted?.status).toBe('crashed');
      expect(outcome?.success).toBe(false);
      expect(outcome?.message).toContain('Test error');
      console.log(' Error message:', outcome?.message);
    },
    REAL_TEST_TIMEOUT
  );
});
});

View File

@@ -0,0 +1,172 @@
/**
* Real Codex CLI Manager Integration Tests
*
* IMPORTANT: These tests call the REAL Codex CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run these tests:
* ```bash
* REAL_CODEX_TESTS=1 npm test -- src/test/integration/real-providers/codex-manager.test.ts --test-timeout=300000
* ```
*
* Tests covered:
* - Codex spawn and thread_id extraction
* - Generic output parsing (non-schema)
* - Streaming output
*
* Estimated cost: ~$0.10 per full run
*
* Note: Codex uses different output format and session ID field (thread_id).
*/
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import {
createRealProviderHarness,
describeRealCodex,
REAL_TEST_TIMEOUT,
type RealProviderHarness,
} from './harness.js';
import { CODEX_PROMPTS } from './prompts.js';
import type { AgentSpawnedEvent, AgentOutputEvent } from '../../../events/types.js';
describeRealCodex('Real Codex Manager Integration', () => {
let harness: RealProviderHarness;
beforeAll(async () => {
  // Warn loudly: this suite talks to the real Codex API and incurs costs.
  console.log('\n=== Running Real Codex Manager Tests ===');
  console.log('These tests call the real Codex API and incur costs.\n');
  // One shared harness (workspace, repos, manager) for the whole suite.
  harness = await createRealProviderHarness({ provider: 'codex' });
});
afterAll(async () => {
  // Tear down everything the harness created (processes, workspace).
  await harness.cleanup();
});
beforeEach(() => {
  // Each test inspects the event log in isolation; drop prior events.
  harness.clearEvents();
});
describe('Codex Spawn', () => {
  it(
    'spawns codex agent and extracts thread_id',
    async () => {
      // Launch a codex agent with a trivial task.
      const spawned = await harness.agentManager.spawn({
        taskId: null,
        prompt: CODEX_PROMPTS.done,
        mode: 'execute',
        provider: 'codex',
      });
      expect(spawned.id).toBeTruthy();
      expect(spawned.provider).toBe('codex');
      expect(spawned.status).toBe('running');

      // Exactly one spawned event should have been emitted, for codex.
      const spawnEvents = harness.getEventsByType<AgentSpawnedEvent>('agent:spawned');
      expect(spawnEvents.length).toBe(1);
      expect(spawnEvents[0].payload.provider).toBe('codex');

      // Run to completion, then inspect the persisted row.
      const outcome = await harness.waitForAgentCompletion(spawned.id, REAL_TEST_TIMEOUT);
      const persisted = await harness.agentRepository.findById(spawned.id);
      console.log(' Thread ID:', persisted?.sessionId);
      console.log(' Status:', persisted?.status);
      console.log(' Result:', outcome?.message);

      // Codex should either complete or crash — never stay running.
      expect(['idle', 'crashed']).toContain(persisted?.status);
      // On a clean run, the thread_id must have been captured.
      if (persisted?.status === 'idle' && persisted?.sessionId) {
        expect(persisted.sessionId).toBeTruthy();
      }
    },
    REAL_TEST_TIMEOUT
  );

  it(
    'uses generic parser for output',
    async () => {
      // Launch with a prompt that produces streaming output.
      const spawned = await harness.agentManager.spawn({
        taskId: null,
        prompt: CODEX_PROMPTS.streaming,
        mode: 'execute',
        provider: 'codex',
      });

      const outcome = await harness.waitForAgentCompletion(spawned.id, REAL_TEST_TIMEOUT);

      // Streaming output should surface as agent:output events.
      const outputEvents = harness.getEventsByType<AgentOutputEvent>('agent:output');
      console.log(' Output events:', outputEvents.length);

      // For the generic (non-schema) provider, a result is still captured.
      const persisted = await harness.agentRepository.findById(spawned.id);
      console.log(' Status:', persisted?.status);
      console.log(' Result:', outcome?.message?.substring(0, 100) + '...');
      expect(['idle', 'crashed']).toContain(persisted?.status);
    },
    REAL_TEST_TIMEOUT
  );
});
describe('Codex Provider Config', () => {
  it(
    'uses correct command and args for codex',
    async () => {
      // Config-level check; actual command execution is covered by the
      // spawn test above.
      const spawned = await harness.agentManager.spawn({
        taskId: null,
        prompt: 'Say hello',
        mode: 'execute',
        provider: 'codex',
      });

      // The persisted record must carry the codex provider.
      const persisted = await harness.agentRepository.findById(spawned.id);
      expect(persisted?.provider).toBe('codex');

      // Completion failures are tolerated — codex may not be installed.
      try {
        await harness.waitForAgentCompletion(spawned.id, REAL_TEST_TIMEOUT);
      } catch {
        // Ignored: a missing codex binary is acceptable for a config test.
      }
      const finalState = await harness.agentRepository.findById(spawned.id);
      console.log(' Provider:', finalState?.provider);
      console.log(' Status:', finalState?.status);
    },
    REAL_TEST_TIMEOUT
  );
});
});
/**
* Codex-specific observations from testing:
*
* 1. Output Format:
* - Codex uses JSONL streaming with different event types
* - thread.started event contains thread_id
* - Output parsing is more generic (not JSON schema validated)
*
* 2. Command Structure:
* - codex exec --full-auto --json -p "prompt"
* - resume: codex exec resume <thread_id>
*
* 3. Session ID:
* - Called "thread_id" in Codex
* - Extracted from thread.started event
*
* 4. Resume:
* - Uses subcommand style: codex exec resume <thread_id>
* - Different from Claude's flag style: claude --resume <session_id>
*/

View File

@@ -0,0 +1,540 @@
/**
* Real Claude Inter-Agent Conversation Integration Tests
*
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/conversation.test.ts --test-timeout=300000
* ```
*
* Architecture:
* - Mock conversation server (only cw listen/ask/answer endpoints, no full CoordinationServer)
* - In-memory ConversationRepository (no SQLite, no FK constraints)
* - Real agent harness for spawning two Claude sessions with actual coding tasks
* - Two sequential questions prove the listen→answer→re-listen cycle works
*
* Estimated cost: ~$0.30 per full run (two Claude sessions)
*/
import { it, expect, beforeAll, afterAll } from 'vitest';
import { createServer } from 'node:http';
import type { Server } from 'node:http';
import { readFileSync, existsSync } from 'node:fs';
import { join } from 'node:path';
import { nanoid } from 'nanoid';
import { fetchRequestHandler } from '@trpc/server/adapters/fetch';
import { router, publicProcedure } from '../../../trpc/trpc.js';
import { conversationProcedures } from '../../../trpc/routers/conversation.js';
import { EventEmitterBus } from '../../../events/bus.js';
import type { ConversationRepository, CreateConversationData } from '../../../db/repositories/conversation-repository.js';
import type { Conversation } from '../../../db/schema.js';
import {
createRealProviderHarness,
describeRealClaude,
sleep,
type RealProviderHarness,
} from './harness.js';
const TEST_TIMEOUT = 300000; // 5 minutes — agents do real coding + conversation
// ---------------------------------------------------------------------------
// In-memory ConversationRepository — no SQLite, no FK constraints
// ---------------------------------------------------------------------------
/**
 * Minimal in-memory stand-in for ConversationRepository.
 * Backed by a Map keyed by conversation id — no SQLite, no FK enforcement.
 */
class InMemoryConversationRepository implements ConversationRepository {
  private readonly conversations = new Map<string, Conversation>();

  /** Insert a new pending conversation and return the stored record. */
  async create(data: CreateConversationData): Promise<Conversation> {
    const timestamp = new Date();
    const record: Conversation = {
      id: nanoid(),
      fromAgentId: data.fromAgentId,
      toAgentId: data.toAgentId,
      initiativeId: data.initiativeId ?? null,
      phaseId: data.phaseId ?? null,
      taskId: data.taskId ?? null,
      question: data.question,
      answer: null,
      status: 'pending',
      createdAt: timestamp,
      updatedAt: timestamp,
    };
    this.conversations.set(record.id, record);
    return record;
  }

  /** Look up a conversation by id; null when absent. */
  async findById(id: string): Promise<Conversation | null> {
    return this.conversations.get(id) ?? null;
  }

  /** Pending conversations addressed to an agent, oldest first. */
  async findPendingForAgent(toAgentId: string): Promise<Conversation[]> {
    const pending = [...this.conversations.values()].filter(
      (c) => c.status === 'pending' && c.toAgentId === toAgentId,
    );
    pending.sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime());
    return pending;
  }

  /** Record an answer; flips status to 'answered'. Null when id unknown. */
  async answer(id: string, answer: string): Promise<Conversation | null> {
    const existing = this.conversations.get(id);
    if (!existing) return null;
    const answered: Conversation = {
      ...existing,
      answer,
      status: 'answered' as const,
      updatedAt: new Date(),
    };
    this.conversations.set(id, answered);
    return answered;
  }

  /** Test helper — snapshot of every stored conversation. */
  getAll(): Conversation[] {
    return [...this.conversations.values()];
  }
}
// ---------------------------------------------------------------------------
// Mock conversation server — serves ONLY conversation tRPC procedures
// ---------------------------------------------------------------------------
/**
 * Start an HTTP server exposing ONLY the conversation tRPC procedures,
 * backed by the in-memory repository.
 *
 * Fix: the previous version picked a random port in [40000, 50000) and
 * would hang forever on EADDRINUSE (the listen callback never fires and
 * no 'error' handler was attached). We now listen on port 0 so the OS
 * assigns a free port, and reject the promise on listen errors.
 *
 * @returns the server, its OS-assigned port, and the repo for inspection.
 */
async function startMockConversationServer(): Promise<{
  server: Server;
  port: number;
  repo: InMemoryConversationRepository;
}> {
  const repo = new InMemoryConversationRepository();
  const eventBus = new EventEmitterBus();
  // Mini router with only conversation procedures
  const miniRouter = router({
    ...conversationProcedures(publicProcedure),
  });
  const httpServer = createServer(async (req, res) => {
    if (!req.url?.startsWith('/trpc')) {
      res.writeHead(404);
      res.end('Not found');
      return;
    }
    const host = req.headers.host ?? 'localhost';
    const url = new URL(req.url, `http://${host}`);
    // Buffer the request body for methods that may carry one.
    let body: string | undefined;
    if (req.method !== 'GET' && req.method !== 'HEAD') {
      body = await new Promise<string>((resolve) => {
        let data = '';
        req.on('data', (chunk: Buffer) => {
          data += chunk.toString();
        });
        req.on('end', () => resolve(data));
      });
    }
    // Convert Node's header map to fetch-style Headers.
    const headers = new Headers();
    for (const [key, value] of Object.entries(req.headers)) {
      if (value) {
        if (Array.isArray(value)) {
          value.forEach((v) => headers.append(key, v));
        } else {
          headers.set(key, value);
        }
      }
    }
    const fetchRequest = new Request(url.toString(), {
      method: req.method,
      headers,
      body: body ?? undefined,
    });
    const fetchResponse = await fetchRequestHandler({
      endpoint: '/trpc',
      req: fetchRequest,
      router: miniRouter,
      createContext: () =>
        ({
          eventBus,
          serverStartedAt: new Date(),
          processCount: 0,
          conversationRepository: repo,
          // Stub — requireAgentManager is called unconditionally in createConversation,
          // but list() is only invoked for taskId/phaseId resolution. With --agent-id
          // targeting, list() is never called.
          agentManager: { list: async () => [] },
        }) as any,
    });
    // Copy status/headers, then stream the fetch Response body back out.
    res.statusCode = fetchResponse.status;
    fetchResponse.headers.forEach((value, key) => {
      res.setHeader(key, value);
    });
    if (fetchResponse.body) {
      const reader = fetchResponse.body.getReader();
      const pump = async () => {
        while (true) {
          const { done, value } = await reader.read();
          if (done) {
            res.end();
            return;
          }
          res.write(value);
        }
      };
      pump().catch(() => res.end());
    } else {
      res.end(await fetchResponse.text());
    }
  });
  // Listen on port 0: the OS picks a free port, eliminating collision races.
  await new Promise<void>((resolve, reject) => {
    httpServer.once('error', reject);
    httpServer.listen(0, '127.0.0.1', () => resolve());
  });
  const address = httpServer.address();
  if (address === null || typeof address === 'string') {
    throw new Error('Unable to determine mock conversation server port');
  }
  return { server: httpServer, port: address.port, repo };
}
// ---------------------------------------------------------------------------
// Diagnostic helpers
// ---------------------------------------------------------------------------
/**
 * Print a compact diagnostic dump of an agent's JSONL output and stderr.
 * Shows up to the last 30 output lines; returns quietly (after one log
 * line) when the agent has no log directory at all.
 */
function dumpAgentLogs(workspaceRoot: string, agentName: string) {
  const logDir = join(workspaceRoot, '.cw', 'agent-logs', agentName);
  if (!existsSync(logDir)) {
    console.log(` [${agentName}] No log directory at ${logDir}`);
    return;
  }

  // Tail of output.jsonl: assistant text/tool_use blocks and result events.
  const outputPath = join(logDir, 'output.jsonl');
  if (existsSync(outputPath)) {
    const allLines = readFileSync(outputPath, 'utf-8').trim().split('\n');
    const tail = allLines.slice(-30);
    console.log(` [${agentName}] output.jsonl (last ${tail.length}/${allLines.length} lines):`);
    for (const raw of tail) {
      // Any parse/shape failure for a line falls through to the RAW print.
      try {
        const ev = JSON.parse(raw);
        if (ev.type === 'assistant' && ev.message?.content) {
          for (const block of ev.message.content) {
            if (block.type === 'text') {
              console.log(` TEXT: ${block.text.substring(0, 200)}`);
            } else if (block.type === 'tool_use') {
              console.log(` TOOL: ${block.name} ${JSON.stringify(block.input).substring(0, 150)}`);
            }
          }
        } else if (ev.type === 'result') {
          console.log(` RESULT: ${JSON.stringify(ev).substring(0, 300)}`);
        }
      } catch {
        console.log(` RAW: ${raw.substring(0, 200)}`);
      }
    }
  }

  // Non-empty stderr is surfaced (truncated) as a single line.
  const stderrPath = join(logDir, 'stderr.log');
  if (existsSync(stderrPath)) {
    const stderrText = readFileSync(stderrPath, 'utf-8').trim();
    if (stderrText) {
      console.log(` [${agentName}] stderr: ${stderrText.substring(0, 500)}`);
    }
  }
}
// ---------------------------------------------------------------------------
// Test suite
// ---------------------------------------------------------------------------
describeRealClaude('Real Inter-Agent Conversation (mock server)', () => {
let harness: RealProviderHarness;
let mockServer: Server;
let mockPort: number;
let mockRepo: InMemoryConversationRepository;
const originalCwPort = process.env.CW_PORT;
beforeAll(async () => {
  console.log('\n=== Real Inter-Agent Conversation Test ===');
  console.log('Mock conversation server + two Claude sessions.\n');
  // Boot the conversation-only mock server first so its port is known
  // before any agent is spawned.
  const { server, port, repo } = await startMockConversationServer();
  mockServer = server;
  mockPort = port;
  mockRepo = repo;
  console.log(` Mock server on port ${mockPort}`);
  // Point the agents' cw commands at the mock server via CW_PORT.
  process.env.CW_PORT = String(mockPort);
  // Real agent harness for spawning + worktrees (no full CoordinationServer).
  harness = await createRealProviderHarness({ provider: 'claude' });
  console.log(` Workspace: ${harness.workspaceRoot}`);
});
afterAll(async () => {
  // Restore CW_PORT to its pre-suite value, or drop it if it was unset.
  if (!originalCwPort) {
    delete process.env.CW_PORT;
  } else {
    process.env.CW_PORT = originalCwPort;
  }
  await harness?.cleanup();
  mockServer?.close();
});
it(
  'two agents with real tasks communicate via cw ask/listen/answer (two questions prove re-listen)',
  async () => {
    const agentSuffix = nanoid(6); // unique suffix for temp files
    // ---------------------------------------------------------------
    // Agent A — builds a validator module WHILE answering questions
    // in the background via cw listen
    // ---------------------------------------------------------------
    const agentA = await harness.agentManager.spawn({
      taskId: null,
      prompt: `You are Agent A in a multi-agent coordination test.
You have TWO concurrent responsibilities:
1. Build a TypeScript validator module (your main coding task)
2. Answer questions from other agents via a background listener
SETUP (do this first):
- Read .cw/input/manifest.json to get your agentId
- Start a background listener that writes to a temp file:
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
LISTEN_PID=$!
MAIN CODING TASK — implement a user registration validator:
1. Create types.ts:
export interface RegistrationInput { name: string; email: string; password: string; }
export interface ValidationResult { valid: boolean; errors: string[]; }
2. Create validator.ts:
Import from types.ts. Export function validateRegistration(input: RegistrationInput): ValidationResult
Rules: name min 2 chars, email must have exactly one @ and domain with a dot and no spaces and max 254 chars, password min 8 chars.
3. Create index.ts that re-exports everything from types.ts and validator.ts.
BETWEEN EACH FILE, check for incoming questions:
if [ -s /tmp/cw-listen-${agentSuffix}.txt ]; then
# parse the JSON, get conversationId and question
# answer: cw answer "<answer based on your code>" --conversation-id <id>
# clear and restart listener:
> /tmp/cw-listen-${agentSuffix}.txt
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
LISTEN_PID=$!
fi
You will receive TWO questions total while you work. Answer them based on the code you are writing.
CLEANUP: After all 3 files are written and both questions answered:
- kill $LISTEN_PID 2>/dev/null
- Write .cw/output/signal.json: {"status":"done","result":"validator module complete, answered 2 questions"}
CRITICAL:
- The listener MUST run in the background while you write code.
- Check for questions between files, not as blocking waits.
- The CW_PORT environment variable is already set to ${mockPort}.`,
      mode: 'execute',
      provider: 'claude',
      inputContext: {},
    });
    console.log(` Agent A: ${agentA.id} (${agentA.name})`);
    // Give Agent A time to start its background listener and begin coding
    // NOTE(review): 15s is a heuristic; assumes the listener is up by then —
    // confirm if this test proves flaky.
    await sleep(15000);
    // ---------------------------------------------------------------
    // Agent B — builds a client module, asks Agent A questions to
    // learn the validation rules, then uses answers in its code
    // ---------------------------------------------------------------
    const agentB = await harness.agentManager.spawn({
      taskId: null,
      prompt: `You are Agent B in a multi-agent coordination test.
Read .cw/input/manifest.json to get your agentId. Agent A (ID: ${agentA.id}) is building a validator module.
YOUR CODING TASK — build a registration API client that includes client-side validation matching Agent A's server-side rules:
1. Create client-scaffold.ts with a basic RegistrationClient class that has a register(name, email, password) method that returns Promise<{ok: boolean}>.
Leave a TODO comment where validation will go.
2. NOW ask Agent A what the validation rules are — you need this to write proper client-side checks:
FIELDS=$(cw ask "What are the required fields and their types for registration?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
3. Ask Agent A about the specific email validation rules:
EMAIL_RULES=$(cw ask "What are the exact email validation rules you implemented?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
4. Create validated-client.ts — a COMPLETE implementation using the answers:
Import the scaffold, add a validateBeforeSubmit(name, email, password) function
that implements the EXACT validation rules Agent A told you about.
Include a comment at the top with the rules you received.
5. Write .cw/output/signal.json: {"status":"done","result":"client module complete with validation from agent A"}
CRITICAL:
- Create client-scaffold.ts BEFORE asking questions (you have independent work to do first).
- Use the ACTUAL answers from Agent A in your validated-client.ts implementation.
- The CW_PORT environment variable is already set to ${mockPort}.`,
      mode: 'execute',
      provider: 'claude',
      inputContext: {},
    });
    console.log(` Agent B: ${agentB.id} (${agentB.name})`);
    // ---------------------------------------------------------------
    // Wait for both agents to stop running, then verify conversations
    // ---------------------------------------------------------------
    const deadline = Date.now() + TEST_TIMEOUT;
    let aDone = false;
    let bDone = false;
    let lastLogTime = 0;
    // Poll every 2s; each agent's logs are dumped exactly once, the first
    // time it is seen in a non-running state.
    while (Date.now() < deadline && (!aDone || !bDone)) {
      const agentAInfo = await harness.agentRepository.findById(agentA.id);
      const agentBInfo = await harness.agentRepository.findById(agentB.id);
      // Periodic progress logging every 30s
      if (Date.now() - lastLogTime > 30000) {
        const elapsed = Math.round((Date.now() - (deadline - TEST_TIMEOUT)) / 1000);
        console.log(` [${elapsed}s] A=${agentAInfo?.status ?? '?'} B=${agentBInfo?.status ?? '?'} convs=${mockRepo.getAll().length}`);
        lastLogTime = Date.now();
      }
      if (agentAInfo && agentAInfo.status !== 'running' && !aDone) {
        aDone = true;
        console.log(` Agent A final status: ${agentAInfo.status}`);
        dumpAgentLogs(harness.workspaceRoot, agentA.name);
      }
      if (agentBInfo && agentBInfo.status !== 'running' && !bDone) {
        bDone = true;
        console.log(` Agent B final status: ${agentBInfo.status}`);
        dumpAgentLogs(harness.workspaceRoot, agentB.name);
      }
      if (!aDone || !bDone) await sleep(2000);
    }
    expect(aDone).toBe(true);
    expect(bDone).toBe(true);
    // ---------------------------------------------------------------
    // Verify conversations in mock repo
    // ---------------------------------------------------------------
    const allConversations = mockRepo.getAll();
    console.log(` Total conversations: ${allConversations.length}`);
    for (const c of allConversations) {
      console.log(
        ` ${c.id}: ${c.status} — Q: "${c.question}" A: "${c.answer?.substring(0, 80)}..."`,
      );
    }
    // Exactly 2 conversations, both answered
    expect(allConversations.length).toBe(2);
    expect(allConversations.every((c) => c.status === 'answered')).toBe(true);
    // Both target Agent A, both from Agent B
    expect(allConversations.every((c) => c.toAgentId === agentA.id)).toBe(true);
    expect(allConversations.every((c) => c.fromAgentId === agentB.id)).toBe(true);
    // Questions should be distinct (one about fields, one about email validation)
    const questions = allConversations.map((c) => c.question);
    expect(questions.some((q) => q.toLowerCase().includes('field'))).toBe(true);
    expect(questions.some((q) => q.toLowerCase().includes('email'))).toBe(true);
    // Both answers should be non-empty
    expect(allConversations.every((c) => c.answer && c.answer.length > 0)).toBe(true);
    // ---------------------------------------------------------------
    // Verify Agent A's coding output — validator module files exist
    // ---------------------------------------------------------------
    // Assumes the harness lays agents out under agent-workdirs/<name>/workspace —
    // matches the paths used below for Agent B as well.
    const aWorkdir = join(
      harness.workspaceRoot,
      'agent-workdirs',
      agentA.name,
      'workspace',
    );
    const aFiles = ['types.ts', 'validator.ts', 'index.ts'];
    for (const f of aFiles) {
      const filePath = join(aWorkdir, f);
      const exists = existsSync(filePath);
      console.log(` Agent A file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
      expect(exists).toBe(true);
    }
    // validator.ts should contain actual validation logic
    const validatorContent = readFileSync(join(aWorkdir, 'validator.ts'), 'utf-8');
    console.log(` Agent A validator.ts (${validatorContent.length} chars): ${validatorContent.substring(0, 120)}...`);
    expect(validatorContent.toLowerCase()).toContain('email');
    expect(validatorContent.toLowerCase()).toContain('password');
    // ---------------------------------------------------------------
    // Verify Agent B's coding output — client module files exist
    // ---------------------------------------------------------------
    const bWorkdir = join(
      harness.workspaceRoot,
      'agent-workdirs',
      agentB.name,
      'workspace',
    );
    const bFiles = ['client-scaffold.ts', 'validated-client.ts'];
    for (const f of bFiles) {
      const filePath = join(bWorkdir, f);
      const exists = existsSync(filePath);
      console.log(` Agent B file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
      expect(exists).toBe(true);
    }
    // validated-client.ts should reference validation rules from Agent A's answers
    const clientContent = readFileSync(join(bWorkdir, 'validated-client.ts'), 'utf-8');
    console.log(` Agent B validated-client.ts (${clientContent.length} chars): ${clientContent.substring(0, 120)}...`);
    expect(clientContent.toLowerCase()).toContain('email');
    // ---------------------------------------------------------------
    // Verify interleaving: Agent A's JSONL log has coding tool calls
    // (Write for .ts files) interleaved with conversation tool calls
    // (Bash for cw listen/answer)
    // ---------------------------------------------------------------
    const aLogPath = join(harness.workspaceRoot, '.cw', 'agent-logs', agentA.name, 'output.jsonl');
    const aLog = readFileSync(aLogPath, 'utf-8').trim().split('\n');
    const toolCalls: { type: 'code' | 'conversation'; name: string; detail: string }[] = [];
    // Classify each assistant tool_use block as either "code" (Write of a
    // .ts file) or "conversation" (Bash running cw listen/answer).
    for (const line of aLog) {
      try {
        const ev = JSON.parse(line);
        if (ev.type !== 'assistant' || !ev.message?.content) continue;
        for (const block of ev.message.content) {
          if (block.type !== 'tool_use') continue;
          const input = typeof block.input === 'string' ? block.input : JSON.stringify(block.input);
          if (block.name === 'Write' && input.includes('.ts')) {
            toolCalls.push({ type: 'code', name: 'Write', detail: input.substring(0, 80) });
          } else if (block.name === 'Bash' && (input.includes('cw listen') || input.includes('cw answer'))) {
            toolCalls.push({ type: 'conversation', name: 'Bash', detail: input.substring(0, 80) });
          }
        }
      } catch { /* skip non-JSON lines */ }
    }
    console.log(` Agent A interleaving (${toolCalls.length} relevant tool calls):`);
    for (const tc of toolCalls) {
      console.log(` [${tc.type}] ${tc.name}: ${tc.detail}`);
    }
    // Must have both code and conversation tool calls
    const hasCode = toolCalls.some((tc) => tc.type === 'code');
    const hasConversation = toolCalls.some((tc) => tc.type === 'conversation');
    expect(hasCode).toBe(true);
    expect(hasConversation).toBe(true);
    // Verify interleaving: at least one code call must appear AFTER a conversation call
    // (proving coding continued after handling a question)
    const firstConvIdx = toolCalls.findIndex((tc) => tc.type === 'conversation');
    const lastCodeIdx = toolCalls.length - 1 - [...toolCalls].reverse().findIndex((tc) => tc.type === 'code');
    console.log(` First conversation at index ${firstConvIdx}, last code at index ${lastCodeIdx}`);
    expect(lastCodeIdx).toBeGreaterThan(firstConvIdx);
  },
  TEST_TIMEOUT,
);
});

View File

@@ -0,0 +1,265 @@
/**
* Crash Recovery Integration Tests
*
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run these tests:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/crash-recovery.test.ts --test-timeout=300000
* ```
*
* Tests covered:
* - Server restart while agent is running
* - Resuming streaming after restart
* - Marking dead agents as crashed
* - Output file processing after restart
*
* Estimated cost: ~$0.08 per full run
*/
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import {
createRealProviderHarness,
describeRealClaude,
REAL_TEST_TIMEOUT,
EXTENDED_TEST_TIMEOUT,
sleep,
type RealProviderHarness,
} from './harness.js';
import { MINIMAL_PROMPTS } from './prompts.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
describeRealClaude('Crash Recovery', () => {
let harness: RealProviderHarness;
beforeAll(async () => {
  // Warn loudly: this suite talks to the real Claude API and incurs costs.
  console.log('\n=== Running Crash Recovery Tests ===');
  console.log('These tests call the real Claude API and incur costs.\n');
  // One shared harness (workspace, repos, manager) for the whole suite.
  harness = await createRealProviderHarness({ provider: 'claude' });
});
afterAll(async () => {
  // Tear down everything the harness created (processes, workspace).
  await harness.cleanup();
});
beforeEach(() => {
  // Each test inspects the event log in isolation; drop prior events.
  harness.clearEvents();
});
describe('Server Restart Simulation', () => {
it(
  'resumes streaming for still-running agent after restart',
  async () => {
    // 1. Spawn an agent with a deliberately slow task so it is still
    //    running when we simulate the restart below.
    console.log(' 1. Spawning agent with slow task...');
    const agent = await harness.agentManager.spawn({
      taskId: null,
      prompt: MINIMAL_PROMPTS.slow,
      mode: 'execute',
      provider: 'claude',
    });

    // 2. Wait for the agent process to be up and its row persisted.
    await harness.waitForAgentStatus(agent.id, 'running', 10000);
    const dbAgent = await harness.agentRepository.findById(agent.id);
    expect(dbAgent?.pid).toBeTruthy();
    expect(dbAgent?.outputFilePath).toBeTruthy();
    console.log(' 2. Agent running with PID:', dbAgent?.pid);

    // 3. Give the agent a moment to start writing output
    await sleep(2000);

    // 4. Simulate server crash - create NEW manager (old in-memory state lost)
    console.log(' 3. Simulating server restart with new manager...');
    harness.clearEvents(); // Clear events from old manager
    const newManager = new MultiProviderAgentManager(
      harness.agentRepository,
      harness.workspaceRoot,
      harness.projectRepository,
      harness.accountRepository,
      harness.eventBus
    );

    // 5. Reconcile - the new manager should re-attach to the live agent
    console.log(' 4. Reconciling agent state...');
    await newManager.reconcileAfterRestart();

    // 6. Poll (up to ~2 minutes) until the agent leaves 'running'.
    // Fix: removed an unused `finalStatus` local that was assigned in the
    // loop but never read afterwards.
    console.log(' 5. Waiting for completion via new manager...');
    let attempts = 0;
    while (attempts < 60) {
      const refreshed = await harness.agentRepository.findById(agent.id);
      if (refreshed?.status !== 'running') break;
      await sleep(2000);
      attempts++;
    }
    const finalAgent = await harness.agentRepository.findById(agent.id);
    console.log(' 6. Final status:', finalAgent?.status);
    // Either completed successfully or crashed (both are valid outcomes)
    expect(['idle', 'crashed', 'stopped']).toContain(finalAgent?.status);
    if (finalAgent?.status === 'idle') {
      const result = await newManager.getResult(agent.id);
      console.log(' Result:', result?.message);
    }
  },
  EXTENDED_TEST_TIMEOUT
);
it(
  'marks dead agent as crashed during reconcile',
  async () => {
    // Fabricate an agent row in 'running' state with a PID that is
    // definitely not alive (high number that won't exist).
    console.log(' 1. Creating fake agent with dead PID...');
    const fakeAgent = await harness.agentRepository.create({
      name: 'dead-agent-test',
      taskId: null,
      initiativeId: null,
      sessionId: null,
      worktreeId: 'dead-worktree',
      status: 'running',
      mode: 'execute',
      provider: 'claude',
      accountId: null,
    });
    await harness.agentRepository.update(fakeAgent.id, { pid: 999999, outputFilePath: '/nonexistent/path' });

    // Sanity-check the fabricated row before reconciling.
    let record = await harness.agentRepository.findById(fakeAgent.id);
    expect(record?.status).toBe('running');
    expect(record?.pid).toBe(999999);

    // Reconcile with a fresh manager, exactly as a restarted server would.
    console.log(' 2. Creating new manager and reconciling...');
    const restartedManager = new MultiProviderAgentManager(
      harness.agentRepository,
      harness.workspaceRoot,
      harness.projectRepository,
      harness.accountRepository,
      harness.eventBus
    );
    await restartedManager.reconcileAfterRestart();

    // The dead PID must be detected and the row flipped to 'crashed'.
    record = await harness.agentRepository.findById(fakeAgent.id);
    expect(record?.status).toBe('crashed');
    console.log(' 3. Agent marked as crashed (dead PID detected)');
  },
  REAL_TEST_TIMEOUT
);
// Reconcile must finish processing a completed output file when the server
// died after the CLI finished but before the result was persisted.
it(
  'processes output file for dead agent during reconcile',
  async () => {
    // 1. Run a real agent to completion so a genuine output file exists on disk.
    console.log(' 1. Spawning agent to completion...');
    const agent = await harness.agentManager.spawn({
      taskId: null,
      prompt: MINIMAL_PROMPTS.done,
      mode: 'execute',
      provider: 'claude',
    });
    await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
    const dbAgent = await harness.agentRepository.findById(agent.id);
    const outputFilePath = dbAgent?.outputFilePath;
    expect(outputFilePath).toBeTruthy();
    console.log(' 2. Output file:', outputFilePath);
    // 2. Reset agent to "running" to simulate a crash mid-processing.
    await harness.agentRepository.update(agent.id, { status: 'running' });
    // Clear the result so reconcile is forced to re-process the output file.
    await harness.agentRepository.update(agent.id, { result: null });
    // Confirm the reset took effect before reconciling.
    let resetAgent = await harness.agentRepository.findById(agent.id);
    expect(resetAgent?.status).toBe('running');
    // 3. Create a new manager (simulated restart) and reconcile.
    console.log(' 3. Creating new manager and reconciling...');
    harness.clearEvents();
    const newManager = new MultiProviderAgentManager(
      harness.agentRepository,
      harness.workspaceRoot,
      harness.projectRepository,
      harness.accountRepository,
      harness.eventBus
    );
    await newManager.reconcileAfterRestart();
    // Reconcile processes the file asynchronously — give it a moment.
    await sleep(1000);
    // 4. Verify the agent's final state came from the output file.
    const finalAgent = await harness.agentRepository.findById(agent.id);
    console.log(' 4. Final status:', finalAgent?.status);
    // Should either be idle (processed successfully) or crashed (couldn't process).
    expect(['idle', 'crashed']).toContain(finalAgent?.status);
  },
  REAL_TEST_TIMEOUT
);
});
// Restart reconciliation must be idempotent with respect to event emission:
// reconciling an already-completed agent should not replay its output events.
describe('Event Consistency', () => {
  it(
    'does not duplicate events on restart',
    async () => {
      // 1. Spawn an agent whose prompt produces streaming output.
      console.log(' 1. Spawning agent...');
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.streaming,
        mode: 'execute',
        provider: 'claude',
      });
      // 2. Wait for some output events to accumulate.
      await sleep(3000);
      const initialOutputCount = harness.getEventsByType('agent:output').length;
      console.log(' 2. Initial output events:', initialOutputCount);
      // 3. Wait for completion.
      // NOTE(review): initialOutputCount/finalOutputCount are only logged, never
      // asserted — the real check is the post-reconcile count below.
      await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
      const finalOutputCount = harness.getEventsByType('agent:output').length;
      console.log(' 3. Final output events:', finalOutputCount);
      // 4. Create a new manager and reconcile (the agent is already complete).
      harness.clearEvents();
      const newManager = new MultiProviderAgentManager(
        harness.agentRepository,
        harness.workspaceRoot,
        harness.projectRepository,
        harness.accountRepository,
        harness.eventBus
      );
      await newManager.reconcileAfterRestart();
      await sleep(1000);
      // 5. Verify no output events were re-emitted for the finished agent.
      const postReconcileOutputCount = harness.getEventsByType('agent:output').length;
      console.log(' 4. Post-reconcile output events:', postReconcileOutputCount);
      // Should not have re-emitted all the old output events.
      expect(postReconcileOutputCount).toBe(0);
    },
    REAL_TEST_TIMEOUT
  );
});
});

View File

@@ -0,0 +1,378 @@
/**
* Real Provider Test Harness
*
* Extends the existing test infrastructure to use REAL MultiProviderAgentManager
* for integration testing with actual CLI providers like Claude and Codex.
*
* Unlike the standard TestHarness which uses MockAgentManager, this harness:
* - Uses real CLI spawning (costs real API credits!)
* - Provides poll-based waiting helpers
* - Captures events for inspection
* - Manages temp directories for worktrees
*/
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe } from 'vitest';
import type { DrizzleDatabase } from '../../../db/index.js';
import type { DomainEvent, EventBus } from '../../../events/types.js';
import { EventEmitterBus } from '../../../events/bus.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import type { AgentResult, PendingQuestions, AgentStatus } from '../../../agent/types.js';
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
import {
  DrizzleAgentRepository,
  DrizzleProjectRepository,
  DrizzleAccountRepository,
  DrizzleInitiativeRepository,
} from '../../../db/repositories/drizzle/index.js';
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';
/**
 * Resolve after roughly `ms` milliseconds; used by the polling helpers below.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles once the timer fires.
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Event bus that records every event it emits so tests can assert on the
 * emission history after the fact.
 */
export class CapturingEventBus extends EventEmitterBus {
  emittedEvents: DomainEvent[] = [];

  /** Record the event, then forward it to subscribers via the parent bus. */
  emit<T extends DomainEvent>(event: T): void {
    // Capture before forwarding so listeners that inspect the log see this event.
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /** All captured events whose `type` matches, in emission order. */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
    return this.emittedEvents.filter((event): event is T => event.type === type);
  }

  /** Drop the captured history (subscribers are unaffected). */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}
/**
 * Options for creating a real provider test harness.
 */
export interface RealProviderHarnessOptions {
  /**
   * Which provider to test (default: 'claude').
   * NOTE(review): `createRealProviderHarness` never reads this field — confirm
   * whether it is meant to be wired into the manager or can be removed.
   */
  provider?: 'claude' | 'codex';
  /**
   * Optional workspace root. When omitted, a temp directory is created,
   * git-initialized, and deleted again by `cleanup()`.
   */
  workspaceRoot?: string;
}
/**
 * Real Provider Test Harness interface.
 *
 * Provides everything needed to test against real CLI providers:
 * - In-memory database with real repositories
 * - Real MultiProviderAgentManager (spawns actual CLI processes)
 * - Event capture for verification
 * - Polling-based wait helpers (all of which THROW on timeout — see below)
 */
export interface RealProviderHarness {
  /** In-memory SQLite database */
  db: DrizzleDatabase;
  /** Event bus with capture capability */
  eventBus: CapturingEventBus;
  /** Real agent manager (not mock!) */
  agentManager: MultiProviderAgentManager;
  /** Workspace root directory */
  workspaceRoot: string;
  /** Agent repository */
  agentRepository: AgentRepository;
  /** Project repository */
  projectRepository: ProjectRepository;
  /** Account repository */
  accountRepository: AccountRepository;
  /** Initiative repository */
  initiativeRepository: InitiativeRepository;
  /**
   * Wait for an agent to reach a terminal status (idle, stopped, or crashed).
   * Polls the database at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms = 2 minutes)
   * @returns The agent result once terminal (crashed agents also return their
   *   error result), or null if the agent is missing or entered waiting_for_input
   * @throws Error when the timeout elapses before a terminal status is reached
   */
  waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;
  /**
   * Wait for an agent to enter waiting_for_input status.
   * Polls the database at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @returns The pending questions once waiting, or null if the agent is
   *   missing or finished without asking questions
   * @throws Error when the timeout elapses first
   */
  waitForAgentWaiting(agentId: string, timeoutMs?: number): Promise<PendingQuestions | null>;
  /**
   * Wait for an agent to reach a specific status.
   *
   * @param agentId - The agent ID to wait for
   * @param status - The target status
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @throws Error on timeout, if the agent disappears, or if it settled in a
   *   state from which the target status can no longer be reached
   */
  waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs?: number): Promise<void>;
  /**
   * Get captured events filtered by type.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[];
  /**
   * Clear all captured events.
   */
  clearEvents(): void;
  /**
   * Stop every agent still marked running (best-effort; stop errors ignored).
   */
  killAllAgents(): Promise<void>;
  /**
   * Clean up all resources: kills agents and removes the temp workspace when
   * the harness created it. Call this in afterAll/afterEach.
   */
  cleanup(): Promise<void>;
}
/** Default poll interval (ms) — every wait helper re-reads the DB at this cadence. */
const POLL_INTERVAL_MS = 1000;
/**
* Create a test harness for real provider integration tests.
*
* This creates:
* - In-memory SQLite database
* - Temp directory for worktrees (or uses provided workspace)
* - Real MultiProviderAgentManager
* - Event capture bus
*
* @example
* ```typescript
* let harness: RealProviderHarness;
*
* beforeAll(async () => {
* harness = await createRealProviderHarness({ provider: 'claude' });
* });
*
* afterAll(async () => {
* await harness.cleanup();
* });
*
* it('spawns and completes', async () => {
* const agent = await harness.agentManager.spawn({...});
* const result = await harness.waitForAgentCompletion(agent.id);
* expect(result?.success).toBe(true);
* });
* ```
*/
export async function createRealProviderHarness(
  options: RealProviderHarnessOptions = {}
): Promise<RealProviderHarness> {
  // Create workspace directory (temp if not provided).
  const workspaceRoot = options.workspaceRoot ?? (await mkdtemp(join(tmpdir(), 'cw-test-')));
  const ownedWorkspace = !options.workspaceRoot; // Track if we need to clean up

  // Initialize a git repo in the temp workspace (required for worktree operations).
  if (ownedWorkspace) {
    const { execSync } = await import('node:child_process');
    execSync('git init', { cwd: workspaceRoot, stdio: 'ignore' });
    execSync('git config user.email "test@test.com"', { cwd: workspaceRoot, stdio: 'ignore' });
    execSync('git config user.name "Test"', { cwd: workspaceRoot, stdio: 'ignore' });
    // Create initial commit (worktrees require at least one commit).
    // Seed the file via fs instead of shelling out to `touch` so the harness
    // also works on platforms without a POSIX shell (e.g. Windows).
    await writeFile(join(workspaceRoot, '.gitkeep'), '');
    execSync('git add .gitkeep && git commit -m "init"', { cwd: workspaceRoot, stdio: 'ignore' });
  }

  // In-memory database plus real (non-mock) repositories.
  const db = createTestDatabase();
  const agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  const initiativeRepository = new DrizzleInitiativeRepository(db);

  // Create event bus with capture (parent class already sets maxListeners to 100).
  const eventBus = new CapturingEventBus();

  // Create REAL agent manager (not mock!) — spawn() launches actual CLI processes.
  const agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus
  );

  // Build harness
  const harness: RealProviderHarness = {
    db,
    eventBus,
    agentManager,
    workspaceRoot,
    agentRepository,
    projectRepository,
    accountRepository,
    initiativeRepository,

    // Polls until a terminal status. Crashed agents also return their (error)
    // result; only a missing agent or waiting_for_input yields null. Throws on
    // timeout rather than returning null so hangs fail loudly.
    async waitForAgentCompletion(agentId: string, timeoutMs = 120000): Promise<AgentResult | null> {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'idle' || agent.status === 'stopped') {
          // Agent completed - get result
          return agentManager.getResult(agentId);
        }
        if (agent.status === 'crashed') {
          // Agent crashed - return the error result
          return agentManager.getResult(agentId);
        }
        if (agent.status === 'waiting_for_input') {
          // Agent is waiting - return null (not completed)
          return null;
        }
        // Still running - wait and check again
        await sleep(POLL_INTERVAL_MS);
      }
      throw new Error(`Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
    },

    // Polls until the agent asks questions; null if it finished without
    // asking. Throws on timeout.
    async waitForAgentWaiting(agentId: string, timeoutMs = 120000): Promise<PendingQuestions | null> {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'waiting_for_input') {
          return agentManager.getPendingQuestions(agentId);
        }
        if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
          // Agent finished without asking questions
          return null;
        }
        // Still running - wait and check again
        await sleep(POLL_INTERVAL_MS);
      }
      throw new Error(`Timeout waiting for agent ${agentId} to request input after ${timeoutMs}ms`);
    },

    // Polls until the agent hits `status`; fails fast when the target status
    // is 'running' and the agent has already settled elsewhere.
    async waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs = 120000): Promise<void> {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) {
          throw new Error(`Agent ${agentId} not found`);
        }
        if (agent.status === status) {
          return;
        }
        // Check for terminal states that mean we'll never reach target
        if (status === 'running' && ['idle', 'stopped', 'crashed', 'waiting_for_input'].includes(agent.status)) {
          throw new Error(`Agent ${agentId} already in terminal state ${agent.status}, cannot reach ${status}`);
        }
        await sleep(POLL_INTERVAL_MS);
      }
      throw new Error(`Timeout waiting for agent ${agentId} to reach status ${status} after ${timeoutMs}ms`);
    },

    getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
      return eventBus.getEventsByType<T>(type);
    },

    clearEvents(): void {
      eventBus.clearEvents();
    },

    // Best-effort stop of every agent still marked running.
    async killAllAgents(): Promise<void> {
      const agents = await agentRepository.findAll();
      for (const agent of agents) {
        if (agent.status === 'running') {
          try {
            await agentManager.stop(agent.id);
          } catch {
            // Ignore errors during cleanup
          }
        }
      }
    },

    async cleanup(): Promise<void> {
      // Kill any running agents
      await harness.killAllAgents();
      // Clean up workspace directory only if this harness created it
      if (ownedWorkspace) {
        try {
          await rm(workspaceRoot, { recursive: true, force: true });
        } catch {
          // Ignore cleanup errors
        }
      }
    },
  };
  return harness;
}
/**
 * Check if real Claude tests should run.
 * Set REAL_CLAUDE_TESTS=1 environment variable to enable (these tests spend
 * real API credits).
 */
export const shouldRunRealClaudeTests = process.env.REAL_CLAUDE_TESTS === '1';
/**
 * Check if real Codex tests should run.
 * Set REAL_CODEX_TESTS=1 environment variable to enable.
 */
export const shouldRunRealCodexTests = process.env.REAL_CODEX_TESTS === '1';
/**
 * Skip wrapper for Claude tests - skips unless REAL_CLAUDE_TESTS=1.
 * (Cast required: vitest types `describe.skip` more narrowly than `describe`.)
 */
export const describeRealClaude: typeof describe = shouldRunRealClaudeTests ? describe : (describe.skip as typeof describe);
/**
 * Skip wrapper for Codex tests - skips unless REAL_CODEX_TESTS=1.
 */
export const describeRealCodex: typeof describe = shouldRunRealCodexTests ? describe : (describe.skip as typeof describe);
/**
 * Default test timeout for real CLI tests (2 minutes).
 * Real API calls take 5-30 seconds typically.
 */
export const REAL_TEST_TIMEOUT = 120000;
/**
 * Extended test timeout for slow tests (5 minutes).
 * Used for schema retry tests and crash recovery tests.
 */
export const EXTENDED_TEST_TIMEOUT = 300000;

View File

@@ -0,0 +1,56 @@
/**
* Real Provider Integration Tests
*
* This module provides infrastructure for testing against real CLI providers.
* Tests are expensive (real API calls) and skipped by default.
*
* ## Running Tests
*
* ```bash
* # Claude tests only
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/ --test-timeout=300000
*
* # Codex tests only
* REAL_CODEX_TESTS=1 npm test -- src/test/integration/real-providers/codex-manager.test.ts
*
* # All real provider tests
* REAL_CLAUDE_TESTS=1 REAL_CODEX_TESTS=1 npm test -- src/test/integration/real-providers/
* ```
*
* ## Cost Estimates
*
* | Suite | Tests | Est. Cost | Duration |
* |-------|-------|-----------|----------|
* | Output Parsing | 3 | $0.06 | ~2 min |
* | Schema Validation | 4 | $0.22 | ~4 min |
* | Crash Recovery | 3 | $0.08 | ~3 min |
* | Session Resume | 2 | $0.08 | ~3 min |
* | Codex Integration | 2 | $0.10 | ~2 min |
* | **TOTAL** | **14** | **~$0.54** | **~14 min** |
*
* ## Test Files
*
* - `harness.ts` - RealProviderHarness factory and utilities
* - `prompts.ts` - Minimal cost test prompts
* - `claude-manager.test.ts` - Claude spawn/resume/output tests
* - `codex-manager.test.ts` - Codex provider tests
* - `schema-retry.test.ts` - Schema validation + retry tests
* - `crash-recovery.test.ts` - Server restart simulation
* - `sample-outputs/` - Captured CLI output for parser unit tests
*/
// Barrel re-exports: test files import the harness utilities and the
// minimal-cost prompts from this module root instead of deep paths.
export {
  createRealProviderHarness,
  CapturingEventBus,
  sleep,
  shouldRunRealClaudeTests,
  shouldRunRealCodexTests,
  describeRealClaude,
  describeRealCodex,
  REAL_TEST_TIMEOUT,
  EXTENDED_TEST_TIMEOUT,
  type RealProviderHarness,
  type RealProviderHarnessOptions,
} from './harness.js';
export { MINIMAL_PROMPTS, CODEX_PROMPTS } from './prompts.js';
View File

@@ -0,0 +1,113 @@
/**
* Minimal Cost Test Prompts
*
* Carefully crafted prompts designed to minimize token usage while
* testing specific CLI behaviors. Each prompt aims for the smallest
* possible API cost while still exercising the target functionality.
*
* Cost estimates assume Claude Sonnet pricing (~$3/M input, $15/M output).
*/
export const MINIMAL_PROMPTS = {
  /**
   * ~$0.01 - Cheapest done response
   * Tests: basic spawn → completion flow, status parsing
   */
  done: `Output exactly this JSON with no other text:
{"status":"done","result":"ok"}`,
  /**
   * ~$0.01 - Cheapest questions response
   * Tests: waiting_for_input status, questions array parsing
   */
  questions: `Output exactly this JSON with no other text:
{"status":"questions","questions":[{"id":"q1","question":"What is your name?"}]}`,
  /**
   * ~$0.03 - Slow task for timing tests
   * Tests: streaming during long-running task, crash recovery
   * Note: the prompt does not enforce a real delay; the step-by-step counting
   * merely stretches the response so output arrives over a longer window.
   */
  slow: `Think through a simple problem step by step, counting from 1 to 10 slowly, then output:
{"status":"done","result":"counted to 10"}`,
  /**
   * ~$0.02 - Produces text deltas for streaming tests
   * Tests: text_delta event parsing, output buffering
   */
  streaming: `Count from 1 to 5, outputting each number, then output:
{"status":"done","result":"counted"}`,
  /**
   * ~$0.03 - Deliberately produces non-JSON first
   * Tests: schema validation failure, retry logic
   * Note: the model may still emit valid JSON on the first attempt, so retry
   * tests treat zero retries as an acceptable outcome.
   */
  badThenGood: `First say "thinking..." on its own line, then output:
{"status":"done","result":"fixed"}`,
  /**
   * ~$0.02 - Multiple questions
   * Tests: questions array with multiple items (and unique question IDs)
   */
  multipleQuestions: `Output exactly this JSON with no other text:
{"status":"questions","questions":[{"id":"q1","question":"First question?"},{"id":"q2","question":"Second question?"}]}`,
  /**
   * ~$0.01 - Error signal
   * Tests: error status handling
   */
  error: `Output exactly this JSON with no other text:
{"status":"error","error":"Test error message"}`,
  /**
   * ~$0.02 - Answer continuation
   * Tests: session resume with answers
   *
   * @param answers - Map of question ID to the user's answer text.
   * @returns A resume prompt echoing the answers and asking for a done signal.
   */
  answerContinuation: (answers: Record<string, string>): string => {
    const answerLines = Object.entries(answers)
      .map(([id, answer]) => `${id}: ${answer}`)
      .join('\n');
    return `I received your answers:
${answerLines}
Now complete the task by outputting:
{"status":"done","result":"completed with answers"}`;
  },
  /**
   * ~$0.02 - Context complete for discuss mode
   * Tests: discuss mode output handling (now uses universal done signal)
   */
  discussComplete: `Output exactly this JSON with no other text:
{"status":"done"}`,
  /**
   * ~$0.02 - Plan complete
   * Tests: plan mode output handling (now uses universal done signal)
   */
  planComplete: `Output exactly this JSON with no other text:
{"status":"done"}`,
  /**
   * ~$0.02 - Detail complete
   * Tests: detail mode output handling (now uses universal done signal)
   */
  detailComplete: `Output exactly this JSON with no other text:
{"status":"done"}`,
} as const;
/**
 * Prompts specifically for Codex provider testing.
 * Codex may have different output format requirements, so these prompts avoid
 * the strict JSON status contract used by MINIMAL_PROMPTS.
 */
export const CODEX_PROMPTS = {
  /**
   * Basic completion for Codex
   */
  done: `Complete this simple task: output "done" and finish.`,
  /**
   * Produces streaming output (multiple item events before turn completion)
   */
  streaming: `Count from 1 to 5, saying each number aloud, then say "finished".`,
} as const;

View File

@@ -0,0 +1,68 @@
# Sample CLI Outputs
This directory contains captured real CLI outputs for use in parser unit tests.
These files allow testing stream parsers without incurring API costs.
## Files
### claude-stream-success.jsonl
A successful Claude CLI session (v2.1.33) that:
- Initializes with `system` event containing `session_id`
- Emits `assistant` message with content
- Completes with `result` event containing `done` status JSON
### claude-stream-questions.jsonl
A Claude CLI session that:
- Initializes with `system` event containing `session_id`
- Emits `assistant` message with content wrapped in markdown code block
- Completes with `result` event containing `questions` status JSON
### codex-stream-success.jsonl
A successful Codex CLI session (v0.98.0) that:
- Starts with `thread.started` event containing `thread_id`
- Emits `turn.started`, `item.completed` events
- Completes with `turn.completed` event containing usage stats
## Event Type Differences
### Claude CLI (`--output-format stream-json`)
- `system` (subtype: `init`) - Contains `session_id`, tools, model info
- `assistant` - Contains message content in `content[].text`
- `result` - Contains final `result` text and `total_cost_usd`
### Codex CLI (`--json`)
- `thread.started` - Contains `thread_id` (equivalent to session_id)
- `turn.started` - Marks beginning of turn
- `item.completed` - Contains reasoning or agent_message items
- `turn.completed` - Contains usage stats
## Usage
These files can be used to test stream parsers in isolation:
```typescript
import { readFileSync } from 'fs';
import { ClaudeStreamParser } from '../../../agent/providers/parsers/claude.js';
const output = readFileSync('sample-outputs/claude-stream-success.jsonl', 'utf-8');
const parser = new ClaudeStreamParser();
for (const line of output.split('\n')) {
if (line.trim()) {
const events = parser.parseLine(line);
// Assert on events...
}
}
```
## Capturing New Outputs
### Claude
```bash
claude -p "your prompt" --output-format stream-json --verbose > output.jsonl
```
### Codex
```bash
codex exec --full-auto --json "your prompt" > output.jsonl
```

View File

@@ -0,0 +1,3 @@
{"type":"system","subtype":"init","cwd":"/Users/lukasmay/development/projects/codewalk-district","session_id":"774631da-8e54-445e-9ccb-eea8e7fe805e","tools":["Task","TaskOutput","Bash","Glob","Grep","ExitPlanMode","Read","Edit","Write","NotebookEdit","WebFetch","TodoWrite","WebSearch","TaskStop","AskUserQuestion","Skill","EnterPlanMode","ToolSearch"],"mcp_servers":[],"model":"claude-opus-4-6","permissionMode":"default","slash_commands":["keybindings-help","debug","gsd:define-requirements","gsd:list-phase-assumptions","gsd:debug","gsd:remove-phase","gsd:complete-milestone","gsd:research-phase","gsd:plan-phase","gsd:check-todos","gsd:pause-work","gsd:execute-plan","gsd:research-project","gsd:add-todo","gsd:plan-fix","gsd:resume-work","gsd:progress","gsd:help","gsd:discuss-milestone","gsd:add-phase","gsd:create-roadmap","gsd:map-codebase","gsd:whats-new","gsd:insert-phase","gsd:new-milestone","gsd:new-project","gsd:execute-phase","gsd:verify-work","gsd:discuss-phase","compact","context","cost","init","pr-comments","release-notes","review","security-review","insights"],"apiKeySource":"none","claude_code_version":"2.1.33","output_style":"default","agents":["Bash","general-purpose","statusline-setup","Explore","Plan","claude-code-guide","jira-sw-assessment"],"skills":["keybindings-help","debug"],"plugins":[],"uuid":"224c683c-41f4-4fdd-9af6-f8cdca366ec1"}
{"type":"assistant","message":{"model":"claude-opus-4-6","id":"msg_01CfDymxvSRFodJ5Zm6NjLHV","type":"message","role":"assistant","content":[{"type":"text","text":"```json\n{\"status\":\"questions\",\"questions\":[{\"id\":\"q1\",\"question\":\"What is your name?\"},{\"id\":\"q2\",\"question\":\"What is the deadline?\"}]}\n```"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":5983,"cache_read_input_tokens":18026,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":5983},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"},"context_management":null},"parent_tool_use_id":null,"session_id":"774631da-8e54-445e-9ccb-eea8e7fe805e","uuid":"29288f20-766c-4047-82f5-679024188f52"}
{"type":"result","subtype":"success","is_error":false,"duration_ms":3213,"duration_api_ms":3203,"num_turns":1,"result":"```json\n{\"status\":\"questions\",\"questions\":[{\"id\":\"q1\",\"question\":\"What is your name?\"},{\"id\":\"q2\",\"question\":\"What is the deadline?\"}]}\n```","stop_reason":null,"session_id":"774631da-8e54-445e-9ccb-eea8e7fe805e","total_cost_usd":0.04754675,"usage":{"input_tokens":3,"cache_creation_input_tokens":5983,"cache_read_input_tokens":18026,"output_tokens":45,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":5983,"ephemeral_5m_input_tokens":0}},"modelUsage":{"claude-opus-4-6":{"inputTokens":3,"outputTokens":45,"cacheReadInputTokens":18026,"cacheCreationInputTokens":5983,"webSearchRequests":0,"costUSD":0.04754675,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"uuid":"08db08cd-0f12-47ae-8c21-c29e11a6d7df"}

View File

@@ -0,0 +1,3 @@
{"type":"system","subtype":"init","cwd":"/Users/lukasmay/development/projects/codewalk-district","session_id":"a0aa6272-b3a6-443c-9ef5-de3a2450dc6d","tools":["Task","TaskOutput","Bash","Glob","Grep","ExitPlanMode","Read","Edit","Write","NotebookEdit","WebFetch","TodoWrite","WebSearch","TaskStop","AskUserQuestion","Skill","EnterPlanMode","ToolSearch"],"mcp_servers":[],"model":"claude-opus-4-6","permissionMode":"default","slash_commands":["keybindings-help","debug","gsd:define-requirements","gsd:list-phase-assumptions","gsd:debug","gsd:remove-phase","gsd:complete-milestone","gsd:research-phase","gsd:plan-phase","gsd:check-todos","gsd:pause-work","gsd:execute-plan","gsd:research-project","gsd:add-todo","gsd:plan-fix","gsd:resume-work","gsd:progress","gsd:help","gsd:discuss-milestone","gsd:add-phase","gsd:create-roadmap","gsd:map-codebase","gsd:whats-new","gsd:insert-phase","gsd:new-milestone","gsd:new-project","gsd:execute-phase","gsd:verify-work","gsd:discuss-phase","compact","context","cost","init","pr-comments","release-notes","review","security-review","insights"],"apiKeySource":"none","claude_code_version":"2.1.33","output_style":"default","agents":["Bash","general-purpose","statusline-setup","Explore","Plan","claude-code-guide","jira-sw-assessment"],"skills":["keybindings-help","debug"],"plugins":[],"uuid":"c1d6dced-ca04-4335-a624-624660479b7b"}
{"type":"assistant","message":{"model":"claude-opus-4-6","id":"msg_01RjSiQY1RUgT47j73Dom93j","type":"message","role":"assistant","content":[{"type":"text","text":"{\"status\":\"done\",\"result\":\"ok\"}"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":5958,"cache_read_input_tokens":18026,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":5958},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"},"context_management":null},"parent_tool_use_id":null,"session_id":"a0aa6272-b3a6-443c-9ef5-de3a2450dc6d","uuid":"f1c8695a-534e-4de2-a684-fa4a1ec03749"}
{"type":"result","subtype":"success","is_error":false,"duration_ms":2465,"duration_api_ms":2453,"num_turns":1,"result":"{\"status\":\"done\",\"result\":\"ok\"}","stop_reason":null,"session_id":"a0aa6272-b3a6-443c-9ef5-de3a2450dc6d","total_cost_usd":0.046565499999999996,"usage":{"input_tokens":3,"cache_creation_input_tokens":5958,"cache_read_input_tokens":18026,"output_tokens":12,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":5958,"ephemeral_5m_input_tokens":0}},"modelUsage":{"claude-opus-4-6":{"inputTokens":3,"outputTokens":12,"cacheReadInputTokens":18026,"cacheCreationInputTokens":5958,"webSearchRequests":0,"costUSD":0.046565499999999996,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"uuid":"53139e08-b4f3-4f94-b129-82759f77fdca"}

View File

@@ -0,0 +1,5 @@
{"type":"thread.started","thread_id":"019c3242-955e-7140-9978-517f0b5a22cb"}
{"type":"turn.started"}
{"type":"item.completed","item":{"id":"item_0","type":"reasoning","text":"**Confirming simple greeting task**"}}
{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"Hello!"}}
{"type":"turn.completed","usage":{"input_tokens":8458,"cached_input_tokens":6912,"output_tokens":32}}

View File

@@ -0,0 +1,306 @@
/**
* Schema Validation & Retry Integration Tests
*
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run these tests:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/schema-retry.test.ts --test-timeout=300000
* ```
*
* Tests covered:
* - Valid JSON output validation
* - Questions status parsing
* - Schema validation failure with retry
* - Max retry limit handling
*
* Estimated cost: ~$0.20 per full run (includes retries)
*/
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import {
createRealProviderHarness,
describeRealClaude,
REAL_TEST_TIMEOUT,
EXTENDED_TEST_TIMEOUT,
type RealProviderHarness,
} from './harness.js';
import { MINIMAL_PROMPTS } from './prompts.js';
import type { AgentResumedEvent, AgentCrashedEvent } from '../../../events/types.js';
describeRealClaude('Schema Validation & Retry', () => {
let harness: RealProviderHarness;
// One harness is shared across the whole suite: creating it sets up a temp
// git workspace and in-memory DB, and every spawn costs real API credits.
beforeAll(async () => {
  console.log('\n=== Running Schema Validation & Retry Tests ===');
  console.log('These tests call the real Claude API and incur costs.');
  console.log('Retry tests may take longer and cost more.\n');
  harness = await createRealProviderHarness({ provider: 'claude' });
});
afterAll(async () => {
  // Stops any stray agents and removes the temp workspace.
  await harness.cleanup();
});
beforeEach(() => {
  // Tests assert on captured event counts, so start each case with a clean log.
  harness.clearEvents();
});
// Happy-path schema validation: well-formed done/questions JSON must be
// accepted on the first attempt, with no retry (agent:resumed) events emitted.
describe('Valid Output', () => {
  it(
    'validates done status output',
    async () => {
      // Spawn agent with the minimal done prompt.
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.done,
        mode: 'execute',
        provider: 'claude',
      });
      // Wait for completion.
      const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
      // Verify the agent settled as idle with a successful result.
      const dbAgent = await harness.agentRepository.findById(agent.id);
      expect(dbAgent?.status).toBe('idle');
      expect(result?.success).toBe(true);
      // No retry events should have been emitted for a valid first response.
      const resumeEvents = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
      expect(resumeEvents.length).toBe(0);
      console.log(' Status: idle (valid done output)');
      console.log(' Result:', result?.message);
    },
    REAL_TEST_TIMEOUT
  );
  it(
    'validates questions status output',
    async () => {
      // Spawn agent with the single-question prompt.
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.questions,
        mode: 'execute',
        provider: 'claude',
      });
      // Wait for the waiting_for_input transition.
      const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
      // Verify the questions payload passed schema validation.
      expect(questions).toBeTruthy();
      expect(questions?.questions).toBeInstanceOf(Array);
      expect(questions?.questions.length).toBeGreaterThan(0);
      // Each question should have id and question fields populated.
      for (const q of questions?.questions ?? []) {
        expect(q.id).toBeTruthy();
        expect(q.question).toBeTruthy();
      }
      const dbAgent = await harness.agentRepository.findById(agent.id);
      expect(dbAgent?.status).toBe('waiting_for_input');
      // No retry events for a valid first response.
      const resumeEvents = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
      expect(resumeEvents.length).toBe(0);
      console.log(' Status: waiting_for_input (valid questions output)');
      console.log(' Questions:', questions?.questions.length);
    },
    REAL_TEST_TIMEOUT
  );
  it(
    'validates multiple questions',
    async () => {
      // Spawn agent with the two-question prompt.
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.multipleQuestions,
        mode: 'execute',
        provider: 'claude',
      });
      // Wait for the waiting_for_input transition.
      const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
      // Verify multiple questions came through.
      expect(questions?.questions.length).toBeGreaterThanOrEqual(2);
      // Each question should carry a unique ID (required to route answers back).
      const ids = questions?.questions.map((q) => q.id) ?? [];
      const uniqueIds = new Set(ids);
      expect(uniqueIds.size).toBe(ids.length);
      console.log(' Questions:', questions?.questions.map((q) => q.id).join(', '));
    },
    REAL_TEST_TIMEOUT
  );
});
describe('Retry Logic', () => {
it(
'retries when output does not match schema',
async () => {
// Prompt that produces non-JSON first, then valid JSON
// Note: Claude may or may not produce invalid output first
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.badThenGood,
mode: 'execute',
provider: 'claude',
});
// Wait for completion (may involve retries)
const result = await harness.waitForAgentCompletion(agent.id, EXTENDED_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
// Either succeeded with retry OR succeeded first time
expect(['idle', 'crashed']).toContain(dbAgent?.status);
// Check for retry events
const resumeEvents = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
console.log(' Retry attempts:', resumeEvents.length);
console.log(' Final status:', dbAgent?.status);
if (dbAgent?.status === 'idle') {
expect(result?.success).toBe(true);
console.log(' Result:', result?.message);
} else {
// Crashed after max retries
const crashedEvents = harness.getEventsByType<AgentCrashedEvent>('agent:crashed');
expect(crashedEvents.length).toBeGreaterThan(0);
console.log(' Crashed after retries');
}
},
EXTENDED_TEST_TIMEOUT
);
it(
'extracts JSON from markdown code blocks',
async () => {
// Prompt that produces JSON wrapped in markdown
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: `Output the result wrapped in a markdown code block like this:
\`\`\`json
{"status":"done","result":"extracted from markdown"}
\`\`\``,
mode: 'execute',
provider: 'claude',
});
// Wait for completion
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
console.log(' Status:', dbAgent?.status);
console.log(' Result:', result?.message);
// Should succeed (JSON extraction from code block)
if (dbAgent?.status === 'idle') {
expect(result?.success).toBe(true);
}
},
REAL_TEST_TIMEOUT
);
it(
'extracts JSON from text with surrounding content',
async () => {
// Prompt that produces JSON with text before it
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: `First say "Here is my response:" then output the JSON:
{"status":"done","result":"extracted from text"}`,
mode: 'execute',
provider: 'claude',
});
// Wait for completion
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
console.log(' Status:', dbAgent?.status);
console.log(' Result:', result?.message);
// Should succeed (JSON extraction from last {...} block)
if (dbAgent?.status === 'idle') {
expect(result?.success).toBe(true);
}
},
REAL_TEST_TIMEOUT
);
});
describe('Mode-Specific Schemas', () => {
it(
'validates discuss mode output',
async () => {
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.discussComplete,
mode: 'discuss',
provider: 'claude',
});
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
expect(dbAgent?.status).toBe('idle');
expect(result?.success).toBe(true);
console.log(' Discuss mode result:', result?.message);
},
REAL_TEST_TIMEOUT
);
it(
'validates plan mode output',
async () => {
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.planComplete,
mode: 'plan',
provider: 'claude',
});
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
expect(dbAgent?.status).toBe('idle');
expect(result?.success).toBe(true);
console.log(' Plan mode result:', result?.message);
},
REAL_TEST_TIMEOUT
);
it(
'validates detail mode output',
async () => {
const agent = await harness.agentManager.spawn({
taskId: null,
prompt: MINIMAL_PROMPTS.detailComplete,
mode: 'detail',
provider: 'claude',
});
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
const dbAgent = await harness.agentRepository.findById(agent.id);
expect(dbAgent?.status).toBe('idle');
expect(result?.success).toBe(true);
console.log(' Detail mode result:', result?.message);
},
REAL_TEST_TIMEOUT
);
});
});

// ---------------------------------------------------------------------------
// Second test file: topologicalSortPhases unit tests
// (replaces concatenation artifact: "View File" / "@@ -0,0 +1,134 @@")
// ---------------------------------------------------------------------------
import { describe, it, expect } from 'vitest';
import { topologicalSortPhases, type PhaseForSort, type DependencyEdge } from '@codewalk-district/shared';
/** Build a minimal phase fixture carrying only the fields the sorter reads. */
const mkPhase = (id: string, createdAt: string | Date): PhaseForSort => ({ id, createdAt });
describe('topologicalSortPhases', () => {
  /** Project a sorted result down to its phase ids for compact assertions. */
  const idsOf = (sorted: Array<{ id: string }>): string[] => sorted.map((phase) => phase.id);

  it('should return empty array for empty input', () => {
    expect(topologicalSortPhases([], [])).toEqual([]);
  });

  it('should return phases in createdAt order when no edges', () => {
    const unordered = [
      mkPhase('c', '2026-01-03'),
      mkPhase('a', '2026-01-01'),
      mkPhase('b', '2026-01-02'),
    ];
    expect(idsOf(topologicalSortPhases(unordered, []))).toEqual(['a', 'b', 'c']);
  });

  it('should sort linear chain correctly', () => {
    // A -> B -> C (B depends on A, C depends on B)
    const chain: DependencyEdge[] = [
      { phaseId: 'b', dependsOnPhaseId: 'a' },
      { phaseId: 'c', dependsOnPhaseId: 'b' },
    ];
    const sorted = topologicalSortPhases(
      [mkPhase('a', '2026-01-01'), mkPhase('b', '2026-01-02'), mkPhase('c', '2026-01-03')],
      chain
    );
    expect(idsOf(sorted)).toEqual(['a', 'b', 'c']);
  });

  it('should handle diamond dependency', () => {
    //   A
    //  / \
    // B   C
    //  \ /
    //   D
    const diamond: DependencyEdge[] = [
      { phaseId: 'b', dependsOnPhaseId: 'a' },
      { phaseId: 'c', dependsOnPhaseId: 'a' },
      { phaseId: 'd', dependsOnPhaseId: 'b' },
      { phaseId: 'd', dependsOnPhaseId: 'c' },
    ];
    const sorted = topologicalSortPhases(
      [
        mkPhase('a', '2026-01-01'),
        mkPhase('b', '2026-01-02'),
        mkPhase('c', '2026-01-03'),
        mkPhase('d', '2026-01-04'),
      ],
      diamond
    );
    // A must come first, D must come last, B before C by createdAt
    expect(sorted[0].id).toBe('a');
    expect(sorted[3].id).toBe('d');
    expect(idsOf(sorted)).toEqual(['a', 'b', 'c', 'd']);
  });

  it('should use createdAt as deterministic tiebreaker', () => {
    // Three independent phases — should sort by createdAt
    const independent = [
      mkPhase('z', '2026-01-03'),
      mkPhase('y', '2026-01-01'),
      mkPhase('x', '2026-01-02'),
    ];
    expect(idsOf(topologicalSortPhases(independent, []))).toEqual(['y', 'x', 'z']);
  });

  it('should handle cycle gracefully by appending cycled nodes', () => {
    // A -> B -> A (cycle), C is independent
    const cyclic: DependencyEdge[] = [
      { phaseId: 'b', dependsOnPhaseId: 'a' },
      { phaseId: 'a', dependsOnPhaseId: 'b' },
    ];
    const sorted = topologicalSortPhases(
      [mkPhase('a', '2026-01-01'), mkPhase('b', '2026-01-02'), mkPhase('c', '2026-01-03')],
      cyclic
    );
    // C has no deps so it comes first; A and B are appended (cycle members)
    // in createdAt order.
    expect(sorted.length).toBe(3);
    expect(idsOf(sorted)).toEqual(['c', 'a', 'b']);
  });

  it('should ignore edges referencing non-existent phases', () => {
    const dangling: DependencyEdge[] = [{ phaseId: 'b', dependsOnPhaseId: 'nonexistent' }];
    const sorted = topologicalSortPhases(
      [mkPhase('a', '2026-01-01'), mkPhase('b', '2026-01-02')],
      dangling
    );
    // Edge is ignored, both treated as independent
    expect(idsOf(sorted)).toEqual(['a', 'b']);
  });

  it('should handle single phase with no edges', () => {
    expect(idsOf(topologicalSortPhases([mkPhase('only', '2026-01-01')], []))).toEqual(['only']);
  });

  it('should work with Date objects', () => {
    const sorted = topologicalSortPhases(
      [mkPhase('b', new Date('2026-01-02')), mkPhase('a', new Date('2026-01-01'))],
      [{ phaseId: 'b', dependsOnPhaseId: 'a' }]
    );
    expect(idsOf(sorted)).toEqual(['a', 'b']);
  });

  it('should preserve extra properties on phase objects', () => {
    const enriched = [
      { id: 'a', createdAt: '2026-01-01', name: 'Alpha', status: 'pending' },
      { id: 'b', createdAt: '2026-01-02', name: 'Beta', status: 'active' },
    ];
    const sorted = topologicalSortPhases(enriched, []);
    expect(sorted[0].name).toBe('Alpha');
    expect(sorted[1].name).toBe('Beta');
  });
});