From 0ed657b644d35f2d7373dc90ff67217490f24338 Mon Sep 17 00:00:00 2001 From: Lukas May Date: Mon, 2 Mar 2026 12:17:52 +0900 Subject: [PATCH] feat: Add VCR-style cassette testing system for agent subprocess pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements cassette recording/replay to test the full agent execution pipeline (ProcessManager → FileTailer → OutputHandler → SignalManager) without real AI API calls. Key components: - `CassetteProcessManager`: extends ProcessManager, intercepts spawnDetached to replay cassettes or record real runs on completion - `replay-worker.mjs`: standalone node script that replays JSONL + signal.json as a subprocess, exercising the complete file-based output pipeline - `CassetteStore`: reads/writes cassette JSON files keyed by SHA256 hash - `normalizer.ts`: strips dynamic content (UUIDs, temp paths, timestamps, session numbers) from prompts for stable cassette keys - `key.ts`: hashes normalized prompt + provider args + worktree file content (worktree hash detects content drift for execute-mode agents) - `createCassetteHarness()`: wraps RealProviderHarness with cassette support, same interface so existing real-provider tests work unchanged Mode control via env vars: (default) → replay: cassette must exist (safe for CI) CW_CASSETTE_RECORD=1 → auto: replay if exists, record if missing CW_CASSETTE_FORCE_RECORD=1 → record: always run real agent, overwrite cassette MultiProviderAgentManager gains an optional `processManagerOverride` constructor parameter for clean dependency injection without changing existing callers. Cassette files live in src/test/cassettes/ and are intended to be committed to git so CI runs without API access. --- src/agent/manager.ts | 3 +- src/test/cassette/cassette.test.ts | 257 +++++++++++++++++++++++++++ src/test/cassette/harness.ts | 200 +++++++++++++++++++++ src/test/cassette/index.ts | 6 + src/test/cassette/key.ts | 76 ++++++++ src/test/cassette/normalizer.ts | 67 +++++++ src/test/cassette/process-manager.ts | 174 ++++++++++++++++++ src/test/cassette/replay-worker.mjs | 48 +++++ src/test/cassette/store.ts | 50 ++++++ src/test/cassette/types.ts | 35 ++++ src/test/cassettes/.gitkeep | 0 11 files changed, 915 insertions(+), 1 deletion(-) create mode 100644 src/test/cassette/cassette.test.ts create mode 100644 src/test/cassette/harness.ts create mode 100644 src/test/cassette/index.ts create mode 100644 src/test/cassette/key.ts create mode 100644 src/test/cassette/normalizer.ts create mode 100644 src/test/cassette/process-manager.ts create mode 100644 src/test/cassette/replay-worker.mjs create mode 100644 src/test/cassette/store.ts create mode 100644 src/test/cassette/types.ts create mode 100644 src/test/cassettes/.gitkeep diff --git a/src/agent/manager.ts b/src/agent/manager.ts index 3043f9d..12e6f8d 100644 --- a/src/agent/manager.ts +++ b/src/agent/manager.ts @@ -80,9 +80,10 @@ export class MultiProviderAgentManager implements AgentManager { private pageRepository?: PageRepository, private logChunkRepository?: LogChunkRepository, private debug: boolean = false, + processManagerOverride?: ProcessManager, ) { this.signalManager = new FileSystemSignalManager(); - this.processManager = new ProcessManager(workspaceRoot, projectRepository); + this.processManager = processManagerOverride ?? new ProcessManager(workspaceRoot, projectRepository); this.credentialHandler = new CredentialHandler(workspaceRoot, accountRepository, credentialManager); this.outputHandler = new OutputHandler(repository, eventBus, changeSetRepository, phaseRepository, taskRepository, pageRepository, this.signalManager); this.cleanupManager = new CleanupManager(workspaceRoot, repository, projectRepository, eventBus, debug, this.signalManager); diff --git a/src/test/cassette/cassette.test.ts b/src/test/cassette/cassette.test.ts new file mode 100644 index 0000000..643eb8a --- /dev/null +++ b/src/test/cassette/cassette.test.ts @@ -0,0 +1,257 @@ +/** + * Cassette System Unit Tests + * + * Verifies normalizer, key generation, and store in isolation. + * These run without any real processes or API calls. + */ + +import { describe, it, expect, beforeEach } from 'vitest'; +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { normalizePrompt, stripPromptFromArgs } from './normalizer.js'; +import { hashWorktreeFiles, buildCassetteKey } from './key.js'; +import { CassetteStore } from './store.js'; +import type { CassetteEntry, CassetteKey } from './types.js'; + +// --------------------------------------------------------------------------- +// Normalizer +// --------------------------------------------------------------------------- + +describe('normalizePrompt', () => { + it('strips UUIDs', () => { + const prompt = 'Agent 550e8400-e29b-41d4-a716-446655440000 is running task abc123ef-0000-0000-0000-000000000000'; + const result = normalizePrompt(prompt, ''); + expect(result).not.toContain('550e8400'); + expect(result).not.toContain('abc123ef'); + expect(result).toContain('__UUID__'); + }); + + it('strips workspace root path', () => { + const workspaceRoot = '/tmp/cw-test-abc123'; + const prompt = `Working directory: ${workspaceRoot}/agent-workdirs/my-agent`; + const result = normalizePrompt(prompt, workspaceRoot); + expect(result).not.toContain(workspaceRoot); + expect(result).toContain('__WORKSPACE__'); + }); + + it('strips ISO timestamps', () => { + const prompt = 'Started at 2026-03-01T14:30:00Z, last seen 2026-03-01T14:35:22.456Z'; + const result = normalizePrompt(prompt, ''); + expect(result).not.toContain('2026-03-01'); + expect(result).toContain('__TIMESTAMP__'); + }); + + it('strips session numbers', () => { + const prompt = 'Resuming session 3 with agent session-42'; + const result = normalizePrompt(prompt, ''); + expect(result).toContain('session__N__'); + expect(result).not.toContain('session 3'); + expect(result).not.toContain('session-42'); + }); + + it('leaves static content unchanged', () => { + const prompt = 'You are a Worker agent. Execute the assigned coding task.'; + const result = normalizePrompt(prompt, '/tmp/ws'); + expect(result).toBe(prompt); + }); + + it('strips workspace root before UUID replacement to avoid double-normalizing', () => { + const workspaceRoot = '/tmp/cw-test-abc123'; + const uuid = '550e8400-e29b-41d4-a716-446655440000'; + const prompt = `Dir: ${workspaceRoot}/agents/${uuid}`; + const result = normalizePrompt(prompt, workspaceRoot); + expect(result).toBe('Dir: __WORKSPACE__/agents/__UUID__'); + }); +}); + +describe('stripPromptFromArgs', () => { + it('strips -p style (Claude native)', () => { + const prompt = 'Do the task.'; + const args = ['--dangerously-skip-permissions', '--verbose', '-p', prompt, '--output-format', 'stream-json']; + const result = stripPromptFromArgs(args, prompt); + expect(result).toEqual(['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json']); + }); + + it('strips --prompt style', () => { + const prompt = 'Do the task.'; + const args = ['--flag', '--prompt', prompt, '--json']; + const result = stripPromptFromArgs(args, prompt); + expect(result).toEqual(['--flag', '--json']); + }); + + it('strips bare positional prompt', () => { + const prompt = 'Do the task.'; + const args = ['--full-auto', prompt]; + const result = stripPromptFromArgs(args, prompt); + expect(result).toEqual(['--full-auto']); + }); + + it('returns unchanged args when prompt is empty', () => { + const args = ['--flag', '--value']; + expect(stripPromptFromArgs(args, '')).toEqual(args); + }); +}); + +// --------------------------------------------------------------------------- +// Key generation +// --------------------------------------------------------------------------- + +describe('buildCassetteKey', () => { + const baseKey: CassetteKey = { + normalizedPrompt: 'You are a Worker agent.', + providerName: 'claude', + modelArgs: ['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json'], + worktreeHash: 'empty', + }; + + it('produces a 32-char hex string', () => { + const key = buildCassetteKey(baseKey); + expect(key).toMatch(/^[0-9a-f]{32}$/); + }); + + it('is deterministic for the same key', () => { + expect(buildCassetteKey(baseKey)).toBe(buildCassetteKey(baseKey)); + }); + + it('differs when normalizedPrompt changes', () => { + const key2 = { ...baseKey, normalizedPrompt: 'You are a Discuss agent.' }; + expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2)); + }); + + it('differs when providerName changes', () => { + const key2 = { ...baseKey, providerName: 'codex' }; + expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2)); + }); + + it('differs when worktreeHash changes', () => { + const key2 = { ...baseKey, worktreeHash: 'abcdef1234567890' }; + expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2)); + }); + + it('is stable regardless of modelArgs insertion order', () => { + const key1 = { ...baseKey, modelArgs: ['--verbose', '--dangerously-skip-permissions'] }; + const key2 = { ...baseKey, modelArgs: ['--dangerously-skip-permissions', '--verbose'] }; + expect(buildCassetteKey(key1)).toBe(buildCassetteKey(key2)); + }); +}); + +describe('hashWorktreeFiles', () => { + it('returns "empty" for a non-existent directory', () => { + expect(hashWorktreeFiles('/does/not/exist')).toBe('empty'); + }); + + it('returns "empty" for a directory with only hidden files', () => { + const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-')); + try { + // Only hidden entries present + const { mkdirSync } = require('node:fs'); + mkdirSync(join(dir, '.git')); + expect(hashWorktreeFiles(dir)).toBe('empty'); + } finally { + rmSync(dir, { recursive: true }); + } + }); + + it('produces a 16-char hex string for a directory with files', () => { + const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-')); + try { + const { writeFileSync } = require('node:fs'); + writeFileSync(join(dir, 'index.ts'), 'export const x = 1;'); + const hash = hashWorktreeFiles(dir); + expect(hash).toMatch(/^[0-9a-f]{16}$/); + } finally { + rmSync(dir, { recursive: true }); + } + }); + + it('changes when file content changes', () => { + const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-')); + try { + const { writeFileSync } = require('node:fs'); + writeFileSync(join(dir, 'index.ts'), 'export const x = 1;'); + const hash1 = hashWorktreeFiles(dir); + writeFileSync(join(dir, 'index.ts'), 'export const x = 2;'); + const hash2 = hashWorktreeFiles(dir); + expect(hash1).not.toBe(hash2); + } finally { + rmSync(dir, { recursive: true }); + } + }); +}); + +// --------------------------------------------------------------------------- +// CassetteStore +// --------------------------------------------------------------------------- + +describe('CassetteStore', () => { + let dir: string; + let store: CassetteStore; + + const key: CassetteKey = { + normalizedPrompt: 'Test prompt', + providerName: 'claude', + modelArgs: ['--verbose'], + worktreeHash: 'empty', + }; + + const entry: CassetteEntry = { + version: 1, + key, + recording: { + jsonlLines: ['{"type":"system","session_id":"test-session"}', '{"type":"result","subtype":"success"}'], + signalJson: { status: 'done', message: 'Task completed' }, + exitCode: 0, + recordedAt: '2026-03-01T00:00:00.000Z', + }, + }; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), 'cw-store-test-')); + store = new CassetteStore(dir); + }); + + it('returns null for unknown key', () => { + expect(store.find(key)).toBeNull(); + }); + + it('round-trips a cassette entry', () => { + store.save(key, entry); + const loaded = store.find(key); + expect(loaded).not.toBeNull(); + expect(loaded?.recording.signalJson).toEqual({ status: 'done', message: 'Task completed' }); + expect(loaded?.recording.jsonlLines).toHaveLength(2); + }); + + it('overwrites an existing cassette', () => { + store.save(key, entry); + const updated: CassetteEntry = { + ...entry, + recording: { ...entry.recording, jsonlLines: ['new line'], recordedAt: '2026-03-02T00:00:00.000Z' }, + }; + store.save(key, updated); + const loaded = store.find(key); + expect(loaded?.recording.jsonlLines).toEqual(['new line']); + }); + + it('uses same file for same key', () => { + store.save(key, entry); + const { readdirSync } = require('node:fs'); + const files = readdirSync(dir).filter((f: string) => f.endsWith('.json')); + expect(files).toHaveLength(1); + + store.save(key, entry); // overwrite + const files2 = readdirSync(dir).filter((f: string) => f.endsWith('.json')); + expect(files2).toHaveLength(1); + }); + + it('uses different files for different keys', () => { + const key2: CassetteKey = { ...key, providerName: 'codex' }; + store.save(key, entry); + store.save(key2, { ...entry, key: key2 }); + + const { readdirSync } = require('node:fs'); + const files = readdirSync(dir).filter((f: string) => f.endsWith('.json')); + expect(files).toHaveLength(2); + }); +}); diff --git a/src/test/cassette/harness.ts b/src/test/cassette/harness.ts new file mode 100644 index 0000000..cacc1df --- /dev/null +++ b/src/test/cassette/harness.ts @@ -0,0 +1,200 @@ +/** + * Cassette Test Harness + * + * Wraps RealProviderHarness with the CassetteProcessManager so tests run + * against recorded cassettes instead of real AI APIs. + * + * Usage: + * + * let harness: RealProviderHarness; + * + * beforeAll(async () => { + * harness = await createCassetteHarness({ provider: 'claude' }); + * }); + * + * afterAll(() => harness.cleanup()); + * + * it('completes a task', async () => { + * const agent = await harness.agentManager.spawn({ prompt: MINIMAL_PROMPTS.done, ... }); + * const result = await harness.waitForAgentCompletion(agent.id); + * expect(result?.success).toBe(true); + * }); + * + * Mode control via env vars: + * (default) → replay mode: cassette must exist, throws if missing + * CW_CASSETTE_RECORD=1 → auto mode: replay if exists, record if missing + * CW_CASSETTE_FORCE_RECORD=1→ record mode: always run real agent, overwrite cassette + */ + +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { execSync } from 'node:child_process'; +import { join } from 'node:path'; +import { createTestDatabase } from '../../db/repositories/drizzle/test-helpers.js'; +import { + DrizzleAgentRepository, + DrizzleProjectRepository, + DrizzleAccountRepository, + DrizzleInitiativeRepository, +} from '../../db/repositories/drizzle/index.js'; +import { MultiProviderAgentManager } from '../../agent/manager.js'; +import { CapturingEventBus, sleep, type RealProviderHarness } from '../integration/real-providers/harness.js'; +import { CassetteStore } from './store.js'; +import { CassetteProcessManager, type CassetteMode } from './process-manager.js'; + +export interface CassetteHarnessOptions { + /** Which provider the agent runs as (default: 'claude'). */ + provider?: 'claude' | 'codex'; + /** + * Directory where cassette JSON files are stored and read from. + * Defaults to CW_CASSETTE_DIR env var, then src/test/cassettes/. + */ + cassetteDir?: string; + /** + * Override cassette mode. Normally derived from env vars: + * - CW_CASSETTE_FORCE_RECORD=1 → 'record' + * - CW_CASSETTE_RECORD=1 → 'auto' + * - (default) → 'replay' + */ + mode?: CassetteMode; +} + +const DEFAULT_CASSETTE_DIR = new URL('../cassettes', import.meta.url).pathname; + +/** + * Resolve cassette mode from env vars (highest priority) or options. + */ +function resolveCassetteMode(options: CassetteHarnessOptions): CassetteMode { + if (process.env.CW_CASSETTE_FORCE_RECORD === '1') return 'record'; + if (process.env.CW_CASSETTE_RECORD === '1') return 'auto'; + return options.mode ?? 'replay'; +} + +/** + * Create a test harness backed by the cassette system. + * + * The harness exposes the same interface as RealProviderHarness so tests + * written for real providers work unchanged with cassettes. + * + * Replay is much faster than real API calls (typically < 500ms) and + * exercises the full pipeline: ProcessManager → FileTailer → OutputHandler + * → SignalManager → event emission. + */ +export async function createCassetteHarness(options: CassetteHarnessOptions = {}): Promise { + const cassetteDir = options.cassetteDir ?? process.env.CW_CASSETTE_DIR ?? DEFAULT_CASSETTE_DIR; + const cassetteMode = resolveCassetteMode(options); + + // Create a temporary git workspace (required for worktree operations). + const workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-cassette-')); + execSync('git init', { cwd: workspaceRoot, stdio: 'ignore' }); + execSync('git config user.email "test@test.com"', { cwd: workspaceRoot, stdio: 'ignore' }); + execSync('git config user.name "Test"', { cwd: workspaceRoot, stdio: 'ignore' }); + execSync('touch .gitkeep && git add .gitkeep && git commit -m "init"', { cwd: workspaceRoot, stdio: 'ignore' }); + + const db = createTestDatabase(); + const agentRepository = new DrizzleAgentRepository(db); + const projectRepository = new DrizzleProjectRepository(db); + const accountRepository = new DrizzleAccountRepository(db); + const initiativeRepository = new DrizzleInitiativeRepository(db); + const eventBus = new CapturingEventBus(); + + const store = new CassetteStore(cassetteDir); + const cassetteProcessManager = new CassetteProcessManager( + workspaceRoot, + projectRepository, + store, + cassetteMode, + ); + + const agentManager = new MultiProviderAgentManager( + agentRepository, + workspaceRoot, + projectRepository, + accountRepository, + eventBus, + undefined, // credentialManager + undefined, // changeSetRepository + undefined, // phaseRepository + undefined, // taskRepository + undefined, // pageRepository + undefined, // logChunkRepository + false, // debug + cassetteProcessManager, + ); + + const harness: RealProviderHarness = { + db, + eventBus, + agentManager, + workspaceRoot, + agentRepository, + projectRepository, + accountRepository, + initiativeRepository, + + // Cassette replays are fast — use a short poll interval and default timeout. + async waitForAgentCompletion(agentId, timeoutMs = 30_000) { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + const agent = await agentRepository.findById(agentId); + if (!agent) return null; + if (agent.status === 'idle' || agent.status === 'stopped') { + return agentManager.getResult(agentId); + } + if (agent.status === 'crashed') { + return agentManager.getResult(agentId); + } + if (agent.status === 'waiting_for_input') return null; + await sleep(100); + } + throw new Error(`[cassette] Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`); + }, + + async waitForAgentWaiting(agentId, timeoutMs = 30_000) { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + const agent = await agentRepository.findById(agentId); + if (!agent) return null; + if (agent.status === 'waiting_for_input') return agentManager.getPendingQuestions(agentId); + if (['idle', 'stopped', 'crashed'].includes(agent.status)) return null; + await sleep(100); + } + throw new Error(`[cassette] Timeout waiting for agent ${agentId} to enter waiting state after ${timeoutMs}ms`); + }, + + async waitForAgentStatus(agentId, status, timeoutMs = 30_000) { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + const agent = await agentRepository.findById(agentId); + if (!agent) throw new Error(`Agent ${agentId} not found`); + if (agent.status === status) return; + await sleep(100); + } + throw new Error(`[cassette] Timeout waiting for agent ${agentId} to reach status '${status}' after ${timeoutMs}ms`); + }, + + getEventsByType(type) { + return eventBus.getEventsByType(type); + }, + + clearEvents() { + eventBus.clearEvents(); + }, + + async killAllAgents() { + const agents = await agentRepository.findAll(); + for (const agent of agents) { + if (agent.status === 'running') { + try { await agentManager.stop(agent.id); } catch { /* ignore */ } + } + } + }, + + async cleanup() { + await harness.killAllAgents(); + try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ } + }, + }; + + return harness; +} diff --git a/src/test/cassette/index.ts b/src/test/cassette/index.ts new file mode 100644 index 0000000..7ae5ecf --- /dev/null +++ b/src/test/cassette/index.ts @@ -0,0 +1,6 @@ +export { CassetteStore } from './store.js'; +export { CassetteProcessManager, type CassetteMode } from './process-manager.js'; +export { createCassetteHarness, type CassetteHarnessOptions } from './harness.js'; +export { normalizePrompt, stripPromptFromArgs } from './normalizer.js'; +export { hashWorktreeFiles, buildCassetteKey } from './key.js'; +export type { CassetteKey, CassetteRecording, CassetteEntry } from './types.js'; diff --git a/src/test/cassette/key.ts b/src/test/cassette/key.ts new file mode 100644 index 0000000..b64577a --- /dev/null +++ b/src/test/cassette/key.ts @@ -0,0 +1,76 @@ +/** + * Cassette Key Generation + * + * Builds stable SHA256-based identifiers for cassettes. + * Two spans are separate concerns: + * - hashWorktreeFiles: fingerprints the worktree state at spawn time (for execute mode drift) + * - buildCassetteKey: hashes all key components into a 32-char hex filename + */ + +import { createHash } from 'node:crypto'; +import { readdirSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import type { CassetteKey } from './types.js'; + +/** + * Recursively hash all non-hidden files in a directory. + * + * Hidden entries (starting with '.') are skipped — this excludes .git, .cw, etc. + * Entries are processed in sorted order for determinism across platforms. + * + * Returns the first 16 hex chars of the SHA256, or 'empty' if the directory + * is absent or contains no readable files. + */ +export function hashWorktreeFiles(dir: string): string { + const hash = createHash('sha256'); + let hasContent = false; + + function walkDir(currentDir: string): void { + let entries; + try { + entries = readdirSync(currentDir, { withFileTypes: true }); + } catch { + return; + } + + for (const entry of [...entries].sort((a, b) => a.name.localeCompare(b.name))) { + if (entry.name.startsWith('.')) continue; + + const fullPath = join(currentDir, entry.name); + const relPath = fullPath.slice(dir.length); + + if (entry.isDirectory()) { + hash.update(`d:${relPath}\n`); + walkDir(fullPath); + } else if (entry.isFile()) { + try { + const content = readFileSync(fullPath); + hash.update(`f:${relPath}:${content.length}\n`); + hash.update(content); + hasContent = true; + } catch { + // skip unreadable files + } + } + } + } + + walkDir(dir); + return hasContent ? hash.digest('hex').slice(0, 16) : 'empty'; +} + +/** + * Compute a stable 32-char hex identifier for a cassette key. + * + * modelArgs are sorted before hashing so insertion order differences + * between providers don't produce different cassettes. + */ +export function buildCassetteKey(key: CassetteKey): string { + const canonical = JSON.stringify({ + normalizedPrompt: key.normalizedPrompt, + providerName: key.providerName, + modelArgs: [...key.modelArgs].sort(), + worktreeHash: key.worktreeHash, + }); + return createHash('sha256').update(canonical).digest('hex').slice(0, 32); +} diff --git a/src/test/cassette/normalizer.ts b/src/test/cassette/normalizer.ts new file mode 100644 index 0000000..56c7c76 --- /dev/null +++ b/src/test/cassette/normalizer.ts @@ -0,0 +1,67 @@ +/** + * Cassette Normalizer + * + * Strips dynamic content from prompts and CLI args before hashing into a cassette key. + * Dynamic content (UUIDs, temp paths, timestamps, session numbers) varies between + * test runs but doesn't affect how the agent responds — so we replace them with + * stable placeholders to get a stable cache key. + */ + +const UUID_RE = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi; +const ISO_TIMESTAMP_RE = /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?/g; +const UNIX_EPOCH_MS_RE = /\b1[0-9]{12}\b/g; +const SESSION_NUM_RE = /\bsession[_\s-]?\d+\b/gi; + +/** + * Normalize a prompt for stable cassette key generation. + * + * Replacements applied in order (most-specific first to avoid partial matches): + * 1. Absolute workspace root path → __WORKSPACE__ + * 2. UUIDs → __UUID__ + * 3. ISO 8601 timestamps → __TIMESTAMP__ + * 4. Unix epoch milliseconds → __EPOCH__ + * 5. Session numbers → session__N__ + */ +export function normalizePrompt(prompt: string, workspaceRoot: string): string { + let normalized = prompt; + + if (workspaceRoot) { + normalized = normalized.replaceAll(workspaceRoot, '__WORKSPACE__'); + } + + normalized = normalized.replace(UUID_RE, '__UUID__'); + normalized = normalized.replace(ISO_TIMESTAMP_RE, '__TIMESTAMP__'); + normalized = normalized.replace(UNIX_EPOCH_MS_RE, '__EPOCH__'); + normalized = normalized.replace(SESSION_NUM_RE, 'session__N__'); + + return normalized; +} + +/** + * Strip the prompt value from CLI args to produce stable modelArgs for the cassette key. + * + * Handles all provider prompt flag styles: + * - Native: `-p ` (Claude) + * - Flag: `--prompt `, `-p ` (Gemini, Cursor, Auggie, Amp, Opencode) + * - Also removes the bare prompt value if it appears as a positional arg. + */ +export function stripPromptFromArgs(args: string[], prompt: string): string[] { + if (!prompt) return [...args]; + + const result: string[] = []; + let i = 0; + while (i < args.length) { + const arg = args[i]; + const PROMPT_FLAGS = ['-p', '--prompt', '--message']; + + if (PROMPT_FLAGS.includes(arg) && args[i + 1] === prompt) { + i += 2; // skip flag + value + } else if (arg === prompt) { + i += 1; // skip bare positional prompt + } else { + result.push(arg); + i++; + } + } + return result; +} diff --git a/src/test/cassette/process-manager.ts b/src/test/cassette/process-manager.ts new file mode 100644 index 0000000..ada45ca --- /dev/null +++ b/src/test/cassette/process-manager.ts @@ -0,0 +1,174 @@ +/** + * CassetteProcessManager + * + * Extends ProcessManager to intercept subprocess spawning and either: + * - Replay a recorded cassette (no API cost, deterministic) + * - Record a new cassette by running the real agent and capturing its output + * + * Modes: + * - 'replay': cassette MUST exist; throws if missing (safe for CI) + * - 'record': always runs real agent; saves/overwrites cassette on completion + * - 'auto': replays if cassette exists; falls through to record if missing + * + * The cassette key is built from: + * - Normalized prompt (dynamic content replaced with placeholders) + * - Provider name and stable CLI args (prompt value stripped) + * - Worktree file hash (detects content drift for execute-mode agents) + */ + +import { readFileSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; +import { ProcessManager } from '../../agent/process-manager.js'; +import type { StreamEvent } from '../../agent/providers/parsers/index.js'; +import type { FileTailer } from '../../agent/file-tailer.js'; +import type { ProjectRepository } from '../../db/repositories/project-repository.js'; +import type { CassetteKey, CassetteEntry } from './types.js'; +import type { CassetteStore } from './store.js'; +import { normalizePrompt, stripPromptFromArgs } from './normalizer.js'; +import { hashWorktreeFiles } from './key.js'; + +export type CassetteMode = 'replay' | 'record' | 'auto'; + +interface PendingRecording { + key: CassetteKey; + outputFilePath: string; + agentCwd: string; +} + +export class CassetteProcessManager extends ProcessManager { + private readonly _workspaceRoot: string; + private readonly replayWorkerPath: string; + private readonly pendingRecordings = new Map(); + + constructor( + workspaceRoot: string, + projectRepository: ProjectRepository, + private readonly store: CassetteStore, + private readonly cassetteMode: CassetteMode = 'auto', + ) { + super(workspaceRoot, projectRepository); + this._workspaceRoot = workspaceRoot; + this.replayWorkerPath = new URL('./replay-worker.mjs', import.meta.url).pathname; + } + + override spawnDetached( + agentId: string, + agentName: string, + command: string, + args: string[], + cwd: string, + env: Record, + providerName: string, + prompt?: string, + onEvent?: (event: StreamEvent) => void, + onRawContent?: (content: string) => void, + ): { pid: number; outputFilePath: string; tailer: FileTailer } { + const key: CassetteKey = { + normalizedPrompt: normalizePrompt(prompt ?? '', this._workspaceRoot), + providerName, + modelArgs: stripPromptFromArgs(args, prompt ?? ''), + worktreeHash: hashWorktreeFiles(cwd), + }; + + // In record mode we always skip the store lookup and go straight to real spawn. + const existing = this.cassetteMode !== 'record' ? this.store.find(key) : null; + + if (existing) { + return this.replayFromCassette(agentId, agentName, cwd, env, providerName, existing, onEvent, onRawContent); + } + + if (this.cassetteMode === 'replay') { + throw new Error( + `[cassette] No cassette found for agent '${agentName}' (provider=${providerName}, mode=replay).\n` + + `Run with CW_CASSETTE_RECORD=1 to record it.`, + ); + } + + // auto or record: run the real agent and record the cassette on completion. + console.log(`[cassette] recording new cassette for agent '${agentName}' (${providerName})`); + const result = super.spawnDetached(agentId, agentName, command, args, cwd, env, providerName, prompt, onEvent, onRawContent); + this.pendingRecordings.set(result.pid, { key, outputFilePath: result.outputFilePath, agentCwd: cwd }); + return result; + } + + override pollForCompletion( + agentId: string, + pid: number, + onComplete: () => Promise, + getTailer: () => FileTailer | undefined, + ): { cancel: () => void } { + const pending = this.pendingRecordings.get(pid); + if (!pending) { + // Replay mode — no recording to save; delegate to base implementation. + return super.pollForCompletion(agentId, pid, onComplete, getTailer); + } + + // Record mode — wrap onComplete to save the cassette before handing off. + return super.pollForCompletion(agentId, pid, async () => { + await this.saveCassette(pending); + this.pendingRecordings.delete(pid); + await onComplete(); + }, getTailer); + } + + private async saveCassette(pending: PendingRecording): Promise { + // Read all JSONL lines from the output file the agent wrote to. + let jsonlLines: string[] = []; + try { + const content = readFileSync(pending.outputFilePath, 'utf-8'); + jsonlLines = content.split('\n').filter(l => l.trim() !== ''); + } catch { + // No output produced — record an empty cassette. + } + + // Read signal.json from the agent working directory. + let signalJson: Record | null = null; + const signalPath = join(pending.agentCwd, '.cw', 'output', 'signal.json'); + if (existsSync(signalPath)) { + try { + signalJson = JSON.parse(readFileSync(signalPath, 'utf-8')) as Record; + } catch { + // Corrupt signal file — record null. + } + } + + const entry: CassetteEntry = { + version: 1, + key: pending.key, + recording: { + jsonlLines, + signalJson, + exitCode: 0, + recordedAt: new Date().toISOString(), + }, + }; + + this.store.save(pending.key, entry); + } + + private replayFromCassette( + agentId: string, + agentName: string, + cwd: string, + env: Record, + providerName: string, + cassette: CassetteEntry, + onEvent?: (event: StreamEvent) => void, + onRawContent?: (content: string) => void, + ): { pid: number; outputFilePath: string; tailer: FileTailer } { + console.log(`[cassette] replaying cassette for agent '${agentName}' (${cassette.recording.jsonlLines.length} lines)`); + + return super.spawnDetached( + agentId, + agentName, + process.execPath, // use the running node binary + [this.replayWorkerPath], // replay-worker.mjs + cwd, + { ...env, CW_CASSETTE_DATA: JSON.stringify(cassette.recording) }, + providerName, // use original provider's parser for the tailer + undefined, // no prompt — worker handles output directly + onEvent, + onRawContent, + ); + } +} diff --git a/src/test/cassette/replay-worker.mjs b/src/test/cassette/replay-worker.mjs new file mode 100644 index 0000000..b1e0544 --- /dev/null +++ b/src/test/cassette/replay-worker.mjs @@ -0,0 +1,48 @@ +#!/usr/bin/env node +/** + * Cassette Replay Worker + * + * Spawned as a detached subprocess by CassetteProcessManager instead of the real + * agent CLI. Reads the cassette recording from CW_CASSETTE_DATA env var, replays + * the JSONL output to stdout (which spawnDetached redirects to the output file), + * writes signal.json relative to the process cwd, and exits. + * + * This is a plain .mjs file (no TypeScript) so it can be spawned with bare `node` + * without any build step or tsx dependency. + */ + +import { mkdirSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; + +const data = process.env.CW_CASSETTE_DATA; +if (!data) { + process.stderr.write('[replay-worker] CW_CASSETTE_DATA env var not set\n'); + process.exit(1); +} + +let recording; +try { + recording = JSON.parse(data); +} catch (err) { + process.stderr.write(`[replay-worker] failed to parse CW_CASSETTE_DATA: ${err.message}\n`); + process.exit(1); +} + +const { jsonlLines = [], signalJson = null, exitCode = 0 } = recording; + +// Write JSONL lines to stdout. +// spawnDetached redirects stdout to the output file via open()+fd redirection, +// so writing to process.stdout here is equivalent to writing to the output file. +for (const line of jsonlLines) { + process.stdout.write(line + '\n'); +} + +// Write signal.json to the expected location relative to cwd. +// The agent's cwd is set by spawnDetached to the agent working directory. +if (signalJson) { + const signalDir = join(process.cwd(), '.cw', 'output'); + mkdirSync(signalDir, { recursive: true }); + writeFileSync(join(signalDir, 'signal.json'), JSON.stringify(signalJson, null, 2), 'utf-8'); +} + +process.exit(exitCode); diff --git a/src/test/cassette/store.ts b/src/test/cassette/store.ts new file mode 100644 index 0000000..8cb25f9 --- /dev/null +++ b/src/test/cassette/store.ts @@ -0,0 +1,50 @@ +/** + * CassetteStore + * + * Reads and writes cassette files from a directory on disk. + * Each cassette is stored as a JSON file named after the 32-char key hash. + * Cassette files are intended to be committed to git — they are the + * "recorded interactions" that allow tests to run without real API calls. + */ + +import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; +import type { CassetteKey, CassetteEntry } from './types.js'; +import { buildCassetteKey } from './key.js'; + +export class CassetteStore { + constructor(private readonly cassetteDir: string) {} + + private pathFor(keyHash: string): string { + return join(this.cassetteDir, `${keyHash}.json`); + } + + /** + * Look up a cassette by its key. + * Returns null if not found or if the file is corrupt. + */ + find(key: CassetteKey): CassetteEntry | null { + const hash = buildCassetteKey(key); + const path = this.pathFor(hash); + + if (!existsSync(path)) return null; + + try { + return JSON.parse(readFileSync(path, 'utf-8')) as CassetteEntry; + } catch { + return null; + } + } + + /** + * Save a cassette to disk. Creates the cassette directory if needed. + * Prints the cassette filename so it's visible during recording runs. + */ + save(key: CassetteKey, entry: CassetteEntry): void { + mkdirSync(this.cassetteDir, { recursive: true }); + const hash = buildCassetteKey(key); + const path = this.pathFor(hash); + writeFileSync(path, JSON.stringify(entry, null, 2), 'utf-8'); + console.log(`[cassette] recorded → ${hash}.json (${entry.recording.jsonlLines.length} lines)`); + } +} diff --git a/src/test/cassette/types.ts b/src/test/cassette/types.ts new file mode 100644 index 0000000..5daeae3 --- /dev/null +++ b/src/test/cassette/types.ts @@ -0,0 +1,35 @@ +/** + * Cassette Types + * + * VCR-style cassette format for recording and replaying agent subprocess I/O. + * A cassette captures everything an agent process writes so tests can replay + * it deterministically without hitting real AI APIs. + */ + +export interface CassetteKey { + /** Prompt with dynamic content (UUIDs, paths, timestamps) replaced with placeholders. */ + normalizedPrompt: string; + /** Provider name, e.g. 'claude', 'codex'. */ + providerName: string; + /** Stable CLI args with the prompt value stripped. */ + modelArgs: string[]; + /** SHA256 prefix of all non-hidden files in the agent worktree at spawn time. */ + worktreeHash: string; +} + +export interface CassetteRecording { + /** All JSONL lines the agent wrote to stdout (captured from output file). */ + jsonlLines: string[]; + /** Content of signal.json written by the agent, or null if missing. */ + signalJson: Record | null; + /** Process exit code (0 = success). */ + exitCode: number; + /** ISO timestamp when this cassette was recorded. */ + recordedAt: string; +} + +export interface CassetteEntry { + version: 1; + key: CassetteKey; + recording: CassetteRecording; +} diff --git a/src/test/cassettes/.gitkeep b/src/test/cassettes/.gitkeep new file mode 100644 index 0000000..e69de29