feat: Add VCR-style cassette testing system for agent subprocess pipeline

Implements cassette recording/replay to test the full agent execution
pipeline (ProcessManager → FileTailer → OutputHandler → SignalManager)
without real AI API calls.

Key components:
- `CassetteProcessManager`: extends ProcessManager, intercepts spawnDetached
  to replay cassettes or record real runs on completion
- `replay-worker.mjs`: standalone node script that replays JSONL + signal.json
  as a subprocess, exercising the complete file-based output pipeline
- `CassetteStore`: reads/writes cassette JSON files keyed by SHA256 hash
- `normalizer.ts`: strips dynamic content (UUIDs, temp paths, timestamps,
  session numbers) from prompts for stable cassette keys
- `key.ts`: hashes normalized prompt + provider args + worktree file content
  (worktree hash detects content drift for execute-mode agents)
- `createCassetteHarness()`: wraps RealProviderHarness with cassette support,
  same interface so existing real-provider tests work unchanged

Mode control via env vars:
  (default)                  → replay: cassette must exist (safe for CI)
  CW_CASSETTE_RECORD=1       → auto: replay if exists, record if missing
  CW_CASSETTE_FORCE_RECORD=1 → record: always run real agent, overwrite cassette

MultiProviderAgentManager gains an optional `processManagerOverride` constructor
parameter for clean dependency injection without changing existing callers.

Cassette files live in src/test/cassettes/ and are intended to be committed
to git so CI runs without API access.
This commit is contained in:
Lukas May
2026-03-02 12:17:52 +09:00
parent a1366efe4d
commit 0ed657b644
11 changed files with 915 additions and 1 deletions

View File

@@ -80,9 +80,10 @@ export class MultiProviderAgentManager implements AgentManager {
private pageRepository?: PageRepository,
private logChunkRepository?: LogChunkRepository,
private debug: boolean = false,
processManagerOverride?: ProcessManager,
) {
this.signalManager = new FileSystemSignalManager();
this.processManager = new ProcessManager(workspaceRoot, projectRepository);
this.processManager = processManagerOverride ?? new ProcessManager(workspaceRoot, projectRepository);
this.credentialHandler = new CredentialHandler(workspaceRoot, accountRepository, credentialManager);
this.outputHandler = new OutputHandler(repository, eventBus, changeSetRepository, phaseRepository, taskRepository, pageRepository, this.signalManager);
this.cleanupManager = new CleanupManager(workspaceRoot, repository, projectRepository, eventBus, debug, this.signalManager);

View File

@@ -0,0 +1,257 @@
/**
* Cassette System Unit Tests
*
* Verifies normalizer, key generation, and store in isolation.
* These run without any real processes or API calls.
*/
import { describe, it, expect, beforeEach } from 'vitest';
import { mkdtempSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
import { hashWorktreeFiles, buildCassetteKey } from './key.js';
import { CassetteStore } from './store.js';
import type { CassetteEntry, CassetteKey } from './types.js';
// ---------------------------------------------------------------------------
// Normalizer
// ---------------------------------------------------------------------------
describe('normalizePrompt', () => {
it('strips UUIDs', () => {
const prompt = 'Agent 550e8400-e29b-41d4-a716-446655440000 is running task abc123ef-0000-0000-0000-000000000000';
const result = normalizePrompt(prompt, '');
expect(result).not.toContain('550e8400');
expect(result).not.toContain('abc123ef');
expect(result).toContain('__UUID__');
});
it('strips workspace root path', () => {
const workspaceRoot = '/tmp/cw-test-abc123';
const prompt = `Working directory: ${workspaceRoot}/agent-workdirs/my-agent`;
const result = normalizePrompt(prompt, workspaceRoot);
expect(result).not.toContain(workspaceRoot);
expect(result).toContain('__WORKSPACE__');
});
it('strips ISO timestamps', () => {
const prompt = 'Started at 2026-03-01T14:30:00Z, last seen 2026-03-01T14:35:22.456Z';
const result = normalizePrompt(prompt, '');
expect(result).not.toContain('2026-03-01');
expect(result).toContain('__TIMESTAMP__');
});
it('strips session numbers', () => {
const prompt = 'Resuming session 3 with agent session-42';
const result = normalizePrompt(prompt, '');
expect(result).toContain('session__N__');
expect(result).not.toContain('session 3');
expect(result).not.toContain('session-42');
});
it('leaves static content unchanged', () => {
const prompt = 'You are a Worker agent. Execute the assigned coding task.';
const result = normalizePrompt(prompt, '/tmp/ws');
expect(result).toBe(prompt);
});
it('strips workspace root before UUID replacement to avoid double-normalizing', () => {
const workspaceRoot = '/tmp/cw-test-abc123';
const uuid = '550e8400-e29b-41d4-a716-446655440000';
const prompt = `Dir: ${workspaceRoot}/agents/${uuid}`;
const result = normalizePrompt(prompt, workspaceRoot);
expect(result).toBe('Dir: __WORKSPACE__/agents/__UUID__');
});
});
describe('stripPromptFromArgs', () => {
it('strips -p <prompt> style (Claude native)', () => {
const prompt = 'Do the task.';
const args = ['--dangerously-skip-permissions', '--verbose', '-p', prompt, '--output-format', 'stream-json'];
const result = stripPromptFromArgs(args, prompt);
expect(result).toEqual(['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json']);
});
it('strips --prompt <prompt> style', () => {
const prompt = 'Do the task.';
const args = ['--flag', '--prompt', prompt, '--json'];
const result = stripPromptFromArgs(args, prompt);
expect(result).toEqual(['--flag', '--json']);
});
it('strips bare positional prompt', () => {
const prompt = 'Do the task.';
const args = ['--full-auto', prompt];
const result = stripPromptFromArgs(args, prompt);
expect(result).toEqual(['--full-auto']);
});
it('returns unchanged args when prompt is empty', () => {
const args = ['--flag', '--value'];
expect(stripPromptFromArgs(args, '')).toEqual(args);
});
});
// ---------------------------------------------------------------------------
// Key generation
// ---------------------------------------------------------------------------
describe('buildCassetteKey', () => {
const baseKey: CassetteKey = {
normalizedPrompt: 'You are a Worker agent.',
providerName: 'claude',
modelArgs: ['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json'],
worktreeHash: 'empty',
};
it('produces a 32-char hex string', () => {
const key = buildCassetteKey(baseKey);
expect(key).toMatch(/^[0-9a-f]{32}$/);
});
it('is deterministic for the same key', () => {
expect(buildCassetteKey(baseKey)).toBe(buildCassetteKey(baseKey));
});
it('differs when normalizedPrompt changes', () => {
const key2 = { ...baseKey, normalizedPrompt: 'You are a Discuss agent.' };
expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2));
});
it('differs when providerName changes', () => {
const key2 = { ...baseKey, providerName: 'codex' };
expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2));
});
it('differs when worktreeHash changes', () => {
const key2 = { ...baseKey, worktreeHash: 'abcdef1234567890' };
expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2));
});
it('is stable regardless of modelArgs insertion order', () => {
const key1 = { ...baseKey, modelArgs: ['--verbose', '--dangerously-skip-permissions'] };
const key2 = { ...baseKey, modelArgs: ['--dangerously-skip-permissions', '--verbose'] };
expect(buildCassetteKey(key1)).toBe(buildCassetteKey(key2));
});
});
describe('hashWorktreeFiles', () => {
it('returns "empty" for a non-existent directory', () => {
expect(hashWorktreeFiles('/does/not/exist')).toBe('empty');
});
it('returns "empty" for a directory with only hidden files', () => {
const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-'));
try {
// Only hidden entries present
const { mkdirSync } = require('node:fs');
mkdirSync(join(dir, '.git'));
expect(hashWorktreeFiles(dir)).toBe('empty');
} finally {
rmSync(dir, { recursive: true });
}
});
it('produces a 16-char hex string for a directory with files', () => {
const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-'));
try {
const { writeFileSync } = require('node:fs');
writeFileSync(join(dir, 'index.ts'), 'export const x = 1;');
const hash = hashWorktreeFiles(dir);
expect(hash).toMatch(/^[0-9a-f]{16}$/);
} finally {
rmSync(dir, { recursive: true });
}
});
it('changes when file content changes', () => {
const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-'));
try {
const { writeFileSync } = require('node:fs');
writeFileSync(join(dir, 'index.ts'), 'export const x = 1;');
const hash1 = hashWorktreeFiles(dir);
writeFileSync(join(dir, 'index.ts'), 'export const x = 2;');
const hash2 = hashWorktreeFiles(dir);
expect(hash1).not.toBe(hash2);
} finally {
rmSync(dir, { recursive: true });
}
});
});
// ---------------------------------------------------------------------------
// CassetteStore
// ---------------------------------------------------------------------------
describe('CassetteStore', () => {
let dir: string;
let store: CassetteStore;
const key: CassetteKey = {
normalizedPrompt: 'Test prompt',
providerName: 'claude',
modelArgs: ['--verbose'],
worktreeHash: 'empty',
};
const entry: CassetteEntry = {
version: 1,
key,
recording: {
jsonlLines: ['{"type":"system","session_id":"test-session"}', '{"type":"result","subtype":"success"}'],
signalJson: { status: 'done', message: 'Task completed' },
exitCode: 0,
recordedAt: '2026-03-01T00:00:00.000Z',
},
};
beforeEach(() => {
dir = mkdtempSync(join(tmpdir(), 'cw-store-test-'));
store = new CassetteStore(dir);
});
it('returns null for unknown key', () => {
expect(store.find(key)).toBeNull();
});
it('round-trips a cassette entry', () => {
store.save(key, entry);
const loaded = store.find(key);
expect(loaded).not.toBeNull();
expect(loaded?.recording.signalJson).toEqual({ status: 'done', message: 'Task completed' });
expect(loaded?.recording.jsonlLines).toHaveLength(2);
});
it('overwrites an existing cassette', () => {
store.save(key, entry);
const updated: CassetteEntry = {
...entry,
recording: { ...entry.recording, jsonlLines: ['new line'], recordedAt: '2026-03-02T00:00:00.000Z' },
};
store.save(key, updated);
const loaded = store.find(key);
expect(loaded?.recording.jsonlLines).toEqual(['new line']);
});
it('uses same file for same key', () => {
store.save(key, entry);
const { readdirSync } = require('node:fs');
const files = readdirSync(dir).filter((f: string) => f.endsWith('.json'));
expect(files).toHaveLength(1);
store.save(key, entry); // overwrite
const files2 = readdirSync(dir).filter((f: string) => f.endsWith('.json'));
expect(files2).toHaveLength(1);
});
it('uses different files for different keys', () => {
const key2: CassetteKey = { ...key, providerName: 'codex' };
store.save(key, entry);
store.save(key2, { ...entry, key: key2 });
const { readdirSync } = require('node:fs');
const files = readdirSync(dir).filter((f: string) => f.endsWith('.json'));
expect(files).toHaveLength(2);
});
});

View File

@@ -0,0 +1,200 @@
/**
* Cassette Test Harness
*
* Wraps RealProviderHarness with the CassetteProcessManager so tests run
* against recorded cassettes instead of real AI APIs.
*
* Usage:
*
* let harness: RealProviderHarness;
*
* beforeAll(async () => {
* harness = await createCassetteHarness({ provider: 'claude' });
* });
*
* afterAll(() => harness.cleanup());
*
* it('completes a task', async () => {
* const agent = await harness.agentManager.spawn({ prompt: MINIMAL_PROMPTS.done, ... });
* const result = await harness.waitForAgentCompletion(agent.id);
* expect(result?.success).toBe(true);
* });
*
* Mode control via env vars:
* (default) → replay mode: cassette must exist, throws if missing
* CW_CASSETTE_RECORD=1 → auto mode: replay if exists, record if missing
* CW_CASSETTE_FORCE_RECORD=1→ record mode: always run real agent, overwrite cassette
*/
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { execSync } from 'node:child_process';
import { join } from 'node:path';
import { createTestDatabase } from '../../db/repositories/drizzle/test-helpers.js';
import {
DrizzleAgentRepository,
DrizzleProjectRepository,
DrizzleAccountRepository,
DrizzleInitiativeRepository,
} from '../../db/repositories/drizzle/index.js';
import { MultiProviderAgentManager } from '../../agent/manager.js';
import { CapturingEventBus, sleep, type RealProviderHarness } from '../integration/real-providers/harness.js';
import { CassetteStore } from './store.js';
import { CassetteProcessManager, type CassetteMode } from './process-manager.js';
export interface CassetteHarnessOptions {
/** Which provider the agent runs as (default: 'claude'). */
provider?: 'claude' | 'codex';
/**
* Directory where cassette JSON files are stored and read from.
* Defaults to CW_CASSETTE_DIR env var, then src/test/cassettes/.
*/
cassetteDir?: string;
/**
* Override cassette mode. Normally derived from env vars:
* - CW_CASSETTE_FORCE_RECORD=1 → 'record'
* - CW_CASSETTE_RECORD=1 → 'auto'
* - (default) → 'replay'
*/
mode?: CassetteMode;
}
const DEFAULT_CASSETTE_DIR = new URL('../cassettes', import.meta.url).pathname;
/**
* Resolve cassette mode from env vars (highest priority) or options.
*/
function resolveCassetteMode(options: CassetteHarnessOptions): CassetteMode {
if (process.env.CW_CASSETTE_FORCE_RECORD === '1') return 'record';
if (process.env.CW_CASSETTE_RECORD === '1') return 'auto';
return options.mode ?? 'replay';
}
/**
* Create a test harness backed by the cassette system.
*
* The harness exposes the same interface as RealProviderHarness so tests
* written for real providers work unchanged with cassettes.
*
* Replay is much faster than real API calls (typically < 500ms) and
* exercises the full pipeline: ProcessManager → FileTailer → OutputHandler
* → SignalManager → event emission.
*/
export async function createCassetteHarness(options: CassetteHarnessOptions = {}): Promise<RealProviderHarness> {
const cassetteDir = options.cassetteDir ?? process.env.CW_CASSETTE_DIR ?? DEFAULT_CASSETTE_DIR;
const cassetteMode = resolveCassetteMode(options);
// Create a temporary git workspace (required for worktree operations).
const workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-cassette-'));
execSync('git init', { cwd: workspaceRoot, stdio: 'ignore' });
execSync('git config user.email "test@test.com"', { cwd: workspaceRoot, stdio: 'ignore' });
execSync('git config user.name "Test"', { cwd: workspaceRoot, stdio: 'ignore' });
execSync('touch .gitkeep && git add .gitkeep && git commit -m "init"', { cwd: workspaceRoot, stdio: 'ignore' });
const db = createTestDatabase();
const agentRepository = new DrizzleAgentRepository(db);
const projectRepository = new DrizzleProjectRepository(db);
const accountRepository = new DrizzleAccountRepository(db);
const initiativeRepository = new DrizzleInitiativeRepository(db);
const eventBus = new CapturingEventBus();
const store = new CassetteStore(cassetteDir);
const cassetteProcessManager = new CassetteProcessManager(
workspaceRoot,
projectRepository,
store,
cassetteMode,
);
const agentManager = new MultiProviderAgentManager(
agentRepository,
workspaceRoot,
projectRepository,
accountRepository,
eventBus,
undefined, // credentialManager
undefined, // changeSetRepository
undefined, // phaseRepository
undefined, // taskRepository
undefined, // pageRepository
undefined, // logChunkRepository
false, // debug
cassetteProcessManager,
);
const harness: RealProviderHarness = {
db,
eventBus,
agentManager,
workspaceRoot,
agentRepository,
projectRepository,
accountRepository,
initiativeRepository,
// Cassette replays are fast — use a short poll interval and default timeout.
async waitForAgentCompletion(agentId, timeoutMs = 30_000) {
const deadline = Date.now() + timeoutMs;
while (Date.now() < deadline) {
const agent = await agentRepository.findById(agentId);
if (!agent) return null;
if (agent.status === 'idle' || agent.status === 'stopped') {
return agentManager.getResult(agentId);
}
if (agent.status === 'crashed') {
return agentManager.getResult(agentId);
}
if (agent.status === 'waiting_for_input') return null;
await sleep(100);
}
throw new Error(`[cassette] Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
},
async waitForAgentWaiting(agentId, timeoutMs = 30_000) {
const deadline = Date.now() + timeoutMs;
while (Date.now() < deadline) {
const agent = await agentRepository.findById(agentId);
if (!agent) return null;
if (agent.status === 'waiting_for_input') return agentManager.getPendingQuestions(agentId);
if (['idle', 'stopped', 'crashed'].includes(agent.status)) return null;
await sleep(100);
}
throw new Error(`[cassette] Timeout waiting for agent ${agentId} to enter waiting state after ${timeoutMs}ms`);
},
async waitForAgentStatus(agentId, status, timeoutMs = 30_000) {
const deadline = Date.now() + timeoutMs;
while (Date.now() < deadline) {
const agent = await agentRepository.findById(agentId);
if (!agent) throw new Error(`Agent ${agentId} not found`);
if (agent.status === status) return;
await sleep(100);
}
throw new Error(`[cassette] Timeout waiting for agent ${agentId} to reach status '${status}' after ${timeoutMs}ms`);
},
getEventsByType(type) {
return eventBus.getEventsByType(type);
},
clearEvents() {
eventBus.clearEvents();
},
async killAllAgents() {
const agents = await agentRepository.findAll();
for (const agent of agents) {
if (agent.status === 'running') {
try { await agentManager.stop(agent.id); } catch { /* ignore */ }
}
}
},
async cleanup() {
await harness.killAllAgents();
try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ }
},
};
return harness;
}

View File

@@ -0,0 +1,6 @@
export { CassetteStore } from './store.js';
export { CassetteProcessManager, type CassetteMode } from './process-manager.js';
export { createCassetteHarness, type CassetteHarnessOptions } from './harness.js';
export { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
export { hashWorktreeFiles, buildCassetteKey } from './key.js';
export type { CassetteKey, CassetteRecording, CassetteEntry } from './types.js';

76
src/test/cassette/key.ts Normal file
View File

@@ -0,0 +1,76 @@
/**
* Cassette Key Generation
*
* Builds stable SHA256-based identifiers for cassettes.
* Two spans are separate concerns:
* - hashWorktreeFiles: fingerprints the worktree state at spawn time (for execute mode drift)
* - buildCassetteKey: hashes all key components into a 32-char hex filename
*/
import { createHash } from 'node:crypto';
import { readdirSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
import type { CassetteKey } from './types.js';
/**
* Recursively hash all non-hidden files in a directory.
*
* Hidden entries (starting with '.') are skipped — this excludes .git, .cw, etc.
* Entries are processed in sorted order for determinism across platforms.
*
* Returns the first 16 hex chars of the SHA256, or 'empty' if the directory
* is absent or contains no readable files.
*/
export function hashWorktreeFiles(dir: string): string {
const hash = createHash('sha256');
let hasContent = false;
function walkDir(currentDir: string): void {
let entries;
try {
entries = readdirSync(currentDir, { withFileTypes: true });
} catch {
return;
}
for (const entry of [...entries].sort((a, b) => a.name.localeCompare(b.name))) {
if (entry.name.startsWith('.')) continue;
const fullPath = join(currentDir, entry.name);
const relPath = fullPath.slice(dir.length);
if (entry.isDirectory()) {
hash.update(`d:${relPath}\n`);
walkDir(fullPath);
} else if (entry.isFile()) {
try {
const content = readFileSync(fullPath);
hash.update(`f:${relPath}:${content.length}\n`);
hash.update(content);
hasContent = true;
} catch {
// skip unreadable files
}
}
}
}
walkDir(dir);
return hasContent ? hash.digest('hex').slice(0, 16) : 'empty';
}
/**
* Compute a stable 32-char hex identifier for a cassette key.
*
* modelArgs are sorted before hashing so insertion order differences
* between providers don't produce different cassettes.
*/
export function buildCassetteKey(key: CassetteKey): string {
const canonical = JSON.stringify({
normalizedPrompt: key.normalizedPrompt,
providerName: key.providerName,
modelArgs: [...key.modelArgs].sort(),
worktreeHash: key.worktreeHash,
});
return createHash('sha256').update(canonical).digest('hex').slice(0, 32);
}

View File

@@ -0,0 +1,67 @@
/**
* Cassette Normalizer
*
* Strips dynamic content from prompts and CLI args before hashing into a cassette key.
* Dynamic content (UUIDs, temp paths, timestamps, session numbers) varies between
* test runs but doesn't affect how the agent responds — so we replace them with
* stable placeholders to get a stable cache key.
*/
const UUID_RE = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi;
const ISO_TIMESTAMP_RE = /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?/g;
const UNIX_EPOCH_MS_RE = /\b1[0-9]{12}\b/g;
const SESSION_NUM_RE = /\bsession[_\s-]?\d+\b/gi;
/**
* Normalize a prompt for stable cassette key generation.
*
* Replacements applied in order (most-specific first to avoid partial matches):
* 1. Absolute workspace root path → __WORKSPACE__
* 2. UUIDs → __UUID__
* 3. ISO 8601 timestamps → __TIMESTAMP__
* 4. Unix epoch milliseconds → __EPOCH__
* 5. Session numbers → session__N__
*/
export function normalizePrompt(prompt: string, workspaceRoot: string): string {
let normalized = prompt;
if (workspaceRoot) {
normalized = normalized.replaceAll(workspaceRoot, '__WORKSPACE__');
}
normalized = normalized.replace(UUID_RE, '__UUID__');
normalized = normalized.replace(ISO_TIMESTAMP_RE, '__TIMESTAMP__');
normalized = normalized.replace(UNIX_EPOCH_MS_RE, '__EPOCH__');
normalized = normalized.replace(SESSION_NUM_RE, 'session__N__');
return normalized;
}
/**
* Strip the prompt value from CLI args to produce stable modelArgs for the cassette key.
*
* Handles all provider prompt flag styles:
* - Native: `-p <prompt>` (Claude)
* - Flag: `--prompt <prompt>`, `-p <prompt>` (Gemini, Cursor, Auggie, Amp, Opencode)
* - Also removes the bare prompt value if it appears as a positional arg.
*/
export function stripPromptFromArgs(args: string[], prompt: string): string[] {
if (!prompt) return [...args];
const result: string[] = [];
let i = 0;
while (i < args.length) {
const arg = args[i];
const PROMPT_FLAGS = ['-p', '--prompt', '--message'];
if (PROMPT_FLAGS.includes(arg) && args[i + 1] === prompt) {
i += 2; // skip flag + value
} else if (arg === prompt) {
i += 1; // skip bare positional prompt
} else {
result.push(arg);
i++;
}
}
return result;
}

View File

@@ -0,0 +1,174 @@
/**
* CassetteProcessManager
*
* Extends ProcessManager to intercept subprocess spawning and either:
* - Replay a recorded cassette (no API cost, deterministic)
* - Record a new cassette by running the real agent and capturing its output
*
* Modes:
* - 'replay': cassette MUST exist; throws if missing (safe for CI)
* - 'record': always runs real agent; saves/overwrites cassette on completion
* - 'auto': replays if cassette exists; falls through to record if missing
*
* The cassette key is built from:
* - Normalized prompt (dynamic content replaced with placeholders)
* - Provider name and stable CLI args (prompt value stripped)
* - Worktree file hash (detects content drift for execute-mode agents)
*/
import { readFileSync, existsSync } from 'node:fs';
import { join } from 'node:path';
import { ProcessManager } from '../../agent/process-manager.js';
import type { StreamEvent } from '../../agent/providers/parsers/index.js';
import type { FileTailer } from '../../agent/file-tailer.js';
import type { ProjectRepository } from '../../db/repositories/project-repository.js';
import type { CassetteKey, CassetteEntry } from './types.js';
import type { CassetteStore } from './store.js';
import { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
import { hashWorktreeFiles } from './key.js';
export type CassetteMode = 'replay' | 'record' | 'auto';
interface PendingRecording {
key: CassetteKey;
outputFilePath: string;
agentCwd: string;
}
export class CassetteProcessManager extends ProcessManager {
private readonly _workspaceRoot: string;
private readonly replayWorkerPath: string;
private readonly pendingRecordings = new Map<number, PendingRecording>();
constructor(
workspaceRoot: string,
projectRepository: ProjectRepository,
private readonly store: CassetteStore,
private readonly cassetteMode: CassetteMode = 'auto',
) {
super(workspaceRoot, projectRepository);
this._workspaceRoot = workspaceRoot;
this.replayWorkerPath = new URL('./replay-worker.mjs', import.meta.url).pathname;
}
override spawnDetached(
agentId: string,
agentName: string,
command: string,
args: string[],
cwd: string,
env: Record<string, string>,
providerName: string,
prompt?: string,
onEvent?: (event: StreamEvent) => void,
onRawContent?: (content: string) => void,
): { pid: number; outputFilePath: string; tailer: FileTailer } {
const key: CassetteKey = {
normalizedPrompt: normalizePrompt(prompt ?? '', this._workspaceRoot),
providerName,
modelArgs: stripPromptFromArgs(args, prompt ?? ''),
worktreeHash: hashWorktreeFiles(cwd),
};
// In record mode we always skip the store lookup and go straight to real spawn.
const existing = this.cassetteMode !== 'record' ? this.store.find(key) : null;
if (existing) {
return this.replayFromCassette(agentId, agentName, cwd, env, providerName, existing, onEvent, onRawContent);
}
if (this.cassetteMode === 'replay') {
throw new Error(
`[cassette] No cassette found for agent '${agentName}' (provider=${providerName}, mode=replay).\n` +
`Run with CW_CASSETTE_RECORD=1 to record it.`,
);
}
// auto or record: run the real agent and record the cassette on completion.
console.log(`[cassette] recording new cassette for agent '${agentName}' (${providerName})`);
const result = super.spawnDetached(agentId, agentName, command, args, cwd, env, providerName, prompt, onEvent, onRawContent);
this.pendingRecordings.set(result.pid, { key, outputFilePath: result.outputFilePath, agentCwd: cwd });
return result;
}
override pollForCompletion(
agentId: string,
pid: number,
onComplete: () => Promise<void>,
getTailer: () => FileTailer | undefined,
): { cancel: () => void } {
const pending = this.pendingRecordings.get(pid);
if (!pending) {
// Replay mode — no recording to save; delegate to base implementation.
return super.pollForCompletion(agentId, pid, onComplete, getTailer);
}
// Record mode — wrap onComplete to save the cassette before handing off.
return super.pollForCompletion(agentId, pid, async () => {
await this.saveCassette(pending);
this.pendingRecordings.delete(pid);
await onComplete();
}, getTailer);
}
private async saveCassette(pending: PendingRecording): Promise<void> {
// Read all JSONL lines from the output file the agent wrote to.
let jsonlLines: string[] = [];
try {
const content = readFileSync(pending.outputFilePath, 'utf-8');
jsonlLines = content.split('\n').filter(l => l.trim() !== '');
} catch {
// No output produced — record an empty cassette.
}
// Read signal.json from the agent working directory.
let signalJson: Record<string, unknown> | null = null;
const signalPath = join(pending.agentCwd, '.cw', 'output', 'signal.json');
if (existsSync(signalPath)) {
try {
signalJson = JSON.parse(readFileSync(signalPath, 'utf-8')) as Record<string, unknown>;
} catch {
// Corrupt signal file — record null.
}
}
const entry: CassetteEntry = {
version: 1,
key: pending.key,
recording: {
jsonlLines,
signalJson,
exitCode: 0,
recordedAt: new Date().toISOString(),
},
};
this.store.save(pending.key, entry);
}
private replayFromCassette(
agentId: string,
agentName: string,
cwd: string,
env: Record<string, string>,
providerName: string,
cassette: CassetteEntry,
onEvent?: (event: StreamEvent) => void,
onRawContent?: (content: string) => void,
): { pid: number; outputFilePath: string; tailer: FileTailer } {
console.log(`[cassette] replaying cassette for agent '${agentName}' (${cassette.recording.jsonlLines.length} lines)`);
return super.spawnDetached(
agentId,
agentName,
process.execPath, // use the running node binary
[this.replayWorkerPath], // replay-worker.mjs
cwd,
{ ...env, CW_CASSETTE_DATA: JSON.stringify(cassette.recording) },
providerName, // use original provider's parser for the tailer
undefined, // no prompt — worker handles output directly
onEvent,
onRawContent,
);
}
}

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env node
/**
* Cassette Replay Worker
*
* Spawned as a detached subprocess by CassetteProcessManager instead of the real
* agent CLI. Reads the cassette recording from CW_CASSETTE_DATA env var, replays
* the JSONL output to stdout (which spawnDetached redirects to the output file),
* writes signal.json relative to the process cwd, and exits.
*
* This is a plain .mjs file (no TypeScript) so it can be spawned with bare `node`
* without any build step or tsx dependency.
*/
import { mkdirSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
const data = process.env.CW_CASSETTE_DATA;
if (!data) {
process.stderr.write('[replay-worker] CW_CASSETTE_DATA env var not set\n');
process.exit(1);
}
let recording;
try {
recording = JSON.parse(data);
} catch (err) {
process.stderr.write(`[replay-worker] failed to parse CW_CASSETTE_DATA: ${err.message}\n`);
process.exit(1);
}
const { jsonlLines = [], signalJson = null, exitCode = 0 } = recording;
// Write JSONL lines to stdout.
// spawnDetached redirects stdout to the output file via open()+fd redirection,
// so writing to process.stdout here is equivalent to writing to the output file.
for (const line of jsonlLines) {
process.stdout.write(line + '\n');
}
// Write signal.json to the expected location relative to cwd.
// The agent's cwd is set by spawnDetached to the agent working directory.
if (signalJson) {
const signalDir = join(process.cwd(), '.cw', 'output');
mkdirSync(signalDir, { recursive: true });
writeFileSync(join(signalDir, 'signal.json'), JSON.stringify(signalJson, null, 2), 'utf-8');
}
process.exit(exitCode);

View File

@@ -0,0 +1,50 @@
/**
* CassetteStore
*
* Reads and writes cassette files from a directory on disk.
* Each cassette is stored as a JSON file named after the 32-char key hash.
* Cassette files are intended to be committed to git — they are the
* "recorded interactions" that allow tests to run without real API calls.
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import type { CassetteKey, CassetteEntry } from './types.js';
import { buildCassetteKey } from './key.js';
export class CassetteStore {
constructor(private readonly cassetteDir: string) {}
private pathFor(keyHash: string): string {
return join(this.cassetteDir, `${keyHash}.json`);
}
/**
* Look up a cassette by its key.
* Returns null if not found or if the file is corrupt.
*/
find(key: CassetteKey): CassetteEntry | null {
const hash = buildCassetteKey(key);
const path = this.pathFor(hash);
if (!existsSync(path)) return null;
try {
return JSON.parse(readFileSync(path, 'utf-8')) as CassetteEntry;
} catch {
return null;
}
}
/**
* Save a cassette to disk. Creates the cassette directory if needed.
* Prints the cassette filename so it's visible during recording runs.
*/
save(key: CassetteKey, entry: CassetteEntry): void {
mkdirSync(this.cassetteDir, { recursive: true });
const hash = buildCassetteKey(key);
const path = this.pathFor(hash);
writeFileSync(path, JSON.stringify(entry, null, 2), 'utf-8');
console.log(`[cassette] recorded → ${hash}.json (${entry.recording.jsonlLines.length} lines)`);
}
}

View File

@@ -0,0 +1,35 @@
/**
* Cassette Types
*
* VCR-style cassette format for recording and replaying agent subprocess I/O.
* A cassette captures everything an agent process writes so tests can replay
* it deterministically without hitting real AI APIs.
*/
export interface CassetteKey {
/** Prompt with dynamic content (UUIDs, paths, timestamps) replaced with placeholders. */
normalizedPrompt: string;
/** Provider name, e.g. 'claude', 'codex'. */
providerName: string;
/** Stable CLI args with the prompt value stripped. */
modelArgs: string[];
/** SHA256 prefix of all non-hidden files in the agent worktree at spawn time. */
worktreeHash: string;
}
export interface CassetteRecording {
/** All JSONL lines the agent wrote to stdout (captured from output file). */
jsonlLines: string[];
/** Content of signal.json written by the agent, or null if missing. */
signalJson: Record<string, unknown> | null;
/** Process exit code (0 = success). */
exitCode: number;
/** ISO timestamp when this cassette was recorded. */
recordedAt: string;
}
export interface CassetteEntry {
version: 1;
key: CassetteKey;
recording: CassetteRecording;
}

View File