feat: Add VCR-style cassette testing system for agent subprocess pipeline
Implements cassette recording/replay to test the full agent execution pipeline (ProcessManager → FileTailer → OutputHandler → SignalManager) without real AI API calls. Key components: - `CassetteProcessManager`: extends ProcessManager, intercepts spawnDetached to replay cassettes or record real runs on completion - `replay-worker.mjs`: standalone node script that replays JSONL + signal.json as a subprocess, exercising the complete file-based output pipeline - `CassetteStore`: reads/writes cassette JSON files keyed by SHA256 hash - `normalizer.ts`: strips dynamic content (UUIDs, temp paths, timestamps, session numbers) from prompts for stable cassette keys - `key.ts`: hashes normalized prompt + provider args + worktree file content (worktree hash detects content drift for execute-mode agents) - `createCassetteHarness()`: wraps RealProviderHarness with cassette support, same interface so existing real-provider tests work unchanged Mode control via env vars: (default) → replay: cassette must exist (safe for CI) CW_CASSETTE_RECORD=1 → auto: replay if exists, record if missing CW_CASSETTE_FORCE_RECORD=1 → record: always run real agent, overwrite cassette MultiProviderAgentManager gains an optional `processManagerOverride` constructor parameter for clean dependency injection without changing existing callers. Cassette files live in src/test/cassettes/ and are intended to be committed to git so CI runs without API access.
This commit is contained in:
@@ -80,9 +80,10 @@ export class MultiProviderAgentManager implements AgentManager {
|
||||
private pageRepository?: PageRepository,
|
||||
private logChunkRepository?: LogChunkRepository,
|
||||
private debug: boolean = false,
|
||||
processManagerOverride?: ProcessManager,
|
||||
) {
|
||||
this.signalManager = new FileSystemSignalManager();
|
||||
this.processManager = new ProcessManager(workspaceRoot, projectRepository);
|
||||
this.processManager = processManagerOverride ?? new ProcessManager(workspaceRoot, projectRepository);
|
||||
this.credentialHandler = new CredentialHandler(workspaceRoot, accountRepository, credentialManager);
|
||||
this.outputHandler = new OutputHandler(repository, eventBus, changeSetRepository, phaseRepository, taskRepository, pageRepository, this.signalManager);
|
||||
this.cleanupManager = new CleanupManager(workspaceRoot, repository, projectRepository, eventBus, debug, this.signalManager);
|
||||
|
||||
257
src/test/cassette/cassette.test.ts
Normal file
257
src/test/cassette/cassette.test.ts
Normal file
@@ -0,0 +1,257 @@
|
||||
/**
|
||||
* Cassette System Unit Tests
|
||||
*
|
||||
* Verifies normalizer, key generation, and store in isolation.
|
||||
* These run without any real processes or API calls.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import { mkdtempSync, rmSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
|
||||
import { hashWorktreeFiles, buildCassetteKey } from './key.js';
|
||||
import { CassetteStore } from './store.js';
|
||||
import type { CassetteEntry, CassetteKey } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Normalizer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('normalizePrompt', () => {
|
||||
it('strips UUIDs', () => {
|
||||
const prompt = 'Agent 550e8400-e29b-41d4-a716-446655440000 is running task abc123ef-0000-0000-0000-000000000000';
|
||||
const result = normalizePrompt(prompt, '');
|
||||
expect(result).not.toContain('550e8400');
|
||||
expect(result).not.toContain('abc123ef');
|
||||
expect(result).toContain('__UUID__');
|
||||
});
|
||||
|
||||
it('strips workspace root path', () => {
|
||||
const workspaceRoot = '/tmp/cw-test-abc123';
|
||||
const prompt = `Working directory: ${workspaceRoot}/agent-workdirs/my-agent`;
|
||||
const result = normalizePrompt(prompt, workspaceRoot);
|
||||
expect(result).not.toContain(workspaceRoot);
|
||||
expect(result).toContain('__WORKSPACE__');
|
||||
});
|
||||
|
||||
it('strips ISO timestamps', () => {
|
||||
const prompt = 'Started at 2026-03-01T14:30:00Z, last seen 2026-03-01T14:35:22.456Z';
|
||||
const result = normalizePrompt(prompt, '');
|
||||
expect(result).not.toContain('2026-03-01');
|
||||
expect(result).toContain('__TIMESTAMP__');
|
||||
});
|
||||
|
||||
it('strips session numbers', () => {
|
||||
const prompt = 'Resuming session 3 with agent session-42';
|
||||
const result = normalizePrompt(prompt, '');
|
||||
expect(result).toContain('session__N__');
|
||||
expect(result).not.toContain('session 3');
|
||||
expect(result).not.toContain('session-42');
|
||||
});
|
||||
|
||||
it('leaves static content unchanged', () => {
|
||||
const prompt = 'You are a Worker agent. Execute the assigned coding task.';
|
||||
const result = normalizePrompt(prompt, '/tmp/ws');
|
||||
expect(result).toBe(prompt);
|
||||
});
|
||||
|
||||
it('strips workspace root before UUID replacement to avoid double-normalizing', () => {
|
||||
const workspaceRoot = '/tmp/cw-test-abc123';
|
||||
const uuid = '550e8400-e29b-41d4-a716-446655440000';
|
||||
const prompt = `Dir: ${workspaceRoot}/agents/${uuid}`;
|
||||
const result = normalizePrompt(prompt, workspaceRoot);
|
||||
expect(result).toBe('Dir: __WORKSPACE__/agents/__UUID__');
|
||||
});
|
||||
});
|
||||
|
||||
describe('stripPromptFromArgs', () => {
|
||||
it('strips -p <prompt> style (Claude native)', () => {
|
||||
const prompt = 'Do the task.';
|
||||
const args = ['--dangerously-skip-permissions', '--verbose', '-p', prompt, '--output-format', 'stream-json'];
|
||||
const result = stripPromptFromArgs(args, prompt);
|
||||
expect(result).toEqual(['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json']);
|
||||
});
|
||||
|
||||
it('strips --prompt <prompt> style', () => {
|
||||
const prompt = 'Do the task.';
|
||||
const args = ['--flag', '--prompt', prompt, '--json'];
|
||||
const result = stripPromptFromArgs(args, prompt);
|
||||
expect(result).toEqual(['--flag', '--json']);
|
||||
});
|
||||
|
||||
it('strips bare positional prompt', () => {
|
||||
const prompt = 'Do the task.';
|
||||
const args = ['--full-auto', prompt];
|
||||
const result = stripPromptFromArgs(args, prompt);
|
||||
expect(result).toEqual(['--full-auto']);
|
||||
});
|
||||
|
||||
it('returns unchanged args when prompt is empty', () => {
|
||||
const args = ['--flag', '--value'];
|
||||
expect(stripPromptFromArgs(args, '')).toEqual(args);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Key generation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('buildCassetteKey', () => {
|
||||
const baseKey: CassetteKey = {
|
||||
normalizedPrompt: 'You are a Worker agent.',
|
||||
providerName: 'claude',
|
||||
modelArgs: ['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json'],
|
||||
worktreeHash: 'empty',
|
||||
};
|
||||
|
||||
it('produces a 32-char hex string', () => {
|
||||
const key = buildCassetteKey(baseKey);
|
||||
expect(key).toMatch(/^[0-9a-f]{32}$/);
|
||||
});
|
||||
|
||||
it('is deterministic for the same key', () => {
|
||||
expect(buildCassetteKey(baseKey)).toBe(buildCassetteKey(baseKey));
|
||||
});
|
||||
|
||||
it('differs when normalizedPrompt changes', () => {
|
||||
const key2 = { ...baseKey, normalizedPrompt: 'You are a Discuss agent.' };
|
||||
expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2));
|
||||
});
|
||||
|
||||
it('differs when providerName changes', () => {
|
||||
const key2 = { ...baseKey, providerName: 'codex' };
|
||||
expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2));
|
||||
});
|
||||
|
||||
it('differs when worktreeHash changes', () => {
|
||||
const key2 = { ...baseKey, worktreeHash: 'abcdef1234567890' };
|
||||
expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2));
|
||||
});
|
||||
|
||||
it('is stable regardless of modelArgs insertion order', () => {
|
||||
const key1 = { ...baseKey, modelArgs: ['--verbose', '--dangerously-skip-permissions'] };
|
||||
const key2 = { ...baseKey, modelArgs: ['--dangerously-skip-permissions', '--verbose'] };
|
||||
expect(buildCassetteKey(key1)).toBe(buildCassetteKey(key2));
|
||||
});
|
||||
});
|
||||
|
||||
describe('hashWorktreeFiles', () => {
|
||||
it('returns "empty" for a non-existent directory', () => {
|
||||
expect(hashWorktreeFiles('/does/not/exist')).toBe('empty');
|
||||
});
|
||||
|
||||
it('returns "empty" for a directory with only hidden files', () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-'));
|
||||
try {
|
||||
// Only hidden entries present
|
||||
const { mkdirSync } = require('node:fs');
|
||||
mkdirSync(join(dir, '.git'));
|
||||
expect(hashWorktreeFiles(dir)).toBe('empty');
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('produces a 16-char hex string for a directory with files', () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-'));
|
||||
try {
|
||||
const { writeFileSync } = require('node:fs');
|
||||
writeFileSync(join(dir, 'index.ts'), 'export const x = 1;');
|
||||
const hash = hashWorktreeFiles(dir);
|
||||
expect(hash).toMatch(/^[0-9a-f]{16}$/);
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('changes when file content changes', () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-'));
|
||||
try {
|
||||
const { writeFileSync } = require('node:fs');
|
||||
writeFileSync(join(dir, 'index.ts'), 'export const x = 1;');
|
||||
const hash1 = hashWorktreeFiles(dir);
|
||||
writeFileSync(join(dir, 'index.ts'), 'export const x = 2;');
|
||||
const hash2 = hashWorktreeFiles(dir);
|
||||
expect(hash1).not.toBe(hash2);
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CassetteStore
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('CassetteStore', () => {
|
||||
let dir: string;
|
||||
let store: CassetteStore;
|
||||
|
||||
const key: CassetteKey = {
|
||||
normalizedPrompt: 'Test prompt',
|
||||
providerName: 'claude',
|
||||
modelArgs: ['--verbose'],
|
||||
worktreeHash: 'empty',
|
||||
};
|
||||
|
||||
const entry: CassetteEntry = {
|
||||
version: 1,
|
||||
key,
|
||||
recording: {
|
||||
jsonlLines: ['{"type":"system","session_id":"test-session"}', '{"type":"result","subtype":"success"}'],
|
||||
signalJson: { status: 'done', message: 'Task completed' },
|
||||
exitCode: 0,
|
||||
recordedAt: '2026-03-01T00:00:00.000Z',
|
||||
},
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
dir = mkdtempSync(join(tmpdir(), 'cw-store-test-'));
|
||||
store = new CassetteStore(dir);
|
||||
});
|
||||
|
||||
it('returns null for unknown key', () => {
|
||||
expect(store.find(key)).toBeNull();
|
||||
});
|
||||
|
||||
it('round-trips a cassette entry', () => {
|
||||
store.save(key, entry);
|
||||
const loaded = store.find(key);
|
||||
expect(loaded).not.toBeNull();
|
||||
expect(loaded?.recording.signalJson).toEqual({ status: 'done', message: 'Task completed' });
|
||||
expect(loaded?.recording.jsonlLines).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('overwrites an existing cassette', () => {
|
||||
store.save(key, entry);
|
||||
const updated: CassetteEntry = {
|
||||
...entry,
|
||||
recording: { ...entry.recording, jsonlLines: ['new line'], recordedAt: '2026-03-02T00:00:00.000Z' },
|
||||
};
|
||||
store.save(key, updated);
|
||||
const loaded = store.find(key);
|
||||
expect(loaded?.recording.jsonlLines).toEqual(['new line']);
|
||||
});
|
||||
|
||||
it('uses same file for same key', () => {
|
||||
store.save(key, entry);
|
||||
const { readdirSync } = require('node:fs');
|
||||
const files = readdirSync(dir).filter((f: string) => f.endsWith('.json'));
|
||||
expect(files).toHaveLength(1);
|
||||
|
||||
store.save(key, entry); // overwrite
|
||||
const files2 = readdirSync(dir).filter((f: string) => f.endsWith('.json'));
|
||||
expect(files2).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('uses different files for different keys', () => {
|
||||
const key2: CassetteKey = { ...key, providerName: 'codex' };
|
||||
store.save(key, entry);
|
||||
store.save(key2, { ...entry, key: key2 });
|
||||
|
||||
const { readdirSync } = require('node:fs');
|
||||
const files = readdirSync(dir).filter((f: string) => f.endsWith('.json'));
|
||||
expect(files).toHaveLength(2);
|
||||
});
|
||||
});
|
||||
200
src/test/cassette/harness.ts
Normal file
200
src/test/cassette/harness.ts
Normal file
@@ -0,0 +1,200 @@
|
||||
/**
|
||||
* Cassette Test Harness
|
||||
*
|
||||
* Wraps RealProviderHarness with the CassetteProcessManager so tests run
|
||||
* against recorded cassettes instead of real AI APIs.
|
||||
*
|
||||
* Usage:
|
||||
*
|
||||
* let harness: RealProviderHarness;
|
||||
*
|
||||
* beforeAll(async () => {
|
||||
* harness = await createCassetteHarness({ provider: 'claude' });
|
||||
* });
|
||||
*
|
||||
* afterAll(() => harness.cleanup());
|
||||
*
|
||||
* it('completes a task', async () => {
|
||||
* const agent = await harness.agentManager.spawn({ prompt: MINIMAL_PROMPTS.done, ... });
|
||||
* const result = await harness.waitForAgentCompletion(agent.id);
|
||||
* expect(result?.success).toBe(true);
|
||||
* });
|
||||
*
|
||||
* Mode control via env vars:
|
||||
* (default) → replay mode: cassette must exist, throws if missing
|
||||
* CW_CASSETTE_RECORD=1 → auto mode: replay if exists, record if missing
|
||||
* CW_CASSETTE_FORCE_RECORD=1→ record mode: always run real agent, overwrite cassette
|
||||
*/
|
||||
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { execSync } from 'node:child_process';
|
||||
import { join } from 'node:path';
|
||||
import { createTestDatabase } from '../../db/repositories/drizzle/test-helpers.js';
|
||||
import {
|
||||
DrizzleAgentRepository,
|
||||
DrizzleProjectRepository,
|
||||
DrizzleAccountRepository,
|
||||
DrizzleInitiativeRepository,
|
||||
} from '../../db/repositories/drizzle/index.js';
|
||||
import { MultiProviderAgentManager } from '../../agent/manager.js';
|
||||
import { CapturingEventBus, sleep, type RealProviderHarness } from '../integration/real-providers/harness.js';
|
||||
import { CassetteStore } from './store.js';
|
||||
import { CassetteProcessManager, type CassetteMode } from './process-manager.js';
|
||||
|
||||
export interface CassetteHarnessOptions {
|
||||
/** Which provider the agent runs as (default: 'claude'). */
|
||||
provider?: 'claude' | 'codex';
|
||||
/**
|
||||
* Directory where cassette JSON files are stored and read from.
|
||||
* Defaults to CW_CASSETTE_DIR env var, then src/test/cassettes/.
|
||||
*/
|
||||
cassetteDir?: string;
|
||||
/**
|
||||
* Override cassette mode. Normally derived from env vars:
|
||||
* - CW_CASSETTE_FORCE_RECORD=1 → 'record'
|
||||
* - CW_CASSETTE_RECORD=1 → 'auto'
|
||||
* - (default) → 'replay'
|
||||
*/
|
||||
mode?: CassetteMode;
|
||||
}
|
||||
|
||||
const DEFAULT_CASSETTE_DIR = new URL('../cassettes', import.meta.url).pathname;
|
||||
|
||||
/**
|
||||
* Resolve cassette mode from env vars (highest priority) or options.
|
||||
*/
|
||||
function resolveCassetteMode(options: CassetteHarnessOptions): CassetteMode {
|
||||
if (process.env.CW_CASSETTE_FORCE_RECORD === '1') return 'record';
|
||||
if (process.env.CW_CASSETTE_RECORD === '1') return 'auto';
|
||||
return options.mode ?? 'replay';
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a test harness backed by the cassette system.
|
||||
*
|
||||
* The harness exposes the same interface as RealProviderHarness so tests
|
||||
* written for real providers work unchanged with cassettes.
|
||||
*
|
||||
* Replay is much faster than real API calls (typically < 500ms) and
|
||||
* exercises the full pipeline: ProcessManager → FileTailer → OutputHandler
|
||||
* → SignalManager → event emission.
|
||||
*/
|
||||
export async function createCassetteHarness(options: CassetteHarnessOptions = {}): Promise<RealProviderHarness> {
|
||||
const cassetteDir = options.cassetteDir ?? process.env.CW_CASSETTE_DIR ?? DEFAULT_CASSETTE_DIR;
|
||||
const cassetteMode = resolveCassetteMode(options);
|
||||
|
||||
// Create a temporary git workspace (required for worktree operations).
|
||||
const workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-cassette-'));
|
||||
execSync('git init', { cwd: workspaceRoot, stdio: 'ignore' });
|
||||
execSync('git config user.email "test@test.com"', { cwd: workspaceRoot, stdio: 'ignore' });
|
||||
execSync('git config user.name "Test"', { cwd: workspaceRoot, stdio: 'ignore' });
|
||||
execSync('touch .gitkeep && git add .gitkeep && git commit -m "init"', { cwd: workspaceRoot, stdio: 'ignore' });
|
||||
|
||||
const db = createTestDatabase();
|
||||
const agentRepository = new DrizzleAgentRepository(db);
|
||||
const projectRepository = new DrizzleProjectRepository(db);
|
||||
const accountRepository = new DrizzleAccountRepository(db);
|
||||
const initiativeRepository = new DrizzleInitiativeRepository(db);
|
||||
const eventBus = new CapturingEventBus();
|
||||
|
||||
const store = new CassetteStore(cassetteDir);
|
||||
const cassetteProcessManager = new CassetteProcessManager(
|
||||
workspaceRoot,
|
||||
projectRepository,
|
||||
store,
|
||||
cassetteMode,
|
||||
);
|
||||
|
||||
const agentManager = new MultiProviderAgentManager(
|
||||
agentRepository,
|
||||
workspaceRoot,
|
||||
projectRepository,
|
||||
accountRepository,
|
||||
eventBus,
|
||||
undefined, // credentialManager
|
||||
undefined, // changeSetRepository
|
||||
undefined, // phaseRepository
|
||||
undefined, // taskRepository
|
||||
undefined, // pageRepository
|
||||
undefined, // logChunkRepository
|
||||
false, // debug
|
||||
cassetteProcessManager,
|
||||
);
|
||||
|
||||
const harness: RealProviderHarness = {
|
||||
db,
|
||||
eventBus,
|
||||
agentManager,
|
||||
workspaceRoot,
|
||||
agentRepository,
|
||||
projectRepository,
|
||||
accountRepository,
|
||||
initiativeRepository,
|
||||
|
||||
// Cassette replays are fast — use a short poll interval and default timeout.
|
||||
async waitForAgentCompletion(agentId, timeoutMs = 30_000) {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
while (Date.now() < deadline) {
|
||||
const agent = await agentRepository.findById(agentId);
|
||||
if (!agent) return null;
|
||||
if (agent.status === 'idle' || agent.status === 'stopped') {
|
||||
return agentManager.getResult(agentId);
|
||||
}
|
||||
if (agent.status === 'crashed') {
|
||||
return agentManager.getResult(agentId);
|
||||
}
|
||||
if (agent.status === 'waiting_for_input') return null;
|
||||
await sleep(100);
|
||||
}
|
||||
throw new Error(`[cassette] Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
|
||||
},
|
||||
|
||||
async waitForAgentWaiting(agentId, timeoutMs = 30_000) {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
while (Date.now() < deadline) {
|
||||
const agent = await agentRepository.findById(agentId);
|
||||
if (!agent) return null;
|
||||
if (agent.status === 'waiting_for_input') return agentManager.getPendingQuestions(agentId);
|
||||
if (['idle', 'stopped', 'crashed'].includes(agent.status)) return null;
|
||||
await sleep(100);
|
||||
}
|
||||
throw new Error(`[cassette] Timeout waiting for agent ${agentId} to enter waiting state after ${timeoutMs}ms`);
|
||||
},
|
||||
|
||||
async waitForAgentStatus(agentId, status, timeoutMs = 30_000) {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
while (Date.now() < deadline) {
|
||||
const agent = await agentRepository.findById(agentId);
|
||||
if (!agent) throw new Error(`Agent ${agentId} not found`);
|
||||
if (agent.status === status) return;
|
||||
await sleep(100);
|
||||
}
|
||||
throw new Error(`[cassette] Timeout waiting for agent ${agentId} to reach status '${status}' after ${timeoutMs}ms`);
|
||||
},
|
||||
|
||||
getEventsByType(type) {
|
||||
return eventBus.getEventsByType(type);
|
||||
},
|
||||
|
||||
clearEvents() {
|
||||
eventBus.clearEvents();
|
||||
},
|
||||
|
||||
async killAllAgents() {
|
||||
const agents = await agentRepository.findAll();
|
||||
for (const agent of agents) {
|
||||
if (agent.status === 'running') {
|
||||
try { await agentManager.stop(agent.id); } catch { /* ignore */ }
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
async cleanup() {
|
||||
await harness.killAllAgents();
|
||||
try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ }
|
||||
},
|
||||
};
|
||||
|
||||
return harness;
|
||||
}
|
||||
6
src/test/cassette/index.ts
Normal file
6
src/test/cassette/index.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
export { CassetteStore } from './store.js';
|
||||
export { CassetteProcessManager, type CassetteMode } from './process-manager.js';
|
||||
export { createCassetteHarness, type CassetteHarnessOptions } from './harness.js';
|
||||
export { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
|
||||
export { hashWorktreeFiles, buildCassetteKey } from './key.js';
|
||||
export type { CassetteKey, CassetteRecording, CassetteEntry } from './types.js';
|
||||
76
src/test/cassette/key.ts
Normal file
76
src/test/cassette/key.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
* Cassette Key Generation
|
||||
*
|
||||
* Builds stable SHA256-based identifiers for cassettes.
|
||||
* Two spans are separate concerns:
|
||||
* - hashWorktreeFiles: fingerprints the worktree state at spawn time (for execute mode drift)
|
||||
* - buildCassetteKey: hashes all key components into a 32-char hex filename
|
||||
*/
|
||||
|
||||
import { createHash } from 'node:crypto';
|
||||
import { readdirSync, readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import type { CassetteKey } from './types.js';
|
||||
|
||||
/**
|
||||
* Recursively hash all non-hidden files in a directory.
|
||||
*
|
||||
* Hidden entries (starting with '.') are skipped — this excludes .git, .cw, etc.
|
||||
* Entries are processed in sorted order for determinism across platforms.
|
||||
*
|
||||
* Returns the first 16 hex chars of the SHA256, or 'empty' if the directory
|
||||
* is absent or contains no readable files.
|
||||
*/
|
||||
export function hashWorktreeFiles(dir: string): string {
|
||||
const hash = createHash('sha256');
|
||||
let hasContent = false;
|
||||
|
||||
function walkDir(currentDir: string): void {
|
||||
let entries;
|
||||
try {
|
||||
entries = readdirSync(currentDir, { withFileTypes: true });
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const entry of [...entries].sort((a, b) => a.name.localeCompare(b.name))) {
|
||||
if (entry.name.startsWith('.')) continue;
|
||||
|
||||
const fullPath = join(currentDir, entry.name);
|
||||
const relPath = fullPath.slice(dir.length);
|
||||
|
||||
if (entry.isDirectory()) {
|
||||
hash.update(`d:${relPath}\n`);
|
||||
walkDir(fullPath);
|
||||
} else if (entry.isFile()) {
|
||||
try {
|
||||
const content = readFileSync(fullPath);
|
||||
hash.update(`f:${relPath}:${content.length}\n`);
|
||||
hash.update(content);
|
||||
hasContent = true;
|
||||
} catch {
|
||||
// skip unreadable files
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
walkDir(dir);
|
||||
return hasContent ? hash.digest('hex').slice(0, 16) : 'empty';
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a stable 32-char hex identifier for a cassette key.
|
||||
*
|
||||
* modelArgs are sorted before hashing so insertion order differences
|
||||
* between providers don't produce different cassettes.
|
||||
*/
|
||||
export function buildCassetteKey(key: CassetteKey): string {
|
||||
const canonical = JSON.stringify({
|
||||
normalizedPrompt: key.normalizedPrompt,
|
||||
providerName: key.providerName,
|
||||
modelArgs: [...key.modelArgs].sort(),
|
||||
worktreeHash: key.worktreeHash,
|
||||
});
|
||||
return createHash('sha256').update(canonical).digest('hex').slice(0, 32);
|
||||
}
|
||||
67
src/test/cassette/normalizer.ts
Normal file
67
src/test/cassette/normalizer.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
/**
|
||||
* Cassette Normalizer
|
||||
*
|
||||
* Strips dynamic content from prompts and CLI args before hashing into a cassette key.
|
||||
* Dynamic content (UUIDs, temp paths, timestamps, session numbers) varies between
|
||||
* test runs but doesn't affect how the agent responds — so we replace them with
|
||||
* stable placeholders to get a stable cache key.
|
||||
*/
|
||||
|
||||
const UUID_RE = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi;
|
||||
const ISO_TIMESTAMP_RE = /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?/g;
|
||||
const UNIX_EPOCH_MS_RE = /\b1[0-9]{12}\b/g;
|
||||
const SESSION_NUM_RE = /\bsession[_\s-]?\d+\b/gi;
|
||||
|
||||
/**
|
||||
* Normalize a prompt for stable cassette key generation.
|
||||
*
|
||||
* Replacements applied in order (most-specific first to avoid partial matches):
|
||||
* 1. Absolute workspace root path → __WORKSPACE__
|
||||
* 2. UUIDs → __UUID__
|
||||
* 3. ISO 8601 timestamps → __TIMESTAMP__
|
||||
* 4. Unix epoch milliseconds → __EPOCH__
|
||||
* 5. Session numbers → session__N__
|
||||
*/
|
||||
export function normalizePrompt(prompt: string, workspaceRoot: string): string {
|
||||
let normalized = prompt;
|
||||
|
||||
if (workspaceRoot) {
|
||||
normalized = normalized.replaceAll(workspaceRoot, '__WORKSPACE__');
|
||||
}
|
||||
|
||||
normalized = normalized.replace(UUID_RE, '__UUID__');
|
||||
normalized = normalized.replace(ISO_TIMESTAMP_RE, '__TIMESTAMP__');
|
||||
normalized = normalized.replace(UNIX_EPOCH_MS_RE, '__EPOCH__');
|
||||
normalized = normalized.replace(SESSION_NUM_RE, 'session__N__');
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip the prompt value from CLI args to produce stable modelArgs for the cassette key.
|
||||
*
|
||||
* Handles all provider prompt flag styles:
|
||||
* - Native: `-p <prompt>` (Claude)
|
||||
* - Flag: `--prompt <prompt>`, `-p <prompt>` (Gemini, Cursor, Auggie, Amp, Opencode)
|
||||
* - Also removes the bare prompt value if it appears as a positional arg.
|
||||
*/
|
||||
export function stripPromptFromArgs(args: string[], prompt: string): string[] {
|
||||
if (!prompt) return [...args];
|
||||
|
||||
const result: string[] = [];
|
||||
let i = 0;
|
||||
while (i < args.length) {
|
||||
const arg = args[i];
|
||||
const PROMPT_FLAGS = ['-p', '--prompt', '--message'];
|
||||
|
||||
if (PROMPT_FLAGS.includes(arg) && args[i + 1] === prompt) {
|
||||
i += 2; // skip flag + value
|
||||
} else if (arg === prompt) {
|
||||
i += 1; // skip bare positional prompt
|
||||
} else {
|
||||
result.push(arg);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
174
src/test/cassette/process-manager.ts
Normal file
174
src/test/cassette/process-manager.ts
Normal file
@@ -0,0 +1,174 @@
|
||||
/**
|
||||
* CassetteProcessManager
|
||||
*
|
||||
* Extends ProcessManager to intercept subprocess spawning and either:
|
||||
* - Replay a recorded cassette (no API cost, deterministic)
|
||||
* - Record a new cassette by running the real agent and capturing its output
|
||||
*
|
||||
* Modes:
|
||||
* - 'replay': cassette MUST exist; throws if missing (safe for CI)
|
||||
* - 'record': always runs real agent; saves/overwrites cassette on completion
|
||||
* - 'auto': replays if cassette exists; falls through to record if missing
|
||||
*
|
||||
* The cassette key is built from:
|
||||
* - Normalized prompt (dynamic content replaced with placeholders)
|
||||
* - Provider name and stable CLI args (prompt value stripped)
|
||||
* - Worktree file hash (detects content drift for execute-mode agents)
|
||||
*/
|
||||
|
||||
import { readFileSync, existsSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { ProcessManager } from '../../agent/process-manager.js';
|
||||
import type { StreamEvent } from '../../agent/providers/parsers/index.js';
|
||||
import type { FileTailer } from '../../agent/file-tailer.js';
|
||||
import type { ProjectRepository } from '../../db/repositories/project-repository.js';
|
||||
import type { CassetteKey, CassetteEntry } from './types.js';
|
||||
import type { CassetteStore } from './store.js';
|
||||
import { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
|
||||
import { hashWorktreeFiles } from './key.js';
|
||||
|
||||
export type CassetteMode = 'replay' | 'record' | 'auto';
|
||||
|
||||
interface PendingRecording {
|
||||
key: CassetteKey;
|
||||
outputFilePath: string;
|
||||
agentCwd: string;
|
||||
}
|
||||
|
||||
export class CassetteProcessManager extends ProcessManager {
|
||||
private readonly _workspaceRoot: string;
|
||||
private readonly replayWorkerPath: string;
|
||||
private readonly pendingRecordings = new Map<number, PendingRecording>();
|
||||
|
||||
constructor(
|
||||
workspaceRoot: string,
|
||||
projectRepository: ProjectRepository,
|
||||
private readonly store: CassetteStore,
|
||||
private readonly cassetteMode: CassetteMode = 'auto',
|
||||
) {
|
||||
super(workspaceRoot, projectRepository);
|
||||
this._workspaceRoot = workspaceRoot;
|
||||
this.replayWorkerPath = new URL('./replay-worker.mjs', import.meta.url).pathname;
|
||||
}
|
||||
|
||||
override spawnDetached(
|
||||
agentId: string,
|
||||
agentName: string,
|
||||
command: string,
|
||||
args: string[],
|
||||
cwd: string,
|
||||
env: Record<string, string>,
|
||||
providerName: string,
|
||||
prompt?: string,
|
||||
onEvent?: (event: StreamEvent) => void,
|
||||
onRawContent?: (content: string) => void,
|
||||
): { pid: number; outputFilePath: string; tailer: FileTailer } {
|
||||
const key: CassetteKey = {
|
||||
normalizedPrompt: normalizePrompt(prompt ?? '', this._workspaceRoot),
|
||||
providerName,
|
||||
modelArgs: stripPromptFromArgs(args, prompt ?? ''),
|
||||
worktreeHash: hashWorktreeFiles(cwd),
|
||||
};
|
||||
|
||||
// In record mode we always skip the store lookup and go straight to real spawn.
|
||||
const existing = this.cassetteMode !== 'record' ? this.store.find(key) : null;
|
||||
|
||||
if (existing) {
|
||||
return this.replayFromCassette(agentId, agentName, cwd, env, providerName, existing, onEvent, onRawContent);
|
||||
}
|
||||
|
||||
if (this.cassetteMode === 'replay') {
|
||||
throw new Error(
|
||||
`[cassette] No cassette found for agent '${agentName}' (provider=${providerName}, mode=replay).\n` +
|
||||
`Run with CW_CASSETTE_RECORD=1 to record it.`,
|
||||
);
|
||||
}
|
||||
|
||||
// auto or record: run the real agent and record the cassette on completion.
|
||||
console.log(`[cassette] recording new cassette for agent '${agentName}' (${providerName})`);
|
||||
const result = super.spawnDetached(agentId, agentName, command, args, cwd, env, providerName, prompt, onEvent, onRawContent);
|
||||
this.pendingRecordings.set(result.pid, { key, outputFilePath: result.outputFilePath, agentCwd: cwd });
|
||||
return result;
|
||||
}
|
||||
|
||||
override pollForCompletion(
|
||||
agentId: string,
|
||||
pid: number,
|
||||
onComplete: () => Promise<void>,
|
||||
getTailer: () => FileTailer | undefined,
|
||||
): { cancel: () => void } {
|
||||
const pending = this.pendingRecordings.get(pid);
|
||||
if (!pending) {
|
||||
// Replay mode — no recording to save; delegate to base implementation.
|
||||
return super.pollForCompletion(agentId, pid, onComplete, getTailer);
|
||||
}
|
||||
|
||||
// Record mode — wrap onComplete to save the cassette before handing off.
|
||||
return super.pollForCompletion(agentId, pid, async () => {
|
||||
await this.saveCassette(pending);
|
||||
this.pendingRecordings.delete(pid);
|
||||
await onComplete();
|
||||
}, getTailer);
|
||||
}
|
||||
|
||||
private async saveCassette(pending: PendingRecording): Promise<void> {
|
||||
// Read all JSONL lines from the output file the agent wrote to.
|
||||
let jsonlLines: string[] = [];
|
||||
try {
|
||||
const content = readFileSync(pending.outputFilePath, 'utf-8');
|
||||
jsonlLines = content.split('\n').filter(l => l.trim() !== '');
|
||||
} catch {
|
||||
// No output produced — record an empty cassette.
|
||||
}
|
||||
|
||||
// Read signal.json from the agent working directory.
|
||||
let signalJson: Record<string, unknown> | null = null;
|
||||
const signalPath = join(pending.agentCwd, '.cw', 'output', 'signal.json');
|
||||
if (existsSync(signalPath)) {
|
||||
try {
|
||||
signalJson = JSON.parse(readFileSync(signalPath, 'utf-8')) as Record<string, unknown>;
|
||||
} catch {
|
||||
// Corrupt signal file — record null.
|
||||
}
|
||||
}
|
||||
|
||||
const entry: CassetteEntry = {
|
||||
version: 1,
|
||||
key: pending.key,
|
||||
recording: {
|
||||
jsonlLines,
|
||||
signalJson,
|
||||
exitCode: 0,
|
||||
recordedAt: new Date().toISOString(),
|
||||
},
|
||||
};
|
||||
|
||||
this.store.save(pending.key, entry);
|
||||
}
|
||||
|
||||
private replayFromCassette(
|
||||
agentId: string,
|
||||
agentName: string,
|
||||
cwd: string,
|
||||
env: Record<string, string>,
|
||||
providerName: string,
|
||||
cassette: CassetteEntry,
|
||||
onEvent?: (event: StreamEvent) => void,
|
||||
onRawContent?: (content: string) => void,
|
||||
): { pid: number; outputFilePath: string; tailer: FileTailer } {
|
||||
console.log(`[cassette] replaying cassette for agent '${agentName}' (${cassette.recording.jsonlLines.length} lines)`);
|
||||
|
||||
return super.spawnDetached(
|
||||
agentId,
|
||||
agentName,
|
||||
process.execPath, // use the running node binary
|
||||
[this.replayWorkerPath], // replay-worker.mjs
|
||||
cwd,
|
||||
{ ...env, CW_CASSETTE_DATA: JSON.stringify(cassette.recording) },
|
||||
providerName, // use original provider's parser for the tailer
|
||||
undefined, // no prompt — worker handles output directly
|
||||
onEvent,
|
||||
onRawContent,
|
||||
);
|
||||
}
|
||||
}
|
||||
48
src/test/cassette/replay-worker.mjs
Normal file
48
src/test/cassette/replay-worker.mjs
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Cassette Replay Worker
|
||||
*
|
||||
* Spawned as a detached subprocess by CassetteProcessManager instead of the real
|
||||
* agent CLI. Reads the cassette recording from CW_CASSETTE_DATA env var, replays
|
||||
* the JSONL output to stdout (which spawnDetached redirects to the output file),
|
||||
* writes signal.json relative to the process cwd, and exits.
|
||||
*
|
||||
* This is a plain .mjs file (no TypeScript) so it can be spawned with bare `node`
|
||||
* without any build step or tsx dependency.
|
||||
*/
|
||||
|
||||
import { mkdirSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
|
||||
const data = process.env.CW_CASSETTE_DATA;
|
||||
if (!data) {
|
||||
process.stderr.write('[replay-worker] CW_CASSETTE_DATA env var not set\n');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
let recording;
|
||||
try {
|
||||
recording = JSON.parse(data);
|
||||
} catch (err) {
|
||||
process.stderr.write(`[replay-worker] failed to parse CW_CASSETTE_DATA: ${err.message}\n`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const { jsonlLines = [], signalJson = null, exitCode = 0 } = recording;
|
||||
|
||||
// Write JSONL lines to stdout.
|
||||
// spawnDetached redirects stdout to the output file via open()+fd redirection,
|
||||
// so writing to process.stdout here is equivalent to writing to the output file.
|
||||
for (const line of jsonlLines) {
|
||||
process.stdout.write(line + '\n');
|
||||
}
|
||||
|
||||
// Write signal.json to the expected location relative to cwd.
|
||||
// The agent's cwd is set by spawnDetached to the agent working directory.
|
||||
if (signalJson) {
|
||||
const signalDir = join(process.cwd(), '.cw', 'output');
|
||||
mkdirSync(signalDir, { recursive: true });
|
||||
writeFileSync(join(signalDir, 'signal.json'), JSON.stringify(signalJson, null, 2), 'utf-8');
|
||||
}
|
||||
|
||||
process.exit(exitCode);
|
||||
50
src/test/cassette/store.ts
Normal file
50
src/test/cassette/store.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* CassetteStore
|
||||
*
|
||||
* Reads and writes cassette files from a directory on disk.
|
||||
* Each cassette is stored as a JSON file named after the 32-char key hash.
|
||||
* Cassette files are intended to be committed to git — they are the
|
||||
* "recorded interactions" that allow tests to run without real API calls.
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import type { CassetteKey, CassetteEntry } from './types.js';
|
||||
import { buildCassetteKey } from './key.js';
|
||||
|
||||
export class CassetteStore {
|
||||
constructor(private readonly cassetteDir: string) {}
|
||||
|
||||
private pathFor(keyHash: string): string {
|
||||
return join(this.cassetteDir, `${keyHash}.json`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Look up a cassette by its key.
|
||||
* Returns null if not found or if the file is corrupt.
|
||||
*/
|
||||
find(key: CassetteKey): CassetteEntry | null {
|
||||
const hash = buildCassetteKey(key);
|
||||
const path = this.pathFor(hash);
|
||||
|
||||
if (!existsSync(path)) return null;
|
||||
|
||||
try {
|
||||
return JSON.parse(readFileSync(path, 'utf-8')) as CassetteEntry;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Save a cassette to disk. Creates the cassette directory if needed.
|
||||
* Prints the cassette filename so it's visible during recording runs.
|
||||
*/
|
||||
save(key: CassetteKey, entry: CassetteEntry): void {
|
||||
mkdirSync(this.cassetteDir, { recursive: true });
|
||||
const hash = buildCassetteKey(key);
|
||||
const path = this.pathFor(hash);
|
||||
writeFileSync(path, JSON.stringify(entry, null, 2), 'utf-8');
|
||||
console.log(`[cassette] recorded → ${hash}.json (${entry.recording.jsonlLines.length} lines)`);
|
||||
}
|
||||
}
|
||||
35
src/test/cassette/types.ts
Normal file
35
src/test/cassette/types.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Cassette Types
|
||||
*
|
||||
* VCR-style cassette format for recording and replaying agent subprocess I/O.
|
||||
* A cassette captures everything an agent process writes so tests can replay
|
||||
* it deterministically without hitting real AI APIs.
|
||||
*/
|
||||
|
||||
export interface CassetteKey {
|
||||
/** Prompt with dynamic content (UUIDs, paths, timestamps) replaced with placeholders. */
|
||||
normalizedPrompt: string;
|
||||
/** Provider name, e.g. 'claude', 'codex'. */
|
||||
providerName: string;
|
||||
/** Stable CLI args with the prompt value stripped. */
|
||||
modelArgs: string[];
|
||||
/** SHA256 prefix of all non-hidden files in the agent worktree at spawn time. */
|
||||
worktreeHash: string;
|
||||
}
|
||||
|
||||
export interface CassetteRecording {
|
||||
/** All JSONL lines the agent wrote to stdout (captured from output file). */
|
||||
jsonlLines: string[];
|
||||
/** Content of signal.json written by the agent, or null if missing. */
|
||||
signalJson: Record<string, unknown> | null;
|
||||
/** Process exit code (0 = success). */
|
||||
exitCode: number;
|
||||
/** ISO timestamp when this cassette was recorded. */
|
||||
recordedAt: string;
|
||||
}
|
||||
|
||||
export interface CassetteEntry {
|
||||
version: 1;
|
||||
key: CassetteKey;
|
||||
recording: CassetteRecording;
|
||||
}
|
||||
0
src/test/cassettes/.gitkeep
Normal file
0
src/test/cassettes/.gitkeep
Normal file
Reference in New Issue
Block a user