feat: Add cassette support for full-flow integration test
- normalizer.ts: Add NANOID_RE (exactly 21 characters from A-Za-z0-9_-) → __ID__ as step 2.5, fixing cassette key instability caused by nanoid agent IDs in prompts
- harness.ts: Add FullFlowHarnessOptions.processManagerFactory for injecting CassetteProcessManager without duplicating harness setup
- full-flow-cassette.test.ts: New cassette-backed variant of the full-flow test; skips automatically when no cassettes exist (fresh clone) and runs in seconds once cassettes are recorded and committed
- CLAUDE.md: Document the cassette recording command for the full-flow test
This commit is contained in:
@@ -48,6 +48,11 @@ npm test
|
|||||||
CW_CASSETTE_RECORD=1 npm test -- <test-file> # Record new cassettes locally
|
CW_CASSETTE_RECORD=1 npm test -- <test-file> # Record new cassettes locally
|
||||||
REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/ --test-timeout=300000 # Real provider tests (~$0.50)
|
REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/ --test-timeout=300000 # Real provider tests (~$0.50)
|
||||||
FULL_FLOW_TESTS=1 npm test -- src/test/integration/full-flow/ --test-timeout=3600000 # Full end-to-end test (~$2-5)
|
FULL_FLOW_TESTS=1 npm test -- src/test/integration/full-flow/ --test-timeout=3600000 # Full end-to-end test (~$2-5)
|
||||||
|
|
||||||
|
# Record full-flow cassettes (one-time, costs ~$2–5 in API credits):
|
||||||
|
CW_CASSETTE_RECORD=1 npm test -- src/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
|
||||||
|
# Commit the generated src/test/cassettes/<hash>.json files afterward.
|
||||||
|
# Subsequent runs replay from cassettes at no cost: npm test
|
||||||
```
|
```
|
||||||
|
|
||||||
See [docs/testing.md](docs/testing.md) for details, including the **cassette system** for pipeline integration tests that run without API costs.
|
See [docs/testing.md](docs/testing.md) for details, including the **cassette system** for pipeline integration tests that run without API costs.
|
||||||
|
|||||||
@@ -56,6 +56,14 @@ describe('normalizePrompt', () => {
|
|||||||
expect(result).toBe(prompt);
|
expect(result).toBe(prompt);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('strips nanoid strings (21-char alphanumeric)', () => {
|
||||||
|
const nanoid = 'V1StGXR8_Z5jdHi6B-myT';
|
||||||
|
const prompt = `Agent worktree: /tmp/cw-preview-${nanoid}/app`;
|
||||||
|
const result = normalizePrompt(prompt, '');
|
||||||
|
expect(result).not.toContain(nanoid);
|
||||||
|
expect(result).toContain('__ID__');
|
||||||
|
});
|
||||||
|
|
||||||
it('strips workspace root before UUID replacement to avoid double-normalizing', () => {
|
it('strips workspace root before UUID replacement to avoid double-normalizing', () => {
|
||||||
const workspaceRoot = '/tmp/cw-test-abc123';
|
const workspaceRoot = '/tmp/cw-test-abc123';
|
||||||
const uuid = '550e8400-e29b-41d4-a716-446655440000';
|
const uuid = '550e8400-e29b-41d4-a716-446655440000';
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
const UUID_RE = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi;
|
const UUID_RE = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi;
|
||||||
|
// Nanoid-style IDs: a run of exactly 21 characters drawn from A-Z, a-z, 0-9, `_` and `-`.
// The look-behind only rejects a preceding ASCII letter/digit, so an ID that directly
// follows a `-` or `_` separator (e.g. `cw-preview-<id>`) still matches; the look-ahead
// requires the run to end after the 21st character.
// Note: any other 21-character token built from these characters matches as well.
const NANOID_RE = /(?<![A-Za-z0-9])[A-Za-z0-9_-]{21}(?![A-Za-z0-9_-])/g;
|
||||||
const ISO_TIMESTAMP_RE = /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?/g;
|
const ISO_TIMESTAMP_RE = /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?/g;
|
||||||
const UNIX_EPOCH_MS_RE = /\b1[0-9]{12}\b/g;
|
const UNIX_EPOCH_MS_RE = /\b1[0-9]{12}\b/g;
|
||||||
const SESSION_NUM_RE = /\bsession[_\s-]?\d+\b/gi;
|
const SESSION_NUM_RE = /\bsession[_\s-]?\d+\b/gi;
|
||||||
@@ -18,6 +19,7 @@ const SESSION_NUM_RE = /\bsession[_\s-]?\d+\b/gi;
|
|||||||
* Replacements applied in order (most-specific first to avoid partial matches):
|
* Replacements applied in order (most-specific first to avoid partial matches):
|
||||||
* 1. Absolute workspace root path → __WORKSPACE__
|
* 1. Absolute workspace root path → __WORKSPACE__
|
||||||
* 2. UUIDs → __UUID__
|
* 2. UUIDs → __UUID__
|
||||||
|
* 2.5. Nanoid IDs (exactly 21 characters from A-Za-z0-9_-) → __ID__
|
||||||
* 3. ISO 8601 timestamps → __TIMESTAMP__
|
* 3. ISO 8601 timestamps → __TIMESTAMP__
|
||||||
* 4. Unix epoch milliseconds → __EPOCH__
|
* 4. Unix epoch milliseconds → __EPOCH__
|
||||||
* 5. Session numbers → session__N__
|
* 5. Session numbers → session__N__
|
||||||
@@ -30,6 +32,7 @@ export function normalizePrompt(prompt: string, workspaceRoot: string): string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
normalized = normalized.replace(UUID_RE, '__UUID__');
|
normalized = normalized.replace(UUID_RE, '__UUID__');
|
||||||
|
normalized = normalized.replace(NANOID_RE, '__ID__');
|
||||||
normalized = normalized.replace(ISO_TIMESTAMP_RE, '__TIMESTAMP__');
|
normalized = normalized.replace(ISO_TIMESTAMP_RE, '__TIMESTAMP__');
|
||||||
normalized = normalized.replace(UNIX_EPOCH_MS_RE, '__EPOCH__');
|
normalized = normalized.replace(UNIX_EPOCH_MS_RE, '__EPOCH__');
|
||||||
normalized = normalized.replace(SESSION_NUM_RE, 'session__N__');
|
normalized = normalized.replace(SESSION_NUM_RE, 'session__N__');
|
||||||
|
|||||||
237
src/test/integration/full-flow/full-flow-cassette.test.ts
Normal file
237
src/test/integration/full-flow/full-flow-cassette.test.ts
Normal file
@@ -0,0 +1,237 @@
|
|||||||
|
/**
|
||||||
|
* Full-Flow Cassette Integration Test
|
||||||
|
*
|
||||||
|
* Cassette-backed variant of the full multi-agent workflow test.
|
||||||
|
* Runs the same discuss → plan → detail → execute pipeline but intercepts
|
||||||
|
* subprocess spawning with CassetteProcessManager — no real API calls in CI.
|
||||||
|
*
|
||||||
|
* Recording (one-time, costs ~$2–5):
|
||||||
|
* CW_CASSETTE_RECORD=1 npm test -- src/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
|
||||||
|
* # Commit the generated src/test/cassettes/<hash>.json files afterward
|
||||||
|
*
|
||||||
|
* Replay (default — runs in seconds):
|
||||||
|
* npm test -- src/test/integration/full-flow/full-flow-cassette.test.ts
|
||||||
|
*
|
||||||
|
* Force re-record (overwrites existing cassettes):
|
||||||
|
* CW_CASSETTE_FORCE_RECORD=1 npm test -- src/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
||||||
|
import { existsSync, readdirSync } from 'node:fs';
|
||||||
|
import { join, dirname } from 'node:path';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
import type { Phase, Task } from '../../../db/schema.js';
|
||||||
|
import type { AgentResult } from '../../../agent/types.js';
|
||||||
|
import { buildExecutePrompt } from '../../../agent/prompts/index.js';
|
||||||
|
import { CassetteStore } from '../../cassette/store.js';
|
||||||
|
import { CassetteProcessManager, type CassetteMode } from '../../cassette/process-manager.js';
|
||||||
|
import {
|
||||||
|
createFullFlowHarness,
|
||||||
|
type FullFlowHarness,
|
||||||
|
} from './harness.js';
|
||||||
|
import {
|
||||||
|
printHeader,
|
||||||
|
printDiscussResult,
|
||||||
|
printPlanResult,
|
||||||
|
printDetailResult,
|
||||||
|
printExecuteResult,
|
||||||
|
printFinalSummary,
|
||||||
|
type ExecutedTask,
|
||||||
|
} from './report.js';
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Constants
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/** Budget for a replay-only run (replay finishes in seconds; 5 minutes is a generous ceiling). */
const CASSETTE_REPLAY_TIMEOUT_MS = 5 * 60_000;

/** Budget for a recording run; matches the `--test-timeout=3600000` used by the record commands. */
const CASSETTE_RECORD_TIMEOUT_MS = 60 * 60_000;

/**
 * Pick the flow timeout for the current run from the cassette environment flags.
 *
 * A timeout passed explicitly to `it` / `beforeAll` takes precedence over the
 * `--test-timeout` CLI flag, so a fixed 5-minute value would cut off recording
 * runs long before the one-hour budget the record commands ask for.
 *
 * @param env - Environment to inspect; defaults to `process.env`.
 * @returns The record budget when `CW_CASSETTE_RECORD` or
 *   `CW_CASSETTE_FORCE_RECORD` is exactly `'1'`, otherwise the replay budget.
 */
function resolveCassetteFlowTimeout(
  env: Record<string, string | undefined> = process.env,
): number {
  const recording =
    env.CW_CASSETTE_RECORD === '1' || env.CW_CASSETTE_FORCE_RECORD === '1';
  return recording ? CASSETTE_RECORD_TIMEOUT_MS : CASSETTE_REPLAY_TIMEOUT_MS;
}

/** Timeout applied to the suite hook, the test itself and every driveToCompletion call. */
const CASSETTE_FLOW_TIMEOUT = resolveCassetteFlowTimeout();
|
||||||
|
|
||||||
|
// Directory of this module, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Cassette directory: CW_CASSETTE_DIR when set, otherwise ../../cassettes relative to this file.
const CASSETTE_DIR =
  process.env.CW_CASSETTE_DIR ?? join(__dirname, '../../cassettes');
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Mode helper
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/**
 * Derive the cassette mode for this run from environment flags.
 *
 * `CW_CASSETTE_FORCE_RECORD=1` wins and yields `'record'`;
 * otherwise `CW_CASSETTE_RECORD=1` yields `'auto'`;
 * with neither flag set to `'1'` the mode is `'replay'`.
 */
function cassetteMode(): CassetteMode {
  const {
    CW_CASSETTE_FORCE_RECORD: forceFlag,
    CW_CASSETTE_RECORD: recordFlag,
  } = process.env;

  const forced = forceFlag === '1';
  const requested = recordFlag === '1';
  return forced ? 'record' : requested ? 'auto' : 'replay';
}
|
||||||
|
|
||||||
|
/**
 * Decide whether the cassette suite should run.
 *
 * Any non-replay mode always proceeds. In replay mode the suite runs only when
 * the cassette directory exists and holds at least one `.json` file, so that
 * `npm test` on a fresh clone (no cassettes committed yet) skips instead of
 * failing. The check looks for any `.json` file in the directory, not for the
 * specific cassettes this test needs.
 */
function cassettesAvailable(): boolean {
  // recording runs always proceed
  if (cassetteMode() !== 'replay') {
    return true;
  }

  if (!existsSync(CASSETTE_DIR)) {
    return false;
  }

  for (const entry of readdirSync(CASSETTE_DIR)) {
    if (entry.endsWith('.json')) {
      return true;
    }
  }
  return false;
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
// Cassette-backed end-to-end suite. The whole suite is skipped when
// cassettesAvailable() returns false. The flow is strictly sequential:
// discuss → plan → detail (per phase) → execute (per task).
describe.skipIf(!cassettesAvailable())('full flow (cassette replay)', () => {
  let harness: FullFlowHarness;
  // Captured when the describe callback runs, i.e. before beforeAll; the
  // duration reported in the final summary therefore includes harness setup.
  const startedAt = Date.now();

  beforeAll(async () => {
    const store = new CassetteStore(CASSETTE_DIR);
    const mode = cassetteMode();

    // Build the regular harness but swap in the cassette-backed process manager.
    harness = await createFullFlowHarness('Add complete() method to TodoStore', {
      processManagerFactory: (workspaceRoot, projectRepo) =>
        new CassetteProcessManager(workspaceRoot, projectRepo, store, mode),
    });

    printHeader(harness.initiative.name);
    console.log(` Cassette mode : ${mode}`);
    console.log(` Cassette dir : ${CASSETTE_DIR}`);
    console.log(` Initiative ID : ${harness.initiative.id}`);
    console.log(` Workspace : ${harness.workspaceRoot}`);
  }, CASSETTE_FLOW_TIMEOUT);

  afterAll(async () => {
    // harness stays undefined if beforeAll threw before assigning it.
    if (harness) await harness.cleanup();
  });

  it(
    'runs the complete multi-agent workflow from cassettes',
    async () => {
      const { initiative, caller, agentManager, phaseRepository, taskRepository } = harness;
      const initiativeId = initiative.id;

      // ── Stage 2: Discuss ───────────────────────────────────────────────────
      console.log('\n\n>>> Stage 2: DISCUSS <<<');
      const discussAgent = await caller.spawnArchitectDiscuss({ initiativeId });
      expect(discussAgent.id).toBeTruthy();
      console.log(` Spawned discuss agent: ${discussAgent.name} (${discussAgent.id})`);

      const discussResult = await harness.driveToCompletion(
        discussAgent.id,
        'Use your best judgment and keep it simple. The focus is implementing complete(id) on TodoStore.',
        CASSETTE_FLOW_TIMEOUT,
      );
      printDiscussResult(discussAgent.id, discussResult);

      // A failed or missing discuss result is only reported; the flow continues.
      if (!discussResult?.success) {
        console.warn(' [WARN] discuss agent did not succeed; continuing to plan stage');
      }

      // ── Stage 3: Plan ──────────────────────────────────────────────────────
      console.log('\n\n>>> Stage 3: PLAN <<<');
      const planAgent = await caller.spawnArchitectPlan({ initiativeId });
      expect(planAgent.id).toBeTruthy();
      console.log(` Spawned plan agent: ${planAgent.name} (${planAgent.id})`);

      const planResult = await harness.driveToCompletion(
        planAgent.id,
        'Keep it simple.',
        CASSETTE_FLOW_TIMEOUT,
      );
      expect(planResult).toBeTruthy();

      // The plan stage must have produced at least one phase for the initiative.
      const phases: Phase[] = await phaseRepository.findByInitiativeId(initiativeId);
      expect(phases.length).toBeGreaterThan(0);
      printPlanResult(phases);

      // ── Stage 4: Detail (per phase) ────────────────────────────────────────
      console.log('\n\n>>> Stage 4: DETAIL <<<');
      for (const phase of phases) {
        const detailAgent = await caller.spawnArchitectDetail({ phaseId: phase.id });
        expect(detailAgent.id).toBeTruthy();
        console.log(` Spawned detail agent for phase "${phase.name}": ${detailAgent.name}`);

        const detailResult = await harness.driveToCompletion(
          detailAgent.id,
          'Keep it simple.',
          CASSETTE_FLOW_TIMEOUT,
        );
        expect(detailResult).toBeTruthy();

        // Every phase must end up with at least one task with category 'execute' and type 'auto'.
        const phaseTasks = await taskRepository.findByPhaseId(phase.id);
        const executeTasks = phaseTasks.filter((t) => t.category === 'execute' && t.type === 'auto');
        expect(executeTasks.length).toBeGreaterThan(0);
        printDetailResult(phase, phaseTasks);
      }

      // ── Stage 5: Execute ───────────────────────────────────────────────────
      console.log('\n\n>>> Stage 5: EXECUTE <<<');
      const allTasks = await gatherAllExecuteTasks(taskRepository, phases);
      console.log(` Found ${allTasks.length} execute task(s) across ${phases.length} phase(s)`);

      // Tasks are executed one at a time, in phase order.
      const executed: ExecutedTask[] = [];
      for (const task of allTasks) {
        console.log(` Spawning execute agent for: "${task.name}"`);
        const execAgent = await agentManager.spawn({
          taskId: task.id,
          // Falls back to the task name when the task has no description.
          prompt: buildExecutePrompt(task.description ?? task.name),
          mode: 'execute',
          initiativeId,
          phaseId: task.phaseId ?? undefined,
          inputContext: {
            initiative,
            task,
          },
        });
        console.log(` Agent: ${execAgent.name} (${execAgent.id})`);

        const result = await harness.driveToCompletion(
          execAgent.id,
          'Use your best judgment and keep it simple.',
          CASSETTE_FLOW_TIMEOUT,
        );
        executed.push({ task, result });

        const icon = result?.success ? '✓' : '✗';
        console.log(` ${icon} Completed with success=${result?.success ?? null}`);
        if (result && !result.success) {
          // Only the first 200 characters of the failure message are printed.
          console.log(` Message: ${result.message?.slice(0, 200)}`);
        }
      }

      printExecuteResult(executed);

      // ── Assertions ─────────────────────────────────────────────────────────
      // Only "at least one task was executed" is asserted; unsuccessful execute
      // tasks are reported as a warning and do not fail the test.
      expect(executed.length).toBeGreaterThan(0);

      const allSucceeded = executed.every((e) => e.result?.success === true);
      if (!allSucceeded) {
        const failed = executed.filter((e) => !e.result?.success);
        console.warn(` [WARN] ${failed.length} execute task(s) did not succeed`);
      }

      // ── Final summary ──────────────────────────────────────────────────────
      printFinalSummary(
        initiative.name,
        phases,
        allTasks,
        executed,
        Date.now() - startedAt,
      );
    },
    CASSETTE_FLOW_TIMEOUT,
  );
});
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Helpers
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/**
 * Collect every task with category 'execute' and type 'auto' across the given phases.
 *
 * Phases are queried one after another, so the result keeps phase order and,
 * within a phase, the order returned by the repository.
 *
 * @param taskRepository - Repository used to look up tasks by phase id.
 * @param phases - Phases whose tasks should be inspected.
 * @returns The matching tasks, flattened into a single array.
 */
async function gatherAllExecuteTasks(
  taskRepository: FullFlowHarness['taskRepository'],
  phases: Phase[],
): Promise<Task[]> {
  const collected: Task[] = [];

  for (const { id: phaseId } of phases) {
    const candidates = await taskRepository.findByPhaseId(phaseId);
    for (const candidate of candidates) {
      if (candidate.category !== 'execute' || candidate.type !== 'auto') {
        continue;
      }
      collected.push(candidate);
    }
  }

  return collected;
}
|
||||||
@@ -36,6 +36,7 @@ import type { AccountRepository } from '../../../db/repositories/account-reposit
|
|||||||
import type { ChangeSetRepository } from '../../../db/repositories/change-set-repository.js';
|
import type { ChangeSetRepository } from '../../../db/repositories/change-set-repository.js';
|
||||||
import type { LogChunkRepository } from '../../../db/repositories/log-chunk-repository.js';
|
import type { LogChunkRepository } from '../../../db/repositories/log-chunk-repository.js';
|
||||||
import type { ConversationRepository } from '../../../db/repositories/conversation-repository.js';
|
import type { ConversationRepository } from '../../../db/repositories/conversation-repository.js';
|
||||||
|
import type { ProcessManager } from '../../../agent/process-manager.js';
|
||||||
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';
|
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';
|
||||||
import { createRepositories } from '../../../container.js';
|
import { createRepositories } from '../../../container.js';
|
||||||
import { DefaultDispatchManager } from '../../../dispatch/manager.js';
|
import { DefaultDispatchManager } from '../../../dispatch/manager.js';
|
||||||
@@ -162,6 +163,11 @@ const POLL_INTERVAL_MS = 1500;
|
|||||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||||
const FIXTURES_DIR = join(__dirname, '../../fixtures/todo-api');
|
const FIXTURES_DIR = join(__dirname, '../../fixtures/todo-api');
|
||||||
|
|
||||||
|
/**
 * Optional overrides accepted as the second argument of `createFullFlowHarness`.
 */
export interface FullFlowHarnessOptions {
  /**
   * Factory called after workspaceRoot + repos are created. Return a custom ProcessManager.
   *
   * The harness invokes it with the workspace root and the project repository and
   * hands the returned instance to the agent manager as its process-manager override.
   * When omitted, no override is passed.
   */
  processManagerFactory?: (workspaceRoot: string, projectRepo: ProjectRepository) => ProcessManager;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a full-flow test harness.
|
* Create a full-flow test harness.
|
||||||
*
|
*
|
||||||
@@ -177,6 +183,7 @@ const FIXTURES_DIR = join(__dirname, '../../fixtures/todo-api');
|
|||||||
*/
|
*/
|
||||||
export async function createFullFlowHarness(
|
export async function createFullFlowHarness(
|
||||||
initiativeName = 'Add complete() method to TodoStore',
|
initiativeName = 'Add complete() method to TodoStore',
|
||||||
|
options?: FullFlowHarnessOptions,
|
||||||
): Promise<FullFlowHarness> {
|
): Promise<FullFlowHarness> {
|
||||||
// ── 0. Allow nested claude invocations ────────────────────────────────────
|
// ── 0. Allow nested claude invocations ────────────────────────────────────
|
||||||
// Claude Code sets CLAUDECODE in the environment, which prevents nested
|
// Claude Code sets CLAUDECODE in the environment, which prevents nested
|
||||||
@@ -219,6 +226,7 @@ export async function createFullFlowHarness(
|
|||||||
const eventBus = new CapturingEventBus();
|
const eventBus = new CapturingEventBus();
|
||||||
|
|
||||||
// ── 5. Real agent manager ─────────────────────────────────────────────────
|
// ── 5. Real agent manager ─────────────────────────────────────────────────
|
||||||
|
const customProcessManager = options?.processManagerFactory?.(workspaceRoot, repos.projectRepository);
|
||||||
const agentManager = new MultiProviderAgentManager(
|
const agentManager = new MultiProviderAgentManager(
|
||||||
repos.agentRepository,
|
repos.agentRepository,
|
||||||
workspaceRoot,
|
workspaceRoot,
|
||||||
@@ -231,6 +239,8 @@ export async function createFullFlowHarness(
|
|||||||
repos.taskRepository,
|
repos.taskRepository,
|
||||||
repos.pageRepository,
|
repos.pageRepository,
|
||||||
repos.logChunkRepository,
|
repos.logChunkRepository,
|
||||||
|
false, // debug
|
||||||
|
customProcessManager, // processManagerOverride
|
||||||
);
|
);
|
||||||
|
|
||||||
// ── 6. Dispatch manager (for execute stage) ───────────────────────────────
|
// ── 6. Dispatch manager (for execute stage) ───────────────────────────────
|
||||||
|
|||||||
Reference in New Issue
Block a user