Files
Codewalkers/apps/server/test/integration/full-flow/full-flow-cassette.test.ts
Lukas May 34578d39c6 refactor: Restructure monorepo to apps/server/ and apps/web/ layout
Move src/ → apps/server/ and packages/web/ → apps/web/ to adopt
standard monorepo conventions (apps/ for runnable apps, packages/
for reusable libraries). Update all config files, shared package
imports, test fixtures, and documentation to reflect new paths.

Key fixes:
- Update workspace config to ["apps/*", "packages/*"]
- Update tsconfig.json rootDir/include for apps/server/
- Add apps/web/** to vitest exclude list
- Update drizzle.config.ts schema path
- Fix ensure-schema.ts migration path detection (3 levels up in dev,
  2 levels up in dist)
- Fix tests/integration/cli-server.test.ts import paths
- Update packages/shared imports to apps/server/ paths
- Update all docs/ files with new paths
2026-03-03 11:22:53 +01:00

245 lines
9.8 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Full-Flow Cassette Integration Test
*
* Cassette-backed variant of the full multi-agent workflow test.
* Runs the same discuss → plan → detail → execute pipeline but intercepts
* subprocess spawning with CassetteProcessManager — no real API calls in CI.
*
* Recording (one-time, costs ~$25):
* CW_CASSETTE_RECORD=1 npm test -- apps/server/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
* # Commit the generated apps/server/test/cassettes/<hash>.json files afterward
*
* Replay (default — runs in seconds):
* npm test -- apps/server/test/integration/full-flow/full-flow-cassette.test.ts
*
* Force re-record (overwrites existing cassettes):
* CW_CASSETTE_FORCE_RECORD=1 npm test -- apps/server/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { existsSync, readdirSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import type { Phase, Task } from '../../../db/schema.js';
import type { AgentResult } from '../../../agent/types.js';
import { buildExecutePrompt } from '../../../agent/prompts/index.js';
import { CassetteStore } from '../../cassette/store.js';
import { CassetteProcessManager, type CassetteMode } from '../../cassette/process-manager.js';
import {
createFullFlowHarness,
type FullFlowHarness,
} from './harness.js';
import {
printHeader,
printDiscussResult,
printPlanResult,
printDetailResult,
printExecuteResult,
printFinalSummary,
type ExecutedTask,
} from './report.js';
// =============================================================================
// Constants
// =============================================================================
/**
 * Whether this run was started with one of the recording switches
 * (CW_CASSETTE_FORCE_RECORD or CW_CASSETTE_RECORD) set to exactly '1'.
 */
const RECORDING = ['CW_CASSETTE_FORCE_RECORD', 'CW_CASSETTE_RECORD'].some(
  (flag) => process.env[flag] === '1',
);
/**
 * Timeout (in milliseconds) applied to the suite's hooks, the test itself and
 * every agent drive.
 * - Recording runs get 60 minutes, because real agents need API time.
 * - Replay runs get 5 minutes, which is generous headroom for cassettes that
 *   finish in seconds.
 */
const CASSETTE_FLOW_TIMEOUT = (RECORDING ? 60 : 5) * 60_000;
// Directory containing this module, derived from the module URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// An explicit CW_CASSETTE_DIR wins; otherwise use the `cassettes` folder two
// levels above this file's directory.
const CASSETTE_DIR = process.env.CW_CASSETTE_DIR ?? join(__dirname, '..', '..', 'cassettes');
// =============================================================================
// Mode helper
// =============================================================================
/**
 * Resolves the cassette mode from the environment.
 *
 * CW_CASSETTE_FORCE_RECORD=1 takes precedence and yields 'record';
 * CW_CASSETTE_RECORD=1 yields 'auto'; anything else yields 'replay'.
 *
 * @returns The mode handed to the CassetteProcessManager.
 */
function cassetteMode(): CassetteMode {
  const { CW_CASSETTE_FORCE_RECORD: forceFlag, CW_CASSETTE_RECORD: recordFlag } = process.env;
  if (forceFlag === '1') {
    return 'record';
  }
  return recordFlag === '1' ? 'auto' : 'replay';
}
/**
 * Decides whether the suite has anything to run against.
 *
 * Any non-replay mode always proceeds. In replay mode the cassette directory
 * must exist and hold at least one `.json` file; otherwise the suite is
 * skipped so `npm test` stays green on a fresh clone before cassettes have
 * been committed.
 *
 * @returns `true` when the suite should run, `false` when it should be skipped.
 */
function cassettesAvailable(): boolean {
  if (cassetteMode() !== 'replay') {
    return true;
  }
  return (
    existsSync(CASSETTE_DIR) &&
    readdirSync(CASSETTE_DIR).some((entry) => entry.endsWith('.json'))
  );
}
// =============================================================================
// Test
// =============================================================================
// End-to-end suite: drives discuss -> plan -> detail -> execute through the
// harness, with subprocess handling delegated to CassetteProcessManager.
// The whole suite is skipped when cassettesAvailable() returns false.
describe.skipIf(!cassettesAvailable())('full flow (cassette replay)', () => {
let harness: FullFlowHarness;
// Captured when the describe callback runs; used for the elapsed time passed
// to printFinalSummary at the end of the test.
const startedAt = Date.now();
beforeAll(async () => {
// One store and one mode are shared by every process manager the factory creates.
const store = new CassetteStore(CASSETTE_DIR);
const mode = cassetteMode();
harness = await createFullFlowHarness('Add complete() method to TodoStore', {
processManagerFactory: (workspaceRoot, projectRepo) =>
new CassetteProcessManager(workspaceRoot, projectRepo, store, mode),
});
printHeader(harness.initiative.name);
console.log(` Cassette mode : ${mode}`);
console.log(` Cassette dir : ${CASSETTE_DIR}`);
console.log(` Initiative ID : ${harness.initiative.id}`);
console.log(` Workspace : ${harness.workspaceRoot}`);
}, CASSETTE_FLOW_TIMEOUT);
afterAll(async () => {
// harness stays unassigned if beforeAll threw before the assignment, hence the guard.
if (harness) await harness.cleanup();
});
it(
'runs the complete multi-agent workflow from cassettes',
async () => {
const { initiative, caller, agentManager, phaseRepository, taskRepository } = harness;
const initiativeId = initiative.id;
// ── Stage 2: Discuss ───────────────────────────────────────────────────
console.log('\n\n>>> Stage 2: DISCUSS <<<');
const discussAgent = await caller.spawnArchitectDiscuss({ initiativeId });
expect(discussAgent.id).toBeTruthy();
console.log(` Spawned discuss agent: ${discussAgent.name} (${discussAgent.id})`);
// NOTE(review): the string argument is presumably the canned reply used when
// the agent asks questions — confirm against driveToCompletion in harness.js.
const discussResult = await harness.driveToCompletion(
discussAgent.id,
'Use your best judgment and keep it simple. The focus is implementing complete(id) on TodoStore.',
CASSETTE_FLOW_TIMEOUT,
);
printDiscussResult(discussAgent.id, discussResult);
// A failed discuss stage is only reported; it does not fail the test.
if (!discussResult?.success) {
console.warn(' [WARN] discuss agent did not succeed; continuing to plan stage');
}
// ── Stage 3: Plan ──────────────────────────────────────────────────────
console.log('\n\n>>> Stage 3: PLAN <<<');
const planAgent = await caller.spawnArchitectPlan({ initiativeId });
expect(planAgent.id).toBeTruthy();
console.log(` Spawned plan agent: ${planAgent.name} (${planAgent.id})`);
const planResult = await harness.driveToCompletion(
planAgent.id,
'Keep it simple.',
CASSETTE_FLOW_TIMEOUT,
);
// Planning must return a result and leave at least one phase for the initiative.
expect(planResult).toBeTruthy();
const phases: Phase[] = await phaseRepository.findByInitiativeId(initiativeId);
expect(phases.length).toBeGreaterThan(0);
printPlanResult(phases);
// ── Stage 4: Detail (per phase) ────────────────────────────────────────
console.log('\n\n>>> Stage 4: DETAIL <<<');
// Phases are detailed one at a time, each agent driven to completion before the next is spawned.
for (const phase of phases) {
const detailAgent = await caller.spawnArchitectDetail({ phaseId: phase.id });
expect(detailAgent.id).toBeTruthy();
console.log(` Spawned detail agent for phase "${phase.name}": ${detailAgent.name}`);
const detailResult = await harness.driveToCompletion(
detailAgent.id,
'Keep it simple.',
CASSETTE_FLOW_TIMEOUT,
);
expect(detailResult).toBeTruthy();
// Every phase must end up with at least one task of category 'execute' and type 'auto'.
const phaseTasks = await taskRepository.findByPhaseId(phase.id);
const executeTasks = phaseTasks.filter((t) => t.category === 'execute' && t.type === 'auto');
expect(executeTasks.length).toBeGreaterThan(0);
printDetailResult(phase, phaseTasks);
}
// ── Stage 5: Execute ───────────────────────────────────────────────────
console.log('\n\n>>> Stage 5: EXECUTE <<<');
const allTasks = await gatherAllExecuteTasks(taskRepository, phases);
console.log(` Found ${allTasks.length} execute task(s) across ${phases.length} phase(s)`);
// Each entry pairs a task with its agent result, whether or not it succeeded.
const executed: ExecutedTask[] = [];
for (const task of allTasks) {
console.log(` Spawning execute agent for: "${task.name}"`);
// Execute agents are spawned through agentManager directly rather than via `caller`.
const execAgent = await agentManager.spawn({
taskId: task.id,
// Falls back to the task name when the task has no description.
prompt: buildExecutePrompt(task.description ?? task.name),
mode: 'execute',
initiativeId,
phaseId: task.phaseId ?? undefined,
inputContext: {
initiative,
task,
},
});
console.log(` Agent: ${execAgent.name} (${execAgent.id})`);
const result = await harness.driveToCompletion(
execAgent.id,
'Use your best judgment and keep it simple.',
CASSETTE_FLOW_TIMEOUT,
);
executed.push({ task, result });
const icon = result?.success ? '✓' : '✗';
console.log(` ${icon} Completed with success=${result?.success ?? null}`);
// Only the first 200 characters of a failure message are logged.
if (result && !result.success) {
console.log(` Message: ${result.message?.slice(0, 200)}`);
}
}
printExecuteResult(executed);
// ── Assertions ─────────────────────────────────────────────────────────
// The only hard requirement is that at least one task was executed;
// unsuccessful tasks are reported as a warning and do not fail the test.
expect(executed.length).toBeGreaterThan(0);
const allSucceeded = executed.every((e) => e.result?.success === true);
if (!allSucceeded) {
const failed = executed.filter((e) => !e.result?.success);
console.warn(` [WARN] ${failed.length} execute task(s) did not succeed`);
}
// ── Final summary ──────────────────────────────────────────────────────
printFinalSummary(
initiative.name,
phases,
allTasks,
executed,
Date.now() - startedAt,
);
},
CASSETTE_FLOW_TIMEOUT,
);
});
// =============================================================================
// Helpers
// =============================================================================
/**
 * Collects, in phase order, every task whose category is 'execute' and whose
 * type is 'auto'.
 *
 * Phases are queried one after another, so the repository sees the lookups in
 * the same order as `phases`.
 *
 * @param taskRepository - Repository used to look up the tasks of each phase.
 * @param phases - Phases whose tasks should be inspected.
 * @returns The matching tasks, grouped by phase in the order the phases were given.
 */
async function gatherAllExecuteTasks(
  taskRepository: FullFlowHarness['taskRepository'],
  phases: Phase[],
): Promise<Task[]> {
  const collected: Task[] = [];
  for (const { id: phaseId } of phases) {
    const tasksInPhase = await taskRepository.findByPhaseId(phaseId);
    for (const candidate of tasksInPhase) {
      if (candidate.category === 'execute' && candidate.type === 'auto') {
        collected.push(candidate);
      }
    }
  }
  return collected;
}