Move src/ → apps/server/ and packages/web/ → apps/web/ to adopt standard monorepo conventions (apps/ for runnable apps, packages/ for reusable libraries). Update all config files, shared package imports, test fixtures, and documentation to reflect new paths. Key fixes: - Update workspace config to ["apps/*", "packages/*"] - Update tsconfig.json rootDir/include for apps/server/ - Add apps/web/** to vitest exclude list - Update drizzle.config.ts schema path - Fix ensure-schema.ts migration path detection (3 levels up in dev, 2 levels up in dist) - Fix tests/integration/cli-server.test.ts import paths - Update packages/shared imports to apps/server/ paths - Update all docs/ files with new paths
245 lines
9.8 KiB
TypeScript
245 lines
9.8 KiB
TypeScript
/**
 * Full-Flow Cassette Integration Test
 *
 * Cassette-backed variant of the full multi-agent workflow test.
 * Runs the same discuss → plan → detail → execute pipeline but intercepts
 * subprocess spawning with CassetteProcessManager — no real API calls in CI.
 *
 * Recording (one-time, costs ~$2–5):
 *   CW_CASSETTE_RECORD=1 npm test -- apps/server/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
 *   # Commit the generated apps/server/test/cassettes/<hash>.json files afterward
 *
 * Replay (default — runs in seconds):
 *   npm test -- apps/server/test/integration/full-flow/full-flow-cassette.test.ts
 *
 * Force re-record (overwrites existing cassettes):
 *   CW_CASSETTE_FORCE_RECORD=1 npm test -- apps/server/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
 */
|
||
|
||
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
||
import { existsSync, readdirSync } from 'node:fs';
|
||
import { join, dirname } from 'node:path';
|
||
import { fileURLToPath } from 'node:url';
|
||
import type { Phase, Task } from '../../../db/schema.js';
|
||
import type { AgentResult } from '../../../agent/types.js';
|
||
import { buildExecutePrompt } from '../../../agent/prompts/index.js';
|
||
import { CassetteStore } from '../../cassette/store.js';
|
||
import { CassetteProcessManager, type CassetteMode } from '../../cassette/process-manager.js';
|
||
import {
|
||
createFullFlowHarness,
|
||
type FullFlowHarness,
|
||
} from './harness.js';
|
||
import {
|
||
printHeader,
|
||
printDiscussResult,
|
||
printPlanResult,
|
||
printDetailResult,
|
||
printExecuteResult,
|
||
printFinalSummary,
|
||
type ExecutedTask,
|
||
} from './report.js';
|
||
|
||
// =============================================================================
|
||
// Constants
|
||
// =============================================================================
|
||
|
||
/** True when either recording switch (force-record or record) is set to '1'. */
const RECORDING = ['CW_CASSETTE_FORCE_RECORD', 'CW_CASSETTE_RECORD'].some(
  (flag) => process.env[flag] === '1',
);

/**
 * Timeout (ms) shared by the suite's hooks, the test itself and each agent drive.
 * - Replay: 5 minutes — cassettes finish in seconds, so this is generous headroom.
 * - Record: 60 minutes — real agents need API time for discuss/plan/detail/execute.
 */
const CASSETTE_FLOW_TIMEOUT = (RECORDING ? 60 : 5) * 60_000;
|
||
|
||
/** Directory that contains this test file, derived from the module URL. */
const __dirname = dirname(fileURLToPath(import.meta.url));

/**
 * Directory holding the cassette JSON files.
 * CW_CASSETTE_DIR takes precedence when defined; otherwise the `cassettes`
 * folder two levels above this file is used.
 */
const CASSETTE_DIR = process.env.CW_CASSETTE_DIR ?? join(__dirname, '..', '..', 'cassettes');
|
||
|
||
// =============================================================================
|
||
// Mode helper
|
||
// =============================================================================
|
||
|
||
/**
 * Resolves the cassette mode from the environment.
 *
 * CW_CASSETTE_FORCE_RECORD=1 takes priority and yields 'record';
 * CW_CASSETTE_RECORD=1 yields 'auto'; anything else yields 'replay'.
 *
 * @returns the mode handed to CassetteProcessManager
 */
function cassetteMode(): CassetteMode {
  const { CW_CASSETTE_FORCE_RECORD: forceRecord, CW_CASSETTE_RECORD: record } = process.env;

  if (forceRecord === '1') {
    return 'record';
  }
  return record === '1' ? 'auto' : 'replay';
}
|
||
|
||
/**
 * Decides whether the suite should run at all.
 *
 * Any non-replay mode always proceeds. In replay mode the cassette directory
 * must exist and contain at least one `.json` file; otherwise the suite is
 * skipped, so `npm test` does not fail on a fresh clone before cassettes have
 * been committed.
 *
 * @returns true when the suite can run, false when it should be skipped
 */
function cassettesAvailable(): boolean {
  // Recording runs do not need pre-existing cassettes.
  if (cassetteMode() !== 'replay') {
    return true;
  }

  if (!existsSync(CASSETTE_DIR)) {
    return false;
  }

  for (const entry of readdirSync(CASSETTE_DIR)) {
    if (entry.endsWith('.json')) {
      return true;
    }
  }
  return false;
}
|
||
|
||
// =============================================================================
|
||
// Test
|
||
// =============================================================================
|
||
|
||
// Suite is skipped entirely when cassettesAvailable() reports nothing to replay.
describe.skipIf(!cassettesAvailable())('full flow (cassette replay)', () => {
  // Taken when the suite body is evaluated; only feeds the elapsed time in the final summary.
  const startedAt = Date.now();
  let harness: FullFlowHarness;

  // Build the harness with a process manager backed by the cassette store.
  beforeAll(async () => {
    const mode = cassetteMode();
    const store = new CassetteStore(CASSETTE_DIR);

    harness = await createFullFlowHarness('Add complete() method to TodoStore', {
      processManagerFactory: (workspaceRoot, projectRepo) =>
        new CassetteProcessManager(workspaceRoot, projectRepo, store, mode),
    });

    printHeader(harness.initiative.name);
    const banner = [
      `  Cassette mode : ${mode}`,
      `  Cassette dir  : ${CASSETTE_DIR}`,
      `  Initiative ID : ${harness.initiative.id}`,
      `  Workspace     : ${harness.workspaceRoot}`,
    ];
    for (const line of banner) {
      console.log(line);
    }
  }, CASSETTE_FLOW_TIMEOUT);

  afterAll(async () => {
    // The harness is unset if beforeAll failed before assigning it.
    if (!harness) return;
    await harness.cleanup();
  });

  it(
    'runs the complete multi-agent workflow from cassettes',
    async () => {
      const {
        initiative,
        caller,
        agentManager,
        phaseRepository,
        taskRepository,
      } = harness;
      const initiativeId = initiative.id;

      // -- Stage 2: discuss ---------------------------------------------------
      console.log('\n\n>>> Stage 2: DISCUSS <<<');
      const discusser = await caller.spawnArchitectDiscuss({ initiativeId });
      expect(discusser.id).toBeTruthy();
      console.log(`  Spawned discuss agent: ${discusser.name} (${discusser.id})`);

      const discussOutcome = await harness.driveToCompletion(
        discusser.id,
        'Use your best judgment and keep it simple. The focus is implementing complete(id) on TodoStore.',
        CASSETTE_FLOW_TIMEOUT,
      );
      printDiscussResult(discusser.id, discussOutcome);

      // A failed discuss stage is reported but does not abort the flow.
      const discussSucceeded = Boolean(discussOutcome?.success);
      if (!discussSucceeded) {
        console.warn('  [WARN] discuss agent did not succeed; continuing to plan stage');
      }

      // -- Stage 3: plan ------------------------------------------------------
      console.log('\n\n>>> Stage 3: PLAN <<<');
      const planner = await caller.spawnArchitectPlan({ initiativeId });
      expect(planner.id).toBeTruthy();
      console.log(`  Spawned plan agent: ${planner.name} (${planner.id})`);

      const planOutcome = await harness.driveToCompletion(
        planner.id,
        'Keep it simple.',
        CASSETTE_FLOW_TIMEOUT,
      );
      expect(planOutcome).toBeTruthy();

      const phases: Phase[] = await phaseRepository.findByInitiativeId(initiativeId);
      expect(phases.length).toBeGreaterThan(0);
      printPlanResult(phases);

      // -- Stage 4: detail, one agent per phase -------------------------------
      console.log('\n\n>>> Stage 4: DETAIL <<<');
      for (const phase of phases) {
        const detailer = await caller.spawnArchitectDetail({ phaseId: phase.id });
        expect(detailer.id).toBeTruthy();
        console.log(`  Spawned detail agent for phase "${phase.name}": ${detailer.name}`);

        const detailOutcome = await harness.driveToCompletion(
          detailer.id,
          'Keep it simple.',
          CASSETTE_FLOW_TIMEOUT,
        );
        expect(detailOutcome).toBeTruthy();

        // Every phase must yield at least one automatically executable task.
        const phaseTasks = await taskRepository.findByPhaseId(phase.id);
        const autoExecuteCount = phaseTasks.filter(
          (candidate) => candidate.category === 'execute' && candidate.type === 'auto',
        ).length;
        expect(autoExecuteCount).toBeGreaterThan(0);
        printDetailResult(phase, phaseTasks);
      }

      // -- Stage 5: execute, one agent per task, sequentially -----------------
      console.log('\n\n>>> Stage 5: EXECUTE <<<');
      const allTasks = await gatherAllExecuteTasks(taskRepository, phases);
      console.log(`  Found ${allTasks.length} execute task(s) across ${phases.length} phase(s)`);

      const executed: ExecutedTask[] = [];
      for (const task of allTasks) {
        console.log(`  Spawning execute agent for: "${task.name}"`);

        // Fall back to the task name when the task has no description.
        const prompt = buildExecutePrompt(task.description ?? task.name);
        const executor = await agentManager.spawn({
          taskId: task.id,
          prompt,
          mode: 'execute',
          initiativeId,
          phaseId: task.phaseId ?? undefined,
          inputContext: { initiative, task },
        });
        console.log(`    Agent: ${executor.name} (${executor.id})`);

        const outcome = await harness.driveToCompletion(
          executor.id,
          'Use your best judgment and keep it simple.',
          CASSETTE_FLOW_TIMEOUT,
        );
        executed.push({ task, result: outcome });

        let icon: string;
        if (outcome?.success) {
          icon = '✓';
        } else {
          icon = '✗';
        }
        console.log(`    ${icon} Completed with success=${outcome?.success ?? null}`);
        if (outcome && !outcome.success) {
          console.log(`      Message: ${outcome.message?.slice(0, 200)}`);
        }
      }

      printExecuteResult(executed);

      // -- Assertions ---------------------------------------------------------
      expect(executed.length).toBeGreaterThan(0);

      // Unsuccessful execute tasks are surfaced as a warning, not a test failure.
      const everyTaskSucceeded = executed.every((entry) => entry.result?.success === true);
      if (!everyTaskSucceeded) {
        const failedCount = executed.filter((entry) => !entry.result?.success).length;
        console.warn(`  [WARN] ${failedCount} execute task(s) did not succeed`);
      }

      // -- Final summary ------------------------------------------------------
      const elapsedMs = Date.now() - startedAt;
      printFinalSummary(initiative.name, phases, allTasks, executed, elapsedMs);
    },
    CASSETTE_FLOW_TIMEOUT,
  );
});
|
||
|
||
// =============================================================================
|
||
// Helpers
|
||
// =============================================================================
|
||
|
||
/**
 * Collects the tasks with category 'execute' and type 'auto' from every phase.
 *
 * Phases are queried one at a time, so the returned list follows the order of
 * `phases` and, within a phase, the order returned by the repository.
 *
 * @param taskRepository repository used to look up the tasks of each phase
 * @param phases phases whose tasks should be inspected
 * @returns all matching tasks across the given phases
 */
async function gatherAllExecuteTasks(
  taskRepository: FullFlowHarness['taskRepository'],
  phases: Phase[],
): Promise<Task[]> {
  const collected: Task[] = [];

  for (const { id: phaseId } of phases) {
    const candidates = await taskRepository.findByPhaseId(phaseId);
    for (const candidate of candidates) {
      if (candidate.category === 'execute' && candidate.type === 'auto') {
        collected.push(candidate);
      }
    }
  }

  return collected;
}
|