refactor: Remove full-flow.test.ts in favour of cassette variant

The cassette-backed test (full-flow-cassette.test.ts) covers the same discuss→plan→detail→execute pipeline without API cost. The real-agent test added no unique value once cassettes were committed, and the Stage 6 npm-test validation it included was soft (warn, not fail). Also removes the now-unused shouldRunFullFlowTests export and the FULL_FLOW_TESTS=1 entry from CLAUDE.md.
2026-03-03 10:53:41 +01:00
parent 25360e1711
commit 8c38d958ce
3 changed files with 1 addition and 288 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -47,7 +47,6 @@ Run after any change to server-side code (`src/**`).
 npm test                                                                         # Unit + E2E tests (no API cost)
 CW_CASSETTE_RECORD=1 npm test -- <test-file>                                    # Record new cassettes locally
 REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/ --test-timeout=300000  # Real provider tests (~$0.50)
-FULL_FLOW_TESTS=1 npm test -- src/test/integration/full-flow/ --test-timeout=3600000        # Full end-to-end test (~$2-5)

 # Record full-flow cassettes (one-time, costs ~$2–5 in API credits):
 CW_CASSETTE_RECORD=1 npm test -- src/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
--- a/src/test/integration/full-flow/full-flow.test.ts
+++ b/src/test/integration/full-flow/full-flow.test.ts
@@ -1,280 +0,0 @@
-/**
- * Full-Flow Integration Test
- *
- * Tests a complete multi-agent workflow from "create initiative" through
- * discuss → plan → detail → execute, validating that:
- *   - discuss mode gathers requirements (handling questions if asked)
- *   - plan mode produces sensible phases
- *   - detail mode breaks phases into executable tasks
- *   - execute mode implements the missing complete() method
- *   - npm test passes in the todo-api project after execution
- *
- * COSTS REAL API CREDITS (~$2–5 per run).
- * Only runs when FULL_FLOW_TESTS=1 is set.
- *
- * Usage:
- *   FULL_FLOW_TESTS=1 npm test -- src/test/integration/full-flow/ --test-timeout=1800000
- */
-
-import { describe, it, expect, beforeAll, afterAll } from 'vitest';
-import { join } from 'node:path';
-import { execSync } from 'node:child_process';
-import type { Phase, Task } from '../../../db/schema.js';
-import type { AgentResult } from '../../../agent/types.js';
-import { buildExecutePrompt } from '../../../agent/prompts/index.js';
-import {
-  createFullFlowHarness,
-  shouldRunFullFlowTests,
-  type FullFlowHarness,
-} from './harness.js';
-import {
-  printHeader,
-  printDiscussResult,
-  printPlanResult,
-  printDetailResult,
-  printExecuteResult,
-  printGitDiff,
-  printFinalSummary,
-  type ExecutedTask,
-} from './report.js';
-
-// =============================================================================
-// Constants
-// =============================================================================
-
-/** Total test timeout: 60 minutes */
-const FULL_FLOW_TIMEOUT = 60 * 60 * 1000;
-
-/** Per-stage timeouts — architect agents are variable; these are upper bounds not expectations */
-const DISCUSS_TIMEOUT_MS = 8 * 60_000;
-const PLAN_TIMEOUT_MS = 12 * 60_000;
-const DETAIL_TIMEOUT_MS = 15 * 60_000; // per phase
-const EXECUTE_TIMEOUT_MS = 20 * 60_000; // per task — real RED-GREEN-REFACTOR cycles take time
-
-// =============================================================================
-// Test
-// =============================================================================
-
-describe.skipIf(!shouldRunFullFlowTests)('full flow (real agents — costs API credits)', () => {
-  let harness: FullFlowHarness;
-  const startedAt = Date.now();
-
-  beforeAll(async () => {
-    harness = await createFullFlowHarness('Add complete() method to TodoStore');
-    printHeader(harness.initiative.name);
-    console.log(`  Initiative ID : ${harness.initiative.id}`);
-    console.log(`  Project ID    : ${harness.project.id}`);
-    console.log(`  Workspace     : ${harness.workspaceRoot}`);
-    console.log(`  Fixture dir   : ${harness.fixtureRoot}`);
-  }, FULL_FLOW_TIMEOUT);
-
-  afterAll(async () => {
-    if (harness) {
-      await harness.cleanup();
-    }
-  });
-
-  it(
-    'runs the complete multi-agent workflow',
-    async () => {
-      const { initiative, caller, agentManager, phaseRepository, taskRepository } = harness;
-      const initiativeId = initiative.id;
-
-      // ── Stage 2: Discuss ─────────────────────────────────────────────────────
-      console.log('\n\n>>> Stage 2: DISCUSS <<<');
-      const discussAgent = await caller.spawnArchitectDiscuss({ initiativeId });
-      expect(discussAgent.id).toBeTruthy();
-      console.log(`  Spawned discuss agent: ${discussAgent.name} (${discussAgent.id})`);
-
-      const discussResult = await harness.driveToCompletion(
-        discussAgent.id,
-        'Use your best judgment and keep it simple. The focus is implementing complete(id) on TodoStore.',
-        DISCUSS_TIMEOUT_MS,
-      );
-      printDiscussResult(discussAgent.id, discussResult);
-
-      // Discuss agents can complete without asking questions — success means it ran
-      // without crashing. A crashed discuss agent is a blocker but not fatal for
-      // subsequent stages (plan can still run with the initiative description alone).
-      if (!discussResult?.success) {
-        console.warn('  [WARN] discuss agent did not succeed; continuing to plan stage');
-      }
-
-      // ── Stage 3: Plan ─────────────────────────────────────────────────────────
-      console.log('\n\n>>> Stage 3: PLAN <<<');
-      const planAgent = await caller.spawnArchitectPlan({ initiativeId });
-      expect(planAgent.id).toBeTruthy();
-      console.log(`  Spawned plan agent: ${planAgent.name} (${planAgent.id})`);
-
-      const planResult = await harness.driveToCompletion(planAgent.id, 'Keep it simple.', PLAN_TIMEOUT_MS);
-      expect(planResult).toBeTruthy();
-
-      const phases: Phase[] = await phaseRepository.findByInitiativeId(initiativeId);
-      expect(phases.length).toBeGreaterThan(0);
-      printPlanResult(phases);
-
-      // ── Stage 4: Detail (per phase) ───────────────────────────────────────────
-      console.log('\n\n>>> Stage 4: DETAIL <<<');
-      for (const phase of phases) {
-        const detailAgent = await caller.spawnArchitectDetail({ phaseId: phase.id });
-        expect(detailAgent.id).toBeTruthy();
-        console.log(`  Spawned detail agent for phase "${phase.name}": ${detailAgent.name}`);
-
-        const detailResult = await harness.driveToCompletion(
-          detailAgent.id,
-          'Keep it simple.',
-          DETAIL_TIMEOUT_MS,
-        );
-        expect(detailResult).toBeTruthy();
-
-        const phaseTasks = await taskRepository.findByPhaseId(phase.id);
-        const executeTasks = phaseTasks.filter((t) => t.category === 'execute' && t.type === 'auto');
-        expect(executeTasks.length).toBeGreaterThan(0);
-        printDetailResult(phase, phaseTasks);
-      }
-
-      // ── Stage 5: Execute ──────────────────────────────────────────────────────
-      console.log('\n\n>>> Stage 5: EXECUTE <<<');
-      const allTasks = await gatherAllExecuteTasks(taskRepository, phases);
-      console.log(`  Found ${allTasks.length} execute task(s) across ${phases.length} phase(s)`);
-
-      const executed: ExecutedTask[] = [];
-      for (const task of allTasks) {
-        console.log(`  Spawning execute agent for: "${task.name}"`);
-        const execAgent = await agentManager.spawn({
-          taskId: task.id,
-          prompt: buildExecutePrompt(task.description ?? task.name),
-          mode: 'execute',
-          initiativeId,
-          phaseId: task.phaseId ?? undefined,
-          inputContext: {
-            initiative,
-            task,
-          },
-        });
-        console.log(`    Agent: ${execAgent.name} (${execAgent.id})`);
-
-        const result = await harness.driveToCompletion(
-          execAgent.id,
-          'Use your best judgment and keep it simple.',
-          EXECUTE_TIMEOUT_MS,
-        );
-        executed.push({ task, result });
-
-        const icon = result?.success ? '✓' : '✗';
-        console.log(`    ${icon} Completed with success=${result?.success ?? null}`);
-        if (result && !result.success) {
-          console.log(`      Message: ${result.message?.slice(0, 200)}`);
-        }
-      }
-
-      printExecuteResult(executed);
-      printGitDiff(harness.workspaceRoot, harness.project.name);
-
-      // ── Stage 6: Validate ─────────────────────────────────────────────────────
-      console.log('\n\n>>> Stage 6: VALIDATE <<<');
-
-      // Find the last execute agent's worktree for the todo-api project
-      const lastExecuteAgent = executed[executed.length - 1];
-      const projectWorktreeDir = findProjectWorktree(
-        harness.workspaceRoot,
-        harness.project.name,
-        lastExecuteAgent,
-      );
-
-      if (projectWorktreeDir) {
-        console.log(`  Running npm test in: ${projectWorktreeDir}`);
-        try {
-          execSync('node --test src/todo.test.js', {
-            cwd: projectWorktreeDir,
-            stdio: 'pipe',
-          });
-          console.log('  ✓ All tests passed');
-        } catch (err: unknown) {
-          const e = err as { stdout?: Buffer; stderr?: Buffer };
-          console.log('  ✗ Tests failed:');
-          if (e.stdout) console.log(e.stdout.toString());
-          if (e.stderr) console.log(e.stderr.toString());
-          // Don't hard-fail on test validation — the important check is all execute agents succeeded
-          console.warn('  [WARN] npm test failed in project worktree (may be expected if task ordering differs)');
-        }
-      } else {
-        console.warn('  [WARN] Could not find project worktree dir for npm test validation');
-      }
-
-      // Core assertions
-      const allSucceeded = executed.every((e) => e.result?.success === true);
-      if (!allSucceeded) {
-        const failed = executed.filter((e) => !e.result?.success);
-        console.warn(`  [WARN] ${failed.length} execute task(s) did not succeed`);
-      }
-      expect(executed.length).toBeGreaterThan(0);
-
-      // ── Final summary ─────────────────────────────────────────────────────────
-      printFinalSummary(
-        initiative.name,
-        phases,
-        allTasks,
-        executed,
-        Date.now() - startedAt,
-      );
-    },
-    FULL_FLOW_TIMEOUT,
-  );
-});
-
-// =============================================================================
-// Helpers
-// =============================================================================
-
-/**
- * Gather all auto execute tasks across all phases, in order.
- * Excludes planning tasks (discuss, plan, detail, refine, research).
- */
-async function gatherAllExecuteTasks(
-  taskRepository: FullFlowHarness['taskRepository'],
-  phases: Phase[],
-): Promise<Task[]> {
-  const result: Task[] = [];
-  for (const phase of phases) {
-    const phaseTasks = await taskRepository.findByPhaseId(phase.id);
-    const execTasks = phaseTasks.filter((t) => t.category === 'execute' && t.type === 'auto');
-    result.push(...execTasks);
-  }
-  return result;
-}
-
-/**
- * Find the project worktree directory for the last executed task.
- * Worktrees live at: <workspaceRoot>/agent-workdirs/<alias>/<projectName>/
- */
-function findProjectWorktree(
-  workspaceRoot: string,
-  projectName: string,
-  lastExecuted: ExecutedTask | undefined,
-): string | null {
-  if (!lastExecuted) return null;
-
-  try {
-    const worktreesBase = join(workspaceRoot, 'agent-workdirs');
-    const dirs = execSync(`ls "${worktreesBase}" 2>/dev/null || true`, { encoding: 'utf8' })
-      .trim()
-      .split('\n')
-      .filter(Boolean);
-
-    // Try all agent worktrees and return the first one with a project subdirectory
-    for (const dir of dirs.reverse()) {
-      const candidate = join(worktreesBase, dir, projectName);
-      try {
-        execSync(`test -d "${candidate}"`, { stdio: 'ignore' });
-        return candidate;
-      } catch {
-        // Not found in this worktree
-      }
-    }
-  } catch {
-    // ls failed or no worktrees yet
-  }
-
-  return null;
-}
--- a/src/test/integration/full-flow/harness.ts
+++ b/src/test/integration/full-flow/harness.ts
@@ -11,7 +11,7 @@
 *  - A self-contained fixture git repo (todo-api) for agents to work on
 *  - Helpers for driving agents through question/answer loops
 *
- * COSTS REAL API CREDITS. Controlled by FULL_FLOW_TESTS=1.
+ * Used by full-flow-cassette.test.ts (replay) and for manual recording runs.
 */

 import { mkdtemp, rm, cp } from 'node:fs/promises';
@@ -397,9 +397,3 @@ export async function createFullFlowHarness(

  return harness;
 }
-
-// =============================================================================
-// Guard
-// =============================================================================
-
-export const shouldRunFullFlowTests = process.env.FULL_FLOW_TESTS === '1';