refactor: Restructure monorepo to apps/server/ and apps/web/ layout
Move src/ → apps/server/ and packages/web/ → apps/web/ to adopt standard monorepo conventions (apps/ for runnable apps, packages/ for reusable libraries). Update all config files, shared package imports, test fixtures, and documentation to reflect new paths. Key fixes: - Update workspace config to ["apps/*", "packages/*"] - Update tsconfig.json rootDir/include for apps/server/ - Add apps/web/** to vitest exclude list - Update drizzle.config.ts schema path - Fix ensure-schema.ts migration path detection (3 levels up in dev, 2 levels up in dist) - Fix tests/integration/cli-server.test.ts import paths - Update packages/shared imports to apps/server/ paths - Update all docs/ files with new paths
This commit is contained in:
265
apps/server/test/cassette/cassette.test.ts
Normal file
265
apps/server/test/cassette/cassette.test.ts
Normal file
@@ -0,0 +1,265 @@
|
||||
/**
|
||||
* Cassette System Unit Tests
|
||||
*
|
||||
* Verifies normalizer, key generation, and store in isolation.
|
||||
* These run without any real processes or API calls.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach } from 'vitest';
import { mkdtempSync, mkdirSync, writeFileSync, readdirSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
import { hashWorktreeFiles, buildCassetteKey } from './key.js';
import { CassetteStore } from './store.js';
import type { CassetteEntry, CassetteKey } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Normalizer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Prompt normalization: each class of run-varying token must collapse to a
// stable placeholder so the cassette key is identical across runs.
describe('normalizePrompt', () => {
  it('strips UUIDs', () => {
    const prompt = 'Agent 550e8400-e29b-41d4-a716-446655440000 is running task abc123ef-0000-0000-0000-000000000000';
    const result = normalizePrompt(prompt, '');
    expect(result).not.toContain('550e8400');
    expect(result).not.toContain('abc123ef');
    expect(result).toContain('__UUID__');
  });

  it('strips workspace root path', () => {
    const workspaceRoot = '/tmp/cw-test-abc123';
    const prompt = `Working directory: ${workspaceRoot}/agent-workdirs/my-agent`;
    const result = normalizePrompt(prompt, workspaceRoot);
    expect(result).not.toContain(workspaceRoot);
    expect(result).toContain('__WORKSPACE__');
  });

  it('strips ISO timestamps', () => {
    const prompt = 'Started at 2026-03-01T14:30:00Z, last seen 2026-03-01T14:35:22.456Z';
    const result = normalizePrompt(prompt, '');
    expect(result).not.toContain('2026-03-01');
    expect(result).toContain('__TIMESTAMP__');
  });

  it('strips session numbers', () => {
    const prompt = 'Resuming session 3 with agent session-42';
    const result = normalizePrompt(prompt, '');
    expect(result).toContain('session__N__');
    expect(result).not.toContain('session 3');
    expect(result).not.toContain('session-42');
  });

  it('leaves static content unchanged', () => {
    const prompt = 'You are a Worker agent. Execute the assigned coding task.';
    const result = normalizePrompt(prompt, '/tmp/ws');
    expect(result).toBe(prompt);
  });

  it('strips nanoid strings (21-char alphanumeric)', () => {
    const nanoid = 'V1StGXR8_Z5jdHi6B-myT';
    const prompt = `Agent worktree: /tmp/cw-preview-${nanoid}/app`;
    const result = normalizePrompt(prompt, '');
    expect(result).not.toContain(nanoid);
    expect(result).toContain('__ID__');
  });

  it('strips workspace root before UUID replacement to avoid double-normalizing', () => {
    const workspaceRoot = '/tmp/cw-test-abc123';
    const uuid = '550e8400-e29b-41d4-a716-446655440000';
    const prompt = `Dir: ${workspaceRoot}/agents/${uuid}`;
    const result = normalizePrompt(prompt, workspaceRoot);
    // Replacement order matters: workspace path first, then generic token classes.
    expect(result).toBe('Dir: __WORKSPACE__/agents/__UUID__');
  });
});
|
||||
|
||||
// Prompt stripping: the prompt text must never leak into the cassette key's
// modelArgs, regardless of which flag style the provider uses.
describe('stripPromptFromArgs', () => {
  it('strips -p <prompt> style (Claude native)', () => {
    const prompt = 'Do the task.';
    const args = ['--dangerously-skip-permissions', '--verbose', '-p', prompt, '--output-format', 'stream-json'];
    const result = stripPromptFromArgs(args, prompt);
    expect(result).toEqual(['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json']);
  });

  it('strips --prompt <prompt> style', () => {
    const prompt = 'Do the task.';
    const args = ['--flag', '--prompt', prompt, '--json'];
    const result = stripPromptFromArgs(args, prompt);
    expect(result).toEqual(['--flag', '--json']);
  });

  it('strips bare positional prompt', () => {
    const prompt = 'Do the task.';
    const args = ['--full-auto', prompt];
    const result = stripPromptFromArgs(args, prompt);
    expect(result).toEqual(['--full-auto']);
  });

  it('returns unchanged args when prompt is empty', () => {
    const args = ['--flag', '--value'];
    expect(stripPromptFromArgs(args, '')).toEqual(args);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Key generation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Key derivation: every key component must influence the hash; modelArgs
// insertion order must not.
describe('buildCassetteKey', () => {
  const baseKey: CassetteKey = {
    normalizedPrompt: 'You are a Worker agent.',
    providerName: 'claude',
    modelArgs: ['--dangerously-skip-permissions', '--verbose', '--output-format', 'stream-json'],
    worktreeHash: 'empty',
  };

  it('produces a 32-char hex string', () => {
    const key = buildCassetteKey(baseKey);
    expect(key).toMatch(/^[0-9a-f]{32}$/);
  });

  it('is deterministic for the same key', () => {
    expect(buildCassetteKey(baseKey)).toBe(buildCassetteKey(baseKey));
  });

  it('differs when normalizedPrompt changes', () => {
    const key2 = { ...baseKey, normalizedPrompt: 'You are a Discuss agent.' };
    expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2));
  });

  it('differs when providerName changes', () => {
    const key2 = { ...baseKey, providerName: 'codex' };
    expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2));
  });

  it('differs when worktreeHash changes', () => {
    const key2 = { ...baseKey, worktreeHash: 'abcdef1234567890' };
    expect(buildCassetteKey(baseKey)).not.toBe(buildCassetteKey(key2));
  });

  it('is stable regardless of modelArgs insertion order', () => {
    const key1 = { ...baseKey, modelArgs: ['--verbose', '--dangerously-skip-permissions'] };
    const key2 = { ...baseKey, modelArgs: ['--dangerously-skip-permissions', '--verbose'] };
    expect(buildCassetteKey(key1)).toBe(buildCassetteKey(key2));
  });
});
|
||||
|
||||
describe('hashWorktreeFiles', () => {
|
||||
it('returns "empty" for a non-existent directory', () => {
|
||||
expect(hashWorktreeFiles('/does/not/exist')).toBe('empty');
|
||||
});
|
||||
|
||||
it('returns "empty" for a directory with only hidden files', () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-'));
|
||||
try {
|
||||
// Only hidden entries present
|
||||
const { mkdirSync } = require('node:fs');
|
||||
mkdirSync(join(dir, '.git'));
|
||||
expect(hashWorktreeFiles(dir)).toBe('empty');
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('produces a 16-char hex string for a directory with files', () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-'));
|
||||
try {
|
||||
const { writeFileSync } = require('node:fs');
|
||||
writeFileSync(join(dir, 'index.ts'), 'export const x = 1;');
|
||||
const hash = hashWorktreeFiles(dir);
|
||||
expect(hash).toMatch(/^[0-9a-f]{16}$/);
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('changes when file content changes', () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), 'cw-hash-test-'));
|
||||
try {
|
||||
const { writeFileSync } = require('node:fs');
|
||||
writeFileSync(join(dir, 'index.ts'), 'export const x = 1;');
|
||||
const hash1 = hashWorktreeFiles(dir);
|
||||
writeFileSync(join(dir, 'index.ts'), 'export const x = 2;');
|
||||
const hash2 = hashWorktreeFiles(dir);
|
||||
expect(hash1).not.toBe(hash2);
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CassetteStore
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('CassetteStore', () => {
|
||||
let dir: string;
|
||||
let store: CassetteStore;
|
||||
|
||||
const key: CassetteKey = {
|
||||
normalizedPrompt: 'Test prompt',
|
||||
providerName: 'claude',
|
||||
modelArgs: ['--verbose'],
|
||||
worktreeHash: 'empty',
|
||||
};
|
||||
|
||||
const entry: CassetteEntry = {
|
||||
version: 1,
|
||||
key,
|
||||
recording: {
|
||||
jsonlLines: ['{"type":"system","session_id":"test-session"}', '{"type":"result","subtype":"success"}'],
|
||||
signalJson: { status: 'done', message: 'Task completed' },
|
||||
exitCode: 0,
|
||||
recordedAt: '2026-03-01T00:00:00.000Z',
|
||||
},
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
dir = mkdtempSync(join(tmpdir(), 'cw-store-test-'));
|
||||
store = new CassetteStore(dir);
|
||||
});
|
||||
|
||||
it('returns null for unknown key', () => {
|
||||
expect(store.find(key)).toBeNull();
|
||||
});
|
||||
|
||||
it('round-trips a cassette entry', () => {
|
||||
store.save(key, entry);
|
||||
const loaded = store.find(key);
|
||||
expect(loaded).not.toBeNull();
|
||||
expect(loaded?.recording.signalJson).toEqual({ status: 'done', message: 'Task completed' });
|
||||
expect(loaded?.recording.jsonlLines).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('overwrites an existing cassette', () => {
|
||||
store.save(key, entry);
|
||||
const updated: CassetteEntry = {
|
||||
...entry,
|
||||
recording: { ...entry.recording, jsonlLines: ['new line'], recordedAt: '2026-03-02T00:00:00.000Z' },
|
||||
};
|
||||
store.save(key, updated);
|
||||
const loaded = store.find(key);
|
||||
expect(loaded?.recording.jsonlLines).toEqual(['new line']);
|
||||
});
|
||||
|
||||
it('uses same file for same key', () => {
|
||||
store.save(key, entry);
|
||||
const { readdirSync } = require('node:fs');
|
||||
const files = readdirSync(dir).filter((f: string) => f.endsWith('.json'));
|
||||
expect(files).toHaveLength(1);
|
||||
|
||||
store.save(key, entry); // overwrite
|
||||
const files2 = readdirSync(dir).filter((f: string) => f.endsWith('.json'));
|
||||
expect(files2).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('uses different files for different keys', () => {
|
||||
const key2: CassetteKey = { ...key, providerName: 'codex' };
|
||||
store.save(key, entry);
|
||||
store.save(key2, { ...entry, key: key2 });
|
||||
|
||||
const { readdirSync } = require('node:fs');
|
||||
const files = readdirSync(dir).filter((f: string) => f.endsWith('.json'));
|
||||
expect(files).toHaveLength(2);
|
||||
});
|
||||
});
|
||||
200
apps/server/test/cassette/harness.ts
Normal file
200
apps/server/test/cassette/harness.ts
Normal file
@@ -0,0 +1,200 @@
|
||||
/**
|
||||
* Cassette Test Harness
|
||||
*
|
||||
* Wraps RealProviderHarness with the CassetteProcessManager so tests run
|
||||
* against recorded cassettes instead of real AI APIs.
|
||||
*
|
||||
* Usage:
|
||||
*
|
||||
* let harness: RealProviderHarness;
|
||||
*
|
||||
* beforeAll(async () => {
|
||||
* harness = await createCassetteHarness({ provider: 'claude' });
|
||||
* });
|
||||
*
|
||||
* afterAll(() => harness.cleanup());
|
||||
*
|
||||
* it('completes a task', async () => {
|
||||
* const agent = await harness.agentManager.spawn({ prompt: MINIMAL_PROMPTS.done, ... });
|
||||
* const result = await harness.waitForAgentCompletion(agent.id);
|
||||
* expect(result?.success).toBe(true);
|
||||
* });
|
||||
*
|
||||
* Mode control via env vars:
|
||||
* (default) → replay mode: cassette must exist, throws if missing
|
||||
* CW_CASSETTE_RECORD=1 → auto mode: replay if exists, record if missing
|
||||
* CW_CASSETTE_FORCE_RECORD=1→ record mode: always run real agent, overwrite cassette
|
||||
*/
|
||||
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { execSync } from 'node:child_process';
|
||||
import { join } from 'node:path';
|
||||
import { createTestDatabase } from '../../db/repositories/drizzle/test-helpers.js';
|
||||
import {
|
||||
DrizzleAgentRepository,
|
||||
DrizzleProjectRepository,
|
||||
DrizzleAccountRepository,
|
||||
DrizzleInitiativeRepository,
|
||||
} from '../../db/repositories/drizzle/index.js';
|
||||
import { MultiProviderAgentManager } from '../../agent/manager.js';
|
||||
import { CapturingEventBus, sleep, type RealProviderHarness } from '../integration/real-providers/harness.js';
|
||||
import { CassetteStore } from './store.js';
|
||||
import { CassetteProcessManager, type CassetteMode } from './process-manager.js';
|
||||
|
||||
export interface CassetteHarnessOptions {
|
||||
/** Which provider the agent runs as (default: 'claude'). */
|
||||
provider?: 'claude' | 'codex';
|
||||
/**
|
||||
* Directory where cassette JSON files are stored and read from.
|
||||
* Defaults to CW_CASSETTE_DIR env var, then src/test/cassettes/.
|
||||
*/
|
||||
cassetteDir?: string;
|
||||
/**
|
||||
* Override cassette mode. Normally derived from env vars:
|
||||
* - CW_CASSETTE_FORCE_RECORD=1 → 'record'
|
||||
* - CW_CASSETTE_RECORD=1 → 'auto'
|
||||
* - (default) → 'replay'
|
||||
*/
|
||||
mode?: CassetteMode;
|
||||
}
|
||||
|
||||
// Fallback cassette directory, resolved relative to this module.
// NOTE(review): URL.pathname is non-portable on Windows (leading slash,
// percent-encoding); fileURLToPath(new URL(...)) would be the safe form —
// confirm the supported platforms before changing.
const DEFAULT_CASSETTE_DIR = new URL('../cassettes', import.meta.url).pathname;
|
||||
|
||||
/**
|
||||
* Resolve cassette mode from env vars (highest priority) or options.
|
||||
*/
|
||||
function resolveCassetteMode(options: CassetteHarnessOptions): CassetteMode {
|
||||
if (process.env.CW_CASSETTE_FORCE_RECORD === '1') return 'record';
|
||||
if (process.env.CW_CASSETTE_RECORD === '1') return 'auto';
|
||||
return options.mode ?? 'replay';
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a test harness backed by the cassette system.
|
||||
*
|
||||
* The harness exposes the same interface as RealProviderHarness so tests
|
||||
* written for real providers work unchanged with cassettes.
|
||||
*
|
||||
* Replay is much faster than real API calls (typically < 500ms) and
|
||||
* exercises the full pipeline: ProcessManager → FileTailer → OutputHandler
|
||||
* → SignalManager → event emission.
|
||||
*/
|
||||
export async function createCassetteHarness(options: CassetteHarnessOptions = {}): Promise<RealProviderHarness> {
  // Precedence: explicit option > CW_CASSETTE_DIR env var > module-relative default.
  const cassetteDir = options.cassetteDir ?? process.env.CW_CASSETTE_DIR ?? DEFAULT_CASSETTE_DIR;
  const cassetteMode = resolveCassetteMode(options);

  // Create a temporary git workspace (required for worktree operations).
  const workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-cassette-'));
  execSync('git init', { cwd: workspaceRoot, stdio: 'ignore' });
  execSync('git config user.email "test@test.com"', { cwd: workspaceRoot, stdio: 'ignore' });
  execSync('git config user.name "Test"', { cwd: workspaceRoot, stdio: 'ignore' });
  // An initial commit is needed so worktrees have a HEAD to branch from.
  execSync('touch .gitkeep && git add .gitkeep && git commit -m "init"', { cwd: workspaceRoot, stdio: 'ignore' });

  // Fresh test database and the repository layer the agent manager depends on.
  const db = createTestDatabase();
  const agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  const initiativeRepository = new DrizzleInitiativeRepository(db);
  const eventBus = new CapturingEventBus();

  // The cassette process manager replaces real subprocess spawning with
  // replay/record behavior driven by cassetteMode.
  const store = new CassetteStore(cassetteDir);
  const cassetteProcessManager = new CassetteProcessManager(
    workspaceRoot,
    projectRepository,
    store,
    cassetteMode,
  );

  // Optional collaborators are passed positionally as undefined — the cassette
  // harness only exercises the spawn → complete pipeline.
  const agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus,
    undefined, // credentialManager
    undefined, // changeSetRepository
    undefined, // phaseRepository
    undefined, // taskRepository
    undefined, // pageRepository
    undefined, // logChunkRepository
    false, // debug
    cassetteProcessManager,
  );

  const harness: RealProviderHarness = {
    db,
    eventBus,
    agentManager,
    workspaceRoot,
    agentRepository,
    projectRepository,
    accountRepository,
    initiativeRepository,

    // Cassette replays are fast — use a short poll interval and default timeout.
    async waitForAgentCompletion(agentId, timeoutMs = 30_000) {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'idle' || agent.status === 'stopped') {
          return agentManager.getResult(agentId);
        }
        if (agent.status === 'crashed') {
          return agentManager.getResult(agentId);
        }
        // An agent blocked on input will never complete on its own.
        if (agent.status === 'waiting_for_input') return null;
        await sleep(100);
      }
      throw new Error(`[cassette] Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
    },

    async waitForAgentWaiting(agentId, timeoutMs = 30_000) {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'waiting_for_input') return agentManager.getPendingQuestions(agentId);
        // Terminal states mean the agent will never ask a question.
        if (['idle', 'stopped', 'crashed'].includes(agent.status)) return null;
        await sleep(100);
      }
      throw new Error(`[cassette] Timeout waiting for agent ${agentId} to enter waiting state after ${timeoutMs}ms`);
    },

    async waitForAgentStatus(agentId, status, timeoutMs = 30_000) {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) throw new Error(`Agent ${agentId} not found`);
        if (agent.status === status) return;
        await sleep(100);
      }
      throw new Error(`[cassette] Timeout waiting for agent ${agentId} to reach status '${status}' after ${timeoutMs}ms`);
    },

    getEventsByType(type) {
      return eventBus.getEventsByType(type);
    },

    clearEvents() {
      eventBus.clearEvents();
    },

    async killAllAgents() {
      const agents = await agentRepository.findAll();
      for (const agent of agents) {
        if (agent.status === 'running') {
          // Best-effort stop; a dead process is the desired end state either way.
          try { await agentManager.stop(agent.id); } catch { /* ignore */ }
        }
      }
    },

    async cleanup() {
      await harness.killAllAgents();
      try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ }
    },
  };

  return harness;
}
|
||||
6
apps/server/test/cassette/index.ts
Normal file
6
apps/server/test/cassette/index.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
export { CassetteStore } from './store.js';
|
||||
export { CassetteProcessManager, type CassetteMode } from './process-manager.js';
|
||||
export { createCassetteHarness, type CassetteHarnessOptions } from './harness.js';
|
||||
export { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
|
||||
export { hashWorktreeFiles, buildCassetteKey } from './key.js';
|
||||
export type { CassetteKey, CassetteRecording, CassetteEntry } from './types.js';
|
||||
76
apps/server/test/cassette/key.ts
Normal file
76
apps/server/test/cassette/key.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
* Cassette Key Generation
|
||||
*
|
||||
* Builds stable SHA256-based identifiers for cassettes.
|
||||
* Two spans are separate concerns:
|
||||
* - hashWorktreeFiles: fingerprints the worktree state at spawn time (for execute mode drift)
|
||||
* - buildCassetteKey: hashes all key components into a 32-char hex filename
|
||||
*/
|
||||
|
||||
import { createHash } from 'node:crypto';
|
||||
import { readdirSync, readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import type { CassetteKey } from './types.js';
|
||||
|
||||
/**
|
||||
* Recursively hash all non-hidden files in a directory.
|
||||
*
|
||||
* Hidden entries (starting with '.') are skipped — this excludes .git, .cw, etc.
|
||||
* Entries are processed in sorted order for determinism across platforms.
|
||||
*
|
||||
* Returns the first 16 hex chars of the SHA256, or 'empty' if the directory
|
||||
* is absent or contains no readable files.
|
||||
*/
|
||||
export function hashWorktreeFiles(dir: string): string {
|
||||
const hash = createHash('sha256');
|
||||
let hasContent = false;
|
||||
|
||||
function walkDir(currentDir: string): void {
|
||||
let entries;
|
||||
try {
|
||||
entries = readdirSync(currentDir, { withFileTypes: true });
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const entry of [...entries].sort((a, b) => a.name.localeCompare(b.name))) {
|
||||
if (entry.name.startsWith('.')) continue;
|
||||
|
||||
const fullPath = join(currentDir, entry.name);
|
||||
const relPath = fullPath.slice(dir.length);
|
||||
|
||||
if (entry.isDirectory()) {
|
||||
hash.update(`d:${relPath}\n`);
|
||||
walkDir(fullPath);
|
||||
} else if (entry.isFile()) {
|
||||
try {
|
||||
const content = readFileSync(fullPath);
|
||||
hash.update(`f:${relPath}:${content.length}\n`);
|
||||
hash.update(content);
|
||||
hasContent = true;
|
||||
} catch {
|
||||
// skip unreadable files
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
walkDir(dir);
|
||||
return hasContent ? hash.digest('hex').slice(0, 16) : 'empty';
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a stable 32-char hex identifier for a cassette key.
|
||||
*
|
||||
* modelArgs are sorted before hashing so insertion order differences
|
||||
* between providers don't produce different cassettes.
|
||||
*/
|
||||
export function buildCassetteKey(key: CassetteKey): string {
|
||||
const canonical = JSON.stringify({
|
||||
normalizedPrompt: key.normalizedPrompt,
|
||||
providerName: key.providerName,
|
||||
modelArgs: [...key.modelArgs].sort(),
|
||||
worktreeHash: key.worktreeHash,
|
||||
});
|
||||
return createHash('sha256').update(canonical).digest('hex').slice(0, 32);
|
||||
}
|
||||
76
apps/server/test/cassette/normalizer.ts
Normal file
76
apps/server/test/cassette/normalizer.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
* Cassette Normalizer
|
||||
*
|
||||
* Strips dynamic content from prompts and CLI args before hashing into a cassette key.
|
||||
* Dynamic content (UUIDs, temp paths, timestamps, session numbers) varies between
|
||||
* test runs but doesn't affect how the agent responds — so we replace them with
|
||||
* stable placeholders to get a stable cache key.
|
||||
*/
|
||||
|
||||
const UUID_RE = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi;
|
||||
const NANOID_RE = /(?<![A-Za-z0-9])[A-Za-z0-9_-]{21}(?![A-Za-z0-9_-])/g;
|
||||
const ISO_TIMESTAMP_RE = /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?/g;
|
||||
const UNIX_EPOCH_MS_RE = /\b1[0-9]{12}\b/g;
|
||||
const SESSION_NUM_RE = /\bsession[_\s-]?\d+\b/gi;
|
||||
// Agent worktree paths: agent-workdirs/<random-agent-name> (with or without trailing slash)
|
||||
// The agent name (e.g. "available-sheep") changes every run but is not a UUID or nanoid.
|
||||
// Stop at the first slash so the project name after it is preserved.
|
||||
const AGENT_WORKDIR_RE = /agent-workdirs\/[^\s/\\]+/g;
|
||||
|
||||
/**
|
||||
* Normalize a prompt for stable cassette key generation.
|
||||
*
|
||||
* Replacements applied in order (most-specific first to avoid partial matches):
|
||||
* 1. Absolute workspace root path → __WORKSPACE__
|
||||
* 2. UUIDs → __UUID__
|
||||
* 2.5. Nanoid IDs (21-char alphanumeric) → __ID__
|
||||
* 3. ISO 8601 timestamps → __TIMESTAMP__
|
||||
* 4. Unix epoch milliseconds → __EPOCH__
|
||||
* 5. Session numbers → session__N__
|
||||
* 6. Agent worktree path segment → agent-workdirs/__AGENT__/
|
||||
*/
|
||||
export function normalizePrompt(prompt: string, workspaceRoot: string): string {
|
||||
let normalized = prompt;
|
||||
|
||||
if (workspaceRoot) {
|
||||
normalized = normalized.replaceAll(workspaceRoot, '__WORKSPACE__');
|
||||
}
|
||||
|
||||
normalized = normalized.replace(UUID_RE, '__UUID__');
|
||||
normalized = normalized.replace(NANOID_RE, '__ID__');
|
||||
normalized = normalized.replace(ISO_TIMESTAMP_RE, '__TIMESTAMP__');
|
||||
normalized = normalized.replace(UNIX_EPOCH_MS_RE, '__EPOCH__');
|
||||
normalized = normalized.replace(SESSION_NUM_RE, 'session__N__');
|
||||
normalized = normalized.replace(AGENT_WORKDIR_RE, 'agent-workdirs/__AGENT__');
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip the prompt value from CLI args to produce stable modelArgs for the cassette key.
|
||||
*
|
||||
* Handles all provider prompt flag styles:
|
||||
* - Native: `-p <prompt>` (Claude)
|
||||
* - Flag: `--prompt <prompt>`, `-p <prompt>` (Gemini, Cursor, Auggie, Amp, Opencode)
|
||||
* - Also removes the bare prompt value if it appears as a positional arg.
|
||||
*/
|
||||
export function stripPromptFromArgs(args: string[], prompt: string): string[] {
|
||||
if (!prompt) return [...args];
|
||||
|
||||
const result: string[] = [];
|
||||
let i = 0;
|
||||
while (i < args.length) {
|
||||
const arg = args[i];
|
||||
const PROMPT_FLAGS = ['-p', '--prompt', '--message'];
|
||||
|
||||
if (PROMPT_FLAGS.includes(arg) && args[i + 1] === prompt) {
|
||||
i += 2; // skip flag + value
|
||||
} else if (arg === prompt) {
|
||||
i += 1; // skip bare positional prompt
|
||||
} else {
|
||||
result.push(arg);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
258
apps/server/test/cassette/process-manager.ts
Normal file
258
apps/server/test/cassette/process-manager.ts
Normal file
@@ -0,0 +1,258 @@
|
||||
/**
|
||||
* CassetteProcessManager
|
||||
*
|
||||
* Extends ProcessManager to intercept subprocess spawning and either:
|
||||
* - Replay a recorded cassette (no API cost, deterministic)
|
||||
* - Record a new cassette by running the real agent and capturing its output
|
||||
*
|
||||
* Modes:
|
||||
* - 'replay': cassette MUST exist; throws if missing (safe for CI)
|
||||
* - 'record': always runs real agent; saves/overwrites cassette on completion
|
||||
* - 'auto': replays if cassette exists; falls through to record if missing
|
||||
*
|
||||
* The cassette key is built from:
|
||||
* - Normalized prompt (dynamic content replaced with placeholders)
|
||||
* - Provider name and stable CLI args (prompt value stripped)
|
||||
* - Worktree file hash (detects content drift for execute-mode agents)
|
||||
*/
|
||||
|
||||
import { readFileSync, existsSync, mkdirSync, writeFileSync, readdirSync } from 'node:fs';
|
||||
import { join, dirname, relative } from 'node:path';
|
||||
import { ProcessManager } from '../../agent/process-manager.js';
|
||||
import type { StreamEvent } from '../../agent/providers/parsers/index.js';
|
||||
import type { FileTailer } from '../../agent/file-tailer.js';
|
||||
import type { ProjectRepository } from '../../db/repositories/project-repository.js';
|
||||
import type { CassetteKey, CassetteEntry } from './types.js';
|
||||
import type { CassetteStore } from './store.js';
|
||||
import { normalizePrompt, stripPromptFromArgs } from './normalizer.js';
|
||||
import { hashWorktreeFiles } from './key.js';
|
||||
|
||||
/** How the process manager treats cassettes: strict replay, forced record, or record-on-miss. */
export type CassetteMode = 'replay' | 'record' | 'auto';

/** Bookkeeping for a real run that will be saved as a cassette on completion. */
interface PendingRecording {
  // Cassette key computed at spawn time; identifies where to save.
  key: CassetteKey;
  // JSONL file the spawned agent streams into; read back when the run ends.
  outputFilePath: string;
  // Agent working directory; .cw/output/ under it is captured into the cassette.
  agentCwd: string;
}

/** Bookkeeping for a run being served from an existing cassette. */
interface PendingReplay {
  // The matched cassette entry being replayed.
  cassette: CassetteEntry;
  // Directory into which captured output files are restored before completion.
  agentCwd: string;
}
|
||||
|
||||
export class CassetteProcessManager extends ProcessManager {
  /** Workspace root kept locally for prompt normalization. */
  private readonly _workspaceRoot: string;
  /** Absolute path to the replay worker script, resolved relative to this module. */
  private readonly replayWorkerPath: string;
  /** pid → recording bookkeeping for agents spawned for real (record mode, or auto-mode miss). */
  private readonly pendingRecordings = new Map<number, PendingRecording>();
  /** pid → replay bookkeeping for agents served from an existing cassette. */
  private readonly pendingReplays = new Map<number, PendingReplay>();

  constructor(
    workspaceRoot: string,
    projectRepository: ProjectRepository,
    private readonly store: CassetteStore,
    private readonly cassetteMode: CassetteMode = 'auto',
  ) {
    super(workspaceRoot, projectRepository);
    this._workspaceRoot = workspaceRoot;
    // NOTE(review): URL.pathname is non-portable on Windows (leading slash,
    // percent-encoding); fileURLToPath would be the safe form — confirm targets.
    this.replayWorkerPath = new URL('./replay-worker.mjs', import.meta.url).pathname;
  }
|
||||
|
||||
  /**
   * Intercept agent subprocess spawning.
   *
   * Builds the cassette key from the normalized prompt, provider name, stable
   * CLI args (prompt stripped), and worktree hash, then either replays an
   * existing cassette, throws in strict 'replay' mode, or falls through to a
   * real spawn with recording armed for completion time.
   */
  override spawnDetached(
    agentId: string,
    agentName: string,
    command: string,
    args: string[],
    cwd: string,
    env: Record<string, string>,
    providerName: string,
    prompt?: string,
    onEvent?: (event: StreamEvent) => void,
    onRawContent?: (content: string) => void,
  ): { pid: number; outputFilePath: string; tailer: FileTailer } {
    const key: CassetteKey = {
      normalizedPrompt: normalizePrompt(prompt ?? '', this._workspaceRoot),
      providerName,
      modelArgs: stripPromptFromArgs(args, prompt ?? ''),
      worktreeHash: hashWorktreeFiles(cwd),
    };

    // In record mode we always skip the store lookup and go straight to real spawn.
    const existing = this.cassetteMode !== 'record' ? this.store.find(key) : null;

    if (existing) {
      // Cassette hit: serve the recording and remember the pid for pollForCompletion.
      const result = this.replayFromCassette(agentId, agentName, cwd, env, providerName, existing, onEvent, onRawContent);
      this.pendingReplays.set(result.pid, { cassette: existing, agentCwd: cwd });
      return result;
    }

    if (this.cassetteMode === 'replay') {
      // Strict mode: a missing cassette is an error so CI never hits real APIs.
      throw new Error(
        `[cassette] No cassette found for agent '${agentName}' (provider=${providerName}, mode=replay).\n` +
        `Run with CW_CASSETTE_RECORD=1 to record it.`,
      );
    }

    // auto or record: run the real agent and record the cassette on completion.
    console.log(`[cassette] recording new cassette for agent '${agentName}' (${providerName})`);
    const result = super.spawnDetached(agentId, agentName, command, args, cwd, env, providerName, prompt, onEvent, onRawContent);
    this.pendingRecordings.set(result.pid, { key, outputFilePath: result.outputFilePath, agentCwd: cwd });
    return result;
  }
|
||||
|
||||
  /**
   * Wrap completion polling so cassette bookkeeping runs before the caller's
   * onComplete: freshly recorded runs are saved, replayed runs restore their
   * captured output files.
   */
  override pollForCompletion(
    agentId: string,
    pid: number,
    onComplete: () => Promise<void>,
    getTailer: () => FileTailer | undefined,
  ): { cancel: () => void } {
    const recording = this.pendingRecordings.get(pid);
    if (recording) {
      // Record mode — wrap onComplete to save the cassette before handing off.
      return super.pollForCompletion(agentId, pid, async () => {
        await this.saveCassette(recording);
        this.pendingRecordings.delete(pid);
        await onComplete();
      }, getTailer);
    }

    const replay = this.pendingReplays.get(pid);
    if (replay) {
      // Replay mode — restore .cw/output/ files before onComplete so that
      // readPhaseFiles / readTaskFiles / readProposalFiles find their data.
      return super.pollForCompletion(agentId, pid, async () => {
        this.restoreOutputFiles(replay.cassette, replay.agentCwd);
        this.pendingReplays.delete(pid);
        await onComplete();
      }, getTailer);
    }

    // pid not tracked by either map — not a cassette-managed process; defer entirely.
    return super.pollForCompletion(agentId, pid, onComplete, getTailer);
  }
|
||||
|
||||
private async saveCassette(pending: PendingRecording): Promise<void> {
|
||||
// Read all JSONL lines from the output file the agent wrote to.
|
||||
let jsonlLines: string[] = [];
|
||||
try {
|
||||
const content = readFileSync(pending.outputFilePath, 'utf-8');
|
||||
jsonlLines = content.split('\n').filter(l => l.trim() !== '');
|
||||
} catch {
|
||||
// No output produced — record an empty cassette.
|
||||
}
|
||||
|
||||
// Read signal.json from the agent working directory.
|
||||
let signalJson: Record<string, unknown> | null = null;
|
||||
const outputDir = join(pending.agentCwd, '.cw', 'output');
|
||||
const signalPath = join(outputDir, 'signal.json');
|
||||
if (existsSync(signalPath)) {
|
||||
try {
|
||||
signalJson = JSON.parse(readFileSync(signalPath, 'utf-8')) as Record<string, unknown>;
|
||||
} catch {
|
||||
// Corrupt signal file — record null.
|
||||
}
|
||||
}
|
||||
|
||||
// Capture all other files in .cw/output/ (phase files, task files, etc.)
|
||||
const outputFiles: Record<string, string> = {};
|
||||
if (existsSync(outputDir)) {
|
||||
this.walkOutputDir(outputDir, outputDir, (relPath, content) => {
|
||||
if (relPath !== 'signal.json') {
|
||||
outputFiles[relPath] = content;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
const entry: CassetteEntry = {
|
||||
version: 1,
|
||||
key: pending.key,
|
||||
recording: {
|
||||
jsonlLines,
|
||||
signalJson,
|
||||
exitCode: 0,
|
||||
recordedAt: new Date().toISOString(),
|
||||
outputFiles,
|
||||
},
|
||||
};
|
||||
|
||||
this.store.save(pending.key, entry);
|
||||
}
|
||||
|
||||
/**
|
||||
* Restore captured .cw/output/ files to the new agent working directory.
|
||||
* Called before onComplete so that downstream readers (readPhaseFiles, etc.)
|
||||
* find the expected files in place.
|
||||
*/
|
||||
private restoreOutputFiles(cassette: CassetteEntry, agentCwd: string): void {
|
||||
const { outputFiles, signalJson } = cassette.recording;
|
||||
const outputDir = join(agentCwd, '.cw', 'output');
|
||||
|
||||
// Restore captured output files
|
||||
if (outputFiles) {
|
||||
for (const [relPath, content] of Object.entries(outputFiles)) {
|
||||
const fullPath = join(outputDir, relPath);
|
||||
mkdirSync(dirname(fullPath), { recursive: true });
|
||||
writeFileSync(fullPath, content, 'utf-8');
|
||||
}
|
||||
}
|
||||
|
||||
// Write signal.json (the manager reads this to detect completion status)
|
||||
if (signalJson) {
|
||||
mkdirSync(outputDir, { recursive: true });
|
||||
writeFileSync(join(outputDir, 'signal.json'), JSON.stringify(signalJson), 'utf-8');
|
||||
}
|
||||
}
|
||||
|
||||
private walkOutputDir(
|
||||
baseDir: string,
|
||||
currentDir: string,
|
||||
callback: (relPath: string, content: string) => void,
|
||||
): void {
|
||||
let entries;
|
||||
try {
|
||||
entries = readdirSync(currentDir, { withFileTypes: true });
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const entry of entries) {
|
||||
const fullPath = join(currentDir, entry.name);
|
||||
const relPath = relative(baseDir, fullPath);
|
||||
|
||||
if (entry.isDirectory()) {
|
||||
this.walkOutputDir(baseDir, fullPath, callback);
|
||||
} else if (entry.isFile()) {
|
||||
try {
|
||||
const content = readFileSync(fullPath, 'utf-8');
|
||||
callback(relPath, content);
|
||||
} catch {
|
||||
// Skip unreadable files
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Spawn the replay worker in place of the real agent CLI.
 * The recording travels to the worker via the CW_CASSETTE_DATA env var;
 * the worker echoes the captured JSONL to stdout and writes signal.json.
 */
private replayFromCassette(
  agentId: string,
  agentName: string,
  cwd: string,
  env: Record<string, string>,
  providerName: string,
  cassette: CassetteEntry,
  onEvent?: (event: StreamEvent) => void,
  onRawContent?: (content: string) => void,
): { pid: number; outputFilePath: string; tailer: FileTailer } {
  const lineCount = cassette.recording.jsonlLines.length;
  console.log(`[cassette] replaying cassette for agent '${agentName}' (${lineCount} lines)`);

  const workerEnv = { ...env, CW_CASSETTE_DATA: JSON.stringify(cassette.recording) };

  return super.spawnDetached(
    agentId,
    agentName,
    process.execPath,          // use the running node binary
    [this.replayWorkerPath],   // replay-worker.mjs
    cwd,
    workerEnv,
    providerName,              // use original provider's parser for the tailer
    undefined,                 // no prompt — worker handles output directly
    onEvent,
    onRawContent,
  );
}
|
||||
}
|
||||
48
apps/server/test/cassette/replay-worker.mjs
Normal file
48
apps/server/test/cassette/replay-worker.mjs
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env node
/**
 * Cassette Replay Worker
 *
 * Spawned as a detached subprocess by CassetteProcessManager instead of the
 * real agent CLI. It reads the cassette recording from the CW_CASSETTE_DATA
 * env var, replays the captured JSONL to stdout (which spawnDetached redirects
 * into the agent output file), writes signal.json relative to the process cwd,
 * and exits with the recorded exit code.
 *
 * Kept as plain .mjs (no TypeScript) so it can be launched with bare `node`,
 * with no build step or tsx dependency.
 */

import { mkdirSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';

const rawData = process.env.CW_CASSETTE_DATA;
if (!rawData) {
  process.stderr.write('[replay-worker] CW_CASSETTE_DATA env var not set\n');
  process.exit(1);
}

let recording;
try {
  recording = JSON.parse(rawData);
} catch (err) {
  process.stderr.write(`[replay-worker] failed to parse CW_CASSETTE_DATA: ${err.message}\n`);
  process.exit(1);
}

const { jsonlLines = [], signalJson = null, exitCode = 0 } = recording;

// Replay each captured JSONL line to stdout. spawnDetached redirects stdout to
// the output file via open()+fd redirection, so writing here is equivalent to
// writing the output file directly.
for (const jsonlLine of jsonlLines) {
  process.stdout.write(`${jsonlLine}\n`);
}

// Recreate signal.json at the expected location relative to cwd — spawnDetached
// sets cwd to the agent working directory.
if (signalJson) {
  const signalDir = join(process.cwd(), '.cw', 'output');
  mkdirSync(signalDir, { recursive: true });
  writeFileSync(join(signalDir, 'signal.json'), JSON.stringify(signalJson, null, 2), 'utf-8');
}

process.exit(exitCode);
|
||||
50
apps/server/test/cassette/store.ts
Normal file
50
apps/server/test/cassette/store.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* CassetteStore
|
||||
*
|
||||
* Reads and writes cassette files from a directory on disk.
|
||||
* Each cassette is stored as a JSON file named after the 32-char key hash.
|
||||
* Cassette files are intended to be committed to git — they are the
|
||||
* "recorded interactions" that allow tests to run without real API calls.
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import type { CassetteKey, CassetteEntry } from './types.js';
|
||||
import { buildCassetteKey } from './key.js';
|
||||
|
||||
export class CassetteStore {
|
||||
constructor(private readonly cassetteDir: string) {}
|
||||
|
||||
private pathFor(keyHash: string): string {
|
||||
return join(this.cassetteDir, `${keyHash}.json`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Look up a cassette by its key.
|
||||
* Returns null if not found or if the file is corrupt.
|
||||
*/
|
||||
find(key: CassetteKey): CassetteEntry | null {
|
||||
const hash = buildCassetteKey(key);
|
||||
const path = this.pathFor(hash);
|
||||
|
||||
if (!existsSync(path)) return null;
|
||||
|
||||
try {
|
||||
return JSON.parse(readFileSync(path, 'utf-8')) as CassetteEntry;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Save a cassette to disk. Creates the cassette directory if needed.
|
||||
* Prints the cassette filename so it's visible during recording runs.
|
||||
*/
|
||||
save(key: CassetteKey, entry: CassetteEntry): void {
|
||||
mkdirSync(this.cassetteDir, { recursive: true });
|
||||
const hash = buildCassetteKey(key);
|
||||
const path = this.pathFor(hash);
|
||||
writeFileSync(path, JSON.stringify(entry, null, 2), 'utf-8');
|
||||
console.log(`[cassette] recorded → ${hash}.json (${entry.recording.jsonlLines.length} lines)`);
|
||||
}
|
||||
}
|
||||
42
apps/server/test/cassette/types.ts
Normal file
42
apps/server/test/cassette/types.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
* Cassette Types
|
||||
*
|
||||
* VCR-style cassette format for recording and replaying agent subprocess I/O.
|
||||
* A cassette captures everything an agent process writes so tests can replay
|
||||
* it deterministically without hitting real AI APIs.
|
||||
*/
|
||||
|
||||
/**
 * Identity of a cassette: two spawns with the same key are considered
 * the same interaction and may share a recording.
 */
export interface CassetteKey {
  /** Prompt with dynamic content (UUIDs, paths, timestamps) replaced with placeholders. */
  normalizedPrompt: string;
  /** Provider name, e.g. 'claude', 'codex'. */
  providerName: string;
  /** Stable CLI args with the prompt value stripped. */
  modelArgs: string[];
  /** SHA256 prefix of all non-hidden files in the agent worktree at spawn time. */
  worktreeHash: string;
}

/** Everything captured from one real agent run, sufficient to replay it. */
export interface CassetteRecording {
  /** All JSONL lines the agent wrote to stdout (captured from the output file). */
  jsonlLines: string[];
  /** Content of signal.json written by the agent, or null if missing or corrupt. */
  signalJson: Record<string, unknown> | null;
  /** Process exit code (0 = success). */
  exitCode: number;
  /** ISO timestamp when this cassette was recorded. */
  recordedAt: string;
  /**
   * All files the agent wrote to .cw/output/ (relative path → UTF-8 content),
   * excluding signal.json (which is captured separately in signalJson).
   * Restored during replay before onComplete fires so downstream readers
   * (e.g. readPhaseFiles, readTaskFiles) see the expected directory contents.
   */
  outputFiles?: Record<string, string>;
}

/** On-disk cassette file shape: a versioned key + recording pair. */
export interface CassetteEntry {
  version: 1;
  key: CassetteKey;
  recording: CassetteRecording;
}
|
||||
0
apps/server/test/cassettes/.gitkeep
Normal file
0
apps/server/test/cassettes/.gitkeep
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
285
apps/server/test/e2e/architect-workflow.test.ts
Normal file
285
apps/server/test/e2e/architect-workflow.test.ts
Normal file
@@ -0,0 +1,285 @@
|
||||
/**
|
||||
* E2E Tests for Architect Workflow
|
||||
*
|
||||
* Tests the complete architect workflow from discussion through phase creation:
|
||||
* - Discuss mode: Gather context, answer questions, capture decisions
|
||||
* - Plan mode: Break initiative into phases
|
||||
* - Full workflow: Discuss -> Plan -> Phase persistence
|
||||
*
|
||||
* Uses TestHarness from src/test/ for full system wiring.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import { createTestHarness, type TestHarness } from '../index.js';
|
||||
import type { AgentStoppedEvent } from '../../events/types.js';
|
||||
|
||||
// End-to-end suite for the architect lifecycle. Each test drives the tRPC
// caller against the TestHarness and relies on vitest fake timers to advance
// the mock agent scenarios to completion.
describe('Architect Workflow E2E', () => {
  let harness: TestHarness;

  beforeEach(() => {
    harness = createTestHarness();
  });

  afterEach(() => {
    harness.cleanup();
    // Restore real timers — individual tests opt into fake timers.
    vi.useRealTimers();
  });

  describe('discuss mode', () => {
    it('should spawn architect in discuss mode and complete with decisions', async () => {
      vi.useFakeTimers();

      // Create initiative
      const initiative = await harness.createInitiative('Auth System');

      // Set up discuss completion scenario
      harness.setArchitectDiscussComplete('auth-discuss', [
        { topic: 'Auth Method', decision: 'JWT', reason: 'Stateless, scalable' },
        { topic: 'Token Storage', decision: 'httpOnly cookie', reason: 'XSS protection' },
      ], 'Auth approach decided');

      // Spawn architect in discuss mode
      const agent = await harness.caller.spawnArchitectDiscuss({
        name: 'auth-discuss',
        initiativeId: initiative.id,
      });

      expect(agent.mode).toBe('discuss');

      // Wait for completion
      await harness.advanceTimers();

      // Verify agent stopped with context_complete
      const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
      expect(events).toHaveLength(1);
      expect(events[0].payload.reason).toBe('context_complete');
    });

    it('should pause on questions and resume with answers', async () => {
      vi.useFakeTimers();

      const initiative = await harness.createInitiative('Auth System');

      // First, agent asks questions
      harness.setArchitectDiscussQuestions('auth-discuss', [
        { id: 'q1', question: 'JWT or Session?', options: [{ label: 'JWT' }, { label: 'Session' }] },
        { id: 'q2', question: 'OAuth providers?' },
      ]);

      const agent = await harness.caller.spawnArchitectDiscuss({
        name: 'auth-discuss',
        initiativeId: initiative.id,
      });

      await harness.advanceTimers();

      // Agent should be waiting
      const waitingAgent = await harness.caller.getAgent({ name: 'auth-discuss' });
      expect(waitingAgent?.status).toBe('waiting_for_input');

      // Get pending questions
      const pending = await harness.mockAgentManager.getPendingQuestions(agent.id);
      expect(pending?.questions).toHaveLength(2);

      // Now set up completion scenario for after resume
      harness.setArchitectDiscussComplete('auth-discuss', [
        { topic: 'Auth', decision: 'JWT', reason: 'User chose' },
      ], 'Complete');

      // Resume with answers
      await harness.caller.resumeAgent({
        name: 'auth-discuss',
        answers: { q1: 'JWT', q2: 'Google, GitHub' },
      });

      await harness.advanceTimers();

      // Should complete
      const finalAgent = await harness.caller.getAgent({ name: 'auth-discuss' });
      expect(finalAgent?.status).toBe('idle');
    });
  });

  describe('plan mode', () => {
    it('should spawn architect in plan mode and create phases', async () => {
      vi.useFakeTimers();

      const initiative = await harness.createInitiative('Auth System');

      // Set up plan completion
      harness.setArchitectPlanComplete('auth-plan', [
        { number: 1, name: 'Database Setup', description: 'User table and auth schema', dependencies: [] },
        { number: 2, name: 'JWT Implementation', description: 'Token generation and validation', dependencies: [1] },
        { number: 3, name: 'Protected Routes', description: 'Middleware and route guards', dependencies: [2] },
      ]);

      const agent = await harness.caller.spawnArchitectPlan({
        name: 'auth-plan',
        initiativeId: initiative.id,
      });

      expect(agent.mode).toBe('plan');

      await harness.advanceTimers();

      // Verify stopped with plan_complete
      const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
      expect(events).toHaveLength(1);
      expect(events[0].payload.reason).toBe('plan_complete');
    });

    it('should persist phases from plan output', async () => {
      const initiative = await harness.createInitiative('Auth System');

      const phasesData = [
        { name: 'Foundation' },
        { name: 'Features' },
      ];

      // Persist phases (simulating what would happen after plan)
      const created = await harness.createPhasesFromPlan(initiative.id, phasesData);

      expect(created).toHaveLength(2);

      // Verify retrieval
      const phases = await harness.getPhases(initiative.id);
      expect(phases).toHaveLength(2);
      expect(phases[0].name).toBe('Foundation');
      expect(phases[1].name).toBe('Features');
    });
  });

  describe('plan conflict detection', () => {
    it('should reject if a plan agent is already running', async () => {
      vi.useFakeTimers();

      const initiative = await harness.createInitiative('Auth System');

      // Set up a long-running plan agent (never completes during this test)
      harness.setArchitectPlanComplete('first-plan', [
        { number: 1, name: 'Phase 1', description: 'First', dependencies: [] },
      ]);
      // Use a delay so it stays running
      harness.setAgentScenario('first-plan', { status: 'done', delay: 999999 });

      await harness.caller.spawnArchitectPlan({
        name: 'first-plan',
        initiativeId: initiative.id,
      });

      // Agent should be running
      const agents = await harness.caller.listAgents();
      expect(agents.find(a => a.name === 'first-plan')?.status).toBe('running');

      // Second plan should be rejected
      await expect(
        harness.caller.spawnArchitectPlan({
          name: 'second-plan',
          initiativeId: initiative.id,
        }),
      ).rejects.toThrow(/already running/);
    });

    it('should auto-dismiss stale plan agents before checking', async () => {
      vi.useFakeTimers();

      const initiative = await harness.createInitiative('Auth System');

      // Set up a plan agent that crashes immediately
      harness.setAgentScenario('stale-plan', { status: 'error', error: 'crashed' });

      await harness.caller.spawnArchitectPlan({
        name: 'stale-plan',
        initiativeId: initiative.id,
      });
      await harness.advanceTimers();

      // Should be crashed
      const agents = await harness.caller.listAgents();
      expect(agents.find(a => a.name === 'stale-plan')?.status).toBe('crashed');

      // New plan should succeed (stale one gets auto-dismissed)
      harness.setArchitectPlanComplete('new-plan', [
        { number: 1, name: 'Phase 1', description: 'First', dependencies: [] },
      ]);

      const agent = await harness.caller.spawnArchitectPlan({
        name: 'new-plan',
        initiativeId: initiative.id,
      });
      expect(agent.mode).toBe('plan');
    });

    it('should allow plan for different initiatives', async () => {
      vi.useFakeTimers();

      const init1 = await harness.createInitiative('Initiative 1');
      const init2 = await harness.createInitiative('Initiative 2');

      // Long-running agent on initiative 1
      harness.setAgentScenario('plan-1', { status: 'done', delay: 999999 });
      await harness.caller.spawnArchitectPlan({
        name: 'plan-1',
        initiativeId: init1.id,
      });

      // Plan on initiative 2 should succeed
      harness.setArchitectPlanComplete('plan-2', [
        { number: 1, name: 'Phase 1', description: 'First', dependencies: [] },
      ]);

      const agent = await harness.caller.spawnArchitectPlan({
        name: 'plan-2',
        initiativeId: init2.id,
      });
      expect(agent.mode).toBe('plan');
    });
  });

  describe('full workflow', () => {
    it('should complete discuss -> plan -> phases workflow', async () => {
      vi.useFakeTimers();

      // 1. Create initiative
      const initiative = await harness.createInitiative('Full Workflow Test');

      // 2. Discuss phase
      harness.setArchitectDiscussComplete('discuss-agent', [
        { topic: 'Scope', decision: 'MVP only', reason: 'Time constraint' },
      ], 'Scope defined');

      await harness.caller.spawnArchitectDiscuss({
        name: 'discuss-agent',
        initiativeId: initiative.id,
      });
      await harness.advanceTimers();

      // 3. Plan phase
      harness.setArchitectPlanComplete('plan-agent', [
        { number: 1, name: 'Core', description: 'Core functionality', dependencies: [] },
        { number: 2, name: 'Polish', description: 'UI and UX', dependencies: [1] },
      ]);

      await harness.caller.spawnArchitectPlan({
        name: 'plan-agent',
        initiativeId: initiative.id,
        contextSummary: 'MVP scope defined',
      });
      await harness.advanceTimers();

      // 4. Persist phases
      await harness.createPhasesFromPlan(initiative.id, [
        { name: 'Core' },
        { name: 'Polish' },
      ]);

      // 5. Verify final state
      const phases = await harness.getPhases(initiative.id);
      expect(phases).toHaveLength(2);

      // Both agents should be idle
      const agents = await harness.caller.listAgents();
      expect(agents.filter(a => a.status === 'idle')).toHaveLength(2);
    });
  });
});
|
||||
385
apps/server/test/e2e/decompose-workflow.test.ts
Normal file
385
apps/server/test/e2e/decompose-workflow.test.ts
Normal file
@@ -0,0 +1,385 @@
|
||||
/**
|
||||
* E2E Tests for Detail Workflow
|
||||
*
|
||||
* Tests the complete detail workflow from phase through task creation:
|
||||
* - Detail mode: Break phase into executable tasks
|
||||
* - Q&A flow: Handle clarifying questions during detailing
|
||||
* - Task persistence: Save child tasks from detail output
|
||||
*
|
||||
* Uses TestHarness from src/test/ for full system wiring.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import { createTestHarness, type TestHarness } from '../index.js';
|
||||
import type { AgentStoppedEvent, AgentWaitingEvent } from '../../events/types.js';
|
||||
|
||||
describe('Detail Workflow E2E', () => {
|
||||
let harness: TestHarness;
|
||||
|
||||
beforeEach(() => {
|
||||
harness = createTestHarness();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
harness.cleanup();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
describe('spawn detail agent', () => {
|
||||
it('should spawn agent in detail mode and complete with tasks', async () => {
|
||||
vi.useFakeTimers();
|
||||
|
||||
// Setup: Create initiative -> phase -> plan
|
||||
const initiative = await harness.createInitiative('Test Project');
|
||||
const phases = await harness.createPhasesFromPlan(initiative.id, [
|
||||
{ name: 'Phase 1' },
|
||||
]);
|
||||
const detailTask = await harness.createDetailTask(phases[0].id, 'Auth Plan', 'Implement authentication');
|
||||
|
||||
// Set detail scenario
|
||||
harness.setArchitectDetailComplete('detailer', [
|
||||
{ number: 1, name: 'Create schema', content: 'User table', type: 'auto', dependencies: [] },
|
||||
{ number: 2, name: 'Create endpoint', content: 'Login API', type: 'auto', dependencies: [1] },
|
||||
]);
|
||||
|
||||
// Spawn detail agent
|
||||
const agent = await harness.caller.spawnArchitectDetail({
|
||||
name: 'detailer',
|
||||
phaseId: phases[0].id,
|
||||
});
|
||||
|
||||
expect(agent.mode).toBe('detail');
|
||||
|
||||
// Advance timers for async completion
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify agent completed
|
||||
const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].payload.name).toBe('detailer');
|
||||
expect(events[0].payload.reason).toBe('detail_complete');
|
||||
});
|
||||
|
||||
it('should pause on questions and resume', async () => {
|
||||
vi.useFakeTimers();
|
||||
|
||||
const initiative = await harness.createInitiative('Test Project');
|
||||
const phases = await harness.createPhasesFromPlan(initiative.id, [
|
||||
{ name: 'Phase 1' },
|
||||
]);
|
||||
const detailTask = await harness.createDetailTask(phases[0].id, 'Complex Plan');
|
||||
|
||||
// Set questions scenario
|
||||
harness.setArchitectDetailQuestions('detailer', [
|
||||
{ id: 'q1', question: 'How granular should tasks be?' },
|
||||
]);
|
||||
|
||||
const agent = await harness.caller.spawnArchitectDetail({
|
||||
name: 'detailer',
|
||||
phaseId: phases[0].id,
|
||||
});
|
||||
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify agent is waiting for input
|
||||
const waitingAgent = await harness.caller.getAgent({ name: 'detailer' });
|
||||
expect(waitingAgent?.status).toBe('waiting_for_input');
|
||||
|
||||
// Verify paused on questions (emits agent:waiting, not agent:stopped)
|
||||
const waitingEvents = harness.getEmittedEvents('agent:waiting') as AgentWaitingEvent[];
|
||||
expect(waitingEvents).toHaveLength(1);
|
||||
expect(waitingEvents[0].payload.questions).toHaveLength(1);
|
||||
|
||||
// Get pending questions
|
||||
const pending = await harness.mockAgentManager.getPendingQuestions(agent.id);
|
||||
expect(pending?.questions).toHaveLength(1);
|
||||
expect(pending?.questions[0].question).toBe('How granular should tasks be?');
|
||||
|
||||
// Set completion scenario for resume
|
||||
harness.setArchitectDetailComplete('detailer', [
|
||||
{ number: 1, name: 'Task 1', content: 'Single task', type: 'auto', dependencies: [] },
|
||||
]);
|
||||
|
||||
// Resume with answer
|
||||
await harness.caller.resumeAgent({
|
||||
name: 'detailer',
|
||||
answers: { q1: 'Very granular' },
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify completed after resume
|
||||
const finalAgent = await harness.caller.getAgent({ name: 'detailer' });
|
||||
expect(finalAgent?.status).toBe('idle');
|
||||
});
|
||||
|
||||
it('should handle multiple questions', async () => {
|
||||
vi.useFakeTimers();
|
||||
|
||||
const initiative = await harness.createInitiative('Multi-Q Project');
|
||||
const phases = await harness.createPhasesFromPlan(initiative.id, [
|
||||
{ name: 'Phase 1' },
|
||||
]);
|
||||
const detailTask = await harness.createDetailTask(phases[0].id, 'Complex Plan');
|
||||
|
||||
// Set multiple questions scenario
|
||||
harness.setArchitectDetailQuestions('detailer', [
|
||||
{ id: 'q1', question: 'What task granularity?', options: [{ label: 'Fine' }, { label: 'Coarse' }] },
|
||||
{ id: 'q2', question: 'Include checkpoints?' },
|
||||
{ id: 'q3', question: 'Any blocking dependencies?' },
|
||||
]);
|
||||
|
||||
const agent = await harness.caller.spawnArchitectDetail({
|
||||
name: 'detailer',
|
||||
phaseId: phases[0].id,
|
||||
});
|
||||
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify all questions received
|
||||
const pending = await harness.mockAgentManager.getPendingQuestions(agent.id);
|
||||
expect(pending?.questions).toHaveLength(3);
|
||||
|
||||
// Set completion scenario for resume
|
||||
harness.setArchitectDetailComplete('detailer', [
|
||||
{ number: 1, name: 'Task 1', content: 'First task', type: 'auto', dependencies: [] },
|
||||
{ number: 2, name: 'Task 2', content: 'Second task', type: 'auto', dependencies: [1] },
|
||||
{ number: 3, name: 'Verify', content: 'Verify all', type: 'checkpoint:human-verify', dependencies: [2] },
|
||||
]);
|
||||
|
||||
// Resume with all answers
|
||||
await harness.caller.resumeAgent({
|
||||
name: 'detailer',
|
||||
answers: {
|
||||
q1: 'Fine',
|
||||
q2: 'Yes, add human verification',
|
||||
q3: 'Tasks 1 and 2 are sequential',
|
||||
},
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify completed
|
||||
const finalAgent = await harness.caller.getAgent({ name: 'detailer' });
|
||||
expect(finalAgent?.status).toBe('idle');
|
||||
});
|
||||
});
|
||||
|
||||
describe('detail conflict detection', () => {
|
||||
it('should reject if a detail agent is already running for the same phase', async () => {
|
||||
vi.useFakeTimers();
|
||||
|
||||
const initiative = await harness.createInitiative('Test Project');
|
||||
const phases = await harness.createPhasesFromPlan(initiative.id, [
|
||||
{ name: 'Phase 1' },
|
||||
]);
|
||||
|
||||
// Long-running detail agent
|
||||
harness.setAgentScenario('detailer-1', { status: 'done', delay: 999999 });
|
||||
|
||||
await harness.caller.spawnArchitectDetail({
|
||||
name: 'detailer-1',
|
||||
phaseId: phases[0].id,
|
||||
});
|
||||
|
||||
// Second detail for same phase should be rejected
|
||||
await expect(
|
||||
harness.caller.spawnArchitectDetail({
|
||||
name: 'detailer-2',
|
||||
phaseId: phases[0].id,
|
||||
}),
|
||||
).rejects.toThrow(/already running/);
|
||||
});
|
||||
|
||||
it('should auto-dismiss stale detail agents before checking', async () => {
|
||||
vi.useFakeTimers();
|
||||
|
||||
const initiative = await harness.createInitiative('Test Project');
|
||||
const phases = await harness.createPhasesFromPlan(initiative.id, [
|
||||
{ name: 'Phase 1' },
|
||||
]);
|
||||
|
||||
// Detail agent that crashes immediately
|
||||
harness.setAgentScenario('stale-detailer', { status: 'error', error: 'crashed' });
|
||||
|
||||
await harness.caller.spawnArchitectDetail({
|
||||
name: 'stale-detailer',
|
||||
phaseId: phases[0].id,
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// New detail should succeed
|
||||
harness.setArchitectDetailComplete('new-detailer', [
|
||||
{ number: 1, name: 'Task 1', content: 'Do it', type: 'auto', dependencies: [] },
|
||||
]);
|
||||
|
||||
const agent = await harness.caller.spawnArchitectDetail({
|
||||
name: 'new-detailer',
|
||||
phaseId: phases[0].id,
|
||||
});
|
||||
expect(agent.mode).toBe('detail');
|
||||
});
|
||||
|
||||
it('should allow detail for different phases simultaneously', async () => {
|
||||
vi.useFakeTimers();
|
||||
|
||||
const initiative = await harness.createInitiative('Test Project');
|
||||
const phases = await harness.createPhasesFromPlan(initiative.id, [
|
||||
{ name: 'Phase 1' },
|
||||
{ name: 'Phase 2' },
|
||||
]);
|
||||
|
||||
// Long-running agent on phase 1
|
||||
harness.setAgentScenario('detailer-p1', { status: 'done', delay: 999999 });
|
||||
await harness.caller.spawnArchitectDetail({
|
||||
name: 'detailer-p1',
|
||||
phaseId: phases[0].id,
|
||||
});
|
||||
|
||||
// Detail on phase 2 should succeed
|
||||
harness.setArchitectDetailComplete('detailer-p2', [
|
||||
{ number: 1, name: 'Task 1', content: 'Do it', type: 'auto', dependencies: [] },
|
||||
]);
|
||||
|
||||
const agent = await harness.caller.spawnArchitectDetail({
|
||||
name: 'detailer-p2',
|
||||
phaseId: phases[1].id,
|
||||
});
|
||||
expect(agent.mode).toBe('detail');
|
||||
});
|
||||
});
|
||||
|
||||
describe('task persistence', () => {
|
||||
it('should create tasks from detail output', async () => {
|
||||
const initiative = await harness.createInitiative('Test Project');
|
||||
const phases = await harness.createPhasesFromPlan(initiative.id, [
|
||||
{ name: 'Phase 1' },
|
||||
]);
|
||||
const detailTask = await harness.createDetailTask(phases[0].id, 'Auth Plan');
|
||||
|
||||
// Create tasks from detail output
|
||||
await harness.caller.createChildTasks({
|
||||
parentTaskId: detailTask.id,
|
||||
tasks: [
|
||||
{ number: 1, name: 'Schema', description: 'Create tables', type: 'auto', dependencies: [] },
|
||||
{ number: 2, name: 'API', description: 'Create endpoints', type: 'auto', dependencies: [1] },
|
||||
{ number: 3, name: 'Verify', description: 'Test flow', type: 'checkpoint:human-verify', dependencies: [2] },
|
||||
],
|
||||
});
|
||||
|
||||
// Verify tasks created
|
||||
const tasks = await harness.getChildTasks(detailTask.id);
|
||||
expect(tasks).toHaveLength(3);
|
||||
expect(tasks[0].name).toBe('Schema');
|
||||
expect(tasks[1].name).toBe('API');
|
||||
expect(tasks[2].name).toBe('Verify');
|
||||
expect(tasks[2].type).toBe('checkpoint:human-verify');
|
||||
});
|
||||
|
||||
it('should handle all task types', async () => {
|
||||
const initiative = await harness.createInitiative('Task Types Test');
|
||||
const phases = await harness.createPhasesFromPlan(initiative.id, [
|
||||
{ name: 'Phase 1' },
|
||||
]);
|
||||
const detailTask = await harness.createDetailTask(phases[0].id, 'Mixed Tasks');
|
||||
|
||||
// Create tasks with all types
|
||||
await harness.caller.createChildTasks({
|
||||
parentTaskId: detailTask.id,
|
||||
tasks: [
|
||||
{ number: 1, name: 'Auto Task', description: 'Automated work', type: 'auto' },
|
||||
{ number: 2, name: 'Human Verify', description: 'Visual check', type: 'checkpoint:human-verify', dependencies: [1] },
|
||||
{ number: 3, name: 'Decision', description: 'Choose approach', type: 'checkpoint:decision', dependencies: [2] },
|
||||
{ number: 4, name: 'Human Action', description: 'Manual step', type: 'checkpoint:human-action', dependencies: [3] },
|
||||
],
|
||||
});
|
||||
|
||||
const tasks = await harness.getChildTasks(detailTask.id);
|
||||
expect(tasks).toHaveLength(4);
|
||||
expect(tasks[0].type).toBe('auto');
|
||||
expect(tasks[1].type).toBe('checkpoint:human-verify');
|
||||
expect(tasks[2].type).toBe('checkpoint:decision');
|
||||
expect(tasks[3].type).toBe('checkpoint:human-action');
|
||||
});
|
||||
|
||||
it('should create task dependencies', async () => {
|
||||
const initiative = await harness.createInitiative('Dependencies Test');
|
||||
const phases = await harness.createPhasesFromPlan(initiative.id, [
|
||||
{ name: 'Phase 1' },
|
||||
]);
|
||||
const detailTask = await harness.createDetailTask(phases[0].id, 'Dependent Tasks');
|
||||
|
||||
// Create tasks with complex dependencies
|
||||
await harness.caller.createChildTasks({
|
||||
parentTaskId: detailTask.id,
|
||||
tasks: [
|
||||
{ number: 1, name: 'Task A', description: 'No deps', type: 'auto' },
|
||||
{ number: 2, name: 'Task B', description: 'Depends on A', type: 'auto', dependencies: [1] },
|
||||
{ number: 3, name: 'Task C', description: 'Depends on A', type: 'auto', dependencies: [1] },
|
||||
{ number: 4, name: 'Task D', description: 'Depends on B and C', type: 'auto', dependencies: [2, 3] },
|
||||
],
|
||||
});
|
||||
|
||||
const tasks = await harness.getChildTasks(detailTask.id);
|
||||
expect(tasks).toHaveLength(4);
|
||||
|
||||
// All tasks should be created with correct names
|
||||
expect(tasks.map(t => t.name)).toEqual(['Task A', 'Task B', 'Task C', 'Task D']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('full detail workflow', () => {
|
||||
it('should complete initiative -> phase -> plan -> detail -> tasks workflow', async () => {
|
||||
vi.useFakeTimers();
|
||||
|
||||
// 1. Create initiative
|
||||
const initiative = await harness.createInitiative('Full Workflow Test');
|
||||
|
||||
// 2. Create phase
|
||||
const phases = await harness.createPhasesFromPlan(initiative.id, [
|
||||
{ name: 'Auth Phase' },
|
||||
]);
|
||||
|
||||
// 3. Create plan
|
||||
const detailTask = await harness.createDetailTask(phases[0].id, 'Auth Plan', 'Implement JWT auth');
|
||||
|
||||
// 4. Spawn detail agent
|
||||
harness.setArchitectDetailComplete('detailer', [
|
||||
{ number: 1, name: 'Create user schema', content: 'Define User model', type: 'auto', dependencies: [] },
|
||||
{ number: 2, name: 'Implement JWT', content: 'Token generation', type: 'auto', dependencies: [1] },
|
||||
{ number: 3, name: 'Protected routes', content: 'Middleware', type: 'auto', dependencies: [2] },
|
||||
{ number: 4, name: 'Verify auth', content: 'Test login flow', type: 'checkpoint:human-verify', dependencies: [3] },
|
||||
]);
|
||||
|
||||
await harness.caller.spawnArchitectDetail({
|
||||
name: 'detailer',
|
||||
phaseId: phases[0].id,
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// 5. Verify agent completed
|
||||
const events = harness.getEmittedEvents('agent:stopped') as AgentStoppedEvent[];
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].payload.reason).toBe('detail_complete');
|
||||
|
||||
// 6. Persist tasks (simulating what orchestrator would do after detail)
|
||||
await harness.caller.createChildTasks({
|
||||
parentTaskId: detailTask.id,
|
||||
tasks: [
|
||||
{ number: 1, name: 'Create user schema', description: 'Define User model', type: 'auto', dependencies: [] },
|
||||
{ number: 2, name: 'Implement JWT', description: 'Token generation', type: 'auto', dependencies: [1] },
|
||||
{ number: 3, name: 'Protected routes', description: 'Middleware', type: 'auto', dependencies: [2] },
|
||||
{ number: 4, name: 'Verify auth', description: 'Test login flow', type: 'checkpoint:human-verify', dependencies: [3] },
|
||||
],
|
||||
});
|
||||
|
||||
// 7. Verify final state
|
||||
const tasks = await harness.getChildTasks(detailTask.id);
|
||||
expect(tasks).toHaveLength(4);
|
||||
expect(tasks[0].name).toBe('Create user schema');
|
||||
expect(tasks[3].type).toBe('checkpoint:human-verify');
|
||||
|
||||
// Agent should be idle
|
||||
const finalAgent = await harness.caller.getAgent({ name: 'detailer' });
|
||||
expect(finalAgent?.status).toBe('idle');
|
||||
});
|
||||
});
|
||||
});
|
||||
426
apps/server/test/e2e/edge-cases.test.ts
Normal file
426
apps/server/test/e2e/edge-cases.test.ts
Normal file
@@ -0,0 +1,426 @@
|
||||
/**
|
||||
* E2E Tests for Edge Cases
|
||||
*
|
||||
* Tests edge case scenarios in dispatch/coordination flow:
|
||||
* - Agent crashes during task
|
||||
* - Agent waiting for input
|
||||
* - Task blocking
|
||||
* - Merge conflicts
|
||||
*
|
||||
* Uses TestHarness from src/test/ for full system wiring.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import {
|
||||
createTestHarness,
|
||||
SIMPLE_FIXTURE,
|
||||
type TestHarness,
|
||||
} from '../index.js';
|
||||
import type {
|
||||
AgentSpawnedEvent,
|
||||
AgentCrashedEvent,
|
||||
AgentWaitingEvent,
|
||||
TaskBlockedEvent,
|
||||
MergeConflictedEvent,
|
||||
} from '../../events/types.js';
|
||||
|
||||
describe('E2E Edge Cases', () => {
|
||||
let harness: TestHarness;
|
||||
|
||||
beforeEach(() => {
|
||||
harness = createTestHarness();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
harness.cleanup();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
describe('Agent crash during task', () => {
|
||||
it('emits agent:spawned then agent:crashed events', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent for DispatchManager
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Set error scenario BEFORE dispatch
|
||||
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
|
||||
status: 'error',
|
||||
error: 'Token limit exceeded',
|
||||
});
|
||||
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
harness.clearEvents();
|
||||
|
||||
await harness.dispatchManager.dispatchNext();
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify: agent:spawned event emitted
|
||||
const spawnedEvents = harness.getEventsByType('agent:spawned');
|
||||
expect(spawnedEvents.length).toBe(1);
|
||||
const spawnedPayload = (spawnedEvents[0] as AgentSpawnedEvent).payload;
|
||||
expect(spawnedPayload.taskId).toBe(taskAId);
|
||||
|
||||
// Verify: agent:crashed event emitted
|
||||
const crashedEvents = harness.getEventsByType('agent:crashed');
|
||||
expect(crashedEvents.length).toBe(1);
|
||||
const crashedPayload = (crashedEvents[0] as AgentCrashedEvent).payload;
|
||||
expect(crashedPayload.taskId).toBe(taskAId);
|
||||
expect(crashedPayload.error).toBe('Token limit exceeded');
|
||||
});
|
||||
|
||||
it('task status should NOT be completed after crash', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Set error scenario
|
||||
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
|
||||
status: 'error',
|
||||
error: 'Token limit exceeded',
|
||||
});
|
||||
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
await harness.dispatchManager.dispatchNext();
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Task status should be 'in_progress' (not 'completed')
|
||||
const task = await harness.taskRepository.findById(taskAId);
|
||||
expect(task?.status).toBe('in_progress');
|
||||
});
|
||||
|
||||
it('captures error message in agent result', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Set error scenario
|
||||
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
|
||||
status: 'error',
|
||||
error: 'Out of memory',
|
||||
});
|
||||
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Get agent result - should have error
|
||||
const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
|
||||
expect(agentResult).not.toBeNull();
|
||||
expect(agentResult?.success).toBe(false);
|
||||
expect(agentResult?.message).toBe('Out of memory');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Agent waiting for input and resume', () => {
|
||||
it('emits agent:waiting event with question', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Set questions scenario
|
||||
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
|
||||
status: 'questions',
|
||||
questions: [{ id: 'q1', question: 'Which database should I use?' }],
|
||||
});
|
||||
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
harness.clearEvents();
|
||||
|
||||
await harness.dispatchManager.dispatchNext();
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify: agent:waiting event emitted
|
||||
const waitingEvents = harness.getEventsByType('agent:waiting');
|
||||
expect(waitingEvents.length).toBe(1);
|
||||
const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
|
||||
expect(waitingPayload.taskId).toBe(taskAId);
|
||||
expect(waitingPayload.questions[0].question).toBe('Which database should I use?');
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
describe('Task blocking', () => {
|
||||
it('blocked task appears in blocked list from getQueueState', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
await harness.dispatchManager.blockTask(taskAId, 'Waiting for user decision');
|
||||
|
||||
const queueState = await harness.dispatchManager.getQueueState();
|
||||
|
||||
expect(queueState.blocked.length).toBe(1);
|
||||
expect(queueState.blocked[0].taskId).toBe(taskAId);
|
||||
expect(queueState.blocked[0].reason).toBe('Waiting for user decision');
|
||||
});
|
||||
|
||||
it('blocked task emits task:blocked event', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
harness.clearEvents();
|
||||
|
||||
await harness.dispatchManager.blockTask(taskAId, 'Waiting for user decision');
|
||||
|
||||
const blockedEvents = harness.getEventsByType('task:blocked');
|
||||
expect(blockedEvents.length).toBe(1);
|
||||
const blockedPayload = (blockedEvents[0] as TaskBlockedEvent).payload;
|
||||
expect(blockedPayload.taskId).toBe(taskAId);
|
||||
expect(blockedPayload.reason).toBe('Waiting for user decision');
|
||||
});
|
||||
|
||||
it('getNextDispatchable does not return blocked task', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
const taskBId = seeded.tasks.get('Task B')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Queue Task A and block it
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
await harness.dispatchManager.blockTask(taskAId, 'Blocked for testing');
|
||||
|
||||
// Queue Task B (not blocked, but depends on Task A which needs to be completed first)
|
||||
// Actually Task B depends on Task A in SIMPLE_FIXTURE, but the dependency
|
||||
// isn't loaded into the queue. Queue a fresh task instead.
|
||||
// For this test, we just verify blocked task is not returned.
|
||||
|
||||
// Get next dispatchable - should be null since Task A is blocked
|
||||
const next = await harness.dispatchManager.getNextDispatchable();
|
||||
expect(next).toBeNull();
|
||||
});
|
||||
|
||||
it('task status is set to blocked in database', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
await harness.dispatchManager.blockTask(taskAId, 'Blocked for testing');
|
||||
|
||||
const task = await harness.taskRepository.findById(taskAId);
|
||||
expect(task?.status).toBe('blocked');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Merge conflict handling', () => {
|
||||
it('detects merge conflict and emits merge:conflicted event', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Mark task as completed (required for merge)
|
||||
await harness.taskRepository.update(taskAId, { status: 'completed' });
|
||||
|
||||
// Create a worktree for this task
|
||||
const worktreeId = `wt-${taskAId.slice(0, 6)}`;
|
||||
await harness.worktreeManager.create(worktreeId, 'feature-task-a');
|
||||
|
||||
// Create agent in agentRepository with worktreeId
|
||||
// (coordinationManager.queueMerge looks up agent by taskId)
|
||||
const agent = await harness.agentRepository.create({
|
||||
name: `agent-${taskAId.slice(0, 6)}`,
|
||||
worktreeId,
|
||||
taskId: taskAId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
// Set up merge conflict result BEFORE processMerges
|
||||
harness.worktreeManager.setMergeResult(worktreeId, {
|
||||
success: false,
|
||||
conflicts: ['src/shared.ts', 'src/types.ts'],
|
||||
message: 'Merge conflict in 2 files',
|
||||
});
|
||||
|
||||
// Queue for merge
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
harness.clearEvents();
|
||||
|
||||
// Process merges - should hit conflict
|
||||
const results = await harness.coordinationManager.processMerges('main');
|
||||
|
||||
// Verify: merge result indicates failure
|
||||
expect(results.length).toBe(1);
|
||||
expect(results[0].success).toBe(false);
|
||||
expect(results[0].conflicts).toEqual(['src/shared.ts', 'src/types.ts']);
|
||||
|
||||
// Verify: merge:conflicted event emitted
|
||||
const conflictEvents = harness.getEventsByType('merge:conflicted');
|
||||
expect(conflictEvents.length).toBe(1);
|
||||
const conflictPayload = (conflictEvents[0] as MergeConflictedEvent).payload;
|
||||
expect(conflictPayload.taskId).toBe(taskAId);
|
||||
expect(conflictPayload.conflictingFiles).toEqual(['src/shared.ts', 'src/types.ts']);
|
||||
});
|
||||
|
||||
it('conflict appears in queue state as conflicted', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Mark task as completed
|
||||
await harness.taskRepository.update(taskAId, { status: 'completed' });
|
||||
|
||||
// Create worktree
|
||||
const worktreeId = `wt-${taskAId.slice(0, 6)}`;
|
||||
await harness.worktreeManager.create(worktreeId, 'feature-task-a');
|
||||
|
||||
// Create agent in agentRepository
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${taskAId.slice(0, 6)}`,
|
||||
worktreeId,
|
||||
taskId: taskAId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
// Set up merge conflict
|
||||
harness.worktreeManager.setMergeResult(worktreeId, {
|
||||
success: false,
|
||||
conflicts: ['src/shared.ts'],
|
||||
message: 'Merge conflict',
|
||||
});
|
||||
|
||||
// Queue and process
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
await harness.coordinationManager.processMerges('main');
|
||||
|
||||
// Check queue state
|
||||
const queueState = await harness.coordinationManager.getQueueState();
|
||||
expect(queueState.conflicted.length).toBe(1);
|
||||
expect(queueState.conflicted[0].taskId).toBe(taskAId);
|
||||
expect(queueState.conflicted[0].conflicts).toContain('src/shared.ts');
|
||||
});
|
||||
|
||||
it('handleConflict creates conflict-resolution task', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Mark task as completed
|
||||
await harness.taskRepository.update(taskAId, { status: 'completed' });
|
||||
|
||||
// Create worktree
|
||||
const worktreeId = `wt-${taskAId.slice(0, 6)}`;
|
||||
await harness.worktreeManager.create(worktreeId, 'feature-task-a');
|
||||
|
||||
// Create agent in agentRepository
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${taskAId.slice(0, 6)}`,
|
||||
worktreeId,
|
||||
taskId: taskAId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
// Set up merge conflict
|
||||
harness.worktreeManager.setMergeResult(worktreeId, {
|
||||
success: false,
|
||||
conflicts: ['src/shared.ts', 'src/types.ts'],
|
||||
message: 'Merge conflict',
|
||||
});
|
||||
|
||||
// Queue and process (handleConflict is called automatically)
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
await harness.coordinationManager.processMerges('main');
|
||||
|
||||
// Verify: original task is now blocked
|
||||
const originalTask = await harness.taskRepository.findById(taskAId);
|
||||
expect(originalTask?.status).toBe('blocked');
|
||||
|
||||
// Verify: task:queued event emitted for conflict resolution task
|
||||
const queuedEvents = harness.getEventsByType('task:queued');
|
||||
const conflictTaskEvent = queuedEvents.find(
|
||||
(e) => e.payload && (e.payload as { taskId: string }).taskId !== taskAId
|
||||
);
|
||||
expect(conflictTaskEvent).toBeDefined();
|
||||
});
|
||||
|
||||
it('successful merge after clearing conflict result', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
const taskBId = seeded.tasks.get('Task B')!;
|
||||
|
||||
// Set up Task A for merge (with conflict)
|
||||
await harness.taskRepository.update(taskAId, { status: 'completed' });
|
||||
const worktreeIdA = `wt-${taskAId.slice(0, 6)}`;
|
||||
await harness.worktreeManager.create(worktreeIdA, 'feature-task-a');
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${taskAId.slice(0, 6)}`,
|
||||
worktreeId: worktreeIdA,
|
||||
taskId: taskAId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
// Set conflict for Task A
|
||||
harness.worktreeManager.setMergeResult(worktreeIdA, {
|
||||
success: false,
|
||||
conflicts: ['src/shared.ts'],
|
||||
message: 'Merge conflict',
|
||||
});
|
||||
|
||||
// Process Task A merge (will conflict)
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
const conflictResults = await harness.coordinationManager.processMerges('main');
|
||||
expect(conflictResults[0].success).toBe(false);
|
||||
|
||||
// Now set up Task B for merge (should succeed)
|
||||
await harness.taskRepository.update(taskBId, { status: 'completed' });
|
||||
const worktreeIdB = `wt-${taskBId.slice(0, 6)}`;
|
||||
await harness.worktreeManager.create(worktreeIdB, 'feature-task-b');
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${taskBId.slice(0, 6)}`,
|
||||
worktreeId: worktreeIdB,
|
||||
taskId: taskBId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
// Task B merge should succeed (default behavior)
|
||||
await harness.coordinationManager.queueMerge(taskBId);
|
||||
harness.clearEvents();
|
||||
const successResults = await harness.coordinationManager.processMerges('main');
|
||||
|
||||
// Verify Task B merged successfully
|
||||
expect(successResults.length).toBe(1);
|
||||
expect(successResults[0].taskId).toBe(taskBId);
|
||||
expect(successResults[0].success).toBe(true);
|
||||
|
||||
// Verify Task B in merged list
|
||||
const queueState = await harness.coordinationManager.getQueueState();
|
||||
expect(queueState.merged).toContain(taskBId);
|
||||
});
|
||||
});
|
||||
});
|
||||
551
apps/server/test/e2e/extended-scenarios.test.ts
Normal file
551
apps/server/test/e2e/extended-scenarios.test.ts
Normal file
@@ -0,0 +1,551 @@
|
||||
/**
|
||||
* E2E Tests for Extended Scenarios
|
||||
*
|
||||
* Tests extended scenarios in dispatch/coordination flow:
|
||||
* - Conflict hand-back round-trip (conflict -> agent resolves -> merge succeeds)
|
||||
* - Multi-agent parallel work and completion
|
||||
*
|
||||
* Uses TestHarness from src/test/ for full system wiring.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import {
|
||||
createTestHarness,
|
||||
SIMPLE_FIXTURE,
|
||||
PARALLEL_FIXTURE,
|
||||
COMPLEX_FIXTURE,
|
||||
type TestHarness,
|
||||
} from '../index.js';
|
||||
import type {
|
||||
MergeConflictedEvent,
|
||||
MergeCompletedEvent,
|
||||
TaskQueuedEvent,
|
||||
AgentStoppedEvent,
|
||||
AgentCrashedEvent,
|
||||
} from '../../events/types.js';
|
||||
|
||||
describe('E2E Extended Scenarios', () => {
|
||||
let harness: TestHarness;
|
||||
|
||||
beforeEach(() => {
|
||||
harness = createTestHarness();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
harness.cleanup();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Conflict Hand-back Round-trip
|
||||
// ===========================================================================
|
||||
|
||||
describe('Conflict hand-back round-trip', () => {
|
||||
it('conflict triggers resolution task, agent resolves, merge succeeds', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Step 1: Complete Task A
|
||||
await harness.taskRepository.update(taskAId, { status: 'completed' });
|
||||
|
||||
// Step 2: Create agent in agentRepository with worktreeId
|
||||
const worktreeId = `wt-${taskAId.slice(0, 6)}`;
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${taskAId.slice(0, 6)}`,
|
||||
worktreeId,
|
||||
taskId: taskAId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
// Step 3: Create worktree via MockWorktreeManager
|
||||
await harness.worktreeManager.create(worktreeId, 'feature-task-a');
|
||||
|
||||
// Step 4: Set merge conflict result for first merge attempt
|
||||
harness.worktreeManager.setMergeResult(worktreeId, {
|
||||
success: false,
|
||||
conflicts: ['src/shared.ts', 'src/types.ts'],
|
||||
message: 'Merge conflict in 2 files',
|
||||
});
|
||||
|
||||
// Step 5: Queue and process merge (should fail with conflict)
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
harness.clearEvents();
|
||||
const conflictResults = await harness.coordinationManager.processMerges('main');
|
||||
|
||||
// Verify: merge failed with conflict
|
||||
expect(conflictResults.length).toBe(1);
|
||||
expect(conflictResults[0].success).toBe(false);
|
||||
expect(conflictResults[0].conflicts).toEqual(['src/shared.ts', 'src/types.ts']);
|
||||
|
||||
// Verify: merge:conflicted event emitted
|
||||
const conflictedEvents = harness.getEventsByType('merge:conflicted');
|
||||
expect(conflictedEvents.length).toBe(1);
|
||||
const conflictPayload = (conflictedEvents[0] as MergeConflictedEvent).payload;
|
||||
expect(conflictPayload.taskId).toBe(taskAId);
|
||||
expect(conflictPayload.conflictingFiles).toEqual(['src/shared.ts', 'src/types.ts']);
|
||||
|
||||
// Verify: original task marked blocked
|
||||
const originalTask = await harness.taskRepository.findById(taskAId);
|
||||
expect(originalTask?.status).toBe('blocked');
|
||||
|
||||
// Note: CoordinationManager.handleConflict updates task status to blocked
|
||||
// but does not emit task:blocked event (that's emitted by DispatchManager.blockTask)
|
||||
|
||||
// Verify: task:queued event emitted for resolution task
|
||||
const queuedEvents = harness.getEventsByType('task:queued');
|
||||
const resolutionTaskEvent = queuedEvents.find(
|
||||
(e) => (e as TaskQueuedEvent).payload.taskId !== taskAId
|
||||
);
|
||||
expect(resolutionTaskEvent).toBeDefined();
|
||||
|
||||
// Step 6: Clear the merge conflict (setMergeResult to success)
|
||||
harness.worktreeManager.setMergeResult(worktreeId, {
|
||||
success: true,
|
||||
message: 'Merged successfully',
|
||||
});
|
||||
|
||||
// Step 7: Re-queue original task for merge (simulating resolution completed)
|
||||
// In a real system, the resolution task would fix conflicts and re-queue
|
||||
// Here we simulate by clearing conflict and re-queuing
|
||||
await harness.taskRepository.update(taskAId, { status: 'completed' });
|
||||
harness.clearEvents();
|
||||
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
const successResults = await harness.coordinationManager.processMerges('main');
|
||||
|
||||
// Verify: merge succeeded
|
||||
expect(successResults.length).toBe(1);
|
||||
expect(successResults[0].taskId).toBe(taskAId);
|
||||
expect(successResults[0].success).toBe(true);
|
||||
|
||||
// Verify: merge:completed event for original task
|
||||
const completedEvents = harness.getEventsByType('merge:completed');
|
||||
expect(completedEvents.length).toBe(1);
|
||||
const completedPayload = (completedEvents[0] as MergeCompletedEvent).payload;
|
||||
expect(completedPayload.taskId).toBe(taskAId);
|
||||
});
|
||||
|
||||
it('conflict resolution preserves original task context', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Complete Task A
|
||||
await harness.taskRepository.update(taskAId, { status: 'completed' });
|
||||
|
||||
// Create agent and worktree
|
||||
const worktreeId = `wt-${taskAId.slice(0, 6)}`;
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${taskAId.slice(0, 6)}`,
|
||||
worktreeId,
|
||||
taskId: taskAId,
|
||||
status: 'idle',
|
||||
});
|
||||
await harness.worktreeManager.create(worktreeId, 'feature-task-a');
|
||||
|
||||
// Set conflict
|
||||
harness.worktreeManager.setMergeResult(worktreeId, {
|
||||
success: false,
|
||||
conflicts: ['src/conflict-file.ts'],
|
||||
message: 'Merge conflict',
|
||||
});
|
||||
|
||||
// Process merge to trigger conflict handling
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
harness.clearEvents();
|
||||
await harness.coordinationManager.processMerges('main');
|
||||
|
||||
// Get the resolution task from task:queued events
|
||||
const queuedEvents = harness.getEventsByType('task:queued');
|
||||
expect(queuedEvents.length).toBeGreaterThan(0);
|
||||
|
||||
// Find resolution task (the one that isn't the original task)
|
||||
const resolutionTaskQueuedEvent = queuedEvents.find(
|
||||
(e) => (e as TaskQueuedEvent).payload.taskId !== taskAId
|
||||
);
|
||||
expect(resolutionTaskQueuedEvent).toBeDefined();
|
||||
|
||||
// Resolution task should exist and link back to original task
|
||||
const resolutionTaskId = (resolutionTaskQueuedEvent as TaskQueuedEvent).payload.taskId;
|
||||
const resolutionTask = await harness.taskRepository.findById(resolutionTaskId);
|
||||
expect(resolutionTask).toBeDefined();
|
||||
|
||||
// Resolution task description should contain conflict file info
|
||||
expect(resolutionTask?.description).toContain('conflict');
|
||||
});
|
||||
|
||||
it('multiple sequential conflicts resolved in order', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
const taskBId = seeded.tasks.get('Task B')!;
|
||||
|
||||
// Complete both tasks
|
||||
await harness.taskRepository.update(taskAId, { status: 'completed' });
|
||||
await harness.taskRepository.update(taskBId, { status: 'completed' });
|
||||
|
||||
// Set up worktrees and agents for both tasks
|
||||
const worktreeIdA = `wt-${taskAId.slice(0, 6)}`;
|
||||
const worktreeIdB = `wt-${taskBId.slice(0, 6)}`;
|
||||
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${taskAId.slice(0, 6)}`,
|
||||
worktreeId: worktreeIdA,
|
||||
taskId: taskAId,
|
||||
status: 'idle',
|
||||
});
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${taskBId.slice(0, 6)}`,
|
||||
worktreeId: worktreeIdB,
|
||||
taskId: taskBId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
await harness.worktreeManager.create(worktreeIdA, 'feature-task-a');
|
||||
await harness.worktreeManager.create(worktreeIdB, 'feature-task-b');
|
||||
|
||||
// Set conflicts for both
|
||||
harness.worktreeManager.setMergeResult(worktreeIdA, {
|
||||
success: false,
|
||||
conflicts: ['src/shared-a.ts'],
|
||||
message: 'Conflict A',
|
||||
});
|
||||
harness.worktreeManager.setMergeResult(worktreeIdB, {
|
||||
success: false,
|
||||
conflicts: ['src/shared-b.ts'],
|
||||
message: 'Conflict B',
|
||||
});
|
||||
|
||||
// Queue both for merge
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
await harness.coordinationManager.queueMerge(taskBId);
|
||||
harness.clearEvents();
|
||||
|
||||
// Process merges - both should fail
|
||||
const conflictResults = await harness.coordinationManager.processMerges('main');
|
||||
expect(conflictResults.filter((r) => !r.success).length).toBe(2);
|
||||
|
||||
// Verify both are in conflicted state
|
||||
const queueState = await harness.coordinationManager.getQueueState();
|
||||
expect(queueState.conflicted.length).toBe(2);
|
||||
|
||||
// Resolve Task A's conflict
|
||||
harness.worktreeManager.setMergeResult(worktreeIdA, {
|
||||
success: true,
|
||||
message: 'Merged A',
|
||||
});
|
||||
await harness.taskRepository.update(taskAId, { status: 'completed' });
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
harness.clearEvents();
|
||||
|
||||
const resultA = await harness.coordinationManager.processMerges('main');
|
||||
expect(resultA.length).toBe(1);
|
||||
expect(resultA[0].taskId).toBe(taskAId);
|
||||
expect(resultA[0].success).toBe(true);
|
||||
|
||||
// Verify merge:completed for A
|
||||
const completedEventsA = harness.getEventsByType('merge:completed');
|
||||
expect(completedEventsA.length).toBe(1);
|
||||
expect((completedEventsA[0] as MergeCompletedEvent).payload.taskId).toBe(taskAId);
|
||||
|
||||
// Resolve Task B's conflict
|
||||
harness.worktreeManager.setMergeResult(worktreeIdB, {
|
||||
success: true,
|
||||
message: 'Merged B',
|
||||
});
|
||||
await harness.taskRepository.update(taskBId, { status: 'completed' });
|
||||
await harness.coordinationManager.queueMerge(taskBId);
|
||||
harness.clearEvents();
|
||||
|
||||
const resultB = await harness.coordinationManager.processMerges('main');
|
||||
expect(resultB.length).toBe(1);
|
||||
expect(resultB[0].taskId).toBe(taskBId);
|
||||
expect(resultB[0].success).toBe(true);
|
||||
|
||||
// Verify merge:completed for B
|
||||
const completedEventsB = harness.getEventsByType('merge:completed');
|
||||
expect(completedEventsB.length).toBe(1);
|
||||
expect((completedEventsB[0] as MergeCompletedEvent).payload.taskId).toBe(taskBId);
|
||||
|
||||
// Verify final merged list has both
|
||||
const finalState = await harness.coordinationManager.getQueueState();
|
||||
expect(finalState.merged).toContain(taskAId);
|
||||
expect(finalState.merged).toContain(taskBId);
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Multi-agent Parallel Work
|
||||
// ===========================================================================
|
||||
|
||||
describe('Multi-agent parallel work', () => {
|
||||
it('multiple agents complete tasks in parallel', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(PARALLEL_FIXTURE);
|
||||
const taskXId = seeded.tasks.get('Task X')!;
|
||||
const taskYId = seeded.tasks.get('Task Y')!;
|
||||
const taskPId = seeded.tasks.get('Task P')!;
|
||||
const taskQId = seeded.tasks.get('Task Q')!;
|
||||
|
||||
// Pre-seed 3 idle agents
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent-1',
|
||||
taskId: 'placeholder-1',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent-2',
|
||||
taskId: 'placeholder-2',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent-3',
|
||||
taskId: 'placeholder-3',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
harness.clearEvents();
|
||||
|
||||
// Queue all 4 tasks
|
||||
await harness.dispatchManager.queue(taskXId);
|
||||
await harness.dispatchManager.queue(taskYId);
|
||||
await harness.dispatchManager.queue(taskPId);
|
||||
await harness.dispatchManager.queue(taskQId);
|
||||
harness.clearEvents();
|
||||
|
||||
// Dispatch 3 tasks in parallel (3 agents working)
|
||||
const result1 = await harness.dispatchManager.dispatchNext();
|
||||
const result2 = await harness.dispatchManager.dispatchNext();
|
||||
const result3 = await harness.dispatchManager.dispatchNext();
|
||||
|
||||
expect(result1.success).toBe(true);
|
||||
expect(result2.success).toBe(true);
|
||||
expect(result3.success).toBe(true);
|
||||
|
||||
// All 3 should be dispatched to different agents
|
||||
const dispatchedIds = [result1.agentId, result2.agentId, result3.agentId];
|
||||
expect(new Set(dispatchedIds).size).toBe(3);
|
||||
|
||||
// Advance timers to complete all 3 agents
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify: 3 agent:stopped events
|
||||
const stoppedEvents = harness.getEventsByType('agent:stopped');
|
||||
expect(stoppedEvents.length).toBe(3);
|
||||
|
||||
// Complete all 3 tasks
|
||||
await harness.dispatchManager.completeTask(result1.taskId!);
|
||||
await harness.dispatchManager.completeTask(result2.taskId!);
|
||||
await harness.dispatchManager.completeTask(result3.taskId!);
|
||||
|
||||
// Dispatch remaining task (Task Q)
|
||||
const result4 = await harness.dispatchManager.dispatchNext();
|
||||
expect(result4.success).toBe(true);
|
||||
|
||||
await harness.advanceTimers();
|
||||
await harness.dispatchManager.completeTask(result4.taskId!);
|
||||
|
||||
// Verify: all 4 tasks completed in database
|
||||
const tasks = await Promise.all([
|
||||
harness.taskRepository.findById(taskXId),
|
||||
harness.taskRepository.findById(taskYId),
|
||||
harness.taskRepository.findById(taskPId),
|
||||
harness.taskRepository.findById(taskQId),
|
||||
]);
|
||||
expect(tasks.every((t) => t?.status === 'completed')).toBe(true);
|
||||
});
|
||||
|
||||
it('parallel merges process in correct dependency order', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
|
||||
|
||||
const task1AId = seeded.tasks.get('Task 1A')!;
|
||||
const task1BId = seeded.tasks.get('Task 1B')!;
|
||||
const task2AId = seeded.tasks.get('Task 2A')!;
|
||||
const task3AId = seeded.tasks.get('Task 3A')!;
|
||||
const task4AId = seeded.tasks.get('Task 4A')!;
|
||||
|
||||
// Complete Task 1A and Task 1B (no dependencies)
|
||||
await harness.taskRepository.update(task1AId, { status: 'completed' });
|
||||
await harness.taskRepository.update(task1BId, { status: 'completed' });
|
||||
|
||||
// Set up worktrees and agents for both
|
||||
const wt1A = `wt-${task1AId.slice(0, 6)}`;
|
||||
const wt1B = `wt-${task1BId.slice(0, 6)}`;
|
||||
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${task1AId.slice(0, 6)}`,
|
||||
worktreeId: wt1A,
|
||||
taskId: task1AId,
|
||||
status: 'idle',
|
||||
});
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${task1BId.slice(0, 6)}`,
|
||||
worktreeId: wt1B,
|
||||
taskId: task1BId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
await harness.worktreeManager.create(wt1A, 'feature-1a');
|
||||
await harness.worktreeManager.create(wt1B, 'feature-1b');
|
||||
|
||||
// Queue both for merge
|
||||
await harness.coordinationManager.queueMerge(task1AId);
|
||||
await harness.coordinationManager.queueMerge(task1BId);
|
||||
harness.clearEvents();
|
||||
|
||||
// Process merges - both should succeed (no dependencies between them)
|
||||
const results1 = await harness.coordinationManager.processMerges('main');
|
||||
expect(results1.length).toBe(2);
|
||||
expect(results1.every((r) => r.success)).toBe(true);
|
||||
|
||||
// Verify: merge:completed for both in same batch
|
||||
const completed1 = harness.getEventsByType('merge:completed');
|
||||
expect(completed1.length).toBe(2);
|
||||
|
||||
// Complete Task 2A (depends on 1A) and Task 3A (depends on 1B)
|
||||
await harness.taskRepository.update(task2AId, { status: 'completed' });
|
||||
await harness.taskRepository.update(task3AId, { status: 'completed' });
|
||||
|
||||
const wt2A = `wt-${task2AId.slice(0, 6)}`;
|
||||
const wt3A = `wt-${task3AId.slice(0, 6)}`;
|
||||
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${task2AId.slice(0, 6)}`,
|
||||
worktreeId: wt2A,
|
||||
taskId: task2AId,
|
||||
status: 'idle',
|
||||
});
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${task3AId.slice(0, 6)}`,
|
||||
worktreeId: wt3A,
|
||||
taskId: task3AId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
await harness.worktreeManager.create(wt2A, 'feature-2a');
|
||||
await harness.worktreeManager.create(wt3A, 'feature-3a');
|
||||
|
||||
// Queue and merge
|
||||
await harness.coordinationManager.queueMerge(task2AId);
|
||||
await harness.coordinationManager.queueMerge(task3AId);
|
||||
harness.clearEvents();
|
||||
|
||||
const results2 = await harness.coordinationManager.processMerges('main');
|
||||
expect(results2.length).toBe(2);
|
||||
expect(results2.every((r) => r.success)).toBe(true);
|
||||
|
||||
// Complete Task 4A (depends on 2A and 3A)
|
||||
await harness.taskRepository.update(task4AId, { status: 'completed' });
|
||||
|
||||
const wt4A = `wt-${task4AId.slice(0, 6)}`;
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${task4AId.slice(0, 6)}`,
|
||||
worktreeId: wt4A,
|
||||
taskId: task4AId,
|
||||
status: 'idle',
|
||||
});
|
||||
await harness.worktreeManager.create(wt4A, 'feature-4a');
|
||||
|
||||
// Queue and merge
|
||||
await harness.coordinationManager.queueMerge(task4AId);
|
||||
harness.clearEvents();
|
||||
|
||||
const results3 = await harness.coordinationManager.processMerges('main');
|
||||
expect(results3.length).toBe(1);
|
||||
expect(results3[0].taskId).toBe(task4AId);
|
||||
expect(results3[0].success).toBe(true);
|
||||
|
||||
// Verify: final merge order respects dependency graph
|
||||
const finalState = await harness.coordinationManager.getQueueState();
|
||||
expect(finalState.merged).toContain(task1AId);
|
||||
expect(finalState.merged).toContain(task1BId);
|
||||
expect(finalState.merged).toContain(task2AId);
|
||||
expect(finalState.merged).toContain(task3AId);
|
||||
expect(finalState.merged).toContain(task4AId);
|
||||
});
|
||||
|
||||
it('parallel dispatch with mixed outcomes', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(PARALLEL_FIXTURE);
|
||||
const taskXId = seeded.tasks.get('Task X')!;
|
||||
const taskYId = seeded.tasks.get('Task Y')!;
|
||||
|
||||
// Pre-seed 2 agents
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent-1',
|
||||
taskId: 'placeholder-1',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent-2',
|
||||
taskId: 'placeholder-2',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Set Task X to succeed, Task Y to crash
|
||||
harness.setAgentDone(`agent-${taskXId.slice(0, 6)}`, 'Task X completed');
|
||||
harness.setAgentError(`agent-${taskYId.slice(0, 6)}`, 'Out of memory error');
|
||||
|
||||
// Queue both tasks
|
||||
await harness.dispatchManager.queue(taskXId);
|
||||
await harness.dispatchManager.queue(taskYId);
|
||||
harness.clearEvents();
|
||||
|
||||
// Dispatch both tasks
|
||||
const result1 = await harness.dispatchManager.dispatchNext();
|
||||
const result2 = await harness.dispatchManager.dispatchNext();
|
||||
|
||||
// Both should dispatch successfully
|
||||
expect(result1.success).toBe(true);
|
||||
expect(result2.success).toBe(true);
|
||||
|
||||
// Run timers to complete agents
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify: one agent:stopped, one agent:crashed
|
||||
const stoppedEvents = harness.getEventsByType('agent:stopped');
|
||||
const crashedEvents = harness.getEventsByType('agent:crashed');
|
||||
|
||||
expect(stoppedEvents.length).toBe(1);
|
||||
expect(crashedEvents.length).toBe(1);
|
||||
|
||||
// Identify which task succeeded and which crashed
|
||||
const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
|
||||
const crashedPayload = (crashedEvents[0] as AgentCrashedEvent).payload;
|
||||
|
||||
// Find the successful task
|
||||
const successTaskId = stoppedPayload.taskId;
|
||||
const crashedTaskId = crashedPayload.taskId;
|
||||
|
||||
// Complete the successful task
|
||||
await harness.dispatchManager.completeTask(successTaskId!);
|
||||
|
||||
// Verify: completed task is actually completed
|
||||
const completedTask = await harness.taskRepository.findById(successTaskId!);
|
||||
expect(completedTask?.status).toBe('completed');
|
||||
|
||||
// Verify: crashed task stays in_progress
|
||||
const inProgressTask = await harness.taskRepository.findById(crashedTaskId!);
|
||||
expect(inProgressTask?.status).toBe('in_progress');
|
||||
|
||||
// Verify: completed task can merge (set up infrastructure)
|
||||
const wtSuccess = `wt-${successTaskId!.slice(0, 6)}`;
|
||||
await harness.agentRepository.create({
|
||||
name: `merge-agent-${successTaskId!.slice(0, 6)}`,
|
||||
worktreeId: wtSuccess,
|
||||
taskId: successTaskId!,
|
||||
status: 'idle',
|
||||
});
|
||||
await harness.worktreeManager.create(wtSuccess, 'feature-success');
|
||||
|
||||
await harness.coordinationManager.queueMerge(successTaskId!);
|
||||
const mergeResults = await harness.coordinationManager.processMerges('main');
|
||||
|
||||
expect(mergeResults.length).toBe(1);
|
||||
expect(mergeResults[0].success).toBe(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
437
apps/server/test/e2e/happy-path.test.ts
Normal file
437
apps/server/test/e2e/happy-path.test.ts
Normal file
@@ -0,0 +1,437 @@
|
||||
/**
|
||||
* E2E Happy Path Tests
|
||||
*
|
||||
* Tests proving core dispatch/coordination flow works end-to-end
|
||||
* using the TestHarness with mocked agents and worktrees.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import {
|
||||
createTestHarness,
|
||||
SIMPLE_FIXTURE,
|
||||
PARALLEL_FIXTURE,
|
||||
COMPLEX_FIXTURE,
|
||||
type TestHarness,
|
||||
} from '../index.js';
|
||||
|
||||
describe('E2E Happy Path', () => {
|
||||
let harness: TestHarness;
|
||||
|
||||
beforeEach(() => {
|
||||
harness = createTestHarness();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
harness.cleanup();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Scenario 1: Single Task Flow
|
||||
// ===========================================================================
|
||||
|
||||
describe('Single task flow', () => {
|
||||
it('completes a single task from queue to completion', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed idle agent (required by DispatchManager before spawning new ones)
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
harness.clearEvents();
|
||||
|
||||
// Step 1: Queue task
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
|
||||
// Verify task:queued event
|
||||
const queuedEvents = harness.getEventsByType('task:queued');
|
||||
expect(queuedEvents.length).toBe(1);
|
||||
expect((queuedEvents[0].payload as { taskId: string }).taskId).toBe(taskAId);
|
||||
|
||||
// Step 2: Dispatch task
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
expect(dispatchResult.success).toBe(true);
|
||||
expect(dispatchResult.taskId).toBe(taskAId);
|
||||
expect(dispatchResult.agentId).toBeDefined();
|
||||
|
||||
// Verify task:dispatched event
|
||||
const dispatchedEvents = harness.getEventsByType('task:dispatched');
|
||||
expect(dispatchedEvents.length).toBe(1);
|
||||
expect((dispatchedEvents[0].payload as { taskId: string }).taskId).toBe(taskAId);
|
||||
|
||||
// Verify agent:spawned event
|
||||
const spawnedEvents = harness.getEventsByType('agent:spawned');
|
||||
expect(spawnedEvents.length).toBe(1);
|
||||
|
||||
// Step 3: Wait for agent completion
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify agent:stopped event
|
||||
const stoppedEvents = harness.getEventsByType('agent:stopped');
|
||||
expect(stoppedEvents.length).toBe(1);
|
||||
|
||||
// Step 4: Mark task complete
|
||||
await harness.dispatchManager.completeTask(taskAId);
|
||||
|
||||
// Verify task status in database
|
||||
const task = await harness.taskRepository.findById(taskAId);
|
||||
expect(task?.status).toBe('completed');
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Scenario 2: Sequential Dependencies
|
||||
// ===========================================================================
|
||||
|
||||
describe('Sequential dependencies', () => {
|
||||
it('dispatches tasks in priority order (dependency ordering via task status)', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
const taskBId = seeded.tasks.get('Task B')!;
|
||||
const taskCId = seeded.tasks.get('Task C')!;
|
||||
|
||||
// Pre-seed idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
harness.clearEvents();
|
||||
|
||||
// Queue all three tasks
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
await harness.dispatchManager.queue(taskBId);
|
||||
await harness.dispatchManager.queue(taskCId);
|
||||
harness.clearEvents();
|
||||
|
||||
// All three tasks are queued
|
||||
const queueState = await harness.dispatchManager.getQueueState();
|
||||
expect(queueState.queued.length).toBe(3);
|
||||
|
||||
// First dispatchNext: Task A (high priority) dispatches first
|
||||
const nextTask = await harness.dispatchManager.getNextDispatchable();
|
||||
expect(nextTask).not.toBeNull();
|
||||
expect(nextTask!.taskId).toBe(taskAId); // High priority first
|
||||
|
||||
// All tasks are "ready" in current implementation (dependency loading TBD)
|
||||
const readyTaskIds = queueState.ready.map((t) => t.taskId);
|
||||
expect(readyTaskIds).toContain(taskAId);
|
||||
|
||||
// Dispatch Task A
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
expect(dispatchResult.success).toBe(true);
|
||||
expect(dispatchResult.taskId).toBe(taskAId);
|
||||
|
||||
// Wait for agent completion
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Complete Task A
|
||||
await harness.dispatchManager.completeTask(taskAId);
|
||||
|
||||
// Verify Task A removed from queue, B and C remain
|
||||
const queueStateAfter = await harness.dispatchManager.getQueueState();
|
||||
const remainingTaskIds = queueStateAfter.queued.map((t) => t.taskId);
|
||||
expect(remainingTaskIds).not.toContain(taskAId);
|
||||
expect(remainingTaskIds).toContain(taskBId);
|
||||
expect(remainingTaskIds).toContain(taskCId);
|
||||
|
||||
// Task A marked completed in database
|
||||
const taskA = await harness.taskRepository.findById(taskAId);
|
||||
expect(taskA?.status).toBe('completed');
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Scenario 3: Parallel Dispatch
|
||||
// ===========================================================================
|
||||
|
||||
describe('Parallel dispatch', () => {
|
||||
it('dispatches multiple independent tasks to multiple agents', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(PARALLEL_FIXTURE);
|
||||
const taskXId = seeded.tasks.get('Task X')!;
|
||||
const taskYId = seeded.tasks.get('Task Y')!;
|
||||
const taskPId = seeded.tasks.get('Task P')!;
|
||||
const taskQId = seeded.tasks.get('Task Q')!;
|
||||
|
||||
// Pre-seed 2 idle agents
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent-1',
|
||||
taskId: 'placeholder-1',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent-2',
|
||||
taskId: 'placeholder-2',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
harness.clearEvents();
|
||||
|
||||
// Queue all 4 tasks
|
||||
await harness.dispatchManager.queue(taskXId);
|
||||
await harness.dispatchManager.queue(taskYId);
|
||||
await harness.dispatchManager.queue(taskPId);
|
||||
await harness.dispatchManager.queue(taskQId);
|
||||
harness.clearEvents();
|
||||
|
||||
// All 4 tasks should be dispatchable (no dependencies)
|
||||
const queueState = await harness.dispatchManager.getQueueState();
|
||||
expect(queueState.ready.length).toBe(4);
|
||||
|
||||
// Dispatch first task
|
||||
const result1 = await harness.dispatchManager.dispatchNext();
|
||||
expect(result1.success).toBe(true);
|
||||
|
||||
// Dispatch second task (parallel)
|
||||
const result2 = await harness.dispatchManager.dispatchNext();
|
||||
expect(result2.success).toBe(true);
|
||||
|
||||
// Verify both agents assigned different tasks
|
||||
expect(result1.taskId).not.toBe(result2.taskId);
|
||||
expect(result1.agentId).not.toBe(result2.agentId);
|
||||
|
||||
// Both dispatches succeeded
|
||||
const dispatchedEvents = harness.getEventsByType('task:dispatched');
|
||||
expect(dispatchedEvents.length).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Scenario 4: Full Merge Flow
|
||||
// ===========================================================================
|
||||
|
||||
describe('Full merge flow', () => {
|
||||
it('queues and processes merge after task completion', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed idle agent in MockAgentManager
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
harness.clearEvents();
|
||||
|
||||
// Queue and dispatch task
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
expect(dispatchResult.success).toBe(true);
|
||||
|
||||
// Wait for agent completion
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Complete task
|
||||
await harness.dispatchManager.completeTask(taskAId);
|
||||
harness.clearEvents();
|
||||
|
||||
// Create agent in database (CoordinationManager.queueMerge requires it)
|
||||
// This bridges the gap between MockAgentManager (in-memory) and AgentRepository (database)
|
||||
const worktreeId = `worktree-${taskAId.slice(0, 8)}`;
|
||||
const agent = await harness.agentRepository.create({
|
||||
name: `agent-${taskAId.slice(0, 6)}`,
|
||||
taskId: taskAId,
|
||||
worktreeId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
// Create worktree for merge
|
||||
await harness.worktreeManager.create(worktreeId, `feature-${taskAId.slice(0, 6)}`);
|
||||
|
||||
// Queue merge
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
|
||||
// Verify merge:queued event
|
||||
const mergeQueuedEvents = harness.getEventsByType('merge:queued');
|
||||
expect(mergeQueuedEvents.length).toBe(1);
|
||||
|
||||
// Process merges
|
||||
const mergeResults = await harness.coordinationManager.processMerges('main');
|
||||
expect(mergeResults.length).toBe(1);
|
||||
expect(mergeResults[0].taskId).toBe(taskAId);
|
||||
expect(mergeResults[0].success).toBe(true);
|
||||
|
||||
// Verify merge:completed event
|
||||
const mergeCompletedEvents = harness.getEventsByType('merge:completed');
|
||||
expect(mergeCompletedEvents.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Scenario 5: Complex Dependency Flow
|
||||
// ===========================================================================
|
||||
|
||||
describe('Complex dependency flow', () => {
|
||||
it('handles multi-level dependency graph with COMPLEX_FIXTURE', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
|
||||
|
||||
// Get all task IDs
|
||||
const task1AId = seeded.tasks.get('Task 1A')!;
|
||||
const task1BId = seeded.tasks.get('Task 1B')!;
|
||||
const task2AId = seeded.tasks.get('Task 2A')!;
|
||||
const task3AId = seeded.tasks.get('Task 3A')!;
|
||||
const task4AId = seeded.tasks.get('Task 4A')!;
|
||||
|
||||
// Pre-seed idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
harness.clearEvents();
|
||||
|
||||
// Queue all 5 tasks
|
||||
await harness.dispatchManager.queue(task1AId);
|
||||
await harness.dispatchManager.queue(task1BId);
|
||||
await harness.dispatchManager.queue(task2AId);
|
||||
await harness.dispatchManager.queue(task3AId);
|
||||
await harness.dispatchManager.queue(task4AId);
|
||||
harness.clearEvents();
|
||||
|
||||
// Verify all 5 tasks are queued
|
||||
const initialState = await harness.dispatchManager.getQueueState();
|
||||
expect(initialState.queued.length).toBe(5);
|
||||
|
||||
// Only tasks with no dependencies are ready:
|
||||
// - Task 1A: no deps -> READY
|
||||
// - Task 1B: no deps -> READY
|
||||
// - Task 2A: depends on 1A -> NOT READY
|
||||
// - Task 3A: depends on 1B -> NOT READY
|
||||
// - Task 4A: depends on 2A, 3A -> NOT READY
|
||||
expect(initialState.ready.length).toBe(2);
|
||||
|
||||
// First dispatch: Task 1A (high priority, first queued)
|
||||
const result1 = await harness.dispatchManager.dispatchNext();
|
||||
expect(result1.success).toBe(true);
|
||||
expect(result1.taskId).toBe(task1AId);
|
||||
|
||||
// Wait for agent completion
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Complete Task 1A
|
||||
await harness.dispatchManager.completeTask(task1AId);
|
||||
|
||||
// Verify Task 1A completed in database
|
||||
const task1A = await harness.taskRepository.findById(task1AId);
|
||||
expect(task1A?.status).toBe('completed');
|
||||
|
||||
// 4 tasks remain in queue
|
||||
const afterFirstState = await harness.dispatchManager.getQueueState();
|
||||
expect(afterFirstState.queued.length).toBe(4);
|
||||
|
||||
// Dispatch and complete remaining tasks one by one
|
||||
// Task 1B (high priority among remaining)
|
||||
const result2 = await harness.dispatchManager.dispatchNext();
|
||||
expect(result2.success).toBe(true);
|
||||
await harness.advanceTimers();
|
||||
await harness.dispatchManager.completeTask(result2.taskId!);
|
||||
|
||||
// 3 tasks remain
|
||||
const midState = await harness.dispatchManager.getQueueState();
|
||||
expect(midState.queued.length).toBe(3);
|
||||
|
||||
// Continue dispatching remaining tasks
|
||||
const result3 = await harness.dispatchManager.dispatchNext();
|
||||
expect(result3.success).toBe(true);
|
||||
await harness.advanceTimers();
|
||||
await harness.dispatchManager.completeTask(result3.taskId!);
|
||||
|
||||
const result4 = await harness.dispatchManager.dispatchNext();
|
||||
expect(result4.success).toBe(true);
|
||||
await harness.advanceTimers();
|
||||
await harness.dispatchManager.completeTask(result4.taskId!);
|
||||
|
||||
const result5 = await harness.dispatchManager.dispatchNext();
|
||||
expect(result5.success).toBe(true);
|
||||
await harness.advanceTimers();
|
||||
await harness.dispatchManager.completeTask(result5.taskId!);
|
||||
|
||||
// All tasks completed
|
||||
const finalState = await harness.dispatchManager.getQueueState();
|
||||
expect(finalState.queued.length).toBe(0);
|
||||
|
||||
// Verify all 5 tasks completed in database
|
||||
const allTasks = await Promise.all([
|
||||
harness.taskRepository.findById(task1AId),
|
||||
harness.taskRepository.findById(task1BId),
|
||||
harness.taskRepository.findById(task2AId),
|
||||
harness.taskRepository.findById(task3AId),
|
||||
harness.taskRepository.findById(task4AId),
|
||||
]);
|
||||
expect(allTasks.every((t) => t?.status === 'completed')).toBe(true);
|
||||
|
||||
// Verify event sequence: 5 task:dispatched, 5 task:completed
|
||||
const dispatchedEvents = harness.getEventsByType('task:dispatched');
|
||||
expect(dispatchedEvents.length).toBe(5);
|
||||
|
||||
const completedEvents = harness.getEventsByType('task:completed');
|
||||
expect(completedEvents.length).toBe(5);
|
||||
});
|
||||
|
||||
it('fixture dependencies are stored correctly in database', async () => {
|
||||
const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
|
||||
|
||||
// Get task IDs
|
||||
const task1AId = seeded.tasks.get('Task 1A')!;
|
||||
const task1BId = seeded.tasks.get('Task 1B')!;
|
||||
const task2AId = seeded.tasks.get('Task 2A')!;
|
||||
const task3AId = seeded.tasks.get('Task 3A')!;
|
||||
const task4AId = seeded.tasks.get('Task 4A')!;
|
||||
|
||||
// Query task_dependencies directly to verify fixture setup
|
||||
const { taskDependencies } = await import('../../db/schema.js');
|
||||
const { eq } = await import('drizzle-orm');
|
||||
|
||||
// Task 2A should depend on Task 1A
|
||||
const task2ADeps = await harness.db
|
||||
.select()
|
||||
.from(taskDependencies)
|
||||
.where(eq(taskDependencies.taskId, task2AId));
|
||||
expect(task2ADeps.length).toBe(1);
|
||||
expect(task2ADeps[0].dependsOnTaskId).toBe(task1AId);
|
||||
|
||||
// Task 3A should depend on Task 1B
|
||||
const task3ADeps = await harness.db
|
||||
.select()
|
||||
.from(taskDependencies)
|
||||
.where(eq(taskDependencies.taskId, task3AId));
|
||||
expect(task3ADeps.length).toBe(1);
|
||||
expect(task3ADeps[0].dependsOnTaskId).toBe(task1BId);
|
||||
|
||||
// Task 4A should depend on both Task 2A and Task 3A
|
||||
const task4ADeps = await harness.db
|
||||
.select()
|
||||
.from(taskDependencies)
|
||||
.where(eq(taskDependencies.taskId, task4AId));
|
||||
expect(task4ADeps.length).toBe(2);
|
||||
const depIds = task4ADeps.map((d) => d.dependsOnTaskId);
|
||||
expect(depIds).toContain(task2AId);
|
||||
expect(depIds).toContain(task3AId);
|
||||
|
||||
// Tasks 1A and 1B should have no dependencies
|
||||
const task1ADeps = await harness.db
|
||||
.select()
|
||||
.from(taskDependencies)
|
||||
.where(eq(taskDependencies.taskId, task1AId));
|
||||
expect(task1ADeps.length).toBe(0);
|
||||
|
||||
const task1BDeps = await harness.db
|
||||
.select()
|
||||
.from(taskDependencies)
|
||||
.where(eq(taskDependencies.taskId, task1BId));
|
||||
expect(task1BDeps.length).toBe(0);
|
||||
});
|
||||
});
|
||||
});
|
||||
12
apps/server/test/e2e/index.ts
Normal file
12
apps/server/test/e2e/index.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
/**
|
||||
* E2E Tests for Dispatch/Coordination Flows
|
||||
*
|
||||
* Test files:
|
||||
* - happy-path.test.ts: Normal operation scenarios
|
||||
* - edge-cases.test.ts: Error handling and edge cases
|
||||
*
|
||||
* Uses TestHarness from src/test/ for system wiring.
|
||||
*/
|
||||
|
||||
// No exports needed - tests are self-contained
|
||||
export {};
|
||||
480
apps/server/test/e2e/phase-dispatch.test.ts
Normal file
480
apps/server/test/e2e/phase-dispatch.test.ts
Normal file
@@ -0,0 +1,480 @@
|
||||
/**
|
||||
* E2E Tests for Phase Parallel Execution
|
||||
*
|
||||
* Tests proving phase dispatch/coordination flow works end-to-end
|
||||
* using the TestHarness with phaseDispatchManager.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { createTestHarness, type TestHarness } from '../index.js';
|
||||
import type {
|
||||
PhaseQueuedEvent,
|
||||
PhaseStartedEvent,
|
||||
PhaseCompletedEvent,
|
||||
PhaseBlockedEvent,
|
||||
} from '../../events/types.js';
|
||||
|
||||
describe('Phase Parallel Execution', () => {
|
||||
let harness: TestHarness;
|
||||
|
||||
beforeEach(() => {
|
||||
harness = createTestHarness();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
harness.cleanup();
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Test 1: Independent phases dispatch in parallel
|
||||
// ===========================================================================
|
||||
|
||||
describe('Independent phases dispatch in parallel', () => {
|
||||
it('dispatches multiple independent phases when no dependencies exist', async () => {
|
||||
// Create initiative with 2 independent phases (no dependencies)
|
||||
const initiative = await harness.initiativeRepository.create({
|
||||
name: 'Independent Phases Test',
|
||||
status: 'active',
|
||||
});
|
||||
|
||||
const phaseA = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase A',
|
||||
content: 'Independent phase A',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
const phaseB = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase B',
|
||||
content: 'Independent phase B',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
// Approve phases before queuing
|
||||
await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
|
||||
await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
|
||||
|
||||
// Queue both phases
|
||||
await harness.phaseDispatchManager.queuePhase(phaseA.id);
|
||||
await harness.phaseDispatchManager.queuePhase(phaseB.id);
|
||||
|
||||
// Verify phase:queued events
|
||||
const queuedEvents = harness.getEventsByType('phase:queued');
|
||||
expect(queuedEvents.length).toBe(2);
|
||||
|
||||
// Get queue state - both should be ready (no dependencies)
|
||||
const queueState = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(queueState.queued.length).toBe(2);
|
||||
expect(queueState.ready.length).toBe(2);
|
||||
expect(queueState.blocked.length).toBe(0);
|
||||
|
||||
// Both phases should be dispatchable immediately
|
||||
const readyPhaseIds = queueState.ready.map((p) => p.phaseId);
|
||||
expect(readyPhaseIds).toContain(phaseA.id);
|
||||
expect(readyPhaseIds).toContain(phaseB.id);
|
||||
|
||||
harness.clearEvents();
|
||||
|
||||
// Dispatch first phase
|
||||
const result1 = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(result1.success).toBe(true);
|
||||
|
||||
// Dispatch second phase (parallel)
|
||||
const result2 = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(result2.success).toBe(true);
|
||||
|
||||
// Verify both dispatched to different phases
|
||||
expect(result1.phaseId).not.toBe(result2.phaseId);
|
||||
|
||||
// Verify phase:started events
|
||||
const startedEvents = harness.getEventsByType('phase:started');
|
||||
expect(startedEvents.length).toBe(2);
|
||||
|
||||
// Verify both phases are now in_progress
|
||||
const updatedPhaseA = await harness.phaseRepository.findById(phaseA.id);
|
||||
const updatedPhaseB = await harness.phaseRepository.findById(phaseB.id);
|
||||
expect(updatedPhaseA?.status).toBe('in_progress');
|
||||
expect(updatedPhaseB?.status).toBe('in_progress');
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Test 2: Dependent phase waits for prerequisite
|
||||
// ===========================================================================
|
||||
|
||||
describe('Dependent phase waits for prerequisite', () => {
|
||||
it('only dispatches phase A first, then B after A completes', async () => {
|
||||
// Create phases: A, B (depends on A)
|
||||
const initiative = await harness.initiativeRepository.create({
|
||||
name: 'Sequential Phases Test',
|
||||
status: 'active',
|
||||
});
|
||||
|
||||
const phaseA = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase A',
|
||||
content: 'First phase',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
const phaseB = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase B',
|
||||
content: 'Second phase, depends on A',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
// Approve phases before queuing
|
||||
await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
|
||||
await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
|
||||
|
||||
// Create dependency: B depends on A
|
||||
await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
|
||||
|
||||
// Queue both phases
|
||||
await harness.phaseDispatchManager.queuePhase(phaseA.id);
|
||||
await harness.phaseDispatchManager.queuePhase(phaseB.id);
|
||||
|
||||
// Check queue state - only A should be ready
|
||||
const queueState1 = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(queueState1.queued.length).toBe(2);
|
||||
expect(queueState1.ready.length).toBe(1);
|
||||
expect(queueState1.ready[0].phaseId).toBe(phaseA.id);
|
||||
|
||||
harness.clearEvents();
|
||||
|
||||
// Dispatch - should get phase A
|
||||
const result1 = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(result1.success).toBe(true);
|
||||
expect(result1.phaseId).toBe(phaseA.id);
|
||||
|
||||
// Try to dispatch again - should fail (B is blocked by A)
|
||||
const result2 = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(result2.success).toBe(false);
|
||||
expect(result2.reason).toBe('No dispatchable phases');
|
||||
|
||||
// Verify phase B still in queue but not ready
|
||||
const queueState2 = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(queueState2.queued.length).toBe(1);
|
||||
expect(queueState2.ready.length).toBe(0);
|
||||
|
||||
// Complete phase A
|
||||
await harness.phaseDispatchManager.completePhase(phaseA.id);
|
||||
|
||||
// Verify phase:completed event for A
|
||||
const completedEvents = harness.getEventsByType('phase:completed');
|
||||
expect(completedEvents.length).toBe(1);
|
||||
expect((completedEvents[0] as PhaseCompletedEvent).payload.phaseId).toBe(phaseA.id);
|
||||
|
||||
// Now B should be ready
|
||||
const queueState3 = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(queueState3.ready.length).toBe(1);
|
||||
expect(queueState3.ready[0].phaseId).toBe(phaseB.id);
|
||||
|
||||
harness.clearEvents();
|
||||
|
||||
// Dispatch - should get phase B
|
||||
const result3 = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(result3.success).toBe(true);
|
||||
expect(result3.phaseId).toBe(phaseB.id);
|
||||
|
||||
// Verify phase B is now in_progress
|
||||
const updatedPhaseB = await harness.phaseRepository.findById(phaseB.id);
|
||||
expect(updatedPhaseB?.status).toBe('in_progress');
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Test 3: Diamond dependency pattern
|
||||
// ===========================================================================
|
||||
|
||||
describe('Diamond dependency pattern', () => {
|
||||
it('handles diamond: A -> B,C -> D correctly', async () => {
|
||||
// Create phases: A, B (depends on A), C (depends on A), D (depends on B, C)
|
||||
const initiative = await harness.initiativeRepository.create({
|
||||
name: 'Diamond Pattern Test',
|
||||
status: 'active',
|
||||
});
|
||||
|
||||
const phaseA = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase A',
|
||||
content: 'Root phase',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
const phaseB = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase B',
|
||||
content: 'Depends on A',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
const phaseC = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase C',
|
||||
content: 'Depends on A',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
const phaseD = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase D',
|
||||
content: 'Depends on B and C',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
// Approve all phases before queuing
|
||||
await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
|
||||
await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
|
||||
await harness.phaseRepository.update(phaseC.id, { status: 'approved' as const });
|
||||
await harness.phaseRepository.update(phaseD.id, { status: 'approved' as const });
|
||||
|
||||
// Create dependencies
|
||||
await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
|
||||
await harness.phaseRepository.createDependency(phaseC.id, phaseA.id);
|
||||
await harness.phaseRepository.createDependency(phaseD.id, phaseB.id);
|
||||
await harness.phaseRepository.createDependency(phaseD.id, phaseC.id);
|
||||
|
||||
// Queue all phases
|
||||
await harness.phaseDispatchManager.queuePhase(phaseA.id);
|
||||
await harness.phaseDispatchManager.queuePhase(phaseB.id);
|
||||
await harness.phaseDispatchManager.queuePhase(phaseC.id);
|
||||
await harness.phaseDispatchManager.queuePhase(phaseD.id);
|
||||
|
||||
// Step 1: Only A should be ready
|
||||
const state1 = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(state1.queued.length).toBe(4);
|
||||
expect(state1.ready.length).toBe(1);
|
||||
expect(state1.ready[0].phaseId).toBe(phaseA.id);
|
||||
|
||||
// Dispatch A
|
||||
const resultA = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(resultA.success).toBe(true);
|
||||
expect(resultA.phaseId).toBe(phaseA.id);
|
||||
|
||||
// Step 2: After A completes, B and C should be ready (parallel)
|
||||
await harness.phaseDispatchManager.completePhase(phaseA.id);
|
||||
|
||||
const state2 = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(state2.queued.length).toBe(3); // B, C, D still queued
|
||||
expect(state2.ready.length).toBe(2); // B and C ready
|
||||
|
||||
const readyIds = state2.ready.map((p) => p.phaseId);
|
||||
expect(readyIds).toContain(phaseB.id);
|
||||
expect(readyIds).toContain(phaseC.id);
|
||||
expect(readyIds).not.toContain(phaseD.id);
|
||||
|
||||
// Dispatch B and C in parallel
|
||||
const resultB = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(resultB.success).toBe(true);
|
||||
|
||||
const resultC = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(resultC.success).toBe(true);
|
||||
|
||||
// Verify D is still not ready (needs both B and C complete)
|
||||
const state3 = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(state3.ready.length).toBe(0);
|
||||
expect(state3.queued.length).toBe(1);
|
||||
expect(state3.queued[0].phaseId).toBe(phaseD.id);
|
||||
|
||||
// Step 3: Complete B only - D still not ready
|
||||
await harness.phaseDispatchManager.completePhase(resultB.phaseId);
|
||||
|
||||
const state4 = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(state4.ready.length).toBe(0); // D still blocked by C
|
||||
|
||||
// Step 4: Complete C - now D should be ready
|
||||
await harness.phaseDispatchManager.completePhase(resultC.phaseId);
|
||||
|
||||
const state5 = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(state5.ready.length).toBe(1);
|
||||
expect(state5.ready[0].phaseId).toBe(phaseD.id);
|
||||
|
||||
// Dispatch D
|
||||
const resultD = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(resultD.success).toBe(true);
|
||||
expect(resultD.phaseId).toBe(phaseD.id);
|
||||
|
||||
// Verify D is now in_progress
|
||||
const updatedPhaseD = await harness.phaseRepository.findById(phaseD.id);
|
||||
expect(updatedPhaseD?.status).toBe('in_progress');
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Test 4: Approval gate rejects non-approved phases
|
||||
// ===========================================================================
|
||||
|
||||
describe('Approval gate rejects non-approved phases', () => {
|
||||
it('rejects queuePhase for pending phase', async () => {
|
||||
const initiative = await harness.initiativeRepository.create({
|
||||
name: 'Approval Gate Test',
|
||||
status: 'active',
|
||||
});
|
||||
|
||||
const phase = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Unapproved Phase',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
await expect(
|
||||
harness.phaseDispatchManager.queuePhase(phase.id)
|
||||
).rejects.toThrow('must be approved before queuing');
|
||||
});
|
||||
|
||||
it('rejects queuePhase for in_progress phase', async () => {
|
||||
const initiative = await harness.initiativeRepository.create({
|
||||
name: 'Approval Gate Test 2',
|
||||
status: 'active',
|
||||
});
|
||||
|
||||
const phase = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'In Progress Phase',
|
||||
status: 'in_progress',
|
||||
});
|
||||
|
||||
await expect(
|
||||
harness.phaseDispatchManager.queuePhase(phase.id)
|
||||
).rejects.toThrow('must be approved before queuing');
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// Test 5: Blocked phase doesn't dispatch
|
||||
// ===========================================================================
|
||||
|
||||
describe('Blocked phase does not dispatch', () => {
|
||||
it('prevents dispatch of blocked phase even if dependencies complete', async () => {
|
||||
// Create phases: A, B (depends on A)
|
||||
const initiative = await harness.initiativeRepository.create({
|
||||
name: 'Blocked Phase Test',
|
||||
status: 'active',
|
||||
});
|
||||
|
||||
const phaseA = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase A',
|
||||
content: 'First phase that will be blocked',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
const phaseB = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase B',
|
||||
content: 'Second phase, depends on A',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
// Approve phases before queuing
|
||||
await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
|
||||
await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
|
||||
|
||||
// Create dependency: B depends on A
|
||||
await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
|
||||
|
||||
// Queue phase A
|
||||
await harness.phaseDispatchManager.queuePhase(phaseA.id);
|
||||
|
||||
// Block phase A
|
||||
await harness.phaseDispatchManager.blockPhase(phaseA.id, 'External dependency unavailable');
|
||||
|
||||
// Verify phase:blocked event
|
||||
const blockedEvents = harness.getEventsByType('phase:blocked');
|
||||
expect(blockedEvents.length).toBe(1);
|
||||
expect((blockedEvents[0] as PhaseBlockedEvent).payload.phaseId).toBe(phaseA.id);
|
||||
expect((blockedEvents[0] as PhaseBlockedEvent).payload.reason).toBe(
|
||||
'External dependency unavailable'
|
||||
);
|
||||
|
||||
// Try to dispatch - should fail
|
||||
const result = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.reason).toBe('No dispatchable phases');
|
||||
|
||||
// Verify queue state shows A as blocked
|
||||
const queueState = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(queueState.blocked.length).toBe(1);
|
||||
expect(queueState.blocked[0].phaseId).toBe(phaseA.id);
|
||||
expect(queueState.blocked[0].reason).toBe('External dependency unavailable');
|
||||
|
||||
// Queue phase B
|
||||
await harness.phaseDispatchManager.queuePhase(phaseB.id);
|
||||
|
||||
// B should never become ready because A is blocked (not completed)
|
||||
const queueState2 = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(queueState2.ready.length).toBe(0);
|
||||
expect(queueState2.queued.length).toBe(1); // Only B is queued (A is blocked, not queued)
|
||||
expect(queueState2.queued[0].phaseId).toBe(phaseB.id);
|
||||
|
||||
// Try to dispatch B - should fail
|
||||
const resultB = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(resultB.success).toBe(false);
|
||||
expect(resultB.reason).toBe('No dispatchable phases');
|
||||
|
||||
// Verify phase A status is blocked in database
|
||||
const updatedPhaseA = await harness.phaseRepository.findById(phaseA.id);
|
||||
expect(updatedPhaseA?.status).toBe('blocked');
|
||||
});
|
||||
|
||||
it('blocked phase prevents all downstream phases from dispatching', async () => {
|
||||
// Create chain: A -> B -> C, then block A
|
||||
const initiative = await harness.initiativeRepository.create({
|
||||
name: 'Chain Block Test',
|
||||
status: 'active',
|
||||
});
|
||||
|
||||
const phaseA = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase A',
|
||||
content: 'Root phase',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
const phaseB = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase B',
|
||||
content: 'Depends on A',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
const phaseC = await harness.phaseRepository.create({
|
||||
initiativeId: initiative.id,
|
||||
name: 'Phase C',
|
||||
content: 'Depends on B',
|
||||
status: 'pending',
|
||||
});
|
||||
|
||||
// Approve all phases before queuing
|
||||
await harness.phaseRepository.update(phaseA.id, { status: 'approved' as const });
|
||||
await harness.phaseRepository.update(phaseB.id, { status: 'approved' as const });
|
||||
await harness.phaseRepository.update(phaseC.id, { status: 'approved' as const });
|
||||
|
||||
// Create dependency chain: A -> B -> C
|
||||
await harness.phaseRepository.createDependency(phaseB.id, phaseA.id);
|
||||
await harness.phaseRepository.createDependency(phaseC.id, phaseB.id);
|
||||
|
||||
// Queue all phases
|
||||
await harness.phaseDispatchManager.queuePhase(phaseA.id);
|
||||
await harness.phaseDispatchManager.queuePhase(phaseB.id);
|
||||
await harness.phaseDispatchManager.queuePhase(phaseC.id);
|
||||
|
||||
// Block phase A
|
||||
await harness.phaseDispatchManager.blockPhase(phaseA.id, 'Resource unavailable');
|
||||
|
||||
// Verify only B and C are in queue (A is blocked)
|
||||
const queueState = await harness.phaseDispatchManager.getPhaseQueueState();
|
||||
expect(queueState.queued.length).toBe(2);
|
||||
expect(queueState.ready.length).toBe(0); // Neither B nor C can dispatch
|
||||
expect(queueState.blocked.length).toBe(1);
|
||||
|
||||
// Try to dispatch any phase - should fail for all
|
||||
const result = await harness.phaseDispatchManager.dispatchNextPhase();
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.reason).toBe('No dispatchable phases');
|
||||
});
|
||||
});
|
||||
});
|
||||
490
apps/server/test/e2e/recovery-scenarios.test.ts
Normal file
490
apps/server/test/e2e/recovery-scenarios.test.ts
Normal file
@@ -0,0 +1,490 @@
|
||||
/**
|
||||
* E2E Tests for Recovery and Extended Scenarios
|
||||
*
|
||||
* Tests recovery/resume after interruption scenarios:
|
||||
* - Queue state survives harness recreation (DB is source of truth)
|
||||
* - In-progress task recoverable after agent crash
|
||||
* - Blocked task state persists and can be unblocked
|
||||
* - Merge queue state recoverable
|
||||
*
|
||||
* Tests extended agent Q&A scenarios:
|
||||
* - Multiple questions in sequence
|
||||
* - Question surfaces in message queue
|
||||
* - Agent resumes with answer in context
|
||||
* - Waiting agent blocks task completion
|
||||
*
|
||||
* Uses TestHarness from src/test/ for full system wiring.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import {
|
||||
createTestHarness,
|
||||
SIMPLE_FIXTURE,
|
||||
type TestHarness,
|
||||
} from '../index.js';
|
||||
import type {
|
||||
AgentWaitingEvent,
|
||||
AgentResumedEvent,
|
||||
AgentStoppedEvent,
|
||||
} from '../../events/types.js';
|
||||
|
||||
describe('E2E Recovery Scenarios', () => {
|
||||
describe('Recovery after interruption', () => {
|
||||
let harness: TestHarness;
|
||||
|
||||
beforeEach(() => {
|
||||
harness = createTestHarness();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
harness.cleanup();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it('queue state survives in database (source of truth)', async () => {
|
||||
// Seed fixture, queue tasks
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Queue task
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
|
||||
// Verify queue state shows task (queued, not pending)
|
||||
const queueState1 = await harness.dispatchManager.getQueueState();
|
||||
expect(queueState1.queued.length).toBe(1);
|
||||
expect(queueState1.queued[0].taskId).toBe(taskAId);
|
||||
|
||||
// The queue state is in memory, but task status is in DB.
|
||||
// Verify task status in database directly
|
||||
const task = await harness.taskRepository.findById(taskAId);
|
||||
expect(task?.status).toBe('pending');
|
||||
|
||||
// Verify: even after clearing in-memory queue state,
|
||||
// we can still find pending tasks from database
|
||||
const allTasks = await harness.taskRepository.findByParentTaskId(
|
||||
seeded.taskGroups.get('Task Group 1')!
|
||||
);
|
||||
const pendingTasks = allTasks.filter((t) => t.status === 'pending');
|
||||
|
||||
// Task A is pending (not queued, but status is pending)
|
||||
// Task B and C are also pending but depend on Task A
|
||||
expect(pendingTasks.length).toBeGreaterThanOrEqual(1);
|
||||
});
|
||||
|
||||
it('in-progress task recoverable after agent crash', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Set crash scenario
|
||||
harness.setAgentError(`agent-${taskAId.slice(0, 6)}`, 'Token limit exceeded');
|
||||
|
||||
// Queue and dispatch
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
await harness.dispatchManager.dispatchNext();
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify task status is 'in_progress' (not completed, not lost)
|
||||
let task = await harness.taskRepository.findById(taskAId);
|
||||
expect(task?.status).toBe('in_progress');
|
||||
|
||||
// Task can be re-queued and dispatched to a new agent
|
||||
// First, clear agent manager and create new pool agent
|
||||
harness.agentManager.clear();
|
||||
await harness.agentManager.spawn({
|
||||
name: 'new-pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Re-queue the task (it's still in_progress but we can retry)
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
|
||||
// Set success scenario for the new agent
|
||||
harness.setAgentDone(`agent-${taskAId.slice(0, 6)}`, 'Task completed after retry');
|
||||
|
||||
// Clear events and dispatch again
|
||||
harness.clearEvents();
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify: agent completed successfully
|
||||
expect(dispatchResult.agentId).toBeDefined();
|
||||
const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
|
||||
expect(agentResult?.success).toBe(true);
|
||||
});
|
||||
|
||||
it('blocked task state persists in database', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Queue task and block it
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
await harness.dispatchManager.blockTask(taskAId, 'Waiting for user decision');
|
||||
|
||||
// Verify task in blocked state in DB
|
||||
const task = await harness.taskRepository.findById(taskAId);
|
||||
expect(task?.status).toBe('blocked');
|
||||
|
||||
// Query blocked tasks from queue state
|
||||
const queueState = await harness.dispatchManager.getQueueState();
|
||||
expect(queueState.blocked.length).toBe(1);
|
||||
expect(queueState.blocked[0].taskId).toBe(taskAId);
|
||||
expect(queueState.blocked[0].reason).toBe('Waiting for user decision');
|
||||
|
||||
// Re-queue task to unblock (set status back to pending via repository)
|
||||
await harness.taskRepository.update(taskAId, { status: 'pending' });
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
|
||||
// Verify: task now in pending state in database
|
||||
const unblocked = await harness.taskRepository.findById(taskAId);
|
||||
expect(unblocked?.status).toBe('pending');
|
||||
|
||||
// Task should be in queued list
|
||||
const queueState2 = await harness.dispatchManager.getQueueState();
|
||||
expect(queueState2.queued.some((t) => t.taskId === taskAId)).toBe(true);
|
||||
});
|
||||
|
||||
it('merge queue state recoverable', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Mark task as completed (required for merge)
|
||||
await harness.taskRepository.update(taskAId, { status: 'completed' });
|
||||
|
||||
// Create worktree for task
|
||||
const worktreeId = `wt-${taskAId.slice(0, 6)}`;
|
||||
await harness.worktreeManager.create(worktreeId, 'feature-task-a');
|
||||
|
||||
// Create agent in agentRepository (required for merge lookup)
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${taskAId.slice(0, 6)}`,
|
||||
worktreeId,
|
||||
taskId: taskAId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
// Queue for merge
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
|
||||
// Verify merge queue has queued item
|
||||
const queueState1 = await harness.coordinationManager.getQueueState();
|
||||
expect(queueState1.queued.some((item) => item.taskId === taskAId)).toBe(true);
|
||||
|
||||
// Process merge
|
||||
const results = await harness.coordinationManager.processMerges('main');
|
||||
|
||||
// Verify: merge completed correctly
|
||||
expect(results.length).toBe(1);
|
||||
expect(results[0].taskId).toBe(taskAId);
|
||||
expect(results[0].success).toBe(true);
|
||||
|
||||
// Verify: task in merged list
|
||||
const queueState2 = await harness.coordinationManager.getQueueState();
|
||||
expect(queueState2.merged.includes(taskAId)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Agent Q&A extended scenarios', () => {
|
||||
let harness: TestHarness;
|
||||
|
||||
beforeEach(() => {
|
||||
harness = createTestHarness();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
harness.cleanup();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it('question enters waiting state and completes after resume', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Set questions scenario with options
|
||||
harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
|
||||
{
|
||||
id: 'q1',
|
||||
question: 'Which database should I use?',
|
||||
options: [
|
||||
{ label: 'PostgreSQL', description: 'Relational, ACID compliant' },
|
||||
{ label: 'SQLite', description: 'Lightweight, file-based' },
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
// Queue and dispatch
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
harness.clearEvents();
|
||||
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify: agent:waiting event emitted
|
||||
const waitingEvents = harness.getEventsByType('agent:waiting');
|
||||
expect(waitingEvents.length).toBe(1);
|
||||
const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
|
||||
expect(waitingPayload.taskId).toBe(taskAId);
|
||||
expect(waitingPayload.questions[0].question).toBe('Which database should I use?');
|
||||
|
||||
// Clear and resume with answers map
|
||||
harness.clearEvents();
|
||||
await harness.agentManager.resume(dispatchResult.agentId!, { q1: 'PostgreSQL' });
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify: resumed and stopped events
|
||||
const resumedEvents = harness.getEventsByType('agent:resumed');
|
||||
expect(resumedEvents.length).toBe(1);
|
||||
const resumedPayload = (resumedEvents[0] as AgentResumedEvent).payload;
|
||||
expect(resumedPayload.taskId).toBe(taskAId);
|
||||
|
||||
const stoppedEvents = harness.getEventsByType('agent:stopped');
|
||||
expect(stoppedEvents.length).toBe(1);
|
||||
const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
|
||||
expect(stoppedPayload.reason).toBe('task_complete');
|
||||
});
|
||||
|
||||
it('questions surface as structured PendingQuestions', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Set questions scenario with options
|
||||
harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
|
||||
{
|
||||
id: 'q1',
|
||||
question: 'Select your framework',
|
||||
options: [
|
||||
{ label: 'React' },
|
||||
{ label: 'Vue' },
|
||||
{ label: 'Svelte' },
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
// Queue and dispatch
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify: agent:waiting event has questions
|
||||
const waitingEvents = harness.getEventsByType('agent:waiting');
|
||||
expect(waitingEvents.length).toBe(1);
|
||||
const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
|
||||
expect(waitingPayload.questions[0].question).toBe('Select your framework');
|
||||
expect(waitingPayload.questions[0].options).toEqual([
|
||||
{ label: 'React' },
|
||||
{ label: 'Vue' },
|
||||
{ label: 'Svelte' },
|
||||
]);
|
||||
|
||||
// Verify: getPendingQuestions returns structured data
|
||||
const pendingQuestions = await harness.getPendingQuestions(dispatchResult.agentId!);
|
||||
expect(pendingQuestions).not.toBeNull();
|
||||
expect(pendingQuestions?.questions[0].question).toBe('Select your framework');
|
||||
expect(pendingQuestions?.questions[0].options).toEqual([
|
||||
{ label: 'React' },
|
||||
{ label: 'Vue' },
|
||||
{ label: 'Svelte' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('agent resumes with answer and completes successfully', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Set questions scenario
|
||||
harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
|
||||
{ id: 'q1', question: 'Choose database type' },
|
||||
]);
|
||||
|
||||
// Queue and dispatch
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify agent is waiting
|
||||
const agent = await harness.agentManager.get(dispatchResult.agentId!);
|
||||
expect(agent?.status).toBe('waiting_for_input');
|
||||
|
||||
// Resume with answers map
|
||||
await harness.agentManager.resume(dispatchResult.agentId!, { q1: 'PostgreSQL' });
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify: agent completed successfully
|
||||
const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
|
||||
expect(agentResult).not.toBeNull();
|
||||
expect(agentResult?.success).toBe(true);
|
||||
expect(agentResult?.message).toBe('Resumed and completed successfully');
|
||||
|
||||
// Verify: agent status is now idle
|
||||
const finalAgent = await harness.agentManager.get(dispatchResult.agentId!);
|
||||
expect(finalAgent?.status).toBe('idle');
|
||||
});
|
||||
|
||||
// Walks one agent through the complete question/answer lifecycle and
// asserts the status observed at each phase:
//   running -> waiting_for_input -> running (post-resume) -> idle
// Also checks that pending questions exist while waiting and are cleared
// once the agent has been resumed.
it('waiting agent status transitions correctly through full cycle', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;

  // Pre-seed required idle agent (DispatchManager needs one in the pool)
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();

  // Set questions scenario for the agent that will be spawned for Task A
  harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
    { id: 'q1', question: 'API key format?' },
  ]);

  // Queue and dispatch
  await harness.dispatchManager.queue(taskAId);
  const dispatchResult = await harness.dispatchManager.dispatchNext();

  // Phase 1: Initially running (timers not yet advanced)
  let agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('running');

  await harness.advanceTimers();

  // Phase 2: After scenario completes, waiting_for_input
  agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('waiting_for_input');

  // Verify pending questions exist while waiting
  const pendingQuestions = await harness.getPendingQuestions(dispatchResult.agentId!);
  expect(pendingQuestions?.questions[0].question).toBe('API key format?');

  // Phase 3: Resume with answers map
  await harness.agentManager.resume(dispatchResult.agentId!, { q1: 'Bearer token' });

  // After resume: running again briefly, until timers are advanced
  agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('running');

  await harness.advanceTimers();

  // Phase 4: After completion, idle (back in the pool)
  agent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(agent?.status).toBe('idle');

  // Verify pending questions are cleared after resume
  const clearedQuestions = await harness.getPendingQuestions(dispatchResult.agentId!);
  expect(clearedQuestions).toBeNull();
});
|
||||
|
||||
// Verifies the multi-question path end to end: a single agent:waiting
// event carries two questions, both are visible via getPendingQuestions,
// and a single resume() call with an answers map for both ids drives the
// agent through agent:resumed -> agent:stopped(task_complete) -> idle.
it('should handle agent asking multiple questions at once', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;

  // Pre-seed required idle agent (DispatchManager needs one in the pool)
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await harness.advanceTimers();

  // Setup: agent asks two questions in one waiting episode
  harness.setAgentQuestions(`agent-${taskAId.slice(0, 6)}`, [
    {
      id: 'q1',
      question: 'Which database?',
      options: [{ label: 'SQLite' }, { label: 'Postgres' }],
    },
    {
      id: 'q2',
      question: 'Include tests?',
      options: [{ label: 'Yes' }, { label: 'No' }],
    },
  ]);

  // Queue and dispatch task
  await harness.dispatchManager.queue(taskAId);
  harness.clearEvents();

  const dispatchResult = await harness.dispatchManager.dispatchNext();
  await harness.advanceTimers();

  // Verify: exactly one agent:waiting event emitted for both questions
  const waitingEvents = harness.getEventsByType('agent:waiting');
  expect(waitingEvents.length).toBe(1);
  const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
  expect(waitingPayload.taskId).toBe(taskAId);

  // Verify both questions present, in submission order
  const pending = await harness.getPendingQuestions(dispatchResult.agentId!);
  expect(pending?.questions).toHaveLength(2);
  expect(pending?.questions[0].id).toBe('q1');
  expect(pending?.questions[0].question).toBe('Which database?');
  expect(pending?.questions[1].id).toBe('q2');
  expect(pending?.questions[1].question).toBe('Include tests?');

  // Resume once with answers for both questions
  harness.clearEvents();
  await harness.agentManager.resume(dispatchResult.agentId!, {
    q1: 'SQLite',
    q2: 'Yes',
  });
  await harness.advanceTimers();

  // Verify: agent:resumed event emitted
  const resumedEvents = harness.getEventsByType('agent:resumed');
  expect(resumedEvents.length).toBe(1);

  // Verify: agent:stopped event emitted (after resume completes)
  const stoppedEvents = harness.getEventsByType('agent:stopped');
  expect(stoppedEvents.length).toBe(1);
  const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
  expect(stoppedPayload.taskId).toBe(taskAId);
  expect(stoppedPayload.reason).toBe('task_complete');

  // Verify task completed (agent result)
  const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
  expect(agentResult?.success).toBe(true);

  // Verify agent is now idle
  const finalAgent = await harness.agentManager.get(dispatchResult.agentId!);
  expect(finalAgent?.status).toBe('idle');
});
|
||||
});
|
||||
});
|
||||
316
apps/server/test/fixtures.ts
Normal file
316
apps/server/test/fixtures.ts
Normal file
@@ -0,0 +1,316 @@
|
||||
/**
|
||||
* Test Fixtures for E2E Testing
|
||||
*
|
||||
* Provides fixture helpers that seed complete task hierarchies
|
||||
* for integration and E2E tests.
|
||||
*/
|
||||
|
||||
import { nanoid } from 'nanoid';
|
||||
import type { DrizzleDatabase } from '../db/index.js';
|
||||
import {
|
||||
DrizzleInitiativeRepository,
|
||||
DrizzlePhaseRepository,
|
||||
DrizzleTaskRepository,
|
||||
} from '../db/repositories/drizzle/index.js';
|
||||
import { taskDependencies } from '../db/schema.js';
|
||||
|
||||
// =============================================================================
|
||||
// Fixture Interfaces
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Task fixture definition.
|
||||
*/
|
||||
export interface TaskFixture {
|
||||
/** Unique identifier for this task (used for dependency references) */
|
||||
id: string;
|
||||
/** Task name */
|
||||
name: string;
|
||||
/** Task priority */
|
||||
priority?: 'low' | 'medium' | 'high';
|
||||
/** Task category */
|
||||
category?: 'execute' | 'research' | 'discuss' | 'plan' | 'detail' | 'refine' | 'verify' | 'merge' | 'review';
|
||||
/** Names of other tasks in same fixture this task depends on */
|
||||
dependsOn?: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Task group fixture definition (replaces Plan).
|
||||
* Tasks are grouped by parent task in the new model.
|
||||
*/
|
||||
export interface TaskGroupFixture {
|
||||
/** Group name (becomes a detail task) */
|
||||
name: string;
|
||||
/** Tasks in this group */
|
||||
tasks: TaskFixture[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Phase fixture definition.
|
||||
*/
|
||||
export interface PhaseFixture {
|
||||
/** Phase name */
|
||||
name: string;
|
||||
/** Task groups in this phase (each group becomes a parent detail task) */
|
||||
taskGroups: TaskGroupFixture[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Initiative fixture definition (top-level).
|
||||
*/
|
||||
export interface InitiativeFixture {
|
||||
/** Initiative name */
|
||||
name: string;
|
||||
/** Phases in this initiative */
|
||||
phases: PhaseFixture[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Result of seeding a fixture.
|
||||
* Maps names to IDs for all created entities.
|
||||
*/
|
||||
export interface SeededFixture {
|
||||
/** ID of the created initiative */
|
||||
initiativeId: string;
|
||||
/** Map of phase names to IDs */
|
||||
phases: Map<string, string>;
|
||||
/** Map of task group names to parent task IDs */
|
||||
taskGroups: Map<string, string>;
|
||||
/** Map of task names to IDs */
|
||||
tasks: Map<string, string>;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Seed Function
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Seed a complete task hierarchy from a fixture definition.
|
||||
*
|
||||
* Creates initiative, phases, detail tasks (as parent), and child tasks.
|
||||
* Resolves task dependencies by name to actual task IDs.
|
||||
*
|
||||
* @param db - Drizzle database instance
|
||||
* @param fixture - The fixture definition to seed
|
||||
* @returns SeededFixture with all created entity IDs
|
||||
*/
|
||||
export async function seedFixture(
|
||||
db: DrizzleDatabase,
|
||||
fixture: InitiativeFixture
|
||||
): Promise<SeededFixture> {
|
||||
// Create repositories
|
||||
const initiativeRepo = new DrizzleInitiativeRepository(db);
|
||||
const phaseRepo = new DrizzlePhaseRepository(db);
|
||||
const taskRepo = new DrizzleTaskRepository(db);
|
||||
|
||||
// Result maps
|
||||
const phasesMap = new Map<string, string>();
|
||||
const taskGroupsMap = new Map<string, string>();
|
||||
const tasksMap = new Map<string, string>();
|
||||
|
||||
// Collect all task dependencies to resolve after creation
|
||||
const pendingDependencies: Array<{ taskId: string; dependsOnNames: string[] }> = [];
|
||||
|
||||
// Create initiative
|
||||
const initiative = await initiativeRepo.create({
|
||||
name: fixture.name,
|
||||
status: 'active',
|
||||
});
|
||||
|
||||
// Create phases
|
||||
for (const phaseFixture of fixture.phases) {
|
||||
const phase = await phaseRepo.create({
|
||||
initiativeId: initiative.id,
|
||||
name: phaseFixture.name,
|
||||
status: 'pending',
|
||||
});
|
||||
phasesMap.set(phaseFixture.name, phase.id);
|
||||
|
||||
// Create task groups as parent detail tasks
|
||||
let taskOrder = 0;
|
||||
for (const groupFixture of phaseFixture.taskGroups) {
|
||||
// Create parent detail task
|
||||
const parentTask = await taskRepo.create({
|
||||
phaseId: phase.id,
|
||||
initiativeId: initiative.id,
|
||||
name: groupFixture.name,
|
||||
description: `Test task group: ${groupFixture.name}`,
|
||||
category: 'detail',
|
||||
type: 'auto',
|
||||
priority: 'medium',
|
||||
status: 'completed', // Detail tasks are completed once child tasks are created
|
||||
order: taskOrder++,
|
||||
});
|
||||
taskGroupsMap.set(groupFixture.name, parentTask.id);
|
||||
|
||||
// Create child tasks linked to parent
|
||||
let childOrder = 0;
|
||||
for (const taskFixture of groupFixture.tasks) {
|
||||
const task = await taskRepo.create({
|
||||
parentTaskId: parentTask.id,
|
||||
phaseId: phase.id,
|
||||
initiativeId: initiative.id,
|
||||
name: taskFixture.name,
|
||||
description: `Test task: ${taskFixture.name}`,
|
||||
category: taskFixture.category ?? 'execute',
|
||||
type: 'auto',
|
||||
priority: taskFixture.priority ?? 'medium',
|
||||
status: 'pending',
|
||||
order: childOrder++,
|
||||
});
|
||||
tasksMap.set(taskFixture.id, task.id);
|
||||
|
||||
// Collect dependencies to resolve later
|
||||
if (taskFixture.dependsOn && taskFixture.dependsOn.length > 0) {
|
||||
pendingDependencies.push({
|
||||
taskId: task.id,
|
||||
dependsOnNames: taskFixture.dependsOn,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve and insert task dependencies
|
||||
for (const { taskId, dependsOnNames } of pendingDependencies) {
|
||||
for (const depName of dependsOnNames) {
|
||||
const dependsOnTaskId = tasksMap.get(depName);
|
||||
if (!dependsOnTaskId) {
|
||||
throw new Error(
|
||||
`Dependency resolution failed: task "${depName}" not found in fixture`
|
||||
);
|
||||
}
|
||||
|
||||
// Insert into task_dependencies table
|
||||
await db.insert(taskDependencies).values({
|
||||
id: nanoid(),
|
||||
taskId,
|
||||
dependsOnTaskId,
|
||||
createdAt: new Date(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
initiativeId: initiative.id,
|
||||
phases: phasesMap,
|
||||
taskGroups: taskGroupsMap,
|
||||
tasks: tasksMap,
|
||||
};
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Convenience Fixtures
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Simple fixture: 1 initiative -> 1 phase -> 1 task group -> 3 tasks.
 *
 * Task dependency structure:
 * - Task A: no dependencies
 * - Task B: depends on Task A
 * - Task C: depends on Task A
 *
 * Fixture `id` values double as the dependency-reference names and as the
 * keys of `SeededFixture.tasks` after seeding.
 */
export const SIMPLE_FIXTURE: InitiativeFixture = {
  name: 'Simple Test Initiative',
  phases: [
    {
      name: 'Phase 1',
      taskGroups: [
        {
          name: 'Task Group 1',
          tasks: [
            // Root task: B and C only become dispatchable after A completes.
            { id: 'Task A', name: 'Task A', priority: 'high' },
            { id: 'Task B', name: 'Task B', priority: 'medium', dependsOn: ['Task A'] },
            { id: 'Task C', name: 'Task C', priority: 'medium', dependsOn: ['Task A'] },
          ],
        },
      ],
    },
  ],
};
|
||||
|
||||
/**
|
||||
* Parallel fixture: 1 initiative -> 1 phase -> 2 task groups (each with 2 independent tasks).
|
||||
*
|
||||
* Task structure:
|
||||
* - Group A: Task X, Task Y (independent)
|
||||
* - Group B: Task P, Task Q (independent)
|
||||
*/
|
||||
export const PARALLEL_FIXTURE: InitiativeFixture = {
|
||||
name: 'Parallel Test Initiative',
|
||||
phases: [
|
||||
{
|
||||
name: 'Parallel Phase',
|
||||
taskGroups: [
|
||||
{
|
||||
name: 'Group A',
|
||||
tasks: [
|
||||
{ id: 'Task X', name: 'Task X', priority: 'high' },
|
||||
{ id: 'Task Y', name: 'Task Y', priority: 'medium' },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'Group B',
|
||||
tasks: [
|
||||
{ id: 'Task P', name: 'Task P', priority: 'high' },
|
||||
{ id: 'Task Q', name: 'Task Q', priority: 'low' },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
/**
|
||||
* Complex fixture: 1 initiative -> 2 phases -> 4 task groups with cross-group dependencies.
|
||||
*
|
||||
* Structure:
|
||||
* - Phase 1: Group 1 (Task 1A, 1B), Group 2 (Task 2A depends on 1A)
|
||||
* - Phase 2: Group 3 (Task 3A depends on 1B), Group 4 (Task 4A depends on 2A and 3A)
|
||||
*/
|
||||
export const COMPLEX_FIXTURE: InitiativeFixture = {
|
||||
name: 'Complex Test Initiative',
|
||||
phases: [
|
||||
{
|
||||
name: 'Phase 1',
|
||||
taskGroups: [
|
||||
{
|
||||
name: 'Group 1',
|
||||
tasks: [
|
||||
{ id: 'Task 1A', name: 'Task 1A', priority: 'high' },
|
||||
{ id: 'Task 1B', name: 'Task 1B', priority: 'medium' },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'Group 2',
|
||||
tasks: [
|
||||
{ id: 'Task 2A', name: 'Task 2A', priority: 'high', dependsOn: ['Task 1A'] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'Phase 2',
|
||||
taskGroups: [
|
||||
{
|
||||
name: 'Group 3',
|
||||
tasks: [
|
||||
{ id: 'Task 3A', name: 'Task 3A', priority: 'high', dependsOn: ['Task 1B'] },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'Group 4',
|
||||
tasks: [
|
||||
{
|
||||
id: 'Task 4A',
|
||||
name: 'Task 4A',
|
||||
priority: 'high',
|
||||
dependsOn: ['Task 2A', 'Task 3A'],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
35
apps/server/test/fixtures/todo-api/README.md
vendored
Normal file
35
apps/server/test/fixtures/todo-api/README.md
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
# todo-api
|
||||
|
||||
A minimal zero-dependency in-memory todo list library for Node.js.
|
||||
|
||||
## API
|
||||
|
||||
```js
|
||||
import { TodoStore } from './src/todo.js';
|
||||
|
||||
const store = new TodoStore();
|
||||
|
||||
const id = store.add('buy milk'); // returns numeric id
|
||||
store.list(); // returns [{ id, text, done }]
|
||||
store.remove(id); // deletes item
|
||||
store.complete(id); // NOT IMPLEMENTED — marks item done
|
||||
```
|
||||
|
||||
## Status
|
||||
|
||||
The `complete(id)` method is **missing**. The test suite in `src/todo.test.js` covers it and currently fails:
|
||||
|
||||
```
|
||||
node --test src/todo.test.js
|
||||
# → TypeError: store.complete is not a function
|
||||
```
|
||||
|
||||
## Task
|
||||
|
||||
Implement `complete(id)` on `TodoStore` in `src/todo.js` so that it:
|
||||
|
||||
1. Finds the item with the given `id`.
|
||||
2. Sets `item.done = true`.
|
||||
3. Does not throw if `id` is not found (silent no-op).
|
||||
|
||||
All five tests in `src/todo.test.js` should pass after the fix.
|
||||
8
apps/server/test/fixtures/todo-api/package.json
vendored
Normal file
8
apps/server/test/fixtures/todo-api/package.json
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "todo-api",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"test": "node --test src/todo.test.js"
|
||||
}
|
||||
}
|
||||
19
apps/server/test/fixtures/todo-api/src/todo.js
vendored
Normal file
19
apps/server/test/fixtures/todo-api/src/todo.js
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
 * Minimal in-memory todo store used as a deliberately broken test
 * fixture: the `complete(id)` method is intentionally absent so the
 * bundled test suite fails until an agent implements it (see README.md).
 */
export class TodoStore {
  #items = [];
  // Monotonic id source. The previous implementation used Date.now() as
  // the id, which hands the same id to two items added within the same
  // millisecond and breaks remove()/lookup by id (the fixture's own
  // "complete does not affect other items" test adds two items
  // back-to-back). A counter keeps ids numeric but unique per store.
  #nextId = 1;

  /** Add a new todo item; returns its unique numeric id. */
  add(text) {
    const id = this.#nextId++;
    this.#items.push({ id, text, done: false });
    return id;
  }

  /** Return a shallow copy of all items as { id, text, done }. */
  list() {
    return [...this.#items];
  }

  /** Delete the item with the given id (silent no-op if not found). */
  remove(id) {
    this.#items = this.#items.filter(i => i.id !== id);
  }

  // complete(id) deliberately missing — implement me!
}
|
||||
41
apps/server/test/fixtures/todo-api/src/todo.test.js
vendored
Normal file
41
apps/server/test/fixtures/todo-api/src/todo.test.js
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
// Test suite for the todo-api fixture. Run with: node --test src/todo.test.js
// The final two tests exercise complete(), which is deliberately not
// implemented in todo.js, so they fail until an agent adds it.
import { test } from 'node:test';
import assert from 'node:assert/strict';
import { TodoStore } from './todo.js';

test('add returns an id', () => {
  const store = new TodoStore();
  const id = store.add('buy milk');
  assert.ok(typeof id === 'number', 'id should be a number');
});

test('list returns all items', () => {
  const store = new TodoStore();
  store.add('task one');
  store.add('task two');
  assert.equal(store.list().length, 2);
});

test('remove deletes an item', () => {
  const store = new TodoStore();
  const id = store.add('delete me');
  store.remove(id);
  assert.equal(store.list().length, 0);
});

// Fails on the unmodified fixture: store.complete is not a function.
test('complete marks item done', () => {
  const store = new TodoStore();
  const id = store.add('buy milk');
  store.complete(id);
  const item = store.list().find(i => i.id === id);
  assert.ok(item, 'item should still exist after completing');
  assert.equal(item.done, true, 'item.done should be true after complete()');
});

// Fails on the unmodified fixture: store.complete is not a function.
test('complete does not affect other items', () => {
  const store = new TodoStore();
  const id1 = store.add('task one');
  const id2 = store.add('task two');
  store.complete(id1);
  const item2 = store.list().find(i => i.id === id2);
  assert.equal(item2.done, false, 'other items should remain undone');
});
|
||||
394
apps/server/test/harness.test.ts
Normal file
394
apps/server/test/harness.test.ts
Normal file
@@ -0,0 +1,394 @@
|
||||
/**
|
||||
* Tests for Test Harness
|
||||
*
|
||||
* Proves that the test harness enables E2E testing scenarios.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import {
|
||||
createTestHarness,
|
||||
SIMPLE_FIXTURE,
|
||||
PARALLEL_FIXTURE,
|
||||
COMPLEX_FIXTURE,
|
||||
type TestHarness,
|
||||
} from './index.js';
|
||||
import { taskDependencies } from '../db/schema.js';
|
||||
import { eq } from 'drizzle-orm';
|
||||
|
||||
describe('TestHarness', () => {
|
||||
let harness: TestHarness;
|
||||
|
||||
beforeEach(() => {
|
||||
harness = createTestHarness();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
harness.cleanup();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
describe('createTestHarness', () => {
|
||||
it('returns all components', () => {
|
||||
expect(harness.db).toBeDefined();
|
||||
expect(harness.eventBus).toBeDefined();
|
||||
expect(harness.agentManager).toBeDefined();
|
||||
expect(harness.worktreeManager).toBeDefined();
|
||||
expect(harness.dispatchManager).toBeDefined();
|
||||
expect(harness.coordinationManager).toBeDefined();
|
||||
expect(harness.taskRepository).toBeDefined();
|
||||
expect(harness.messageRepository).toBeDefined();
|
||||
expect(harness.agentRepository).toBeDefined();
|
||||
});
|
||||
|
||||
it('provides helper methods', () => {
|
||||
expect(typeof harness.seedFixture).toBe('function');
|
||||
expect(typeof harness.setAgentScenario).toBe('function');
|
||||
expect(typeof harness.setAgentQuestion).toBe('function');
|
||||
expect(typeof harness.setAgentQuestions).toBe('function');
|
||||
expect(typeof harness.getEventsByType).toBe('function');
|
||||
expect(typeof harness.clearEvents).toBe('function');
|
||||
expect(typeof harness.cleanup).toBe('function');
|
||||
});
|
||||
});
|
||||
|
||||
describe('setAgentQuestion convenience helper', () => {
|
||||
it('wraps single question in array format', async () => {
|
||||
vi.useFakeTimers();
|
||||
|
||||
// Set single question using convenience method
|
||||
harness.setAgentQuestion('test-agent', 'q1', 'Which option?', [
|
||||
{ label: 'Option A', description: 'First option' },
|
||||
{ label: 'Option B', description: 'Second option' },
|
||||
]);
|
||||
|
||||
// Spawn agent with that scenario
|
||||
const agent = await harness.agentManager.spawn({
|
||||
name: 'test-agent',
|
||||
taskId: 'task-1',
|
||||
prompt: 'test',
|
||||
});
|
||||
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Verify questions array format
|
||||
const pending = await harness.getPendingQuestions(agent.id);
|
||||
expect(pending).not.toBeNull();
|
||||
expect(pending?.questions).toHaveLength(1);
|
||||
expect(pending?.questions[0].id).toBe('q1');
|
||||
expect(pending?.questions[0].question).toBe('Which option?');
|
||||
expect(pending?.questions[0].options).toHaveLength(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('seedFixture', () => {
|
||||
it('creates task hierarchy from SIMPLE_FIXTURE', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
|
||||
// Check initiative created
|
||||
expect(seeded.initiativeId).toBeDefined();
|
||||
|
||||
// Check phases created
|
||||
expect(seeded.phases.size).toBe(1);
|
||||
expect(seeded.phases.has('Phase 1')).toBe(true);
|
||||
|
||||
// Check task groups created
|
||||
expect(seeded.taskGroups.size).toBe(1);
|
||||
expect(seeded.taskGroups.has('Task Group 1')).toBe(true);
|
||||
|
||||
// Check tasks created
|
||||
expect(seeded.tasks.size).toBe(3);
|
||||
expect(seeded.tasks.has('Task A')).toBe(true);
|
||||
expect(seeded.tasks.has('Task B')).toBe(true);
|
||||
expect(seeded.tasks.has('Task C')).toBe(true);
|
||||
});
|
||||
|
||||
it('returns correct IDs that exist in database', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
|
||||
// Verify task exists in database
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
const taskA = await harness.taskRepository.findById(taskAId);
|
||||
expect(taskA).not.toBeNull();
|
||||
expect(taskA?.name).toBe('Task A');
|
||||
});
|
||||
|
||||
it('creates PARALLEL_FIXTURE correctly', async () => {
|
||||
const seeded = await harness.seedFixture(PARALLEL_FIXTURE);
|
||||
|
||||
expect(seeded.phases.size).toBe(1);
|
||||
expect(seeded.taskGroups.size).toBe(2);
|
||||
expect(seeded.tasks.size).toBe(4);
|
||||
expect(seeded.tasks.has('Task X')).toBe(true);
|
||||
expect(seeded.tasks.has('Task Q')).toBe(true);
|
||||
});
|
||||
|
||||
it('creates COMPLEX_FIXTURE correctly', async () => {
|
||||
const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
|
||||
|
||||
expect(seeded.phases.size).toBe(2);
|
||||
expect(seeded.taskGroups.size).toBe(4);
|
||||
expect(seeded.tasks.size).toBe(5);
|
||||
});
|
||||
});
|
||||
|
||||
describe('task dependencies', () => {
|
||||
it('resolves dependencies correctly (dependsOn contains actual task IDs)', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
const taskBId = seeded.tasks.get('Task B')!;
|
||||
|
||||
// Query task_dependencies table directly
|
||||
const deps = await harness.db
|
||||
.select()
|
||||
.from(taskDependencies)
|
||||
.where(eq(taskDependencies.taskId, taskBId));
|
||||
|
||||
expect(deps.length).toBe(1);
|
||||
expect(deps[0].dependsOnTaskId).toBe(taskAId);
|
||||
});
|
||||
|
||||
it('creates multiple dependencies for a task', async () => {
|
||||
const seeded = await harness.seedFixture(COMPLEX_FIXTURE);
|
||||
|
||||
// Task 4A depends on both Task 2A and Task 3A
|
||||
const task4AId = seeded.tasks.get('Task 4A')!;
|
||||
const task2AId = seeded.tasks.get('Task 2A')!;
|
||||
const task3AId = seeded.tasks.get('Task 3A')!;
|
||||
|
||||
const deps = await harness.db
|
||||
.select()
|
||||
.from(taskDependencies)
|
||||
.where(eq(taskDependencies.taskId, task4AId));
|
||||
|
||||
expect(deps.length).toBe(2);
|
||||
const depIds = deps.map((d) => d.dependsOnTaskId);
|
||||
expect(depIds).toContain(task2AId);
|
||||
expect(depIds).toContain(task3AId);
|
||||
});
|
||||
});
|
||||
|
||||
describe('event capture', () => {
|
||||
it('captures events via getEventsByType', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Queue a task (emits task:queued event)
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
|
||||
const events = harness.getEventsByType('task:queued');
|
||||
expect(events.length).toBe(1);
|
||||
expect((events[0].payload as { taskId: string }).taskId).toBe(taskAId);
|
||||
});
|
||||
|
||||
it('clears events via clearEvents', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
expect(harness.getEventsByType('task:queued').length).toBe(1);
|
||||
|
||||
harness.clearEvents();
|
||||
expect(harness.getEventsByType('task:queued').length).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('dispatch flow', () => {
|
||||
it('dispatchManager.queue() + dispatchNext() uses MockAgentManager', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Note: DispatchManager.dispatchNext() requires an idle agent in the pool
|
||||
// before it will spawn a new agent. Pre-seed an idle agent.
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
// Wait for agent to complete and become idle
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Queue the task
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
|
||||
// Clear events from queue and agent spawn
|
||||
harness.clearEvents();
|
||||
|
||||
// Dispatch the task
|
||||
const result = await harness.dispatchManager.dispatchNext();
|
||||
|
||||
// Advance timers to trigger mock agent completion
|
||||
await harness.advanceTimers();
|
||||
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.taskId).toBe(taskAId);
|
||||
expect(result.agentId).toBeDefined();
|
||||
|
||||
// Should have emitted task:dispatched
|
||||
const dispatchedEvents = harness.getEventsByType('task:dispatched');
|
||||
expect(dispatchedEvents.length).toBe(1);
|
||||
});
|
||||
|
||||
it('returns failure when no tasks are queued', async () => {
|
||||
const result = await harness.dispatchManager.dispatchNext();
|
||||
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.reason).toBe('No dispatchable tasks');
|
||||
});
|
||||
|
||||
it('returns failure when no idle agents available', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Queue the task but don't pre-seed any agents
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
|
||||
// Dispatch without any agents in pool
|
||||
const result = await harness.dispatchManager.dispatchNext();
|
||||
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.reason).toBe('No available agents');
|
||||
});
|
||||
});
|
||||
|
||||
describe('agent completion triggers events', () => {
|
||||
it('agent completion emits agent:stopped event', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed an idle agent (required by DispatchManager)
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
harness.clearEvents();
|
||||
|
||||
// Queue and dispatch
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
harness.clearEvents();
|
||||
await harness.dispatchManager.dispatchNext();
|
||||
|
||||
// Should have agent:spawned
|
||||
const spawnedEvents = harness.getEventsByType('agent:spawned');
|
||||
expect(spawnedEvents.length).toBe(1);
|
||||
|
||||
// Advance timers to trigger completion
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Should have agent:stopped
|
||||
const stoppedEvents = harness.getEventsByType('agent:stopped');
|
||||
expect(stoppedEvents.length).toBe(1);
|
||||
});
|
||||
|
||||
it('custom scenario affects agent behavior', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed an idle agent (required by DispatchManager)
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
harness.clearEvents();
|
||||
|
||||
// Set error scenario for the agent that will be spawned
|
||||
harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
|
||||
status: 'error',
|
||||
delay: 0,
|
||||
error: 'Test crash',
|
||||
});
|
||||
|
||||
// Queue and dispatch
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
harness.clearEvents();
|
||||
await harness.dispatchManager.dispatchNext();
|
||||
|
||||
// Advance timers
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Should have agent:crashed
|
||||
const crashedEvents = harness.getEventsByType('agent:crashed');
|
||||
expect(crashedEvents.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('full dispatch -> complete -> merge flow', () => {
|
||||
it('works end-to-end', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed an idle agent (required by DispatchManager)
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await harness.advanceTimers();
|
||||
harness.clearEvents();
|
||||
|
||||
// Step 1: Queue task
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
|
||||
// Step 2: Dispatch task
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
expect(dispatchResult.success).toBe(true);
|
||||
|
||||
// Advance timers for agent completion
|
||||
await harness.advanceTimers();
|
||||
|
||||
// Clear events for cleaner verification
|
||||
harness.clearEvents();
|
||||
|
||||
// Step 3: Complete task
|
||||
await harness.dispatchManager.completeTask(taskAId);
|
||||
|
||||
// Verify task:completed event
|
||||
const completedEvents = harness.getEventsByType('task:completed');
|
||||
expect(completedEvents.length).toBe(1);
|
||||
expect((completedEvents[0].payload as { taskId: string }).taskId).toBe(taskAId);
|
||||
|
||||
// Step 4: Verify task status in database
|
||||
const task = await harness.taskRepository.findById(taskAId);
|
||||
expect(task?.status).toBe('completed');
|
||||
});
|
||||
});
|
||||
|
||||
describe('MockWorktreeManager', () => {
|
||||
it('creates fake worktrees', async () => {
|
||||
const worktree = await harness.worktreeManager.create('wt-1', 'feature-1');
|
||||
|
||||
expect(worktree.id).toBe('wt-1');
|
||||
expect(worktree.branch).toBe('feature-1');
|
||||
expect(worktree.path).toContain('wt-1');
|
||||
});
|
||||
|
||||
it('merge returns success by default', async () => {
|
||||
await harness.worktreeManager.create('wt-1', 'feature-1');
|
||||
const result = await harness.worktreeManager.merge('wt-1', 'main');
|
||||
|
||||
expect(result.success).toBe(true);
|
||||
});
|
||||
|
||||
it('allows setting custom merge results', async () => {
|
||||
await harness.worktreeManager.create('wt-1', 'feature-1');
|
||||
harness.worktreeManager.setMergeResult('wt-1', {
|
||||
success: false,
|
||||
conflicts: ['file1.ts', 'file2.ts'],
|
||||
message: 'Merge conflict',
|
||||
});
|
||||
|
||||
const result = await harness.worktreeManager.merge('wt-1', 'main');
|
||||
|
||||
expect(result.success).toBe(false);
|
||||
expect(result.conflicts).toEqual(['file1.ts', 'file2.ts']);
|
||||
});
|
||||
});
|
||||
});
|
||||
636
apps/server/test/harness.ts
Normal file
636
apps/server/test/harness.ts
Normal file
@@ -0,0 +1,636 @@
|
||||
/**
|
||||
* Test Harness for E2E Testing
|
||||
*
|
||||
* Wires up the full system with mocks for E2E testing.
|
||||
* Uses real managers (DispatchManager, CoordinationManager) with
|
||||
* MockAgentManager and MockWorktreeManager for isolation.
|
||||
*/
|
||||
|
||||
import { randomUUID } from 'crypto';
|
||||
import { vi } from 'vitest';
|
||||
import type { DrizzleDatabase } from '../db/index.js';
|
||||
import type { EventBus, DomainEvent } from '../events/types.js';
|
||||
import { EventEmitterBus } from '../events/bus.js';
|
||||
import type { AgentManager } from '../agent/types.js';
|
||||
import { MockAgentManager, type MockAgentScenario } from '../agent/mock-manager.js';
|
||||
import type { PendingQuestions, QuestionItem } from '../agent/types.js';
|
||||
import type { WorktreeManager, Worktree, WorktreeDiff, MergeResult } from '../git/types.js';
|
||||
import type { DispatchManager, PhaseDispatchManager } from '../dispatch/types.js';
|
||||
import { DefaultDispatchManager } from '../dispatch/manager.js';
|
||||
import { DefaultPhaseDispatchManager } from '../dispatch/phase-manager.js';
|
||||
import type { CoordinationManager } from '../coordination/types.js';
|
||||
import { DefaultCoordinationManager } from '../coordination/manager.js';
|
||||
import type { TaskRepository } from '../db/repositories/task-repository.js';
|
||||
import type { MessageRepository } from '../db/repositories/message-repository.js';
|
||||
import type { AgentRepository } from '../db/repositories/agent-repository.js';
|
||||
import type { InitiativeRepository } from '../db/repositories/initiative-repository.js';
|
||||
import type { PhaseRepository } from '../db/repositories/phase-repository.js';
|
||||
import type { Initiative, Phase, Task } from '../db/schema.js';
|
||||
import { createTestDatabase } from '../db/repositories/drizzle/test-helpers.js';
|
||||
import { createRepositories } from '../container.js';
|
||||
import {
|
||||
seedFixture,
|
||||
type InitiativeFixture,
|
||||
type SeededFixture,
|
||||
} from './fixtures.js';
|
||||
import { appRouter, createCallerFactory } from '../trpc/router.js';
|
||||
import { createContext, type TRPCContext } from '../trpc/context.js';
|
||||
|
||||
// =============================================================================
|
||||
// MockWorktreeManager
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Simple in-memory WorktreeManager for testing.
|
||||
* Creates fake worktrees without actual git operations.
|
||||
*/
|
||||
export class MockWorktreeManager implements WorktreeManager {
|
||||
private worktrees: Map<string, Worktree> = new Map();
|
||||
private mergeResults: Map<string, MergeResult> = new Map();
|
||||
|
||||
/**
|
||||
* Set a custom merge result for a specific worktree.
|
||||
* Used to test conflict scenarios.
|
||||
*/
|
||||
setMergeResult(worktreeId: string, result: MergeResult): void {
|
||||
this.mergeResults.set(worktreeId, result);
|
||||
}
|
||||
|
||||
async create(id: string, branch: string, baseBranch?: string): Promise<Worktree> {
|
||||
const worktree: Worktree = {
|
||||
id,
|
||||
branch,
|
||||
path: `/tmp/test-worktrees/${id}`,
|
||||
isMainWorktree: false,
|
||||
};
|
||||
this.worktrees.set(id, worktree);
|
||||
return worktree;
|
||||
}
|
||||
|
||||
async remove(id: string): Promise<void> {
|
||||
if (!this.worktrees.has(id)) {
|
||||
throw new Error(`Worktree not found: ${id}`);
|
||||
}
|
||||
this.worktrees.delete(id);
|
||||
this.mergeResults.delete(id);
|
||||
}
|
||||
|
||||
async list(): Promise<Worktree[]> {
|
||||
return Array.from(this.worktrees.values());
|
||||
}
|
||||
|
||||
async get(id: string): Promise<Worktree | null> {
|
||||
return this.worktrees.get(id) ?? null;
|
||||
}
|
||||
|
||||
async diff(id: string): Promise<WorktreeDiff> {
|
||||
if (!this.worktrees.has(id)) {
|
||||
throw new Error(`Worktree not found: ${id}`);
|
||||
}
|
||||
return {
|
||||
files: [],
|
||||
summary: 'No changes (mock)',
|
||||
};
|
||||
}
|
||||
|
||||
async merge(id: string, targetBranch: string): Promise<MergeResult> {
|
||||
if (!this.worktrees.has(id)) {
|
||||
throw new Error(`Worktree not found: ${id}`);
|
||||
}
|
||||
|
||||
// Return custom result if set, otherwise success
|
||||
const customResult = this.mergeResults.get(id);
|
||||
if (customResult) {
|
||||
return customResult;
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
message: `Merged ${id} into ${targetBranch} (mock)`,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all worktrees.
|
||||
* Useful for test cleanup.
|
||||
*/
|
||||
clear(): void {
|
||||
this.worktrees.clear();
|
||||
this.mergeResults.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// CapturingEventBus
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* EventBus wrapper that captures all emitted events.
|
||||
* Extends EventEmitterBus with event capture functionality.
|
||||
*/
|
||||
export class CapturingEventBus extends EventEmitterBus {
|
||||
/** All emitted events */
|
||||
emittedEvents: DomainEvent[] = [];
|
||||
|
||||
emit<T extends DomainEvent>(event: T): void {
|
||||
this.emittedEvents.push(event);
|
||||
super.emit(event);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get events by type.
|
||||
*/
|
||||
getEventsByType(type: string): DomainEvent[] {
|
||||
return this.emittedEvents.filter((e) => e.type === type);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear captured events.
|
||||
*/
|
||||
clearEvents(): void {
|
||||
this.emittedEvents = [];
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// tRPC Caller Type
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Create caller factory for the app router.
|
||||
*/
|
||||
const createCaller = createCallerFactory(appRouter);
|
||||
|
||||
/**
|
||||
* Type for the tRPC caller.
|
||||
*/
|
||||
export type TRPCCaller = ReturnType<typeof createCaller>;
|
||||
|
||||
// =============================================================================
|
||||
// TestHarness Interface
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Test harness for E2E testing.
|
||||
* Provides access to all system components and helper methods.
|
||||
*/
|
||||
export interface TestHarness {
|
||||
// Core components
|
||||
/** In-memory SQLite database */
|
||||
db: DrizzleDatabase;
|
||||
/** Event bus with event capture */
|
||||
eventBus: CapturingEventBus;
|
||||
/** Mock agent manager */
|
||||
agentManager: MockAgentManager;
|
||||
/** Alias for agentManager - used in tests for clarity */
|
||||
mockAgentManager: MockAgentManager;
|
||||
/** Mock worktree manager */
|
||||
worktreeManager: MockWorktreeManager;
|
||||
/** Real dispatch manager wired to mocks */
|
||||
dispatchManager: DispatchManager;
|
||||
/** Real phase dispatch manager wired to phaseRepository */
|
||||
phaseDispatchManager: PhaseDispatchManager;
|
||||
/** Real coordination manager wired to mocks */
|
||||
coordinationManager: CoordinationManager;
|
||||
|
||||
// Repositories
|
||||
/** Task repository */
|
||||
taskRepository: TaskRepository;
|
||||
/** Message repository */
|
||||
messageRepository: MessageRepository;
|
||||
/** Agent repository */
|
||||
agentRepository: AgentRepository;
|
||||
/** Initiative repository */
|
||||
initiativeRepository: InitiativeRepository;
|
||||
/** Phase repository */
|
||||
phaseRepository: PhaseRepository;
|
||||
|
||||
// tRPC Caller
|
||||
/** tRPC caller for direct procedure calls */
|
||||
caller: TRPCCaller;
|
||||
|
||||
// Helpers
|
||||
/**
|
||||
* Seed a fixture into the database.
|
||||
*/
|
||||
seedFixture(fixture: InitiativeFixture): Promise<SeededFixture>;
|
||||
|
||||
/**
|
||||
* Set scenario for a specific agent name.
|
||||
*/
|
||||
setAgentScenario(agentName: string, scenario: MockAgentScenario): void;
|
||||
|
||||
/**
|
||||
* Convenience: Set agent to complete with done status.
|
||||
*/
|
||||
setAgentDone(agentName: string, result?: string): void;
|
||||
|
||||
/**
|
||||
* Convenience: Set agent to ask questions (array form).
|
||||
*/
|
||||
setAgentQuestions(
|
||||
agentName: string,
|
||||
questions: QuestionItem[]
|
||||
): void;
|
||||
|
||||
/**
|
||||
* Convenience: Set agent to ask a single question.
|
||||
* Wraps the question in an array internally.
|
||||
*/
|
||||
setAgentQuestion(
|
||||
agentName: string,
|
||||
questionId: string,
|
||||
question: string,
|
||||
options?: Array<{ label: string; description?: string }>
|
||||
): void;
|
||||
|
||||
/**
|
||||
* Convenience: Set agent to fail with unrecoverable error.
|
||||
*/
|
||||
setAgentError(agentName: string, error: string): void;
|
||||
|
||||
/**
|
||||
* Get pending questions for an agent.
|
||||
*/
|
||||
getPendingQuestions(agentId: string): Promise<PendingQuestions | null>;
|
||||
|
||||
/**
|
||||
* Get events by type.
|
||||
*/
|
||||
getEventsByType(type: string): DomainEvent[];
|
||||
|
||||
/**
|
||||
* Get emitted events by type (alias for getEventsByType).
|
||||
*/
|
||||
getEmittedEvents(type: string): DomainEvent[];
|
||||
|
||||
/**
|
||||
* Clear all captured events.
|
||||
*/
|
||||
clearEvents(): void;
|
||||
|
||||
/**
|
||||
* Clean up all resources.
|
||||
*/
|
||||
cleanup(): void;
|
||||
|
||||
/**
|
||||
* Advance fake timers (wrapper for vi.runAllTimersAsync).
|
||||
* Only works when vi.useFakeTimers() is active.
|
||||
*/
|
||||
advanceTimers(): Promise<void>;
|
||||
|
||||
/**
|
||||
* Run a test body with fake timers enabled.
|
||||
* Activates fake timers before the callback and restores real timers after,
|
||||
* even if the callback throws.
|
||||
*/
|
||||
withFakeTimers(fn: () => Promise<void>): Promise<void>;
|
||||
|
||||
// ==========================================================================
|
||||
// Architect Mode Helpers
|
||||
// ==========================================================================
|
||||
|
||||
/**
|
||||
* Set up scenario where architect completes discussion.
|
||||
*/
|
||||
setArchitectDiscussComplete(
|
||||
agentName: string,
|
||||
_decisions: unknown[],
|
||||
summary: string
|
||||
): void;
|
||||
|
||||
/**
|
||||
* Set up scenario where architect needs more questions in discuss mode.
|
||||
*/
|
||||
setArchitectDiscussQuestions(
|
||||
agentName: string,
|
||||
questions: QuestionItem[]
|
||||
): void;
|
||||
|
||||
/**
|
||||
* Set up scenario where architect completes plan.
|
||||
*/
|
||||
setArchitectPlanComplete(
|
||||
agentName: string,
|
||||
_phases: unknown[]
|
||||
): void;
|
||||
|
||||
/**
|
||||
* Set up scenario where architect completes detail.
|
||||
*/
|
||||
setArchitectDetailComplete(
|
||||
agentName: string,
|
||||
_tasks: unknown[]
|
||||
): void;
|
||||
|
||||
/**
|
||||
* Set up scenario where architect needs questions in detail mode.
|
||||
*/
|
||||
setArchitectDetailQuestions(
|
||||
agentName: string,
|
||||
questions: QuestionItem[]
|
||||
): void;
|
||||
|
||||
// ==========================================================================
|
||||
// Initiative/Phase/Plan Convenience Helpers
|
||||
// ==========================================================================
|
||||
|
||||
/**
|
||||
* Get initiative by ID through tRPC.
|
||||
*/
|
||||
getInitiative(id: string): Promise<Initiative | null>;
|
||||
|
||||
/**
|
||||
* Get phases for initiative through tRPC.
|
||||
*/
|
||||
getPhases(initiativeId: string): Promise<Phase[]>;
|
||||
|
||||
/**
|
||||
* Create initiative through tRPC.
|
||||
*/
|
||||
createInitiative(name: string): Promise<Initiative>;
|
||||
|
||||
/**
|
||||
* Create phases from plan output through tRPC.
|
||||
*/
|
||||
createPhasesFromPlan(
|
||||
initiativeId: string,
|
||||
phases: Array<{ name: string }>
|
||||
): Promise<Phase[]>;
|
||||
|
||||
/**
|
||||
* Create a detail task through tRPC (replaces createPlan).
|
||||
*/
|
||||
createDetailTask(
|
||||
phaseId: string,
|
||||
name: string,
|
||||
description?: string
|
||||
): Promise<Task>;
|
||||
|
||||
/**
|
||||
* Get child tasks of a parent task through tRPC.
|
||||
*/
|
||||
getChildTasks(parentTaskId: string): Promise<Task[]>;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// createTestHarness Factory
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Create a fully wired test harness for E2E testing.
|
||||
*
|
||||
* Wires:
|
||||
* - In-memory SQLite database
|
||||
* - CapturingEventBus (captures all events)
|
||||
* - MockAgentManager (simulates agent behavior)
|
||||
* - MockWorktreeManager (fake worktrees)
|
||||
* - Real DefaultDispatchManager (with mock agent manager)
|
||||
* - Real DefaultCoordinationManager (with mock worktree manager)
|
||||
* - All repositories (Drizzle implementations)
|
||||
* - tRPC caller with full context
|
||||
*/
|
||||
export function createTestHarness(): TestHarness {
|
||||
// Create database
|
||||
const db = createTestDatabase();
|
||||
|
||||
// Create event bus with capture
|
||||
const eventBus = new CapturingEventBus();
|
||||
|
||||
// Create mock managers
|
||||
const agentManager = new MockAgentManager({ eventBus });
|
||||
const worktreeManager = new MockWorktreeManager();
|
||||
|
||||
// Create repositories
|
||||
const repos = createRepositories(db);
|
||||
const { taskRepository, messageRepository, agentRepository, initiativeRepository, phaseRepository } = repos;
|
||||
|
||||
// Create real managers wired to mocks
|
||||
const dispatchManager = new DefaultDispatchManager(
|
||||
taskRepository,
|
||||
messageRepository,
|
||||
agentManager,
|
||||
eventBus
|
||||
);
|
||||
|
||||
const phaseDispatchManager = new DefaultPhaseDispatchManager(
|
||||
phaseRepository,
|
||||
taskRepository,
|
||||
dispatchManager,
|
||||
eventBus
|
||||
);
|
||||
|
||||
const coordinationManager = new DefaultCoordinationManager(
|
||||
worktreeManager,
|
||||
taskRepository,
|
||||
agentRepository,
|
||||
messageRepository,
|
||||
eventBus
|
||||
);
|
||||
|
||||
// Create tRPC context with all dependencies
|
||||
const ctx: TRPCContext = createContext({
|
||||
eventBus,
|
||||
serverStartedAt: new Date(),
|
||||
processCount: 0,
|
||||
agentManager,
|
||||
taskRepository,
|
||||
messageRepository,
|
||||
dispatchManager,
|
||||
phaseDispatchManager,
|
||||
coordinationManager,
|
||||
initiativeRepository,
|
||||
phaseRepository,
|
||||
});
|
||||
|
||||
// Create tRPC caller
|
||||
const caller = createCaller(ctx);
|
||||
|
||||
// Build harness
|
||||
const harness: TestHarness = {
|
||||
// Core components
|
||||
db,
|
||||
eventBus,
|
||||
agentManager,
|
||||
mockAgentManager: agentManager, // Alias for clarity in tests
|
||||
worktreeManager,
|
||||
dispatchManager,
|
||||
phaseDispatchManager,
|
||||
coordinationManager,
|
||||
|
||||
// Repositories
|
||||
taskRepository,
|
||||
messageRepository,
|
||||
agentRepository,
|
||||
initiativeRepository,
|
||||
phaseRepository,
|
||||
|
||||
// tRPC Caller
|
||||
caller,
|
||||
|
||||
// Helpers
|
||||
seedFixture: (fixture: InitiativeFixture) => seedFixture(db, fixture),
|
||||
|
||||
setAgentScenario: (agentName: string, scenario: MockAgentScenario) => {
|
||||
agentManager.setScenario(agentName, scenario);
|
||||
},
|
||||
|
||||
setAgentDone: (agentName: string, result?: string) => {
|
||||
agentManager.setScenario(agentName, { status: 'done', result });
|
||||
},
|
||||
|
||||
setAgentQuestions: (
|
||||
agentName: string,
|
||||
questions: QuestionItem[]
|
||||
) => {
|
||||
agentManager.setScenario(agentName, { status: 'questions', questions });
|
||||
},
|
||||
|
||||
setAgentQuestion: (
|
||||
agentName: string,
|
||||
questionId: string,
|
||||
question: string,
|
||||
options?: Array<{ label: string; description?: string }>
|
||||
) => {
|
||||
agentManager.setScenario(agentName, {
|
||||
status: 'questions',
|
||||
questions: [{ id: questionId, question, options }],
|
||||
});
|
||||
},
|
||||
|
||||
setAgentError: (agentName: string, error: string) => {
|
||||
agentManager.setScenario(agentName, { status: 'error', error });
|
||||
},
|
||||
|
||||
getPendingQuestions: (agentId: string) => agentManager.getPendingQuestions(agentId),
|
||||
|
||||
getEventsByType: (type: string) => eventBus.getEventsByType(type),
|
||||
|
||||
getEmittedEvents: (type: string) => eventBus.getEventsByType(type),
|
||||
|
||||
clearEvents: () => eventBus.clearEvents(),
|
||||
|
||||
cleanup: () => {
|
||||
agentManager.clear();
|
||||
worktreeManager.clear();
|
||||
eventBus.clearEvents();
|
||||
},
|
||||
|
||||
// Timer helper - requires vi.useFakeTimers() to be active
|
||||
advanceTimers: async () => { await vi.runAllTimersAsync(); },
|
||||
|
||||
withFakeTimers: async (fn: () => Promise<void>) => {
|
||||
vi.useFakeTimers();
|
||||
try {
|
||||
await fn();
|
||||
} finally {
|
||||
vi.useRealTimers();
|
||||
}
|
||||
},
|
||||
|
||||
// ========================================================================
|
||||
// Architect Mode Helpers
|
||||
// ========================================================================
|
||||
|
||||
setArchitectDiscussComplete: (
|
||||
agentName: string,
|
||||
_decisions: unknown[],
|
||||
summary: string
|
||||
) => {
|
||||
agentManager.setScenario(agentName, {
|
||||
status: 'done',
|
||||
result: summary,
|
||||
delay: 0,
|
||||
});
|
||||
},
|
||||
|
||||
setArchitectDiscussQuestions: (
|
||||
agentName: string,
|
||||
questions: QuestionItem[]
|
||||
) => {
|
||||
agentManager.setScenario(agentName, {
|
||||
status: 'questions',
|
||||
questions,
|
||||
delay: 0,
|
||||
});
|
||||
},
|
||||
|
||||
setArchitectPlanComplete: (
|
||||
agentName: string,
|
||||
_phases: unknown[]
|
||||
) => {
|
||||
agentManager.setScenario(agentName, {
|
||||
status: 'done',
|
||||
result: 'Plan complete',
|
||||
delay: 0,
|
||||
});
|
||||
},
|
||||
|
||||
setArchitectDetailComplete: (
|
||||
agentName: string,
|
||||
_tasks: unknown[]
|
||||
) => {
|
||||
agentManager.setScenario(agentName, {
|
||||
status: 'done',
|
||||
result: 'Detail complete',
|
||||
delay: 0,
|
||||
});
|
||||
},
|
||||
|
||||
setArchitectDetailQuestions: (
|
||||
agentName: string,
|
||||
questions: QuestionItem[]
|
||||
) => {
|
||||
agentManager.setScenario(agentName, {
|
||||
status: 'questions',
|
||||
questions,
|
||||
delay: 0,
|
||||
});
|
||||
},
|
||||
|
||||
// ========================================================================
|
||||
// Initiative/Phase/Plan Convenience Helpers
|
||||
// ========================================================================
|
||||
|
||||
getInitiative: async (id: string) => {
|
||||
try {
|
||||
return await caller.getInitiative({ id });
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
},
|
||||
|
||||
getPhases: (initiativeId: string) => {
|
||||
return caller.listPhases({ initiativeId });
|
||||
},
|
||||
|
||||
createInitiative: (name: string) => {
|
||||
return caller.createInitiative({ name });
|
||||
},
|
||||
|
||||
createPhasesFromPlan: (
|
||||
initiativeId: string,
|
||||
phases: Array<{ name: string }>
|
||||
) => {
|
||||
return caller.createPhasesFromPlan({ initiativeId, phases });
|
||||
},
|
||||
|
||||
createDetailTask: async (phaseId: string, name: string, description?: string) => {
|
||||
return caller.createPhaseTask({
|
||||
phaseId,
|
||||
name,
|
||||
description,
|
||||
category: 'detail',
|
||||
type: 'auto',
|
||||
requiresApproval: true,
|
||||
});
|
||||
},
|
||||
|
||||
getChildTasks: (parentTaskId: string) => {
|
||||
return caller.listTasks({ parentTaskId });
|
||||
},
|
||||
};
|
||||
|
||||
return harness;
|
||||
}
|
||||
27
apps/server/test/index.ts
Normal file
27
apps/server/test/index.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
/**
|
||||
* Test Module
|
||||
*
|
||||
* Provides test harness and fixtures for E2E testing.
|
||||
*/
|
||||
|
||||
// Fixture helpers
|
||||
export {
|
||||
seedFixture,
|
||||
type TaskFixture,
|
||||
type TaskGroupFixture,
|
||||
type PhaseFixture,
|
||||
type InitiativeFixture,
|
||||
type SeededFixture,
|
||||
SIMPLE_FIXTURE,
|
||||
PARALLEL_FIXTURE,
|
||||
COMPLEX_FIXTURE,
|
||||
} from './fixtures.js';
|
||||
|
||||
// Test harness
|
||||
export {
|
||||
createTestHarness,
|
||||
MockWorktreeManager,
|
||||
CapturingEventBus,
|
||||
type TestHarness,
|
||||
type TRPCCaller,
|
||||
} from './harness.js';
|
||||
203
apps/server/test/integration/agent-workdir-verification.test.ts
Normal file
203
apps/server/test/integration/agent-workdir-verification.test.ts
Normal file
@@ -0,0 +1,203 @@
|
||||
/**
|
||||
* Agent Working Directory Verification Tests
|
||||
*
|
||||
* Tests that verify agents actually run in their intended working directories.
|
||||
* These tests use simple shell commands to prove the agent execution location.
|
||||
*
|
||||
* IMPORTANT: These tests spawn real CLI processes and may incur API costs.
|
||||
* They are SKIPPED by default to prevent accidental charges.
|
||||
*
|
||||
* To run these tests:
|
||||
* ```bash
|
||||
* REAL_WORKDIR_TESTS=1 npm test -- src/test/integration/agent-workdir-verification.test.ts --test-timeout=120000
|
||||
* ```
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
||||
import { mkdtemp, rm, readFile } from 'node:fs/promises';
|
||||
import { existsSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { MultiProviderAgentManager } from '../../agent/manager.js';
|
||||
import { createTestDatabase } from '../../db/repositories/drizzle/test-helpers.js';
|
||||
import {
|
||||
DrizzleAgentRepository,
|
||||
DrizzleProjectRepository,
|
||||
DrizzleAccountRepository,
|
||||
DrizzleInitiativeRepository,
|
||||
} from '../../db/repositories/drizzle/index.js';
|
||||
import { EventEmitterBus } from '../../events/bus.js';
|
||||
|
||||
const SHOULD_SKIP = !process.env.REAL_WORKDIR_TESTS;
|
||||
const TEST_TIMEOUT = 60000;
|
||||
|
||||
describe.skipIf(SHOULD_SKIP)('Agent Working Directory Verification', () => {
|
||||
let tempDir: string;
|
||||
let agentManager: MultiProviderAgentManager;
|
||||
let agentRepository: DrizzleAgentRepository;
|
||||
|
||||
beforeAll(async () => {
|
||||
if (SHOULD_SKIP) return;
|
||||
|
||||
console.log('\n=== Running Agent Working Directory Tests ===');
|
||||
console.log('These tests verify agents run in correct working directories.\n');
|
||||
|
||||
// Create temp directory for test workspace
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'cw-workdir-test-'));
|
||||
|
||||
// Set up test database and repositories
|
||||
const db = await createTestDatabase();
|
||||
const eventBus = new EventEmitterBus();
|
||||
|
||||
agentRepository = new DrizzleAgentRepository(db);
|
||||
const projectRepository = new DrizzleProjectRepository(db);
|
||||
const accountRepository = new DrizzleAccountRepository(db);
|
||||
|
||||
agentManager = new MultiProviderAgentManager(
|
||||
agentRepository,
|
||||
tempDir,
|
||||
projectRepository,
|
||||
accountRepository,
|
||||
eventBus,
|
||||
);
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
if (SHOULD_SKIP || !tempDir) return;
|
||||
try {
|
||||
await rm(tempDir, { recursive: true });
|
||||
} catch (err) {
|
||||
console.warn('Failed to cleanup temp directory:', err);
|
||||
}
|
||||
});
|
||||
|
||||
it('spawns agent in correct standalone working directory', async () => {
|
||||
const prompt = `
|
||||
Write your current working directory to a file called 'verify-pwd.txt'.
|
||||
Use this exact bash command:
|
||||
|
||||
pwd > verify-pwd.txt
|
||||
|
||||
Then output the signal: {"done": true}
|
||||
`.trim();
|
||||
|
||||
// Spawn standalone agent
|
||||
const agent = await agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
expect(agent.id).toBeTruthy();
|
||||
expect(agent.status).toBe('running');
|
||||
|
||||
// Wait for completion (poll agent status)
|
||||
let attempts = 0;
|
||||
const maxAttempts = 60; // 60 seconds timeout
|
||||
|
||||
while (attempts < maxAttempts) {
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
attempts++;
|
||||
|
||||
const currentAgent = await agentRepository.findById(agent.id);
|
||||
if (!currentAgent || currentAgent.status !== 'running') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify final agent state
|
||||
const completedAgent = await agentRepository.findById(agent.id);
|
||||
expect(completedAgent).toBeTruthy();
|
||||
expect(completedAgent!.status).not.toBe('running');
|
||||
|
||||
// Get the agent's expected working directory
|
||||
const expectedWorkdir = join(tempDir, 'agent-workdirs', agent.name, 'workspace');
|
||||
|
||||
// Read diagnostic files
|
||||
const diagnosticFile = join(expectedWorkdir, '.cw', 'spawn-diagnostic.json');
|
||||
const expectedPwdFile = join(expectedWorkdir, '.cw', 'expected-pwd.txt');
|
||||
const verifyPwdFile = join(expectedWorkdir, 'verify-pwd.txt');
|
||||
|
||||
// Verify diagnostic files exist
|
||||
expect(existsSync(diagnosticFile), 'spawn diagnostic file should exist').toBe(true);
|
||||
expect(existsSync(expectedPwdFile), 'expected pwd file should exist').toBe(true);
|
||||
|
||||
// Read diagnostic data
|
||||
const diagnostic = JSON.parse(await readFile(diagnosticFile, 'utf-8'));
|
||||
const expectedPwd = (await readFile(expectedPwdFile, 'utf-8')).trim();
|
||||
|
||||
console.log('Diagnostic data:', diagnostic);
|
||||
console.log('Expected working directory:', expectedPwd);
|
||||
|
||||
// Verify diagnostic consistency
|
||||
expect(diagnostic.intendedCwd).toBe(expectedWorkdir);
|
||||
expect(diagnostic.cwdExistsAtSpawn).toBe(true);
|
||||
expect(expectedPwd).toBe(expectedWorkdir);
|
||||
|
||||
// The critical test: verify the agent actually wrote the file in the expected location
|
||||
if (existsSync(verifyPwdFile)) {
|
||||
const actualPwd = (await readFile(verifyPwdFile, 'utf-8')).trim();
|
||||
console.log('Agent reported working directory:', actualPwd);
|
||||
|
||||
// This is the key verification: the pwd reported by the agent should match expected
|
||||
expect(actualPwd).toBe(expectedWorkdir);
|
||||
} else {
|
||||
// If the file doesn't exist, the agent either failed or ran somewhere else
|
||||
console.warn('Agent did not create verify-pwd.txt file');
|
||||
console.log('Expected at:', verifyPwdFile);
|
||||
|
||||
// Let's check if it was created elsewhere (debugging)
|
||||
const alternativeLocations = [
|
||||
join(tempDir, 'verify-pwd.txt'),
|
||||
join(process.cwd(), 'verify-pwd.txt'),
|
||||
];
|
||||
|
||||
for (const loc of alternativeLocations) {
|
||||
if (existsSync(loc)) {
|
||||
const content = await readFile(loc, 'utf-8');
|
||||
console.log(`Found verify-pwd.txt at unexpected location ${loc}:`, content.trim());
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error('Agent did not create pwd verification file in expected location');
|
||||
}
|
||||
}, TEST_TIMEOUT);
|
||||
|
||||
it('creates diagnostic files with correct metadata', async () => {
|
||||
const prompt = `Output the signal: {"done": true}`;
|
||||
|
||||
const agent = await agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
// Wait a bit for spawn to complete
|
||||
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||
|
||||
const expectedWorkdir = join(tempDir, 'agent-workdirs', agent.name, 'workspace');
|
||||
const diagnosticFile = join(expectedWorkdir, '.cw', 'spawn-diagnostic.json');
|
||||
const expectedPwdFile = join(expectedWorkdir, '.cw', 'expected-pwd.txt');
|
||||
|
||||
// Verify files exist immediately after spawn
|
||||
expect(existsSync(diagnosticFile), 'diagnostic file should be created after spawn').toBe(true);
|
||||
expect(existsSync(expectedPwdFile), 'expected pwd file should be created').toBe(true);
|
||||
|
||||
// Verify diagnostic content
|
||||
const diagnostic = JSON.parse(await readFile(diagnosticFile, 'utf-8'));
|
||||
const expectedPwd = (await readFile(expectedPwdFile, 'utf-8')).trim();
|
||||
|
||||
expect(diagnostic.agentId).toBe(agent.id);
|
||||
expect(diagnostic.alias).toBe(agent.name);
|
||||
expect(diagnostic.intendedCwd).toBe(expectedWorkdir);
|
||||
expect(diagnostic.provider).toBe('claude');
|
||||
expect(diagnostic.cwdExistsAtSpawn).toBe(true);
|
||||
expect(diagnostic.customCwdProvided).toBe(false);
|
||||
expect(typeof diagnostic.timestamp).toBe('string');
|
||||
expect(Array.isArray(diagnostic.args)).toBe(true);
|
||||
|
||||
expect(expectedPwd).toBe(expectedWorkdir);
|
||||
});
|
||||
});
|
||||
232
apps/server/test/integration/crash-race-condition.test.ts
Normal file
232
apps/server/test/integration/crash-race-condition.test.ts
Normal file
@@ -0,0 +1,232 @@
|
||||
/**
|
||||
* Integration test to reproduce and fix the crash marking race condition.
|
||||
*
|
||||
* This test simulates the exact scenario where agents complete successfully
|
||||
* but get marked as crashed due to timing issues in the output handler.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { writeFile, mkdir, rm } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { randomBytes } from 'node:crypto';
|
||||
import { OutputHandler } from '../../agent/output-handler.js';
|
||||
import type { AgentRepository } from '../../db/repositories/agent-repository.js';
|
||||
|
||||
/**
 * Minimal agent-row shape used by the mock repository in this file.
 *
 * Mirrors the agent record fields these tests read and mutate (notably
 * `status`, which the race condition under test flips to 'crashed'), kept
 * local so the test does not depend on the full DB schema types.
 */
interface TestAgent {
  id: string;
  name: string;
  // Lifecycle status — the assertions in this file check transitions of this field.
  status: 'idle' | 'running' | 'waiting_for_input' | 'stopped' | 'crashed';
  mode: 'execute' | 'discuss' | 'plan' | 'detail' | 'refine';
  taskId: string | null;
  sessionId: string | null;
  worktreeId: string;
  createdAt: Date;
  updatedAt: Date;
  provider: string;
  accountId: string | null;
  pid: number | null;
  // Path to the agent's JSONL output stream, seeded per-test in beforeEach.
  outputFilePath: string | null;
  result: string | null;
  pendingQuestions: string | null;
  initiativeId: string | null;
  userDismissedAt: Date | null;
  exitCode: number | null;
}
|
||||
|
||||
describe('Crash marking race condition', () => {
|
||||
let outputHandler: OutputHandler;
|
||||
let testAgent: TestAgent;
|
||||
let testDir: string;
|
||||
let mockRepo: AgentRepository;
|
||||
|
||||
// Track all repository calls
|
||||
let updateCalls: Array<{ id: string; data: any }> = [];
|
||||
let finalAgentStatus: string | null = null;
|
||||
|
||||
beforeEach(async () => {
|
||||
updateCalls = [];
|
||||
finalAgentStatus = null;
|
||||
|
||||
// Create test directory structure
|
||||
testDir = join(tmpdir(), `crash-test-${randomBytes(8).toString('hex')}`);
|
||||
const outputDir = join(testDir, '.cw/output');
|
||||
await mkdir(outputDir, { recursive: true });
|
||||
|
||||
// Create test agent
|
||||
testAgent = {
|
||||
id: 'test-agent-id',
|
||||
name: 'test-agent',
|
||||
status: 'running',
|
||||
mode: 'refine',
|
||||
taskId: 'task-1',
|
||||
sessionId: 'session-1',
|
||||
worktreeId: 'worktree-1',
|
||||
createdAt: new Date(),
|
||||
updatedAt: new Date(),
|
||||
provider: 'claude',
|
||||
accountId: null,
|
||||
pid: 12345,
|
||||
outputFilePath: join(testDir, 'output.jsonl'),
|
||||
result: null,
|
||||
pendingQuestions: null,
|
||||
initiativeId: 'init-1',
|
||||
userDismissedAt: null,
|
||||
exitCode: null
|
||||
};
|
||||
|
||||
// Mock repository that tracks all update calls
|
||||
mockRepo = {
|
||||
async findById(id: string) {
|
||||
return id === testAgent.id ? { ...testAgent } : null;
|
||||
},
|
||||
async update(id: string, data: any) {
|
||||
updateCalls.push({ id, data });
|
||||
if (data.status) {
|
||||
finalAgentStatus = data.status;
|
||||
testAgent.status = data.status;
|
||||
}
|
||||
return { ...testAgent, ...data };
|
||||
},
|
||||
async create() { throw new Error('Not implemented'); },
|
||||
async findAll() { throw new Error('Not implemented'); },
|
||||
async findByStatus() { throw new Error('Not implemented'); },
|
||||
async findByTaskId() { throw new Error('Not implemented'); },
|
||||
async findByName() { throw new Error('Not implemented'); },
|
||||
async findBySessionId() { throw new Error('Not implemented'); },
|
||||
async delete() { throw new Error('Not implemented'); }
|
||||
};
|
||||
|
||||
outputHandler = new OutputHandler(mockRepo);
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
try {
|
||||
await rm(testDir, { recursive: true });
|
||||
} catch {
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
});
|
||||
|
||||
it('should NOT mark agent as crashed when signal.json indicates completion', async () => {
|
||||
// SETUP: Create a valid completion signal that should prevent crash marking
|
||||
const signalPath = join(testDir, '.cw/output/signal.json');
|
||||
const signalContent = {
|
||||
status: 'questions',
|
||||
questions: [
|
||||
{ id: 'q1', question: 'Test question?' }
|
||||
]
|
||||
};
|
||||
await writeFile(signalPath, JSON.stringify(signalContent, null, 2));
|
||||
|
||||
// SETUP: Create empty output file to simulate "no new output detected" scenario
|
||||
const outputFilePath = join(testDir, 'output.jsonl');
|
||||
await writeFile(outputFilePath, ''); // Empty file simulates the race condition
|
||||
|
||||
// Mock active agent with output file path
|
||||
const mockActive = {
|
||||
outputFilePath,
|
||||
streamSessionId: 'session-1'
|
||||
};
|
||||
|
||||
// Mock getAgentWorkdir function — receives worktreeId, not agentId
|
||||
const getAgentWorkdir = (worktreeId: string) => {
|
||||
expect(worktreeId).toBe(testAgent.worktreeId);
|
||||
return testDir;
|
||||
};
|
||||
|
||||
// EXECUTE: Call handleCompletion which should trigger the race condition scenario
|
||||
// This simulates: no stream text + no new file content + valid signal.json
|
||||
await (outputHandler as any).handleCompletion(
|
||||
testAgent.id,
|
||||
mockActive,
|
||||
getAgentWorkdir
|
||||
);
|
||||
|
||||
// VERIFY: Agent should NOT be marked as crashed
|
||||
console.log('Update calls:', updateCalls);
|
||||
console.log('Final agent status:', finalAgentStatus);
|
||||
|
||||
expect(updateCalls.length).toBeGreaterThan(0);
|
||||
expect(finalAgentStatus).not.toBe('crashed');
|
||||
|
||||
// Should be marked with the appropriate completion status
|
||||
expect(['idle', 'waiting_for_input', 'stopped']).toContain(finalAgentStatus);
|
||||
});
|
||||
|
||||
it('should mark agent as crashed when no completion signal exists', async () => {
|
||||
// SETUP: No signal.json file exists - agent should be marked as crashed
|
||||
const outputFilePath = join(testDir, 'output.jsonl');
|
||||
await writeFile(outputFilePath, ''); // Empty file
|
||||
|
||||
const mockActive = {
|
||||
outputFilePath,
|
||||
streamSessionId: 'session-1'
|
||||
};
|
||||
|
||||
const getAgentWorkdir = (agentId: string) => testDir;
|
||||
|
||||
// EXECUTE: This should mark agent as crashed since no completion signal exists
|
||||
await (outputHandler as any).handleCompletion(
|
||||
testAgent.id,
|
||||
mockActive,
|
||||
getAgentWorkdir
|
||||
);
|
||||
|
||||
// VERIFY: Agent SHOULD be marked as crashed
|
||||
expect(finalAgentStatus).toBe('crashed');
|
||||
});
|
||||
|
||||
it('should handle the exact slim-wildebeest scenario', async () => {
|
||||
// SETUP: Reproduce the exact conditions that slim-wildebeest had
|
||||
const signalPath = join(testDir, '.cw/output/signal.json');
|
||||
const exactSignalContent = {
|
||||
"status": "questions",
|
||||
"questions": [
|
||||
{
|
||||
"id": "q1",
|
||||
"question": "What UI framework/styling system is the admin UI currently using that needs to be replaced?"
|
||||
},
|
||||
{
|
||||
"id": "q2",
|
||||
"question": "What specific problems with the current admin UI are we solving? (e.g., poor developer experience, design inconsistency, performance issues, lack of accessibility)"
|
||||
}
|
||||
]
|
||||
};
|
||||
await writeFile(signalPath, JSON.stringify(exactSignalContent, null, 2));
|
||||
|
||||
// Create SUMMARY.md like slim-wildebeest had
|
||||
const summaryPath = join(testDir, '.cw/output/SUMMARY.md');
|
||||
const summaryContent = `---
|
||||
files_modified: []
|
||||
---
|
||||
Initiative page is essentially empty — lacks context, scope, goals, and technical approach. Requested clarification on current state, problems being solved, scope boundaries, and success criteria before proposing meaningful improvements.`;
|
||||
await writeFile(summaryPath, summaryContent);
|
||||
|
||||
// Simulate the output file scenario
|
||||
const outputFilePath = join(testDir, 'output.jsonl');
|
||||
await writeFile(outputFilePath, 'some initial content\n'); // Some content but no new lines
|
||||
|
||||
const mockActive = {
|
||||
outputFilePath,
|
||||
streamSessionId: 'session-1'
|
||||
};
|
||||
|
||||
const getAgentWorkdir = (agentId: string) => testDir;
|
||||
|
||||
// EXECUTE: This is the exact scenario that caused slim-wildebeest to be marked as crashed
|
||||
await (outputHandler as any).handleCompletion(
|
||||
testAgent.id,
|
||||
mockActive,
|
||||
getAgentWorkdir
|
||||
);
|
||||
|
||||
// VERIFY: This should NOT be marked as crashed
|
||||
console.log('slim-wildebeest scenario - Final status:', finalAgentStatus);
|
||||
console.log('slim-wildebeest scenario - Update calls:', updateCalls);
|
||||
|
||||
expect(finalAgentStatus).not.toBe('crashed');
|
||||
expect(['idle', 'waiting_for_input', 'stopped']).toContain(finalAgentStatus);
|
||||
});
|
||||
|
||||
});
|
||||
@@ -0,0 +1,244 @@
|
||||
/**
|
||||
* Full-Flow Cassette Integration Test
|
||||
*
|
||||
* Cassette-backed variant of the full multi-agent workflow test.
|
||||
* Runs the same discuss → plan → detail → execute pipeline but intercepts
|
||||
* subprocess spawning with CassetteProcessManager — no real API calls in CI.
|
||||
*
|
||||
 * Recording (one-time, costs ~$2–5):
 *   CW_CASSETTE_RECORD=1 npm test -- apps/server/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
 *   # Commit the generated apps/server/test/cassettes/<hash>.json files afterward
 *
 * Replay (default — runs in seconds):
 *   npm test -- apps/server/test/integration/full-flow/full-flow-cassette.test.ts
 *
 * Force re-record (overwrites existing cassettes):
 *   CW_CASSETTE_FORCE_RECORD=1 npm test -- apps/server/test/integration/full-flow/full-flow-cassette.test.ts --test-timeout=3600000
 */
|
||||
|
||||
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
||||
import { existsSync, readdirSync } from 'node:fs';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import type { Phase, Task } from '../../../db/schema.js';
|
||||
import type { AgentResult } from '../../../agent/types.js';
|
||||
import { buildExecutePrompt } from '../../../agent/prompts/index.js';
|
||||
import { CassetteStore } from '../../cassette/store.js';
|
||||
import { CassetteProcessManager, type CassetteMode } from '../../cassette/process-manager.js';
|
||||
import {
|
||||
createFullFlowHarness,
|
||||
type FullFlowHarness,
|
||||
} from './harness.js';
|
||||
import {
|
||||
printHeader,
|
||||
printDiscussResult,
|
||||
printPlanResult,
|
||||
printDetailResult,
|
||||
printExecuteResult,
|
||||
printFinalSummary,
|
||||
type ExecutedTask,
|
||||
} from './report.js';
|
||||
|
||||
// =============================================================================
|
||||
// Constants
|
||||
// =============================================================================
|
||||
|
||||
// True when this run records new cassettes (either record env flag is set).
const RECORDING =
  process.env.CW_CASSETTE_FORCE_RECORD === '1' || process.env.CW_CASSETTE_RECORD === '1';

/**
 * Test timeout.
 * - Replay: 5 min (cassettes complete in seconds; cap is generous headroom)
 * - Record: 60 min (real agents doing discuss/plan/detail/execute take API time)
 */
const CASSETTE_FLOW_TIMEOUT = RECORDING ? 60 * 60_000 : 5 * 60_000;

const __dirname = dirname(fileURLToPath(import.meta.url));
// Where recorded cassettes live; overridable via CW_CASSETTE_DIR.
const CASSETTE_DIR =
  process.env.CW_CASSETTE_DIR ?? join(__dirname, '../../cassettes');
|
||||
|
||||
// =============================================================================
|
||||
// Mode helper
|
||||
// =============================================================================
|
||||
|
||||
function cassetteMode(): CassetteMode {
|
||||
if (process.env.CW_CASSETTE_FORCE_RECORD === '1') return 'record';
|
||||
if (process.env.CW_CASSETTE_RECORD === '1') return 'auto';
|
||||
return 'replay';
|
||||
}
|
||||
|
||||
/**
|
||||
* True when cassettes are available (at least one .json file) OR we're in a
|
||||
* recording run. Skips the suite if no cassettes have been recorded yet so
|
||||
* that `npm test` doesn't fail on a fresh clone before cassettes are committed.
|
||||
*/
|
||||
function cassettesAvailable(): boolean {
|
||||
const mode = cassetteMode();
|
||||
if (mode !== 'replay') return true; // recording runs always proceed
|
||||
if (!existsSync(CASSETTE_DIR)) return false;
|
||||
return readdirSync(CASSETTE_DIR).some((f) => f.endsWith('.json'));
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Test
|
||||
// =============================================================================
|
||||
|
||||
// Skipped entirely when no cassettes exist and we are not recording —
// see cassettesAvailable() above.
describe.skipIf(!cassettesAvailable())('full flow (cassette replay)', () => {
  let harness: FullFlowHarness;
  const startedAt = Date.now(); // wall-clock start for the final summary

  beforeAll(async () => {
    const store = new CassetteStore(CASSETTE_DIR);
    const mode = cassetteMode();

    // Inject CassetteProcessManager so no real subprocesses are spawned in replay.
    harness = await createFullFlowHarness('Add complete() method to TodoStore', {
      processManagerFactory: (workspaceRoot, projectRepo) =>
        new CassetteProcessManager(workspaceRoot, projectRepo, store, mode),
    });

    printHeader(harness.initiative.name);
    console.log(`  Cassette mode : ${mode}`);
    console.log(`  Cassette dir  : ${CASSETTE_DIR}`);
    console.log(`  Initiative ID : ${harness.initiative.id}`);
    console.log(`  Workspace     : ${harness.workspaceRoot}`);
  }, CASSETTE_FLOW_TIMEOUT);

  afterAll(async () => {
    if (harness) await harness.cleanup();
  });

  it(
    'runs the complete multi-agent workflow from cassettes',
    async () => {
      const { initiative, caller, agentManager, phaseRepository, taskRepository } = harness;
      const initiativeId = initiative.id;

      // ── Stage 2: Discuss ───────────────────────────────────────────────────
      console.log('\n\n>>> Stage 2: DISCUSS <<<');
      const discussAgent = await caller.spawnArchitectDiscuss({ initiativeId });
      expect(discussAgent.id).toBeTruthy();
      console.log(`  Spawned discuss agent: ${discussAgent.name} (${discussAgent.id})`);

      const discussResult = await harness.driveToCompletion(
        discussAgent.id,
        'Use your best judgment and keep it simple. The focus is implementing complete(id) on TodoStore.',
        CASSETTE_FLOW_TIMEOUT,
      );
      printDiscussResult(discussAgent.id, discussResult);

      // Discuss failure is non-fatal: the plan stage can still proceed.
      if (!discussResult?.success) {
        console.warn('  [WARN] discuss agent did not succeed; continuing to plan stage');
      }

      // ── Stage 3: Plan ──────────────────────────────────────────────────────
      console.log('\n\n>>> Stage 3: PLAN <<<');
      const planAgent = await caller.spawnArchitectPlan({ initiativeId });
      expect(planAgent.id).toBeTruthy();
      console.log(`  Spawned plan agent: ${planAgent.name} (${planAgent.id})`);

      const planResult = await harness.driveToCompletion(
        planAgent.id,
        'Keep it simple.',
        CASSETTE_FLOW_TIMEOUT,
      );
      expect(planResult).toBeTruthy();

      const phases: Phase[] = await phaseRepository.findByInitiativeId(initiativeId);
      expect(phases.length).toBeGreaterThan(0);
      printPlanResult(phases);

      // ── Stage 4: Detail (per phase) ────────────────────────────────────────
      console.log('\n\n>>> Stage 4: DETAIL <<<');
      for (const phase of phases) {
        const detailAgent = await caller.spawnArchitectDetail({ phaseId: phase.id });
        expect(detailAgent.id).toBeTruthy();
        console.log(`  Spawned detail agent for phase "${phase.name}": ${detailAgent.name}`);

        const detailResult = await harness.driveToCompletion(
          detailAgent.id,
          'Keep it simple.',
          CASSETTE_FLOW_TIMEOUT,
        );
        expect(detailResult).toBeTruthy();

        // Each phase must yield at least one auto-executable task.
        const phaseTasks = await taskRepository.findByPhaseId(phase.id);
        const executeTasks = phaseTasks.filter((t) => t.category === 'execute' && t.type === 'auto');
        expect(executeTasks.length).toBeGreaterThan(0);
        printDetailResult(phase, phaseTasks);
      }

      // ── Stage 5: Execute ───────────────────────────────────────────────────
      console.log('\n\n>>> Stage 5: EXECUTE <<<');
      const allTasks = await gatherAllExecuteTasks(taskRepository, phases);
      console.log(`  Found ${allTasks.length} execute task(s) across ${phases.length} phase(s)`);

      const executed: ExecutedTask[] = [];
      for (const task of allTasks) {
        console.log(`  Spawning execute agent for: "${task.name}"`);
        const execAgent = await agentManager.spawn({
          taskId: task.id,
          prompt: buildExecutePrompt(task.description ?? task.name),
          mode: 'execute',
          initiativeId,
          phaseId: task.phaseId ?? undefined,
          inputContext: {
            initiative,
            task,
          },
        });
        console.log(`    Agent: ${execAgent.name} (${execAgent.id})`);

        const result = await harness.driveToCompletion(
          execAgent.id,
          'Use your best judgment and keep it simple.',
          CASSETTE_FLOW_TIMEOUT,
        );
        executed.push({ task, result });

        const icon = result?.success ? '✓' : '✗';
        console.log(`    ${icon} Completed with success=${result?.success ?? null}`);
        if (result && !result.success) {
          console.log(`      Message: ${result.message?.slice(0, 200)}`);
        }
      }

      printExecuteResult(executed);

      // ── Assertions ─────────────────────────────────────────────────────────
      expect(executed.length).toBeGreaterThan(0);

      // Per-task failures are warned about, not asserted — cassette replay
      // only requires that the pipeline ran end to end.
      const allSucceeded = executed.every((e) => e.result?.success === true);
      if (!allSucceeded) {
        const failed = executed.filter((e) => !e.result?.success);
        console.warn(`  [WARN] ${failed.length} execute task(s) did not succeed`);
      }

      // ── Final summary ──────────────────────────────────────────────────────
      printFinalSummary(
        initiative.name,
        phases,
        allTasks,
        executed,
        Date.now() - startedAt,
      );
    },
    CASSETTE_FLOW_TIMEOUT,
  );
});
|
||||
|
||||
// =============================================================================
|
||||
// Helpers
|
||||
// =============================================================================
|
||||
|
||||
async function gatherAllExecuteTasks(
|
||||
taskRepository: FullFlowHarness['taskRepository'],
|
||||
phases: Phase[],
|
||||
): Promise<Task[]> {
|
||||
const result: Task[] = [];
|
||||
for (const phase of phases) {
|
||||
const phaseTasks = await taskRepository.findByPhaseId(phase.id);
|
||||
const execTasks = phaseTasks.filter((t) => t.category === 'execute' && t.type === 'auto');
|
||||
result.push(...execTasks);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
399
apps/server/test/integration/full-flow/harness.ts
Normal file
399
apps/server/test/integration/full-flow/harness.ts
Normal file
@@ -0,0 +1,399 @@
|
||||
/**
|
||||
* Full-Flow Test Harness
|
||||
*
|
||||
* Wires up the complete system with real agents for end-to-end multi-agent
|
||||
* workflow testing: discuss → plan → detail → execute.
|
||||
*
|
||||
* Unlike the standard TestHarness (MockAgentManager) or RealProviderHarness
|
||||
* (agents only), this harness adds:
|
||||
* - All 11 repositories
|
||||
* - tRPC caller for architect/agent procedures
|
||||
* - A self-contained fixture git repo (todo-api) for agents to work on
|
||||
* - Helpers for driving agents through question/answer loops
|
||||
*
|
||||
* Used by full-flow-cassette.test.ts (replay) and for manual recording runs.
|
||||
*/
|
||||
|
||||
import { mkdtemp, rm, cp } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { execSync } from 'node:child_process';
|
||||
import type { DrizzleDatabase } from '../../../db/index.js';
|
||||
import type { DomainEvent } from '../../../events/types.js';
|
||||
import { EventEmitterBus } from '../../../events/bus.js';
|
||||
import { MultiProviderAgentManager } from '../../../agent/manager.js';
|
||||
import type { AgentResult, PendingQuestions } from '../../../agent/types.js';
|
||||
import type { Initiative, Project, Phase, Task } from '../../../db/schema.js';
|
||||
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
|
||||
import type { PhaseRepository } from '../../../db/repositories/phase-repository.js';
|
||||
import type { TaskRepository } from '../../../db/repositories/task-repository.js';
|
||||
import type { MessageRepository } from '../../../db/repositories/message-repository.js';
|
||||
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
|
||||
import type { PageRepository } from '../../../db/repositories/page-repository.js';
|
||||
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
|
||||
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
|
||||
import type { ChangeSetRepository } from '../../../db/repositories/change-set-repository.js';
|
||||
import type { LogChunkRepository } from '../../../db/repositories/log-chunk-repository.js';
|
||||
import type { ConversationRepository } from '../../../db/repositories/conversation-repository.js';
|
||||
import type { ProcessManager } from '../../../agent/process-manager.js';
|
||||
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';
|
||||
import { createRepositories } from '../../../container.js';
|
||||
import { DefaultDispatchManager } from '../../../dispatch/manager.js';
|
||||
import { appRouter, createCallerFactory } from '../../../trpc/router.js';
|
||||
import { createContext } from '../../../trpc/context.js';
|
||||
|
||||
// =============================================================================
|
||||
// CapturingEventBus
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Event bus that records every emitted domain event in addition to
 * forwarding it to subscribers via the parent EventEmitterBus.
 * Tests use it to assert on the event stream after the fact.
 */
export class CapturingEventBus extends EventEmitterBus {
  // All events emitted since construction (or the last clearEvents()).
  emittedEvents: DomainEvent[] = [];

  emit<T extends DomainEvent>(event: T): void {
    // Record first, then deliver to real subscribers.
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /** Return captured events whose `type` matches, narrowed to T. */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
    return this.emittedEvents.filter((e) => e.type === type) as T[];
  }

  /** Drop all captured events (subscribers are unaffected). */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}
|
||||
|
||||
// =============================================================================
|
||||
// Sleep helper
|
||||
// =============================================================================
|
||||
|
||||
export function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// tRPC caller type
|
||||
// =============================================================================
|
||||
|
||||
// Server-side caller factory — lets tests invoke tRPC procedures directly.
const createCaller = createCallerFactory(appRouter);
export type FullFlowCaller = ReturnType<typeof createCaller>;
|
||||
|
||||
// =============================================================================
|
||||
// FullFlowHarness interface
|
||||
// =============================================================================
|
||||
|
||||
/** Status of an agent that requires attention: done, waiting for answers, or crashed */
export type AgentAttentionStatus = 'done' | 'waiting' | 'crashed';

/**
 * Everything a full-flow test needs: wired repositories, a tRPC caller,
 * the real agent manager, and helpers for driving agents to completion.
 */
export interface FullFlowHarness {
  /** Absolute path to the CW workspace (worktrees are created here) */
  workspaceRoot: string;
  /** Absolute path to the cloned todo-api fixture git repo */
  fixtureRoot: string;
  /** The registered todo-api project */
  project: Project;
  /** The initiative created for the test run */
  initiative: Initiative;
  /** tRPC caller (all procedures available) */
  caller: FullFlowCaller;
  /** Real MultiProviderAgentManager */
  agentManager: MultiProviderAgentManager;
  /** In-memory SQLite database */
  db: DrizzleDatabase;
  /** Event bus with capture capability */
  eventBus: CapturingEventBus;

  // All 11 repositories
  initiativeRepository: InitiativeRepository;
  phaseRepository: PhaseRepository;
  taskRepository: TaskRepository;
  messageRepository: MessageRepository;
  agentRepository: AgentRepository;
  pageRepository: PageRepository;
  projectRepository: ProjectRepository;
  accountRepository: AccountRepository;
  changeSetRepository: ChangeSetRepository;
  logChunkRepository: LogChunkRepository;
  conversationRepository: ConversationRepository;

  /**
   * Wait for an agent to reach a terminal status (idle/stopped/crashed).
   * Returns null if the agent enters waiting_for_input.
   */
  waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;

  /**
   * Poll until the agent needs attention: done (idle/stopped), waiting for input, or crashed.
   * Useful for the question/answer loop in discuss mode.
   */
  waitForAgentAttention(agentId: string, timeoutMs?: number): Promise<AgentAttentionStatus>;

  /**
   * Drive an agent to full completion, answering any questions along the way.
   * Answers all questions with the provided answer string (or a default).
   */
  driveToCompletion(
    agentId: string,
    answer?: string,
    timeoutMs?: number,
  ): Promise<AgentResult | null>;

  /**
   * Get captured events filtered by type.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[];

  /**
   * Kill all running agents and remove temp directories.
   */
  cleanup(): Promise<void>;
}
|
||||
|
||||
// =============================================================================
// Poll interval
// =============================================================================

// How often agent status is re-checked while polling (ms).
const POLL_INTERVAL_MS = 1500;

// =============================================================================
// Factory
// =============================================================================

const __dirname = dirname(fileURLToPath(import.meta.url));
// Source of the todo-api fixture project copied into each test's temp repo.
const FIXTURES_DIR = join(__dirname, '../../fixtures/todo-api');

export interface FullFlowHarnessOptions {
  /** Factory called after workspaceRoot + repos are created. Return a custom ProcessManager. */
  processManagerFactory?: (workspaceRoot: string, projectRepo: ProjectRepository) => ProcessManager;
}
|
||||
|
||||
/**
|
||||
* Create a full-flow test harness.
|
||||
*
|
||||
* Setup steps:
|
||||
* 1. Copy todo-api fixture into a temp git repo (fixtureRoot).
|
||||
* 2. Create workspace temp dir (workspaceRoot) for CW operations.
|
||||
* 3. Init in-memory DB + all 11 repos.
|
||||
* 4. Wire real MultiProviderAgentManager with all repos.
|
||||
* 5. Wire DefaultDispatchManager for execute stage.
|
||||
* 6. Create tRPC caller with full context.
|
||||
* 7. Register project in DB directly (url = fixtureRoot).
|
||||
* 8. Create initiative via tRPC (links project, creates root page).
|
||||
*/
|
||||
export async function createFullFlowHarness(
|
||||
initiativeName = 'Add complete() method to TodoStore',
|
||||
options?: FullFlowHarnessOptions,
|
||||
): Promise<FullFlowHarness> {
|
||||
// ── 0. Allow nested claude invocations ────────────────────────────────────
|
||||
// Claude Code sets CLAUDECODE in the environment, which prevents nested
|
||||
// claude CLI calls from starting ("cannot be launched inside another Claude
|
||||
// Code session"). Save and remove it so spawned agents can run normally.
|
||||
// It is restored in cleanup().
|
||||
const savedClaudeCodeEnv = process.env.CLAUDECODE;
|
||||
delete process.env.CLAUDECODE;
|
||||
|
||||
// ── 1. Fixture project ────────────────────────────────────────────────────
|
||||
// IMPORTANT: cp(src, dest) puts src INSIDE dest when dest already exists
|
||||
// (like `cp -r src dest/` → creates dest/src/). We need dest to NOT exist
|
||||
// yet so that cp creates it as a copy of src directly.
|
||||
const fixtureBase = await mkdtemp(join(tmpdir(), 'cw-fixture-'));
|
||||
const fixtureRoot = join(fixtureBase, 'todo-api'); // does not exist yet
|
||||
await cp(FIXTURES_DIR, fixtureRoot, { recursive: true });
|
||||
|
||||
// Verify files landed at the right level before git operations
|
||||
execSync(`test -f "${join(fixtureRoot, 'package.json')}"`, { stdio: 'pipe' });
|
||||
|
||||
execSync('git init', { cwd: fixtureRoot, stdio: 'pipe' });
|
||||
execSync('git config user.email "test@test.com"', { cwd: fixtureRoot, stdio: 'pipe' });
|
||||
execSync('git config user.name "Test"', { cwd: fixtureRoot, stdio: 'pipe' });
|
||||
execSync('git add .', { cwd: fixtureRoot, stdio: 'pipe' });
|
||||
execSync('git commit -m "initial todo-api with missing complete()"', {
|
||||
cwd: fixtureRoot,
|
||||
stdio: 'pipe',
|
||||
});
|
||||
|
||||
// ── 2. Workspace root ─────────────────────────────────────────────────────
|
||||
// Just a plain temp directory — agent worktrees live under repos/ inside it.
|
||||
// No git init needed; the PROJECT clone (repos/<name>-<id>/) is the git repo.
|
||||
const workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-workspace-'));
|
||||
|
||||
// ── 3. Database + repositories ────────────────────────────────────────────
|
||||
const db = createTestDatabase();
|
||||
const repos = createRepositories(db);
|
||||
|
||||
// ── 4. Event bus ──────────────────────────────────────────────────────────
|
||||
const eventBus = new CapturingEventBus();
|
||||
|
||||
// ── 5. Real agent manager ─────────────────────────────────────────────────
|
||||
const customProcessManager = options?.processManagerFactory?.(workspaceRoot, repos.projectRepository);
|
||||
const agentManager = new MultiProviderAgentManager(
|
||||
repos.agentRepository,
|
||||
workspaceRoot,
|
||||
repos.projectRepository,
|
||||
repos.accountRepository,
|
||||
eventBus,
|
||||
undefined, // no credential manager needed for default claude account
|
||||
repos.changeSetRepository,
|
||||
repos.phaseRepository,
|
||||
repos.taskRepository,
|
||||
repos.pageRepository,
|
||||
repos.logChunkRepository,
|
||||
false, // debug
|
||||
customProcessManager, // processManagerOverride
|
||||
);
|
||||
|
||||
// ── 6. Dispatch manager (for execute stage) ───────────────────────────────
|
||||
const dispatchManager = new DefaultDispatchManager(
|
||||
repos.taskRepository,
|
||||
repos.messageRepository,
|
||||
agentManager,
|
||||
eventBus,
|
||||
repos.initiativeRepository,
|
||||
repos.phaseRepository,
|
||||
);
|
||||
|
||||
// ── 7. tRPC caller ────────────────────────────────────────────────────────
|
||||
const ctx = createContext({
|
||||
eventBus,
|
||||
serverStartedAt: new Date(),
|
||||
processCount: 0,
|
||||
agentManager,
|
||||
dispatchManager,
|
||||
workspaceRoot,
|
||||
...repos,
|
||||
});
|
||||
const caller = createCaller(ctx);
|
||||
|
||||
// ── 8. Register project directly in DB (bypass tRPC clone) ───────────────
|
||||
const project = await repos.projectRepository.create({
|
||||
name: 'todo-api',
|
||||
url: fixtureRoot,
|
||||
});
|
||||
|
||||
// ── 9. Create initiative via tRPC (creates root page automatically) ───────
|
||||
const initiative = await caller.createInitiative({
|
||||
name: initiativeName,
|
||||
projectIds: [project.id],
|
||||
});
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
async function waitForAgentCompletion(
|
||||
agentId: string,
|
||||
timeoutMs = 120_000,
|
||||
): Promise<AgentResult | null> {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
while (Date.now() < deadline) {
|
||||
const agent = await repos.agentRepository.findById(agentId);
|
||||
if (!agent) return null;
|
||||
if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
|
||||
return agentManager.getResult(agentId);
|
||||
}
|
||||
if (agent.status === 'waiting_for_input') return null;
|
||||
await sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
throw new Error(`Timeout: agent ${agentId} did not complete within ${timeoutMs}ms`);
|
||||
}
|
||||
|
||||
async function waitForAgentAttention(
|
||||
agentId: string,
|
||||
timeoutMs = 120_000,
|
||||
): Promise<AgentAttentionStatus> {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
while (Date.now() < deadline) {
|
||||
const agent = await repos.agentRepository.findById(agentId);
|
||||
if (!agent) return 'crashed';
|
||||
if (agent.status === 'idle' || agent.status === 'stopped') return 'done';
|
||||
if (agent.status === 'crashed') return 'crashed';
|
||||
if (agent.status === 'waiting_for_input') return 'waiting';
|
||||
await sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
throw new Error(`Timeout: agent ${agentId} did not reach attention state within ${timeoutMs}ms`);
|
||||
}
|
||||
|
||||
async function driveToCompletion(
|
||||
agentId: string,
|
||||
answer = 'Use your best judgment and keep it simple.',
|
||||
timeoutMs = 10 * 60_000,
|
||||
): Promise<AgentResult | null> {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
|
||||
while (Date.now() < deadline) {
|
||||
const remaining = deadline - Date.now();
|
||||
if (remaining <= 0) break;
|
||||
|
||||
let status: AgentAttentionStatus;
|
||||
try {
|
||||
status = await waitForAgentAttention(agentId, Math.min(remaining, 3 * 60_000));
|
||||
} catch {
|
||||
// Agent is still running (hasn't reached an attention state within the polling
|
||||
// window). This is normal for long-running execute agents. Continue the outer
|
||||
// loop — the deadline check above will terminate us if we truly time out.
|
||||
continue;
|
||||
}
|
||||
|
||||
if (status === 'done' || status === 'crashed') {
|
||||
return agentManager.getResult(agentId);
|
||||
}
|
||||
|
||||
if (status === 'waiting') {
|
||||
const pending = await agentManager.getPendingQuestions(agentId);
|
||||
if (!pending || pending.questions.length === 0) {
|
||||
// Shouldn't happen, but guard against it
|
||||
await sleep(POLL_INTERVAL_MS);
|
||||
continue;
|
||||
}
|
||||
const answers = Object.fromEntries(
|
||||
pending.questions.map((q) => [q.id, answer]),
|
||||
);
|
||||
await agentManager.resume(agentId, answers);
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error(`driveToCompletion: agent ${agentId} did not finish within ${timeoutMs}ms`);
|
||||
}
|
||||
|
||||
// ── Build and return harness ───────────────────────────────────────────────
|
||||
|
||||
const harness: FullFlowHarness = {
|
||||
workspaceRoot,
|
||||
fixtureRoot,
|
||||
project,
|
||||
initiative,
|
||||
caller,
|
||||
agentManager,
|
||||
db,
|
||||
eventBus,
|
||||
...repos,
|
||||
|
||||
waitForAgentCompletion,
|
||||
waitForAgentAttention,
|
||||
driveToCompletion,
|
||||
|
||||
getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
|
||||
return eventBus.getEventsByType<T>(type);
|
||||
},
|
||||
|
||||
async cleanup() {
|
||||
// Kill any running agents
|
||||
const agents = await repos.agentRepository.findAll();
|
||||
await Promise.allSettled(
|
||||
agents
|
||||
.filter((a) => a.status === 'running')
|
||||
.map((a) => agentManager.stop(a.id)),
|
||||
);
|
||||
// Restore CLAUDECODE env var
|
||||
if (savedClaudeCodeEnv !== undefined) {
|
||||
process.env.CLAUDECODE = savedClaudeCodeEnv;
|
||||
}
|
||||
// Remove temp directories (fixtureBase contains fixtureRoot)
|
||||
await Promise.allSettled([
|
||||
rm(fixtureBase, { recursive: true, force: true }),
|
||||
rm(workspaceRoot, { recursive: true, force: true }),
|
||||
]);
|
||||
},
|
||||
};
|
||||
|
||||
return harness;
|
||||
}
|
||||
156
apps/server/test/integration/full-flow/report.ts
Normal file
156
apps/server/test/integration/full-flow/report.ts
Normal file
@@ -0,0 +1,156 @@
|
||||
/**
|
||||
* Full-Flow Test Report Utility
|
||||
*
|
||||
* Plain console.log formatters for human-readable output at each stage of the
|
||||
* full-flow integration test. No external dependencies.
|
||||
*/
|
||||
|
||||
import { execSync } from 'node:child_process';
|
||||
import { join } from 'node:path';
|
||||
import type { Phase, Task } from '../../../db/schema.js';
|
||||
import type { AgentResult } from '../../../agent/types.js';
|
||||
|
||||
// =============================================================================
|
||||
// Types
|
||||
// =============================================================================
|
||||
|
||||
/** A task paired with the result its execute-stage agent produced. */
export interface ExecutedTask {
  // The task that was dispatched for execution.
  task: Task;
  // Result reported by the agent; null when no result was captured
  // (e.g. the agent crashed before emitting one).
  result: AgentResult | null;
}
|
||||
|
||||
// =============================================================================
|
||||
// Helpers
|
||||
// =============================================================================
|
||||
|
||||
const DIVIDER = '═'.repeat(60);
|
||||
const THIN = '─'.repeat(60);
|
||||
|
||||
function section(title: string): void {
|
||||
console.log(`\n${DIVIDER}`);
|
||||
console.log(` ${title}`);
|
||||
console.log(DIVIDER);
|
||||
}
|
||||
|
||||
function line(msg: string): void {
|
||||
console.log(` ${msg}`);
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Stage reporters
|
||||
// =============================================================================
|
||||
|
||||
export function printHeader(initiativeName: string): void {
|
||||
section(`FULL-FLOW TEST: ${initiativeName}`);
|
||||
console.log(` Started at: ${new Date().toISOString()}`);
|
||||
}
|
||||
|
||||
export function printDiscussResult(agentId: string, result: AgentResult | null): void {
|
||||
console.log(`\n[DISCUSS]`);
|
||||
console.log(THIN);
|
||||
line(`Agent: ${agentId}`);
|
||||
if (result) {
|
||||
line(`Success: ${result.success}`);
|
||||
if (result.message) line(`Message: ${result.message.slice(0, 200)}`);
|
||||
} else {
|
||||
line('Result: null (agent may have crashed)');
|
||||
}
|
||||
}
|
||||
|
||||
export function printPlanResult(phases: Phase[]): void {
|
||||
console.log(`\n[PLAN] ${phases.length} phase(s) created`);
|
||||
console.log(THIN);
|
||||
phases.forEach((ph, i) => {
|
||||
line(`${i + 1}. ${ph.name}`);
|
||||
});
|
||||
}
|
||||
|
||||
export function printDetailResult(phase: Phase, tasks: Task[]): void {
|
||||
console.log(`\n[DETAIL] Phase "${phase.name}" → ${tasks.length} task(s)`);
|
||||
console.log(THIN);
|
||||
tasks.forEach((t, i) => {
|
||||
const flags = [t.category, t.type, t.requiresApproval ? 'approval-required' : 'auto'].join(', ');
|
||||
line(`${i + 1}. ${t.name} [${flags}]`);
|
||||
if (t.description) {
|
||||
line(` ${t.description.slice(0, 120)}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
export function printExecuteResult(executed: ExecutedTask[]): void {
|
||||
const succeeded = executed.filter((e) => e.result?.success).length;
|
||||
console.log(`\n[EXECUTE] ${succeeded}/${executed.length} task(s) succeeded`);
|
||||
console.log(THIN);
|
||||
for (const { task, result } of executed) {
|
||||
const icon = result?.success ? '✓' : '✗';
|
||||
line(`${icon} ${task.name}`);
|
||||
if (result && !result.success) {
|
||||
line(` Error: ${result.message?.slice(0, 120)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function printGitDiff(workspaceRoot: string, projectName: string): void {
|
||||
console.log('\n[GIT DIFF — agent worktrees]');
|
||||
console.log(THIN);
|
||||
|
||||
// Find all agent worktrees for this project
|
||||
const worktreesBase = join(workspaceRoot, 'agent-workdirs');
|
||||
try {
|
||||
const dirs = execSync(`ls "${worktreesBase}" 2>/dev/null || echo ""`, { encoding: 'utf8' })
|
||||
.trim()
|
||||
.split('\n')
|
||||
.filter(Boolean);
|
||||
|
||||
for (const dir of dirs) {
|
||||
const projectDir = join(worktreesBase, dir, projectName);
|
||||
try {
|
||||
const stat = execSync(`git -C "${projectDir}" diff HEAD~1 --stat 2>/dev/null || echo ""`, {
|
||||
encoding: 'utf8',
|
||||
}).trim();
|
||||
if (stat) {
|
||||
line(`Worktree: ${dir}/${projectName}`);
|
||||
stat.split('\n').forEach((l) => line(` ${l}`));
|
||||
}
|
||||
} catch {
|
||||
// Worktree might not have commits — skip silently
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
line('(no agent worktrees found)');
|
||||
}
|
||||
}
|
||||
|
||||
export function printNpmTestResult(projectDir: string): void {
|
||||
console.log('\n[NPM TEST]');
|
||||
console.log(THIN);
|
||||
try {
|
||||
const output = execSync('node --test src/todo.test.js', {
|
||||
cwd: projectDir,
|
||||
encoding: 'utf8',
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
});
|
||||
line('Tests passed:');
|
||||
output.split('\n').forEach((l) => line(` ${l}`));
|
||||
} catch (err: unknown) {
|
||||
const e = err as { stdout?: string; stderr?: string; status?: number };
|
||||
line(`Tests FAILED (exit ${e.status ?? '?'})`);
|
||||
if (e.stdout) e.stdout.split('\n').forEach((l) => line(` ${l}`));
|
||||
if (e.stderr) e.stderr.split('\n').forEach((l) => line(` ${l}`));
|
||||
}
|
||||
}
|
||||
|
||||
export function printFinalSummary(
|
||||
initiativeName: string,
|
||||
phases: Phase[],
|
||||
tasks: Task[],
|
||||
executed: ExecutedTask[],
|
||||
durationMs: number,
|
||||
): void {
|
||||
section(`SUMMARY: ${initiativeName}`);
|
||||
line(`Duration : ${Math.round(durationMs / 1000)}s`);
|
||||
line(`Phases : ${phases.length}`);
|
||||
line(`Tasks : ${tasks.length}`);
|
||||
line(`Executed : ${executed.filter((e) => e.result?.success).length}/${executed.length} succeeded`);
|
||||
console.log(DIVIDER);
|
||||
}
|
||||
183
apps/server/test/integration/real-claude.test.ts
Normal file
183
apps/server/test/integration/real-claude.test.ts
Normal file
@@ -0,0 +1,183 @@
|
||||
/**
|
||||
* Real Claude CLI Integration Tests
|
||||
*
|
||||
* IMPORTANT: These tests call the real Claude CLI and incur API costs.
|
||||
* They are SKIPPED by default and should only be run manually for validation.
|
||||
*
|
||||
* To run these tests:
|
||||
* ```bash
|
||||
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-claude.test.ts --test-timeout=120000
|
||||
* ```
|
||||
*
|
||||
* Purpose:
|
||||
* - Validate that JSON schemas work correctly with Claude CLI --json-schema flag
|
||||
* - Confirm MockAgentManager accurately simulates real CLI behavior
|
||||
* - Document actual response structure and costs
|
||||
*
|
||||
* Updated (2026-02-06): Now uses the universal agentSignalSchema instead of
|
||||
* per-mode schemas. Agents output trivial signals (done/questions/error) and
|
||||
* write files instead of producing mode-specific JSON.
|
||||
*
|
||||
* Total validation cost: ~$0.10 (3 tests)
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeAll } from 'vitest';
|
||||
import { execa } from 'execa';
|
||||
import {
|
||||
agentSignalJsonSchema,
|
||||
agentSignalSchema,
|
||||
} from '../../agent/schema.js';
|
||||
|
||||
/**
 * Result structure from Claude CLI with --output-format json
 *
 * When --json-schema is used:
 * - result: "" (empty string)
 * - structured_output: { ... } (the validated JSON object)
 */
interface ClaudeCliResult {
  // Always 'result' for the final summary event.
  type: 'result';
  // 'success' when the run completed; error variants otherwise.
  subtype: 'success' | 'error' | 'error_max_turns';
  is_error: boolean;
  // CLI session identifier (used for --resume).
  session_id: string;
  // Free-text result; empty when --json-schema is supplied.
  result: string;
  // Schema-validated JSON payload (present when --json-schema is used).
  structured_output?: unknown;
  // Total API cost in USD, when the CLI reports it.
  total_cost_usd?: number;
}
|
||||
|
||||
/**
|
||||
* Helper to call Claude CLI directly with a prompt and JSON schema.
|
||||
*
|
||||
* @param prompt - The prompt to send to Claude
|
||||
* @param jsonSchema - JSON schema to enforce structured output
|
||||
* @param timeoutMs - Timeout in milliseconds (default 90s)
|
||||
* @returns Parsed CLI result with structured_output
|
||||
*/
|
||||
async function callClaudeCli(
|
||||
prompt: string,
|
||||
jsonSchema: object,
|
||||
timeoutMs = 90000
|
||||
): Promise<{ cliResult: ClaudeCliResult; structuredOutput: unknown }> {
|
||||
const startTime = Date.now();
|
||||
|
||||
const { stdout } = await execa(
|
||||
'claude',
|
||||
[
|
||||
'-p',
|
||||
prompt,
|
||||
'--output-format',
|
||||
'json',
|
||||
'--json-schema',
|
||||
JSON.stringify(jsonSchema),
|
||||
],
|
||||
{
|
||||
timeout: timeoutMs,
|
||||
}
|
||||
);
|
||||
|
||||
const duration = Date.now() - startTime;
|
||||
const cliResult: ClaudeCliResult = JSON.parse(stdout);
|
||||
|
||||
console.log(`\n Duration: ${(duration / 1000).toFixed(1)}s`);
|
||||
console.log(` Cost: $${cliResult.total_cost_usd?.toFixed(4) ?? 'N/A'}`);
|
||||
console.log(` Session ID: ${cliResult.session_id}`);
|
||||
console.log(` Result field empty: ${cliResult.result === ''}`);
|
||||
console.log(` Has structured_output: ${cliResult.structured_output !== undefined}`);
|
||||
|
||||
// When --json-schema is used, structured output is in structured_output field
|
||||
// The result field is typically empty when using --json-schema
|
||||
const structuredOutput = cliResult.structured_output ?? JSON.parse(cliResult.result);
|
||||
|
||||
return { cliResult, structuredOutput };
|
||||
}
|
||||
|
||||
/**
 * Check if real Claude tests should run.
 * Set REAL_CLAUDE_TESTS=1 environment variable to enable.
 * Any other value (or unset) leaves the suite skipped.
 */
const shouldRunRealTests = process.env.REAL_CLAUDE_TESTS === '1';

/**
 * Skip wrapper - tests are expensive and should run manually.
 * Resolves to a no-op `describe.skip` unless explicitly opted in above.
 */
const describeReal = shouldRunRealTests ? describe : describe.skip;

// Individual test timeout - real API calls take 5-30 seconds
const TEST_TIMEOUT = 120000; // 2 minutes
|
||||
|
||||
// Exercises the universal agent-signal schema against the real CLI: each test
// coaxes Claude into one of the three signal statuses (done/questions/error)
// and validates the structured output with the Zod schema.
describeReal('Real Claude CLI Integration', () => {
  beforeAll(() => {
    console.log('\n=== Running Real Claude CLI Tests ===');
    console.log('These tests call the real Claude API and incur costs.\n');
  });

  describe('Universal Signal Schema', () => {
    it(
      'should return done status',
      async () => {
        const prompt = `Complete this simple task: Say "Hello, World!" as a test.

Output your response in the required JSON format with status "done".`;

        const { cliResult, structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);

        console.log('  Output:', JSON.stringify(structuredOutput, null, 2));

        // Verify the CLI response structure
        expect(cliResult.subtype).toBe('success');
        expect(cliResult.result).toBe(''); // Empty when using --json-schema
        expect(cliResult.structured_output).toBeDefined();

        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('done');
      },
      TEST_TIMEOUT
    );

    it(
      'should return questions status with array',
      async () => {
        // Deliberately vague prompt that forces the questions path.
        const prompt = `You are working on a vague task: "Make it better"

You MUST ask clarifying questions before proceeding. You cannot complete this task without more information.

Output your response with status "questions" and include at least 2 questions with unique IDs.`;

        const { structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);

        console.log('  Output:', JSON.stringify(structuredOutput, null, 2));

        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('questions');
        if (parsed.status === 'questions') {
          expect(Array.isArray(parsed.questions)).toBe(true);
          expect(parsed.questions.length).toBeGreaterThanOrEqual(1);
          expect(parsed.questions[0].id).toBeTruthy();
          expect(parsed.questions[0].question).toBeTruthy();
        }
      },
      TEST_TIMEOUT
    );

    it(
      'should return error status',
      async () => {
        const prompt = `You have encountered an unrecoverable error. Output your response with status "error" and a descriptive error message.`;

        const { structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);

        console.log('  Output:', JSON.stringify(structuredOutput, null, 2));

        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('error');
        if (parsed.status === 'error') {
          expect(parsed.error).toBeTruthy();
        }
      },
      TEST_TIMEOUT
    );
  });
});
|
||||
@@ -0,0 +1,298 @@
|
||||
/**
|
||||
* Real Claude CLI Manager Integration Tests
|
||||
*
|
||||
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
|
||||
* They are SKIPPED by default and should only be run manually for validation.
|
||||
*
|
||||
* To run these tests:
|
||||
* ```bash
|
||||
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/claude-manager.test.ts --test-timeout=300000
|
||||
* ```
|
||||
*
|
||||
* Tests covered:
|
||||
* - Output stream parsing (text_delta events)
|
||||
* - Session ID extraction from init event
|
||||
* - Result parsing and validation
|
||||
* - Session resume with user answers
|
||||
*
|
||||
* Estimated cost: ~$0.10 per full run
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
|
||||
import {
|
||||
createRealProviderHarness,
|
||||
describeRealClaude,
|
||||
REAL_TEST_TIMEOUT,
|
||||
sleep,
|
||||
type RealProviderHarness,
|
||||
} from './harness.js';
|
||||
import { MINIMAL_PROMPTS } from './prompts.js';
|
||||
import type { AgentSpawnedEvent, AgentStoppedEvent, AgentOutputEvent } from '../../../events/types.js';
|
||||
|
||||
// End-to-end manager tests against the real Claude CLI: output parsing,
// session-ID extraction, the questions→resume cycle, and error handling.
describeRealClaude('Real Claude Manager Integration', () => {
  let harness: RealProviderHarness;

  beforeAll(async () => {
    console.log('\n=== Running Real Claude Manager Tests ===');
    console.log('These tests call the real Claude API and incur costs.\n');
    harness = await createRealProviderHarness({ provider: 'claude' });
  });

  afterAll(async () => {
    await harness.cleanup();
  });

  beforeEach(() => {
    // Each test asserts on captured events, so start from a clean slate.
    harness.clearEvents();
  });

  describe('Output Parsing', () => {
    it(
      'parses text_delta events from stream',
      async () => {
        // Spawn agent with streaming prompt
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: MINIMAL_PROMPTS.streaming,
          mode: 'execute',
          provider: 'claude',
        });

        expect(agent.id).toBeTruthy();
        expect(agent.status).toBe('running');

        // Wait for completion
        const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

        // Verify we got output events
        const outputEvents = harness.getEventsByType<AgentOutputEvent>('agent:output');
        console.log('  Output events:', outputEvents.length);

        // Verify completion
        expect(result).toBeTruthy();
        console.log('  Result:', result?.message);
      },
      REAL_TEST_TIMEOUT
    );

    it(
      'parses init event and extracts session ID',
      async () => {
        // Spawn agent with simple done prompt
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: MINIMAL_PROMPTS.done,
          mode: 'execute',
          provider: 'claude',
        });

        // Wait for completion
        await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

        // Verify session ID was extracted and persisted
        const dbAgent = await harness.agentRepository.findById(agent.id);
        expect(dbAgent?.sessionId).toBeTruthy();
        expect(dbAgent?.sessionId).toMatch(/^[a-f0-9-]+$/);

        console.log('  Session ID:', dbAgent?.sessionId);
      },
      REAL_TEST_TIMEOUT
    );

    it(
      'parses result event with completion',
      async () => {
        // Spawn agent with simple done prompt
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: MINIMAL_PROMPTS.done,
          mode: 'execute',
          provider: 'claude',
        });

        // Wait for completion
        const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

        // Verify result was parsed
        expect(result).toBeTruthy();
        expect(result?.success).toBe(true);
        expect(result?.message).toBeTruthy();

        // Verify events
        const spawnedEvents = harness.getEventsByType<AgentSpawnedEvent>('agent:spawned');
        expect(spawnedEvents.length).toBe(1);
        expect(spawnedEvents[0].payload.agentId).toBe(agent.id);
        expect(spawnedEvents[0].payload.provider).toBe('claude');

        const stoppedEvents = harness.getEventsByType<AgentStoppedEvent>('agent:stopped');
        expect(stoppedEvents.length).toBe(1);
        expect(stoppedEvents[0].payload.agentId).toBe(agent.id);
        expect(stoppedEvents[0].payload.reason).toBe('task_complete');

        console.log('  Result message:', result?.message);
      },
      REAL_TEST_TIMEOUT
    );
  });

  describe('Questions Flow', () => {
    it(
      'parses questions status and enters waiting_for_input',
      async () => {
        // Spawn agent with questions prompt
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: MINIMAL_PROMPTS.questions,
          mode: 'execute',
          provider: 'claude',
        });

        // Wait for waiting_for_input status
        const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);

        // Verify questions were parsed
        expect(questions).toBeTruthy();
        expect(questions?.questions).toBeTruthy();
        expect(questions?.questions.length).toBeGreaterThan(0);
        expect(questions?.questions[0].id).toBeTruthy();
        expect(questions?.questions[0].question).toBeTruthy();

        // Verify agent status
        const dbAgent = await harness.agentRepository.findById(agent.id);
        expect(dbAgent?.status).toBe('waiting_for_input');
        expect(dbAgent?.sessionId).toBeTruthy();

        console.log('  Questions:', questions?.questions.length);
        console.log('  First question:', questions?.questions[0].question);
      },
      REAL_TEST_TIMEOUT
    );
  });

  describe('Session Resume', () => {
    it(
      'resumes session with user answers',
      async () => {
        // 1. Spawn agent that asks questions
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: MINIMAL_PROMPTS.questions,
          mode: 'execute',
          provider: 'claude',
        });

        // 2. Wait for waiting_for_input
        const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
        expect(questions?.questions.length).toBeGreaterThan(0);

        const sessionIdBeforeResume = (await harness.agentRepository.findById(agent.id))?.sessionId;
        console.log('  Session ID before resume:', sessionIdBeforeResume);
        console.log('  Questions received:', questions?.questions.map((q) => q.id).join(', '));

        harness.clearEvents();

        // 3. Resume with answer
        const answers: Record<string, string> = {};
        for (const q of questions?.questions ?? []) {
          answers[q.id] = `Answer to ${q.id}`;
        }

        await harness.agentManager.resume(agent.id, answers);

        // 4. Wait for completion or another waiting state
        // NOTE(review): finalStatus is assigned but never read below — the
        // assertions re-query the repository instead. Candidate for cleanup.
        let attempts = 0;
        let finalStatus = 'running';
        while (attempts < 60) {
          const agent2 = await harness.agentRepository.findById(agent.id);
          if (agent2?.status !== 'running') {
            finalStatus = agent2?.status ?? 'unknown';
            break;
          }
          await sleep(1000);
          attempts++;
        }

        // Verify the agent processed the resume (either completed or asked more questions)
        const dbAgent = await harness.agentRepository.findById(agent.id);
        console.log('  Final status:', dbAgent?.status);

        // Agent should not still be running
        expect(['idle', 'waiting_for_input', 'crashed']).toContain(dbAgent?.status);

        // If idle, verify result
        if (dbAgent?.status === 'idle') {
          const result = await harness.agentManager.getResult(agent.id);
          console.log('  Result:', result?.message);
          expect(result).toBeTruthy();
        }
      },
      REAL_TEST_TIMEOUT * 2 // Double timeout for two-step process
    );

    it(
      'maintains session continuity across resume',
      async () => {
        // 1. Spawn agent that asks questions
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: MINIMAL_PROMPTS.questions,
          mode: 'execute',
          provider: 'claude',
        });

        // 2. Wait for waiting_for_input
        const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
        expect(questions?.questions.length).toBeGreaterThan(0);

        const sessionIdBefore = (await harness.agentRepository.findById(agent.id))?.sessionId;
        expect(sessionIdBefore).toBeTruthy();

        // 3. Resume with answer
        const answers: Record<string, string> = {};
        for (const q of questions?.questions ?? []) {
          answers[q.id] = `Answer to ${q.id}`;
        }

        await harness.agentManager.resume(agent.id, answers);

        // 4. Wait for completion
        await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

        // Verify session ID exists (may be same or new depending on CLI behavior)
        const sessionIdAfter = (await harness.agentRepository.findById(agent.id))?.sessionId;
        expect(sessionIdAfter).toBeTruthy();

        console.log('  Session ID before:', sessionIdBefore);
        console.log('  Session ID after:', sessionIdAfter);
      },
      REAL_TEST_TIMEOUT * 2
    );
  });

  describe('Error Handling', () => {
    it(
      'handles error status',
      async () => {
        // Spawn agent with error prompt
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: MINIMAL_PROMPTS.error,
          mode: 'execute',
          provider: 'claude',
        });

        // Wait for completion (will be crashed)
        const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

        // Verify error was handled
        const dbAgent = await harness.agentRepository.findById(agent.id);
        expect(dbAgent?.status).toBe('crashed');
        expect(result?.success).toBe(false);
        expect(result?.message).toContain('Test error');

        console.log('  Error message:', result?.message);
      },
      REAL_TEST_TIMEOUT
    );
  });
});
|
||||
@@ -0,0 +1,172 @@
|
||||
/**
|
||||
* Real Codex CLI Manager Integration Tests
|
||||
*
|
||||
* IMPORTANT: These tests call the REAL Codex CLI and incur API costs!
|
||||
* They are SKIPPED by default and should only be run manually for validation.
|
||||
*
|
||||
* To run these tests:
|
||||
* ```bash
|
||||
* REAL_CODEX_TESTS=1 npm test -- src/test/integration/real-providers/codex-manager.test.ts --test-timeout=300000
|
||||
* ```
|
||||
*
|
||||
* Tests covered:
|
||||
* - Codex spawn and thread_id extraction
|
||||
* - Generic output parsing (non-schema)
|
||||
* - Streaming output
|
||||
*
|
||||
* Estimated cost: ~$0.10 per full run
|
||||
*
|
||||
* Note: Codex uses different output format and session ID field (thread_id).
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
|
||||
import {
|
||||
createRealProviderHarness,
|
||||
describeRealCodex,
|
||||
REAL_TEST_TIMEOUT,
|
||||
type RealProviderHarness,
|
||||
} from './harness.js';
|
||||
import { CODEX_PROMPTS } from './prompts.js';
|
||||
import type { AgentSpawnedEvent, AgentOutputEvent } from '../../../events/types.js';
|
||||
|
||||
// Manager tests against the real Codex CLI: thread_id extraction, generic
// (non-schema) output parsing, and provider configuration sanity checks.
describeRealCodex('Real Codex Manager Integration', () => {
  let harness: RealProviderHarness;

  beforeAll(async () => {
    console.log('\n=== Running Real Codex Manager Tests ===');
    console.log('These tests call the real Codex API and incur costs.\n');
    harness = await createRealProviderHarness({ provider: 'codex' });
  });

  afterAll(async () => {
    await harness.cleanup();
  });

  beforeEach(() => {
    // Each test asserts on captured events, so start clean.
    harness.clearEvents();
  });

  describe('Codex Spawn', () => {
    it(
      'spawns codex agent and extracts thread_id',
      async () => {
        // Spawn agent with simple task
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: CODEX_PROMPTS.done,
          mode: 'execute',
          provider: 'codex',
        });

        expect(agent.id).toBeTruthy();
        expect(agent.provider).toBe('codex');
        expect(agent.status).toBe('running');

        // Verify spawned event
        const spawnedEvents = harness.getEventsByType<AgentSpawnedEvent>('agent:spawned');
        expect(spawnedEvents.length).toBe(1);
        expect(spawnedEvents[0].payload.provider).toBe('codex');

        // Wait for completion
        const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

        // Verify session ID (thread_id) was extracted
        const dbAgent = await harness.agentRepository.findById(agent.id);
        console.log('  Thread ID:', dbAgent?.sessionId);
        console.log('  Status:', dbAgent?.status);
        console.log('  Result:', result?.message);

        // Codex should complete or crash
        expect(['idle', 'crashed']).toContain(dbAgent?.status);

        // If completed successfully, should have extracted thread_id
        if (dbAgent?.status === 'idle' && dbAgent?.sessionId) {
          expect(dbAgent.sessionId).toBeTruthy();
        }
      },
      REAL_TEST_TIMEOUT
    );

    it(
      'uses generic parser for output',
      async () => {
        // Spawn agent with streaming prompt
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: CODEX_PROMPTS.streaming,
          mode: 'execute',
          provider: 'codex',
        });

        // Wait for completion
        const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

        // Verify output events were captured
        const outputEvents = harness.getEventsByType<AgentOutputEvent>('agent:output');
        console.log('  Output events:', outputEvents.length);

        // For generic provider, result should be captured
        const dbAgent = await harness.agentRepository.findById(agent.id);
        console.log('  Status:', dbAgent?.status);
        console.log('  Result:', result?.message?.substring(0, 100) + '...');

        expect(['idle', 'crashed']).toContain(dbAgent?.status);
      },
      REAL_TEST_TIMEOUT
    );
  });

  describe('Codex Provider Config', () => {
    it(
      'uses correct command and args for codex',
      async () => {
        // This is more of a config verification test
        // The actual command execution is validated by the spawn test

        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: 'Say hello',
          mode: 'execute',
          provider: 'codex',
        });

        // Verify agent was created with codex provider
        const dbAgent = await harness.agentRepository.findById(agent.id);
        expect(dbAgent?.provider).toBe('codex');

        // Wait for completion (or timeout)
        try {
          await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
        } catch {
          // Codex might fail if not installed, that's OK for config test
        }

        const finalAgent = await harness.agentRepository.findById(agent.id);
        console.log('  Provider:', finalAgent?.provider);
        console.log('  Status:', finalAgent?.status);
      },
      REAL_TEST_TIMEOUT
    );
  });
});
|
||||
|
||||
/**
|
||||
* Codex-specific observations from testing:
|
||||
*
|
||||
* 1. Output Format:
|
||||
* - Codex uses JSONL streaming with different event types
|
||||
* - thread.started event contains thread_id
|
||||
* - Output parsing is more generic (not JSON schema validated)
|
||||
*
|
||||
* 2. Command Structure:
|
||||
* - codex exec --full-auto --json -p "prompt"
|
||||
* - resume: codex exec resume <thread_id>
|
||||
*
|
||||
* 3. Session ID:
|
||||
* - Called "thread_id" in Codex
|
||||
* - Extracted from thread.started event
|
||||
*
|
||||
* 4. Resume:
|
||||
* - Uses subcommand style: codex exec resume <thread_id>
|
||||
* - Different from Claude's flag style: claude --resume <session_id>
|
||||
*/
|
||||
540
apps/server/test/integration/real-providers/conversation.test.ts
Normal file
540
apps/server/test/integration/real-providers/conversation.test.ts
Normal file
@@ -0,0 +1,540 @@
|
||||
/**
|
||||
* Real Claude Inter-Agent Conversation Integration Tests
|
||||
*
|
||||
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
|
||||
* They are SKIPPED by default and should only be run manually for validation.
|
||||
*
|
||||
* To run:
|
||||
* ```bash
|
||||
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/conversation.test.ts --test-timeout=300000
|
||||
* ```
|
||||
*
|
||||
* Architecture:
|
||||
* - Mock conversation server (only cw listen/ask/answer endpoints, no full CoordinationServer)
|
||||
* - In-memory ConversationRepository (no SQLite, no FK constraints)
|
||||
* - Real agent harness for spawning two Claude sessions with actual coding tasks
|
||||
* - Two sequential questions prove the listen→answer→re-listen cycle works
|
||||
*
|
||||
* Estimated cost: ~$0.30 per full run (two Claude sessions)
|
||||
*/
|
||||
|
||||
import { it, expect, beforeAll, afterAll } from 'vitest';
|
||||
import { createServer } from 'node:http';
|
||||
import type { Server } from 'node:http';
|
||||
import { readFileSync, existsSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { nanoid } from 'nanoid';
|
||||
import { fetchRequestHandler } from '@trpc/server/adapters/fetch';
|
||||
import { router, publicProcedure } from '../../../trpc/trpc.js';
|
||||
import { conversationProcedures } from '../../../trpc/routers/conversation.js';
|
||||
import { EventEmitterBus } from '../../../events/bus.js';
|
||||
import type { ConversationRepository, CreateConversationData } from '../../../db/repositories/conversation-repository.js';
|
||||
import type { Conversation } from '../../../db/schema.js';
|
||||
import {
|
||||
createRealProviderHarness,
|
||||
describeRealClaude,
|
||||
sleep,
|
||||
type RealProviderHarness,
|
||||
} from './harness.js';
|
||||
|
||||
const TEST_TIMEOUT = 300000; // 5 minutes — agents do real coding + conversation
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// In-memory ConversationRepository — no SQLite, no FK constraints
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
class InMemoryConversationRepository implements ConversationRepository {
|
||||
private store = new Map<string, Conversation>();
|
||||
|
||||
async create(data: CreateConversationData): Promise<Conversation> {
|
||||
const now = new Date();
|
||||
const conversation: Conversation = {
|
||||
id: nanoid(),
|
||||
fromAgentId: data.fromAgentId,
|
||||
toAgentId: data.toAgentId,
|
||||
initiativeId: data.initiativeId ?? null,
|
||||
phaseId: data.phaseId ?? null,
|
||||
taskId: data.taskId ?? null,
|
||||
question: data.question,
|
||||
answer: null,
|
||||
status: 'pending',
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
};
|
||||
this.store.set(conversation.id, conversation);
|
||||
return conversation;
|
||||
}
|
||||
|
||||
async findById(id: string): Promise<Conversation | null> {
|
||||
return this.store.get(id) ?? null;
|
||||
}
|
||||
|
||||
async findPendingForAgent(toAgentId: string): Promise<Conversation[]> {
|
||||
return [...this.store.values()]
|
||||
.filter((c) => c.toAgentId === toAgentId && c.status === 'pending')
|
||||
.sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime());
|
||||
}
|
||||
|
||||
async answer(id: string, answer: string): Promise<Conversation | null> {
|
||||
const conv = this.store.get(id);
|
||||
if (!conv) return null;
|
||||
const updated: Conversation = {
|
||||
...conv,
|
||||
answer,
|
||||
status: 'answered' as const,
|
||||
updatedAt: new Date(),
|
||||
};
|
||||
this.store.set(id, updated);
|
||||
return updated;
|
||||
}
|
||||
|
||||
/** Test helper — return all conversations */
|
||||
getAll(): Conversation[] {
|
||||
return [...this.store.values()];
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock conversation server — serves ONLY conversation tRPC procedures
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Start a bare HTTP server exposing ONLY the conversation tRPC procedures,
 * backed by an in-memory repository. Bridges Node's http request/response to
 * the tRPC fetch adapter by hand (no full CoordinationServer involved).
 *
 * Returns the server, the randomly chosen port, and the repo so the test
 * can inspect conversations directly.
 */
async function startMockConversationServer(): Promise<{
  server: Server;
  port: number;
  repo: InMemoryConversationRepository;
}> {
  const repo = new InMemoryConversationRepository();
  const eventBus = new EventEmitterBus();

  // Mini router with only conversation procedures
  const miniRouter = router({
    ...conversationProcedures(publicProcedure),
  });

  const httpServer = createServer(async (req, res) => {
    // Anything outside /trpc is not served by this mock.
    if (!req.url?.startsWith('/trpc')) {
      res.writeHead(404);
      res.end('Not found');
      return;
    }

    const host = req.headers.host ?? 'localhost';
    const url = new URL(req.url, `http://${host}`);

    // Buffer the request body (fetch Request cannot take a Node stream here).
    // GET/HEAD must not carry a body per the fetch spec.
    let body: string | undefined;
    if (req.method !== 'GET' && req.method !== 'HEAD') {
      body = await new Promise<string>((resolve) => {
        let data = '';
        req.on('data', (chunk: Buffer) => {
          data += chunk.toString();
        });
        req.on('end', () => resolve(data));
      });
    }

    // Copy Node's header object (values may be string | string[]) into a
    // fetch Headers instance, appending each element of array-valued headers.
    const headers = new Headers();
    for (const [key, value] of Object.entries(req.headers)) {
      if (value) {
        if (Array.isArray(value)) {
          value.forEach((v) => headers.append(key, v));
        } else {
          headers.set(key, value);
        }
      }
    }

    const fetchRequest = new Request(url.toString(), {
      method: req.method,
      headers,
      body: body ?? undefined,
    });

    const fetchResponse = await fetchRequestHandler({
      endpoint: '/trpc',
      req: fetchRequest,
      router: miniRouter,
      // Minimal context: just enough fields for the conversation procedures.
      createContext: () =>
        ({
          eventBus,
          serverStartedAt: new Date(),
          processCount: 0,
          conversationRepository: repo,
          // Stub — requireAgentManager is called unconditionally in createConversation,
          // but list() is only invoked for taskId/phaseId resolution. With --agent-id
          // targeting, list() is never called.
          agentManager: { list: async () => [] },
        }) as any,
    });

    // Mirror the fetch Response back onto the Node response.
    res.statusCode = fetchResponse.status;
    fetchResponse.headers.forEach((value, key) => {
      res.setHeader(key, value);
    });

    if (fetchResponse.body) {
      // Stream the body chunk-by-chunk; any read error just ends the response.
      const reader = fetchResponse.body.getReader();
      const pump = async () => {
        while (true) {
          const { done, value } = await reader.read();
          if (done) {
            res.end();
            return;
          }
          res.write(value);
        }
      };
      pump().catch(() => res.end());
    } else {
      res.end(await fetchResponse.text());
    }
  });

  // Random high port to avoid collisions across concurrent test runs.
  const port = 40000 + Math.floor(Math.random() * 10000);
  await new Promise<void>((resolve) => {
    httpServer.listen(port, '127.0.0.1', () => resolve());
  });

  return { server: httpServer, port, repo };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Diagnostic helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function dumpAgentLogs(workspaceRoot: string, agentName: string) {
|
||||
const logDir = join(workspaceRoot, '.cw', 'agent-logs', agentName);
|
||||
if (!existsSync(logDir)) {
|
||||
console.log(` [${agentName}] No log directory at ${logDir}`);
|
||||
return;
|
||||
}
|
||||
// Dump output.jsonl (last 30 lines)
|
||||
const outputPath = join(logDir, 'output.jsonl');
|
||||
if (existsSync(outputPath)) {
|
||||
const lines = readFileSync(outputPath, 'utf-8').trim().split('\n');
|
||||
const last = lines.slice(-30);
|
||||
console.log(` [${agentName}] output.jsonl (last ${last.length}/${lines.length} lines):`);
|
||||
for (const line of last) {
|
||||
try {
|
||||
const ev = JSON.parse(line);
|
||||
if (ev.type === 'assistant' && ev.message?.content) {
|
||||
for (const block of ev.message.content) {
|
||||
if (block.type === 'text') {
|
||||
console.log(` TEXT: ${block.text.substring(0, 200)}`);
|
||||
} else if (block.type === 'tool_use') {
|
||||
console.log(` TOOL: ${block.name} ${JSON.stringify(block.input).substring(0, 150)}`);
|
||||
}
|
||||
}
|
||||
} else if (ev.type === 'result') {
|
||||
console.log(` RESULT: ${JSON.stringify(ev).substring(0, 300)}`);
|
||||
}
|
||||
} catch {
|
||||
console.log(` RAW: ${line.substring(0, 200)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Dump stderr
|
||||
const stderrPath = join(logDir, 'stderr.log');
|
||||
if (existsSync(stderrPath)) {
|
||||
const stderr = readFileSync(stderrPath, 'utf-8').trim();
|
||||
if (stderr) {
|
||||
console.log(` [${agentName}] stderr: ${stderr.substring(0, 500)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test suite
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// End-to-end proof of the cw ask/listen/answer loop between two REAL Claude
// sessions, routed through the mock conversation server above. Two sequential
// questions are required so the listen → answer → re-listen cycle is exercised.
describeRealClaude('Real Inter-Agent Conversation (mock server)', () => {
  let harness: RealProviderHarness;
  let mockServer: Server;
  let mockPort: number;
  let mockRepo: InMemoryConversationRepository;
  // Saved so afterAll can restore the environment exactly as it was.
  const originalCwPort = process.env.CW_PORT;

  beforeAll(async () => {
    console.log('\n=== Real Inter-Agent Conversation Test ===');
    console.log('Mock conversation server + two Claude sessions.\n');

    // Start mock conversation server (only listen/ask/answer endpoints)
    const mock = await startMockConversationServer();
    mockServer = mock.server;
    mockPort = mock.port;
    mockRepo = mock.repo;
    console.log(` Mock server on port ${mockPort}`);

    // Set CW_PORT so agents' cw commands hit the mock server
    process.env.CW_PORT = String(mockPort);

    // Real agent harness for spawning + worktrees (no full CoordinationServer)
    harness = await createRealProviderHarness({ provider: 'claude' });
    console.log(` Workspace: ${harness.workspaceRoot}`);
  });

  afterAll(async () => {
    // Restore CW_PORT (set, or delete if it wasn't set before the suite ran).
    if (originalCwPort) {
      process.env.CW_PORT = originalCwPort;
    } else {
      delete process.env.CW_PORT;
    }
    await harness?.cleanup();
    mockServer?.close();
  });

  it(
    'two agents with real tasks communicate via cw ask/listen/answer (two questions prove re-listen)',
    async () => {
      const agentSuffix = nanoid(6); // unique suffix for temp files

      // ---------------------------------------------------------------
      // Agent A — builds a validator module WHILE answering questions
      // in the background via cw listen
      // ---------------------------------------------------------------
      const agentA = await harness.agentManager.spawn({
        taskId: null,
        prompt: `You are Agent A in a multi-agent coordination test.

You have TWO concurrent responsibilities:
1. Build a TypeScript validator module (your main coding task)
2. Answer questions from other agents via a background listener

SETUP (do this first):
- Read .cw/input/manifest.json to get your agentId
- Start a background listener that writes to a temp file:
 cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
 LISTEN_PID=$!

MAIN CODING TASK — implement a user registration validator:

1. Create types.ts:
 export interface RegistrationInput { name: string; email: string; password: string; }
 export interface ValidationResult { valid: boolean; errors: string[]; }

2. Create validator.ts:
 Import from types.ts. Export function validateRegistration(input: RegistrationInput): ValidationResult
 Rules: name min 2 chars, email must have exactly one @ and domain with a dot and no spaces and max 254 chars, password min 8 chars.

3. Create index.ts that re-exports everything from types.ts and validator.ts.

BETWEEN EACH FILE, check for incoming questions:
 if [ -s /tmp/cw-listen-${agentSuffix}.txt ]; then
 # parse the JSON, get conversationId and question
 # answer: cw answer "<answer based on your code>" --conversation-id <id>
 # clear and restart listener:
 > /tmp/cw-listen-${agentSuffix}.txt
 cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
 LISTEN_PID=$!
 fi

You will receive TWO questions total while you work. Answer them based on the code you are writing.

CLEANUP: After all 3 files are written and both questions answered:
- kill $LISTEN_PID 2>/dev/null
- Write .cw/output/signal.json: {"status":"done","result":"validator module complete, answered 2 questions"}

CRITICAL:
- The listener MUST run in the background while you write code.
- Check for questions between files, not as blocking waits.
- The CW_PORT environment variable is already set to ${mockPort}.`,
        mode: 'execute',
        provider: 'claude',
        inputContext: {},
      });

      console.log(` Agent A: ${agentA.id} (${agentA.name})`);

      // Give Agent A time to start its background listener and begin coding
      await sleep(15000);

      // ---------------------------------------------------------------
      // Agent B — builds a client module, asks Agent A questions to
      // learn the validation rules, then uses answers in its code
      // ---------------------------------------------------------------
      const agentB = await harness.agentManager.spawn({
        taskId: null,
        prompt: `You are Agent B in a multi-agent coordination test.

Read .cw/input/manifest.json to get your agentId. Agent A (ID: ${agentA.id}) is building a validator module.

YOUR CODING TASK — build a registration API client that includes client-side validation matching Agent A's server-side rules:

1. Create client-scaffold.ts with a basic RegistrationClient class that has a register(name, email, password) method that returns Promise<{ok: boolean}>.
 Leave a TODO comment where validation will go.

2. NOW ask Agent A what the validation rules are — you need this to write proper client-side checks:
 FIELDS=$(cw ask "What are the required fields and their types for registration?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)

3. Ask Agent A about the specific email validation rules:
 EMAIL_RULES=$(cw ask "What are the exact email validation rules you implemented?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)

4. Create validated-client.ts — a COMPLETE implementation using the answers:
 Import the scaffold, add a validateBeforeSubmit(name, email, password) function
 that implements the EXACT validation rules Agent A told you about.
 Include a comment at the top with the rules you received.

5. Write .cw/output/signal.json: {"status":"done","result":"client module complete with validation from agent A"}

CRITICAL:
- Create client-scaffold.ts BEFORE asking questions (you have independent work to do first).
- Use the ACTUAL answers from Agent A in your validated-client.ts implementation.
- The CW_PORT environment variable is already set to ${mockPort}.`,
        mode: 'execute',
        provider: 'claude',
        inputContext: {},
      });

      console.log(` Agent B: ${agentB.id} (${agentB.name})`);

      // ---------------------------------------------------------------
      // Wait for both agents to stop running, then verify conversations
      // ---------------------------------------------------------------
      const deadline = Date.now() + TEST_TIMEOUT;
      let aDone = false;
      let bDone = false;
      let lastLogTime = 0;

      while (Date.now() < deadline && (!aDone || !bDone)) {
        const agentAInfo = await harness.agentRepository.findById(agentA.id);
        const agentBInfo = await harness.agentRepository.findById(agentB.id);

        // Periodic progress logging every 30s
        if (Date.now() - lastLogTime > 30000) {
          const elapsed = Math.round((Date.now() - (deadline - TEST_TIMEOUT)) / 1000);
          console.log(` [${elapsed}s] A=${agentAInfo?.status ?? '?'} B=${agentBInfo?.status ?? '?'} convs=${mockRepo.getAll().length}`);
          lastLogTime = Date.now();
        }

        // Dump each agent's logs exactly once, the first time it stops running.
        if (agentAInfo && agentAInfo.status !== 'running' && !aDone) {
          aDone = true;
          console.log(` Agent A final status: ${agentAInfo.status}`);
          dumpAgentLogs(harness.workspaceRoot, agentA.name);
        }
        if (agentBInfo && agentBInfo.status !== 'running' && !bDone) {
          bDone = true;
          console.log(` Agent B final status: ${agentBInfo.status}`);
          dumpAgentLogs(harness.workspaceRoot, agentB.name);
        }

        if (!aDone || !bDone) await sleep(2000);
      }

      expect(aDone).toBe(true);
      expect(bDone).toBe(true);

      // ---------------------------------------------------------------
      // Verify conversations in mock repo
      // ---------------------------------------------------------------
      const allConversations = mockRepo.getAll();
      console.log(` Total conversations: ${allConversations.length}`);
      for (const c of allConversations) {
        console.log(
          ` ${c.id}: ${c.status} — Q: "${c.question}" A: "${c.answer?.substring(0, 80)}..."`,
        );
      }

      // Exactly 2 conversations, both answered
      expect(allConversations.length).toBe(2);
      expect(allConversations.every((c) => c.status === 'answered')).toBe(true);

      // Both target Agent A, both from Agent B
      expect(allConversations.every((c) => c.toAgentId === agentA.id)).toBe(true);
      expect(allConversations.every((c) => c.fromAgentId === agentB.id)).toBe(true);

      // Questions should be distinct (one about fields, one about email validation)
      const questions = allConversations.map((c) => c.question);
      expect(questions.some((q) => q.toLowerCase().includes('field'))).toBe(true);
      expect(questions.some((q) => q.toLowerCase().includes('email'))).toBe(true);

      // Both answers should be non-empty
      expect(allConversations.every((c) => c.answer && c.answer.length > 0)).toBe(true);

      // ---------------------------------------------------------------
      // Verify Agent A's coding output — validator module files exist
      // ---------------------------------------------------------------
      const aWorkdir = join(
        harness.workspaceRoot,
        'agent-workdirs',
        agentA.name,
        'workspace',
      );
      const aFiles = ['types.ts', 'validator.ts', 'index.ts'];
      for (const f of aFiles) {
        const filePath = join(aWorkdir, f);
        const exists = existsSync(filePath);
        console.log(` Agent A file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
        expect(exists).toBe(true);
      }
      // validator.ts should contain actual validation logic
      const validatorContent = readFileSync(join(aWorkdir, 'validator.ts'), 'utf-8');
      console.log(` Agent A validator.ts (${validatorContent.length} chars): ${validatorContent.substring(0, 120)}...`);
      expect(validatorContent.toLowerCase()).toContain('email');
      expect(validatorContent.toLowerCase()).toContain('password');

      // ---------------------------------------------------------------
      // Verify Agent B's coding output — client module files exist
      // ---------------------------------------------------------------
      const bWorkdir = join(
        harness.workspaceRoot,
        'agent-workdirs',
        agentB.name,
        'workspace',
      );
      const bFiles = ['client-scaffold.ts', 'validated-client.ts'];
      for (const f of bFiles) {
        const filePath = join(bWorkdir, f);
        const exists = existsSync(filePath);
        console.log(` Agent B file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
        expect(exists).toBe(true);
      }
      // validated-client.ts should reference validation rules from Agent A's answers
      const clientContent = readFileSync(join(bWorkdir, 'validated-client.ts'), 'utf-8');
      console.log(` Agent B validated-client.ts (${clientContent.length} chars): ${clientContent.substring(0, 120)}...`);
      expect(clientContent.toLowerCase()).toContain('email');

      // ---------------------------------------------------------------
      // Verify interleaving: Agent A's JSONL log has coding tool calls
      // (Write for .ts files) interleaved with conversation tool calls
      // (Bash for cw listen/answer)
      // ---------------------------------------------------------------
      const aLogPath = join(harness.workspaceRoot, '.cw', 'agent-logs', agentA.name, 'output.jsonl');
      const aLog = readFileSync(aLogPath, 'utf-8').trim().split('\n');
      const toolCalls: { type: 'code' | 'conversation'; name: string; detail: string }[] = [];

      for (const line of aLog) {
        try {
          const ev = JSON.parse(line);
          if (ev.type !== 'assistant' || !ev.message?.content) continue;
          for (const block of ev.message.content) {
            if (block.type !== 'tool_use') continue;
            // tool_use input may be a raw string or structured object.
            const input = typeof block.input === 'string' ? block.input : JSON.stringify(block.input);
            if (block.name === 'Write' && input.includes('.ts')) {
              toolCalls.push({ type: 'code', name: 'Write', detail: input.substring(0, 80) });
            } else if (block.name === 'Bash' && (input.includes('cw listen') || input.includes('cw answer'))) {
              toolCalls.push({ type: 'conversation', name: 'Bash', detail: input.substring(0, 80) });
            }
          }
        } catch { /* skip non-JSON lines */ }
      }

      console.log(` Agent A interleaving (${toolCalls.length} relevant tool calls):`);
      for (const tc of toolCalls) {
        console.log(` [${tc.type}] ${tc.name}: ${tc.detail}`);
      }

      // Must have both code and conversation tool calls
      const hasCode = toolCalls.some((tc) => tc.type === 'code');
      const hasConversation = toolCalls.some((tc) => tc.type === 'conversation');
      expect(hasCode).toBe(true);
      expect(hasConversation).toBe(true);

      // Verify interleaving: at least one code call must appear AFTER a conversation call
      // (proving coding continued after handling a question)
      const firstConvIdx = toolCalls.findIndex((tc) => tc.type === 'conversation');
      const lastCodeIdx = toolCalls.length - 1 - [...toolCalls].reverse().findIndex((tc) => tc.type === 'code');
      console.log(` First conversation at index ${firstConvIdx}, last code at index ${lastCodeIdx}`);
      expect(lastCodeIdx).toBeGreaterThan(firstConvIdx);
    },
    TEST_TIMEOUT,
  );
});
|
||||
@@ -0,0 +1,265 @@
|
||||
/**
|
||||
* Crash Recovery Integration Tests
|
||||
*
|
||||
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
|
||||
* They are SKIPPED by default and should only be run manually for validation.
|
||||
*
|
||||
* To run these tests:
|
||||
* ```bash
|
||||
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/crash-recovery.test.ts --test-timeout=300000
|
||||
* ```
|
||||
*
|
||||
* Tests covered:
|
||||
* - Server restart while agent is running
|
||||
* - Resuming streaming after restart
|
||||
* - Marking dead agents as crashed
|
||||
* - Output file processing after restart
|
||||
*
|
||||
* Estimated cost: ~$0.08 per full run
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
|
||||
import {
|
||||
createRealProviderHarness,
|
||||
describeRealClaude,
|
||||
REAL_TEST_TIMEOUT,
|
||||
EXTENDED_TEST_TIMEOUT,
|
||||
sleep,
|
||||
type RealProviderHarness,
|
||||
} from './harness.js';
|
||||
import { MINIMAL_PROMPTS } from './prompts.js';
|
||||
import { MultiProviderAgentManager } from '../../../agent/manager.js';
|
||||
|
||||
describeRealClaude('Crash Recovery', () => {
|
||||
let harness: RealProviderHarness;
|
||||
|
||||
beforeAll(async () => {
|
||||
console.log('\n=== Running Crash Recovery Tests ===');
|
||||
console.log('These tests call the real Claude API and incur costs.\n');
|
||||
harness = await createRealProviderHarness({ provider: 'claude' });
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await harness.cleanup();
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
harness.clearEvents();
|
||||
});
|
||||
|
||||
describe('Server Restart Simulation', () => {
|
||||
it(
|
||||
'resumes streaming for still-running agent after restart',
|
||||
async () => {
|
||||
// 1. Spawn agent with slow task
|
||||
console.log(' 1. Spawning agent with slow task...');
|
||||
const agent = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: MINIMAL_PROMPTS.slow,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
// 2. Wait for agent to be running
|
||||
await harness.waitForAgentStatus(agent.id, 'running', 10000);
|
||||
const dbAgent = await harness.agentRepository.findById(agent.id);
|
||||
expect(dbAgent?.pid).toBeTruthy();
|
||||
expect(dbAgent?.outputFilePath).toBeTruthy();
|
||||
console.log(' 2. Agent running with PID:', dbAgent?.pid);
|
||||
|
||||
// 3. Give the agent a moment to start writing output
|
||||
await sleep(2000);
|
||||
|
||||
// 4. Simulate server crash - create NEW manager (old state lost)
|
||||
console.log(' 3. Simulating server restart with new manager...');
|
||||
harness.clearEvents(); // Clear events from old manager
|
||||
|
||||
const newManager = new MultiProviderAgentManager(
|
||||
harness.agentRepository,
|
||||
harness.workspaceRoot,
|
||||
harness.projectRepository,
|
||||
harness.accountRepository,
|
||||
harness.eventBus
|
||||
);
|
||||
|
||||
// 5. Reconcile - should pick up running agent
|
||||
console.log(' 4. Reconciling agent state...');
|
||||
await newManager.reconcileAfterRestart();
|
||||
|
||||
// 6. Wait for completion via new manager
|
||||
console.log(' 5. Waiting for completion via new manager...');
|
||||
let attempts = 0;
|
||||
let finalStatus = 'running';
|
||||
while (attempts < 60) {
|
||||
const refreshed = await harness.agentRepository.findById(agent.id);
|
||||
if (refreshed?.status !== 'running') {
|
||||
finalStatus = refreshed?.status ?? 'unknown';
|
||||
break;
|
||||
}
|
||||
await sleep(2000);
|
||||
attempts++;
|
||||
}
|
||||
|
||||
const finalAgent = await harness.agentRepository.findById(agent.id);
|
||||
console.log(' 6. Final status:', finalAgent?.status);
|
||||
|
||||
// Either completed successfully or crashed (both are valid outcomes)
|
||||
expect(['idle', 'crashed', 'stopped']).toContain(finalAgent?.status);
|
||||
|
||||
if (finalAgent?.status === 'idle') {
|
||||
const result = await newManager.getResult(agent.id);
|
||||
console.log(' Result:', result?.message);
|
||||
}
|
||||
},
|
||||
EXTENDED_TEST_TIMEOUT
|
||||
);
|
||||
|
||||
it(
|
||||
'marks dead agent as crashed during reconcile',
|
||||
async () => {
|
||||
// 1. Create a fake agent record with a dead PID
|
||||
console.log(' 1. Creating fake agent with dead PID...');
|
||||
const fakeAgent = await harness.agentRepository.create({
|
||||
name: 'dead-agent-test',
|
||||
taskId: null,
|
||||
initiativeId: null,
|
||||
sessionId: null,
|
||||
worktreeId: 'dead-worktree',
|
||||
status: 'running',
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
accountId: null,
|
||||
});
|
||||
|
||||
// Set a PID that's definitely dead (high number that won't exist)
|
||||
await harness.agentRepository.update(fakeAgent.id, { pid: 999999, outputFilePath: '/nonexistent/path' });
|
||||
|
||||
// Verify it's marked as running
|
||||
let agent = await harness.agentRepository.findById(fakeAgent.id);
|
||||
expect(agent?.status).toBe('running');
|
||||
expect(agent?.pid).toBe(999999);
|
||||
|
||||
// 2. Create new manager and reconcile
|
||||
console.log(' 2. Creating new manager and reconciling...');
|
||||
const newManager = new MultiProviderAgentManager(
|
||||
harness.agentRepository,
|
||||
harness.workspaceRoot,
|
||||
harness.projectRepository,
|
||||
harness.accountRepository,
|
||||
harness.eventBus
|
||||
);
|
||||
|
||||
await newManager.reconcileAfterRestart();
|
||||
|
||||
// 3. Verify agent is now crashed
|
||||
agent = await harness.agentRepository.findById(fakeAgent.id);
|
||||
expect(agent?.status).toBe('crashed');
|
||||
console.log(' 3. Agent marked as crashed (dead PID detected)');
|
||||
},
|
||||
REAL_TEST_TIMEOUT
|
||||
);
|
||||
|
||||
// Simulates a server restart that happened after the provider process finished
// but before its output file was processed: the agent row is forced back to
// 'running' with no result, then a fresh manager must recover from the file.
it(
  'processes output file for dead agent during reconcile',
  async () => {
    // 1. Spawn agent and wait for completion
    console.log(' 1. Spawning agent to completion...');
    const agent = await harness.agentManager.spawn({
      taskId: null,
      prompt: MINIMAL_PROMPTS.done,
      mode: 'execute',
      provider: 'claude',
    });

    await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

    const dbAgent = await harness.agentRepository.findById(agent.id);
    const outputFilePath = dbAgent?.outputFilePath;
    expect(outputFilePath).toBeTruthy();
    console.log(' 2. Output file:', outputFilePath);

    // 2. Reset agent to "running" to simulate mid-crash state
    await harness.agentRepository.update(agent.id, { status: 'running' });
    // Clear result so reconcile has to re-process
    await harness.agentRepository.update(agent.id, { result: null });

    // Verify reset
    let resetAgent = await harness.agentRepository.findById(agent.id);
    expect(resetAgent?.status).toBe('running');

    // 3. Create new manager and reconcile
    console.log(' 3. Creating new manager and reconciling...');
    harness.clearEvents();

    const newManager = new MultiProviderAgentManager(
      harness.agentRepository,
      harness.workspaceRoot,
      harness.projectRepository,
      harness.accountRepository,
      harness.eventBus
    );

    await newManager.reconcileAfterRestart();

    // Give it a moment to process the file
    await sleep(1000);

    // 4. Verify agent was processed from output file
    const finalAgent = await harness.agentRepository.findById(agent.id);
    console.log(' 4. Final status:', finalAgent?.status);

    // Should either be idle (processed successfully) or crashed (couldn't process)
    expect(['idle', 'crashed']).toContain(finalAgent?.status);
  },
  REAL_TEST_TIMEOUT
);
|
||||
});
|
||||
|
||||
// Reconciling an already-completed agent must not replay its history:
// after restart, no agent:output events may be re-emitted.
describe('Event Consistency', () => {
  it(
    'does not duplicate events on restart',
    async () => {
      // 1. Spawn agent with slow task
      console.log(' 1. Spawning agent...');
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.streaming,
        mode: 'execute',
        provider: 'claude',
      });

      // 2. Wait for some output events
      await sleep(3000);
      const initialOutputCount = harness.getEventsByType('agent:output').length;
      console.log(' 2. Initial output events:', initialOutputCount);

      // 3. Wait for completion
      await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
      const finalOutputCount = harness.getEventsByType('agent:output').length;
      console.log(' 3. Final output events:', finalOutputCount);

      // 4. Create new manager and reconcile (agent already complete)
      harness.clearEvents();

      const newManager = new MultiProviderAgentManager(
        harness.agentRepository,
        harness.workspaceRoot,
        harness.projectRepository,
        harness.accountRepository,
        harness.eventBus
      );

      await newManager.reconcileAfterRestart();
      await sleep(1000);

      // 5. Verify no new output events (agent was already complete)
      const postReconcileOutputCount = harness.getEventsByType('agent:output').length;
      console.log(' 4. Post-reconcile output events:', postReconcileOutputCount);

      // Should not have re-emitted all the old output events
      expect(postReconcileOutputCount).toBe(0);
    },
    REAL_TEST_TIMEOUT
  );
});
|
||||
});
|
||||
378
apps/server/test/integration/real-providers/harness.ts
Normal file
378
apps/server/test/integration/real-providers/harness.ts
Normal file
@@ -0,0 +1,378 @@
|
||||
/**
|
||||
* Real Provider Test Harness
|
||||
*
|
||||
* Extends the existing test infrastructure to use REAL MultiProviderAgentManager
|
||||
* for integration testing with actual CLI providers like Claude and Codex.
|
||||
*
|
||||
* Unlike the standard TestHarness which uses MockAgentManager, this harness:
|
||||
* - Uses real CLI spawning (costs real API credits!)
|
||||
* - Provides poll-based waiting helpers
|
||||
* - Captures events for inspection
|
||||
* - Manages temp directories for worktrees
|
||||
*/
|
||||
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe } from 'vitest';
|
||||
import type { DrizzleDatabase } from '../../../db/index.js';
|
||||
import type { DomainEvent, EventBus } from '../../../events/types.js';
|
||||
import { EventEmitterBus } from '../../../events/bus.js';
|
||||
import { MultiProviderAgentManager } from '../../../agent/manager.js';
|
||||
import type { AgentResult, PendingQuestions, AgentStatus } from '../../../agent/types.js';
|
||||
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
|
||||
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
|
||||
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
|
||||
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
|
||||
import {
|
||||
DrizzleAgentRepository,
|
||||
DrizzleProjectRepository,
|
||||
DrizzleAccountRepository,
|
||||
DrizzleInitiativeRepository,
|
||||
} from '../../../db/repositories/drizzle/index.js';
|
||||
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';
|
||||
|
||||
/**
|
||||
* Sleep helper for polling loops.
|
||||
*/
|
||||
export function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
|
||||
* Event bus that captures all emitted events for inspection.
|
||||
*/
|
||||
export class CapturingEventBus extends EventEmitterBus {
|
||||
emittedEvents: DomainEvent[] = [];
|
||||
|
||||
emit<T extends DomainEvent>(event: T): void {
|
||||
this.emittedEvents.push(event);
|
||||
super.emit(event);
|
||||
}
|
||||
|
||||
getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
|
||||
return this.emittedEvents.filter((e) => e.type === type) as T[];
|
||||
}
|
||||
|
||||
clearEvents(): void {
|
||||
this.emittedEvents = [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Options for creating a real provider test harness.
 */
export interface RealProviderHarnessOptions {
  /** Which provider to test (default: 'claude') */
  provider?: 'claude' | 'codex';
  /**
   * Optional workspace root (temp dir created if omitted).
   * A caller-provided workspace is NOT deleted by `cleanup()` — only a
   * harness-created temp directory is removed.
   */
  workspaceRoot?: string;
}
|
||||
|
||||
/**
 * Real Provider Test Harness interface.
 *
 * Provides everything needed to test against real CLI providers:
 * - In-memory database with real repositories
 * - Real MultiProviderAgentManager (spawns actual CLI processes)
 * - Event capture for verification
 * - Polling-based wait helpers
 */
export interface RealProviderHarness {
  /** In-memory SQLite database */
  db: DrizzleDatabase;
  /** Event bus with capture capability */
  eventBus: CapturingEventBus;
  /** Real agent manager (not mock!) */
  agentManager: MultiProviderAgentManager;
  /** Workspace root directory */
  workspaceRoot: string;

  /** Agent repository */
  agentRepository: AgentRepository;
  /** Project repository */
  projectRepository: ProjectRepository;
  /** Account repository */
  accountRepository: AccountRepository;
  /** Initiative repository */
  initiativeRepository: InitiativeRepository;

  /**
   * Wait for an agent to reach idle, stopped, or crashed status.
   * Polls the database at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms = 2 minutes)
   * @returns The agent result once the agent finishes — including crashes,
   *   which return the error result — or null if the agent row is missing
   *   or the agent enters waiting_for_input
   * @throws Error if the timeout elapses before the agent finishes
   */
  waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;

  /**
   * Wait for an agent to enter waiting_for_input status.
   * Polls the database at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @returns The pending questions if the agent asks for input, or null if
   *   the agent row is missing or the agent finishes without asking
   * @throws Error if the timeout elapses while the agent is still running
   */
  waitForAgentWaiting(agentId: string, timeoutMs?: number): Promise<PendingQuestions | null>;

  /**
   * Wait for an agent to reach a specific status.
   *
   * @param agentId - The agent ID to wait for
   * @param status - The target status
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @throws Error if the agent row is missing, if the agent settles in a
   *   terminal state that rules out the target, or if the timeout elapses
   */
  waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs?: number): Promise<void>;

  /**
   * Get captured events filtered by type.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[];

  /**
   * Clear all captured events.
   */
  clearEvents(): void;

  /**
   * Kill all running agents (for cleanup).
   */
  killAllAgents(): Promise<void>;

  /**
   * Clean up all resources (directories, processes).
   * Call this in afterAll/afterEach.
   */
  cleanup(): Promise<void>;
}
|
||||
|
||||
/** Default poll interval for status checks (ms between DB reads in the wait helpers) */
const POLL_INTERVAL_MS = 1000;
|
||||
|
||||
/**
|
||||
* Create a test harness for real provider integration tests.
|
||||
*
|
||||
* This creates:
|
||||
* - In-memory SQLite database
|
||||
* - Temp directory for worktrees (or uses provided workspace)
|
||||
* - Real MultiProviderAgentManager
|
||||
* - Event capture bus
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* let harness: RealProviderHarness;
|
||||
*
|
||||
* beforeAll(async () => {
|
||||
* harness = await createRealProviderHarness({ provider: 'claude' });
|
||||
* });
|
||||
*
|
||||
* afterAll(async () => {
|
||||
* await harness.cleanup();
|
||||
* });
|
||||
*
|
||||
* it('spawns and completes', async () => {
|
||||
* const agent = await harness.agentManager.spawn({...});
|
||||
* const result = await harness.waitForAgentCompletion(agent.id);
|
||||
* expect(result?.success).toBe(true);
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
export async function createRealProviderHarness(
|
||||
options: RealProviderHarnessOptions = {}
|
||||
): Promise<RealProviderHarness> {
|
||||
// Create workspace directory (temp if not provided)
|
||||
const workspaceRoot = options.workspaceRoot ?? (await mkdtemp(join(tmpdir(), 'cw-test-')));
|
||||
const ownedWorkspace = !options.workspaceRoot; // Track if we need to clean up
|
||||
|
||||
// Initialize git repo in temp workspace (required for worktree operations)
|
||||
if (ownedWorkspace) {
|
||||
const { execSync } = await import('node:child_process');
|
||||
execSync('git init', { cwd: workspaceRoot, stdio: 'ignore' });
|
||||
execSync('git config user.email "test@test.com"', { cwd: workspaceRoot, stdio: 'ignore' });
|
||||
execSync('git config user.name "Test"', { cwd: workspaceRoot, stdio: 'ignore' });
|
||||
// Create initial commit (worktrees require at least one commit)
|
||||
execSync('touch .gitkeep && git add .gitkeep && git commit -m "init"', { cwd: workspaceRoot, stdio: 'ignore' });
|
||||
}
|
||||
|
||||
// Create in-memory database
|
||||
const db = createTestDatabase();
|
||||
|
||||
// Create repositories
|
||||
const agentRepository = new DrizzleAgentRepository(db);
|
||||
const projectRepository = new DrizzleProjectRepository(db);
|
||||
const accountRepository = new DrizzleAccountRepository(db);
|
||||
const initiativeRepository = new DrizzleInitiativeRepository(db);
|
||||
|
||||
// Create event bus with capture (parent class already sets maxListeners to 100)
|
||||
const eventBus = new CapturingEventBus();
|
||||
|
||||
// Create REAL agent manager (not mock!)
|
||||
const agentManager = new MultiProviderAgentManager(
|
||||
agentRepository,
|
||||
workspaceRoot,
|
||||
projectRepository,
|
||||
accountRepository,
|
||||
eventBus
|
||||
);
|
||||
|
||||
// Build harness
|
||||
const harness: RealProviderHarness = {
|
||||
db,
|
||||
eventBus,
|
||||
agentManager,
|
||||
workspaceRoot,
|
||||
agentRepository,
|
||||
projectRepository,
|
||||
accountRepository,
|
||||
initiativeRepository,
|
||||
|
||||
async waitForAgentCompletion(agentId: string, timeoutMs = 120000): Promise<AgentResult | null> {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
|
||||
while (Date.now() < deadline) {
|
||||
const agent = await agentRepository.findById(agentId);
|
||||
if (!agent) return null;
|
||||
|
||||
if (agent.status === 'idle' || agent.status === 'stopped') {
|
||||
// Agent completed - get result
|
||||
return agentManager.getResult(agentId);
|
||||
}
|
||||
|
||||
if (agent.status === 'crashed') {
|
||||
// Agent crashed - return the error result
|
||||
return agentManager.getResult(agentId);
|
||||
}
|
||||
|
||||
if (agent.status === 'waiting_for_input') {
|
||||
// Agent is waiting - return null (not completed)
|
||||
return null;
|
||||
}
|
||||
|
||||
// Still running - wait and check again
|
||||
await sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
|
||||
throw new Error(`Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
|
||||
},
|
||||
|
||||
async waitForAgentWaiting(agentId: string, timeoutMs = 120000): Promise<PendingQuestions | null> {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
|
||||
while (Date.now() < deadline) {
|
||||
const agent = await agentRepository.findById(agentId);
|
||||
if (!agent) return null;
|
||||
|
||||
if (agent.status === 'waiting_for_input') {
|
||||
return agentManager.getPendingQuestions(agentId);
|
||||
}
|
||||
|
||||
if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
|
||||
// Agent finished without asking questions
|
||||
return null;
|
||||
}
|
||||
|
||||
// Still running - wait and check again
|
||||
await sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
|
||||
throw new Error(`Timeout waiting for agent ${agentId} to request input after ${timeoutMs}ms`);
|
||||
},
|
||||
|
||||
async waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs = 120000): Promise<void> {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
|
||||
while (Date.now() < deadline) {
|
||||
const agent = await agentRepository.findById(agentId);
|
||||
if (!agent) {
|
||||
throw new Error(`Agent ${agentId} not found`);
|
||||
}
|
||||
|
||||
if (agent.status === status) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for terminal states that mean we'll never reach target
|
||||
if (status === 'running' && ['idle', 'stopped', 'crashed', 'waiting_for_input'].includes(agent.status)) {
|
||||
throw new Error(`Agent ${agentId} already in terminal state ${agent.status}, cannot reach ${status}`);
|
||||
}
|
||||
|
||||
await sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
|
||||
throw new Error(`Timeout waiting for agent ${agentId} to reach status ${status} after ${timeoutMs}ms`);
|
||||
},
|
||||
|
||||
getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
|
||||
return eventBus.getEventsByType<T>(type);
|
||||
},
|
||||
|
||||
clearEvents(): void {
|
||||
eventBus.clearEvents();
|
||||
},
|
||||
|
||||
async killAllAgents(): Promise<void> {
|
||||
const agents = await agentRepository.findAll();
|
||||
for (const agent of agents) {
|
||||
if (agent.status === 'running') {
|
||||
try {
|
||||
await agentManager.stop(agent.id);
|
||||
} catch {
|
||||
// Ignore errors during cleanup
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
async cleanup(): Promise<void> {
|
||||
// Kill any running agents
|
||||
await harness.killAllAgents();
|
||||
|
||||
// Clean up workspace directory if we created it
|
||||
if (ownedWorkspace) {
|
||||
try {
|
||||
await rm(workspaceRoot, { recursive: true, force: true });
|
||||
} catch {
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
return harness;
|
||||
}
|
||||
|
||||
/**
 * Check if real Claude tests should run.
 * Set REAL_CLAUDE_TESTS=1 environment variable to enable.
 */
export const shouldRunRealClaudeTests = process.env.REAL_CLAUDE_TESTS === '1';

/**
 * Check if real Codex tests should run.
 * Set REAL_CODEX_TESTS=1 environment variable to enable.
 */
export const shouldRunRealCodexTests = process.env.REAL_CODEX_TESTS === '1';

/**
 * Skip wrapper for Claude tests - skips unless REAL_CLAUDE_TESTS=1.
 * The cast keeps the wrapper call-compatible with plain `describe` at usage
 * sites, since `describe.skip` exposes a narrower type.
 */
export const describeRealClaude: typeof describe = shouldRunRealClaudeTests ? describe : (describe.skip as typeof describe);

/**
 * Skip wrapper for Codex tests - skips unless REAL_CODEX_TESTS=1.
 */
export const describeRealCodex: typeof describe = shouldRunRealCodexTests ? describe : (describe.skip as typeof describe);

/**
 * Default test timeout for real CLI tests (2 minutes).
 * Real API calls take 5-30 seconds typically.
 */
export const REAL_TEST_TIMEOUT = 120000;

/**
 * Extended test timeout for slow tests (5 minutes).
 * Used for schema retry tests and crash recovery tests.
 */
export const EXTENDED_TEST_TIMEOUT = 300000;
|
||||
56
apps/server/test/integration/real-providers/index.ts
Normal file
56
apps/server/test/integration/real-providers/index.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
/**
 * Real Provider Integration Tests
 *
 * This module provides infrastructure for testing against real CLI providers.
 * Tests are expensive (real API calls) and skipped by default.
 *
 * ## Running Tests
 *
 * ```bash
 * # Claude tests only
 * REAL_CLAUDE_TESTS=1 npm test -- apps/server/test/integration/real-providers/ --test-timeout=300000
 *
 * # Codex tests only
 * REAL_CODEX_TESTS=1 npm test -- apps/server/test/integration/real-providers/codex-manager.test.ts
 *
 * # All real provider tests
 * REAL_CLAUDE_TESTS=1 REAL_CODEX_TESTS=1 npm test -- apps/server/test/integration/real-providers/
 * ```
 *
 * ## Cost Estimates
 *
 * | Suite | Tests | Est. Cost | Duration |
 * |-------|-------|-----------|----------|
 * | Output Parsing | 3 | $0.06 | ~2 min |
 * | Schema Validation | 4 | $0.22 | ~4 min |
 * | Crash Recovery | 3 | $0.08 | ~3 min |
 * | Session Resume | 2 | $0.08 | ~3 min |
 * | Codex Integration | 2 | $0.10 | ~2 min |
 * | **TOTAL** | **14** | **~$0.54** | **~14 min** |
 *
 * ## Test Files
 *
 * - `harness.ts` - RealProviderHarness factory and utilities
 * - `prompts.ts` - Minimal cost test prompts
 * - `claude-manager.test.ts` - Claude spawn/resume/output tests
 * - `codex-manager.test.ts` - Codex provider tests
 * - `schema-retry.test.ts` - Schema validation + retry tests
 * - `crash-recovery.test.ts` - Server restart simulation
 * - `sample-outputs/` - Captured CLI output for parser unit tests
 */

export {
  createRealProviderHarness,
  CapturingEventBus,
  sleep,
  shouldRunRealClaudeTests,
  shouldRunRealCodexTests,
  describeRealClaude,
  describeRealCodex,
  REAL_TEST_TIMEOUT,
  EXTENDED_TEST_TIMEOUT,
  type RealProviderHarness,
  type RealProviderHarnessOptions,
} from './harness.js';

export { MINIMAL_PROMPTS, CODEX_PROMPTS } from './prompts.js';
|
||||
113
apps/server/test/integration/real-providers/prompts.ts
Normal file
113
apps/server/test/integration/real-providers/prompts.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
/**
|
||||
* Minimal Cost Test Prompts
|
||||
*
|
||||
* Carefully crafted prompts designed to minimize token usage while
|
||||
* testing specific CLI behaviors. Each prompt aims for the smallest
|
||||
* possible API cost while still exercising the target functionality.
|
||||
*
|
||||
* Cost estimates assume Claude Sonnet pricing (~$3/M input, $15/M output).
|
||||
*/
|
||||
|
||||
// Prompt text below is part of the test contract: the integration tests match
// the exact JSON the CLI is told to emit. Do not reword these strings casually.
export const MINIMAL_PROMPTS = {
  /**
   * ~$0.01 - Cheapest done response
   * Tests: basic spawn → completion flow, status parsing
   */
  done: `Output exactly this JSON with no other text:
{"status":"done","result":"ok"}`,

  /**
   * ~$0.01 - Cheapest questions response
   * Tests: waiting_for_input status, questions array parsing
   */
  questions: `Output exactly this JSON with no other text:
{"status":"questions","questions":[{"id":"q1","question":"What is your name?"}]}`,

  /**
   * ~$0.03 - Slow task for timing tests
   * Tests: streaming during long-running task, crash recovery
   * Note: Agent may not actually wait 30 seconds, but will produce delayed output
   */
  slow: `Think through a simple problem step by step, counting from 1 to 10 slowly, then output:
{"status":"done","result":"counted to 10"}`,

  /**
   * ~$0.02 - Produces text deltas for streaming tests
   * Tests: text_delta event parsing, output buffering
   */
  streaming: `Count from 1 to 5, outputting each number, then output:
{"status":"done","result":"counted"}`,

  /**
   * ~$0.03 - Deliberately produces non-JSON first
   * Tests: schema validation failure, retry logic
   */
  badThenGood: `First say "thinking..." on its own line, then output:
{"status":"done","result":"fixed"}`,

  /**
   * ~$0.02 - Multiple questions
   * Tests: questions array with multiple items
   */
  multipleQuestions: `Output exactly this JSON with no other text:
{"status":"questions","questions":[{"id":"q1","question":"First question?"},{"id":"q2","question":"Second question?"}]}`,

  /**
   * ~$0.01 - Error signal
   * Tests: error status handling
   */
  error: `Output exactly this JSON with no other text:
{"status":"error","error":"Test error message"}`,

  /**
   * ~$0.02 - Answer continuation
   * Tests: session resume with answers
   *
   * Builds a follow-up prompt that echoes the given `id: answer` pairs, one
   * per line, then asks the agent to finish with the universal done signal.
   */
  answerContinuation: (answers: Record<string, string>): string => {
    const answerLines = Object.entries(answers)
      .map(([id, answer]) => `${id}: ${answer}`)
      .join('\n');
    return `I received your answers:
${answerLines}

Now complete the task by outputting:
{"status":"done","result":"completed with answers"}`;
  },

  /**
   * ~$0.02 - Context complete for discuss mode
   * Tests: discuss mode output handling (now uses universal done signal)
   */
  discussComplete: `Output exactly this JSON with no other text:
{"status":"done"}`,

  /**
   * ~$0.02 - Plan complete
   * Tests: plan mode output handling (now uses universal done signal)
   */
  planComplete: `Output exactly this JSON with no other text:
{"status":"done"}`,

  /**
   * ~$0.02 - Detail complete
   * Tests: detail mode output handling (now uses universal done signal)
   */
  detailComplete: `Output exactly this JSON with no other text:
{"status":"done"}`,
} as const;
|
||||
|
||||
/**
 * Prompts specifically for Codex provider testing.
 * Codex may have different output format requirements, so these are plain
 * natural-language instructions rather than exact-JSON demands.
 */
export const CODEX_PROMPTS = {
  /**
   * Basic completion for Codex
   */
  done: `Complete this simple task: output "done" and finish.`,

  /**
   * Produces streaming output
   */
  streaming: `Count from 1 to 5, saying each number aloud, then say "finished".`,
} as const;
|
||||
@@ -0,0 +1,68 @@
|
||||
# Sample CLI Outputs
|
||||
|
||||
This directory contains captured real CLI outputs for use in parser unit tests.
|
||||
These files allow testing stream parsers without incurring API costs.
|
||||
|
||||
## Files
|
||||
|
||||
### claude-stream-success.jsonl
|
||||
A successful Claude CLI session (v2.1.33) that:
|
||||
- Initializes with `system` event containing `session_id`
|
||||
- Emits `assistant` message with content
|
||||
- Completes with `result` event containing `done` status JSON
|
||||
|
||||
### claude-stream-questions.jsonl
|
||||
A Claude CLI session that:
|
||||
- Initializes with `system` event containing `session_id`
|
||||
- Emits `assistant` message with content wrapped in markdown code block
|
||||
- Completes with `result` event containing `questions` status JSON
|
||||
|
||||
### codex-stream-success.jsonl
|
||||
A successful Codex CLI session (v0.98.0) that:
|
||||
- Starts with `thread.started` event containing `thread_id`
|
||||
- Emits `turn.started`, `item.completed` events
|
||||
- Completes with `turn.completed` event containing usage stats
|
||||
|
||||
## Event Type Differences
|
||||
|
||||
### Claude CLI (`--output-format stream-json`)
|
||||
- `system` (subtype: `init`) - Contains `session_id`, tools, model info
|
||||
- `assistant` - Contains message content in `content[].text`
|
||||
- `result` - Contains final `result` text and `total_cost_usd`
|
||||
|
||||
### Codex CLI (`--json`)
|
||||
- `thread.started` - Contains `thread_id` (equivalent to session_id)
|
||||
- `turn.started` - Marks beginning of turn
|
||||
- `item.completed` - Contains reasoning or agent_message items
|
||||
- `turn.completed` - Contains usage stats
|
||||
|
||||
## Usage
|
||||
|
||||
These files can be used to test stream parsers in isolation:
|
||||
|
||||
```typescript
|
||||
import { readFileSync } from 'fs';
|
||||
import { ClaudeStreamParser } from '../../../agent/providers/parsers/claude.js';
|
||||
|
||||
const output = readFileSync('sample-outputs/claude-stream-success.jsonl', 'utf-8');
|
||||
const parser = new ClaudeStreamParser();
|
||||
|
||||
for (const line of output.split('\n')) {
|
||||
if (line.trim()) {
|
||||
const events = parser.parseLine(line);
|
||||
// Assert on events...
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Capturing New Outputs
|
||||
|
||||
### Claude
|
||||
```bash
|
||||
claude -p "your prompt" --output-format stream-json --verbose > output.jsonl
|
||||
```
|
||||
|
||||
### Codex
|
||||
```bash
|
||||
codex exec --full-auto --json "your prompt" > output.jsonl
|
||||
```
|
||||
@@ -0,0 +1,3 @@
|
||||
{"type":"system","subtype":"init","cwd":"/Users/lukasmay/development/projects/codewalk-district","session_id":"774631da-8e54-445e-9ccb-eea8e7fe805e","tools":["Task","TaskOutput","Bash","Glob","Grep","ExitPlanMode","Read","Edit","Write","NotebookEdit","WebFetch","TodoWrite","WebSearch","TaskStop","AskUserQuestion","Skill","EnterPlanMode","ToolSearch"],"mcp_servers":[],"model":"claude-opus-4-6","permissionMode":"default","slash_commands":["keybindings-help","debug","gsd:define-requirements","gsd:list-phase-assumptions","gsd:debug","gsd:remove-phase","gsd:complete-milestone","gsd:research-phase","gsd:plan-phase","gsd:check-todos","gsd:pause-work","gsd:execute-plan","gsd:research-project","gsd:add-todo","gsd:plan-fix","gsd:resume-work","gsd:progress","gsd:help","gsd:discuss-milestone","gsd:add-phase","gsd:create-roadmap","gsd:map-codebase","gsd:whats-new","gsd:insert-phase","gsd:new-milestone","gsd:new-project","gsd:execute-phase","gsd:verify-work","gsd:discuss-phase","compact","context","cost","init","pr-comments","release-notes","review","security-review","insights"],"apiKeySource":"none","claude_code_version":"2.1.33","output_style":"default","agents":["Bash","general-purpose","statusline-setup","Explore","Plan","claude-code-guide","jira-sw-assessment"],"skills":["keybindings-help","debug"],"plugins":[],"uuid":"224c683c-41f4-4fdd-9af6-f8cdca366ec1"}
|
||||
{"type":"assistant","message":{"model":"claude-opus-4-6","id":"msg_01CfDymxvSRFodJ5Zm6NjLHV","type":"message","role":"assistant","content":[{"type":"text","text":"```json\n{\"status\":\"questions\",\"questions\":[{\"id\":\"q1\",\"question\":\"What is your name?\"},{\"id\":\"q2\",\"question\":\"What is the deadline?\"}]}\n```"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":5983,"cache_read_input_tokens":18026,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":5983},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"},"context_management":null},"parent_tool_use_id":null,"session_id":"774631da-8e54-445e-9ccb-eea8e7fe805e","uuid":"29288f20-766c-4047-82f5-679024188f52"}
|
||||
{"type":"result","subtype":"success","is_error":false,"duration_ms":3213,"duration_api_ms":3203,"num_turns":1,"result":"```json\n{\"status\":\"questions\",\"questions\":[{\"id\":\"q1\",\"question\":\"What is your name?\"},{\"id\":\"q2\",\"question\":\"What is the deadline?\"}]}\n```","stop_reason":null,"session_id":"774631da-8e54-445e-9ccb-eea8e7fe805e","total_cost_usd":0.04754675,"usage":{"input_tokens":3,"cache_creation_input_tokens":5983,"cache_read_input_tokens":18026,"output_tokens":45,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":5983,"ephemeral_5m_input_tokens":0}},"modelUsage":{"claude-opus-4-6":{"inputTokens":3,"outputTokens":45,"cacheReadInputTokens":18026,"cacheCreationInputTokens":5983,"webSearchRequests":0,"costUSD":0.04754675,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"uuid":"08db08cd-0f12-47ae-8c21-c29e11a6d7df"}
|
||||
@@ -0,0 +1,3 @@
|
||||
{"type":"system","subtype":"init","cwd":"/Users/lukasmay/development/projects/codewalk-district","session_id":"a0aa6272-b3a6-443c-9ef5-de3a2450dc6d","tools":["Task","TaskOutput","Bash","Glob","Grep","ExitPlanMode","Read","Edit","Write","NotebookEdit","WebFetch","TodoWrite","WebSearch","TaskStop","AskUserQuestion","Skill","EnterPlanMode","ToolSearch"],"mcp_servers":[],"model":"claude-opus-4-6","permissionMode":"default","slash_commands":["keybindings-help","debug","gsd:define-requirements","gsd:list-phase-assumptions","gsd:debug","gsd:remove-phase","gsd:complete-milestone","gsd:research-phase","gsd:plan-phase","gsd:check-todos","gsd:pause-work","gsd:execute-plan","gsd:research-project","gsd:add-todo","gsd:plan-fix","gsd:resume-work","gsd:progress","gsd:help","gsd:discuss-milestone","gsd:add-phase","gsd:create-roadmap","gsd:map-codebase","gsd:whats-new","gsd:insert-phase","gsd:new-milestone","gsd:new-project","gsd:execute-phase","gsd:verify-work","gsd:discuss-phase","compact","context","cost","init","pr-comments","release-notes","review","security-review","insights"],"apiKeySource":"none","claude_code_version":"2.1.33","output_style":"default","agents":["Bash","general-purpose","statusline-setup","Explore","Plan","claude-code-guide","jira-sw-assessment"],"skills":["keybindings-help","debug"],"plugins":[],"uuid":"c1d6dced-ca04-4335-a624-624660479b7b"}
|
||||
{"type":"assistant","message":{"model":"claude-opus-4-6","id":"msg_01RjSiQY1RUgT47j73Dom93j","type":"message","role":"assistant","content":[{"type":"text","text":"{\"status\":\"done\",\"result\":\"ok\"}"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":5958,"cache_read_input_tokens":18026,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":5958},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"},"context_management":null},"parent_tool_use_id":null,"session_id":"a0aa6272-b3a6-443c-9ef5-de3a2450dc6d","uuid":"f1c8695a-534e-4de2-a684-fa4a1ec03749"}
|
||||
{"type":"result","subtype":"success","is_error":false,"duration_ms":2465,"duration_api_ms":2453,"num_turns":1,"result":"{\"status\":\"done\",\"result\":\"ok\"}","stop_reason":null,"session_id":"a0aa6272-b3a6-443c-9ef5-de3a2450dc6d","total_cost_usd":0.046565499999999996,"usage":{"input_tokens":3,"cache_creation_input_tokens":5958,"cache_read_input_tokens":18026,"output_tokens":12,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":5958,"ephemeral_5m_input_tokens":0}},"modelUsage":{"claude-opus-4-6":{"inputTokens":3,"outputTokens":12,"cacheReadInputTokens":18026,"cacheCreationInputTokens":5958,"webSearchRequests":0,"costUSD":0.046565499999999996,"contextWindow":200000,"maxOutputTokens":32000}},"permission_denials":[],"uuid":"53139e08-b4f3-4f94-b129-82759f77fdca"}
|
||||
@@ -0,0 +1,5 @@
|
||||
{"type":"thread.started","thread_id":"019c3242-955e-7140-9978-517f0b5a22cb"}
|
||||
{"type":"turn.started"}
|
||||
{"type":"item.completed","item":{"id":"item_0","type":"reasoning","text":"**Confirming simple greeting task**"}}
|
||||
{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"Hello!"}}
|
||||
{"type":"turn.completed","usage":{"input_tokens":8458,"cached_input_tokens":6912,"output_tokens":32}}
|
||||
306
apps/server/test/integration/real-providers/schema-retry.test.ts
Normal file
306
apps/server/test/integration/real-providers/schema-retry.test.ts
Normal file
@@ -0,0 +1,306 @@
|
||||
/**
|
||||
* Schema Validation & Retry Integration Tests
|
||||
*
|
||||
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
|
||||
* They are SKIPPED by default and should only be run manually for validation.
|
||||
*
|
||||
* To run these tests:
|
||||
* ```bash
|
||||
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/schema-retry.test.ts --test-timeout=300000
|
||||
* ```
|
||||
*
|
||||
* Tests covered:
|
||||
* - Valid JSON output validation
|
||||
* - Questions status parsing
|
||||
* - Schema validation failure with retry
|
||||
* - Max retry limit handling
|
||||
*
|
||||
* Estimated cost: ~$0.20 per full run (includes retries)
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
|
||||
import {
|
||||
createRealProviderHarness,
|
||||
describeRealClaude,
|
||||
REAL_TEST_TIMEOUT,
|
||||
EXTENDED_TEST_TIMEOUT,
|
||||
type RealProviderHarness,
|
||||
} from './harness.js';
|
||||
import { MINIMAL_PROMPTS } from './prompts.js';
|
||||
import type { AgentResumedEvent, AgentCrashedEvent } from '../../../events/types.js';
|
||||
|
||||
describeRealClaude('Schema Validation & Retry', () => {
|
||||
let harness: RealProviderHarness;
|
||||
|
||||
beforeAll(async () => {
|
||||
console.log('\n=== Running Schema Validation & Retry Tests ===');
|
||||
console.log('These tests call the real Claude API and incur costs.');
|
||||
console.log('Retry tests may take longer and cost more.\n');
|
||||
harness = await createRealProviderHarness({ provider: 'claude' });
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await harness.cleanup();
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
harness.clearEvents();
|
||||
});
|
||||
|
||||
describe('Valid Output', () => {
|
||||
it(
|
||||
'validates done status output',
|
||||
async () => {
|
||||
// Spawn agent with minimal done prompt
|
||||
const agent = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: MINIMAL_PROMPTS.done,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
// Wait for completion
|
||||
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
|
||||
|
||||
// Verify completion
|
||||
const dbAgent = await harness.agentRepository.findById(agent.id);
|
||||
expect(dbAgent?.status).toBe('idle');
|
||||
expect(result?.success).toBe(true);
|
||||
|
||||
// No retry events should have been emitted
|
||||
const resumeEvents = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
|
||||
expect(resumeEvents.length).toBe(0);
|
||||
|
||||
console.log(' Status: idle (valid done output)');
|
||||
console.log(' Result:', result?.message);
|
||||
},
|
||||
REAL_TEST_TIMEOUT
|
||||
);
|
||||
|
||||
it(
|
||||
'validates questions status output',
|
||||
async () => {
|
||||
// Spawn agent with questions prompt
|
||||
const agent = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: MINIMAL_PROMPTS.questions,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
// Wait for waiting_for_input
|
||||
const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
|
||||
|
||||
// Verify questions were validated
|
||||
expect(questions).toBeTruthy();
|
||||
expect(questions?.questions).toBeInstanceOf(Array);
|
||||
expect(questions?.questions.length).toBeGreaterThan(0);
|
||||
|
||||
// Each question should have id and question fields
|
||||
for (const q of questions?.questions ?? []) {
|
||||
expect(q.id).toBeTruthy();
|
||||
expect(q.question).toBeTruthy();
|
||||
}
|
||||
|
||||
const dbAgent = await harness.agentRepository.findById(agent.id);
|
||||
expect(dbAgent?.status).toBe('waiting_for_input');
|
||||
|
||||
// No retry events
|
||||
const resumeEvents = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
|
||||
expect(resumeEvents.length).toBe(0);
|
||||
|
||||
console.log(' Status: waiting_for_input (valid questions output)');
|
||||
console.log(' Questions:', questions?.questions.length);
|
||||
},
|
||||
REAL_TEST_TIMEOUT
|
||||
);
|
||||
|
||||
it(
|
||||
'validates multiple questions',
|
||||
async () => {
|
||||
// Spawn agent with multiple questions prompt
|
||||
const agent = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: MINIMAL_PROMPTS.multipleQuestions,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
// Wait for waiting_for_input
|
||||
const questions = await harness.waitForAgentWaiting(agent.id, REAL_TEST_TIMEOUT);
|
||||
|
||||
// Verify multiple questions
|
||||
expect(questions?.questions.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
// Each question should have unique ID
|
||||
const ids = questions?.questions.map((q) => q.id) ?? [];
|
||||
const uniqueIds = new Set(ids);
|
||||
expect(uniqueIds.size).toBe(ids.length);
|
||||
|
||||
console.log(' Questions:', questions?.questions.map((q) => q.id).join(', '));
|
||||
},
|
||||
REAL_TEST_TIMEOUT
|
||||
);
|
||||
});
|
||||
|
||||
describe('Retry Logic', () => {
|
||||
it(
|
||||
'retries when output does not match schema',
|
||||
async () => {
|
||||
// Prompt that produces non-JSON first, then valid JSON
|
||||
// Note: Claude may or may not produce invalid output first
|
||||
const agent = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: MINIMAL_PROMPTS.badThenGood,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
// Wait for completion (may involve retries)
|
||||
const result = await harness.waitForAgentCompletion(agent.id, EXTENDED_TEST_TIMEOUT);
|
||||
|
||||
const dbAgent = await harness.agentRepository.findById(agent.id);
|
||||
|
||||
// Either succeeded with retry OR succeeded first time
|
||||
expect(['idle', 'crashed']).toContain(dbAgent?.status);
|
||||
|
||||
// Check for retry events
|
||||
const resumeEvents = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
|
||||
console.log(' Retry attempts:', resumeEvents.length);
|
||||
console.log(' Final status:', dbAgent?.status);
|
||||
|
||||
if (dbAgent?.status === 'idle') {
|
||||
expect(result?.success).toBe(true);
|
||||
console.log(' Result:', result?.message);
|
||||
} else {
|
||||
// Crashed after max retries
|
||||
const crashedEvents = harness.getEventsByType<AgentCrashedEvent>('agent:crashed');
|
||||
expect(crashedEvents.length).toBeGreaterThan(0);
|
||||
console.log(' Crashed after retries');
|
||||
}
|
||||
},
|
||||
EXTENDED_TEST_TIMEOUT
|
||||
);
|
||||
|
||||
it(
|
||||
'extracts JSON from markdown code blocks',
|
||||
async () => {
|
||||
// Prompt that produces JSON wrapped in markdown
|
||||
const agent = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: `Output the result wrapped in a markdown code block like this:
|
||||
\`\`\`json
|
||||
{"status":"done","result":"extracted from markdown"}
|
||||
\`\`\``,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
// Wait for completion
|
||||
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
|
||||
|
||||
const dbAgent = await harness.agentRepository.findById(agent.id);
|
||||
console.log(' Status:', dbAgent?.status);
|
||||
console.log(' Result:', result?.message);
|
||||
|
||||
// Should succeed (JSON extraction from code block)
|
||||
if (dbAgent?.status === 'idle') {
|
||||
expect(result?.success).toBe(true);
|
||||
}
|
||||
},
|
||||
REAL_TEST_TIMEOUT
|
||||
);
|
||||
|
||||
it(
|
||||
'extracts JSON from text with surrounding content',
|
||||
async () => {
|
||||
// Prompt that produces JSON with text before it
|
||||
const agent = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: `First say "Here is my response:" then output the JSON:
|
||||
{"status":"done","result":"extracted from text"}`,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
// Wait for completion
|
||||
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
|
||||
|
||||
const dbAgent = await harness.agentRepository.findById(agent.id);
|
||||
console.log(' Status:', dbAgent?.status);
|
||||
console.log(' Result:', result?.message);
|
||||
|
||||
// Should succeed (JSON extraction from last {...} block)
|
||||
if (dbAgent?.status === 'idle') {
|
||||
expect(result?.success).toBe(true);
|
||||
}
|
||||
},
|
||||
REAL_TEST_TIMEOUT
|
||||
);
|
||||
});
|
||||
|
||||
describe('Mode-Specific Schemas', () => {
|
||||
it(
|
||||
'validates discuss mode output',
|
||||
async () => {
|
||||
const agent = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: MINIMAL_PROMPTS.discussComplete,
|
||||
mode: 'discuss',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
|
||||
|
||||
const dbAgent = await harness.agentRepository.findById(agent.id);
|
||||
expect(dbAgent?.status).toBe('idle');
|
||||
expect(result?.success).toBe(true);
|
||||
|
||||
console.log(' Discuss mode result:', result?.message);
|
||||
},
|
||||
REAL_TEST_TIMEOUT
|
||||
);
|
||||
|
||||
it(
|
||||
'validates plan mode output',
|
||||
async () => {
|
||||
const agent = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: MINIMAL_PROMPTS.planComplete,
|
||||
mode: 'plan',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
|
||||
|
||||
const dbAgent = await harness.agentRepository.findById(agent.id);
|
||||
expect(dbAgent?.status).toBe('idle');
|
||||
expect(result?.success).toBe(true);
|
||||
|
||||
console.log(' Plan mode result:', result?.message);
|
||||
},
|
||||
REAL_TEST_TIMEOUT
|
||||
);
|
||||
|
||||
it(
|
||||
'validates detail mode output',
|
||||
async () => {
|
||||
const agent = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: MINIMAL_PROMPTS.detailComplete,
|
||||
mode: 'detail',
|
||||
provider: 'claude',
|
||||
});
|
||||
|
||||
const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
|
||||
|
||||
const dbAgent = await harness.agentRepository.findById(agent.id);
|
||||
expect(dbAgent?.status).toBe('idle');
|
||||
expect(result?.success).toBe(true);
|
||||
|
||||
console.log(' Detail mode result:', result?.message);
|
||||
},
|
||||
REAL_TEST_TIMEOUT
|
||||
);
|
||||
});
|
||||
});
|
||||
134
apps/server/test/topological-sort.test.ts
Normal file
134
apps/server/test/topological-sort.test.ts
Normal file
@@ -0,0 +1,134 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { topologicalSortPhases, type PhaseForSort, type DependencyEdge } from '@codewalk-district/shared';
|
||||
|
||||
function mkPhase(id: string, createdAt: string | Date): PhaseForSort {
|
||||
return { id, createdAt };
|
||||
}
|
||||
|
||||
describe('topologicalSortPhases', () => {
|
||||
it('should return empty array for empty input', () => {
|
||||
expect(topologicalSortPhases([], [])).toEqual([]);
|
||||
});
|
||||
|
||||
it('should return phases in createdAt order when no edges', () => {
|
||||
const phases = [
|
||||
mkPhase('c', '2026-01-03'),
|
||||
mkPhase('a', '2026-01-01'),
|
||||
mkPhase('b', '2026-01-02'),
|
||||
];
|
||||
const result = topologicalSortPhases(phases, []);
|
||||
expect(result.map((p) => p.id)).toEqual(['a', 'b', 'c']);
|
||||
});
|
||||
|
||||
it('should sort linear chain correctly', () => {
|
||||
// A -> B -> C (B depends on A, C depends on B)
|
||||
const phases = [
|
||||
mkPhase('a', '2026-01-01'),
|
||||
mkPhase('b', '2026-01-02'),
|
||||
mkPhase('c', '2026-01-03'),
|
||||
];
|
||||
const edges: DependencyEdge[] = [
|
||||
{ phaseId: 'b', dependsOnPhaseId: 'a' },
|
||||
{ phaseId: 'c', dependsOnPhaseId: 'b' },
|
||||
];
|
||||
const result = topologicalSortPhases(phases, edges);
|
||||
expect(result.map((p) => p.id)).toEqual(['a', 'b', 'c']);
|
||||
});
|
||||
|
||||
it('should handle diamond dependency', () => {
|
||||
// A
|
||||
// / \
|
||||
// B C
|
||||
// \ /
|
||||
// D
|
||||
const phases = [
|
||||
mkPhase('a', '2026-01-01'),
|
||||
mkPhase('b', '2026-01-02'),
|
||||
mkPhase('c', '2026-01-03'),
|
||||
mkPhase('d', '2026-01-04'),
|
||||
];
|
||||
const edges: DependencyEdge[] = [
|
||||
{ phaseId: 'b', dependsOnPhaseId: 'a' },
|
||||
{ phaseId: 'c', dependsOnPhaseId: 'a' },
|
||||
{ phaseId: 'd', dependsOnPhaseId: 'b' },
|
||||
{ phaseId: 'd', dependsOnPhaseId: 'c' },
|
||||
];
|
||||
const result = topologicalSortPhases(phases, edges);
|
||||
// A must come first, D must come last, B before C by createdAt
|
||||
expect(result[0].id).toBe('a');
|
||||
expect(result[3].id).toBe('d');
|
||||
expect(result.map((p) => p.id)).toEqual(['a', 'b', 'c', 'd']);
|
||||
});
|
||||
|
||||
it('should use createdAt as deterministic tiebreaker', () => {
|
||||
// Three independent phases — should sort by createdAt
|
||||
const phases = [
|
||||
mkPhase('z', '2026-01-03'),
|
||||
mkPhase('y', '2026-01-01'),
|
||||
mkPhase('x', '2026-01-02'),
|
||||
];
|
||||
const result = topologicalSortPhases(phases, []);
|
||||
expect(result.map((p) => p.id)).toEqual(['y', 'x', 'z']);
|
||||
});
|
||||
|
||||
it('should handle cycle gracefully by appending cycled nodes', () => {
|
||||
// A -> B -> A (cycle), C is independent
|
||||
const phases = [
|
||||
mkPhase('a', '2026-01-01'),
|
||||
mkPhase('b', '2026-01-02'),
|
||||
mkPhase('c', '2026-01-03'),
|
||||
];
|
||||
const edges: DependencyEdge[] = [
|
||||
{ phaseId: 'b', dependsOnPhaseId: 'a' },
|
||||
{ phaseId: 'a', dependsOnPhaseId: 'b' },
|
||||
];
|
||||
const result = topologicalSortPhases(phases, edges);
|
||||
// C has no deps so it comes first, then A and B appended (cycle)
|
||||
expect(result[0].id).toBe('c');
|
||||
expect(result.length).toBe(3);
|
||||
// A and B are appended in createdAt order
|
||||
expect(result[1].id).toBe('a');
|
||||
expect(result[2].id).toBe('b');
|
||||
});
|
||||
|
||||
it('should ignore edges referencing non-existent phases', () => {
|
||||
const phases = [
|
||||
mkPhase('a', '2026-01-01'),
|
||||
mkPhase('b', '2026-01-02'),
|
||||
];
|
||||
const edges: DependencyEdge[] = [
|
||||
{ phaseId: 'b', dependsOnPhaseId: 'nonexistent' },
|
||||
];
|
||||
const result = topologicalSortPhases(phases, edges);
|
||||
// Edge is ignored, both treated as independent
|
||||
expect(result.map((p) => p.id)).toEqual(['a', 'b']);
|
||||
});
|
||||
|
||||
it('should handle single phase with no edges', () => {
|
||||
const phases = [mkPhase('only', '2026-01-01')];
|
||||
const result = topologicalSortPhases(phases, []);
|
||||
expect(result.map((p) => p.id)).toEqual(['only']);
|
||||
});
|
||||
|
||||
it('should work with Date objects', () => {
|
||||
const phases = [
|
||||
mkPhase('b', new Date('2026-01-02')),
|
||||
mkPhase('a', new Date('2026-01-01')),
|
||||
];
|
||||
const edges: DependencyEdge[] = [
|
||||
{ phaseId: 'b', dependsOnPhaseId: 'a' },
|
||||
];
|
||||
const result = topologicalSortPhases(phases, edges);
|
||||
expect(result.map((p) => p.id)).toEqual(['a', 'b']);
|
||||
});
|
||||
|
||||
it('should preserve extra properties on phase objects', () => {
|
||||
const phases = [
|
||||
{ id: 'a', createdAt: '2026-01-01', name: 'Alpha', status: 'pending' },
|
||||
{ id: 'b', createdAt: '2026-01-02', name: 'Beta', status: 'active' },
|
||||
];
|
||||
const result = topologicalSortPhases(phases, []);
|
||||
expect(result[0].name).toBe('Alpha');
|
||||
expect(result[1].name).toBe('Beta');
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user