/**
 * Real Provider Test Harness
 *
 * Extends the existing test infrastructure to use the REAL
 * MultiProviderAgentManager for integration testing with actual CLI
 * providers like Claude and Codex.
 *
 * Unlike the standard TestHarness which uses MockAgentManager, this harness:
 * - Uses real CLI spawning (costs real API credits!)
 * - Provides poll-based waiting helpers
 * - Captures events for inspection
 * - Manages temp directories for worktrees
 */

import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe } from 'vitest';
import type { DrizzleDatabase } from '../../../db/index.js';
import type { DomainEvent, EventBus } from '../../../events/types.js';
import { EventEmitterBus } from '../../../events/bus.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import type { AgentResult, PendingQuestions, AgentStatus } from '../../../agent/types.js';
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
import {
  DrizzleAgentRepository,
  DrizzleProjectRepository,
  DrizzleAccountRepository,
  DrizzleInitiativeRepository,
} from '../../../db/repositories/drizzle/index.js';
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';

/**
 * Sleep helper for polling loops.
 *
 * @param ms - Delay in milliseconds before the returned promise resolves
 */
export function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Event bus that captures all emitted events for inspection.
 *
 * Every event is recorded in `emittedEvents` before being forwarded to the
 * parent bus, so it is captured even if forwarding throws.
 */
export class CapturingEventBus extends EventEmitterBus {
  /** All events emitted since construction or the last `clearEvents()`. */
  emittedEvents: DomainEvent[] = [];

  /** Record the event, then forward it to the parent bus. */
  emit<T extends DomainEvent>(event: T): void {
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /**
   * Return the captured events whose `type` equals the given value.
   * The cast to `T[]` relies on the caller choosing `T` to match `type`.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
    return this.emittedEvents.filter((e) => e.type === type) as T[];
  }

  /** Drop all captured events. */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}

/**
 * Options for creating a real provider test harness.
 */
export interface RealProviderHarnessOptions {
  /**
   * Which provider to test (default: 'claude').
   * NOTE(review): createRealProviderHarness in this file never reads this
   * field; confirm whether callers are expected to pass it on when spawning.
   */
  provider?: 'claude' | 'codex';
  /** Optional workspace root (temp dir created if omitted or empty) */
  workspaceRoot?: string;
}

/**
 * Real Provider Test Harness interface.
 *
 * Provides everything needed to test against real CLI providers:
 * - In-memory database with real repositories
 * - Real MultiProviderAgentManager (spawns actual CLI processes)
 * - Event capture for verification
 * - Polling-based wait helpers
 */
export interface RealProviderHarness {
  /** In-memory SQLite database */
  db: DrizzleDatabase;
  /** Event bus with capture capability */
  eventBus: CapturingEventBus;
  /** Real agent manager (not mock!) */
  agentManager: MultiProviderAgentManager;
  /** Workspace root directory */
  workspaceRoot: string;
  /** Agent repository */
  agentRepository: AgentRepository;
  /** Project repository */
  projectRepository: ProjectRepository;
  /** Account repository */
  accountRepository: AccountRepository;
  /** Initiative repository */
  initiativeRepository: InitiativeRepository;

  /**
   * Wait for an agent to reach idle, stopped or crashed status.
   * Polls the agent repository at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms = 2 minutes)
   * @returns The agent manager's result once the agent is idle, stopped or
   *   crashed; null if the agent does not exist or is waiting for input
   * @throws Error if the timeout elapses while the agent is still running
   */
  waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;

  /**
   * Wait for an agent to enter waiting_for_input status.
   * Polls the agent repository at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @returns The pending questions if waiting; null if the agent does not
   *   exist or finished (idle, stopped, crashed) without asking
   * @throws Error if the timeout elapses while the agent is still running
   */
  waitForAgentWaiting(agentId: string, timeoutMs?: number): Promise<PendingQuestions | null>;

  /**
   * Wait for an agent to reach a specific status.
   *
   * @param agentId - The agent ID to wait for
   * @param status - The target status
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @throws Error if the agent does not exist, if `status` is 'running' and
   *   the agent is already idle/stopped/crashed/waiting_for_input, or if the
   *   timeout elapses
   */
  waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs?: number): Promise<void>;

  /**
   * Get captured events filtered by type.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[];

  /**
   * Clear all captured events.
   */
  clearEvents(): void;

  /**
   * Stop every agent whose status is 'running' (for cleanup).
   * Errors from stopping an individual agent are ignored.
   */
  killAllAgents(): Promise<void>;

  /**
   * Clean up all resources (directories, processes).
   * Call this in afterAll/afterEach.
   */
  cleanup(): Promise<void>;
}

/** Default poll interval for status checks */
const POLL_INTERVAL_MS = 1000;

/** Result of a single poll: either finished with a value, or keep polling. */
type PollOutcome<T> = { done: true; value: T } | { done: false };

/**
 * Run `check` repeatedly until it reports completion or the deadline passes.
 *
 * The check always runs at least once (even for a non-positive timeout), and
 * the final sleep is clamped to the remaining time so one last check happens
 * at the deadline instead of the loop giving up after a full interval.
 *
 * @param timeoutMs - Maximum time to keep polling
 * @param timeoutMessage - Message of the Error thrown when time runs out
 * @param check - Single poll step; errors it throws propagate to the caller
 * @returns The value carried by the first `done` outcome
 * @throws Error with `timeoutMessage` if the deadline passes first
 */
async function pollUntil<T>(
  timeoutMs: number,
  timeoutMessage: string,
  check: () => Promise<PollOutcome<T>>
): Promise<T> {
  const deadline = Date.now() + timeoutMs;
  for (;;) {
    const outcome = await check();
    if (outcome.done) {
      return outcome.value;
    }
    const remaining = deadline - Date.now();
    if (remaining <= 0) {
      throw new Error(timeoutMessage);
    }
    await sleep(Math.min(POLL_INTERVAL_MS, remaining));
  }
}

/**
 * Create a test harness for real provider integration tests.
 *
 * This creates:
 * - In-memory SQLite database
 * - Temp directory for worktrees (or uses provided workspace)
 * - Real MultiProviderAgentManager
 * - Event capture bus
 *
 * A temp workspace created here is initialised as a git repository with one
 * commit and is removed again by `cleanup()`. A caller-provided workspace is
 * neither initialised nor removed.
 *
 * @param options - Harness options; see RealProviderHarnessOptions
 * @returns The assembled harness
 * @throws Whatever the git bootstrap commands throw; the temp directory is
 *   removed before rethrowing
 *
 * @example
 * ```typescript
 * let harness: RealProviderHarness;
 *
 * beforeAll(async () => {
 *   harness = await createRealProviderHarness({ provider: 'claude' });
 * });
 *
 * afterAll(async () => {
 *   await harness.cleanup();
 * });
 *
 * it('spawns and completes', async () => {
 *   const agent = await harness.agentManager.spawn({...});
 *   const result = await harness.waitForAgentCompletion(agent.id);
 *   expect(result?.success).toBe(true);
 * });
 * ```
 */
export async function createRealProviderHarness(
  options: RealProviderHarnessOptions = {}
): Promise<RealProviderHarness> {
  // Ownership and path selection use the same truthiness test so that an
  // empty-string workspaceRoot is treated as "not provided" in both places
  // (otherwise the harness would "own", git-init and later rm an empty path).
  const providedRoot = options.workspaceRoot;
  const ownedWorkspace = !providedRoot;
  const workspaceRoot = providedRoot || (await mkdtemp(join(tmpdir(), 'cw-test-')));

  // Initialize git repo in temp workspace (required for worktree operations)
  if (ownedWorkspace) {
    try {
      const { execSync } = await import('node:child_process');
      execSync('git init', { cwd: workspaceRoot, stdio: 'ignore' });
      execSync('git config user.email "test@test.com"', { cwd: workspaceRoot, stdio: 'ignore' });
      execSync('git config user.name "Test"', { cwd: workspaceRoot, stdio: 'ignore' });
      // Create initial commit (worktrees require at least one commit)
      execSync('touch .gitkeep && git add .gitkeep && git commit -m "init"', {
        cwd: workspaceRoot,
        stdio: 'ignore',
      });
    } catch (error) {
      // Do not leak the temp directory when the bootstrap fails.
      await rm(workspaceRoot, { recursive: true, force: true }).catch(() => undefined);
      throw error;
    }
  }

  // Create in-memory database
  const db = createTestDatabase();

  // Create repositories
  const agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  const initiativeRepository = new DrizzleInitiativeRepository(db);

  // Create event bus with capture
  const eventBus = new CapturingEventBus();

  // Create REAL agent manager (not mock!)
  const agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus
  );

  // Build harness
  const harness: RealProviderHarness = {
    db,
    eventBus,
    agentManager,
    workspaceRoot,
    agentRepository,
    projectRepository,
    accountRepository,
    initiativeRepository,

    async waitForAgentCompletion(agentId: string, timeoutMs = 120000): Promise<AgentResult | null> {
      return pollUntil<AgentResult | null>(
        timeoutMs,
        `Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`,
        async () => {
          const agent = await agentRepository.findById(agentId);
          if (!agent) {
            return { done: true, value: null };
          }
          switch (agent.status) {
            case 'idle':
            case 'stopped':
            case 'crashed':
              // Finished (successfully or not): hand back the manager's result.
              return { done: true, value: await agentManager.getResult(agentId) };
            case 'waiting_for_input':
              // Agent is waiting - report null (not completed)
              return { done: true, value: null };
            default:
              // Still running - keep polling
              return { done: false };
          }
        }
      );
    },

    async waitForAgentWaiting(agentId: string, timeoutMs = 120000): Promise<PendingQuestions | null> {
      return pollUntil<PendingQuestions | null>(
        timeoutMs,
        `Timeout waiting for agent ${agentId} to request input after ${timeoutMs}ms`,
        async () => {
          const agent = await agentRepository.findById(agentId);
          if (!agent) {
            return { done: true, value: null };
          }
          switch (agent.status) {
            case 'waiting_for_input':
              return { done: true, value: await agentManager.getPendingQuestions(agentId) };
            case 'idle':
            case 'stopped':
            case 'crashed':
              // Agent finished without asking questions
              return { done: true, value: null };
            default:
              // Still running - keep polling
              return { done: false };
          }
        }
      );
    },

    async waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs = 120000): Promise<void> {
      return pollUntil<void>(
        timeoutMs,
        `Timeout waiting for agent ${agentId} to reach status ${status} after ${timeoutMs}ms`,
        async () => {
          const agent = await agentRepository.findById(agentId);
          if (!agent) {
            throw new Error(`Agent ${agentId} not found`);
          }
          if (agent.status === status) {
            return { done: true, value: undefined };
          }
          // Fail fast when waiting for 'running' but the agent has already
          // moved past it; these states are treated as unable to reach it.
          if (
            status === 'running' &&
            ['idle', 'stopped', 'crashed', 'waiting_for_input'].includes(agent.status)
          ) {
            throw new Error(
              `Agent ${agentId} already in terminal state ${agent.status}, cannot reach ${status}`
            );
          }
          return { done: false };
        }
      );
    },

    getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
      return eventBus.getEventsByType<T>(type);
    },

    clearEvents(): void {
      eventBus.clearEvents();
    },

    async killAllAgents(): Promise<void> {
      const agents = await agentRepository.findAll();
      for (const agent of agents) {
        if (agent.status !== 'running') {
          continue;
        }
        try {
          await agentManager.stop(agent.id);
        } catch {
          // Ignore errors during cleanup
        }
      }
    },

    async cleanup(): Promise<void> {
      try {
        // Kill any running agents
        await harness.killAllAgents();
      } finally {
        // Remove the workspace even if listing/stopping agents failed,
        // but only when this harness created it.
        if (ownedWorkspace) {
          try {
            await rm(workspaceRoot, { recursive: true, force: true });
          } catch {
            // Ignore cleanup errors
          }
        }
      }
    },
  };

  return harness;
}

/**
 * Check if real Claude tests should run.
 * Set REAL_CLAUDE_TESTS=1 environment variable to enable.
 */
export const shouldRunRealClaudeTests = process.env.REAL_CLAUDE_TESTS === '1';

/**
 * Check if real Codex tests should run.
 * Set REAL_CODEX_TESTS=1 environment variable to enable.
 */
export const shouldRunRealCodexTests = process.env.REAL_CODEX_TESTS === '1';

/**
 * Skip wrapper for Claude tests - skips unless REAL_CLAUDE_TESTS=1.
 */
export const describeRealClaude: typeof describe = shouldRunRealClaudeTests
  ? describe
  : (describe.skip as typeof describe);

/**
 * Skip wrapper for Codex tests - skips unless REAL_CODEX_TESTS=1.
 */
export const describeRealCodex: typeof describe = shouldRunRealCodexTests
  ? describe
  : (describe.skip as typeof describe);

/**
 * Default test timeout for real CLI tests (2 minutes).
 * Real API calls take 5-30 seconds typically.
 */
export const REAL_TEST_TIMEOUT = 120000;

/**
 * Extended test timeout for slow tests (5 minutes).
 * Used for schema retry tests and crash recovery tests.
 */
export const EXTENDED_TEST_TIMEOUT = 300000;