Files
Codewalkers/apps/server/test/integration/real-providers/harness.ts
Lukas May 02ca1d568e fix: Disable EventEmitter maxListeners warning for SSE subscriptions
Each SSE client registers a listener per event type (30+ types), so a
few concurrent connections easily exceed the previous limit of 100.
Listeners are properly cleaned up on disconnect via eventBusIterable's
finally block, so this is not a real leak.
2026-03-06 16:39:36 +01:00

379 lines
12 KiB
TypeScript

/**
* Real Provider Test Harness
*
* Extends the existing test infrastructure to use REAL MultiProviderAgentManager
* for integration testing with actual CLI providers like Claude and Codex.
*
* Unlike the standard TestHarness which uses MockAgentManager, this harness:
* - Uses real CLI spawning (costs real API credits!)
* - Provides poll-based waiting helpers
* - Captures events for inspection
* - Manages temp directories for worktrees
*/
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe } from 'vitest';
import type { DrizzleDatabase } from '../../../db/index.js';
import type { DomainEvent, EventBus } from '../../../events/types.js';
import { EventEmitterBus } from '../../../events/bus.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import type { AgentResult, PendingQuestions, AgentStatus } from '../../../agent/types.js';
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
import {
DrizzleAgentRepository,
DrizzleProjectRepository,
DrizzleAccountRepository,
DrizzleInitiativeRepository,
} from '../../../db/repositories/drizzle/index.js';
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';
/**
 * Pause for the given number of milliseconds.
 *
 * Used by the polling helpers to space out status checks.
 *
 * @param ms - Delay in milliseconds before the returned promise resolves
 * @returns A promise that resolves (with no value) once the timer fires
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Event bus that records every event passed to `emit` so tests can inspect
 * what was published, while still forwarding each event to the parent bus.
 */
export class CapturingEventBus extends EventEmitterBus {
  /** Every event emitted since construction or the last `clearEvents()`, in emit order. */
  emittedEvents: DomainEvent[] = [];

  /** Record the event first, then hand it to the parent implementation. */
  emit<T extends DomainEvent>(event: T): void {
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /**
   * Return the captured events whose `type` equals the given value.
   * The result is a new array; the capture log itself is not modified.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
    const matching: DomainEvent[] = [];
    for (const captured of this.emittedEvents) {
      if (captured.type === type) {
        matching.push(captured);
      }
    }
    return matching as T[];
  }

  /** Drop all captured events by swapping in a fresh, empty log. */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}
/**
* Options for creating a real provider test harness.
*
* Every field is optional; `createRealProviderHarness` defaults the whole
* options object to `{}`.
*/
export interface RealProviderHarnessOptions {
/**
* Which provider to test (default: 'claude').
*
* NOTE(review): `createRealProviderHarness` in this file never reads this
* field, so the documented default is not applied there. Confirm where (or
* whether) it is consumed.
*/
provider?: 'claude' | 'codex';
/**
* Optional workspace root (temp dir created if omitted).
*
* When omitted, the harness creates a temp directory, initializes a git
* repository with one commit in it, and deletes it again in `cleanup()`.
* A caller-supplied directory is used as-is: the harness neither
* git-initializes it nor removes it.
*/
workspaceRoot?: string;
}
/**
* Real Provider Test Harness interface.
*
* Provides everything needed to test against real CLI providers:
* - In-memory database with real repositories
* - Real MultiProviderAgentManager (spawns actual CLI processes)
* - Event capture for verification
* - Polling-based wait helpers
*
* The behavior notes on the wait/cleanup helpers below describe the
* implementation returned by `createRealProviderHarness` in this file.
*/
export interface RealProviderHarness {
/** In-memory SQLite database */
db: DrizzleDatabase;
/** Event bus with capture capability */
eventBus: CapturingEventBus;
/** Real agent manager (not mock!) */
agentManager: MultiProviderAgentManager;
/** Workspace root directory (caller-supplied, or a temp dir created by the harness) */
workspaceRoot: string;
/** Agent repository */
agentRepository: AgentRepository;
/** Project repository */
projectRepository: ProjectRepository;
/** Account repository */
accountRepository: AccountRepository;
/** Initiative repository */
initiativeRepository: InitiativeRepository;
/**
* Wait for an agent to leave the running state.
* Polls the agent repository at regular intervals.
*
* @param agentId - The agent ID to wait for
* @param timeoutMs - Maximum time to wait (default 120000ms = 2 minutes)
* @returns Whatever `agentManager.getResult(agentId)` yields once the agent
*   is `idle`, `stopped` or `crashed`; `null` if the agent record does not
*   exist or the agent is `waiting_for_input`
* @throws Error if the deadline passes before any of those states is seen
*/
waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;
/**
* Wait for an agent to enter waiting_for_input status.
* Polls the agent repository at regular intervals.
*
* @param agentId - The agent ID to wait for
* @param timeoutMs - Maximum time to wait (default 120000ms)
* @returns Whatever `agentManager.getPendingQuestions(agentId)` yields once
*   the agent is `waiting_for_input`; `null` if the agent record does not
*   exist or the agent ended up `idle`, `stopped` or `crashed` instead
* @throws Error if the deadline passes first
*/
waitForAgentWaiting(agentId: string, timeoutMs?: number): Promise<PendingQuestions | null>;
/**
* Wait for an agent to reach a specific status.
*
* @param agentId - The agent ID to wait for
* @param status - The target status
* @param timeoutMs - Maximum time to wait (default 120000ms)
* @throws Error if the agent record does not exist, if the target is
*   `running` but the agent is already `idle`, `stopped`, `crashed` or
*   `waiting_for_input`, or if the deadline passes first
*/
waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs?: number): Promise<void>;
/**
* Get captured events filtered by type.
*/
getEventsByType<T extends DomainEvent>(type: T['type']): T[];
/**
* Clear all captured events.
*/
clearEvents(): void;
/**
* Kill all running agents (for cleanup).
* Only agents whose stored status is `running` are stopped; errors from
* stopping an individual agent are ignored.
*/
killAllAgents(): Promise<void>;
/**
* Clean up all resources (directories, processes).
* Call this in afterAll/afterEach.
* The workspace directory is removed only when the harness created it.
*/
cleanup(): Promise<void>;
}
/** Delay between two consecutive agent status lookups, in milliseconds (one second). */
const POLL_INTERVAL_MS = 1_000;
/**
 * Remove a harness-owned workspace directory, swallowing filesystem errors.
 * Cleanup is best-effort: a failed removal must not mask the real outcome.
 */
async function removeWorkspaceQuietly(dir: string): Promise<void> {
  try {
    await rm(dir, { recursive: true, force: true });
  } catch {
    // Ignore cleanup errors
  }
}

/**
 * Turn a directory into a git repository containing a single commit.
 * Worktree operations need a repository with at least one commit.
 */
async function initGitWorkspace(dir: string): Promise<void> {
  const { execSync } = await import('node:child_process');
  const run = (command: string): void => {
    execSync(command, { cwd: dir, stdio: 'ignore' });
  };
  run('git init');
  run('git config user.email "test@test.com"');
  run('git config user.name "Test"');
  // Create initial commit (worktrees require at least one commit)
  run('touch .gitkeep && git add .gitkeep && git commit -m "init"');
}

/**
 * Create a test harness for real provider integration tests.
 *
 * This creates:
 * - In-memory SQLite database
 * - Temp directory for worktrees (or uses provided workspace)
 * - Real MultiProviderAgentManager
 * - Event capture bus
 *
 * A temp workspace is git-initialized here and deleted by `cleanup()`.
 * A caller-supplied `workspaceRoot` is never git-initialized or deleted.
 *
 * NOTE: `options.provider` is not read by this function.
 *
 * @param options - Harness options; only `workspaceRoot` is consulted
 * @returns The assembled harness
 * @throws Rethrows any error from creating or git-initializing the temp
 *   workspace (after removing the temp directory again)
 *
 * @example
 * ```typescript
 * let harness: RealProviderHarness;
 *
 * beforeAll(async () => {
 *   harness = await createRealProviderHarness({ provider: 'claude' });
 * });
 *
 * afterAll(async () => {
 *   await harness.cleanup();
 * });
 *
 * it('spawns and completes', async () => {
 *   const agent = await harness.agentManager.spawn({...});
 *   const result = await harness.waitForAgentCompletion(agent.id);
 *   expect(result?.success).toBe(true);
 * });
 * ```
 */
export async function createRealProviderHarness(
  options: RealProviderHarnessOptions = {}
): Promise<RealProviderHarness> {
  // Ownership uses the same nullish test as the `??` fallback below, so the
  // harness only ever git-inits / deletes a directory it created itself
  // (an empty-string root is treated as caller-supplied, not as "omitted").
  const ownedWorkspace = options.workspaceRoot == null;
  const workspaceRoot = options.workspaceRoot ?? (await mkdtemp(join(tmpdir(), 'cw-test-')));

  // Initialize git repo in temp workspace (required for worktree operations)
  if (ownedWorkspace) {
    try {
      await initGitWorkspace(workspaceRoot);
    } catch (error) {
      // Do not leak the temp directory when git bootstrap fails.
      await removeWorkspaceQuietly(workspaceRoot);
      throw error;
    }
  }

  // Create in-memory database
  const db = createTestDatabase();

  // Create repositories
  const agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  const initiativeRepository = new DrizzleInitiativeRepository(db);

  // Create event bus with capture (parent class disables maxListeners warning)
  const eventBus = new CapturingEventBus();

  // Create REAL agent manager (not mock!)
  const agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus
  );

  // Build harness
  const harness: RealProviderHarness = {
    db,
    eventBus,
    agentManager,
    workspaceRoot,
    agentRepository,
    projectRepository,
    accountRepository,
    initiativeRepository,

    async waitForAgentCompletion(agentId: string, timeoutMs = 120000): Promise<AgentResult | null> {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
          // Finished (successfully or by crashing) - the manager holds the result
          return agentManager.getResult(agentId);
        }
        if (agent.status === 'waiting_for_input') {
          // Agent is waiting - return null (not completed)
          return null;
        }
        // Still running - wait and check again
        await sleep(POLL_INTERVAL_MS);
      }
      throw new Error(`Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
    },

    async waitForAgentWaiting(agentId: string, timeoutMs = 120000): Promise<PendingQuestions | null> {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'waiting_for_input') {
          return agentManager.getPendingQuestions(agentId);
        }
        if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
          // Agent finished without asking questions
          return null;
        }
        // Still running - wait and check again
        await sleep(POLL_INTERVAL_MS);
      }
      throw new Error(`Timeout waiting for agent ${agentId} to request input after ${timeoutMs}ms`);
    },

    async waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs = 120000): Promise<void> {
      const deadline = Date.now() + timeoutMs;
      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) {
          throw new Error(`Agent ${agentId} not found`);
        }
        if (agent.status === status) {
          return;
        }
        // Check for terminal states that mean we'll never reach target
        if (status === 'running' && ['idle', 'stopped', 'crashed', 'waiting_for_input'].includes(agent.status)) {
          throw new Error(`Agent ${agentId} already in terminal state ${agent.status}, cannot reach ${status}`);
        }
        await sleep(POLL_INTERVAL_MS);
      }
      throw new Error(`Timeout waiting for agent ${agentId} to reach status ${status} after ${timeoutMs}ms`);
    },

    getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
      return eventBus.getEventsByType<T>(type);
    },

    clearEvents(): void {
      eventBus.clearEvents();
    },

    async killAllAgents(): Promise<void> {
      const agents = await agentRepository.findAll();
      for (const agent of agents) {
        if (agent.status === 'running') {
          try {
            await agentManager.stop(agent.id);
          } catch {
            // Ignore errors during cleanup
          }
        }
      }
    },

    async cleanup(): Promise<void> {
      try {
        // Kill any running agents
        await harness.killAllAgents();
      } finally {
        // Remove the workspace even when killing agents failed, so a broken
        // run does not leave temp directories behind. Only directories the
        // harness created are removed.
        if (ownedWorkspace) {
          await removeWorkspaceQuietly(workspaceRoot);
        }
      }
    },
  };

  return harness;
}
/** True only when the named environment variable is exactly the string '1'. */
const isEnvFlagEnabled = (name: string): boolean => process.env[name] === '1';

/**
 * Whether the real Claude test suites are enabled.
 * Opt in by setting REAL_CLAUDE_TESTS=1 in the environment.
 */
export const shouldRunRealClaudeTests = isEnvFlagEnabled('REAL_CLAUDE_TESTS');

/**
 * Whether the real Codex test suites are enabled.
 * Opt in by setting REAL_CODEX_TESTS=1 in the environment.
 */
export const shouldRunRealCodexTests = isEnvFlagEnabled('REAL_CODEX_TESTS');
/** Choose `describe` when the suite is enabled, otherwise `describe.skip`. */
const describeWhen = (enabled: boolean): typeof describe =>
  enabled ? describe : (describe.skip as typeof describe);

/**
 * `describe` for Claude suites: runs only when REAL_CLAUDE_TESTS=1, skipped otherwise.
 */
export const describeRealClaude: typeof describe = describeWhen(shouldRunRealClaudeTests);

/**
 * `describe` for Codex suites: runs only when REAL_CODEX_TESTS=1, skipped otherwise.
 */
export const describeRealCodex: typeof describe = describeWhen(shouldRunRealCodexTests);
/**
 * Per-test timeout for real CLI tests, in milliseconds (2 minutes).
 * Real API calls typically take 5-30 seconds.
 */
export const REAL_TEST_TIMEOUT = 120_000;

/**
 * Per-test timeout for slow tests, in milliseconds (5 minutes).
 * Used for schema retry tests and crash recovery tests.
 */
export const EXTENDED_TEST_TIMEOUT = 300_000;