Each SSE client registers a listener per event type (30+ types), so a few concurrent connections easily exceed the previous limit of 100. Listeners are properly cleaned up on disconnect via eventBusIterable's finally block, so this is not a real leak.
379 lines
12 KiB
TypeScript
379 lines
12 KiB
TypeScript
/**
 * Real Provider Test Harness
 *
 * Extends the existing test infrastructure to use the REAL MultiProviderAgentManager
 * for integration testing with actual CLI providers such as Claude and Codex.
 *
 * Unlike the standard TestHarness, which uses MockAgentManager, this harness:
 * - Uses real CLI spawning (costs real API credits!)
 * - Provides poll-based waiting helpers
 * - Captures events for inspection
 * - Manages temp directories for worktrees
 */

import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';

import { describe } from 'vitest';

import type { DrizzleDatabase } from '../../../db/index.js';
import type { DomainEvent, EventBus } from '../../../events/types.js';
import { EventEmitterBus } from '../../../events/bus.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import type { AgentResult, PendingQuestions, AgentStatus } from '../../../agent/types.js';
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
import {
  DrizzleAgentRepository,
  DrizzleProjectRepository,
  DrizzleAccountRepository,
  DrizzleInitiativeRepository,
} from '../../../db/repositories/drizzle/index.js';
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';

/**
 * Pause for a fixed amount of time; used between iterations of polling loops.
 *
 * @param ms - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}

/**
 * An EventEmitterBus that additionally records every event passed to `emit`,
 * so tests can inspect what was published.
 */
export class CapturingEventBus extends EventEmitterBus {
  /** Every event emitted so far, in emission order. */
  emittedEvents: DomainEvent[] = [];

  /**
   * Record the event, then forward it to the parent bus.
   *
   * @param event - The event being published.
   */
  emit<T extends DomainEvent>(event: T): void {
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /**
   * Return the recorded events whose `type` equals the given value.
   *
   * The result is cast to `T[]`; only the `type` field is compared at runtime.
   *
   * @param type - The event type discriminator to match.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
    const matching: DomainEvent[] = [];
    for (const recorded of this.emittedEvents) {
      if (recorded.type === type) {
        matching.push(recorded);
      }
    }
    return matching as T[];
  }

  /**
   * Forget all recorded events by swapping in a fresh array
   * (a previously obtained reference to `emittedEvents` is left untouched).
   */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}

/**
 * Options for creating a real provider test harness.
 */
export interface RealProviderHarnessOptions {
  /**
   * Which provider the tests target.
   *
   * NOTE(review): `createRealProviderHarness` in this file never reads this
   * field, so no default is applied by the harness itself. Presumably the
   * provider is chosen when an agent is spawned — confirm before relying on it.
   */
  provider?: 'claude' | 'codex';
  /**
   * Workspace root directory to use. When omitted, the harness creates a
   * temporary directory, initializes a git repository in it, and removes it
   * again in `cleanup()`. A caller-supplied directory is never deleted.
   */
  workspaceRoot?: string;
}

/**
 * Real Provider Test Harness interface.
 *
 * Provides everything needed to test against real CLI providers:
 * - A test database with real repositories
 * - A real MultiProviderAgentManager (spawns actual CLI processes)
 * - Event capture for verification
 * - Polling-based wait helpers
 */
export interface RealProviderHarness {
  /** Test database created by `createTestDatabase()`. */
  db: DrizzleDatabase;
  /** Event bus with capture capability. */
  eventBus: CapturingEventBus;
  /** Real agent manager (not a mock!). */
  agentManager: MultiProviderAgentManager;
  /** Workspace root directory. */
  workspaceRoot: string;

  /** Agent repository. */
  agentRepository: AgentRepository;
  /** Project repository. */
  projectRepository: ProjectRepository;
  /** Account repository. */
  accountRepository: AccountRepository;
  /** Initiative repository. */
  initiativeRepository: InitiativeRepository;

  /**
   * Wait for an agent to stop running, polling the agent repository at
   * regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms = 2 minutes)
   * @returns The value of `agentManager.getResult(agentId)` once the agent is
   *   `idle`, `stopped` or `crashed`; `null` if the agent does not exist or is
   *   `waiting_for_input`.
   * @throws Error if the timeout elapses while the agent is still running.
   */
  waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;

  /**
   * Wait for an agent to enter `waiting_for_input` status, polling the agent
   * repository at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @returns The value of `agentManager.getPendingQuestions(agentId)` once the
   *   agent is waiting; `null` if the agent does not exist or has already
   *   reached `idle`, `stopped` or `crashed` without asking.
   * @throws Error if the timeout elapses while the agent is still running.
   */
  waitForAgentWaiting(agentId: string, timeoutMs?: number): Promise<PendingQuestions | null>;

  /**
   * Wait for an agent to reach a specific status.
   *
   * @param agentId - The agent ID to wait for
   * @param status - The target status
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @throws Error if the agent does not exist, if the timeout elapses, or if
   *   the target is `running` but the agent is already `idle`, `stopped`,
   *   `crashed` or `waiting_for_input`.
   */
  waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs?: number): Promise<void>;

  /**
   * Get captured events filtered by type.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[];

  /**
   * Clear all captured events.
   */
  clearEvents(): void;

  /**
   * Stop every agent whose stored status is `running` (for cleanup).
   * Errors raised while stopping an individual agent are ignored.
   */
  killAllAgents(): Promise<void>;

  /**
   * Clean up all resources: stops running agents and, when the harness
   * created the workspace directory itself, deletes it.
   * Call this in afterAll/afterEach.
   */
  cleanup(): Promise<void>;
}

/** Default interval, in milliseconds, between agent status checks. */
const POLL_INTERVAL_MS = 1_000;

/**
 * Create a test harness for real provider integration tests.
 *
 * This creates:
 * - A test database (via `createTestDatabase()`)
 * - A temp directory for worktrees, initialized as a git repository with one
 *   commit (or uses the provided workspace as-is)
 * - A real MultiProviderAgentManager
 * - An event capture bus
 *
 * If setup fails after the temp directory was created, the directory is
 * removed again before the error is rethrown.
 *
 * @param options - Harness options. Only `workspaceRoot` is read here; an
 *   empty string is treated the same as an omitted value.
 * @returns The assembled harness.
 * @throws Whatever git setup or harness construction throws.
 *
 * @example
 * ```typescript
 * let harness: RealProviderHarness;
 *
 * beforeAll(async () => {
 *   harness = await createRealProviderHarness({ provider: 'claude' });
 * });
 *
 * afterAll(async () => {
 *   await harness.cleanup();
 * });
 *
 * it('spawns and completes', async () => {
 *   const agent = await harness.agentManager.spawn({...});
 *   const result = await harness.waitForAgentCompletion(agent.id);
 *   expect(result?.success).toBe(true);
 * });
 * ```
 */
export async function createRealProviderHarness(
  options: RealProviderHarnessOptions = {}
): Promise<RealProviderHarness> {
  // Both checks use truthiness on purpose so they always agree: an empty
  // string is not a usable path and must not lead to `git init` running with
  // an empty cwd.
  const ownedWorkspace = !options.workspaceRoot;
  const workspaceRoot = options.workspaceRoot || (await mkdtemp(join(tmpdir(), 'cw-test-')));

  try {
    if (ownedWorkspace) {
      // Worktree operations need a git repository with at least one commit.
      await initGitRepository(workspaceRoot);
    }
    return buildHarness(workspaceRoot, ownedWorkspace);
  } catch (error) {
    // Do not leak the temp directory when setup fails half-way.
    if (ownedWorkspace) {
      await removeDirectoryQuietly(workspaceRoot);
    }
    throw error;
  }
}

/** Result of one polling attempt: finished with a value, or keep waiting. */
type PollOutcome<T> = { done: true; value: T } | { done: false };

/**
 * Run `check` repeatedly until it reports completion or the timeout elapses.
 *
 * `check` is always invoked at least once, and the wait between attempts is
 * capped at the time remaining so the deadline is not overshot by a full
 * poll interval. Errors thrown by `check` propagate to the caller.
 */
async function pollUntil<T>(
  timeoutMs: number,
  timeoutMessage: string,
  check: () => Promise<PollOutcome<T>>
): Promise<T> {
  const deadline = Date.now() + timeoutMs;

  for (;;) {
    const outcome = await check();
    if (outcome.done) {
      return outcome.value;
    }

    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) {
      throw new Error(timeoutMessage);
    }
    await sleep(Math.min(POLL_INTERVAL_MS, remainingMs));
  }
}

/** Initialize a git repository with a single commit in `repoRoot`. */
async function initGitRepository(repoRoot: string): Promise<void> {
  const { execFileSync } = await import('node:child_process');
  // Argument arrays avoid the shell entirely, so this also works where
  // `touch` / `&&` are unavailable.
  const git = (...args: string[]): void => {
    execFileSync('git', args, { cwd: repoRoot, stdio: 'ignore' });
  };

  git('init');
  git('config', 'user.email', 'test@test.com');
  git('config', 'user.name', 'Test');
  // A global commit.gpgsign=true would make the commit below fail without a TTY.
  git('config', 'commit.gpgsign', 'false');

  await writeFile(join(repoRoot, '.gitkeep'), '');
  git('add', '.gitkeep');
  git('commit', '-m', 'init');
}

/** Recursively delete `dir`, ignoring any error (best-effort cleanup). */
async function removeDirectoryQuietly(dir: string): Promise<void> {
  try {
    await rm(dir, { recursive: true, force: true });
  } catch {
    // Ignore cleanup errors
  }
}

/** Wire up database, repositories, event bus and agent manager into a harness. */
function buildHarness(workspaceRoot: string, ownedWorkspace: boolean): RealProviderHarness {
  const db = createTestDatabase();

  const agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  const initiativeRepository = new DrizzleInitiativeRepository(db);

  // NOTE(review): the original comment says the parent class disables the
  // maxListeners warning — confirm in EventEmitterBus.
  const eventBus = new CapturingEventBus();

  // REAL agent manager (not mock!)
  const agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus
  );

  const harness: RealProviderHarness = {
    db,
    eventBus,
    agentManager,
    workspaceRoot,
    agentRepository,
    projectRepository,
    accountRepository,
    initiativeRepository,

    async waitForAgentCompletion(agentId: string, timeoutMs = 120000): Promise<AgentResult | null> {
      return pollUntil<AgentResult | null>(
        timeoutMs,
        `Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`,
        async (): Promise<PollOutcome<AgentResult | null>> => {
          const agent = await agentRepository.findById(agentId);
          if (!agent) {
            return { done: true, value: null };
          }

          switch (agent.status) {
            case 'idle':
            case 'stopped':
            case 'crashed':
              // Finished (successfully or not) - hand back the stored result.
              return { done: true, value: await agentManager.getResult(agentId) };
            case 'waiting_for_input':
              // Blocked on a question - not completed.
              return { done: true, value: null };
            default:
              return { done: false };
          }
        }
      );
    },

    async waitForAgentWaiting(agentId: string, timeoutMs = 120000): Promise<PendingQuestions | null> {
      return pollUntil<PendingQuestions | null>(
        timeoutMs,
        `Timeout waiting for agent ${agentId} to request input after ${timeoutMs}ms`,
        async (): Promise<PollOutcome<PendingQuestions | null>> => {
          const agent = await agentRepository.findById(agentId);
          if (!agent) {
            return { done: true, value: null };
          }

          switch (agent.status) {
            case 'waiting_for_input':
              return { done: true, value: await agentManager.getPendingQuestions(agentId) };
            case 'idle':
            case 'stopped':
            case 'crashed':
              // Agent finished without asking questions.
              return { done: true, value: null };
            default:
              return { done: false };
          }
        }
      );
    },

    async waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs = 120000): Promise<void> {
      await pollUntil<void>(
        timeoutMs,
        `Timeout waiting for agent ${agentId} to reach status ${status} after ${timeoutMs}ms`,
        async (): Promise<PollOutcome<void>> => {
          const agent = await agentRepository.findById(agentId);
          if (!agent) {
            throw new Error(`Agent ${agentId} not found`);
          }

          if (agent.status === status) {
            return { done: true, value: undefined };
          }

          // Fail fast when waiting for 'running' and the agent has already moved on.
          if (status === 'running' && ['idle', 'stopped', 'crashed', 'waiting_for_input'].includes(agent.status)) {
            throw new Error(`Agent ${agentId} already in terminal state ${agent.status}, cannot reach ${status}`);
          }

          return { done: false };
        }
      );
    },

    getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
      return eventBus.getEventsByType<T>(type);
    },

    clearEvents(): void {
      eventBus.clearEvents();
    },

    async killAllAgents(): Promise<void> {
      const agents = await agentRepository.findAll();
      for (const agent of agents) {
        if (agent.status !== 'running') {
          continue;
        }
        try {
          await agentManager.stop(agent.id);
        } catch {
          // Ignore errors during cleanup
        }
      }
    },

    async cleanup(): Promise<void> {
      // Kill any running agents first so nothing is still using the workspace.
      await harness.killAllAgents();

      // Only delete the workspace directory if this harness created it.
      if (ownedWorkspace) {
        await removeDirectoryQuietly(workspaceRoot);
      }
    },
  };

  return harness;
}

/**
 * Whether real Claude tests should run.
 * Set the REAL_CLAUDE_TESTS=1 environment variable to enable them.
 */
export const shouldRunRealClaudeTests = process.env['REAL_CLAUDE_TESTS'] === '1';

/**
 * Whether real Codex tests should run.
 * Set the REAL_CODEX_TESTS=1 environment variable to enable them.
 */
export const shouldRunRealCodexTests = process.env['REAL_CODEX_TESTS'] === '1';

/**
 * `describe` wrapper for Claude tests: the suite is skipped unless REAL_CLAUDE_TESTS=1.
 */
export const describeRealClaude: typeof describe = (
  shouldRunRealClaudeTests ? describe : describe.skip
) as typeof describe;

/**
 * `describe` wrapper for Codex tests: the suite is skipped unless REAL_CODEX_TESTS=1.
 */
export const describeRealCodex: typeof describe = (
  shouldRunRealCodexTests ? describe : describe.skip
) as typeof describe;

/**
 * Default test timeout for real CLI tests, in milliseconds (2 minutes).
 * Real API calls typically take 5-30 seconds.
 */
export const REAL_TEST_TIMEOUT = 120_000;

/**
 * Extended test timeout for slow tests, in milliseconds (5 minutes).
 * Used for schema retry tests and crash recovery tests.
 */
export const EXTENDED_TEST_TIMEOUT = 300_000;
|