/**
* Real Provider Test Harness
*
* Extends the existing test infrastructure to use REAL MultiProviderAgentManager
* for integration testing with actual CLI providers like Claude and Codex.
*
* Unlike the standard TestHarness which uses MockAgentManager, this harness:
* - Uses real CLI spawning (costs real API credits!)
* - Provides poll-based waiting helpers
* - Captures events for inspection
* - Manages temp directories for worktrees
*/
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe } from 'vitest';
import type { DrizzleDatabase } from '../../../db/index.js';
import type { DomainEvent, EventBus } from '../../../events/types.js';
import { EventEmitterBus } from '../../../events/bus.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import type { AgentResult, PendingQuestions, AgentStatus } from '../../../agent/types.js';
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
import {
DrizzleAgentRepository,
DrizzleProjectRepository,
DrizzleAccountRepository,
DrizzleInitiativeRepository,
} from '../../../db/repositories/drizzle/index.js';
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';
/**
 * Pause for the given number of milliseconds.
 *
 * Used by the polling helpers to space out status checks.
 *
 * @param ms - Delay handed to `setTimeout`
 * @returns A promise that resolves (with no value) once the timer fires
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * EventEmitterBus variant that records every emitted event before forwarding
 * it to the parent implementation, so tests can inspect what was published.
 */
export class CapturingEventBus extends EventEmitterBus {
  /** Events in emission order since construction or the last `clearEvents()`. */
  emittedEvents: DomainEvent[] = [];

  /** Record the event, then delegate to the parent `emit`. */
  emit<T extends DomainEvent>(event: T): void {
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /**
   * Return the captured events whose `type` equals the given discriminator.
   * The result is a new array; the capture log itself is not modified.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
    const matches: DomainEvent[] = [];
    for (const captured of this.emittedEvents) {
      if (captured.type === type) {
        matches.push(captured);
      }
    }
    return matches as T[];
  }

  /** Drop everything captured so far by swapping in a fresh array. */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}
/**
* Options accepted by createRealProviderHarness.
* Every field is optional; calling the factory with no argument is valid.
*/
export interface RealProviderHarnessOptions {
/**
* Which provider to test (default: 'claude').
* NOTE(review): createRealProviderHarness in this file never reads this
* field - confirm where the provider is actually selected.
*/
provider?: 'claude' | 'codex';
/**
* Optional workspace root. When omitted, the factory creates a temp
* directory, initialises a git repository in it, and removes it again in
* cleanup(). A caller-supplied directory is never deleted by the harness.
*/
workspaceRoot?: string;
}
/**
* Real Provider Test Harness interface.
*
* Provides everything needed to test against real CLI providers:
* - In-memory database with real repositories
* - Real MultiProviderAgentManager (spawns actual CLI processes)
* - Event capture for verification
* - Polling-based wait helpers
*
* The behaviour notes on the methods below describe the implementation
* returned by createRealProviderHarness in this file.
*/
export interface RealProviderHarness {
/** In-memory SQLite database */
db: DrizzleDatabase;
/** Event bus with capture capability */
eventBus: CapturingEventBus;
/** Real agent manager (not mock!) */
agentManager: MultiProviderAgentManager;
/** Workspace root directory (caller-supplied, or the temp dir created by the factory) */
workspaceRoot: string;
/** Agent repository */
agentRepository: AgentRepository;
/** Project repository */
projectRepository: ProjectRepository;
/** Account repository */
accountRepository: AccountRepository;
/** Initiative repository */
initiativeRepository: InitiativeRepository;
/**
* Wait for an agent to finish, polling the agent repository at regular
* intervals.
*
* @param agentId - The agent ID to wait for
* @param timeoutMs - Maximum time to wait (default 120000ms = 2 minutes)
* @returns Whatever agentManager.getResult(agentId) yields once the agent is
* 'idle', 'stopped' or 'crashed'; null if the agent record is not found or
* the agent is in 'waiting_for_input'
* @throws Error if the timeout elapses first
*/
waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;
/**
* Wait for an agent to enter waiting_for_input status, polling the agent
* repository at regular intervals.
*
* @param agentId - The agent ID to wait for
* @param timeoutMs - Maximum time to wait (default 120000ms)
* @returns Whatever agentManager.getPendingQuestions(agentId) yields once the
* agent is waiting; null if the agent record is not found or the agent
* reached 'idle', 'stopped' or 'crashed' without asking
* @throws Error if the timeout elapses first
*/
waitForAgentWaiting(agentId: string, timeoutMs?: number): Promise<PendingQuestions | null>;
/**
* Wait for an agent to reach a specific status.
*
* @param agentId - The agent ID to wait for
* @param status - The target status
* @param timeoutMs - Maximum time to wait (default 120000ms)
* @throws Error if the agent record is not found, if the timeout elapses, or
* if the target is 'running' while the agent is already 'idle', 'stopped',
* 'crashed' or 'waiting_for_input'
*/
waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs?: number): Promise<void>;
/**
* Get captured events filtered by type (delegates to the capturing event bus).
*/
getEventsByType<T extends DomainEvent>(type: T['type']): T[];
/**
* Clear all captured events.
*/
clearEvents(): void;
/**
* Stop every agent whose stored status is 'running' (for cleanup).
* Errors raised while stopping an individual agent are ignored.
*/
killAllAgents(): Promise<void>;
/**
* Clean up all resources: stops running agents, then removes the workspace
* directory if the harness created it (a caller-supplied workspace is kept).
* Call this in afterAll/afterEach.
*/
cleanup(): Promise<void>;
}
/** Delay, in milliseconds, between two consecutive agent status checks. */
const POLL_INTERVAL_MS = 1_000;
/**
 * Create a test harness for real provider integration tests.
 *
 * This creates:
 * - In-memory SQLite database
 * - Temp directory for worktrees (or uses provided workspace)
 * - Real MultiProviderAgentManager
 * - Event capture bus
 *
 * When no workspace is supplied, the temp directory is initialised as a git
 * repository containing a single commit (an empty `.gitkeep`), because
 * worktree operations need at least one commit. That directory is owned by
 * the harness and removed again by `cleanup()`; a caller-supplied workspace
 * is used as-is and never deleted.
 *
 * NOTE(review): `options.provider` is not read here - confirm where the
 * provider is actually selected.
 *
 * @param options - Harness options; all fields are optional
 * @returns The assembled harness
 * @throws If setting up the git repository fails (the temp directory is
 * removed before the error is rethrown)
 *
 * @example
 * ```typescript
 * let harness: RealProviderHarness;
 *
 * beforeAll(async () => {
 *   harness = await createRealProviderHarness({ provider: 'claude' });
 * });
 *
 * afterAll(async () => {
 *   await harness.cleanup();
 * });
 *
 * it('spawns and completes', async () => {
 *   const agent = await harness.agentManager.spawn({...});
 *   const result = await harness.waitForAgentCompletion(agent.id);
 *   expect(result?.success).toBe(true);
 * });
 * ```
 */
export async function createRealProviderHarness(
  options: RealProviderHarnessOptions = {}
): Promise<RealProviderHarness> {
  // Ownership must use the same null/undefined test as the `??` fallback below.
  // A falsy-but-present value such as '' is a caller-supplied path: it must not
  // be git-initialised or deleted as if the harness had created it.
  const ownedWorkspace = options.workspaceRoot == null;
  const workspaceRoot = options.workspaceRoot ?? (await mkdtemp(join(tmpdir(), 'cw-test-')));

  // Initialize git repo in temp workspace (required for worktree operations)
  if (ownedWorkspace) {
    try {
      const { execFileSync } = await import('node:child_process');
      const { writeFile } = await import('node:fs/promises');
      // Invoke git directly with an argument vector: no shell, no quoting, and
      // no dependency on POSIX utilities such as `touch`.
      const git = (...args: string[]): void => {
        execFileSync('git', args, { cwd: workspaceRoot, stdio: 'ignore' });
      };
      git('init');
      git('config', 'user.email', 'test@test.com');
      git('config', 'user.name', 'Test');
      // Create initial commit (worktrees require at least one commit)
      await writeFile(join(workspaceRoot, '.gitkeep'), '');
      git('add', '.gitkeep');
      git('commit', '-m', 'init');
    } catch (error) {
      // Do not leak the temp directory when repository setup fails.
      await rm(workspaceRoot, { recursive: true, force: true }).catch(() => undefined);
      throw error;
    }
  }

  // Create in-memory database
  const db = createTestDatabase();

  // Create repositories
  const agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  const initiativeRepository = new DrizzleInitiativeRepository(db);

  // Create event bus with capture (parent class already sets maxListeners to 100)
  const eventBus = new CapturingEventBus();

  // Create REAL agent manager (not mock!)
  const agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus
  );

  /**
   * Sleep until the next poll is due, never past the deadline.
   * Resolves to false once the deadline has been reached, which ends the
   * polling loops below after one final status check.
   */
  const waitForNextPoll = async (deadline: number): Promise<boolean> => {
    const remaining = deadline - Date.now();
    if (remaining <= 0) {
      return false;
    }
    await sleep(Math.min(POLL_INTERVAL_MS, remaining));
    return true;
  };

  // Statuses from which an agent will not transition to 'running' on its own.
  const settledStatuses = ['idle', 'stopped', 'crashed', 'waiting_for_input'];

  // Build harness
  const harness: RealProviderHarness = {
    db,
    eventBus,
    agentManager,
    workspaceRoot,
    agentRepository,
    projectRepository,
    accountRepository,
    initiativeRepository,

    async waitForAgentCompletion(agentId: string, timeoutMs = 120000): Promise<AgentResult | null> {
      const deadline = Date.now() + timeoutMs;
      // do/while: always poll at least once, and once more at the deadline.
      do {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
          // Finished (successfully or not) - hand back whatever result was recorded
          return agentManager.getResult(agentId);
        }
        if (agent.status === 'waiting_for_input') {
          // Agent is waiting - return null (not completed)
          return null;
        }
        // Still running - wait and check again
      } while (await waitForNextPoll(deadline));
      throw new Error(`Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
    },

    async waitForAgentWaiting(agentId: string, timeoutMs = 120000): Promise<PendingQuestions | null> {
      const deadline = Date.now() + timeoutMs;
      do {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;
        if (agent.status === 'waiting_for_input') {
          return agentManager.getPendingQuestions(agentId);
        }
        if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
          // Agent finished without asking questions
          return null;
        }
        // Still running - wait and check again
      } while (await waitForNextPoll(deadline));
      throw new Error(`Timeout waiting for agent ${agentId} to request input after ${timeoutMs}ms`);
    },

    async waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs = 120000): Promise<void> {
      const deadline = Date.now() + timeoutMs;
      do {
        const agent = await agentRepository.findById(agentId);
        if (!agent) {
          throw new Error(`Agent ${agentId} not found`);
        }
        if (agent.status === status) {
          return;
        }
        // Fail fast when waiting for 'running' but the agent has already settled
        if (status === 'running' && settledStatuses.includes(agent.status)) {
          throw new Error(`Agent ${agentId} already in terminal state ${agent.status}, cannot reach ${status}`);
        }
      } while (await waitForNextPoll(deadline));
      throw new Error(`Timeout waiting for agent ${agentId} to reach status ${status} after ${timeoutMs}ms`);
    },

    getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
      return eventBus.getEventsByType<T>(type);
    },

    clearEvents(): void {
      eventBus.clearEvents();
    },

    async killAllAgents(): Promise<void> {
      const agents = await agentRepository.findAll();
      for (const agent of agents) {
        if (agent.status !== 'running') continue;
        try {
          await agentManager.stop(agent.id);
        } catch {
          // Best-effort teardown: one agent failing to stop must not prevent
          // stopping the remaining ones.
        }
      }
    },

    async cleanup(): Promise<void> {
      // Kill any running agents
      await harness.killAllAgents();
      // Clean up workspace directory if we created it
      if (ownedWorkspace) {
        try {
          await rm(workspaceRoot, { recursive: true, force: true });
        } catch {
          // Best-effort: a leftover temp directory should not fail the test run
        }
      }
    },
  };

  return harness;
}
/** True only when the named environment variable is exactly the string '1'. */
const isEnvFlagEnabled = (name: string): boolean => process.env[name] === '1';

/**
 * Whether the real Claude test suites are enabled.
 * Opt in by setting the REAL_CLAUDE_TESTS=1 environment variable.
 */
export const shouldRunRealClaudeTests = isEnvFlagEnabled('REAL_CLAUDE_TESTS');

/**
 * Whether the real Codex test suites are enabled.
 * Opt in by setting the REAL_CODEX_TESTS=1 environment variable.
 */
export const shouldRunRealCodexTests = isEnvFlagEnabled('REAL_CODEX_TESTS');
/** Pick `describe` when the suite is enabled, otherwise `describe.skip`. */
const describeIfEnabled = (enabled: boolean): typeof describe => {
  if (enabled) {
    return describe;
  }
  return describe.skip as typeof describe;
};

/**
 * `describe` for Claude suites - the suite is skipped unless REAL_CLAUDE_TESTS=1.
 */
export const describeRealClaude: typeof describe = describeIfEnabled(shouldRunRealClaudeTests);

/**
 * `describe` for Codex suites - the suite is skipped unless REAL_CODEX_TESTS=1.
 */
export const describeRealCodex: typeof describe = describeIfEnabled(shouldRunRealCodexTests);
/**
 * Per-test timeout for real CLI tests: 120000 ms (2 minutes).
 * Real API calls typically take 5-30 seconds.
 */
export const REAL_TEST_TIMEOUT = 120_000;

/**
 * Per-test timeout for slow tests: 300000 ms (5 minutes).
 * Used for schema retry tests and crash recovery tests.
 */
export const EXTENDED_TEST_TIMEOUT = 300_000;
|