real-providers harness.ts

8.97% Statements 7/78
4.65% Branches 2/43
0% Functions 0/14
9.45% Lines 7/74
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5x
 
 
 
 
 
5x
 
 
 
 
5x
 
 
 
 
5x
 
 
 
 
 
5x
 
 
 
 
 
5x
  /**
 * Real Provider Test Harness
 *
 * Extends the existing test infrastructure to use REAL MultiProviderAgentManager
 * for integration testing with actual CLI providers like Claude and Codex.
 *
 * Unlike the standard TestHarness which uses MockAgentManager, this harness:
 * - Uses real CLI spawning (costs real API credits!)
 * - Provides poll-based waiting helpers
 * - Captures events for inspection
 * - Manages temp directories for worktrees
 */
 
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe } from 'vitest';
import type { DrizzleDatabase } from '../../../db/index.js';
import type { DomainEvent, EventBus } from '../../../events/types.js';
import { EventEmitterBus } from '../../../events/bus.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import type { AgentResult, PendingQuestions, AgentStatus } from '../../../agent/types.js';
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
import {
  DrizzleAgentRepository,
  DrizzleProjectRepository,
  DrizzleAccountRepository,
  DrizzleInitiativeRepository,
} from '../../../db/repositories/drizzle/index.js';
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';
 
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * Used by the polling helpers to pause between status checks.
 *
 * @param ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns A promise that settles (with no value) once the timer fires.
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
 
/**
 * EventEmitterBus variant that keeps a record of every event passed to `emit`.
 *
 * Events are appended to `emittedEvents` before being forwarded to the parent
 * bus, so the log reflects emission order.
 */
export class CapturingEventBus extends EventEmitterBus {
  /** Every event emitted since construction or the last `clearEvents()` call. */
  emittedEvents: DomainEvent[] = [];

  /**
   * Record the event, then hand it to the parent implementation.
   *
   * @param event - The domain event being published.
   */
  emit<T extends DomainEvent>(event: T): void {
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /**
   * Collect the recorded events whose `type` equals the given value.
   *
   * @param type - The event type discriminator to match.
   * @returns A new array of matching events, in emission order.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
    const matches: DomainEvent[] = [];
    for (const recorded of this.emittedEvents) {
      if (recorded.type === type) {
        matches.push(recorded);
      }
    }
    // The discriminator matched, so callers may treat the entries as T.
    return matches as T[];
  }

  /**
   * Forget all recorded events by swapping in a fresh array.
   */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}
 
/**
 * Options accepted by createRealProviderHarness.
 */
export interface RealProviderHarnessOptions {
  /**
   * Which provider to test (documented default: 'claude').
   *
   * NOTE(review): createRealProviderHarness in this file never reads this
   * field, so it currently has no effect on the harness it builds — confirm
   * whether callers are expected to pass the provider at spawn time instead.
   */
  provider?: 'claude' | 'codex';
  /**
   * Directory to use as the workspace root.
   *
   * When omitted, the harness creates a temp directory, initialises a git
   * repository in it, and removes it again in cleanup(). A directory supplied
   * here is used as-is and is not removed by cleanup().
   */
  workspaceRoot?: string;
}
 
/**
 * Real Provider Test Harness interface.
 *
 * Bundles what a test needs to run against real CLI providers:
 * - A test database together with the repositories built on it
 * - A real MultiProviderAgentManager (not the mock)
 * - An event bus that records emitted events for verification
 * - Polling-based wait helpers
 *
 * The behavior notes on the methods below describe the implementation
 * returned by createRealProviderHarness.
 */
export interface RealProviderHarness {
  /** In-memory SQLite database */
  db: DrizzleDatabase;
  /** Event bus with capture capability */
  eventBus: CapturingEventBus;
  /** Real agent manager (not mock!) */
  agentManager: MultiProviderAgentManager;
  /** Workspace root directory handed to the agent manager */
  workspaceRoot: string;

  /** Agent repository */
  agentRepository: AgentRepository;
  /** Project repository */
  projectRepository: ProjectRepository;
  /** Account repository */
  accountRepository: AccountRepository;
  /** Initiative repository */
  initiativeRepository: InitiativeRepository;

  /**
   * Wait until an agent is no longer running.
   * Polls the agent repository at regular intervals.
   *
   * - Status `idle`, `stopped` or `crashed`: resolves with whatever
   *   `agentManager.getResult(agentId)` yields (for a crash this is the
   *   manager's result for the crashed agent, not null).
   * - Status `waiting_for_input`, or no agent record found: resolves with null.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms = 2 minutes)
   * @returns The agent result, or null as described above
   * @throws Error if the timeout elapses while the agent is in any other status
   */
  waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;

  /**
   * Wait for an agent to enter waiting_for_input status.
   * Polls the agent repository at regular intervals.
   *
   * - Status `waiting_for_input`: resolves with whatever
   *   `agentManager.getPendingQuestions(agentId)` yields.
   * - Status `idle`, `stopped` or `crashed` (finished without asking), or no
   *   agent record found: resolves with null.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @returns The pending questions, or null as described above
   * @throws Error if the timeout elapses while the agent is in any other status
   */
  waitForAgentWaiting(agentId: string, timeoutMs?: number): Promise<PendingQuestions | null>;

  /**
   * Wait for an agent to reach a specific status.
   * Polls the agent repository at regular intervals and resolves as soon as
   * the stored status equals `status`.
   *
   * @param agentId - The agent ID to wait for
   * @param status - The target status
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @throws Error if the agent record is not found
   * @throws Error if `status` is `running` but the agent is already `idle`,
   *   `stopped`, `crashed` or `waiting_for_input`
   * @throws Error if the timeout elapses before the status is reached
   */
  waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs?: number): Promise<void>;

  /**
   * Get captured events filtered by type (delegates to the capturing event bus).
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[];

  /**
   * Clear all captured events.
   */
  clearEvents(): void;

  /**
   * Stop every agent whose stored status is `running` (for cleanup).
   * Errors raised while stopping an individual agent are ignored.
   */
  killAllAgents(): Promise<void>;

  /**
   * Clean up all resources: stops running agents, then removes the workspace
   * directory if the harness created it.
   * Call this in afterAll/afterEach.
   */
  cleanup(): Promise<void>;
}
 
/** Pause between two consecutive agent status checks, in milliseconds (one second). */
const POLL_INTERVAL_MS = 1_000;
 
/**
 * Turn a freshly created workspace directory into a git repository that has
 * exactly one commit (worktree operations need at least one commit).
 *
 * Commands are run through `execFileSync` with argument arrays, so no shell is
 * involved and the setup does not depend on a `touch` binary being available.
 *
 * @param root - Directory in which to run the git commands.
 * @throws Whatever `execFileSync`/`writeFile` throw when git is missing or a
 *   command exits non-zero.
 */
async function initGitWorkspace(root: string): Promise<void> {
  const { execFileSync } = await import('node:child_process');
  const { writeFile } = await import('node:fs/promises');

  const git = (...args: string[]): void => {
    execFileSync('git', args, { cwd: root, stdio: 'ignore' });
  };

  git('init');
  git('config', 'user.email', 'test@test.com');
  git('config', 'user.name', 'Test');

  // Append mode creates the file when missing and leaves existing content alone.
  await writeFile(join(root, '.gitkeep'), '', { flag: 'a' });
  git('add', '.gitkeep');
  git('commit', '-m', 'init');
}

/**
 * Best-effort recursive removal of a workspace directory; failures are ignored
 * because this only runs on cleanup paths.
 */
async function removeWorkspaceQuietly(root: string): Promise<void> {
  try {
    await rm(root, { recursive: true, force: true });
  } catch {
    // Ignore cleanup errors
  }
}

/**
 * Sleep until the next poll is due without running past the deadline.
 *
 * @param deadline - Absolute time (ms since epoch) at which waiting must stop.
 * @returns true if another poll should be attempted, false once the deadline
 *   has passed (or is not a usable number).
 */
async function waitForNextPoll(deadline: number): Promise<boolean> {
  const remainingMs = deadline - Date.now();
  // Written as a negated `>` so a NaN deadline ends the wait instead of looping forever.
  if (!(remainingMs > 0)) {
    return false;
  }
  await sleep(Math.min(POLL_INTERVAL_MS, remainingMs));
  return true;
}

/**
 * Create a test harness for real provider integration tests.
 *
 * This creates:
 * - A test database and the repositories on top of it
 * - A temp directory with an initialised git repository (or uses the provided
 *   workspace untouched)
 * - A real MultiProviderAgentManager
 * - An event capture bus
 *
 * Only `options.workspaceRoot` is read here; `options.provider` is not
 * consulted by this function.
 *
 * The wait helpers poll at least once, even for a zero timeout, and never
 * sleep past their deadline.
 *
 * @param options - Harness options; see RealProviderHarnessOptions.
 * @returns The assembled harness. Call `cleanup()` when done.
 * @throws If git setup of a harness-created workspace fails; the temp
 *   directory is removed before the error is rethrown.
 *
 * @example
 * ```typescript
 * let harness: RealProviderHarness;
 *
 * beforeAll(async () => {
 *   harness = await createRealProviderHarness({ provider: 'claude' });
 * });
 *
 * afterAll(async () => {
 *   await harness.cleanup();
 * });
 *
 * it('spawns and completes', async () => {
 *   const agent = await harness.agentManager.spawn({...});
 *   const result = await harness.waitForAgentCompletion(agent.id);
 *   expect(result?.success).toBe(true);
 * });
 * ```
 */
export async function createRealProviderHarness(
  options: RealProviderHarnessOptions = {}
): Promise<RealProviderHarness> {
  // Ownership uses the same nullish test as the directory selection below, so a
  // caller-supplied path (even an empty string) is never git-initialised or
  // deleted by the harness.
  const requestedRoot = options.workspaceRoot;
  const ownedWorkspace = requestedRoot == null;
  const workspaceRoot = requestedRoot ?? (await mkdtemp(join(tmpdir(), 'cw-test-')));

  // Initialize git repo in temp workspace (required for worktree operations)
  if (ownedWorkspace) {
    try {
      await initGitWorkspace(workspaceRoot);
    } catch (error) {
      // Do not leak the temp directory when git is missing or a command fails.
      await removeWorkspaceQuietly(workspaceRoot);
      throw error;
    }
  }

  // Create test database
  const db = createTestDatabase();

  // Create repositories
  const agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  const initiativeRepository = new DrizzleInitiativeRepository(db);

  // Create event bus with capture
  const eventBus = new CapturingEventBus();

  // Create REAL agent manager (not mock!)
  const agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus
  );

  // Build harness
  const harness: RealProviderHarness = {
    db,
    eventBus,
    agentManager,
    workspaceRoot,
    agentRepository,
    projectRepository,
    accountRepository,
    initiativeRepository,

    async waitForAgentCompletion(agentId: string, timeoutMs = 120000): Promise<AgentResult | null> {
      const deadline = Date.now() + timeoutMs;

      do {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;

        if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
          // Finished (successfully or by crashing) - the manager holds the result either way
          return agentManager.getResult(agentId);
        }

        if (agent.status === 'waiting_for_input') {
          // Agent is waiting - return null (not completed)
          return null;
        }

        // Still running - wait and check again
      } while (await waitForNextPoll(deadline));

      throw new Error(`Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
    },

    async waitForAgentWaiting(agentId: string, timeoutMs = 120000): Promise<PendingQuestions | null> {
      const deadline = Date.now() + timeoutMs;

      do {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;

        if (agent.status === 'waiting_for_input') {
          return agentManager.getPendingQuestions(agentId);
        }

        if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
          // Agent finished without asking questions
          return null;
        }

        // Still running - wait and check again
      } while (await waitForNextPoll(deadline));

      throw new Error(`Timeout waiting for agent ${agentId} to request input after ${timeoutMs}ms`);
    },

    async waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs = 120000): Promise<void> {
      const deadline = Date.now() + timeoutMs;

      do {
        const agent = await agentRepository.findById(agentId);
        if (!agent) {
          throw new Error(`Agent ${agentId} not found`);
        }

        if (agent.status === status) {
          return;
        }

        // Waiting for 'running' is pointless once the agent has already left that state
        if (status === 'running' && ['idle', 'stopped', 'crashed', 'waiting_for_input'].includes(agent.status)) {
          throw new Error(`Agent ${agentId} already in terminal state ${agent.status}, cannot reach ${status}`);
        }
      } while (await waitForNextPoll(deadline));

      throw new Error(`Timeout waiting for agent ${agentId} to reach status ${status} after ${timeoutMs}ms`);
    },

    getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
      return eventBus.getEventsByType<T>(type);
    },

    clearEvents(): void {
      eventBus.clearEvents();
    },

    async killAllAgents(): Promise<void> {
      const agents = await agentRepository.findAll();
      for (const agent of agents) {
        if (agent.status === 'running') {
          try {
            await agentManager.stop(agent.id);
          } catch {
            // Ignore errors during cleanup
          }
        }
      }
    },

    async cleanup(): Promise<void> {
      try {
        // Kill any running agents
        await harness.killAllAgents();
      } finally {
        // Remove the workspace even when stopping agents failed, so a broken
        // run does not leave temp directories behind. Only directories the
        // harness created are touched.
        if (ownedWorkspace) {
          await removeWorkspaceQuietly(workspaceRoot);
        }
      }
    },
  };

  return harness;
}
 
/**
 * True when the real Claude test suites are enabled.
 * Opt in by setting the REAL_CLAUDE_TESTS environment variable to exactly "1".
 */
export const shouldRunRealClaudeTests = process.env['REAL_CLAUDE_TESTS'] === '1';

/**
 * True when the real Codex test suites are enabled.
 * Opt in by setting the REAL_CODEX_TESTS environment variable to exactly "1".
 */
export const shouldRunRealCodexTests = process.env['REAL_CODEX_TESTS'] === '1';
 
/**
 * `describe` for suites that talk to the real Claude CLI.
 * Resolves to `describe.skip` unless REAL_CLAUDE_TESTS=1.
 */
export const describeRealClaude: typeof describe = !shouldRunRealClaudeTests
  ? (describe.skip as typeof describe)
  : describe;

/**
 * `describe` for suites that talk to the real Codex CLI.
 * Resolves to `describe.skip` unless REAL_CODEX_TESTS=1.
 */
export const describeRealCodex: typeof describe = !shouldRunRealCodexTests
  ? (describe.skip as typeof describe)
  : describe;
 
/**
 * Per-test timeout for real CLI tests, in milliseconds (2 minutes).
 * Real API calls take 5-30 seconds typically.
 */
export const REAL_TEST_TIMEOUT = 120_000;

/**
 * Longer per-test timeout for slow tests, in milliseconds (5 minutes).
 * Used for schema retry tests and crash recovery tests.
 */
export const EXTENDED_TEST_TIMEOUT = 300_000;