Codewalkers/apps/server/test/integration/real-providers/harness.ts

/**
 * Real Provider Test Harness
 *
 * Extends the existing test infrastructure to use REAL MultiProviderAgentManager
 * for integration testing with actual CLI providers like Claude and Codex.
 *
 * Unlike the standard TestHarness which uses MockAgentManager, this harness:
 * - Uses real CLI spawning (costs real API credits!)
 * - Provides poll-based waiting helpers
 * - Captures events for inspection
 * - Manages temp directories for worktrees
 */

import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe } from 'vitest';
import type { DrizzleDatabase } from '../../../db/index.js';
import type { DomainEvent, EventBus } from '../../../events/types.js';
import { EventEmitterBus } from '../../../events/bus.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import type { AgentResult, PendingQuestions, AgentStatus } from '../../../agent/types.js';
import type { AgentRepository } from '../../../db/repositories/agent-repository.js';
import type { ProjectRepository } from '../../../db/repositories/project-repository.js';
import type { AccountRepository } from '../../../db/repositories/account-repository.js';
import type { InitiativeRepository } from '../../../db/repositories/initiative-repository.js';
import {
  DrizzleAgentRepository,
  DrizzleProjectRepository,
  DrizzleAccountRepository,
  DrizzleInitiativeRepository,
} from '../../../db/repositories/drizzle/index.js';
import { createTestDatabase } from '../../../db/repositories/drizzle/test-helpers.js';

/**
 * Resolve after roughly `ms` milliseconds.
 *
 * Used by the polling helpers to pause between status checks.
 *
 * @param ms - Delay handed straight to `setTimeout`
 * @returns A promise that resolves (with no value) once the timer fires
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}

/**
 * EventEmitterBus variant that keeps a log of every event passed to `emit`,
 * so tests can inspect what was published.
 */
export class CapturingEventBus extends EventEmitterBus {
  /** Every event emitted since construction or the last `clearEvents()`, in emit order. */
  emittedEvents: DomainEvent[] = [];

  /**
   * Record the event, then forward it to the parent bus.
   */
  emit<T extends DomainEvent>(event: T): void {
    this.emittedEvents.push(event);
    super.emit(event);
  }

  /**
   * Return the captured events whose `type` equals the given value, in emit order.
   * The cast to `T` is unchecked: callers pick `T` to match `type`.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
    const matches: T[] = [];
    for (const captured of this.emittedEvents) {
      if (captured.type === type) {
        matches.push(captured as T);
      }
    }
    return matches;
  }

  /**
   * Drop all captured events by swapping in a fresh array
   * (a previously obtained reference to the old array is left untouched).
   */
  clearEvents(): void {
    this.emittedEvents = [];
  }
}

/**
 * Options for creating a real provider test harness.
 *
 * Every field is optional; `createRealProviderHarness()` may be called with no arguments.
 */
export interface RealProviderHarnessOptions {
  /**
   * Which provider to test (default: 'claude').
   *
   * NOTE(review): createRealProviderHarness in this file never reads this
   * field, so it currently has no effect on the harness that is built —
   * confirm whether it is meant to be wired through to the agent manager.
   */
  provider?: 'claude' | 'codex';
  /**
   * Optional workspace root (temp dir created if omitted).
   *
   * A temp dir created by the harness is initialised as a git repository and
   * removed again by `cleanup()`. A directory supplied here is used as-is:
   * the harness neither runs git setup in it nor deletes it.
   */
  workspaceRoot?: string;
}

/**
 * Real Provider Test Harness interface.
 *
 * Provides everything needed to test against real CLI providers:
 * - In-memory database with real repositories
 * - Real MultiProviderAgentManager (spawns actual CLI processes)
 * - Event capture for verification
 * - Polling-based wait helpers
 *
 * The behaviour notes on the wait helpers below describe the implementation
 * returned by `createRealProviderHarness` in this file.
 */
export interface RealProviderHarness {
  /** In-memory SQLite database */
  db: DrizzleDatabase;
  /** Event bus with capture capability */
  eventBus: CapturingEventBus;
  /** Real agent manager (not mock!) */
  agentManager: MultiProviderAgentManager;
  /** Workspace root directory */
  workspaceRoot: string;

  /** Agent repository */
  agentRepository: AgentRepository;
  /** Project repository */
  projectRepository: ProjectRepository;
  /** Account repository */
  accountRepository: AccountRepository;
  /** Initiative repository */
  initiativeRepository: InitiativeRepository;

  /**
   * Wait for an agent to stop running.
   * Polls the agent repository at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms = 2 minutes)
   * @returns The value of `agentManager.getResult(agentId)` once the agent is
   *   `idle`, `stopped` or `crashed`; `null` if the agent record does not
   *   exist or the agent is `waiting_for_input`
   * @throws Error if the agent is still running when the timeout elapses
   */
  waitForAgentCompletion(agentId: string, timeoutMs?: number): Promise<AgentResult | null>;

  /**
   * Wait for an agent to enter waiting_for_input status.
   * Polls the agent repository at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @returns The value of `agentManager.getPendingQuestions(agentId)` once the
   *   agent is `waiting_for_input`; `null` if the agent record does not exist
   *   or the agent reached `idle`, `stopped` or `crashed` without asking
   * @throws Error if the agent is still running when the timeout elapses
   */
  waitForAgentWaiting(agentId: string, timeoutMs?: number): Promise<PendingQuestions | null>;

  /**
   * Wait for an agent to reach a specific status.
   * Polls the agent repository at regular intervals.
   *
   * @param agentId - The agent ID to wait for
   * @param status - The target status
   * @param timeoutMs - Maximum time to wait (default 120000ms)
   * @throws Error if the agent record does not exist, if the timeout elapses,
   *   or if the target is `running` and the agent is already `idle`,
   *   `stopped`, `crashed` or `waiting_for_input`
   */
  waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs?: number): Promise<void>;

  /**
   * Get captured events filtered by type.
   * Delegates to the capturing event bus.
   */
  getEventsByType<T extends DomainEvent>(type: T['type']): T[];

  /**
   * Clear all captured events.
   */
  clearEvents(): void;

  /**
   * Kill all running agents (for cleanup).
   * Only agents whose stored status is `running` are stopped; errors from
   * individual stop calls are ignored.
   */
  killAllAgents(): Promise<void>;

  /**
   * Clean up all resources (directories, processes).
   * Call this in afterAll/afterEach.
   *
   * Stops running agents and removes the workspace directory, but only when
   * the harness created that directory itself.
   */
  cleanup(): Promise<void>;
}

/** Pause between two consecutive status polls in the wait helpers (one second). */
const POLL_INTERVAL_MS = 1_000;

/**
 * Create a test harness for real provider integration tests.
 *
 * This creates:
 * - In-memory SQLite database
 * - Temp directory for worktrees (or uses provided workspace)
 * - Real MultiProviderAgentManager
 * - Event capture bus
 *
 * Workspace ownership:
 * - When `options.workspaceRoot` is missing (or an empty string), a temp
 *   directory is created, initialised as a git repository with one commit,
 *   and removed again by `cleanup()`. If the git setup fails, the temp
 *   directory is removed before the error is rethrown.
 * - A caller-supplied directory is used as-is: no git setup, never deleted.
 *
 * @param options - Harness options. Only `workspaceRoot` is read here;
 *   `provider` is currently not consulted by this function.
 * @returns A harness wired to real repositories and a real agent manager
 * @throws Whatever `mkdtemp` or the git setup commands throw (e.g. git is
 *   not installed or the initial commit fails)
 *
 * @example
 * ```typescript
 * let harness: RealProviderHarness;
 *
 * beforeAll(async () => {
 *   harness = await createRealProviderHarness({ provider: 'claude' });
 * });
 *
 * afterAll(async () => {
 *   await harness.cleanup();
 * });
 *
 * it('spawns and completes', async () => {
 *   const agent = await harness.agentManager.spawn({...});
 *   const result = await harness.waitForAgentCompletion(agent.id);
 *   expect(result?.success).toBe(true);
 * });
 * ```
 */
export async function createRealProviderHarness(
  options: RealProviderHarnessOptions = {}
): Promise<RealProviderHarness> {
  // Decide ownership and the directory from the SAME truthiness check. An
  // empty string must count as "not provided": otherwise git setup would run
  // with cwd '' (which Node treats as the current working directory) and
  // touch whatever repository the tests were started from.
  const providedRoot = options.workspaceRoot;
  const ownedWorkspace = !providedRoot; // Track if we need to clean up
  const workspaceRoot = providedRoot ? providedRoot : await mkdtemp(join(tmpdir(), 'cw-test-'));

  // Best-effort removal of the temp workspace; never touches a caller-supplied directory.
  const removeOwnedWorkspace = async (): Promise<void> => {
    if (!ownedWorkspace) return;
    try {
      await rm(workspaceRoot, { recursive: true, force: true });
    } catch {
      // Ignore cleanup errors
    }
  };

  // Initialize git repo in temp workspace (required for worktree operations)
  if (ownedWorkspace) {
    try {
      const { execSync } = await import('node:child_process');
      const { writeFile } = await import('node:fs/promises');
      const runInWorkspace = (command: string): void => {
        execSync(command, { cwd: workspaceRoot, stdio: 'ignore' });
      };

      runInWorkspace('git init');
      runInWorkspace('git config user.email "test@test.com"');
      runInWorkspace('git config user.name "Test"');
      // Create initial commit (worktrees require at least one commit).
      // The placeholder file is written via fs instead of `touch` so the
      // setup does not depend on a POSIX shell utility.
      await writeFile(join(workspaceRoot, '.gitkeep'), '');
      runInWorkspace('git add .gitkeep');
      runInWorkspace('git commit -m "init"');
    } catch (error) {
      // Do not leave a half-initialised temp directory behind.
      await removeOwnedWorkspace();
      throw error;
    }
  }

  // Create in-memory database
  const db = createTestDatabase();

  // Create repositories
  const agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  const initiativeRepository = new DrizzleInitiativeRepository(db);

  // Create event bus with capture (parent class disables maxListeners warning)
  const eventBus = new CapturingEventBus();

  // Create REAL agent manager (not mock!)
  const agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus
  );

  // Build harness
  const harness: RealProviderHarness = {
    db,
    eventBus,
    agentManager,
    workspaceRoot,
    agentRepository,
    projectRepository,
    accountRepository,
    initiativeRepository,

    async waitForAgentCompletion(agentId: string, timeoutMs = 120000): Promise<AgentResult | null> {
      const deadline = Date.now() + timeoutMs;

      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;

        if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
          // Agent finished (normally or by crashing) - hand back whatever result was recorded
          return agentManager.getResult(agentId);
        }

        if (agent.status === 'waiting_for_input') {
          // Agent is waiting - return null (not completed)
          return null;
        }

        // Still running - wait and check again
        await sleep(POLL_INTERVAL_MS);
      }

      throw new Error(`Timeout waiting for agent ${agentId} to complete after ${timeoutMs}ms`);
    },

    async waitForAgentWaiting(agentId: string, timeoutMs = 120000): Promise<PendingQuestions | null> {
      const deadline = Date.now() + timeoutMs;

      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) return null;

        if (agent.status === 'waiting_for_input') {
          return agentManager.getPendingQuestions(agentId);
        }

        if (agent.status === 'idle' || agent.status === 'stopped' || agent.status === 'crashed') {
          // Agent finished without asking questions
          return null;
        }

        // Still running - wait and check again
        await sleep(POLL_INTERVAL_MS);
      }

      throw new Error(`Timeout waiting for agent ${agentId} to request input after ${timeoutMs}ms`);
    },

    async waitForAgentStatus(agentId: string, status: AgentStatus, timeoutMs = 120000): Promise<void> {
      const deadline = Date.now() + timeoutMs;

      while (Date.now() < deadline) {
        const agent = await agentRepository.findById(agentId);
        if (!agent) {
          throw new Error(`Agent ${agentId} not found`);
        }

        if (agent.status === status) {
          return;
        }

        // Fail fast only when waiting for 'running': once the agent has left
        // the running state, polling further cannot succeed.
        if (status === 'running' && ['idle', 'stopped', 'crashed', 'waiting_for_input'].includes(agent.status)) {
          throw new Error(`Agent ${agentId} already in terminal state ${agent.status}, cannot reach ${status}`);
        }

        await sleep(POLL_INTERVAL_MS);
      }

      throw new Error(`Timeout waiting for agent ${agentId} to reach status ${status} after ${timeoutMs}ms`);
    },

    getEventsByType<T extends DomainEvent>(type: T['type']): T[] {
      return eventBus.getEventsByType<T>(type);
    },

    clearEvents(): void {
      eventBus.clearEvents();
    },

    async killAllAgents(): Promise<void> {
      const agents = await agentRepository.findAll();
      for (const agent of agents) {
        if (agent.status === 'running') {
          try {
            await agentManager.stop(agent.id);
          } catch {
            // Ignore errors during cleanup
          }
        }
      }
    },

    async cleanup(): Promise<void> {
      try {
        // Kill any running agents
        await harness.killAllAgents();
      } finally {
        // Remove the temp workspace even if listing/stopping agents failed,
        // so a broken run does not leak directories in the OS temp folder.
        await removeOwnedWorkspace();
      }
    },
  };

  return harness;
}

/**
 * True when the named environment variable is set to exactly '1'.
 */
const isEnvFlagEnabled = (name: string): boolean => process.env[name] === '1';

/**
 * Whether the real Claude suites are enabled.
 * Opt in by setting the REAL_CLAUDE_TESTS=1 environment variable.
 */
export const shouldRunRealClaudeTests = isEnvFlagEnabled('REAL_CLAUDE_TESTS');

/**
 * Whether the real Codex suites are enabled.
 * Opt in by setting the REAL_CODEX_TESTS=1 environment variable.
 */
export const shouldRunRealCodexTests = isEnvFlagEnabled('REAL_CODEX_TESTS');

/**
 * Pick the suite function for an opt-in suite: the regular `describe` when
 * enabled, otherwise `describe.skip` (cast to the same type).
 */
const describeWhenEnabled = (enabled: boolean): typeof describe =>
  enabled ? describe : (describe.skip as typeof describe);

/**
 * `describe` for Claude suites - the suite is skipped unless REAL_CLAUDE_TESTS=1.
 */
export const describeRealClaude: typeof describe = describeWhenEnabled(shouldRunRealClaudeTests);

/**
 * `describe` for Codex suites - the suite is skipped unless REAL_CODEX_TESTS=1.
 */
export const describeRealCodex: typeof describe = describeWhenEnabled(shouldRunRealCodexTests);

/**
 * Per-test timeout for real CLI tests: 120000 ms (2 minutes).
 * Real API calls take 5-30 seconds typically.
 */
export const REAL_TEST_TIMEOUT = 120_000;

/**
 * Per-test timeout for slow tests: 300000 ms (5 minutes).
 * Used for schema retry tests and crash recovery tests.
 */
export const EXTENDED_TEST_TIMEOUT = 300_000;