/**
 * Real Claude CLI Integration Tests
 *
 * IMPORTANT: These tests call the real Claude CLI and incur API costs.
 * They are SKIPPED by default and should only be run manually for validation.
 *
 * To run these tests:
 * ```bash
 * REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-claude.test.ts --test-timeout=120000
 * ```
 *
 * Purpose:
 * - Validate that JSON schemas work correctly with Claude CLI --json-schema flag
 * - Confirm MockAgentManager accurately simulates real CLI behavior
 * - Document actual response structure and costs
 *
 * Updated (2026-02-06): Now uses the universal agentSignalSchema instead of
 * per-mode schemas. Agents output trivial signals (done/questions/error) and
 * write files instead of producing mode-specific JSON.
 *
 * Total validation cost: ~$0.10 (3 tests)
 */

import { describe, it, expect, beforeAll } from 'vitest';
import { execa } from 'execa';
import {
  agentSignalJsonSchema,
  agentSignalSchema,
} from '../../agent/schema.js';

/**
 * Result structure from Claude CLI with --output-format json
 *
 * When --json-schema is used:
 * - result: "" (empty string)
 * - structured_output: { ... } (the validated JSON object)
 */
interface ClaudeCliResult {
  type: 'result';
  subtype: 'success' | 'error' | 'error_max_turns';
  is_error: boolean;
  session_id: string;
  result: string;
  structured_output?: unknown;
  total_cost_usd?: number;
}

/** Maximum number of characters of raw CLI text echoed into error messages. */
const ERROR_SNIPPET_LENGTH = 200;

/**
 * Parse the raw stdout of the Claude CLI into a result envelope.
 *
 * Only the outer shape (a JSON object) is checked here; individual fields are
 * asserted by the tests themselves.
 *
 * @param stdout - Raw stdout captured from the CLI process
 * @returns The parsed result envelope
 * @throws Error when stdout is not valid JSON or is not a JSON object
 */
function parseCliStdout(stdout: string): ClaudeCliResult {
  const snippet = stdout.slice(0, ERROR_SNIPPET_LENGTH);

  let parsed: unknown;
  try {
    parsed = JSON.parse(stdout);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Claude CLI stdout is not valid JSON (${reason}): ${snippet}`);
  }

  if (typeof parsed !== 'object' || parsed === null) {
    throw new Error(`Claude CLI stdout is not a JSON object: ${snippet}`);
  }

  // The CLI is the source of truth for this shape; tests verify the fields they rely on.
  return parsed as ClaudeCliResult;
}

/**
 * Pull the structured payload out of a CLI result envelope.
 *
 * Prefers `structured_output`; when that is absent (undefined or null) the
 * `result` string is parsed as JSON instead.
 *
 * @param cliResult - Parsed CLI result envelope
 * @returns The structured payload, not yet validated against any schema
 * @throws Error when there is no structured_output and `result` is not JSON
 */
function extractStructuredOutput(cliResult: ClaudeCliResult): unknown {
  if (cliResult.structured_output != null) {
    return cliResult.structured_output;
  }

  try {
    return JSON.parse(cliResult.result);
  } catch (err) {
    // With --json-schema the result field is typically empty, so a bare
    // JSON.parse failure would hide why no structured output was produced.
    const reason = err instanceof Error ? err.message : String(err);
    const snippet = String(cliResult.result).slice(0, ERROR_SNIPPET_LENGTH);
    throw new Error(
      `Claude CLI returned no structured_output and its result field is not JSON ` +
        `(subtype: ${String(cliResult.subtype)}, is_error: ${String(cliResult.is_error)}, ` +
        `parse error: ${reason}): ${snippet}`
    );
  }
}

/**
 * Helper to call Claude CLI directly with a prompt and JSON schema.
 *
 * Runs `claude -p <prompt> --output-format json --json-schema <schema>` and
 * logs duration, cost and session details for the run.
 *
 * @param prompt - The prompt to send to Claude
 * @param jsonSchema - JSON schema to enforce structured output
 * @param timeoutMs - Timeout in milliseconds (default 90s)
 * @returns Parsed CLI result together with its structured output
 * @throws Error when the CLI output cannot be parsed or contains no structured payload
 */
async function callClaudeCli(
  prompt: string,
  jsonSchema: object,
  timeoutMs = 90000
): Promise<{ cliResult: ClaudeCliResult; structuredOutput: unknown }> {
  const cliArgs = [
    '-p',
    prompt,
    '--output-format',
    'json',
    '--json-schema',
    JSON.stringify(jsonSchema),
  ];

  const startTime = Date.now();
  const { stdout } = await execa('claude', cliArgs, { timeout: timeoutMs });
  const duration = Date.now() - startTime;

  const cliResult = parseCliStdout(stdout);

  console.log(`\n Duration: ${(duration / 1000).toFixed(1)}s`);
  console.log(` Cost: $${cliResult.total_cost_usd?.toFixed(4) ?? 'N/A'}`);
  console.log(` Session ID: ${cliResult.session_id}`);
  console.log(` Result field empty: ${cliResult.result === ''}`);
  console.log(` Has structured_output: ${cliResult.structured_output !== undefined}`);

  // When --json-schema is used, structured output is in structured_output field
  // The result field is typically empty when using --json-schema
  const structuredOutput = extractStructuredOutput(cliResult);

  return { cliResult, structuredOutput };
}

/**
 * Check if real Claude tests should run.
 * Set REAL_CLAUDE_TESTS=1 environment variable to enable.
 */
const shouldRunRealTests = process.env.REAL_CLAUDE_TESTS === '1';

/**
 * Skip wrapper - tests are expensive and should run manually
 */
const describeReal = shouldRunRealTests ? describe : describe.skip;

// Individual test timeout - real API calls take 5-30 seconds
const TEST_TIMEOUT = 120000; // 2 minutes

describeReal('Real Claude CLI Integration', () => {
  beforeAll(() => {
    console.log('\n=== Running Real Claude CLI Tests ===');
    console.log('These tests call the real Claude API and incur costs.\n');
  });

  describe('Universal Signal Schema', () => {
    it(
      'should return done status',
      async () => {
        const prompt = `Complete this simple task: Say "Hello, World!" as a test. Output your response in the required JSON format with status "done".`;

        const { cliResult, structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);

        console.log(' Output:', JSON.stringify(structuredOutput, null, 2));

        // Verify the CLI response structure
        expect(cliResult.subtype).toBe('success');
        expect(cliResult.result).toBe(''); // Empty when using --json-schema
        expect(cliResult.structured_output).toBeDefined();

        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('done');
      },
      TEST_TIMEOUT
    );

    it(
      'should return questions status with array',
      async () => {
        const prompt = `You are working on a vague task: "Make it better" You MUST ask clarifying questions before proceeding. You cannot complete this task without more information. \nOutput your response with status "questions" and include at least 2 questions with unique IDs.`;

        const { structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);

        console.log(' Output:', JSON.stringify(structuredOutput, null, 2));

        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('questions');

        // The explicit check narrows the parsed signal so `questions` is accessible.
        if (parsed.status === 'questions') {
          expect(Array.isArray(parsed.questions)).toBe(true);
          expect(parsed.questions.length).toBeGreaterThanOrEqual(1);

          // Destructure instead of indexing so this also type-checks when
          // unchecked indexed access is disallowed.
          const [firstQuestion] = parsed.questions;
          expect(firstQuestion?.id).toBeTruthy();
          expect(firstQuestion?.question).toBeTruthy();
        }
      },
      TEST_TIMEOUT
    );

    it(
      'should return error status',
      async () => {
        const prompt = `You have encountered an unrecoverable error. Output your response with status "error" and a descriptive error message.`;

        const { structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);

        console.log(' Output:', JSON.stringify(structuredOutput, null, 2));

        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('error');

        if (parsed.status === 'error') {
          expect(parsed.error).toBeTruthy();
        }
      },
      TEST_TIMEOUT
    );
  });
});