Move src/ → apps/server/ and packages/web/ → apps/web/ to adopt standard monorepo conventions (apps/ for runnable apps, packages/ for reusable libraries). Update all config files, shared package imports, test fixtures, and documentation to reflect new paths. Key fixes: - Update workspace config to ["apps/*", "packages/*"] - Update tsconfig.json rootDir/include for apps/server/ - Add apps/web/** to vitest exclude list - Update drizzle.config.ts schema path - Fix ensure-schema.ts migration path detection (3 levels up in dev, 2 levels up in dist) - Fix tests/integration/cli-server.test.ts import paths - Update packages/shared imports to apps/server/ paths - Update all docs/ files with new paths
184 lines
5.9 KiB
TypeScript
184 lines
5.9 KiB
TypeScript
/**
 * Real Claude CLI Integration Tests
 *
 * IMPORTANT: These tests call the real Claude CLI and incur API costs.
 * They are SKIPPED by default and should only be run manually for validation.
 *
 * To run these tests:
 * ```bash
 * REAL_CLAUDE_TESTS=1 npm test -- apps/server/test/integration/real-claude.test.ts --test-timeout=120000
 * ```
 *
 * Purpose:
 * - Validate that JSON schemas work correctly with Claude CLI --json-schema flag
 * - Confirm MockAgentManager accurately simulates real CLI behavior
 * - Document actual response structure and costs
 *
 * Updated (2026-02-06): Now uses the universal agentSignalSchema instead of
 * per-mode schemas. Agents output trivial signals (done/questions/error) and
 * write files instead of producing mode-specific JSON.
 *
 * Total validation cost: ~$0.10 (3 tests)
 */

import { describe, it, expect, beforeAll } from 'vitest';
|
|
import { execa } from 'execa';
|
|
import {
|
|
agentSignalJsonSchema,
|
|
agentSignalSchema,
|
|
} from '../../agent/schema.js';
|
|
|
|
/**
 * Result structure from Claude CLI with --output-format json
 *
 * When --json-schema is used:
 * - result: "" (empty string)
 * - structured_output: { ... } (the validated JSON object)
 */
interface ClaudeCliResult {
  type: 'result';
  subtype: 'success' | 'error' | 'error_max_turns';
  is_error: boolean;
  session_id: string;
  /** Plain result text; expected to be '' when --json-schema is used. */
  result: string;
  /** Schema-conforming payload; kept as `unknown` until validated by the caller. */
  structured_output?: unknown;
  /** Reported cost of the call in US dollars, when the CLI includes it. */
  total_cost_usd?: number;
}

/**
 * Helper to call Claude CLI directly with a prompt and JSON schema.
 *
 * Runs `claude -p <prompt> --output-format json --json-schema <schema>`,
 * logs duration, cost and session details, and returns both the raw CLI
 * envelope and the structured payload.
 *
 * @param prompt - The prompt to send to Claude
 * @param jsonSchema - JSON schema to enforce structured output
 * @param timeoutMs - Timeout in milliseconds (default 90s)
 * @returns Parsed CLI result with structured_output
 * @throws Error when stdout, or the fallback `result` field, is not valid JSON.
 *   NOTE(review): execa presumably also rejects on a failed or timed-out
 *   process; confirm against the installed execa version.
 */
async function callClaudeCli(
  prompt: string,
  jsonSchema: object,
  timeoutMs = 90000
): Promise<{ cliResult: ClaudeCliResult; structuredOutput: unknown }> {
  const startTime = Date.now();

  const { stdout } = await execa(
    'claude',
    ['-p', prompt, '--output-format', 'json', '--json-schema', JSON.stringify(jsonSchema)],
    { timeout: timeoutMs }
  );

  const duration = Date.now() - startTime;
  // The envelope shape is trusted, not validated; only JSON well-formedness is checked here.
  const cliResult = parseCliJson(stdout, 'stdout') as ClaudeCliResult;

  console.log(`\n Duration: ${(duration / 1000).toFixed(1)}s`);
  console.log(` Cost: $${cliResult.total_cost_usd?.toFixed(4) ?? 'N/A'}`);
  console.log(` Session ID: ${cliResult.session_id}`);
  console.log(` Result field empty: ${cliResult.result === ''}`);
  console.log(` Has structured_output: ${cliResult.structured_output !== undefined}`);

  // When --json-schema is used, structured output is in structured_output field
  // The result field is typically empty when using --json-schema, so the
  // fallback parse must fail with context rather than a bare SyntaxError.
  const structuredOutput =
    cliResult.structured_output ??
    parseCliJson(
      cliResult.result,
      `result field (structured_output missing, subtype=${cliResult.subtype}, is_error=${cliResult.is_error})`
    );

  return { cliResult, structuredOutput };
}

/**
 * Parse JSON text produced by the Claude CLI, rethrowing parse failures with
 * the name of the offending source and a short snippet of the payload.
 */
function parseCliJson(text: string, label: string): unknown {
  try {
    return JSON.parse(text);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(
      `Claude CLI ${label} is not valid JSON (${reason}): ${JSON.stringify(String(text).slice(0, 200))}`
    );
  }
}

/**
 * Check if real Claude tests should run.
 * True only when the REAL_CLAUDE_TESTS environment variable is exactly '1'.
 */
const shouldRunRealTests = process.env.REAL_CLAUDE_TESTS === '1';

/**
 * Skip wrapper - tests are expensive and should run manually.
 * Resolves to `describe` when opted in and to `describe.skip` otherwise.
 */
const describeReal = shouldRunRealTests ? describe : describe.skip;

// Individual test timeout - real API calls take 5-30 seconds
const TEST_TIMEOUT = 120000; // 2 minutes

// Real-CLI suite: each case sends one prompt through callClaudeCli with the
// universal signal JSON schema and validates the structured output with the
// Zod schema. The suite is skipped when describeReal resolves to describe.skip.
describeReal('Real Claude CLI Integration', () => {
  beforeAll(() => {
    console.log('\n=== Running Real Claude CLI Tests ===');
    console.log('These tests call the real Claude API and incur costs.\n');
  });

  describe('Universal Signal Schema', () => {
    it(
      'should return done status',
      async () => {
        const prompt = `Complete this simple task: Say "Hello, World!" as a test.

Output your response in the required JSON format with status "done".`;

        const { cliResult, structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);

        console.log(' Output:', JSON.stringify(structuredOutput, null, 2));

        // Verify the CLI response structure
        expect(cliResult.subtype).toBe('success');
        expect(cliResult.result).toBe(''); // Empty when using --json-schema
        expect(cliResult.structured_output).toBeDefined();

        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('done');
      },
      TEST_TIMEOUT
    );

    it(
      'should return questions status with array',
      async () => {
        const prompt = `You are working on a vague task: "Make it better"

You MUST ask clarifying questions before proceeding. You cannot complete this task without more information.

Output your response with status "questions" and include at least 2 questions with unique IDs.`;

        const { structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);

        console.log(' Output:', JSON.stringify(structuredOutput, null, 2));

        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('questions');
        // The guard repeats the expectation so the compiler narrows `parsed`
        // to the variant that carries `questions`.
        if (parsed.status === 'questions') {
          expect(Array.isArray(parsed.questions)).toBe(true);
          expect(parsed.questions.length).toBeGreaterThanOrEqual(1);
          expect(parsed.questions[0].id).toBeTruthy();
          expect(parsed.questions[0].question).toBeTruthy();
        }
      },
      TEST_TIMEOUT
    );

    it(
      'should return error status',
      async () => {
        const prompt = `You have encountered an unrecoverable error. Output your response with status "error" and a descriptive error message.`;

        const { structuredOutput } = await callClaudeCli(prompt, agentSignalJsonSchema);

        console.log(' Output:', JSON.stringify(structuredOutput, null, 2));

        // Validate against Zod schema
        const parsed = agentSignalSchema.parse(structuredOutput);
        expect(parsed.status).toBe('error');
        // Narrow to the error variant before reading its message.
        if (parsed.status === 'error') {
          expect(parsed.error).toBeTruthy();
        }
      },
      TEST_TIMEOUT
    );
  });
});