Move src/ → apps/server/ and packages/web/ → apps/web/ to adopt standard monorepo conventions (apps/ for runnable apps, packages/ for reusable libraries). Update all config files, shared package imports, test fixtures, and documentation to reflect new paths. Key fixes: - Update workspace config to ["apps/*", "packages/*"] - Update tsconfig.json rootDir/include for apps/server/ - Add apps/web/** to vitest exclude list - Update drizzle.config.ts schema path - Fix ensure-schema.ts migration path detection (3 levels up in dev, 2 levels up in dist) - Fix tests/integration/cli-server.test.ts import paths - Update packages/shared imports to apps/server/ paths - Update all docs/ files with new paths
307 lines
9.8 KiB
TypeScript
307 lines
9.8 KiB
TypeScript
/**
|
|
* Schema Validation & Retry Integration Tests
|
|
*
|
|
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
|
|
* They are SKIPPED by default and should only be run manually for validation.
|
|
*
|
|
* To run these tests:
|
|
* ```bash
|
|
* REAL_CLAUDE_TESTS=1 npm test -- apps/server/test/integration/real-providers/schema-retry.test.ts --test-timeout=300000
|
|
* ```
|
|
*
|
|
* Tests covered:
|
|
* - Valid JSON output validation
|
|
* - Questions status parsing
|
|
* - Schema validation failure with retry
|
|
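* - Max retry limit handling
* - JSON extraction from markdown code blocks and from text with surrounding content
* - Mode-specific schemas (discuss, plan, and detail modes)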
|
|
*
|
|
* Estimated cost: ~$0.20 per full run (includes retries)
|
|
*/
|
|
|
|
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
|
|
import {
|
|
createRealProviderHarness,
|
|
describeRealClaude,
|
|
REAL_TEST_TIMEOUT,
|
|
EXTENDED_TEST_TIMEOUT,
|
|
type RealProviderHarness,
|
|
} from './harness.js';
|
|
import { MINIMAL_PROMPTS } from './prompts.js';
|
|
import type { AgentResumedEvent, AgentCrashedEvent } from '../../../events/types.js';
|
|
|
|
describeRealClaude('Schema Validation & Retry', () => {
|
|
// Harness shared by every test in this suite. It is assigned once in
// beforeAll and cleaned up in afterAll.
let harness: RealProviderHarness;

// Print a cost warning and create the harness for the 'claude' provider.
beforeAll(async () => {
  console.log('\n=== Running Schema Validation & Retry Tests ===');
  console.log('These tests call the real Claude API and incur costs.');
  console.log('Retry tests may take longer and cost more.\n');
  harness = await createRealProviderHarness({ provider: 'claude' });
});

// Clean up the harness. The optional call guards against beforeAll having
// thrown before `harness` was assigned; without it, teardown would raise a
// second TypeError on top of the original setup failure.
afterAll(async () => {
  await harness?.cleanup();
});

// Clear the harness's recorded events before each test. The tests below
// assert on event counts (for example, zero 'agent:resumed' events).
beforeEach(() => {
  harness.clearEvents();
});
|
|
|
|
describe('Valid Output', () => {
  it(
    'validates done status output',
    async () => {
      // Start an agent with the minimal "done" prompt.
      const spawned = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.done,
        mode: 'execute',
        provider: 'claude',
      });

      // Block until the harness reports the agent as completed.
      const outcome = await harness.waitForAgentCompletion(spawned.id, REAL_TEST_TIMEOUT);

      // The stored agent must be idle and the run must have succeeded.
      const stored = await harness.agentRepository.findById(spawned.id);
      expect(stored?.status).toBe('idle');
      expect(outcome?.success).toBe(true);

      // Valid output on the first attempt means no resume (retry) events.
      const resumed = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
      expect(resumed).toHaveLength(0);

      console.log(' Status: idle (valid done output)');
      console.log(' Result:', outcome?.message);
    },
    REAL_TEST_TIMEOUT
  );

  it(
    'validates questions status output',
    async () => {
      // Start an agent with the prompt that makes it ask questions.
      const spawned = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.questions,
        mode: 'execute',
        provider: 'claude',
      });

      // Block until the agent is waiting for input.
      const pending = await harness.waitForAgentWaiting(spawned.id, REAL_TEST_TIMEOUT);

      // A non-empty array of questions must have been produced.
      expect(pending).toBeTruthy();
      expect(pending?.questions).toBeInstanceOf(Array);
      expect(pending?.questions.length).toBeGreaterThan(0);

      // Every question needs both an id and the question text.
      const askedQuestions = pending?.questions ?? [];
      for (const entry of askedQuestions) {
        expect(entry.id).toBeTruthy();
        expect(entry.question).toBeTruthy();
      }

      const stored = await harness.agentRepository.findById(spawned.id);
      expect(stored?.status).toBe('waiting_for_input');

      // No resume (retry) events expected for valid output.
      const resumed = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
      expect(resumed).toHaveLength(0);

      console.log(' Status: waiting_for_input (valid questions output)');
      console.log(' Questions:', pending?.questions.length);
    },
    REAL_TEST_TIMEOUT
  );

  it(
    'validates multiple questions',
    async () => {
      // Start an agent with the prompt that asks for several questions.
      const spawned = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.multipleQuestions,
        mode: 'execute',
        provider: 'claude',
      });

      // Block until the agent is waiting for input.
      const pending = await harness.waitForAgentWaiting(spawned.id, REAL_TEST_TIMEOUT);

      // At least two questions must come back.
      expect(pending?.questions.length).toBeGreaterThanOrEqual(2);

      // Question ids must be unique: de-duplicating must not shrink the list.
      const ids = pending?.questions.map((entry) => entry.id) ?? [];
      expect(new Set(ids).size).toBe(ids.length);

      console.log(' Questions:', ids.join(', '));
    },
    REAL_TEST_TIMEOUT
  );
});
|
|
|
|
describe('Retry Logic', () => {
  it(
    'retries when output does not match schema',
    async () => {
      // Prompt intended to produce non-JSON first, then valid JSON.
      // Note: Claude may or may not produce invalid output first, so this
      // test accepts both the retried and the not-retried path.
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: MINIMAL_PROMPTS.badThenGood,
        mode: 'execute',
        provider: 'claude',
      });

      // Wait for completion (may involve retries), using the extended timeout.
      const result = await harness.waitForAgentCompletion(agent.id, EXTENDED_TEST_TIMEOUT);

      const dbAgent = await harness.agentRepository.findById(agent.id);

      // Accepted end states: idle (succeeded, with or without a retry) or
      // crashed (handled in the else branch below).
      expect(['idle', 'crashed']).toContain(dbAgent?.status);

      // Report how many resume (retry) events were observed.
      const resumeEvents = harness.getEventsByType<AgentResumedEvent>('agent:resumed');
      console.log(' Retry attempts:', resumeEvents.length);
      console.log(' Final status:', dbAgent?.status);

      if (dbAgent?.status === 'idle') {
        expect(result?.success).toBe(true);
        console.log(' Result:', result?.message);
      } else {
        // Crashed (presumably after exhausting retries): a crash event must
        // have been emitted.
        const crashedEvents = harness.getEventsByType<AgentCrashedEvent>('agent:crashed');
        expect(crashedEvents.length).toBeGreaterThan(0);
        console.log(' Crashed after retries');
      }
    },
    EXTENDED_TEST_TIMEOUT
  );

  it(
    'extracts JSON from markdown code blocks',
    async () => {
      // Prompt that asks for JSON wrapped in a markdown code fence.
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: `Output the result wrapped in a markdown code block like this:
\`\`\`json
{"status":"done","result":"extracted from markdown"}
\`\`\``,
        mode: 'execute',
        provider: 'claude',
      });

      // Wait for completion
      const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

      const dbAgent = await harness.agentRepository.findById(agent.id);
      console.log(' Status:', dbAgent?.status);
      console.log(' Result:', result?.message);

      // Always assert one of the end states the retry test above accepts.
      // Without this, the test would pass with zero assertions when the
      // agent record is missing or in an unexpected state.
      expect(['idle', 'crashed']).toContain(dbAgent?.status);

      // Should succeed (JSON extraction from code block)
      if (dbAgent?.status === 'idle') {
        expect(result?.success).toBe(true);
      }
    },
    REAL_TEST_TIMEOUT
  );

  it(
    'extracts JSON from text with surrounding content',
    async () => {
      // Prompt that asks for introductory text followed by the JSON.
      const agent = await harness.agentManager.spawn({
        taskId: null,
        prompt: `First say "Here is my response:" then output the JSON:
{"status":"done","result":"extracted from text"}`,
        mode: 'execute',
        provider: 'claude',
      });

      // Wait for completion
      const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

      const dbAgent = await harness.agentRepository.findById(agent.id);
      console.log(' Status:', dbAgent?.status);
      console.log(' Result:', result?.message);

      // Always assert one of the end states the retry test above accepts.
      // Without this, the test would pass with zero assertions when the
      // agent record is missing or in an unexpected state.
      expect(['idle', 'crashed']).toContain(dbAgent?.status);

      // Should succeed (JSON extraction from last {...} block)
      if (dbAgent?.status === 'idle') {
        expect(result?.success).toBe(true);
      }
    },
    REAL_TEST_TIMEOUT
  );
});
|
|
|
|
describe('Mode-Specific Schemas', () => {
  // One row per agent mode: the test title, the mode passed to spawn, the
  // MINIMAL_PROMPTS key used as the prompt, and the prefix logged with the
  // result message.
  const modeCases = [
    {
      title: 'validates discuss mode output',
      mode: 'discuss',
      promptKey: 'discussComplete',
      logPrefix: ' Discuss mode result:',
    },
    {
      title: 'validates plan mode output',
      mode: 'plan',
      promptKey: 'planComplete',
      logPrefix: ' Plan mode result:',
    },
    {
      title: 'validates detail mode output',
      mode: 'detail',
      promptKey: 'detailComplete',
      logPrefix: ' Detail mode result:',
    },
  ] as const;

  // Register the tests in the same order as the table above.
  for (const { title, mode, promptKey, logPrefix } of modeCases) {
    it(
      title,
      async () => {
        // The prompt is looked up when the test runs, not at collection time.
        const spawned = await harness.agentManager.spawn({
          taskId: null,
          prompt: MINIMAL_PROMPTS[promptKey],
          mode,
          provider: 'claude',
        });

        const outcome = await harness.waitForAgentCompletion(spawned.id, REAL_TEST_TIMEOUT);

        // The stored agent must be idle and the run must have succeeded.
        const stored = await harness.agentRepository.findById(spawned.id);
        expect(stored?.status).toBe('idle');
        expect(outcome?.success).toBe(true);

        console.log(logPrefix, outcome?.message);
      },
      REAL_TEST_TIMEOUT
    );
  }
});
|
|
});
|