Move src/ → apps/server/ and packages/web/ → apps/web/ to adopt standard monorepo conventions (apps/ for runnable apps, packages/ for reusable libraries). Update all config files, shared package imports, test fixtures, and documentation to reflect new paths. Key fixes: - Update workspace config to ["apps/*", "packages/*"] - Update tsconfig.json rootDir/include for apps/server/ - Add apps/web/** to vitest exclude list - Update drizzle.config.ts schema path - Fix ensure-schema.ts migration path detection (3 levels up in dev, 2 levels up in dist) - Fix tests/integration/cli-server.test.ts import paths - Update packages/shared imports to apps/server/ paths - Update all docs/ files with new paths
173 lines
5.5 KiB
TypeScript
173 lines
5.5 KiB
TypeScript
/**
|
|
* Real Codex CLI Manager Integration Tests
|
|
*
|
|
* IMPORTANT: These tests call the REAL Codex CLI and incur API costs!
|
|
* They are SKIPPED by default and should only be run manually for validation.
|
|
*
|
|
* To run these tests:
|
|
* ```bash
|
|
* REAL_CODEX_TESTS=1 npm test -- apps/server/test/integration/real-providers/codex-manager.test.ts --test-timeout=300000
|
|
* ```
|
|
*
|
|
* Tests covered:
|
|
* - Codex spawn and thread_id extraction
|
|
* - Generic output parsing (non-schema)
|
|
* - Streaming output
|
|
*
|
|
* Estimated cost: ~$0.10 per full run
|
|
*
|
|
* Note: Codex uses different output format and session ID field (thread_id).
|
|
*/
|
|
|
|
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
|
|
import {
|
|
createRealProviderHarness,
|
|
describeRealCodex,
|
|
REAL_TEST_TIMEOUT,
|
|
type RealProviderHarness,
|
|
} from './harness.js';
|
|
import { CODEX_PROMPTS } from './prompts.js';
|
|
import type { AgentSpawnedEvent, AgentOutputEvent } from '../../../events/types.js';
|
|
|
|
/**
 * Integration suite that drives the real Codex CLI through the agent manager.
 *
 * Registered via `describeRealCodex` from ./harness.js. Per the file header these
 * tests are skipped by default and only run when explicitly enabled
 * (REAL_CODEX_TESTS=1) — presumably that wrapper performs the gating; confirm in
 * the harness module.
 *
 * Every case spawns a real agent, so each `it` is given REAL_TEST_TIMEOUT as its
 * per-test timeout.
 */
describeRealCodex('Real Codex Manager Integration', () => {
  // Shared across all cases; created once in beforeAll and torn down in afterAll.
  let harness: RealProviderHarness;

  beforeAll(async () => {
    console.log('\n=== Running Real Codex Manager Tests ===');
    console.log('These tests call the real Codex API and incur costs.\n');
    harness = await createRealProviderHarness({ provider: 'codex' });
  });

  afterAll(async () => {
    await harness.cleanup();
  });

  // Start every case with an empty event log so event-count assertions only see
  // events produced by that case.
  beforeEach(() => {
    harness.clearEvents();
  });

  describe('Codex Spawn', () => {
    it(
      'spawns codex agent and extracts thread_id',
      async () => {
        // Spawn agent with simple task
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: CODEX_PROMPTS.done,
          mode: 'execute',
          provider: 'codex',
        });

        expect(agent.id).toBeTruthy();
        expect(agent.provider).toBe('codex');
        expect(agent.status).toBe('running');

        // Verify spawned event
        const spawnedEvents = harness.getEventsByType<AgentSpawnedEvent>('agent:spawned');
        expect(spawnedEvents).toHaveLength(1);
        expect(spawnedEvents[0].payload.provider).toBe('codex');

        // Wait for completion
        const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

        // Re-read the agent from the repository to inspect its persisted state
        const dbAgent = await harness.agentRepository.findById(agent.id);
        console.log(' Thread ID:', dbAgent?.sessionId);
        console.log(' Status:', dbAgent?.status);
        console.log(' Result:', result?.message);

        // Codex should complete or crash
        expect(['idle', 'crashed']).toContain(dbAgent?.status);

        // If completed successfully, the thread_id must have been extracted.
        // Guard on status only: also guarding on sessionId would make the
        // assertion below impossible to fail.
        if (dbAgent?.status === 'idle') {
          expect(dbAgent.sessionId).toBeTruthy();
        }
      },
      REAL_TEST_TIMEOUT
    );

    it(
      'uses generic parser for output',
      async () => {
        // Spawn agent with streaming prompt
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: CODEX_PROMPTS.streaming,
          mode: 'execute',
          provider: 'codex',
        });

        // Wait for completion
        const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

        // Report how many output events were captured (informational only)
        const outputEvents = harness.getEventsByType<AgentOutputEvent>('agent:output');
        console.log(' Output events:', outputEvents.length);

        // For generic provider, result should be captured
        const dbAgent = await harness.agentRepository.findById(agent.id);
        console.log(' Status:', dbAgent?.status);

        // Only append the ellipsis when there is a message, so a missing result
        // is logged as undefined rather than the misleading string "undefined...".
        const preview = result?.message?.substring(0, 100);
        console.log(' Result:', preview === undefined ? preview : preview + '...');

        expect(['idle', 'crashed']).toContain(dbAgent?.status);
      },
      REAL_TEST_TIMEOUT
    );
  });

  describe('Codex Provider Config', () => {
    it(
      'uses correct command and args for codex',
      async () => {
        // This is more of a config verification test
        // The actual command execution is validated by the spawn test
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: 'Say hello',
          mode: 'execute',
          provider: 'codex',
        });

        // Verify agent was created with codex provider
        const dbAgent = await harness.agentRepository.findById(agent.id);
        expect(dbAgent?.provider).toBe('codex');

        // Wait for completion (or timeout)
        try {
          await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
        } catch (error: unknown) {
          // Codex might fail if not installed, that's OK for config test.
          // Still surface the reason so a tolerated failure is visible in the log.
          console.log(
            ' Completion wait failed (tolerated):',
            error instanceof Error ? error.message : error
          );
        }

        const finalAgent = await harness.agentRepository.findById(agent.id);
        console.log(' Provider:', finalAgent?.provider);
        console.log(' Status:', finalAgent?.status);

        // The provider recorded for the agent must still be codex after the run.
        expect(finalAgent?.provider).toBe('codex');
      },
      REAL_TEST_TIMEOUT
    );
  });
});
|
|
|
|
/**
|
|
* Codex-specific observations from testing:
|
|
*
|
|
* 1. Output Format:
|
|
* - Codex uses JSONL streaming with different event types
|
|
* - thread.started event contains thread_id
|
|
* - Output parsing is more generic (not JSON schema validated)
|
|
*
|
|
* 2. Command Structure:
|
|
* - codex exec --full-auto --json -p "prompt"
|
|
* - resume: codex exec resume <thread_id>
|
|
*
|
|
* 3. Session ID:
|
|
* - Called "thread_id" in Codex
|
|
* - Extracted from thread.started event
|
|
*
|
|
* 4. Resume:
|
|
* - Uses subcommand style: codex exec resume <thread_id>
|
|
* - Different from Claude's flag style: claude --resume <session_id>
|
|
*/
|