Files
Codewalkers/apps/server/test/integration/real-providers/codex-manager.test.ts
Lukas May 34578d39c6 refactor: Restructure monorepo to apps/server/ and apps/web/ layout
Move src/ → apps/server/ and packages/web/ → apps/web/ to adopt
standard monorepo conventions (apps/ for runnable apps, packages/
for reusable libraries). Update all config files, shared package
imports, test fixtures, and documentation to reflect new paths.

Key fixes:
- Update workspace config to ["apps/*", "packages/*"]
- Update tsconfig.json rootDir/include for apps/server/
- Add apps/web/** to vitest exclude list
- Update drizzle.config.ts schema path
- Fix ensure-schema.ts migration path detection (3 levels up in dev,
  2 levels up in dist)
- Fix tests/integration/cli-server.test.ts import paths
- Update packages/shared imports to apps/server/ paths
- Update all docs/ files with new paths
2026-03-03 11:22:53 +01:00

173 lines
5.5 KiB
TypeScript

/**
* Real Codex CLI Manager Integration Tests
*
* IMPORTANT: These tests call the REAL Codex CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run these tests:
* ```bash
* REAL_CODEX_TESTS=1 npm test -- apps/server/test/integration/real-providers/codex-manager.test.ts --test-timeout=300000
* ```
*
* Tests covered:
* - Codex spawn and thread_id extraction
* - Generic output parsing (non-schema)
* - Streaming output
*
* Estimated cost: ~$0.10 per full run
*
* Note: Codex uses different output format and session ID field (thread_id).
*/
import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
import {
createRealProviderHarness,
describeRealCodex,
REAL_TEST_TIMEOUT,
type RealProviderHarness,
} from './harness.js';
import { CODEX_PROMPTS } from './prompts.js';
import type { AgentSpawnedEvent, AgentOutputEvent } from '../../../events/types.js';
/**
 * Integration suite that drives the Codex provider through the agent manager
 * exposed by the real-provider harness.
 *
 * The suite is registered through `describeRealCodex` (from ./harness.js);
 * presumably that wrapper is what skips the suite unless real Codex tests are
 * explicitly enabled — confirm against the harness.
 *
 * Lifecycle:
 * - One harness is created for the whole suite and cleaned up at the end.
 * - Captured events are cleared before every test so event-count assertions
 *   only see events produced by the test that is currently running.
 */
describeRealCodex('Real Codex Manager Integration', () => {
  // Shared across all tests; assigned in beforeAll.
  let harness: RealProviderHarness;

  beforeAll(async () => {
    console.log('\n=== Running Real Codex Manager Tests ===');
    console.log('These tests call the real Codex API and incur costs.\n');
    harness = await createRealProviderHarness({ provider: 'codex' });
  });

  afterAll(async () => {
    await harness.cleanup();
  });

  beforeEach(() => {
    harness.clearEvents();
  });

  describe('Codex Spawn', () => {
    it(
      'spawns codex agent and extracts thread_id',
      async () => {
        // Spawn agent with simple task
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: CODEX_PROMPTS.done,
          mode: 'execute',
          provider: 'codex',
        });

        expect(agent.id).toBeTruthy();
        expect(agent.provider).toBe('codex');
        expect(agent.status).toBe('running');

        // Verify spawned event
        const spawnedEvents = harness.getEventsByType<AgentSpawnedEvent>('agent:spawned');
        expect(spawnedEvents.length).toBe(1);
        expect(spawnedEvents[0].payload.provider).toBe('codex');

        // Wait for completion
        const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

        // Verify session ID (thread_id) was extracted
        const dbAgent = await harness.agentRepository.findById(agent.id);
        console.log(' Thread ID:', dbAgent?.sessionId);
        console.log(' Status:', dbAgent?.status);
        console.log(' Result:', result?.message);

        // Codex should complete or crash
        expect(['idle', 'crashed']).toContain(dbAgent?.status);

        // If completed successfully, the thread_id must have been extracted.
        // The guard deliberately checks the status only: also requiring a
        // truthy sessionId in the condition would make the assertion below
        // impossible to fail.
        if (dbAgent?.status === 'idle') {
          expect(dbAgent.sessionId).toBeTruthy();
        }
      },
      REAL_TEST_TIMEOUT
    );

    it(
      'uses generic parser for output',
      async () => {
        // Spawn agent with streaming prompt
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: CODEX_PROMPTS.streaming,
          mode: 'execute',
          provider: 'codex',
        });

        // Wait for completion
        const result = await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);

        // Output events are logged for manual inspection only; their count is
        // not asserted.
        const outputEvents = harness.getEventsByType<AgentOutputEvent>('agent:output');
        console.log(' Output events:', outputEvents.length);

        // For generic provider, result should be captured
        const dbAgent = await harness.agentRepository.findById(agent.id);
        console.log(' Status:', dbAgent?.status);
        console.log(' Result:', result?.message?.substring(0, 100) + '...');

        expect(['idle', 'crashed']).toContain(dbAgent?.status);
      },
      REAL_TEST_TIMEOUT
    );
  });

  describe('Codex Provider Config', () => {
    it(
      'uses correct command and args for codex',
      async () => {
        // This is more of a config verification test
        // The actual command execution is validated by the spawn test
        const agent = await harness.agentManager.spawn({
          taskId: null,
          prompt: 'Say hello',
          mode: 'execute',
          provider: 'codex',
        });

        // Verify agent was created with codex provider
        const dbAgent = await harness.agentRepository.findById(agent.id);
        expect(dbAgent?.provider).toBe('codex');

        // Wait for completion (or timeout)
        try {
          await harness.waitForAgentCompletion(agent.id, REAL_TEST_TIMEOUT);
        } catch (error: unknown) {
          // Codex might fail if not installed, that's OK for config test.
          // The failure is still tolerated, but it is logged so it does not
          // disappear without a trace.
          console.log(
            ' Completion wait failed (tolerated):',
            error instanceof Error ? error.message : error
          );
        }

        const finalAgent = await harness.agentRepository.findById(agent.id);
        console.log(' Provider:', finalAgent?.provider);
        console.log(' Status:', finalAgent?.status);
      },
      REAL_TEST_TIMEOUT
    );
  });
});
/**
* Codex-specific observations from testing:
*
* 1. Output Format:
* - Codex uses JSONL streaming with different event types
* - thread.started event contains thread_id
* - Output parsing is more generic (not JSON schema validated)
*
* 2. Command Structure:
* - codex exec --full-auto --json -p "prompt"
* - resume: codex exec resume <thread_id>
*
* 3. Session ID:
* - Called "thread_id" in Codex
* - Extracted from thread.started event
*
* 4. Resume:
* - Uses subcommand style: codex exec resume <thread_id>
* - Different from Claude's flag style: claude --resume <session_id>
*/