From 9902069d8db9d681ab4190ba663e6e53d70406d0 Mon Sep 17 00:00:00 2001
From: Lukas May
Date: Tue, 10 Feb 2026 13:49:04 +0100
Subject: [PATCH] test: Add real Claude inter-agent conversation integration test

Two-session test: Agent A listens for questions and answers, Agent B
asks a question and captures the response. Also fixes missing
conversationRepository passthrough in tRPC adapter.
---
 src/server/trpc-adapter.ts                    |   4 +
 .../real-providers/conversation.test.ts       | 317 ++++++++++++++++++
 2 files changed, 321 insertions(+)
 create mode 100644 src/test/integration/real-providers/conversation.test.ts

diff --git a/src/server/trpc-adapter.ts b/src/server/trpc-adapter.ts
index e94e163..2a88b1c 100644
--- a/src/server/trpc-adapter.ts
+++ b/src/server/trpc-adapter.ts
@@ -19,6 +19,7 @@ import type { ProjectRepository } from '../db/repositories/project-repository.js
 import type { AccountRepository } from '../db/repositories/account-repository.js';
 import type { ChangeSetRepository } from '../db/repositories/change-set-repository.js';
 import type { LogChunkRepository } from '../db/repositories/log-chunk-repository.js';
+import type { ConversationRepository } from '../db/repositories/conversation-repository.js';
 import type { AccountCredentialManager } from '../agent/credentials/types.js';
 import type { DispatchManager, PhaseDispatchManager } from '../dispatch/types.js';
 import type { CoordinationManager } from '../coordination/types.js';
@@ -70,6 +71,8 @@ export interface TrpcAdapterOptions {
   executionOrchestrator?: ExecutionOrchestrator;
   /** Preview manager for Docker-based preview deployments */
   previewManager?: PreviewManager;
+  /** Conversation repository for inter-agent communication */
+  conversationRepository?: ConversationRepository;
   /** Absolute path to the workspace root (.cwrc directory) */
   workspaceRoot?: string;
 }
@@ -150,6 +153,7 @@ export function createTrpcHandler(options: TrpcAdapterOptions) {
       branchManager: options.branchManager,
       executionOrchestrator: options.executionOrchestrator,
       previewManager: options.previewManager,
+      conversationRepository: options.conversationRepository,
       workspaceRoot: options.workspaceRoot,
     }),
   });
diff --git a/src/test/integration/real-providers/conversation.test.ts b/src/test/integration/real-providers/conversation.test.ts
new file mode 100644
index 0000000..ca3f90a
--- /dev/null
+++ b/src/test/integration/real-providers/conversation.test.ts
@@ -0,0 +1,317 @@
+/**
+ * Real Claude Inter-Agent Conversation Integration Tests
+ *
+ * IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
+ * They are SKIPPED by default and should only be run manually for validation.
+ *
+ * To run:
+ * ```bash
+ * REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/conversation.test.ts --test-timeout=300000
+ * ```
+ *
+ * Tests covered:
+ * - Two real Claude sessions communicating via cw ask / cw listen / cw answer
+ * - Agent identity (agentId/agentName) in manifest.json
+ * - Conversation lifecycle: create → pending → answered
+ *
+ * Estimated cost: ~$0.20 per full run (two Claude sessions)
+ */
+
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { mkdtemp, rm, readFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import Database from 'better-sqlite3';
+import { drizzle } from 'drizzle-orm/better-sqlite3';
+import * as schema from '../../../db/schema.js';
+import { ensureSchema } from '../../../db/ensure-schema.js';
+import { EventEmitterBus } from '../../../events/bus.js';
+import type { DomainEvent } from '../../../events/types.js';
+import { MultiProviderAgentManager } from '../../../agent/manager.js';
+import {
+  DrizzleAgentRepository,
+  DrizzleProjectRepository,
+  DrizzleAccountRepository,
+  DrizzleConversationRepository,
+  DrizzleTaskRepository,
+  DrizzlePhaseRepository,
+  DrizzlePageRepository,
+  DrizzleLogChunkRepository,
+  DrizzleChangeSetRepository,
+} from '../../../db/repositories/drizzle/index.js';
+import { CoordinationServer } from '../../../server/index.js';
+import { ProcessManager, ProcessRegistry } from '../../../process/index.js';
+import { LogManager } from '../../../logging/index.js';
+import { createTrpcClient } from '../../../cli/trpc-client.js';
+import type { TrpcClient } from '../../../cli/trpc-client.js';
+import { describeRealClaude, sleep } from './harness.js';
+
+const CONVERSATION_TEST_TIMEOUT = 180000; // 3 minutes of agent polling per test
+// Vitest timeout: polling budget plus headroom for the spawn delay and final assertions,
+// so a slow agent fails on a readable expect() rather than a bare test timeout.
+const VITEST_TIMEOUT = CONVERSATION_TEST_TIMEOUT + 30000;
+
+describeRealClaude('Real Inter-Agent Conversation', () => {
+  let workspaceRoot: string;
+  let agentManager: MultiProviderAgentManager;
+  let agentRepository: DrizzleAgentRepository;
+  let conversationRepository: DrizzleConversationRepository;
+  let server: CoordinationServer;
+  let client: TrpcClient;
+  let testPort: number;
+  let eventBus: EventEmitterBus;
+  const originalCwPort = process.env.CW_PORT;
+
+  beforeAll(async () => {
+    console.log('\n=== Running Real Inter-Agent Conversation Tests ===');
+    console.log('These tests spawn TWO Claude sessions and incur costs.\n');
+
+    // Create temp workspace
+    workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-conv-test-'));
+    const { execSync } = await import('node:child_process');
+    execSync('git init && git config user.email "test@test.com" && git config user.name "Test" && touch .gitkeep && git add . && git commit -m "init"', {
+      cwd: workspaceRoot,
+      stdio: 'ignore',
+    });
+
+    // Create in-memory DB
+    const sqlite = new Database(':memory:');
+    sqlite.pragma('foreign_keys = ON');
+    const db = drizzle(sqlite, { schema });
+    ensureSchema(db);
+
+    // Create repositories
+    agentRepository = new DrizzleAgentRepository(db);
+    const projectRepository = new DrizzleProjectRepository(db);
+    const accountRepository = new DrizzleAccountRepository(db);
+    conversationRepository = new DrizzleConversationRepository(db);
+    const taskRepository = new DrizzleTaskRepository(db);
+    const phaseRepository = new DrizzlePhaseRepository(db);
+    const pageRepository = new DrizzlePageRepository(db);
+    const logChunkRepository = new DrizzleLogChunkRepository(db);
+    const changeSetRepository = new DrizzleChangeSetRepository(db);
+
+    // Event bus
+    eventBus = new EventEmitterBus();
+
+    // Agent manager
+    agentManager = new MultiProviderAgentManager(
+      agentRepository,
+      workspaceRoot,
+      projectRepository,
+      accountRepository,
+      eventBus,
+      undefined, // no credential manager
+      changeSetRepository,
+      phaseRepository,
+      taskRepository,
+      pageRepository,
+      logChunkRepository,
+    );
+
+    // Start server on random port
+    testPort = 40000 + Math.floor(Math.random() * 10000);
+    const registry = new ProcessRegistry();
+    const processManager = new ProcessManager(registry, eventBus);
+    const logManager = new LogManager();
+
+    server = new CoordinationServer(
+      { port: testPort, pidFile: `/tmp/cw-conv-test-${testPort}.pid` },
+      processManager,
+      logManager,
+      eventBus,
+      {
+        agentManager,
+        taskRepository,
+        projectRepository,
+        accountRepository,
+        conversationRepository,
+        phaseRepository,
+        pageRepository,
+        logChunkRepository,
+        changeSetRepository,
+        workspaceRoot,
+      },
+    );
+
+    await server.start();
+    console.log(` Server started on port ${testPort}`);
+
+    // Set CW_PORT so spawned agents can reach the server
+    process.env.CW_PORT = String(testPort);
+
+    // Create tRPC client
+    client = createTrpcClient(testPort);
+  });
+
+  afterAll(async () => {
+    // Restore env; compare against undefined so an originally empty CW_PORT is put back too
+    if (originalCwPort !== undefined) {
+      process.env.CW_PORT = originalCwPort;
+    } else {
+      delete process.env.CW_PORT;
+    }
+
+    // Kill agents
+    const agents = await agentRepository.findAll();
+    for (const a of agents) {
+      if (a.status === 'running') {
+        try { await agentManager.stop(a.id); } catch { /* ignore */ }
+      }
+    }
+
+    // Stop server
+    try { await server.stop(); } catch { /* ignore */ }
+
+    // Clean up
+    try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ }
+  });
+
+  it(
+    'two Claude agents communicate via cw ask/listen/answer',
+    async () => {
+      // The listener output lives inside the per-run temp workspace so a stale file
+      // from an earlier run can never satisfy Agent A's "file is non-empty" poll.
+      const listenOutputPath = join(workspaceRoot, 'cw-listen-output.txt');
+
+      // Spawn Agent A — the responder
+      // It starts a listener, waits for a question, answers it, then completes
+      const agentA = await agentManager.spawn({
+        taskId: null,
+        prompt: `You are Agent A in a multi-agent test. Your job:
+
+1. Read .cw/input/manifest.json to get your agentId
+2. Start a background listener: cw listen --agent-id YOUR_AGENT_ID --timeout 120000 > "${listenOutputPath}" &
+   LISTEN_PID=$!
+3. Wait for the listener to find a question by polling the file:
+   while [ ! -s "${listenOutputPath}" ]; do sleep 1; done
+4. Parse the JSON from ${listenOutputPath} to get conversationId
+5. Answer the conversation: cw answer "The answer is 42" --conversation-id CONVERSATION_ID
+6. Kill the listener: kill $LISTEN_PID 2>/dev/null
+7. Write .cw/output/signal.json with: {"status":"done"}
+
+IMPORTANT:
+- The server is on port ${testPort}
+- Replace YOUR_AGENT_ID and CONVERSATION_ID with the real values before running the commands
+- Use the exact answer text: "The answer is 42"
+- Do NOT ask any questions or produce other output`,
+        mode: 'execute',
+        provider: 'claude',
+      });
+
+      console.log(` Agent A spawned: ${agentA.id} (${agentA.name})`);
+
+      // Small delay to let Agent A start its listener
+      await sleep(5000);
+
+      // Spawn Agent B — the questioner
+      // It asks Agent A a question, waits for the answer, writes it to signal.json result
+      const agentB = await agentManager.spawn({
+        taskId: null,
+        prompt: `You are Agent B in a multi-agent test. Your job:
+
+1. Read .cw/input/manifest.json to get your agentId
+2. Ask Agent A a question using this command:
+   ANSWER=$(cw ask "What is the meaning of everything?" --from YOUR_AGENT_ID --agent-id ${agentA.id} --timeout 120000)
+3. Write .cw/output/signal.json with: {"status":"done","result":""}
+
+IMPORTANT:
+- The server is on port ${testPort}
+- Agent A's ID is: ${agentA.id}
+- Replace YOUR_AGENT_ID with your own agentId from the manifest
+- Write the EXACT answer you received from Agent A into the result field
+- Do NOT ask any questions or produce other output`,
+        mode: 'execute',
+        provider: 'claude',
+      });
+
+      console.log(` Agent B spawned: ${agentB.id} (${agentB.name})`);
+
+      // Poll the repository until both agents have left 'running' or the budget is spent
+      const deadline = Date.now() + CONVERSATION_TEST_TIMEOUT;
+
+      let agentADone = false;
+      let agentBDone = false;
+
+      while (Date.now() < deadline && (!agentADone || !agentBDone)) {
+        const a = await agentRepository.findById(agentA.id);
+        const b = await agentRepository.findById(agentB.id);
+
+        if (a && a.status !== 'running') {
+          agentADone = true;
+          console.log(` Agent A status: ${a.status}`);
+        }
+        if (b && b.status !== 'running') {
+          agentBDone = true;
+          console.log(` Agent B status: ${b.status}`);
+        }
+
+        if (!agentADone || !agentBDone) {
+          await sleep(2000);
+        }
+      }
+
+      // Verify results
+      const finalA = await agentRepository.findById(agentA.id);
+      const finalB = await agentRepository.findById(agentB.id);
+
+      console.log(` Agent A final status: ${finalA?.status}`);
+      console.log(` Agent B final status: ${finalB?.status}`);
+      console.log(` Agent B result: ${finalB?.result}`);
+
+      // Agent A should have completed (idle)
+      expect(finalA?.status).toBe('idle');
+
+      // Agent B should have completed (idle)
+      expect(finalB?.status).toBe('idle');
+
+      // Agent B's result should contain the answer from Agent A
+      expect(finalB?.result).toBeTruthy();
+      expect(finalB!.result).toContain('42');
+
+      // No conversation addressed to Agent A may still be pending in the database
+      const conversations = await conversationRepository.findPendingForAgent(agentA.id);
+      // Should be empty (all answered)
+      expect(conversations.length).toBe(0);
+    },
+    VITEST_TIMEOUT,
+  );
+
+  it(
+    'agent manifest.json contains agentId and agentName',
+    async () => {
+      const agent = await agentManager.spawn({
+        taskId: null,
+        prompt: `Read .cw/input/manifest.json and write its contents to .cw/output/signal.json in this format:
+{"status":"done","result":""}
+Do NOT modify the manifest contents. Just copy them into the result field as a string.`,
+        mode: 'execute',
+        provider: 'claude',
+        inputContext: {},
+      });
+
+      console.log(` Identity test agent: ${agent.id} (${agent.name})`);
+
+      // Wait for completion
+      const deadline = Date.now() + CONVERSATION_TEST_TIMEOUT;
+      while (Date.now() < deadline) {
+        const a = await agentRepository.findById(agent.id);
+        if (a && a.status !== 'running') break;
+        await sleep(1000);
+      }
+
+      const final = await agentRepository.findById(agent.id);
+      console.log(` Agent status: ${final?.status}`);
+      console.log(` Agent result: ${final?.result}`);
+
+      expect(final?.status).toBe('idle');
+      expect(final?.result).toBeTruthy();
+
+      // The result should contain the manifest contents which include agentId and agentName
+      const result = final!.result!;
+      expect(result).toContain(agent.id);
+      expect(result).toContain(agent.name);
+    },
+    VITEST_TIMEOUT,
+  );
+});