test: Add real Claude inter-agent conversation integration test

Two-session test: Agent A listens for questions and answers, Agent B
asks a question and captures the response. Also fixes missing
conversationRepository passthrough in tRPC adapter.
This commit is contained in:
Lukas May
2026-02-10 13:49:04 +01:00
parent 60f06671e4
commit 9902069d8d
2 changed files with 321 additions and 0 deletions

View File

@@ -19,6 +19,7 @@ import type { ProjectRepository } from '../db/repositories/project-repository.js
import type { AccountRepository } from '../db/repositories/account-repository.js';
import type { ChangeSetRepository } from '../db/repositories/change-set-repository.js';
import type { LogChunkRepository } from '../db/repositories/log-chunk-repository.js';
import type { ConversationRepository } from '../db/repositories/conversation-repository.js';
import type { AccountCredentialManager } from '../agent/credentials/types.js';
import type { DispatchManager, PhaseDispatchManager } from '../dispatch/types.js';
import type { CoordinationManager } from '../coordination/types.js';
@@ -70,6 +71,8 @@ export interface TrpcAdapterOptions {
executionOrchestrator?: ExecutionOrchestrator;
/** Preview manager for Docker-based preview deployments */
previewManager?: PreviewManager;
/** Conversation repository for inter-agent communication */
conversationRepository?: ConversationRepository;
/** Absolute path to the workspace root (.cwrc directory) */
workspaceRoot?: string;
}
@@ -150,6 +153,7 @@ export function createTrpcHandler(options: TrpcAdapterOptions) {
branchManager: options.branchManager,
executionOrchestrator: options.executionOrchestrator,
previewManager: options.previewManager,
conversationRepository: options.conversationRepository,
workspaceRoot: options.workspaceRoot,
}),
});

View File

@@ -0,0 +1,317 @@
/**
* Real Claude Inter-Agent Conversation Integration Tests
*
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/conversation.test.ts --test-timeout=300000
* ```
*
* Tests covered:
* - Two real Claude sessions communicating via cw ask / cw listen / cw answer
* - Agent identity (agentId/agentName) in manifest.json
* - Conversation lifecycle: create → pending → answered
*
* Estimated cost: ~$0.20 per full run (two Claude sessions)
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { mkdtemp, rm, readFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import Database from 'better-sqlite3';
import { drizzle } from 'drizzle-orm/better-sqlite3';
import * as schema from '../../../db/schema.js';
import { ensureSchema } from '../../../db/ensure-schema.js';
import { EventEmitterBus } from '../../../events/bus.js';
import type { DomainEvent } from '../../../events/types.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import {
DrizzleAgentRepository,
DrizzleProjectRepository,
DrizzleAccountRepository,
DrizzleConversationRepository,
DrizzleTaskRepository,
DrizzlePhaseRepository,
DrizzlePageRepository,
DrizzleLogChunkRepository,
DrizzleChangeSetRepository,
} from '../../../db/repositories/drizzle/index.js';
import { CoordinationServer } from '../../../server/index.js';
import { ProcessManager, ProcessRegistry } from '../../../process/index.js';
import { LogManager } from '../../../logging/index.js';
import { createTrpcClient } from '../../../cli/trpc-client.js';
import type { TrpcClient } from '../../../cli/trpc-client.js';
import { describeRealClaude, sleep } from './harness.js';
/**
 * Upper bound, in milliseconds, for each test in this suite.
 * Passed to `it` as the per-test timeout.
 */
const CONVERSATION_TEST_TIMEOUT = 180000; // 3 minutes per test
describeRealClaude('Real Inter-Agent Conversation', () => {
let workspaceRoot: string;
let agentManager: MultiProviderAgentManager;
let agentRepository: DrizzleAgentRepository;
let conversationRepository: DrizzleConversationRepository;
let server: CoordinationServer;
let client: TrpcClient;
let testPort: number;
let eventBus: EventEmitterBus;
const originalCwPort = process.env.CW_PORT;
/**
 * One-time suite setup.
 *
 * Creates a temporary git workspace, an in-memory SQLite database with the
 * project schema, the Drizzle repositories, the agent manager and a
 * CoordinationServer listening on a randomly chosen port. Finally exports the
 * port through CW_PORT and creates the tRPC client.
 */
beforeAll(async () => {
  console.log('\n=== Running Real Inter-Agent Conversation Tests ===');
  console.log('These tests spawn TWO Claude sessions and incur costs.\n');

  // Create temp workspace and turn it into a git repository with one commit.
  workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-conv-test-'));
  const { execSync } = await import('node:child_process');
  execSync('git init && git config user.email "test@test.com" && git config user.name "Test" && touch .gitkeep && git add . && git commit -m "init"', {
    cwd: workspaceRoot,
    stdio: 'ignore',
  });

  // Create in-memory DB (foreign key enforcement switched on explicitly).
  const sqlite = new Database(':memory:');
  sqlite.pragma('foreign_keys = ON');
  const db = drizzle(sqlite, { schema });
  ensureSchema(db);

  // Create repositories
  agentRepository = new DrizzleAgentRepository(db);
  const projectRepository = new DrizzleProjectRepository(db);
  const accountRepository = new DrizzleAccountRepository(db);
  conversationRepository = new DrizzleConversationRepository(db);
  const taskRepository = new DrizzleTaskRepository(db);
  const phaseRepository = new DrizzlePhaseRepository(db);
  const pageRepository = new DrizzlePageRepository(db);
  const logChunkRepository = new DrizzleLogChunkRepository(db);
  const changeSetRepository = new DrizzleChangeSetRepository(db);

  // Event bus
  eventBus = new EventEmitterBus();

  // Agent manager
  agentManager = new MultiProviderAgentManager(
    agentRepository,
    workspaceRoot,
    projectRepository,
    accountRepository,
    eventBus,
    undefined, // no credential manager
    changeSetRepository,
    phaseRepository,
    taskRepository,
    pageRepository,
    logChunkRepository,
  );

  // Start server on random port in the range 40000-49999.
  // NOTE(review): the port is not checked for availability; a collision
  // would surface as a failure of server.start().
  testPort = 40000 + Math.floor(Math.random() * 10000);
  const registry = new ProcessRegistry();
  const processManager = new ProcessManager(registry, eventBus);
  const logManager = new LogManager();
  server = new CoordinationServer(
    {
      port: testPort,
      // Use the OS temp directory (same source as the workspace above)
      // instead of a hard-coded /tmp path.
      pidFile: join(tmpdir(), `cw-conv-test-${testPort}.pid`),
    },
    processManager,
    logManager,
    eventBus,
    {
      agentManager,
      taskRepository,
      projectRepository,
      accountRepository,
      conversationRepository,
      phaseRepository,
      pageRepository,
      logChunkRepository,
      changeSetRepository,
      workspaceRoot,
    },
  );
  await server.start();
  console.log(` Server started on port ${testPort}`);

  // Set CW_PORT so spawned agents can reach the server
  process.env.CW_PORT = String(testPort);

  // Create tRPC client
  client = createTrpcClient(testPort);
});
/**
 * Suite teardown. Every step is best-effort and isolated from the others so
 * that a failure in one (for example because setup aborted early) does not
 * prevent the remaining cleanup from running.
 */
afterAll(async () => {
  // Restore env. Compare against undefined so an originally empty CW_PORT
  // is restored rather than deleted.
  if (originalCwPort !== undefined) {
    process.env.CW_PORT = originalCwPort;
  } else {
    delete process.env.CW_PORT;
  }

  // Kill agents that are still running.
  try {
    const agents = await agentRepository.findAll();
    for (const a of agents) {
      if (a.status === 'running') {
        try { await agentManager.stop(a.id); } catch { /* ignore */ }
      }
    }
  } catch (err: unknown) {
    // Listing agents failed (e.g. setup never created the repository);
    // report it but keep going so the server and workspace are cleaned up.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(` Could not clean up agents: ${reason}`);
  }

  // Stop server
  try { await server.stop(); } catch { /* ignore */ }

  // Clean up
  try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ }
});
/**
 * End-to-end conversation test: Agent A listens for a question and answers
 * it, Agent B asks the question and reports the answer it received.
 */
it(
  'two Claude agents communicate via cw ask/listen/answer',
  async () => {
    // Stop polling a little before the runner's own timeout (which is also
    // CONVERSATION_TEST_TIMEOUT) so the final status logging and assertions
    // below still get to run and produce a meaningful failure.
    const pollDeadline = Date.now() + CONVERSATION_TEST_TIMEOUT - 15000;

    // Spawn Agent A — the responder
    // It starts a listener, waits for a question, answers it, then completes
    const agentA = await agentManager.spawn({
      taskId: null,
      prompt: `You are Agent A in a multi-agent test. Your job:
1. Read .cw/input/manifest.json to get your agentId
2. Start a background listener: cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-output.txt &
LISTEN_PID=$!
3. Wait for the listener to find a question by polling the file:
while [ ! -s /tmp/cw-listen-output.txt ]; do sleep 1; done
4. Parse the JSON from /tmp/cw-listen-output.txt to get conversationId
5. Answer the conversation: cw answer "The answer is 42" --conversation-id <conversationId>
6. Kill the listener: kill $LISTEN_PID 2>/dev/null
7. Write .cw/output/signal.json with: {"status":"done"}
IMPORTANT:
- The server is on port ${testPort}
- Use the exact answer text: "The answer is 42"
- Do NOT ask any questions or produce other output`,
      mode: 'execute',
      provider: 'claude',
    });
    console.log(` Agent A spawned: ${agentA.id} (${agentA.name})`);

    // Small delay to let Agent A start its listener
    await sleep(5000);

    // Spawn Agent B — the questioner
    // It asks Agent A a question, waits for the answer, writes it to signal.json result
    const agentB = await agentManager.spawn({
      taskId: null,
      prompt: `You are Agent B in a multi-agent test. Your job:
1. Read .cw/input/manifest.json to get your agentId
2. Ask Agent A a question using this command:
ANSWER=$(cw ask "What is the meaning of everything?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
3. Write .cw/output/signal.json with: {"status":"done","result":"<the ANSWER you received>"}
IMPORTANT:
- The server is on port ${testPort}
- Agent A's ID is: ${agentA.id}
- Write the EXACT answer you received from Agent A into the result field
- Do NOT ask any questions or produce other output`,
      mode: 'execute',
      provider: 'claude',
    });
    console.log(` Agent B spawned: ${agentB.id} (${agentB.name})`);

    // Wait for both agents to leave the 'running' state. Each agent is only
    // queried (and logged) until it has been seen as finished once.
    let agentADone = false;
    let agentBDone = false;
    while (Date.now() < pollDeadline && (!agentADone || !agentBDone)) {
      if (!agentADone) {
        const a = await agentRepository.findById(agentA.id);
        if (a && a.status !== 'running') {
          agentADone = true;
          console.log(` Agent A status: ${a.status}`);
        }
      }
      if (!agentBDone) {
        const b = await agentRepository.findById(agentB.id);
        if (b && b.status !== 'running') {
          agentBDone = true;
          console.log(` Agent B status: ${b.status}`);
        }
      }
      if (!agentADone || !agentBDone) {
        await sleep(2000);
      }
    }

    // Verify results
    const finalA = await agentRepository.findById(agentA.id);
    const finalB = await agentRepository.findById(agentB.id);
    console.log(` Agent A final status: ${finalA?.status}`);
    console.log(` Agent B final status: ${finalB?.status}`);
    console.log(` Agent B result: ${finalB?.result}`);

    // Agent A should have completed (idle)
    expect(finalA?.status).toBe('idle');
    // Agent B should have completed (idle)
    expect(finalB?.status).toBe('idle');

    // Agent B's result should contain the answer from Agent A.
    // NOTE(review): "42" is a well-known answer to this kind of question, so
    // a model could produce it without ever receiving Agent A's reply —
    // consider a less guessable answer token.
    expect(finalB?.result).toBeTruthy();
    expect(finalB!.result).toContain('42');

    // No conversation addressed to Agent A should still be pending.
    // NOTE(review): this also passes if no conversation was ever created;
    // it does not by itself prove that one exists.
    const conversations = await conversationRepository.findPendingForAgent(agentA.id);
    expect(conversations.length).toBe(0);
  },
  CONVERSATION_TEST_TIMEOUT,
);
/**
 * Identity test: the agent copies its manifest.json into the signal result,
 * and the result is expected to contain the spawned agent's id and name.
 */
it(
  'agent manifest.json contains agentId and agentName',
  async () => {
    // Stop polling a little before the runner's own timeout (which is also
    // CONVERSATION_TEST_TIMEOUT) so the status/result logging and the
    // assertions below still run when the agent never finishes.
    const pollDeadline = Date.now() + CONVERSATION_TEST_TIMEOUT - 15000;

    const agent = await agentManager.spawn({
      taskId: null,
      prompt: `Read .cw/input/manifest.json and write its contents to .cw/output/signal.json in this format:
{"status":"done","result":"<raw contents of manifest.json>"}
Do NOT modify the manifest contents. Just copy them into the result field as a string.`,
      mode: 'execute',
      provider: 'claude',
      inputContext: {},
    });
    console.log(` Identity test agent: ${agent.id} (${agent.name})`);

    // Wait for completion: poll once per second until the agent is no
    // longer 'running' or the deadline passes.
    while (Date.now() < pollDeadline) {
      const current = await agentRepository.findById(agent.id);
      if (current && current.status !== 'running') break;
      await sleep(1000);
    }

    const finalAgent = await agentRepository.findById(agent.id);
    console.log(` Agent status: ${finalAgent?.status}`);
    console.log(` Agent result: ${finalAgent?.result}`);

    expect(finalAgent?.status).toBe('idle');
    expect(finalAgent?.result).toBeTruthy();

    // The result should contain the manifest contents which include agentId and agentName
    const result = finalAgent!.result!;
    expect(result).toContain(agent.id);
    expect(result).toContain(agent.name);
  },
  CONVERSATION_TEST_TIMEOUT,
);
});