test: Add real Claude inter-agent conversation integration test

Two-session test: Agent A listens for questions and answers, Agent B asks a question and captures the response. Also fixes missing conversationRepository passthrough in tRPC adapter.
2026-02-10 13:49:04 +01:00
parent 60f06671e4
commit 9902069d8d
2 changed files with 321 additions and 0 deletions
--- a/src/server/trpc-adapter.ts
+++ b/src/server/trpc-adapter.ts
@@ -19,6 +19,7 @@ import type { ProjectRepository } from '../db/repositories/project-repository.js
 import type { AccountRepository } from '../db/repositories/account-repository.js';
 import type { ChangeSetRepository } from '../db/repositories/change-set-repository.js';
 import type { LogChunkRepository } from '../db/repositories/log-chunk-repository.js';
+import type { ConversationRepository } from '../db/repositories/conversation-repository.js';
 import type { AccountCredentialManager } from '../agent/credentials/types.js';
 import type { DispatchManager, PhaseDispatchManager } from '../dispatch/types.js';
 import type { CoordinationManager } from '../coordination/types.js';
@@ -70,6 +71,8 @@ export interface TrpcAdapterOptions {
  executionOrchestrator?: ExecutionOrchestrator;
  /** Preview manager for Docker-based preview deployments */
  previewManager?: PreviewManager;
+  /** Conversation repository for inter-agent communication */
+  conversationRepository?: ConversationRepository;
  /** Absolute path to the workspace root (.cwrc directory) */
  workspaceRoot?: string;
 }
@@ -150,6 +153,7 @@ export function createTrpcHandler(options: TrpcAdapterOptions) {
          branchManager: options.branchManager,
          executionOrchestrator: options.executionOrchestrator,
          previewManager: options.previewManager,
+          conversationRepository: options.conversationRepository,
          workspaceRoot: options.workspaceRoot,
        }),
    });
--- a/src/test/integration/real-providers/conversation.test.ts
+++ b/src/test/integration/real-providers/conversation.test.ts
@@ -0,0 +1,317 @@
+/**
+ * Real Claude Inter-Agent Conversation Integration Tests
+ *
+ * IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
+ * They are SKIPPED by default and should only be run manually for validation.
+ *
+ * To run:
+ * ```bash
+ * REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/conversation.test.ts --test-timeout=300000
+ * ```
+ *
+ * Tests covered:
+ * - Two real Claude sessions communicating via cw ask / cw listen / cw answer
+ * - Agent identity (agentId/agentName) in manifest.json
+ * - Conversation lifecycle: create → pending → answered
+ *
+ * Estimated cost: ~$0.20 per full run (three Claude sessions: two for the conversation test, one for the manifest test)
+ */
+
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { mkdtemp, rm, readFile } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import Database from 'better-sqlite3';
+import { drizzle } from 'drizzle-orm/better-sqlite3';
+import * as schema from '../../../db/schema.js';
+import { ensureSchema } from '../../../db/ensure-schema.js';
+import { EventEmitterBus } from '../../../events/bus.js';
+import type { DomainEvent } from '../../../events/types.js';
+import { MultiProviderAgentManager } from '../../../agent/manager.js';
+import {
+  DrizzleAgentRepository,
+  DrizzleProjectRepository,
+  DrizzleAccountRepository,
+  DrizzleConversationRepository,
+  DrizzleTaskRepository,
+  DrizzlePhaseRepository,
+  DrizzlePageRepository,
+  DrizzleLogChunkRepository,
+  DrizzleChangeSetRepository,
+} from '../../../db/repositories/drizzle/index.js';
+import { CoordinationServer } from '../../../server/index.js';
+import { ProcessManager, ProcessRegistry } from '../../../process/index.js';
+import { LogManager } from '../../../logging/index.js';
+import { createTrpcClient } from '../../../cli/trpc-client.js';
+import type { TrpcClient } from '../../../cli/trpc-client.js';
+import { describeRealClaude, sleep } from './harness.js';
+
+const CONVERSATION_TEST_TIMEOUT = 3 * 60 * 1000; // 3 minutes per test, in milliseconds
+
+describeRealClaude('Real Inter-Agent Conversation', () => {
+  let workspaceRoot: string; // temp directory created in beforeAll, removed in afterAll
+  let agentManager: MultiProviderAgentManager; // spawns and stops the agents under test
+  let agentRepository: DrizzleAgentRepository; // polled by the tests for agent status/result
+  let conversationRepository: DrizzleConversationRepository; // handed to the server; queried for pending conversations
+  let server: CoordinationServer; // started in beforeAll on testPort, stopped in afterAll
+  let client: TrpcClient; // created in beforeAll; not referenced by the tests in this file
+  let testPort: number; // random port in 40000-49999 chosen in beforeAll
+  let eventBus: EventEmitterBus; // shared by the agent manager, process manager and server
+  const originalCwPort = process.env.CW_PORT; // captured so afterAll can restore it
+
+  // One-time setup: temp git workspace, in-memory DB, agent manager and a started server.
+  beforeAll(async () => {
+    console.log('\n=== Running Real Inter-Agent Conversation Tests ===');
+    console.log('These tests spawn TWO Claude sessions and incur costs.\n');
+
+    // Create temp workspace and turn it into a git repo with a single initial commit
+    workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-conv-test-'));
+    const { execSync } = await import('node:child_process');
+    const initRepo = 'git init && git config user.email "test@test.com" && git config user.name "Test" && touch .gitkeep && git add . && git commit -m "init"';
+    execSync(initRepo, { cwd: workspaceRoot, stdio: 'ignore' });
+
+    // Create in-memory DB
+    const sqlite = new Database(':memory:');
+    sqlite.pragma('foreign_keys = ON');
+    const db = drizzle(sqlite, { schema });
+    ensureSchema(db);
+
+    // Create repositories (all backed by the same in-memory database)
+    agentRepository = new DrizzleAgentRepository(db);
+    const projectRepository = new DrizzleProjectRepository(db);
+    const accountRepository = new DrizzleAccountRepository(db);
+    conversationRepository = new DrizzleConversationRepository(db);
+    const taskRepository = new DrizzleTaskRepository(db);
+    const phaseRepository = new DrizzlePhaseRepository(db);
+    const pageRepository = new DrizzlePageRepository(db);
+    const logChunkRepository = new DrizzleLogChunkRepository(db);
+    const changeSetRepository = new DrizzleChangeSetRepository(db);
+
+    // Event bus shared by the agent manager, process manager and server
+    eventBus = new EventEmitterBus();
+
+    // Agent manager
+    agentManager = new MultiProviderAgentManager(
+      agentRepository,
+      workspaceRoot,
+      projectRepository,
+      accountRepository,
+      eventBus,
+      undefined, // no credential manager
+      changeSetRepository,
+      phaseRepository,
+      taskRepository,
+      pageRepository,
+      logChunkRepository,
+    );
+
+    // Start server on random port (40000-49999)
+    testPort = 40000 + Math.floor(Math.random() * 10000);
+    const registry = new ProcessRegistry();
+    const processManager = new ProcessManager(registry, eventBus);
+    const logManager = new LogManager();
+
+    server = new CoordinationServer(
+      // PID file lives in the OS temp dir (honours TMPDIR), same base as the workspace above
+      { port: testPort, pidFile: join(tmpdir(), `cw-conv-test-${testPort}.pid`) },
+      processManager,
+      logManager,
+      eventBus,
+      {
+        agentManager,
+        taskRepository,
+        projectRepository,
+        accountRepository,
+        conversationRepository,
+        phaseRepository,
+        pageRepository,
+        logChunkRepository,
+        changeSetRepository,
+        workspaceRoot,
+      },
+    );
+
+    await server.start();
+    console.log(`  Server started on port ${testPort}`);
+
+    // Set CW_PORT so spawned agents can reach the server
+    process.env.CW_PORT = String(testPort);
+
+    // Create tRPC client
+    client = createTrpcClient(testPort);
+  });
+
+  // Teardown: restore CW_PORT, stop running agents, stop the server, delete the workspace.
+  afterAll(async () => {
+    // Restore env; compare with undefined so an originally-empty CW_PORT is put back too
+    if (originalCwPort !== undefined) {
+      process.env.CW_PORT = originalCwPort;
+    } else {
+      delete process.env.CW_PORT;
+    }
+
+    // Kill agents; a failing lookup must not skip the server/workspace cleanup below
+    try {
+      for (const agent of await agentRepository.findAll()) {
+        if (agent.status !== 'running') continue;
+        try { await agentManager.stop(agent.id); } catch { /* ignore */ }
+      }
+    } catch { /* ignore */ }
+
+    // Stop server
+    try { await server.stop(); } catch { /* ignore */ }
+    // Clean up
+    try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ }
+  });
+
+  /**
+   * Spawns Agent A (responder) and, 5 s later, Agent B (questioner), then polls the
+   * agent repository until both have left the 'running' state or the deadline passes.
+   * Passes when both end up 'idle', Agent B's result contains "42", and Agent A has
+   * no pending conversations.
+   */
+  it(
+    'two Claude agents communicate via cw ask/listen/answer',
+    async () => {
+      // Taken at test start and 5 s short of the Vitest timeout passed below, so the
+      // final status logs and assertions still run when an agent hangs.
+      const deadline = Date.now() + CONVERSATION_TEST_TIMEOUT - 5000;
+
+      // Spawn Agent A — the responder
+      // It starts a listener, waits for a question, answers it, then completes
+      const agentA = await agentManager.spawn({
+        taskId: null,
+        prompt: `You are Agent A in a multi-agent test. Your job:
+
+1. Read .cw/input/manifest.json to get your agentId
+2. Start a background listener: cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-output.txt &
+   LISTEN_PID=$!
+3. Wait for the listener to find a question by polling the file:
+   while [ ! -s /tmp/cw-listen-output.txt ]; do sleep 1; done
+4. Parse the JSON from /tmp/cw-listen-output.txt to get conversationId
+5. Answer the conversation: cw answer "The answer is 42" --conversation-id <conversationId>
+6. Kill the listener: kill $LISTEN_PID 2>/dev/null
+7. Write .cw/output/signal.json with: {"status":"done"}
+
+IMPORTANT:
+- The server is on port ${testPort}
+- Use the exact answer text: "The answer is 42"
+- Do NOT ask any questions or produce other output`,
+        mode: 'execute',
+        provider: 'claude',
+      });
+
+      console.log(`  Agent A spawned: ${agentA.id} (${agentA.name})`);
+
+      // Small delay to let Agent A start its listener
+      await sleep(5000);
+
+      // Spawn Agent B — the questioner
+      // It asks Agent A a question, waits for the answer, writes it to signal.json result
+      const agentB = await agentManager.spawn({
+        taskId: null,
+        prompt: `You are Agent B in a multi-agent test. Your job:
+
+1. Read .cw/input/manifest.json to get your agentId
+2. Ask Agent A a question using this command:
+   ANSWER=$(cw ask "What is the meaning of everything?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
+3. Write .cw/output/signal.json with: {"status":"done","result":"<the ANSWER you received>"}
+
+IMPORTANT:
+- The server is on port ${testPort}
+- Agent A's ID is: ${agentA.id}
+- Write the EXACT answer you received from Agent A into the result field
+- Do NOT ask any questions or produce other output`,
+        mode: 'execute',
+        provider: 'claude',
+      });
+
+      console.log(`  Agent B spawned: ${agentB.id} (${agentB.name})`);
+
+      // Wait for both agents to complete
+      let agentADone = false;
+      let agentBDone = false;
+
+      while (Date.now() < deadline && (!agentADone || !agentBDone)) {
+        const a = await agentRepository.findById(agentA.id);
+        const b = await agentRepository.findById(agentB.id);
+
+        // Record (and log) each agent's exit from 'running' only once
+        if (!agentADone && a && a.status !== 'running') {
+          agentADone = true;
+          console.log(`  Agent A status: ${a.status}`);
+        }
+        if (!agentBDone && b && b.status !== 'running') {
+          agentBDone = true;
+          console.log(`  Agent B status: ${b.status}`);
+        }
+
+        if (!agentADone || !agentBDone) {
+          await sleep(2000);
+        }
+      }
+
+      // Verify results
+      const finalA = await agentRepository.findById(agentA.id);
+      const finalB = await agentRepository.findById(agentB.id);
+
+      console.log(`  Agent A final status: ${finalA?.status}`);
+      console.log(`  Agent B final status: ${finalB?.status}`);
+      console.log(`  Agent B result: ${finalB?.result}`);
+
+      // Agent A should have completed (idle)
+      expect(finalA?.status).toBe('idle');
+
+      // Agent B should have completed (idle)
+      expect(finalB?.status).toBe('idle');
+
+      // Agent B's result should contain the answer from Agent A
+      expect(finalB?.result).toBeTruthy();
+      expect(finalB?.result).toContain('42');
+
+      // Agent A must have no pending conversations left. Note: an empty list only
+      // shows nothing is pending; it does not by itself prove one was ever created.
+      const conversations = await conversationRepository.findPendingForAgent(agentA.id);
+      expect(conversations.length).toBe(0);
+    },
+    CONVERSATION_TEST_TIMEOUT,
+  );
+
+  // Spawns one agent that echoes manifest.json back, then checks the echo names the agent's id and name.
+  it(
+    'agent manifest.json contains agentId and agentName',
+    async () => {
+      // Poll deadline starts before the spawn and ends 5 s ahead of the Vitest timeout passed below
+      const deadline = Date.now() + CONVERSATION_TEST_TIMEOUT - 5000;
+      const agent = await agentManager.spawn({
+        taskId: null,
+        prompt: `Read .cw/input/manifest.json and write its contents to .cw/output/signal.json in this format:
+{"status":"done","result":"<raw contents of manifest.json>"}
+Do NOT modify the manifest contents. Just copy them into the result field as a string.`,
+        mode: 'execute',
+        provider: 'claude',
+        inputContext: {},
+      });
+
+      console.log(`  Identity test agent: ${agent.id} (${agent.name})`);
+
+      // Wait for completion (agent leaves the 'running' state) or for the deadline
+      while (Date.now() < deadline) {
+        const current = await agentRepository.findById(agent.id);
+        if (current && current.status !== 'running') break;
+        await sleep(1000);
+      }
+
+      const finished = await agentRepository.findById(agent.id);
+      console.log(`  Agent status: ${finished?.status}`);
+      console.log(`  Agent result: ${finished?.result}`);
+
+      expect(finished?.status).toBe('idle');
+      expect(finished?.result).toBeTruthy();
+      // The result should contain the manifest contents which include agentId and agentName
+      expect(finished?.result).toContain(agent.id);
+      expect(finished?.result).toContain(agent.name);
+    },
+    CONVERSATION_TEST_TIMEOUT,
+  );
+});