diff --git a/src/test/integration/real-providers/conversation.test.ts b/src/test/integration/real-providers/conversation.test.ts index ca3f90a..a3d168e 100644 --- a/src/test/integration/real-providers/conversation.test.ts +++ b/src/test/integration/real-providers/conversation.test.ts @@ -9,309 +9,532 @@ * REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/conversation.test.ts --test-timeout=300000 * ``` * - * Tests covered: - * - Two real Claude sessions communicating via cw ask / cw listen / cw answer - * - Agent identity (agentId/agentName) in manifest.json - * - Conversation lifecycle: create → pending → answered + * Architecture: + * - Mock conversation server (only cw listen/ask/answer endpoints, no full CoordinationServer) + * - In-memory ConversationRepository (no SQLite, no FK constraints) + * - Real agent harness for spawning two Claude sessions with actual coding tasks + * - Two sequential questions prove the listen→answer→re-listen cycle works * - * Estimated cost: ~$0.20 per full run (two Claude sessions) + * Estimated cost: ~$0.30 per full run (two Claude sessions) */ -import { describe, it, expect, beforeAll, afterAll } from 'vitest'; -import { mkdtemp, rm, readFile } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; +import { it, expect, beforeAll, afterAll } from 'vitest'; +import { createServer } from 'node:http'; +import type { Server } from 'node:http'; +import { readFileSync, existsSync } from 'node:fs'; import { join } from 'node:path'; -import Database from 'better-sqlite3'; -import { drizzle } from 'drizzle-orm/better-sqlite3'; -import * as schema from '../../../db/schema.js'; -import { ensureSchema } from '../../../db/ensure-schema.js'; +import { nanoid } from 'nanoid'; +import { fetchRequestHandler } from '@trpc/server/adapters/fetch'; +import { router, publicProcedure } from '../../../trpc/trpc.js'; +import { conversationProcedures } from '../../../trpc/routers/conversation.js'; import { EventEmitterBus } from '../../../events/bus.js'; -import type { DomainEvent } from '../../../events/types.js'; -import { MultiProviderAgentManager } from '../../../agent/manager.js'; +import type { ConversationRepository, CreateConversationData } from '../../../db/repositories/conversation-repository.js'; +import type { Conversation } from '../../../db/schema.js'; import { - DrizzleAgentRepository, - DrizzleProjectRepository, - DrizzleAccountRepository, - DrizzleConversationRepository, - DrizzleTaskRepository, - DrizzlePhaseRepository, - DrizzlePageRepository, - DrizzleLogChunkRepository, - DrizzleChangeSetRepository, -} from '../../../db/repositories/drizzle/index.js'; -import { CoordinationServer } from '../../../server/index.js'; -import { ProcessManager, ProcessRegistry } from '../../../process/index.js'; -import { LogManager } from '../../../logging/index.js'; -import { createTrpcClient } from '../../../cli/trpc-client.js'; -import type { TrpcClient } from '../../../cli/trpc-client.js'; -import { describeRealClaude, sleep } from './harness.js'; + createRealProviderHarness, + describeRealClaude, + sleep, + type RealProviderHarness, +} from './harness.js'; -const CONVERSATION_TEST_TIMEOUT = 180000; // 3 minutes per test +const TEST_TIMEOUT = 300000; // 5 minutes — agents do real coding + conversation -describeRealClaude('Real Inter-Agent Conversation', () => { - let workspaceRoot: string; - let agentManager: MultiProviderAgentManager; - let agentRepository: DrizzleAgentRepository; - let conversationRepository: DrizzleConversationRepository; - let server: CoordinationServer; - let client: TrpcClient; - let testPort: number; - let eventBus: EventEmitterBus; +// --------------------------------------------------------------------------- +// In-memory ConversationRepository — no SQLite, no FK constraints +// --------------------------------------------------------------------------- + +class InMemoryConversationRepository implements ConversationRepository { + private store = new Map(); + + async create(data: CreateConversationData): Promise { + const now = new Date(); + const conversation: Conversation = { + id: nanoid(), + fromAgentId: data.fromAgentId, + toAgentId: data.toAgentId, + initiativeId: data.initiativeId ?? null, + phaseId: data.phaseId ?? null, + taskId: data.taskId ?? null, + question: data.question, + answer: null, + status: 'pending', + createdAt: now, + updatedAt: now, + }; + this.store.set(conversation.id, conversation); + return conversation; + } + + async findById(id: string): Promise { + return this.store.get(id) ?? null; + } + + async findPendingForAgent(toAgentId: string): Promise { + return [...this.store.values()] + .filter((c) => c.toAgentId === toAgentId && c.status === 'pending') + .sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime()); + } + + async answer(id: string, answer: string): Promise { + const conv = this.store.get(id); + if (!conv) return null; + const updated: Conversation = { + ...conv, + answer, + status: 'answered' as const, + updatedAt: new Date(), + }; + this.store.set(id, updated); + return updated; + } + + /** Test helper — return all conversations */ + getAll(): Conversation[] { + return [...this.store.values()]; + } +} + +// --------------------------------------------------------------------------- +// Mock conversation server — serves ONLY conversation tRPC procedures +// --------------------------------------------------------------------------- + +async function startMockConversationServer(): Promise<{ + server: Server; + port: number; + repo: InMemoryConversationRepository; +}> { + const repo = new InMemoryConversationRepository(); + const eventBus = new EventEmitterBus(); + + // Mini router with only conversation procedures + const miniRouter = router({ + ...conversationProcedures(publicProcedure), + }); + + const httpServer = createServer(async (req, res) => { + if (!req.url?.startsWith('/trpc')) { + res.writeHead(404); + res.end('Not found'); + return; + } + + const host = req.headers.host ?? 'localhost'; + const url = new URL(req.url, `http://${host}`); + + let body: string | undefined; + if (req.method !== 'GET' && req.method !== 'HEAD') { + body = await new Promise((resolve) => { + let data = ''; + req.on('data', (chunk: Buffer) => { + data += chunk.toString(); + }); + req.on('end', () => resolve(data)); + }); + } + + const headers = new Headers(); + for (const [key, value] of Object.entries(req.headers)) { + if (value) { + if (Array.isArray(value)) { + value.forEach((v) => headers.append(key, v)); + } else { + headers.set(key, value); + } + } + } + + const fetchRequest = new Request(url.toString(), { + method: req.method, + headers, + body: body ?? undefined, + }); + + const fetchResponse = await fetchRequestHandler({ + endpoint: '/trpc', + req: fetchRequest, + router: miniRouter, + createContext: () => + ({ + eventBus, + serverStartedAt: new Date(), + processCount: 0, + conversationRepository: repo, + // Stub — requireAgentManager is called unconditionally in createConversation, + // but list() is only invoked for taskId/phaseId resolution. With --agent-id + // targeting, list() is never called. + agentManager: { list: async () => [] }, + }) as any, + }); + + res.statusCode = fetchResponse.status; + fetchResponse.headers.forEach((value, key) => { + res.setHeader(key, value); + }); + + if (fetchResponse.body) { + const reader = fetchResponse.body.getReader(); + const pump = async () => { + while (true) { + const { done, value } = await reader.read(); + if (done) { + res.end(); + return; + } + res.write(value); + } + }; + pump().catch(() => res.end()); + } else { + res.end(await fetchResponse.text()); + } + }); + + const port = 40000 + Math.floor(Math.random() * 10000); + await new Promise((resolve) => { + httpServer.listen(port, '127.0.0.1', () => resolve()); + }); + + return { server: httpServer, port, repo }; +} + +// --------------------------------------------------------------------------- +// Diagnostic helpers +// --------------------------------------------------------------------------- + +function dumpAgentLogs(workspaceRoot: string, agentName: string) { + const logDir = join(workspaceRoot, '.cw', 'agent-logs', agentName); + if (!existsSync(logDir)) { + console.log(` [${agentName}] No log directory at ${logDir}`); + return; + } + // Dump output.jsonl (last 30 lines) + const outputPath = join(logDir, 'output.jsonl'); + if (existsSync(outputPath)) { + const lines = readFileSync(outputPath, 'utf-8').trim().split('\n'); + const last = lines.slice(-30); + console.log(` [${agentName}] output.jsonl (last ${last.length}/${lines.length} lines):`); + for (const line of last) { + try { + const ev = JSON.parse(line); + if (ev.type === 'assistant' && ev.message?.content) { + for (const block of ev.message.content) { + if (block.type === 'text') { + console.log(` TEXT: ${block.text.substring(0, 200)}`); + } else if (block.type === 'tool_use') { + console.log(` TOOL: ${block.name} ${JSON.stringify(block.input).substring(0, 150)}`); + } + } + } else if (ev.type === 'result') { + console.log(` RESULT: ${JSON.stringify(ev).substring(0, 300)}`); + } + } catch { + console.log(` RAW: ${line.substring(0, 200)}`); + } + } + } + // Dump stderr + const stderrPath = join(logDir, 'stderr.log'); + if (existsSync(stderrPath)) { + const stderr = readFileSync(stderrPath, 'utf-8').trim(); + if (stderr) { + console.log(` [${agentName}] stderr: ${stderr.substring(0, 500)}`); + } + } +} + +// --------------------------------------------------------------------------- +// Test suite +// --------------------------------------------------------------------------- + +describeRealClaude('Real Inter-Agent Conversation (mock server)', () => { + let harness: RealProviderHarness; + let mockServer: Server; + let mockPort: number; + let mockRepo: InMemoryConversationRepository; const originalCwPort = process.env.CW_PORT; beforeAll(async () => { - console.log('\n=== Running Real Inter-Agent Conversation Tests ==='); - console.log('These tests spawn TWO Claude sessions and incur costs.\n'); + console.log('\n=== Real Inter-Agent Conversation Test ==='); + console.log('Mock conversation server + two Claude sessions.\n'); - // Create temp workspace - workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-conv-test-')); - const { execSync } = await import('node:child_process'); - execSync('git init && git config user.email "test@test.com" && git config user.name "Test" && touch .gitkeep && git add . && git commit -m "init"', { - cwd: workspaceRoot, - stdio: 'ignore', - }); + // Start mock conversation server (only listen/ask/answer endpoints) + const mock = await startMockConversationServer(); + mockServer = mock.server; + mockPort = mock.port; + mockRepo = mock.repo; + console.log(` Mock server on port ${mockPort}`); - // Create in-memory DB - const sqlite = new Database(':memory:'); - sqlite.pragma('foreign_keys = ON'); - const db = drizzle(sqlite, { schema }); - ensureSchema(db); + // Set CW_PORT so agents' cw commands hit the mock server + process.env.CW_PORT = String(mockPort); - // Create repositories - agentRepository = new DrizzleAgentRepository(db); - const projectRepository = new DrizzleProjectRepository(db); - const accountRepository = new DrizzleAccountRepository(db); - conversationRepository = new DrizzleConversationRepository(db); - const taskRepository = new DrizzleTaskRepository(db); - const phaseRepository = new DrizzlePhaseRepository(db); - const pageRepository = new DrizzlePageRepository(db); - const logChunkRepository = new DrizzleLogChunkRepository(db); - const changeSetRepository = new DrizzleChangeSetRepository(db); - - // Event bus - eventBus = new EventEmitterBus(); - - // Agent manager - agentManager = new MultiProviderAgentManager( - agentRepository, - workspaceRoot, - projectRepository, - accountRepository, - eventBus, - undefined, // no credential manager - changeSetRepository, - phaseRepository, - taskRepository, - pageRepository, - logChunkRepository, - ); - - // Start server on random port - testPort = 40000 + Math.floor(Math.random() * 10000); - const registry = new ProcessRegistry(); - const processManager = new ProcessManager(registry, eventBus); - const logManager = new LogManager(); - - server = new CoordinationServer( - { port: testPort, pidFile: `/tmp/cw-conv-test-${testPort}.pid` }, - processManager, - logManager, - eventBus, - { - agentManager, - taskRepository, - projectRepository, - accountRepository, - conversationRepository, - phaseRepository, - pageRepository, - logChunkRepository, - changeSetRepository, - workspaceRoot, - }, - ); - - await server.start(); - console.log(` Server started on port ${testPort}`); - - // Set CW_PORT so spawned agents can reach the server - process.env.CW_PORT = String(testPort); - - // Create tRPC client - client = createTrpcClient(testPort); + // Real agent harness for spawning + worktrees (no full CoordinationServer) + harness = await createRealProviderHarness({ provider: 'claude' }); + console.log(` Workspace: ${harness.workspaceRoot}`); }); afterAll(async () => { - // Restore env if (originalCwPort) { process.env.CW_PORT = originalCwPort; } else { delete process.env.CW_PORT; } - - // Kill agents - const agents = await agentRepository.findAll(); - for (const a of agents) { - if (a.status === 'running') { - try { await agentManager.stop(a.id); } catch { /* ignore */ } - } - } - - // Stop server - try { await server.stop(); } catch { /* ignore */ } - - // Clean up - try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ } + await harness?.cleanup(); + mockServer?.close(); }); it( - 'two Claude agents communicate via cw ask/listen/answer', + 'two agents with real tasks communicate via cw ask/listen/answer (two questions prove re-listen)', async () => { - // Spawn Agent A — the responder - // It starts a listener, waits for a question, answers it, then completes - const agentA = await agentManager.spawn({ + const agentSuffix = nanoid(6); // unique suffix for temp files + + // --------------------------------------------------------------- + // Agent A — builds a validator module WHILE answering questions + // in the background via cw listen + // --------------------------------------------------------------- + const agentA = await harness.agentManager.spawn({ taskId: null, - prompt: `You are Agent A in a multi-agent test. Your job: + prompt: `You are Agent A in a multi-agent coordination test. -1. Read .cw/input/manifest.json to get your agentId -2. Start a background listener: cw listen --agent-id --timeout 120000 > /tmp/cw-listen-output.txt & - LISTEN_PID=$! -3. Wait for the listener to find a question by polling the file: - while [ ! -s /tmp/cw-listen-output.txt ]; do sleep 1; done -4. Parse the JSON from /tmp/cw-listen-output.txt to get conversationId -5. Answer the conversation: cw answer "The answer is 42" --conversation-id -6. Kill the listener: kill $LISTEN_PID 2>/dev/null -7. Write .cw/output/signal.json with: {"status":"done"} +You have TWO concurrent responsibilities: +1. Build a TypeScript validator module (your main coding task) +2. Answer questions from other agents via a background listener -IMPORTANT: -- The server is on port ${testPort} -- Use the exact answer text: "The answer is 42" -- Do NOT ask any questions or produce other output`, - mode: 'execute', - provider: 'claude', - }); +SETUP (do this first): +- Read .cw/input/manifest.json to get your agentId +- Start a background listener that writes to a temp file: + cw listen --agent-id --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 & + LISTEN_PID=$! - console.log(` Agent A spawned: ${agentA.id} (${agentA.name})`); +MAIN CODING TASK — implement a user registration validator: - // Small delay to let Agent A start its listener - await sleep(5000); +1. Create types.ts: + export interface RegistrationInput { name: string; email: string; password: string; } + export interface ValidationResult { valid: boolean; errors: string[]; } - // Spawn Agent B — the questioner - // It asks Agent A a question, waits for the answer, writes it to signal.json result - const agentB = await agentManager.spawn({ - taskId: null, - prompt: `You are Agent B in a multi-agent test. Your job: +2. Create validator.ts: + Import from types.ts. Export function validateRegistration(input: RegistrationInput): ValidationResult + Rules: name min 2 chars, email must have exactly one @ and domain with a dot and no spaces and max 254 chars, password min 8 chars. -1. Read .cw/input/manifest.json to get your agentId -2. Ask Agent A a question using this command: - ANSWER=$(cw ask "What is the meaning of everything?" --from --agent-id ${agentA.id} --timeout 120000) -3. Write .cw/output/signal.json with: {"status":"done","result":""} +3. Create index.ts that re-exports everything from types.ts and validator.ts. -IMPORTANT: -- The server is on port ${testPort} -- Agent A's ID is: ${agentA.id} -- Write the EXACT answer you received from Agent A into the result field -- Do NOT ask any questions or produce other output`, - mode: 'execute', - provider: 'claude', - }); +BETWEEN EACH FILE, check for incoming questions: + if [ -s /tmp/cw-listen-${agentSuffix}.txt ]; then + # parse the JSON, get conversationId and question + # answer: cw answer "" --conversation-id + # clear and restart listener: + > /tmp/cw-listen-${agentSuffix}.txt + cw listen --agent-id --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 & + LISTEN_PID=$! + fi - console.log(` Agent B spawned: ${agentB.id} (${agentB.name})`); +You will receive TWO questions total while you work. Answer them based on the code you are writing. - // Wait for both agents to complete - const deadline = Date.now() + CONVERSATION_TEST_TIMEOUT; +CLEANUP: After all 3 files are written and both questions answered: +- kill $LISTEN_PID 2>/dev/null +- Write .cw/output/signal.json: {"status":"done","result":"validator module complete, answered 2 questions"} - let agentADone = false; - let agentBDone = false; - let agentBResult: string | null = null; - - while (Date.now() < deadline && (!agentADone || !agentBDone)) { - const a = await agentRepository.findById(agentA.id); - const b = await agentRepository.findById(agentB.id); - - if (a && a.status !== 'running') { - agentADone = true; - console.log(` Agent A status: ${a.status}`); - } - if (b && b.status !== 'running') { - agentBDone = true; - console.log(` Agent B status: ${b.status}`); - if (b.result) { - try { - const parsed = JSON.parse(b.result); - agentBResult = parsed.message ?? parsed.result ?? b.result; - } catch { - agentBResult = b.result; - } - } - } - - if (!agentADone || !agentBDone) { - await sleep(2000); - } - } - - // Verify results - const finalA = await agentRepository.findById(agentA.id); - const finalB = await agentRepository.findById(agentB.id); - - console.log(` Agent A final status: ${finalA?.status}`); - console.log(` Agent B final status: ${finalB?.status}`); - console.log(` Agent B result: ${finalB?.result}`); - - // Agent A should have completed (idle) - expect(finalA?.status).toBe('idle'); - - // Agent B should have completed (idle) - expect(finalB?.status).toBe('idle'); - - // Agent B's result should contain the answer from Agent A - expect(finalB?.result).toBeTruthy(); - expect(finalB!.result).toContain('42'); - - // Verify the conversation exists in the database - const conversations = await conversationRepository.findPendingForAgent(agentA.id); - // Should be empty (all answered) - expect(conversations.length).toBe(0); - }, - CONVERSATION_TEST_TIMEOUT, - ); - - it( - 'agent manifest.json contains agentId and agentName', - async () => { - const agent = await agentManager.spawn({ - taskId: null, - prompt: `Read .cw/input/manifest.json and write its contents to .cw/output/signal.json in this format: -{"status":"done","result":""} -Do NOT modify the manifest contents. Just copy them into the result field as a string.`, +CRITICAL: +- The listener MUST run in the background while you write code. +- Check for questions between files, not as blocking waits. +- The CW_PORT environment variable is already set to ${mockPort}.`, mode: 'execute', provider: 'claude', inputContext: {}, }); - console.log(` Identity test agent: ${agent.id} (${agent.name})`); + console.log(` Agent A: ${agentA.id} (${agentA.name})`); - // Wait for completion - const deadline = Date.now() + CONVERSATION_TEST_TIMEOUT; - while (Date.now() < deadline) { - const a = await agentRepository.findById(agent.id); - if (a && a.status !== 'running') break; - await sleep(1000); + // Give Agent A time to start its background listener and begin coding + await sleep(15000); + + // --------------------------------------------------------------- + // Agent B — builds a client module, asks Agent A questions to + // learn the validation rules, then uses answers in its code + // --------------------------------------------------------------- + const agentB = await harness.agentManager.spawn({ + taskId: null, + prompt: `You are Agent B in a multi-agent coordination test. + +Read .cw/input/manifest.json to get your agentId. Agent A (ID: ${agentA.id}) is building a validator module. + +YOUR CODING TASK — build a registration API client that includes client-side validation matching Agent A's server-side rules: + +1. Create client-scaffold.ts with a basic RegistrationClient class that has a register(name, email, password) method that returns Promise<{ok: boolean}>. + Leave a TODO comment where validation will go. + +2. NOW ask Agent A what the validation rules are — you need this to write proper client-side checks: + FIELDS=$(cw ask "What are the required fields and their types for registration?" --from --agent-id ${agentA.id} --timeout 120000) + +3. Ask Agent A about the specific email validation rules: + EMAIL_RULES=$(cw ask "What are the exact email validation rules you implemented?" --from --agent-id ${agentA.id} --timeout 120000) + +4. Create validated-client.ts — a COMPLETE implementation using the answers: + Import the scaffold, add a validateBeforeSubmit(name, email, password) function + that implements the EXACT validation rules Agent A told you about. + Include a comment at the top with the rules you received. + +5. Write .cw/output/signal.json: {"status":"done","result":"client module complete with validation from agent A"} + +CRITICAL: +- Create client-scaffold.ts BEFORE asking questions (you have independent work to do first). +- Use the ACTUAL answers from Agent A in your validated-client.ts implementation. +- The CW_PORT environment variable is already set to ${mockPort}.`, + mode: 'execute', + provider: 'claude', + inputContext: {}, + }); + + console.log(` Agent B: ${agentB.id} (${agentB.name})`); + + // --------------------------------------------------------------- + // Wait for both agents to stop running, then verify conversations + // --------------------------------------------------------------- + const deadline = Date.now() + TEST_TIMEOUT; + let aDone = false; + let bDone = false; + let lastLogTime = 0; + + while (Date.now() < deadline && (!aDone || !bDone)) { + const agentAInfo = await harness.agentRepository.findById(agentA.id); + const agentBInfo = await harness.agentRepository.findById(agentB.id); + + // Periodic progress logging every 30s + if (Date.now() - lastLogTime > 30000) { + const elapsed = Math.round((Date.now() - (deadline - TEST_TIMEOUT)) / 1000); + console.log(` [${elapsed}s] A=${agentAInfo?.status ?? '?'} B=${agentBInfo?.status ?? '?'} convs=${mockRepo.getAll().length}`); + lastLogTime = Date.now(); + } + + if (agentAInfo && agentAInfo.status !== 'running' && !aDone) { + aDone = true; + console.log(` Agent A final status: ${agentAInfo.status}`); + dumpAgentLogs(harness.workspaceRoot, agentA.name); + } + if (agentBInfo && agentBInfo.status !== 'running' && !bDone) { + bDone = true; + console.log(` Agent B final status: ${agentBInfo.status}`); + dumpAgentLogs(harness.workspaceRoot, agentB.name); + } + + if (!aDone || !bDone) await sleep(2000); } - const final = await agentRepository.findById(agent.id); - console.log(` Agent status: ${final?.status}`); - console.log(` Agent result: ${final?.result}`); + expect(aDone).toBe(true); + expect(bDone).toBe(true); - expect(final?.status).toBe('idle'); - expect(final?.result).toBeTruthy(); + // --------------------------------------------------------------- + // Verify conversations in mock repo + // --------------------------------------------------------------- + const allConversations = mockRepo.getAll(); + console.log(` Total conversations: ${allConversations.length}`); + for (const c of allConversations) { + console.log( + ` ${c.id}: ${c.status} — Q: "${c.question}" A: "${c.answer?.substring(0, 80)}..."`, + ); + } - // The result should contain the manifest contents which include agentId and agentName - const result = final!.result!; - expect(result).toContain(agent.id); - expect(result).toContain(agent.name); + // Exactly 2 conversations, both answered + expect(allConversations.length).toBe(2); + expect(allConversations.every((c) => c.status === 'answered')).toBe(true); + + // Both target Agent A, both from Agent B + expect(allConversations.every((c) => c.toAgentId === agentA.id)).toBe(true); + expect(allConversations.every((c) => c.fromAgentId === agentB.id)).toBe(true); + + // Questions should be distinct (one about fields, one about email validation) + const questions = allConversations.map((c) => c.question); + expect(questions.some((q) => q.toLowerCase().includes('field'))).toBe(true); + expect(questions.some((q) => q.toLowerCase().includes('email'))).toBe(true); + + // Both answers should be non-empty + expect(allConversations.every((c) => c.answer && c.answer.length > 0)).toBe(true); + + // --------------------------------------------------------------- + // Verify Agent A's coding output — validator module files exist + // --------------------------------------------------------------- + const aWorkdir = join( + harness.workspaceRoot, + 'agent-workdirs', + agentA.name, + 'workspace', + ); + const aFiles = ['types.ts', 'validator.ts', 'index.ts']; + for (const f of aFiles) { + const filePath = join(aWorkdir, f); + const exists = existsSync(filePath); + console.log(` Agent A file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`); + expect(exists).toBe(true); + } + // validator.ts should contain actual validation logic + const validatorContent = readFileSync(join(aWorkdir, 'validator.ts'), 'utf-8'); + console.log(` Agent A validator.ts (${validatorContent.length} chars): ${validatorContent.substring(0, 120)}...`); + expect(validatorContent.toLowerCase()).toContain('email'); + expect(validatorContent.toLowerCase()).toContain('password'); + + // --------------------------------------------------------------- + // Verify Agent B's coding output — client module files exist + // --------------------------------------------------------------- + const bWorkdir = join( + harness.workspaceRoot, + 'agent-workdirs', + agentB.name, + 'workspace', + ); + const bFiles = ['client-scaffold.ts', 'validated-client.ts']; + for (const f of bFiles) { + const filePath = join(bWorkdir, f); + const exists = existsSync(filePath); + console.log(` Agent B file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`); + expect(exists).toBe(true); + } + // validated-client.ts should reference validation rules from Agent A's answers + const clientContent = readFileSync(join(bWorkdir, 'validated-client.ts'), 'utf-8'); + console.log(` Agent B validated-client.ts (${clientContent.length} chars): ${clientContent.substring(0, 120)}...`); + expect(clientContent.toLowerCase()).toContain('email'); + + // --------------------------------------------------------------- + // Verify interleaving: Agent A's JSONL log has coding tool calls + // (Write for .ts files) interleaved with conversation tool calls + // (Bash for cw listen/answer) + // --------------------------------------------------------------- + const aLogPath = join(harness.workspaceRoot, '.cw', 'agent-logs', agentA.name, 'output.jsonl'); + const aLog = readFileSync(aLogPath, 'utf-8').trim().split('\n'); + const toolCalls: { type: 'code' | 'conversation'; name: string; detail: string }[] = []; + + for (const line of aLog) { + try { + const ev = JSON.parse(line); + if (ev.type !== 'assistant' || !ev.message?.content) continue; + for (const block of ev.message.content) { + if (block.type !== 'tool_use') continue; + const input = typeof block.input === 'string' ? block.input : JSON.stringify(block.input); + if (block.name === 'Write' && input.includes('.ts')) { + toolCalls.push({ type: 'code', name: 'Write', detail: input.substring(0, 80) }); + } else if (block.name === 'Bash' && (input.includes('cw listen') || input.includes('cw answer'))) { + toolCalls.push({ type: 'conversation', name: 'Bash', detail: input.substring(0, 80) }); + } + } + } catch { /* skip non-JSON lines */ } + } + + console.log(` Agent A interleaving (${toolCalls.length} relevant tool calls):`); + for (const tc of toolCalls) { + console.log(` [${tc.type}] ${tc.name}: ${tc.detail}`); + } + + // Must have both code and conversation tool calls + const hasCode = toolCalls.some((tc) => tc.type === 'code'); + const hasConversation = toolCalls.some((tc) => tc.type === 'conversation'); + expect(hasCode).toBe(true); + expect(hasConversation).toBe(true); + + // Verify interleaving: at least one code call must appear AFTER a conversation call + // (proving coding continued after handling a question) + const firstConvIdx = toolCalls.findIndex((tc) => tc.type === 'conversation'); + const lastCodeIdx = toolCalls.length - 1 - [...toolCalls].reverse().findIndex((tc) => tc.type === 'code'); + console.log(` First conversation at index ${firstConvIdx}, last code at index ${lastCodeIdx}`); + expect(lastCodeIdx).toBeGreaterThan(firstConvIdx); }, - CONVERSATION_TEST_TIMEOUT, + TEST_TIMEOUT, ); });