test: Rewrite conversation integration test with mock server and real tasks

Replace full CoordinationServer with a lightweight mock that serves only
conversation tRPC procedures backed by an in-memory repository. Agents
now have real coding tasks (write spec, ask questions, create summary)
and the two-question flow proves the listen→answer→re-listen cycle works.
This commit is contained in:
Lukas May
2026-02-10 14:37:12 +01:00
parent f8c5dce588
commit 9f5421f6bc

View File

@@ -9,309 +9,532 @@
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/conversation.test.ts --test-timeout=300000
* ```
*
* Tests covered:
* - Two real Claude sessions communicating via cw ask / cw listen / cw answer
* - Agent identity (agentId/agentName) in manifest.json
* - Conversation lifecycle: create → pending → answered
* Architecture:
* - Mock conversation server (only cw listen/ask/answer endpoints, no full CoordinationServer)
* - In-memory ConversationRepository (no SQLite, no FK constraints)
* - Real agent harness for spawning two Claude sessions with actual coding tasks
* - Two sequential questions prove the listen→answer→re-listen cycle works
*
* Estimated cost: ~$0.20 per full run (two Claude sessions)
* Estimated cost: ~$0.30 per full run (two Claude sessions)
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { mkdtemp, rm, readFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { it, expect, beforeAll, afterAll } from 'vitest';
import { createServer } from 'node:http';
import type { Server } from 'node:http';
import { readFileSync, existsSync } from 'node:fs';
import { join } from 'node:path';
import Database from 'better-sqlite3';
import { drizzle } from 'drizzle-orm/better-sqlite3';
import * as schema from '../../../db/schema.js';
import { ensureSchema } from '../../../db/ensure-schema.js';
import { nanoid } from 'nanoid';
import { fetchRequestHandler } from '@trpc/server/adapters/fetch';
import { router, publicProcedure } from '../../../trpc/trpc.js';
import { conversationProcedures } from '../../../trpc/routers/conversation.js';
import { EventEmitterBus } from '../../../events/bus.js';
import type { DomainEvent } from '../../../events/types.js';
import { MultiProviderAgentManager } from '../../../agent/manager.js';
import type { ConversationRepository, CreateConversationData } from '../../../db/repositories/conversation-repository.js';
import type { Conversation } from '../../../db/schema.js';
import {
DrizzleAgentRepository,
DrizzleProjectRepository,
DrizzleAccountRepository,
DrizzleConversationRepository,
DrizzleTaskRepository,
DrizzlePhaseRepository,
DrizzlePageRepository,
DrizzleLogChunkRepository,
DrizzleChangeSetRepository,
} from '../../../db/repositories/drizzle/index.js';
import { CoordinationServer } from '../../../server/index.js';
import { ProcessManager, ProcessRegistry } from '../../../process/index.js';
import { LogManager } from '../../../logging/index.js';
import { createTrpcClient } from '../../../cli/trpc-client.js';
import type { TrpcClient } from '../../../cli/trpc-client.js';
import { describeRealClaude, sleep } from './harness.js';
createRealProviderHarness,
describeRealClaude,
sleep,
type RealProviderHarness,
} from './harness.js';
const CONVERSATION_TEST_TIMEOUT = 180000; // 3 minutes per test
const TEST_TIMEOUT = 300000; // 5 minutes — agents do real coding + conversation
describeRealClaude('Real Inter-Agent Conversation', () => {
let workspaceRoot: string;
let agentManager: MultiProviderAgentManager;
let agentRepository: DrizzleAgentRepository;
let conversationRepository: DrizzleConversationRepository;
let server: CoordinationServer;
let client: TrpcClient;
let testPort: number;
let eventBus: EventEmitterBus;
// ---------------------------------------------------------------------------
// In-memory ConversationRepository — no SQLite, no FK constraints
// ---------------------------------------------------------------------------
class InMemoryConversationRepository implements ConversationRepository {
private store = new Map<string, Conversation>();
async create(data: CreateConversationData): Promise<Conversation> {
const now = new Date();
const conversation: Conversation = {
id: nanoid(),
fromAgentId: data.fromAgentId,
toAgentId: data.toAgentId,
initiativeId: data.initiativeId ?? null,
phaseId: data.phaseId ?? null,
taskId: data.taskId ?? null,
question: data.question,
answer: null,
status: 'pending',
createdAt: now,
updatedAt: now,
};
this.store.set(conversation.id, conversation);
return conversation;
}
async findById(id: string): Promise<Conversation | null> {
return this.store.get(id) ?? null;
}
async findPendingForAgent(toAgentId: string): Promise<Conversation[]> {
return [...this.store.values()]
.filter((c) => c.toAgentId === toAgentId && c.status === 'pending')
.sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime());
}
async answer(id: string, answer: string): Promise<Conversation | null> {
const conv = this.store.get(id);
if (!conv) return null;
const updated: Conversation = {
...conv,
answer,
status: 'answered' as const,
updatedAt: new Date(),
};
this.store.set(id, updated);
return updated;
}
/** Test helper — return all conversations */
getAll(): Conversation[] {
return [...this.store.values()];
}
}
// ---------------------------------------------------------------------------
// Mock conversation server — serves ONLY conversation tRPC procedures
// ---------------------------------------------------------------------------
async function startMockConversationServer(): Promise<{
server: Server;
port: number;
repo: InMemoryConversationRepository;
}> {
const repo = new InMemoryConversationRepository();
const eventBus = new EventEmitterBus();
// Mini router with only conversation procedures
const miniRouter = router({
...conversationProcedures(publicProcedure),
});
const httpServer = createServer(async (req, res) => {
if (!req.url?.startsWith('/trpc')) {
res.writeHead(404);
res.end('Not found');
return;
}
const host = req.headers.host ?? 'localhost';
const url = new URL(req.url, `http://${host}`);
let body: string | undefined;
if (req.method !== 'GET' && req.method !== 'HEAD') {
body = await new Promise<string>((resolve) => {
let data = '';
req.on('data', (chunk: Buffer) => {
data += chunk.toString();
});
req.on('end', () => resolve(data));
});
}
const headers = new Headers();
for (const [key, value] of Object.entries(req.headers)) {
if (value) {
if (Array.isArray(value)) {
value.forEach((v) => headers.append(key, v));
} else {
headers.set(key, value);
}
}
}
const fetchRequest = new Request(url.toString(), {
method: req.method,
headers,
body: body ?? undefined,
});
const fetchResponse = await fetchRequestHandler({
endpoint: '/trpc',
req: fetchRequest,
router: miniRouter,
createContext: () =>
({
eventBus,
serverStartedAt: new Date(),
processCount: 0,
conversationRepository: repo,
// Stub — requireAgentManager is called unconditionally in createConversation,
// but list() is only invoked for taskId/phaseId resolution. With --agent-id
// targeting, list() is never called.
agentManager: { list: async () => [] },
}) as any,
});
res.statusCode = fetchResponse.status;
fetchResponse.headers.forEach((value, key) => {
res.setHeader(key, value);
});
if (fetchResponse.body) {
const reader = fetchResponse.body.getReader();
const pump = async () => {
while (true) {
const { done, value } = await reader.read();
if (done) {
res.end();
return;
}
res.write(value);
}
};
pump().catch(() => res.end());
} else {
res.end(await fetchResponse.text());
}
});
const port = 40000 + Math.floor(Math.random() * 10000);
await new Promise<void>((resolve) => {
httpServer.listen(port, '127.0.0.1', () => resolve());
});
return { server: httpServer, port, repo };
}
// ---------------------------------------------------------------------------
// Diagnostic helpers
// ---------------------------------------------------------------------------
function dumpAgentLogs(workspaceRoot: string, agentName: string) {
const logDir = join(workspaceRoot, '.cw', 'agent-logs', agentName);
if (!existsSync(logDir)) {
console.log(` [${agentName}] No log directory at ${logDir}`);
return;
}
// Dump output.jsonl (last 30 lines)
const outputPath = join(logDir, 'output.jsonl');
if (existsSync(outputPath)) {
const lines = readFileSync(outputPath, 'utf-8').trim().split('\n');
const last = lines.slice(-30);
console.log(` [${agentName}] output.jsonl (last ${last.length}/${lines.length} lines):`);
for (const line of last) {
try {
const ev = JSON.parse(line);
if (ev.type === 'assistant' && ev.message?.content) {
for (const block of ev.message.content) {
if (block.type === 'text') {
console.log(` TEXT: ${block.text.substring(0, 200)}`);
} else if (block.type === 'tool_use') {
console.log(` TOOL: ${block.name} ${JSON.stringify(block.input).substring(0, 150)}`);
}
}
} else if (ev.type === 'result') {
console.log(` RESULT: ${JSON.stringify(ev).substring(0, 300)}`);
}
} catch {
console.log(` RAW: ${line.substring(0, 200)}`);
}
}
}
// Dump stderr
const stderrPath = join(logDir, 'stderr.log');
if (existsSync(stderrPath)) {
const stderr = readFileSync(stderrPath, 'utf-8').trim();
if (stderr) {
console.log(` [${agentName}] stderr: ${stderr.substring(0, 500)}`);
}
}
}
// ---------------------------------------------------------------------------
// Test suite
// ---------------------------------------------------------------------------
describeRealClaude('Real Inter-Agent Conversation (mock server)', () => {
let harness: RealProviderHarness;
let mockServer: Server;
let mockPort: number;
let mockRepo: InMemoryConversationRepository;
const originalCwPort = process.env.CW_PORT;
beforeAll(async () => {
console.log('\n=== Running Real Inter-Agent Conversation Tests ===');
console.log('These tests spawn TWO Claude sessions and incur costs.\n');
console.log('\n=== Real Inter-Agent Conversation Test ===');
console.log('Mock conversation server + two Claude sessions.\n');
// Create temp workspace
workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-conv-test-'));
const { execSync } = await import('node:child_process');
execSync('git init && git config user.email "test@test.com" && git config user.name "Test" && touch .gitkeep && git add . && git commit -m "init"', {
cwd: workspaceRoot,
stdio: 'ignore',
});
// Start mock conversation server (only listen/ask/answer endpoints)
const mock = await startMockConversationServer();
mockServer = mock.server;
mockPort = mock.port;
mockRepo = mock.repo;
console.log(` Mock server on port ${mockPort}`);
// Create in-memory DB
const sqlite = new Database(':memory:');
sqlite.pragma('foreign_keys = ON');
const db = drizzle(sqlite, { schema });
ensureSchema(db);
// Set CW_PORT so agents' cw commands hit the mock server
process.env.CW_PORT = String(mockPort);
// Create repositories
agentRepository = new DrizzleAgentRepository(db);
const projectRepository = new DrizzleProjectRepository(db);
const accountRepository = new DrizzleAccountRepository(db);
conversationRepository = new DrizzleConversationRepository(db);
const taskRepository = new DrizzleTaskRepository(db);
const phaseRepository = new DrizzlePhaseRepository(db);
const pageRepository = new DrizzlePageRepository(db);
const logChunkRepository = new DrizzleLogChunkRepository(db);
const changeSetRepository = new DrizzleChangeSetRepository(db);
// Event bus
eventBus = new EventEmitterBus();
// Agent manager
agentManager = new MultiProviderAgentManager(
agentRepository,
workspaceRoot,
projectRepository,
accountRepository,
eventBus,
undefined, // no credential manager
changeSetRepository,
phaseRepository,
taskRepository,
pageRepository,
logChunkRepository,
);
// Start server on random port
testPort = 40000 + Math.floor(Math.random() * 10000);
const registry = new ProcessRegistry();
const processManager = new ProcessManager(registry, eventBus);
const logManager = new LogManager();
server = new CoordinationServer(
{ port: testPort, pidFile: `/tmp/cw-conv-test-${testPort}.pid` },
processManager,
logManager,
eventBus,
{
agentManager,
taskRepository,
projectRepository,
accountRepository,
conversationRepository,
phaseRepository,
pageRepository,
logChunkRepository,
changeSetRepository,
workspaceRoot,
},
);
await server.start();
console.log(` Server started on port ${testPort}`);
// Set CW_PORT so spawned agents can reach the server
process.env.CW_PORT = String(testPort);
// Create tRPC client
client = createTrpcClient(testPort);
// Real agent harness for spawning + worktrees (no full CoordinationServer)
harness = await createRealProviderHarness({ provider: 'claude' });
console.log(` Workspace: ${harness.workspaceRoot}`);
});
afterAll(async () => {
// Restore env
if (originalCwPort) {
process.env.CW_PORT = originalCwPort;
} else {
delete process.env.CW_PORT;
}
// Kill agents
const agents = await agentRepository.findAll();
for (const a of agents) {
if (a.status === 'running') {
try { await agentManager.stop(a.id); } catch { /* ignore */ }
}
}
// Stop server
try { await server.stop(); } catch { /* ignore */ }
// Clean up
try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ }
await harness?.cleanup();
mockServer?.close();
});
it(
'two Claude agents communicate via cw ask/listen/answer',
'two agents with real tasks communicate via cw ask/listen/answer (two questions prove re-listen)',
async () => {
// Spawn Agent A — the responder
// It starts a listener, waits for a question, answers it, then completes
const agentA = await agentManager.spawn({
taskId: null,
prompt: `You are Agent A in a multi-agent test. Your job:
const agentSuffix = nanoid(6); // unique suffix for temp files
1. Read .cw/input/manifest.json to get your agentId
2. Start a background listener: cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-output.txt &
// ---------------------------------------------------------------
// Agent A — builds a validator module WHILE answering questions
// in the background via cw listen
// ---------------------------------------------------------------
const agentA = await harness.agentManager.spawn({
taskId: null,
prompt: `You are Agent A in a multi-agent coordination test.
You have TWO concurrent responsibilities:
1. Build a TypeScript validator module (your main coding task)
2. Answer questions from other agents via a background listener
SETUP (do this first):
- Read .cw/input/manifest.json to get your agentId
- Start a background listener that writes to a temp file:
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
LISTEN_PID=$!
3. Wait for the listener to find a question by polling the file:
while [ ! -s /tmp/cw-listen-output.txt ]; do sleep 1; done
4. Parse the JSON from /tmp/cw-listen-output.txt to get conversationId
5. Answer the conversation: cw answer "The answer is 42" --conversation-id <conversationId>
6. Kill the listener: kill $LISTEN_PID 2>/dev/null
7. Write .cw/output/signal.json with: {"status":"done"}
IMPORTANT:
- The server is on port ${testPort}
- Use the exact answer text: "The answer is 42"
- Do NOT ask any questions or produce other output`,
mode: 'execute',
provider: 'claude',
});
MAIN CODING TASK — implement a user registration validator:
console.log(` Agent A spawned: ${agentA.id} (${agentA.name})`);
1. Create types.ts:
export interface RegistrationInput { name: string; email: string; password: string; }
export interface ValidationResult { valid: boolean; errors: string[]; }
// Small delay to let Agent A start its listener
await sleep(5000);
2. Create validator.ts:
Import from types.ts. Export function validateRegistration(input: RegistrationInput): ValidationResult
Rules: name min 2 chars, email must have exactly one @ and domain with a dot and no spaces and max 254 chars, password min 8 chars.
// Spawn Agent B — the questioner
// It asks Agent A a question, waits for the answer, writes it to signal.json result
const agentB = await agentManager.spawn({
taskId: null,
prompt: `You are Agent B in a multi-agent test. Your job:
3. Create index.ts that re-exports everything from types.ts and validator.ts.
1. Read .cw/input/manifest.json to get your agentId
2. Ask Agent A a question using this command:
ANSWER=$(cw ask "What is the meaning of everything?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
3. Write .cw/output/signal.json with: {"status":"done","result":"<the ANSWER you received>"}
BETWEEN EACH FILE, check for incoming questions:
if [ -s /tmp/cw-listen-${agentSuffix}.txt ]; then
# parse the JSON, get conversationId and question
# answer: cw answer "<answer based on your code>" --conversation-id <id>
# clear and restart listener:
> /tmp/cw-listen-${agentSuffix}.txt
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
LISTEN_PID=$!
fi
IMPORTANT:
- The server is on port ${testPort}
- Agent A's ID is: ${agentA.id}
- Write the EXACT answer you received from Agent A into the result field
- Do NOT ask any questions or produce other output`,
mode: 'execute',
provider: 'claude',
});
You will receive TWO questions total while you work. Answer them based on the code you are writing.
console.log(` Agent B spawned: ${agentB.id} (${agentB.name})`);
CLEANUP: After all 3 files are written and both questions answered:
- kill $LISTEN_PID 2>/dev/null
- Write .cw/output/signal.json: {"status":"done","result":"validator module complete, answered 2 questions"}
// Wait for both agents to complete
const deadline = Date.now() + CONVERSATION_TEST_TIMEOUT;
let agentADone = false;
let agentBDone = false;
let agentBResult: string | null = null;
while (Date.now() < deadline && (!agentADone || !agentBDone)) {
const a = await agentRepository.findById(agentA.id);
const b = await agentRepository.findById(agentB.id);
if (a && a.status !== 'running') {
agentADone = true;
console.log(` Agent A status: ${a.status}`);
}
if (b && b.status !== 'running') {
agentBDone = true;
console.log(` Agent B status: ${b.status}`);
if (b.result) {
try {
const parsed = JSON.parse(b.result);
agentBResult = parsed.message ?? parsed.result ?? b.result;
} catch {
agentBResult = b.result;
}
}
}
if (!agentADone || !agentBDone) {
await sleep(2000);
}
}
// Verify results
const finalA = await agentRepository.findById(agentA.id);
const finalB = await agentRepository.findById(agentB.id);
console.log(` Agent A final status: ${finalA?.status}`);
console.log(` Agent B final status: ${finalB?.status}`);
console.log(` Agent B result: ${finalB?.result}`);
// Agent A should have completed (idle)
expect(finalA?.status).toBe('idle');
// Agent B should have completed (idle)
expect(finalB?.status).toBe('idle');
// Agent B's result should contain the answer from Agent A
expect(finalB?.result).toBeTruthy();
expect(finalB!.result).toContain('42');
// Verify the conversation exists in the database
const conversations = await conversationRepository.findPendingForAgent(agentA.id);
// Should be empty (all answered)
expect(conversations.length).toBe(0);
},
CONVERSATION_TEST_TIMEOUT,
);
it(
'agent manifest.json contains agentId and agentName',
async () => {
const agent = await agentManager.spawn({
taskId: null,
prompt: `Read .cw/input/manifest.json and write its contents to .cw/output/signal.json in this format:
{"status":"done","result":"<raw contents of manifest.json>"}
Do NOT modify the manifest contents. Just copy them into the result field as a string.`,
CRITICAL:
- The listener MUST run in the background while you write code.
- Check for questions between files, not as blocking waits.
- The CW_PORT environment variable is already set to ${mockPort}.`,
mode: 'execute',
provider: 'claude',
inputContext: {},
});
console.log(` Identity test agent: ${agent.id} (${agent.name})`);
console.log(` Agent A: ${agentA.id} (${agentA.name})`);
// Wait for completion
const deadline = Date.now() + CONVERSATION_TEST_TIMEOUT;
while (Date.now() < deadline) {
const a = await agentRepository.findById(agent.id);
if (a && a.status !== 'running') break;
await sleep(1000);
// Give Agent A time to start its background listener and begin coding
await sleep(15000);
// ---------------------------------------------------------------
// Agent B — builds a client module, asks Agent A questions to
// learn the validation rules, then uses answers in its code
// ---------------------------------------------------------------
const agentB = await harness.agentManager.spawn({
taskId: null,
prompt: `You are Agent B in a multi-agent coordination test.
Read .cw/input/manifest.json to get your agentId. Agent A (ID: ${agentA.id}) is building a validator module.
YOUR CODING TASK — build a registration API client that includes client-side validation matching Agent A's server-side rules:
1. Create client-scaffold.ts with a basic RegistrationClient class that has a register(name, email, password) method that returns Promise<{ok: boolean}>.
Leave a TODO comment where validation will go.
2. NOW ask Agent A what the validation rules are — you need this to write proper client-side checks:
FIELDS=$(cw ask "What are the required fields and their types for registration?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
3. Ask Agent A about the specific email validation rules:
EMAIL_RULES=$(cw ask "What are the exact email validation rules you implemented?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
4. Create validated-client.ts — a COMPLETE implementation using the answers:
Import the scaffold, add a validateBeforeSubmit(name, email, password) function
that implements the EXACT validation rules Agent A told you about.
Include a comment at the top with the rules you received.
5. Write .cw/output/signal.json: {"status":"done","result":"client module complete with validation from agent A"}
CRITICAL:
- Create client-scaffold.ts BEFORE asking questions (you have independent work to do first).
- Use the ACTUAL answers from Agent A in your validated-client.ts implementation.
- The CW_PORT environment variable is already set to ${mockPort}.`,
mode: 'execute',
provider: 'claude',
inputContext: {},
});
console.log(` Agent B: ${agentB.id} (${agentB.name})`);
// ---------------------------------------------------------------
// Wait for both agents to stop running, then verify conversations
// ---------------------------------------------------------------
const deadline = Date.now() + TEST_TIMEOUT;
let aDone = false;
let bDone = false;
let lastLogTime = 0;
while (Date.now() < deadline && (!aDone || !bDone)) {
const agentAInfo = await harness.agentRepository.findById(agentA.id);
const agentBInfo = await harness.agentRepository.findById(agentB.id);
// Periodic progress logging every 30s
if (Date.now() - lastLogTime > 30000) {
const elapsed = Math.round((Date.now() - (deadline - TEST_TIMEOUT)) / 1000);
console.log(` [${elapsed}s] A=${agentAInfo?.status ?? '?'} B=${agentBInfo?.status ?? '?'} convs=${mockRepo.getAll().length}`);
lastLogTime = Date.now();
}
const final = await agentRepository.findById(agent.id);
console.log(` Agent status: ${final?.status}`);
console.log(` Agent result: ${final?.result}`);
if (agentAInfo && agentAInfo.status !== 'running' && !aDone) {
aDone = true;
console.log(` Agent A final status: ${agentAInfo.status}`);
dumpAgentLogs(harness.workspaceRoot, agentA.name);
}
if (agentBInfo && agentBInfo.status !== 'running' && !bDone) {
bDone = true;
console.log(` Agent B final status: ${agentBInfo.status}`);
dumpAgentLogs(harness.workspaceRoot, agentB.name);
}
expect(final?.status).toBe('idle');
expect(final?.result).toBeTruthy();
if (!aDone || !bDone) await sleep(2000);
}
// The result should contain the manifest contents which include agentId and agentName
const result = final!.result!;
expect(result).toContain(agent.id);
expect(result).toContain(agent.name);
expect(aDone).toBe(true);
expect(bDone).toBe(true);
// ---------------------------------------------------------------
// Verify conversations in mock repo
// ---------------------------------------------------------------
const allConversations = mockRepo.getAll();
console.log(` Total conversations: ${allConversations.length}`);
for (const c of allConversations) {
console.log(
` ${c.id}: ${c.status} — Q: "${c.question}" A: "${c.answer?.substring(0, 80)}..."`,
);
}
// Exactly 2 conversations, both answered
expect(allConversations.length).toBe(2);
expect(allConversations.every((c) => c.status === 'answered')).toBe(true);
// Both target Agent A, both from Agent B
expect(allConversations.every((c) => c.toAgentId === agentA.id)).toBe(true);
expect(allConversations.every((c) => c.fromAgentId === agentB.id)).toBe(true);
// Questions should be distinct (one about fields, one about email validation)
const questions = allConversations.map((c) => c.question);
expect(questions.some((q) => q.toLowerCase().includes('field'))).toBe(true);
expect(questions.some((q) => q.toLowerCase().includes('email'))).toBe(true);
// Both answers should be non-empty
expect(allConversations.every((c) => c.answer && c.answer.length > 0)).toBe(true);
// ---------------------------------------------------------------
// Verify Agent A's coding output — validator module files exist
// ---------------------------------------------------------------
const aWorkdir = join(
harness.workspaceRoot,
'agent-workdirs',
agentA.name,
'workspace',
);
const aFiles = ['types.ts', 'validator.ts', 'index.ts'];
for (const f of aFiles) {
const filePath = join(aWorkdir, f);
const exists = existsSync(filePath);
console.log(` Agent A file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
expect(exists).toBe(true);
}
// validator.ts should contain actual validation logic
const validatorContent = readFileSync(join(aWorkdir, 'validator.ts'), 'utf-8');
console.log(` Agent A validator.ts (${validatorContent.length} chars): ${validatorContent.substring(0, 120)}...`);
expect(validatorContent.toLowerCase()).toContain('email');
expect(validatorContent.toLowerCase()).toContain('password');
// ---------------------------------------------------------------
// Verify Agent B's coding output — client module files exist
// ---------------------------------------------------------------
const bWorkdir = join(
harness.workspaceRoot,
'agent-workdirs',
agentB.name,
'workspace',
);
const bFiles = ['client-scaffold.ts', 'validated-client.ts'];
for (const f of bFiles) {
const filePath = join(bWorkdir, f);
const exists = existsSync(filePath);
console.log(` Agent B file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
expect(exists).toBe(true);
}
// validated-client.ts should reference validation rules from Agent A's answers
const clientContent = readFileSync(join(bWorkdir, 'validated-client.ts'), 'utf-8');
console.log(` Agent B validated-client.ts (${clientContent.length} chars): ${clientContent.substring(0, 120)}...`);
expect(clientContent.toLowerCase()).toContain('email');
// ---------------------------------------------------------------
// Verify interleaving: Agent A's JSONL log has coding tool calls
// (Write for .ts files) interleaved with conversation tool calls
// (Bash for cw listen/answer)
// ---------------------------------------------------------------
const aLogPath = join(harness.workspaceRoot, '.cw', 'agent-logs', agentA.name, 'output.jsonl');
const aLog = readFileSync(aLogPath, 'utf-8').trim().split('\n');
const toolCalls: { type: 'code' | 'conversation'; name: string; detail: string }[] = [];
for (const line of aLog) {
try {
const ev = JSON.parse(line);
if (ev.type !== 'assistant' || !ev.message?.content) continue;
for (const block of ev.message.content) {
if (block.type !== 'tool_use') continue;
const input = typeof block.input === 'string' ? block.input : JSON.stringify(block.input);
if (block.name === 'Write' && input.includes('.ts')) {
toolCalls.push({ type: 'code', name: 'Write', detail: input.substring(0, 80) });
} else if (block.name === 'Bash' && (input.includes('cw listen') || input.includes('cw answer'))) {
toolCalls.push({ type: 'conversation', name: 'Bash', detail: input.substring(0, 80) });
}
}
} catch { /* skip non-JSON lines */ }
}
console.log(` Agent A interleaving (${toolCalls.length} relevant tool calls):`);
for (const tc of toolCalls) {
console.log(` [${tc.type}] ${tc.name}: ${tc.detail}`);
}
// Must have both code and conversation tool calls
const hasCode = toolCalls.some((tc) => tc.type === 'code');
const hasConversation = toolCalls.some((tc) => tc.type === 'conversation');
expect(hasCode).toBe(true);
expect(hasConversation).toBe(true);
// Verify interleaving: at least one code call must appear AFTER a conversation call
// (proving coding continued after handling a question)
const firstConvIdx = toolCalls.findIndex((tc) => tc.type === 'conversation');
const lastCodeIdx = toolCalls.length - 1 - [...toolCalls].reverse().findIndex((tc) => tc.type === 'code');
console.log(` First conversation at index ${firstConvIdx}, last code at index ${lastCodeIdx}`);
expect(lastCodeIdx).toBeGreaterThan(firstConvIdx);
},
CONVERSATION_TEST_TIMEOUT,
TEST_TIMEOUT,
);
});