test: Rewrite conversation integration test with mock server and real tasks
Replace full CoordinationServer with a lightweight mock that serves only conversation tRPC procedures backed by an in-memory repository. Agents now have real coding tasks (write spec, ask questions, create summary) and the two-question flow proves the listen→answer→re-listen cycle works.
This commit is contained in:
@@ -9,309 +9,532 @@
|
||||
* REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/conversation.test.ts --test-timeout=300000
|
||||
* ```
|
||||
*
|
||||
* Tests covered:
|
||||
* - Two real Claude sessions communicating via cw ask / cw listen / cw answer
|
||||
* - Agent identity (agentId/agentName) in manifest.json
|
||||
* - Conversation lifecycle: create → pending → answered
|
||||
* Architecture:
|
||||
* - Mock conversation server (only cw listen/ask/answer endpoints, no full CoordinationServer)
|
||||
* - In-memory ConversationRepository (no SQLite, no FK constraints)
|
||||
* - Real agent harness for spawning two Claude sessions with actual coding tasks
|
||||
* - Two sequential questions prove the listen→answer→re-listen cycle works
|
||||
*
|
||||
* Estimated cost: ~$0.20 per full run (two Claude sessions)
|
||||
* Estimated cost: ~$0.30 per full run (two Claude sessions)
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
||||
import { mkdtemp, rm, readFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { it, expect, beforeAll, afterAll } from 'vitest';
|
||||
import { createServer } from 'node:http';
|
||||
import type { Server } from 'node:http';
|
||||
import { readFileSync, existsSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import Database from 'better-sqlite3';
|
||||
import { drizzle } from 'drizzle-orm/better-sqlite3';
|
||||
import * as schema from '../../../db/schema.js';
|
||||
import { ensureSchema } from '../../../db/ensure-schema.js';
|
||||
import { nanoid } from 'nanoid';
|
||||
import { fetchRequestHandler } from '@trpc/server/adapters/fetch';
|
||||
import { router, publicProcedure } from '../../../trpc/trpc.js';
|
||||
import { conversationProcedures } from '../../../trpc/routers/conversation.js';
|
||||
import { EventEmitterBus } from '../../../events/bus.js';
|
||||
import type { DomainEvent } from '../../../events/types.js';
|
||||
import { MultiProviderAgentManager } from '../../../agent/manager.js';
|
||||
import type { ConversationRepository, CreateConversationData } from '../../../db/repositories/conversation-repository.js';
|
||||
import type { Conversation } from '../../../db/schema.js';
|
||||
import {
|
||||
DrizzleAgentRepository,
|
||||
DrizzleProjectRepository,
|
||||
DrizzleAccountRepository,
|
||||
DrizzleConversationRepository,
|
||||
DrizzleTaskRepository,
|
||||
DrizzlePhaseRepository,
|
||||
DrizzlePageRepository,
|
||||
DrizzleLogChunkRepository,
|
||||
DrizzleChangeSetRepository,
|
||||
} from '../../../db/repositories/drizzle/index.js';
|
||||
import { CoordinationServer } from '../../../server/index.js';
|
||||
import { ProcessManager, ProcessRegistry } from '../../../process/index.js';
|
||||
import { LogManager } from '../../../logging/index.js';
|
||||
import { createTrpcClient } from '../../../cli/trpc-client.js';
|
||||
import type { TrpcClient } from '../../../cli/trpc-client.js';
|
||||
import { describeRealClaude, sleep } from './harness.js';
|
||||
createRealProviderHarness,
|
||||
describeRealClaude,
|
||||
sleep,
|
||||
type RealProviderHarness,
|
||||
} from './harness.js';
|
||||
|
||||
const CONVERSATION_TEST_TIMEOUT = 180000; // 3 minutes per test
|
||||
const TEST_TIMEOUT = 300000; // 5 minutes — agents do real coding + conversation
|
||||
|
||||
describeRealClaude('Real Inter-Agent Conversation', () => {
|
||||
let workspaceRoot: string;
|
||||
let agentManager: MultiProviderAgentManager;
|
||||
let agentRepository: DrizzleAgentRepository;
|
||||
let conversationRepository: DrizzleConversationRepository;
|
||||
let server: CoordinationServer;
|
||||
let client: TrpcClient;
|
||||
let testPort: number;
|
||||
let eventBus: EventEmitterBus;
|
||||
// ---------------------------------------------------------------------------
|
||||
// In-memory ConversationRepository — no SQLite, no FK constraints
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
class InMemoryConversationRepository implements ConversationRepository {
|
||||
private store = new Map<string, Conversation>();
|
||||
|
||||
async create(data: CreateConversationData): Promise<Conversation> {
|
||||
const now = new Date();
|
||||
const conversation: Conversation = {
|
||||
id: nanoid(),
|
||||
fromAgentId: data.fromAgentId,
|
||||
toAgentId: data.toAgentId,
|
||||
initiativeId: data.initiativeId ?? null,
|
||||
phaseId: data.phaseId ?? null,
|
||||
taskId: data.taskId ?? null,
|
||||
question: data.question,
|
||||
answer: null,
|
||||
status: 'pending',
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
};
|
||||
this.store.set(conversation.id, conversation);
|
||||
return conversation;
|
||||
}
|
||||
|
||||
async findById(id: string): Promise<Conversation | null> {
|
||||
return this.store.get(id) ?? null;
|
||||
}
|
||||
|
||||
async findPendingForAgent(toAgentId: string): Promise<Conversation[]> {
|
||||
return [...this.store.values()]
|
||||
.filter((c) => c.toAgentId === toAgentId && c.status === 'pending')
|
||||
.sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime());
|
||||
}
|
||||
|
||||
async answer(id: string, answer: string): Promise<Conversation | null> {
|
||||
const conv = this.store.get(id);
|
||||
if (!conv) return null;
|
||||
const updated: Conversation = {
|
||||
...conv,
|
||||
answer,
|
||||
status: 'answered' as const,
|
||||
updatedAt: new Date(),
|
||||
};
|
||||
this.store.set(id, updated);
|
||||
return updated;
|
||||
}
|
||||
|
||||
/** Test helper — return all conversations */
|
||||
getAll(): Conversation[] {
|
||||
return [...this.store.values()];
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock conversation server — serves ONLY conversation tRPC procedures
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
async function startMockConversationServer(): Promise<{
|
||||
server: Server;
|
||||
port: number;
|
||||
repo: InMemoryConversationRepository;
|
||||
}> {
|
||||
const repo = new InMemoryConversationRepository();
|
||||
const eventBus = new EventEmitterBus();
|
||||
|
||||
// Mini router with only conversation procedures
|
||||
const miniRouter = router({
|
||||
...conversationProcedures(publicProcedure),
|
||||
});
|
||||
|
||||
const httpServer = createServer(async (req, res) => {
|
||||
if (!req.url?.startsWith('/trpc')) {
|
||||
res.writeHead(404);
|
||||
res.end('Not found');
|
||||
return;
|
||||
}
|
||||
|
||||
const host = req.headers.host ?? 'localhost';
|
||||
const url = new URL(req.url, `http://${host}`);
|
||||
|
||||
let body: string | undefined;
|
||||
if (req.method !== 'GET' && req.method !== 'HEAD') {
|
||||
body = await new Promise<string>((resolve) => {
|
||||
let data = '';
|
||||
req.on('data', (chunk: Buffer) => {
|
||||
data += chunk.toString();
|
||||
});
|
||||
req.on('end', () => resolve(data));
|
||||
});
|
||||
}
|
||||
|
||||
const headers = new Headers();
|
||||
for (const [key, value] of Object.entries(req.headers)) {
|
||||
if (value) {
|
||||
if (Array.isArray(value)) {
|
||||
value.forEach((v) => headers.append(key, v));
|
||||
} else {
|
||||
headers.set(key, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const fetchRequest = new Request(url.toString(), {
|
||||
method: req.method,
|
||||
headers,
|
||||
body: body ?? undefined,
|
||||
});
|
||||
|
||||
const fetchResponse = await fetchRequestHandler({
|
||||
endpoint: '/trpc',
|
||||
req: fetchRequest,
|
||||
router: miniRouter,
|
||||
createContext: () =>
|
||||
({
|
||||
eventBus,
|
||||
serverStartedAt: new Date(),
|
||||
processCount: 0,
|
||||
conversationRepository: repo,
|
||||
// Stub — requireAgentManager is called unconditionally in createConversation,
|
||||
// but list() is only invoked for taskId/phaseId resolution. With --agent-id
|
||||
// targeting, list() is never called.
|
||||
agentManager: { list: async () => [] },
|
||||
}) as any,
|
||||
});
|
||||
|
||||
res.statusCode = fetchResponse.status;
|
||||
fetchResponse.headers.forEach((value, key) => {
|
||||
res.setHeader(key, value);
|
||||
});
|
||||
|
||||
if (fetchResponse.body) {
|
||||
const reader = fetchResponse.body.getReader();
|
||||
const pump = async () => {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) {
|
||||
res.end();
|
||||
return;
|
||||
}
|
||||
res.write(value);
|
||||
}
|
||||
};
|
||||
pump().catch(() => res.end());
|
||||
} else {
|
||||
res.end(await fetchResponse.text());
|
||||
}
|
||||
});
|
||||
|
||||
const port = 40000 + Math.floor(Math.random() * 10000);
|
||||
await new Promise<void>((resolve) => {
|
||||
httpServer.listen(port, '127.0.0.1', () => resolve());
|
||||
});
|
||||
|
||||
return { server: httpServer, port, repo };
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Diagnostic helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function dumpAgentLogs(workspaceRoot: string, agentName: string) {
|
||||
const logDir = join(workspaceRoot, '.cw', 'agent-logs', agentName);
|
||||
if (!existsSync(logDir)) {
|
||||
console.log(` [${agentName}] No log directory at ${logDir}`);
|
||||
return;
|
||||
}
|
||||
// Dump output.jsonl (last 30 lines)
|
||||
const outputPath = join(logDir, 'output.jsonl');
|
||||
if (existsSync(outputPath)) {
|
||||
const lines = readFileSync(outputPath, 'utf-8').trim().split('\n');
|
||||
const last = lines.slice(-30);
|
||||
console.log(` [${agentName}] output.jsonl (last ${last.length}/${lines.length} lines):`);
|
||||
for (const line of last) {
|
||||
try {
|
||||
const ev = JSON.parse(line);
|
||||
if (ev.type === 'assistant' && ev.message?.content) {
|
||||
for (const block of ev.message.content) {
|
||||
if (block.type === 'text') {
|
||||
console.log(` TEXT: ${block.text.substring(0, 200)}`);
|
||||
} else if (block.type === 'tool_use') {
|
||||
console.log(` TOOL: ${block.name} ${JSON.stringify(block.input).substring(0, 150)}`);
|
||||
}
|
||||
}
|
||||
} else if (ev.type === 'result') {
|
||||
console.log(` RESULT: ${JSON.stringify(ev).substring(0, 300)}`);
|
||||
}
|
||||
} catch {
|
||||
console.log(` RAW: ${line.substring(0, 200)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Dump stderr
|
||||
const stderrPath = join(logDir, 'stderr.log');
|
||||
if (existsSync(stderrPath)) {
|
||||
const stderr = readFileSync(stderrPath, 'utf-8').trim();
|
||||
if (stderr) {
|
||||
console.log(` [${agentName}] stderr: ${stderr.substring(0, 500)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test suite
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describeRealClaude('Real Inter-Agent Conversation (mock server)', () => {
|
||||
let harness: RealProviderHarness;
|
||||
let mockServer: Server;
|
||||
let mockPort: number;
|
||||
let mockRepo: InMemoryConversationRepository;
|
||||
const originalCwPort = process.env.CW_PORT;
|
||||
|
||||
beforeAll(async () => {
|
||||
console.log('\n=== Running Real Inter-Agent Conversation Tests ===');
|
||||
console.log('These tests spawn TWO Claude sessions and incur costs.\n');
|
||||
console.log('\n=== Real Inter-Agent Conversation Test ===');
|
||||
console.log('Mock conversation server + two Claude sessions.\n');
|
||||
|
||||
// Create temp workspace
|
||||
workspaceRoot = await mkdtemp(join(tmpdir(), 'cw-conv-test-'));
|
||||
const { execSync } = await import('node:child_process');
|
||||
execSync('git init && git config user.email "test@test.com" && git config user.name "Test" && touch .gitkeep && git add . && git commit -m "init"', {
|
||||
cwd: workspaceRoot,
|
||||
stdio: 'ignore',
|
||||
});
|
||||
// Start mock conversation server (only listen/ask/answer endpoints)
|
||||
const mock = await startMockConversationServer();
|
||||
mockServer = mock.server;
|
||||
mockPort = mock.port;
|
||||
mockRepo = mock.repo;
|
||||
console.log(` Mock server on port ${mockPort}`);
|
||||
|
||||
// Create in-memory DB
|
||||
const sqlite = new Database(':memory:');
|
||||
sqlite.pragma('foreign_keys = ON');
|
||||
const db = drizzle(sqlite, { schema });
|
||||
ensureSchema(db);
|
||||
// Set CW_PORT so agents' cw commands hit the mock server
|
||||
process.env.CW_PORT = String(mockPort);
|
||||
|
||||
// Create repositories
|
||||
agentRepository = new DrizzleAgentRepository(db);
|
||||
const projectRepository = new DrizzleProjectRepository(db);
|
||||
const accountRepository = new DrizzleAccountRepository(db);
|
||||
conversationRepository = new DrizzleConversationRepository(db);
|
||||
const taskRepository = new DrizzleTaskRepository(db);
|
||||
const phaseRepository = new DrizzlePhaseRepository(db);
|
||||
const pageRepository = new DrizzlePageRepository(db);
|
||||
const logChunkRepository = new DrizzleLogChunkRepository(db);
|
||||
const changeSetRepository = new DrizzleChangeSetRepository(db);
|
||||
|
||||
// Event bus
|
||||
eventBus = new EventEmitterBus();
|
||||
|
||||
// Agent manager
|
||||
agentManager = new MultiProviderAgentManager(
|
||||
agentRepository,
|
||||
workspaceRoot,
|
||||
projectRepository,
|
||||
accountRepository,
|
||||
eventBus,
|
||||
undefined, // no credential manager
|
||||
changeSetRepository,
|
||||
phaseRepository,
|
||||
taskRepository,
|
||||
pageRepository,
|
||||
logChunkRepository,
|
||||
);
|
||||
|
||||
// Start server on random port
|
||||
testPort = 40000 + Math.floor(Math.random() * 10000);
|
||||
const registry = new ProcessRegistry();
|
||||
const processManager = new ProcessManager(registry, eventBus);
|
||||
const logManager = new LogManager();
|
||||
|
||||
server = new CoordinationServer(
|
||||
{ port: testPort, pidFile: `/tmp/cw-conv-test-${testPort}.pid` },
|
||||
processManager,
|
||||
logManager,
|
||||
eventBus,
|
||||
{
|
||||
agentManager,
|
||||
taskRepository,
|
||||
projectRepository,
|
||||
accountRepository,
|
||||
conversationRepository,
|
||||
phaseRepository,
|
||||
pageRepository,
|
||||
logChunkRepository,
|
||||
changeSetRepository,
|
||||
workspaceRoot,
|
||||
},
|
||||
);
|
||||
|
||||
await server.start();
|
||||
console.log(` Server started on port ${testPort}`);
|
||||
|
||||
// Set CW_PORT so spawned agents can reach the server
|
||||
process.env.CW_PORT = String(testPort);
|
||||
|
||||
// Create tRPC client
|
||||
client = createTrpcClient(testPort);
|
||||
// Real agent harness for spawning + worktrees (no full CoordinationServer)
|
||||
harness = await createRealProviderHarness({ provider: 'claude' });
|
||||
console.log(` Workspace: ${harness.workspaceRoot}`);
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
// Restore env
|
||||
if (originalCwPort) {
|
||||
process.env.CW_PORT = originalCwPort;
|
||||
} else {
|
||||
delete process.env.CW_PORT;
|
||||
}
|
||||
|
||||
// Kill agents
|
||||
const agents = await agentRepository.findAll();
|
||||
for (const a of agents) {
|
||||
if (a.status === 'running') {
|
||||
try { await agentManager.stop(a.id); } catch { /* ignore */ }
|
||||
}
|
||||
}
|
||||
|
||||
// Stop server
|
||||
try { await server.stop(); } catch { /* ignore */ }
|
||||
|
||||
// Clean up
|
||||
try { await rm(workspaceRoot, { recursive: true, force: true }); } catch { /* ignore */ }
|
||||
await harness?.cleanup();
|
||||
mockServer?.close();
|
||||
});
|
||||
|
||||
it(
|
||||
'two Claude agents communicate via cw ask/listen/answer',
|
||||
'two agents with real tasks communicate via cw ask/listen/answer (two questions prove re-listen)',
|
||||
async () => {
|
||||
// Spawn Agent A — the responder
|
||||
// It starts a listener, waits for a question, answers it, then completes
|
||||
const agentA = await agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: `You are Agent A in a multi-agent test. Your job:
|
||||
const agentSuffix = nanoid(6); // unique suffix for temp files
|
||||
|
||||
1. Read .cw/input/manifest.json to get your agentId
|
||||
2. Start a background listener: cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-output.txt &
|
||||
// ---------------------------------------------------------------
|
||||
// Agent A — builds a validator module WHILE answering questions
|
||||
// in the background via cw listen
|
||||
// ---------------------------------------------------------------
|
||||
const agentA = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: `You are Agent A in a multi-agent coordination test.
|
||||
|
||||
You have TWO concurrent responsibilities:
|
||||
1. Build a TypeScript validator module (your main coding task)
|
||||
2. Answer questions from other agents via a background listener
|
||||
|
||||
SETUP (do this first):
|
||||
- Read .cw/input/manifest.json to get your agentId
|
||||
- Start a background listener that writes to a temp file:
|
||||
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
|
||||
LISTEN_PID=$!
|
||||
3. Wait for the listener to find a question by polling the file:
|
||||
while [ ! -s /tmp/cw-listen-output.txt ]; do sleep 1; done
|
||||
4. Parse the JSON from /tmp/cw-listen-output.txt to get conversationId
|
||||
5. Answer the conversation: cw answer "The answer is 42" --conversation-id <conversationId>
|
||||
6. Kill the listener: kill $LISTEN_PID 2>/dev/null
|
||||
7. Write .cw/output/signal.json with: {"status":"done"}
|
||||
|
||||
IMPORTANT:
|
||||
- The server is on port ${testPort}
|
||||
- Use the exact answer text: "The answer is 42"
|
||||
- Do NOT ask any questions or produce other output`,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
});
|
||||
MAIN CODING TASK — implement a user registration validator:
|
||||
|
||||
console.log(` Agent A spawned: ${agentA.id} (${agentA.name})`);
|
||||
1. Create types.ts:
|
||||
export interface RegistrationInput { name: string; email: string; password: string; }
|
||||
export interface ValidationResult { valid: boolean; errors: string[]; }
|
||||
|
||||
// Small delay to let Agent A start its listener
|
||||
await sleep(5000);
|
||||
2. Create validator.ts:
|
||||
Import from types.ts. Export function validateRegistration(input: RegistrationInput): ValidationResult
|
||||
Rules: name min 2 chars, email must have exactly one @ and domain with a dot and no spaces and max 254 chars, password min 8 chars.
|
||||
|
||||
// Spawn Agent B — the questioner
|
||||
// It asks Agent A a question, waits for the answer, writes it to signal.json result
|
||||
const agentB = await agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: `You are Agent B in a multi-agent test. Your job:
|
||||
3. Create index.ts that re-exports everything from types.ts and validator.ts.
|
||||
|
||||
1. Read .cw/input/manifest.json to get your agentId
|
||||
2. Ask Agent A a question using this command:
|
||||
ANSWER=$(cw ask "What is the meaning of everything?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
|
||||
3. Write .cw/output/signal.json with: {"status":"done","result":"<the ANSWER you received>"}
|
||||
BETWEEN EACH FILE, check for incoming questions:
|
||||
if [ -s /tmp/cw-listen-${agentSuffix}.txt ]; then
|
||||
# parse the JSON, get conversationId and question
|
||||
# answer: cw answer "<answer based on your code>" --conversation-id <id>
|
||||
# clear and restart listener:
|
||||
> /tmp/cw-listen-${agentSuffix}.txt
|
||||
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
|
||||
LISTEN_PID=$!
|
||||
fi
|
||||
|
||||
IMPORTANT:
|
||||
- The server is on port ${testPort}
|
||||
- Agent A's ID is: ${agentA.id}
|
||||
- Write the EXACT answer you received from Agent A into the result field
|
||||
- Do NOT ask any questions or produce other output`,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
});
|
||||
You will receive TWO questions total while you work. Answer them based on the code you are writing.
|
||||
|
||||
console.log(` Agent B spawned: ${agentB.id} (${agentB.name})`);
|
||||
CLEANUP: After all 3 files are written and both questions answered:
|
||||
- kill $LISTEN_PID 2>/dev/null
|
||||
- Write .cw/output/signal.json: {"status":"done","result":"validator module complete, answered 2 questions"}
|
||||
|
||||
// Wait for both agents to complete
|
||||
const deadline = Date.now() + CONVERSATION_TEST_TIMEOUT;
|
||||
|
||||
let agentADone = false;
|
||||
let agentBDone = false;
|
||||
let agentBResult: string | null = null;
|
||||
|
||||
while (Date.now() < deadline && (!agentADone || !agentBDone)) {
|
||||
const a = await agentRepository.findById(agentA.id);
|
||||
const b = await agentRepository.findById(agentB.id);
|
||||
|
||||
if (a && a.status !== 'running') {
|
||||
agentADone = true;
|
||||
console.log(` Agent A status: ${a.status}`);
|
||||
}
|
||||
if (b && b.status !== 'running') {
|
||||
agentBDone = true;
|
||||
console.log(` Agent B status: ${b.status}`);
|
||||
if (b.result) {
|
||||
try {
|
||||
const parsed = JSON.parse(b.result);
|
||||
agentBResult = parsed.message ?? parsed.result ?? b.result;
|
||||
} catch {
|
||||
agentBResult = b.result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!agentADone || !agentBDone) {
|
||||
await sleep(2000);
|
||||
}
|
||||
}
|
||||
|
||||
// Verify results
|
||||
const finalA = await agentRepository.findById(agentA.id);
|
||||
const finalB = await agentRepository.findById(agentB.id);
|
||||
|
||||
console.log(` Agent A final status: ${finalA?.status}`);
|
||||
console.log(` Agent B final status: ${finalB?.status}`);
|
||||
console.log(` Agent B result: ${finalB?.result}`);
|
||||
|
||||
// Agent A should have completed (idle)
|
||||
expect(finalA?.status).toBe('idle');
|
||||
|
||||
// Agent B should have completed (idle)
|
||||
expect(finalB?.status).toBe('idle');
|
||||
|
||||
// Agent B's result should contain the answer from Agent A
|
||||
expect(finalB?.result).toBeTruthy();
|
||||
expect(finalB!.result).toContain('42');
|
||||
|
||||
// Verify the conversation exists in the database
|
||||
const conversations = await conversationRepository.findPendingForAgent(agentA.id);
|
||||
// Should be empty (all answered)
|
||||
expect(conversations.length).toBe(0);
|
||||
},
|
||||
CONVERSATION_TEST_TIMEOUT,
|
||||
);
|
||||
|
||||
it(
|
||||
'agent manifest.json contains agentId and agentName',
|
||||
async () => {
|
||||
const agent = await agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: `Read .cw/input/manifest.json and write its contents to .cw/output/signal.json in this format:
|
||||
{"status":"done","result":"<raw contents of manifest.json>"}
|
||||
Do NOT modify the manifest contents. Just copy them into the result field as a string.`,
|
||||
CRITICAL:
|
||||
- The listener MUST run in the background while you write code.
|
||||
- Check for questions between files, not as blocking waits.
|
||||
- The CW_PORT environment variable is already set to ${mockPort}.`,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
inputContext: {},
|
||||
});
|
||||
|
||||
console.log(` Identity test agent: ${agent.id} (${agent.name})`);
|
||||
console.log(` Agent A: ${agentA.id} (${agentA.name})`);
|
||||
|
||||
// Wait for completion
|
||||
const deadline = Date.now() + CONVERSATION_TEST_TIMEOUT;
|
||||
while (Date.now() < deadline) {
|
||||
const a = await agentRepository.findById(agent.id);
|
||||
if (a && a.status !== 'running') break;
|
||||
await sleep(1000);
|
||||
// Give Agent A time to start its background listener and begin coding
|
||||
await sleep(15000);
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Agent B — builds a client module, asks Agent A questions to
|
||||
// learn the validation rules, then uses answers in its code
|
||||
// ---------------------------------------------------------------
|
||||
const agentB = await harness.agentManager.spawn({
|
||||
taskId: null,
|
||||
prompt: `You are Agent B in a multi-agent coordination test.
|
||||
|
||||
Read .cw/input/manifest.json to get your agentId. Agent A (ID: ${agentA.id}) is building a validator module.
|
||||
|
||||
YOUR CODING TASK — build a registration API client that includes client-side validation matching Agent A's server-side rules:
|
||||
|
||||
1. Create client-scaffold.ts with a basic RegistrationClient class that has a register(name, email, password) method that returns Promise<{ok: boolean}>.
|
||||
Leave a TODO comment where validation will go.
|
||||
|
||||
2. NOW ask Agent A what the validation rules are — you need this to write proper client-side checks:
|
||||
FIELDS=$(cw ask "What are the required fields and their types for registration?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
|
||||
|
||||
3. Ask Agent A about the specific email validation rules:
|
||||
EMAIL_RULES=$(cw ask "What are the exact email validation rules you implemented?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
|
||||
|
||||
4. Create validated-client.ts — a COMPLETE implementation using the answers:
|
||||
Import the scaffold, add a validateBeforeSubmit(name, email, password) function
|
||||
that implements the EXACT validation rules Agent A told you about.
|
||||
Include a comment at the top with the rules you received.
|
||||
|
||||
5. Write .cw/output/signal.json: {"status":"done","result":"client module complete with validation from agent A"}
|
||||
|
||||
CRITICAL:
|
||||
- Create client-scaffold.ts BEFORE asking questions (you have independent work to do first).
|
||||
- Use the ACTUAL answers from Agent A in your validated-client.ts implementation.
|
||||
- The CW_PORT environment variable is already set to ${mockPort}.`,
|
||||
mode: 'execute',
|
||||
provider: 'claude',
|
||||
inputContext: {},
|
||||
});
|
||||
|
||||
console.log(` Agent B: ${agentB.id} (${agentB.name})`);
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Wait for both agents to stop running, then verify conversations
|
||||
// ---------------------------------------------------------------
|
||||
const deadline = Date.now() + TEST_TIMEOUT;
|
||||
let aDone = false;
|
||||
let bDone = false;
|
||||
let lastLogTime = 0;
|
||||
|
||||
while (Date.now() < deadline && (!aDone || !bDone)) {
|
||||
const agentAInfo = await harness.agentRepository.findById(agentA.id);
|
||||
const agentBInfo = await harness.agentRepository.findById(agentB.id);
|
||||
|
||||
// Periodic progress logging every 30s
|
||||
if (Date.now() - lastLogTime > 30000) {
|
||||
const elapsed = Math.round((Date.now() - (deadline - TEST_TIMEOUT)) / 1000);
|
||||
console.log(` [${elapsed}s] A=${agentAInfo?.status ?? '?'} B=${agentBInfo?.status ?? '?'} convs=${mockRepo.getAll().length}`);
|
||||
lastLogTime = Date.now();
|
||||
}
|
||||
|
||||
const final = await agentRepository.findById(agent.id);
|
||||
console.log(` Agent status: ${final?.status}`);
|
||||
console.log(` Agent result: ${final?.result}`);
|
||||
if (agentAInfo && agentAInfo.status !== 'running' && !aDone) {
|
||||
aDone = true;
|
||||
console.log(` Agent A final status: ${agentAInfo.status}`);
|
||||
dumpAgentLogs(harness.workspaceRoot, agentA.name);
|
||||
}
|
||||
if (agentBInfo && agentBInfo.status !== 'running' && !bDone) {
|
||||
bDone = true;
|
||||
console.log(` Agent B final status: ${agentBInfo.status}`);
|
||||
dumpAgentLogs(harness.workspaceRoot, agentB.name);
|
||||
}
|
||||
|
||||
expect(final?.status).toBe('idle');
|
||||
expect(final?.result).toBeTruthy();
|
||||
if (!aDone || !bDone) await sleep(2000);
|
||||
}
|
||||
|
||||
// The result should contain the manifest contents which include agentId and agentName
|
||||
const result = final!.result!;
|
||||
expect(result).toContain(agent.id);
|
||||
expect(result).toContain(agent.name);
|
||||
expect(aDone).toBe(true);
|
||||
expect(bDone).toBe(true);
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Verify conversations in mock repo
|
||||
// ---------------------------------------------------------------
|
||||
const allConversations = mockRepo.getAll();
|
||||
console.log(` Total conversations: ${allConversations.length}`);
|
||||
for (const c of allConversations) {
|
||||
console.log(
|
||||
` ${c.id}: ${c.status} — Q: "${c.question}" A: "${c.answer?.substring(0, 80)}..."`,
|
||||
);
|
||||
}
|
||||
|
||||
// Exactly 2 conversations, both answered
|
||||
expect(allConversations.length).toBe(2);
|
||||
expect(allConversations.every((c) => c.status === 'answered')).toBe(true);
|
||||
|
||||
// Both target Agent A, both from Agent B
|
||||
expect(allConversations.every((c) => c.toAgentId === agentA.id)).toBe(true);
|
||||
expect(allConversations.every((c) => c.fromAgentId === agentB.id)).toBe(true);
|
||||
|
||||
// Questions should be distinct (one about fields, one about email validation)
|
||||
const questions = allConversations.map((c) => c.question);
|
||||
expect(questions.some((q) => q.toLowerCase().includes('field'))).toBe(true);
|
||||
expect(questions.some((q) => q.toLowerCase().includes('email'))).toBe(true);
|
||||
|
||||
// Both answers should be non-empty
|
||||
expect(allConversations.every((c) => c.answer && c.answer.length > 0)).toBe(true);
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Verify Agent A's coding output — validator module files exist
|
||||
// ---------------------------------------------------------------
|
||||
const aWorkdir = join(
|
||||
harness.workspaceRoot,
|
||||
'agent-workdirs',
|
||||
agentA.name,
|
||||
'workspace',
|
||||
);
|
||||
const aFiles = ['types.ts', 'validator.ts', 'index.ts'];
|
||||
for (const f of aFiles) {
|
||||
const filePath = join(aWorkdir, f);
|
||||
const exists = existsSync(filePath);
|
||||
console.log(` Agent A file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
|
||||
expect(exists).toBe(true);
|
||||
}
|
||||
// validator.ts should contain actual validation logic
|
||||
const validatorContent = readFileSync(join(aWorkdir, 'validator.ts'), 'utf-8');
|
||||
console.log(` Agent A validator.ts (${validatorContent.length} chars): ${validatorContent.substring(0, 120)}...`);
|
||||
expect(validatorContent.toLowerCase()).toContain('email');
|
||||
expect(validatorContent.toLowerCase()).toContain('password');
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Verify Agent B's coding output — client module files exist
|
||||
// ---------------------------------------------------------------
|
||||
const bWorkdir = join(
|
||||
harness.workspaceRoot,
|
||||
'agent-workdirs',
|
||||
agentB.name,
|
||||
'workspace',
|
||||
);
|
||||
const bFiles = ['client-scaffold.ts', 'validated-client.ts'];
|
||||
for (const f of bFiles) {
|
||||
const filePath = join(bWorkdir, f);
|
||||
const exists = existsSync(filePath);
|
||||
console.log(` Agent B file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
|
||||
expect(exists).toBe(true);
|
||||
}
|
||||
// validated-client.ts should reference validation rules from Agent A's answers
|
||||
const clientContent = readFileSync(join(bWorkdir, 'validated-client.ts'), 'utf-8');
|
||||
console.log(` Agent B validated-client.ts (${clientContent.length} chars): ${clientContent.substring(0, 120)}...`);
|
||||
expect(clientContent.toLowerCase()).toContain('email');
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Verify interleaving: Agent A's JSONL log has coding tool calls
|
||||
// (Write for .ts files) interleaved with conversation tool calls
|
||||
// (Bash for cw listen/answer)
|
||||
// ---------------------------------------------------------------
|
||||
const aLogPath = join(harness.workspaceRoot, '.cw', 'agent-logs', agentA.name, 'output.jsonl');
|
||||
const aLog = readFileSync(aLogPath, 'utf-8').trim().split('\n');
|
||||
const toolCalls: { type: 'code' | 'conversation'; name: string; detail: string }[] = [];
|
||||
|
||||
for (const line of aLog) {
|
||||
try {
|
||||
const ev = JSON.parse(line);
|
||||
if (ev.type !== 'assistant' || !ev.message?.content) continue;
|
||||
for (const block of ev.message.content) {
|
||||
if (block.type !== 'tool_use') continue;
|
||||
const input = typeof block.input === 'string' ? block.input : JSON.stringify(block.input);
|
||||
if (block.name === 'Write' && input.includes('.ts')) {
|
||||
toolCalls.push({ type: 'code', name: 'Write', detail: input.substring(0, 80) });
|
||||
} else if (block.name === 'Bash' && (input.includes('cw listen') || input.includes('cw answer'))) {
|
||||
toolCalls.push({ type: 'conversation', name: 'Bash', detail: input.substring(0, 80) });
|
||||
}
|
||||
}
|
||||
} catch { /* skip non-JSON lines */ }
|
||||
}
|
||||
|
||||
console.log(` Agent A interleaving (${toolCalls.length} relevant tool calls):`);
|
||||
for (const tc of toolCalls) {
|
||||
console.log(` [${tc.type}] ${tc.name}: ${tc.detail}`);
|
||||
}
|
||||
|
||||
// Must have both code and conversation tool calls
|
||||
const hasCode = toolCalls.some((tc) => tc.type === 'code');
|
||||
const hasConversation = toolCalls.some((tc) => tc.type === 'conversation');
|
||||
expect(hasCode).toBe(true);
|
||||
expect(hasConversation).toBe(true);
|
||||
|
||||
// Verify interleaving: at least one code call must appear AFTER a conversation call
|
||||
// (proving coding continued after handling a question)
|
||||
const firstConvIdx = toolCalls.findIndex((tc) => tc.type === 'conversation');
|
||||
const lastCodeIdx = toolCalls.length - 1 - [...toolCalls].reverse().findIndex((tc) => tc.type === 'code');
|
||||
console.log(` First conversation at index ${firstConvIdx}, last code at index ${lastCodeIdx}`);
|
||||
expect(lastCodeIdx).toBeGreaterThan(firstConvIdx);
|
||||
},
|
||||
CONVERSATION_TEST_TIMEOUT,
|
||||
TEST_TIMEOUT,
|
||||
);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user