Files
Codewalkers/apps/server/test/integration/real-providers/conversation.test.ts
Lukas May 34578d39c6 refactor: Restructure monorepo to apps/server/ and apps/web/ layout
Move src/ → apps/server/ and packages/web/ → apps/web/ to adopt
standard monorepo conventions (apps/ for runnable apps, packages/
for reusable libraries). Update all config files, shared package
imports, test fixtures, and documentation to reflect new paths.

Key fixes:
- Update workspace config to ["apps/*", "packages/*"]
- Update tsconfig.json rootDir/include for apps/server/
- Add apps/web/** to vitest exclude list
- Update drizzle.config.ts schema path
- Fix ensure-schema.ts migration path detection (3 levels up in dev,
  2 levels up in dist)
- Fix tests/integration/cli-server.test.ts import paths
- Update packages/shared imports to apps/server/ paths
- Update all docs/ files with new paths
2026-03-03 11:22:53 +01:00

541 lines
21 KiB
TypeScript

/**
* Real Claude Inter-Agent Conversation Integration Tests
*
* IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
* They are SKIPPED by default and should only be run manually for validation.
*
* To run:
* ```bash
* REAL_CLAUDE_TESTS=1 npm test -- apps/server/test/integration/real-providers/conversation.test.ts --test-timeout=300000
* ```
*
* Architecture:
* - Mock conversation server (only cw listen/ask/answer endpoints, no full CoordinationServer)
* - In-memory ConversationRepository (no SQLite, no FK constraints)
* - Real agent harness for spawning two Claude sessions with actual coding tasks
* - Two sequential questions prove the listen→answer→re-listen cycle works
*
* Estimated cost: ~$0.30 per full run (two Claude sessions)
*/
import { it, expect, beforeAll, afterAll } from 'vitest';
import { createServer } from 'node:http';
import type { Server } from 'node:http';
import { readFileSync, existsSync } from 'node:fs';
import { join } from 'node:path';
import { nanoid } from 'nanoid';
import { fetchRequestHandler } from '@trpc/server/adapters/fetch';
import { router, publicProcedure } from '../../../trpc/trpc.js';
import { conversationProcedures } from '../../../trpc/routers/conversation.js';
import { EventEmitterBus } from '../../../events/bus.js';
import type { ConversationRepository, CreateConversationData } from '../../../db/repositories/conversation-repository.js';
import type { Conversation } from '../../../db/schema.js';
import {
createRealProviderHarness,
describeRealClaude,
sleep,
type RealProviderHarness,
} from './harness.js';
// Per-test time budget in milliseconds (five minutes): the agents do real
// coding work and hold a conversation, so the run is slow.
const TEST_TIMEOUT = 5 * 60 * 1000;
// ---------------------------------------------------------------------------
// In-memory ConversationRepository — no SQLite, no FK constraints
// ---------------------------------------------------------------------------
/**
 * ConversationRepository implementation that keeps every conversation in a
 * Map keyed by conversation id. Nothing is persisted, so there is no SQLite
 * file and no foreign-key checking involved.
 */
class InMemoryConversationRepository implements ConversationRepository {
  private store = new Map<string, Conversation>();

  /**
   * Stores a new conversation in the 'pending' state with no answer.
   * Optional initiative/phase/task references default to null.
   */
  async create(data: CreateConversationData): Promise<Conversation> {
    const timestamp = new Date();
    const id = nanoid();
    const record: Conversation = {
      id,
      fromAgentId: data.fromAgentId,
      toAgentId: data.toAgentId,
      initiativeId: data.initiativeId ?? null,
      phaseId: data.phaseId ?? null,
      taskId: data.taskId ?? null,
      question: data.question,
      answer: null,
      status: 'pending',
      createdAt: timestamp,
      updatedAt: timestamp,
    };
    this.store.set(id, record);
    return record;
  }

  /** Looks a conversation up by id; resolves to null when it is unknown. */
  async findById(id: string): Promise<Conversation | null> {
    const found = this.store.get(id);
    return found ?? null;
  }

  /** Returns the still-pending conversations addressed to the agent, oldest first. */
  async findPendingForAgent(toAgentId: string): Promise<Conversation[]> {
    const pending: Conversation[] = [];
    for (const entry of this.store.values()) {
      if (entry.toAgentId === toAgentId && entry.status === 'pending') {
        pending.push(entry);
      }
    }
    pending.sort((left, right) => left.createdAt.getTime() - right.createdAt.getTime());
    return pending;
  }

  /**
   * Records an answer on the conversation and marks it 'answered'.
   * Resolves to null when no conversation has the given id.
   */
  async answer(id: string, answer: string): Promise<Conversation | null> {
    const existing = this.store.get(id);
    if (!existing) {
      return null;
    }
    // Replace the stored entry with a copy instead of mutating the old object.
    const answered: Conversation = {
      ...existing,
      answer,
      status: 'answered' as const,
      updatedAt: new Date(),
    };
    this.store.set(id, answered);
    return answered;
  }

  /** Test helper — snapshot of every stored conversation, in insertion order. */
  getAll(): Conversation[] {
    return Array.from(this.store.values());
  }
}
// ---------------------------------------------------------------------------
// Mock conversation server — serves ONLY conversation tRPC procedures
// ---------------------------------------------------------------------------
/**
 * Starts an HTTP server on 127.0.0.1 that serves ONLY the conversation tRPC
 * procedures under `/trpc`, backed by a fresh in-memory repository.
 *
 * Each Node request is translated into a fetch `Request`, handed to the tRPC
 * fetch adapter, and the resulting `Response` is streamed back.
 *
 * The server binds to port 0 so the operating system picks a free port; the
 * port actually bound is read back from `server.address()` and returned.
 *
 * @returns the listening server, the port it is bound to, and the repository
 *          the procedures read from and write to.
 * @throws if the server fails to bind or does not report a TCP address.
 */
async function startMockConversationServer(): Promise<{
  server: Server;
  port: number;
  repo: InMemoryConversationRepository;
}> {
  const repo = new InMemoryConversationRepository();
  const eventBus = new EventEmitterBus();
  // Mini router with only conversation procedures
  const miniRouter = router({
    ...conversationProcedures(publicProcedure),
  });

  const httpServer = createServer(async (req, res) => {
    // The handler is async, so anything thrown here would otherwise become an
    // unhandled rejection and leave the client waiting on an open response.
    try {
      if (!req.url?.startsWith('/trpc')) {
        res.writeHead(404);
        res.end('Not found');
        return;
      }

      const host = req.headers.host ?? 'localhost';
      const url = new URL(req.url, `http://${host}`);

      let body: string | undefined;
      if (req.method !== 'GET' && req.method !== 'HEAD') {
        body = await new Promise<string>((resolve, reject) => {
          // Collect raw buffers and decode once: decoding chunk by chunk can
          // split a multi-byte UTF-8 character across two chunks.
          const chunks: Buffer[] = [];
          req.on('data', (chunk: Buffer) => {
            chunks.push(chunk);
          });
          req.on('end', () => resolve(Buffer.concat(chunks).toString('utf-8')));
          req.on('error', reject);
        });
      }

      // Copy Node's header map into a fetch Headers object, keeping repeated headers.
      const headers = new Headers();
      for (const [key, value] of Object.entries(req.headers)) {
        if (value) {
          if (Array.isArray(value)) {
            value.forEach((v) => headers.append(key, v));
          } else {
            headers.set(key, value);
          }
        }
      }

      const fetchRequest = new Request(url.toString(), {
        method: req.method,
        headers,
        body,
      });

      const fetchResponse = await fetchRequestHandler({
        endpoint: '/trpc',
        req: fetchRequest,
        router: miniRouter,
        createContext: () =>
          ({
            eventBus,
            serverStartedAt: new Date(),
            processCount: 0,
            conversationRepository: repo,
            // Stub — requireAgentManager is called unconditionally in createConversation,
            // but list() is only invoked for taskId/phaseId resolution. With --agent-id
            // targeting, list() is never called.
            agentManager: { list: async () => [] },
          }) as any,
      });

      res.statusCode = fetchResponse.status;
      fetchResponse.headers.forEach((value, key) => {
        res.setHeader(key, value);
      });

      if (fetchResponse.body) {
        // Stream the response body through chunk by chunk.
        const reader = fetchResponse.body.getReader();
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          res.write(value);
        }
        res.end();
      } else {
        res.end(await fetchResponse.text());
      }
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      console.log(` Mock conversation server request failed: ${reason}`);
      if (!res.headersSent) res.statusCode = 500;
      if (!res.writableEnded) res.end();
    }
  });

  await new Promise<void>((resolve, reject) => {
    // Without an 'error' listener a failed bind is thrown as an uncaught exception.
    httpServer.once('error', reject);
    // Port 0 lets the OS choose an unused port, so parallel runs cannot collide.
    httpServer.listen(0, '127.0.0.1', () => {
      httpServer.off('error', reject);
      resolve();
    });
  });

  const address = httpServer.address();
  if (address === null || typeof address === 'string') {
    httpServer.close();
    throw new Error('Mock conversation server did not bind to a TCP port');
  }
  return { server: httpServer, port: address.port, repo };
}
// ---------------------------------------------------------------------------
// Diagnostic helpers
// ---------------------------------------------------------------------------
/**
 * Prints a diagnostic summary of an agent's logs to the console.
 *
 * Looks in `<workspaceRoot>/.cw/agent-logs/<agentName>` and, when present:
 * - summarises the last 30 lines of `output.jsonl` (assistant text blocks,
 *   tool calls and result events; lines that cannot be processed are echoed raw)
 * - prints the first 500 characters of a non-empty `stderr.log`
 *
 * @param workspaceRoot root directory that contains the `.cw` folder
 * @param agentName name of the agent whose log directory is inspected
 */
function dumpAgentLogs(workspaceRoot: string, agentName: string) {
  const agentLogDir = join(workspaceRoot, '.cw', 'agent-logs', agentName);
  if (!existsSync(agentLogDir)) {
    console.log(` [${agentName}] No log directory at ${agentLogDir}`);
    return;
  }

  // Parses one JSONL line and prints whatever is interesting in it.
  // Throws on malformed input; the caller falls back to a raw dump.
  const describeEvent = (rawLine: string): void => {
    const event = JSON.parse(rawLine);
    if (event.type === 'assistant' && event.message?.content) {
      for (const part of event.message.content) {
        switch (part.type) {
          case 'text':
            console.log(` TEXT: ${part.text.substring(0, 200)}`);
            break;
          case 'tool_use':
            console.log(` TOOL: ${part.name} ${JSON.stringify(part.input).substring(0, 150)}`);
            break;
          default:
            break;
        }
      }
      return;
    }
    if (event.type === 'result') {
      console.log(` RESULT: ${JSON.stringify(event).substring(0, 300)}`);
    }
  };

  // Tail of output.jsonl (last 30 lines)
  const jsonlFile = join(agentLogDir, 'output.jsonl');
  if (existsSync(jsonlFile)) {
    const allLines = readFileSync(jsonlFile, 'utf-8').trim().split('\n');
    const tail = allLines.slice(-30);
    console.log(` [${agentName}] output.jsonl (last ${tail.length}/${allLines.length} lines):`);
    tail.forEach((rawLine) => {
      try {
        describeEvent(rawLine);
      } catch {
        console.log(` RAW: ${rawLine.substring(0, 200)}`);
      }
    });
  }

  // stderr, only when it has content after trimming
  const stderrFile = join(agentLogDir, 'stderr.log');
  const stderrText = existsSync(stderrFile) ? readFileSync(stderrFile, 'utf-8').trim() : '';
  if (stderrText) {
    console.log(` [${agentName}] stderr: ${stderrText.substring(0, 500)}`);
  }
}
// ---------------------------------------------------------------------------
// Test suite
// ---------------------------------------------------------------------------
/**
 * End-to-end check that two real Claude sessions can talk to each other via
 * `cw ask` / `cw listen` / `cw answer` while both do independent coding work.
 *
 * Setup: a mock conversation server is started and CW_PORT is pointed at it so
 * the agents' cw commands reach the mock; a real provider harness spawns the
 * agents. Teardown restores CW_PORT, cleans up the harness and closes the server.
 */
describeRealClaude('Real Inter-Agent Conversation (mock server)', () => {
  let harness: RealProviderHarness;
  let mockServer: Server;
  let mockPort: number;
  let mockRepo: InMemoryConversationRepository;
  const originalCwPort = process.env.CW_PORT;
  // Time reserved at the end of the test budget for assertions and log dumps.
  const DEADLINE_MARGIN_MS = 10000;

  beforeAll(async () => {
    console.log('\n=== Real Inter-Agent Conversation Test ===');
    console.log('Mock conversation server + two Claude sessions.\n');
    // Start mock conversation server (only listen/ask/answer endpoints)
    const mock = await startMockConversationServer();
    mockServer = mock.server;
    mockPort = mock.port;
    mockRepo = mock.repo;
    console.log(` Mock server on port ${mockPort}`);
    // Set CW_PORT so agents' cw commands hit the mock server
    process.env.CW_PORT = String(mockPort);
    // Real agent harness for spawning + worktrees (no full CoordinationServer)
    harness = await createRealProviderHarness({ provider: 'claude' });
    console.log(` Workspace: ${harness.workspaceRoot}`);
  });

  afterAll(async () => {
    // Compare against undefined (not truthiness) so an originally empty
    // CW_PORT value is restored rather than deleted.
    if (originalCwPort !== undefined) {
      process.env.CW_PORT = originalCwPort;
    } else {
      delete process.env.CW_PORT;
    }
    await harness?.cleanup();
    mockServer?.close();
  });

  it(
    'two agents with real tasks communicate via cw ask/listen/answer (two questions prove re-listen)',
    async () => {
      const testStartedAt = Date.now();
      const agentSuffix = nanoid(6); // unique suffix for temp files
      // ---------------------------------------------------------------
      // Agent A — builds a validator module WHILE answering questions
      // in the background via cw listen
      // ---------------------------------------------------------------
      const agentA = await harness.agentManager.spawn({
        taskId: null,
        prompt: `You are Agent A in a multi-agent coordination test.
You have TWO concurrent responsibilities:
1. Build a TypeScript validator module (your main coding task)
2. Answer questions from other agents via a background listener
SETUP (do this first):
- Read .cw/input/manifest.json to get your agentId
- Start a background listener that writes to a temp file:
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
LISTEN_PID=$!
MAIN CODING TASK — implement a user registration validator:
1. Create types.ts:
export interface RegistrationInput { name: string; email: string; password: string; }
export interface ValidationResult { valid: boolean; errors: string[]; }
2. Create validator.ts:
Import from types.ts. Export function validateRegistration(input: RegistrationInput): ValidationResult
Rules: name min 2 chars, email must have exactly one @ and domain with a dot and no spaces and max 254 chars, password min 8 chars.
3. Create index.ts that re-exports everything from types.ts and validator.ts.
BETWEEN EACH FILE, check for incoming questions:
if [ -s /tmp/cw-listen-${agentSuffix}.txt ]; then
# parse the JSON, get conversationId and question
# answer: cw answer "<answer based on your code>" --conversation-id <id>
# clear and restart listener:
> /tmp/cw-listen-${agentSuffix}.txt
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
LISTEN_PID=$!
fi
You will receive TWO questions total while you work. Answer them based on the code you are writing.
CLEANUP: After all 3 files are written and both questions answered:
- kill $LISTEN_PID 2>/dev/null
- Write .cw/output/signal.json: {"status":"done","result":"validator module complete, answered 2 questions"}
CRITICAL:
- The listener MUST run in the background while you write code.
- Check for questions between files, not as blocking waits.
- The CW_PORT environment variable is already set to ${mockPort}.`,
        mode: 'execute',
        provider: 'claude',
        inputContext: {},
      });
      console.log(` Agent A: ${agentA.id} (${agentA.name})`);
      // Give Agent A time to start its background listener and begin coding
      await sleep(15000);
      // ---------------------------------------------------------------
      // Agent B — builds a client module, asks Agent A questions to
      // learn the validation rules, then uses answers in its code
      // ---------------------------------------------------------------
      const agentB = await harness.agentManager.spawn({
        taskId: null,
        prompt: `You are Agent B in a multi-agent coordination test.
Read .cw/input/manifest.json to get your agentId. Agent A (ID: ${agentA.id}) is building a validator module.
YOUR CODING TASK — build a registration API client that includes client-side validation matching Agent A's server-side rules:
1. Create client-scaffold.ts with a basic RegistrationClient class that has a register(name, email, password) method that returns Promise<{ok: boolean}>.
Leave a TODO comment where validation will go.
2. NOW ask Agent A what the validation rules are — you need this to write proper client-side checks:
FIELDS=$(cw ask "What are the required fields and their types for registration?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
3. Ask Agent A about the specific email validation rules:
EMAIL_RULES=$(cw ask "What are the exact email validation rules you implemented?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)
4. Create validated-client.ts — a COMPLETE implementation using the answers:
Import the scaffold, add a validateBeforeSubmit(name, email, password) function
that implements the EXACT validation rules Agent A told you about.
Include a comment at the top with the rules you received.
5. Write .cw/output/signal.json: {"status":"done","result":"client module complete with validation from agent A"}
CRITICAL:
- Create client-scaffold.ts BEFORE asking questions (you have independent work to do first).
- Use the ACTUAL answers from Agent A in your validated-client.ts implementation.
- The CW_PORT environment variable is already set to ${mockPort}.`,
        mode: 'execute',
        provider: 'claude',
        inputContext: {},
      });
      console.log(` Agent B: ${agentB.id} (${agentB.name})`);
      // ---------------------------------------------------------------
      // Wait for both agents to stop running, then verify conversations
      // ---------------------------------------------------------------
      // The runner aborts this test TEST_TIMEOUT ms after it STARTED, and the
      // spawn + warm-up sleep above already used part of that budget. Anchor the
      // polling deadline to the test start and stop slightly early so the
      // assertions below can report which agent hung.
      const pollStartedAt = Date.now();
      const deadline = testStartedAt + TEST_TIMEOUT - DEADLINE_MARGIN_MS;
      let aDone = false;
      let bDone = false;
      let lastLogTime = 0;
      while (Date.now() < deadline && (!aDone || !bDone)) {
        const agentAInfo = await harness.agentRepository.findById(agentA.id);
        const agentBInfo = await harness.agentRepository.findById(agentB.id);
        // Periodic progress logging every 30s
        if (Date.now() - lastLogTime > 30000) {
          const elapsed = Math.round((Date.now() - pollStartedAt) / 1000);
          console.log(` [${elapsed}s] A=${agentAInfo?.status ?? '?'} B=${agentBInfo?.status ?? '?'} convs=${mockRepo.getAll().length}`);
          lastLogTime = Date.now();
        }
        if (agentAInfo && agentAInfo.status !== 'running' && !aDone) {
          aDone = true;
          console.log(` Agent A final status: ${agentAInfo.status}`);
          dumpAgentLogs(harness.workspaceRoot, agentA.name);
        }
        if (agentBInfo && agentBInfo.status !== 'running' && !bDone) {
          bDone = true;
          console.log(` Agent B final status: ${agentBInfo.status}`);
          dumpAgentLogs(harness.workspaceRoot, agentB.name);
        }
        if (!aDone || !bDone) await sleep(2000);
      }
      // Surface diagnostics for any agent that never left the 'running' state.
      if (!aDone) {
        console.log(' Agent A still running at deadline');
        dumpAgentLogs(harness.workspaceRoot, agentA.name);
      }
      if (!bDone) {
        console.log(' Agent B still running at deadline');
        dumpAgentLogs(harness.workspaceRoot, agentB.name);
      }
      expect(aDone).toBe(true);
      expect(bDone).toBe(true);
      // ---------------------------------------------------------------
      // Verify conversations in mock repo
      // ---------------------------------------------------------------
      const allConversations = mockRepo.getAll();
      console.log(` Total conversations: ${allConversations.length}`);
      for (const c of allConversations) {
        console.log(
          ` ${c.id}: ${c.status} — Q: "${c.question}" A: "${c.answer?.substring(0, 80)}..."`,
        );
      }
      // Exactly 2 conversations, both answered
      expect(allConversations.length).toBe(2);
      expect(allConversations.every((c) => c.status === 'answered')).toBe(true);
      // Both target Agent A, both from Agent B
      expect(allConversations.every((c) => c.toAgentId === agentA.id)).toBe(true);
      expect(allConversations.every((c) => c.fromAgentId === agentB.id)).toBe(true);
      // Questions should be distinct (one about fields, one about email validation)
      const questions = allConversations.map((c) => c.question);
      expect(questions.some((q) => q.toLowerCase().includes('field'))).toBe(true);
      expect(questions.some((q) => q.toLowerCase().includes('email'))).toBe(true);
      // Both answers should be non-empty
      expect(allConversations.every((c) => c.answer && c.answer.length > 0)).toBe(true);
      // ---------------------------------------------------------------
      // Verify Agent A's coding output — validator module files exist
      // ---------------------------------------------------------------
      const aWorkdir = join(
        harness.workspaceRoot,
        'agent-workdirs',
        agentA.name,
        'workspace',
      );
      const aFiles = ['types.ts', 'validator.ts', 'index.ts'];
      for (const f of aFiles) {
        const filePath = join(aWorkdir, f);
        const exists = existsSync(filePath);
        console.log(` Agent A file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
        expect(exists).toBe(true);
      }
      // validator.ts should contain actual validation logic
      const validatorContent = readFileSync(join(aWorkdir, 'validator.ts'), 'utf-8');
      console.log(` Agent A validator.ts (${validatorContent.length} chars): ${validatorContent.substring(0, 120)}...`);
      expect(validatorContent.toLowerCase()).toContain('email');
      expect(validatorContent.toLowerCase()).toContain('password');
      // ---------------------------------------------------------------
      // Verify Agent B's coding output — client module files exist
      // ---------------------------------------------------------------
      const bWorkdir = join(
        harness.workspaceRoot,
        'agent-workdirs',
        agentB.name,
        'workspace',
      );
      const bFiles = ['client-scaffold.ts', 'validated-client.ts'];
      for (const f of bFiles) {
        const filePath = join(bWorkdir, f);
        const exists = existsSync(filePath);
        console.log(` Agent B file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
        expect(exists).toBe(true);
      }
      // validated-client.ts should reference validation rules from Agent A's answers
      const clientContent = readFileSync(join(bWorkdir, 'validated-client.ts'), 'utf-8');
      console.log(` Agent B validated-client.ts (${clientContent.length} chars): ${clientContent.substring(0, 120)}...`);
      expect(clientContent.toLowerCase()).toContain('email');
      // ---------------------------------------------------------------
      // Verify interleaving: Agent A's JSONL log has coding tool calls
      // (Write for .ts files) interleaved with conversation tool calls
      // (Bash for cw listen/answer)
      // ---------------------------------------------------------------
      const aLogPath = join(harness.workspaceRoot, '.cw', 'agent-logs', agentA.name, 'output.jsonl');
      const aLog = readFileSync(aLogPath, 'utf-8').trim().split('\n');
      const toolCalls: { type: 'code' | 'conversation'; name: string; detail: string }[] = [];
      for (const line of aLog) {
        try {
          const ev = JSON.parse(line);
          if (ev.type !== 'assistant' || !ev.message?.content) continue;
          for (const block of ev.message.content) {
            if (block.type !== 'tool_use') continue;
            const input = typeof block.input === 'string' ? block.input : JSON.stringify(block.input);
            if (block.name === 'Write' && input.includes('.ts')) {
              toolCalls.push({ type: 'code', name: 'Write', detail: input.substring(0, 80) });
            } else if (block.name === 'Bash' && (input.includes('cw listen') || input.includes('cw answer'))) {
              toolCalls.push({ type: 'conversation', name: 'Bash', detail: input.substring(0, 80) });
            }
          }
        } catch { /* skip non-JSON lines */ }
      }
      console.log(` Agent A interleaving (${toolCalls.length} relevant tool calls):`);
      for (const tc of toolCalls) {
        console.log(` [${tc.type}] ${tc.name}: ${tc.detail}`);
      }
      // Must have both code and conversation tool calls
      const hasCode = toolCalls.some((tc) => tc.type === 'code');
      const hasConversation = toolCalls.some((tc) => tc.type === 'conversation');
      expect(hasCode).toBe(true);
      expect(hasConversation).toBe(true);
      // Verify interleaving: at least one code call must appear AFTER a conversation call
      // (proving coding continued after handling a question)
      const firstConvIdx = toolCalls.findIndex((tc) => tc.type === 'conversation');
      // lastIndexOf yields -1 when there is no code call, so the comparison
      // below cannot pass by accident.
      const lastCodeIdx = toolCalls.map((tc) => tc.type).lastIndexOf('code');
      console.log(` First conversation at index ${firstConvIdx}, last code at index ${lastCodeIdx}`);
      expect(lastCodeIdx).toBeGreaterThan(firstConvIdx);
    },
    TEST_TIMEOUT,
  );
});