Add five new tRPC query procedures powering the Radar page's per-agent behavioral metrics (questions asked, subagent spawns, compaction events, inter-agent messages) plus the batch repository methods they require.

Repository changes:
- LogChunkRepository: add findByAgentIds() for batch fetching without N+1
- ConversationRepository: add countByFromAgentIds() and findByFromAgentId()
- Drizzle adapters: implement all three new methods using inArray()
- InMemoryConversationRepository (integration test): implement new methods

tRPC procedures added:
- agent.listForRadar: filtered agent list with per-agent metrics computed from log chunks (questionsCount, subagentsCount, compactionsCount) and conversation counts (messagesCount); supports timeRange/status/mode/initiative filters
- agent.getCompactionEvents: compact system init chunks for one agent (cap 200)
- agent.getSubagentSpawns: Agent tool_use entries with prompt preview (cap 200)
- agent.getQuestionsAsked: AskUserQuestion tool calls with questions array (cap 200)
- conversation.getByFromAgent: conversations by fromAgentId with toAgentName resolved

All 13 new unit tests pass; existing test suite unaffected.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
559 lines
22 KiB
TypeScript
559 lines
22 KiB
TypeScript
/**
 * Real Claude Inter-Agent Conversation Integration Tests
 *
 * IMPORTANT: These tests call the REAL Claude CLI and incur API costs!
 * They are SKIPPED by default and should only be run manually for validation.
 *
 * To run:
 * ```bash
 * REAL_CLAUDE_TESTS=1 npm test -- src/test/integration/real-providers/conversation.test.ts --test-timeout=300000
 * ```
 *
 * Architecture:
 * - Mock conversation server (only cw listen/ask/answer endpoints, no full CoordinationServer)
 * - In-memory ConversationRepository (no SQLite, no FK constraints)
 * - Real agent harness for spawning two Claude sessions with actual coding tasks
 * - Two sequential questions prove the listen→answer→re-listen cycle works
 *
 * Estimated cost: ~$0.30 per full run (two Claude sessions)
 */
|
|
|
|
import { it, expect, beforeAll, afterAll } from 'vitest';
|
|
import { createServer } from 'node:http';
|
|
import type { Server } from 'node:http';
|
|
import { readFileSync, existsSync } from 'node:fs';
|
|
import { join } from 'node:path';
|
|
import { nanoid } from 'nanoid';
|
|
import { fetchRequestHandler } from '@trpc/server/adapters/fetch';
|
|
import { router, publicProcedure } from '../../../trpc/trpc.js';
|
|
import { conversationProcedures } from '../../../trpc/routers/conversation.js';
|
|
import { EventEmitterBus } from '../../../events/bus.js';
|
|
import type { ConversationRepository, CreateConversationData } from '../../../db/repositories/conversation-repository.js';
|
|
import type { Conversation } from '../../../db/schema.js';
|
|
import {
|
|
createRealProviderHarness,
|
|
describeRealClaude,
|
|
sleep,
|
|
type RealProviderHarness,
|
|
} from './harness.js';
|
|
|
|
// Per-test timeout in milliseconds (5 minutes) — agents do real coding + conversation
const TEST_TIMEOUT = 5 * 60 * 1000;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// In-memory ConversationRepository — no SQLite, no FK constraints
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Map-backed ConversationRepository used by this test instead of a database.
 *
 * Conversations live only in process memory, keyed by their generated id.
 * Every method is async purely to satisfy the repository interface; nothing
 * here performs I/O.
 */
class InMemoryConversationRepository implements ConversationRepository {
  /** Upper bound on the number of rows returned by findByFromAgentId(). */
  private static readonly MAX_RESULTS = 200;

  /** All conversations, keyed by conversation id. */
  private readonly store = new Map<string, Conversation>();

  /**
   * Stores a new conversation in the 'pending' state with no answer.
   *
   * @param data - Sender, recipient, question and optional initiative/phase/task ids
   *               (missing optional ids are stored as null).
   * @returns The stored conversation, including its generated id and timestamps.
   */
  async create(data: CreateConversationData): Promise<Conversation> {
    const now = new Date();
    const conversation: Conversation = {
      id: nanoid(),
      fromAgentId: data.fromAgentId,
      toAgentId: data.toAgentId,
      initiativeId: data.initiativeId ?? null,
      phaseId: data.phaseId ?? null,
      taskId: data.taskId ?? null,
      question: data.question,
      answer: null,
      status: 'pending',
      createdAt: now,
      updatedAt: now,
    };
    this.store.set(conversation.id, conversation);
    return conversation;
  }

  /** Returns the conversation with the given id, or null when it is unknown. */
  async findById(id: string): Promise<Conversation | null> {
    return this.store.get(id) ?? null;
  }

  /**
   * Returns the still-pending conversations addressed to `toAgentId`,
   * oldest first (ascending createdAt).
   */
  async findPendingForAgent(toAgentId: string): Promise<Conversation[]> {
    return [...this.store.values()]
      .filter((c) => c.toAgentId === toAgentId && c.status === 'pending')
      .sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime());
  }

  /**
   * Records an answer and marks the conversation 'answered'.
   *
   * The stored entry is replaced with an updated copy rather than mutated in place.
   *
   * @returns The updated conversation, or null when `id` is unknown.
   */
  async answer(id: string, answer: string): Promise<Conversation | null> {
    const conv = this.store.get(id);
    if (!conv) return null;
    const updated: Conversation = {
      ...conv,
      answer,
      status: 'answered' as const,
      updatedAt: new Date(),
    };
    this.store.set(id, updated);
    return updated;
  }

  /**
   * Counts conversations per sending agent for the given agent ids.
   *
   * Agents that sent no conversations are omitted from the result (no zero rows).
   *
   * @param agentIds - Sender ids to count; an empty list yields an empty result.
   */
  async countByFromAgentIds(agentIds: string[]): Promise<{ agentId: string; count: number }[]> {
    if (agentIds.length === 0) return [];
    // Set lookup keeps this linear in the number of stored conversations
    // instead of scanning the id list once per conversation.
    const wanted = new Set(agentIds);
    const counts = new Map<string, number>();
    for (const conv of this.store.values()) {
      if (!wanted.has(conv.fromAgentId)) continue;
      counts.set(conv.fromAgentId, (counts.get(conv.fromAgentId) ?? 0) + 1);
    }
    return [...counts.entries()].map(([agentId, count]) => ({ agentId, count }));
  }

  /**
   * Returns the conversations sent by `agentId`, oldest first,
   * capped at MAX_RESULTS entries.
   */
  async findByFromAgentId(agentId: string): Promise<Conversation[]> {
    return [...this.store.values()]
      .filter((c) => c.fromAgentId === agentId)
      .sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime())
      .slice(0, InMemoryConversationRepository.MAX_RESULTS);
  }

  /** Test helper — return all conversations */
  getAll(): Conversation[] {
    return [...this.store.values()];
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Mock conversation server — serves ONLY conversation tRPC procedures
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Starts a minimal HTTP server that serves ONLY the conversation tRPC
 * procedures under `/trpc`, backed by a fresh InMemoryConversationRepository.
 *
 * Each incoming Node request is converted to a fetch `Request`, handed to
 * tRPC's fetch adapter, and the resulting `Response` is streamed back.
 * Any other path is answered with 404.
 *
 * The server binds to 127.0.0.1 on an OS-assigned free port (port 0), so two
 * runs can never collide on a port number.
 *
 * @returns The listening server, the port it is bound to, and the repository
 *          the procedures write to (for assertions in the test).
 * @throws If the server fails to start listening or reports no TCP address.
 */
async function startMockConversationServer(): Promise<{
  server: Server;
  port: number;
  repo: InMemoryConversationRepository;
}> {
  const repo = new InMemoryConversationRepository();
  const eventBus = new EventEmitterBus();

  // Mini router with only conversation procedures
  const miniRouter = router({
    ...conversationProcedures(publicProcedure),
  });

  const httpServer = createServer((req, res) => {
    const handle = async (): Promise<void> => {
      if (!req.url?.startsWith('/trpc')) {
        res.writeHead(404);
        res.end('Not found');
        return;
      }

      const host = req.headers.host ?? 'localhost';
      const url = new URL(req.url, `http://${host}`);

      // Buffer the request body for methods that can carry one. Chunks are
      // concatenated before decoding so a multi-byte UTF-8 sequence split
      // across two chunks is not corrupted.
      let body: string | undefined;
      if (req.method !== 'GET' && req.method !== 'HEAD') {
        body = await new Promise<string>((resolve, reject) => {
          const chunks: Buffer[] = [];
          req.on('data', (chunk: Buffer) => {
            chunks.push(chunk);
          });
          req.on('end', () => resolve(Buffer.concat(chunks).toString('utf-8')));
          req.on('error', reject);
        });
      }

      // Copy Node's header map into a fetch Headers object; repeated headers
      // arrive as arrays and must be appended one by one.
      const headers = new Headers();
      for (const [key, value] of Object.entries(req.headers)) {
        if (!value) continue;
        if (Array.isArray(value)) {
          value.forEach((v) => headers.append(key, v));
        } else {
          headers.set(key, value);
        }
      }

      const fetchRequest = new Request(url.toString(), {
        method: req.method,
        headers,
        body,
      });

      const fetchResponse = await fetchRequestHandler({
        endpoint: '/trpc',
        req: fetchRequest,
        router: miniRouter,
        createContext: () =>
          ({
            eventBus,
            serverStartedAt: new Date(),
            processCount: 0,
            conversationRepository: repo,
            // Stub — requireAgentManager is called unconditionally in createConversation,
            // but list() is only invoked for taskId/phaseId resolution. With --agent-id
            // targeting, list() is never called.
            agentManager: { list: async () => [] },
            // Deliberately partial context: only the fields the conversation
            // procedures touch are provided, hence the cast.
          }) as any,
      });

      res.statusCode = fetchResponse.status;
      fetchResponse.headers.forEach((value, key) => {
        res.setHeader(key, value);
      });

      if (!fetchResponse.body) {
        res.end(await fetchResponse.text());
        return;
      }

      // Stream the fetch response body to the Node response chunk by chunk.
      const reader = fetchResponse.body.getReader();
      for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        res.write(value);
      }
      res.end();
    };

    // Never leave a request hanging (or an unhandled rejection) when the
    // adapter or the body stream fails.
    handle().catch((err: unknown) => {
      console.error('[mock conversation server] request failed:', err);
      if (!res.headersSent) res.statusCode = 500;
      res.end();
    });
  });

  // Port 0 asks the OS for any free port; a bind failure rejects instead of
  // leaving this promise pending forever.
  await new Promise<void>((resolve, reject) => {
    httpServer.once('error', reject);
    httpServer.listen(0, '127.0.0.1', () => {
      httpServer.off('error', reject);
      resolve();
    });
  });

  const address = httpServer.address();
  if (address === null || typeof address === 'string') {
    httpServer.close();
    throw new Error('Mock conversation server did not bind to a TCP port');
  }

  return { server: httpServer, port: address.port, repo };
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Diagnostic helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Prints a condensed view of one agent's logs to the console for debugging.
 *
 * Reads `<workspaceRoot>/.cw/agent-logs/<agentName>/`:
 * - `output.jsonl`: the last 30 non-blank lines. Assistant text blocks are
 *   printed as TEXT, tool calls as TOOL, `result` events as RESULT, and lines
 *   that cannot be parsed or processed as RAW (each truncated).
 * - `stderr.log`: the first 500 characters, when non-empty.
 *
 * Missing directory or files are reported/skipped rather than treated as errors.
 *
 * @param workspaceRoot - Root directory that contains the `.cw` folder.
 * @param agentName - Name of the agent whose log directory should be dumped.
 */
function dumpAgentLogs(workspaceRoot: string, agentName: string): void {
  const logDir = join(workspaceRoot, '.cw', 'agent-logs', agentName);
  if (!existsSync(logDir)) {
    console.log(` [${agentName}] No log directory at ${logDir}`);
    return;
  }

  // Dump output.jsonl (last 30 lines)
  const outputPath = join(logDir, 'output.jsonl');
  if (existsSync(outputPath)) {
    // Drop blank lines so an empty file reports 0 lines instead of one
    // bogus empty entry.
    const lines = readFileSync(outputPath, 'utf-8')
      .split('\n')
      .filter((line) => line.trim().length > 0);
    const last = lines.slice(-30);
    console.log(` [${agentName}] output.jsonl (last ${last.length}/${lines.length} lines):`);
    for (const line of last) {
      try {
        const ev = JSON.parse(line);
        if (ev.type === 'assistant' && ev.message?.content) {
          for (const block of ev.message.content) {
            if (block.type === 'text') {
              console.log(` TEXT: ${String(block.text ?? '').substring(0, 200)}`);
            } else if (block.type === 'tool_use') {
              // JSON.stringify(undefined) is undefined, so fall back to null
              // for tool calls that carry no input.
              console.log(` TOOL: ${block.name} ${JSON.stringify(block.input ?? null).substring(0, 150)}`);
            }
          }
        } else if (ev.type === 'result') {
          console.log(` RESULT: ${JSON.stringify(ev).substring(0, 300)}`);
        }
      } catch {
        // Not JSON (or not the expected shape): show the raw line instead.
        console.log(` RAW: ${line.substring(0, 200)}`);
      }
    }
  }

  // Dump stderr
  const stderrPath = join(logDir, 'stderr.log');
  if (existsSync(stderrPath)) {
    const stderr = readFileSync(stderrPath, 'utf-8').trim();
    if (stderr) {
      console.log(` [${agentName}] stderr: ${stderr.substring(0, 500)}`);
    }
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Test suite
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Spawns two real Claude agents against the mock conversation server and
// verifies that they exchange two question/answer rounds while both also
// complete an independent coding task.
describeRealClaude('Real Inter-Agent Conversation (mock server)', () => {
  // Polling stops this long before TEST_TIMEOUT so the assertions (and their
  // diagnostics) run before the test runner's hard timeout kills the test.
  const POLL_SAFETY_MARGIN_MS = 15000;

  let harness: RealProviderHarness;
  let mockServer: Server;
  let mockPort: number;
  let mockRepo: InMemoryConversationRepository;
  const originalCwPort = process.env.CW_PORT;

  beforeAll(async () => {
    console.log('\n=== Real Inter-Agent Conversation Test ===');
    console.log('Mock conversation server + two Claude sessions.\n');

    // Start mock conversation server (only listen/ask/answer endpoints)
    const mock = await startMockConversationServer();
    mockServer = mock.server;
    mockPort = mock.port;
    mockRepo = mock.repo;
    console.log(` Mock server on port ${mockPort}`);

    // Set CW_PORT so agents' cw commands hit the mock server
    process.env.CW_PORT = String(mockPort);

    // Real agent harness for spawning + worktrees (no full CoordinationServer)
    harness = await createRealProviderHarness({ provider: 'claude' });
    console.log(` Workspace: ${harness.workspaceRoot}`);
  });

  afterAll(async () => {
    // Restore CW_PORT exactly as it was, including an originally empty value.
    if (originalCwPort !== undefined) {
      process.env.CW_PORT = originalCwPort;
    } else {
      delete process.env.CW_PORT;
    }
    try {
      await harness?.cleanup();
    } finally {
      // Always stop the mock server, even when harness cleanup throws.
      // Not awaited: close() waits for open connections to drain.
      mockServer?.close();
    }
  });

  it(
    'two agents with real tasks communicate via cw ask/listen/answer (two questions prove re-listen)',
    async () => {
      const testStartedAt = Date.now();
      const agentSuffix = nanoid(6); // unique suffix for temp files

      // ---------------------------------------------------------------
      // Agent A — builds a validator module WHILE answering questions
      // in the background via cw listen
      // ---------------------------------------------------------------
      const agentA = await harness.agentManager.spawn({
        taskId: null,
        prompt: `You are Agent A in a multi-agent coordination test.

You have TWO concurrent responsibilities:
1. Build a TypeScript validator module (your main coding task)
2. Answer questions from other agents via a background listener

SETUP (do this first):
- Read .cw/input/manifest.json to get your agentId
- Start a background listener that writes to a temp file:
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
LISTEN_PID=$!

MAIN CODING TASK — implement a user registration validator:

1. Create types.ts:
export interface RegistrationInput { name: string; email: string; password: string; }
export interface ValidationResult { valid: boolean; errors: string[]; }

2. Create validator.ts:
Import from types.ts. Export function validateRegistration(input: RegistrationInput): ValidationResult
Rules: name min 2 chars, email must have exactly one @ and domain with a dot and no spaces and max 254 chars, password min 8 chars.

3. Create index.ts that re-exports everything from types.ts and validator.ts.

BETWEEN EACH FILE, check for incoming questions:
if [ -s /tmp/cw-listen-${agentSuffix}.txt ]; then
# parse the JSON, get conversationId and question
# answer: cw answer "<answer based on your code>" --conversation-id <id>
# clear and restart listener:
> /tmp/cw-listen-${agentSuffix}.txt
cw listen --agent-id <YOUR_AGENT_ID> --timeout 120000 > /tmp/cw-listen-${agentSuffix}.txt 2>&1 &
LISTEN_PID=$!
fi

You will receive TWO questions total while you work. Answer them based on the code you are writing.

CLEANUP: After all 3 files are written and both questions answered:
- kill $LISTEN_PID 2>/dev/null
- Write .cw/output/signal.json: {"status":"done","result":"validator module complete, answered 2 questions"}

CRITICAL:
- The listener MUST run in the background while you write code.
- Check for questions between files, not as blocking waits.
- The CW_PORT environment variable is already set to ${mockPort}.`,
        mode: 'execute',
        provider: 'claude',
        inputContext: {},
      });

      console.log(` Agent A: ${agentA.id} (${agentA.name})`);

      // Give Agent A time to start its background listener and begin coding
      await sleep(15000);

      // ---------------------------------------------------------------
      // Agent B — builds a client module, asks Agent A questions to
      // learn the validation rules, then uses answers in its code
      // ---------------------------------------------------------------
      const agentB = await harness.agentManager.spawn({
        taskId: null,
        prompt: `You are Agent B in a multi-agent coordination test.

Read .cw/input/manifest.json to get your agentId. Agent A (ID: ${agentA.id}) is building a validator module.

YOUR CODING TASK — build a registration API client that includes client-side validation matching Agent A's server-side rules:

1. Create client-scaffold.ts with a basic RegistrationClient class that has a register(name, email, password) method that returns Promise<{ok: boolean}>.
Leave a TODO comment where validation will go.

2. NOW ask Agent A what the validation rules are — you need this to write proper client-side checks:
FIELDS=$(cw ask "What are the required fields and their types for registration?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)

3. Ask Agent A about the specific email validation rules:
EMAIL_RULES=$(cw ask "What are the exact email validation rules you implemented?" --from <YOUR_AGENT_ID> --agent-id ${agentA.id} --timeout 120000)

4. Create validated-client.ts — a COMPLETE implementation using the answers:
Import the scaffold, add a validateBeforeSubmit(name, email, password) function
that implements the EXACT validation rules Agent A told you about.
Include a comment at the top with the rules you received.

5. Write .cw/output/signal.json: {"status":"done","result":"client module complete with validation from agent A"}

CRITICAL:
- Create client-scaffold.ts BEFORE asking questions (you have independent work to do first).
- Use the ACTUAL answers from Agent A in your validated-client.ts implementation.
- The CW_PORT environment variable is already set to ${mockPort}.`,
        mode: 'execute',
        provider: 'claude',
        inputContext: {},
      });

      console.log(` Agent B: ${agentB.id} (${agentB.name})`);

      // ---------------------------------------------------------------
      // Wait for both agents to stop running, then verify conversations
      // ---------------------------------------------------------------
      // The deadline is anchored to the start of the test (not to "now"):
      // spawning and the initial sleep have already consumed part of
      // TEST_TIMEOUT, and the runner enforces that same limit on this test.
      const pollDeadline = testStartedAt + TEST_TIMEOUT - POLL_SAFETY_MARGIN_MS;
      let aDone = false;
      let bDone = false;
      let lastLogTime = 0;

      while (Date.now() < pollDeadline && (!aDone || !bDone)) {
        const [agentAInfo, agentBInfo] = await Promise.all([
          harness.agentRepository.findById(agentA.id),
          harness.agentRepository.findById(agentB.id),
        ]);

        // Periodic progress logging every 30s
        if (Date.now() - lastLogTime > 30000) {
          const elapsed = Math.round((Date.now() - testStartedAt) / 1000);
          console.log(` [${elapsed}s] A=${agentAInfo?.status ?? '?'} B=${agentBInfo?.status ?? '?'} convs=${mockRepo.getAll().length}`);
          lastLogTime = Date.now();
        }

        if (agentAInfo && agentAInfo.status !== 'running' && !aDone) {
          aDone = true;
          console.log(` Agent A final status: ${agentAInfo.status}`);
          dumpAgentLogs(harness.workspaceRoot, agentA.name);
        }
        if (agentBInfo && agentBInfo.status !== 'running' && !bDone) {
          bDone = true;
          console.log(` Agent B final status: ${agentBInfo.status}`);
          dumpAgentLogs(harness.workspaceRoot, agentB.name);
        }

        if (!aDone || !bDone) await sleep(2000);
      }

      // Polling gave up: dump what the unfinished agents logged so the
      // failure below can be diagnosed.
      if (!aDone) {
        console.log(' Agent A still running at poll deadline');
        dumpAgentLogs(harness.workspaceRoot, agentA.name);
      }
      if (!bDone) {
        console.log(' Agent B still running at poll deadline');
        dumpAgentLogs(harness.workspaceRoot, agentB.name);
      }

      expect(aDone).toBe(true);
      expect(bDone).toBe(true);

      // ---------------------------------------------------------------
      // Verify conversations in mock repo
      // ---------------------------------------------------------------
      const allConversations = mockRepo.getAll();
      console.log(` Total conversations: ${allConversations.length}`);
      for (const c of allConversations) {
        console.log(
          ` ${c.id}: ${c.status} — Q: "${c.question}" A: "${c.answer?.substring(0, 80) ?? '<none>'}..."`,
        );
      }

      // Exactly 2 conversations, both answered
      expect(allConversations.length).toBe(2);
      expect(allConversations.every((c) => c.status === 'answered')).toBe(true);

      // Both target Agent A, both from Agent B
      expect(allConversations.every((c) => c.toAgentId === agentA.id)).toBe(true);
      expect(allConversations.every((c) => c.fromAgentId === agentB.id)).toBe(true);

      // Questions should be distinct (one about fields, one about email validation)
      const questions = allConversations.map((c) => c.question);
      expect(questions.some((q) => q.toLowerCase().includes('field'))).toBe(true);
      expect(questions.some((q) => q.toLowerCase().includes('email'))).toBe(true);

      // Both answers should be non-empty
      expect(allConversations.every((c) => c.answer && c.answer.length > 0)).toBe(true);

      // ---------------------------------------------------------------
      // Verify Agent A's coding output — validator module files exist
      // ---------------------------------------------------------------
      const aWorkdir = join(
        harness.workspaceRoot,
        'agent-workdirs',
        agentA.name,
        'workspace',
      );
      const aFiles = ['types.ts', 'validator.ts', 'index.ts'];
      for (const f of aFiles) {
        const filePath = join(aWorkdir, f);
        const exists = existsSync(filePath);
        console.log(` Agent A file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
        expect(exists).toBe(true);
      }
      // validator.ts should contain actual validation logic
      const validatorContent = readFileSync(join(aWorkdir, 'validator.ts'), 'utf-8');
      console.log(` Agent A validator.ts (${validatorContent.length} chars): ${validatorContent.substring(0, 120)}...`);
      expect(validatorContent.toLowerCase()).toContain('email');
      expect(validatorContent.toLowerCase()).toContain('password');

      // ---------------------------------------------------------------
      // Verify Agent B's coding output — client module files exist
      // ---------------------------------------------------------------
      const bWorkdir = join(
        harness.workspaceRoot,
        'agent-workdirs',
        agentB.name,
        'workspace',
      );
      const bFiles = ['client-scaffold.ts', 'validated-client.ts'];
      for (const f of bFiles) {
        const filePath = join(bWorkdir, f);
        const exists = existsSync(filePath);
        console.log(` Agent B file ${f}: ${exists ? 'EXISTS' : 'MISSING'}`);
        expect(exists).toBe(true);
      }
      // validated-client.ts should reference validation rules from Agent A's answers
      const clientContent = readFileSync(join(bWorkdir, 'validated-client.ts'), 'utf-8');
      console.log(` Agent B validated-client.ts (${clientContent.length} chars): ${clientContent.substring(0, 120)}...`);
      expect(clientContent.toLowerCase()).toContain('email');

      // ---------------------------------------------------------------
      // Verify interleaving: Agent A's JSONL log has coding tool calls
      // (Write for .ts files) interleaved with conversation tool calls
      // (Bash for cw listen/answer)
      // ---------------------------------------------------------------
      const aLogPath = join(harness.workspaceRoot, '.cw', 'agent-logs', agentA.name, 'output.jsonl');
      const aLog = readFileSync(aLogPath, 'utf-8').trim().split('\n');
      const toolCalls: { type: 'code' | 'conversation'; name: string; detail: string }[] = [];

      for (const line of aLog) {
        try {
          const ev = JSON.parse(line);
          if (ev.type !== 'assistant' || !ev.message?.content) continue;
          for (const block of ev.message.content) {
            if (block.type !== 'tool_use') continue;
            const input = typeof block.input === 'string' ? block.input : JSON.stringify(block.input);
            if (block.name === 'Write' && input.includes('.ts')) {
              toolCalls.push({ type: 'code', name: 'Write', detail: input.substring(0, 80) });
            } else if (block.name === 'Bash' && (input.includes('cw listen') || input.includes('cw answer'))) {
              toolCalls.push({ type: 'conversation', name: 'Bash', detail: input.substring(0, 80) });
            }
          }
        } catch { /* skip non-JSON lines */ }
      }

      console.log(` Agent A interleaving (${toolCalls.length} relevant tool calls):`);
      for (const tc of toolCalls) {
        console.log(` [${tc.type}] ${tc.name}: ${tc.detail}`);
      }

      // Must have both code and conversation tool calls
      const hasCode = toolCalls.some((tc) => tc.type === 'code');
      const hasConversation = toolCalls.some((tc) => tc.type === 'conversation');
      expect(hasCode).toBe(true);
      expect(hasConversation).toBe(true);

      // Verify interleaving: at least one code call must appear AFTER a conversation call
      // (proving coding continued after handling a question)
      const callTypes = toolCalls.map((tc) => tc.type);
      const firstConvIdx = callTypes.indexOf('conversation');
      const lastCodeIdx = callTypes.lastIndexOf('code');
      console.log(` First conversation at index ${firstConvIdx}, last code at index ${lastCodeIdx}`);
      expect(lastCodeIdx).toBeGreaterThan(firstConvIdx);
    },
    TEST_TIMEOUT,
  );
});
|