/**
 * Agent Working Directory Verification Tests
 *
 * Tests that verify agents actually run in their intended working directories.
 * These tests use simple shell commands to prove the agent execution location.
 *
 * IMPORTANT: These tests spawn real CLI processes and may incur API costs.
 * They are SKIPPED by default to prevent accidental charges.
 *
 * To run these tests:
 * ```bash
 * REAL_WORKDIR_TESTS=1 npm test -- src/test/integration/agent-workdir-verification.test.ts --test-timeout=120000
 * ```
 */

import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { mkdtemp, rm, readFile } from 'node:fs/promises';
import { existsSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { MultiProviderAgentManager } from '../../agent/manager.js';
import { createTestDatabase } from '../../db/repositories/drizzle/test-helpers.js';
import {
  DrizzleAgentRepository,
  DrizzleProjectRepository,
  DrizzleAccountRepository,
  DrizzleInitiativeRepository,
} from '../../db/repositories/drizzle/index.js';
import { EventEmitterBus } from '../../events/bus.js';

/** The suite only runs when REAL_WORKDIR_TESTS is set to a non-empty value. */
const SHOULD_SKIP = !process.env.REAL_WORKDIR_TESTS;

/** Upper bound (wall-clock) on how long we poll for an agent to leave the `running` state. */
const AGENT_COMPLETION_TIMEOUT_MS = 60000;

/** Delay between two consecutive status polls. */
const POLL_INTERVAL_MS = 1000;

/** Fixed pause after `spawn()` before the diagnostic files are inspected. */
const SPAWN_SETTLE_MS = 2000;

/**
 * Per-test timeout handed to `it(...)`.
 *
 * Deliberately larger than AGENT_COMPLETION_TIMEOUT_MS: if both were equal the
 * runner would abort the test with a generic timeout before the polling loop
 * could give up, and the explicit "agent is still running" assertion would
 * never be reached.
 */
const TEST_TIMEOUT = AGENT_COMPLETION_TIMEOUT_MS + 30000;

/** Resolves after `ms` milliseconds. */
const sleep = (ms: number): Promise<void> =>
  new Promise((resolve) => setTimeout(resolve, ms));

describe.skipIf(SHOULD_SKIP)('Agent Working Directory Verification', () => {
  let tempDir: string;
  let agentManager: MultiProviderAgentManager;
  let agentRepository: DrizzleAgentRepository;

  /**
   * Builds the paths this suite inspects for an agent with the given name.
   * The layout (`<tempDir>/agent-workdirs/<name>/workspace`) is what the
   * assertions below expect the manager to have produced.
   */
  const pathsFor = (agentName: string) => {
    const workdir = join(tempDir, 'agent-workdirs', agentName, 'workspace');
    return {
      workdir,
      diagnosticFile: join(workdir, '.cw', 'spawn-diagnostic.json'),
      expectedPwdFile: join(workdir, '.cw', 'expected-pwd.txt'),
      verifyPwdFile: join(workdir, 'verify-pwd.txt'),
    };
  };

  /**
   * Polls the repository until the agent is gone or no longer `running`, or
   * until AGENT_COMPLETION_TIMEOUT_MS of wall-clock time has elapsed.
   * Returns silently in both cases; the caller asserts on the final state.
   */
  const waitForAgentToSettle = async (
    agentId: Parameters<DrizzleAgentRepository['findById']>[0],
  ): Promise<void> => {
    const deadline = Date.now() + AGENT_COMPLETION_TIMEOUT_MS;
    while (Date.now() < deadline) {
      await sleep(POLL_INTERVAL_MS);
      const current = await agentRepository.findById(agentId);
      if (!current || current.status !== 'running') {
        return;
      }
    }
  };

  beforeAll(async () => {
    if (SHOULD_SKIP) return;

    console.log('\n=== Running Agent Working Directory Tests ===');
    console.log('These tests verify agents run in correct working directories.\n');

    // Create temp directory for test workspace
    tempDir = await mkdtemp(join(tmpdir(), 'cw-workdir-test-'));

    // Set up test database and repositories
    const db = await createTestDatabase();
    const eventBus = new EventEmitterBus();

    agentRepository = new DrizzleAgentRepository(db);
    const projectRepository = new DrizzleProjectRepository(db);
    const accountRepository = new DrizzleAccountRepository(db);

    agentManager = new MultiProviderAgentManager(
      agentRepository,
      tempDir,
      projectRepository,
      accountRepository,
      eventBus,
    );
  });

  afterAll(async () => {
    if (SHOULD_SKIP || !tempDir) return;

    // Best-effort cleanup: a failure here is reported but must not fail the suite.
    try {
      await rm(tempDir, { recursive: true });
    } catch (err) {
      console.warn('Failed to cleanup temp directory:', err);
    }
  });

  it(
    'spawns agent in correct standalone working directory',
    async () => {
      const prompt = [
        "Write your current working directory to a file called 'verify-pwd.txt'. Use this exact bash command:",
        '',
        'pwd > verify-pwd.txt',
        '',
        'Then output the signal: {"done": true}',
      ].join('\n');

      // Spawn standalone agent
      const agent = await agentManager.spawn({
        taskId: null,
        prompt,
        mode: 'execute',
        provider: 'claude',
      });

      expect(agent.id).toBeTruthy();
      expect(agent.status).toBe('running');

      // Wait for completion (bounded by AGENT_COMPLETION_TIMEOUT_MS)
      await waitForAgentToSettle(agent.id);

      // Verify final agent state
      const completedAgent = await agentRepository.findById(agent.id);
      expect(completedAgent).toBeTruthy();
      expect(completedAgent?.status).not.toBe('running');

      // Resolve the agent's expected working directory and diagnostic files
      const {
        workdir: expectedWorkdir,
        diagnosticFile,
        expectedPwdFile,
        verifyPwdFile,
      } = pathsFor(agent.name);

      // Verify diagnostic files exist
      expect(existsSync(diagnosticFile), 'spawn diagnostic file should exist').toBe(true);
      expect(existsSync(expectedPwdFile), 'expected pwd file should exist').toBe(true);

      // Read diagnostic data
      const diagnostic = JSON.parse(await readFile(diagnosticFile, 'utf-8'));
      const expectedPwd = (await readFile(expectedPwdFile, 'utf-8')).trim();

      console.log('Diagnostic data:', diagnostic);
      console.log('Expected working directory:', expectedPwd);

      // Verify diagnostic consistency
      expect(diagnostic.intendedCwd).toBe(expectedWorkdir);
      expect(diagnostic.cwdExistsAtSpawn).toBe(true);
      expect(expectedPwd).toBe(expectedWorkdir);

      // The critical check: the agent must have written the file in the expected location.
      if (!existsSync(verifyPwdFile)) {
        // The agent either failed or ran somewhere else.
        console.warn('Agent did not create verify-pwd.txt file');
        console.log('Expected at:', verifyPwdFile);

        // Debugging aid: report the file if it landed in a likely wrong location.
        const alternativeLocations = [
          join(tempDir, 'verify-pwd.txt'),
          join(process.cwd(), 'verify-pwd.txt'),
        ];

        for (const loc of alternativeLocations) {
          if (existsSync(loc)) {
            const content = await readFile(loc, 'utf-8');
            console.log(`Found verify-pwd.txt at unexpected location ${loc}:`, content.trim());
          }
        }

        throw new Error('Agent did not create pwd verification file in expected location');
      }

      const actualPwd = (await readFile(verifyPwdFile, 'utf-8')).trim();
      console.log('Agent reported working directory:', actualPwd);

      // Key verification: the pwd reported by the agent matches the expected workdir.
      expect(actualPwd).toBe(expectedWorkdir);
    },
    TEST_TIMEOUT,
  );

  it(
    'creates diagnostic files with correct metadata',
    async () => {
      const prompt = `Output the signal: {"done": true}`;

      // NOTE(review): this agent is neither awaited nor stopped before afterAll
      // removes tempDir — confirm that leaving it running is acceptable.
      const agent = await agentManager.spawn({
        taskId: null,
        prompt,
        mode: 'execute',
        provider: 'claude',
      });

      // Wait a bit for spawn to complete
      await sleep(SPAWN_SETTLE_MS);

      const { workdir: expectedWorkdir, diagnosticFile, expectedPwdFile } = pathsFor(agent.name);

      // Verify files exist shortly after spawn (after the settle delay above)
      expect(existsSync(diagnosticFile), 'diagnostic file should be created after spawn').toBe(true);
      expect(existsSync(expectedPwdFile), 'expected pwd file should be created').toBe(true);

      // Verify diagnostic content
      const diagnostic = JSON.parse(await readFile(diagnosticFile, 'utf-8'));
      const expectedPwd = (await readFile(expectedPwdFile, 'utf-8')).trim();

      expect(diagnostic.agentId).toBe(agent.id);
      expect(diagnostic.alias).toBe(agent.name);
      expect(diagnostic.intendedCwd).toBe(expectedWorkdir);
      expect(diagnostic.provider).toBe('claude');
      expect(diagnostic.cwdExistsAtSpawn).toBe(true);
      expect(diagnostic.customCwdProvided).toBe(false);
      expect(typeof diagnostic.timestamp).toBe('string');
      expect(Array.isArray(diagnostic.args)).toBe(true);

      expect(expectedPwd).toBe(expectedWorkdir);
    },
    TEST_TIMEOUT,
  );
});