Codewalkers/apps/server/test/integration/agent-workdir-verification.test.ts

/**
 * Agent Working Directory Verification Tests
 *
 * Tests that verify agents actually run in their intended working directories.
 * These tests use simple shell commands to prove the agent execution location.
 *
 * IMPORTANT: These tests spawn real CLI processes and may incur API costs.
 * They are SKIPPED by default to prevent accidental charges.
 *
 * To run these tests:
 * ```bash
 * REAL_WORKDIR_TESTS=1 npm test -- test/integration/agent-workdir-verification.test.ts --test-timeout=120000
 * ```
 */

import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { mkdtemp, rm, readFile } from 'node:fs/promises';
import { existsSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { MultiProviderAgentManager } from '../../agent/manager.js';
import { createTestDatabase } from '../../db/repositories/drizzle/test-helpers.js';
import {
  DrizzleAgentRepository,
  DrizzleProjectRepository,
  DrizzleAccountRepository,
  DrizzleInitiativeRepository,
} from '../../db/repositories/drizzle/index.js';
import { EventEmitterBus } from '../../events/bus.js';

/**
 * True unless the REAL_WORKDIR_TESTS environment variable is set to a
 * non-empty value. Used to gate the suite so real CLI processes are only
 * spawned on explicit opt-in.
 */
const SHOULD_SKIP = !process.env['REAL_WORKDIR_TESTS'];

/** Per-test timeout, in milliseconds, for tests that wait on a real agent run. */
const TEST_TIMEOUT = 60 * 1000;

/**
 * Integration suite that spawns real agents through MultiProviderAgentManager
 * and checks, via files on disk, that each agent's working directory is
 * `<tempDir>/agent-workdirs/<agent.name>/workspace`.
 *
 * The whole suite is skipped when SHOULD_SKIP is true.
 */
describe.skipIf(SHOULD_SKIP)('Agent Working Directory Verification', () => {
  /** Delay between successive agent status polls. */
  const STATUS_POLL_INTERVAL_MS = 1000;
  /** Delay between successive checks for files expected on disk. */
  const FILE_POLL_INTERVAL_MS = 100;
  /** Upper bound on how long to wait for the spawn diagnostic files to appear. */
  const SPAWN_FILES_BUDGET_MS = 10000;
  /**
   * Time reserved at the end of a test for assertions. Polling stops this long
   * before TEST_TIMEOUT so a stuck agent fails on an explicit assertion rather
   * than on the generic test-timeout error.
   */
  const ASSERTION_HEADROOM_MS = 5000;

  let tempDir: string;
  let agentManager: MultiProviderAgentManager;
  let agentRepository: DrizzleAgentRepository;

  /** Resolves after `ms` milliseconds. */
  const sleep = (ms: number): Promise<void> =>
    new Promise(resolve => setTimeout(resolve, ms));

  /**
   * Polls until every path in `paths` exists or `budgetMs` has elapsed.
   * Never throws on timeout; callers assert on existence afterwards so the
   * failure message names the missing file.
   */
  const waitForFiles = async (paths: readonly string[], budgetMs: number): Promise<void> => {
    const deadline = Date.now() + budgetMs;
    while (!paths.every(p => existsSync(p)) && Date.now() < deadline) {
      await sleep(FILE_POLL_INTERVAL_MS);
    }
  };

  beforeAll(async () => {
    if (SHOULD_SKIP) return;

    console.log('\n=== Running Agent Working Directory Tests ===');
    console.log('These tests verify agents run in correct working directories.\n');

    // Create temp directory for test workspace
    tempDir = await mkdtemp(join(tmpdir(), 'cw-workdir-test-'));

    // Set up test database and repositories
    const db = await createTestDatabase();
    const eventBus = new EventEmitterBus();

    agentRepository = new DrizzleAgentRepository(db);
    const projectRepository = new DrizzleProjectRepository(db);
    const accountRepository = new DrizzleAccountRepository(db);

    agentManager = new MultiProviderAgentManager(
      agentRepository,
      tempDir,
      projectRepository,
      accountRepository,
      eventBus,
    );
  });

  afterAll(async () => {
    if (SHOULD_SKIP || !tempDir) return;
    // NOTE(review): agents spawned by the tests may still be running here; no
    // stop/cleanup API of the manager is visible in this file — confirm whether
    // one should be called before the workspace is removed.
    try {
      // force: an already-missing directory is not a cleanup failure.
      await rm(tempDir, { recursive: true, force: true });
    } catch (err) {
      // Best-effort cleanup: never fail the suite because of leftover temp files.
      console.warn('Failed to cleanup temp directory:', err);
    }
  });

  it('spawns agent in correct standalone working directory', async () => {
    // Measured from test start so spawn time counts against the same budget
    // as the test timeout itself.
    const settleDeadline = Date.now() + TEST_TIMEOUT - ASSERTION_HEADROOM_MS;

    const prompt = `
Write your current working directory to a file called 'verify-pwd.txt'.
Use this exact bash command:

pwd > verify-pwd.txt

Then output the signal: {"done": true}
`.trim();

    // Spawn standalone agent
    const agent = await agentManager.spawn({
      taskId: null,
      prompt,
      mode: 'execute',
      provider: 'claude',
    });

    expect(agent.id).toBeTruthy();
    expect(agent.status).toBe('running');

    // Poll the repository until the agent leaves 'running', its record
    // disappears, or the polling budget is exhausted.
    let settledAgent = await agentRepository.findById(agent.id);
    while (settledAgent && settledAgent.status === 'running' && Date.now() < settleDeadline) {
      await sleep(STATUS_POLL_INTERVAL_MS);
      settledAgent = await agentRepository.findById(agent.id);
    }

    // Verify final agent state
    expect(settledAgent, 'agent record should exist after the run').toBeTruthy();
    expect(
      settledAgent?.status,
      'agent should leave the running state within the polling budget',
    ).not.toBe('running');

    // Get the agent's expected working directory
    const expectedWorkdir = join(tempDir, 'agent-workdirs', agent.name, 'workspace');

    // Read diagnostic files
    const diagnosticFile = join(expectedWorkdir, '.cw', 'spawn-diagnostic.json');
    const expectedPwdFile = join(expectedWorkdir, '.cw', 'expected-pwd.txt');
    const verifyPwdFile = join(expectedWorkdir, 'verify-pwd.txt');

    // Verify diagnostic files exist
    expect(existsSync(diagnosticFile), 'spawn diagnostic file should exist').toBe(true);
    expect(existsSync(expectedPwdFile), 'expected pwd file should exist').toBe(true);

    // Read diagnostic data
    const diagnostic = JSON.parse(await readFile(diagnosticFile, 'utf-8'));
    const expectedPwd = (await readFile(expectedPwdFile, 'utf-8')).trim();

    console.log('Diagnostic data:', diagnostic);
    console.log('Expected working directory:', expectedPwd);

    // Verify diagnostic consistency
    expect(diagnostic.intendedCwd).toBe(expectedWorkdir);
    expect(diagnostic.cwdExistsAtSpawn).toBe(true);
    expect(expectedPwd).toBe(expectedWorkdir);

    // The critical test: verify the agent actually wrote the file in the expected location
    if (existsSync(verifyPwdFile)) {
      const actualPwd = (await readFile(verifyPwdFile, 'utf-8')).trim();
      console.log('Agent reported working directory:', actualPwd);

      // This is the key verification: the pwd reported by the agent should match expected
      expect(actualPwd).toBe(expectedWorkdir);
    } else {
      // If the file doesn't exist, the agent either failed or ran somewhere else
      console.warn('Agent did not create verify-pwd.txt file');
      console.log('Expected at:', verifyPwdFile);

      // Debugging aid: look in the other directories the agent could plausibly have run in
      const alternativeLocations = [
        join(tempDir, 'verify-pwd.txt'),
        join(process.cwd(), 'verify-pwd.txt'),
      ];

      for (const loc of alternativeLocations) {
        if (existsSync(loc)) {
          const content = await readFile(loc, 'utf-8');
          console.log(`Found verify-pwd.txt at unexpected location ${loc}:`, content.trim());
        }
      }

      throw new Error('Agent did not create pwd verification file in expected location');
    }
  }, TEST_TIMEOUT);

  it('creates diagnostic files with correct metadata', async () => {
    const prompt = `Output the signal: {"done": true}`;

    const agent = await agentManager.spawn({
      taskId: null,
      prompt,
      mode: 'execute',
      provider: 'claude',
    });

    const expectedWorkdir = join(tempDir, 'agent-workdirs', agent.name, 'workspace');
    const diagnosticFile = join(expectedWorkdir, '.cw', 'spawn-diagnostic.json');
    const expectedPwdFile = join(expectedWorkdir, '.cw', 'expected-pwd.txt');

    // Bounded polling instead of a fixed sleep: returns as soon as both files
    // are present and tolerates slower machines up to the budget.
    await waitForFiles([diagnosticFile, expectedPwdFile], SPAWN_FILES_BUDGET_MS);

    // Verify files exist after spawn
    expect(existsSync(diagnosticFile), 'diagnostic file should be created after spawn').toBe(true);
    expect(existsSync(expectedPwdFile), 'expected pwd file should be created').toBe(true);

    // Verify diagnostic content
    const diagnostic = JSON.parse(await readFile(diagnosticFile, 'utf-8'));
    const expectedPwd = (await readFile(expectedPwdFile, 'utf-8')).trim();

    expect(diagnostic.agentId).toBe(agent.id);
    expect(diagnostic.alias).toBe(agent.name);
    expect(diagnostic.intendedCwd).toBe(expectedWorkdir);
    expect(diagnostic.provider).toBe('claude');
    expect(diagnostic.cwdExistsAtSpawn).toBe(true);
    expect(diagnostic.customCwdProvided).toBe(false);
    expect(typeof diagnostic.timestamp).toBe('string');
    expect(Array.isArray(diagnostic.args)).toBe(true);

    expect(expectedPwd).toBe(expectedWorkdir);
  }, TEST_TIMEOUT);
});