test(09-02): add recovery and Q&A E2E scenario tests

- Queue state survives in database (source of truth)
- In-progress task recoverable after agent crash
- Blocked task state persists and can be unblocked
- Merge queue state recoverable
- Question enters waiting state and completes after resume
- Question surfaces as structured PendingQuestion
- Agent resumes with answer and completes successfully
- Waiting agent status transitions correctly through full cycle
This commit is contained in:
Lukas May
2026-01-31 15:44:57 +01:00
parent 6ac9df7619
commit 62b13d8e84

View File

@@ -0,0 +1,398 @@
/**
* E2E Tests for Recovery and Extended Scenarios
*
* Tests recovery/resume after interruption scenarios:
* - Queue state survives harness recreation (DB is source of truth)
* - In-progress task recoverable after agent crash
* - Blocked task state persists and can be unblocked
* - Merge queue state recoverable
*
* Tests extended agent Q&A scenarios:
* - Multiple questions in sequence
* - Question surfaces in message queue
* - Agent resumes with answer in context
* - Waiting agent blocks task completion
*
* Uses TestHarness from src/test/ for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
type TestHarness,
} from '../index.js';
import type {
AgentWaitingEvent,
AgentResumedEvent,
AgentStoppedEvent,
} from '../../events/types.js';
describe('E2E Recovery Scenarios', () => {
describe('Recovery after interruption', () => {
let harness: TestHarness;
beforeEach(() => {
harness = createTestHarness();
});
afterEach(() => {
harness.cleanup();
vi.useRealTimers();
});
it('queue state survives in database (source of truth)', async () => {
// Seed fixture, queue tasks
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Queue task
await harness.dispatchManager.queue(taskAId);
// Verify queue state shows task (queued, not pending)
const queueState1 = await harness.dispatchManager.getQueueState();
expect(queueState1.queued.length).toBe(1);
expect(queueState1.queued[0].taskId).toBe(taskAId);
// The queue state is in memory, but task status is in DB.
// Verify task status in database directly
const task = await harness.taskRepository.findById(taskAId);
expect(task?.status).toBe('pending');
// Verify: even after clearing in-memory queue state,
// we can still find pending tasks from database
const allTasks = await harness.taskRepository.findByPlanId(
seeded.plans.get('Plan 1')!
);
const pendingTasks = allTasks.filter((t) => t.status === 'pending');
// Task A is pending (not queued, but status is pending)
// Task B and C are also pending but depend on Task A
expect(pendingTasks.length).toBeGreaterThanOrEqual(1);
});
it('in-progress task recoverable after agent crash', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await vi.runAllTimersAsync();
// Set crash scenario
harness.setAgentError(`agent-${taskAId.slice(0, 6)}`, 'Token limit exceeded');
// Queue and dispatch
await harness.dispatchManager.queue(taskAId);
await harness.dispatchManager.dispatchNext();
await vi.runAllTimersAsync();
// Verify task status is 'in_progress' (not completed, not lost)
let task = await harness.taskRepository.findById(taskAId);
expect(task?.status).toBe('in_progress');
// Task can be re-queued and dispatched to a new agent
// First, clear agent manager and create new pool agent
harness.agentManager.clear();
await harness.agentManager.spawn({
name: 'new-pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await vi.runAllTimersAsync();
// Re-queue the task (it's still in_progress but we can retry)
await harness.dispatchManager.queue(taskAId);
// Set success scenario for the new agent
harness.setAgentDone(`agent-${taskAId.slice(0, 6)}`, 'Task completed after retry');
// Clear events and dispatch again
harness.clearEvents();
const dispatchResult = await harness.dispatchManager.dispatchNext();
await vi.runAllTimersAsync();
// Verify: agent completed successfully
expect(dispatchResult.agentId).toBeDefined();
const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
expect(agentResult?.success).toBe(true);
});
it('blocked task state persists in database', async () => {
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Queue task and block it
await harness.dispatchManager.queue(taskAId);
await harness.dispatchManager.blockTask(taskAId, 'Waiting for user decision');
// Verify task in blocked state in DB
const task = await harness.taskRepository.findById(taskAId);
expect(task?.status).toBe('blocked');
// Query blocked tasks from queue state
const queueState = await harness.dispatchManager.getQueueState();
expect(queueState.blocked.length).toBe(1);
expect(queueState.blocked[0].taskId).toBe(taskAId);
expect(queueState.blocked[0].reason).toBe('Waiting for user decision');
// Re-queue task to unblock (set status back to pending via repository)
await harness.taskRepository.update(taskAId, { status: 'pending' });
await harness.dispatchManager.queue(taskAId);
// Verify: task now in pending state in database
const unblocked = await harness.taskRepository.findById(taskAId);
expect(unblocked?.status).toBe('pending');
// Task should be in queued list
const queueState2 = await harness.dispatchManager.getQueueState();
expect(queueState2.queued.some((t) => t.taskId === taskAId)).toBe(true);
});
it('merge queue state recoverable', async () => {
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Mark task as completed (required for merge)
await harness.taskRepository.update(taskAId, { status: 'completed' });
// Create worktree for task
const worktreeId = `wt-${taskAId.slice(0, 6)}`;
await harness.worktreeManager.create(worktreeId, 'feature-task-a');
// Create agent in agentRepository (required for merge lookup)
await harness.agentRepository.create({
name: `agent-${taskAId.slice(0, 6)}`,
worktreeId,
taskId: taskAId,
status: 'idle',
});
// Queue for merge
await harness.coordinationManager.queueMerge(taskAId);
// Verify merge queue has queued item
const queueState1 = await harness.coordinationManager.getQueueState();
expect(queueState1.queued.some((item) => item.taskId === taskAId)).toBe(true);
// Process merge
const results = await harness.coordinationManager.processMerges('main');
// Verify: merge completed correctly
expect(results.length).toBe(1);
expect(results[0].taskId).toBe(taskAId);
expect(results[0].success).toBe(true);
// Verify: task in merged list
const queueState2 = await harness.coordinationManager.getQueueState();
expect(queueState2.merged.includes(taskAId)).toBe(true);
});
});
describe('Agent Q&A extended scenarios', () => {
let harness: TestHarness;
beforeEach(() => {
harness = createTestHarness();
});
afterEach(() => {
harness.cleanup();
vi.useRealTimers();
});
it('question enters waiting state and completes after resume', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await vi.runAllTimersAsync();
// Set question scenario with options
harness.setAgentQuestion(`agent-${taskAId.slice(0, 6)}`, 'Which database should I use?', [
{ label: 'PostgreSQL', description: 'Relational, ACID compliant' },
{ label: 'SQLite', description: 'Lightweight, file-based' },
]);
// Queue and dispatch
await harness.dispatchManager.queue(taskAId);
harness.clearEvents();
const dispatchResult = await harness.dispatchManager.dispatchNext();
await vi.runAllTimersAsync();
// Verify: agent:waiting event emitted
const waitingEvents = harness.getEventsByType('agent:waiting');
expect(waitingEvents.length).toBe(1);
const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
expect(waitingPayload.taskId).toBe(taskAId);
expect(waitingPayload.question).toBe('Which database should I use?');
// Clear and resume
harness.clearEvents();
await harness.agentManager.resume(dispatchResult.agentId!, 'PostgreSQL');
await vi.runAllTimersAsync();
// Verify: resumed and stopped events
const resumedEvents = harness.getEventsByType('agent:resumed');
expect(resumedEvents.length).toBe(1);
const resumedPayload = (resumedEvents[0] as AgentResumedEvent).payload;
expect(resumedPayload.taskId).toBe(taskAId);
const stoppedEvents = harness.getEventsByType('agent:stopped');
expect(stoppedEvents.length).toBe(1);
const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
expect(stoppedPayload.reason).toBe('task_complete');
});
it('question surfaces as structured PendingQuestion', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await vi.runAllTimersAsync();
// Set question scenario with options
harness.setAgentQuestion(`agent-${taskAId.slice(0, 6)}`, 'Select your framework', [
{ label: 'React' },
{ label: 'Vue' },
{ label: 'Svelte' },
]);
// Queue and dispatch
await harness.dispatchManager.queue(taskAId);
const dispatchResult = await harness.dispatchManager.dispatchNext();
await vi.runAllTimersAsync();
// Verify: agent:waiting event has question
const waitingEvents = harness.getEventsByType('agent:waiting');
expect(waitingEvents.length).toBe(1);
const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
expect(waitingPayload.question).toBe('Select your framework');
expect(waitingPayload.options).toEqual([
{ label: 'React' },
{ label: 'Vue' },
{ label: 'Svelte' },
]);
// Verify: getPendingQuestion returns structured data
const pendingQuestion = await harness.getPendingQuestion(dispatchResult.agentId!);
expect(pendingQuestion).not.toBeNull();
expect(pendingQuestion?.question).toBe('Select your framework');
expect(pendingQuestion?.options).toEqual([
{ label: 'React' },
{ label: 'Vue' },
{ label: 'Svelte' },
]);
});
it('agent resumes with answer and completes successfully', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await vi.runAllTimersAsync();
// Set question scenario
harness.setAgentQuestion(`agent-${taskAId.slice(0, 6)}`, 'Choose database type');
// Queue and dispatch
await harness.dispatchManager.queue(taskAId);
const dispatchResult = await harness.dispatchManager.dispatchNext();
await vi.runAllTimersAsync();
// Verify agent is waiting
const agent = await harness.agentManager.get(dispatchResult.agentId!);
expect(agent?.status).toBe('waiting_for_input');
// Resume with specific answer
await harness.agentManager.resume(dispatchResult.agentId!, 'PostgreSQL');
await vi.runAllTimersAsync();
// Verify: agent completed successfully
const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
expect(agentResult).not.toBeNull();
expect(agentResult?.success).toBe(true);
expect(agentResult?.message).toBe('Resumed and completed successfully');
// Verify: agent status is now idle
const finalAgent = await harness.agentManager.get(dispatchResult.agentId!);
expect(finalAgent?.status).toBe('idle');
});
it('waiting agent status transitions correctly through full cycle', async () => {
vi.useFakeTimers();
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
const taskAId = seeded.tasks.get('Task A')!;
// Pre-seed required idle agent
await harness.agentManager.spawn({
name: 'pool-agent',
taskId: 'placeholder',
prompt: 'placeholder',
});
await vi.runAllTimersAsync();
// Set question scenario
harness.setAgentQuestion(`agent-${taskAId.slice(0, 6)}`, 'API key format?');
// Queue and dispatch
await harness.dispatchManager.queue(taskAId);
const dispatchResult = await harness.dispatchManager.dispatchNext();
// Phase 1: Initially running
let agent = await harness.agentManager.get(dispatchResult.agentId!);
expect(agent?.status).toBe('running');
await vi.runAllTimersAsync();
// Phase 2: After scenario completes, waiting_for_input
agent = await harness.agentManager.get(dispatchResult.agentId!);
expect(agent?.status).toBe('waiting_for_input');
// Verify pending question exists
const pendingQuestion = await harness.getPendingQuestion(dispatchResult.agentId!);
expect(pendingQuestion?.question).toBe('API key format?');
// Phase 3: Resume
await harness.agentManager.resume(dispatchResult.agentId!, 'Bearer token');
// After resume: running again briefly
agent = await harness.agentManager.get(dispatchResult.agentId!);
expect(agent?.status).toBe('running');
await vi.runAllTimersAsync();
// Phase 4: After completion, idle
agent = await harness.agentManager.get(dispatchResult.agentId!);
expect(agent?.status).toBe('idle');
// Verify pending question is cleared after resume
const clearedQuestion = await harness.getPendingQuestion(dispatchResult.agentId!);
expect(clearedQuestion).toBeNull();
});
});
});