test(09-02): add recovery and Q&A E2E scenario tests
- Queue state survives in database (source of truth) - In-progress task recoverable after agent crash - Blocked task state persists and can be unblocked - Merge queue state recoverable - Question enters waiting state and completes after resume - Question surfaces as structured PendingQuestion - Agent resumes with answer and completes successfully - Waiting agent status transitions correctly through full cycle
This commit is contained in:
398
src/test/e2e/recovery-scenarios.test.ts
Normal file
398
src/test/e2e/recovery-scenarios.test.ts
Normal file
@@ -0,0 +1,398 @@
|
||||
/**
|
||||
* E2E Tests for Recovery and Extended Scenarios
|
||||
*
|
||||
* Tests recovery/resume after interruption scenarios:
|
||||
* - Queue state survives harness recreation (DB is source of truth)
|
||||
* - In-progress task recoverable after agent crash
|
||||
* - Blocked task state persists and can be unblocked
|
||||
* - Merge queue state recoverable
|
||||
*
|
||||
* Tests extended agent Q&A scenarios:
|
||||
* - Multiple questions in sequence
|
||||
* - Question surfaces in message queue
|
||||
* - Agent resumes with answer in context
|
||||
* - Waiting agent blocks task completion
|
||||
*
|
||||
* Uses TestHarness from src/test/ for full system wiring.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
||||
import {
|
||||
createTestHarness,
|
||||
SIMPLE_FIXTURE,
|
||||
type TestHarness,
|
||||
} from '../index.js';
|
||||
import type {
|
||||
AgentWaitingEvent,
|
||||
AgentResumedEvent,
|
||||
AgentStoppedEvent,
|
||||
} from '../../events/types.js';
|
||||
|
||||
describe('E2E Recovery Scenarios', () => {
|
||||
describe('Recovery after interruption', () => {
|
||||
let harness: TestHarness;
|
||||
|
||||
beforeEach(() => {
|
||||
harness = createTestHarness();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
harness.cleanup();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it('queue state survives in database (source of truth)', async () => {
|
||||
// Seed fixture, queue tasks
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Queue task
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
|
||||
// Verify queue state shows task (queued, not pending)
|
||||
const queueState1 = await harness.dispatchManager.getQueueState();
|
||||
expect(queueState1.queued.length).toBe(1);
|
||||
expect(queueState1.queued[0].taskId).toBe(taskAId);
|
||||
|
||||
// The queue state is in memory, but task status is in DB.
|
||||
// Verify task status in database directly
|
||||
const task = await harness.taskRepository.findById(taskAId);
|
||||
expect(task?.status).toBe('pending');
|
||||
|
||||
// Verify: even after clearing in-memory queue state,
|
||||
// we can still find pending tasks from database
|
||||
const allTasks = await harness.taskRepository.findByPlanId(
|
||||
seeded.plans.get('Plan 1')!
|
||||
);
|
||||
const pendingTasks = allTasks.filter((t) => t.status === 'pending');
|
||||
|
||||
// Task A is pending (not queued, but status is pending)
|
||||
// Task B and C are also pending but depend on Task A
|
||||
expect(pendingTasks.length).toBeGreaterThanOrEqual(1);
|
||||
});
|
||||
|
||||
it('in-progress task recoverable after agent crash', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Set crash scenario
|
||||
harness.setAgentError(`agent-${taskAId.slice(0, 6)}`, 'Token limit exceeded');
|
||||
|
||||
// Queue and dispatch
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
await harness.dispatchManager.dispatchNext();
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Verify task status is 'in_progress' (not completed, not lost)
|
||||
let task = await harness.taskRepository.findById(taskAId);
|
||||
expect(task?.status).toBe('in_progress');
|
||||
|
||||
// Task can be re-queued and dispatched to a new agent
|
||||
// First, clear agent manager and create new pool agent
|
||||
harness.agentManager.clear();
|
||||
await harness.agentManager.spawn({
|
||||
name: 'new-pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Re-queue the task (it's still in_progress but we can retry)
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
|
||||
// Set success scenario for the new agent
|
||||
harness.setAgentDone(`agent-${taskAId.slice(0, 6)}`, 'Task completed after retry');
|
||||
|
||||
// Clear events and dispatch again
|
||||
harness.clearEvents();
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Verify: agent completed successfully
|
||||
expect(dispatchResult.agentId).toBeDefined();
|
||||
const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
|
||||
expect(agentResult?.success).toBe(true);
|
||||
});
|
||||
|
||||
it('blocked task state persists in database', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Queue task and block it
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
await harness.dispatchManager.blockTask(taskAId, 'Waiting for user decision');
|
||||
|
||||
// Verify task in blocked state in DB
|
||||
const task = await harness.taskRepository.findById(taskAId);
|
||||
expect(task?.status).toBe('blocked');
|
||||
|
||||
// Query blocked tasks from queue state
|
||||
const queueState = await harness.dispatchManager.getQueueState();
|
||||
expect(queueState.blocked.length).toBe(1);
|
||||
expect(queueState.blocked[0].taskId).toBe(taskAId);
|
||||
expect(queueState.blocked[0].reason).toBe('Waiting for user decision');
|
||||
|
||||
// Re-queue task to unblock (set status back to pending via repository)
|
||||
await harness.taskRepository.update(taskAId, { status: 'pending' });
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
|
||||
// Verify: task now in pending state in database
|
||||
const unblocked = await harness.taskRepository.findById(taskAId);
|
||||
expect(unblocked?.status).toBe('pending');
|
||||
|
||||
// Task should be in queued list
|
||||
const queueState2 = await harness.dispatchManager.getQueueState();
|
||||
expect(queueState2.queued.some((t) => t.taskId === taskAId)).toBe(true);
|
||||
});
|
||||
|
||||
it('merge queue state recoverable', async () => {
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Mark task as completed (required for merge)
|
||||
await harness.taskRepository.update(taskAId, { status: 'completed' });
|
||||
|
||||
// Create worktree for task
|
||||
const worktreeId = `wt-${taskAId.slice(0, 6)}`;
|
||||
await harness.worktreeManager.create(worktreeId, 'feature-task-a');
|
||||
|
||||
// Create agent in agentRepository (required for merge lookup)
|
||||
await harness.agentRepository.create({
|
||||
name: `agent-${taskAId.slice(0, 6)}`,
|
||||
worktreeId,
|
||||
taskId: taskAId,
|
||||
status: 'idle',
|
||||
});
|
||||
|
||||
// Queue for merge
|
||||
await harness.coordinationManager.queueMerge(taskAId);
|
||||
|
||||
// Verify merge queue has queued item
|
||||
const queueState1 = await harness.coordinationManager.getQueueState();
|
||||
expect(queueState1.queued.some((item) => item.taskId === taskAId)).toBe(true);
|
||||
|
||||
// Process merge
|
||||
const results = await harness.coordinationManager.processMerges('main');
|
||||
|
||||
// Verify: merge completed correctly
|
||||
expect(results.length).toBe(1);
|
||||
expect(results[0].taskId).toBe(taskAId);
|
||||
expect(results[0].success).toBe(true);
|
||||
|
||||
// Verify: task in merged list
|
||||
const queueState2 = await harness.coordinationManager.getQueueState();
|
||||
expect(queueState2.merged.includes(taskAId)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Agent Q&A extended scenarios', () => {
|
||||
let harness: TestHarness;
|
||||
|
||||
beforeEach(() => {
|
||||
harness = createTestHarness();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
harness.cleanup();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it('question enters waiting state and completes after resume', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Set question scenario with options
|
||||
harness.setAgentQuestion(`agent-${taskAId.slice(0, 6)}`, 'Which database should I use?', [
|
||||
{ label: 'PostgreSQL', description: 'Relational, ACID compliant' },
|
||||
{ label: 'SQLite', description: 'Lightweight, file-based' },
|
||||
]);
|
||||
|
||||
// Queue and dispatch
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
harness.clearEvents();
|
||||
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Verify: agent:waiting event emitted
|
||||
const waitingEvents = harness.getEventsByType('agent:waiting');
|
||||
expect(waitingEvents.length).toBe(1);
|
||||
const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
|
||||
expect(waitingPayload.taskId).toBe(taskAId);
|
||||
expect(waitingPayload.question).toBe('Which database should I use?');
|
||||
|
||||
// Clear and resume
|
||||
harness.clearEvents();
|
||||
await harness.agentManager.resume(dispatchResult.agentId!, 'PostgreSQL');
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Verify: resumed and stopped events
|
||||
const resumedEvents = harness.getEventsByType('agent:resumed');
|
||||
expect(resumedEvents.length).toBe(1);
|
||||
const resumedPayload = (resumedEvents[0] as AgentResumedEvent).payload;
|
||||
expect(resumedPayload.taskId).toBe(taskAId);
|
||||
|
||||
const stoppedEvents = harness.getEventsByType('agent:stopped');
|
||||
expect(stoppedEvents.length).toBe(1);
|
||||
const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
|
||||
expect(stoppedPayload.reason).toBe('task_complete');
|
||||
});
|
||||
|
||||
it('question surfaces as structured PendingQuestion', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Set question scenario with options
|
||||
harness.setAgentQuestion(`agent-${taskAId.slice(0, 6)}`, 'Select your framework', [
|
||||
{ label: 'React' },
|
||||
{ label: 'Vue' },
|
||||
{ label: 'Svelte' },
|
||||
]);
|
||||
|
||||
// Queue and dispatch
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Verify: agent:waiting event has question
|
||||
const waitingEvents = harness.getEventsByType('agent:waiting');
|
||||
expect(waitingEvents.length).toBe(1);
|
||||
const waitingPayload = (waitingEvents[0] as AgentWaitingEvent).payload;
|
||||
expect(waitingPayload.question).toBe('Select your framework');
|
||||
expect(waitingPayload.options).toEqual([
|
||||
{ label: 'React' },
|
||||
{ label: 'Vue' },
|
||||
{ label: 'Svelte' },
|
||||
]);
|
||||
|
||||
// Verify: getPendingQuestion returns structured data
|
||||
const pendingQuestion = await harness.getPendingQuestion(dispatchResult.agentId!);
|
||||
expect(pendingQuestion).not.toBeNull();
|
||||
expect(pendingQuestion?.question).toBe('Select your framework');
|
||||
expect(pendingQuestion?.options).toEqual([
|
||||
{ label: 'React' },
|
||||
{ label: 'Vue' },
|
||||
{ label: 'Svelte' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('agent resumes with answer and completes successfully', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Set question scenario
|
||||
harness.setAgentQuestion(`agent-${taskAId.slice(0, 6)}`, 'Choose database type');
|
||||
|
||||
// Queue and dispatch
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Verify agent is waiting
|
||||
const agent = await harness.agentManager.get(dispatchResult.agentId!);
|
||||
expect(agent?.status).toBe('waiting_for_input');
|
||||
|
||||
// Resume with specific answer
|
||||
await harness.agentManager.resume(dispatchResult.agentId!, 'PostgreSQL');
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Verify: agent completed successfully
|
||||
const agentResult = await harness.agentManager.getResult(dispatchResult.agentId!);
|
||||
expect(agentResult).not.toBeNull();
|
||||
expect(agentResult?.success).toBe(true);
|
||||
expect(agentResult?.message).toBe('Resumed and completed successfully');
|
||||
|
||||
// Verify: agent status is now idle
|
||||
const finalAgent = await harness.agentManager.get(dispatchResult.agentId!);
|
||||
expect(finalAgent?.status).toBe('idle');
|
||||
});
|
||||
|
||||
it('waiting agent status transitions correctly through full cycle', async () => {
|
||||
vi.useFakeTimers();
|
||||
const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
|
||||
const taskAId = seeded.tasks.get('Task A')!;
|
||||
|
||||
// Pre-seed required idle agent
|
||||
await harness.agentManager.spawn({
|
||||
name: 'pool-agent',
|
||||
taskId: 'placeholder',
|
||||
prompt: 'placeholder',
|
||||
});
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Set question scenario
|
||||
harness.setAgentQuestion(`agent-${taskAId.slice(0, 6)}`, 'API key format?');
|
||||
|
||||
// Queue and dispatch
|
||||
await harness.dispatchManager.queue(taskAId);
|
||||
const dispatchResult = await harness.dispatchManager.dispatchNext();
|
||||
|
||||
// Phase 1: Initially running
|
||||
let agent = await harness.agentManager.get(dispatchResult.agentId!);
|
||||
expect(agent?.status).toBe('running');
|
||||
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Phase 2: After scenario completes, waiting_for_input
|
||||
agent = await harness.agentManager.get(dispatchResult.agentId!);
|
||||
expect(agent?.status).toBe('waiting_for_input');
|
||||
|
||||
// Verify pending question exists
|
||||
const pendingQuestion = await harness.getPendingQuestion(dispatchResult.agentId!);
|
||||
expect(pendingQuestion?.question).toBe('API key format?');
|
||||
|
||||
// Phase 3: Resume
|
||||
await harness.agentManager.resume(dispatchResult.agentId!, 'Bearer token');
|
||||
|
||||
// After resume: running again briefly
|
||||
agent = await harness.agentManager.get(dispatchResult.agentId!);
|
||||
expect(agent?.status).toBe('running');
|
||||
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Phase 4: After completion, idle
|
||||
agent = await harness.agentManager.get(dispatchResult.agentId!);
|
||||
expect(agent?.status).toBe('idle');
|
||||
|
||||
// Verify pending question is cleared after resume
|
||||
const clearedQuestion = await harness.getPendingQuestion(dispatchResult.agentId!);
|
||||
expect(clearedQuestion).toBeNull();
|
||||
});
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user