test(08-02): create E2E edge case tests for crash, waiting, and blocking

- Agent crash scenario: spawned -> crashed events, status not completed
- Agent waiting for input: waiting event with question, resume flow
- Task blocking: blocked list, event emission, not dispatchable
This commit is contained in:
Lukas May
2026-01-31 09:12:45 +01:00
parent c823a6b44b
commit e0d8fc85c6

View File

@@ -0,0 +1,341 @@
/**
* E2E Tests for Edge Cases
*
* Tests edge case scenarios in dispatch/coordination flow:
* - Agent crashes during task
* - Agent waiting for input and resume
* - Task blocking
* - Merge conflicts (TODO: not yet covered by the tests below)
*
* Uses TestHarness from src/test/ for full system wiring.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
createTestHarness,
SIMPLE_FIXTURE,
type TestHarness,
} from '../index.js';
import type {
AgentSpawnedEvent,
AgentCrashedEvent,
AgentWaitingEvent,
AgentResumedEvent,
AgentStoppedEvent,
TaskBlockedEvent,
MergeConflictedEvent,
} from '../../events/types.js';
describe('E2E Edge Cases', () => {
// Harness shared by every test in this suite; re-created before each test.
let harness: TestHarness;

beforeEach(() => {
  harness = createTestHarness();
});

afterEach(() => {
  try {
    // NOTE(review): cleanup() is not awaited here — confirm it is synchronous.
    harness.cleanup();
  } finally {
    // Many tests switch to fake timers. Always restore real timers, even if
    // cleanup throws, so fake timers cannot leak into the next test.
    vi.useRealTimers();
  }
});
describe('Agent crash during task', () => {
it('emits agent:spawned then agent:crashed events', async () => {
  vi.useFakeTimers();

  const crashMessage = 'Token limit exceeded';
  const fixture = await harness.seedFixture(SIMPLE_FIXTURE);
  const crashingTaskId = fixture.tasks.get('Task A')!;

  // The DispatchManager requires an idle agent, so spawn a placeholder one
  // and flush its timers before doing anything else.
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await vi.runAllTimersAsync();

  // The crash scenario has to be registered before the task is dispatched.
  const scenarioAgentName = `agent-${crashingTaskId.slice(0, 6)}`;
  harness.setAgentScenario(scenarioAgentName, {
    outcome: 'crash',
    message: crashMessage,
  });

  await harness.dispatchManager.queue(crashingTaskId);
  // Drop everything recorded so far; only dispatch-time events matter below.
  harness.clearEvents();
  await harness.dispatchManager.dispatchNext();
  await vi.runAllTimersAsync();

  // Exactly one agent:spawned event, tied to the dispatched task.
  const spawned = harness.getEventsByType('agent:spawned');
  expect(spawned.length).toBe(1);
  const { payload: spawnedInfo } = spawned[0] as AgentSpawnedEvent;
  expect(spawnedInfo.taskId).toBe(crashingTaskId);

  // Exactly one agent:crashed event carrying the configured error message.
  const crashed = harness.getEventsByType('agent:crashed');
  expect(crashed.length).toBe(1);
  const { payload: crashInfo } = crashed[0] as AgentCrashedEvent;
  expect(crashInfo.taskId).toBe(crashingTaskId);
  expect(crashInfo.error).toBe(crashMessage);
});
it('task status should NOT be completed after crash', async () => {
  vi.useFakeTimers();

  const { tasks } = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskId = tasks.get('Task A')!;

  // An idle agent must exist before dispatching; spawn a placeholder one.
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await vi.runAllTimersAsync();

  // Make the agent that picks up Task A crash.
  const scenarioAgentName = `agent-${taskId.slice(0, 6)}`;
  harness.setAgentScenario(scenarioAgentName, {
    outcome: 'crash',
    message: 'Token limit exceeded',
  });

  await harness.dispatchManager.queue(taskId);
  await harness.dispatchManager.dispatchNext();
  await vi.runAllTimersAsync();

  // A crashed task is expected to remain 'in_progress', never 'completed'.
  const persistedTask = await harness.taskRepository.findById(taskId);
  expect(persistedTask?.status).toBe('in_progress');
});
it('captures error message in agent result', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;

  // Pre-seed required idle agent
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await vi.runAllTimersAsync();

  // Set crash scenario
  harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
    outcome: 'crash',
    message: 'Out of memory',
  });

  await harness.dispatchManager.queue(taskAId);
  const dispatchResult = await harness.dispatchManager.dispatchNext();
  await vi.runAllTimersAsync();

  // Fail loudly if nothing was dispatched, instead of passing `undefined`
  // on to getResult() and getting a misleading assertion failure below.
  const agentId = dispatchResult.agentId;
  if (agentId == null) {
    throw new Error('dispatchNext() did not return an agentId for Task A');
  }

  // The stored agent result should record the failure and its message.
  const agentResult = await harness.agentManager.getResult(agentId);
  expect(agentResult).not.toBeNull();
  expect(agentResult?.success).toBe(false);
  expect(agentResult?.message).toBe('Out of memory');
});
});
describe('Agent waiting for input and resume', () => {
it('emits agent:waiting event with question', async () => {
  vi.useFakeTimers();

  const question = 'Which database should I use?';
  const fixture = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskId = fixture.tasks.get('Task A')!;

  // An idle agent must exist before dispatching; spawn a placeholder one.
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await vi.runAllTimersAsync();

  // Make the agent for Task A stop and ask a question.
  const scenarioAgentName = `agent-${taskId.slice(0, 6)}`;
  harness.setAgentScenario(scenarioAgentName, {
    outcome: 'waiting_for_input',
    question,
  });

  await harness.dispatchManager.queue(taskId);
  // Only events emitted from dispatch onwards are of interest.
  harness.clearEvents();
  await harness.dispatchManager.dispatchNext();
  await vi.runAllTimersAsync();

  // Exactly one agent:waiting event, for this task, with the question.
  const waiting = harness.getEventsByType('agent:waiting');
  expect(waiting.length).toBe(1);
  const { payload: waitingInfo } = waiting[0] as AgentWaitingEvent;
  expect(waitingInfo.taskId).toBe(taskId);
  expect(waitingInfo.question).toBe(question);
});
it('resumes agent and completes after resume', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;

  // Pre-seed required idle agent
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await vi.runAllTimersAsync();

  // Set waiting scenario
  harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
    outcome: 'waiting_for_input',
    question: 'Which database should I use?',
  });

  await harness.dispatchManager.queue(taskAId);
  const dispatchResult = await harness.dispatchManager.dispatchNext();
  await vi.runAllTimersAsync();

  // Fail loudly if nothing was dispatched rather than passing `undefined`
  // to the agent manager calls below.
  const agentId = dispatchResult.agentId;
  if (agentId == null) {
    throw new Error('dispatchNext() did not return an agentId for Task A');
  }

  // Verify agent is in waiting_for_input status
  const agent = await harness.agentManager.get(agentId);
  expect(agent?.status).toBe('waiting_for_input');

  // Clear events so only those produced by the resume are inspected.
  // (No events are read before this point, so one clear is sufficient.)
  harness.clearEvents();

  // Resume agent with response
  await harness.agentManager.resume(agentId, 'PostgreSQL');
  await vi.runAllTimersAsync();

  // Verify: agent:resumed event emitted
  const resumedEvents = harness.getEventsByType('agent:resumed');
  expect(resumedEvents.length).toBe(1);
  const resumedPayload = (resumedEvents[0] as AgentResumedEvent).payload;
  expect(resumedPayload.taskId).toBe(taskAId);

  // Verify: agent:stopped event emitted (after resume completes)
  const stoppedEvents = harness.getEventsByType('agent:stopped');
  expect(stoppedEvents.length).toBe(1);
  const stoppedPayload = (stoppedEvents[0] as AgentStoppedEvent).payload;
  expect(stoppedPayload.taskId).toBe(taskAId);
  expect(stoppedPayload.reason).toBe('task_complete');
});
it('agent status transitions correctly through waiting and resume', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;

  // Pre-seed required idle agent
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await vi.runAllTimersAsync();

  // Set waiting scenario
  harness.setAgentScenario(`agent-${taskAId.slice(0, 6)}`, {
    outcome: 'waiting_for_input',
    question: 'Which database should I use?',
  });

  await harness.dispatchManager.queue(taskAId);
  const dispatchResult = await harness.dispatchManager.dispatchNext();

  // Fail loudly if nothing was dispatched; every status lookup below needs
  // a real agent id, so guard once instead of asserting non-null each time.
  const agentId = dispatchResult.agentId;
  if (agentId == null) {
    throw new Error('dispatchNext() did not return an agentId for Task A');
  }

  // Initially running (timers have not been advanced since dispatch)
  let agent = await harness.agentManager.get(agentId);
  expect(agent?.status).toBe('running');

  await vi.runAllTimersAsync();

  // After scenario completes: waiting_for_input
  agent = await harness.agentManager.get(agentId);
  expect(agent?.status).toBe('waiting_for_input');

  // Resume
  await harness.agentManager.resume(agentId, 'PostgreSQL');

  // After resume: running again (checked before timers are advanced)
  agent = await harness.agentManager.get(agentId);
  expect(agent?.status).toBe('running');

  await vi.runAllTimersAsync();

  // After completion: idle
  agent = await harness.agentManager.get(agentId);
  expect(agent?.status).toBe('idle');
});
});
describe('Task blocking', () => {
it('blocked task appears in blocked list from getQueueState', async () => {
  const blockReason = 'Waiting for user decision';
  const { tasks } = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskId = tasks.get('Task A')!;

  // Queue the task, then block it with a reason.
  await harness.dispatchManager.queue(taskId);
  await harness.dispatchManager.blockTask(taskId, blockReason);

  // The queue state must list exactly this task, with the reason attached.
  const { blocked } = await harness.dispatchManager.getQueueState();
  expect(blocked.length).toBe(1);
  const entry = blocked[0];
  expect(entry.taskId).toBe(taskId);
  expect(entry.reason).toBe(blockReason);
});
it('blocked task emits task:blocked event', async () => {
  const blockReason = 'Waiting for user decision';
  const { tasks } = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskId = tasks.get('Task A')!;

  await harness.dispatchManager.queue(taskId);
  // Discard events recorded up to here so only blockTask's events remain.
  harness.clearEvents();
  await harness.dispatchManager.blockTask(taskId, blockReason);

  // Exactly one task:blocked event carrying the task id and the reason.
  const blocked = harness.getEventsByType('task:blocked');
  expect(blocked.length).toBe(1);
  const { payload: blockedInfo } = blocked[0] as TaskBlockedEvent;
  expect(blockedInfo.taskId).toBe(taskId);
  expect(blockedInfo.reason).toBe(blockReason);
});
it('getNextDispatchable does not return blocked task', async () => {
  vi.useFakeTimers();
  const seeded = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskAId = seeded.tasks.get('Task A')!;

  // Pre-seed required idle agent
  await harness.agentManager.spawn({
    name: 'pool-agent',
    taskId: 'placeholder',
    prompt: 'placeholder',
  });
  await vi.runAllTimersAsync();

  // Queue Task A and block it. Task B (which depends on Task A in
  // SIMPLE_FIXTURE) is deliberately not queued, so Task A is the only
  // candidate in the queue.
  await harness.dispatchManager.queue(taskAId);
  await harness.dispatchManager.blockTask(taskAId, 'Blocked for testing');

  // With the only queued task blocked, nothing should be dispatchable.
  const next = await harness.dispatchManager.getNextDispatchable();
  expect(next).toBeNull();
});
it('task status is set to blocked in database', async () => {
  const { tasks } = await harness.seedFixture(SIMPLE_FIXTURE);
  const taskId = tasks.get('Task A')!;

  // Queue the task and then block it through the dispatch manager.
  await harness.dispatchManager.queue(taskId);
  await harness.dispatchManager.blockTask(taskId, 'Blocked for testing');

  // The stored task record must show the 'blocked' status.
  const persistedTask = await harness.taskRepository.findById(taskId);
  expect(persistedTask?.status).toBe('blocked');
});
});
});