/**
 * RetryPolicy — Comprehensive retry logic with error-specific handling.
 *
 * Implements intelligent retry strategies for different types of agent failures.
 * Replaces scattered retry logic with unified, configurable policies.
 */

import { createModuleLogger } from '../../logger/index.js';

const log = createModuleLogger('retry-policy');

/** Classification of an agent failure; drives the retry decision in `shouldRetry`. */
export type AgentErrorType =
  | 'auth_failure' // 401 errors, invalid tokens
  | 'usage_limit' // Rate limiting, quota exceeded
  | 'missing_signal' // Process completed but no signal.json
  | 'process_crash' // Process exited with error code
  | 'timeout' // Process timed out
  | 'unknown'; // Unclassified errors

/** Structured description of an agent failure. */
export interface AgentError {
  type: AgentErrorType;
  message: string;
  isTransient: boolean; // Can this error be resolved by retrying?
  requiresAccountSwitch: boolean; // Should we switch to next account?
  shouldPersistToDB: boolean; // Should this error be saved for debugging?
  exitCode?: number | null;
  signal?: string | null;
  originalError?: Error;
}

/** Contract for deciding whether, and after what delay, a failed attempt is retried. */
export interface RetryPolicy {
  readonly maxAttempts: number;
  readonly backoffMs: number[];
  shouldRetry(error: AgentError, attempt: number): boolean;
  getRetryDelay(attempt: number): number;
}

/**
 * Default policy: at most three attempts with a 1s / 2s / 4s backoff table.
 * The retry decision depends on the error type (see `shouldRetry`).
 */
export class DefaultRetryPolicy implements RetryPolicy {
  readonly maxAttempts = 3;
  readonly backoffMs = [1000, 2000, 4000]; // 1s, 2s, 4s exponential backoff

  /**
   * Decides whether a failed attempt should be retried.
   *
   * @param error - The classified failure.
   * @param attempt - Attempt counter; once it reaches `maxAttempts` no retry is allowed.
   * @returns `true` for auth failures, missing signals, timeouts and transient
   *   process crashes; `false` for usage limits, unknown errors, non-transient
   *   crashes, or when the attempt budget is used up.
   */
  shouldRetry(error: AgentError, attempt: number): boolean {
    if (attempt >= this.maxAttempts) {
      log.debug(
        { errorType: error.type, attempt, maxAttempts: this.maxAttempts },
        'max retry attempts reached',
      );
      return false;
    }

    switch (error.type) {
      case 'auth_failure':
        // Retry auth failures - tokens might be refreshed
        log.debug({ attempt, errorType: error.type }, 'retrying auth failure');
        return true;

      case 'usage_limit':
        // Don't retry usage limits - need account switch
        log.debug(
          { attempt, errorType: error.type },
          'not retrying usage limit - requires account switch',
        );
        return false;

      case 'missing_signal':
        // Retry missing signal - add instruction prompt
        log.debug({ attempt, errorType: error.type }, 'retrying missing signal with instruction');
        return true;

      case 'process_crash': {
        // Only retry transient crashes. The braces scope the local declaration
        // to this clause instead of leaking it into the whole switch.
        const shouldRetryTransient = error.isTransient;
        log.debug(
          {
            attempt,
            errorType: error.type,
            isTransient: error.isTransient,
            shouldRetry: shouldRetryTransient,
          },
          'process crash retry decision',
        );
        return shouldRetryTransient;
      }

      case 'timeout':
        // Retry timeouts up to max attempts
        log.debug({ attempt, errorType: error.type }, 'retrying timeout');
        return true;

      case 'unknown':
      default:
        // Don't retry unknown errors by default
        log.debug({ attempt, errorType: error.type }, 'not retrying unknown error');
        return false;
    }
  }

  /**
   * Looks up the delay to wait before the next retry.
   *
   * @param attempt - Attempt counter, treated as 1-based when indexing the
   *   backoff table (attempt 1 maps to the first entry).
   * @returns The delay in milliseconds. Attempts beyond the table reuse the
   *   last entry; attempts below 1 use the first entry; an empty table yields 0.
   */
  getRetryDelay(attempt: number): number {
    const lastIndex = this.backoffMs.length - 1;
    // NaN cannot be ordered, so fall back to the most conservative (last) delay.
    const requested = Number.isNaN(attempt) ? lastIndex : Math.floor(attempt) - 1;
    // Clamp on both sides: without the lower bound, attempt <= 0 produced a
    // negative index and silently fell through to the largest delay.
    const index = Math.max(0, Math.min(requested, lastIndex));
    // `??` keeps a legitimate 0ms entry; the fallback only triggers for an empty table.
    const delay = this.backoffMs[index] ?? 0;
    log.debug({ attempt, delay }, 'retry delay calculated');
    return delay;
  }
}

/**
 * AgentExhaustedError - Special error indicating account needs switching.
 * When thrown, caller should attempt account failover rather than retry.
 */
export class AgentExhaustedError extends Error {
  /**
   * @param message - Human-readable description.
   * @param originalError - The classified agent failure behind this error, if any.
   */
  constructor(message: string, public readonly originalError?: AgentError) {
    super(message);
    this.name = 'AgentExhaustedError';
  }
}

/**
 * AgentFailureError - Terminal failure that cannot be retried.
 * Indicates all retry attempts have been exhausted or error is non-retriable.
 */
export class AgentFailureError extends Error {
  /**
   * @param message - Human-readable description.
   * @param originalError - The classified agent failure behind this error, if any.
   */
  constructor(message: string, public readonly originalError?: AgentError) {
    super(message);
    this.name = 'AgentFailureError';
  }
}