/**
 * ErrorAnalyzer — Intelligent error classification and handling strategies.
 *
 * Analyzes various error conditions from agent processes and classifies them
 * for appropriate retry and recovery strategies. Replaces scattered error
 * handling with centralized, comprehensive error analysis.
 */

import { createModuleLogger } from '../../logger/index.js';
import type { SignalManager } from './signal-manager.js';
import type { AgentError, AgentErrorType } from './retry-policy.js';

const log = createModuleLogger('error-analyzer');

/**
 * Common error patterns for different providers, keyed by the error type they
 * indicate. Patterns are tested against the error message joined with stderr.
 *
 * The bare HTTP status codes (401, 429) are guarded with digit look-arounds so
 * they are not matched inside longer numbers (PIDs, ports, timestamps, byte
 * counts), while still matching forms such as "HTTP401" or "status=429".
 */
const ERROR_PATTERNS = {
  auth_failure: [
    /unauthorized/i,
    /invalid.*(token|key|credential)/i,
    /authentication.*failed/i,
    /(?<!\d)401(?!\d)/,
    /access.*denied/i,
    /invalid.*session/i,
    /expired.*token/i,
  ],
  usage_limit: [
    /rate.*(limit|exceeded)/i,
    /quota.*exceeded/i,
    /too.*many.*requests/i,
    /(?<!\d)429(?!\d)/,
    /usage.*limit/i,
    /throttled/i,
    /credit.*insufficient/i,
    /api.*limit.*reached/i,
  ],
  timeout: [
    /timeout/i,
    /timed.*out/i,
    /deadline.*exceeded/i,
    /connection.*timeout/i,
    /read.*timeout/i,
  ],
  process_crash: [
    /segmentation.*fault/i,
    /core.*dumped/i,
    /fatal.*error/i,
    /killed/i,
    /aborted/i,
  ],
};

/** Exit codes that are treated as transient (retryable) failures. */
const TRANSIENT_EXIT_CODES: ReadonlySet<number> = new Set([
  130, // SIGINT (interrupted)
  143, // SIGTERM (terminated)
  124, // timeout command
  1, // Generic error (might be transient)
]);

/** stderr fragments that are treated as transient (retryable) conditions. */
const TRANSIENT_STDERR_PATTERNS: readonly RegExp[] = [
  /temporary/i,
  /network.*error/i,
  /connection.*refused/i,
  /service.*unavailable/i,
  /disk.*full/i,
  /out.*of.*memory/i,
];

/**
 * Shorten text for log output.
 *
 * Returns `undefined` for missing text (instead of the string "undefined...")
 * and appends an ellipsis only when the text was actually cut.
 */
function truncateForLog(text: string | undefined, maxLength: number): string | undefined {
  if (text === undefined) return undefined;
  return text.length > maxLength ? `${text.slice(0, maxLength)}...` : text;
}

export class AgentErrorAnalyzer {
  constructor(private readonly signalManager: SignalManager) {}

  /**
   * Analyze an error and classify it for retry strategy.
   * Combines multiple signals: error message, exit code, stderr, and workdir state.
   *
   * Checks run in a fixed order and the first match wins:
   * auth failure → usage limit → timeout → missing signal.json (exit code 0
   * with a workdir) → process crash (crash pattern or non-zero exit code) →
   * unknown.
   *
   * @param error - The error, or error message, reported for the agent process.
   * @param exitCode - Exit code of the process, if known.
   * @param stderr - Captured stderr; matched together with the error message.
   * @param agentWorkdir - Agent working directory; only consulted (via the
   *   signal manager) when the exit code is 0.
   * @returns The classified error.
   */
  async analyzeError(
    error: Error | string,
    exitCode?: number | null,
    stderr?: string,
    agentWorkdir?: string
  ): Promise<AgentError> {
    const errorMessage = error instanceof Error ? error.message : String(error);
    const originalError = error instanceof Error ? error : undefined;
    const fullContext = [errorMessage, stderr].filter(Boolean).join(' ');

    log.debug(
      { errorMessage, exitCode, hasStderr: !!stderr, hasWorkdir: !!agentWorkdir },
      'analyzing agent error'
    );

    // Check for auth failure patterns
    if (this.matchesPattern(fullContext, ERROR_PATTERNS.auth_failure)) {
      return {
        type: 'auth_failure',
        message: errorMessage,
        isTransient: true,
        requiresAccountSwitch: false,
        shouldPersistToDB: true,
        exitCode,
        originalError,
      };
    }

    // Check for usage limit patterns
    if (this.matchesPattern(fullContext, ERROR_PATTERNS.usage_limit)) {
      return {
        type: 'usage_limit',
        message: errorMessage,
        isTransient: false,
        requiresAccountSwitch: true,
        shouldPersistToDB: true,
        exitCode,
        originalError,
      };
    }

    // Check for timeout patterns
    if (this.matchesPattern(fullContext, ERROR_PATTERNS.timeout)) {
      return {
        type: 'timeout',
        message: errorMessage,
        isTransient: true,
        requiresAccountSwitch: false,
        shouldPersistToDB: true,
        exitCode,
        originalError,
      };
    }

    // Special case: process completed successfully but no signal.json
    if (agentWorkdir && exitCode === 0) {
      const hasSignal = await this.signalManager.checkSignalExists(agentWorkdir);
      if (!hasSignal) {
        log.debug({ agentWorkdir }, 'process completed successfully but no signal.json found');
        return {
          type: 'missing_signal',
          message: 'Process completed successfully but no signal.json was generated',
          isTransient: true,
          requiresAccountSwitch: false,
          shouldPersistToDB: false,
          exitCode,
          originalError,
        };
      }
    }

    // Check for process crash patterns; any known non-zero exit code also counts
    const hasFailureExitCode = exitCode !== null && exitCode !== undefined && exitCode !== 0;
    if (this.matchesPattern(fullContext, ERROR_PATTERNS.process_crash) || hasFailureExitCode) {
      // Determine if crash is transient based on exit code and patterns
      const isTransient = this.isTransientCrash(exitCode, stderr);
      return {
        type: 'process_crash',
        message: errorMessage,
        isTransient,
        requiresAccountSwitch: false,
        shouldPersistToDB: true,
        exitCode,
        originalError,
      };
    }

    // Unknown error type
    log.debug(
      { errorMessage, exitCode, stderr: truncateForLog(stderr, 200) },
      'error does not match known patterns, classifying as unknown'
    );
    return {
      type: 'unknown',
      message: errorMessage,
      isTransient: false,
      requiresAccountSwitch: false,
      shouldPersistToDB: true,
      exitCode,
      originalError,
    };
  }

  /**
   * Validate credentials with a brief test request using invalid token.
   * This helps distinguish between token expiry vs. account exhaustion.
   *
   * Not yet implemented: currently only logs the request and returns `true`.
   *
   * @param accountId - Account whose token should be validated.
   * @returns Always `true` for now, i.e. the token is reported as valid.
   */
  async validateTokenWithInvalidRequest(accountId: string): Promise<boolean> {
    // User requirement: "brief check with invalid access token to determine behavior"
    // This would need integration with credential system and is provider-specific
    // For now, return true to indicate token appears valid
    log.debug({ accountId }, 'token validation requested (not yet implemented)');
    return true;
  }

  /**
   * Check if error message or stderr matches any of the given patterns.
   * Empty text never matches.
   */
  private matchesPattern(text: string, patterns: readonly RegExp[]): boolean {
    if (!text) return false;
    return patterns.some(pattern => pattern.test(text));
  }

  /**
   * Determine if a process crash is likely transient (can be retried).
   * Based on exit codes and stderr content.
   *
   * @returns `true` when the exit code is in the transient set, lies strictly
   *   between 128 and 256, or stderr matches a transient pattern; otherwise
   *   `false`.
   */
  private isTransientCrash(exitCode?: number | null, stderr?: string): boolean {
    if (exitCode !== null && exitCode !== undefined) {
      if (TRANSIENT_EXIT_CODES.has(exitCode)) {
        log.debug({ exitCode }, 'exit code indicates transient failure');
        return true;
      }

      // Very high exit codes often indicate system issues
      if (exitCode > 128 && exitCode < 256) {
        log.debug({ exitCode }, 'signal-based exit code may be transient');
        return true;
      }
    }

    // Check stderr for transient patterns
    if (stderr && TRANSIENT_STDERR_PATTERNS.some(pattern => pattern.test(stderr))) {
      log.debug({ stderr: truncateForLog(stderr, 100) }, 'stderr indicates transient failure');
      return true;
    }

    log.debug({ exitCode, hasStderr: !!stderr }, 'crash appears non-transient');
    return false;
  }
}