Files
Codewalkers/apps/server/agent/lifecycle/error-analyzer.ts
Lukas May 34578d39c6 refactor: Restructure monorepo to apps/server/ and apps/web/ layout
Move src/ → apps/server/ and packages/web/ → apps/web/ to adopt
standard monorepo conventions (apps/ for runnable apps, packages/
for reusable libraries). Update all config files, shared package
imports, test fixtures, and documentation to reflect new paths.

Key fixes:
- Update workspace config to ["apps/*", "packages/*"]
- Update tsconfig.json rootDir/include for apps/server/
- Add apps/web/** to vitest exclude list
- Update drizzle.config.ts schema path
- Fix ensure-schema.ts migration path detection (3 levels up in dev,
  2 levels up in dist)
- Fix tests/integration/cli-server.test.ts import paths
- Update packages/shared imports to apps/server/ paths
- Update all docs/ files with new paths
2026-03-03 11:22:53 +01:00

233 lines
7.0 KiB
TypeScript

/**
* ErrorAnalyzer — Intelligent error classification and handling strategies.
*
* Analyzes various error conditions from agent processes and classifies them
* for appropriate retry and recovery strategies. Replaces scattered error
* handling with centralized, comprehensive error analysis.
*/
import { createModuleLogger } from '../../logger/index.js';
import type { SignalManager } from './signal-manager.js';
import type { AgentError, AgentErrorType } from './retry-policy.js';
// Logger shared by this file, obtained from createModuleLogger under the name
// 'error-analyzer'; the code below only uses its debug() method.
const log = createModuleLogger('error-analyzer');
/**
 * Regular expressions used to classify agent failures, grouped by category.
 *
 * A category applies when ANY of its expressions matches the text under test.
 * None of the expressions carries the `g` or `y` flag, so they hold no
 * `lastIndex` state and can be shared safely across calls to `test()`.
 *
 * The HTTP status codes (401, 429) are wrapped in digit lookarounds so they
 * only match as standalone numbers; a bare `/401/` would also fire on
 * unrelated output such as "wrote 14012 bytes".
 */
const ERROR_PATTERNS = {
  auth_failure: [
    /unauthorized/i,
    /invalid.*(token|key|credential)/i,
    /authentication.*failed/i,
    /(?<!\d)401(?!\d)/, // HTTP 401, not part of a longer digit run
    /access.*denied/i,
    /invalid.*session/i,
    /expired.*token/i,
  ],
  usage_limit: [
    /rate.*(limit|exceeded)/i,
    /quota.*exceeded/i,
    /too.*many.*requests/i,
    /(?<!\d)429(?!\d)/, // HTTP 429, not part of a longer digit run
    /usage.*limit/i,
    /throttled/i,
    /credit.*insufficient/i,
    /api.*limit.*reached/i,
  ],
  timeout: [
    /timeout/i,
    /timed.*out/i,
    /deadline.*exceeded/i,
    /connection.*timeout/i,
    /read.*timeout/i,
  ],
  process_crash: [
    /segmentation.*fault/i,
    /core.*dumped/i,
    /fatal.*error/i,
    /killed/i,
    /aborted/i,
  ],
};
/**
 * Classifies failures of agent processes into `AgentError` descriptors that
 * carry the flags a caller needs to pick a retry/recovery strategy
 * (`isTransient`, `requiresAccountSwitch`, `shouldPersistToDB`).
 */
export class AgentErrorAnalyzer {
  /** Exit codes that `isTransientCrash` treats as retryable. */
  private static readonly TRANSIENT_EXIT_CODES: ReadonlySet<number> = new Set([
    130, // SIGINT (interrupted)
    143, // SIGTERM (terminated)
    124, // timeout command
    1, // Generic error (might be transient)
  ]);

  /** stderr fragments that `isTransientCrash` treats as retryable. */
  private static readonly TRANSIENT_STDERR_PATTERNS: readonly RegExp[] = [
    /temporary/i,
    /network.*error/i,
    /connection.*refused/i,
    /service.*unavailable/i,
    /disk.*full/i,
    /out.*of.*memory/i,
  ];

  /**
   * @param signalManager Used to check whether a signal file exists in an
   *   agent's working directory (see `analyzeError`).
   */
  constructor(private readonly signalManager: SignalManager) {}

  /**
   * Analyze an error and classify it for retry strategy.
   *
   * The error message and stderr are joined and tested against the pattern
   * groups in this order; the first match wins:
   *  1. auth failure   -> transient, no account switch
   *  2. usage limit    -> not transient, requires account switch
   *  3. timeout        -> transient
   *  4. missing signal -> only when `agentWorkdir` is given, `exitCode` is 0
   *     and the signal manager reports no signal; transient, not persisted
   *  5. process crash  -> crash pattern OR any non-zero exit code;
   *     transience decided by `isTransientCrash`
   *  6. unknown        -> not transient
   *
   * @param error The thrown error or a plain message.
   * @param exitCode Exit code of the agent process, if known.
   * @param stderr Captured stderr of the agent process, if any.
   * @param agentWorkdir Agent working directory, used for the signal check.
   * @returns The classified error. `originalError` is set only when `error`
   *   was an `Error` instance.
   */
  async analyzeError(
    error: Error | string,
    exitCode?: number | null,
    stderr?: string,
    agentWorkdir?: string
  ): Promise<AgentError> {
    const errorMessage = error instanceof Error ? error.message : String(error);
    const originalError = error instanceof Error ? error : undefined;
    const fullContext = [errorMessage, stderr].filter(Boolean).join(' ');

    log.debug(
      {
        errorMessage,
        exitCode,
        hasStderr: !!stderr,
        hasWorkdir: !!agentWorkdir,
      },
      'analyzing agent error'
    );

    // Single place where the result shape is assembled, so every branch
    // reports exitCode/originalError the same way.
    const build = (
      type: AgentError['type'],
      flags: {
        isTransient: boolean;
        requiresAccountSwitch: boolean;
        shouldPersistToDB: boolean;
      },
      message: string = errorMessage
    ): AgentError => ({
      type,
      message,
      ...flags,
      exitCode,
      originalError,
    });

    // Check for auth failure patterns
    if (this.matchesPattern(fullContext, ERROR_PATTERNS.auth_failure)) {
      return build('auth_failure', {
        isTransient: true,
        requiresAccountSwitch: false,
        shouldPersistToDB: true,
      });
    }

    // Check for usage limit patterns
    if (this.matchesPattern(fullContext, ERROR_PATTERNS.usage_limit)) {
      return build('usage_limit', {
        isTransient: false,
        requiresAccountSwitch: true,
        shouldPersistToDB: true,
      });
    }

    // Check for timeout patterns
    if (this.matchesPattern(fullContext, ERROR_PATTERNS.timeout)) {
      return build('timeout', {
        isTransient: true,
        requiresAccountSwitch: false,
        shouldPersistToDB: true,
      });
    }

    // Special case: process completed successfully but no signal.json
    if (agentWorkdir && exitCode === 0) {
      const hasSignal = await this.signalManager.checkSignalExists(agentWorkdir);
      if (!hasSignal) {
        log.debug({ agentWorkdir }, 'process completed successfully but no signal.json found');
        return build(
          'missing_signal',
          {
            isTransient: true,
            requiresAccountSwitch: false,
            shouldPersistToDB: false,
          },
          'Process completed successfully but no signal.json was generated'
        );
      }
    }

    // Check for process crash patterns; any known non-zero exit code also counts
    const exitedNonZero = exitCode != null && exitCode !== 0;
    if (this.matchesPattern(fullContext, ERROR_PATTERNS.process_crash) || exitedNonZero) {
      return build('process_crash', {
        isTransient: this.isTransientCrash(exitCode, stderr),
        requiresAccountSwitch: false,
        shouldPersistToDB: true,
      });
    }

    // Unknown error type
    log.debug(
      {
        errorMessage,
        exitCode,
        // Only build a preview when stderr exists; concatenating onto an
        // absent value would log the literal text "undefined...".
        stderr: stderr == null ? undefined : stderr.substring(0, 200) + '...',
      },
      'error does not match known patterns, classifying as unknown'
    );
    return build('unknown', {
      isTransient: false,
      requiresAccountSwitch: false,
      shouldPersistToDB: true,
    });
  }

  /**
   * Validate credentials with a brief test request using invalid token.
   * This helps distinguish between token expiry vs. account exhaustion.
   *
   * Not yet implemented: the call only logs the request and always resolves
   * to `true`, so callers must not treat the result as a real validation.
   *
   * @param accountId Account whose token should be validated.
   * @returns Always `true` for now.
   */
  async validateTokenWithInvalidRequest(accountId: string): Promise<boolean> {
    // User requirement: "brief check with invalid access token to determine behavior"
    // This would need integration with credential system and is provider-specific
    log.debug({ accountId }, 'token validation requested (not yet implemented)');
    return true;
  }

  /**
   * Check if error message or stderr matches any of the given patterns.
   *
   * @returns `false` for empty text, otherwise whether at least one pattern matches.
   */
  private matchesPattern(text: string, patterns: RegExp[]): boolean {
    if (!text) return false;
    return patterns.some((pattern) => pattern.test(text));
  }

  /**
   * Determine if a process crash is likely transient (can be retried).
   *
   * A crash is considered transient when the exit code is one of
   * `TRANSIENT_EXIT_CODES`, when it lies in the 129-255 range (conventionally
   * 128 + signal number), or when stderr matches one of
   * `TRANSIENT_STDERR_PATTERNS`.
   */
  private isTransientCrash(exitCode?: number | null, stderr?: string): boolean {
    if (exitCode != null) {
      if (AgentErrorAnalyzer.TRANSIENT_EXIT_CODES.has(exitCode)) {
        log.debug({ exitCode }, 'exit code indicates transient failure');
        return true;
      }
      // Very high exit codes often indicate system issues
      if (exitCode > 128 && exitCode < 256) {
        log.debug({ exitCode }, 'signal-based exit code may be transient');
        return true;
      }
    }

    // Check stderr for transient patterns
    if (stderr && AgentErrorAnalyzer.TRANSIENT_STDERR_PATTERNS.some((pattern) => pattern.test(stderr))) {
      log.debug({ stderr: stderr.substring(0, 100) + '...' }, 'stderr indicates transient failure');
      return true;
    }

    log.debug({ exitCode, hasStderr: !!stderr }, 'crash appears non-transient');
    return false;
  }
}