Introduction
Error handling is critical for building reliable applications. How you handle errors determines whether users see friendly messages or confusing failures. This guide covers error handling patterns, exception design, and building resilient systems.
Good error handling is about being prepared for the unexpected and handling it gracefully.
Exception Hierarchy
Custom Exception Structure
from typing import Optional
from enum import Enum
class ErrorCode(str, Enum):
    """Machine-readable identifiers for every category of application error.

    Members mix in ``str``, so each one compares equal to its own value.
    """

    # --- Validation ---
    INVALID_INPUT = "INVALID_INPUT"
    MISSING_FIELD = "MISSING_FIELD"

    # --- Authentication / authorization ---
    UNAUTHORIZED = "UNAUTHORIZED"
    FORBIDDEN = "FORBIDDEN"

    # --- Resources ---
    NOT_FOUND = "NOT_FOUND"
    ALREADY_EXISTS = "ALREADY_EXISTS"

    # --- External services ---
    EXTERNAL_SERVICE_ERROR = "EXTERNAL_SERVICE_ERROR"
    RATE_LIMIT_EXCEEDED = "RATE_LIMIT_EXCEEDED"

    # --- System ---
    INTERNAL_ERROR = "INTERNAL_ERROR"
    DATABASE_ERROR = "DATABASE_ERROR"
class AppException(Exception):
    """Root of the application's exception hierarchy.

    Attributes:
        message: Human-readable description, also forwarded to ``Exception``.
        code: ``ErrorCode`` member identifying the failure category.
        status_code: Numeric status associated with the error (default 500).
        details: Extra structured data; always a dict, never ``None``.
    """

    def __init__(
        self,
        message: str,
        code: ErrorCode,
        status_code: int = 500,
        details: Optional[dict] = None,
    ):
        super().__init__(message)
        self.message = message
        self.code = code
        self.status_code = status_code
        # A missing (or empty) mapping is normalised to a fresh empty dict.
        self.details = details if details else {}
class ValidationException(AppException):
    """Invalid-input error: code ``INVALID_INPUT`` with status 400."""

    def __init__(self, message: str, details: Optional[dict] = None):
        super().__init__(message, ErrorCode.INVALID_INPUT, 400, details)
class NotFoundException(AppException):
    """Missing-resource error: code ``NOT_FOUND`` with status 404.

    Both the resource name and the identifier are placed in the message and
    in ``details``.
    """

    def __init__(self, resource: str, identifier: str):
        description = f"{resource} not found: {identifier}"
        lookup = {"resource": resource, "identifier": identifier}
        super().__init__(description, ErrorCode.NOT_FOUND, 404, lookup)
class ExternalServiceException(AppException):
    """Upstream-service error: code ``EXTERNAL_SERVICE_ERROR`` with status 502.

    The service name prefixes the message and is recorded in ``details``.
    """

    def __init__(self, service: str, message: str):
        description = f"{service} error: {message}"
        origin = {"service": service}
        super().__init__(description, ErrorCode.EXTERNAL_SERVICE_ERROR, 502, origin)
Error Handling in FastAPI
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
# Application instance that the exception handlers and routes below register on.
app = FastAPI()
# Custom exception handlers
@app.exception_handler(ValidationException)
async def validation_exception_handler(request: Request, exc: ValidationException):
    """Render a ``ValidationException`` as a 400 JSON error envelope.

    The body has a single ``"error"`` key holding the exception's code,
    message and details.
    """
    error_body = {
        "code": exc.code,
        "message": exc.message,
        "details": exc.details,
    }
    return JSONResponse(status_code=400, content={"error": error_body})
@app.exception_handler(NotFoundException)
async def not_found_exception_handler(request: Request, exc: NotFoundException):
    """Render a ``NotFoundException`` as a 404 JSON error envelope.

    The body has a single ``"error"`` key holding the exception's code,
    message and details. ``details`` is included so this response has the
    same shape as the validation error response; for this exception it holds
    the resource name and identifier that already appear in the message.
    """
    error_body = {
        "code": exc.code,
        "message": exc.message,
        "details": exc.details,
    }
    return JSONResponse(status_code=404, content={"error": error_body})
@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
    """Catch-all handler: log the exception and answer with a generic 500.

    The response message is fixed, so nothing from the exception itself is
    sent back to the client.
    """
    logger.error(f"Unhandled exception: {exc}", exc_info=True)

    generic_error = {
        "code": ErrorCode.INTERNAL_ERROR,
        "message": "An unexpected error occurred",
    }
    return JSONResponse(status_code=500, content={"error": generic_error})
# Using exceptions in routes
@app.get("/users/{user_id}")
async def get_user(user_id: str):
    """Return the user for ``user_id``.

    Raises:
        NotFoundException: when ``find_user`` yields a falsy result.
    """
    found = await find_user(user_id)
    if found:
        return found
    raise NotFoundException("User", user_id)
Result Pattern
from typing import TypeVar, Generic, Optional
from dataclasses import dataclass
# Type of the value carried by a successful result.
T = TypeVar("T")


@dataclass
class Success(Generic[T]):
    """Successful outcome wrapping the value that was produced."""

    value: T  # payload of the successful operation
@dataclass
class Failure:
    """Failed outcome: the underlying exception plus a readable message."""

    error: Exception  # exception describing what went wrong
    message: str  # short human-readable summary
# A result is either a Success carrying a value of type T or a Failure.
Result = Success[T] | Failure
def ok(value: T) -> Success[T]:
    """Wrap ``value`` in a ``Success``."""
    return Success(value=value)
def err(message: str, error: Optional[Exception] = None) -> Failure:
    """Build a ``Failure`` for ``message``.

    When no (truthy) ``error`` is supplied, a plain ``Exception`` carrying
    ``message`` is used as the underlying error.
    """
    if not error:
        error = Exception(message)
    return Failure(error, message)
# Usage
def process_payment(order_id: str) -> Result[PaymentResult]:
    """Charge the card for an order and mark the order as paid.

    Returns a ``Success`` wrapping a ``PaymentResult`` when the charge and
    the status update both complete. Every other outcome is reported as a
    ``Failure`` instead of an exception: missing order, order already paid,
    a declined payment, or any other error raised along the way.
    """
    try:
        pending = get_order(order_id)
        if not pending:
            return err("Order not found")
        if pending.status == "paid":
            return err("Order already paid")

        charge = charge_card(pending)
        update_order_status(order_id, "paid")
        receipt = PaymentResult(order_id=order_id, payment_id=charge.id)
        return ok(receipt)
    except PaymentDeclined as declined:
        return err("Payment declined", declined)
    except Exception as unexpected:
        return err("Payment failed", unexpected)
# Handling results
result = process_payment("order_123")
# Anything that is not a Success is a Failure and carries a message.
if not isinstance(result, Success):
    print(f"Payment failed: {result.message}")
else:
    print(f"Payment successful: {result.value.payment_id}")
Retry Patterns
Retry with Backoff
import asyncio
import random
from functools import wraps
from typing import Callable, Type
def retry_with_backoff(
    max_retries: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 60.0,
    exponential_base: float = 2.0,
    exceptions: tuple = (Exception,)
):
    """Retry decorator with exponential backoff and jitter for coroutines.

    Args:
        max_retries: Number of retries after the first attempt, so the
            wrapped coroutine runs at most ``max_retries + 1`` times.
        base_delay: Delay in seconds before the first retry.
        max_delay: Upper bound applied to the delay before jitter.
        exponential_base: Factor the delay grows by on each attempt.
        exceptions: Exception types that trigger a retry; anything else
            propagates immediately.

    Returns:
        A decorator for ``async`` functions.

    Raises:
        ValueError: if ``max_retries`` is negative. Such a value would
            otherwise mean the wrapped function is never called at all.
    """
    if max_retries < 0:
        raise ValueError("max_retries must be >= 0")

    def decorator(func: Callable):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            for attempt in range(max_retries + 1):
                try:
                    return await func(*args, **kwargs)
                except exceptions as e:
                    if attempt == max_retries:
                        # Out of retries: re-raise the last failure with its
                        # original traceback intact.
                        raise

                    # Exponential growth, capped at max_delay.
                    delay = min(
                        base_delay * (exponential_base ** attempt),
                        max_delay
                    )
                    # Jitter is applied after the cap, so the actual sleep is
                    # between 0.5x and 1.5x of the capped delay.
                    delay *= (0.5 + random.random())

                    logger.warning(
                        f"Attempt {attempt + 1} failed: {e}. "
                        f"Retrying in {delay:.2f}s"
                    )
                    await asyncio.sleep(delay)

        return wrapper

    return decorator
# Usage
@retry_with_backoff(max_retries=3, base_delay=1.0)
async def call_external_api(data: dict):
    """POST ``data`` as JSON to ``API_URL`` and return the decoded body.

    ``raise_for_status()`` is called on the response before decoding
    (presumably raising on an error status; confirm against the HTTP
    client in use). Failures are retried by the decorator.
    """
    reply = await http_client.post(API_URL, json=data)
    reply.raise_for_status()
    payload = reply.json()
    return payload
Circuit Breaker
import time
from enum import Enum
from threading import Lock
class CircuitState(Enum):
    """The three states a circuit breaker can be in."""

    # Normal operation
    CLOSED = "closed"
    # Failing, reject calls
    OPEN = "open"
    # Testing recovery
    HALF_OPEN = "half_open"
class CircuitBreaker:
    """Circuit breaker that rejects calls after repeated failures.

    The breaker opens once ``failure_threshold`` failures of type
    ``expected_exception`` have accumulated. While open, calls are rejected
    with ``CircuitOpenError`` until more than ``timeout`` seconds have passed
    since the last failure; the next call is then let through in the
    half-open state. Any success resets the failure count and closes the
    breaker.
    """

    def __init__(
        self,
        failure_threshold: int = 5,
        timeout: int = 60,
        expected_exception: type = Exception
    ):
        # Configuration.
        self.failure_threshold = failure_threshold
        self.timeout = timeout
        self.expected_exception = expected_exception

        # Mutable state; changes are made while holding ``lock``.
        self.lock = Lock()
        self.state = CircuitState.CLOSED
        self.failure_count = 0
        self.last_failure_time = None

    def call(self, func: Callable, *args, **kwargs):
        """Invoke ``func(*args, **kwargs)`` through the breaker.

        Raises:
            CircuitOpenError: if the breaker is open and the timeout has not
                yet elapsed.
        """
        with self.lock:
            if self.state == CircuitState.OPEN:
                elapsed = time.time() - self.last_failure_time
                cooled_down = elapsed > self.timeout
                if not cooled_down:
                    raise CircuitOpenError()
                self.state = CircuitState.HALF_OPEN

        # The wrapped call runs outside the lock: the success/failure hooks
        # take the (non-reentrant) lock themselves.
        try:
            outcome = func(*args, **kwargs)
            self._on_success()
            return outcome
        except self.expected_exception:
            self._on_failure()
            raise

    def _on_success(self):
        """Reset the failure count and close the breaker."""
        with self.lock:
            self.state = CircuitState.CLOSED
            self.failure_count = 0

    def _on_failure(self):
        """Record a failure and open the breaker once the threshold is hit."""
        with self.lock:
            self.last_failure_time = time.time()
            self.failure_count += 1
            if self.failure_count >= self.failure_threshold:
                self.state = CircuitState.OPEN
Logging Errors
import logging
from contextvars import ContextVar
# Request context
# Identifier of the request currently being handled; read when logging errors.
request_id: ContextVar[str] = ContextVar('request_id')


class ErrorLogger:
    """Logs exceptions together with structured context data."""

    def __init__(self, logger: logging.Logger):
        self.logger = logger

    def log_error(
        self,
        error: Exception,
        context: Optional[dict] = None,
        level: str = "error"
    ):
        """Log ``error`` with structured data attached as ``error_data``.

        Args:
            error: The exception to report.
            context: Extra key/value pairs merged into the structured data.
                They are merged last, so they take precedence over the
                standard keys. ``None`` means no extra context.
            level: ``"debug"``, ``"info"`` or ``"warning"`` log the bare error
                text. ``"error"`` and ``"critical"`` log ``"Error: <error>"``
                together with the exception's traceback. Any unrecognised
                level is treated as ``"error"`` so the error is never
                silently dropped.
        """
        log_data = {
            "error_type": type(error).__name__,
            "error_message": str(error),
            # Falls back to "unknown" when no request id has been set.
            "request_id": request_id.get("unknown"),
            **(context or {}),
        }
        extra = {"error_data": log_data}

        plain_levels = {
            "debug": logging.DEBUG,
            "info": logging.INFO,
            "warning": logging.WARNING,
        }
        if level in plain_levels:
            self.logger.log(plain_levels[level], str(error), extra=extra)
            return

        severity = logging.CRITICAL if level == "critical" else logging.ERROR
        # Pass the exception itself rather than True: the traceback then
        # belongs to ``error`` even when this is called outside an ``except``
        # block.
        self.logger.log(
            severity,
            f"Error: {error}",
            extra=extra,
            exc_info=error,
        )
Best Practices
- Use custom exceptions: Domain-specific error types
- Fail fast: Validate input early
- Log appropriately: Capture context for debugging
- Return consistent errors: Same structure across API
- Don’t expose internals: Generic messages to users
- Implement retries: For transient failures
- Use circuit breakers: Prevent cascade failures
Additional Exception Types
class AuthenticationError(AppException):
    """Authentication failed: code ``UNAUTHORIZED`` with status 401."""

    def __init__(self, message: str = "Authentication required"):
        super().__init__(message, ErrorCode.UNAUTHORIZED, 401)
class AuthorizationError(AppException):
    """Permission denied: code ``FORBIDDEN`` with status 403."""

    def __init__(self, message: str = "Permission denied"):
        super().__init__(message, ErrorCode.FORBIDDEN, 403)
Result Type with Methods
The discriminated union pattern (Success/Failure) works well, but a class-based Result type provides a unified interface:
from dataclasses import dataclass
from typing import TypeVar, Generic, Optional
T = TypeVar('T')


@dataclass
class Result(Generic[T]):
    """Outcome of an operation: either a value or the error that prevented it.

    Build instances with ``Result.success`` / ``Result.failure``. A result
    counts as a failure exactly when it holds an error.
    """

    _value: Optional[T]  # payload; None for failures
    _error: Optional[Exception]  # error; None for successes

    @classmethod
    def success(cls, value: T) -> 'Result[T]':
        """Create a successful result wrapping ``value``."""
        return cls(_value=value, _error=None)

    @classmethod
    def failure(cls, error: Exception) -> 'Result[T]':
        """Create a failed result wrapping ``error``."""
        return cls(_value=None, _error=error)

    @property
    def is_success(self) -> bool:
        """True when no error is stored."""
        return self._error is None

    @property
    def is_failure(self) -> bool:
        """True when an error is stored."""
        return self._error is not None

    def get_or_none(self) -> Optional[T]:
        """Return the stored value, which is ``None`` for failures."""
        return self._value

    def get_or_raise(self) -> T:
        """Return the value, or raise the stored error for failures."""
        # Identity check, matching is_success/is_failure: an exception whose
        # truth value is False must still be raised.
        if self._error is not None:
            raise self._error
        return self._value

    def get_or_default(self, default: T) -> T:
        """Return the value, or ``default`` when the stored value is ``None``."""
        return self._value if self._value is not None else default

    def map(self, func) -> 'Result':
        """Apply ``func`` to the value of a successful result.

        Returns a new success holding ``func(value)``. An exception raised by
        ``func`` becomes a failure. A failed result is returned unchanged and
        ``func`` is not called.
        """
        if self.is_success:
            try:
                return Result.success(func(self._value))
            except Exception as e:
                return Result.failure(e)
        return self
# Usage
def create_user(email: str) -> Result[User]:
    """Create and save a user for ``email``.

    Returns a successful result holding the saved user, or a failed result
    wrapping a ``ValidationException`` when ``validate_email`` rejects the
    address.
    """
    if validate_email(email):
        new_user = User(email)
        user_repo.save(new_user)
        return Result.success(new_user)
    return Result.failure(ValidationException("Invalid email"))
# Unwrap the value only after confirming the operation succeeded.
result = create_user("user@example.com")
if result.is_success:
    user = result.get_or_raise()
Circuit Breaker with Recovery Verification
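The basic circuit breaker shown earlier closes again after a single successful call in the HALF_OPEN state. The variant below still transitions from OPEN to HALF_OPEN after a timeout, but requires multiple consecutive successes before fully closing: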
class CircuitBreakerWithRecovery:
    """Circuit breaker that needs several successes before it closes again.

    The breaker opens once ``failure_threshold`` failures have accumulated
    without an intervening success. After ``recovery_timeout`` seconds it
    becomes half-open and lets calls through; it closes again only after
    ``success_threshold`` successful calls in that state. A failure while
    half-open reopens it immediately.

    Args:
        name: Label used in the ``CircuitOpenError`` message.
        failure_threshold: Failure count at which the breaker opens.
        recovery_timeout: Seconds to stay open before probing for recovery.
        success_threshold: Successes required while half-open to close the
            breaker.

    Note: state changes are not protected by a lock.
    """

    def __init__(
        self,
        name: str,
        failure_threshold: int = 5,
        recovery_timeout: float = 60.0,
        success_threshold: int = 3,
    ):
        self.name = name
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.success_threshold = success_threshold
        self._state = CircuitState.CLOSED
        self._failure_count = 0
        # time.monotonic() reading of the most recent failure; None until
        # the first failure is recorded.
        self._last_failure_time = None
        self._success_count = 0

    @property
    def state(self) -> CircuitState:
        """Current state; an expired OPEN state is moved to HALF_OPEN on read."""
        if self._state == CircuitState.OPEN:
            # Monotonic clock: elapsed time is unaffected by wall-clock
            # adjustments.
            if time.monotonic() - self._last_failure_time > self.recovery_timeout:
                self._state = CircuitState.HALF_OPEN
                self._success_count = 0
        return self._state

    def call(self, func, *args, **kwargs):
        """Invoke ``func(*args, **kwargs)`` through the breaker.

        Returns whatever ``func`` returns. Any ``Exception`` it raises is
        recorded as a failure and re-raised.

        Raises:
            CircuitOpenError: if the breaker is open.
        """
        if self.state == CircuitState.OPEN:
            raise CircuitOpenError(f"Circuit {self.name} is open")
        try:
            result = func(*args, **kwargs)
            self._on_success()
            return result
        except Exception:
            self._on_failure()
            raise

    def _on_success(self):
        """Count a success; close the breaker once enough probes succeeded."""
        if self._state == CircuitState.HALF_OPEN:
            self._success_count += 1
            if self._success_count >= self.success_threshold:
                self._state = CircuitState.CLOSED
                self._failure_count = 0
        else:
            self._failure_count = 0

    def _on_failure(self):
        """Record a failure and open the breaker when warranted."""
        self._failure_count += 1
        self._last_failure_time = time.monotonic()
        # A failed probe while half-open reopens the breaker outright,
        # without depending on the failure count carried over from before.
        if (
            self._state == CircuitState.HALF_OPEN
            or self._failure_count >= self.failure_threshold
        ):
            self._state = CircuitState.OPEN
# Breaker named "payment-service" that opens once its failure count reaches 3.
breaker = CircuitBreakerWithRecovery("payment-service", failure_threshold=3)
Resources
- “Release It!” by Michael T. Nygard
- Microsoft Error Handling Guidelines
Conclusion
Good error handling makes applications reliable and user-friendly. By implementing proper exception hierarchies, consistent error responses, and resilience patterns like retries and circuit breakers, you can build applications that handle failures gracefully.
Comments