Introduction
Data validation is the first line of defense against invalid data, security vulnerabilities, and system errors. Proper validation ensures data integrity, prevents attacks, and improves user experience. This guide covers validation patterns, tools, and best practices.
Never trust input from users, external services, or untrusted sources. Validate everything.
Validation Principles
Defense in Depth
┌────────────────────────────────────────────┐
│             Validation Layers              │
├────────────────────────────────────────────┤
│                                            │
│  1. Client-side (User Experience)          │
│     - Immediate feedback                   │
│     - Reduces server load                  │
│                                            │
│  2. API Layer (First Defense)              │
│     - Schema validation                    │
│     - Type checking                        │
│                                            │
│  3. Business Logic (Domain Rules)          │
│     - Custom validators                    │
│     - Business rules                       │
│                                            │
│  4. Database (Last Resort)                 │
│     - Constraints                          │
│     - Data types                           │
│                                            │
└────────────────────────────────────────────┘
Schema Validation
Pydantic Models
from pydantic import BaseModel, Field, validator, constr, conint
from typing import Optional, List
from datetime import datetime
from enum import Enum
class UserRole(str, Enum):
    """Closed set of account roles.

    Mixing in ``str`` makes every member compare equal to its plain string
    value, so ``UserRole.ADMIN == "admin"`` holds.
    """

    ADMIN = "admin"
    USER = "user"
    GUEST = "guest"
class UserCreate(BaseModel):
    """Payload accepted when registering a new user.

    Fields:
        username: 3-50 characters, ASCII letters, digits and underscores only.
        email: required; checked and normalised by ``email_must_be_valid``.
        password: at least 8 characters; see ``password_strength``.
        role: defaults to ``UserRole.USER``.
        age: optional; when given it must be between 13 and 120 inclusive.
    """

    # String constraints
    username: constr(min_length=3, max_length=50, pattern=r"^[a-zA-Z0-9_]+$")
    email: str = Field(..., description="User email address")
    password: str = Field(..., min_length=8)

    # Business fields
    role: UserRole = UserRole.USER
    age: Optional[conint(ge=13, le=120)] = None

    @validator('email')
    def email_must_be_valid(cls, v):
        """Check the basic shape of an address and return it lower-cased.

        The address must have exactly one ``@``, a non-empty local part, and
        a domain made of at least two non-empty dot-separated labels.
        Merely requiring that ``@`` and ``.`` appear somewhere would accept
        strings such as ``"@."`` or ``"a.b@"``.

        Raises:
            ValueError: if the address does not have that shape.
        """
        candidate = v.strip()
        local_part, at_sign, domain = candidate.rpartition('@')
        labels = domain.split('.')
        well_formed = (
            bool(at_sign)
            and bool(local_part)
            and '@' not in local_part
            and not any(ch.isspace() for ch in candidate)
            and len(labels) >= 2
            # An empty label means a leading, trailing or doubled dot.
            and all(labels)
        )
        if not well_formed:
            raise ValueError('Invalid email format')
        return candidate.lower()

    @validator('password')
    def password_strength(cls, v):
        """Require at least one uppercase letter, one lowercase letter and one digit.

        Raises:
            ValueError: naming the first missing character class.
        """
        if not any(c.isupper() for c in v):
            raise ValueError('Password must contain uppercase')
        if not any(c.islower() for c in v):
            raise ValueError('Password must contain lowercase')
        if not any(c.isdigit() for c in v):
            raise ValueError('Password must contain number')
        return v
class OrderCreate(BaseModel):
    """Payload for placing an order.

    Fields:
        items: list of item dicts; each is checked for a positive
            ``quantity`` and a non-negative ``price``.
        shipping_address: free-form dict; not validated here.
    """

    items: List[dict]
    shipping_address: dict

    @validator('items')
    def items_not_empty(cls, v):
        """Reject an order with no items.

        Raises:
            ValueError: if the list is empty.
        """
        if not v:
            raise ValueError('Order must have at least one item')
        return v

    @validator('items')
    def validate_items(cls, v):
        """Check quantity and price on every item.

        A missing ``quantity`` counts as 0 (rejected); a missing ``price``
        counts as 0 (accepted).

        Raises:
            ValueError: if a quantity is not positive, a price is negative,
                or either value cannot be compared with a number.
        """
        for item in v:
            # The dicts are untyped, so a client can send e.g. a string or
            # null here. Comparing those with 0 raises TypeError, which
            # Pydantic v2 does not turn into a validation error, so convert
            # it explicitly.
            try:
                quantity_invalid = item.get('quantity', 0) <= 0
            except TypeError:
                raise ValueError('Item quantity must be a number') from None
            if quantity_invalid:
                raise ValueError('Item quantity must be positive')

            try:
                price_invalid = item.get('price', 0) < 0
            except TypeError:
                raise ValueError('Item price must be a number') from None
            if price_invalid:
                raise ValueError('Item price cannot be negative')
        return v
class OrderResponse(BaseModel):
    """Shape of an order as returned to API clients."""

    id: str
    created_at: datetime
    total: float

    class Config:
        # Allow the model to be populated from attribute-bearing objects
        # rather than only from dicts.
        from_attributes = True
Custom Validators
from pydantic import validator, root_validator
class PaymentRequest(BaseModel):
    """Payment payload with amount, currency and card-number checks.

    Fields:
        amount: must be greater than 0 and at most 1,000,000; stored rounded
            to two decimal places.
        currency: currency code; only ``'JPY'`` triggers an extra rule here.
        card_number: spaces and dashes are stripped, then the digits must
            pass the Luhn checksum.

    NOTE(review): ``amount`` is a float; confirm whether a decimal type is
    required for monetary values in this system.
    """

    amount: float
    currency: str
    card_number: str

    @validator('amount')
    def amount_must_be_positive(cls, v):
        """Enforce ``0 < amount <= 1_000_000`` and round to two decimals.

        Raises:
            ValueError: if the amount is not positive (including NaN) or
                exceeds the maximum.
        """
        # Written as "not v > 0" so that NaN, which fails every comparison,
        # is rejected instead of slipping past both range checks.
        if not v > 0:
            raise ValueError('Amount must be positive')
        if v > 1000000:
            raise ValueError('Amount exceeds maximum')
        return round(v, 2)

    # skip_on_failure=True: Pydantic v2 refuses to build the class when a
    # post root_validator omits it. It also guarantees that both fields
    # passed their own validation before this runs.
    @root_validator(skip_on_failure=True)
    def validate_currency_amount_pair(cls, values):
        """Reject fractional amounts when the currency is JPY.

        Raises:
            ValueError: if currency is ``'JPY'`` and the amount is not whole.
        """
        amount = values.get('amount')
        currency = values.get('currency')
        if currency == 'JPY' and amount is not None and amount != int(amount):
            raise ValueError('JPY must be integer')
        return values

    @validator('card_number')
    def validate_card_number(cls, v):
        """Strip separators and verify the Luhn checksum.

        Returns:
            The card number with spaces and dashes removed.

        Raises:
            ValueError: if the cleaned value is empty, contains non-digits,
                or fails the checksum.
        """
        # Remove spaces and dashes
        cleaned = v.replace(' ', '').replace('-', '')
        if not cls._luhn_check(cleaned):
            raise ValueError('Invalid card number')
        return cleaned

    @staticmethod
    def _luhn_check(card_number: str) -> bool:
        """Return True if ``card_number`` is a non-empty digit string that passes Luhn."""
        # An empty string would otherwise sum to 0 and be accepted, and a
        # non-digit character would surface as a confusing int() error.
        if not card_number or not (card_number.isascii() and card_number.isdigit()):
            return False

        digits = [int(ch) for ch in card_number]
        # Counting from the rightmost digit: every second digit is doubled
        # and its decimal digits summed; the rest are added unchanged.
        checksum = sum(digits[-1::-2])
        for d in digits[-2::-2]:
            checksum += sum(divmod(d * 2, 10))
        return checksum % 10 == 0
Zod Validation
import { z } from 'zod';
// Define schema
/**
 * Schema for user input, mirroring the rules of the Python `UserCreate` model:
 * - username: 3-50 characters, ASCII letters, digits and underscores only
 * - email: must be a valid address
 * - password: at least 8 characters with an uppercase letter, a lowercase
 *   letter and a digit
 * - age: optional whole number between 13 and 120
 * - role: one of "admin", "user", "guest"
 */
const UserSchema = z.object({
  username: z.string()
    .min(3, "Username must be at least 3 characters")
    // max(50) accepts exactly 50 characters, so say "at most".
    .max(50, "Username must be at most 50 characters")
    .regex(/^[a-zA-Z0-9_]+$/, "Invalid username format"),
  email: z.string()
    .email("Invalid email format"),
  password: z.string()
    .min(8, "Password must be at least 8 characters")
    // Same three character-class rules as the server-side validator, so a
    // password accepted here is not rejected later by the API.
    .refine((pwd) => /[A-Z]/.test(pwd), "Password must contain uppercase")
    .refine((pwd) => /[a-z]/.test(pwd), "Password must contain lowercase")
    .refine((pwd) => /[0-9]/.test(pwd), "Password must contain number"),
  age: z.number()
    // The server side uses an integer type; reject fractional ages here too.
    .int("Age must be a whole number")
    .min(13, "Must be at least 13 years old")
    .max(120, "Invalid age")
    .optional(),
  role: z.enum(["admin", "user", "guest"])
});

/** Static type of a successfully parsed user. */
type User = z.infer<typeof UserSchema>;
/**
 * Validate `data` against `UserSchema` and return the typed user.
 * Throws the schema's error when validation fails.
 */
function createUser(data: unknown): User {
  const user = UserSchema.parse(data);
  return user;
}
/**
 * Validate `data` against `UserSchema` without throwing.
 * Returns `{ success: true, data }` on success and
 * `{ success: false, errors }` with the list of issues otherwise.
 */
function safeCreateUser(data: unknown) {
  const parsed = UserSchema.safeParse(data);

  if (parsed.success) {
    return {
      success: true,
      data: parsed.data
    };
  }

  return {
    success: false,
    errors: parsed.error.issues
  };
}
API Request Validation
FastAPI Example
from fastapi import Depends, FastAPI, HTTPException, Query
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field, validator
app = FastAPI()
class ItemCreate(BaseModel):
    """Request body for creating an item.

    Fields:
        name: 1-100 characters, without ``<``, ``>``, ``{`` or ``}``.
        price: must be greater than 0.
        quantity: must be 0 or more.
        tags: list of strings; defaults to an empty list.
    """

    name: str = Field(..., min_length=1, max_length=100)
    price: float = Field(..., gt=0)
    quantity: int = Field(..., ge=0)
    tags: list[str] = []

    @validator('name')
    def name_not_special(cls, v):
        """Reject names containing angle brackets or curly braces.

        Raises:
            ValueError: if any of the four characters is present.
        """
        forbidden = ('<', '>', '{', '}')
        for symbol in forbidden:
            if symbol in v:
                raise ValueError('Invalid characters in name')
        return v
@app.post("/items")
async def create_item(item: ItemCreate):
    """Echo the validated item back with a placeholder id of ``"123"``.

    ``item`` has already been validated against ``ItemCreate`` when this
    body runs.
    """
    response = {"id": "123"}
    response.update(item.dict())
    return response
# Query parameter validation
from typing import Optional
@app.get("/items")
async def list_items(
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0),
    search: Optional[str] = Query(None, max_length=100),
):
    """Return an empty page of items, echoing the validated paging parameters.

    Query parameters:
        limit: 1-1000, default 100.
        offset: 0 or more, default 0.
        search: optional, at most 100 characters; accepted but not used by
            this example body.
    """
    page = dict(items=[], limit=limit, offset=offset)
    return page
Custom Validation Errors
from fastapi import HTTPException
from pydantic import ValidationError
# FastAPI reports invalid request data as RequestValidationError, so a
# handler registered only for pydantic's ValidationError is never invoked for
# bad requests. Register for both: request errors are reshaped, and models
# validated by hand inside endpoints are still covered as before.
@app.exception_handler(RequestValidationError)
@app.exception_handler(ValidationError)
async def validation_exception_handler(request, exc):
    """Turn a validation error into a 422 response with a flat error list.

    Args:
        request: the incoming request (unused).
        exc: the validation exception; its ``errors()`` entries supply the
            ``loc``, ``msg`` and ``type`` of each problem.

    Returns:
        A JSON response with status 422 whose body holds ``detail`` and a
        list of ``errors``, each with ``field`` (the location joined with
        dots), ``message`` and ``type``.
    """
    errors = [
        {
            "field": ".".join(str(loc) for loc in error["loc"]),
            "message": error["msg"],
            "type": error["type"],
        }
        for error in exc.errors()
    ]
    return JSONResponse(
        status_code=422,
        content={
            "detail": "Validation failed",
            "errors": errors,
        },
    )
Database Validation
SQL Constraints
-- Database-level constraints: the last line of defence if invalid data
-- gets past application validation. Statements run in the order written.
-- NOT NULL: every user row must have an email
ALTER TABLE users
ALTER COLUMN email SET NOT NULL;
-- UNIQUE: no two users may share the same email
ALTER TABLE users
ADD CONSTRAINT unique_email UNIQUE (email);
-- CHECK: order amounts must be strictly greater than zero
ALTER TABLE orders
ADD CONSTRAINT positive_amount
CHECK (amount > 0);
-- FOREIGN KEY: each order must reference an existing customer
ALTER TABLE orders
ADD CONSTRAINT fk_customer
FOREIGN KEY (customer_id) REFERENCES customers(id);
-- ENUM: restrict role to a fixed set of values; the type must be created
-- before the column that uses it
CREATE TYPE user_role AS ENUM ('admin', 'user', 'guest');
ALTER TABLE users
ADD COLUMN role user_role NOT NULL DEFAULT 'user';
Sanitization
Input Sanitization
import html
import re
from datetime import datetime
def sanitize_string(value: str, max_length: int = 1000) -> str:
    """Drop NUL characters from ``value`` and cap the result at ``max_length`` characters.

    NUL characters are removed before truncating, so they do not count
    towards the length limit.
    """
    without_nul = "".join(value.split('\x00'))
    return without_nul[:max_length]
def sanitize_html(value: str) -> str:
    """Escape HTML special characters so ``value`` can be shown as text (XSS defence).

    Both single and double quotes are escaped along with ``&``, ``<`` and
    ``>``.
    """
    escaped = html.escape(value, quote=True)
    return escaped
def sanitize_sql(value: str) -> str:
    """Strip a fixed list of SQL metacharacters. ANTI-EXAMPLE, not a defence.

    WARNING: this is NOT safe and exists only to show what not to do.
    Each token is removed in a single pass, in order, so removing one token
    can leave another behind (``"//**"`` becomes ``"/*"``). Always use
    parameterized queries instead.
    """
    blocked_tokens = ("'", '"', ';', '--', '/*', '*/')
    stripped = value
    for token in blocked_tokens:
        stripped = stripped.replace(token, '')
    return stripped
class Sanitizer:
    """Namespace of static helpers that normalise common input fields."""

    @staticmethod
    def sanitize_email(email: str) -> str:
        """Trim surrounding whitespace and lower-case the address."""
        trimmed = email.strip()
        return trimmed.lower()

    @staticmethod
    def sanitize_username(username: str) -> str:
        """Remove every character that is not an ASCII letter, digit or underscore."""
        disallowed = re.compile(r'[^a-zA-Z0-9_]')
        return disallowed.sub('', username)

    @staticmethod
    def sanitize_search(query: str) -> str:
        """Remove search-operator characters, then cap the result at 200 characters."""
        operators = re.compile(r'[+\-><()~*]')
        cleaned = operators.sub('', query)
        return cleaned[:200]
Best Practices
- Validate at API boundary: First defense
- Fail fast: Reject invalid input immediately
- Use schemas: Define expected structure
- Sanitize output: Escape when displaying
- Use parameterized queries: Prevent SQL injection
- Validate business rules: Beyond type checking
Conclusion
Robust data validation protects against invalid data, security vulnerabilities, and system errors. By implementing comprehensive validation at every layer—API, business logic, and database—you can ensure data integrity and system reliability.
Comments