Skip to main content
⚡ Calmops

Data Validation: Building Robust Input Handling

Introduction

Data validation is the first line of defense against invalid data, security vulnerabilities, and system errors. Proper validation ensures data integrity, prevents attacks, and improves user experience. This guide covers validation patterns, tools, and best practices.

Never trust input from users, external services, or untrusted sources. Validate everything.

Validation Principles

Defense in Depth

┌─────────────────────────────────────────────────────────────┐
│                      Validation Layers                      │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  1. Client-side (User Experience)                           │
│     - Immediate feedback                                    │
│     - Reduces server load                                   │
│                                                             │
│  2. API Layer (First Defense)                               │
│     - Schema validation                                     │
│     - Type checking                                         │
│                                                             │
│  3. Business Logic (Domain Rules)                           │
│     - Custom validators                                     │
│     - Business rules                                        │
│                                                             │
│  4. Database (Last Resort)                                  │
│     - Constraints                                           │
│     - Data types                                            │
│                                                             │
└─────────────────────────────────────────────────────────────┘

Schema Validation

Pydantic Models

from pydantic import BaseModel, Field, validator, constr, conint
from typing import Optional, List
from datetime import datetime
from enum import Enum

# Closed set of roles a user can hold. Mixing in ``str`` makes every member
# an actual string, so a member compares equal to its lowercase value
# (e.g. UserRole.ADMIN == "admin").
class UserRole(str, Enum):
    ADMIN = "admin"
    USER = "user"  # used as the default role by UserCreate
    GUEST = "guest"

# Payload accepted when registering a new user. Field-level constraints
# (lengths, pattern, numeric range) are declared on the fields; the validators
# below add the checks that cannot be expressed declaratively.
class UserCreate(BaseModel):
    # String constraints
    username: constr(min_length=3, max_length=50, pattern=r"^[a-zA-Z0-9_]+$")
    email: str = Field(..., description="User email address")
    password: str = Field(..., min_length=8)

    # Business fields
    role: UserRole = UserRole.USER
    age: Optional[conint(ge=13, le=120)] = None

    @validator('email')
    def email_must_be_valid(cls, v):
        """Check the basic shape of an email address and normalize it.

        The address must have exactly one ``@``, a non-empty local part, no
        whitespace, and a domain made of at least two non-empty dot-separated
        labels. This is a structural sanity check, not full RFC 5322
        validation.

        Returns:
            The address with surrounding whitespace removed, lowercased.

        Raises:
            ValueError: If the address does not have the expected shape.
        """
        candidate = v.strip()
        local, _, domain = candidate.partition('@')
        labels = domain.split('.')
        well_formed = (
            candidate.count('@') == 1
            and local
            and not any(ch.isspace() for ch in candidate)
            # Rejects "a@b", "a@.com", "a@b." and "a@b..com".
            and len(labels) >= 2
            and all(labels)
        )
        if not well_formed:
            raise ValueError('Invalid email format')
        return candidate.lower()

    @validator('password')
    def password_strength(cls, v):
        """Require at least one uppercase letter, lowercase letter and digit.

        Raises:
            ValueError: Naming the first missing character class.
        """
        if not any(c.isupper() for c in v):
            raise ValueError('Password must contain uppercase')
        if not any(c.islower() for c in v):
            raise ValueError('Password must contain lowercase')
        if not any(c.isdigit() for c in v):
            raise ValueError('Password must contain number')
        return v

# Payload accepted when placing an order. Items are free-form dicts, so the
# validators below check the keys the order logic relies on.
class OrderCreate(BaseModel):
    items: List[dict]
    shipping_address: dict

    @validator('items')
    def items_not_empty(cls, v):
        """Reject an order that contains no items.

        Raises:
            ValueError: If ``items`` is empty.
        """
        if not v:
            raise ValueError('Order must have at least one item')
        return v

    @validator('items')
    def validate_items(cls, v):
        """Check that every item has a positive quantity and a non-negative price.

        A missing ``quantity`` is treated as 0 (and therefore rejected); a
        missing ``price`` is treated as 0 (and therefore accepted).

        Raises:
            ValueError: If a quantity/price is not a number, a quantity is not
                positive, or a price is negative.
        """
        for item in v:
            quantity = item.get('quantity', 0)
            price = item.get('price', 0)

            # Comparing a str/None/list with an int raises TypeError, which
            # would escape the validator instead of becoming a validation
            # error, so the type is checked explicitly first.
            if not isinstance(quantity, (int, float)):
                raise ValueError('Item quantity must be a number')
            if quantity <= 0:
                raise ValueError('Item quantity must be positive')

            if not isinstance(price, (int, float)):
                raise ValueError('Item price must be a number')
            if price < 0:
                raise ValueError('Item price cannot be negative')
        return v

# Shape of an order as returned to API clients.
class OrderResponse(BaseModel):
    id: str  # order identifier
    created_at: datetime
    total: float
    
    class Config:
        # pydantic option: allow the model to be built by reading attributes
        # off an arbitrary object (e.g. an ORM row) rather than only a dict.
        from_attributes = True

Custom Validators

from pydantic import validator, root_validator

# Payment payload with per-field checks (amount range, card checksum) and one
# cross-field rule (JPY amounts must be whole numbers).
class PaymentRequest(BaseModel):
    amount: float
    currency: str
    card_number: str

    @validator('amount')
    def amount_must_be_positive(cls, v):
        """Require 0 < amount <= 1,000,000 and round it to two decimals.

        Raises:
            ValueError: If the amount is NaN, not positive, or above the
                maximum.
        """
        # NaN compares False against everything, so it would slip through
        # both range checks below; NaN is the only value not equal to itself.
        if v != v:
            raise ValueError('Amount must be a number')
        if v <= 0:
            raise ValueError('Amount must be positive')
        if v > 1000000:
            raise ValueError('Amount exceeds maximum')
        return round(v, 2)

    # skip_on_failure=True: only run once every field validated, so `amount`
    # and `currency` are guaranteed to be present and well-typed here.
    # pydantic v2 also refuses a post root_validator declared without it.
    @root_validator(skip_on_failure=True)
    def validate_currency_amount_pair(cls, values):
        """Enforce that JPY amounts have no fractional part.

        Raises:
            ValueError: If the currency is ``'JPY'`` and the amount is not a
                whole number.
        """
        amount = values.get('amount')
        currency = values.get('currency')

        if currency == 'JPY' and amount is not None and amount != int(amount):
            raise ValueError('JPY must be integer')

        return values

    @validator('card_number')
    def validate_card_number(cls, v):
        """Strip spaces/dashes and verify the Luhn checksum.

        Returns:
            The card number containing digits only.

        Raises:
            ValueError: If the cleaned value is empty, contains non-digits, or
                fails the Luhn check.
        """
        # Remove spaces and dashes
        cleaned = v.replace(' ', '').replace('-', '')

        # Luhn algorithm (also rejects empty and non-digit input)
        if not cls._luhn_check(cleaned):
            raise ValueError('Invalid card number')

        return cleaned

    @staticmethod
    def _luhn_check(card_number: str) -> bool:
        """Return True if ``card_number`` is a non-empty digit string that
        satisfies the Luhn checksum."""
        # An empty string would otherwise sum to 0 and pass the modulo test;
        # isascii() excludes Unicode digits that int() cannot parse.
        if not (card_number.isascii() and card_number.isdigit()):
            return False

        digits = [int(ch) for ch in card_number]

        # Counting from the rightmost digit: odd positions are added as-is,
        # even positions are doubled and reduced to their digit sum.
        checksum = sum(digits[-1::-2])
        for d in digits[-2::-2]:
            doubled = d * 2
            checksum += doubled - 9 if doubled > 9 else doubled

        return checksum % 10 == 0

Zod Validation

import { z } from 'zod';

// Define schema
// Schema for user-creation payloads. The rules mirror the Python `UserCreate`
// model: same username pattern, same password character classes, integer age
// in [13, 120], and "user" as the default role.
const UserSchema = z.object({
  username: z.string()
    .min(3, "Username must be at least 3 characters")
    // max(50) accepts exactly 50 characters, so the message says "at most".
    .max(50, "Username must be at most 50 characters")
    .regex(/^[a-zA-Z0-9_]+$/, "Invalid username format"),

  email: z.string()
    .email("Invalid email format"),

  // One refine per character class so the error names what is missing.
  password: z.string()
    .min(8, "Password must be at least 8 characters")
    .refine(
      (pwd) => /[A-Z]/.test(pwd),
      "Password must contain uppercase"
    )
    .refine(
      (pwd) => /[a-z]/.test(pwd),
      "Password must contain lowercase"
    )
    .refine(
      (pwd) => /[0-9]/.test(pwd),
      "Password must contain number"
    ),

  age: z.number()
    .int("Age must be a whole number")
    .min(13, "Must be at least 13 years old")
    .max(120, "Invalid age")
    .optional(),

  // Omitted role falls back to "user"; an explicit value must be in the enum.
  role: z.enum(["admin", "user", "guest"]).default("user")
});

// Parsed (output) shape of UserSchema.
type User = z.infer<typeof UserSchema>;

// Validate
/**
 * Parse untrusted input into a `User`.
 *
 * Delegates to `UserSchema.parse`, which throws when `data` does not satisfy
 * the schema; callers that prefer a result object should use the safe-parse
 * variant instead.
 */
function createUser(data: unknown): User {
  const user: User = UserSchema.parse(data);
  return user;
}

// Safe parse
/**
 * Validate untrusted input without throwing.
 *
 * Returns `{ success: true, data }` with the parsed value when `data` matches
 * `UserSchema`, otherwise `{ success: false, errors }` carrying the schema's
 * list of issues.
 */
function safeCreateUser(data: unknown) {
  const parsed = UserSchema.safeParse(data);

  return parsed.success
    ? { success: true, data: parsed.data }
    : { success: false, errors: parsed.error.issues };
}

API Request Validation

FastAPI Example

from fastapi import Depends, FastAPI, HTTPException, Query
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field, validator

app = FastAPI()

# Request body for creating an item: a 1-100 character name, a strictly
# positive price, a non-negative quantity and an optional list of tags.
class ItemCreate(BaseModel):
    name: str = Field(..., min_length=1, max_length=100)
    price: float = Field(..., gt=0)
    quantity: int = Field(..., ge=0)
    tags: list[str] = []

    @validator('name')
    def name_not_special(cls, v):
        """Reject names containing angle brackets or curly braces.

        Raises:
            ValueError: If any of ``< > { }`` appears in the name.
        """
        forbidden = {'<', '>', '{', '}'}
        if forbidden.intersection(v):
            raise ValueError('Invalid characters in name')
        return v

@app.post("/items")
async def create_item(item: ItemCreate):
    # `item` arrives already validated against ItemCreate, so the handler
    # only has to attach the (placeholder) identifier to the echoed fields.
    payload = {"id": "123"}
    payload.update(item.dict())
    return payload

# Query parameter validation
from typing import Optional

# Lists items with pagination. Each query parameter carries its own bounds:
#   limit  - page size, default 100, must be within 1..1000
#   offset - number of items to skip, default 0, must be >= 0
#   search - optional search text, at most 100 characters
@app.get("/items")
async def list_items(
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0),
    search: Optional[str] = Query(None, max_length=100)
):
    # Placeholder result: `search` is accepted but not used here, and the
    # item list is always empty.
    return {"items": [], "limit": limit, "offset": offset}

Custom Validation Errors

from fastapi import HTTPException
from pydantic import ValidationError

# FastAPI reports invalid request data as RequestValidationError, not as
# pydantic's ValidationError, so the handler is registered for both: the first
# covers bad requests, the second covers models validated manually in
# application code.
@app.exception_handler(RequestValidationError)
@app.exception_handler(ValidationError)
async def validation_exception_handler(request, exc):
    """Turn a validation exception into a uniform 422 JSON response.

    Args:
        request: The incoming request (unused).
        exc: The validation exception; only its ``errors()`` list is read.

    Returns:
        A ``JSONResponse`` with status 422 whose body holds a fixed ``detail``
        message and one ``{"field", "message", "type"}`` entry per error.
    """
    errors = [
        {
            # Location parts may be ints (list indices), hence str().
            "field": ".".join(str(loc) for loc in error["loc"]),
            "message": error["msg"],
            "type": error["type"],
        }
        for error in exc.errors()
    ]

    return JSONResponse(
        status_code=422,
        content={
            "detail": "Validation failed",
            "errors": errors
        }
    )

Database Validation

SQL Constraints

-- Database-level constraints: the last validation layer, enforced even when
-- application checks are bypassed.
-- NOTE(review): CREATE TYPE ... AS ENUM and ALTER COLUMN ... SET NOT NULL look
-- like PostgreSQL syntax -- confirm the target database.

-- Not null constraint: every user row must have an email.
ALTER TABLE users 
ALTER COLUMN email SET NOT NULL;

-- Unique constraint: no two users may share the same email.
ALTER TABLE users 
ADD CONSTRAINT unique_email UNIQUE (email);

-- Check constraint: order amounts must be strictly greater than zero.
ALTER TABLE orders 
ADD CONSTRAINT positive_amount 
CHECK (amount > 0);

-- Foreign key constraint: orders.customer_id must reference customers.id.
ALTER TABLE orders 
ADD CONSTRAINT fk_customer 
FOREIGN KEY (customer_id) REFERENCES customers(id);

-- Enum constraint: role is limited to the three listed values and is
-- 'user' when not specified.
CREATE TYPE user_role AS ENUM ('admin', 'user', 'guest');
ALTER TABLE users 
ADD COLUMN role user_role NOT NULL DEFAULT 'user';

Sanitization

Input Sanitization

import html
import re
from datetime import datetime

def sanitize_string(value: str, max_length: int = 1000) -> str:
    """Drop NUL characters from ``value`` and cap its length.

    NUL removal happens before truncation, so removed characters do not
    count towards ``max_length``.
    """
    without_nulls = "".join(value.split('\x00'))
    return without_nulls[:max_length]

def sanitize_html(value: str) -> str:
    """Escape ``value`` for safe inclusion in HTML (XSS prevention).

    ``&``, ``<``, ``>`` and both quote characters are replaced by entities.
    """
    escaped = html.escape(value, quote=True)
    return escaped

def sanitize_sql(value: str) -> str:
    """Strip quote, statement and comment tokens from ``value``.

    WARNING: This is NOT a safe defence against SQL injection and is shown
    only as an example of what not to rely on. Use parameterized queries
    instead!

    Removal is repeated until the string stops changing, because deleting one
    token can splice a new one together (``"-/**/-"`` collapses to ``"--"``
    after a single pass).

    Returns:
        ``value`` with every occurrence of the blacklisted tokens removed.
    """
    dangerous_tokens = ["'", '"', ';', '--', '/*', '*/']

    # Every change shortens the string, so the loop always terminates.
    previous = None
    while previous != value:
        previous = value
        for token in dangerous_tokens:
            value = value.replace(token, '')
    return value

class Sanitizer:
    """Namespace of normalizers for common kinds of user input."""

    # Anything that is not an ASCII letter, digit or underscore.
    _USERNAME_DISALLOWED = re.compile(r'[^a-zA-Z0-9_]')
    # Characters treated as search operators.
    _SEARCH_OPERATORS = re.compile(r'[+\-><()~*]')

    @staticmethod
    def sanitize_email(email: str) -> str:
        """Trim surrounding whitespace and lowercase the address."""
        trimmed = email.strip()
        return trimmed.lower()

    @staticmethod
    def sanitize_username(username: str) -> str:
        """Keep only ASCII letters, digits and underscores."""
        return Sanitizer._USERNAME_DISALLOWED.sub('', username)

    @staticmethod
    def sanitize_search(query: str) -> str:
        """Remove search-operator characters, then cap at 200 characters."""
        without_operators = Sanitizer._SEARCH_OPERATORS.sub('', query)
        return without_operators[:200]

Best Practices

  1. Validate at API boundary: First defense
  2. Fail fast: Reject invalid input immediately
  3. Use schemas: Define expected structure
  4. Sanitize output: Escape when displaying
  5. Use parameterized queries: Prevent SQL injection
  6. Validate business rules: Beyond type checking

Conclusion

Robust data validation protects against invalid data, security vulnerabilities, and system errors. By implementing comprehensive validation at every layer—API, business logic, and database—you can ensure data integrity and system reliability.

Comments