Skip to main content
⚡ Calmops

Data Validation and Transformation in Go

Data Validation and Transformation in Go

Introduction

Data validation and transformation are critical for ensuring data quality. This guide covers implementing robust validation and transformation systems in Go.

Proper validation and transformation prevent errors, ensure consistency, and maintain data integrity throughout your pipeline.

Data Validation

Basic Validation

package main

import (
	"fmt"
	"regexp"
	"strings"
	"unicode/utf8"
)

// ValidationError represents a validation failure for a single field.
type ValidationError struct {
	Field   string // name of the field that failed validation
	Message string // human-readable description of the failure
}

// Validator validates a single value. Implementations return nil when
// the value is acceptable and a descriptive error otherwise.
type Validator interface {
	Validate(interface{}) error
}

// StringValidator validates strings against character-length bounds and
// an optional regular expression pattern.
type StringValidator struct {
	minLength int            // minimum length in characters (runes)
	maxLength int            // maximum length in characters (runes)
	pattern   *regexp.Regexp // optional; nil means no pattern check
}

// NewStringValidator creates a new string validator. pattern may be
// empty, in which case no pattern matching is performed. An invalid
// pattern panics via regexp.MustCompile, which is acceptable for
// init-time construction of validators.
func NewStringValidator(minLen, maxLen int, pattern string) *StringValidator {
	var regex *regexp.Regexp
	if pattern != "" {
		regex = regexp.MustCompile(pattern)
	}

	return &StringValidator{
		minLength: minLen,
		maxLength: maxLen,
		pattern:   regex,
	}
}

// Validate checks that v is a string, that its length in characters is
// within [minLength, maxLength], and that it matches the configured
// pattern (if any).
func (sv *StringValidator) Validate(v interface{}) error {
	str, ok := v.(string)
	if !ok {
		return fmt.Errorf("expected string, got %T", v)
	}

	// Count runes, not bytes: the error messages promise "characters",
	// and len(str) over-counts multi-byte UTF-8 input (e.g. "héllo" is
	// 5 characters but 6 bytes).
	length := utf8.RuneCountInString(str)

	if length < sv.minLength {
		return fmt.Errorf("string too short: minimum %d characters", sv.minLength)
	}

	if length > sv.maxLength {
		return fmt.Errorf("string too long: maximum %d characters", sv.maxLength)
	}

	if sv.pattern != nil && !sv.pattern.MatchString(str) {
		return fmt.Errorf("string does not match pattern")
	}

	return nil
}

// emailPattern is compiled once at package scope rather than on every
// Validate call: regexp compilation is comparatively expensive, and
// MustCompile on a known-good literal cannot fail at runtime. This also
// removes the original's silently-ignored error from regexp.MatchString.
var emailPattern = regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)

// EmailValidator validates email addresses against a simple
// user@domain.tld pattern (not a full RFC 5322 check).
type EmailValidator struct{}

// Validate checks that v is a string matching the email pattern.
func (ev *EmailValidator) Validate(v interface{}) error {
	email, ok := v.(string)
	if !ok {
		return fmt.Errorf("expected string, got %T", v)
	}

	if !emailPattern.MatchString(email) {
		return fmt.Errorf("invalid email format")
	}

	return nil
}

// IntRangeValidator validates that an integer lies within an inclusive
// [min, max] range.
type IntRangeValidator struct {
	min int // inclusive lower bound
	max int // inclusive upper bound
}

// NewIntRangeValidator creates a validator for the inclusive range
// [min, max].
func NewIntRangeValidator(min, max int) *IntRangeValidator {
	return &IntRangeValidator{min: min, max: max}
}

// Validate checks that v is an int within [min, max].
func (irv *IntRangeValidator) Validate(v interface{}) error {
	num, ok := v.(int)
	if !ok {
		return fmt.Errorf("expected int, got %T", v)
	}

	if num < irv.min || num > irv.max {
		// Include the offending value: the original message stated only
		// the bounds, which makes failures hard to diagnose from logs.
		return fmt.Errorf("value %d out of range: %d-%d", num, irv.min, irv.max)
	}

	return nil
}

Good: Proper Validation and Transformation

package main

import (
	"fmt"
	"strings"
	"sync"
)

// ValidationRule binds a field name to the Validator that checks it.
type ValidationRule struct {
	Field     string
	Validator Validator
}

// DataValidator validates a data map against a set of per-field rules.
type DataValidator struct {
	rules []ValidationRule // applied in insertion order by Validate
}

// NewDataValidator creates a data validator with no rules registered.
func NewDataValidator() *DataValidator {
	dv := &DataValidator{rules: make([]ValidationRule, 0)}
	return dv
}

// AddRule registers a validator for the named field. Rules are checked
// in the order they were added.
func (dv *DataValidator) AddRule(field string, validator Validator) {
	rule := ValidationRule{Field: field, Validator: validator}
	dv.rules = append(dv.rules, rule)
}

// Validate checks every registered rule against data and returns ALL
// failures (missing fields and per-field validation errors) rather than
// stopping at the first one. An empty result means the data passed.
func (dv *DataValidator) Validate(data map[string]interface{}) []ValidationError {
	var failures []ValidationError

	for _, rule := range dv.rules {
		value, present := data[rule.Field]
		if !present {
			// A rule implies the field is mandatory.
			failures = append(failures, ValidationError{
				Field:   rule.Field,
				Message: "field is required",
			})
			continue
		}

		if err := rule.Validator.Validate(value); err != nil {
			failures = append(failures, ValidationError{
				Field:   rule.Field,
				Message: err.Error(),
			})
		}
	}

	return failures
}

// Transformer transforms a value, returning the transformed result or
// an error when the input cannot be handled.
type Transformer interface {
	Transform(interface{}) (interface{}, error)
}

// TrimTransformer removes leading and trailing whitespace from strings.
type TrimTransformer struct{}

// Transform returns the input string with surrounding whitespace
// removed; non-string input yields an error.
func (tt *TrimTransformer) Transform(v interface{}) (interface{}, error) {
	if s, ok := v.(string); ok {
		return strings.TrimSpace(s), nil
	}
	return nil, fmt.Errorf("expected string")
}

// ToUpperTransformer maps strings to their uppercase form.
type ToUpperTransformer struct{}

// Transform returns the uppercase form of the input string; non-string
// input yields an error.
func (tut *ToUpperTransformer) Transform(v interface{}) (interface{}, error) {
	if s, ok := v.(string); ok {
		return strings.ToUpper(s), nil
	}
	return nil, fmt.Errorf("expected string")
}

// TransformationPipeline chains transformers; each one receives the
// output of the previous.
type TransformationPipeline struct {
	transformers []Transformer
}

// NewTransformationPipeline creates an empty pipeline.
func NewTransformationPipeline() *TransformationPipeline {
	return &TransformationPipeline{transformers: make([]Transformer, 0)}
}

// AddTransformer appends a transformer to the end of the pipeline.
func (tp *TransformationPipeline) AddTransformer(transformer Transformer) {
	tp.transformers = append(tp.transformers, transformer)
}

// Transform runs v through every transformer in order, stopping at the
// first error. An empty pipeline returns v unchanged.
func (tp *TransformationPipeline) Transform(v interface{}) (interface{}, error) {
	current := v
	for _, step := range tp.transformers {
		next, err := step.Transform(current)
		if err != nil {
			return nil, err
		}
		current = next
	}
	return current, nil
}

// DataProcessor validates and transforms data: validation runs first,
// then per-field transformation pipelines are applied.
type DataProcessor struct {
	validator *DataValidator
	pipelines map[string]*TransformationPipeline // keyed by field name
	mu        sync.RWMutex                       // guards pipelines
}

// NewDataProcessor creates a data processor using the supplied
// validator. Transformation pipelines are registered separately via
// AddTransformationPipeline.
func NewDataProcessor(validator *DataValidator) *DataProcessor {
	dp := &DataProcessor{
		validator: validator,
		pipelines: map[string]*TransformationPipeline{},
	}
	return dp
}

// AddTransformationPipeline registers the pipeline to run for the named
// field, replacing any existing pipeline for that field.
func (dp *DataProcessor) AddTransformationPipeline(field string, pipeline *TransformationPipeline) {
	dp.mu.Lock()
	dp.pipelines[field] = pipeline
	dp.mu.Unlock()
}

// Process validates data and, if validation passes, applies the
// registered per-field transformation pipelines. On validation failure
// it returns (nil, errors); otherwise it returns the transformed map
// plus any transformation errors that occurred (fields whose pipeline
// failed are omitted from the result).
func (dp *DataProcessor) Process(data map[string]interface{}) (map[string]interface{}, []ValidationError) {
	// Validate first; do not transform invalid data.
	errors := dp.validator.Validate(data)
	if len(errors) > 0 {
		return nil, errors
	}

	// Hold the read lock for the entire transform phase. The original
	// copied the map header (pipelines := dp.pipelines) and released the
	// lock, but that does NOT snapshot the map's contents: a concurrent
	// AddTransformationPipeline writing to the same map while we iterate
	// is a data race.
	dp.mu.RLock()
	defer dp.mu.RUnlock()

	result := make(map[string]interface{}, len(data))

	for key, value := range data {
		pipeline, exists := dp.pipelines[key]
		if !exists {
			// No pipeline registered: pass the value through unchanged.
			result[key] = value
			continue
		}

		transformed, err := pipeline.Transform(value)
		if err != nil {
			errors = append(errors, ValidationError{
				Field:   key,
				Message: fmt.Sprintf("transformation failed: %v", err),
			})
			continue
		}
		result[key] = transformed
	}

	return result, errors
}

Bad: Improper Validation

package main

// BAD: No validation — data is consumed as-is, so malformed or missing
// fields propagate silently into downstream code.
func BadProcessData(data map[string]interface{}) {
	// No validation
	// No error handling
	// Direct use of data
}

// BAD: No error collection — a validator that bails out on the first
// failure forces callers into a fix-one-resubmit loop instead of
// reporting every problem at once.
func BadValidation(data map[string]interface{}) error {
	// Returns on first error
	// Doesn't collect all errors
	return nil
}

// BAD: No transformation — raw input (untrimmed, unnormalized) flows
// straight through, so inconsistent data reaches the rest of the
// pipeline.
func BadTransformation(data map[string]interface{}) {
	// No data cleaning
	// No normalization
}

Problems:

  • No validation
  • No error collection
  • No transformation
  • No error handling

Advanced Validation Patterns

Conditional Validation

package main

// ConditionalValidator wraps another Validator and is intended to be
// applied only when its record-level condition holds.
type ConditionalValidator struct {
	condition func(map[string]interface{}) bool
	validator Validator
}

// NewConditionalValidator creates a conditional validator from a
// record-level condition and the validator to apply when it holds.
func NewConditionalValidator(condition func(map[string]interface{}) bool, validator Validator) *ConditionalValidator {
	cv := &ConditionalValidator{}
	cv.condition = condition
	cv.validator = validator
	return cv
}

// Validate delegates to the wrapped validator. The condition itself is
// evaluated at a higher level, where the full record is available.
func (cv *ConditionalValidator) Validate(v interface{}) error {
	return cv.validator.Validate(v)
}

// CrossFieldValidator runs a record-level check that can inspect
// several fields at once (e.g. "end date must follow start date").
type CrossFieldValidator struct {
	validate func(map[string]interface{}) error
}

// NewCrossFieldValidator wraps the given record-level check function.
func NewCrossFieldValidator(validate func(map[string]interface{}) error) *CrossFieldValidator {
	cfv := &CrossFieldValidator{validate: validate}
	return cfv
}

// Validate applies the record-level check to the whole data map.
// Note: this signature takes the full map, so CrossFieldValidator does
// not satisfy the single-value Validator interface.
func (cfv *CrossFieldValidator) Validate(data map[string]interface{}) error {
	return cfv.validate(data)
}

Custom Transformers

package main

// NormalizeTransformer applies a caller-supplied normalization function
// to each value it transforms.
type NormalizeTransformer struct {
	normalize func(interface{}) (interface{}, error)
}

// NewNormalizeTransformer wraps the given normalization function.
func NewNormalizeTransformer(normalize func(interface{}) (interface{}, error)) *NormalizeTransformer {
	nt := &NormalizeTransformer{normalize: normalize}
	return nt
}

// Transform delegates to the wrapped normalization function.
func (nt *NormalizeTransformer) Transform(v interface{}) (interface{}, error) {
	return nt.normalize(v)
}

// DefaultTransformer substitutes a configured default for nil input and
// passes every other value through unchanged.
type DefaultTransformer struct {
	defaultValue interface{}
}

// NewDefaultTransformer creates a transformer with the given default.
func NewDefaultTransformer(defaultValue interface{}) *DefaultTransformer {
	return &DefaultTransformer{defaultValue: defaultValue}
}

// Transform returns the default when v is nil, otherwise v itself.
// It never returns an error.
func (dt *DefaultTransformer) Transform(v interface{}) (interface{}, error) {
	if v != nil {
		return v, nil
	}
	return dt.defaultValue, nil
}

Best Practices

1. Validate Early

// Validate at entry point
errors := validator.Validate(data)
if len(errors) > 0 {
	return errors
}

2. Collect All Errors

// Don't return on first error
var errors []ValidationError

3. Transform After Validation

// Validate first, then transform

4. Document Rules

// Document validation rules clearly

Common Pitfalls

1. No Validation

Always validate input data.

2. Returning on First Error

Collect all validation errors.

3. No Error Messages

Provide clear error messages.

4. No Transformation

Clean and normalize data.

Resources

Summary

Proper validation and transformation are essential. Key takeaways:

  • Validate all input data
  • Collect all validation errors
  • Provide clear error messages
  • Transform data appropriately
  • Document validation rules
  • Test edge cases
  • Monitor data quality

By mastering validation and transformation, you ensure data integrity throughout your pipeline.

Comments