Data Validation and Transformation in Go
Introduction
Data validation and transformation are critical for ensuring data quality. This guide covers implementing robust validation and transformation systems in Go.
Proper validation and transformation prevent errors, ensure consistency, and maintain data integrity throughout your pipeline.
Data Validation
Basic Validation
package main
import (
	"fmt"
	"regexp"
	"strings"
	"unicode/utf8"
)
// ValidationError describes a single validation failure for one field.
//
// It implements the error interface, so a failure can be returned, wrapped,
// or joined anywhere a plain error is expected.
type ValidationError struct {
	// Field is the name of the field that failed validation.
	Field string
	// Message is the human-readable reason for the failure.
	Message string
}

// Error returns the failure formatted as "<field>: <message>". When Field is
// empty only the message is returned, so no dangling separator is produced.
func (e ValidationError) Error() string {
	if e.Field == "" {
		return e.Message
	}
	return e.Field + ": " + e.Message
}
// Validator is implemented by types that can check a single value.
type Validator interface {
// Validate inspects the given value and returns a non-nil error describing
// the problem when the value is rejected, or nil when it is accepted.
Validate(interface{}) error
}
// StringValidator checks that a value is a string whose length lies within
// an inclusive range and that, optionally, matches a regular expression.
type StringValidator struct {
	// minLength is the minimum accepted length, counted in characters (runes).
	minLength int
	// maxLength is the maximum accepted length, counted in characters (runes).
	maxLength int
	// pattern is nil when no pattern check was requested.
	pattern *regexp.Regexp
}

// NewStringValidator returns a StringValidator accepting strings of minLen to
// maxLen characters (inclusive). A non-empty pattern is compiled as a regular
// expression that accepted strings must match; an empty pattern disables the
// check.
//
// The pattern is compiled with regexp.MustCompile, so an invalid pattern
// panics. Pass only patterns known to be valid.
func NewStringValidator(minLen, maxLen int, pattern string) *StringValidator {
	sv := &StringValidator{
		minLength: minLen,
		maxLength: maxLen,
	}
	if pattern != "" {
		sv.pattern = regexp.MustCompile(pattern)
	}
	return sv
}

// Validate returns nil when v is a string that satisfies the configured
// length bounds and pattern, and a descriptive error otherwise. A value that
// is not a string is rejected with an error naming its dynamic type.
func (sv *StringValidator) Validate(v interface{}) error {
	str, ok := v.(string)
	if !ok {
		return fmt.Errorf("expected string, got %T", v)
	}

	// Count runes rather than bytes: the limits and the error messages are
	// expressed in characters, and len(str) over-counts multi-byte UTF-8 text.
	length := utf8.RuneCountInString(str)
	if length < sv.minLength {
		return fmt.Errorf("string too short: minimum %d characters", sv.minLength)
	}
	if length > sv.maxLength {
		return fmt.Errorf("string too long: maximum %d characters", sv.maxLength)
	}

	if sv.pattern != nil && !sv.pattern.MatchString(str) {
		return fmt.Errorf("string does not match pattern")
	}
	return nil
}
// emailAddressPattern is the expression EmailValidator matches against. It is
// compiled once at package initialisation instead of on every Validate call.
var emailAddressPattern = regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)

// EmailValidator checks that a value is a string shaped like an email address
// (local part, "@", domain, and an alphabetic top-level label of at least two
// letters). It holds no state, so the zero value is ready to use.
type EmailValidator struct{}

// Validate returns nil when v is a string matching the email pattern. It
// returns an error naming the dynamic type when v is not a string, and
// "invalid email format" when the string does not match.
func (ev *EmailValidator) Validate(v interface{}) error {
	email, ok := v.(string)
	if !ok {
		return fmt.Errorf("expected string, got %T", v)
	}
	if !emailAddressPattern.MatchString(email) {
		return fmt.Errorf("invalid email format")
	}
	return nil
}
// IntRangeValidator accepts int values that fall inside an inclusive range.
type IntRangeValidator struct {
	min int // lowest accepted value
	max int // highest accepted value
}

// NewIntRangeValidator builds a validator for the inclusive range [min, max].
func NewIntRangeValidator(min, max int) *IntRangeValidator {
	irv := new(IntRangeValidator)
	irv.min = min
	irv.max = max
	return irv
}

// Validate returns nil when v is an int within the configured bounds. Values
// of any other type, and ints outside the bounds, produce an error.
func (irv *IntRangeValidator) Validate(v interface{}) error {
	switch n := v.(type) {
	case int:
		if irv.min <= n && n <= irv.max {
			return nil
		}
		return fmt.Errorf("value out of range: %d-%d", irv.min, irv.max)
	default:
		return fmt.Errorf("expected int, got %T", v)
	}
}
Good: Proper Validation and Transformation
package main
import (
"fmt"
"strings"
"sync"
)
// ValidationRule binds a Validator to the name of the field it checks.
type ValidationRule struct {
// Field is the key looked up in the data map being validated.
Field string
// Validator is invoked with the value stored under Field.
Validator Validator
}
// DataValidator applies an ordered list of per-field rules to a record.
type DataValidator struct {
	rules []ValidationRule
}

// NewDataValidator returns a DataValidator with no rules registered.
func NewDataValidator() *DataValidator {
	dv := new(DataValidator)
	dv.rules = make([]ValidationRule, 0)
	return dv
}

// AddRule registers validator for the named field. Rules are evaluated in the
// order they were added.
func (dv *DataValidator) AddRule(field string, validator Validator) {
	rule := ValidationRule{Field: field, Validator: validator}
	dv.rules = append(dv.rules, rule)
}

// Validate runs every rule against data and returns all failures found, in
// rule order. A field named by a rule but absent from data is reported as
// "field is required" and its validator is not called. The result is nil when
// every rule passes.
func (dv *DataValidator) Validate(data map[string]interface{}) []ValidationError {
	var failures []ValidationError
	report := func(field, message string) {
		failures = append(failures, ValidationError{Field: field, Message: message})
	}

	for i := range dv.rules {
		rule := dv.rules[i]
		value, present := data[rule.Field]
		switch {
		case !present:
			report(rule.Field, "field is required")
		default:
			if err := rule.Validator.Validate(value); err != nil {
				report(rule.Field, err.Error())
			}
		}
	}
	return failures
}
// Transformer is implemented by types that convert one value into another.
type Transformer interface {
// Transform returns the converted value, or a non-nil error when the
// input cannot be converted.
Transform(interface{}) (interface{}, error)
}
// TrimTransformer strips leading and trailing whitespace from strings.
type TrimTransformer struct{}

// Transform returns v with surrounding whitespace removed. Any value that is
// not a string yields a nil result and an error.
func (tt *TrimTransformer) Transform(v interface{}) (interface{}, error) {
	if text, isString := v.(string); isString {
		return strings.TrimSpace(text), nil
	}
	return nil, fmt.Errorf("expected string")
}
// ToUpperTransformer upper-cases strings.
type ToUpperTransformer struct{}

// Transform returns the upper-cased form of v. Any value that is not a string
// yields a nil result and an error.
func (tut *ToUpperTransformer) Transform(v interface{}) (interface{}, error) {
	switch text := v.(type) {
	case string:
		return strings.ToUpper(text), nil
	default:
		return nil, fmt.Errorf("expected string")
	}
}
// TransformationPipeline runs a sequence of transformers, feeding the output
// of each one into the next.
type TransformationPipeline struct {
	transformers []Transformer
}

// NewTransformationPipeline returns a pipeline with no stages.
func NewTransformationPipeline() *TransformationPipeline {
	tp := new(TransformationPipeline)
	tp.transformers = make([]Transformer, 0)
	return tp
}

// AddTransformer appends transformer as the last stage of the pipeline.
func (tp *TransformationPipeline) AddTransformer(transformer Transformer) {
	tp.transformers = append(tp.transformers, transformer)
}

// Transform passes v through every stage in the order the stages were added
// and returns the final value. The first stage to fail stops the run: its
// error is returned with a nil result and later stages are not invoked. With
// no stages, v is returned unchanged.
func (tp *TransformationPipeline) Transform(v interface{}) (interface{}, error) {
	current := v
	for i := 0; i < len(tp.transformers); i++ {
		next, err := tp.transformers[i].Transform(current)
		if err != nil {
			return nil, err
		}
		current = next
	}
	return current, nil
}
// DataProcessor validates a record and then applies per-field transformation
// pipelines to it. Pipelines may be registered concurrently with calls to
// Process; access to the pipeline map is guarded by mu.
type DataProcessor struct {
	validator *DataValidator
	pipelines map[string]*TransformationPipeline
	mu        sync.RWMutex // guards pipelines
}

// NewDataProcessor returns a DataProcessor that validates records with
// validator and has no transformation pipelines registered.
func NewDataProcessor(validator *DataValidator) *DataProcessor {
	return &DataProcessor{
		validator: validator,
		pipelines: make(map[string]*TransformationPipeline),
	}
}

// AddTransformationPipeline registers pipeline for the named field, replacing
// any pipeline previously registered for that field.
func (dp *DataProcessor) AddTransformationPipeline(field string, pipeline *TransformationPipeline) {
	dp.mu.Lock()
	defer dp.mu.Unlock()
	dp.pipelines[field] = pipeline
}

// Process validates data and, if validation passes, transforms it.
//
// When validation reports any errors, Process returns a nil map together with
// those errors and performs no transformation. Otherwise it returns a new map
// holding every key of data: fields with a registered pipeline carry the
// transformed value, other fields are copied as-is. A field whose pipeline
// fails is omitted from the result and reported as a ValidationError whose
// message starts with "transformation failed: ". The input map is not
// modified.
func (dp *DataProcessor) Process(data map[string]interface{}) (map[string]interface{}, []ValidationError) {
	// Validate first; a record that fails validation is never transformed.
	errs := dp.validator.Validate(data)
	if len(errs) > 0 {
		return nil, errs
	}

	pipelines := dp.snapshotPipelines()

	result := make(map[string]interface{}, len(data))
	for key, value := range data {
		pipeline, exists := pipelines[key]
		if !exists {
			result[key] = value
			continue
		}
		transformed, err := pipeline.Transform(value)
		if err != nil {
			errs = append(errs, ValidationError{
				Field:   key,
				Message: fmt.Sprintf("transformation failed: %v", err),
			})
			continue
		}
		result[key] = transformed
	}
	return result, errs
}

// snapshotPipelines returns a private copy of the pipeline map taken under the
// read lock. Assigning the map to a local variable would only copy the map
// header, so later reads would still race with AddTransformationPipeline;
// copying the entries lets Process run without holding the lock while
// pipelines execute.
func (dp *DataProcessor) snapshotPipelines() map[string]*TransformationPipeline {
	dp.mu.RLock()
	defer dp.mu.RUnlock()

	snapshot := make(map[string]*TransformationPipeline, len(dp.pipelines))
	for field, pipeline := range dp.pipelines {
		snapshot[field] = pipeline
	}
	return snapshot
}
Bad: Improper Validation
package main
// BadProcessData is an anti-example (BAD: no validation). Its body is an
// empty placeholder standing in for code that consumes the input map directly.
func BadProcessData(data map[string]interface{}) {
// No validation: the map is never checked before use.
// No error handling: the function has no way to report a problem.
// Direct use of data: values would be consumed exactly as received.
}
// BadValidation is an anti-example (BAD: no error collection). The single
// error return can describe at most one problem; the placeholder body here
// always returns nil.
func BadValidation(data map[string]interface{}) error {
// Returns on first error: a validator shaped like this stops at the first failure.
// Doesn't collect all errors: the caller never sees the remaining problems.
return nil
}
// BadTransformation is an anti-example (BAD: no transformation). Its body is
// an empty placeholder: the input map is left exactly as received.
func BadTransformation(data map[string]interface{}) {
// No data cleaning: nothing such as whitespace trimming is applied.
// No normalization: values are not brought into a consistent form.
}
Problems:
- No validation
- No error collection
- No transformation
- No error handling
Advanced Validation Patterns
Conditional Validation
package main
// ConditionalValidator pairs a Validator with a predicate over the whole
// record that decides whether the validator applies.
//
// Validate only sees a single field value, so it cannot evaluate the
// predicate itself. Callers that hold the full record should consult
// ShouldValidate first and call Validate only when it returns true.
type ConditionalValidator struct {
	// condition decides, from the whole record, whether validator applies.
	condition func(map[string]interface{}) bool
	// validator performs the actual check on the field value.
	validator Validator
}

// NewConditionalValidator returns a ConditionalValidator that applies
// validator to records for which condition returns true.
func NewConditionalValidator(condition func(map[string]interface{}) bool, validator Validator) *ConditionalValidator {
	return &ConditionalValidator{
		condition: condition,
		validator: validator,
	}
}

// ShouldValidate reports whether the wrapped validator applies to data. A
// ConditionalValidator built with a nil condition applies to every record.
func (cv *ConditionalValidator) ShouldValidate(data map[string]interface{}) bool {
	if cv.condition == nil {
		return true
	}
	return cv.condition(data)
}

// Validate delegates to the wrapped validator unconditionally. The condition
// is not evaluated here because only the field value, not the record, is
// available; use ShouldValidate to decide whether to call this method.
func (cv *ConditionalValidator) Validate(v interface{}) error {
	return cv.validator.Validate(v)
}
// CrossFieldValidator wraps a function that checks a record as a whole, for
// rules that involve more than one field.
type CrossFieldValidator struct {
	validate func(map[string]interface{}) error
}

// NewCrossFieldValidator returns a CrossFieldValidator backed by validate.
func NewCrossFieldValidator(validate func(map[string]interface{}) error) *CrossFieldValidator {
	cfv := new(CrossFieldValidator)
	cfv.validate = validate
	return cfv
}

// Validate passes the whole record to the wrapped function and returns its
// result unchanged.
func (cfv *CrossFieldValidator) Validate(data map[string]interface{}) error {
	check := cfv.validate
	return check(data)
}
Custom Transformers
package main
// NormalizeTransformer adapts a plain normalisation function so that it can
// be used wherever a Transform method is expected.
type NormalizeTransformer struct {
	normalize func(interface{}) (interface{}, error)
}

// NewNormalizeTransformer returns a NormalizeTransformer backed by normalize.
func NewNormalizeTransformer(normalize func(interface{}) (interface{}, error)) *NormalizeTransformer {
	nt := new(NormalizeTransformer)
	nt.normalize = normalize
	return nt
}

// Transform calls the wrapped function with v and returns its value and error
// unchanged.
func (nt *NormalizeTransformer) Transform(v interface{}) (interface{}, error) {
	normalized, err := nt.normalize(v)
	return normalized, err
}
// DefaultTransformer substitutes a fixed fallback for missing (nil) values.
type DefaultTransformer struct {
	defaultValue interface{}
}

// NewDefaultTransformer returns a DefaultTransformer whose fallback is
// defaultValue.
func NewDefaultTransformer(defaultValue interface{}) *DefaultTransformer {
	dt := new(DefaultTransformer)
	dt.defaultValue = defaultValue
	return dt
}

// Transform returns the configured fallback when v is nil and v itself
// otherwise. It never returns an error.
func (dt *DefaultTransformer) Transform(v interface{}) (interface{}, error) {
	result := v
	if result == nil {
		result = dt.defaultValue
	}
	return result, nil
}
Best Practices
1. Validate Early
// Validate at entry point
errors := validator.Validate(data)
if len(errors) > 0 {
return errors
}
2. Collect All Errors
// Don't return on first error
var errors []ValidationError
3. Transform After Validation
// Validate first, then transform
4. Document Rules
// Document validation rules clearly
Common Pitfalls
1. No Validation
Always validate input data.
2. Returning on First Error
Collect all validation errors and report them together, so the caller can fix every problem in one pass.
3. No Error Messages
Provide clear error messages that name the failing field and state what was expected.
4. No Transformation
Clean and normalize data.
Resources
Summary
Proper validation and transformation are essential. Key takeaways:
- Validate all input data
- Collect all validation errors
- Provide clear error messages
- Transform data appropriately
- Document validation rules
- Test edge cases
- Monitor data quality
By mastering validation and transformation, you ensure data integrity throughout your pipeline.
Comments