Skip to main content
⚡ Calmops

Regular Expressions in Go

Regular Expressions in Go

Go’s regexp package provides powerful regular expression support using RE2 syntax. This guide covers pattern matching, capturing groups, and practical regex patterns.

Basic Pattern Matching

Simple Matching

package main

import (
    "fmt"
    "regexp"
)

// main demonstrates literal matching: testing for a match, fetching the
// leftmost match, and collecting every match.
func main() {
    // The pattern is the plain literal "hello"
    pattern := regexp.MustCompile(`hello`)

    // MatchString reports whether the pattern occurs anywhere in the input
    for _, input := range []string{"hello world", "goodbye world"} {
        fmt.Println(pattern.MatchString(input)) // true, then false
    }

    const text = "hello world hello"

    // Leftmost match only
    fmt.Println(pattern.FindString(text)) // hello

    // Every match; a limit of -1 means "no limit"
    fmt.Println(pattern.FindAllString(text, -1)) // [hello hello]
}

Case-Insensitive Matching

package main

import (
    "fmt"
    "regexp"
)

// main shows that the (?i) flag makes the whole pattern case-insensitive.
func main() {
    insensitive := regexp.MustCompile(`(?i)hello`)

    // Each spelling differs only in letter case, so every line prints true
    for _, word := range []string{"Hello", "HELLO", "hello"} {
        fmt.Println(insensitive.MatchString(word)) // true
    }
}

Capturing Groups

Basic Capturing

package main

import (
    "fmt"
    "regexp"
)

// main extracts the user and domain parts of an e-mail address with two
// unnamed capturing groups.
func main() {
    re := regexp.MustCompile(`(\w+)@(\w+\.\w+)`)

    // FindStringSubmatch returns the full match followed by each group,
    // or nil when the input does not match at all.
    match := re.FindStringSubmatch("user@example.com")
    fmt.Println(match)
    // Output: [user@example.com user example.com]

    // match[0] = full match
    // match[1] = first group  (user)
    // match[2] = second group (example.com)
}

Named Groups

package main

import (
    "fmt"
    "regexp"
)

// main collects named capture groups into a map keyed by group name.
func main() {
    re := regexp.MustCompile(`(?P<user>\w+)@(?P<domain>\w+\.\w+)`)

    match := re.FindStringSubmatch("user@example.com")
    if match == nil {
        // Without this guard, indexing match below would panic on
        // input that does not match the pattern.
        fmt.Println("no match")
        return
    }

    // SubexpNames is index-aligned with match: entry 0 is the whole
    // match (always unnamed) and unnamed groups have an empty name.
    result := make(map[string]string)
    for i, name := range re.SubexpNames() {
        if i != 0 && name != "" {
            result[name] = match[i]
        }
    }

    fmt.Println(result)
    // Output: map[domain:example.com user:user]
}

Common Patterns

Email Validation

package main

import (
    "fmt"
    "regexp"
)

// emailPattern is compiled once at package initialisation so that
// isValidEmail does not pay the compilation cost on every call.
var emailPattern = regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)

// isValidEmail reports whether email has the shape local@domain.tld,
// where the top-level domain is at least two ASCII letters. It is a
// syntactic check only and does not verify that the address exists.
func isValidEmail(email string) bool {
    return emailPattern.MatchString(email)
}

// main runs isValidEmail against two well-formed addresses and one
// string that has no "@" part.
func main() {
    fmt.Println(isValidEmail("user@example.com"))      // true
    fmt.Println(isValidEmail("invalid.email"))         // false
    fmt.Println(isValidEmail("user.name@example.com")) // true
}

Phone Number Validation

package main

import (
    "fmt"
    "regexp"
)

// phonePattern is compiled once at package initialisation instead of on
// every isValidPhone call.
var phonePattern = regexp.MustCompile(`^\+?1?\d{9,15}$`)

// isValidPhone reports whether phone consists of an optional leading "+",
// an optional "1", and then 9 to 15 digits, with nothing else before or
// after. Separators such as spaces, dashes, or parentheses are rejected.
func isValidPhone(phone string) bool {
    return phonePattern.MatchString(phone)
}

// main prints the validation result for three sample phone numbers.
func main() {
    samples := []string{
        "1234567890",   // true
        "+11234567890", // true
        "123",          // false
    }
    for _, phone := range samples {
        fmt.Println(isValidPhone(phone))
    }
}

URL Extraction

package main

import (
    "fmt"
    "regexp"
)

// urlPattern is compiled once at package initialisation instead of on
// every extractURLs call.
var urlPattern = regexp.MustCompile(`https?://[^\s]+`)

// extractURLs returns every http:// or https:// URL found in text, in
// order of appearance, or nil when there are none. A URL is taken to run
// until the next whitespace character, so punctuation directly after a
// URL (for example a trailing "." or ")") is included in the result.
func extractURLs(text string) []string {
    return urlPattern.FindAllString(text, -1)
}

// main pulls the URLs out of a sample sentence and prints them.
func main() {
    const text = "Check out https://golang.org and http://example.com"
    fmt.Println(extractURLs(text))
    // Output: [https://golang.org http://example.com]
}

IP Address Validation

package main

import (
    "fmt"
    "regexp"
)

// ipPattern is compiled once at package initialisation instead of on
// every isValidIP call.
var ipPattern = regexp.MustCompile(`^(\d{1,3}\.){3}\d{1,3}$`)

// isValidIP reports whether ip looks like a dotted-quad IPv4 address:
// four groups of one to three digits separated by dots. This is a shape
// check only; the numeric range of each group is not verified, so a
// value such as "256.1.1.1" is accepted.
func isValidIP(ip string) bool {
    return ipPattern.MatchString(ip)
}

// main prints the validation result for three sample addresses.
func main() {
    samples := []string{
        "192.168.1.1", // true
        "256.1.1.1",   // true (basic check)
        "192.168.1",   // false
    }
    for _, ip := range samples {
        fmt.Println(isValidIP(ip))
    }
}

String Replacement

Simple Replacement

package main

import (
    "fmt"
    "regexp"
)

// main shows two ways of rewriting every run of digits in a string.
func main() {
    digits := regexp.MustCompile(`\d+`)
    const input = "abc123def456"

    // ReplaceAllString substitutes every match, not just the first
    fmt.Println(digits.ReplaceAllString(input, "X")) // abcXdefX

    // ReplaceAllStringFunc calls the function once per match and uses
    // its return value as the replacement
    bracket := func(run string) string {
        return "[" + run + "]"
    }
    fmt.Println(digits.ReplaceAllStringFunc(input, bracket)) // abc[123]def[456]
}

Capture Group Replacement

package main

import (
    "fmt"
    "regexp"
)

// main rewrites an e-mail address using the text captured by each group.
func main() {
    re := regexp.MustCompile(`(\w+)@(\w+\.\w+)`)

    // In the replacement template $1 and $2 expand to the first and
    // second capture groups.
    result := re.ReplaceAllString(
        "user@example.com",
        "$1 at $2",
    )
    fmt.Println(result)  // user at example.com
}

Splitting and Finding

Split by Pattern

package main

import (
    "fmt"
    "regexp"
)

// main splits a string on runs of whitespace.
func main() {
    whitespace := regexp.MustCompile(`\s+`)

    // A limit of -1 returns every piece
    fmt.Println(whitespace.Split("hello   world   go", -1)) // [hello world go]
}

Find All with Indices

package main

import (
    "fmt"
    "regexp"
)

// main prints the byte offsets and text of every run of digits.
func main() {
    text := "abc123def456ghi789"
    digits := regexp.MustCompile(`\d+`)

    // Each result is a two-element slice: start offset and end offset
    // (exclusive) of one match within text
    for _, span := range digits.FindAllStringIndex(text, -1) {
        from, to := span[0], span[1]
        fmt.Printf("Match at %d-%d: %s\n", from, to, text[from:to])
    }
}

Performance Considerations

Compile Once, Use Many Times

package main

import (
    "fmt"
    "regexp"
    "time"
)

// ❌ Bad: the pattern is recompiled on every single call
func validateEmailBad(email string) bool {
    return regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`).MatchString(email)
}

// ✅ Good: the pattern is compiled a single time, at package initialisation
var emailRegex = regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)

// validateEmailGood reuses the precompiled emailRegex for each check.
func validateEmailGood(email string) bool {
    matched := emailRegex.MatchString(email)
    return matched
}

// main times 100000 validations with each approach to show the cost of
// recompiling the pattern on every call.
func main() {
    // A well-formed address, so both validators do a full successful match
    email := "user@example.com"

    start := time.Now()
    for i := 0; i < 100000; i++ {
        validateEmailBad(email)
    }
    fmt.Println("Bad:", time.Since(start))

    start = time.Now()
    for i := 0; i < 100000; i++ {
        validateEmailGood(email)
    }
    fmt.Println("Good:", time.Since(start))
}

Practical Examples

Log Parser

package main

import (
    "fmt"
    "regexp"
)

// LogEntry is one parsed log line.
type LogEntry struct {
    Level   string // text found between the square brackets, e.g. "ERROR"
    Message string // remainder of the line after the level and whitespace
}

// logLinePattern is compiled once at package initialisation instead of
// on every parseLog call.
var logLinePattern = regexp.MustCompile(`\[(\w+)\]\s+(.+)`)

// parseLog extracts the level and message from a line of the form
// "[LEVEL] message". It returns nil when line does not contain that
// shape, so callers must check the result before using it.
func parseLog(line string) *LogEntry {
    match := logLinePattern.FindStringSubmatch(line)
    if match == nil {
        return nil
    }

    return &LogEntry{
        Level:   match[1],
        Message: match[2],
    }
}

// main parses one sample log line and prints its level and message.
func main() {
    line := "[ERROR] Database connection failed"
    entry := parseLog(line)
    if entry == nil {
        // parseLog returns nil for lines it cannot parse; dereferencing
        // that result would panic.
        fmt.Printf("unparseable log line: %q\n", line)
        return
    }
    fmt.Printf("Level: %s, Message: %s\n", entry.Level, entry.Message)
}

CSV Parser

package main

import (
    "fmt"
    "regexp"
)

// csvFieldPattern is compiled once at package initialisation instead of
// on every parseCSV call. Group 1 captures the inside of a double-quoted
// field; group 2 captures an unquoted field.
var csvFieldPattern = regexp.MustCompile(`"([^"]*)"|([^,]+)`)

// parseCSV splits a single CSV line into its fields, stripping the
// surrounding double quotes from quoted fields. It returns nil when the
// line contains no fields.
//
// This is a simplified parser: empty unquoted fields (as in "a,,b") are
// skipped rather than returned as "", and escaped quotes inside a quoted
// field are not supported.
func parseCSV(line string) []string {
    var result []string
    for _, match := range csvFieldPattern.FindAllStringSubmatch(line, -1) {
        // Exactly one alternative matched; an empty group 1 means either
        // the unquoted alternative matched or the quoted field was empty,
        // and in both cases group 2 holds the right value.
        field := match[1]
        if field == "" {
            field = match[2]
        }
        result = append(result, field)
    }
    return result
}

// main parses one sample CSV line and prints the resulting fields.
func main() {
    fmt.Println(parseCSV(`"John Doe",30,"New York"`)) // [John Doe 30 New York]
}

Best Practices

✅ Good Practices

  1. Compile once - Store compiled regex in variables
  2. Use raw strings - Avoid escaping backslashes
  3. Test patterns - Use regex testing tools
  4. Document patterns - Explain complex regex
  5. Use named groups - For clarity
  6. Handle errors - Check for compilation errors
  7. Use appropriate anchors - ^ and $ for boundaries
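  8. Keep patterns simple - Go's RE2 engine matches in linear time, so catastrophic backtracking cannot occur, but simple patterns are still easier to read and maintain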

โŒ Anti-Patterns

// ❌ Bad: the pattern is recompiled on every call
func validate(s string) bool {
    return regexp.MustCompile(`pattern`).MatchString(s)
}

// ✅ Good: the pattern is compiled once, at package initialisation
var re = regexp.MustCompile(`pattern`)

func validate(s string) bool {
    matched := re.MatchString(s)
    return matched
}

// ❌ Bad: Escaped backslashes (an interpreted string needs every
// backslash in the pattern doubled)
re := regexp.MustCompile("\\d+")

// ✅ Good: Raw strings (backticks pass the pattern through unchanged)
re := regexp.MustCompile(`\d+`)

// ❌ Bad: Needlessly nested quantifiers. In backtracking regex engines a
// pattern like this can cause catastrophic backtracking; Go's RE2-based
// engine runs in time linear in the input, so here the cost is mainly
// readability.
re := regexp.MustCompile(`(a+)+b`)

// ✅ Good: Simple patterns
re := regexp.MustCompile(`a+b`)

Resources and References

Official Documentation

Tools and Resources

Summary

Regular expressions are powerful for pattern matching:

  • Compile patterns once for performance
  • Use raw strings to avoid escaping
  • Leverage capturing groups for extraction
  • Use named groups for clarity
  • Test patterns with online tools
  • Document complex patterns
  • Keep patterns simple (Go's RE2 engine already rules out catastrophic backtracking)

Master regex for effective text processing in Go.

Comments