Regular Expressions in Go
Go’s regexp package provides powerful regular expression support using RE2 syntax. This guide covers pattern matching, capturing groups, and practical regex patterns.
Basic Pattern Matching
Simple Matching
package main
import (
"fmt"
"regexp"
)
// main demonstrates the three basic calls for a literal pattern: a boolean
// test, the first match, and every match.
func main() {
	// MustCompile panics on an invalid pattern, which is acceptable for a
	// constant expression like this one.
	greeting := regexp.MustCompile(`hello`)

	// MatchString reports whether the pattern occurs anywhere in the input.
	for _, input := range []string{"hello world", "goodbye world"} {
		fmt.Println(greeting.MatchString(input)) // true, then false
	}

	const repeated = "hello world hello"

	// FindString returns only the leftmost match.
	fmt.Println(greeting.FindString(repeated)) // hello

	// A negative count asks FindAllString for every match.
	fmt.Println(greeting.FindAllString(repeated, -1)) // [hello hello]
}
Case-Insensitive Matching
package main
import (
"fmt"
"regexp"
)
// main shows that the (?i) flag makes a pattern match regardless of case.
func main() {
	// (?i) at the start applies case-insensitivity to the whole pattern.
	anyCase := regexp.MustCompile(`(?i)hello`)

	for _, word := range []string{"Hello", "HELLO", "hello"} {
		fmt.Println(anyCase.MatchString(word)) // true for every spelling
	}
}
Capturing Groups
Basic Capturing
package main
import (
"fmt"
"regexp"
)
// main extracts the user and domain parts of an address with two
// unnamed capturing groups.
func main() {
	re := regexp.MustCompile(`(\w+)@(\w+\.\w+)`)

	// FindStringSubmatch returns nil when the pattern does not match, so
	// check before indexing into the result.
	match := re.FindStringSubmatch("user@example.com")
	if match == nil {
		fmt.Println("no match")
		return
	}

	fmt.Println(match)
	// Output: [user@example.com user example.com]
	// match[0] = full match
	// match[1] = first group
	// match[2] = second group
}
Named Groups
package main
import (
"fmt"
"regexp"
)
// main collects the named groups of a match into a map keyed by group name.
func main() {
	re := regexp.MustCompile(`(?P<user>\w+)@(?P<domain>\w+\.\w+)`)

	// A nil result means no match; indexing it below would panic.
	match := re.FindStringSubmatch("user@example.com")
	if match == nil {
		fmt.Println("no match")
		return
	}

	// SubexpNames is index-aligned with match. Entry 0 (the whole match)
	// and any unnamed group have an empty name and are skipped.
	result := make(map[string]string)
	for i, name := range re.SubexpNames() {
		if name == "" {
			continue
		}
		result[name] = match[i]
	}

	fmt.Println(result)
	// Output: map[domain:example.com user:user]
}
Common Patterns
Email Validation
package main
import (
"fmt"
"regexp"
)
// emailPattern is compiled once at package initialisation so that
// isValidEmail does not pay the compilation cost on every call.
var emailPattern = regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)

// isValidEmail reports whether email has the shape local@domain.tld, where
// the top-level domain is at least two letters. The anchors force the whole
// string to match, so surrounding text is rejected.
func isValidEmail(email string) bool {
	return emailPattern.MatchString(email)
}
// main runs isValidEmail over two well-formed addresses and one string
// with no @ sign.
func main() {
	fmt.Println(isValidEmail("user@example.com"))            // true
	fmt.Println(isValidEmail("invalid.email"))               // false
	fmt.Println(isValidEmail("user.name+tag@example.co.uk")) // true
}
Phone Number Validation
package main
import (
"fmt"
"regexp"
)
// phonePattern is compiled once at package initialisation rather than on
// every isValidPhone call.
var phonePattern = regexp.MustCompile(`^\+?1?\d{9,15}$`)

// isValidPhone reports whether phone is an optional "+", an optional
// leading "1", and then 9 to 15 digits, with nothing else in the string.
func isValidPhone(phone string) bool {
	return phonePattern.MatchString(phone)
}
// main prints the isValidPhone verdict for three sample numbers.
func main() {
	samples := []string{"1234567890", "+11234567890", "123"}
	for _, number := range samples {
		fmt.Println(isValidPhone(number)) // true, true, false
	}
}
URL Extraction
package main
import (
"fmt"
"regexp"
)
// urlPattern is compiled once at package initialisation rather than on
// every extractURLs call.
var urlPattern = regexp.MustCompile(`https?://[^\s]+`)

// extractURLs returns every http:// or https:// URL found in text, in
// order of appearance. Each URL runs up to the next whitespace character,
// so punctuation that directly follows a URL is included in the result.
// It returns nil when text contains no URL.
func extractURLs(text string) []string {
	return urlPattern.FindAllString(text, -1)
}
// main pulls the two links out of a sample sentence and prints them.
func main() {
	found := extractURLs("Check out https://golang.org and http://example.com")
	fmt.Println(found)
	// Output: [https://golang.org http://example.com]
}
IP Address Validation
package main
import (
"fmt"
"regexp"
)
// ipv4Pattern is compiled once at package initialisation rather than on
// every isValidIP call.
var ipv4Pattern = regexp.MustCompile(`^(\d{1,3}\.){3}\d{1,3}$`)

// isValidIP reports whether ip looks like a dotted quad: four groups of one
// to three digits separated by dots. This is a shape check only; the value
// of each group is not range-checked, so "256.1.1.1" is accepted.
func isValidIP(ip string) bool {
	return ipv4Pattern.MatchString(ip)
}
// main prints the isValidIP verdict for three sample inputs.
func main() {
	candidates := []string{
		"192.168.1.1", // true
		"256.1.1.1",   // true (basic check)
		"192.168.1",   // false
	}
	for _, addr := range candidates {
		fmt.Println(isValidIP(addr))
	}
}
String Replacement
Simple Replacement
package main
import (
"fmt"
"regexp"
)
// main replaces runs of digits, first with a fixed string and then with
// the result of a callback.
func main() {
	re := regexp.MustCompile(`\d+`)

	// ReplaceAllString replaces every match, not just the first one.
	result := re.ReplaceAllString("abc123def456", "X")
	fmt.Println(result) // abcXdefX

	// ReplaceAllStringFunc calls the function once per match and
	// substitutes its return value.
	result = re.ReplaceAllStringFunc("abc123def456", func(s string) string {
		return "[" + s + "]"
	})
	fmt.Println(result) // abc[123]def[456]
}
Capture Group Replacement
package main
import (
"fmt"
"regexp"
)
// main rewrites an address using the text captured by each group.
func main() {
	re := regexp.MustCompile(`(\w+)@(\w+\.\w+)`)

	// In the replacement template, $1 and $2 expand to the first and
	// second capturing groups.
	result := re.ReplaceAllString(
		"user@example.com",
		"$1 at $2",
	)
	fmt.Println(result) // user at example.com
}
Splitting and Finding
Split by Pattern
package main
import (
"fmt"
"regexp"
)
// main splits a sentence on runs of whitespace.
func main() {
	whitespace := regexp.MustCompile(`\s+`)

	// A negative limit asks Split for all substrings.
	words := whitespace.Split("hello world go", -1)
	fmt.Println(words) // [hello world go]
}
Find All with Indices
package main
import (
"fmt"
"regexp"
)
// main prints the byte offsets and text of every run of digits.
func main() {
	digits := regexp.MustCompile(`\d+`)
	text := "abc123def456ghi789"

	// Each entry is a two-element slice: the start offset and the offset
	// just past the end of the match.
	for _, span := range digits.FindAllStringIndex(text, -1) {
		start, end := span[0], span[1]
		fmt.Printf("Match at %d-%d: %s\n", start, end, text[start:end])
	}
}
Performance Considerations
Compile Once, Use Many Times
package main
import (
"fmt"
"regexp"
"time"
)
// ❌ Bad: Compile every time
// validateEmailBad reports whether email matches the address pattern.
// It deliberately recompiles the expression on every call so that the
// benchmark can show how much that costs.
func validateEmailBad(email string) bool {
	return regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`).MatchString(email)
}
// ✅ Good: Compile once
// emailRegex is built a single time, when the package is initialised.
var emailRegex = regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)

// validateEmailGood reports whether email matches the address pattern,
// reusing the shared compiled expression.
func validateEmailGood(email string) bool {
	matched := emailRegex.MatchString(email)
	return matched
}
// main times the same number of validations with the per-call compile and
// with the shared compiled expression, and prints both durations.
func main() {
	const iterations = 100000

	// Use a well-formed address so both variants time the matching path
	// rather than a rejection.
	email := "user@example.com"

	start := time.Now()
	for i := 0; i < iterations; i++ {
		validateEmailBad(email)
	}
	fmt.Println("Bad:", time.Since(start))

	start = time.Now()
	for i := 0; i < iterations; i++ {
		validateEmailGood(email)
	}
	fmt.Println("Good:", time.Since(start))
}
Practical Examples
Log Parser
package main
import (
"fmt"
"regexp"
)
// LogEntry holds the two pieces that parseLog extracts from one log line.
type LogEntry struct {
// Level is the word parseLog captures between the square brackets.
Level string
// Message is the text parseLog captures after the bracketed level.
Message string
}
// logLinePattern is compiled once at package initialisation rather than on
// every parseLog call. Group 1 is the bracketed level and group 2 is the
// rest of the line after the whitespace that follows it.
var logLinePattern = regexp.MustCompile(`\[(\w+)\]\s+(.+)`)

// parseLog extracts the level and message from a line shaped like
// "[LEVEL] message". It returns nil when line does not contain that shape,
// so callers must check the result before using it.
func parseLog(line string) *LogEntry {
	match := logLinePattern.FindStringSubmatch(line)
	if match == nil {
		return nil
	}
	return &LogEntry{
		Level:   match[1],
		Message: match[2],
	}
}
// main parses one sample log line and prints its level and message.
func main() {
	line := "[ERROR] Database connection failed"

	// parseLog returns nil for a line it cannot parse; dereferencing that
	// would panic, so handle it explicitly.
	entry := parseLog(line)
	if entry == nil {
		fmt.Println("unrecognised log line:", line)
		return
	}
	fmt.Printf("Level: %s, Message: %s\n", entry.Level, entry.Message)
}
CSV Parser
package main
import (
"fmt"
"regexp"
)
// csvFieldPattern is compiled once at package initialisation rather than on
// every parseCSV call. Group 1 is the inside of a double-quoted field and
// group 2 is an unquoted run of non-comma characters.
var csvFieldPattern = regexp.MustCompile(`"([^"]*)"|([^,]+)`)

// parseCSV splits one comma-separated line into fields and strips the
// surrounding quotes from quoted fields. It returns nil when no field is
// found.
//
// This is a simplified parser: an empty unquoted field (as in "a,,b") is
// skipped rather than returned as "", and escaped quotes inside a quoted
// field are not supported.
func parseCSV(line string) []string {
	var fields []string
	for _, groups := range csvFieldPattern.FindAllStringSubmatch(line, -1) {
		// Prefer the quoted capture. When it is empty, fall back to the
		// unquoted capture, which is also empty for a quoted "" field.
		field := groups[2]
		if quoted := groups[1]; quoted != "" {
			field = quoted
		}
		fields = append(fields, field)
	}
	return fields
}
// main parses one sample record and prints the resulting fields.
func main() {
	const record = `"John Doe",30,"New York"`
	fmt.Println(parseCSV(record)) // [John Doe 30 New York]
}
Best Practices
✅ Good Practices
- Compile once - Store compiled regex in variables
- Use raw strings - Avoid escaping backslashes
- Test patterns - Use regex testing tools
- Document patterns - Explain complex regex
- Use named groups - For clarity
- Handle errors - Check for compilation errors
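- Use appropriate anchors - `^` and `$` for boundaries
- Avoid catastrophic backtracking - Keep patterns simple (Go's RE2 engine matches in linear time, but simple patterns are still easier to read and maintain)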
❌ Anti-Patterns
// ❌ Bad: Compile every time
// validate reports whether s contains a match for the pattern. It
// recompiles the expression on each call, which is the anti-pattern
// being illustrated.
func validate(s string) bool {
	return regexp.MustCompile(`pattern`).MatchString(s)
}
// ✅ Good: Compile once
// re is compiled a single time, when the package is initialised.
var re = regexp.MustCompile(`pattern`)

// validate reports whether s contains a match for the shared pattern.
func validate(s string) bool {
	found := re.MatchString(s)
	return found
}
// ❌ Bad: Escaped backslashes
// An interpreted string needs a doubled backslash to produce the regex \d+.
re := regexp.MustCompile("\\d+")
// ✅ Good: Raw strings
// A raw (backquoted) string passes the backslash to the regexp parser unchanged.
re := regexp.MustCompile(`\d+`)
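// ❌ Bad: Nested quantifiers (catastrophic backtracking in backtracking engines; Go's RE2 stays linear, but the pattern is needlessly complex)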
// (a+)+b nests one quantifier inside another; it accepts the same inputs as a+b.
re := regexp.MustCompile(`(a+)+b`)
// ✅ Good: Simple patterns
// A single quantifier is enough to match a run of a's followed by b.
re := regexp.MustCompile(`a+b`)
Resources and References
Official Documentation
- regexp Package - Complete reference
- RE2 Syntax - Pattern syntax
- Effective Go - Best practices
Recommended Reading
- Regular Expression Tutorial - Comprehensive guide
- Regex Cheat Sheet - Quick reference
- Go Regex Examples - Official examples
Tools and Resources
- Regex101 - Online regex tester
- Regex Pal - Pattern testing
- Go Playground - Online Go editor
Summary
Regular expressions are powerful for pattern matching:
- Compile patterns once for performance
- Use raw strings to avoid escaping
- Leverage capturing groups for extraction
- Use named groups for clarity
- Test patterns with online tools
- Document complex patterns
- Avoid catastrophic backtracking
Master regex for effective text processing in Go.
Comments