Skip to main content
⚡ Calmops

Regex and Text Processing in Rust

Regex and Text Processing in Rust

TL;DR: This guide covers regex and text processing in Rust. You’ll learn pattern matching, text parsing, input validation, and performance techniques using the regex crate.


Introduction

Text processing is fundamental to many applications. Rust’s regex crate provides:

  • Fast pattern matching
  • Capture groups
  • Unicode support
  • Named captures

Basic Pattern Matching

use regex::Regex;

/// Demonstrates the two most common queries against a compiled pattern:
/// testing whether a match exists and walking over every match.
///
/// # Panics
///
/// Panics if the hard-coded pattern fails to compile.
pub fn basic_matching() {
    let digits = Regex::new(r"\d+").unwrap();

    // `is_match` only reports whether the pattern matches somewhere in the
    // text; it does not return the match itself.
    let has_number = digits.is_match("abc123def");
    if has_number {
        println!("Found number!");
    }

    // `find_iter` yields each successive non-overlapping match.
    digits
        .find_iter("123 abc 456 def 789")
        .map(|hit| hit.as_str())
        .for_each(|text| println!("Found: {}", text));
}

Capture Groups

/// Splits an ISO-style `YYYY-MM-DD` date into its components with numbered
/// capture groups and prints it as `MM/DD/YYYY`.
///
/// # Panics
///
/// Panics if the hard-coded pattern fails to compile.
pub fn capture_groups() {
    let iso_date = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();

    // Nothing to print when the input does not look like a date.
    let Some(parts) = iso_date.captures("2024-12-25") else {
        return;
    };

    // Group 0 is the whole match; groups 1 to 3 are year, month and day.
    // None of them is optional, so all three are present whenever the
    // pattern matched and indexing cannot panic here.
    let (year, month, day) = (&parts[1], &parts[2], &parts[3]);

    println!("{}/{}/{}", month, day, year);
}

Input Validation

/// A bundle of pre-compiled patterns for checking common input formats.
///
/// The patterns are compiled once in `Validator::new` so that repeated
/// checks do not pay the compilation cost again.
#[derive(Debug, Clone)]
pub struct Validator {
    /// Pattern an e-mail address must match in full.
    email: Regex,
    /// Pattern a phone number must match in full.
    phone: Regex,
    /// Pattern an `http`/`https` URL must match in full.
    url: Regex,
}

impl Validator {
    /// Compiles every validation pattern.
    ///
    /// # Panics
    ///
    /// Panics only if one of the hard-coded patterns is not a valid regular
    /// expression, which would be a programming error.
    pub fn new() -> Self {
        Self {
            email: Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
                .expect("hard-coded email pattern must compile"),
            phone: Regex::new(r"^\+?1?\d{9,15}$")
                .expect("hard-coded phone pattern must compile"),
            url: Regex::new(r"^https?://[^\s]+$")
                .expect("hard-coded url pattern must compile"),
        }
    }

    /// Returns `true` when `email` matches the e-mail pattern in full.
    pub fn is_valid_email(&self, email: &str) -> bool {
        self.email.is_match(email)
    }

    /// Returns `true` when `phone` matches the phone pattern in full:
    /// an optional `+`, an optional `1`, then 9 to 15 digits.
    pub fn is_valid_phone(&self, phone: &str) -> bool {
        self.phone.is_match(phone)
    }

    /// Returns `true` when `url` starts with `http://` or `https://` and
    /// contains no whitespace afterwards.
    pub fn is_valid_url(&self, url: &str) -> bool {
        self.url.is_match(url)
    }
}

impl Default for Validator {
    /// Equivalent to [`Validator::new`].
    fn default() -> Self {
        Self::new()
    }
}

Text Replacement

/// Upper-cases every word of a sample sentence by passing a closure as the
/// replacement to `replace_all`, then prints the result.
///
/// # Panics
///
/// Panics if the hard-coded pattern fails to compile.
pub fn replace_text() {
    let word = Regex::new(r"\b(\w+)\b").unwrap();
    let input = "hello world";

    // The closure is called once per match and its return value is used as
    // the replacement text for that match.
    let shouted = word.replace_all(input, |groups: &regex::Captures| groups[1].to_uppercase());

    println!("{}", shouted); // "HELLO WORLD"
}

Performance Tips

/// E-mail pattern shared by all callers.
///
/// `LazyLock` runs the initializer on first access only, so the pattern is
/// compiled once and then reused.
static EMAIL_RE: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap()
});

pub fn fast_validation(email: &str) -> bool {
    EMAIL_RE.is_match(email)
}

Conclusion

Rust’s regex crate provides powerful text processing:

  1. Pattern matching - Fast, Unicode-aware
  2. Captures - Extract and manipulate
  3. Validation - Checking input against expected formats
  4. Replacement - Text transformation

Comments