Skip to main content
⚡ Calmops

Profiling Go Programs: CPU and Memory

Profiling Go Programs: CPU and Memory

Profiling is essential for understanding where your Go program spends time and memory. Go’s built-in profiling tools, particularly pprof, provide powerful capabilities to identify performance bottlenecks and optimize your applications.

Understanding Go Profiling

Go provides several types of profiling:

  • CPU Profiling: Shows where your program spends CPU time
  • Memory Profiling: Tracks heap allocations and memory usage
  • Goroutine Profiling: Analyzes active goroutines
  • Block Profiling: Measures synchronization blocking
  • Mutex Profiling: Analyzes lock contention

CPU Profiling

CPU profiling samples your program’s execution to determine where it spends time.

Basic CPU Profiling

package main

import (
	"fmt"
	"os"
	"runtime/pprof"
)

// fibonacci returns the n-th Fibonacci number (fibonacci(0) == 0,
// fibonacci(1) == 1) via the naive exponential-time recursion —
// deliberately slow so the CPU profiler has something to measure.
func fibonacci(n int) int {
	switch {
	case n < 2:
		return n
	default:
		return fibonacci(n-2) + fibonacci(n-1)
	}
}

// main profiles a CPU-bound workload (naive Fibonacci) and writes the
// samples to cpu.prof for analysis with `go tool pprof`.
func main() {
	// Create the file that will receive the CPU profile data.
	f, err := os.Create("cpu.prof")
	if err != nil {
		fmt.Println("Could not create CPU profile:", err)
		return
	}
	// Deferred FIRST, so it runs LAST: the file must stay open until
	// StopCPUProfile (deferred below) has flushed the profile into it.
	defer f.Close()

	// Start CPU profiling; samples accumulate until StopCPUProfile.
	if err := pprof.StartCPUProfile(f); err != nil {
		fmt.Println("Could not start CPU profile:", err)
		return
	}
	defer pprof.StopCPUProfile()

	// The workload being profiled.
	result := fibonacci(40)
	fmt.Println("Result:", result)
}

Analyzing CPU Profiles

# Generate profile
go run main.go

# Analyze with pprof
go tool pprof cpu.prof

# Common pprof commands
# top - show top functions by CPU time
# list <function> - show source code with CPU time
# web - generate graph (requires graphviz)

Good: Targeted CPU Profiling

package main

import (
	"fmt"
	"os"
	"runtime/pprof"
	"time"
)

// expensiveOperation busy-loops over a billion additions purely to
// consume CPU time; the result is discarded.
func expensiveOperation() {
	total := 0
	for n := 0; n < 1000000000; n++ {
		total += n
	}
	_ = total
}

// cheapOperation performs a trivially fast computation, as a contrast
// to expensiveOperation in the resulting profile.
func cheapOperation() {
	const one = 1
	_ = one + one
}

// main profiles only the expensive operation, checking every error —
// the original discarded the errors from os.Create and
// StartCPUProfile with `_`, so a failure would silently produce an
// empty or missing profile.
func main() {
	f, err := os.Create("cpu.prof")
	if err != nil {
		fmt.Println("Could not create CPU profile:", err)
		return
	}
	defer f.Close()

	if err := pprof.StartCPUProfile(f); err != nil {
		fmt.Println("Could not start CPU profile:", err)
		return
	}
	defer pprof.StopCPUProfile()

	start := time.Now()

	// Profile only the expensive operation
	expensiveOperation()

	fmt.Printf("Took: %v\n", time.Since(start))
}

Bad: Profiling Everything

// โŒ AVOID: Profiling includes startup overhead
package main

import (
	"os"
	"runtime/pprof"
)

// main illustrates the anti-pattern: profiling starts before any
// initialization, so startup work pollutes the profile. Start the
// profile immediately before the code of interest instead.
func main() {
	f, _ := os.Create("cpu.prof") // error deliberately ignored — this is the "bad" example
	defer f.Close()
	pprof.StartCPUProfile(f)
	defer pprof.StopCPUProfile()

	// Lots of initialization code would run here —
	// everything from this point on is captured in the profile.
	
	// Actual work happens here
}

Memory Profiling

Memory profiling tracks heap allocations and helps identify memory leaks.

Basic Memory Profiling

package main

import (
	"fmt"
	"os"
	"runtime"
	"runtime/pprof"
)

// allocateMemory builds and returns a one-million-element slice whose
// entries equal their own index, creating a visible heap allocation.
func allocateMemory() []int {
	const size = 1000000
	data := make([]int, size)
	for idx := 0; idx < size; idx++ {
		data[idx] = idx
	}
	return data
}

// main writes a heap profile that actually contains the example's
// allocations. The original wrote the profile BEFORE calling
// allocateMemory, so the profile could not show the allocation it was
// meant to demonstrate.
func main() {
	// Create memory profile file
	f, err := os.Create("mem.prof")
	if err != nil {
		fmt.Println("Could not create memory profile:", err)
		return
	}
	defer f.Close()

	// Do the allocations BEFORE snapshotting the heap.
	data := allocateMemory()

	// Force a GC so the heap statistics are up to date.
	runtime.GC()

	if err := pprof.WriteHeapProfile(f); err != nil {
		fmt.Println("Could not write memory profile:", err)
		return
	}

	// Using data after the write keeps it live, so it appears as
	// in-use memory in the profile.
	fmt.Println("Allocated:", len(data))
}

Analyzing Memory Profiles

# Generate profile
go run main.go

# Analyze with pprof
go tool pprof mem.prof

# Common commands
# top - show top allocators
# list <function> - show allocation by line
# alloc_space - total allocations
# alloc_objects - number of allocations
# inuse_space - current memory usage
# inuse_objects - current object count

Good: Tracking Memory Allocations

package main

import (
	"fmt"
	"os"
	"runtime"
	"runtime/pprof"
)

// DataProcessor demonstrates allocation-friendly patterns: a reusable
// scratch buffer plus a cache of processed results.
type DataProcessor struct {
	buffer []byte            // reused across Process calls to avoid reallocating
	cache  map[string][]byte // generated key -> private copy of the processed bytes
}

// NewDataProcessor returns a DataProcessor with an empty cache and a
// zero-length scratch buffer pre-sized to 4 KiB of capacity.
func NewDataProcessor() *DataProcessor {
	dp := &DataProcessor{}
	dp.buffer = make([]byte, 0, 4096)
	dp.cache = make(map[string][]byte)
	return dp
}

// Process copies data through the reusable scratch buffer and stores a
// private copy in the cache under a freshly generated key.
func (dp *DataProcessor) Process(data []byte) {
	// Truncate (not reallocate) the scratch buffer, then load the input.
	dp.buffer = dp.buffer[:0]
	dp.buffer = append(dp.buffer, data...)

	// The key is derived from the current cache size, so every call
	// inserts a new entry.
	key := fmt.Sprintf("item_%d", len(dp.cache))
	copied := make([]byte, len(dp.buffer))
	copy(copied, dp.buffer)
	dp.cache[key] = copied
}

// main runs the workload and then snapshots the heap exactly once.
// The original called WriteHeapProfile twice on the same file — the
// second write appends a second profile stream, leaving mem.prof
// unreadable as a single profile — and the first write happened before
// the workload, so it captured nothing. Errors were also ignored.
func main() {
	f, err := os.Create("mem.prof")
	if err != nil {
		fmt.Println("Could not create memory profile:", err)
		return
	}
	defer f.Close()

	processor := NewDataProcessor()
	for i := 0; i < 1000; i++ {
		processor.Process([]byte("test data"))
	}

	// GC first so the heap profile reflects up-to-date statistics.
	runtime.GC()
	if err := pprof.WriteHeapProfile(f); err != nil {
		fmt.Println("Could not write memory profile:", err)
	}
}

Bad: Excessive Allocations

// โŒ AVOID: Creating new allocations in loops
package main

import (
	"fmt"
)

// processData is the intentionally bad example: every one of the
// million iterations allocates a fresh slice and a fresh map, putting
// enormous pressure on the garbage collector. A heap profile of this
// function shows the per-iteration allocations dominating.
func processData() {
	for i := 0; i < 1000000; i++ {
		// Creates new slice each iteration (should be hoisted/reused)
		data := make([]byte, 1024)
		// Creates new map each iteration (should be hoisted/reused)
		cache := make(map[string]string)
		
		data[0] = byte(i)
		cache["key"] = fmt.Sprintf("value_%d", i)
	}
}

// main drives the allocation-heavy workload; profile this program with
// a heap profile to see the cost of the per-iteration allocations.
func main() {
	processData()
}

HTTP Profiling

Go’s net/http/pprof package provides HTTP endpoints for profiling.

Setting Up HTTP Profiling

package main

import (
	"fmt"
	"log"
	"net/http"
	_ "net/http/pprof" // side effect: registers /debug/pprof/* on DefaultServeMux
)

// NOTE: the original example also imported "time", which was never
// used — an unused import is a compile error in Go, so the example
// could not build. It has been removed.

// expensiveHandler burns CPU on a billion additions so the CPU profile
// has something visible to attribute to this handler.
func expensiveHandler(w http.ResponseWriter, r *http.Request) {
	sum := 0
	for i := 0; i < 1000000000; i++ {
		sum += i
	}
	fmt.Fprintf(w, "Sum: %d\n", sum)
}

// main serves the demo handler plus the pprof endpoints on :6060.
func main() {
	http.HandleFunc("/expensive", expensiveHandler)

	// pprof endpoints are registered by the blank import above:
	// /debug/pprof/          - profile index
	// /debug/pprof/heap      - memory profile
	// /debug/pprof/profile   - CPU profile
	// /debug/pprof/goroutine - goroutine profile

	log.Println("Server running on :6060")
	log.Fatal(http.ListenAndServe(":6060", nil))
}

Profiling HTTP Endpoints

# CPU profile (30 seconds)
go tool pprof http://localhost:6060/debug/pprof/profile?seconds=30

# Memory profile
go tool pprof http://localhost:6060/debug/pprof/heap

# Goroutine profile
go tool pprof http://localhost:6060/debug/pprof/goroutine

# Allocation profile (all allocations since program start)
go tool pprof http://localhost:6060/debug/pprof/allocs

Goroutine Profiling

Analyze goroutine creation and lifecycle.

Goroutine Profiling Example

package main

import (
	"fmt"
	"os"
	"runtime"
	"runtime/pprof"
	"time"
)

// worker simulates one unit of work (a 100 ms sleep) and then signals
// completion on done. The id parameter identifies the goroutine but is
// not otherwise used.
func worker(id int, done chan bool) {
	const workDuration = 100 * time.Millisecond
	time.Sleep(workDuration)
	done <- true
}

// main spawns 100 workers, waits for them, then dumps the goroutine
// profile. The original ignored the errors from os.Create and WriteTo,
// so a failure would silently produce no usable profile.
func main() {
	f, err := os.Create("goroutine.prof")
	if err != nil {
		fmt.Println("Could not create goroutine profile:", err)
		return
	}
	defer f.Close()

	// Create many goroutines
	done := make(chan bool, 100)
	for i := 0; i < 100; i++ {
		go worker(i, done)
	}

	// Wait for completion
	for i := 0; i < 100; i++ {
		<-done
	}

	// Write goroutine profile (debug=0: binary pprof format).
	if err := pprof.Lookup("goroutine").WriteTo(f, 0); err != nil {
		fmt.Println("Could not write goroutine profile:", err)
	}

	fmt.Printf("Active goroutines: %d\n", runtime.NumGoroutine())
}

Block Profiling

Measure synchronization blocking and contention.

Block Profiling Example

package main

import (
	"fmt"
	"os"
	"runtime"
	"runtime/pprof"
	"sync"
	"time"
)

// main demonstrates block profiling: ten goroutines contend for one
// mutex while each holds it for 1 ms per iteration. The original
// imported "fmt" without using it — a compile error in Go — and
// ignored the errors from os.Create and WriteTo; both are fixed here.
func main() {
	// Rate 1 records every blocking event (high overhead; demo only).
	runtime.SetBlockProfileRate(1)

	f, err := os.Create("block.prof")
	if err != nil {
		fmt.Println("Could not create block profile:", err)
		return
	}
	defer f.Close()

	var mu sync.Mutex
	done := make(chan bool, 10)

	// Create goroutines that contend for the lock.
	for i := 0; i < 10; i++ {
		go func() {
			for j := 0; j < 100; j++ {
				mu.Lock()
				time.Sleep(1 * time.Millisecond)
				mu.Unlock()
			}
			done <- true
		}()
	}

	// Wait for completion.
	for i := 0; i < 10; i++ {
		<-done
	}

	// Write block profile (debug=0: binary pprof format).
	if err := pprof.Lookup("block").WriteTo(f, 0); err != nil {
		fmt.Println("Could not write block profile:", err)
	}
}

Mutex Profiling

Analyze lock contention and mutex performance.

Mutex Profiling Example

package main

import (
	"fmt"
	"os"
	"runtime"
	"runtime/pprof"
	"sync"
)

// main demonstrates mutex profiling: 100 goroutines increment a shared
// counter under one lock, generating contention events. The original
// ignored the errors from os.Create and WriteTo; both are checked here.
func main() {
	// Fraction 1 samples every mutex contention event (demo setting).
	runtime.SetMutexProfileFraction(1)

	f, err := os.Create("mutex.prof")
	if err != nil {
		fmt.Println("Could not create mutex profile:", err)
		return
	}
	defer f.Close()

	var mu sync.Mutex
	counter := 0
	done := make(chan bool, 100)

	// Create goroutines that contend for the lock.
	for i := 0; i < 100; i++ {
		go func() {
			for j := 0; j < 1000; j++ {
				mu.Lock()
				counter++
				mu.Unlock()
			}
			done <- true
		}()
	}

	// Wait for completion.
	for i := 0; i < 100; i++ {
		<-done
	}

	fmt.Printf("Counter: %d\n", counter)

	// Write mutex profile (debug=0: binary pprof format).
	if err := pprof.Lookup("mutex").WriteTo(f, 0); err != nil {
		fmt.Println("Could not write mutex profile:", err)
	}
}

Practical Profiling Workflow

Step 1: Identify the Problem

package main

import (
	"fmt"
	"time"
)

// slowFunction wastes CPU by summing the integers 0..999999999 in a
// loop and discarding the result — a stand-in for a real bottleneck.
func slowFunction() {
	total := 0
	for n := 0; n < 1000000000; n++ {
		total += n
	}
	_ = total
}

// main times slowFunction end-to-end — step one of a profiling session
// is measuring how long the suspect code actually takes.
func main() {
	began := time.Now()
	slowFunction()
	elapsed := time.Since(began)
	fmt.Printf("Took: %v\n", elapsed)
}

Step 2: Profile the Code

# Add profiling to your code
# Run with profiling enabled
# Generate profile file

Step 3: Analyze Results

# Use pprof to analyze
go tool pprof cpu.prof

# Look for:
# - Functions consuming most CPU
# - Unexpected function calls
# - Inefficient algorithms

Step 4: Optimize

// Optimize based on findings
// Example: Use more efficient algorithm
// fastFunction computes in O(1) the same sum that slowFunction's loop
// produces, and returns it so the optimization can be verified.
// (Existing call sites that ignore the result still compile — Go
// allows discarding a return value in a call statement.)
func fastFunction() int {
	n := 1000000000
	// The loop `for i := 0; i < n; i++ { sum += i }` adds 0..n-1,
	// whose closed form is n*(n-1)/2. The original used n*(n+1)/2,
	// which sums 1..n and is off by exactly n.
	return n * (n - 1) / 2
}

Best Practices

  1. Profile in Production-Like Environment: Use realistic data and load
  2. Profile Long Enough: Ensure sufficient samples (at least 30 seconds for CPU)
  3. Isolate Changes: Profile before and after optimizations
  4. Use Multiple Profiles: Combine CPU, memory, and goroutine profiles
  5. Automate Profiling: Integrate profiling into your CI/CD pipeline
  6. Monitor Over Time: Track performance metrics across versions
  7. Profile Real Workloads: Use actual user data when possible

Common Pitfalls

  • Profiling Overhead: CPU profiling slows the program while active (typically a few percent)
  • Sampling Bias: Profiles are statistical samples, not exact
  • Short Runs: Insufficient data leads to inaccurate profiles
  • Ignoring Context: Profile results depend on system state
  • Over-Optimization: Don’t optimize prematurely based on profiles

Resources

Summary

Profiling is crucial for Go performance optimization. Use pprof to identify bottlenecks in CPU usage and memory allocation. Combine multiple profiling types for comprehensive analysis. Always profile in realistic environments and validate optimizations with before/after comparisons.

Comments