Skip to main content
โšก Calmops

Performance Tuning for Systems in Go

Performance Tuning for Systems in Go

Introduction

System-level performance tuning requires understanding bottlenecks and optimization techniques. This guide covers profiling, benchmarking, and optimization strategies.

Effective performance tuning ensures your system applications run efficiently under load.

Profiling

CPU Profiling

package main

import (
	"fmt"
	"os"
	"runtime/pprof"
)

// StartCPUProfile starts CPU profiling
// StartCPUProfile begins writing a CPU profile to the named file.
// The returned *os.File must later be handed to StopCPUProfile,
// which stops profiling and closes the file.
func StartCPUProfile(filename string) (*os.File, error) {
	out, err := os.Create(filename)
	if err != nil {
		return nil, err
	}
	// If profiling cannot start (e.g. already running), do not leak
	// the file handle we just created.
	if err := pprof.StartCPUProfile(out); err != nil {
		out.Close()
		return nil, err
	}
	return out, nil
}

// StopCPUProfile stops CPU profiling
// StopCPUProfile stops an in-progress CPU profile and closes the file
// returned by StartCPUProfile.
//
// A nil file is tolerated: callers that ignored the error from
// StartCPUProfile (see CPUProfilingExample) may pass nil, and the
// original code would panic on f.Close() in that case.
func StopCPUProfile(f *os.File) error {
	pprof.StopCPUProfile()
	if f == nil {
		return nil
	}
	return f.Close()
}

// Example usage
// CPUProfilingExample demonstrates CPU profiling around a hot loop.
// The original version discarded the error from StartCPUProfile and
// then deferred StopCPUProfile on a possibly-nil file; the error is
// now handled explicitly.
func CPUProfilingExample() {
	f, err := StartCPUProfile("cpu.prof")
	if err != nil {
		fmt.Println("could not start CPU profile:", err)
		return
	}
	defer StopCPUProfile(f)

	// Run code to profile
	for i := 0; i < 1000000; i++ {
		_ = i * i
	}
}

Memory Profiling

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
)

// WriteMemProfile writes memory profile
// WriteMemProfile writes a heap profile to the named file.
//
// runtime.GC is run first so the profile reflects live objects rather
// than garbage awaiting collection. Unlike the original, the error
// from Close is returned: this file is being written, and a deferred
// Close silently dropped write-flush failures.
func WriteMemProfile(filename string) error {
	f, err := os.Create(filename)
	if err != nil {
		return err
	}

	runtime.GC()
	if err := pprof.WriteHeapProfile(f); err != nil {
		f.Close()
		return err
	}
	return f.Close()
}

// WriteGoroutineProfile writes goroutine profile
// WriteGoroutineProfile writes the stack traces of all current
// goroutines to the named file (debug level 0: binary pprof format).
//
// As with WriteMemProfile, the Close error is propagated instead of
// being dropped by a deferred Close on a file being written.
func WriteGoroutineProfile(filename string) error {
	f, err := os.Create(filename)
	if err != nil {
		return err
	}

	// Lookup("goroutine") is a built-in profile and is always present.
	if err := pprof.Lookup("goroutine").WriteTo(f, 0); err != nil {
		f.Close()
		return err
	}
	return f.Close()
}

Good: Proper Performance Tuning

package main

import (
	"fmt"
	"runtime"
	"sync"
	"time"
)

// PerformanceMonitor monitors performance metrics
// PerformanceMonitor captures a baseline of wall-clock time and heap
// usage so that growth since creation can be reported later.
type PerformanceMonitor struct {
	startTime time.Time // moment the monitor was created
	startMem  uint64    // heap bytes allocated at creation (MemStats.Alloc)
}

// NewPerformanceMonitor records the current time and heap allocation
// as the baseline for a later Report call.
func NewPerformanceMonitor() *PerformanceMonitor {
	var m runtime.MemStats
	runtime.ReadMemStats(&m)

	return &PerformanceMonitor{
		startTime: time.Now(),
		startMem:  m.Alloc,
	}
}

// Report prints elapsed time, net heap growth, goroutine count, and
// GC cycles observed since the monitor was created.
func (pm *PerformanceMonitor) Report() {
	var m runtime.MemStats
	runtime.ReadMemStats(&m)

	duration := time.Since(pm.startTime)

	// m.Alloc can be BELOW the baseline if the GC freed memory in the
	// meantime; the original unsigned subtraction underflowed to a
	// huge positive number in that case. Compute a signed delta.
	var memUsed int64
	if m.Alloc >= pm.startMem {
		memUsed = int64(m.Alloc - pm.startMem)
	} else {
		memUsed = -int64(pm.startMem - m.Alloc)
	}

	fmt.Printf("Duration: %v\n", duration)
	fmt.Printf("Memory used: %d MB\n", memUsed/1024/1024)
	fmt.Printf("Goroutines: %d\n", runtime.NumGoroutine())
	fmt.Printf("GC runs: %d\n", m.NumGC)
}

// Benchmark benchmarks a function
// Benchmark runs fn once and prints a labelled performance report
// (duration, memory delta, goroutines, GC runs) for that single run.
func Benchmark(name string, fn func()) {
	pm := NewPerformanceMonitor()
	fn()

	fmt.Printf("=== %s ===\n", name)
	pm.Report()
}

// OptimizedLoop demonstrates optimized loop
// OptimizedLoop sums the integers 0..n-1 with a tight loop that does
// no per-iteration function calls or allocations.
func OptimizedLoop(n int) int64 {
	total := int64(0)
	for v := 0; v < n; v++ {
		total += int64(v)
	}
	return total
}

// UnoptimizedLoop demonstrates unoptimized loop
// UnoptimizedLoop computes the same sum as OptimizedLoop but
// deliberately allocates a fresh n-element slice every time the loop
// condition is evaluated, illustrating the cost of work hidden inside
// a loop condition.
func UnoptimizedLoop(n int) int64 {
	total := int64(0)
	i := 0
	// The make/len pair in the condition is the intentional mistake.
	for len(make([]int, n)) > i {
		total += int64(i)
		i++
	}
	return total
}

// ConcurrencyOptimization demonstrates concurrency optimization
// ConcurrencyOptimization sums data by splitting it into contiguous
// chunks processed by `workers` goroutines.
//
// Fixes over the original:
//   - The goroutine closure read the loop variable `i` to detect the
//     last worker; before Go 1.22 that variable is shared across
//     iterations, making the check a data race that could assign the
//     wrong chunk boundary. The worker index is now passed as an
//     explicit argument.
//   - workers < 1 caused a division by zero; it is clamped to 1.
func ConcurrencyOptimization(data []int, workers int) int64 {
	if workers < 1 {
		workers = 1
	}

	chunkSize := len(data) / workers
	results := make(chan int64, workers) // buffered: senders never block
	var wg sync.WaitGroup

	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(id, start int) {
			defer wg.Done()

			end := start + chunkSize
			// The last worker also absorbs the remainder of the slice.
			if id == workers-1 {
				end = len(data)
			}

			var sum int64
			for j := start; j < end; j++ {
				sum += int64(data[j])
			}

			results <- sum
		}(w, w*chunkSize)
	}

	wg.Wait()
	close(results)

	var total int64
	for sum := range results {
		total += sum
	}

	return total
}

// CacheOptimization demonstrates cache optimization
// CacheOptimized holds a flat slice so that traversal touches memory
// sequentially.
type CacheOptimized struct {
	data []int
}

// NewCacheOptimized allocates a zero-filled backing slice of the
// given size.
func NewCacheOptimized(size int) *CacheOptimized {
	co := &CacheOptimized{data: make([]int, size)}
	return co
}

// Process sums the backing slice front to back; the sequential walk
// gives good cache locality.
func (co *CacheOptimized) Process() int64 {
	total := int64(0)
	for _, v := range co.data {
		total += int64(v)
	}
	return total
}

Bad: Improper Performance Tuning

package main

// BAD: Function calls in loop
// BadLoop sums expensiveFunction(i) for i in 0..n-1. It is the "bad"
// counterpart in this article: the per-iteration call is the point
// being illustrated, so it is kept.
func BadLoop(n int) int64 {
	total := int64(0)
	for k := 0; k < n; k++ {
		total += int64(expensiveFunction(k))
	}
	return total
}

// expensiveFunction stands in for costly per-element work; here it
// simply squares its argument.
func expensiveFunction(x int) int {
	square := x * x
	return square
}

// BAD: No concurrency
// BadSequential sums data on a single goroutine; the article contrasts
// it with ConcurrencyOptimization, which fans the work out.
func BadSequential(data []int) int64 {
	total := int64(0)
	for i := 0; i < len(data); i++ {
		total += int64(data[i])
	}
	return total
}

// BAD: Poor cache locality
// BadCacheLocality sums a row-major matrix by walking it column-first,
// deliberately demonstrating poor cache locality (each inner step
// jumps to a different row's backing array).
//
// Fix over the original: matrix[0] was indexed unconditionally, so an
// empty matrix panicked with index out of range. The pedagogical
// column-major traversal is unchanged.
func BadCacheLocality(matrix [][]int) int64 {
	var sum int64

	if len(matrix) == 0 {
		return 0
	}

	// Column-major access - poor cache locality
	for j := 0; j < len(matrix[0]); j++ {
		for i := 0; i < len(matrix); i++ {
			sum += int64(matrix[i][j])
		}
	}

	return sum
}

Problems:

  • Function calls in loops
  • No concurrency
  • Poor cache locality
  • No optimization

Benchmarking

package main

import (
	"testing"
)

// BenchmarkOptimized benchmarks optimized function
// benchOptimizedSink keeps the benchmarked result live so the compiler
// cannot dead-code-eliminate the call under the timer.
var benchOptimizedSink int64

// BenchmarkOptimized benchmarks the optimized loop.
func BenchmarkOptimized(b *testing.B) {
	for i := 0; i < b.N; i++ {
		benchOptimizedSink = OptimizedLoop(1000)
	}
}

// BenchmarkUnoptimized benchmarks unoptimized function
// benchUnoptimizedSink keeps the benchmarked result live so the
// compiler cannot dead-code-eliminate the call under the timer.
var benchUnoptimizedSink int64

// BenchmarkUnoptimized benchmarks the unoptimized loop.
func BenchmarkUnoptimized(b *testing.B) {
	for i := 0; i < b.N; i++ {
		benchUnoptimizedSink = UnoptimizedLoop(1000)
	}
}

// BenchmarkConcurrency benchmarks concurrency
// benchConcurrencySink keeps the benchmarked result live so the
// compiler cannot dead-code-eliminate the call under the timer.
var benchConcurrencySink int64

// BenchmarkConcurrency benchmarks the fan-out summation with 4 workers.
func BenchmarkConcurrency(b *testing.B) {
	data := make([]int, 10000)
	for i := range data {
		data[i] = i
	}

	// Exclude fixture construction from the measured time.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		benchConcurrencySink = ConcurrencyOptimization(data, 4)
	}
}

Best Practices

1. Profile Before Optimizing

monitor := NewPerformanceMonitor()
// Run code
monitor.Report()

2. Benchmark Changes

// Compare before and after

3. Optimize Bottlenecks

// Focus on hot paths

4. Monitor in Production

// Continuous monitoring

Common Pitfalls

1. Premature Optimization

Profile first.

2. No Benchmarking

Always benchmark changes.

3. Ignoring Cache

Consider cache locality.

4. No Monitoring

Monitor production performance.

Resources

Summary

Performance tuning is essential. Key takeaways:

  • Profile before optimizing
  • Benchmark changes
  • Optimize hot paths
  • Consider cache locality
  • Use concurrency appropriately
  • Monitor production
  • Test on target hardware

By mastering performance tuning, you can build efficient systems.

Comments