Skip to main content
⚡ Calmops

Greedy Algorithms: Principles and Practice

Greedy algorithms make locally optimal choices in hopes of finding a global optimum. They’re often simpler than dynamic programming and work well for problems with the “greedy choice property.”

In this guide, we’ll explore greedy algorithm fundamentals, classic problems, and when greedy approaches work.

Understanding Greedy Algorithms

How Greedy Works

┌─────────────────────────────────────────────────────────────┐
│              Greedy vs Dynamic Programming                  │
│                                                             │
│   Greedy:                                                   │
│   1. Make best choice NOW                                   │
│   2. Never reconsider                                       │
│   3. Works for "optimal substructure" + "greedy choice"     │
│                                                             │
│   Dynamic Programming:                                      │
│   1. Consider ALL choices                                   │
│   2. Memoize subproblem results (no recomputation)          │
│   3. Always finds optimal solution                          │
│                                                             │
│   Example - Making Change:                                  │
│   Coins: [25, 10, 5, 1]                                     │
│   Amount: 30                                                │
│                                                             │
│   Greedy: 25 + 5 = 2 coins ✓ (works for US coins)           │
│   Greedy: For 30 with [25, 20, 10]? 20+10 = 2 coins ✓       │
│   Greedy: For 30 with [25, 15, 1]? 25+1+1+1+1+1 = 6 ✗       │
│           DP: 15+15 = 2 coins (optimal!)                    │
│                                                             │
└─────────────────────────────────────────────────────────────┘

Greedy Choice Property

# Greedy choice property means:
# Making a locally optimal choice leads to global optimum

# The two properties a problem must have for a greedy algorithm to be optimal
greedy_requirements = {
    "greedy_choice_property": {
        "description": "Local optimal choice is part of global optimum",
        "example": "Take largest coin first for coin change (sometimes)"
    },
    
    # Note: DP also needs optimal substructure; greedy additionally needs
    # the greedy choice property above.
    "optimal_substructure": {
        "description": "Optimal solution contains optimal solutions to subproblems",
        "example": "Shortest path has subpaths that are also shortest"
    }
}

# Not all problems have greedy choice property!
# Need to prove greedy works or use DP

Classic Greedy Problems

Activity Selection

# Problem: Select maximum number of non-overlapping activities

def activity_selection(activities):
    """Return the maximum number of mutually non-overlapping activities.

    activities: list of (start, end) tuples.
    Returns: int, size of a maximum set of compatible activities.

    Greedy rule: always keep the activity that finishes earliest; it leaves
    the most room for the remaining ones (provable by exchange argument).
    """
    if not activities:  # guard: indexing sorted_activities[0] crashed on []
        return 0

    # Sort by end time so the earliest-finishing activity comes first
    sorted_activities = sorted(activities, key=lambda x: x[1])

    count = 1
    last_end = sorted_activities[0][1]

    for start, end in sorted_activities[1:]:
        if start >= last_end:  # non-overlapping (shared endpoint allowed)
            count += 1
            last_end = end

    return count


def activity_selection_with_results(activities):
    """Greedy activity selection that also reports the chosen activities.

    activities: list of (start, end) tuples.
    Returns: (count, list of selected (start, end) tuples).
    """
    if not activities:
        return 0, []

    # The earliest-finishing activity leaves the most room for the rest.
    by_finish = sorted(activities, key=lambda interval: interval[1])

    chosen = [by_finish[0]]
    current_end = by_finish[0][1]

    for interval in by_finish[1:]:
        if interval[0] >= current_end:  # compatible with everything chosen
            chosen.append(interval)
            current_end = interval[1]

    return len(chosen), chosen


# Example
activities = [(1, 4), (3, 5), (0, 6), (5, 7), (3, 9), (5, 9), (6, 10), (8, 11)]
print(activity_selection(activities))  # 3 — picks (1, 4), (5, 7), (8, 11)

Fractional Knapsack

# Problem: Take fractions of items (unlike 0/1 knapsack)

def fractional_knapsack(items, capacity):
    """Maximize value in a knapsack when fractions of items may be taken.

    items: list of (weight, value) tuples; weights must be non-negative.
    capacity: total weight the knapsack can hold.
    Returns: maximum achievable value (float once a fraction is taken).

    Greedy by value/weight ratio is optimal for the fractional variant
    (unlike 0/1 knapsack, where greedy can fail).
    """
    # Value-per-weight ratio; a zero-weight item costs nothing, so give it
    # an infinite ratio instead of crashing on division by zero.
    ratios = [(w, v, v / w if w else float('inf')) for w, v in items]

    # Most valuable per unit of weight first
    ratios.sort(key=lambda x: x[2], reverse=True)

    total_value = 0
    remaining_capacity = capacity

    for weight, value, _ratio in ratios:
        if weight == 0:
            total_value += value  # free item: take it whole, uses no capacity
            continue

        if remaining_capacity == 0:
            break

        if weight <= remaining_capacity:
            # Whole item fits
            total_value += value
            remaining_capacity -= weight
        else:
            # Top off the knapsack with a fraction of this item
            fraction = remaining_capacity / weight
            total_value += value * fraction
            remaining_capacity = 0

    return total_value


# Example
items = [(10, 60), (20, 100), (30, 120)]  # (weight, value)
capacity = 50
# Value/weight ratios: 60/10=6, 100/20=5, 120/30=4
# Take all of weight 10 and 20 = 60 + 100 = 160
# Take 20/30 of weight 30 = 120 * 20/30 = 80
# Total: 160 + 80 = 240
print(fractional_knapsack(items, capacity))  # 240.0

Huffman Coding

# Problem: Create optimal prefix-free encoding

import heapq
from collections import Counter

class HuffmanNode:
    """Node of a Huffman tree; leaves carry a character, internals carry None."""

    def __init__(self, char, freq):
        self.char = char  # None marks a merged (internal) node
        self.freq = freq  # combined frequency of the subtree
        self.left = None
        self.right = None

    def __lt__(self, other):
        # Ordering used by heapq: lower frequency pops first
        return self.freq < other.freq


def huffman_encoding(text):
    """Build Huffman codes for *text* and encode it.

    Returns (codes, encoded): codes maps character -> bit string, and
    encoded is the concatenation of the codes of each character of text.
    """
    if not text:
        return {}, ""

    # Frequency table drives the tree construction
    frequencies = Counter(text)

    # Min-heap of leaf nodes ordered by frequency
    heap = [HuffmanNode(symbol, count) for symbol, count in frequencies.items()]
    heapq.heapify(heap)

    # Repeatedly merge the two rarest subtrees until a single root remains
    while len(heap) > 1:
        first = heapq.heappop(heap)
        second = heapq.heappop(heap)

        parent = HuffmanNode(None, first.freq + second.freq)
        parent.left = first
        parent.right = second
        heapq.heappush(heap, parent)

    # Walk the tree: a left edge appends "0", a right edge appends "1"
    codes = {}

    def walk(node, prefix=""):
        if node is None:
            return

        if node.char is not None:
            # Single-symbol input reaches a leaf with an empty prefix;
            # fall back to "0" so the code is non-empty.
            codes[node.char] = prefix or "0"
            return

        walk(node.left, prefix + "0")
        walk(node.right, prefix + "1")

    walk(heap[0])

    # Translate the text symbol by symbol
    encoded = "".join(codes[symbol] for symbol in text)

    return codes, encoded


def huffman_decoding(codes, encoded):
    """Invert huffman_encoding: turn a bit string back into text.

    codes: mapping char -> bit string (must be prefix-free).
    encoded: string of '0'/'1' characters.
    """
    if not encoded:
        return ""

    # Invert the table so complete bit patterns look up their character
    bits_to_char = {bits: char for char, bits in codes.items()}

    chars = []
    buffer = ""

    for bit in encoded:
        buffer += bit
        # Prefix-free codes: the first complete match is the only match
        if buffer in bits_to_char:
            chars.append(bits_to_char[buffer])
            buffer = ""

    return "".join(chars)


# Example
sample = "this is an example for huffman encoding"
codes, encoded = huffman_encoding(sample)
print(f"Codes: {codes}")
print(f"Encoded: {encoded[:50]}...")
print(f"Decoded: {huffman_decoding(codes, encoded)}")

Minimum Spanning Tree (Prim’s Algorithm)

# Problem: Find MST using Prim's algorithm

import heapq

def prim_mst(graph, start=0):
    """Compute a minimum spanning tree with Prim's algorithm.

    graph: adjacency list {node: [(neighbor, weight), ...]} with nodes
           labeled 0..n-1; assumed undirected (each edge listed both ways).
    start: node to grow the tree from.
    Returns: (total_weight, mst_edges) where mst_edges holds
             (parent, node, weight) tuples. For a disconnected graph only
             the component containing *start* is spanned.
    """
    n = len(graph)
    visited = [False] * n
    visited_count = 0  # O(1) loop guard; sum(visited) was O(n) per iteration
    min_heap = [(0, start, -1)]  # (edge weight, node, parent); -1 = no parent
    total_weight = 0
    mst_edges = []

    while min_heap and visited_count < n:
        weight, node, parent = heapq.heappop(min_heap)

        # Stale heap entry: node was already reached via a lighter edge
        if visited[node]:
            continue

        visited[node] = True
        visited_count += 1
        total_weight += weight

        if parent != -1:
            mst_edges.append((parent, node, weight))

        # Offer every edge crossing out of the tree; the heap keeps the lightest
        for neighbor, w in graph[node]:
            if not visited[neighbor]:
                heapq.heappush(min_heap, (w, neighbor, node))

    return total_weight, mst_edges


# Example
graph = {
    0: [(1, 4), (7, 8)],
    1: [(0, 4), (2, 8), (7, 11)],
    2: [(1, 8), (3, 7), (5, 4), (8, 2)],
    3: [(2, 7), (4, 9), (5, 14)],
    4: [(3, 9), (5, 10)],
    5: [(2, 4), (3, 14), (4, 10), (6, 2)],
    6: [(5, 2), (7, 1), (8, 6)],
    7: [(0, 8), (1, 11), (6, 1), (8, 7)],
    8: [(2, 2), (6, 6), (7, 7)]
}

weight, edges = prim_mst(graph)
print(f"MST weight: {weight}")  # 37
print(f"Edges: {edges}")

Minimum Spanning Tree (Kruskal’s Algorithm)

# Problem: Find MST using Kruskal's algorithm

class UnionFind:
    """Disjoint-set forest with path compression and union by rank."""

    def __init__(self, n):
        # Every element starts out as its own root
        self.parent = list(range(n))
        self.rank = [0] * n

    def find(self, x):
        """Return the representative (root) of x's set."""
        root = self.parent[x]
        if root != x:
            # Path compression: point x straight at the root
            root = self.find(root)
            self.parent[x] = root
        return root

    def union(self, x, y):
        """Merge the sets containing x and y; False if already joined."""
        root_x = self.find(x)
        root_y = self.find(y)

        if root_x == root_y:
            return False

        # Union by rank: hang the shallower tree under the deeper one
        if self.rank[root_x] < self.rank[root_y]:
            root_x, root_y = root_y, root_x

        self.parent[root_y] = root_x

        if self.rank[root_x] == self.rank[root_y]:
            self.rank[root_x] += 1

        return True


def kruskal_mst(n, edges):
    """Compute a minimum spanning tree with Kruskal's algorithm.

    n: number of nodes, labeled 0..n-1.
    edges: list of (weight, u, v) tuples; the list is not modified.
    Returns: (total_weight, mst_edges) with mst_edges as (u, v, weight).
    """
    uf = UnionFind(n)
    total_weight = 0
    mst_edges = []

    # sorted() instead of edges.sort(): don't mutate the caller's list
    for weight, u, v in sorted(edges):
        # union() returns False when u and v are already connected,
        # i.e. when adding this edge would create a cycle
        if uf.union(u, v):
            total_weight += weight
            mst_edges.append((u, v, weight))

            # A spanning tree has exactly n - 1 edges; stop early
            if len(mst_edges) == n - 1:
                break

    return total_weight, mst_edges


# Same example graph as Prim's, written as an explicit (weight, u, v) edge
# list (the original list here had wrong weights and duplicate edges, giving
# an MST weight of 30 instead of the claimed 37)
edges = [
    (4, 0, 1), (8, 0, 7), (8, 1, 2), (11, 1, 7), (7, 2, 3),
    (4, 2, 5), (2, 2, 8), (9, 3, 4), (14, 3, 5), (10, 4, 5),
    (2, 5, 6), (1, 6, 7), (6, 6, 8), (7, 7, 8)
]

weight, mst = kruskal_mst(9, edges)
print(f"MST weight: {weight}")  # 37
print(f"Edges: {mst}")

Dijkstra’s Shortest Path

# Problem: Find shortest path from source to all nodes

import heapq

def dijkstra(graph, source):
    """Single-source shortest paths via Dijkstra's algorithm.

    graph: adjacency list {node: [(neighbor, weight), ...]}, nodes labeled
           0..n-1, all edge weights non-negative.
    source: starting node.
    Returns: list of shortest distances from source (inf if unreachable).
    """
    n = len(graph)
    distances = [float('inf')] * n
    distances[source] = 0

    frontier = [(0, source)]  # (distance-so-far, node)
    finalized = set()

    while frontier:
        dist, node = heapq.heappop(frontier)

        # The first pop of a node carries its true shortest distance;
        # any later pop is a stale heap entry and can be skipped.
        if node in finalized:
            continue
        finalized.add(node)

        # Relax every outgoing edge
        for neighbor, weight in graph[node]:
            candidate = dist + weight
            if candidate < distances[neighbor]:
                distances[neighbor] = candidate
                heapq.heappush(frontier, (candidate, neighbor))

    return distances


# Example
graph = {
    'A': [('B', 4), ('C', 2)],
    'B': [('A', 4), ('C', 1), ('D', 5)],
    'C': [('A', 2), ('B', 1), ('D', 8), ('E', 10)],
    'D': [('B', 5), ('C', 8), ('E', 2), ('F', 6)],
    'E': [('C', 10), ('D', 2), ('F', 3)],
    'F': [('D', 6), ('E', 3)]
}

# Map letter labels onto the 0..n-1 indices dijkstra() expects
node_to_idx = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5}
idx_to_node = {v: k for k, v in node_to_idx.items()}

numeric_graph = {
    node_to_idx[node]: [(node_to_idx[nbr], w) for nbr, w in neighbors]
    for node, neighbors in graph.items()
}

distances = dijkstra(numeric_graph, 0)
print("Shortest distances from A:")
for idx, dist in enumerate(distances):
    print(f"  {idx_to_node[idx]}: {dist}")

Job Sequencing with Deadlines

# Problem: Maximize profit by scheduling jobs before deadlines

def job_sequencing(jobs):
    """Schedule unit-time jobs to maximize total profit under deadlines.

    jobs: list of (id, deadline, profit) tuples; deadlines are >= 1.
    Returns: (total_profit, scheduled) where scheduled holds
             (job_id, time_slot) pairs in the order jobs were placed.

    Greedy: consider jobs in decreasing profit order and put each one in
    the latest still-free slot at or before its deadline.
    """
    if not jobs:  # guard: max() below raises ValueError on an empty list
        return 0, []

    # Most profitable jobs get first pick of the time slots
    sorted_jobs = sorted(jobs, key=lambda x: x[2], reverse=True)

    max_deadline = max(job[1] for job in jobs)
    slots = [-1] * (max_deadline + 1)  # slots[1..max_deadline]; -1 = free
    total_profit = 0
    scheduled = []

    for job_id, deadline, profit in sorted_jobs:
        # Latest free slot before the deadline leaves earlier slots open
        # for jobs with tighter deadlines
        for time in range(min(max_deadline, deadline), 0, -1):
            if slots[time] == -1:  # slot available
                slots[time] = job_id
                total_profit += profit
                scheduled.append((job_id, time))
                break

    return total_profit, scheduled


# Example: (id, deadline, profit)
jobs = [
    ('J1', 2, 100),
    ('J2', 1, 20),
    ('J3', 2, 40),
    ('J4', 1, 30),
    ('J5', 3, 50)
]

profit, scheduled = job_sequencing(jobs)
print(f"Total profit: {profit}")  # 190 — J1 at t=2, J5 at t=3, J3 at t=1
print(f"Scheduled: {scheduled}")

Egyptian Fraction

# Problem: Represent fraction as sum of distinct unit fractions

def egyptian_fraction(numerator, denominator):
    """Represent numerator/denominator as a sum of distinct unit fractions.

    Returns a list of strings like "1/3". Uses Fibonacci's greedy method:
    repeatedly subtract the largest unit fraction that still fits. Expects
    numerator >= 0 and denominator > 0.
    """
    from math import gcd

    result = []

    while numerator > 0:
        # Keep the fraction in lowest terms so the intermediate
        # denominators don't blow up (the expansion itself is unchanged)
        g = gcd(numerator, denominator)
        numerator //= g
        denominator //= g

        # Largest unit fraction <= n/d is 1/ceil(d/n); ceiling division
        unit_denom = (denominator + numerator - 1) // numerator

        result.append(f"1/{unit_denom}")

        # n/d - 1/u = (n*u - d) / (d*u)
        numerator = numerator * unit_denom - denominator
        denominator = denominator * unit_denom

    return result


# Example (note 6/14 reduces to 3/7)
result = egyptian_fraction(6, 14)
print(" + ".join(result))  # 1/3 + 1/11 + 1/231

result = egyptian_fraction(4, 5)
print(" + ".join(result))  # 1/2 + 1/4 + 1/20

Greedy vs Dynamic Programming

# When to use which?

# Checklist for picking a paradigm; both lists assume the problem has
# optimal substructure
decision_guide = {
    "use_greedy_when": [
        "Greedy choice property can be proven",
        "Local optimal leads to global optimal",
        "Problem has optimal substructure",
        "Need fast, simple solution",
        "Approximate solution acceptable"
    ],
    
    "use_dp_when": [
        "Greedy choice doesn't work",
        "Need guaranteed optimal solution",
        "Problem has overlapping subproblems",
        "All choices need to be considered",
        "When optimal solution is required"
    ]
}

# Examples of problems
problem_comparison = {
    "coin_change": {
        "greedy": "Works for canonical coin systems",
        "dp": "Always gives optimal solution"
    },
    
    "fractional_knapsack": {
        "greedy": "Optimal (can take fractions)",
        "dp": "Not needed"
    },
    
    "0/1_knapsack": {
        "greedy": "Not optimal",
        "dp": "Always optimal"
    },
    
    "activity_selection": {
        "greedy": "Optimal",
        "dp": "Overkill"
    },
    
    "shortest_path": {
        "greedy_dijkstra": "Optimal (no negative weights)",
        "dp_bellman_ford": "Needed for negative weights"
    }
}

Proof of Greedy Optimality

# Common proof techniques

# Standard techniques for proving that a greedy algorithm is optimal
proof_techniques = {
    "exchange_argument": {
        "description": "Show any optimal solution can be transformed to greedy solution",
        "example": "Activity selection: swap first activity with later one"
    },
    
    "cut_and_paste": {
        "description": "Show greedy solution is as good as optimal",
        "example": "Huffman coding: prove merging smallest frequencies is optimal"
    },
    
    "induction": {
        "description": "Prove greedy works for base case and can extend",
        "example": "Prim's MST algorithm"
    }
}

Conclusion

Greedy algorithms provide efficient solutions when they work:

  • Greedy choice property: Local optimal leads to global optimal
  • Optimal substructure: Problem can be broken into subproblems
  • Common problems: Activity selection, MST, shortest path, Huffman coding
  • Proof needed: Always verify greedy optimality

Know when greedy works vs when DP is needed.


Comments