Greedy algorithms make locally optimal choices in hopes of finding a global optimum. They’re often simpler than dynamic programming and work well for problems with the “greedy choice property.”
In this guide, we’ll explore greedy algorithm fundamentals, classic problems, and when greedy approaches work.
Understanding Greedy Algorithms
How Greedy Works
┌───────────────────────────────────────────────────────────────┐
│ Greedy vs Dynamic Programming                                 │
│                                                               │
│ Greedy:                                                       │
│   1. Make best choice NOW                                     │
│   2. Never reconsider                                         │
│   3. Works for "optimal substructure" + "greedy choice"       │
│                                                               │
│ Dynamic Programming:                                          │
│   1. Consider ALL choices                                     │
│   2. Reuse (memoize) subproblem results                       │
│   3. Always finds optimal solution                            │
│                                                               │
│ Example - Making Change for amount 30:                        │
│                                                               │
│   Coins [25, 10, 5, 1]:                                       │
│     Greedy: 25 + 5 = 2 coins ✓ (works for US coins)           │
│                                                               │
│   Coins [25, 20, 10]:                                         │
│     Greedy: picks 25 first, then cannot finish ✗              │
│     Optimal: 20 + 10 = 2 coins                                │
│                                                               │
│   Coins [25, 15, 1]:                                          │
│     Greedy: 25 + 1+1+1+1+1 = 6 coins ✗                        │
│     DP:     15 + 15       = 2 coins (optimal!)                │
└───────────────────────────────────────────────────────────────┘
Greedy Choice Property
# Greedy choice property means:
# Making a locally optimal choice leads to global optimum
greedy_requirements = {
"greedy_choice_property": {
"description": "Local optimal choice is part of global optimum",
"example": "Take largest coin first for coin change (sometimes)"
},
"optimal_substructure": {
"description": "Optimal solution contains optimal solutions to subproblems",
"example": "Shortest path has subpaths that are also shortest"
}
}
# Not all problems have greedy choice property!
# Need to prove greedy works or use DP
Classic Greedy Problems
Activity Selection
# Problem: Select maximum number of non-overlapping activities
def activity_selection(activities):
"""
activities: list of (start, end) tuples
Returns: Maximum number of activities
"""
# Sort by end time
sorted_activities = sorted(activities, key=lambda x: x[1])
count = 1
last_end = sorted_activities[0][1]
for start, end in sorted_activities[1:]:
if start >= last_end: # Non-overlapping
count += 1
last_end = end
return count
def activity_selection_with_results(activities):
"""Also returns which activities are selected"""
if not activities:
return 0, []
sorted_activities = sorted(activities, key=lambda x: x[1])
selected = [sorted_activities[0]]
last_end = sorted_activities[0][1]
for start, end in sorted_activities[1:]:
if start >= last_end:
selected.append((start, end))
last_end = end
return len(selected), selected
# Example
activities = [(1, 4), (3, 5), (0, 6), (5, 7), (3, 9), (5, 9), (6, 10), (8, 11)]
print(activity_selection(activities)) # 4
Fractional Knapsack
# Problem: Take fractions of items (unlike 0/1 knapsack)
def fractional_knapsack(items, capacity):
"""
items: list of (weight, value) tuples
Returns: Maximum value (can take fractions)
"""
# Calculate value/weight ratio
ratios = [(w, v, v/w) for w, v in items]
# Sort by ratio (highest first)
ratios.sort(key=lambda x: x[2], reverse=True)
total_value = 0
remaining_capacity = capacity
for weight, value, ratio in ratios:
if remaining_capacity == 0:
break
if weight <= remaining_capacity:
total_value += value
remaining_capacity -= weight
else:
# Take fraction
fraction = remaining_capacity / weight
total_value += value * fraction
remaining_capacity = 0
return total_value
# Example
items = [(10, 60), (20, 100), (30, 120)] # (weight, value)
capacity = 50
# Ratios: 6, 5, 4
# Take all of weight 10 and 20 = 60 + 100 = 160
# Take 20/30 of weight 30 = 120 * 20/30 = 80
# Total: 160 + 80 = 240
print(fractional_knapsack(items, capacity)) # 240.0
Huffman Coding
# Problem: Create optimal prefix-free encoding
import heapq
from collections import Counter
class HuffmanNode:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.left = None
self.right = None
def __lt__(self, other):
return self.freq < other.freq
def huffman_encoding(text):
"""Create Huffman codes for text"""
if not text:
return {}, ""
# Count frequency
freq = Counter(text)
# Build min heap
heap = [HuffmanNode(char, f) for char, f in freq.items()]
heapq.heapify(heap)
# Build tree
while len(heap) > 1:
left = heapq.heappop(heap)
right = heapq.heappop(heap)
merged = HuffmanNode(None, left.freq + right.freq)
merged.left = left
merged.right = right
heapq.heappush(heap, merged)
# Generate codes
codes = {}
def generate_codes(node, current_code=""):
if node is None:
return
if node.char is not None:
codes[node.char] = current_code or "0"
return
generate_codes(node.left, current_code + "0")
generate_codes(node.right, current_code + "1")
generate_codes(heap[0])
# Encode text
encoded = "".join(codes[char] for char in text)
return codes, encoded
def huffman_decoding(codes, encoded):
"""Decode Huffman-encoded text"""
if not encoded:
return ""
# Build reverse lookup
reverse_codes = {v: k for k, v in codes.items()}
decoded = []
current = ""
for bit in encoded:
current += bit
if current in reverse_codes:
decoded.append(reverse_codes[current])
current = ""
return "".join(decoded)
# Example
text = "this is an example for huffman encoding"
codes, encoded = huffman_encoding(text)
print(f"Codes: {codes}")
print(f"Encoded: {encoded[:50]}...")
decoded = huffman_decoding(codes, encoded)
print(f"Decoded: {decoded}")
Minimum Spanning Tree (Prim’s Algorithm)
# Problem: Find MST using Prim's algorithm
import heapq
def prim_mst(graph, start=0):
"""
graph: adjacency list {node: [(neighbor, weight), ...]}
Returns: MST edges and total weight
"""
n = len(graph)
visited = [False] * n
min_heap = [(0, start, -1)] # (weight, current, parent)
total_weight = 0
mst_edges = []
while min_heap and sum(visited) < n:
weight, node, parent = heapq.heappop(min_heap)
if visited[node]:
continue
visited[node] = True
total_weight += weight
if parent != -1:
mst_edges.append((parent, node, weight))
for neighbor, w in graph[node]:
if not visited[neighbor]:
heapq.heappush(min_heap, (w, neighbor, node))
return total_weight, mst_edges
# Example
graph = {
0: [(1, 4), (7, 8)],
1: [(0, 4), (2, 8), (7, 11)],
2: [(1, 8), (3, 7), (5, 4), (8, 2)],
3: [(2, 7), (4, 9), (5, 14)],
4: [(3, 9), (5, 10)],
5: [(2, 4), (3, 14), (4, 10), (6, 2)],
6: [(5, 2), (7, 1), (8, 6)],
7: [(0, 8), (1, 11), (6, 1), (8, 7)],
8: [(2, 2), (6, 6), (7, 7)]
}
weight, edges = prim_mst(graph)
print(f"MST weight: {weight}") # 37
print(f"Edges: {edges}")
Minimum Spanning Tree (Kruskal’s Algorithm)
# Problem: Find MST using Kruskal's algorithm
class UnionFind:
def __init__(self, n):
self.parent = list(range(n))
self.rank = [0] * n
def find(self, x):
if self.parent[x] != x:
self.parent[x] = self.find(self.parent[x])
return self.parent[x]
def union(self, x, y):
px, py = self.find(x), self.find(y)
if px == py:
return False
if self.rank[px] < self.rank[py]:
px, py = py, px
self.parent[py] = px
if self.rank[px] == self.rank[py]:
self.rank[px] += 1
return True
def kruskal_mst(n, edges):
"""
n: number of nodes
edges: list of (weight, u, v) tuples
Returns: MST total weight and edges
"""
# Sort edges by weight
edges.sort()
uf = UnionFind(n)
total_weight = 0
mst_edges = []
for weight, u, v in edges:
if uf.union(u, v):
total_weight += weight
mst_edges.append((u, v, weight))
if len(mst_edges) == n - 1:
break
return total_weight, mst_edges
# Same example as Prim's
edges = [
(4, 0, 1), (8, 0, 7), (11, 1, 7), (2, 2, 8), (4, 2, 5),
(7, 3, 4), (9, 3, 5), (14, 4, 5), (10, 2, 3), (2, 6, 8),
(1, 6, 7), (7, 7, 8), (6, 2, 3), (8, 2, 1), (4, 1, 2)
]
weight, mst = kruskal_mst(9, edges)
print(f"MST weight: {weight}") # 37
print(f"Edges: {mst}")
Dijkstra’s Shortest Path
# Problem: Find shortest path from source to all nodes
import heapq
def dijkstra(graph, source):
"""
graph: adjacency list {node: [(neighbor, weight), ...]}
Returns: distances from source
"""
n = len(graph)
distances = [float('inf')] * n
distances[source] = 0
min_heap = [(0, source)]
visited = set()
while min_heap:
dist, node = heapq.heappop(min_heap)
if node in visited:
continue
visited.add(node)
for neighbor, weight in graph[node]:
new_dist = dist + weight
if new_dist < distances[neighbor]:
distances[neighbor] = new_dist
heapq.heappush(min_heap, (new_dist, neighbor))
return distances
# Example
graph = {
'A': [('B', 4), ('C', 2)],
'B': [('A', 4), ('C', 1), ('D', 5)],
'C': [('A', 2), ('B', 1), ('D', 8), ('E', 10)],
'D': [('B', 5), ('C', 8), ('E', 2), ('F', 6)],
'E': [('C', 10), ('D', 2), ('F', 3)],
'F': [('D', 6), ('E', 3)]
}
# Convert to numeric indices
node_to_idx = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5}
idx_to_node = {v: k for k, v in node_to_idx.items()}
numeric_graph = {i: [] for i in range(6)}
for node, neighbors in graph.items():
u = node_to_idx[node]
for neighbor, weight in neighbors:
v = node_to_idx[neighbor]
numeric_graph[u].append((v, weight))
distances = dijkstra(numeric_graph, 0)
print("Shortest distances from A:")
for i, d in enumerate(distances):
print(f" {idx_to_node[i]}: {d}")
Job Sequencing with Deadlines
# Problem: Maximize profit by scheduling jobs before deadlines
def job_sequencing(jobs):
"""
jobs: list of (id, deadline, profit)
Returns: Scheduled jobs and total profit
"""
# Sort by profit (highest first)
sorted_jobs = sorted(jobs, key=lambda x: x[2], reverse=True)
max_deadline = max(job[1] for job in jobs)
slots = [-1] * (max_deadline + 1) # Track filled time slots
total_profit = 0
scheduled = []
for job_id, deadline, profit in sorted_jobs:
# Find latest available slot before deadline
for time in range(min(max_deadline, deadline), 0, -1):
if slots[time] == -1: # Slot available
slots[time] = job_id
total_profit += profit
scheduled.append((job_id, time))
break
return total_profit, scheduled
# Example
jobs = [
('J1', 2, 100),
('J2', 1, 20),
('J3', 2, 40),
('J4', 1, 30),
('J5', 3, 50)
]
profit, scheduled = job_sequencing(jobs)
print(f"Total profit: {profit}") # 190
print(f"Scheduled: {scheduled}")
Egyptian Fraction
# Problem: Represent fraction as sum of distinct unit fractions
def egyptian_fraction(numerator, denominator):
"""
Greedy algorithm: Always pick largest unit fraction
"""
result = []
while numerator > 0:
# Ceiling division
unit_denom = (denominator + numerator - 1) // numerator
result.append(f"1/{unit_denom}")
numerator = numerator * unit_denom - denominator
denominator = denominator * unit_denom
return result
# Example
result = egyptian_fraction(6, 14)
print(" + ".join(result)) # 1/3 + 1/4 + 1/42
result = egyptian_fraction(4, 5)
print(" + ".join(result)) # 1/2 + 1/4 + 1/20
Greedy vs Dynamic Programming
# When to use which?
decision_guide = {
"use_greedy_when": [
"Greedy choice property can be proven",
"Local optimal leads to global optimal",
"Problem has optimal substructure",
"Need fast, simple solution",
"Approximate solution acceptable"
],
"use_dp_when": [
"Greedy choice doesn't work",
"Need guaranteed optimal solution",
"Problem has overlapping subproblems",
"All choices need to be considered",
"When optimal solution is required"
]
}
# Examples of problems
problem_comparison = {
"coin_change": {
"greedy": "Works for canonical coin systems",
"dp": "Always gives optimal solution"
},
"fractional_knapsack": {
"greedy": "Optimal (can take fractions)",
"dp": "Not needed"
},
"0/1_knapsack": {
"greedy": "Not optimal",
"dp": "Always optimal"
},
"activity_selection": {
"greedy": "Optimal",
"dp": "Overkill"
},
"shortest_path": {
"greedy_dijkstra": "Optimal (no negative weights)",
"dp_bellman_ford": "Needed for negative weights"
}
}
Proof of Greedy Optimality
# Common proof techniques
proof_techniques = {
"exchange_argument": {
"description": "Show any optimal solution can be transformed to greedy solution",
"example": "Activity selection: swap first activity with later one"
},
"cut_and_paste": {
"description": "Show greedy solution is as good as optimal",
"example": "Huffman coding: prove merging smallest frequencies is optimal"
},
"induction": {
"description": "Prove greedy works for base case and can extend",
"example": "Prim's MST algorithm"
}
}
Conclusion
Greedy algorithms provide efficient solutions when they work:
- Greedy choice property: Local optimal leads to global optimal
- Optimal substructure: Problem can be broken into subproblems
- Common problems: Activity selection, MST, shortest path, Huffman coding
- Proof needed: Always verify greedy optimality
Know when greedy works vs when DP is needed.
Comments