Introduction
Running PostgreSQL in production requires robust operational practices. This guide covers essential DBA tasks: backup and recovery, replication for scaling and high availability, connection pooling, and comprehensive monitoring.
Installation and Initial Configuration
Basic Configuration
# postgresql.conf - Key settings
listen_addresses = 'localhost' # Or '*' for all
port = 5432
max_connections = 100
shared_buffers = 256MB # 25% of RAM
effective_cache_size = 768MB # 75% of RAM
maintenance_work_mem = 64MB
wal_buffers = 16MB
checkpoint_completion_target = 0.9
random_page_cost = 1.1 # For SSD
effective_io_concurrency = 200
work_mem = 4MB
min_wal_size = 1GB
max_wal_size = 4GB
# Logging
log_destination = 'stderr'
logging_collector = on
log_directory = 'log'
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
log_statement = 'ddl' # Log DDL statements
log_min_duration_statement = 1000 # Log queries > 1s
pg_hba.conf - Authentication
# pg_hba.conf - Client authentication
# TYPE DATABASE USER ADDRESS METHOD
# Local connections
# local all all trust # 'trust' skips authentication entirely -- dev only, never in production
local all all peer
# IPv4 local connections
host all all 127.0.0.1/32 scram-sha-256
host all all 10.0.0.0/8 scram-sha-256
# Replication connections
host replication all 10.0.0.0/8 scram-sha-256
User and Role Management
-- Create role with login capability
CREATE ROLE myapp LOGIN PASSWORD 'secure_password';
-- Create superuser
CREATE ROLE admin LOGIN SUPERUSER PASSWORD 'admin_password';
-- Create role with database
CREATE ROLE reader LOGIN PASSWORD 'reader_password' VALID UNTIL '2027-01-01';
-- Grant privileges
GRANT CONNECT ON DATABASE myapp TO myapp;
GRANT USAGE ON SCHEMA public TO myapp;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO myapp;
GRANT INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO myapp;
-- Grant default privileges (for new tables)
ALTER DEFAULT PRIVILEGES IN SCHEMA public
GRANT SELECT ON TABLES TO reader;
-- Create read-only role
CREATE ROLE readonly;
GRANT CONNECT ON DATABASE myapp TO readonly;
GRANT USAGE ON SCHEMA public TO readonly;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly;
GRANT USAGE ON ALL SEQUENCES IN SCHEMA public TO readonly;
Backup Strategies
Logical Backup with pg_dump
# Dump single database
pg_dump -U postgres -Fc mydb > mydb.dump
# Dump with custom format (parallel)
pg_dump -U postgres -Fd -j 4 -f mydb_dir mydb
# Dump all databases
pg_dumpall -U postgres -f all_databases.sql
# Dump only schema
pg_dump -U postgres --schema-only mydb > schema.sql
# Dump only data
pg_dump -U postgres --data-only mydb > data.sql
# Dump specific table
pg_dump -U postgres -t users -t orders mydb > tables.sql
# Restore
pg_restore -U postgres -d mydb mydb.dump
psql -U postgres -d mydb -f all_databases.sql
Physical Backup with pg_basebackup
# Create base backup (requires replication connection)
pg_basebackup -U replication -h primary.example.com -D /var/lib/postgresql/17/main -Xs -P -R
# Options:
# -Xs: Include WAL files (stream)
# -P: Show progress
# -R: Create replication configuration
# Create backup with tar
pg_basebackup -U replication -h primary.example.com -D /backup -Ft -z -P
Continuous Archiving (Point-in-Time Recovery)
# postgresql.conf - WAL archiving
wal_level = replica
max_wal_senders = 10
archive_mode = on
archive_command = 'test ! -f /archive/%f && cp %p /archive/%f' # guard against overwriting an existing archive file
archive_timeout = 300 # Force archive every 5 minutes
# Create base backup with WAL
pg_basebackup -U replication -h localhost -D /var/lib/postgresql/17/main -Xs -P
# Configure recovery
# postgresql.auto.conf (in data directory)
restore_command = 'cp /archive/%f %p'
recovery_target_time = '2026-03-05 10:00:00 UTC'
# Create recovery signal file
touch /var/lib/postgresql/17/main/recovery.signal
Backup Script Example
#!/bin/bash
# backup.sh
BACKUP_DIR="/backup/postgres"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=7
# Create backup directory
mkdir -p $BACKUP_DIR
# Dump database
pg_dump -U postgres -Fc mydb > "$BACKUP_DIR/mydb_$DATE.dump"
# Delete dumps older than the retention period (keeps only the last 7 days)
find $BACKUP_DIR -name "*.dump" -mtime +$RETENTION_DAYS -delete
echo "Backup completed: mydb_$DATE.dump"
Streaming Replication
Master Configuration
-- On master: create replication user
CREATE USER replication WITH REPLICATION PASSWORD 'rep_password';
-- postgresql.conf
wal_level = replica
max_wal_senders = 10
max_replication_slots = 10
wal_keep_size = 1GB
hot_standby = on
Slave Configuration
# pg_hba.conf on master
host replication replication 10.0.1.0/24 scram-sha-256
# Recovery configuration on slave
# recovery.conf (PostgreSQL 12+ uses postgresql.conf)
primary_conninfo = 'host=10.0.1.10 port=5432 user=replication password=rep_password'
restore_command = 'cp /wal/%f %p'
hot_standby = on
# Create standby signal file
touch /var/lib/postgresql/17/main/standby.signal
# Start PostgreSQL on slave
pg_ctl -D /var/lib/postgresql/17/main start
Monitor Replication
-- Check replication status
SELECT * FROM pg_stat_replication;
-- Check replication lag
SELECT pid, usename, client_addr, state,
(now() - pg_postmaster_start_time()) AS uptime,
COALESCE(pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn), 0) / 1024 / 1024 AS lag_mb
FROM pg_stat_replication;
-- Replication slots
SELECT * FROM pg_replication_slots;
-- WAL receiver status (run this on the standby)
SELECT * FROM pg_stat_wal_receiver;
Logical Replication
-- On publisher
CREATE PUBLICATION mydb_pub FOR ALL TABLES;
-- Or specific tables
CREATE PUBLICATION orders_pub FOR TABLE orders, order_items;
-- Allow subscription
ALTER PUBLICATION mydb_pub ADD TABLE products;
-- On subscriber
CREATE SUBSCRIPTION mydb_sub
CONNECTION 'host=10.0.1.10 port=5432 dbname=mydb user=postgres password=pass'
PUBLICATION mydb_pub;
-- Monitor
SELECT * FROM pg_stat_subscription;
High Availability
Patroni Setup
# patroni.yml
scope: postgres-cluster
namespace: /service
name: postgres-1
restapi:
listen: 0.0.0.0:8008
connect_address: 10.0.1.10:8008
etcd:
hosts: 10.0.1.20:2379,10.0.1.21:2379,10.0.1.22:2379
postgresql:
listen: 0.0.0.0:5432
connect_address: 10.0.1.10:5432
data_dir: /var/lib/postgresql/17/main
parameters:
wal_level: replica
max_wal_senders: 10
max_replication_slots: 10
hot_standby: on
bootstrap:
dcs:
ttl: 30
loop_wait: 10
retry_timeout: 10
maximum_lag_on_failover: 1048576
primary_start_timeout: 300
synchronous_mode: true
synchronous_node_count: 1
initdb:
- encoding: UTF8
- data-checksums
users:
admin:
password: admin_password
options:
- createrole
- replication
HAProxy Configuration
# haproxy.cfg
listen postgres-primary
bind 0.0.0.0:5433
mode tcp
option tcp-check
balance roundrobin
server postgres-1 10.0.1.10:5432 check inter 2000 rise 2 fall 3
server postgres-2 10.0.1.11:5432 check inter 2000 rise 2 fall 3
listen postgres-replicas
bind 0.0.0.0:5434
mode tcp
option tcp-check
balance roundrobin
server postgres-2 10.0.1.11:5432 check inter 2000 rise 2 fall 3
server postgres-3 10.0.1.12:5432 check inter 2000 rise 2 fall 3
Connection Pooling with PgBouncer
Installation and Configuration
# Debian/Ubuntu
apt-get install pgbouncer
# pgbouncer.ini
[databases]
mydb = host=127.0.0.1 port=5432 dbname=mydb
[pgbouncer]
listen_addr = 0.0.0.0
listen_port = 6432
auth_type = scram-sha-256
auth_file = /etc/pgbouncer/userlist.txt
pool_mode = transaction # or session
max_client_conn = 1000
default_pool_size = 25
min_pool_size = 5
reserve_pool_timeout = 5
server_lifetime = 3600
server_idle_timeout = 600
# User list format (user/password)
"postgres" "SCRAM-SHA-256$..."
# pg_hba.conf for pgbouncer
host all all 127.0.0.1/32 scram-sha-256
Application Connection
# Connect to PgBouncer instead of PostgreSQL directly
import psycopg2
conn = psycopg2.connect(
host="localhost",
port=6432,
database="mydb",
user="app_user",
password="app_password"
)
Monitoring
System Views
-- Current connections
SELECT * FROM pg_stat_activity;
-- Long running queries
SELECT pid, now() - pg_stat_activity.query_start AS duration, query
FROM pg_stat_activity
WHERE state = 'active' AND query NOT ILIKE '%pg_stat_activity%'
ORDER BY duration DESC;
-- Table statistics
SELECT * FROM pg_stat_user_tables ORDER BY seq_scan DESC;
SELECT * FROM pg_stat_user_indexes ORDER BY idx_scan DESC;
-- Cache hit ratio (multiply by 100.0 first to avoid integer division; NULLIF guards a cold cache)
SELECT
sum(heap_blks_read) as heap_read,
sum(heap_blks_hit) as heap_hit,
round(100.0 * sum(heap_blks_hit) / NULLIF(sum(heap_blks_hit) + sum(heap_blks_read), 0), 2) as ratio
FROM pg_statio_user_tables;
-- Index usage
SELECT
relname,
idx_scan,
seq_scan,
CASE WHEN idx_scan + seq_scan > 0
THEN round(idx_scan::numeric / (idx_scan + seq_scan) * 100, 2)
ELSE 0 END as idx_pct
FROM pg_stat_user_tables
ORDER BY idx_scan + seq_scan DESC;
Monitoring with pgAdmin
-- Dashboard queries
-- Server activity
SELECT count(*) as total_connections,
count(*) filter (where state = 'active') as active,
count(*) filter (where state = 'idle') as idle
FROM pg_stat_activity;
-- Database stats
SELECT datname, numbackends, xact_commit, xact_rollback, blks_read, blks_hit,
round(100.0 * blks_hit / NULLIF(blks_read + blks_hit, 0), 2) as cache_hit_ratio
FROM pg_stat_database
WHERE datname = current_database();
Prometheus exporter
# Install postgres_exporter
docker run -d \
--name postgres_exporter \
-e DATA_SOURCE_NAME="postgresql://postgres:password@localhost:5432/postgres?sslmode=disable" \
-p 9187:9187 \
prometheuscommunity/postgres-exporter
# prometheus.yml
scrape_configs:
- job_name: 'postgres'
static_configs:
- targets: ['localhost:9187']
VACUUM and Maintenance
VACUUM Operations
-- Regular VACUUM (reclaims space)
VACUUM users;
-- VACUUM with ANALYZE (updates statistics)
VACUUM ANALYZE orders;
-- VACUUM FULL (rewrites entire table, requires exclusive lock)
VACUUM FULL users;
-- Monitor dead-tuple ratio (a rough bloat indicator)
SELECT schemaname, relname,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||relname)) as size,
round(100.0 * n_dead_tup / NULLIF(n_live_tup + n_dead_tup, 0), 2) as dead_pct
FROM pg_stat_user_tables
WHERE n_live_tup > 0
ORDER BY dead_pct DESC;
Index Maintenance
-- Reindex to improve performance
REINDEX TABLE users;
REINDEX DATABASE mydb;
-- Concurrent reindex (PostgreSQL 12+)
REINDEX TABLE CONCURRENTLY users;
-- Check index size
SELECT indexrelname, pg_size_pretty(pg_relation_size(indexrelid))
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;
Performance Tuning
Key Parameters
# postgresql.conf - Performance tuning
# Memory
shared_buffers = '2GB' # 25% of available RAM
effective_cache_size = '6GB' # 75% of available RAM
work_mem = '64MB' # Per-sort operation
maintenance_work_mem = '512MB' # For VACUUM, CREATE INDEX
wal_buffers = '64MB'
# Checkpoints
checkpoint_completion_target = 0.9
wal_compression = on
# Query planner
random_page_cost = 1.1 # For SSDs
effective_io_concurrency = 200
default_statistics_target = 100
# Parallel queries
max_worker_processes = 8
max_parallel_workers_per_gather = 4
parallel_leader_participation = on
# Logging
log_min_duration_statement = 1000 # Log queries > 1s
log_connections = on
log_disconnections = on
Analyzing Query Performance
-- Enable query timing
\timing on
-- EXPLAIN ANALYZE
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT * FROM orders WHERE user_id = 100;
-- EXPLAIN output interpretation
-- Seq Scan = bad for large tables (need index)
-- Index Scan = good
-- Bitmap Heap Scan = moderate
-- Nested Loop = can be expensive for large datasets
-- Hash Join = efficient for large joins
Connection Management
-- Kill idle connections
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE state = 'idle'
AND query_start < now() - interval '15 minutes';
-- Set statement timeout
SET statement_timeout = '30s';
SET statement_timeout = '5min';
-- Per-user defaults
ALTER ROLE myuser SET statement_timeout = '1min';
Conclusion
PostgreSQL operations require careful attention to backups, replication, and monitoring. With proper configuration and tools like Patroni, PgBouncer, and comprehensive monitoring, you can achieve a highly available, performant database infrastructure.
In the next article, we’ll dive into PostgreSQL’s internal architecture: MVCC, the query planner, storage, and transaction management.
Comments