Database Design and Optimization
Database design and optimization are critical for application performance. This article covers best practices.
Introduction
Database design provides:
- Data integrity
- Query performance
- Scalability
- Maintainability
- Reliability
Understanding database design helps you:
- Design efficient schemas
- Optimize queries
- Improve performance
- Scale databases
- Ensure data consistency
Schema Design
Normalization
// โ
Good: Normalized schema (3NF)
// Users table
{
id: 1,
name: 'John',
email: '[email protected]'
}
// Departments table
{
id: 1,
name: 'Engineering'
}
// User_Departments table (junction)
{
userId: 1,
departmentId: 1
}
// โ
Good: Denormalization for performance
// User_Departments_Denormalized
{
userId: 1,
userName: 'John',
departmentId: 1,
departmentName: 'Engineering'
}
// โ
Good: Choose based on needs
// Normalize: Data consistency, storage efficiency
// Denormalize: Query performance, reduced joins
Data Types
// โ
Good: Appropriate data types
const userSchema = {
id: 'UUID', // Unique identifier
email: 'VARCHAR(255)', // Email address
password: 'VARCHAR(255)', // Hashed password
age: 'INTEGER', // Age
balance: 'DECIMAL(10, 2)', // Money
isActive: 'BOOLEAN', // Flag
createdAt: 'TIMESTAMP', // Date/time
metadata: 'JSON', // Flexible data
tags: 'TEXT[]' // Array
};
// โ
Good: Constraints
const userSchema = {
id: 'UUID PRIMARY KEY',
email: 'VARCHAR(255) UNIQUE NOT NULL',
password: 'VARCHAR(255) NOT NULL',
age: 'INTEGER CHECK (age >= 0)',
createdAt: 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP'
};
Relationships
// โ
Good: One-to-Many relationship
// Users (1) -> Posts (Many)
const postSchema = {
id: 'UUID PRIMARY KEY',
userId: 'UUID FOREIGN KEY REFERENCES users(id)',
title: 'VARCHAR(255)',
content: 'TEXT'
};
// โ
Good: Many-to-Many relationship
// Users (Many) -> Groups (Many)
const userGroupSchema = {
userId: 'UUID FOREIGN KEY REFERENCES users(id)',
groupId: 'UUID FOREIGN KEY REFERENCES groups(id)',
PRIMARY KEY (userId, groupId)
};
// โ
Good: Self-referencing relationship
// Comments (parent-child)
const commentSchema = {
id: 'UUID PRIMARY KEY',
parentId: 'UUID FOREIGN KEY REFERENCES comments(id)',
content: 'TEXT'
};
Indexing Strategy
Index Types
// โ
Good: Primary key index
CREATE TABLE users (
id UUID PRIMARY KEY,
email VARCHAR(255)
);
// โ
Good: Unique index
CREATE UNIQUE INDEX idx_users_email ON users(email);
// โ
Good: Composite index
CREATE INDEX idx_posts_userId_createdAt ON posts(userId, createdAt);
// โ
Good: Full-text index
CREATE FULLTEXT INDEX idx_posts_content ON posts(content);
// โ
Good: Partial index
CREATE INDEX idx_active_users ON users(id) WHERE isActive = true;
// โ
Good: Index usage
// Query: SELECT * FROM users WHERE email = '[email protected]'
// Uses: idx_users_email
// Query: SELECT * FROM posts WHERE userId = 1 ORDER BY createdAt DESC
// Uses: idx_posts_userId_createdAt
Index Optimization
// โ
Good: Analyze query performance
EXPLAIN ANALYZE
SELECT * FROM posts WHERE userId = 1 ORDER BY createdAt DESC;
// โ
Good: Identify missing indexes
SELECT * FROM pg_stat_statements
WHERE query LIKE '%SELECT%'
ORDER BY mean_exec_time DESC;
// โ
Good: Monitor index usage
SELECT schemaname, tablename, indexname, idx_scan
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
// โ
Good: Remove unused indexes
DROP INDEX idx_unused_index;
Query Optimization
Query Patterns
// โ Bad: N+1 query problem
const users = await User.find();
for (const user of users) {
user.posts = await Post.find({ userId: user._id });
}
// โ
Good: Use JOIN or populate
const users = await User.find().populate('posts');
// โ
Good: Batch queries
const userIds = users.map(u => u._id);
const posts = await Post.find({ userId: { $in: userIds } });
// โ Bad: SELECT *
SELECT * FROM users;
// โ
Good: Select specific columns
SELECT id, name, email FROM users;
// โ Bad: Inefficient WHERE clause
SELECT * FROM users WHERE YEAR(createdAt) = 2024;
// โ
Good: Efficient WHERE clause
SELECT * FROM users WHERE createdAt >= '2024-01-01' AND createdAt < '2025-01-01';
Aggregation
// โ
Good: Aggregation pipeline
db.posts.aggregate([
{
$match: { status: 'published' }
},
{
$group: {
_id: '$userId',
count: { $sum: 1 },
avgLength: { $avg: { $strLenCP: '$content' } }
}
},
{
$sort: { count: -1 }
},
{
$limit: 10
}
]);
// โ
Good: SQL aggregation
SELECT userId, COUNT(*) as count, AVG(LENGTH(content)) as avgLength
FROM posts
WHERE status = 'published'
GROUP BY userId
ORDER BY count DESC
LIMIT 10;
Performance Tuning
Connection Pooling
// โ
Good: Connection pool configuration
const pool = new Pool({
user: 'postgres',
password: 'password',
host: 'localhost',
port: 5432,
database: 'myapp',
max: 20, // Maximum connections
idleTimeoutMillis: 30000, // Idle timeout
connectionTimeoutMillis: 2000 // Connection timeout
});
// โ
Good: Monitor pool
pool.on('error', (err) => {
console.error('Unexpected error on idle client', err);
});
pool.on('connect', () => {
console.log('New connection established');
});
Caching
// โ
Good: Query result caching
const redis = require('redis');
const client = redis.createClient();
async function getUser(userId) {
const cacheKey = `user:${userId}`;
// Check cache
const cached = await client.get(cacheKey);
if (cached) {
return JSON.parse(cached);
}
// Query database
const user = await User.findById(userId);
// Cache result
await client.setex(cacheKey, 3600, JSON.stringify(user));
return user;
}
// โ
Good: Cache invalidation
async function updateUser(userId, updates) {
const user = await User.findByIdAndUpdate(userId, updates);
// Invalidate cache
await client.del(`user:${userId}`);
return user;
}
Partitioning
// โ
Good: Range partitioning
CREATE TABLE posts (
id UUID,
createdAt TIMESTAMP,
content TEXT
) PARTITION BY RANGE (YEAR(createdAt));
CREATE TABLE posts_2023 PARTITION OF posts
FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');
CREATE TABLE posts_2024 PARTITION OF posts
FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');
// โ
Good: Hash partitioning
CREATE TABLE users (
id UUID,
email VARCHAR(255)
) PARTITION BY HASH (id);
CREATE TABLE users_0 PARTITION OF users
FOR VALUES WITH (MODULUS 4, REMAINDER 0);
Monitoring and Maintenance
Database Monitoring
// โ
Good: Monitor slow queries
SET log_min_duration_statement = 1000; // Log queries > 1 second
// โ
Good: Monitor connections
SELECT datname, count(*) FROM pg_stat_activity GROUP BY datname;
// โ
Good: Monitor table size
SELECT schemaname, tablename, pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename))
FROM pg_tables
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;
// โ
Good: Monitor index size
SELECT schemaname, tablename, indexname, pg_size_pretty(pg_relation_size(indexrelid))
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;
Maintenance Tasks
// โ
Good: Vacuum and analyze
VACUUM ANALYZE;
// โ
Good: Reindex
REINDEX TABLE users;
// โ
Good: Update statistics
ANALYZE users;
// โ
Good: Backup
pg_dump -U postgres myapp > backup.sql
// โ
Good: Restore
psql -U postgres myapp < backup.sql
Best Practices
-
Design for queries:
// โ Good: Design schema based on queries // If you frequently query by email, index it CREATE INDEX idx_users_email ON users(email); // โ Bad: Index everything CREATE INDEX idx_users_name ON users(name); CREATE INDEX idx_users_age ON users(age); -
Use appropriate data types:
// โ Good: Appropriate types age: INTEGER balance: DECIMAL(10, 2) isActive: BOOLEAN // โ Bad: Wrong types age: VARCHAR(3) balance: VARCHAR(10) isActive: VARCHAR(5) -
Monitor performance:
// โ Good: Monitor queries EXPLAIN ANALYZE SELECT * FROM users WHERE email = '[email protected]'; // โ Bad: No monitoring
Summary
Database design and optimization are essential. Key takeaways:
- Design normalized schemas
- Use appropriate data types
- Create strategic indexes
- Optimize queries
- Implement caching
- Monitor performance
- Maintain databases
- Plan for growth
Related Resources
Next Steps
- Learn about Authentication Flows
- Explore Frontend-Backend Integration
- Study Deployment
- Practice database design
- Optimize database performance
Comments