Database Design and Optimization

Database design and optimization are critical for application performance. This article covers best practices.

Introduction

Database design provides:

Data integrity
Query performance
Scalability
Maintainability
Reliability

Understanding database design helps you:

Design efficient schemas
Optimize queries
Improve performance
Scale databases
Ensure data consistency

Schema Design

Normalization

// ✅ Good: Normalized schema (3NF)
// Users table
{
  id: 1,
  name: 'John',
  email: '[email protected]'
}

// Departments table
{
  id: 1,
  name: 'Engineering'
}

// User_Departments table (junction)
{
  userId: 1,
  departmentId: 1
}

// ✅ Good: Denormalization for performance
// User_Departments_Denormalized
{
  userId: 1,
  userName: 'John',
  departmentId: 1,
  departmentName: 'Engineering'
}

// ✅ Good: Choose based on needs
// Normalize: Data consistency, storage efficiency
// Denormalize: Query performance, reduced joins

Data Types

// ✅ Good: Appropriate data types
const userSchema = {
  id: 'UUID',                    // Unique identifier
  email: 'VARCHAR(255)',         // Email address
  password: 'VARCHAR(255)',      // Hashed password
  age: 'INTEGER',                // Age
  balance: 'DECIMAL(10, 2)',     // Money
  isActive: 'BOOLEAN',           // Flag
  createdAt: 'TIMESTAMP',        // Date/time
  metadata: 'JSON',              // Flexible data
  tags: 'TEXT[]'                 // Array
};

// ✅ Good: Constraints
const userSchema = {
  id: 'UUID PRIMARY KEY',
  email: 'VARCHAR(255) UNIQUE NOT NULL',
  password: 'VARCHAR(255) NOT NULL',
  age: 'INTEGER CHECK (age >= 0)',
  createdAt: 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP'
};

Relationships

// ✅ Good: One-to-Many relationship
// Users (1) -> Posts (Many)
const postSchema = {
  id: 'UUID PRIMARY KEY',
  userId: 'UUID FOREIGN KEY REFERENCES users(id)',
  title: 'VARCHAR(255)',
  content: 'TEXT'
};

// ✅ Good: Many-to-Many relationship
// Users (Many) -> Groups (Many)
const userGroupSchema = {
  userId: 'UUID FOREIGN KEY REFERENCES users(id)',
  groupId: 'UUID FOREIGN KEY REFERENCES groups(id)',
  PRIMARY KEY (userId, groupId)
};

// ✅ Good: Self-referencing relationship
// Comments (parent-child)
const commentSchema = {
  id: 'UUID PRIMARY KEY',
  parentId: 'UUID FOREIGN KEY REFERENCES comments(id)',
  content: 'TEXT'
};

Indexing Strategy

Index Types

// ✅ Good: Primary key index
CREATE TABLE users (
  id UUID PRIMARY KEY,
  email VARCHAR(255)
);

// ✅ Good: Unique index
CREATE UNIQUE INDEX idx_users_email ON users(email);

// ✅ Good: Composite index
CREATE INDEX idx_posts_userId_createdAt ON posts(userId, createdAt);

// ✅ Good: Full-text index
CREATE FULLTEXT INDEX idx_posts_content ON posts(content);

// ✅ Good: Partial index
CREATE INDEX idx_active_users ON users(id) WHERE isActive = true;

// ✅ Good: Index usage
// Query: SELECT * FROM users WHERE email = '[email protected]'
// Uses: idx_users_email

// Query: SELECT * FROM posts WHERE userId = 1 ORDER BY createdAt DESC
// Uses: idx_posts_userId_createdAt

Index Optimization

// ✅ Good: Analyze query performance
EXPLAIN ANALYZE
SELECT * FROM posts WHERE userId = 1 ORDER BY createdAt DESC;

// ✅ Good: Identify missing indexes
SELECT * FROM pg_stat_statements
WHERE query LIKE '%SELECT%'
ORDER BY mean_exec_time DESC;

// ✅ Good: Monitor index usage
SELECT schemaname, tablename, indexname, idx_scan
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;

// ✅ Good: Remove unused indexes
DROP INDEX idx_unused_index;

Query Optimization

Query Patterns

// ❌ Bad: N+1 query problem
const users = await User.find();
for (const user of users) {
  user.posts = await Post.find({ userId: user._id });
}

// ✅ Good: Use JOIN or populate
const users = await User.find().populate('posts');

// ✅ Good: Batch queries
const userIds = users.map(u => u._id);
const posts = await Post.find({ userId: { $in: userIds } });

// ❌ Bad: SELECT *
SELECT * FROM users;

// ✅ Good: Select specific columns
SELECT id, name, email FROM users;

// ❌ Bad: Inefficient WHERE clause
SELECT * FROM users WHERE YEAR(createdAt) = 2024;

// ✅ Good: Efficient WHERE clause
SELECT * FROM users WHERE createdAt >= '2024-01-01' AND createdAt < '2025-01-01';

Aggregation

// ✅ Good: Aggregation pipeline
db.posts.aggregate([
  {
    $match: { status: 'published' }
  },
  {
    $group: {
      _id: '$userId',
      count: { $sum: 1 },
      avgLength: { $avg: { $strLenCP: '$content' } }
    }
  },
  {
    $sort: { count: -1 }
  },
  {
    $limit: 10
  }
]);

// ✅ Good: SQL aggregation
SELECT userId, COUNT(*) as count, AVG(LENGTH(content)) as avgLength
FROM posts
WHERE status = 'published'
GROUP BY userId
ORDER BY count DESC
LIMIT 10;

Performance Tuning

Connection Pooling

// ✅ Good: Connection pool configuration
const pool = new Pool({
  user: 'postgres',
  password: 'password',
  host: 'localhost',
  port: 5432,
  database: 'myapp',
  max: 20,                    // Maximum connections
  idleTimeoutMillis: 30000,   // Idle timeout
  connectionTimeoutMillis: 2000 // Connection timeout
});

// ✅ Good: Monitor pool
pool.on('error', (err) => {
  console.error('Unexpected error on idle client', err);
});

pool.on('connect', () => {
  console.log('New connection established');
});

Caching

// ✅ Good: Query result caching
const redis = require('redis');
const client = redis.createClient();

async function getUser(userId) {
  const cacheKey = `user:${userId}`;
  
  // Check cache
  const cached = await client.get(cacheKey);
  if (cached) {
    return JSON.parse(cached);
  }
  
  // Query database
  const user = await User.findById(userId);
  
  // Cache result
  await client.setex(cacheKey, 3600, JSON.stringify(user));
  
  return user;
}

// ✅ Good: Cache invalidation
async function updateUser(userId, updates) {
  const user = await User.findByIdAndUpdate(userId, updates);
  
  // Invalidate cache
  await client.del(`user:${userId}`);
  
  return user;
}

Partitioning

// ✅ Good: Range partitioning
CREATE TABLE posts (
  id UUID,
  createdAt TIMESTAMP,
  content TEXT
) PARTITION BY RANGE (YEAR(createdAt));

CREATE TABLE posts_2023 PARTITION OF posts
  FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');

CREATE TABLE posts_2024 PARTITION OF posts
  FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');

// ✅ Good: Hash partitioning
CREATE TABLE users (
  id UUID,
  email VARCHAR(255)
) PARTITION BY HASH (id);

CREATE TABLE users_0 PARTITION OF users
  FOR VALUES WITH (MODULUS 4, REMAINDER 0);

Monitoring and Maintenance

Database Monitoring

// ✅ Good: Monitor slow queries
SET log_min_duration_statement = 1000; // Log queries > 1 second

// ✅ Good: Monitor connections
SELECT datname, count(*) FROM pg_stat_activity GROUP BY datname;

// ✅ Good: Monitor table size
SELECT schemaname, tablename, pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename))
FROM pg_tables
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;

// ✅ Good: Monitor index size
SELECT schemaname, tablename, indexname, pg_size_pretty(pg_relation_size(indexrelid))
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;

Maintenance Tasks

// ✅ Good: Vacuum and analyze
VACUUM ANALYZE;

// ✅ Good: Reindex
REINDEX TABLE users;

// ✅ Good: Update statistics
ANALYZE users;

// ✅ Good: Backup
pg_dump -U postgres myapp > backup.sql

// ✅ Good: Restore
psql -U postgres myapp < backup.sql

Best Practices

Design for queries:

// ✅ Good: Design schema based on queries
// If you frequently query by email, index it
CREATE INDEX idx_users_email ON users(email);

// ❌ Bad: Index everything
CREATE INDEX idx_users_name ON users(name);
CREATE INDEX idx_users_age ON users(age);

Use appropriate data types:

// ✅ Good: Appropriate types
age: INTEGER
balance: DECIMAL(10, 2)
isActive: BOOLEAN

// ❌ Bad: Wrong types
age: VARCHAR(3)
balance: VARCHAR(10)
isActive: VARCHAR(5)

Monitor performance:

// ✅ Good: Monitor queries
EXPLAIN ANALYZE SELECT * FROM users WHERE email = '[email protected]';

// ❌ Bad: No monitoring

Summary

Database design and optimization are essential. Key takeaways:

Design normalized schemas
Use appropriate data types
Create strategic indexes
Optimize queries
Implement caching
Monitor performance
Maintain databases
Plan for growth

Next Steps

Learn about Authentication Flows
Explore Frontend-Backend Integration
Study Deployment
Practice database design
Optimize database performance