Skip to main content
⚡ Calmops

Database Design and Optimization

Database Design and Optimization

Database design and optimization are critical for application performance. This article covers best practices.

Introduction

Database design provides:

  • Data integrity
  • Query performance
  • Scalability
  • Maintainability
  • Reliability

Understanding database design helps you:

  • Design efficient schemas
  • Optimize queries
  • Improve performance
  • Scale databases
  • Ensure data consistency

Schema Design

Normalization

// ✅ Good: Normalized schema (3NF)
// Users table
{
  id: 1,
  name: 'John',
  email: 'john@example.com'
}

// Departments table
{
  id: 1,
  name: 'Engineering'
}

// User_Departments table (junction)
{
  userId: 1,
  departmentId: 1
}

// ✅ Good: Denormalization for performance
// User_Departments_Denormalized
{
  userId: 1,
  userName: 'John',
  departmentId: 1,
  departmentName: 'Engineering'
}

// ✅ Good: Choose based on needs
// Normalize: Data consistency, storage efficiency
// Denormalize: Query performance, reduced joins

Data Types

// ✅ Good: Appropriate data types
/** Column-to-type map for a users table; each comment names what the column stores. */
const userSchema = {
  // Unique identifier
  id: 'UUID',
  // Email address
  email: 'VARCHAR(255)',
  // Hashed password
  password: 'VARCHAR(255)',
  // Age
  age: 'INTEGER',
  // Money
  balance: 'DECIMAL(10, 2)',
  // Flag
  isActive: 'BOOLEAN',
  // Date/time
  createdAt: 'TIMESTAMP',
  // Flexible data
  metadata: 'JSON',
  // Array
  tags: 'TEXT[]',
};

// ✅ Good: Constraints
/** Users-table columns with each constraint declared inline next to its type. */
const userSchema = {
  // Primary key
  id: 'UUID PRIMARY KEY',
  // Required, and unique across rows
  email: 'VARCHAR(255) UNIQUE NOT NULL',
  // Required
  password: 'VARCHAR(255) NOT NULL',
  // Negative ages are rejected
  age: 'INTEGER CHECK (age >= 0)',
  // Filled in with the current time when no value is supplied
  createdAt: 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
};

Relationships

// ✅ Good: One-to-Many relationship
// Users (1) -> Posts (Many)
/** Posts-table columns; every post points at its author through `userId`. */
const postSchema = {
  id: 'UUID PRIMARY KEY',
  // Column-level foreign keys are written as a bare REFERENCES clause in
  // PostgreSQL; `FOREIGN KEY (col) REFERENCES ...` is the table-level form.
  userId: 'UUID REFERENCES users(id)',
  title: 'VARCHAR(255)',
  content: 'TEXT',
};

// ✅ Good: Many-to-Many relationship
// Users (Many) -> Groups (Many)
/** Junction table linking users and groups; one row per membership. */
const userGroupSchema = {
  // Column-level foreign keys are written as a bare REFERENCES clause in PostgreSQL.
  userId: 'UUID REFERENCES users(id)',
  groupId: 'UUID REFERENCES groups(id)',
  // Table-level constraint, stored under its own key so the object literal is
  // valid JavaScript. The composite key allows each (user, group) pair once.
  primaryKey: 'PRIMARY KEY (userId, groupId)',
};

// ✅ Good: Self-referencing relationship
// Comments (parent-child)
/** Comments-table columns; `parentId` points back into the same table. */
const commentSchema = {
  id: 'UUID PRIMARY KEY',
  // Column-level foreign keys are written as a bare REFERENCES clause in
  // PostgreSQL. The column carries no NOT NULL, so a comment may have no parent.
  parentId: 'UUID REFERENCES comments(id)',
  content: 'TEXT',
};

Indexing Strategy

Index Types

// ✅ Good: Primary key index
CREATE TABLE users (
  id UUID PRIMARY KEY,
  email VARCHAR(255)
);

// ✅ Good: Unique index
CREATE UNIQUE INDEX idx_users_email ON users(email);

// ✅ Good: Composite index
CREATE INDEX idx_posts_userId_createdAt ON posts(userId, createdAt);

// ✅ Good: Full-text index
// PostgreSQL has no FULLTEXT index type (that is MySQL syntax); full-text search
// is indexed with GIN over a tsvector expression. Queries must filter on the
// same expression, e.g. to_tsvector('english', content) @@ to_tsquery(...).
CREATE INDEX idx_posts_content ON posts USING GIN (to_tsvector('english', content));

// ✅ Good: Partial index
CREATE INDEX idx_active_users ON users(id) WHERE isActive = true;

// ✅ Good: Index usage
// Query: SELECT * FROM users WHERE email = 'john@example.com'
// Uses: idx_users_email

// Query: SELECT * FROM posts WHERE userId = 1 ORDER BY createdAt DESC
// Uses: idx_posts_userId_createdAt

Index Optimization

// ✅ Good: Analyze query performance
EXPLAIN ANALYZE
SELECT * FROM posts WHERE userId = 1 ORDER BY createdAt DESC;

// ✅ Good: Identify missing indexes
SELECT * FROM pg_stat_statements
WHERE query LIKE '%SELECT%'
ORDER BY mean_exec_time DESC;

// ✅ Good: Monitor index usage
// pg_stat_user_indexes exposes the table as relname and the index as
// indexrelname; the aliases keep the friendlier output column names.
SELECT schemaname, relname AS tablename, indexrelname AS indexname, idx_scan
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;

// ✅ Good: Remove unused indexes
DROP INDEX idx_unused_index;

Query Optimization

Query Patterns

// โŒ Bad: N+1 query problem
const users = await User.find();
for (const user of users) {
  user.posts = await Post.find({ userId: user._id });
}

// ✅ Good: Use JOIN or populate
const users = await User.find().populate('posts');

// ✅ Good: Batch queries
const userIds = users.map(u => u._id);
const posts = await Post.find({ userId: { $in: userIds } });

// โŒ Bad: SELECT *
SELECT * FROM users;

// ✅ Good: Select specific columns
SELECT id, name, email FROM users;

// โŒ Bad: Inefficient WHERE clause
SELECT * FROM users WHERE YEAR(createdAt) = 2024;

// ✅ Good: Efficient WHERE clause
SELECT * FROM users WHERE createdAt >= '2024-01-01' AND createdAt < '2025-01-01';

Aggregation

// ✅ Good: Aggregation pipeline
db.posts.aggregate([
  {
    $match: { status: 'published' }
  },
  {
    $group: {
      _id: '$userId',
      count: { $sum: 1 },
      avgLength: { $avg: { $strLenCP: '$content' } }
    }
  },
  {
    $sort: { count: -1 }
  },
  {
    $limit: 10
  }
]);

// ✅ Good: SQL aggregation
SELECT userId, COUNT(*) as count, AVG(LENGTH(content)) as avgLength
FROM posts
WHERE status = 'published'
GROUP BY userId
ORDER BY count DESC
LIMIT 10;

Performance Tuning

Connection Pooling

// ✅ Good: Connection pool configuration
// Connection details are read from the environment (the conventional PG*
// variable names) so that no secret is committed with the source. Non-secret
// settings fall back to local-development defaults; the password has no fallback.
const pool = new Pool({
  user: process.env.PGUSER || 'postgres',
  password: process.env.PGPASSWORD,
  host: process.env.PGHOST || 'localhost',
  port: Number.parseInt(process.env.PGPORT || '5432', 10),
  database: process.env.PGDATABASE || 'myapp',
  max: 20,                       // Maximum connections
  idleTimeoutMillis: 30000,      // Idle timeout
  connectionTimeoutMillis: 2000, // Connection timeout
});

// ✅ Good: Monitor pool
/** Logs an error reported through the pool's 'error' event. */
function reportIdleClientError(err) {
  console.error('Unexpected error on idle client', err);
}

/** Logs each 'connect' event emitted by the pool. */
function reportNewConnection() {
  console.log('New connection established');
}

pool.on('error', reportIdleClientError);
pool.on('connect', reportNewConnection);

Caching

// ✅ Good: Query result caching
const redis = require('redis');
const client = redis.createClient();

// The promise-based node-redis client (v4+) starts disconnected and reports
// failures through 'error' events: without a listener an emitted error is
// thrown, and without connect() every command is rejected.
client.on('error', (err) => {
  console.error('Redis client error', err);
});
client.connect().catch((err) => {
  console.error('Failed to connect to Redis', err);
});

/**
 * Loads a user by id, answering from Redis when a cached copy exists.
 *
 * @param userId - Identifier passed to `User.findById` and embedded in the cache key.
 * @returns {Promise<*>} On a cache hit, the object parsed from the cached JSON;
 *   otherwise whatever `User.findById` resolved with. NOTE(review): the two
 *   paths can differ in shape (plain object vs. model instance) - confirm
 *   callers only rely on plain fields.
 */
async function getUser(userId) {
  const cacheKey = `user:${userId}`;

  // Check cache
  const cached = await client.get(cacheKey);
  if (cached) {
    return JSON.parse(cached);
  }

  // Query database
  const user = await User.findById(userId);

  // Cache result for 3600 seconds; the promise-based client names the
  // command `setEx` (camelCase), `setex` does not exist on it.
  await client.setEx(cacheKey, 3600, JSON.stringify(user));

  return user;
}

// ✅ Good: Cache invalidation
/**
 * Applies `updates` to a user and removes that user's cache entry.
 *
 * @param userId - Identifier of the user to update; also used in the cache key.
 * @param updates - Update specification passed through to `User.findByIdAndUpdate`.
 * @returns {Promise<*>} The value `User.findByIdAndUpdate` resolves with.
 */
async function updateUser(userId, updates) {
  // Assuming `User` is a Mongoose model (the other examples use `populate`
  // and `_id`): by default findByIdAndUpdate resolves with the document as it
  // was BEFORE the update; `new: true` returns the updated document instead.
  const user = await User.findByIdAndUpdate(userId, updates, { new: true });

  // Invalidate cache
  await client.del(`user:${userId}`);

  return user;
}

Partitioning

// ✅ Good: Range partitioning
// The partition key is the raw timestamp column: PostgreSQL has no YEAR()
// function, and the bounds below are timestamps rather than year numbers.
CREATE TABLE posts (
  id UUID,
  createdAt TIMESTAMP,
  content TEXT
) PARTITION BY RANGE (createdAt);

// Lower bound is inclusive, upper bound is exclusive
CREATE TABLE posts_2023 PARTITION OF posts
  FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');

CREATE TABLE posts_2024 PARTITION OF posts
  FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');

// ✅ Good: Hash partitioning
CREATE TABLE users (
  id UUID,
  email VARCHAR(255)
) PARTITION BY HASH (id);

CREATE TABLE users_0 PARTITION OF users
  FOR VALUES WITH (MODULUS 4, REMAINDER 0);

Monitoring and Maintenance

Database Monitoring

// ✅ Good: Monitor slow queries
SET log_min_duration_statement = 1000; // Log queries > 1 second

// ✅ Good: Monitor connections
SELECT datname, count(*) FROM pg_stat_activity GROUP BY datname;

// ✅ Good: Monitor table size
SELECT schemaname, tablename, pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename))
FROM pg_tables
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;

// ✅ Good: Monitor index size
// pg_stat_user_indexes exposes the table as relname and the index as
// indexrelname; the aliases keep the friendlier output column names.
SELECT schemaname, relname AS tablename, indexrelname AS indexname, pg_size_pretty(pg_relation_size(indexrelid))
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;

Maintenance Tasks

// ✅ Good: Vacuum and analyze
VACUUM ANALYZE;

// ✅ Good: Reindex
REINDEX TABLE users;

// ✅ Good: Update statistics
ANALYZE users;

// ✅ Good: Backup
pg_dump -U postgres myapp > backup.sql

// ✅ Good: Restore
psql -U postgres myapp < backup.sql

Best Practices

  1. Design for queries:

    // ✅ Good: Design schema based on queries
    // If you frequently query by email, index it
    CREATE INDEX idx_users_email ON users(email);
    
    // โŒ Bad: Index everything
    CREATE INDEX idx_users_name ON users(name);
    CREATE INDEX idx_users_age ON users(age);
    
  2. Use appropriate data types:

    // ✅ Good: Appropriate types
    age: INTEGER
    balance: DECIMAL(10, 2)
    isActive: BOOLEAN
    
    // โŒ Bad: Wrong types
    age: VARCHAR(3)
    balance: VARCHAR(10)
    isActive: VARCHAR(5)
    
  3. Monitor performance:

    // ✅ Good: Monitor queries
    EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'john@example.com';
    
    // โŒ Bad: No monitoring
    

Summary

Database design and optimization are essential. Key takeaways:

  • Design normalized schemas
  • Use appropriate data types
  • Create strategic indexes
  • Optimize queries
  • Implement caching
  • Monitor performance
  • Maintain databases
  • Plan for growth

Next Steps

Comments