Skip to main content
⚡ Calmops

ClickHouse Use Cases: Real-World Applications and Production Patterns

Introduction

ClickHouse excels in analytical scenarios where query speed on massive datasets is critical. From web analytics to IoT monitoring, ClickHouse powers real-time decision-making at companies handling billions of events daily.

This article explores practical ClickHouse use cases, implementation patterns, and best practices for production deployments.


Web Analytics

Page View Tracking

-- Create page views table
-- One row per page view event; written append-only at ingest time.
CREATE TABLE page_views (
    user_id UInt64,
    session_id UUID,
    page_url String,
    referrer_url String,
    country LowCardinality(String),      -- low-cardinality dimensions are dictionary-encoded
    device_type LowCardinality(String),
    browser LowCardinality(String),
    timestamp DateTime,
    load_time Float32,                   -- page load time; presumably seconds -- TODO confirm units
    is_bounce UInt8                      -- 0/1 flag, summed directly for bounce counts below
) ENGINE = MergeTree()
ORDER BY (country, timestamp, user_id)   -- low-cardinality column first narrows range scans
PARTITION BY toYYYYMM(timestamp);        -- monthly partitions for cheap time-range pruning/drops

-- Query examples
-- Daily page views by country: traffic volume, distinct users, mean load time
SELECT
    toDate(timestamp) AS date,
    country,
    count() AS page_views,
    uniq(user_id) AS unique_users,
    avg(load_time) AS avg_load_time
FROM page_views
WHERE timestamp >= '2026-01-01'
GROUP BY
    date,
    country
ORDER BY
    date,
    page_views DESC;

-- Bounce rate analysis: worst-bouncing pages with meaningful traffic
SELECT
    page_url,
    count() AS total_visits,
    sum(is_bounce) AS bounces,
    -- ClickHouse permits reusing SELECT aliases in later expressions
    round(bounces / total_visits * 100, 2) AS bounce_rate
FROM page_views
WHERE timestamp >= '2026-01-01'
GROUP BY page_url
HAVING total_visits > 100    -- skip low-traffic pages with noisy rates
ORDER BY bounce_rate DESC
LIMIT 20;

User Session Analysis

-- Session funnel analysis
-- NOTE(review): for strictly-ordered funnels ClickHouse's windowFunnel()
-- aggregate is usually preferred; this UNION ALL form counts each step
-- independently (a session qualifies for a step if it ever hit that page).
-- Fixes vs. original:
--   * lagInFrame() only looks inside the window frame, and the default frame
--     excludes the previous row — an explicit unbounded ROWS frame is required.
--   * On the first row lagInFrame() yields 0; nullIf() avoids division by
--     zero, so step1's conversion_rate is NULL instead of inf.
SELECT
    funnel_step,
    count() AS users,
    round(
        count() / nullIf(
            lagInFrame(count(), 1) OVER (
                ORDER BY funnel_step
                ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
            ),
            0
        ) * 100,
        1
    ) AS conversion_rate
FROM (
    -- step1: sessions that visited /signup
    SELECT
        session_id,
        groupArray(page_url) AS pages,
        'step1' AS funnel_step
    FROM page_views
    WHERE timestamp >= '2026-01-01'
    GROUP BY session_id
    HAVING has(pages, '/signup')

    UNION ALL

    -- step2: sessions that visited /pricing
    SELECT
        session_id,
        groupArray(page_url) AS pages,
        'step2' AS funnel_step
    FROM page_views
    WHERE timestamp >= '2026-01-01'
    GROUP BY session_id
    HAVING has(pages, '/pricing')

    UNION ALL

    -- step3: sessions that visited /thank-you
    SELECT
        session_id,
        groupArray(page_url) AS pages,
        'step3' AS funnel_step
    FROM page_views
    WHERE timestamp >= '2026-01-01'
    GROUP BY session_id
    HAVING has(pages, '/thank-you')
)
GROUP BY funnel_step;

Application Monitoring

Performance Monitoring

-- Create metrics table
-- One row per observed request from each service host.
CREATE TABLE app_metrics (
    service String,
    endpoint String,
    status UInt16,                       -- HTTP status code
    response_time Float32,               -- presumably seconds -- TODO confirm units
    timestamp DateTime,
    host LowCardinality(String),
    environment LowCardinality(String)
) ENGINE = MergeTree()
ORDER BY (service, endpoint, timestamp)  -- matches the service/endpoint grouping used below
PARTITION BY toYYYYMM(timestamp)
TTL timestamp + INTERVAL 30 DAY;         -- rows expire after 30 days (enforced during merges)

-- Performance alerts: endpoints with elevated errors or slow tail latency
SELECT
    service,
    endpoint,
    quantile(0.95)(response_time) AS p95,
    quantile(0.99)(response_time) AS p99,
    max(response_time) AS max_response,
    count() AS total_requests,
    countIf(status >= 500) AS errors    -- equivalent to sum(status >= 500)
FROM app_metrics
WHERE timestamp >= now() - INTERVAL 1 HOUR
GROUP BY
    service,
    endpoint
HAVING errors > 10 OR p99 > 5           -- threshold presumably in seconds — confirm units
ORDER BY errors DESC;

Error Tracking

-- Error aggregation: per-minute error counts by service and type
SELECT
    toStartOfMinute(timestamp) AS minute,
    service,
    error_type,
    count() AS error_count
FROM app_logs
WHERE level = 'ERROR'
  AND timestamp >= now() - INTERVAL 1 HOUR
GROUP BY
    minute,
    service,
    error_type
ORDER BY
    minute,
    error_count DESC;

IoT and Sensors

Sensor Data

-- Create sensor data table
CREATE TABLE sensor_data (
    sensor_id String,
    location String,
    temperature Nullable(Float32),   -- Nullable: a reading may omit any measurement
    humidity Nullable(Float32),
    pressure Nullable(Float32),
    battery Float32,
    timestamp DateTime,
    metadata String                  -- free-form payload; schema not visible here
) ENGINE = MergeTree()
ORDER BY (sensor_id, timestamp)      -- per-sensor time-range reads are the hot path
PARTITION BY toYYYYMMDD(timestamp);  -- daily partitions; watch partition count at long retention

-- Real-time aggregation: per-sensor summary over the trailing hour
SELECT
    sensor_id,
    location,
    avg(temperature) AS avg_temp,
    max(temperature) AS max_temp,
    min(temperature) AS min_temp,
    count() AS readings,
    avg(battery) AS avg_battery
FROM sensor_data
WHERE timestamp >= now() - INTERVAL 1 HOUR
GROUP BY
    sensor_id,
    location
ORDER BY
    location,
    sensor_id;

-- Anomaly detection: flag readings more than 3 sigma from the trailing mean
-- over the last ~100 readings per sensor.
-- Fixes vs. original:
--   * The outer query called stddevPop(temperature) — a plain aggregate —
--     alongside non-aggregated columns with no GROUP BY, which is invalid;
--     z_score now uses the windowed stddev_temp from the subquery.
--   * stddev_temp > 0 guard prevents division by zero on flat signals.
SELECT
    sensor_id,
    timestamp,
    temperature,
    avg_temp,
    abs(temperature - avg_temp) / stddev_temp AS z_score
FROM (
    SELECT
        sensor_id,
        timestamp,
        temperature,
        avg(temperature) OVER w AS avg_temp,
        stddevPop(temperature) OVER w AS stddev_temp
    FROM sensor_data
    WHERE timestamp >= now() - INTERVAL 1 DAY
    WINDOW w AS (
        PARTITION BY sensor_id
        ORDER BY timestamp
        ROWS BETWEEN 100 PRECEDING AND CURRENT ROW
    )
)
WHERE stddev_temp > 0
  AND abs(temperature - avg_temp) / stddev_temp > 3;

Business Intelligence

Revenue Analytics

-- Revenue dashboard: daily revenue by product category and country
SELECT
    toDate(timestamp) AS date,
    product_category,
    country,
    count() AS transactions,
    sum(amount) AS revenue,
    avg(amount) AS avg_order_value,
    -- uniqExact is what ClickHouse executes for count(DISTINCT ...) by default
    sum(amount) / uniqExact(user_id) AS revenue_per_user
FROM transactions
WHERE timestamp >= '2026-01-01'
GROUP BY
    date,
    product_category,
    country
ORDER BY
    date DESC,
    revenue DESC;

-- Cohort analysis: revenue and active users by acquisition cohort and age.
-- Fixes vs. original:
--   * toMonth() returns only the month number (1-12), silently merging
--     cohorts from different years; toStartOfMonth() keeps cohorts year-aware.
--   * Positional GROUP BY/ORDER BY replaced with explicit column names.
WITH users AS (
    -- first purchase date per user defines the cohort
    SELECT
        user_id,
        min(toDate(timestamp)) AS cohort_date
    FROM transactions
    GROUP BY user_id
)
SELECT
    toStartOfMonth(cohort_date) AS cohort_month,
    dateDiff('month', cohort_date, toDate(timestamp)) AS month_number,
    count(DISTINCT t.user_id) AS active_users,
    sum(t.amount) AS revenue
FROM transactions t
JOIN users u ON t.user_id = u.user_id
GROUP BY cohort_month, month_number
ORDER BY cohort_month, month_number;

Real-time KPIs

import clickhouse_connect

class RealtimeKPI:
    """Real-time KPI calculations backed by ClickHouse.

    ``config`` is forwarded verbatim to ``clickhouse_connect.get_client``
    (host, port, username, ...).
    """

    def __init__(self, config):
        self.client = clickhouse_connect.get_client(**config)

    def _scalar(self, query):
        # Helper: run a single-cell aggregate query and return result [0][0].
        return self.client.query(query).result_rows[0][0]

    def get_kpis(self):
        """Calculate key KPIs.

        Returns a dict with today's revenue and order count plus the
        7-day distinct active-user count; empty results map to 0
        (sum() over zero rows returns NULL/None).
        """
        # Revenue today
        revenue = self._scalar("""
            SELECT sum(amount) FROM transactions
            WHERE toDate(timestamp) = today()
        """)
        # Orders today
        orders = self._scalar("""
            SELECT count() FROM transactions
            WHERE toDate(timestamp) = today()
        """)
        # Active users (last 7 days)
        users = self._scalar("""
            SELECT uniq(user_id) FROM events
            WHERE timestamp >= now() - INTERVAL 7 DAY
        """)
        return {
            'revenue_today': revenue or 0,
            'orders_today': orders or 0,
            'active_users_7d': users or 0
        }

    def get_trends(self, days=30):
        """Get daily event counts and revenue for the last ``days`` days.

        Returns a pandas DataFrame. Fixes vs. original:
          * ``days`` is bound as a server-side query parameter instead of
            being interpolated into the SQL string (injection-safe).
          * Uses ``query_df``; ``QueryResult`` exposes no
            ``result_set.to_pandas()`` accessor in clickhouse-connect.
        """
        return self.client.query_df(
            """
            SELECT
                toDate(timestamp) as date,
                count() as events,
                sum(amount) as revenue
            FROM transactions
            WHERE timestamp >= today() - INTERVAL {days:UInt32} DAY
            GROUP BY date
            ORDER BY date
            """,
            parameters={'days': days},
        )

# Usage
kpi = RealtimeKPI({'host': 'localhost'})
print(kpi.get_kpis())

Log Management

Application Logs

-- Create logs table
CREATE TABLE logs (
    timestamp DateTime,
    level String,
    service String,
    message String,
    trace_id String,
    user_id Nullable(UInt64),        -- NULL for unauthenticated / system log lines
    metadata String
) ENGINE = MergeTree()
ORDER BY (service, timestamp, level)
PARTITION BY toYYYYMM(timestamp)
TTL timestamp + INTERVAL 7 DAY;      -- short retention: rows expire after a week

-- Log search: most recent ERROR/FATAL lines from the api service
SELECT
    timestamp,
    level,
    service,
    message
FROM logs
WHERE service = 'api'
  AND level IN ('ERROR', 'FATAL')
  AND timestamp >= now() - INTERVAL 1 HOUR
ORDER BY timestamp DESC
LIMIT 100;

-- Error patterns: bucket messages by the text following "Error: "
SELECT
    extract(message, 'Error: (.+)') AS error_pattern,
    count() AS occurrences
FROM logs
WHERE level = 'ERROR'
  AND timestamp >= now() - INTERVAL 1 DAY
GROUP BY error_pattern
ORDER BY occurrences DESC
LIMIT 20;

Time Series Analysis

Stock/Financial Data

-- Create financial data table
-- Prices widened to Float64: Float32 carries only ~7 significant decimal
-- digits, which is not enough to represent price ticks faithfully.
-- Use Decimal(18, 4) instead if exact decimal arithmetic is required.
CREATE TABLE stock_prices (
    symbol String,
    timestamp DateTime,
    open Float64,
    high Float64,
    low Float64,
    close Float64,
    volume UInt64
) ENGINE = MergeTree()
ORDER BY (symbol, timestamp);   -- per-symbol time-series scans are the hot path

-- Technical indicators: 20-period simple moving average and deviation from it.
-- The shared WINDOW clause replaces two copies of the same window spec.
-- NOTE: the first 19 rows average fewer than 20 points (partial window).
SELECT
    symbol,
    timestamp,
    close,
    avg(close) OVER w AS sma_20,
    close - avg(close) OVER w AS price_diff
FROM stock_prices
WHERE symbol = 'AAPL'
WINDOW w AS (
    PARTITION BY symbol
    ORDER BY timestamp
    ROWS BETWEEN 19 PRECEDING AND CURRENT ROW
)
ORDER BY timestamp;

-- Volatility calculation: the ten most volatile symbol-days since 2026-01-01
SELECT
    symbol,
    toStartOfDay(timestamp) AS date,
    stddevPop(close) AS volatility,
    max(high) - min(low) AS daily_range
FROM stock_prices
WHERE timestamp >= '2026-01-01'
GROUP BY
    symbol,
    date
ORDER BY volatility DESC
LIMIT 10;

Ad Tech / Real-Time Bidding

Bid Analysis

-- Bid stream analysis
CREATE TABLE bids (
    timestamp DateTime,
    auction_id UUID,
    bidder_id UInt64,
    impression_id UUID,
    campaign_id UInt64,
    bid_amount Float32,
    won UInt8,                       -- 0/1: whether this bid won the auction
    win_amount Nullable(Float32),    -- clearing price; NULL when the bid lost
    country LowCardinality(String)
) ENGINE = MergeTree()
ORDER BY (timestamp, auction_id)
PARTITION BY toYYYYMMDD(timestamp);  -- daily partitions for a high-volume stream

-- Win rate by campaign (last 24h, campaigns with >100 bids).
-- Fix vs. original: sum(win_amount) / sum(won) divided by zero for campaigns
-- with no wins; nullIf makes avg_win_price NULL in that case instead.
SELECT
    campaign_id,
    count() AS bids,
    sum(won) AS wins,
    round(wins / bids * 100, 2) AS win_rate,        -- alias reuse is a ClickHouse feature
    sum(win_amount) / nullIf(wins, 0) AS avg_win_price
FROM bids
WHERE timestamp >= now() - INTERVAL 1 DAY
GROUP BY campaign_id
HAVING bids > 100
ORDER BY win_rate DESC;

-- Revenue optimization: CPM by country over the last week (won bids only)
SELECT
    country,
    sum(win_amount) AS revenue,
    count() AS impressions,
    round(revenue / impressions * 1000, 2) AS cpm   -- alias reuse is a ClickHouse feature
FROM bids
WHERE timestamp >= now() - INTERVAL 7 DAY
  AND won = 1
GROUP BY country
ORDER BY revenue DESC;

Best Practices Summary

When to Use ClickHouse

Use Case Recommendation
Web analytics ✅ Excellent
Application monitoring ✅ Excellent
IoT/sensors ✅ Excellent
Business intelligence ✅ Excellent
Real-time dashboards ✅ Excellent
Transactional (OLTP) ❌ Not suitable
Full-text search ⚠️ Limited
Graph data ❌ Not suitable

Performance Tips

-- Partition by time
PARTITION BY toYYYYMM(timestamp)

-- Good primary key
ORDER BY (event_type, timestamp, user_id)

-- Batch inserts (1000+ rows)
INSERT INTO table VALUES (1,'a'),(2,'b'),...;

-- Filter on partition key
WHERE timestamp >= '2026-01-01'

Resources


Conclusion

ClickHouse excels in analytical scenarios requiring real-time queries on massive datasets. From web analytics to IoT monitoring, ClickHouse provides the performance needed for modern data-driven applications.

With this comprehensive guide, you now have the knowledge to implement ClickHouse effectively in various production scenarios.

Comments