ClickHouse Guide

When to Use ClickHouse

✓ Great for
  • Analytics (billions of rows)
  • Log/event aggregation
  • Real-time dashboards
  • Time-series metrics
✗ Not for
  • Frequent single-row updates
  • OLTP (transactions)
  • Complex JOINs
  • Low-latency point reads

MergeTree Table Example

-- Web analytics events table
CREATE TABLE page_views
(
    event_time    DateTime,
    date          Date DEFAULT toDate(event_time),
    user_id       UInt64,
    session_id    String,
    page_url      LowCardinality(String),  -- for low-cardinality strings
    country       LowCardinality(String),
    device_type   Enum8('desktop'=1, 'mobile'=2, 'tablet'=3),
    duration_sec  UInt16,
    is_bounce     UInt8
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(date)       -- Monthly partitions
ORDER BY (date, page_url, user_id) -- Sort key (affects perf)
TTL date + INTERVAL 1 YEAR;       -- Auto-delete after 1 year

Efficient Aggregation Queries

-- Count unique users per page today (fast with bitmap)
SELECT
    page_url,
    uniqExact(user_id) AS unique_users,
    countIf(is_bounce = 1) / count() AS bounce_rate,
    avg(duration_sec) AS avg_duration
FROM page_views
WHERE date = today()
GROUP BY page_url
ORDER BY unique_users DESC
LIMIT 20;

-- Materialized view for pre-aggregation
CREATE MATERIALIZED VIEW daily_stats
ENGINE = SummingMergeTree()
ORDER BY (date, page_url)
AS SELECT date, page_url, count() AS views, uniq(user_id) AS users
FROM page_views GROUP BY date, page_url;