第 45 章

高级 SQL 技巧

MySQL SQL 高级技巧

本章介绍工作中真正会用到的 SQL 高级技巧:递归 CTE、LATERAL 横向关联、Upsert、多表 UPDATE/DELETE、行列转换、JSON 聚合、以及优化器 Hints。每个技巧都附带实际业务场景和完整可运行 SQL。

1. CTE 与递归 CTE

1.1 普通 CTE(可读性重构)

-- 用 CTE 拆解复杂查询,取代嵌套子查询
WITH
  active_users AS (
    SELECT id, name FROM users WHERE status = 'active' AND created_at > '2024-01-01'
  ),
  user_orders AS (
    SELECT user_id, COUNT(*) AS order_cnt, SUM(amount) AS total
    FROM orders
    WHERE status = 'paid'
    GROUP BY user_id
  )
SELECT u.id, u.name, o.order_cnt, o.total
FROM active_users u
LEFT JOIN user_orders o ON u.id = o.user_id
ORDER BY o.total DESC NULLS LAST;

1.2 递归 CTE(树形结构/层级查询)

最典型的应用:组织架构树、分类树、BOM 物料清单、评论回复链。

-- 表结构:部门层级
CREATE TABLE departments (
  id      INT PRIMARY KEY,
  name    VARCHAR(100),
  parent_id INT NULL  -- NULL 表示顶层
);

-- 向下递归:找某个节点的所有子孙节点
WITH RECURSIVE dept_tree AS (
  -- 锚点(起始节点)
  SELECT id, name, parent_id, 0 AS depth, CAST(name AS CHAR(1000)) AS path
  FROM departments WHERE id = 1  -- 从 id=1 开始

  UNION ALL

  -- 递归部分:找所有子节点
  SELECT d.id, d.name, d.parent_id, dt.depth + 1,
         CONCAT(dt.path, ' > ', d.name)
  FROM departments d
  INNER JOIN dept_tree dt ON d.parent_id = dt.id
  WHERE dt.depth < 10  -- 防止无限递归(防环)
)
SELECT * FROM dept_tree ORDER BY path;

-- 向上递归:找某节点的所有祖先(面包屑导航)
WITH RECURSIVE ancestors AS (
  SELECT id, name, parent_id FROM departments WHERE id = 42  -- 目标节点

  UNION ALL

  SELECT d.id, d.name, d.parent_id
  FROM departments d
  INNER JOIN ancestors a ON d.id = a.parent_id  -- 往上找
)
SELECT * FROM ancestors ORDER BY id;

**递归 CTE 限制:**默认最大递归深度 1000(cte_max_recursion_depth),可调整。生产中务必加深度限制条件,防止数据有环时死循环。

1.3 递归生成序列

-- 生成 1-100 的数字序列(替代数字辅助表)
WITH RECURSIVE nums AS (
  SELECT 1 AS n
  UNION ALL
  SELECT n + 1 FROM nums WHERE n < 100
)
SELECT * FROM nums;

-- 生成日期序列(按天填充,用于报表补零)
WITH RECURSIVE dates AS (
  SELECT DATE('2024-01-01') AS d
  UNION ALL
  SELECT DATE_ADD(d, INTERVAL 1 DAY) FROM dates WHERE d < '2024-01-31'
)
SELECT d, COALESCE(s.cnt, 0) AS sales_count
FROM dates
LEFT JOIN (
  SELECT DATE(created_at) AS dt, COUNT(*) AS cnt
  FROM orders GROUP BY dt
) s ON dates.d = s.dt;

2. LATERAL 横向关联(MySQL 8.0+)

LATERAL 允许子查询引用前面 FROM 子句的表,解决了很多"每组取前 N 条"的问题。

-- 场景:每个用户取最近 3 笔订单(每组 Top-N)
SELECT u.id, u.name, recent.order_id, recent.amount, recent.created_at
FROM users u
JOIN LATERAL (
  SELECT order_id, amount, created_at
  FROM orders
  WHERE user_id = u.id  -- 引用外层 u.id,这就是 LATERAL 的作用
  ORDER BY created_at DESC
  LIMIT 3
) AS recent ON TRUE
WHERE u.status = 'active';

-- 对比:不用 LATERAL 的笨方法(需要 ROW_NUMBER 窗口函数)
SELECT id, user_id, order_id, amount FROM (
  SELECT *, ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) AS rn
  FROM orders
) ranked
JOIN users u ON u.id = ranked.user_id
WHERE ranked.rn <= 3 AND u.status = 'active';

3. Upsert:INSERT ON DUPLICATE KEY UPDATE

-- 场景:统计页面 PV,有则加一,无则插入
INSERT INTO page_stats (page_id, pv, uv, last_visit)
VALUES (101, 1, 1, NOW())
ON DUPLICATE KEY UPDATE
  pv = pv + 1,
  last_visit = NOW();
-- page_id 有唯一索引或主键,冲突时执行 UPDATE

-- 场景:批量 Upsert(高效导入)
INSERT INTO product_prices (product_id, price, updated_at)
VALUES (1, 99.9, NOW()), (2, 199.9, NOW()), (3, 299.9, NOW())
ON DUPLICATE KEY UPDATE
  price = VALUES(price),
  updated_at = VALUES(updated_at);
-- MySQL 8.0.20+ 推荐用别名语法:
-- ON DUPLICATE KEY UPDATE price = new.price, updated_at = new.updated_at;

-- REPLACE INTO(不推荐):先 DELETE 再 INSERT,会重置 AUTO_INCREMENT,触发级联删除
-- 推荐用 INSERT ... ON DUPLICATE KEY UPDATE,更可控

注意:INSERT ... ON DUPLICATE KEY UPDATE 在有多个唯一索引时行为可能出乎意料(任何唯一键冲突都会触发 UPDATE)。在高并发场景下要结合事务使用,或用 INSERT IGNORE(只忽略冲突,不更新)。

4. 多表 UPDATE 和 DELETE

-- 多表 UPDATE:根据 orders 表更新 users 表的最后购买时间
UPDATE users u
JOIN orders o ON u.id = o.user_id
SET u.last_order_at = o.created_at,
    u.total_spent = u.total_spent + o.amount
WHERE o.status = 'paid' AND o.id = 9999;

-- 用子查询 UPDATE(等价,更通用)
UPDATE users
SET last_order_at = (
  SELECT MAX(created_at) FROM orders WHERE user_id = users.id AND status = 'paid'
)
WHERE id IN (SELECT DISTINCT user_id FROM orders WHERE status = 'paid');

-- 多表 DELETE:删除 users 表中没有订单的用户
DELETE u
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.user_id IS NULL AND u.created_at < '2023-01-01';

-- 注意:MySQL DELETE 不支持 DELETE FROM t1 USING t1 JOIN t2 的 USING 语法(PostgreSQL 有)
-- 用 JOIN 语法:DELETE t1 FROM t1 JOIN t2 ON ... WHERE ...

5. 行转列(PIVOT)

-- 场景:把月份从行变列,每行一个产品,每列一个月的销售额

-- 原始数据:
-- product_id | month | sales
-- 1          | 1     | 1000
-- 1          | 2     | 1500
-- 2          | 1     | 800

-- 静态行转列(月份固定时)
SELECT
  product_id,
  SUM(CASE WHEN month = 1 THEN sales ELSE 0 END) AS jan,
  SUM(CASE WHEN month = 2 THEN sales ELSE 0 END) AS feb,
  SUM(CASE WHEN month = 3 THEN sales ELSE 0 END) AS mar,
  ...
  SUM(CASE WHEN month = 12 THEN sales ELSE 0 END) AS dec
FROM monthly_sales
GROUP BY product_id;

-- 动态行转列(月份数量不定,需要拼 SQL)
-- 步骤1:获取所有月份
SET @cols = NULL;
SELECT GROUP_CONCAT(
  DISTINCT CONCAT('SUM(CASE WHEN month=', month, ' THEN sales ELSE 0 END) AS `M', month, '`')
  ORDER BY month
) INTO @cols FROM monthly_sales;

-- 步骤2:拼动态 SQL
SET @sql = CONCAT('SELECT product_id, ', @cols, ' FROM monthly_sales GROUP BY product_id');
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;

6. JSON 聚合函数

-- JSON_ARRAYAGG:将多行值聚合为 JSON 数组
SELECT
  u.id,
  u.name,
  JSON_ARRAYAGG(
    JSON_OBJECT('order_id', o.id, 'amount', o.amount, 'status', o.status)
  ) AS orders
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;
-- 输出:{"id":1, "name":"Alice", "orders":[{"order_id":1,"amount":99.9,...},...]}

-- JSON_OBJECTAGG:聚合为 JSON 对象(key-value 映射)
SELECT
  product_id,
  JSON_OBJECTAGG(attribute_name, attribute_value) AS attributes
FROM product_attributes
GROUP BY product_id;
-- 输出:{"color": "red", "size": "L", "material": "cotton"}

-- 结合 JSON_ARRAYAGG 做 GROUP_CONCAT 的替代(不受 group_concat_max_len 限制)
SELECT
  category_id,
  JSON_ARRAYAGG(product_name ORDER BY product_name) AS product_list
FROM products
GROUP BY category_id;

7. 优化器 Hints(MySQL 8.0+)

-- 强制使用特定索引(比 FORCE INDEX 更现代)
SELECT /*+ INDEX(orders idx_user_id) */ *
FROM orders WHERE user_id = 1;

-- 强制使用 Hash Join(当 Nested Loop 性能差时)
SELECT /*+ HASH_JOIN(u, o) */
  u.name, o.amount
FROM users u JOIN orders o ON u.id = o.user_id;

-- 禁用 Block Nested Loop(防止内存不足问题)
SELECT /*+ NO_BNL(o) */ * FROM orders o WHERE ...;

-- 强制 JOIN 顺序(让驱动表固定)
SELECT /*+ JOIN_ORDER(small_table, large_table) */ *
FROM small_table JOIN large_table ON ...;

-- 设置单个查询的最大执行时间(超时 kill)
SELECT /*+ MAX_EXECUTION_TIME(5000) */ *  -- 5000ms
FROM large_table WHERE ...;

-- 查询级 SET_VAR(8.0 特性,仅影响当前查询)
SELECT /*+ SET_VAR(sort_buffer_size = 1M) */ *
FROM t ORDER BY col;

**Hints vs FORCE INDEX:**Hints 语法更灵活,不绑定具体索引名,且支持更多维度(JOIN 顺序、Join 算法、执行超时等)。但 Hints 是"建议",不保证优化器一定采用。FORCE INDEX 才是强制的。

本章评分
4.7  / 5  (3 评分)

💬 留言讨论