第 45 章
高级 SQL 技巧
MySQL SQL 高级技巧
本章介绍工作中真正会用到的 SQL 高级技巧:递归 CTE、LATERAL 横向关联、Upsert、多表 UPDATE/DELETE、行列转换、JSON 聚合、以及优化器 Hints。每个技巧都附带实际业务场景和完整可运行 SQL。
1. CTE 与递归 CTE
1.1 普通 CTE(可读性重构)
-- 用 CTE 拆解复杂查询,取代嵌套子查询
WITH
active_users AS (
SELECT id, name FROM users WHERE status = 'active' AND created_at > '2024-01-01'
),
user_orders AS (
SELECT user_id, COUNT(*) AS order_cnt, SUM(amount) AS total
FROM orders
WHERE status = 'paid'
GROUP BY user_id
)
SELECT u.id, u.name, o.order_cnt, o.total
FROM active_users u
LEFT JOIN user_orders o ON u.id = o.user_id
ORDER BY o.total DESC NULLS LAST;
1.2 递归 CTE(树形结构/层级查询)
最典型的应用:组织架构树、分类树、BOM 物料清单、评论回复链。
-- 表结构:部门层级
CREATE TABLE departments (
id INT PRIMARY KEY,
name VARCHAR(100),
parent_id INT NULL -- NULL 表示顶层
);
-- 向下递归:找某个节点的所有子孙节点
WITH RECURSIVE dept_tree AS (
-- 锚点(起始节点)
SELECT id, name, parent_id, 0 AS depth, CAST(name AS CHAR(1000)) AS path
FROM departments WHERE id = 1 -- 从 id=1 开始
UNION ALL
-- 递归部分:找所有子节点
SELECT d.id, d.name, d.parent_id, dt.depth + 1,
CONCAT(dt.path, ' > ', d.name)
FROM departments d
INNER JOIN dept_tree dt ON d.parent_id = dt.id
WHERE dt.depth < 10 -- 防止无限递归(防环)
)
SELECT * FROM dept_tree ORDER BY path;
-- 向上递归:找某节点的所有祖先(面包屑导航)
WITH RECURSIVE ancestors AS (
SELECT id, name, parent_id FROM departments WHERE id = 42 -- 目标节点
UNION ALL
SELECT d.id, d.name, d.parent_id
FROM departments d
INNER JOIN ancestors a ON d.id = a.parent_id -- 往上找
)
SELECT * FROM ancestors ORDER BY id;
**递归 CTE 限制:**默认最大递归深度 1000(
cte_max_recursion_depth),可调整。生产中务必加深度限制条件,防止数据有环时死循环。
1.3 递归生成序列
-- 生成 1-100 的数字序列(替代数字辅助表)
WITH RECURSIVE nums AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM nums WHERE n < 100
)
SELECT * FROM nums;
-- 生成日期序列(按天填充,用于报表补零)
WITH RECURSIVE dates AS (
SELECT DATE('2024-01-01') AS d
UNION ALL
SELECT DATE_ADD(d, INTERVAL 1 DAY) FROM dates WHERE d < '2024-01-31'
)
SELECT d, COALESCE(s.cnt, 0) AS sales_count
FROM dates
LEFT JOIN (
SELECT DATE(created_at) AS dt, COUNT(*) AS cnt
FROM orders GROUP BY dt
) s ON dates.d = s.dt;
2. LATERAL 横向关联(MySQL 8.0+)
LATERAL 允许子查询引用前面 FROM 子句的表,解决了很多"每组取前 N 条"的问题。
-- 场景:每个用户取最近 3 笔订单(每组 Top-N)
SELECT u.id, u.name, recent.order_id, recent.amount, recent.created_at
FROM users u
JOIN LATERAL (
SELECT order_id, amount, created_at
FROM orders
WHERE user_id = u.id -- 引用外层 u.id,这就是 LATERAL 的作用
ORDER BY created_at DESC
LIMIT 3
) AS recent ON TRUE
WHERE u.status = 'active';
-- 对比:不用 LATERAL 的笨方法(需要 ROW_NUMBER 窗口函数)
SELECT id, user_id, order_id, amount FROM (
SELECT *, ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) AS rn
FROM orders
) ranked
JOIN users u ON u.id = ranked.user_id
WHERE ranked.rn <= 3 AND u.status = 'active';
3. Upsert:INSERT ON DUPLICATE KEY UPDATE
-- 场景:统计页面 PV,有则加一,无则插入
INSERT INTO page_stats (page_id, pv, uv, last_visit)
VALUES (101, 1, 1, NOW())
ON DUPLICATE KEY UPDATE
pv = pv + 1,
last_visit = NOW();
-- page_id 有唯一索引或主键,冲突时执行 UPDATE
-- 场景:批量 Upsert(高效导入)
INSERT INTO product_prices (product_id, price, updated_at)
VALUES (1, 99.9, NOW()), (2, 199.9, NOW()), (3, 299.9, NOW())
ON DUPLICATE KEY UPDATE
price = VALUES(price),
updated_at = VALUES(updated_at);
-- MySQL 8.0.20+ 推荐用别名语法:
-- ON DUPLICATE KEY UPDATE price = new.price, updated_at = new.updated_at;
-- REPLACE INTO(不推荐):先 DELETE 再 INSERT,会重置 AUTO_INCREMENT,触发级联删除
-- 推荐用 INSERT ... ON DUPLICATE KEY UPDATE,更可控
注意:
INSERT ... ON DUPLICATE KEY UPDATE在有多个唯一索引时行为可能出乎意料(任何唯一键冲突都会触发 UPDATE)。在高并发场景下要结合事务使用,或用INSERT IGNORE(只忽略冲突,不更新)。
4. 多表 UPDATE 和 DELETE
-- 多表 UPDATE:根据 orders 表更新 users 表的最后购买时间
UPDATE users u
JOIN orders o ON u.id = o.user_id
SET u.last_order_at = o.created_at,
u.total_spent = u.total_spent + o.amount
WHERE o.status = 'paid' AND o.id = 9999;
-- 用子查询 UPDATE(等价,更通用)
UPDATE users
SET last_order_at = (
SELECT MAX(created_at) FROM orders WHERE user_id = users.id AND status = 'paid'
)
WHERE id IN (SELECT DISTINCT user_id FROM orders WHERE status = 'paid');
-- 多表 DELETE:删除 users 表中没有订单的用户
DELETE u
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.user_id IS NULL AND u.created_at < '2023-01-01';
-- 注意:MySQL DELETE 不支持 DELETE FROM t1 USING t1 JOIN t2 的 USING 语法(PostgreSQL 有)
-- 用 JOIN 语法:DELETE t1 FROM t1 JOIN t2 ON ... WHERE ...
5. 行转列(PIVOT)
-- 场景:把月份从行变列,每行一个产品,每列一个月的销售额
-- 原始数据:
-- product_id | month | sales
-- 1 | 1 | 1000
-- 1 | 2 | 1500
-- 2 | 1 | 800
-- 静态行转列(月份固定时)
SELECT
product_id,
SUM(CASE WHEN month = 1 THEN sales ELSE 0 END) AS jan,
SUM(CASE WHEN month = 2 THEN sales ELSE 0 END) AS feb,
SUM(CASE WHEN month = 3 THEN sales ELSE 0 END) AS mar,
...
SUM(CASE WHEN month = 12 THEN sales ELSE 0 END) AS dec
FROM monthly_sales
GROUP BY product_id;
-- 动态行转列(月份数量不定,需要拼 SQL)
-- 步骤1:获取所有月份
SET @cols = NULL;
SELECT GROUP_CONCAT(
DISTINCT CONCAT('SUM(CASE WHEN month=', month, ' THEN sales ELSE 0 END) AS `M', month, '`')
ORDER BY month
) INTO @cols FROM monthly_sales;
-- 步骤2:拼动态 SQL
SET @sql = CONCAT('SELECT product_id, ', @cols, ' FROM monthly_sales GROUP BY product_id');
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
6. JSON 聚合函数
-- JSON_ARRAYAGG:将多行值聚合为 JSON 数组
SELECT
u.id,
u.name,
JSON_ARRAYAGG(
JSON_OBJECT('order_id', o.id, 'amount', o.amount, 'status', o.status)
) AS orders
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;
-- 输出:{"id":1, "name":"Alice", "orders":[{"order_id":1,"amount":99.9,...},...]}
-- JSON_OBJECTAGG:聚合为 JSON 对象(key-value 映射)
SELECT
product_id,
JSON_OBJECTAGG(attribute_name, attribute_value) AS attributes
FROM product_attributes
GROUP BY product_id;
-- 输出:{"color": "red", "size": "L", "material": "cotton"}
-- 结合 JSON_ARRAYAGG 做 GROUP_CONCAT 的替代(不受 group_concat_max_len 限制)
SELECT
category_id,
JSON_ARRAYAGG(product_name ORDER BY product_name) AS product_list
FROM products
GROUP BY category_id;
7. 优化器 Hints(MySQL 8.0+)
-- 强制使用特定索引(比 FORCE INDEX 更现代)
SELECT /*+ INDEX(orders idx_user_id) */ *
FROM orders WHERE user_id = 1;
-- 强制使用 Hash Join(当 Nested Loop 性能差时)
SELECT /*+ HASH_JOIN(u, o) */
u.name, o.amount
FROM users u JOIN orders o ON u.id = o.user_id;
-- 禁用 Block Nested Loop(防止内存不足问题)
SELECT /*+ NO_BNL(o) */ * FROM orders o WHERE ...;
-- 强制 JOIN 顺序(让驱动表固定)
SELECT /*+ JOIN_ORDER(small_table, large_table) */ *
FROM small_table JOIN large_table ON ...;
-- 设置单个查询的最大执行时间(超时 kill)
SELECT /*+ MAX_EXECUTION_TIME(5000) */ * -- 5000ms
FROM large_table WHERE ...;
-- 查询级 SET_VAR(8.0 特性,仅影响当前查询)
SELECT /*+ SET_VAR(sort_buffer_size = 1M) */ *
FROM t ORDER BY col;
**Hints vs FORCE INDEX:**Hints 语法更灵活,不绑定具体索引名,且支持更多维度(JOIN 顺序、Join 算法、执行超时等)。但 Hints 是"建议",不保证优化器一定采用。FORCE INDEX 才是强制的。