MySQL高级特性
在掌握了MySQL的基础操作和核心功能后,我们现在来探索一些更高级的特性,这些特性可以帮助我们编写更高效、更复杂的查询,并更好地管理和优化数据库。
子查询与公用表表达式(CTE)
子查询
子查询是嵌套在其他SQL语句中的查询,它可以返回单个值、一行、一列或一个表。
标量子查询
标量子查询返回单个值,通常用于SELECT、WHERE或HAVING子句中。
-- 在SELECT中使用标量子查询
SELECT
name,
(SELECT AVG(grade) FROM grades) AS 全校平均分,
grade AS 个人成绩
FROM students s
JOIN grades g ON s.id = g.student_id;
-- 在WHERE中使用标量子查询
SELECT name, grade
FROM students s
JOIN grades g ON s.id = g.student_id
WHERE grade > (SELECT AVG(grade) FROM grades);
-- 在HAVING中使用标量子查询
SELECT c.course_name, AVG(g.grade) AS 平均分
FROM courses c
JOIN grades g ON c.id = g.course_id
GROUP BY c.id, c.course_name
HAVING AVG(g.grade) > (SELECT AVG(grade) FROM grades);列子查询
列子查询返回单列多行,通常与IN、NOT IN、ANY、ALL等操作符一起使用。
-- 使用IN和列子查询
SELECT name, major
FROM students
WHERE major IN (SELECT DISTINCT major FROM students WHERE name LIKE '张%');
-- 使用NOT IN和列子查询
SELECT name, major
FROM students
WHERE major NOT IN (SELECT DISTINCT major FROM students WHERE enrollment_date < '2020-01-01');
-- 使用ANY操作符
SELECT name, grade
FROM students s
JOIN grades g ON s.id = g.student_id
WHERE grade > ANY (SELECT grade FROM grades WHERE course_id = 1);
-- 使用ALL操作符
SELECT name, grade
FROM students s
JOIN grades g ON s.id = g.student_id
WHERE grade > ALL (SELECT grade FROM grades WHERE course_id = 1);行子查询
行子查询返回一行多列,通常用于与行构造器进行比较。
-- 行子查询示例
SELECT name, major, enrollment_date
FROM students
WHERE (major, enrollment_date) = (
SELECT major, MIN(enrollment_date)
FROM students
GROUP BY major
LIMIT 1
);表子查询
表子查询返回多行多列,可以作为临时表使用。
-- 表子查询示例
SELECT s.name, avg_grades.平均分
FROM students s
JOIN (
SELECT student_id, AVG(grade) AS 平均分
FROM grades
GROUP BY student_id
) avg_grades ON s.id = avg_grades.student_id
WHERE avg_grades.平均分 > 85;公用表表达式(CTE)
公用表表达式(Common Table Expressions,CTE)是使用WITH子句定义的临时结果集,可以提高查询的可读性和维护性。
基本CTE
-- 基本CTE示例
WITH top_students AS (
SELECT student_id, AVG(grade) AS avg_grade
FROM grades
GROUP BY student_id
HAVING AVG(grade) > 85
)
SELECT s.name, ts.avg_grade
FROM students s
JOIN top_students ts ON s.id = ts.student_id
ORDER BY ts.avg_grade DESC;递归CTE
递归CTE用于处理层次结构数据,如组织架构、分类树等。
-- 创建员工表用于演示递归CTE
CREATE TABLE employees (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(50) NOT NULL,
manager_id INT,
department VARCHAR(50)
);
-- 插入示例数据
INSERT INTO employees (name, manager_id, department) VALUES
('CEO', NULL, 'Management'),
('CTO', 1, 'Technology'),
('CFO', 1, 'Finance'),
('技术经理', 2, 'Technology'),
('财务经理', 3, 'Finance'),
('程序员1', 4, 'Technology'),
('程序员2', 4, 'Technology'),
('会计1', 5, 'Finance');
-- 递归CTE查询组织架构
WITH RECURSIVE org_chart AS (
-- 基础查询:找到顶级管理者
SELECT id, name, manager_id, department, 0 as level, CAST(name AS CHAR(1000)) as path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- 递归查询:找到下属员工
SELECT e.id, e.name, e.manager_id, e.department, oc.level + 1,
CONCAT(oc.path, ' -> ', e.name)
FROM employees e
JOIN org_chart oc ON e.manager_id = oc.id
)
SELECT CONCAT(REPEAT(' ', level), name) AS 组织架构, department
FROM org_chart
ORDER BY path;索引优化
索引是提高数据库查询性能的重要工具,合理使用索引可以显著提升查询速度。
索引基础
索引的作用
索引类似于书籍的目录,可以帮助数据库快速定位到所需的数据,而无需扫描整个表。
-- 创建测试表
CREATE TABLE large_table (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(50),
email VARCHAR(100),
age INT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 插入大量测试数据(示例)
-- INSERT INTO large_table (name, email, age) VALUES (...);
-- 没有索引的查询(全表扫描)
SELECT * FROM large_table WHERE email = 'test@example.com';
-- 创建索引后查询(索引查找)
CREATE INDEX idx_email ON large_table(email);
SELECT * FROM large_table WHERE email = 'test@example.com';索引的原理
MySQL主要使用B+树结构来实现索引,这种结构特别适合范围查询和排序操作。
索引类型
主键索引(PRIMARY KEY)
主键索引是自动创建的唯一索引,不允许重复值和空值。
-- 在创建表时定义主键索引
CREATE TABLE users (
id INT AUTO_INCREMENT,
username VARCHAR(50) NOT NULL,
PRIMARY KEY (id)
);
-- 在修改表时添加主键索引
ALTER TABLE users ADD PRIMARY KEY (id);普通索引(INDEX)
普通索引是最基本的索引类型,没有任何限制。
-- 创建普通索引
CREATE INDEX idx_username ON users(username);
-- 在创建表时定义普通索引
CREATE TABLE products (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(100),
category VARCHAR(50),
price DECIMAL(10,2),
INDEX idx_name (name),
INDEX idx_category (category)
);唯一索引(UNIQUE)
唯一索引确保索引列的值唯一,但允许空值。
-- 创建唯一索引
CREATE UNIQUE INDEX idx_email ON users(email);
-- 在创建表时定义唯一索引
CREATE TABLE users (
id INT AUTO_INCREMENT PRIMARY KEY,
username VARCHAR(50) NOT NULL,
email VARCHAR(100) UNIQUE,
UNIQUE INDEX idx_username (username)
);复合索引
复合索引是在多个列上创建的索引,遵循最左前缀原则。
-- 创建复合索引
CREATE INDEX idx_name_age ON users(name, age);
-- 最左前缀原则示例
-- 有效使用索引的查询
SELECT * FROM users WHERE name = '张三';
SELECT * FROM users WHERE name = '张三' AND age = 25;
SELECT * FROM users WHERE name = '张三' AND age > 20;
-- 无法有效使用索引的查询
SELECT * FROM users WHERE age = 25; -- 没有使用最左列
SELECT * FROM users WHERE name LIKE '%张%' AND age = 25; -- 最左列使用了范围查询索引优化
适合建索引的场景
- 频繁查询的列:经常出现在WHERE子句中的列
- 连接列:用于表连接的外键列
- 排序列:经常用于ORDER BY的列
- 分组列:经常用于GROUP BY的列
-- 为经常查询的列创建索引
CREATE INDEX idx_status ON orders(status);
-- 为连接列创建索引
CREATE INDEX idx_user_id ON orders(user_id);
-- 为排序列创建索引
CREATE INDEX idx_created_at ON orders(created_at);
-- 为复合查询条件创建复合索引
CREATE INDEX idx_status_created ON orders(status, created_at);不适合建索引的场景
- 数据量少的表:小表全表扫描可能比索引查找更快
- 频繁修改的列:索引会增加INSERT、UPDATE、DELETE操作的开销
- 有大量重复值的列:低选择性的列建立索引效果不佳
-- 不建议为性别这种低选择性列创建索引
-- ALTER TABLE users ADD INDEX idx_gender (gender); -- 不推荐
-- 不建议为经常更新的列创建索引
-- ALTER TABLE users ADD INDEX idx_last_login (last_login_time); -- 需谨慎考虑索引维护
-- 查看表的索引信息
SHOW INDEX FROM users;
-- 分析表的索引使用情况
ANALYZE TABLE users;
-- 重建索引(优化索引碎片)
OPTIMIZE TABLE users;
-- 删除索引
DROP INDEX idx_username ON users;事务与ACID特性
事务是数据库操作的逻辑单位,由一系列操作组成,这些操作要么全部执行,要么全部不执行。
事务概念
事务将多个操作组合成一个不可分割的工作单元,确保数据的一致性和完整性。
-- 事务示例:银行转账
START TRANSACTION;
-- 从账户A扣款
UPDATE accounts SET balance = balance - 1000 WHERE account_id = 'A';
-- 向账户B转账
UPDATE accounts SET balance = balance + 1000 WHERE account_id = 'B';
-- 检查余额是否足够
SELECT balance FROM accounts WHERE account_id = 'A';
-- 如果余额足够,提交事务
COMMIT;
-- 如果余额不足,回滚事务
-- ROLLBACK;ACID特性
原子性(Atomicity)
事务是一个不可分割的工作单位,要么全部执行,要么全部不执行。
-- 原子性示例
START TRANSACTION;
INSERT INTO orders (user_id, product_id, quantity) VALUES (1, 101, 2);
INSERT INTO order_items (order_id, product_id, quantity) VALUES (LAST_INSERT_ID(), 101, 2);
UPDATE products SET stock = stock - 2 WHERE id = 101;
-- 如果所有操作都成功,提交事务
COMMIT;
-- 如果任何一个操作失败,回滚事务
-- ROLLBACK;一致性(Consistency)
事务执行前后,数据库从一个一致性状态转换到另一个一致性状态。
-- 一致性示例:确保库存不为负数
START TRANSACTION;
-- 检查库存
SELECT stock FROM products WHERE id = 101;
-- 如果库存足够,执行订单
UPDATE products SET stock = stock - 2 WHERE id = 101 AND stock >= 2;
-- 检查更新是否成功
SELECT ROW_COUNT() AS affected_rows;
-- 如果更新成功,提交事务;否则回滚
-- COMMIT; 或 ROLLBACK;隔离性(Isolation)
多个事务并发执行时,一个事务的执行不应影响其他事务。
-- 事务A
START TRANSACTION;
SELECT balance FROM accounts WHERE account_id = 'A'; -- 假设结果为1000
UPDATE accounts SET balance = balance - 100 WHERE account_id = 'A';
-- 此时事务B查询账户A的余额仍为1000(取决于隔离级别)
COMMIT;
-- 事务B
START TRANSACTION;
SELECT balance FROM accounts WHERE account_id = 'A'; -- 结果取决于隔离级别
UPDATE accounts SET balance = balance + 50 WHERE account_id = 'A';
COMMIT;持久性(Durability)
事务一旦提交,对数据库的改变是永久性的。
-- 持久性示例
START TRANSACTION;
INSERT INTO logs (message, timestamp) VALUES ('重要操作', NOW());
UPDATE users SET last_login = NOW() WHERE id = 1;
COMMIT; -- 一旦提交,即使系统崩溃,数据也不会丢失事务控制
开始事务
-- 开始事务的方式
START TRANSACTION;
-- 或者
BEGIN;提交事务
-- 提交事务
COMMIT;
-- 或者
COMMIT WORK;回滚事务
-- 回滚事务
ROLLBACK;
-- 或者
ROLLBACK WORK;保存点
保存点允许在事务中设置检查点,可以回滚到特定的保存点。
-- 使用保存点
START TRANSACTION;
INSERT INTO users (username, email) VALUES ('user1', 'user1@example.com');
SAVEPOINT sp1;
INSERT INTO users (username, email) VALUES ('user2', 'user2@example.com');
SAVEPOINT sp2;
-- 发生错误,回滚到保存点sp1
ROLLBACK TO sp1;
-- 继续执行其他操作
INSERT INTO users (username, email) VALUES ('user3', 'user3@example.com');
COMMIT;事务隔离级别
事务隔离级别定义了一个事务可能受其他并发事务影响的程度。
隔离级别分类
读未提交(READ UNCOMMITTED)
最低的隔离级别,允许读取未提交的数据变更,可能导致脏读。
-- 设置隔离级别为读未提交
SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
-- 事务A
START TRANSACTION;
UPDATE accounts SET balance = balance - 100 WHERE account_id = 'A';
-- 此时事务B可以读取到未提交的数据(脏读)
-- ROLLBACK; -- 如果回滚,事务B读取到的就是无效数据读已提交(READ COMMITTED)
允许读取已提交的数据变更,可防止脏读,但可能出现不可重复读。
-- 设置隔离级别为读已提交
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
-- 事务A
START TRANSACTION;
SELECT balance FROM accounts WHERE account_id = 'A'; -- 假设结果为1000
-- 事务B
START TRANSACTION;
UPDATE accounts SET balance = balance - 100 WHERE account_id = 'A';
COMMIT;
-- 事务A再次查询
SELECT balance FROM accounts WHERE account_id = 'A'; -- 结果为900(不可重复读)
COMMIT;可重复读(REPEATABLE READ)
MySQL默认的隔离级别,可防止脏读和不可重复读,但可能出现幻读。
-- 设置隔离级别为可重复读
SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
-- 事务A
START TRANSACTION;
SELECT COUNT(*) FROM users WHERE age > 20; -- 假设结果为5
-- 事务B
START TRANSACTION;
INSERT INTO users (username, age) VALUES ('newuser', 25);
COMMIT;
-- 事务A再次查询
SELECT COUNT(*) FROM users WHERE age > 20; -- 结果仍为5(可重复读)
-- 但如果执行插入操作,可能会发现新记录(幻读)
COMMIT;串行化(SERIALIZABLE)
最高的隔离级别,完全隔离事务,防止脏读、不可重复读和幻读,但性能最低。
-- 设置隔离级别为串行化
SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
-- 在串行化模式下,事务会按顺序执行,避免并发问题查看与设置隔离级别
-- 查看当前会话的隔离级别
SELECT @@transaction_isolation;
-- 查看全局隔离级别
SELECT @@global.transaction_isolation;
-- 设置会话隔离级别
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
-- 设置全局隔离级别
SET GLOBAL TRANSACTION ISOLATION LEVEL REPEATABLE READ;实践项目
为了巩固所学知识,建议进行以下实践项目:
实现银行转账功能(事务保证原子性)
-- 创建银行账户表
CREATE TABLE bank_accounts (
id INT AUTO_INCREMENT PRIMARY KEY,
account_number VARCHAR(20) UNIQUE NOT NULL,
account_holder VARCHAR(50) NOT NULL,
balance DECIMAL(15,2) DEFAULT 0.00,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 插入测试数据
INSERT INTO bank_accounts (account_number, account_holder, balance) VALUES
('ACC001', '张三', 10000.00),
('ACC002', '李四', 5000.00);
-- 银行转账存储过程
DELIMITER //
CREATE PROCEDURE transfer_money(
IN from_account VARCHAR(20),
IN to_account VARCHAR(20),
IN amount DECIMAL(15,2)
)
BEGIN
DECLARE from_balance DECIMAL(15,2);
DECLARE EXIT HANDLER FOR SQLEXCEPTION
BEGIN
ROLLBACK;
RESIGNAL;
END;
START TRANSACTION;
-- 检查转出账户余额
SELECT balance INTO from_balance
FROM bank_accounts
WHERE account_number = from_account
FOR UPDATE;
IF from_balance < amount THEN
SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = '余额不足';
END IF;
-- 执行转账
UPDATE bank_accounts
SET balance = balance - amount
WHERE account_number = from_account;
UPDATE bank_accounts
SET balance = balance + amount
WHERE account_number = to_account;
-- 记录转账日志
INSERT INTO transfer_logs (from_account, to_account, amount, transfer_time)
VALUES (from_account, to_account, amount, NOW());
COMMIT;
END //
DELIMITER ;
-- 创建转账日志表
CREATE TABLE transfer_logs (
id INT AUTO_INCREMENT PRIMARY KEY,
from_account VARCHAR(20),
to_account VARCHAR(20),
amount DECIMAL(15,2),
transfer_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 使用转账功能
CALL transfer_money('ACC001', 'ACC002', 1000.00);为查询频繁的表设计合理索引并测试性能
-- 创建测试表
CREATE TABLE user_orders (
id INT AUTO_INCREMENT PRIMARY KEY,
user_id INT NOT NULL,
product_id INT NOT NULL,
order_date DATE NOT NULL,
status ENUM('pending', 'processing', 'shipped', 'delivered') DEFAULT 'pending',
amount DECIMAL(10,2),
INDEX idx_user_id (user_id),
INDEX idx_order_date (order_date),
INDEX idx_status (status),
INDEX idx_user_date (user_id, order_date)
);
-- 插入大量测试数据
-- (这里使用存储过程生成测试数据)
DELIMITER //
CREATE PROCEDURE generate_test_data(IN num_records INT)
BEGIN
DECLARE i INT DEFAULT 0;
DECLARE user_id INT;
DECLARE product_id INT;
DECLARE order_date DATE;
DECLARE status ENUM('pending', 'processing', 'shipped', 'delivered');
DECLARE amount DECIMAL(10,2);
WHILE i < num_records DO
SET user_id = FLOOR(1 + RAND() * 1000);
SET product_id = FLOOR(1 + RAND() * 100);
SET order_date = DATE_SUB(CURDATE(), INTERVAL FLOOR(RAND() * 365) DAY);
SET status = ELT(FLOOR(1 + RAND() * 4), 'pending', 'processing', 'shipped', 'delivered');
SET amount = ROUND(10 + RAND() * 990, 2);
INSERT INTO user_orders (user_id, product_id, order_date, status, amount)
VALUES (user_id, product_id, order_date, status, amount);
SET i = i + 1;
END WHILE;
END //
DELIMITER ;
-- 生成10万条测试数据
CALL generate_test_data(100000);
-- 性能测试:查询特定用户的订单
-- 开启慢查询日志以分析性能
SET long_query_time = 0;
SET slow_query_log = ON;
-- 测试查询性能
SELECT COUNT(*) FROM user_orders WHERE user_id = 500;
SELECT * FROM user_orders WHERE user_id = 500 ORDER BY order_date DESC LIMIT 10;
SELECT COUNT(*) FROM user_orders WHERE order_date BETWEEN '2023-01-01' AND '2023-12-31';
SELECT COUNT(*) FROM user_orders WHERE user_id = 500 AND order_date >= '2023-01-01';
-- 分析查询执行计划
EXPLAIN SELECT * FROM user_orders WHERE user_id = 500 ORDER BY order_date DESC LIMIT 10;
EXPLAIN SELECT COUNT(*) FROM user_orders WHERE user_id = 500 AND order_date >= '2023-01-01';
-- 优化索引
-- 如果发现查询性能不佳,可以添加复合索引
CREATE INDEX idx_user_date_status ON user_orders(user_id, order_date, status);用子查询和CTE解决复杂统计问题
-- 创建销售数据表
CREATE TABLE sales (
id INT AUTO_INCREMENT PRIMARY KEY,
product_id INT NOT NULL,
category VARCHAR(50),
sale_date DATE NOT NULL,
quantity INT,
unit_price DECIMAL(10,2),
salesperson_id INT
);
-- 插入测试数据
INSERT INTO sales (product_id, category, sale_date, quantity, unit_price, salesperson_id) VALUES
(1, '电子产品', '2023-01-15', 2, 2999.00, 101),
(2, '服装', '2023-01-20', 5, 299.00, 102),
(3, '电子产品', '2023-02-01', 1, 5999.00, 101),
(4, '图书', '2023-02-10', 3, 59.00, 103);
-- 使用子查询解决复杂统计问题
-- 1. 查询销售额超过平均销售额的产品
SELECT product_id, SUM(quantity * unit_price) AS total_sales
FROM sales
GROUP BY product_id
HAVING total_sales > (
SELECT AVG(product_sales)
FROM (
SELECT SUM(quantity * unit_price) AS product_sales
FROM sales
GROUP BY product_id
) AS avg_sales
);
-- 2. 查询每个类别的最佳销售员
SELECT category, salesperson_id, total_sales
FROM (
SELECT
category,
salesperson_id,
SUM(quantity * unit_price) AS total_sales,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY SUM(quantity * unit_price) DESC) as rn
FROM sales
GROUP BY category, salesperson_id
) AS ranked_sales
WHERE rn = 1;
-- 使用CTE解决复杂统计问题
-- 1. 使用CTE计算每个季度的销售统计
WITH quarterly_sales AS (
SELECT
YEAR(sale_date) AS sale_year,
QUARTER(sale_date) AS sale_quarter,
category,
SUM(quantity * unit_price) AS quarterly_total,
COUNT(*) AS order_count
FROM sales
GROUP BY YEAR(sale_date), QUARTER(sale_date), category
),
category_ranking AS (
SELECT
sale_year,
sale_quarter,
category,
quarterly_total,
order_count,
RANK() OVER (PARTITION BY sale_year, sale_quarter ORDER BY quarterly_total DESC) AS category_rank
FROM quarterly_sales
)
SELECT
sale_year,
sale_quarter,
category,
quarterly_total,
order_count,
category_rank
FROM category_ranking
WHERE category_rank <= 3
ORDER BY sale_year, sale_quarter, category_rank;
-- 2. 使用递归CTE查询组织架构中的销售业绩
-- 假设有员工表和销售员的层级关系
WITH RECURSIVE sales_hierarchy AS (
-- 基础查询:顶级销售经理
SELECT
id,
name,
manager_id,
0 AS level,
CAST(name AS CHAR(1000)) AS path,
0 AS total_sales
FROM employees
WHERE position = '销售总监' AND manager_id IS NULL
UNION ALL
-- 递归查询:下属销售员及其业绩
SELECT
e.id,
e.name,
e.manager_id,
sh.level + 1,
CONCAT(sh.path, ' -> ', e.name),
COALESCE((
SELECT SUM(s.quantity * s.unit_price)
FROM sales s
WHERE s.salesperson_id = e.id
), 0) AS total_sales
FROM employees e
JOIN sales_hierarchy sh ON e.manager_id = sh.id
WHERE e.position LIKE '%销售%'
)
SELECT
CONCAT(REPEAT(' ', level), name) AS 组织架构,
total_sales AS 总销售额
FROM sales_hierarchy
ORDER BY path;通过以上实践项目,您将深入理解MySQL的高级特性,包括子查询、CTE、索引优化、事务处理和隔离级别等重要概念,为构建高性能的数据库应用打下坚实基础。