-- 查询学生表中的所有性别 SELECTDISTINCT sgender FROM t_stu ; -- 给查询出的字段起别名 AS是可以省略的 使用空格 SELECT age AS 年龄,sname AS 姓名 FROM tb_stu; SELECT sid a,sname b,sgender gender,score c FROM t_stu;
SELECTCOUNT(employee_id) AS total_employees FROM salaries; SELECTSUM(salary) AS total_salaries FROM salaries; SELECTAVG(salary) AS average_salary FROM salaries; SELECTMAX(salary) AS highest_salary FROM salaries; SELECTMIN(salary) AS lowest_salary FROM salaries;
group by分组查询
查询出来的字段要求是group by后的字段,查询字段中可以出现组函数和分组字段
group by后面可以跟聚合函数 可以起别名
1 2 3 4 5 6
-- 查询每个部门的部门编号和每个部门的工资和 SELECT deptno,SUM(sal) FROM emp GROUPBY deptno; -- 查询每个部门的部门编号以及每个部门的人数 SELECT deptno,SUM(sal),COUNT(1) FROM emp GROUPBY deptno; -- 查询每个部门的部门编号以及每个部门员工工资大于1500的人数: SELECT deptno,COUNT(1) FROM emp WHERE sal>1500GROUPBY deptno;
按多个字段分组,后面字段一致的为一组
1 2 3 4
-- 按job进行分类 SELECTCOUNT(*),job FROM emp GROUPBY job; -- 按job和mgr进行分类 SELECTCOUNT(*),job,mgr FROM emp GROUPBY job,mgr;
having子句
where是对分组前进行过滤;having是对分组后进行过滤
where中不能出现分组/聚合函数,having中可以出现
where是比分组先执行的,having是在分组之后执行的;
having后面可以跟别名
1 2 3 4
-- 查询工资总和大于9000的部门编号以及工资和: SELECT deptno,sum(sal) FROM emp GROUPBY deptno HAVINGsum(sal)>9000; -- having中使用别名 SELECT deptno,sum(sal) 总薪资 FROM emp GROUPBY deptno HAVING 总薪资>9000;
1 2
-- 查询部门员工个数大于3的,having中使用了别名 SELECTCOUNT(1) cc,deptno FROM emp GROUPBY deptno HAVING cc>3;
SELECT department_id, department_name FROM departments d WHERE EXISTS (SELECT 1 FROM employees e WHERE e.department_id = d.department_id);
WITH
WITH 语句在 SQL 中用于定义一个或多个公用表表达式(Common Table Expressions, CTEs)。公用表表达式是一种临时结果集,它可以在主查询中引用,使得查询更具可读性和维护性。WITH 语句通常用于简化复杂的查询,尤其是在需要重复使用相同的子查询时。
1 2 3 4 5 6 7 8 9
WITH department_avg_salary AS ( SELECT department_id, AVG(salary) AS avg_salary FROM employees GROUP BY department_id ) SELECT e.employee_name, e.salary FROM employees e JOIN department_avg_salary d ON e.department_id = d.department_id WHERE e.salary > d.avg_salary;
# 单列合并,默认以逗号分隔 select a,group_concat(name) as tn from t groupby id; # 分号 SELECT GROUP_CONCAT(name ORDERBY name ASC SEPARATOR '; ') FROM employees GROUPBY department;
STD()
计算标准差
1 2 3
SELECT STD(salary) FROM employees GROUP BY department; # same as SELECT STDDEV(salary) FROM employees GROUP BY department;
VARIANCE()
计算方差
1 2 3
SELECT VARIANCE(salary) FROM employees GROUP BY department; # same as SELECT VAR_POP(salary) FROM employees GROUP BY department;
SELECT DATE_ADD('2024-08-02', INTERVAL 10 DAY) AS new_date; -- 示例结果: '2024-08-12'
DATE_SUB
对指定的日期减去一个时间间隔。
1 2 3 4
DATE_SUB(date, INTERVAL value unit)
SELECT DATE_SUB('2024-08-02', INTERVAL 1 MONTH) AS new_date; -- 示例结果: '2024-07-02'
DATEDIFF
计算两个日期之间的天数差。
1 2 3 4
DATEDIFF(date1, date2)
SELECT DATEDIFF('2024-08-02', '2024-07-01') AS days_diff; -- 示例结果: 32
TIMESTAMPDIFF(unit, datetime1, datetime2)
计算两个日期或时间之间的差异,单位可以是秒、分钟、小时、天等。
1 2 3 4
TIMESTAMPDIFF(unit, datetime1, datetime2)
SELECT TIMESTAMPDIFF(DAY, '2024-07-01', '2024-08-02') AS days_diff; -- 示例结果: 32
单位以指定的interval为准,常用可选:
SECOND 秒,
MINUTE 分钟(返回秒数差除以60的整数部分)
HOUR 小时(返回秒数差除以3600的整数部分)
DAY 天数(返回秒数差除以3600*24的整数部分)
MONTH 月数
YEAR 年数
TIMESTAMPADD(unit, value, datetime)
1 2 3 4
TIMESTAMPADD(unit, value, datetime)
SELECT TIMESTAMPADD(MONTH, 2, '2024-08-02') AS new_date; -- 示例结果: '2024-10-02'
窗口函数
MySQL 8.0 引入了窗口函数
语法
1 2 3 4
[你要的操作] OVER ( PARTITIONBY<用于分组的列名> ORDERBY<按序叠加的列名> ROWS|RANGE<窗口滑动的数据范围> )
<窗口滑动的数据范围> 用来限定 [你要的操作] 所运用的数据的范围,具体有如下这些:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
当前 - current row 之前的 - preceding 之后的 - following 无界限 - unbounded 表示从前面的起点 - unbounded preceding 表示到后面的终点 - unbounded following
举例 取当前行和前五行:ROWS between 5 preceding and current row --共6行 取当前行和后五行:ROWS between current row and 5 following --共6行 取前五行和后五行:ROWS between 5 preceding and 5 following --共11行 取当前行和前六行:ROWS 6 preceding(等价于between...and current row) --共7行 这一天和前面6天:RANGE between interval 6 day preceding and current row --共7天 这一天和前面6天:RANGE interval 6 day preceding(等价于between...and current row) --共7天 字段值落在当前值-100到+200的区间:RANGE between 100 preceding and 200 following --共301个数值
CREATEPROCEDURE loop_example() BEGIN DECLARE counter INTDEFAULT0; loop_start: LOOP SET counter = counter +1; IF counter >5THEN LEAVE loop_start; END IF; SELECT counter; END LOOP; END//
DELIMITER ;
REPEAT
在存储过程中创建循环,直到条件为真才退出循环。
1 2 3 4 5 6 7 8 9 10 11 12 13
DELIMITER //
CREATEPROCEDURE repeat_example() BEGIN DECLARE counter INTDEFAULT0; REPEAT SET counter = counter +1; SELECT counter; UNTIL counter >5 END REPEAT; END//
DELIMITER ;
WHILE
在存储过程中创建循环,条件为真时执行。
1 2 3 4 5 6 7 8 9 10 11 12
DELIMITER //
CREATEPROCEDURE while_example() BEGIN DECLARE counter INTDEFAULT0; WHILE counter <=5 DO SET counter = counter +1; SELECT counter; END WHILE; END//
DELIMITER ;
LEAVE
在存储过程或循环中用于退出循环或块。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
DELIMITER //
CREATEPROCEDURE leave_example() BEGIN DECLARE counter INTDEFAULT0; loop_start: LOOP SET counter = counter +1; IF counter >5THEN LEAVE loop_start; END IF; SELECT counter; END LOOP; END//
DELIMITER ;
ITERATE
在存储过程中的循环中跳过当前迭代,继续下一个迭代。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
DELIMITER //
CREATEPROCEDURE iterate_example() BEGIN DECLARE counter INTDEFAULT0; loop_start: LOOP SET counter = counter +1; IF counter =3THEN ITERATE loop_start; END IF; SELECT counter; IF counter >=5THEN LEAVE loop_start; END IF; END LOOP; END//
DELIMITER ;
拓展
聚合函数里使用表达式
1
avg(a2.timestamp -a1.timestamp )
AVG括弧里直接加条件,过滤掉一些数据
1 2
avg(rating<3) # 只计算评分小于三的均值
1
SELECT product_id, SUM(quantity * price) AS total_sales FROM sales GROUPBY product_id;
条件计数
常见的三形式
1
1
count(if(c.action='confirmed',1,null))
当然,几乎所有使用if的地方可以替换为case end 表达式。
2
1 2 3 4
sum(if(c.action='confirmed',1,0)) #更为简洁的一种形式 SELECTSUM(status ='confirmed') AS confirmed_count FROM orders; # 在 MySQL 中,布尔表达式会返回 1(表示 TRUE)或 0(表示 FALSE)
//查询不以a,w,j开头的学生信息 select id,name from t where name regexp ‘^[^awj]’
LeetCode练习题
高频 SQL 50 题
查询
可回收且低脂的产品
1 2 3
select product_id from Products where low_fats ='Y'and recyclable ='Y';
寻找用户推荐人
1 2 3
SELECT name FROM Customer WHERE ifnull(referee_id,1) !=2;
大的国家
1 2 3
select name, population, area from World where area>=3000000or population>=25000000
文章浏览 I
1 2 3
selectdistinct author_id id from Views where author_id = viewer_id orderby author_id
无效的推文
1 2 3
select tweet_id from Tweets where length(content) >15
连接
使用唯一标识码替换员工ID
1 2 3
select e1.unique_id,e.name from Employees e leftjoin EmployeeUNI e1 on e.id= e1.id
产品销售分析 I
1 2 3
select p.product_name,s.year,s.price from Sales s leftjoin Product p on s.product_id = p.product_id;
进店却未进行过交易的顾客
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# 子查询 select customer_id, count(customer_id) as count_no_trans from visits where visit_id notin (select visit_id from transactions) groupby customer_id;
# 也可以使用notexists select customer_id, count(customer_id) as count_no_trans from visits v wherenotexists (select1from transactions t where v.visit_id = t.visit_id) groupby customer_id;
# 也可以使用leftjoin select customer_id, count(customer_id) count_no_trans from visits v leftjoin transactions t on v.visit_id = t.visit_id where transaction_id isnull groupby customer_id;
上升的温度
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# 使用date_sub函数,date_add也可以 select w1.id from weather w1 leftjoin weather w2 on DATE_SUB(w1.recordDate, INTERVAL1DAY) =w2.recordDate where w1.temperature>w2.temperature
# 使用datediff函数 select w2.id from Weather w1 join Weather w2 on datediff(w2.recordDate,w1.recordDate) =1 where w1.temperature < w2.temperature;
# Exists select w1.id from Weather w1 where Exists(select1from weather w2 where date_sub(w1.recordDate, interval1day) = w2.recordDate and w1.temperature>w2.temperature)
每台机器的进程平均运行时间
1 2 3 4 5 6
select a.machine_id,ROUND(SUM(b.timestamp-a.timestamp)/count(*),3) as processing_time from Activity a join Activity b on a.machine_id = b.machine_id and a.process_id = b.process_id and a.activity_type ='start'and b.activity_type='end' groupby a.machine_id # 此题Exists大概不可行
员工奖金
1 2 3 4 5
# join select name ,bonus from employee e leftjoin bonus b on e.empId = b.empId where bonus<1000or bonus isnull
# join select st.student_id, st.student_name,su.subject_name ,count(e.subject_name) as attended_exams from Students st crossjoin Subjects su leftjoin Examinations e on st.student_id = e.student_id and su.subject_name = e.subject_name groupby st.student_id, su.subject_name orderby st.student_id ,su.subject_name
# with语句 WITH student_with_sbjects AS ( SELECT s1.student_id, s1.student_name, s2.subject_name FROM Students s1 CROSSJOIN Subjects s2 ), exam_count AS ( SELECT student_id,subject_name,COUNT(*) as attended_exams FROM Examinations GROUPBY student_id,subject_name ) SELECT s.student_id,s.student_name,s.subject_name,IFNULL(e.attended_exams, 0) AS attended_exams FROM student_with_sbjects s LEFTJOIN exam_count e ON s.student_id = e.student_id AND s.subject_name = e.subject_name ORDERBY s.student_id,s.subject_name
至少有5名直接下属的经理
1 2 3 4 5 6 7 8
#join select e1.name from Employee e1 join Employee e2 on e1.id =e2.managerId groupby e2.managerId havingcount(*) >=5
# 使用exists会很慢 select e1.name from Employee e1 whereexists (select1from Employee e2 where e1.id = e2.managerId groupby e2.managerId havingcount(*)>=5)
确认率
1 2 3 4 5 6
select s.user_id, round(ifNull(sum(c.action ="confirmed"),0)/count(*),2) as confirmation_rate from Signups s leftjoin Confirmations c on s.user_id = c.user_id groupby s.user_id
聚合函数
有趣的电影
1 2 3 4
select* from cinema where id%2=1and description !='boring' orderby rating desc
平均售价
1 2 3 4 5 6 7
select Prices.product_id, ifnull(ROUND(SUM(price*units)/SUM(units),2),0) as average_price from UnitsSold rightjoin Prices on Prices.product_id = UnitsSold.product_id and purchase_date between start_date and end_date groupby Prices.product_id
项目员工 I
1 2 3 4 5
select p.project_id, Round(avg(e.experience_years),2) as average_years from Project p leftjoin Employee e on p.employee_id=e.employee_id groupby p.project_id
各赛事的用户注册率
1 2 3 4
select contest_id, ROUND((count(*)/(selectcount(*) from Users))*100,2) as percentage from Register groupby contest_id orderby percentage desc,contest_id asc
查询结果的质量和占比
1 2 3 4 5 6
select query_name, ROUND(avg(rating/position),2) as quality, ROUND(sum(casewhen rating<3then1else0end) /count(result) *100,2) as poor_query_percentage from Queries groupby query_name having query_name isnotnull
每月交易 I
1 2 3 4 5 6 7
select date_format(trans_date,'%Y-%m') asmonth,country, count(*) as trans_count, sum(if(state='approved',1,0)) as approved_count, sum(amount) as trans_total_amount, sum(if (state='approved',amount,0)) as approved_total_amount from Transactions groupbymonth,country
即时食物配送 II
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# 窗口函数+子查询 select round((sum(if(order_date=customer_pref_delivery_date, 1, 0))/count(*))*100, 2) as immediate_percentage from ( select*, row_number() over(partitionby customer_id orderby order_date) as first_order from Delivery ) A where A.first_order =1
# join select round(100*sum(casewhen d1.order_date=d1.customer_pref_delivery_date and d1.order_date = d2.first_order then1else0end) /count(distinct d1.customer_id),2) as immediate_percentage from Delivery d1 leftjoin (select customer_id, min(order_date) as first_order from Delivery groupby customer_id) d2 on d1.customer_id =d2.customer_id
# join select round(sum(casewhen a1.event_date = DATE_ADD(a2.first_login, interval1day) then1else0end)/count(distinct a1.player_id),2) as fraction from Activity a1 leftjoin (select player_id, min(event_date) AS first_login from Activity groupby player_id) a2 on a1.player_id = a2.player_id
# 在连接的时候筛选 SELECT ROUND(COUNT(a.player_id) /COUNT(m.player_id),2) AS fraction FROM (SELECT player_id, MIN(event_date) AS'firstD'from Activity Groupby player_id) m LEFTJOIN Activity a ON m.player_id = a.player_id ANDdate(m.firstD) = SUBDATE(date(a.event_date), interval1day)
# 使用 with 搭配 窗口函数
with FirstLogin as ( select player_id, event_date, min(event_date) over (partitionby player_id) as first_login, lead(event_date) over (partitionby player_id orderby event_date) as next_login from Activity ) select round(sum(casewhen next_login = date_add(first_login, interval1day) then1else0end) /count(distinct player_id), 2) as fraction from FirstLogin where event_date = first_login;
排序和分组
每位教师所教授的科目种类的数量
1 2 3
select teacher_id,count(distinct subject_id) as cnt from Teacher group by teacher_id
查询近30天活跃用户数
1 2
select activity_date asday ,count(distinct user_id) as active_users from Activity where activity_date between "2019-06-28" and "2019-07-27" groupby activity_date
销售分析III
1 2 3 4 5
select product_id ,product_name from Product where product_id in (selectdistinct product_id from Sales groupby product_id havingcount(if(sale_date notbetween "2019-01-01" and "2019-03-31",1,null))=0 )
超过5名学生的课
1 2 3
select class from Courses groupby class havingcount(student)>=5
求关注者的数量
1 2 3 4
select user_id, count(distinct follower_id) as followers_count from Followers groupby user_id
只出现一次的最大数字
1 2 3 4 5
select ifnull((select num from MyNumbers groupby num havingcount(num) =1 orderby num desc limit 1),null) as num
买下所有产品的客户
1 2 3
select customer_id from Customer groupby customer_id havingcount(distinct product_key) = (selectcount(*) from Product)
高级查询和连接
每位经理的下属员工数量
1 2 3 4 5 6 7 8 9 10
select e1.employee_id, e1.name, count(e2.reports_to) as reports_count, round(avg(e2.age)) as average_age from Employees e1 leftjoin Employees e2 on e1.employee_id = e2.reports_to groupby e1.employee_id having reports_count >0 orderby e1.employee_id
员工的直属部门
1 2 3 4 5 6 7 8 9
select employee_id,department_id from Employee where primary_flag = "Y" groupby employee_id union select employee_id,department_id from Employee groupby employee_id havingcount(department_id) =1
判断三角形
1 2 3
select x,y,z , if(x+y>z and y+z>x and x+z>y,"Yes","No") as triangle from Triangle
连续出现的数字
1 2 3 4 5
SELECTdistinct a.num as ConsecutiveNums from Logs as a leftjoin Logs as b on a.id = b.id-1 leftjoin Logs as c on a.id = c.id-2 where a.num = b.num and b.num = c.num
# 注意这种写法是错误的,因为case并不能起到组函数的作用,它会逐一检查每一行,可能会记录覆盖。 WITH prices AS ( SELECT product_id, new_price, change_date, ROW_NUMBER() OVER (PARTITIONBY product_id ORDERBY change_date DESC) AS ranking FROM Products ) SELECT p.product_id, CASE WHEN p.change_date >'2019-08-16'THEN10 ELSE ( SELECT pr.new_price FROM prices pr WHERE pr.product_id = p.product_id AND pr.ranking =1 ) ENDAS price FROM Products p GROUPBY p.product_id
# 正确 select product_id,new_price as price from (select*,row_number() over (partitionby product_id orderby change_date desc) as ranking from Products where change_date<="2019-08-16") as P where ranking =1 union select product_id,10from Products where product_id notin (select product_id from Products where change_date<="2019-08-16")
最后一个能进入巴士的人
1 2 3 4
select person_name from (select*,sum(weight) over (orderby turn) as total_weight from Queue) q where q.total_weight<=1000orderby q.total_weight desc limit 1
按分类统计薪水
1 2 3 4 5
select "Low Salary" as category,sum(income<20000) as accounts_count from Accounts unionall select "Average Salary" as category,sum(income>=20000and income<=50000) as accounts_count from Accounts unionall select "High Salary" as category,sum(income>50000) as accounts_count from Accounts
子查询
上级经理已离职的公司员工
1 2 3 4 5
select employee_id from Employees where salary <30000 and manager_id notin (select employee_id from Employees) orderby employee_id
select s1.id,ifnull(s2.student,s1.student) as student from Seat s1 leftjoin (select id ,student,if(id%2=0,id-1,id+1) as change_id from Seat ) s2 on s1.id = s2.change_id
# 窗口函数 SELECT id, CASE WHEN id %2=0THEN prev_student WHEN id %2=1THEN next_student ENDAS student FROM ( SELECT id, student, LAG(student, 1, student) OVER (ORDERBY id) AS prev_student, LEAD(student, 1, student) OVER (ORDERBY id) AS next_student FROM Seat ) AS subquery;
# 三次自连接 SELECT s1.id, CASE WHEN s1.id %2=1and s2.student isnotnullTHEN s2.student WHEN s1.id %2=1and s2.student isnullTHEN s1.student ELSE s3.student END student FROM Seat s1 LEFTJOIN Seat s2 ON s1.id = s2.id -1 LEFTJOIN Seat s3 ON s1.id = s3.id +1
电影评分
1 2 3 4 5 6 7 8
(select u.name as results from MovieRating mr leftjoin Users u on mr.user_id=u.user_id groupby mr.user_id orderbycount(mr.movie_id) desc ,u.name limit 1) unionall (select m.title as results from MovieRating mr leftjoin Movies m on mr.movie_id = m.movie_id wheremonth(created_at) =2andyear(created_at) =2020 groupby mr.movie_id orderbyAVG(mr.rating) desc,m.title limit 1)
2016年的投资
1 2 3
select Round(sum(tiv_2016),2) as tiv_2016 from Insurance where pid in (select pid from Insurance groupby lat,lon havingcount(pid) =1) and pid notin (select pid from Insurance groupby tiv_2015 havingcount(pid) =1)
部门工资前三高的所有员工
1 2 3 4 5 6 7
select Department,Employee,Salary from ( select d.name as Department,e.name as Employee,e.salary as Salary ,DENSE_RANK() over (partitionby d.id orderby salary desc) as order_salary from Employee e leftjoin department d on e.departmentId = d.id ) p where order_salary<=3
字符串函数 / 正则表达式
修复表中的名字
1 2 3
select user_id ,concat(upper(substr(name,1,1)),lower(substr(name,2))) as name from Users orderby user_id
患某种疾病的患者
1 2 3 4 5
select*from Patients where conditions regexp '\\bDIAB1.*'
deletefrom Person where id notin ( select a.id from (selectmin(id) as id from Person groupby email) as a )
第二高的薪水
1 2 3
select ( selectdistinct salary from employee orderby salary desc limit 1,1 ) as SecondHighestSalary;
按日期分组销售产品
1 2 3 4 5 6
select sell_date, count(distinct product) as num_sold, group_concat(distinct product orderby product separator ",") as products from Activities groupby sell_date orderby sell_date
列出指定时间段内所有的下单产品
1 2 3 4 5 6 7
select p.product_name, sum(o.unit) as unit from Orders o leftjoin Products p on o.product_id = p.product_id whereMonth(order_date) =2andyear(order_date) =2020 groupby p.product_id having unit>=100