欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

进阶SQL总结

程序员文章站 2022-07-12 10:28:27
...

零、引入

1、mysql8.0的使用

注意:时区问题

2、zeppelin的配置以及使用

3、创建表的sql文件,我没有上传,需要的可以私聊

一、行转列(if、case的使用)

1、sql的if语法:

if(表达式,if code,else code)
cname,语文,数学,物理
张三,74,0,0
张三,0,83,0

2、sql的case…end语法:

类似java中的switch case default
case 列名
when “常量值” then “结果1”
when “常量值” then “结果2”
when “常量值” then “结果3”
else “结果4”
end

进阶SQL总结

SELECT cname AS '姓名',
	MAX(CASE cource WHEN '语文' THEN score ELSE 0 END) AS '语文', 
	MAX(CASE cource WHEN '数学' THEN score ELSE 0 END) AS '数学',
	MAX(CASE cource WHEN '物理' THEN score ELSE 0 END) AS '物理' 
FROM students_score 
GROUP BY cname;

SELECT cname AS '姓名',
	MAX(CASE cource WHEN '语文' THEN score ELSE 0 END) AS '语文', 
	SUM(IF(cource='数学',score,0)) AS '数学',
	MAX(IF(cource='物理',score,0)) AS '物理'
FROM students_score 
GROUP BY cname;


SELECT cname AS '姓名',
	SUM(IF(cource='语文',score,0)) AS '语文',
	SUM(IF(cource='数学',score,0)) AS '数学',
	SUM(IF(cource='物理',score,0)) AS '物理'
FROM students_score
GROUP BY cname;

SELECT cname AS '姓名',
	MAX(IF(cource='语文',score,0)) AS '语文',
	MAX(IF(cource='数学',score,0)) AS '数学',
	MAX(IF(cource='物理',score,0)) AS '物理'
FROM students_score
GROUP BY cname;

SELECT cname AS '姓名',
	MAX(IF(cource='语文',score,0)) AS '语文',
	SUM(IF(cource='数学',score,0)) AS '数学',
	MAX(IF(cource='物理',score,0)) AS '物理'
FROM students_score
GROUP BY cname;

二、客户信息分析练习

主要使用聚合函数count(),不多解释

#1).最受欢迎的信用卡
SELECT credit_type AS '最受欢迎信用卡', COUNT(credit_type) AS '使用数量' 
FROM customer_details 
GROUP BY credit_type
ORDER BY COUNT(credit_type) DESC;


SELECT credit_type, COUNT(DISTINCT credit_no) AS credit_cnt
FROM customer_details
GROUP BY credit_type
ORDER BY credit_cnt DESC
LIMIT 0,5;

#2).前 5 个最多的客户职业
SELECT job AS '职业', COUNT(job) AS job_cnt 
FROM customer_details 
GROUP BY job 
ORDER BY job_cnt DESC
LIMIT 0,5;

#3).前三个美国女性持有的最流行的行用卡
SELECT credit_type AS '女性最受欢迎信用卡', COUNT(credit_type) AS '使用数量' 
FROM customer_details 
WHERE gender='Female' AND country='United States'
GROUP BY credit_type 
ORDER BY COUNT(credit_type) DESC
LIMIT 0,3;

#4).按性别和国家进行客户统计
SELECT gender AS '性别', country AS '国家', COUNT(customer_id) AS '客户人数'
FROM customer_details
GROUP BY gender, country;

三、交易分析练习

涉及较为全面:

  1. 聚合函数的使用
    • sum()、count()、max()、avg()、min()、round()等等
  2. 日期格式转换
    • DATE_FORMAT(DATE,’%y’)等等
  3. 日期拼接,不使用聚合函数,显示季度
    • CONCAT(DATE_FORMAT(DATE,’%y’),’-’,CEIL(DATE_FORMAT(DATE,’%m’)/3.0))
  4. 通过case进行时间段计算
  5. 多表联查
  6. 分组
  7. 排序
  8. 等等
#1).按月度统计总收益
SELECT SUM(price) AS month_revenue, DATE_FORMAT(DATE,'%Y-%m') AS pruchase_month
FROM transactions
GROUP BY pruchase_month;

SELECT SUM(price) AS month_revenue, DATE_FORMAT(DATE,'%Y-%m') AS `year_month`
FROM transactions
GROUP BY `year_month`;

#2).按季度统计总收益
SELECT year_quarter, SUM(price) 
FROM (
	SELECT price, CONCAT(DATE_FORMAT(DATE,'%y'),'-',CEIL(DATE_FORMAT(DATE,'%m')/3.0)) AS year_quarter
	FROM transactions
) base
GROUP BY year_quarter;

SELECT year_quarter, SUM(price) 
FROM (
	SELECT price, CONCAT(DATE_FORMAT(DATE,'%y'),'-',QUARTER(DATE)) AS year_quarter
	FROM transactions
) base
GROUP BY year_quarter;

#3).按年统计总收益
SELECT SUM(price) AS year_revenue, DATE_FORMAT(DATE,'%Y') AS YEAR
FROM transactions
GROUP BY YEAR;

#4).统计每周各天的总收益
SELECT ROUND(SUM(price),2) AS total_price,
	CASE
		WHEN DATE_FORMAT(DATE,'%w')='0' THEN 'Monday'
		WHEN DATE_FORMAT(DATE,'%w')='1' THEN 'Tuesday'
		WHEN DATE_FORMAT(DATE,'%w')='2' THEN 'Wednesday'
		WHEN DATE_FORMAT(DATE,'%w')='3' THEN 'Thursday'
		WHEN DATE_FORMAT(DATE,'%w')='4' THEN 'Firday'
		WHEN DATE_FORMAT(DATE,'%w')='5' THEN 'Saturday'
		WHEN DATE_FORMAT(DATE,'%w')='6' THEN 'Sunday'
	END AS week_day
FROM transactions
GROUP BY week_day;
 
#5)按时间段统计平均收益和总收益
SELECT SUM(price) AS time_total_revenue, AVG(price) AS time_avg_revenue,
	CASE
		WHEN time_in_hrs>5 AND time_in_hrs<=8 THEN 'early morning'
		WHEN time_in_hrs>8 AND time_in_hrs<=11 THEN 'morning'
		WHEN time_in_hrs>11 AND time_in_hrs<=13 THEN 'noon'
		WHEN time_in_hrs>13 AND time_in_hrs<=18 THEN 'afternoon'
		WHEN time_in_hrs>18 AND time_in_hrs<=22 THEN 'evening'
		ELSE 'night'
	END AS time_slot
FROM(
	SELECT TIME, 
		price, 
		(CAST(SUBSTRING_INDEX(TIME,':',1) AS DECIMAL(4,2)) + CAST(SUBSTRING_INDEX(TIME,':',2) AS DECIMAL(4,2))/60) AS time_in_hrs
	FROM transactions
) base
GROUP BY time_slot;

#6).统计消费次数排行前 10 位的客户
SELECT cd.first_name AS cust_name,
	COUNT(DISTINCT ts.transaction_id) AS trans_count 
FROM transactions AS ts 
INNER JOIN customer_details AS cd 
ON ts.customer_id = cd.customer_id 
GROUP BY ts.customer_id 
ORDER BY trans_count DESC 
LIMIT 10;

SELECT cd.first_name AS cust_name, COUNT(DISTINCT ts.transaction_id) AS trans_count
FROM transactions ts
INNER JOIN customer_details cd
ON ts.customer_id = cd.customer_id
GROUP BY ts.customer_id
ORDER BY trans_count DESC
LIMIT 10;

#7).统计消费额前10位客户
SELECT cust_name, ROUND(SUM(price),2) AS spend_total
FROM(
	SELECT DISTINCT ts.transaction_id,
		cd.first_name AS cust_name,
		ts.price,
		ts.customer_id
	FROM transactions ts
	INNER JOIN customer_details cd 
	ON ts.`customer_id` = cd.`customer_id`
) base
GROUP BY customer_id
ORDER BY spend_total DESC
LIMIT 10;

#8).统计每年度、季度总客户数
SELECT YEAR,year_quarter,COUNT(DISTINCT customer_id) AS total_count
FROM(
	SELECT customer_id,
		DATE_FORMAT(DATE,'%Y') AS YEAR,
		CONCAT(DATE_FORMAT(DATE,'%y'),'-',CEIL(DATE_FORMAT(DATE,'%m')/3.0)) AS year_quarter
	FROM transactions
) base
GROUP BY YEAR,year_quarter
ORDER BY year_quarter DESC;

#9).找出平均消费额最大的客户
SELECT cd.first_name AS cust_name,MAX(avg_price) AS avg_spend
FROM(
	SELECT customer_id,
		ROUND(AVG(price),2) AS avg_price
	FROM transactions
	GROUP BY customer_id
) base
INNER JOIN customer_details cd
ON cd.`customer_id`=base.customer_id;

SELECT customer_id, AVG(price) AS avg_price
FROM transactions
GROUP BY customer_id
ORDER BY avg_price DESC
LIMIT 1;



#10).统计最受欢迎的产品(分别从购买客户数量、购买频次、消费额三个维度分析)
#a、购买顾客数量
SELECT product,COUNT(customer_id) AS customer_count
FROM transactions
GROUP BY product
ORDER BY customer_count DESC
LIMIT 10;

#b、购买频次
SELECT product,COUNT(DISTINCT transaction_id) AS buy_count
FROM transactions
GROUP BY product
ORDER BY buy_count DESC
LIMIT 10;

#c、消费额
SELECT product,SUM(price) AS sum_price
FROM transactions
GROUP BY product
ORDER BY sum_price DESC
LIMIT 10;

四、门店分析练习

涉及内容同上较全,不多说了

#1).按客流量找出最受欢迎的门店
SELECT sd.store_name,total_cust
FROM(
	SELECT store_id,
		COUNT(DISTINCT customer_id) AS total_cust
	FROM transactions
	GROUP BY store_id
)ts
INNER JOIN store_details sd 
ON ts.store_id = sd.store_id
ORDER BY total_cust DESC;

#2).按客户消费额找出最受欢迎的门店
SELECT sd.store_name,store_total_price
FROM(
	SELECT store_id,
		ROUND(SUM(price),2) AS store_total_price
	FROM transactions
	GROUP BY store_id
)ts
INNER JOIN store_details sd
ON sd.`store_id`=ts.store_id
ORDER BY store_total_price DESC;

#3).按交易频次找出最受欢迎的门店
SELECT sd.store_name,total_trans
FROM(
	SELECT store_id,
		COUNT(DISTINCT transaction_id) AS total_trans 
	FROM transactions
	GROUP BY store_id
)ts
INNER JOIN store_details sd
ON sd.`store_id`=ts.store_id
ORDER BY total_trans DESC;

#4).按客流量找出每个门店最受欢迎的商品
SELECT store_name,product,max_cust
FROM(
	SELECT store_id, 
		product, 
		MAX(cust_count) AS max_cust
	FROM(
		SELECT store_id, 
			product,
			COUNT(DISTINCT customer_id) AS cust_count
		FROM transactions
		GROUP BY store_id,product
	)temp
	GROUP BY store_id
)base
INNER JOIN store_details sd 
ON base.store_id = sd.`store_id`;

SELECT store_name,product,MAX(max_cust)
FROM(
	SELECT store_id,product,COUNT(DISTINCT customer_id) AS max_cust
	FROM transactions
	GROUP BY store_id,product
)ts
INNER JOIN store_details sd
ON ts.store_id=sd.`store_id`
GROUP BY ts.store_id

#5).统计每个门店客流量与雇员的比率
SELECT sd.store_name, base.visit_count, sd.employee_number, ROUND(base.visit_count/sd.employee_number,2) AS ratio
FROM(
	SELECT store_id,
		COUNT(DISTINCT customer_id) AS visit_count
	FROM transactions
	GROUP BY store_id	
)base
INNER JOIN store_details sd
ON base.store_id=sd.`store_id`;

#6).按年度-月份统计每家门店的收益
SELECT store_name, per_yearmonth, total_price
FROM(
	SELECT store_id,
		DATE_FORMAT(DATE,'%Y-%m') AS per_yearmonth,
		ROUND(SUM(price),2) AS total_price
	FROM transactions
	GROUP BY store_id,per_yearmonth
)base
INNER JOIN store_details sd
ON sd.`store_id`=base.store_id;

#7).找出每家门店最繁忙的时刻
SELECT store_name,time_slot,total_transaction 
FROM (
	SELECT store_id,
		COUNT(transaction_id) AS total_transaction,
		CASE 
			WHEN time_times > 5 AND time_times <= 8 THEN 'early morning'
			WHEN time_times > 8 AND time_times <= 11 THEN 'morning'
			WHEN time_times > 11 AND time_times <= 13 THEN 'noon'
			WHEN time_times > 13 AND time_times <= 18 THEN 'afternoon'
			WHEN time_times > 18 AND time_times <= 22 THEN 'evening'
			ELSE 'night'
		END AS time_slot
	FROM (
		SELECT store_id,
			TIME,
			transaction_id,
			(CAST(SUBSTRING_INDEX(TIME,':',1) AS DECIMAL(4,2)) + CAST(SUBSTRING_INDEX(TIME,':',-1) AS DECIMAL(4,2))/60) AS time_times
		FROM transactions
	) tmp
	GROUP BY time_slot,store_id
	ORDER BY total_transaction DESC
)base 
JOIN store_details sd 
ON sd.store_id = base.store_id;

五、窗口函数(mysql8.0新特性)

1、窗口函数定义

进阶SQL总结
进阶SQL总结
进阶SQL总结
进阶SQL总结
进阶SQL总结

2、窗口函数分析

进阶SQL总结

3、窗口函数练习

#窗口函数排序

SELECT NAME,dept_num,salary,
	row_number() over(PARTITION BY dept_num ORDER BY salary DESC) AS row_mc,
	rank() over(PARTITION BY dept_num ORDER BY salary DESC) AS rank_mc,
	dense_rank() over(PARTITION BY dept_num ORDER BY salary DESC) AS dense_mc
FROM employee_contract


#2-1、聚合函数 (统计员工姓名、所在部门编号、薪水、所在部门的人数、部门总薪水、最高薪水、最低薪水、平均薪水)-1
SELECT NAME,dept_num,salary,
	COUNT(*) over(PARTITION BY dept_num) AS row_counts,
	SUM(salary) over(PARTITION BY dept_num) AS row_sum,
	MAX(salary) over(PARTITION BY dept_num) AS row_max,
	MIN(salary) over(PARTITION BY dept_num) AS row_min,
	AVG(salary) over(PARTITION BY dept_num) AS row_avg
FROM employee_contract;

#sum
SELECT NAME,dept_num,salary,
   SUM(salary) over(PARTITION BY dept_num) AS totalDeptSum,
   SUM(salary) over(ORDER BY dept_num) AS runningTotalDeptSum1,
   SUM(salary) over(ORDER BY dept_num,NAME ) AS runningTotalDeptSum2,
   SUM(salary) over(PARTITION BY dept_num ORDER BY NAME) AS runningTotalDeptSum3
FROM employee_contract
	
#2-3、练习题1:统计最近两年各季度的累加利润
SELECT YEAR(order_time), QUARTER(order_time), order_money,
	SUM(order_money) over(ORDER BY QUARTER(order_time)) AS twoyear_quarter_money
FROM orderinfo;

#2-3、练习题2:统计最近两年各年各季度的累加利润
SELECT YEAR(order_time), QUARTER(order_time), order_money,
	SUM(order_money) over(PARTITION BY YEAR(order_time) ORDER BY QUARTER(order_time)) AS oneyear_quarter_monery
FROM orderinfo;

#2-3、练习题3:统计最近两年各月的累加利润	
SELECT YEAR(order_time), QUARTER(order_time), MONTH(order_time), order_money,
	SUM(order_money) over(PARTITION BY YEAR(order_time) ORDER BY MONTH(order_time)) AS month_money
FROM orderinfo
WHERE YEAR(order_time)+3>YEAR(NOW());
	
#1、统计order_items表中销量最多的前10个商品
SELECT order_item_product_id AS id, SUM(order_item_quantity) AS total_count
FROM order_items
GROUP BY id
ORDER BY total_count DESC
LIMIT 10;

#select order_item_product_id ,
#	sum(order_item_quantity) over(partition by order_item_product_id order by order_item_quantity desc) as total_count
#from order_items;

#2、根据商品子类id大小对每个商品大类下的子类进行排名
SELECT *,
	row_number() over(PARTITION BY category_department_id ORDER BY category_id) AS mc
FROM categories;

#3、统计order_items表中各订单中不同商品总数、订单总金额、订单最高/最低/平均金额
SELECT order_item_order_id,
	COUNT(*) over(PARTITION BY order_item_order_id) AS total_count,
	SUM(order_item_subtotal) over(PARTITION BY order_item_order_id) AS sum_price,
	MAX(order_item_subtotal) over(PARTITION BY order_item_order_id) AS max_price,
	MIN(order_item_subtotal) over(PARTITION BY order_item_order_id) AS min_price,
	AVG(order_item_subtotal) over(PARTITION BY order_item_order_id) AS avg_prive
FROM order_items;
相关标签: SQL