[hive] 经典sql题及答案(二)
推荐:
题目部分
4 、编写连续7 天登录的总人数:
数据:
t1表
Uid dt login_status(1登录成功,0异常)
1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 1
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-17 1
3 2019-07-18 1
编写sql实现
6 、编写sql 语句实现每班前三名,分数一样并列,同时求出前三名按名次排序后依次的分差:
数据:
stu表
Stu_no class score
1 1901 90
2 1901 90
3 1901 83
4 1901 60
5 1902 66
6 1902 23
7 1902 99
8 1902 67
9 1902 87
编写sql 实现,结果如下:
结果数据:
班级 stu_no score rn rn1 rn_diff
1901 1 90 1 1 90
1901 2 90 1 1 0
1901 3 83 3 1 -7
1902 7 99 1 1 99
1902 9 87 2 2 -12
1902 8 67 3 3 -20
8 、编写sql 实现行列互换:
数据如下:
编写sql 实现,得到结果如下:
9 、编写sql 实现如下:
数据:
t1表
uid tags
1 1,2,3
2 2,3
3 1,2
编写sql实现如下结果:
uid tag
1 1
1 2
1 3
2 2
2 3
3 1
3 2
10 、行转列
数据:
T1表:
Tags
1,2,3
1,2
2,3
T2表:
Id lab
1 A
2 B
3 C
根据T1和T2表的数据,编写sql实现如下结果:
ids tags
1,2,3 A,B,C
1,2 A,B
2,3 B,C
11 、行转列
数据:
t1表:
id tag flag
a b 2
a b 1
a b 3
c d 6
c d 8
c d 8
编写sql实现如下结果:
id tag flag
a b 1|2|3
c d 6|8
16 、时间格式转换:yyyyMMdd -> yyyy-MM-dd
数据:
t1表
20171205
编写sql实现如下的结果:
2017-12-05
答案部分
4
1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 1
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-17 1
3 2019-07-18 1
-- Problem 4 login log. Columns follow the sample header "Uid dt login_status";
-- the user-id column is spelled `udi` here and every later query uses that name.
-- login_status: 1 = successful login, 0 = abnormal.
create table sql004 (
    udi          int,
    dt           string,
    login_status int
)
row format delimited
    fields terminated by '\t';

-- Load the tab-separated sample rows from the local file system.
load data local inpath '/root/in/sql004'
into table sql004;
按用户分组,过滤掉登录异常的记录并加一列等差数列(行号)
-- Step t1: keep successful logins only and number each user's login days
-- in date order (1, 2, 3, ...).
select
    udi,
    dt,
    login_status,
    row_number() over (partition by udi order by dt) as `rank`
from sql004
where login_status = 1;
-- The result of this step is referred to as t1 in the following steps.
dt与等差数列做差
-- Step t2: subtract the per-user sequence number from the login date.
-- Within an unbroken run of days both the date and the sequence number grow
-- by one per row, so every row of the same run ends up with the same `sub`.
select
    t1.udi,
    t1.dt,
    t1.login_status,
    t1.`rank`,
    date_sub(t1.dt, t1.`rank`) as sub
from (
    select
        udi,
        dt,
        login_status,
        row_number() over (partition by udi order by dt) as `rank`
    from sql004
    where login_status = 1
) t1;
-- The result of this step is referred to as t2 in the following steps.
按sub字段做聚集
-- Step t3: attach to every row the size of its (user, sub) group, i.e. the
-- length of the consecutive-login run the row belongs to.
select
    t2.udi,
    t2.dt,
    t2.login_status,
    t2.`rank`,
    t2.sub,
    count(t2.sub) over (partition by t2.udi, t2.sub) as acount
from (
    select
        t1.udi,
        t1.dt,
        t1.login_status,
        t1.`rank`,
        date_sub(t1.dt, t1.`rank`) as sub
    from (
        select
            udi,
            dt,
            login_status,
            row_number() over (partition by udi order by dt) as `rank`
        from sql004
        where login_status = 1
    ) t1
) t2;
-- The result of this step is referred to as t3 in the following step.
筛选出连续登录天数大于等于7的记录
-- Final answer for problem 4: the NUMBER of users that have at least one run
-- of 7 or more consecutive successful-login days.
-- The problem asks for a head count, so the qualifying users are counted
-- (distinct, because one user may own several qualifying runs) instead of
-- listing their individual login rows.
-- NOTE(review): assumes at most one login_status = 1 row per user per day;
-- duplicate days would break the date-minus-row-number grouping.
with ok_logins as (
    -- successful logins, numbered per user in date order
    select
        udi,
        dt,
        row_number() over (partition by udi order by dt) as `rank`
    from sql004
    where login_status = 1
),
runs as (
    -- rows of the same unbroken run share the same `sub`
    select
        udi,
        date_sub(dt, `rank`) as sub
    from ok_logins
)
select
    count(distinct udi) as user_cnt
from (
    select
        udi,
        sub
    from runs
    group by udi, sub
    having count(*) >= 7
) long_runs;
6
1 1901 90
2 1901 90
3 1901 83
4 1901 60
5 1902 66
6 1902 23
7 1902 99
8 1902 67
9 1902 87
-- Problem 6 score table; columns follow the sample header "Stu_no class score".
create table sql006 (
    stu_no int,
    class  int,
    score  int
)
row format delimited
    fields terminated by '\t';

-- Load the tab-separated sample rows from the local file system.
load data local inpath '/root/in/sql006'
into table sql006;
获得排名,再加一列上一名次学生的成绩
-- Step t1: rank students inside each class by score (ties share a rank) and
-- fetch the score of the student placed just before them.
-- lag() needs a total order to be deterministic, so stu_no breaks score ties;
-- rank() keeps ordering by score only so that equal scores stay tied.
-- The lag default of 0 means the first student's "previous" score is 0.
select
    stu_no,
    class,
    score,
    rank() over (partition by class order by score desc) as `rank`,
    lag(score, 1, 0) over (partition by class order by score desc, stu_no) as preced
from sql006;
-- The result of this step is referred to as t1 in the following step.
过滤出前三名的学生,做前后两名学生成绩的差
-- Final answer for problem 6: top three ranks per class plus the score gap to
-- the previously placed student.
-- lag() is ordered by (score desc, stu_no) so that tied scores are visited in
-- a fixed order and the output is reproducible; rank() still orders by score
-- alone so ties share a rank.
-- Because the lag default is 0, the first student's rn_diff equals their own
-- score, as in the sample result.
select
    class,
    stu_no,
    score,
    `rank`,
    score - preced as rn_diff
from (
    select
        stu_no,
        class,
        score,
        rank() over (partition by class order by score desc) as `rank`,
        lag(score, 1, 0) over (partition by class order by score desc, stu_no) as preced
    from sql006
) t1
where `rank` < 4;
8
1 001 语文 90
2 001 数学 92
3 001 英语 80
4 002 语文 88
5 002 数学 90
6 002 英语 75.5
7 003 语文 70
8 003 数学 85
9 003 英语 90
10 003 政治 82
-- Problem 8 score table: one row per (user, subject).
-- score is declared double because the sample data contains a fractional
-- score (75.5), which an int column cannot hold.
create table sql008 (
    id      int,
    userid  string,
    subject string,
    score   double
)
row format delimited
    fields terminated by '\t';

-- Load the tab-separated sample rows from the local file system.
load data local inpath '/root/in/sql008'
into table sql008;
先用if()做初步的行转列
-- Step t1: spread each row's score into the column of its subject; the other
-- three subject columns get 0 for that row.
select
    userid,
    if(subject = '语文', score, 0) as yuwen1,
    if(subject = '数学', score, 0) as shuxue1,
    if(subject = '英语', score, 0) as yingyu1,
    if(subject = '政治', score, 0) as zhengzhi1
from sql008;
-- The result of this step is referred to as t1 in the following steps.
按userid做聚合,取有意义的值
-- Step t2: collapse to one row per user; max() picks the real score over the
-- 0 placeholders produced in step t1.
select
    t1.userid,
    max(t1.yuwen1)    as yuwen,
    max(t1.shuxue1)   as shuxue,
    max(t1.yingyu1)   as yingyu,
    max(t1.zhengzhi1) as zhengzhi
from (
    select
        userid,
        if(subject = '语文', score, 0) as yuwen1,
        if(subject = '数学', score, 0) as shuxue1,
        if(subject = '英语', score, 0) as yingyu1,
        if(subject = '政治', score, 0) as zhengzhi1
    from sql008
) t1
group by t1.userid;
-- The result of this step is referred to as t2 in the following steps.
加一列求和
-- Step t3: add a per-user total column (sum of the four subject columns).
select
    t2.userid,
    t2.yuwen,
    t2.shuxue,
    t2.yingyu,
    t2.zhengzhi,
    t2.yuwen + t2.shuxue + t2.yingyu + t2.zhengzhi as total
from (
    select
        t1.userid,
        max(t1.yuwen1)    as yuwen,
        max(t1.shuxue1)   as shuxue,
        max(t1.yingyu1)   as yingyu,
        max(t1.zhengzhi1) as zhengzhi
    from (
        select
            userid,
            if(subject = '语文', score, 0) as yuwen1,
            if(subject = '数学', score, 0) as shuxue1,
            if(subject = '英语', score, 0) as yingyu1,
            if(subject = '政治', score, 0) as zhengzhi1
        from sql008
    ) t1
    group by t1.userid
) t2;
-- The result of this step is referred to as t3 in the following steps.
再加一行total
-- Step t4: a single summary row labelled 'total' holding the column-wise sums
-- over all users of step t3.
-- The aggregate columns are aliased so they carry the same names as in t3.
select
    'total'          as userid,
    sum(t3.yuwen)    as yuwen,
    sum(t3.shuxue)   as shuxue,
    sum(t3.yingyu)   as yingyu,
    sum(t3.zhengzhi) as zhengzhi,
    sum(t3.total)    as total
from (
    select
        t2.userid,
        t2.yuwen,
        t2.shuxue,
        t2.yingyu,
        t2.zhengzhi,
        t2.yuwen + t2.shuxue + t2.yingyu + t2.zhengzhi as total
    from (
        select
            t1.userid,
            max(t1.yuwen1)    as yuwen,
            max(t1.shuxue1)   as shuxue,
            max(t1.yingyu1)   as yingyu,
            max(t1.zhengzhi1) as zhengzhi
        from (
            select
                userid,
                if(subject = '语文', score, 0) as yuwen1,
                if(subject = '数学', score, 0) as shuxue1,
                if(subject = '英语', score, 0) as yingyu1,
                if(subject = '政治', score, 0) as zhengzhi1
            from sql008
        ) t1
        group by t1.userid
    ) t2
) t3;
-- The result of this step is referred to as t4 in the following step.
将t3视图和t4视图连接起来得到最后的结果(t4加的这行total完全没意义啊)
-- Final answer for problem 8: one pivoted row per user followed logically by
-- a 'total' summary row.
-- The shared per-user pivot is written once as a CTE instead of being repeated
-- in both branches, and the branches are combined with UNION ALL: the 'total'
-- row cannot duplicate a user row unless a userid is literally 'total', so the
-- de-duplication done by plain UNION is wasted work.
-- NOTE(review): no ORDER BY is applied, so the position of the 'total' row in
-- the output is not guaranteed.
with per_user as (
    -- one row per user; max() keeps the real score over the 0 placeholders
    select
        userid,
        max(if(subject = '语文', score, 0)) as yuwen,
        max(if(subject = '数学', score, 0)) as shuxue,
        max(if(subject = '英语', score, 0)) as yingyu,
        max(if(subject = '政治', score, 0)) as zhengzhi
    from sql008
    group by userid
),
per_user_total as (
    -- per-user sum across the four subjects
    select
        userid,
        yuwen,
        shuxue,
        yingyu,
        zhengzhi,
        yuwen + shuxue + yingyu + zhengzhi as total
    from per_user
)
select
    userid,
    yuwen,
    shuxue,
    yingyu,
    zhengzhi,
    total
from per_user_total
union all
select
    'total'       as userid,
    sum(yuwen)    as yuwen,
    sum(shuxue)   as shuxue,
    sum(yingyu)   as yingyu,
    sum(zhengzhi) as zhengzhi,
    sum(total)    as total
from per_user_total;
9
1 1,2,3
2 2,3
3 1,2
-- Problem 9 table: each input line is "<uid><TAB><comma-separated tags>";
-- the tag list is read directly into an array column.
create table sql009 (
    uid  int,
    tags array<string>
)
row format delimited
    fields terminated by '\t'
    collection items terminated by ',';

-- Load the sample rows from the local file system.
load data local inpath '/root/in/sql009'
into table sql009;
利用udtf函数explode()
-- Final answer for problem 9: emit one (uid, tag) row per element of the
-- tags array by exploding it through a lateral view.
select
    src.uid,
    t.tag
from sql009 src
lateral view explode(src.tags) t as tag;
10
T1表:
Tags
1,2,3
1,2
2,3
T2表:
Id lab
1 A
2 B
3 C
-- Problem 10, table T1: each line is a comma-separated id list, read into an
-- array column.
create table sql010t1 (
    tags array<string>
)
row format delimited
    fields terminated by '\t'
    collection items terminated by ',';

-- Problem 10, table T2: id -> label lookup.
create table sql010t2 (
    id  int,
    lab string
)
row format delimited
    fields terminated by '\t';

-- Load both sample files from the local file system.
load data local inpath '/root/in/sql010t1'
into table sql010t1;

load data local inpath '/root/in/sql010t2'
into table sql010t2;
为sql010t1加上行标识
-- Step 1: give every T1 row a synthetic identifier so that its elements can be
-- regrouped after the array is exploded.
-- The window has no ORDER BY, so the numbers are only unique row markers, not
-- a meaningful sequence.
select
    row_number() over () as id,
    tags
from sql010t1;
-- The result of this step is referred to as t1 in the following steps.
将数组拆开
-- Step 2: explode the tags array into one (row id, tag) pair per element.
select
    id,
    tag
from (
    select
        row_number() over () as id,
        tags
    from sql010t1
) t1
lateral view explode(tags) t as tag;
-- The result of this step is again referred to as t1 in the following steps.
t1和sql010t2两表关联查询
-- Step 3: look up the label of every exploded tag in sql010t2.
-- tag is a string while sql010t2.id is an int; the comparison relies on
-- Hive's implicit type conversion, exactly as in the original.
select
    t1.id,
    tag,
    lab
from (
    select
        id,
        tag
    from (
        select
            row_number() over () as id,
            tags
        from sql010t1
    ) t1
    lateral view explode(tags) t as tag
) t1
join sql010t2
    on t1.tag = sql010t2.id;
-- The result of this step is referred to as t2 in the following step.
根据行标识聚合,获得结果
-- Final answer for problem 10: regroup the exploded tags by their source row
-- and join ids and labels back into comma-separated strings.
-- NOTE(review): collect_set does not promise any element order, so the ids and
-- labels of one row are not guaranteed to come out in matching positions.
with numbered as (
    -- synthetic per-row identifier
    select
        row_number() over () as id,
        tags
    from sql010t1
),
exploded as (
    -- one (row id, tag) pair per array element
    select
        n.id,
        e.tag
    from numbered n
    lateral view explode(n.tags) e as tag
),
labelled as (
    -- attach the label of each tag
    select
        x.id,
        x.tag,
        l.lab
    from exploded x
    join sql010t2 l
        on x.tag = l.id
)
select
    concat_ws(',', collect_set(tag)) as ids,
    concat_ws(',', collect_set(lab)) as tags
from labelled
group by id;
11
a b 2
a b 1
a b 3
c d 6
c d 8
c d 8
-- Problem 11 table; columns follow the sample header "id tag flag".
-- All three columns are stored as strings.
create table sql011 (
    id   string,
    tag  string,
    flag string
)
row format delimited
    fields terminated by '\t';

-- Load the tab-separated sample rows from the local file system.
load data local inpath '/root/in/sql011'
into table sql011;
-- Final answer for problem 11: one row per (id, tag) with its distinct flags
-- joined by '|'.
-- collect_set removes duplicates but returns elements in no particular order,
-- so sort_array is applied to get the ascending order shown in the expected
-- result (1|2|3).
-- flag is a string column, so the sort is lexicographic.
select
    id,
    tag,
    concat_ws('|', sort_array(collect_set(flag))) as flag
from sql011
group by
    id,
    tag;
16
-- Problem 16: convert a yyyyMMdd string into yyyy-MM-dd.
-- In the date pattern, upper-case MM is month while lower-case mm is minutes;
-- the original 'yyyymmdd' / 'yyyy-mm-dd' pair parsed the month digits as
-- minutes and only reproduced them by formatting minutes back out.
-- NOTE(review): `dual` is not created anywhere in this file; it must already
-- exist as a one-row helper table.
select from_unixtime(unix_timestamp('20171205', 'yyyyMMdd'), 'yyyy-MM-dd') from dual;