欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

[hive] 经典sql题及答案(二)

程序员文章站 2024-03-15 18:23:42
...
推荐:

经典sql题及答案(一)

题目部分

4 、编写连续7 天登录的总人数:
数据:
t1表
Uid dt login_status(1登录成功,0异常)
1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 1
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-17 1
3 2019-07-18 1

编写sql实现

6 、编写sql 语句实现每班前三名,分数一样并列,同时求出前三名按名次排序后依次的分差:
数据:
stu表
Stu_no class score
1 1901 90
2 1901 90
3 1901 83
4 1901 60
5 1902 66
6 1902 23
7 1902 99
8 1902 67
9 1902 87
编写sql 实现,结果如下:
结果数据:
班级 stu_no score rn rn1 rn_diff
1901 1 90 1 1 90
1901 2 90 1 1 0
1901 3 83 3 1 -7
1902 7 99 1 1 99
1902 9 87 2 2 -12
1902 8 67 3 3 -20

8 、编写sql 实现行列互换:
数据如下:
[hive] 经典sql题及答案(二)
编写sql 实现,得到结果如下:
[hive] 经典sql题及答案(二)

9 、编写sql 实现如下:
数据:
t1表
uid tags
1 1,2,3
2 2,3
3 1,2
编写sql实现如下结果:
uid tag
1 1
1 2
1 3
2 2
2 3
3 1
3 2

10 、行转列
数据:
T1表:
Tags
1,2,3
1,2
2,3
T2表:
Id lab
1 A
2 B
3 C
根据T1和T2表的数据,编写sql实现如下结果:
ids tags
1,2,3 A,B,C
1,2 A,B
2,3 B,C

11 、行转列
数据:
t1表:
id tag flag
a b 2
a b 1
a b 3
c d 6
c d 8
c d 8
编写sql实现如下结果:
id tag flag
a b 1|2|3
c d 6|8

16 、时间格式转换:yyyyMMdd -> yyyy-MM-dd
数据:
t1表
20171205
编写sql实现如下的结果:
2017-12-05

答案部分

4
1	2019-07-11	1
1	2019-07-12	1
1	2019-07-13	1
1	2019-07-14	1
1	2019-07-15	1
1	2019-07-16	1
1	2019-07-17	1
1	2019-07-18	1
2	2019-07-11	1
2	2019-07-12	1
2	2019-07-13	0
2	2019-07-14	1
2	2019-07-15	1
2	2019-07-16	0
2	2019-07-17	1
2	2019-07-18	0
3	2019-07-11	1
3	2019-07-12	1
3	2019-07-13	1
3	2019-07-14	1
3	2019-07-15	1
3	2019-07-16	1
3	2019-07-17	1
3	2019-07-18	1

-- Q4 fixture: login attempts stored as tab-separated text.
-- Columns mirror the sample rows: user id, login date kept as a string,
-- and login_status (1 = success, 0 = abnormal, per the problem statement).
create table sql004 (
    udi          int,
    dt           string,
    login_status int
)
row format delimited
    fields terminated by '\t';

-- LOCAL makes Hive read the file from the local filesystem instead of HDFS.
load data local inpath '/root/in/sql004'
into table sql004;

按用户分组,过滤掉登录异常的记录,并加一列等差数列(行号)
-- Q4 step t1: keep successful logins only and number each user's login days
-- in date order. The sequence number is what later steps subtract from dt.
-- `rank` is backticked because it collides with the rank() function name.
select
    udi,
    dt,
    login_status,
    row_number() over (partition by udi order by dt) as `rank`
from sql004
where login_status = 1;
dt与等差数列做差
-- Q4 step t2: subtract the per-user sequence number from the login date.
-- Days that are consecutive share the same difference (sub), so sub acts as
-- an identifier of one unbroken login streak.
with t1 as (
    select
        udi,
        dt,
        login_status,
        row_number() over (partition by udi order by dt) as `rank`
    from sql004
    where login_status = 1
)
select
    udi,
    dt,
    login_status,
    `rank`,
    date_sub(dt, `rank`) as sub
from t1;
按sub字段做聚集
-- Q4 step t3: attach to every row the number of login days that belong to
-- the same (user, sub) streak.
with t1 as (
    select
        udi,
        dt,
        login_status,
        row_number() over (partition by udi order by dt) as `rank`
    from sql004
    where login_status = 1
),
t2 as (
    select
        udi,
        dt,
        login_status,
        `rank`,
        date_sub(dt, `rank`) as sub
    from t1
)
select
    udi,
    dt,
    login_status,
    `rank`,
    sub,
    count(sub) over (partition by udi, sub) as acount
from t2;
筛选出连续登录天数不少于7天的记录(acount>6 即 acount>=7),据此即可统计人数
-- Q4 answer: number of users who logged in successfully on at least 7
-- consecutive days.
--
-- Method (gaps and islands): number each user's login days in date order and
-- subtract that number from the date. Consecutive days give the same
-- difference, so grouping by it yields one row per streak.
with daily_login as (
    -- One row per user per day, so several successful logins on the same
    -- day cannot break the sequence numbering.
    select
        udi,
        dt
    from sql004
    where login_status = 1
    group by
        udi,
        dt
),
numbered as (
    select
        udi,
        dt,
        row_number() over (partition by udi order by dt) as `rank`
    from daily_login
),
streak as (
    select
        udi,
        date_sub(dt, `rank`) as sub,
        count(*) as acount
    from numbered
    group by
        udi,
        date_sub(dt, `rank`)
)
-- distinct: a user with several qualifying streaks is still counted once.
select
    count(distinct udi) as user_cnt
from streak
where acount >= 7;

6
1	1901	90
2	1901	90
3	1901	83
4	1901	60
5	1902	66
6	1902	23
7	1902	99
8	1902	67
9	1902	87

-- Q6 fixture: one row per student with class and score, tab-separated text.
create table sql006 (
    stu_no int,
    class  int,
    score  int
)
row format delimited
    fields terminated by '\t';

-- LOCAL makes Hive read the file from the local filesystem instead of HDFS.
load data local inpath '/root/in/sql006'
into table sql006;

获得排名,再加一列上一名次学生的成绩
-- Q6 step t1: rank students inside each class by score (equal scores share a
-- rank) and fetch the score of the student listed just before them, using 0
-- when there is no previous student.
-- stu_no is a tie-breaker for lag() only: without it the "previous" row among
-- equal scores is arbitrary. rank() keeps ordering by score alone so ties
-- still share a rank.
select
    stu_no,
    class,
    score,
    rank() over (partition by class order by score desc) as `rank`,
    lag(score, 1, 0) over (partition by class order by score desc, stu_no) as preced
from sql006;
过滤出前三名的学生,做前后两名学生成绩的差
-- Q6 answer: top three students per class (equal scores share a rank) with
-- the score gap to the student listed immediately before them.
with ranked as (
    select
        stu_no,
        class,
        score,
        rank() over (partition by class order by score desc) as `rank`,
        -- stu_no breaks score ties so the previous row is deterministic.
        -- The first student of a class gets 0, hence rn_diff = own score.
        lag(score, 1, 0) over (partition by class order by score desc, stu_no) as preced
    from sql006
)
select
    class,
    stu_no,
    score,
    `rank`,
    score - preced as rn_diff
from ranked
where `rank` <= 3;

8
1	001	语文	90
2	001	数学	92
3	001	英语	80
4	002	语文	88
5	002	数学	90
6	002	英语	75.5
7	003	语文	70
8	003	数学	85
9	003	英语	90
10	003	政治	82

-- Q8 fixture: one row per (user, subject) score, tab-separated text.
-- score is decimal because the sample data contains a fractional value (75.5).
-- With an int column that field cannot be parsed and is read back as NULL.
create table sql008 (
    id      int,
    userid  string,
    subject string,
    score   decimal(5,1)
)
row format delimited
    fields terminated by '\t';

-- LOCAL makes Hive read the file from the local filesystem instead of HDFS.
load data local inpath '/root/in/sql008'
into table sql008;

先用if()做初步的行转列
-- Q8 step t1: spread each row's score into the column of its subject and put
-- 0 in the other subject columns.
select
    userid,
    if(subject = '语文', score, 0) as yuwen1,
    if(subject = '数学', score, 0) as shuxue1,
    if(subject = '英语', score, 0) as yingyu1,
    if(subject = '政治', score, 0) as zhengzhi1
from sql008;
按userid做聚合,取有意义的值
-- Q8 step t2: collapse to one row per user. max() picks the real score over
-- the 0 placeholders written for the other subjects.
with t1 as (
    select
        userid,
        if(subject = '语文', score, 0) as yuwen1,
        if(subject = '数学', score, 0) as shuxue1,
        if(subject = '英语', score, 0) as yingyu1,
        if(subject = '政治', score, 0) as zhengzhi1
    from sql008
)
select
    userid,
    max(yuwen1)    as yuwen,
    max(shuxue1)   as shuxue,
    max(yingyu1)   as yingyu,
    max(zhengzhi1) as zhengzhi
from t1
group by userid;
加一列求和
-- Q8 step t3: add a per-user total across the four subject columns.
with t1 as (
    select
        userid,
        if(subject = '语文', score, 0) as yuwen1,
        if(subject = '数学', score, 0) as shuxue1,
        if(subject = '英语', score, 0) as yingyu1,
        if(subject = '政治', score, 0) as zhengzhi1
    from sql008
),
t2 as (
    select
        userid,
        max(yuwen1)    as yuwen,
        max(shuxue1)   as shuxue,
        max(yingyu1)   as yingyu,
        max(zhengzhi1) as zhengzhi
    from t1
    group by userid
)
select
    userid,
    yuwen,
    shuxue,
    yingyu,
    zhengzhi,
    yuwen + shuxue + yingyu + zhengzhi as total
from t2;
再加一行total
-- Q8 step t4: a single summary row labelled 'total' holding the column sums
-- over all users.
with t1 as (
    select
        userid,
        if(subject = '语文', score, 0) as yuwen1,
        if(subject = '数学', score, 0) as shuxue1,
        if(subject = '英语', score, 0) as yingyu1,
        if(subject = '政治', score, 0) as zhengzhi1
    from sql008
),
t2 as (
    select
        userid,
        max(yuwen1)    as yuwen,
        max(shuxue1)   as shuxue,
        max(yingyu1)   as yingyu,
        max(zhengzhi1) as zhengzhi
    from t1
    group by userid
),
t3 as (
    select
        userid,
        yuwen,
        shuxue,
        yingyu,
        zhengzhi,
        yuwen + shuxue + yingyu + zhengzhi as total
    from t2
)
select
    'total'       as userid,
    sum(yuwen)    as yuwen,
    sum(shuxue)   as shuxue,
    sum(yingyu)   as yingyu,
    sum(zhengzhi) as zhengzhi,
    sum(total)    as total
from t3;
将t3视图和t4视图合并(union)起来,得到最后的结果(t4加的这行total完全没意义啊)
-- Q8 answer: pivot subjects into columns (one row per user) with a per-user
-- total, followed by one extra 'total' row holding the column sums.
with subject_scores as (
    -- if() places the score in its subject column and 0 elsewhere. max() then
    -- keeps the real score when rows are collapsed per user.
    select
        userid,
        max(if(subject = '语文', score, 0)) as yuwen,
        max(if(subject = '数学', score, 0)) as shuxue,
        max(if(subject = '英语', score, 0)) as yingyu,
        max(if(subject = '政治', score, 0)) as zhengzhi
    from sql008
    group by userid
),
user_totals as (
    select
        userid,
        yuwen,
        shuxue,
        yingyu,
        zhengzhi,
        yuwen + shuxue + yingyu + zhengzhi as total
    from subject_scores
)
select
    userid,
    yuwen,
    shuxue,
    yingyu,
    zhengzhi,
    total
from user_totals
-- union all: the summary row cannot duplicate a per-user row unless a user
-- is literally named 'total', so the dedup pass of plain union is wasted.
union all
select
    'total'       as userid,
    sum(yuwen)    as yuwen,
    sum(shuxue)   as shuxue,
    sum(yingyu)   as yingyu,
    sum(zhengzhi) as zhengzhi,
    sum(total)    as total
from user_totals;

9
1	1,2,3
2	2,3
3	1,2

-- Q9 fixture: tab-separated text whose second field is a comma-separated
-- list, read directly as array<string> through the collection delimiter.
create table sql009 (
    uid  int,
    tags array<string>
)
row format delimited
    fields terminated by '\t'
    collection items terminated by ',';

-- LOCAL makes Hive read the file from the local filesystem instead of HDFS.
load data local inpath '/root/in/sql009'
into table sql009;
利用udtf函数explode()
-- Q9 answer: one output row per (uid, tag). explode() is a table-generating
-- function emitting one row per array element, and lateral view joins each
-- emitted element back to the row it came from.
select
    uid,
    tag
from sql009
lateral view explode(tags) exploded as tag;

10
T1表:
Tags
1,2,3
1,2
2,3

T2表:
Id	lab
1	A
2	B
3	C

-- Q10 fixture 1: each line is a comma-separated id list, read as array<string>.
create table sql010t1 (
    tags array<string>
)
row format delimited
    fields terminated by '\t'
    collection items terminated by ',';

-- Q10 fixture 2: lookup table mapping a numeric id to its label.
create table sql010t2 (
    id  int,
    lab string
)
row format delimited
    fields terminated by '\t';

-- LOCAL makes Hive read the files from the local filesystem instead of HDFS.
load data local inpath '/root/in/sql010t1'
into table sql010t1;

load data local inpath '/root/in/sql010t2'
into table sql010t2;

为sql010t1加上行标识
-- Q10 step 1: give every source row an identifier so its exploded elements
-- can be regrouped later. over () has no ordering, so the numbers are
-- arbitrary but still unique within this result.
select
    row_number() over () as id,
    tags
from sql010t1;
将数组拆开
-- Q10 step 2: explode each tags array into one row per element while keeping
-- the row identifier assigned in the previous step.
with numbered as (
    select
        row_number() over () as id,
        tags
    from sql010t1
)
select
    id,
    tag
from numbered
lateral view explode(tags) exploded as tag;
t1和sql010t2两表关联查询
-- Q10 step 3: look up the label of every exploded tag. The inner join drops
-- tags that have no matching id in sql010t2.
with numbered as (
    select
        row_number() over () as id,
        tags
    from sql010t1
),
exploded as (
    select
        id,
        tag
    from numbered
    lateral view explode(tags) e as tag
)
select
    exploded.id,
    tag,
    lab
from exploded
join sql010t2
    -- tag is a string and sql010t2.id an int, so Hive compares them after
    -- implicit numeric conversion.
    on exploded.tag = sql010t2.id;
根据行标识聚合,获得结果
-- Q10 answer: for every source row return the original id list plus the
-- matching labels, both in the original element order.
--
-- collect_set() guarantees no order and removes duplicates, so two separate
-- collect_set() calls may disagree with each other and with the source order.
-- Instead each element carries its array position (posexplode), the collected
-- values are sorted by a zero-padded position prefix, and the prefix is
-- stripped again afterwards.
-- NOTE(review): the 5-digit padding assumes arrays shorter than 100000 items.
with numbered as (
    -- over () has no ordering: ids are arbitrary but unique, which is all the
    -- final group by needs.
    select
        row_number() over () as id,
        tags
    from sql010t1
),
exploded as (
    select
        id,
        pos,
        tag
    from numbered
    lateral view posexplode(tags) e as pos, tag
),
labelled as (
    select
        exploded.id,
        exploded.pos,
        exploded.tag,
        sql010t2.lab
    from exploded
    join sql010t2
        on exploded.tag = sql010t2.id
)
select
    regexp_replace(
        concat_ws(',', sort_array(collect_list(
            concat(lpad(cast(pos as string), 5, '0'), ':', tag)
        ))),
        '(^|,)[0-9]{5}:', '$1'
    ) as ids,
    regexp_replace(
        concat_ws(',', sort_array(collect_list(
            concat(lpad(cast(pos as string), 5, '0'), ':', lab)
        ))),
        '(^|,)[0-9]{5}:', '$1'
    ) as tags
from labelled
group by id;

11
a	b	2
a	b	1
a	b	3
c	d	6
c	d	8
c	d	8

-- Q11 fixture: three string columns per row, tab-separated text.
create table sql011 (
    id   string,
    tag  string,
    flag string
)
row format delimited
    fields terminated by '\t';

-- LOCAL makes Hive read the file from the local filesystem instead of HDFS.
load data local inpath '/root/in/sql011'
into table sql011;

-- Q11 answer: one row per (id, tag) with its distinct flags joined by '|'.
-- collect_set() removes duplicates but promises no order, so sort_array() is
-- applied to get the ascending output the exercise expects (1|2|3).
-- flag is a string column, so the sort is lexicographic. That matches numeric
-- order only while all flags have the same number of digits.
select
    id,
    tag,
    concat_ws('|', sort_array(collect_set(flag))) as flag
from sql011
group by
    id,
    tag;

16
-- Q16 answer: reformat a yyyyMMdd string as yyyy-MM-dd.
-- Pattern letters are case-sensitive: MM is month-of-year, mm is
-- minute-of-hour. The lowercase form only appeared to work because the
-- month digits were parsed as minutes and printed back as minutes.
-- No FROM clause is needed for a constant expression, so the query no longer
-- depends on a user-created dual table.
select from_unixtime(unix_timestamp('20171205', 'yyyyMMdd'), 'yyyy-MM-dd');