[hive] 经典sql题及答案(三)
推荐:
题目部分
22 、使用hive 求出两个数据集的差集?
数据
t1表:
id name
1 zs
2 ls
t2表:
id name
1 zs
3 ww
结果如下:
id name
2 ls
3 ww
23
25 、每个用户连续登陆的最大天数?
数据:
login表
uid,date
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
结果如下:
uid cnt_days
1 3
2 2
3 1
4 3
32 、有如下三张表:
表A(登录表):
ds user_id
2019-08-06 1
2019-08-06 2
2019-08-06 3
2019-08-06 4
表B(阅读表):
ds user_id read_num
2019-08-06 1 2
2019-08-06 2 3
2019-08-06 3 6
表C(付费表):
ds user_id price
2019-08-06 1 55.6
2019-08-06 2 55.8
基于上述三张表,请使用hive的hql语句实现如下需求:
(1)、用户登录并且当天有个阅读的用户数,已经阅读书籍数量
(2)、用户登录并且阅读,但是没有付费的用户数
(3)、用户登录并且付费,付费用户书籍和金额
37 数据如下:
1,zhangsan,数学,80,2015
2,lisi,语文,90,2016
3,wangwu,化学,70,2017
4,zhangsan,语文,80,2015
5,zhangsan,化学,90,2015
6,lisi,语文,70,2015
答案部分
22
create table sql022t1
(
id string,
name string
)
row format delimited
fields terminated by '\t';
create table sql022t2
like sql022t1;
load data local inpath '/root/in/sql022t1' overwrite into table sql022t1;
load data local inpath '/root/in/sql022t2' overwrite into table sql022t2;
交集
select
t1.id,
t1.name,
t2.name
from
sql022t1 t1
join
sql022t2 t2
on
t1.id=t2.id;tt1
并集
select
id,
name
from
sql022t1
union
select
id,
name
from
sql022t2;tt2
差集=并集-交集
select
tt2.id,
tt2.name
from
(
select
t1.id id,
t1.name name,
t2.name name1
from
sql022t1 t1
join
sql022t2 t2
on
t1.id=t2.id
)tt1
right join
(select
id,
name
from
sql022t1 t1
union
select
id,
name
from
sql022t2)tt2
on
tt1.id=tt2.id
where
tt1.id is null;
23
123,dasfdasas,3,200,1535945356,2018-08-08
124,dasfadass,1,200,1535945356,2018-08-08
125,dadassfas,3,200,1535945356,2018-08-09
126,dadassfas,2,200,1535945356,2018-08-09
127,dasfdasas,5,200,1535945356,2018-08-09
create table sql023
(
orderid int,
userid string,
productid int,
price int,
`timestamp` int,
dt string
)
row format delimited
fields terminated by ',';
load data local inpath '/root/in/sql023'into table sql023;
得到每个客户今天和昨天每天购买的商品列表(单个商品只出现一次)
select
userid,
productid,
dt
from
sql023
where
dt = '2018-08-08'
or
dt = '2018-08-09'
group by
userid,productid,dt;t1
筛选出昨天和今天购买过3号商品的用户
select
userid,
productid,
count(dt)
from
(select
userid,
productid,
dt
from
sql023
where
dt = '2018-08-08'
or
dt = '2018-08-09'
group by
userid,productid,dt)t1
group by
userid,productid
having
count(dt)>1;
25
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
create table sql025
(
uid int,
dt string
)
row format delimited
fields terminated by ',';
load data local inpath '/root/in/sql025'into table sql025;
按用户分组加一列等差数列
select
uid,
dt,
row_number() over(partition by uid order by dt) `rank`
from
sql025;t1
日期与等差数列做差
select
uid,
dt,
`rank`,
date_sub(dt,`rank`) sub
from
()t1;t2
按差值汇聚,通过count来计算每次连续登录的天数
select
uid,
sub,
count(uid) amount
from
()t2
group by
uid,sub;t3
取最大值得到每个用户连续登录的最大天数
select
uid,
max(amount)
from
(select
uid,
sub,
count(uid) amount
from
(select
uid,
dt,
`rank`,
date_sub(dt,`rank`) sub
from
(select
uid,
dt,
row_number() over(partition by uid order by dt) `rank`
from
sql025)t1)t2
group by
uid,sub)t3
group by
uid;
32
2019-08-06,1
2019-08-06,2
2019-08-06,3
2019-08-06,4
2019-08-06,1,2
2019-08-06,2,3
2019-08-06,3,6
2019-08-06,1,55.6
2019-08-06,2,55.8
create table sql032t1
(
ds string,
user_id int
)
row format delimited
fields terminated by ',';
create table sql032t2
(
ds string,
user_id int,
read_num int
)
row format delimited
fields terminated by ',';
create table sql032t3
(
ds string,
user_id int,
price int
)
row format delimited
fields terminated by ',';
load data local inpath '/root/in/sql032t1' into table sql032t1;
load data local inpath '/root/in/sql032t2' into table sql032t2;
load data local inpath '/root/in/sql032t3' into table sql032t3;
(1)
select
t1.user_id,
t2.read_num
from
sql032t1 t1
join
sql032t2 t2
on
t1.user_id=t2.user_id;
36
1,zhangsan,数学,80,2015
2,lisi,语文,90,2016
3,wangwu,化学,70,2017
4,zhangsan,语文,80,2015
5,zhangsan,化学,90,2015
6,lisi,语文,70,2015
create table sql037
(
id int,
userid string,
course string,
score int,
term int
)
row format delimited
fields terminated by ',';
load data local inpath'/root/in/sql037'into table sql037;
select
id,
userid,
course,
score,
term,
rank() over(partition by course,term order by score desc) `rank`
from
sql037;t1
select
id,
userid,
course,
score,
term
from
(select
id,
userid,
course,
score,
term,
rank() over(partition by course,term order by score desc) `rank`
from
sql037)t1
where
`rank`=1;
上一篇: 一、Python基础 4.if语句
下一篇: Android MVP框架学习实践