欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

[hive] 经典sql题及答案(三)

程序员文章站 2024-03-15 18:20:00
...
推荐:

经典sql题及答案(一)
经典sql题及答案(二)

题目部分

22 、使用hive 求出两个数据集的差集?
数据
t1表:
id name
1 zs
2 ls
t2表:
id name
1 zs
3 ww
结果如下:
id name
2 ls
3 ww

23
[hive] 经典sql题及答案(三)

25 、每个用户连续登陆的最大天数?
数据:
login表
uid,date
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
结果如下:
uid cnt_days
1 3
2 2
3 1
4 3

32 、有如下三张表:
表A(登录表):
ds user_id
2019-08-06 1
2019-08-06 2
2019-08-06 3
2019-08-06 4
表B(阅读表):
ds user_id read_num
2019-08-06 1 2
2019-08-06 2 3
2019-08-06 3 6
表C(付费表):
ds user_id price
2019-08-06 1 55.6
2019-08-06 2 55.8
基于上述三张表,请使用hive的hql语句实现如下需求:
(1)、用户登录并且当天有个阅读的用户数,已经阅读书籍数量
(2)、用户登录并且阅读,但是没有付费的用户数
(3)、用户登录并且付费,付费用户书籍和金额

37 数据如下:
1,zhangsan,数学,80,2015
2,lisi,语文,90,2016
3,wangwu,化学,70,2017
4,zhangsan,语文,80,2015
5,zhangsan,化学,90,2015
6,lisi,语文,70,2015
[hive] 经典sql题及答案(三)

答案部分

22
create table sql022t1
(
id string,
name string
)
row format delimited 
fields terminated by '\t';
create table sql022t2
like sql022t1;
load data local inpath '/root/in/sql022t1' overwrite into table sql022t1;
load data local inpath '/root/in/sql022t2' overwrite into table sql022t2;

交集
select
    t1.id,
    t1.name,
    t2.name
from
    sql022t1 t1
join
    sql022t2 t2
on
    t1.id=t2.id;tt1
并集
select
    id,
    name
from
    sql022t1
union
select
    id,
    name
from
    sql022t2;tt2
差集=并集-交集
select
tt2.id,
tt2.name
from
    ( 
    select
    t1.id id,
    t1.name name,
    t2.name name1
    from
        sql022t1 t1
    join
        sql022t2 t2
    on
    t1.id=t2.id
    )tt1
right join
    (select
    id,
    name
    from
        sql022t1 t1
    union
    select
        id,
        name
    from
    sql022t2)tt2
on
    tt1.id=tt2.id
where
    tt1.id is null;
    
23
123,dasfdasas,3,200,1535945356,2018-08-08
124,dasfadass,1,200,1535945356,2018-08-08
125,dadassfas,3,200,1535945356,2018-08-09
126,dadassfas,2,200,1535945356,2018-08-09
127,dasfdasas,5,200,1535945356,2018-08-09

create table sql023
(
orderid int,
userid string,
productid int,
price int,
`timestamp` int,
dt string
)
row format delimited
fields terminated by ',';
load data local inpath '/root/in/sql023'into table sql023;

得到每个客户今天和昨天每天购买的商品列表(单个商品只出现一次)
select
    userid,
    productid,
    dt
from
    sql023
where
    dt = '2018-08-08'
or
    dt = '2018-08-09'
group by
    userid,productid,dt;t1
筛选出昨天和今天购买过3号商品的用户
select
    userid,
    productid,
    count(dt)
from
    (select
        userid,
        productid,
        dt
    from
        sql023
    where
        dt = '2018-08-08'
    or
        dt = '2018-08-09'
    group by
        userid,productid,dt)t1
group by
    userid,productid
having
    count(dt)>1;
    
25
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
create table sql025
(
uid int,
dt string
)
row format delimited
fields terminated by ',';
load data local inpath '/root/in/sql025'into table sql025;
按用户分组加一列等差数列
select
    uid,
    dt,
    row_number() over(partition by uid order by dt) `rank`
from
    sql025;t1
日期与等差数列做差
select
    uid,
    dt,
    `rank`,
    date_sub(dt,`rank`) sub
from
    ()t1;t2
按差值汇聚,通过count来计算每次连续登录的天数
select
    uid,
    sub,
    count(uid) amount
from
    ()t2
group by
    uid,sub;t3
取最大值得到每个用户连续登录的最大天数
select
    uid,
    max(amount)
from
    (select
        uid,
        sub,
        count(uid) amount
    from
        (select
            uid,
            dt,
            `rank`,
            date_sub(dt,`rank`) sub
        from
            (select
                uid,
                dt,
                row_number() over(partition by uid order by dt) `rank`
            from
                sql025)t1)t2
    group by
        uid,sub)t3
group by
    uid;
    
32
2019-08-06,1
2019-08-06,2
2019-08-06,3
2019-08-06,4

2019-08-06,1,2
2019-08-06,2,3
2019-08-06,3,6

2019-08-06,1,55.6
2019-08-06,2,55.8

create table sql032t1
(
ds string,
user_id int
)
row format delimited
fields terminated by ',';
create table sql032t2
(
ds string,
user_id int,
read_num int
)
row format delimited
fields terminated by ',';
create table sql032t3
(
ds string,
user_id int,
price int
)
row format delimited
fields terminated by ',';
load data local inpath '/root/in/sql032t1' into table sql032t1;
load data local inpath '/root/in/sql032t2' into table sql032t2;
load data local inpath '/root/in/sql032t3' into table sql032t3;
(1)
select
    t1.user_id,
    t2.read_num
from
    sql032t1 t1
join
    sql032t2 t2
on
    t1.user_id=t2.user_id;

36
1,zhangsan,数学,80,2015
2,lisi,语文,90,2016
3,wangwu,化学,70,2017
4,zhangsan,语文,80,2015
5,zhangsan,化学,90,2015
6,lisi,语文,70,2015
create table sql037
(
id int,
userid string,
course string,
score int,
term int
)
row format delimited
fields terminated by ',';
load data local inpath'/root/in/sql037'into table sql037;
select
    id,
    userid,
    course,
    score,
    term,
    rank() over(partition by course,term order by score desc) `rank`
from
    sql037;t1
select
    id,
    userid,
    course,
    score,
    term
from
        (select
        id,
        userid,
        course,
        score,
        term,
        rank() over(partition by course,term order by score desc) `rank`
    from
        sql037)t1
where
    `rank`=1;