数据仓库-增量转全量,全量转增量
程序员文章站
2022-07-01 08:21:59
...
增量转全量
ods增量抽取数据,dwd层增量转全量
思路:合并数据 = T-1的增量数据 + T-2的全量数据(排除T-1发生变化的数据)
实现:
method1:left join
INSERT OVERWRITE TABLE dwd_fyp_user_info_df PARTITION (biz_date='20200824')
--T-1的增量数据
SELECT
id
,name
,mobile
,sex
,create_time
,update_time
FROM
ods_fyp_user_info_di
WHERE
biz_date='20200824'
UNION ALL
--T-2的全量数据(排除T-1发生变化的数据)
SELECT
a.id
,a.name
,a.mobile
,a.sex
,a.create_time
,a.update_time
FROM
(
SELECT
id
,name
,mobile
,sex
,create_time
,update_time
FROM
dwd_fyp_user_info_df -- T-2的全量数据
WHERE
biz_date = '20200823'
)a
LEFT OUTER JOIN
(
SELECT
id
,name
,mobile
,sex
,create_time
,update_time
FROM
ods_fyp_user_info_di --T-1的增量数据
WHERE
biz_date='20200824'
)b
ON a.id = b.id
WHERE b.id IS NULL;
method2:row_number()
思路:合并数据 = (T-1的增量数据 + T-2的全量数据) 排序去重
INSERT OVERWRITE TABLE dwd_fyp_user_info_df PARTITION (biz_date='20200824')
SELECT
a.id
,a.nick_name
,a.phone_num
,a.gender
,a.create_time
,a.update_time
FROM
(
SELECT
a.id
,a.nick_name
,a.phone_num
,a.gender
,a.create_time
,a.update_time
,ROW_NUMBER() OVER (PARTITION BY a.id ORDER BY a.update_time DESC) AS rn
FROM
(
--T-1的增量数据
SELECT
id
,nick_name
,phone_num
,gender
,create_time
,update_time
FROM
ods_fyp_user_info_di
WHERE
biz_date='20200824'
UNION ALL
--T-2的全量数据
SELECT
id
,nick_name
,phone_num
,gender
,create_time
,update_time
FROM
dwd_fyp_user_info_df
WHERE
biz_date = '20200823'
)a
)a
WHERE rn = 1;
全量转增量 left join
SELECT
a.*
FROM
(
SELECT
*
FROM
dwd_fyp_user_info_df
WHERE
biz_date = '20200824'
) a
LEFT OUTER JOIN
(
SELECT
*
FROM
dwd_fyp_user_info_df
WHERE
biz_date = '20200823'
) b
ON a.id = b.id
WHERE b.id IS NULL;
下一篇: 关于FFMPEG采集摄像头推流方法说明