欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

数据仓库-增量转全量,全量转增量

程序员文章站 2022-07-01 08:21:59
...

增量转全量

ods增量抽取数据,dwd层增量转全量
思路:合并数据 = T-1的增量数据 + T-2的全量数据(排除T-1发生变化的数据)

实现:

method1:left join

INSERT OVERWRITE TABLE dwd_fyp_user_info_df  PARTITION (biz_date='20200824')
--T-1的增量数据
SELECT
     id          
    ,name                     
    ,mobile                     
    ,sex                                             
    ,create_time                   
    ,update_time                   
FROM
    ods_fyp_user_info_di  
WHERE
    biz_date='20200824'

UNION ALL 
--T-2的全量数据(排除T-1发生变化的数据)
SELECT
    a.id                  
    ,a.name                    
    ,a.mobile                    
    ,a.sex                                
    ,a.create_time                  
    ,a.update_time                  
FROM
(
    SELECT
        id       
        ,name                     
        ,mobile                     
        ,sex                                   
        ,create_time                   
        ,update_time                   
    FROM
        dwd_fyp_user_info_df -- T-2的全量数据
    WHERE
        biz_date = '20200823'
)a
LEFT OUTER JOIN
(
    SELECT
         id           
        ,name                     
        ,mobile                     
        ,sex                        
        ,create_time                   
        ,update_time                   
    FROM
        ods_fyp_user_info_di --T-1的增量数据
    WHERE
        biz_date='20200824'
)b
ON a.id = b.id
WHERE b.id IS NULL;

method2:row_number()

思路:合并数据 = (T-1的增量数据 + T-2的全量数据) 排序去重

INSERT OVERWRITE TABLE dwd_fyp_user_info_df  PARTITION (biz_date='20200824')
SELECT
         a.id                  
        ,a.nick_name                     
        ,a.phone_num                     
        ,a.gender                           
        ,a.create_time                   
        ,a.update_time                   
FROM                                     
(                                        
    SELECT                               
         a.id                  
        ,a.nick_name                     
        ,a.phone_num                     
        ,a.gender                             
        ,a.create_time                   
        ,a.update_time                   
        ,ROW_NUMBER() OVER (PARTITION BY a.id ORDER BY a.update_time DESC) AS rn 
    FROM
    (
--T-1的增量数据
        SELECT
             id          
            ,nick_name                      
            ,phone_num                      
            ,gender                                   
            ,create_time                    
            ,update_time                    
        FROM
            ods_fyp_user_info_di  
        WHERE
            biz_date='20200824'

        UNION ALL
--T-2的全量数据
        SELECT
            id   
            ,nick_name                      
            ,phone_num                      
            ,gender                             
            ,create_time                    
            ,update_time                    
        FROM
            dwd_fyp_user_info_df  
        WHERE
            biz_date = '20200823'
    )a
)a
WHERE rn = 1;

全量转增量    left join

SELECT
	a.*
FROM
	(
	SELECT 
		* 
	FROM 
		dwd_fyp_user_info_df    
	WHERE 
		biz_date = '20200824'
	) a
	LEFT OUTER JOIN
	(
	SELECT 
		* 
	FROM 
		dwd_fyp_user_info_df   
	WHERE 
		biz_date = '20200823'
	) b
ON a.id = b.id
WHERE b.id IS NULL;