欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

测试merge效率

程序员文章站 2022-04-15 15:53:47
测试说明: MERGE是oracle提供的一种特殊的sql语法,非常适用于数据同步场景,即: (把A表数据插到B表,如果B表存在相同主键的记录则使用A表数据对B表进行更新) 数据同步的常规做法是先尝试插入,插入失败再进行更新,MERGE比这种常规做法效率高很多。 (特别是A与B表基本一致,同步时主键 ......

测试说明:

merge是oracle提供的一种特殊的sql语法,非常适用于数据同步场景,即: (把a表数据插到b表,如果b表存在相同主键的记录则使用a表数据对b表进行更新) 数据同步的常规做法是先尝试插入,插入失败再进行更新,merge比这种常规做法效率高很多。 (特别是a与b表基本一致,同步时主键冲突比较多的情况,效率能相差10倍以上)

为了验证merge效率,我建了两张表,tab_test_c(初始化生成50000条记录)和tab_test_q(初始化从tab_test_c生成40000条记录), 写了两个plsql脚本,分别将tab_test_c的数据同步到tab_test_q,看它们效率区别。

第一个脚本使用merge语法,第二个脚本使用常规先插入,出现主键冲突的操作。


测试结果:
 使用merge语法的脚本同步数据耗时0.04秒,使用常规操作耗时14.77秒,效率差369倍

 

测试脚本:

set serveroutput on
-- 启动计时 以便观察脚本执行时间
set timing on
set time on
-- 数据初始化 
drop table tab_test_c;
create table tab_test_c 
(
    c1 varchar2(512),
    c2 varchar2(512),
    c3 varchar2(512),
    c4 varchar2(512),
    c5 varchar2(512),
    c6 varchar2(512),
    c7 varchar2(512),
    c8 varchar2(512),
    c9 varchar2(512),
    c10 varchar2(512)
);

declare
    v_total number;
begin
    v_total := 0;
    loop
        exit when v_total >= 50000;
        for cur in (select owner, object_name, subobject_name, object_id, data_object_id, object_type, 
            created, last_ddl_time, timestamp from all_objects where rownum < 101)
        loop
            insert into tab_test_c values (cur.owner, cur.object_name, cur.subobject_name, 
            cur.object_id, cur.data_object_id, 
            cur.object_type, cur.created, 
            cur.last_ddl_time, cur.timestamp, v_total);
            v_total := v_total + 1;
        end loop;
    end loop;
    commit;
end;
/

-- 建唯一索引
select count(1) from tab_test_c;
create unique index uid_test_c_1 on tab_test_c(c10);
--初始化tab_test_q表数据,先从tab_test_c生成同步40000条数据,剩下10000条数据使用脚本同步过来
drop table tab_test_q;
create table tab_test_q as select * from tab_test_c where rownum < 40001;
create unique index uid_test_q_1 on tab_test_q(c10);
-- 验证数据未同步成功 此时记录数差1000
select count(*) from tab_test_q;
-- 使用merge语法同步tab_test_c的数据到tab_test_q
declare
    cursor cur is select * from tab_test_c;
    type mergearray_t is table of tab_test_c % rowtype index by binary_integer;
    mergearray mergearray_t;
begin
    open cur;
    loop
        exit when cur % notfound;
        fetch cur bulk collect into mergearray limit 16; -- 每次限十几条记录,不要占用太多内存 这个数字调大点效率会更高
        begin
            forall rw in 1 .. mergearray.count
                merge into tab_test_q a
                using (select mergearray(rw).c1 c1, mergearray(rw).c2 c2, mergearray(rw).c3 c3, mergearray(rw).c4 c4, 
                    mergearray(rw).c5 c5, mergearray(rw).c6 c6, mergearray(rw).c7 c7, mergearray(rw).c8 c8, 
                    mergearray(rw).c9 c9, mergearray(rw).c10 c10 from dual) b
                on (a.c10 = b.c10)
                when matched then
                    update set a.c1 = mergearray(rw).c1, a.c2 = mergearray(rw).c2, a.c3 = mergearray(rw).c3, 
                        a.c4 = mergearray(rw).c4, a.c5 = mergearray(rw).c5, 
                        a.c6 = mergearray(rw).c6, a.c7 = mergearray(rw).c7, a.c8 = mergearray(rw).c8, 
                        a.c9 = mergearray(rw).c9
                when not matched then
                    insert (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10) values(mergearray(rw).c1, mergearray(rw).c2,
                        mergearray(rw).c3, mergearray(rw).c4, mergearray(rw).c5, mergearray(rw).c6, 
                        mergearray(rw).c7, mergearray(rw).c8, mergearray(rw).c9, mergearray(rw).c10);
            -- dbms_output.put_line(mergearray.count);
        exception
            when others then
                    dbms_output.put_line('error1');
        end;
    end loop;
    close cur;
    commit;
end;
/
--耗时0.04秒
-- 验证数据同步成功 
select count(*) from tab_test_q;
--初始化tab_test_q表数据,先从tab_test_c生成同步40000条数据,剩下10000条数据使用脚本同步过来
drop table tab_test_q;
create table tab_test_q as select * from tab_test_c where rownum < 40001;
create unique index uid_test_q_1 on tab_test_q(c10);
-- 验证数据未同步成功 此时记录数差1000
select count(*) from tab_test_q;
-- 使用常规语法同步tab_test_c的数据到tab_test_q
begin
    for cur in (select * from tab_test_c)
    loop
        begin
            insert into tab_test_q(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10) 
            values(cur.c1, cur.c2, cur.c3, cur.c4, cur.c5, cur.c6, cur.c7, cur.c8, cur.c9, cur.c10);
        exception
            when dup_val_on_index then --唯一索引冲突时更新
                update tab_test_q set c1 = cur.c1, c2 = cur.c2, c3 = cur.c3, c4 = cur.c4, c5 = cur.c5, c6 = cur.c6, c7 = cur.c7, c8 = cur.c8, c9 = cur.c9
                where c10 = cur.c10;
            when others then
                    dbms_output.put_line('error1');
        end;
    end loop;
    commit;
end;
/
--耗时14.77秒
-- 验证数据同步成功
select count(*) from tab_test_q;