[20190402]Library Cache mutex.txt
[20190402]library cache mutex.txt
1.环境:
scott@book> @ ver1
port_string version banner
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/linux 2.4.xx 11.2.0.4.0 oracle database 11g enterprise edition release 11.2.0.4.0 - 64bit production
$ cat m2.txt
-- m2.txt: timing harness, run from SQL*Plus as  @m2.txt <loops> <method> <delay>
--   argument 1: how many times the PL/SQL loop runs the test query
--   argument 2: tag stored in job_times.method for this run
--   argument 3: start delay, the script sleeps (argument 3)/50 seconds first
set verify off
column a noprint new_value v_a;
--select mod ( &&3 ,3) a from dual ;
--alter session set optimizer_index_cost_adj= &&3;
host sleep $(echo &&3/50| bc -l )

-- Store the starting dbms_utility.get_time value in time_ela. The update at the
-- end of the script turns it into the elapsed value.
-- The columns are listed explicitly so the insert does not rely on column order.
insert into job_times (sid, time_ela, method)
values (sys_context('userenv', 'sid'), dbms_utility.get_time, '&&2');
commit ;

declare
    v_id number;
    v_d  date;   -- only needed by the commented-out sysdate variant below
begin
    -- Execute the test statement once per iteration. The commented-out selects
    -- are alternative test statements that can be swapped in by hand.
    for i in 1 .. &&1 loop
        --select 1 into v_id from dual ;
        --select sysdate into v_d from dual ;
        select deptno into v_id from dept where deptno=10;
    end loop;
end ;
/

-- Replace the stored start value with the elapsed value for this session and tag.
-- NOTE(review): the predicate matches every row with the same sid and method, so
-- rows left behind by an earlier run with the same tag are rewritten as well.
update job_times
   set time_ela = dbms_utility.get_time - time_ela
 where sid = sys_context('userenv', 'sid')
   and method = '&&2';
commit;
--quit
--//执行许多次,就可以在 x$mutex_sleep_history有记录,出现mutex_type='library cache'的情况
$ seq 150 | xargs -i {} -p 150 bash -c "sqlplus -s -l scott/book @m2.txt 1e6 f2_150 {} >/dev/null"
sys@book> @ mutexy 16 a31kd5tkdvvmm
hash sum_sleeps sum_gets location mutex_type mutex_addr sqlid kglnaown c100
---------- ---------- ---------- ------------------------------ -------------------- ---------------- ------------- -------- ---------------------------------------
1692266099 161187946 600188278 kksfbc [kkschlpin1] cursor pin 000000007c88e330 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 9215322 1.8927e+11 kkslockdelete [kkschlpin6] cursor pin 000000007c88e330 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 5019588 1.0546e+11 kksfbc [kkschlfsp2] cursor pin 000000007c88e330 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 1351526 63672414 kgllkc1 57 library cache 000000007c957128 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 499588 9089718 kkslce [kkschlpin2] cursor pin 000000007c957128 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 212212 3838112 kglgethandlereference 124 library cache 000000007c957128 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 131248 1919076 kgllkdl1 85 library cache 000000007c957128 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 10118 1814754 kglhdgn2 106 library cache 000000007c957128 a31kd5tkdvvmm select deptno from dept where deptno=10
8 rows selected.
--//探究mutex_type='library cache',mutex_addr='000000007c957128'在哪里,还有1个location问题?
--//我执行许多次才出现这样的情况.
2.分析:
sys@book> @ fcha 000000007c957128
find in which heap (uga, pga or shared pool) the memory address 000000007c957128 resides...
warning!!! this script will query x$ksmsp, which will cause heavy shared pool latch contention
in systems under load and with large shared pool. this may even completely hang
your instance until the query has finished! you probably do not want to run this in production!
press enter to continue, ctrl+c to cancel...
loc ksmchptr ksmchidx ksmchdur ksmchcom ksmchsiz ksmchcls ksmchtyp ksmchpar
--- ---------------- ---------- ---------- ---------------- ---------- -------- ---------- ----------------
sga 000000007c956fb8 1 1 kglhd 560 recr 80 00
--//执行如下,也可以获得类似结果:
select * from x$ksmsp where to_number ('000000007c957128', 'xxxxxxxxxxxxxxxx') between to_number(ksmchptr, 'xxxxxxxxxxxxxxxx') and to_number(ksmchptr, 'xxxxxxxxxxxxxxxx')+ksmchsiz -1;
addr indx inst_id ksmchidx ksmchdur ksmchcom ksmchptr ksmchsiz ksmchcls ksmchtyp ksmchpar
---------------- ---------- ---------- ---------- ---------- ---------------- ---------------- ---------- -------- ---------- ----------------
00007f3cede1a5b8 16459 1 1 1 kglhd 000000007c956fb8 560 recr 80 00
sys@book> @ sharepool/shp4 a31kd5tkdvvmm 0
text kglhdadr kglhdpar c40 kglhdlmd kglhdpmd kglhdivc kglobhd0 kglobhd6 kglobhs0 kglobhs6 kglobt16 n0_6_16 n20 kglnahsh kglobt03 kglobt09
-------------- ---------------- ---------------- ---------------------------------------- ---------- ---------- ---------- ---------------- ---------------- ---------- ---------- ---------- --------- ---------- ---------- ------------- ----------
子游标句柄地址 000000007c7f8b90 000000007c956fe8 select deptno from dept where deptno=10 0 0 0 000000007c4468d8 000000007c88e3d8 4528 8088 3072 15688 15688 1692266099 a31kd5tkdvvmm 0
父游标句柄地址 000000007c956fe8 000000007c956fe8 select deptno from dept where deptno=10 0 0 0 000000007ca7bfe8 00 4720 0 0 4720 4720 1692266099 a31kd5tkdvvmm 65535
--//可以发现mutex_addr='000000007c957128'的地址与父游标句柄的地址比较接近.实际上从ksmchcom=kglhd也可以确定这一点.
sys@book> @ fcha 000000007c956fe8
find in which heap (uga, pga or shared pool) the memory address 000000007c956fe8 resides...
warning!!! this script will query x$ksmsp, which will cause heavy shared pool latch contention
in systems under load and with large shared pool. this may even completely hang
your instance until the query has finished! you probably do not want to run this in production!
press enter to continue, ctrl+c to cancel...
loc ksmchptr ksmchidx ksmchdur ksmchcom ksmchsiz ksmchcls ksmchtyp ksmchpar
--- ---------------- ---------- ---------- ---------------- ---------- -------- ---------- ----------------
sga 000000007c956fb8 1 1 kglhd 560 recr 80 00
select * from x$ksmsp where to_number ('000000007c956fe8', 'xxxxxxxxxxxxxxxx') between to_number(ksmchptr, 'xxxxxxxxxxxxxxxx') and to_number(ksmchptr, 'xxxxxxxxxxxxxxxx')+ksmchsiz-1;
addr indx inst_id ksmchidx ksmchdur ksmchcom ksmchptr ksmchsiz ksmchcls ksmchtyp ksmchpar
---------------- ---------- ---------- ---------- ---------- ---------------- ---------------- ---------- -------- ---------- ----------------
00007f3ceddfbc00 12611 1 1 1 kglhd 000000007c956fb8 560 recr 80 00
--//library cache mutex结构体在父游标句柄地址里面.
--//0x7c957128=2090168616 0x7c956fb8=2090168248 2090168616-2090168248 = 368,即mutex位于该chunk起始地址(ksmchptr)偏移368字节处;
--//相对父游标句柄地址0x7c956fe8(=2090168296)的偏移是2090168616-2090168296 = 320字节.
3.知道地址就可以模拟问题产生:
--//session 1:
scott@book> @ spid
sid serial# process server spid pid p_serial# c50
---------- ---------- ------------------------ --------- ------ ------- ---------- --------------------------------------------------
32 35 10708 dedicated 10709 26 84 alter system kill session '32,35' immediate;
--//32=0x20
--//session 2:
sys@book> oradebug setmypid
statement processed.
sys@book> oradebug peek 0x000000007c957128 8
[07c957128, 07c957130) = 00000000 00000000
sys@book> oradebug poke 0x000000007c957128 8 0x0000004400000020
before: [07c957128, 07c957130) = 00000000 00000000
after: [07c957128, 07c957130) = 00000020 00000044
--//session 1:
scott@book> @ m2.txt 1 c1 0
1 row created.
commit complete.
--//挂起!!注第一次执行会挂起,第2次就不会了.
--//session 2:
sys@book> @ wait
p1raw p2raw p3raw p1 p2 p3 sid serial# seq# event status state wait_time_micro seconds_in_wait wait_class
---------------- ---------------- ---------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------------------- -------- ------- --------------- --------------- -----------
0000000064ddee73 0000004400000020 0000000000000039 1692266099 2.9206e+11 57 32 33 38 library cache: mutex x active waiting 35878972 36 concurrency
--//出现library cache: mutex x 等待.
sys@book> oradebug poke 0x000000007c957128 8 0x0
before: [07c957128, 07c957130) = 00000020 00000044
after: [07c957128, 07c957130) = 00000000 00000000
--//再次执行(不会挂起),因为该游标已经cache. pl/sql中的sql语句与sqlplus直接执行的sql语句处理方式不同.
sys@book> select * from v$open_cursor where sql_id='a31kd5tkdvvmm';
saddr sid user_name address hash_value sql_id sql_text last_sql_active_tim sql_exec_id cursor_type
---------------- ---- --------- ---------------- ---------- ------------- --------------------------------------- ------------------- ----------- --------------------
000000008635de10 32 scott 000000007c956fe8 1692266099 a31kd5tkdvvmm select deptno from dept where deptno=10 pl/sql cursor cached
--//第一次执行就cache了.第2次执行不会在library cache上出现阻塞.
sys@book> oradebug poke 0x000000007c957128 8 0x0000004400000020
before: [07c957128, 07c957130) = 00000000 00000000
after: [07c957128, 07c957130) = 00000020 00000044
scott@book> @ m2.txt 1 c1 0
1 row created.
commit complete.
pl/sql procedure successfully completed.
11 rows updated.
commit complete.
--//可以正常执行.但是退出会出现问题.
scott@book> quit
--//挂起!!
sys@book> @ wait
p1raw p2raw p3raw p1 p2 p3 sid serial# seq# event status state wait_time_micro seconds_in_wait wait_class
---------------- ---------------- ---------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------------------- -------- ------- --------------- --------------- -----------
0000000064ddee73 0000004400000020 0000000000000055 1692266099 2.9206e+11 85 32 35 57 library cache: mutex x active waiting 17103974 17 concurrency
--//还有1个情况是阻塞无法查询x$mutex_sleep_history视图,
sys@book> @ mutexy 6 a31kd5tkdvvmm
--//挂起!!取消后正常!!
sys@book> oradebug poke 0x000000007c957128 8 0x00
before: [07c957128, 07c957130) = 00000020 00000044
after: [07c957128, 07c957130) = 00000000 00000000
----
hash sum_sleeps sum_gets location mutex_type mutex_addr sqlid kglnaown c100
---------- ---------- ---------- ------------------------------ -------------------- ---------------- ------------- -------- ---------------------------------------
1692266099 161187946 600188278 kksfbc [kkschlpin1] cursor pin 000000007c88e330 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 9215322 1.8927e+11 kkslockdelete [kkschlpin6] cursor pin 000000007c88e330 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 5019588 1.0546e+11 kksfbc [kkschlfsp2] cursor pin 000000007c88e330 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 1351526 63672414 kgllkc1 57 library cache 000000007c957128 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 499588 9089718 kkslce [kkschlpin2] cursor pin 000000007c957128 a31kd5tkdvvmm select deptno from dept where deptno=10
1692266099 212212 3838112 kglgethandlereference 124 library cache 000000007c957128 a31kd5tkdvvmm select deptno from dept where deptno=10
6 rows selected.
--//感觉在11g下出现library cache: mutex x等待的可能性很小.library cache mutex主要在会话登录后sql语句第1-3次执行时用到,此时需要通过
--//哈希定位检查父游标是否存在,以及是否要硬解析;如果游标已经cache就不再需要了.不同会话同时执行相同语句时,在这里发生阻塞的可能性
--//很小,更容易出现cursor: pin s(我的测试也说明这个问题),加上session_cached_cursors的作用,更难在这里出现阻塞.
--//当然一些应用开始执行的语句可能都是一样的,可能密集的登录时偶尔出现不会成为主要矛盾.
--//不像10g,采用的是library cache latch,而且还有数量限制.而11g采用mutex,library cache mutex的结构体在生成的父游标句柄里面,
--//等于打散到共享池的许多地方,出现碰撞的概率降低了.
4.附上相关脚本:
$ cat tpt/cha
channels2.sql channels3.sql channels.sql
[oracle@gxqyydg4 ip=100.78 ~/sqllaji ] $ cat tpt/fcha.sql
--------------------------------------------------------------------------------
--
-- file name: fcha.sql (find chunk address) v0.2
-- purpose: find in which heap (uga, pga or shared pool) a memory address resides
--
-- author: tanel poder
-- copyright: (c) http://blog.tanelpoder.com | @tanelpoder
--
-- usage: @fcha <addr_hex>
-- @fcha f6a14448
--
-- other: this would only report an uga/pga chunk address if it belongs
-- to *your* process/session (x$ksmup and x$ksmpp do not see other
-- session/process memory)
--
--------------------------------------------------------------------------------
-- Tell the operator what is about to be looked up and wait for confirmation,
-- because the query below reads x$ksmsp.
prompt find in which heap (uga, pga or shared pool) the memory address &1 resides...
prompt
prompt warning!!! this script will query x$ksmsp, which will cause heavy shared pool latch contention
prompt in systems under load and with large shared pool. this may even completely hang
prompt your instance until the query has finished! you probably do not want to run this in production!
prompt
pause press enter to continue, ctrl+c to cancel...

-- Report the chunk that contains the address passed as argument 1.
-- Everything after the first "x" of the argument is parsed as hex, which drops
-- an optional 0x prefix (instr returns 0 when there is no "x", so the whole
-- argument is used).
-- The same containment test, chunk start <= address <= chunk start + size - 1,
-- is applied to three sources: x$ksmsp (tagged sga), x$ksmup (tagged uga) and
-- x$ksmpp (tagged pga). The uga and pga branches return null for ksmchidx and
-- ksmchdur.
select 'sga' loc, ksmchptr, ksmchidx, ksmchdur, ksmchcom, ksmchsiz, ksmchcls, ksmchtyp, ksmchpar
  from x$ksmsp
 where to_number(ksmchptr,'xxxxxxxxxxxxxxxx')
          <= to_number(substr('&1', instr(lower('&1'), 'x')+1) ,'xxxxxxxxxxxxxxxx')
   and to_number(substr('&1', instr(lower('&1'), 'x')+1) ,'xxxxxxxxxxxxxxxx')
          <= to_number(ksmchptr,'xxxxxxxxxxxxxxxx') + ksmchsiz - 1
union all
select 'uga', ksmchptr, null, null, ksmchcom, ksmchsiz, ksmchcls, ksmchtyp, ksmchpar
  from x$ksmup
 where to_number(ksmchptr,'xxxxxxxxxxxxxxxx')
          <= to_number(substr('&1', instr(lower('&1'), 'x')+1) ,'xxxxxxxxxxxxxxxx')
   and to_number(substr('&1', instr(lower('&1'), 'x')+1) ,'xxxxxxxxxxxxxxxx')
          <= to_number(ksmchptr,'xxxxxxxxxxxxxxxx') + ksmchsiz - 1
union all
select 'pga', ksmchptr, null, null, ksmchcom, ksmchsiz, ksmchcls, ksmchtyp, ksmchpar
  from x$ksmpp
 where to_number(ksmchptr,'xxxxxxxxxxxxxxxx')
          <= to_number(substr('&1', instr(lower('&1'), 'x')+1) ,'xxxxxxxxxxxxxxxx')
   and to_number(substr('&1', instr(lower('&1'), 'x')+1) ,'xxxxxxxxxxxxxxxx')
          <= to_number(ksmchptr,'xxxxxxxxxxxxxxxx') + ksmchsiz - 1
/
$ cat mutexy.sql
-- mutexy.sql: mutex sleep totals per library cache object and code location.
-- usage: @mutexy <max_rows> <sql_id>
--   argument 1: maximum number of rows to return, highest sum_sleeps first
--   argument 2: sql_id to report on (lower-cased before comparing), or an empty
--               string to report every object that has a sql_id
column kglnaown format a20
column mutex_type format a20
column kglnaobj format a100
-- The object name is selected under the alias c100, so that alias needs its own format.
column c100 format a100
column location format a30
-- x$kglob has one row per handle, and a parent cursor and its child cursors share
-- the same kglnahsh. Joining it directly would repeat every sleep-history row once
-- per handle and inflate the sums, so the object rows are de-duplicated first.
select * from (
select o.kglnahsh hash
      ,sum (h.sleeps) sum_sleeps
      ,sum (h.gets) sum_gets
      ,h.location
      ,h.mutex_type
      ,h.mutex_addr
      ,o.kglobt03 sqlid
      ,o.kglnaown
      ,replace(o.kglnaobj,chr(13)) c100
  from (select distinct kglnahsh, kglobt03, kglnaown, kglnaobj
          from x$kglob
         where kglobt03 = decode('&&2','',kglobt03,lower('&&2'))) o
 inner join x$mutex_sleep_history h
    on h.mutex_identifier = o.kglnahsh
 group by o.kglnaobj
      ,o.kglobt03
      ,o.kglnaown
      ,o.kglnahsh
      ,h.location
      ,h.mutex_type
      ,h.mutex_addr
 order by sum_sleeps desc ) where rownum<= &1;
$ cat sharepool/shp4.sql
-- shp4.sql: list the x$kglob handle rows that belong to one statement.
-- usage: @shp4 <sql_id_or_handle_address> <hash_value>
--   argument 1 is compared with kglobt03, kglhdpar and kglhdadr
--   argument 2 is compared with kglnahsh
-- A row whose kglhdadr equals its kglhdpar is labelled as the parent cursor
-- handle, and every other row as a child cursor handle.
-- n0_6_16 is kglobhs0 + kglobhs6 + kglobt16, and n20 adds kglobhs1 to kglobhs5 to that sum.
column n0_6_16 format 99999999
select
    decode(kglhdadr, kglhdpar, '父游标句柄地址', '子游标句柄地址') text,
    kglhdadr,
    kglhdpar,
    substr(kglnaobj,1,40) c40,
    kglhdlmd,
    kglhdpmd,
    kglhdivc,
    kglobhd0,
    kglobhd6,
    kglobhs0,
    kglobhs6,
    kglobt16,
    kglobhs0+kglobhs6+kglobt16 n0_6_16,
    kglobhs0+kglobhs1+kglobhs2+kglobhs3+kglobhs4+kglobhs5+kglobhs6+kglobt16 n20,
    kglnahsh,
    kglobt03,
    kglobt09
from
    x$kglob
where
    kglobt03 = '&1'
    or kglhdpar='&1'
    or kglhdadr='&1'
    or kglnahsh= &2;
推荐阅读
-
某个普通用户登录一直hang住,不报错。library cache lock等待
-
[20190402]Library Cache mutex.txt
-
DBA手记(学习)-library cache pin
-
Oracle Library cache 内部机制 说明
-
Oracle 11g下重现library cache lock等待事件
-
Oracle Library Cache Lock 解决思路
-
Oracle latch:library cache 导致 数据库挂起 故障
-
[20190319]shared pool latch与library cache latch的简单探究.txt
-
Oracle latch:library cache 导致 数据库挂起 故障
-
Oracle Library cache 内部机制 说明