Object freeing: a detailed look at kmem_cache_free and its core function slab_free
1. The kmem_cache_free function
void kmem_cache_free(struct kmem_cache *s, void *x)	/* x is the object to be freed */
{
	/* Find the head page of the slab containing x via virt_to_head_page, then from
	 * that page recover the slab cache the object actually belongs to. */
	s = cache_from_obj(s, x);
	if (!s)
		return;
	/* virt_to_head_page again finds the first page of the slab containing x. */
	slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
	trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
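As a side note, cache_from_obj is essentially a sanity check: it looks up the cache recorded in the slab's head page and compares it with the cache the caller passed in. The sketch below shows roughly what it does in kernels of this era; the memcg/debug shortcuts are omitted and details vary by kernel version:

static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
	struct kmem_cache *cachep;
	struct page *page;

	/* (memcg accounting and debug-only shortcuts omitted in this sketch) */

	page = virt_to_head_page(x);	/* head page of the slab that holds x */
	cachep = page->slab_cache;	/* cache recorded when the slab page was allocated */
	if (slab_equal_or_root(cachep, s))
		return cachep;

	pr_err("%s: Wrong slab cache. %s but object is from %s\n",
	       __func__, s->name, cachep->name);
	WARN_ON_ONCE(1);
	return s;
}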
The virt_to_head_page function:
static inline struct page *virt_to_head_page(const void *x)
{
	struct page *page = virt_to_page(x);	/* convert the kernel virtual address to its struct page */

	/* If this page is a tail page, return page->first_page, otherwise return the page itself. */
	return compound_head(page);
}
The compound_head function:
static inline struct page *compound_head(struct page *page)
{
	if (unlikely(PageTail(page)))
		/* If a slab spans several physically contiguous pages, every page except the
		 * first is a tail page, and its struct page contains first_page pointing back
		 * to the first (head) page. */
		return page->first_page;
	return page;
}
2. The slab_free function
/*
 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
 * can perform fastpath freeing without additional function calls.
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This is typically the case if we have just allocated
 * the item before.
 *
 * If fastpath is not possible then fall back to __slab_free where we deal
 * with all sorts of special processing.
 *
 * Bulk free of a freelist with several objects (all pointing to the
 * same page) is possible by specifying head and tail ptr, plus objects
 * count (cnt). Bulk free is indicated by the tail pointer being set.
 */
static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
				      void *head, void *tail, int cnt,
				      unsigned long addr)
{
	void *tail_obj = tail ? : head;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	slab_free_freelist_hook(s, head, tail);	/* debug-related hook */

redo:
	/*
	 * Determine the current cpu's per cpu slab.
	 * The cpu may change afterward. However that does not matter since
	 * data is retrieved via this pointer. If we are on the same cpu
	 * during the cmpxchg then the free will succeed.
	 */
	preempt_disable();
	c = __this_cpu_ptr(s->cpu_slab);	/* get the current CPU's kmem_cache_cpu */
	tid = c->tid;				/* transaction id of the current kmem_cache_cpu */
	preempt_enable();

	/* Simplest case 1: the slab holding object x is exactly the slab cached by the current CPU. */
	if (likely(page == c->page)) {		/* equal means this slab is cached by the current CPU */
		set_freepointer(s, tail_obj, c->freelist);	/* effectively object->next = c->freelist */
		/* Atomically make the freed object the current CPU's next free object. */
		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				c->freelist, tid,
				head, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else
		/* Not in the current CPU's cache: free it into a partial slab of the
		 * current CPU or of the node. */
		__slab_free(s, page, head, tail_obj, cnt, addr);
}
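For reference, the two small helpers used in the fastpath above look roughly like this in SLUB sources of this era (shown only to make the comments concrete; this predates the later freelist-pointer hardening):

/* Store the "next free object" pointer inside the object itself, at offset s->offset. */
static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}

/* The transaction id is bumped on every cpu-slab operation, so a stale tid makes
 * the this_cpu_cmpxchg_double above fail and the fastpath retries via redo. */
static inline unsigned long next_tid(unsigned long tid)
{
	return tid + TID_STEP;
}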
3. The __slab_free function
/*
 * Slow path handling. This may still be called frequently since objects
 * have a longer lifetime than the cpu slabs in most processing loads.
 *
 * So we still attempt to reduce cache line usage. Just take the slab
 * lock and free the item. If there is no additional partial page
 * handling required then we can return immediately.
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *head, void *tail, int cnt,
			unsigned long addr)
{
	void *prior;
	int was_frozen;
	struct page new;
	unsigned long counters;
	struct kmem_cache_node *n = NULL;
	unsigned long uninitialized_var(flags);

	stat(s, FREE_SLOWPATH);

	if (kmem_cache_debug(s) &&
	    !(n = free_debug_processing(s, page, head, tail, cnt,
					addr, &flags)))
		return;
	do {
		if (unlikely(n)) {
			spin_unlock_irqrestore(&n->list_lock, flags);
			n = NULL;
		}
		prior = page->freelist;		/* first free object of the slab holding the freed object */
		counters = page->counters;
		set_freepointer(s, tail, prior);
		/*
		 * Why not simply new = *page? struct page is fairly large, so copying the
		 * whole structure would cost more than copying counters and freelist, which
		 * together are just a double word.
		 */
		new.counters = counters;	/* counters lives in a union in struct page, so assigning it
						 * to new also sets inuse, objects and frozen in new */
		was_frozen = new.frozen;	/* frozen == 1 means some CPU has this slab cached */
		new.inuse -= cnt;		/* inuse is the number of objects already allocated from this slab */
		/*
		 * 1) "!new.inuse || !prior" is true when either the slab had only this one object
		 *    allocated (so after the free every object in it is free again), or the old
		 *    slab->freelist was NULL, meaning the slab used to sit on the node's full list.
		 * 2) If "!was_frozen" is also true (no CPU has this slab cached), we must touch the
		 *    node's partial or full list, so we call get_node and take the node lock.
		 * 3) If "!was_frozen" is false (some CPU has the slab cached, possibly even this CPU,
		 *    since nothing from slab_free onwards locks the page; the page == c->page check
		 *    in slab_free only makes this less likely), just put the object on page->freelist.
		 * 4) If "!new.inuse || !prior" is false, no node list needs to change either; again
		 *    just put the object on page->freelist.
		 */
		if ((!new.inuse || !prior) && !was_frozen) {

			if (!kmem_cache_debug(s) && !prior)

				/*
				 * Slab was on no list before and will be partially empty.
				 * We can defer the list move and instead freeze it.
				 */
				new.frozen = 1;

			else { /* Needs to be taken off a list */

				n = get_node(s, page_to_nid(page));
				/*
				 * Speculatively acquire the list_lock.
				 * If the cmpxchg does not succeed then we may
				 * drop the list_lock without any processing.
				 *
				 * Otherwise the list_lock will synchronize with
				 * other processors updating the list of slabs.
				 */
				spin_lock_irqsave(&n->list_lock, flags);
			}
		}

	} while (!cmpxchg_double_slab(s, page,
		prior, counters,
		head, new.counters,
		"__slab_free"));
	if (likely(!n)) {

		/*
		 * If we just froze the page then put it onto the
		 * per cpu partial list.
		 */
		if (new.frozen && !was_frozen) {
			put_cpu_partial(s, page, 1);
			stat(s, CPU_PARTIAL_FREE);
		}
		/*
		 * The list lock was not taken therefore no list
		 * activity can be necessary.
		 */
		if (was_frozen)
			stat(s, FREE_FROZEN);
		return;
	}
	/*
	 * If after this free every object in the slab is free, consider releasing the
	 * slab itself, since kmem_cache_node keeps no "free" list, only partial and full
	 * lists. The slab is actually released only if the node's partial list already
	 * holds more slabs than the minimum threshold, in which case we goto slab_empty.
	 */
	if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
		goto slab_empty;

	/*
	 * Objects left in the slab. If it was not on the partial list before
	 * then add it.
	 */
	/* prior == NULL means the slab had no free object before this free, i.e. it was on the full list. */
	if (kmem_cache_debug(s) && unlikely(!prior)) {
		remove_full(s, page);
		add_partial(n, page, DEACTIVATE_TO_TAIL);
		stat(s, FREE_ADD_PARTIAL);
	}
	spin_unlock_irqrestore(&n->list_lock, flags);	/* done with the kmem_cache_node, drop the node lock */
	return;
slab_empty:
	if (prior) {
		/*
		 * Slab on the partial list.
		 */
		remove_partial(n, page);
		stat(s, FREE_REMOVE_PARTIAL);
	} else
		/* Slab must be on the full list */
		/*
		 * This is the case where the slab held exactly one object: once that object was
		 * allocated the slab sat on the full list, so freeing the object means taking
		 * the slab off the full list.
		 */
		remove_full(s, page);

	spin_unlock_irqrestore(&n->list_lock, flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
}
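For context, the node-list helpers used in the slow path are small. Roughly, in kernels of this era (simplified; callers hold n->list_lock for the list operations):

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
	return s->node[node];
}

static inline void add_partial(struct kmem_cache_node *n,
			       struct page *page, int tail)
{
	n->nr_partial++;
	if (tail == DEACTIVATE_TO_TAIL)
		list_add_tail(&page->lru, &n->partial);	/* put the slab at the tail of the partial list */
	else
		list_add(&page->lru, &n->partial);	/* or at the head */
}

static inline void remove_partial(struct kmem_cache_node *n,
				  struct page *page)
{
	list_del(&page->lru);
	n->nr_partial--;
}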
The discard_slab function:
static void discard_slab(struct kmem_cache *s, struct page *page)
{
	/* Decrease the slab count and total object count on the owning kmem_cache_node. */
	dec_slabs_node(s, page_to_nid(page), page->objects);
	free_slab(s, page);
}
The dec_slabs_node function:
static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
{
	struct kmem_cache_node *n = get_node(s, node);

	atomic_long_dec(&n->nr_slabs);
	atomic_long_sub(objects, &n->total_objects);
}
The free_slab function:
static void free_slab(struct kmem_cache *s, struct page *page)
{
	if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
		struct rcu_head *head;

		if (need_reserve_slab_rcu) {
			int order = compound_order(page);
			int offset = (PAGE_SIZE << order) - s->reserved;

			VM_BUG_ON(s->reserved != sizeof(*head));
			head = page_address(page) + offset;
		} else {
			head = (void *)&page->lru;
		}

		call_rcu(head, rcu_free_slab);
	} else
		__free_slab(s, page);
}
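The RCU callback registered above simply recovers the struct page from the rcu_head and falls through to __free_slab. Roughly, in the same kernel era (simplified sketch):

static void rcu_free_slab(struct rcu_head *h)
{
	struct page *page;

	if (need_reserve_slab_rcu)
		/* the rcu_head was placed in the reserved tail area of the slab itself */
		page = virt_to_head_page(h);
	else
		/* the rcu_head was overlaid on page->lru */
		page = container_of((struct list_head *)h, struct page, lru);

	__free_slab(page->slab_cache, page);
}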
The __free_slab function:
static void __free_slab(struct kmem_cache *s, struct page *page)
{
	int order = compound_order(page);	/* order of the slab's page count, read from its head page */
	int pages = 1 << order;			/* number of pages in the slab */

	if (kmem_cache_debug(s)) {
		void *p;

		slab_pad_check(s, page);
		for_each_object(p, s, page_address(page),
						page->objects)
			check_object(s, page, p, SLUB_RED_INACTIVE);
	}

	kmemcheck_free_shadow(page, compound_order(page));

	mod_zone_page_state(page_zone(page),
		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
		-pages);

	__ClearPageSlab(page);
	reset_page_mapcount(page);
	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += pages;
	__free_pages(page, order);	/* finally return the physical pages to the page allocator */
}