Detailed implementation of slab_free, the core of the object-freeing function kmem_cache_free

1. The kmem_cache_free function

void kmem_cache_free(struct kmem_cache *s, void *x) // x is the object to be freed
{
	s = cache_from_obj(s, x); // virt_to_head_page finds the head page of the slab containing x, and the owning slab cache is then read from that page
	if (!s)
		return;
	slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_); // virt_to_head_page returns the head page of the slab containing object x
	trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
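
In the common case cache_from_obj just returns s; only with memcg kmem accounting or free-debugging enabled does it look up the cache that actually owns the object through the slab's head page. A simplified sketch of that lookup (memcg details and error handling trimmed), not a verbatim copy of the kernel source:

static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
	struct kmem_cache *cachep;
	struct page *page;

	/* common case: no memcg kmem accounting and no free-debugging, trust the caller */
	if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE))
		return s;

	/* otherwise read back the owning cache from the slab's head page */
	page = virt_to_head_page(x);
	cachep = page->slab_cache;
	if (slab_equal_or_root(cachep, s))
		return cachep;

	pr_err("%s: Wrong slab cache. %s but object is from %s\n",
	       __func__, s->name, cachep->name);
	WARN_ON_ONCE(1);
	return s;
}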

The virt_to_head_page function:

static inline struct page *virt_to_head_page(const void *x)
{
	struct page *page = virt_to_page(x); // convert the kernel virtual address to its struct page
	// if this page is a tail page, return page->first_page; otherwise return the page itself
	return compound_head(page); 
}

The compound_head function:

static inline struct page *compound_head(struct page *page)
{
	if (unlikely(PageTail(page)))
		return page->first_page; // if a slab spans several contiguous physical pages, every page except the first is a tail page, and each tail page's struct page has first_page pointing back to the head page
	return page;
}
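
virt_to_page itself is architecture-specific; for addresses in the kernel's linear mapping it essentially converts the virtual address to a physical frame number and indexes the page array. On x86 it is roughly:

#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)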

2. The slab_free function

/*
 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
 * can perform fastpath freeing without additional function calls.
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This is typically the case if we have just allocated
 * the item before.
 *
 * If fastpath is not possible then fall back to __slab_free where we deal
 * with all sorts of special processing.
 *
 * Bulk free of a freelist with several objects (all pointing to the
 * same page) is possible by specifying the head and tail pointers, plus the
 * object count (cnt). Bulk free is indicated by the tail pointer being set.
 */
static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
				      void *head, void *tail, int cnt,
				      unsigned long addr)
{
	void *tail_obj = tail ? : head;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	slab_free_freelist_hook(s, head, tail); // debug-related hooks

redo:
	/*
	 * Determine the current cpu's per cpu slab.
	 * The cpu may change afterward. However that does not matter since
	 * data is retrieved via this pointer. If we are on the same cpu
	 * during the cmpxchg then the free will succeed.
	 */
	preempt_disable();
	c = __this_cpu_ptr(s->cpu_slab); // get the current CPU's kmem_cache_cpu

	tid = c->tid; // transaction id of the current kmem_cache_cpu
	preempt_enable();
	// Simplest case: the slab containing object x is exactly the slab currently cached by this CPU
	if (likely(page == c->page)) { // if equal, this slab is cached by the current CPU
		set_freepointer(s, tail_obj, c->freelist); // effectively object->next = c->freelist

		if (unlikely(!this_cpu_cmpxchg_double( // atomically make the freed object the head of this CPU's freelist
				s->cpu_slab->freelist, s->cpu_slab->tid,
				c->freelist, tid,
				head, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else // not cached by the current CPU: take the slow path, which may move the slab onto a per-CPU or per-node partial list
		__slab_free(s, page, head, tail_obj, cnt, addr);

}
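
The fast path above leans on a few tiny helpers. get_freepointer/set_freepointer read and write the next-free pointer that SLUB stores inside the free object itself at offset s->offset, and next_tid advances the per-CPU transaction id so that a stale cmpxchg attempt fails. For kernels of this vintage (before freelist-pointer hardening) they look roughly like this:

static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	/* the pointer to the next free object is stored inside the object at s->offset */
	return *(void **)(object + s->offset);
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}

static inline unsigned long next_tid(unsigned long tid)
{
	/* bumping the tid makes any concurrent cmpxchg that read the old tid fail */
	return tid + TID_STEP;
}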

3. The __slab_free function

/*
 * Slow path handling. This may still be called frequently since objects
 * have a longer lifetime than the cpu slabs in most processing loads.
 *
 * So we still attempt to reduce cache line usage. Just take the slab
 * lock and free the item. If there is no additional partial page
 * handling required then we can return immediately.
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *head, void *tail, int cnt,
			unsigned long addr)

{
	void *prior;
	int was_frozen;
	struct page new;
	unsigned long counters;
	struct kmem_cache_node *n = NULL;
	unsigned long uninitialized_var(flags);

	stat(s, FREE_SLOWPATH);

	if (kmem_cache_debug(s) &&
	    !(n = free_debug_processing(s, page, head, tail, cnt,
					addr, &flags)))
		return;

	do {
		if (unlikely(n)) {
			spin_unlock_irqrestore(&n->list_lock, flags);
			n = NULL;
		}
		prior = page->freelist; // first free object of the slab holding the object being freed
		counters = page->counters;
		set_freepointer(s, tail, prior);
		// Why not simply new = *page? struct page is fairly large, so copying the whole structure would cost more than copying just counters and freelist (together one double word).
		new.counters = counters; // counters sits in a union inside struct page, so assigning counters to new also copies inuse, objects and frozen
		was_frozen = new.frozen; // frozen == 1 means this slab is cached by some CPU
		new.inuse -= cnt; // inuse is the number of objects currently allocated from this slab
/* 1) !new.inuse || !prior is true when either the slab had only this one object allocated (so after the free every object in the slab is free), or the slab's freelist was NULL, meaning the slab was on the kmem_cache_node full list.
   2) In that case, if !was_frozen is also true (no CPU has this slab cached), the node's partial/full lists must be touched, so call get_node and take the node's list_lock.
   3) If !was_frozen is false (some CPU has this slab cached, possibly this very CPU, since nothing locks the page between slab_free and here; the earlier page == c->page check only reduces the likelihood), just link the object into page->freelist.
   4) If !new.inuse || !prior is false, no node list needs touching either; just link the object into page->freelist. */
		if ((!new.inuse || !prior) && !was_frozen) {

			if (!kmem_cache_debug(s) && !prior)

				/*
				 * Slab was on no list before and will be partially empty
				 * We can defer the list move and instead freeze it.
				 */
				new.frozen = 1;

			else { /* Needs to be taken off a list */

				n = get_node(s, page_to_nid(page));
				/*
				 * Speculatively acquire the list_lock.
				 * If the cmpxchg does not succeed then we may
				 * drop the list_lock without any processing.
				 *
				 * Otherwise the list_lock will synchronize with
				 * other processors updating the list of slabs.
				 */
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}

	} while (!cmpxchg_double_slab(s, page,
		prior, counters,
		head, new.counters,
		"__slab_free"));

	if (likely(!n)) {

		/*
		 * If we just froze the page then put it onto the
		 * per cpu partial list.
		 */
		if (new.frozen && !was_frozen) {
			put_cpu_partial(s, page, 1);
			stat(s, CPU_PARTIAL_FREE);
		}
		/*
		 * The list lock was not taken therefore no list
		 * activity can be necessary.
		 */
		if (was_frozen)
			stat(s, FREE_FROZEN);
		return;
	}
/* If after this free every object in the slab is free, consider releasing the whole slab, since kmem_cache_node only keeps partial and full lists, not a free list.
   The slab is actually released only when the node's partial list already holds more slabs than the minimum threshold (s->min_partial); in that case goto slab_empty. */
	if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
		goto slab_empty;

	/*
	 * Objects left in the slab. If it was not on the partial list before
	 * then add it.
	 */
// if prior is NULL, the slab had no free objects before this free, i.e. it was on the full list
	if (kmem_cache_debug(s) && unlikely(!prior)) {
		remove_full(s, page);
		add_partial(n, page, DEACTIVATE_TO_TAIL);
		stat(s, FREE_ADD_PARTIAL);
	}
	spin_unlock_irqrestore(&n->list_lock, flags); // done manipulating the kmem_cache_node lists, release the node lock
	return;

slab_empty:
	if (prior) {
		/*
		 * Slab on the partial list.
		 */
		remove_partial(n, page);
		stat(s, FREE_REMOVE_PARTIAL);
	} else
		/* Slab must be on the full list */
	/* Here the slab held only a single object; once that object was allocated the slab went on the full list, so freeing the object requires taking the slab off the full list. */
		remove_full(s, page);

	spin_unlock_irqrestore(&n->list_lock, flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
}
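
Two helpers used above are worth a quick look. get_node is just an array lookup of the per-node structure, and cmpxchg_double_slab atomically swaps the (page->freelist, page->counters) pair, falling back to a bit-spinlock on the page when hardware cmpxchg-double is unavailable. A simplified sketch (statistics and debug checks omitted), not the exact kernel code:

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
	return s->node[node];
}

/* simplified: the real code also has an irq-unsafe variant and extra sanity checks */
static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (s->flags & __CMPXCHG_DOUBLE)
		return cmpxchg_double(&page->freelist, &page->counters,
				      freelist_old, counters_old,
				      freelist_new, counters_new);
#endif
	{
		unsigned long flags;

		local_irq_save(flags);
		slab_lock(page);	/* bit-spinlock in page->flags */
		if (page->freelist == freelist_old &&
		    page->counters == counters_old) {
			page->freelist = freelist_new;
			page->counters = counters_new;
			slab_unlock(page);
			local_irq_restore(flags);
			return true;
		}
		slab_unlock(page);
		local_irq_restore(flags);
		return false;
	}
}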

The discard_slab function:

static void discard_slab(struct kmem_cache *s, struct page *page)
{
	// decrement the slab count and total_objects count on the owning kmem_cache_node
	dec_slabs_node(s, page_to_nid(page), page->objects);
	free_slab(s, page);
}

The dec_slabs_node function:

static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
{
	struct kmem_cache_node *n = get_node(s, node);
	atomic_long_dec(&n->nr_slabs);
	atomic_long_sub(objects, &n->total_objects);
}

The free_slab function:

static void free_slab(struct kmem_cache *s, struct page *page)
{
	if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
		struct rcu_head *head;

		if (need_reserve_slab_rcu) {
			int order = compound_order(page);
			int offset = (PAGE_SIZE << order) - s->reserved;
			VM_BUG_ON(s->reserved != sizeof(*head));
			head = page_address(page) + offset;
		} else {
			head = (void *)&page->lru;
		}
		call_rcu(head, rcu_free_slab);
	} else
		__free_slab(s, page);
}
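
For a SLAB_DESTROY_BY_RCU cache the pages must not be reused until an RCU grace period has elapsed, so free_slab only queues an rcu_head (either overlaying page->lru or placed in the reserved tail of the slab itself) and the real work happens in the callback. The callback is roughly:

static void rcu_free_slab(struct rcu_head *h)
{
	struct page *page;

	if (need_reserve_slab_rcu)
		/* the rcu_head lives in the reserved space at the end of the slab */
		page = virt_to_head_page(h);
	else
		/* the rcu_head overlays page->lru */
		page = container_of((struct list_head *)h, struct page, lru);

	__free_slab(page->slab_cache, page);
}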

The __free_slab function:

static void __free_slab(struct kmem_cache *s, struct page *page)
{
	int order = compound_order(page); // read the slab's page order from its head struct page
	int pages = 1 << order; // number of pages in the slab

	if (kmem_cache_debug(s)) {
		void *p;
		slab_pad_check(s, page);
		for_each_object(p, s, page_address(page),
						page->objects)
			check_object(s, page, p, SLUB_RED_INACTIVE);
	}

	kmemcheck_free_shadow(page, compound_order(page));
	mod_zone_page_state(page_zone(page),
		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
		-pages);

	__ClearPageSlab(page);
	reset_page_mapcount(page);
	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += pages;
	__free_pages(page, order); // finally return the physical pages to the buddy allocator
}
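
The for_each_object macro used in the debug branch above just walks the slab's memory in object-size strides; in mm/slub.c of this era it is roughly:

#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size; \
			__p += (__s)->size)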