[Kernel Source Study Notes] The slab allocator (4): allocating slab objects

5. Allocating slab objects

void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *ret = slab_alloc(cachep, flags, _RET_IP_);

	trace_kmem_cache_alloc(_RET_IP_, ret,
			       cachep->object_size, cachep->size, flags);

	return ret;
}
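
Before diving into the internals, here is a minimal usage sketch of the public API (my own example: struct foo, foo_cachep and the cache name are hypothetical and not from the kernel source):

#include <linux/module.h>
#include <linux/slab.h>

struct foo {
	int id;
	char name[32];
};

static struct kmem_cache *foo_cachep;

static int __init foo_init(void)
{
	struct foo *f;

	/* create a cache of fixed-size struct foo objects */
	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
				       0, SLAB_HWCACHE_ALIGN, NULL);
	if (!foo_cachep)
		return -ENOMEM;

	/* this call enters kmem_cache_alloc()/slab_alloc() traced in this note */
	f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
	if (!f) {
		kmem_cache_destroy(foo_cachep);
		return -ENOMEM;
	}
	f->id = 1;
	kmem_cache_free(foo_cachep, f);
	return 0;
}

static void __exit foo_exit(void)
{
	kmem_cache_destroy(foo_cachep);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");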

Slab object allocation starts from kmem_cache_alloc; its core is slab_alloc.

static __always_inline void *
slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
{
	unsigned long save_flags;
	void *objp;

	flags &= gfp_allowed_mask;
	cachep = slab_pre_alloc_hook(cachep, flags);
	if (unlikely(!cachep))
		return NULL;

	cache_alloc_debugcheck_before(cachep, flags);
	local_irq_save(save_flags);
	objp = __do_cache_alloc(cachep, flags);
	local_irq_restore(save_flags);
	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
	prefetchw(objp);

	if (unlikely(flags & __GFP_ZERO) && objp)
		memset(objp, 0, cachep->object_size);

	slab_post_alloc_hook(cachep, flags, 1, &objp);
	return objp;
}

static __always_inline void *
__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	return ____cache_alloc(cachep, flags);
}

The core call chain in this function is __do_cache_alloc -> ____cache_alloc; local interrupts are disabled before the call and restored afterwards.
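____cache_alloc works on the per-CPU object cache, struct array_cache. For reference, in mm/slab.c of roughly this kernel generation the structure looks like the following (the short field comments are mine):

struct array_cache {
	unsigned int avail;      /* number of free objects currently held in entry[] */
	unsigned int limit;      /* maximum number of objects this cache may hold */
	unsigned int batchcount; /* how many objects to move at a time when refilling/flushing */
	unsigned int touched;    /* set when the cache was used recently */
	void *entry[];           /* array of pointers to free objects */
};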

static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *objp;
	struct array_cache *ac;

	check_irq_off();

	ac = cpu_cache_get(cachep); // get the per-CPU object cache (array_cache)
	if (likely(ac->avail)) { // are there free objects in the per-CPU cache?
		ac->touched = 1;
		objp = ac->entry[--ac->avail]; // take one free object

		STATS_INC_ALLOCHIT(cachep);
		goto out;
	}

	STATS_INC_ALLOCMISS(cachep);
	objp = cache_alloc_refill(cachep, flags); // no free objects: refill the cache and allocate one
	/* the 'ac' may be updated by cache_alloc_refill(), and kmemleak_erase() requires its correct value */
	ac = cpu_cache_get(cachep);

out:
	/* To avoid a false negative, if an object that is in one of the per-CPU caches is leaked, we need to make sure kmemleak does not treat the array pointer as a reference to the object. */
	if (objp)
		kmemleak_erase(&ac->entry[ac->avail]);
	return objp;
}

If there are no free objects in the per-CPU cache, cache_alloc_refill is called to refill it and return an object.

static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
	int batchcount;
	struct kmem_cache_node *n;
	struct array_cache *ac, *shared;
	int node;
	void *list = NULL;
	struct page *page;

	check_irq_off();
	node = numa_mem_id();

	ac = cpu_cache_get(cachep); // get the per-CPU object cache (array_cache)
	batchcount = ac->batchcount;
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/* If there was little recent activity on this cache, then
		 * perform only a partial refill. Otherwise we could generate
		 * refill bouncing.
		 */
		batchcount = BATCHREFILL_LIMIT;
	}
	n = get_node(cachep, node); // get the kmem_cache_node (slab node) for this NUMA node

	BUG_ON(ac->avail > 0 || !n);
	shared = READ_ONCE(n->shared);//获取共享缓冲池
	if (!n->free_objects && (!shared || !shared->avail))
		goto direct_grow;

	spin_lock(&n->list_lock);
	shared = READ_ONCE(n->shared);

	/* See if we can refill from the shared array */
	if (shared && transfer_objects(ac, shared, batchcount)) { // if the shared cache has free objects, move up to batchcount of them into the per-CPU cache
		shared->touched = 1;
		goto alloc_done;
	}

	while (batchcount > 0) {
		/* Get slab alloc is to come from. */
		page = get_first_slab(n, false);
		if (!page)
			goto must_grow;

		check_spinlock_acquired(cachep);

		batchcount = alloc_block(cachep, ac, page, batchcount);
		fixup_slab_list(cachep, n, page, &list);
	}

must_grow:
	n->free_objects -= ac->avail;
alloc_done:
	spin_unlock(&n->list_lock);
	fixup_objfreelist_debug(cachep, &list);

direct_grow:
	if (unlikely(!ac->avail)) {
		/* Check if we can use obj in pfmemalloc slab */
		if (sk_memalloc_socks()) {
			void *obj = cache_alloc_pfmemalloc(cachep, n, flags);

			if (obj)
				return obj;
		}

		page = cache_grow_begin(cachep, gfp_exact_node(flags), node);

		/* cache_grow_begin() can reenable interrupts, then ac could change. */
		ac = cpu_cache_get(cachep);
		if (!ac->avail && page)
			alloc_block(cachep, ac, page, batchcount);
		cache_grow_end(cachep, page);

		if (!ac->avail)
			return NULL;
	}
	ac->touched = 1;

	return ac->entry[--ac->avail];
}

If the shared cache has no free objects either, the slabs_partial list is checked first and then the slabs_free list, via get_first_slab:

static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
{
	struct page *page;

	assert_spin_locked(&n->list_lock);
	page = list_first_entry_or_null(&n->slabs_partial, struct page, lru);
	if (!page) {
		n->free_touched = 1;
		page = list_first_entry_or_null(&n->slabs_free, struct page,
						lru);
		if (page)
			n->free_slabs--;
	}

	if (sk_memalloc_socks())
		page = get_valid_first_slab(n, page, pfmemalloc);

	return page;
}

If a completely new slab has to be allocated, cache_grow_begin builds it:

static struct page *cache_grow_begin(struct kmem_cache *cachep,
				gfp_t flags, int nodeid)
{
	void *freelist;
	size_t offset;
	gfp_t local_flags;
	int page_node;
	struct kmem_cache_node *n;
	struct page *page;

	/*
	 * Be lazy and only check for valid flags here, keeping it out of the
	 * critical path in kmem_cache_alloc().
	 */
	if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
		gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
		flags &= ~GFP_SLAB_BUG_MASK;
		pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
				invalid_mask, &invalid_mask, flags, &flags);
		dump_stack();
	}
	WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

	check_irq_off();
	if (gfpflags_allow_blocking(local_flags))
		local_irq_enable();

	/*
	 * Get mem for the objs. Attempt to allocate a physical page from
	 * 'nodeid'.
	 */
	page = kmem_getpages(cachep, local_flags, nodeid);
	if (!page)
		goto failed;

	page_node = page_to_nid(page);
	n = get_node(cachep, page_node);

	/* Get the colour for the slab, and calculate the next value. */
	n->colour_next++;
	if (n->colour_next >= cachep->colour)
		n->colour_next = 0;

	offset = n->colour_next;
	if (offset >= cachep->colour)
		offset = 0;

	offset *= cachep->colour_off;

	/* Get slab management. */
	// compute the slab's cache colour and the freelist location, i.e. the address layout of the objects
	freelist = alloc_slabmgmt(cachep, page, offset,
			local_flags & ~GFP_CONSTRAINT_MASK, page_node);
	if (OFF_SLAB(cachep) && !freelist)
		goto opps1;

	slab_map_pages(cachep, page, freelist);

	kasan_poison_slab(page);
	cache_init_objs(cachep, page);

	if (gfpflags_allow_blocking(local_flags))
		local_irq_disable();

	return page;

opps1:
	kmem_freepages(cachep, page);
failed:
	if (gfpflags_allow_blocking(local_flags))
		local_irq_disable();
	return NULL;
}


static void *alloc_slabmgmt(struct kmem_cache *cachep,
				   struct page *page, int colour_off,
				   gfp_t local_flags, int nodeid)
{
	void *freelist;
	void *addr = page_address(page);

	page->s_mem = addr + colour_off;
	page->active = 0;

	if (OBJFREELIST_SLAB(cachep))
		freelist = NULL;
	else if (OFF_SLAB(cachep)) {
		/* Slab management obj is off-slab. */
		freelist = kmem_cache_alloc_node(cachep->freelist_cache,
					      local_flags, nodeid);
		if (!freelist)
			return NULL;
	} else {
		/* We will use last bytes at the slab for freelist */
		freelist = addr + (PAGE_SIZE << cachep->gfporder) -
				cachep->freelist_size;
	}

	return freelist;
}

The freelist can be viewed as an array of freelist_idx_t values, one entry per object, each holding an object index. s_mem is the address where the first object starts. If the freelist is kept off-slab, a separate chunk is allocated from freelist_cache to hold it; otherwise the last freelist_size bytes of the slab's memory are used for the freelist. For OBJFREELIST_SLAB caches the freelist lives inside one of the free objects, so no separate space is set aside here.
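Given s_mem, the address of object i in a slab is simply s_mem + i * cachep->size. mm/slab.c has a small helper for this conversion, roughly as follows (quoted from memory, so details may vary slightly between kernel versions):

static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
				 unsigned int idx)
{
	/* objects are laid out back to back starting at s_mem, cache->size bytes apart */
	return page->s_mem + cache->size * idx;
}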

static void slab_map_pages(struct kmem_cache *cache, struct page *page,
			   void *freelist)
{
	page->slab_cache = cache;
	page->freelist = freelist;
}

slab_map_pages records the owning kmem_cache and the freelist pointer in the page.

static void cache_init_objs(struct kmem_cache *cachep,
			    struct page *page)
{
	int i;
	void *objp;
	bool shuffled;

	cache_init_objs_debug(cachep, page);

	/* Try to randomize the freelist, if enabled */
	shuffled = shuffle_freelist(cachep, page);

	if (!shuffled && OBJFREELIST_SLAB(cachep)) {
		page->freelist = index_to_obj(cachep, page, cachep->num - 1) +
						obj_offset(cachep);
	}

	for (i = 0; i < cachep->num; i++) {
		objp = index_to_obj(cachep, page, i);
		kasan_init_slab_obj(cachep, objp);

		/* constructor could break poison info */
		if (DEBUG == 0 && cachep->ctor) {
			kasan_unpoison_object_data(cachep, objp);
			cachep->ctor(objp);
			kasan_poison_object_data(cachep, objp);
		}

		if (!shuffled)
			set_free_obj(page, i, i);
	}
}

static inline void set_free_obj(struct page *page,
					unsigned int idx, freelist_idx_t val)
{
	((freelist_idx_t *)(page->freelist))[idx] = val;
}

Next, in cache_init_objs, if the cache keeps its freelist inside an object (OBJFREELIST_SLAB) and the freelist was not shuffled, page->freelist is pointed into the last object. Then every object is visited: its address is computed with index_to_obj and, unless the freelist was shuffled, its index i is written into freelist slot i via set_free_obj.

Back in cache_alloc_refill, the array_cache is checked again for available objects; if it is still empty and a fresh page was obtained, alloc_block is called:

static __always_inline int alloc_block(struct kmem_cache *cachep,
		struct array_cache *ac, struct page *page, int batchcount)
{
	/* There must be at least one object available for allocation. */
	BUG_ON(page->active >= cachep->num);

	while (page->active < cachep->num && batchcount--) {
		STATS_INC_ALLOCED(cachep);
		STATS_INC_ACTIVE(cachep);
		STATS_SET_HIGH(cachep);

		ac->entry[ac->avail++] = slab_get_obj(cachep, page);
	}

	return batchcount;
}

As long as the page still has free objects (page->active < cachep->num) and batchcount is not exhausted, alloc_block pulls free objects out of the page and stores their pointers into the per-CPU array_cache.
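slab_get_obj, used by alloc_block above, reads the next free index from the freelist (the indices written earlier by set_free_obj) and converts it to an object address; page->active tracks how many freelist slots have already been consumed. The helpers in mm/slab.c look roughly like this (again quoted from memory; minor differences between versions are possible):

static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
{
	return ((freelist_idx_t *)page->freelist)[idx];
}

static void *slab_get_obj(struct kmem_cache *cachep, struct page *page)
{
	void *objp;

	/* take the object whose index sits at freelist position page->active */
	objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
	page->active++;

	return objp;
}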

static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
{
	struct kmem_cache_node *n;
	void *list = NULL;

	check_irq_off();

	if (!page)
		return;

	INIT_LIST_HEAD(&page->lru);
	n = get_node(cachep, page_to_nid(page));

	spin_lock(&n->list_lock);
	n->total_slabs++;
	if (!page->active) {
		list_add_tail(&page->lru, &(n->slabs_free));
		n->free_slabs++;
	} else
		fixup_slab_list(cachep, n, page, &list);

	STATS_INC_GROWN(cachep);
	n->free_objects += cachep->num - page->active;
	spin_unlock(&n->list_lock);

	fixup_objfreelist_debug(cachep, &list);
}

Next cache_grow_end runs. It looks up the kmem_cache_node for the page's NUMA node and increments that node's total_slabs count. If no object in the page is in use (page->active == 0), the page is added to the tail of the node's slabs_free list; if some objects have already been handed out, fixup_slab_list is called instead. Finally, the node's free_objects count grows by the number of objects per slab minus those already in use (cachep->num - page->active).

static inline void fixup_slab_list(struct kmem_cache *cachep,
				struct kmem_cache_node *n, struct page *page,
				void **list)
{
	/* move slabp to correct slabp list: */
	list_del(&page->lru);
	if (page->active == cachep->num) {
		list_add(&page->lru, &n->slabs_full);
		if (OBJFREELIST_SLAB(cachep)) {
#if DEBUG
			/* Poisoning will be done without holding the lock */
			if (cachep->flags & SLAB_POISON) {
				void **objp = page->freelist;

				*objp = *list;
				*list = objp;
			}
#endif
			page->freelist = NULL;
		}
	} else
		list_add(&page->lru, &n->slabs_partial);
}

If every object in the slab is now in use (page->active == cachep->num), page->lru is added to the node's slabs_full list, and for OBJFREELIST_SLAB caches page->freelist is cleared to NULL. Otherwise page->lru is added to the node's slabs_partial list.

At this point, at the end of cache_alloc_refill, ac->entry[--ac->avail] can be returned: a usable object.
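To tie the pieces together, the whole allocation path described in this note can be summarized as follows (a simplified sketch, not the actual kernel code):

/*
 * kmem_cache_alloc()
 *   -> slab_alloc()                      local IRQs disabled around the core
 *     -> __do_cache_alloc() -> ____cache_alloc()
 *          if (ac->avail)                fast path: per-CPU array_cache
 *              return ac->entry[--ac->avail];
 *          cache_alloc_refill():
 *              1. transfer_objects() from the node's shared array_cache, if any
 *              2. get_first_slab() from slabs_partial, then slabs_free,
 *                 and alloc_block() objects into the per-CPU cache
 *              3. otherwise cache_grow_begin()/cache_grow_end() to build a
 *                 fresh slab from the page allocator, then alloc_block()
 *              finally return ac->entry[--ac->avail];
 */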
