【内核源码学习笔记】slab分配器(3)建立slab描述符

4.建立slab描述符

/*
 * kmem_cache_create - create a slab cache descriptor.
 *
 * Thin wrapper: forwards every argument to kmem_cache_create_usercopy(),
 * passing 0 for the two extra arguments (presumably useroffset/usersize --
 * confirm against that function's prototype), and returns its result.
 */
struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
		slab_flags_t flags, void (*ctor)(void *))
{
	struct kmem_cache *cache;

	cache = kmem_cache_create_usercopy(name, size, align, flags,
					   0, 0, ctor);
	return cache;
}

首先会查找是否已有可以直接复用(可合并)的slab描述符。

/*
 * __kmem_cache_alias - reuse an existing cache for a new request if possible.
 *
 * Asks find_mergeable() for a compatible cache. When one is found its
 * reference count is bumped and its object_size is raised to at least @size.
 * Returns the shared cache, or NULL when nothing can be merged.
 */
struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *))
{
	struct kmem_cache *existing =
		find_mergeable(size, align, flags, name, ctor);

	if (!existing)
		return NULL;

	existing->refcount++;

	/*
	 * Adjust the object size so that we clear the complete object
	 * on kzalloc.
	 */
	existing->object_size = max_t(int, existing->object_size, size);

	return existing;
}


/*
 * find_mergeable - look for an existing root cache that a new cache request
 * can share instead of creating a fresh one.
 *
 * Returns the first cache on slab_root_caches (walked in reverse) that passes
 * every compatibility check below. Returns NULL when slab_nomerge is set, a
 * constructor is supplied, the computed flags contain SLAB_NEVER_MERGE, or no
 * cache qualifies.
 */
struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
		slab_flags_t flags, const char *name, void (*ctor)(void *))
{
	struct kmem_cache *s;

	if (slab_nomerge)
		return NULL;

	if (ctor)
		return NULL;

	/*
	 * Normalise the request: pointer-align the size, derive the effective
	 * alignment, round the size up to it, then compute the final flags.
	 */
	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name, NULL);

	if (flags & SLAB_NEVER_MERGE)
		return NULL;

	/* Walk slab_root_caches looking for a cache of suitable size. */
	list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
		if (slab_unmergeable(s))
			continue;

		/* Candidate objects are too small for this request. */
		if (size > s->size)
			continue;

		/* The SLAB_MERGE_SAME subset of the flags must match exactly. */
		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
			continue;
		/*
		 * Check if alignment is compatible.
		 * Courtesy of Adrian Drzewiecki
		 */
		if ((s->size & ~(align - 1)) != s->size)
			continue;

		/* Skip candidates that are larger by a pointer's size or more. */
		if (s->size - size >= sizeof(void *))
			continue;

		/*
		 * With CONFIG_SLAB, a non-zero requested alignment must not
		 * exceed the candidate's alignment and must divide it evenly.
		 */
		if (IS_ENABLED(CONFIG_SLAB) && align &&
			(align > s->align || s->align % align))
			continue;

		return s;
	}
	return NULL;
}

如果没有找到,就会调用create_cache函数创建新的kmem_cache。

/*
 * create_cache - allocate and initialise a new kmem_cache descriptor.
 *
 * Allocates the descriptor from the kmem_cache cache, copies the caller's
 * parameters into it, runs init_memcg_params() and __kmem_cache_create(),
 * and on success links it onto slab_caches and calls memcg_link_cache().
 *
 * Returns the new descriptor, or ERR_PTR(err) on failure: -ENOMEM when the
 * descriptor cannot be allocated, otherwise the error from the failing helper.
 */
static struct kmem_cache *create_cache(const char *name,
		unsigned int object_size, unsigned int align,
		slab_flags_t flags, unsigned int useroffset,
		unsigned int usersize, void (*ctor)(void *),
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	int err;

	/* A user region extending past the object is warned about and dropped. */
	if (WARN_ON(useroffset + usersize > object_size))
		useroffset = usersize = 0;

	err = -ENOMEM;
	/* Allocate a zeroed kmem_cache descriptor. */
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out;

	/* Record name/size/align and the other parameters in the descriptor. */
	s->name = name;
	s->size = s->object_size = object_size;
	s->align = align;
	s->ctor = ctor;
	s->useroffset = useroffset;
	s->usersize = usersize;

	err = init_memcg_params(s, root_cache);
	if (err)
		goto out_free_cache;

	/* Build the slab cache itself. */
	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;

	s->refcount = 1;
	/* Add the new cache to the global slab_caches list. */
	list_add(&s->list, &slab_caches);
	memcg_link_cache(s, memcg);
out:
	if (err)
		return ERR_PTR(err);
	return s;

out_free_cache:
	/* Undo the memcg setup and free the descriptor; err is already set. */
	destroy_memcg_params(s);
	kmem_cache_free(kmem_cache, s);
	goto out;
}

这里会先调用kmem_cache_zalloc申请一个kmem_cache数据结构,然后调用__kmem_cache_create()建立缓冲区,最后通过s->list将新建的缓冲区加入到全局链表slab_caches中。

/*
 * __kmem_cache_create - finish setting up a cache descriptor.
 *
 * Computes the final object size and alignment, picks a freelist placement
 * by trying set_objfreelist_slab_cache(), set_off_slab_cache() and
 * set_on_slab_cache() in that order, stores the derived values in @cachep,
 * and finally calls setup_cpu_cache().
 *
 * Returns 0 on success, -E2BIG when none of the three placement helpers
 * succeeds, or the non-zero value returned by setup_cpu_cache().
 */
int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
{
	size_t ralign = BYTES_PER_WORD;
	gfp_t gfp;
	int err;
	unsigned int size = cachep->size;

	/*
	 * Check that size is in terms of words.  This is needed to avoid
	 * unaligned accesses for some archs when redzoning is used, and makes
	 * sure any on-slab bufctl's are also correctly aligned.
	 */
	size = ALIGN(size, BYTES_PER_WORD); /* round size up to the word size */

	if (flags & SLAB_RED_ZONE) {
		ralign = REDZONE_ALIGN;
		/* If redzoning, ensure that the second redzone is suitably
		 * aligned, by adjusting the object size accordingly. */
		size = ALIGN(size, REDZONE_ALIGN);
	}

	/* 3) caller mandated alignment */
	if (ralign < cachep->align) { /* the larger of the two alignments wins */
		ralign = cachep->align;
	}
	/* disable debug if necessary */
	if (ralign > __alignof__(unsigned long long))
		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
	/*
	 * 4) Store it.
	 */
	cachep->align = ralign;
	cachep->colour_off = cache_line_size(); /* colour offset starts at the cache line size */
	/* Offset must be a multiple of the alignment. */
	if (cachep->colour_off < cachep->align)
		cachep->colour_off = cachep->align;

	if (slab_is_available())
		gfp = GFP_KERNEL; /* allocation mask used for setup_cpu_cache() below */
	else
		gfp = GFP_NOWAIT;


	kasan_cache_create(cachep, &size, &flags);

	size = ALIGN(size, cachep->align); /* round size up to the final alignment */
	/*
	 * We should restrict the number of objects in a slab to implement
	 * byte sized index. Refer comment on SLAB_OBJ_MIN_SIZE definition.
	 */
	if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
		size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);


	/* Try the three freelist placements in order; the first success wins. */
	if (set_objfreelist_slab_cache(cachep, size, flags)) {
		flags |= CFLGS_OBJFREELIST_SLAB;
		goto done;
	}

	if (set_off_slab_cache(cachep, size, flags)) {
		flags |= CFLGS_OFF_SLAB;
		goto done;
	}

	if (set_on_slab_cache(cachep, size, flags))
		goto done;

	return -E2BIG;

done:
	cachep->freelist_size = cachep->num * sizeof(freelist_idx_t); /* space taken by the freelist indices */
	cachep->flags = flags;
	cachep->allocflags = __GFP_COMP;
	if (flags & SLAB_CACHE_DMA)
		cachep->allocflags |= GFP_DMA;
	if (flags & SLAB_CACHE_DMA32)
		cachep->allocflags |= GFP_DMA32;
	if (flags & SLAB_RECLAIM_ACCOUNT)
		cachep->allocflags |= __GFP_RECLAIMABLE;
	cachep->size = size;
	cachep->reciprocal_buffer_size = reciprocal_value(size);


	/* Off-slab freelists come from the kmalloc cache matching freelist_size. */
	if (OFF_SLAB(cachep)) {
		cachep->freelist_cache =
			kmalloc_slab(cachep->freelist_size, 0u);
	}

	err = setup_cpu_cache(cachep, gfp); /* set up the per-CPU cache of this descriptor */
	if (err) {
		__kmem_cache_release(cachep);
		return err;
	}

	return 0;
}

这里我们先拿到对象的大小size,并将其按系统的WORD长度对齐。然后把kmem_cache的colour_off设置为缓存行(cache line)的大小;如果它小于对齐值align,则改用align。

下面我们会先调用set_objfreelist_slab_cache函数。

/*
 * set_objfreelist_slab_cache - try the CFLGS_OBJFREELIST_SLAB layout.
 *
 * Resets cachep->num, then refuses outright when the cache has a constructor
 * or SLAB_TYPESAFE_BY_RCU is set. Otherwise lets calculate_slab_order() pick
 * the order and object count, and rejects the layout if no objects fit or if
 * the freelist indices for one slab would not fit inside a single object.
 * On success the number of colours is derived from the leftover space.
 *
 * Returns true when this layout was selected, false otherwise.
 */
static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
			size_t size, slab_flags_t flags)
{
	size_t leftover;

	cachep->num = 0;

	if (cachep->ctor || (flags & SLAB_TYPESAFE_BY_RCU))
		return false;

	leftover = calculate_slab_order(cachep, size,
					flags | CFLGS_OBJFREELIST_SLAB);

	if (!cachep->num ||
	    cachep->num * sizeof(freelist_idx_t) > cachep->object_size)
		return false;

	cachep->colour = leftover / cachep->colour_off;
	return true;
}

在这个函数中,我们先计算slab的order和剩余空间left。kmem_cache的着色数colour为left/colour_off。

/*
 * calculate_slab_order - choose the page order and object count for a cache.
 *
 * Tries each order from 0 up to KMALLOC_MAX_ORDER, using cache_estimate() to
 * see how many objects of @size fit. The last acceptable order/count pair is
 * stored in cachep->gfporder and cachep->num. If no order is acceptable,
 * cachep->num is left unchanged, so callers zero it beforehand to detect this.
 *
 * Returns the unused bytes (left over) of the chosen slab, or 0 if nothing
 * acceptable was found.
 */
static size_t calculate_slab_order(struct kmem_cache *cachep,
				size_t size, slab_flags_t flags)
{
	size_t left_over = 0;
	int gfporder;

	/* Search upwards from order 0 for the most suitable gfporder. */
	for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
		unsigned int num;
		size_t remainder;

		/*
		 * How many objects fit in 2^gfporder pages; the remainder is
		 * what is left for cache colouring.
		 */
		num = cache_estimate(gfporder, size, flags, &remainder);
		if (!num)
			continue;

		/* Can't handle number of objects more than SLAB_OBJ_MAX_NUM */
		if (num > SLAB_OBJ_MAX_NUM)
			break;

		if (flags & CFLGS_OFF_SLAB) {
			struct kmem_cache *freelist_cache;
			size_t freelist_size;

			freelist_size = num * sizeof(freelist_idx_t);
			freelist_cache = kmalloc_slab(freelist_size, 0u);
			if (!freelist_cache)
				continue;

			/*
			 * Needed to avoid possible looping condition
			 * in cache_grow_begin()
			 */
			if (OFF_SLAB(freelist_cache))
				continue;

			/* check if off slab has enough benefit */
			if (freelist_cache->size > cachep->size / 2)
				continue;
		}

		/* Found something acceptable - save it away */
		cachep->num = num;
		cachep->gfporder = gfporder;
		left_over = remainder;

		/*
		 * A VFS-reclaimable slab tends to have most allocations
		 * as GFP_NOFS and we really don't want to have to be allocating
		 * higher-order pages when we are unable to shrink dcache.
		 */
		if (flags & SLAB_RECLAIM_ACCOUNT)
			break;

		/*
		 * Large number of objects is good, but very large slabs are
		 * currently bad for the gfp()s.
		 */
		if (gfporder >= slab_max_order)
			break;

		/*
		 * Acceptable internal fragmentation?
		 */
		if (left_over * 8 <= (PAGE_SIZE << gfporder))
			break;
	}
	return left_over;
}

计算slab的order时,从0开始尝试,一直到KMALLOC_MAX_ORDER。针对每个order值,先估算在当前2^order个页面中能够容纳多少个对象:如果一个也放不下,就继续尝试下一个order;如果个数超过了SLAB_OBJ_MAX_NUM,则停止尝试。这里我们传入的flags不带CFLGS_OFF_SLAB,不会进入CFLGS_OFF_SLAB分支。接着就记录kmem_cache的对象个数num与gfporder。如果剩余空间不超过slab总大小的1/8,那么这个内部碎片是可以接受的,循环结束。

计算申请的页面能够放多少个object,需要调用cache_estimate函数。

/*
 * cache_estimate - compute how many objects fit into a slab of a given order.
 *
 * @gfporder:    slab size as a page order (the slab is PAGE_SIZE << gfporder)
 * @buffer_size: size of one object
 * @flags:       cache flags; CFLGS_OBJFREELIST_SLAB or CFLGS_OFF_SLAB means
 *               the slab itself reserves no per-object freelist index
 * @left_over:   out parameter, receives the unused bytes in the slab
 *
 * Returns the number of objects that fit.
 */
static unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size,
		slab_flags_t flags, size_t *left_over)
{
	unsigned int num;
	size_t slab_size = PAGE_SIZE << gfporder;

	/*
	 * The slab management structure can be either off the slab or
	 * on it. For the latter case, the memory allocated for a
	 * slab is used for:
	 *
	 * - @buffer_size bytes for each object
	 * - One freelist_idx_t for each object
	 *
	 * We don't need to consider alignment of freelist because
	 * freelist will be at the end of slab page. The objects will be
	 * at the correct alignment.
	 *
	 * If the slab management structure is off the slab, then the
	 * alignment will already be calculated into the size. Because
	 * the slabs are all pages aligned, the objects will be at the
	 * correct alignment when allocated.
	 */
	if (flags & (CFLGS_OBJFREELIST_SLAB | CFLGS_OFF_SLAB)) {
		num = slab_size / buffer_size;
		*left_over = slab_size % buffer_size;
	} else {
		num = slab_size / (buffer_size + sizeof(freelist_idx_t));
		*left_over = slab_size %
			(buffer_size + sizeof(freelist_idx_t));
	}

	return num;
}

如果上面的过程执行失败,会调用set_off_slab_cache函数,尝试把slab的管理结构(freelist)放在slab之外,调用流程与set_objfreelist_slab_cache相似。回顾一下:set_objfreelist_slab_cache会尝试用slab中的一个object来存放freelist,如果一个object放不下全部的freelist index(num * sizeof(freelist_idx_t) > object_size),就表示这样做不合适,需要选择其他方式。set_objfreelist_slab_cache执行失败后,会调用set_off_slab_cache,它会把freelist index放在slab外面:这里会先找合适的kmem_cache来存放freelist,如果找不到就算是失败了。如果找到了,就判断当前的剩余空间能不能放下一个freelist,如果放不下,就将freelist放在slab外面;如果能放下,就把freelist放在slab里面。如果以上都不行,就会调用set_on_slab_cache把freelist放在slab内部,这个函数中不会判断freelist index与object的大小。

最后会进入done标签。kmem_cache中的freelist大小为对象个数乘以freelist_idx_t的大小。如果管理结构在slab之外,那么会通过kmalloc_slab为freelist_cache选定一个大小合适的kmalloc缓存,用来存放freelist。

最后调用setup_cpu_cache函数配置slab描述符。

/*
 * setup_cpu_cache - set up the per-CPU array cache and node structures of a
 * cache, depending on how far the slab subsystem has been initialised.
 *
 * Once slab_state has reached FULL this simply defers to enable_cpucache().
 * Before that, a one-entry cpu_cache is allocated, the node structures are
 * set up according to slab_state, and boot-time limits
 * (BOOT_CPUCACHE_ENTRIES, batchcount 1) are installed.
 *
 * Returns 0 on success, the result of enable_cpucache() in the FULL case, or
 * 1 if the cpu_cache allocation fails (note: a positive value, not a
 * negative errno).
 */
static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	/* FULL means the slab subsystem has finished initialising. */
	if (slab_state >= FULL)
		return enable_cpucache(cachep, gfp);

	cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1);
	if (!cachep->cpu_cache)
		return 1;

	if (slab_state == DOWN) {
		/* Creation of first cache (kmem_cache). */
		set_up_node(kmem_cache, CACHE_CACHE);
	} else if (slab_state == PARTIAL) {
		/* For kmem_cache_node */
		set_up_node(cachep, SIZE_NODE);
	} else {
		int node;

		/* Allocate and initialise a kmem_cache_node for every online node. */
		for_each_online_node(node) {
			cachep->node[node] = kmalloc_node(
				sizeof(struct kmem_cache_node), gfp, node);
			BUG_ON(!cachep->node[node]);
			kmem_cache_node_init(cachep->node[node]);
		}
	}

	/* Stagger the next reap time using the descriptor's address. */
	cachep->node[numa_mem_id()]->next_reap =
			jiffies + REAPTIMEOUT_NODE +
			((unsigned long)cachep) % REAPTIMEOUT_NODE;

	cpu_cache_get(cachep)->avail = 0;
	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
	cpu_cache_get(cachep)->batchcount = 1;
	cpu_cache_get(cachep)->touched = 0;
	cachep->batchcount = 1;
	cachep->limit = BOOT_CPUCACHE_ENTRIES;
	return 0;
}

如果此时slab_state的状态为FULL,表示slab机制已经初始化完成了,调用enable_cpucache函数使能cpu_cache。否则先申请一个临时的cpu_cache:状态为DOWN或PARTIAL时调用set_up_node设置节点;如果状态是PARTIAL_NODE或UP,会遍历所有在线的节点,申请kmem_cache_node结构,写到node数组中,并调用kmem_cache_node_init对这个节点进行初始化。

/*
 * enable_cpucache - choose limit/shared/batchcount for a cache and apply them.
 *
 * Non-root caches inherit the three values from their root cache; if any of
 * them is zero, or the cache is a root cache, the values are derived from
 * the object size instead. The result is applied via do_tune_cpucache().
 *
 * Returns 0 on success or the error from cache_random_seq_create() /
 * do_tune_cpucache(); a failure is also logged with pr_err().
 *
 * Called with slab_mutex held always
 */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
	int err;
	int limit = 0;
	int shared = 0;
	int batchcount = 0;

	err = cache_random_seq_create(cachep, cachep->num, gfp);
	if (err)
		goto end;

	if (!is_root_cache(cachep)) {
		struct kmem_cache *root = memcg_root_cache(cachep);
		limit = root->limit;
		shared = root->shared;
		batchcount = root->batchcount;
	}

	if (limit && shared && batchcount)
		goto skip_setup;
	/*
	 * The head array serves three purposes:
	 * - create a LIFO ordering, i.e. return objects that are cache-warm
	 * - reduce the number of spinlock operations.
	 * - reduce the number of linked list operations on the slab and
	 *   bufctl chains: array operations are cheaper.
	 * The numbers are guessed, we should auto-tune as described by
	 * Bonwick.
	 */
	/* Pick the free-object limit from the object size; 120 is the fallback. */
	if (cachep->size > 131072)
		limit = 1;
	else if (cachep->size > PAGE_SIZE)
		limit = 8;
	else if (cachep->size > 1024)
		limit = 24;
	else if (cachep->size > 256)
		limit = 54;
	else
		limit = 120;

	/*
	 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
	 * allocation behaviour: Most allocs on one cpu, most free operations
	 * on another cpu. For these cases, an efficient object passing between
	 * cpus is necessary. This is provided by a shared array. The array
	 * replaces Bonwick's magazine layer.
	 * On uniprocessor, it's functionally equivalent (but less efficient)
	 * to a larger limit. Thus disabled by default.
	 */
	shared = 0;
	/* Objects no larger than a page get shared = 8 when more than one CPU is possible. */
	if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
		shared = 8;

	/* batchcount is half the limit, rounded up. */
	batchcount = (limit + 1) / 2;
skip_setup:
	/* Apply the chosen limit, batchcount and shared values to the cache. */
	err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
end:
	if (err)
		pr_err("enable_cpucache failed for %s, error %d\n",
		       cachep->name, -err);
	return err;
}

在enable_cpucache函数中,会根据对象的大小来计算空闲对象的最大阈值limit,设置shared、batchcount的大小,然后调用do_tune_cpucache。

/*
 * do_tune_cpucache - apply tuning values to a cache and, for a root cache,
 * to each of its memcg child caches.
 *
 * The child caches are only visited once slab_state has reached FULL, the
 * root update did not return a negative value, and @cachep is a root cache.
 * The return value is always the one from the update of @cachep itself.
 */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	struct kmem_cache *child;
	int rc;

	/* Configure the descriptor that was passed in. */
	rc = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);

	if (slab_state < FULL || rc < 0 || !is_root_cache(cachep))
		return rc;

	lockdep_assert_held(&slab_mutex);
	for_each_memcg_cache(child, cachep) {
		/* return value determined by the root cache only */
		__do_tune_cpucache(child, limit, batchcount, shared, gfp);
	}

	return rc;
}

在这个函数中,首先会调用__do_tune_cpucache来配置slab描述符。如果slab状态是FULL、配置成功并且当前是根缓存,还会对它的每个memcg子缓存再调用一次__do_tune_cpucache。

/*
 * __do_tune_cpucache - install a new per-CPU array cache with the given
 * limit and batchcount, and record limit/batchcount/shared in @cachep.
 *
 * If a previous cpu_cache existed, the objects still held in it are freed
 * for every online CPU and the old per-CPU memory is released. Finishes by
 * calling setup_kmem_cache_nodes().
 *
 * Returns -ENOMEM if the new cpu_cache cannot be allocated, otherwise the
 * result of setup_kmem_cache_nodes().
 *
 * Always called with the slab_mutex held
 */
static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	struct array_cache __percpu *cpu_cache, *prev;
	int cpu;

	/* Allocate the per-CPU struct array_cache (the object cache pool). */
	cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount);
	if (!cpu_cache)
		return -ENOMEM;

	prev = cachep->cpu_cache;
	cachep->cpu_cache = cpu_cache;
	/*
	 * Without a previous cpu_cache there's no need to synchronize remote
	 * cpus, so skip the IPIs.
	 */
	if (prev)
		kick_all_cpus_sync();

	check_irq_on();
	cachep->batchcount = batchcount;
	cachep->limit = limit;
	cachep->shared = shared;

	if (!prev)
		goto setup_node;

	/* Free what each online CPU still held in the old array cache. */
	for_each_online_cpu(cpu) {
		LIST_HEAD(list);
		int node;
		struct kmem_cache_node *n;
		struct array_cache *ac = per_cpu_ptr(prev, cpu);

		node = cpu_to_mem(cpu);
		n = get_node(cachep, node);
		spin_lock_irq(&n->list_lock);
		free_block(cachep, ac->entry, ac->avail, node, &list);
		spin_unlock_irq(&n->list_lock);
		slabs_destroy(cachep, &list);
	}
	free_percpu(prev);

setup_node:
	/* Initialise the cache's kmem_cache_node structures. */
	return setup_kmem_cache_nodes(cachep, gfp);
}

在__do_tune_cpucache函数中,会先调用alloc_kmem_cache_cpus申请cpu_cache,这里还会设置kmem_cache中的limit、shared、batchcount等值。如果之前已经有旧的cpu_cache,则遍历每一个在线的CPU,读取它的旧array_cache,再获取node ID,找到对应的kmem_cache_node,然后调用free_block函数释放里面的对象。最后调用setup_kmem_cache_nodes函数初始化kmem_cache_node。

/*
 * setup_kmem_cache_nodes - run setup_kmem_cache_node() for every online node.
 *
 * Returns 0 when every node was set up. On the first failure returns
 * -ENOMEM; if the cache is not yet linked on a list (list.next is NULL),
 * the node structures with ids below the failing one are freed again first.
 */
static int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp)
{
	struct kmem_cache_node *knode;
	int node;
	int err;

	/* Visit every online NUMA node. */
	for_each_online_node(node) {
		err = setup_kmem_cache_node(cachep, node, gfp, true);
		if (err)
			goto rollback;
	}

	return 0;

rollback:
	/* A cache that is already on a list is left as it is. */
	if (cachep->list.next)
		return -ENOMEM;

	/* Cache is not active yet. Roll back what we did */
	for (node--; node >= 0; node--) {
		knode = get_node(cachep, node);
		if (!knode)
			continue;

		kfree(knode->shared);
		free_alien_cache(knode->alien);
		kfree(knode);
		cachep->node[node] = NULL;
	}

	return -ENOMEM;
}

/*
 * setup_kmem_cache_node - set up the per-node structure of @cachep for @node.
 *
 * Allocates an alien cache (when use_alien_caches is set) and a shared array
 * cache (when cachep->shared is non-zero), makes sure the kmem_cache_node
 * exists via init_cache_node(), and installs the new shared/alien caches
 * under the node's list_lock. With @force_change an existing shared cache is
 * drained and replaced; without it an existing one is kept.
 *
 * Returns 0 on success, -ENOMEM if an allocation fails, or the error from
 * init_cache_node(). Anything allocated here but not installed is freed
 * before returning.
 */
static int setup_kmem_cache_node(struct kmem_cache *cachep,
				int node, gfp_t gfp, bool force_change)
{
	int ret = -ENOMEM;
	struct kmem_cache_node *n; /* the slab node being set up */
	struct array_cache *old_shared = NULL;
	struct array_cache *new_shared = NULL;
	struct alien_cache **new_alien = NULL;
	LIST_HEAD(list);

	if (use_alien_caches) {
		new_alien = alloc_alien_cache(node, cachep->limit, gfp);
		if (!new_alien)
			goto fail;
	}

	/* shared may be greater than 0 on multi-CPU systems. */
	if (cachep->shared) {
		/* Allocate a shared array cache for passing free objects between CPUs. */
		new_shared = alloc_arraycache(node,
			cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
		if (!new_shared)
			goto fail;
	}

	ret = init_cache_node(cachep, node, gfp);
	if (ret)
		goto fail;

	n = get_node(cachep, node);
	spin_lock_irq(&n->list_lock);
	/* Drain the existing shared cache before it is replaced. */
	if (n->shared && force_change) {
		free_block(cachep, n->shared->entry,
				n->shared->avail, node, &list);
		n->shared->avail = 0;
	}

	/* Install the new shared cache; NULLing new_shared keeps it from being freed below. */
	if (!n->shared || force_change) {
		old_shared = n->shared;
		n->shared = new_shared;
		new_shared = NULL;
	}

	/* An existing alien cache is never replaced. */
	if (!n->alien) {
		n->alien = new_alien;
		new_alien = NULL;
	}

	spin_unlock_irq(&n->list_lock);
	slabs_destroy(cachep, &list);

	/*
	 * To protect lockless access to n->shared during irq disabled context.
	 * If n->shared isn't NULL in irq disabled context, accessing to it is
	 * guaranteed to be valid until irq is re-enabled, because it will be
	 * freed after synchronize_rcu().
	 */
	if (old_shared && force_change)
		synchronize_rcu();

fail:
	/* Shared by the success path: frees only what was not installed. */
	kfree(old_shared);
	kfree(new_shared);
	free_alien_cache(new_alien);

	return ret;
}

在这里会遍历所有的numa节点,调用setup_kmem_cache_node函数进行初始化。如果这个kmem_cache需要配置共享缓冲池(shared不为0),就申请一个array_cache结构。

/*
 * alloc_arraycache - allocate and initialise an array_cache on @node.
 *
 * The allocation holds the array_cache header plus @entries pointer slots.
 * Returns the new array cache, or NULL if kmalloc_node() fails.
 */
static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount, gfp_t gfp)
{
	size_t bytes = sizeof(void *) * entries + sizeof(struct array_cache);
	struct array_cache *cache = kmalloc_node(bytes, gfp, node);

	/*
	 * The array_cache structures contain pointers to free object.
	 * However, when such objects are allocated or transferred to another
	 * cache the pointers are not cleared and they could be counted as
	 * valid references during a kmemleak scan. Therefore, kmemleak must
	 * not scan such objects.
	 */
	kmemleak_no_scan(cache);
	init_arraycache(cache, entries, batchcount);

	return cache;
}

/*
 * init_arraycache - reset an array_cache to empty with the given limits.
 * A NULL @ac is accepted and ignored.
 */
static void init_arraycache(struct array_cache *ac, int limit, int batch)
{
	if (!ac)
		return;

	ac->avail = 0;
	ac->limit = limit;
	ac->batchcount = batch;
	ac->touched = 0;
}

申请的大小是entries个指针的大小加上array_cache自身的大小。申请之后会将各个成员初始化。

/*
 * init_cache_node - make sure @cachep has a kmem_cache_node for @node.
 *
 * If the node structure already exists, only its free_limit is recomputed
 * under list_lock. Otherwise a new structure is allocated on @node,
 * initialised, and stored in cachep->node[node].
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
{
	struct kmem_cache_node *n;

	/*
	 * Set up the kmem_cache_node for cpu before we can
	 * begin anything. Make sure some other cpu on this
	 * node has not already allocated this
	 */
	n = get_node(cachep, node);
	if (n) {
		spin_lock_irq(&n->list_lock);
		n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
				cachep->num;
		spin_unlock_irq(&n->list_lock);

		return 0;
	}

	n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
	if (!n)
		return -ENOMEM;

	kmem_cache_node_init(n);
	/* Stagger the next reap time using the descriptor's address. */
	n->next_reap = jiffies + REAPTIMEOUT_NODE +
		    ((unsigned long)cachep) % REAPTIMEOUT_NODE;

	n->free_limit =
		(1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num;

	/*
	 * The kmem_cache_nodes don't come and go as CPUs
	 * come and go.  slab_mutex is sufficient
	 * protection here.
	 */
	cachep->node[node] = n;

	return 0;
}

/*
 * kmem_cache_node_init - put a kmem_cache_node into its empty initial state:
 * empty slab lists, zeroed counters, no shared or alien cache, and an
 * initialised list_lock.
 */
static void kmem_cache_node_init(struct kmem_cache_node *parent)
{
	/* Lock protecting the lists below. */
	spin_lock_init(&parent->list_lock);

	/* The three slab lists start out empty. */
	INIT_LIST_HEAD(&parent->slabs_full);
	INIT_LIST_HEAD(&parent->slabs_partial);
	INIT_LIST_HEAD(&parent->slabs_free);

	/* Counters and colouring state. */
	parent->total_slabs = 0;
	parent->free_slabs = 0;
	parent->free_objects = 0;
	parent->free_touched = 0;
	parent->colour_next = 0;

	/* No shared or alien array caches yet. */
	parent->shared = NULL;
	parent->alien = NULL;
}

然后会调用init_cache_node函数:如果这个node已经存在,就找到这个node id对应的kmem_cache_node节点,重新设置它的free_limit值;如果不存在,就新申请一个kmem_cache_node,初始化后把它填入kmem_cache的node数组中。

现在我们保证了可以拿到node节点,接着会释放旧的共享缓冲池中的对象,并换上新申请的共享缓冲池。以上就完成了slab描述符的建立。

 

kmem_cache的销毁是调用kmem_cache_destroy函数。

/*
 * kmem_cache_destroy - drop a reference to a cache and destroy it when the
 * last reference is gone.
 *
 * A NULL @s is ignored. If the reference count is still non-zero after the
 * decrement, nothing else happens. Otherwise the memcg caches are shut down
 * first and then the cache itself; if either step fails, an error is logged
 * and the stack is dumped.
 */
void kmem_cache_destroy(struct kmem_cache *s)
{
	int err;

	if (unlikely(!s)) /* nothing to do for a NULL cache */
		return;

	get_online_cpus(); /* paired with put_online_cpus() */
	get_online_mems();

	mutex_lock(&slab_mutex);

	s->refcount--; /* drop one reference to the cache */
	if (s->refcount) /* still referenced by someone else: just leave */
		goto out_unlock;

#ifdef CONFIG_MEMCG_KMEM
	memcg_set_kmem_cache_dying(s);

	/* Drop the mutex and hotplug references around the workqueue flush. */
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	flush_memcg_workqueue(s);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
#endif
	/*
	 * The reference count is zero: shut down the memcg caches first and,
	 * if that succeeds, call shutdown_cache() to destroy this cache.
	 */
	err = shutdown_memcg_caches(s);
	if (!err)
		err = shutdown_cache(s);

	if (err) {
		pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
		       s->name);
		dump_stack();
	}
out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

释放缓存

/*
 * shutdown_cache - tear down a cache and unlink it from the cache lists.
 *
 * Returns -EBUSY if __kmem_cache_shutdown() reports failure, otherwise 0.
 * For SLAB_TYPESAFE_BY_RCU caches the final release is deferred: the cache
 * is queued on slab_caches_to_rcu_destroy and the destroy work is scheduled.
 * Other caches are released right away, through sysfs when
 * SLAB_SUPPORTS_SYSFS is defined.
 */
static int shutdown_cache(struct kmem_cache *s)
{
	/* free asan quarantined objects */
	kasan_cache_shutdown(s);

	/* Release all resources held by the slab cache. */
	if (__kmem_cache_shutdown(s) != 0)
		return -EBUSY;

	memcg_unlink_cache(s);
	/* Remove the cache from the list it is linked on. */
	list_del(&s->list);

	if (s->flags & SLAB_TYPESAFE_BY_RCU) {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_unlink(s);
#endif
		/* RCU caches are released later by slab_caches_to_rcu_destroy_work. */
		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
		schedule_work(&slab_caches_to_rcu_destroy_work);
	} else {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_unlink(s);
		sysfs_slab_release(s);
#else
		/* Release the cache immediately. */
		slab_kmem_cache_release(s);
#endif
	}

	return 0;
}


/*
 * slab_kmem_cache_release - free everything that belongs to a cache
 * descriptor: its internal structures, its memcg parameters, its name, and
 * finally the descriptor itself.
 */
void slab_kmem_cache_release(struct kmem_cache *s)
{
	__kmem_cache_release(s);
	destroy_memcg_params(s);
	kfree_const(s->name);
	kmem_cache_free(kmem_cache, s); /* return the descriptor to the kmem_cache cache */
}


/*
 * __kmem_cache_release - free the internal structures of a cache: the random
 * sequence state, the per-CPU array cache, and for each node its shared
 * cache, alien cache and the kmem_cache_node itself.
 */
void __kmem_cache_release(struct kmem_cache *cachep)
{
	struct kmem_cache_node *knode;
	int nid;

	cache_random_seq_destroy(cachep);

	/* Release the per-CPU array cache. */
	free_percpu(cachep->cpu_cache);

	/* NUMA: free the node structures */
	for_each_kmem_cache_node(cachep, nid, knode) {
		/* Shared cache first, then the alien cache, then the node itself. */
		kfree(knode->shared);
		free_alien_cache(knode->alien);
		kfree(knode);
		cachep->node[nid] = NULL;
	}
}
相关文章
相关标签/搜索