cache_t的结构分析

时间 2020-01-09

标签 cache 结构分析繁體版

原文原文链接

一.探索前需知

上篇文章已经讨论过，类的结构是个结构体，里面有 isa、superclass、cache_t cache、class_data_bits_t bits。这样我们可以在 lldb 里通过 x/4gx 查看类的内存信息：isa 和 superclass 各占 8 字节，所以从首地址(isa)偏移 16 字节就得到 cache_t 所在的地址。

struct objc_class : objc_object {    // Class ISA;    Class superclass;    cache_t cache;             // formerly cache pointer and vtable    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags复制代码

二.cache_t 的初步了解

1.cache_t 的底层结构

// Per-class method cache: a bucket array plus two counters.
struct cache_t {
    // Data members. Their order defines the in-memory layout.
    struct bucket_t *_buckets;   // bucket array
    mask_t _mask;                // set to (capacity - 1) by reallocate()
    mask_t _occupied;            // bumped via incrementOccupied() when a slot is filled

public:
    // Accessors for the data members above.
    struct bucket_t *buckets();
    mask_t mask();
    mask_t occupied();

    // Mutators.
    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
    void initializeToEmpty();

    // Capacity and state queries.
    mask_t capacity();
    bool isConstantEmptyCache();
    bool canBeFreed();           // defined as !isConstantEmptyCache()

    // Allocation helpers.
    static size_t bytesForCapacity(uint32_t cap);
    static struct bucket_t * endMarker(struct bucket_t *b, uint32_t cap);

    // Growth: expand() picks the new capacity, reallocate() swaps in a new bucket array.
    void expand();
    void reallocate(mask_t oldCapacity, mask_t newCapacity);

    // Lookup: returns the bucket holding `key`, or the first empty bucket on its probe path.
    struct bucket_t * find(cache_key_t key, id receiver);

    // Called by find() when a full probe cycle finds neither a match nor an empty bucket.
    static void bad_cache(id receiver, SEL sel, Class isa) __attribute__((noreturn));
};
复制代码

可以看到 cache_t 是个结构体，里面有 _buckets（可以理解为整个类的缓存池），而缓存池里有许许多多的 bucket，每个 bucket 就是一个方法的缓存；_mask 和 _occupied 我们下面再说，public 里面是整个缓存流程里用到的公共方法。

通过 lldb 进行调试：

此时可能会有人疑问：明明调用了 alloc 和 class 方法，为什么缓存里没有东西？其实如果看过上篇文章就会知道，这些方法是由类来调用的（类方法），缓存在元类的缓存区里。而断点打在 sayHello 上时，sayHello 还没来得及缓存，下面我们把断点打在 sayCode 上。

可以看到 cache_t 里面的 buckets 缓存池里的的确确缓存了 sayHello 方法。但是每次使用 lldb 进行调试确实比较麻烦，我们可以换种方式进行调试：在下面先调用三个对象方法，如下：

// Local copies of the runtime's scalar types used by the mirror structs below.
// uintptr_t is declared before cache_key_t so that cache_key_t never refers to
// a name that has not been declared yet.
typedef unsigned long  uintptr_t;
typedef uint32_t mask_t;          // 32-bit type of _mask and _occupied
typedef uintptr_t cache_key_t;    // pointer-sized cache key


// Local mirror of one runtime cache bucket (bucket_t): a cache key paired with
// the method's IMP.
// The field order has to match the runtime's bucket_t, which depends on the
// architecture: IMP first on arm64, key first everywhere else (x86_64, i386,
// armv7). With the wrong order the two fields are read swapped, i.e. the
// value printed as "imp" is really the key and vice versa.
struct lg_bucket_t {
#if __arm64__
    IMP _imp;
    cache_key_t _key;
#else
    cache_key_t _key;
    IMP _imp;
#endif
};

// Local mirror of the data members of the runtime's cache_t, in the same order,
// so that a class's method cache can be read through a plain struct pointer.
struct lg_cache_t {
    struct lg_bucket_t *_buckets;   // bucket array
    mask_t _mask;                   // the runtime stores (capacity - 1) here
    mask_t _occupied;               // number of buckets currently filled
};

// Local mirror of class_data_bits_t: a single pointer-sized word.
struct lg_class_data_bits_t {
    uintptr_t bits;
};

// Local mirror of the leading members of objc_class (isa, superclass, cache, bits).
// A Class pointer is cast to this struct to reach the cache member directly.
struct lg_objc_class {
    Class ISA;
    Class superclass;
    struct lg_cache_t cache;             // formerly cache pointer and vtable
    struct lg_class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
};

// Calls three instance methods on LGPerson and then dumps every bucket of the
// class's method cache by reading the class through the lg_objc_class mirror.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // NOTE(review): presumably -init is left out on purpose, since any extra
        // instance message would also land in the cache being inspected.
        LGPerson *person = [LGPerson alloc];
        Class pClass = [LGPerson class];
        // Why is cache_t empty the first time?
        [person sayHello];
        [person sayCode];
        [person sayNB]; // third cached method: 3 <= capacity / 4 * 3, so it still fits without expansion

        struct lg_objc_class *lg_pClass = (__bridge struct lg_objc_class *)(pClass);

        // _mask holds (capacity - 1), so the table has _mask + 1 buckets and the
        // bucket at index _mask must be visited too. A zero mask is treated as
        // an empty cache.
        mask_t mask = lg_pClass->cache._mask;
        mask_t capacity = mask ? mask + 1 : 0;
        for (mask_t i = 0; i < capacity; i++) {
            struct lg_bucket_t bucket = lg_pClass->cache._buckets[i];
            // %p expects a data pointer; an IMP is a function pointer, so cast it.
            NSLog(@"%lu - %p",bucket._key,(void *)bucket._imp);
        }
        NSLog(@"%@ - %p",person,pClass);
    }
    return 0;
}
复制代码

我们看下循环里的打印结果：

2020-01-01 22:29:06.985587+0800 LGTest[2357:230137] 4294970368 - 0x100000e5c
2020-01-01 22:29:06.985647+0800 LGTest[2357:230137] 4294970416 - 0x100000e65
2020-01-01 22:29:06.985689+0800 LGTest[2357:230137] 4294970512 - 0x100000e6d
复制代码

如果是调用四个对象方法会怎么样呢？

#import <Foundation/Foundation.h>
#import "LGPerson.h"
#import <objc/runtime.h>

// Local copies of the runtime's scalar types used by the mirror structs below.
// uintptr_t is declared before cache_key_t so that cache_key_t never refers to
// a name that has not been declared yet.
typedef unsigned long  uintptr_t;
typedef uint32_t mask_t;          // 32-bit type of _mask and _occupied
typedef uintptr_t cache_key_t;    // pointer-sized cache key


// Local mirror of one runtime cache bucket (bucket_t): a cache key paired with
// the method's IMP.
// The field order has to match the runtime's bucket_t, which depends on the
// architecture: IMP first on arm64, key first everywhere else (x86_64, i386,
// armv7). With the wrong order the two fields are read swapped, i.e. the
// value printed as "imp" is really the key and vice versa.
struct lg_bucket_t {
#if __arm64__
    IMP _imp;
    cache_key_t _key;
#else
    cache_key_t _key;
    IMP _imp;
#endif
};

// Local mirror of the data members of the runtime's cache_t, in the same order,
// so that a class's method cache can be read through a plain struct pointer.
struct lg_cache_t {
    struct lg_bucket_t *_buckets;   // bucket array
    mask_t _mask;                   // the runtime stores (capacity - 1) here
    mask_t _occupied;               // number of buckets currently filled
};

// Local mirror of class_data_bits_t: a single pointer-sized word.
struct lg_class_data_bits_t {
    uintptr_t bits;
};

// Local mirror of the leading members of objc_class (isa, superclass, cache, bits).
// A Class pointer is cast to this struct to reach the cache member directly.
struct lg_objc_class {
    Class ISA;
    Class superclass;
    struct lg_cache_t cache;             // formerly cache pointer and vtable
    struct lg_class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
};

// Calls four instance methods on LGPerson and then dumps every bucket of the
// class's method cache by reading the class through the lg_objc_class mirror.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // NOTE(review): presumably -init is left out on purpose, since any extra
        // instance message would also land in the cache being inspected.
        LGPerson *person = [LGPerson alloc];
        Class pClass = [LGPerson class];
        // Why is cache_t empty the first time?
        [person sayHello];
        [person sayCode];
        [person sayNB];     // third entry still fits: 3 <= capacity / 4 * 3
        [person sayMaster]; // critical point: the fourth entry exceeds 3/4 of the capacity,
                            // so the old entries are dropped and the cache is expanded

        // Author's scratch notes:
        //   some value, 1000 - 100k
        //   waste - dynamic methods
        //   cache_t mask
        //   how method caching works - shows that it needs special handling
        //   every incoming method is cached - with one special case
        //   no way to tell which method is cached - take the god's-eye view

        struct lg_objc_class *lg_pClass = (__bridge struct lg_objc_class *)(pClass);

        // _mask holds (capacity - 1), so the table has _mask + 1 buckets and the
        // bucket at index _mask must be visited too. A zero mask is treated as
        // an empty cache.
        mask_t mask = lg_pClass->cache._mask;
        mask_t capacity = mask ? mask + 1 : 0;
        for (mask_t i = 0; i < capacity; i++) {
            struct lg_bucket_t bucket = lg_pClass->cache._buckets[i];
            // %p expects a data pointer; an IMP is a function pointer, so cast it.
            NSLog(@"%lu - %p",bucket._key,(void *)bucket._imp);
        }
        NSLog(@"%@ - %p",person,pClass);
    }
    return 0;
}

复制代码

输出：

2020-01-01 22:37:43.437962+0800 LGTest[2385:234120] 0 - 0x0
2020-01-01 22:37:43.438056+0800 LGTest[2385:234120] 0 - 0x0
2020-01-01 22:37:43.438163+0800 LGTest[2385:234120] 0 - 0x0
2020-01-01 22:37:43.438252+0800 LGTest[2385:234120] 4294970464 - 0x100000e73
2020-01-01 22:37:43.438302+0800 LGTest[2385:234120] 0 - 0x0
2020-01-01 22:37:43.438379+0800 LGTest[2385:234120] 0 - 0x0
2020-01-01 22:37:43.438458+0800 LGTest[2385:234120] 0 - 0x0
复制代码

此时 cache_t 里 _mask 的值为 7（即容量 8 减 1），但是打印出来的 bucket 中只有一个有值，也就是说只有一个方法被缓存了，这是为什么呢？我们只能在源码里找到答案。

三.cache_t 的底层探索

// Inserts (sel, imp) into cls's method cache. The caller must already hold
// cacheUpdateLock. Depending on how full the cache is, the bucket array is
// first allocated, reused as-is, or expanded.
static void cache_fill_nolock(Class cls, SEL sel, IMP imp, id receiver)
{
    cacheUpdateLock.assertLocked();

    // Never cache before +initialize is done
    if (!cls->isInitialized()) return;

    // Make sure the entry wasn't added to the cache by some other thread
    // before we grabbed the cacheUpdateLock.
    if (cache_getImp(cls, sel)) return;

    cache_t *cache = getCache(cls);
    cache_key_t key = getKey(sel);

    // Use the cache as-is if it is less than 3/4 full
    mask_t newOccupied = cache->occupied() + 1;
    mask_t capacity = cache->capacity();
    if (cache->isConstantEmptyCache()) {
        // Cache is read-only. Replace it.
        cache->reallocate(capacity, capacity ?: INIT_CACHE_SIZE);
    }
    else if (newOccupied <= capacity / 4 * 3) {
        // Cache is less than 3/4 full. Use it as-is.
    }
    else {
        // Cache is too full. Expand it.
        cache->expand();
    }

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the
    // minimum size is 4 and we resized at 3/4 full.
    bucket_t *bucket = cache->find(key, receiver);
    if (bucket->key() == 0) cache->incrementOccupied();
    bucket->set(key, imp);
}

首先 cacheUpdateLock.assertLocked(); 这一句并不是加锁，而是断言调用方已经持有 cacheUpdateLock：缓存填充会被很频繁地调用，为了避免多个线程同时修改缓存造成混乱，必须在已经加锁的状态下才能进入这个函数。

if (cache_getImp(cls, sel)) return 进行下判断当前的方法有没有以前被缓存过.

cache_t *cache = getCache(cls);cache_key_t key = getKey(sel);

这步操作是获取当前类的缓存，并将 sel 强转成 cache_key_t 类型的 key。

mask_t newOccupied = cache->occupied() + 1;

cache->occupied() 获取当前类中已经存储的方法个数，由于如今正在执行存储的操做因此 newOccupied = cache->occupied() + 1

mask_t capacity = cache->capacity();

获取当前类的存储空间.

下一步就要进行缓存了这又分为了三种状况：

3.1 当前这个类之前没有进行过存储，也就是说当前代码刚刚走到调用第一个实例方法的时候（cache->occupied() = 0，cache->capacity() = 0），cache->isConstantEmptyCache() 判断为 YES，进入到 cache->reallocate(capacity, capacity ?: INIT_CACHE_SIZE); 的流程。

由于 cache->capacity() = 0，因此 newCapacity = INIT_CACHE_SIZE (1 << INIT_CACHE_SIZE_LOG2) 也就是等于4

void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity)
{
    bool freeOld = canBeFreed();

    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated. // This is thought to save cache memory at the cost of extra cache fills. // fixme re-measure this assert(newCapacity > 0); assert((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1); setBucketsAndMask(newBuckets, newCapacity - 1); if (freeOld) { cache_collect_free(oldBuckets, oldCapacity); cache_collect(false); } } bool cache_t::canBeFreed(){ return !isConstantEmptyCache();}复制代码

第一步通过 canBeFreed() 判断旧的缓存能否释放：如果这是类第一次进行存储（当前还是只读的空缓存），freeOld 为 false，否则为 true。

bucket_t *oldBuckets = buckets();

bucket_t *newBuckets = allocateBuckets(newCapacity);

这个就是获取旧的缓存池和设置新的缓存池(设置缓存池的空间有多少)

setBucketsAndMask(newBuckets, newCapacity - 1);

在这里把缓存池的 mask 设置为缓存容量 - 1，所以当第一个方法存储完之后 mask 为 3，这和上面 lldb 所打印的正好是吻合的。

bucket_t *bucket = cache->find(key, receiver);

if (bucket->key() == 0) cache->incrementOccupied();

bucket->set(key, imp);

最后通过 cache 的哈希算法在 buckets（缓存池）里找到最合适的 bucket，将方法实现 imp 和 key 关联起来。

关于这方法的具体实现以下:

// Returns the bucket that already holds key k, or the first empty bucket on
// k's probe path. The caller decides which of the two it got by checking the
// bucket's key. If a full cycle through the table finds neither, the cache is
// corrupt and bad_cache() is called.
bucket_t * cache_t::find(cache_key_t k, id receiver)
{
    assert(k != 0);

    bucket_t *table = buckets();
    mask_t m = mask();

    // cache_hash turns the key into the index where probing starts
    // (per the original note: begin = k & m).
    const mask_t start = cache_hash(k, m);

    // Probe from `start`, following cache_next until we are back at `start`.
    // The original note describes cache_next as stepping to i - 1 and jumping
    // to mask once i reaches 0, so the table is walked backwards as a ring.
    // NOTE(review): cache_next is not shown here and the probe direction may
    // differ per platform — confirm against its definition.
    mask_t slot = start;
    for (;;) {
        bucket_t *candidate = &table[slot];
        if (candidate->key() == 0) {
            // Nothing cached in this slot yet: hand it back so the caller can fill it.
            return candidate;
        }
        if (candidate->key() == k) {
            // Cache hit.
            return candidate;
        }
        slot = cache_next(slot, m);
        if (slot == start) break;
    }

    // Went all the way round without finding a match or an empty bucket.
    // hack
    uintptr_t classAddress = (uintptr_t)this - offsetof(objc_class, cache);
    Class cls = (Class)classAddress;
    cache_t::bad_cache(receiver, (SEL)k, cls);
}

复制代码

3.2 当 newOccupied <= capacity / 4 * 3

这是什么意思呢？就是第一个方法执行完之后，occupied = 1，mask = 3，capacity = 4。
进入第二个方法时 newOccupied = occupied + 1 = 2，不超过 capacity / 4 * 3 = 3，也就是说整个缓存空间还可以容纳第二个方法，所以就在 buckets（缓存池）中找到最合适的 bucket（缓存桶），和上面通过 cache 哈希寻找是一样的，将方法实现 imp 和 key 关联起来。

3.3 当 newOccupied > capacity / 4 * 3

在这种情况下就需要扩容：扩大整个缓存池，扩大后的空间为之前的两倍。

void cache_t::expand()
{
    cacheUpdateLock.assertLocked();
    
    uint32_t oldCapacity = capacity();
    uint32_t newCapacity = oldCapacity ? oldCapacity*2 : INIT_CACHE_SIZE;

    if ((uint32_t)(mask_t)newCapacity != newCapacity) {
        // mask overflow - can't grow further // fixme this wastes one bit of mask newCapacity = oldCapacity; } reallocate(oldCapacity, newCapacity); } 复制代码

// Replaces the bucket array with a freshly allocated one of newCapacity slots
// and stores newCapacity - 1 as the mask. Existing entries are not copied
// over. The old array is released only when canBeFreed() reports true.
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity)
{
    bool freeOld = canBeFreed();

    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated.
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    assert(newCapacity > 0);
    assert((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(newBuckets, newCapacity - 1);

    if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
        cache_collect(false);
    }
}

空间扩展完之后设置新的缓存池，并且会把之前的缓存清除（旧缓存里的内容不会拷贝到新的缓存池），最后在 buckets（缓存池）中找到最合适的 bucket（缓存桶），和上面通过 cache 哈希寻找是一样的，将方法实现 imp 和 key 关联起来。

四.总结

OC 中实例方法缓存在类上面，类方法缓存在元类上面。

cache_t 缓存会提前进行扩容防止溢出：新增一个方法后占用数超过容量的 3/4 时就扩容为原来的两倍，并清空旧缓存。

方法缓存是为了最大化的提升程序的执行效率。

苹果在方法缓存这里用的是开放寻址法来解决哈希冲突。