Go gcache 源码分析（图解）

时间 2019-11-10

标签 gcache 源码分析图解繁體版

原文原文链接

概述

gcache是一个用go实现的并发安全的本地缓存库。它能够实现以下功能：

指定缓存的大小，初始化时为cache设置size大小。
支持多种缓存的策略：Simple、LRU、LFU、ARC

Simple：最普通的缓存策略，根据先存入的先淘汰。
LRU：Least Recently Used，意思是最近最少使用。LRU Cache 的替换原则就是将最近最少使用的内容替换掉。
LFU：Least Frequently Used，意思是最近最不经常使用。LFU Cache先淘汰一定时间内被访问次数最少的页面。
ARC：Adaptive Replacement Cache，ARC介于 LRU 和 LFU 之间。

支持多个回调函数

LoaderExpireFunc：加载回调函数（缓存未命中时用来加载数据，可同时返回过期时间）
EvictedFunc：淘汰回调函数
PurgeVisitorFunc：清除全部key回调函数
AddedFunc：新增key回调函数
SerializeFunc：对value序列化回调函数
DeserializeFunc：对value反序列化回调函数

支持计数事件

HitCount：命中次数
MissCount：没有命中的次数
LookupCount：查找次数
HitRate：命中率

使用singleflight机制：多个请求同时获取同一个key时，保证只有一个请求真正去获取数据，其他请求等待其结果。

简单使用

其实github上已经有了很详细的例子，其中有简单key/value、设置超时时间、设置淘汰策略、设置回调函数等各类例子。这里简单摘抄一些简单的例子：

简单key/value 设置

package main

import (
  "github.com/bluele/gcache"
  "fmt"
)

// main builds a 20-entry LRU cache, stores a single key and prints the value
// read back for it. A failed lookup aborts the program with a panic.
func main() {
  cache := gcache.New(20).LRU().Build()
  cache.Set("key", "ok")

  got, err := cache.Get("key")
  if err != nil {
    panic(err)
  }
  fmt.Println("Get:", got)
}

Get: ok

设置过期时间

package main

import (
  "github.com/bluele/gcache"
  "fmt"
  "time"
)

// main stores a key with a 10-second expiration, reads it once while it is
// still live, then reads it again after the expiration has elapsed. The
// second read is expected to fail, so the program ends in a panic.
func main() {
  gc := gcache.New(20).
    LRU().
    Build()
  gc.SetWithExpire("key", "ok", time.Second*10)

  // Declare err here so the second lookup below can reuse it with "=".
  value, err := gc.Get("key")
  if err != nil {
    panic(err)
  }
  fmt.Println("Get:", value)

  // Wait for value to expire
  time.Sleep(time.Second*10)

  value, err = gc.Get("key")
  if err != nil {
    panic(err)
  }
  fmt.Println("Get:", value)
}

Get: ok
// 10 seconds later, new attempt:
panic: ErrKeyNotFound

使用load回调函数

package main

import (
  "github.com/bluele/gcache"
  "fmt"
)

// main builds an LRU cache with a loader function and reads a key that was
// never Set. The loader below returns "ok" for every key.
func main() {
  loader := func(key interface{}) (interface{}, error) {
    return "ok", nil
  }
  cache := gcache.New(20).LRU().LoaderFunc(loader).Build()

  got, err := cache.Get("key")
  if err != nil {
    panic(err)
  }
  fmt.Println("Get:", got)
}
Get: ok

源码分析

实体和初始化

builder类

// CacheBuilder collects the settings used to construct a Cache: the clock,
// the eviction type, the size, the default expiration and the callbacks.
// buildCache copies these values (except tp) into the cache's baseCache.
type CacheBuilder struct {
    clock            Clock            // time source handed to the cache
    tp               string           // eviction type; build switches on it (TYPE_SIMPLE, TYPE_LRU, ...)
    size             int              // cache size; Build rejects size <= 0 unless tp is TYPE_SIMPLE
    loaderExpireFunc LoaderExpireFunc // loader callback
    evictedFunc      EvictedFunc      // eviction callback, set by EvictedFunc
    purgeVisitorFunc PurgeVisitorFunc // purge callback
    addedFunc        AddedFunc        // called by the set methods after a value is stored
    expiration       *time.Duration   // default expiration; nil means none, set by Expiration
    deserializeFunc  DeserializeFunc  // applied to values on the way out (see get)
    serializeFunc    SerializeFunc    // applied to values on the way in (see set)
}

设置过期时间、策略、回调函数

// LRU selects the LRU eviction policy by delegating to EvictType with
// TYPE_LRU, and returns the builder that EvictType returns.
func (cb *CacheBuilder) LRU() *CacheBuilder {
    return cb.EvictType(TYPE_LRU)
}

// Expiration sets the builder's default expiration and returns the builder so
// calls can be chained. The pointer stored refers to this call's own copy of
// the argument.
func (cb *CacheBuilder) Expiration(expiration time.Duration) *CacheBuilder {
    cb.expiration = &expiration
    return cb
}

// EvictedFunc sets the eviction callback on the builder and returns the
// builder so calls can be chained.
func (cb *CacheBuilder) EvictedFunc(evictedFunc EvictedFunc) *CacheBuilder {
    cb.evictedFunc = evictedFunc
    return cb
}

build 输出cache对象

// Build validates the configured size and returns the cache for the
// configured eviction type. It panics when size is not positive, except for
// TYPE_SIMPLE, where a non-positive size is accepted.
func (cb *CacheBuilder) Build() Cache {
    if cb.tp == TYPE_SIMPLE || cb.size > 0 {
        return cb.build()
    }
    panic("gcache: Cache size <= 0")
}

// build constructs the cache implementation that matches cb.tp.
// It panics when cb.tp is not one of the known eviction types.
func (cb *CacheBuilder) build() Cache {
    switch kind := cb.tp; kind {
    case TYPE_ARC:
        return newARC(cb)
    case TYPE_LFU:
        return newLFUCache(cb)
    case TYPE_LRU:
        return newLRUCache(cb)
    case TYPE_SIMPLE:
        return newSimpleCache(cb)
    }
    panic("gcache: Unknown type " + cb.tp)
}

// newSimpleCache is shown as an example of a constructor: it copies the
// builder settings into the embedded baseCache, allocates the item map, and
// points the load group back at the new cache.
func newSimpleCache(cb *CacheBuilder) *SimpleCache {
    cache := new(SimpleCache)
    buildCache(&cache.baseCache, cb)

    cache.init()
    cache.loadGroup.cache = cache
    return cache
}

// init allocates the items map. A positive size is used as the map's
// capacity hint; otherwise the map is created without a hint.
func (c *SimpleCache) init() {
    hint := c.size
    if hint < 0 {
        // A hint of zero is the same as giving no hint at all.
        hint = 0
    }
    c.items = make(map[interface{}]*simpleItem, hint)
}

// buildCache copies the builder's settings into c and gives it a fresh
// stats value.
func buildCache(c *baseCache, cb *CacheBuilder) {
    // Scalar settings.
    c.clock, c.size, c.expiration = cb.clock, cb.size, cb.expiration

    // Callbacks.
    c.loaderExpireFunc = cb.loaderExpireFunc
    c.addedFunc = cb.addedFunc
    c.evictedFunc = cb.evictedFunc
    c.purgeVisitorFunc = cb.purgeVisitorFunc
    c.serializeFunc = cb.serializeFunc
    c.deserializeFunc = cb.deserializeFunc

    c.stats = &stats{}
}

接口和整体流程

// Cache is the interface returned by CacheBuilder.Build. The builder returns
// a SimpleCache, LRUCache, LFUCache or ARC value as a Cache.
type Cache interface {
    // Set stores value under key.
    Set(key, value interface{}) error
    // SetWithExpire stores value under key with its own expiration.
    SetWithExpire(key, value interface{}, expiration time.Duration) error
    // Get returns the value stored under key. In the ARC implementation shown
    // in this article, a KeyNotFoundError from the cache falls back to
    // getWithLoader.
    Get(key interface{}) (interface{}, error)
    // NOTE(review): the implementations of the next two methods are not
    // shown here; presumably GetIFPresent skips the loader and GetALL
    // returns a snapshot of all entries. Confirm against the library.
    GetIFPresent(key interface{}) (interface{}, error)
    GetALL(checkExpired bool) map[interface{}]interface{}
    // get is the internal lookup; when onLoad is true the hit/miss counters
    // are not updated.
    get(key interface{}, onLoad bool) (interface{}, error)
    Remove(key interface{}) bool
    Purge()
    Keys(checkExpired bool) []interface{}
    Len(checkExpired bool) int
    Has(key interface{}) bool

    statsAccessor
}

// statsAccessor exposes the cache's counters: hits, misses, lookups and the
// hit rate.
type statsAccessor interface {
    HitCount() uint64    // number of cache hits
    MissCount() uint64   // number of cache misses
    LookupCount() uint64 // number of lookups
    HitRate() float64    // hit rate
}


// baseCache holds the state shared by all cache implementations; it is
// embedded in SimpleCache, LRUCache, LFUCache and ARC and filled in by
// buildCache.
type baseCache struct {
    clock            Clock            // time source used to compute expiration times
    size             int              // configured cache size
    loaderExpireFunc LoaderExpireFunc // loader callback
    evictedFunc      EvictedFunc      // called with the key and value of an evicted entry
    purgeVisitorFunc PurgeVisitorFunc // purge callback
    addedFunc        AddedFunc        // called by the set methods after a value is stored
    deserializeFunc  DeserializeFunc  // applied to values returned by get
    serializeFunc    SerializeFunc    // applied to values before they are stored
    expiration       *time.Duration   // default expiration; nil means none
    mu               sync.RWMutex     // locked by Set and getValue around cache state
    loadGroup        Group            // NOTE(review): presumably the singleflight group used by the loader; confirm
    *stats
}

SimpleCache

SimpleCache是gcache中最简单的一种，其中比较重要的函数就是Get，Set。
在SimpleCache结构体中items保存着simpleItem。simpleItem结构体中保存具体值和过期时间。
Get，Set函数就是通过操作items属性来保存和获取缓存中的值的。下面我们详细看一下代码：

结构体

// SimpleCache is the simplest cache flavour: the shared baseCache plus a
// plain map from key to item.
type SimpleCache struct {
    baseCache
    items map[interface{}]*simpleItem // all cached entries, keyed by cache key
}

// simpleItem is one SimpleCache entry: the stored value and an optional
// expiration time.
type simpleItem struct {
    clock      Clock       // copied from the owning cache when the item is created
    value      interface{} // stored value (after serializeFunc, if one is configured)
    expiration *time.Time  // set when the cache has a default expiration
}

Set方法

// set stores value under key and returns the stored *simpleItem.
//
// The value is first passed through serializeFunc when one is configured; a
// serialization error is returned with a nil item and nothing is stored.
// An existing key has its value overwritten. A new key is inserted after
// evicting one entry if the cache has a positive size and is already full.
// The default expiration (if any) is then applied and addedFunc (if any) is
// called, for both new and existing keys.
func (c *SimpleCache) set(key, value interface{}) (interface{}, error) {
    if c.serializeFunc != nil {
        serialized, err := c.serializeFunc(key, value)
        if err != nil {
            return nil, err
        }
        value = serialized
    }

    item, exists := c.items[key]
    if !exists {
        // Make room first; a size of zero or less means "unbounded".
        if c.size > 0 && len(c.items) >= c.size {
            c.evict(1)
        }
        item = &simpleItem{clock: c.clock, value: value}
        c.items[key] = item
    } else {
        item.value = value
    }

    if c.expiration != nil {
        expireAt := c.clock.Now().Add(*c.expiration)
        item.expiration = &expireAt
    }

    if c.addedFunc != nil {
        c.addedFunc(key, value)
    }

    return item, nil
}

// evict removes up to count entries from the SimpleCache.
//
// Only entries that have no expiration, or whose expiration has already
// passed, are candidates; entries with an expiration still in the future are
// skipped. Map iteration order is unspecified, so which candidates are
// removed is arbitrary. If there are fewer candidates than count, fewer
// entries are removed.
func (c *SimpleCache) evict(count int) {
    now := c.clock.Now()
    evicted := 0
    for key, item := range c.items {
        if evicted >= count {
            return
        }
        if item.expiration == nil || now.After(*item.expiration) {
            // Removing directly keeps the work inside the loop instead of
            // queueing one deferred call per victim. Go permits deleting
            // map entries while ranging over the map.
            c.remove(key)
            evicted++
        }
    }
}

Get方法

// get looks key up in the cache and returns its value, passed through
// deserializeFunc when one is configured. A lookup error is returned as-is
// with a nil value.
func (c *SimpleCache) get(key interface{}, onLoad bool) (interface{}, error) {
    raw, err := c.getValue(key, onLoad)
    if err != nil {
        return nil, err
    }
    if c.deserializeFunc == nil {
        return raw, nil
    }
    return c.deserializeFunc(key, raw)
}

// getValue is the internal lookup.
//  1. Take the lock.
//  2. If the key exists but has expired, remove it.
//  3. If the key exists and is live, return its value and count a hit.
//  4. Otherwise count a miss and return KeyNotFoundError.
//
// The hit/miss counters are updated after the lock is released, and only
// when onLoad is false.
func (c *SimpleCache) getValue(key interface{}, onLoad bool) (interface{}, error) {
    c.mu.Lock()
    if item, found := c.items[key]; found {
        if item.IsExpired(nil) {
            c.remove(key)
        } else {
            v := item.value
            c.mu.Unlock()
            if !onLoad {
                c.stats.IncrHitCount()
            }
            return v, nil
        }
    }
    c.mu.Unlock()

    if !onLoad {
        c.stats.IncrMissCount()
    }
    return nil, KeyNotFoundError
}

LRUCache

LRU在之前已经介绍过了，意思是最近最少使用。LRU Cache 的替换原则就是将最近最少使用的内容替换掉。
gcache实现的方法是通过链表来实现这个策略。每次get或者set之后把这个节点放到链表的头部，当超过size时则删除链表尾部的节点数据。这样就实现了最近最少使用的策略。

结构体

// LRUCache is the LRU flavour: a map for lookup plus a linked list that
// keeps entries in recency order (front = most recently set, back = next to
// be evicted).
type LRUCache struct {
    baseCache
    items     map[interface{}]*list.Element // key -> list element whose Value is a *lruItem
    evictList *list.List                    // recency order; evict removes from the back
}

// lruItem is the payload stored in each evictList element.
type lruItem struct {
    clock      Clock       // copied from the owning cache when the item is created
    key        interface{} // cache key of this entry
    value      interface{} // stored value (after serializeFunc, if one is configured)
    expiration *time.Time  // set when the cache has a default expiration
}

Set方法

// Set stores value under key. It holds the cache's write lock for the whole
// operation and delegates the actual work to set.
func (c *LRUCache) Set(key, value interface{}) error {
    c.mu.Lock()
    defer c.mu.Unlock()

    if _, err := c.set(key, value); err != nil {
        return err
    }
    return nil
}

// set is the internal store operation; Set acquires c.mu before calling it.
// It returns the stored *lruItem.
//
// The value is first passed through serializeFunc when one is configured; a
// serialization error is returned with a nil item. An existing key is moved
// to the front of evictList and its value overwritten. A new key is pushed
// to the front, after evicting one entry from the back if the list already
// holds c.size entries. The default expiration (if any) is then applied and
// addedFunc (if any) is called.
func (c *LRUCache) set(key, value interface{}) (interface{}, error) {
    if c.serializeFunc != nil {
        serialized, err := c.serializeFunc(key, value)
        if err != nil {
            return nil, err
        }
        value = serialized
    }

    var entry *lruItem
    elem, exists := c.items[key]
    if !exists {
        // New key: make room if needed, then insert at the front.
        if c.evictList.Len() >= c.size {
            c.evict(1)
        }
        entry = &lruItem{clock: c.clock, key: key, value: value}
        c.items[key] = c.evictList.PushFront(entry)
    } else {
        // Existing key: refresh its position and overwrite the value.
        c.evictList.MoveToFront(elem)
        entry = elem.Value.(*lruItem)
        entry.value = value
    }

    if c.expiration != nil {
        expireAt := c.clock.Now().Add(*c.expiration)
        entry.expiration = &expireAt
    }

    if c.addedFunc != nil {
        c.addedFunc(key, value)
    }

    return entry, nil
}

// evict removes up to count entries from the back of evictList, stopping
// early once the list is empty.
func (c *LRUCache) evict(count int) {
    for removed := 0; removed < count; removed++ {
        tail := c.evictList.Back()
        if tail == nil {
            return
        }
        c.removeElement(tail)
    }
}

LFU Cache

LFU：意思是最近最不经常使用。LFU Cache先淘汰一定时间内被访问次数最少的页面。

源码分析

LFU策略，淘汰的是访问次数最少的，意味着cache需要保存每一个缓存数据的访问次数。但如何保存访问次数呢，我们可以看下面的结构体定义。

items map[interface{}]*lfuItem ：保存数据，保证访问时候的高效
lfuItem：保存在map中，其中存放着key、value、过期时间、一个链表节点的地址。这个地址用来方便操作链表中的数据。
freqList：链表结构，保存freqEntry
freqEntry：包含两个字段，一个是freq用来保存访问次数，另一个是items（map类型）用来保存该访问次数下的具体数据，可以是多个

gcache的LFU使用一个map来保存数据，一个链表（每个节点包含访问次数和一个map）来保存缓存中数据被访问的次数。初次set时访问次数默认为0。如果需要淘汰，则淘汰被访问次数最少的数据：从链表的头部开始扫描，直到找到有数据的节点为止。

图解

初始化

图一是set 5个字符串到cache中，5个字符串不重复。items中的数据我们不看，只画了链表中的数据状态。
这个时候链表中只有一个节点，这个节点数据中的freq为0，意味着这个节点中的数据都是没有被访问过的。


操作之后的图

图二是经过几次get和一次set操作后的链表数据结果。可以看到链表的每一个节点都代表着一个访问次数并且依次递增。
每次get访问数据时通过上面提到的lfuItem中的指针获取到节点在链表中所在的位置，把数据往后移动一个节点。如果没有下一个节点则创建一个，以此类推。那么得到的结果就是越靠近头部的数据访问次数越少。如果需要淘汰则优先淘汰这些数据。


结构体

// LFUCache is the LFU flavour: a map for lookup plus a linked list of
// frequency buckets. evict scans the list from the front.
type LFUCache struct {
    baseCache
    items    map[interface{}]*lfuItem // key -> entry
    freqList *list.List // list for freqEntry
}

// freqEntry is one frequency bucket: an access count and the set of items
// currently at that count.
type freqEntry struct {
    freq  uint                  // access count shared by every item in this bucket
    items map[*lfuItem]struct{} // set of items in this bucket
}

// lfuItem is one LFUCache entry. Besides key, value and expiration it keeps
// a pointer to the freqList element whose bucket currently holds it, so the
// item can be moved between buckets without searching the list.
type lfuItem struct {
    clock       Clock         // copied from the owning cache when the item is created
    key         interface{}   // cache key of this entry
    value       interface{}   // stored value (after serializeFunc, if one is configured)
    freqElement *list.Element // freqList element whose *freqEntry contains this item
    expiration  *time.Time    // set when the cache has a default expiration
}

Set方法

// Set stores value under key. It holds the cache's write lock for the whole
// operation and delegates the actual work to set.
func (c *LFUCache) Set(key, value interface{}) error {
    c.mu.Lock()
    defer c.mu.Unlock()

    if _, err := c.set(key, value); err != nil {
        return err
    }
    return nil
}

// set is the internal store operation; Set acquires c.mu before calling it.
// It returns the stored *lfuItem.
//
// The value is first passed through serializeFunc when one is configured; a
// serialization error is returned with a nil item. An existing key only has
// its value overwritten (its frequency bucket is unchanged). A new key is
// added to the bucket at the front of freqList, after evicting one entry if
// the cache already holds c.size items. The default expiration (if any) is
// then applied and addedFunc (if any) is called.
func (c *LFUCache) set(key, value interface{}) (interface{}, error) {
    if c.serializeFunc != nil {
        serialized, err := c.serializeFunc(key, value)
        if err != nil {
            return nil, err
        }
        value = serialized
    }

    entry, exists := c.items[key]
    if !exists {
        if len(c.items) >= c.size {
            c.evict(1)
        }
        entry = &lfuItem{
            clock: c.clock,
            key:   key,
            value: value,
        }
        // New items start in the first bucket of the frequency list.
        // NOTE(review): assumes freqList already has a front element,
        // presumably created when the cache is initialised; confirm.
        front := c.freqList.Front()
        bucket := front.Value.(*freqEntry)
        bucket.items[entry] = struct{}{}

        entry.freqElement = front
        c.items[key] = entry
    } else {
        entry.value = value
    }

    if c.expiration != nil {
        expireAt := c.clock.Now().Add(*c.expiration)
        entry.expiration = &expireAt
    }

    if c.addedFunc != nil {
        c.addedFunc(key, value)
    }

    return entry, nil
}

// evict removes up to count items, walking freqList from the front bucket
// onwards. Empty buckets are skipped; the walk stops when count items have
// been removed or the list is exhausted.
func (c *LFUCache) evict(count int) {
    removed := 0
    for bucket := c.freqList.Front(); bucket != nil && removed < count; bucket = bucket.Next() {
        for item := range bucket.Value.(*freqEntry).items {
            if removed >= count {
                return
            }
            c.removeItem(item)
            removed++
        }
    }
}

Get方法

// get looks key up in the cache and returns its value, passed through
// deserializeFunc when one is configured. A lookup error is returned as-is
// with a nil value.
func (c *LFUCache) get(key interface{}, onLoad bool) (interface{}, error) {
    raw, err := c.getValue(key, onLoad)
    switch {
    case err != nil:
        return nil, err
    case c.deserializeFunc != nil:
        return c.deserializeFunc(key, raw)
    default:
        return raw, nil
    }
}

// getValue is the internal lookup. A live entry is moved to the next
// frequency bucket via increment and its value returned; an expired entry is
// removed. Anything else is a miss and yields KeyNotFoundError.
//
// The hit/miss counters are updated after the lock is released, and only
// when onLoad is false.
func (c *LFUCache) getValue(key interface{}, onLoad bool) (interface{}, error) {
    c.mu.Lock()
    if item, found := c.items[key]; found {
        if item.IsExpired(nil) {
            c.removeItem(item)
        } else {
            c.increment(item)
            v := item.value
            c.mu.Unlock()
            if !onLoad {
                c.stats.IncrHitCount()
            }
            return v, nil
        }
    }
    c.mu.Unlock()

    if !onLoad {
        c.stats.IncrMissCount()
    }
    return nil, KeyNotFoundError
}

// increment moves item from its current frequency bucket to the bucket that
// follows it in freqList. When there is no following element, a new
// freqEntry with freq = current freq + 1 is inserted after the current one.
func (c *LFUCache) increment(item *lfuItem) {
    cur := item.freqElement
    curBucket := cur.Value.(*freqEntry)
    delete(curBucket.items, item)

    next := cur.Next()
    if next == nil {
        fresh := &freqEntry{
            freq:  curBucket.freq + 1,
            items: make(map[*lfuItem]struct{}),
        }
        next = c.freqList.InsertAfter(fresh, cur)
    }

    next.Value.(*freqEntry).items[item] = struct{}{}
    item.freqElement = next
}

ARC Cache

ARC：Adaptive Replacement Cache，ARC介于 LRU 和 LFU 之间。

源码分析

ARC是介于LRU和LFU之间的算法。也是通过map来存储数据，保证存取的性能。那是如何实现LRU和LFU，又是如何平衡两个策略的呢？
结构体可以参看下面的代码：

items: map数据结构保存key，value则是arcItem结构体，其中包含了key、value、过期时间。注意其中没有像LFU那样的链表指针。
t1：LRU策略，set之后会放入t1中，限制数量跟整个cache数量相同。
t2：LFU策略，当get访问之后会从t1移动到t2之中，不过无论访问几次都会在t2之中，不像LFU一样会记录访问次数。
b1：接收t1（LRU）策略淘汰的缓存数据。如果超过size则直接从cache中删除。
b2：接收t2（LFU）策略淘汰的缓存数据。跟b1一样超过size也会从cache中删除。

那每次Set、Get数据又是怎么流动的呢？下面图解：
图一：是初始化而且添加5条数据以后cache内部数据结构。items保存所有数据，由于没有访问数据则全部数据都会放到t1中。

图二：获取了aaa、bbb、ddd、eee 4个数据，然后又set了fff到cache中。假设这个cache的size为5。
其中aaa、bbb、ddd、eee被移动到了t2中，剩下的ccc没有被访问，则会继续保留在t1之中。但是最后一条语句又设置了fff到cache中，发现size已经满了，需要淘汰一个数据，于是淘汰t1中的数据ccc并移动到b1中。items之中则没有ccc数据了。
最终的数据流动如下图：

结构体

// ARC is the Adaptive Replacement Cache flavour. Values live in items; the
// four lists hold keys only. set puts new keys on t1, getValue moves a key
// from t1 to t2 when it is read, and keys dropped from t1/t2 are recorded on
// b1/b2 respectively.
type ARC struct {
    baseCache
    items map[interface{}]*arcItem // key -> entry for keys currently cached

    part int      // adjusted via setPart on b1/b2 hits; NOTE(review): presumably the target size of t1, confirm
    t1   *arcList // keys that were set but not read since
    t2   *arcList // keys that have been read at least once
    b1   *arcList // keys recently dropped from t1
    b2   *arcList // keys recently dropped from t2
}

// arcItem is one ARC entry. Unlike lfuItem it carries no pointer into any
// list; list membership is tracked by the arcList values on ARC.
type arcItem struct {
    clock      Clock       // copied from the owning cache when the item is created
    key        interface{} // cache key of this entry
    value      interface{} // stored value (after serializeFunc, if one is configured)
    expiration *time.Time  // set when the cache has a default expiration
}

// arcList is a linked list of keys paired with an index from key to list
// element.
type arcList struct {
    l    *list.List                    // keys in list order
    keys map[interface{}]*list.Element // key -> its element in l
}

Set方法

// Set stores value under key. It holds the cache's write lock for the whole
// operation and delegates the actual work to set.
func (c *ARC) Set(key, value interface{}) error {
    c.mu.Lock()
    defer c.mu.Unlock()

    if _, err := c.set(key, value); err != nil {
        return err
    }
    return nil
}

// set is the internal store operation; Set acquires c.mu before calling it.
// It returns the stored *arcItem.
//
//  1. Serialize the value (if serializeFunc is set) and insert or overwrite
//     the entry in c.items, applying the default expiration.
//  2. If the key is already on t1 or t2, nothing else changes.
//  3. If the key is on b1 or b2, adjust c.part, call replace, and move the
//     key from that list to the front of t2.
//  4. Otherwise the key is new: make room according to the current list
//     sizes, then push the key to the front of t1.
//
// addedFunc (if set) runs via defer on every return path after step 1.
func (c *ARC) set(key, value interface{}) (interface{}, error) {
    var err error
    if c.serializeFunc != nil {
        value, err = c.serializeFunc(key, value)
        if err != nil {
            return nil, err
        }
    }

    // Insert or overwrite the entry itself; list bookkeeping follows below.
    item, ok := c.items[key]
    if ok {
        item.value = value
    } else {
        item = &arcItem{
            clock: c.clock,
            key:   key,
            value: value,
        }
        c.items[key] = item
    }

    if c.expiration != nil {
        t := c.clock.Now().Add(*c.expiration)
        item.expiration = &t
    }

    // Deferred so the callback fires after the list updates on every path.
    defer func() {
        if c.addedFunc != nil {
            c.addedFunc(key, value)
        }
    }()

    // Already tracked on t1/t2: the key's list position is left untouched.
    if c.t1.Has(key) || c.t2.Has(key) {
        return item, nil
    }

    // Key is on b1: grow part by max(len(b2)/len(b1), 1), capped at size,
    // then move the key to the front of t2.
    // b1 contains key here, so b1.Len() is presumably at least 1.
    if elt := c.b1.Lookup(key); elt != nil {
        c.setPart(minInt(c.size, c.part+maxInt(c.b2.Len()/c.b1.Len(), 1)))
        c.replace(key)
        c.b1.Remove(key, elt)
        c.t2.PushFront(key)
        return item, nil
    }

    // Key is on b2: shrink part by max(len(b1)/len(b2), 1), floored at 0,
    // then move the key to the front of t2.
    if elt := c.b2.Lookup(key); elt != nil {
        c.setPart(maxInt(0, c.part-maxInt(c.b1.Len()/c.b2.Len(), 1)))
        c.replace(key)
        c.b2.Remove(key, elt)
        c.t2.PushFront(key)
        return item, nil
    }

    // Brand-new key: make room before adding it to t1.
    if c.isCacheFull() && c.t1.Len()+c.b1.Len() == c.size {
        if c.t1.Len() < c.size {
            // Drop the tail of b1 and let replace make room.
            c.b1.RemoveTail()
            c.replace(key)
        } else {
            // t1 alone fills the cache: drop its tail entry outright and
            // report it through evictedFunc.
            pop := c.t1.RemoveTail()
            item, ok := c.items[pop]
            if ok {
                delete(c.items, pop)
                if c.evictedFunc != nil {
                    c.evictedFunc(item.key, item.value)
                }
            }
        }
    } else {
        total := c.t1.Len() + c.b1.Len() + c.t2.Len() + c.b2.Len()
        if total >= c.size {
            // All four lists together hold 2*size keys: drop one key from
            // b2, or from b1 when b2 is empty.
            if total == (2 * c.size) {
                if c.b2.Len() > 0 {
                    c.b2.RemoveTail()
                } else {
                    c.b1.RemoveTail()
                }
            }
            c.replace(key)
        }
    }
    c.t1.PushFront(key)
    return item, nil
}

Get方法

如果t1中存在则从t1移动到t2，如果已经存在于t2之中则放到t2的头部节点。

// Get returns the value cached under key. When the cache reports
// KeyNotFoundError, the lookup falls back to getWithLoader; any other result
// (including other errors) is returned unchanged.
func (c *ARC) Get(key interface{}) (interface{}, error) {
    v, err := c.get(key, false)
    if err != KeyNotFoundError {
        return v, err
    }
    return c.getWithLoader(key, true)
}

// get looks key up in the cache and returns its value, passed through
// deserializeFunc when one is configured. A lookup error is returned as-is
// with a nil value.
func (c *ARC) get(key interface{}, onLoad bool) (interface{}, error) {
    raw, err := c.getValue(key, onLoad)
    if err != nil {
        return nil, err
    }
    if decode := c.deserializeFunc; decode != nil {
        return decode(key, raw)
    }
    return raw, nil
}

// getValue is the internal lookup; it holds c.mu for its whole duration.
//
// A live key found on t1 is moved to the front of t2; a live key found on t2
// is moved to the front of t2. In both cases the value is returned and a hit
// is counted. An expired key is deleted from items, its key is pushed onto
// b1 (from t1) or b2 (from t2), and evictedFunc is called if set. Every
// other outcome counts a miss and returns KeyNotFoundError. Counters are only
// updated when onLoad is false.
func (c *ARC) getValue(key interface{}, onLoad bool) (interface{}, error) {
    c.mu.Lock()
    defer c.mu.Unlock()
    if elt := c.t1.Lookup(key); elt != nil {
        // The key leaves t1 whether or not it has expired.
        c.t1.Remove(key, elt)
        item := c.items[key]
        if !item.IsExpired(nil) {
            c.t2.PushFront(key)
            if !onLoad {
                c.stats.IncrHitCount()
            }
            return item.value, nil
        } else {
            // Expired: drop the value but remember the key on b1.
            delete(c.items, key)
            c.b1.PushFront(key)
            if c.evictedFunc != nil {
                c.evictedFunc(item.key, item.value)
            }
        }
    }
    if elt := c.t2.Lookup(key); elt != nil {
        item := c.items[key]
        if !item.IsExpired(nil) {
            c.t2.MoveToFront(elt)
            if !onLoad {
                c.stats.IncrHitCount()
            }
            return item.value, nil
        } else {
            // Expired: drop the value but remember the key on b2.
            delete(c.items, key)
            c.t2.Remove(key, elt)
            c.b2.PushFront(key)
            if c.evictedFunc != nil {
                c.evictedFunc(item.key, item.value)
            }
        }
    }

    if !onLoad {
        c.stats.IncrMissCount()
    }
    return nil, KeyNotFoundError
}

总结

至此gcache全部的策略都已经分析完了。看完分析可以看出来gcache支持的策略很多，并且使用十分简单。只要在声明的时候确定好策略就可以使用对应的策略。同时还支持各种回调函数，让逻辑更加灵活，符合各种需求。写这篇文章也在网上找了一些资料，但是都不是特别详细，所以这些是不停调试和画图分析出来的结果。希望能对大家有所帮助。