Golang的interface探究

时间 2019-11-08

原文链接

golang被诟病最多的，没有泛型应该算一个。作为强类型语言来讲，没有泛型很多时候在业务开发上会有些不适应，但是它有个interface
类型，被很多人拿来当泛型玩，如果你了解它的原理也是没问题的。
但是你真的了解吗？

Interface

golang 中的interface，可以将任意类型的变量赋予它。常见的我们区分两种，一种就是struct类型的，因为struct
可能会有func；另一种，就是非结构体的普通类型（下面提到的普通类型，都是指代除struct外的类型）。

eface

  1 package main
  2
  3 import "fmt"
  4
  5 func main() {
  6     var x int
  7     var y interface{}
  8     x = 1
  9     y = x
 10     fmt.Println(y)
 11 }

当我们把int类型的变量赋值给interface类型时，会发生什么：

TEXT main.main(SB) /Users/such/gomodule/runtime/main.go
    main.go:5    0x4a23a0    64488b0c25f8ffffff    mov rcx, qword ptr fs:[0xfffffff8]
    main.go:5    0x4a23a9    488d4424f8        lea rax, ptr [rsp-0x8]
    main.go:5    0x4a23ae    483b4110        cmp rax, qword ptr [rcx+0x10]
    main.go:5    0x4a23b2    0f86c7000000        jbe 0x4a247f
=>    main.go:5    0x4a23b8*    4881ec88000000        sub rsp, 0x88
    main.go:5    0x4a23bf    4889ac2480000000    mov qword ptr [rsp+0x80], rbp
    main.go:5    0x4a23c7    488dac2480000000    lea rbp, ptr [rsp+0x80]
    main.go:6    0x4a23cf    48c744243000000000    mov qword ptr [rsp+0x30], 0x0
    main.go:7    0x4a23d8    0f57c0            xorps xmm0, xmm0
    main.go:7    0x4a23db    0f11442448        movups xmmword ptr [rsp+0x48], xmm0
    main.go:8    0x4a23e0    48c744243001000000    mov qword ptr [rsp+0x30], 0x1
    main.go:9    0x4a23e9    48c7042401000000    mov qword ptr [rsp], 0x1
    main.go:9    0x4a23f1    e89a70f6ff        call $runtime.convT64

追到runtime的convT64方法，一探究竟。

// type uint64InterfacePtr uint64
// var uint64Eface interface{} = uint64InterfacePtr(0)
// var uint64Type *_type = (*eface)(unsafe.Pointer(&uint64Eface))._type

func convT64(val uint64) (x unsafe.Pointer) {
    if val == 0 {
        x = unsafe.Pointer(&zeroVal[0])
    } else {
        x = mallocgc(8, uint64Type, false)
        *(*uint64)(x) = val
    }
    return
}

这个方法返回了指向 val 副本的指针（val 为 0 时直接返回 zeroVal 的地址），其中uint64Type是 uint64 的类型信息（*_type），取自一个值为 0 的 interface{}。有个疑问，这里uint64Type定义时，eface 是什么：

// eface is the two-word layout this article uses for an interface{} value:
// a type descriptor followed by a pointer to the value's data. The
// uint64Type comment above reads _type out of an interface{} through it.
type eface struct {
    _type *_type         // type descriptor of the stored value
    data  unsafe.Pointer // pointer to the stored value's data
}

这个结构体，刚好满足了，对于普通类型转换interface，或者说是将普通类型赋值给interface所必须的两个字段，当前类型的type
和值（这里貌似有点绕口）。实际上，eface确实就是表示这类interface的结构体，在runtime中，还能看到其他普通类型的转换，
convTslice、convTstring、convT64、convT32等其他几个方法。

iface

如果是一个拥有func的struct类型的变量，赋值给另外一个interface，这类的interface在底层是怎么存的呢。如下所示：

  1 package main
  2 
  3 import "fmt"
  4 
  5 type Human interface{ Introduce() string }
  6 
  7 type Bob struct{ Human }
  8 
  9 func (b Bob) Introduce() string { return "Name: Bob" }
 10 
 11 func main() {
 12     var y Human
 13     x := Bob{}
 14     y = x
 15     fmt.Println(y)
 16 }

TEXT main.main(SB) /Users/such/gomodule/runtime/main.go
        main.go:11      0x10b71a0       65488b0c2530000000              mov rcx, qword ptr gs:[0x30]
        main.go:11      0x10b71a9       488d4424d0                      lea rax, ptr [rsp-0x30]
        main.go:11      0x10b71ae       483b4110                        cmp rax, qword ptr [rcx+0x10]
        main.go:11      0x10b71b2       0f860f010000                    jbe 0x10b72c7
        ...省略部分指令
        main.go:14      0x10b7202       e84921f5ff                      call $runtime.convT2I

看汇编代码，在 14 行时，调用了runtime.convT2I，这个方法返回的类型是iface

// convT2I builds an iface for the value that elem points to. It allocates a
// fresh object of the concrete type recorded in tab, copies the value into it,
// and returns an iface pairing tab with that copy.
func convT2I(tab *itab, elem unsafe.Pointer) (i iface) {
    typ := tab._type
    // Report the read of the source value to the race detector and to msan
    // when those modes are enabled.
    if raceenabled {
        raceReadObjectPC(typ, elem, getcallerpc(), funcPC(convT2I))
    }
    if msanenabled {
        msanread(elem, typ.size)
    }
    // The interface keeps its own copy of the value, not the caller's elem.
    data := mallocgc(typ.size, typ, true)
    typedmemmove(typ, data, elem)
    return iface{tab: tab, data: data}
}

itab包括具体值的type和 interface 的type，还有其他字段

// itab pairs an interface type with a concrete type and carries the method
// code addresses that (*itab).init resolves for that pair.
type itab struct {
    inter *interfacetype    // the interface type; getitab stores its inter argument here
    _type *_type            // the type of the concrete value the interface points to
    hash  uint32            // copy of _type.hash, filled in by init
    _     [4]byte           // unused; presumably alignment padding — TODO confirm
    // fun holds the resolved method code addresses. It is declared with a
    // single element, but getitab allocates room for len(inter.mhdr) entries
    // and init indexes it that way. init sets fun[0] to 0 when _type is
    // missing a method of inter, and getitab tests fun[0] != 0 for success.
    fun   [1]uintptr
}

itab结构体的init方法负责填充fun和hash字段，重点看这个方法：

// init fills m.fun with the code addresses of the methods of m._type that
// satisfy m.inter, and records the type's hash in m.hash.
//
// It returns "" when every interface method was matched. Otherwise it sets
// m.fun[0] to 0 and returns the name of the first interface method for which
// no matching method was found.
func (m *itab) init() string {
    inter := m.inter
    typ := m._type
    x := typ.uncommon()

    // inter.mhdr is the slice ([]imethod) of every method the interface
    // requires; per the original note, methods are sorted by name.
    // ni is the number of methods the interface requires.
    // nt is the number of methods recorded for the concrete type (x.mcount).
    ni := len(inter.mhdr)
    nt := int(x.mcount)
    // View the concrete type's method records, located x.moff bytes past x,
    // as a slice capped at exactly nt entries.
    xmhdr := (*[1 << 16]method)(add(unsafe.Pointer(x), uintptr(x.moff)))[:nt:nt]
    j := 0
    // View m.fun as a slice of ni slots, one per interface method.
    methods := (*[1 << 16]unsafe.Pointer)(unsafe.Pointer(&m.fun[0]))[:ni:ni]
    var fun0 unsafe.Pointer
imethods:
    for k := 0; k < ni; k++ {   // walk the interface methods starting from the first
        i := &inter.mhdr[k]
        itype := inter.typ.typeOff(i.ityp)
        name := inter.typ.nameOff(i.name)
        iname := name.name()
        ipkg := name.pkgPath()
        if ipkg == "" {
            // Fall back to the interface's own package path.
            ipkg = inter.pkgpath.name()
        }
        // j is not reset between interface methods, so the scan over the
        // concrete type's methods resumes where the previous match stopped.
        for ; j < nt; j++ {
            t := &xmhdr[j]
            tname := typ.nameOff(t.name)
            // A candidate must agree on both the method type and the name.
            if typ.typeOff(t.mtyp) == itype && tname.name() == iname {
                pkgPath := tname.pkgPath()
                if pkgPath == "" {
                    pkgPath = typ.nameOff(x.pkgpath).name()
                }
                // Unexported methods only match within the same package.
                if tname.isExported() || pkgPath == ipkg {
                    // NOTE(review): m was already dereferenced at the top of
                    // this function, so this check cannot be false here.
                    if m != nil {
                        // Resolve the address of the method's code.
                        ifn := typ.textOff(t.ifn)
                        if k == 0 {
                            fun0 = ifn // we'll set m.fun[0] at the end
                        } else {
                            methods[k] = ifn
                        }
                    }
                    continue imethods
                }
            }
        }
        // No match: set fun[0] to 0 and return the name of the interface
        // method that is missing.
        m.fun[0] = 0
        return iname
    }
    // Every method matched: store the first method's address (deferred until
    // now, since fun[0] doubles as the failure marker) and the type's hash.
    m.fun[0] = uintptr(fun0)
    m.hash = typ.hash
    return ""
}

itabTable

还有一种将interface类型的实现，赋值给另一个interface：

TEXT main.main(SB) /Users/such/gomodule/runtime/main.go
    ...省略部分指令
    main.go:18    0x10b71f5    488d842480000000        lea rax, ptr [rsp+0x80]
    main.go:18    0x10b71fd    4889442408            mov qword ptr [rsp+0x8], rax
    main.go:18    0x10b7202    e84921f5ff            call $runtime.convI2I

// convI2I converts the interface value i to the interface type inter.
// A nil source (i.tab == nil) yields the zero iface. Otherwise the result
// shares i's data pointer; its tab is reused when the source itab already
// targets inter, and is looked up through getitab (canfail=false) otherwise.
func convI2I(inter *interfacetype, i iface) (r iface) {
    src := i.tab
    if src == nil {
        return
    }
    dst := src
    if src.inter != inter {
        // Different interface type: find the itab for (inter, concrete type).
        dst = getitab(inter, src._type, false)
    }
    r.tab = dst
    r.data = i.data
    return
}

通过前面的分析，我们已经知道， iface 是由 tab 和 data 两个字段组成。所以，实际上 convI2I 函数真正要做的事，
就是找到新 interface 的 tab 和 data，就大功告成了。在iface.go 文件头部定义了itabTable全局哈希表存所有itab，
其实就是空间换时间的思想。
itabTable是itabTableType结构体（我的golang版本是1.12.7）

// itabTableType is the type of the global itabTable hash table that stores
// itab entries.
type itabTableType struct {
    size    uintptr             // table size; a power of 2
    count   uintptr             // number of itab entries currently stored
    // entries holds the itab entries. NOTE(review): itabAdd allocates grown
    // tables with 2*size pointer slots, so the array presumably extends past
    // itabInitSize in practice — confirm against the runtime source.
    entries [itabInitSize]*itab
}

getitab

getitab是查找itab的方法

// getitab returns the itab for the pair (inter, typ), creating and caching it
// in itabTable if it is not there yet.
//
// If typ does not implement inter, it returns nil when canfail is true and
// panics with a *TypeAssertionError otherwise. It throws if inter has no
// methods.
func getitab(inter *interfacetype, typ *_type, canfail bool) *itab {
    if len(inter.mhdr) == 0 {
        throw("internal error - misuse of itab")
    }
    // typ lacks the tflagUncommon flag, so it is rejected right away.
    // (Presumably such a type carries no method table — TODO confirm.)
    if typ.tflag&tflagUncommon == 0 {
        if canfail {
            return nil
        }
        // Report the interface's first method as the missing one.
        name := inter.typ.nameOff(inter.mhdr[0].name)
        panic(&TypeAssertionError{nil, typ, &inter.typ, name.name()})
    }

    var m *itab
    // First lookup: no lock taken; the table pointer is read atomically
    // (itabAdd publishes a grown table with atomicstorep).
    t := (*itabTableType)(atomic.Loadp(unsafe.Pointer(&itabTable)))
    if m = t.find(inter, typ); m != nil {
        goto finish
    }

    // Not found.  Grab the lock and try again.
    lock(&itabLock)
    if m = itabTable.find(inter, typ); m != nil {
        unlock(&itabLock)
        goto finish
    }

    // Entry doesn't exist yet. Make a new entry & add it.
    // The allocation is sized for the itab plus len(inter.mhdr)-1 extra
    // pointer-sized slots following the one-element fun array.
    m = (*itab)(persistentalloc(unsafe.Sizeof(itab{})+uintptr(len(inter.mhdr)-1)*sys.PtrSize, 0, &memstats.other_sys))
    m.inter = inter
    m._type = typ
    m.init()
    // The entry is added even if init found a missing method (fun[0] == 0).
    itabAdd(m)
    unlock(&itabLock)
finish:
    // init leaves fun[0] == 0 when typ is missing a method of inter.
    if m.fun[0] != 0 {
        return m
    }
    if canfail {
        return nil
    }
    // Not the "_, ok :=" form of assertion, so this panics. init is run
    // again because its return value is the name of the missing method.
    panic(&TypeAssertionError{concrete: typ, asserted: &inter.typ, missingMethod: m.init()})
}

会调用find方法，根据interfacetype和_type的 hash 值，在itabTable中查找，找到的话直接返回；
否则，生成新的itab，加入 itabTable 中。有个问题，就是为什么第一次不加锁找，而第二次加锁？
我个人的理解是：首先，应该还是想避免锁的开销（之前在滴滴有幸听过曹大分享【内存重排】，对常用package在并发场景下，锁引起的问题做了一些分析）；
而第二次加锁，我觉得更多的是在未找到 itab 后，会新生成一个 itab 写入全局哈希表中，如果有其他协程在查询时，也未找到，可以并发安全地写入。

itabAdd

// itabAdd inserts m into the global itabTable, first replacing the table
// with one of twice the size when it is at least 75% full.
// getitab calls it between lock(&itabLock) and unlock(&itabLock).
func itabAdd(m *itab) {
    // Refuse to run while the current M is in the middle of an allocation.
    if getg().m.mallocing != 0 {
        throw("malloc deadlock")
    }

    t := itabTable
    if t.count >= 3*(t.size/4) { // 75% load factor
        // Allocate the grown table: 2 words for the size and count fields
        // plus 2*t.size pointer-sized entry slots.
        t2 := (*itabTableType)(mallocgc((2+2*t.size)*sys.PtrSize, nil, true))
        t2.size = t.size * 2

        // Re-add the existing entries into the new table. The count check
        // below guards against entries being lost in the copy.
        iterate_itabs(t2.add)
        if t2.count != t.count {
            throw("mismatched count during itab table copy")
        }
        // Publish the new table with an atomic pointer store; getitab's
        // lock-free lookup loads this pointer with atomic.Loadp.
        atomicstorep(unsafe.Pointer(&itabTable), unsafe.Pointer(t2))
        // Continue with the table that was just published.
        t = itabTable
    }
    t.add(m)
}

itabAdd 是将itab加入itabTable的方法。既然是hash表，就一定会发生扩容。每次都
是按2倍增长，创建新的 itabTable 再原子地替换。在 iterate_itabs（复制）时，并
未加锁，这里本身不是协程安全的，而是在添加前，在getitab方法中有锁的操作，会等待复制完成。