趣探 Mach-O:FishHook 解析

这是 Mach-O 系列的第三篇

阅读 FishHook 源码之前,你可能需要对以下知识有个简单的了解

本文的阐述顺序按照函数调用过程来进行

Fishhook 能够作什么

在此借用阿里百川的一张分析图,可以比较清晰地了解 FishHook 发挥了哪些作用

阿里百川
阿里百川

FishHook 在这里是对动态链接库起作用,修改对应的函数实现

对于动态链接库里面的 C 函数,第一次调用的时候,我们会得到函数和实现地址的对应关系,函数的实现地址存放在一个叫 __la_symbol_ptr 的地方,第二次调用的时候,直接通过 __la_symbol_ptr 找到函数地址就可以,不再需要繁琐的获取函数地址的过程。(具体通过哪些过程,可以参考刚才的链接:Mach-O 的动态链接过程)

那么,上图的含义就很明了了

在程序运行时,动态链接的 C 函数 dynamic(...) 的地址记录在 __DATA segment 下的 __la_symbol_ptr 中;初始时,程序只知道 dynamic 函数的符号名而不知道函数的实现地址;首次调用时,程序通过 __TEXT segment 中的 __stub_helper 取得绑定信息,通过 dyld_stub_binder 来更新 __la_symbol_ptr 中的符号实现地址;这样,再次调用时,就可以通过 __la_symbol_ptr 直接找到 dynamic 函数的实现;如果我们需要替换 dynamic 函数的实现,只需要修改 __la_symbol_ptr 即可,也就是我们要谈的 Fishhook

Fishhook 的实现

通过 fishhook 的官方文档可以知道,Fishhook 的使用方法大致如下:

/* Pointer through which new_open() calls the original open implementation. */
static int (*original_open)(const char *, int, ...);

/*
 * Replacement for open(): prints the call, then forwards it to the
 * implementation stored in original_open.
 *
 * path   - file path, forwarded unchanged.
 * oflag  - open flags, forwarded unchanged.
 * ...    - optional mode; read only when O_CREAT is set in oflag,
 *          otherwise a mode of 0 is forwarded.
 *
 * Returns whatever original_open returns.
 */
int new_open(const char *path, int oflag, ...) {
    mode_t create_mode = 0;

    /* Without O_CREAT there is no mode argument to fetch. */
    if ((oflag & O_CREAT) == 0) {
        printf("Calling real open('%s', %d)\n", path, oflag);
        return original_open(path, oflag, create_mode);
    }

    /* mode only applies to O_CREAT; it arrives promoted to int. */
    va_list args;
    va_start(args, oflag);
    create_mode = va_arg(args, int);
    va_end(args);

    printf("Calling real open('%s', %d, %d)\n", path, oflag, create_mode);
    return original_open(path, oflag, create_mode);
}

/*
 * Demo entry point: asks rebind_symbols() to rebind "open" to new_open,
 * handing it the address of original_open as the slot for the replaced
 * implementation, then calls open() once on the program's own path.
 */
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        /* One-element rebinding table: { symbol name, replacement, slot for the original }. */
        struct rebinding hooks[1] = {
            { "open", new_open, (void *)&original_open }
        };
        rebind_symbols(hooks, 1);

        /* The descriptor is intentionally unused; the call itself is the demonstration. */
        __unused int fd = open(argv[0], O_RDONLY);
    }
    return 0;
}

先从函数的入口 rebind_symbols 开始谈起吧,rebind_symbols 主要是使用 _dyld_register_func_for_add_image 来注册回调函数,在加载动态库的时候执行一些操作

/*
 * Adds `rebindings_nel` entries from `rebindings` to the private rebinding
 * list and applies them to the loaded images.
 *
 * Returns the result of prepend_rebindings(): a negative value if the
 * entries could not be stored (nothing else is done in that case),
 * otherwise that call's non-negative result.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  // Push a copy of the caller's array onto the front of _rebindings_head.
  const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  // A head node with no successor means this is the first call: register the
  // add-image callback (which is also invoked for existing images).
  if (_rebindings_head->next == NULL) {
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // Later calls: the callback is already registered, so just run over the
  // images that are currently loaded.
  const uint32_t image_count = _dyld_image_count();
  uint32_t image_index = 0;
  while (image_index < image_count) {
    _rebind_symbols_for_image(_dyld_get_image_header(image_index),
                              _dyld_get_image_vmaddr_slide(image_index));
    image_index++;
  }
  return status;
}

对于prepend_rebindings的代码以下

// Node of a singly linked list; each node holds one array of rebindings.
struct rebindings_entry {
  struct rebinding *rebindings;   // array of rebinding records held by this node
  size_t rebindings_nel;          // number of elements in `rebindings`
  struct rebindings_entry *next;  // following node; NULL on the last node
};

// Head of the list of registered rebindings; new nodes are inserted at the front.
static struct rebindings_entry *_rebindings_head;

/*
 * Copies `nel` rebindings into a newly allocated list node and makes that
 * node the new head of the list at *rebindings_head.
 *
 * Returns 0 on success, or -1 if either allocation fails; on failure the
 * list is left untouched and any partial allocation is released.
 */
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
  const size_t payload_bytes = sizeof(struct rebinding) * nel;

  struct rebindings_entry *node = malloc(sizeof(struct rebindings_entry));
  if (node == NULL) {
    return -1;
  }

  node->rebindings = malloc(payload_bytes);
  if (node->rebindings == NULL) {
    free(node);
    return -1;
  }

  // Keep a private copy of the caller's array, then link the node in at the head.
  memcpy(node->rebindings, rebindings, payload_bytes);
  node->rebindings_nel = nel;
  node->next = *rebindings_head;
  *rebindings_head = node;
  return 0;
}

基础结构解释

Dl_info

/*
 * Structure filled in by dladdr().
 */
typedef struct dl_info {
        const char      *dli_fname;     /* Pathname of shared object */
        void            *dli_fbase;     /* Base address of shared object */
        const char      *dli_sname;     /* Name of nearest symbol */
        void            *dli_saddr;     /* Address of nearest symbol */
} Dl_info;

我们一会儿通过 dladdr() 处理后得到的有效信息都会放进这个结构体中

  • dli_fname:路径名,例如
/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation
  • dli_fbase:镜像的起始地址(Base address of shared object,比如上面的 CoreFoundation)
  • dli_saddr :符号的地址
  • dli_sname:符号的名字,即下面的第四列的函数信息
Thread 0:
0     libsystem_kernel.dylib          0x11135810a __semwait_signal + 94474
1     libsystem_c.dylib               0x1110dab0b sleep + 518923
2     QYPerformanceMonitor            0x10dda4f1b -[ViewController tableView:cellForRowAtIndexPath:] + 7963
3     UIKit                           0x10ed4d4f4 -[UITableView _createPreparedCellForGlobalRow:withIndexPath:willDisplay:] + 1586420

LC_SYMTAB

struct symtab_command {
    uint32_t    cmd;        /* LC_SYMTAB */
    uint32_t    cmdsize;    /* sizeof(struct symtab_command) */
    uint32_t    symoff;        /* symbol table offset */
    uint32_t    nsyms;        /* number of symbol table entries */
    uint32_t    stroff;        /* string table offset */
    uint32_t    strsize;    /* string table size in bytes */
};

主要是提供符号表的偏移量,以及元素个数,还有字符串表的偏移和其长度。符号表在 Mach-O 目标文件中的地址可以通过 LC_SYMTAB 加载命令指定的 symoff 找到,对应的符号名称在 stroff,总共有 nsyms 条符号信息

LC_DYSYMTAB

这个数据结构有些复杂,有兴趣的可以阅读 loader.h 文件,内部标示了动态符号表的偏移量和符号个数

struct dysymtab_command {
    uint32_t cmd;    /* LC_DYSYMTAB */
    uint32_t cmdsize;    /* sizeof(struct dysymtab_command) */
    uint32_t indirectsymoff; /* file offset to the indirect symbol table */
    uint32_t nindirectsyms;  /* number of indirect symbol table entries */
    .......

_rebind_symbols_for_image

对于关键的代码 _rebind_symbols_for_image 以下

static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
  Dl_info info;
  if (dladdr(header, &info) == 0) {
    return;
  }

  // segment_command_64
  segment_command_t *cur_seg_cmd;
  segment_command_t *linkedit_segment = NULL;
  // LC_SYMTAB
  struct symtab_command* symtab_cmd = NULL;
 // LC_DYSYMTAB
  struct dysymtab_command* dysymtab_cmd = NULL;

    // 下面是要寻找load_command,因此越过mach_header_t
  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        //遍历寻找__LINKEDIT
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
      //遍历寻找lc_symtab
      symtab_cmd = (struct symtab_command*)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
      //遍历寻找lc_dysymtab
      dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
    }
  }

为什么要寻找这几个 LoadCommand 的信息呢?就如上面介绍的,__LINKEDIT、LC_DYSYMTAB、LC_SYMTAB 都提供了重要的信息。

__LINKEDIT 段含有为动态链接库使用的原始数据,比如符号、字符串、重定位表条目等等

阅读下面的代码以前,先来看一个计算公式

链接时程序的基址 = __LINKEDIT.VM_Address - __LINKEDIT.File_Offset + slide 的改变值

这里出现了一个 slide,那么slide是啥呢?先看一下ASLR

ASLR:Address space layout randomization,将可执行程序随机装载到内存中,这里的随机只是偏移,而不是打乱,具体做法就是通过内核将 Mach-O 的段“平移”某个随机系数。slide 正是 ASLR 引入的偏移

也就是说程序的基址等于 __LINKEDIT 的地址减去偏移量,然后再加上 ASLR 造成的偏移

// 连接时程序的基址
  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;

  // 符号表的地址 = 基址 + 符号表偏移量
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  // 字符串表的地址 = 基址 + 字符串表偏移量
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);

  // 动态符号表地址 = 基址 + 动态符号表偏移量
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

符号表中的元素都是nlist_t结构体,nlist_t中有不少学问,这里先看一下他的基础结构

/*
 * This is the symbol table entry structure for 32-bit architectures.
 */
struct nlist {
    union {
        uint32_t n_strx;    /* index into the string table */
    } n_un;
    uint8_t n_type;        /* type flag, see below */
    uint8_t n_sect;        /* section number or NO_SECT */
    int16_t n_desc;        /* see <mach-o/stab.h> */
    uint32_t n_value;    /* value of this symbol (or stab offset) */
};

然后再次遍历 load commands,寻找 __DATA 和 __DATA_CONST 的 section,并对 __nl_symbol_ptr 以及 __la_symbol_ptr 进行 rebind

cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }

      //找到__DATA和__DATA_CONST的section,对__nl_symbol_ptr以及__la_symbol_ptr进行rebind
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
        section_t *sect =
          (section_t *)(cur + sizeof(segment_command_t)) + j;
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
          // sect为Section,symtab为符号表,strtab字符串表,indirect_symtab动态符号表(indirect symbol table)
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
      }
    }

perform_rebinding_with_section

__nl_symbol_ptr 和 __la_symbol_ptr 这两个 section 中的 reserved1 字段指明对应的 indirect symbol table 起始的 index

For the two relevant sections, the section headers (struct sections from <mach-o/loader.h>) provide an offset (in the reserved1 field) into what is known as the indirect symbol table. The indirect symbol table, which is located in the __LINKEDIT segment of the binary, is just an array of indexes into the symbol table (also in __LINKEDIT) whose order is identical to that of the pointers in the non-lazy and lazy symbol sections.

So, given struct section nl_symbol_ptr, the corresponding index in the symbol table of the first address in that section is indirect_symbol_table[nl_symbol_ptr->reserved1]. The symbol table itself is an array of struct nlists (see <mach-o/nlist.h>), and each nlist contains an index into the string table in __LINKEDIT, which is where the actual symbol names are stored. So, for each pointer in __nl_symbol_ptr and __la_symbol_ptr, we are able to find the corresponding symbol and then the corresponding string to compare against the requested symbol names, and if there is a match, we replace the pointer in the section with the replacement.

结合英文,看下面的代码就很容易理解

// sect为Section,symtab为符号表,strtab字符串表,indirect_symtab动态符号表(indirect symbol table)
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  // `nl_symbol_ptr`和`la_symbol_ptr`section中的`reserved1`字段指明对应的`indirect symbol table`起始的index
    //动态符号表中第一个解析的符号的起始地址
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;

  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);

  for (uint i = 0; i < section->size / sizeof(void *); i++) {
      // 符号表的index
    uint32_t symtab_index = indirect_symbol_indices[i];
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL   | INDIRECT_SYMBOL_ABS)) {
      continue;
    }
    //获取每个须要动态解析的符号在符号表中的偏移量
    uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;

    //经过字符串表偏移量获取符号对应的字符串(符号的名字)
    char *symbol_name = strtab + strtab_offset;

上面的代码其实就能够用官方的一个图片很直观的表示

走到这里是找到了字符串表对应的符号(字符串)

如何替换实现

遍历 rebindings 数组,符号进行比较,相同的符号就进行实现替换,这里的代码比较清晰,直接贴出

struct rebindings_entry *cur = rebindings;
    while (cur) {
        for (uint j = 0; j < cur->rebindings_nel; j++) {
            if (strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
                if (cur->rebindings[j].replaced != NULL &&
                    indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
                    *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
                }
                indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
                goto symbol_loop;
            }
        }
        cur = cur->next;
    }
symbol_loop:;
}

参考连接

相关文章
相关标签/搜索