dyld
通过更新Mach-O二进制文件中特定__DATA
段的指针来绑定惰性和非惰性符号。fishhook通过传递给rebind_symbols
的符号名来确定需要更新的位置,然后用相应的替换项重新绑定这些符号。
对于给定的镜像,__DATA
段可以包含与动态符号绑定相关的两个部分:__nl_symbol_ptr
和__la_symbol_ptr
。
__nl_symbol_ptr
是指向非延迟绑定数据的指针数组(这些指针在加载库时绑定)。
__la_symbol_ptr
是指向导入函数的指针数组,通常在第一次调用该符号时由名为dyld_stub_binder
的例程填充(也可以在启动时告诉dyld
绑定这些指针)。
为了找到对应于这些部分中某个特定位置的符号的名称,我们需要通过几个间接层来进行查看。
对于两个相关部分,section header
(<mach-o/loader.h>
中声明的struct section
)提供了一个偏移量(在reserved1
字段中),指向所谓的间接符号表。
间接符号表位于二进制文件的__LINKEDIT
段中,它只是符号表(也在__LINKEDIT
中)中的索引数组,其顺序与非惰性和惰性符号部分中的指针顺序相同。因此,对于struct section nl_symbol_ptr
,该部分中第一个地址在符号表中的对应索引是indirect_symbol_table[nl_symbol_ptr->reserved1]
。
符号表本身是一个struct nlist
数组(请参见<mach-o/nlist.h>
),每个nlist
都包含一个指向__LINKEDIT
中字符串表的索引,其中存储了实际的符号名。因此,对于__nl_symbol_ptr
和__la_symbol_ptr
中的每个指针,我们都可以找到相应的符号,然后找到相应的字符串与请求的符号名进行比较,如果有匹配项,我们用替换项替换节中的指针。
//---------------------------------更改NSLog-----------
// Holds the original NSLog implementation. It is filled in through the
// `replaced` slot of the rebinding set up in -[ViewController viewDidLoad].
static void(*sys_nslog)(NSString * format,...);

// Replacement installed in place of NSLog.
// Appends a fixed suffix to the caller's format string and forwards the
// result to the original implementation stored in sys_nslog.
//
// The variadic arguments are expanded here with a va_list. A variadic call
// cannot forward its own arguments to another variadic function, so passing
// only `format` would leave any format specifier reading arguments that were
// never supplied.
void my_nslog(NSString * format,...){
    // A nil format cannot be expanded; hand it to the original untouched.
    if (format == nil) {
        sys_nslog(format);
        return;
    }

    NSString *suffixedFormat = [format stringByAppendingString:@"你咋又来了 \n"];

    va_list args;
    va_start(args, format);
    NSString *message = [[NSString alloc] initWithFormat:suffixedFormat arguments:args];
    va_end(args);

    // Pass the finished text as an argument so that any '%' it contains is
    // not interpreted a second time.
    sys_nslog(@"%@", message);
}
@implementation ViewController

// Logs once, hooks NSLog through rebind_symbols, then logs again so the two
// outputs can be compared.
- (void)viewDidLoad {
    [super viewDidLoad];

    // Emitted before the hook is installed.
    NSLog(@"log来了,老弟");

    // One rebinding: route "NSLog" to my_nslog and store the previous
    // pointer in sys_nslog.
    struct rebinding rebs[1] = {
        {
            .name = "NSLog",
            .replacement = my_nslog,
            .replaced = (void *)&sys_nslog,
        },
    };
    rebind_symbols(rebs, 1);

    // Emitted after the hook is installed.
    NSLog(@"log来了,老弟");
}

@end
复制代码
运行结果:
2020-03-16 09:47:38.526862+0800 Demo[28657:5210895] log来了,老弟
2020-03-16 09:47:38.536892+0800 Demo[28657:5210895] log来了,老弟你咋又来了
复制代码
MachOView会弹出输入框让你输入PID。
这个PID在Xcode的Show the Debug navigator菜单下,可以用⌘ + 7快速切过来。这里我们可以看到进程的PID,输入到上面的框中。
// One node of a singly linked list; each node holds one batch of rebindings.
struct rebindings_entry {
// Heap-allocated copy of the caller's rebinding array (made in prepend_rebindings).
struct rebinding *rebindings;
// Number of elements in `rebindings`.
size_t rebindings_nel;
// Next node in the list; new nodes are inserted at the head, so this is the
// previously inserted batch, or NULL for the first one.
struct rebindings_entry *next;
};
// Head of the list that rebind_symbols grows and _rebind_symbols_for_image reads.
static struct rebindings_entry *_rebindings_head;
// Copies `nel` rebindings into a freshly allocated list node and pushes that
// node onto the front of the list at *rebindings_head.
// Returns 0 on success, or -1 if either allocation fails (the list is left
// untouched in that case).
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
  const size_t table_bytes = sizeof(struct rebinding) * nel;

  // Allocate the list node itself.
  struct rebindings_entry *node =
      (struct rebindings_entry *) malloc(sizeof(struct rebindings_entry));
  if (node == NULL) {
    return -1;
  }

  // Allocate room for a private copy of all `nel` rebindings.
  node->rebindings = (struct rebinding *) malloc(table_bytes);
  if (node->rebindings == NULL) {
    free(node);
    return -1;
  }

  // Snapshot the caller's array so the node does not depend on its lifetime.
  memcpy(node->rebindings, rebindings, table_bytes);
  node->rebindings_nel = nel;

  // Push to the front: the new node points at the old head and becomes the head.
  node->next = *rebindings_head;
  *rebindings_head = node;
  return 0;
}
复制代码
这里定义了rebindings_entry链表。每次进行绑定的时候,会传入struct rebinding rebindings[]数组,创建一个新的rebindings_entry结构,然后把这个结构插入链表头部。
// Callback handed to _dyld_register_func_for_add_image and also called
// directly per image (see rebind_symbols). It applies the whole global
// rebinding list to one image.
static void _rebind_symbols_for_image(const struct mach_header *header, intptr_t slide) {
// Find the matching symbols in this image and rebind them.
rebind_symbols_for_image(_rebindings_head, header, slide);
}
// Rebinds symbols in one specific image, for callers that already know which
// Mach-O image they want to patch.
// The rebindings are placed in a temporary one-node list that is released
// before returning; the global list is not modified.
// Returns the result of prepend_rebindings (0 on success, -1 on allocation failure).
int rebind_symbols_image(void *header,
                         intptr_t slide,
                         struct rebinding rebindings[],
                         size_t rebindings_nel) {
  struct rebindings_entry *scratch_list = NULL;
  const int status = prepend_rebindings(&scratch_list, rebindings, rebindings_nel);

  rebind_symbols_for_image(scratch_list, (const struct mach_header *) header, slide);

  // Release the temporary node and its copied table, if one was created.
  if (scratch_list != NULL) {
    free(scratch_list->rebindings);
    free(scratch_list);
  }
  return status;
}
// Adds the given rebindings to the global list and applies the list to the
// process's images.
// Returns 0 on success, or the negative value from prepend_rebindings if the
// rebindings could not be stored.
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  if (_rebindings_head->next == NULL) {
    // First call: register the add-image callback. Per the original note,
    // registering also invokes it for images that already exist.
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // Later calls: the callback is already registered, so walk the existing
  // images by index and rebind each one directly.
  const uint32_t image_count = _dyld_image_count();
  for (uint32_t idx = 0; idx < image_count; idx++) {
    const struct mach_header *image_header = _dyld_get_image_header(idx);
    intptr_t image_slide = _dyld_get_image_vmaddr_slide(idx);
    _rebind_symbols_for_image(image_header, image_slide);
  }
  return status;
}
复制代码
rebind_symbols_image和rebind_symbols是两个公开的方法,用于重新绑定符号。rebind_symbols_image用于指定镜像的符号绑定,rebind_symbols对所有镜像进行处理。
不管是哪个方法,最后都是调用rebind_symbols_for_image去获取相关部分的地址。
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
const struct mach_header *header,
intptr_t slide) {
Dl_info info;
// 判断当前macho是否在进程里,若是不在则直接返回
if (dladdr(header, &info) == 0) {
return;
}
// 定义好几个变量,后面去遍历查找
segment_command_t *cur_seg_cmd;
// MachO中Load Commons中的linkedit
segment_command_t *linkedit_segment = NULL;
// MachO中LC_SYMTAB
struct symtab_command* symtab_cmd = NULL;
// MachO中LC_DYSYMTAB
struct dysymtab_command* dysymtab_cmd = NULL;
// header的首地址+mach_header的内存大小
// 获得跳过mach_header的地址,也就是直接到Load Commons的地址
uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
// 遍历Load Commons 找到上面三个遍历
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
// 若是是LC_SEGMENT_64
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
// 找到linkedit
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
linkedit_segment = cur_seg_cmd;
}
}
// 若是是LC_SYMTAB,就找到了symtab_cmd
else if (cur_seg_cmd->cmd == LC_SYMTAB) {
symtab_cmd = (struct symtab_command*)cur_seg_cmd;
}
// 若是是LC_DYSYMTAB,就找到了dysymtab_cmd
else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
}
}
// 下面其中任何一个值没有都直接return
// 由于image不是须要找的image
if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
!dysymtab_cmd->nindirectsyms) {
return;
}
// Find base symbol/string table addresses
// 找到linkedit的头地址
uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
// 获取symbol_table的真实地址
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
// 获取string_table的真实地址
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
// Get indirect symbol table (array of uint32_t indices into symbol table)
// 获取indirect_symtab的真实地址
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
// 一样的,获得跳过mach_header的地址,获得Load Commons的地址
cur = (uintptr_t)header + sizeof(mach_header_t);
// 遍历Load Commons,找到对应符号进行从新绑定
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
// 若是不是__DATA段,也不是__DATA_CONST段,直接跳过
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
// 遍历全部的section
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
section_t *sect = (section_t *)(cur + sizeof(segment_command_t)) + j;
// 找懒加载表S_LAZY_SYMBOL_POINTERS
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
// 重绑定的真正函数
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
// 找非懒加载表S_NON_LAZY_SYMBOL_POINTERS
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
// 重绑定的真正函数
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
}
}
}
}
复制代码
最上面,通过header
指针和header
大小获取到加载指令的基址。然后遍历获取3个数据结构:
// The __LINKEDIT segment command found among the Mach-O load commands
segment_command_t *linkedit_segment = NULL;
// The LC_SYMTAB load command
struct symtab_command* symtab_cmd = NULL;
// The LC_DYSYMTAB load command
struct dysymtab_command* dysymtab_cmd = NULL;
复制代码
下面是比较核心的代码:
// Compute the base address that the __LINKEDIT offsets are relative to
uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
复制代码
我们来看看linkedit_segment->vmaddr对应4294995968
,linkedit_segment->fileoff对应28672
。这样可能看不太出来这是基地址,我们格式化一下:
(lldb) p/x 4294995968
(long) $0 = 0x0000000100007000
(lldb) p/x 28672
(int) $1 = 0x00007000
(lldb) p/x 4294995968 - 28672
(long) $2 = 0x0000000100000000
复制代码
我们可以看出这个部分就是拿到了image对应的内存基址。
// In-memory address of the symbol table
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
// In-memory address of the string table
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
复制代码
从struct symtab_command结构中获取到符号表和字符串表的偏移量,然后加上基址就是内存中两个表的地址了。
(lldb) p/x 0x0000000100000000 + 30200
(long) $3 = 0x00000001000075f8
(lldb) p/x 0x0000000100000000 + 33408
(long) $4 = 0x0000000100008280
复制代码
通过MachOView我们也验证了这两个地址是正确的。
// In-memory address of the indirect symbol table
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
复制代码
通过struct dysymtab_command获取间接符号表。
(lldb) p/x 0x0000000100000000 + 33224
(long) $5 = 0x00000001000081c8
复制代码
间接符号表的地址我们也获得了。
// As before, skip past the mach_header to reach the first load command
cur = (uintptr_t)header + sizeof(mach_header_t);
// Walk the load commands and rebind the matching symbols
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
// Skip segments that are neither __DATA nor __DATA_CONST
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
// Visit every section of the segment
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
section_t *sect = (section_t *)(cur + sizeof(segment_command_t)) + j;
// Lazy symbol pointer section (S_LAZY_SYMBOL_POINTERS)
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
// The function that actually performs the rebinding
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
// Non-lazy symbol pointer section (S_NON_LAZY_SYMBOL_POINTERS)
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
// The function that actually performs the rebinding
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
}
}
}
复制代码
对于给定的image
,__DATA
段包含与动态符号绑定相关的两个部分:__nl_symbol_ptr
和__la_symbol_ptr
。遍历找到这两个部分,然后进行符号重新绑定。
// Rebinds every pointer in one symbol-pointer section whose symbol name
// matches a requested rebinding.
//
// rebindings:      list of rebinding batches, searched from the head.
// section:         section header of the pointer section being patched.
// slide:           value added to section->addr to get the in-memory address.
// symtab / strtab: the image's symbol table and string table.
// indirect_symtab: the image's indirect symbol table.
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  // reserved1 is this section's starting index in the indirect symbol table.
  uint32_t *symtab_indices = indirect_symtab + section->reserved1;
  // The section's contents, viewed as an array of pointers.
  void **pointer_slots = (void **)((uintptr_t)slide + section->addr);

  for (uint slot = 0; slot < section->size / sizeof(void *); slot++) {
    // Indirect-table entry for this slot: an index into the symbol table,
    // unless it is one of the special marker values skipped below.
    const uint32_t symtab_index = symtab_indices[slot];
    if (symtab_index == INDIRECT_SYMBOL_ABS ||
        symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      continue;
    }

    // Symbol table entry -> string table offset -> symbol name.
    const uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
    char *raw_name = strtab + strtab_offset;

    // The comparison below skips the first character of the stored name
    // (presumably the leading '_' of C symbols; verify), so the name must be
    // at least two characters long to be a candidate.
    if (raw_name[0] == '\0' || raw_name[1] == '\0') {
      continue;
    }
    char *bare_name = &raw_name[1];

    // Search the batches from the head; the first name match wins.
    bool rebound = false;
    struct rebindings_entry *entry = rebindings;
    while (entry != NULL && !rebound) {
      for (uint k = 0; k < entry->rebindings_nel; k++) {
        struct rebinding *candidate = &entry->rebindings[k];
        if (strcmp(bare_name, candidate->name) != 0) {
          continue;
        }
        // Report the current pointer to the caller, unless the slot already
        // holds the replacement (which would record the hook as "original").
        if (candidate->replaced != NULL &&
            pointer_slots[slot] != candidate->replacement) {
          *(candidate->replaced) = pointer_slots[slot];
        }
        // NOTE(review): this store assumes the section is writable. The
        // caller also passes sections from SEG_DATA_CONST; confirm that
        // segment is not mapped read-only on the targeted OS versions.
        pointer_slots[slot] = candidate->replacement;
        rebound = true;
        break;
      }
      entry = entry->next;
    }
  }
}
复制代码
这个部分就像fishhook
原理里面提到的:
indirect_symtab + section->reserved1
拿到该section在间接符号表中的起始位置(即indirect_symbol_indices)。indirect_symbol_bindings
是nl_symbol_ptr
中对应的函数指针数组。符号表中的符号名都是以_
开头的,所以至少有2个字符。 &symbol_name[1] 是去掉开头_
的字符串。如果名称匹配,先把原来的函数地址保存到replaced
中的函数指针,再将原来函数的地址替换为我们要绑定的replacement
函数地址。如果觉得本文对你有所帮助,给我点个赞吧~