objc_msgSend 是基于汇编实现的,因此 hook objc_msgSend 和我们平时 hook OC 方法不同。
GitHub 上有开源项目通过 hook objc_msgSend 来监控每一个函数的耗时情况。
这篇文章对其 hook 逻辑的主要代码进行分析记录。阅读前建议先了解开源库 fishhook 的源码。
先看开源项目的主要代码:
// call(b, value): branch to the address `value` using the branch mnemonic `b`
// (the hook below always passes `blr`).
// Sequence: push x8/x9, move `value` into x12, pop x8/x9, then emit `<b> x12`.
// The push/pop brackets only the `mov`, so any change the compiler makes to
// x8/x9 while materializing the "r"(value) operand is undone before the branch.
// NOTE(review): x12 is overwritten but no clobber list is declared on the asm
// statement; the four statements are also separate asm blocks, not one.
#define call(b, value) \
__asm volatile ("stp x8, x9, [sp, #-16]!\n"); \
__asm volatile ("mov x12, %0\n" :: "r"(value)); \
__asm volatile ("ldp x8, x9, [sp], #16\n"); \
__asm volatile (#b " x12\n");
// save(): push x0..x9 onto the stack as five register pairs, each store
// pre-decrementing sp by 16 (80 bytes in total). x8/x9 are pushed first and
// x0/x1 last, so x0/x1 end up at the lowest address. Must be balanced by load().
#define save() \
__asm volatile ( \
"stp x8, x9, [sp, #-16]!\n" \
"stp x6, x7, [sp, #-16]!\n" \
"stp x4, x5, [sp, #-16]!\n" \
"stp x2, x3, [sp, #-16]!\n" \
"stp x0, x1, [sp, #-16]!\n");
// load(): inverse of save(). Pops x0..x9 as five register pairs in the reverse
// of the push order, post-incrementing sp by 16 after each load (80 bytes).
#define load() \
__asm volatile ( \
"ldp x0, x1, [sp], #16\n" \
"ldp x2, x3, [sp], #16\n" \
"ldp x4, x5, [sp], #16\n" \
"ldp x6, x7, [sp], #16\n" \
"ldp x8, x9, [sp], #16\n" );
// link(b, value): variant of call() that preserves lr across the branch.
// Pushes x8 and lr, reserves a further 16 bytes of stack, performs
// call(b, value), releases the 16 bytes, then pops x8 and lr.
// NOTE(review): the purpose of the extra 16-byte reservation is not evident
// from this file - confirm before changing.
#define link(b, value) \
__asm volatile ("stp x8, lr, [sp, #-16]!\n"); \
__asm volatile ("sub sp, sp, #16\n"); \
call(b, value); \
__asm volatile ("add sp, sp, #16\n"); \
__asm volatile ("ldp x8, lr, [sp], #16\n");
// ret(): emit a bare `ret` instruction, which returns to the address held in lr.
#define ret() __asm volatile ("ret\n");
// Replacement entry point installed in place of objc_msgSend (ARM64 only).
//
// The function is `naked`: the compiler emits no prologue or epilogue, so the
// body consists solely of the asm produced by the macros below and must keep
// the stack balanced by itself (every save() is matched by a load()).
//
// Flow: save the argument registers, report the call to before_objc_msgSend
// (passing the caller's lr), restore the arguments, call the original
// objc_msgSend, save its result registers, call after_objc_msgSend, move the
// value it returns in x0 into lr, restore the result registers, and return.
//
// NOTE(review): only x0-x9 are preserved by save()/load(); floating-point/SIMD
// argument registers are not saved here - confirm the before/after callbacks
// cannot disturb them.
__attribute__((__naked__))
static void hook_Objc_msgSend() {
// Save parameters.
/// Step 1
save()
/// Step 2
// x0 (self) and x1 (_cmd) are still intact; placing lr in x2 makes it the
// third argument of before_objc_msgSend(id self, SEL _cmd, uintptr_t lr).
__asm volatile ("mov x2, lr\n");
// NOTE(review): before_objc_msgSend declares only three parameters, so this
// write to x3 appears to have no consumer; the saved x3 is restored in Step 4.
__asm volatile ("mov x3, x4\n");
// Call our before_objc_msgSend.
/// Step 3
// `blr` overwrites lr; the original value was handed to the callback above.
call(blr, &before_objc_msgSend)
// Load parameters.
/// Step 4
load()
// Call through to the original objc_msgSend.
/// Step 5
call(blr, orig_objc_msgSend)
// Save original objc_msgSend return value.
/// Step 6
save()
// Call our after_objc_msgSend.
/// Step 7
call(blr, &after_objc_msgSend)
// restore lr
/// Step 8
// The value returned by after_objc_msgSend (in x0) becomes the return address;
// presumably it is the lr recorded in Step 3 - after_objc_msgSend is not shown here.
__asm volatile ("mov lr, x0\n");
// Load original objc_msgSend return value.
/// Step 9
load()
// return
/// Step 10
ret()
}
下面分步骤来看以上代码。
Step 1:save()
保存函数入参(x0-x8)到栈内存,因为接下来的函数调用会修改原有参数。源码里可以看到 x9 的值也被保存了,原因是栈指针移动必须满足 SP Mod 16 = 0
的条件,而 x8 寄存器只占用 8 个字节,剩余 8 个字节的空间由 x9 来填充:
// save(): push x0..x9 onto the stack as five register pairs; each `stp`
// pre-decrements sp by 16, so sp moves by 80 bytes in 16-byte steps.
// x9 is stored alongside x8 so that the last pair is also a full 16 bytes.
#define save() \
__asm volatile ( \
"stp x8, x9, [sp, #-16]!\n" \
"stp x6, x7, [sp, #-16]!\n" \
"stp x4, x5, [sp, #-16]!\n" \
"stp x2, x3, [sp, #-16]!\n" \
"stp x0, x1, [sp, #-16]!\n");
Step 2:保存 lr 到 x2,以便 call(blr, &before_objc_msgSend)
的调用。保存到 x2 是因为 before_objc_msgSend
函数的第三个参数需要传入 lr,方便后续返回;blr
指令会改变 lr 寄存器的值,所以调用前先保存 lr:
// call(b, value): load the address `value` into x12 and branch to it with the
// mnemonic `b` (e.g. `blr`). x8/x9 are pushed before the `mov` and popped
// right after it, so they hold their original values when the branch executes.
#define call(b, value) \
__asm volatile ("stp x8, x9, [sp, #-16]!\n"); \
__asm volatile ("mov x12, %0\n" :: "r"(value)); \
__asm volatile ("ldp x8, x9, [sp], #16\n"); \
__asm volatile (#b " x12\n");
/*
 * Pre-call callback invoked from the objc_msgSend hook.
 *
 * self  - message receiver
 * _cmd  - selector being sent
 * lr    - return address of the original objc_msgSend call site
 *
 * Looks up the receiver's class and records the call on the current
 * thread's call stack via push_call_record().
 */
void before_objc_msgSend(id self, SEL _cmd, uintptr_t lr) {
    Class receiver_class = object_getClass(self);
    push_call_record(self, receiver_class, _cmd, lr);
}
/*
 * Push one call record onto the current thread's call stack.
 *
 * _self, _cls, _cmd - receiver, its class, and the selector being sent
 * lr                - caller's return address, stored with the record
 *
 * Does nothing when no per-thread call stack is available. The record array
 * grows by 64 entries whenever the new index reaches the allocated length.
 * A start timestamp (microseconds, seconds taken modulo 100) is stored only
 * on the main thread while call recording is enabled.
 */
static inline void push_call_record(id _self, Class _cls, SEL _cmd, uintptr_t lr) {
    thread_call_stack *stack_info = get_thread_call_stack();
    if (!stack_info) {
        return;
    }

    const int slot = ++stack_info->index;
    if (slot >= stack_info->allocated_length) {
        stack_info->allocated_length += 64;
        /* NOTE(review): the realloc result is not checked before use. */
        stack_info->stack = (thread_call_record *)realloc(
            stack_info->stack,
            stack_info->allocated_length * sizeof(thread_call_record));
    }

    thread_call_record *record = &stack_info->stack[slot];
    record->lr = lr;
    record->cmd = _cmd;
    record->cls = _cls;
    record->self = _self;

    if (!(stack_info->is_main_thread && _call_record_enabled)) {
        return;
    }

    struct timeval now;
    gettimeofday(&now, NULL);
    record->time = (now.tv_sec % 100) * 1000000 + now.tv_usec;
}
__asm volatile ("mov x3, x4\n");
目前个人认为是冗余代码,在整个流程中似乎并没有实际作用。
Step 3:通过 blr
指令跳转执行 before_objc_msgSend
函数。这里会先保存 x8、x9 寄存器的值,原因是 __asm volatile ("mov x12, %0\n" :: "r"(value))
执行过程中会通过 x8 来保存函数地址,再进行跳转,所以要先保存 x8;和步骤 1 相同,栈指针移动必须满足 SP Mod 16 = 0
的条件,所以 x9 也一并保存。执行完之后再恢复 x8、x9:
// call(b, value): push x8/x9, move the address `value` into x12, pop x8/x9,
// then emit `<b> x12` (e.g. `blr x12`). The push/pop pair keeps x8/x9 unchanged
// across the instructions the compiler generates to materialize `value`.
#define call(b, value) \
__asm volatile ("stp x8, x9, [sp, #-16]!\n"); \
__asm volatile ("mov x12, %0\n" :: "r"(value)); \
__asm volatile ("ldp x8, x9, [sp], #16\n"); \
__asm volatile (#b " x12\n");
在 __asm volatile ("mov x12, %0\n" :: "r"(value))
处下断点可以看到,CPU 是通过 adrp
+ add
两条指令结合寻址到函数的地址并执行,过程中改变了 x8 的值。
Step 4 到 Step 6:恢复原有入参,执行原函数,然后保存原函数的返回值。
Step 7:call(blr, &after_objc_msgSend)
和步骤 3 类似,执行 hook 收尾的函数,主要是通过 TSD 返回步骤 3 保存的原 lr 寄存器内容,也就是 hook 前的 lr 寄存器值:
static inline uintptr_t pop_call_record() {
thread_call_stack *cs = get_thread_call_stack();
int curIndex = cs->index;
int nextIndex = cs->index--;
thread_call_record *pRecord = &cs->stack[nextIndex];
if (cs->is_main_thread && _call_record_enabled) {
struct timeval now;
gettimeofday(&now, NULL);
uint64_t time = (now.tv_sec % 100) * 1000000 + now.tv_usec;
if (time < pRecord->time) {
time += 100 * 1000000;
}
uint64_t cost = time - pRecord->time;
if (cost > _min_time_cost && cs->index < _max_call_depth) {
if (!_smCallRecords) {
_smRecordAlloc = 1024;
_smCallRecords = malloc(sizeof(smCallRecord) * _smRecordAlloc);
}
_smRecordNum++;
if (_smRecordNum >= _smRecordAlloc) {
_smRecordAlloc += 1024;
_smCallRecords = realloc(_smCallRecords, sizeof(smCallRecord) * _smRecordAlloc);
}
smCallRecord *log = &_smCallRecords[_smRecordNum - 1];
log->cls = pRecord->cls;
log->depth = curIndex;
log->sel = pRecord->cmd;
log->time = cost;
}
}
return pRecord->lr;
}
Step 8:__asm volatile ("mov lr, x0\n");
将步骤 7(after_objc_msgSend)返回的值(原来 lr 的初始值)写回 lr 寄存器。
Step 9 - Step 10:恢复寄存器值并返回,主要目的是还原原始函数执行之后的状态。
以上就是整个汇编 hook objc_msgSend
的主要过程,目前遗留一个问题:
__asm volatile ("mov x3, x4\n");
这行代码是否属于冗余代码呢?