研究gcc中不定长数组的实现方式

时间 2019-11-15

标签研究 gcc 不定数组实现方式栏目 GCC 繁體版

原文原文链接

一直对C99标准中的不定长数组（变长数组，VLA）很好奇，不知道编译器是怎么实现这种功能的，我猜想的步骤如下：

以下面的代码为例：

#include <stdio.h>
/*
 * Demo program: reads a length from stdin, declares a C99 variable-length
 * array (VLA) of that many chars, and prints the value sizeof yields for it.
 * Always returns 0.
 */
int main(){
    int len;
    /* Run-time array length typed by the user; scanf's return value is not checked. */
    scanf("%d",&len);
    /* C99 VLA: the element count is only known at run time. */
    /* NOTE(review): len is never validated; a non-positive length is undefined behavior for a VLA. */
    char buff[len];
    
    int size;
    /* For a VLA operand, sizeof is evaluated at run time rather than at compile time. */
    size = sizeof(buff);
    printf("%d",size);
    return 0;
}

当程序运行到声明数组这一步，也就是char buff[len]这里时，main函数的栈帧会增加len*sizeof(char)个字节来容纳buff数组。猜想终究只是猜想，不如亲自动手反汇编，看看汇编代码，理解得更清楚。

编译上面的代码之后使用IDA反汇编，得到如下代码。我经过一上午的努力，终于把代码看懂并加上了注释（请不要嘲讽我，毕竟新手上路）。

.text:00401350 sub_401350      proc near               ; compiled main() of the C sample shown above (IDA listing, x86-32, Intel syntax)
.text:00401350
.text:00401350 func_arg_0      = dword ptr -40h        ; outgoing call argument slot 0 (used as [esp+40h-40h] = [esp+0])
.text:00401350 func_arg_1      = dword ptr -3Ch        ; outgoing call argument slot 1 (used as [esp+40h-3Ch] = [esp+4])
.text:00401350 var_38          = dword ptr -38h        ; used as [esp+40h-38h] = [esp+8]: start of the buff storage
.text:00401350 buff_len        = dword ptr -28h        ; int len  (filled in by scanf)
.text:00401350 buff_size       = dword ptr -24h        ; int size (receives sizeof(buff))
.text:00401350 var_20          = dword ptr -20h        ; receives the start address of buff
.text:00401350 buff_len_sub_1  = dword ptr -1Ch        ; receives len - 1
.text:00401350 var_C           = dword ptr -0Ch        ; not referenced by name in this listing
.text:00401350 arg_0           = dword ptr  4          ; only used to form esp+4 in the prologue
.text:00401350
.text:00401350                 lea     ecx, [esp+arg_0] ; ecx = esp+4 (stack address just above the return address)
.text:00401354                 and     esp, 0FFFFFFF0h ; align esp down to a 16-byte boundary
.text:00401357                 push    dword ptr [ecx-4] ; re-push the return address ([original esp])
.text:0040135A                 push    ebp             ; save the caller's ebp
.text:0040135B                 mov     ebp, esp        ; establish the frame pointer
.text:0040135D                 push    esi             ; save esi (used as the divisor below)
.text:0040135E                 push    ebx             ; save ebx (used to remember esp below)
.text:0040135F                 push    ecx             ; save ecx (original esp+4); popped in the epilogue to restore esp
.text:00401360                 sub     esp, 2Ch        ; reserve 2Ch bytes: locals plus outgoing argument slots
.text:00401363                 call    sub_401990      ; body not shown; presumably the MinGW __main startup hook -- TODO confirm
.text:00401368                 mov     eax, esp
.text:0040136A                 mov     ebx, eax        ; ebx = esp before the VLA is allocated (restored at 004013CF)
.text:0040136C                 lea     eax, [ebp+buff_len] ; eax = &len
.text:0040136F                 mov     [esp+40h+func_arg_1], eax ; scanf arg 1: address of len
.text:00401373                 mov     [esp+40h+func_arg_0], offset aD ; scanf arg 0: "%d"
.text:0040137A                 call    scanf           ; scanf("%d", &len)
.text:0040137F                 mov     ecx, [ebp+buff_len] ; ecx = len
.text:00401382                 lea     eax, [ecx-1]    ; eax = len - 1
.text:00401385                 mov     [ebp+buff_len_sub_1], eax ; store len - 1 (presumably the highest valid index; not read again in this listing)
.text:00401388                 mov     edx, ecx        ; edx = len
.text:0040138A                 mov     eax, 10h        ; eax = 16
.text:0040138F                 sub     eax, 1          ; eax = 15 (alignment - 1)
.text:00401392                 add     eax, edx        ; eax = len + 15
.text:00401394                 mov     esi, 10h        ; esi = 16 (divisor)
.text:00401399                 mov     edx, 0          ; zero the high half of the dividend edx:eax
.text:0040139E                 div     esi             ; eax = (len + 15) / 16  (unsigned division)
.text:0040139E                                         ; edx = (len + 15) % 16  (not read again in this listing)
.text:004013A0                 imul    eax, 10h        ; eax = eax * 16, i.e. len rounded up to a multiple of 16
.text:004013A3                 call    sub_401BD0      ; body not shown; eax and ecx are read afterwards, so it presumably preserves them (likely a stack probe -- TODO confirm)
.text:004013A8                 sub     esp, eax        ; allocate buff: move esp down by the rounded size
.text:004013A8                                         ; eax is the number of bytes actually reserved for buff
.text:004013AA                 lea     eax, [esp+40h+var_38] ; eax = esp+8: skip the two outgoing argument slots
.text:004013AE                 add     eax, 0          ; adds zero; the address is unchanged
.text:004013B1                 mov     [ebp+var_20], eax ; save the start address of buff
.text:004013B4                 mov     [ebp+buff_size], ecx ; size = len: sizeof(buff) is the requested length, not the rounded allocation
.text:004013B7                 mov     eax, [ebp+buff_size] ; eax = size
.text:004013BA                 mov     [esp+40h+func_arg_1], eax ; printf arg 1: size
.text:004013BE                 mov     [esp+40h+func_arg_0], offset aD ; printf arg 0: "%d"
.text:004013C5                 call    printf          ; printf("%d", size)
.text:004013CA                 mov     eax, 0          ; return value 0
.text:004013CF                 mov     esp, ebx        ; release the VLA: restore the pre-allocation esp
.text:004013D1                 lea     esp, [ebp-0Ch]  ; esp -> saved ecx (three pushes below ebp)
.text:004013D4                 pop     ecx             ; ecx = original esp+4
.text:004013D5                 pop     ebx             ; restore ebx
.text:004013D6                 pop     esi             ; restore esi
.text:004013D7                 pop     ebp             ; restore the caller's ebp
.text:004013D8                 lea     esp, [ecx-4]    ; esp = original esp (points at the return address)
.text:004013DB                 retn
.text:004013DB sub_401350      endp

代码不是很长，我截取了重点部分的代码：

buff_len        = dword ptr -28h
mov     ecx, [ebp+buff_len] ; ecx = requested array length (len)
mov     edx, ecx        ; edx = len
mov     eax, 10h        ; eax = 16
sub     eax, 1          ; eax = 15 (alignment - 1)
add     eax, edx        ; eax = len + 15
mov     esi, 10h        ; esi = 16 (divisor)
mov     edx, 0          ; zero the high half of the dividend edx:eax
div     esi             ; eax = (len + 15) / 16  (unsigned division)
                        ; edx = (len + 15) % 16
imul    eax, 10h        ; eax = eax * 16, i.e. len rounded up to a multiple of 16
sub     esp, eax        ; allocate buff on the stack by lowering esp
                        ; eax is the number of bytes actually reserved for buff

这是程序在执行scanf之后进行的操作，此时用户输入的长度已经被储存到[ebp+buff_len]中去了。

简单观察后可以看到，动态数组所占空间的大小是由固定公式计算得到的，公式为：

数组所占内存大小 = ((申请大小 + 15) / 16) * 16（单位为字节，其中“/”为整数除法，即把申请大小向上取整到16的倍数）

分析到这里可以确定我的猜想是不正确的了。实际上数组所占的空间是以16字节的倍数增长的，也就是说即便你申请1字节的数组，实际得到的数组却占用了16字节。我不明白这么做有什么意义，而且还有些浪费内存。也许是为了内存对齐吧（函数开头的and esp, 0FFFFFFF0h同样是在把栈指针对齐到16字节）。

而我在运行上面程序的时候，sizeof输出的结果却是我们所申请的那个数值，按道理不应该输出数组实际占用的内存吗？看了反汇编代码才发现这根本就是一个骗局！原来数组的大小在编译期根本是无法计算的，之所以sizeof能工作，是因为程序编译以后用了一个变量储存了数组的长度（而且不是实际占用内存的大小，而是我们申请的大小），之后使用sizeof关键字的时候就直接读取这个变量，得到数组的大小。

不得不感叹，很多华丽表象的内部是那么混乱。