memcpy的速度测试

想把一组char a[4096]的数组拷贝到short b[6][256]中,尝试过用循环移位的方式,还用中间变量short c[2048]的方式。得出的结论:
1. 移位方式效率最低
2. 借用中间变量时,如果该变量是局部变量(位于栈上),速度比使用全局中间变量略快,缺点是可能导致栈溢出。
验证代码如下:

//file name testcpyspeed.c
#include<stdio.h>
#include<unistd.h>
#include<stdlib.h>
#include<string.h>
#define MTW_VIRTUTALCHNUMS (2)
#define MTW_PHYCHNUMS (6)
#define MTW_MAXCHNUMS (MTW_VIRTUTALCHNUMS+MTW_PHYCHNUMS)
#define MTW_SAMPLESSZIE (256)
/* Bytes in one interleaved input frame: 256 samples x 8 slots x 2 bytes = 4096. */
#define MTW_FRAMEBYTES (MTW_SAMPLESSZIE * MTW_MAXCHNUMS * 2)
typedef short int16_t;
/* De-interleaved destination: one row of MTW_SAMPLESSZIE samples per row index. */
int16_t mic_data[MTW_PHYCHNUMS][MTW_SAMPLESSZIE] = {0,};
/* mic and output are not used by the functions in this listing. */
int16_t *mic[MTW_PHYCHNUMS] = {0,};
/* Receives slot 4 of every input sample group. */
int16_t ref_fft[MTW_SAMPLESSZIE] = {0,};
int16_t output[MTW_SAMPLESSZIE] = {0,};

/*
 * Assemble a signed 16-bit value from two bytes, low byte first.
 *
 * Both bytes are read as unsigned char: with a signed plain char, a byte
 * >= 0x80 would sign-extend and clobber the high byte when OR-ed in, and
 * left-shifting a negative value is undefined behavior.
 */
static int16_t mtw_le16(const char *p)
{
        const unsigned int lo = (unsigned char)p[0];
        const unsigned int hi = (unsigned char)p[1];
        const int v = (int)(lo | (hi << 8));

        /* Map 0x8000..0xFFFF onto the negative range without relying on
         * implementation-defined narrowing. */
        return (int16_t)(v >= 0x8000 ? v - 0x10000 : v);
}

/*
 * De-interleave one input frame by assembling each 16-bit sample from its
 * two bytes (low byte first).
 *
 * The frame is read as MTW_SAMPLESSZIE groups of MTW_MAXCHNUMS 16-bit slots.
 * Slots 0-3 go to mic_data[0..3], slot 4 to ref_fft, slots 6 and 7 to
 * mic_data[4] and mic_data[5]; slot 5 is not copied anywhere.
 *
 * databuff: input bytes; at least MTW_FRAMEBYTES must be readable.
 * len:      number of valid bytes in databuff.
 *
 * Returns 0 on success, -1 (after printing a message) if databuff is NULL
 * or len is smaller than one frame.
 */
static int mmap_mwsrchannel(const char *databuff,int len)
{
        int i;

        if (databuff == NULL || len < MTW_FRAMEBYTES) {
                printf("ERROR: %s %d error: length:%d\n",__func__,__LINE__,len);
                return -1;
        }
        for (i = 0; i < MTW_SAMPLESSZIE; i++) {
                /* Start of the i-th group of 8 two-byte slots. */
                const char *slot = databuff + i * (MTW_MAXCHNUMS * 2);

                mic_data[0][i] = mtw_le16(slot + 0 * 2);
                mic_data[1][i] = mtw_le16(slot + 1 * 2);
                mic_data[2][i] = mtw_le16(slot + 2 * 2);
                mic_data[3][i] = mtw_le16(slot + 3 * 2);
                ref_fft[i] = mtw_le16(slot + 4 * 2);
                mic_data[4][i] = mtw_le16(slot + 6 * 2);
                mic_data[5][i] = mtw_le16(slot + 7 * 2);
        }
        return 0;
}
/*
 * Fill the first len bytes of buff with a repeating 0..255 ramp
 * (byte k receives k modulo 256). Nothing is written when len <= 0.
 */
void record_to_buff(char *buff,int len)
{
        int pos = 0;

        while (pos < len) {
                buff[pos] = pos % 256;
                pos++;
        }
}
/*
 * Benchmark driver: repeatedly fills a 4096-byte buffer with
 * record_to_buff() and de-interleaves it with mmap_mwsrchannel().
 *
 * argv[1] is the number of iterations (non-negative decimal integer).
 * Returns 0 on success, 1 on a usage error or allocation failure.
 */
int main(int argc,char *argv[])
{
        enum { FRAME_BYTES = 4096 };
        char *end = NULL;
        char *buff;
        long rounds;
        long i;

        /* argv[1] must exist before it is parsed. */
        if (argc < 2) {
                fprintf(stderr, "usage: %s <iterations>\n", argc > 0 ? argv[0] : "testor");
                return 1;
        }
        /* strtol, unlike atoi, lets us reject empty or non-numeric input. */
        rounds = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || rounds < 0) {
                fprintf(stderr, "invalid iteration count: %s\n", argv[1]);
                return 1;
        }
        buff = malloc(FRAME_BYTES);
        if (buff == NULL) {
                fprintf(stderr, "out of memory\n");
                return 1;
        }
        for (i = 0; i < rounds; i++) {
                record_to_buff(buff, FRAME_BYTES);
                mmap_mwsrchannel(buff, FRAME_BYTES);
        }
        free(buff);
        return 0;
}
linjuntao@linjuntao:~/Desktop/proctice/chartoshort$ cat teststos.c 
#include<stdio.h>
#include<unistd.h>
#include<stdlib.h>
#include<string.h>
#define MTW_VIRTUTALCHNUMS (2)
#define MTW_PHYCHNUMS (6)
#define MTW_MAXCHNUMS (MTW_VIRTUTALCHNUMS+MTW_PHYCHNUMS)
#define MTW_SAMPLESSZIE (256)
typedef short int16_t;
/* De-interleaved destination: one row of MTW_SAMPLESSZIE samples per row index. */
int16_t mic_data[MTW_PHYCHNUMS][MTW_SAMPLESSZIE] = {0,};
/* mic and output are not used by the functions in this listing. */
int16_t *mic[MTW_PHYCHNUMS] = {0,};
/* Receives slot 4 of every input sample group. */
int16_t ref_fft[MTW_SAMPLESSZIE] = {0,};
int16_t output[MTW_SAMPLESSZIE] = {0,};

/*
 * De-interleave one input frame via a temporary int16_t array on the stack.
 *
 * The input bytes are copied with memcpy into the temporary, so samples are
 * interpreted in the host's byte order. The frame is then read as
 * MTW_SAMPLESSZIE groups of MTW_MAXCHNUMS samples: slots 0-3 go to
 * mic_data[0..3], slot 4 to ref_fft, slots 6 and 7 to mic_data[4] and
 * mic_data[5]; slot 5 is not copied anywhere.
 *
 * databuff: input bytes; at least sizeof(data) (4096) must be readable.
 * len:      number of valid bytes in databuff.
 *
 * Returns 0 on success, -1 (after printing a message) if databuff is NULL
 * or len is smaller than one frame.
 */
static int mmap_mwsrchannel(const char *databuff,int len)
{
        /* 256 groups x 8 slots = 2048 samples (4 KiB of stack). */
        int16_t data[MTW_SAMPLESSZIE * MTW_MAXCHNUMS];
        int i;

        /* Cast keeps the comparison signed so a negative len is rejected. */
        if (databuff == NULL || len < (int)sizeof data) {
                printf("ERROR: %s %d error: length:%d\n",__func__,__LINE__,len);
                return -1;
        }
        memcpy(data, databuff, sizeof data);
        for (i = 0; i < MTW_SAMPLESSZIE; i++) {
                const int16_t *slot = &data[i * MTW_MAXCHNUMS];

                mic_data[0][i] = slot[0];
                mic_data[1][i] = slot[1];
                mic_data[2][i] = slot[2];
                mic_data[3][i] = slot[3];
                ref_fft[i] = slot[4];
                mic_data[4][i] = slot[6];
                mic_data[5][i] = slot[7];
        }
        return 0;
}
/*
 * Write a repeating 0..255 byte ramp into buff: the byte at offset k is
 * set to k modulo 256, for every k in [0, len). A non-positive len writes
 * nothing.
 */
void record_to_buff(char *buff,int len)
{
        int offset;

        for (offset = 0; offset != len && offset < len; ++offset) {
                *(buff + offset) = offset % 256;
        }
}
/*
 * Benchmark driver: repeatedly fills a 4096-byte buffer with
 * record_to_buff() and de-interleaves it with mmap_mwsrchannel().
 *
 * argv[1] is the number of iterations (non-negative decimal integer).
 * Returns 0 on success, 1 on a usage error or allocation failure.
 */
int main(int argc,char *argv[])
{
        enum { FRAME_BYTES = 4096 };
        char *end = NULL;
        char *buff;
        long rounds;
        long i;

        /* argv[1] must exist before it is parsed. */
        if (argc < 2) {
                fprintf(stderr, "usage: %s <iterations>\n", argc > 0 ? argv[0] : "test16_16");
                return 1;
        }
        /* strtol, unlike atoi, lets us reject empty or non-numeric input. */
        rounds = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || rounds < 0) {
                fprintf(stderr, "invalid iteration count: %s\n", argv[1]);
                return 1;
        }
        buff = malloc(FRAME_BYTES);
        if (buff == NULL) {
                fprintf(stderr, "out of memory\n");
                return 1;
        }
        for (i = 0; i < rounds; i++) {
                record_to_buff(buff, FRAME_BYTES);
                mmap_mwsrchannel(buff, FRAME_BYTES);
        }
        free(buff);
        return 0;
}
linjuntao@linjuntao:~/Desktop/proctice/chartoshort$ cat teststoglobals.c 
#include<stdio.h>
#include<unistd.h>
#include<stdlib.h>
#include<string.h>
#define MTW_VIRTUTALCHNUMS (2)
#define MTW_PHYCHNUMS (6)
#define MTW_MAXCHNUMS (MTW_VIRTUTALCHNUMS+MTW_PHYCHNUMS)
#define MTW_SAMPLESSZIE (256)
typedef short int16_t;
/* De-interleaved destination: one row of MTW_SAMPLESSZIE samples per row index. */
int16_t mic_data[MTW_PHYCHNUMS][MTW_SAMPLESSZIE] = {0,};
/* mic and output are not used by the functions in this listing. */
int16_t *mic[MTW_PHYCHNUMS] = {0,};
/* Receives slot 4 of every input sample group. */
int16_t ref_fft[MTW_SAMPLESSZIE] = {0,};
int16_t output[MTW_SAMPLESSZIE] = {0,};

/* File-scope temporary holding one frame as 256 groups x 8 slots = 2048 samples. */
int16_t data[MTW_SAMPLESSZIE * MTW_MAXCHNUMS];

/*
 * De-interleave one input frame via the file-scope temporary `data`.
 *
 * The input bytes are copied with memcpy into `data`, so samples are
 * interpreted in the host's byte order. The frame is then read as
 * MTW_SAMPLESSZIE groups of MTW_MAXCHNUMS samples: slots 0-3 go to
 * mic_data[0..3], slot 4 to ref_fft, slots 6 and 7 to mic_data[4] and
 * mic_data[5]; slot 5 is not copied anywhere.
 *
 * databuff: input bytes; at least sizeof(data) (4096) must be readable.
 * len:      number of valid bytes in databuff.
 *
 * Returns 0 on success, -1 (after printing a message) if databuff is NULL
 * or len is smaller than one frame.
 */
static int mmap_mwsrchannel(const char *databuff,int len)
{
        int i;

        /* Cast keeps the comparison signed so a negative len is rejected. */
        if (databuff == NULL || len < (int)sizeof data) {
                printf("ERROR: %s %d error: length:%d\n",__func__,__LINE__,len);
                return -1;
        }
        memcpy(data, databuff, sizeof data);
        for (i = 0; i < MTW_SAMPLESSZIE; i++) {
                const int16_t *slot = &data[i * MTW_MAXCHNUMS];

                mic_data[0][i] = slot[0];
                mic_data[1][i] = slot[1];
                mic_data[2][i] = slot[2];
                mic_data[3][i] = slot[3];
                ref_fft[i] = slot[4];
                mic_data[4][i] = slot[6];
                mic_data[5][i] = slot[7];
        }
        return 0;
}
/*
 * Populate buff[0..len-1] with the pattern 0, 1, ..., 255, 0, 1, ...
 * (each byte is its own offset modulo 256). Does nothing when len <= 0.
 */
void record_to_buff(char *buff,int len)
{
        int idx = 0;

        if (len <= 0) {
                return;
        }
        do {
                buff[idx] = idx % 256;
                idx += 1;
        } while (idx < len);
}
/*
 * Benchmark driver: repeatedly fills a 4096-byte buffer with
 * record_to_buff() and de-interleaves it with mmap_mwsrchannel().
 *
 * argv[1] is the number of iterations (non-negative decimal integer).
 * Returns 0 on success, 1 on a usage error or allocation failure.
 */
int main(int argc,char *argv[])
{
        enum { FRAME_BYTES = 4096 };
        char *end = NULL;
        char *buff;
        long rounds;
        long i;

        /* argv[1] must exist before it is parsed. */
        if (argc < 2) {
                fprintf(stderr, "usage: %s <iterations>\n", argc > 0 ? argv[0] : "teststoglobals");
                return 1;
        }
        /* strtol, unlike atoi, lets us reject empty or non-numeric input. */
        rounds = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || rounds < 0) {
                fprintf(stderr, "invalid iteration count: %s\n", argv[1]);
                return 1;
        }
        buff = malloc(FRAME_BYTES);
        if (buff == NULL) {
                fprintf(stderr, "out of memory\n");
                return 1;
        }
        for (i = 0; i < rounds; i++) {
                record_to_buff(buff, FRAME_BYTES);
                mmap_mwsrchannel(buff, FRAME_BYTES);
        }
        free(buff);
        return 0;
}
//编译:
cc testcpyspeed.c -o testor ;gcc teststos.c -o test16_16  ; gcc teststoglobals.c  -o teststoglobals
//验证:
linjuntao@linjuntao:~/Desktop/proctice/chartoshort$ time ./testor 200000

real    0m1.999s
user    0m1.996s
sys     0m0.000s
linjuntao@linjuntao:~/Desktop/proctice/chartoshort$ time ./test16_16 200000

real    0m1.661s
user    0m1.660s
sys     0m0.000s
linjuntao@linjuntao:~/Desktop/proctice/chartoshort$ time ./teststoglobals 200000

real    0m1.658s
user    0m1.656s
sys     0m0.000s

linjuntao@linjuntao:~/Desktop/proctice/chartoshort$ time ./test16_16 4000000

real    0m33.031s
user    0m33.028s
sys     0m0.000s
linjuntao@linjuntao:~/Desktop/proctice/chartoshort$ time ./teststoglobals 4000000

real    0m33.157s
user    0m33.156s
sys     0m0.000s
linjuntao@linjuntao:~/Desktop/proctice/chartoshort$ time ./testor 4000000

real    0m40.298s
user    0m40.292s
sys     0m0.004s

如果执行频率比较高,还是使用局部临时变量拷贝比较快。不过从上面的数据看,局部与全局中间变量的耗时差距不到 1%,基本在测量误差范围内;两者都明显快于移位方式(约快 20%)。