最近在学习 AArch64 的 SVE 技术时,发现目前网上能找到的 GCC 版本都不支持以 intrinsic 方式调用 SVE;查阅文档后发现,GCC 要到 2020 年发布的 GCC 10 才会支持:
在 GitHub 上看到 GCC 仓库里有一个名为 aarch64/sve-acle-branch 的分支:
随后将这个分支的代码下载下来进行交叉编译,再用它去编译以 SVE intrinsic 方式实现的 SVE 测试程序,发现果然能够编译通过,也能够运行。编译方法参考《交叉编译用于生成aarch64指令的GCC (9.2)》,这里有一份已经编译好的:链接: https://pan.baidu.com/s/1s3FWLgwavaGqR427NcW-hA 提取码: kpng
测试程序:
#include <stdlib.h> #include <stdio.h> #include <arm_sve.h> // Scalar version. void add_arrays(double * restrict dst, double *src, double c, const int N) { for (int i = 0; i < N; i++) dst[i] = src[i] + c; } // Vector version void vla_add_arrays(double * restrict dst, double *src, double c, const int N) { int64_t i = 0; svbool_t pg = svwhilelt_b64(i, (int64_t)N); while (svptest_any(svptrue_b64(), pg)) { svfloat64_t vsrc = svld1(pg, src + i); svfloat64_t vdst = svadd_x(pg, vsrc, c); svst1(pg, dst + i, vdst); i += svcntd(); pg = svwhilelt_b64(i, (int64_t)N); } } // Vector version void vla_add_arrays_2(double *dst, double *src, double c, const int N) { for (int i = 0; i < N; i += svcntd()) { svbool_t Pg = svwhilelt_b64(i, N); svfloat64_t vsrc = svld1(Pg, &src[i]); svfloat64_t vdst = svadd_x(Pg, vsrc, c); svst1(Pg, &dst[i], vdst); } } int main(void) { double src[100]; double c; double dst_serial[100], dst_vla[100], dst_vla2[100]; for (int i = 0; i < 100; ++i) { src[i] = (double) i / ((double) i + 1); } c = src[rand() % 100]; add_arrays(dst_serial, src, c, 100); vla_add_arrays(dst_vla, src, c, 100); vla_add_arrays_2(dst_vla2, src, c, 100); for (int i = 0; i < 100; ++i) { printf("%f %f %f, %f, %f\n", dst_serial[i], dst_vla[i], dst_vla2[i], src[i], c); } return 0; }
编译(Makefile):
# Cross-compiles demo.c with the SVE-enabled AArch64 GCC and links it statically.
CC      = /home/pengdl/work/SVE/cross_compile/install/bin/aarch64-linux-gnu-gcc
CFLAGS  = -g -Wall -march=armv8.2-a+sve -O1
LDFLAGS = --static

.PHONY: all clean

all: demo

# No recipe here: make's built-in link rule turns demo.o into demo using
# $(CC) and $(LDFLAGS).
demo: demo.o

# The rule needs an explicit recipe: a "%.o: %.c" pattern rule without one
# would cancel make's built-in C compilation rule.
%.o: %.c
	$(CC) $(CFLAGS) -c -o $@ $<

clean:
	$(RM) *.o demo
测试运行:
$ make clean;make;
rm -f *.o demo
/home/pengdl/work/SVE/cross_compile/install/bin/aarch64-linux-gnu-gcc -g -Wall -march=armv8.2-a+sve -O1 -c -o demo.o demo.c
/home/pengdl/work/SVE/cross_compile/install/bin/aarch64-linux-gnu-gcc --static demo.o -o demo
pengdl@pengdl-dell:~/work/SVE/cross_compile/teset $ qemu-aarch64 ./demo
0.988095 0.988095 0.988095, 0.000000, 0.988095
1.488095 1.488095 1.488095, 0.500000, 0.988095
1.654762 1.654762 1.654762, 0.666667, 0.988095
1.738095 1.738095 1.738095, 0.750000, 0.988095
1.788095 1.788095 1.788095, 0.800000, 0.988095
1.821429 1.821429 1.821429, 0.833333, 0.988095
1.845238 1.845238 1.845238, 0.857143, 0.988095
1.863095 1.863095 1.863095, 0.875000, 0.988095
1.876984 1.876984 1.876984, 0.888889, 0.988095
1.888095 1.888095 1.888095, 0.900000, 0.988095
... ...
完。