在OpenMP并行程序设计中,for循环并行是一种独立的并行指令,它非常重要!它的指令格式是:
#include <omp.h> #pragma omp parallel for for(i = begin;i < end;++i) { // Content }
parallel for指令的后面必须紧跟for语句块!
而且for循环并行必须处在parallel并行区块内!否则会被当成串行执行!
此前一篇博客已经说明了,OpenMP的并行计算模式是插入并行语句的方法,如上图。当我们的串行程序执行到并行语句块的时候,会从主线程中派生出线程组,然后线程组对计算任务进行均分并行计算。并行计算结束后重新回到串行程序。
#include <omp.h>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <ctime>

// Use an integer literal: 5e7 is a double literal and not a valid
// integral constant expression for an array bound.
const int maxn = 50000000;
const int mod = 10000;
int vec1[maxn], vec2[maxn], vec[maxn], i;

// Element-wise product of two random vectors, timed once serially and
// once inside an OpenMP parallel region.
int main() {
    srand((unsigned int)time(NULL));
    for (i = 0; i < maxn; ++i) {
        vec1[i] = rand() % mod;
        vec2[i] = rand() % mod;
    }
    printf("--------------before parallel compute---------------\n");
    // omp_get_wtime() measures wall-clock seconds. clock() would be wrong
    // twice over: its unit is ticks (not ms), and it sums CPU time across
    // all threads, which hides any parallel speedup. Printing a clock_t
    // with %d is also a format-specifier mismatch (undefined behavior).
    double s = omp_get_wtime();
    for (i = 0; i < maxn; ++i) {
        vec[i] = vec1[i] * vec2[i];
    }
    double t = omp_get_wtime();
    printf("--------------used time = %.3f ms---------------\n", (t - s) * 1000.0);
    s = omp_get_wtime();
    printf("--------------enter parallel compute---------------\n");
#pragma omp parallel num_threads(20) shared(vec1, vec2, vec) private(i)
    {
#pragma omp for
        for (i = 0; i < maxn; ++i) {
            vec[i] = vec1[i] * vec2[i];
        }
    }
    t = omp_get_wtime();
    printf("--------------used time = %.3f ms---------------\n", (t - s) * 1000.0);
    return 0;
}
#include <omp.h>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <cstring>
#include <ctime>

const int maxn = 1000;
const int mod = 10000;
int vec1[maxn][maxn], vec2[maxn][maxn], vec[maxn][maxn], i, j, k;

// Naive O(n^3) matrix multiplication, timed serially and in parallel,
// then reports the speedup ratio.
int main() {
    srand((unsigned int)time(NULL));
    for (i = 0; i < maxn; ++i) {
        for (j = 0; j < maxn; ++j) {
            vec1[i][j] = rand() % mod;
            vec2[i][j] = rand() % mod;
        }
    }
    printf("--------------before parallel compute---------------\n");
    // Wall-clock timing: clock() sums CPU time over every thread, which
    // makes a parallel region look as slow as (or slower than) the serial
    // code; it also cannot be printed with %d (clock_t mismatch, UB).
    double s1 = omp_get_wtime();
    for (i = 0; i < maxn; ++i) {
        for (j = 0; j < maxn; ++j) {
            for (k = 0; k < maxn; ++k) {
                vec[i][j] += (vec1[i][k] * vec2[k][j]);
            }
        }
    }
    double t1 = omp_get_wtime();
    printf("----------------used time = %.3f ms-----------------\n", (t1 - s1) * 1000.0);
    printf("--------------enter parallel compute---------------\n");
    // The serial pass already filled vec, and the loop accumulates with +=,
    // so the parallel result would be wrong without resetting it first.
    memset(vec, 0, sizeof vec);
    double s2 = omp_get_wtime();
    // collapse(2) merges the i/j loops into one iteration space; each (i, j)
    // cell is written by exactly one thread, so there is no race on vec.
#pragma omp parallel for collapse(2) schedule(dynamic) private(i, j, k) shared(vec1, vec2, vec)
    for (i = 0; i < maxn; ++i) {
        for (j = 0; j < maxn; ++j) {
            for (k = 0; k < maxn; ++k) {
                vec[i][j] += (vec1[i][k] * vec2[k][j]);
            }
        }
    }
    double t2 = omp_get_wtime();
    printf("----------------used time = %.3f ms-----------------\n", (t2 - s2) * 1000.0);
    printf("\n----------------the speedup ratio = %lf---------------\n", (t1 - s1) / (t2 - s2));
    return 0;
}
一旦在前面有使用过
#pragma omp parallel ...
语句,并且当前还处在这个并行区内,这时如果你想使用for循环并行,千万不要再写一次:
#pragma omp parallel for
这样的操作,因为这样会让线程组重组,相当于有两重并行。举个例子看看:
#include <omp.h>
#include <iostream>
using namespace std;

// Correct pattern: one parallel region whose work-sharing "omp for"
// splits the 5 iterations across the team exactly once.
int main() {
    #pragma omp parallel num_threads(10)
    {
        #pragma omp for
        for (int idx = 0; idx < 5; ++idx) {
            // Serialize output so lines from different threads don't interleave.
            #pragma omp critical
            {
                cout << "i = " << idx << endl;
            }
        }
    }
    return 0;
}
此时的运行结果是:
#include <omp.h>
#include <iostream>
using namespace std;

// Deliberately WRONG example: the inner "parallel for" creates a brand-new
// thread team inside EACH of the 4 outer threads, so i = 0..4 is printed
// 4 times over instead of once.
int main() {
    #pragma omp parallel num_threads(4)
    {
        #pragma omp parallel for // look closely at this line
        for (int idx = 0; idx < 5; ++idx) {
            // Serialize output so lines from different threads don't interleave.
            #pragma omp critical
            {
                cout << "i = " << idx << endl;
            }
        }
    }
    return 0;
}
看看这样“画蛇添足”的运行结果:
我们会发现,这个0 ~ 4被重复执行了!这样既会影响并行程序的结果,也会让运行的开销变大!
造成这种结果的原因是:parallel命令会告诉操作系统,此时要重组线程组,重新开始并行执行。这下好了,每个线程到了那句指令的时候都重组一次线程组,白白多执行4次(取决于线程数)。
其实同步啊,在并行计算里有两种含义:
第一:线程/进程的运行有快有慢,我们想要在某处让各个线程/进程达到同样的状态,这叫并行程序的运行同步。
第二:对于共享内存模型,我们需要控制数据的访问,达到线程同步。这样做的目的是防止多个进程/线程同时访问并修改某个数据或内存,导致数据失真。举个例子:初始有变量a = 2,比如线程A要让a++,线程B要让a*=2。如果不控制访问,结果将取决于两个线程执行的先后顺序,数据就出现了同步问题;因此需要让变量a(或者某语句块)在同一时刻只能由一个线程进入执行,其他线程等待。这就叫数据同步。
#include <omp.h>
#include <iostream>
using namespace std;

// Demonstrates data synchronization: "critical" serializes updates to the
// shared counter cnt so no increment is lost.
int main() {
    int i, len, cnt = 0;
    #pragma omp parallel num_threads(6)
    {
        // Original code had every thread write the shared `len` at once — a
        // data race. Let a single thread write it; the implicit barrier at
        // the end of "single" guarantees all threads see len before the loop.
        #pragma omp single
        len = omp_get_num_threads();
        #pragma omp for private(i)
        for (i = 0; i < len; ++i) {
            #pragma omp critical
            {
                cout << "Current is " << i << endl;
                cnt += i;
            }
        }
    }
    cout << "cnt = " << cnt << endl;
    return 0;
}