OpenMP Tutorial: Seung-Jai Min
OpenMP Tutorial: Seung-Jai Min
Seung-Jai Min
(smin@purdue.edu)
• Thread Libraries
- Win32 API / Posix threads
• Compiler Directives (OUR FOCUS)
- OpenMP (Shared memory programming)
• Message Passing Libraries
- MPI (Distributed memory programming)
#include "omp.h"
/*
 * Minimal "parallel for" example: computes A[i] = B[i] + k*C[i] for every
 * element, with the loop marked for OpenMP work-sharing.
 *
 * Compared with the bare slide version, main has a standard signature and
 * return value, N is a compile-time constant (so the arrays are ordinary
 * fixed-size arrays rather than VLAs), and every value read inside the
 * parallel loop is given a defined value first.
 */
int main(void)
{
    enum { N = 1000 };
    int i;
    int k = 2;              /* arbitrary scale factor; any defined value works */
    double A[N], B[N], C[N];

    /* Initialize the inputs so the parallel loop never reads indeterminate data. */
    for (i = 0; i < N; i++) {
        B[i] = i;
        C[i] = 1.0;
    }

    /* Iterations are independent: each one writes only its own A[i]. */
#pragma omp parallel for
    for (i = 0; i < N; i++) {
        A[i] = B[i] + k * C[i];
    }

    return 0;
}
printf("program begin\n");
N = 1000;                    /* Serial */
M = 500;                     /* Serial */
#pragma omp parallel for
for (j=0; j<M; j++)          /* Parallel */
    p[j] = q[j] - r[j];
printf("program done\n");    /* Serial */
A single copy of "A" is shared between all threads.
pooh(0,A) pooh(1,A) pooh(2,A) pooh(3,A)
printf("all done\n");
Implicit barrier: threads wait at the end of the parallel region for all threads to finish before proceeding.
ECE 563 Programming Parallel Machines 14
The OpenMP API
Combined parallel work-share
• OpenMP shortcut: Put the “parallel” and the
work-share on the same line
Separate "parallel" and work-share directives:

int i;
double res[MAX];
#pragma omp parallel
{
    #pragma omp for
    for (i=0;i< MAX; i++) {
        res[i] = huge();
    }
}

Combined "parallel for" directive:

int i;
double res[MAX];
#pragma omp parallel for
for (i=0;i< MAX; i++) {
    res[i] = huge();
}
the same OpenMP
ECE 563 Programming Parallel Machines 15
Shared Memory Model
private private
int main()
{
    int ii, jj; /* PRIVATE */
    int B[100]; /* SHARED */
    #pragma omp parallel private(jj)
    {
        int kk = 1; /* PRIVATE */
        #pragma omp for
        for (ii=0; ii<N; ii++)
            for (jj=0; jj<N; jj++)
                A[ii][jj] = foo(B[ii][jj]);
    }
}

int foo(int x)
{
    /* PRIVATE */
    int count=0;
    return x*count;
}
ECE 563 Programming Parallel Machines 19
Work Sharing Construct
Loop Construct
#pragma omp for [clause[[,] clause] …] new-line
for-loops
137 120 105 100 100 100 100 100 100 100 38
p0 p3 p0 p1 p2 p3 p0 p1 p2 p3 p0
sum = 0;
#pragma omp parallel for reduction (+:sum)
for (i=0; i<N; i++)
{
sum = sum + A[i];
}
#define N 12000
#pragma omp parallel for private(j)
for (i=1; i<N-1; i++)
for (j=1; j<N-1; j++)
a[i][j] = (b[i][j-1]+b[i][j]+b[i][j+1]+
b[i-1][j]+b[i+1][j])/5.0;
#define N 12000
#pragma omp parallel for private(j)
for (i=1; i<N-1; i++)
for (j=1; j<N-1; j++)
a[i][j] = b[i][j];
#define N <big-number>
chunk = ???;
#pragma omp parallel for schedule (static, chunk)
for (i=1; i<N-1; i++)
a[i][j] = ( b[i-2] + b[i-1] + b[i] +
b[i+1] + b[i+2] )/5.0;
#define N 12000
#pragma omp parallel for private(j)
for (i=1; i<N-1; i++)
for (j=i; j<N-1; j++)
a[i][j] = (b[i][j-1]+b[i][j]+b[i][j+1]+
b[i-1][j]+b[i+1][j])/5.0;
http://www.openmp.org
http://openmp.org/wp/resources