Pseudo Code of MPI Programs
ASSIGNMENT I
SUBMITTED BY:
MOHD ASHFAQ
192CD013
MTECH CDS, 1ST SEM
Q1. Write an OpenMP program which can efficiently parallelize a prime
number generator. Given a large input N, the program should generate
all the prime numbers up to N. Note the time taken by the sequential
program to generate the prime numbers.
a) Try and report the best number of OMP threads required to
parallelize this efficiently. Experiment with different numbers of
threads (2, 4, 8, ...). Compare the time taken by the parallel version
of your code with the serial code.
b) Experiment with the parallel for construct's schedule clause.
Use different schedule kinds to check whether they have any effect
on the execution time. Use different chunk sizes for each kind.
I. PSEUDOCODES:
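A minimal C sketch of one way to parallelize the generator, assuming trial-division primality testing and a shared flag array (not necessarily the exact code used for the timings below); the thread count and the schedule kind/chunk size in the pragma are the knobs for parts (a) and (b):

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/* Trial-division primality test */
static int is_prime(long n)
{
    if (n < 2) return 0;
    for (long d = 2; d * d <= n; d++)
        if (n % d == 0) return 0;
    return 1;
}

int main(int argc, char *argv[])
{
    long N = (argc > 1) ? atol(argv[1]) : 1000000;
    int *prime = calloc(N + 1, sizeof(int));
    double start = omp_get_wtime();

    /* Each iteration is independent; the schedule clause is the experiment's knob */
    #pragma omp parallel for schedule(dynamic, 1000)
    for (long i = 2; i <= N; i++)
        prime[i] = is_prime(i);

    double elapsed = omp_get_wtime() - start;

    long count = 0;
    for (long i = 2; i <= N; i++)
        if (prime[i]) count++;          /* or print i to list the primes */

    printf("Primes up to %ld: %ld, time: %f s\n", N, count, elapsed);
    free(prime);
    return 0;
}

The thread count is set with omp_set_num_threads() or OMP_NUM_THREADS=2,4,8,..., and the schedule kind and chunk size are changed in the pragma; the execution times measured for the different schedules are tabulated below.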
Schedule kind        Execution time (s)
Sequential           0           0           0
Static               0.001953    0.001953    0.005859
Static, chunksize    0.001953    0.002930    0.008789
Dynamic              0.000977    0.003906    0.007812
Guided               0.000977    0.001953    0.002930
Runtime              0.000977    0.007812    0.011719
III. GRAPH
I. PSEUDOCODES:
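A condensed C sketch of the master/worker matrix multiplication exercised by the run below, assuming A is 7x10 and B is 10x7 filled with ones; the master splits the rows of A among the workers (extra rows go to the first workers) and gathers the result rows back:

#include <stdio.h>
#include <mpi.h>

#define NRA 7      /* rows of A */
#define NCA 10     /* cols of A = rows of B */
#define NCB 7      /* cols of B */
#define MASTER 0

int main(int argc, char *argv[])   /* run with at least 2 tasks, e.g. mpiexec -n 7 */
{
    int numtasks, rank, numworkers, rows, offset, i, j, k;
    double a[NRA][NCA], b[NCA][NCB], c[NRA][NCB];
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    numworkers = numtasks - 1;

    if (rank == MASTER) {
        printf("mpi_mm has started with %d tasks.\n", numtasks);
        for (i = 0; i < NRA; i++) for (j = 0; j < NCA; j++) a[i][j] = 1.0;
        for (i = 0; i < NCA; i++) for (j = 0; j < NCB; j++) b[i][j] = 1.0;

        /* Split the rows of A among the workers; extra rows go to the first ones */
        int averow = NRA / numworkers, extra = NRA % numworkers;
        offset = 0;
        for (i = 1; i <= numworkers; i++) {
            rows = (i <= extra) ? averow + 1 : averow;
            printf("Sending %d rows to task %d offset=%d\n", rows, i, offset);
            MPI_Send(&offset, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
            MPI_Send(&rows, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
            MPI_Send(&a[offset][0], rows * NCA, MPI_DOUBLE, i, 1, MPI_COMM_WORLD);
            MPI_Send(&b[0][0], NCA * NCB, MPI_DOUBLE, i, 1, MPI_COMM_WORLD);
            offset += rows;
        }
        /* Collect the partial result rows */
        for (i = 1; i <= numworkers; i++) {
            MPI_Recv(&offset, 1, MPI_INT, i, 2, MPI_COMM_WORLD, &status);
            MPI_Recv(&rows, 1, MPI_INT, i, 2, MPI_COMM_WORLD, &status);
            MPI_Recv(&c[offset][0], rows * NCB, MPI_DOUBLE, i, 2, MPI_COMM_WORLD, &status);
            printf("Received results from task %d\n", i);
        }
        for (i = 0; i < NRA; i++) {
            for (j = 0; j < NCB; j++) printf("%.0f ", c[i][j]);
            printf("\n");
        }
    } else {
        MPI_Recv(&offset, 1, MPI_INT, MASTER, 1, MPI_COMM_WORLD, &status);
        MPI_Recv(&rows, 1, MPI_INT, MASTER, 1, MPI_COMM_WORLD, &status);
        MPI_Recv(&a[0][0], rows * NCA, MPI_DOUBLE, MASTER, 1, MPI_COMM_WORLD, &status);
        MPI_Recv(&b[0][0], NCA * NCB, MPI_DOUBLE, MASTER, 1, MPI_COMM_WORLD, &status);
        for (i = 0; i < rows; i++)          /* local rows of C = local rows of A times B */
            for (j = 0; j < NCB; j++) {
                c[i][j] = 0.0;
                for (k = 0; k < NCA; k++) c[i][j] += a[i][k] * b[k][j];
            }
        MPI_Send(&offset, 1, MPI_INT, MASTER, 2, MPI_COMM_WORLD);
        MPI_Send(&rows, 1, MPI_INT, MASTER, 2, MPI_COMM_WORLD);
        MPI_Send(&c[0][0], rows * NCB, MPI_DOUBLE, MASTER, 2, MPI_COMM_WORLD);
    }
    MPI_Finalize();
    return 0;
}

With 7 tasks there are 6 workers for 7 rows, which is why task 1 receives 2 rows and the remaining workers receive 1 in the trace below.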
II. OUTPUTS
$ mpicc 3pp.c
$ mpiexec -n 7 ./a.out
mpi_mm has started with 7 tasks.
Initializing arrays...
Array A[7][10]:
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
Array b[10][7]:
1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0
Sending 2 rows to task 1 offset=0
Sending 1 rows to task 2 offset=2
Sending 1 rows to task 3 offset=3
Sending 1 rows to task 4 offset=4
Sending 1 rows to task 5 offset=5
Sending 1 rows to task 6 offset=6
Received results from task 1
Received results from task 2
Received results from task 3
Received results from task 4
Received results from task 5
Received results from task 6
******************************************************
Result Matrix:
7 7 7 7 7 7 7
7 7 7 7 7 7 7
7 7 7 7 7 7 7
7 7 7 7 7 7 7
7 7 7 7 7 7 7
7 7 7 7 7 7 7
7 7 7 7 7 7 7
******************************************************
The process took: 0.280028
******************************************************
Array B[7][10]:
1 1 1 1 1 1 1
1 1 1 1 1 1 1
1 1 1 1 1 1 1
1 1 1 1 1 1 1
1 1 1 1 1 1 1
1 1 1 1 1 1 1
1 1 1 1 1 1 1
1 1 1 1 1 1 1
1 1 1 1 1 1 1
1 1 1 1 1 1 1
Resultant, A*B =
7 7 7 7 7 7 7
7 7 7 7 7 7 7
7 7 7 7 7 7 7
7 7 7 7 7 7 7
7 7 7 7 7 7 7
7 7 7 7 7 7 7
7 7 7 7 7 7 7
******************************************************
The process took: 0.022957
******************************************************
III. OBSERVATION
I. PSEUDOCODES:
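A minimal C sketch of the dot product with MPI_Scatter, assuming two length-50 vectors of ones as in the run below, equal slices per rank, and MPI_Reduce to sum the partial products (the actual 4Scatter.c may collect partials differently):

#include <stdio.h>
#include <mpi.h>

#define N 50    /* total vector length, divisible by the number of processes here */

int main(int argc, char *argv[])
{
    int rank, size, i;
    int A[N], B[N];
    int a_loc[N], b_loc[N];    /* local slices (only N/size elements are used) */

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int chunk = N / size;
    if (rank == 0)
        for (i = 0; i < N; i++) { A[i] = 1; B[i] = 1; }

    /* Each process receives an equal slice of A and of B */
    MPI_Scatter(A, chunk, MPI_INT, a_loc, chunk, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Scatter(B, chunk, MPI_INT, b_loc, chunk, MPI_INT, 0, MPI_COMM_WORLD);

    int local_dot = 0, dot = 0;
    for (i = 0; i < chunk; i++)
        local_dot += a_loc[i] * b_loc[i];

    /* Sum the partial dot products on the root */
    MPI_Reduce(&local_dot, &dot, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0)
        printf("DOT PRODUCT= %d\n", dot);

    MPI_Finalize();
    return 0;
}

With 10 processes each rank handles 5 elements, and the root prints DOT PRODUCT= 50, consistent with the output below.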
II. OUTPUT
A> Scatter
$ mpicc 4Scatter.c
$ mpiexec -n 10 ./a.out
Matrix A:
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Matrix B:
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
DOT PRODUCT= 50
B> Scatterv
$ mpicc 4ScatterV.c
$ mpiexec -n 10 ./a.out
Matrix A:
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Matrix B:
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
DOT PRODUCT= 50
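For the Scatterv variant only the distribution step changes: each rank may receive an unequal slice described by sendcounts[] and displs[]. A hedged fragment that would replace the two MPI_Scatter calls in the sketch above (variable names reused from there):

    /* Per-rank counts and displacements for an N that need not divide evenly */
    int sendcounts[size], displs[size];
    int base = N / size, rem = N % size, off = 0;
    for (i = 0; i < size; i++) {
        sendcounts[i] = base + (i < rem ? 1 : 0);   /* first 'rem' ranks get one extra */
        displs[i] = off;
        off += sendcounts[i];
    }
    MPI_Scatterv(A, sendcounts, displs, MPI_INT,
                 a_loc, sendcounts[rank], MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Scatterv(B, sendcounts, displs, MPI_INT,
                 b_loc, sendcounts[rank], MPI_INT, 0, MPI_COMM_WORLD);

The local accumulation loop then runs over sendcounts[rank] elements instead of a fixed chunk.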
5. The value of π is computed mathematically as follows:
π = ∫₀¹ 4/(1 + x²) dx
Write an MPI program to compute π using MPI _Bcast and MPI _Reduce.
Compare execution time for serial code and parallel code.
I. PSEUDOCODES:
A> main()
1.MPI_Init(&argc,&argv);
2.MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
3.MPI_Comm_rank(MPI_COMM_WORLD,&myid);
4.if( MASTER )
n = 100000; /* Total no. of evaluation points*/
5.MPI_Bcast( &n, 1, MPI_INT, 0, MPI_COMM_WORLD );
/* Share intervals with other processors */
6.Initialize sum = 0.0, h = 1.0/n;
7.for( i = myid+0.5 ; i < n; i += numprocs )
sum += dx_arctan(i*h);
8.mypi = h*sum;
9.MPI_Reduce( &mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD );
/* Collect the partial sums on the master */
10.if( MASTER )
Display pi;
11.MPI_Finalize();
B> dx_arctan(x):
1. Return (4.0/(1.0+x*x)); /*f(x) for calculating pi value*/
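A compilable C sketch of the above, assuming n = 100000 midpoints; MPI_Bcast shares n with every rank and MPI_Reduce sums the per-rank contributions on the master:

#include <stdio.h>
#include <mpi.h>

#define MASTER 0

/* Integrand: pi = integral of 4/(1+x^2) over [0,1] */
static double dx_arctan(double x) { return 4.0 / (1.0 + x * x); }

int main(int argc, char *argv[])
{
    int myid, numprocs, n = 0;
    double h, sum = 0.0, mypi, pi;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    if (myid == MASTER)
        n = 100000;                     /* total number of evaluation points */

    /* Every rank learns n, then evaluates its own stripe of midpoints */
    MPI_Bcast(&n, 1, MPI_INT, MASTER, MPI_COMM_WORLD);
    h = 1.0 / n;
    for (double i = myid + 0.5; i < n; i += numprocs)
        sum += dx_arctan(i * h);
    mypi = h * sum;

    /* Combine the partial results on the master */
    MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, MASTER, MPI_COMM_WORLD);
    if (myid == MASTER)
        printf("pi is approximately %.16f\n", pi);

    MPI_Finalize();
    return 0;
}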
II. OUTPUT:
$ mpicc 5.c
$ mpiexec -n 8 ./a.out
This is Process-7/8
This is Process-6/8
This is Process-4/8
This is Process-3/8
This is Process-1/8
This is Process-2/8
This is Process-0/8
This is Process-5/8
This program uses 8 processes
I. PSEUDOCODE:
1. Get N, max;
2. Initialize arr[N], hist[max];
3. for( i = 0; i < N ; i++ )/*initialize arr[] with random no */
arr[i] =(int) rand()%max;
4. omp_lock_t writelock;
5. omp_init_lock(&writelock);
6. omp_set_num_threads(thread_count);
7. #pragma omp parallel for schedule(<kind>, <chunksize>) /*parallelize the code*/
for( i = 0; i < N; i++ )
{
omp_set_lock( &writelock);
hist[arr[i]] +=1;
omp_unset_lock(&writelock);
}
8. omp_destroy_lock(&writelock);
9. for( i = 0; i < max; i++ )
{
perc[i] = 100*((float)hist[i]/(float)N);
for (int j = 0; j < perc[i]; j++ )
Display “ <pattern> ” ;
Display hist[i];
}
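A C sketch of the histogram following the pseudocode, assuming values in [0, max) and an OpenMP lock guarding the shared bins; since the lock serializes every update, #pragma omp atomic or per-thread private histograms would scale better in practice:

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

int main(void)
{
    int N, max, i;
    printf("Enter the array length: ");          scanf("%d", &N);
    printf("Enter the largest histogram value:"); scanf("%d", &max);

    int *arr = malloc(N * sizeof(int));
    int *hist = calloc(max, sizeof(int));
    for (i = 0; i < N; i++)
        arr[i] = rand() % max;                    /* random values in [0, max) */

    omp_lock_t writelock;
    omp_init_lock(&writelock);

    /* The lock makes the shared increment safe, at the cost of serializing it */
    #pragma omp parallel for schedule(static)
    for (i = 0; i < N; i++) {
        omp_set_lock(&writelock);
        hist[arr[i]] += 1;
        omp_unset_lock(&writelock);
    }
    omp_destroy_lock(&writelock);

    for (i = 0; i < max; i++) {
        float perc = 100.0f * hist[i] / N;        /* percentage of samples in bin i */
        printf("%d:", i);
        for (int j = 0; j < (int)perc; j++) putchar('*');
        printf("%d\n", hist[i]);
    }
    free(arr); free(hist);
    return 0;
}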
II. OUTPUT:
$ mpicc 7.c
$ mpiexec -n 8 ./a.out
Enter the array length: 1000
Enter the largest histogram value:10
0:***********107
1:**********95
2:**********99
3:**********97
4:**********97
5:*********89
6:**********100
7:***********106
8:***********104
9:***********106
8. Write a hybrid OpenMP-MPI program to compute Matrix-Vector
multiplication.
I. PSEUDOCODE
1. MPI_Init(&argc,&argv);
2. MPI_Comm_size(MPI_COMM_WORLD, &size);
3. MPI_Comm_rank(MPI_COMM_WORLD, &rank);
4. if( rank == MASTER )
{
4.1 **arr = malloc(sizeof(<var_type>*) * rows);
4.2 for( i = 0; i <rows; i++ )
arr[i] = malloc(sizeof(<var_type>) * columns);
4.3 Initialize arr[rows][columns];
4.4 *vector = malloc(sizeof(<var_type>) * columns);
4.5 Initialise vector[columns];
4.6 row_each = rows/size;
/* Share rows and vector with other processors */
4.7 MPI_Bcast( &row_each, 1, MPI_INT, 0, MPI_COMM_WORLD);
4.8 MPI_Bcast( vector, columns, MPI_INT, 0,
MPI_COMM_WORLD);
4.9 for( i = 1; i < size; i++ )
{
for( j = 0; j < row_each; j++ )
MPI_Send(arr[j + (i*row_each)], columns,
MPI_INT, (i), 0, MPI_COMM_WORLD);
}
4.10 for( i = 1; i < size; i++ ) /*collect row_each results from each worker*/
MPI_Recv(&result[i*row_each], row_each, MPI_INT, i,
MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
4.11 Display result; /*the product vector*/
}
5. else
{
5.1 **local_arr = malloc(sizeof(<var_type>*) * row_each);
5.2 for( i = 0; i < row_each; i++ )
local_arr[i] = malloc(sizeof(<var_type>) * columns);
5.3 for( i = 0; i < row_each; i++ )
MPI_Recv(local_arr[i], columns, MPI_INT, 0,
MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
5.4 *product = malloc(sizeof(<var_type>) * row_each);
5.5 #pragma omp parallel
{
#pragma omp for private(j)
for( i = 0; i < row_each; i++ )
{
product[i] = 0;
for( j = 0; j < columns; j++ )
product[i] += vector[j]*local_arr[i][j];
}
}
5.6 MPI_Send(product, row_each, MPI_INT, MASTER, 0,
MPI_COMM_WORLD);
}
6. MPI_Finalize();
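A compact hybrid sketch, assuming a flattened ROWS x COLS matrix of ones and ROWS divisible by the number of ranks; it swaps the per-row MPI_Send loop of the pseudocode for MPI_Scatter/MPI_Gather, and, because MPI_Bcast is a collective, the broadcast of the vector is executed by every rank rather than only inside the master branch:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <omp.h>

#define ROWS 8
#define COLS 4
#define MASTER 0

int main(int argc, char *argv[])
{
    int rank, size, i, j;
    int matrix[ROWS * COLS], vector[COLS], result[ROWS];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int row_each = ROWS / size;                 /* assumes ROWS % size == 0 */
    int *local = malloc(row_each * COLS * sizeof(int));
    int *local_res = malloc(row_each * sizeof(int));

    if (rank == MASTER) {
        for (i = 0; i < ROWS * COLS; i++) matrix[i] = 1;
        for (j = 0; j < COLS; j++) vector[j] = j + 1;
    }

    /* MPI level: every rank gets the whole vector and a block of rows */
    MPI_Bcast(vector, COLS, MPI_INT, MASTER, MPI_COMM_WORLD);
    MPI_Scatter(matrix, row_each * COLS, MPI_INT,
                local, row_each * COLS, MPI_INT, MASTER, MPI_COMM_WORLD);

    /* OpenMP level: threads share the local rows */
    #pragma omp parallel for private(j)
    for (i = 0; i < row_each; i++) {
        local_res[i] = 0;
        for (j = 0; j < COLS; j++)
            local_res[i] += local[i * COLS + j] * vector[j];
    }

    MPI_Gather(local_res, row_each, MPI_INT,
               result, row_each, MPI_INT, MASTER, MPI_COMM_WORLD);

    if (rank == MASTER) {
        printf("result vector:\n");
        for (i = 0; i < ROWS; i++) printf("%d\n", result[i]);
    }
    free(local); free(local_res);
    MPI_Finalize();
    return 0;
}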
II. OUTPUT:
printing vector:
1
7
3
7
I. PSEUDOCODE
1. MPI_Init(&argc, &argv);
2. MPI_Comm_rank(MPI_COMM_WORLD, &pid);
3. MPI_Comm_size(MPI_COMM_WORLD, &np);
4. if( pid == MASTER )
{
4.1 Get n; /*size of the array*/
4.2 for( i = 0; i < n; i++ )
a[i] = rand();
/*Initialize A[] with random no */
4.3 elements_per_process = n/np;
4.4 if( np > 1 )
{
/* distributes the portion of array to child
processes to calculate their partial sums*/
4.4.1 for(i = 1; i < np - 1; i++)
{
4.4.1.1 index =
i*elements_per_process;
4.4.1.2 MPI_Send(
&elements_per_process, 1, MPI_INT,
i, 0, MPI_COMM_WORLD);
4.4.1.3 MPI_Send(&a[index],
elements_per_process,
MPI_INT, i,
0, MPI_COMM_WORLD);
}
/* last process adds remaining elements*/
4.4.2 index = i*elements_per_process;
4.4.3 elements_left = n - index;
4.4.4 MPI_Send(&elements_left, 1, MPI_INT, i, 0,
MPI_COMM_WORLD);
4.4.5 MPI_Send(&a[index], elements_left,
MPI_INT, i, 0, MPI_COMM_WORLD);
}
/*master process add its own sub array*/
4.5 Initialize sum = 0;
4.6 for (i = 0; i < elements_per_process; i++)
sum += a[i];
/*collects partial sums from other processes*/
4.7 for( i = 1; i < np; i++ )
{
MPI_Recv(&tmp, 1, MPI_INT, MPI_ANY_SOURCE,
0, MPI_COMM_WORLD, &status);
sender = status.MPI_SOURCE;
sum += tmp;
}
4.8 Display sum; /*Print the final sum of the array*/
}
5. else /*slave processes */
{
5.1 MPI_Recv(&n_elements_received, 1, MPI_INT, 0, 0,
MPI_COMM_WORLD, &status);
5.2 MPI_Recv(&a2, n_elements_received, MPI_INT, 0, 0,
MPI_COMM_WORLD, &status);
5.3 Initialize partial_sum = 0;
5.4 for( i = 0; i < n_elements_received; i++)
partial_sum += a2[i];
/*send the partial sum to the root process*/
5.5 MPI_Send(&partial_sum, 1, MPI_INT, 0, 0,
MPI_COMM_WORLD);
}
6. MPI_Finalize();
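A compact C rendering of the pseudocode, assuming n = 100 random elements and a fixed-size buffer; ranks 1..np-2 receive n/np elements each, the last rank absorbs the remainder, and the master adds its own slice before collecting the partial sums:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define MASTER 0
#define MAXLEN 1000

int main(int argc, char *argv[])
{
    int pid, np, i, n = 0;
    int a[MAXLEN], a2[MAXLEN];
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &pid);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    if (pid == MASTER) {
        n = 100;                                     /* assumed array size */
        for (i = 0; i < n; i++) a[i] = rand() % 10;

        int per_proc = n / np, index = 0, sum = 0, tmp;
        if (np > 1) {
            /* ranks 1..np-2 get per_proc elements, the last rank gets the rest */
            for (i = 1; i < np - 1; i++) {
                index = i * per_proc;
                MPI_Send(&per_proc, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
                MPI_Send(&a[index], per_proc, MPI_INT, i, 0, MPI_COMM_WORLD);
            }
            index = i * per_proc;
            int left = n - index;
            MPI_Send(&left, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
            MPI_Send(&a[index], left, MPI_INT, i, 0, MPI_COMM_WORLD);
        }
        for (i = 0; i < per_proc; i++)               /* master's own slice */
            sum += a[i];
        for (i = 1; i < np; i++) {                   /* collect partial sums */
            MPI_Recv(&tmp, 1, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
            sum += tmp;
        }
        printf("Sum of array is: %d\n", sum);
    } else {
        int n_recv, partial = 0;
        MPI_Recv(&n_recv, 1, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &status);
        MPI_Recv(a2, n_recv, MPI_INT, MASTER, 0, MPI_COMM_WORLD, &status);
        for (i = 0; i < n_recv; i++) partial += a2[i];
        MPI_Send(&partial, 1, MPI_INT, MASTER, 0, MPI_COMM_WORLD);
    }
    MPI_Finalize();
    return 0;
}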
II. OUTPUT:
$ mpicc 9.c
$ mpiexec -n 8 ./a.out
I. PSEUDOCODE:
1. Parallel pseudocode
A> main()
1.Get a, b, n;
2.h = (b-a)/n;
3.MPI_Init(&argc, &argv);
4.MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
5.MPI_Comm_size(MPI_COMM_WORLD, &commsz);
6.local_n = n/commsz;
7.local_a = a + myrank*local_n*h;
8.local_b = local_a + local_n*h;
9.local_int = Trap(local_a, local_b, local_n, h);
10.if( myrank != MASTER )
MPI_Send ( &local_int, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD );
else
{
total_int = local_int;
for ( source = 1; source < commsz; source++ )
{
MPI_Recv ( &local_int, 1, MPI_DOUBLE, source,
0,MPI_COMM_WORLD,
MPI_STATUS_IGNORE );
total_int += local_int;
}
}
11.if( MASTER )
Display total_int; /*the final area under the curve*/
12.MPI_Finalize();
C> f(x)
1.return(1/(1+pow(x,2)));
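A compilable C sketch of the parallel trapezoidal rule, assuming the limits a = 0, b = 1 and n = 1024 trapezoids; Trap() here is the standard composite trapezoidal rule assumed for step 9:

#include <stdio.h>
#include <math.h>
#include <mpi.h>

#define MASTER 0

/* Integrand, as in the pseudocode */
static double f(double x) { return 1.0 / (1.0 + pow(x, 2)); }

/* Composite trapezoidal rule on [left, right] with n sub-intervals of width h */
static double Trap(double left, double right, int n, double h)
{
    double integral = (f(left) + f(right)) / 2.0;
    for (int i = 1; i < n; i++)
        integral += f(left + i * h);
    return integral * h;
}

int main(int argc, char *argv[])
{
    int myrank, commsz, source, local_n;
    double a = 0.0, b = 1.0;               /* assumed integration limits */
    int n = 1024;                          /* assumed number of trapezoids */
    double h, local_a, local_b, local_int, total_int;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &commsz);

    h = (b - a) / n;                       /* same width on every process */
    local_n = n / commsz;                  /* assumes n % commsz == 0 */
    local_a = a + myrank * local_n * h;
    local_b = local_a + local_n * h;
    local_int = Trap(local_a, local_b, local_n, h);

    if (myrank != MASTER) {
        MPI_Send(&local_int, 1, MPI_DOUBLE, MASTER, 0, MPI_COMM_WORLD);
    } else {
        total_int = local_int;
        for (source = 1; source < commsz; source++) {
            MPI_Recv(&local_int, 1, MPI_DOUBLE, source, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            total_int += local_int;
        }
        printf("Integral from %f to %f with %d trapezoids: %.15f\n",
               a, b, n, total_int);
    }
    MPI_Finalize();
    return 0;
}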
II. OUTPUT