Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
0% found this document useful (0 votes)
17 views

CUDA

The document contains code for performing vector addition and matrix multiplication on a GPU using CUDA. For vector addition, two arrays are copied to the GPU memory and a kernel is launched to add the corresponding elements of the arrays and store the results in a third array. For matrix multiplication, two matrices are copied to the GPU and a kernel is launched that calculates the dot product of each row of the first matrix with each column of the second matrix to produce the output matrix. The results are then copied back from the GPU to CPU memory and printed.

Uploaded by

killua gojo
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
17 views

CUDA

The document contains code for performing vector addition and matrix multiplication on a GPU using CUDA. For vector addition, two arrays are copied to the GPU memory and a kernel is launched to add the corresponding elements of the arrays and store the results in a third array. For matrix multiplication, two matrices are copied to the GPU and a kernel is launched that calculates the dot product of each row of the first matrix with each column of the second matrix to produce the output matrix. The results are then copied back from the GPU to CPU memory and printed.

Uploaded by

killua gojo
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 3

HPC ASSIGNMENT 4

VECTOR ADDITION

%%cu
#include<stdio.h>
#include<cuda.h>
/*
 * arradd - element-wise addition kernel: z[i] = x[i] + y[i].
 *
 * Expected launch layout: a 1-D grid with one block per element. Only
 * blockIdx.x is used as the element index; threadIdx is never read, so every
 * thread in a block would touch the same element.
 *
 * x, y : input arrays, read at index blockIdx.x
 * z    : output array, written at index blockIdx.x
 *
 * There is no length parameter and no bounds check: the caller must launch
 * no more blocks than the arrays have elements.
 */
__global__ void arradd(int *x,int *y, int *z)
{
    /* Block ids start at 0, so they map directly onto array slots. */
    const int slot = blockIdx.x;
    const int total = x[slot] + y[slot];
    z[slot] = total;
}
/* Report a failed CUDA runtime call on stderr.
 * Returns 1 when `status` is an error, 0 when it is cudaSuccess. */
static int cudaFailed(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return 0;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return 1;
}

/*
 * Vector addition driver.
 *
 * Reads up to six integers for each of two arrays from stdin, adds them
 * element-wise on the GPU with arradd, and prints the six sums.
 * Returns 0 on success and 1 if any CUDA call fails.
 */
int main()
{
    enum { N = 6 };                        /* number of elements per array */
    const size_t bytes = N * sizeof(int);

    /* Defaults; trailing elements without an initializer are zero. Any value
       that cannot be read from stdin keeps its default. */
    int a[N] = {1, 2, 4, 5, 6};
    int b[N] = {2, 3, 4, 5};
    int c[N] = {0};
    int *d = NULL, *e = NULL, *f = NULL;   /* device copies of a, b and c */
    int i;
    int rc = 1;                            /* assume failure until the end */

    printf("\n Enter six elements of first array\n");
    for (i = 0; i < N; i++)
    {
        /* Stop at the first failed read: once scanf fails on EOF or on
           non-numeric input, every later call would fail the same way. */
        if (scanf("%d", &a[i]) != 1)
            break;
    }
    printf("\n Enter six elements of second array\n");
    for (i = 0; i < N; i++)
    {
        if (scanf("%d", &b[i]) != 1)
            break;
    }

    /* cudaMalloc() allocates memory from global memory on the GPU. */
    if (cudaFailed(cudaMalloc((void **)&d, bytes), "cudaMalloc(d)") ||
        cudaFailed(cudaMalloc((void **)&e, bytes), "cudaMalloc(e)") ||
        cudaFailed(cudaMalloc((void **)&f, bytes), "cudaMalloc(f)"))
        goto cleanup;

    /* cudaMemcpy(dst, src, ...) copies from source to destination. Here the
       destination is the GPU (d, e) and the source is the CPU (a, b). */
    if (cudaFailed(cudaMemcpy(d, a, bytes, cudaMemcpyHostToDevice), "copy of a to device") ||
        cudaFailed(cudaMemcpy(e, b, bytes, cudaMemcpyHostToDevice), "copy of b to device"))
        goto cleanup;

    /* Kernel call: N is the number of blocks, 1 is the number of threads per
       block, and d, e, f are the arguments. */
    arradd<<<N, 1>>>(d, e, f);
    /* A launch does not return a status; configuration errors are reported
       through cudaGetLastError(). */
    if (cudaFailed(cudaGetLastError(), "arradd launch"))
        goto cleanup;

    /* Copy the result from GPU (device) to CPU (host). Errors raised while
       the kernel was executing are also reported by this call. */
    if (cudaFailed(cudaMemcpy(c, f, bytes, cudaMemcpyDeviceToHost), "copy of result to host"))
        goto cleanup;

    printf("\nSum of two arrays:\n ");
    for (i = 0; i < N; i++)
    {
        printf("%d\t", c[i]);
    }
    rc = 0;

cleanup:
    /* Free the memory allocated to pointers d, e, f. Pointers that were never
       allocated are still NULL, which cudaFree() accepts. */
    cudaFree(d);
    cudaFree(e);
    cudaFree(f);
    return rc;
}

OUTPUT:-
Enter six elements of first array

Enter six elements of second array

Sum of two arrays:


3 5 8 10 6 0

MATRIX MULTIPLICATION

%%cu
#include<stdio.h>
#include<cuda.h>
#define row1 2 /* Number of rows of first matrix */
#define col1 3 /* Number of columns of first matrix */
#define row2 3 /* Number of rows of second matrix */
#define col2 2 /* Number of columns of second matrix */

/*
 * matproduct - integer matrix product n = l * m, one output element per block.
 *
 * Expected launch layout: a 2-D grid of col2 x row1 blocks, where blockIdx.x
 * selects the output column and blockIdx.y the output row. threadIdx is never
 * read, so one thread per block is sufficient.
 *
 * l : first matrix, indexed as l[col1*row + k]
 * m : second matrix, indexed as m[col2*k + col]
 * n : output matrix, written at n[col2*row + col]
 *
 * Blocks outside the row1 x col2 output are ignored, so a larger grid does
 * not write out of bounds.
 */
__global__ void matproduct(int *l,int *m, int *n)
{
    const int x = blockIdx.x;   /* output column */
    const int y = blockIdx.y;   /* output row */
    int k;
    int sum = 0;

    if (x >= col2 || y >= row1)
        return;

    /* Accumulate the dot product in a local variable and store it once,
       instead of re-reading and re-writing the output element in global
       memory on every iteration. */
    for (k = 0; k < col1; k++)
    {
        sum += l[col1 * y + k] * m[col2 * k + x];
    }
    n[col2 * y + x] = sum;
}

#if col1 != row2
#error "col1 must equal row2 for the matrix product to be defined"
#endif

/* Report a failed CUDA runtime call on stderr.
 * Returns 1 when `status` is an error, 0 when it is cudaSuccess. */
static int cudaFailed(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return 0;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return 1;
}

/*
 * Matrix multiplication driver.
 *
 * Multiplies the fixed row1 x col1 matrix a by the fixed row2 x col2 matrix b
 * on the GPU with matproduct and prints the row1 x col2 product.
 * Returns 0 on success and 1 if any CUDA call fails.
 */
int main()
{
    int a[row1][col1] = { {10, 11, 12}, {13, 14, 15} };
    int b[row2][col2] = { {10, 11}, {12, 13}, {14, 15} };
    int c[row1][col2];
    int *d = NULL, *e = NULL, *f = NULL;   /* device copies of a, b and c */
    int i, j;
    int rc = 1;                            /* assume failure until the end */

    const size_t bytesA = row1 * col1 * sizeof(int);
    const size_t bytesB = row2 * col2 * sizeof(int);
    const size_t bytesC = row1 * col2 * sizeof(int);

    /* Two-dimensional grid (collection of blocks).
       Syntax is dim3 grid(no. of columns, no. of rows). */
    dim3 grid(col2, row1);

    if (cudaFailed(cudaMalloc((void **)&d, bytesA), "cudaMalloc(d)") ||
        cudaFailed(cudaMalloc((void **)&e, bytesB), "cudaMalloc(e)") ||
        cudaFailed(cudaMalloc((void **)&f, bytesC), "cudaMalloc(f)"))
        goto cleanup;

    if (cudaFailed(cudaMemcpy(d, a, bytesA, cudaMemcpyHostToDevice), "copy of a to device") ||
        cudaFailed(cudaMemcpy(e, b, bytesB, cudaMemcpyHostToDevice), "copy of b to device"))
        goto cleanup;

    /* One block per output element, one thread per block. */
    matproduct<<<grid, 1>>>(d, e, f);
    /* A launch does not return a status; configuration errors are reported
       through cudaGetLastError(). */
    if (cudaFailed(cudaGetLastError(), "matproduct launch"))
        goto cleanup;

    /* Errors raised while the kernel was executing are also reported by this
       copy. */
    if (cudaFailed(cudaMemcpy(c, f, bytesC, cudaMemcpyDeviceToHost), "copy of result to host"))
        goto cleanup;

    printf("\nProduct of two matrices:\n ");
    for (i = 0; i < row1; i++)
    {
        for (j = 0; j < col2; j++)
        {
            printf("%d\t", c[i][j]);
        }
        printf("\n");
    }
    rc = 0;

cleanup:
    /* Pointers that were never allocated are still NULL, which cudaFree()
       accepts. */
    cudaFree(d);
    cudaFree(e);
    cudaFree(f);

    return rc;
}

OUTPUT:-
Product of two matrices:
400 433
508 550

You might also like