Click here to Skip to main content
15,894,254 members
Please Sign up or sign in to vote.
2.33/5 (2 votes)
See more:
C#
#include <stdio.h>
#include <cuda.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <math.h>
//#include<conio.h>
// Length of the file-scope work vectors below (and side length of the global A).
#define N 100
// File-scope N x N matrix.
// NOTE(review): every function in this file that indexes `A` declares its own
// parameter named A, which shadows this global, so nothing visible here reads it.
float A[N][N];
// i, j, it: file-scope loop counters; n: column index that eignvOnHost reads with scanf.
int i,j,it,n;
// Elapsed wall-clock time in seconds, computed and printed at the end of main.
float t_1;
// Power-iteration state used by eignvOnHost: x = current vector, z = A*x,
// e = per-component change between iterations, zmax / emax = largest |z| / e entry.
float x[N],z[N],e[N],zmax,emax;
// Modulus applied to rand() when main fills the input matrix.
#define MAX_RANGE 9999

/*
 * funcCheck(stmt): evaluate a CUDA runtime call exactly once and, if it does
 * not return cudaSuccess, report the source location and the CUDA error text
 * on stderr and `return -1` from the enclosing function.
 *
 * Because of that `return -1`, the macro may only be used inside functions
 * whose return type accepts -1 (it is used from main in this file).
 * The temporary has a macro-specific name so that it cannot capture an
 * identifier called `err` appearing in the checked expression.
 */
#define funcCheck(stmt) do {                                                    \
        cudaError_t funcCheckErr_ = (stmt);                                     \
        if (funcCheckErr_ != cudaSuccess) {                                     \
            fprintf(stderr, "Failed to run stmt at %s:%d\n",                    \
                    __FILE__, __LINE__);                                        \
            fprintf(stderr, "Got CUDA error ...  %s\n",                         \
                    cudaGetErrorString(funcCheckErr_));                         \
            return -1;                                                          \
        }                                                                       \
    } while(0)

// Tiled row-sum kernel (described as "compute eigen values and vector" in the
// original source; what the code actually accumulates is a plain row sum).
//
// Each thread walks row `Row` of the row-major numARows x numAColumns matrix A
// in tiles of 32 columns. A tile is staged in shared memory, then every thread
// adds all 32 staged entries of its own row to Cvalue. Elements outside the
// matrix are staged as 0 so partial tiles contribute nothing.
//
// Launch requirements: blockDim must be (32, 32, 1), because the shared tile is
// 32x32 and each thread sums 32 staged entries that must all have been written
// by the threads of its row.
//
// Parameters:
//   A            input matrix, row-major, numARows * numAColumns floats
//   C            output buffer, row-major, numCRows * numCColumns floats
//   numARows     rows of A
//   numAColumns  columns of A
//   numCRows     rows of C
//   numCColumns  columns of C
//
// NOTE(review): the original stored to C[numAColumns + Col] guarded only by
// Col < numAColumns. That index lies outside a C buffer of
// numCRows * numCColumns elements when numCColumns == numAColumns (as the
// caller in this file sets it), and every row of threads wrote the same
// element. The store is now bounds-checked against C's own dimensions and
// addressed as C[Row][Col]; confirm this matches the intended output layout.
__global__ void eignvShared(float * A, float * C,
                                    int numARows, int numAColumns,int numCRows, int numCColumns)
{
    __shared__ float sA[32][32];   // Tile size of 32x32

    int Row = blockDim.y*blockIdx.y + threadIdx.y;
    int Col = blockDim.x*blockIdx.x + threadIdx.x;
    float Cvalue = 0.0f;

    const int numTiles = ((numAColumns - 1) / 32) + 1;
    for (int k = 0; k < numTiles; k++)
    {
        const int tileCol = threadIdx.x + (k*32);

        // Stage one element of the current tile; pad with zero past the matrix edge.
        if ((Row < numARows) && (tileCol < numAColumns))
        {
            sA[threadIdx.y][threadIdx.x] = A[(Row*numAColumns) + tileCol];
        }
        else
        {
            sA[threadIdx.y][threadIdx.x] = 0.0f;
        }
        // All threads of the block must finish writing before anyone reads the tile.
        __syncthreads();

        for (int j = 0; j < 32; ++j)
        {
            Cvalue += sA[threadIdx.y][j];
        }
        // All reads of this tile must finish before the next iteration overwrites it.
        __syncthreads();
    }

    // Only threads that map onto a real element of C may store.
    if ((Row < numCRows) && (Col < numCColumns))
    {
        C[(Row*numCColumns) + Col] = Cvalue;
    }
}

// Host-side power iteration on the matrix A.
//
// Prompts on stdin for a column index, uses that column of A as the start
// vector x, and then repeats 100 times: z = A*x, scale z by its largest
// absolute component (zmax), record the per-component change in e (largest
// entry in emax), and copy z back into x. Results are left in the file-scope
// variables x, z, e, zmax and emax.
//
// Parameters:
//   A            row-major matrix of numARows * numAColumns floats
//   C            not written by this function (kept for the existing signature)
//   numARows     rows of A
//   numAColumns  columns of A (also the row stride used for indexing)
//   numCRows     unused
//   numCColumns  unused
//
// The column index entered by the user is 0-based.
//
// NOTE(review): the work vectors x, z and e are file-scope arrays of length N,
// so only the leading dim x dim block of A is used, where
// dim = min(numARows, numAColumns, N). Enlarge N (or allocate the vectors
// dynamically) if the whole matrix is meant to take part.
void eignvOnHost(float * A, float * C,  int numARows,
                        int numAColumns,int numCRows, int numCColumns)
{
    (void)C;
    (void)numCRows;
    (void)numCColumns;

    // Largest square block that fits both the matrix and the fixed-size work vectors.
    int dim = (numARows < numAColumns) ? numARows : numAColumns;
    if (dim > N)
    {
        dim = N;
    }
    if (A == NULL || dim <= 0)
    {
        return;
    }

    printf("\nEnter the column vector\n");
    if (scanf("%d",&n) != 1 || n < 0 || n >= dim)
    {
        fprintf(stderr, "Invalid column index; expected a value in [0, %d]\n", dim - 1);
        return;
    }

    // Start vector: column n of A. A is a flat row-major buffer, so element
    // (r, c) lives at A[r*numAColumns + c].
    for (int r = 0; r < dim; r++)
    {
        x[r] = A[(r*numAColumns) + n];
    }

    for (it = 0; it < 100; it++)
    {
        // z = A * x
        for (int r = 0; r < dim; r++)
        {
            float acc = 0.0f;
            for (int c = 0; c < dim; c++)
            {
                acc += A[(r*numAColumns) + c] * x[c];
            }
            z[r] = acc;
        }

        // Largest absolute component of z, used as the scaling factor.
        zmax = fabsf(z[0]);
        for (int r = 1; r < dim; r++)
        {
            if (fabsf(z[r]) > zmax)
            {
                zmax = fabsf(z[r]);
            }
        }
        if (zmax == 0.0f)
        {
            // A*x is the zero vector: scaling would divide by zero, so stop here.
            break;
        }
        for (int r = 0; r < dim; r++)
        {
            z[r] = z[r] / zmax;
        }

        // Per-component change in magnitude between this iterate and the previous one.
        for (int r = 0; r < dim; r++)
        {
            e[r] = fabsf(fabsf(z[r]) - fabsf(x[r]));
        }
        emax = e[0];
        for (int r = 1; r < dim; r++)
        {
            if (e[r] > emax)
            {
                emax = e[r];
            }
        }

        for (int r = 0; r < dim; r++)
        {
            x[r] = z[r];
        }
    }

    return;
}

// Program entry point.
//
// Fills a numARows x numAColumns host matrix with pseudo-random values, copies
// it to the GPU, launches eignvShared, copies the 1 x numAColumns result back,
// runs eignvOnHost on the same input, compares the two output buffers, and
// prints the elapsed wall-clock time in seconds.
//
// Returns 0 on success and -1 if a host allocation or any CUDA call fails.
// NOTE: funcCheck returns immediately on a CUDA error, so buffers allocated
// before the failure are released only by process exit.
int main(int argc, char ** argv) {
    float * hostA; // The A matrix
    float * hostC; // The output C copied back from the device
    float * hostComputedC; // Host-side reference buffer
    float * deviceA;
    float * deviceC;
    time_t c_1, c_2; // time() returns time_t, not clock_t
    // Please adjust rows and columns according to your need.
    int numARows = 512; // number of rows in the matrix A
    int numAColumns = 512; // number of columns in the matrix A
    int numCRows; // number of rows in the matrix C
    int numCColumns; // number of columns in the matrix C

    (void)argc;
    (void)argv;

    c_1 = time(NULL); // time measure: start

    // Setting numCRows and numCColumns
    numCRows = 1;
    numCColumns = numAColumns;

    const size_t elemsA = (size_t)numARows * (size_t)numAColumns;
    const size_t elemsC = (size_t)numCRows * (size_t)numCColumns;
    const size_t bytesA = sizeof(float) * elemsA;
    const size_t bytesC = sizeof(float) * elemsC;

    hostA = (float *) malloc(bytesA);
    hostC = (float *) malloc(bytesC);
    // NOTE(review): eignvOnHost, as visible in this file, does not store into
    // its C argument, so this buffer is zero-initialised to keep the
    // comparison below from reading indeterminate memory.
    hostComputedC = (float *) calloc(elemsC, sizeof(float));
    if (hostA == NULL || hostC == NULL || hostComputedC == NULL)
    {
        fprintf(stderr, "Host memory allocation failed\n");
        free(hostA);
        free(hostC);
        free(hostComputedC);
        return -1;
    }

    for (size_t i = 0; i < elemsA; i++)
    {
        hostA[i] = (rand() % MAX_RANGE) / 2.0f;
    }

    // Allocating GPU memory
    funcCheck(cudaMalloc((void **)&deviceA, bytesA));
    funcCheck(cudaMalloc((void **)&deviceC, bytesC));

    // Copy memory to the GPU
    funcCheck(cudaMemcpy(deviceA, hostA, bytesA, cudaMemcpyHostToDevice));

    // Initialize the grid and block dimensions (ceil-division so that sizes
    // which are exact multiples of 32 do not launch an extra, idle block).
    dim3 dimBlock(32, 32, 1);
    dim3 dimGrid((numCColumns + 31) / 32, (numCRows + 31) / 32, 1);

    // Launch the GPU kernel
    eignvShared<<<dimGrid, dimBlock>>>(deviceA, deviceC, numARows, numAColumns, numCRows, numCColumns);

    // Launch-configuration errors surface through cudaGetLastError; faults
    // raised while the kernel runs surface at the next synchronising call.
    funcCheck(cudaGetLastError());
    funcCheck(cudaDeviceSynchronize());

    // Copy the results in GPU memory back to the CPU
    funcCheck(cudaMemcpy(hostC, deviceC, bytesC, cudaMemcpyDeviceToHost));

    eignvOnHost(hostA,  hostComputedC, numARows, numAColumns,numCRows, numCColumns);

    // Compare with a relative tolerance: host and device float arithmetic is
    // not expected to agree bit-for-bit.
    for (int i = 0; i < numCColumns*numCRows; i++)
    {
        const float tolerance = 1e-4f * fmaxf(1.0f, fabsf(hostC[i]));
        if (fabsf(hostComputedC[i] - hostC[i]) > tolerance)
        {
            printf("Mismatch at  Col = %d hostComputed[] = %f --device[] %f\n",  i % numCColumns, hostComputedC[i], hostC[i]);
            break;
        }
    }

    // Free the GPU memory
    funcCheck(cudaFree(deviceA));
    funcCheck(cudaFree(deviceC));

    free(hostA);
    free(hostC);
    free(hostComputedC);

    c_2 = time(NULL);
    t_1 = (float)difftime(c_2, c_1);
    printf("Execution time: %f \n",t_1);
    return 0;
}


What I have tried:

How can I run this CUDA program?
I have Linux (Ubuntu)
and Visual Studio 2010.
It computes the eigenvalue and eigenvector.
Please help me.
Posted
Updated 28-Jun-16 4:11am
Comments
Sergey Alexandrovich Kryukov 27-Jun-16 23:04pm    
How to run? — off-topic. This is a site for software developers, engineers.
—SA
Patrice T 28-Jun-16 0:08am    
No tutorial for Cuda ? No example ?
Richard MacCutchan 28-Jun-16 3:59am    
You need to build it on your Linux system.
chandanadhikari 28-Jun-16 9:38am    
this is not a proper way of asking questions here. Please be more precise as to what problem you are facing when you run this program ... what error message you get etc.etc. ... this information will help us help you better !!
Member 12606956 29-Jun-16 16:47pm    
When I run it, this error appears: "expression must have pointer to object type" ...

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900