Click here to Skip to main content
15,891,316 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
See more:
I want to parallelize function in CUDA C which will count all vectors with sum equal of vector elements and elements not bigger of k. For example if number of vector elements n is 5, sum=10 and k=3 than, the number of vectors who satisfy this condition is 101. I've already make this function in cuda c but the problem is when the number of blocks and threads are bigger than 1, I know that the problem is in for cycles and I should to change it but I don't know from where to start. When I am calling the function with blocks and threads equal to one than the function is working on classic way and everything is good but in this case the function is not paralyzed.

The source code of all program is:

C
#include <stdio.h>
#include<stdlib.h>
#include<assert.h>
#include<cuda.h>

//function that count number of vectors
__device__ void count(int *vector, int *total, int n, int s)
{
   int i,sum=0;
   for(i=blockIdx.x*blockDim.x+threadIdx.x;i<n;i+=blockDim.x*gridDim.x)
   { 
     
     sum+=vector[i];
	 __syncthreads();
   }
   if(sum==s)
   {
     
     total[0]=total[0]+1;
   }
}

//main function
__global__ void computeVectors(int *vector, int n, int kk, int s, int *total)
{
   int k=0;
   int j,i,next;
   
   while(1)
   {
     //this is the problem, in for cycle
     for(j=blockIdx.x*blockDim.x+threadIdx.x; j<=kk; j+=blockDim.x*gridDim.x)
     {
       vector[k]=j;
       count(vector, total, n, s);
	   __syncthreads();
     }
     for(i=blockIdx.x*blockDim.x+threadIdx.x; i<n; i+=blockDim.x*gridDim.x)
     {
	   
       if(vector[i]<kk)
	      break;
     }	 
     next=i;
     vector[next]++;
     for(i=blockIdx.x*blockDim.x+threadIdx.x; i<sledno; i+=blockDim.x*gridDim.x)
     {
       vector[i]=0;
	   __syncthreads();
     }
     k=0;
     if(next>=n)
	    break;
   }
}

int main(){

    cudaError_t err = cudaSuccess;
	
    int n,k,sum;
    int counter=0;
	
    printf("Enter the length of vector n=");
    scanf("%d",&n);
    printf("Enter the max value of vector elements k=");
    scanf("%d",&k);
    printf("Enter the sum of vector elements sum=");
    scanf("%d",&sum);

   //initial vector with length n
     int *vec_h, *vec_d;
     size_t sizevec=n*sizeof(int);
     vec_h=(int *)malloc(sizevec);
     cudaMalloc((void **) &vec_d, sizevec);
	
	for(counter=0; counter<n; counter++)
		{
			vec_h[counter]=0;
		}
	cudaMemcpy(vec_d, vec_h, sizevec, cudaMemcpyHostToDevice);
	
    int *total_h, *total_d;
    size_t size=1*sizeof(int);
    total_h=(int *)malloc(size);
    cudaMalloc((void **) &total_d, size);
    total_h[0]=0;
    cudaMemcpy(total_d, total_h, size, cudaMemcpyHostToDevice);
	 
   //calling of main function
    computeVectors<<<1, 1>>>(vec_d, n, k, sum, total_d);

    cudaThreadSynchronize(); 
  
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Error: %s!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    cudaMemcpy(total_h, total_d, size, cudaMemcpyDeviceToHost);
    printf("Number of vectors that satisfy condition is %d\n", total_h[0]);
	
	
    free(vec_h); 
    cudaFree(vec_d);
	
    free(total_h); 
    cudaFree(total_d);

    return 0;
}
Posted
Updated 18-Oct-13 22:37pm
v2
Comments
[no name] 18-Oct-13 23:24pm    
Not paralyze but parallelize.
https://www.google.com.au/#q=parallelize&spell=1

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900