Parallelize function which will count all vectors with sum equal of vector elements and elements not bigger of k

Question

0.00/5 (No votes)

See more:

I want to parallelize function in CUDA C which will count all vectors with sum equal of vector elements and elements not bigger of k. For example if number of vector elements n is 5, sum=10 and k=3 than, the number of vectors who satisfy this condition is 101. I've already make this function in cuda c but the problem is when the number of blocks and threads are bigger than 1, I know that the problem is in for cycles and I should to change it but I don't know from where to start. When I am calling the function with blocks and threads equal to one than the function is working on classic way and everything is good but in this case the function is not paralyzed.

The source code of all program is:

C

#include <stdio.h>
#include<stdlib.h>
#include<assert.h>
#include<cuda.h>

//function that count number of vectors
__device__ void count(int *vector, int *total, int n, int s)
{
   int i,sum=0;
   for(i=blockIdx.x*blockDim.x+threadIdx.x;i<n;i+=blockDim.x*gridDim.x)
   { 
     
     sum+=vector[i];
	 __syncthreads();
   }
   if(sum==s)
   {
     
     total[0]=total[0]+1;
   }
}

//main function
__global__ void computeVectors(int *vector, int n, int kk, int s, int *total)
{
   int k=0;
   int j,i,next;
   
   while(1)
   {
     //this is the problem, in for cycle
     for(j=blockIdx.x*blockDim.x+threadIdx.x; j<=kk; j+=blockDim.x*gridDim.x)
     {
       vector[k]=j;
       count(vector, total, n, s);
	   __syncthreads();
     }
     for(i=blockIdx.x*blockDim.x+threadIdx.x; i<n; i+=blockDim.x*gridDim.x)
     {
	   
       if(vector[i]<kk)
	      break;
     }	 
     next=i;
     vector[next]++;
     for(i=blockIdx.x*blockDim.x+threadIdx.x; i<sledno; i+=blockDim.x*gridDim.x)
     {
       vector[i]=0;
	   __syncthreads();
     }
     k=0;
     if(next>=n)
	    break;
   }
}

int main(){

    cudaError_t err = cudaSuccess;
	
    int n,k,sum;
    int counter=0;
	
    printf("Enter the length of vector n=");
    scanf("%d",&n);
    printf("Enter the max value of vector elements k=");
    scanf("%d",&k);
    printf("Enter the sum of vector elements sum=");
    scanf("%d",&sum);

   //initial vector with length n
     int *vec_h, *vec_d;
     size_t sizevec=n*sizeof(int);
     vec_h=(int *)malloc(sizevec);
     cudaMalloc((void **) &vec_d, sizevec);
	
	for(counter=0; counter<n; counter++)
		{
			vec_h[counter]=0;
		}
	cudaMemcpy(vec_d, vec_h, sizevec, cudaMemcpyHostToDevice);
	
    int *total_h, *total_d;
    size_t size=1*sizeof(int);
    total_h=(int *)malloc(size);
    cudaMalloc((void **) &total_d, size);
    total_h[0]=0;
    cudaMemcpy(total_d, total_h, size, cudaMemcpyHostToDevice);
	 
   //calling of main function
    computeVectors<<<1, 1>>>(vec_d, n, k, sum, total_d);

    cudaThreadSynchronize(); 
  
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Error: %s!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    cudaMemcpy(total_h, total_d, size, cudaMemcpyDeviceToHost);
    printf("Number of vectors that satisfy condition is %d\n", total_h[0]);
	
	
    free(vec_h); 
    cudaFree(vec_d);
	
    free(total_h); 
    cudaFree(total_d);

    return 0;
}

Posted 18-Oct-13 7:34am

zlristovski

Updated 18-Oct-13 22:37pm

v2

Add a Solution

Comments

[no name] 18-Oct-13 23:24pm

Not paralyze but parallelize.
https://www.google.com.au/#q=parallelize&spell=1

Add your solution here

Treat my content as plain text, not as HTML

Preview 0

…

Existing Members

Sign in to your account

...or Join us

Download, Vote, Comment, Publish.

Your Email
Password
Forgot your password?

Your Email
This email is in use. Do you need your password?
Optional Password

I have read and agree to the Terms of Service and Privacy Policy
Please subscribe me to the CodeProject newsletters

When answering a question please:

Read the question carefully.
Understand that English isn't everyone's first language so be lenient of bad spelling and grammar.
If a question is poorly phrased then either ask for clarification, ignore it, or edit the question and fix the problem. Insults are not welcome.
Don't tell someone to read the manual. Chances are they have and don't get it. Provide an answer or move on to the next question.

Let's work to help developers, not make them feel stupid.

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)