This simple CUDA program, written for the Parallel Programming Seminar at IMI, allocates a pitched 2D buffer in GPU memory and fills every element with the value 42. To compile it, enter the following command:
/usr/local/cuda/bin/nvcc 42.cu -o 42
To run, type:
./42
Below is the complete source code: (download)
#include <assert.h>
#include <stdio.h>
/* Thread-block dimensions (threads per block along x and y) used for the launch. */
const int block_x = 16;
const int block_y = 16;
/*
 * Fills a 2D float buffer with the value 42, one thread per element.
 *
 * output  base pointer of the buffer
 * pitch   distance between the starts of consecutive rows, in float
 *         elements (not bytes)
 * width   number of elements to write in each row
 * height  number of rows to write
 *
 * Expects a 2D launch whose grid covers at least width x height threads;
 * threads that fall outside the buffer do nothing.
 */
__global__
void
kernel_42( float * output, int pitch, int width, int height )
{
    /* Derive the coordinates from the actual launch configuration so the
       kernel stays correct for any block shape. */
    int const x = blockIdx.x*blockDim.x + threadIdx.x;
    int const y = blockIdx.y*blockDim.y + threadIdx.y;

    if( x<width && y<height )
    {
        /* Widen before multiplying: pitch*y can exceed the int range for
           large buffers. */
        size_t const row_start = (size_t)pitch*(size_t)y;
        output[row_start + x] = 42.0f;
    }
}
/*
 * Allocates a pitched width x height float buffer on the device, launches
 * kernel_42 to fill it, waits for the kernel to finish and releases the
 * buffer.
 *
 * Returns 0 on success and 1 if any CUDA call or the kernel fails; a
 * diagnostic is written to stderr in the failure case.
 */
int
main( int argc, char const * argv[] )
{
    (void)argc;
    (void)argv;

    int const width=4242;
    int const height=4242;

    void * ptr = 0;
    size_t pitch = 0;
    cudaError_t err=cudaMallocPitch(&ptr,&pitch,sizeof(float)*width,height);
    if( err!=cudaSuccess )
    {
        fprintf(stderr,"cudaMallocPitch failed: %s\n",cudaGetErrorString(err));
        return 1;
    }

    /* The kernel indexes rows in float elements, so the byte pitch must be
       a whole number of floats. Checked explicitly because assert() is
       compiled out when NDEBUG is defined. */
    if( pitch%sizeof(float) )
    {
        fprintf(stderr,"pitch %lu is not a multiple of sizeof(float)\n",
                (unsigned long)pitch);
        cudaFree(ptr);
        return 1;
    }
    int const pitch_in_floats = (int)(pitch/sizeof(float));

    /* Round the grid size up so partial blocks at the edges are covered. */
    dim3 block(block_x,block_y);
    dim3 grid((width+block.x-1)/block.x,(height+block.y-1)/block.y);
    kernel_42<<<grid,block>>>((float *)ptr,pitch_in_floats,width,height);

    /* A launch does not return an error itself: bad launch configurations
       are reported by cudaGetLastError(), faults during execution by the
       next synchronizing call. */
    err = cudaGetLastError();
    if( err==cudaSuccess )
        err = cudaDeviceSynchronize();
    if( err!=cudaSuccess )
        fprintf(stderr,"kernel_42 failed: %s\n",cudaGetErrorString(err));

    cudaError_t const free_err = cudaFree(ptr);
    if( free_err!=cudaSuccess )
        fprintf(stderr,"cudaFree failed: %s\n",cudaGetErrorString(free_err));

    return ( err==cudaSuccess && free_err==cudaSuccess ) ? 0 : 1;
}