-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsharedMemory.cu
45 lines (36 loc) · 1.06 KB
/
sharedMemory.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#include <stdio.h>
#include <cuda_runtime.h>
#include "helpers.h"
__global__
void sharedMemoryLoader(int* input, int* output, int N){
__shared__ int shared_data[256];
int tid = threadIdx.x + blockIdx.x * blockDim.x;
if(tid < N){
shared_data[threadIdx.x] = input[tid];
}
__syncthreads();
if(tid < N){
output[tid] = shared_data[threadIdx.x];
}
}
int main(){
int N = 256;
int h_input[256], h_output[256];
for(int i=0; i<N; i++){
h_input[i] = i;
}
int *d_input, *d_output;
CUDA_CHECK(cudaMalloc(&d_input, N*sizeof(int)));
CUDA_CHECK(cudaMalloc(&d_output, N*sizeof(int)));
CUDA_CHECK(cudaMemcpy(d_input, h_input, N * sizeof(int), cudaMemcpyHostToDevice));
sharedMemoryLoader<<<1,256>>>(d_input, d_output, N);
CHECK_KERNEL_ERROR();
CUDA_CHECK(cudaMemcpy(h_output, d_output, N*sizeof(int), cudaMemcpyDeviceToHost));
for(int i=0; i<10; i++){
printf("%d ", h_output[i]);
}
printf("\n");
CUDA_CHECK(cudaFree(d_input));
CUDA_CHECK(cudaFree(d_output));
return 0;
}