-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from sstsimulator/allevin/update_copyrights
Update copyrights for balar
- Loading branch information
Showing
7 changed files
with
62 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,94 +1,109 @@ | ||
// Copyright 2009-2020 NTESS. Under the terms | ||
// of Contract DE-NA0003525 with NTESS, the U.S. | ||
// Government retains certain rights in this software. | ||
// | ||
// Copyright (c) 2009-2020, NTESS | ||
// All rights reserved. | ||
// | ||
// Portions are copyright of other developers: | ||
// See the file CONTRIBUTORS.TXT in the top level directory | ||
// the distribution for more information. | ||
// | ||
// This file is part of the SST software package. For license | ||
// information, see the LICENSE file in the top level directory of the | ||
// distribution. | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <math.h> | ||
#include <iostream> | ||
#include <iostream> | ||
// CUDA kernel. Each thread takes care of one element of c | ||
__global__ void vecAdd(int *a, int *b, int *c, int n) | ||
{ | ||
// Get our global thread ID | ||
int id = blockIdx.x*blockDim.x+threadIdx.x; | ||
|
||
// Make sure we do not go out of bounds | ||
if (id < n) | ||
c[id] = a[id] + b[id]; | ||
} | ||
|
||
int main( int argc, char* argv[] ) | ||
{ | ||
// Size of vectors | ||
int n = 131072; | ||
|
||
// Host input vectors | ||
int *h_a; | ||
int *h_b; | ||
printf("init point h_a %p\n",h_a); | ||
//Host output vector | ||
int *h_c; | ||
|
||
// Device input vectors | ||
int *d_a; | ||
printf("init point d_a %p\n",d_a); | ||
int *d_b; | ||
//Device output vector | ||
int *d_c; | ||
|
||
// Size, in bytes, of each vector | ||
size_t bytes = n*sizeof(int); | ||
|
||
// Allocate memory for each vector on host | ||
h_a = (int*)malloc(bytes); | ||
printf("malloc point h_a %p\n",h_a); | ||
h_b = (int*)malloc(bytes); | ||
h_c = (int*)malloc(bytes); | ||
|
||
// Allocate memory for each vector on GPU | ||
cudaMalloc(&d_a, bytes); | ||
printf("cuda malloc point d_a %p\n",d_a); | ||
cudaMalloc(&d_b, bytes); | ||
cudaMalloc(&d_c, bytes); | ||
|
||
int i; | ||
// Initialize vectors on host | ||
for( i = 0; i < n; i++ ) { | ||
h_a[i] =3; | ||
h_b[i] = 4; | ||
} | ||
|
||
// Copy host vectors to device | ||
printf("pre cpy point h_a %p\n",h_a); | ||
printf("pre cpy point d_a %p\n",d_a); | ||
cudaMemcpy( d_a, h_a, bytes, cudaMemcpyHostToDevice); | ||
cudaMemcpy( d_b, h_b, bytes, cudaMemcpyHostToDevice); | ||
|
||
int blockSize, gridSize; | ||
|
||
// Number of threads in each thread block | ||
blockSize = 256; | ||
|
||
// Number of thread blocks in grid | ||
gridSize = (int)ceil((float)n/blockSize); | ||
|
||
// Execute the kernel | ||
vecAdd<<<gridSize, blockSize>>>(d_a, d_b, d_c, n); | ||
|
||
// Copy array back to host | ||
cudaMemcpy( h_c, d_c, bytes, cudaMemcpyDeviceToHost ); | ||
|
||
// Sum up vector c and print result divided by n, this should equal 1 within error | ||
int sum = 0; | ||
for(i=0; i<n; i++) | ||
sum += h_c[i]; | ||
printf("final result: %d\n", sum/n); | ||
|
||
// Release device memory | ||
cudaFree(d_a); | ||
cudaFree(d_b); | ||
cudaFree(d_c); | ||
|
||
// Release host memory | ||
free(h_a); | ||
free(h_b); | ||
free(h_c); | ||
|
||
return 0; | ||
} |