Skip to content

Commit

Permalink
Merge pull request #2 from sstsimulator/allevin/update_copyrights
Browse files Browse the repository at this point in the history
Update copyrights for balar
  • Loading branch information
allevin authored Mar 31, 2020
2 parents 753f50e + 503e2b6 commit 77fc53d
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 32 deletions.
10 changes: 5 additions & 5 deletions balar.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// Copyright 2009-2019 NTESS. Under the terms
// Copyright 2009-2020 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2019, NTESS
// Copyright (c) 2009-2020, NTESS
// All rights reserved.
//
// Portions are copyright of other developers:
Expand Down Expand Up @@ -53,7 +53,7 @@ Balar::Balar(SST::ComponentId_t id, SST::Params& params): Component(id)
totalTransfer = 0;
ackTransfer = 0;
transferNumber = 0;

std::string gpu_clock = params.find<std::string>("clock", "1GHz");
TimeConverter* timecvt = registerClock( gpu_clock, new Clock::Handler<Balar>(this, &Balar::tick ) );

Expand All @@ -69,13 +69,13 @@ Balar::Balar(SST::ComponentId_t id, SST::Params& params): Component(id)
// CPU link allocation
gpu_to_cpu_cache_links = (SimpleMem**) malloc( sizeof(SimpleMem*) * cpu_core_count );
gpu_to_core_links = (Link**) malloc( sizeof(Link*) * cpu_core_count );

SubComponentSlotInfo* gpu_to_cpu_cache = getSubComponentSlotInfo("cpu_cache");
if (gpu_to_cpu_cache) {
if (!gpu_to_cpu_cache->isAllPopulated())
output->fatal(CALL_INFO, -1, "%s, Error: loading 'cpu_cache' subcomponents. All subcomponent slots from 0 to cpu core count must be populated. "
"Check your input config for non-populated slots\n", getName().c_str());

uint32_t subCompCount = gpu_to_cpu_cache->getMaxPopulatedSlotNumber() == -1 ? 0 : gpu_to_cpu_cache->getMaxPopulatedSlotNumber() + 1;
if (subCompCount != cpu_core_count)
output->fatal(CALL_INFO, -1, "%s, Error: loading 'cpu_cache' subcomponents and the number of subcomponents does not match the number of CPU cores. "
Expand Down
4 changes: 2 additions & 2 deletions balar.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// Copyright 2009-2019 NTESS. Under the terms
// Copyright 2009-2020 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2019, NTESS
// Copyright (c) 2009-2020, NTESS
// All rights reserved.
//
// Portions are copyright of other developers:
Expand Down
4 changes: 2 additions & 2 deletions balar_event.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// Copyright 2009-2019 NTESS. Under the terms
// Copyright 2009-2020 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2019, NTESS
// Copyright (c) 2009-2020, NTESS
// All rights reserved.
//
// Portions are copyright of other developers:
Expand Down
15 changes: 15 additions & 0 deletions cuda_runtime_api.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
// Copyright 2009-2020 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2020, NTESS
// All rights reserved.
//
// Portions are copyright of other developers:
// See the file CONTRIBUTORS.TXT in the top level directory of
// the distribution for more information.
//
// This file is part of the SST software package. For license
// information, see the LICENSE file in the top level directory of the
// distribution.

#if !defined(__dv)
#if defined(__cplusplus)
#define __dv(v) \
Expand Down
4 changes: 2 additions & 2 deletions mempool.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// Copyright 2009-2019 NTESS. Under the terms
// Copyright 2009-2020 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2019, NTESS
// Copyright (c) 2009-2020, NTESS
// All rights reserved.
//
// Portions are copyright of other developers:
Expand Down
4 changes: 2 additions & 2 deletions mempool.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// Copyright 2009-2019 NTESS. Under the terms
// Copyright 2009-2020 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2019, NTESS
// Copyright (c) 2009-2020, NTESS
// All rights reserved.
//
// Portions are copyright of other developers:
Expand Down
53 changes: 34 additions & 19 deletions tests/vectorAdd/vecAdd.cu
Original file line number Diff line number Diff line change
@@ -1,94 +1,109 @@
// Copyright 2009-2020 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2020, NTESS
// All rights reserved.
//
// Portions are copyright of other developers:
// See the file CONTRIBUTORS.TXT in the top level directory of
// the distribution for more information.
//
// This file is part of the SST software package. For license
// information, see the LICENSE file in the top level directory of the
// distribution.

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <iostream>
#include <iostream>
// Element-wise integer vector addition kernel: c[i] = a[i] + b[i].
// Each thread handles at most one element, selected by its flat 1D global
// index (blockIdx.x * blockDim.x + threadIdx.x); only the x dimension of the
// launch configuration is used.
//   a, b : input vectors, read at indices below n
//   c    : output vector, written at indices below n
//   n    : number of elements to process
__global__ void vecAdd(int *a, int *b, int *c, int n)
{
    // Flat global index of this thread across the 1D grid
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;

    // Threads that fall past the end of the vectors have nothing to do
    if (idx >= n)
        return;

    c[idx] = b[idx] + a[idx];
}

// Aborts the program with a diagnostic if a CUDA runtime call did not succeed.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Host driver for the vecAdd test:
//   1. allocates three n-element int vectors on the host and on the device,
//   2. fills the host inputs with 3 and 4 and copies them to the device,
//   3. launches vecAdd over a 1D grid covering all n elements,
//   4. copies the result back and prints the average of the output vector.
// Pointer values are printed at several points as debugging trace output.
// Returns 0 on success; exits with EXIT_FAILURE if any allocation or CUDA
// runtime call fails.
int main( int argc, char* argv[] )
{
    (void)argc;
    (void)argv;

    // Size of vectors
    int n = 131072;

    // Host input vectors. Initialised to NULL so that the trace prints below
    // read a defined value rather than an uninitialised pointer.
    int *h_a = NULL;
    int *h_b = NULL;
    printf("init point h_a %p\n", (void*)h_a);
    // Host output vector
    int *h_c = NULL;

    // Device input vectors
    int *d_a = NULL;
    printf("init point d_a %p\n", (void*)d_a);
    int *d_b = NULL;
    // Device output vector
    int *d_c = NULL;

    // Size, in bytes, of each vector
    size_t bytes = n*sizeof(int);

    // Allocate memory for each vector on host
    h_a = (int*)malloc(bytes);
    printf("malloc point h_a %p\n", (void*)h_a);
    h_b = (int*)malloc(bytes);
    h_c = (int*)malloc(bytes);
    if (h_a == NULL || h_b == NULL || h_c == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n", (unsigned long)bytes);
        free(h_a);
        free(h_b);
        free(h_c);
        return EXIT_FAILURE;
    }

    // Allocate memory for each vector on GPU
    checkCuda(cudaMalloc(&d_a, bytes), "cudaMalloc(d_a)");
    printf("cuda malloc point d_a %p\n", (void*)d_a);
    checkCuda(cudaMalloc(&d_b, bytes), "cudaMalloc(d_b)");
    checkCuda(cudaMalloc(&d_c, bytes), "cudaMalloc(d_c)");

    int i;
    // Initialize vectors on host
    for( i = 0; i < n; i++ ) {
        h_a[i] = 3;
        h_b[i] = 4;
    }

    // Copy host vectors to device
    printf("pre cpy point h_a %p\n", (void*)h_a);
    printf("pre cpy point d_a %p\n", (void*)d_a);
    checkCuda(cudaMemcpy( d_a, h_a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_a <- h_a)");
    checkCuda(cudaMemcpy( d_b, h_b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_b <- h_b)");

    int blockSize, gridSize;

    // Number of threads in each thread block
    blockSize = 256;

    // Number of thread blocks in grid: integer ceil-div so that every element
    // is covered even when n is not a multiple of blockSize
    gridSize = (n + blockSize - 1) / blockSize;

    // Execute the kernel
    vecAdd<<<gridSize, blockSize>>>(d_a, d_b, d_c, n);
    // A launch does not return a status itself; query it explicitly
    checkCuda(cudaGetLastError(), "vecAdd launch");

    // Copy array back to host. This blocking copy is also where errors raised
    // while the kernel was executing are reported.
    checkCuda(cudaMemcpy( h_c, d_c, bytes, cudaMemcpyDeviceToHost ), "cudaMemcpy(h_c <- d_c)");

    // Sum up vector c and print the result divided by n; every element is
    // 3 + 4, so this should print 7
    int sum = 0;
    for(i=0; i<n; i++)
        sum += h_c[i];
    printf("final result: %d\n", sum/n);

    // Release device memory
    checkCuda(cudaFree(d_a), "cudaFree(d_a)");
    checkCuda(cudaFree(d_b), "cudaFree(d_b)");
    checkCuda(cudaFree(d_c), "cudaFree(d_c)");

    // Release host memory
    free(h_a);
    free(h_b);
    free(h_c);

    return 0;
}

0 comments on commit 77fc53d

Please sign in to comment.