Skip to content

Commit

Permalink
fix gpu transform offset when copying poses
Browse files Browse the repository at this point in the history
  • Loading branch information
fbxiang committed May 22, 2024
1 parent eb3f9d9 commit d9031a3
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 16 deletions.
16 changes: 13 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ name: Build
on:
push:
branches: [main, dev]
workflow_dispatch:

jobs:
build-linux-all:
Expand Down Expand Up @@ -105,9 +106,18 @@ jobs:
release:
runs-on: ubuntu-latest
needs: [build-linux-all, build-windows-pybind]
permissions:
contents: write
permissions: write-all
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: "false"
fetch-depth: 0
fetch-tags: true
- name: Read branch file
id: getbranch
shell: bash
run: echo branch=$(git describe --tags --exact-match HEAD || echo nightly) >> $GITHUB_OUTPUT
- name: Download wheels
uses: actions/download-artifact@v4
with:
Expand Down Expand Up @@ -145,7 +155,7 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: nightly
tag_name: ${{ steps.getbranch.outputs.branch }}
name: 'Nightly Release'
prerelease: true
body: 'SAPIEN development nightly release. This release is mainly for internal testing. Stable releases are published to pypi https://pypi.org/project/sapien/'
Expand Down
3 changes: 3 additions & 0 deletions include/sapien/sapien_renderer/batched_render_system.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ class BatchedRenderSystem {

std::vector<std::shared_ptr<BatchedCamera>> mCameraBatches;

// byte stride between consecutive mat4 elements in the transform buffer (0 until set in init())
int mTransformBufferElementByteOffset{0};

int mShapeCount{0};
CudaArray mCudaSceneTransformRefBuffer;
CudaArray mCudaShapeDataBuffer;
Expand Down
17 changes: 14 additions & 3 deletions src/sapien_renderer/batched_render_system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,17 @@ void BatchedRenderSystem::init() {
// cache current versions
mSceneVersions.push_back(system->getScene()->getVersion());

sceneTransformRefs.push_back(system->getTransformCudaArray().ptr);
auto transformArray = system->getTransformCudaArray();
sceneTransformRefs.push_back(transformArray.ptr);

if (mTransformBufferElementByteOffset == 0) {
mTransformBufferElementByteOffset = transformArray.strides.at(0);
if (mTransformBufferElementByteOffset % 4 != 0) {
throw std::runtime_error("corrupted transform array buffer");
}
} else if (mTransformBufferElementByteOffset != transformArray.strides.at(0)) {
throw std::runtime_error("corrupted transform array buffer");
}

for (auto &body : system->getRenderBodyComponents()) {
for (auto &shape : body->getRenderShapes()) {
Expand Down Expand Up @@ -284,8 +294,9 @@ void BatchedRenderSystem::update() {

// upload data
update_object_transforms(
(float **)mCudaSceneTransformRefBuffer.ptr, (RenderShapeData *)mCudaShapeDataBuffer.ptr,
(float *)mCudaPoseHandle.ptr, mCudaPoseHandle.shape.at(1), mShapeCount, mCudaStream);
(float **)mCudaSceneTransformRefBuffer.ptr, mTransformBufferElementByteOffset / 4,
(RenderShapeData *)mCudaShapeDataBuffer.ptr, (float *)mCudaPoseHandle.ptr,
mCudaPoseHandle.shape.at(1), mShapeCount, mCudaStream);

update_camera_transforms((CameraData *)mCudaCameraDataBuffer.ptr, (float *)mCudaPoseHandle.ptr,
mCudaPoseHandle.shape.at(1), mCameraCount, mCudaStream);
Expand Down
13 changes: 7 additions & 6 deletions src/sapien_renderer/batched_render_system.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ inline CUDA_CALLABLE void PoseToMatrix(float *result, Pose const &pose, Vec3 con

__global__ void update_object_transforms_kernel(
float *__restrict__ *__restrict__ scene_transform_buffers, // output buffers
RenderShapeData *__restrict__ shapes,
int transform_stride, RenderShapeData *__restrict__ shapes,
float *__restrict__ poses, // parent pose array
int pose_stride, int count) {
int g = blockIdx.x * blockDim.x + threadIdx.x;
Expand All @@ -63,7 +63,7 @@ __global__ void update_object_transforms_kernel(
int scene_index = shape.sceneIndex;
int transform_index = shape.transformIndex;

PoseToMatrix(scene_transform_buffers[scene_index] + transform_index * 16, p, scale);
PoseToMatrix(scene_transform_buffers[scene_index] + transform_index * transform_stride, p, scale);
}

__global__ void update_camera_transforms_kernel(CameraData *cameras, float *poses, int pose_stride,
Expand Down Expand Up @@ -94,11 +94,12 @@ __global__ void update_camera_transforms_kernel(CameraData *cameras, float *pose

constexpr int BLOCK_SIZE = 128;

// Host-side launcher for update_object_transforms_kernel.
//
// This span is a rendered diff hunk: the pre-change signature/argument list and the post-change
// ones appear one after another. The post-change version adds the transform_stride parameter.
//
// Launches (count + BLOCK_SIZE - 1) / BLOCK_SIZE blocks of BLOCK_SIZE threads on `stream` and
// forwards every argument to the kernel unchanged.
//   scene_transform_buffers: per-scene output buffers, indexed by a shape's sceneIndex in the kernel
//   transform_stride:        multiplied by a shape's transformIndex and added to a float* in the
//                            kernel, so it is counted in floats, not bytes
//   render_shapes, poses, pose_stride, count: passed straight through to the kernel
//
// NOTE(review): count == 0 gives a zero-block grid, which CUDA rejects as an invalid launch
// configuration, and no cudaGetLastError() check follows the launch — confirm callers guarantee
// count > 0 or that the error is checked elsewhere.
void update_object_transforms(float **scene_transform_buffers, RenderShapeData *render_shapes,
float *poses, int pose_stride, int count, CUstream_st *stream) {
void update_object_transforms(float **scene_transform_buffers, int transform_stride,
RenderShapeData *render_shapes, float *poses, int pose_stride,
int count, CUstream_st *stream) {
update_object_transforms_kernel<<<(count + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0,
(cudaStream_t)stream>>>(scene_transform_buffers, render_shapes,
poses, pose_stride, count);
(cudaStream_t)stream>>>(
scene_transform_buffers, transform_stride, render_shapes, poses, pose_stride, count);
}

void update_camera_transforms(CameraData *cameras, float *poses, int pose_stride, int count,
Expand Down
5 changes: 3 additions & 2 deletions src/sapien_renderer/batched_render_system.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ struct CameraData {
* count: size of the render_shapes array
* stream: cuda stream
* */
void update_object_transforms(float **scene_transform_buffers, RenderShapeData *render_shapes,
float *poses, int pose_stride, int count, CUstream_st *stream);
void update_object_transforms(float **scene_transform_buffers, int transform_stride,
RenderShapeData *render_shapes, float *poses, int pose_stride,
int count, CUstream_st *stream);

/** The first 32 numbers must be the view matrix and inverse view matrix */
void update_camera_transforms(CameraData *cameras, float *poses, int pose_stride, int count,
Expand Down
6 changes: 4 additions & 2 deletions src/sapien_renderer/sapien_renderer_system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,11 @@ void SapienRendererSystem::step() {

CudaArrayHandle SapienRendererSystem::getTransformCudaArray() {
mScene->prepareObjectTransformBuffer();
int offset = mScene->getGpuTransformBufferSize();

auto buffer = mScene->getObjectTransformBuffer();
return CudaArrayHandle{.shape = {static_cast<int>(buffer->getSize() / 64), 4, 4},
.strides = {64, 16, 4},
return CudaArrayHandle{.shape = {static_cast<int>(buffer->getSize() / offset), 4, 4},
.strides = {offset, 16, 4},
.type = "f4",
.cudaId = buffer->getCudaDeviceId(),
.ptr = buffer->getCudaPtr()};
Expand Down

0 comments on commit d9031a3

Please sign in to comment.