Skip to content

Commit

Permalink
vulkan: memory barrier after compute dispatches.
Browse files Browse the repository at this point in the history
  • Loading branch information
slime73 committed Sep 30, 2024
1 parent 64e4eae commit 1ee8915
Show file tree
Hide file tree
Showing 9 changed files with 184 additions and 81 deletions.
69 changes: 32 additions & 37 deletions src/modules/graphics/vulkan/Buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,36 @@ Buffer::Buffer(love::graphics::Graphics *gfx, const Settings &settings, const st
, vgfx(dynamic_cast<Graphics*>(gfx))
, usageFlags(settings.usageFlags)
{
// All buffers can be copied to and from.
barrierDstAccessFlags = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
barrierDstStageFlags = VK_PIPELINE_STAGE_TRANSFER_BIT;

if (usageFlags & BUFFERUSAGEFLAG_VERTEX)
{
barrierDstAccessFlags |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
barrierDstStageFlags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
}
if (usageFlags & BUFFERUSAGEFLAG_INDEX)
{
barrierDstAccessFlags |= VK_ACCESS_INDEX_READ_BIT;
barrierDstStageFlags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
}
if (usageFlags & BUFFERUSAGEFLAG_TEXEL)
{
barrierDstAccessFlags |= VK_ACCESS_SHADER_READ_BIT;
barrierDstStageFlags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
}
if (usageFlags & BUFFERUSAGEFLAG_SHADER_STORAGE)
{
barrierDstAccessFlags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
barrierDstStageFlags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
}
if (usageFlags & BUFFERUSAGEFLAG_INDIRECT_ARGUMENTS)
{
barrierDstAccessFlags |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
barrierDstStageFlags |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
}

loadVolatile();
}

Expand Down Expand Up @@ -302,44 +332,9 @@ void Buffer::postGPUWriteBarrier(VkCommandBuffer cmd)
VkMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = barrierDstAccessFlags;

VkPipelineStageFlags dstStageMask = 0;
addPostGPUWriteBarrierFlags(barrier.dstAccessMask, dstStageMask);

vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, dstStageMask, 0, 1, &barrier, 0, nullptr, 0, nullptr);
}

void Buffer::addPostGPUWriteBarrierFlags(VkAccessFlags &dstAccessFlags, VkPipelineStageFlags &dstStageFlags)
{
// All buffers can be copied to and from.
dstAccessFlags |= VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
dstStageFlags |= VK_PIPELINE_STAGE_TRANSFER_BIT;

if (usageFlags & BUFFERUSAGEFLAG_VERTEX)
{
dstAccessFlags |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
dstStageFlags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
}
if (usageFlags & BUFFERUSAGEFLAG_INDEX)
{
dstAccessFlags |= VK_ACCESS_INDEX_READ_BIT;
dstStageFlags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
}
if (usageFlags & BUFFERUSAGEFLAG_TEXEL)
{
dstAccessFlags |= VK_ACCESS_SHADER_READ_BIT;
dstStageFlags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
}
if (usageFlags & BUFFERUSAGEFLAG_SHADER_STORAGE)
{
dstAccessFlags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
dstStageFlags |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
}
if (usageFlags & BUFFERUSAGEFLAG_INDIRECT_ARGUMENTS)
{
dstAccessFlags |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
dstStageFlags |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
}
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, barrierDstStageFlags, 0, 1, &barrier, 0, nullptr, 0, nullptr);
}

} // vulkan
Expand Down
7 changes: 6 additions & 1 deletion src/modules/graphics/vulkan/Buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ class Buffer final
ptrdiff_t getTexelBufferHandle() const override;

void postGPUWriteBarrier(VkCommandBuffer cmd);
void addPostGPUWriteBarrierFlags(VkAccessFlags &dstAccessFlags, VkPipelineStageFlags &dstStageFlags);

// Access mask this buffer may be accessed with after a GPU write. It is built
// once in the constructor from the buffer's usage flags and is used as the
// dstAccessMask of the memory barriers recorded after such writes.
VkAccessFlags getBarrierDstAccessFlags() const { return barrierDstAccessFlags; }
// Pipeline stage mask matching getBarrierDstAccessFlags(); also built in the
// constructor and used as the dstStageMask of the same barriers.
VkPipelineStageFlags getBarrierDstStageFlags() const { return barrierDstStageFlags; }

private:

Expand All @@ -76,6 +78,9 @@ class Buffer final
BufferUsageFlags usageFlags;
Range mappedRange;
bool coherent;

VkAccessFlags barrierDstAccessFlags = 0;
VkPipelineStageFlags barrierDstStageFlags = 0;
};

} // vulkan
Expand Down
98 changes: 78 additions & 20 deletions src/modules/graphics/vulkan/Graphics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ void Graphics::submitGpuCommands(SubmitMode submitMode, void *screenshotCallback
Vulkan::cmdTransitionImageLayout(
commandBuffers.at(currentFrame),
backbufferImage,
swapChainPixelFormat,
swapChainPixelFormat, true,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR);
}
Expand Down Expand Up @@ -382,7 +382,7 @@ void Graphics::submitGpuCommands(SubmitMode submitMode, void *screenshotCallback
Vulkan::cmdTransitionImageLayout(
commandBuffers.at(currentFrame),
backbufferImage,
swapChainPixelFormat,
swapChainPixelFormat, true,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

Expand All @@ -405,7 +405,7 @@ void Graphics::submitGpuCommands(SubmitMode submitMode, void *screenshotCallback
Vulkan::cmdTransitionImageLayout(
commandBuffers.at(currentFrame),
backbufferImage,
swapChainPixelFormat,
swapChainPixelFormat, true,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
fakeBackbuffer == nullptr ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
}
Expand Down Expand Up @@ -1194,8 +1194,56 @@ graphics::StreamBuffer *Graphics::newStreamBuffer(BufferUsage type, size_t size)
return new StreamBuffer(this, type, size);
}

/**
 * Accumulates the destination access and pipeline stage flags for the memory
 * barrier recorded after a compute dispatch of the given shader.
 *
 * Only active textures and storage buffers whose access includes
 * Shader::ACCESS_WRITE contribute flags; all other entries are skipped.
 *
 * @param shader         Compute shader whose active resources are inspected.
 * @param dstAccessFlags In/out: access flags are OR'd into this value.
 * @param dstStageFlags  In/out: stage flags are OR'd into this value.
 * @return false if a writable texture or storage buffer slot has no resource
 *         bound (the output flags may be partially updated in that case),
 *         true otherwise.
 **/
static bool computeDispatchBarrierFlags(Shader *shader, VkAccessFlags &dstAccessFlags, VkPipelineStageFlags &dstStageFlags)
{
	for (const auto &info : shader->getActiveTextureInfo())
	{
		// Entries without write access do not contribute to the barrier.
		if ((info.access & Shader::ACCESS_WRITE) == 0)
			continue;

		if (info.texture == nullptr)
			return false;

		auto tex = (Texture *) info.texture;

		// All writable images use the GENERAL layout.
		// TODO: this is pretty messy.
		VkAccessFlags texAccessFlags = 0;
		VkPipelineStageFlags texStageFlags = 0;

		// Named formatInfo (not info) so it does not redeclare the loop
		// variable, which is ill-formed in the outermost block of the loop body.
		const PixelFormatInfo &formatInfo = getPixelFormatInfo(tex->getPixelFormat());
		Vulkan::setImageLayoutTransitionOptions(false, tex->isRenderTarget(), formatInfo, VK_IMAGE_LAYOUT_GENERAL, texAccessFlags, texStageFlags);

		dstAccessFlags |= texAccessFlags;
		dstStageFlags |= texStageFlags;
	}

	for (const auto &info : shader->getActiveStorageBufferInfo())
	{
		// Entries without write access do not contribute to the barrier.
		if ((info.access & Shader::ACCESS_WRITE) == 0)
			continue;

		if (info.buffer == nullptr)
			return false;

		// Each buffer precomputes its own barrier flags from its usage flags.
		auto b = (Buffer *) info.buffer;
		dstAccessFlags |= b->getBarrierDstAccessFlags();
		dstStageFlags |= b->getBarrierDstStageFlags();
	}

	return true;
}

bool Graphics::dispatch(love::graphics::Shader *shader, int x, int y, int z)
{
auto computeShader = (Shader *) shader;

VkMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
VkPipelineStageFlags dstStageMask = 0;
if (!computeDispatchBarrierFlags(computeShader, barrier.dstAccessMask, dstStageMask))
return false;

usedShadersInFrame.insert(computeShader);

if (renderPassState.active)
Expand All @@ -1205,14 +1253,26 @@ bool Graphics::dispatch(love::graphics::Shader *shader, int x, int y, int z)

computeShader->cmdPushDescriptorSets(commandBuffers.at(currentFrame), VK_PIPELINE_BIND_POINT_COMPUTE);

// TODO: does this need any layout transitions?
vkCmdDispatch(commandBuffers.at(currentFrame), (uint32) x, (uint32) y, (uint32) z);

// Image layout transitions aren't needed, every writable image will be in the GENERAL layout.
if (barrier.dstAccessMask != 0 || dstStageMask != 0)
vkCmdPipelineBarrier(commandBuffers.at(currentFrame), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dstStageMask, 0, 1, &barrier, 0, nullptr, 0, nullptr);

return true;
}

bool Graphics::dispatch(love::graphics::Shader *shader, love::graphics::Buffer *indirectargs, size_t argsoffset)
{
auto computeShader = (Shader *) shader;

VkMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
VkPipelineStageFlags dstStageMask = 0;
if (!computeDispatchBarrierFlags(computeShader, barrier.dstAccessMask, dstStageMask))
return false;

usedShadersInFrame.insert(computeShader);

if (renderPassState.active)
Expand All @@ -1222,9 +1282,12 @@ bool Graphics::dispatch(love::graphics::Shader *shader, love::graphics::Buffer *

computeShader->cmdPushDescriptorSets(commandBuffers.at(currentFrame), VK_PIPELINE_BIND_POINT_COMPUTE);

// TODO: does this need any layout transitions?
vkCmdDispatchIndirect(commandBuffers.at(currentFrame), (VkBuffer) indirectargs->getHandle(), argsoffset);

// Image layout transitions aren't needed, every writable image will be in the GENERAL layout.
if (barrier.dstAccessMask != 0 || dstStageMask != 0)
vkCmdPipelineBarrier(commandBuffers.at(currentFrame), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dstStageMask, 0, 1, &barrier, 0, nullptr, 0, nullptr);

return true;
}

Expand Down Expand Up @@ -1315,7 +1378,7 @@ void Graphics::beginFrame()
Vulkan::cmdTransitionImageLayout(
commandBuffers.at(currentFrame),
swapChainImages[imageIndex],
swapChainPixelFormat,
swapChainPixelFormat, true,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
}
Expand All @@ -1326,15 +1389,15 @@ void Graphics::beginFrame()
Vulkan::cmdTransitionImageLayout(
commandBuffers.at(currentFrame),
depthImage,
depthStencilPixelFormat,
depthStencilPixelFormat, true,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);

if (colorImage)
Vulkan::cmdTransitionImageLayout(
commandBuffers.at(currentFrame),
colorImage,
swapChainPixelFormat,
swapChainPixelFormat, true,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);

Expand Down Expand Up @@ -2503,7 +2566,7 @@ void Graphics::setRenderPass(const RenderTargets &rts, int pixelw, int pixelh, b

FramebufferConfiguration configuration{};

std::vector<std::tuple<VkImage, PixelFormat, VkImageLayout, VkImageLayout, int, int>> transitionImages;
std::vector<std::tuple<VkImage, PixelFormat, bool, VkImageLayout, VkImageLayout, int, int>> transitionImages;

for (const auto &color : rts.colors)
{
Expand All @@ -2513,7 +2576,7 @@ void Graphics::setRenderPass(const RenderTargets &rts, int pixelw, int pixelh, b
VkImageLayout imagelayout = tex->getImageLayout();
if (imagelayout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
{
transitionImages.push_back({ (VkImage)tex->getHandle(), tex->getPixelFormat(), imagelayout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
transitionImages.push_back({ (VkImage)tex->getHandle(), tex->getPixelFormat(), tex->isRenderTarget(), imagelayout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
viewinfo.startMipmap + color.mipmap, viewinfo.startLayer + color.slice });
}
}
Expand All @@ -2525,7 +2588,7 @@ void Graphics::setRenderPass(const RenderTargets &rts, int pixelw, int pixelh, b
VkImageLayout imagelayout = tex->getImageLayout();
if (imagelayout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL)
{
transitionImages.push_back({ (VkImage)tex->getHandle(), tex->getPixelFormat(), imagelayout, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
transitionImages.push_back({ (VkImage)tex->getHandle(), tex->getPixelFormat(), tex->isRenderTarget(), imagelayout, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
viewinfo.startMipmap + rts.depthStencil.mipmap, viewinfo.startLayer + rts.depthStencil.slice });
}
}
Expand Down Expand Up @@ -2581,8 +2644,8 @@ void Graphics::startRenderPass()
renderPassState.framebufferConfiguration.staticData.renderPass = renderPassState.beginInfo.renderPass;
renderPassState.beginInfo.framebuffer = getFramebuffer(renderPassState.framebufferConfiguration);

for (const auto &[image, format, imageLayout, renderLayout, rootmip, rootlayer] : renderPassState.transitionImages)
Vulkan::cmdTransitionImageLayout(commandBuffers.at(currentFrame), image, format, imageLayout, renderLayout, rootmip, 1, rootlayer, 1);
for (const auto &[image, format, renderTarget, imageLayout, renderLayout, rootmip, rootlayer] : renderPassState.transitionImages)
Vulkan::cmdTransitionImageLayout(commandBuffers.at(currentFrame), image, format, renderTarget, imageLayout, renderLayout, rootmip, 1, rootlayer, 1);

vkCmdBeginRenderPass(commandBuffers.at(currentFrame), &renderPassState.beginInfo, VK_SUBPASS_CONTENTS_INLINE);

Expand All @@ -2595,8 +2658,8 @@ void Graphics::endRenderPass()

vkCmdEndRenderPass(commandBuffers.at(currentFrame));

for (const auto &[image, format, imageLayout, renderLayout, rootmip, rootlayer] : renderPassState.transitionImages)
Vulkan::cmdTransitionImageLayout(commandBuffers.at(currentFrame), image, format, renderLayout, imageLayout, rootmip, 1, rootlayer, 1);
for (const auto &[image, format, renderTarget, imageLayout, renderLayout, rootmip, rootlayer] : renderPassState.transitionImages)
Vulkan::cmdTransitionImageLayout(commandBuffers.at(currentFrame), image, format, renderTarget, renderLayout, imageLayout, rootmip, 1, rootlayer, 1);

for (auto &colorAttachment : renderPassState.renderPassConfiguration.colorAttachments)
colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
Expand Down Expand Up @@ -2686,11 +2749,6 @@ void Graphics::requestSwapchainRecreation()
}
}

// Stores the given shader pointer in the computeShader member. Nothing else
// is modified and the pointer is not validated here.
void Graphics::setComputeShader(Shader *shader)
{
computeShader = shader;
}

VkSampler Graphics::getCachedSampler(const SamplerState &samplerState)
{
auto samplerkey = samplerState.toKey();
Expand Down
4 changes: 1 addition & 3 deletions src/modules/graphics/vulkan/Graphics.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ struct RenderpassState
RenderPassConfiguration renderPassConfiguration{};
FramebufferConfiguration framebufferConfiguration{};
VkPipeline pipeline = VK_NULL_HANDLE;
std::vector<std::tuple<VkImage, PixelFormat, VkImageLayout, VkImageLayout, int, int>> transitionImages;
std::vector<std::tuple<VkImage, PixelFormat, bool, VkImageLayout, VkImageLayout, int, int>> transitionImages;
uint32_t numColorAttachments = 0;
uint64 packedColorAttachmentFormats = 0;
float width = 0.0f;
Expand Down Expand Up @@ -267,7 +267,6 @@ class Graphics final : public love::graphics::Graphics
void addReadbackCallback(std::function<void()> callback);
void submitGpuCommands(SubmitMode, void *screenshotCallbackData = nullptr);
VkSampler getCachedSampler(const SamplerState &sampler);
void setComputeShader(Shader *computeShader);
graphics::Shader::BuiltinUniformData getCurrentBuiltinUniformData();
const OptionalDeviceExtensions &getEnabledOptionalDeviceExtensions() const;
const OptionalInstanceExtensions &getEnabledOptionalInstanceExtensions() const;
Expand Down Expand Up @@ -377,7 +376,6 @@ class Graphics final : public love::graphics::Graphics
std::unordered_map<uint64, VkSampler> samplers;
VkCommandPool commandPool = VK_NULL_HANDLE;
std::vector<VkCommandBuffer> commandBuffers;
Shader *computeShader = nullptr;
std::vector<VkSemaphore> imageAvailableSemaphores;
std::vector<VkSemaphore> renderFinishedSemaphores;
std::vector<VkFence> inFlightFences;
Expand Down
Loading

0 comments on commit 1ee8915

Please sign in to comment.