diff --git a/filament/backend/CMakeLists.txt b/filament/backend/CMakeLists.txt
index 6be8a3959ec4..cef040982288 100644
--- a/filament/backend/CMakeLists.txt
+++ b/filament/backend/CMakeLists.txt
@@ -318,6 +318,8 @@ if (FILAMENT_SUPPORTS_WEBGPU)
         src/webgpu/WebGPURenderPrimitive.h
         src/webgpu/WebGPURenderTarget.cpp
         src/webgpu/WebGPURenderTarget.h
+        src/webgpu/WebGPUStagePool.cpp
+        src/webgpu/WebGPUStagePool.h
         src/webgpu/WebGPUStrings.h
         src/webgpu/WebGPUSwapChain.cpp
         src/webgpu/WebGPUSwapChain.h
diff --git a/filament/backend/src/webgpu/WebGPUBufferBase.cpp b/filament/backend/src/webgpu/WebGPUBufferBase.cpp
index 9bfa56a53028..4e038a0078a1 100644
--- a/filament/backend/src/webgpu/WebGPUBufferBase.cpp
+++ b/filament/backend/src/webgpu/WebGPUBufferBase.cpp
@@ -18,6 +18,7 @@
 
 #include "WebGPUConstants.h"
 #include "WebGPUQueueManager.h"
+#include "WebGPUStagePool.h"
 
 #include "DriverBase.h"
 #include
@@ -65,7 +66,7 @@ WebGPUBufferBase::WebGPUBufferBase(wgpu::Device const& device, const wgpu::Buffe
 // of 4 by padding with zeros.
 void WebGPUBufferBase::updateGPUBuffer(BufferDescriptor const& bufferDescriptor,
         const uint32_t byteOffset, wgpu::Device const& device,
-        WebGPUQueueManager* const webGPUQueueManager) {
+        WebGPUQueueManager* const webGPUQueueManager, WebGPUStagePool* const webGPUStagePool) {
     FILAMENT_CHECK_PRECONDITION(bufferDescriptor.buffer)
             << "updateGPUBuffer called with a null buffer";
     FILAMENT_CHECK_PRECONDITION(bufferDescriptor.size + byteOffset <= mBuffer.GetSize())
@@ -85,15 +86,12 @@ void WebGPUBufferBase::updateGPUBuffer(BufferDescriptor const& bufferDescriptor,
 
     const size_t stagingBufferSize = remainder == 0 ? bufferDescriptor.size
                                                     : mainBulk + FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS;
-    // create a staging buffer
-    wgpu::BufferDescriptor descriptor{
-        .label = "Filament WebGPU Staging Buffer",
-        .usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc,
-        .size = stagingBufferSize,
-        .mappedAtCreation = true };
-    wgpu::Buffer stagingBuffer = device.CreateBuffer(&descriptor);
+    wgpu::Buffer stagingBuffer = webGPUStagePool->acquireBuffer(stagingBufferSize,
+            webGPUQueueManager->getLatestSubmissionState());
 
     void* mappedRange = stagingBuffer.GetMappedRange();
+    assert_invariant(mappedRange);
+
     memcpy(mappedRange, bufferDescriptor.buffer, bufferDescriptor.size);
 
     // Make sure the padded memory is set to 0 to have deterministic behaviors
@@ -106,7 +104,9 @@ void WebGPUBufferBase::updateGPUBuffer(BufferDescriptor const& bufferDescriptor,
 
     // Copy the staging buffer contents to the destination buffer.
     webGPUQueueManager->getCommandEncoder().CopyBufferToBuffer(stagingBuffer, 0, mBuffer,
-            byteOffset, stagingBufferSize);
+            byteOffset,
+            remainder == 0 ? bufferDescriptor.size
+                           : mainBulk + FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS);
 }
 
 } // namespace filament::backend
diff --git a/filament/backend/src/webgpu/WebGPUBufferBase.h b/filament/backend/src/webgpu/WebGPUBufferBase.h
index c637ca04f51a..e9a5c978e164 100644
--- a/filament/backend/src/webgpu/WebGPUBufferBase.h
+++ b/filament/backend/src/webgpu/WebGPUBufferBase.h
@@ -25,6 +25,7 @@ namespace filament::backend {
 
 class BufferDescriptor;
 class WebGPUQueueManager;
+class WebGPUStagePool;
 
 /**
  * A base class for WebGPU buffer objects, providing common functionality for creating and
@@ -40,7 +41,7 @@ class WebGPUBufferBase /* intended to be extended */ {
      * ensures the calls happen in the expected sequence.
     */
    void updateGPUBuffer(BufferDescriptor const&, uint32_t byteOffset, wgpu::Device const& device,
-            WebGPUQueueManager* const webGPUQueueManager);
+            WebGPUQueueManager* const webGPUQueueManager, WebGPUStagePool* const webGPUStagePool);
 
    [[nodiscard]] wgpu::Buffer const& getBuffer() const { return mBuffer; }
 
diff --git a/filament/backend/src/webgpu/WebGPUDriver.cpp b/filament/backend/src/webgpu/WebGPUDriver.cpp
index ccb8c187db92..e45ecec0a4cc 100644
--- a/filament/backend/src/webgpu/WebGPUDriver.cpp
+++ b/filament/backend/src/webgpu/WebGPUDriver.cpp
@@ -107,6 +107,7 @@ WebGPUDriver::WebGPUDriver(WebGPUPlatform& platform,
       mAdapter{ mPlatform.requestAdapter(nullptr) },
       mDevice{ mPlatform.requestDevice(mAdapter) },
       mQueueManager{ mDevice },
+      mStagePool{ mDevice },
       mPipelineLayoutCache{ mDevice },
       mPipelineCache{ mDevice },
       mRenderPassMipmapGenerator{ mDevice, &mQueueManager },
@@ -177,6 +178,9 @@ void WebGPUDriver::endFrame(const uint32_t /* frameId */) {
     for (size_t i = 0; i < MAX_DESCRIPTOR_SET_COUNT; i++) {
         mCurrentDescriptorSets[i] = {};
     }
+
+    // Garbage collection (if necessary)
+    mStagePool.gc();
 }
 
 // If a command encoder is in flight then the encoder is finished and submitted to the GPU queue.
@@ -851,7 +855,7 @@ void WebGPUDriver::updateIndexBuffer(Handle<HwIndexBuffer> indexBufferHandle,
     // draw calls are made.
     flush();
     handleCast<WebGPUIndexBuffer>(indexBufferHandle)
-            ->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager);
+            ->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager, &mStagePool);
     scheduleDestroy(std::move(bufferDescriptor));
 }
 
@@ -862,14 +866,14 @@ void WebGPUDriver::updateBufferObject(Handle<HwBufferObject> bufferObjectHandle,
     // draw calls are made.
     flush();
     handleCast<WebGPUBufferObject>(bufferObjectHandle)
-            ->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager);
+            ->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager, &mStagePool);
     scheduleDestroy(std::move(bufferDescriptor));
 }
 
 void WebGPUDriver::updateBufferObjectUnsynchronized(Handle<HwBufferObject> bufferObjectHandle,
         BufferDescriptor&& bufferDescriptor, const uint32_t byteOffset) {
     handleCast<WebGPUBufferObject>(bufferObjectHandle)
-            ->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager);
+            ->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager, &mStagePool);
     scheduleDestroy(std::move(bufferDescriptor));
 }
 
diff --git a/filament/backend/src/webgpu/WebGPUDriver.h b/filament/backend/src/webgpu/WebGPUDriver.h
index 3a8ffcaa3ded..915a435095f2 100644
--- a/filament/backend/src/webgpu/WebGPUDriver.h
+++ b/filament/backend/src/webgpu/WebGPUDriver.h
@@ -25,6 +25,7 @@
 #include "webgpu/WebGPUPipelineLayoutCache.h"
 #include "webgpu/WebGPURenderPassMipmapGenerator.h"
 #include "webgpu/WebGPUQueueManager.h"
+#include "webgpu/WebGPUStagePool.h"
 #include "webgpu/utils/AsyncTaskCounter.h"
 
 #include
@@ -81,6 +82,7 @@ class WebGPUDriver final : public DriverBase {
     wgpu::Device mDevice = nullptr;
     wgpu::Limits mDeviceLimits = {};
     WebGPUQueueManager mQueueManager;
+    WebGPUStagePool mStagePool;
     void* mNativeWindow = nullptr;
     WebGPUSwapChain* mSwapChain = nullptr;
     uint64_t mNextFakeHandle = 1;
diff --git a/filament/backend/src/webgpu/WebGPUQueueManager.cpp b/filament/backend/src/webgpu/WebGPUQueueManager.cpp
index 3fd9b9633e42..57edf5e1636c 100644
--- a/filament/backend/src/webgpu/WebGPUQueueManager.cpp
+++ b/filament/backend/src/webgpu/WebGPUQueueManager.cpp
@@ -64,6 +64,7 @@ wgpu::CommandEncoder WebGPUQueueManager::getCommandEncoder() {
         };
         mCommandEncoder = mDevice.CreateCommandEncoder(&commandEncoderDescriptor);
         ASSERT_POSTCONDITION(mCommandEncoder, "Failed to create command encoder.");
+        mLatestSubmissionState = std::make_shared<WebGPUSubmissionState>();
     }
     return mCommandEncoder;
 }
@@ -94,8 +95,6 @@ void WebGPUQueueManager::submit() {
         return;
     }
 
-    mLatestSubmissionState = std::make_shared<WebGPUSubmissionState>();
-
     wgpu::CommandBufferDescriptor commandBufferDescriptor{
         .label = "Filament Command Buffer",
     };
diff --git a/filament/backend/src/webgpu/WebGPUStagePool.cpp b/filament/backend/src/webgpu/WebGPUStagePool.cpp
new file mode 100644
index 000000000000..c2ab8d9e8a2b
--- /dev/null
+++ b/filament/backend/src/webgpu/WebGPUStagePool.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WebGPUStagePool.h"
+
+#include "WebGPUConstants.h"
+#include "WebGPUQueueManager.h"
+
+namespace filament::backend {
+
+WebGPUStagePool::WebGPUStagePool(wgpu::Device const& device) : mDevice(device) {}
+
+WebGPUStagePool::~WebGPUStagePool() = default;
+
+wgpu::Buffer WebGPUStagePool::acquireBuffer(size_t requiredSize,
+        std::shared_ptr<WebGPUSubmissionState> submissionState) {
+    wgpu::Buffer buffer;
+    {
+        std::lock_guard lock(mMutex);
+        auto iter = mBuffers.lower_bound(requiredSize);
+        if (iter != mBuffers.end()) {
+            buffer = iter->second;
+            mBuffers.erase(iter);
+        }
+    }
+    if (!buffer.Get()) {
+        buffer = createNewBuffer(requiredSize);
+    }
+    mInProgress.push_back({submissionState, buffer});
+    return buffer;
+}
+
+void WebGPUStagePool::recycleBuffer(wgpu::Buffer buffer) {
+    struct UserData final {
+        wgpu::Buffer buffer;
+        WebGPUStagePool* webGPUStagePool;
+    };
+    auto userData =
+            std::make_unique<UserData>(UserData{ .buffer = buffer, .webGPUStagePool = this });
+    buffer.MapAsync(wgpu::MapMode::Write, 0, buffer.GetSize(), wgpu::CallbackMode::AllowSpontaneous,
+            [data = std::move(userData)](wgpu::MapAsyncStatus status, const char* message) {
+                if (UTILS_LIKELY(status == wgpu::MapAsyncStatus::Success)) {
+                    if (!data->webGPUStagePool) {
+                        return;
+                    }
+                    std::lock_guard lock(data->webGPUStagePool->mMutex);
+                    data->webGPUStagePool->mBuffers.insert(
+                            { data->buffer.GetSize(), data->buffer });
+                } else {
+                    FWGPU_LOGE << "Failed to MapAsync when recycling staging buffer: " << message;
+                }
+            });
+}
+
+void WebGPUStagePool::gc() {
+    // We found that MapAsync would sometimes lead to GetMappedRange returning nullptr if the
+    // command using that staging buffer has not finished executing, so here we only recycle those
+    // buffers that are not still being used by any command
+    std::vector<std::pair<std::shared_ptr<WebGPUSubmissionState>, wgpu::Buffer>> stillInProgress;
+    for (auto& [st, buffer]: mInProgress) {
+        if (st->getStatus() == FenceStatus::CONDITION_SATISFIED) {
+            recycleBuffer(buffer);
+        } else {
+            stillInProgress.push_back({st, buffer});
+        }
+    }
+    std::swap(mInProgress, stillInProgress);
+}
+
+wgpu::Buffer WebGPUStagePool::createNewBuffer(size_t bufferSize) {
+    wgpu::BufferDescriptor descriptor{
+        .label = "Filament WebGPU Staging Buffer",
+        .usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc,
+        .size = bufferSize,
+        .mappedAtCreation = true };
+    return mDevice.CreateBuffer(&descriptor);
+}
+
+} // namespace filament::backend
diff --git a/filament/backend/src/webgpu/WebGPUStagePool.h b/filament/backend/src/webgpu/WebGPUStagePool.h
new file mode 100644
index 000000000000..f878bd555ced
--- /dev/null
+++ b/filament/backend/src/webgpu/WebGPUStagePool.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_WEBGPUSTAGEPOOL_H
+#define TNT_FILAMENT_BACKEND_WEBGPUSTAGEPOOL_H
+
+#include
+
+#include
+#include
+
+namespace filament::backend {
+
+struct WebGPUSubmissionState;
+
+class WebGPUStagePool {
+public:
+    WebGPUStagePool(wgpu::Device const& device);
+    ~WebGPUStagePool();
+
+    wgpu::Buffer acquireBuffer(size_t requiredSize,
+            std::shared_ptr<WebGPUSubmissionState> submissionState);
+    void recycleBuffer(wgpu::Buffer buffer);
+    void gc();
+
+private:
+    wgpu::Buffer createNewBuffer(size_t bufferSize);
+    std::multimap<size_t, wgpu::Buffer> mBuffers;
+    std::vector<std::pair<std::shared_ptr<WebGPUSubmissionState>, wgpu::Buffer>> mInProgress;
+    std::mutex mMutex;
+
+    wgpu::Device mDevice;
+};
+
+} // namespace filament::backend
+
+#endif // TNT_FILAMENT_BACKEND_WEBGPUSTAGEPOOL_H
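
Outside the diff, here is a minimal, hypothetical sketch of the lifecycle this change introduces: WebGPUBufferBase::updateGPUBuffer acquires a pre-mapped staging buffer from WebGPUStagePool, selected by size; the buffer sits on an in-flight list tied to the queue's latest submission state; and WebGPUDriver::endFrame calls gc(), which recycles only buffers whose submission has completed (the real recycle path also re-maps the buffer with MapAsync so GetMappedRange is valid on reuse). The names below (FakeBuffer, FakeSubmission, StagePoolSketch) are illustrative stand-ins, not Filament or Dawn APIs, and the sketch uses only the C++ standard library.

// Hypothetical, standard-library-only sketch of the pooling scheme in this patch.
// FakeBuffer and FakeSubmission stand in for wgpu::Buffer and WebGPUSubmissionState.
#include <cstddef>
#include <cstdio>
#include <map>
#include <memory>
#include <utility>
#include <vector>

struct FakeSubmission {
    bool done = false;  // set once the GPU work that used the buffer has finished
};

struct FakeBuffer {
    std::size_t size = 0;
};

class StagePoolSketch {
public:
    // Hand out the smallest free buffer that is at least requiredSize, or create a new one,
    // and remember which submission must complete before the buffer may be recycled.
    std::shared_ptr<FakeBuffer> acquire(std::size_t requiredSize,
            std::shared_ptr<FakeSubmission> submission) {
        std::shared_ptr<FakeBuffer> buffer;
        auto iter = mFree.lower_bound(requiredSize);
        if (iter != mFree.end()) {
            buffer = iter->second;
            mFree.erase(iter);
        } else {
            buffer = std::make_shared<FakeBuffer>();
            buffer->size = requiredSize;
        }
        mInFlight.push_back({ std::move(submission), buffer });
        return buffer;
    }

    // Called once per frame: only buffers whose submission has completed return to the free map.
    void gc() {
        std::vector<std::pair<std::shared_ptr<FakeSubmission>, std::shared_ptr<FakeBuffer>>> still;
        for (auto& [submission, buffer] : mInFlight) {
            if (submission->done) {
                mFree.insert({ buffer->size, buffer });  // the real code re-maps the buffer here
            } else {
                still.push_back({ submission, buffer });
            }
        }
        mInFlight.swap(still);
    }

private:
    std::multimap<std::size_t, std::shared_ptr<FakeBuffer>> mFree;  // free buffers keyed by size
    std::vector<std::pair<std::shared_ptr<FakeSubmission>, std::shared_ptr<FakeBuffer>>> mInFlight;
};

int main() {
    StagePoolSketch pool;
    auto submission = std::make_shared<FakeSubmission>();
    pool.acquire(256, submission);                      // pool empty: creates a 256-byte buffer
    pool.gc();                                          // submission still pending: nothing recycled
    submission->done = true;
    pool.gc();                                          // now the buffer returns to the free map
    auto b = pool.acquire(100, std::make_shared<FakeSubmission>());
    std::printf("reused buffer size: %zu\n", b->size);  // prints 256: the pooled buffer was reused
    return 0;
}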