Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/nanovdb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
\'
- name: test
run: cd build && sudo ctest -V -E ".*cuda.*"
run: cd build && sudo ctest -V -E ".*cuda.*|.*mgpu.*"

windows-nanovdb:
if: |
Expand Down Expand Up @@ -137,7 +137,7 @@ jobs:
\'
- name: test
shell: bash
run: cd build && ctest -V -E ".*cuda.*"
run: cd build && ctest -V -E ".*cuda.*|.*mgpu.*"

macos-nanovdb:
if: |
Expand Down Expand Up @@ -166,7 +166,7 @@ jobs:
--components=core,nano,nanotest,nanoexam,nanobench,nanotool
--cargs=\'-DUSE_EXPLICIT_INSTANTIATION=OFF -DNANOVDB_USE_CUDA=OFF -DNANOVDB_USE_OPENVDB=ON\'
- name: test
run: cd build && ctest -V -E ".*cuda.*"
run: cd build && ctest -V -E ".*cuda.*|.*mgpu.*"

nanovdb-lite:
if: |
Expand Down
2 changes: 1 addition & 1 deletion nanovdb/nanovdb/GridHandle.h
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ splitGrids(const GridHandle<BufferT> &handle, const BufferT* other = nullptr)
h = HandleT(std::move(buffer));
ptr = util::PtrAdd(ptr, src->mGridSize);
}
return std::move(handles);
return handles;
}// splitGrids

/// @brief Combines (or merges) multiple GridHandles into a single GridHandle containing all grids
Expand Down
7 changes: 4 additions & 3 deletions nanovdb/nanovdb/NanoVDB.h
Original file line number Diff line number Diff line change
Expand Up @@ -3213,18 +3213,19 @@ struct NANOVDB_ALIGN(NANOVDB_DATA_ALIGNMENT) InternalData
__hostdev__ const StatsT& average() const { return mAverage; }
__hostdev__ const StatsT& stdDeviation() const { return mStdDevi; }

// GCC 11 (and possibly prior versions) has a regression that results in invalid
// GCC 13 (and possibly prior versions) has a regression that results in invalid
// warnings when -Wstringop-overflow is turned on. For details, refer to
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101854
#if defined(__GNUC__) && (__GNUC__ < 12) && !defined(__APPLE__) && !defined(__llvm__)
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106757
#if defined(__GNUC__) && (__GNUC__ < 14) && !defined(__APPLE__) && !defined(__llvm__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#endif
__hostdev__ void setMin(const ValueT& v) { mMinimum = v; }
__hostdev__ void setMax(const ValueT& v) { mMaximum = v; }
__hostdev__ void setAvg(const StatsT& v) { mAverage = v; }
__hostdev__ void setDev(const StatsT& v) { mStdDevi = v; }
#if defined(__GNUC__) && (__GNUC__ < 12) && !defined(__APPLE__) && !defined(__llvm__)
#if defined(__GNUC__) && (__GNUC__ < 14) && !defined(__APPLE__) && !defined(__llvm__)
#pragma GCC diagnostic pop
#endif

Expand Down
368 changes: 339 additions & 29 deletions nanovdb/nanovdb/PNanoVDB.h

Large diffs are not rendered by default.

12 changes: 0 additions & 12 deletions nanovdb/nanovdb/cuda/DeviceMesh.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,18 +222,6 @@ inline size_t minDevicePageSize(const DeviceMesh& mesh)
return minGranularity;
}// minDevicePageSize

/// @brief Launches a function for each node in the device mesh in parallel and blocks until they have all completed
template<typename Func, typename... Args>
void parallelForEach(const nanovdb::cuda::DeviceMesh& mesh, Func func, Args... args)
{
std::vector<std::thread> threads;
for (const auto& [device, stream] : mesh) {
threads.emplace_back(func, device, stream, args...);
}
std::for_each(threads.begin(), threads.end(), [](std::thread& t) { t.join(); });
threads.clear();
}// parallelForEach

} // namespace cuda

} // namespace nanovdb
Expand Down
24 changes: 12 additions & 12 deletions nanovdb/nanovdb/cuda/GridHandle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@ namespace nanovdb {

namespace cuda {

namespace {// anonymous namespace
__global__ void cpyGridHandleMeta(const GridData *d_data, GridHandleMetaData *d_meta)
namespace detail {

static __global__ void cpyGridHandleMeta(const GridData *d_data, GridHandleMetaData *d_meta)
{
nanovdb::cpyGridHandleMeta(d_data, d_meta);
}

__global__ void updateGridCount(GridData *d_data, uint32_t gridIndex, uint32_t gridCount, bool *d_dirty)
static __global__ void updateGridCount(GridData *d_data, uint32_t gridIndex, uint32_t gridCount, bool *d_dirty)
{
NANOVDB_ASSERT(gridIndex < gridCount);
*d_dirty = (d_data->mGridIndex != gridIndex) || (d_data->mGridCount != gridCount);
Expand All @@ -41,7 +42,8 @@ __global__ void updateGridCount(GridData *d_data, uint32_t gridIndex, uint32_t g
if (d_data->mChecksum.isEmpty()) *d_dirty = false;// no need to update checksum if it didn't already exist
}
}
}// anonymous namespace

}// namespace detail

template<typename BufferT, template <class, class...> class VectorT = std::vector>
inline typename util::enable_if<BufferTraits<BufferT>::hasDeviceDual, VectorT<GridHandle<BufferT>>>::type
Expand All @@ -52,14 +54,13 @@ splitGridHandles(const GridHandle<BufferT> &handle, const BufferT* other = nullp
VectorT<GridHandle<BufferT>> handles(handle.gridCount());
bool dirty, *d_dirty;// use this to check if the checksum needs to be recomputed
cudaCheck(util::cuda::mallocAsync((void**)&d_dirty, sizeof(bool), stream));
int device = 0;
cudaCheck(cudaGetDevice(&device));
int device = util::cuda::currentDevice();
for (uint32_t n=0; n<handle.gridCount(); ++n) {
auto buffer = BufferT::create(handle.gridSize(n), other, device, stream);
GridData *dst = reinterpret_cast<GridData*>(buffer.deviceData());
const GridData *src = reinterpret_cast<const GridData*>(ptr);
cudaCheck(cudaMemcpyAsync(dst, src, handle.gridSize(n), cudaMemcpyDeviceToDevice, stream));
updateGridCount<<<1, 1, 0, stream>>>(dst, 0u, 1u, d_dirty);
detail::updateGridCount<<<1, 1, 0, stream>>>(dst, 0u, 1u, d_dirty);
cudaCheckError();
cudaCheck(cudaMemcpyAsync(&dirty, d_dirty, sizeof(bool), cudaMemcpyDeviceToHost, stream));
cudaCheck(cudaStreamSynchronize(stream));
Expand All @@ -68,7 +69,7 @@ splitGridHandles(const GridHandle<BufferT> &handle, const BufferT* other = nullp
ptr = util::PtrAdd(ptr, handle.gridSize(n));
}
cudaCheck(util::cuda::freeAsync(d_dirty, stream));
return std::move(handles);
return handles;
}// cuda::splitGridHandles

template<typename BufferT, template <class, class...> class VectorT>
Expand All @@ -81,8 +82,7 @@ mergeGridHandles(const VectorT<GridHandle<BufferT>> &handles, const BufferT* oth
gridCount += h.gridCount();
for (uint32_t n=0; n<h.gridCount(); ++n) size += h.gridSize(n);
}
int device = 0;
cudaCheck(cudaGetDevice(&device));
int device = util::cuda::currentDevice();
auto buffer = BufferT::create(size, other, device, stream);
void *dst = buffer.deviceData();
bool dirty, *d_dirty;// use this to check if the checksum needs to be recomputed
Expand All @@ -92,7 +92,7 @@ mergeGridHandles(const VectorT<GridHandle<BufferT>> &handles, const BufferT* oth
for (uint32_t n=0; n<h.gridCount(); ++n) {
cudaCheck(cudaMemcpyAsync(dst, src, h.gridSize(n), cudaMemcpyDeviceToDevice, stream));
GridData *data = reinterpret_cast<GridData*>(dst);
updateGridCount<<<1, 1, 0, stream>>>(data, counter++, gridCount, d_dirty);
detail::updateGridCount<<<1, 1, 0, stream>>>(data, counter++, gridCount, d_dirty);
cudaCheckError();
cudaCheck(cudaMemcpyAsync(&dirty, d_dirty, sizeof(bool), cudaMemcpyDeviceToHost, stream));
cudaCheck(cudaStreamSynchronize(stream));
Expand Down Expand Up @@ -136,7 +136,7 @@ GridHandle<BufferT>::GridHandle(T&& buffer)
if (!tmp.isValid()) throw std::runtime_error("GridHandle was constructed with an invalid device buffer");
GridHandleMetaData *d_metaData;
cudaMalloc((void**)&d_metaData, tmp.mGridCount*sizeof(GridHandleMetaData));
cuda::cpyGridHandleMeta<<<1,1>>>(d_data, d_metaData);
cuda::detail::cpyGridHandleMeta<<<1,1>>>(d_data, d_metaData);
mMetaData.resize(tmp.mGridCount);
cudaCheck(cudaMemcpy(mMetaData.data(), d_metaData,tmp.mGridCount*sizeof(GridHandleMetaData), cudaMemcpyDeviceToHost));
cudaCheck(cudaFree(d_metaData));
Expand Down
2 changes: 2 additions & 0 deletions nanovdb/nanovdb/examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ nanovdb_example(NAME "ex_bump_pool_buffer")
nanovdb_example(NAME "ex_collide_level_set")
nanovdb_example(NAME "ex_raytrace_fog_volume")
nanovdb_example(NAME "ex_raytrace_level_set")
nanovdb_example(NAME "ex_dilate_nanovdb_cuda" OPENVDB)
nanovdb_example(NAME "ex_merge_nanovdb_cuda" OPENVDB)

if(CUDAToolkit_FOUND)
nanovdb_example(NAME "ex_make_mgpu_nanovdb") # requires cuRAND
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Copyright Contributors to the OpenVDB Project
// SPDX-License-Identifier: Apache-2.0

// the following files are from OpenVDB
#include <openvdb/tools/Morphology.h>
#include <openvdb/util/CpuTimer.h>

// the following files are from NanoVDB
#include <nanovdb/NanoVDB.h>
#include <nanovdb/cuda/DeviceBuffer.h>
#include <nanovdb/tools/CreateNanoGrid.h>

template<typename BuildT>
void mainDilateGrid(
nanovdb::NanoGrid<BuildT> *deviceGridOriginal,
nanovdb::NanoGrid<BuildT> *deviceGridDilated,
nanovdb::NanoGrid<BuildT> *indexGridOriginal,
nanovdb::NanoGrid<BuildT> *indexGridDilated,
uint32_t nnType,
uint32_t benchmark_iters
);

/// @brief Prints topology and memory diagnostics for a host-side NanoVDB index grid.
/// @tparam GridType deduced NanoVDB grid type (e.g. nanovdb::NanoGrid<nanovdb::ValueOnIndex>)
/// @param header banner line printed before the statistics
/// @param grid   non-null pointer to a host-side (CPU) grid
template<typename GridType>
static void printIndexGridDiagnostics(const std::string& header, const GridType* grid)
{
    std::cout << header << std::endl;
    std::cout << "Allocated values [valueCount()] : " << grid->valueCount() << std::endl;
    std::cout << "Active voxels [activeVoxelCount()] : " << grid->activeVoxelCount() << std::endl;
    auto minCorner = grid->indexBBox().min(), maxCorner = grid->indexBBox().max();
    std::cout << "Index-space bounding box : [" << minCorner.x() << "," << minCorner.y() << "," << minCorner.z()
              << "] -> [" << maxCorner.x() << "," << maxCorner.y() << "," << maxCorner.z() << "]" << std::endl;
    std::cout << "Leaf nodes : " << grid->tree().nodeCount(0) << std::endl;
    std::cout << "Lower internal nodes : " << grid->tree().nodeCount(1) << std::endl;
    std::cout << "Upper internal nodes : " << grid->tree().nodeCount(2) << std::endl;
    // NanoVDB leaf nodes hold 8x8x8 = 512 voxels, hence the occupancy denominator.
    std::cout << "Leaf-level occupancy : "
        << 100.f * (float)(grid->activeVoxelCount())/(float)(grid->tree().nodeCount(0) * 512)
        << "%" << std::endl;
    std::cout << "Memory usage : " << grid->gridSize() << " bytes" << std::endl;
}

/// @brief This example depends on OpenVDB, NanoVDB, and CUDA.
///
/// Usage: <exe> input.vdb [<iterations>]
/// Reads the first FloatGrid from input.vdb, converts it to a NanoVDB index grid,
/// dilates the OpenVDB grid on the CPU, converts the dilated result to a second
/// index grid, uploads both grids to the GPU, and launches the GPU dilation
/// benchmark (mainDilateGrid) against them.
int main(int argc, char *argv[])
{
    using GridT = openvdb::FloatGrid;
    using BuildT = nanovdb::ValueOnIndex;

    // Select the type of dilation here. The NN_EDGE case supports leaf dilation too (currently)
    // openvdb::tools::NearestNeighbors nnType = openvdb::tools::NN_FACE_EDGE_VERTEX;
    openvdb::tools::NearestNeighbors nnType = openvdb::tools::NN_FACE;

    openvdb::util::CpuTimer cpuTimer;
    const bool printGridDiagnostics = true;

    try {

        if (argc<2) OPENVDB_THROW(openvdb::ValueError, "usage: "+std::string(argv[0])+" input.vdb [<iterations>]\n");
        int benchmark_iters = 10;
        // Validate the optional iteration count instead of silently ignoring a bad
        // or non-positive argument (the original ignored sscanf's return value).
        if (argc > 2 && (sscanf(argv[2], "%d", &benchmark_iters) != 1 || benchmark_iters < 1))
            OPENVDB_THROW(openvdb::ValueError, "invalid iteration count \""+std::string(argv[2])+"\"\n");

        // Read the initial level set from file

        cpuTimer.start("Read input VDB file");
        openvdb::initialize();
        openvdb::io::File inFile(argv[1]);
        inFile.open(false); // disable delayed loading
        auto baseGrids = inFile.getGrids();
        inFile.close();
        auto grid = openvdb::gridPtrCast<GridT>(baseGrids->at(0));
        // Check the cast result before any further use of the grid.
        if (!grid) OPENVDB_THROW(openvdb::ValueError, "First grid is not a FloatGrid\n");
        cpuTimer.stop();

        // Convert to indexGrid (original, un-dilated)
        cpuTimer.start("Converting openVDB input to indexGrid (original version)");
        auto handleOriginal = nanovdb::tools::openToIndexVDB<BuildT, nanovdb::cuda::DeviceBuffer>(
            grid,
            0u,    // Don't copy data channel
            false, // No stats
            false, // No tiles
            1      // Verbose mode
        );
        auto *indexGridOriginal = handleOriginal.grid<BuildT>();
        cpuTimer.stop();

        if (printGridDiagnostics)
            printIndexGridDiagnostics("============ Original Grid ===========", indexGridOriginal);

        // Dilation (CPU/OpenVDB version)
        cpuTimer.start("Dilating openVDB (on CPU)");
        openvdb::tools::dilateActiveValues(grid->tree(), 1, nnType);
        cpuTimer.stop();

        // Convert to indexGrid (dilated)
        cpuTimer.start("Converting openVDB input to indexGrid (dilated version)");
        auto handleDilated = nanovdb::tools::openToIndexVDB<BuildT, nanovdb::cuda::DeviceBuffer>(
            grid,
            0u,    // Don't copy data channel
            false, // No stats
            false, // No tiles
            1      // Verbose mode
        );
        cpuTimer.stop();

        auto *indexGridDilated = handleDilated.grid<BuildT>();

        if (printGridDiagnostics)
            printIndexGridDiagnostics("============ Dilated Grid ============", indexGridDilated);

        // Copy both NanoVDB grids to GPU
        handleOriginal.deviceUpload();
        handleDilated.deviceUpload();
        auto* deviceGridOriginal = handleOriginal.deviceGrid<BuildT>();
        auto* deviceGridDilated = handleDilated.deviceGrid<BuildT>();
        if (!deviceGridOriginal || !deviceGridDilated)
            OPENVDB_THROW(openvdb::RuntimeError, "Failure while uploading indexGrids to GPU");

        // Launch benchmark
        mainDilateGrid( deviceGridOriginal, deviceGridDilated, indexGridOriginal, indexGridDilated, nnType, benchmark_iters );

    }
    catch (const std::exception& e) {
        std::cerr << "An exception occurred: \"" << e.what() << "\"" << std::endl;
    }
    return 0;
}
Loading
Loading