Skip to content

Commit 6f3f350

Browse files
sifakismatthewdcong
authored andcommitted
fixing discrepancies between OpenToNano/PointsToGrid constructions
This PR fixes a few discrepancies between grid construction via (a) PointsToGrid, and (b) OpenToNanoVDB. The discrepancies are not typically inhibiting functionality of either construction, but result in grids that are not bit-identical, when they would otherwise be expected to be. The changes are: Removal of the PointsToGridData::flags member. The actual functionality of this flag was to allow control of solely the leaf-level flags (interior node, and GridData flags were already set independently), and in that aspect it was even incorrectly setting the IsBreadthFirst bit (which only makes sense for GridData::mFlags, not the leaf node flags). The method includeBBox was having no effect as it was, it was never called in any of the unittests, and its effect was overwritten later in the code. PointsToGrid has been corrected to flag HasBBox as active in every node (not just the leaf nodes, as was happening prior; the flags were previously being set to zero). The world space bounding box in OpenToNanoVDB was incrementing the upper bound of the index-space bounding box, prior to applying the transform ( hence, a grid with a single voxel [0,0,0] would have a WS bounding box of [0,0,0]->[1,1,1]). PointsToGrid was not applying this increment, resulting in a smaller WS bounding box. Signed-off-by: Matthew Cong <mcong@nvidia.com>
1 parent 178b816 commit 6f3f350

File tree

2 files changed

+52
-62
lines changed

2 files changed

+52
-62
lines changed

nanovdb/nanovdb/tools/cuda/DistributedPointsToGrid.cuh

Lines changed: 43 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,6 @@ DistributedPointsToGrid<BuildT>::DistributedPointsToGrid(const nanovdb::cuda::De
461461
mTempDevicePools = new nanovdb::cuda::TempDevicePool[mDeviceMesh.deviceCount()];
462462

463463
cudaCheck(cudaMallocManaged(&mData, sizeof(PointsToGridData<BuildT>)));
464-
mData->flags.initMask({GridFlags::HasBBox, GridFlags::IsBreadthFirst});
465464
mData->map = map;
466465

467466
mStripeCounts = nullptr;
@@ -906,7 +905,7 @@ template <typename BuildT>
906905
inline void DistributedPointsToGrid<BuildT>::processNodes()
907906
{
908907
// Parallel construction of upper, lower, and leaf nodes
909-
const uint8_t flags = static_cast<uint8_t>(mData->flags.data());// mIncludeStats ? 16u : 0u;// 4th bit indicates stats
908+
const uint8_t flags = (uint8_t) GridFlags::HasBBox;
910909

911910
for (const auto& [deviceId, stream] : mDeviceMesh) {
912911
cudaCheck(cudaSetDevice(deviceId));
@@ -1002,58 +1001,56 @@ inline void DistributedPointsToGrid<BuildT>::processPoints(const PtrT, size_t)
10021001
template <typename BuildT>
10031002
inline void DistributedPointsToGrid<BuildT>::processBBox()
10041003
{
1005-
if (mData->flags.isMaskOn(GridFlags::HasBBox)) {
1006-
// Compute and propagate bounding boxes for the upper nodes and their descendents belonging to each device in parallel.
1007-
std::vector<cudaEvent_t> propagateLowerBBoxEvents(mDeviceMesh.deviceCount());
1008-
for (const auto& [deviceId, stream] : mDeviceMesh) {
1009-
cudaCheck(cudaSetDevice(deviceId));
1010-
// reset bbox in lower nodes
1011-
if (deviceNodeCount(deviceId)[1]) {
1012-
util::cuda::offsetLambdaKernel<<<numBlocks(deviceNodeCount(deviceId)[1]), mNumThreads, 0, stream>>>(deviceNodeCount(deviceId)[1], deviceNodeOffset(deviceId)[1], ResetLowerNodeBBoxFunctor<BuildT>(), mData);
1013-
cudaCheckError();
1014-
}
1015-
1016-
// update and propagate bbox from leaf -> lower/parent nodes
1017-
if (deviceNodeCount(deviceId)[0]) {
1018-
util::cuda::offsetLambdaKernel<<<numBlocks(deviceNodeCount(deviceId)[0]), mNumThreads, 0, stream>>>(deviceNodeCount(deviceId)[0], deviceNodeOffset(deviceId)[0], UpdateAndPropagateLeafBBoxFunctor<BuildT>(), mData);
1019-
cudaCheckError();
1020-
}
1021-
1022-
// reset bbox in upper nodes
1023-
if (deviceNodeCount(deviceId)[2]) {
1024-
util::cuda::offsetLambdaKernel<<<numBlocks(deviceNodeCount(deviceId)[2]), mNumThreads, 0, stream>>>(deviceNodeCount(deviceId)[2], deviceNodeOffset(deviceId)[2], ResetUpperNodeBBoxFunctor<BuildT>(), mData);
1025-
cudaCheckError();
1026-
}
1027-
1028-
// propagate bbox from lower -> upper/parent node
1029-
if (deviceNodeCount(deviceId)[1]) {
1030-
util::cuda::offsetLambdaKernel<<<numBlocks(deviceNodeCount(deviceId)[1]), mNumThreads, 0, stream>>>(deviceNodeCount(deviceId)[1], deviceNodeOffset(deviceId)[1], PropagateLowerBBoxFunctor<BuildT>(), mData);
1031-
cudaCheckError();
1032-
}
1004+
// Compute and propagate bounding boxes for the upper nodes and their descendents belonging to each device in parallel.
1005+
std::vector<cudaEvent_t> propagateLowerBBoxEvents(mDeviceMesh.deviceCount());
1006+
for (const auto& [deviceId, stream] : mDeviceMesh) {
1007+
cudaCheck(cudaSetDevice(deviceId));
1008+
// reset bbox in lower nodes
1009+
if (deviceNodeCount(deviceId)[1]) {
1010+
util::cuda::offsetLambdaKernel<<<numBlocks(deviceNodeCount(deviceId)[1]), mNumThreads, 0, stream>>>(deviceNodeCount(deviceId)[1], deviceNodeOffset(deviceId)[1], ResetLowerNodeBBoxFunctor<BuildT>(), mData);
1011+
cudaCheckError();
1012+
}
10331013

1034-
cudaEventCreate(&propagateLowerBBoxEvents[deviceId]);
1035-
cudaEventRecord(propagateLowerBBoxEvents[deviceId], stream);
1014+
// update and propagate bbox from leaf -> lower/parent nodes
1015+
if (deviceNodeCount(deviceId)[0]) {
1016+
util::cuda::offsetLambdaKernel<<<numBlocks(deviceNodeCount(deviceId)[0]), mNumThreads, 0, stream>>>(deviceNodeCount(deviceId)[0], deviceNodeOffset(deviceId)[0], UpdateAndPropagateLeafBBoxFunctor<BuildT>(), mData);
1017+
cudaCheckError();
10361018
}
10371019

1038-
// Wait until bounding boxes are computed for each upper node and then compute the root bounding box on the zeroth device
1039-
{
1040-
int deviceId = 0;
1041-
auto stream = mDeviceMesh[deviceId].stream;
1042-
cudaCheck(cudaSetDevice(deviceId));
1043-
for (const auto& propagateLowerBBoxEvent : propagateLowerBBoxEvents)
1044-
{
1045-
cudaStreamWaitEvent(stream, propagateLowerBBoxEvent);
1046-
}
1047-
// propagate bbox from upper -> root/parent node
1048-
util::cuda::lambdaKernel<<<numBlocks(mData->nodeCount[2]), mNumThreads, 0, stream>>>(mData->nodeCount[2], PropagateUpperBBoxFunctor<BuildT>(), mData);
1020+
// reset bbox in upper nodes
1021+
if (deviceNodeCount(deviceId)[2]) {
1022+
util::cuda::offsetLambdaKernel<<<numBlocks(deviceNodeCount(deviceId)[2]), mNumThreads, 0, stream>>>(deviceNodeCount(deviceId)[2], deviceNodeOffset(deviceId)[2], ResetUpperNodeBBoxFunctor<BuildT>(), mData);
10491023
cudaCheckError();
1024+
}
10501025

1051-
// update the world-bbox in the root node
1052-
util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, UpdateRootWorldBBoxFunctor<BuildT>(), mData);
1026+
// propagate bbox from lower -> upper/parent node
1027+
if (deviceNodeCount(deviceId)[1]) {
1028+
util::cuda::offsetLambdaKernel<<<numBlocks(deviceNodeCount(deviceId)[1]), mNumThreads, 0, stream>>>(deviceNodeCount(deviceId)[1], deviceNodeOffset(deviceId)[1], PropagateLowerBBoxFunctor<BuildT>(), mData);
10531029
cudaCheckError();
1030+
}
10541031

1055-
cudaCheck(cudaEventDestroy(propagateLowerBBoxEvents[deviceId]));
1032+
cudaEventCreate(&propagateLowerBBoxEvents[deviceId]);
1033+
cudaEventRecord(propagateLowerBBoxEvents[deviceId], stream);
1034+
}
1035+
1036+
// Wait until bounding boxes are computed for each upper node and then compute the root bounding box on the zeroth device
1037+
{
1038+
int deviceId = 0;
1039+
auto stream = mDeviceMesh[deviceId].stream;
1040+
cudaCheck(cudaSetDevice(deviceId));
1041+
for (const auto& propagateLowerBBoxEvent : propagateLowerBBoxEvents)
1042+
{
1043+
cudaStreamWaitEvent(stream, propagateLowerBBoxEvent);
10561044
}
1045+
// propagate bbox from upper -> root/parent node
1046+
util::cuda::lambdaKernel<<<numBlocks(mData->nodeCount[2]), mNumThreads, 0, stream>>>(mData->nodeCount[2], PropagateUpperBBoxFunctor<BuildT>(), mData);
1047+
cudaCheckError();
1048+
1049+
// update the world-bbox in the root node
1050+
util::cuda::lambdaKernel<<<1, 1, 0, stream>>>(1, UpdateRootWorldBBoxFunctor<BuildT>(), mData);
1051+
cudaCheckError();
1052+
1053+
cudaCheck(cudaEventDestroy(propagateLowerBBoxEvents[deviceId]));
10571054
}
10581055

10591056
// Explicitly synchronize so that move constructor in getHandle doesn't fail

nanovdb/nanovdb/tools/cuda/PointsToGrid.cuh

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,6 @@ struct PointsToGridData {
277277
uint32_t *d_indx;// device pointer to point indices (or IDs)
278278
uint32_t nodeCount[3], *pointsPerLeafPrefix, *pointsPerLeaf;// 0=leaf,1=lower, 2=upper
279279
uint32_t voxelCount, *pointsPerVoxelPrefix, *pointsPerVoxel;
280-
BitFlags<16> flags;
281280
__hostdev__ NanoGrid<BuildT>& getGrid() const {return *util::PtrAdd<NanoGrid<BuildT>>(d_bufferPtr, grid);}
282281
__hostdev__ NanoTree<BuildT>& getTree() const {return *util::PtrAdd<NanoTree<BuildT>>(d_bufferPtr, tree);}
283282
__hostdev__ NanoRoot<BuildT>& getRoot() const {return *util::PtrAdd<NanoRoot<BuildT>>(d_bufferPtr, root);}
@@ -303,7 +302,6 @@ public:
303302
, mPointType(util::is_same<BuildT,Point>::value ? PointType::Default : PointType::Disable)
304303
{
305304
mData.map = map;
306-
mData.flags.initMask({GridFlags::HasBBox, GridFlags::IsBreadthFirst});
307305
mDeviceData = static_cast<PointsToGridData<BuildT>*>(ResourceT::allocateAsync(sizeof(PointsToGridData<BuildT>), ResourceT::DEFAULT_ALIGNMENT, mStream));
308306
}
309307

@@ -335,10 +333,6 @@ public:
335333
/// @param mode Mode of checksum computation
336334
void setChecksum(CheckMode mode = CheckMode::Disable){mChecksum = mode;}
337335

338-
/// @brief Toggle on and off the computation of a bounding-box
339-
/// @param on If true bbox will be computed
340-
void includeBBox(bool on = true) { mData.flags.setMask(GridFlags::HasBBox, on); }
341-
342336
/// @brief Set the name of the output grid
343337
/// @param name name of the output grid
344338
void setGridName(const std::string &name) {mGridName = name;}
@@ -901,6 +895,8 @@ struct BuildGridTreeRootFunctor
901895
default:
902896
printf("Error in PointsToGrid<BuildT, ResourceT>::processGridTreeRoot: invalid pointType\n");
903897
}
898+
} else if constexpr(BuildTraits<BuildT>::is_onindex) {
899+
grid.mGridClass = GridClass::IndexGrid;
904900
} else if constexpr(BuildTraits<BuildT>::is_offindex) {
905901
grid.mData1 = 1u + 512u*d_data->nodeCount[0];
906902
grid.mGridClass = GridClass::IndexGrid;
@@ -946,7 +942,7 @@ struct BuildUpperNodesFunctor
946942
#endif
947943
root.tile(tid)->setChild(ijk, &upper, &root);
948944
upper.mBBox[0] = ijk;
949-
upper.mFlags = 0;
945+
upper.mFlags = (uint64_t) GridFlags::HasBBox;
950946
upper.mValueMask.setOff();
951947
upper.mChildMask.setOff();
952948
upper.mMinimum = upper.mMaximum = typename NanoLower<BuildT>::ValueType(0);
@@ -992,7 +988,7 @@ struct BuildLowerNodesFunctor
992988
auto &lower = d_data->getLower(tid);
993989
upper.setChild(upperOffset, &lower);
994990
lower.mBBox[0] = upper.offsetToGlobalCoord(upperOffset);
995-
lower.mFlags = 0;
991+
lower.mFlags = (uint64_t) GridFlags::HasBBox;
996992
lower.mValueMask.setOff();
997993
lower.mChildMask.setOff();
998994
lower.mMinimum = lower.mMaximum = typename NanoLower<BuildT>::ValueType(0);// background;
@@ -1093,7 +1089,7 @@ struct SetLeafInactiveVoxelValuesFunctor
10931089
template<typename BuildT, typename ResourceT>
10941090
inline void PointsToGrid<BuildT, ResourceT>::processLeafNodes(size_t pointCount)
10951091
{
1096-
const uint8_t flags = static_cast<uint8_t>(mData.flags.data());// mIncludeStats ? 16u : 0u;// 4th bit indicates stats
1092+
const uint8_t flags = (uint8_t) GridFlags::HasBBox;
10971093

10981094
if (mVerbose==2) mTimer.start("process leaf meta data");
10991095
// loop over leaf nodes and add it to its parent node
@@ -1274,19 +1270,16 @@ struct UpdateRootWorldBBoxFunctor
12741270
{
12751271
__device__
12761272
void operator()(size_t tid, PointsToGridData<BuildT> *d_data) {
1277-
d_data->getGrid().mWorldBBox = d_data->getRoot().mBBox.transform(d_data->map);
1273+
auto BBox = d_data->getRoot().mBBox;
1274+
BBox.max() += 1;
1275+
d_data->getGrid().mFlags.setMaskOn(GridFlags::HasBBox);
1276+
d_data->getGrid().mWorldBBox = BBox.transform(d_data->map);
12781277
}
12791278
};
12801279

12811280
template<typename BuildT, typename ResourceT>
12821281
inline void PointsToGrid<BuildT, ResourceT>::processBBox()
12831282
{
1284-
if (mData.flags.isMaskOff(GridFlags::HasBBox)) {
1285-
ResourceT::deallocateAsync(mData.d_leaf_keys, mData.nodeCount[0]*sizeof(uint64_t), ResourceT::DEFAULT_ALIGNMENT, mStream);
1286-
ResourceT::deallocateAsync(mData.d_lower_keys, mData.nodeCount[1]*sizeof(uint64_t), ResourceT::DEFAULT_ALIGNMENT, mStream);
1287-
return;
1288-
}
1289-
12901283
// reset bbox in lower nodes
12911284
util::cuda::lambdaKernel<<<numBlocks(mData.nodeCount[1]), mNumThreads, 0, mStream>>>(mData.nodeCount[1], ResetLowerNodeBBoxFunctor<BuildT>(), mDeviceData);
12921285
cudaCheckError();

0 commit comments

Comments
 (0)