Move device histogram storage into histogram.cuh. (#10608)
parent cb62f9e73b
commit 6d9fcb771e
@@ -6,6 +6,8 @@
 #include <memory>  // for unique_ptr
 
 #include "../../common/cuda_context.cuh"    // for CUDAContext
+#include "../../common/device_helpers.cuh"  // for LaunchN
+#include "../../common/device_vector.cuh"   // for device_vector
 #include "../../data/ellpack_page.cuh"      // for EllpackDeviceAccessor
 #include "feature_groups.cuh"               // for FeatureGroupsAccessor
 #include "xgboost/base.h"                   // for GradientPair, GradientPairInt64
@@ -60,6 +62,111 @@ class GradientQuantiser {
   }
 };
 
+/**
+ * @brief Data storage for node histograms on device. Automatically expands.
+ *
+ * @tparam kStopGrowingSize  Do not grow beyond this size.
+ *
+ * @author  Rory
+ * @date    28/07/2018
+ */
+template <size_t kStopGrowingSize = 1 << 28>
+class DeviceHistogramStorage {
+ private:
+  using GradientSumT = GradientPairInt64;
+  /** @brief Map nidx to starting index of its histogram. */
+  std::map<int, size_t> nidx_map_;
+  // Large buffer of zeroed memory, caches histograms
+  dh::device_vector<typename GradientSumT::ValueT> data_;
+  // If we run out of storage allocate one histogram at a time
+  // in overflow. Not cached, overwritten when a new histogram
+  // is requested
+  dh::device_vector<typename GradientSumT::ValueT> overflow_;
+  std::map<int, size_t> overflow_nidx_map_;
+  int n_bins_;
+  DeviceOrd device_id_;
+  static constexpr size_t kNumItemsInGradientSum =
+      sizeof(GradientSumT) / sizeof(typename GradientSumT::ValueT);
+  static_assert(kNumItemsInGradientSum == 2, "Number of items in gradient type should be 2.");
+
+ public:
+  // Start with about 16mb
+  DeviceHistogramStorage() { data_.reserve(1 << 22); }
+  void Init(DeviceOrd device_id, int n_bins) {
+    this->n_bins_ = n_bins;
+    this->device_id_ = device_id;
+  }
+
+  void Reset(Context const* ctx) {
+    auto d_data = data_.data().get();
+    dh::LaunchN(data_.size(), ctx->CUDACtx()->Stream(),
+                [=] __device__(size_t idx) { d_data[idx] = 0.0f; });
+    nidx_map_.clear();
+    overflow_nidx_map_.clear();
+  }
+  [[nodiscard]] bool HistogramExists(int nidx) const {
+    return nidx_map_.find(nidx) != nidx_map_.cend() ||
+           overflow_nidx_map_.find(nidx) != overflow_nidx_map_.cend();
+  }
+  [[nodiscard]] int Bins() const { return n_bins_; }
+  [[nodiscard]] size_t HistogramSize() const { return n_bins_ * kNumItemsInGradientSum; }
+  dh::device_vector<typename GradientSumT::ValueT>& Data() { return data_; }
+
+  void AllocateHistograms(Context const* ctx, const std::vector<int>& new_nidxs) {
+    for (int nidx : new_nidxs) {
+      CHECK(!HistogramExists(nidx));
+    }
+    // Number of items currently used in data
+    const size_t used_size = nidx_map_.size() * HistogramSize();
+    const size_t new_used_size = used_size + HistogramSize() * new_nidxs.size();
+    if (used_size >= kStopGrowingSize) {
+      // Use overflow
+      // Delete previous entries
+      overflow_nidx_map_.clear();
+      overflow_.resize(HistogramSize() * new_nidxs.size());
+      // Zero memory
+      auto d_data = overflow_.data().get();
+      dh::LaunchN(overflow_.size(), ctx->CUDACtx()->Stream(),
+                  [=] __device__(size_t idx) { d_data[idx] = 0.0; });
+      // Append new histograms
+      for (int nidx : new_nidxs) {
+        overflow_nidx_map_[nidx] = overflow_nidx_map_.size() * HistogramSize();
+      }
+    } else {
+      CHECK_GE(data_.size(), used_size);
+      // Expand if necessary
+      if (data_.size() < new_used_size) {
+        data_.resize(std::max(data_.size() * 2, new_used_size));
+      }
+      // Append new histograms
+      for (int nidx : new_nidxs) {
+        nidx_map_[nidx] = nidx_map_.size() * HistogramSize();
+      }
+    }
+
+    CHECK_GE(data_.size(), nidx_map_.size() * HistogramSize());
+  }
+
+  /**
+   * \summary Return pointer to histogram memory for a given node.
+   * \param nidx    Tree node index.
+   * \return    hist pointer.
+   */
+  common::Span<GradientSumT> GetNodeHistogram(int nidx) {
+    CHECK(this->HistogramExists(nidx));
+
+    if (nidx_map_.find(nidx) != nidx_map_.cend()) {
+      // Fetch from normal cache
+      auto ptr = data_.data().get() + nidx_map_.at(nidx);
+      return {reinterpret_cast<GradientSumT*>(ptr), static_cast<std::size_t>(n_bins_)};
+    } else {
+      // Fetch from overflow
+      auto ptr = overflow_.data().get() + overflow_nidx_map_.at(nidx);
+      return {reinterpret_cast<GradientSumT*>(ptr), static_cast<std::size_t>(n_bins_)};
+    }
+  }
+};
+
 class DeviceHistogramBuilderImpl;
 
 class DeviceHistogramBuilder {
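For orientation, a minimal usage sketch of the relocated class follows (not part of the diff). It assumes compilation inside the XGBoost source tree next to histogram.cuh; the wrapper function HistogramStorageSketch, its n_bins_total parameter, and the include paths are illustrative assumptions, while the DeviceHistogramStorage calls themselves follow the signatures added above.

#include <cstddef>                 // for std::size_t
#include <vector>                  // for std::vector

#include "gpu_hist/histogram.cuh"  // for DeviceHistogramStorage (path relative to src/tree, assumed)
#include "xgboost/context.h"       // for Context
#include "xgboost/logging.h"       // for CHECK_EQ

namespace xgboost::tree {
// Illustrative only: allocate a few per-node histograms and fetch one back.
void HistogramStorageSketch(Context const* ctx, int n_bins_total) {
  DeviceHistogramStorage<> storage;           // default cap: kStopGrowingSize = 1 << 28 entries
  storage.Init(ctx->Device(), n_bins_total);  // records sizes only, no device allocation yet

  // Below the cap the cached buffer only grows and is reused across Reset() calls;
  // once the used space reaches the cap, new nodes are served from the overflow buffer.
  std::vector<int> new_nodes{0, 1, 2};
  storage.AllocateHistograms(ctx, new_nodes);

  // Each node histogram is a span of n_bins_total GradientPairInt64 entries.
  auto root_hist = storage.GetNodeHistogram(0);
  CHECK_EQ(root_hist.size(), static_cast<std::size_t>(n_bins_total));

  // Zero the cached buffer and forget all node-to-offset mappings.
  storage.Reset(ctx);
}
}  // namespace xgboost::tree

The Context parameter threaded through Reset() and AllocateHistograms() in this commit lets the zeroing kernels run on the context's CUDA stream (ctx->CUDACtx()->Stream()) rather than the legacy default-stream LaunchN overload.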
@@ -49,113 +49,6 @@ namespace xgboost::tree {
 DMLC_REGISTRY_FILE_TAG(updater_gpu_hist);
 #endif  // !defined(GTEST_TEST)
 
-/**
- * \struct  DeviceHistogramStorage
- *
- * \summary Data storage for node histograms on device. Automatically expands.
- *
- * \tparam GradientSumT      histogram entry type.
- * \tparam kStopGrowingSize  Do not grow beyond this size
- *
- * \author  Rory
- * \date    28/07/2018
- */
-template <size_t kStopGrowingSize = 1 << 28>
-class DeviceHistogramStorage {
- private:
-  using GradientSumT = GradientPairInt64;
-  /*! \brief Map nidx to starting index of its histogram. */
-  std::map<int, size_t> nidx_map_;
-  // Large buffer of zeroed memory, caches histograms
-  dh::device_vector<typename GradientSumT::ValueT> data_;
-  // If we run out of storage allocate one histogram at a time
-  // in overflow. Not cached, overwritten when a new histogram
-  // is requested
-  dh::device_vector<typename GradientSumT::ValueT> overflow_;
-  std::map<int, size_t> overflow_nidx_map_;
-  int n_bins_;
-  DeviceOrd device_id_;
-  static constexpr size_t kNumItemsInGradientSum =
-      sizeof(GradientSumT) / sizeof(typename GradientSumT::ValueT);
-  static_assert(kNumItemsInGradientSum == 2, "Number of items in gradient type should be 2.");
-
- public:
-  // Start with about 16mb
-  DeviceHistogramStorage() { data_.reserve(1 << 22); }
-  void Init(DeviceOrd device_id, int n_bins) {
-    this->n_bins_ = n_bins;
-    this->device_id_ = device_id;
-  }
-
-  void Reset() {
-    auto d_data = data_.data().get();
-    dh::LaunchN(data_.size(), [=] __device__(size_t idx) { d_data[idx] = 0.0f; });
-    nidx_map_.clear();
-    overflow_nidx_map_.clear();
-  }
-  [[nodiscard]] bool HistogramExists(int nidx) const {
-    return nidx_map_.find(nidx) != nidx_map_.cend() ||
-           overflow_nidx_map_.find(nidx) != overflow_nidx_map_.cend();
-  }
-  [[nodiscard]] int Bins() const { return n_bins_; }
-  [[nodiscard]] size_t HistogramSize() const { return n_bins_ * kNumItemsInGradientSum; }
-  dh::device_vector<typename GradientSumT::ValueT>& Data() { return data_; }
-
-  void AllocateHistograms(const std::vector<int>& new_nidxs) {
-    for (int nidx : new_nidxs) {
-      CHECK(!HistogramExists(nidx));
-    }
-    // Number of items currently used in data
-    const size_t used_size = nidx_map_.size() * HistogramSize();
-    const size_t new_used_size = used_size + HistogramSize() * new_nidxs.size();
-    if (used_size >= kStopGrowingSize) {
-      // Use overflow
-      // Delete previous entries
-      overflow_nidx_map_.clear();
-      overflow_.resize(HistogramSize() * new_nidxs.size());
-      // Zero memory
-      auto d_data = overflow_.data().get();
-      dh::LaunchN(overflow_.size(),
-                  [=] __device__(size_t idx) { d_data[idx] = 0.0; });
-      // Append new histograms
-      for (int nidx : new_nidxs) {
-        overflow_nidx_map_[nidx] = overflow_nidx_map_.size() * HistogramSize();
-      }
-    } else {
-      CHECK_GE(data_.size(), used_size);
-      // Expand if necessary
-      if (data_.size() < new_used_size) {
-        data_.resize(std::max(data_.size() * 2, new_used_size));
-      }
-      // Append new histograms
-      for (int nidx : new_nidxs) {
-        nidx_map_[nidx] = nidx_map_.size() * HistogramSize();
-      }
-    }
-
-    CHECK_GE(data_.size(), nidx_map_.size() * HistogramSize());
-  }
-
-  /**
-   * \summary Return pointer to histogram memory for a given node.
-   * \param nidx    Tree node index.
-   * \return    hist pointer.
-   */
-  common::Span<GradientSumT> GetNodeHistogram(int nidx) {
-    CHECK(this->HistogramExists(nidx));
-
-    if (nidx_map_.find(nidx) != nidx_map_.cend()) {
-      // Fetch from normal cache
-      auto ptr = data_.data().get() + nidx_map_.at(nidx);
-      return {reinterpret_cast<GradientSumT*>(ptr), static_cast<std::size_t>(n_bins_)};
-    } else {
-      // Fetch from overflow
-      auto ptr = overflow_.data().get() + overflow_nidx_map_.at(nidx);
-      return {reinterpret_cast<GradientSumT*>(ptr), static_cast<std::size_t>(n_bins_)};
-    }
-  }
-};
-
 // Manage memory for a single GPU
 struct GPUHistMakerDevice {
  private:
@@ -258,7 +151,7 @@ struct GPUHistMakerDevice {
 
     // Init histogram
     hist.Init(ctx_->Device(), page->Cuts().TotalBins());
-    hist.Reset();
+    hist.Reset(ctx_);
 
     this->InitFeatureGroupsOnce();
 
@@ -657,7 +550,7 @@
     all_new.insert(all_new.end(), subtraction_nidx.begin(), subtraction_nidx.end());
     // Allocate the histograms
     // Guaranteed contiguous memory
-    hist.AllocateHistograms(all_new);
+    hist.AllocateHistograms(ctx_, all_new);
 
     for (auto nidx : hist_nidx) {
       this->BuildHist(nidx);
@@ -748,7 +641,7 @@
         ctx_, info_, linalg::MakeVec(reinterpret_cast<ReduceT*>(&root_sum_quantised), 2));
     collective::SafeColl(rc);
 
-    hist.AllocateHistograms({kRootNIdx});
+    hist.AllocateHistograms(ctx_, {kRootNIdx});
     this->BuildHist(kRootNIdx);
     this->AllReduceHist(kRootNIdx, 1);
 
@@ -763,4 +763,6 @@ void DeleteRMMResource(RMMAllocator*) {}
 RMMAllocatorPtr SetUpRMMResourceForCppTests(int, char**) { return {nullptr, DeleteRMMResource}; }
 #endif  // !defined(XGBOOST_USE_RMM) || XGBOOST_USE_RMM != 1
 
+
+std::int32_t DistGpuIdx() { return common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank(); }
 }  // namespace xgboost
@@ -526,6 +526,9 @@ inline std::int32_t AllThreadsForTest() { return Context{}.Threads(); }
 inline DeviceOrd FstCU() { return DeviceOrd::CUDA(0); }
 
+// GPU device ordinal for distributed tests
+std::int32_t DistGpuIdx();
+
 inline auto GMockThrow(StringView msg) {
   return ::testing::ThrowsMessage<dmlc::Error>(::testing::HasSubstr(msg));
 }
 
@@ -14,6 +14,46 @@
 #include "../../helpers.h"
 
 namespace xgboost::tree {
+TEST(Histogram, DeviceHistogramStorage) {
+  // Ensures that node allocates correctly after reaching `kStopGrowingSize`.
+  auto ctx = MakeCUDACtx(0);
+  constexpr size_t kNBins = 128;
+  constexpr int kNNodes = 4;
+  constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
+  DeviceHistogramStorage<kStopGrowing> histogram;
+  histogram.Init(FstCU(), kNBins);
+  for (int i = 0; i < kNNodes; ++i) {
+    histogram.AllocateHistograms(&ctx, {i});
+  }
+  histogram.Reset(&ctx);
+  ASSERT_EQ(histogram.Data().size(), kStopGrowing);
+
+  // Use allocated memory but do not erase nidx_map.
+  for (int i = 0; i < kNNodes; ++i) {
+    histogram.AllocateHistograms(&ctx, {i});
+  }
+  for (int i = 0; i < kNNodes; ++i) {
+    ASSERT_TRUE(histogram.HistogramExists(i));
+  }
+
+  // Add two new nodes
+  histogram.AllocateHistograms(&ctx, {kNNodes});
+  histogram.AllocateHistograms(&ctx, {kNNodes + 1});
+
+  // Old cached nodes should still exist
+  for (int i = 0; i < kNNodes; ++i) {
+    ASSERT_TRUE(histogram.HistogramExists(i));
+  }
+
+  // Should be deleted
+  ASSERT_FALSE(histogram.HistogramExists(kNNodes));
+  // Most recent node should exist
+  ASSERT_TRUE(histogram.HistogramExists(kNNodes + 1));
+
+  // Add same node again - should fail
+  EXPECT_ANY_THROW(histogram.AllocateHistograms(&ctx, {kNNodes + 1}););
+}
+
 void TestDeterministicHistogram(bool is_dense, int shm_size, bool force_global) {
   Context ctx = MakeCUDACtx(0);
   size_t constexpr kBins = 256, kCols = 120, kRows = 16384, kRounds = 16;
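A note on the sizing used by this test (an observation, not part of the diff): each GradientPairInt64 stores two ValueT entries, so HistogramSize() is kNBins * 2 = 256 and kStopGrowing = kNNodes * kNBins * 2 = 1024 is exactly the space consumed by the first four nodes. Allocating node kNNodes therefore finds used_size >= kStopGrowingSize and falls into the overflow buffer, and since the overflow map is cleared on every later allocation, node kNNodes is gone once node kNNodes + 1 is requested, which is what the final ASSERT_FALSE/ASSERT_TRUE pair verifies.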
@@ -6,7 +6,6 @@
 #include <thrust/host_vector.h>
 #include <xgboost/base.h>
 
-#include <random>
 #include <string>
 #include <vector>
 
@@ -23,46 +22,6 @@
 #include "xgboost/json.h"
 
 namespace xgboost::tree {
-TEST(GpuHist, DeviceHistogramStorage) {
-  // Ensures that node allocates correctly after reaching `kStopGrowingSize`.
-  dh::safe_cuda(cudaSetDevice(0));
-  constexpr size_t kNBins = 128;
-  constexpr int kNNodes = 4;
-  constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
-  DeviceHistogramStorage<kStopGrowing> histogram;
-  histogram.Init(FstCU(), kNBins);
-  for (int i = 0; i < kNNodes; ++i) {
-    histogram.AllocateHistograms({i});
-  }
-  histogram.Reset();
-  ASSERT_EQ(histogram.Data().size(), kStopGrowing);
-
-  // Use allocated memory but do not erase nidx_map.
-  for (int i = 0; i < kNNodes; ++i) {
-    histogram.AllocateHistograms({i});
-  }
-  for (int i = 0; i < kNNodes; ++i) {
-    ASSERT_TRUE(histogram.HistogramExists(i));
-  }
-
-  // Add two new nodes
-  histogram.AllocateHistograms({kNNodes});
-  histogram.AllocateHistograms({kNNodes + 1});
-
-  // Old cached nodes should still exist
-  for (int i = 0; i < kNNodes; ++i) {
-    ASSERT_TRUE(histogram.HistogramExists(i));
-  }
-
-  // Should be deleted
-  ASSERT_FALSE(histogram.HistogramExists(kNNodes));
-  // Most recent node should exist
-  ASSERT_TRUE(histogram.HistogramExists(kNNodes + 1));
-
-  // Add same node again - should fail
-  EXPECT_ANY_THROW(histogram.AllocateHistograms({kNNodes + 1}););
-}
-
 std::vector<GradientPairPrecise> GetHostHistGpair() {
   // 24 bins, 3 bins for each feature (column).
   std::vector<GradientPairPrecise> hist_gpair = {
@@ -108,7 +67,7 @@ void TestBuildHist(bool use_shared_memory_histograms) {
   maker.row_partitioner = std::make_unique<RowPartitioner>(&ctx, kNRows, 0);
 
   maker.hist.Init(ctx.Device(), page->Cuts().TotalBins());
-  maker.hist.AllocateHistograms({0});
+  maker.hist.AllocateHistograms(&ctx, {0});
 
   maker.gpair = gpair.DeviceSpan();
   maker.quantiser = std::make_unique<GradientQuantiser>(&ctx, maker.gpair, MetaInfo());
@@ -425,8 +384,8 @@ TEST(GpuHist, MaxDepth) {
 namespace {
 RegTree GetHistTree(Context const* ctx, DMatrix* dmat) {
   ObjInfo task{ObjInfo::kRegression};
-  GPUHistMaker hist_maker{ctx, &task};
-  hist_maker.Configure(Args{});
+  std::unique_ptr<TreeUpdater> hist_maker{TreeUpdater::Create("grow_gpu_hist", ctx, &task)};
+  hist_maker->Configure(Args{});
 
   TrainParam param;
   param.UpdateAllowUnknown(Args{});
@@ -436,7 +395,7 @@ RegTree GetHistTree(Context const* ctx, DMatrix* dmat) {
 
   std::vector<HostDeviceVector<bst_node_t>> position(1);
   RegTree tree;
-  hist_maker.Update(&param, &gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
-                    {&tree});
+  hist_maker->Update(&param, &gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
+                     {&tree});
   return tree;
 }
@@ -476,8 +435,8 @@ TEST_F(MGPUHistTest, HistColumnSplit) {
 namespace {
 RegTree GetApproxTree(Context const* ctx, DMatrix* dmat) {
   ObjInfo task{ObjInfo::kRegression};
-  GPUGlobalApproxMaker approx_maker{ctx, &task};
-  approx_maker.Configure(Args{});
+  std::unique_ptr<TreeUpdater> approx_maker{TreeUpdater::Create("grow_gpu_approx", ctx, &task)};
+  approx_maker->Configure(Args{});
 
   TrainParam param;
   param.UpdateAllowUnknown(Args{});
@@ -487,13 +446,13 @@ RegTree GetApproxTree(Context const* ctx, DMatrix* dmat) {
 
   std::vector<HostDeviceVector<bst_node_t>> position(1);
   RegTree tree;
-  approx_maker.Update(&param, &gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
-                      {&tree});
+  approx_maker->Update(&param, &gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
+                       {&tree});
   return tree;
 }
 
 void VerifyApproxColumnSplit(bst_idx_t rows, bst_feature_t cols, RegTree const& expected_tree) {
-  Context ctx(MakeCUDACtx(GPUIDX));
+  auto ctx = MakeCUDACtx(DistGpuIdx());
 
   auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
   auto const world_size = collective::GetWorldSize();