[coll] Pass context to various functions. (#9772)
* [coll] Pass context to various functions. In the future, the `Context` object would be required for collective operations, this PR passes the context object to some required functions to prepare for swapping out the implementation.
This commit is contained in:
@@ -360,25 +360,27 @@ TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) {
|
||||
}
|
||||
|
||||
template <typename Adapter>
|
||||
auto MakeUnweightedCutsForTest(Adapter adapter, int32_t num_bins, float missing, size_t batch_size = 0) {
|
||||
auto MakeUnweightedCutsForTest(Context const* ctx, Adapter adapter, int32_t num_bins, float missing,
|
||||
size_t batch_size = 0) {
|
||||
common::HistogramCuts batched_cuts;
|
||||
HostDeviceVector<FeatureType> ft;
|
||||
SketchContainer sketch_container(ft, num_bins, adapter.NumColumns(), adapter.NumRows(),
|
||||
DeviceOrd::CUDA(0));
|
||||
MetaInfo info;
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, missing, &sketch_container, batch_size);
|
||||
sketch_container.MakeCuts(&batched_cuts, info.IsColumnSplit());
|
||||
sketch_container.MakeCuts(ctx, &batched_cuts, info.IsColumnSplit());
|
||||
return batched_cuts;
|
||||
}
|
||||
|
||||
template <typename Adapter>
|
||||
void ValidateBatchedCuts(Adapter adapter, int num_bins, DMatrix* dmat, size_t batch_size = 0) {
|
||||
void ValidateBatchedCuts(Context const* ctx, Adapter adapter, int num_bins, DMatrix* dmat, size_t batch_size = 0) {
|
||||
common::HistogramCuts batched_cuts = MakeUnweightedCutsForTest(
|
||||
adapter, num_bins, std::numeric_limits<float>::quiet_NaN(), batch_size);
|
||||
ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN(), batch_size);
|
||||
ValidateCuts(batched_cuts, dmat, num_bins);
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterDeviceSketch) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int rows = 5;
|
||||
int cols = 1;
|
||||
int num_bins = 4;
|
||||
@@ -391,8 +393,8 @@ TEST(HistUtil, AdapterDeviceSketch) {
|
||||
|
||||
data::CupyAdapter adapter(str);
|
||||
|
||||
auto device_cuts = MakeUnweightedCutsForTest(adapter, num_bins, missing);
|
||||
Context ctx;
|
||||
auto device_cuts = MakeUnweightedCutsForTest(&ctx, adapter, num_bins, missing);
|
||||
ctx = ctx.MakeCPU();
|
||||
auto host_cuts = GetHostCuts(&ctx, &adapter, num_bins, missing);
|
||||
|
||||
EXPECT_EQ(device_cuts.Values(), host_cuts.Values());
|
||||
@@ -401,6 +403,7 @@ TEST(HistUtil, AdapterDeviceSketch) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterDeviceSketchMemory) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
@@ -410,7 +413,8 @@ TEST(HistUtil, AdapterDeviceSketchMemory) {
|
||||
|
||||
dh::GlobalMemoryLogger().Clear();
|
||||
ConsoleLogger::Configure({{"verbosity", "3"}});
|
||||
auto cuts = MakeUnweightedCutsForTest(adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
|
||||
auto cuts =
|
||||
MakeUnweightedCutsForTest(&ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
|
||||
ConsoleLogger::Configure({{"verbosity", "0"}});
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, false);
|
||||
@@ -419,6 +423,7 @@ TEST(HistUtil, AdapterDeviceSketchMemory) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
@@ -435,7 +440,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, false);
|
||||
EXPECT_LE(dh::GlobalMemoryLogger().PeakMemory(), bytes_required * 1.05);
|
||||
@@ -444,6 +449,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowMemory) {
|
||||
}
|
||||
|
||||
TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
int num_columns = 100;
|
||||
int num_rows = 1000;
|
||||
int num_bins = 256;
|
||||
@@ -465,7 +471,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
&sketch_container);
|
||||
|
||||
HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
|
||||
ConsoleLogger::Configure({{"verbosity", "0"}});
|
||||
size_t bytes_required = detail::RequiredMemory(
|
||||
num_rows, num_columns, num_rows * num_columns, num_bins, true);
|
||||
@@ -475,6 +481,7 @@ TEST(HistUtil, AdapterSketchSlidingWindowWeightedMemory) {
|
||||
|
||||
void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
int32_t num_bins, bool weighted) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto h_x = GenerateRandomCategoricalSingleColumn(n, num_categories);
|
||||
thrust::device_vector<float> x(h_x);
|
||||
auto adapter = AdapterFromData(x, n, 1);
|
||||
@@ -498,7 +505,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
AdapterDeviceSketch(adapter.Value(), num_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(), &container);
|
||||
HistogramCuts cuts;
|
||||
container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
|
||||
|
||||
thrust::sort(x.begin(), x.end());
|
||||
auto n_uniques = thrust::unique(x.begin(), x.end()) - x.begin();
|
||||
@@ -522,6 +529,7 @@ void TestCategoricalSketchAdapter(size_t n, size_t num_categories,
|
||||
TEST(HistUtil, AdapterDeviceSketchCategorical) {
|
||||
auto categorical_sizes = {2, 6, 8, 12};
|
||||
int num_bins = 256;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto sizes = {25, 100, 1000};
|
||||
for (auto n : sizes) {
|
||||
for (auto num_categories : categorical_sizes) {
|
||||
@@ -529,7 +537,7 @@ TEST(HistUtil, AdapterDeviceSketchCategorical) {
|
||||
auto dmat = GetDMatrixFromData(x, n, 1);
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
auto adapter = AdapterFromData(x_device, n, 1);
|
||||
ValidateBatchedCuts(adapter, num_bins, dmat.get());
|
||||
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
|
||||
TestCategoricalSketchAdapter(n, num_categories, num_bins, true);
|
||||
TestCategoricalSketchAdapter(n, num_categories, num_bins, false);
|
||||
}
|
||||
@@ -540,13 +548,14 @@ TEST(HistUtil, AdapterDeviceSketchMultipleColumns) {
|
||||
auto bin_sizes = {2, 16, 256, 512};
|
||||
auto sizes = {100, 1000, 1500};
|
||||
int num_columns = 5;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
for (auto num_rows : sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
for (auto num_bins : bin_sizes) {
|
||||
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
|
||||
ValidateBatchedCuts(adapter, num_bins, dmat.get());
|
||||
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -556,12 +565,13 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
|
||||
int num_rows = 5000;
|
||||
auto batch_sizes = {0, 100, 1500, 6000};
|
||||
int num_columns = 5;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
for (auto batch_size : batch_sizes) {
|
||||
auto x = GenerateRandom(num_rows, num_columns);
|
||||
auto dmat = GetDMatrixFromData(x, num_rows, num_columns);
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
|
||||
ValidateBatchedCuts(adapter, num_bins, dmat.get(), batch_size);
|
||||
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get(), batch_size);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -647,12 +657,12 @@ TEST(HistUtil, SketchingEquivalent) {
|
||||
auto x_device = thrust::device_vector<float>(x);
|
||||
auto adapter = AdapterFromData(x_device, num_rows, num_columns);
|
||||
common::HistogramCuts adapter_cuts = MakeUnweightedCutsForTest(
|
||||
adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
|
||||
&ctx, adapter, num_bins, std::numeric_limits<float>::quiet_NaN());
|
||||
EXPECT_EQ(dmat_cuts.Values(), adapter_cuts.Values());
|
||||
EXPECT_EQ(dmat_cuts.Ptrs(), adapter_cuts.Ptrs());
|
||||
EXPECT_EQ(dmat_cuts.MinValues(), adapter_cuts.MinValues());
|
||||
|
||||
ValidateBatchedCuts(adapter, num_bins, dmat.get());
|
||||
ValidateBatchedCuts(&ctx, adapter, num_bins, dmat.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -702,7 +712,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
.Device(DeviceOrd::CUDA(0))
|
||||
.GenerateArrayInterface(&storage);
|
||||
MetaInfo info;
|
||||
Context ctx;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto& h_weights = info.weights_.HostVector();
|
||||
if (with_group) {
|
||||
h_weights.resize(kGroups);
|
||||
@@ -731,7 +741,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
&sketch_container);
|
||||
|
||||
common::HistogramCuts cuts;
|
||||
sketch_container.MakeCuts(&cuts, info.IsColumnSplit());
|
||||
sketch_container.MakeCuts(&ctx, &cuts, info.IsColumnSplit());
|
||||
|
||||
auto dmat = GetDMatrixFromData(storage.HostVector(), kRows, kCols);
|
||||
if (with_group) {
|
||||
@@ -744,10 +754,9 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
ASSERT_EQ(cuts.Ptrs().size(), kCols + 1);
|
||||
ValidateCuts(cuts, dmat.get(), kBins);
|
||||
|
||||
auto cuda_ctx = MakeCUDACtx(0);
|
||||
if (with_group) {
|
||||
dmat->Info().weights_ = decltype(dmat->Info().weights_)(); // remove weight
|
||||
HistogramCuts non_weighted = DeviceSketch(&cuda_ctx, dmat.get(), kBins, 0);
|
||||
HistogramCuts non_weighted = DeviceSketch(&ctx, dmat.get(), kBins, 0);
|
||||
for (size_t i = 0; i < cuts.Values().size(); ++i) {
|
||||
ASSERT_EQ(cuts.Values()[i], non_weighted.Values()[i]);
|
||||
}
|
||||
@@ -773,7 +782,7 @@ void TestAdapterSketchFromWeights(bool with_group) {
|
||||
SketchContainer sketch_container{ft, kBins, kCols, kRows, DeviceOrd::CUDA(0)};
|
||||
AdapterDeviceSketch(adapter.Value(), kBins, info, std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_container);
|
||||
sketch_container.MakeCuts(&weighted, info.IsColumnSplit());
|
||||
sketch_container.MakeCuts(&ctx, &weighted, info.IsColumnSplit());
|
||||
ValidateCuts(weighted, dmat.get(), kBins);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,7 +86,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
|
||||
}
|
||||
|
||||
HistogramCuts distributed_cuts;
|
||||
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
|
||||
sketch_distributed.MakeCuts(&ctx, m->Info(), &distributed_cuts);
|
||||
|
||||
// Generate cuts for single node environment
|
||||
collective::Finalize();
|
||||
@@ -117,7 +117,7 @@ void DoTestDistributedQuantile(size_t rows, size_t cols) {
|
||||
}
|
||||
|
||||
HistogramCuts single_node_cuts;
|
||||
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
|
||||
sketch_on_single_node.MakeCuts(&ctx, m->Info(), &single_node_cuts);
|
||||
|
||||
auto const& sptrs = single_node_cuts.Ptrs();
|
||||
auto const& dptrs = distributed_cuts.Ptrs();
|
||||
@@ -220,7 +220,7 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
|
||||
}
|
||||
}
|
||||
|
||||
sketch_distributed.MakeCuts(m->Info(), &distributed_cuts);
|
||||
sketch_distributed.MakeCuts(&ctx, m->Info(), &distributed_cuts);
|
||||
}
|
||||
|
||||
// Generate cuts for single node environment
|
||||
@@ -243,7 +243,7 @@ void DoTestColSplitQuantile(size_t rows, size_t cols) {
|
||||
}
|
||||
}
|
||||
|
||||
sketch_on_single_node.MakeCuts(m->Info(), &single_node_cuts);
|
||||
sketch_on_single_node.MakeCuts(&ctx, m->Info(), &single_node_cuts);
|
||||
}
|
||||
|
||||
auto const& sptrs = single_node_cuts.Ptrs();
|
||||
|
||||
@@ -370,6 +370,7 @@ void TestAllReduceBasic() {
|
||||
constexpr size_t kRows = 1000, kCols = 100;
|
||||
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
|
||||
auto const device = DeviceOrd::CUDA(GPUIDX);
|
||||
auto ctx = MakeCUDACtx(device.ordinal);
|
||||
|
||||
// Set up single node version;
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
@@ -413,7 +414,7 @@ void TestAllReduceBasic() {
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_distributed);
|
||||
sketch_distributed.AllReduce(false);
|
||||
sketch_distributed.AllReduce(&ctx, false);
|
||||
sketch_distributed.Unique();
|
||||
|
||||
ASSERT_EQ(sketch_distributed.ColumnsPtr().size(),
|
||||
@@ -517,6 +518,7 @@ void TestSameOnAllWorkers() {
|
||||
MetaInfo const &info) {
|
||||
auto const rank = collective::GetRank();
|
||||
auto const device = DeviceOrd::CUDA(GPUIDX);
|
||||
Context ctx = MakeCUDACtx(device.ordinal);
|
||||
HostDeviceVector<FeatureType> ft({}, device);
|
||||
SketchContainer sketch_distributed(ft, n_bins, kCols, kRows, device);
|
||||
HostDeviceVector<float> storage({}, device);
|
||||
@@ -528,7 +530,7 @@ void TestSameOnAllWorkers() {
|
||||
AdapterDeviceSketch(adapter.Value(), n_bins, info,
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
&sketch_distributed);
|
||||
sketch_distributed.AllReduce(false);
|
||||
sketch_distributed.AllReduce(&ctx, false);
|
||||
sketch_distributed.Unique();
|
||||
TestQuantileElemRank(device, sketch_distributed.Data(), sketch_distributed.ColumnsPtr(), true);
|
||||
|
||||
|
||||
@@ -73,6 +73,7 @@ void RunWithFederatedCommunicator(int32_t world_size, std::string const& server_
|
||||
auto run = [&](auto rank) {
|
||||
Json config{JsonObject()};
|
||||
config["xgboost_communicator"] = String("federated");
|
||||
config["federated_secure"] = false;
|
||||
config["federated_server_address"] = String(server_address);
|
||||
config["federated_world_size"] = world_size;
|
||||
config["federated_rank"] = rank;
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
* Copyright (c) 2017-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <gmock/gmock.h>
|
||||
#include <xgboost/learner.h> // for Learner
|
||||
#include <xgboost/logging.h> // for LogCheck_NE, CHECK_NE, LogCheck_EQ
|
||||
#include <xgboost/objective.h> // for ObjFunction
|
||||
@@ -81,7 +82,9 @@ TEST(Learner, ParameterValidation) {
|
||||
|
||||
// whitespace
|
||||
learner->SetParam("tree method", "exact");
|
||||
EXPECT_THROW(learner->Configure(), dmlc::Error);
|
||||
EXPECT_THAT([&] { learner->Configure(); },
|
||||
::testing::ThrowsMessage<dmlc::Error>(
|
||||
::testing::HasSubstr(R"("tree method" contains whitespace)")));
|
||||
}
|
||||
|
||||
TEST(Learner, CheckGroup) {
|
||||
|
||||
@@ -19,14 +19,15 @@ auto ZeroParam() {
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
inline GradientQuantiser DummyRoundingFactor() {
|
||||
inline GradientQuantiser DummyRoundingFactor(Context const* ctx) {
|
||||
thrust::device_vector<GradientPair> gpair(1);
|
||||
gpair[0] = {1000.f, 1000.f}; // Tests should not exceed sum of 1000
|
||||
return {dh::ToSpan(gpair), MetaInfo()};
|
||||
return {ctx, dh::ToSpan(gpair), MetaInfo()};
|
||||
}
|
||||
|
||||
thrust::device_vector<GradientPairInt64> ConvertToInteger(std::vector<GradientPairPrecise> x) {
|
||||
auto r = DummyRoundingFactor();
|
||||
thrust::device_vector<GradientPairInt64> ConvertToInteger(Context const* ctx,
|
||||
std::vector<GradientPairPrecise> x) {
|
||||
auto r = DummyRoundingFactor(ctx);
|
||||
std::vector<GradientPairInt64> y(x.size());
|
||||
for (std::size_t i = 0; i < x.size(); i++) {
|
||||
y[i] = r.ToFixedPoint(GradientPair(x[i]));
|
||||
@@ -41,11 +42,12 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
|
||||
cuts_.cut_ptrs_.SetDevice(ctx.Device());
|
||||
cuts_.cut_values_.SetDevice(ctx.Device());
|
||||
cuts_.min_vals_.SetDevice(ctx.Device());
|
||||
thrust::device_vector<GradientPairInt64> feature_histogram{ConvertToInteger(feature_histogram_)};
|
||||
thrust::device_vector<GradientPairInt64> feature_histogram{
|
||||
ConvertToInteger(&ctx, feature_histogram_)};
|
||||
|
||||
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
|
||||
auto d_feature_types = dh::ToSpan(feature_types);
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
EvaluateSplitInputs input{1, 0, quantiser.ToFixedPoint(parent_sum_), dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
@@ -60,7 +62,7 @@ TEST_F(TestCategoricalSplitWithMissing, GPUHistEvaluator) {
|
||||
|
||||
evaluator.Reset(cuts_, dh::ToSpan(feature_types), feature_set.size(), param_, false,
|
||||
ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
ASSERT_EQ(result.thresh, 1);
|
||||
this->CheckResult(result.loss_chg, result.findex, result.fvalue, result.is_cat,
|
||||
@@ -90,7 +92,7 @@ TEST(GpuHist, PartitionBasic) {
|
||||
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
|
||||
cuts.SetCategorical(true, max_cat);
|
||||
d_feature_types = dh::ToSpan(feature_types);
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
EvaluateSplitSharedInputs shared_inputs{
|
||||
param,
|
||||
quantiser,
|
||||
@@ -108,10 +110,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
// -1.0s go right
|
||||
// -3.0s go left
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
|
||||
EvaluateSplitInputs input{0, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(result.dir, kLeftDir);
|
||||
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
|
||||
@@ -122,10 +124,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
// -1.0s go right
|
||||
// -3.0s go left
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-7.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-3.0, 1.0}, {-3.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-3.0, 1.0}, {-3.0, 1.0}});
|
||||
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(result.dir, kLeftDir);
|
||||
EXPECT_EQ(cats, std::bitset<32>("10000000000000000000000000000000"));
|
||||
@@ -134,10 +136,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
{
|
||||
// All -1.0, gain from splitting should be 0.0
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-3.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
|
||||
EvaluateSplitInputs input{2, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
EXPECT_EQ(result.dir, kLeftDir);
|
||||
EXPECT_FLOAT_EQ(result.loss_chg, 0.0f);
|
||||
EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);
|
||||
@@ -147,10 +149,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
// value
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 6.0});
|
||||
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}});
|
||||
EvaluateSplitInputs input{3, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
|
||||
EXPECT_EQ(result.dir, kLeftDir);
|
||||
@@ -160,10 +162,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
// -1.0s go right
|
||||
// -3.0s go left
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger({{-1.0, 1.0}, {-3.0, 1.0}, {-1.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-3.0, 1.0}, {-1.0, 1.0}});
|
||||
EvaluateSplitInputs input{4, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(result.dir, kLeftDir);
|
||||
EXPECT_EQ(cats, std::bitset<32>("10100000000000000000000000000000"));
|
||||
@@ -173,10 +175,10 @@ TEST(GpuHist, PartitionBasic) {
|
||||
// -1.0s go right
|
||||
// -3.0s go left
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-5.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger({{-3.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-3.0, 1.0}, {-1.0, 1.0}, {-3.0, 1.0}});
|
||||
EvaluateSplitInputs input{5, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(cats, std::bitset<32>("01000000000000000000000000000000"));
|
||||
EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);
|
||||
@@ -205,7 +207,7 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
|
||||
cuts.SetCategorical(true, max_cat);
|
||||
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
quantiser,
|
||||
d_feature_types,
|
||||
@@ -220,10 +222,10 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger(
|
||||
{{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
&ctx, {{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
EvaluateSplitInputs input{0, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
EXPECT_EQ(cats, std::bitset<32>("11000000000000000000000000000000"));
|
||||
@@ -233,10 +235,10 @@ TEST(GpuHist, PartitionTwoFeatures) {
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
|
||||
auto feature_histogram = ConvertToInteger(
|
||||
{{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}});
|
||||
&ctx, {{-2.0, 1.0}, {-2.0, 1.0}, {-2.0, 1.0}, {-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}});
|
||||
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
auto cats = std::bitset<32>(evaluator.GetHostNodeCats(input.nidx)[0]);
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
EXPECT_EQ(cats, std::bitset<32>("10000000000000000000000000000000"));
|
||||
@@ -266,7 +268,7 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
*std::max_element(cuts.cut_values_.HostVector().begin(), cuts.cut_values_.HostVector().end());
|
||||
cuts.SetCategorical(true, max_cat);
|
||||
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
quantiser,
|
||||
d_feature_types,
|
||||
@@ -283,15 +285,16 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
{
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{-6.0, 3.0});
|
||||
auto feature_histogram_a = ConvertToInteger(
|
||||
{{-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
&ctx, {{-1.0, 1.0}, {-2.5, 1.0}, {-2.5, 1.0}, {-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
thrust::device_vector<EvaluateSplitInputs> inputs(2);
|
||||
inputs[0] = EvaluateSplitInputs{0, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram_a)};
|
||||
auto feature_histogram_b = ConvertToInteger({{-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
auto feature_histogram_b = ConvertToInteger(&ctx, {{-1.0, 1.0}, {-1.0, 1.0}, {-4.0, 1.0}});
|
||||
inputs[1] = EvaluateSplitInputs{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram_b)};
|
||||
thrust::device_vector<GPUExpandEntry> results(2);
|
||||
evaluator.EvaluateSplits({0, 1}, 1, dh::ToSpan(inputs), shared_inputs, dh::ToSpan(results));
|
||||
evaluator.EvaluateSplits(&ctx, {0, 1}, 1, dh::ToSpan(inputs), shared_inputs,
|
||||
dh::ToSpan(results));
|
||||
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(0)[0]),
|
||||
std::bitset<32>("10000000000000000000000000000000"));
|
||||
EXPECT_EQ(std::bitset<32>(evaluator.GetHostNodeCats(1)[0]),
|
||||
@@ -301,7 +304,7 @@ TEST(GpuHist, PartitionTwoNodes) {
|
||||
|
||||
void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -311,7 +314,8 @@ void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
// Setup gradients so that second feature gets higher gain
|
||||
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
auto feature_histogram =
|
||||
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
|
||||
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
|
||||
common::Span<FeatureType> d_feature_types;
|
||||
@@ -336,7 +340,7 @@ void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, false,
|
||||
ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
if (is_categorical) {
|
||||
@@ -352,7 +356,8 @@ TEST(GpuHist, EvaluateSingleSplit) { TestEvaluateSingleSplit(false); }
|
||||
TEST(GpuHist, EvaluateSingleCategoricalSplit) { TestEvaluateSingleSplit(true); }
|
||||
|
||||
TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{1.0, 1.5});
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -361,7 +366,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2};
|
||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0};
|
||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0};
|
||||
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}});
|
||||
auto feature_histogram = ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}});
|
||||
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
@@ -373,7 +378,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_set.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 0);
|
||||
EXPECT_EQ(result.fvalue, 1.0);
|
||||
@@ -383,14 +388,15 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
}
|
||||
|
||||
TEST(GpuHist, EvaluateSingleSplitEmpty) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUHistEvaluator evaluator(tparam, 1, FstCU());
|
||||
DeviceSplitCandidate result =
|
||||
evaluator
|
||||
.EvaluateSingleSplit(
|
||||
EvaluateSplitInputs{},
|
||||
&ctx, EvaluateSplitInputs{},
|
||||
EvaluateSplitSharedInputs{
|
||||
GPUTrainingParam(tparam), DummyRoundingFactor(), {}, {}, {}, {}, false})
|
||||
GPUTrainingParam(tparam), DummyRoundingFactor(&ctx), {}, {}, {}, {}, false})
|
||||
.split;
|
||||
EXPECT_EQ(result.findex, -1);
|
||||
EXPECT_LT(result.loss_chg, 0.0f);
|
||||
@@ -398,7 +404,8 @@ TEST(GpuHist, EvaluateSingleSplitEmpty) {
|
||||
|
||||
// Feature 0 has a better split, but the algorithm must select feature 1
|
||||
TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.UpdateAllowUnknown(Args{});
|
||||
@@ -408,7 +415,8 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
|
||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
|
||||
auto feature_histogram = ConvertToInteger({{-10.0, 0.5}, {10.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
auto feature_histogram =
|
||||
ConvertToInteger(&ctx, {{-10.0, 0.5}, {10.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
@@ -420,7 +428,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1);
|
||||
EXPECT_EQ(result.fvalue, 11.0);
|
||||
@@ -430,7 +438,8 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
|
||||
// Features 0 and 1 have identical gain, the algorithm must select 0
|
||||
TEST(GpuHist, EvaluateSingleSplitBreakTies) {
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.UpdateAllowUnknown(Args{});
|
||||
@@ -440,7 +449,8 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
|
||||
thrust::device_vector<uint32_t> feature_segments = std::vector<bst_row_t>{0, 2, 4};
|
||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 10.0};
|
||||
auto feature_histogram = ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
auto feature_histogram =
|
||||
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
EvaluateSplitInputs input{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram)};
|
||||
EvaluateSplitSharedInputs shared_inputs{param,
|
||||
@@ -452,15 +462,16 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
|
||||
false};
|
||||
|
||||
GPUHistEvaluator evaluator(tparam, feature_min_values.size(), FstCU());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 0);
|
||||
EXPECT_EQ(result.fvalue, 1.0);
|
||||
}
|
||||
|
||||
TEST(GpuHist, EvaluateSplits) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
thrust::device_vector<DeviceSplitCandidate> out_splits(2);
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
tparam.UpdateAllowUnknown(Args{});
|
||||
@@ -471,9 +482,9 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
thrust::device_vector<float> feature_values = std::vector<float>{1.0, 2.0, 11.0, 12.0};
|
||||
thrust::device_vector<float> feature_min_values = std::vector<float>{0.0, 0.0};
|
||||
auto feature_histogram_left =
|
||||
ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}});
|
||||
auto feature_histogram_right =
|
||||
ConvertToInteger({{-1.0, 0.5}, {1.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
ConvertToInteger(&ctx, {{-1.0, 0.5}, {1.0, 0.5}, {-0.5, 0.5}, {0.5, 0.5}});
|
||||
EvaluateSplitInputs input_left{1, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
dh::ToSpan(feature_histogram_left)};
|
||||
EvaluateSplitInputs input_right{2, 0, parent_sum, dh::ToSpan(feature_set),
|
||||
@@ -514,7 +525,7 @@ TEST_F(TestPartitionBasedSplit, GpuHist) {
|
||||
evaluator.Reset(cuts_, dh::ToSpan(ft), info_.num_col_, param_, false, ctx.Device());
|
||||
|
||||
// Convert the sample histogram to fixed point
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
thrust::host_vector<GradientPairInt64> h_hist;
|
||||
for (auto e : hist_[0]) {
|
||||
h_hist.push_back(quantiser.ToFixedPoint(e));
|
||||
@@ -531,7 +542,7 @@ TEST_F(TestPartitionBasedSplit, GpuHist) {
|
||||
cuts_.cut_values_.ConstDeviceSpan(),
|
||||
cuts_.min_vals_.ConstDeviceSpan(),
|
||||
false};
|
||||
auto split = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
auto split = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
ASSERT_NEAR(split.loss_chg, best_score_, 1e-2);
|
||||
}
|
||||
|
||||
@@ -541,7 +552,7 @@ namespace {
|
||||
void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
|
||||
auto ctx = MakeCUDACtx(GPUIDX);
|
||||
auto rank = collective::GetRank();
|
||||
auto quantiser = DummyRoundingFactor();
|
||||
auto quantiser = DummyRoundingFactor(&ctx);
|
||||
auto parent_sum = quantiser.ToFixedPoint(GradientPairPrecise{0.0, 1.0});
|
||||
TrainParam tparam = ZeroParam();
|
||||
GPUTrainingParam param{tparam};
|
||||
@@ -552,8 +563,8 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
|
||||
thrust::device_vector<bst_feature_t> feature_set = std::vector<bst_feature_t>{0, 1};
|
||||
|
||||
// Setup gradients so that second feature gets higher gain
|
||||
auto feature_histogram = rank == 0 ? ConvertToInteger({{-0.5, 0.5}, {0.5, 0.5}})
|
||||
: ConvertToInteger({{-1.0, 0.5}, {1.0, 0.5}});
|
||||
auto feature_histogram = rank == 0 ? ConvertToInteger(&ctx, {{-0.5, 0.5}, {0.5, 0.5}})
|
||||
: ConvertToInteger(&ctx, {{-1.0, 0.5}, {1.0, 0.5}});
|
||||
|
||||
dh::device_vector<FeatureType> feature_types(feature_set.size(), FeatureType::kCategorical);
|
||||
common::Span<FeatureType> d_feature_types;
|
||||
@@ -576,7 +587,7 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
|
||||
|
||||
GPUHistEvaluator evaluator{tparam, static_cast<bst_feature_t>(feature_set.size()), ctx.Device()};
|
||||
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, ctx.Device());
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(input, shared_inputs).split;
|
||||
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
|
||||
|
||||
EXPECT_EQ(result.findex, 1) << "rank: " << rank;
|
||||
if (is_categorical) {
|
||||
|
||||
@@ -37,7 +37,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
FeatureGroups feature_groups(page->Cuts(), page->is_dense, shm_size,
|
||||
sizeof(GradientPairInt64));
|
||||
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
d_histogram, quantiser);
|
||||
@@ -51,7 +51,7 @@ void TestDeterministicHistogram(bool is_dense, int shm_size) {
|
||||
dh::device_vector<GradientPairInt64> new_histogram(num_bins);
|
||||
auto d_new_histogram = dh::ToSpan(new_histogram);
|
||||
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
|
||||
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(FstCU()),
|
||||
feature_groups.DeviceAccessor(FstCU()), gpair.DeviceSpan(), ridx,
|
||||
d_new_histogram, quantiser);
|
||||
@@ -129,7 +129,7 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
dh::device_vector<GradientPairInt64> cat_hist(num_categories);
|
||||
auto gpair = GenerateRandomGradients(kRows, 0, 2);
|
||||
gpair.SetDevice(DeviceOrd::CUDA(0));
|
||||
auto quantiser = GradientQuantiser(gpair.DeviceSpan(), MetaInfo());
|
||||
auto quantiser = GradientQuantiser(&ctx, gpair.DeviceSpan(), MetaInfo());
|
||||
/**
|
||||
* Generate hist with cat data.
|
||||
*/
|
||||
|
||||
@@ -181,7 +181,7 @@ void TestSyncHist(bool is_distributed) {
|
||||
|
||||
histogram.Buffer().Reset(1, n_nodes, space, target_hists);
|
||||
// sync hist
|
||||
histogram.SyncHistogram(&tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick);
|
||||
histogram.SyncHistogram(&ctx, &tree, nodes_for_explicit_hist_build, nodes_for_subtraction_trick);
|
||||
|
||||
using GHistRowT = common::GHistRow;
|
||||
auto check_hist = [](const GHistRowT parent, const GHistRowT left, const GHistRowT right,
|
||||
@@ -266,7 +266,7 @@ void TestBuildHistogram(bool is_distributed, bool force_read_by_column, bool is_
|
||||
histogram.BuildHist(0, space, gidx, row_set_collection, nodes_to_build,
|
||||
linalg::MakeTensorView(&ctx, gpair, gpair.size()), force_read_by_column);
|
||||
}
|
||||
histogram.SyncHistogram(&tree, nodes_to_build, {});
|
||||
histogram.SyncHistogram(&ctx, &tree, nodes_to_build, {});
|
||||
|
||||
// Check if number of histogram bins is correct
|
||||
ASSERT_EQ(histogram.Histogram()[nid].size(), gmat.cut.Ptrs().back());
|
||||
@@ -366,7 +366,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),
|
||||
force_read_by_column);
|
||||
}
|
||||
cat_hist.SyncHistogram(&tree, nodes_to_build, {});
|
||||
cat_hist.SyncHistogram(&ctx, &tree, nodes_to_build, {});
|
||||
|
||||
/**
|
||||
* Generate hist with one hot encoded data.
|
||||
@@ -382,7 +382,7 @@ void TestHistogramCategorical(size_t n_categories, bool force_read_by_column) {
|
||||
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size()),
|
||||
force_read_by_column);
|
||||
}
|
||||
onehot_hist.SyncHistogram(&tree, nodes_to_build, {});
|
||||
onehot_hist.SyncHistogram(&ctx, &tree, nodes_to_build, {});
|
||||
|
||||
auto cat = cat_hist.Histogram()[0];
|
||||
auto onehot = onehot_hist.Histogram()[0];
|
||||
@@ -451,7 +451,7 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
|
||||
force_read_by_column);
|
||||
++page_idx;
|
||||
}
|
||||
multi_build.SyncHistogram(&tree, nodes, {});
|
||||
multi_build.SyncHistogram(ctx, &tree, nodes, {});
|
||||
|
||||
multi_page = multi_build.Histogram()[RegTree::kRoot];
|
||||
}
|
||||
@@ -480,7 +480,7 @@ void TestHistogramExternalMemory(Context const *ctx, BatchParam batch_param, boo
|
||||
single_build.BuildHist(0, space, gmat, row_set_collection, nodes,
|
||||
linalg::MakeTensorView(ctx, h_gpair, h_gpair.size()),
|
||||
force_read_by_column);
|
||||
single_build.SyncHistogram(&tree, nodes, {});
|
||||
single_build.SyncHistogram(ctx, &tree, nodes, {});
|
||||
|
||||
single_page = single_build.Histogram()[RegTree::kRoot];
|
||||
}
|
||||
@@ -570,7 +570,7 @@ class OverflowTest : public ::testing::TestWithParam<std::tuple<bool, bool>> {
|
||||
CHECK_NE(partitioners.front()[tree.RightChild(best.nid)].Size(), 0);
|
||||
|
||||
hist_builder.BuildHistLeftRight(
|
||||
Xy.get(), &tree, partitioners, valid_candidates,
|
||||
&ctx, Xy.get(), &tree, partitioners, valid_candidates,
|
||||
linalg::MakeTensorView(&ctx, gpair.ConstHostSpan(), gpair.Size(), 1), batch);
|
||||
|
||||
if (limit) {
|
||||
|
||||
@@ -111,7 +111,7 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
maker.hist.AllocateHistograms({0});
|
||||
|
||||
maker.gpair = gpair.DeviceSpan();
|
||||
maker.quantiser = std::make_unique<GradientQuantiser>(maker.gpair, MetaInfo());
|
||||
maker.quantiser = std::make_unique<GradientQuantiser>(&ctx, maker.gpair, MetaInfo());
|
||||
maker.page = page.get();
|
||||
|
||||
maker.InitFeatureGroupsOnce();
|
||||
@@ -162,12 +162,6 @@ HistogramCutsWrapper GetHostCutMatrix () {
|
||||
return cmat;
|
||||
}
|
||||
|
||||
inline GradientQuantiser DummyRoundingFactor() {
|
||||
thrust::device_vector<GradientPair> gpair(1);
|
||||
gpair[0] = {1000.f, 1000.f}; // Tests should not exceed sum of 1000
|
||||
return {dh::ToSpan(gpair), MetaInfo()};
|
||||
}
|
||||
|
||||
void TestHistogramIndexImpl() {
|
||||
// Test if the compressed histogram index matches when using a sparse
|
||||
// dmatrix with and without using external memory
|
||||
|
||||
Reference in New Issue
Block a user