Define the new device parameter. (#9362)

Jiaming Yuan authored on 2023-07-13 19:30:25 +08:00, committed by GitHub
parent 2d0cd2817e · commit 04aff3af8e
63 changed files with 827 additions and 477 deletions


@@ -16,8 +16,7 @@
namespace xgboost {
namespace common {
void TestSegmentedArgSort() {
-Context ctx;
-ctx.gpu_id = 0;
+auto ctx = MakeCUDACtx(0);
size_t constexpr kElements = 100, kGroups = 3;
dh::device_vector<size_t> sorted_idx(kElements, 0);
@@ -55,8 +54,7 @@ void TestSegmentedArgSort() {
TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }
TEST(Algorithm, GpuArgSort) {
-Context ctx;
-ctx.gpu_id = 0;
+auto ctx = MakeCUDACtx(0);
dh::device_vector<float> values(20);
dh::Iota(dh::ToSpan(values)); // accending
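Throughout this commit, the two-step `Context ctx; ctx.gpu_id = 0;` setup is replaced by the MakeCUDACtx test helper. A minimal sketch of what that helper can look like, assuming it is built on the Context::MakeCUDA factory and the Context::kCpuId constant that appear elsewhere in this diff (the exact body is an assumption; the real definition lives in the test helpers):

inline Context MakeCUDACtx(std::int32_t ordinal) {
  if (ordinal == Context::kCpuId) {
    return Context{};  // a negative ordinal denotes the CPU
  }
  return Context{}.MakeCUDA(ordinal);  // a copy configured for cuda:<ordinal>
}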


@@ -227,7 +227,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
}
// check categorical
beg = n_samples;
-for (std::size_t i = 0; i < n_categories; ++i) {
+for (bst_cat_t i = 0; i < n_categories; ++i) {
// all from the second column
ASSERT_EQ(static_cast<bst_feature_t>(weight[i + beg]) % n_features, 1);
}


@@ -4,6 +4,7 @@
#include <gtest/gtest.h>
#include "../../../src/common/linalg_op.cuh"
#include "../helpers.h"
#include "xgboost/context.h"
#include "xgboost/linalg.h"
@@ -54,8 +55,7 @@ void TestElementWiseKernel() {
}
void TestSlice() {
-Context ctx;
-ctx.gpu_id = 1;
+auto ctx = MakeCUDACtx(1);
thrust::device_vector<double> data(2 * 3 * 4);
auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
dh::LaunchN(1, [=] __device__(size_t) {


@@ -23,8 +23,7 @@
namespace xgboost::ltr {
void TestCalcQueriesInvIDCG() {
-Context ctx;
-ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+auto ctx = MakeCUDACtx(0);
std::size_t n_groups = 5, n_samples_per_group = 32;
dh::device_vector<float> scores(n_samples_per_group * n_groups);
@@ -85,20 +84,17 @@ void TestRankingCache(Context const* ctx) {
} // namespace
TEST(RankingCache, InitFromGPU) {
-Context ctx;
-ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+auto ctx = MakeCUDACtx(0);
TestRankingCache(&ctx);
}
TEST(NDCGCache, InitFromGPU) {
-Context ctx;
-ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+auto ctx = MakeCUDACtx(0);
TestNDCGCache(&ctx);
}
TEST(MAPCache, InitFromGPU) {
-Context ctx;
-ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+auto ctx = MakeCUDACtx(0);
TestMAPCache(&ctx);
}
} // namespace xgboost::ltr


@@ -7,6 +7,7 @@
#include "../../../src/common/stats.h"
#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter
#include "../helpers.h"
namespace xgboost {
namespace common {
@@ -71,7 +72,7 @@ TEST(Stats, Median) {
ASSERT_EQ(m, .5f);
#if defined(XGBOOST_USE_CUDA)
-ctx.gpu_id = 0;
+ctx = ctx.MakeCUDA(0);
ASSERT_FALSE(ctx.IsCPU());
Median(&ctx, values, weights, &out);
m = out(0);
@@ -80,7 +81,7 @@ TEST(Stats, Median) {
}
{
-ctx.gpu_id = Context::kCpuId;
+ctx = ctx.MakeCPU();
// 4x2 matrix
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
HostDeviceVector<float> weights;
@@ -90,7 +91,7 @@ TEST(Stats, Median) {
ASSERT_EQ(out(1), .5f);
#if defined(XGBOOST_USE_CUDA)
-ctx.gpu_id = 0;
+ctx = ctx.MakeCUDA(0);
Median(&ctx, values, weights, &out);
ASSERT_EQ(out(0), .5f);
ASSERT_EQ(out(1), .5f);
@@ -123,8 +124,7 @@ TEST(Stats, Mean) {
#if defined(XGBOOST_USE_CUDA)
TEST(Stats, GPUMean) {
-Context ctx;
-ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+auto ctx = MakeCUDACtx(0);
TestMean(&ctx);
}
#endif // defined(XGBOOST_USE_CUDA)
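These hunks also show the new Context factories: instead of mutating gpu_id in place, MakeCUDA and MakeCPU return a reconfigured copy. A short usage sketch, with behavior inferred from this diff:

Context ctx;                           // defaults to the CPU
Context cuda_ctx = ctx.MakeCUDA(0);    // copy bound to cuda:0
ASSERT_FALSE(cuda_ctx.IsCPU());
Context cpu_ctx = cuda_ctx.MakeCPU();  // back to a CPU context
ASSERT_TRUE(cpu_ctx.IsCPU());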


@@ -3,16 +3,17 @@
*/
#include <gtest/gtest.h>
-#include <cstddef>  // std::size_t
-#include <utility>  // std::pair
-#include <vector>   // std::vector
+#include <cstddef>  // std::size_t
+#include <utility>  // std::pair
+#include <vector>   // std::vector
#include "../../../src/common/linalg_op.cuh"  // ElementWiseTransformDevice
#include "../../../src/common/stats.cuh"
-#include "xgboost/base.h"                // XGBOOST_DEVICE
-#include "xgboost/context.h"             // Context
-#include "xgboost/host_device_vector.h"  // HostDeviceVector
-#include "xgboost/linalg.h"              // Tensor
+#include "../helpers.h"
+#include "xgboost/base.h"                // XGBOOST_DEVICE
+#include "xgboost/context.h"             // Context
+#include "xgboost/host_device_vector.h"  // HostDeviceVector
+#include "xgboost/linalg.h"              // Tensor
namespace xgboost {
namespace common {
@@ -33,7 +34,7 @@ class StatsGPU : public ::testing::Test {
}
public:
-void SetUp() override { ctx_.gpu_id = 0; }
+void SetUp() override { ctx_ = MakeCUDACtx(0); }
void WeightedMulti() {
// data for one segment


@@ -171,8 +171,7 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
ASSERT_TRUE(Xy->SingleColBlock());
bst_bin_t constexpr kBins{17};
auto p = BatchParam{kBins, threshold};
-Context gpu_ctx;
-gpu_ctx.gpu_id = 0;
+auto gpu_ctx = MakeCUDACtx(0);
for (auto const &page : Xy->GetBatches<EllpackPage>(
&gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {
from_ellpack = std::make_unique<GHistIndexMatrix>(&ctx, Xy->Info(), page, p);


@@ -180,7 +180,12 @@ TEST(GBTree, ChooseTreeMethod) {
learner->SetParam("tree_method", tree_method.value());
}
if (device.has_value()) {
learner->SetParam("gpu_id", device.value());
auto const& d = device.value();
if (std::isdigit(d.front()) || d.front() == '-') {
learner->SetParam("gpu_id", d);
} else {
learner->SetParam("device", d);
}
}
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
@@ -199,7 +204,12 @@ TEST(GBTree, ChooseTreeMethod) {
learner->SetParam("tree_method", tree_method.value());
}
if (device.has_value()) {
learner->SetParam("gpu_id", device.value());
auto const& d = device.value();
if (std::isdigit(d.front()) || d.front() == '-') {
learner->SetParam("gpu_id", d);
} else {
learner->SetParam("device", d);
}
}
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
@@ -215,11 +225,12 @@ TEST(GBTree, ChooseTreeMethod) {
// | | hist | gpu_hist | exact | NA |
// |--------+---------+----------+-------+-----|
-// | CUDA:0 | GPU | GPU (w) | Err | GPU | # not yet tested
-// | CPU | CPU | Err | CPU | CPU | # not yet tested
+// | CUDA:0 | GPU | GPU (w) | Err | GPU |
+// | CPU | CPU | GPU (w) | CPU | CPU |
// |--------+---------+----------+-------+-----|
// | -1 | CPU | GPU (w) | CPU | CPU |
// | 0 | GPU | GPU (w) | Err | GPU |
// |--------+---------+----------+-------+-----|
// | NA | CPU | GPU (w) | CPU | CPU |
//
// - (w): warning
@@ -237,18 +248,30 @@ TEST(GBTree, ChooseTreeMethod) {
// hist
{{"hist", "-1"}, "grow_quantile_histmaker"},
{{"hist", "0"}, "grow_gpu_hist"},
{{"hist", "cpu"}, "grow_quantile_histmaker"},
{{"hist", "cuda"}, "grow_gpu_hist"},
{{"hist", "cuda:0"}, "grow_gpu_hist"},
{{"hist", std::nullopt}, "grow_quantile_histmaker"},
// gpu_hist
{{"gpu_hist", "-1"}, "grow_gpu_hist"},
{{"gpu_hist", "0"}, "grow_gpu_hist"},
{{"gpu_hist", "cpu"}, "grow_gpu_hist"},
{{"gpu_hist", "cuda"}, "grow_gpu_hist"},
{{"gpu_hist", "cuda:0"}, "grow_gpu_hist"},
{{"gpu_hist", std::nullopt}, "grow_gpu_hist"},
// exact
{{"exact", "-1"}, "grow_colmaker,prune"},
{{"exact", "0"}, "err"},
{{"exact", "cpu"}, "grow_colmaker,prune"},
{{"exact", "cuda"}, "err"},
{{"exact", "cuda:0"}, "err"},
{{"exact", std::nullopt}, "grow_colmaker,prune"},
// NA
{{std::nullopt, "-1"}, "grow_quantile_histmaker"},
{{std::nullopt, "0"}, "grow_gpu_hist"}, // default to hist
{{std::nullopt, "cpu"}, "grow_quantile_histmaker"},
{{std::nullopt, "cuda"}, "grow_gpu_hist"},
{{std::nullopt, "cuda:0"}, "grow_gpu_hist"},
{{std::nullopt, std::nullopt}, "grow_quantile_histmaker"},
};
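The expectations above encode the dispatch table: tree_method stays orthogonal to device selection. A hedged illustration of the two interesting rows (variable names are illustrative, and whether the exact-on-CUDA error surfaces at Configure() or at the first update is an assumption):

std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParams(Args{{"tree_method", "hist"}, {"device", "cuda:0"}});
learner->Configure();  // resolves to grow_gpu_hist per the table
learner->SetParams(Args{{"tree_method", "exact"}, {"device", "cuda:0"}});
ASSERT_THROW(learner->Configure(), dmlc::Error);  // "err": exact has no GPU implementation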
@@ -392,8 +415,7 @@ class Dart : public testing::TestWithParam<char const*> {
for (size_t i = 0; i < 16; ++i) {
learner->UpdateOneIter(i, p_mat);
}
ConfigLearnerByCtx(&ctx, learner.get());
learner->SetParam("device", ctx.DeviceName());
HostDeviceVector<float> predts_training;
learner->Predict(p_mat, false, &predts_training, 0, 0, true);
@@ -654,8 +676,7 @@ TEST(GBTree, InplacePredictionError) {
RandomDataGenerator{n_samples, n_features, 0.5f}.Batches(2).GenerateSparsePageDMatrix(
"cache", true);
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParam("booster", booster);
ConfigLearnerByCtx(ctx, learner.get());
learner->SetParams(Args{{"booster", booster}, {"device", ctx->DeviceName()}});
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(i, p_fmat);
@@ -697,9 +718,9 @@ TEST(GBTree, InplacePredictionError) {
#endif // defined(XGBOOST_USE_CUDA)
};
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParam("booster", booster);
learner->SetParam("max_bin", std::to_string(max_bins));
ConfigLearnerByCtx(ctx, learner.get());
learner->SetParams(Args{{"booster", booster},
{"max_bin", std::to_string(max_bins)},
{"device", ctx->DeviceName()}});
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(i, p_fmat);


@@ -8,6 +8,7 @@
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr
#include <string> // for string
+#include <thread>  // for thread
#include "../../../src/data/adapter.h" // for ArrayAdapter
#include "../../../src/data/device_adapter.cuh" // for CupyAdapter
@@ -41,7 +42,7 @@ void TestInplaceFallback(Context const* ctx) {
// learner is configured to the device specified by ctx
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
-ConfigLearnerByCtx(ctx, learner.get());
+learner->SetParam("device", ctx->DeviceName());
for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(i, Xy);
}
@@ -56,18 +57,31 @@ void TestInplaceFallback(Context const* ctx) {
HostDeviceVector<float>* out_predt{nullptr};
ConsoleLogger::Configure(Args{{"verbosity", "1"}});
-// test whether the warning is raised
-::testing::internal::CaptureStderr();
-learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
-                        &out_predt, 0, 0);
-auto output = testing::internal::GetCapturedStderr();
-ASSERT_NE(output.find("Falling back"), std::string::npos);
+std::string output;
+#if !defined(_WIN32)
+// Windows has issue with CUDA and thread local storage. For some reason, on Windows a
+// cudaInitializationError is raised during destruction of `HostDeviceVector`. This
+// might be related to https://github.com/dmlc/xgboost/issues/5793
+::testing::internal::CaptureStderr();
+std::thread{[&] {
+  // Launch a new thread to ensure a warning is raised as we prevent over-verbose
+  // warning by using thread-local flags.
+  learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
+                          &out_predt, 0, 0);
+}}.join();
+output = testing::internal::GetCapturedStderr();
+ASSERT_NE(output.find("Falling back"), std::string::npos);
+#endif
// test when the contexts match
Context new_ctx = *proxy->Ctx();
ASSERT_NE(new_ctx.gpu_id, ctx->gpu_id);
ConfigLearnerByCtx(&new_ctx, learner.get());
learner->SetParam("device", new_ctx.DeviceName());
HostDeviceVector<float>* out_predt_1{nullptr};
// no warning is raised
::testing::internal::CaptureStderr();
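The new-thread dance above exists because the fallback warning is de-duplicated with a thread-local flag. A self-contained sketch of that mechanism (names are illustrative, not XGBoost's):

#include <iostream>
#include <thread>

void WarnFallbackOnce() {
  thread_local bool warned = false;  // at most one warning per thread
  if (!warned) {
    std::cerr << "Falling back to prediction using DMatrix.\n";
    warned = true;
  }
}

int main() {
  WarnFallbackOnce();                              // prints
  WarnFallbackOnce();                              // suppressed: flag already set
  std::thread{[] { WarnFallbackOnce(); }}.join();  // fresh thread, prints again
}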


@@ -559,16 +559,4 @@ class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test {
}
}
};
-// A temporary solution before we move away from gpu_id.
-inline void ConfigLearnerByCtx(Context const* ctx, Learner* learner) {
-  if (ctx->IsCPU()) {
-    learner->SetParam("tree_method", "hist");
-  } else {
-    learner->SetParam("tree_method", "gpu_hist");
-  }
-  learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
-  learner->Configure();
-  ASSERT_EQ(learner->Ctx()->gpu_id, ctx->gpu_id);
-}
} // namespace xgboost


@@ -46,7 +46,6 @@ inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device)
inline void TestMultiClassError(int device, DataSplitMode data_split_mode) {
auto ctx = MakeCUDACtx(device);
-ctx.gpu_id = device;
xgboost::Metric * metric = xgboost::Metric::Create("merror", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "merror");
@@ -67,7 +66,6 @@ inline void VerifyMultiClassError(DataSplitMode data_split_mode = DataSplitMode:
inline void TestMultiClassLogLoss(int device, DataSplitMode data_split_mode) {
auto ctx = MakeCUDACtx(device);
-ctx.gpu_id = device;
xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mlogloss");


@@ -13,26 +13,22 @@
namespace xgboost::obj {
TEST(LambdaRank, GPUNDCGJsonIO) {
-Context ctx;
-ctx.gpu_id = 0;
+auto ctx = MakeCUDACtx(0);
TestNDCGJsonIO(&ctx);
}
TEST(LambdaRank, GPUMAPStat) {
-Context ctx;
-ctx.gpu_id = 0;
+auto ctx = MakeCUDACtx(0);
TestMAPStat(&ctx);
}
TEST(LambdaRank, GPUNDCGGPair) {
-Context ctx;
-ctx.gpu_id = 0;
+auto ctx = MakeCUDACtx(0);
TestNDCGGPair(&ctx);
}
void TestGPUMakePair() {
-Context ctx;
-ctx.gpu_id = 0;
+auto ctx = MakeCUDACtx(0);
MetaInfo info;
HostDeviceVector<float> predt;
@@ -126,8 +122,7 @@ void TestGPUMakePair() {
TEST(LambdaRank, GPUMakePair) { TestGPUMakePair(); }
TEST(LambdaRank, GPUUnbiasedNDCG) {
-Context ctx;
-ctx.gpu_id = 0;
+auto ctx = MakeCUDACtx(0);
TestUnbiasedNDCG(&ctx);
}
@@ -161,8 +156,7 @@ TEST(LambdaRank, RankItemCountOnRight) {
}
TEST(LambdaRank, GPUMAPGPair) {
-Context ctx;
-ctx.gpu_id = 0;
+auto ctx = MakeCUDACtx(0);
TestMAPGPair(&ctx);
}
} // namespace xgboost::obj


@@ -305,12 +305,12 @@ TEST(Objective, CPU_vs_CUDA) {
{
// CPU
-ctx.gpu_id = -1;
+ctx = ctx.MakeCPU();
obj->GetGradient(preds, info, 0, &cpu_out_preds);
}
{
// CUDA
-ctx.gpu_id = 0;
+ctx = ctx.MakeCUDA(0);
obj->GetGradient(preds, info, 0, &cuda_out_preds);
}


@@ -148,7 +148,7 @@ TEST(Plugin, CPUvsOneAPI) {
{
// CPU
-ctx.gpu_id = -1;
+ctx = ctx.MakeCPU();
obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
}
{


@@ -214,15 +214,16 @@ void TestUpdatePredictionCache(bool use_subsampling) {
}
} // namespace
-TEST(CPUPredictor, GHistIndex) {
+TEST(CPUPredictor, GHistIndexTraining) {
size_t constexpr kRows{128}, kCols{16}, kBins{64};
+Context ctx;
auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateQuantileDMatrix(false);
HostDeviceVector<float> storage(kRows * kCols);
auto columnar = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);
auto adapter = data::ArrayAdapter(columnar.c_str());
std::shared_ptr<DMatrix> p_full{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
TestTrainingPrediction(kRows, kBins, "hist", p_full, p_hist);
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist);
}
TEST(CPUPredictor, CategoricalPrediction) {


@@ -33,9 +33,8 @@ TEST(GPUPredictor, Basic) {
int n_row = i, n_col = i;
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
-Context ctx;
-ctx.gpu_id = 0;
-LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
+auto ctx = MakeCUDACtx(0);
+LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
// Test predict batch
@@ -71,7 +70,7 @@ void VerifyBasicColumnSplit(std::array<std::vector<float>, 32> const& expected_r
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
std::unique_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};
-LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
+LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
// Test predict batch
@@ -102,7 +101,7 @@ TEST(GPUPredictor, MGPUBasicColumnSplit) {
size_t n_row = i, n_col = i;
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
-LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
+LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
// Test predict batch
@@ -132,18 +131,19 @@ TEST(GPUPredictor, EllpackBasic) {
}
TEST(GPUPredictor, EllpackTraining) {
-size_t constexpr kRows { 128 }, kCols { 16 }, kBins { 64 };
-auto p_ellpack =
-    RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).Device(0).GenerateDeviceDMatrix(false);
+auto ctx = MakeCUDACtx(0);
+size_t constexpr kRows{128}, kCols{16}, kBins{64};
+auto p_ellpack = RandomDataGenerator{kRows, kCols, 0.0}
+                     .Bins(kBins)
+                     .Device(ctx.Ordinal())
+                     .GenerateDeviceDMatrix(false);
 HostDeviceVector<float> storage(kRows * kCols);
-auto columnar = RandomDataGenerator{kRows, kCols, 0.0}
-    .Device(0)
-    .GenerateArrayInterface(&storage);
+auto columnar =
+    RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Ordinal()).GenerateArrayInterface(&storage);
 auto adapter = data::CupyAdapter(columnar);
-std::shared_ptr<DMatrix> p_full {
-  DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)
-};
-TestTrainingPrediction(kRows, kBins, "gpu_hist", p_full, p_ellpack);
+std::shared_ptr<DMatrix> p_full{
+    DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
+TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_ellpack);
}
TEST(GPUPredictor, ExternalMemoryTest) {
@@ -153,9 +153,8 @@ TEST(GPUPredictor, ExternalMemoryTest) {
gpu_predictor->Configure({});
const int n_classes = 3;
-Context ctx;
-ctx.gpu_id = 0;
-LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.gpu_id)};
+Context ctx = MakeCUDACtx(0);
+LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Ordinal())};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
std::vector<std::unique_ptr<DMatrix>> dmats;
@@ -185,7 +184,7 @@ TEST(GPUPredictor, InplacePredictCupy) {
auto ctx = MakeCUDACtx(0);
size_t constexpr kRows{128}, kCols{64};
RandomDataGenerator gen(kRows, kCols, 0.5);
-gen.Device(ctx.gpu_id);
+gen.Device(ctx.Ordinal());
HostDeviceVector<float> data;
std::string interface_str = gen.GenerateArrayInterface(&data);
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
@@ -197,7 +196,7 @@ TEST(GPUPredictor, InplacePredictCuDF) {
auto ctx = MakeCUDACtx(0);
size_t constexpr kRows{128}, kCols{64};
RandomDataGenerator gen(kRows, kCols, 0.5);
-gen.Device(ctx.gpu_id);
+gen.Device(ctx.Ordinal());
std::vector<HostDeviceVector<float>> storage(kCols);
auto interface_str = gen.GenerateColumnarArrayInterface(&storage);
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
@@ -214,9 +213,8 @@ TEST(GpuPredictor, LesserFeatures) {
TEST(GPUPredictor, ShapStump) {
cudaSetDevice(0);
-Context ctx;
-ctx.gpu_id = 0;
-LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
+auto ctx = MakeCUDACtx(0);
+LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
gbm::GBTreeModel model(&mparam, &ctx);
std::vector<std::unique_ptr<RegTree>> trees;
@@ -241,9 +239,8 @@ TEST(GPUPredictor, ShapStump) {
}
TEST(GPUPredictor, Shap) {
-Context ctx;
-ctx.gpu_id = 0;
-LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
+auto ctx = MakeCUDACtx(0);
+LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
gbm::GBTreeModel model(&mparam, &ctx);
std::vector<std::unique_ptr<RegTree>> trees;


@@ -44,60 +44,49 @@ TEST(Predictor, PredictionCache) {
EXPECT_ANY_THROW(container.Entry(m));
}
-void TestTrainingPrediction(size_t rows, size_t bins,
-                            std::string tree_method,
-                            std::shared_ptr<DMatrix> p_full,
-                            std::shared_ptr<DMatrix> p_hist) {
+void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
+                            std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist) {
   size_t constexpr kCols = 16;
   size_t constexpr kClasses = 3;
   size_t constexpr kIters = 3;
   std::unique_ptr<Learner> learner;
-  auto train = [&](Context const& ctx) {
-    p_hist->Info().labels.Reshape(rows, 1);
-    auto &h_label = p_hist->Info().labels.Data()->HostVector();
-    for (size_t i = 0; i < rows; ++i) {
-      h_label[i] = i % kClasses;
-    }
-    learner.reset(Learner::Create({}));
-    learner->SetParam("tree_method", tree_method);
-    learner->SetParam("objective", "multi:softprob");
-    learner->SetParam("num_feature", std::to_string(kCols));
-    learner->SetParam("num_class", std::to_string(kClasses));
-    learner->SetParam("max_bin", std::to_string(bins));
-    ConfigLearnerByCtx(&ctx, learner.get());
-    learner->Configure();
-    for (size_t i = 0; i < kIters; ++i) {
-      learner->UpdateOneIter(i, p_hist);
-    }
-    Json model{Object{}};
-    learner->SaveModel(&model);
-    learner.reset(Learner::Create({}));
-    learner->LoadModel(model);
-    ConfigLearnerByCtx(&ctx, learner.get());
-    learner->Configure();
-    HostDeviceVector<float> from_full;
-    learner->Predict(p_full, false, &from_full, 0, 0);
-    HostDeviceVector<float> from_hist;
-    learner->Predict(p_hist, false, &from_hist, 0, 0);
-    for (size_t i = 0; i < rows; ++i) {
-      EXPECT_NEAR(from_hist.ConstHostVector()[i],
-                  from_full.ConstHostVector()[i], kRtEps);
-    }
-  };
-  if (tree_method == "gpu_hist") {
-    train(MakeCUDACtx(0));
-  } else {
-    train(Context{});
-  }
+  p_hist->Info().labels.Reshape(rows, 1);
+  auto &h_label = p_hist->Info().labels.Data()->HostVector();
+  for (size_t i = 0; i < rows; ++i) {
+    h_label[i] = i % kClasses;
+  }
+  learner.reset(Learner::Create({}));
+  learner->SetParams(Args{{"objective", "multi:softprob"},
+                          {"num_feature", std::to_string(kCols)},
+                          {"num_class", std::to_string(kClasses)},
+                          {"max_bin", std::to_string(bins)},
+                          {"device", ctx->DeviceName()}});
+  learner->Configure();
+  for (size_t i = 0; i < kIters; ++i) {
+    learner->UpdateOneIter(i, p_hist);
+  }
+  Json model{Object{}};
+  learner->SaveModel(&model);
+  learner.reset(Learner::Create({}));
+  learner->LoadModel(model);
+  learner->SetParam("device", ctx->DeviceName());
+  learner->Configure();
+  HostDeviceVector<float> from_full;
+  learner->Predict(p_full, false, &from_full, 0, 0);
+  HostDeviceVector<float> from_hist;
+  learner->Predict(p_hist, false, &from_hist, 0, 0);
+  for (size_t i = 0; i < rows; ++i) {
+    EXPECT_NEAR(from_hist.ConstHostVector()[i], from_full.ConstHostVector()[i], kRtEps);
+  }
 }
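With the Context threaded through explicitly, the tree_method-string dispatch at the bottom of the old function disappears; callers pick the device once up front, as at the updated call sites in this commit:

Context ctx;                     // CPU variant
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist);

auto cuda_ctx = MakeCUDACtx(0);  // CUDA variant
TestTrainingPrediction(&cuda_ctx, kRows, kBins, p_full, p_ellpack);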
@@ -120,7 +109,7 @@ void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_r
learner->UpdateOneIter(it, m);
}
learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
learner->SetParam("device", ctx->DeviceName());
learner->Configure();
HostDeviceVector<float> *p_out_predictions_0{nullptr};
@@ -153,7 +142,7 @@ void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_r
ASSERT_NEAR(h_pred[i], h_pred_0[i] + h_pred_1[i] - 0.5f, kRtEps);
}
learner->SetParam("gpu_id", "-1");
learner->SetParam("device", "cpu");
learner->Configure();
}
@@ -161,12 +150,12 @@ namespace {
std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMatrix> dmat,
size_t iters, size_t forest = 1) {
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
learner->SetParams(Args{{"num_parallel_tree", std::to_string(forest)}});
learner->SetParams(
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->DeviceName()}});
for (size_t i = 0; i < iters; ++i) {
learner->UpdateOneIter(i, dmat);
}
-ConfigLearnerByCtx(ctx, learner.get());
return learner;
}
@@ -215,7 +204,7 @@ void TestPredictionDeviceAccess() {
{
ASSERT_EQ(from_cpu.DeviceIdx(), Context::kCpuId);
Context cpu_ctx;
ConfigLearnerByCtx(&cpu_ctx, learner.get());
learner->SetParam("device", cpu_ctx.DeviceName());
learner->Predict(m_test, false, &from_cpu, 0, 0);
ASSERT_TRUE(from_cpu.HostCanWrite());
ASSERT_FALSE(from_cpu.DeviceCanRead());
@@ -225,7 +214,7 @@ void TestPredictionDeviceAccess() {
HostDeviceVector<float> from_cuda;
{
Context cuda_ctx = MakeCUDACtx(0);
ConfigLearnerByCtx(&cuda_ctx, learner.get());
learner->SetParam("device", cuda_ctx.DeviceName());
learner->Predict(m_test, false, &from_cuda, 0, 0);
ASSERT_EQ(from_cuda.DeviceIdx(), 0);
ASSERT_TRUE(from_cuda.DeviceCanWrite());
@@ -465,11 +454,7 @@ void TestIterationRangeColumnSplit(Context const* ctx) {
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses);
auto learner = LearnerForTest(ctx, dmat, kIters, kForest);
-if (ctx->IsCPU()) {
-  learner->SetParams(Args{{"gpu_id", std::to_string(-1)}});
-} else {
-  learner->SetParams(Args{{"gpu_id", std::to_string(0)}});
-}
+learner->SetParam("device", ctx->DeviceName());
bool bound = false;
std::unique_ptr<Learner> sliced{learner->Slice(0, 3, 1, &bound)};
@@ -582,7 +567,7 @@ void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity) {
learner.reset(Learner::Create({Xy}));
learner->LoadModel(model);
-ConfigLearnerByCtx(ctx, learner.get());
+learner->SetParam("device", ctx->DeviceName());
learner->Predict(Xy, false, &sparse_predt, 0, 0);
auto constexpr kWorldSize = 2;


@@ -84,9 +84,8 @@ void TestPredictionFromGradientIndex(Context const* ctx, size_t rows, size_t col
}
// p_full and p_hist should come from the same data set.
-void TestTrainingPrediction(size_t rows, size_t bins, std::string tree_method,
-                            std::shared_ptr<DMatrix> p_full,
-                            std::shared_ptr<DMatrix> p_hist);
+void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
+                            std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);
void TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
bst_feature_t cols);

tests/cpp/test_context.cc (new file, 31 lines)

@@ -0,0 +1,31 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h>
#include <xgboost/context.h>
namespace xgboost {
TEST(Context, CPU) {
Context ctx;
ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
ASSERT_EQ(ctx.Ordinal(), Context::kCpuId);
std::int32_t flag{0};
ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
ASSERT_EQ(flag, -1);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "oops"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "-1"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CPU"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CUDA"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CPU:0"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:+0"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:0-"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":gpu"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":0"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ""}}), dmlc::Error);
}
} // namespace xgboost
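The rejected strings pin down the grammar of the new parameter: a lower-case device name (cpu, cuda, or the alias gpu), optionally followed by a colon and a bare non-negative ordinal. A standalone sketch of a parser satisfying exactly these assertions (an illustration, not the library's implementation):

#include <cctype>
#include <optional>
#include <string>

struct ParsedDevice {
  bool is_cuda{false};
  int ordinal{-1};  // -1 denotes the CPU, mirroring Context::kCpuId
};

// Returns std::nullopt for every string the test above expects to throw.
std::optional<ParsedDevice> ParseDevice(std::string const& spec) {
  auto colon = spec.find(':');
  std::string name = spec.substr(0, colon);
  if (name == "cpu") {
    // Assumption: `cpu` takes no ordinal suffix.
    if (colon != std::string::npos) return std::nullopt;
    return ParsedDevice{};
  }
  if (name != "cuda" && name != "gpu") return std::nullopt;  // case-sensitive: rejects "CUDA"
  ParsedDevice d;
  d.is_cuda = true;
  d.ordinal = 0;  // plain "cuda"/"gpu" defaults to ordinal 0
  if (colon != std::string::npos) {
    std::string ord = spec.substr(colon + 1);
    if (ord.empty()) return std::nullopt;  // rejects "gpu:"
    for (char c : ord) {
      // bare digits only: rejects "gpu:+0" and "gpu:0-"
      if (!std::isdigit(static_cast<unsigned char>(c))) return std::nullopt;
    }
    d.ordinal = std::stoi(ord);  // "cuda:00" parses to 0
  }
  return d;
}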

tests/cpp/test_context.cu (new file, 99 lines)

@@ -0,0 +1,99 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // for Args
#include <xgboost/context.h>
#include <xgboost/json.h> // for FromJson, ToJson
#include <string> // for string, to_string
#include "../../src/common/common.h" // for AllVisibleGPUs
namespace xgboost {
namespace {
void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
ASSERT_EQ(ctx.gpu_id, ord);
ASSERT_EQ(ctx.Device().ordinal, ord);
ASSERT_EQ(ctx.DeviceName(), "cuda:" + std::to_string(ord));
ASSERT_EQ(ctx.Ordinal(), ord);
ASSERT_TRUE(ctx.IsCUDA());
ASSERT_FALSE(ctx.IsCPU());
ASSERT_EQ(ctx.Device(), DeviceOrd::CUDA(ord));
Json jctx{ToJson(ctx)};
Context new_ctx;
FromJson(jctx, &new_ctx);
ASSERT_EQ(new_ctx.Device(), ctx.Device());
ASSERT_EQ(new_ctx.gpu_id, ctx.gpu_id);
}
} // namespace
TEST(Context, DeviceOrdinal) {
Context ctx;
auto n_vis = common::AllVisibleGPUs();
auto ord = n_vis - 1;
std::string device = "cuda:" + std::to_string(ord);
ctx.UpdateAllowUnknown(Args{{"device", device}});
TestCUDA(ctx, ord);
device = "cuda:" + std::to_string(1001);
ctx.UpdateAllowUnknown(Args{{"device", device}});
ord = 1001 % n_vis;
TestCUDA(ctx, ord);
std::int32_t flag{0};
ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
ASSERT_EQ(flag, 1);
Context new_ctx = ctx;
TestCUDA(new_ctx, ctx.Ordinal());
auto cpu_ctx = ctx.MakeCPU();
ASSERT_TRUE(cpu_ctx.IsCPU());
ASSERT_EQ(cpu_ctx.Ordinal(), Context::kCpuId);
ASSERT_EQ(cpu_ctx.Device(), DeviceOrd::CPU());
auto cuda_ctx = cpu_ctx.MakeCUDA(ctx.Ordinal());
TestCUDA(cuda_ctx, ctx.Ordinal());
cuda_ctx.UpdateAllowUnknown(Args{{"fail_on_invalid_gpu_id", "true"}});
ASSERT_THROW({ cuda_ctx.UpdateAllowUnknown(Args{{"device", "cuda:9999"}}); }, dmlc::Error);
cuda_ctx.UpdateAllowUnknown(Args{{"device", "cuda:00"}});
ASSERT_EQ(cuda_ctx.Ordinal(), 0);
ctx.UpdateAllowUnknown(Args{{"device", "cpu"}});
// Test alias
ctx.UpdateAllowUnknown(Args{{"device", "gpu:0"}});
TestCUDA(ctx, 0);
ctx.UpdateAllowUnknown(Args{{"device", "gpu"}});
TestCUDA(ctx, 0);
// Test the thread local memory in dmlc is not linking different instances together.
cpu_ctx.UpdateAllowUnknown(Args{{"device", "cpu"}});
TestCUDA(ctx, 0);
ctx.UpdateAllowUnknown(Args{});
TestCUDA(ctx, 0);
}
TEST(Context, GPUId) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestCUDA(ctx, 0);
auto n_vis = common::AllVisibleGPUs();
auto ord = n_vis - 1;
ctx.UpdateAllowUnknown(Args{{"gpu_id", std::to_string(ord)}});
TestCUDA(ctx, ord);
auto device = "cuda:" + std::to_string(1001);
ctx.UpdateAllowUnknown(Args{{"device", device}});
ord = 1001 % n_vis;
TestCUDA(ctx, ord);
ctx.UpdateAllowUnknown(Args{{"gpu_id", "-1"}});
ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
}
} // namespace xgboost
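Both the device=cuda:1001 and gpu_id=1001 inputs above resolve to 1001 % n_vis, so out-of-range ordinals wrap onto a visible device instead of failing, unless fail_on_invalid_gpu_id is set. A sketch of that resolution rule as the tests exercise it (behavior inferred from the assertions, not from the library source):

#include <cstdint>
#include <stdexcept>

std::int32_t ResolveOrdinal(std::int32_t requested, std::int32_t n_visible,
                            bool fail_on_invalid) {
  if (requested < n_visible) {
    return requested;
  }
  if (fail_on_invalid) {
    throw std::invalid_argument{"Invalid device ordinal."};  // the cuda:9999 case
  }
  return requested % n_visible;  // e.g. cuda:1001 with 2 visible GPUs -> cuda:1
}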


@@ -27,7 +27,6 @@
#include "../../src/common/io.h" // for LoadSequentialFile
#include "../../src/common/linalg_op.h" // for ElementWiseTransformHost, begin, end
#include "../../src/common/random.h" // for GlobalRandom
#include "../../src/common/transform_iterator.h" // for IndexTransformIter
#include "dmlc/io.h" // for Stream
#include "dmlc/omp.h" // for omp_get_max_threads
#include "dmlc/registry.h" // for Registry
@@ -35,14 +34,13 @@
#include "helpers.h" // for GetBaseScore, RandomDataGenerator
#include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator
#include "xgboost/base.h" // for bst_float, Args, bst_feature_t, bst_int
#include "xgboost/context.h" // for Context
#include "xgboost/context.h" // for Context, DeviceOrd
#include "xgboost/data.h" // for DMatrix, MetaInfo, DataType
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/json.h" // for Json, Object, get, String, IsA, opera...
#include "xgboost/linalg.h" // for Tensor, TensorView
#include "xgboost/logging.h" // for ConsoleLogger
#include "xgboost/predictor.h" // for PredictionCacheEntry
#include "xgboost/span.h" // for Span, operator!=, SpanIterator
#include "xgboost/string_view.h" // for StringView
namespace xgboost {
@@ -58,9 +56,9 @@ TEST(Learner, Basic) {
auto minor = XGBOOST_VER_MINOR;
auto patch = XGBOOST_VER_PATCH;
-static_assert(std::is_integral<decltype(major)>::value, "Wrong major version type");
-static_assert(std::is_integral<decltype(minor)>::value, "Wrong minor version type");
-static_assert(std::is_integral<decltype(patch)>::value, "Wrong patch version type");
+static_assert(std::is_integral_v<decltype(major)>, "Wrong major version type");
+static_assert(std::is_integral_v<decltype(minor)>, "Wrong minor version type");
+static_assert(std::is_integral_v<decltype(patch)>, "Wrong patch version type");
}
TEST(Learner, ParameterValidation) {
@@ -92,8 +90,7 @@ TEST(Learner, CheckGroup) {
size_t constexpr kNumRows = 17;
bst_feature_t constexpr kNumCols = 15;
-std::shared_ptr<DMatrix> p_mat{
-    RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
+std::shared_ptr<DMatrix> p_mat{RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
std::vector<bst_float> weight(kNumGroups, 1);
std::vector<bst_int> group(kNumGroups);
group[0] = 2;
@@ -312,35 +309,36 @@ TEST(Learner, GPUConfiguration) {
learner->SetParams({Arg{"booster", "gblinear"},
Arg{"updater", "gpu_coord_descent"}});
learner->UpdateOneIter(0, p_dmat);
-ASSERT_EQ(learner->Ctx()->gpu_id, 0);
+ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
}
{
-std::unique_ptr<Learner> learner {Learner::Create(mat)};
+std::unique_ptr<Learner> learner{Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "gpu_hist"}});
+learner->Configure();
+ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
learner->UpdateOneIter(0, p_dmat);
-ASSERT_EQ(learner->Ctx()->gpu_id, 0);
+ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
}
{
std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "gpu_hist"},
Arg{"gpu_id", "-1"}});
learner->UpdateOneIter(0, p_dmat);
-ASSERT_EQ(learner->Ctx()->gpu_id, 0);
+ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
}
{
// with CPU algorithm
std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "hist"}});
learner->UpdateOneIter(0, p_dmat);
-ASSERT_EQ(learner->Ctx()->gpu_id, -1);
+ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CPU());
}
{
// with CPU algorithm, but `gpu_id` takes priority
std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "hist"},
Arg{"gpu_id", "0"}});
learner->SetParams({Arg{"tree_method", "hist"}, Arg{"gpu_id", "0"}});
learner->UpdateOneIter(0, p_dmat);
-ASSERT_EQ(learner->Ctx()->gpu_id, 0);
+ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
}
}
#endif // defined(XGBOOST_USE_CUDA)


@@ -6,7 +6,9 @@
#include <xgboost/task.h> // for ObjInfo
#include <xgboost/tree_updater.h> // for TreeUpdater
-#include <memory>  // for unique_ptr
+#include <memory>  // for unique_ptr
+#include "../helpers.h"
namespace xgboost {
TEST(Updater, HasNodePosition) {
@@ -19,7 +21,7 @@ TEST(Updater, HasNodePosition) {
ASSERT_TRUE(up->HasNodePosition());
#if defined(XGBOOST_USE_CUDA)
-ctx.gpu_id = 0;
+ctx = MakeCUDACtx(0);
up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, &task));
ASSERT_TRUE(up->HasNodePosition());
#endif // defined(XGBOOST_USE_CUDA)


@@ -70,9 +70,9 @@ class TestPredictionCache : public ::testing::Test {
Context ctx;
ctx.InitAllowUnknown(Args{{"nthread", "8"}});
if (updater_name == "grow_gpu_hist") {
-  ctx.gpu_id = 0;
+  ctx = ctx.MakeCUDA(0);
} else {
-  ctx.gpu_id = Context::kCpuId;
+  ctx = ctx.MakeCPU();
}
ObjInfo task{ObjInfo::kRegression};