Define the new device parameter. (#9362)
This commit is contained in:
@@ -28,6 +28,7 @@ class LintersPaths:
|
||||
"tests/python-gpu/test_gpu_prediction.py",
|
||||
"tests/python-gpu/load_pickle.py",
|
||||
"tests/python-gpu/test_gpu_pickling.py",
|
||||
"tests/python-gpu/test_gpu_eval_metrics.py",
|
||||
"tests/test_distributed/test_with_spark/",
|
||||
"tests/test_distributed/test_gpu_with_spark/",
|
||||
# demo
|
||||
|
||||
@@ -16,8 +16,7 @@
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
void TestSegmentedArgSort() {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
|
||||
size_t constexpr kElements = 100, kGroups = 3;
|
||||
dh::device_vector<size_t> sorted_idx(kElements, 0);
|
||||
@@ -55,8 +54,7 @@ void TestSegmentedArgSort() {
|
||||
TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }
|
||||
|
||||
TEST(Algorithm, GpuArgSort) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
|
||||
dh::device_vector<float> values(20);
|
||||
dh::Iota(dh::ToSpan(values)); // ascending
|
||||
|
||||
@@ -227,7 +227,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
|
||||
}
|
||||
// check categorical
|
||||
beg = n_samples;
|
||||
for (std::size_t i = 0; i < n_categories; ++i) {
|
||||
for (bst_cat_t i = 0; i < n_categories; ++i) {
|
||||
// all from the second column
|
||||
ASSERT_EQ(static_cast<bst_feature_t>(weight[i + beg]) % n_features, 1);
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../src/common/linalg_op.cuh"
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/context.h"
|
||||
#include "xgboost/linalg.h"
|
||||
|
||||
@@ -54,8 +55,7 @@ void TestElementWiseKernel() {
|
||||
}
|
||||
|
||||
void TestSlice() {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 1;
|
||||
auto ctx = MakeCUDACtx(1);
|
||||
thrust::device_vector<double> data(2 * 3 * 4);
|
||||
auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
|
||||
dh::LaunchN(1, [=] __device__(size_t) {
|
||||
|
||||
@@ -23,8 +23,7 @@
|
||||
|
||||
namespace xgboost::ltr {
|
||||
void TestCalcQueriesInvIDCG() {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
std::size_t n_groups = 5, n_samples_per_group = 32;
|
||||
|
||||
dh::device_vector<float> scores(n_samples_per_group * n_groups);
|
||||
@@ -85,20 +84,17 @@ void TestRankingCache(Context const* ctx) {
|
||||
} // namespace
|
||||
|
||||
TEST(RankingCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestRankingCache(&ctx);
|
||||
}
|
||||
|
||||
TEST(NDCGCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestNDCGCache(&ctx);
|
||||
}
|
||||
|
||||
TEST(MAPCache, InitFromGPU) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestMAPCache(&ctx);
|
||||
}
|
||||
} // namespace xgboost::ltr
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "../../../src/common/stats.h"
|
||||
#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -71,7 +72,7 @@ TEST(Stats, Median) {
|
||||
ASSERT_EQ(m, .5f);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
ctx.gpu_id = 0;
|
||||
ctx = ctx.MakeCUDA(0);
|
||||
ASSERT_FALSE(ctx.IsCPU());
|
||||
Median(&ctx, values, weights, &out);
|
||||
m = out(0);
|
||||
@@ -80,7 +81,7 @@ TEST(Stats, Median) {
|
||||
}
|
||||
|
||||
{
|
||||
ctx.gpu_id = Context::kCpuId;
|
||||
ctx = ctx.MakeCPU();
|
||||
// 4x2 matrix
|
||||
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
|
||||
HostDeviceVector<float> weights;
|
||||
@@ -90,7 +91,7 @@ TEST(Stats, Median) {
|
||||
ASSERT_EQ(out(1), .5f);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
ctx.gpu_id = 0;
|
||||
ctx = ctx.MakeCUDA(0);
|
||||
Median(&ctx, values, weights, &out);
|
||||
ASSERT_EQ(out(0), .5f);
|
||||
ASSERT_EQ(out(1), .5f);
|
||||
@@ -123,8 +124,7 @@ TEST(Stats, Mean) {
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
TEST(Stats, GPUMean) {
|
||||
Context ctx;
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestMean(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
@@ -3,16 +3,17 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <cstddef> // std::size_t
|
||||
#include <utility> // std::pair
|
||||
#include <vector> // std::vector
|
||||
#include <cstddef> // std::size_t
|
||||
#include <utility> // std::pair
|
||||
#include <vector> // std::vector
|
||||
|
||||
#include "../../../src/common/linalg_op.cuh" // ElementWiseTransformDevice
|
||||
#include "../../../src/common/stats.cuh"
|
||||
#include "xgboost/base.h" // XGBOOST_DEVICE
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
#include "xgboost/linalg.h" // Tensor
|
||||
#include "../helpers.h"
|
||||
#include "xgboost/base.h" // XGBOOST_DEVICE
|
||||
#include "xgboost/context.h" // Context
|
||||
#include "xgboost/host_device_vector.h" // HostDeviceVector
|
||||
#include "xgboost/linalg.h" // Tensor
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -33,7 +34,7 @@ class StatsGPU : public ::testing::Test {
|
||||
}
|
||||
|
||||
public:
|
||||
void SetUp() override { ctx_.gpu_id = 0; }
|
||||
void SetUp() override { ctx_ = MakeCUDACtx(0); }
|
||||
|
||||
void WeightedMulti() {
|
||||
// data for one segment
|
||||
|
||||
@@ -171,8 +171,7 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
|
||||
ASSERT_TRUE(Xy->SingleColBlock());
|
||||
bst_bin_t constexpr kBins{17};
|
||||
auto p = BatchParam{kBins, threshold};
|
||||
Context gpu_ctx;
|
||||
gpu_ctx.gpu_id = 0;
|
||||
auto gpu_ctx = MakeCUDACtx(0);
|
||||
for (auto const &page : Xy->GetBatches<EllpackPage>(
|
||||
&gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {
|
||||
from_ellpack = std::make_unique<GHistIndexMatrix>(&ctx, Xy->Info(), page, p);
|
||||
|
||||
@@ -180,7 +180,12 @@ TEST(GBTree, ChooseTreeMethod) {
|
||||
learner->SetParam("tree_method", tree_method.value());
|
||||
}
|
||||
if (device.has_value()) {
|
||||
learner->SetParam("gpu_id", device.value());
|
||||
auto const& d = device.value();
|
||||
if (std::isdigit(d.front()) || d.front() == '-') {
|
||||
learner->SetParam("gpu_id", d);
|
||||
} else {
|
||||
learner->SetParam("device", d);
|
||||
}
|
||||
}
|
||||
learner->Configure();
|
||||
for (std::int32_t i = 0; i < 3; ++i) {
|
||||
@@ -199,7 +204,12 @@ TEST(GBTree, ChooseTreeMethod) {
|
||||
learner->SetParam("tree_method", tree_method.value());
|
||||
}
|
||||
if (device.has_value()) {
|
||||
learner->SetParam("gpu_id", device.value());
|
||||
auto const& d = device.value();
|
||||
if (std::isdigit(d.front()) || d.front() == '-') {
|
||||
learner->SetParam("gpu_id", d);
|
||||
} else {
|
||||
learner->SetParam("device", d);
|
||||
}
|
||||
}
|
||||
learner->Configure();
|
||||
for (std::int32_t i = 0; i < 3; ++i) {
|
||||
@@ -215,11 +225,12 @@ TEST(GBTree, ChooseTreeMethod) {
|
||||
|
||||
// | | hist | gpu_hist | exact | NA |
|
||||
// |--------+---------+----------+-------+-----|
|
||||
// | CUDA:0 | GPU | GPU (w) | Err | GPU | # not yet tested
|
||||
// | CPU | CPU | Err | CPU | CPU | # not yet tested
|
||||
// | CUDA:0 | GPU | GPU (w) | Err | GPU |
|
||||
// | CPU | CPU | GPU (w) | CPU | CPU |
|
||||
// |--------+---------+----------+-------+-----|
|
||||
// | -1 | CPU | GPU (w) | CPU | CPU |
|
||||
// | 0 | GPU | GPU (w) | Err | GPU |
|
||||
// |--------+---------+----------+-------+-----|
|
||||
// | NA | CPU | GPU (w) | CPU | CPU |
|
||||
//
|
||||
// - (w): warning
|
||||
@@ -237,18 +248,30 @@ TEST(GBTree, ChooseTreeMethod) {
|
||||
// hist
|
||||
{{"hist", "-1"}, "grow_quantile_histmaker"},
|
||||
{{"hist", "0"}, "grow_gpu_hist"},
|
||||
{{"hist", "cpu"}, "grow_quantile_histmaker"},
|
||||
{{"hist", "cuda"}, "grow_gpu_hist"},
|
||||
{{"hist", "cuda:0"}, "grow_gpu_hist"},
|
||||
{{"hist", std::nullopt}, "grow_quantile_histmaker"},
|
||||
// gpu_hist
|
||||
{{"gpu_hist", "-1"}, "grow_gpu_hist"},
|
||||
{{"gpu_hist", "0"}, "grow_gpu_hist"},
|
||||
{{"gpu_hist", "cpu"}, "grow_gpu_hist"},
|
||||
{{"gpu_hist", "cuda"}, "grow_gpu_hist"},
|
||||
{{"gpu_hist", "cuda:0"}, "grow_gpu_hist"},
|
||||
{{"gpu_hist", std::nullopt}, "grow_gpu_hist"},
|
||||
// exact
|
||||
{{"exact", "-1"}, "grow_colmaker,prune"},
|
||||
{{"exact", "0"}, "err"},
|
||||
{{"exact", "cpu"}, "grow_colmaker,prune"},
|
||||
{{"exact", "cuda"}, "err"},
|
||||
{{"exact", "cuda:0"}, "err"},
|
||||
{{"exact", std::nullopt}, "grow_colmaker,prune"},
|
||||
// NA
|
||||
{{std::nullopt, "-1"}, "grow_quantile_histmaker"},
|
||||
{{std::nullopt, "0"}, "grow_gpu_hist"}, // default to hist
|
||||
{{std::nullopt, "cpu"}, "grow_quantile_histmaker"},
|
||||
{{std::nullopt, "cuda"}, "grow_gpu_hist"},
|
||||
{{std::nullopt, "cuda:0"}, "grow_gpu_hist"},
|
||||
{{std::nullopt, std::nullopt}, "grow_quantile_histmaker"},
|
||||
};
|
||||
|
||||
@@ -392,8 +415,7 @@ class Dart : public testing::TestWithParam<char const*> {
|
||||
for (size_t i = 0; i < 16; ++i) {
|
||||
learner->UpdateOneIter(i, p_mat);
|
||||
}
|
||||
|
||||
ConfigLearnerByCtx(&ctx, learner.get());
|
||||
learner->SetParam("device", ctx.DeviceName());
|
||||
|
||||
HostDeviceVector<float> predts_training;
|
||||
learner->Predict(p_mat, false, &predts_training, 0, 0, true);
|
||||
@@ -654,8 +676,7 @@ TEST(GBTree, InplacePredictionError) {
|
||||
RandomDataGenerator{n_samples, n_features, 0.5f}.Batches(2).GenerateSparsePageDMatrix(
|
||||
"cache", true);
|
||||
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
|
||||
learner->SetParam("booster", booster);
|
||||
ConfigLearnerByCtx(ctx, learner.get());
|
||||
learner->SetParams(Args{{"booster", booster}, {"device", ctx->DeviceName()}});
|
||||
learner->Configure();
|
||||
for (std::int32_t i = 0; i < 3; ++i) {
|
||||
learner->UpdateOneIter(i, p_fmat);
|
||||
@@ -697,9 +718,9 @@ TEST(GBTree, InplacePredictionError) {
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
};
|
||||
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
|
||||
learner->SetParam("booster", booster);
|
||||
learner->SetParam("max_bin", std::to_string(max_bins));
|
||||
ConfigLearnerByCtx(ctx, learner.get());
|
||||
learner->SetParams(Args{{"booster", booster},
|
||||
{"max_bin", std::to_string(max_bins)},
|
||||
{"device", ctx->DeviceName()}});
|
||||
learner->Configure();
|
||||
for (std::int32_t i = 0; i < 3; ++i) {
|
||||
learner->UpdateOneIter(i, p_fmat);
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr
|
||||
#include <string> // for string
|
||||
#include <thread> // for thread
|
||||
|
||||
#include "../../../src/data/adapter.h" // for ArrayAdapter
|
||||
#include "../../../src/data/device_adapter.cuh" // for CupyAdapter
|
||||
@@ -41,7 +42,7 @@ void TestInplaceFallback(Context const* ctx) {
|
||||
|
||||
// learner is configured to the device specified by ctx
|
||||
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||
ConfigLearnerByCtx(ctx, learner.get());
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
for (std::int32_t i = 0; i < 3; ++i) {
|
||||
learner->UpdateOneIter(i, Xy);
|
||||
}
|
||||
@@ -56,18 +57,31 @@ void TestInplaceFallback(Context const* ctx) {
|
||||
|
||||
HostDeviceVector<float>* out_predt{nullptr};
|
||||
ConsoleLogger::Configure(Args{{"verbosity", "1"}});
|
||||
std::string output;
|
||||
// test whether the warning is raised
|
||||
#if !defined(_WIN32)
|
||||
// Windows has issue with CUDA and thread local storage. For some reason, on Windows a
|
||||
// cudaInitializationError is raised during destruction of `HostDeviceVector`. This
|
||||
// might be related to https://github.com/dmlc/xgboost/issues/5793
|
||||
::testing::internal::CaptureStderr();
|
||||
std::thread{[&] {
|
||||
// Launch a new thread to ensure a warning is raised as we prevent over-verbose
|
||||
// warning by using thread-local flags.
|
||||
learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
|
||||
&out_predt, 0, 0);
|
||||
}}.join();
|
||||
output = testing::internal::GetCapturedStderr();
|
||||
ASSERT_NE(output.find("Falling back"), std::string::npos);
|
||||
#endif
|
||||
|
||||
learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
|
||||
&out_predt, 0, 0);
|
||||
auto output = testing::internal::GetCapturedStderr();
|
||||
ASSERT_NE(output.find("Falling back"), std::string::npos);
|
||||
|
||||
// test when the contexts match
|
||||
Context new_ctx = *proxy->Ctx();
|
||||
ASSERT_NE(new_ctx.gpu_id, ctx->gpu_id);
|
||||
|
||||
ConfigLearnerByCtx(&new_ctx, learner.get());
|
||||
learner->SetParam("device", new_ctx.DeviceName());
|
||||
HostDeviceVector<float>* out_predt_1{nullptr};
|
||||
// no warning is raised
|
||||
::testing::internal::CaptureStderr();
|
||||
|
||||
@@ -559,16 +559,4 @@ class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test {
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// A temporary solution before we move away from gpu_id.
|
||||
inline void ConfigLearnerByCtx(Context const* ctx, Learner* learner) {
|
||||
if (ctx->IsCPU()) {
|
||||
learner->SetParam("tree_method", "hist");
|
||||
} else {
|
||||
learner->SetParam("tree_method", "gpu_hist");
|
||||
}
|
||||
learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
|
||||
learner->Configure();
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, ctx->gpu_id);
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -46,7 +46,6 @@ inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device)
|
||||
|
||||
inline void TestMultiClassError(int device, DataSplitMode data_split_mode) {
|
||||
auto ctx = MakeCUDACtx(device);
|
||||
ctx.gpu_id = device;
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("merror", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "merror");
|
||||
@@ -67,7 +66,6 @@ inline void VerifyMultiClassError(DataSplitMode data_split_mode = DataSplitMode:
|
||||
|
||||
inline void TestMultiClassLogLoss(int device, DataSplitMode data_split_mode) {
|
||||
auto ctx = MakeCUDACtx(device);
|
||||
ctx.gpu_id = device;
|
||||
xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &ctx);
|
||||
metric->Configure({});
|
||||
ASSERT_STREQ(metric->Name(), "mlogloss");
|
||||
|
||||
@@ -13,26 +13,22 @@
|
||||
|
||||
namespace xgboost::obj {
|
||||
TEST(LambdaRank, GPUNDCGJsonIO) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestNDCGJsonIO(&ctx);
|
||||
}
|
||||
|
||||
TEST(LambdaRank, GPUMAPStat) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestMAPStat(&ctx);
|
||||
}
|
||||
|
||||
TEST(LambdaRank, GPUNDCGGPair) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestNDCGGPair(&ctx);
|
||||
}
|
||||
|
||||
void TestGPUMakePair() {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
|
||||
MetaInfo info;
|
||||
HostDeviceVector<float> predt;
|
||||
@@ -126,8 +122,7 @@ void TestGPUMakePair() {
|
||||
TEST(LambdaRank, GPUMakePair) { TestGPUMakePair(); }
|
||||
|
||||
TEST(LambdaRank, GPUUnbiasedNDCG) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestUnbiasedNDCG(&ctx);
|
||||
}
|
||||
|
||||
@@ -161,8 +156,7 @@ TEST(LambdaRank, RankItemCountOnRight) {
|
||||
}
|
||||
|
||||
TEST(LambdaRank, GPUMAPGPair) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
TestMAPGPair(&ctx);
|
||||
}
|
||||
} // namespace xgboost::obj
|
||||
|
||||
@@ -305,12 +305,12 @@ TEST(Objective, CPU_vs_CUDA) {
|
||||
|
||||
{
|
||||
// CPU
|
||||
ctx.gpu_id = -1;
|
||||
ctx = ctx.MakeCPU();
|
||||
obj->GetGradient(preds, info, 0, &cpu_out_preds);
|
||||
}
|
||||
{
|
||||
// CUDA
|
||||
ctx.gpu_id = 0;
|
||||
ctx = ctx.MakeCUDA(0);
|
||||
obj->GetGradient(preds, info, 0, &cuda_out_preds);
|
||||
}
|
||||
|
||||
|
||||
@@ -148,7 +148,7 @@ TEST(Plugin, CPUvsOneAPI) {
|
||||
|
||||
{
|
||||
// CPU
|
||||
ctx.gpu_id = -1;
|
||||
ctx = ctx.MakeCPU();
|
||||
obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
|
||||
}
|
||||
{
|
||||
|
||||
@@ -214,15 +214,16 @@ void TestUpdatePredictionCache(bool use_subsampling) {
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(CPUPredictor, GHistIndex) {
|
||||
TEST(CPUPredictor, GHistIndexTraining) {
|
||||
size_t constexpr kRows{128}, kCols{16}, kBins{64};
|
||||
Context ctx;
|
||||
auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateQuantileDMatrix(false);
|
||||
HostDeviceVector<float> storage(kRows * kCols);
|
||||
auto columnar = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);
|
||||
auto adapter = data::ArrayAdapter(columnar.c_str());
|
||||
std::shared_ptr<DMatrix> p_full{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
|
||||
TestTrainingPrediction(kRows, kBins, "hist", p_full, p_hist);
|
||||
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist);
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPrediction) {
|
||||
|
||||
@@ -33,9 +33,8 @@ TEST(GPUPredictor, Basic) {
|
||||
int n_row = i, n_col = i;
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
@@ -71,7 +70,7 @@ void VerifyBasicColumnSplit(std::array<std::vector<float>, 32> const& expected_r
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
std::unique_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};
|
||||
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
@@ -102,7 +101,7 @@ TEST(GPUPredictor, MGPUBasicColumnSplit) {
|
||||
size_t n_row = i, n_col = i;
|
||||
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
|
||||
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
|
||||
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);
|
||||
|
||||
// Test predict batch
|
||||
@@ -132,18 +131,19 @@ TEST(GPUPredictor, EllpackBasic) {
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, EllpackTraining) {
|
||||
size_t constexpr kRows { 128 }, kCols { 16 }, kBins { 64 };
|
||||
auto p_ellpack =
|
||||
RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).Device(0).GenerateDeviceDMatrix(false);
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows{128}, kCols{16}, kBins{64};
|
||||
auto p_ellpack = RandomDataGenerator{kRows, kCols, 0.0}
|
||||
.Bins(kBins)
|
||||
.Device(ctx.Ordinal())
|
||||
.GenerateDeviceDMatrix(false);
|
||||
HostDeviceVector<float> storage(kRows * kCols);
|
||||
auto columnar = RandomDataGenerator{kRows, kCols, 0.0}
|
||||
.Device(0)
|
||||
.GenerateArrayInterface(&storage);
|
||||
auto columnar =
|
||||
RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Ordinal()).GenerateArrayInterface(&storage);
|
||||
auto adapter = data::CupyAdapter(columnar);
|
||||
std::shared_ptr<DMatrix> p_full {
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)
|
||||
};
|
||||
TestTrainingPrediction(kRows, kBins, "gpu_hist", p_full, p_ellpack);
|
||||
std::shared_ptr<DMatrix> p_full{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
|
||||
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_ellpack);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, ExternalMemoryTest) {
|
||||
@@ -153,9 +153,8 @@ TEST(GPUPredictor, ExternalMemoryTest) {
|
||||
gpu_predictor->Configure({});
|
||||
|
||||
const int n_classes = 3;
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.gpu_id)};
|
||||
Context ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Ordinal())};
|
||||
|
||||
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
|
||||
std::vector<std::unique_ptr<DMatrix>> dmats;
|
||||
@@ -185,7 +184,7 @@ TEST(GPUPredictor, InplacePredictCupy) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows{128}, kCols{64};
|
||||
RandomDataGenerator gen(kRows, kCols, 0.5);
|
||||
gen.Device(ctx.gpu_id);
|
||||
gen.Device(ctx.Ordinal());
|
||||
HostDeviceVector<float> data;
|
||||
std::string interface_str = gen.GenerateArrayInterface(&data);
|
||||
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
|
||||
@@ -197,7 +196,7 @@ TEST(GPUPredictor, InplacePredictCuDF) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
size_t constexpr kRows{128}, kCols{64};
|
||||
RandomDataGenerator gen(kRows, kCols, 0.5);
|
||||
gen.Device(ctx.gpu_id);
|
||||
gen.Device(ctx.Ordinal());
|
||||
std::vector<HostDeviceVector<float>> storage(kCols);
|
||||
auto interface_str = gen.GenerateColumnarArrayInterface(&storage);
|
||||
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
|
||||
@@ -214,9 +213,8 @@ TEST(GpuPredictor, LesserFeatures) {
|
||||
TEST(GPUPredictor, ShapStump) {
|
||||
cudaSetDevice(0);
|
||||
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
@@ -241,9 +239,8 @@ TEST(GPUPredictor, ShapStump) {
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, Shap) {
|
||||
Context ctx;
|
||||
ctx.gpu_id = 0;
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
|
||||
gbm::GBTreeModel model(&mparam, &ctx);
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
|
||||
@@ -44,60 +44,49 @@ TEST(Predictor, PredictionCache) {
|
||||
EXPECT_ANY_THROW(container.Entry(m));
|
||||
}
|
||||
|
||||
void TestTrainingPrediction(size_t rows, size_t bins,
|
||||
std::string tree_method,
|
||||
std::shared_ptr<DMatrix> p_full,
|
||||
std::shared_ptr<DMatrix> p_hist) {
|
||||
void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
|
||||
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist) {
|
||||
size_t constexpr kCols = 16;
|
||||
size_t constexpr kClasses = 3;
|
||||
size_t constexpr kIters = 3;
|
||||
|
||||
std::unique_ptr<Learner> learner;
|
||||
auto train = [&](Context const& ctx) {
|
||||
p_hist->Info().labels.Reshape(rows, 1);
|
||||
auto &h_label = p_hist->Info().labels.Data()->HostVector();
|
||||
|
||||
for (size_t i = 0; i < rows; ++i) {
|
||||
h_label[i] = i % kClasses;
|
||||
}
|
||||
p_hist->Info().labels.Reshape(rows, 1);
|
||||
auto &h_label = p_hist->Info().labels.Data()->HostVector();
|
||||
|
||||
learner.reset(Learner::Create({}));
|
||||
learner->SetParam("tree_method", tree_method);
|
||||
learner->SetParam("objective", "multi:softprob");
|
||||
learner->SetParam("num_feature", std::to_string(kCols));
|
||||
learner->SetParam("num_class", std::to_string(kClasses));
|
||||
learner->SetParam("max_bin", std::to_string(bins));
|
||||
ConfigLearnerByCtx(&ctx, learner.get());
|
||||
learner->Configure();
|
||||
for (size_t i = 0; i < rows; ++i) {
|
||||
h_label[i] = i % kClasses;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < kIters; ++i) {
|
||||
learner->UpdateOneIter(i, p_hist);
|
||||
}
|
||||
learner.reset(Learner::Create({}));
|
||||
learner->SetParams(Args{{"objective", "multi:softprob"},
|
||||
{"num_feature", std::to_string(kCols)},
|
||||
{"num_class", std::to_string(kClasses)},
|
||||
{"max_bin", std::to_string(bins)},
|
||||
{"device", ctx->DeviceName()}});
|
||||
learner->Configure();
|
||||
|
||||
Json model{Object{}};
|
||||
learner->SaveModel(&model);
|
||||
for (size_t i = 0; i < kIters; ++i) {
|
||||
learner->UpdateOneIter(i, p_hist);
|
||||
}
|
||||
|
||||
learner.reset(Learner::Create({}));
|
||||
learner->LoadModel(model);
|
||||
ConfigLearnerByCtx(&ctx, learner.get());
|
||||
learner->Configure();
|
||||
Json model{Object{}};
|
||||
learner->SaveModel(&model);
|
||||
|
||||
HostDeviceVector<float> from_full;
|
||||
learner->Predict(p_full, false, &from_full, 0, 0);
|
||||
learner.reset(Learner::Create({}));
|
||||
learner->LoadModel(model);
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
learner->Configure();
|
||||
|
||||
HostDeviceVector<float> from_hist;
|
||||
learner->Predict(p_hist, false, &from_hist, 0, 0);
|
||||
HostDeviceVector<float> from_full;
|
||||
learner->Predict(p_full, false, &from_full, 0, 0);
|
||||
|
||||
for (size_t i = 0; i < rows; ++i) {
|
||||
EXPECT_NEAR(from_hist.ConstHostVector()[i],
|
||||
from_full.ConstHostVector()[i], kRtEps);
|
||||
}
|
||||
};
|
||||
HostDeviceVector<float> from_hist;
|
||||
learner->Predict(p_hist, false, &from_hist, 0, 0);
|
||||
|
||||
if (tree_method == "gpu_hist") {
|
||||
train(MakeCUDACtx(0));
|
||||
} else {
|
||||
train(Context{});
|
||||
for (size_t i = 0; i < rows; ++i) {
|
||||
EXPECT_NEAR(from_hist.ConstHostVector()[i], from_full.ConstHostVector()[i], kRtEps);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -120,7 +109,7 @@ void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_r
|
||||
learner->UpdateOneIter(it, m);
|
||||
}
|
||||
|
||||
learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
learner->Configure();
|
||||
|
||||
HostDeviceVector<float> *p_out_predictions_0{nullptr};
|
||||
@@ -153,7 +142,7 @@ void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_r
|
||||
ASSERT_NEAR(h_pred[i], h_pred_0[i] + h_pred_1[i] - 0.5f, kRtEps);
|
||||
}
|
||||
|
||||
learner->SetParam("gpu_id", "-1");
|
||||
learner->SetParam("device", "cpu");
|
||||
learner->Configure();
|
||||
}
|
||||
|
||||
@@ -161,12 +150,12 @@ namespace {
|
||||
std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMatrix> dmat,
|
||||
size_t iters, size_t forest = 1) {
|
||||
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
|
||||
learner->SetParams(Args{{"num_parallel_tree", std::to_string(forest)}});
|
||||
learner->SetParams(
|
||||
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->DeviceName()}});
|
||||
for (size_t i = 0; i < iters; ++i) {
|
||||
learner->UpdateOneIter(i, dmat);
|
||||
}
|
||||
|
||||
ConfigLearnerByCtx(ctx, learner.get());
|
||||
return learner;
|
||||
}
|
||||
|
||||
@@ -215,7 +204,7 @@ void TestPredictionDeviceAccess() {
|
||||
{
|
||||
ASSERT_EQ(from_cpu.DeviceIdx(), Context::kCpuId);
|
||||
Context cpu_ctx;
|
||||
ConfigLearnerByCtx(&cpu_ctx, learner.get());
|
||||
learner->SetParam("device", cpu_ctx.DeviceName());
|
||||
learner->Predict(m_test, false, &from_cpu, 0, 0);
|
||||
ASSERT_TRUE(from_cpu.HostCanWrite());
|
||||
ASSERT_FALSE(from_cpu.DeviceCanRead());
|
||||
@@ -225,7 +214,7 @@ void TestPredictionDeviceAccess() {
|
||||
HostDeviceVector<float> from_cuda;
|
||||
{
|
||||
Context cuda_ctx = MakeCUDACtx(0);
|
||||
ConfigLearnerByCtx(&cuda_ctx, learner.get());
|
||||
learner->SetParam("device", cuda_ctx.DeviceName());
|
||||
learner->Predict(m_test, false, &from_cuda, 0, 0);
|
||||
ASSERT_EQ(from_cuda.DeviceIdx(), 0);
|
||||
ASSERT_TRUE(from_cuda.DeviceCanWrite());
|
||||
@@ -465,11 +454,7 @@ void TestIterationRangeColumnSplit(Context const* ctx) {
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses);
|
||||
auto learner = LearnerForTest(ctx, dmat, kIters, kForest);
|
||||
|
||||
if (ctx->IsCPU()) {
|
||||
learner->SetParams(Args{{"gpu_id", std::to_string(-1)}});
|
||||
} else {
|
||||
learner->SetParams(Args{{"gpu_id", std::to_string(0)}});
|
||||
}
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
|
||||
bool bound = false;
|
||||
std::unique_ptr<Learner> sliced{learner->Slice(0, 3, 1, &bound)};
|
||||
@@ -582,7 +567,7 @@ void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity) {
|
||||
learner.reset(Learner::Create({Xy}));
|
||||
learner->LoadModel(model);
|
||||
|
||||
ConfigLearnerByCtx(ctx, learner.get());
|
||||
learner->SetParam("device", ctx->DeviceName());
|
||||
learner->Predict(Xy, false, &sparse_predt, 0, 0);
|
||||
|
||||
auto constexpr kWorldSize = 2;
|
||||
|
||||
@@ -84,9 +84,8 @@ void TestPredictionFromGradientIndex(Context const* ctx, size_t rows, size_t col
|
||||
}
|
||||
|
||||
// p_full and p_hist should come from the same data set.
|
||||
void TestTrainingPrediction(size_t rows, size_t bins, std::string tree_method,
|
||||
std::shared_ptr<DMatrix> p_full,
|
||||
std::shared_ptr<DMatrix> p_hist);
|
||||
void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
|
||||
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);
|
||||
|
||||
void TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
|
||||
bst_feature_t cols);
|
||||
|
||||
31
tests/cpp/test_context.cc
Normal file
31
tests/cpp/test_context.cc
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/context.h>
|
||||
|
||||
namespace xgboost {
|
||||
// Checks what a default-constructed Context reports and that a set of
// malformed values for the "device" parameter are rejected with dmlc::Error.
TEST(Context, CPU) {
|
||||
Context ctx;
|
||||
// A default-constructed context is expected to be on the CPU device ...
ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
|
||||
// ... and to report the CPU ordinal constant.
ASSERT_EQ(ctx.Ordinal(), Context::kCpuId);
|
||||
|
||||
std::int32_t flag{0};
|
||||
// DispatchDevice receives two callables; for this context the test expects
// the first one to have run (flag becomes -1 rather than 1).
ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
|
||||
ASSERT_EQ(flag, -1);
|
||||
|
||||
// Every "device" value below must make UpdateAllowUnknown throw dmlc::Error.
// Unrecognised device name.
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "oops"}}), dmlc::Error);
|
||||
// Bare integer (presumably the legacy gpu_id spelling -- TODO confirm).
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "-1"}}), dmlc::Error);
|
||||
// Upper-case spellings of the device name, with and without an ordinal.
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CPU"}}), dmlc::Error);
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CUDA"}}), dmlc::Error);
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CPU:0"}}), dmlc::Error);
|
||||
// Ordinal with an explicit sign, with trailing characters, or missing.
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:+0"}}), dmlc::Error);
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:0-"}}), dmlc::Error);
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:"}}), dmlc::Error);
|
||||
// Nothing before the ':' separator.
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":"}}), dmlc::Error);
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":gpu"}}), dmlc::Error);
|
||||
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":0"}}), dmlc::Error);
|
||||
// Empty string.
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ""}}), dmlc::Error);
|
||||
}
|
||||
} // namespace xgboost
|
||||
99
tests/cpp/test_context.cu
Normal file
99
tests/cpp/test_context.cu
Normal file
@@ -0,0 +1,99 @@
|
||||
/**
|
||||
* Copyright 2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for Args
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/json.h> // for FromJson, ToJson
|
||||
|
||||
#include <string> // for string, to_string
|
||||
|
||||
#include "../../src/common/common.h" // for AllVisibleGPUs
|
||||
|
||||
namespace xgboost {
|
||||
namespace {
|
||||
// Asserts that `ctx` describes the CUDA device with ordinal `ord` through every accessor used
// here, and that the device is preserved by a ToJson/FromJson round trip.
//
// Note: the ASSERT_* macros only return from this helper on failure; the calling test keeps
// running afterwards.
void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
  // The legacy `gpu_id` field and the new device accessors must agree on the ordinal.
  ASSERT_EQ(ctx.gpu_id, ord);
  ASSERT_EQ(ctx.Device().ordinal, ord);
  std::string const expected_name = "cuda:" + std::to_string(ord);
  ASSERT_EQ(ctx.DeviceName(), expected_name);
  ASSERT_EQ(ctx.Ordinal(), ord);
  ASSERT_TRUE(ctx.IsCUDA());
  ASSERT_FALSE(ctx.IsCPU());
  ASSERT_EQ(ctx.Device(), DeviceOrd::CUDA(ord));

  // Serialise the context and load it into a fresh instance; the device must survive.
  Json serialised{ToJson(ctx)};
  Context restored;
  FromJson(serialised, &restored);
  ASSERT_EQ(restored.Device(), ctx.Device());
  ASSERT_EQ(restored.gpu_id, ctx.gpu_id);
}
|
||||
} // namespace
|
||||
|
||||
// Exercises the `device` parameter with CUDA ordinals: valid ordinals, out-of-range ordinals,
// copies and CPU/CUDA conversions of a context, the `fail_on_invalid_gpu_id` switch, the `gpu`
// alias, and independence between separate Context instances.
TEST(Context, DeviceOrdinal) {
  Context ctx;
  auto n_vis = common::AllVisibleGPUs();
  // Everything below needs a CUDA device. Without this guard `n_vis - 1` would be -1 and
  // `1001 % n_vis` would be a division by zero.
  ASSERT_GT(n_vis, 0) << "This test requires at least one visible CUDA device.";
  auto ord = n_vis - 1;

  // The last visible device is a valid ordinal and must be used as-is.
  std::string device = "cuda:" + std::to_string(ord);
  ctx.UpdateAllowUnknown(Args{{"device", device}});
  TestCUDA(ctx, ord);

  // An out-of-range ordinal is expected to wrap around the number of visible devices.
  device = "cuda:" + std::to_string(1001);
  ctx.UpdateAllowUnknown(Args{{"device", device}});
  ord = 1001 % n_vis;

  TestCUDA(ctx, ord);

  // For a CUDA context the test expects the second callable passed to DispatchDevice to run.
  std::int32_t flag{0};
  ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
  ASSERT_EQ(flag, 1);

  // Copying a context keeps the device.
  Context new_ctx = ctx;
  TestCUDA(new_ctx, ctx.Ordinal());

  // Converting to CPU and back to CUDA.
  auto cpu_ctx = ctx.MakeCPU();
  ASSERT_TRUE(cpu_ctx.IsCPU());
  ASSERT_EQ(cpu_ctx.Ordinal(), Context::kCpuId);
  ASSERT_EQ(cpu_ctx.Device(), DeviceOrd::CPU());

  auto cuda_ctx = cpu_ctx.MakeCUDA(ctx.Ordinal());
  TestCUDA(cuda_ctx, ctx.Ordinal());

  // With `fail_on_invalid_gpu_id` set, an out-of-range ordinal must raise instead of wrapping.
  cuda_ctx.UpdateAllowUnknown(Args{{"fail_on_invalid_gpu_id", "true"}});
  ASSERT_THROW({ cuda_ctx.UpdateAllowUnknown(Args{{"device", "cuda:9999"}}); }, dmlc::Error);
  // An ordinal written with a leading zero is accepted and resolves to device 0.
  cuda_ctx.UpdateAllowUnknown(Args{{"device", "cuda:00"}});
  ASSERT_EQ(cuda_ctx.Ordinal(), 0);

  ctx.UpdateAllowUnknown(Args{{"device", "cpu"}});
  // Test alias
  ctx.UpdateAllowUnknown(Args{{"device", "gpu:0"}});
  TestCUDA(ctx, 0);
  ctx.UpdateAllowUnknown(Args{{"device", "gpu"}});
  TestCUDA(ctx, 0);

  // Test the thread local memory in dmlc is not linking different instances together.
  cpu_ctx.UpdateAllowUnknown(Args{{"device", "cpu"}});
  TestCUDA(ctx, 0);
  // An update that carries no arguments must leave the device untouched.
  ctx.UpdateAllowUnknown(Args{});
  TestCUDA(ctx, 0);
}
|
||||
|
||||
// Exercises the legacy `gpu_id` parameter and its interaction with the `device` parameter.
TEST(Context, GPUId) {
  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
  TestCUDA(ctx, 0);

  auto n_vis = common::AllVisibleGPUs();
  // A failed assertion inside TestCUDA does not stop this test, so guard explicitly: with no
  // visible device `n_vis - 1` would be -1 and `1001 % n_vis` would be a division by zero.
  ASSERT_GT(n_vis, 0) << "This test requires at least one visible CUDA device.";
  auto ord = n_vis - 1;
  // The last visible device is a valid ordinal and must be used as-is.
  ctx.UpdateAllowUnknown(Args{{"gpu_id", std::to_string(ord)}});
  TestCUDA(ctx, ord);

  // An out-of-range ordinal given through `device` is expected to wrap around the number of
  // visible devices, and `gpu_id` must follow it (checked inside TestCUDA).
  auto device = "cuda:" + std::to_string(1001);
  ctx.UpdateAllowUnknown(Args{{"device", device}});
  ord = 1001 % n_vis;
  TestCUDA(ctx, ord);

  // `gpu_id=-1` switches the context back to CPU.
  ctx.UpdateAllowUnknown(Args{{"gpu_id", "-1"}});
  ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
}
|
||||
} // namespace xgboost
|
||||
@@ -27,7 +27,6 @@
|
||||
#include "../../src/common/io.h" // for LoadSequentialFile
|
||||
#include "../../src/common/linalg_op.h" // for ElementWiseTransformHost, begin, end
|
||||
#include "../../src/common/random.h" // for GlobalRandom
|
||||
#include "../../src/common/transform_iterator.h" // for IndexTransformIter
|
||||
#include "dmlc/io.h" // for Stream
|
||||
#include "dmlc/omp.h" // for omp_get_max_threads
|
||||
#include "dmlc/registry.h" // for Registry
|
||||
@@ -35,14 +34,13 @@
|
||||
#include "helpers.h" // for GetBaseScore, RandomDataGenerator
|
||||
#include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator
|
||||
#include "xgboost/base.h" // for bst_float, Args, bst_feature_t, bst_int
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/context.h" // for Context, DeviceOrd
|
||||
#include "xgboost/data.h" // for DMatrix, MetaInfo, DataType
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/json.h" // for Json, Object, get, String, IsA, opera...
|
||||
#include "xgboost/linalg.h" // for Tensor, TensorView
|
||||
#include "xgboost/logging.h" // for ConsoleLogger
|
||||
#include "xgboost/predictor.h" // for PredictionCacheEntry
|
||||
#include "xgboost/span.h" // for Span, operator!=, SpanIterator
|
||||
#include "xgboost/string_view.h" // for StringView
|
||||
|
||||
namespace xgboost {
|
||||
@@ -58,9 +56,9 @@ TEST(Learner, Basic) {
|
||||
auto minor = XGBOOST_VER_MINOR;
|
||||
auto patch = XGBOOST_VER_PATCH;
|
||||
|
||||
static_assert(std::is_integral<decltype(major)>::value, "Wrong major version type");
|
||||
static_assert(std::is_integral<decltype(minor)>::value, "Wrong minor version type");
|
||||
static_assert(std::is_integral<decltype(patch)>::value, "Wrong patch version type");
|
||||
static_assert(std::is_integral_v<decltype(major)>, "Wrong major version type");
|
||||
static_assert(std::is_integral_v<decltype(minor)>, "Wrong minor version type");
|
||||
static_assert(std::is_integral_v<decltype(patch)>, "Wrong patch version type");
|
||||
}
|
||||
|
||||
TEST(Learner, ParameterValidation) {
|
||||
@@ -92,8 +90,7 @@ TEST(Learner, CheckGroup) {
|
||||
size_t constexpr kNumRows = 17;
|
||||
bst_feature_t constexpr kNumCols = 15;
|
||||
|
||||
std::shared_ptr<DMatrix> p_mat{
|
||||
RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
|
||||
std::shared_ptr<DMatrix> p_mat{RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
|
||||
std::vector<bst_float> weight(kNumGroups, 1);
|
||||
std::vector<bst_int> group(kNumGroups);
|
||||
group[0] = 2;
|
||||
@@ -312,35 +309,36 @@ TEST(Learner, GPUConfiguration) {
|
||||
learner->SetParams({Arg{"booster", "gblinear"},
|
||||
Arg{"updater", "gpu_coord_descent"}});
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
|
||||
}
|
||||
{
|
||||
std::unique_ptr<Learner> learner {Learner::Create(mat)};
|
||||
std::unique_ptr<Learner> learner{Learner::Create(mat)};
|
||||
learner->SetParams({Arg{"tree_method", "gpu_hist"}});
|
||||
learner->Configure();
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
|
||||
}
|
||||
{
|
||||
std::unique_ptr<Learner> learner {Learner::Create(mat)};
|
||||
learner->SetParams({Arg{"tree_method", "gpu_hist"},
|
||||
Arg{"gpu_id", "-1"}});
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
|
||||
}
|
||||
{
|
||||
// with CPU algorithm
|
||||
std::unique_ptr<Learner> learner {Learner::Create(mat)};
|
||||
learner->SetParams({Arg{"tree_method", "hist"}});
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, -1);
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CPU());
|
||||
}
|
||||
{
|
||||
// with CPU algorithm, but `gpu_id` takes priority
|
||||
std::unique_ptr<Learner> learner {Learner::Create(mat)};
|
||||
learner->SetParams({Arg{"tree_method", "hist"},
|
||||
Arg{"gpu_id", "0"}});
|
||||
learner->SetParams({Arg{"tree_method", "hist"}, Arg{"gpu_id", "0"}});
|
||||
learner->UpdateOneIter(0, p_dmat);
|
||||
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
|
||||
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
|
||||
}
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
@@ -6,7 +6,9 @@
|
||||
#include <xgboost/task.h> // for ObjInfo
|
||||
#include <xgboost/tree_updater.h> // for TreeUpdater
|
||||
|
||||
#include <memory> // for unique_ptr
|
||||
#include <memory> // for unique_ptr
|
||||
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
TEST(Updater, HasNodePosition) {
|
||||
@@ -19,7 +21,7 @@ TEST(Updater, HasNodePosition) {
|
||||
ASSERT_TRUE(up->HasNodePosition());
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
ctx.gpu_id = 0;
|
||||
ctx = MakeCUDACtx(0);
|
||||
up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, &task));
|
||||
ASSERT_TRUE(up->HasNodePosition());
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
@@ -70,9 +70,9 @@ class TestPredictionCache : public ::testing::Test {
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{{"nthread", "8"}});
|
||||
if (updater_name == "grow_gpu_hist") {
|
||||
ctx.gpu_id = 0;
|
||||
ctx = ctx.MakeCUDA(0);
|
||||
} else {
|
||||
ctx.gpu_id = Context::kCpuId;
|
||||
ctx = ctx.MakeCPU();
|
||||
}
|
||||
|
||||
ObjInfo task{ObjInfo::kRegression};
|
||||
|
||||
@@ -34,7 +34,7 @@ class TestLoadPickle:
|
||||
bst = load_pickle(model_path)
|
||||
config = bst.save_config()
|
||||
config = json.loads(config)
|
||||
assert config["learner"]["generic_param"]["gpu_id"] == "-1"
|
||||
assert config["learner"]["generic_param"]["device"] == "cpu"
|
||||
|
||||
def test_context_is_preserved(self) -> None:
|
||||
"""Test the device context is preserved after pickling."""
|
||||
@@ -42,14 +42,14 @@ class TestLoadPickle:
|
||||
bst = load_pickle(model_path)
|
||||
config = bst.save_config()
|
||||
config = json.loads(config)
|
||||
assert config["learner"]["generic_param"]["gpu_id"] == "0"
|
||||
assert config["learner"]["generic_param"]["device"] == "cuda:0"
|
||||
|
||||
def test_wrap_gpu_id(self) -> None:
|
||||
assert os.environ["CUDA_VISIBLE_DEVICES"] == "0"
|
||||
bst = load_pickle(model_path)
|
||||
config = bst.save_config()
|
||||
config = json.loads(config)
|
||||
assert config["learner"]["generic_param"]["gpu_id"] == "0"
|
||||
assert config["learner"]["generic_param"]["device"] == "cuda:0"
|
||||
|
||||
x, y = build_dataset()
|
||||
test_x = xgb.DMatrix(x)
|
||||
|
||||
@@ -203,7 +203,7 @@ class TestQuantileDMatrix:
|
||||
np.testing.assert_equal(h_ret.indices, d_ret.indices)
|
||||
|
||||
booster = xgb.train(
|
||||
{"tree_method": "gpu_hist", "gpu_id": "0"}, dtrain=d_m
|
||||
{"tree_method": "hist", "device": "cuda:0"}, dtrain=d_m
|
||||
)
|
||||
|
||||
np.testing.assert_allclose(
|
||||
|
||||
@@ -65,16 +65,20 @@ class TestGPUBasicModels:
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_invalid_gpu_id(self):
|
||||
from sklearn.datasets import load_digits
|
||||
|
||||
X, y = load_digits(return_X_y=True)
|
||||
# should pass with invalid gpu id
|
||||
cls1 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999)
|
||||
cls1 = xgb.XGBClassifier(tree_method="gpu_hist", gpu_id=9999)
|
||||
cls1.fit(X, y)
|
||||
# should throw error with fail_on_invalid_gpu_id enabled
|
||||
cls2 = xgb.XGBClassifier(
|
||||
tree_method='gpu_hist', gpu_id=9999, fail_on_invalid_gpu_id=True
|
||||
tree_method="gpu_hist", gpu_id=9999, fail_on_invalid_gpu_id=True
|
||||
)
|
||||
try:
|
||||
with pytest.raises(ValueError, match="ordinal 9999 is invalid"):
|
||||
cls2.fit(X, y)
|
||||
|
||||
cls2 = xgb.XGBClassifier(
|
||||
tree_method="hist", device="cuda:9999", fail_on_invalid_gpu_id=True
|
||||
)
|
||||
with pytest.raises(ValueError, match="ordinal 9999 is invalid"):
|
||||
cls2.fit(X, y)
|
||||
assert False, "Should have failed with with fail_on_invalid_gpu_id enabled"
|
||||
except xgb.core.XGBoostError as err:
|
||||
assert "gpu_id 9999 is invalid" in str(err)
|
||||
|
||||
@@ -43,10 +43,16 @@ class TestGPUEvalMetrics:
|
||||
num_boost_round=10,
|
||||
)
|
||||
cpu_auc = float(booster.eval(Xy).split(":")[1])
|
||||
booster.set_param({"gpu_id": "0"})
|
||||
assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
|
||||
booster.set_param({"device": "cuda:0"})
|
||||
assert (
|
||||
json.loads(booster.save_config())["learner"]["generic_param"]["device"]
|
||||
== "cuda:0"
|
||||
)
|
||||
gpu_auc = float(booster.eval(Xy).split(":")[1])
|
||||
assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
|
||||
assert (
|
||||
json.loads(booster.save_config())["learner"]["generic_param"]["device"]
|
||||
== "cuda:0"
|
||||
)
|
||||
|
||||
np.testing.assert_allclose(cpu_auc, gpu_auc)
|
||||
|
||||
|
||||
@@ -113,14 +113,6 @@ class TestPickling:
|
||||
param = {"tree_method": "gpu_hist", "verbosity": 1}
|
||||
bst = xgb.train(param, train_x)
|
||||
|
||||
with tm.captured_output() as (out, err):
|
||||
bst.inplace_predict(x)
|
||||
|
||||
# The warning is redirected to Python callback, so it's printed in stdout
|
||||
# instead of stderr.
|
||||
stdout = out.getvalue()
|
||||
assert stdout.find("mismatched devices") != -1
|
||||
|
||||
save_pickle(bst, model_path)
|
||||
|
||||
args = self.args_template.copy()
|
||||
@@ -177,7 +169,7 @@ class TestPickling:
|
||||
|
||||
# Switch to CPU predictor
|
||||
bst = model.get_booster()
|
||||
tm.set_ordinal(-1, bst)
|
||||
bst.set_param({"device": "cpu"})
|
||||
cpu_pred = model.predict(x, output_margin=True)
|
||||
np.testing.assert_allclose(cpu_pred, gpu_pred, rtol=1e-5)
|
||||
|
||||
|
||||
@@ -39,7 +39,8 @@ predict_parameter_strategy = strategies.fixed_dictionaries(
|
||||
}
|
||||
)
|
||||
|
||||
pytestmark = tm.timeout(20)
|
||||
# cupy nvrtc compilation can take a long time for the first run
|
||||
pytestmark = tm.timeout(30)
|
||||
|
||||
|
||||
class TestGPUPredict:
|
||||
@@ -71,8 +72,8 @@ class TestGPUPredict:
|
||||
param = {
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": "logloss",
|
||||
"tree_method": "gpu_hist",
|
||||
"gpu_id": 0,
|
||||
"tree_method": "hist",
|
||||
"device": "gpu:0",
|
||||
"max_depth": 1,
|
||||
}
|
||||
bst = xgb.train(
|
||||
@@ -84,7 +85,7 @@ class TestGPUPredict:
|
||||
gpu_pred_test = bst.predict(dtest, output_margin=True)
|
||||
gpu_pred_val = bst.predict(dval, output_margin=True)
|
||||
|
||||
bst.set_param({"gpu_id": -1, "tree_method": "hist"})
|
||||
bst.set_param({"device": "cpu", "tree_method": "hist"})
|
||||
bst_cpu = copy(bst)
|
||||
cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
|
||||
cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
|
||||
@@ -107,14 +108,15 @@ class TestGPUPredict:
|
||||
dtrain = xgb.DMatrix(X_train, label=y_train)
|
||||
|
||||
params = {}
|
||||
params["tree_method"] = "gpu_hist"
|
||||
params["tree_method"] = "hist"
|
||||
params["device"] = "cuda:0"
|
||||
bst = xgb.train(params, dtrain)
|
||||
|
||||
tm.set_ordinal(0, bst)
|
||||
bst.set_param({"device": "cuda:0"})
|
||||
# Don't reuse the DMatrix for prediction, otherwise the result is cached.
|
||||
predict_gpu_0 = bst.predict(xgb.DMatrix(X_test))
|
||||
predict_gpu_1 = bst.predict(xgb.DMatrix(X_test))
|
||||
tm.set_ordinal(-1, bst)
|
||||
bst.set_param({"device": "cpu"})
|
||||
predict_cpu = bst.predict(xgb.DMatrix(X_test))
|
||||
|
||||
assert np.allclose(predict_gpu_0, predict_gpu_1)
|
||||
@@ -131,8 +133,8 @@ class TestGPUPredict:
|
||||
X_test, y_test = X[tr_size:, :], y[tr_size:]
|
||||
|
||||
params = {
|
||||
"tree_method": "gpu_hist",
|
||||
"gpu_id": "0",
|
||||
"tree_method": "hist",
|
||||
"device": "cuda:0",
|
||||
"n_jobs": -1,
|
||||
"seed": 123,
|
||||
}
|
||||
@@ -141,13 +143,54 @@ class TestGPUPredict:
|
||||
gpu_test_score = m.score(X_test, y_test)
|
||||
|
||||
# Now with cpu
|
||||
m = tm.set_ordinal(-1, m)
|
||||
m.set_params(device="cpu")
|
||||
cpu_train_score = m.score(X_train, y_train)
|
||||
cpu_test_score = m.score(X_test, y_test)
|
||||
|
||||
assert np.allclose(cpu_train_score, gpu_train_score)
|
||||
assert np.allclose(cpu_test_score, gpu_test_score)
|
||||
|
||||
@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.skipif(**tm.no_cupy())
def test_inplace_predict_device_type(self, device: str) -> None:
    """Check in-place prediction across devices and input container types.

    The sklearn interface predicts in place by default, and gbtree falls back to
    DMatrix whenever the device doesn't match. A regressor is trained on cupy data
    with the parametrized ``device``; predictions on the same values supplied as a
    numpy array, a pandas DataFrame, a cudf DataFrame and a scipy CSR matrix must
    all match the prediction on the original cupy input.

    """
    import cudf
    import cupy as cp
    import pandas as pd
    from scipy.sparse import csr_matrix

    reg = xgb.XGBRegressor(tree_method="hist", device=device)
    n_samples, n_features = 4096, 13
    X, y, w = tm.make_regression(n_samples, n_features, use_cupy=True)
    # NOTE(review): presumably removes exact zeros so the dense and CSR inputs
    # describe the same values -- confirm.
    X[X == 0.0] = 1.0

    reg.fit(X, y, sample_weight=w)
    reference = reg.predict(X)

    # Convert and predict one container at a time, in the same order as before:
    # numpy array, pandas DataFrame, cudf DataFrame, scipy CSR matrix.
    X = cp.asnumpy(X)
    converters = (lambda arr: arr, pd.DataFrame, cudf.DataFrame, csr_matrix)
    predictions = [reg.predict(convert(X)) for convert in converters]

    for predt in predictions:
        np.testing.assert_allclose(reference, predt)
|
||||
|
||||
def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
|
||||
import cupy as cp
|
||||
|
||||
@@ -175,7 +218,9 @@ class TestGPUPredict:
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
|
||||
booster = xgb.train(
|
||||
{"tree_method": "gpu_hist", "gpu_id": device}, dtrain, num_boost_round=10
|
||||
{"tree_method": "hist", "device": f"cuda:{device}"},
|
||||
dtrain,
|
||||
num_boost_round=10,
|
||||
)
|
||||
|
||||
test = xgb.DMatrix(X[:10, ...], missing=missing)
|
||||
@@ -208,13 +253,13 @@ class TestGPUPredict:
|
||||
missing_idx = [i for i in range(0, X.shape[1], 16)]
|
||||
X[:, missing_idx] = missing
|
||||
reg = xgb.XGBRegressor(
|
||||
tree_method="gpu_hist", n_estimators=8, missing=missing, gpu_id=device
|
||||
tree_method="hist", n_estimators=8, missing=missing, device=f"cuda:{device}"
|
||||
)
|
||||
reg.fit(X, y)
|
||||
|
||||
reg = tm.set_ordinal(device, reg)
|
||||
reg.set_params(device=f"cuda:{device}")
|
||||
gpu_predt = reg.predict(X)
|
||||
reg = tm.set_ordinal(-1, reg)
|
||||
reg = reg.set_params(device="cpu")
|
||||
cpu_predt = reg.predict(cp.asnumpy(X))
|
||||
np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)
|
||||
cp.cuda.runtime.setDevice(0)
|
||||
@@ -250,7 +295,9 @@ class TestGPUPredict:
|
||||
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
|
||||
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain, num_boost_round=10)
|
||||
booster = xgb.train(
|
||||
{"tree_method": "hist", "device": "cuda:0"}, dtrain, num_boost_round=10
|
||||
)
|
||||
test = xgb.DMatrix(X)
|
||||
predt_from_array = booster.inplace_predict(X)
|
||||
predt_from_dmatrix = booster.predict(test)
|
||||
@@ -280,12 +327,12 @@ class TestGPUPredict:
|
||||
def test_shap(self, num_rounds, dataset, param):
|
||||
if dataset.name.endswith("-l1"): # not supported by the exact tree method
|
||||
return
|
||||
param.update({"tree_method": "gpu_hist", "gpu_id": 0})
|
||||
param.update({"tree_method": "hist", "device": "gpu:0"})
|
||||
param = dataset.set_params(param)
|
||||
dmat = dataset.get_dmat()
|
||||
bst = xgb.train(param, dmat, num_rounds)
|
||||
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
|
||||
bst = tm.set_ordinal(0, bst)
|
||||
bst.set_param({"device": "gpu:0"})
|
||||
shap = bst.predict(test_dmat, pred_contribs=True)
|
||||
margin = bst.predict(test_dmat, output_margin=True)
|
||||
assume(len(dataset.y) > 0)
|
||||
@@ -298,12 +345,12 @@ class TestGPUPredict:
|
||||
def test_shap_interactions(self, num_rounds, dataset, param):
|
||||
if dataset.name.endswith("-l1"): # not supported by the exact tree method
|
||||
return
|
||||
param.update({"tree_method": "hist", "gpu_id": 0})
|
||||
param.update({"tree_method": "hist", "device": "cuda:0"})
|
||||
param = dataset.set_params(param)
|
||||
dmat = dataset.get_dmat()
|
||||
bst = xgb.train(param, dmat, num_rounds)
|
||||
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
|
||||
bst = tm.set_ordinal(0, bst)
|
||||
bst.set_param({"device": "cuda:0"})
|
||||
shap = bst.predict(test_dmat, pred_interactions=True)
|
||||
margin = bst.predict(test_dmat, output_margin=True)
|
||||
assume(len(dataset.y) > 0)
|
||||
@@ -317,16 +364,18 @@ class TestGPUPredict:
|
||||
def test_shap_categorical(self):
|
||||
X, y = tm.make_categorical(100, 20, 7, False)
|
||||
Xy = xgb.DMatrix(X, y, enable_categorical=True)
|
||||
booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10)
|
||||
booster = xgb.train(
|
||||
{"tree_method": "hist", "device": "gpu:0"}, Xy, num_boost_round=10
|
||||
)
|
||||
|
||||
booster = tm.set_ordinal(0, booster)
|
||||
booster.set_param({"device": "cuda:0"})
|
||||
shap = booster.predict(Xy, pred_contribs=True)
|
||||
margin = booster.predict(Xy, output_margin=True)
|
||||
np.testing.assert_allclose(
|
||||
np.sum(shap, axis=len(shap.shape) - 1), margin, rtol=1e-3
|
||||
)
|
||||
|
||||
booster = tm.set_ordinal(-1, booster)
|
||||
booster.set_param({"device": "cpu"})
|
||||
shap = booster.predict(Xy, pred_contribs=True)
|
||||
margin = booster.predict(Xy, output_margin=True)
|
||||
np.testing.assert_allclose(
|
||||
@@ -334,8 +383,8 @@ class TestGPUPredict:
|
||||
)
|
||||
|
||||
def test_predict_leaf_basic(self):
|
||||
gpu_leaf = run_predict_leaf(0)
|
||||
cpu_leaf = run_predict_leaf(-1)
|
||||
gpu_leaf = run_predict_leaf("gpu:0")
|
||||
cpu_leaf = run_predict_leaf("cpu")
|
||||
np.testing.assert_equal(gpu_leaf, cpu_leaf)
|
||||
|
||||
def run_predict_leaf_booster(self, param, num_rounds, dataset):
|
||||
@@ -344,23 +393,22 @@ class TestGPUPredict:
|
||||
booster = xgb.train(
|
||||
param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds
|
||||
)
|
||||
booster = tm.set_ordinal(-1, booster)
|
||||
booster.set_param({"device": "cpu"})
|
||||
cpu_leaf = booster.predict(m, pred_leaf=True)
|
||||
|
||||
booster = tm.set_ordinal(0, booster)
|
||||
booster.set_param({"device": "cuda:0"})
|
||||
gpu_leaf = booster.predict(m, pred_leaf=True)
|
||||
|
||||
np.testing.assert_equal(cpu_leaf, gpu_leaf)
|
||||
|
||||
@given(predict_parameter_strategy, tm.make_dataset_strategy())
|
||||
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||
def test_predict_leaf_gbtree(self, param, dataset):
|
||||
def test_predict_leaf_gbtree(self, param: dict, dataset: tm.TestDataset) -> None:
|
||||
# Unsupported for random forest
|
||||
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
|
||||
return
|
||||
|
||||
param["booster"] = "gbtree"
|
||||
param["tree_method"] = "gpu_hist"
|
||||
param.update({"booster": "gbtree", "tree_method": "hist", "device": "cuda:0"})
|
||||
self.run_predict_leaf_booster(param, 10, dataset)
|
||||
|
||||
@given(predict_parameter_strategy, tm.make_dataset_strategy())
|
||||
@@ -370,8 +418,7 @@ class TestGPUPredict:
|
||||
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
|
||||
return
|
||||
|
||||
param["booster"] = "dart"
|
||||
param["tree_method"] = "gpu_hist"
|
||||
param.update({"booster": "dart", "tree_method": "hist", "device": "cuda:0"})
|
||||
self.run_predict_leaf_booster(param, 10, dataset)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
@@ -395,12 +442,12 @@ class TestGPUPredict:
|
||||
dtrain = xgb.DMatrix(df, label=y, enable_categorical=True)
|
||||
|
||||
params = {
|
||||
"tree_method": "gpu_hist",
|
||||
"tree_method": "hist",
|
||||
"max_depth": 3,
|
||||
"learning_rate": 1.0,
|
||||
"base_score": 0.0,
|
||||
"eval_metric": "rmse",
|
||||
"gpu_id": "0",
|
||||
"device": "cuda:0",
|
||||
}
|
||||
|
||||
eval_history = {}
|
||||
@@ -412,7 +459,7 @@ class TestGPUPredict:
|
||||
verbose_eval=False,
|
||||
evals_result=eval_history,
|
||||
)
|
||||
bst = tm.set_ordinal(0, bst)
|
||||
bst.set_param({"device": "cuda:0"})
|
||||
pred = bst.predict(dtrain)
|
||||
rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
|
||||
np.testing.assert_almost_equal(
|
||||
@@ -434,14 +481,16 @@ class TestGPUPredict:
|
||||
Xy = xgb.DMatrix(X, y)
|
||||
if n_classes == 2:
|
||||
params = {
|
||||
"tree_method": "gpu_hist",
|
||||
"tree_method": "hist",
|
||||
"device": "cuda:0",
|
||||
"booster": "dart",
|
||||
"rate_drop": 0.5,
|
||||
"objective": "binary:logistic",
|
||||
}
|
||||
else:
|
||||
params = {
|
||||
"tree_method": "gpu_hist",
|
||||
"tree_method": "hist",
|
||||
"device": "cuda:0",
|
||||
"booster": "dart",
|
||||
"rate_drop": 0.5,
|
||||
"objective": "multi:softprob",
|
||||
@@ -455,7 +504,7 @@ class TestGPUPredict:
|
||||
copied = booster.predict(Xy)
|
||||
|
||||
# CPU
|
||||
booster = tm.set_ordinal(-1, booster)
|
||||
booster.set_param({"device": "cpu"})
|
||||
cpu_inplace = booster.inplace_predict(X_)
|
||||
cpu_copied = booster.predict(Xy)
|
||||
|
||||
@@ -465,7 +514,7 @@ class TestGPUPredict:
|
||||
cp.testing.assert_allclose(inplace, copied, atol=1e-6)
|
||||
|
||||
# GPU
|
||||
booster = tm.set_ordinal(0, booster)
|
||||
booster.set_param({"device": "cuda:0"})
|
||||
inplace = booster.inplace_predict(X)
|
||||
copied = booster.predict(Xy)
|
||||
|
||||
@@ -482,7 +531,7 @@ class TestGPUPredict:
|
||||
orig = rng.randint(low=0, high=127, size=rows * cols).reshape(rows, cols)
|
||||
y = rng.randint(low=0, high=127, size=rows)
|
||||
dtrain = xgb.DMatrix(orig, label=y)
|
||||
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain)
|
||||
booster = xgb.train({"tree_method": "hist", "device": "cuda:0"}, dtrain)
|
||||
|
||||
predt_orig = booster.inplace_predict(orig)
|
||||
# all primitive types in numpy
|
||||
|
||||
@@ -28,7 +28,7 @@ def run_threaded_predict(X, rows, predict_func):
|
||||
assert f.result()
|
||||
|
||||
|
||||
def run_predict_leaf(gpu_id: int) -> np.ndarray:
|
||||
def run_predict_leaf(device: str) -> np.ndarray:
|
||||
rows = 100
|
||||
cols = 4
|
||||
classes = 5
|
||||
@@ -48,7 +48,7 @@ def run_predict_leaf(gpu_id: int) -> np.ndarray:
|
||||
num_boost_round=num_boost_round,
|
||||
)
|
||||
|
||||
booster = tm.set_ordinal(gpu_id, booster)
|
||||
booster.set_param({"device": device})
|
||||
empty = xgb.DMatrix(np.ones(shape=(0, cols)))
|
||||
empty_leaf = booster.predict(empty, pred_leaf=True)
|
||||
assert empty_leaf.shape[0] == 0
|
||||
@@ -74,14 +74,14 @@ def run_predict_leaf(gpu_id: int) -> np.ndarray:
|
||||
|
||||
# When there's only 1 tree, the output is a 1 dim vector
|
||||
booster = xgb.train({"tree_method": "hist"}, num_boost_round=1, dtrain=m)
|
||||
booster = tm.set_ordinal(gpu_id, booster)
|
||||
booster.set_param({"device": device})
|
||||
assert booster.predict(m, pred_leaf=True).shape == (rows,)
|
||||
|
||||
return leaf
|
||||
|
||||
|
||||
def test_predict_leaf() -> None:
|
||||
run_predict_leaf(-1)
|
||||
run_predict_leaf("cpu")
|
||||
|
||||
|
||||
def test_predict_shape():
|
||||
|
||||
@@ -69,7 +69,7 @@ def run_dmatrix_ctor(is_feature_cols: bool, is_qdm: bool, on_gpu: bool) -> None:
|
||||
train_Xy, valid_Xy = create_dmatrix_from_partitions(
|
||||
iter(dfs),
|
||||
feature_cols,
|
||||
gpu_id=device_id,
|
||||
dev_ordinal=device_id,
|
||||
use_qdm=is_qdm,
|
||||
kwargs=kwargs,
|
||||
enable_sparse_data_optim=False,
|
||||
|
||||
@@ -1025,6 +1025,7 @@ class XgboostLocalTest(SparkTestCase):
|
||||
self.assertTrue(hasattr(py_reg, "n_estimators"))
|
||||
self.assertEqual(py_reg.n_estimators.parent, py_reg.uid)
|
||||
self.assertFalse(hasattr(py_reg, "gpu_id"))
|
||||
self.assertFalse(hasattr(py_reg, "device"))
|
||||
self.assertEqual(py_reg.getOrDefault(py_reg.n_estimators), 100)
|
||||
self.assertEqual(py_reg.getOrDefault(py_reg.objective), "reg:squarederror")
|
||||
py_reg2 = SparkXGBRegressor(n_estimators=200)
|
||||
@@ -1038,6 +1039,7 @@ class XgboostLocalTest(SparkTestCase):
|
||||
self.assertTrue(hasattr(py_cls, "n_estimators"))
|
||||
self.assertEqual(py_cls.n_estimators.parent, py_cls.uid)
|
||||
self.assertFalse(hasattr(py_cls, "gpu_id"))
|
||||
self.assertFalse(hasattr(py_cls, "device"))
|
||||
self.assertEqual(py_cls.getOrDefault(py_cls.n_estimators), 100)
|
||||
self.assertEqual(py_cls.getOrDefault(py_cls.objective), None)
|
||||
py_cls2 = SparkXGBClassifier(n_estimators=200)
|
||||
@@ -1051,6 +1053,7 @@ class XgboostLocalTest(SparkTestCase):
|
||||
self.assertTrue(hasattr(py_cls, "n_estimators"))
|
||||
self.assertEqual(py_cls.n_estimators.parent, py_cls.uid)
|
||||
self.assertFalse(hasattr(py_cls, "gpu_id"))
|
||||
self.assertFalse(hasattr(py_cls, "device"))
|
||||
self.assertTrue(hasattr(py_cls, "arbitrary_params_dict"))
|
||||
expected_kwargs = {"sketch_eps": 0.03}
|
||||
self.assertEqual(
|
||||
|
||||
Reference in New Issue
Block a user