rocm enable for v2.0.1

This commit is contained in:
Hui Liu
2023-10-27 18:50:28 -07:00
447 changed files with 13518 additions and 8719 deletions

View File

@@ -0,0 +1,42 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/global_config.h> // for GlobalConfigThreadLocalStore
#include <xgboost/json.h> // for Json, Object
#include <xgboost/learner.h> // for Learner
#include <algorithm> // for transform
#include <string> // for string
#include <utility> // for swap
#include "../helpers.h" // for RandomDataGenerator
namespace xgboost {
TEST(GBlinear, DispatchUpdater) {
auto verbosity = 3;
std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);
auto test = [](std::string device) {
auto p_fmat = RandomDataGenerator{10, 10, 0.0f}.GenerateDMatrix(true);
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParams(
Args{{"booster", "gblinear"}, {"updater", "coord_descent"}, {"device", device}});
learner->Configure();
for (std::int32_t iter = 0; iter < 3; ++iter) {
learner->UpdateOneIter(iter, p_fmat);
}
Json config{Object{}};
::testing::internal::CaptureStderr();
learner->SaveConfig(&config);
auto str = ::testing::internal::GetCapturedStderr();
std::transform(device.cbegin(), device.cend(), device.begin(),
[](char c) { return std::toupper(c); });
ASSERT_NE(str.find(device), std::string::npos);
};
test("cpu");
test("gpu");
std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);
}
} // namespace xgboost

View File

@@ -1,17 +1,22 @@
/*!
* Copyright 2019-2022 XGBoost contributors
/**
* Copyright 2019-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/host_device_vector.h> // for HostDeviceVector
#include <xgboost/json.h> // for Json, Object
#include <xgboost/learner.h> // for Learner
#include "../../../src/data/adapter.h"
#include "../../../src/data/proxy_dmatrix.h"
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr
#include <optional> // for optional
#include <string> // for string
#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy
#include "../../../src/gbm/gbtree.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
#include "xgboost/base.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/learner.h"
#include "xgboost/predictor.h"
namespace xgboost {
@@ -113,12 +118,11 @@ TEST(GBTree, WrongUpdater) {
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
TEST(GBTree, ChoosePredictor) {
// The test ensures data don't get pulled into device.
size_t constexpr kRows = 17;
size_t constexpr kCols = 15;
std::size_t constexpr kRows = 17, kCols = 15;
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
auto& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
auto const& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
p_dmat->Info().labels.Reshape(kRows);
auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
@@ -127,14 +131,13 @@ TEST(GBTree, ChoosePredictor) {
learner->UpdateOneIter(i, p_dmat);
}
ASSERT_TRUE(data.HostCanWrite());
dmlc::TemporaryDirectory tempdir;
const std::string fname = tempdir.path + "/model_param.bst";
{
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
learner->Save(fo.get());
}
// a new learner
learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
{
@@ -146,6 +149,8 @@ TEST(GBTree, ChoosePredictor) {
learner->UpdateOneIter(i, p_dmat);
}
ASSERT_TRUE(data.HostCanWrite());
ASSERT_FALSE(data.DeviceCanWrite());
ASSERT_FALSE(data.DeviceCanRead());
// pull data into device.
data.HostVector();
@@ -162,7 +167,139 @@ TEST(GBTree, ChoosePredictor) {
// data is not pulled back into host
ASSERT_FALSE(data.HostCanWrite());
}
#endif // XGBOOST_USE_CUDA || XGBOOST_USE_HIP
TEST(GBTree, ChooseTreeMethod) {
bst_row_t n_samples{128};
bst_feature_t n_features{64};
auto Xy = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateDMatrix(true);
auto with_update = [&](std::optional<std::string> device,
std::optional<std::string> tree_method) {
auto learner = std::unique_ptr<Learner>(Learner::Create({Xy}));
if (tree_method.has_value()) {
learner->SetParam("tree_method", tree_method.value());
}
if (device.has_value()) {
auto const& d = device.value();
if (std::isdigit(d.front()) || d.front() == '-') {
learner->SetParam("gpu_id", d);
} else {
learner->SetParam("device", d);
}
}
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(0, Xy);
}
Json config{Object{}};
learner->SaveConfig(&config);
auto updater = config["learner"]["gradient_booster"]["updater"];
CHECK(!IsA<Null>(updater));
return updater;
};
auto with_boost = [&](std::optional<std::string> device, std::optional<std::string> tree_method) {
auto learner = std::unique_ptr<Learner>(Learner::Create({Xy}));
if (tree_method.has_value()) {
learner->SetParam("tree_method", tree_method.value());
}
if (device.has_value()) {
auto const& d = device.value();
if (std::isdigit(d.front()) || d.front() == '-') {
learner->SetParam("gpu_id", d);
} else {
learner->SetParam("device", d);
}
}
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
HostDeviceVector<GradientPair> gpair{GenerateRandomGradients(Xy->Info().num_row_)};
learner->BoostOneIter(0, Xy, &gpair);
}
Json config{Object{}};
learner->SaveConfig(&config);
auto updater = config["learner"]["gradient_booster"]["updater"];
return updater;
};
// | | hist | gpu_hist | exact | NA |
// |--------+---------+----------+-------+-----|
// | CUDA:0 | GPU | GPU (w) | Err | GPU |
// | CPU | CPU | GPU (w) | CPU | CPU |
// |--------+---------+----------+-------+-----|
// | -1 | CPU | GPU (w) | CPU | CPU |
// | 0 | GPU | GPU (w) | Err | GPU |
// |--------+---------+----------+-------+-----|
// | NA | CPU | GPU (w) | CPU | CPU |
//
// - (w): warning
// - CPU: Run on CPU.
// - GPU: Run on CUDA.
// - Err: Not feasible.
// - NA: Parameter is not specified.
// When GPU hist is specified with a CPU context, we should emit an error. However, it's
// quite difficult to detect whether the CPU context is being used because it's the
// default or because it's specified by the user.
std::map<std::pair<std::optional<std::string>, std::optional<std::string>>, std::string>
expectation{
// hist
{{"hist", "-1"}, "grow_quantile_histmaker"},
{{"hist", "0"}, "grow_gpu_hist"},
{{"hist", "cpu"}, "grow_quantile_histmaker"},
{{"hist", "cuda"}, "grow_gpu_hist"},
{{"hist", "cuda:0"}, "grow_gpu_hist"},
{{"hist", std::nullopt}, "grow_quantile_histmaker"},
// gpu_hist
{{"gpu_hist", "-1"}, "grow_gpu_hist"},
{{"gpu_hist", "0"}, "grow_gpu_hist"},
{{"gpu_hist", "cpu"}, "grow_gpu_hist"},
{{"gpu_hist", "cuda"}, "grow_gpu_hist"},
{{"gpu_hist", "cuda:0"}, "grow_gpu_hist"},
{{"gpu_hist", std::nullopt}, "grow_gpu_hist"},
// exact
{{"exact", "-1"}, "grow_colmaker,prune"},
{{"exact", "0"}, "err"},
{{"exact", "cpu"}, "grow_colmaker,prune"},
{{"exact", "cuda"}, "err"},
{{"exact", "cuda:0"}, "err"},
{{"exact", std::nullopt}, "grow_colmaker,prune"},
// NA
{{std::nullopt, "-1"}, "grow_quantile_histmaker"},
{{std::nullopt, "0"}, "grow_gpu_hist"}, // default to hist
{{std::nullopt, "cpu"}, "grow_quantile_histmaker"},
{{std::nullopt, "cuda"}, "grow_gpu_hist"},
{{std::nullopt, "cuda:0"}, "grow_gpu_hist"},
{{std::nullopt, std::nullopt}, "grow_quantile_histmaker"},
};
auto run_test = [&](auto fn) {
for (auto const& kv : expectation) {
auto device = kv.first.second;
auto tm = kv.first.first;
if (kv.second == "err") {
ASSERT_THROW({ fn(device, tm); }, dmlc::Error)
<< " device:" << device.value_or("NA") << " tm:" << tm.value_or("NA");
continue;
}
auto up = fn(device, tm);
auto ups = get<Array const>(up);
auto exp_names = common::Split(kv.second, ',');
ASSERT_EQ(exp_names.size(), ups.size());
for (std::size_t i = 0; i < exp_names.size(); ++i) {
ASSERT_EQ(get<String const>(ups[i]["name"]), exp_names[i])
<< " device:" << device.value_or("NA") << " tm:" << tm.value_or("NA");
}
}
};
run_test(with_update);
run_test(with_boost);
}
#endif // XGBOOST_USE_CUDA
// Some other parts of test are in `Tree.JsonIO'.
TEST(GBTree, JsonIO) {
@@ -171,32 +308,52 @@ TEST(GBTree, JsonIO) {
Context ctx;
LearnerModelParam mparam{MakeMP(kCols, .5, 1)};
std::unique_ptr<GradientBooster> gbm {
CreateTrainedGBM("gbtree", Args{}, kRows, kCols, &mparam, &ctx) };
std::unique_ptr<GradientBooster> gbm{
CreateTrainedGBM("gbtree", Args{{"tree_method", "exact"}, {"default_direction", "left"}},
kRows, kCols, &mparam, &ctx)};
Json model {Object()};
Json model{Object()};
model["model"] = Object();
auto& j_model = model["model"];
auto j_model = model["model"];
model["config"] = Object();
auto& j_param = model["config"];
auto j_config = model["config"];
gbm->SaveModel(&j_model);
gbm->SaveConfig(&j_param);
gbm->SaveConfig(&j_config);
std::string model_str;
Json::Dump(model, &model_str);
model = Json::Load({model_str.c_str(), model_str.size()});
ASSERT_EQ(get<String>(model["model"]["name"]), "gbtree");
j_model = model["model"];
j_config = model["config"];
ASSERT_EQ(get<String>(j_model["name"]), "gbtree");
auto const& gbtree_model = model["model"]["model"];
auto gbtree_model = j_model["model"];
ASSERT_EQ(get<Array>(gbtree_model["trees"]).size(), 1ul);
ASSERT_EQ(get<Integer>(get<Object>(get<Array>(gbtree_model["trees"]).front()).at("id")), 0);
ASSERT_EQ(get<Array>(gbtree_model["tree_info"]).size(), 1ul);
auto j_train_param = model["config"]["gbtree_model_param"];
auto j_train_param = j_config["gbtree_model_param"];
ASSERT_EQ(get<String>(j_train_param["num_parallel_tree"]), "1");
auto check_config = [](Json j_up_config) {
auto colmaker = get<Array const>(j_up_config).front();
auto pruner = get<Array const>(j_up_config).back();
ASSERT_EQ(get<String const>(colmaker["name"]), "grow_colmaker");
ASSERT_EQ(get<String const>(pruner["name"]), "prune");
ASSERT_EQ(get<String const>(colmaker["colmaker_train_param"]["default_direction"]), "left");
};
check_config(j_config["updater"]);
std::unique_ptr<GradientBooster> loaded(gbm::GBTree::Create("gbtree", &ctx, &mparam));
loaded->LoadModel(j_model);
loaded->LoadConfig(j_config);
// roundtrip test
Json j_config_rt{Object{}};
loaded->SaveConfig(&j_config_rt);
check_config(j_config_rt["updater"]);
}
TEST(Dart, JsonIO) {
@@ -232,14 +389,15 @@ TEST(Dart, JsonIO) {
namespace {
class Dart : public testing::TestWithParam<char const*> {
public:
void Run(std::string predictor) {
void Run(std::string device) {
size_t constexpr kRows = 16, kCols = 10;
HostDeviceVector<float> data;
auto rng = RandomDataGenerator(kRows, kCols, 0);
if (predictor == "gpu_predictor") {
rng.Device(0);
Context ctx;
if (device == "GPU") {
ctx = MakeCUDACtx(0);
}
auto rng = RandomDataGenerator(kRows, kCols, 0).Device(ctx.gpu_id);
auto array_str = rng.GenerateArrayInterface(&data);
auto p_mat = GetDMatrixFromData(data.HostVector(), kRows, kCols);
@@ -257,15 +415,14 @@ class Dart : public testing::TestWithParam<char const*> {
for (size_t i = 0; i < 16; ++i) {
learner->UpdateOneIter(i, p_mat);
}
learner->SetParam("predictor", predictor);
learner->SetParam("device", ctx.DeviceName());
HostDeviceVector<float> predts_training;
learner->Predict(p_mat, false, &predts_training, 0, 0, true);
HostDeviceVector<float>* inplace_predts;
std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy{}};
if (predictor == "gpu_predictor") {
if (ctx.IsCUDA()) {
x->SetCUDAArray(array_str.c_str());
} else {
x->SetArrayData(array_str.c_str());
@@ -295,11 +452,10 @@ class Dart : public testing::TestWithParam<char const*> {
TEST_P(Dart, Prediction) { this->Run(GetParam()); }
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart,
testing::Values("auto", "cpu_predictor", "gpu_predictor"));
INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values("CPU", "GPU"));
#else
INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values("auto", "cpu_predictor"));
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
INSTANTIATE_TEST_SUITE_P(PredictorTypes, Dart, testing::Values("CPU"));
#endif // defined(XGBOOST_USE_CUDA)
std::pair<Json, Json> TestModelSlice(std::string booster) {
@@ -511,4 +667,85 @@ TEST(GBTree, PredictRange) {
dmlc::Error);
}
}
TEST(GBTree, InplacePredictionError) {
std::size_t n_samples{2048}, n_features{32};
auto test_ext_err = [&](std::string booster, Context const* ctx) {
std::shared_ptr<DMatrix> p_fmat =
RandomDataGenerator{n_samples, n_features, 0.5f}.Batches(2).GenerateSparsePageDMatrix(
"cache", true);
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParams(Args{{"booster", booster}, {"device", ctx->DeviceName()}});
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(i, p_fmat);
}
HostDeviceVector<float>* out_predt;
ASSERT_THROW(
{
learner->InplacePredict(p_fmat, PredictionType::kValue,
std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 0);
},
dmlc::Error);
};
{
Context ctx;
test_ext_err("gbtree", &ctx);
test_ext_err("dart", &ctx);
}
#if defined(XGBOOST_USE_CUDA)
{
auto ctx = MakeCUDACtx(0);
test_ext_err("gbtree", &ctx);
test_ext_err("dart", &ctx);
}
#endif // defined(XGBOOST_USE_CUDA)
auto test_qdm_err = [&](std::string booster, Context const* ctx) {
std::shared_ptr<DMatrix> p_fmat;
bst_bin_t max_bins = 16;
auto rng = RandomDataGenerator{n_samples, n_features, 0.5f}.Device(ctx->gpu_id).Bins(max_bins);
if (ctx->IsCPU()) {
p_fmat = rng.GenerateQuantileDMatrix(true);
} else {
#if defined(XGBOOST_USE_CUDA)
p_fmat = rng.GenerateDeviceDMatrix(true);
#else
CHECK(p_fmat);
#endif // defined(XGBOOST_USE_CUDA)
};
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParams(Args{{"booster", booster},
{"max_bin", std::to_string(max_bins)},
{"device", ctx->DeviceName()}});
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(i, p_fmat);
}
HostDeviceVector<float>* out_predt;
ASSERT_THROW(
{
learner->InplacePredict(p_fmat, PredictionType::kValue,
std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 0);
},
dmlc::Error);
};
{
Context ctx;
test_qdm_err("gbtree", &ctx);
test_qdm_err("dart", &ctx);
}
#if defined(XGBOOST_USE_CUDA)
{
auto ctx = MakeCUDACtx(0);
test_qdm_err("gbtree", &ctx);
test_qdm_err("dart", &ctx);
}
#endif // defined(XGBOOST_USE_CUDA)
}
} // namespace xgboost

View File

@@ -0,0 +1,86 @@
/**
* Copyright 2023, XGBoost contributors
*/
#include <xgboost/context.h> // for Context
#include <xgboost/learner.h> // for Learner
#include <xgboost/string_view.h> // for StringView
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr
#include <string> // for string
#include <thread> // for thread
#include "../../../src/data/adapter.h" // for ArrayAdapter
#include "../../../src/data/device_adapter.cuh" // for CupyAdapter
#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy
#include "../helpers.h" // for RandomDataGenerator
namespace xgboost {
void TestInplaceFallback(Context const* ctx) {
// prepare data
bst_row_t n_samples{1024};
bst_feature_t n_features{32};
HostDeviceVector<float> X_storage;
// use a different device than the learner
std::int32_t data_ordinal = ctx->IsCPU() ? 0 : -1;
auto X = RandomDataGenerator{n_samples, n_features, 0.0}
.Device(data_ordinal)
.GenerateArrayInterface(&X_storage);
HostDeviceVector<float> y_storage;
auto y = RandomDataGenerator{n_samples, 1u, 0.0}.GenerateArrayInterface(&y_storage);
std::shared_ptr<DMatrix> Xy;
if (data_ordinal == Context::kCpuId) {
auto X_adapter = data::ArrayAdapter{StringView{X}};
Xy.reset(DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx->Threads()));
} else {
auto X_adapter = data::CupyAdapter{StringView{X}};
Xy.reset(DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx->Threads()));
}
Xy->SetInfo("label", y);
// learner is configured to the device specified by ctx
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
learner->SetParam("device", ctx->DeviceName());
for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(i, Xy);
}
std::shared_ptr<DMatrix> p_m{new data::DMatrixProxy};
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
if (data_ordinal == Context::kCpuId) {
proxy->SetArrayData(StringView{X});
} else {
proxy->SetCUDAArray(X.c_str());
}
HostDeviceVector<float>* out_predt{nullptr};
ConsoleLogger::Configure(Args{{"verbosity", "1"}});
std::string output;
learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
&out_predt, 0, 0);
// test when the contexts match
Context new_ctx = *proxy->Ctx();
ASSERT_NE(new_ctx.gpu_id, ctx->gpu_id);
learner->SetParam("device", new_ctx.DeviceName());
HostDeviceVector<float>* out_predt_1{nullptr};
// no warning is raised
::testing::internal::CaptureStderr();
learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
&out_predt_1, 0, 0);
output = testing::internal::GetCapturedStderr();
ASSERT_TRUE(output.empty());
ASSERT_EQ(out_predt->ConstHostVector(), out_predt_1->ConstHostVector());
}
TEST(GBTree, InplacePredictFallback) {
auto ctx = MakeCUDACtx(0);
TestInplaceFallback(&ctx);
}
} // namespace xgboost