Enable ROCm on the latest XGBoost

This commit is contained in:
Hui Liu
2023-10-23 11:07:08 -07:00
328 changed files with 8028 additions and 3642 deletions

View File

@@ -70,12 +70,12 @@ TEST(DeviceAdapter, GetRowCounts) {
for (bst_feature_t n_features : {1, 2, 4, 64, 128, 256}) {
HostDeviceVector<float> storage;
auto str_arr = RandomDataGenerator{8192, n_features, 0.0}
.Device(ctx.gpu_id)
.Device(ctx.Device())
.GenerateArrayInterface(&storage);
auto adapter = CupyAdapter{str_arr};
HostDeviceVector<bst_row_t> offset(adapter.NumRows() + 1, 0);
offset.SetDevice(ctx.gpu_id);
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.gpu_id,
offset.SetDevice(ctx.Device());
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.Device(),
std::numeric_limits<float>::quiet_NaN());
ASSERT_EQ(rstride, n_features);
}

View File

@@ -98,7 +98,7 @@ TEST(EllpackPage, FromCategoricalBasic) {
Context ctx{MakeCUDACtx(0)};
auto p = BatchParam{max_bins, tree::TrainParam::DftSparseThreshold()};
auto ellpack = EllpackPage(&ctx, m.get(), p);
auto accessor = ellpack.Impl()->GetDeviceAccessor(0);
auto accessor = ellpack.Impl()->GetDeviceAccessor(FstCU());
ASSERT_EQ(kCats, accessor.NumBins());
auto x_copy = x;
@@ -156,13 +156,12 @@ TEST(EllpackPage, Copy) {
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
// Create an empty result page.
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
kRows);
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kRows);
// Copy batch pages into the result page.
size_t offset = 0;
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
size_t num_elements = result.Copy(0, batch.Impl(), offset);
size_t num_elements = result.Copy(FstCU(), batch.Impl(), offset);
offset += num_elements;
}
@@ -176,10 +175,12 @@ TEST(EllpackPage, Copy) {
EXPECT_EQ(impl->base_rowid, current_row);
for (size_t i = 0; i < impl->Size(); i++) {
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0), current_row, row_d.data().get()));
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()), current_row,
row_d.data().get()));
thrust::copy(row_d.begin(), row_d.end(), row.begin());
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(0), current_row, row_result_d.data().get()));
dh::LaunchN(kCols, ReadRowFunction(result.GetDeviceAccessor(FstCU()), current_row,
row_result_d.data().get()));
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());
EXPECT_EQ(row, row_result);
@@ -203,8 +204,7 @@ TEST(EllpackPage, Compact) {
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
// Create an empty result page.
EllpackPageImpl result(0, page->Cuts(), page->is_dense, page->row_stride,
kCompactedRows);
EllpackPageImpl result(FstCU(), page->Cuts(), page->is_dense, page->row_stride, kCompactedRows);
// Compact batch pages into the result page.
std::vector<size_t> row_indexes_h {
@@ -213,7 +213,7 @@ TEST(EllpackPage, Compact) {
thrust::device_vector<size_t> row_indexes_d = row_indexes_h;
common::Span<size_t> row_indexes_span(row_indexes_d.data().get(), kRows);
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
result.Compact(0, batch.Impl(), row_indexes_span);
result.Compact(FstCU(), batch.Impl(), row_indexes_span);
}
size_t current_row = 0;
@@ -232,7 +232,7 @@ TEST(EllpackPage, Compact) {
continue;
}
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0),
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(FstCU()),
current_row, row_d.data().get()));
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaDeviceSynchronize());
@@ -242,7 +242,7 @@ TEST(EllpackPage, Compact) {
thrust::copy(row_d.begin(), row_d.end(), row.begin());
dh::LaunchN(kCols,
ReadRowFunction(result.GetDeviceAccessor(0), compacted_row,
ReadRowFunction(result.GetDeviceAccessor(FstCU()), compacted_row,
row_result_d.data().get()));
thrust::copy(row_result_d.begin(), row_result_d.end(), row_result.begin());

View File

@@ -30,7 +30,7 @@ namespace xgboost::data {
TEST(GradientIndex, ExternalMemoryBaseRowID) {
Context ctx;
auto p_fmat = RandomDataGenerator{4096, 256, 0.5}
.Device(ctx.gpu_id)
.Device(ctx.Device())
.Batches(8)
.GenerateSparsePageDMatrix("cache", true);

View File

@@ -16,9 +16,7 @@
#include "../helpers.h"
#include "test_iterative_dmatrix.h"
namespace xgboost {
namespace data {
namespace xgboost::data {
void TestEquivalent(float sparsity) {
Context ctx{MakeCUDACtx(0)};
@@ -28,14 +26,14 @@ void TestEquivalent(float sparsity) {
std::size_t offset = 0;
auto first = (*m.GetEllpackBatches(&ctx, {}).begin()).Impl();
std::unique_ptr<EllpackPageImpl> page_concatenated {
new EllpackPageImpl(0, first->Cuts(), first->is_dense,
new EllpackPageImpl(ctx.Device(), first->Cuts(), first->is_dense,
first->row_stride, 1000 * 100)};
for (auto& batch : m.GetBatches<EllpackPage>(&ctx, {})) {
auto page = batch.Impl();
size_t num_elements = page_concatenated->Copy(0, page, offset);
size_t num_elements = page_concatenated->Copy(ctx.Device(), page, offset);
offset += num_elements;
}
auto from_iter = page_concatenated->GetDeviceAccessor(0);
auto from_iter = page_concatenated->GetDeviceAccessor(ctx.Device());
ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::Cols());
ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::Rows());
@@ -45,7 +43,7 @@ void TestEquivalent(float sparsity) {
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};
auto bp = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
for (auto& ellpack : dm->GetBatches<EllpackPage>(&ctx, bp)) {
auto from_data = ellpack.Impl()->GetDeviceAccessor(0);
auto from_data = ellpack.Impl()->GetDeviceAccessor(ctx.Device());
std::vector<float> cuts_from_iter(from_iter.gidx_fvalue_map.size());
std::vector<float> min_fvalues_iter(from_iter.min_fvalue.size());
@@ -157,10 +155,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
auto impl = ellpack.Impl();
common::CompressedIterator<uint32_t> iterator(
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(0).NullValue());
EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(0).NullValue());
EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(ctx.Device()).NullValue());
EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(ctx.Device()).NullValue());
// null values get placed after valid values in a row
EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(0).NullValue());
EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(ctx.Device()).NullValue());
EXPECT_EQ(m.Info().num_col_, cols);
EXPECT_EQ(m.Info().num_row_, rows);
EXPECT_EQ(m.Info().num_nonzero_, rows* cols - 3);
@@ -188,5 +186,4 @@ TEST(IterativeDeviceDMatrix, Ref) {
TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(
&ctx, [](EllpackPage const& page) { return page.Impl()->Cuts(); });
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -12,6 +12,7 @@
#include "../helpers.h"
#include "xgboost/base.h"
namespace xgboost {
TEST(MetaInfo, GetSet) {
xgboost::Context ctx;
xgboost::MetaInfo info;
@@ -73,6 +74,49 @@ TEST(MetaInfo, GetSetFeature) {
// Other conditions are tested in `SaveLoadBinary`.
}
namespace {
/**
 * Checks MetaInfo::SetFeatureInfo for feature types and feature names when the
 * data split mode is DataSplitMode::kCol.
 *
 * For both keys the call is expected to throw dmlc::Error while num_col_ equals
 * the number of supplied entries, and to succeed once num_col_ is that count
 * multiplied by the collective world size.
 *
 * The expected vectors below contain three repetitions of the two local
 * entries, i.e. they are written for a world size of 3.
 */
void VerifyGetSetFeatureColumnSplit() {
  auto constexpr kCols{2};

  xgboost::MetaInfo info;
  info.data_split_mode = DataSplitMode::kCol;
  auto const n_workers = collective::GetWorldSize();

  // ---- feature types ----
  std::vector<std::string> types{u8"float", u8"c"};
  std::vector<char const *> c_types;
  c_types.reserve(kCols);
  for (auto const &type : types) {
    c_types.push_back(type.c_str());
  }

  // num_col_ matching only the supplied entries is expected to be rejected.
  info.num_col_ = kCols;
  EXPECT_THROW(info.SetFeatureInfo(u8"feature_type", c_types.data(), c_types.size()), dmlc::Error);
  // num_col_ scaled by the world size is expected to be accepted.
  info.num_col_ = kCols * n_workers;
  EXPECT_NO_THROW(info.SetFeatureInfo(u8"feature_type", c_types.data(), c_types.size()));

  std::vector<std::string> expected_type_names{
      u8"float", u8"c",
      u8"float", u8"c",
      u8"float", u8"c"};
  EXPECT_EQ(info.feature_type_names, expected_type_names);

  std::vector<xgboost::FeatureType> expected_types{
      xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical,
      xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical,
      xgboost::FeatureType::kNumerical, xgboost::FeatureType::kCategorical};
  EXPECT_EQ(info.feature_types.HostVector(), expected_types);

  // ---- feature names ----
  std::vector<std::string> names{u8"feature0", u8"feature1"};
  std::vector<char const *> c_names;
  c_names.reserve(kCols);
  for (auto const &name : names) {
    c_names.push_back(name.c_str());
  }

  info.num_col_ = kCols;
  EXPECT_THROW(info.SetFeatureInfo(u8"feature_name", c_names.data(), c_names.size()), dmlc::Error);
  info.num_col_ = kCols * n_workers;
  EXPECT_NO_THROW(info.SetFeatureInfo(u8"feature_name", c_names.data(), c_names.size()));

  // Stored names are expected to carry a "<index>." prefix for each of the three repetitions.
  std::vector<std::string> expected_names{
      u8"0.feature0", u8"0.feature1",
      u8"1.feature0", u8"1.feature1",
      u8"2.feature0", u8"2.feature1"};
  EXPECT_EQ(info.feature_names, expected_names);
}
} // anonymous namespace
// Hands VerifyGetSetFeatureColumnSplit to RunWithInMemoryCommunicator with a world size of 3.
TEST(MetaInfo, GetSetFeatureColumnSplit) {
  auto constexpr kWorkers{3};
  RunWithInMemoryCommunicator(kWorkers, VerifyGetSetFeatureColumnSplit);
}
TEST(MetaInfo, SaveLoadBinary) {
xgboost::MetaInfo info;
xgboost::Context ctx;
@@ -236,9 +280,9 @@ TEST(MetaInfo, Validate) {
info.num_nonzero_ = 12;
info.num_col_ = 3;
std::vector<xgboost::bst_group_t> groups (11);
xgboost::Context ctx;
Context ctx;
info.SetInfo(ctx, "group", groups.data(), xgboost::DataType::kUInt32, 11);
EXPECT_THROW(info.Validate(0), dmlc::Error);
EXPECT_THROW(info.Validate(FstCU()), dmlc::Error);
std::vector<float> labels(info.num_row_ + 1);
EXPECT_THROW(
@@ -261,11 +305,11 @@ TEST(MetaInfo, Validate) {
info.group_ptr_.clear();
labels.resize(info.num_row_);
info.SetInfo(ctx, "label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
info.labels.SetDevice(0);
EXPECT_THROW(info.Validate(1), dmlc::Error);
info.labels.SetDevice(FstCU());
EXPECT_THROW(info.Validate(DeviceOrd::CUDA(1)), dmlc::Error);
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
d_groups.SetDevice(0);
d_groups.SetDevice(FstCU());
d_groups.DevicePointer(); // pull to device
std::string arr_interface_str{ArrayInterfaceStr(xgboost::linalg::MakeVec(
d_groups.ConstDevicePointer(), d_groups.Size(), xgboost::DeviceOrd::CUDA(0)))};
@@ -306,6 +350,5 @@ TEST(MetaInfo, HostExtend) {
}
}
namespace xgboost {
// Runs TestMetaInfoStridedData against the CPU device ordinal.
TEST(MetaInfo, CPUStridedData) {
  TestMetaInfoStridedData(DeviceOrd::CPU());
}
} // namespace xgboost

View File

@@ -1,31 +1,27 @@
/*!
* Copyright 2021 XGBoost contributors
/**
* Copyright 2021-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include "../helpers.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../../../src/data/adapter.h"
namespace xgboost {
namespace data {
#include "../../../src/data/adapter.h"
#include "../../../src/data/proxy_dmatrix.h"
#include "../helpers.h"
namespace xgboost::data {
TEST(ProxyDMatrix, HostData) {
DMatrixProxy proxy;
size_t constexpr kRows = 100, kCols = 10;
std::vector<HostDeviceVector<float>> label_storage(1);
HostDeviceVector<float> storage;
auto data = RandomDataGenerator(kRows, kCols, 0.5)
.Device(0)
.GenerateArrayInterface(&storage);
auto data =
RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);
proxy.SetArrayData(data.c_str());
auto n_samples = HostAdapterDispatch(
&proxy, [](auto const &value) { return value.Size(); });
auto n_samples = HostAdapterDispatch(&proxy, [](auto const &value) { return value.Size(); });
ASSERT_EQ(n_samples, kRows);
auto n_features = HostAdapterDispatch(
&proxy, [](auto const &value) { return value.NumCols(); });
auto n_features = HostAdapterDispatch(&proxy, [](auto const &value) { return value.NumCols(); });
ASSERT_EQ(n_features, kCols);
}
} // namespace data
} // namespace xgboost
} // namespace xgboost::data

View File

@@ -19,10 +19,12 @@ namespace xgboost::data {
TEST(ProxyDMatrix, DeviceData) {
constexpr size_t kRows{100}, kCols{100};
HostDeviceVector<float> storage;
auto data = RandomDataGenerator(kRows, kCols, 0.5).Device(0).GenerateArrayInterface(&storage);
auto data =
RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage);
std::vector<HostDeviceVector<float>> label_storage(1);
auto labels =
RandomDataGenerator(kRows, 1, 0).Device(0).GenerateColumnarArrayInterface(&label_storage);
auto labels = RandomDataGenerator(kRows, 1, 0)
.Device(FstCU())
.GenerateColumnarArrayInterface(&label_storage);
DMatrixProxy proxy;
proxy.SetCUDAArray(data.c_str());
@@ -35,7 +37,7 @@ TEST(ProxyDMatrix, DeviceData) {
std::vector<HostDeviceVector<float>> columnar_storage(kCols);
data = RandomDataGenerator(kRows, kCols, 0)
.Device(0)
.Device(FstCU())
.GenerateColumnarArrayInterface(&columnar_storage);
proxy.SetCUDAArray(data.c_str());
ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CudfAdapter>));

View File

@@ -268,7 +268,7 @@ TEST(SimpleDMatrix, Slice) {
std::iota(upper.begin(), upper.end(), 1.0f);
auto& margin = p_m->Info().base_margin_;
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, Context::kCpuId};
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};
std::array<int32_t, 3> ridxs {1, 3, 5};
std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) };
@@ -341,7 +341,7 @@ TEST(SimpleDMatrix, SliceCol) {
std::iota(upper.begin(), upper.end(), 1.0f);
auto& margin = p_m->Info().base_margin_;
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, Context::kCpuId};
margin = decltype(p_m->Info().base_margin_){{kRows, kClasses}, DeviceOrd::CPU()};
auto constexpr kSlices {2};
auto constexpr kSliceSize {4};
@@ -428,3 +428,21 @@ TEST(SimpleDMatrix, Threads) {
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0, "")};
ASSERT_EQ(p_fmat->Ctx()->Threads(), AllThreadsForTest());
}
namespace {
/**
 * Generates a DMatrix with DataSplitMode::kCol and checks its meta info:
 * the column count must equal kCols times the collective world size, the row
 * count must equal kRows, and the split mode must be recorded as kCol.
 */
void VerifyColumnSplit() {
  size_t constexpr kRows{16};
  size_t constexpr kCols{8};

  auto p_fmat =
      RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(false, false, 1, DataSplitMode::kCol);
  auto const &info = p_fmat->Info();

  ASSERT_EQ(info.num_col_, kCols * collective::GetWorldSize());
  ASSERT_EQ(info.num_row_, kRows);
  ASSERT_EQ(info.data_split_mode, DataSplitMode::kCol);
}
} // anonymous namespace
// Hands VerifyColumnSplit to RunWithInMemoryCommunicator with a world size of 3.
TEST(SimpleDMatrix, ColumnSplit) {
  auto constexpr kWorkers{3};
  RunWithInMemoryCommunicator(kWorkers, VerifyColumnSplit);
}

View File

@@ -138,11 +138,11 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
size_t offset = 0;
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
if (!impl_ext) {
impl_ext.reset(new EllpackPageImpl(
batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(),
batch.Impl()->is_dense, batch.Impl()->row_stride, kRows));
impl_ext = std::make_unique<EllpackPageImpl>(batch.Impl()->gidx_buffer.Device(),
batch.Impl()->Cuts(), batch.Impl()->is_dense,
batch.Impl()->row_stride, kRows);
}
auto n_elems = impl_ext->Copy(0, batch.Impl(), offset);
auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
offset += n_elems;
}
EXPECT_EQ(impl_ext->base_rowid, 0);
@@ -202,10 +202,12 @@ TEST(SparsePageDMatrix, MultipleEllpackPageContent) {
EXPECT_EQ(impl_ext->base_rowid, current_row);
for (size_t i = 0; i < impl_ext->Size(); i++) {
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(0), current_row, row_d.data().get()));
dh::LaunchN(kCols, ReadRowFunction(impl->GetDeviceAccessor(ctx.Device()), current_row,
row_d.data().get()));
thrust::copy(row_d.begin(), row_d.end(), row.begin());
dh::LaunchN(kCols, ReadRowFunction(impl_ext->GetDeviceAccessor(0), current_row, row_ext_d.data().get()));
dh::LaunchN(kCols, ReadRowFunction(impl_ext->GetDeviceAccessor(ctx.Device()), current_row,
row_ext_d.data().get()));
thrust::copy(row_ext_d.begin(), row_ext_d.end(), row_ext.begin());
EXPECT_EQ(row, row_ext);