Use context in SetInfo. (#7687)

* Use the name `Context`.
* Pass a context object into `SetInfo`.
* Add context to proxy matrix.
* Add context to iterative DMatrix.

This is to remove the use of the default number of threads during `SetInfo` as a follow-up on
removing the global omp variable while preparing for CUDA stream semantic.  Currently, XGBoost
uses the legacy CUDA stream, we will gradually remove them in the future in favor of non-blocking streams.
This commit is contained in:
Jiaming Yuan
2022-03-24 22:16:26 +08:00
committed by GitHub
parent f5b20286e2
commit 64575591d8
19 changed files with 142 additions and 142 deletions

View File

@@ -485,35 +485,30 @@ XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle, const char* fname,
API_END();
}
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
const char* field,
const bst_float* info,
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const bst_float *info,
xgboost::bst_ulong len) {
API_BEGIN();
CHECK_HANDLE();
static_cast<std::shared_ptr<DMatrix>*>(handle)
->get()->Info().SetInfo(field, info, xgboost::DataType::kFloat32, len);
auto const& p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo(field, info, xgboost::DataType::kFloat32, len);
API_END();
}
XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle,
char const* field,
char const* interface_c_str) {
XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle, char const *field,
char const *interface_c_str) {
API_BEGIN();
CHECK_HANDLE();
static_cast<std::shared_ptr<DMatrix>*>(handle)
->get()->Info().SetInfo(field, interface_c_str);
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo(field, interface_c_str);
API_END();
}
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
const char* field,
const unsigned* info,
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *info,
xgboost::bst_ulong len) {
API_BEGIN();
CHECK_HANDLE();
static_cast<std::shared_ptr<DMatrix>*>(handle)
->get()->Info().SetInfo(field, info, xgboost::DataType::kUInt32, len);
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo(field, info, xgboost::DataType::kUInt32, len);
API_END();
}
@@ -549,25 +544,22 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,
API_END();
}
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field,
void const *data, xgboost::bst_ulong size,
int type) {
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data,
xgboost::bst_ulong size, int type) {
API_BEGIN();
CHECK_HANDLE();
auto &info = static_cast<std::shared_ptr<DMatrix> *>(handle)->get()->Info();
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
CHECK(type >= 1 && type <= 4);
info.SetInfo(field, data, static_cast<DataType>(type), size);
p_fmat->SetInfo(field, data, static_cast<DataType>(type), size);
API_END();
}
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned* group,
xgboost::bst_ulong len) {
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, const unsigned *group, xgboost::bst_ulong len) {
API_BEGIN();
CHECK_HANDLE();
LOG(WARNING) << "XGDMatrixSetGroup is deprecated, use `XGDMatrixSetUIntInfo` instead.";
static_cast<std::shared_ptr<DMatrix>*>(handle)
->get()->Info().SetInfo("group", group, xgboost::DataType::kUInt32, len);
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo("group", group, xgboost::DataType::kUInt32, len);
API_END();
}

View File

@@ -409,7 +409,7 @@ inline bool MetaTryLoadFloatInfo(const std::string& fname,
namespace {
template <int32_t D, typename T>
void CopyTensorInfoImpl(Json arr_interface, linalg::Tensor<T, D>* p_out) {
void CopyTensorInfoImpl(Context const& ctx, Json arr_interface, linalg::Tensor<T, D>* p_out) {
ArrayInterface<D> array{arr_interface};
if (array.n == 0) {
p_out->Reshape(array.shape);
@@ -428,16 +428,15 @@ void CopyTensorInfoImpl(Json arr_interface, linalg::Tensor<T, D>* p_out) {
return;
}
p_out->Reshape(array.shape);
auto t = p_out->View(GenericParameter::kCpuId);
auto t = p_out->View(Context::kCpuId);
CHECK(t.CContiguous());
// FIXME(jiamingy): Remove the use of this default thread.
linalg::ElementWiseTransformHost(t, common::OmpGetNumThreads(0), [&](auto i, auto) {
linalg::ElementWiseTransformHost(t, ctx.Threads(), [&](auto i, auto) {
return linalg::detail::Apply(TypedIndex<T, D>{array}, linalg::UnravelIndex<D>(i, t.Shape()));
});
}
} // namespace
void MetaInfo::SetInfo(StringView key, StringView interface_str) {
void MetaInfo::SetInfo(Context const& ctx, StringView key, StringView interface_str) {
Json j_interface = Json::Load(interface_str);
bool is_cuda{false};
if (IsA<Array>(j_interface)) {
@@ -454,16 +453,16 @@ void MetaInfo::SetInfo(StringView key, StringView interface_str) {
}
if (is_cuda) {
this->SetInfoFromCUDA(key, j_interface);
this->SetInfoFromCUDA(ctx, key, j_interface);
} else {
this->SetInfoFromHost(key, j_interface);
this->SetInfoFromHost(ctx, key, j_interface);
}
}
void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
void MetaInfo::SetInfoFromHost(Context const& ctx, StringView key, Json arr) {
// multi-dim float info
if (key == "base_margin") {
CopyTensorInfoImpl(arr, &this->base_margin_);
CopyTensorInfoImpl(ctx, arr, &this->base_margin_);
// FIXME(jiamingy): Remove the deprecated API and let all language bindings aware of
// input shape. This issue is CPU only since CUDA uses array interface from day 1.
//
@@ -477,7 +476,7 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
}
return;
} else if (key == "label") {
CopyTensorInfoImpl(arr, &this->labels);
CopyTensorInfoImpl(ctx, arr, &this->labels);
if (this->num_row_ != 0 && this->labels.Shape(0) != this->num_row_) {
CHECK_EQ(this->labels.Size() % this->num_row_, 0) << "Incorrect size for labels.";
size_t n_targets = this->labels.Size() / this->num_row_;
@@ -491,7 +490,7 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
// uint info
if (key == "group") {
linalg::Tensor<bst_group_t, 1> t;
CopyTensorInfoImpl(arr, &t);
CopyTensorInfoImpl(ctx, arr, &t);
auto const& h_groups = t.Data()->HostVector();
group_ptr_.clear();
group_ptr_.resize(h_groups.size() + 1, 0);
@@ -501,7 +500,7 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
return;
} else if (key == "qid") {
linalg::Tensor<bst_group_t, 1> t;
CopyTensorInfoImpl(arr, &t);
CopyTensorInfoImpl(ctx, arr, &t);
bool non_dec = true;
auto const& query_ids = t.Data()->HostVector();
for (size_t i = 1; i < query_ids.size(); ++i) {
@@ -526,7 +525,7 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
}
// float info
linalg::Tensor<float, 1> t;
CopyTensorInfoImpl<1>(arr, &t);
CopyTensorInfoImpl<1>(ctx, arr, &t);
if (key == "weight") {
this->weights_ = std::move(*t.Data());
auto const& h_weights = this->weights_.ConstHostVector();
@@ -548,13 +547,15 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
}
}
void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
void MetaInfo::SetInfo(Context const& ctx, const char* key, const void* dptr, DataType dtype,
size_t num) {
auto proc = [&](auto cast_d_ptr) {
using T = std::remove_pointer_t<decltype(cast_d_ptr)>;
auto t =
linalg::TensorView<T, 1>(common::Span<T>{cast_d_ptr, num}, {num}, GenericParameter::kCpuId);
auto t = linalg::TensorView<T, 1>(common::Span<T>{cast_d_ptr, num}, {num}, Context::kCpuId);
CHECK(t.CContiguous());
Json interface { linalg::ArrayInterface(t) };
Json interface {
linalg::ArrayInterface(t)
};
assert(ArrayInterface<1>{interface}.is_contiguous);
return interface;
};
@@ -562,22 +563,22 @@ void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t
switch (dtype) {
case xgboost::DataType::kFloat32: {
auto cast_ptr = reinterpret_cast<const float*>(dptr);
this->SetInfoFromHost(key, proc(cast_ptr));
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
break;
}
case xgboost::DataType::kDouble: {
auto cast_ptr = reinterpret_cast<const double*>(dptr);
this->SetInfoFromHost(key, proc(cast_ptr));
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
break;
}
case xgboost::DataType::kUInt32: {
auto cast_ptr = reinterpret_cast<const uint32_t*>(dptr);
this->SetInfoFromHost(key, proc(cast_ptr));
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
break;
}
case xgboost::DataType::kUInt64: {
auto cast_ptr = reinterpret_cast<const uint64_t*>(dptr);
this->SetInfoFromHost(key, proc(cast_ptr));
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
break;
}
default:
@@ -724,9 +725,7 @@ void MetaInfo::Validate(int32_t device) const {
"doesn't equal to actual number of rows given by data.";
}
auto check_device = [device](HostDeviceVector<float> const& v) {
CHECK(v.DeviceIdx() == GenericParameter::kCpuId ||
device == GenericParameter::kCpuId ||
v.DeviceIdx() == device)
CHECK(v.DeviceIdx() == Context::kCpuId || device == Context::kCpuId || v.DeviceIdx() == device)
<< "Data is resided on a different device than `gpu_id`. "
<< "Device that data is on: " << v.DeviceIdx() << ", "
<< "`gpu_id` for XGBoost: " << device;
@@ -769,7 +768,9 @@ void MetaInfo::Validate(int32_t device) const {
}
#if !defined(XGBOOST_USE_CUDA)
void MetaInfo::SetInfoFromCUDA(StringView key, Json arr) { common::AssertGPUSupport(); }
void MetaInfo::SetInfoFromCUDA(Context const& ctx, StringView key, Json arr) {
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA)
using DMatrixThreadLocal =

View File

@@ -115,7 +115,8 @@ void CopyQidImpl(ArrayInterface<1> array_interface, std::vector<bst_group_t>* p_
}
} // namespace
void MetaInfo::SetInfoFromCUDA(StringView key, Json array) {
// Context is not used until we have CUDA stream.
void MetaInfo::SetInfoFromCUDA(Context const&, StringView key, Json array) {
// multi-dim float info
if (key == "base_margin") {
CopyTensorInfoImpl(array, &base_margin_);

View File

@@ -43,18 +43,18 @@ void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missin
size_t batches = 0;
size_t accumulated_rows = 0;
bst_feature_t cols = 0;
int32_t device = GenericParameter::kCpuId;
int32_t current_device;
dh::safe_cuda(cudaGetDevice(&current_device));
auto get_device = [&]() -> int32_t {
int32_t d = (device == GenericParameter::kCpuId) ? current_device : device;
CHECK_NE(d, GenericParameter::kCpuId);
int32_t d = (ctx_.gpu_id == Context::kCpuId) ? current_device : ctx_.gpu_id;
CHECK_NE(d, Context::kCpuId);
return d;
};
while (iter.Next()) {
device = proxy->DeviceIdx();
CHECK_LT(device, common::AllVisibleGPUs());
ctx_.gpu_id = proxy->DeviceIdx();
CHECK_LT(ctx_.gpu_id, common::AllVisibleGPUs());
dh::safe_cuda(cudaSetDevice(get_device()));
if (cols == 0) {
cols = num_cols();

View File

@@ -21,6 +21,7 @@ namespace data {
class IterativeDeviceDMatrix : public DMatrix {
MetaInfo info_;
Context ctx_;
BatchParam batch_param_;
std::shared_ptr<EllpackPage> page_;
@@ -72,10 +73,7 @@ class IterativeDeviceDMatrix : public DMatrix {
MetaInfo &Info() override { return info_; }
MetaInfo const &Info() const override { return info_; }
GenericParameter const *Ctx() const override {
LOG(FATAL) << "`IterativeDMatrix` doesn't have context.";
return nullptr;
}
Context const *Ctx() const override { return &ctx_; }
};
#if !defined(XGBOOST_USE_CUDA)

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2020 XGBoost contributors
* Copyright 2020-2022, XGBoost contributors
*/
#include "proxy_dmatrix.h"
#include "device_adapter.cuh"
@@ -11,10 +11,10 @@ void DMatrixProxy::FromCudaColumnar(std::string interface_str) {
std::shared_ptr<data::CudfAdapter> adapter {new data::CudfAdapter(interface_str)};
auto const& value = adapter->Value();
this->batch_ = adapter;
device_ = adapter->DeviceIdx();
ctx_.gpu_id = adapter->DeviceIdx();
this->Info().num_col_ = adapter->NumColumns();
this->Info().num_row_ = adapter->NumRows();
if (device_ < 0) {
if (ctx_.gpu_id < 0) {
CHECK_EQ(this->Info().num_row_, 0);
}
}
@@ -22,13 +22,12 @@ void DMatrixProxy::FromCudaColumnar(std::string interface_str) {
void DMatrixProxy::FromCudaArray(std::string interface_str) {
std::shared_ptr<CupyAdapter> adapter(new CupyAdapter(interface_str));
this->batch_ = adapter;
device_ = adapter->DeviceIdx();
ctx_.gpu_id = adapter->DeviceIdx();
this->Info().num_col_ = adapter->NumColumns();
this->Info().num_row_ = adapter->NumRows();
if (device_ < 0) {
if (ctx_.gpu_id < 0) {
CHECK_EQ(this->Info().num_row_, 0);
}
}
} // namespace data
} // namespace xgboost

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2020-2021 XGBoost contributors
* Copyright 2020-2022, XGBoost contributors
*/
#ifndef XGBOOST_DATA_PROXY_DMATRIX_H_
#define XGBOOST_DATA_PROXY_DMATRIX_H_
@@ -45,7 +45,7 @@ class DataIterProxy {
class DMatrixProxy : public DMatrix {
MetaInfo info_;
dmlc::any batch_;
int32_t device_ { xgboost::GenericParameter::kCpuId };
Context ctx_;
#if defined(XGBOOST_USE_CUDA)
void FromCudaColumnar(std::string interface_str);
@@ -53,7 +53,7 @@ class DMatrixProxy : public DMatrix {
#endif // defined(XGBOOST_USE_CUDA)
public:
int DeviceIdx() const { return device_; }
int DeviceIdx() const { return ctx_.gpu_id; }
void SetData(char const* c_interface) {
common::AssertGPUSupport();
@@ -67,7 +67,7 @@ class DMatrixProxy : public DMatrix {
this->FromCudaArray(interface_str);
}
if (this->info_.num_row_ == 0) {
this->device_ = GenericParameter::kCpuId;
this->ctx_.gpu_id = Context::kCpuId;
}
#endif // defined(XGBOOST_USE_CUDA)
}
@@ -79,10 +79,7 @@ class DMatrixProxy : public DMatrix {
MetaInfo& Info() override { return info_; }
MetaInfo const& Info() const override { return info_; }
GenericParameter const* Ctx() const override {
LOG(FATAL) << "`ProxyDMatrix` doesn't have context.";
return nullptr;
}
Context const* Ctx() const override { return &ctx_; }
bool SingleColBlock() const override { return true; }
bool EllpackExists() const override { return true; }

View File

@@ -149,10 +149,8 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
weights.insert(weights.end(), batch.Weights(), batch.Weights() + batch.Size());
}
if (batch.BaseMargin() != nullptr) {
info_.base_margin_ = decltype(info_.base_margin_){batch.BaseMargin(),
batch.BaseMargin() + batch.Size(),
{batch.Size()},
GenericParameter::kCpuId};
info_.base_margin_ = decltype(info_.base_margin_){
batch.BaseMargin(), batch.BaseMargin() + batch.Size(), {batch.Size()}, Context::kCpuId};
}
if (batch.Qid() != nullptr) {
qids.insert(qids.end(), batch.Qid(), batch.Qid() + batch.Size());

View File

@@ -31,7 +31,7 @@ class SimpleDMatrix : public DMatrix {
MetaInfo& Info() override;
const MetaInfo& Info() const override;
GenericParameter const* Ctx() const override { return &ctx_; }
Context const* Ctx() const override { return &ctx_; }
bool SingleColBlock() const override { return true; }
DMatrix* Slice(common::Span<int32_t const> ridxs) override;
@@ -63,7 +63,7 @@ class SimpleDMatrix : public DMatrix {
}
private:
GenericParameter ctx_;
Context ctx_;
};
} // namespace data
} // namespace xgboost

View File

@@ -69,7 +69,7 @@ class SparsePageDMatrix : public DMatrix {
XGDMatrixCallbackNext *next_;
float missing_;
GenericParameter ctx_;
Context ctx_;
std::string cache_prefix_;
uint32_t n_batches_ {0};
// sparse page is the source to other page types, we make a special member function.
@@ -100,7 +100,7 @@ class SparsePageDMatrix : public DMatrix {
MetaInfo& Info() override;
const MetaInfo& Info() const override;
GenericParameter const* Ctx() const override { return &ctx_; }
Context const* Ctx() const override { return &ctx_; }
bool SingleColBlock() const override { return false; }
DMatrix *Slice(common::Span<int32_t const>) override {