Use context in SetInfo. (#7687)
* Use the name `Context`. * Pass a context object into `SetInfo`. * Add context to proxy matrix. * Add context to iterative DMatrix. This is to remove the use of the default number of threads during `SetInfo` as a follow-up on removing the global omp variable while preparing for CUDA stream semantic. Currently, XGBoost uses the legacy CUDA stream, we will gradually remove them in the future in favor of non-blocking streams.
This commit is contained in:
@@ -485,35 +485,30 @@ XGB_DLL int XGDMatrixSaveBinary(DMatrixHandle handle, const char* fname,
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
|
||||
const char* field,
|
||||
const bst_float* info,
|
||||
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const bst_float *info,
|
||||
xgboost::bst_ulong len) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
static_cast<std::shared_ptr<DMatrix>*>(handle)
|
||||
->get()->Info().SetInfo(field, info, xgboost::DataType::kFloat32, len);
|
||||
auto const& p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
|
||||
p_fmat->SetInfo(field, info, xgboost::DataType::kFloat32, len);
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle,
|
||||
char const* field,
|
||||
char const* interface_c_str) {
|
||||
XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle, char const *field,
|
||||
char const *interface_c_str) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
static_cast<std::shared_ptr<DMatrix>*>(handle)
|
||||
->get()->Info().SetInfo(field, interface_c_str);
|
||||
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
|
||||
p_fmat->SetInfo(field, interface_c_str);
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
|
||||
const char* field,
|
||||
const unsigned* info,
|
||||
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *info,
|
||||
xgboost::bst_ulong len) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
static_cast<std::shared_ptr<DMatrix>*>(handle)
|
||||
->get()->Info().SetInfo(field, info, xgboost::DataType::kUInt32, len);
|
||||
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
|
||||
p_fmat->SetInfo(field, info, xgboost::DataType::kUInt32, len);
|
||||
API_END();
|
||||
}
|
||||
|
||||
@@ -549,25 +544,22 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field,
|
||||
void const *data, xgboost::bst_ulong size,
|
||||
int type) {
|
||||
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data,
|
||||
xgboost::bst_ulong size, int type) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
auto &info = static_cast<std::shared_ptr<DMatrix> *>(handle)->get()->Info();
|
||||
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
|
||||
CHECK(type >= 1 && type <= 4);
|
||||
info.SetInfo(field, data, static_cast<DataType>(type), size);
|
||||
p_fmat->SetInfo(field, data, static_cast<DataType>(type), size);
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
|
||||
const unsigned* group,
|
||||
xgboost::bst_ulong len) {
|
||||
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, const unsigned *group, xgboost::bst_ulong len) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
LOG(WARNING) << "XGDMatrixSetGroup is deprecated, use `XGDMatrixSetUIntInfo` instead.";
|
||||
static_cast<std::shared_ptr<DMatrix>*>(handle)
|
||||
->get()->Info().SetInfo("group", group, xgboost::DataType::kUInt32, len);
|
||||
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
|
||||
p_fmat->SetInfo("group", group, xgboost::DataType::kUInt32, len);
|
||||
API_END();
|
||||
}
|
||||
|
||||
|
||||
@@ -409,7 +409,7 @@ inline bool MetaTryLoadFloatInfo(const std::string& fname,
|
||||
|
||||
namespace {
|
||||
template <int32_t D, typename T>
|
||||
void CopyTensorInfoImpl(Json arr_interface, linalg::Tensor<T, D>* p_out) {
|
||||
void CopyTensorInfoImpl(Context const& ctx, Json arr_interface, linalg::Tensor<T, D>* p_out) {
|
||||
ArrayInterface<D> array{arr_interface};
|
||||
if (array.n == 0) {
|
||||
p_out->Reshape(array.shape);
|
||||
@@ -428,16 +428,15 @@ void CopyTensorInfoImpl(Json arr_interface, linalg::Tensor<T, D>* p_out) {
|
||||
return;
|
||||
}
|
||||
p_out->Reshape(array.shape);
|
||||
auto t = p_out->View(GenericParameter::kCpuId);
|
||||
auto t = p_out->View(Context::kCpuId);
|
||||
CHECK(t.CContiguous());
|
||||
// FIXME(jiamingy): Remove the use of this default thread.
|
||||
linalg::ElementWiseTransformHost(t, common::OmpGetNumThreads(0), [&](auto i, auto) {
|
||||
linalg::ElementWiseTransformHost(t, ctx.Threads(), [&](auto i, auto) {
|
||||
return linalg::detail::Apply(TypedIndex<T, D>{array}, linalg::UnravelIndex<D>(i, t.Shape()));
|
||||
});
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void MetaInfo::SetInfo(StringView key, StringView interface_str) {
|
||||
void MetaInfo::SetInfo(Context const& ctx, StringView key, StringView interface_str) {
|
||||
Json j_interface = Json::Load(interface_str);
|
||||
bool is_cuda{false};
|
||||
if (IsA<Array>(j_interface)) {
|
||||
@@ -454,16 +453,16 @@ void MetaInfo::SetInfo(StringView key, StringView interface_str) {
|
||||
}
|
||||
|
||||
if (is_cuda) {
|
||||
this->SetInfoFromCUDA(key, j_interface);
|
||||
this->SetInfoFromCUDA(ctx, key, j_interface);
|
||||
} else {
|
||||
this->SetInfoFromHost(key, j_interface);
|
||||
this->SetInfoFromHost(ctx, key, j_interface);
|
||||
}
|
||||
}
|
||||
|
||||
void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
|
||||
void MetaInfo::SetInfoFromHost(Context const& ctx, StringView key, Json arr) {
|
||||
// multi-dim float info
|
||||
if (key == "base_margin") {
|
||||
CopyTensorInfoImpl(arr, &this->base_margin_);
|
||||
CopyTensorInfoImpl(ctx, arr, &this->base_margin_);
|
||||
// FIXME(jiamingy): Remove the deprecated API and let all language bindings aware of
|
||||
// input shape. This issue is CPU only since CUDA uses array interface from day 1.
|
||||
//
|
||||
@@ -477,7 +476,7 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
|
||||
}
|
||||
return;
|
||||
} else if (key == "label") {
|
||||
CopyTensorInfoImpl(arr, &this->labels);
|
||||
CopyTensorInfoImpl(ctx, arr, &this->labels);
|
||||
if (this->num_row_ != 0 && this->labels.Shape(0) != this->num_row_) {
|
||||
CHECK_EQ(this->labels.Size() % this->num_row_, 0) << "Incorrect size for labels.";
|
||||
size_t n_targets = this->labels.Size() / this->num_row_;
|
||||
@@ -491,7 +490,7 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
|
||||
// uint info
|
||||
if (key == "group") {
|
||||
linalg::Tensor<bst_group_t, 1> t;
|
||||
CopyTensorInfoImpl(arr, &t);
|
||||
CopyTensorInfoImpl(ctx, arr, &t);
|
||||
auto const& h_groups = t.Data()->HostVector();
|
||||
group_ptr_.clear();
|
||||
group_ptr_.resize(h_groups.size() + 1, 0);
|
||||
@@ -501,7 +500,7 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
|
||||
return;
|
||||
} else if (key == "qid") {
|
||||
linalg::Tensor<bst_group_t, 1> t;
|
||||
CopyTensorInfoImpl(arr, &t);
|
||||
CopyTensorInfoImpl(ctx, arr, &t);
|
||||
bool non_dec = true;
|
||||
auto const& query_ids = t.Data()->HostVector();
|
||||
for (size_t i = 1; i < query_ids.size(); ++i) {
|
||||
@@ -526,7 +525,7 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
|
||||
}
|
||||
// float info
|
||||
linalg::Tensor<float, 1> t;
|
||||
CopyTensorInfoImpl<1>(arr, &t);
|
||||
CopyTensorInfoImpl<1>(ctx, arr, &t);
|
||||
if (key == "weight") {
|
||||
this->weights_ = std::move(*t.Data());
|
||||
auto const& h_weights = this->weights_.ConstHostVector();
|
||||
@@ -548,13 +547,15 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
|
||||
}
|
||||
}
|
||||
|
||||
void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
|
||||
void MetaInfo::SetInfo(Context const& ctx, const char* key, const void* dptr, DataType dtype,
|
||||
size_t num) {
|
||||
auto proc = [&](auto cast_d_ptr) {
|
||||
using T = std::remove_pointer_t<decltype(cast_d_ptr)>;
|
||||
auto t =
|
||||
linalg::TensorView<T, 1>(common::Span<T>{cast_d_ptr, num}, {num}, GenericParameter::kCpuId);
|
||||
auto t = linalg::TensorView<T, 1>(common::Span<T>{cast_d_ptr, num}, {num}, Context::kCpuId);
|
||||
CHECK(t.CContiguous());
|
||||
Json interface { linalg::ArrayInterface(t) };
|
||||
Json interface {
|
||||
linalg::ArrayInterface(t)
|
||||
};
|
||||
assert(ArrayInterface<1>{interface}.is_contiguous);
|
||||
return interface;
|
||||
};
|
||||
@@ -562,22 +563,22 @@ void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t
|
||||
switch (dtype) {
|
||||
case xgboost::DataType::kFloat32: {
|
||||
auto cast_ptr = reinterpret_cast<const float*>(dptr);
|
||||
this->SetInfoFromHost(key, proc(cast_ptr));
|
||||
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
|
||||
break;
|
||||
}
|
||||
case xgboost::DataType::kDouble: {
|
||||
auto cast_ptr = reinterpret_cast<const double*>(dptr);
|
||||
this->SetInfoFromHost(key, proc(cast_ptr));
|
||||
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
|
||||
break;
|
||||
}
|
||||
case xgboost::DataType::kUInt32: {
|
||||
auto cast_ptr = reinterpret_cast<const uint32_t*>(dptr);
|
||||
this->SetInfoFromHost(key, proc(cast_ptr));
|
||||
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
|
||||
break;
|
||||
}
|
||||
case xgboost::DataType::kUInt64: {
|
||||
auto cast_ptr = reinterpret_cast<const uint64_t*>(dptr);
|
||||
this->SetInfoFromHost(key, proc(cast_ptr));
|
||||
this->SetInfoFromHost(ctx, key, proc(cast_ptr));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -724,9 +725,7 @@ void MetaInfo::Validate(int32_t device) const {
|
||||
"doesn't equal to actual number of rows given by data.";
|
||||
}
|
||||
auto check_device = [device](HostDeviceVector<float> const& v) {
|
||||
CHECK(v.DeviceIdx() == GenericParameter::kCpuId ||
|
||||
device == GenericParameter::kCpuId ||
|
||||
v.DeviceIdx() == device)
|
||||
CHECK(v.DeviceIdx() == Context::kCpuId || device == Context::kCpuId || v.DeviceIdx() == device)
|
||||
<< "Data is resided on a different device than `gpu_id`. "
|
||||
<< "Device that data is on: " << v.DeviceIdx() << ", "
|
||||
<< "`gpu_id` for XGBoost: " << device;
|
||||
@@ -769,7 +768,9 @@ void MetaInfo::Validate(int32_t device) const {
|
||||
}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
void MetaInfo::SetInfoFromCUDA(StringView key, Json arr) { common::AssertGPUSupport(); }
|
||||
void MetaInfo::SetInfoFromCUDA(Context const& ctx, StringView key, Json arr) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
using DMatrixThreadLocal =
|
||||
|
||||
@@ -115,7 +115,8 @@ void CopyQidImpl(ArrayInterface<1> array_interface, std::vector<bst_group_t>* p_
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void MetaInfo::SetInfoFromCUDA(StringView key, Json array) {
|
||||
// Context is not used until we have CUDA stream.
|
||||
void MetaInfo::SetInfoFromCUDA(Context const&, StringView key, Json array) {
|
||||
// multi-dim float info
|
||||
if (key == "base_margin") {
|
||||
CopyTensorInfoImpl(array, &base_margin_);
|
||||
|
||||
@@ -43,18 +43,18 @@ void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missin
|
||||
size_t batches = 0;
|
||||
size_t accumulated_rows = 0;
|
||||
bst_feature_t cols = 0;
|
||||
int32_t device = GenericParameter::kCpuId;
|
||||
|
||||
int32_t current_device;
|
||||
dh::safe_cuda(cudaGetDevice(¤t_device));
|
||||
auto get_device = [&]() -> int32_t {
|
||||
int32_t d = (device == GenericParameter::kCpuId) ? current_device : device;
|
||||
CHECK_NE(d, GenericParameter::kCpuId);
|
||||
int32_t d = (ctx_.gpu_id == Context::kCpuId) ? current_device : ctx_.gpu_id;
|
||||
CHECK_NE(d, Context::kCpuId);
|
||||
return d;
|
||||
};
|
||||
|
||||
while (iter.Next()) {
|
||||
device = proxy->DeviceIdx();
|
||||
CHECK_LT(device, common::AllVisibleGPUs());
|
||||
ctx_.gpu_id = proxy->DeviceIdx();
|
||||
CHECK_LT(ctx_.gpu_id, common::AllVisibleGPUs());
|
||||
dh::safe_cuda(cudaSetDevice(get_device()));
|
||||
if (cols == 0) {
|
||||
cols = num_cols();
|
||||
|
||||
@@ -21,6 +21,7 @@ namespace data {
|
||||
|
||||
class IterativeDeviceDMatrix : public DMatrix {
|
||||
MetaInfo info_;
|
||||
Context ctx_;
|
||||
BatchParam batch_param_;
|
||||
std::shared_ptr<EllpackPage> page_;
|
||||
|
||||
@@ -72,10 +73,7 @@ class IterativeDeviceDMatrix : public DMatrix {
|
||||
MetaInfo &Info() override { return info_; }
|
||||
MetaInfo const &Info() const override { return info_; }
|
||||
|
||||
GenericParameter const *Ctx() const override {
|
||||
LOG(FATAL) << "`IterativeDMatrix` doesn't have context.";
|
||||
return nullptr;
|
||||
}
|
||||
Context const *Ctx() const override { return &ctx_; }
|
||||
};
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2020 XGBoost contributors
|
||||
* Copyright 2020-2022, XGBoost contributors
|
||||
*/
|
||||
#include "proxy_dmatrix.h"
|
||||
#include "device_adapter.cuh"
|
||||
@@ -11,10 +11,10 @@ void DMatrixProxy::FromCudaColumnar(std::string interface_str) {
|
||||
std::shared_ptr<data::CudfAdapter> adapter {new data::CudfAdapter(interface_str)};
|
||||
auto const& value = adapter->Value();
|
||||
this->batch_ = adapter;
|
||||
device_ = adapter->DeviceIdx();
|
||||
ctx_.gpu_id = adapter->DeviceIdx();
|
||||
this->Info().num_col_ = adapter->NumColumns();
|
||||
this->Info().num_row_ = adapter->NumRows();
|
||||
if (device_ < 0) {
|
||||
if (ctx_.gpu_id < 0) {
|
||||
CHECK_EQ(this->Info().num_row_, 0);
|
||||
}
|
||||
}
|
||||
@@ -22,13 +22,12 @@ void DMatrixProxy::FromCudaColumnar(std::string interface_str) {
|
||||
void DMatrixProxy::FromCudaArray(std::string interface_str) {
|
||||
std::shared_ptr<CupyAdapter> adapter(new CupyAdapter(interface_str));
|
||||
this->batch_ = adapter;
|
||||
device_ = adapter->DeviceIdx();
|
||||
ctx_.gpu_id = adapter->DeviceIdx();
|
||||
this->Info().num_col_ = adapter->NumColumns();
|
||||
this->Info().num_row_ = adapter->NumRows();
|
||||
if (device_ < 0) {
|
||||
if (ctx_.gpu_id < 0) {
|
||||
CHECK_EQ(this->Info().num_row_, 0);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2020-2021 XGBoost contributors
|
||||
* Copyright 2020-2022, XGBoost contributors
|
||||
*/
|
||||
#ifndef XGBOOST_DATA_PROXY_DMATRIX_H_
|
||||
#define XGBOOST_DATA_PROXY_DMATRIX_H_
|
||||
@@ -45,7 +45,7 @@ class DataIterProxy {
|
||||
class DMatrixProxy : public DMatrix {
|
||||
MetaInfo info_;
|
||||
dmlc::any batch_;
|
||||
int32_t device_ { xgboost::GenericParameter::kCpuId };
|
||||
Context ctx_;
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
void FromCudaColumnar(std::string interface_str);
|
||||
@@ -53,7 +53,7 @@ class DMatrixProxy : public DMatrix {
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
public:
|
||||
int DeviceIdx() const { return device_; }
|
||||
int DeviceIdx() const { return ctx_.gpu_id; }
|
||||
|
||||
void SetData(char const* c_interface) {
|
||||
common::AssertGPUSupport();
|
||||
@@ -67,7 +67,7 @@ class DMatrixProxy : public DMatrix {
|
||||
this->FromCudaArray(interface_str);
|
||||
}
|
||||
if (this->info_.num_row_ == 0) {
|
||||
this->device_ = GenericParameter::kCpuId;
|
||||
this->ctx_.gpu_id = Context::kCpuId;
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
@@ -79,10 +79,7 @@ class DMatrixProxy : public DMatrix {
|
||||
|
||||
MetaInfo& Info() override { return info_; }
|
||||
MetaInfo const& Info() const override { return info_; }
|
||||
GenericParameter const* Ctx() const override {
|
||||
LOG(FATAL) << "`ProxyDMatrix` doesn't have context.";
|
||||
return nullptr;
|
||||
}
|
||||
Context const* Ctx() const override { return &ctx_; }
|
||||
|
||||
bool SingleColBlock() const override { return true; }
|
||||
bool EllpackExists() const override { return true; }
|
||||
|
||||
@@ -149,10 +149,8 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
|
||||
weights.insert(weights.end(), batch.Weights(), batch.Weights() + batch.Size());
|
||||
}
|
||||
if (batch.BaseMargin() != nullptr) {
|
||||
info_.base_margin_ = decltype(info_.base_margin_){batch.BaseMargin(),
|
||||
batch.BaseMargin() + batch.Size(),
|
||||
{batch.Size()},
|
||||
GenericParameter::kCpuId};
|
||||
info_.base_margin_ = decltype(info_.base_margin_){
|
||||
batch.BaseMargin(), batch.BaseMargin() + batch.Size(), {batch.Size()}, Context::kCpuId};
|
||||
}
|
||||
if (batch.Qid() != nullptr) {
|
||||
qids.insert(qids.end(), batch.Qid(), batch.Qid() + batch.Size());
|
||||
|
||||
@@ -31,7 +31,7 @@ class SimpleDMatrix : public DMatrix {
|
||||
|
||||
MetaInfo& Info() override;
|
||||
const MetaInfo& Info() const override;
|
||||
GenericParameter const* Ctx() const override { return &ctx_; }
|
||||
Context const* Ctx() const override { return &ctx_; }
|
||||
|
||||
bool SingleColBlock() const override { return true; }
|
||||
DMatrix* Slice(common::Span<int32_t const> ridxs) override;
|
||||
@@ -63,7 +63,7 @@ class SimpleDMatrix : public DMatrix {
|
||||
}
|
||||
|
||||
private:
|
||||
GenericParameter ctx_;
|
||||
Context ctx_;
|
||||
};
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -69,7 +69,7 @@ class SparsePageDMatrix : public DMatrix {
|
||||
XGDMatrixCallbackNext *next_;
|
||||
|
||||
float missing_;
|
||||
GenericParameter ctx_;
|
||||
Context ctx_;
|
||||
std::string cache_prefix_;
|
||||
uint32_t n_batches_ {0};
|
||||
// sparse page is the source to other page types, we make a special member function.
|
||||
@@ -100,7 +100,7 @@ class SparsePageDMatrix : public DMatrix {
|
||||
|
||||
MetaInfo& Info() override;
|
||||
const MetaInfo& Info() const override;
|
||||
GenericParameter const* Ctx() const override { return &ctx_; }
|
||||
Context const* Ctx() const override { return &ctx_; }
|
||||
|
||||
bool SingleColBlock() const override { return false; }
|
||||
DMatrix *Slice(common::Span<int32_t const>) override {
|
||||
|
||||
Reference in New Issue
Block a user