Set device in device dmatrix. (#5596)

commit e726dd9902 (parent ef26bc45bf)
Author: Jiaming Yuan
Date: 2020-04-25 13:42:53 +08:00, committed via GitHub
8 changed files with 41 additions and 5 deletions


@@ -102,7 +102,7 @@ class MetaInfo {
   /*!
    * \brief Validate all metainfo.
    */
-  void Validate() const;
+  void Validate(int32_t device) const;
   MetaInfo Slice(common::Span<int32_t const> ridxs) const;
   /*!


@@ -338,7 +338,7 @@ void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_
   }
 }
 
-void MetaInfo::Validate() const {
+void MetaInfo::Validate(int32_t device) const {
   if (group_ptr_.size() != 0 && weights_.Size() != 0) {
     CHECK_EQ(group_ptr_.size(), weights_.Size() + 1)
         << "Size of weights must equal to number of groups when ranking "
@@ -350,30 +350,44 @@
         << "Invalid group structure. Number of rows obtained from groups "
            "doesn't equal to actual number of rows given by data.";
   }
+  auto check_device = [device](HostDeviceVector<float> const &v) {
+    CHECK(v.DeviceIdx() == GenericParameter::kCpuId ||
+          device == GenericParameter::kCpuId ||
+          v.DeviceIdx() == device)
+        << "Data is resided on a different device than `gpu_id`. "
+        << "Device that data is on: " << v.DeviceIdx() << ", "
+        << "`gpu_id` for XGBoost: " << device;
+  };
   if (weights_.Size() != 0) {
     CHECK_EQ(weights_.Size(), num_row_)
         << "Size of weights must equal to number of rows.";
+    check_device(weights_);
     return;
   }
   if (labels_.Size() != 0) {
     CHECK_EQ(labels_.Size(), num_row_)
         << "Size of labels must equal to number of rows.";
+    check_device(labels_);
     return;
   }
   if (labels_lower_bound_.Size() != 0) {
     CHECK_EQ(labels_lower_bound_.Size(), num_row_)
         << "Size of label_lower_bound must equal to number of rows.";
+    check_device(labels_lower_bound_);
     return;
   }
   if (labels_upper_bound_.Size() != 0) {
     CHECK_EQ(labels_upper_bound_.Size(), num_row_)
         << "Size of label_upper_bound must equal to number of rows.";
+    check_device(labels_upper_bound_);
     return;
   }
   CHECK_LE(num_nonzero_, num_col_ * num_row_);
   if (base_margin_.Size() != 0) {
     CHECK_EQ(base_margin_.Size() % num_row_, 0)
         << "Size of base margin must be a multiple of number of rows.";
+    check_device(base_margin_);
   }
 }
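A sketch (not from the commit) distilling the rule the new check_device lambda encodes: a mismatch is only an error when both sides name a concrete GPU and the ordinals differ; CPU-resident data or an unset `gpu_id` (`GenericParameter::kCpuId`, which is -1) always passes. `DeviceConsistent` is a hypothetical name used here for illustration:

    #include <cassert>

    // Stand-in for GenericParameter::kCpuId (-1 in XGBoost).
    constexpr int kCpuId = -1;

    // Mirrors the CHECK inside check_device: pass when either side is on
    // the CPU, or when both name the same GPU ordinal.
    bool DeviceConsistent(int data_device, int gpu_id) {
      return data_device == kCpuId || gpu_id == kCpuId ||
             data_device == gpu_id;
    }

    int main() {
      assert(DeviceConsistent(kCpuId, 1));  // CPU data, any gpu_id: ok
      assert(DeviceConsistent(0, kCpuId));  // gpu_id unset: ok
      assert(DeviceConsistent(1, 1));       // both on GPU 1: ok
      assert(!DeviceConsistent(0, 1));      // mismatch: Validate() fails
      return 0;
    }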


@@ -201,6 +201,7 @@ template <typename AdapterT>
 DeviceDMatrix::DeviceDMatrix(AdapterT* adapter, float missing, int nthread, int max_bin) {
   common::HistogramCuts cuts =
       common::AdapterDeviceSketch(adapter, max_bin, missing);
+  dh::safe_cuda(cudaSetDevice(adapter->DeviceIdx()));
   auto& batch = adapter->Value();
   // Work out how many valid entries we have in each row
   dh::caching_device_vector<size_t> row_counts(adapter->NumRows() + 1, 0);


@@ -99,6 +99,7 @@ void CopyDataRowMajor(AdapterT* adapter, common::Span<Entry> data,
 // be supported in future. Does not currently support inferring row/column size
 template <typename AdapterT>
 SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
+  dh::safe_cuda(cudaSetDevice(adapter->DeviceIdx()));
   CHECK(adapter->NumRows() != kAdapterUnknownSize);
   CHECK(adapter->NumColumns() != kAdapterUnknownSize);
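Both DMatrix constructors gain the same one-line fix because CUDA runtime allocations go to the calling thread's current device, so it must be switched to the device holding the adapter's data before temporaries such as row_counts are allocated. A minimal sketch of the pattern (an assumption for illustration, not from the commit), with `data_device` standing in for `adapter->DeviceIdx()`:

    #include <cuda_runtime.h>
    #include <cstdio>

    int main() {
      int data_device = 0;  // stand-in for adapter->DeviceIdx()

      // dh::safe_cuda in XGBoost wraps this kind of status check.
      cudaError_t status = cudaSetDevice(data_device);
      if (status != cudaSuccess) {
        std::fprintf(stderr, "cudaSetDevice failed: %s\n",
                     cudaGetErrorString(status));
        return 1;
      }

      // Subsequent allocations now land on data_device.
      void* buffer = nullptr;
      cudaMalloc(&buffer, 1024);
      cudaFree(buffer);
      return 0;
    }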


@@ -1052,7 +1052,7 @@ class LearnerImpl : public LearnerIO {
   void ValidateDMatrix(DMatrix* p_fmat) const {
     MetaInfo const& info = p_fmat->Info();
-    info.Validate();
+    info.Validate(generic_parameters_.gpu_id);
     auto const row_based_split = [this]() {
       return tparam_.dsplit == DataSplitMode::kRow ||


@@ -149,9 +149,17 @@ TEST(MetaInfo, Validate) {
   info.num_col_ = 3;
   std::vector<xgboost::bst_group_t> groups (11);
   info.SetInfo("group", groups.data(), xgboost::DataType::kUInt32, 11);
-  EXPECT_THROW(info.Validate(), dmlc::Error);
+  EXPECT_THROW(info.Validate(0), dmlc::Error);
   std::vector<float> labels(info.num_row_ + 1);
   info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1);
-  EXPECT_THROW(info.Validate(), dmlc::Error);
+  EXPECT_THROW(info.Validate(0), dmlc::Error);
+
+#if defined(XGBOOST_USE_CUDA)
+  info.group_ptr_.clear();
+  labels.resize(info.num_row_);
+  info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
+  info.labels_.SetDevice(0);
+  EXPECT_THROW(info.Validate(1), dmlc::Error);
+#endif  // defined(XGBOOST_USE_CUDA)
 }


@@ -136,3 +136,14 @@ Arrow specification.'''
         n = 100
         X = cp.random.random((n, 2))
         xgb.DeviceQuantileDMatrix(X.toDlpack())
+
+    @pytest.mark.skipif(**tm.no_cupy())
+    @pytest.mark.mgpu
+    def test_specified_device(self):
+        import cupy as cp
+        cp.cuda.runtime.setDevice(0)
+        dtrain = dmatrix_from_cupy(
+            np.float32, xgb.DeviceQuantileDMatrix, np.nan)
+        with pytest.raises(xgb.core.XGBoostError):
+            xgb.train({'tree_method': 'gpu_hist', 'gpu_id': 1},
+                      dtrain, num_boost_round=10)


@@ -121,6 +121,7 @@ class TestGPUPredict(unittest.TestCase):
     @pytest.mark.skipif(**tm.no_cupy())
     def test_inplace_predict_cupy(self):
         import cupy as cp
+        cp.cuda.runtime.setDevice(0)
         rows = 1000
         cols = 10
         cp_rng = cp.random.RandomState(1994)