Convert labels into tensor. (#7456)

* Add a new ctor to tensor for `initializer_list`.
* Change labels from host device vector to tensor.
* Rename the field from `labels_` to `labels` since it's a public member.
Jiaming Yuan 2021-12-17 00:58:35 +08:00 committed by GitHub
parent 6f8a4633b7
commit 5b1161bb64
35 changed files with 319 additions and 258 deletions
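The API change at the heart of this commit is easiest to see in isolation. Below is a minimal host-only sketch (assuming an installed XGBoost build exposing the public `xgboost/linalg.h` header; the `main` function and values are illustrative, not part of the commit) of the two `Tensor` additions: the `initializer_list` constructor and the `HostView()` shorthand.

#include <xgboost/linalg.h>  // assumption: public header declaring linalg::Tensor

int main() {
  // Build a 2x2 host tensor (device ordinal -1) directly from a brace list,
  // using the constructor added by this commit.
  xgboost::linalg::Tensor<float, 2> t{{1.f, 2.f, 3.f, 4.f}, {2, 2}, -1};

  // HostView() is shorthand for View(-1) and returns a host-side TensorView.
  auto view = t.HostView();

  // Element access uses operator() with one index per dimension; storage is
  // row-major, so (1, 0) reads the third stored value, 3.f.
  return view(1, 0) == 3.f ? 0 : 1;
}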

View File

@@ -56,7 +56,7 @@ class MetaInfo {
   /*! \brief number of nonzero entries in the data */
   uint64_t num_nonzero_{0};  // NOLINT
   /*! \brief label of each instance */
-  HostDeviceVector<bst_float> labels_;  // NOLINT
+  linalg::Tensor<float, 2> labels;
   /*!
    * \brief the index of begin and end of a group
    * needed when the learning task is ranking.
@@ -119,12 +119,12 @@ class MetaInfo {
   }
   /*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */
   inline const std::vector<size_t>& LabelAbsSort() const {
-    if (label_order_cache_.size() == labels_.Size()) {
+    if (label_order_cache_.size() == labels.Size()) {
       return label_order_cache_;
     }
-    label_order_cache_.resize(labels_.Size());
+    label_order_cache_.resize(labels.Size());
     std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0);
-    const auto& l = labels_.HostVector();
+    const auto& l = labels.Data()->HostVector();
     XGBOOST_PARALLEL_SORT(label_order_cache_.begin(), label_order_cache_.end(),
                           [&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);});
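For callers, the practical consequence of this `MetaInfo` change is that labels are now a 2-D (rows by targets) tensor rather than a flat vector. A minimal sketch of the new host-side access pattern (the helper function is hypothetical; `MetaInfo` comes from the public `xgboost/data.h` header):

#include <cstddef>
#include <xgboost/data.h>  // assumption: public header declaring MetaInfo

// Hypothetical helper: read the label of row `i`, target 0, on the host.
float LabelAt(xgboost::MetaInfo const& info, std::size_t i) {
  // HostView() yields a 2D view; access is labels(row, target) instead of
  // the old labels_.HostVector()[i].
  auto labels = info.labels.HostView();
  return labels(i, 0);
}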

View File

@@ -635,6 +635,20 @@ class Tensor {
   HostDeviceVector<T> data_;
   ShapeT shape_{0};

+  template <typename I, std::int32_t D>
+  void Initialize(I const (&shape)[D], std::int32_t device) {
+    static_assert(D <= kDim, "Invalid shape.");
+    std::copy(shape, shape + D, shape_);
+    for (auto i = D; i < kDim; ++i) {
+      shape_[i] = 1;
+    }
+    if (device >= 0) {
+      data_.SetDevice(device);
+      data_.DevicePointer();  // Pull to device;
+    }
+    CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
+  }
+
  public:
   Tensor() = default;
@@ -665,20 +679,20 @@ class Tensor {
    */
   template <typename It, typename I, int32_t D>
   explicit Tensor(It begin, It end, I const (&shape)[D], int32_t device) {
-    // shape
-    static_assert(D <= kDim, "Invalid shape.");
-    std::copy(shape, shape + D, shape_);
-    for (auto i = D; i < kDim; ++i) {
-      shape_[i] = 1;
-    }
     auto &h_vec = data_.HostVector();
     h_vec.insert(h_vec.begin(), begin, end);
-    if (device >= 0) {
-      data_.SetDevice(device);
-      data_.DevicePointer();  // Pull to device;
-    }
-    CHECK_EQ(data_.Size(), detail::CalcSize(shape_));
+    // shape
+    this->Initialize(shape, device);
   }
+
+  template <typename I, int32_t D>
+  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], int32_t device) {
+    auto &h_vec = data_.HostVector();
+    h_vec = data;
+    // shape
+    this->Initialize(shape, device);
+  }
   /**
    * \brief Get a \ref TensorView for this tensor.
    */
@@ -703,6 +717,9 @@ class Tensor {
     }
   }

+  auto HostView() const { return this->View(-1); }
+  auto HostView() { return this->View(-1); }
+
   size_t Size() const { return data_.Size(); }
   auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
   auto Shape(size_t i) const { return shape_[i]; }
@@ -756,14 +773,15 @@ class Tensor {
   /**
    * \brief Set device ordinal for this tensor.
   */
-  void SetDevice(int32_t device) { data_.SetDevice(device); }
+  void SetDevice(int32_t device) const { data_.SetDevice(device); }
+  int32_t DeviceIdx() const { return data_.DeviceIdx(); }
 };

 // Only first axis is supported for now.
 template <typename T, int32_t D>
 void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
-  if (r.Data()->DeviceIdx() >= 0) {
-    l->Data()->SetDevice(r.Data()->DeviceIdx());
+  if (r.DeviceIdx() >= 0) {
+    l->SetDevice(r.DeviceIdx());
   }
   l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
     for (size_t i = 1; i < D; ++i) {
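`linalg::Stack` above is what `MetaInfo::Extend` now delegates to when concatenating label tensors. A host-only usage sketch (same header assumption as above; shapes and values are illustrative):

#include <xgboost/linalg.h>  // assumption: public header declaring Tensor and Stack

int main() {
  // Two 1x2 host tensors built with the new initializer_list ctor.
  xgboost::linalg::Tensor<float, 2> a{{1.f, 2.f}, {1, 2}, -1};
  xgboost::linalg::Tensor<float, 2> b{{3.f, 4.f}, {1, 2}, -1};

  // Concatenate along axis 0, the only axis supported for now.
  xgboost::linalg::Stack(&a, b);

  // a now holds both rows, i.e. shape (2, 2).
  return a.Shape(0) == 2 && a.Shape(1) == 2 ? 0 : 1;
}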

View File

@@ -46,15 +46,15 @@ class MyLogistic : public ObjFunction {
     out_gpair->Resize(preds.Size());
     const std::vector<bst_float>& preds_h = preds.HostVector();
     std::vector<GradientPair>& out_gpair_h = out_gpair->HostVector();
-    const std::vector<bst_float>& labels_h = info.labels_.HostVector();
+    auto const labels_h = info.labels.HostView();
     for (size_t i = 0; i < preds_h.size(); ++i) {
       bst_float w = info.GetWeight(i);
       // scale the negative examples!
-      if (labels_h[i] == 0.0f) w *= param_.scale_neg_weight;
+      if (labels_h(i) == 0.0f) w *= param_.scale_neg_weight;
       // logistic transformation
       bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));
       // this is the gradient
-      bst_float grad = (p - labels_h[i]) * w;
+      bst_float grad = (p - labels_h(i)) * w;
       // this is the second order gradient
       bst_float hess = p * (1.0f - p) * w;
       out_gpair_h.at(i) = GradientPair(grad, hess);

View File

@@ -956,11 +956,21 @@ thrust::device_ptr<T> tbegin(xgboost::common::Span<T>& span) {  // NOLINT
   return thrust::device_ptr<T>(span.data());
 }

+template <typename T>
+thrust::device_ptr<T> tbegin(xgboost::common::Span<T> const& span) {  // NOLINT
+  return thrust::device_ptr<T>(span.data());
+}
+
 template <typename T>
 thrust::device_ptr<T> tend(xgboost::common::Span<T>& span) {  // NOLINT
   return tbegin(span) + span.size();
 }

+template <typename T>
+thrust::device_ptr<T> tend(xgboost::common::Span<T> const& span) {  // NOLINT
+  return tbegin(span) + span.size();
+}
+
 template <typename T>
 XGBOOST_DEVICE auto trbegin(xgboost::common::Span<T> &span) {  // NOLINT
   return thrust::make_reverse_iterator(span.data() + span.size());
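The const overloads matter because `TensorView::Values()` returns a `Span` by value, which can only bind to a `const&` parameter. A sketch of the call pattern they unlock (assumes a CUDA build; `SumLabels` is a hypothetical helper, and `device_helpers.cuh` is assumed to be XGBoost's internal header under src/common/ providing `dh::tbegin`/`dh::tend`):

#include <thrust/execution_policy.h>
#include <thrust/reduce.h>
#include <xgboost/span.h>

#include "device_helpers.cuh"  // assumption: internal XGBoost header

// Hypothetical helper: reduce a device span held only by const reference,
// e.g. the span returned by labels.Values().
double SumLabels(xgboost::common::Span<float const> const& s) {
  return thrust::reduce(thrust::device, dh::tbegin(s), dh::tend(s), 0.0);
}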

View File

@@ -176,7 +176,7 @@ uint64_t constexpr MetaInfo::kNumField;
 // implementation of inline functions
 void MetaInfo::Clear() {
   num_row_ = num_col_ = num_nonzero_ = 0;
-  labels_.HostVector().clear();
+  labels = decltype(labels){};
   group_ptr_.clear();
   weights_.HostVector().clear();
   base_margin_ = decltype(base_margin_){};
@@ -213,8 +213,7 @@ void MetaInfo::SaveBinary(dmlc::Stream *fo) const {
   SaveScalarField(fo, u8"num_row", DataType::kUInt64, num_row_); ++field_cnt;
   SaveScalarField(fo, u8"num_col", DataType::kUInt64, num_col_); ++field_cnt;
   SaveScalarField(fo, u8"num_nonzero", DataType::kUInt64, num_nonzero_); ++field_cnt;
-  SaveVectorField(fo, u8"labels", DataType::kFloat32,
-                  {labels_.Size(), 1}, labels_); ++field_cnt;
+  SaveTensorField(fo, u8"labels", DataType::kFloat32, labels); ++field_cnt;
   SaveVectorField(fo, u8"group_ptr", DataType::kUInt32,
                   {group_ptr_.size(), 1}, group_ptr_); ++field_cnt;
   SaveVectorField(fo, u8"weights", DataType::kFloat32,
@@ -291,7 +290,7 @@ void MetaInfo::LoadBinary(dmlc::Stream *fi) {
   LoadScalarField(fi, u8"num_row", DataType::kUInt64, &num_row_);
   LoadScalarField(fi, u8"num_col", DataType::kUInt64, &num_col_);
   LoadScalarField(fi, u8"num_nonzero", DataType::kUInt64, &num_nonzero_);
-  LoadVectorField(fi, u8"labels", DataType::kFloat32, &labels_);
+  LoadTensorField(fi, u8"labels", DataType::kFloat32, &labels);
   LoadVectorField(fi, u8"group_ptr", DataType::kUInt32, &group_ptr_);
   LoadVectorField(fi, u8"weights", DataType::kFloat32, &weights_);
   LoadTensorField(fi, u8"base_margin", DataType::kFloat32, &base_margin_);
@@ -326,7 +325,19 @@ MetaInfo MetaInfo::Slice(common::Span<int32_t const> ridxs) const {
   out.num_col_ = this->num_col_;
   // Groups is maintained by a higher level Python function. We should aim at deprecating
   // the slice function.
-  out.labels_.HostVector() = Gather(this->labels_.HostVector(), ridxs);
+  if (this->labels.Size() != this->num_row_) {
+    auto t_labels = this->labels.View(this->labels.Data()->DeviceIdx());
+    out.labels.Reshape(ridxs.size(), labels.Shape(1));
+    out.labels.Data()->HostVector() =
+        Gather(this->labels.Data()->HostVector(), ridxs, t_labels.Stride(0));
+  } else {
+    out.labels.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {
+      data->HostVector() = Gather(this->labels.Data()->HostVector(), ridxs);
+      shape[0] = data->Size();
+      shape[1] = 1;
+    });
+  }
   out.labels_upper_bound_.HostVector() =
       Gather(this->labels_upper_bound_.HostVector(), ridxs);
   out.labels_lower_bound_.HostVector() =
@@ -343,13 +354,16 @@ MetaInfo MetaInfo::Slice(common::Span<int32_t const> ridxs) const {
   if (this->base_margin_.Size() != this->num_row_) {
     CHECK_EQ(this->base_margin_.Size() % this->num_row_, 0)
         << "Incorrect size of base margin vector.";
-    auto margin = this->base_margin_.View(this->base_margin_.Data()->DeviceIdx());
-    out.base_margin_.Reshape(ridxs.size(), margin.Shape()[1]);
-    size_t stride = margin.Stride(0);
+    auto t_margin = this->base_margin_.View(this->base_margin_.Data()->DeviceIdx());
+    out.base_margin_.Reshape(ridxs.size(), t_margin.Shape(1));
     out.base_margin_.Data()->HostVector() =
-        Gather(this->base_margin_.Data()->HostVector(), ridxs, stride);
+        Gather(this->base_margin_.Data()->HostVector(), ridxs, t_margin.Stride(0));
   } else {
-    out.base_margin_.Data()->HostVector() = Gather(this->base_margin_.Data()->HostVector(), ridxs);
+    out.base_margin_.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {
+      data->HostVector() = Gather(this->base_margin_.Data()->HostVector(), ridxs);
+      shape[0] = data->Size();
+      shape[1] = 1;
+    });
   }

   out.feature_weights.Resize(this->feature_weights.Size());
@@ -460,6 +474,17 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
       this->base_margin_.Reshape(this->num_row_, n_groups);
     }
     return;
+  } else if (key == "label") {
+    CopyTensorInfoImpl(arr, &this->labels);
+    if (this->num_row_ != 0 && this->labels.Shape(0) != this->num_row_) {
+      CHECK_EQ(this->labels.Size() % this->num_row_, 0) << "Incorrect size for labels.";
+      size_t n_targets = this->labels.Size() / this->num_row_;
+      this->labels.Reshape(this->num_row_, n_targets);
+    }
+    auto const& h_labels = labels.Data()->ConstHostVector();
+    auto valid = std::none_of(h_labels.cbegin(), h_labels.cend(), data::LabelsCheck{});
+    CHECK(valid) << "Label contains NaN, infinity or a value too large.";
+    return;
   }
   // uint info
   if (key == "group") {
@@ -500,12 +525,7 @@ void MetaInfo::SetInfoFromHost(StringView key, Json arr) {
   // float info
   linalg::Tensor<float, 1> t;
   CopyTensorInfoImpl<1>(arr, &t);
-  if (key == "label") {
-    this->labels_ = std::move(*t.Data());
-    auto const& h_labels = labels_.ConstHostVector();
-    auto valid = std::none_of(h_labels.cbegin(), h_labels.cend(), data::LabelsCheck{});
-    CHECK(valid) << "Label contains NaN, infinity or a value too large.";
-  } else if (key == "weight") {
+  if (key == "weight") {
     this->weights_ = std::move(*t.Data());
     auto const& h_weights = this->weights_.ConstHostVector();
     auto valid = std::none_of(h_weights.cbegin(), h_weights.cend(),
@@ -568,7 +588,7 @@ void MetaInfo::GetInfo(char const* key, bst_ulong* out_len, DataType dtype,
   if (dtype == DataType::kFloat32) {
     const std::vector<bst_float>* vec = nullptr;
     if (!std::strcmp(key, "label")) {
-      vec = &this->labels_.HostVector();
+      vec = &this->labels.Data()->HostVector();
     } else if (!std::strcmp(key, "weight")) {
      vec = &this->weights_.HostVector();
     } else if (!std::strcmp(key, "base_margin")) {
@@ -649,8 +669,7 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_column
   }
   this->num_col_ = that.num_col_;

-  this->labels_.SetDevice(that.labels_.DeviceIdx());
-  this->labels_.Extend(that.labels_);
+  linalg::Stack(&this->labels, that.labels);

   this->weights_.SetDevice(that.weights_.DeviceIdx());
   this->weights_.Extend(that.weights_);
@@ -702,7 +721,7 @@ void MetaInfo::Validate(int32_t device) const {
         << "Invalid group structure. Number of rows obtained from groups "
            "doesn't equal to actual number of rows given by data.";
   }
-  auto check_device = [device](HostDeviceVector<float> const &v) {
+  auto check_device = [device](HostDeviceVector<float> const& v) {
     CHECK(v.DeviceIdx() == GenericParameter::kCpuId ||
           device == GenericParameter::kCpuId ||
           v.DeviceIdx() == device)
@@ -717,10 +736,10 @@ void MetaInfo::Validate(int32_t device) const {
     check_device(weights_);
     return;
   }
-  if (labels_.Size() != 0) {
-    CHECK_EQ(labels_.Size(), num_row_)
+  if (labels.Size() != 0) {
+    CHECK_EQ(labels.Size(), num_row_)
         << "Size of labels must equal to number of rows.";
-    check_device(labels_);
+    check_device(*labels.Data());
     return;
   }
   if (labels_lower_bound_.Size() != 0) {

View File

@@ -119,6 +119,12 @@ void MetaInfo::SetInfoFromCUDA(StringView key, Json array) {
   if (key == "base_margin") {
     CopyTensorInfoImpl(array, &base_margin_);
     return;
+  } else if (key == "label") {
+    CopyTensorInfoImpl(array, &labels);
+    auto ptr = labels.Data()->ConstDevicePointer();
+    auto valid = thrust::none_of(thrust::device, ptr, ptr + labels.Size(), data::LabelsCheck{});
+    CHECK(valid) << "Label contains NaN, infinity or a value too large.";
+    return;
   }
   // uint info
   if (key == "group") {
@@ -135,12 +141,7 @@ void MetaInfo::SetInfoFromCUDA(StringView key, Json array) {
   // float info
   linalg::Tensor<float, 1> t;
   CopyTensorInfoImpl(array, &t);
-  if (key == "label") {
-    this->labels_ = std::move(*t.Data());
-    auto ptr = labels_.ConstDevicePointer();
-    auto valid = thrust::none_of(thrust::device, ptr, ptr + labels_.Size(), data::LabelsCheck{});
-    CHECK(valid) << "Label contains NaN, infinity or a value too large.";
-  } else if (key == "weight") {
+  if (key == "weight") {
     this->weights_ = std::move(*t.Data());
     auto ptr = weights_.ConstDevicePointer();
     auto valid = thrust::none_of(thrust::device, ptr, ptr + weights_.Size(), data::WeightsCheck{});

View File

@@ -153,7 +153,7 @@ void IterativeDeviceDMatrix::Initialize(DataIterHandle iter_handle, float missing)
   if (batches == 1) {
     this->info_ = std::move(proxy->Info());
     this->info_.num_nonzero_ = nnz;
-    CHECK_EQ(proxy->Info().labels_.Size(), 0);
+    CHECK_EQ(proxy->Info().labels.Size(), 0);
   }

   iter.Reset();

View File

@@ -127,14 +127,16 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
     total_batch_size += batch.Size();
     // Append meta information if available
     if (batch.Labels() != nullptr) {
-      auto& labels = info_.labels_.HostVector();
-      labels.insert(labels.end(), batch.Labels(),
-                    batch.Labels() + batch.Size());
+      info_.labels.ModifyInplace([&](auto* data, common::Span<size_t, 2> shape) {
+        shape[1] = 1;
+        auto& labels = data->HostVector();
+        labels.insert(labels.end(), batch.Labels(), batch.Labels() + batch.Size());
+        shape[0] += batch.Size();
+      });
     }
     if (batch.Weights() != nullptr) {
       auto& weights = info_.weights_.HostVector();
-      weights.insert(weights.end(), batch.Weights(),
-                     batch.Weights() + batch.Size());
+      weights.insert(weights.end(), batch.Weights(), batch.Weights() + batch.Size());
     }
     if (batch.BaseMargin() != nullptr) {
       info_.base_margin_ = decltype(info_.base_margin_){batch.BaseMargin(),
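`ModifyInplace` is the contract used here and in `MetaInfo::Slice`: the callback mutates the backing `HostDeviceVector` and must leave the shape span consistent with the new data size. A host-only sketch of appending one batch of single-target labels (the helper is hypothetical):

#include <cstddef>
#include <vector>
#include <xgboost/data.h>  // assumption: public header declaring MetaInfo

// Hypothetical helper: append a batch of single-target labels to info->labels.
void AppendLabels(xgboost::MetaInfo* info, std::vector<float> const& batch) {
  info->labels.ModifyInplace([&](xgboost::HostDeviceVector<float>* data,
                                 xgboost::common::Span<std::size_t, 2> shape) {
    shape[1] = 1;  // single target column
    auto& h_labels = data->HostVector();
    h_labels.insert(h_labels.end(), batch.begin(), batch.end());
    shape[0] += batch.size();  // keep shape[0] * shape[1] == data->Size()
  });
}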

View File

@@ -32,17 +32,16 @@ namespace metric {
  */
 template <typename Fn>
 std::tuple<double, double, double>
-BinaryAUC(common::Span<float const> predts, common::Span<float const> labels,
+BinaryAUC(common::Span<float const> predts, linalg::VectorView<float const> labels,
           OptionalWeights weights,
           std::vector<size_t> const &sorted_idx, Fn &&area_fn) {
-  CHECK(!labels.empty());
-  CHECK_EQ(labels.size(), predts.size());
+  CHECK_NE(labels.Size(), 0);
+  CHECK_EQ(labels.Size(), predts.size());
   auto p_predts = predts.data();
-  auto p_labels = labels.data();

   double auc{0};

-  float label = p_labels[sorted_idx.front()];
+  float label = labels(sorted_idx.front());
   float w = weights[sorted_idx[0]];
   double fp = (1.0 - label) * w, tp = label * w;
   double tp_prev = 0, fp_prev = 0;
@@ -53,7 +52,7 @@ BinaryAUC(common::Span<float const> predts, common::Span<float const> labels,
       tp_prev = tp;
       fp_prev = fp;
     }
-    label = p_labels[sorted_idx[i]];
+    label = labels(sorted_idx[i]);
     float w = weights[sorted_idx[i]];
     fp += (1.0f - label) * w;
     tp += label * w;
@@ -82,7 +81,10 @@ double MultiClassOVR(common::Span<float const> predts, MetaInfo const &info,
                      size_t n_classes, int32_t n_threads,
                      BinaryAUC &&binary_auc) {
   CHECK_NE(n_classes, 0);
-  auto const &labels = info.labels_.ConstHostVector();
+  auto const labels = info.labels.View(GenericParameter::kCpuId);
+  if (labels.Shape(0) != 0) {
+    CHECK_EQ(labels.Shape(1), 1) << "AUC doesn't support multi-target model.";
+  }

   std::vector<double> results_storage(n_classes * 3, 0);
   linalg::TensorView<double, 2> results(results_storage, {n_classes, static_cast<size_t>(3)},
@@ -96,16 +98,17 @@ double MultiClassOVR(common::Span<float const> predts, MetaInfo const &info,
       predts, {static_cast<size_t>(info.num_row_), n_classes},
       GenericParameter::kCpuId);

-  if (!info.labels_.Empty()) {
+  if (info.labels.Size() != 0) {
     common::ParallelFor(n_classes, n_threads, [&](auto c) {
-      std::vector<float> proba(info.labels_.Size());
-      std::vector<float> response(info.labels_.Size());
+      std::vector<float> proba(info.labels.Size());
+      std::vector<float> response(info.labels.Size());
       for (size_t i = 0; i < proba.size(); ++i) {
         proba[i] = predts_t(i, c);
-        response[i] = labels[i] == c ? 1.0f : 0.0;
+        response[i] = labels(i) == c ? 1.0f : 0.0;
       }
       double fp;
-      std::tie(fp, tp(c), auc(c)) = binary_auc(proba, response, weights);
+      std::tie(fp, tp(c), auc(c)) =
+          binary_auc(proba, linalg::MakeVec(response.data(), response.size(), -1), weights);
       local_area(c) = fp * tp(c);
     });
   }
@@ -135,9 +138,9 @@ double MultiClassOVR(common::Span<float const> predts, MetaInfo const &info,
   return auc_sum;
 }

-std::tuple<double, double, double>
-BinaryROCAUC(common::Span<float const> predts, common::Span<float const> labels,
-             OptionalWeights weights) {
+std::tuple<double, double, double> BinaryROCAUC(common::Span<float const> predts,
+                                                linalg::VectorView<float const> labels,
+                                                OptionalWeights weights) {
   auto const sorted_idx = common::ArgSort<size_t>(predts, std::greater<>{});
   return BinaryAUC(predts, labels, weights, sorted_idx, TrapezoidArea);
 }
@@ -146,15 +149,17 @@ BinaryROCAUC(common::Span<float const> predts, common::Span<float const> labels,
  * Calculate AUC for 1 ranking group;
  */
 double GroupRankingROC(common::Span<float const> predts,
-                       common::Span<float const> labels, float w) {
+                       linalg::VectorView<float const> labels, float w) {
   // on ranking, we just count all pairs.
   double auc{0};
-  auto const sorted_idx = common::ArgSort<size_t>(labels, std::greater<>{});
+  // argsort doesn't support tensor input yet.
+  auto raw_labels = labels.Values().subspan(0, labels.Size());
+  auto const sorted_idx = common::ArgSort<size_t>(raw_labels, std::greater<>{});
   w = common::Sqr(w);

   double sum_w = 0.0f;
-  for (size_t i = 0; i < labels.size(); ++i) {
-    for (size_t j = i + 1; j < labels.size(); ++j) {
+  for (size_t i = 0; i < labels.Size(); ++i) {
+    for (size_t j = i + 1; j < labels.Size(); ++j) {
       auto predt = predts[sorted_idx[i]] - predts[sorted_idx[j]];
       if (predt > 0) {
         predt = 1.0;
@@ -180,14 +185,14 @@ double GroupRankingROC(common::Span<float const> predts,
 * https://doi.org/10.1371/journal.pone.0092209
 */
 std::tuple<double, double, double> BinaryPRAUC(common::Span<float const> predts,
-                                               common::Span<float const> labels,
+                                               linalg::VectorView<float const> labels,
                                                OptionalWeights weights) {
   auto const sorted_idx = common::ArgSort<size_t>(predts, std::greater<>{});
   double total_pos{0}, total_neg{0};
-  for (size_t i = 0; i < labels.size(); ++i) {
+  for (size_t i = 0; i < labels.Size(); ++i) {
     auto w = weights[i];
-    total_pos += w * labels[i];
-    total_neg += w * (1.0f - labels[i]);
+    total_pos += w * labels(i);
+    total_neg += w * (1.0f - labels(i));
   }
   if (total_pos <= 0 || total_neg <= 0) {
     return {1.0f, 1.0f, std::numeric_limits<float>::quiet_NaN()};
@@ -211,7 +216,7 @@ std::pair<double, uint32_t> RankingAUC(std::vector<float> const &predts,
   CHECK_GE(info.group_ptr_.size(), 2);
   uint32_t n_groups = info.group_ptr_.size() - 1;
   auto s_predts = common::Span<float const>{predts};
-  auto s_labels = info.labels_.ConstHostSpan();
+  auto labels = info.labels.View(GenericParameter::kCpuId);
   auto s_weights = info.weights_.ConstHostSpan();

   std::atomic<uint32_t> invalid_groups{0};
@@ -222,9 +227,9 @@ std::pair<double, uint32_t> RankingAUC(std::vector<float> const &predts,
     size_t cnt = info.group_ptr_[g] - info.group_ptr_[g - 1];
     float w = s_weights.empty() ? 1.0f : s_weights[g - 1];
     auto g_predts = s_predts.subspan(info.group_ptr_[g - 1], cnt);
-    auto g_labels = s_labels.subspan(info.group_ptr_[g - 1], cnt);
+    auto g_labels = labels.Slice(linalg::Range(info.group_ptr_[g - 1], info.group_ptr_[g]));
     double auc;
-    if (is_roc && g_labels.size() < 3) {
+    if (is_roc && g_labels.Size() < 3) {
       // With 2 documents, there's only 1 comparison can be made. So either
       // TP or FP will be zero.
       invalid_groups++;
@@ -254,11 +259,11 @@ class EvalAUC : public Metric {
     double auc {0};
     if (tparam_->gpu_id != GenericParameter::kCpuId) {
       preds.SetDevice(tparam_->gpu_id);
-      info.labels_.SetDevice(tparam_->gpu_id);
+      info.labels.SetDevice(tparam_->gpu_id);
       info.weights_.SetDevice(tparam_->gpu_id);
     }
     // We use the global size to handle empty dataset.
-    std::array<size_t, 2> meta{info.labels_.Size(), preds.Size()};
+    std::array<size_t, 2> meta{info.labels.Size(), preds.Size()};
     rabit::Allreduce<rabit::op::Max>(meta.data(), meta.size());
     if (meta[0] == 0) {
       // Empty across all workers, which is not supported.
@@ -271,8 +276,8 @@ class EvalAUC : public Metric {
         CHECK_EQ(info.weights_.Size(), info.group_ptr_.size() - 1);
       }
       uint32_t valid_groups = 0;
-      if (!info.labels_.Empty()) {
-        CHECK_EQ(info.group_ptr_.back(), info.labels_.Size());
+      if (info.labels.Size() != 0) {
+        CHECK_EQ(info.group_ptr_.back(), info.labels.Size());
         std::tie(auc, valid_groups) =
             static_cast<Curve *>(this)->EvalRanking(preds, info);
       }
@@ -304,7 +309,7 @@ class EvalAUC : public Metric {
       * binary classification
       */
       double fp{0}, tp{0};
-      if (!(preds.Empty() || info.labels_.Empty())) {
+      if (!(preds.Empty() || info.labels.Size() == 0)) {
         std::tie(fp, tp, auc) =
             static_cast<Curve *>(this)->EvalBinary(preds, info);
       }
@@ -367,7 +372,7 @@ class EvalROCAUC : public EvalAUC<EvalROCAUC> {
     double fp, tp, auc;
     if (tparam_->gpu_id == GenericParameter::kCpuId) {
       std::tie(fp, tp, auc) =
-          BinaryROCAUC(predts.ConstHostVector(), info.labels_.ConstHostVector(),
+          BinaryROCAUC(predts.ConstHostVector(), info.labels.HostView().Slice(linalg::All(), 0),
                        OptionalWeights{info.weights_.ConstHostSpan()});
     } else {
       std::tie(fp, tp, auc) = GPUBinaryROCAUC(predts.ConstDeviceSpan(), info,
@@ -420,7 +425,7 @@ class EvalPRAUC : public EvalAUC<EvalPRAUC> {
     double pr, re, auc;
     if (tparam_->gpu_id == GenericParameter::kCpuId) {
       std::tie(pr, re, auc) =
-          BinaryPRAUC(predts.ConstHostSpan(), info.labels_.ConstHostSpan(),
+          BinaryPRAUC(predts.ConstHostSpan(), info.labels.HostView().Slice(linalg::All(), 0),
                       OptionalWeights{info.weights_.ConstHostSpan()});
     } else {
       std::tie(pr, re, auc) = GPUBinaryPRAUC(predts.ConstDeviceSpan(), info,
@@ -447,7 +452,7 @@ class EvalPRAUC : public EvalAUC<EvalPRAUC> {
     uint32_t valid_groups = 0;
     auto n_threads = tparam_->Threads();
     if (tparam_->gpu_id == GenericParameter::kCpuId) {
-      auto labels = info.labels_.ConstHostSpan();
+      auto labels = info.labels.Data()->ConstHostSpan();
       if (std::any_of(labels.cbegin(), labels.cend(), PRAUCLabelInvalid{})) {
         InvalidLabels();
       }
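`linalg::MakeVec`, used above to adapt the per-class `response` buffer, wraps a raw pointer as a 1-D `VectorView` without copying. A host-only sketch (device ordinal -1 selects the host; the `main` function is illustrative):

#include <vector>
#include <xgboost/linalg.h>  // assumption: public header declaring linalg::MakeVec

int main() {
  std::vector<float> response{0.f, 1.f, 1.f};
  // Wrap the buffer as a 1-D view; no data is copied.
  auto v = xgboost::linalg::MakeVec(response.data(), response.size(), -1);
  // Element access uses operator(), matching the new label access pattern.
  return v(0) == 0.f ? 0 : 1;
}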

View File

@@ -89,12 +89,12 @@ std::tuple<double, double, double>
 GPUBinaryAUC(common::Span<float const> predts, MetaInfo const &info,
              int32_t device, common::Span<size_t const> d_sorted_idx,
              Fn area_fn, std::shared_ptr<DeviceAUCCache> cache) {
-  auto labels = info.labels_.ConstDeviceSpan();
+  auto labels = info.labels.View(device);
   auto weights = info.weights_.ConstDeviceSpan();
   dh::safe_cuda(cudaSetDevice(device));

-  CHECK(!labels.empty());
-  CHECK_EQ(labels.size(), predts.size());
+  CHECK_NE(labels.Size(), 0);
+  CHECK_EQ(labels.Size(), predts.size());

   /**
    * Linear scan
@@ -103,7 +103,7 @@ GPUBinaryAUC(common::Span<float const> predts, MetaInfo const &info,
   auto get_fp_tp = [=]XGBOOST_DEVICE(size_t i) {
     size_t idx = d_sorted_idx[i];
-    float label = labels[idx];
+    float label = labels(idx);

     float w = get_weight[d_sorted_idx[i]];
     float fp = (1.0 - label) * w;
@@ -332,10 +332,10 @@ double GPUMultiClassAUCOVR(common::Span<float const> predts,
   // Index is sorted within class.
   auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);

-  auto labels = info.labels_.ConstDeviceSpan();
+  auto labels = info.labels.View(device);
   auto weights = info.weights_.ConstDeviceSpan();

-  size_t n_samples = labels.size();
+  size_t n_samples = labels.Shape(0);

   if (n_samples == 0) {
     dh::TemporaryArray<double> resutls(n_classes * 4, 0.0f);
@@ -360,7 +360,7 @@ double GPUMultiClassAUCOVR(common::Span<float const> predts,
     size_t class_id = i / n_samples;
     // labels is a vector of size n_samples.
-    float label = labels[idx % n_samples] == class_id;
+    float label = labels(idx % n_samples) == class_id;

     float w = get_weight[d_sorted_idx[i] % n_samples];
     float fp = (1.0 - label) * w;
@@ -528,10 +528,10 @@ GPURankingAUC(common::Span<float const> predts, MetaInfo const &info,
   /**
    * Sort the labels
    */
-  auto d_labels = info.labels_.ConstDeviceSpan();
+  auto d_labels = info.labels.View(device);

   auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);
-  dh::SegmentedArgSort<false>(d_labels, d_group_ptr, d_sorted_idx);
+  dh::SegmentedArgSort<false>(d_labels.Values(), d_group_ptr, d_sorted_idx);

   auto d_weights = info.weights_.ConstDeviceSpan();
@@ -631,19 +631,19 @@ GPUBinaryPRAUC(common::Span<float const> predts, MetaInfo const &info,
   auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);
   dh::ArgSort<false>(predts, d_sorted_idx);

-  auto labels = info.labels_.ConstDeviceSpan();
+  auto labels = info.labels.View(device);
   auto d_weights = info.weights_.ConstDeviceSpan();
   auto get_weight = OptionalWeights{d_weights};
   auto it = dh::MakeTransformIterator<Pair>(
       thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) {
         auto w = get_weight[d_sorted_idx[i]];
-        return thrust::make_pair(labels[d_sorted_idx[i]] * w,
-                                 (1.0f - labels[d_sorted_idx[i]]) * w);
+        return thrust::make_pair(labels(d_sorted_idx[i]) * w,
+                                 (1.0f - labels(d_sorted_idx[i])) * w);
       });
   dh::XGBCachingDeviceAllocator<char> alloc;
   double total_pos, total_neg;
   thrust::tie(total_pos, total_neg) =
-      thrust::reduce(thrust::cuda::par(alloc), it, it + labels.size(),
+      thrust::reduce(thrust::cuda::par(alloc), it, it + labels.Size(),
                      Pair{0.0, 0.0}, PairPlus<double, double>{});

   if (total_pos <= 0.0 || total_neg <= 0.0) {
@@ -679,7 +679,7 @@ double GPUMultiClassPRAUC(common::Span<float const> predts,
   /**
    * Get total positive/negative
    */
-  auto labels = info.labels_.ConstDeviceSpan();
+  auto labels = info.labels.View(device);
   auto n_samples = info.num_row_;
   dh::caching_device_vector<Pair> totals(n_classes);
   auto key_it =
@@ -693,7 +693,7 @@ double GPUMultiClassPRAUC(common::Span<float const> predts,
         auto idx = d_sorted_idx[i] % n_samples;
         auto w = get_weight[idx];
         auto class_id = i / n_samples;
-        auto y = labels[idx] == class_id;
+        auto y = labels(idx) == class_id;
         return thrust::make_pair(y * w, (1.0f - y) * w);
       });
   dh::XGBCachingDeviceAllocator<char> alloc;
@@ -726,7 +726,7 @@ GPURankingPRAUCImpl(common::Span<float const> predts, MetaInfo const &info,
    */
   auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);

-  auto labels = info.labels_.ConstDeviceSpan();
+  auto labels = info.labels.View(device);
   auto weights = info.weights_.ConstDeviceSpan();

   uint32_t n_groups = static_cast<uint32_t>(info.group_ptr_.size() - 1);
@@ -734,7 +734,7 @@ GPURankingPRAUCImpl(common::Span<float const> predts, MetaInfo const &info,
   /**
    * Linear scan
    */
-  size_t n_samples = labels.size();
+  size_t n_samples = labels.Shape(0);
   dh::caching_device_vector<double> d_auc(n_groups, 0);
   auto get_weight = OptionalWeights{weights};
   auto d_fptp = dh::ToSpan(cache->fptp);
@@ -742,7 +742,7 @@ GPURankingPRAUCImpl(common::Span<float const> predts, MetaInfo const &info,
     size_t idx = d_sorted_idx[i];
     size_t group_id = dh::SegmentId(d_group_ptr, idx);
-    float label = labels[idx];
+    float label = labels(idx);

     float w = get_weight[group_id];
     float fp = (1.0 - label) * w;
@@ -860,9 +860,9 @@ GPURankingPRAUC(common::Span<float const> predts, MetaInfo const &info,
   dh::SegmentedArgSort<false>(predts, d_group_ptr, d_sorted_idx);

   dh::XGBDeviceAllocator<char> alloc;
-  auto labels = info.labels_.ConstDeviceSpan();
-  if (thrust::any_of(thrust::cuda::par(alloc), dh::tbegin(labels),
-                     dh::tend(labels), PRAUCLabelInvalid{})) {
+  auto labels = info.labels.View(device);
+  if (thrust::any_of(thrust::cuda::par(alloc), dh::tbegin(labels.Values()),
+                     dh::tend(labels.Values()), PRAUCLabelInvalid{})) {
     InvalidLabels();
   }
   /**
@@ -881,7 +881,7 @@ GPURankingPRAUC(common::Span<float const> predts, MetaInfo const &info,
           auto g = dh::SegmentId(d_group_ptr, i);
           w = d_weights[g];
         }
-        auto y = labels[i];
+        auto y = labels(i);
         return thrust::make_pair(y * w, (1.0 - y) * w);
       });
   thrust::reduce_by_key(thrust::cuda::par(alloc), key_it,
@@ -899,7 +899,7 @@ GPURankingPRAUC(common::Span<float const> predts, MetaInfo const &info,
     return detail::CalcDeltaPRAUC(fp_prev, fp, tp_prev, tp,
                                   d_totals[group_id].first);
   };
-  return GPURankingPRAUCImpl(predts, info, d_group_ptr, n_groups, cache, fn);
+  return GPURankingPRAUCImpl(predts, info, d_group_ptr, device, cache, fn);
 }
 }  // namespace metric
 }  // namespace xgboost

View File

@@ -361,10 +361,10 @@ struct EvalEWiseBase : public Metric {
   double Eval(const HostDeviceVector<bst_float> &preds, const MetaInfo &info,
               bool distributed) override {
-    CHECK_EQ(preds.Size(), info.labels_.Size())
+    CHECK_EQ(preds.Size(), info.labels.Size())
         << "label and prediction size not match, "
        << "hint: use merror or mlogloss for multi-class classification";
-    auto result = reducer_.Reduce(*tparam_, info.weights_, info.labels_, preds);
+    auto result = reducer_.Reduce(*tparam_, info.weights_, *info.labels.Data(), preds);

     double dat[2] { result.Residue(), result.Weights() };

View File

@@ -169,19 +169,20 @@ template<typename Derived>
 struct EvalMClassBase : public Metric {
   double Eval(const HostDeviceVector<float> &preds, const MetaInfo &info,
               bool distributed) override {
-    if (info.labels_.Size() == 0) {
+    if (info.labels.Size() == 0) {
       CHECK_EQ(preds.Size(), 0);
     } else {
-      CHECK(preds.Size() % info.labels_.Size() == 0) << "label and prediction size not match";
+      CHECK(preds.Size() % info.labels.Size() == 0) << "label and prediction size not match";
     }
     double dat[2] { 0.0, 0.0 };
-    if (info.labels_.Size() != 0) {
-      const size_t nclass = preds.Size() / info.labels_.Size();
+    if (info.labels.Size() != 0) {
+      const size_t nclass = preds.Size() / info.labels.Size();
       CHECK_GE(nclass, 1U)
           << "mlogloss and merror are only used for multi-class classification,"
          << " use logloss for binary classification";
       int device = tparam_->gpu_id;
-      auto result = reducer_.Reduce(*tparam_, device, nclass, info.weights_, info.labels_, preds);
+      auto result =
+          reducer_.Reduce(*tparam_, device, nclass, info.weights_, *info.labels.Data(), preds);
       dat[0] = result.Residue();
       dat[1] = result.Weights();
     }

View File

@@ -107,7 +107,7 @@ struct EvalAMS : public Metric {
     CHECK(!distributed) << "metric AMS do not support distributed evaluation";
     using namespace std;  // NOLINT(*)

-    const auto ndata = static_cast<bst_omp_uint>(info.labels_.Size());
+    const auto ndata = static_cast<bst_omp_uint>(info.labels.Size());
     PredIndPairContainer rec(ndata);

     const auto &h_preds = preds.ConstHostVector();
@@ -120,11 +120,11 @@ struct EvalAMS : public Metric {
     const double br = 10.0;
     unsigned thresindex = 0;
     double s_tp = 0.0, b_fp = 0.0, tams = 0.0;
-    const auto& labels = info.labels_.ConstHostVector();
+    const auto& labels = info.labels.View(GenericParameter::kCpuId);
     for (unsigned i = 0; i < static_cast<unsigned>(ndata-1) && i < ntop; ++i) {
       const unsigned ridx = rec[i].second;
       const bst_float wt = info.GetWeight(ridx);
-      if (labels[ridx] > 0.5f) {
+      if (labels(ridx) > 0.5f) {
         s_tp += wt;
       } else {
         b_fp += wt;
@@ -164,7 +164,7 @@ struct EvalRank : public Metric, public EvalRankConfig {
  public:
   double Eval(const HostDeviceVector<bst_float> &preds, const MetaInfo &info,
               bool distributed) override {
-    CHECK_EQ(preds.Size(), info.labels_.Size())
+    CHECK_EQ(preds.Size(), info.labels.Size())
         << "label size predict size not match";

     // quick consistency when group is not available
@@ -194,7 +194,7 @@ struct EvalRank : public Metric, public EvalRankConfig {
       std::vector<double> sum_tloc(tparam_->Threads(), 0.0);

       if (!rank_gpu_ || tparam_->gpu_id < 0) {
-        const auto &labels = info.labels_.ConstHostVector();
+        const auto& labels = info.labels.View(GenericParameter::kCpuId);
         const auto &h_preds = preds.ConstHostVector();

         dmlc::OMPException exc;
@@ -208,7 +208,7 @@ struct EvalRank : public Metric, public EvalRankConfig {
           exc.Run([&]() {
             rec.clear();
             for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
-              rec.emplace_back(h_preds[j], static_cast<int>(labels[j]));
+              rec.emplace_back(h_preds[j], static_cast<int>(labels(j)));
             }
             sum_tloc[omp_get_thread_num()] += this->EvalGroup(&rec);
           });
@@ -348,7 +348,7 @@ struct EvalCox : public Metric {
     CHECK(!distributed) << "Cox metric does not support distributed evaluation";
     using namespace std;  // NOLINT(*)

-    const auto ndata = static_cast<bst_omp_uint>(info.labels_.Size());
+    const auto ndata = static_cast<bst_omp_uint>(info.labels.Size());
     const auto &label_order = info.LabelAbsSort();

     // pre-compute a sum for the denominator
@@ -362,10 +362,10 @@ struct EvalCox : public Metric {
     double out = 0;
     double accumulated_sum = 0;
     bst_omp_uint num_events = 0;
-    const auto& labels = info.labels_.ConstHostVector();
+    const auto& labels = info.labels.HostView();
     for (bst_omp_uint i = 0; i < ndata; ++i) {
       const size_t ind = label_order[i];
-      const auto label = labels[ind];
+      const auto label = labels(ind);
       if (label > 0) {
         out -= log(h_preds[ind]) - log(exp_p_sum);
         ++num_events;
@@ -373,7 +373,7 @@ struct EvalCox : public Metric {
       // only update the denominator after we move forward in time (labels are sorted)
       accumulated_sum += h_preds[ind];
-      if (i == ndata - 1 || std::abs(label) < std::abs(labels[label_order[i + 1]])) {
+      if (i == ndata - 1 || std::abs(label) < std::abs(labels(label_order[i + 1]))) {
         exp_p_sum -= accumulated_sum;
         accumulated_sum = 0;
       }

View File

@@ -41,18 +41,18 @@ struct EvalRankGpu : public Metric, public EvalRankConfig {
     auto device = tparam_->gpu_id;
     dh::safe_cuda(cudaSetDevice(device));

-    info.labels_.SetDevice(device);
+    info.labels.SetDevice(device);
     preds.SetDevice(device);

     auto dpreds = preds.ConstDevicePointer();
-    auto dlabels = info.labels_.ConstDevicePointer();
+    auto dlabels = info.labels.View(device);

     // Sort all the predictions
     dh::SegmentSorter<float> segment_pred_sorter;
     segment_pred_sorter.SortItems(dpreds, preds.Size(), gptr);

     // Compute individual group metric and sum them up
-    return EvalMetricT::EvalMetric(segment_pred_sorter, dlabels, *this);
+    return EvalMetricT::EvalMetric(segment_pred_sorter, dlabels.Values().data(), *this);
   }

   const char* Name() const override {

View File

@@ -33,11 +33,11 @@ class HingeObj : public ObjFunction {
                    const MetaInfo &info,
                    int iter,
                    HostDeviceVector<GradientPair> *out_gpair) override {
-    CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
-    CHECK_EQ(preds.Size(), info.labels_.Size())
+    CHECK_NE(info.labels.Size(), 0U) << "label set cannot be empty";
+    CHECK_EQ(preds.Size(), info.labels.Size())
         << "labels are not correctly provided"
         << "preds.size=" << preds.Size()
-        << ", label.size=" << info.labels_.Size();
+        << ", label.size=" << info.labels.Size();
     const size_t ndata = preds.Size();
     const bool is_null_weight = info.weights_.Size() == 0;
@@ -67,7 +67,7 @@ class HingeObj : public ObjFunction {
         },
         common::Range{0, static_cast<int64_t>(ndata)},
         tparam_->gpu_id).Eval(
-            out_gpair, &preds, &info.labels_, &info.weights_);
+            out_gpair, &preds, info.labels.Data(), &info.weights_);
   }

   void PredTransform(HostDeviceVector<bst_float> *io_preds) const override {

View File

@@ -55,13 +55,13 @@ class SoftmaxMultiClassObj : public ObjFunction {
     // Remove unused parameter compiler warning.
     (void) iter;

-    if (info.labels_.Size() == 0) {
+    if (info.labels.Size() == 0) {
       return;
     }
-    CHECK(preds.Size() == (static_cast<size_t>(param_.num_class) * info.labels_.Size()))
+    CHECK(preds.Size() == (static_cast<size_t>(param_.num_class) * info.labels.Size()))
         << "SoftmaxMultiClassObj: label size and pred size does not match.\n"
         << "label.Size() * num_class: "
-        << info.labels_.Size() * static_cast<size_t>(param_.num_class) << "\n"
+        << info.labels.Size() * static_cast<size_t>(param_.num_class) << "\n"
         << "num_class: " << param_.num_class << "\n"
         << "preds.Size(): " << preds.Size();
@@ -70,7 +70,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
     auto device = tparam_->gpu_id;
     out_gpair->SetDevice(device);
-    info.labels_.SetDevice(device);
+    info.labels.SetDevice(device);
     info.weights_.SetDevice(device);
     preds.SetDevice(device);
@@ -115,7 +115,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
             gpair[idx * nclass + k] = GradientPair(p * wt, h);
           }
         }, common::Range{0, ndata}, device, false)
-        .Eval(out_gpair, &info.labels_, &preds, &info.weights_, &label_correct_);
+        .Eval(out_gpair, info.labels.Data(), &preds, &info.weights_, &label_correct_);

     std::vector<int>& label_correct_h = label_correct_.HostVector();
     for (auto const flag : label_correct_h) {

View File

@@ -760,15 +760,15 @@ class LambdaRankObj : public ObjFunction {
                    const MetaInfo& info,
                    int iter,
                    HostDeviceVector<GradientPair>* out_gpair) override {
-    CHECK_EQ(preds.Size(), info.labels_.Size()) << "label size predict size not match";
+    CHECK_EQ(preds.Size(), info.labels.Size()) << "label size predict size not match";

     // quick consistency when group is not available
-    std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels_.Size());
+    std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels.Size());
     const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
-    CHECK(gptr.size() != 0 && gptr.back() == info.labels_.Size())
+    CHECK(gptr.size() != 0 && gptr.back() == info.labels.Size())
         << "group structure not consistent with #rows" << ", "
         << "group ponter size: " << gptr.size() << ", "
-        << "labels size: " << info.labels_.Size() << ", "
+        << "labels size: " << info.labels.Size() << ", "
         << "group pointer back: " << (gptr.size() == 0 ? 0 : gptr.back());

 #if defined(__CUDACC__)
@@ -820,7 +820,7 @@ class LambdaRankObj : public ObjFunction {
     bst_float weight_normalization_factor = ComputeWeightNormalizationFactor(info, gptr);

     const auto& preds_h = preds.HostVector();
-    const auto& labels = info.labels_.HostVector();
+    const auto& labels = info.labels.HostView();
     std::vector<GradientPair>& gpair = out_gpair->HostVector();
     const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
     out_gpair->Resize(preds.Size());
@@ -841,7 +841,7 @@ class LambdaRankObj : public ObjFunction {
         exc.Run([&]() {
           lst.clear(); pairs.clear();
           for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
-            lst.emplace_back(preds_h[j], labels[j], j);
+            lst.emplace_back(preds_h[j], labels(j), j);
             gpair[j] = GradientPair(0.0f, 0.0f);
           }
           std::stable_sort(lst.begin(), lst.end(), ListEntry::CmpPred);
@@ -916,7 +916,7 @@ class LambdaRankObj : public ObjFunction {
     // Set the device ID and copy them to the device
     out_gpair->SetDevice(device);
-    info.labels_.SetDevice(device);
+    info.labels.SetDevice(device);
     preds.SetDevice(device);
     info.weights_.SetDevice(device);
@@ -924,19 +924,19 @@ class LambdaRankObj : public ObjFunction {
     auto d_preds = preds.ConstDevicePointer();
     auto d_gpair = out_gpair->DevicePointer();
-    auto d_labels = info.labels_.ConstDevicePointer();
+    auto d_labels = info.labels.View(device);

     SortedLabelList slist(param_);

     // Sort the labels within the groups on the device
-    slist.Sort(info.labels_, gptr);
+    slist.Sort(*info.labels.Data(), gptr);

     // Initialize the gradients next
     out_gpair->Fill(GradientPair(0.0f, 0.0f));

     // Finally, compute the gradients
-    slist.ComputeGradients<LambdaWeightComputerT>
-        (d_preds, d_labels, info.weights_, iter, d_gpair, weight_normalization_factor);
+    slist.ComputeGradients<LambdaWeightComputerT>(d_preds, d_labels.Values().data(), info.weights_,
+                                                  iter, d_gpair, weight_normalization_factor);
   }
 #endif

View File

@ -59,9 +59,9 @@ class RegLossObj : public ObjFunction {
void GetGradient(const HostDeviceVector<bst_float>& preds, void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo &info, int, const MetaInfo &info, int,
HostDeviceVector<GradientPair>* out_gpair) override { HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_EQ(preds.Size(), info.labels_.Size()) CHECK_EQ(preds.Size(), info.labels.Size())
<< " " << "labels are not correctly provided" << " " << "labels are not correctly provided"
<< "preds.size=" << preds.Size() << ", label.size=" << info.labels_.Size() << ", " << "preds.size=" << preds.Size() << ", label.size=" << info.labels.Size() << ", "
<< "Loss: " << Loss::Name(); << "Loss: " << Loss::Name();
size_t const ndata = preds.Size(); size_t const ndata = preds.Size();
out_gpair->Resize(ndata); out_gpair->Resize(ndata);
@ -81,8 +81,7 @@ class RegLossObj : public ObjFunction {
bool on_device = device >= 0; bool on_device = device >= 0;
// On CPU we run the transformation each thread processing a contiguous block of data // On CPU we run the transformation each thread processing a contiguous block of data
// for better performance. // for better performance.
const size_t n_data_blocks = const size_t n_data_blocks = std::max(static_cast<size_t>(1), (on_device ? ndata : nthreads));
std::max(static_cast<size_t>(1), (on_device ? ndata : nthreads));
const size_t block_size = ndata / n_data_blocks + !!(ndata % n_data_blocks); const size_t block_size = ndata / n_data_blocks + !!(ndata % n_data_blocks);
common::Transform<>::Init( common::Transform<>::Init(
[block_size, ndata] XGBOOST_DEVICE( [block_size, ndata] XGBOOST_DEVICE(
@ -116,7 +115,7 @@ class RegLossObj : public ObjFunction {
} }
}, },
common::Range{0, static_cast<int64_t>(n_data_blocks)}, device) common::Range{0, static_cast<int64_t>(n_data_blocks)}, device)
.Eval(&additional_input_, out_gpair, &preds, &info.labels_, .Eval(&additional_input_, out_gpair, &preds, info.labels.Data(),
&info.weights_); &info.weights_);
auto const flag = additional_input_.HostVector().begin()[0]; auto const flag = additional_input_.HostVector().begin()[0];
@ -218,8 +217,8 @@ class PoissonRegression : public ObjFunction {
void GetGradient(const HostDeviceVector<bst_float>& preds, void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo &info, int, const MetaInfo &info, int,
HostDeviceVector<GradientPair> *out_gpair) override { HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided"; CHECK_EQ(preds.Size(), info.labels.Size()) << "labels are not correctly provided";
size_t const ndata = preds.Size(); size_t const ndata = preds.Size();
out_gpair->Resize(ndata); out_gpair->Resize(ndata);
auto device = tparam_->gpu_id; auto device = tparam_->gpu_id;
@ -249,7 +248,7 @@ class PoissonRegression : public ObjFunction {
expf(p + max_delta_step) * w}; expf(p + max_delta_step) * w};
}, },
common::Range{0, static_cast<int64_t>(ndata)}, device).Eval( common::Range{0, static_cast<int64_t>(ndata)}, device).Eval(
&label_correct_, out_gpair, &preds, &info.labels_, &info.weights_); &label_correct_, out_gpair, &preds, info.labels.Data(), &info.weights_);
// copy "label correct" flags back to host // copy "label correct" flags back to host
std::vector<int>& label_correct_h = label_correct_.HostVector(); std::vector<int>& label_correct_h = label_correct_.HostVector();
for (auto const flag : label_correct_h) { for (auto const flag : label_correct_h) {
@ -313,8 +312,8 @@ class CoxRegression : public ObjFunction {
void GetGradient(const HostDeviceVector<bst_float>& preds, void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo &info, int, const MetaInfo &info, int,
HostDeviceVector<GradientPair> *out_gpair) override { HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided"; CHECK_EQ(preds.Size(), info.labels.Size()) << "labels are not correctly provided";
const auto& preds_h = preds.HostVector(); const auto& preds_h = preds.HostVector();
out_gpair->Resize(preds_h.size()); out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector(); auto& gpair = out_gpair->HostVector();
@ -334,7 +333,7 @@ class CoxRegression : public ObjFunction {
} }
// start calculating grad and hess // start calculating grad and hess
const auto& labels = info.labels_.HostVector(); const auto& labels = info.labels.HostView();
double r_k = 0; double r_k = 0;
double s_k = 0; double s_k = 0;
double last_exp_p = 0.0; double last_exp_p = 0.0;
@ -345,7 +344,7 @@ class CoxRegression : public ObjFunction {
const double p = preds_h[ind]; const double p = preds_h[ind];
const double exp_p = std::exp(p); const double exp_p = std::exp(p);
const double w = info.GetWeight(ind); const double w = info.GetWeight(ind);
const double y = labels[ind]; const double y = labels(ind);
const double abs_y = std::abs(y); const double abs_y = std::abs(y);
// only update the denominator after we move forward in time (labels are sorted) // only update the denominator after we move forward in time (labels are sorted)
@ -414,8 +413,8 @@ class GammaRegression : public ObjFunction {
void GetGradient(const HostDeviceVector<bst_float> &preds, void GetGradient(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info, int, const MetaInfo &info, int,
HostDeviceVector<GradientPair> *out_gpair) override { HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided"; CHECK_EQ(preds.Size(), info.labels.Size()) << "labels are not correctly provided";
const size_t ndata = preds.Size(); const size_t ndata = preds.Size();
auto device = tparam_->gpu_id; auto device = tparam_->gpu_id;
out_gpair->Resize(ndata); out_gpair->Resize(ndata);
@ -443,7 +442,7 @@ class GammaRegression : public ObjFunction {
_out_gpair[_idx] = GradientPair((1 - y / expf(p)) * w, y / expf(p) * w); _out_gpair[_idx] = GradientPair((1 - y / expf(p)) * w, y / expf(p) * w);
}, },
common::Range{0, static_cast<int64_t>(ndata)}, device).Eval( common::Range{0, static_cast<int64_t>(ndata)}, device).Eval(
&label_correct_, out_gpair, &preds, &info.labels_, &info.weights_); &label_correct_, out_gpair, &preds, info.labels.Data(), &info.weights_);
// copy "label correct" flags back to host // copy "label correct" flags back to host
std::vector<int>& label_correct_h = label_correct_.HostVector(); std::vector<int>& label_correct_h = label_correct_.HostVector();
@ -514,8 +513,8 @@ class TweedieRegression : public ObjFunction {
void GetGradient(const HostDeviceVector<bst_float>& preds, void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo &info, int, const MetaInfo &info, int,
HostDeviceVector<GradientPair> *out_gpair) override { HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels.Size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.Size(), info.labels_.Size()) << "labels are not correctly provided"; CHECK_EQ(preds.Size(), info.labels.Size()) << "labels are not correctly provided";
const size_t ndata = preds.Size(); const size_t ndata = preds.Size();
out_gpair->Resize(ndata); out_gpair->Resize(ndata);
@ -550,7 +549,7 @@ class TweedieRegression : public ObjFunction {
_out_gpair[_idx] = GradientPair(grad * w, hess * w); _out_gpair[_idx] = GradientPair(grad * w, hess * w);
}, },
common::Range{0, static_cast<int64_t>(ndata), 1}, device) common::Range{0, static_cast<int64_t>(ndata), 1}, device)
.Eval(&label_correct_, out_gpair, &preds, &info.labels_, &info.weights_); .Eval(&label_correct_, out_gpair, &preds, info.labels.Data(), &info.weights_);
// copy "label correct" flags back to host // copy "label correct" flags back to host
std::vector<int>& label_correct_h = label_correct_.HostVector(); std::vector<int>& label_correct_h = label_correct_.HostVector();
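
Note: `common::Transform::Eval` still takes `HostDeviceVector` arguments, so every objective in this file passes the tensor's backing buffer via `labels.Data()` instead of `&labels_`. A reduced sketch of the shared pattern (Transform plumbing elided; a const overload of `Data()` is assumed; `CHECK_*` come from dmlc logging):

    void ValidateAndFetch(xgboost::HostDeviceVector<float> const& preds,
                          xgboost::MetaInfo const& info,
                          xgboost::HostDeviceVector<float> const** out) {
      CHECK_NE(info.labels.Size(), 0U) << "label set cannot be empty";
      CHECK_EQ(preds.Size(), info.labels.Size()) << "labels are not correctly provided";
      // Same buffer the hunks above pass as Eval(..., info.labels.Data(), ...).
      *out = info.labels.Data();
    }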


@ -91,7 +91,7 @@ TEST(CAPI, ConfigIO) {
for (size_t i = 0; i < labels.size(); ++i) { for (size_t i = 0; i < labels.size(); ++i) {
labels[i] = i; labels[i] = i;
} }
p_dmat->Info().labels_.HostVector() = labels; p_dmat->Info().labels.Data()->HostVector() = labels;
std::shared_ptr<Learner> learner { Learner::Create(mat) }; std::shared_ptr<Learner> learner { Learner::Create(mat) };
@ -125,7 +125,7 @@ TEST(CAPI, JsonModelIO) {
for (size_t i = 0; i < labels.size(); ++i) { for (size_t i = 0; i < labels.size(); ++i) {
labels[i] = i; labels[i] = i;
} }
p_dmat->Info().labels_.HostVector() = labels; p_dmat->Info().labels.Data()->HostVector() = labels;
std::shared_ptr<Learner> learner { Learner::Create(mat) }; std::shared_ptr<Learner> learner { Learner::Create(mat) };
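
Note: test code that previously assigned to `labels_.HostVector()` now goes through one extra hop, since `Data()` returns the backing `HostDeviceVector<float>*`. A sketch of the fill pattern, assuming (as later hunks suggest) that `Reshape(n)` allocates an n-by-1 tensor:

    #include <numeric>  // std::iota

    void FillSequentialLabels(xgboost::MetaInfo* info, std::size_t n) {
      info->labels.Reshape(n);  // n rows; the remaining dimension defaults to 1
      auto& h = info->labels.Data()->HostVector();  // plain std::vector<float>&
      std::iota(h.begin(), h.end(), 0.0f);
    }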


@ -16,9 +16,9 @@ TEST(MetaInfo, GetSet) {
double double2[2] = {1.0, 2.0}; double double2[2] = {1.0, 2.0};
EXPECT_EQ(info.labels_.Size(), 0); EXPECT_EQ(info.labels.Size(), 0);
info.SetInfo("label", double2, xgboost::DataType::kFloat32, 2); info.SetInfo("label", double2, xgboost::DataType::kFloat32, 2);
EXPECT_EQ(info.labels_.Size(), 2); EXPECT_EQ(info.labels.Size(), 2);
float float2[2] = {1.0f, 2.0f}; float float2[2] = {1.0f, 2.0f};
EXPECT_EQ(info.GetWeight(1), 1.0f) EXPECT_EQ(info.GetWeight(1), 1.0f)
@ -120,8 +120,8 @@ TEST(MetaInfo, SaveLoadBinary) {
EXPECT_EQ(inforead.num_col_, info.num_col_); EXPECT_EQ(inforead.num_col_, info.num_col_);
EXPECT_EQ(inforead.num_nonzero_, info.num_nonzero_); EXPECT_EQ(inforead.num_nonzero_, info.num_nonzero_);
ASSERT_EQ(inforead.labels_.HostVector(), values); ASSERT_EQ(inforead.labels.Data()->HostVector(), values);
EXPECT_EQ(inforead.labels_.HostVector(), info.labels_.HostVector()); EXPECT_EQ(inforead.labels.Data()->HostVector(), info.labels.Data()->HostVector());
EXPECT_EQ(inforead.group_ptr_, info.group_ptr_); EXPECT_EQ(inforead.group_ptr_, info.group_ptr_);
EXPECT_EQ(inforead.weights_.HostVector(), info.weights_.HostVector()); EXPECT_EQ(inforead.weights_.HostVector(), info.weights_.HostVector());
@ -236,8 +236,9 @@ TEST(MetaInfo, Validate) {
EXPECT_THROW(info.Validate(0), dmlc::Error); EXPECT_THROW(info.Validate(0), dmlc::Error);
std::vector<float> labels(info.num_row_ + 1); std::vector<float> labels(info.num_row_ + 1);
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1); EXPECT_THROW(
EXPECT_THROW(info.Validate(0), dmlc::Error); { info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1); },
dmlc::Error);
// Make overflow data, which can happen when users pass group structure as int // Make overflow data, which can happen when users pass group structure as int
// or float. // or float.
@ -254,7 +255,7 @@ TEST(MetaInfo, Validate) {
info.group_ptr_.clear(); info.group_ptr_.clear();
labels.resize(info.num_row_); labels.resize(info.num_row_);
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_); info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
info.labels_.SetDevice(0); info.labels.SetDevice(0);
EXPECT_THROW(info.Validate(1), dmlc::Error); EXPECT_THROW(info.Validate(1), dmlc::Error);
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups}; xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
@ -269,12 +270,12 @@ TEST(MetaInfo, Validate) {
TEST(MetaInfo, HostExtend) { TEST(MetaInfo, HostExtend) {
xgboost::MetaInfo lhs, rhs; xgboost::MetaInfo lhs, rhs;
size_t const kRows = 100; size_t const kRows = 100;
lhs.labels_.Resize(kRows); lhs.labels.Reshape(kRows);
lhs.num_row_ = kRows; lhs.num_row_ = kRows;
rhs.labels_.Resize(kRows); rhs.labels.Reshape(kRows);
rhs.num_row_ = kRows; rhs.num_row_ = kRows;
ASSERT_TRUE(lhs.labels_.HostCanRead()); ASSERT_TRUE(lhs.labels.Data()->HostCanRead());
ASSERT_TRUE(rhs.labels_.HostCanRead()); ASSERT_TRUE(rhs.labels.Data()->HostCanRead());
size_t per_group = 10; size_t per_group = 10;
std::vector<xgboost::bst_group_t> groups; std::vector<xgboost::bst_group_t> groups;
@ -286,10 +287,10 @@ TEST(MetaInfo, HostExtend) {
lhs.Extend(rhs, true, true); lhs.Extend(rhs, true, true);
ASSERT_EQ(lhs.num_row_, kRows * 2); ASSERT_EQ(lhs.num_row_, kRows * 2);
ASSERT_TRUE(lhs.labels_.HostCanRead()); ASSERT_TRUE(lhs.labels.Data()->HostCanRead());
ASSERT_TRUE(rhs.labels_.HostCanRead()); ASSERT_TRUE(rhs.labels.Data()->HostCanRead());
ASSERT_FALSE(lhs.labels_.DeviceCanRead()); ASSERT_FALSE(lhs.labels.Data()->DeviceCanRead());
ASSERT_FALSE(rhs.labels_.DeviceCanRead()); ASSERT_FALSE(rhs.labels.Data()->DeviceCanRead());
ASSERT_EQ(lhs.group_ptr_.front(), 0); ASSERT_EQ(lhs.group_ptr_.front(), 0);
ASSERT_EQ(lhs.group_ptr_.back(), kRows * 2); ASSERT_EQ(lhs.group_ptr_.back(), kRows * 2);
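
Note: `Resize(kRows)` becomes `Reshape(kRows)`, and the host/device residency queries move onto the backing vector. A compact restatement of what the assertions above check, under the same assumptions:

    #include <cassert>

    void CheckHostResidency() {
      xgboost::MetaInfo lhs;
      lhs.labels.Reshape(100);                    // replaces labels_.Resize(100)
      assert(lhs.labels.Size() == 100);           // total element count (rows x 1)
      assert(lhs.labels.Data()->HostCanRead());   // freshly reshaped data is host-resident
      assert(!lhs.labels.Data()->DeviceCanRead());
    }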


@ -52,10 +52,10 @@ TEST(MetaInfo, FromInterface) {
MetaInfo info; MetaInfo info;
info.SetInfo("label", str.c_str()); info.SetInfo("label", str.c_str());
auto const& h_label = info.labels_.HostVector(); auto const& h_label = info.labels.HostView();
ASSERT_EQ(h_label.size(), d_data.size()); ASSERT_EQ(h_label.Size(), d_data.size());
for (size_t i = 0; i < d_data.size(); ++i) { for (size_t i = 0; i < d_data.size(); ++i) {
ASSERT_EQ(h_label[i], d_data[i]); ASSERT_EQ(h_label(i), d_data[i]);
} }
info.SetInfo("weight", str.c_str()); info.SetInfo("weight", str.c_str());
@ -147,15 +147,15 @@ TEST(MetaInfo, DeviceExtend) {
std::string str = PrepareData<float>("<f4", &d_data, kRows); std::string str = PrepareData<float>("<f4", &d_data, kRows);
lhs.SetInfo("label", str.c_str()); lhs.SetInfo("label", str.c_str());
rhs.SetInfo("label", str.c_str()); rhs.SetInfo("label", str.c_str());
ASSERT_FALSE(rhs.labels_.HostCanRead()); ASSERT_FALSE(rhs.labels.Data()->HostCanRead());
lhs.num_row_ = kRows; lhs.num_row_ = kRows;
rhs.num_row_ = kRows; rhs.num_row_ = kRows;
lhs.Extend(rhs, true, true); lhs.Extend(rhs, true, true);
ASSERT_EQ(lhs.num_row_, kRows * 2); ASSERT_EQ(lhs.num_row_, kRows * 2);
ASSERT_FALSE(lhs.labels_.HostCanRead()); ASSERT_FALSE(lhs.labels.Data()->HostCanRead());
ASSERT_FALSE(lhs.labels_.HostCanRead()); ASSERT_FALSE(lhs.labels.Data()->HostCanRead());
ASSERT_FALSE(rhs.labels_.HostCanRead()); ASSERT_FALSE(rhs.labels.Data()->HostCanRead());
} }
} // namespace xgboost } // namespace xgboost
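
Note: the CUDA test above relies on labels ingested from a device array interface staying device-resident. A sketch of that invariant, assuming a visible GPU and a valid `__cuda_array_interface__` JSON string:

    void CheckDeviceResident(xgboost::MetaInfo* info, char const* cuda_array_interface) {
      info->SetInfo("label", cuda_array_interface);  // ingest from device memory
      assert(!info->labels.Data()->HostCanRead());   // no host copy is made eagerly
      auto d_labels = info->labels.View(0);          // view over device memory
      (void)d_labels;
    }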


@ -16,30 +16,27 @@ namespace xgboost {
inline void TestMetaInfoStridedData(int32_t device) { inline void TestMetaInfoStridedData(int32_t device) {
MetaInfo info; MetaInfo info;
{ {
// label // labels
HostDeviceVector<float> labels; linalg::Tensor<float, 3> labels;
labels.Resize(64); labels.Reshape(4, 2, 3);
auto& h_labels = labels.HostVector(); auto& h_label = labels.Data()->HostVector();
std::iota(h_labels.begin(), h_labels.end(), 0.0f); std::iota(h_label.begin(), h_label.end(), 0.0);
bool is_gpu = device >= 0; auto t_labels = labels.View(device).Slice(linalg::All(), 0, linalg::All());
if (is_gpu) { ASSERT_EQ(t_labels.Shape().size(), 2);
labels.SetDevice(0);
}
auto t = linalg::TensorView<float const, 2>{ info.SetInfo("label", StringView{ArrayInterfaceStr(t_labels)});
is_gpu ? labels.ConstDeviceSpan() : labels.ConstHostSpan(), {32, 2}, device}; auto const& h_result = info.labels.View(-1);
auto s = t.Slice(linalg::All(), 0); ASSERT_EQ(h_result.Shape().size(), 2);
auto in_labels = labels.View(-1);
auto str = ArrayInterfaceStr(s); linalg::ElementWiseKernelHost(h_result, omp_get_max_threads(), [&](size_t i, float v_0) {
ASSERT_EQ(s.Size(), 32); auto tup = linalg::UnravelIndex(i, h_result.Shape());
auto i0 = std::get<0>(tup);
info.SetInfo("label", StringView{str}); auto i1 = std::get<1>(tup);
auto const& h_result = info.labels_.HostVector(); // Sliced at second dimension.
ASSERT_EQ(h_result.size(), 32); auto v_1 = in_labels(i0, 0, i1);
CHECK_EQ(v_0, v_1);
for (auto v : h_result) { return v_0;
ASSERT_EQ(static_cast<int32_t>(v) % 2, 0); });
}
} }
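
Note (re the rewritten label block just above): the test now round-trips a strided view through the array interface. A condensed sketch of the slicing step, with helper names as they appear in this diff:

    xgboost::linalg::Tensor<float, 3> labels;
    labels.Reshape(4, 2, 3);
    // Fix the middle dimension at 0: a 4 x 3 view whose strides skip dimension 1.
    auto t = labels.View(-1).Slice(xgboost::linalg::All(), 0, xgboost::linalg::All());
    // ArrayInterfaceStr(t) serializes the strided layout, and
    // linalg::UnravelIndex(i, t.Shape()) maps a linear index back to coordinates.
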
{ {
// qid // qid


@ -23,7 +23,7 @@ TEST(ProxyDMatrix, DeviceData) {
proxy.SetInfo("label", labels.c_str()); proxy.SetInfo("label", labels.c_str());
ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CupyAdapter>)); ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CupyAdapter>));
ASSERT_EQ(proxy.Info().labels_.Size(), kRows); ASSERT_EQ(proxy.Info().labels.Size(), kRows);
ASSERT_EQ(dmlc::get<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumRows(), ASSERT_EQ(dmlc::get<std::shared_ptr<CupyAdapter>>(proxy.Adapter())->NumRows(),
kRows); kRows);
ASSERT_EQ( ASSERT_EQ(


@ -20,7 +20,7 @@ TEST(SimpleDMatrix, MetaInfo) {
EXPECT_EQ(dmat->Info().num_row_, 2); EXPECT_EQ(dmat->Info().num_row_, 2);
EXPECT_EQ(dmat->Info().num_col_, 5); EXPECT_EQ(dmat->Info().num_col_, 5);
EXPECT_EQ(dmat->Info().num_nonzero_, 6); EXPECT_EQ(dmat->Info().num_nonzero_, 6);
EXPECT_EQ(dmat->Info().labels_.Size(), dmat->Info().num_row_); EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);
delete dmat; delete dmat;
} }
@ -258,7 +258,7 @@ TEST(SimpleDMatrix, Slice) {
std::array<int32_t, 3> ridxs {1, 3, 5}; std::array<int32_t, 3> ridxs {1, 3, 5};
std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) }; std::unique_ptr<DMatrix> out { p_m->Slice(ridxs) };
ASSERT_EQ(out->Info().labels_.Size(), ridxs.size()); ASSERT_EQ(out->Info().labels.Size(), ridxs.size());
ASSERT_EQ(out->Info().labels_lower_bound_.Size(), ridxs.size()); ASSERT_EQ(out->Info().labels_lower_bound_.Size(), ridxs.size());
ASSERT_EQ(out->Info().labels_upper_bound_.Size(), ridxs.size()); ASSERT_EQ(out->Info().labels_upper_bound_.Size(), ridxs.size());
ASSERT_EQ(out->Info().base_margin_.Size(), ridxs.size() * kClasses); ASSERT_EQ(out->Info().base_margin_.Size(), ridxs.size() * kClasses);


@ -113,7 +113,7 @@ TEST(SparsePageDMatrix, MetaInfo) {
EXPECT_EQ(dmat->Info().num_row_, 8ul); EXPECT_EQ(dmat->Info().num_row_, 8ul);
EXPECT_EQ(dmat->Info().num_col_, 5ul); EXPECT_EQ(dmat->Info().num_col_, 5ul);
EXPECT_EQ(dmat->Info().num_nonzero_, kEntries); EXPECT_EQ(dmat->Info().num_nonzero_, kEntries);
EXPECT_EQ(dmat->Info().labels_.Size(), dmat->Info().num_row_); EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);
delete dmat; delete dmat;
} }


@ -105,7 +105,7 @@ TEST(GBTree, WrongUpdater) {
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
p_dmat->Info().labels_.Resize(kRows); p_dmat->Info().labels.Reshape(kRows);
auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat})); auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
// Hist can not be used for updating tree. // Hist can not be used for updating tree.
@ -126,7 +126,7 @@ TEST(GBTree, ChoosePredictor) {
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(); auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
auto& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data; auto& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
p_dmat->Info().labels_.Resize(kRows); p_dmat->Info().labels.Reshape(kRows);
auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat})); auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}}); learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});


@ -100,7 +100,8 @@ void CheckObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
std::vector<xgboost::bst_float> out_hess) { std::vector<xgboost::bst_float> out_hess) {
xgboost::MetaInfo info; xgboost::MetaInfo info;
info.num_row_ = labels.size(); info.num_row_ = labels.size();
info.labels_.HostVector() = labels; info.labels =
xgboost::linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, -1};
info.weights_.HostVector() = weights; info.weights_.HostVector() = weights;
CheckObjFunctionImpl(obj, preds, labels, weights, info, out_grad, out_hess); CheckObjFunctionImpl(obj, preds, labels, weights, info, out_grad, out_hess);
@ -135,7 +136,8 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
std::vector<xgboost::bst_float> out_hess) { std::vector<xgboost::bst_float> out_hess) {
xgboost::MetaInfo info; xgboost::MetaInfo info;
info.num_row_ = labels.size(); info.num_row_ = labels.size();
info.labels_.HostVector() = labels; info.labels =
xgboost::linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, -1};
info.weights_.HostVector() = weights; info.weights_.HostVector() = weights;
info.group_ptr_ = groups; info.group_ptr_ = groups;
@ -149,7 +151,8 @@ xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
std::vector<xgboost::bst_uint> groups) { std::vector<xgboost::bst_uint> groups) {
xgboost::MetaInfo info; xgboost::MetaInfo info;
info.num_row_ = labels.size(); info.num_row_ = labels.size();
info.labels_.HostVector() = labels; info.labels =
xgboost::linalg::Tensor<float, 2>{labels.begin(), labels.end(), {labels.size()}, -1};
info.weights_.HostVector() = weights; info.weights_.HostVector() = weights;
info.group_ptr_ = groups; info.group_ptr_ = groups;
@ -340,17 +343,18 @@ RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
if (with_label) { if (with_label) {
RandomDataGenerator gen(rows_, 1, 0); RandomDataGenerator gen(rows_, 1, 0);
if (!float_label) { if (!float_label) {
gen.Lower(0).Upper(classes).GenerateDense(&out->Info().labels_); gen.Lower(0).Upper(classes).GenerateDense(out->Info().labels.Data());
auto& h_labels = out->Info().labels_.HostVector(); out->Info().labels.Reshape(out->Info().labels.Size());
auto& h_labels = out->Info().labels.Data()->HostVector();
for (auto& v : h_labels) { for (auto& v : h_labels) {
v = static_cast<float>(static_cast<uint32_t>(v)); v = static_cast<float>(static_cast<uint32_t>(v));
} }
} else { } else {
gen.GenerateDense(&out->Info().labels_); gen.GenerateDense(out->Info().labels.Data());
} }
} }
if (device_ >= 0) { if (device_ >= 0) {
out->Info().labels_.SetDevice(device_); out->Info().labels.SetDevice(device_);
out->Info().feature_types.SetDevice(device_); out->Info().feature_types.SetDevice(device_);
for (auto const& page : out->GetBatches<SparsePage>()) { for (auto const& page : out->GetBatches<SparsePage>()) {
page.data.SetDevice(device_); page.data.SetDevice(device_);
@ -520,7 +524,8 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
for (size_t i = 0; i < kRows; ++i) { for (size_t i = 0; i < kRows; ++i) {
labels[i] = i; labels[i] = i;
} }
p_dmat->Info().labels_.HostVector() = labels; p_dmat->Info().labels =
linalg::Tensor<float, 2>{labels.cbegin(), labels.cend(), {labels.size()}, -1};
HostDeviceVector<GradientPair> gpair; HostDeviceVector<GradientPair> gpair;
auto& h_gpair = gpair.HostVector(); auto& h_gpair = gpair.HostVector();
h_gpair.resize(kRows); h_gpair.resize(kRows);
@ -636,7 +641,7 @@ class RMMAllocator {};
void DeleteRMMResource(RMMAllocator* r) {} void DeleteRMMResource(RMMAllocator* r) {}
RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv) { RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv) {
return RMMAllocatorPtr(nullptr, DeleteRMMResource); return {nullptr, DeleteRMMResource};
} }
#endif // !defined(XGBOOST_USE_RMM) || XGBOOST_USE_RMM != 1 #endif // !defined(XGBOOST_USE_RMM) || XGBOOST_USE_RMM != 1
} // namespace xgboost } // namespace xgboost
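
Note: the helper changes above use the iterator-range constructor; the braced list gives the shape (one extent per dimension, trailing dimensions defaulting to 1) and the final argument is the device, with -1 meaning host-only. Sketch of the same call outside the helpers:

    std::vector<float> labels{0.f, 1.f, 2.f, 3.f};
    xgboost::MetaInfo info;
    info.num_row_ = labels.size();
    info.labels = xgboost::linalg::Tensor<float, 2>{
        labels.cbegin(), labels.cend(), {labels.size()}, -1};  // shape {n}, host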


@ -21,10 +21,10 @@ TEST(Metric, DeclareUnifiedTest(BinaryAUC)) {
// Invalid dataset // Invalid dataset
MetaInfo info; MetaInfo info;
info.labels_ = {0, 0}; info.labels = linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, -1};
float auc = metric->Eval({1, 1}, info, false); float auc = metric->Eval({1, 1}, info, false);
ASSERT_TRUE(std::isnan(auc)); ASSERT_TRUE(std::isnan(auc));
info.labels_ = HostDeviceVector<float>{}; *info.labels.Data() = HostDeviceVector<float>{};
auc = metric->Eval(HostDeviceVector<float>{}, info, false); auc = metric->Eval(HostDeviceVector<float>{}, info, false);
ASSERT_TRUE(std::isnan(auc)); ASSERT_TRUE(std::isnan(auc));
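
Note: this hunk exercises the `initializer_list` constructor and shows that a tensor can be emptied by assigning through its backing buffer. Compact sketch:

    info.labels = xgboost::linalg::Tensor<float, 2>{{0.0f, 0.0f}, {2}, -1};  // two labels, host
    // Drop the data through the buffer; shape metadata is left as-is, which is
    // sufficient for the invalid-dataset checks above.
    *info.labels.Data() = xgboost::HostDeviceVector<float>{};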


@ -17,7 +17,7 @@ inline void CheckDeterministicMetricElementWise(StringView name, int32_t device)
HostDeviceVector<float> predts; HostDeviceVector<float> predts;
MetaInfo info; MetaInfo info;
auto &h_labels = info.labels_.HostVector(); auto &h_labels = info.labels.Data()->HostVector();
auto &h_predts = predts.HostVector(); auto &h_predts = predts.HostVector();
SimpleLCG lcg; SimpleLCG lcg;


@ -11,13 +11,14 @@ inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device)
HostDeviceVector<float> predts; HostDeviceVector<float> predts;
MetaInfo info; MetaInfo info;
auto &h_labels = info.labels_.HostVector();
auto &h_predts = predts.HostVector(); auto &h_predts = predts.HostVector();
SimpleLCG lcg; SimpleLCG lcg;
size_t n_samples = 2048, n_classes = 4; size_t n_samples = 2048, n_classes = 4;
h_labels.resize(n_samples);
info.labels.Reshape(n_samples);
auto &h_labels = info.labels.Data()->HostVector();
h_predts.resize(n_samples * n_classes); h_predts.resize(n_samples * n_classes);
{ {


@ -1,5 +1,5 @@
/*! /*!
* Copyright 2017-2019 XGBoost contributors * Copyright 2017-2021 XGBoost contributors
*/ */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <xgboost/objective.h> #include <xgboost/objective.h>
@ -293,8 +293,8 @@ TEST(Objective, CPU_vs_CUDA) {
} }
auto& info = pdmat->Info(); auto& info = pdmat->Info();
info.labels_.Resize(kRows); info.labels.Reshape(kRows);
auto& h_labels = info.labels_.HostVector(); auto& h_labels = info.labels.Data()->HostVector();
for (size_t i = 0; i < h_labels.size(); ++i) { for (size_t i = 0; i < h_labels.size(); ++i) {
h_labels[i] = 1 / (float)(i+1); h_labels[i] = 1 / (float)(i+1);
} }


@ -45,8 +45,8 @@ void TestTrainingPrediction(size_t rows, size_t bins,
std::unique_ptr<Learner> learner; std::unique_ptr<Learner> learner;
auto train = [&](std::string predictor, HostDeviceVector<float> *out) { auto train = [&](std::string predictor, HostDeviceVector<float> *out) {
auto &h_label = p_hist->Info().labels_.HostVector(); p_hist->Info().labels.Reshape(rows, 1);
h_label.resize(rows); auto &h_label = p_hist->Info().labels.Data()->HostVector();
for (size_t i = 0; i < rows; ++i) { for (size_t i = 0; i < rows; ++i) {
h_label[i] = i % kClasses; h_label[i] = i % kClasses;


@ -141,9 +141,8 @@ TEST(Learner, JsonModelIO) {
size_t constexpr kRows = 8; size_t constexpr kRows = 8;
int32_t constexpr kIters = 4; int32_t constexpr kIters = 4;
std::shared_ptr<DMatrix> p_dmat{ std::shared_ptr<DMatrix> p_dmat{RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix()};
RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix()}; p_dmat->Info().labels.Reshape(kRows);
p_dmat->Info().labels_.Resize(kRows);
CHECK_NE(p_dmat->Info().num_col_, 0); CHECK_NE(p_dmat->Info().num_col_, 0);
{ {
@ -204,9 +203,8 @@ TEST(Learner, MultiThreadedPredict) {
size_t constexpr kRows = 1000; size_t constexpr kRows = 1000;
size_t constexpr kCols = 100; size_t constexpr kCols = 100;
std::shared_ptr<DMatrix> p_dmat{ std::shared_ptr<DMatrix> p_dmat{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()};
RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix()}; p_dmat->Info().labels.Reshape(kRows);
p_dmat->Info().labels_.Resize(kRows);
CHECK_NE(p_dmat->Info().num_col_, 0); CHECK_NE(p_dmat->Info().num_col_, 0);
std::shared_ptr<DMatrix> p_data{ std::shared_ptr<DMatrix> p_data{
@ -240,7 +238,7 @@ TEST(Learner, BinaryModelIO) {
size_t constexpr kRows = 8; size_t constexpr kRows = 8;
int32_t constexpr kIters = 4; int32_t constexpr kIters = 4;
auto p_dmat = RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix(); auto p_dmat = RandomDataGenerator{kRows, 10, 0}.GenerateDMatrix();
p_dmat->Info().labels_.Resize(kRows); p_dmat->Info().labels.Reshape(kRows);
std::unique_ptr<Learner> learner{Learner::Create({p_dmat})}; std::unique_ptr<Learner> learner{Learner::Create({p_dmat})};
learner->SetParam("eval_metric", "rmsle"); learner->SetParam("eval_metric", "rmsle");
@ -279,7 +277,7 @@ TEST(Learner, GPUConfiguration) {
for (size_t i = 0; i < labels.size(); ++i) { for (size_t i = 0; i < labels.size(); ++i) {
labels[i] = i; labels[i] = i;
} }
p_dmat->Info().labels_.HostVector() = labels; p_dmat->Info().labels.Data()->HostVector() = labels;
{ {
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"booster", "gblinear"}, learner->SetParams({Arg{"booster", "gblinear"},


@ -204,8 +204,8 @@ class SerializationTest : public ::testing::Test {
void SetUp() override { void SetUp() override {
p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix(); p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix();
p_dmat_->Info().labels_.Resize(kRows); p_dmat_->Info().labels.Reshape(kRows);
auto &h_labels = p_dmat_->Info().labels_.HostVector(); auto& h_labels = p_dmat_->Info().labels.Data()->HostVector();
xgboost::SimpleLCG gen(0); xgboost::SimpleLCG gen(0);
SimpleRealUniformDistribution<float> dis(0.0f, 1.0f); SimpleRealUniformDistribution<float> dis(0.0f, 1.0f);
@ -219,6 +219,9 @@ class SerializationTest : public ::testing::Test {
} }
}; };
size_t constexpr SerializationTest::kRows;
size_t constexpr SerializationTest::kCols;
TEST_F(SerializationTest, Exact) { TEST_F(SerializationTest, Exact) {
TestLearnerSerialization({{"booster", "gbtree"}, TestLearnerSerialization({{"booster", "gbtree"},
{"seed", "0"}, {"seed", "0"},
@ -389,8 +392,8 @@ class LogitSerializationTest : public SerializationTest {
p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix(); p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix();
std::shared_ptr<DMatrix> p_dmat{p_dmat_}; std::shared_ptr<DMatrix> p_dmat{p_dmat_};
p_dmat->Info().labels_.Resize(kRows); p_dmat->Info().labels.Reshape(kRows);
auto &h_labels = p_dmat->Info().labels_.HostVector(); auto& h_labels = p_dmat->Info().labels.Data()->HostVector();
std::bernoulli_distribution flip(0.5); std::bernoulli_distribution flip(0.5);
auto& rnd = common::GlobalRandom(); auto& rnd = common::GlobalRandom();
@ -513,8 +516,8 @@ class MultiClassesSerializationTest : public SerializationTest {
p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix(); p_dmat_ = RandomDataGenerator(kRows, kCols, .5f).GenerateDMatrix();
std::shared_ptr<DMatrix> p_dmat{p_dmat_}; std::shared_ptr<DMatrix> p_dmat{p_dmat_};
p_dmat->Info().labels_.Resize(kRows); p_dmat->Info().labels.Reshape(kRows);
auto &h_labels = p_dmat->Info().labels_.HostVector(); auto &h_labels = p_dmat->Info().labels.Data()->HostVector();
std::uniform_int_distribution<size_t> categorical(0, kClasses - 1); std::uniform_int_distribution<size_t> categorical(0, kClasses - 1);
auto& rnd = common::GlobalRandom(); auto& rnd = common::GlobalRandom();


@ -148,7 +148,8 @@ class TestPandas:
assert not np.any(arr == -1.0) assert not np.any(arr == -1.0)
X = X["f0"] X = X["f0"]
with pytest.raises(ValueError): y = y[:X.shape[0]]
with pytest.raises(ValueError, match=r".*enable_categorical.*"):
xgb.DMatrix(X, y) xgb.DMatrix(X, y)
Xy = xgb.DMatrix(X, y, enable_categorical=True) Xy = xgb.DMatrix(X, y, enable_categorical=True)