Reduced span overheads in objective function calculation (#7206)

Co-authored-by: fis <jm.yuan@outlook.com>
Author: ShvetsKS
Date: 2021-09-22 23:43:59 +03:00 (committed by GitHub)
parent 9472be7d77
commit 475fd1abec


@@ -73,30 +73,47 @@ class RegLossObj : public ObjFunction {
     additional_input_.HostVector().begin()[1] = scale_pos_weight;
     additional_input_.HostVector().begin()[2] = is_null_weight;
-    common::Transform<>::Init([] XGBOOST_DEVICE(size_t _idx,
-                                 common::Span<float> _additional_input,
-                                 common::Span<GradientPair> _out_gpair,
-                                 common::Span<const bst_float> _preds,
-                                 common::Span<const bst_float> _labels,
-                                 common::Span<const bst_float> _weights) {
+    const size_t nthreads = tparam_->Threads();
+    bool on_device = device >= 0;
+    // On CPU we run the transformation with each thread processing a
+    // contiguous block of data, for better performance.
+    const size_t n_data_blocks =
+        std::max(static_cast<size_t>(1), (on_device ? ndata : nthreads));
+    const size_t block_size = ndata / n_data_blocks + !!(ndata % n_data_blocks);
+    common::Transform<>::Init(
+        [block_size, ndata] XGBOOST_DEVICE(
+            size_t data_block_idx, common::Span<float> _additional_input,
+            common::Span<GradientPair> _out_gpair,
+            common::Span<const bst_float> _preds,
+            common::Span<const bst_float> _labels,
+            common::Span<const bst_float> _weights) {
+          const bst_float* preds_ptr = _preds.data();
+          const bst_float* labels_ptr = _labels.data();
+          const bst_float* weights_ptr = _weights.data();
+          GradientPair* out_gpair_ptr = _out_gpair.data();
+          const size_t begin = data_block_idx * block_size;
+          const size_t end = std::min(ndata, begin + block_size);
           const float _scale_pos_weight = _additional_input[1];
           const bool _is_null_weight = _additional_input[2];
-          bst_float p = Loss::PredTransform(_preds[_idx]);
-          bst_float w = _is_null_weight ? 1.0f : _weights[_idx];
-          bst_float label = _labels[_idx];
-          if (label == 1.0f) {
-            w *= _scale_pos_weight;
-          }
-          if (!Loss::CheckLabel(label)) {
-            // If there is an incorrect label, the host code will know.
-            _additional_input[0] = 0;
-          }
-          _out_gpair[_idx] = GradientPair(Loss::FirstOrderGradient(p, label) * w,
-                                          Loss::SecondOrderGradient(p, label) * w);
+          for (size_t idx = begin; idx < end; ++idx) {
+            bst_float p = Loss::PredTransform(preds_ptr[idx]);
+            bst_float w = _is_null_weight ? 1.0f : weights_ptr[idx];
+            bst_float label = labels_ptr[idx];
+            if (label == 1.0f) {
+              w *= _scale_pos_weight;
+            }
+            if (!Loss::CheckLabel(label)) {
+              // If there is an incorrect label, the host code will know.
+              _additional_input[0] = 0;
+            }
+            out_gpair_ptr[idx] = GradientPair(Loss::FirstOrderGradient(p, label) * w,
+                                              Loss::SecondOrderGradient(p, label) * w);
+          }
         },
-        common::Range{0, static_cast<int64_t>(ndata)}, device).Eval(
-            &additional_input_, out_gpair, &preds, &info.labels_, &info.weights_);
+        common::Range{0, static_cast<int64_t>(n_data_blocks)}, device)
+        .Eval(&additional_input_, out_gpair, &preds, &info.labels_,
+              &info.weights_);
     auto const flag = additional_input_.HostVector().begin()[0];
     if (flag == 0) {
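
To make the blocking scheme concrete outside of XGBoost, below is a minimal standalone C++ sketch of the same idiom: ceil-divide the data into one contiguous block per thread, hoist raw pointers out of the hot loop, and have each worker iterate only its own [begin, end) range. The names (`ComputeGradients`, `GradPair`) and the squared-error gradient are illustrative stand-ins, not XGBoost code.

```cpp
#include <algorithm>
#include <cstddef>
#include <thread>
#include <vector>

struct GradPair { float grad; float hess; };

// Hypothetical standalone analogue of the patched loop; squared-error
// gradients stand in for Loss::FirstOrderGradient / SecondOrderGradient.
void ComputeGradients(const std::vector<float>& preds,
                      const std::vector<float>& labels,
                      std::vector<GradPair>* out) {
  const std::size_t n = preds.size();
  out->resize(n);
  // One contiguous block per thread on CPU; the patch instead uses ndata
  // single-element blocks when running on a device.
  const std::size_t n_blocks =
      std::max<std::size_t>(1, std::thread::hardware_concurrency());
  // Ceil division: the !! term adds one when n does not divide evenly,
  // exactly as in the patch's block_size computation.
  const std::size_t block_size = n / n_blocks + !!(n % n_blocks);

  std::vector<std::thread> workers;
  for (std::size_t b = 0; b < n_blocks; ++b) {
    workers.emplace_back([&, b] {
      // Raw pointers hoisted out of the hot loop, mirroring _preds.data()
      // etc. in the patch, so no per-element span indexing is paid.
      const float* preds_ptr = preds.data();
      const float* labels_ptr = labels.data();
      GradPair* out_ptr = out->data();
      const std::size_t begin = b * block_size;
      const std::size_t end = std::min(n, begin + block_size);
      for (std::size_t idx = begin; idx < end; ++idx) {
        const float diff = preds_ptr[idx] - labels_ptr[idx];
        out_ptr[idx] = GradPair{diff, 1.0f};
      }
    });
  }
  for (auto& t : workers) t.join();
}
```

For example, with n = 10 and 4 threads, block_size = 10/4 + !!(10%4) = 3, so the blocks cover [0,3), [3,6), [6,9), and [9,10). The patch chooses n_data_blocks = ndata on device but nthreads on CPU: GPU threads are cheap, so one element per work item stays fine there, while on CPU a larger block amortizes the kernel-dispatch and span-indexing overhead across many elements.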