Merge branch 'master' into sync-2024Jan24

This commit is contained in:
Hui Liu
2024-02-01 14:41:48 -08:00
99 changed files with 2476 additions and 283 deletions

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2014-2023 by XGBoost Contributors
* Copyright 2014-2024 by XGBoost Contributors
*/
#include "xgboost/c_api.h"
@@ -994,8 +994,8 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, bs
auto *learner = static_cast<Learner *>(handle);
auto ctx = learner->Ctx()->MakeCPU();
auto t_grad = linalg::MakeTensorView(&ctx, common::Span{grad, len}, len);
auto t_hess = linalg::MakeTensorView(&ctx, common::Span{hess, len}, len);
auto t_grad = linalg::MakeTensorView(&ctx, common::Span{grad, static_cast<size_t>(len)}, len);
auto t_hess = linalg::MakeTensorView(&ctx, common::Span{hess, static_cast<size_t>(len)}, len);
auto s_grad = linalg::ArrayInterfaceStr(t_grad);
auto s_hess = linalg::ArrayInterfaceStr(t_hess);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023, XGBoost Contributors
* Copyright 2017-2024, XGBoost Contributors
* \file column_matrix.h
* \brief Utility for fast column-wise access
* \author Philip Cho
@@ -176,7 +176,7 @@ class ColumnMatrix {
void SetValid(typename LBitField32::index_type i) {missing.Clear(i);}
/** @brief Rebind the `missing` bit-field view so that it covers the current `storage` buffer. */
void InitView() {
// Build a span over storage.data() with storage.size() elements and wrap it in LBitField32.
missing = LBitField32{Span{storage.data(), storage.size()}};
// NOTE(review): this listing shows a second form of the same assignment; it differs only in
// the explicit static_cast<size_t> applied to the element count.
missing = LBitField32{Span{storage.data(), static_cast<size_t>(storage.size())}};
}
void GrowTo(std::size_t n_elements, bool init) {
@@ -318,8 +318,8 @@ class ColumnMatrix {
common::Span<const BinIdxType> bin_index = {
reinterpret_cast<const BinIdxType*>(&index_[feature_offset * bins_type_size_]),
column_size};
return std::move(DenseColumnIter<BinIdxType, any_missing>{
bin_index, static_cast<bst_bin_t>(index_base_[fidx]), missing_.missing, feature_offset});
return DenseColumnIter<BinIdxType, any_missing>{
bin_index, static_cast<bst_bin_t>(index_base_[fidx]), missing_.missing, feature_offset};
}
// all columns are dense columns and have no missing values
@@ -332,7 +332,7 @@ class ColumnMatrix {
DispatchBinType(bins_type_size_, [&](auto t) {
using ColumnBinT = decltype(t);
auto column_index = Span<ColumnBinT>{reinterpret_cast<ColumnBinT*>(index_.data()),
index_.size() / sizeof(ColumnBinT)};
static_cast<size_t>(index_.size() / sizeof(ColumnBinT))};
ParallelFor(n_samples, n_threads, [&](auto rid) {
rid += base_rowid;
const size_t ibegin = rid * n_features;

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 by XGBoost Contributors
* Copyright 2017-2024 by XGBoost Contributors
* \file hist_util.h
* \brief Utility for fast histogram aggregation
* \author Philip Cho, Tianqi Chen
@@ -113,8 +113,8 @@ class HistogramCuts {
auto end = ptrs[column_id + 1];
auto beg = ptrs[column_id];
auto it = std::upper_bound(values.cbegin() + beg, values.cbegin() + end, value);
auto idx = it - values.cbegin();
idx -= !!(idx == end);
auto idx = static_cast<bst_bin_t>(it - values.cbegin());
idx -= !!(idx == static_cast<bst_bin_t>(end));
return idx;
}
@@ -136,8 +136,8 @@ class HistogramCuts {
auto beg = ptrs[fidx] + vals.cbegin();
// Truncates the value in case it's not perfectly rounded.
auto v = static_cast<float>(common::AsCat(value));
auto bin_idx = std::lower_bound(beg, end, v) - vals.cbegin();
if (bin_idx == ptrs.at(fidx + 1)) {
auto bin_idx = static_cast<bst_bin_t>(std::lower_bound(beg, end, v) - vals.cbegin());
if (bin_idx == static_cast<bst_bin_t>(ptrs.at(fidx + 1))) {
bin_idx -= 1;
}
return bin_idx;

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023, XGBoost Contributors
* Copyright 2023-2024, XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_REF_RESOURCE_VIEW_H_
#define XGBOOST_COMMON_REF_RESOURCE_VIEW_H_
@@ -76,7 +76,7 @@ class RefResourceView {
[[nodiscard]] size_type size() const { return size_; } // NOLINT
/** @brief Size in bytes of the viewed elements, obtained by wrapping data()/size() in a Span. */
[[nodiscard]] size_type size_bytes() const { // NOLINT
return Span<const value_type>{data(), size()}.size_bytes();
// NOTE(review): this listing shows a second form of the return; it differs only in the
// explicit static_cast<size_t> applied to size().
return Span<const value_type>{data(), static_cast<size_t>(size())}.size_bytes();
}
/** @brief Mutable pointer to the start of the viewed elements (returns ptr_). */
[[nodiscard]] value_type* data() { return ptr_; }; // NOLINT
/** @brief Read-only pointer to the start of the viewed elements (returns ptr_). */
[[nodiscard]] value_type const* data() const { return ptr_; }; // NOLINT

View File

@@ -1,12 +1,12 @@
/**
* Copyright 2022 by XGBoost Contributors
* Copyright 2022-2024, XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_TRANSFORM_ITERATOR_H_
#define XGBOOST_COMMON_TRANSFORM_ITERATOR_H_
#include <cstddef> // std::size_t
#include <iterator> // std::random_access_iterator_tag
#include <type_traits> // std::result_of_t, std::add_pointer_t, std::add_lvalue_reference_t
#include <type_traits> // for invoke_result_t, add_pointer_t, add_lvalue_reference_t
#include <utility> // std::forward
#include "xgboost/span.h" // ptrdiff_t
@@ -25,11 +25,11 @@ class IndexTransformIter {
Fn fn_;
public:
using iterator_category = std::random_access_iterator_tag; // NOLINT
using reference = std::result_of_t<Fn(std::size_t)>; // NOLINT
using value_type = std::remove_cv_t<std::remove_reference_t<reference>>; // NOLINT
using difference_type = detail::ptrdiff_t; // NOLINT
using pointer = std::add_pointer_t<value_type>; // NOLINT
using iterator_category = std::random_access_iterator_tag; // NOLINT
using reference = std::invoke_result_t<Fn, std::size_t>; // NOLINT
using value_type = std::remove_cv_t<std::remove_reference_t<reference>>; // NOLINT
using difference_type = detail::ptrdiff_t; // NOLINT
using pointer = std::add_pointer_t<value_type>; // NOLINT
public:
/**

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
* \file array_interface.h
* \brief View of __array_interface__
*/
@@ -12,7 +12,7 @@
#include <limits> // for numeric_limits
#include <map>
#include <string>
#include <type_traits> // std::alignment_of,std::remove_pointer_t
#include <type_traits> // for alignment_of, remove_pointer_t, invoke_result_t
#include <utility>
#include <vector>
@@ -645,7 +645,7 @@ auto DispatchDType(ArrayInterfaceHandler::Type dtype, Fn dispatch) {
}
}
return std::result_of_t<Fn(std::int8_t)>();
return std::invoke_result_t<Fn, std::int8_t>();
}
template <std::int32_t D, typename Fn>

View File

@@ -261,12 +261,10 @@ bool NoInfInData(AdapterBatchT const& batch, IsValidFunctor is_valid) {
auto counting = thrust::make_counting_iterator(0llu);
auto value_iter = dh::MakeTransformIterator<bool>(counting, [=] XGBOOST_DEVICE(std::size_t idx) {
auto v = batch.GetElement(idx).value;
if (!is_valid(v)) {
// discard the invalid elements.
return true;
if (is_valid(v) && isinf(v)) {
return false;
}
// check that there's no inf in data.
return !std::isinf(v);
return true;
});
dh::XGBCachingDeviceAllocator<char> alloc;
// The default implementation in thrust optimizes any_of/none_of/all_of by using small

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 by XGBoost contributors
* Copyright 2019-2024, XGBoost contributors
*/
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/transform_output_iterator.h>
@@ -13,7 +13,7 @@
#include "../common/hist_util.cuh"
#include "../common/transform_iterator.h" // MakeIndexTransformIter
#include "./ellpack_page.cuh"
#include "device_adapter.cuh" // for HasInfInData
#include "device_adapter.cuh" // for NoInfInData
#include "ellpack_page.h"
#include "gradient_index.h"
#include "xgboost/data.h"

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023, XGBoost Contributors
* Copyright 2017-2024, XGBoost Contributors
* \brief Data type for fast histogram aggregation.
*/
#include "gradient_index.h"
@@ -148,7 +148,8 @@ void GHistIndexMatrix::ResizeIndex(const size_t n_index, const bool isDense) {
new_vec = {new_ptr, n_bytes / sizeof(std::uint8_t), malloc_resource};
}
this->data = std::move(new_vec);
this->index = common::Index{common::Span{data.data(), data.size()}, t_size};
this->index = common::Index{common::Span{data.data(), static_cast<size_t>(data.size())},
t_size};
};
if ((MaxNumBinPerFeat() - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) &&

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2021-2023 XGBoost contributors
* Copyright 2021-2024 XGBoost contributors
*/
#include <cstddef> // for size_t
#include <cstdint> // for uint8_t
@@ -40,7 +40,9 @@ class GHistIndexRawFormat : public SparsePageFormat<GHistIndexMatrix> {
return false;
}
// - index
page->index = common::Index{common::Span{page->data.data(), page->data.size()}, size_type};
page->index =
common::Index{common::Span{page->data.data(), static_cast<size_t>(page->data.size())},
size_type};
// hit count
if (!common::ReadVec(fi, &page->hit_count)) {

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020-2023, XGBoost contributors
* Copyright 2020-2024, XGBoost contributors
*/
#ifndef XGBOOST_DATA_PROXY_DMATRIX_H_
#define XGBOOST_DATA_PROXY_DMATRIX_H_
@@ -7,6 +7,7 @@
#include <any> // for any, any_cast
#include <memory>
#include <string>
#include <type_traits> // for invoke_result_t
#include <utility>
#include "adapter.h"
@@ -171,10 +172,10 @@ decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_
LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name();
}
if constexpr (get_value) {
return std::result_of_t<Fn(
decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
return std::invoke_result_t<
Fn, decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value())>();
} else {
return std::result_of_t<Fn(decltype(std::declval<std::shared_ptr<ArrayAdapter>>()))>();
return std::invoke_result_t<Fn, decltype(std::declval<std::shared_ptr<ArrayAdapter>>())>();
}
}
}

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
* \file simple_dmatrix.cuh
*/
#ifndef XGBOOST_DATA_SIMPLE_DMATRIX_CUH_
@@ -11,7 +11,7 @@
#include "../common/device_helpers.cuh"
#include "../common/error_msg.h" // for InfInData
#include "device_adapter.cuh" // for HasInfInData
#include "device_adapter.cuh" // for NoInfInData
namespace xgboost::data {

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 by Contributors
* Copyright 2017-2024 by Contributors
*/
#include "xgboost/predictor.h"
@@ -46,7 +46,7 @@ void ValidateBaseMarginShape(linalg::Tensor<float, D> const& margin, bst_row_t n
void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
CHECK_NE(model.learner_model_param->num_output_group, 0);
std::size_t n{model.learner_model_param->OutputLength() * info.num_row_};
auto n = static_cast<size_t>(model.learner_model_param->OutputLength() * info.num_row_);
const HostDeviceVector<bst_float>* base_margin = info.base_margin_.Data();
if (ctx_->Device().IsCUDA()) {

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2023 by XGBoost Contributors
* Copyright 2023-2024 by XGBoost Contributors
*/
#ifndef XGBOOST_TREE_HIST_HIST_CACHE_H_
#define XGBOOST_TREE_HIST_HIST_CACHE_H_
@@ -48,11 +48,13 @@ class BoundedHistCollection {
BoundedHistCollection() = default;
/**
 * @brief Mutable histogram row for entry `idx`.
 *
 * Looks up the offset stored for `idx` in node_map_ and returns the window of
 * n_total_bins_ elements, starting at that offset, of the buffer behind data_.
 */
common::GHistRow operator[](std::size_t idx) {
// .at() is used for the lookup; presumably it rejects an unknown idx -- confirm against
// the type of node_map_.
auto offset = node_map_.at(idx);
return common::Span{data_->data(), data_->size()}.subspan(offset, n_total_bins_);
// NOTE(review): this listing shows a second form of the return; it differs only in the
// explicit static_cast<size_t> applied to the buffer length.
return common::Span{data_->data(), static_cast<size_t>(data_->size())}.subspan(
offset, n_total_bins_);
}
/**
 * @brief Read-only histogram row for entry `idx`.
 *
 * Looks up the offset stored for `idx` in node_map_ and returns the window of
 * n_total_bins_ elements, starting at that offset, of the buffer behind data_.
 */
common::ConstGHistRow operator[](std::size_t idx) const {
// .at() is used for the lookup; presumably it rejects an unknown idx -- confirm against
// the type of node_map_.
auto offset = node_map_.at(idx);
return common::Span{data_->data(), data_->size()}.subspan(offset, n_total_bins_);
// NOTE(review): this listing shows a second form of the return; it differs only in the
// explicit static_cast<size_t> applied to the buffer length.
return common::Span{data_->data(), static_cast<size_t>(data_->size())}.subspan(
offset, n_total_bins_);
}
void Reset(bst_bin_t n_total_bins, std::size_t n_cached_nodes) {
n_total_bins_ = n_total_bins;