Use matrix for gradient. (#9508)
- Use `linalg::Matrix` for storing gradients.
- Add a new API for the custom objective.
- Custom objectives for multi-class/multi-target models are now required to return gradients with the correct shape.
- Custom objectives written in Python can accept arrays with any strides (row-major or column-major).
This commit is contained in:
@@ -22,6 +22,7 @@
|
||||
#include "../common/charconv.h" // for from_chars, to_chars, NumericLimits, from_ch...
|
||||
#include "../common/hist_util.h" // for HistogramCuts
|
||||
#include "../common/io.h" // for FileExtension, LoadSequentialFile, MemoryBuf...
|
||||
#include "../common/linalg_op.h" // for ElementWiseTransformHost
|
||||
#include "../common/threading_utils.h" // for OmpGetNumThreads, ParallelFor
|
||||
#include "../data/adapter.h" // for ArrayAdapter, DenseAdapter, RecordBatchesIte...
|
||||
#include "../data/ellpack_page.h" // for EllpackPage
|
||||
@@ -68,6 +69,7 @@ XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch) {
|
||||
}
|
||||
}
|
||||
|
||||
static_assert(DMLC_CXX11_THREAD_LOCAL, "XGBoost depends on thread-local storage.");
|
||||
using GlobalConfigAPIThreadLocalStore = dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry>;
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
@@ -717,8 +719,7 @@ XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixNumRow(const DMatrixHandle handle,
|
||||
xgboost::bst_ulong *out) {
|
||||
XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle, xgboost::bst_ulong *out) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
auto p_m = CastDMatrixHandle(handle);
|
||||
@@ -727,8 +728,7 @@ XGB_DLL int XGDMatrixNumRow(const DMatrixHandle handle,
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGDMatrixNumCol(const DMatrixHandle handle,
|
||||
xgboost::bst_ulong *out) {
|
||||
XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle, xgboost::bst_ulong *out) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
auto p_m = CastDMatrixHandle(handle);
|
||||
@@ -970,28 +970,71 @@ XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
|
||||
API_END();
|
||||
}
|
||||
|
||||
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
|
||||
DMatrixHandle dtrain,
|
||||
bst_float *grad,
|
||||
bst_float *hess,
|
||||
xgboost::bst_ulong len) {
|
||||
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, bst_float *grad,
|
||||
bst_float *hess, xgboost::bst_ulong len) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
HostDeviceVector<GradientPair> tmp_gpair;
|
||||
auto* bst = static_cast<Learner*>(handle);
|
||||
auto* dtr =
|
||||
static_cast<std::shared_ptr<DMatrix>*>(dtrain);
|
||||
tmp_gpair.Resize(len);
|
||||
std::vector<GradientPair>& tmp_gpair_h = tmp_gpair.HostVector();
|
||||
if (len > 0) {
|
||||
xgboost_CHECK_C_ARG_PTR(grad);
|
||||
xgboost_CHECK_C_ARG_PTR(hess);
|
||||
}
|
||||
for (xgboost::bst_ulong i = 0; i < len; ++i) {
|
||||
tmp_gpair_h[i] = GradientPair(grad[i], hess[i]);
|
||||
}
|
||||
error::DeprecatedFunc(__func__, "2.1.0", "XGBoosterTrainOneIter");
|
||||
auto *learner = static_cast<Learner *>(handle);
|
||||
auto ctx = learner->Ctx()->MakeCPU();
|
||||
|
||||
bst->BoostOneIter(0, *dtr, &tmp_gpair);
|
||||
auto t_grad = linalg::MakeTensorView(&ctx, common::Span{grad, len}, len);
|
||||
auto t_hess = linalg::MakeTensorView(&ctx, common::Span{hess, len}, len);
|
||||
|
||||
auto s_grad = linalg::ArrayInterfaceStr(t_grad);
|
||||
auto s_hess = linalg::ArrayInterfaceStr(t_hess);
|
||||
|
||||
return XGBoosterTrainOneIter(handle, dtrain, 0, s_grad.c_str(), s_hess.c_str());
|
||||
API_END();
|
||||
}
|
||||
|
||||
namespace xgboost {
|
||||
// copy user-supplied CUDA gradient arrays
|
||||
void CopyGradientFromCUDAArrays(Context const *, ArrayInterface<2, false> const &,
|
||||
ArrayInterface<2, false> const &, linalg::Matrix<GradientPair> *)
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
{
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
#else
|
||||
; // NOLINT
|
||||
#endif
|
||||
} // namespace xgboost
|
||||
|
||||
XGB_DLL int XGBoosterTrainOneIter(BoosterHandle handle, DMatrixHandle dtrain, int iter,
|
||||
char const *grad, char const *hess) {
|
||||
API_BEGIN();
|
||||
CHECK_HANDLE();
|
||||
xgboost_CHECK_C_ARG_PTR(grad);
|
||||
xgboost_CHECK_C_ARG_PTR(hess);
|
||||
auto p_fmat = CastDMatrixHandle(dtrain);
|
||||
ArrayInterface<2, false> i_grad{StringView{grad}};
|
||||
ArrayInterface<2, false> i_hess{StringView{hess}};
|
||||
StringView msg{"Mismatched shape between the gradient and hessian."};
|
||||
CHECK_EQ(i_grad.Shape(0), i_hess.Shape(0)) << msg;
|
||||
CHECK_EQ(i_grad.Shape(1), i_hess.Shape(1)) << msg;
|
||||
linalg::Matrix<GradientPair> gpair;
|
||||
auto grad_is_cuda = ArrayInterfaceHandler::IsCudaPtr(i_grad.data);
|
||||
auto hess_is_cuda = ArrayInterfaceHandler::IsCudaPtr(i_hess.data);
|
||||
CHECK_EQ(i_grad.Shape(0), p_fmat->Info().num_row_)
|
||||
<< "Mismatched size between the gradient and training data.";
|
||||
CHECK_EQ(grad_is_cuda, hess_is_cuda) << "gradient and hessian should be on the same device.";
|
||||
auto *learner = static_cast<Learner *>(handle);
|
||||
auto ctx = learner->Ctx();
|
||||
if (!grad_is_cuda) {
|
||||
gpair.Reshape(i_grad.Shape(0), i_grad.Shape(1));
|
||||
auto const shape = gpair.Shape();
|
||||
auto h_gpair = gpair.HostView();
|
||||
DispatchDType(i_grad, DeviceOrd::CPU(), [&](auto &&t_grad) {
|
||||
DispatchDType(i_hess, DeviceOrd::CPU(), [&](auto &&t_hess) {
|
||||
common::ParallelFor(h_gpair.Size(), ctx->Threads(),
|
||||
detail::CustomGradHessOp{t_grad, t_hess, h_gpair});
|
||||
});
|
||||
});
|
||||
} else {
|
||||
CopyGradientFromCUDAArrays(ctx, i_grad, i_hess, &gpair);
|
||||
}
|
||||
learner->BoostOneIter(iter, p_fmat, &gpair);
|
||||
API_END();
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
/**
|
||||
* Copyright 2019-2023 by XGBoost Contributors
|
||||
*/
|
||||
#include "../common/api_entry.h" // XGBAPIThreadLocalEntry
|
||||
#include <thrust/transform.h> // for transform
|
||||
|
||||
#include "../common/api_entry.h" // for XGBAPIThreadLocalEntry
|
||||
#include "../common/cuda_context.cuh" // for CUDAContext
|
||||
#include "../common/threading_utils.h"
|
||||
#include "../data/array_interface.h" // for DispatchDType, ArrayInterface
|
||||
#include "../data/device_adapter.cuh"
|
||||
#include "../data/proxy_dmatrix.h"
|
||||
#include "c_api_error.h"
|
||||
@@ -13,7 +17,6 @@
|
||||
#include "xgboost/learner.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
void XGBBuildInfoDevice(Json *p_info) {
|
||||
auto &info = *p_info;
|
||||
|
||||
@@ -55,6 +58,27 @@ void XGBoostAPIGuard::RestoreGPUAttribute() {
|
||||
// If errors, do nothing, assuming running on CPU only machine.
|
||||
cudaSetDevice(device_id_);
|
||||
}
|
||||
|
||||
void CopyGradientFromCUDAArrays(Context const *ctx, ArrayInterface<2, false> const &grad,
|
||||
ArrayInterface<2, false> const &hess,
|
||||
linalg::Matrix<GradientPair> *out_gpair) {
|
||||
auto grad_dev = dh::CudaGetPointerDevice(grad.data);
|
||||
auto hess_dev = dh::CudaGetPointerDevice(hess.data);
|
||||
CHECK_EQ(grad_dev, hess_dev) << "gradient and hessian should be on the same device.";
|
||||
auto &gpair = *out_gpair;
|
||||
gpair.SetDevice(grad_dev);
|
||||
gpair.Reshape(grad.Shape(0), grad.Shape(1));
|
||||
auto d_gpair = gpair.View(grad_dev);
|
||||
auto cuctx = ctx->CUDACtx();
|
||||
|
||||
DispatchDType(grad, DeviceOrd::CUDA(grad_dev), [&](auto &&t_grad) {
|
||||
DispatchDType(hess, DeviceOrd::CUDA(hess_dev), [&](auto &&t_hess) {
|
||||
CHECK_EQ(t_grad.Size(), t_hess.Size());
|
||||
thrust::for_each_n(cuctx->CTP(), thrust::make_counting_iterator(0ul), t_grad.Size(),
|
||||
detail::CustomGradHessOp{t_grad, t_hess, d_gpair});
|
||||
});
|
||||
});
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
using namespace xgboost; // NOLINT
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright (c) 2015-2022 by Contributors
|
||||
/**
|
||||
* Copyright 2015-2023, XGBoost Contributors
|
||||
* \file c_api_error.h
|
||||
* \brief Error handling for C API.
|
||||
*/
|
||||
@@ -35,8 +35,8 @@
|
||||
} \
|
||||
return 0; // NOLINT(*)
|
||||
|
||||
#define CHECK_HANDLE() if (handle == nullptr) \
|
||||
LOG(FATAL) << "DMatrix/Booster has not been initialized or has already been disposed.";
|
||||
#define CHECK_HANDLE() \
|
||||
if (handle == nullptr) ::xgboost::detail::EmptyHandle();
|
||||
|
||||
/*!
|
||||
* \brief Set the last error message needed by C API
|
||||
|
||||
@@ -7,8 +7,10 @@
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <memory> // std::shared_ptr
|
||||
#include <string>
|
||||
#include <memory> // for shared_ptr
|
||||
#include <string> // for string
|
||||
#include <tuple> // for make_tuple
|
||||
#include <utility> // for move
|
||||
#include <vector>
|
||||
|
||||
#include "xgboost/c_api.h"
|
||||
@@ -16,7 +18,7 @@
|
||||
#include "xgboost/feature_map.h" // for FeatureMap
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/learner.h"
|
||||
#include "xgboost/linalg.h" // ArrayInterfaceHandler
|
||||
#include "xgboost/linalg.h" // ArrayInterfaceHandler, MakeTensorView, ArrayInterfaceStr
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/string_view.h" // StringView
|
||||
|
||||
@@ -287,6 +289,19 @@ inline std::shared_ptr<DMatrix> CastDMatrixHandle(DMatrixHandle const handle) {
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
inline void EmptyHandle() {
|
||||
LOG(FATAL) << "DMatrix/Booster has not been initialized or has already been disposed.";
|
||||
}
|
||||
|
||||
inline xgboost::Context const *BoosterCtx(BoosterHandle handle) {
|
||||
if (handle == nullptr) {
|
||||
EmptyHandle();
|
||||
}
|
||||
auto *learner = static_cast<xgboost::Learner *>(handle);
|
||||
CHECK(learner);
|
||||
return learner->Ctx();
|
||||
}
|
||||
|
||||
template <typename PtrT, typename I, typename T>
|
||||
void MakeSparseFromPtr(PtrT const *p_indptr, I const *p_indices, T const *p_data,
|
||||
std::size_t nindptr, std::string *indptr_str, std::string *indices_str,
|
||||
@@ -334,6 +349,40 @@ void MakeSparseFromPtr(PtrT const *p_indptr, I const *p_indices, T const *p_data
|
||||
Json::Dump(jindices, indices_str);
|
||||
Json::Dump(jdata, data_str);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Make array interface for other language bindings.
|
||||
*/
|
||||
template <typename G, typename H>
|
||||
auto MakeGradientInterface(Context const *ctx, G const *grad, H const *hess, std::size_t n_samples,
|
||||
std::size_t n_targets) {
|
||||
auto t_grad =
|
||||
linalg::MakeTensorView(ctx, common::Span{grad, n_samples * n_targets}, n_samples, n_targets);
|
||||
auto t_hess =
|
||||
linalg::MakeTensorView(ctx, common::Span{hess, n_samples * n_targets}, n_samples, n_targets);
|
||||
auto s_grad = linalg::ArrayInterfaceStr(t_grad);
|
||||
auto s_hess = linalg::ArrayInterfaceStr(t_hess);
|
||||
return std::make_tuple(s_grad, s_hess);
|
||||
}
|
||||
|
||||
template <typename G, typename H>
|
||||
struct CustomGradHessOp {
|
||||
linalg::MatrixView<G> t_grad;
|
||||
linalg::MatrixView<H> t_hess;
|
||||
linalg::MatrixView<GradientPair> d_gpair;
|
||||
|
||||
CustomGradHessOp(linalg::MatrixView<G> t_grad, linalg::MatrixView<H> t_hess,
|
||||
linalg::MatrixView<GradientPair> d_gpair)
|
||||
: t_grad{std::move(t_grad)}, t_hess{std::move(t_hess)}, d_gpair{std::move(d_gpair)} {}
|
||||
|
||||
XGBOOST_DEVICE void operator()(std::size_t i) {
|
||||
auto [m, n] = linalg::UnravelIndex(i, t_grad.Shape(0), t_grad.Shape(1));
|
||||
auto g = t_grad(m, n);
|
||||
auto h = t_hess(m, n);
|
||||
// from struct of arrays to array of structs.
|
||||
d_gpair(m, n) = GradientPair{static_cast<float>(g), static_cast<float>(h)};
|
||||
}
|
||||
};
|
||||
} // namespace detail
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_C_API_C_API_UTILS_H_
|
||||
|
||||
Reference in New Issue
Block a user