Use matrix for gradient. (#9508)

- Use the `linalg::Matrix` for storing gradients.
- New API for the custom objective.
- Custom objective for multi-class/multi-target is now required to return the correct shape.
- Custom objective for Python can accept arrays with any strides (row-major or column-major).
This commit is contained in:
Jiaming Yuan
2023-08-24 05:29:52 +08:00
committed by GitHub
parent 6103dca0bb
commit 972730cde0
77 changed files with 1052 additions and 651 deletions

View File

@@ -22,6 +22,7 @@
#include "../common/charconv.h" // for from_chars, to_chars, NumericLimits, from_ch...
#include "../common/hist_util.h" // for HistogramCuts
#include "../common/io.h" // for FileExtension, LoadSequentialFile, MemoryBuf...
#include "../common/linalg_op.h" // for ElementWiseTransformHost
#include "../common/threading_utils.h" // for OmpGetNumThreads, ParallelFor
#include "../data/adapter.h" // for ArrayAdapter, DenseAdapter, RecordBatchesIte...
#include "../data/ellpack_page.h" // for EllpackPage
@@ -68,6 +69,7 @@ XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch) {
}
}
static_assert(DMLC_CXX11_THREAD_LOCAL, "XGBoost depends on thread-local storage.");
using GlobalConfigAPIThreadLocalStore = dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry>;
#if !defined(XGBOOST_USE_CUDA)
@@ -717,8 +719,7 @@ XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
API_END();
}
XGB_DLL int XGDMatrixNumRow(const DMatrixHandle handle,
xgboost::bst_ulong *out) {
XGB_DLL int XGDMatrixNumRow(DMatrixHandle handle, xgboost::bst_ulong *out) {
API_BEGIN();
CHECK_HANDLE();
auto p_m = CastDMatrixHandle(handle);
@@ -727,8 +728,7 @@ XGB_DLL int XGDMatrixNumRow(const DMatrixHandle handle,
API_END();
}
XGB_DLL int XGDMatrixNumCol(const DMatrixHandle handle,
xgboost::bst_ulong *out) {
XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle, xgboost::bst_ulong *out) {
API_BEGIN();
CHECK_HANDLE();
auto p_m = CastDMatrixHandle(handle);
@@ -970,28 +970,71 @@ XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
API_END();
}
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
DMatrixHandle dtrain,
bst_float *grad,
bst_float *hess,
xgboost::bst_ulong len) {
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, bst_float *grad,
bst_float *hess, xgboost::bst_ulong len) {
API_BEGIN();
CHECK_HANDLE();
HostDeviceVector<GradientPair> tmp_gpair;
auto* bst = static_cast<Learner*>(handle);
auto* dtr =
static_cast<std::shared_ptr<DMatrix>*>(dtrain);
tmp_gpair.Resize(len);
std::vector<GradientPair>& tmp_gpair_h = tmp_gpair.HostVector();
if (len > 0) {
xgboost_CHECK_C_ARG_PTR(grad);
xgboost_CHECK_C_ARG_PTR(hess);
}
for (xgboost::bst_ulong i = 0; i < len; ++i) {
tmp_gpair_h[i] = GradientPair(grad[i], hess[i]);
}
error::DeprecatedFunc(__func__, "2.1.0", "XGBoosterTrainOneIter");
auto *learner = static_cast<Learner *>(handle);
auto ctx = learner->Ctx()->MakeCPU();
bst->BoostOneIter(0, *dtr, &tmp_gpair);
auto t_grad = linalg::MakeTensorView(&ctx, common::Span{grad, len}, len);
auto t_hess = linalg::MakeTensorView(&ctx, common::Span{hess, len}, len);
auto s_grad = linalg::ArrayInterfaceStr(t_grad);
auto s_hess = linalg::ArrayInterfaceStr(t_hess);
return XGBoosterTrainOneIter(handle, dtrain, 0, s_grad.c_str(), s_hess.c_str());
API_END();
}
namespace xgboost {
// Copy user-supplied CUDA gradient/hessian arrays into `out_gpair`.
// In builds without CUDA support this fallback body aborts via
// AssertGPUSupport(); otherwise only a declaration is emitted here and the
// real implementation is provided in the CUDA translation unit.
void CopyGradientFromCUDAArrays(Context const *, ArrayInterface<2, false> const &,
ArrayInterface<2, false> const &, linalg::Matrix<GradientPair> *)
#if !defined(XGBOOST_USE_CUDA)
{
common::AssertGPUSupport();
}
#else
; // NOLINT
#endif
}  // namespace xgboost
/**
 * @brief C API: run one boosting iteration with custom (user-supplied) gradients.
 *
 * `grad` and `hess` are JSON array-interface strings describing 2-dim
 * (n_samples, n_targets) buffers, which may live on either host or CUDA
 * memory (detected from the data pointer).  The two buffers are fused into a
 * single linalg::Matrix<GradientPair> and passed to Learner::BoostOneIter.
 *
 * Fails (via CHECK) when grad/hess shapes differ, when the row count does not
 * match the DMatrix, or when the two buffers live on different devices.
 */
XGB_DLL int XGBoosterTrainOneIter(BoosterHandle handle, DMatrixHandle dtrain, int iter,
char const *grad, char const *hess) {
API_BEGIN();
CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(grad);
xgboost_CHECK_C_ARG_PTR(hess);
auto p_fmat = CastDMatrixHandle(dtrain);
// Parse the array-interface JSON; 2-dim, no masked values.
ArrayInterface<2, false> i_grad{StringView{grad}};
ArrayInterface<2, false> i_hess{StringView{hess}};
StringView msg{"Mismatched shape between the gradient and hessian."};
CHECK_EQ(i_grad.Shape(0), i_hess.Shape(0)) << msg;
CHECK_EQ(i_grad.Shape(1), i_hess.Shape(1)) << msg;
linalg::Matrix<GradientPair> gpair;
// Device placement is inferred from the raw data pointers.
auto grad_is_cuda = ArrayInterfaceHandler::IsCudaPtr(i_grad.data);
auto hess_is_cuda = ArrayInterfaceHandler::IsCudaPtr(i_hess.data);
CHECK_EQ(i_grad.Shape(0), p_fmat->Info().num_row_)
<< "Mismatched size between the gradient and training data.";
CHECK_EQ(grad_is_cuda, hess_is_cuda) << "gradient and hessian should be on the same device.";
auto *learner = static_cast<Learner *>(handle);
auto ctx = learner->Ctx();
if (!grad_is_cuda) {
// Host path: dispatch on the input dtypes and fuse grad/hess into
// GradientPair elements in parallel on the CPU.
gpair.Reshape(i_grad.Shape(0), i_grad.Shape(1));
auto const shape = gpair.Shape();
auto h_gpair = gpair.HostView();
DispatchDType(i_grad, DeviceOrd::CPU(), [&](auto &&t_grad) {
DispatchDType(i_hess, DeviceOrd::CPU(), [&](auto &&t_hess) {
common::ParallelFor(h_gpair.Size(), ctx->Threads(),
detail::CustomGradHessOp{t_grad, t_hess, h_gpair});
});
});
} else {
// Device path: implemented in the CUDA translation unit.
CopyGradientFromCUDAArrays(ctx, i_grad, i_hess, &gpair);
}
learner->BoostOneIter(iter, p_fmat, &gpair);
API_END();
}

View File

@@ -1,8 +1,12 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#include "../common/api_entry.h" // XGBAPIThreadLocalEntry
#include <thrust/transform.h> // for transform
#include "../common/api_entry.h" // for XGBAPIThreadLocalEntry
#include "../common/cuda_context.cuh" // for CUDAContext
#include "../common/threading_utils.h"
#include "../data/array_interface.h" // for DispatchDType, ArrayInterface
#include "../data/device_adapter.cuh"
#include "../data/proxy_dmatrix.h"
#include "c_api_error.h"
@@ -13,7 +17,6 @@
#include "xgboost/learner.h"
namespace xgboost {
void XGBBuildInfoDevice(Json *p_info) {
auto &info = *p_info;
@@ -55,6 +58,27 @@ void XGBoostAPIGuard::RestoreGPUAttribute() {
// If errors, do nothing, assuming running on CPU only machine.
cudaSetDevice(device_id_);
}
// CUDA implementation: fuse device-resident gradient/hessian arrays into a
// single device matrix of GradientPair.  Both inputs must reside on the same
// CUDA device; the output matrix is placed on that device and reshaped to the
// gradient's (n_samples, n_targets) shape.
void CopyGradientFromCUDAArrays(Context const *ctx, ArrayInterface<2, false> const &grad,
ArrayInterface<2, false> const &hess,
linalg::Matrix<GradientPair> *out_gpair) {
auto grad_dev = dh::CudaGetPointerDevice(grad.data);
auto hess_dev = dh::CudaGetPointerDevice(hess.data);
CHECK_EQ(grad_dev, hess_dev) << "gradient and hessian should be on the same device.";
auto &gpair = *out_gpair;
gpair.SetDevice(grad_dev);
gpair.Reshape(grad.Shape(0), grad.Shape(1));
auto d_gpair = gpair.View(grad_dev);
auto cuctx = ctx->CUDACtx();
// Dispatch on the input dtypes, then launch one thread per element to
// convert struct-of-arrays (grad, hess) into array-of-structs (GradientPair).
DispatchDType(grad, DeviceOrd::CUDA(grad_dev), [&](auto &&t_grad) {
DispatchDType(hess, DeviceOrd::CUDA(hess_dev), [&](auto &&t_hess) {
CHECK_EQ(t_grad.Size(), t_hess.Size());
thrust::for_each_n(cuctx->CTP(), thrust::make_counting_iterator(0ul), t_grad.Size(),
detail::CustomGradHessOp{t_grad, t_hess, d_gpair});
});
});
}
} // namespace xgboost
using namespace xgboost; // NOLINT

View File

@@ -1,5 +1,5 @@
/*!
* Copyright (c) 2015-2022 by Contributors
/**
* Copyright 2015-2023, XGBoost Contributors
* \file c_api_error.h
* \brief Error handling for C API.
*/
@@ -35,8 +35,8 @@
} \
return 0; // NOLINT(*)
#define CHECK_HANDLE() if (handle == nullptr) \
LOG(FATAL) << "DMatrix/Booster has not been initialized or has already been disposed.";
#define CHECK_HANDLE() \
if (handle == nullptr) ::xgboost::detail::EmptyHandle();
/*!
* \brief Set the last error message needed by C API

View File

@@ -7,8 +7,10 @@
#include <algorithm>
#include <cstddef>
#include <functional>
#include <memory> // std::shared_ptr
#include <string>
#include <memory> // for shared_ptr
#include <string> // for string
#include <tuple> // for make_tuple
#include <utility> // for move
#include <vector>
#include "xgboost/c_api.h"
@@ -16,7 +18,7 @@
#include "xgboost/feature_map.h" // for FeatureMap
#include "xgboost/json.h"
#include "xgboost/learner.h"
#include "xgboost/linalg.h" // ArrayInterfaceHandler
#include "xgboost/linalg.h" // ArrayInterfaceHandler, MakeTensorView, ArrayInterfaceStr
#include "xgboost/logging.h"
#include "xgboost/string_view.h" // StringView
@@ -287,6 +289,19 @@ inline std::shared_ptr<DMatrix> CastDMatrixHandle(DMatrixHandle const handle) {
}
namespace detail {
// Raise a fatal error for a null DMatrix/Booster handle; shared by
// CHECK_HANDLE() and the handle helpers below so the message stays uniform.
inline void EmptyHandle() {
LOG(FATAL) << "DMatrix/Booster has not been initialized or has already been disposed.";
}
// Fetch the Context of the Learner behind a booster handle.
// Fatals via EmptyHandle() when the handle is null; never returns nullptr.
inline xgboost::Context const *BoosterCtx(BoosterHandle handle) {
if (handle == nullptr) {
EmptyHandle();
}
auto *learner = static_cast<xgboost::Learner *>(handle);
CHECK(learner);
return learner->Ctx();
}
template <typename PtrT, typename I, typename T>
void MakeSparseFromPtr(PtrT const *p_indptr, I const *p_indices, T const *p_data,
std::size_t nindptr, std::string *indptr_str, std::string *indices_str,
@@ -334,6 +349,40 @@ void MakeSparseFromPtr(PtrT const *p_indptr, I const *p_indices, T const *p_data
Json::Dump(jindices, indices_str);
Json::Dump(jdata, data_str);
}
/**
 * @brief Make array interface for other language bindings.
 *
 * Wraps the raw `grad` and `hess` buffers as (n_samples, n_targets) tensor
 * views and serializes each to its array-interface JSON string.
 *
 * @param ctx        Context used to build the tensor views.
 * @param grad       Pointer to n_samples * n_targets gradient values.
 * @param hess       Pointer to n_samples * n_targets hessian values.
 * @return A tuple (grad_str, hess_str) of array-interface strings.
 */
template <typename G, typename H>
auto MakeGradientInterface(Context const *ctx, G const *grad, H const *hess, std::size_t n_samples,
std::size_t n_targets) {
auto t_grad =
linalg::MakeTensorView(ctx, common::Span{grad, n_samples * n_targets}, n_samples, n_targets);
auto t_hess =
linalg::MakeTensorView(ctx, common::Span{hess, n_samples * n_targets}, n_samples, n_targets);
auto s_grad = linalg::ArrayInterfaceStr(t_grad);
auto s_hess = linalg::ArrayInterfaceStr(t_hess);
return std::make_tuple(s_grad, s_hess);
}
// Element-wise functor fusing separate gradient (G) and hessian (H) matrices
// into one matrix of GradientPair.  Marked XGBOOST_DEVICE so the same functor
// is usable from common::ParallelFor on the host and thrust::for_each_n on
// the device.
template <typename G, typename H>
struct CustomGradHessOp {
linalg::MatrixView<G> t_grad;
linalg::MatrixView<H> t_hess;
linalg::MatrixView<GradientPair> d_gpair;
CustomGradHessOp(linalg::MatrixView<G> t_grad, linalg::MatrixView<H> t_hess,
linalg::MatrixView<GradientPair> d_gpair)
: t_grad{std::move(t_grad)}, t_hess{std::move(t_hess)}, d_gpair{std::move(d_gpair)} {}
// i is a flat element index; unravel it into (row, target) coordinates.
XGBOOST_DEVICE void operator()(std::size_t i) {
auto [m, n] = linalg::UnravelIndex(i, t_grad.Shape(0), t_grad.Shape(1));
auto g = t_grad(m, n);
auto h = t_hess(m, n);
// from struct of arrays to array of structs.
d_gpair(m, n) = GradientPair{static_cast<float>(g), static_cast<float>(h)};
}
};
} // namespace detail
} // namespace xgboost
#endif // XGBOOST_C_API_C_API_UTILS_H_