[breaking] Remove the predictor param, allow fallback to prediction using DMatrix. (#9129)
- A `DeviceOrd` struct is implemented to indicate the device. It will eventually replace the `gpu_id` parameter. - The `predictor` parameter is removed. - Fallback to `DMatrix` when `inplace_predict` is not available. - The heuristic for choosing a predictor is only used during training.
This commit is contained in:
@@ -1023,7 +1023,6 @@ void InplacePredictImpl(std::shared_ptr<DMatrix> p_m, char const *c_json_config,
|
||||
const float **out_result) {
|
||||
xgboost_CHECK_C_ARG_PTR(c_json_config);
|
||||
auto config = Json::Load(StringView{c_json_config});
|
||||
CHECK_EQ(get<Integer const>(config["cache_id"]), 0) << "Cache ID is not supported yet";
|
||||
|
||||
HostDeviceVector<float> *p_predt{nullptr};
|
||||
auto type = PredictionType(RequiredArg<Integer>(config, "type", __func__));
|
||||
@@ -1042,6 +1041,7 @@ void InplacePredictImpl(std::shared_ptr<DMatrix> p_m, char const *c_json_config,
|
||||
xgboost_CHECK_C_ARG_PTR(out_dim);
|
||||
CalcPredictShape(strict_shape, type, n_samples, n_features, chunksize, learner->Groups(),
|
||||
learner->BoostedRounds(), &shape, out_dim);
|
||||
CHECK_GE(p_predt->Size(), n_samples);
|
||||
|
||||
xgboost_CHECK_C_ARG_PTR(out_result);
|
||||
xgboost_CHECK_C_ARG_PTR(out_shape);
|
||||
|
||||
@@ -92,7 +92,7 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
|
||||
API_END();
|
||||
}
|
||||
|
||||
int InplacePreidctCuda(BoosterHandle handle, char const *c_array_interface,
|
||||
int InplacePreidctCUDA(BoosterHandle handle, char const *c_array_interface,
|
||||
char const *c_json_config, std::shared_ptr<DMatrix> p_m,
|
||||
xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
|
||||
const float **out_result) {
|
||||
@@ -107,7 +107,6 @@ int InplacePreidctCuda(BoosterHandle handle, char const *c_array_interface,
|
||||
proxy->SetCUDAArray(c_array_interface);
|
||||
|
||||
auto config = Json::Load(StringView{c_json_config});
|
||||
CHECK_EQ(get<Integer const>(config["cache_id"]), 0) << "Cache ID is not supported yet";
|
||||
auto *learner = static_cast<Learner *>(handle);
|
||||
|
||||
HostDeviceVector<float> *p_predt{nullptr};
|
||||
@@ -118,7 +117,13 @@ int InplacePreidctCuda(BoosterHandle handle, char const *c_array_interface,
|
||||
RequiredArg<Integer>(config, "iteration_begin", __func__),
|
||||
RequiredArg<Integer>(config, "iteration_end", __func__));
|
||||
CHECK(p_predt);
|
||||
CHECK(p_predt->DeviceCanRead() && !p_predt->HostCanRead());
|
||||
if (learner->Ctx()->IsCPU()) {
|
||||
// Prediction using DMatrix as fallback.
|
||||
CHECK(p_predt->HostCanRead() && !p_predt->DeviceCanRead());
|
||||
} else {
|
||||
CHECK(p_predt->DeviceCanRead() && !p_predt->HostCanRead());
|
||||
}
|
||||
p_predt->SetDevice(proxy->DeviceIdx());
|
||||
|
||||
auto &shape = learner->GetThreadLocal().prediction_shape;
|
||||
size_t n_samples = p_m->Info().num_row_;
|
||||
@@ -146,7 +151,7 @@ XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *c
|
||||
if (m) {
|
||||
p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
|
||||
}
|
||||
return InplacePreidctCuda(handle, c_json_strs, c_json_config, p_m, out_shape, out_dim,
|
||||
return InplacePreidctCUDA(handle, c_json_strs, c_json_config, p_m, out_shape, out_dim,
|
||||
out_result);
|
||||
}
|
||||
|
||||
@@ -159,6 +164,6 @@ XGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *c_js
|
||||
p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
|
||||
}
|
||||
xgboost_CHECK_C_ARG_PTR(out_result);
|
||||
return InplacePreidctCuda(handle, c_json_strs, c_json_config, p_m, out_shape, out_dim,
|
||||
return InplacePreidctCUDA(handle, c_json_strs, c_json_config, p_m, out_shape, out_dim,
|
||||
out_result);
|
||||
}
|
||||
|
||||
@@ -6,6 +6,11 @@
|
||||
#ifndef XGBOOST_COMMON_ERROR_MSG_H_
|
||||
#define XGBOOST_COMMON_ERROR_MSG_H_
|
||||
|
||||
#include <cinttypes> // for uint64_t
|
||||
#include <limits> // for numeric_limits
|
||||
|
||||
#include "xgboost/base.h" // for bst_feature_t
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/string_view.h" // for StringView
|
||||
|
||||
namespace xgboost::error {
|
||||
@@ -33,5 +38,14 @@ constexpr StringView InconsistentMaxBin() {
|
||||
return "Inconsistent `max_bin`. `max_bin` should be the same across different QuantileDMatrix, "
|
||||
"and consistent with the Booster being trained.";
|
||||
}
|
||||
|
||||
constexpr StringView UnknownDevice() { return "Unknown device type."; }
|
||||
|
||||
inline void MaxFeatureSize(std::uint64_t n_features) {
|
||||
auto max_n_features = std::numeric_limits<bst_feature_t>::max();
|
||||
CHECK_LE(n_features, max_n_features)
|
||||
<< "Unfortunately, XGBoost does not support data matrices with "
|
||||
<< std::numeric_limits<bst_feature_t>::max() << " features or greater";
|
||||
}
|
||||
} // namespace xgboost::error
|
||||
#endif // XGBOOST_COMMON_ERROR_MSG_H_
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#include <dmlc/data.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef> // std::size_t
|
||||
#include <cstddef> // for size_t
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../c_api/c_api_error.h"
|
||||
#include "../common/error_msg.h" // for MaxFeatureSize
|
||||
#include "../common/math.h"
|
||||
#include "array_interface.h"
|
||||
#include "arrow-cdi.h"
|
||||
@@ -300,9 +301,9 @@ class ArrayAdapter : public detail::SingleBatchDataIter<ArrayAdapterBatch> {
|
||||
array_interface_ = ArrayInterface<2>(get<Object const>(j));
|
||||
batch_ = ArrayAdapterBatch{array_interface_};
|
||||
}
|
||||
ArrayAdapterBatch const& Value() const override { return batch_; }
|
||||
size_t NumRows() const { return array_interface_.Shape(0); }
|
||||
size_t NumColumns() const { return array_interface_.Shape(1); }
|
||||
[[nodiscard]] ArrayAdapterBatch const& Value() const override { return batch_; }
|
||||
[[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape(0); }
|
||||
[[nodiscard]] std::size_t NumColumns() const { return array_interface_.Shape(1); }
|
||||
|
||||
private:
|
||||
ArrayAdapterBatch batch_;
|
||||
|
||||
@@ -31,10 +31,10 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
|
||||
dh::XGBCachingDeviceAllocator<char> alloc;
|
||||
|
||||
auto num_rows = [&]() {
|
||||
return Dispatch(proxy, [](auto const& value) { return value.NumRows(); });
|
||||
return cuda_impl::Dispatch(proxy, [](auto const& value) { return value.NumRows(); });
|
||||
};
|
||||
auto num_cols = [&]() {
|
||||
return Dispatch(proxy, [](auto const& value) { return value.NumCols(); });
|
||||
return cuda_impl::Dispatch(proxy, [](auto const& value) { return value.NumCols(); });
|
||||
};
|
||||
|
||||
size_t row_stride = 0;
|
||||
@@ -74,7 +74,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
|
||||
get_device());
|
||||
auto* p_sketch = &sketch_containers.back();
|
||||
proxy->Info().weights_.SetDevice(get_device());
|
||||
Dispatch(proxy, [&](auto const& value) {
|
||||
cuda_impl::Dispatch(proxy, [&](auto const& value) {
|
||||
common::AdapterDeviceSketch(value, p.max_bin, proxy->Info(), missing, p_sketch);
|
||||
});
|
||||
}
|
||||
@@ -82,7 +82,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
|
||||
accumulated_rows += batch_rows;
|
||||
dh::device_vector<size_t> row_counts(batch_rows + 1, 0);
|
||||
common::Span<size_t> row_counts_span(row_counts.data().get(), row_counts.size());
|
||||
row_stride = std::max(row_stride, Dispatch(proxy, [=](auto const& value) {
|
||||
row_stride = std::max(row_stride, cuda_impl::Dispatch(proxy, [=](auto const& value) {
|
||||
return GetRowCounts(value, row_counts_span, get_device(), missing);
|
||||
}));
|
||||
nnz += thrust::reduce(thrust::cuda::par(alloc), row_counts.begin(), row_counts.end());
|
||||
@@ -136,14 +136,14 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
|
||||
auto rows = num_rows();
|
||||
dh::device_vector<size_t> row_counts(rows + 1, 0);
|
||||
common::Span<size_t> row_counts_span(row_counts.data().get(), row_counts.size());
|
||||
Dispatch(proxy, [=](auto const& value) {
|
||||
cuda_impl::Dispatch(proxy, [=](auto const& value) {
|
||||
return GetRowCounts(value, row_counts_span, get_device(), missing);
|
||||
});
|
||||
auto is_dense = this->IsDense();
|
||||
|
||||
proxy->Info().feature_types.SetDevice(get_device());
|
||||
auto d_feature_types = proxy->Info().feature_types.ConstDeviceSpan();
|
||||
auto new_impl = Dispatch(proxy, [&](auto const& value) {
|
||||
auto new_impl = cuda_impl::Dispatch(proxy, [&](auto const& value) {
|
||||
return EllpackPageImpl(value, missing, get_device(), is_dense, row_counts_span,
|
||||
d_feature_types, row_stride, rows, cuts);
|
||||
});
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
/*!
|
||||
* Copyright 2021 by Contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost Contributors
|
||||
* \file proxy_dmatrix.cc
|
||||
*/
|
||||
|
||||
#include "proxy_dmatrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
void DMatrixProxy::SetArrayData(char const *c_interface) {
|
||||
std::shared_ptr<ArrayAdapter> adapter{new ArrayAdapter(StringView{c_interface})};
|
||||
namespace xgboost::data {
|
||||
void DMatrixProxy::SetArrayData(StringView interface_str) {
|
||||
std::shared_ptr<ArrayAdapter> adapter{new ArrayAdapter{interface_str}};
|
||||
this->batch_ = adapter;
|
||||
this->Info().num_col_ = adapter->NumColumns();
|
||||
this->Info().num_row_ = adapter->NumRows();
|
||||
@@ -25,5 +24,36 @@ void DMatrixProxy::SetCSRData(char const *c_indptr, char const *c_indices,
|
||||
this->Info().num_row_ = adapter->NumRows();
|
||||
this->ctx_.gpu_id = Context::kCpuId;
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
namespace cuda_impl {
|
||||
std::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const *ctx,
|
||||
std::shared_ptr<DMatrixProxy> proxy, float missing);
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
std::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const *, std::shared_ptr<DMatrixProxy>,
|
||||
float) {
|
||||
return nullptr;
|
||||
}
|
||||
#endif // XGBOOST_USE_CUDA
|
||||
} // namespace cuda_impl
|
||||
|
||||
std::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const *ctx,
|
||||
std::shared_ptr<DMatrixProxy> proxy,
|
||||
float missing) {
|
||||
bool type_error{false};
|
||||
std::shared_ptr<DMatrix> p_fmat{nullptr};
|
||||
if (proxy->Ctx()->IsCPU()) {
|
||||
p_fmat = data::HostAdapterDispatch<false>(
|
||||
proxy.get(),
|
||||
[&](auto const &adapter) {
|
||||
auto p_fmat =
|
||||
std::shared_ptr<DMatrix>(DMatrix::Create(adapter.get(), missing, ctx->Threads()));
|
||||
return p_fmat;
|
||||
},
|
||||
&type_error);
|
||||
} else {
|
||||
p_fmat = cuda_impl::CreateDMatrixFromProxy(ctx, proxy, missing);
|
||||
}
|
||||
|
||||
return p_fmat;
|
||||
}
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
/*!
|
||||
* Copyright 2020-2022, XGBoost contributors
|
||||
/**
|
||||
* Copyright 2020-2023, XGBoost contributors
|
||||
*/
|
||||
#include "proxy_dmatrix.h"
|
||||
#include "device_adapter.cuh"
|
||||
#include "proxy_dmatrix.cuh"
|
||||
#include "proxy_dmatrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
namespace xgboost::data {
|
||||
void DMatrixProxy::FromCudaColumnar(StringView interface_str) {
|
||||
std::shared_ptr<data::CudfAdapter> adapter{new CudfAdapter{interface_str}};
|
||||
auto const& value = adapter->Value();
|
||||
@@ -31,5 +30,15 @@ void DMatrixProxy::FromCudaArray(StringView interface_str) {
|
||||
ctx_.gpu_id = dh::CurrentDevice();
|
||||
}
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
namespace cuda_impl {
|
||||
std::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const* ctx,
|
||||
std::shared_ptr<DMatrixProxy> proxy,
|
||||
float missing) {
|
||||
return Dispatch<false>(proxy.get(), [&](auto const& adapter) {
|
||||
auto p_fmat = std::shared_ptr<DMatrix>{DMatrix::Create(adapter.get(), missing, ctx->Threads())};
|
||||
return p_fmat;
|
||||
});
|
||||
}
|
||||
} // namespace cuda_impl
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -6,19 +6,34 @@
|
||||
#include "device_adapter.cuh"
|
||||
#include "proxy_dmatrix.h"
|
||||
|
||||
namespace xgboost::data {
|
||||
template <typename Fn>
|
||||
namespace xgboost::data::cuda_impl {
|
||||
template <bool get_value = true, typename Fn>
|
||||
decltype(auto) Dispatch(DMatrixProxy const* proxy, Fn fn) {
|
||||
if (proxy->Adapter().type() == typeid(std::shared_ptr<CupyAdapter>)) {
|
||||
auto value = std::any_cast<std::shared_ptr<CupyAdapter>>(proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
if constexpr (get_value) {
|
||||
auto value = std::any_cast<std::shared_ptr<CupyAdapter>>(proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
} else {
|
||||
auto value = std::any_cast<std::shared_ptr<CupyAdapter>>(proxy->Adapter());
|
||||
return fn(value);
|
||||
}
|
||||
} else if (proxy->Adapter().type() == typeid(std::shared_ptr<CudfAdapter>)) {
|
||||
auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
if constexpr (get_value) {
|
||||
auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
} else {
|
||||
auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter());
|
||||
return fn(value);
|
||||
}
|
||||
} else {
|
||||
LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name();
|
||||
auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
if constexpr (get_value) {
|
||||
auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
} else {
|
||||
auto value = std::any_cast<std::shared_ptr<CudfAdapter>>(proxy->Adapter());
|
||||
return fn(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace xgboost::data
|
||||
} // namespace xgboost::data::cuda_impl
|
||||
|
||||
@@ -62,7 +62,7 @@ class DMatrixProxy : public DMatrix {
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
void SetArrayData(char const* c_interface);
|
||||
void SetArrayData(StringView interface_str);
|
||||
void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values,
|
||||
bst_feature_t n_features, bool on_host);
|
||||
|
||||
@@ -114,28 +114,62 @@ inline DMatrixProxy* MakeProxy(DMatrixHandle proxy) {
|
||||
return typed;
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
/**
|
||||
* @brief Dispatch function call based on input type.
|
||||
*
|
||||
* @tparam get_value Whether the funciton Fn accept an adapter batch or the adapter itself.
|
||||
* @tparam Fn The type of the function to be dispatched.
|
||||
*
|
||||
* @param proxy The proxy object holding the reference to the input.
|
||||
* @param fn The function to be dispatched.
|
||||
* @param type_error[out] Set to ture if it's not null and the input data is not recognized by
|
||||
* the host.
|
||||
*
|
||||
* @return The return value of the function being dispatched.
|
||||
*/
|
||||
template <bool get_value = true, typename Fn>
|
||||
decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_error = nullptr) {
|
||||
if (proxy->Adapter().type() == typeid(std::shared_ptr<CSRArrayAdapter>)) {
|
||||
auto value = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter())->Value();
|
||||
if constexpr (get_value) {
|
||||
auto value = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
} else {
|
||||
auto value = std::any_cast<std::shared_ptr<CSRArrayAdapter>>(proxy->Adapter());
|
||||
return fn(value);
|
||||
}
|
||||
if (type_error) {
|
||||
*type_error = false;
|
||||
}
|
||||
return fn(value);
|
||||
} else if (proxy->Adapter().type() == typeid(std::shared_ptr<ArrayAdapter>)) {
|
||||
auto value = std::any_cast<std::shared_ptr<ArrayAdapter>>(proxy->Adapter())->Value();
|
||||
if constexpr (get_value) {
|
||||
auto value = std::any_cast<std::shared_ptr<ArrayAdapter>>(proxy->Adapter())->Value();
|
||||
return fn(value);
|
||||
} else {
|
||||
auto value = std::any_cast<std::shared_ptr<ArrayAdapter>>(proxy->Adapter());
|
||||
return fn(value);
|
||||
}
|
||||
if (type_error) {
|
||||
*type_error = false;
|
||||
}
|
||||
return fn(value);
|
||||
} else {
|
||||
if (type_error) {
|
||||
*type_error = true;
|
||||
} else {
|
||||
LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name();
|
||||
}
|
||||
return std::result_of_t<Fn(decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
|
||||
if constexpr (get_value) {
|
||||
return std::result_of_t<Fn(
|
||||
decltype(std::declval<std::shared_ptr<ArrayAdapter>>()->Value()))>();
|
||||
} else {
|
||||
return std::result_of_t<Fn(decltype(std::declval<std::shared_ptr<ArrayAdapter>>()))>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Create a `SimpleDMatrix` instance from a `DMatrixProxy`.
|
||||
*/
|
||||
std::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const* ctx,
|
||||
std::shared_ptr<DMatrixProxy> proxy, float missing);
|
||||
} // namespace xgboost::data
|
||||
#endif // XGBOOST_DATA_PROXY_DMATRIX_H_
|
||||
|
||||
@@ -1,33 +1,31 @@
|
||||
/*!
|
||||
* Copyright 2021 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
*/
|
||||
#include "../common/device_helpers.cuh" // for CurrentDevice
|
||||
#include "proxy_dmatrix.cuh" // for Dispatch, DMatrixProxy
|
||||
#include "simple_dmatrix.cuh" // for CopyToSparsePage
|
||||
#include "sparse_page_source.h"
|
||||
#include "proxy_dmatrix.cuh"
|
||||
#include "simple_dmatrix.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
#include "xgboost/data.h" // for SparsePage
|
||||
|
||||
namespace xgboost::data {
|
||||
namespace detail {
|
||||
std::size_t NSamplesDevice(DMatrixProxy *proxy) {
|
||||
return Dispatch(proxy, [](auto const &value) { return value.NumRows(); });
|
||||
return cuda_impl::Dispatch(proxy, [](auto const &value) { return value.NumRows(); });
|
||||
}
|
||||
|
||||
std::size_t NFeaturesDevice(DMatrixProxy *proxy) {
|
||||
return Dispatch(proxy, [](auto const &value) { return value.NumCols(); });
|
||||
return cuda_impl::Dispatch(proxy, [](auto const &value) { return value.NumCols(); });
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
void DevicePush(DMatrixProxy* proxy, float missing, SparsePage* page) {
|
||||
void DevicePush(DMatrixProxy *proxy, float missing, SparsePage *page) {
|
||||
auto device = proxy->DeviceIdx();
|
||||
if (device < 0) {
|
||||
device = dh::CurrentDevice();
|
||||
}
|
||||
CHECK_GE(device, 0);
|
||||
|
||||
Dispatch(proxy, [&](auto const &value) {
|
||||
CopyToSparsePage(value, device, missing, page);
|
||||
});
|
||||
cuda_impl::Dispatch(proxy,
|
||||
[&](auto const &value) { CopyToSparsePage(value, device, missing, page); });
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -172,8 +172,7 @@ class GBLinear : public GradientBooster {
|
||||
}
|
||||
|
||||
void PredictContribution(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,
|
||||
uint32_t layer_begin, uint32_t /*layer_end*/, bool, int,
|
||||
unsigned) override {
|
||||
bst_layer_t layer_begin, bst_layer_t /*layer_end*/, bool) override {
|
||||
model_.LazyInitModel();
|
||||
LinearCheckLayer(layer_begin);
|
||||
auto base_margin = p_fmat->Info().base_margin_.View(Context::kCpuId);
|
||||
@@ -210,8 +209,8 @@ class GBLinear : public GradientBooster {
|
||||
}
|
||||
}
|
||||
|
||||
void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,
|
||||
unsigned layer_begin, unsigned /*layer_end*/,
|
||||
void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,
|
||||
bst_layer_t layer_begin, bst_layer_t /*layer_end*/,
|
||||
bool) override {
|
||||
LinearCheckLayer(layer_begin);
|
||||
std::vector<bst_float>& contribs = out_contribs->HostVector();
|
||||
|
||||
@@ -18,9 +18,11 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../common/common.h"
|
||||
#include "../common/error_msg.h" // for UnknownDevice
|
||||
#include "../common/random.h"
|
||||
#include "../common/threading_utils.h"
|
||||
#include "../common/timer.h"
|
||||
#include "../data/proxy_dmatrix.h" // for DMatrixProxy, HostAdapterDispatch
|
||||
#include "gbtree_model.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
@@ -58,9 +60,8 @@ void GBTree::Configure(Args const& cfg) {
|
||||
cpu_predictor_->Configure(cfg);
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
auto n_gpus = common::AllVisibleGPUs();
|
||||
if (!gpu_predictor_ && n_gpus != 0) {
|
||||
gpu_predictor_ = std::unique_ptr<Predictor>(
|
||||
Predictor::Create("gpu_predictor", this->ctx_));
|
||||
if (!gpu_predictor_) {
|
||||
gpu_predictor_ = std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", this->ctx_));
|
||||
}
|
||||
if (n_gpus != 0) {
|
||||
gpu_predictor_->Configure(cfg);
|
||||
@@ -374,12 +375,7 @@ void GBTree::LoadConfig(Json const& in) {
|
||||
// This would cause all trees to be pushed to trees_to_update
|
||||
// e.g. updating a model, then saving and loading it would result in an empty model
|
||||
tparam_.process_type = TreeProcessType::kDefault;
|
||||
int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
|
||||
if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
|
||||
LOG(WARNING) << "Loading from a raw memory buffer on CPU only machine. "
|
||||
"Changing predictor to auto.";
|
||||
tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}});
|
||||
}
|
||||
std::int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
|
||||
|
||||
auto msg = StringView{
|
||||
R"(
|
||||
@@ -505,8 +501,8 @@ void GBTree::Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, Gradien
|
||||
out_model.param.num_parallel_tree = model_.param.num_parallel_tree;
|
||||
}
|
||||
|
||||
void GBTree::PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end) {
|
||||
void GBTree::PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end) const {
|
||||
CHECK(configured_);
|
||||
if (layer_end == 0) {
|
||||
layer_end = this->BoostedRounds();
|
||||
@@ -526,7 +522,7 @@ void GBTree::PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool
|
||||
CHECK_EQ(out_preds->version, 0);
|
||||
}
|
||||
|
||||
auto const& predictor = GetPredictor(&out_preds->predictions, p_fmat);
|
||||
auto const& predictor = GetPredictor(is_training, &out_preds->predictions, p_fmat);
|
||||
if (out_preds->version == 0) {
|
||||
// out_preds->Size() can be non-zero as it's initialized here before any
|
||||
// tree is built at the 0^th iterator.
|
||||
@@ -546,68 +542,69 @@ void GBTree::PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<Predictor> const &
|
||||
GBTree::GetPredictor(HostDeviceVector<float> const *out_pred,
|
||||
DMatrix *f_dmat) const {
|
||||
void GBTree::PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end) {
|
||||
// dispatch to const function.
|
||||
this->PredictBatchImpl(p_fmat, out_preds, is_training, layer_begin, layer_end);
|
||||
}
|
||||
|
||||
void GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,
|
||||
PredictionCacheEntry* out_preds, bst_layer_t layer_begin,
|
||||
bst_layer_t layer_end) const {
|
||||
CHECK(configured_);
|
||||
if (tparam_.predictor != PredictorType::kAuto) {
|
||||
if (tparam_.predictor == PredictorType::kGPUPredictor) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
CHECK_GE(common::AllVisibleGPUs(), 1) << "No visible GPU is found for XGBoost.";
|
||||
CHECK(gpu_predictor_);
|
||||
return gpu_predictor_;
|
||||
#else
|
||||
common::AssertGPUSupport();
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
if (tparam_.predictor == PredictorType::kOneAPIPredictor) {
|
||||
#if defined(XGBOOST_USE_ONEAPI)
|
||||
CHECK(oneapi_predictor_);
|
||||
return oneapi_predictor_;
|
||||
#else
|
||||
common::AssertOneAPISupport();
|
||||
#endif // defined(XGBOOST_USE_ONEAPI)
|
||||
}
|
||||
CHECK(cpu_predictor_);
|
||||
return cpu_predictor_;
|
||||
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
|
||||
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
|
||||
if (p_m->Ctx()->Device() != this->ctx_->Device()) {
|
||||
LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. XGBoost "
|
||||
<< "is running on: " << this->ctx_->DeviceName()
|
||||
<< ", while the input data is on: " << p_m->Ctx()->DeviceName() << ".";
|
||||
CHECK_EQ(out_preds->version, 0);
|
||||
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
|
||||
auto any_adapter = proxy->Adapter();
|
||||
auto p_fmat = data::CreateDMatrixFromProxy(ctx_, proxy, missing);
|
||||
this->PredictBatchImpl(p_fmat.get(), out_preds, false, layer_begin, layer_end);
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->ctx_->IsCPU()) {
|
||||
this->cpu_predictor_->InplacePredict(p_m, model_, missing, out_preds, tree_begin, tree_end);
|
||||
} else if (p_m->Ctx()->IsCUDA()) {
|
||||
CHECK(this->gpu_predictor_);
|
||||
this->gpu_predictor_->InplacePredict(p_m, model_, missing, out_preds, tree_begin, tree_end);
|
||||
} else {
|
||||
LOG(FATAL) << error::UnknownDevice();
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] std::unique_ptr<Predictor> const& GBTree::GetPredictor(
|
||||
bool is_training, HostDeviceVector<float> const* out_pred, DMatrix* f_dmat) const {
|
||||
CHECK(configured_);
|
||||
|
||||
// Data comes from SparsePageDMatrix. Since we are loading data in pages, no need to
|
||||
// prevent data copy.
|
||||
if (f_dmat && !f_dmat->SingleColBlock()) {
|
||||
if (ctx_->IsCPU()) {
|
||||
return cpu_predictor_;
|
||||
} else {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
CHECK_GE(common::AllVisibleGPUs(), 1) << "No visible GPU is found for XGBoost.";
|
||||
return gpu_predictor_;
|
||||
#else
|
||||
common::AssertGPUSupport();
|
||||
return cpu_predictor_;
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
CHECK(gpu_predictor_);
|
||||
return gpu_predictor_;
|
||||
}
|
||||
}
|
||||
|
||||
// Data comes from Device DMatrix.
|
||||
auto is_ellpack = f_dmat && f_dmat->PageExists<EllpackPage>() &&
|
||||
!f_dmat->PageExists<SparsePage>();
|
||||
auto is_ellpack =
|
||||
f_dmat && f_dmat->PageExists<EllpackPage>() && !f_dmat->PageExists<SparsePage>();
|
||||
// Data comes from device memory, like CuDF or CuPy.
|
||||
auto is_from_device =
|
||||
f_dmat && f_dmat->PageExists<SparsePage>() &&
|
||||
(*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();
|
||||
auto is_from_device = f_dmat && f_dmat->PageExists<SparsePage>() &&
|
||||
(*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();
|
||||
auto on_device = is_ellpack || is_from_device;
|
||||
|
||||
// Use GPU Predictor if data is already on device and gpu_id is set.
|
||||
if (on_device && ctx_->gpu_id >= 0) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
CHECK_GE(common::AllVisibleGPUs(), 1) << "No visible GPU is found for XGBoost.";
|
||||
if (on_device && ctx_->IsCUDA()) {
|
||||
common::AssertGPUSupport();
|
||||
CHECK(gpu_predictor_);
|
||||
return gpu_predictor_;
|
||||
#else
|
||||
LOG(FATAL) << "Data is on CUDA device, but XGBoost is not compiled with "
|
||||
"CUDA support.";
|
||||
return cpu_predictor_;
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
// GPU_Hist by default has prediction cache calculated from quantile values,
|
||||
@@ -619,23 +616,19 @@ GBTree::GetPredictor(HostDeviceVector<float> const *out_pred,
|
||||
if ((out_pred && out_pred->Size() == 0) && (model_.param.num_trees != 0) &&
|
||||
// FIXME(trivialfis): Implement a better method for testing whether data
|
||||
// is on device after DMatrix refactoring is done.
|
||||
!on_device) {
|
||||
!on_device && is_training) {
|
||||
CHECK(cpu_predictor_);
|
||||
return cpu_predictor_;
|
||||
}
|
||||
|
||||
if (tparam_.tree_method == TreeMethod::kGPUHist) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
CHECK_GE(common::AllVisibleGPUs(), 1) << "No visible GPU is found for XGBoost.";
|
||||
if (ctx_->IsCPU()) {
|
||||
return cpu_predictor_;
|
||||
} else {
|
||||
common::AssertGPUSupport();
|
||||
CHECK(gpu_predictor_);
|
||||
return gpu_predictor_;
|
||||
#else
|
||||
common::AssertGPUSupport();
|
||||
return cpu_predictor_;
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
}
|
||||
|
||||
CHECK(cpu_predictor_);
|
||||
return cpu_predictor_;
|
||||
}
|
||||
|
||||
@@ -750,7 +743,7 @@ class Dart : public GBTree {
|
||||
bool training, unsigned layer_begin,
|
||||
unsigned layer_end) const {
|
||||
CHECK(!this->model_.learner_model_param->IsVectorLeaf()) << "dart" << MTNotImplemented();
|
||||
auto &predictor = this->GetPredictor(&p_out_preds->predictions, p_fmat);
|
||||
auto& predictor = this->GetPredictor(training, &p_out_preds->predictions, p_fmat);
|
||||
CHECK(predictor);
|
||||
predictor->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions,
|
||||
model_);
|
||||
@@ -814,49 +807,46 @@ class Dart : public GBTree {
|
||||
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
|
||||
auto n_groups = model_.learner_model_param->num_output_group;
|
||||
|
||||
std::vector<Predictor const*> predictors {
|
||||
cpu_predictor_.get(),
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
gpu_predictor_.get()
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
};
|
||||
Predictor const* predictor{nullptr};
|
||||
StringView msg{"Unsupported data type for inplace predict."};
|
||||
if (ctx_->Device() != p_fmat->Ctx()->Device()) {
|
||||
LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. XGBoost "
|
||||
<< "is running on: " << this->ctx_->DeviceName()
|
||||
<< ", while the input data is on: " << p_fmat->Ctx()->DeviceName() << ".";
|
||||
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_fmat);
|
||||
auto any_adapter = proxy->Adapter();
|
||||
auto p_fmat = data::CreateDMatrixFromProxy(ctx_, proxy, missing);
|
||||
this->PredictBatchImpl(p_fmat.get(), p_out_preds, false, layer_begin, layer_end);
|
||||
return;
|
||||
}
|
||||
|
||||
StringView msg{"Unsupported data type for inplace predict."};
|
||||
PredictionCacheEntry predts;
|
||||
if (ctx_->gpu_id != Context::kCpuId) {
|
||||
predts.predictions.SetDevice(ctx_->gpu_id);
|
||||
}
|
||||
predts.predictions.Resize(p_fmat->Info().num_row_ * n_groups, 0);
|
||||
|
||||
auto get_predictor = [&]() -> Predictor const* {
|
||||
if (ctx_->IsCPU()) {
|
||||
return cpu_predictor_.get();
|
||||
} else if (ctx_->IsCUDA()) {
|
||||
CHECK(this->gpu_predictor_);
|
||||
return gpu_predictor_.get();
|
||||
} else {
|
||||
LOG(FATAL) << error::UnknownDevice();
|
||||
return nullptr;
|
||||
}
|
||||
};
|
||||
auto predict_impl = [&](size_t i) {
|
||||
predts.predictions.Fill(0);
|
||||
if (tparam_.predictor == PredictorType::kAuto) {
|
||||
// Try both predictor implementations
|
||||
bool success = false;
|
||||
for (auto const& p : predictors) {
|
||||
if (p && p->InplacePredict(p_fmat, model_, missing, &predts, i, i + 1)) {
|
||||
success = true;
|
||||
predictor = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
CHECK(success) << msg;
|
||||
} else {
|
||||
predictor = this->GetPredictor().get();
|
||||
bool success = predictor->InplacePredict(p_fmat, model_, missing, &predts, i, i + 1);
|
||||
CHECK(success) << msg << std::endl
|
||||
<< "Current Predictor: "
|
||||
<< (tparam_.predictor == PredictorType::kCPUPredictor ? "cpu_predictor"
|
||||
: "gpu_predictor");
|
||||
}
|
||||
bool success{get_predictor()->InplacePredict(p_fmat, model_, missing, &predts, i, i + 1)};
|
||||
CHECK(success) << msg;
|
||||
};
|
||||
|
||||
// Inplace predict is not used for training, so no need to drop tree.
|
||||
for (bst_tree_t i = tree_begin; i < tree_end; ++i) {
|
||||
predict_impl(i);
|
||||
if (i == tree_begin) {
|
||||
predictor->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions, model_);
|
||||
get_predictor()->InitOutPredictions(p_fmat->Info(), &p_out_preds->predictions, model_);
|
||||
}
|
||||
// Multiple the tree weight
|
||||
auto w = this->weight_drop_.at(i);
|
||||
@@ -886,25 +876,24 @@ class Dart : public GBTree {
|
||||
std::vector<bst_float> *out_preds,
|
||||
unsigned layer_begin, unsigned layer_end) override {
|
||||
DropTrees(false);
|
||||
auto &predictor = this->GetPredictor();
|
||||
auto &predictor = this->GetPredictor(false);
|
||||
uint32_t _, tree_end;
|
||||
std::tie(_, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
|
||||
predictor->PredictInstance(inst, out_preds, model_, tree_end);
|
||||
}
|
||||
|
||||
void PredictContribution(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_contribs,
|
||||
unsigned layer_begin, unsigned layer_end, bool approximate, int,
|
||||
unsigned) override {
|
||||
void PredictContribution(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end,
|
||||
bool approximate) override {
|
||||
CHECK(configured_);
|
||||
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
|
||||
cpu_predictor_->PredictContribution(p_fmat, out_contribs, model_, tree_end, &weight_drop_,
|
||||
approximate);
|
||||
}
|
||||
|
||||
void PredictInteractionContributions(
|
||||
DMatrix *p_fmat, HostDeviceVector<bst_float> *out_contribs,
|
||||
unsigned layer_begin, unsigned layer_end, bool approximate) override {
|
||||
void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end,
|
||||
bool approximate) override {
|
||||
CHECK(configured_);
|
||||
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
|
||||
cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_, tree_end,
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
/*!
|
||||
* Copyright 2021 by Contributors
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost Contributors
|
||||
*/
|
||||
#include "../common/device_helpers.cuh"
|
||||
#include "xgboost/context.h"
|
||||
#include "xgboost/linalg.h"
|
||||
#include "xgboost/span.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace gbm {
|
||||
|
||||
namespace xgboost::gbm {
|
||||
void GPUCopyGradient(HostDeviceVector<GradientPair> const *in_gpair,
|
||||
bst_group_t n_groups, bst_group_t group_id,
|
||||
HostDeviceVector<GradientPair> *out_gpair) {
|
||||
@@ -41,5 +38,4 @@ void GPUDartInplacePredictInc(common::Span<float> out_predts, common::Span<float
|
||||
out_predts[offset] += (predts[offset] - base_score(0)) * tree_w;
|
||||
});
|
||||
}
|
||||
} // namespace gbm
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::gbm
|
||||
|
||||
102
src/gbm/gbtree.h
102
src/gbm/gbtree.h
@@ -43,18 +43,10 @@ enum class TreeProcessType : int {
|
||||
kDefault = 0,
|
||||
kUpdate = 1
|
||||
};
|
||||
|
||||
enum class PredictorType : int {
|
||||
kAuto = 0,
|
||||
kCPUPredictor,
|
||||
kGPUPredictor,
|
||||
kOneAPIPredictor
|
||||
};
|
||||
} // namespace xgboost
|
||||
|
||||
DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod);
|
||||
DECLARE_FIELD_ENUM_CLASS(xgboost::TreeProcessType);
|
||||
DECLARE_FIELD_ENUM_CLASS(xgboost::PredictorType);
|
||||
|
||||
namespace xgboost::gbm {
|
||||
/*! \brief training parameters */
|
||||
@@ -63,8 +55,6 @@ struct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {
|
||||
std::string updater_seq;
|
||||
/*! \brief type of boosting process to run */
|
||||
TreeProcessType process_type;
|
||||
// predictor type
|
||||
PredictorType predictor;
|
||||
// tree construction method
|
||||
TreeMethod tree_method;
|
||||
// declare parameters
|
||||
@@ -79,13 +69,6 @@ struct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {
|
||||
.describe("Whether to run the normal boosting process that creates new trees,"\
|
||||
" or to update the trees in an existing model.");
|
||||
DMLC_DECLARE_ALIAS(updater_seq, updater);
|
||||
DMLC_DECLARE_FIELD(predictor)
|
||||
.set_default(PredictorType::kAuto)
|
||||
.add_enum("auto", PredictorType::kAuto)
|
||||
.add_enum("cpu_predictor", PredictorType::kCPUPredictor)
|
||||
.add_enum("gpu_predictor", PredictorType::kGPUPredictor)
|
||||
.add_enum("oneapi_predictor", PredictorType::kOneAPIPredictor)
|
||||
.describe("Predictor algorithm type");
|
||||
DMLC_DECLARE_FIELD(tree_method)
|
||||
.set_default(TreeMethod::kAuto)
|
||||
.add_enum("auto", TreeMethod::kAuto)
|
||||
@@ -206,15 +189,9 @@ class GBTree : public GradientBooster {
|
||||
void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
|
||||
PredictionCacheEntry* predt, ObjFunction const* obj) override;
|
||||
|
||||
bool UseGPU() const override {
|
||||
return
|
||||
tparam_.predictor == PredictorType::kGPUPredictor ||
|
||||
tparam_.tree_method == TreeMethod::kGPUHist;
|
||||
}
|
||||
[[nodiscard]] bool UseGPU() const override { return tparam_.tree_method == TreeMethod::kGPUHist; }
|
||||
|
||||
GBTreeTrainParam const& GetTrainParam() const {
|
||||
return tparam_;
|
||||
}
|
||||
[[nodiscard]] GBTreeTrainParam const& GetTrainParam() const { return tparam_; }
|
||||
|
||||
void Load(dmlc::Stream* fi) override { model_.Load(fi); }
|
||||
void Save(dmlc::Stream* fo) const override {
|
||||
@@ -236,39 +213,14 @@ class GBTree : public GradientBooster {
|
||||
return !model_.trees.empty() || !model_.trees_to_update.empty();
|
||||
}
|
||||
|
||||
void PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end) const;
|
||||
|
||||
void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool training,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end) override;
|
||||
|
||||
void InplacePredict(std::shared_ptr<DMatrix> p_m, float missing, PredictionCacheEntry* out_preds,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end) const override {
|
||||
CHECK(configured_);
|
||||
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
|
||||
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
|
||||
std::vector<Predictor const *> predictors{
|
||||
cpu_predictor_.get(),
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
gpu_predictor_.get()
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
};
|
||||
StringView msg{"Unsupported data type for inplace predict."};
|
||||
if (tparam_.predictor == PredictorType::kAuto) {
|
||||
// Try both predictor implementations
|
||||
for (auto const &p : predictors) {
|
||||
if (p && p->InplacePredict(p_m, model_, missing, out_preds, tree_begin, tree_end)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
LOG(FATAL) << msg;
|
||||
} else {
|
||||
bool success = this->GetPredictor()->InplacePredict(p_m, model_, missing, out_preds,
|
||||
tree_begin, tree_end);
|
||||
CHECK(success) << msg << std::endl
|
||||
<< "Current Predictor: "
|
||||
<< (tparam_.predictor == PredictorType::kCPUPredictor
|
||||
? "cpu_predictor"
|
||||
: "gpu_predictor");
|
||||
}
|
||||
}
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end) const override;
|
||||
|
||||
void FeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,
|
||||
std::vector<bst_feature_t>* features,
|
||||
@@ -349,32 +301,29 @@ class GBTree : public GradientBooster {
|
||||
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
|
||||
CHECK_EQ(tree_begin, 0) << "Predict leaf supports only iteration end: (0, "
|
||||
"n_iteration), use model slicing instead.";
|
||||
this->GetPredictor()->PredictLeaf(p_fmat, out_preds, model_, tree_end);
|
||||
this->GetPredictor(false)->PredictLeaf(p_fmat, out_preds, model_, tree_end);
|
||||
}
|
||||
|
||||
void PredictContribution(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_float>* out_contribs,
|
||||
uint32_t layer_begin, uint32_t layer_end, bool approximate,
|
||||
int, unsigned) override {
|
||||
void PredictContribution(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end,
|
||||
bool approximate) override {
|
||||
CHECK(configured_);
|
||||
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
|
||||
CHECK_EQ(tree_begin, 0)
|
||||
<< "Predict contribution supports only iteration end: (0, "
|
||||
"n_iteration), using model slicing instead.";
|
||||
this->GetPredictor()->PredictContribution(
|
||||
p_fmat, out_contribs, model_, tree_end, nullptr, approximate);
|
||||
CHECK_EQ(tree_begin, 0) << "Predict contribution supports only iteration end: (0, "
|
||||
"n_iteration), using model slicing instead.";
|
||||
this->GetPredictor(false)->PredictContribution(p_fmat, out_contribs, model_, tree_end, nullptr,
|
||||
approximate);
|
||||
}
|
||||
|
||||
void PredictInteractionContributions(
|
||||
DMatrix *p_fmat, HostDeviceVector<bst_float> *out_contribs,
|
||||
uint32_t layer_begin, uint32_t layer_end, bool approximate) override {
|
||||
void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end,
|
||||
bool approximate) override {
|
||||
CHECK(configured_);
|
||||
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
|
||||
CHECK_EQ(tree_begin, 0)
|
||||
<< "Predict interaction contribution supports only iteration end: (0, "
|
||||
"n_iteration), using model slicing instead.";
|
||||
this->GetPredictor()->PredictInteractionContributions(
|
||||
p_fmat, out_contribs, model_, tree_end, nullptr, approximate);
|
||||
CHECK_EQ(tree_begin, 0) << "Predict interaction contribution supports only iteration end: (0, "
|
||||
"n_iteration), using model slicing instead.";
|
||||
this->GetPredictor(false)->PredictInteractionContributions(p_fmat, out_contribs, model_,
|
||||
tree_end, nullptr, approximate);
|
||||
}
|
||||
|
||||
[[nodiscard]] std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
|
||||
@@ -390,8 +339,9 @@ class GBTree : public GradientBooster {
|
||||
std::vector<HostDeviceVector<bst_node_t>>* out_position,
|
||||
std::vector<std::unique_ptr<RegTree>>* ret);
|
||||
|
||||
std::unique_ptr<Predictor> const& GetPredictor(HostDeviceVector<float> const* out_pred = nullptr,
|
||||
DMatrix* f_dmat = nullptr) const;
|
||||
[[nodiscard]] std::unique_ptr<Predictor> const& GetPredictor(
|
||||
bool is_training, HostDeviceVector<float> const* out_pred = nullptr,
|
||||
DMatrix* f_dmat = nullptr) const;
|
||||
|
||||
// commit new trees all at once
|
||||
virtual void CommitModel(TreesOneIter&& new_trees);
|
||||
@@ -410,9 +360,7 @@ class GBTree : public GradientBooster {
|
||||
std::vector<std::unique_ptr<TreeUpdater>> updaters_;
|
||||
// Predictors
|
||||
std::unique_ptr<Predictor> cpu_predictor_;
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
std::unique_ptr<Predictor> gpu_predictor_;
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
std::unique_ptr<Predictor> gpu_predictor_{nullptr};
|
||||
#if defined(XGBOOST_USE_ONEAPI)
|
||||
std::unique_ptr<Predictor> oneapi_predictor_;
|
||||
#endif // defined(XGBOOST_USE_ONEAPI)
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
#include "common/api_entry.h" // for XGBAPIThreadLocalEntry
|
||||
#include "common/charconv.h" // for to_chars, to_chars_result, NumericLimits, from_...
|
||||
#include "common/common.h" // for ToString, Split
|
||||
#include "common/error_msg.h" // for MaxFeatureSize
|
||||
#include "common/io.h" // for PeekableInStream, ReadAll, FixedSizeStream, Mem...
|
||||
#include "common/observer.h" // for TrainingObserver
|
||||
#include "common/random.h" // for GlobalRandom
|
||||
@@ -763,9 +764,7 @@ class LearnerConfiguration : public Learner {
|
||||
CHECK(matrix.first.ptr);
|
||||
CHECK(!matrix.second.ref.expired());
|
||||
const uint64_t num_col = matrix.first.ptr->Info().num_col_;
|
||||
CHECK_LE(num_col, static_cast<uint64_t>(std::numeric_limits<unsigned>::max()))
|
||||
<< "Unfortunately, XGBoost does not support data matrices with "
|
||||
<< std::numeric_limits<unsigned>::max() << " features or greater";
|
||||
error::MaxFeatureSize(num_col);
|
||||
num_feature = std::max(num_feature, static_cast<uint32_t>(num_col));
|
||||
}
|
||||
|
||||
@@ -1413,6 +1412,8 @@ class LearnerImpl : public LearnerIO {
|
||||
this->CheckModelInitialized();
|
||||
|
||||
auto& out_predictions = this->GetThreadLocal().prediction_entry;
|
||||
out_predictions.version = 0;
|
||||
|
||||
this->gbm_->InplacePredict(p_m, missing, &out_predictions, iteration_begin, iteration_end);
|
||||
if (type == PredictionType::kValue) {
|
||||
obj_->PredTransform(&out_predictions.predictions);
|
||||
|
||||
@@ -577,8 +577,8 @@ void LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView<double
|
||||
if (lj(0) >= Eps64()) {
|
||||
tj_minus(i) = std::pow(lj(i) / lj(0), regularizer);
|
||||
}
|
||||
assert(!std::isinf(ti_plus(i)));
|
||||
assert(!std::isinf(tj_minus(i)));
|
||||
assert(!isinf(ti_plus(i)));
|
||||
assert(!isinf(tj_minus(i)));
|
||||
});
|
||||
}
|
||||
} // namespace cuda_impl
|
||||
|
||||
@@ -883,9 +883,8 @@ class CPUPredictor : public Predictor {
|
||||
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
auto page = batch.GetView();
|
||||
// parallel over local batch
|
||||
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
|
||||
common::ParallelFor(nsize, n_threads, [&](bst_omp_uint i) {
|
||||
auto row_idx = static_cast<size_t>(batch.base_rowid + i);
|
||||
common::ParallelFor(batch.Size(), n_threads, [&](auto i) {
|
||||
auto row_idx = batch.base_rowid + i;
|
||||
RegTree::FVec &feats = feat_vecs[omp_get_thread_num()];
|
||||
if (feats.Size() == 0) {
|
||||
feats.Init(num_feature);
|
||||
|
||||
@@ -226,9 +226,7 @@ struct GPUHistMakerDevice {
|
||||
monitor.Init(std::string("GPUHistMakerDevice") + std::to_string(ctx_->gpu_id));
|
||||
}
|
||||
|
||||
~GPUHistMakerDevice() { // NOLINT
|
||||
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
|
||||
}
|
||||
~GPUHistMakerDevice() = default;
|
||||
|
||||
void InitFeatureGroupsOnce() {
|
||||
if (!feature_groups) {
|
||||
|
||||
Reference in New Issue
Block a user