Support F order for the tensor type. (#8872)
- Add F order support for tensor and view.
- Use a parameter pack for automatic type casting (avoids excessive static casts for the shape).
This commit is contained in:
@@ -23,9 +23,7 @@
|
||||
#include "xgboost/span.h" // Span
|
||||
#include "xgboost/tree_model.h" // RegTree
|
||||
|
||||
namespace xgboost {
|
||||
namespace obj {
|
||||
namespace detail {
|
||||
namespace xgboost::obj::detail {
|
||||
void EncodeTreeLeafHost(Context const* ctx, RegTree const& tree,
|
||||
std::vector<bst_node_t> const& position, std::vector<size_t>* p_nptr,
|
||||
std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_ridx) {
|
||||
@@ -98,8 +96,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
auto const& h_node_idx = nidx;
|
||||
auto const& h_node_ptr = nptr;
|
||||
CHECK_LE(h_node_ptr.back(), info.num_row_);
|
||||
auto h_predt = linalg::MakeTensorView(predt.ConstHostSpan(),
|
||||
{info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
|
||||
auto h_predt = linalg::MakeTensorView(ctx, predt.ConstHostSpan(), info.num_row_,
|
||||
predt.Size() / info.num_row_);
|
||||
|
||||
// loop over each leaf
|
||||
common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
|
||||
@@ -138,11 +136,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
void UpdateTreeLeafDevice(Context const*, common::Span<bst_node_t const>, std::int32_t,
|
||||
MetaInfo const&, float learning_rate, HostDeviceVector<float> const&,
|
||||
float, RegTree*) {
|
||||
MetaInfo const&, float, HostDeviceVector<float> const&, float, RegTree*) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
} // namespace detail
|
||||
} // namespace obj
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::obj::detail
|
||||
|
||||
@@ -157,8 +157,8 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
HostDeviceVector<float> quantiles;
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
|
||||
auto d_predt = linalg::MakeTensorView(predt.ConstDeviceSpan(),
|
||||
{info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
|
||||
auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), info.num_row_,
|
||||
predt.Size() / info.num_row_);
|
||||
CHECK_LT(group_idx, d_predt.Shape(1));
|
||||
auto t_predt = d_predt.Slice(linalg::All(), group_idx);
|
||||
auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), IdxY(info, group_idx));
|
||||
|
||||
@@ -64,8 +64,7 @@ class QuantileRegression : public ObjFunction {
|
||||
out_gpair->SetDevice(ctx_->gpu_id);
|
||||
out_gpair->Resize(n_targets * info.num_row_);
|
||||
auto gpair =
|
||||
linalg::MakeTensorView(ctx_->IsCPU() ? out_gpair->HostSpan() : out_gpair->DeviceSpan(),
|
||||
{info.num_row_, n_alphas, n_targets / n_alphas}, ctx_->gpu_id);
|
||||
linalg::MakeTensorView(ctx_, out_gpair, info.num_row_, n_alphas, n_targets / n_alphas);
|
||||
|
||||
info.weights_.SetDevice(ctx_->gpu_id);
|
||||
common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
|
||||
@@ -80,15 +79,8 @@ class QuantileRegression : public ObjFunction {
|
||||
|
||||
linalg::ElementWiseKernel(
|
||||
ctx_, gpair, [=] XGBOOST_DEVICE(std::size_t i, GradientPair const&) mutable {
|
||||
auto idx = linalg::UnravelIndex(static_cast<std::size_t>(i),
|
||||
{static_cast<std::size_t>(n_samples),
|
||||
static_cast<std::size_t>(alpha.size()),
|
||||
static_cast<std::size_t>(n_targets / alpha.size())});
|
||||
|
||||
// std::tie is not available for cuda kernel.
|
||||
std::size_t sample_id = std::get<0>(idx);
|
||||
std::size_t quantile_id = std::get<1>(idx);
|
||||
std::size_t target_id = std::get<2>(idx);
|
||||
auto [sample_id, quantile_id, target_id] =
|
||||
linalg::UnravelIndex(i, n_samples, alpha.size(), n_targets / alpha.size());
|
||||
|
||||
auto d = predt(i) - labels(sample_id, target_id);
|
||||
auto h = weight[sample_id];
|
||||
|
||||
Reference in New Issue
Block a user