Support F order for the tensor type. (#8872)

- Add F order support for tensor and view.
- Use parameter pack for automatic type cast. (avoid excessive static cast for shape).
This commit is contained in:
Jiaming Yuan
2023-03-08 03:27:49 +08:00
committed by GitHub
parent f53055f75e
commit f236640427
9 changed files with 194 additions and 94 deletions

View File

@@ -23,9 +23,7 @@
#include "xgboost/span.h" // Span
#include "xgboost/tree_model.h" // RegTree
namespace xgboost {
namespace obj {
namespace detail {
namespace xgboost::obj::detail {
void EncodeTreeLeafHost(Context const* ctx, RegTree const& tree,
std::vector<bst_node_t> const& position, std::vector<size_t>* p_nptr,
std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_ridx) {
@@ -98,8 +96,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
auto const& h_node_idx = nidx;
auto const& h_node_ptr = nptr;
CHECK_LE(h_node_ptr.back(), info.num_row_);
auto h_predt = linalg::MakeTensorView(predt.ConstHostSpan(),
{info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
auto h_predt = linalg::MakeTensorView(ctx, predt.ConstHostSpan(), info.num_row_,
predt.Size() / info.num_row_);
// loop over each leaf
common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
@@ -138,11 +136,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
#if !defined(XGBOOST_USE_CUDA)
void UpdateTreeLeafDevice(Context const*, common::Span<bst_node_t const>, std::int32_t,
MetaInfo const&, float learning_rate, HostDeviceVector<float> const&,
float, RegTree*) {
MetaInfo const&, float, HostDeviceVector<float> const&, float, RegTree*) {
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA)
} // namespace detail
} // namespace obj
} // namespace xgboost
} // namespace xgboost::obj::detail

View File

@@ -157,8 +157,8 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
HostDeviceVector<float> quantiles;
predt.SetDevice(ctx->gpu_id);
auto d_predt = linalg::MakeTensorView(predt.ConstDeviceSpan(),
{info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), info.num_row_,
predt.Size() / info.num_row_);
CHECK_LT(group_idx, d_predt.Shape(1));
auto t_predt = d_predt.Slice(linalg::All(), group_idx);
auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), IdxY(info, group_idx));

View File

@@ -64,8 +64,7 @@ class QuantileRegression : public ObjFunction {
out_gpair->SetDevice(ctx_->gpu_id);
out_gpair->Resize(n_targets * info.num_row_);
auto gpair =
linalg::MakeTensorView(ctx_->IsCPU() ? out_gpair->HostSpan() : out_gpair->DeviceSpan(),
{info.num_row_, n_alphas, n_targets / n_alphas}, ctx_->gpu_id);
linalg::MakeTensorView(ctx_, out_gpair, info.num_row_, n_alphas, n_targets / n_alphas);
info.weights_.SetDevice(ctx_->gpu_id);
common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
@@ -80,15 +79,8 @@ class QuantileRegression : public ObjFunction {
linalg::ElementWiseKernel(
ctx_, gpair, [=] XGBOOST_DEVICE(std::size_t i, GradientPair const&) mutable {
auto idx = linalg::UnravelIndex(static_cast<std::size_t>(i),
{static_cast<std::size_t>(n_samples),
static_cast<std::size_t>(alpha.size()),
static_cast<std::size_t>(n_targets / alpha.size())});
// std::tie is not available for cuda kernel.
std::size_t sample_id = std::get<0>(idx);
std::size_t quantile_id = std::get<1>(idx);
std::size_t target_id = std::get<2>(idx);
auto [sample_id, quantile_id, target_id] =
linalg::UnravelIndex(i, n_samples, alpha.size(), n_targets / alpha.size());
auto d = predt(i) - labels(sample_id, target_id);
auto h = weight[sample_id];