Support multi-target, fit intercept for hinge. (#9850)

This commit is contained in:
Jiaming Yuan
2023-12-08 05:50:41 +08:00
committed by GitHub
parent 39c637ee19
commit 42de9206fc
8 changed files with 221 additions and 155 deletions

View File

@@ -1,31 +1,48 @@
/**
 * Copyright 2021-2023, XGBoost Contributors
 */
#ifndef XGBOOST_COMMON_LINALG_OP_CUH_
#define XGBOOST_COMMON_LINALG_OP_CUH_
#include "device_helpers.cuh"
#include <cstdint> // for int32_t
#include <cstdlib> // for size_t
#include <tuple> // for apply
#include "device_helpers.cuh" // for LaunchN
#include "linalg_op.h"
#include "xgboost/context.h"
#include "xgboost/linalg.h"
#include "xgboost/context.h" // for Context
#include "xgboost/linalg.h" // for TensorView
namespace xgboost {
namespace linalg {
namespace cuda_impl {
// Use template specialization to dispatch, Windows + CUDA 11.8 doesn't support extended
// lambda inside constexpr if.
//
// `fn` is invoked on the device with one unraveled index per dimension:
// fn(i) for vectors, fn(i, j, ...) for higher-rank tensors.
template <typename T, std::int32_t D>
struct ElementWiseImpl {
  template <typename Fn>
  void operator()(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s) {
    // The D == 1 case is handled by the specialization below.
    static_assert(D > 1);
    dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) mutable {
      // Convert the flat element index into per-dimension indices and expand
      // the resulting tuple into fn's arguments.
      std::apply(fn, linalg::UnravelIndex(i, t.Shape()));
    });
  }
};

// Vector specialization: fn receives the flat index directly, no unraveling.
template <typename T>
struct ElementWiseImpl<T, 1> {
  template <typename Fn>
  void operator()(linalg::TensorView<T, 1> t, Fn&& fn, cudaStream_t s) {
    dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) { fn(i); });
  }
};

/**
 * @brief Launch an element-wise kernel on the tensor's device.
 *
 * @param t  Tensor view whose every element index is visited.
 * @param fn Device-callable invoked with the element's indices (see above).
 * @param s  CUDA stream used for the launch; defaults to the null stream.
 */
template <typename T, std::int32_t D, typename Fn>
void ElementWiseKernel(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
  // Make sure the launch happens on the device owning the tensor's memory.
  dh::safe_cuda(cudaSetDevice(t.Device().ordinal));
  cuda_impl::ElementWiseImpl<T, D>{}(t, fn, s);
}
} // namespace cuda_impl
template <typename T, int32_t D, typename Fn>
void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
@@ -42,7 +59,8 @@ void ElementWiseTransformDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_
/**
 * @brief Visit every element index of `t`, dispatching to the CUDA or the CPU
 *        implementation based on the context's device.
 *
 * @param ctx Runtime context; its device selects the implementation and, on
 *            CPU, supplies the thread count.
 * @param t   Tensor view to iterate over.
 * @param fn  Callable invoked with one index per dimension (fn(i) for vectors).
 */
template <typename T, int32_t D, typename Fn>
void ElementWiseKernel(Context const* ctx, linalg::TensorView<T, D> t, Fn&& fn) {
  ctx->IsCUDA() ? cuda_impl::ElementWiseKernel(t, fn)
                : ElementWiseKernelHost(t, ctx->Threads(), fn);
}
} // namespace linalg
} // namespace xgboost

View File

@@ -1,5 +1,5 @@
/**
 * Copyright 2021-2023, XGBoost Contributors
 */
#ifndef XGBOOST_COMMON_LINALG_OP_H_
#define XGBOOST_COMMON_LINALG_OP_H_
@@ -27,17 +27,23 @@ void ElementWiseTransformHost(linalg::TensorView<T, D> t, int32_t n_threads, Fn&
}
}
/**
 * @brief Visit every element index of `t` in parallel on the CPU.
 *
 * @param t         Tensor view to iterate over.
 * @param n_threads Number of worker threads for ParallelFor.
 * @param fn        Callable invoked with one index per dimension: fn(i) for
 *                  vectors, fn(i, j) for matrices, etc.
 */
template <typename T, std::int32_t D, typename Fn>
void ElementWiseKernelHost(linalg::TensorView<T, D> t, std::int32_t n_threads, Fn &&fn) {
  if constexpr (D == 1) {
    // Vector: pass the flat index straight through, no unraveling needed.
    common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) { fn(i); });
  } else if (D == 2 && t.CContiguous() && t.Shape(0) > t.Shape(1) * 64) {
    // Heuristic. Tall, c-contiguous matrix: parallelize over rows and walk the
    // columns sequentially so each thread touches a contiguous range.
    auto n_rows = t.Shape(0);
    auto n_columns = t.Shape(1);
    common::ParallelFor(n_rows, n_threads, [&](std::size_t i) {
      for (std::size_t j = 0; j < n_columns; ++j) {
        fn(i, j);
      }
    });
  } else {
    // General case: one task per element; unravel the flat index and expand
    // the tuple into fn's per-dimension arguments.
    common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) {
      auto idx = linalg::UnravelIndex(i, t.Shape());
      std::apply(fn, idx);
    });
  }
}