From 71b0528a2f7c65e181d4441b952955120e3c4cbe Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Fri, 17 Jul 2020 01:18:13 -0700 Subject: [PATCH] GPU implementation of AFT survival objective and metric (#5714) * Add interval accuracy * De-virtualize AFT functions * Lint * Refactor AFT metric using GPU-CPU reducer * Fix R build * Fix build on Windows * Fix copyright header * Clang-tidy * Fix crashing demo * Fix typos in comment; explain GPU ID * Remove unnecessary #include * Add C++ test for interval accuracy * Fix a bug in accuracy metric: use log pred * Refactor AFT objective using GPU-CPU Transform * Lint * Fix lint * Use Ninja to speed up build * Use time, not /usr/bin/time * Add cpu_build worker class, with concurrency = 1 * Use concurrency = 1 only for CUDA build * concurrency = 1 for clang-tidy * Address reviewer's feedback * Update link to AFT paper --- Jenkinsfile | 4 +- amalgamation/xgboost-all0.cc | 1 - src/common/probability_distribution.cc | 107 ------ src/common/probability_distribution.h | 154 +++++---- src/common/survival_util.cc | 248 +------------ src/common/survival_util.h | 327 ++++++++++++++++-- src/metric/metric.cc | 3 +- src/metric/survival_metric.cc | 104 +----- src/metric/survival_metric.cu | 304 ++++++++++++++++ src/objective/aft_obj.cc | 115 +----- src/objective/aft_obj.cu | 147 ++++++++ tests/ci_build/Dockerfile.cpu | 2 +- tests/ci_build/Dockerfile.gpu_build | 11 +- tests/ci_build/build_via_cmake.sh | 6 +- .../common/test_probability_distribution.cc | 100 +++--- tests/cpp/common/test_survival_util.cc | 31 +- tests/cpp/metric/test_survival_metric.cc | 95 ++--- tests/cpp/metric/test_survival_metric.cu | 81 +++++ tests/cpp/objective/test_aft_obj.cc | 26 +- tests/cpp/objective/test_aft_obj.cu | 6 + 20 files changed, 1050 insertions(+), 822 deletions(-) delete mode 100644 src/common/probability_distribution.cc create mode 100644 src/metric/survival_metric.cu create mode 100644 src/objective/aft_obj.cu create mode 100644 tests/cpp/metric/test_survival_metric.cu create mode 100644 tests/cpp/objective/test_aft_obj.cu diff --git a/Jenkinsfile b/Jenkinsfile index 17ada9be4..f65e2d6d7 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -124,7 +124,7 @@ def checkoutSrcs() { } def ClangTidy() { - node('linux && cpu') { + node('linux && cpu_build') { unstash name: 'srcs' echo "Running clang-tidy job..." def container_type = "clang_tidy" @@ -239,7 +239,7 @@ def BuildCPUNonOmp() { } def BuildCUDA(args) { - node('linux && cpu') { + node('linux && cpu_build') { unstash name: 'srcs' echo "Build with CUDA ${args.cuda_version}" def container_type = "gpu_build" diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc index 6e8c09b7d..4dc86aa57 100644 --- a/amalgamation/xgboost-all0.cc +++ b/amalgamation/xgboost-all0.cc @@ -76,7 +76,6 @@ #include "../src/common/json.cc" #include "../src/common/io.cc" #include "../src/common/survival_util.cc" -#include "../src/common/probability_distribution.cc" #include "../src/common/version.cc" // c_api diff --git a/src/common/probability_distribution.cc b/src/common/probability_distribution.cc deleted file mode 100644 index 51bcc495b..000000000 --- a/src/common/probability_distribution.cc +++ /dev/null @@ -1,107 +0,0 @@ -/*! - * Copyright 2020 by Contributors - * \file probability_distribution.cc - * \brief Implementation of a few useful probability distributions - * \author Avinash Barnwal and Hyunsu Cho - */ - -#include -#include -#include "probability_distribution.h" - -namespace xgboost { -namespace common { - -ProbabilityDistribution* ProbabilityDistribution::Create(ProbabilityDistributionType dist) { - switch (dist) { - case ProbabilityDistributionType::kNormal: - return new NormalDist; - case ProbabilityDistributionType::kLogistic: - return new LogisticDist; - case ProbabilityDistributionType::kExtreme: - return new ExtremeDist; - default: - LOG(FATAL) << "Unknown distribution"; - } - return nullptr; -} - -double NormalDist::PDF(double z) { - const double pdf = std::exp(-z * z / 2) / std::sqrt(2 * probability_constant::kPI); - return pdf; -} - -double NormalDist::CDF(double z) { - const double cdf = 0.5 * (1 + std::erf(z / std::sqrt(2))); - return cdf; -} - -double NormalDist::GradPDF(double z) { - const double pdf = this->PDF(z); - const double grad = -1 * z * pdf; - return grad; -} - -double NormalDist::HessPDF(double z) { - const double pdf = this->PDF(z); - const double hess = (z * z - 1) * pdf; - return hess; -} - -double LogisticDist::PDF(double z) { - const double w = std::exp(z); - const double sqrt_denominator = 1 + w; - const double pdf - = (std::isinf(w) || std::isinf(w * w)) ? 0.0 : (w / (sqrt_denominator * sqrt_denominator)); - return pdf; -} - -double LogisticDist::CDF(double z) { - const double w = std::exp(z); - const double cdf = std::isinf(w) ? 1.0 : (w / (1 + w)); - return cdf; -} - -double LogisticDist::GradPDF(double z) { - const double pdf = this->PDF(z); - const double w = std::exp(z); - const double grad = std::isinf(w) ? 0.0 : pdf * (1 - w) / (1 + w); - return grad; -} - -double LogisticDist::HessPDF(double z) { - const double pdf = this->PDF(z); - const double w = std::exp(z); - const double hess - = (std::isinf(w) || std::isinf(w * w)) ? 0.0 : pdf * (w * w - 4 * w + 1) / ((1 + w) * (1 + w)); - return hess; -} - -double ExtremeDist::PDF(double z) { - const double w = std::exp(z); - const double pdf = std::isinf(w) ? 0.0 : (w * std::exp(-w)); - return pdf; -} - -double ExtremeDist::CDF(double z) { - const double w = std::exp(z); - const double cdf = 1 - std::exp(-w); - return cdf; -} - -double ExtremeDist::GradPDF(double z) { - const double pdf = this->PDF(z); - const double w = std::exp(z); - const double grad = std::isinf(w) ? 0.0 : ((1 - w) * pdf); - return grad; -} - -double ExtremeDist::HessPDF(double z) { - const double pdf = this->PDF(z); - const double w = std::exp(z); - const double hess = (std::isinf(w) || std::isinf(w * w)) ? 0.0 : ((w * w - 3 * w + 1) * pdf); - return hess; -} - -} // namespace common -} // namespace xgboost diff --git a/src/common/probability_distribution.h b/src/common/probability_distribution.h index 66d6a3d44..b581df0cf 100644 --- a/src/common/probability_distribution.h +++ b/src/common/probability_distribution.h @@ -1,5 +1,5 @@ /*! - * Copyright 2020 by Contributors + * Copyright 2019-2020 by Contributors * \file probability_distribution.h * \brief Implementation of a few useful probability distributions * \author Avinash Barnwal and Hyunsu Cho @@ -8,85 +8,115 @@ #ifndef XGBOOST_COMMON_PROBABILITY_DISTRIBUTION_H_ #define XGBOOST_COMMON_PROBABILITY_DISTRIBUTION_H_ +#include + namespace xgboost { namespace common { -namespace probability_constant { +#ifndef __CUDACC__ + +using std::exp; +using std::sqrt; +using std::isinf; +using std::isnan; + +#endif // __CUDACC__ /*! \brief Constant PI */ -const double kPI = 3.14159265358979323846; +constexpr double kPI = 3.14159265358979323846; /*! \brief The Euler-Mascheroni_constant */ -const double kEulerMascheroni = 0.57721566490153286060651209008240243104215933593992; - -} // namespace probability_constant +constexpr double kEulerMascheroni = 0.57721566490153286060651209008240243104215933593992; /*! \brief Enum encoding possible choices of probability distribution */ enum class ProbabilityDistributionType : int { kNormal = 0, kLogistic = 1, kExtreme = 2 }; -/*! \brief Interface for a probability distribution */ -class ProbabilityDistribution { - public: - /*! - * \brief Evaluate Probability Density Function (PDF) at a particular point - * \param z point at which to evaluate PDF - * \return Value of PDF evaluated - */ - virtual double PDF(double z) = 0; - /*! - * \brief Evaluate Cumulative Distribution Function (CDF) at a particular point - * \param z point at which to evaluate CDF - * \return Value of CDF evaluated - */ - virtual double CDF(double z) = 0; - /*! - * \brief Evaluate first derivative of PDF at a particular point - * \param z point at which to evaluate first derivative of PDF - * \return Value of first derivative of PDF evaluated - */ - virtual double GradPDF(double z) = 0; - /*! - * \brief Evaluate second derivative of PDF at a particular point - * \param z point at which to evaluate second derivative of PDF - * \return Value of second derivative of PDF evaluated - */ - virtual double HessPDF(double z) = 0; +struct NormalDistribution { + XGBOOST_DEVICE static double PDF(double z) { + return exp(-z * z / 2.0) / sqrt(2.0 * kPI); + } - /*! - * \brief Factory function to instantiate a new probability distribution object - * \param dist kind of probability distribution - * \return Reference to the newly created probability distribution object - */ - static ProbabilityDistribution* Create(ProbabilityDistributionType dist); - virtual ~ProbabilityDistribution() = default; + XGBOOST_DEVICE static double CDF(double z) { + return 0.5 * (1 + erf(z / sqrt(2.0))); + } + + XGBOOST_DEVICE static double GradPDF(double z) { + return -z * PDF(z); + } + + XGBOOST_DEVICE static double HessPDF(double z) { + return (z * z - 1.0) * PDF(z); + } + + XGBOOST_DEVICE static ProbabilityDistributionType Type() { + return ProbabilityDistributionType::kNormal; + } }; -/*! \brief The (standard) normal distribution */ -class NormalDist : public ProbabilityDistribution { - public: - double PDF(double z) override; - double CDF(double z) override; - double GradPDF(double z) override; - double HessPDF(double z) override; +struct LogisticDistribution { + XGBOOST_DEVICE static double PDF(double z) { + const double w = exp(z); + const double sqrt_denominator = 1 + w; + if (isinf(w) || isinf(w * w)) { + return 0.0; + } else { + return w / (sqrt_denominator * sqrt_denominator); + } + } + + XGBOOST_DEVICE static double CDF(double z) { + const double w = exp(z); + return isinf(w) ? 1.0 : (w / (1 + w)); + } + + XGBOOST_DEVICE static double GradPDF(double z) { + const double w = exp(z); + return isinf(w) ? 0.0 : (PDF(z) * (1 - w) / (1 + w)); + } + + XGBOOST_DEVICE static double HessPDF(double z) { + const double w = exp(z); + if (isinf(w) || isinf(w * w)) { + return 0.0; + } else { + return PDF(z) * (w * w - 4 * w + 1) / ((1 + w) * (1 + w)); + } + } + + XGBOOST_DEVICE static ProbabilityDistributionType Type() { + return ProbabilityDistributionType::kLogistic; + } }; -/*! \brief The (standard) logistic distribution */ -class LogisticDist : public ProbabilityDistribution { - public: - double PDF(double z) override; - double CDF(double z) override; - double GradPDF(double z) override; - double HessPDF(double z) override; -}; +struct ExtremeDistribution { + XGBOOST_DEVICE static double PDF(double z) { + const double w = exp(z); + return isinf(w) ? 0.0 : (w * exp(-w)); + } -/*! \brief The extreme distribution, also known as the Gumbel (minimum) distribution */ -class ExtremeDist : public ProbabilityDistribution { - public: - double PDF(double z) override; - double CDF(double z) override; - double GradPDF(double z) override; - double HessPDF(double z) override; + XGBOOST_DEVICE static double CDF(double z) { + const double w = exp(z); + return 1 - exp(-w); + } + + XGBOOST_DEVICE static double GradPDF(double z) { + const double w = exp(z); + return isinf(w) ? 0.0 : ((1 - w) * PDF(z)); + } + + XGBOOST_DEVICE static double HessPDF(double z) { + const double w = exp(z); + if (isinf(w) || isinf(w * w)) { + return 0.0; + } else { + return (w * w - 3 * w + 1) * PDF(z); + } + } + + XGBOOST_DEVICE static ProbabilityDistributionType Type() { + return ProbabilityDistributionType::kExtreme; + } }; } // namespace common diff --git a/src/common/survival_util.cc b/src/common/survival_util.cc index 5630da355..2e4d81bf6 100644 --- a/src/common/survival_util.cc +++ b/src/common/survival_util.cc @@ -1,5 +1,5 @@ /*! - * Copyright 2019 by Contributors + * Copyright 2019-2020 by Contributors * \file survival_util.cc * \brief Utility functions, useful for implementing objective and metric functions for survival * analysis @@ -7,258 +7,12 @@ */ #include -#include -#include #include "survival_util.h" -/* -- Formulas are motivated from document - - http://members.cbio.mines-paristech.fr/~thocking/survival.pdf -- Detailed Derivation of Loss/Gradient/Hessian - - https://github.com/avinashbarnwal/GSOC-2019/blob/master/doc/Accelerated_Failure_Time.pdf -*/ - -namespace { - -// Allowable range for gradient and hessian. Used for regularization -constexpr double kMinGradient = -15.0; -constexpr double kMaxGradient = 15.0; -constexpr double kMinHessian = 1e-16; // Ensure that no data point gets zero hessian -constexpr double kMaxHessian = 15.0; - -constexpr double kEps = 1e-12; // A denomitor in a fraction should not be too small - -// Clip (limit) x to fit range [x_min, x_max]. -// If x < x_min, return x_min; if x > x_max, return x_max; if x_min <= x <= x_max, return x. -// This function assumes x_min < x_max; behavior is undefined if this assumption does not hold. -inline double Clip(double x, double x_min, double x_max) { - if (x < x_min) { - return x_min; - } - if (x > x_max) { - return x_max; - } - return x; -} - -using xgboost::common::ProbabilityDistributionType; - -enum class CensoringType : uint8_t { - kUncensored, kRightCensored, kLeftCensored, kIntervalCensored -}; - -using xgboost::GradientPairPrecise; - -inline GradientPairPrecise GetLimitAtInfPred(ProbabilityDistributionType dist_type, - CensoringType censor_type, - double sign, double sigma) { - switch (censor_type) { - case CensoringType::kUncensored: - switch (dist_type) { - case ProbabilityDistributionType::kNormal: - return sign ? GradientPairPrecise{ kMinGradient, 1.0 / (sigma * sigma) } - : GradientPairPrecise{ kMaxGradient, 1.0 / (sigma * sigma) }; - case ProbabilityDistributionType::kLogistic: - return sign ? GradientPairPrecise{ -1.0 / sigma, kMinHessian } - : GradientPairPrecise{ 1.0 / sigma, kMinHessian }; - case ProbabilityDistributionType::kExtreme: - return sign ? GradientPairPrecise{ kMinGradient, kMaxHessian } - : GradientPairPrecise{ 1.0 / sigma, kMinHessian }; - default: - LOG(FATAL) << "Unknown distribution type"; - } - case CensoringType::kRightCensored: - switch (dist_type) { - case ProbabilityDistributionType::kNormal: - return sign ? GradientPairPrecise{ kMinGradient, 1.0 / (sigma * sigma) } - : GradientPairPrecise{ 0.0, kMinHessian }; - case ProbabilityDistributionType::kLogistic: - return sign ? GradientPairPrecise{ -1.0 / sigma, kMinHessian } - : GradientPairPrecise{ 0.0, kMinHessian }; - case ProbabilityDistributionType::kExtreme: - return sign ? GradientPairPrecise{ kMinGradient, kMaxHessian } - : GradientPairPrecise{ 0.0, kMinHessian }; - default: - LOG(FATAL) << "Unknown distribution type"; - } - case CensoringType::kLeftCensored: - switch (dist_type) { - case ProbabilityDistributionType::kNormal: - return sign ? GradientPairPrecise{ 0.0, kMinHessian } - : GradientPairPrecise{ kMaxGradient, 1.0 / (sigma * sigma) }; - case ProbabilityDistributionType::kLogistic: - return sign ? GradientPairPrecise{ 0.0, kMinHessian } - : GradientPairPrecise{ 1.0 / sigma, kMinHessian }; - case ProbabilityDistributionType::kExtreme: - return sign ? GradientPairPrecise{ 0.0, kMinHessian } - : GradientPairPrecise{ 1.0 / sigma, kMinHessian }; - default: - LOG(FATAL) << "Unknown distribution type"; - } - case CensoringType::kIntervalCensored: - switch (dist_type) { - case ProbabilityDistributionType::kNormal: - return sign ? GradientPairPrecise{ kMinGradient, 1.0 / (sigma * sigma) } - : GradientPairPrecise{ kMaxGradient, 1.0 / (sigma * sigma) }; - case ProbabilityDistributionType::kLogistic: - return sign ? GradientPairPrecise{ -1.0 / sigma, kMinHessian } - : GradientPairPrecise{ 1.0 / sigma, kMinHessian }; - case ProbabilityDistributionType::kExtreme: - return sign ? GradientPairPrecise{ kMinGradient, kMaxHessian } - : GradientPairPrecise{ 1.0 / sigma, kMinHessian }; - default: - LOG(FATAL) << "Unknown distribution type"; - } - default: - LOG(FATAL) << "Unknown censoring type"; - } - - return { 0.0, 0.0 }; -} - -} // anonymous namespace - namespace xgboost { namespace common { DMLC_REGISTER_PARAMETER(AFTParam); -double AFTLoss::Loss(double y_lower, double y_upper, double y_pred, double sigma) { - const double log_y_lower = std::log(y_lower); - const double log_y_upper = std::log(y_upper); - - double cost; - - if (y_lower == y_upper) { // uncensored - const double z = (log_y_lower - y_pred) / sigma; - const double pdf = dist_->PDF(z); - // Regularize the denominator with eps, to avoid INF or NAN - cost = -std::log(std::max(pdf / (sigma * y_lower), kEps)); - } else { // censored; now check what type of censorship we have - double z_u, z_l, cdf_u, cdf_l; - if (std::isinf(y_upper)) { // right-censored - cdf_u = 1; - } else { // left-censored or interval-censored - z_u = (log_y_upper - y_pred) / sigma; - cdf_u = dist_->CDF(z_u); - } - if (std::isinf(y_lower)) { // left-censored - cdf_l = 0; - } else { // right-censored or interval-censored - z_l = (log_y_lower - y_pred) / sigma; - cdf_l = dist_->CDF(z_l); - } - // Regularize the denominator with eps, to avoid INF or NAN - cost = -std::log(std::max(cdf_u - cdf_l, kEps)); - } - - return cost; -} - -double AFTLoss::Gradient(double y_lower, double y_upper, double y_pred, double sigma) { - const double log_y_lower = std::log(y_lower); - const double log_y_upper = std::log(y_upper); - double numerator, denominator, gradient; // numerator and denominator of gradient - CensoringType censor_type; - bool z_sign; // sign of z-score - - if (y_lower == y_upper) { // uncensored - const double z = (log_y_lower - y_pred) / sigma; - const double pdf = dist_->PDF(z); - const double grad_pdf = dist_->GradPDF(z); - censor_type = CensoringType::kUncensored; - numerator = grad_pdf; - denominator = sigma * pdf; - z_sign = (z > 0); - } else { // censored; now check what type of censorship we have - double z_u = 0.0, z_l = 0.0, pdf_u, pdf_l, cdf_u, cdf_l; - censor_type = CensoringType::kIntervalCensored; - if (std::isinf(y_upper)) { // right-censored - pdf_u = 0; - cdf_u = 1; - censor_type = CensoringType::kRightCensored; - } else { // interval-censored or left-censored - z_u = (log_y_upper - y_pred) / sigma; - pdf_u = dist_->PDF(z_u); - cdf_u = dist_->CDF(z_u); - } - if (std::isinf(y_lower)) { // left-censored - pdf_l = 0; - cdf_l = 0; - censor_type = CensoringType::kLeftCensored; - } else { // interval-censored or right-censored - z_l = (log_y_lower - y_pred) / sigma; - pdf_l = dist_->PDF(z_l); - cdf_l = dist_->CDF(z_l); - } - z_sign = (z_u > 0 || z_l > 0); - numerator = pdf_u - pdf_l; - denominator = sigma * (cdf_u - cdf_l); - } - gradient = numerator / denominator; - if (denominator < kEps && (std::isnan(gradient) || std::isinf(gradient))) { - gradient = GetLimitAtInfPred(dist_type_, censor_type, z_sign, sigma).GetGrad(); - } - - return Clip(gradient, kMinGradient, kMaxGradient); -} - -double AFTLoss::Hessian(double y_lower, double y_upper, double y_pred, double sigma) { - const double log_y_lower = std::log(y_lower); - const double log_y_upper = std::log(y_upper); - double numerator, denominator, hessian; // numerator and denominator of hessian - CensoringType censor_type; - bool z_sign; // sign of z-score - - if (y_lower == y_upper) { // uncensored - const double z = (log_y_lower - y_pred) / sigma; - const double pdf = dist_->PDF(z); - const double grad_pdf = dist_->GradPDF(z); - const double hess_pdf = dist_->HessPDF(z); - censor_type = CensoringType::kUncensored; - numerator = -(pdf * hess_pdf - grad_pdf * grad_pdf); - denominator = sigma * sigma * pdf * pdf; - z_sign = (z > 0); - } else { // censored; now check what type of censorship we have - double z_u = 0.0, z_l = 0.0, grad_pdf_u, grad_pdf_l, pdf_u, pdf_l, cdf_u, cdf_l; - censor_type = CensoringType::kIntervalCensored; - if (std::isinf(y_upper)) { // right-censored - pdf_u = 0; - cdf_u = 1; - grad_pdf_u = 0; - censor_type = CensoringType::kRightCensored; - } else { // interval-censored or left-censored - z_u = (log_y_upper - y_pred) / sigma; - pdf_u = dist_->PDF(z_u); - cdf_u = dist_->CDF(z_u); - grad_pdf_u = dist_->GradPDF(z_u); - } - if (std::isinf(y_lower)) { // left-censored - pdf_l = 0; - cdf_l = 0; - grad_pdf_l = 0; - censor_type = CensoringType::kLeftCensored; - } else { // interval-censored or right-censored - z_l = (log_y_lower - y_pred) / sigma; - pdf_l = dist_->PDF(z_l); - cdf_l = dist_->CDF(z_l); - grad_pdf_l = dist_->GradPDF(z_l); - } - const double cdf_diff = cdf_u - cdf_l; - const double pdf_diff = pdf_u - pdf_l; - const double grad_diff = grad_pdf_u - grad_pdf_l; - const double sqrt_denominator = sigma * cdf_diff; - z_sign = (z_u > 0 || z_l > 0); - numerator = -(cdf_diff * grad_diff - pdf_diff * pdf_diff); - denominator = sqrt_denominator * sqrt_denominator; - } - hessian = numerator / denominator; - if (denominator < kEps && (std::isnan(hessian) || std::isinf(hessian))) { - hessian = GetLimitAtInfPred(dist_type_, censor_type, z_sign, sigma).GetHess(); - } - - return Clip(hessian, kMinHessian, kMaxHessian); -} - } // namespace common } // namespace xgboost diff --git a/src/common/survival_util.h b/src/common/survival_util.h index 50c6aab51..d0b09a49f 100644 --- a/src/common/survival_util.h +++ b/src/common/survival_util.h @@ -1,5 +1,5 @@ /*! - * Copyright 2019 by Contributors + * Copyright 2019-2020 by Contributors * \file survival_util.h * \brief Utility functions, useful for implementing objective and metric functions for survival * analysis @@ -8,8 +8,16 @@ #ifndef XGBOOST_COMMON_SURVIVAL_UTIL_H_ #define XGBOOST_COMMON_SURVIVAL_UTIL_H_ +/* + * For the derivation of the loss, gradient, and hessian for the Accelerated Failure Time model, + * refer to the paper "Survival regression with accelerated failure time model in XGBoost" + * at https://arxiv.org/abs/2006.04920. + */ + #include #include +#include +#include #include "probability_distribution.h" DECLARE_FIELD_ENUM_CLASS(xgboost::common::ProbabilityDistributionType); @@ -17,6 +25,51 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::common::ProbabilityDistributionType); namespace xgboost { namespace common { +#ifndef __CUDACC__ + +using std::log; +using std::fmax; + +#endif // __CUDACC__ + +enum class CensoringType : uint8_t { + kUncensored, kRightCensored, kLeftCensored, kIntervalCensored +}; + +namespace aft { + +// Allowable range for gradient and hessian. Used for regularization +constexpr double kMinGradient = -15.0; +constexpr double kMaxGradient = 15.0; +constexpr double kMinHessian = 1e-16; // Ensure that no data point gets zero hessian +constexpr double kMaxHessian = 15.0; + +constexpr double kEps = 1e-12; // A denomitor in a fraction should not be too small + +// Clip (limit) x to fit range [x_min, x_max]. +// If x < x_min, return x_min; if x > x_max, return x_max; if x_min <= x <= x_max, return x. +// This function assumes x_min < x_max; behavior is undefined if this assumption does not hold. +XGBOOST_DEVICE +inline double Clip(double x, double x_min, double x_max) { + if (x < x_min) { + return x_min; + } + if (x > x_max) { + return x_max; + } + return x; +} + +template +XGBOOST_DEVICE inline double +GetLimitGradAtInfPred(CensoringType censor_type, bool sign, double sigma); + +template +XGBOOST_DEVICE inline double +GetLimitHessAtInfPred(CensoringType censor_type, bool sign, double sigma); + +} // namespace aft + /*! \brief Parameter structure for AFT loss and metric */ struct AFTParam : public XGBoostParameter { /*! \brief Choice of probability distribution for the noise term in AFT */ @@ -39,47 +92,245 @@ struct AFTParam : public XGBoostParameter { }; /*! \brief The AFT loss function */ -class AFTLoss { - private: - std::unique_ptr dist_; - ProbabilityDistributionType dist_type_; +template +struct AFTLoss { + XGBOOST_DEVICE inline static + double Loss(double y_lower, double y_upper, double y_pred, double sigma) { + const double log_y_lower = log(y_lower); + const double log_y_upper = log(y_upper); - public: - /*! - * \brief Constructor for AFT loss function - * \param dist_type Choice of probability distribution for the noise term in AFT - */ - explicit AFTLoss(ProbabilityDistributionType dist_type) - : dist_(ProbabilityDistribution::Create(dist_type)), - dist_type_(dist_type) {} + double cost; - public: - /*! - * \brief Compute the AFT loss - * \param y_lower Lower bound for the true label - * \param y_upper Upper bound for the true label - * \param y_pred Predicted label - * \param sigma Scaling factor to be applied to the distribution of the noise term - */ - double Loss(double y_lower, double y_upper, double y_pred, double sigma); - /*! - * \brief Compute the gradient of the AFT loss - * \param y_lower Lower bound for the true label - * \param y_upper Upper bound for the true label - * \param y_pred Predicted label - * \param sigma Scaling factor to be applied to the distribution of the noise term - */ - double Gradient(double y_lower, double y_upper, double y_pred, double sigma); - /*! - * \brief Compute the hessian of the AFT loss - * \param y_lower Lower bound for the true label - * \param y_upper Upper bound for the true label - * \param y_pred Predicted label - * \param sigma Scaling factor to be applied to the distribution of the noise term - */ - double Hessian(double y_lower, double y_upper, double y_pred, double sigma); + if (y_lower == y_upper) { // uncensored + const double z = (log_y_lower - y_pred) / sigma; + const double pdf = Distribution::PDF(z); + // Regularize the denominator with eps, to avoid INF or NAN + cost = -log(fmax(pdf / (sigma * y_lower), aft::kEps)); + } else { // censored; now check what type of censorship we have + double z_u, z_l, cdf_u, cdf_l; + if (isinf(y_upper)) { // right-censored + cdf_u = 1; + } else { // left-censored or interval-censored + z_u = (log_y_upper - y_pred) / sigma; + cdf_u = Distribution::CDF(z_u); + } + if (y_lower <= 0.0) { // left-censored + cdf_l = 0; + } else { // right-censored or interval-censored + z_l = (log_y_lower - y_pred) / sigma; + cdf_l = Distribution::CDF(z_l); + } + // Regularize the denominator with eps, to avoid INF or NAN + cost = -log(fmax(cdf_u - cdf_l, aft::kEps)); + } + + return cost; + } + + XGBOOST_DEVICE inline static + double Gradient(double y_lower, double y_upper, double y_pred, double sigma) { + const double log_y_lower = log(y_lower); + const double log_y_upper = log(y_upper); + double numerator, denominator, gradient; // numerator and denominator of gradient + CensoringType censor_type; + bool z_sign; // sign of z-score + + if (y_lower == y_upper) { // uncensored + const double z = (log_y_lower - y_pred) / sigma; + const double pdf = Distribution::PDF(z); + const double grad_pdf = Distribution::GradPDF(z); + censor_type = CensoringType::kUncensored; + numerator = grad_pdf; + denominator = sigma * pdf; + z_sign = (z > 0); + } else { // censored; now check what type of censorship we have + double z_u = 0.0, z_l = 0.0, pdf_u, pdf_l, cdf_u, cdf_l; + censor_type = CensoringType::kIntervalCensored; + if (isinf(y_upper)) { // right-censored + pdf_u = 0; + cdf_u = 1; + censor_type = CensoringType::kRightCensored; + } else { // interval-censored or left-censored + z_u = (log_y_upper - y_pred) / sigma; + pdf_u = Distribution::PDF(z_u); + cdf_u = Distribution::CDF(z_u); + } + if (y_lower <= 0.0) { // left-censored + pdf_l = 0; + cdf_l = 0; + censor_type = CensoringType::kLeftCensored; + } else { // interval-censored or right-censored + z_l = (log_y_lower - y_pred) / sigma; + pdf_l = Distribution::PDF(z_l); + cdf_l = Distribution::CDF(z_l); + } + z_sign = (z_u > 0 || z_l > 0); + numerator = pdf_u - pdf_l; + denominator = sigma * (cdf_u - cdf_l); + } + gradient = numerator / denominator; + if (denominator < aft::kEps && (isnan(gradient) || isinf(gradient))) { + gradient = aft::GetLimitGradAtInfPred(censor_type, z_sign, sigma); + } + + return aft::Clip(gradient, aft::kMinGradient, aft::kMaxGradient); + } + + XGBOOST_DEVICE inline static + double Hessian(double y_lower, double y_upper, double y_pred, double sigma) { + const double log_y_lower = log(y_lower); + const double log_y_upper = log(y_upper); + double numerator, denominator, hessian; // numerator and denominator of hessian + CensoringType censor_type; + bool z_sign; // sign of z-score + + if (y_lower == y_upper) { // uncensored + const double z = (log_y_lower - y_pred) / sigma; + const double pdf = Distribution::PDF(z); + const double grad_pdf = Distribution::GradPDF(z); + const double hess_pdf = Distribution::HessPDF(z); + censor_type = CensoringType::kUncensored; + numerator = -(pdf * hess_pdf - grad_pdf * grad_pdf); + denominator = sigma * sigma * pdf * pdf; + z_sign = (z > 0); + } else { // censored; now check what type of censorship we have + double z_u = 0.0, z_l = 0.0, grad_pdf_u, grad_pdf_l, pdf_u, pdf_l, cdf_u, cdf_l; + censor_type = CensoringType::kIntervalCensored; + if (isinf(y_upper)) { // right-censored + pdf_u = 0; + cdf_u = 1; + grad_pdf_u = 0; + censor_type = CensoringType::kRightCensored; + } else { // interval-censored or left-censored + z_u = (log_y_upper - y_pred) / sigma; + pdf_u = Distribution::PDF(z_u); + cdf_u = Distribution::CDF(z_u); + grad_pdf_u = Distribution::GradPDF(z_u); + } + if (y_lower <= 0.0) { // left-censored + pdf_l = 0; + cdf_l = 0; + grad_pdf_l = 0; + censor_type = CensoringType::kLeftCensored; + } else { // interval-censored or right-censored + z_l = (log_y_lower - y_pred) / sigma; + pdf_l = Distribution::PDF(z_l); + cdf_l = Distribution::CDF(z_l); + grad_pdf_l = Distribution::GradPDF(z_l); + } + const double cdf_diff = cdf_u - cdf_l; + const double pdf_diff = pdf_u - pdf_l; + const double grad_diff = grad_pdf_u - grad_pdf_l; + const double sqrt_denominator = sigma * cdf_diff; + z_sign = (z_u > 0 || z_l > 0); + numerator = -(cdf_diff * grad_diff - pdf_diff * pdf_diff); + denominator = sqrt_denominator * sqrt_denominator; + } + hessian = numerator / denominator; + if (denominator < aft::kEps && (isnan(hessian) || isinf(hessian))) { + hessian = aft::GetLimitHessAtInfPred(censor_type, z_sign, sigma); + } + + return aft::Clip(hessian, aft::kMinHessian, aft::kMaxHessian); + } }; +namespace aft { + +template <> +XGBOOST_DEVICE inline double +GetLimitGradAtInfPred(CensoringType censor_type, bool sign, double sigma) { + switch (censor_type) { + case CensoringType::kUncensored: + return sign ? kMinGradient : kMaxGradient; + case CensoringType::kRightCensored: + return sign ? kMinGradient : 0.0; + case CensoringType::kLeftCensored: + return sign ? 0.0 : kMaxGradient; + case CensoringType::kIntervalCensored: + return sign ? kMinGradient : kMaxGradient; + } + return std::numeric_limits::quiet_NaN(); +} + +template <> +XGBOOST_DEVICE inline double +GetLimitHessAtInfPred(CensoringType censor_type, bool sign, double sigma) { + switch (censor_type) { + case CensoringType::kUncensored: + return 1.0 / (sigma * sigma); + case CensoringType::kRightCensored: + return sign ? (1.0 / (sigma * sigma)) : kMinHessian; + case CensoringType::kLeftCensored: + return sign ? kMinHessian : (1.0 / (sigma * sigma)); + case CensoringType::kIntervalCensored: + return 1.0 / (sigma * sigma); + } + return std::numeric_limits::quiet_NaN(); +} + +template <> +XGBOOST_DEVICE inline double +GetLimitGradAtInfPred(CensoringType censor_type, bool sign, double sigma) { + switch (censor_type) { + case CensoringType::kUncensored: + return sign ? (-1.0 / sigma) : (1.0 / sigma); + case CensoringType::kRightCensored: + return sign ? (-1.0 / sigma) : 0.0; + case CensoringType::kLeftCensored: + return sign ? 0.0 : (1.0 / sigma); + case CensoringType::kIntervalCensored: + return sign ? (-1.0 / sigma) : (1.0 / sigma); + } + return std::numeric_limits::quiet_NaN(); +} + +template <> +XGBOOST_DEVICE inline double +GetLimitHessAtInfPred(CensoringType censor_type, bool sign, double sigma) { + switch (censor_type) { + case CensoringType::kUncensored: + case CensoringType::kRightCensored: + case CensoringType::kLeftCensored: + case CensoringType::kIntervalCensored: + return kMinHessian; + } + return std::numeric_limits::quiet_NaN(); +} + +template <> +XGBOOST_DEVICE inline double +GetLimitGradAtInfPred(CensoringType censor_type, bool sign, double sigma) { + switch (censor_type) { + case CensoringType::kUncensored: + return sign ? kMinGradient : (1.0 / sigma); + case CensoringType::kRightCensored: + return sign ? kMinGradient : 0.0; + case CensoringType::kLeftCensored: + return sign ? 0.0 : (1.0 / sigma); + case CensoringType::kIntervalCensored: + return sign ? kMinGradient : (1.0 / sigma); + } + return std::numeric_limits::quiet_NaN(); +} + +template <> +XGBOOST_DEVICE inline double +GetLimitHessAtInfPred(CensoringType censor_type, bool sign, double sigma) { + switch (censor_type) { + case CensoringType::kUncensored: + case CensoringType::kRightCensored: + return sign ? kMaxHessian : kMinHessian; + case CensoringType::kLeftCensored: + return kMinHessian; + case CensoringType::kIntervalCensored: + return sign ? kMaxHessian : kMinHessian; + } + return std::numeric_limits::quiet_NaN(); +} + +} // namespace aft + } // namespace common } // namespace xgboost diff --git a/src/metric/metric.cc b/src/metric/metric.cc index 941a0d9f1..2e1392e30 100644 --- a/src/metric/metric.cc +++ b/src/metric/metric.cc @@ -1,5 +1,5 @@ /*! - * Copyright 2015-2019 by Contributors + * Copyright 2015-2020 by Contributors * \file metric_registry.cc * \brief Registry of objective functions. */ @@ -80,6 +80,7 @@ namespace metric { // List of files that will be force linked in static links. DMLC_REGISTRY_LINK_TAG(elementwise_metric); DMLC_REGISTRY_LINK_TAG(multiclass_metric); +DMLC_REGISTRY_LINK_TAG(survival_metric); DMLC_REGISTRY_LINK_TAG(rank_metric); #ifdef XGBOOST_USE_CUDA DMLC_REGISTRY_LINK_TAG(rank_metric_gpu); diff --git a/src/metric/survival_metric.cc b/src/metric/survival_metric.cc index cc0614fe7..cf21a7fa2 100644 --- a/src/metric/survival_metric.cc +++ b/src/metric/survival_metric.cc @@ -1,105 +1,11 @@ /*! - * Copyright 2019 by Contributors + * Copyright 2019-2020 by Contributors * \file survival_metric.cc * \brief Metrics for survival analysis * \author Avinash Barnwal, Hyunsu Cho and Toby Hocking */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xgboost/json.h" - -#include "../common/math.h" -#include "../common/survival_util.h" - -using AFTParam = xgboost::common::AFTParam; -using AFTLoss = xgboost::common::AFTLoss; - -namespace xgboost { -namespace metric { -// tag the this file, used by force static link later. -DMLC_REGISTRY_FILE_TAG(survival_metric); - -/*! \brief Negative log likelihood of Accelerated Failure Time model */ -struct EvalAFT : public Metric { - public: - explicit EvalAFT(const char* param) {} - - void Configure(const Args& args) override { - param_.UpdateAllowUnknown(args); - loss_.reset(new AFTLoss(param_.aft_loss_distribution)); - } - - void SaveConfig(Json* p_out) const override { - auto& out = *p_out; - out["name"] = String(this->Name()); - out["aft_loss_param"] = ToJson(param_); - } - - void LoadConfig(Json const& in) override { - FromJson(in["aft_loss_param"], ¶m_); - } - - bst_float Eval(const HostDeviceVector &preds, - const MetaInfo &info, - bool distributed) override { - CHECK_NE(info.labels_lower_bound_.Size(), 0U) - << "y_lower cannot be empty"; - CHECK_NE(info.labels_upper_bound_.Size(), 0U) - << "y_higher cannot be empty"; - CHECK_EQ(preds.Size(), info.labels_lower_bound_.Size()); - CHECK_EQ(preds.Size(), info.labels_upper_bound_.Size()); - - /* Compute negative log likelihood for each data point and compute weighted average */ - const auto& yhat = preds.HostVector(); - const auto& y_lower = info.labels_lower_bound_.HostVector(); - const auto& y_upper = info.labels_upper_bound_.HostVector(); - const auto& weights = info.weights_.HostVector(); - const bool is_null_weight = weights.empty(); - const float aft_loss_distribution_scale = param_.aft_loss_distribution_scale; - CHECK_LE(yhat.size(), static_cast(std::numeric_limits::max())) - << "yhat is too big"; - const omp_ulong nsize = static_cast(yhat.size()); - - double nloglik_sum = 0.0; - double weight_sum = 0.0; - #pragma omp parallel for \ - shared(weights, y_lower, y_upper, yhat) reduction(+:nloglik_sum, weight_sum) - for (omp_ulong i = 0; i < nsize; ++i) { - // If weights are empty, data is unweighted so we use 1.0 everywhere - const double w = is_null_weight ? 1.0 : weights[i]; - const double loss - = loss_->Loss(y_lower[i], y_upper[i], yhat[i], aft_loss_distribution_scale); - nloglik_sum += loss; - weight_sum += w; - } - - double dat[2]{nloglik_sum, weight_sum}; - if (distributed) { - rabit::Allreduce(dat, 2); - } - return static_cast(dat[0] / dat[1]); - } - - const char* Name() const override { - return "aft-nloglik"; - } - - private: - AFTParam param_; - std::unique_ptr loss_; -}; - -XGBOOST_REGISTER_METRIC(AFT, "aft-nloglik") -.describe("Negative log likelihood of Accelerated Failure Time model.") -.set_body([](const char* param) { return new EvalAFT(param); }); - -} // namespace metric -} // namespace xgboost +// Dummy file to keep the CUDA conditional compile trick. +#if !defined(XGBOOST_USE_CUDA) +#include "survival_metric.cu" +#endif // !defined(XGBOOST_USE_CUDA) diff --git a/src/metric/survival_metric.cu b/src/metric/survival_metric.cu new file mode 100644 index 000000000..ac25e4548 --- /dev/null +++ b/src/metric/survival_metric.cu @@ -0,0 +1,304 @@ +/*! + * Copyright 2019-2020 by Contributors + * \file survival_metric.cu + * \brief Metrics for survival analysis + * \author Avinash Barnwal, Hyunsu Cho and Toby Hocking + */ + +#include +#include + +#include +#include + +#include "xgboost/json.h" +#include "xgboost/metric.h" +#include "xgboost/host_device_vector.h" + +#include "metric_common.h" +#include "../common/math.h" +#include "../common/survival_util.h" + +#if defined(XGBOOST_USE_CUDA) +#include // thrust::cuda::par +#include "../common/device_helpers.cuh" +#endif // XGBOOST_USE_CUDA + +using AFTParam = xgboost::common::AFTParam; +using ProbabilityDistributionType = xgboost::common::ProbabilityDistributionType; +template +using AFTLoss = xgboost::common::AFTLoss; + +namespace xgboost { +namespace metric { +// tag the this file, used by force static link later. +DMLC_REGISTRY_FILE_TAG(survival_metric); + +template +class ElementWiseSurvivalMetricsReduction { + public: + ElementWiseSurvivalMetricsReduction() = default; + void Configure(EvalRow policy) { + policy_ = policy; + } + + PackedReduceResult CpuReduceMetrics( + const HostDeviceVector& weights, + const HostDeviceVector& labels_lower_bound, + const HostDeviceVector& labels_upper_bound, + const HostDeviceVector& preds) const { + size_t ndata = labels_lower_bound.Size(); + CHECK_EQ(ndata, labels_upper_bound.Size()); + + const auto& h_labels_lower_bound = labels_lower_bound.HostVector(); + const auto& h_labels_upper_bound = labels_upper_bound.HostVector(); + const auto& h_weights = weights.HostVector(); + const auto& h_preds = preds.HostVector(); + + double residue_sum = 0; + double weights_sum = 0; + +#pragma omp parallel for reduction(+: residue_sum, weights_sum) schedule(static) + for (omp_ulong i = 0; i < ndata; ++i) { + const double wt = h_weights.empty() ? 1.0 : static_cast(h_weights[i]); + residue_sum += policy_.EvalRow( + static_cast(h_labels_lower_bound[i]), + static_cast(h_labels_upper_bound[i]), + static_cast(h_preds[i])) * wt; + weights_sum += wt; + } + PackedReduceResult res{residue_sum, weights_sum}; + return res; + } + +#if defined(XGBOOST_USE_CUDA) + + PackedReduceResult DeviceReduceMetrics( + const HostDeviceVector& weights, + const HostDeviceVector& labels_lower_bound, + const HostDeviceVector& labels_upper_bound, + const HostDeviceVector& preds) { + size_t ndata = labels_lower_bound.Size(); + CHECK_EQ(ndata, labels_upper_bound.Size()); + + thrust::counting_iterator begin(0); + thrust::counting_iterator end = begin + ndata; + + auto s_label_lower_bound = labels_lower_bound.DeviceSpan(); + auto s_label_upper_bound = labels_upper_bound.DeviceSpan(); + auto s_preds = preds.DeviceSpan(); + auto s_weights = weights.DeviceSpan(); + + const bool is_null_weight = (weights.Size() == 0); + + auto d_policy = policy_; + + dh::XGBCachingDeviceAllocator alloc; + PackedReduceResult result = thrust::transform_reduce( + thrust::cuda::par(alloc), + begin, end, + [=] XGBOOST_DEVICE(size_t idx) { + double weight = is_null_weight ? 1.0 : static_cast(s_weights[idx]); + double residue = d_policy.EvalRow( + static_cast(s_label_lower_bound[idx]), + static_cast(s_label_upper_bound[idx]), + static_cast(s_preds[idx])); + residue *= weight; + return PackedReduceResult{residue, weight}; + }, + PackedReduceResult(), + thrust::plus()); + + return result; + } + +#endif // XGBOOST_USE_CUDA + + PackedReduceResult Reduce( + int device, + const HostDeviceVector& weights, + const HostDeviceVector& labels_lower_bound, + const HostDeviceVector& labels_upper_bound, + const HostDeviceVector& preds) { + PackedReduceResult result; + + if (device < 0) { + result = CpuReduceMetrics(weights, labels_lower_bound, labels_upper_bound, preds); + } +#if defined(XGBOOST_USE_CUDA) + else { // NOLINT + device_ = device; + preds.SetDevice(device_); + labels_lower_bound.SetDevice(device_); + labels_upper_bound.SetDevice(device_); + weights.SetDevice(device_); + + dh::safe_cuda(cudaSetDevice(device_)); + result = DeviceReduceMetrics(weights, labels_lower_bound, labels_upper_bound, preds); + } +#endif // defined(XGBOOST_USE_CUDA) + return result; + } + + private: + EvalRow policy_; +#if defined(XGBOOST_USE_CUDA) + int device_{-1}; +#endif // defined(XGBOOST_USE_CUDA) +}; + +struct EvalIntervalRegressionAccuracy { + void Configure(const Args& args) {} + + const char* Name() const { + return "interval-regression-accuracy"; + } + + XGBOOST_DEVICE double EvalRow( + double label_lower_bound, double label_upper_bound, double log_pred) const { + const double pred = exp(log_pred); + return (pred >= label_lower_bound && pred <= label_upper_bound) ? 1.0 : 0.0; + } + + static double GetFinal(double esum, double wsum) { + return wsum == 0 ? esum : esum / wsum; + } +}; + +/*! \brief Negative log likelihood of Accelerated Failure Time model */ +template +struct EvalAFTNLogLik { + void Configure(const Args& args) { + param_.UpdateAllowUnknown(args); + } + + const char* Name() const { + return "aft-nloglik"; + } + + XGBOOST_DEVICE double EvalRow( + double label_lower_bound, double label_upper_bound, double pred) const { + return AFTLoss::Loss( + label_lower_bound, label_upper_bound, pred, param_.aft_loss_distribution_scale); + } + + static double GetFinal(double esum, double wsum) { + return wsum == 0 ? esum : esum / wsum; + } + private: + AFTParam param_; +}; + +template +struct EvalEWiseSurvivalBase : public Metric { + EvalEWiseSurvivalBase() = default; + + void Configure(const Args& args) override { + policy_.Configure(args); + for (const auto& e : args) { + if (e.first == "gpu_id") { + device_ = dmlc::ParseSignedInt(e.second.c_str(), nullptr, 10); + } + } + reducer_.Configure(policy_); + } + + bst_float Eval(const HostDeviceVector& preds, + const MetaInfo& info, + bool distributed) override { + CHECK_NE(info.labels_lower_bound_.Size(), 0U) + << "labels_lower_bound cannot be empty"; + CHECK_NE(info.labels_upper_bound_.Size(), 0U) + << "labels_upper_bound cannot be empty"; + CHECK_EQ(preds.Size(), info.labels_lower_bound_.Size()); + CHECK_EQ(preds.Size(), info.labels_upper_bound_.Size()); + + auto result = reducer_.Reduce( + device_, info.weights_, info.labels_lower_bound_, info.labels_upper_bound_, preds); + + double dat[2] {result.Residue(), result.Weights()}; + + if (distributed) { + rabit::Allreduce(dat, 2); + } + return static_cast(Policy::GetFinal(dat[0], dat[1])); + } + + const char* Name() const override { + return policy_.Name(); + } + + private: + Policy policy_; + ElementWiseSurvivalMetricsReduction reducer_; + int device_{-1}; // used only for GPU metric +}; + +// This class exists because we want to perform dispatch according to the distribution type at +// configuration time, not at prediction time. +struct AFTNLogLikDispatcher : public Metric { + const char* Name() const override { + return "aft-nloglik"; + } + + bst_float Eval(const HostDeviceVector& preds, + const MetaInfo& info, + bool distributed) override { + CHECK(metric_) << "AFT metric must be configured first, with distribution type and scale"; + return metric_->Eval(preds, info, distributed); + } + + void Configure(const Args& args) override { + param_.UpdateAllowUnknown(args); + switch (param_.aft_loss_distribution) { + case common::ProbabilityDistributionType::kNormal: + metric_.reset(new EvalEWiseSurvivalBase>()); + break; + case common::ProbabilityDistributionType::kLogistic: + metric_.reset(new EvalEWiseSurvivalBase>()); + break; + case common::ProbabilityDistributionType::kExtreme: + metric_.reset(new EvalEWiseSurvivalBase>()); + break; + default: + LOG(FATAL) << "Unknown probability distribution"; + } + Args new_args{args}; + // tparam_ doesn't get propagated to the inner metric object because we didn't use + // Metric::Create(). I don't think it's a good idea to pollute the metric registry with + // specialized versions of the AFT metric, so as a work-around, manually pass the GPU ID + // into the inner metric via configuration. + new_args.emplace_back("gpu_id", std::to_string(tparam_->gpu_id)); + metric_->Configure(new_args); + } + + void SaveConfig(Json* p_out) const override { + auto& out = *p_out; + out["name"] = String(this->Name()); + out["aft_loss_param"] = ToJson(param_); + } + + void LoadConfig(const Json& in) override { + FromJson(in["aft_loss_param"], ¶m_); + } + + private: + AFTParam param_; + std::unique_ptr metric_; +}; + + +XGBOOST_REGISTER_METRIC(AFTNLogLik, "aft-nloglik") +.describe("Negative log likelihood of Accelerated Failure Time model.") +.set_body([](const char* param) { + return new AFTNLogLikDispatcher(); +}); + +XGBOOST_REGISTER_METRIC(IntervalRegressionAccuracy, "interval-regression-accuracy") +.describe("") +.set_body([](const char* param) { + return new EvalEWiseSurvivalBase(); +}); + +} // namespace metric +} // namespace xgboost diff --git a/src/objective/aft_obj.cc b/src/objective/aft_obj.cc index bf66d9991..407c97554 100644 --- a/src/objective/aft_obj.cc +++ b/src/objective/aft_obj.cc @@ -1,116 +1,21 @@ /*! - * Copyright 2015 by Contributors - * \file rank.cc - * \brief Definition of aft loss. + * Copyright 2019-2020 by Contributors + * \file aft_obj.cc + * \brief Definition of AFT loss for survival analysis. + * \author Avinash Barnwal, Hyunsu Cho and Toby Hocking */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xgboost/json.h" - -#include "../common/math.h" -#include "../common/random.h" -#include "../common/survival_util.h" - -using AFTParam = xgboost::common::AFTParam; -using AFTLoss = xgboost::common::AFTLoss; +// Dummy file to keep the CUDA conditional compile trick. +#include namespace xgboost { namespace obj { DMLC_REGISTRY_FILE_TAG(aft_obj); -class AFTObj : public ObjFunction { - public: - void Configure(const std::vector >& args) override { - param_.UpdateAllowUnknown(args); - loss_.reset(new AFTLoss(param_.aft_loss_distribution)); - } - - void GetGradient(const HostDeviceVector& preds, - const MetaInfo& info, - int iter, - HostDeviceVector* out_gpair) override { - /* Boilerplate */ - CHECK_EQ(preds.Size(), info.labels_lower_bound_.Size()); - CHECK_EQ(preds.Size(), info.labels_upper_bound_.Size()); - - const auto& yhat = preds.HostVector(); - const auto& y_lower = info.labels_lower_bound_.HostVector(); - const auto& y_upper = info.labels_upper_bound_.HostVector(); - const auto& weights = info.weights_.HostVector(); - const bool is_null_weight = weights.empty(); - - out_gpair->Resize(yhat.size()); - std::vector& gpair = out_gpair->HostVector(); - CHECK_LE(yhat.size(), static_cast(std::numeric_limits::max())) - << "yhat is too big"; - const omp_ulong nsize = static_cast(yhat.size()); - const float aft_loss_distribution_scale = param_.aft_loss_distribution_scale; - - #pragma omp parallel for \ - shared(weights, y_lower, y_upper, yhat, gpair) - for (omp_ulong i = 0; i < nsize; ++i) { - // If weights are empty, data is unweighted so we use 1.0 everywhere - const double w = is_null_weight ? 1.0 : weights[i]; - const double grad = loss_->Gradient(y_lower[i], y_upper[i], - yhat[i], aft_loss_distribution_scale); - const double hess = loss_->Hessian(y_lower[i], y_upper[i], - yhat[i], aft_loss_distribution_scale); - gpair[i] = GradientPair(grad * w, hess * w); - } - } - - void PredTransform(HostDeviceVector *io_preds) override { - // Trees give us a prediction in log scale, so exponentiate - std::vector &preds = io_preds->HostVector(); - const long ndata = static_cast(preds.size()); // NOLINT(*) - #pragma omp parallel for shared(preds) - for (long j = 0; j < ndata; ++j) { // NOLINT(*) - preds[j] = std::exp(preds[j]); - } - } - - void EvalTransform(HostDeviceVector *io_preds) override { - // do nothing here, since the AFT metric expects untransformed prediction score - } - - bst_float ProbToMargin(bst_float base_score) const override { - return std::log(base_score); - } - - const char* DefaultEvalMetric() const override { - return "aft-nloglik"; - } - - void SaveConfig(Json* p_out) const override { - auto& out = *p_out; - out["name"] = String("survival:aft"); - out["aft_loss_param"] = ToJson(param_); - } - - void LoadConfig(Json const& in) override { - FromJson(in["aft_loss_param"], ¶m_); - loss_.reset(new AFTLoss(param_.aft_loss_distribution)); - } - - private: - AFTParam param_; - std::unique_ptr loss_; -}; - -// register the objective functions -XGBOOST_REGISTER_OBJECTIVE(AFTObj, "survival:aft") -.describe("AFT loss function") -.set_body([]() { return new AFTObj(); }); - } // namespace obj } // namespace xgboost + +#ifndef XGBOOST_USE_CUDA +#include "aft_obj.cu" +#endif // XGBOOST_USE_CUDA diff --git a/src/objective/aft_obj.cu b/src/objective/aft_obj.cu new file mode 100644 index 000000000..da55a7fe4 --- /dev/null +++ b/src/objective/aft_obj.cu @@ -0,0 +1,147 @@ +/*! + * Copyright 2019-2020 by Contributors + * \file aft_obj.cu + * \brief Definition of AFT loss for survival analysis. + * \author Avinash Barnwal, Hyunsu Cho and Toby Hocking + */ + +#include +#include +#include +#include + +#include "xgboost/host_device_vector.h" +#include "xgboost/json.h" +#include "xgboost/parameter.h" +#include "xgboost/span.h" +#include "xgboost/logging.h" +#include "xgboost/objective.h" + +#include "../common/transform.h" +#include "../common/survival_util.h" + +using AFTParam = xgboost::common::AFTParam; +using ProbabilityDistributionType = xgboost::common::ProbabilityDistributionType; +template +using AFTLoss = xgboost::common::AFTLoss; + +namespace xgboost { +namespace obj { + +#if defined(XGBOOST_USE_CUDA) +DMLC_REGISTRY_FILE_TAG(aft_obj_gpu); +#endif // defined(XGBOOST_USE_CUDA) + +class AFTObj : public ObjFunction { + public: + void Configure(const std::vector >& args) override { + param_.UpdateAllowUnknown(args); + } + + template + void GetGradientImpl(const HostDeviceVector &preds, + const MetaInfo &info, + HostDeviceVector *out_gpair, + size_t ndata, int device, bool is_null_weight, + float aft_loss_distribution_scale) { + common::Transform<>::Init( + [=] XGBOOST_DEVICE(size_t _idx, + common::Span _out_gpair, + common::Span _preds, + common::Span _labels_lower_bound, + common::Span _labels_upper_bound, + common::Span _weights) { + const double pred = static_cast(_preds[_idx]); + const double label_lower_bound = static_cast(_labels_lower_bound[_idx]); + const double label_upper_bound = static_cast(_labels_upper_bound[_idx]); + const float grad = static_cast( + AFTLoss::Gradient(label_lower_bound, label_upper_bound, + pred, aft_loss_distribution_scale)); + const float hess = static_cast( + AFTLoss::Hessian(label_lower_bound, label_upper_bound, + pred, aft_loss_distribution_scale)); + const bst_float w = is_null_weight ? 1.0f : _weights[_idx]; + _out_gpair[_idx] = GradientPair(grad * w, hess * w); + }, + common::Range{0, static_cast(ndata)}, device).Eval( + out_gpair, &preds, &info.labels_lower_bound_, &info.labels_upper_bound_, + &info.weights_); + } + + void GetGradient(const HostDeviceVector& preds, + const MetaInfo& info, + int iter, + HostDeviceVector* out_gpair) override { + const size_t ndata = preds.Size(); + CHECK_EQ(info.labels_lower_bound_.Size(), ndata); + CHECK_EQ(info.labels_upper_bound_.Size(), ndata); + out_gpair->Resize(ndata); + const int device = tparam_->gpu_id; + const float aft_loss_distribution_scale = param_.aft_loss_distribution_scale; + const bool is_null_weight = info.weights_.Size() == 0; + if (!is_null_weight) { + CHECK_EQ(info.weights_.Size(), ndata) + << "Number of weights should be equal to number of data points."; + } + + switch (param_.aft_loss_distribution) { + case common::ProbabilityDistributionType::kNormal: + GetGradientImpl(preds, info, out_gpair, ndata, device, + is_null_weight, aft_loss_distribution_scale); + break; + case common::ProbabilityDistributionType::kLogistic: + GetGradientImpl(preds, info, out_gpair, ndata, device, + is_null_weight, aft_loss_distribution_scale); + break; + case common::ProbabilityDistributionType::kExtreme: + GetGradientImpl(preds, info, out_gpair, ndata, device, + is_null_weight, aft_loss_distribution_scale); + break; + default: + LOG(FATAL) << "Unrecognized distribution"; + } + } + + void PredTransform(HostDeviceVector *io_preds) override { + // Trees give us a prediction in log scale, so exponentiate + common::Transform<>::Init( + [] XGBOOST_DEVICE(size_t _idx, common::Span _preds) { + _preds[_idx] = exp(_preds[_idx]); + }, common::Range{0, static_cast(io_preds->Size())}, + tparam_->gpu_id) + .Eval(io_preds); + } + + void EvalTransform(HostDeviceVector *io_preds) override { + // do nothing here, since the AFT metric expects untransformed prediction score + } + + bst_float ProbToMargin(bst_float base_score) const override { + return std::log(base_score); + } + + const char* DefaultEvalMetric() const override { + return "aft-nloglik"; + } + + void SaveConfig(Json* p_out) const override { + auto& out = *p_out; + out["name"] = String("survival:aft"); + out["aft_loss_param"] = ToJson(param_); + } + + void LoadConfig(Json const& in) override { + FromJson(in["aft_loss_param"], ¶m_); + } + + private: + AFTParam param_; +}; + +// register the objective functions +XGBOOST_REGISTER_OBJECTIVE(AFTObj, "survival:aft") + .describe("AFT loss function") + .set_body([]() { return new AFTObj(); }); + +} // namespace obj +} // namespace xgboost diff --git a/tests/ci_build/Dockerfile.cpu b/tests/ci_build/Dockerfile.cpu index 1dd8a9da1..4dd860a88 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/tests/ci_build/Dockerfile.cpu @@ -7,7 +7,7 @@ SHELL ["/bin/bash", "-c"] # Use Bash as shell # Install all basic requirements RUN \ apt-get update && \ - apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 liblz4-dev && \ + apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 liblz4-dev ninja-build && \ # CMake wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \ bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ diff --git a/tests/ci_build/Dockerfile.gpu_build b/tests/ci_build/Dockerfile.gpu_build index 65bccce74..8a614db81 100644 --- a/tests/ci_build/Dockerfile.gpu_build +++ b/tests/ci_build/Dockerfile.gpu_build @@ -21,7 +21,14 @@ RUN \ bash Miniconda3.sh -b -p /opt/python && \ # CMake wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr + bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ + # Ninja + mkdir -p /usr/local && \ + cd /usr/local/ && \ + wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \ + tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \ + cd ninja && \ + python ./configure.py --bootstrap # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ @@ -33,7 +40,7 @@ RUN \ yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \ rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm; -ENV PATH=/opt/python/bin:$PATH +ENV PATH=/opt/python/bin:/usr/local/ninja:$PATH ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++ ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp diff --git a/tests/ci_build/build_via_cmake.sh b/tests/ci_build/build_via_cmake.sh index 98808141b..fc7504233 100755 --- a/tests/ci_build/build_via_cmake.sh +++ b/tests/ci_build/build_via_cmake.sh @@ -4,7 +4,7 @@ set -e rm -rf build mkdir build cd build -cmake .. "$@" -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON -make clean -make -j$(nproc) +cmake .. "$@" -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON -GNinja +ninja clean +time ninja -v cd .. diff --git a/tests/cpp/common/test_probability_distribution.cc b/tests/cpp/common/test_probability_distribution.cc index 4363be9df..e8aebede6 100644 --- a/tests/cpp/common/test_probability_distribution.cc +++ b/tests/cpp/common/test_probability_distribution.cc @@ -11,59 +11,56 @@ namespace xgboost { namespace common { -TEST(ProbabilityDistribution, DistributionGeneric) { - // Assert d/dx CDF = PDF, d/dx PDF = GradPDF, d/dx GradPDF = HessPDF - // Do this for every distribution type - for (auto type : {ProbabilityDistributionType::kNormal, ProbabilityDistributionType::kLogistic, - ProbabilityDistributionType::kExtreme}) { - std::unique_ptr dist{ ProbabilityDistribution::Create(type) }; - double integral_of_pdf = dist->CDF(-2.0); - double integral_of_grad_pdf = dist->PDF(-2.0); - double integral_of_hess_pdf = dist->GradPDF(-2.0); - // Perform numerical differentiation and integration - // Enumerate 4000 grid points in range [-2, 2] - for (int i = 0; i <= 4000; ++i) { - const double x = static_cast(i) / 1000.0 - 2.0; - // Numerical differentiation (p. 246, Numerical Analysis 2nd ed. by Timothy Sauer) - EXPECT_NEAR((dist->CDF(x + 1e-5) - dist->CDF(x - 1e-5)) / 2e-5, dist->PDF(x), 6e-11); - EXPECT_NEAR((dist->PDF(x + 1e-5) - dist->PDF(x - 1e-5)) / 2e-5, dist->GradPDF(x), 6e-11); - EXPECT_NEAR((dist->GradPDF(x + 1e-5) - dist->GradPDF(x - 1e-5)) / 2e-5, - dist->HessPDF(x), 6e-11); - // Numerical integration using Trapezoid Rule (p. 257, Sauer) - integral_of_pdf += 5e-4 * (dist->PDF(x - 1e-3) + dist->PDF(x)); - integral_of_grad_pdf += 5e-4 * (dist->GradPDF(x - 1e-3) + dist->GradPDF(x)); - integral_of_hess_pdf += 5e-4 * (dist->HessPDF(x - 1e-3) + dist->HessPDF(x)); - EXPECT_NEAR(integral_of_pdf, dist->CDF(x), 2e-4); - EXPECT_NEAR(integral_of_grad_pdf, dist->PDF(x), 2e-4); - EXPECT_NEAR(integral_of_hess_pdf, dist->GradPDF(x), 2e-4); - } +template +void RunDistributionGenericTest() { + double integral_of_pdf = Distribution::CDF(-2.0); + double integral_of_grad_pdf = Distribution::PDF(-2.0); + double integral_of_hess_pdf = Distribution::GradPDF(-2.0); + // Perform numerical differentiation and integration + // Enumerate 4000 grid points in range [-2, 2] + for (int i = 0; i <= 4000; ++i) { + const double x = static_cast(i) / 1000.0 - 2.0; + // Numerical differentiation (p. 246, Numerical Analysis 2nd ed. by Timothy Sauer) + EXPECT_NEAR((Distribution::CDF(x + 1e-5) - Distribution::CDF(x - 1e-5)) / 2e-5, + Distribution::PDF(x), 6e-11); + EXPECT_NEAR((Distribution::PDF(x + 1e-5) - Distribution::PDF(x - 1e-5)) / 2e-5, + Distribution::GradPDF(x), 6e-11); + EXPECT_NEAR((Distribution::GradPDF(x + 1e-5) - Distribution::GradPDF(x - 1e-5)) / 2e-5, + Distribution::HessPDF(x), 6e-11); + // Numerical integration using Trapezoid Rule (p. 257, Sauer) + integral_of_pdf += 5e-4 * (Distribution::PDF(x - 1e-3) + Distribution::PDF(x)); + integral_of_grad_pdf += 5e-4 * (Distribution::GradPDF(x - 1e-3) + Distribution::GradPDF(x)); + integral_of_hess_pdf += 5e-4 * (Distribution::HessPDF(x - 1e-3) + Distribution::HessPDF(x)); + EXPECT_NEAR(integral_of_pdf, Distribution::CDF(x), 2e-4); + EXPECT_NEAR(integral_of_grad_pdf, Distribution::PDF(x), 2e-4); + EXPECT_NEAR(integral_of_hess_pdf, Distribution::GradPDF(x), 2e-4); } } -TEST(ProbabilityDistribution, NormalDist) { - std::unique_ptr dist{ - ProbabilityDistribution::Create(ProbabilityDistributionType::kNormal) - }; +TEST(ProbabilityDistribution, DistributionGeneric) { + // Assert d/dx CDF = PDF, d/dx PDF = GradPDF, d/dx GradPDF = HessPDF + // Do this for every distribution type + RunDistributionGenericTest(); + RunDistributionGenericTest(); + RunDistributionGenericTest(); +} +TEST(ProbabilityDistribution, NormalDist) { // "Three-sigma rule" (https://en.wikipedia.org/wiki/68–95–99.7_rule) // 68% of values are within 1 standard deviation away from the mean // 95% of values are within 2 standard deviation away from the mean // 99.7% of values are within 3 standard deviation away from the mean - EXPECT_NEAR(dist->CDF(0.5) - dist->CDF(-0.5), 0.3829, 0.00005); - EXPECT_NEAR(dist->CDF(1.0) - dist->CDF(-1.0), 0.6827, 0.00005); - EXPECT_NEAR(dist->CDF(1.5) - dist->CDF(-1.5), 0.8664, 0.00005); - EXPECT_NEAR(dist->CDF(2.0) - dist->CDF(-2.0), 0.9545, 0.00005); - EXPECT_NEAR(dist->CDF(2.5) - dist->CDF(-2.5), 0.9876, 0.00005); - EXPECT_NEAR(dist->CDF(3.0) - dist->CDF(-3.0), 0.9973, 0.00005); - EXPECT_NEAR(dist->CDF(3.5) - dist->CDF(-3.5), 0.9995, 0.00005); - EXPECT_NEAR(dist->CDF(4.0) - dist->CDF(-4.0), 0.9999, 0.00005); + EXPECT_NEAR(NormalDistribution::CDF(0.5) - NormalDistribution::CDF(-0.5), 0.3829, 0.00005); + EXPECT_NEAR(NormalDistribution::CDF(1.0) - NormalDistribution::CDF(-1.0), 0.6827, 0.00005); + EXPECT_NEAR(NormalDistribution::CDF(1.5) - NormalDistribution::CDF(-1.5), 0.8664, 0.00005); + EXPECT_NEAR(NormalDistribution::CDF(2.0) - NormalDistribution::CDF(-2.0), 0.9545, 0.00005); + EXPECT_NEAR(NormalDistribution::CDF(2.5) - NormalDistribution::CDF(-2.5), 0.9876, 0.00005); + EXPECT_NEAR(NormalDistribution::CDF(3.0) - NormalDistribution::CDF(-3.0), 0.9973, 0.00005); + EXPECT_NEAR(NormalDistribution::CDF(3.5) - NormalDistribution::CDF(-3.5), 0.9995, 0.00005); + EXPECT_NEAR(NormalDistribution::CDF(4.0) - NormalDistribution::CDF(-4.0), 0.9999, 0.00005); } TEST(ProbabilityDistribution, LogisticDist) { - std::unique_ptr dist{ - ProbabilityDistribution::Create(ProbabilityDistributionType::kLogistic) - }; - /** * Enforce known properties of the logistic distribution. * (https://en.wikipedia.org/wiki/Logistic_distribution) @@ -74,17 +71,13 @@ TEST(ProbabilityDistribution, LogisticDist) { const double x = static_cast(i) / 1000.0 - 2.0; // PDF = 1/4 * sech(x/2)**2 const double sech_x = 1.0 / std::cosh(x * 0.5); // hyperbolic secant at x/2 - EXPECT_NEAR(0.25 * sech_x * sech_x, dist->PDF(x), 1e-15); + EXPECT_NEAR(0.25 * sech_x * sech_x, LogisticDistribution::PDF(x), 1e-15); // CDF = 1/2 + 1/2 * tanh(x/2) - EXPECT_NEAR(0.5 + 0.5 * std::tanh(x * 0.5), dist->CDF(x), 1e-15); + EXPECT_NEAR(0.5 + 0.5 * std::tanh(x * 0.5), LogisticDistribution::CDF(x), 1e-15); } } TEST(ProbabilityDistribution, ExtremeDist) { - std::unique_ptr dist{ - ProbabilityDistribution::Create(ProbabilityDistributionType::kExtreme) - }; - /** * Enforce known properties of the extreme distribution (also known as Gumbel distribution). * The mean is the negative of the Euler-Mascheroni constant. @@ -99,9 +92,10 @@ TEST(ProbabilityDistribution, ExtremeDist) { for (int i = 0; i <= 25000; ++i) { const double x = static_cast(i) / 1000.0 - 20.0; // Numerical integration using Trapezoid Rule (p. 257, Sauer) - mean += 5e-4 * ((x - 1e-3) * dist->PDF(x - 1e-3) + x * dist->PDF(x)); + mean += + 5e-4 * ((x - 1e-3) * ExtremeDistribution::PDF(x - 1e-3) + x * ExtremeDistribution::PDF(x)); } - EXPECT_NEAR(mean, -probability_constant::kEulerMascheroni, 1e-7); + EXPECT_NEAR(mean, -kEulerMascheroni, 1e-7); // Enumerate 25000 grid points in range [-20, 5]. // Compute the variance of the distribution using numerical integration. @@ -111,10 +105,10 @@ TEST(ProbabilityDistribution, ExtremeDist) { for (int i = 0; i <= 25000; ++i) { const double x = static_cast(i) / 1000.0 - 20.0; // Numerical integration using Trapezoid Rule (p. 257, Sauer) - variance += 5e-4 * ((x - 1e-3 - mean) * (x - 1e-3 - mean) * dist->PDF(x - 1e-3) - + (x - mean) * (x - mean) * dist->PDF(x)); + variance += 5e-4 * ((x - 1e-3 - mean) * (x - 1e-3 - mean) * ExtremeDistribution::PDF(x - 1e-3) + + (x - mean) * (x - mean) * ExtremeDistribution::PDF(x)); } - EXPECT_NEAR(variance, probability_constant::kPI * probability_constant::kPI / 6.0, 1e-6); + EXPECT_NEAR(variance, kPI * kPI / 6.0, 1e-6); } } // namespace common diff --git a/tests/cpp/common/test_survival_util.cc b/tests/cpp/common/test_survival_util.cc index 53faaac6c..f54719885 100644 --- a/tests/cpp/common/test_survival_util.cc +++ b/tests/cpp/common/test_survival_util.cc @@ -8,36 +8,37 @@ namespace xgboost { namespace common { -inline static void RobustTestSuite(ProbabilityDistributionType dist_type, - double y_lower, double y_upper, double sigma) { - AFTLoss loss(dist_type); +template +inline static void RobustTestSuite(double y_lower, double y_upper, double sigma) { for (int i = 50; i >= -50; --i) { const double y_pred = std::pow(10.0, static_cast(i)); const double z = (std::log(y_lower) - std::log(y_pred)) / sigma; - const double gradient = loss.Gradient(y_lower, y_upper, std::log(y_pred), sigma); - const double hessian = loss.Hessian(y_lower, y_upper, std::log(y_pred), sigma); + const double gradient + = AFTLoss::Gradient(y_lower, y_upper, std::log(y_pred), sigma); + const double hessian + = AFTLoss::Hessian(y_lower, y_upper, std::log(y_pred), sigma); ASSERT_FALSE(std::isnan(gradient)) << "z = " << z << ", y \\in [" << y_lower << ", " << y_upper << "], y_pred = " << y_pred - << ", dist = " << static_cast(dist_type); + << ", dist = " << static_cast(Distribution::Type()); ASSERT_FALSE(std::isinf(gradient)) << "z = " << z << ", y \\in [" << y_lower << ", " << y_upper << "], y_pred = " << y_pred - << ", dist = " << static_cast(dist_type); + << ", dist = " << static_cast(Distribution::Type()); ASSERT_FALSE(std::isnan(hessian)) << "z = " << z << ", y \\in [" << y_lower << ", " << y_upper << "], y_pred = " << y_pred - << ", dist = " << static_cast(dist_type); + << ", dist = " << static_cast(Distribution::Type()); ASSERT_FALSE(std::isinf(hessian)) << "z = " << z << ", y \\in [" << y_lower << ", " << y_upper << "], y_pred = " << y_pred - << ", dist = " << static_cast(dist_type); + << ", dist = " << static_cast(Distribution::Type()); } } TEST(AFTLoss, RobustGradientPair) { // Ensure that INF and NAN don't show up in gradient pair - RobustTestSuite(ProbabilityDistributionType::kNormal, 16.0, 200.0, 2.0); - RobustTestSuite(ProbabilityDistributionType::kLogistic, 16.0, 200.0, 2.0); - RobustTestSuite(ProbabilityDistributionType::kExtreme, 16.0, 200.0, 2.0); - RobustTestSuite(ProbabilityDistributionType::kNormal, 100.0, 100.0, 2.0); - RobustTestSuite(ProbabilityDistributionType::kLogistic, 100.0, 100.0, 2.0); - RobustTestSuite(ProbabilityDistributionType::kExtreme, 100.0, 100.0, 2.0); + RobustTestSuite(16.0, 200.0, 2.0); + RobustTestSuite(16.0, 200.0, 2.0); + RobustTestSuite(16.0, 200.0, 2.0); + RobustTestSuite(100.0, 100.0, 2.0); + RobustTestSuite(100.0, 100.0, 2.0); + RobustTestSuite(100.0, 100.0, 2.0); } } // namespace common diff --git a/tests/cpp/metric/test_survival_metric.cc b/tests/cpp/metric/test_survival_metric.cc index f2c447085..ded9c4b0e 100644 --- a/tests/cpp/metric/test_survival_metric.cc +++ b/tests/cpp/metric/test_survival_metric.cc @@ -13,78 +13,39 @@ #include "../helpers.h" #include "../../../src/common/survival_util.h" +// CUDA conditional compile trick. +#include "test_survival_metric.cu" + namespace xgboost { namespace common { +/** Tests for Survival metrics that should run only on CPU **/ + /** * Reference values obtained from * https://github.com/avinashbarnwal/GSOC-2019/blob/master/AFT/R/combined_assignment.R **/ -TEST(Metric, AFTNegLogLik) { - auto lparam = CreateEmptyGenericParam(-1); // currently AFT metric is CPU only - - /** - * Test aggregate output from the AFT metric over a small test data set. - * This is unlike AFTLoss.* tests, which verify metric values over individual data points. - **/ - MetaInfo info; - info.num_row_ = 4; - info.labels_lower_bound_.HostVector() - = { 100.0f, -std::numeric_limits::infinity(), 60.0f, 16.0f }; - info.labels_upper_bound_.HostVector() - = { 100.0f, 20.0f, std::numeric_limits::infinity(), 200.0f }; - info.weights_.HostVector() = std::vector(); - HostDeviceVector preds(4, std::log(64)); - - struct TestCase { - std::string dist_type; - bst_float reference_value; - }; - for (const auto& test_case : std::vector{ {"normal", 2.1508f}, {"logistic", 2.1804f}, - {"extreme", 2.0706f} }) { - std::unique_ptr metric(Metric::Create("aft-nloglik", &lparam)); - metric->Configure({ {"aft_loss_distribution", test_case.dist_type}, - {"aft_loss_distribution_scale", "1.0"} }); - EXPECT_NEAR(metric->Eval(preds, info, false), test_case.reference_value, 1e-4); - } -} - -// Test configuration of AFT metric -TEST(AFTNegLogLikMetric, Configuration) { - auto lparam = CreateEmptyGenericParam(-1); // currently AFT metric is CPU only - std::unique_ptr metric(Metric::Create("aft-nloglik", &lparam)); - metric->Configure({{"aft_loss_distribution", "normal"}, {"aft_loss_distribution_scale", "10"}}); - - // Configuration round-trip test - Json j_obj{ Object() }; - metric->SaveConfig(&j_obj); - auto aft_param_json = j_obj["aft_loss_param"]; - EXPECT_EQ(get(aft_param_json["aft_loss_distribution"]), "normal"); - EXPECT_EQ(get(aft_param_json["aft_loss_distribution_scale"]), "10"); -} - /** * AFTLoss.* tests verify metric values over individual data points. **/ // Generate prediction value ranging from 2**1 to 2**15, using grid points in log scale // Then check prediction against the reference values +template static inline void CheckLossOverGridPoints( double true_label_lower_bound, double true_label_upper_bound, - ProbabilityDistributionType dist_type, const std::vector& reference_values) { const int num_point = 20; const double log_y_low = 1.0; const double log_y_high = 15.0; - std::unique_ptr loss(new AFTLoss(dist_type)); CHECK_EQ(num_point, reference_values.size()); for (int i = 0; i < num_point; ++i) { const double y_pred = std::pow(2.0, i * (log_y_high - log_y_low) / (num_point - 1) + log_y_low); - const double loss_val - = loss->Loss(true_label_lower_bound, true_label_upper_bound, std::log(y_pred), 1.0); + const double loss_val = AFTLoss::Loss( + true_label_lower_bound, true_label_upper_bound, std::log(y_pred), 1.0); EXPECT_NEAR(loss_val, reference_values[i], 1e-4); } } @@ -94,35 +55,29 @@ TEST(AFTLoss, Uncensored) { const double true_label_lower_bound = 100.0; const double true_label_upper_bound = true_label_lower_bound; - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kNormal, + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 13.1761, 11.3085, 9.7017, 8.3558, 7.2708, 6.4466, 5.8833, 5.5808, 5.5392, 5.7585, 6.2386, 6.9795, 7.9813, 9.2440, 10.7675, 12.5519, 14.5971, 16.9032, 19.4702, 22.2980 }); - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kLogistic, + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 8.5568, 8.0720, 7.6038, 7.1620, 6.7612, 6.4211, 6.1659, 6.0197, 5.9990, 6.1064, 6.3293, 6.6450, 7.0289, 7.4594, 7.9205, 8.4008, 8.8930, 9.3926, 9.8966, 10.4033 }); - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kExtreme, + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 27.6310, 27.6310, 19.7177, 13.0281, 9.2183, 7.1365, 6.0916, 5.6688, 5.6195, 5.7941, 6.1031, 6.4929, 6.9310, 7.3981, 7.8827, 8.3778, 8.8791, 9.3842, 9.8916, 10.40033 }); } TEST(AFTLoss, LeftCensored) { // Given label (-inf, 20], compute the AFT loss for various prediction values - const double true_label_lower_bound = -std::numeric_limits::infinity(); + const double true_label_lower_bound = 0.0; const double true_label_upper_bound = 20.0; - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kNormal, + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 0.0107, 0.0373, 0.1054, 0.2492, 0.5068, 0.9141, 1.5003, 2.2869, 3.2897, 4.5196, 5.9846, 7.6902, 9.6405, 11.8385, 14.2867, 16.9867, 19.9399, 23.1475, 26.6103, 27.6310 }); - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kLogistic, + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 0.0953, 0.1541, 0.2451, 0.3804, 0.5717, 0.8266, 1.1449, 1.5195, 1.9387, 2.3902, 2.8636, 3.3512, 3.8479, 4.3500, 4.8556, 5.3632, 5.8721, 6.3817, 6.8918, 7.4021 }); - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kExtreme, + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 0.0000, 0.0025, 0.0277, 0.1225, 0.3195, 0.6150, 0.9862, 1.4094, 1.8662, 2.3441, 2.8349, 3.3337, 3.8372, 4.3436, 4.8517, 5.3609, 5.8707, 6.3808, 6.8912, 7.4018 }); } @@ -132,16 +87,13 @@ TEST(AFTLoss, RightCensored) { const double true_label_lower_bound = 60.0; const double true_label_upper_bound = std::numeric_limits::infinity(); - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kNormal, + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 8.0000, 6.2537, 4.7487, 3.4798, 2.4396, 1.6177, 0.9993, 0.5638, 0.2834, 0.1232, 0.0450, 0.0134, 0.0032, 0.0006, 0.0001, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000 }); - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kLogistic, + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 3.4340, 2.9445, 2.4683, 2.0125, 1.5871, 1.2041, 0.8756, 0.6099, 0.4083, 0.2643, 0.1668, 0.1034, 0.0633, 0.0385, 0.0233, 0.0140, 0.0084, 0.0051, 0.0030, 0.0018 }); - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kExtreme, + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 27.6310, 18.0015, 10.8018, 6.4817, 3.8893, 2.3338, 1.4004, 0.8403, 0.5042, 0.3026, 0.1816, 0.1089, 0.0654, 0.0392, 0.0235, 0.0141, 0.0085, 0.0051, 0.0031, 0.0018 }); } @@ -150,17 +102,14 @@ TEST(AFTLoss, IntervalCensored) { // Given label [16, 200], compute the AFT loss for various prediction values const double true_label_lower_bound = 16.0; const double true_label_upper_bound = 200.0; - - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kNormal, + + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 3.9746, 2.8415, 1.9319, 1.2342, 0.7335, 0.4121, 0.2536, 0.2470, 0.3919, 0.6982, 1.1825, 1.8622, 2.7526, 3.8656, 5.2102, 6.7928, 8.6183, 10.6901, 13.0108, 15.5826 }); - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kLogistic, + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 2.2906, 1.8578, 1.4667, 1.1324, 0.8692, 0.6882, 0.5948, 0.5909, 0.6764, 0.8499, 1.1061, 1.4348, 1.8215, 2.2511, 2.7104, 3.1891, 3.6802, 4.1790, 4.6825, 5.1888 }); - CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, - ProbabilityDistributionType::kExtreme, + CheckLossOverGridPoints(true_label_lower_bound, true_label_upper_bound, { 8.0000, 4.8004, 2.8805, 1.7284, 1.0372, 0.6231, 0.3872, 0.3031, 0.3740, 0.5839, 0.8995, 1.2878, 1.7231, 2.1878, 2.6707, 3.1647, 3.6653, 4.1699, 4.6770, 5.1856 }); } diff --git a/tests/cpp/metric/test_survival_metric.cu b/tests/cpp/metric/test_survival_metric.cu new file mode 100644 index 000000000..81d69f939 --- /dev/null +++ b/tests/cpp/metric/test_survival_metric.cu @@ -0,0 +1,81 @@ +/*! + * Copyright (c) by Contributors 2020 + */ +#include +#include +#include "xgboost/metric.h" +#include "../helpers.h" +#include "../../../src/common/survival_util.h" + +/** Tests for Survival metrics that should run both on CPU and GPU **/ + +namespace xgboost { +namespace common { + +TEST(Metric, DeclareUnifiedTest(AFTNegLogLik)) { + auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX); + + /** + * Test aggregate output from the AFT metric over a small test data set. + * This is unlike AFTLoss.* tests, which verify metric values over individual data points. + **/ + MetaInfo info; + info.num_row_ = 4; + info.labels_lower_bound_.HostVector() + = { 100.0f, 0.0f, 60.0f, 16.0f }; + info.labels_upper_bound_.HostVector() + = { 100.0f, 20.0f, std::numeric_limits::infinity(), 200.0f }; + info.weights_.HostVector() = std::vector(); + HostDeviceVector preds(4, std::log(64)); + + struct TestCase { + std::string dist_type; + bst_float reference_value; + }; + for (const auto& test_case : std::vector{ {"normal", 2.1508f}, {"logistic", 2.1804f}, + {"extreme", 2.0706f} }) { + std::unique_ptr metric(Metric::Create("aft-nloglik", &lparam)); + metric->Configure({ {"aft_loss_distribution", test_case.dist_type}, + {"aft_loss_distribution_scale", "1.0"} }); + EXPECT_NEAR(metric->Eval(preds, info, false), test_case.reference_value, 1e-4); + } +} + +TEST(Metric, DeclareUnifiedTest(IntervalRegressionAccuracy)) { + auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX); + + MetaInfo info; + info.num_row_ = 4; + info.labels_lower_bound_.HostVector() = { 20.0f, 0.0f, 60.0f, 16.0f }; + info.labels_upper_bound_.HostVector() = { 80.0f, 20.0f, 80.0f, 200.0f }; + info.weights_.HostVector() = std::vector(); + HostDeviceVector preds(4, std::log(60.0f)); + + std::unique_ptr metric(Metric::Create("interval-regression-accuracy", &lparam)); + EXPECT_FLOAT_EQ(metric->Eval(preds, info, false), 0.75f); + info.labels_lower_bound_.HostVector()[2] = 70.0f; + EXPECT_FLOAT_EQ(metric->Eval(preds, info, false), 0.50f); + info.labels_upper_bound_.HostVector()[2] = std::numeric_limits::infinity(); + EXPECT_FLOAT_EQ(metric->Eval(preds, info, false), 0.50f); + info.labels_upper_bound_.HostVector()[3] = std::numeric_limits::infinity(); + EXPECT_FLOAT_EQ(metric->Eval(preds, info, false), 0.50f); + info.labels_lower_bound_.HostVector()[0] = 70.0f; + EXPECT_FLOAT_EQ(metric->Eval(preds, info, false), 0.25f); +} + +// Test configuration of AFT metric +TEST(AFTNegLogLikMetric, DeclareUnifiedTest(Configuration)) { + auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX); + std::unique_ptr metric(Metric::Create("aft-nloglik", &lparam)); + metric->Configure({{"aft_loss_distribution", "normal"}, {"aft_loss_distribution_scale", "10"}}); + + // Configuration round-trip test + Json j_obj{ Object() }; + metric->SaveConfig(&j_obj); + auto aft_param_json = j_obj["aft_loss_param"]; + EXPECT_EQ(get(aft_param_json["aft_loss_distribution"]), "normal"); + EXPECT_EQ(get(aft_param_json["aft_loss_distribution_scale"]), "10"); +} + +} // namespace common +} // namespace xgboost diff --git a/tests/cpp/objective/test_aft_obj.cc b/tests/cpp/objective/test_aft_obj.cc index 36f80756a..ff1193f9e 100644 --- a/tests/cpp/objective/test_aft_obj.cc +++ b/tests/cpp/objective/test_aft_obj.cc @@ -15,8 +15,8 @@ namespace xgboost { namespace common { -TEST(Objective, AFTObjConfiguration) { - auto lparam = CreateEmptyGenericParam(-1); // currently AFT objective is CPU only +TEST(Objective, DeclareUnifiedTest(AFTObjConfiguration)) { + auto lparam = CreateEmptyGenericParam(GPUIDX); std::unique_ptr objective(ObjFunction::Create("survival:aft", &lparam)); objective->Configure({ {"aft_loss_distribution", "logistic"}, {"aft_loss_distribution_scale", "5"} }); @@ -76,8 +76,8 @@ static inline void CheckGPairOverGridPoints( } } -TEST(Objective, AFTObjGPairUncensoredLabels) { - auto lparam = CreateEmptyGenericParam(-1); // currently AFT objective is CPU only +TEST(Objective, DeclareUnifiedTest(AFTObjGPairUncensoredLabels)) { + auto lparam = CreateEmptyGenericParam(GPUIDX); std::unique_ptr obj(ObjFunction::Create("survival:aft", &lparam)); CheckGPairOverGridPoints(obj.get(), 100.0f, 100.0f, "normal", @@ -100,29 +100,29 @@ TEST(Objective, AFTObjGPairUncensoredLabels) { 0.3026f, 0.1816f, 0.1090f, 0.0654f, 0.0392f, 0.0235f, 0.0141f, 0.0085f, 0.0051f, 0.0031f }); } -TEST(Objective, AFTObjGPairLeftCensoredLabels) { - auto lparam = CreateEmptyGenericParam(-1); // currently AFT objective is CPU only +TEST(Objective, DeclareUnifiedTest(AFTObjGPairLeftCensoredLabels)) { + auto lparam = CreateEmptyGenericParam(GPUIDX); std::unique_ptr obj(ObjFunction::Create("survival:aft", &lparam)); - CheckGPairOverGridPoints(obj.get(), -std::numeric_limits::infinity(), 20.0f, "normal", + CheckGPairOverGridPoints(obj.get(), 0.0f, 20.0f, "normal", { 0.0285f, 0.0832f, 0.1951f, 0.3804f, 0.6403f, 0.9643f, 1.3379f, 1.7475f, 2.1828f, 2.6361f, 3.1023f, 3.5779f, 4.0603f, 4.5479f, 5.0394f, 5.5340f, 6.0309f, 6.5298f, 7.0303f, 7.5326f }, { 0.0663f, 0.1559f, 0.2881f, 0.4378f, 0.5762f, 0.6878f, 0.7707f, 0.8300f, 0.8719f, 0.9016f, 0.9229f, 0.9385f, 0.9501f, 0.9588f, 0.9656f, 0.9709f, 0.9751f, 0.9785f, 0.9813f, 0.9877f }); - CheckGPairOverGridPoints(obj.get(), -std::numeric_limits::infinity(), 20.0f, "logistic", + CheckGPairOverGridPoints(obj.get(), 0.0f, 20.0f, "logistic", { 0.0909f, 0.1428f, 0.2174f, 0.3164f, 0.4355f, 0.5625f, 0.6818f, 0.7812f, 0.8561f, 0.9084f, 0.9429f, 0.9650f, 0.9787f, 0.9871f, 0.9922f, 0.9953f, 0.9972f, 0.9983f, 0.9990f, 0.9994f }, { 0.0826f, 0.1224f, 0.1701f, 0.2163f, 0.2458f, 0.2461f, 0.2170f, 0.1709f, 0.1232f, 0.0832f, 0.0538f, 0.0338f, 0.0209f, 0.0127f, 0.0077f, 0.0047f, 0.0028f, 0.0017f, 0.0010f, 0.0006f }); - CheckGPairOverGridPoints(obj.get(), -std::numeric_limits::infinity(), 20.0f, "extreme", + CheckGPairOverGridPoints(obj.get(), 0.0f, 20.0f, "extreme", { 0.0005f, 0.0149f, 0.1011f, 0.2815f, 0.4881f, 0.6610f, 0.7847f, 0.8665f, 0.9183f, 0.9504f, 0.9700f, 0.9820f, 0.9891f, 0.9935f, 0.9961f, 0.9976f, 0.9986f, 0.9992f, 0.9995f, 0.9997f }, { 0.0041f, 0.0747f, 0.2731f, 0.4059f, 0.3829f, 0.2901f, 0.1973f, 0.1270f, 0.0793f, 0.0487f, 0.0296f, 0.0179f, 0.0108f, 0.0065f, 0.0039f, 0.0024f, 0.0014f, 0.0008f, 0.0005f, 0.0003f }); } -TEST(Objective, AFTObjGPairRightCensoredLabels) { - auto lparam = CreateEmptyGenericParam(-1); // currently AFT objective is CPU only +TEST(Objective, DeclareUnifiedTest(AFTObjGPairRightCensoredLabels)) { + auto lparam = CreateEmptyGenericParam(GPUIDX); std::unique_ptr obj(ObjFunction::Create("survival:aft", &lparam)); CheckGPairOverGridPoints(obj.get(), 60.0f, std::numeric_limits::infinity(), "normal", @@ -145,8 +145,8 @@ TEST(Objective, AFTObjGPairRightCensoredLabels) { 0.1816f, 0.1089f, 0.0654f, 0.0392f, 0.0235f, 0.0141f, 0.0085f, 0.0051f, 0.0031f, 0.0018f }); } -TEST(Objective, AFTObjGPairIntervalCensoredLabels) { - auto lparam = CreateEmptyGenericParam(-1); // currently AFT objective is CPU only +TEST(Objective, DeclareUnifiedTest(AFTObjGPairIntervalCensoredLabels)) { + auto lparam = CreateEmptyGenericParam(GPUIDX); std::unique_ptr obj(ObjFunction::Create("survival:aft", &lparam)); CheckGPairOverGridPoints(obj.get(), 16.0f, 200.0f, "normal", diff --git a/tests/cpp/objective/test_aft_obj.cu b/tests/cpp/objective/test_aft_obj.cu new file mode 100644 index 000000000..3da6bc94b --- /dev/null +++ b/tests/cpp/objective/test_aft_obj.cu @@ -0,0 +1,6 @@ +/*! + * Copyright 2020 XGBoost contributors + */ +// Dummy file to keep the CUDA tests. + +#include "test_aft_obj.cc"