diff --git a/doc/gpu/index.rst b/doc/gpu/index.rst
index f03e710bc..e554386a4 100644
--- a/doc/gpu/index.rst
+++ b/doc/gpu/index.rst
@@ -18,7 +18,7 @@ Tree construction (training) and prediction can be accelerated with CUDA-capable
 
 Usage
 =====
-Specify the ``tree_method`` parameter as one of the following algorithms. 
+Specify the ``tree_method`` parameter as one of the following algorithms.
 
 Algorithms
 ----------
@@ -31,11 +31,11 @@ Algorithms
 | gpu_hist     | Equivalent to the XGBoost fast histogram algorithm. Much faster and uses considerably less memory. NOTE: Will run very slowly on GPUs older than Pascal architecture. |
 +--------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
-Supported parameters 
+Supported parameters
 --------------------
 
-.. |tick| unicode:: U+2714 
-.. |cross| unicode:: U+2718 
+.. |tick| unicode:: U+2714
+.. |cross| unicode:: U+2718
 
 +--------------------------+---------------+--------------+
 | parameter                | ``gpu_exact`` | ``gpu_hist`` |
@@ -78,6 +78,49 @@ The GPU algorithms currently work with CLI, Python and R packages. See :doc:`/bu
   param['max_bin'] = 16
   param['tree_method'] = 'gpu_hist'
 
+Objective functions
+===================
+Most of the objective functions implemented in XGBoost can be run on the GPU. The following table shows the current support status.
+
+.. |tick| unicode:: U+2714
+.. |cross| unicode:: U+2718
+
++-----------------+-------------+
+| Objectives      | GPU support |
++-----------------+-------------+
+| reg:linear      | |tick|      |
++-----------------+-------------+
+| reg:logistic    | |tick|      |
++-----------------+-------------+
+| binary:logistic | |tick|      |
++-----------------+-------------+
+| binary:logitraw | |tick|      |
++-----------------+-------------+
+| binary:hinge    | |tick|      |
++-----------------+-------------+
+| count:poisson   | |tick|      |
++-----------------+-------------+
+| reg:gamma       | |tick|      |
++-----------------+-------------+
+| reg:tweedie     | |tick|      |
++-----------------+-------------+
+| multi:softmax   | |tick|      |
++-----------------+-------------+
+| multi:softprob  | |tick|      |
++-----------------+-------------+
+| survival:cox    | |cross|     |
++-----------------+-------------+
+| rank:pairwise   | |cross|     |
++-----------------+-------------+
+| rank:ndcg       | |cross|     |
++-----------------+-------------+
+| rank:map        | |cross|     |
++-----------------+-------------+
+
+For multi-GPU support, objective functions also honor the ``n_gpus`` parameter,
+which is set to 1 by default. To disable running objectives on the GPU, set
+``n_gpus`` to 0.
+
 Benchmarks
 ==========
 You can run benchmarks on synthetic data for binary classification:
@@ -118,4 +161,3 @@ Authors
 * ... and the rest of the H2O.ai and NVIDIA team.
 
 Please report bugs to the user forum https://discuss.xgboost.ai/.
-
diff --git a/doc/parameter.rst b/doc/parameter.rst
index 216155e18..0b3d6f373 100644
--- a/doc/parameter.rst
+++ b/doc/parameter.rst
@@ -245,8 +245,8 @@ Parameters for Linear Booster (``booster=gblinear``)
 
   - Choice of algorithm to fit linear model
 
-  - ``shotgun``: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run. 
-  - ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. 
+  - ``shotgun``: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.
+  - ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution.
 
 * ``feature_selector`` [default= ``cyclic``]
@@ -283,9 +283,6 @@ Specify the learning task and the corresponding learning objective. The objectiv
   - ``binary:logistic``: logistic regression for binary classification, output probability
   - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation
   - ``binary:hinge``: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities.
-  - ``gpu:reg:linear``, ``gpu:reg:logistic``, ``gpu:binary:logistic``, ``gpu:binary:logitraw``: versions
-    of the corresponding objective functions evaluated on the GPU; note that like the GPU histogram algorithm,
-    they can only be used when the entire training session uses the same dataset
   - ``count:poisson`` --poisson regression for count data, output mean of poisson distribution
   - ``max_delta_step`` is set to 0.7 by default in poisson regression (used to safeguard optimization)
diff --git a/src/common/common.h b/src/common/common.h
index 88f34e6bc..53b8a8c6a 100644
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -165,7 +165,7 @@ class GPUSet {
     CHECK_GE(n_gpus, -1) << "n_gpus must be >= -1.";
     GpuIdType const n_devices_visible = AllVisible().Size();
-    if (n_devices_visible == 0) { return Empty(); }
+    if (n_devices_visible == 0 || n_gpus == 0) { return Empty(); }
     GpuIdType const n_available_devices = n_devices_visible - gpu_id;
diff --git a/src/objective/hinge.cu b/src/objective/hinge.cu
index 9c218a266..46110813d 100644
--- a/src/objective/hinge.cu
+++ b/src/objective/hinge.cu
@@ -22,7 +22,7 @@ struct HingeObjParam : public dmlc::Parameter<HingeObjParam> {
   int n_gpus;
   int gpu_id;
   DMLC_DECLARE_PARAMETER(HingeObjParam) {
-    DMLC_DECLARE_FIELD(n_gpus).set_default(1).set_lower_bound(-1)
+    DMLC_DECLARE_FIELD(n_gpus).set_default(1).set_lower_bound(GPUSet::kAll)
         .describe("Number of GPUs to use for multi-gpu algorithms.");
     DMLC_DECLARE_FIELD(gpu_id)
         .set_lower_bound(0)
diff --git a/src/objective/multiclass_obj.cu b/src/objective/multiclass_obj.cu
index a7919f9a5..085bc8bf1 100644
--- a/src/objective/multiclass_obj.cu
+++ b/src/objective/multiclass_obj.cu
@@ -31,7 +31,7 @@ struct SoftmaxMultiClassParam : public dmlc::Parameter<SoftmaxMultiClassParam>
   DMLC_DECLARE_PARAMETER(SoftmaxMultiClassParam) {
     DMLC_DECLARE_FIELD(num_class).set_lower_bound(1)
         .describe("Number of output class in the multi-class classification.");
-    DMLC_DECLARE_FIELD(n_gpus).set_default(1).set_lower_bound(-1)
+    DMLC_DECLARE_FIELD(n_gpus).set_default(1).set_lower_bound(GPUSet::kAll)
         .describe("Number of GPUs to use for multi-gpu algorithms.");
     DMLC_DECLARE_FIELD(gpu_id)
         .set_lower_bound(0)
@@ -49,7 +49,6 @@ class SoftmaxMultiClassObj : public ObjFunction {
   }
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
-    CHECK(param_.n_gpus != 0) << "Must have at least one device";  // Default is -1
     devices_ = GPUSet::All(param_.gpu_id, param_.n_gpus);
     label_correct_.Resize(devices_.IsEmpty() ? 1 : devices_.Size());
   }
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index a6c474f2a..26bc314e3 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -34,7 +34,7 @@ struct RegLossParam : public dmlc::Parameter<RegLossParam> {
   DMLC_DECLARE_PARAMETER(RegLossParam) {
     DMLC_DECLARE_FIELD(scale_pos_weight).set_default(1.0f).set_lower_bound(0.0f)
         .describe("Scale the weight of positive examples by this factor");
-    DMLC_DECLARE_FIELD(n_gpus).set_default(1).set_lower_bound(-1)
+    DMLC_DECLARE_FIELD(n_gpus).set_default(1).set_lower_bound(GPUSet::kAll)
         .describe("Number of GPUs to use for multi-gpu algorithms.");
     DMLC_DECLARE_FIELD(gpu_id)
         .set_lower_bound(0)
@@ -53,7 +53,6 @@ class RegLossObj : public ObjFunction {
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
-    CHECK(param_.n_gpus != 0) << "Must have at least one device";  // Default is -1
     devices_ = GPUSet::All(param_.gpu_id, param_.n_gpus);
     label_correct_.Resize(devices_.IsEmpty() ? 1 : devices_.Size());
   }
@@ -182,7 +181,7 @@ struct PoissonRegressionParam : public dmlc::Parameter<PoissonRegressionParam>
     DMLC_DECLARE_FIELD(max_delta_step).set_lower_bound(0.0f).set_default(0.7f)
         .describe("Maximum delta step we allow each weight estimation to be." \
                   " This parameter is required for possion regression.");
-    DMLC_DECLARE_FIELD(n_gpus).set_default(-1).set_lower_bound(-1)
+    DMLC_DECLARE_FIELD(n_gpus).set_default(1).set_lower_bound(GPUSet::kAll)
         .describe("Number of GPUs to use for multi-gpu algorithms.");
     DMLC_DECLARE_FIELD(gpu_id)
         .set_lower_bound(0)
@@ -197,7 +196,6 @@ class PoissonRegression : public ObjFunction {
   // declare functions
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
-    CHECK(param_.n_gpus != 0) << "Must have at least one device";  // Default is -1
     devices_ = GPUSet::All(param_.gpu_id, param_.n_gpus);
     label_correct_.Resize(devices_.IsEmpty() ? 1 : devices_.Size());
   }
@@ -364,7 +362,7 @@ struct GammaRegressionParam : public dmlc::Parameter<GammaRegressionParam> {
   int n_gpus;
   int gpu_id;
   DMLC_DECLARE_PARAMETER(GammaRegressionParam) {
-    DMLC_DECLARE_FIELD(n_gpus).set_default(-1).set_lower_bound(-1)
+    DMLC_DECLARE_FIELD(n_gpus).set_default(1).set_lower_bound(GPUSet::kAll)
         .describe("Number of GPUs to use for multi-gpu algorithms.");
     DMLC_DECLARE_FIELD(gpu_id)
         .set_lower_bound(0)
@@ -379,7 +377,6 @@ class GammaRegression : public ObjFunction {
   // declare functions
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
-    CHECK(param_.n_gpus != 0) << "Must have at least one device";  // Default is -1
     devices_ = GPUSet::All(param_.gpu_id, param_.n_gpus);
     label_correct_.Resize(devices_.IsEmpty() ? 1 : devices_.Size());
   }
@@ -461,7 +458,7 @@ struct TweedieRegressionParam : public dmlc::Parameter<TweedieRegressionParam>
   DMLC_DECLARE_PARAMETER(TweedieRegressionParam) {
     DMLC_DECLARE_FIELD(tweedie_variance_power).set_range(1.0f, 2.0f).set_default(1.5f)
         .describe("Tweedie variance power. Must be between in range [1, 2).");
-    DMLC_DECLARE_FIELD(n_gpus).set_default(-1).set_lower_bound(-1)
+    DMLC_DECLARE_FIELD(n_gpus).set_default(1).set_lower_bound(GPUSet::kAll)
         .describe("Number of GPUs to use for multi-gpu algorithms.");
     DMLC_DECLARE_FIELD(gpu_id)
         .set_lower_bound(0)
@@ -476,7 +473,6 @@ class TweedieRegression : public ObjFunction {
   // declare functions
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
-    CHECK(param_.n_gpus != 0) << "Must have at least one device";  // Default is -1
     devices_ = GPUSet::All(param_.gpu_id, param_.n_gpus);
     label_correct_.Resize(devices_.IsEmpty() ? 1 : devices_.Size());
   }
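
Below is a minimal usage sketch of the behaviour documented and implemented above; it is illustrative only and not part of the patch. It assumes a GPU-enabled build of the Python package from this branch, and the synthetic data, round counts, and variable names are made up for the example. ``reg:linear``, ``tree_method``, ``max_bin``, and ``n_gpus`` are the parameters referenced in ``doc/gpu/index.rst``; setting ``n_gpus`` to 0 makes ``GPUSet::All`` return an empty set (the new early return in ``src/common/common.h``), so the objective falls back to its CPU path, while -1 (``GPUSet::kAll``) requests all visible devices.

.. code-block:: python

  import numpy as np
  import xgboost as xgb

  # Synthetic regression data, made up purely for illustration.
  X = np.random.rand(1000, 10)
  y = np.random.rand(1000)
  dtrain = xgb.DMatrix(X, label=y)

  # GPU histogram tree method with a GPU-capable objective.
  # n_gpus defaults to 1; -1 (GPUSet::kAll) would request every visible GPU.
  param = {
      'objective': 'reg:linear',
      'tree_method': 'gpu_hist',
      'max_bin': 16,
      'n_gpus': 1,
  }
  bst_gpu = xgb.train(param, dtrain, num_boost_round=10)

  # Everything on the CPU instead: n_gpus = 0 yields an empty GPUSet,
  # so the objective (and here the 'hist' tree method) run on the host.
  param.update({'tree_method': 'hist', 'n_gpus': 0})
  bst_cpu = xgb.train(param, dtrain, num_boost_round=10)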