De-duplicate GPU parameters. (#4454)
* Only define `gpu_id` and `n_gpus` in `LearnerTrainParam` * Pass LearnerTrainParam through XGBoost via factory method. * Disable all GPU usage when GPU related parameters are not specified (fixes XGBoost choosing GPU too aggressively). * Test learner train param io. * Fix gpu pickling.
This commit is contained in:
@@ -72,10 +72,6 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
|
||||
bool refresh_leaf;
|
||||
// auxiliary data structure
|
||||
std::vector<int> monotone_constraints;
|
||||
// gpu to use for single gpu algorithms
|
||||
int gpu_id;
|
||||
// number of GPUs to use
|
||||
int n_gpus;
|
||||
// the criteria to use for ranking splits
|
||||
std::string split_evaluator;
|
||||
|
||||
@@ -191,14 +187,6 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
|
||||
DMLC_DECLARE_FIELD(monotone_constraints)
|
||||
.set_default(std::vector<int>())
|
||||
.describe("Constraint of variable monotonicity");
|
||||
DMLC_DECLARE_FIELD(gpu_id)
|
||||
.set_lower_bound(0)
|
||||
.set_default(0)
|
||||
.describe("gpu to use for single gpu algorithms");
|
||||
DMLC_DECLARE_FIELD(n_gpus)
|
||||
.set_lower_bound(-1)
|
||||
.set_default(1)
|
||||
.describe("Number of GPUs to use for multi-gpu algorithms: -1=use all GPUs");
|
||||
DMLC_DECLARE_FIELD(split_evaluator)
|
||||
.set_default("elastic_net,monotonic,interaction")
|
||||
.describe("The criteria to use for ranking splits");
|
||||
|
||||
@@ -14,12 +14,14 @@ DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
TreeUpdater* TreeUpdater::Create(const std::string& name) {
|
||||
TreeUpdater* TreeUpdater::Create(const std::string& name, LearnerTrainParam const* tparam) {
|
||||
auto *e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name);
|
||||
if (e == nullptr) {
|
||||
LOG(FATAL) << "Unknown tree updater " << name;
|
||||
}
|
||||
return (e->body)();
|
||||
auto p_updater = (e->body)();
|
||||
p_updater->tparam_ = tparam;
|
||||
return p_updater;
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -769,7 +769,7 @@ class DistColMaker : public ColMaker {
|
||||
public:
|
||||
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
param_.InitAllowUnknown(args);
|
||||
pruner_.reset(TreeUpdater::Create("prune"));
|
||||
pruner_.reset(TreeUpdater::Create("prune", tparam_));
|
||||
pruner_->Init(args);
|
||||
spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
|
||||
spliteval_->Init(args);
|
||||
|
||||
@@ -443,9 +443,10 @@ void ArgMaxByKey(common::Span<ExactSplitCandidate> nodeSplits,
|
||||
common::Span<const DeviceNodeStats> nodes,
|
||||
int nUniqKeys,
|
||||
NodeIdT nodeStart, int len, const TrainParam param,
|
||||
ArgMaxByKeyAlgo algo) {
|
||||
ArgMaxByKeyAlgo algo,
|
||||
GPUSet const& devices) {
|
||||
dh::FillConst<ExactSplitCandidate, BLKDIM, ITEMS_PER_THREAD>(
|
||||
param.gpu_id, nodeSplits.data(), nUniqKeys,
|
||||
*(devices.begin()), nodeSplits.data(), nUniqKeys,
|
||||
ExactSplitCandidate());
|
||||
int nBlks = dh::DivRoundUp(len, ITEMS_PER_THREAD * BLKDIM);
|
||||
switch (algo) {
|
||||
@@ -585,7 +586,7 @@ class GPUMaker : public TreeUpdater {
|
||||
maxNodes_ = (1 << (param_.max_depth + 1)) - 1;
|
||||
maxLeaves_ = 1 << param_.max_depth;
|
||||
|
||||
devices_ = GPUSet::All(param_.gpu_id, param_.n_gpus);
|
||||
devices_ = GPUSet::All(tparam_->gpu_id, tparam_->n_gpus);
|
||||
}
|
||||
|
||||
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
@@ -641,7 +642,7 @@ class GPUMaker : public TreeUpdater {
|
||||
float min_split_loss = param_.min_split_loss;
|
||||
auto gpu_param = GPUTrainingParam(param_);
|
||||
|
||||
dh::LaunchN(param_.gpu_id, nNodes, [=] __device__(int uid) {
|
||||
dh::LaunchN(*(devices_.begin()), nNodes, [=] __device__(int uid) {
|
||||
int absNodeId = uid + nodeStart;
|
||||
ExactSplitCandidate s = d_nodeSplits[uid];
|
||||
if (s.IsSplittable(min_split_loss)) {
|
||||
@@ -683,16 +684,18 @@ class GPUMaker : public TreeUpdater {
|
||||
instIds_.CurrentSpan(), nodeAssigns_.CurrentSpan(), n_vals_, nNodes,
|
||||
n_cols_, tmpScanGradBuff_, tmp_scan_key_buff_,
|
||||
colIds_, nodeStart);
|
||||
auto devices = GPUSet::All(tparam_->gpu_id, tparam_->n_gpus);
|
||||
ArgMaxByKey(nodeSplits_, gradscans_, gradsums_,
|
||||
vals_.CurrentSpan(), colIds_, nodeAssigns_.CurrentSpan(),
|
||||
nodes_, nNodes, nodeStart, n_vals_, param_,
|
||||
level <= kMaxAbkLevels ? kAbkSmem : kAbkGmem);
|
||||
level <= kMaxAbkLevels ? kAbkSmem : kAbkGmem,
|
||||
devices);
|
||||
Split2Node(nNodes, nodeStart);
|
||||
}
|
||||
|
||||
void AllocateAllData(int offsetSize) {
|
||||
int tmpBuffSize = ScanTempBufferSize(n_vals_);
|
||||
ba_.Allocate(param_.gpu_id, &vals_, n_vals_,
|
||||
ba_.Allocate(*(devices_.begin()), &vals_, n_vals_,
|
||||
&vals_cached_, n_vals_, &instIds_, n_vals_, &inst_ids_cached_, n_vals_,
|
||||
&colOffsets_, offsetSize, &gradsInst_, n_rows_, &nodeAssigns_, n_vals_,
|
||||
&nodeLocations_, n_vals_, &nodes_, maxNodes_, &node_assigns_per_inst_,
|
||||
@@ -783,7 +786,7 @@ class GPUMaker : public TreeUpdater {
|
||||
auto d_nodes = nodes_;
|
||||
auto d_sums = gradsums_;
|
||||
auto gpu_params = GPUTrainingParam(param_);
|
||||
dh::LaunchN(param_.gpu_id, 1, [=] __device__(int idx) {
|
||||
dh::LaunchN(*(devices_.begin()), 1, [=] __device__(int idx) {
|
||||
d_nodes[0] = DeviceNodeStats(d_sums[0], 0, gpu_params);
|
||||
});
|
||||
} else {
|
||||
@@ -800,7 +803,7 @@ class GPUMaker : public TreeUpdater {
|
||||
nodeAssigns_.Current(), instIds_.Current(), nodes_.data(),
|
||||
colOffsets_.data(), vals_.Current(), n_vals_, n_cols_);
|
||||
// gather the node assignments across all other columns too
|
||||
dh::Gather(param_.gpu_id, nodeAssigns_.Current(),
|
||||
dh::Gather(*(devices_.begin()), nodeAssigns_.Current(),
|
||||
node_assigns_per_inst_.data(), instIds_.Current(), n_vals_);
|
||||
SortKeys(level);
|
||||
}
|
||||
@@ -811,7 +814,7 @@ class GPUMaker : public TreeUpdater {
|
||||
// but we don't need more than level+1 bits for sorting!
|
||||
SegmentedSort(&tmp_mem_, &nodeAssigns_, &nodeLocations_, n_vals_, n_cols_,
|
||||
colOffsets_, 0, level + 1);
|
||||
dh::Gather<float, int>(param_.gpu_id, vals_.other(),
|
||||
dh::Gather<float, int>(*(devices_.begin()), vals_.other(),
|
||||
vals_.Current(), instIds_.other(), instIds_.Current(),
|
||||
nodeLocations_.Current(), n_vals_);
|
||||
vals_.buff.selector ^= 1;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017-2018 XGBoost contributors
|
||||
* Copyright 2017-2019 XGBoost contributors
|
||||
*/
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/functional.h>
|
||||
@@ -322,7 +322,7 @@ __global__ void EvaluateSplitKernel(
|
||||
node_histogram, // histogram for gradients
|
||||
common::Span<const int> feature_set, // Selected features
|
||||
DeviceNodeStats node,
|
||||
ELLPackMatrix matrix,
|
||||
ELLPackMatrix matrix,
|
||||
GPUTrainingParam gpu_param,
|
||||
common::Span<DeviceSplitCandidate> split_candidates, // resulting split
|
||||
ValueConstraint value_constraint,
|
||||
@@ -1377,13 +1377,16 @@ template <typename GradientSumT>
|
||||
class GPUHistMakerSpecialised{
|
||||
public:
|
||||
GPUHistMakerSpecialised() : initialised_{false}, p_last_fmat_{nullptr} {}
|
||||
void Init(
|
||||
const std::vector<std::pair<std::string, std::string>>& args) {
|
||||
void Init(const std::vector<std::pair<std::string, std::string>>& args,
|
||||
LearnerTrainParam const* lparam) {
|
||||
param_.InitAllowUnknown(args);
|
||||
learner_param_ = lparam;
|
||||
hist_maker_param_.InitAllowUnknown(args);
|
||||
CHECK(param_.n_gpus != 0) << "Must have at least one device";
|
||||
n_devices_ = param_.n_gpus;
|
||||
dist_ = GPUDistribution::Block(GPUSet::All(param_.gpu_id, param_.n_gpus));
|
||||
auto devices = GPUSet::All(learner_param_->gpu_id,
|
||||
learner_param_->n_gpus);
|
||||
n_devices_ = devices.Size();
|
||||
CHECK(n_devices_ != 0) << "Must have at least one device";
|
||||
dist_ = GPUDistribution::Block(devices);
|
||||
|
||||
dh::CheckComputeCapability();
|
||||
|
||||
@@ -1446,7 +1449,8 @@ class GPUHistMakerSpecialised{
|
||||
|
||||
// Find the cuts.
|
||||
monitor_.StartCuda("Quantiles");
|
||||
common::DeviceSketch(batch, *info_, param_, &hmat_, hist_maker_param_.gpu_batch_nrows);
|
||||
common::DeviceSketch(batch, *info_, param_, &hmat_, hist_maker_param_.gpu_batch_nrows,
|
||||
GPUSet::All(learner_param_->gpu_id, learner_param_->n_gpus));
|
||||
n_bins_ = hmat_.row_ptr.back();
|
||||
monitor_.StopCuda("Quantiles");
|
||||
auto is_dense = info_->num_nonzero_ == info_->num_row_ * info_->num_col_;
|
||||
@@ -1552,6 +1556,7 @@ class GPUHistMakerSpecialised{
|
||||
int n_bins_;
|
||||
|
||||
GPUHistMakerTrainParam hist_maker_param_;
|
||||
LearnerTrainParam const* learner_param_;
|
||||
common::GHistIndexMatrix gmat_;
|
||||
|
||||
dh::AllReducer reducer_;
|
||||
@@ -1573,10 +1578,10 @@ class GPUHistMaker : public TreeUpdater {
|
||||
double_maker_.reset();
|
||||
if (hist_maker_param_.single_precision_histogram) {
|
||||
float_maker_.reset(new GPUHistMakerSpecialised<GradientPair>());
|
||||
float_maker_->Init(args);
|
||||
float_maker_->Init(args, tparam_);
|
||||
} else {
|
||||
double_maker_.reset(new GPUHistMakerSpecialised<GradientPairPrecise>());
|
||||
double_maker_->Init(args);
|
||||
double_maker_->Init(args, tparam_);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ DMLC_REGISTRY_FILE_TAG(updater_prune);
|
||||
class TreePruner: public TreeUpdater {
|
||||
public:
|
||||
TreePruner() {
|
||||
syncher_.reset(TreeUpdater::Create("sync"));
|
||||
syncher_.reset(TreeUpdater::Create("sync", tparam_));
|
||||
}
|
||||
// set training parameter
|
||||
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
|
||||
@@ -35,7 +35,7 @@ DMLC_REGISTRY_FILE_TAG(updater_quantile_hist);
|
||||
void QuantileHistMaker::Init(const std::vector<std::pair<std::string, std::string> >& args) {
|
||||
// initialize pruner
|
||||
if (!pruner_) {
|
||||
pruner_.reset(TreeUpdater::Create("prune"));
|
||||
pruner_.reset(TreeUpdater::Create("prune", tparam_));
|
||||
}
|
||||
pruner_->Init(args);
|
||||
param_.InitAllowUnknown(args);
|
||||
|
||||
Reference in New Issue
Block a user