Combine thread launches into single launch per tree for gpu_hist (#4343)

* Combine thread launches into single launch per tree for gpu_hist
algorithm.

* Address deprecation warning

* Add manual column sampler constructor

* Turn off omp dynamic to get a guaranteed number of threads

* Enable openmp in cuda code
This commit is contained in:
Rory Mitchell
2019-04-29 09:58:34 +12:00
committed by GitHub
parent 146e83f3b3
commit 5e582b0fa7
10 changed files with 402 additions and 325 deletions

View File

@@ -62,6 +62,13 @@ struct TreeParam : public dmlc::Parameter<TreeParam> {
DMLC_DECLARE_FIELD(size_leaf_vector).set_lower_bound(0).set_default(0)
.describe("Size of leaf vector, reserved for vector tree");
}
/*!
 * \brief field-by-field equality test between two parameter sets
 * \param b the parameter set to compare against
 * \return true only when num_roots, num_nodes, num_deleted, max_depth,
 *         num_feature and size_leaf_vector all compare equal
 */
bool operator==(const TreeParam& b) const {
  // each step is skipped once a mismatch has already been found
  bool same = num_roots == b.num_roots;
  same = same && num_nodes == b.num_nodes;
  same = same && num_deleted == b.num_deleted;
  same = same && max_depth == b.max_depth;
  same = same && num_feature == b.num_feature;
  same = same && size_leaf_vector == b.size_leaf_vector;
  return same;
}
};
/*! \brief node statistics used in regression tree */
@@ -74,6 +81,10 @@ struct RTreeNodeStat {
bst_float base_weight;
/*! \brief number of child that is leaf node known up to now */
int leaf_child_cnt;
/*!
 * \brief exact equality test between two node statistics records
 * \param b the statistics record to compare against
 * \return true only when loss_chg, sum_hess, base_weight and
 *         leaf_child_cnt all compare equal
 */
bool operator==(const RTreeNodeStat& b) const {
  // leave at the first field that differs
  if (loss_chg != b.loss_chg) return false;
  if (sum_hess != b.sum_hess) return false;
  if (base_weight != b.base_weight) return false;
  return leaf_child_cnt == b.leaf_child_cnt;
}
};
/*!
@@ -188,6 +199,11 @@ class RegTree {
if (is_left_child) pidx |= (1U << 31);
this->parent_ = pidx;
}
/*!
 * \brief equality test between two nodes
 * \param b the node to compare against
 * \return true only when parent_, cleft_, cright_, sindex_ and
 *         info_.leaf_value all compare equal
 */
bool operator==(const Node& b) const {
  // structural links first, then the split index
  bool same = parent_ == b.parent_;
  same = same && cleft_ == b.cleft_;
  same = same && cright_ == b.cright_;
  same = same && sindex_ == b.sindex_;
  // only the leaf_value member of info_ is inspected
  same = same && info_.leaf_value == b.info_.leaf_value;
  return same;
}
private:
/*!
@@ -304,6 +320,11 @@ class RegTree {
fo->Write(dmlc::BeginPtr(stats_), sizeof(RTreeNodeStat) * nodes_.size());
}
/*!
 * \brief equality test between two trees
 * \param b the tree to compare against
 * \return true only when nodes_, stats_, deleted_nodes_ and param
 *         all compare equal
 */
bool operator==(const RegTree& b) const {
  // written as !(x == y) so that only operator== is required of the members
  if (!(nodes_ == b.nodes_)) return false;
  if (!(stats_ == b.stats_)) return false;
  if (!(deleted_nodes_ == b.deleted_nodes_)) return false;
  return param == b.param;
}
/**
* \brief Expands a leaf node into two additional leaf nodes.
*