Column sampling at individual nodes (splits). (#3971)
* Column sampling at individual nodes (splits).
* Documented colsample_bynode parameter.
  - Also updated documentation for colsample_by* parameters.
* Updated documentation.
* GetFeatureSet() returns shared pointer to std::vector.
* Sync sampled columns across multiple processes.
This commit is contained in:
committed by
Jiaming Yuan
parent
e0a279114e
commit
42bf90eb8f
@@ -50,7 +50,9 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
|
||||
float max_delta_step;
|
||||
// whether we want to do subsample
|
||||
float subsample;
|
||||
// whether to subsample columns each split, in each level
|
||||
// whether to subsample columns in each split (node)
|
||||
float colsample_bynode;
|
||||
// whether to subsample columns in each level
|
||||
float colsample_bylevel;
|
||||
// whether to subsample columns during tree construction
|
||||
float colsample_bytree;
|
||||
@@ -149,6 +151,10 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
|
||||
.set_range(0.0f, 1.0f)
|
||||
.set_default(1.0f)
|
||||
.describe("Row subsample ratio of training instance.");
|
||||
DMLC_DECLARE_FIELD(colsample_bynode)
|
||||
.set_range(0.0f, 1.0f)
|
||||
.set_default(1.0f)
|
||||
.describe("Subsample ratio of columns, resample on each node (split).");
|
||||
DMLC_DECLARE_FIELD(colsample_bylevel)
|
||||
.set_range(0.0f, 1.0f)
|
||||
.set_default(1.0f)
|
||||
|
||||
@@ -168,8 +168,8 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
}
|
||||
{
|
||||
column_sampler_.Init(fmat.Info().num_col_, param_.colsample_bylevel,
|
||||
param_.colsample_bytree);
|
||||
column_sampler_.Init(fmat.Info().num_col_, param_.colsample_bynode,
|
||||
param_.colsample_bylevel, param_.colsample_bytree);
|
||||
}
|
||||
{
|
||||
// setup temp space for each thread
|
||||
@@ -625,7 +625,8 @@ class ColMaker: public TreeUpdater {
|
||||
const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat,
|
||||
RegTree *p_tree) {
|
||||
const std::vector<int> &feat_set = column_sampler_.GetFeatureSet(depth).HostVector();
|
||||
auto p_feature_set = column_sampler_.GetFeatureSet(depth);
|
||||
const auto& feat_set = *p_feature_set;
|
||||
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
|
||||
this->UpdateSolution(batch, feat_set, gpair, p_fmat);
|
||||
}
|
||||
|
||||
@@ -499,6 +499,8 @@ struct DeviceShard {
|
||||
dh::DVec<GradientPair> node_sum_gradients_d;
|
||||
/*! \brief row offset in SparsePage (the input data). */
|
||||
thrust::device_vector<size_t> row_ptrs;
|
||||
/*! \brief On-device feature set, only actually used on one of the devices */
|
||||
thrust::device_vector<int> feature_set_d;
|
||||
/*! The row offset for this shard. */
|
||||
bst_uint row_begin_idx;
|
||||
bst_uint row_end_idx;
|
||||
@@ -579,28 +581,31 @@ struct DeviceShard {
|
||||
}
|
||||
|
||||
DeviceSplitCandidate EvaluateSplit(int nidx,
|
||||
const HostDeviceVector<int>& feature_set,
|
||||
const std::vector<int>& feature_set,
|
||||
ValueConstraint value_constraint) {
|
||||
dh::safe_cuda(cudaSetDevice(device_id_));
|
||||
auto d_split_candidates = temp_memory.GetSpan<DeviceSplitCandidate>(feature_set.Size());
|
||||
auto d_split_candidates = temp_memory.GetSpan<DeviceSplitCandidate>(feature_set.size());
|
||||
feature_set_d.resize(feature_set.size());
|
||||
auto d_features = common::Span<int>(feature_set_d.data().get(),
|
||||
feature_set_d.size());
|
||||
dh::safe_cuda(cudaMemcpy(d_features.data(), feature_set.data(),
|
||||
d_features.size_bytes(), cudaMemcpyDefault));
|
||||
DeviceNodeStats node(node_sum_gradients[nidx], nidx, param);
|
||||
feature_set.Reshard(GPUSet::Range(device_id_, 1));
|
||||
|
||||
// One block for each feature
|
||||
int constexpr BLOCK_THREADS = 256;
|
||||
EvaluateSplitKernel<BLOCK_THREADS, GradientSumT>
|
||||
<<<uint32_t(feature_set.Size()), BLOCK_THREADS, 0>>>(
|
||||
hist.GetNodeHistogram(nidx), feature_set.DeviceSpan(device_id_), node,
|
||||
cut_.feature_segments.GetSpan(), cut_.min_fvalue.GetSpan(),
|
||||
cut_.gidx_fvalue_map.GetSpan(), GPUTrainingParam(param),
|
||||
d_split_candidates, value_constraint, monotone_constraints.GetSpan());
|
||||
<<<uint32_t(feature_set.size()), BLOCK_THREADS, 0>>>
|
||||
(hist.GetNodeHistogram(nidx), d_features, node,
|
||||
cut_.feature_segments.GetSpan(), cut_.min_fvalue.GetSpan(),
|
||||
cut_.gidx_fvalue_map.GetSpan(), GPUTrainingParam(param),
|
||||
d_split_candidates, value_constraint, monotone_constraints.GetSpan());
|
||||
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
std::vector<DeviceSplitCandidate> split_candidates(feature_set.Size());
|
||||
dh::safe_cuda(
|
||||
cudaMemcpy(split_candidates.data(), d_split_candidates.data(),
|
||||
split_candidates.size() * sizeof(DeviceSplitCandidate),
|
||||
cudaMemcpyDeviceToHost));
|
||||
std::vector<DeviceSplitCandidate> split_candidates(feature_set.size());
|
||||
dh::safe_cuda(cudaMemcpy(split_candidates.data(), d_split_candidates.data(),
|
||||
split_candidates.size() * sizeof(DeviceSplitCandidate),
|
||||
cudaMemcpyDeviceToHost));
|
||||
DeviceSplitCandidate best_split;
|
||||
for (auto candidate : split_candidates) {
|
||||
best_split.Update(candidate, param);
|
||||
@@ -1009,7 +1014,8 @@ class GPUHistMakerSpecialised{
|
||||
}
|
||||
monitor_.Stop("InitDataOnce", dist_.Devices());
|
||||
|
||||
column_sampler_.Init(info_->num_col_, param_.colsample_bylevel, param_.colsample_bytree);
|
||||
column_sampler_.Init(info_->num_col_, param_.colsample_bynode,
|
||||
param_.colsample_bylevel, param_.colsample_bytree);
|
||||
|
||||
// Copy gpair & reset memory
|
||||
monitor_.Start("InitDataReset", dist_.Devices());
|
||||
@@ -1100,7 +1106,7 @@ class GPUHistMakerSpecialised{
|
||||
|
||||
DeviceSplitCandidate EvaluateSplit(int nidx, RegTree* p_tree) {
|
||||
return shards_.front()->EvaluateSplit(
|
||||
nidx, column_sampler_.GetFeatureSet(p_tree->GetDepth(nidx)),
|
||||
nidx, *column_sampler_.GetFeatureSet(p_tree->GetDepth(nidx)),
|
||||
node_value_constraints_[nidx]);
|
||||
}
|
||||
|
||||
|
||||
@@ -354,11 +354,11 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
|
||||
p_last_fmat_ = &fmat;
|
||||
// initialize feature index
|
||||
if (data_layout_ == kDenseDataOneBased) {
|
||||
column_sampler_.Init(info.num_col_, param_.colsample_bylevel,
|
||||
param_.colsample_bytree, true);
|
||||
column_sampler_.Init(info.num_col_, param_.colsample_bynode,
|
||||
param_.colsample_bylevel, param_.colsample_bytree, true);
|
||||
} else {
|
||||
column_sampler_.Init(info.num_col_, param_.colsample_bylevel,
|
||||
param_.colsample_bytree, false);
|
||||
column_sampler_.Init(info.num_col_, param_.colsample_bynode,
|
||||
param_.colsample_bylevel, param_.colsample_bytree, false);
|
||||
}
|
||||
}
|
||||
if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
|
||||
@@ -400,8 +400,8 @@ void QuantileHistMaker::Builder::EvaluateSplit(int nid,
|
||||
const RegTree& tree) {
|
||||
// start enumeration
|
||||
const MetaInfo& info = fmat.Info();
|
||||
const auto& feature_set = column_sampler_.GetFeatureSet(
|
||||
tree.GetDepth(nid)).HostVector();
|
||||
auto p_feature_set = column_sampler_.GetFeatureSet(tree.GetDepth(nid));
|
||||
const auto& feature_set = *p_feature_set;
|
||||
const auto nfeature = static_cast<bst_uint>(feature_set.size());
|
||||
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
|
||||
best_split_tloc_.resize(nthread);
|
||||
|
||||
Reference in New Issue
Block a user