Move GHistIndex into DMatrix. (#7064)

This commit is contained in:
Jiaming Yuan
2021-07-01 00:44:49 +08:00
committed by GitHub
parent 1c8fdf2218
commit 1cd20efe68
17 changed files with 386 additions and 320 deletions

View File

@@ -69,18 +69,22 @@ template<typename GradientSumT>
/*!
 * \brief Run the given builder over every tree in `trees`.
 *
 * For each tree, in iteration order, this forwards the histogram index
 * matrix, the column_matrix_ member, `gpair`, `dmat` and the tree itself to
 * builder->Update.
 *
 * NOTE(review): two consecutive builder->Update calls are visible in the loop
 * body, one reading the gmat_ member and one reading the `gmat` parameter.
 * This listing looks like a change view whose +/- markers were stripped, in
 * which case only the call using the `gmat` parameter is live; confirm
 * against the actual file.
 */
void QuantileHistMaker::CallBuilderUpdate(const std::unique_ptr<Builder<GradientSumT>>& builder,
HostDeviceVector<GradientPair> *gpair,
DMatrix *dmat,
GHistIndexMatrix const& gmat,
const std::vector<RegTree *> &trees) {
for (auto tree : trees) {
builder->Update(gmat_, column_matrix_, gpair, dmat, tree);
builder->Update(gmat, column_matrix_, gpair, dmat, tree);
}
}
void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair,
DMatrix *dmat,
const std::vector<RegTree *> &trees) {
auto const &gmat =
*(dmat->GetBatches<GHistIndexMatrix>(
BatchParam{GenericParameter::kCpuId, param_.max_bin})
.begin());
if (dmat != p_last_dmat_ || is_gmat_initialized_ == false) {
updater_monitor_.Start("GmatInitialization");
gmat_.Init(dmat, static_cast<uint32_t>(param_.max_bin));
column_matrix_.Init(gmat_, param_.sparse_threshold);
column_matrix_.Init(gmat, param_.sparse_threshold);
updater_monitor_.Stop("GmatInitialization");
// A proper solution is putting the cut matrix in DMatrix, see:
// https://github.com/dmlc/xgboost/issues/5143
@@ -96,12 +100,12 @@ void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair,
if (!float_builder_) {
SetBuilder(n_trees, &float_builder_, dmat);
}
CallBuilderUpdate(float_builder_, gpair, dmat, trees);
CallBuilderUpdate(float_builder_, gpair, dmat, gmat, trees);
} else {
if (!double_builder_) {
SetBuilder(n_trees, &double_builder_, dmat);
}
CallBuilderUpdate(double_builder_, gpair, dmat, trees);
CallBuilderUpdate(double_builder_, gpair, dmat, gmat, trees);
}
param_.learning_rate = lr;
@@ -678,7 +682,7 @@ void QuantileHistMaker::Builder<GradientSumT>::InitData(const GHistIndexMatrix&
// We should check that the partitioning was done correctly
// and each row of the dataset fell into exactly one of the categories
}
MemStackAllocator<bool, 128> buff(this->nthread_);
common::MemStackAllocator<bool, 128> buff(this->nthread_);
bool* p_buff = buff.Get();
std::fill(p_buff, p_buff + this->nthread_, false);

View File

@@ -75,43 +75,9 @@ struct RandomReplace {
}
};
/*!
 * \brief A C-style array that uses a buffer embedded in the object when it is small enough.
 *
 * When the requested element count is no larger than MaxStackSize, Get() returns a pointer
 * to the embedded `stack_mem_` array.  Otherwise the storage is obtained with malloc() on the
 * first call to Get() and released with free() in the destructor.
 *
 * The object is non-copyable: it either owns a malloc'd buffer or hands out a pointer into
 * its own embedded array, so an implicit member-wise copy would lead to a double free or to
 * a pointer aliasing another object's storage.
 *
 * \tparam T            Element type.  Memory obtained from malloc() is not constructed.
 * \tparam MaxStackSize Number of elements in the embedded buffer.
 */
template<typename T, size_t MaxStackSize>
class MemStackAllocator {
 public:
  /*!
   * \brief Record the requested size; no allocation happens until Get() is called.
   * \param required_size Number of elements of type T that the caller needs.
   */
  explicit MemStackAllocator(size_t required_size) : required_size_(required_size) {}

  // Copying would duplicate ptr_ / do_free_ and free the same heap block twice.
  MemStackAllocator(const MemStackAllocator&) = delete;
  MemStackAllocator& operator=(const MemStackAllocator&) = delete;

  /*!
   * \brief Obtain the underlying storage, selecting or allocating it on first use.
   * \return Pointer to storage for `required_size` elements.  If the heap path is taken and
   *         malloc() fails, the result is nullptr; callers must check before dereferencing.
   */
  T* Get() {
    if (!ptr_) {
      if (MaxStackSize >= required_size_) {
        ptr_ = stack_mem_;
      } else {
        // malloc returns void*, so static_cast is the appropriate conversion.
        ptr_ = static_cast<T*>(malloc(required_size_ * sizeof(T)));
        do_free_ = true;
      }
    }
    return ptr_;
  }

  ~MemStackAllocator() {
    // free(nullptr) is a no-op, so a failed allocation is harmless here.
    if (do_free_) free(ptr_);
  }

 private:
  T* ptr_ = nullptr;        // storage handed out by Get(); null until first use
  bool do_free_ = false;    // true once the heap path has been taken
  size_t required_size_;    // element count requested at construction
  T stack_mem_[MaxStackSize];
};
namespace tree {
using xgboost::common::GHistIndexMatrix;
using xgboost::GHistIndexMatrix;
using xgboost::common::GHistIndexRow;
using xgboost::common::HistCollection;
using xgboost::common::RowSetCollection;
@@ -243,8 +209,6 @@ class QuantileHistMaker: public TreeUpdater {
CPUHistMakerTrainParam hist_maker_param_;
// training parameter
TrainParam param_;
// quantized data matrix
GHistIndexMatrix gmat_;
// column accessor
ColumnMatrix column_matrix_;
DMatrix const* p_last_dmat_ {nullptr};
@@ -466,6 +430,7 @@ class QuantileHistMaker: public TreeUpdater {
void CallBuilderUpdate(const std::unique_ptr<Builder<GradientSumT>>& builder,
HostDeviceVector<GradientPair> *gpair,
DMatrix *dmat,
GHistIndexMatrix const& gmat,
const std::vector<RegTree *> &trees);
protected: