Use Booster context in DMatrix. (#8896)
- Pass context from booster to DMatrix. - Use context instead of integer for `n_threads`. - Check the consistency configuration for `max_bin`. - Test for all combinations of initialization options.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2018 by Contributors
|
||||
/**
|
||||
* Copyright 2018-2023 by XGBoost Contributors
|
||||
* \author Rory Mitchell
|
||||
*/
|
||||
#pragma once
|
||||
@@ -78,11 +78,12 @@ inline double CoordinateDeltaBias(double sum_grad, double sum_hess) {
|
||||
*
|
||||
* \return The gradient and diagonal Hessian entry for a given feature.
|
||||
*/
|
||||
inline std::pair<double, double> GetGradient(int group_idx, int num_group, int fidx,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
inline std::pair<double, double> GetGradient(Context const *ctx, int group_idx, int num_group,
|
||||
bst_feature_t fidx,
|
||||
std::vector<GradientPair> const &gpair,
|
||||
DMatrix *p_fmat) {
|
||||
double sum_grad = 0.0, sum_hess = 0.0;
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
|
||||
auto page = batch.GetView();
|
||||
auto col = page[fidx];
|
||||
const auto ndata = static_cast<bst_omp_uint>(col.size());
|
||||
@@ -115,7 +116,7 @@ inline std::pair<double, double> GetGradientParallel(Context const *ctx, int gro
|
||||
std::vector<double> sum_grad_tloc(ctx->Threads(), 0.0);
|
||||
std::vector<double> sum_hess_tloc(ctx->Threads(), 0.0);
|
||||
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
|
||||
auto page = batch.GetView();
|
||||
auto col = page[fidx];
|
||||
const auto ndata = static_cast<bst_omp_uint>(col.size());
|
||||
@@ -177,16 +178,16 @@ inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_
|
||||
* \param in_gpair The gradient vector to be updated.
|
||||
* \param p_fmat The input feature matrix.
|
||||
*/
|
||||
inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
|
||||
float dw, std::vector<GradientPair> *in_gpair,
|
||||
DMatrix *p_fmat, int32_t n_threads) {
|
||||
inline void UpdateResidualParallel(Context const *ctx, bst_feature_t fidx, int group_idx,
|
||||
int num_group, float dw, std::vector<GradientPair> *in_gpair,
|
||||
DMatrix *p_fmat) {
|
||||
if (dw == 0.0f) return;
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
|
||||
auto page = batch.GetView();
|
||||
auto col = page[fidx];
|
||||
// update grad value
|
||||
const auto num_row = static_cast<bst_omp_uint>(col.size());
|
||||
common::ParallelFor(num_row, n_threads, [&](auto j) {
|
||||
common::ParallelFor(num_row, ctx->Threads(), [&](auto j) {
|
||||
GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
|
||||
if (p.GetHess() < 0.0f) return;
|
||||
p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
|
||||
@@ -203,12 +204,12 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
|
||||
* \param in_gpair The gradient vector to be updated.
|
||||
* \param p_fmat The input feature matrix.
|
||||
*/
|
||||
inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias,
|
||||
std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
|
||||
int32_t n_threads) {
|
||||
inline void UpdateBiasResidualParallel(Context const *ctx, int group_idx, int num_group,
|
||||
float dbias, std::vector<GradientPair> *in_gpair,
|
||||
DMatrix *p_fmat) {
|
||||
if (dbias == 0.0f) return;
|
||||
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
|
||||
common::ParallelFor(ndata, n_threads, [&](auto i) {
|
||||
common::ParallelFor(ndata, ctx->Threads(), [&](auto i) {
|
||||
GradientPair &g = (*in_gpair)[i * num_group + group_idx];
|
||||
if (g.GetHess() < 0.0f) return;
|
||||
g += GradientPair(g.GetHess() * dbias, 0);
|
||||
@@ -220,18 +221,16 @@ inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias
|
||||
* in coordinate descent algorithms.
|
||||
*/
|
||||
class FeatureSelector {
|
||||
protected:
|
||||
int32_t n_threads_{-1};
|
||||
|
||||
public:
|
||||
explicit FeatureSelector(int32_t n_threads) : n_threads_{n_threads} {}
|
||||
FeatureSelector() = default;
|
||||
/*! \brief factory method */
|
||||
static FeatureSelector *Create(int choice, int32_t n_threads);
|
||||
static FeatureSelector *Create(int choice);
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~FeatureSelector() = default;
|
||||
/**
|
||||
* \brief Setting up the selector state prior to looping through features.
|
||||
*
|
||||
* \param ctx The booster context.
|
||||
* \param model The model.
|
||||
* \param gpair The gpair.
|
||||
* \param p_fmat The feature matrix.
|
||||
@@ -239,13 +238,12 @@ class FeatureSelector {
|
||||
* \param lambda Regularisation lambda.
|
||||
* \param param A parameter with algorithm-dependent use.
|
||||
*/
|
||||
virtual void Setup(const gbm::GBLinearModel &,
|
||||
const std::vector<GradientPair> &,
|
||||
DMatrix *,
|
||||
float , float , int ) {}
|
||||
virtual void Setup(Context const *, const gbm::GBLinearModel &,
|
||||
const std::vector<GradientPair> &, DMatrix *, float, float, int) {}
|
||||
/**
|
||||
* \brief Select next coordinate to update.
|
||||
*
|
||||
* \param ctx Booster context
|
||||
* \param iteration The iteration in a loop through features
|
||||
* \param model The model.
|
||||
* \param group_idx Zero-based index of the group.
|
||||
@@ -256,11 +254,9 @@ class FeatureSelector {
|
||||
*
|
||||
* \return The index of the selected feature. -1 indicates none selected.
|
||||
*/
|
||||
virtual int NextFeature(int iteration,
|
||||
const gbm::GBLinearModel &model,
|
||||
int group_idx,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) = 0;
|
||||
virtual int NextFeature(Context const *ctx, int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<GradientPair> &gpair, DMatrix *p_fmat,
|
||||
float alpha, float lambda) = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -269,9 +265,8 @@ class FeatureSelector {
|
||||
class CyclicFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int , const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float) override {
|
||||
int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int,
|
||||
const std::vector<GradientPair> &, DMatrix *, float, float) override {
|
||||
return iteration % model.learner_model_param->num_feature;
|
||||
}
|
||||
};
|
||||
@@ -283,8 +278,7 @@ class CyclicFeatureSelector : public FeatureSelector {
|
||||
class ShuffleFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<GradientPair>&,
|
||||
void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float, int) override {
|
||||
if (feat_index_.size() == 0) {
|
||||
feat_index_.resize(model.learner_model_param->num_feature);
|
||||
@@ -293,9 +287,8 @@ class ShuffleFeatureSelector : public FeatureSelector {
|
||||
std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
|
||||
}
|
||||
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int, const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float) override {
|
||||
int NextFeature(Context const *, int iteration, const gbm::GBLinearModel &model, int,
|
||||
const std::vector<GradientPair> &, DMatrix *, float, float) override {
|
||||
return feat_index_[iteration % model.learner_model_param->num_feature];
|
||||
}
|
||||
|
||||
@@ -310,9 +303,8 @@ class ShuffleFeatureSelector : public FeatureSelector {
|
||||
class RandomFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
int NextFeature(int, const gbm::GBLinearModel &model,
|
||||
int, const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float) override {
|
||||
int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int,
|
||||
const std::vector<GradientPair> &, DMatrix *, float, float) override {
|
||||
return common::GlobalRandom()() % model.learner_model_param->num_feature;
|
||||
}
|
||||
};
|
||||
@@ -329,8 +321,7 @@ class RandomFeatureSelector : public FeatureSelector {
|
||||
class GreedyFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<GradientPair> &,
|
||||
void Setup(Context const *, const gbm::GBLinearModel &model, const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float, int param) override {
|
||||
top_k_ = static_cast<bst_uint>(param);
|
||||
const bst_uint ngroup = model.learner_model_param->num_output_group;
|
||||
@@ -344,7 +335,7 @@ class GreedyFeatureSelector : public FeatureSelector {
|
||||
}
|
||||
}
|
||||
|
||||
int NextFeature(int, const gbm::GBLinearModel &model,
|
||||
int NextFeature(Context const* ctx, int, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) override {
|
||||
// k-th selected feature for a group
|
||||
@@ -356,9 +347,9 @@ class GreedyFeatureSelector : public FeatureSelector {
|
||||
const bst_omp_uint nfeat = model.learner_model_param->num_feature;
|
||||
// Calculate univariate gradient sums
|
||||
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
|
||||
auto page = batch.GetView();
|
||||
common::ParallelFor(nfeat, this->n_threads_, [&](bst_omp_uint i) {
|
||||
common::ParallelFor(nfeat, ctx->Threads(), [&](bst_omp_uint i) {
|
||||
const auto col = page[i];
|
||||
const bst_uint ndata = col.size();
|
||||
auto &sums = gpair_sums_[group_idx * nfeat + i];
|
||||
@@ -406,9 +397,10 @@ class GreedyFeatureSelector : public FeatureSelector {
|
||||
class ThriftyFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda, int param) override {
|
||||
|
||||
void Setup(Context const *ctx, const gbm::GBLinearModel &model,
|
||||
const std::vector<GradientPair> &gpair, DMatrix *p_fmat, float alpha, float lambda,
|
||||
int param) override {
|
||||
top_k_ = static_cast<bst_uint>(param);
|
||||
if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
|
||||
const bst_uint ngroup = model.learner_model_param->num_output_group;
|
||||
@@ -422,10 +414,10 @@ class ThriftyFeatureSelector : public FeatureSelector {
|
||||
}
|
||||
// Calculate univariate gradient sums
|
||||
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx)) {
|
||||
auto page = batch.GetView();
|
||||
// column-parallel is usually fastaer than row-parallel
|
||||
common::ParallelFor(nfeat, this->n_threads_, [&](auto i) {
|
||||
common::ParallelFor(nfeat, ctx->Threads(), [&](auto i) {
|
||||
const auto col = page[i];
|
||||
const bst_uint ndata = col.size();
|
||||
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
|
||||
@@ -462,9 +454,8 @@ class ThriftyFeatureSelector : public FeatureSelector {
|
||||
}
|
||||
}
|
||||
|
||||
int NextFeature(int, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float) override {
|
||||
int NextFeature(Context const *, int, const gbm::GBLinearModel &model, int group_idx,
|
||||
const std::vector<GradientPair> &, DMatrix *, float, float) override {
|
||||
// k-th selected feature for a group
|
||||
auto k = counter_[group_idx]++;
|
||||
// stop after either reaching top-N or going through all the features in a group
|
||||
@@ -482,18 +473,18 @@ class ThriftyFeatureSelector : public FeatureSelector {
|
||||
std::vector<std::pair<double, double>> gpair_sums_;
|
||||
};
|
||||
|
||||
inline FeatureSelector *FeatureSelector::Create(int choice, int32_t n_threads) {
|
||||
inline FeatureSelector *FeatureSelector::Create(int choice) {
|
||||
switch (choice) {
|
||||
case kCyclic:
|
||||
return new CyclicFeatureSelector(n_threads);
|
||||
return new CyclicFeatureSelector;
|
||||
case kShuffle:
|
||||
return new ShuffleFeatureSelector(n_threads);
|
||||
return new ShuffleFeatureSelector;
|
||||
case kThrifty:
|
||||
return new ThriftyFeatureSelector(n_threads);
|
||||
return new ThriftyFeatureSelector;
|
||||
case kGreedy:
|
||||
return new GreedyFeatureSelector(n_threads);
|
||||
return new GreedyFeatureSelector;
|
||||
case kRandom:
|
||||
return new RandomFeatureSelector(n_threads);
|
||||
return new RandomFeatureSelector;
|
||||
default:
|
||||
LOG(FATAL) << "unknown coordinate selector: " << choice;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user