Additional improvements for gblinear (#3134)
* fix rebase conflict * [core] additional gblinear improvements * [R] callback for gblinear coefficients history * force eta=1 for gblinear python tests * add top_k to GreedyFeatureSelector * set eta=1 in shotgun test * [core] fix SparsePage processing in gblinear; col-wise multithreading in greedy updater * set sorted flag within TryInitColData * gblinear tests: use scale, add external memory test * fix multiclass for greedy updater * fix whitespace * fix typo
This commit is contained in:
committed by
GitHub
parent
a1b48afa41
commit
706be4e5d4
@@ -7,6 +7,7 @@
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include "../common/random.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -19,26 +20,21 @@ namespace linear {
|
||||
* \param sum_grad The sum gradient.
|
||||
* \param sum_hess The sum hess.
|
||||
* \param w The weight.
|
||||
* \param reg_lambda Unnormalised L2 penalty.
|
||||
* \param reg_alpha Unnormalised L1 penalty.
|
||||
* \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty.
|
||||
* \param reg_lambda Unnormalised L2 penalty.
|
||||
*
|
||||
* \return The weight update.
|
||||
*/
|
||||
|
||||
inline double CoordinateDelta(double sum_grad, double sum_hess, double w,
|
||||
double reg_lambda, double reg_alpha,
|
||||
double sum_instance_weight) {
|
||||
reg_alpha *= sum_instance_weight;
|
||||
reg_lambda *= sum_instance_weight;
|
||||
double reg_alpha, double reg_lambda) {
|
||||
if (sum_hess < 1e-5f) return 0.0f;
|
||||
double tmp = w - (sum_grad + reg_lambda * w) / (sum_hess + reg_lambda);
|
||||
const double sum_grad_l2 = sum_grad + reg_lambda * w;
|
||||
const double sum_hess_l2 = sum_hess + reg_lambda;
|
||||
const double tmp = w - sum_grad_l2 / sum_hess_l2;
|
||||
if (tmp >= 0) {
|
||||
return std::max(
|
||||
-(sum_grad + reg_lambda * w + reg_alpha) / (sum_hess + reg_lambda), -w);
|
||||
return std::max(-(sum_grad_l2 + reg_alpha) / sum_hess_l2, -w);
|
||||
} else {
|
||||
return std::min(
|
||||
-(sum_grad + reg_lambda * w - reg_alpha) / (sum_hess + reg_lambda), -w);
|
||||
return std::min(-(sum_grad_l2 - reg_alpha) / sum_hess_l2, -w);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,7 +46,6 @@ inline double CoordinateDelta(double sum_grad, double sum_hess, double w,
|
||||
*
|
||||
* \return The weight update.
|
||||
*/
|
||||
|
||||
inline double CoordinateDeltaBias(double sum_grad, double sum_hess) {
|
||||
return -sum_grad / sum_hess;
|
||||
}
|
||||
@@ -66,15 +61,14 @@ inline double CoordinateDeltaBias(double sum_grad, double sum_hess) {
|
||||
*
|
||||
* \return The gradient and diagonal Hessian entry for a given feature.
|
||||
*/
|
||||
|
||||
inline std::pair<double, double> GetGradient(
|
||||
int group_idx, int num_group, int fidx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat) {
|
||||
inline std::pair<double, double> GetGradient(int group_idx, int num_group, int fidx,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat) {
|
||||
double sum_grad = 0.0, sum_hess = 0.0;
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)});
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
ColBatch::Inst col = batch[fidx];
|
||||
ColBatch::Inst col = batch[0];
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
const bst_float v = col[j].fvalue;
|
||||
@@ -88,7 +82,7 @@ inline std::pair<double, double> GetGradient(
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Get the gradient with respect to a single feature. Multithreaded.
|
||||
* \brief Get the gradient with respect to a single feature. Row-wise multithreaded.
|
||||
*
|
||||
* \param group_idx Zero-based index of the group.
|
||||
* \param num_group Number of groups.
|
||||
@@ -98,16 +92,14 @@ inline std::pair<double, double> GetGradient(
|
||||
*
|
||||
* \return The gradient and diagonal Hessian entry for a given feature.
|
||||
*/
|
||||
|
||||
inline std::pair<double, double> GetGradientParallel(
|
||||
int group_idx, int num_group, int fidx,
|
||||
|
||||
const std::vector<bst_gpair> &gpair, DMatrix *p_fmat) {
|
||||
inline std::pair<double, double> GetGradientParallel(int group_idx, int num_group, int fidx,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat) {
|
||||
double sum_grad = 0.0, sum_hess = 0.0;
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)});
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
ColBatch::Inst col = batch[fidx];
|
||||
ColBatch::Inst col = batch[0];
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
|
||||
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
@@ -122,7 +114,7 @@ inline std::pair<double, double> GetGradientParallel(
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Get the gradient with respect to the bias. Multithreaded.
|
||||
* \brief Get the gradient with respect to the bias. Row-wise multithreaded.
|
||||
*
|
||||
* \param group_idx Zero-based index of the group.
|
||||
* \param num_group Number of groups.
|
||||
@@ -131,10 +123,9 @@ inline std::pair<double, double> GetGradientParallel(
|
||||
*
|
||||
* \return The gradient and diagonal Hessian entry for the bias.
|
||||
*/
|
||||
|
||||
inline std::pair<double, double> GetBiasGradientParallel(
|
||||
int group_idx, int num_group, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat) {
|
||||
inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_group,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat) {
|
||||
const RowSet &rowset = p_fmat->buffered_rowset();
|
||||
double sum_grad = 0.0, sum_hess = 0.0;
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
|
||||
@@ -159,15 +150,14 @@ inline std::pair<double, double> GetBiasGradientParallel(
|
||||
* \param in_gpair The gradient vector to be updated.
|
||||
* \param p_fmat The input feature matrix.
|
||||
*/
|
||||
|
||||
inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
|
||||
float dw, std::vector<bst_gpair> *in_gpair,
|
||||
DMatrix *p_fmat) {
|
||||
if (dw == 0.0f) return;
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)});
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
ColBatch::Inst col = batch[fidx];
|
||||
ColBatch::Inst col = batch[0];
|
||||
// update grad value
|
||||
const bst_omp_uint num_row = static_cast<bst_omp_uint>(col.length);
|
||||
#pragma omp parallel for schedule(static)
|
||||
@@ -188,9 +178,7 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
|
||||
* \param in_gpair The gradient vector to be updated.
|
||||
* \param p_fmat The input feature matrix.
|
||||
*/
|
||||
|
||||
inline void UpdateBiasResidualParallel(int group_idx, int num_group,
|
||||
float dbias,
|
||||
inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias,
|
||||
std::vector<bst_gpair> *in_gpair,
|
||||
DMatrix *p_fmat) {
|
||||
if (dbias == 0.0f) return;
|
||||
@@ -205,114 +193,292 @@ inline void UpdateBiasResidualParallel(int group_idx, int num_group,
|
||||
}
|
||||
|
||||
/**
|
||||
* \class FeatureSelector
|
||||
*
|
||||
* \brief Abstract class for stateful feature selection in coordinate descent
|
||||
* algorithms.
|
||||
* \brief Abstract class for stateful feature selection or ordering
|
||||
* in coordinate descent algorithms.
|
||||
*/
|
||||
|
||||
class FeatureSelector {
|
||||
public:
|
||||
static FeatureSelector *Create(std::string name);
|
||||
/*! \brief factory method */
|
||||
static FeatureSelector *Create(int choice);
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~FeatureSelector() {}
|
||||
|
||||
/**
|
||||
* \brief Setting up the selector state prior to looping through features.
|
||||
*
|
||||
* \param model The model.
|
||||
* \param gpair The gpair.
|
||||
* \param p_fmat The feature matrix.
|
||||
* \param alpha Regularisation alpha.
|
||||
* \param lambda Regularisation lambda.
|
||||
* \param param A parameter with algorithm-dependent use.
|
||||
*/
|
||||
virtual void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat,
|
||||
float alpha, float lambda, int param) {}
|
||||
/**
|
||||
* \brief Select next coordinate to update.
|
||||
*
|
||||
* \param iteration The iteration.
|
||||
* \param model The model.
|
||||
* \param group_idx Zero-based index of the group.
|
||||
* \param gpair The gpair.
|
||||
* \param p_fmat The feature matrix.
|
||||
* \param alpha Regularisation alpha.
|
||||
* \param lambda Regularisation lambda.
|
||||
* \param sum_instance_weight The sum instance weight.
|
||||
* \param iteration The iteration in a loop through features
|
||||
* \param model The model.
|
||||
* \param group_idx Zero-based index of the group.
|
||||
* \param gpair The gpair.
|
||||
* \param p_fmat The feature matrix.
|
||||
* \param alpha Regularisation alpha.
|
||||
* \param lambda Regularisation lambda.
|
||||
*
|
||||
* \return The index of the selected feature. -1 indicates the bias term.
|
||||
* \return The index of the selected feature. -1 indicates none selected.
|
||||
*/
|
||||
|
||||
virtual int SelectNextFeature(int iteration,
|
||||
const gbm::GBLinearModel &model,
|
||||
int group_idx,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda,
|
||||
double sum_instance_weight) = 0;
|
||||
virtual int NextFeature(int iteration,
|
||||
const gbm::GBLinearModel &model,
|
||||
int group_idx,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* \class CyclicFeatureSelector
|
||||
*
|
||||
* \brief Deterministic selection by cycling through coordinates one at a time.
|
||||
* \brief Deterministic selection by cycling through features one at a time.
|
||||
*/
|
||||
|
||||
class CyclicFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
int SelectNextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda,
|
||||
double sum_instance_weight) override {
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) override {
|
||||
return iteration % model.param.num_feature;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \class RandomFeatureSelector
|
||||
*
|
||||
* \brief A random coordinate selector.
|
||||
* \brief Similar to Cyclyc but with random feature shuffling prior to each update.
|
||||
* \note Its randomness is controllable by setting a random seed.
|
||||
*/
|
||||
class ShuffleFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda, int param) override {
|
||||
if (feat_index.size() == 0) {
|
||||
feat_index.resize(model.param.num_feature);
|
||||
std::iota(feat_index.begin(), feat_index.end(), 0);
|
||||
}
|
||||
std::shuffle(feat_index.begin(), feat_index.end(), common::GlobalRandom());
|
||||
}
|
||||
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) override {
|
||||
return feat_index[iteration % model.param.num_feature];
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<bst_uint> feat_index;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief A random (with replacement) coordinate selector.
|
||||
* \note Its randomness is controllable by setting a random seed.
|
||||
*/
|
||||
class RandomFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
int SelectNextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda,
|
||||
double sum_instance_weight) override {
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) override {
|
||||
return common::GlobalRandom()() % model.param.num_feature;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \class GreedyFeatureSelector
|
||||
*
|
||||
* \brief Select coordinate with the greatest gradient magnitude.
|
||||
* \note It has O(num_feature^2) complexity. It is fully deterministic.
|
||||
*
|
||||
* \note It allows restricting the selection to top_k features per group with
|
||||
* the largest magnitude of univariate weight change, by passing the top_k value
|
||||
* through the `param` argument of Setup(). That would reduce the complexity to
|
||||
* O(num_feature*top_k).
|
||||
*/
|
||||
|
||||
class GreedyFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
int SelectNextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda,
|
||||
double sum_instance_weight) override {
|
||||
// Find best
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda, int param) override {
|
||||
top_k = static_cast<bst_uint>(param);
|
||||
const bst_uint ngroup = model.param.num_output_group;
|
||||
if (param <= 0) top_k = std::numeric_limits<bst_uint>::max();
|
||||
if (counter.size() == 0) {
|
||||
counter.resize(ngroup);
|
||||
gpair_sums.resize(model.param.num_feature * ngroup);
|
||||
}
|
||||
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
|
||||
counter[gid] = 0u;
|
||||
}
|
||||
}
|
||||
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) override {
|
||||
// k-th selected feature for a group
|
||||
auto k = counter[group_idx]++;
|
||||
// stop after either reaching top-K or going through all the features in a group
|
||||
if (k >= top_k || counter[group_idx] == model.param.num_feature) return -1;
|
||||
|
||||
const int ngroup = model.param.num_output_group;
|
||||
const bst_omp_uint nfeat = model.param.num_feature;
|
||||
// Calculate univariate gradient sums
|
||||
std::fill(gpair_sums.begin(), gpair_sums.end(), std::make_pair(0., 0.));
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nfeat; ++i) {
|
||||
const ColBatch::Inst col = batch[i];
|
||||
const bst_uint ndata = col.length;
|
||||
auto &sums = gpair_sums[group_idx * nfeat + i];
|
||||
for (bst_uint j = 0u; j < ndata; ++j) {
|
||||
const bst_float v = col[j].fvalue;
|
||||
auto &p = gpair[col[j].index * ngroup + group_idx];
|
||||
if (p.GetHess() < 0.f) continue;
|
||||
sums.first += p.GetGrad() * v;
|
||||
sums.second += p.GetHess() * v * v;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Find a feature with the largest magnitude of weight change
|
||||
int best_fidx = 0;
|
||||
double best_weight_update = 0.0f;
|
||||
|
||||
for (auto fidx = 0U; fidx < model.param.num_feature; fidx++) {
|
||||
const float w = model[fidx][group_idx];
|
||||
auto gradient = GetGradientParallel(
|
||||
group_idx, model.param.num_output_group, fidx, gpair, p_fmat);
|
||||
float dw = static_cast<float>(
|
||||
CoordinateDelta(gradient.first, gradient.second, w, lambda, alpha,
|
||||
sum_instance_weight));
|
||||
if (std::abs(dw) > std::abs(best_weight_update)) {
|
||||
for (bst_omp_uint fidx = 0; fidx < nfeat; ++fidx) {
|
||||
auto &s = gpair_sums[group_idx * nfeat + fidx];
|
||||
float dw = std::abs(static_cast<bst_float>(
|
||||
CoordinateDelta(s.first, s.second, model[fidx][group_idx], alpha, lambda)));
|
||||
if (dw > best_weight_update) {
|
||||
best_weight_update = dw;
|
||||
best_fidx = fidx;
|
||||
}
|
||||
}
|
||||
return best_fidx;
|
||||
}
|
||||
|
||||
protected:
|
||||
bst_uint top_k;
|
||||
std::vector<bst_uint> counter;
|
||||
std::vector<std::pair<double, double>> gpair_sums;
|
||||
};
|
||||
|
||||
inline FeatureSelector *FeatureSelector::Create(std::string name) {
|
||||
if (name == "cyclic") {
|
||||
return new CyclicFeatureSelector();
|
||||
} else if (name == "random") {
|
||||
return new RandomFeatureSelector();
|
||||
} else if (name == "greedy") {
|
||||
return new GreedyFeatureSelector();
|
||||
} else {
|
||||
LOG(FATAL) << name << ": unknown coordinate selector";
|
||||
/**
|
||||
* \brief Thrifty, approximately-greedy feature selector.
|
||||
*
|
||||
* \note Prior to cyclic updates, reorders features in descending magnitude of
|
||||
* their univariate weight changes. This operation is multithreaded and is a
|
||||
* linear complexity approximation of the quadratic greedy selection.
|
||||
*
|
||||
* \note It allows restricting the selection to top_k features per group with
|
||||
* the largest magnitude of univariate weight change, by passing the top_k value
|
||||
* through the `param` argument of Setup().
|
||||
*/
|
||||
class ThriftyFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda, int param) override {
|
||||
top_k = static_cast<bst_uint>(param);
|
||||
if (param <= 0) top_k = std::numeric_limits<bst_uint>::max();
|
||||
const bst_uint ngroup = model.param.num_output_group;
|
||||
const bst_omp_uint nfeat = model.param.num_feature;
|
||||
|
||||
if (deltaw.size() == 0) {
|
||||
deltaw.resize(nfeat * ngroup);
|
||||
sorted_idx.resize(nfeat * ngroup);
|
||||
counter.resize(ngroup);
|
||||
gpair_sums.resize(nfeat * ngroup);
|
||||
}
|
||||
// Calculate univariate gradient sums
|
||||
std::fill(gpair_sums.begin(), gpair_sums.end(), std::make_pair(0., 0.));
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
// column-parallel is usually faster than row-parallel
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nfeat; ++i) {
|
||||
const ColBatch::Inst col = batch[i];
|
||||
const bst_uint ndata = col.length;
|
||||
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
|
||||
auto &sums = gpair_sums[gid * nfeat + i];
|
||||
for (bst_uint j = 0u; j < ndata; ++j) {
|
||||
const bst_float v = col[j].fvalue;
|
||||
auto &p = gpair[col[j].index * ngroup + gid];
|
||||
if (p.GetHess() < 0.f) continue;
|
||||
sums.first += p.GetGrad() * v;
|
||||
sums.second += p.GetHess() * v * v;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// rank by descending weight magnitude within the groups
|
||||
std::fill(deltaw.begin(), deltaw.end(), 0.f);
|
||||
std::iota(sorted_idx.begin(), sorted_idx.end(), 0);
|
||||
bst_float *pdeltaw = &deltaw[0];
|
||||
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
|
||||
// Calculate univariate weight changes
|
||||
for (bst_omp_uint i = 0; i < nfeat; ++i) {
|
||||
auto ii = gid * nfeat + i;
|
||||
auto &s = gpair_sums[ii];
|
||||
deltaw[ii] = static_cast<bst_float>(CoordinateDelta(
|
||||
s.first, s.second, model[i][gid], alpha, lambda));
|
||||
}
|
||||
// sort in descending order of deltaw abs values
|
||||
auto start = sorted_idx.begin() + gid * nfeat;
|
||||
std::sort(start, start + nfeat,
|
||||
[pdeltaw](size_t i, size_t j) {
|
||||
return std::abs(*(pdeltaw + i)) > std::abs(*(pdeltaw + j));
|
||||
});
|
||||
counter[gid] = 0u;
|
||||
}
|
||||
}
|
||||
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda) override {
|
||||
// k-th selected feature for a group
|
||||
auto k = counter[group_idx]++;
|
||||
// stop after either reaching top-N or going through all the features in a group
|
||||
if (k >= top_k || counter[group_idx] == model.param.num_feature) return -1;
|
||||
// note that sorted_idx stores the "long" indices
|
||||
const size_t grp_offset = group_idx * model.param.num_feature;
|
||||
return static_cast<int>(sorted_idx[grp_offset + k] - grp_offset);
|
||||
}
|
||||
|
||||
protected:
|
||||
bst_uint top_k;
|
||||
std::vector<bst_float> deltaw;
|
||||
std::vector<size_t> sorted_idx;
|
||||
std::vector<bst_uint> counter;
|
||||
std::vector<std::pair<double, double>> gpair_sums;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief A set of available FeatureSelector's
|
||||
*/
|
||||
enum FeatureSelectorEnum {
|
||||
kCyclic = 0,
|
||||
kShuffle,
|
||||
kThrifty,
|
||||
kGreedy,
|
||||
kRandom
|
||||
};
|
||||
|
||||
inline FeatureSelector *FeatureSelector::Create(int choice) {
|
||||
switch (choice) {
|
||||
case kCyclic:
|
||||
return new CyclicFeatureSelector();
|
||||
case kShuffle:
|
||||
return new ShuffleFeatureSelector();
|
||||
case kThrifty:
|
||||
return new ThriftyFeatureSelector();
|
||||
case kGreedy:
|
||||
return new GreedyFeatureSelector();
|
||||
case kRandom:
|
||||
return new RandomFeatureSelector();
|
||||
default:
|
||||
LOG(FATAL) << "unknown coordinate selector: " << choice;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user