Refactor linear modelling and add new coordinate descent updater (#3103)
* Refactor linear modelling and add new coordinate descent updater * Allow unsorted column iterator * Add prediction caching to gblinear
This commit is contained in:
321
src/linear/coordinate_common.h
Normal file
321
src/linear/coordinate_common.h
Normal file
@@ -0,0 +1,321 @@
|
||||
/*!
|
||||
* Copyright 2018 by Contributors
|
||||
* \author Rory Mitchell
|
||||
*/
|
||||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "../common/random.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace linear {
|
||||
|
||||
/**
 * \brief Calculate change in weight for a given feature. Applies l1/l2 penalty normalised by the
 * number of training instances.
 *
 * \param sum_grad The sum gradient.
 * \param sum_hess The sum hess.
 * \param w The weight.
 * \param reg_lambda Unnormalised L2 penalty.
 * \param reg_alpha Unnormalised L1 penalty.
 * \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty.
 *
 * \return The weight update.
 */

inline double CoordinateDelta(double sum_grad, double sum_hess, double w,
                              double reg_lambda, double reg_alpha,
                              double sum_instance_weight) {
  // Scale the penalties by the total instance weight so that regularisation
  // strength is independent of dataset size.
  reg_alpha *= sum_instance_weight;
  reg_lambda *= sum_instance_weight;
  // A near-zero Hessian gives no curvature information: make no update.
  if (sum_hess < 1e-5f) return 0.0f;
  const double hess_reg = sum_hess + reg_lambda;
  const double grad_l2 = sum_grad + reg_lambda * w;
  // The unpenalised Newton candidate decides which side of the L1
  // soft-thresholding operator applies.
  const double candidate = w - grad_l2 / hess_reg;
  if (candidate >= 0) {
    // Positive side: shift the gradient by +alpha; the max() clips the step
    // so the weight does not overshoot past zero.
    return std::max(-(grad_l2 + reg_alpha) / hess_reg, -w);
  }
  // Negative side: shift the gradient by -alpha, with the symmetric clip.
  return std::min(-(grad_l2 - reg_alpha) / hess_reg, -w);
}
|
||||
|
||||
/**
 * \brief Calculate update to bias.
 *
 * \param sum_grad The sum gradient.
 * \param sum_hess The sum hess.
 *
 * \return The weight update.
 */

inline double CoordinateDeltaBias(double sum_grad, double sum_hess) {
  // Unregularised Newton step for the intercept: minimises the local
  // quadratic approximation g*b + 0.5*h*b^2.
  const double newton_step = -sum_grad / sum_hess;
  return newton_step;
}
|
||||
|
||||
/**
|
||||
* \brief Get the gradient with respect to a single feature.
|
||||
*
|
||||
* \param group_idx Zero-based index of the group.
|
||||
* \param num_group Number of groups.
|
||||
* \param fidx The target feature.
|
||||
* \param gpair Gradients.
|
||||
* \param p_fmat The feature matrix.
|
||||
*
|
||||
* \return The gradient and diagonal Hessian entry for a given feature.
|
||||
*/
|
||||
|
||||
inline std::pair<double, double> GetGradient(
|
||||
int group_idx, int num_group, int fidx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat) {
|
||||
double sum_grad = 0.0, sum_hess = 0.0;
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
ColBatch::Inst col = batch[fidx];
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
const bst_float v = col[j].fvalue;
|
||||
auto &p = gpair[col[j].index * num_group + group_idx];
|
||||
if (p.GetHess() < 0.0f) continue;
|
||||
sum_grad += p.GetGrad() * v;
|
||||
sum_hess += p.GetHess() * v * v;
|
||||
}
|
||||
}
|
||||
return std::make_pair(sum_grad, sum_hess);
|
||||
}
|
||||
|
||||
/**
 * \brief Get the gradient with respect to a single feature. Multithreaded.
 *
 * \param group_idx Zero-based index of the group.
 * \param num_group Number of groups.
 * \param fidx The target feature.
 * \param gpair Gradients.
 * \param p_fmat The feature matrix.
 *
 * \return The gradient and diagonal Hessian entry for a given feature.
 */

inline std::pair<double, double> GetGradientParallel(
    int group_idx, int num_group, int fidx,
    const std::vector<bst_gpair> &gpair, DMatrix *p_fmat) {
  double sum_grad = 0.0, sum_hess = 0.0;
  dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
  while (iter->Next()) {
    const ColBatch &batch = iter->Value();
    // Sparse column of the target feature within this batch.
    ColBatch::Inst col = batch[fidx];
    const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
    // Entries are independent, so the accumulation parallelises with a
    // simple OpenMP sum reduction over thread-private partial sums.
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
    for (bst_omp_uint j = 0; j < ndata; ++j) {
      const bst_float v = col[j].fvalue;
      auto &p = gpair[col[j].index * num_group + group_idx];
      // A negative Hessian flags an instance excluded from this update.
      if (p.GetHess() < 0.0f) continue;
      sum_grad += p.GetGrad() * v;
      sum_hess += p.GetHess() * v * v;
    }
  }
  return std::make_pair(sum_grad, sum_hess);
}
|
||||
|
||||
/**
 * \brief Get the gradient with respect to the bias. Multithreaded.
 *
 * \param group_idx Zero-based index of the group.
 * \param num_group Number of groups.
 * \param gpair Gradients.
 * \param p_fmat The feature matrix.
 *
 * \return The gradient and diagonal Hessian entry for the bias.
 */

inline std::pair<double, double> GetBiasGradientParallel(
    int group_idx, int num_group, const std::vector<bst_gpair> &gpair,
    DMatrix *p_fmat) {
  const RowSet &rowset = p_fmat->buffered_rowset();
  double sum_grad = 0.0, sum_hess = 0.0;
  const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
  // The bias touches every buffered row; accumulate with an OpenMP sum
  // reduction (no feature value, so no v / v*v scaling here).
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
  for (bst_omp_uint i = 0; i < ndata; ++i) {
    auto &p = gpair[rowset[i] * num_group + group_idx];
    // Instances flagged with a negative Hessian are skipped.
    if (p.GetHess() >= 0.0f) {
      sum_grad += p.GetGrad();
      sum_hess += p.GetHess();
    }
  }
  return std::make_pair(sum_grad, sum_hess);
}
|
||||
|
||||
/**
 * \brief Updates the gradient vector with respect to a change in weight.
 *
 * \param fidx The feature index.
 * \param group_idx Zero-based index of the group.
 * \param num_group Number of groups.
 * \param dw The change in weight.
 * \param in_gpair The gradient vector to be updated.
 * \param p_fmat The input feature matrix.
 */

inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
                                   float dw, std::vector<bst_gpair> *in_gpair,
                                   DMatrix *p_fmat) {
  // Nothing to propagate when the weight did not move.
  if (dw == 0.0f) return;
  dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
  while (iter->Next()) {
    const ColBatch &batch = iter->Value();
    ColBatch::Inst col = batch[fidx];
    // update grad value
    const bst_omp_uint num_row = static_cast<bst_omp_uint>(col.length);
    // Each entry updates a distinct row's gradient, so the loop is safe to
    // parallelise without synchronisation.
#pragma omp parallel for schedule(static)
    for (bst_omp_uint j = 0; j < num_row; ++j) {
      bst_gpair &p = (*in_gpair)[col[j].index * num_group + group_idx];
      // Skip instances excluded from the update (negative-Hessian flag).
      if (p.GetHess() < 0.0f) continue;
      // First-order term shifts by h * x * dw; the Hessian term is unchanged.
      p += bst_gpair(p.GetHess() * col[j].fvalue * dw, 0);
    }
  }
}
|
||||
|
||||
/**
|
||||
* \brief Updates the gradient vector based on a change in the bias.
|
||||
*
|
||||
* \param group_idx Zero-based index of the group.
|
||||
* \param num_group Number of groups.
|
||||
* \param dbias The change in bias.
|
||||
* \param in_gpair The gradient vector to be updated.
|
||||
* \param p_fmat The input feature matrix.
|
||||
*/
|
||||
|
||||
inline void UpdateBiasResidualParallel(int group_idx, int num_group,
|
||||
float dbias,
|
||||
std::vector<bst_gpair> *in_gpair,
|
||||
DMatrix *p_fmat) {
|
||||
if (dbias == 0.0f) return;
|
||||
const RowSet &rowset = p_fmat->buffered_rowset();
|
||||
const bst_omp_uint ndata = static_cast<bst_omp_uint>(p_fmat->info().num_row);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
bst_gpair &g = (*in_gpair)[rowset[i] * num_group + group_idx];
|
||||
if (g.GetHess() < 0.0f) continue;
|
||||
g += bst_gpair(g.GetHess() * dbias, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \class FeatureSelector
 *
 * \brief Abstract class for stateful feature selection in coordinate descent
 * algorithms.
 */

class FeatureSelector {
 public:
  // Factory mapping a selector name (cyclic/random/greedy) to a new
  // instance. Returns an owning raw pointer; the caller must delete it
  // (or wrap it in a smart pointer).
  static FeatureSelector *Create(std::string name);
  /*! \brief virtual destructor */
  virtual ~FeatureSelector() {}

  /**
   * \brief Select next coordinate to update.
   *
   * \param iteration The iteration.
   * \param model The model.
   * \param group_idx Zero-based index of the group.
   * \param gpair The gpair.
   * \param p_fmat The feature matrix.
   * \param alpha Regularisation alpha.
   * \param lambda Regularisation lambda.
   * \param sum_instance_weight The sum instance weight.
   *
   * \return The index of the selected feature. -1 indicates the bias term.
   */

  virtual int SelectNextFeature(int iteration,
                                const gbm::GBLinearModel &model,
                                int group_idx,
                                const std::vector<bst_gpair> &gpair,
                                DMatrix *p_fmat, float alpha, float lambda,
                                double sum_instance_weight) = 0;
};
|
||||
|
||||
/**
|
||||
* \class CyclicFeatureSelector
|
||||
*
|
||||
* \brief Deterministic selection by cycling through coordinates one at a time.
|
||||
*/
|
||||
|
||||
class CyclicFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
int SelectNextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda,
|
||||
double sum_instance_weight) override {
|
||||
return iteration % model.param.num_feature;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \class RandomFeatureSelector
|
||||
*
|
||||
* \brief A random coordinate selector.
|
||||
*/
|
||||
|
||||
class RandomFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
int SelectNextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda,
|
||||
double sum_instance_weight) override {
|
||||
return common::GlobalRandom()() % model.param.num_feature;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \class GreedyFeatureSelector
|
||||
*
|
||||
* \brief Select coordinate with the greatest gradient magnitude.
|
||||
*/
|
||||
|
||||
class GreedyFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
int SelectNextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int group_idx, const std::vector<bst_gpair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda,
|
||||
double sum_instance_weight) override {
|
||||
// Find best
|
||||
int best_fidx = 0;
|
||||
double best_weight_update = 0.0f;
|
||||
|
||||
for (auto fidx = 0U; fidx < model.param.num_feature; fidx++) {
|
||||
const float w = model[fidx][group_idx];
|
||||
auto gradient = GetGradientParallel(
|
||||
group_idx, model.param.num_output_group, fidx, gpair, p_fmat);
|
||||
float dw = static_cast<float>(
|
||||
CoordinateDelta(gradient.first, gradient.second, w, lambda, alpha,
|
||||
sum_instance_weight));
|
||||
if (std::abs(dw) > std::abs(best_weight_update)) {
|
||||
best_weight_update = dw;
|
||||
best_fidx = fidx;
|
||||
}
|
||||
}
|
||||
return best_fidx;
|
||||
}
|
||||
};
|
||||
|
||||
// Factory mapping a selector name to a freshly allocated instance.
// Aborts via LOG(FATAL) when the name is not recognised.
inline FeatureSelector *FeatureSelector::Create(std::string name) {
  if (name == "cyclic") return new CyclicFeatureSelector();
  if (name == "random") return new RandomFeatureSelector();
  if (name == "greedy") return new GreedyFeatureSelector();
  LOG(FATAL) << name << ": unknown coordinate selector";
  return nullptr;  // unreachable; keeps the compiler's return-path check happy
}
|
||||
|
||||
} // namespace linear
|
||||
} // namespace xgboost
|
||||
29
src/linear/linear_updater.cc
Normal file
29
src/linear/linear_updater.cc
Normal file
@@ -0,0 +1,29 @@
|
||||
/*!
|
||||
* Copyright 2018
|
||||
*/
|
||||
#include <xgboost/linear_updater.h>
|
||||
#include <dmlc/registry.h>
|
||||
|
||||
namespace dmlc {
|
||||
DMLC_REGISTRY_ENABLE(::xgboost::LinearUpdaterReg);
|
||||
} // namespace dmlc
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
// Look the named updater up in the global registry (populated by
// XGBOOST_REGISTER_LINEAR_UPDATER) and construct a new instance.
// Aborts via LOG(FATAL) when the name is not registered.
LinearUpdater* LinearUpdater::Create(const std::string& name) {
  using Registry = ::dmlc::Registry< ::xgboost::LinearUpdaterReg>;
  auto *entry = Registry::Get()->Find(name);
  if (entry == nullptr) {
    LOG(FATAL) << "Unknown linear updater " << name;
  }
  return (entry->body)();
}
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
namespace xgboost {
|
||||
namespace linear {
|
||||
// List of files that will be force linked in static links.
|
||||
DMLC_REGISTRY_LINK_TAG(updater_shotgun);
|
||||
DMLC_REGISTRY_LINK_TAG(updater_coordinate);
|
||||
} // namespace linear
|
||||
} // namespace xgboost
|
||||
124
src/linear/updater_coordinate.cc
Normal file
124
src/linear/updater_coordinate.cc
Normal file
@@ -0,0 +1,124 @@
|
||||
/*!
|
||||
* Copyright 2018 by Contributors
|
||||
* \author Rory Mitchell
|
||||
*/
|
||||
|
||||
#include <xgboost/linear_updater.h>
|
||||
#include "../common/timer.h"
|
||||
#include "coordinate_common.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace linear {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_coordinate);
|
||||
|
||||
// training parameter
|
||||
struct CoordinateTrainParam : public dmlc::Parameter<CoordinateTrainParam> {
|
||||
/*! \brief learning_rate */
|
||||
float learning_rate;
|
||||
/*! \brief regularization weight for L2 norm */
|
||||
float reg_lambda;
|
||||
/*! \brief regularization weight for L1 norm */
|
||||
float reg_alpha;
|
||||
std::string feature_selector;
|
||||
float maximum_weight;
|
||||
int debug_verbose;
|
||||
// declare parameters
|
||||
DMLC_DECLARE_PARAMETER(CoordinateTrainParam) {
|
||||
DMLC_DECLARE_FIELD(learning_rate)
|
||||
.set_lower_bound(0.0f)
|
||||
.set_default(1.0f)
|
||||
.describe("Learning rate of each update.");
|
||||
DMLC_DECLARE_FIELD(reg_lambda)
|
||||
.set_lower_bound(0.0f)
|
||||
.set_default(0.0f)
|
||||
.describe("L2 regularization on weights.");
|
||||
DMLC_DECLARE_FIELD(reg_alpha)
|
||||
.set_lower_bound(0.0f)
|
||||
.set_default(0.0f)
|
||||
.describe("L1 regularization on weights.");
|
||||
DMLC_DECLARE_FIELD(feature_selector)
|
||||
.set_default("cyclic")
|
||||
.describe(
|
||||
"Feature selection algorithm, one of cyclic/random/greedy");
|
||||
DMLC_DECLARE_FIELD(debug_verbose)
|
||||
.set_lower_bound(0)
|
||||
.set_default(0)
|
||||
.describe("flag to print out detailed breakdown of runtime");
|
||||
// alias of parameters
|
||||
DMLC_DECLARE_ALIAS(reg_lambda, lambda);
|
||||
DMLC_DECLARE_ALIAS(reg_alpha, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * \class CoordinateUpdater
 *
 * \brief Coordinate descent algorithm that updates one feature per iteration
 */

class CoordinateUpdater : public LinearUpdater {
 public:
  // set training parameter
  void Init(
      const std::vector<std::pair<std::string, std::string> > &args) override {
    param.InitAllowUnknown(args);
    // Build the configured feature selector (cyclic/random/greedy);
    // FeatureSelector::Create aborts on an unknown name.
    selector.reset(FeatureSelector::Create(param.feature_selector));
    monitor.Init("CoordinateUpdater", param.debug_verbose);
  }
  // Perform one round of coordinate descent: first the bias term of each
  // output group, then num_feature selector-chosen feature updates per group.
  void Update(std::vector<bst_gpair> *in_gpair, DMatrix *p_fmat,
              gbm::GBLinearModel *model, double sum_instance_weight) override {
    // Calculate bias
    for (int group_idx = 0; group_idx < model->param.num_output_group;
         ++group_idx) {
      auto grad = GetBiasGradientParallel(
          group_idx, model->param.num_output_group, *in_gpair, p_fmat);
      auto dbias = static_cast<float>(
          param.learning_rate * CoordinateDeltaBias(grad.first, grad.second));
      model->bias()[group_idx] += dbias;
      // Fold the bias change back into the residual gradients so subsequent
      // feature updates see the updated fit.
      UpdateBiasResidualParallel(group_idx, model->param.num_output_group,
                                 dbias, in_gpair, p_fmat);
    }
    for (int group_idx = 0; group_idx < model->param.num_output_group;
         ++group_idx) {
      for (auto i = 0U; i < model->param.num_feature; i++) {
        // NOTE(review): the selector interface documents -1 as "bias", but
        // UpdateFeature does not handle fidx == -1; none of the current
        // selectors return it -- confirm before adding one that does.
        int fidx = selector->SelectNextFeature(
            i, *model, group_idx, *in_gpair, p_fmat, param.reg_alpha,
            param.reg_lambda, sum_instance_weight);
        this->UpdateFeature(fidx, group_idx, in_gpair, p_fmat, model,
                            sum_instance_weight);
      }
    }
  }

  // Apply one coordinate-descent step to weight (fidx, group_idx) and
  // propagate the change into the gradient vector.
  void UpdateFeature(int fidx, int group_idx, std::vector<bst_gpair> *in_gpair,
                     DMatrix *p_fmat, gbm::GBLinearModel *model,
                     double sum_instance_weight) {
    bst_float &w = (*model)[fidx][group_idx];
    monitor.Start("GetGradientParallel");
    auto gradient = GetGradientParallel(
        group_idx, model->param.num_output_group, fidx, *in_gpair, p_fmat);
    monitor.Stop("GetGradientParallel");
    auto dw = static_cast<float>(
        param.learning_rate *
        CoordinateDelta(gradient.first, gradient.second, w, param.reg_lambda,
                        param.reg_alpha, sum_instance_weight));
    w += dw;
    monitor.Start("UpdateResidualParallel");
    UpdateResidualParallel(fidx, group_idx, model->param.num_output_group, dw,
                           in_gpair, p_fmat);
    monitor.Stop("UpdateResidualParallel");
  }

  // training parameter
  CoordinateTrainParam param;
  // Coordinate selection strategy, owned by this updater.
  std::unique_ptr<FeatureSelector> selector;
  // Timing instrumentation; prints a breakdown when debug_verbose > 0.
  common::Monitor monitor;
};
|
||||
|
||||
DMLC_REGISTER_PARAMETER(CoordinateTrainParam);
|
||||
XGBOOST_REGISTER_LINEAR_UPDATER(CoordinateUpdater, "coord_descent")
|
||||
.describe("Update linear model according to coordinate descent algorithm.")
|
||||
.set_body([]() { return new CoordinateUpdater(); });
|
||||
} // namespace linear
|
||||
} // namespace xgboost
|
||||
127
src/linear/updater_shotgun.cc
Normal file
127
src/linear/updater_shotgun.cc
Normal file
@@ -0,0 +1,127 @@
|
||||
/*!
|
||||
* Copyright 2018 by Contributors
|
||||
* \author Tianqi Chen, Rory Mitchell
|
||||
*/
|
||||
|
||||
#include <xgboost/linear_updater.h>
|
||||
#include "coordinate_common.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace linear {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_shotgun);
|
||||
|
||||
// training parameter for the shotgun updater
struct ShotgunTrainParam : public dmlc::Parameter<ShotgunTrainParam> {
  /*! \brief learning_rate */
  float learning_rate;
  /*! \brief regularization weight for L2 norm */
  float reg_lambda;
  /*! \brief regularization weight for L1 norm */
  float reg_alpha;
  // declare parameters
  DMLC_DECLARE_PARAMETER(ShotgunTrainParam) {
    DMLC_DECLARE_FIELD(learning_rate)
        .set_lower_bound(0.0f)
        .set_default(1.0f)
        .describe("Learning rate of each update.");
    DMLC_DECLARE_FIELD(reg_lambda)
        .set_lower_bound(0.0f)
        .set_default(0.0f)
        .describe("L2 regularization on weights.");
    DMLC_DECLARE_FIELD(reg_alpha)
        .set_lower_bound(0.0f)
        .set_default(0.0f)
        .describe("L1 regularization on weights.");
    // alias of parameters
    DMLC_DECLARE_ALIAS(learning_rate, eta);
    DMLC_DECLARE_ALIAS(reg_lambda, lambda);
    DMLC_DECLARE_ALIAS(reg_alpha, alpha);
  }
};
|
||||
|
||||
// Parallel ("shotgun") coordinate descent updater: the bias of each group is
// updated first, then all feature weights are updated with the feature loop
// parallelised across threads.
class ShotgunUpdater : public LinearUpdater {
 public:
  // set training parameter
  void Init(
      const std::vector<std::pair<std::string, std::string> > &args) override {
    param.InitAllowUnknown(args);
  }
  void Update(std::vector<bst_gpair> *in_gpair, DMatrix *p_fmat,
              gbm::GBLinearModel *model, double sum_instance_weight) override {
    std::vector<bst_gpair> &gpair = *in_gpair;
    const int ngroup = model->param.num_output_group;
    const RowSet &rowset = p_fmat->buffered_rowset();
    // for all the output group
    for (int gid = 0; gid < ngroup; ++gid) {
      // Accumulate gradient/Hessian of the bias over all buffered rows.
      double sum_grad = 0.0, sum_hess = 0.0;
      const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
      for (bst_omp_uint i = 0; i < ndata; ++i) {
        bst_gpair &p = gpair[rowset[i] * ngroup + gid];
        // Negative Hessian flags an instance excluded from the update.
        if (p.GetHess() >= 0.0f) {
          sum_grad += p.GetGrad();
          sum_hess += p.GetHess();
        }
      }
      // remove bias effect
      bst_float dw = static_cast<bst_float>(
          param.learning_rate * CoordinateDeltaBias(sum_grad, sum_hess));
      model->bias()[gid] += dw;
      // update grad value
#pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < ndata; ++i) {
        bst_gpair &p = gpair[rowset[i] * ngroup + gid];
        if (p.GetHess() >= 0.0f) {
          p += bst_gpair(p.GetHess() * dw, 0);
        }
      }
    }
    // Update every feature weight. Features are processed in parallel while
    // each inner loop reads and writes shared gpair entries: rows shared by
    // two features race. This is characteristic of shotgun-style parallel
    // coordinate descent, which tolerates such races -- NOTE(review):
    // confirm this is intended before "fixing" the data race.
    dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
    while (iter->Next()) {
      // number of features
      const ColBatch &batch = iter->Value();
      const bst_omp_uint nfeat = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < nfeat; ++i) {
        const bst_uint fid = batch.col_index[i];
        ColBatch::Inst col = batch[i];
        for (int gid = 0; gid < ngroup; ++gid) {
          // Gradient/Hessian of the objective w.r.t. weight (fid, gid).
          double sum_grad = 0.0, sum_hess = 0.0;
          for (bst_uint j = 0; j < col.length; ++j) {
            const bst_float v = col[j].fvalue;
            bst_gpair &p = gpair[col[j].index * ngroup + gid];
            if (p.GetHess() < 0.0f) continue;
            sum_grad += p.GetGrad() * v;
            sum_hess += p.GetHess() * v * v;
          }
          bst_float &w = (*model)[fid][gid];
          // Elastic-net coordinate step, scaled by the learning rate.
          bst_float dw = static_cast<bst_float>(
              param.learning_rate *
              CoordinateDelta(sum_grad, sum_hess, w, param.reg_lambda,
                              param.reg_alpha, sum_instance_weight));
          w += dw;
          // update grad value
          for (bst_uint j = 0; j < col.length; ++j) {
            bst_gpair &p = gpair[col[j].index * ngroup + gid];
            if (p.GetHess() < 0.0f) continue;
            p += bst_gpair(p.GetHess() * col[j].fvalue * dw, 0);
          }
        }
      }
    }
  }

  // training parameter
  ShotgunTrainParam param;
};
|
||||
|
||||
DMLC_REGISTER_PARAMETER(ShotgunTrainParam);
|
||||
|
||||
XGBOOST_REGISTER_LINEAR_UPDATER(ShotgunUpdater, "shotgun")
|
||||
.describe(
|
||||
"Update linear model according to shotgun coordinate descent "
|
||||
"algorithm.")
|
||||
.set_body([]() { return new ShotgunUpdater(); });
|
||||
} // namespace linear
|
||||
} // namespace xgboost
|
||||
Reference in New Issue
Block a user