Merge duplicated linear updater parameters. (#4013)

* Merge duplicated linear updater parameters.

* Split up coordinate descent parameter.

Jiaming Yuan, 2018-12-22 13:21:49 +08:00 (committed by GitHub)
parent f75a21af25
commit 85939c6a6e
7 changed files with 151 additions and 208 deletions
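
In short: the shotgun, coord_descent and gpu_coord_descent updaters each carried a near-identical dmlc::Parameter struct (ShotgunTrainParam, CoordinateTrainParam, GPUCoordinateTrainParam). This commit merges them into a single LinearTrainParam in the new src/linear/param.h and splits the coordinate-descent-only top_k knob into a separate CoordinateParam. The glue is dmlc's two-stage initialisation: InitAllowUnknown returns the key/value pairs a struct did not consume, so the leftovers can be fed to a second struct. A minimal sketch of that pattern, assuming dmlc-core is on the include path; SharedParam and ExtraParam are illustrative names, not from the commit:

// Two parameter structs sharing one flat argument list.
#include <dmlc/parameter.h>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct SharedParam : public dmlc::Parameter<SharedParam> {
  float learning_rate;
  DMLC_DECLARE_PARAMETER(SharedParam) {
    DMLC_DECLARE_FIELD(learning_rate).set_default(0.5f);
    DMLC_DECLARE_ALIAS(learning_rate, eta);  // old spelling keeps working
  }
};

struct ExtraParam : public dmlc::Parameter<ExtraParam> {
  int top_k;
  DMLC_DECLARE_PARAMETER(ExtraParam) {
    DMLC_DECLARE_FIELD(top_k).set_default(0);
  }
};

DMLC_REGISTER_PARAMETER(SharedParam);
DMLC_REGISTER_PARAMETER(ExtraParam);

int main() {
  SharedParam shared;
  ExtraParam extra;
  std::vector<std::pair<std::string, std::string>> args{
      {"eta", "1.0"}, {"top_k", "3"}};
  // InitAllowUnknown returns the pairs it did not recognise ...
  auto rest = shared.InitAllowUnknown(args);
  // ... which the second struct then consumes, exactly as
  // CoordinateUpdater::Init does in the diff below.
  extra.InitAllowUnknown(rest);
  std::cout << shared.learning_rate << " " << extra.top_k << "\n";  // 1 3
}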

src/linear/coordinate_common.h

@@ -8,11 +8,24 @@
 #include <utility>
 #include <vector>
 #include <limits>
+#include "./param.h"
 #include "../common/random.h"

 namespace xgboost {
 namespace linear {

+struct CoordinateParam : public dmlc::Parameter<CoordinateParam> {
+  int top_k;
+  DMLC_DECLARE_PARAMETER(CoordinateParam) {
+    DMLC_DECLARE_FIELD(top_k)
+        .set_lower_bound(0)
+        .set_default(0)
+        .describe("The number of top features to select in 'thrifty' feature_selector. "
+                  "The value of zero means using all the features.");
+  }
+};
+
 /**
  * \brief Calculate change in weight for a given feature. Applies l1/l2 penalty normalised by the
  *        number of training instances.
@@ -442,17 +455,6 @@ class ThriftyFeatureSelector : public FeatureSelector {
   std::vector<std::pair<double, double>> gpair_sums_;
 };

-/**
- * \brief A set of available FeatureSelector's
- */
-enum FeatureSelectorEnum {
-  kCyclic = 0,
-  kShuffle,
-  kThrifty,
-  kGreedy,
-  kRandom
-};
-
 inline FeatureSelector *FeatureSelector::Create(int choice) {
   switch (choice) {
     case kCyclic:

src/linear/linear_updater.cc

@@ -3,6 +3,7 @@
  */
 #include <xgboost/linear_updater.h>
 #include <dmlc/registry.h>
+#include "./param.h"

 namespace dmlc {
 DMLC_REGISTRY_ENABLE(::xgboost::LinearUpdaterReg);
@@ -22,6 +23,8 @@ LinearUpdater* LinearUpdater::Create(const std::string& name) {
 namespace xgboost {
 namespace linear {

+DMLC_REGISTER_PARAMETER(LinearTrainParam);
+
 // List of files that will be force linked in static links.
 DMLC_REGISTRY_LINK_TAG(updater_shotgun);
 DMLC_REGISTRY_LINK_TAG(updater_coordinate);
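
A side note on why the registration moves here: DMLC_REGISTER_PARAMETER emits out-of-line definitions, so each parameter type must be registered in exactly one translation unit. Previously every updater registered its own struct; now linear_updater.cc owns LinearTrainParam and updater_coordinate.cc owns CoordinateParam. Minimal shape of the declare/register pairing, with MyParam as a hypothetical stand-in:

// One TU per parameter type; registering twice gives duplicate-symbol errors.
#include <dmlc/parameter.h>

struct MyParam : public dmlc::Parameter<MyParam> {
  int x;
  DMLC_DECLARE_PARAMETER(MyParam) {  // declared inside the struct
    DMLC_DECLARE_FIELD(x).set_default(0);
  }
};
DMLC_REGISTER_PARAMETER(MyParam);  // defined at namespace scope, once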

src/linear/param.h (new file, 77 lines)

@@ -0,0 +1,77 @@
/*!
 * Copyright 2018 by Contributors
 * \file param.h
 * \brief training parameters.
 */
#ifndef XGBOOST_LINEAR_PARAM_H_
#define XGBOOST_LINEAR_PARAM_H_

#include <dmlc/parameter.h>

namespace xgboost {
namespace linear {

/**
 * \brief A set of available FeatureSelector's
 */
enum FeatureSelectorEnum {
  kCyclic = 0,
  kShuffle,
  kThrifty,
  kGreedy,
  kRandom
};

struct LinearTrainParam : public dmlc::Parameter<LinearTrainParam> {
  /*! \brief learning_rate */
  float learning_rate;
  /*! \brief regularization weight for L2 norm */
  float reg_lambda;
  /*! \brief regularization weight for L1 norm */
  float reg_alpha;
  int feature_selector;
  int n_gpus;
  int gpu_id;
  // declare parameters
  DMLC_DECLARE_PARAMETER(LinearTrainParam) {
    DMLC_DECLARE_FIELD(learning_rate)
        .set_lower_bound(0.0f)
        .set_default(0.5f)
        .describe("Learning rate of each update.");
    DMLC_DECLARE_FIELD(reg_lambda)
        .set_lower_bound(0.0f)
        .set_default(0.0f)
        .describe("L2 regularization on weights.");
    DMLC_DECLARE_FIELD(reg_alpha)
        .set_lower_bound(0.0f)
        .set_default(0.0f)
        .describe("L1 regularization on weights.");
    DMLC_DECLARE_FIELD(feature_selector)
        .set_default(kCyclic)
        .add_enum("cyclic", kCyclic)
        .add_enum("shuffle", kShuffle)
        .add_enum("thrifty", kThrifty)
        .add_enum("greedy", kGreedy)
        .add_enum("random", kRandom)
        .describe("Feature selection or ordering method.");
    DMLC_DECLARE_FIELD(n_gpus).set_default(1).describe(
        "Number of devices to use.");
    DMLC_DECLARE_FIELD(gpu_id).set_default(0).describe(
        "Primary device ordinal.");
    // alias of parameters
    DMLC_DECLARE_ALIAS(learning_rate, eta);
    DMLC_DECLARE_ALIAS(reg_lambda, lambda);
    DMLC_DECLARE_ALIAS(reg_alpha, alpha);
  }
  /*! \brief Denormalizes the regularization penalties - to be called at each update */
  void DenormalizePenalties(double sum_instance_weight) {
    reg_lambda_denorm = reg_lambda * sum_instance_weight;
    reg_alpha_denorm = reg_alpha * sum_instance_weight;
  }
  // denormalized regularization penalties
  float reg_lambda_denorm;
  float reg_alpha_denorm;
};
}  // namespace linear
}  // namespace xgboost
#endif  // XGBOOST_LINEAR_PARAM_H_
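
A short usage sketch (modelled on the tests at the end of this diff): because of the aliases above, callers may pass eta/lambda/alpha interchangeably with the long names, and any key LinearTrainParam does not consume, such as top_k, is left over for CoordinateParam to pick up:

#include <memory>
#include <xgboost/linear_updater.h>

void ConfigureUpdater() {
  std::unique_ptr<xgboost::LinearUpdater> updater(
      xgboost::LinearUpdater::Create("coord_descent"));
  // "eta" -> learning_rate and "alpha" -> reg_alpha via DMLC_DECLARE_ALIAS;
  // "top_k" is unknown to LinearTrainParam and flows on to CoordinateParam.
  updater->Init({{"eta", "0.5"}, {"alpha", "0.1"}, {"top_k", "2"}});
}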

src/linear/updater_coordinate.cc

@@ -4,66 +4,17 @@
  */
 #include <xgboost/linear_updater.h>
+#include "./param.h"
 #include "../common/timer.h"
 #include "coordinate_common.h"

 namespace xgboost {
 namespace linear {

+DMLC_REGISTER_PARAMETER(CoordinateParam);
 DMLC_REGISTRY_FILE_TAG(updater_coordinate);

 // training parameter
-struct CoordinateTrainParam : public dmlc::Parameter<CoordinateTrainParam> {
-  /*! \brief learning_rate */
-  float learning_rate;
-  /*! \brief regularization weight for L2 norm */
-  float reg_lambda;
-  /*! \brief regularization weight for L1 norm */
-  float reg_alpha;
-  int feature_selector;
-  int top_k;
-  // declare parameters
-  DMLC_DECLARE_PARAMETER(CoordinateTrainParam) {
-    DMLC_DECLARE_FIELD(learning_rate)
-        .set_lower_bound(0.0f)
-        .set_default(0.5f)
-        .describe("Learning rate of each update.");
-    DMLC_DECLARE_FIELD(reg_lambda)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L2 regularization on weights.");
-    DMLC_DECLARE_FIELD(reg_alpha)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L1 regularization on weights.");
-    DMLC_DECLARE_FIELD(feature_selector)
-        .set_default(kCyclic)
-        .add_enum("cyclic", kCyclic)
-        .add_enum("shuffle", kShuffle)
-        .add_enum("thrifty", kThrifty)
-        .add_enum("greedy", kGreedy)
-        .add_enum("random", kRandom)
-        .describe("Feature selection or ordering method.");
-    DMLC_DECLARE_FIELD(top_k)
-        .set_lower_bound(0)
-        .set_default(0)
-        .describe("The number of top features to select in 'thrifty' feature_selector. "
-                  "The value of zero means using all the features.");
-    // alias of parameters
-    DMLC_DECLARE_ALIAS(learning_rate, eta);
-    DMLC_DECLARE_ALIAS(reg_lambda, lambda);
-    DMLC_DECLARE_ALIAS(reg_alpha, alpha);
-  }
-  /*! \brief Denormalizes the regularization penalties - to be called at each update */
-  void DenormalizePenalties(double sum_instance_weight) {
-    reg_lambda_denorm = reg_lambda * sum_instance_weight;
-    reg_alpha_denorm = reg_alpha * sum_instance_weight;
-  }
-  // denormalized regularization penalties
-  float reg_lambda_denorm;
-  float reg_alpha_denorm;
-};
-
 /**
  * \class CoordinateUpdater
  *
@@ -75,33 +26,37 @@ class CoordinateUpdater : public LinearUpdater {
   // set training parameter
   void Init(
       const std::vector<std::pair<std::string, std::string> > &args) override {
-    param.InitAllowUnknown(args);
-    selector.reset(FeatureSelector::Create(param.feature_selector));
+    const std::vector<std::pair<std::string, std::string> > rest {
+      tparam_.InitAllowUnknown(args)
+    };
+    cparam_.InitAllowUnknown(rest);
+    selector.reset(FeatureSelector::Create(tparam_.feature_selector));
     monitor.Init("CoordinateUpdater");
   }
   void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
               gbm::GBLinearModel *model, double sum_instance_weight) override {
-    param.DenormalizePenalties(sum_instance_weight);
+    tparam_.DenormalizePenalties(sum_instance_weight);
     const int ngroup = model->param.num_output_group;
     // update bias
     for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
       auto grad = GetBiasGradientParallel(group_idx, ngroup,
                                           in_gpair->ConstHostVector(), p_fmat);
-      auto dbias = static_cast<float>(param.learning_rate *
-                                      CoordinateDeltaBias(grad.first, grad.second));
+      auto dbias = static_cast<float>(tparam_.learning_rate *
+                                      CoordinateDeltaBias(grad.first, grad.second));
       model->bias()[group_idx] += dbias;
       UpdateBiasResidualParallel(group_idx, ngroup,
                                  dbias, &in_gpair->HostVector(), p_fmat);
     }
     // prepare for updating the weights
-    selector->Setup(*model, in_gpair->ConstHostVector(), p_fmat, param.reg_alpha_denorm,
-                    param.reg_lambda_denorm, param.top_k);
+    selector->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
+                    tparam_.reg_alpha_denorm,
+                    tparam_.reg_lambda_denorm, cparam_.top_k);
     // update weights
     for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
       for (unsigned i = 0U; i < model->param.num_feature; i++) {
         int fidx = selector->NextFeature
           (i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
-           param.reg_alpha_denorm, param.reg_lambda_denorm);
+           tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
         if (fidx < 0) break;
         this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), p_fmat, model);
       }
@@ -116,20 +71,20 @@ class CoordinateUpdater : public LinearUpdater {
     auto gradient =
         GetGradientParallel(group_idx, ngroup, fidx, *in_gpair, p_fmat);
     auto dw = static_cast<float>(
-        param.learning_rate *
-        CoordinateDelta(gradient.first, gradient.second, w, param.reg_alpha_denorm,
-                        param.reg_lambda_denorm));
+        tparam_.learning_rate *
+        CoordinateDelta(gradient.first, gradient.second, w, tparam_.reg_alpha_denorm,
+                        tparam_.reg_lambda_denorm));
     w += dw;
     UpdateResidualParallel(fidx, group_idx, ngroup, dw, in_gpair, p_fmat);
   }

+  CoordinateParam cparam_;
   // training parameter
-  CoordinateTrainParam param;
+  LinearTrainParam tparam_;
   std::unique_ptr<FeatureSelector> selector;
   common::Monitor monitor;
 };

-DMLC_REGISTER_PARAMETER(CoordinateTrainParam);
 XGBOOST_REGISTER_LINEAR_UPDATER(CoordinateUpdater, "coord_descent")
     .describe("Update linear model according to coordinate descent algorithm.")
     .set_body([]() { return new CoordinateUpdater(); });
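
For context on the numbers flowing through UpdateFeature: CoordinateDelta in coordinate_common.h performs a soft-thresholded Newton step on a single coordinate with L1/L2 penalties. The function below is an illustrative re-statement of that standard update, not a copy of the xgboost implementation:

#include <algorithm>
#include <cassert>

// Illustrative soft-thresholded Newton step for one coordinate.
double SoftThresholdDelta(double sum_grad, double sum_hess, double w,
                          double alpha, double lambda) {
  if (sum_hess < 1e-5) return 0.0;          // no curvature: skip this feature
  const double g = sum_grad + lambda * w;   // L2 folds into the gradient ...
  const double h = sum_hess + lambda;       // ... and into the hessian
  if (w - g / h >= 0.0) {
    return std::max(-(g + alpha) / h, -w);  // clip so w cannot cross zero
  }
  return std::min(-(g - alpha) / h, -w);
}

int main() {
  // With no penalty this is plain Newton, -g / h.
  assert(SoftThresholdDelta(-5.0, 1.0, 0.0, 0.0, 0.0) == 5.0);
}

This is also why the test at the bottom of this diff expects a bias of exactly 5: each of the 10 rows contributes gradient -5 and hessian 1, so the unpenalised bias step is -(-50)/10 = 5 with eta = 1.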

src/linear/updater_gpu_coordinate.cu

@@ -9,6 +9,7 @@
 #include "../common/common.h"
 #include "../common/device_helpers.cuh"
 #include "../common/timer.h"
+#include "./param.h"
 #include "coordinate_common.h"

 namespace xgboost {
@@ -16,64 +17,6 @@ namespace linear {
 DMLC_REGISTRY_FILE_TAG(updater_gpu_coordinate);

-// training parameter
-struct GPUCoordinateTrainParam
-    : public dmlc::Parameter<GPUCoordinateTrainParam> {
-  /*! \brief learning_rate */
-  float learning_rate;
-  /*! \brief regularization weight for L2 norm */
-  float reg_lambda;
-  /*! \brief regularization weight for L1 norm */
-  float reg_alpha;
-  int feature_selector;
-  int top_k;
-  int n_gpus;
-  int gpu_id;
-  // declare parameters
-  DMLC_DECLARE_PARAMETER(GPUCoordinateTrainParam) {
-    DMLC_DECLARE_FIELD(learning_rate)
-        .set_lower_bound(0.0f)
-        .set_default(1.0f)
-        .describe("Learning rate of each update.");
-    DMLC_DECLARE_FIELD(reg_lambda)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L2 regularization on weights.");
-    DMLC_DECLARE_FIELD(reg_alpha)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L1 regularization on weights.");
-    DMLC_DECLARE_FIELD(feature_selector)
-        .set_default(kCyclic)
-        .add_enum("cyclic", kCyclic)
-        .add_enum("shuffle", kShuffle)
-        .add_enum("thrifty", kThrifty)
-        .add_enum("greedy", kGreedy)
-        .add_enum("random", kRandom)
-        .describe("Feature selection or ordering method.");
-    DMLC_DECLARE_FIELD(top_k).set_lower_bound(0).set_default(0).describe(
-        "The number of top features to select in 'thrifty' feature_selector. "
-        "The value of zero means using all the features.");
-    DMLC_DECLARE_FIELD(n_gpus).set_default(1).describe(
-        "Number of devices to use.");
-    DMLC_DECLARE_FIELD(gpu_id).set_default(0).describe(
-        "Primary device ordinal.");
-    // alias of parameters
-    DMLC_DECLARE_ALIAS(learning_rate, eta);
-    DMLC_DECLARE_ALIAS(reg_lambda, lambda);
-    DMLC_DECLARE_ALIAS(reg_alpha, alpha);
-  }
-  /*! \brief Denormalizes the regularization penalties - to be called at each
-   * update */
-  void DenormalizePenalties(double sum_instance_weight) {
-    reg_lambda_denorm = reg_lambda * sum_instance_weight;
-    reg_alpha_denorm = reg_alpha * sum_instance_weight;
-  }
-  // denormalized regularization penalties
-  float reg_lambda_denorm;
-  float reg_alpha_denorm;
-};
-
 void RescaleIndices(size_t ridx_begin, dh::DVec<Entry> *data) {
   auto d_data = data->Data();
   dh::LaunchN(data->DeviceIdx(), data->Size(),
@@ -93,7 +36,7 @@ class DeviceShard {
  public:
   DeviceShard(int device_id, const SparsePage &batch,
               bst_uint row_begin, bst_uint row_end,
-              const GPUCoordinateTrainParam &param,
+              const LinearTrainParam &param,
               const gbm::GBLinearModelParam &model_param)
       : device_id_(device_id),
         ridx_begin_(row_begin),
@@ -199,8 +142,8 @@ class GPUCoordinateUpdater : public LinearUpdater {
   // set training parameter
   void Init(
       const std::vector<std::pair<std::string, std::string>> &args) override {
-    param.InitAllowUnknown(args);
-    selector.reset(FeatureSelector::Create(param.feature_selector));
+    tparam_.InitAllowUnknown(args);
+    selector.reset(FeatureSelector::Create(tparam_.feature_selector));
     monitor.Init("GPUCoordinateUpdater");
   }
@@ -208,7 +151,7 @@ class GPUCoordinateUpdater : public LinearUpdater {
                       const gbm::GBLinearModelParam &model_param) {
     if (!shards.empty()) return;

-    dist_ = GPUDistribution::Block(GPUSet::All(param.gpu_id, param.n_gpus,
+    dist_ = GPUDistribution::Block(GPUSet::All(tparam_.gpu_id, tparam_.n_gpus,
                                                p_fmat->Info().num_row_));
     auto devices = dist_.Devices();
@@ -237,13 +180,13 @@ class GPUCoordinateUpdater : public LinearUpdater {
         [&](int i, std::unique_ptr<DeviceShard>& shard) {
           shard = std::unique_ptr<DeviceShard>(
               new DeviceShard(devices.DeviceId(i), batch, row_segments[i],
-                              row_segments[i + 1], param, model_param));
+                              row_segments[i + 1], tparam_, model_param));
         });
   }

   void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
               gbm::GBLinearModel *model, double sum_instance_weight) override {
-    param.DenormalizePenalties(sum_instance_weight);
+    tparam_.DenormalizePenalties(sum_instance_weight);
     monitor.Start("LazyInitShards");
     this->LazyInitShards(p_fmat, model->param);
     monitor.Stop("LazyInitShards");
@@ -260,15 +203,15 @@ class GPUCoordinateUpdater : public LinearUpdater {
     monitor.Stop("UpdateBias");
     // prepare for updating the weights
     selector->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
-                    param.reg_alpha_denorm, param.reg_lambda_denorm,
-                    param.top_k);
+                    tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm,
+                    coord_param_.top_k);
     monitor.Start("UpdateFeature");
     for (auto group_idx = 0; group_idx < model->param.num_output_group;
          ++group_idx) {
       for (auto i = 0U; i < model->param.num_feature; i++) {
         auto fidx = selector->NextFeature(
             i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
-            param.reg_alpha_denorm, param.reg_lambda_denorm);
+            tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
         if (fidx < 0) break;
         this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), model);
       }
@@ -287,7 +230,7 @@ class GPUCoordinateUpdater : public LinearUpdater {
     });
     auto dbias = static_cast<float>(
-        param.learning_rate *
+        tparam_.learning_rate *
         CoordinateDeltaBias(grad.GetGrad(), grad.GetHess()));
     model->bias()[group_idx] += dbias;
@@ -310,10 +253,10 @@ class GPUCoordinateUpdater : public LinearUpdater {
                            fidx);
     });
-    auto dw = static_cast<float>(param.learning_rate *
+    auto dw = static_cast<float>(tparam_.learning_rate *
                                  CoordinateDelta(grad.GetGrad(), grad.GetHess(),
-                                                 w, param.reg_alpha_denorm,
-                                                 param.reg_lambda_denorm));
+                                                 w, tparam_.reg_alpha_denorm,
+                                                 tparam_.reg_lambda_denorm));
     w += dw;

     dh::ExecuteIndexShards(&shards, [&](int idx, std::unique_ptr<DeviceShard>& shard) {
@@ -322,7 +265,8 @@ class GPUCoordinateUpdater : public LinearUpdater {
   }

   // training parameter
-  GPUCoordinateTrainParam param;
+  LinearTrainParam tparam_;
+  CoordinateParam coord_param_;
   GPUDistribution dist_;
   std::unique_ptr<FeatureSelector> selector;
   common::Monitor monitor;
@@ -330,7 +274,6 @@ class GPUCoordinateUpdater : public LinearUpdater {
   std::vector<std::unique_ptr<DeviceShard>> shards;
 };

-DMLC_REGISTER_PARAMETER(GPUCoordinateTrainParam);
 XGBOOST_REGISTER_LINEAR_UPDATER(GPUCoordinateUpdater, "gpu_coord_descent")
     .describe(
         "Update linear model according to coordinate descent algorithm. GPU "

src/linear/updater_shotgun.cc

@@ -11,54 +11,16 @@ namespace linear {
 DMLC_REGISTRY_FILE_TAG(updater_shotgun);

-// training parameter
-struct ShotgunTrainParam : public dmlc::Parameter<ShotgunTrainParam> {
-  /*! \brief learning_rate */
-  float learning_rate;
-  /*! \brief regularization weight for L2 norm */
-  float reg_lambda;
-  /*! \brief regularization weight for L1 norm */
-  float reg_alpha;
-  int feature_selector;
-  // declare parameters
-  DMLC_DECLARE_PARAMETER(ShotgunTrainParam) {
-    DMLC_DECLARE_FIELD(learning_rate)
-        .set_lower_bound(0.0f)
-        .set_default(0.5f)
-        .describe("Learning rate of each update.");
-    DMLC_DECLARE_FIELD(reg_lambda)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L2 regularization on weights.");
-    DMLC_DECLARE_FIELD(reg_alpha)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L1 regularization on weights.");
-    DMLC_DECLARE_FIELD(feature_selector)
-        .set_default(kCyclic)
-        .add_enum("cyclic", kCyclic)
-        .add_enum("shuffle", kShuffle)
-        .describe("Feature selection or ordering method.");
-    // alias of parameters
-    DMLC_DECLARE_ALIAS(learning_rate, eta);
-    DMLC_DECLARE_ALIAS(reg_lambda, lambda);
-    DMLC_DECLARE_ALIAS(reg_alpha, alpha);
-  }
-  /*! \brief Denormalizes the regularization penalties - to be called at each update */
-  void DenormalizePenalties(double sum_instance_weight) {
-    reg_lambda_denorm = reg_lambda * sum_instance_weight;
-    reg_alpha_denorm = reg_alpha * sum_instance_weight;
-  }
-  // denormalized regularization penalties
-  float reg_lambda_denorm;
-  float reg_alpha_denorm;
-};
-
 class ShotgunUpdater : public LinearUpdater {
  public:
   // set training parameter
   void Init(const std::vector<std::pair<std::string, std::string> > &args) override {
     param_.InitAllowUnknown(args);
+    if (param_.feature_selector != kCyclic &&
+        param_.feature_selector != kShuffle) {
+      LOG(FATAL) << "Unsupported feature selector for shotgun updater.\n"
+                 << "Supported options are: {cyclic, shuffle}";
+    }
     selector_.reset(FeatureSelector::Create(param_.feature_selector));
   }
   void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
@@ -119,13 +81,11 @@ class ShotgunUpdater : public LinearUpdater {
  protected:
   // training parameters
-  ShotgunTrainParam param_;
+  LinearTrainParam param_;
   std::unique_ptr<FeatureSelector> selector_;
 };

-DMLC_REGISTER_PARAMETER(ShotgunTrainParam);
 XGBOOST_REGISTER_LINEAR_UPDATER(ShotgunUpdater, "shotgun")
     .describe(
         "Update linear model according to shotgun coordinate descent "

tests/cpp/linear/test_linear.cc

@@ -3,29 +3,32 @@
 #include "../helpers.h"
 #include "xgboost/gbm.h"

+typedef std::pair<std::string, std::string> arg;
+
 TEST(Linear, shotgun) {
-  typedef std::pair<std::string, std::string> arg;
   auto mat = xgboost::CreateDMatrix(10, 10, 0);
-  auto updater = std::unique_ptr<xgboost::LinearUpdater>(
-      xgboost::LinearUpdater::Create("shotgun"));
-  updater->Init({{"eta", "1."}});
-  xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
-      (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
-  xgboost::gbm::GBLinearModel model;
-  model.param.num_feature = (*mat)->Info().num_col_;
-  model.param.num_output_group = 1;
-  model.LazyInitModel();
-  updater->Update(&gpair, (*mat).get(), &model, gpair.Size());
-
-  ASSERT_EQ(model.bias()[0], 5.0f);
+  {
+    auto updater = std::unique_ptr<xgboost::LinearUpdater>(
+        xgboost::LinearUpdater::Create("shotgun"));
+    updater->Init({{"eta", "1."}});
+    xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
+        (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
+    xgboost::gbm::GBLinearModel model;
+    model.param.num_feature = (*mat)->Info().num_col_;
+    model.param.num_output_group = 1;
+    model.LazyInitModel();
+    updater->Update(&gpair, (*mat).get(), &model, gpair.Size());
+
+    ASSERT_EQ(model.bias()[0], 5.0f);
+  }
+  {
+    auto updater = std::unique_ptr<xgboost::LinearUpdater>(
+        xgboost::LinearUpdater::Create("shotgun"));
+    EXPECT_ANY_THROW(updater->Init({{"feature_selector", "random"}}));
+  }
   delete mat;
 }

 TEST(Linear, coordinate) {
-  typedef std::pair<std::string, std::string> arg;
   auto mat = xgboost::CreateDMatrix(10, 10, 0);
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
       xgboost::LinearUpdater::Create("coord_descent"));