Merge duplicated linear updater parameters. (#4013)

* Merge duplicated linear updater parameters.
* Split up coordinate descent parameter.

parent f75a21af25
commit 85939c6a6e
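
In short: the near-identical per-updater parameter structs (ShotgunTrainParam, CoordinateTrainParam, GPUCoordinateTrainParam) and the duplicated FeatureSelectorEnum are merged into a single LinearTrainParam in the new src/linear/param.h, registered once in linear_updater.cc; the coordinate-descent-only top_k option is split out into a small CoordinateParam owned by the two coordinate updaters.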
src/linear/coordinate_common.h

@@ -8,11 +8,24 @@
 #include <utility>
 #include <vector>
 #include <limits>
 
+#include "./param.h"
 #include "../common/random.h"
 
 namespace xgboost {
 namespace linear {
+
+struct CoordinateParam : public dmlc::Parameter<CoordinateParam> {
+  int top_k;
+  DMLC_DECLARE_PARAMETER(CoordinateParam) {
+    DMLC_DECLARE_FIELD(top_k)
+        .set_lower_bound(0)
+        .set_default(0)
+        .describe("The number of top features to select in 'thrifty' feature_selector. "
+                  "The value of zero means using all the features.");
+  }
+};
+
 /**
  * \brief Calculate change in weight for a given feature. Applies l1/l2 penalty normalised by the
  * number of training instances.
@@ -442,17 +455,6 @@ class ThriftyFeatureSelector : public FeatureSelector {
   std::vector<std::pair<double, double>> gpair_sums_;
 };
 
-/**
- * \brief A set of available FeatureSelector's
- */
-enum FeatureSelectorEnum {
-  kCyclic = 0,
-  kShuffle,
-  kThrifty,
-  kGreedy,
-  kRandom
-};
-
 inline FeatureSelector *FeatureSelector::Create(int choice) {
   switch (choice) {
     case kCyclic:
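
For readers unfamiliar with dmlc-core parameters, here is a minimal standalone sketch of what the CoordinateParam declaration above provides. The struct body is copied from the hunk; main() and the inline registration are illustrative assumptions, since in xgboost the registration lives in updater_coordinate.cc (later in this commit):

    #include <dmlc/parameter.h>

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    struct CoordinateParam : public dmlc::Parameter<CoordinateParam> {
      int top_k;
      DMLC_DECLARE_PARAMETER(CoordinateParam) {
        DMLC_DECLARE_FIELD(top_k)
            .set_lower_bound(0)
            .set_default(0)
            .describe("The number of top features to select in 'thrifty' "
                      "feature_selector. The value of zero means using all "
                      "the features.");
      }
    };
    // In xgboost this macro is invoked in updater_coordinate.cc, not here.
    DMLC_REGISTER_PARAMETER(CoordinateParam);

    int main() {
      CoordinateParam p;
      std::vector<std::pair<std::string, std::string>> args{{"top_k", "10"}};
      p.Init(args);                   // parses the string, enforces the lower bound
      std::cout << p.top_k << "\n";   // prints 10; omitting the key keeps the default 0
      // p.Init({{"top_k", "-1"}});   // would throw dmlc::ParamError (below the bound)
    }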

src/linear/linear_updater.cc

@@ -3,6 +3,7 @@
  */
 #include <xgboost/linear_updater.h>
 #include <dmlc/registry.h>
+#include "./param.h"
 
 namespace dmlc {
 DMLC_REGISTRY_ENABLE(::xgboost::LinearUpdaterReg);
@@ -22,6 +23,8 @@ LinearUpdater* LinearUpdater::Create(const std::string& name) {
 
 namespace xgboost {
 namespace linear {
+
+DMLC_REGISTER_PARAMETER(LinearTrainParam);
 
 // List of files that will be force linked in static links.
 DMLC_REGISTRY_LINK_TAG(updater_shotgun);
 DMLC_REGISTRY_LINK_TAG(updater_coordinate);
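
Registering the shared struct here means it is declared to dmlc exactly once; the per-updater DMLC_REGISTER_PARAMETER(...TrainParam) calls are correspondingly dropped from the individual updater files below.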

src/linear/param.h (new file, 77 lines)

@@ -0,0 +1,77 @@
+/*!
+ * Copyright 2018 by Contributors
+ * \file param.h
+ * \brief training parameters.
+ */
+#ifndef XGBOOST_LINEAR_PARAM_H_
+#define XGBOOST_LINEAR_PARAM_H_
+#include <dmlc/parameter.h>
+
+namespace xgboost {
+namespace linear {
+/**
+ * \brief A set of available FeatureSelector's
+ */
+enum FeatureSelectorEnum {
+  kCyclic = 0,
+  kShuffle,
+  kThrifty,
+  kGreedy,
+  kRandom
+};
+
+struct LinearTrainParam : public dmlc::Parameter<LinearTrainParam> {
+  /*! \brief learning_rate */
+  float learning_rate;
+  /*! \brief regularization weight for L2 norm */
+  float reg_lambda;
+  /*! \brief regularization weight for L1 norm */
+  float reg_alpha;
+  int feature_selector;
+  int n_gpus;
+  int gpu_id;
+  // declare parameters
+  DMLC_DECLARE_PARAMETER(LinearTrainParam) {
+    DMLC_DECLARE_FIELD(learning_rate)
+        .set_lower_bound(0.0f)
+        .set_default(0.5f)
+        .describe("Learning rate of each update.");
+    DMLC_DECLARE_FIELD(reg_lambda)
+        .set_lower_bound(0.0f)
+        .set_default(0.0f)
+        .describe("L2 regularization on weights.");
+    DMLC_DECLARE_FIELD(reg_alpha)
+        .set_lower_bound(0.0f)
+        .set_default(0.0f)
+        .describe("L1 regularization on weights.");
+    DMLC_DECLARE_FIELD(feature_selector)
+        .set_default(kCyclic)
+        .add_enum("cyclic", kCyclic)
+        .add_enum("shuffle", kShuffle)
+        .add_enum("thrifty", kThrifty)
+        .add_enum("greedy", kGreedy)
+        .add_enum("random", kRandom)
+        .describe("Feature selection or ordering method.");
+    DMLC_DECLARE_FIELD(n_gpus).set_default(1).describe(
+        "Number of devices to use.");
+    DMLC_DECLARE_FIELD(gpu_id).set_default(0).describe(
+        "Primary device ordinal.");
+    // alias of parameters
+    DMLC_DECLARE_ALIAS(learning_rate, eta);
+    DMLC_DECLARE_ALIAS(reg_lambda, lambda);
+    DMLC_DECLARE_ALIAS(reg_alpha, alpha);
+  }
+  /*! \brief Denormalizes the regularization penalties - to be called at each update */
+  void DenormalizePenalties(double sum_instance_weight) {
+    reg_lambda_denorm = reg_lambda * sum_instance_weight;
+    reg_alpha_denorm = reg_alpha * sum_instance_weight;
+  }
+  // denormalized regularization penalties
+  float reg_lambda_denorm;
+  float reg_alpha_denorm;
+};
+
+}  // namespace linear
+}  // namespace xgboost
+
+#endif  // XGBOOST_LINEAR_PARAM_H_
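
A hedged usage sketch of the merged struct (assuming dmlc-core's InitAllowUnknown semantics and that param.h is on the include path; the argument values and the print statement are made up for illustration, and the DMLC_REGISTER_PARAMETER(LinearTrainParam) call that makes Init work is the one added to linear_updater.cc above):

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    #include "param.h"

    int main() {
      xgboost::linear::LinearTrainParam p;
      std::vector<std::pair<std::string, std::string>> args{
          {"eta", "0.3"},                  // alias: stored as learning_rate
          {"alpha", "0.1"},                // alias: stored as reg_alpha
          {"feature_selector", "thrifty"}, // parsed to the enum value kThrifty
          {"top_k", "5"}};                 // not declared here: handed back
      // Recognised keys are consumed; everything else is returned to the
      // caller so another parameter struct (e.g. CoordinateParam) can claim it.
      auto rest = p.InitAllowUnknown(args);  // rest == {{"top_k", "5"}}
      // Penalties are stored normalised and rescaled by the total instance
      // weight before every update, as DenormalizePenalties() shows above.
      p.DenormalizePenalties(/*sum_instance_weight=*/256.0);
      std::cout << p.learning_rate << " " << p.reg_alpha_denorm << "\n";  // 0.3 25.6
    }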

src/linear/updater_coordinate.cc

@@ -4,66 +4,17 @@
  */
 
 #include <xgboost/linear_updater.h>
+#include "./param.h"
 #include "../common/timer.h"
 #include "coordinate_common.h"
 
 namespace xgboost {
 namespace linear {
 
+DMLC_REGISTER_PARAMETER(CoordinateParam);
 DMLC_REGISTRY_FILE_TAG(updater_coordinate);
 
 // training parameter
-struct CoordinateTrainParam : public dmlc::Parameter<CoordinateTrainParam> {
-  /*! \brief learning_rate */
-  float learning_rate;
-  /*! \brief regularization weight for L2 norm */
-  float reg_lambda;
-  /*! \brief regularization weight for L1 norm */
-  float reg_alpha;
-  int feature_selector;
-  int top_k;
-  // declare parameters
-  DMLC_DECLARE_PARAMETER(CoordinateTrainParam) {
-    DMLC_DECLARE_FIELD(learning_rate)
-        .set_lower_bound(0.0f)
-        .set_default(0.5f)
-        .describe("Learning rate of each update.");
-    DMLC_DECLARE_FIELD(reg_lambda)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L2 regularization on weights.");
-    DMLC_DECLARE_FIELD(reg_alpha)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L1 regularization on weights.");
-    DMLC_DECLARE_FIELD(feature_selector)
-        .set_default(kCyclic)
-        .add_enum("cyclic", kCyclic)
-        .add_enum("shuffle", kShuffle)
-        .add_enum("thrifty", kThrifty)
-        .add_enum("greedy", kGreedy)
-        .add_enum("random", kRandom)
-        .describe("Feature selection or ordering method.");
-    DMLC_DECLARE_FIELD(top_k)
-        .set_lower_bound(0)
-        .set_default(0)
-        .describe("The number of top features to select in 'thrifty' feature_selector. "
-                  "The value of zero means using all the features.");
-    // alias of parameters
-    DMLC_DECLARE_ALIAS(learning_rate, eta);
-    DMLC_DECLARE_ALIAS(reg_lambda, lambda);
-    DMLC_DECLARE_ALIAS(reg_alpha, alpha);
-  }
-  /*! \brief Denormalizes the regularization penalties - to be called at each update */
-  void DenormalizePenalties(double sum_instance_weight) {
-    reg_lambda_denorm = reg_lambda * sum_instance_weight;
-    reg_alpha_denorm = reg_alpha * sum_instance_weight;
-  }
-  // denormalizated regularization penalties
-  float reg_lambda_denorm;
-  float reg_alpha_denorm;
-};
-
 /**
  * \class CoordinateUpdater
 *
@@ -75,33 +26,37 @@ class CoordinateUpdater : public LinearUpdater {
   // set training parameter
   void Init(
       const std::vector<std::pair<std::string, std::string> > &args) override {
-    param.InitAllowUnknown(args);
-    selector.reset(FeatureSelector::Create(param.feature_selector));
+    const std::vector<std::pair<std::string, std::string> > rest {
+      tparam_.InitAllowUnknown(args)
+    };
+    cparam_.InitAllowUnknown(rest);
+    selector.reset(FeatureSelector::Create(tparam_.feature_selector));
     monitor.Init("CoordinateUpdater");
   }
   void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
               gbm::GBLinearModel *model, double sum_instance_weight) override {
-    param.DenormalizePenalties(sum_instance_weight);
+    tparam_.DenormalizePenalties(sum_instance_weight);
     const int ngroup = model->param.num_output_group;
     // update bias
     for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
       auto grad = GetBiasGradientParallel(group_idx, ngroup,
                                           in_gpair->ConstHostVector(), p_fmat);
-      auto dbias = static_cast<float>(param.learning_rate *
+      auto dbias = static_cast<float>(tparam_.learning_rate *
                                       CoordinateDeltaBias(grad.first, grad.second));
       model->bias()[group_idx] += dbias;
       UpdateBiasResidualParallel(group_idx, ngroup,
                                  dbias, &in_gpair->HostVector(), p_fmat);
     }
     // prepare for updating the weights
-    selector->Setup(*model, in_gpair->ConstHostVector(), p_fmat, param.reg_alpha_denorm,
-                    param.reg_lambda_denorm, param.top_k);
+    selector->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
+                    tparam_.reg_alpha_denorm,
+                    tparam_.reg_lambda_denorm, cparam_.top_k);
     // update weights
     for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
       for (unsigned i = 0U; i < model->param.num_feature; i++) {
         int fidx = selector->NextFeature
           (i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
-           param.reg_alpha_denorm, param.reg_lambda_denorm);
+           tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
         if (fidx < 0) break;
         this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), p_fmat, model);
       }
@@ -116,20 +71,20 @@ class CoordinateUpdater : public LinearUpdater {
     auto gradient =
         GetGradientParallel(group_idx, ngroup, fidx, *in_gpair, p_fmat);
     auto dw = static_cast<float>(
-        param.learning_rate *
-        CoordinateDelta(gradient.first, gradient.second, w, param.reg_alpha_denorm,
-                        param.reg_lambda_denorm));
+        tparam_.learning_rate *
+        CoordinateDelta(gradient.first, gradient.second, w, tparam_.reg_alpha_denorm,
+                        tparam_.reg_lambda_denorm));
     w += dw;
     UpdateResidualParallel(fidx, group_idx, ngroup, dw, in_gpair, p_fmat);
   }
 
+  CoordinateParam cparam_;
   // training parameter
-  CoordinateTrainParam param;
+  LinearTrainParam tparam_;
   std::unique_ptr<FeatureSelector> selector;
   common::Monitor monitor;
 };
 
-DMLC_REGISTER_PARAMETER(CoordinateTrainParam);
 XGBOOST_REGISTER_LINEAR_UPDATER(CoordinateUpdater, "coord_descent")
     .describe("Update linear model according to coordinate descent algorithm.")
     .set_body([]() { return new CoordinateUpdater(); });
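
The Init() rewrite above is the heart of the commit's "split up coordinate descent parameter" half. The same lines again, condensed and annotated (class context elided; names exactly as in the diff):

    void Init(const std::vector<std::pair<std::string, std::string> > &args) override {
      // Stage 1: the shared LinearTrainParam consumes the keys it declares
      // (learning_rate/eta, reg_lambda/lambda, reg_alpha/alpha,
      // feature_selector, n_gpus, gpu_id) and returns the ones it does not know.
      const std::vector<std::pair<std::string, std::string> > rest {
          tparam_.InitAllowUnknown(args)};
      // Stage 2: the coordinate-descent-only CoordinateParam claims top_k from
      // the leftovers, so callers keep passing one flat argument list.
      cparam_.InitAllowUnknown(rest);
      selector.reset(FeatureSelector::Create(tparam_.feature_selector));
      monitor.Init("CoordinateUpdater");
    }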

src/linear/updater_gpu_coordinate.cu

@@ -9,6 +9,7 @@
 #include "../common/common.h"
 #include "../common/device_helpers.cuh"
 #include "../common/timer.h"
+#include "./param.h"
 #include "coordinate_common.h"
 
 namespace xgboost {
@@ -16,64 +17,6 @@ namespace linear {
 
 DMLC_REGISTRY_FILE_TAG(updater_gpu_coordinate);
 
-// training parameter
-struct GPUCoordinateTrainParam
-    : public dmlc::Parameter<GPUCoordinateTrainParam> {
-  /*! \brief learning_rate */
-  float learning_rate;
-  /*! \brief regularization weight for L2 norm */
-  float reg_lambda;
-  /*! \brief regularization weight for L1 norm */
-  float reg_alpha;
-  int feature_selector;
-  int top_k;
-  int n_gpus;
-  int gpu_id;
-  // declare parameters
-  DMLC_DECLARE_PARAMETER(GPUCoordinateTrainParam) {
-    DMLC_DECLARE_FIELD(learning_rate)
-        .set_lower_bound(0.0f)
-        .set_default(1.0f)
-        .describe("Learning rate of each update.");
-    DMLC_DECLARE_FIELD(reg_lambda)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L2 regularization on weights.");
-    DMLC_DECLARE_FIELD(reg_alpha)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L1 regularization on weights.");
-    DMLC_DECLARE_FIELD(feature_selector)
-        .set_default(kCyclic)
-        .add_enum("cyclic", kCyclic)
-        .add_enum("shuffle", kShuffle)
-        .add_enum("thrifty", kThrifty)
-        .add_enum("greedy", kGreedy)
-        .add_enum("random", kRandom)
-        .describe("Feature selection or ordering method.");
-    DMLC_DECLARE_FIELD(top_k).set_lower_bound(0).set_default(0).describe(
-        "The number of top features to select in 'thrifty' feature_selector. "
-        "The value of zero means using all the features.");
-    DMLC_DECLARE_FIELD(n_gpus).set_default(1).describe(
-        "Number of devices to use.");
-    DMLC_DECLARE_FIELD(gpu_id).set_default(0).describe(
-        "Primary device ordinal.");
-    // alias of parameters
-    DMLC_DECLARE_ALIAS(learning_rate, eta);
-    DMLC_DECLARE_ALIAS(reg_lambda, lambda);
-    DMLC_DECLARE_ALIAS(reg_alpha, alpha);
-  }
-  /*! \brief Denormalizes the regularization penalties - to be called at each
-   * update */
-  void DenormalizePenalties(double sum_instance_weight) {
-    reg_lambda_denorm = reg_lambda * sum_instance_weight;
-    reg_alpha_denorm = reg_alpha * sum_instance_weight;
-  }
-  // denormalizated regularization penalties
-  float reg_lambda_denorm;
-  float reg_alpha_denorm;
-};
-
 void RescaleIndices(size_t ridx_begin, dh::DVec<Entry> *data) {
   auto d_data = data->Data();
   dh::LaunchN(data->DeviceIdx(), data->Size(),
@@ -93,7 +36,7 @@ class DeviceShard {
  public:
  DeviceShard(int device_id, const SparsePage &batch,
              bst_uint row_begin, bst_uint row_end,
-              const GPUCoordinateTrainParam &param,
+              const LinearTrainParam &param,
              const gbm::GBLinearModelParam &model_param)
      : device_id_(device_id),
        ridx_begin_(row_begin),
@@ -199,8 +142,8 @@ class GPUCoordinateUpdater : public LinearUpdater {
   // set training parameter
   void Init(
       const std::vector<std::pair<std::string, std::string>> &args) override {
-    param.InitAllowUnknown(args);
-    selector.reset(FeatureSelector::Create(param.feature_selector));
+    tparam_.InitAllowUnknown(args);
+    selector.reset(FeatureSelector::Create(tparam_.feature_selector));
     monitor.Init("GPUCoordinateUpdater");
   }
 
@@ -208,7 +151,7 @@ class GPUCoordinateUpdater : public LinearUpdater {
               const gbm::GBLinearModelParam &model_param) {
     if (!shards.empty()) return;
 
-    dist_ = GPUDistribution::Block(GPUSet::All(param.gpu_id, param.n_gpus,
+    dist_ = GPUDistribution::Block(GPUSet::All(tparam_.gpu_id, tparam_.n_gpus,
                                                p_fmat->Info().num_row_));
     auto devices = dist_.Devices();
 
@@ -237,13 +180,13 @@ class GPUCoordinateUpdater : public LinearUpdater {
       [&](int i, std::unique_ptr<DeviceShard>& shard) {
         shard = std::unique_ptr<DeviceShard>(
             new DeviceShard(devices.DeviceId(i), batch, row_segments[i],
-                            row_segments[i + 1], param, model_param));
+                            row_segments[i + 1], tparam_, model_param));
       });
   }
 
   void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
               gbm::GBLinearModel *model, double sum_instance_weight) override {
-    param.DenormalizePenalties(sum_instance_weight);
+    tparam_.DenormalizePenalties(sum_instance_weight);
     monitor.Start("LazyInitShards");
     this->LazyInitShards(p_fmat, model->param);
     monitor.Stop("LazyInitShards");
@@ -260,15 +203,15 @@ class GPUCoordinateUpdater : public LinearUpdater {
     monitor.Stop("UpdateBias");
     // prepare for updating the weights
     selector->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
-                    param.reg_alpha_denorm, param.reg_lambda_denorm,
-                    param.top_k);
+                    tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm,
+                    coord_param_.top_k);
     monitor.Start("UpdateFeature");
     for (auto group_idx = 0; group_idx < model->param.num_output_group;
          ++group_idx) {
       for (auto i = 0U; i < model->param.num_feature; i++) {
         auto fidx = selector->NextFeature(
             i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
-            param.reg_alpha_denorm, param.reg_lambda_denorm);
+            tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
         if (fidx < 0) break;
         this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), model);
       }
@@ -287,7 +230,7 @@ class GPUCoordinateUpdater : public LinearUpdater {
     });
 
     auto dbias = static_cast<float>(
-        param.learning_rate *
+        tparam_.learning_rate *
        CoordinateDeltaBias(grad.GetGrad(), grad.GetHess()));
     model->bias()[group_idx] += dbias;
 
@@ -310,10 +253,10 @@ class GPUCoordinateUpdater : public LinearUpdater {
                            fidx);
     });
 
-    auto dw = static_cast<float>(param.learning_rate *
+    auto dw = static_cast<float>(tparam_.learning_rate *
                                  CoordinateDelta(grad.GetGrad(), grad.GetHess(),
-                                                 w, param.reg_alpha_denorm,
-                                                 param.reg_lambda_denorm));
+                                                 w, tparam_.reg_alpha_denorm,
+                                                 tparam_.reg_lambda_denorm));
     w += dw;
 
     dh::ExecuteIndexShards(&shards, [&](int idx, std::unique_ptr<DeviceShard>& shard) {
@@ -322,7 +265,8 @@ class GPUCoordinateUpdater : public LinearUpdater {
   }
 
   // training parameter
-  GPUCoordinateTrainParam param;
+  LinearTrainParam tparam_;
+  CoordinateParam coord_param_;
   GPUDistribution dist_;
   std::unique_ptr<FeatureSelector> selector;
   common::Monitor monitor;
@@ -330,7 +274,6 @@ class GPUCoordinateUpdater : public LinearUpdater {
   std::vector<std::unique_ptr<DeviceShard>> shards;
 };
 
-DMLC_REGISTER_PARAMETER(GPUCoordinateTrainParam);
 XGBOOST_REGISTER_LINEAR_UPDATER(GPUCoordinateUpdater, "gpu_coord_descent")
     .describe(
         "Update linear model according to coordinate descent algorithm. GPU "
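
As on the CPU side, the GPU updater now takes learning_rate, the denormalised penalties, and the device settings (gpu_id, n_gpus) from the shared LinearTrainParam, keeping only the thrifty-selector top_k in its own CoordinateParam member (coord_param_).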

src/linear/updater_shotgun.cc

@@ -11,54 +11,16 @@ namespace linear {
 
 DMLC_REGISTRY_FILE_TAG(updater_shotgun);
 
-// training parameter
-struct ShotgunTrainParam : public dmlc::Parameter<ShotgunTrainParam> {
-  /*! \brief learning_rate */
-  float learning_rate;
-  /*! \brief regularization weight for L2 norm */
-  float reg_lambda;
-  /*! \brief regularization weight for L1 norm */
-  float reg_alpha;
-  int feature_selector;
-  // declare parameters
-  DMLC_DECLARE_PARAMETER(ShotgunTrainParam) {
-    DMLC_DECLARE_FIELD(learning_rate)
-        .set_lower_bound(0.0f)
-        .set_default(0.5f)
-        .describe("Learning rate of each update.");
-    DMLC_DECLARE_FIELD(reg_lambda)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L2 regularization on weights.");
-    DMLC_DECLARE_FIELD(reg_alpha)
-        .set_lower_bound(0.0f)
-        .set_default(0.0f)
-        .describe("L1 regularization on weights.");
-    DMLC_DECLARE_FIELD(feature_selector)
-        .set_default(kCyclic)
-        .add_enum("cyclic", kCyclic)
-        .add_enum("shuffle", kShuffle)
-        .describe("Feature selection or ordering method.");
-    // alias of parameters
-    DMLC_DECLARE_ALIAS(learning_rate, eta);
-    DMLC_DECLARE_ALIAS(reg_lambda, lambda);
-    DMLC_DECLARE_ALIAS(reg_alpha, alpha);
-  }
-  /*! \brief Denormalizes the regularization penalties - to be called at each update */
-  void DenormalizePenalties(double sum_instance_weight) {
-    reg_lambda_denorm = reg_lambda * sum_instance_weight;
-    reg_alpha_denorm = reg_alpha * sum_instance_weight;
-  }
-  // denormalizated regularization penalties
-  float reg_lambda_denorm;
-  float reg_alpha_denorm;
-};
-
 class ShotgunUpdater : public LinearUpdater {
  public:
   // set training parameter
   void Init(const std::vector<std::pair<std::string, std::string> > &args) override {
     param_.InitAllowUnknown(args);
+    if (param_.feature_selector != kCyclic &&
+        param_.feature_selector != kShuffle) {
+      LOG(FATAL) << "Unsupported feature selector for shotgun updater.\n"
+                 << "Supported options are: {cyclic, shuffle}";
+    }
     selector_.reset(FeatureSelector::Create(param_.feature_selector));
   }
   void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
@@ -119,13 +81,11 @@ class ShotgunUpdater : public LinearUpdater {
 
  protected:
   // training parameters
-  ShotgunTrainParam param_;
+  LinearTrainParam param_;
 
   std::unique_ptr<FeatureSelector> selector_;
 };
 
-DMLC_REGISTER_PARAMETER(ShotgunTrainParam);
-
 XGBOOST_REGISTER_LINEAR_UPDATER(ShotgunUpdater, "shotgun")
     .describe(
         "Update linear model according to shotgun coordinate descent "
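
Note why the LOG(FATAL) guard is new: the old ShotgunTrainParam declared only the cyclic and shuffle enum values for feature_selector, so an unsupported selector failed at parse time. The shared LinearTrainParam accepts all five selectors, so the shotgun updater must now reject the unsupported ones itself at Init; the updated test below exercises exactly this path.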

tests/cpp/linear/test_linear.cc

@@ -3,29 +3,32 @@
 #include "../helpers.h"
 #include "xgboost/gbm.h"
 
-typedef std::pair<std::string, std::string> arg;
-
 TEST(Linear, shotgun) {
-  typedef std::pair<std::string, std::string> arg;
   auto mat = xgboost::CreateDMatrix(10, 10, 0);
-  auto updater = std::unique_ptr<xgboost::LinearUpdater>(
-      xgboost::LinearUpdater::Create("shotgun"));
-  updater->Init({{"eta", "1."}});
-  xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
-      (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
-  xgboost::gbm::GBLinearModel model;
-  model.param.num_feature = (*mat)->Info().num_col_;
-  model.param.num_output_group = 1;
-  model.LazyInitModel();
-  updater->Update(&gpair, (*mat).get(), &model, gpair.Size());
+  {
+    auto updater = std::unique_ptr<xgboost::LinearUpdater>(
+        xgboost::LinearUpdater::Create("shotgun"));
+    updater->Init({{"eta", "1."}});
+    xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
+        (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
+    xgboost::gbm::GBLinearModel model;
+    model.param.num_feature = (*mat)->Info().num_col_;
+    model.param.num_output_group = 1;
+    model.LazyInitModel();
+    updater->Update(&gpair, (*mat).get(), &model, gpair.Size());
 
-  ASSERT_EQ(model.bias()[0], 5.0f);
+    ASSERT_EQ(model.bias()[0], 5.0f);
+  }
+  {
+    auto updater = std::unique_ptr<xgboost::LinearUpdater>(
+        xgboost::LinearUpdater::Create("shotgun"));
+    EXPECT_ANY_THROW(updater->Init({{"feature_selector", "random"}}));
+  }
   delete mat;
 }
 
 TEST(Linear, coordinate) {
-  typedef std::pair<std::string, std::string> arg;
   auto mat = xgboost::CreateDMatrix(10, 10, 0);
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
       xgboost::LinearUpdater::Create("coord_descent"));