Remove omp_get_max_threads in gbm and linear. (#7537)
* Use ctx in gbm. * Use ctx threads in gbm and linear.
This commit is contained in:
@@ -149,21 +149,21 @@ GetGradientParallel(GenericParameter const *ctx, int group_idx, int num_group,
|
||||
*/
|
||||
inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_group,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat) {
|
||||
double sum_grad = 0.0, sum_hess = 0.0;
|
||||
DMatrix *p_fmat, int32_t n_threads) {
|
||||
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
|
||||
dmlc::OMPException exc;
|
||||
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
exc.Run([&]() {
|
||||
auto &p = gpair[i * num_group + group_idx];
|
||||
if (p.GetHess() >= 0.0f) {
|
||||
sum_grad += p.GetGrad();
|
||||
sum_hess += p.GetHess();
|
||||
}
|
||||
});
|
||||
}
|
||||
exc.Rethrow();
|
||||
std::vector<double> sum_grad_tloc(n_threads, 0);
|
||||
std::vector<double> sum_hess_tloc(n_threads, 0);
|
||||
|
||||
common::ParallelFor(ndata, n_threads, [&](auto i) {
|
||||
auto tid = omp_get_thread_num();
|
||||
auto &p = gpair[i * num_group + group_idx];
|
||||
if (p.GetHess() >= 0.0f) {
|
||||
sum_grad_tloc[tid] += p.GetGrad();
|
||||
sum_hess_tloc[tid] += p.GetHess();
|
||||
}
|
||||
});
|
||||
double sum_grad = std::accumulate(sum_grad_tloc.cbegin(), sum_grad_tloc.cend(), 0.0);
|
||||
double sum_hess = std::accumulate(sum_hess_tloc.cbegin(), sum_hess_tloc.cend(), 0.0);
|
||||
return std::make_pair(sum_grad, sum_hess);
|
||||
}
|
||||
|
||||
@@ -179,23 +179,18 @@ inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_
|
||||
*/
|
||||
inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
|
||||
float dw, std::vector<GradientPair> *in_gpair,
|
||||
DMatrix *p_fmat) {
|
||||
DMatrix *p_fmat, int32_t n_threads) {
|
||||
if (dw == 0.0f) return;
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
auto page = batch.GetView();
|
||||
auto col = page[fidx];
|
||||
// update grad value
|
||||
const auto num_row = static_cast<bst_omp_uint>(col.size());
|
||||
dmlc::OMPException exc;
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint j = 0; j < num_row; ++j) {
|
||||
exc.Run([&]() {
|
||||
GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
|
||||
if (p.GetHess() < 0.0f) return;
|
||||
p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
|
||||
});
|
||||
}
|
||||
exc.Rethrow();
|
||||
common::ParallelFor(num_row, n_threads, [&](auto j) {
|
||||
GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
|
||||
if (p.GetHess() < 0.0f) return;
|
||||
p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,20 +204,15 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
|
||||
* \param p_fmat The input feature matrix.
|
||||
*/
|
||||
inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias,
|
||||
std::vector<GradientPair> *in_gpair,
|
||||
DMatrix *p_fmat) {
|
||||
std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
|
||||
int32_t n_threads) {
|
||||
if (dbias == 0.0f) return;
|
||||
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
|
||||
dmlc::OMPException exc;
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
exc.Run([&]() {
|
||||
GradientPair &g = (*in_gpair)[i * num_group + group_idx];
|
||||
if (g.GetHess() < 0.0f) return;
|
||||
g += GradientPair(g.GetHess() * dbias, 0);
|
||||
});
|
||||
}
|
||||
exc.Rethrow();
|
||||
common::ParallelFor(ndata, n_threads, [&](auto i) {
|
||||
GradientPair &g = (*in_gpair)[i * num_group + group_idx];
|
||||
if (g.GetHess() < 0.0f) return;
|
||||
g += GradientPair(g.GetHess() * dbias, 0);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -230,9 +220,13 @@ inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias
|
||||
* in coordinate descent algorithms.
|
||||
*/
|
||||
class FeatureSelector {
|
||||
protected:
|
||||
int32_t n_threads_{-1};
|
||||
|
||||
public:
|
||||
explicit FeatureSelector(int32_t n_threads) : n_threads_{n_threads} {}
|
||||
/*! \brief factory method */
|
||||
static FeatureSelector *Create(int choice);
|
||||
static FeatureSelector *Create(int choice, int32_t n_threads);
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~FeatureSelector() = default;
|
||||
/**
|
||||
@@ -274,6 +268,7 @@ class FeatureSelector {
|
||||
*/
|
||||
class CyclicFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
int NextFeature(int iteration, const gbm::GBLinearModel &model,
|
||||
int , const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float) override {
|
||||
@@ -287,6 +282,7 @@ class CyclicFeatureSelector : public FeatureSelector {
|
||||
*/
|
||||
class ShuffleFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<GradientPair>&,
|
||||
DMatrix *, float, float, int) override {
|
||||
@@ -313,6 +309,7 @@ class ShuffleFeatureSelector : public FeatureSelector {
|
||||
*/
|
||||
class RandomFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
int NextFeature(int, const gbm::GBLinearModel &model,
|
||||
int, const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float) override {
|
||||
@@ -331,6 +328,7 @@ class RandomFeatureSelector : public FeatureSelector {
|
||||
*/
|
||||
class GreedyFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<GradientPair> &,
|
||||
DMatrix *, float, float, int param) override {
|
||||
@@ -360,7 +358,7 @@ class GreedyFeatureSelector : public FeatureSelector {
|
||||
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
auto page = batch.GetView();
|
||||
common::ParallelFor(nfeat, [&](bst_omp_uint i) {
|
||||
common::ParallelFor(nfeat, this->n_threads_, [&](bst_omp_uint i) {
|
||||
const auto col = page[i];
|
||||
const bst_uint ndata = col.size();
|
||||
auto &sums = gpair_sums_[group_idx * nfeat + i];
|
||||
@@ -407,6 +405,7 @@ class GreedyFeatureSelector : public FeatureSelector {
|
||||
*/
|
||||
class ThriftyFeatureSelector : public FeatureSelector {
|
||||
public:
|
||||
using FeatureSelector::FeatureSelector;
|
||||
void Setup(const gbm::GBLinearModel &model,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
DMatrix *p_fmat, float alpha, float lambda, int param) override {
|
||||
@@ -426,7 +425,7 @@ class ThriftyFeatureSelector : public FeatureSelector {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
auto page = batch.GetView();
|
||||
// column-parallel is usually faster than row-parallel
|
||||
common::ParallelFor(nfeat, [&](bst_omp_uint i) {
|
||||
common::ParallelFor(nfeat, this->n_threads_, [&](auto i) {
|
||||
const auto col = page[i];
|
||||
const bst_uint ndata = col.size();
|
||||
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
|
||||
@@ -483,18 +482,18 @@ class ThriftyFeatureSelector : public FeatureSelector {
|
||||
std::vector<std::pair<double, double>> gpair_sums_;
|
||||
};
|
||||
|
||||
inline FeatureSelector *FeatureSelector::Create(int choice) {
|
||||
inline FeatureSelector *FeatureSelector::Create(int choice, int32_t n_threads) {
|
||||
switch (choice) {
|
||||
case kCyclic:
|
||||
return new CyclicFeatureSelector();
|
||||
return new CyclicFeatureSelector(n_threads);
|
||||
case kShuffle:
|
||||
return new ShuffleFeatureSelector();
|
||||
return new ShuffleFeatureSelector(n_threads);
|
||||
case kThrifty:
|
||||
return new ThriftyFeatureSelector();
|
||||
return new ThriftyFeatureSelector(n_threads);
|
||||
case kGreedy:
|
||||
return new GreedyFeatureSelector();
|
||||
return new GreedyFeatureSelector(n_threads);
|
||||
case kRandom:
|
||||
return new RandomFeatureSelector();
|
||||
return new RandomFeatureSelector(n_threads);
|
||||
default:
|
||||
LOG(FATAL) << "unknown coordinate selector: " << choice;
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ LinearUpdater* LinearUpdater::Create(const std::string& name, GenericParameter c
|
||||
LOG(FATAL) << "Unknown linear updater " << name;
|
||||
}
|
||||
auto p_linear = (e->body)();
|
||||
p_linear->learner_param_ = lparam;
|
||||
p_linear->ctx_ = lparam;
|
||||
return p_linear;
|
||||
}
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ class CoordinateUpdater : public LinearUpdater {
|
||||
tparam_.UpdateAllowUnknown(args)
|
||||
};
|
||||
cparam_.UpdateAllowUnknown(rest);
|
||||
selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
|
||||
selector_.reset(FeatureSelector::Create(tparam_.feature_selector, ctx_->Threads()));
|
||||
monitor_.Init("CoordinateUpdater");
|
||||
}
|
||||
|
||||
@@ -51,13 +51,13 @@ class CoordinateUpdater : public LinearUpdater {
|
||||
const int ngroup = model->learner_model_param->num_output_group;
|
||||
// update bias
|
||||
for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
|
||||
auto grad = GetBiasGradientParallel(group_idx, ngroup,
|
||||
in_gpair->ConstHostVector(), p_fmat);
|
||||
auto grad = GetBiasGradientParallel(group_idx, ngroup, in_gpair->ConstHostVector(), p_fmat,
|
||||
ctx_->Threads());
|
||||
auto dbias = static_cast<float>(tparam_.learning_rate *
|
||||
CoordinateDeltaBias(grad.first, grad.second));
|
||||
model->Bias()[group_idx] += dbias;
|
||||
UpdateBiasResidualParallel(group_idx, ngroup,
|
||||
dbias, &in_gpair->HostVector(), p_fmat);
|
||||
UpdateBiasResidualParallel(group_idx, ngroup, dbias, &in_gpair->HostVector(), p_fmat,
|
||||
ctx_->Threads());
|
||||
}
|
||||
// prepare for updating the weights
|
||||
selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
|
||||
@@ -80,14 +80,15 @@ class CoordinateUpdater : public LinearUpdater {
|
||||
DMatrix *p_fmat, gbm::GBLinearModel *model) {
|
||||
const int ngroup = model->learner_model_param->num_output_group;
|
||||
bst_float &w = (*model)[fidx][group_idx];
|
||||
auto gradient = GetGradientParallel(learner_param_, group_idx, ngroup, fidx,
|
||||
auto gradient = GetGradientParallel(ctx_, group_idx, ngroup, fidx,
|
||||
*in_gpair, p_fmat);
|
||||
auto dw = static_cast<float>(
|
||||
tparam_.learning_rate *
|
||||
CoordinateDelta(gradient.first, gradient.second, w, tparam_.reg_alpha_denorm,
|
||||
tparam_.reg_lambda_denorm));
|
||||
w += dw;
|
||||
UpdateResidualParallel(fidx, group_idx, ngroup, dw, in_gpair, p_fmat);
|
||||
UpdateResidualParallel(fidx, group_idx, ngroup, dw, in_gpair, p_fmat,
|
||||
ctx_->Threads());
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@@ -32,7 +32,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
void Configure(Args const& args) override {
|
||||
tparam_.UpdateAllowUnknown(args);
|
||||
coord_param_.UpdateAllowUnknown(args);
|
||||
selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
|
||||
selector_.reset(FeatureSelector::Create(tparam_.feature_selector, ctx_->Threads()));
|
||||
monitor_.Init("GPUCoordinateUpdater");
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
}
|
||||
|
||||
void LazyInitDevice(DMatrix *p_fmat, const LearnerModelParam &model_param) {
|
||||
if (learner_param_->gpu_id < 0) return;
|
||||
if (ctx_->gpu_id < 0) return;
|
||||
|
||||
num_row_ = static_cast<size_t>(p_fmat->Info().num_row_);
|
||||
|
||||
@@ -60,7 +60,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
return;
|
||||
}
|
||||
|
||||
dh::safe_cuda(cudaSetDevice(learner_param_->gpu_id));
|
||||
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
|
||||
// The begin and end indices for the section of each column associated with
|
||||
// this device
|
||||
std::vector<std::pair<bst_uint, bst_uint>> column_segments;
|
||||
@@ -103,7 +103,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
monitor_.Start("UpdateGpair");
|
||||
auto &in_gpair_host = in_gpair->ConstHostVector();
|
||||
// Update gpair
|
||||
if (learner_param_->gpu_id >= 0) {
|
||||
if (ctx_->gpu_id >= 0) {
|
||||
this->UpdateGpair(in_gpair_host);
|
||||
}
|
||||
monitor_.Stop("UpdateGpair");
|
||||
@@ -134,7 +134,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
++group_idx) {
|
||||
// Get gradient
|
||||
auto grad = GradientPair(0, 0);
|
||||
if (learner_param_->gpu_id >= 0) {
|
||||
if (ctx_->gpu_id >= 0) {
|
||||
grad = GetBiasGradient(group_idx, model->learner_model_param->num_output_group);
|
||||
}
|
||||
auto dbias = static_cast<float>(
|
||||
@@ -143,7 +143,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
model->Bias()[group_idx] += dbias;
|
||||
|
||||
// Update residual
|
||||
if (learner_param_->gpu_id >= 0) {
|
||||
if (ctx_->gpu_id >= 0) {
|
||||
UpdateBiasResidual(dbias, group_idx, model->learner_model_param->num_output_group);
|
||||
}
|
||||
}
|
||||
@@ -155,7 +155,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
bst_float &w = (*model)[fidx][group_idx];
|
||||
// Get gradient
|
||||
auto grad = GradientPair(0, 0);
|
||||
if (learner_param_->gpu_id >= 0) {
|
||||
if (ctx_->gpu_id >= 0) {
|
||||
grad = GetGradient(group_idx, model->learner_model_param->num_output_group, fidx);
|
||||
}
|
||||
auto dw = static_cast<float>(tparam_.learning_rate *
|
||||
@@ -164,14 +164,14 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
tparam_.reg_lambda_denorm));
|
||||
w += dw;
|
||||
|
||||
if (learner_param_->gpu_id >= 0) {
|
||||
if (ctx_->gpu_id >= 0) {
|
||||
UpdateResidual(dw, group_idx, model->learner_model_param->num_output_group, fidx);
|
||||
}
|
||||
}
|
||||
|
||||
// This needs to be public because of the __device__ lambda.
|
||||
GradientPair GetBiasGradient(int group_idx, int num_group) {
|
||||
dh::safe_cuda(cudaSetDevice(learner_param_->gpu_id));
|
||||
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
|
||||
auto counting = thrust::make_counting_iterator(0ull);
|
||||
auto f = [=] __device__(size_t idx) {
|
||||
return idx * num_group + group_idx;
|
||||
@@ -195,7 +195,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
|
||||
|
||||
// This needs to be public because of the __device__ lambda.
|
||||
GradientPair GetGradient(int group_idx, int num_group, int fidx) {
|
||||
dh::safe_cuda(cudaSetDevice(learner_param_->gpu_id));
|
||||
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
|
||||
common::Span<xgboost::Entry> d_col = dh::ToSpan(data_).subspan(row_ptr_[fidx]);
|
||||
size_t col_size = row_ptr_[fidx + 1] - row_ptr_[fidx];
|
||||
common::Span<GradientPair> d_gpair = dh::ToSpan(gpair_);
|
||||
|
||||
@@ -21,7 +21,7 @@ class ShotgunUpdater : public LinearUpdater {
|
||||
LOG(FATAL) << "Unsupported feature selector for shotgun updater.\n"
|
||||
<< "Supported options are: {cyclic, shuffle}";
|
||||
}
|
||||
selector_.reset(FeatureSelector::Create(param_.feature_selector));
|
||||
selector_.reset(FeatureSelector::Create(param_.feature_selector, ctx_->Threads()));
|
||||
}
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& config = get<Object const>(in);
|
||||
@@ -40,12 +40,13 @@ class ShotgunUpdater : public LinearUpdater {
|
||||
|
||||
// update bias
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
auto grad = GetBiasGradientParallel(gid, ngroup,
|
||||
in_gpair->ConstHostVector(), p_fmat);
|
||||
auto grad = GetBiasGradientParallel(gid, ngroup, in_gpair->ConstHostVector(), p_fmat,
|
||||
ctx_->Threads());
|
||||
auto dbias = static_cast<bst_float>(param_.learning_rate *
|
||||
CoordinateDeltaBias(grad.first, grad.second));
|
||||
model->Bias()[gid] += dbias;
|
||||
UpdateBiasResidualParallel(gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat);
|
||||
UpdateBiasResidualParallel(gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat,
|
||||
ctx_->Threads());
|
||||
}
|
||||
|
||||
// lock-free parallel updates of weights
|
||||
@@ -54,42 +55,35 @@ class ShotgunUpdater : public LinearUpdater {
|
||||
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
|
||||
auto page = batch.GetView();
|
||||
const auto nfeat = static_cast<bst_omp_uint>(batch.Size());
|
||||
dmlc::OMPException exc;
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nfeat; ++i) {
|
||||
exc.Run([&]() {
|
||||
int ii = selector_->NextFeature
|
||||
(i, *model, 0, in_gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm,
|
||||
param_.reg_lambda_denorm);
|
||||
if (ii < 0) return;
|
||||
const bst_uint fid = ii;
|
||||
auto col = page[ii];
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
double sum_grad = 0.0, sum_hess = 0.0;
|
||||
for (auto& c : col) {
|
||||
const GradientPair &p = gpair[c.index * ngroup + gid];
|
||||
if (p.GetHess() < 0.0f) continue;
|
||||
const bst_float v = c.fvalue;
|
||||
sum_grad += p.GetGrad() * v;
|
||||
sum_hess += p.GetHess() * v * v;
|
||||
}
|
||||
bst_float &w = (*model)[fid][gid];
|
||||
auto dw = static_cast<bst_float>(
|
||||
param_.learning_rate *
|
||||
CoordinateDelta(sum_grad, sum_hess, w, param_.reg_alpha_denorm,
|
||||
param_.reg_lambda_denorm));
|
||||
if (dw == 0.f) continue;
|
||||
w += dw;
|
||||
// update grad values
|
||||
for (auto& c : col) {
|
||||
GradientPair &p = gpair[c.index * ngroup + gid];
|
||||
if (p.GetHess() < 0.0f) continue;
|
||||
p += GradientPair(p.GetHess() * c.fvalue * dw, 0);
|
||||
}
|
||||
common::ParallelFor(nfeat, ctx_->Threads(), [&](auto i) {
|
||||
int ii = selector_->NextFeature(i, *model, 0, in_gpair->ConstHostVector(), p_fmat,
|
||||
param_.reg_alpha_denorm, param_.reg_lambda_denorm);
|
||||
if (ii < 0) return;
|
||||
const bst_uint fid = ii;
|
||||
auto col = page[ii];
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
double sum_grad = 0.0, sum_hess = 0.0;
|
||||
for (auto &c : col) {
|
||||
const GradientPair &p = gpair[c.index * ngroup + gid];
|
||||
if (p.GetHess() < 0.0f) continue;
|
||||
const bst_float v = c.fvalue;
|
||||
sum_grad += p.GetGrad() * v;
|
||||
sum_hess += p.GetHess() * v * v;
|
||||
}
|
||||
});
|
||||
}
|
||||
exc.Rethrow();
|
||||
bst_float &w = (*model)[fid][gid];
|
||||
auto dw = static_cast<bst_float>(
|
||||
param_.learning_rate * CoordinateDelta(sum_grad, sum_hess, w, param_.reg_alpha_denorm,
|
||||
param_.reg_lambda_denorm));
|
||||
if (dw == 0.f) continue;
|
||||
w += dw;
|
||||
// update grad values
|
||||
for (auto &c : col) {
|
||||
GradientPair &p = gpair[c.index * ngroup + gid];
|
||||
if (p.GetHess() < 0.0f) continue;
|
||||
p += GradientPair(p.GetHess() * c.fvalue * dw, 0);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user