Gradient based sampling for GPU Hist (#5093)
* Implement gradient based sampling for GPU Hist tree method. * Add samplers and handle compacted page in GPU Hist.
This commit is contained in:
380
src/tree/gpu_hist/gradient_based_sampler.cu
Normal file
380
src/tree/gpu_hist/gradient_based_sampler.cu
Normal file
@@ -0,0 +1,380 @@
|
||||
/*!
|
||||
* Copyright 2019 by XGBoost Contributors
|
||||
*/
|
||||
#include <thrust/functional.h>
|
||||
#include <thrust/random.h>
|
||||
#include <thrust/transform.h>
|
||||
#include <xgboost/host_device_vector.h>
|
||||
#include <xgboost/logging.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "../../common/compressed_iterator.h"
|
||||
#include "../../common/random.h"
|
||||
#include "gradient_based_sampler.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
/*! \brief A functor that returns random weights. */
|
||||
class RandomWeight : public thrust::unary_function<size_t, float> {
|
||||
public:
|
||||
explicit RandomWeight(size_t seed) : seed_(seed) {}
|
||||
|
||||
XGBOOST_DEVICE float operator()(size_t i) const {
|
||||
thrust::default_random_engine rng(seed_);
|
||||
thrust::uniform_real_distribution<float> dist;
|
||||
rng.discard(i);
|
||||
return dist(rng);
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t seed_;
|
||||
};
|
||||
|
||||
/*! \brief A functor that performs a Bernoulli trial to discard a gradient pair. */
|
||||
class BernoulliTrial : public thrust::unary_function<size_t, bool> {
|
||||
public:
|
||||
BernoulliTrial(size_t seed, float p) : rnd_(seed), p_(p) {}
|
||||
|
||||
XGBOOST_DEVICE bool operator()(size_t i) const {
|
||||
return rnd_(i) > p_;
|
||||
}
|
||||
|
||||
private:
|
||||
RandomWeight rnd_;
|
||||
float p_;
|
||||
};
|
||||
|
||||
/*! \brief A functor that returns true if the gradient pair is non-zero. */
|
||||
struct IsNonZero : public thrust::unary_function<GradientPair, bool> {
|
||||
XGBOOST_DEVICE bool operator()(const GradientPair& gpair) const {
|
||||
return gpair.GetGrad() != 0 || gpair.GetHess() != 0;
|
||||
}
|
||||
};
|
||||
|
||||
/*! \brief A functor that clears the row indexes with empty gradient. */
|
||||
struct ClearEmptyRows : public thrust::binary_function<GradientPair, size_t, size_t> {
|
||||
XGBOOST_DEVICE size_t operator()(const GradientPair& gpair, size_t row_index) const {
|
||||
if (gpair.GetGrad() != 0 || gpair.GetHess() != 0) {
|
||||
return row_index;
|
||||
} else {
|
||||
return std::numeric_limits<std::size_t>::max();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*! \brief A functor that combines the gradient pair into a single float.
|
||||
*
|
||||
* The approach here is based on Minimal Variance Sampling (MVS), with lambda set to 0.1.
|
||||
*
|
||||
* \see Ibragimov, B., & Gusev, G. (2019). Minimal Variance Sampling in Stochastic Gradient
|
||||
* Boosting. In Advances in Neural Information Processing Systems (pp. 15061-15071).
|
||||
*/
|
||||
class CombineGradientPair : public thrust::unary_function<GradientPair, float> {
|
||||
public:
|
||||
XGBOOST_DEVICE float operator()(const GradientPair& gpair) const {
|
||||
return sqrtf(powf(gpair.GetGrad(), 2) + kLambda * powf(gpair.GetHess(), 2));
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr float kLambda = 0.1f;
|
||||
};
|
||||
|
||||
/*! \brief A functor that calculates the difference between the sample rate and the desired sample
|
||||
* rows, given a cumulative gradient sum.
|
||||
*/
|
||||
class SampleRateDelta : public thrust::binary_function<float, size_t, float> {
|
||||
public:
|
||||
SampleRateDelta(common::Span<float> threshold, size_t n_rows, size_t sample_rows)
|
||||
: threshold_(threshold), n_rows_(n_rows), sample_rows_(sample_rows) {}
|
||||
|
||||
XGBOOST_DEVICE float operator()(float gradient_sum, size_t row_index) const {
|
||||
float lower = threshold_[row_index];
|
||||
float upper = threshold_[row_index + 1];
|
||||
float u = gradient_sum / static_cast<float>(sample_rows_ - n_rows_ + row_index + 1);
|
||||
if (u > lower && u <= upper) {
|
||||
threshold_[row_index + 1] = u;
|
||||
return 0.0f;
|
||||
} else {
|
||||
return std::numeric_limits<float>::max();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
common::Span<float> threshold_;
|
||||
size_t n_rows_;
|
||||
size_t sample_rows_;
|
||||
};
|
||||
|
||||
/*! \brief A functor that performs Poisson sampling, and scales gradient pairs by 1/p_i. */
|
||||
class PoissonSampling : public thrust::binary_function<GradientPair, size_t, GradientPair> {
|
||||
public:
|
||||
PoissonSampling(common::Span<float> threshold, size_t threshold_index, RandomWeight rnd)
|
||||
: threshold_(threshold), threshold_index_(threshold_index), rnd_(rnd) {}
|
||||
|
||||
XGBOOST_DEVICE GradientPair operator()(const GradientPair& gpair, size_t i) {
|
||||
// If the gradient and hessian are both empty, we should never select this row.
|
||||
if (gpair.GetGrad() == 0 && gpair.GetHess() == 0) {
|
||||
return gpair;
|
||||
}
|
||||
float combined_gradient = combine_(gpair);
|
||||
float u = threshold_[threshold_index_];
|
||||
float p = combined_gradient / u;
|
||||
if (p >= 1) {
|
||||
// Always select this row.
|
||||
return gpair;
|
||||
} else {
|
||||
// Select this row randomly with probability proportional to the combined gradient.
|
||||
// Scale gpair by 1/p.
|
||||
if (rnd_(i) <= p) {
|
||||
return gpair / p;
|
||||
} else {
|
||||
return GradientPair();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
common::Span<float> threshold_;
|
||||
size_t threshold_index_;
|
||||
RandomWeight rnd_;
|
||||
CombineGradientPair combine_;
|
||||
};
|
||||
|
||||
NoSampling::NoSampling(EllpackPageImpl* page) : page_(page) {}
|
||||
|
||||
GradientBasedSample NoSampling::Sample(common::Span<GradientPair> gpair, DMatrix* dmat) {
|
||||
return {dmat->Info().num_row_, page_, gpair};
|
||||
}
|
||||
|
||||
ExternalMemoryNoSampling::ExternalMemoryNoSampling(EllpackPageImpl* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param)
|
||||
: batch_param_(batch_param),
|
||||
page_(new EllpackPageImpl(batch_param.gpu_id, page->matrix.info, n_rows)) {}
|
||||
|
||||
GradientBasedSample ExternalMemoryNoSampling::Sample(common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
if (!page_concatenated_) {
|
||||
// Concatenate all the external memory ELLPACK pages into a single in-memory page.
|
||||
size_t offset = 0;
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(batch_param_)) {
|
||||
auto page = batch.Impl();
|
||||
size_t num_elements = page_->Copy(batch_param_.gpu_id, page, offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
page_concatenated_ = true;
|
||||
}
|
||||
return {dmat->Info().num_row_, page_.get(), gpair};
|
||||
}
|
||||
|
||||
UniformSampling::UniformSampling(EllpackPageImpl* page, float subsample)
|
||||
: page_(page), subsample_(subsample) {}
|
||||
|
||||
GradientBasedSample UniformSampling::Sample(common::Span<GradientPair> gpair, DMatrix* dmat) {
|
||||
// Set gradient pair to 0 with p = 1 - subsample
|
||||
thrust::replace_if(dh::tbegin(gpair), dh::tend(gpair),
|
||||
thrust::counting_iterator<size_t>(0),
|
||||
BernoulliTrial(common::GlobalRandom()(), subsample_),
|
||||
GradientPair());
|
||||
return {dmat->Info().num_row_, page_, gpair};
|
||||
}
|
||||
|
||||
ExternalMemoryUniformSampling::ExternalMemoryUniformSampling(EllpackPageImpl* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param,
|
||||
float subsample)
|
||||
: original_page_(page), batch_param_(batch_param), subsample_(subsample) {
|
||||
ba_.Allocate(batch_param_.gpu_id, &sample_row_index_, n_rows);
|
||||
}
|
||||
|
||||
GradientBasedSample ExternalMemoryUniformSampling::Sample(common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
// Set gradient pair to 0 with p = 1 - subsample
|
||||
thrust::replace_if(dh::tbegin(gpair), dh::tend(gpair),
|
||||
thrust::counting_iterator<size_t>(0),
|
||||
BernoulliTrial(common::GlobalRandom()(), subsample_),
|
||||
GradientPair());
|
||||
|
||||
// Count the sampled rows.
|
||||
size_t sample_rows = thrust::count_if(dh::tbegin(gpair), dh::tend(gpair), IsNonZero());
|
||||
size_t n_rows = dmat->Info().num_row_;
|
||||
|
||||
// Compact gradient pairs.
|
||||
gpair_.resize(sample_rows);
|
||||
thrust::copy_if(dh::tbegin(gpair), dh::tend(gpair), gpair_.begin(), IsNonZero());
|
||||
|
||||
// Index the sample rows.
|
||||
thrust::transform(dh::tbegin(gpair), dh::tend(gpair), dh::tbegin(sample_row_index_), IsNonZero());
|
||||
thrust::exclusive_scan(dh::tbegin(sample_row_index_), dh::tend(sample_row_index_),
|
||||
dh::tbegin(sample_row_index_));
|
||||
thrust::transform(dh::tbegin(gpair), dh::tend(gpair),
|
||||
dh::tbegin(sample_row_index_),
|
||||
dh::tbegin(sample_row_index_),
|
||||
ClearEmptyRows());
|
||||
|
||||
// Create a new ELLPACK page with empty rows.
|
||||
page_.reset(); // Release the device memory first before reallocating
|
||||
page_.reset(new EllpackPageImpl(batch_param_.gpu_id,
|
||||
original_page_->matrix.info,
|
||||
sample_rows));
|
||||
|
||||
// Compact the ELLPACK pages into the single sample page.
|
||||
thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(batch_param_)) {
|
||||
page_->Compact(batch_param_.gpu_id, batch.Impl(), sample_row_index_);
|
||||
}
|
||||
|
||||
return {sample_rows, page_.get(), dh::ToSpan(gpair_)};
|
||||
}
|
||||
|
||||
GradientBasedSampling::GradientBasedSampling(EllpackPageImpl* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param,
|
||||
float subsample) : page_(page), subsample_(subsample) {
|
||||
ba_.Allocate(batch_param.gpu_id,
|
||||
&threshold_, n_rows + 1,
|
||||
&grad_sum_, n_rows);
|
||||
}
|
||||
|
||||
GradientBasedSample GradientBasedSampling::Sample(common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
size_t n_rows = dmat->Info().num_row_;
|
||||
size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex(
|
||||
gpair, threshold_, grad_sum_, n_rows * subsample_);
|
||||
|
||||
// Perform Poisson sampling in place.
|
||||
thrust::transform(dh::tbegin(gpair), dh::tend(gpair),
|
||||
thrust::counting_iterator<size_t>(0),
|
||||
dh::tbegin(gpair),
|
||||
PoissonSampling(threshold_,
|
||||
threshold_index,
|
||||
RandomWeight(common::GlobalRandom()())));
|
||||
return {n_rows, page_, gpair};
|
||||
}
|
||||
|
||||
ExternalMemoryGradientBasedSampling::ExternalMemoryGradientBasedSampling(
|
||||
EllpackPageImpl* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param,
|
||||
float subsample) : original_page_(page), batch_param_(batch_param), subsample_(subsample) {
|
||||
ba_.Allocate(batch_param.gpu_id,
|
||||
&threshold_, n_rows + 1,
|
||||
&grad_sum_, n_rows,
|
||||
&sample_row_index_, n_rows);
|
||||
}
|
||||
|
||||
GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
size_t n_rows = dmat->Info().num_row_;
|
||||
size_t threshold_index = GradientBasedSampler::CalculateThresholdIndex(
|
||||
gpair, threshold_, grad_sum_, n_rows * subsample_);
|
||||
|
||||
// Perform Poisson sampling in place.
|
||||
thrust::transform(dh::tbegin(gpair), dh::tend(gpair),
|
||||
thrust::counting_iterator<size_t>(0),
|
||||
dh::tbegin(gpair),
|
||||
PoissonSampling(threshold_,
|
||||
threshold_index,
|
||||
RandomWeight(common::GlobalRandom()())));
|
||||
|
||||
// Count the sampled rows.
|
||||
size_t sample_rows = thrust::count_if(dh::tbegin(gpair), dh::tend(gpair), IsNonZero());
|
||||
|
||||
// Compact gradient pairs.
|
||||
gpair_.resize(sample_rows);
|
||||
thrust::copy_if(dh::tbegin(gpair), dh::tend(gpair), gpair_.begin(), IsNonZero());
|
||||
|
||||
// Index the sample rows.
|
||||
thrust::transform(dh::tbegin(gpair), dh::tend(gpair), dh::tbegin(sample_row_index_), IsNonZero());
|
||||
thrust::exclusive_scan(dh::tbegin(sample_row_index_), dh::tend(sample_row_index_),
|
||||
dh::tbegin(sample_row_index_));
|
||||
thrust::transform(dh::tbegin(gpair), dh::tend(gpair),
|
||||
dh::tbegin(sample_row_index_),
|
||||
dh::tbegin(sample_row_index_),
|
||||
ClearEmptyRows());
|
||||
|
||||
// Create a new ELLPACK page with empty rows.
|
||||
page_.reset(); // Release the device memory first before reallocating
|
||||
page_.reset(new EllpackPageImpl(batch_param_.gpu_id,
|
||||
original_page_->matrix.info,
|
||||
sample_rows));
|
||||
|
||||
// Compact the ELLPACK pages into the single sample page.
|
||||
thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(batch_param_)) {
|
||||
page_->Compact(batch_param_.gpu_id, batch.Impl(), sample_row_index_);
|
||||
}
|
||||
|
||||
return {sample_rows, page_.get(), dh::ToSpan(gpair_)};
|
||||
}
|
||||
|
||||
GradientBasedSampler::GradientBasedSampler(EllpackPageImpl* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param,
|
||||
float subsample,
|
||||
int sampling_method) {
|
||||
monitor_.Init("gradient_based_sampler");
|
||||
|
||||
bool is_sampling = subsample < 1.0;
|
||||
bool is_external_memory = page->matrix.n_rows != n_rows;
|
||||
|
||||
if (is_sampling) {
|
||||
switch (sampling_method) {
|
||||
case TrainParam::kUniform:
|
||||
if (is_external_memory) {
|
||||
strategy_.reset(new ExternalMemoryUniformSampling(page, n_rows, batch_param, subsample));
|
||||
} else {
|
||||
strategy_.reset(new UniformSampling(page, subsample));
|
||||
}
|
||||
break;
|
||||
case TrainParam::kGradientBased:
|
||||
if (is_external_memory) {
|
||||
strategy_.reset(
|
||||
new ExternalMemoryGradientBasedSampling(page, n_rows, batch_param, subsample));
|
||||
} else {
|
||||
strategy_.reset(new GradientBasedSampling(page, n_rows, batch_param, subsample));
|
||||
}
|
||||
break;
|
||||
default:LOG(FATAL) << "unknown sampling method";
|
||||
}
|
||||
} else {
|
||||
if (is_external_memory) {
|
||||
strategy_.reset(new ExternalMemoryNoSampling(page, n_rows, batch_param));
|
||||
} else {
|
||||
strategy_.reset(new NoSampling(page));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sample a DMatrix based on the given gradient pairs.
|
||||
GradientBasedSample GradientBasedSampler::Sample(common::Span<GradientPair> gpair,
|
||||
DMatrix* dmat) {
|
||||
monitor_.StartCuda("Sample");
|
||||
GradientBasedSample sample = strategy_->Sample(gpair, dmat);
|
||||
monitor_.StopCuda("Sample");
|
||||
return sample;
|
||||
}
|
||||
|
||||
size_t GradientBasedSampler::CalculateThresholdIndex(common::Span<GradientPair> gpair,
|
||||
common::Span<float> threshold,
|
||||
common::Span<float> grad_sum,
|
||||
size_t sample_rows) {
|
||||
thrust::fill(dh::tend(threshold) - 1, dh::tend(threshold), std::numeric_limits<float>::max());
|
||||
thrust::transform(dh::tbegin(gpair), dh::tend(gpair),
|
||||
dh::tbegin(threshold),
|
||||
CombineGradientPair());
|
||||
thrust::sort(dh::tbegin(threshold), dh::tend(threshold) - 1);
|
||||
thrust::inclusive_scan(dh::tbegin(threshold), dh::tend(threshold) - 1, dh::tbegin(grad_sum));
|
||||
thrust::transform(dh::tbegin(grad_sum), dh::tend(grad_sum),
|
||||
thrust::counting_iterator<size_t>(0),
|
||||
dh::tbegin(grad_sum),
|
||||
SampleRateDelta(threshold, gpair.size(), sample_rows));
|
||||
thrust::device_ptr<float> min = thrust::min_element(dh::tbegin(grad_sum), dh::tend(grad_sum));
|
||||
return thrust::distance(dh::tbegin(grad_sum), min) + 1;
|
||||
}
|
||||
|
||||
}; // namespace tree
|
||||
}; // namespace xgboost
|
||||
153
src/tree/gpu_hist/gradient_based_sampler.cuh
Normal file
153
src/tree/gpu_hist/gradient_based_sampler.cuh
Normal file
@@ -0,0 +1,153 @@
|
||||
/*!
|
||||
* Copyright 2019 by XGBoost Contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/span.h>
|
||||
|
||||
#include "../../common/device_helpers.cuh"
|
||||
#include "../../data/ellpack_page.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
struct GradientBasedSample {
|
||||
/*!\brief Number of sampled rows. */
|
||||
size_t sample_rows;
|
||||
/*!\brief Sampled rows in ELLPACK format. */
|
||||
EllpackPageImpl* page;
|
||||
/*!\brief Gradient pairs for the sampled rows. */
|
||||
common::Span<GradientPair> gpair;
|
||||
};
|
||||
|
||||
class SamplingStrategy {
|
||||
public:
|
||||
/*! \brief Sample from a DMatrix based on the given gradient pairs. */
|
||||
virtual GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) = 0;
|
||||
};
|
||||
|
||||
/*! \brief No sampling in in-memory mode. */
|
||||
class NoSampling : public SamplingStrategy {
|
||||
public:
|
||||
explicit NoSampling(EllpackPageImpl* page);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
EllpackPageImpl* page_;
|
||||
};
|
||||
|
||||
/*! \brief No sampling in external memory mode. */
|
||||
class ExternalMemoryNoSampling : public SamplingStrategy {
|
||||
public:
|
||||
ExternalMemoryNoSampling(EllpackPageImpl* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
BatchParam batch_param_;
|
||||
std::unique_ptr<EllpackPageImpl> page_;
|
||||
bool page_concatenated_{false};
|
||||
};
|
||||
|
||||
/*! \brief Uniform sampling in in-memory mode. */
|
||||
class UniformSampling : public SamplingStrategy {
|
||||
public:
|
||||
UniformSampling(EllpackPageImpl* page, float subsample);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
EllpackPageImpl* page_;
|
||||
float subsample_;
|
||||
};
|
||||
|
||||
/*! \brief No sampling in external memory mode. */
|
||||
class ExternalMemoryUniformSampling : public SamplingStrategy {
|
||||
public:
|
||||
ExternalMemoryUniformSampling(EllpackPageImpl* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param,
|
||||
float subsample);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
dh::BulkAllocator ba_;
|
||||
EllpackPageImpl* original_page_;
|
||||
BatchParam batch_param_;
|
||||
float subsample_;
|
||||
std::unique_ptr<EllpackPageImpl> page_;
|
||||
dh::device_vector<GradientPair> gpair_{};
|
||||
common::Span<size_t> sample_row_index_;
|
||||
};
|
||||
|
||||
/*! \brief Gradient-based sampling in in-memory mode.. */
|
||||
class GradientBasedSampling : public SamplingStrategy {
|
||||
public:
|
||||
GradientBasedSampling(EllpackPageImpl* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param,
|
||||
float subsample);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
EllpackPageImpl* page_;
|
||||
float subsample_;
|
||||
dh::BulkAllocator ba_;
|
||||
common::Span<float> threshold_;
|
||||
common::Span<float> grad_sum_;
|
||||
};
|
||||
|
||||
/*! \brief Gradient-based sampling in external memory mode.. */
|
||||
class ExternalMemoryGradientBasedSampling : public SamplingStrategy {
|
||||
public:
|
||||
ExternalMemoryGradientBasedSampling(EllpackPageImpl* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param,
|
||||
float subsample);
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat) override;
|
||||
|
||||
private:
|
||||
dh::BulkAllocator ba_;
|
||||
EllpackPageImpl* original_page_;
|
||||
BatchParam batch_param_;
|
||||
float subsample_;
|
||||
common::Span<float> threshold_;
|
||||
common::Span<float> grad_sum_;
|
||||
std::unique_ptr<EllpackPageImpl> page_;
|
||||
dh::device_vector<GradientPair> gpair_;
|
||||
common::Span<size_t> sample_row_index_;
|
||||
};
|
||||
|
||||
/*! \brief Draw a sample of rows from a DMatrix.
|
||||
*
|
||||
* \see Ke, G., Meng, Q., Finley, T., Wang, T., Chen, W., Ma, W., ... & Liu, T. Y. (2017).
|
||||
* Lightgbm: A highly efficient gradient boosting decision tree. In Advances in Neural Information
|
||||
* Processing Systems (pp. 3146-3154).
|
||||
* \see Zhu, R. (2016). Gradient-based sampling: An adaptive importance sampling for least-squares.
|
||||
* In Advances in Neural Information Processing Systems (pp. 406-414).
|
||||
* \see Ohlsson, E. (1998). Sequential poisson sampling. Journal of official Statistics, 14(2), 149.
|
||||
*/
|
||||
class GradientBasedSampler {
|
||||
public:
|
||||
GradientBasedSampler(EllpackPageImpl* page,
|
||||
size_t n_rows,
|
||||
const BatchParam& batch_param,
|
||||
float subsample,
|
||||
int sampling_method);
|
||||
|
||||
/*! \brief Sample from a DMatrix based on the given gradient pairs. */
|
||||
GradientBasedSample Sample(common::Span<GradientPair> gpair, DMatrix* dmat);
|
||||
|
||||
/*! \brief Calculate the threshold used to normalize sampling probabilities. */
|
||||
static size_t CalculateThresholdIndex(common::Span<GradientPair> gpair,
|
||||
common::Span<float> threshold,
|
||||
common::Span<float> grad_sum,
|
||||
size_t sample_rows);
|
||||
|
||||
private:
|
||||
common::Monitor monitor_;
|
||||
std::unique_ptr<SamplingStrategy> strategy_;
|
||||
};
|
||||
}; // namespace tree
|
||||
}; // namespace xgboost
|
||||
@@ -125,7 +125,6 @@ class RowPartitioner {
|
||||
idx += segment.begin;
|
||||
RowIndexT ridx = d_ridx[idx];
|
||||
bst_node_t new_position = op(ridx); // new node id
|
||||
if (new_position == kIgnoredTreePosition) return;
|
||||
KERNEL_CHECK(new_position == left_nidx || new_position == right_nidx);
|
||||
AtomicIncrement(d_left_count, new_position == left_nidx);
|
||||
d_position[idx] = new_position;
|
||||
|
||||
Reference in New Issue
Block a user