Optimizations for RNG in InitData kernel (#5522)
* optimizations for subsampling in InitData * optimizations for subsampling in InitData Co-authored-by: SHVETS, KIRILL <kirill.shvets@intel.com>
This commit is contained in:
parent
e268fb0093
commit
a2d86b8e4b
@ -536,6 +536,63 @@ bool QuantileHistMaker::Builder::UpdatePredictionCache(
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void QuantileHistMaker::Builder::InitSampling(const std::vector<GradientPair>& gpair,
|
||||||
|
const DMatrix& fmat,
|
||||||
|
std::vector<size_t>* row_indices) {
|
||||||
|
const auto& info = fmat.Info();
|
||||||
|
auto& rnd = common::GlobalRandom();
|
||||||
|
std::vector<size_t>& row_indices_local = *row_indices;
|
||||||
|
size_t* p_row_indices = row_indices_local.data();
|
||||||
|
#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
|
||||||
|
std::bernoulli_distribution coin_flip(param_.subsample);
|
||||||
|
size_t j = 0;
|
||||||
|
for (size_t i = 0; i < info.num_row_; ++i) {
|
||||||
|
if (gpair[i].GetHess() >= 0.0f && coin_flip(rnd)) {
|
||||||
|
p_row_indices[j++] = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* resize row_indices to reduce memory */
|
||||||
|
row_indices_local.resize(j);
|
||||||
|
#else
|
||||||
|
const size_t nthread = this->nthread_;
|
||||||
|
std::vector<size_t> row_offsets(nthread, 0);
|
||||||
|
/* usage of mt19937_64 give 2x speed up for subsampling */
|
||||||
|
std::vector<std::mt19937> rnds(nthread);
|
||||||
|
/* create engine for each thread */
|
||||||
|
for (std::mt19937& r : rnds) {
|
||||||
|
r = rnd;
|
||||||
|
}
|
||||||
|
const size_t discard_size = info.num_row_ / nthread;
|
||||||
|
#pragma omp parallel num_threads(nthread)
|
||||||
|
{
|
||||||
|
const size_t tid = omp_get_thread_num();
|
||||||
|
const size_t ibegin = tid * discard_size;
|
||||||
|
const size_t iend = (tid == (nthread - 1)) ?
|
||||||
|
info.num_row_ : ibegin + discard_size;
|
||||||
|
std::bernoulli_distribution coin_flip(param_.subsample);
|
||||||
|
|
||||||
|
rnds[tid].discard(2*discard_size * tid);
|
||||||
|
for (size_t i = ibegin; i < iend; ++i) {
|
||||||
|
if (gpair[i].GetHess() >= 0.0f && coin_flip(rnds[tid])) {
|
||||||
|
p_row_indices[ibegin + row_offsets[tid]++] = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* discard global engine */
|
||||||
|
rnd = rnds[nthread - 1];
|
||||||
|
size_t prefix_sum = row_offsets[0];
|
||||||
|
for (size_t i = 1; i < nthread; ++i) {
|
||||||
|
const size_t ibegin = i * discard_size;
|
||||||
|
|
||||||
|
for (size_t k = 0; k < row_offsets[i]; ++k) {
|
||||||
|
row_indices_local[prefix_sum + k] = row_indices_local[ibegin + k];
|
||||||
|
}
|
||||||
|
prefix_sum += row_offsets[i];
|
||||||
|
}
|
||||||
|
/* resize row_indices to reduce memory */
|
||||||
|
row_indices_local.resize(prefix_sum);
|
||||||
|
#endif // XGBOOST_CUSTOMIZE_GLOBAL_PRNG
|
||||||
|
}
|
||||||
void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
|
void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
|
||||||
const std::vector<GradientPair>& gpair,
|
const std::vector<GradientPair>& gpair,
|
||||||
const DMatrix& fmat,
|
const DMatrix& fmat,
|
||||||
@ -569,22 +626,14 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
|
|||||||
|
|
||||||
std::vector<size_t>& row_indices = *row_set_collection_.Data();
|
std::vector<size_t>& row_indices = *row_set_collection_.Data();
|
||||||
row_indices.resize(info.num_row_);
|
row_indices.resize(info.num_row_);
|
||||||
auto* p_row_indices = row_indices.data();
|
size_t* p_row_indices = row_indices.data();
|
||||||
// mark subsample and build list of member rows
|
// mark subsample and build list of member rows
|
||||||
|
|
||||||
if (param_.subsample < 1.0f) {
|
if (param_.subsample < 1.0f) {
|
||||||
CHECK_EQ(param_.sampling_method, TrainParam::kUniform)
|
CHECK_EQ(param_.sampling_method, TrainParam::kUniform)
|
||||||
<< "Only uniform sampling is supported, "
|
<< "Only uniform sampling is supported, "
|
||||||
<< "gradient-based sampling is only support by GPU Hist.";
|
<< "gradient-based sampling is only support by GPU Hist.";
|
||||||
std::bernoulli_distribution coin_flip(param_.subsample);
|
InitSampling(gpair, fmat, &row_indices);
|
||||||
auto& rnd = common::GlobalRandom();
|
|
||||||
size_t j = 0;
|
|
||||||
for (size_t i = 0; i < info.num_row_; ++i) {
|
|
||||||
if (gpair[i].GetHess() >= 0.0f && coin_flip(rnd)) {
|
|
||||||
p_row_indices[j++] = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
row_indices.resize(j);
|
|
||||||
} else {
|
} else {
|
||||||
MemStackAllocator<bool, 128> buff(this->nthread_);
|
MemStackAllocator<bool, 128> buff(this->nthread_);
|
||||||
bool* p_buff = buff.Get();
|
bool* p_buff = buff.Get();
|
||||||
|
|||||||
@ -202,6 +202,9 @@ class QuantileHistMaker: public TreeUpdater {
|
|||||||
const DMatrix& fmat,
|
const DMatrix& fmat,
|
||||||
const RegTree& tree);
|
const RegTree& tree);
|
||||||
|
|
||||||
|
void InitSampling(const std::vector<GradientPair>& gpair,
|
||||||
|
const DMatrix& fmat, std::vector<size_t>* row_indices);
|
||||||
|
|
||||||
void EvaluateSplits(const std::vector<ExpandEntry>& nodes_set,
|
void EvaluateSplits(const std::vector<ExpandEntry>& nodes_set,
|
||||||
const GHistIndexMatrix& gmat,
|
const GHistIndexMatrix& gmat,
|
||||||
const HistCollection& hist,
|
const HistCollection& hist,
|
||||||
|
|||||||
@ -96,6 +96,31 @@ class QuantileHistMock : public QuantileHistMaker {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TestInitDataSampling(const GHistIndexMatrix& gmat,
|
||||||
|
const std::vector<GradientPair>& gpair,
|
||||||
|
DMatrix* p_fmat,
|
||||||
|
const RegTree& tree) {
|
||||||
|
const size_t nthreads = omp_get_num_threads();
|
||||||
|
// save state of global rng engine
|
||||||
|
auto initial_rnd = common::GlobalRandom();
|
||||||
|
RealImpl::InitData(gmat, gpair, *p_fmat, tree);
|
||||||
|
std::vector<size_t> row_indices_initial = *row_set_collection_.Data();
|
||||||
|
|
||||||
|
for (size_t i_nthreads = 1; i_nthreads < 4; ++i_nthreads) {
|
||||||
|
omp_set_num_threads(i_nthreads);
|
||||||
|
// return initial state of global rng engine
|
||||||
|
common::GlobalRandom() = initial_rnd;
|
||||||
|
RealImpl::InitData(gmat, gpair, *p_fmat, tree);
|
||||||
|
std::vector<size_t>& row_indices = *row_set_collection_.Data();
|
||||||
|
ASSERT_EQ(row_indices_initial.size(), row_indices.size());
|
||||||
|
for (size_t i = 0; i < row_indices_initial.size(); ++i) {
|
||||||
|
ASSERT_EQ(row_indices_initial[i], row_indices[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
omp_set_num_threads(nthreads);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void TestBuildHist(int nid,
|
void TestBuildHist(int nid,
|
||||||
const GHistIndexMatrix& gmat,
|
const GHistIndexMatrix& gmat,
|
||||||
const DMatrix& fmat,
|
const DMatrix& fmat,
|
||||||
@ -266,6 +291,20 @@ class QuantileHistMock : public QuantileHistMaker {
|
|||||||
builder_->TestInitData(gmat, gpair, dmat_.get(), tree);
|
builder_->TestInitData(gmat, gpair, dmat_.get(), tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TestInitDataSampling() {
|
||||||
|
size_t constexpr kMaxBins = 4;
|
||||||
|
common::GHistIndexMatrix gmat;
|
||||||
|
gmat.Init(dmat_.get(), kMaxBins);
|
||||||
|
|
||||||
|
RegTree tree = RegTree();
|
||||||
|
tree.param.UpdateAllowUnknown(cfg_);
|
||||||
|
|
||||||
|
std::vector<GradientPair> gpair =
|
||||||
|
{ {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f},
|
||||||
|
{0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} };
|
||||||
|
|
||||||
|
builder_->TestInitDataSampling(gmat, gpair, dmat_.get(), tree);
|
||||||
|
}
|
||||||
void TestBuildHist() {
|
void TestBuildHist() {
|
||||||
RegTree tree = RegTree();
|
RegTree tree = RegTree();
|
||||||
tree.param.UpdateAllowUnknown(cfg_);
|
tree.param.UpdateAllowUnknown(cfg_);
|
||||||
@ -292,6 +331,15 @@ TEST(QuantileHist, InitData) {
|
|||||||
maker.TestInitData();
|
maker.TestInitData();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs the sampling test of the mock with subsample set to 0.5.
TEST(QuantileHist, InitDataSampling) {
  const float kSubsample = 0.5f;

  std::vector<std::pair<std::string, std::string>> cfg;
  cfg.emplace_back("num_feature",
                   std::to_string(QuantileHistMock::GetNumColumns()));
  cfg.emplace_back("subsample", std::to_string(kSubsample));

  QuantileHistMock maker(cfg);
  maker.TestInitDataSampling();
}
|
||||||
|
|
||||||
TEST(QuantileHist, BuildHist) {
|
TEST(QuantileHist, BuildHist) {
|
||||||
// Don't enable feature grouping
|
// Don't enable feature grouping
|
||||||
std::vector<std::pair<std::string, std::string>> cfg
|
std::vector<std::pair<std::string, std::string>> cfg
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user