Distributed Fast Histogram Algorithm (#4011)
* add back train method but mark as deprecated * add back train method but mark as deprecated * add back train method but mark as deprecated * fix scalastyle error * fix scalastyle error * fix scalastyle error * fix scalastyle error * init * allow hist algo * more changes * temp * update * remove hist sync * udpate rabit * change hist size * change the histogram * update kfactor * sync per node stats * temp * update * final * code clean * update rabit * more cleanup * fix errors * fix failed tests * enforce c++11 * fix lint issue * broadcast subsampled feature correctly * revert some changes * fix lint issue * enable monotone and interaction constraints * don't specify default for monotone and interactions * update docs
This commit is contained in:
@@ -83,9 +83,9 @@ void HistCutMatrix::Init
|
||||
summary_array[i].Reserve(max_num_bins * kFactor);
|
||||
summary_array[i].SetPrune(out, max_num_bins * kFactor);
|
||||
}
|
||||
CHECK_EQ(summary_array.size(), in_sketchs->size());
|
||||
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_num_bins * kFactor);
|
||||
sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size());
|
||||
|
||||
this->min_val.resize(sketchs.size());
|
||||
row_ptr.push_back(0);
|
||||
for (size_t fid = 0; fid < summary_array.size(); ++fid) {
|
||||
@@ -479,14 +479,14 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
|
||||
|
||||
#pragma omp parallel for num_threads(std::min(nthread, n_blocks)) schedule(guided)
|
||||
for (bst_omp_uint iblock = 0; iblock < n_blocks; iblock++) {
|
||||
const size_t istart = iblock*block_size;
|
||||
const size_t iend = (((iblock+1)*block_size > size) ? size : istart + block_size);
|
||||
const size_t istart = iblock * block_size;
|
||||
const size_t iend = (((iblock + 1) * block_size > size) ? size : istart + block_size);
|
||||
|
||||
const size_t bin = 2*thread_init_[0]*nbins_;
|
||||
memcpy(hist_data + istart, (data + bin + istart), sizeof(double)*(iend - istart));
|
||||
const size_t bin = 2 * thread_init_[0] * nbins_;
|
||||
memcpy(hist_data + istart, (data + bin + istart), sizeof(double) * (iend - istart));
|
||||
|
||||
for (size_t i_bin_part = 1; i_bin_part < n_worked_bins; ++i_bin_part) {
|
||||
const size_t bin = 2*thread_init_[i_bin_part]*nbins_;
|
||||
const size_t bin = 2 * thread_init_[i_bin_part] * nbins_;
|
||||
for (size_t i = istart; i < iend; i++) {
|
||||
hist_data[i] += data[bin + i];
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "row_set.h"
|
||||
#include "../tree/param.h"
|
||||
#include "./quantile.h"
|
||||
#include "../include/rabit/rabit.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@@ -43,6 +44,10 @@ struct GHistEntry {
|
||||
sum_hess += e.sum_hess;
|
||||
}
|
||||
|
||||
inline static void Reduce(GHistEntry& a, const GHistEntry& b) { // NOLINT(*)
|
||||
a.Add(b);
|
||||
}
|
||||
|
||||
/*! \brief set sum to be difference of two GHistEntry's */
|
||||
inline void SetSubtract(const GHistEntry& a, const GHistEntry& b) {
|
||||
sum_grad = a.sum_grad - b.sum_grad;
|
||||
@@ -166,7 +171,7 @@ class GHistIndexBlockMatrix {
|
||||
};
|
||||
|
||||
/*!
|
||||
* \brief histogram of graident statistics for a single node.
|
||||
* \brief histogram of gradient statistics for a single node.
|
||||
* Consists of multiple GHistEntry's, each entry showing total graident statistics
|
||||
* for that particular bin
|
||||
* Uses global bin id so as to represent all features simultaneously
|
||||
@@ -254,6 +259,10 @@ class GHistBuilder {
|
||||
// construct a histogram via subtraction trick
|
||||
void SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent);
|
||||
|
||||
uint32_t GetNumBins() {
|
||||
return nbins_;
|
||||
}
|
||||
|
||||
private:
|
||||
/*! \brief number of threads for parallel computation */
|
||||
size_t nthread_;
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
#include <numeric>
|
||||
#include <random>
|
||||
|
||||
#include "io.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
/*!
|
||||
|
||||
Reference in New Issue
Block a user