[GPU-Plugin] Fix gpu_hist to allow matrices with more than just 2^{32} elements. Also fixed CPU hist algorithm. (#2518)
This commit is contained in:
committed by
Rory Mitchell
parent
c85bf9859e
commit
ca7fc9fda3
@@ -56,30 +56,30 @@ struct HistCutUnit {
|
||||
/*! \brief pointer to the first cut value of this hist unit */
|
||||
const bst_float* cut;
|
||||
/*! \brief number of cutting points, including the maximum point */
|
||||
size_t size;
|
||||
uint32_t size;
|
||||
// default constructor
|
||||
HistCutUnit() {}
|
||||
// constructor
|
||||
HistCutUnit(const bst_float* cut, unsigned size)
|
||||
HistCutUnit(const bst_float* cut, uint32_t size)
|
||||
: cut(cut), size(size) {}
|
||||
};
|
||||
|
||||
/*! \brief cut configuration for all the features */
|
||||
struct HistCutMatrix {
|
||||
/*! \brief actual unit pointer */
|
||||
std::vector<unsigned> row_ptr;
|
||||
/*! \brief unit pointer to rows by element position */
|
||||
std::vector<uint32_t> row_ptr;
|
||||
/*! \brief minimum value of each feature */
|
||||
std::vector<bst_float> min_val;
|
||||
/*! \brief the cut field */
|
||||
std::vector<bst_float> cut;
|
||||
/*! \brief Get histogram bound for fid */
|
||||
inline HistCutUnit operator[](unsigned fid) const {
|
||||
inline HistCutUnit operator[](bst_uint fid) const {
|
||||
return HistCutUnit(dmlc::BeginPtr(cut) + row_ptr[fid],
|
||||
row_ptr[fid + 1] - row_ptr[fid]);
|
||||
}
|
||||
// create histogram cut matrix given statistics from data
|
||||
// using approximate quantile sketch approach
|
||||
void Init(DMatrix* p_fmat, size_t max_num_bins);
|
||||
void Init(DMatrix* p_fmat, uint32_t max_num_bins);
|
||||
};
|
||||
|
||||
|
||||
@@ -89,11 +89,11 @@ struct HistCutMatrix {
|
||||
*/
|
||||
struct GHistIndexRow {
|
||||
/*! \brief The index of the histogram */
|
||||
const unsigned* index;
|
||||
const uint32_t* index;
|
||||
/*! \brief The size of the histogram */
|
||||
unsigned size;
|
||||
size_t size;
|
||||
GHistIndexRow() {}
|
||||
GHistIndexRow(const unsigned* index, unsigned size)
|
||||
GHistIndexRow(const uint32_t* index, size_t size)
|
||||
: index(index), size(size) {}
|
||||
};
|
||||
|
||||
@@ -103,21 +103,21 @@ struct GHistIndexRow {
|
||||
* This is a global histogram index.
|
||||
*/
|
||||
struct GHistIndexMatrix {
|
||||
/*! \brief row pointer */
|
||||
std::vector<unsigned> row_ptr;
|
||||
/*! \brief row pointer to rows by element position */
|
||||
std::vector<size_t> row_ptr;
|
||||
/*! \brief The index data */
|
||||
std::vector<unsigned> index;
|
||||
std::vector<uint32_t> index;
|
||||
/*! \brief hit count of each index */
|
||||
std::vector<unsigned> hit_count;
|
||||
std::vector<size_t> hit_count;
|
||||
/*! \brief The corresponding cuts */
|
||||
const HistCutMatrix* cut;
|
||||
// Create a global histogram matrix, given cut
|
||||
void Init(DMatrix* p_fmat);
|
||||
// get i-th row
|
||||
inline GHistIndexRow operator[](bst_uint i) const {
|
||||
inline GHistIndexRow operator[](size_t i) const {
|
||||
return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]);
|
||||
}
|
||||
inline void GetFeatureCounts(bst_uint* counts) const {
|
||||
inline void GetFeatureCounts(size_t* counts) const {
|
||||
const unsigned nfeature = cut->row_ptr.size() - 1;
|
||||
for (unsigned fid = 0; fid < nfeature; ++fid) {
|
||||
const unsigned ibegin = cut->row_ptr[fid];
|
||||
@@ -129,18 +129,18 @@ struct GHistIndexMatrix {
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<unsigned> hit_count_tloc_;
|
||||
std::vector<size_t> hit_count_tloc_;
|
||||
};
|
||||
|
||||
struct GHistIndexBlock {
|
||||
const unsigned* row_ptr;
|
||||
const unsigned* index;
|
||||
const size_t* row_ptr;
|
||||
const uint32_t* index;
|
||||
|
||||
inline GHistIndexBlock(const unsigned* row_ptr, const unsigned* index)
|
||||
inline GHistIndexBlock(const size_t* row_ptr, const uint32_t* index)
|
||||
: row_ptr(row_ptr), index(index) {}
|
||||
|
||||
// get i-th row
|
||||
inline GHistIndexRow operator[](bst_uint i) const {
|
||||
inline GHistIndexRow operator[](size_t i) const {
|
||||
return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]);
|
||||
}
|
||||
};
|
||||
@@ -153,23 +153,23 @@ class GHistIndexBlockMatrix {
|
||||
const ColumnMatrix& colmat,
|
||||
const FastHistParam& param);
|
||||
|
||||
inline GHistIndexBlock operator[](bst_uint i) const {
|
||||
inline GHistIndexBlock operator[](size_t i) const {
|
||||
return GHistIndexBlock(blocks[i].row_ptr_begin, blocks[i].index_begin);
|
||||
}
|
||||
|
||||
inline unsigned GetNumBlock() const {
|
||||
inline size_t GetNumBlock() const {
|
||||
return blocks.size();
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<unsigned> row_ptr;
|
||||
std::vector<unsigned> index;
|
||||
std::vector<size_t> row_ptr;
|
||||
std::vector<uint32_t> index;
|
||||
const HistCutMatrix* cut;
|
||||
struct Block {
|
||||
const unsigned* row_ptr_begin;
|
||||
const unsigned* row_ptr_end;
|
||||
const unsigned* index_begin;
|
||||
const unsigned* index_end;
|
||||
const size_t* row_ptr_begin;
|
||||
const size_t* row_ptr_end;
|
||||
const uint32_t* index_begin;
|
||||
const uint32_t* index_end;
|
||||
};
|
||||
std::vector<Block> blocks;
|
||||
};
|
||||
@@ -184,10 +184,10 @@ struct GHistRow {
|
||||
/*! \brief base pointer to first entry */
|
||||
GHistEntry* begin;
|
||||
/*! \brief number of entries */
|
||||
unsigned size;
|
||||
uint32_t size;
|
||||
|
||||
GHistRow() {}
|
||||
GHistRow(GHistEntry* begin, unsigned size)
|
||||
GHistRow(GHistEntry* begin, uint32_t size)
|
||||
: begin(begin), size(size) {}
|
||||
};
|
||||
|
||||
@@ -198,19 +198,19 @@ class HistCollection {
|
||||
public:
|
||||
// access histogram for i-th node
|
||||
inline GHistRow operator[](bst_uint nid) const {
|
||||
const size_t kMax = std::numeric_limits<size_t>::max();
|
||||
const uint32_t kMax = std::numeric_limits<uint32_t>::max();
|
||||
CHECK_NE(row_ptr_[nid], kMax);
|
||||
return GHistRow(const_cast<GHistEntry*>(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_);
|
||||
}
|
||||
|
||||
// have we computed a histogram for i-th node?
|
||||
inline bool RowExists(bst_uint nid) const {
|
||||
const size_t kMax = std::numeric_limits<size_t>::max();
|
||||
const uint32_t kMax = std::numeric_limits<uint32_t>::max();
|
||||
return (nid < row_ptr_.size() && row_ptr_[nid] != kMax);
|
||||
}
|
||||
|
||||
// initialize histogram collection
|
||||
inline void Init(size_t nbins) {
|
||||
inline void Init(uint32_t nbins) {
|
||||
nbins_ = nbins;
|
||||
row_ptr_.clear();
|
||||
data_.clear();
|
||||
@@ -218,7 +218,7 @@ class HistCollection {
|
||||
|
||||
// create an empty histogram for i-th node
|
||||
inline void AddHistRow(bst_uint nid) {
|
||||
const size_t kMax = std::numeric_limits<size_t>::max();
|
||||
const uint32_t kMax = std::numeric_limits<uint32_t>::max();
|
||||
if (nid >= row_ptr_.size()) {
|
||||
row_ptr_.resize(nid + 1, kMax);
|
||||
}
|
||||
@@ -230,12 +230,12 @@ class HistCollection {
|
||||
|
||||
private:
|
||||
/*! \brief number of all bins over all features */
|
||||
size_t nbins_;
|
||||
uint32_t nbins_;
|
||||
|
||||
std::vector<GHistEntry> data_;
|
||||
|
||||
/*! \brief row_ptr_[nid] locates bin for histogram of node nid */
|
||||
std::vector<size_t> row_ptr_;
|
||||
std::vector<uint32_t> row_ptr_;
|
||||
};
|
||||
|
||||
/*!
|
||||
@@ -244,7 +244,7 @@ class HistCollection {
|
||||
class GHistBuilder {
|
||||
public:
|
||||
// initialize builder
|
||||
inline void Init(size_t nthread, size_t nbins) {
|
||||
inline void Init(size_t nthread, uint32_t nbins) {
|
||||
nthread_ = nthread;
|
||||
nbins_ = nbins;
|
||||
}
|
||||
@@ -268,7 +268,7 @@ class GHistBuilder {
|
||||
/*! \brief number of threads for parallel computation */
|
||||
size_t nthread_;
|
||||
/*! \brief number of all bins over all features */
|
||||
size_t nbins_;
|
||||
uint32_t nbins_;
|
||||
std::vector<GHistEntry> data_;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user