Extract interaction constraint from split evaluator. (#5034)

*  Extract interaction constraints from split evaluator.

The reason for doing so is mostly model IO, where num_feature and interaction_constraints are copied into the split evaluator. Also, the interaction constraint is itself a feature selector, acting like the column sampler, so it's inefficient to bury it deep in the evaluator chain. Lastly, removing another copied parameter is a win.

*  Enable interaction constraints for the approx tree method.

Now that the implementation is split out from the evaluator class, it is also enabled for the approx method.

*  Removing obsoleted code in colmaker.

These code paths were never documented nor actually used in the real world. Also, there isn't a single test covering them.

*  Unifying the types used for row and column.

As input dataset sizes approach a billion rows, incorrect use of int is subject to overflow; moreover, signed integer overflow is undefined behaviour. This PR starts the process of unifying the index types used for rows and columns to unsigned integers. There are optimizations that can exploit this undefined behaviour, but after some testing I don't see that they are beneficial to XGBoost.
This commit is contained in:
Jiaming Yuan
2019-11-14 20:11:41 +08:00
committed by GitHub
parent 886bf93ba4
commit 97abcc7ee2
45 changed files with 688 additions and 652 deletions

View File

@@ -16,6 +16,8 @@
#include <vector>
#include "xgboost/base.h"
namespace xgboost {
namespace common {
/*!
@@ -23,7 +25,7 @@ namespace common {
* \tparam ValueType type of entries in the sparse matrix
* \tparam SizeType type of the index range holder
*/
template<typename ValueType, typename SizeType = std::size_t>
template<typename ValueType, typename SizeType = bst_ulong>
struct ParallelGroupBuilder {
public:
// parallel group builder of data

View File

@@ -421,7 +421,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
#pragma omp parallel for num_threads(nthread) schedule(static)
for (bst_omp_uint idx = 0; idx < bst_omp_uint(nbins); ++idx) {
for (size_t tid = 0; tid < nthread; ++tid) {
for (int32_t tid = 0; tid < nthread; ++tid) {
hit_count[idx] += hit_count_tloc_[tid * nbins + idx];
hit_count_tloc_[tid * nbins + idx] = 0; // reset for next batch
}

View File

@@ -157,9 +157,21 @@ void HostDeviceVector<T>::SetDevice(int device) const {}
// explicit instantiations are required, as HostDeviceVector isn't header-only
template class HostDeviceVector<bst_float>;
template class HostDeviceVector<GradientPair>;
template class HostDeviceVector<int>;
template class HostDeviceVector<int32_t>; // bst_node_t
template class HostDeviceVector<Entry>;
template class HostDeviceVector<size_t>;
template class HostDeviceVector<uint64_t>; // bst_row_t
template class HostDeviceVector<uint32_t>; // bst_feature_t
#if defined(__APPLE__)
/*
* On OSX:
*
* typedef unsigned int uint32_t;
* typedef unsigned long long uint64_t;
* typedef unsigned long __darwin_size_t;
*/
template class HostDeviceVector<std::size_t>;
#endif // defined(__APPLE__)
} // namespace xgboost

View File

@@ -351,8 +351,20 @@ void HostDeviceVector<T>::Resize(size_t new_size, T v) {
// explicit instantiations are required, as HostDeviceVector isn't header-only
template class HostDeviceVector<bst_float>;
template class HostDeviceVector<GradientPair>;
template class HostDeviceVector<int>;
template class HostDeviceVector<int32_t>; // bst_node_t
template class HostDeviceVector<Entry>;
template class HostDeviceVector<size_t>;
template class HostDeviceVector<uint64_t>; // bst_row_t
template class HostDeviceVector<uint32_t>; // bst_feature_t
#if defined(__APPLE__)
/*
* On OSX:
*
* typedef unsigned int uint32_t;
* typedef unsigned long long uint64_t;
* typedef unsigned long __darwin_size_t;
*/
template class HostDeviceVector<std::size_t>;
#endif // defined(__APPLE__)
} // namespace xgboost

View File

@@ -85,20 +85,20 @@ GlobalRandomEngine& GlobalRandom(); // NOLINT(*)
*/
class ColumnSampler {
std::shared_ptr<HostDeviceVector<int>> feature_set_tree_;
std::map<int, std::shared_ptr<HostDeviceVector<int>>> feature_set_level_;
std::shared_ptr<HostDeviceVector<bst_feature_t>> feature_set_tree_;
std::map<int, std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_set_level_;
float colsample_bylevel_{1.0f};
float colsample_bytree_{1.0f};
float colsample_bynode_{1.0f};
GlobalRandomEngine rng_;
std::shared_ptr<HostDeviceVector<int>> ColSample(
std::shared_ptr<HostDeviceVector<int>> p_features, float colsample) {
std::shared_ptr<HostDeviceVector<bst_feature_t>> ColSample(
std::shared_ptr<HostDeviceVector<bst_feature_t>> p_features, float colsample) {
if (colsample == 1.0f) return p_features;
const auto& features = p_features->HostVector();
CHECK_GT(features.size(), 0);
int n = std::max(1, static_cast<int>(colsample * features.size()));
auto p_new_features = std::make_shared<HostDeviceVector<int>>();
auto p_new_features = std::make_shared<HostDeviceVector<bst_feature_t>>();
auto& new_features = *p_new_features;
new_features.Resize(features.size());
std::copy(features.begin(), features.end(),
@@ -147,7 +147,7 @@ class ColumnSampler {
colsample_bynode_ = colsample_bynode;
if (feature_set_tree_ == nullptr) {
feature_set_tree_ = std::make_shared<HostDeviceVector<int>>();
feature_set_tree_ = std::make_shared<HostDeviceVector<bst_feature_t>>();
}
Reset();
@@ -178,7 +178,7 @@ class ColumnSampler {
* construction of each tree node, and must be called the same number of times in each
* process and with the same parameters to return the same feature set across processes.
*/
std::shared_ptr<HostDeviceVector<int>> GetFeatureSet(int depth) {
std::shared_ptr<HostDeviceVector<bst_feature_t>> GetFeatureSet(int depth) {
if (colsample_bylevel_ == 1.0f && colsample_bynode_ == 1.0f) {
return feature_set_tree_;
}