From 97abcc7ee2a67194916fb1be17e55a29ca11ea57 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 14 Nov 2019 20:11:41 +0800 Subject: [PATCH] Extract interaction constraint from split evaluator. (#5034) * Extract interaction constraints from split evaluator. The reason for doing so is mostly for model IO, where num_feature and interaction_constraints are copied in split evaluator. Also, an interaction constraint is by itself a feature selector, acting like a column sampler, and it's inefficient to bury it deep in the evaluator chain. Lastly, removing yet another copied parameter is a win. * Enable interaction constraints for approx tree method. Now that the implementation is split out from the evaluator class, it's also enabled for the approx method. * Remove obsolete code in colmaker. It was never documented nor actually used in the real world. Also, there isn't a single test for those code blocks. * Unify the types used for row and column. As the size of input datasets is marching toward billions, incorrect use of int is subject to overflow; also, signed integer overflow is undefined behaviour. This PR starts the procedure for unifying the index types used to unsigned integers. There are optimizations that can utilize this undefined behaviour, but after some testing I don't see that they are beneficial to XGBoost. 
--- amalgamation/xgboost-all0.cc | 1 + doc/parameter.rst | 2 + .../feature_interaction_constraint.rst | 6 +- include/xgboost/base.h | 25 +- include/xgboost/data.h | 8 +- include/xgboost/predictor.h | 2 +- src/c_api/c_api.cc | 16 +- src/common/group_data.h | 4 +- src/common/hist_util.cc | 2 +- src/common/host_device_vector.cc | 16 +- src/common/host_device_vector.cu | 16 +- src/common/random.h | 14 +- src/data/data.cc | 13 +- src/data/simple_csr_source.cu | 16 +- src/predictor/gpu_predictor.cu | 6 +- src/tree/constraints.cc | 105 +++++++++ src/tree/constraints.cu | 22 +- src/tree/constraints.cuh | 18 +- src/tree/constraints.h | 67 ++++++ src/tree/gpu_hist/row_partitioner.cu | 40 ++-- src/tree/gpu_hist/row_partitioner.cuh | 34 +-- src/tree/param.h | 4 +- src/tree/split_evaluator.cc | 214 ------------------ src/tree/split_evaluator.h | 5 - src/tree/updater_basemaker-inl.h | 21 +- src/tree/updater_colmaker.cc | 172 ++++---------- src/tree/updater_gpu_hist.cu | 6 +- src/tree/updater_histmaker.cc | 132 ++++++----- src/tree/updater_quantile_hist.cc | 19 +- src/tree/updater_quantile_hist.h | 10 +- tests/cpp/common/test_gpu_hist_util.cu | 2 +- tests/cpp/common/test_random.cc | 2 +- tests/cpp/data/test_data.cc | 2 +- tests/cpp/data/test_metainfo.cc | 2 +- tests/cpp/helpers.h | 2 +- tests/cpp/test_main.cc | 1 + .../cpp/tree/gpu_hist/test_row_partitioner.cu | 2 +- tests/cpp/tree/test_constraints.cc | 60 +++++ tests/cpp/tree/test_constraints.cu | 86 +++---- tests/cpp/tree/test_gpu_hist.cu | 2 +- tests/cpp/tree/test_histmaker.cc | 69 ++++++ tests/cpp/tree/test_quantile_hist.cc | 8 +- tests/cpp/tree/test_split_evaluator.cc | 57 ----- .../test_gpu_interaction_constraints.py | 4 +- tests/python/test_interaction_constraints.py | 25 +- 45 files changed, 688 insertions(+), 652 deletions(-) create mode 100644 src/tree/constraints.cc create mode 100644 src/tree/constraints.h create mode 100644 tests/cpp/tree/test_constraints.cc create mode 100644 tests/cpp/tree/test_histmaker.cc delete 
mode 100644 tests/cpp/tree/test_split_evaluator.cc diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc index 6dea48d5e..3f80c5648 100644 --- a/amalgamation/xgboost-all0.cc +++ b/amalgamation/xgboost-all0.cc @@ -53,6 +53,7 @@ #include "../src/tree/updater_sync.cc" #include "../src/tree/updater_histmaker.cc" #include "../src/tree/updater_skmaker.cc" +#include "../src/tree/constraints.cc" // linear #include "../src/linear/linear_updater.cc" diff --git a/doc/parameter.rst b/doc/parameter.rst index d0ccacba6..3ad297b5a 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -142,6 +142,8 @@ Parameters for Tree Booster - ``grow_histmaker``: distributed tree construction with row-based data splitting based on global proposal of histogram counting. - ``grow_local_histmaker``: based on local histogram counting. - ``grow_skmaker``: uses the approximate sketching algorithm. + - ``grow_quantile_histmaker``: Grow tree using quantized histogram. + - ``grow_gpu_hist``: Grow tree with GPU. - ``sync``: synchronizes trees in all distributed nodes. - ``refresh``: refreshes tree's statistics and/or leaf values based on the current data. Note that no random subsampling of data rows is performed. - ``prune``: prunes the splits where loss < min_split_loss (or gamma). diff --git a/doc/tutorials/feature_interaction_constraint.rst b/doc/tutorials/feature_interaction_constraint.rst index ea4d252ca..205524025 100644 --- a/doc/tutorials/feature_interaction_constraint.rst +++ b/doc/tutorials/feature_interaction_constraint.rst @@ -172,9 +172,9 @@ parameter: early_stopping_rounds = 10) **Choice of tree construction algorithm**. To use feature interaction constraints, be sure -to set the ``tree_method`` parameter to one of the following: ``exact``, ``hist`` or -``gpu_hist``. Support for ``gpu_hist`` is added after (excluding) version 0.90. - +to set the ``tree_method`` parameter to one of the following: ``exact``, ``hist``, +``approx`` or ``gpu_hist``. 
Support for ``gpu_hist`` and ``approx`` is added only in +1.0.0. ************** Advanced topic diff --git a/include/xgboost/base.h b/include/xgboost/base.h index 1411ac0dc..1a4df84c0 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -100,17 +100,30 @@ /*! \brief namespace of xgboost*/ namespace xgboost { -/*! - * \brief unsigned integer type used in boost, - * used for feature index and row index. - */ + +/*! \brief unsigned integer type used for feature index. */ using bst_uint = uint32_t; // NOLINT +/*! \brief integer type. */ using bst_int = int32_t; // NOLINT -/*! \brief long integers */ -typedef uint64_t bst_ulong; // NOLINT(*) +/*! \brief unsigned long integers */ +using bst_ulong = uint64_t; /*! \brief float type, used for storing statistics */ using bst_float = float; // NOLINT +/*! \brief Type for data column (feature) index. */ +using bst_feature_t = uint32_t; // NOLINT +/*! \breif Type for data row index. + * + * Be careful `std::size_t' is implementation-defined. Meaning that the binary + * representation of DMatrix might not be portable across platform. Booster model should + * be portable as parameters are floating points. + */ +using bst_row_t = std::size_t; // NOLINT +/*! \brief Type for tree node index. */ +using bst_node_t = int32_t; // NOLINT +/*! \brief Type for ranking group index. */ +using bst_group_t = uint32_t; // NOLINT + namespace detail { /*! \brief Implementation of gradient statistics pair. Template specialisation * may be used to overload different gradients types e.g. low precision, high diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 7fa83472b..f949c338a 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -57,7 +57,7 @@ class MetaInfo { * \brief the index of begin and end of a group * needed when the learning task is ranking. */ - std::vector group_ptr_; + std::vector group_ptr_; /*! \brief weights of each instance, optional */ HostDeviceVector weights_; /*! 
@@ -136,7 +136,7 @@ class MetaInfo { /*! \brief Element from a sparse vector */ struct Entry { /*! \brief feature index */ - bst_uint index; + bst_feature_t index; /*! \brief feature value */ bst_float fvalue; /*! \brief default constructor */ @@ -146,7 +146,7 @@ struct Entry { * \param index The feature or row index. * \param fvalue The feature value. */ - Entry(bst_uint index, bst_float fvalue) : index(index), fvalue(fvalue) {} + Entry(bst_feature_t index, bst_float fvalue) : index(index), fvalue(fvalue) {} /*! \brief reversely compare feature values */ inline static bool CmpValue(const Entry& a, const Entry& b) { return a.fvalue < b.fvalue; @@ -174,7 +174,7 @@ struct BatchParam { class SparsePage { public: // Offset for each row. - HostDeviceVector offset; + HostDeviceVector offset; /*! \brief the data of the segments */ HostDeviceVector data; diff --git a/include/xgboost/predictor.h b/include/xgboost/predictor.h index 93316c0f8..7952ba42c 100644 --- a/include/xgboost/predictor.h +++ b/include/xgboost/predictor.h @@ -21,7 +21,7 @@ namespace xgboost { class TreeUpdater; namespace gbm { -class GBTreeModel; +struct GBTreeModel; } // namespace gbm } diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 081cc5395..1f8965320 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -267,7 +267,9 @@ XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr, data::SimpleCSRSource& mat = *source; auto& offset_vec = mat.page_.offset.HostVector(); auto& data_vec = mat.page_.data.HostVector(); - common::ParallelGroupBuilder builder(&offset_vec, &data_vec); + common::ParallelGroupBuilder< + Entry, std::remove_reference::type::value_type> + builder(&offset_vec, &data_vec); builder.InitBudget(0, nthread); size_t ncol = nindptr - 1; // NOLINT(*) #pragma omp parallel for schedule(static) @@ -362,19 +364,20 @@ XGB_DLL int XGDMatrixCreateFromMat(const bst_float* data, API_END(); } -void PrefixSum(size_t *x, size_t N) { - size_t *suma; +template +void PrefixSum(T *x, size_t 
N) { + std::vector suma; #pragma omp parallel { const int ithread = omp_get_thread_num(); const int nthreads = omp_get_num_threads(); #pragma omp single { - suma = new size_t[nthreads+1]; + suma.resize(nthreads+1); suma[0] = 0; } - size_t sum = 0; - size_t offset = 0; + T sum = 0; + T offset = 0; #pragma omp for schedule(static) for (omp_ulong i = 0; i < N; i++) { sum += x[i]; @@ -390,7 +393,6 @@ void PrefixSum(size_t *x, size_t N) { x[i] += offset; } } - delete[] suma; } XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data, // NOLINT diff --git a/src/common/group_data.h b/src/common/group_data.h index f43398932..81a2d999e 100644 --- a/src/common/group_data.h +++ b/src/common/group_data.h @@ -16,6 +16,8 @@ #include +#include "xgboost/base.h" + namespace xgboost { namespace common { /*! @@ -23,7 +25,7 @@ namespace common { * \tparam ValueType type of entries in the sparse matrix * \tparam SizeType type of the index range holder */ -template +template struct ParallelGroupBuilder { public: // parallel group builder of data diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index 4ead606f6..f1f94f762 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -421,7 +421,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) { #pragma omp parallel for num_threads(nthread) schedule(static) for (bst_omp_uint idx = 0; idx < bst_omp_uint(nbins); ++idx) { - for (size_t tid = 0; tid < nthread; ++tid) { + for (int32_t tid = 0; tid < nthread; ++tid) { hit_count[idx] += hit_count_tloc_[tid * nbins + idx]; hit_count_tloc_[tid * nbins + idx] = 0; // reset for next batch } diff --git a/src/common/host_device_vector.cc b/src/common/host_device_vector.cc index 1e03064ec..db31b2fe9 100644 --- a/src/common/host_device_vector.cc +++ b/src/common/host_device_vector.cc @@ -157,9 +157,21 @@ void HostDeviceVector::SetDevice(int device) const {} // explicit instantiations are required, as HostDeviceVector isn't header-only template class 
HostDeviceVector; template class HostDeviceVector; -template class HostDeviceVector; +template class HostDeviceVector; // bst_node_t template class HostDeviceVector; -template class HostDeviceVector; +template class HostDeviceVector; // bst_row_t +template class HostDeviceVector; // bst_feature_t + +#if defined(__APPLE__) +/* + * On OSX: + * + * typedef unsigned int uint32_t; + * typedef unsigned long long uint64_t; + * typedef unsigned long __darwin_size_t; + */ +template class HostDeviceVector; +#endif // defined(__APPLE__) } // namespace xgboost diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu index a85194aed..b27d811f2 100644 --- a/src/common/host_device_vector.cu +++ b/src/common/host_device_vector.cu @@ -351,8 +351,20 @@ void HostDeviceVector::Resize(size_t new_size, T v) { // explicit instantiations are required, as HostDeviceVector isn't header-only template class HostDeviceVector; template class HostDeviceVector; -template class HostDeviceVector; +template class HostDeviceVector; // bst_node_t template class HostDeviceVector; -template class HostDeviceVector; +template class HostDeviceVector; // bst_row_t +template class HostDeviceVector; // bst_feature_t + +#if defined(__APPLE__) +/* + * On OSX: + * + * typedef unsigned int uint32_t; + * typedef unsigned long long uint64_t; + * typedef unsigned long __darwin_size_t; + */ +template class HostDeviceVector; +#endif // defined(__APPLE__) } // namespace xgboost diff --git a/src/common/random.h b/src/common/random.h index a4749f2ae..0d1c08bbe 100644 --- a/src/common/random.h +++ b/src/common/random.h @@ -85,20 +85,20 @@ GlobalRandomEngine& GlobalRandom(); // NOLINT(*) */ class ColumnSampler { - std::shared_ptr> feature_set_tree_; - std::map>> feature_set_level_; + std::shared_ptr> feature_set_tree_; + std::map>> feature_set_level_; float colsample_bylevel_{1.0f}; float colsample_bytree_{1.0f}; float colsample_bynode_{1.0f}; GlobalRandomEngine rng_; - std::shared_ptr> ColSample( - 
std::shared_ptr> p_features, float colsample) { + std::shared_ptr> ColSample( + std::shared_ptr> p_features, float colsample) { if (colsample == 1.0f) return p_features; const auto& features = p_features->HostVector(); CHECK_GT(features.size(), 0); int n = std::max(1, static_cast(colsample * features.size())); - auto p_new_features = std::make_shared>(); + auto p_new_features = std::make_shared>(); auto& new_features = *p_new_features; new_features.Resize(features.size()); std::copy(features.begin(), features.end(), @@ -147,7 +147,7 @@ class ColumnSampler { colsample_bynode_ = colsample_bynode; if (feature_set_tree_ == nullptr) { - feature_set_tree_ = std::make_shared>(); + feature_set_tree_ = std::make_shared>(); } Reset(); @@ -178,7 +178,7 @@ class ColumnSampler { * construction of each tree node, and must be called the same number of times in each * process and with the same parameters to return the same feature set across processes. */ - std::shared_ptr> GetFeatureSet(int depth) { + std::shared_ptr> GetFeatureSet(int depth) { if (colsample_bylevel_ == 1.0f && colsample_bynode_ == 1.0f) { return feature_set_tree_; } diff --git a/src/data/data.cc b/src/data/data.cc index 21b098162..0bf181ca9 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -229,7 +229,7 @@ DMatrix* DMatrix::Load(const std::string& uri, std::unique_ptr > parser( dmlc::Parser::Create(fname.c_str(), partid, npart, file_format.c_str())); - DMatrix* dmat; + DMatrix* dmat {nullptr}; try { dmat = DMatrix::Create(parser.get(), cache_file, page_size); @@ -253,9 +253,8 @@ DMatrix* DMatrix::Load(const std::string& uri, << "Choosing default parser in dmlc-core. 
" << "Consider providing a uri parameter like: filename?format=csv"; } - - LOG(FATAL) << "Encountered parser error:\n" << e.what(); } + LOG(FATAL) << "Encountered parser error:\n" << e.what(); } if (!silent) { @@ -361,8 +360,8 @@ DMatrix* DMatrix::Create(std::unique_ptr>&& source, namespace xgboost { SparsePage SparsePage::GetTranspose(int num_columns) const { SparsePage transpose; - common::ParallelGroupBuilder builder(&transpose.offset.HostVector(), - &transpose.data.HostVector()); + common::ParallelGroupBuilder builder(&transpose.offset.HostVector(), + &transpose.data.HostVector()); const int nthread = omp_get_max_threads(); builder.InitBudget(num_columns, nthread); long batch_size = static_cast(this->Size()); // NOLINT(*) @@ -424,7 +423,7 @@ void SparsePage::Push(const dmlc::RowBlock& batch) { void SparsePage::PushCSC(const SparsePage &batch) { std::vector& self_data = data.HostVector(); - std::vector& self_offset = offset.HostVector(); + std::vector& self_offset = offset.HostVector(); auto const& other_data = batch.data.ConstHostVector(); auto const& other_offset = batch.offset.ConstHostVector(); @@ -442,7 +441,7 @@ void SparsePage::PushCSC(const SparsePage &batch) { return; } - std::vector offset(other_offset.size()); + std::vector offset(other_offset.size()); offset[0] = 0; std::vector data(self_data.size() + other_data.size()); diff --git a/src/data/simple_csr_source.cu b/src/data/simple_csr_source.cu index bc7ce1cd5..af068a639 100644 --- a/src/data/simple_csr_source.cu +++ b/src/data/simple_csr_source.cu @@ -29,7 +29,7 @@ namespace data { template __global__ void CountValidKernel(Columnar const column, bool has_missing, float missing, - int32_t* flag, common::Span offsets) { + int32_t* flag, common::Span offsets) { auto const tid = threadIdx.x + blockDim.x * blockIdx.x; bool const missing_is_nan = common::CheckNAN(missing); @@ -59,7 +59,7 @@ __global__ void CountValidKernel(Columnar const column, template __device__ void AssignValue(T fvalue, int32_t 
colid, - common::Span out_offsets, common::Span out_data) { + common::Span out_offsets, common::Span out_data) { auto const tid = threadIdx.x + blockDim.x * blockIdx.x; int32_t oid = out_offsets[tid]; out_data[oid].fvalue = fvalue; @@ -70,7 +70,7 @@ __device__ void AssignValue(T fvalue, int32_t colid, template __global__ void CreateCSRKernel(Columnar const column, int32_t colid, bool has_missing, float missing, - common::Span offsets, common::Span out_data) { + common::Span offsets, common::Span out_data) { auto const tid = threadIdx.x + blockDim.x * blockIdx.x; if (column.size <= tid) { return; @@ -98,7 +98,7 @@ __global__ void CreateCSRKernel(Columnar const column, template void CountValid(std::vector const& j_columns, uint32_t column_id, bool has_missing, float missing, - HostDeviceVector* out_offset, + HostDeviceVector* out_offset, dh::caching_device_vector* out_d_flag, uint32_t* out_n_rows) { uint32_t constexpr kThreads = 256; @@ -121,7 +121,7 @@ void CountValid(std::vector const& j_columns, uint32_t column_id, CHECK_EQ(out_offset->Size(), n_rows + 1) << "All columns should have same number of rows."; - common::Span s_offsets = out_offset->DeviceSpan(); + common::Span s_offsets = out_offset->DeviceSpan(); uint32_t const kBlocks = common::DivRoundUp(n_rows, kThreads); dh::LaunchKernel {kBlocks, kThreads} ( @@ -135,7 +135,7 @@ void CountValid(std::vector const& j_columns, uint32_t column_id, template void CreateCSR(std::vector const& j_columns, uint32_t column_id, uint32_t n_rows, bool has_missing, float missing, - dh::device_vector* tmp_offset, common::Span s_data) { + dh::device_vector* tmp_offset, common::Span s_data) { uint32_t constexpr kThreads = 256; auto const& j_column = j_columns[column_id]; auto const& column_obj = get(j_column); @@ -174,13 +174,13 @@ void SimpleCSRSource::FromDeviceColumnar(std::vector const& columns, info.num_row_ = n_rows; auto s_offsets = this->page_.offset.DeviceSpan(); - thrust::device_ptr p_offsets(s_offsets.data()); + 
thrust::device_ptr p_offsets(s_offsets.data()); CHECK_GE(s_offsets.size(), n_rows + 1); thrust::inclusive_scan(p_offsets, p_offsets + n_rows + 1, p_offsets); // Created for building csr matrix, where we need to change index after processing each // column. - dh::device_vector tmp_offset(this->page_.offset.Size()); + dh::device_vector tmp_offset(this->page_.offset.Size()); dh::safe_cuda(cudaMemcpy(tmp_offset.data().get(), s_offsets.data(), s_offsets.size_bytes(), cudaMemcpyDeviceToDevice)); diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index e80ffe457..6e5566903 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -80,13 +80,13 @@ struct DevicePredictionNode { struct ElementLoader { bool use_shared; - common::Span d_row_ptr; + common::Span d_row_ptr; common::Span d_data; int num_features; float* smem; size_t entry_start; - __device__ ElementLoader(bool use_shared, common::Span row_ptr, + __device__ ElementLoader(bool use_shared, common::Span row_ptr, common::Span entry, int num_features, float* smem, int num_rows, size_t entry_start) : use_shared(use_shared), @@ -166,7 +166,7 @@ __global__ void PredictKernel(common::Span d_nodes, common::Span d_out_predictions, common::Span d_tree_segments, common::Span d_tree_group, - common::Span d_row_ptr, + common::Span d_row_ptr, common::Span d_data, size_t tree_begin, size_t tree_end, size_t num_features, size_t num_rows, size_t entry_start, diff --git a/src/tree/constraints.cc b/src/tree/constraints.cc new file mode 100644 index 000000000..5e5d440f7 --- /dev/null +++ b/src/tree/constraints.cc @@ -0,0 +1,105 @@ +/*! 
+ * Copyright 2018-2019 by Contributors + */ +#include +#include +#include + +#include "xgboost/span.h" +#include "constraints.h" +#include "param.h" + +namespace xgboost { +void FeatureInteractionConstraintHost::Configure(tree::TrainParam const& param, + bst_feature_t const n_features) { + if (param.interaction_constraints.empty()) { + enabled_ = !param.interaction_constraints.empty(); + return; // short-circuit if no constraint is specified + } + enabled_ = true; + + this->interaction_constraint_str_ = param.interaction_constraints; + this->n_features_ = n_features; + this->Reset(); +} + +void FeatureInteractionConstraintHost::Reset() { + if (!enabled_) { + return; + } + // Parse interaction constraints + std::istringstream iss(this->interaction_constraint_str_); + dmlc::JSONReader reader(&iss); + // Read std::vector> first and then + // convert to std::vector> + std::vector> tmp; + try { + reader.Read(&tmp); + } catch (dmlc::Error const& e) { + LOG(FATAL) << "Failed to parse feature interaction constraint:\n" + << this->interaction_constraint_str_ << "\n" + << "With error:\n" << e.what(); + } + for (const auto& e : tmp) { + interaction_constraints_.emplace_back(e.begin(), e.end()); + } + + // Initialise interaction constraints record with all variables permitted for the first node + node_constraints_.clear(); + node_constraints_.resize(1, std::unordered_set()); + node_constraints_[0].reserve(n_features_); + for (bst_feature_t i = 0; i < n_features_; ++i) { + node_constraints_[0].insert(i); + } + + // Initialise splits record + splits_.clear(); + splits_.resize(1, std::unordered_set()); +} + +void FeatureInteractionConstraintHost::SplitImpl( + bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id) { + bst_node_t newsize = std::max(left_id, right_id) + 1; + + // Record previous splits for child nodes + auto feature_splits = splits_[node_id]; // fid history of current node + feature_splits.insert(feature_id); // add feature of 
current node + splits_.resize(newsize); + splits_[left_id] = feature_splits; + splits_[right_id] = feature_splits; + + // Resize constraints record, initialise all features to be not permitted for new nodes + CHECK_NE(newsize, 0); + node_constraints_.resize(newsize, std::unordered_set()); + + // Permit features used in previous splits + for (bst_feature_t fid : feature_splits) { + node_constraints_[left_id].insert(fid); + node_constraints_[right_id].insert(fid); + } + + // Loop across specified interactions in constraints + for (const auto &constraint : interaction_constraints_) { + // flags whether the specified interaction is still relevant + bst_uint flag = 1; + + // Test relevance of specified interaction by checking all previous + // features are included + for (bst_uint checkvar : feature_splits) { + if (constraint.count(checkvar) == 0) { + flag = 0; + break; // interaction is not relevant due to unmet constraint + } + } + + // If interaction is still relevant, permit all other features in the + // interaction + if (flag == 1) { + for (bst_uint k : constraint) { + node_constraints_[left_id].insert(k); + node_constraints_[right_id].insert(k); + } + } + } +} +} // namespace xgboost diff --git a/src/tree/constraints.cu b/src/tree/constraints.cu index 472c2f0ec..a8d726814 100644 --- a/src/tree/constraints.cu +++ b/src/tree/constraints.cu @@ -173,7 +173,7 @@ void FeatureInteractionConstraint::ClearBuffers() { output_buffer_bits_, input_buffer_bits_); } -common::Span FeatureInteractionConstraint::QueryNode(int32_t node_id) { +common::Span FeatureInteractionConstraint::QueryNode(int32_t node_id) { if (!has_constraint_) { return {}; } CHECK_LT(node_id, s_node_constraints_.size()); @@ -184,7 +184,7 @@ common::Span FeatureInteractionConstraint::QueryNode(int32_t node_id) { auto p_result_buffer = result_buffer_.data(); LBitField64 node_constraints = s_node_constraints_[node_id]; - thrust::device_ptr const out_end = thrust::copy_if( + thrust::device_ptr const out_end = 
thrust::copy_if( thrust::device, begin, end, p_result_buffer, @@ -197,7 +197,7 @@ common::Span FeatureInteractionConstraint::QueryNode(int32_t node_id) { return {s_result_buffer_.data(), s_result_buffer_.data() + n_available}; } -__global__ void SetInputBufferKernel(common::Span feature_list_input, +__global__ void SetInputBufferKernel(common::Span feature_list_input, LBitField64 result_buffer_input) { uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < feature_list_input.size()) { @@ -212,8 +212,8 @@ __global__ void QueryFeatureListKernel(LBitField64 node_constraints, result_buffer_output &= result_buffer_input; } -common::Span FeatureInteractionConstraint::Query( - common::Span feature_list, int32_t nid) { +common::Span FeatureInteractionConstraint::Query( + common::Span feature_list, int32_t nid) { if (!has_constraint_ || nid == 0) { return feature_list; } @@ -238,7 +238,7 @@ common::Span FeatureInteractionConstraint::Query( LBitField64 local_result_buffer = output_buffer_bits_; - thrust::device_ptr const out_end = thrust::copy_if( + thrust::device_ptr const out_end = thrust::copy_if( thrust::device, begin, end, result_buffer_.data(), @@ -248,7 +248,7 @@ common::Span FeatureInteractionConstraint::Query( }); size_t const n_available = std::distance(result_buffer_.data(), out_end); - common::Span result = + common::Span result = {s_result_buffer_.data(), s_result_buffer_.data() + n_available}; return result; } @@ -258,12 +258,12 @@ common::Span FeatureInteractionConstraint::Query( __global__ void RestoreFeatureListFromSetsKernel( LBitField64 feature_buffer, - int32_t fid, + bst_feature_t fid, common::Span feature_interactions, common::Span feature_interactions_ptr, // of size n interaction set + 1 - common::Span interactions_list, - common::Span interactions_list_ptr) { + common::Span interactions_list, + common::Span interactions_list_ptr) { auto const tid_x = threadIdx.x + blockIdx.x * blockDim.x; auto const tid_y = threadIdx.y + blockIdx.y * 
blockDim.y; // painful mapping: fid -> sets related to it -> features related to sets. @@ -312,7 +312,7 @@ __global__ void InteractionConstraintSplitKernel(LBitField64 feature, } void FeatureInteractionConstraint::Split( - int32_t node_id, int32_t feature_id, int32_t left_id, int32_t right_id) { + bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id) { if (!has_constraint_) { return; } CHECK_NE(node_id, left_id) << " Split node: " << node_id << " and its left child: " diff --git a/src/tree/constraints.cuh b/src/tree/constraints.cuh index 2bb95d30c..3e982a00f 100644 --- a/src/tree/constraints.cuh +++ b/src/tree/constraints.cuh @@ -108,10 +108,10 @@ struct FeatureInteractionConstraint { * * d_sets_ptr_: |0, 1, 3, 4| */ - dh::device_vector d_sets_; - common::Span s_sets_; - dh::device_vector d_sets_ptr_; - common::Span s_sets_ptr_; + dh::device_vector d_sets_; + common::Span s_sets_; + dh::device_vector d_sets_ptr_; + common::Span s_sets_ptr_; // Allowed features attached to each node, have n_nodes bitfields, // each of size n_features. @@ -120,8 +120,8 @@ struct FeatureInteractionConstraint { common::Span s_node_constraints_; // buffer storing return feature list from Query, of size n_features. - dh::device_vector result_buffer_; - common::Span s_result_buffer_; + dh::device_vector result_buffer_; + common::Span s_result_buffer_; // Temp buffers, one bit for each possible feature. dh::device_vector output_buffer_bits_storage_; @@ -149,7 +149,7 @@ struct FeatureInteractionConstraint { /*! \brief Reset before constructing a new tree. */ void Reset(); /*! \brief Return a list of features given node id */ - common::Span QueryNode(int32_t nid); + common::Span QueryNode(int32_t nid); /*! * \brief Return a list of selected features from given feature_list and node id. * @@ -159,9 +159,9 @@ struct FeatureInteractionConstraint { * \return A list of features picked from `feature_list' that conform to constraints in * node. 
*/ - common::Span Query(common::Span feature_list, int32_t nid); + common::Span Query(common::Span feature_list, int32_t nid); /*! \brief Apply split for node_id. */ - void Split(int32_t node_id, int32_t feature_id, int32_t left_id, int32_t right_id); + void Split(bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id); }; } // namespace xgboost diff --git a/src/tree/constraints.h b/src/tree/constraints.h new file mode 100644 index 000000000..222d763b0 --- /dev/null +++ b/src/tree/constraints.h @@ -0,0 +1,67 @@ +/*! + * Copyright 2018-2019 by Contributors + */ +#ifndef XGBOOST_TREE_CONSTRAINTS_H_ +#define XGBOOST_TREE_CONSTRAINTS_H_ + +#include +#include +#include + +#include "xgboost/span.h" +#include "xgboost/base.h" + +#include "param.h" + +namespace xgboost { +/*! + * \brief Feature interaction constraint implementation for CPU tree updaters. + * + * The interface is similiar to the one for GPU Hist. + */ +class FeatureInteractionConstraintHost { + protected: + // interaction_constraints_[constraint_id] contains a single interaction + // constraint, which specifies a group of feature IDs that can interact + // with each other + std::vector< std::unordered_set > interaction_constraints_; + // int_cont_[nid] contains the set of all feature IDs that are allowed to + // be used for a split at node nid + std::vector< std::unordered_set > node_constraints_; + // splits_[nid] contains the set of all feature IDs that have been used for + // splits in node nid and its parents + std::vector< std::unordered_set > splits_; + + std::vector return_buffer; + // string passed by user. 
+ std::string interaction_constraint_str_; + // number of features in DMatrix/Booster + bst_feature_t n_features_; + bool enabled_{false}; + + void SplitImpl(int32_t node_id, bst_feature_t feature_id, bst_node_t left_id, + bst_node_t right_id); + + public: + FeatureInteractionConstraintHost() = default; + void Split(int32_t node_id, bst_feature_t feature_id, bst_node_t left_id, + bst_node_t right_id) { + if (!enabled_) { + return; + } else { + this->SplitImpl(node_id, feature_id, left_id, right_id); + } + } + + bool Query(bst_node_t nid, bst_feature_t fid) const { + if (!enabled_) { return true; } + return node_constraints_.at(nid).find(fid) != node_constraints_.at(nid).cend(); + } + + void Reset(); + + void Configure(tree::TrainParam const& param, bst_feature_t const n_features); +}; +} // namespace xgboost + +#endif // XGBOOST_TREE_CONSTRAINTS_H_ diff --git a/src/tree/gpu_hist/row_partitioner.cu b/src/tree/gpu_hist/row_partitioner.cu index b1635613c..176740f12 100644 --- a/src/tree/gpu_hist/row_partitioner.cu +++ b/src/tree/gpu_hist/row_partitioner.cu @@ -10,23 +10,22 @@ namespace xgboost { namespace tree { struct IndicateLeftTransform { - RowPartitioner::TreePositionT left_nidx; - explicit IndicateLeftTransform(RowPartitioner::TreePositionT left_nidx) + bst_node_t left_nidx; + explicit IndicateLeftTransform(bst_node_t left_nidx) : left_nidx(left_nidx) {} - __host__ __device__ __forceinline__ int operator()( - const RowPartitioner::TreePositionT& x) const { + __host__ __device__ __forceinline__ int operator()(const bst_node_t& x) const { return x == left_nidx ? 1 : 0; } }; /* * position: Position of rows belonged to current split node. 
*/ -void RowPartitioner::SortPosition(common::Span position, - common::Span position_out, +void RowPartitioner::SortPosition(common::Span position, + common::Span position_out, common::Span ridx, common::Span ridx_out, - TreePositionT left_nidx, - TreePositionT right_nidx, + bst_node_t left_nidx, + bst_node_t right_nidx, int64_t* d_left_count, cudaStream_t stream) { // radix sort over 1 bit, see: // https://developer.nvidia.com/gpugems/GPUGems3/gpugems3_ch39.html @@ -53,8 +52,8 @@ void RowPartitioner::SortPosition(common::Span position, IndicateLeftTransform is_left(left_nidx); // an iterator that given a old position returns whether it belongs to left or right // node. - cub::TransformInputIterator + cub::TransformInputIterator in_itr(d_position_in, is_left); dh::DiscardLambdaItr out_itr(write_results); size_t temp_storage_bytes = 0; @@ -73,7 +72,7 @@ RowPartitioner::RowPartitioner(int device_idx, size_t num_rows) position_a.resize(num_rows); position_b.resize(num_rows); ridx = dh::DoubleBuffer{&ridx_a, &ridx_b}; - position = dh::DoubleBuffer{&position_a, &position_b}; + position = dh::DoubleBuffer{&position_a, &position_b}; ridx_segments.emplace_back(Segment(0, num_rows)); thrust::sequence( @@ -97,7 +96,7 @@ RowPartitioner::~RowPartitioner() { } common::Span RowPartitioner::GetRows( - TreePositionT nidx) { + bst_node_t nidx) { auto segment = ridx_segments.at(nidx); // Return empty span here as a valid result // Will error if we try to construct a span from a pointer with size 0 @@ -111,36 +110,35 @@ common::Span RowPartitioner::GetRows() { return ridx.CurrentSpan(); } -common::Span -RowPartitioner::GetPosition() { +common::Span RowPartitioner::GetPosition() { return position.CurrentSpan(); } std::vector RowPartitioner::GetRowsHost( - TreePositionT nidx) { + bst_node_t nidx) { auto span = GetRows(nidx); std::vector rows(span.size()); dh::CopyDeviceSpanToVector(&rows, span); return rows; } -std::vector RowPartitioner::GetPositionHost() { +std::vector 
RowPartitioner::GetPositionHost() { auto span = GetPosition(); - std::vector position(span.size()); + std::vector position(span.size()); dh::CopyDeviceSpanToVector(&position, span); return position; } void RowPartitioner::SortPositionAndCopy(const Segment& segment, - TreePositionT left_nidx, - TreePositionT right_nidx, + bst_node_t left_nidx, + bst_node_t right_nidx, int64_t* d_left_count, cudaStream_t stream) { SortPosition( // position_in - common::Span(position.Current() + segment.begin, + common::Span(position.Current() + segment.begin, segment.Size()), // position_out - common::Span(position.other() + segment.begin, + common::Span(position.other() + segment.begin, segment.Size()), // row index in common::Span(ridx.Current() + segment.begin, segment.Size()), diff --git a/src/tree/gpu_hist/row_partitioner.cuh b/src/tree/gpu_hist/row_partitioner.cuh index 7cfe04d51..4b6dcfb60 100644 --- a/src/tree/gpu_hist/row_partitioner.cuh +++ b/src/tree/gpu_hist/row_partitioner.cuh @@ -2,6 +2,7 @@ * Copyright 2017-2019 XGBoost contributors */ #pragma once +#include "xgboost/base.h" #include "../../common/device_helpers.cuh" namespace xgboost { @@ -30,7 +31,6 @@ __forceinline__ __device__ void AtomicIncrement(int64_t* d_count, bool increment * partition training rows into different leaf nodes. */ class RowPartitioner { public: - using TreePositionT = int32_t; using RowIndexT = bst_uint; struct Segment; @@ -47,8 +47,8 @@ class RowPartitioner { std::vector ridx_segments; dh::caching_device_vector ridx_a; dh::caching_device_vector ridx_b; - dh::caching_device_vector position_a; - dh::caching_device_vector position_b; + dh::caching_device_vector position_a; + dh::caching_device_vector position_b; /*! \brief mapping for node id -> rows. * This looks like: * node id | 1 | 2 | @@ -56,7 +56,7 @@ class RowPartitioner { */ dh::DoubleBuffer ridx; /*! \brief mapping for row -> node id. 
*/ - dh::DoubleBuffer position; + dh::DoubleBuffer position; dh::caching_device_vector left_counts; // Useful to keep a bunch of zeroed memory for sort position std::vector streams; @@ -70,7 +70,7 @@ class RowPartitioner { /** * \brief Gets the row indices of training instances in a given node. */ - common::Span GetRows(TreePositionT nidx); + common::Span GetRows(bst_node_t nidx); /** * \brief Gets all training rows in the set. @@ -80,17 +80,17 @@ class RowPartitioner { /** * \brief Gets the tree position of all training instances. */ - common::Span GetPosition(); + common::Span GetPosition(); /** * \brief Convenience method for testing */ - std::vector GetRowsHost(TreePositionT nidx); + std::vector GetRowsHost(bst_node_t nidx); /** * \brief Convenience method for testing */ - std::vector GetPositionHost(); + std::vector GetPositionHost(); /** * \brief Updates the tree position for set of training instances being split @@ -105,8 +105,8 @@ class RowPartitioner { * argument and return the new position for this training instance. */ template - void UpdatePosition(TreePositionT nidx, TreePositionT left_nidx, - TreePositionT right_nidx, UpdatePositionOpT op) { + void UpdatePosition(bst_node_t nidx, bst_node_t left_nidx, + bst_node_t right_nidx, UpdatePositionOpT op) { dh::safe_cuda(cudaSetDevice(device_idx)); Segment segment = ridx_segments.at(nidx); // rows belongs to node nidx auto d_ridx = ridx.CurrentSpan(); @@ -123,7 +123,7 @@ class RowPartitioner { // LaunchN starts from zero, so we restore the row index by adding segment.begin idx += segment.begin; RowIndexT ridx = d_ridx[idx]; - TreePositionT new_position = op(ridx); // new node id + bst_node_t new_position = op(ridx); // new node id KERNEL_CHECK(new_position == left_nidx || new_position == right_nidx); AtomicIncrement(d_left_count, new_position == left_nidx); d_position[idx] = new_position; @@ -172,16 +172,16 @@ class RowPartitioner { * segments. 
Based on a single pass of exclusive scan, uses iterators to * redirect inputs and outputs. */ - void SortPosition(common::Span position, - common::Span position_out, + void SortPosition(common::Span position, + common::Span position_out, common::Span ridx, - common::Span ridx_out, TreePositionT left_nidx, - TreePositionT right_nidx, int64_t* d_left_count, + common::Span ridx_out, bst_node_t left_nidx, + bst_node_t right_nidx, int64_t* d_left_count, cudaStream_t stream = nullptr); /*! \brief Sort row indices according to position. */ - void SortPositionAndCopy(const Segment& segment, TreePositionT left_nidx, - TreePositionT right_nidx, int64_t* d_left_count, + void SortPositionAndCopy(const Segment& segment, bst_node_t left_nidx, + bst_node_t right_nidx, int64_t* d_left_count, cudaStream_t stream); /** \brief Used to demarcate a contiguous set of row indices associated with * some tree node. */ diff --git a/src/tree/param.h b/src/tree/param.h index 2cebb3eec..ded36a313 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -194,7 +194,7 @@ struct TrainParam : public XGBoostParameter { "indices of features that are allowed to interact with each other." "See tutorial for more information"); DMLC_DECLARE_FIELD(split_evaluator) - .set_default("elastic_net,monotonic,interaction") + .set_default("elastic_net,monotonic") .describe("The criteria to use for ranking splits"); // ------ From cpu quantile histogram -------. @@ -395,7 +395,7 @@ struct XGBOOST_ALIGNAS(16) GradStats { */ struct SplitEntry { /*! \brief loss change after split this node */ - bst_float loss_chg{0.0f}; + bst_float loss_chg {0.0f}; /*! 
\brief split index */ unsigned sindex{0}; bst_float split_value{0.0f}; diff --git a/src/tree/split_evaluator.cc b/src/tree/split_evaluator.cc index ca3aeda71..b42c77645 100644 --- a/src/tree/split_evaluator.cc +++ b/src/tree/split_evaluator.cc @@ -64,10 +64,6 @@ bst_float SplitEvaluator::ComputeSplitScore(bst_uint nodeid, return ComputeSplitScore(nodeid, featureid, left_stats, right_stats, left_weight, right_weight); } -bool SplitEvaluator::CheckFeatureConstraint(bst_uint nodeid, bst_uint featureid) const { - return true; -} - //! \brief Encapsulates the parameters for ElasticNet struct ElasticNetParams : public XGBoostParameter { bst_float reg_lambda; @@ -159,10 +155,6 @@ class ElasticNet final : public SplitEvaluator { return w; } - bool CheckFeatureConstraint(bst_uint nodeid, bst_uint featureid) const override { - return true; - } - private: ElasticNetParams params_; @@ -307,10 +299,6 @@ class MonotonicConstraint final : public SplitEvaluator { } } - bool CheckFeatureConstraint(bst_uint nodeid, bst_uint featureid) const override { - return true; - } - private: MonotonicConstraintParams params_; std::unique_ptr inner_; @@ -332,207 +320,5 @@ XGBOOST_REGISTER_SPLIT_EVALUATOR(MonotonicConstraint, "monotonic") .set_body([](std::unique_ptr inner) { return new MonotonicConstraint(std::move(inner)); }); - -/*! \brief Encapsulates the parameters required by the InteractionConstraint - split evaluator -*/ -struct InteractionConstraintParams - : public XGBoostParameter { - std::string interaction_constraints; - bst_uint num_feature; - - DMLC_DECLARE_PARAMETER(InteractionConstraintParams) { - DMLC_DECLARE_FIELD(interaction_constraints) - .set_default("") - .describe("Constraints for interaction representing permitted interactions." - "The constraints must be specified in the form of a nest list," - "e.g. [[0, 1], [2, 3, 4]], where each inner list is a group of" - "indices of features that are allowed to interact with each other." 
- "See tutorial for more information"); - DMLC_DECLARE_FIELD(num_feature) - .describe("Number of total features used"); - } -}; - -DMLC_REGISTER_PARAMETER(InteractionConstraintParams); - -/*! \brief Enforces that the tree is monotonically increasing/decreasing with respect to a user specified set of - features. -*/ -class InteractionConstraint final : public SplitEvaluator { - public: - explicit InteractionConstraint(std::unique_ptr inner) { - if (!inner) { - LOG(FATAL) << "InteractionConstraint must be given an inner evaluator"; - } - inner_ = std::move(inner); - } - - void Init(const Args& args) - override { - inner_->Init(args); - params_.UpdateAllowUnknown(args); - Reset(); - } - - void Reset() override { - if (params_.interaction_constraints.empty()) { - return; // short-circuit if no constraint is specified - } - - // Parse interaction constraints - std::istringstream iss(params_.interaction_constraints); - dmlc::JSONReader reader(&iss); - // Read std::vector> first and then - // convert to std::vector> - std::vector> tmp; - try { - reader.Read(&tmp); - } catch (dmlc::Error const& e) { - LOG(FATAL) << "Failed to parse feature interaction constraint:\n" - << params_.interaction_constraints << "\n" - << "With error:\n" << e.what(); - } - for (const auto& e : tmp) { - interaction_constraints_.emplace_back(e.begin(), e.end()); - } - - // Initialise interaction constraints record with all variables permitted for the first node - node_constraints_.clear(); - node_constraints_.resize(1, std::unordered_set()); - node_constraints_[0].reserve(params_.num_feature); - for (bst_uint i = 0; i < params_.num_feature; ++i) { - node_constraints_[0].insert(i); - } - - // Initialise splits record - splits_.clear(); - splits_.resize(1, std::unordered_set()); - } - - SplitEvaluator* GetHostClone() const override { - if (params_.interaction_constraints.empty()) { - // No interaction constraints specified, just return a clone of inner - return inner_->GetHostClone(); - } else { - 
auto c = new InteractionConstraint( - std::unique_ptr(inner_->GetHostClone())); - c->params_ = this->params_; - c->Reset(); - return c; - } - } - - bst_float ComputeSplitScore(bst_uint nodeid, - bst_uint featureid, - const GradStats& left_stats, - const GradStats& right_stats, - bst_float left_weight, - bst_float right_weight) const override { - // Return negative infinity score if feature is not permitted by interaction constraints - if (!CheckInteractionConstraint(featureid, nodeid)) { - return -std::numeric_limits::infinity(); - } - - // Otherwise, get score from inner evaluator - bst_float score = inner_->ComputeSplitScore( - nodeid, featureid, left_stats, right_stats, left_weight, right_weight); - return score; - } - - bst_float ComputeScore(bst_uint parentID, const GradStats& stats, bst_float weight) - const override { - return inner_->ComputeScore(parentID, stats, weight); - } - - bst_float ComputeWeight(bst_uint parentID, const GradStats& stats) - const override { - return inner_->ComputeWeight(parentID, stats); - } - - void AddSplit(bst_uint nodeid, - bst_uint leftid, - bst_uint rightid, - bst_uint featureid, - bst_float leftweight, - bst_float rightweight) override { - inner_->AddSplit(nodeid, leftid, rightid, featureid, leftweight, rightweight); - - if (params_.interaction_constraints.empty()) { - return; // short-circuit if no constraint is specified - } - bst_uint newsize = std::max(leftid, rightid) + 1; - - // Record previous splits for child nodes - std::unordered_set feature_splits = splits_[nodeid]; // fid history of current node - feature_splits.insert(featureid); // add feature of current node - splits_.resize(newsize); - splits_[leftid] = feature_splits; - splits_[rightid] = feature_splits; - - // Resize constraints record, initialise all features to be not permitted for new nodes - node_constraints_.resize(newsize, std::unordered_set()); - - // Permit features used in previous splits - for (bst_uint fid : feature_splits) { - 
node_constraints_[leftid].insert(fid); - node_constraints_[rightid].insert(fid); - } - - // Loop across specified interactions in constraints - for (const auto& constraint : interaction_constraints_) { - bst_uint flag = 1; // flags whether the specified interaction is still relevant - - // Test relevance of specified interaction by checking all previous features are included - for (bst_uint checkvar : feature_splits) { - if (constraint.count(checkvar) == 0) { - flag = 0; - break; // interaction is not relevant due to unmet constraint - } - } - - // If interaction is still relevant, permit all other features in the interaction - if (flag == 1) { - for (bst_uint k : constraint) { - node_constraints_[leftid].insert(k); - node_constraints_[rightid].insert(k); - } - } - } - } - - bool CheckFeatureConstraint(bst_uint nodeid, bst_uint featureid) const override { - return CheckInteractionConstraint(featureid, nodeid); - } - - private: - InteractionConstraintParams params_; - std::unique_ptr inner_; - // interaction_constraints_[constraint_id] contains a single interaction - // constraint, which specifies a group of feature IDs that can interact - // with each other - std::vector< std::unordered_set > interaction_constraints_; - // int_cont_[nid] contains the set of all feature IDs that are allowed to - // be used for a split at node nid - std::vector< std::unordered_set > node_constraints_; - // splits_[nid] contains the set of all feature IDs that have been used for - // splits in node nid and its parents - std::vector< std::unordered_set > splits_; - - // Check interaction constraints. 
Returns true if a given feature ID is - // permissible in a given node; returns false otherwise - inline bool CheckInteractionConstraint(bst_uint featureid, bst_uint nodeid) const { - // short-circuit if no constraint is specified - return (params_.interaction_constraints.empty() - || node_constraints_.at(nodeid).count(featureid) > 0); - } -}; - -XGBOOST_REGISTER_SPLIT_EVALUATOR(InteractionConstraint, "interaction") -.describe("Enforces interaction constraints on tree features") -.set_body([](std::unique_ptr inner) { - return new InteractionConstraint(std::move(inner)); - }); - } // namespace tree } // namespace xgboost diff --git a/src/tree/split_evaluator.h b/src/tree/split_evaluator.h index c7281470a..905e5ed87 100644 --- a/src/tree/split_evaluator.h +++ b/src/tree/split_evaluator.h @@ -68,11 +68,6 @@ class SplitEvaluator { bst_uint featureid, bst_float leftweight, bst_float rightweight); - - // Check whether a given feature is feasible for a given node. - // Use this function to narrow the search space for split candidates - virtual bool CheckFeatureConstraint(bst_uint nodeid, - bst_uint featureid) const = 0; }; struct SplitEvaluatorReg diff --git a/src/tree/updater_basemaker-inl.h b/src/tree/updater_basemaker-inl.h index 2d432c407..700f6e07a 100644 --- a/src/tree/updater_basemaker-inl.h +++ b/src/tree/updater_basemaker-inl.h @@ -9,15 +9,18 @@ #include -#include -#include + #include #include #include #include #include -#include "./param.h" +#include "xgboost/base.h" +#include "xgboost/tree_updater.h" +#include "param.h" +#include "constraints.h" + #include "../common/io.h" #include "../common/random.h" #include "../common/quantile.h" @@ -75,11 +78,12 @@ class BaseMaker: public TreeUpdater { return 2; } } - inline bst_float MaxValue(bst_uint fid) const { + bst_float MaxValue(bst_uint fid) const { return fminmax_[fid *2 + 1]; } - inline void SampleCol(float p, std::vector *p_findex) const { - std::vector &findex = *p_findex; + + void SampleCol(float p, 
std::vector *p_findex) const { + std::vector &findex = *p_findex; findex.clear(); for (size_t i = 0; i < fminmax_.size(); i += 2) { const auto fid = static_cast(i / 2); @@ -161,6 +165,7 @@ class BaseMaker: public TreeUpdater { } this->UpdateNode2WorkIndex(tree); } + this->interaction_constraints_.Configure(param_, fmat.Info().num_col_); } /*! \brief update queue expand add in new leaves */ inline void UpdateQueueExpand(const RegTree &tree) { @@ -215,7 +220,7 @@ class BaseMaker: public TreeUpdater { // so that they are ignored in future statistics collection const auto ndata = static_cast(p_fmat->Info().num_row_); - #pragma omp parallel for schedule(static) +#pragma omp parallel for schedule(static) for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) { const int nid = this->DecodePosition(ridx); if (tree[nid].IsLeaf()) { @@ -462,6 +467,8 @@ class BaseMaker: public TreeUpdater { */ std::vector position_; + FeatureInteractionConstraintHost interaction_constraints_; + private: inline void UpdateNode2WorkIndex(const RegTree &tree) { // update the node2workindex diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index 853480d02..3039fdd77 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -1,5 +1,5 @@ /*! 
- * Copyright 2014 by Contributors + * Copyright 2014-2019 by Contributors * \file updater_colmaker.cc * \brief use columnwise update to construct a tree * \author Tianqi Chen @@ -13,6 +13,7 @@ #include #include "param.h" +#include "constraints.h" #include "../common/random.h" #include "../common/bitmap.h" #include "split_evaluator.h" @@ -41,11 +42,13 @@ class ColMaker: public TreeUpdater { // rescale learning rate according to size of trees float lr = param_.learning_rate; param_.learning_rate = lr / trees.size(); + interaction_constraints_.Configure(param_, dmat->Info().num_col_); // build tree for (auto tree : trees) { Builder builder( param_, - std::unique_ptr(spliteval_->GetHostClone())); + std::unique_ptr(spliteval_->GetHostClone()), + interaction_constraints_); builder.Update(gpair->ConstHostVector(), dmat, tree); } param_.learning_rate = lr; @@ -56,6 +59,8 @@ class ColMaker: public TreeUpdater { TrainParam param_; // SplitEvaluator that will be cloned for each Builder std::unique_ptr spliteval_; + + FeatureInteractionConstraintHost interaction_constraints_; // data structure /*!
\brief per thread x per node entry to store tmp data */ struct ThreadEntry { @@ -89,9 +94,11 @@ class ColMaker: public TreeUpdater { public: // constructor explicit Builder(const TrainParam& param, - std::unique_ptr spliteval) + std::unique_ptr spliteval, + FeatureInteractionConstraintHost _interaction_constraints) : param_(param), nthread_(omp_get_max_threads()), - spliteval_(std::move(spliteval)) {} + spliteval_(std::move(spliteval)), + interaction_constraints_{std::move(_interaction_constraints)} {} // update one tree, growing virtual void Update(const std::vector& gpair, DMatrix* p_fmat, @@ -116,6 +123,7 @@ class ColMaker: public TreeUpdater { snode_[nid].best.SplitIndex(), snode_[cleft].weight, snode_[cright].weight); + interaction_constraints_.Split(nid, snode_[nid].best.SplitIndex(), cleft, cright); } qexpand_ = newnodes; // if nothing left to be expand, break @@ -247,12 +255,13 @@ class ColMaker: public TreeUpdater { // this function does not support nested functions inline void ParallelFindSplit(const SparsePage::Inst &col, bst_uint fid, - DMatrix *p_fmat, + DMatrix *p_fmat, const std::vector &gpair) { // TODO(tqchen): double check stats order. 
const bool ind = col.size() != 0 && col[0].fvalue == col[col.size() - 1].fvalue; - bool need_forward = param_.NeedForwardSearch(p_fmat->GetColDensity(fid), ind); - bool need_backward = param_.NeedBackwardSearch(p_fmat->GetColDensity(fid), ind); + auto col_density = p_fmat->GetColDensity(fid); + bool need_forward = param_.NeedForwardSearch(col_density, ind); + bool need_backward = param_.NeedBackwardSearch(col_density, ind); const std::vector &qexpand = qexpand_; #pragma omp parallel { @@ -391,7 +400,7 @@ class ColMaker: public TreeUpdater { // update enumeration solution inline void UpdateEnumeration(int nid, GradientPair gstats, bst_float fvalue, int d_step, bst_uint fid, - GradStats &c, std::vector &temp) { // NOLINT(*) + GradStats &c, std::vector &temp) const { // NOLINT(*) // get the statistics of nid ThreadEntry &e = temp[nid]; // test if first hit, this is fine, because we set 0 during init @@ -404,7 +413,7 @@ class ColMaker: public TreeUpdater { e.stats.sum_hess >= param_.min_child_weight) { c.SetSubstract(snode_[nid].stats, e.stats); if (c.sum_hess >= param_.min_child_weight) { - bst_float loss_chg; + bst_float loss_chg {0}; if (d_step == -1) { loss_chg = static_cast( spliteval_->ComputeSplitScore(nid, fid, c, e.stats) - @@ -438,12 +447,13 @@ class ColMaker: public TreeUpdater { } } // same as EnumerateSplit, with cacheline prefetch optimization - inline void EnumerateSplitCacheOpt(const Entry *begin, - const Entry *end, - int d_step, - bst_uint fid, - const std::vector &gpair, - std::vector &temp) { // NOLINT(*) + void EnumerateSplit(const Entry *begin, + const Entry *end, + int d_step, + bst_uint fid, + const std::vector &gpair, + std::vector &temp) const { // NOLINT(*) + CHECK(param_.cache_opt) << "Support for `cache_opt' is removed in 1.0.0"; const std::vector &qexpand = qexpand_; // clear all the temp statistics for (auto nid : qexpand) { @@ -474,12 +484,13 @@ class ColMaker: public TreeUpdater { } for (i = 0, p = it; i < kBuffer; ++i, p += d_step) { 
const int nid = buf_position[i]; - if (nid < 0) continue; + if (nid < 0 || !interaction_constraints_.Query(nid, fid)) { continue; } this->UpdateEnumeration(nid, buf_gpair[i], p->fvalue, d_step, fid, c, temp); } } + // finish up the ending piece for (it = align_end, i = 0; it != end; ++i, it += d_step) { buf_position[i] = position_[it->index]; @@ -487,7 +498,7 @@ class ColMaker: public TreeUpdater { } for (it = align_end, i = 0; it != end; ++i, it += d_step) { const int nid = buf_position[i]; - if (nid < 0) continue; + if (nid < 0 || !interaction_constraints_.Query(nid, fid)) { continue; } this->UpdateEnumeration(nid, buf_gpair[i], it->fvalue, d_step, fid, c, temp); @@ -518,136 +529,43 @@ class ColMaker: public TreeUpdater { } } - // enumerate the split values of specific feature - inline void EnumerateSplit(const Entry *begin, - const Entry *end, - int d_step, - bst_uint fid, - const std::vector &gpair, - const MetaInfo &info, - std::vector &temp) { // NOLINT(*) - // use cacheline aware optimization - if (param_.cache_opt != 0) { - EnumerateSplitCacheOpt(begin, end, d_step, fid, gpair, temp); - return; - } - const std::vector &qexpand = qexpand_; - // clear all the temp statistics - for (auto nid : qexpand) { - temp[nid].stats = GradStats(); - } - // left statistics - GradStats c; - for (const Entry *it = begin; it != end; it += d_step) { - const bst_uint ridx = it->index; - const int nid = position_[ridx]; - if (nid < 0) continue; - // start working - const bst_float fvalue = it->fvalue; - // get the statistics of nid - ThreadEntry &e = temp[nid]; - // test if first hit, this is fine, because we set 0 during init - if (e.stats.Empty()) { - e.stats.Add(gpair[ridx]); - e.last_fvalue = fvalue; - } else { - // try to find a split - if (fvalue != e.last_fvalue && - e.stats.sum_hess >= param_.min_child_weight) { - c.SetSubstract(snode_[nid].stats, e.stats); - if (c.sum_hess >= param_.min_child_weight) { - bst_float loss_chg; - if (d_step == -1) { - loss_chg = 
static_cast( - spliteval_->ComputeSplitScore(nid, fid, c, e.stats) - - snode_[nid].root_gain); - e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, - d_step == -1, c, e.stats); - } else { - loss_chg = static_cast( - spliteval_->ComputeSplitScore(nid, fid, e.stats, c) - - snode_[nid].root_gain); - e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, - d_step == -1, e.stats, c); - } - } - } - // update the statistics - e.stats.Add(gpair[ridx]); - e.last_fvalue = fvalue; - } - } - // finish updating all statistics, check if it is possible to include all sum statistics - for (int nid : qexpand) { - ThreadEntry &e = temp[nid]; - c.SetSubstract(snode_[nid].stats, e.stats); - if (e.stats.sum_hess >= param_.min_child_weight && - c.sum_hess >= param_.min_child_weight) { - bst_float loss_chg; - GradStats left_sum; - GradStats right_sum; - if (d_step == -1) { - left_sum = c; - right_sum = e.stats; - } else { - left_sum = e.stats; - right_sum = c; - } - loss_chg = static_cast( - spliteval_->ComputeSplitScore(nid, fid, left_sum, right_sum) - - snode_[nid].root_gain); - const bst_float gap = std::abs(e.last_fvalue) + kRtEps; - const bst_float delta = d_step == +1 ? gap: -gap; - e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1, left_sum, right_sum); - } - } - } - // update the solution candidate virtual void UpdateSolution(const SparsePage &batch, - const std::vector &feat_set, + const std::vector &feat_set, const std::vector &gpair, DMatrix*p_fmat) { - const MetaInfo& info = p_fmat->Info(); // start enumeration const auto num_features = static_cast(feat_set.size()); #if defined(_OPENMP) const int batch_size = // NOLINT std::max(static_cast(num_features / this->nthread_ / 32), 1); #endif // defined(_OPENMP) - int poption = param_.parallel_option; - if (poption == 2) { - poption = static_cast(num_features) * 2 < this->nthread_ ? 
1 : 0; - } - if (poption == 0) { + + CHECK_EQ(param_.parallel_option, 0) << "Support for `parallel_option' is removed in 1.0.0"; + { std::vector densities(num_features); CHECK_EQ(feat_set.size(), num_features); for (bst_omp_uint i = 0; i < num_features; ++i) { - int32_t const fid = feat_set[i]; + bst_feature_t const fid = feat_set[i]; densities.at(i) = p_fmat->GetColDensity(fid); } #pragma omp parallel for schedule(dynamic, batch_size) for (bst_omp_uint i = 0; i < num_features; ++i) { - int32_t const fid = feat_set[i]; + bst_feature_t const fid = feat_set[i]; int32_t const tid = omp_get_thread_num(); auto c = batch[fid]; const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue; auto const density = densities[i]; if (param_.NeedForwardSearch(density, ind)) { this->EnumerateSplit(c.data(), c.data() + c.size(), +1, - fid, gpair, info, stemp_[tid]); + fid, gpair, stemp_[tid]); } if (param_.NeedBackwardSearch(density, ind)) { this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1, - fid, gpair, info, stemp_[tid]); + fid, gpair, stemp_[tid]); } } - } else { - for (bst_omp_uint fid = 0; fid < num_features; ++fid) { - this->ParallelFindSplit(batch[fid], fid, - p_fmat, gpair); - } } } // find splits at current level, do split per level @@ -664,7 +582,7 @@ class ColMaker: public TreeUpdater { this->SyncBestSolution(qexpand); // get the best result, we can synchronize the solution for (int nid : qexpand) { - NodeEntry &e = snode_[nid]; + NodeEntry const &e = snode_[nid]; // now we know the solution in snode[nid], set split if (e.best.loss_chg > kRtEps) { bst_float left_leaf_weight = @@ -694,7 +612,7 @@ class ColMaker: public TreeUpdater { // so that they are ignored in future statistics collection const auto ndata = static_cast(p_fmat->Info().num_row_); - #pragma omp parallel for schedule(static) +#pragma omp parallel for schedule(static) for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) { CHECK_LT(ridx, position_.size()) << "ridx exceed bound " << 
"ridx="<< ridx << " pos=" << position_.size(); @@ -740,7 +658,7 @@ class ColMaker: public TreeUpdater { for (auto fid : fsplits) { auto col = batch[fid]; const auto ndata = static_cast(col.size()); - #pragma omp parallel for schedule(static) +#pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; const int nid = this->DecodePosition(ridx); @@ -786,6 +704,8 @@ class ColMaker: public TreeUpdater { std::vector qexpand_; // Evaluates splits and computes optimal weights for a given split std::unique_ptr spliteval_; + + FeatureInteractionConstraintHost interaction_constraints_; }; }; @@ -810,7 +730,8 @@ class DistColMaker : public ColMaker { CHECK_EQ(trees.size(), 1U) << "DistColMaker: only support one tree at a time"; Builder builder( param_, - std::unique_ptr(spliteval_->GetHostClone())); + std::unique_ptr(spliteval_->GetHostClone()), + interaction_constraints_); // build the tree builder.Update(gpair->ConstHostVector(), dmat, trees[0]); //// prune the tree, note that pruner will sync the tree @@ -823,8 +744,9 @@ class DistColMaker : public ColMaker { class Builder : public ColMaker::Builder { public: explicit Builder(const TrainParam &param, - std::unique_ptr spliteval) - : ColMaker::Builder(param, std::move(spliteval)) {} + std::unique_ptr spliteval, + FeatureInteractionConstraintHost _interaction_constraints) + : ColMaker::Builder(param, std::move(spliteval), std::move(_interaction_constraints)) {} inline void UpdatePosition(DMatrix* p_fmat, const RegTree &tree) { const auto ndata = static_cast(p_fmat->Info().num_row_); #pragma omp parallel for schedule(static) @@ -931,6 +853,8 @@ class DistColMaker : public ColMaker { TrainParam param_; // Cloned for each builder instantiation std::unique_ptr spliteval_; + + FeatureInteractionConstraintHost interaction_constraints_; }; XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "grow_colmaker") diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index
83ccb3b1d..628e3efca 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -247,7 +247,7 @@ __device__ void EvaluateFeature( template __global__ void EvaluateSplitKernel( common::Span node_histogram, // histogram for gradients - common::Span feature_set, // Selected features + common::Span feature_set, // Selected features DeviceNodeStats node, xgboost::EllpackMatrix matrix, GPUTrainingParam gpu_param, @@ -582,8 +582,8 @@ struct GPUHistMakerDevice { auto nidx = nidxs[i]; auto p_feature_set = column_sampler.GetFeatureSet(tree.GetDepth(nidx)); p_feature_set->SetDevice(device_id); - auto d_sampled_features = p_feature_set->DeviceSpan(); - common::Span d_feature_set = + common::Span d_sampled_features = p_feature_set->DeviceSpan(); + common::Span d_feature_set = interaction_constraints.Query(d_sampled_features, nidx); auto d_split_candidates = d_split_candidates_all.subspan(i * num_columns, d_feature_set.size()); diff --git a/src/tree/updater_histmaker.cc b/src/tree/updater_histmaker.cc index 4b6223454..246aeabe9 100644 --- a/src/tree/updater_histmaker.cc +++ b/src/tree/updater_histmaker.cc @@ -1,18 +1,21 @@ /*! 
- * Copyright 2014 by Contributors + * Copyright 2014-2019 by Contributors * \file updater_histmaker.cc * \brief use histogram counting to construct a tree * \author Tianqi Chen */ #include -#include -#include #include #include +#include "xgboost/tree_updater.h" +#include "xgboost/base.h" +#include "xgboost/logging.h" + #include "../common/quantile.h" #include "../common/group_data.h" #include "./updater_basemaker-inl.h" +#include "constraints.h" namespace xgboost { namespace tree { @@ -24,12 +27,13 @@ class HistMaker: public BaseMaker { void Update(HostDeviceVector *gpair, DMatrix *p_fmat, const std::vector &trees) override { + interaction_constraints_.Configure(param_, p_fmat->Info().num_col_); // rescale learning rate according to size of trees float lr = param_.learning_rate; param_.learning_rate = lr / trees.size(); // build tree for (auto tree : trees) { - this->Update(gpair->ConstHostVector(), p_fmat, tree); + this->UpdateTree(gpair->ConstHostVector(), p_fmat, tree); } param_.learning_rate = lr; } @@ -38,43 +42,39 @@ class HistMaker: public BaseMaker { } protected: - /*! \brief a single histogram */ + /*! \brief a single column of histogram cuts */ struct HistUnit { /*! \brief cutting point of histogram, contains maximum point */ - const bst_float *cut; + const float *cut; /*! \brief content of statistics data */ GradStats *data; /*! \brief size of histogram */ - unsigned size; + uint32_t size; // default constructor HistUnit() = default; // constructor - HistUnit(const bst_float *cut, GradStats *data, unsigned size) - : cut(cut), data(data), size(size) {} + HistUnit(const float *cut, GradStats *data, uint32_t size) + : cut{cut}, data{data}, size{size} {} /*! 
\brief add a histogram to data */ - inline void Add(bst_float fv, - const std::vector &gpair, - const MetaInfo &info, - const bst_uint ridx) { - unsigned i = std::upper_bound(cut, cut + size, fv) - cut; + void Add(float fv, const std::vector &gpair, + const MetaInfo &info, const size_t ridx) { + unsigned bin = std::upper_bound(cut, cut + size, fv) - cut; CHECK_NE(size, 0U) << "try insert into size=0"; - CHECK_LT(i, size); - data[i].Add(gpair[ridx]); + CHECK_LT(bin, size); + data[bin].Add(gpair[ridx]); } }; /*! \brief a set of histograms from different index */ struct HistSet { /*! \brief the index pointer of each histunit */ - const unsigned *rptr; + const uint32_t *rptr; /*! \brief cutting points in each histunit */ const bst_float *cut; /*! \brief data in different hist unit */ std::vector data; - /*! \brief */ + /*! \brief return a column of histogram cuts */ inline HistUnit operator[](size_t fid) { - return {cut + rptr[fid], - &data[0] + rptr[fid], - rptr[fid+1] - rptr[fid]}; + return {cut + rptr[fid], &data[0] + rptr[fid], rptr[fid+1] - rptr[fid]}; } }; // thread workspace @@ -110,26 +110,27 @@ class HistMaker: public BaseMaker { // reducer for histogram rabit::Reducer histred_; // set of working features - std::vector fwork_set_; + std::vector selected_features_; // update function implementation - virtual void Update(const std::vector &gpair, - DMatrix *p_fmat, - RegTree *p_tree) { + virtual void UpdateTree(const std::vector &gpair, + DMatrix *p_fmat, + RegTree *p_tree) { CHECK(param_.max_depth > 0) << "max_depth must be larger than 0"; this->InitData(gpair, *p_fmat, *p_tree); - this->InitWorkSet(p_fmat, *p_tree, &fwork_set_); + this->InitWorkSet(p_fmat, *p_tree, &selected_features_); // mark root node as fresh. 
for (int i = 0; i < p_tree->param.num_roots; ++i) { (*p_tree)[i].SetLeaf(0.0f, 0); } + CHECK_EQ(p_tree->param.num_roots, 1) << "Support for num roots is removed."; for (int depth = 0; depth < param_.max_depth; ++depth) { // reset and propose candidate split - this->ResetPosAndPropose(gpair, p_fmat, fwork_set_, *p_tree); + this->ResetPosAndPropose(gpair, p_fmat, selected_features_, *p_tree); // create histogram - this->CreateHist(gpair, p_fmat, fwork_set_, *p_tree); + this->CreateHist(gpair, p_fmat, selected_features_, *p_tree); // find split based on histogram statistics - this->FindSplit(depth, gpair, p_fmat, fwork_set_, p_tree); + this->FindSplit(depth, gpair, p_fmat, selected_features_, p_tree); // reset position after split this->ResetPositionAfterSplit(p_fmat, *p_tree); this->UpdateQueueExpand(*p_tree); @@ -145,12 +146,12 @@ class HistMaker: public BaseMaker { // (2) propose a set of candidate cuts and set wspace.rptr wspace.cut correctly virtual void ResetPosAndPropose(const std::vector &gpair, DMatrix *p_fmat, - const std::vector &fset, + const std::vector &fset, const RegTree &tree) = 0; // initialize the current working set of features in this round virtual void InitWorkSet(DMatrix *p_fmat, const RegTree &tree, - std::vector *p_fset) { + std::vector *p_fset) { p_fset->resize(tree.param.num_feature); for (size_t i = 0; i < p_fset->size(); ++i) { (*p_fset)[i] = static_cast(i); @@ -162,15 +163,15 @@ class HistMaker: public BaseMaker { } virtual void CreateHist(const std::vector &gpair, DMatrix *p_fmat, - const std::vector &fset, + const std::vector &fset, const RegTree &tree) = 0; private: - inline void EnumerateSplit(const HistUnit &hist, - const GradStats &node_sum, - bst_uint fid, - SplitEntry *best, - GradStats *left_sum) { + void EnumerateSplit(const HistUnit &hist, + const GradStats &node_sum, + bst_uint fid, + SplitEntry *best, + GradStats *left_sum) const { if (hist.size == 0) return; double root_gain = CalcGain(param_, node_sum.GetGrad(), 
node_sum.GetHess()); @@ -203,31 +204,37 @@ class HistMaker: public BaseMaker { } } } - inline void FindSplit(int depth, - const std::vector &gpair, - DMatrix *p_fmat, - const std::vector &fset, - RegTree *p_tree) { - const size_t num_feature = fset.size(); + + void FindSplit(int depth, + const std::vector &gpair, + DMatrix *p_fmat, + const std::vector &feature_set, + RegTree *p_tree) { + const size_t num_feature = feature_set.size(); // get the best split condition for each node std::vector sol(qexpand_.size()); std::vector left_sum(qexpand_.size()); auto nexpand = static_cast(qexpand_.size()); - #pragma omp parallel for schedule(dynamic, 1) +#pragma omp parallel for schedule(dynamic, 1) for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { const int nid = qexpand_[wid]; CHECK_EQ(node2workindex_[nid], static_cast(wid)); SplitEntry &best = sol[wid]; GradStats &node_sum = wspace_.hset[0][num_feature + wid * (num_feature + 1)].data[0]; - for (size_t i = 0; i < fset.size(); ++i) { + for (size_t i = 0; i < feature_set.size(); ++i) { + // Query is thread safe as it's a const function. 
+ if (!this->interaction_constraints_.Query(nid, feature_set[i])) { + continue; + } + EnumerateSplit(this->wspace_.hset[0][i + wid * (num_feature+1)], - node_sum, fset[i], &best, &left_sum[wid]); + node_sum, feature_set[i], &best, &left_sum[wid]); } } // get the best result, we can synchronize the solution for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { - const int nid = qexpand_[wid]; - const SplitEntry &best = sol[wid]; + const bst_node_t nid = qexpand_[wid]; + SplitEntry const& best = sol[wid]; const GradStats &node_sum = wspace_.hset[0][num_feature + wid * (num_feature + 1)].data[0]; this->SetStats(p_tree, nid, node_sum); // set up the values @@ -246,11 +253,13 @@ class HistMaker: public BaseMaker { best.DefaultLeft(), base_weight, left_leaf_weight, right_leaf_weight, best.loss_chg, node_sum.sum_hess); - // right side sum GradStats right_sum; right_sum.SetSubstract(node_sum, left_sum[wid]); - this->SetStats(p_tree, (*p_tree)[nid].LeftChild(), left_sum[wid]); - this->SetStats(p_tree, (*p_tree)[nid].RightChild(), right_sum); + auto left_child = (*p_tree)[nid].LeftChild(); + auto right_child = (*p_tree)[nid].RightChild(); + this->SetStats(p_tree, left_child, left_sum[wid]); + this->SetStats(p_tree, right_child, right_sum); + this->interaction_constraints_.Split(nid, best.SplitIndex(), left_child, right_child); } else { (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate); } @@ -314,7 +323,7 @@ class CQHistMaker: public HistMaker { // initialize the work set of tree void InitWorkSet(DMatrix *p_fmat, const RegTree &tree, - std::vector *p_fset) override { + std::vector *p_fset) override { if (p_fmat != cache_dmatrix_) { feat_helper_.InitByCol(p_fmat, tree); cache_dmatrix_ = p_fmat; @@ -325,7 +334,7 @@ class CQHistMaker: public HistMaker { // code to create histogram void CreateHist(const std::vector &gpair, DMatrix *p_fmat, - const std::vector &fset, + const std::vector &fset, const RegTree &tree) override { const MetaInfo &info = 
p_fmat->Info(); // fill in reverse map @@ -365,7 +374,6 @@ class CQHistMaker: public HistMaker { } }; // sync the histogram - // if it is C++11, use lazy evaluation for Allreduce this->histred_.Allreduce(dmlc::BeginPtr(this->wspace_.hset[0].data), this->wspace_.hset[0].data.size(), lazy_get_hist); } @@ -376,7 +384,7 @@ class CQHistMaker: public HistMaker { } void ResetPosAndPropose(const std::vector &gpair, DMatrix *p_fmat, - const std::vector &fset, + const std::vector &fset, const RegTree &tree) override { const MetaInfo &info = p_fmat->Info(); // fill in reverse map @@ -485,7 +493,7 @@ class CQHistMaker: public HistMaker { const SparsePage::Inst &col, const MetaInfo &info, const RegTree &tree, - const std::vector &fset, + const std::vector &fset, bst_uint fid_offset, std::vector *p_temp) { if (col.size() == 0) return; @@ -612,7 +620,7 @@ class CQHistMaker: public HistMaker { // temp space to map feature id to working index std::vector feat2workindex_; // set of index from fset that are current work set - std::vector work_set_; + std::vector work_set_; // set of index from that are split candidates. 
std::vector fsplit_set_; // thread temp data @@ -641,7 +649,7 @@ class GlobalProposalHistMaker: public CQHistMaker { protected: void ResetPosAndPropose(const std::vector &gpair, DMatrix *p_fmat, - const std::vector &fset, + const std::vector &fset, const RegTree &tree) override { if (this->qexpand_.size() == 1) { cached_rptr_.clear(); @@ -672,7 +680,7 @@ class GlobalProposalHistMaker: public CQHistMaker { // code to create histogram void CreateHist(const std::vector &gpair, DMatrix *p_fmat, - const std::vector &fset, + const std::vector &fset, const RegTree &tree) override { const MetaInfo &info = p_fmat->Info(); // fill in reverse map @@ -692,7 +700,8 @@ class GlobalProposalHistMaker: public CQHistMaker { this->SetDefaultPostion(p_fmat, tree); this->work_set_.insert(this->work_set_.end(), this->fsplit_set_.begin(), this->fsplit_set_.end()); - std::sort(this->work_set_.begin(), this->work_set_.end()); + XGBOOST_PARALLEL_SORT(this->work_set_.begin(), this->work_set_.end(), + std::lesswork_set_)::value_type>{}); this->work_set_.resize( std::unique(this->work_set_.begin(), this->work_set_.end()) - this->work_set_.begin()); @@ -703,7 +712,7 @@ class GlobalProposalHistMaker: public CQHistMaker { // start enumeration const auto nsize = static_cast(this->work_set_.size()); - #pragma omp parallel for schedule(dynamic, 1) +#pragma omp parallel for schedule(dynamic, 1) for (bst_omp_uint i = 0; i < nsize; ++i) { int fid = this->work_set_[i]; int offset = this->feat2workindex_[fid]; @@ -740,6 +749,7 @@ XGBOOST_REGISTER_TREE_UPDATER(LocalHistMaker, "grow_local_histmaker") return new CQHistMaker(); }); +// The updater for approx tree method. 
XGBOOST_REGISTER_TREE_UPDATER(HistMaker, "grow_histmaker") .describe("Tree constructor that uses approximate global of histogram construction.") .set_body([]() { diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index afdf15cb9..1a457a2a0 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -22,6 +22,7 @@ #include "./param.h" #include "./updater_quantile_hist.h" #include "./split_evaluator.h" +#include "constraints.h" #include "../common/random.h" #include "../common/hist_util.h" #include "../common/row_set.h" @@ -65,12 +66,14 @@ void QuantileHistMaker::Update(HostDeviceVector *gpair, // rescale learning rate according to size of trees float lr = param_.learning_rate; param_.learning_rate = lr / trees.size(); + int_constraint_.Configure(param_, dmat->Info().num_col_); // build tree if (!builder_) { builder_.reset(new Builder( param_, std::move(pruner_), - std::unique_ptr(spliteval_->GetHostClone()))); + std::unique_ptr(spliteval_->GetHostClone()), + int_constraint_)); } for (auto tree : trees) { builder_->Update(gmat_, gmatb_, column_matrix_, gpair, dmat, tree); @@ -170,6 +173,8 @@ void QuantileHistMaker::Builder::BuildNodeStats( auto parent_split_feature_id = snode_[parent_id].best.SplitIndex(); spliteval_->AddSplit(parent_id, left_sibling_id, nid, parent_split_feature_id, snode_[left_sibling_id].weight, snode_[nid].weight); + interaction_constraints_.Split(parent_id, parent_split_feature_id, + left_sibling_id, nid); } } builder_monitor_.Stop("BuildNodeStats"); @@ -298,6 +303,7 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide( bst_uint featureid = snode_[nid].best.SplitIndex(); spliteval_->AddSplit(nid, cleft, cright, featureid, snode_[cleft].weight, snode_[cright].weight); + interaction_constraints_.Split(nid, featureid, cleft, cright); this->EvaluateSplit(cleft, gmat, hist_, *p_fmat, *p_tree); this->EvaluateSplit(cright, gmat, hist_, *p_fmat, *p_tree); @@ -325,6 +331,7 @@ void 
QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat, const std::vector& gpair_h = gpair->ConstHostVector(); spliteval_->Reset(); + interaction_constraints_.Reset(); this->InitData(gmat, gpair_h, *p_fmat, *p_tree); @@ -457,7 +464,7 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat, } bool has_neg_hess = false; - for (size_t tid = 0; tid < this->nthread_; ++tid) { + for (int32_t tid = 0; tid < this->nthread_; ++tid) { if (p_buff[tid]) { has_neg_hess = true; } @@ -561,8 +568,8 @@ void QuantileHistMaker::Builder::EvaluateSplit(const int nid, // start enumeration const MetaInfo& info = fmat.Info(); auto p_feature_set = column_sampler_.GetFeatureSet(tree.GetDepth(nid)); - const auto& feature_set = p_feature_set->HostVector(); - const auto nfeature = static_cast(feature_set.size()); + auto const& feature_set = p_feature_set->HostVector(); + const auto nfeature = static_cast(feature_set.size()); const auto nthread = static_cast(this->nthread_); best_split_tloc_.resize(nthread); #pragma omp parallel for schedule(static) num_threads(nthread) @@ -576,9 +583,7 @@ void QuantileHistMaker::Builder::EvaluateSplit(const int nid, const auto feature_id = static_cast(feature_set[i]); const auto tid = static_cast(omp_get_thread_num()); const auto node_id = static_cast(nid); - // Narrow search space by dropping features that are not feasible under the - // given set of constraints (e.g. 
feature interaction constraints) - if (spliteval_->CheckFeatureConstraint(node_id, feature_id)) { + if (interaction_constraints_.Query(node_id, feature_id)) { this->EnumerateSplit(-1, gmat, node_hist, snode_[nid], info, &best_split_tloc_[tid], feature_id, node_id); this->EnumerateSplit(+1, gmat, node_hist, snode_[nid], info, diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index 222497113..62638e86e 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -19,6 +19,7 @@ #include #include +#include "constraints.h" #include "./param.h" #include "./split_evaluator.h" #include "../common/random.h" @@ -123,10 +124,11 @@ class QuantileHistMaker: public TreeUpdater { // constructor explicit Builder(const TrainParam& param, std::unique_ptr pruner, - std::unique_ptr spliteval) + std::unique_ptr spliteval, + FeatureInteractionConstraintHost int_constraints_) : param_(param), pruner_(std::move(pruner)), - spliteval_(std::move(spliteval)), p_last_tree_(nullptr), - p_last_fmat_(nullptr) { + spliteval_(std::move(spliteval)), interaction_constraints_{int_constraints_}, + p_last_tree_(nullptr), p_last_fmat_(nullptr) { builder_monitor_.Init("Quantile::Builder"); } // update one tree, growing @@ -296,6 +298,7 @@ class QuantileHistMaker: public TreeUpdater { GHistBuilder hist_builder_; std::unique_ptr pruner_; std::unique_ptr spliteval_; + FeatureInteractionConstraintHost interaction_constraints_; // back pointers to tree and data matrix const RegTree* p_last_tree_; @@ -321,6 +324,7 @@ class QuantileHistMaker: public TreeUpdater { std::unique_ptr builder_; std::unique_ptr pruner_; std::unique_ptr spliteval_; + FeatureInteractionConstraintHost int_constraint_; }; } // namespace tree diff --git a/tests/cpp/common/test_gpu_hist_util.cu b/tests/cpp/common/test_gpu_hist_util.cu index cdbafec4d..ff5683290 100644 --- a/tests/cpp/common/test_gpu_hist_util.cu +++ b/tests/cpp/common/test_gpu_hist_util.cu @@ -56,7 +56,7 @@ void 
TestDeviceSketch(bool use_external_memory) { size_t row_stride = DeviceSketch(device, max_bin, gpu_batch_nrows, dmat->get(), &hmat_gpu); // compare the row stride with the one obtained from the dmatrix - size_t expected_row_stride = 0; + bst_row_t expected_row_stride = 0; for (const auto &batch : dmat->get()->GetBatches()) { const auto &offset_vec = batch.offset.ConstHostVector(); for (int i = 1; i <= offset_vec.size() -1; ++i) { diff --git a/tests/cpp/common/test_random.cc b/tests/cpp/common/test_random.cc index 128b0fd8c..03f6251d5 100644 --- a/tests/cpp/common/test_random.cc +++ b/tests/cpp/common/test_random.cc @@ -55,7 +55,7 @@ TEST(ColumnSampler, ThreadSynchronisation) { int n = 128; size_t iterations = 10; size_t levels = 5; - std::vector reference_result; + std::vector reference_result; bool success = true; // Cannot use google test asserts in multithreaded region #pragma omp parallel num_threads(num_threads) diff --git a/tests/cpp/data/test_data.cc b/tests/cpp/data/test_data.cc index 3b5aa9faa..e74cb5613 100644 --- a/tests/cpp/data/test_data.cc +++ b/tests/cpp/data/test_data.cc @@ -9,7 +9,7 @@ namespace xgboost { TEST(SparsePage, PushCSC) { - std::vector offset {0}; + std::vector offset {0}; std::vector data; SparsePage page; page.offset.HostVector() = offset; diff --git a/tests/cpp/data/test_metainfo.cc b/tests/cpp/data/test_metainfo.cc index c031dd9ed..9c32a0386 100644 --- a/tests/cpp/data/test_metainfo.cc +++ b/tests/cpp/data/test_metainfo.cc @@ -99,7 +99,7 @@ TEST(MetaInfo, LoadQid) { const std::vector expected_group_ptr{0, 4, 8, 12}; CHECK(info.group_ptr_ == expected_group_ptr); - const std::vector expected_offset{ + const std::vector expected_offset{ 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60 }; const std::vector expected_data{ diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index 489b45583..b5bbf0ed7 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -249,7 +249,7 @@ inline std::unique_ptr BuildEllpackPage( 0.26f, 0.71f, 
1.83f}); cmat.SetMins({0.1f, 0.2f, 0.3f, 0.1f, 0.2f, 0.3f, 0.2f, 0.2f}); - size_t row_stride = 0; + bst_row_t row_stride = 0; const auto &offset_vec = batch.offset.ConstHostVector(); for (size_t i = 1; i < offset_vec.size(); ++i) { row_stride = std::max(row_stride, offset_vec[i] - offset_vec[i-1]); diff --git a/tests/cpp/test_main.cc b/tests/cpp/test_main.cc index 4f11fd4bd..d9f3e8f33 100644 --- a/tests/cpp/test_main.cc +++ b/tests/cpp/test_main.cc @@ -8,6 +8,7 @@ int main(int argc, char ** argv) { xgboost::Args args {{"verbosity", "2"}}; xgboost::ConsoleLogger::Configure(args); + testing::InitGoogleTest(&argc, argv); testing::FLAGS_gtest_death_test_style = "threadsafe"; return RUN_ALL_TESTS(); diff --git a/tests/cpp/tree/gpu_hist/test_row_partitioner.cu b/tests/cpp/tree/gpu_hist/test_row_partitioner.cu index 154d2030c..3210a25a1 100644 --- a/tests/cpp/tree/gpu_hist/test_row_partitioner.cu +++ b/tests/cpp/tree/gpu_hist/test_row_partitioner.cu @@ -91,7 +91,7 @@ void TestUpdatePosition() { EXPECT_EQ(rp.GetRows(3).size(), 2); EXPECT_EQ(rp.GetRows(4).size(), 3); // Check position is as expected - EXPECT_EQ(rp.GetPositionHost(), std::vector({3,3,4,4,4,2,2,2,2,2})); + EXPECT_EQ(rp.GetPositionHost(), std::vector({3,3,4,4,4,2,2,2,2,2})); } TEST(RowPartitioner, Basic) { TestUpdatePosition(); } diff --git a/tests/cpp/tree/test_constraints.cc b/tests/cpp/tree/test_constraints.cc new file mode 100644 index 000000000..fa923a621 --- /dev/null +++ b/tests/cpp/tree/test_constraints.cc @@ -0,0 +1,60 @@ +#include +#include +#include + +#include +#include + +#include "../../../src/tree/constraints.h" + +namespace xgboost { +namespace tree { + +TEST(CPUFeatureInteractionConstraint, Empty) { + TrainParam param; + param.UpdateAllowUnknown(Args{}); + bst_feature_t constexpr kFeatures = 6; + + FeatureInteractionConstraintHost constraints; + constraints.Configure(param, kFeatures); + + // no-op + constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2); + + 
std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; + common::Span s_input_feature_list = common::Span{h_input_feature_list}; + + for (auto f : h_input_feature_list) { + constraints.Query(f, 1); + } + + // no-op + ASSERT_TRUE(constraints.Query(94389, 12309)); +} + +TEST(CPUFeatureInteractionConstraint, Basic) { + std::string const constraints_str = R"constraint([[1, 2], [2, 3, 4]])constraint"; + + std::vector> args{ + {"interaction_constraints", constraints_str}}; + TrainParam param; + param.interaction_constraints = constraints_str; + bst_feature_t constexpr kFeatures = 6; + + FeatureInteractionConstraintHost constraints; + constraints.Configure(param, kFeatures); + constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2); + + std::vector h_input_feature_list{0, 1, 2, 3, 4, 5}; + + ASSERT_TRUE(constraints.Query(1, 1)); + ASSERT_TRUE(constraints.Query(1, 2)); + ASSERT_TRUE(constraints.Query(1, 3)); + ASSERT_TRUE(constraints.Query(1, 4)); + + ASSERT_FALSE(constraints.Query(1, 0)); + ASSERT_FALSE(constraints.Query(1, 5)); +} + +} // namespace tree +} // namespace xgboost diff --git a/tests/cpp/tree/test_constraints.cu b/tests/cpp/tree/test_constraints.cu index 3523ca2f5..0bfe839ac 100644 --- a/tests/cpp/tree/test_constraints.cu +++ b/tests/cpp/tree/test_constraints.cu @@ -19,13 +19,13 @@ struct FConstraintWrapper : public FeatureInteractionConstraint { common::Span GetNodeConstraints() { return FeatureInteractionConstraint::s_node_constraints_; } - FConstraintWrapper(tree::TrainParam param, int32_t n_features) : + FConstraintWrapper(tree::TrainParam param, bst_feature_t n_features) : FeatureInteractionConstraint(param, n_features) {} - dh::device_vector const& GetDSets() const { + dh::device_vector const& GetDSets() const { return d_sets_; } - dh::device_vector const& GetDSetsPtr() const { + dh::device_vector const& GetDSetsPtr() const { return d_sets_ptr_; } }; @@ -65,7 +65,7 @@ void CompareBitField(LBitField64 d_field, std::set positions) 
{ } // anonymous namespace -TEST(FeatureInteractionConstraint, Init) { +TEST(GPUFeatureInteractionConstraint, Init) { { int32_t constexpr kFeatures = 6; tree::TrainParam param = GetParameter(); @@ -123,7 +123,7 @@ TEST(FeatureInteractionConstraint, Init) { } } -TEST(FeatureInteractionConstraint, Split) { +TEST(GPUFeatureInteractionConstraint, Split) { tree::TrainParam param = GetParameter(); int32_t constexpr kFeatures = 6; FConstraintWrapper constraints(param, kFeatures); @@ -152,9 +152,9 @@ TEST(FeatureInteractionConstraint, Split) { } } -TEST(FeatureInteractionConstraint, QueryNode) { +TEST(GPUFeatureInteractionConstraint, QueryNode) { tree::TrainParam param = GetParameter(); - int32_t constexpr kFeatures = 6; + bst_feature_t constexpr kFeatures = 6; FConstraintWrapper constraints(param, kFeatures); { @@ -165,9 +165,9 @@ TEST(FeatureInteractionConstraint, QueryNode) { { constraints.Split(/*node_id=*/ 0, /*feature_id=*/ 1, 1, 2); auto span = constraints.QueryNode(0); - std::vector h_result (span.size()); - thrust::copy(thrust::device_ptr(span.data()), - thrust::device_ptr(span.data() + span.size()), + std::vector h_result (span.size()); + thrust::copy(thrust::device_ptr(span.data()), + thrust::device_ptr(span.data() + span.size()), h_result.begin()); ASSERT_EQ(h_result.size(), 2); ASSERT_EQ(h_result[0], 1); @@ -177,9 +177,9 @@ TEST(FeatureInteractionConstraint, QueryNode) { { constraints.Split(1, /*feature_id=*/0, 3, 4); auto span = constraints.QueryNode(1); - std::vector h_result (span.size()); - thrust::copy(thrust::device_ptr(span.data()), - thrust::device_ptr(span.data() + span.size()), + std::vector h_result (span.size()); + thrust::copy(thrust::device_ptr(span.data()), + thrust::device_ptr(span.data() + span.size()), h_result.begin()); ASSERT_EQ(h_result.size(), 3); ASSERT_EQ(h_result[0], 0); @@ -189,8 +189,8 @@ TEST(FeatureInteractionConstraint, QueryNode) { // same as parent span = constraints.QueryNode(3); h_result.resize(span.size()); - 
thrust::copy(thrust::device_ptr(span.data()), - thrust::device_ptr(span.data() + span.size()), + thrust::copy(thrust::device_ptr(span.data()), + thrust::device_ptr(span.data() + span.size()), h_result.begin()); ASSERT_EQ(h_result.size(), 3); ASSERT_EQ(h_result[0], 0); @@ -204,9 +204,9 @@ TEST(FeatureInteractionConstraint, QueryNode) { FConstraintWrapper large_features(large_param, 256); large_features.Split(0, 139, 1, 2); auto span = large_features.QueryNode(0); - std::vector h_result (span.size()); - thrust::copy(thrust::device_ptr(span.data()), - thrust::device_ptr(span.data() + span.size()), + std::vector h_result (span.size()); + thrust::copy(thrust::device_ptr(span.data()), + thrust::device_ptr(span.data() + span.size()), h_result.begin()); ASSERT_EQ(h_result.size(), 3); ASSERT_EQ(h_result[0], 1); @@ -217,10 +217,10 @@ TEST(FeatureInteractionConstraint, QueryNode) { namespace { -void CompareFeatureList(common::Span s_output, std::vector solution) { - std::vector h_output(s_output.size()); - thrust::copy(thrust::device_ptr(s_output.data()), - thrust::device_ptr(s_output.data() + s_output.size()), +void CompareFeatureList(common::Span s_output, std::vector solution) { + std::vector h_output(s_output.size()); + thrust::copy(thrust::device_ptr(s_output.data()), + thrust::device_ptr(s_output.data() + s_output.size()), h_output.begin()); ASSERT_EQ(h_output.size(), solution.size()); for (size_t i = 0; i < solution.size(); ++i) { @@ -230,21 +230,21 @@ void CompareFeatureList(common::Span s_output, std::vector sol } // anonymous namespace -TEST(FeatureInteractionConstraint, Query) { +TEST(GPUFeatureInteractionConstraint, Query) { { tree::TrainParam param = GetParameter(); - int32_t constexpr kFeatures = 6; + bst_feature_t constexpr kFeatures = 6; FConstraintWrapper constraints(param, kFeatures); - std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; - dh::device_vector d_input_feature_list (h_input_feature_list); - common::Span s_input_feature_list = 
dh::ToSpan(d_input_feature_list); + std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; + dh::device_vector d_input_feature_list (h_input_feature_list); + common::Span s_input_feature_list = dh::ToSpan(d_input_feature_list); auto s_output = constraints.Query(s_input_feature_list, 0); CompareFeatureList(s_output, h_input_feature_list); } { tree::TrainParam param = GetParameter(); - int32_t constexpr kFeatures = 6; + bst_feature_t constexpr kFeatures = 6; FConstraintWrapper constraints(param, kFeatures); constraints.Split(/*node_id=*/0, /*feature_id=*/1, /*left_id=*/1, /*right_id=*/2); constraints.Split(/*node_id=*/1, /*feature_id=*/0, /*left_id=*/3, /*right_id=*/4); @@ -257,16 +257,16 @@ TEST(FeatureInteractionConstraint, Query) { * {split at 0} \ * / \ * (1)[0, 1, 2] (2)[1, 2] - * / \ - * / {split at 3} - * / \ - * (3)[0, 1, 2] (4)[0, 1, 2, 3, 4, 5] + * / \ + * / {split at 3} + * / \ + * (3)[0, 1, 2] (4)[0, 1, 2, 3, 4, 5] * */ - std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; - dh::device_vector d_input_feature_list (h_input_feature_list); - common::Span s_input_feature_list = dh::ToSpan(d_input_feature_list); + std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; + dh::device_vector d_input_feature_list (h_input_feature_list); + common::Span s_input_feature_list = dh::ToSpan(d_input_feature_list); auto s_output = constraints.Query(s_input_feature_list, 1); CompareFeatureList(s_output, {0, 1, 2}); @@ -285,16 +285,16 @@ TEST(FeatureInteractionConstraint, Query) { // Test shared feature { tree::TrainParam param = GetParameter(); - int32_t constexpr kFeatures = 6; + bst_feature_t constexpr kFeatures = 6; std::string const constraints_str = R"constraint([[1, 2], [2, 3, 4]])constraint"; param.interaction_constraints = constraints_str; FConstraintWrapper constraints(param, kFeatures); constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2); - std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; - dh::device_vector d_input_feature_list 
(h_input_feature_list); - common::Span s_input_feature_list = dh::ToSpan(d_input_feature_list); + std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; + dh::device_vector d_input_feature_list (h_input_feature_list); + common::Span s_input_feature_list = dh::ToSpan(d_input_feature_list); auto s_output = constraints.Query(s_input_feature_list, 1); CompareFeatureList(s_output, {1, 2, 3, 4}); @@ -303,13 +303,13 @@ TEST(FeatureInteractionConstraint, Query) { // Test choosing free feature in root { tree::TrainParam param = GetParameter(); - int32_t constexpr kFeatures = 6; + bst_feature_t constexpr kFeatures = 6; std::string const constraints_str = R"constraint([[0, 1]])constraint"; param.interaction_constraints = constraints_str; FConstraintWrapper constraints(param, kFeatures); - std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; - dh::device_vector d_input_feature_list (h_input_feature_list); - common::Span s_input_feature_list = dh::ToSpan(d_input_feature_list); + std::vector h_input_feature_list {0, 1, 2, 3, 4, 5}; + dh::device_vector d_input_feature_list (h_input_feature_list); + common::Span s_input_feature_list = dh::ToSpan(d_input_feature_list); constraints.Split(/*node_id=*/0, /*feature_id=*/2, /*left_id=*/1, /*right_id=*/2); auto s_output = constraints.Query(s_input_feature_list, 1); CompareFeatureList(s_output, {2}); diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu index 1b234d350..85afa9a6a 100644 --- a/tests/cpp/tree/test_gpu_hist.cu +++ b/tests/cpp/tree/test_gpu_hist.cu @@ -100,7 +100,7 @@ void TestBuildHist(bool use_shared_memory_histograms) { auto page = BuildEllpackPage(kNRows, kNCols); GPUHistMakerDevice maker(0, page.get(), kNRows, param, kNCols, kNCols); maker.InitHistogram(); - + xgboost::SimpleLCG gen; xgboost::SimpleRealUniformDistribution dist(0.0f, 1.0f); std::vector h_gpair(kNRows); diff --git a/tests/cpp/tree/test_histmaker.cc b/tests/cpp/tree/test_histmaker.cc new file mode 100644 index 000000000..950045d61 --- 
/dev/null +++ b/tests/cpp/tree/test_histmaker.cc @@ -0,0 +1,69 @@ +#include + +#include +#include + +#include "../helpers.h" + +namespace xgboost { +namespace tree { + +TEST(GrowHistMaker, InteractionConstraint) { + size_t constexpr kRows = 32; + size_t constexpr kCols = 16; + + GenericParameter param; + param.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); + + auto pp_dmat = CreateDMatrix(kRows, kCols, 0.6, 3); + auto p_dmat = *pp_dmat; + + HostDeviceVector gradients (kRows); + std::vector& h_gradients = gradients.HostVector(); + + xgboost::SimpleLCG gen; + xgboost::SimpleRealUniformDistribution dist(0.0f, 1.0f); + + for (size_t i = 0; i < kRows; ++i) { + bst_float grad = dist(&gen); + bst_float hess = dist(&gen); + h_gradients[i] = GradientPair(grad, hess); + } + + { + // With constraints + RegTree tree; + tree.param.num_feature = kCols; + + std::unique_ptr updater { TreeUpdater::Create("grow_histmaker", ¶m) }; + updater->Configure(Args{ + {"interaction_constraints", "[[0, 1]]"}, + {"num_feature", std::to_string(kCols)}}); + updater->Update(&gradients, p_dmat.get(), {&tree}); + + ASSERT_EQ(tree.NumExtraNodes(), 4); + ASSERT_EQ(tree[0].SplitIndex(), 1); + + ASSERT_EQ(tree[tree[0].LeftChild()].SplitIndex(), 0); + ASSERT_EQ(tree[tree[0].RightChild()].SplitIndex(), 0); + } + { + // Without constraints + RegTree tree; + tree.param.num_feature = kCols; + + std::unique_ptr updater { TreeUpdater::Create("grow_histmaker", ¶m) }; + updater->Configure(Args{{"num_feature", std::to_string(kCols)}}); + updater->Update(&gradients, p_dmat.get(), {&tree}); + + ASSERT_EQ(tree.NumExtraNodes(), 10); + ASSERT_EQ(tree[0].SplitIndex(), 1); + + ASSERT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0); + ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0); + } + delete pp_dmat; +} + +} // namespace tree +} // namespace xgboost diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index 9420893d1..613098349 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ 
b/tests/cpp/tree/test_quantile_hist.cc @@ -25,8 +25,9 @@ class QuantileHistMock : public QuantileHistMaker { BuilderMock(const TrainParam& param, std::unique_ptr pruner, - std::unique_ptr spliteval) - : RealImpl(param, std::move(pruner), std::move(spliteval)) {} + std::unique_ptr spliteval, + FeatureInteractionConstraintHost int_constraint) + : RealImpl(param, std::move(pruner), std::move(spliteval), std::move(int_constraint)) {} public: void TestInitData(const GHistIndexMatrix& gmat, @@ -238,7 +239,8 @@ class QuantileHistMock : public QuantileHistMaker { new BuilderMock( param_, std::move(pruner_), - std::unique_ptr(spliteval_->GetHostClone()))); + std::unique_ptr(spliteval_->GetHostClone()), + int_constraint_)); dmat_ = CreateDMatrix(kNRows, kNCols, 0.8, 3); } ~QuantileHistMock() override { delete dmat_; } diff --git a/tests/cpp/tree/test_split_evaluator.cc b/tests/cpp/tree/test_split_evaluator.cc deleted file mode 100644 index 0cc37d1ea..000000000 --- a/tests/cpp/tree/test_split_evaluator.cc +++ /dev/null @@ -1,57 +0,0 @@ -#include -#include -#include -#include "../../../src/tree/split_evaluator.h" - -namespace xgboost { -namespace tree { - -TEST(SplitEvaluator, Interaction) { - std::string constraints_str = R"interaction([[0, 1], [1, 2, 3]])interaction"; - std::vector> args{ - {"interaction_constraints", constraints_str}, - {"num_feature", "8"}}; - { - std::unique_ptr eval{ - SplitEvaluator::Create("elastic_net,interaction")}; - eval->Init(args); - - eval->AddSplit(0, 1, 2, /*feature_id=*/4, 0, 0); - eval->AddSplit(2, 3, 4, /*feature_id=*/5, 0, 0); - ASSERT_FALSE(eval->CheckFeatureConstraint(2, /*feature_id=*/0)); - ASSERT_FALSE(eval->CheckFeatureConstraint(2, /*feature_id=*/1)); - - ASSERT_TRUE(eval->CheckFeatureConstraint(2, /*feature_id=*/4)); - ASSERT_FALSE(eval->CheckFeatureConstraint(2, /*feature_id=*/5)); - - std::vector accepted_features; // for node 3 - for (int32_t f = 0; f < 8; ++f) { - if (eval->CheckFeatureConstraint(3, f)) { - 
accepted_features.emplace_back(f); - } - } - std::vector solutions{4, 5}; - ASSERT_EQ(accepted_features.size(), solutions.size()); - for (size_t f = 0; f < accepted_features.size(); ++f) { - ASSERT_EQ(accepted_features[f], solutions[f]); - } - } - - { - std::unique_ptr eval{ - SplitEvaluator::Create("elastic_net,interaction")}; - eval->Init(args); - eval->AddSplit(/*node_id=*/0, /*left_id=*/1, /*right_id=*/2, /*feature_id=*/4, 0, 0); - std::vector accepted_features; // for node 1 - for (int32_t f = 0; f < 8; ++f) { - if (eval->CheckFeatureConstraint(1, f)) { - accepted_features.emplace_back(f); - } - } - ASSERT_EQ(accepted_features.size(), 1); - ASSERT_EQ(accepted_features[0], 4); - } -} - -} // namespace tree -} // namespace xgboost diff --git a/tests/python-gpu/test_gpu_interaction_constraints.py b/tests/python-gpu/test_gpu_interaction_constraints.py index d0026dad1..0a135776d 100644 --- a/tests/python-gpu/test_gpu_interaction_constraints.py +++ b/tests/python-gpu/test_gpu_interaction_constraints.py @@ -11,7 +11,7 @@ class TestGPUInteractionConstraints(unittest.TestCase): cputest = test_ic.TestInteractionConstraints() def test_interaction_constraints(self): - self.cputest.test_interaction_constraints(tree_method='gpu_hist') + self.cputest.run_interaction_constraints(tree_method='gpu_hist') def test_training_accuracy(self): - self.cputest.test_training_accuracy(tree_method='gpu_hist') + self.cputest.training_accuracy(tree_method='gpu_hist') diff --git a/tests/python/test_interaction_constraints.py b/tests/python/test_interaction_constraints.py index 6ca842b92..91a980364 100644 --- a/tests/python/test_interaction_constraints.py +++ b/tests/python/test_interaction_constraints.py @@ -10,7 +10,7 @@ rng = np.random.RandomState(1994) class TestInteractionConstraints(unittest.TestCase): - def test_interaction_constraints(self, tree_method='hist'): + def run_interaction_constraints(self, tree_method): x1 = np.random.normal(loc=1.0, scale=1.0, size=1000) x2 = 
np.random.normal(loc=1.0, scale=1.0, size=1000) x3 = np.random.choice([1, 2, 3], size=1000, replace=True) @@ -25,8 +25,7 @@ class TestInteractionConstraints(unittest.TestCase): 'eta': 0.1, 'nthread': 2, 'interaction_constraints': '[[0, 1]]', - 'tree_method': tree_method, - 'verbosity': 2 + 'tree_method': tree_method } num_boost_round = 12 # Fit a model that only allows interaction between x1 and x2 @@ -50,8 +49,17 @@ class TestInteractionConstraints(unittest.TestCase): diff2 = preds[2] - preds[1] assert np.all(np.abs(diff2 - diff2[0]) < 1e-4) + def test_exact_interaction_constraints(self): + self.run_interaction_constraints(tree_method='exact') + + def test_hist_interaction_constraints(self): + self.run_interaction_constraints(tree_method='hist') + + def test_approx_interaction_constraints(self): + self.run_interaction_constraints(tree_method='approx') + @pytest.mark.skipif(**tm.no_sklearn()) - def test_training_accuracy(self, tree_method='hist'): + def training_accuracy(self, tree_method): from sklearn.metrics import accuracy_score dtrain = xgboost.DMatrix(dpath + 'agaricus.txt.train?indexing_mode=1') dtest = xgboost.DMatrix(dpath + 'agaricus.txt.test?indexing_mode=1') @@ -73,3 +81,12 @@ class TestInteractionConstraints(unittest.TestCase): bst = xgboost.train(params, dtrain, num_boost_round) pred_dtest = (bst.predict(dtest) < 0.5) assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1 + + def test_hist_training_accuracy(self): + self.training_accuracy(tree_method='hist') + + def test_exact_training_accuracy(self): + self.training_accuracy(tree_method='exact') + + def test_approx_training_accuracy(self): + self.training_accuracy(tree_method='approx')