/*! * Copyright 2019 XGBoost contributors */ #include #include #include #include #include #include #include #include "xgboost/logging.h" #include "xgboost/span.h" #include "constraints.cuh" #include "param.h" #if defined(XGBOOST_USE_CUDA) #include "../common/device_helpers.cuh" #elif defined(XGBOOST_USE_HIP) #include "../common/device_helpers.hip.h" #endif namespace xgboost { size_t FeatureInteractionConstraintDevice::Features() const { return d_sets_ptr_.size() - 1; } void FeatureInteractionConstraintDevice::Configure( tree::TrainParam const& param, int32_t const n_features) { has_constraint_ = true; if (param.interaction_constraints.length() == 0) { has_constraint_ = false; return; } // --- Parse interaction constraints // Interaction constraints parsed from string parameter. After // parsing, this looks like {{0, 1, 2}, {2, 3 ,4}}. std::vector> h_feature_constraints; try { ParseInteractionConstraint(param.interaction_constraints, &h_feature_constraints); } catch (dmlc::Error const& e) { LOG(FATAL) << "Failed to parse feature interaction constraint:\n" << param.interaction_constraints << "\n" << "With error:\n" << e.what(); } n_sets_ = h_feature_constraints.size(); size_t const n_feat_storage = LBitField64::ComputeStorageSize(n_features); if (n_feat_storage == 0 && n_features != 0) { LOG(FATAL) << "Wrong storage size, n_features: " << n_features; } // --- Initialize allowed features attached to nodes. int32_t n_nodes { param.MaxNodes() }; node_constraints_.resize(n_nodes); node_constraints_storage_.resize(n_nodes); for (auto& n : node_constraints_storage_) { n.resize(LBitField64::ComputeStorageSize(n_features)); } for (size_t i = 0; i < node_constraints_storage_.size(); ++i) { auto span = dh::ToSpan(node_constraints_storage_[i]); node_constraints_[i] = LBitField64(span); } s_node_constraints_ = common::Span(node_constraints_.data(), node_constraints_.size()); // Represent constraints as CSR format, flatten is the value vector, // ptr is row_ptr vector in CSR. std::vector h_feature_constraints_flatten; for (auto const& constraints : h_feature_constraints) { for (uint32_t c : constraints) { h_feature_constraints_flatten.emplace_back(c); } } std::vector h_feature_constraints_ptr; size_t n_features_in_constraints = 0; h_feature_constraints_ptr.emplace_back(n_features_in_constraints); for (auto const& v : h_feature_constraints) { n_features_in_constraints += v.size(); h_feature_constraints_ptr.emplace_back(n_features_in_constraints); } // Copy the CSR to device. d_fconstraints_.resize(h_feature_constraints_flatten.size()); thrust::copy(h_feature_constraints_flatten.cbegin(), h_feature_constraints_flatten.cend(), d_fconstraints_.begin()); s_fconstraints_ = dh::ToSpan(d_fconstraints_); d_fconstraints_ptr_.resize(h_feature_constraints_ptr.size()); thrust::copy(h_feature_constraints_ptr.cbegin(), h_feature_constraints_ptr.cend(), d_fconstraints_ptr_.begin()); s_fconstraints_ptr_ = dh::ToSpan(d_fconstraints_ptr_); // --- Compute interaction sets attached to each feature. // Use a set to eliminate duplicated entries. std::vector > h_features_set(n_features); int32_t cid = 0; for (auto const& constraints : h_feature_constraints) { for (auto const& feat : constraints) { h_features_set.at(feat).insert(cid); } cid++; } // Compute device sets. std::vector h_sets; int32_t ptr = 0; std::vector h_sets_ptr {ptr}; for (auto const& feature : h_features_set) { for (auto constraint_id : feature) { h_sets.emplace_back(constraint_id); } // empty set is well defined here. ptr += feature.size(); h_sets_ptr.emplace_back(ptr); } d_sets_ = h_sets; d_sets_ptr_ = h_sets_ptr; s_sets_ = dh::ToSpan(d_sets_); s_sets_ptr_ = dh::ToSpan(d_sets_ptr_); d_feature_buffer_storage_.resize(LBitField64::ComputeStorageSize(n_features)); feature_buffer_ = LBitField64{dh::ToSpan(d_feature_buffer_storage_)}; // --- Initialize result buffers. output_buffer_bits_storage_.resize(LBitField64::ComputeStorageSize(n_features)); output_buffer_bits_ = LBitField64(dh::ToSpan(output_buffer_bits_storage_)); input_buffer_bits_storage_.resize(LBitField64::ComputeStorageSize(n_features)); input_buffer_bits_ = LBitField64(dh::ToSpan(input_buffer_bits_storage_)); result_buffer_.resize(n_features); s_result_buffer_ = dh::ToSpan(result_buffer_); } FeatureInteractionConstraintDevice::FeatureInteractionConstraintDevice( tree::TrainParam const& param, int32_t const n_features) : has_constraint_{true}, n_sets_{0} { this->Configure(param, n_features); } void FeatureInteractionConstraintDevice::Reset() { for (auto& node : node_constraints_storage_) { thrust::fill(node.begin(), node.end(), 0); } } __global__ void ClearBuffersKernel( LBitField64 result_buffer_output, LBitField64 result_buffer_input) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid < result_buffer_output.Size()) { result_buffer_output.Clear(tid); } if (tid < result_buffer_input.Size()) { result_buffer_input.Clear(tid); } } void FeatureInteractionConstraintDevice::ClearBuffers() { CHECK_EQ(output_buffer_bits_.Size(), input_buffer_bits_.Size()); CHECK_LE(feature_buffer_.Size(), output_buffer_bits_.Size()); uint32_t constexpr kBlockThreads = 256; auto const n_grids = static_cast( common::DivRoundUp(input_buffer_bits_.Size(), kBlockThreads)); dh::LaunchKernel {n_grids, kBlockThreads} ( ClearBuffersKernel, output_buffer_bits_, input_buffer_bits_); } common::Span FeatureInteractionConstraintDevice::QueryNode(int32_t node_id) { if (!has_constraint_) { return {}; } CHECK_LT(node_id, s_node_constraints_.size()); ClearBuffers(); thrust::counting_iterator begin(0); thrust::counting_iterator end(result_buffer_.size()); auto p_result_buffer = result_buffer_.data(); LBitField64 node_constraints = s_node_constraints_[node_id]; thrust::device_ptr const out_end = thrust::copy_if( thrust::device, begin, end, p_result_buffer, [=]__device__(int32_t pos) { bool res = node_constraints.Check(pos); return res; }); size_t const n_available = std::distance(result_buffer_.data(), out_end); return {s_result_buffer_.data(), s_result_buffer_.data() + n_available}; } __global__ void SetInputBufferKernel(common::Span feature_list_input, LBitField64 result_buffer_input) { uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < feature_list_input.size()) { result_buffer_input.Set(feature_list_input[tid]); } } __global__ void QueryFeatureListKernel(LBitField64 node_constraints, LBitField64 result_buffer_input, LBitField64 result_buffer_output) { result_buffer_output |= node_constraints; result_buffer_output &= result_buffer_input; } common::Span FeatureInteractionConstraintDevice::Query( common::Span feature_list, int32_t nid) { if (!has_constraint_ || nid == 0) { return feature_list; } ClearBuffers(); LBitField64 node_constraints = s_node_constraints_[nid]; CHECK_EQ(input_buffer_bits_.Size(), output_buffer_bits_.Size()); uint32_t constexpr kBlockThreads = 256; auto n_grids = static_cast( common::DivRoundUp(output_buffer_bits_.Size(), kBlockThreads)); dh::LaunchKernel {n_grids, kBlockThreads} ( SetInputBufferKernel, feature_list, input_buffer_bits_); dh::LaunchKernel {n_grids, kBlockThreads} ( QueryFeatureListKernel, node_constraints, input_buffer_bits_, output_buffer_bits_); thrust::counting_iterator begin(0); thrust::counting_iterator end(result_buffer_.size()); LBitField64 local_result_buffer = output_buffer_bits_; thrust::device_ptr const out_end = thrust::copy_if( thrust::device, begin, end, result_buffer_.data(), [=]__device__(int32_t pos) { bool res = local_result_buffer.Check(pos); return res; }); size_t const n_available = std::distance(result_buffer_.data(), out_end); common::Span result = {s_result_buffer_.data(), s_result_buffer_.data() + n_available}; return result; } // Find interaction sets for each feature, then store all features in // those sets in a buffer. __global__ void RestoreFeatureListFromSetsKernel( LBitField64 feature_buffer, bst_feature_t fid, common::Span feature_interactions, common::Span feature_interactions_ptr, // of size n interaction set + 1 common::Span interactions_list, common::Span interactions_list_ptr) { auto const tid_x = threadIdx.x + blockIdx.x * blockDim.x; auto const tid_y = threadIdx.y + blockIdx.y * blockDim.y; // painful mapping: fid -> sets related to it -> features related to sets. auto const beg = interactions_list_ptr[fid]; auto const end = interactions_list_ptr[fid+1]; auto const n_sets = end - beg; if (tid_x < n_sets) { auto const set_id_pos = beg + tid_x; auto const set_id = interactions_list[set_id_pos]; auto const set_beg = feature_interactions_ptr[set_id]; auto const set_end = feature_interactions_ptr[set_id + 1]; auto const feature_pos = set_beg + tid_y; if (feature_pos < set_end) { feature_buffer.Set(feature_interactions[feature_pos]); } } } __global__ void InteractionConstraintSplitKernel(LBitField64 feature, int32_t feature_id, LBitField64 node, LBitField64 left, LBitField64 right) { auto tid = threadIdx.x + blockDim.x * blockIdx.x; if (tid > node.Size()) { return; } // enable constraints from feature node |= feature; // clear the buffer after use if (tid < feature.Size()) { feature.Clear(tid); } // enable constraints from parent left |= node; right |= node; if (tid == feature_id) { // enable the split feature, set all of them at last instead of // setting it for parent to avoid race. node.Set(feature_id); left.Set(feature_id); right.Set(feature_id); } } void FeatureInteractionConstraintDevice::Split( bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id) { if (!has_constraint_) { return; } CHECK_NE(node_id, left_id) << " Split node: " << node_id << " and its left child: " << left_id << " cannot be the same."; CHECK_NE(node_id, right_id) << " Split node: " << node_id << " and its left child: " << right_id << " cannot be the same."; CHECK_LT(right_id, s_node_constraints_.size()); CHECK_NE(s_node_constraints_.size(), 0); LBitField64 node = s_node_constraints_[node_id]; LBitField64 left = s_node_constraints_[left_id]; LBitField64 right = s_node_constraints_[right_id]; dim3 const block3(16, 64, 1); dim3 const grid3(common::DivRoundUp(n_sets_, 16), common::DivRoundUp(s_fconstraints_.size(), 64)); dh::LaunchKernel {grid3, block3} ( RestoreFeatureListFromSetsKernel, feature_buffer_, feature_id, s_fconstraints_, s_fconstraints_ptr_, s_sets_, s_sets_ptr_); uint32_t constexpr kBlockThreads = 256; auto n_grids = static_cast(common::DivRoundUp(node.Size(), kBlockThreads)); dh::LaunchKernel {n_grids, kBlockThreads} ( InteractionConstraintSplitKernel, feature_buffer_, feature_id, node, left, right); } } // namespace xgboost