xgboost/src/tree/constraints.cu
2023-03-10 00:46:29 +01:00

341 lines
12 KiB
Plaintext

/*!
* Copyright 2019 XGBoost contributors
*/
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <algorithm>
#include <string>
#include <set>
#include "xgboost/logging.h"
#include "xgboost/span.h"
#include "constraints.cuh"
#include "param.h"
#if defined(XGBOOST_USE_CUDA)
#include "../common/device_helpers.cuh"
#elif defined(XGBOOST_USE_HIP)
#include "../common/device_helpers.hip.h"
#endif
namespace xgboost {
size_t FeatureInteractionConstraintDevice::Features() const {
return d_sets_ptr_.size() - 1;
}
void FeatureInteractionConstraintDevice::Configure(
tree::TrainParam const& param, int32_t const n_features) {
has_constraint_ = true;
if (param.interaction_constraints.length() == 0) {
has_constraint_ = false;
return;
}
// --- Parse interaction constraints
// Interaction constraints parsed from string parameter. After
// parsing, this looks like {{0, 1, 2}, {2, 3 ,4}}.
std::vector<std::vector<bst_feature_t>> h_feature_constraints;
try {
ParseInteractionConstraint(param.interaction_constraints, &h_feature_constraints);
} catch (dmlc::Error const& e) {
LOG(FATAL) << "Failed to parse feature interaction constraint:\n"
<< param.interaction_constraints << "\n"
<< "With error:\n" << e.what();
}
n_sets_ = h_feature_constraints.size();
size_t const n_feat_storage = LBitField64::ComputeStorageSize(n_features);
if (n_feat_storage == 0 && n_features != 0) {
LOG(FATAL) << "Wrong storage size, n_features: " << n_features;
}
// --- Initialize allowed features attached to nodes.
int32_t n_nodes { param.MaxNodes() };
node_constraints_.resize(n_nodes);
node_constraints_storage_.resize(n_nodes);
for (auto& n : node_constraints_storage_) {
n.resize(LBitField64::ComputeStorageSize(n_features));
}
for (size_t i = 0; i < node_constraints_storage_.size(); ++i) {
auto span = dh::ToSpan(node_constraints_storage_[i]);
node_constraints_[i] = LBitField64(span);
}
s_node_constraints_ = common::Span<LBitField64>(node_constraints_.data(),
node_constraints_.size());
// Represent constraints as CSR format, flatten is the value vector,
// ptr is row_ptr vector in CSR.
std::vector<uint32_t> h_feature_constraints_flatten;
for (auto const& constraints : h_feature_constraints) {
for (uint32_t c : constraints) {
h_feature_constraints_flatten.emplace_back(c);
}
}
std::vector<size_t> h_feature_constraints_ptr;
size_t n_features_in_constraints = 0;
h_feature_constraints_ptr.emplace_back(n_features_in_constraints);
for (auto const& v : h_feature_constraints) {
n_features_in_constraints += v.size();
h_feature_constraints_ptr.emplace_back(n_features_in_constraints);
}
// Copy the CSR to device.
d_fconstraints_.resize(h_feature_constraints_flatten.size());
thrust::copy(h_feature_constraints_flatten.cbegin(), h_feature_constraints_flatten.cend(),
d_fconstraints_.begin());
s_fconstraints_ = dh::ToSpan(d_fconstraints_);
d_fconstraints_ptr_.resize(h_feature_constraints_ptr.size());
thrust::copy(h_feature_constraints_ptr.cbegin(), h_feature_constraints_ptr.cend(),
d_fconstraints_ptr_.begin());
s_fconstraints_ptr_ = dh::ToSpan(d_fconstraints_ptr_);
// --- Compute interaction sets attached to each feature.
// Use a set to eliminate duplicated entries.
std::vector<std::set<int32_t> > h_features_set(n_features);
int32_t cid = 0;
for (auto const& constraints : h_feature_constraints) {
for (auto const& feat : constraints) {
h_features_set.at(feat).insert(cid);
}
cid++;
}
// Compute device sets.
std::vector<int32_t> h_sets;
int32_t ptr = 0;
std::vector<int32_t> h_sets_ptr {ptr};
for (auto const& feature : h_features_set) {
for (auto constraint_id : feature) {
h_sets.emplace_back(constraint_id);
}
// empty set is well defined here.
ptr += feature.size();
h_sets_ptr.emplace_back(ptr);
}
d_sets_ = h_sets;
d_sets_ptr_ = h_sets_ptr;
s_sets_ = dh::ToSpan(d_sets_);
s_sets_ptr_ = dh::ToSpan(d_sets_ptr_);
d_feature_buffer_storage_.resize(LBitField64::ComputeStorageSize(n_features));
feature_buffer_ = LBitField64{dh::ToSpan(d_feature_buffer_storage_)};
// --- Initialize result buffers.
output_buffer_bits_storage_.resize(LBitField64::ComputeStorageSize(n_features));
output_buffer_bits_ = LBitField64(dh::ToSpan(output_buffer_bits_storage_));
input_buffer_bits_storage_.resize(LBitField64::ComputeStorageSize(n_features));
input_buffer_bits_ = LBitField64(dh::ToSpan(input_buffer_bits_storage_));
result_buffer_.resize(n_features);
s_result_buffer_ = dh::ToSpan(result_buffer_);
}
FeatureInteractionConstraintDevice::FeatureInteractionConstraintDevice(
tree::TrainParam const& param, int32_t const n_features) :
has_constraint_{true}, n_sets_{0} {
this->Configure(param, n_features);
}
void FeatureInteractionConstraintDevice::Reset() {
for (auto& node : node_constraints_storage_) {
thrust::fill(node.begin(), node.end(), 0);
}
}
__global__ void ClearBuffersKernel(
LBitField64 result_buffer_output, LBitField64 result_buffer_input) {
auto tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid < result_buffer_output.Size()) {
result_buffer_output.Clear(tid);
}
if (tid < result_buffer_input.Size()) {
result_buffer_input.Clear(tid);
}
}
void FeatureInteractionConstraintDevice::ClearBuffers() {
CHECK_EQ(output_buffer_bits_.Size(), input_buffer_bits_.Size());
CHECK_LE(feature_buffer_.Size(), output_buffer_bits_.Size());
uint32_t constexpr kBlockThreads = 256;
auto const n_grids = static_cast<uint32_t>(
common::DivRoundUp(input_buffer_bits_.Size(), kBlockThreads));
dh::LaunchKernel {n_grids, kBlockThreads} (
ClearBuffersKernel,
output_buffer_bits_, input_buffer_bits_);
}
common::Span<bst_feature_t> FeatureInteractionConstraintDevice::QueryNode(int32_t node_id) {
if (!has_constraint_) { return {}; }
CHECK_LT(node_id, s_node_constraints_.size());
ClearBuffers();
thrust::counting_iterator<int32_t> begin(0);
thrust::counting_iterator<int32_t> end(result_buffer_.size());
auto p_result_buffer = result_buffer_.data();
LBitField64 node_constraints = s_node_constraints_[node_id];
thrust::device_ptr<bst_feature_t> const out_end = thrust::copy_if(
thrust::device,
begin, end,
p_result_buffer,
[=]__device__(int32_t pos) {
bool res = node_constraints.Check(pos);
return res;
});
size_t const n_available = std::distance(result_buffer_.data(), out_end);
return {s_result_buffer_.data(), s_result_buffer_.data() + n_available};
}
__global__ void SetInputBufferKernel(common::Span<bst_feature_t> feature_list_input,
LBitField64 result_buffer_input) {
uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid < feature_list_input.size()) {
result_buffer_input.Set(feature_list_input[tid]);
}
}
__global__ void QueryFeatureListKernel(LBitField64 node_constraints,
LBitField64 result_buffer_input,
LBitField64 result_buffer_output) {
result_buffer_output |= node_constraints;
result_buffer_output &= result_buffer_input;
}
common::Span<bst_feature_t> FeatureInteractionConstraintDevice::Query(
common::Span<bst_feature_t> feature_list, int32_t nid) {
if (!has_constraint_ || nid == 0) {
return feature_list;
}
ClearBuffers();
LBitField64 node_constraints = s_node_constraints_[nid];
CHECK_EQ(input_buffer_bits_.Size(), output_buffer_bits_.Size());
uint32_t constexpr kBlockThreads = 256;
auto n_grids = static_cast<uint32_t>(
common::DivRoundUp(output_buffer_bits_.Size(), kBlockThreads));
dh::LaunchKernel {n_grids, kBlockThreads} (
SetInputBufferKernel,
feature_list, input_buffer_bits_);
dh::LaunchKernel {n_grids, kBlockThreads} (
QueryFeatureListKernel,
node_constraints, input_buffer_bits_, output_buffer_bits_);
thrust::counting_iterator<int32_t> begin(0);
thrust::counting_iterator<int32_t> end(result_buffer_.size());
LBitField64 local_result_buffer = output_buffer_bits_;
thrust::device_ptr<bst_feature_t> const out_end = thrust::copy_if(
thrust::device,
begin, end,
result_buffer_.data(),
[=]__device__(int32_t pos) {
bool res = local_result_buffer.Check(pos);
return res;
});
size_t const n_available = std::distance(result_buffer_.data(), out_end);
common::Span<bst_feature_t> result =
{s_result_buffer_.data(), s_result_buffer_.data() + n_available};
return result;
}
// Find interaction sets for each feature, then store all features in
// those sets in a buffer.
__global__ void RestoreFeatureListFromSetsKernel(
LBitField64 feature_buffer,
bst_feature_t fid,
common::Span<bst_feature_t> feature_interactions,
common::Span<size_t> feature_interactions_ptr, // of size n interaction set + 1
common::Span<bst_feature_t> interactions_list,
common::Span<size_t> interactions_list_ptr) {
auto const tid_x = threadIdx.x + blockIdx.x * blockDim.x;
auto const tid_y = threadIdx.y + blockIdx.y * blockDim.y;
// painful mapping: fid -> sets related to it -> features related to sets.
auto const beg = interactions_list_ptr[fid];
auto const end = interactions_list_ptr[fid+1];
auto const n_sets = end - beg;
if (tid_x < n_sets) {
auto const set_id_pos = beg + tid_x;
auto const set_id = interactions_list[set_id_pos];
auto const set_beg = feature_interactions_ptr[set_id];
auto const set_end = feature_interactions_ptr[set_id + 1];
auto const feature_pos = set_beg + tid_y;
if (feature_pos < set_end) {
feature_buffer.Set(feature_interactions[feature_pos]);
}
}
}
__global__ void InteractionConstraintSplitKernel(LBitField64 feature,
int32_t feature_id,
LBitField64 node,
LBitField64 left,
LBitField64 right) {
auto tid = threadIdx.x + blockDim.x * blockIdx.x;
if (tid > node.Size()) {
return;
}
// enable constraints from feature
node |= feature;
// clear the buffer after use
if (tid < feature.Size()) {
feature.Clear(tid);
}
// enable constraints from parent
left |= node;
right |= node;
if (tid == feature_id) {
// enable the split feature, set all of them at last instead of
// setting it for parent to avoid race.
node.Set(feature_id);
left.Set(feature_id);
right.Set(feature_id);
}
}
void FeatureInteractionConstraintDevice::Split(
bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id) {
if (!has_constraint_) { return; }
CHECK_NE(node_id, left_id)
<< " Split node: " << node_id << " and its left child: "
<< left_id << " cannot be the same.";
CHECK_NE(node_id, right_id)
<< " Split node: " << node_id << " and its left child: "
<< right_id << " cannot be the same.";
CHECK_LT(right_id, s_node_constraints_.size());
CHECK_NE(s_node_constraints_.size(), 0);
LBitField64 node = s_node_constraints_[node_id];
LBitField64 left = s_node_constraints_[left_id];
LBitField64 right = s_node_constraints_[right_id];
dim3 const block3(16, 64, 1);
dim3 const grid3(common::DivRoundUp(n_sets_, 16),
common::DivRoundUp(s_fconstraints_.size(), 64));
dh::LaunchKernel {grid3, block3} (
RestoreFeatureListFromSetsKernel,
feature_buffer_, feature_id,
s_fconstraints_, s_fconstraints_ptr_,
s_sets_, s_sets_ptr_);
uint32_t constexpr kBlockThreads = 256;
auto n_grids = static_cast<uint32_t>(common::DivRoundUp(node.Size(), kBlockThreads));
dh::LaunchKernel {n_grids, kBlockThreads} (
InteractionConstraintSplitKernel,
feature_buffer_,
feature_id,
node, left, right);
}
} // namespace xgboost