Fix r interaction constraints (#5543)
* Unify the parsing code. * Cleanup.
This commit is contained in:
parent
93df871c8c
commit
c245eb8755
@ -34,5 +34,22 @@ test_that("interaction constraints for regression", {
|
||||
expect_true({
|
||||
test1 & test2
|
||||
}, "Interaction Contraint Satisfied")
|
||||
|
||||
})
|
||||
|
||||
test_that("interaction constraints scientific representation", {
|
||||
rows <- 10
|
||||
## When number exceeds 1e5, R paste function uses scientific representation.
|
||||
## See: https://github.com/dmlc/xgboost/issues/5179
|
||||
cols <- 1e5+10
|
||||
|
||||
d <- matrix(rexp(rows, rate=.1), nrow=rows, ncol=cols)
|
||||
y <- rnorm(rows)
|
||||
|
||||
dtrain <- xgb.DMatrix(data=d, info = list(label=y))
|
||||
inc <- list(c(seq.int(from = 0, to = cols, by = 1)))
|
||||
|
||||
with_inc <- xgb.train(data=dtrain, tree_method='hist',
|
||||
interaction_constraints=inc, nrounds=10)
|
||||
without_inc <- xgb.train(data=dtrain, tree_method='hist', nrounds=10)
|
||||
expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc))
|
||||
})
|
||||
|
||||
@ -6,6 +6,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "xgboost/span.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "constraints.h"
|
||||
#include "param.h"
|
||||
|
||||
@ -27,14 +28,11 @@ void FeatureInteractionConstraintHost::Reset() {
|
||||
if (!enabled_) {
|
||||
return;
|
||||
}
|
||||
// Parse interaction constraints
|
||||
std::istringstream iss(this->interaction_constraint_str_);
|
||||
dmlc::JSONReader reader(&iss);
|
||||
// Read std::vector<std::vector<bst_uint>> first and then
|
||||
// convert to std::vector<std::unordered_set<bst_uint>>
|
||||
std::vector<std::vector<bst_uint>> tmp;
|
||||
// Read std::vector<std::vector<bst_feature_t>> first and then
|
||||
// convert to std::vector<std::unordered_set<bst_feature_t>>
|
||||
std::vector<std::vector<bst_feature_t>> tmp;
|
||||
try {
|
||||
reader.Read(&tmp);
|
||||
ParseInteractionConstraint(this->interaction_constraint_str_, &tmp);
|
||||
} catch (dmlc::Error const &e) {
|
||||
LOG(FATAL) << "Failed to parse feature interaction constraint:\n"
|
||||
<< this->interaction_constraint_str_ << "\n"
|
||||
|
||||
@ -7,9 +7,7 @@
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <bitset>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <set>
|
||||
|
||||
#include "xgboost/logging.h"
|
||||
@ -18,14 +16,13 @@
|
||||
#include "param.h"
|
||||
#include "../common/device_helpers.cuh"
|
||||
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
size_t FeatureInteractionConstraint::Features() const {
|
||||
size_t FeatureInteractionConstraintDevice::Features() const {
|
||||
return d_sets_ptr_.size() - 1;
|
||||
}
|
||||
|
||||
void FeatureInteractionConstraint::Configure(
|
||||
void FeatureInteractionConstraintDevice::Configure(
|
||||
tree::TrainParam const& param, int32_t const n_features) {
|
||||
has_constraint_ = true;
|
||||
if (param.interaction_constraints.length() == 0) {
|
||||
@ -33,13 +30,11 @@ void FeatureInteractionConstraint::Configure(
|
||||
return;
|
||||
}
|
||||
// --- Parse interaction constraints
|
||||
std::istringstream iss(param.interaction_constraints);
|
||||
dmlc::JSONReader reader(&iss);
|
||||
// Interaction constraints parsed from string parameter. After
|
||||
// parsing, this looks like {{0, 1, 2}, {2, 3 ,4}}.
|
||||
std::vector<std::vector<int32_t>> h_feature_constraints;
|
||||
std::vector<std::vector<bst_feature_t>> h_feature_constraints;
|
||||
try {
|
||||
reader.Read(&h_feature_constraints);
|
||||
ParseInteractionConstraint(param.interaction_constraints, &h_feature_constraints);
|
||||
} catch (dmlc::Error const& e) {
|
||||
LOG(FATAL) << "Failed to parse feature interaction constraint:\n"
|
||||
<< param.interaction_constraints << "\n"
|
||||
@ -68,13 +63,13 @@ void FeatureInteractionConstraint::Configure(
|
||||
|
||||
// Represent constraints as CSR format, flatten is the value vector,
|
||||
// ptr is row_ptr vector in CSR.
|
||||
std::vector<int32_t> h_feature_constraints_flatten;
|
||||
std::vector<uint32_t> h_feature_constraints_flatten;
|
||||
for (auto const& constraints : h_feature_constraints) {
|
||||
for (int32_t c : constraints) {
|
||||
for (uint32_t c : constraints) {
|
||||
h_feature_constraints_flatten.emplace_back(c);
|
||||
}
|
||||
}
|
||||
std::vector<int32_t> h_feature_constraints_ptr;
|
||||
std::vector<size_t> h_feature_constraints_ptr;
|
||||
size_t n_features_in_constraints = 0;
|
||||
h_feature_constraints_ptr.emplace_back(n_features_in_constraints);
|
||||
for (auto const& v : h_feature_constraints) {
|
||||
@ -130,13 +125,13 @@ void FeatureInteractionConstraint::Configure(
|
||||
s_result_buffer_ = dh::ToSpan(result_buffer_);
|
||||
}
|
||||
|
||||
FeatureInteractionConstraint::FeatureInteractionConstraint(
|
||||
FeatureInteractionConstraintDevice::FeatureInteractionConstraintDevice(
|
||||
tree::TrainParam const& param, int32_t const n_features) :
|
||||
has_constraint_{true}, n_sets_{0} {
|
||||
this->Configure(param, n_features);
|
||||
}
|
||||
|
||||
void FeatureInteractionConstraint::Reset() {
|
||||
void FeatureInteractionConstraintDevice::Reset() {
|
||||
for (auto& node : node_constraints_storage_) {
|
||||
thrust::fill(node.begin(), node.end(), 0);
|
||||
}
|
||||
@ -153,7 +148,7 @@ __global__ void ClearBuffersKernel(
|
||||
}
|
||||
}
|
||||
|
||||
void FeatureInteractionConstraint::ClearBuffers() {
|
||||
void FeatureInteractionConstraintDevice::ClearBuffers() {
|
||||
CHECK_EQ(output_buffer_bits_.Size(), input_buffer_bits_.Size());
|
||||
CHECK_LE(feature_buffer_.Size(), output_buffer_bits_.Size());
|
||||
uint32_t constexpr kBlockThreads = 256;
|
||||
@ -164,7 +159,7 @@ void FeatureInteractionConstraint::ClearBuffers() {
|
||||
output_buffer_bits_, input_buffer_bits_);
|
||||
}
|
||||
|
||||
common::Span<bst_feature_t> FeatureInteractionConstraint::QueryNode(int32_t node_id) {
|
||||
common::Span<bst_feature_t> FeatureInteractionConstraintDevice::QueryNode(int32_t node_id) {
|
||||
if (!has_constraint_) { return {}; }
|
||||
CHECK_LT(node_id, s_node_constraints_.size());
|
||||
|
||||
@ -203,7 +198,7 @@ __global__ void QueryFeatureListKernel(LBitField64 node_constraints,
|
||||
result_buffer_output &= result_buffer_input;
|
||||
}
|
||||
|
||||
common::Span<bst_feature_t> FeatureInteractionConstraint::Query(
|
||||
common::Span<bst_feature_t> FeatureInteractionConstraintDevice::Query(
|
||||
common::Span<bst_feature_t> feature_list, int32_t nid) {
|
||||
if (!has_constraint_ || nid == 0) {
|
||||
return feature_list;
|
||||
@ -250,8 +245,8 @@ __global__ void RestoreFeatureListFromSetsKernel(
|
||||
LBitField64 feature_buffer,
|
||||
|
||||
bst_feature_t fid,
|
||||
common::Span<int32_t> feature_interactions,
|
||||
common::Span<int32_t> feature_interactions_ptr, // of size n interaction set + 1
|
||||
common::Span<bst_feature_t> feature_interactions,
|
||||
common::Span<size_t> feature_interactions_ptr, // of size n interaction set + 1
|
||||
|
||||
common::Span<bst_feature_t> interactions_list,
|
||||
common::Span<size_t> interactions_list_ptr) {
|
||||
@ -302,7 +297,7 @@ __global__ void InteractionConstraintSplitKernel(LBitField64 feature,
|
||||
}
|
||||
}
|
||||
|
||||
void FeatureInteractionConstraint::Split(
|
||||
void FeatureInteractionConstraintDevice::Split(
|
||||
bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id) {
|
||||
if (!has_constraint_) { return; }
|
||||
CHECK_NE(node_id, left_id)
|
||||
|
||||
@ -88,18 +88,18 @@ struct ValueConstraint {
|
||||
};
|
||||
|
||||
// Feature interaction constraints built for GPU Hist updater.
|
||||
struct FeatureInteractionConstraint {
|
||||
struct FeatureInteractionConstraintDevice {
|
||||
protected:
|
||||
// Whether interaction constraint is used.
|
||||
bool has_constraint_;
|
||||
// n interaction sets.
|
||||
int32_t n_sets_;
|
||||
size_t n_sets_;
|
||||
|
||||
// The parsed feature interaction constraints as CSR.
|
||||
dh::device_vector<int32_t> d_fconstraints_;
|
||||
common::Span<int32_t> s_fconstraints_;
|
||||
dh::device_vector<int32_t> d_fconstraints_ptr_;
|
||||
common::Span<int32_t> s_fconstraints_ptr_;
|
||||
dh::device_vector<bst_feature_t> d_fconstraints_;
|
||||
common::Span<bst_feature_t> s_fconstraints_;
|
||||
dh::device_vector<size_t> d_fconstraints_ptr_;
|
||||
common::Span<size_t> s_fconstraints_ptr_;
|
||||
/* Interaction sets for each feature as CSR. For an input like:
|
||||
* [[0, 1], [1, 2]], this will have values:
|
||||
*
|
||||
@ -141,11 +141,11 @@ struct FeatureInteractionConstraint {
|
||||
|
||||
public:
|
||||
size_t Features() const;
|
||||
FeatureInteractionConstraint() = default;
|
||||
FeatureInteractionConstraintDevice() = default;
|
||||
void Configure(tree::TrainParam const& param, int32_t const n_features);
|
||||
FeatureInteractionConstraint(tree::TrainParam const& param, int32_t const n_features);
|
||||
FeatureInteractionConstraint(FeatureInteractionConstraint const& that) = default;
|
||||
FeatureInteractionConstraint(FeatureInteractionConstraint&& that) = default;
|
||||
FeatureInteractionConstraintDevice(tree::TrainParam const& param, int32_t const n_features);
|
||||
FeatureInteractionConstraintDevice(FeatureInteractionConstraintDevice const& that) = default;
|
||||
FeatureInteractionConstraintDevice(FeatureInteractionConstraintDevice&& that) = default;
|
||||
/*! \brief Reset before constructing a new tree. */
|
||||
void Reset();
|
||||
/*! \brief Return a list of features given node id */
|
||||
|
||||
@ -5,6 +5,7 @@
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#include "xgboost/json.h"
|
||||
#include "param.h"
|
||||
|
||||
namespace std {
|
||||
@ -79,3 +80,31 @@ std::istream &operator>>(std::istream &is, std::vector<int> &t) {
|
||||
return is;
|
||||
}
|
||||
} // namespace std
|
||||
|
||||
namespace xgboost {
|
||||
void ParseInteractionConstraint(
|
||||
std::string const &constraint_str,
|
||||
std::vector<std::vector<bst_feature_t>> *p_out) {
|
||||
auto &out = *p_out;
|
||||
auto j_inc = Json::Load({constraint_str.c_str(), constraint_str.size()});
|
||||
auto const &all = get<Array>(j_inc);
|
||||
out.resize(all.size());
|
||||
for (size_t i = 0; i < all.size(); ++i) {
|
||||
auto const &set = get<Array const>(all[i]);
|
||||
for (auto const &v : set) {
|
||||
if (XGBOOST_EXPECT(IsA<Integer>(v), true)) {
|
||||
uint32_t u = static_cast<uint32_t const>(get<Integer const>(v));
|
||||
out[i].emplace_back(u);
|
||||
} else if (IsA<Number>(v)) {
|
||||
double d = get<Number const>(v);
|
||||
CHECK_EQ(std::floor(d), d)
|
||||
<< "Found floating point number in interaction constraints";
|
||||
out[i].emplace_back(static_cast<uint32_t const>(d));
|
||||
} else {
|
||||
LOG(FATAL) << "Unknown value type for interaction constraint:"
|
||||
<< v.GetValue().TypeStr();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@ -483,8 +483,21 @@ struct SplitEntryContainer {
|
||||
};
|
||||
|
||||
using SplitEntry = SplitEntryContainer<GradStats>;
|
||||
|
||||
} // namespace tree
|
||||
|
||||
/*
|
||||
* \brief Parse the interaction constraints from string.
|
||||
* \param constraint_str String storing the interaction constraints:
|
||||
*
|
||||
* Example input string:
|
||||
*
|
||||
* "[[1, 2], [3, 4]]"
|
||||
*
|
||||
* \param p_out Pointer to output
|
||||
*/
|
||||
void ParseInteractionConstraint(
|
||||
std::string const &constraint_str,
|
||||
std::vector<std::vector<xgboost::bst_feature_t>> *p_out);
|
||||
} // namespace xgboost
|
||||
|
||||
// define string serializer for vector, to get the arguments
|
||||
|
||||
@ -436,7 +436,7 @@ struct GPUHistMakerDevice {
|
||||
common::Monitor monitor;
|
||||
std::vector<ValueConstraint> node_value_constraints;
|
||||
common::ColumnSampler column_sampler;
|
||||
FeatureInteractionConstraint interaction_constraints;
|
||||
FeatureInteractionConstraintDevice interaction_constraints;
|
||||
|
||||
using ExpandQueue =
|
||||
std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,
|
||||
|
||||
@ -15,12 +15,12 @@
|
||||
namespace xgboost {
|
||||
namespace {
|
||||
|
||||
struct FConstraintWrapper : public FeatureInteractionConstraint {
|
||||
struct FConstraintWrapper : public FeatureInteractionConstraintDevice {
|
||||
common::Span<LBitField64> GetNodeConstraints() {
|
||||
return FeatureInteractionConstraint::s_node_constraints_;
|
||||
return FeatureInteractionConstraintDevice::s_node_constraints_;
|
||||
}
|
||||
FConstraintWrapper(tree::TrainParam param, bst_feature_t n_features) :
|
||||
FeatureInteractionConstraint(param, n_features) {}
|
||||
FeatureInteractionConstraintDevice(param, n_features) {}
|
||||
|
||||
dh::device_vector<bst_feature_t> const& GetDSets() const {
|
||||
return d_sets_;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user