Fix R interaction constraints (#5543)

* Unify the parsing code.

* Cleanup.
This commit is contained in:
Jiaming Yuan 2020-04-18 06:53:51 +08:00 committed by GitHub
parent 93df871c8c
commit c245eb8755
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 107 additions and 55 deletions

View File

@ -34,5 +34,22 @@ test_that("interaction constraints for regression", {
expect_true({
test1 & test2
}, "Interaction Contraint Satisfied")
})
test_that("interaction constraints scientific representation", {
  ## Regression test for https://github.com/dmlc/xgboost/issues/5179:
  ## when a number exceeds 1e5, R's paste function uses scientific
  ## representation, so the column count is chosen to be larger than 1e5.
  n_rows <- 10
  n_cols <- 1e5+10

  features <- matrix(rexp(n_rows, rate=.1), nrow=n_rows, ncol=n_cols)
  labels <- rnorm(n_rows)
  dtrain <- xgb.DMatrix(data=features, info = list(label=labels))

  ## One constraint set enumerating the indices 0..n_cols.
  all_features <- seq.int(from = 0, to = n_cols, by = 1)
  inc <- list(all_features)

  ## Train once with the constraint and once without it; the serialized
  ## models are expected to be identical.
  constrained <- xgb.train(data=dtrain, tree_method='hist',
                           interaction_constraints=inc, nrounds=10)
  unconstrained <- xgb.train(data=dtrain, tree_method='hist', nrounds=10)
  expect_equal(xgb.save.raw(constrained), xgb.save.raw(unconstrained))
})

View File

@ -6,6 +6,7 @@
#include <vector>
#include "xgboost/span.h"
#include "xgboost/json.h"
#include "constraints.h"
#include "param.h"
@ -27,14 +28,11 @@ void FeatureInteractionConstraintHost::Reset() {
if (!enabled_) {
return;
}
// Parse interaction constraints
std::istringstream iss(this->interaction_constraint_str_);
dmlc::JSONReader reader(&iss);
// Read std::vector<std::vector<bst_uint>> first and then
// convert to std::vector<std::unordered_set<bst_uint>>
std::vector<std::vector<bst_uint>> tmp;
// Read std::vector<std::vector<bst_feature_t>> first and then
// convert to std::vector<std::unordered_set<bst_feature_t>>
std::vector<std::vector<bst_feature_t>> tmp;
try {
reader.Read(&tmp);
ParseInteractionConstraint(this->interaction_constraint_str_, &tmp);
} catch (dmlc::Error const &e) {
LOG(FATAL) << "Failed to parse feature interaction constraint:\n"
<< this->interaction_constraint_str_ << "\n"

View File

@ -7,9 +7,7 @@
#include <thrust/iterator/counting_iterator.h>
#include <algorithm>
#include <bitset>
#include <string>
#include <sstream>
#include <set>
#include "xgboost/logging.h"
@ -18,14 +16,13 @@
#include "param.h"
#include "../common/device_helpers.cuh"
namespace xgboost {
size_t FeatureInteractionConstraint::Features() const {
size_t FeatureInteractionConstraintDevice::Features() const {
return d_sets_ptr_.size() - 1;
}
void FeatureInteractionConstraint::Configure(
void FeatureInteractionConstraintDevice::Configure(
tree::TrainParam const& param, int32_t const n_features) {
has_constraint_ = true;
if (param.interaction_constraints.length() == 0) {
@ -33,13 +30,11 @@ void FeatureInteractionConstraint::Configure(
return;
}
// --- Parse interaction constraints
std::istringstream iss(param.interaction_constraints);
dmlc::JSONReader reader(&iss);
// Interaction constraints parsed from string parameter. After
// parsing, this looks like {{0, 1, 2}, {2, 3 ,4}}.
std::vector<std::vector<int32_t>> h_feature_constraints;
std::vector<std::vector<bst_feature_t>> h_feature_constraints;
try {
reader.Read(&h_feature_constraints);
ParseInteractionConstraint(param.interaction_constraints, &h_feature_constraints);
} catch (dmlc::Error const& e) {
LOG(FATAL) << "Failed to parse feature interaction constraint:\n"
<< param.interaction_constraints << "\n"
@ -68,13 +63,13 @@ void FeatureInteractionConstraint::Configure(
// Represent constraints as CSR format, flatten is the value vector,
// ptr is row_ptr vector in CSR.
std::vector<int32_t> h_feature_constraints_flatten;
std::vector<uint32_t> h_feature_constraints_flatten;
for (auto const& constraints : h_feature_constraints) {
for (int32_t c : constraints) {
for (uint32_t c : constraints) {
h_feature_constraints_flatten.emplace_back(c);
}
}
std::vector<int32_t> h_feature_constraints_ptr;
std::vector<size_t> h_feature_constraints_ptr;
size_t n_features_in_constraints = 0;
h_feature_constraints_ptr.emplace_back(n_features_in_constraints);
for (auto const& v : h_feature_constraints) {
@ -130,13 +125,13 @@ void FeatureInteractionConstraint::Configure(
s_result_buffer_ = dh::ToSpan(result_buffer_);
}
FeatureInteractionConstraint::FeatureInteractionConstraint(
FeatureInteractionConstraintDevice::FeatureInteractionConstraintDevice(
tree::TrainParam const& param, int32_t const n_features) :
has_constraint_{true}, n_sets_{0} {
this->Configure(param, n_features);
}
void FeatureInteractionConstraint::Reset() {
void FeatureInteractionConstraintDevice::Reset() {
for (auto& node : node_constraints_storage_) {
thrust::fill(node.begin(), node.end(), 0);
}
@ -153,7 +148,7 @@ __global__ void ClearBuffersKernel(
}
}
void FeatureInteractionConstraint::ClearBuffers() {
void FeatureInteractionConstraintDevice::ClearBuffers() {
CHECK_EQ(output_buffer_bits_.Size(), input_buffer_bits_.Size());
CHECK_LE(feature_buffer_.Size(), output_buffer_bits_.Size());
uint32_t constexpr kBlockThreads = 256;
@ -164,7 +159,7 @@ void FeatureInteractionConstraint::ClearBuffers() {
output_buffer_bits_, input_buffer_bits_);
}
common::Span<bst_feature_t> FeatureInteractionConstraint::QueryNode(int32_t node_id) {
common::Span<bst_feature_t> FeatureInteractionConstraintDevice::QueryNode(int32_t node_id) {
if (!has_constraint_) { return {}; }
CHECK_LT(node_id, s_node_constraints_.size());
@ -203,7 +198,7 @@ __global__ void QueryFeatureListKernel(LBitField64 node_constraints,
result_buffer_output &= result_buffer_input;
}
common::Span<bst_feature_t> FeatureInteractionConstraint::Query(
common::Span<bst_feature_t> FeatureInteractionConstraintDevice::Query(
common::Span<bst_feature_t> feature_list, int32_t nid) {
if (!has_constraint_ || nid == 0) {
return feature_list;
@ -250,8 +245,8 @@ __global__ void RestoreFeatureListFromSetsKernel(
LBitField64 feature_buffer,
bst_feature_t fid,
common::Span<int32_t> feature_interactions,
common::Span<int32_t> feature_interactions_ptr, // of size n interaction set + 1
common::Span<bst_feature_t> feature_interactions,
common::Span<size_t> feature_interactions_ptr, // of size n interaction set + 1
common::Span<bst_feature_t> interactions_list,
common::Span<size_t> interactions_list_ptr) {
@ -302,7 +297,7 @@ __global__ void InteractionConstraintSplitKernel(LBitField64 feature,
}
}
void FeatureInteractionConstraint::Split(
void FeatureInteractionConstraintDevice::Split(
bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id) {
if (!has_constraint_) { return; }
CHECK_NE(node_id, left_id)

View File

@ -88,18 +88,18 @@ struct ValueConstraint {
};
// Feature interaction constraints built for GPU Hist updater.
struct FeatureInteractionConstraint {
struct FeatureInteractionConstraintDevice {
protected:
// Whether interaction constraint is used.
bool has_constraint_;
// n interaction sets.
int32_t n_sets_;
size_t n_sets_;
// The parsed feature interaction constraints as CSR.
dh::device_vector<int32_t> d_fconstraints_;
common::Span<int32_t> s_fconstraints_;
dh::device_vector<int32_t> d_fconstraints_ptr_;
common::Span<int32_t> s_fconstraints_ptr_;
dh::device_vector<bst_feature_t> d_fconstraints_;
common::Span<bst_feature_t> s_fconstraints_;
dh::device_vector<size_t> d_fconstraints_ptr_;
common::Span<size_t> s_fconstraints_ptr_;
/* Interaction sets for each feature as CSR. For an input like:
* [[0, 1], [1, 2]], this will have values:
*
@ -141,11 +141,11 @@ struct FeatureInteractionConstraint {
public:
size_t Features() const;
FeatureInteractionConstraint() = default;
FeatureInteractionConstraintDevice() = default;
void Configure(tree::TrainParam const& param, int32_t const n_features);
FeatureInteractionConstraint(tree::TrainParam const& param, int32_t const n_features);
FeatureInteractionConstraint(FeatureInteractionConstraint const& that) = default;
FeatureInteractionConstraint(FeatureInteractionConstraint&& that) = default;
FeatureInteractionConstraintDevice(tree::TrainParam const& param, int32_t const n_features);
FeatureInteractionConstraintDevice(FeatureInteractionConstraintDevice const& that) = default;
FeatureInteractionConstraintDevice(FeatureInteractionConstraintDevice&& that) = default;
/*! \brief Reset before constructing a new tree. */
void Reset();
/*! \brief Return a list of features given node id */

View File

@ -5,6 +5,7 @@
#include <vector>
#include <utility>
#include "xgboost/json.h"
#include "param.h"
namespace std {
@ -79,3 +80,31 @@ std::istream &operator>>(std::istream &is, std::vector<int> &t) {
return is;
}
} // namespace std
namespace xgboost {
void ParseInteractionConstraint(
std::string const &constraint_str,
std::vector<std::vector<bst_feature_t>> *p_out) {
auto &out = *p_out;
auto j_inc = Json::Load({constraint_str.c_str(), constraint_str.size()});
auto const &all = get<Array>(j_inc);
out.resize(all.size());
for (size_t i = 0; i < all.size(); ++i) {
auto const &set = get<Array const>(all[i]);
for (auto const &v : set) {
if (XGBOOST_EXPECT(IsA<Integer>(v), true)) {
uint32_t u = static_cast<uint32_t const>(get<Integer const>(v));
out[i].emplace_back(u);
} else if (IsA<Number>(v)) {
double d = get<Number const>(v);
CHECK_EQ(std::floor(d), d)
<< "Found floating point number in interaction constraints";
out[i].emplace_back(static_cast<uint32_t const>(d));
} else {
LOG(FATAL) << "Unknown value type for interaction constraint:"
<< v.GetValue().TypeStr();
}
}
}
}
} // namespace xgboost

View File

@ -483,8 +483,21 @@ struct SplitEntryContainer {
};
using SplitEntry = SplitEntryContainer<GradStats>;
} // namespace tree
/*!
 * \brief Parse the interaction constraints from string.
 *
 * The string is read as JSON: an outer array whose elements are arrays of
 * feature indices.  Each inner array is stored as one vector in the output.
 * Indices may be written as integers or as floating point numbers with an
 * integral value; non-integral numbers and other value types are reported
 * as errors through the logging facilities.
 *
 * \param constraint_str String storing the interaction constraints.
 *
 * Example input string:
 *
 *   "[[1, 2], [3, 4]]"
 *
 * \param p_out Pointer to output, one vector of feature indices per
 *              interaction set.
 */
void ParseInteractionConstraint(
std::string const &constraint_str,
std::vector<std::vector<xgboost::bst_feature_t>> *p_out);
} // namespace xgboost
// define string serializer for vector, to get the arguments

View File

@ -436,7 +436,7 @@ struct GPUHistMakerDevice {
common::Monitor monitor;
std::vector<ValueConstraint> node_value_constraints;
common::ColumnSampler column_sampler;
FeatureInteractionConstraint interaction_constraints;
FeatureInteractionConstraintDevice interaction_constraints;
using ExpandQueue =
std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,

View File

@ -15,12 +15,12 @@
namespace xgboost {
namespace {
struct FConstraintWrapper : public FeatureInteractionConstraint {
struct FConstraintWrapper : public FeatureInteractionConstraintDevice {
common::Span<LBitField64> GetNodeConstraints() {
return FeatureInteractionConstraint::s_node_constraints_;
return FeatureInteractionConstraintDevice::s_node_constraints_;
}
FConstraintWrapper(tree::TrainParam param, bst_feature_t n_features) :
FeatureInteractionConstraint(param, n_features) {}
FeatureInteractionConstraintDevice(param, n_features) {}
dh::device_vector<bst_feature_t> const& GetDSets() const {
return d_sets_;