Expand categorical node. (#6028)
Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
@@ -16,6 +16,7 @@
|
||||
#if defined(__CUDACC__)
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/device_ptr.h>
|
||||
#include "device_helpers.cuh"
|
||||
#endif // defined(__CUDACC__)
|
||||
|
||||
#include "xgboost/span.h"
|
||||
@@ -54,23 +55,24 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
|
||||
*
|
||||
* \tparam Direction Whether the bits start from left or from right.
|
||||
*/
|
||||
template <typename VT, typename Direction>
|
||||
template <typename VT, typename Direction, bool IsConst = false>
|
||||
struct BitFieldContainer {
|
||||
using value_type = VT; // NOLINT
|
||||
using value_type = std::conditional_t<IsConst, VT const, VT>; // NOLINT
|
||||
using pointer = value_type*; // NOLINT
|
||||
|
||||
static value_type constexpr kValueSize = sizeof(value_type) * 8;
|
||||
static value_type constexpr kOne = 1; // force correct type.
|
||||
|
||||
struct Pos {
|
||||
value_type int_pos {0};
|
||||
value_type bit_pos {0};
|
||||
std::remove_const_t<value_type> int_pos {0};
|
||||
std::remove_const_t<value_type> bit_pos {0};
|
||||
};
|
||||
|
||||
private:
|
||||
common::Span<value_type> bits_;
|
||||
static_assert(!std::is_signed<VT>::value, "Must use unsiged type as underlying storage.");
|
||||
|
||||
public:
|
||||
XGBOOST_DEVICE static Pos ToBitPos(value_type pos) {
|
||||
Pos pos_v;
|
||||
if (pos == 0) {
|
||||
@@ -92,7 +94,7 @@ struct BitFieldContainer {
|
||||
/*\brief Compute the size of needed memory allocation. The returned value is in terms
|
||||
* of number of elements with `BitFieldContainer::value_type'.
|
||||
*/
|
||||
static size_t ComputeStorageSize(size_t size) {
|
||||
XGBOOST_DEVICE static size_t ComputeStorageSize(size_t size) {
|
||||
return common::DivRoundUp(size, kValueSize);
|
||||
}
|
||||
#if defined(__CUDA_ARCH__)
|
||||
@@ -134,19 +136,19 @@ struct BitFieldContainer {
|
||||
#endif // defined(__CUDA_ARCH__)
|
||||
|
||||
#if defined(__CUDA_ARCH__)
|
||||
__device__ void Set(value_type pos) {
|
||||
__device__ auto Set(value_type pos) {
|
||||
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
||||
value_type& value = bits_[pos_v.int_pos];
|
||||
value_type set_bit = kOne << pos_v.bit_pos;
|
||||
static_assert(sizeof(BitFieldAtomicType) == sizeof(value_type), "");
|
||||
AtomicOr(reinterpret_cast<BitFieldAtomicType*>(&value), set_bit);
|
||||
using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;
|
||||
atomicOr(reinterpret_cast<Type *>(&value), set_bit);
|
||||
}
|
||||
__device__ void Clear(value_type pos) {
|
||||
Pos pos_v = Direction::Shift(ToBitPos(pos));
|
||||
value_type& value = bits_[pos_v.int_pos];
|
||||
value_type clear_bit = ~(kOne << pos_v.bit_pos);
|
||||
static_assert(sizeof(BitFieldAtomicType) == sizeof(value_type), "");
|
||||
AtomicAnd(reinterpret_cast<BitFieldAtomicType*>(&value), clear_bit);
|
||||
using Type = typename dh::detail::AtomicDispatcher<sizeof(value_type)>::Type;
|
||||
atomicAnd(reinterpret_cast<Type *>(&value), clear_bit);
|
||||
}
|
||||
#else
|
||||
void Set(value_type pos) {
|
||||
@@ -165,6 +167,7 @@ struct BitFieldContainer {
|
||||
|
||||
XGBOOST_DEVICE bool Check(Pos pos_v) const {
|
||||
pos_v = Direction::Shift(pos_v);
|
||||
SPAN_LT(pos_v.int_pos, bits_.size());
|
||||
value_type const value = bits_[pos_v.int_pos];
|
||||
value_type const test_bit = kOne << pos_v.bit_pos;
|
||||
value_type result = test_bit & value;
|
||||
@@ -179,10 +182,11 @@ struct BitFieldContainer {
|
||||
|
||||
XGBOOST_DEVICE pointer Data() const { return bits_.data(); }
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, BitFieldContainer<VT, Direction> field) {
|
||||
inline friend std::ostream &
|
||||
operator<<(std::ostream &os, BitFieldContainer<VT, Direction, IsConst> field) {
|
||||
os << "Bits " << "storage size: " << field.bits_.size() << "\n";
|
||||
for (typename common::Span<value_type>::index_type i = 0; i < field.bits_.size(); ++i) {
|
||||
std::bitset<BitFieldContainer<VT, Direction>::kValueSize> bset(field.bits_[i]);
|
||||
std::bitset<BitFieldContainer<VT, Direction, IsConst>::kValueSize> bset(field.bits_[i]);
|
||||
os << bset << "\n";
|
||||
}
|
||||
return os;
|
||||
@@ -190,9 +194,9 @@ struct BitFieldContainer {
|
||||
};
|
||||
|
||||
// Bits start from left most bits (most significant bit).
|
||||
template <typename VT>
|
||||
struct LBitsPolicy : public BitFieldContainer<VT, LBitsPolicy<VT>> {
|
||||
using Container = BitFieldContainer<VT, LBitsPolicy<VT>>;
|
||||
template <typename VT, bool IsConst = false>
|
||||
struct LBitsPolicy : public BitFieldContainer<VT, LBitsPolicy<VT, IsConst>, IsConst> {
|
||||
using Container = BitFieldContainer<VT, LBitsPolicy<VT, IsConst>, IsConst>;
|
||||
using Pos = typename Container::Pos;
|
||||
using value_type = typename Container::value_type; // NOLINT
|
||||
|
||||
@@ -215,38 +219,13 @@ struct RBitsPolicy : public BitFieldContainer<VT, RBitsPolicy<VT>> {
|
||||
}
|
||||
};
|
||||
|
||||
// Format: <Direction>BitField<size of underlying type in bits>, underlying type must be unsigned.
|
||||
// Format: <Const><Direction>BitField<size of underlying type in bits>, underlying type
|
||||
// must be unsigned.
|
||||
using LBitField64 = BitFieldContainer<uint64_t, LBitsPolicy<uint64_t>>;
|
||||
using RBitField8 = BitFieldContainer<uint8_t, RBitsPolicy<unsigned char>>;
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
|
||||
template <typename V, typename D>
|
||||
inline void PrintDeviceBits(std::string name, BitFieldContainer<V, D> field) {
|
||||
std::cout << "Bits: " << name << std::endl;
|
||||
std::vector<typename BitFieldContainer<V, D>::value_type> h_field_bits(field.bits_.size());
|
||||
thrust::copy(thrust::device_ptr<typename BitFieldContainer<V, D>::value_type>(field.bits_.data()),
|
||||
thrust::device_ptr<typename BitFieldContainer<V, D>::value_type>(
|
||||
field.bits_.data() + field.bits_.size()),
|
||||
h_field_bits.data());
|
||||
BitFieldContainer<V, D> h_field;
|
||||
h_field.bits_ = {h_field_bits.data(), h_field_bits.data() + h_field_bits.size()};
|
||||
std::cout << h_field;
|
||||
}
|
||||
|
||||
inline void PrintDeviceStorage(std::string name, common::Span<int32_t> list) {
|
||||
std::cout << name << std::endl;
|
||||
std::vector<int32_t> h_list(list.size());
|
||||
thrust::copy(thrust::device_ptr<int32_t>(list.data()),
|
||||
thrust::device_ptr<int32_t>(list.data() + list.size()),
|
||||
h_list.data());
|
||||
for (auto v : h_list) {
|
||||
std::cout << v << ", ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
#endif // defined(__CUDACC__)
|
||||
using LBitField32 = BitFieldContainer<uint32_t, LBitsPolicy<uint32_t>>;
|
||||
using CLBitField32 = BitFieldContainer<uint32_t, LBitsPolicy<uint32_t, true>, true>;
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_COMMON_BITFIELD_H_
|
||||
|
||||
50
src/common/categorical.h
Normal file
50
src/common/categorical.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/*!
|
||||
* Copyright 2020 by XGBoost Contributors
|
||||
* \file categorical.h
|
||||
*/
|
||||
#ifndef XGBOOST_COMMON_CATEGORICAL_H_
|
||||
#define XGBOOST_COMMON_CATEGORICAL_H_
|
||||
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/span.h"
|
||||
#include "xgboost/parameter.h"
|
||||
#include "bitfield.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
// Cast the categorical type.
|
||||
template <typename T>
|
||||
XGBOOST_DEVICE bst_cat_t AsCat(T const& v) {
|
||||
return static_cast<bst_cat_t>(v);
|
||||
}
|
||||
|
||||
/* \brief Whether is fidx a categorical feature.
|
||||
*
|
||||
* \param ft Feature type for all features.
|
||||
* \param fidx Feature index.
|
||||
* \return Whether feature pointed by fidx is categorical feature.
|
||||
*/
|
||||
inline XGBOOST_DEVICE bool IsCat(Span<FeatureType const> ft, bst_feature_t fidx) {
|
||||
return !ft.empty() && ft[fidx] == FeatureType::kCategorical;
|
||||
}
|
||||
|
||||
/* \brief Whether should it traverse to left branch of a tree.
|
||||
*
|
||||
* For one hot split, go to left if it's NOT the matching category.
|
||||
*/
|
||||
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, bst_cat_t cat) {
|
||||
auto pos = CLBitField32::ToBitPos(cat);
|
||||
if (pos.int_pos >= cats.size()) {
|
||||
return true;
|
||||
}
|
||||
CLBitField32 const s_cats(cats);
|
||||
return !s_cats.Check(cat);
|
||||
}
|
||||
|
||||
using CatBitField = LBitField32;
|
||||
using KCatBitField = CLBitField32;
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_COMMON_CATEGORICAL_H_
|
||||
@@ -275,6 +275,9 @@ Json& JsonNumber::operator[](int ind) {
|
||||
|
||||
bool JsonNumber::operator==(Value const& rhs) const {
|
||||
if (!IsA<JsonNumber>(&rhs)) { return false; }
|
||||
if (std::isinf(number_)) {
|
||||
return std::isinf(Cast<JsonNumber const>(&rhs)->GetNumber());
|
||||
}
|
||||
return std::abs(number_ - Cast<JsonNumber const>(&rhs)->GetNumber()) < kRtEps;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user