CPU evaluation for cat data. (#7393)

* Implementation based on one-hot encoding.
* Implementation based on optimal partitioning, as in LightGBM.
This commit is contained in:
Jiaming Yuan
2021-11-06 14:41:35 +08:00
committed by GitHub
parent 6ede12412c
commit d7d1b6e3a6
15 changed files with 540 additions and 166 deletions

View File

@@ -0,0 +1,44 @@
/*!
* Copyright 2021 by XGBoost Contributors
*
* \brief Utilities for testing categorical data support.
*/
#include <numeric>
#include <vector>
#include "xgboost/span.h"
#include "helpers.h"
#include "../../src/common/categorical.h"
namespace xgboost {
/**
 * \brief One-hot encode a single categorical feature column.
 *
 * \param x       Float-encoded category index per row.
 * \param num_cat Total number of categories; each row expands to this many columns.
 *
 * \return Row-major dense matrix of shape [x.size(), num_cat] with a 1 at each
 *         row's category position and 0 elsewhere.
 *
 * Throws std::out_of_range (via vector::at) if any encoded category falls
 * outside [0, num_cat).
 */
inline std::vector<float> OneHotEncodeFeature(std::vector<float> const &x,
                                              size_t num_cat) {
  // Take x by const reference: the original copied the whole column by value
  // even though it is only read.
  std::vector<float> ret(x.size() * num_cat, 0);
  size_t n_rows = x.size();
  for (size_t r = 0; r < n_rows; ++r) {
    bst_cat_t cat = common::AsCat(x[r]);
    ret.at(num_cat * r + cat) = 1;
  }
  return ret;
}
/**
 * \brief Check that a categorical histogram agrees with the histogram built
 *        from the equivalent one-hot-encoded data.
 *
 * For each category c, the one-hot histogram stores the "not chosen" bucket at
 * bin 2*c and the "chosen" bucket at bin 2*c+1.  Both must match (up to kRtEps
 * relative error) the values derived from the categorical histogram, where
 * "not chosen" is the total sum minus the category's own entry.
 */
template <typename GradientSumT>
void ValidateCategoricalHistogram(size_t n_categories,
                                  common::Span<GradientSumT> onehot,
                                  common::Span<GradientSumT> cat) {
  auto const cat_sum = std::accumulate(cat.cbegin(), cat.cend(), GradientPairPrecise{});
  for (size_t i = 0; i < n_categories; ++i) {
    auto const chosen = cat[i];
    auto const not_chosen = cat_sum - chosen;
    auto const zero = onehot[i * 2];      // rows where category i is absent
    auto const one = onehot[i * 2 + 1];   // rows where category i is present
    ASSERT_LE(RelError(zero.GetGrad(), not_chosen.GetGrad()), kRtEps);
    ASSERT_LE(RelError(zero.GetHess(), not_chosen.GetHess()), kRtEps);
    ASSERT_LE(RelError(one.GetGrad(), chosen.GetGrad()), kRtEps);
    ASSERT_LE(RelError(one.GetHess(), chosen.GetHess()), kRtEps);
  }
}
} // namespace xgboost

View File

@@ -5,6 +5,13 @@
#include "../../../src/common/quantile.cuh"
namespace xgboost {
namespace {
// Strict-weak-ordering comparator on SketchEntry::value, used with
// std::is_sorted to verify that sketch entries within each column are ordered
// by value.  XGBOOST_DEVICE allows use from both host and device code.
struct IsSorted {
XGBOOST_DEVICE bool operator()(common::SketchEntry const& a, common::SketchEntry const& b) const {
return a.value < b.value;
}
};
}
namespace common {
TEST(GPUQuantile, Basic) {
constexpr size_t kRows = 1000, kCols = 100, kBins = 256;
@@ -52,9 +59,15 @@ void TestSketchUnique(float sparsity) {
ASSERT_EQ(sketch.Data().size(), h_columns_ptr.back());
sketch.Unique();
ASSERT_TRUE(thrust::is_sorted(thrust::device, sketch.Data().data(),
sketch.Data().data() + sketch.Data().size(),
detail::SketchUnique{}));
std::vector<SketchEntry> h_data(sketch.Data().size());
thrust::copy(dh::tcbegin(sketch.Data()), dh::tcend(sketch.Data()), h_data.begin());
for (size_t i = 1; i < h_columns_ptr.size(); ++i) {
auto begin = h_columns_ptr[i - 1];
auto column = common::Span<SketchEntry>(h_data).subspan(begin, h_columns_ptr[i] - begin);
ASSERT_TRUE(std::is_sorted(column.begin(), column.end(), IsSorted{}));
}
});
}
@@ -84,8 +97,7 @@ void TestQuantileElemRank(int32_t device, Span<SketchEntry const> in,
if (with_error) {
ASSERT_GE(in_column[idx].rmin + in_column[idx].rmin * kRtEps,
prev_rmin);
ASSERT_GE(in_column[idx].rmax + in_column[idx].rmin * kRtEps,
prev_rmax);
ASSERT_GE(in_column[idx].rmax + in_column[idx].rmin * kRtEps, prev_rmax);
ASSERT_GE(in_column[idx].rmax + in_column[idx].rmin * kRtEps,
rmin_next);
} else {
@@ -169,7 +181,7 @@ TEST(GPUQuantile, MergeEmpty) {
TEST(GPUQuantile, MergeBasic) {
constexpr size_t kRows = 1000, kCols = 100;
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const& info) {
RunWithSeedsAndBins(kRows, [=](int32_t seed, size_t n_bins, MetaInfo const &info) {
HostDeviceVector<FeatureType> ft;
SketchContainer sketch_0(ft, n_bins, kCols, kRows, 0);
HostDeviceVector<float> storage_0;
@@ -265,9 +277,16 @@ void TestMergeDuplicated(int32_t n_bins, size_t cols, size_t rows, float frac) {
ASSERT_EQ(h_columns_ptr.back(), sketch_1.Data().size() + size_before_merge);
sketch_0.Unique();
ASSERT_TRUE(thrust::is_sorted(thrust::device, sketch_0.Data().data(),
sketch_0.Data().data() + sketch_0.Data().size(),
detail::SketchUnique{}));
columns_ptr = sketch_0.ColumnsPtr();
dh::CopyDeviceSpanToVector(&h_columns_ptr, columns_ptr);
std::vector<SketchEntry> h_data(sketch_0.Data().size());
dh::CopyDeviceSpanToVector(&h_data, sketch_0.Data());
for (size_t i = 1; i < h_columns_ptr.size(); ++i) {
auto begin = h_columns_ptr[i - 1];
auto column = Span<SketchEntry> {h_data}.subspan(begin, h_columns_ptr[i] - begin);
ASSERT_TRUE(std::is_sorted(column.begin(), column.end(), IsSorted{}));
}
}
TEST(GPUQuantile, MergeDuplicated) {

View File

@@ -48,7 +48,9 @@ template <typename Fn> void RunWithSeedsAndBins(size_t rows, Fn fn) {
std::vector<MetaInfo> infos(2);
auto& h_weights = infos.front().weights_.HostVector();
h_weights.resize(rows);
std::generate(h_weights.begin(), h_weights.end(), [&]() { return dist(&lcg); });
SimpleRealUniformDistribution<float> weight_dist(0, 10);
std::generate(h_weights.begin(), h_weights.end(), [&]() { return weight_dist(&lcg); });
for (auto seed : seeds) {
for (auto n_bin : bins) {

View File

@@ -172,12 +172,10 @@ SimpleLCG::StateType SimpleLCG::operator()() {
state_ = (alpha_ * state_) % mod_;
return state_;
}
SimpleLCG::StateType SimpleLCG::Min() const {
return seed_ * alpha_;
}
SimpleLCG::StateType SimpleLCG::Max() const {
return max_value_;
}
SimpleLCG::StateType SimpleLCG::Min() const { return min(); }
SimpleLCG::StateType SimpleLCG::Max() const { return max(); }
// Make sure it's compile time constant.
static_assert(SimpleLCG::max() - SimpleLCG::min(), "");
void RandomDataGenerator::GenerateDense(HostDeviceVector<float> *out) const {
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);
@@ -291,6 +289,7 @@ void RandomDataGenerator::GenerateCSR(
xgboost::SimpleRealUniformDistribution<bst_float> dist(lower_, upper_);
float sparsity = sparsity_ * (upper_ - lower_) + lower_;
SimpleRealUniformDistribution<bst_float> cat(0.0, max_cat_);
h_rptr.emplace_back(0);
for (size_t i = 0; i < rows_; ++i) {
@@ -298,7 +297,11 @@ void RandomDataGenerator::GenerateCSR(
for (size_t j = 0; j < cols_; ++j) {
auto g = dist(&lcg);
if (g >= sparsity) {
g = dist(&lcg);
if (common::IsCat(ft_, j)) {
g = common::AsCat(cat(&lcg));
} else {
g = dist(&lcg);
}
h_value.emplace_back(g);
rptr++;
h_cols.emplace_back(j);
@@ -347,11 +350,15 @@ RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
}
if (device_ >= 0) {
out->Info().labels_.SetDevice(device_);
out->Info().feature_types.SetDevice(device_);
for (auto const& page : out->GetBatches<SparsePage>()) {
page.data.SetDevice(device_);
page.offset.SetDevice(device_);
}
}
if (!ft_.empty()) {
out->Info().feature_types.HostVector() = ft_;
}
return out;
}

View File

@@ -106,42 +106,39 @@ bool IsNear(std::vector<xgboost::bst_float>::const_iterator _beg1,
*/
class SimpleLCG {
private:
using StateType = int64_t;
using StateType = uint64_t;
static StateType constexpr kDefaultInit = 3;
static StateType constexpr default_alpha_ = 61;
static StateType constexpr max_value_ = ((StateType)1 << 32) - 1;
static StateType constexpr kDefaultAlpha = 61;
static StateType constexpr kMaxValue = (static_cast<StateType>(1) << 32) - 1;
StateType state_;
StateType const alpha_;
StateType const mod_;
StateType seed_;
public:
using result_type = StateType; // NOLINT
public:
SimpleLCG() : state_{kDefaultInit},
alpha_{default_alpha_}, mod_{max_value_}, seed_{state_}{}
SimpleLCG() : state_{kDefaultInit}, alpha_{kDefaultAlpha}, mod_{kMaxValue} {}
SimpleLCG(SimpleLCG const& that) = default;
SimpleLCG(SimpleLCG&& that) = default;
void Seed(StateType seed) {
seed_ = seed;
}
void Seed(StateType seed) { state_ = seed % mod_; }
/*!
* \brief Initialize SimpleLCG.
*
* \param state Initial state, can also be considered as seed. If set to
* zero, SimpleLCG will use internal default value.
* \param alpha multiplier
* \param mod modulo
*/
explicit SimpleLCG(StateType state,
StateType alpha=default_alpha_, StateType mod=max_value_)
: state_{state == 0 ? kDefaultInit : state},
alpha_{alpha}, mod_{mod} , seed_{state} {}
explicit SimpleLCG(StateType state)
: state_{state == 0 ? kDefaultInit : state}, alpha_{kDefaultAlpha}, mod_{kMaxValue} {}
StateType operator()();
StateType Min() const;
StateType Max() const;
constexpr result_type static min() { return 0; }; // NOLINT
constexpr result_type static max() { return kMaxValue; } // NOLINT
};
template <typename ResultT>
@@ -217,10 +214,12 @@ class RandomDataGenerator {
float upper_;
int32_t device_;
int32_t seed_;
uint64_t seed_;
SimpleLCG lcg_;
size_t bins_;
std::vector<FeatureType> ft_;
bst_cat_t max_cat_;
Json ArrayInterfaceImpl(HostDeviceVector<float> *storage, size_t rows,
size_t cols) const;
@@ -242,7 +241,7 @@ class RandomDataGenerator {
device_ = d;
return *this;
}
RandomDataGenerator& Seed(int32_t s) {
RandomDataGenerator& Seed(uint64_t s) {
seed_ = s;
lcg_.Seed(seed_);
return *this;
@@ -251,6 +250,16 @@ class RandomDataGenerator {
bins_ = b;
return *this;
}
RandomDataGenerator& Type(common::Span<FeatureType> ft) {
CHECK_EQ(ft.size(), cols_);
ft_.resize(ft.size());
std::copy(ft.cbegin(), ft.cend(), ft_.begin());
return *this;
}
RandomDataGenerator& MaxCategory(bst_cat_t cat) {
max_cat_ = cat;
return *this;
}
void GenerateDense(HostDeviceVector<float>* out) const;

View File

@@ -1,9 +1,11 @@
#include <gtest/gtest.h>
#include <vector>
#include "../../helpers.h"
#include "../../../../src/common/categorical.h"
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
#include "../../../../src/tree/gpu_hist/histogram.cuh"
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
#include "../../categorical_helpers.h"
#include "../../helpers.h"
namespace xgboost {
namespace tree {
@@ -99,16 +101,6 @@ TEST(Histogram, GPUDeterministic) {
}
}
std::vector<float> OneHotEncodeFeature(std::vector<float> x, size_t num_cat) {
std::vector<float> ret(x.size() * num_cat, 0);
size_t n_rows = x.size();
for (size_t r = 0; r < n_rows; ++r) {
bst_cat_t cat = common::AsCat(x[r]);
ret.at(num_cat * r + cat) = 1;
}
return ret;
}
// Test 1 vs rest categorical histogram is equivalent to one hot encoded data.
void TestGPUHistogramCategorical(size_t num_categories) {
size_t constexpr kRows = 340;
@@ -123,7 +115,9 @@ void TestGPUHistogramCategorical(size_t num_categories) {
auto gpair = GenerateRandomGradients(kRows, 0, 2);
gpair.SetDevice(0);
auto rounding = CreateRoundingFactor<GradientPairPrecise>(gpair.DeviceSpan());
// Generate hist with cat data.
/**
* Generate hist with cat data.
*/
for (auto const &batch : cat_m->GetBatches<EllpackPage>(batch_param)) {
auto* page = batch.Impl();
FeatureGroups single_group(page->Cuts());
@@ -133,7 +127,9 @@ void TestGPUHistogramCategorical(size_t num_categories) {
rounding);
}
// Generate hist with one hot encoded data.
/**
* Generate hist with one hot encoded data.
*/
auto x_encoded = OneHotEncodeFeature(x, num_categories);
auto encode_m = GetDMatrixFromData(x_encoded, kRows, num_categories);
dh::device_vector<GradientPairPrecise> encode_hist(2 * num_categories);
@@ -152,20 +148,9 @@ void TestGPUHistogramCategorical(size_t num_categories) {
std::vector<GradientPairPrecise> h_encode_hist(encode_hist.size());
thrust::copy(encode_hist.begin(), encode_hist.end(), h_encode_hist.begin());
for (size_t c = 0; c < num_categories; ++c) {
auto zero = h_encode_hist[c * 2];
auto one = h_encode_hist[c * 2 + 1];
auto chosen = h_cat_hist[c];
auto not_chosen = cat_sum - chosen;
ASSERT_LE(RelError(zero.GetGrad(), not_chosen.GetGrad()), kRtEps);
ASSERT_LE(RelError(zero.GetHess(), not_chosen.GetHess()), kRtEps);
ASSERT_LE(RelError(one.GetGrad(), chosen.GetGrad()), kRtEps);
ASSERT_LE(RelError(one.GetHess(), chosen.GetHess()), kRtEps);
}
ValidateCategoricalHistogram(num_categories,
common::Span<GradientPairPrecise>{h_encode_hist},
common::Span<GradientPairPrecise>{h_cat_hist});
}
TEST(Histogram, GPUHistCategorical) {

View File

@@ -7,7 +7,6 @@
namespace xgboost {
namespace tree {
template <typename GradientSumT> void TestEvaluateSplits() {
int static constexpr kRows = 8, kCols = 16;
auto orig = omp_get_max_threads();
@@ -16,14 +15,12 @@ template <typename GradientSumT> void TestEvaluateSplits() {
auto sampler = std::make_shared<common::ColumnSampler>();
TrainParam param;
param.UpdateAllowUnknown(Args{{}});
param.min_child_weight = 0;
param.reg_lambda = 0;
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();
auto evaluator =
HistEvaluator<GradientSumT, CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
auto evaluator = HistEvaluator<GradientSumT, CPUExpandEntry>{
param, dmat->Info(), n_threads, sampler, ObjInfo{ObjInfo::kRegression}};
common::HistCollection<GradientSumT> hist;
std::vector<GradientPair> row_gpairs = {
{1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
@@ -39,7 +36,7 @@ template <typename GradientSumT> void TestEvaluateSplits() {
std::iota(row_indices.begin(), row_indices.end(), 0);
row_set_collection.Init();
auto hist_builder = GHistBuilder<GradientSumT>(n_threads, gmat.cut.Ptrs().back());
auto hist_builder = GHistBuilder<GradientSumT>(omp_get_max_threads(), gmat.cut.Ptrs().back());
hist.Init(gmat.cut.Ptrs().back());
hist.AddHistRow(0);
hist.AllocateAllData();
@@ -58,7 +55,7 @@ template <typename GradientSumT> void TestEvaluateSplits() {
entries.front().depth = 0;
evaluator.InitRoot(GradStats{total_gpair});
evaluator.EvaluateSplits(hist, gmat.cut, tree, &entries);
evaluator.EvaluateSplits(hist, gmat.cut, {}, tree, &entries);
auto best_loss_chg =
evaluator.Evaluator().CalcSplitGain(
@@ -96,8 +93,8 @@ TEST(HistEvaluator, Apply) {
param.UpdateAllowUnknown(Args{{}});
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator_ =
HistEvaluator<float, CPUExpandEntry>{param, dmat->Info(), 4, sampler};
auto evaluator_ = HistEvaluator<float, CPUExpandEntry>{param, dmat->Info(), 4, sampler,
ObjInfo{ObjInfo::kRegression}};
CPUExpandEntry entry{0, 0, 10.0f};
entry.split.left_sum = GradStats{0.4, 0.6f};
@@ -108,5 +105,142 @@ TEST(HistEvaluator, Apply) {
ASSERT_EQ(tree.Stat(tree[0].LeftChild()).sum_hess, 0.6f);
ASSERT_EQ(tree.Stat(tree[0].RightChild()).sum_hess, 0.7f);
}
// Verify that the partition-based categorical split found by HistEvaluator is
// optimal: it must beat every numerical (ordinal) split on the raw category
// order, and must equal the best split found after re-sorting categories by
// their leaf weight (the LightGBM-style reduction to a 1-D problem).
TEST(HistEvaluator, CategoricalPartition) {
int static constexpr kRows = 128, kCols = 1;
using GradientSumT = double;
// Single feature, marked categorical.
std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);
TrainParam param;
// Zero out regularization so gains are driven purely by the histogram.
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
size_t n_cats{8};
auto dmat =
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
int32_t n_threads = 16;
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator = HistEvaluator<GradientSumT, CPUExpandEntry>{
param, dmat->Info(), n_threads, sampler, ObjInfo{ObjInfo::kRegression}};
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({GenericParameter::kCpuId, 32})) {
common::HistCollection<GradientSumT> hist;
// Single entry for the root node.
std::vector<CPUExpandEntry> entries(1);
entries.front().nid = 0;
entries.front().depth = 0;
hist.Init(gmat.cut.TotalBins());
hist.AddHistRow(0);
hist.AllocateAllData();
auto node_hist = hist[0];
// One histogram bin per category is expected for a categorical feature.
ASSERT_EQ(node_hist.size(), n_cats);
ASSERT_EQ(node_hist.size(), gmat.cut.Ptrs().back());
// Fill the root histogram with synthetic, strictly-decreasing gradients so
// the optimal partition is well defined, accumulating the root total.
GradientPairPrecise total_gpair;
for (size_t i = 0; i < node_hist.size(); ++i) {
node_hist[i] = {static_cast<double>(node_hist.size() - i), 1.0};
total_gpair += node_hist[i];
}
// Shuffle so category order carries no information; a plain numerical split
// on the shuffled order should be suboptimal.
SimpleLCG lcg;
std::shuffle(node_hist.begin(), node_hist.end(), lcg);
RegTree tree;
evaluator.InitRoot(GradStats{total_gpair});
evaluator.EvaluateSplits(hist, gmat.cut, ft, tree, &entries);
ASSERT_TRUE(entries.front().split.is_cat);
// Enumerate every left/right prefix split over the current node_hist order
// and hand each candidate's loss change to fn.
auto run_eval = [&](auto fn) {
for (size_t i = 1; i < gmat.cut.Ptrs().size(); ++i) {
GradStats left, right;
for (size_t j = gmat.cut.Ptrs()[i - 1]; j < gmat.cut.Ptrs()[i]; ++j) {
auto loss_chg = evaluator.Evaluator().CalcSplitGain(param, 0, i - 1, left, right) -
evaluator.Stats().front().root_gain;
fn(loss_chg);
left.Add(node_hist[j].GetGrad(), node_hist[j].GetHess());
right.SetSubstract(GradStats{total_gpair}, left);
}
}
};
// Assert that's the best split
auto best_loss_chg = entries.front().split.loss_chg;
run_eval([&](auto loss_chg) {
// Approximated test that gain returned by optimal partition is greater than
// numerical split.
ASSERT_GT(best_loss_chg, loss_chg);
});
// node_hist is captured in lambda.
// Re-implement the optimal partition: sort categories by leaf weight, then
// the best prefix split over the sorted order must recover best_loss_chg.
std::sort(node_hist.begin(), node_hist.end(), [&](auto l, auto r) {
return evaluator.Evaluator().CalcWeightCat(param, l) <
evaluator.Evaluator().CalcWeightCat(param, r);
});
double reimpl = 0;
run_eval([&](auto loss_chg) { reimpl = std::max(loss_chg, reimpl); });
CHECK_EQ(reimpl, best_loss_chg);
}
}
namespace {
// Build a 2-category single-feature DMatrix, run split evaluation with either
// one-hot splitting (max_cat_to_onehot above n_cats) or partition-based
// splitting (threshold 1), and return the resulting root expand entry so the
// caller can compare the two strategies.
auto CompareOneHotAndPartition(bool onehot) {
int static constexpr kRows = 128, kCols = 1;
using GradientSumT = double;
std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);
TrainParam param;
if (onehot) {
// force use one-hot
param.UpdateAllowUnknown(
Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}, {"max_cat_to_onehot", "100"}});
} else {
// Threshold of 1 forces the partition-based strategy.
param.UpdateAllowUnknown(
Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}, {"max_cat_to_onehot", "1"}});
}
// With only 2 categories both strategies should find the same split.
size_t n_cats{2};
auto dmat =
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
int32_t n_threads = 16;
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator = HistEvaluator<GradientSumT, CPUExpandEntry>{
param, dmat->Info(), n_threads, sampler, ObjInfo{ObjInfo::kRegression}};
std::vector<CPUExpandEntry> entries(1);
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({GenericParameter::kCpuId, 32})) {
common::HistCollection<GradientSumT> hist;
entries.front().nid = 0;
entries.front().depth = 0;
hist.Init(gmat.cut.TotalBins());
hist.AddHistRow(0);
hist.AllocateAllData();
auto node_hist = hist[0];
CHECK_EQ(node_hist.size(), n_cats);
CHECK_EQ(node_hist.size(), gmat.cut.Ptrs().back());
// Deterministic synthetic root histogram; also accumulates the root total.
GradientPairPrecise total_gpair;
for (size_t i = 0; i < node_hist.size(); ++i) {
node_hist[i] = {static_cast<double>(node_hist.size() - i), 1.0};
total_gpair += node_hist[i];
}
RegTree tree;
evaluator.InitRoot(GradStats{total_gpair});
evaluator.EvaluateSplits(hist, gmat.cut, ft, tree, &entries);
}
// Root entry holding the best split found by the chosen strategy.
return entries.front();
}
} // anonymous namespace
// With only two categories, one-hot splitting and partition-based splitting
// enumerate the same candidate splits, so both must report the same best gain.
TEST(HistEvaluator, Categorical) {
  auto onehot_entry = CompareOneHotAndPartition(true);
  auto partition_entry = CompareOneHotAndPartition(false);
  ASSERT_EQ(onehot_entry.split.loss_chg, partition_entry.split.loss_chg);
}
} // namespace tree
} // namespace xgboost

View File

@@ -88,14 +88,14 @@ TEST(Param, SplitEntry) {
xgboost::tree::SplitEntry se2;
EXPECT_FALSE(se1.Update(se2));
EXPECT_FALSE(se2.Update(-1, 100, 0, true, xgboost::tree::GradStats(),
EXPECT_FALSE(se2.Update(-1, 100, 0, true, false, xgboost::tree::GradStats(),
xgboost::tree::GradStats()));
ASSERT_TRUE(se2.Update(1, 100, 0, true, xgboost::tree::GradStats(),
ASSERT_TRUE(se2.Update(1, 100, 0, true, false, xgboost::tree::GradStats(),
xgboost::tree::GradStats()));
ASSERT_TRUE(se1.Update(se2));
xgboost::tree::SplitEntry se3;
se3.Update(2, 101, 0, false, xgboost::tree::GradStats(),
se3.Update(2, 101, 0, false, false, xgboost::tree::GradStats(),
xgboost::tree::GradStats());
xgboost::tree::SplitEntry::Reduce(se2, se3);
EXPECT_EQ(se2.SplitIndex(), 101);