CPU evaluation for cat data. (#7393)
* Implementation of one-hot-based split evaluation. * Implementation of partition-based split evaluation (as in LightGBM).
This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <vector>
|
||||
#include "../../helpers.h"
|
||||
|
||||
#include "../../../../src/common/categorical.h"
|
||||
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
|
||||
#include "../../../../src/tree/gpu_hist/histogram.cuh"
|
||||
#include "../../../../src/tree/gpu_hist/row_partitioner.cuh"
|
||||
#include "../../categorical_helpers.h"
|
||||
#include "../../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
@@ -99,16 +101,6 @@ TEST(Histogram, GPUDeterministic) {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<float> OneHotEncodeFeature(std::vector<float> x, size_t num_cat) {
|
||||
std::vector<float> ret(x.size() * num_cat, 0);
|
||||
size_t n_rows = x.size();
|
||||
for (size_t r = 0; r < n_rows; ++r) {
|
||||
bst_cat_t cat = common::AsCat(x[r]);
|
||||
ret.at(num_cat * r + cat) = 1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Test 1 vs rest categorical histogram is equivalent to one hot encoded data.
|
||||
void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
size_t constexpr kRows = 340;
|
||||
@@ -123,7 +115,9 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
auto gpair = GenerateRandomGradients(kRows, 0, 2);
|
||||
gpair.SetDevice(0);
|
||||
auto rounding = CreateRoundingFactor<GradientPairPrecise>(gpair.DeviceSpan());
|
||||
// Generate hist with cat data.
|
||||
/**
|
||||
* Generate hist with cat data.
|
||||
*/
|
||||
for (auto const &batch : cat_m->GetBatches<EllpackPage>(batch_param)) {
|
||||
auto* page = batch.Impl();
|
||||
FeatureGroups single_group(page->Cuts());
|
||||
@@ -133,7 +127,9 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
rounding);
|
||||
}
|
||||
|
||||
// Generate hist with one hot encoded data.
|
||||
/**
|
||||
* Generate hist with one hot encoded data.
|
||||
*/
|
||||
auto x_encoded = OneHotEncodeFeature(x, num_categories);
|
||||
auto encode_m = GetDMatrixFromData(x_encoded, kRows, num_categories);
|
||||
dh::device_vector<GradientPairPrecise> encode_hist(2 * num_categories);
|
||||
@@ -152,20 +148,9 @@ void TestGPUHistogramCategorical(size_t num_categories) {
|
||||
|
||||
std::vector<GradientPairPrecise> h_encode_hist(encode_hist.size());
|
||||
thrust::copy(encode_hist.begin(), encode_hist.end(), h_encode_hist.begin());
|
||||
|
||||
for (size_t c = 0; c < num_categories; ++c) {
|
||||
auto zero = h_encode_hist[c * 2];
|
||||
auto one = h_encode_hist[c * 2 + 1];
|
||||
|
||||
auto chosen = h_cat_hist[c];
|
||||
auto not_chosen = cat_sum - chosen;
|
||||
|
||||
ASSERT_LE(RelError(zero.GetGrad(), not_chosen.GetGrad()), kRtEps);
|
||||
ASSERT_LE(RelError(zero.GetHess(), not_chosen.GetHess()), kRtEps);
|
||||
|
||||
ASSERT_LE(RelError(one.GetGrad(), chosen.GetGrad()), kRtEps);
|
||||
ASSERT_LE(RelError(one.GetHess(), chosen.GetHess()), kRtEps);
|
||||
}
|
||||
ValidateCategoricalHistogram(num_categories,
|
||||
common::Span<GradientPairPrecise>{h_encode_hist},
|
||||
common::Span<GradientPairPrecise>{h_cat_hist});
|
||||
}
|
||||
|
||||
TEST(Histogram, GPUHistCategorical) {
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
template <typename GradientSumT> void TestEvaluateSplits() {
|
||||
int static constexpr kRows = 8, kCols = 16;
|
||||
auto orig = omp_get_max_threads();
|
||||
@@ -16,14 +15,12 @@ template <typename GradientSumT> void TestEvaluateSplits() {
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
|
||||
TrainParam param;
|
||||
param.UpdateAllowUnknown(Args{{}});
|
||||
param.min_child_weight = 0;
|
||||
param.reg_lambda = 0;
|
||||
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});
|
||||
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();
|
||||
|
||||
auto evaluator =
|
||||
HistEvaluator<GradientSumT, CPUExpandEntry>{param, dmat->Info(), n_threads, sampler};
|
||||
auto evaluator = HistEvaluator<GradientSumT, CPUExpandEntry>{
|
||||
param, dmat->Info(), n_threads, sampler, ObjInfo{ObjInfo::kRegression}};
|
||||
common::HistCollection<GradientSumT> hist;
|
||||
std::vector<GradientPair> row_gpairs = {
|
||||
{1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
|
||||
@@ -39,7 +36,7 @@ template <typename GradientSumT> void TestEvaluateSplits() {
|
||||
std::iota(row_indices.begin(), row_indices.end(), 0);
|
||||
row_set_collection.Init();
|
||||
|
||||
auto hist_builder = GHistBuilder<GradientSumT>(n_threads, gmat.cut.Ptrs().back());
|
||||
auto hist_builder = GHistBuilder<GradientSumT>(omp_get_max_threads(), gmat.cut.Ptrs().back());
|
||||
hist.Init(gmat.cut.Ptrs().back());
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
@@ -58,7 +55,7 @@ template <typename GradientSumT> void TestEvaluateSplits() {
|
||||
entries.front().depth = 0;
|
||||
|
||||
evaluator.InitRoot(GradStats{total_gpair});
|
||||
evaluator.EvaluateSplits(hist, gmat.cut, tree, &entries);
|
||||
evaluator.EvaluateSplits(hist, gmat.cut, {}, tree, &entries);
|
||||
|
||||
auto best_loss_chg =
|
||||
evaluator.Evaluator().CalcSplitGain(
|
||||
@@ -96,8 +93,8 @@ TEST(HistEvaluator, Apply) {
|
||||
param.UpdateAllowUnknown(Args{{}});
|
||||
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator_ =
|
||||
HistEvaluator<float, CPUExpandEntry>{param, dmat->Info(), 4, sampler};
|
||||
auto evaluator_ = HistEvaluator<float, CPUExpandEntry>{param, dmat->Info(), 4, sampler,
|
||||
ObjInfo{ObjInfo::kRegression}};
|
||||
|
||||
CPUExpandEntry entry{0, 0, 10.0f};
|
||||
entry.split.left_sum = GradStats{0.4, 0.6f};
|
||||
@@ -108,5 +105,142 @@ TEST(HistEvaluator, Apply) {
|
||||
ASSERT_EQ(tree.Stat(tree[0].LeftChild()).sum_hess, 0.6f);
|
||||
ASSERT_EQ(tree.Stat(tree[0].RightChild()).sum_hess, 0.7f);
|
||||
}
|
||||
|
||||
/**
 * Check the split chosen by HistEvaluator for a single categorical feature with 8
 * categories against a re-implementation written in this test.
 *
 * The node histogram is filled with gradients n, n-1, ..., 1 (hessian 1 for every
 * bin) and then shuffled. After evaluation the test verifies that:
 *   - the chosen split is flagged as categorical,
 *   - its loss change is greater than every prefix split over the shuffled bins,
 *   - its loss change equals the best prefix split once the bins are sorted by
 *     CalcWeightCat.
 */
TEST(HistEvaluator, CategoricalPartition) {
  int static constexpr kRows = 128, kCols = 1;
  using GradientSumT = double;
  std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);

  TrainParam param;
  param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}});

  size_t n_cats{8};

  auto dmat =
      RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();

  int32_t n_threads = 16;
  auto sampler = std::make_shared<common::ColumnSampler>();
  auto evaluator = HistEvaluator<GradientSumT, CPUExpandEntry>{
      param, dmat->Info(), n_threads, sampler, ObjInfo{ObjInfo::kRegression}};

  for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({GenericParameter::kCpuId, 32})) {
    common::HistCollection<GradientSumT> hist;

    // A single candidate: the root node.
    std::vector<CPUExpandEntry> entries(1);
    entries.front().nid = 0;
    entries.front().depth = 0;

    hist.Init(gmat.cut.TotalBins());
    hist.AddHistRow(0);
    hist.AllocateAllData();
    auto node_hist = hist[0];
    // With one categorical feature there must be exactly one bin per category.
    ASSERT_EQ(node_hist.size(), n_cats);
    ASSERT_EQ(node_hist.size(), gmat.cut.Ptrs().back());

    // Synthetic statistics: gradient counts down from size() to 1, hessian is always 1.
    GradientPairPrecise total_gpair;
    for (size_t i = 0; i < node_hist.size(); ++i) {
      node_hist[i] = {static_cast<double>(node_hist.size() - i), 1.0};
      total_gpair += node_hist[i];
    }
    // Scramble the bins so that the evaluator has to find the ordering itself.
    SimpleLCG lcg;
    std::shuffle(node_hist.begin(), node_hist.end(), lcg);

    RegTree tree;
    evaluator.InitRoot(GradStats{total_gpair});
    evaluator.EvaluateSplits(hist, gmat.cut, ft, tree, &entries);
    ASSERT_TRUE(entries.front().split.is_cat);

    // Sweep the bins of every feature in their current order and hand the loss change
    // of each prefix split to `fn`. The gain is computed *before* bin j is added to
    // the left side, so the first call of each feature sees the freshly constructed
    // `left` and `right`.
    auto run_eval = [&](auto fn) {
      for (size_t i = 1; i < gmat.cut.Ptrs().size(); ++i) {
        GradStats left, right;
        for (size_t j = gmat.cut.Ptrs()[i - 1]; j < gmat.cut.Ptrs()[i]; ++j) {
          auto loss_chg = evaluator.Evaluator().CalcSplitGain(param, 0, i - 1, left, right) -
                          evaluator.Stats().front().root_gain;
          fn(loss_chg);
          left.Add(node_hist[j].GetGrad(), node_hist[j].GetHess());
          right.SetSubstract(GradStats{total_gpair}, left);
        }
      }
    };
    // Assert that the evaluator found the best split.
    auto best_loss_chg = entries.front().split.loss_chg;
    run_eval([&](auto loss_chg) {
      // Approximated test that gain returned by optimal partition is greater than
      // numerical split.
      ASSERT_GT(best_loss_chg, loss_chg);
    });
    // node_hist is captured by reference in run_eval, so sorting it here changes what
    // the next sweep sees.
    std::sort(node_hist.begin(), node_hist.end(), [&](auto l, auto r) {
      return evaluator.Evaluator().CalcWeightCat(param, l) <
             evaluator.Evaluator().CalcWeightCat(param, r);
    });

    // Best prefix split over the sorted bins must reproduce the evaluator's result.
    double reimpl = 0;
    run_eval([&](auto loss_chg) { reimpl = std::max(loss_chg, reimpl); });
    ASSERT_EQ(reimpl, best_loss_chg);
  }
}
|
||||
|
||||
namespace {
|
||||
auto CompareOneHotAndPartition(bool onehot) {
|
||||
int static constexpr kRows = 128, kCols = 1;
|
||||
using GradientSumT = double;
|
||||
std::vector<FeatureType> ft(kCols, FeatureType::kCategorical);
|
||||
|
||||
TrainParam param;
|
||||
if (onehot) {
|
||||
// force use one-hot
|
||||
param.UpdateAllowUnknown(
|
||||
Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}, {"max_cat_to_onehot", "100"}});
|
||||
} else {
|
||||
param.UpdateAllowUnknown(
|
||||
Args{{"min_child_weight", "0"}, {"reg_lambda", "0"}, {"max_cat_to_onehot", "1"}});
|
||||
}
|
||||
|
||||
size_t n_cats{2};
|
||||
|
||||
auto dmat =
|
||||
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
|
||||
|
||||
int32_t n_threads = 16;
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator = HistEvaluator<GradientSumT, CPUExpandEntry>{
|
||||
param, dmat->Info(), n_threads, sampler, ObjInfo{ObjInfo::kRegression}};
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
|
||||
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({GenericParameter::kCpuId, 32})) {
|
||||
common::HistCollection<GradientSumT> hist;
|
||||
|
||||
entries.front().nid = 0;
|
||||
entries.front().depth = 0;
|
||||
|
||||
hist.Init(gmat.cut.TotalBins());
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
auto node_hist = hist[0];
|
||||
|
||||
CHECK_EQ(node_hist.size(), n_cats);
|
||||
CHECK_EQ(node_hist.size(), gmat.cut.Ptrs().back());
|
||||
|
||||
GradientPairPrecise total_gpair;
|
||||
for (size_t i = 0; i < node_hist.size(); ++i) {
|
||||
node_hist[i] = {static_cast<double>(node_hist.size() - i), 1.0};
|
||||
total_gpair += node_hist[i];
|
||||
}
|
||||
RegTree tree;
|
||||
evaluator.InitRoot(GradStats{total_gpair});
|
||||
evaluator.EvaluateSplits(hist, gmat.cut, ft, tree, &entries);
|
||||
}
|
||||
return entries.front();
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
/**
 * Run CompareOneHotAndPartition with its `onehot` flag on and off and require both
 * runs to report exactly the same loss change for the chosen split.
 */
TEST(HistEvaluator, Categorical) {
  auto const onehot_entry = CompareOneHotAndPartition(true);
  auto const partition_entry = CompareOneHotAndPartition(false);

  auto const onehot_loss_chg = onehot_entry.split.loss_chg;
  auto const partition_loss_chg = partition_entry.split.loss_chg;
  ASSERT_EQ(onehot_loss_chg, partition_loss_chg);
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -88,14 +88,14 @@ TEST(Param, SplitEntry) {
|
||||
|
||||
xgboost::tree::SplitEntry se2;
|
||||
EXPECT_FALSE(se1.Update(se2));
|
||||
EXPECT_FALSE(se2.Update(-1, 100, 0, true, xgboost::tree::GradStats(),
|
||||
EXPECT_FALSE(se2.Update(-1, 100, 0, true, false, xgboost::tree::GradStats(),
|
||||
xgboost::tree::GradStats()));
|
||||
ASSERT_TRUE(se2.Update(1, 100, 0, true, xgboost::tree::GradStats(),
|
||||
ASSERT_TRUE(se2.Update(1, 100, 0, true, false, xgboost::tree::GradStats(),
|
||||
xgboost::tree::GradStats()));
|
||||
ASSERT_TRUE(se1.Update(se2));
|
||||
|
||||
xgboost::tree::SplitEntry se3;
|
||||
se3.Update(2, 101, 0, false, xgboost::tree::GradStats(),
|
||||
se3.Update(2, 101, 0, false, false, xgboost::tree::GradStats(),
|
||||
xgboost::tree::GradStats());
|
||||
xgboost::tree::SplitEntry::Reduce(se2, se3);
|
||||
EXPECT_EQ(se2.SplitIndex(), 101);
|
||||
|
||||
Reference in New Issue
Block a user