Add categorical data support to GPU Hist. (#6164)
This commit is contained in:
@@ -15,7 +15,7 @@ auto ZeroParam() {
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(GpuHist, EvaluateSingleSplit) {
|
||||
void TestEvaluateSingleSplit(bool is_categorical) {
|
||||
thrust::device_vector<DeviceSplitCandidate> out_splits(1);
|
||||
GradientPair parent_sum(0.0, 1.0);
|
||||
TrainParam tparam = ZeroParam();
|
||||
@@ -33,11 +33,19 @@ TEST(GpuHist, EvaluateSingleSplit) {
|
||||
thrust::device_vector<GradientPair> feature_histogram =
|
||||
std::vector<GradientPair>{
|
||||
{-0.5, 0.5}, {0.5, 0.5}, {-1.0, 0.5}, {1.0, 0.5}};
|
||||
|
||||
thrust::device_vector<int> monotonic_constraints(feature_set.size(), 0);
|
||||
dh::device_vector<FeatureType> feature_types(feature_set.size(),
|
||||
FeatureType::kCategorical);
|
||||
common::Span<FeatureType> d_feature_types;
|
||||
if (is_categorical) {
|
||||
d_feature_types = dh::ToSpan(feature_types);
|
||||
}
|
||||
EvaluateSplitInputs<GradientPair> input{1,
|
||||
parent_sum,
|
||||
param,
|
||||
dh::ToSpan(feature_set),
|
||||
d_feature_types,
|
||||
dh::ToSpan(feature_segments),
|
||||
dh::ToSpan(feature_values),
|
||||
dh::ToSpan(feature_min_values),
|
||||
@@ -55,6 +63,14 @@ TEST(GpuHist, EvaluateSingleSplit) {
|
||||
parent_sum.GetHess());
|
||||
}
|
||||
|
||||
// Runs the shared TestEvaluateSingleSplit helper with is_categorical = false.
// In that helper the feature-type span handed to EvaluateSplitInputs is only
// populated when is_categorical is true, so this case passes an empty span.
TEST(GpuHist, EvaluateSingleSplit) {
|
||||
TestEvaluateSingleSplit(false);
|
||||
}
|
||||
|
||||
// Runs the shared TestEvaluateSingleSplit helper with is_categorical = true,
// which makes the helper pass a span of FeatureType::kCategorical entries
// (one per feature in feature_set) to EvaluateSplitInputs.
TEST(GpuHist, EvaluateCategoricalSplit) {
|
||||
TestEvaluateSingleSplit(true);
|
||||
}
|
||||
|
||||
TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
thrust::device_vector<DeviceSplitCandidate> out_splits(1);
|
||||
GradientPair parent_sum(1.0, 1.5);
|
||||
@@ -74,6 +90,7 @@ TEST(GpuHist, EvaluateSingleSplitMissing) {
|
||||
parent_sum,
|
||||
param,
|
||||
dh::ToSpan(feature_set),
|
||||
{},
|
||||
dh::ToSpan(feature_segments),
|
||||
dh::ToSpan(feature_values),
|
||||
dh::ToSpan(feature_min_values),
|
||||
@@ -134,6 +151,7 @@ TEST(GpuHist, EvaluateSingleSplitFeatureSampling) {
|
||||
parent_sum,
|
||||
param,
|
||||
dh::ToSpan(feature_set),
|
||||
{},
|
||||
dh::ToSpan(feature_segments),
|
||||
dh::ToSpan(feature_values),
|
||||
dh::ToSpan(feature_min_values),
|
||||
@@ -174,6 +192,7 @@ TEST(GpuHist, EvaluateSingleSplitBreakTies) {
|
||||
parent_sum,
|
||||
param,
|
||||
dh::ToSpan(feature_set),
|
||||
{},
|
||||
dh::ToSpan(feature_segments),
|
||||
dh::ToSpan(feature_values),
|
||||
dh::ToSpan(feature_min_values),
|
||||
@@ -215,6 +234,7 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
parent_sum,
|
||||
param,
|
||||
dh::ToSpan(feature_set),
|
||||
{},
|
||||
dh::ToSpan(feature_segments),
|
||||
dh::ToSpan(feature_values),
|
||||
dh::ToSpan(feature_min_values),
|
||||
@@ -224,6 +244,7 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
parent_sum,
|
||||
param,
|
||||
dh::ToSpan(feature_set),
|
||||
{},
|
||||
dh::ToSpan(feature_segments),
|
||||
dh::ToSpan(feature_values),
|
||||
dh::ToSpan(feature_min_values),
|
||||
@@ -241,6 +262,5 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
EXPECT_EQ(result_right.findex, 0);
|
||||
EXPECT_EQ(result_right.fvalue, 1.0);
|
||||
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -80,7 +80,7 @@ void TestBuildHist(bool use_shared_memory_histograms) {
|
||||
param.Init(args);
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
BatchParam batch_param{};
|
||||
GPUHistMakerDevice<GradientSumT> maker(0, page.get(), kNRows, param, kNCols, kNCols,
|
||||
GPUHistMakerDevice<GradientSumT> maker(0, page.get(), {}, kNRows, param, kNCols, kNCols,
|
||||
true, batch_param);
|
||||
xgboost::SimpleLCG gen;
|
||||
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
|
||||
@@ -130,6 +130,48 @@ TEST(GpuHist, BuildHistSharedMem) {
|
||||
TestBuildHist<GradientPair>(true);
|
||||
}
|
||||
|
||||
// Applies a hand-built categorical split candidate to an empty RegTree through
// GPUHistMakerDevice::ApplySplit, then checks the split types and split
// categories stored on the tree and the updater's node_categories.
TEST(GpuHist, ApplySplit) {
|
||||
RegTree tree;
|
||||
// Candidate split for node 0, flagged as categorical.
ExpandEntry candidate;
|
||||
candidate.nid = 0;
|
||||
candidate.left_weight = 1.0f;
|
||||
candidate.right_weight = 2.0f;
|
||||
candidate.base_weight = 3.0f;
|
||||
candidate.split.is_cat = true;
|
||||
candidate.split.fvalue = 1.0f; // at cat 1
|
||||
|
||||
size_t n_rows = 10;
|
||||
size_t n_cols = 10;
|
||||
|
||||
// Randomly generated 10x10 input; the third generator argument is 0.
// NOTE(review): presumably that argument is the sparsity -- confirm.
auto m = RandomDataGenerator{n_rows, n_cols, 0}.GenerateDMatrix(true);
|
||||
// NOTE(review): p is initialised here but not referenced again in this test.
GenericParameter p;
|
||||
p.InitAllowUnknown(Args{});
|
||||
|
||||
// Training and batch parameters; tparam keeps whatever an empty Args{} yields.
TrainParam tparam;
|
||||
tparam.InitAllowUnknown(Args{});
|
||||
BatchParam bparam;
|
||||
bparam.gpu_id = 0;
|
||||
bparam.max_bin = 3;
|
||||
bparam.gpu_page_size = 0;
|
||||
|
||||
// The split is applied and checked once per ELLPACK batch, always against the
// same `tree` declared above.
for (auto& ellpack : m->GetBatches<EllpackPage>(bparam)){
|
||||
auto impl = ellpack.Impl();
|
||||
// Every feature is marked categorical; the literal 10 equals n_cols above.
HostDeviceVector<FeatureType> feature_types(10, FeatureType::kCategorical);
|
||||
feature_types.SetDevice(bparam.gpu_id);
|
||||
tree::GPUHistMakerDevice<GradientPairPrecise> updater(
|
||||
0, impl, feature_types.ConstDeviceSpan(), n_rows, tparam, 0, n_cols, true, bparam);
|
||||
updater.ApplySplit(candidate, &tree);
|
||||
|
||||
// NOTE(review): 3 is presumably the root plus its two new children -- confirm.
ASSERT_EQ(tree.GetSplitTypes().size(), 3);
|
||||
ASSERT_EQ(tree.GetSplitTypes()[0], FeatureType::kCategorical);
|
||||
// Exactly one 32-bit word of category bits is expected.
ASSERT_EQ(tree.GetSplitCategories().size(), 1);
|
||||
// 1u << 30 sets the second-highest bit of the word, i.e. index 1 when bits are
// counted from the most significant end, matching fvalue = 1.0f above.
uint32_t bits = 1u << 30; // bits: 0, 1, 0, 0, 0, ..., 0
|
||||
ASSERT_EQ(tree.GetSplitCategories().back(), bits);
|
||||
|
||||
ASSERT_EQ(updater.node_categories.size(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
HistogramCutsWrapper GetHostCutMatrix () {
|
||||
HistogramCutsWrapper cmat;
|
||||
cmat.SetPtrs({0, 3, 6, 9, 12, 15, 18, 21, 24});
|
||||
@@ -154,19 +196,18 @@ TEST(GpuHist, EvaluateRootSplit) {
|
||||
|
||||
TrainParam param;
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> args {
|
||||
{"max_depth", "1"},
|
||||
{"max_leaves", "0"},
|
||||
std::vector<std::pair<std::string, std::string>> args{
|
||||
{"max_depth", "1"},
|
||||
{"max_leaves", "0"},
|
||||
|
||||
// Disable all other parameters.
|
||||
{"colsample_bynode", "1"},
|
||||
{"colsample_bylevel", "1"},
|
||||
{"colsample_bytree", "1"},
|
||||
{"min_child_weight", "0.01"},
|
||||
{"reg_alpha", "0"},
|
||||
{"reg_lambda", "0"},
|
||||
{"max_delta_step", "0"}
|
||||
};
|
||||
// Disable all other parameters.
|
||||
{"colsample_bynode", "1"},
|
||||
{"colsample_bylevel", "1"},
|
||||
{"colsample_bytree", "1"},
|
||||
{"min_child_weight", "0.01"},
|
||||
{"reg_alpha", "0"},
|
||||
{"reg_lambda", "0"},
|
||||
{"max_delta_step", "0"}};
|
||||
param.Init(args);
|
||||
for (size_t i = 0; i < kNCols; ++i) {
|
||||
param.monotone_constraints.emplace_back(0);
|
||||
@@ -178,7 +219,7 @@ TEST(GpuHist, EvaluateRootSplit) {
|
||||
auto page = BuildEllpackPage(kNRows, kNCols);
|
||||
BatchParam batch_param{};
|
||||
GPUHistMakerDevice<GradientPairPrecise>
|
||||
maker(0, page.get(), kNRows, param, kNCols, kNCols, true, batch_param);
|
||||
maker(0, page.get(), {}, kNRows, param, kNCols, kNCols, true, batch_param);
|
||||
// Initialize GPUHistMakerDevice::node_sum_gradients
|
||||
maker.node_sum_gradients = {};
|
||||
|
||||
@@ -257,7 +298,6 @@ void TestHistogramIndexImpl() {
|
||||
|
||||
ASSERT_EQ(maker->page->Cuts().TotalBins(), maker_ext->page->Cuts().TotalBins());
|
||||
ASSERT_EQ(maker->page->gidx_buffer.Size(), maker_ext->page->gidx_buffer.Size());
|
||||
|
||||
}
|
||||
|
||||
TEST(GpuHist, TestHistogramIndex) {
|
||||
|
||||
Reference in New Issue
Block a user