Validate out of range categorical value. (#7576)

* Use float in CPU categorical set to preserve the input value.
* Check out of range values.
This commit is contained in:
Jiaming Yuan
2022-01-18 20:16:19 +08:00
committed by GitHub
parent d6ea5cc1ed
commit deab0e32ba
8 changed files with 86 additions and 38 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2021 by XGBoost Contributors
* Copyright 2021-2022 by XGBoost Contributors
*/
#ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
#define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
@@ -303,8 +303,9 @@ template <typename GradientSumT, typename ExpandEntry> class HistEvaluator {
if (candidate.split.is_cat) {
std::vector<uint32_t> split_cats;
if (candidate.split.cat_bits.empty()) {
CHECK_LT(candidate.split.split_value, std::numeric_limits<bst_cat_t>::max())
<< "Categorical feature value too large.";
if (common::InvalidCat(candidate.split.split_value)) {
common::InvalidCategory();
}
auto cat = common::AsCat(candidate.split.split_value);
split_cats.resize(LBitField32::ComputeStorageSize(std::max(cat + 1, 1)), 0);
LBitField32 cat_bits;

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2021 XGBoost contributors
* Copyright 2017-2022 XGBoost contributors
*/
#include <thrust/copy.h>
#include <thrust/reduce.h>
@@ -572,11 +572,11 @@ struct GPUHistMakerDevice {
if (is_cat) {
CHECK_LT(candidate.split.fvalue, std::numeric_limits<bst_cat_t>::max())
<< "Categorical feature value too large.";
auto cat = common::AsCat(candidate.split.fvalue);
if (common::InvalidCat(cat)) {
if (common::InvalidCat(candidate.split.fvalue)) {
common::InvalidCategory();
}
std::vector<uint32_t> split_cats(LBitField32::ComputeStorageSize(std::max(cat+1, 1)), 0);
auto cat = common::AsCat(candidate.split.fvalue);
std::vector<uint32_t> split_cats(LBitField32::ComputeStorageSize(std::max(cat + 1, 1)), 0);
LBitField32 cats_bits(split_cats);
cats_bits.Set(cat);
dh::CopyToD(split_cats, &node_categories);