Add test for invalid categorical data values. (#7380)
* Add test for invalid categorical data values. * Add check during sketching.
This commit is contained in:
parent
c74df31bf9
commit
a55d43ccfd
@ -42,9 +42,9 @@ inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, bst_cat_t
|
|||||||
return !s_cats.Check(cat);
|
return !s_cats.Check(cat);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void CheckCat(bst_cat_t cat) {
|
inline void InvalidCategory() {
|
||||||
CHECK_GE(cat, 0) << "Invalid categorical value detected. Categorical value "
|
LOG(FATAL) << "Invalid categorical value detected. Categorical value "
|
||||||
"should be non-negative.";
|
"should be non-negative.";
|
||||||
}
|
}
|
||||||
|
|
||||||
struct IsCatOp {
|
struct IsCatOp {
|
||||||
|
|||||||
@ -580,6 +580,19 @@ void SketchContainer::AllReduce() {
|
|||||||
timer_.Stop(__func__);
|
timer_.Stop(__func__);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
struct InvalidCat {
|
||||||
|
Span<float const> values;
|
||||||
|
Span<uint32_t const> ptrs;
|
||||||
|
Span<FeatureType const> ft;
|
||||||
|
|
||||||
|
XGBOOST_DEVICE bool operator()(size_t i) {
|
||||||
|
auto fidx = dh::SegmentId(ptrs, i);
|
||||||
|
return IsCat(ft, fidx) && values[i] < 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
void SketchContainer::MakeCuts(HistogramCuts* p_cuts) {
|
void SketchContainer::MakeCuts(HistogramCuts* p_cuts) {
|
||||||
timer_.Start(__func__);
|
timer_.Start(__func__);
|
||||||
dh::safe_cuda(cudaSetDevice(device_));
|
dh::safe_cuda(cudaSetDevice(device_));
|
||||||
@ -669,6 +682,19 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts) {
|
|||||||
assert(idx+1 < in_column.size());
|
assert(idx+1 < in_column.size());
|
||||||
out_column[idx] = in_column[idx+1].value;
|
out_column[idx] = in_column[idx+1].value;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (has_categorical_) {
|
||||||
|
dh::XGBCachingDeviceAllocator<char> alloc;
|
||||||
|
auto ptrs = p_cuts->cut_ptrs_.ConstDeviceSpan();
|
||||||
|
auto it = thrust::make_counting_iterator(0ul);
|
||||||
|
CHECK_EQ(p_cuts->Ptrs().back(), out_cut_values.size());
|
||||||
|
auto invalid =
|
||||||
|
thrust::any_of(thrust::cuda::par(alloc), it, it + out_cut_values.size(),
|
||||||
|
InvalidCat{out_cut_values, ptrs, d_ft});
|
||||||
|
if (invalid) {
|
||||||
|
InvalidCategory();
|
||||||
|
}
|
||||||
|
}
|
||||||
timer_.Stop(__func__);
|
timer_.Stop(__func__);
|
||||||
}
|
}
|
||||||
} // namespace common
|
} // namespace common
|
||||||
|
|||||||
@ -580,7 +580,9 @@ struct GPUHistMakerDevice {
|
|||||||
CHECK_LT(candidate.split.fvalue, std::numeric_limits<bst_cat_t>::max())
|
CHECK_LT(candidate.split.fvalue, std::numeric_limits<bst_cat_t>::max())
|
||||||
<< "Categorical feature value too large.";
|
<< "Categorical feature value too large.";
|
||||||
auto cat = common::AsCat(candidate.split.fvalue);
|
auto cat = common::AsCat(candidate.split.fvalue);
|
||||||
common::CheckCat(cat);
|
if (cat < 0) {
|
||||||
|
common::InvalidCategory();
|
||||||
|
}
|
||||||
std::vector<uint32_t> split_cats(LBitField32::ComputeStorageSize(std::max(cat+1, 1)), 0);
|
std::vector<uint32_t> split_cats(LBitField32::ComputeStorageSize(std::max(cat+1, 1)), 0);
|
||||||
LBitField32 cats_bits(split_cats);
|
LBitField32 cats_bits(split_cats);
|
||||||
cats_bits.Set(cat);
|
cats_bits.Set(cat);
|
||||||
|
|||||||
@ -95,6 +95,21 @@ class TestGPUUpdaters:
|
|||||||
rounds = 4
|
rounds = 4
|
||||||
self.run_categorical_basic(rows, cols, rounds, cats)
|
self.run_categorical_basic(rows, cols, rounds, cats)
|
||||||
|
|
||||||
|
def test_invalid_categorical(self):
|
||||||
|
import cupy as cp
|
||||||
|
rng = np.random.default_rng()
|
||||||
|
X = rng.normal(loc=0, scale=1, size=1000).reshape(100, 10)
|
||||||
|
y = rng.normal(loc=0, scale=1, size=100)
|
||||||
|
|
||||||
|
# Check is performed during sketching.
|
||||||
|
Xy = xgb.DMatrix(X, y, feature_types=["c"] * 10)
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
xgb.train({"tree_method": "gpu_hist"}, Xy)
|
||||||
|
|
||||||
|
X, y = cp.array(X), cp.array(y)
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
Xy = xgb.DeviceQuantileDMatrix(X, y, feature_types=["c"] * 10)
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_cupy())
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
@given(parameter_strategy, strategies.integers(1, 20),
|
@given(parameter_strategy, strategies.integers(1, 20),
|
||||||
tm.dataset_strategy)
|
tm.dataset_strategy)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user