Handle missing values in dataframe with category dtype. (#7331)
* Replace -1 in pandas initializer.
* Unify `IsValid` functor.
* Mimic pandas data handling in cuDF glue code.
* Check invalid categories.
* Fix DDM sketching.
This commit is contained in:
@@ -21,6 +21,7 @@
|
||||
|
||||
#include "array_interface.h"
|
||||
#include "../c_api/c_api_error.h"
|
||||
#include "../common/math.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
@@ -80,6 +81,24 @@ struct COOTuple {
|
||||
float value{0};
|
||||
};
|
||||
|
||||
struct IsValidFunctor {
|
||||
float missing;
|
||||
|
||||
XGBOOST_DEVICE explicit IsValidFunctor(float missing) : missing(missing) {}
|
||||
|
||||
XGBOOST_DEVICE bool operator()(float value) const {
|
||||
return !(common::CheckNAN(value) || value == missing);
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE bool operator()(const data::COOTuple& e) const {
|
||||
return !(common::CheckNAN(e.value) || e.value == missing);
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE bool operator()(const Entry& e) const {
|
||||
return !(common::CheckNAN(e.fvalue) || e.fvalue == missing);
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user