CPU evaluation for cat data. (#7393)

* Implementation for one hot based.
* Implementation for partition based. (LightGBM)
This commit is contained in:
Jiaming Yuan
2021-11-06 14:41:35 +08:00
committed by GitHub
parent 6ede12412c
commit d7d1b6e3a6
15 changed files with 540 additions and 166 deletions

View File

@@ -5,11 +5,12 @@
#ifndef XGBOOST_COMMON_CATEGORICAL_H_
#define XGBOOST_COMMON_CATEGORICAL_H_
#include "bitfield.h"
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/span.h"
#include "xgboost/parameter.h"
#include "bitfield.h"
#include "xgboost/span.h"
#include "xgboost/task.h"
namespace xgboost {
namespace common {
@@ -47,6 +48,15 @@ inline void InvalidCategory() {
"should be non-negative.";
}
/*!
 * \brief Decide whether one-hot encoding should be used for a categorical feature.
 *
 * \param n_cats            Number of categories of the feature.
 * \param max_cat_to_onehot Threshold; a feature with strictly fewer categories
 *                          than this value is one-hot encoded.
 * \param task              Objective information; only its `task` field is read.
 *
 * \return true when n_cats < max_cat_to_onehot, or when the task is neither
 *         ObjInfo::kRegression nor ObjInfo::kBinary; false otherwise.
 */
inline bool UseOneHot(uint32_t n_cats, uint32_t max_cat_to_onehot, ObjInfo task) {
  // Few enough categories: one-hot regardless of the task type.
  if (n_cats < max_cat_to_onehot) {
    return true;
  }
  // Otherwise one-hot is used for every task except regression and binary.
  bool const is_regression_or_binary =
      task.task == ObjInfo::kRegression || task.task == ObjInfo::kBinary;
  return !is_regression_or_binary;
}
struct IsCatOp {
XGBOOST_DEVICE bool operator()(FeatureType ft) {
return ft == FeatureType::kCategorical;