Optimize GPU evaluation function for categorical data. (#7705)

* Use transform and cache.
This commit is contained in:
Jiaming Yuan
2022-02-28 17:46:29 +08:00
committed by GitHub
parent 18a4af63aa
commit 1d468e20a4
3 changed files with 77 additions and 30 deletions

View File

@@ -74,12 +74,12 @@ def categorical_model(X: pd.DataFrame, y: pd.Series, output_dir: str) -> None:
X_train, X_test, y_train, y_test = train_test_split(
X, y, random_state=1994, test_size=0.2
)
# Specify `enable_categorical`.
# Specify `enable_categorical` to True.
clf = xgb.XGBClassifier(
**params,
eval_metric="auc",
enable_categorical=True,
max_cat_to_onehot=1, # We use optimal partitioning exclusively
max_cat_to_onehot=1, # We use optimal partitioning exclusively
)
clf.fit(X_train, y_train, eval_set=[(X_test, y_test), (X_train, y_train)])
clf.save_model(os.path.join(output_dir, "categorical.json"))
@@ -94,13 +94,12 @@ def onehot_encoding_model(X: pd.DataFrame, y: pd.Series, output_dir: str) -> Non
X_train, X_test, y_train, y_test = train_test_split(
X, y, random_state=42, test_size=0.2
)
# Specify `enable_categorical`.
clf = xgb.XGBClassifier(**params, enable_categorical=False)
# Specify `enable_categorical` to False as we are using encoded data.
clf = xgb.XGBClassifier(**params, eval_metric="auc", enable_categorical=False)
clf.fit(
X_train,
y_train,
eval_set=[(X_test, y_test), (X_train, y_train)],
eval_metric="auc",
)
clf.save_model(os.path.join(output_dir, "one-hot.json"))