Optimize GPU evaluation function for categorical data. (#7705)

* Use transform and cache.
2022-02-28 17:46:29 +08:00
parent 18a4af63aa
commit 1d468e20a4
3 changed files with 77 additions and 30 deletions
--- a/demo/guide-python/cat_in_the_dat.py
+++ b/demo/guide-python/cat_in_the_dat.py
@@ -74,12 +74,12 @@ def categorical_model(X: pd.DataFrame, y: pd.Series, output_dir: str) -> None:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=1994, test_size=0.2
    )
-    # Specify `enable_categorical`.
+    # Set `enable_categorical` to True.
    clf = xgb.XGBClassifier(
        **params,
        eval_metric="auc",
        enable_categorical=True,
-        max_cat_to_onehot=1,    # We use optimal partitioning exclusively
+        max_cat_to_onehot=1,  # We use optimal partitioning exclusively
    )
    clf.fit(X_train, y_train, eval_set=[(X_test, y_test), (X_train, y_train)])
    clf.save_model(os.path.join(output_dir, "categorical.json"))
@@ -94,13 +94,12 @@ def onehot_encoding_model(X: pd.DataFrame, y: pd.Series, output_dir: str) -> Non
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=42, test_size=0.2
    )
-    # Specify `enable_categorical`.
-    clf = xgb.XGBClassifier(**params, enable_categorical=False)
+    # Set `enable_categorical` to False, as we are using one-hot encoded data.
+    clf = xgb.XGBClassifier(**params, eval_metric="auc", enable_categorical=False)
    clf.fit(
        X_train,
        y_train,
        eval_set=[(X_test, y_test), (X_train, y_train)],
-        eval_metric="auc",
    )
    clf.save_model(os.path.join(output_dir, "one-hot.json"))