Optimize GPU evaluation function for categorical data. (#7705)
* Use transform and cache.
This commit is contained in:
@@ -74,12 +74,12 @@ def categorical_model(X: pd.DataFrame, y: pd.Series, output_dir: str) -> None:
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, random_state=1994, test_size=0.2
|
||||
)
|
||||
# Specify `enable_categorical`.
|
||||
# Specify `enable_categorical` to True.
|
||||
clf = xgb.XGBClassifier(
|
||||
**params,
|
||||
eval_metric="auc",
|
||||
enable_categorical=True,
|
||||
max_cat_to_onehot=1, # We use optimal partitioning exclusively
|
||||
max_cat_to_onehot=1, # We use optimal partitioning exclusively
|
||||
)
|
||||
clf.fit(X_train, y_train, eval_set=[(X_test, y_test), (X_train, y_train)])
|
||||
clf.save_model(os.path.join(output_dir, "categorical.json"))
|
||||
@@ -94,13 +94,12 @@ def onehot_encoding_model(X: pd.DataFrame, y: pd.Series, output_dir: str) -> Non
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, random_state=42, test_size=0.2
|
||||
)
|
||||
# Specify `enable_categorical`.
|
||||
clf = xgb.XGBClassifier(**params, enable_categorical=False)
|
||||
# Specify `enable_categorical` to False as we are using encoded data.
|
||||
clf = xgb.XGBClassifier(**params, eval_metric="auc", enable_categorical=False)
|
||||
clf.fit(
|
||||
X_train,
|
||||
y_train,
|
||||
eval_set=[(X_test, y_test), (X_train, y_train)],
|
||||
eval_metric="auc",
|
||||
)
|
||||
clf.save_model(os.path.join(output_dir, "one-hot.json"))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user