Update JSON parser demo with categorical feature. (#8401)

- Parse categorical features in the Python example.
- Add tests.
- Update document.
This commit is contained in:
Jiaming Yuan
2022-10-28 20:57:43 +08:00
committed by GitHub
parent cfd2a9f872
commit a408c34558
7 changed files with 318 additions and 133 deletions

View File

@@ -1,9 +1,11 @@
import os
import subprocess
import tempfile
import sys
import pytest
import xgboost
from xgboost import testing as tm
pytestmark = tm.timeout(30)
@@ -138,10 +140,48 @@ def test_multioutput_reg() -> None:
subprocess.check_call(cmd)
# gpu_acceleration is not tested due to covertype dataset is being too huge.
# gamma regression is not tested as it requires running a R script first.
# aft viz is not tested due to ploting is not controled
# aft tunning is not tested due to extra dependency.
@pytest.mark.skipif(**tm.no_ubjson())
def test_json_model() -> None:
script = os.path.join(DEMO_DIR, "json-model", "json_parser.py")
def run_test(reg: xgboost.XGBRegressor) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "reg.json")
reg.save_model(path)
cmd = ["python", script, f"--model={path}"]
subprocess.check_call(cmd)
path = os.path.join(tmpdir, "reg.ubj")
reg.save_model(path)
cmd = ["python", script, f"--model={path}"]
subprocess.check_call(cmd)
# numerical
X, y = tm.make_sparse_regression(100, 10, 0.5, False)
reg = xgboost.XGBRegressor(n_estimators=2, tree_method="hist")
reg.fit(X, y)
run_test(reg)
# categorical
X, y = tm.make_categorical(
n_samples=1000,
n_features=10,
n_categories=6,
onehot=False,
sparsity=0.5,
cat_ratio=0.5,
)
reg = xgboost.XGBRegressor(
n_estimators=2, tree_method="hist", enable_categorical=True
)
reg.fit(X, y)
run_test(reg)
# - gpu_acceleration is not tested due to covertype dataset is being too huge.
# - gamma regression is not tested as it requires running a R script first.
# - aft viz is not tested due to ploting is not controlled
# - aft tunning is not tested due to extra dependency.
def test_cli_regression_demo():