Update JSON parser demo with categorical feature. (#8401)

- Parse categorical features in the Python example. - Add tests. - Update document.
2022-10-28 20:57:43 +08:00
parent cfd2a9f872
commit a408c34558
7 changed files with 318 additions and 133 deletions
--- a/tests/python/test_demos.py
+++ b/tests/python/test_demos.py
@@ -1,9 +1,11 @@
 import os
 import subprocess
+import tempfile
 import sys

 import pytest

+import xgboost
 from xgboost import testing as tm

 pytestmark = tm.timeout(30)
@@ -138,10 +140,48 @@ def test_multioutput_reg() -> None:
    subprocess.check_call(cmd)


-# gpu_acceleration is not tested due to covertype dataset is being too huge.
-# gamma regression is not tested as it requires running a R script first.
-# aft viz is not tested due to ploting is not controled
-# aft tunning is not tested due to extra dependency.
+@pytest.mark.skipif(**tm.no_ubjson())
+def test_json_model() -> None:
+    script = os.path.join(DEMO_DIR, "json-model", "json_parser.py")
+
+    def run_test(reg: xgboost.XGBRegressor) -> None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            path = os.path.join(tmpdir, "reg.json")
+            reg.save_model(path)
+            cmd = ["python", script, f"--model={path}"]
+            subprocess.check_call(cmd)
+
+            path = os.path.join(tmpdir, "reg.ubj")
+            reg.save_model(path)
+            cmd = ["python", script, f"--model={path}"]
+            subprocess.check_call(cmd)
+
+    # numerical
+    X, y = tm.make_sparse_regression(100, 10, 0.5, False)
+    reg = xgboost.XGBRegressor(n_estimators=2, tree_method="hist")
+    reg.fit(X, y)
+    run_test(reg)
+
+    # categorical
+    X, y = tm.make_categorical(
+        n_samples=1000,
+        n_features=10,
+        n_categories=6,
+        onehot=False,
+        sparsity=0.5,
+        cat_ratio=0.5,
+    )
+    reg = xgboost.XGBRegressor(
+        n_estimators=2, tree_method="hist", enable_categorical=True
+    )
+    reg.fit(X, y)
+    run_test(reg)
+
+
+# - gpu_acceleration is not tested due to covertype dataset is being too huge.
+# - gamma regression is not tested as it requires running a R script first.
+# - aft viz is not tested due to ploting is not controlled
+# - aft tunning is not tested due to extra dependency.


 def test_cli_regression_demo():