[doc] Mention data consistency for categorical features. (#9678)
This commit is contained in:
@@ -21,6 +21,7 @@ class LintersPaths:
|
||||
"tests/python/test_data_iterator.py",
|
||||
"tests/python/test_dmatrix.py",
|
||||
"tests/python/test_dt.py",
|
||||
"tests/python/test_demos.py",
|
||||
"tests/python/test_predict.py",
|
||||
"tests/python/test_quantile_dmatrix.py",
|
||||
"tests/python/test_tree_regularization.py",
|
||||
@@ -41,6 +42,7 @@ class LintersPaths:
|
||||
"demo/guide-python/cat_in_the_dat.py",
|
||||
"demo/guide-python/callbacks.py",
|
||||
"demo/guide-python/categorical.py",
|
||||
"demo/guide-python/cat_pipeline.py",
|
||||
"demo/guide-python/feature_weights.py",
|
||||
"demo/guide-python/sklearn_parallel.py",
|
||||
"demo/guide-python/spark_estimator_examples.py",
|
||||
@@ -79,6 +81,7 @@ class LintersPaths:
|
||||
"python-package/",
|
||||
# tests
|
||||
"tests/python/test_dt.py",
|
||||
"tests/python/test_demos.py",
|
||||
"tests/python/test_data_iterator.py",
|
||||
"tests/python-gpu/test_gpu_data_iterator.py",
|
||||
"tests/python-gpu/load_pickle.py",
|
||||
@@ -89,6 +92,8 @@ class LintersPaths:
|
||||
"demo/json-model/json_parser.py",
|
||||
"demo/guide-python/external_memory.py",
|
||||
"demo/guide-python/cat_in_the_dat.py",
|
||||
"demo/guide-python/categorical.py",
|
||||
"demo/guide-python/cat_pipeline.py",
|
||||
"demo/guide-python/feature_weights.py",
|
||||
"demo/guide-python/individual_trees.py",
|
||||
"demo/guide-python/quantile_regression.py",
|
||||
|
||||
@@ -11,129 +11,143 @@ from xgboost import testing as tm
|
||||
pytestmark = tm.timeout(30)
|
||||
|
||||
DEMO_DIR = tm.demo_dir(__file__)
|
||||
PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, 'guide-python')
|
||||
CLI_DEMO_DIR = os.path.join(DEMO_DIR, 'CLI')
|
||||
PYTHON_DEMO_DIR = os.path.join(DEMO_DIR, "guide-python")
|
||||
CLI_DEMO_DIR = os.path.join(DEMO_DIR, "CLI")
|
||||
|
||||
|
||||
def test_basic_walkthrough():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'basic_walkthrough.py')
|
||||
cmd = ['python', script]
|
||||
def test_basic_walkthrough() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "basic_walkthrough.py")
|
||||
cmd = ["python", script]
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
subprocess.check_call(cmd, cwd=tmpdir)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_matplotlib())
|
||||
def test_custom_multiclass_objective():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'custom_softmax.py')
|
||||
cmd = ['python', script, '--plot=0']
|
||||
@pytest.mark.skipif(**tm.no_pandas())
|
||||
def test_categorical() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "categorical.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
|
||||
def test_cat_pipeline() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "cat_pipeline.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_matplotlib())
|
||||
def test_custom_rmsle_objective():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'custom_rmsle.py')
|
||||
cmd = ['python', script, '--plot=0']
|
||||
def test_custom_multiclass_objective() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "custom_softmax.py")
|
||||
cmd = ["python", script, "--plot=0"]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_matplotlib())
|
||||
def test_feature_weights_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'feature_weights.py')
|
||||
cmd = ['python', script, '--plot=0']
|
||||
def test_custom_rmsle_objective() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "custom_rmsle.py")
|
||||
cmd = ["python", script, "--plot=0"]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_matplotlib())
|
||||
def test_feature_weights_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "feature_weights.py")
|
||||
cmd = ["python", script, "--plot=0"]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_sklearn_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_examples.py')
|
||||
cmd = ['python', script]
|
||||
def test_sklearn_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "sklearn_examples.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
assert os.path.exists('best_calif.pkl')
|
||||
os.remove('best_calif.pkl')
|
||||
assert os.path.exists("best_calif.pkl")
|
||||
os.remove("best_calif.pkl")
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_sklearn_parallel_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_parallel.py')
|
||||
cmd = ['python', script]
|
||||
def test_sklearn_parallel_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "sklearn_parallel.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_sklearn_evals_result_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_evals_result.py')
|
||||
cmd = ['python', script]
|
||||
def test_sklearn_evals_result_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "sklearn_evals_result.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_boost_from_prediction_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'boost_from_prediction.py')
|
||||
cmd = ['python', script]
|
||||
def test_boost_from_prediction_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "boost_from_prediction.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_predict_first_ntree_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'predict_first_ntree.py')
|
||||
cmd = ['python', script]
|
||||
def test_predict_first_ntree_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "predict_first_ntree.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_individual_trees():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'individual_trees.py')
|
||||
cmd = ['python', script]
|
||||
def test_individual_trees() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "individual_trees.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_predict_leaf_indices_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'predict_leaf_indices.py')
|
||||
cmd = ['python', script]
|
||||
def test_predict_leaf_indices_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "predict_leaf_indices.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_generalized_linear_model_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'generalized_linear_model.py')
|
||||
cmd = ['python', script]
|
||||
def test_generalized_linear_model_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "generalized_linear_model.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_cross_validation_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'cross_validation.py')
|
||||
cmd = ['python', script]
|
||||
def test_cross_validation_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "cross_validation.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_external_memory_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'external_memory.py')
|
||||
cmd = ['python', script]
|
||||
def test_external_memory_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "external_memory.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_evals_result_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'evals_result.py')
|
||||
cmd = ['python', script]
|
||||
def test_evals_result_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "evals_result.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
@pytest.mark.skipif(**tm.no_pandas())
|
||||
def test_aft_demo():
|
||||
script = os.path.join(DEMO_DIR, 'aft_survival', 'aft_survival_demo.py')
|
||||
cmd = ['python', script]
|
||||
def test_aft_demo() -> None:
|
||||
script = os.path.join(DEMO_DIR, "aft_survival", "aft_survival_demo.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
assert os.path.exists('aft_model.json')
|
||||
os.remove('aft_model.json')
|
||||
assert os.path.exists("aft_model.json")
|
||||
os.remove("aft_model.json")
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_matplotlib())
|
||||
def test_callbacks_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'callbacks.py')
|
||||
cmd = ['python', script, '--plot=0']
|
||||
def test_callbacks_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "callbacks.py")
|
||||
cmd = ["python", script, "--plot=0"]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def test_continuation_demo():
|
||||
script = os.path.join(PYTHON_DEMO_DIR, 'continuation.py')
|
||||
cmd = ['python', script]
|
||||
def test_continuation_demo() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "continuation.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@@ -141,14 +155,14 @@ def test_continuation_demo():
|
||||
@pytest.mark.skipif(**tm.no_matplotlib())
|
||||
def test_multioutput_reg() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "multioutput_regression.py")
|
||||
cmd = ['python', script, "--plot=0"]
|
||||
cmd = ["python", script, "--plot=0"]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_quantile_reg() -> None:
|
||||
script = os.path.join(PYTHON_DEMO_DIR, "quantile_regression.py")
|
||||
cmd = ['python', script]
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
@@ -197,28 +211,30 @@ def test_json_model() -> None:
|
||||
# - aft tuning is not tested due to extra dependency.
|
||||
|
||||
|
||||
def test_cli_regression_demo():
|
||||
reg_dir = os.path.join(CLI_DEMO_DIR, 'regression')
|
||||
script = os.path.join(reg_dir, 'mapfeat.py')
|
||||
cmd = ['python', script]
|
||||
def test_cli_regression_demo() -> None:
|
||||
reg_dir = os.path.join(CLI_DEMO_DIR, "regression")
|
||||
script = os.path.join(reg_dir, "mapfeat.py")
|
||||
cmd = ["python", script]
|
||||
subprocess.check_call(cmd, cwd=reg_dir)
|
||||
|
||||
script = os.path.join(reg_dir, 'mknfold.py')
|
||||
cmd = ['python', script, 'machine.txt', '1']
|
||||
script = os.path.join(reg_dir, "mknfold.py")
|
||||
cmd = ["python", script, "machine.txt", "1"]
|
||||
subprocess.check_call(cmd, cwd=reg_dir)
|
||||
|
||||
exe = os.path.join(DEMO_DIR, os.path.pardir, 'xgboost')
|
||||
conf = os.path.join(reg_dir, 'machine.conf')
|
||||
exe = os.path.join(DEMO_DIR, os.path.pardir, "xgboost")
|
||||
conf = os.path.join(reg_dir, "machine.conf")
|
||||
subprocess.check_call([exe, conf], cwd=reg_dir)
|
||||
|
||||
|
||||
@pytest.mark.skipif(condition=sys.platform.startswith("win"),
|
||||
reason='Test requires sh execution.')
|
||||
def test_cli_binary_classification():
|
||||
cls_dir = os.path.join(CLI_DEMO_DIR, 'binary_classification')
|
||||
@pytest.mark.skipif(
|
||||
condition=sys.platform.startswith("win"), reason="Test requires sh execution."
|
||||
)
|
||||
def test_cli_binary_classification() -> None:
|
||||
cls_dir = os.path.join(CLI_DEMO_DIR, "binary_classification")
|
||||
with tm.DirectoryExcursion(cls_dir, cleanup=True):
|
||||
subprocess.check_call(['./runexp.sh'])
|
||||
os.remove('0002.model')
|
||||
subprocess.check_call(["./runexp.sh"])
|
||||
os.remove("0002.model")
|
||||
|
||||
|
||||
# year prediction is not tested due to data size being too large.
|
||||
# rank is not tested as it requires unrar command.
|
||||
|
||||
Reference in New Issue
Block a user