diff --git a/demo/guide-python/individual_trees.py b/demo/guide-python/individual_trees.py new file mode 100644 index 000000000..d940e8521 --- /dev/null +++ b/demo/guide-python/individual_trees.py @@ -0,0 +1,99 @@ +""" +Demo for prediction using individual trees and model slices +=========================================================== +""" +import os + +import numpy as np +from scipy.special import logit +from sklearn.datasets import load_svmlight_file + +import xgboost as xgb + +CURRENT_DIR = os.path.dirname(__file__) +train = os.path.join(CURRENT_DIR, "../data/agaricus.txt.train") +test = os.path.join(CURRENT_DIR, "../data/agaricus.txt.test") + + +def individual_tree() -> None: + """Get prediction from each individual tree and combine them together.""" + X_train, y_train = load_svmlight_file(train) + X_test, y_test = load_svmlight_file(test) + Xy_train = xgb.QuantileDMatrix(X_train, y_train) + + n_rounds = 4 + # Specify the base score, otherwise xgboost will estimate one from the training + # data. 
+ base_score = 0.5 + params = { + "max_depth": 2, + "eta": 1, + "objective": "reg:logistic", + "tree_method": "hist", + "base_score": base_score, + } + booster = xgb.train(params, Xy_train, num_boost_round=n_rounds) + + # Use logit to invert the base score back to raw leaf value (margin) + scores = np.full((X_test.shape[0],), logit(base_score)) + for i in range(n_rounds): + # - Use output_margin to get raw leaf values + # - Use iteration_range to get prediction for only one tree + # - Use previous prediction as base margin for the model + Xy_test = xgb.DMatrix(X_test, base_margin=scores) + + if i == n_rounds - 1: + # last round, get the transformed prediction + scores = booster.predict( + Xy_test, iteration_range=(i, i + 1), output_margin=False + ) + else: + # get raw leaf value for accumulation + scores = booster.predict( + Xy_test, iteration_range=(i, i + 1), output_margin=True + ) + + full = booster.predict(xgb.DMatrix(X_test), output_margin=False) + np.testing.assert_allclose(scores, full) + + +def model_slices() -> None: + """Inference with each individual tree using model slices.""" + X_train, y_train = load_svmlight_file(train) + X_test, y_test = load_svmlight_file(test) + Xy_train = xgb.QuantileDMatrix(X_train, y_train) + + n_rounds = 4 + # Specify the base score, otherwise xgboost will estimate one from the training + # data. + base_score = 0.5 + params = { + "max_depth": 2, + "eta": 1, + "objective": "reg:logistic", + "tree_method": "hist", + "base_score": base_score, + } + booster = xgb.train(params, Xy_train, num_boost_round=n_rounds) + trees = [booster[t] for t in range(n_rounds)] + + # Use logit to invert the base score back to raw leaf value (margin) + scores = np.full((X_test.shape[0],), logit(base_score)) + for i, t in enumerate(trees): + # Feed previous scores into base margin. 
+ Xy_test = xgb.DMatrix(X_test, base_margin=scores) + + if i == n_rounds - 1: + # last round, get the transformed prediction + scores = t.predict(Xy_test, output_margin=False) + else: + # get raw leaf value for accumulation + scores = t.predict(Xy_test, output_margin=True) + + full = booster.predict(xgb.DMatrix(X_test), output_margin=False) + np.testing.assert_allclose(scores, full) + + +if __name__ == "__main__": + individual_tree() + model_slices() diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py index 82d259581..34eb92fa6 100644 --- a/tests/ci_build/lint_python.py +++ b/tests/ci_build/lint_python.py @@ -150,6 +150,7 @@ def main(args: argparse.Namespace) -> None: "demo/guide-python/feature_weights.py", "demo/guide-python/sklearn_parallel.py", "demo/guide-python/spark_estimator_examples.py", + "demo/guide-python/individual_trees.py", # CI "tests/ci_build/lint_python.py", "tests/ci_build/test_r_package.py", @@ -191,6 +192,7 @@ def main(args: argparse.Namespace) -> None: "demo/guide-python/external_memory.py", "demo/guide-python/cat_in_the_dat.py", "demo/guide-python/feature_weights.py", + "demo/guide-python/individual_trees.py", # tests "tests/python/test_dt.py", "tests/python/test_data_iterator.py", diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py index 35570ba4d..28797f160 100644 --- a/tests/python/test_demos.py +++ b/tests/python/test_demos.py @@ -79,6 +79,12 @@ def test_predict_first_ntree_demo(): subprocess.check_call(cmd) +def test_individual_trees(): + script = os.path.join(PYTHON_DEMO_DIR, 'individual_trees.py') + cmd = ['python', script] + subprocess.check_call(cmd) + + def test_predict_leaf_indices_demo(): script = os.path.join(PYTHON_DEMO_DIR, 'predict_leaf_indices.py') cmd = ['python', script]