[doc] Add demo for inference using individual tree. (#8752)
This commit is contained in:
parent
28bb01aa22
commit
7b3d473593
99
demo/guide-python/individual_trees.py
Normal file
99
demo/guide-python/individual_trees.py
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
"""
|
||||||
|
Demo for prediction using individual trees and model slices
|
||||||
|
===========================================================
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy.special import logit
|
||||||
|
from sklearn.datasets import load_svmlight_file
|
||||||
|
|
||||||
|
import xgboost as xgb
|
||||||
|
|
||||||
|
# Locate the bundled agaricus data files relative to this script so the demo
# does not depend on the current working directory.
CURRENT_DIR = os.path.dirname(__file__)
train = os.path.join(CURRENT_DIR, "../data/agaricus.txt.train")
test = os.path.join(CURRENT_DIR, "../data/agaricus.txt.test")
|
||||||
|
|
||||||
|
|
||||||
|
def individual_tree() -> None:
    """Get prediction from each individual tree and combine them together.

    A small booster is trained, then replayed one boosting round at a time.
    Each round's prediction is computed with the running score supplied as
    ``base_margin`` so that the contributions accumulate.  The accumulated
    result is finally compared against a regular prediction from the full
    model.

    Raises
    ------
    AssertionError
        If the accumulated prediction does not match the full-model prediction.
    """
    X_train, y_train = load_svmlight_file(train)
    # Only the test features are used; the labels are not needed for prediction.
    X_test, _ = load_svmlight_file(test)
    Xy_train = xgb.QuantileDMatrix(X_train, y_train)

    n_rounds = 4
    # Specify the base score, otherwise xgboost will estimate one from the training
    # data.
    base_score = 0.5
    params = {
        "max_depth": 2,
        "eta": 1,
        "objective": "reg:logistic",
        "tree_method": "hist",
        "base_score": base_score,
    }
    booster = xgb.train(params, Xy_train, num_boost_round=n_rounds)

    # Use logit to invert the base score back to raw leaf value (margin)
    scores = np.full((X_test.shape[0],), logit(base_score))
    for i in range(n_rounds):
        # - Use output_margin to get raw leaf values
        # - Use iteration_range to get prediction for only one tree
        # - Use previous prediction as base margin for the model
        Xy_test = xgb.DMatrix(X_test, base_margin=scores)

        # Keep the raw leaf values for accumulation on every round except the
        # last one, where the transformed prediction is requested instead.
        is_last_round = i == n_rounds - 1
        scores = booster.predict(
            Xy_test, iteration_range=(i, i + 1), output_margin=not is_last_round
        )

    full = booster.predict(xgb.DMatrix(X_test), output_margin=False)
    np.testing.assert_allclose(scores, full)
|
||||||
|
|
||||||
|
|
||||||
|
def model_slices() -> None:
    """Inference with each individual tree using model slices.

    A small booster is trained and sliced into one single-round model per
    boosting round.  The slices are applied in order, each one receiving the
    running score as ``base_margin``.  The accumulated result is finally
    compared against a regular prediction from the full model.

    Raises
    ------
    AssertionError
        If the accumulated prediction does not match the full-model prediction.
    """
    X_train, y_train = load_svmlight_file(train)
    # Only the test features are used; the labels are not needed for prediction.
    X_test, _ = load_svmlight_file(test)
    Xy_train = xgb.QuantileDMatrix(X_train, y_train)

    n_rounds = 4
    # Specify the base score, otherwise xgboost will estimate one from the training
    # data.
    base_score = 0.5
    params = {
        "max_depth": 2,
        "eta": 1,
        "objective": "reg:logistic",
        "tree_method": "hist",
        "base_score": base_score,
    }
    booster = xgb.train(params, Xy_train, num_boost_round=n_rounds)
    # Indexing the booster with an integer yields the model slice for that round.
    trees = [booster[t] for t in range(n_rounds)]

    # Use logit to invert the base score back to raw leaf value (margin)
    scores = np.full((X_test.shape[0],), logit(base_score))
    for i, t in enumerate(trees):
        # Feed previous scores into base margin.
        Xy_test = xgb.DMatrix(X_test, base_margin=scores)

        # Keep the raw leaf values for accumulation on every round except the
        # last one, where the transformed prediction is requested instead.
        is_last_round = i == n_rounds - 1
        scores = t.predict(Xy_test, output_margin=not is_last_round)

    full = booster.predict(xgb.DMatrix(X_test), output_margin=False)
    np.testing.assert_allclose(scores, full)
|
||||||
|
|
||||||
|
|
||||||
|
# Run both demos when the file is executed as a script.
if __name__ == "__main__":
    individual_tree()
    model_slices()
|
||||||
@ -150,6 +150,7 @@ def main(args: argparse.Namespace) -> None:
|
|||||||
"demo/guide-python/feature_weights.py",
|
"demo/guide-python/feature_weights.py",
|
||||||
"demo/guide-python/sklearn_parallel.py",
|
"demo/guide-python/sklearn_parallel.py",
|
||||||
"demo/guide-python/spark_estimator_examples.py",
|
"demo/guide-python/spark_estimator_examples.py",
|
||||||
|
"demo/guide-python/individual_trees.py",
|
||||||
# CI
|
# CI
|
||||||
"tests/ci_build/lint_python.py",
|
"tests/ci_build/lint_python.py",
|
||||||
"tests/ci_build/test_r_package.py",
|
"tests/ci_build/test_r_package.py",
|
||||||
@ -191,6 +192,7 @@ def main(args: argparse.Namespace) -> None:
|
|||||||
"demo/guide-python/external_memory.py",
|
"demo/guide-python/external_memory.py",
|
||||||
"demo/guide-python/cat_in_the_dat.py",
|
"demo/guide-python/cat_in_the_dat.py",
|
||||||
"demo/guide-python/feature_weights.py",
|
"demo/guide-python/feature_weights.py",
|
||||||
|
"demo/guide-python/individual_trees.py",
|
||||||
# tests
|
# tests
|
||||||
"tests/python/test_dt.py",
|
"tests/python/test_dt.py",
|
||||||
"tests/python/test_data_iterator.py",
|
"tests/python/test_data_iterator.py",
|
||||||
|
|||||||
@ -79,6 +79,12 @@ def test_predict_first_ntree_demo():
|
|||||||
subprocess.check_call(cmd)
|
subprocess.check_call(cmd)
|
||||||
|
|
||||||
|
|
||||||
|
def test_individual_trees():
    """Run the ``individual_trees.py`` demo script and fail on a non-zero exit."""
    script = os.path.join(PYTHON_DEMO_DIR, 'individual_trees.py')
    cmd = ['python', script]
    subprocess.check_call(cmd)
|
||||||
|
|
||||||
|
|
||||||
def test_predict_leaf_indices_demo():
|
def test_predict_leaf_indices_demo():
|
||||||
script = os.path.join(PYTHON_DEMO_DIR, 'predict_leaf_indices.py')
|
script = os.path.join(PYTHON_DEMO_DIR, 'predict_leaf_indices.py')
|
||||||
cmd = ['python', script]
|
cmd = ['python', script]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user