""" Demo for using feature weight to change column sampling ======================================================= .. versionadded:: 1.3.0 """ import argparse import numpy as np from matplotlib import pyplot as plt import xgboost def main(args: argparse.Namespace) -> None: rng = np.random.RandomState(1994) kRows = 4196 kCols = 10 X = rng.randn(kRows, kCols) y = rng.randn(kRows) fw = np.ones(shape=(kCols,)) for i in range(kCols): fw[i] *= float(i) dtrain = xgboost.DMatrix(X, y) dtrain.set_info(feature_weights=fw) # Perform column sampling for each node split evaluation, the sampling process is # weighted by feature weights. bst = xgboost.train( {"tree_method": "hist", "colsample_bynode": 0.2}, dtrain, num_boost_round=10, evals=[(dtrain, "d")], ) feature_map = bst.get_fscore() # feature zero has 0 weight assert feature_map.get("f0", None) is None assert max(feature_map.values()) == feature_map.get("f9") if args.plot: xgboost.plot_importance(bst) plt.show() if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--plot", type=int, default=1, help="Set to 0 to disable plotting the evaluation history.", ) args = parser.parse_args() main(args)