* Generate column matrix from gHistIndex. * Avoid synchronization with the sparse page once the cache is written. * Cleanups: Remove member variables/functions, change the update routine to look like approx and gpu_hist. * Remove pruner.
52 lines
1.2 KiB
Python
52 lines
1.2 KiB
Python
'''
Demo for using feature weight to change column sampling
=======================================================

.. versionadded:: 1.3.0
'''
|
|
|
|
import numpy as np
|
|
import xgboost
|
|
from matplotlib import pyplot as plt
|
|
import argparse
|
|
|
|
|
|
def main(args):
    """Train a booster with per-feature sampling weights and check the scores.

    A random dataset is generated, feature ``i`` is given sampling weight
    ``float(i)`` (so feature 0 gets weight 0), and a ``hist`` booster is
    trained with ``colsample_bynode``.  The function then asserts that
    ``f0`` is absent from the booster's feature scores and that ``f9``
    holds the maximum score.

    Parameters
    ----------
    args :
        Parsed command-line namespace; only ``args.plot`` is read.  A truthy
        value displays the feature-importance plot.
    """
    n_samples = 1000
    n_features = 10
    random_state = np.random.RandomState(1994)

    # Draw X before y so the random stream is consumed in a fixed order.
    X = random_state.randn(n_samples, n_features)
    y = random_state.randn(n_samples)
    # Weight of feature i is i: 0.0, 1.0, ..., n_features - 1.
    weights = np.arange(n_features, dtype=np.float64)

    dtrain = xgboost.DMatrix(X, y)
    dtrain.set_info(feature_weights=weights)

    params = {'tree_method': 'hist', 'colsample_bynode': 0.2}
    booster = xgboost.train(
        params,
        dtrain,
        num_boost_round=10,
        evals=[(dtrain, 'd')],
    )

    scores = booster.get_fscore()
    # feature zero has 0 weight, so it is expected to be missing here
    assert scores.get('f0') is None
    assert max(scores.values()) == scores.get('f9')

    if args.plot:
        xgboost.plot_importance(booster)
        plt.show()
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: parse the command line and run the demo.
    # --plot stays an integer flag (0 or 1, default 1) so existing
    # invocations such as ``--plot 0`` keep working.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--plot',
        type=int,
        default=1,
        # The demo plots feature importance (xgboost.plot_importance), not
        # an evaluation history, so the help text says so.
        help='Set to 0 to disable plotting the feature importance.')
    args = parser.parse_args()
    main(args)
|