Init estimation for regression. (#8272)

This commit is contained in:
Jiaming Yuan
2023-01-11 02:04:56 +08:00
committed by GitHub
parent 1b58d81315
commit badeff1d74
29 changed files with 466 additions and 132 deletions

View File

@@ -1,9 +1,9 @@
'''
"""
Demo for using feature weight to change column sampling
=======================================================
.. versionadded:: 1.3.0
'''
"""
import argparse
@@ -13,10 +13,10 @@ from matplotlib import pyplot as plt
import xgboost
def main(args):
def main(args: argparse.Namespace) -> None:
rng = np.random.RandomState(1994)
kRows = 1000
kRows = 4196
kCols = 10
X = rng.randn(kRows, kCols)
@@ -28,26 +28,32 @@ def main(args):
dtrain = xgboost.DMatrix(X, y)
dtrain.set_info(feature_weights=fw)
bst = xgboost.train({'tree_method': 'hist',
'colsample_bynode': 0.2},
dtrain, num_boost_round=10,
evals=[(dtrain, 'd')])
# Perform column sampling for each node split evaluation, the sampling process is
# weighted by feature weights.
bst = xgboost.train(
{"tree_method": "hist", "colsample_bynode": 0.2},
dtrain,
num_boost_round=10,
evals=[(dtrain, "d")],
)
feature_map = bst.get_fscore()
# feature zero has 0 weight
assert feature_map.get('f0', None) is None
assert max(feature_map.values()) == feature_map.get('f9')
assert feature_map.get("f0", None) is None
assert max(feature_map.values()) == feature_map.get("f9")
if args.plot:
xgboost.plot_importance(bst)
plt.show()
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
'--plot',
"--plot",
type=int,
default=1,
help='Set to 0 to disable plotting the evaluation history.')
help="Set to 0 to disable plotting the evaluation history.",
)
args = parser.parse_args()
main(args)

View File

@@ -12,10 +12,15 @@ import xgboost as xgb
if __name__ == "__main__":
print("Parallel Parameter optimization")
X, y = fetch_california_housing(return_X_y=True)
xgb_model = xgb.XGBRegressor(n_jobs=multiprocessing.cpu_count() // 2)
clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
'n_estimators': [50, 100, 200]}, verbose=1,
n_jobs=2)
xgb_model = xgb.XGBRegressor(
n_jobs=multiprocessing.cpu_count() // 2, tree_method="hist"
)
clf = GridSearchCV(
xgb_model,
{"max_depth": [2, 4, 6], "n_estimators": [50, 100, 200]},
verbose=1,
n_jobs=2,
)
clf.fit(X, y)
print(clf.best_score_)
print(clf.best_params_)