Init estimation for regression. (#8272)

2023-01-11 02:04:56 +08:00
parent 1b58d81315
commit badeff1d74
29 changed files with 466 additions and 132 deletions
--- a/demo/guide-python/feature_weights.py
+++ b/demo/guide-python/feature_weights.py
@@ -1,9 +1,9 @@
-'''
+"""
 Demo for using feature weight to change column sampling
 =======================================================

    .. versionadded:: 1.3.0
-'''
+"""

 import argparse

@@ -13,10 +13,10 @@ from matplotlib import pyplot as plt
 import xgboost


-def main(args):
+def main(args: argparse.Namespace) -> None:
    rng = np.random.RandomState(1994)

-    kRows = 1000
+    kRows = 4196
    kCols = 10

    X = rng.randn(kRows, kCols)
@@ -28,26 +28,32 @@ def main(args):
    dtrain = xgboost.DMatrix(X, y)
    dtrain.set_info(feature_weights=fw)

-    bst = xgboost.train({'tree_method': 'hist',
-                         'colsample_bynode': 0.2},
-                        dtrain, num_boost_round=10,
-                        evals=[(dtrain, 'd')])
+    # Perform column sampling for each node split evaluation; the sampling process is
+    # weighted by feature weights.
+    bst = xgboost.train(
+        {"tree_method": "hist", "colsample_bynode": 0.2},
+        dtrain,
+        num_boost_round=10,
+        evals=[(dtrain, "d")],
+    )
    feature_map = bst.get_fscore()
+
    # feature zero has 0 weight
-    assert feature_map.get('f0', None) is None
-    assert max(feature_map.values()) == feature_map.get('f9')
+    assert feature_map.get("f0", None) is None
+    assert max(feature_map.values()) == feature_map.get("f9")

    if args.plot:
        xgboost.plot_importance(bst)
        plt.show()


-if __name__ == '__main__':
+if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
-        '--plot',
+        "--plot",
        type=int,
        default=1,
-        help='Set to 0 to disable plotting the evaluation history.')
+        help="Set to 0 to disable plotting the evaluation history.",
+    )
    args = parser.parse_args()
    main(args)
--- a/demo/guide-python/sklearn_parallel.py
+++ b/demo/guide-python/sklearn_parallel.py
@@ -12,10 +12,15 @@ import xgboost as xgb
 if __name__ == "__main__":
    print("Parallel Parameter optimization")
    X, y = fetch_california_housing(return_X_y=True)
-    xgb_model = xgb.XGBRegressor(n_jobs=multiprocessing.cpu_count() // 2)
-    clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
-                                   'n_estimators': [50, 100, 200]}, verbose=1,
-                       n_jobs=2)
+    xgb_model = xgb.XGBRegressor(
+        n_jobs=multiprocessing.cpu_count() // 2, tree_method="hist"
+    )
+    clf = GridSearchCV(
+        xgb_model,
+        {"max_depth": [2, 4, 6], "n_estimators": [50, 100, 200]},
+        verbose=1,
+        n_jobs=2,
+    )
    clf.fit(X, y)
    print(clf.best_score_)
    print(clf.best_params_)