Revert ntree limit fix (#6616)

The old (pre-fix) best_ntree_limit ignored the num_class parameter, which is incorrect. We had previously worked around this in the C++ layer to avoid potentially breaking the other language bindings, but the Python interpretation remained incorrect. The reverted PR fixed the Python side to take num_class into account without removing the old workaround, so the tree calculation in the predictor ended up wrong; see PredictBatch in CPUPredictor.
Jiaming Yuan
2021-01-19 23:51:16 +08:00
committed by GitHub
parent d132933550
commit d6d72de339
6 changed files with 32 additions and 21 deletions
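
For context on the revert: each boosting round of a multi-class model produces num_class * num_parallel_tree trees, and the C++ workaround mentioned above (PredictBatch in CPUPredictor) already scales the supplied tree limit by the number of class groups, so best_ntree_limit must not include the num_class factor. A minimal sketch of the restored behaviour that the tests below assert, assuming XGBoost at this commit (dataset and parameter values are illustrative):

    import xgboost as xgb
    from sklearn.datasets import load_iris

    X, y = load_iris(return_X_y=True)
    dtrain = xgb.DMatrix(X, y)
    num_parallel_tree = 4
    booster = xgb.train(
        {
            "num_class": 3,  # three iris classes
            "num_parallel_tree": num_parallel_tree,
            "objective": "multi:softprob",
        },
        dtrain,
        num_boost_round=2,
        evals=[(dtrain, "train")],
        early_stopping_rounds=1,
    )
    # The num_class factor is left to the C++ predictor, so it no longer
    # appears in the Python-side attribute.
    assert booster.best_ntree_limit == (
        booster.best_iteration + 1) * num_parallel_tree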


@@ -347,7 +347,7 @@ class TestModels:
         X, y = load_iris(return_X_y=True)
         cls = xgb.XGBClassifier(n_estimators=2)
         cls.fit(X, y, early_stopping_rounds=1, eval_set=[(X, y)])
-        assert cls.get_booster().best_ntree_limit == 2 * cls.n_classes_
+        assert cls.get_booster().best_ntree_limit == 2
         assert cls.best_ntree_limit == cls.get_booster().best_ntree_limit

         with tempfile.TemporaryDirectory() as tmpdir:
@@ -356,7 +356,7 @@ class TestModels:
             cls = xgb.XGBClassifier(n_estimators=2)
             cls.load_model(path)
-            assert cls.get_booster().best_ntree_limit == 2 * cls.n_classes_
+            assert cls.get_booster().best_ntree_limit == 2
             assert cls.best_ntree_limit == cls.get_booster().best_ntree_limit

     @pytest.mark.skipif(**tm.no_sklearn())


@@ -33,9 +33,15 @@ def run_predict_leaf(predictor):
     y = rng.randint(low=0, high=classes, size=rows)
     m = xgb.DMatrix(X, y)
     booster = xgb.train(
-        {'num_parallel_tree': num_parallel_tree, 'num_class': classes,
-         'predictor': predictor, 'tree_method': 'hist'}, m,
-        num_boost_round=num_boost_round)
+        {
+            "num_parallel_tree": num_parallel_tree,
+            "num_class": classes,
+            "predictor": predictor,
+            "tree_method": "hist",
+        },
+        m,
+        num_boost_round=num_boost_round,
+    )

     empty = xgb.DMatrix(np.ones(shape=(0, cols)))
     empty_leaf = booster.predict(empty, pred_leaf=True)
@@ -52,12 +58,19 @@ def run_predict_leaf(predictor):
             end = classes * num_parallel_tree * (j + 1)
             layer = row[start: end]
             for c in range(classes):
-                tree_group = layer[c * num_parallel_tree:
-                                   (c+1) * num_parallel_tree]
+                tree_group = layer[c * num_parallel_tree: (c + 1) * num_parallel_tree]
                 assert tree_group.shape[0] == num_parallel_tree
                 # no subsampling so tree in same forest should output same
                 # leaf.
                 assert np.all(tree_group == tree_group[0])

+    ntree_limit = 2
+    sliced = booster.predict(
+        m, pred_leaf=True, ntree_limit=num_parallel_tree * ntree_limit
+    )
+    first = sliced[0, ...]
+    assert first.shape[0] == classes * num_parallel_tree * ntree_limit
+
     return leaf
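
The assertions above pin down the leaf-prediction layout: with pred_leaf=True there is one output column per tree, grouped per boosting round as num_class * num_parallel_tree, and the predictor scales ntree_limit by the number of class groups. A standalone sketch of the same expectation (sizes are illustrative, not the test's):

    import numpy as np
    import xgboost as xgb

    rows, cols, classes, forest, rounds = 200, 4, 3, 2, 5
    rng = np.random.RandomState(2021)
    m = xgb.DMatrix(rng.randn(rows, cols), rng.randint(0, classes, size=rows))
    booster = xgb.train(
        {"num_class": classes, "num_parallel_tree": forest,
         "tree_method": "hist"},
        m,
        num_boost_round=rounds,
    )
    # One column per tree: `rounds` layers of classes * forest trees each.
    leaf = booster.predict(m, pred_leaf=True)
    assert leaf.shape == (rows, rounds * classes * forest)
    # Limiting to two layers keeps classes * forest * 2 columns, because
    # the predictor multiplies ntree_limit by the number of class groups.
    sliced = booster.predict(m, pred_leaf=True, ntree_limit=forest * 2)
    assert sliced.shape[1] == classes * forest * 2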


@@ -119,13 +119,13 @@ class TestTrainingContinuation:
         gbdt_05 = xgb.train(xgb_params_03, dtrain_5class,
                             num_boost_round=7)
         assert gbdt_05.best_ntree_limit == (
-            gbdt_05.best_iteration + 1) * self.num_parallel_tree * 5
+            gbdt_05.best_iteration + 1) * self.num_parallel_tree

         gbdt_05 = xgb.train(xgb_params_03,
                             dtrain_5class,
                             num_boost_round=3,
                             xgb_model=gbdt_05)
         assert gbdt_05.best_ntree_limit == (
-            gbdt_05.best_iteration + 1) * self.num_parallel_tree * 5
+            gbdt_05.best_iteration + 1) * self.num_parallel_tree

         res1 = gbdt_05.predict(dtrain_5class)
         res2 = gbdt_05.predict(dtrain_5class,


@@ -933,9 +933,9 @@ class TestWithDask:
     def test_feature_weights(self, client: "Client") -> None:
         kRows = 1024
         kCols = 64
-        X = da.random.random((kRows, kCols), chunks=(32, -1))
-        y = da.random.random(kRows, chunks=32)
+        rng = da.random.RandomState(1994)
+        X = rng.random_sample((kRows, kCols), chunks=(32, -1))
+        y = rng.random_sample(kRows, chunks=32)
         fw = np.ones(shape=(kCols,))
         for i in range(kCols):


@@ -106,7 +106,7 @@ def test_best_ntree_limit():
     )

     if forest:
-        assert cls.best_ntree_limit == rounds * forest * cls.n_classes_
+        assert cls.best_ntree_limit == rounds * forest
     else:
         assert cls.best_ntree_limit == 0
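
At the wrapper level the rule is now simply rounds * forest, with no n_classes_ term. A minimal sketch of the expectation, assuming a multi-class XGBClassifier trained with forests (values are illustrative):

    import xgboost as xgb
    from sklearn.datasets import load_iris

    X, y = load_iris(return_X_y=True)
    rounds, forest = 4, 3
    cls = xgb.XGBClassifier(n_estimators=rounds, num_parallel_tree=forest)
    cls.fit(X, y, eval_set=[(X, y)], early_stopping_rounds=rounds)
    # With patience equal to n_estimators early stopping cannot trigger,
    # so best_iteration is rounds - 1 and the limit covers every layer.
    assert cls.best_ntree_limit == rounds * forest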