[breaking] Add prediction function for DMatrix and use inplace predict for dask. (#6668)

* Add a new API function for predicting on `DMatrix`. This function aligns with the rest of the `XGBoosterPredictFrom*` functions in the semantics of its arguments. * Purge `ntree_limit` from libxgboost; use iteration instead. * [dask] Use `inplace_predict` by default for dask sklearn models. * [dask] Run prediction shape inference on the worker instead of the client. The breaking change is in the Python sklearn `apply` function: I made it consistent with the other prediction functions, where `best_iteration` is used by default.
2021-02-08 18:26:32 +08:00
parent dbb5208a0a
commit 4656b09d5d
29 changed files with 1134 additions and 604 deletions
--- a/tests/python-gpu/test_from_cupy.py
+++ b/tests/python-gpu/test_from_cupy.py
@@ -112,17 +112,24 @@ def _test_cupy_metainfo(DMatrixT):
@pytest.mark.skipif(**tm.no_sklearn())
 def test_cupy_training_with_sklearn():
    import cupy as cp
+
    np.random.seed(1)
    cp.random.seed(1)
-    X = cp.random.randn(50, 10, dtype='float32')
-    y = (cp.random.randn(50, dtype='float32') > 0).astype('int8')
+    X = cp.random.randn(50, 10, dtype="float32")
+    y = (cp.random.randn(50, dtype="float32") > 0).astype("int8")
    weights = np.random.random(50) + 1
    cupy_weights = cp.array(weights)
    base_margin = np.random.random(50)
    cupy_base_margin = cp.array(base_margin)

-    clf = xgb.XGBClassifier(gpu_id=0, tree_method='gpu_hist', use_label_encoder=False)
-    clf.fit(X, y, sample_weight=cupy_weights, base_margin=cupy_base_margin, eval_set=[(X, y)])
+    clf = xgb.XGBClassifier(gpu_id=0, tree_method="gpu_hist", use_label_encoder=False)
+    clf.fit(
+        X,
+        y,
+        sample_weight=cupy_weights,
+        base_margin=cupy_base_margin,
+        eval_set=[(X, y)],
+    )
    pred = clf.predict(X)
    assert np.array_equal(np.unique(pred), np.array([0, 1]))