[breaking] Add prediction fucntion for DMatrix and use inplace predict for dask. (#6668)

* Add a new API function for predicting on `DMatrix`.  This function aligns
with rest of the `XGBoosterPredictFrom*` functions on semantic of function
arguments.
* Purge `ntree_limit` from libxgboost, use iteration instead.
* [dask] Use `inplace_predict` by default for dask sklearn models.
* [dask] Run prediction shape inference on worker instead of client.

The breaking change is in the Python sklearn `apply` function, I made it to be
consistent with other prediction functions where `best_iteration` is used by
default.
This commit is contained in:
Jiaming Yuan
2021-02-08 18:26:32 +08:00
committed by GitHub
parent dbb5208a0a
commit 4656b09d5d
29 changed files with 1134 additions and 604 deletions

View File

@@ -112,17 +112,24 @@ def _test_cupy_metainfo(DMatrixT):
@pytest.mark.skipif(**tm.no_sklearn())
def test_cupy_training_with_sklearn():
import cupy as cp
np.random.seed(1)
cp.random.seed(1)
X = cp.random.randn(50, 10, dtype='float32')
y = (cp.random.randn(50, dtype='float32') > 0).astype('int8')
X = cp.random.randn(50, 10, dtype="float32")
y = (cp.random.randn(50, dtype="float32") > 0).astype("int8")
weights = np.random.random(50) + 1
cupy_weights = cp.array(weights)
base_margin = np.random.random(50)
cupy_base_margin = cp.array(base_margin)
clf = xgb.XGBClassifier(gpu_id=0, tree_method='gpu_hist', use_label_encoder=False)
clf.fit(X, y, sample_weight=cupy_weights, base_margin=cupy_base_margin, eval_set=[(X, y)])
clf = xgb.XGBClassifier(gpu_id=0, tree_method="gpu_hist", use_label_encoder=False)
clf.fit(
X,
y,
sample_weight=cupy_weights,
base_margin=cupy_base_margin,
eval_set=[(X, y)],
)
pred = clf.predict(X)
assert np.array_equal(np.unique(pred), np.array([0, 1]))