Fix inplace predict missing value. (#6787)

This commit is contained in:
Jiaming Yuan
2021-03-27 05:36:10 +08:00
committed by GitHub
parent 5c87c2bba8
commit a59c7323b4
8 changed files with 97 additions and 33 deletions

View File

@@ -154,17 +154,22 @@ class TestGPUPredict:
cp.cuda.runtime.setDevice(0)
rows = 1000
cols = 10
missing = 11 # set to integer for testing
cp_rng = cp.random.RandomState(1994)
cp.random.set_random_state(cp_rng)
X = cp.random.randn(rows, cols)
missing_idx = [i for i in range(0, cols, 4)]
X[:, missing_idx] = missing # set to be missing
y = cp.random.randn(rows)
dtrain = xgb.DMatrix(X, y)
booster = xgb.train({'tree_method': 'gpu_hist'},
dtrain, num_boost_round=10)
test = xgb.DMatrix(X[:10, ...])
predt_from_array = booster.inplace_predict(X[:10, ...])
booster = xgb.train({'tree_method': 'gpu_hist'}, dtrain, num_boost_round=10)
test = xgb.DMatrix(X[:10, ...], missing=missing)
predt_from_array = booster.inplace_predict(X[:10, ...], missing=missing)
predt_from_dmatrix = booster.predict(test)
cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix)
@@ -185,6 +190,20 @@ class TestGPUPredict:
base_margin = cp_rng.randn(rows)
self.run_inplace_base_margin(booster, dtrain, X, base_margin)
# Create a wide dataset
X = cp_rng.randn(100, 10000)
y = cp_rng.randn(100)
missing_idx = [i for i in range(0, X.shape[1], 16)]
X[:, missing_idx] = missing
reg = xgb.XGBRegressor(tree_method="gpu_hist", n_estimators=8, missing=missing)
reg.fit(X, y)
gpu_predt = reg.predict(X)
reg.set_params(predictor="cpu_predictor")
cpu_predt = reg.predict(X)
np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)
@pytest.mark.skipif(**tm.no_cudf())
def test_inplace_predict_cudf(self):
import cupy as cp