[breaking] Remove the predictor param, allow fallback to prediction using DMatrix. (#9129)

- A `DeviceOrd` struct is implemented to indicate the device. It will eventually replace the `gpu_id` parameter.
- The `predictor` parameter is removed.
- Fallback to `DMatrix` when `inplace_predict` is not available.
- The heuristic for choosing a predictor is only used during training.
This commit is contained in:
Jiaming Yuan
2023-07-03 19:23:54 +08:00
committed by GitHub
parent 3a0f787703
commit 39390cc2ee
54 changed files with 1049 additions and 778 deletions

View File

@@ -1,5 +1,4 @@
'''Test model IO with pickle.'''
import json
"""Test model IO with pickle."""
import os
import pickle
import subprocess
@@ -11,49 +10,48 @@ import xgboost as xgb
from xgboost import XGBClassifier
from xgboost import testing as tm
model_path = './model.pkl'
model_path = "./model.pkl"
pytestmark = tm.timeout(30)
def build_dataset():
N = 10
x = np.linspace(0, N*N, N*N)
x = np.linspace(0, N * N, N * N)
x = x.reshape((N, N))
y = np.linspace(0, N, N)
return x, y
def save_pickle(bst, path):
with open(path, 'wb') as fd:
with open(path, "wb") as fd:
pickle.dump(bst, fd)
def load_pickle(path):
with open(path, 'rb') as fd:
with open(path, "rb") as fd:
bst = pickle.load(fd)
return bst
class TestPickling:
args_template = [
"pytest",
"--verbose",
"-s",
"--fulltrace"]
args_template = ["pytest", "--verbose", "-s", "--fulltrace"]
def run_pickling(self, bst) -> None:
save_pickle(bst, model_path)
args = [
"pytest", "--verbose", "-s", "--fulltrace",
"./tests/python-gpu/load_pickle.py::TestLoadPickle::test_load_pkl"
"pytest",
"--verbose",
"-s",
"--fulltrace",
"./tests/python-gpu/load_pickle.py::TestLoadPickle::test_load_pkl",
]
command = ''
command = ""
for arg in args:
command += arg
command += ' '
command += " "
cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
cuda_environment = {"CUDA_VISIBLE_DEVICES": "-1"}
env = os.environ.copy()
# Passing new_environment directly to `env' argument results
# in failure on Windows:
@@ -72,7 +70,7 @@ class TestPickling:
x, y = build_dataset()
train_x = xgb.DMatrix(x, label=y)
param = {'tree_method': 'gpu_hist', "gpu_id": 0}
param = {"tree_method": "gpu_hist", "gpu_id": 0}
bst = xgb.train(param, train_x)
self.run_pickling(bst)
@@ -91,43 +89,46 @@ class TestPickling:
X, y = build_dataset()
dtrain = xgb.DMatrix(X, y)
bst = xgb.train({'tree_method': 'gpu_hist',
'gpu_id': 1},
dtrain, num_boost_round=6)
bst = xgb.train(
{"tree_method": "gpu_hist", "gpu_id": 1}, dtrain, num_boost_round=6
)
model_path = 'model.pkl'
model_path = "model.pkl"
save_pickle(bst, model_path)
cuda_environment = {'CUDA_VISIBLE_DEVICES': '0'}
cuda_environment = {"CUDA_VISIBLE_DEVICES": "0"}
env = os.environ.copy()
env.update(cuda_environment)
args = self.args_template.copy()
args.append(
"./tests/python-gpu/"
"load_pickle.py::TestLoadPickle::test_wrap_gpu_id"
"./tests/python-gpu/" "load_pickle.py::TestLoadPickle::test_wrap_gpu_id"
)
status = subprocess.call(args, env=env)
assert status == 0
os.remove(model_path)
def test_pickled_predictor(self):
x, y = build_dataset()
def test_pickled_context(self):
x, y = tm.make_sparse_regression(10, 10, sparsity=0.8, as_dense=True)
train_x = xgb.DMatrix(x, label=y)
param = {'tree_method': 'gpu_hist',
'verbosity': 1, 'predictor': 'gpu_predictor'}
param = {"tree_method": "gpu_hist", "verbosity": 1}
bst = xgb.train(param, train_x)
config = json.loads(bst.save_config())
assert config['learner']['gradient_booster']['gbtree_train_param'][
'predictor'] == 'gpu_predictor'
with tm.captured_output() as (out, err):
bst.inplace_predict(x)
# The warning is redirected to Python callback, so it's printed in stdout
# instead of stderr.
stdout = out.getvalue()
assert stdout.find("mismatched devices") != -1
save_pickle(bst, model_path)
args = self.args_template.copy()
args.append(
"./tests/python-gpu/"
"load_pickle.py::TestLoadPickle::test_predictor_type_is_auto")
root = tm.project_root(__file__)
path = os.path.join(root, "tests", "python-gpu", "load_pickle.py")
args.append(path + "::TestLoadPickle::test_context_is_removed")
cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
cuda_environment = {"CUDA_VISIBLE_DEVICES": "-1"}
env = os.environ.copy()
env.update(cuda_environment)
@@ -138,25 +139,29 @@ class TestPickling:
args = self.args_template.copy()
args.append(
"./tests/python-gpu/"
"load_pickle.py::TestLoadPickle::test_predictor_type_is_gpu")
"load_pickle.py::TestLoadPickle::test_context_is_preserved"
)
# Load in environment that has GPU.
env = os.environ.copy()
assert 'CUDA_VISIBLE_DEVICES' not in env.keys()
assert "CUDA_VISIBLE_DEVICES" not in env.keys()
status = subprocess.call(args, env=env)
assert status == 0
os.remove(model_path)
@pytest.mark.skipif(**tm.no_sklearn())
def test_predict_sklearn_pickle(self):
def test_predict_sklearn_pickle(self) -> None:
from sklearn.datasets import load_digits
x, y = load_digits(return_X_y=True)
kwargs = {'tree_method': 'gpu_hist',
'predictor': 'gpu_predictor',
'objective': 'binary:logistic',
'n_estimators': 10}
kwargs = {
"tree_method": "gpu_hist",
"objective": "binary:logistic",
"gpu_id": 0,
"n_estimators": 10,
}
model = XGBClassifier(**kwargs)
model.fit(x, y)
@@ -165,24 +170,25 @@ class TestPickling:
del model
# load model
model: xgb.XGBClassifier = load_pickle("model.pkl")
model = load_pickle("model.pkl")
os.remove("model.pkl")
gpu_pred = model.predict(x, output_margin=True)
# Switch to CPU predictor
bst = model.get_booster()
bst.set_param({'predictor': 'cpu_predictor'})
tm.set_ordinal(-1, bst)
cpu_pred = model.predict(x, output_margin=True)
np.testing.assert_allclose(cpu_pred, gpu_pred, rtol=1e-5)
def test_training_on_cpu_only_env(self):
cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
cuda_environment = {"CUDA_VISIBLE_DEVICES": "-1"}
env = os.environ.copy()
env.update(cuda_environment)
args = self.args_template.copy()
args.append(
"./tests/python-gpu/"
"load_pickle.py::TestLoadPickle::test_training_on_cpu_only_env")
"load_pickle.py::TestLoadPickle::test_training_on_cpu_only_env"
)
status = subprocess.call(args, env=env)
assert status == 0