Fix prediction from loaded pickle. (#4516)

This commit is contained in:
Jiaming Yuan 2019-05-30 15:05:09 +08:00 committed by GitHub
parent fed665ae8a
commit b48f895027
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 8 deletions

View File

@ -119,7 +119,7 @@ class Learner : public rabit::Serializable {
bool pred_leaf = false, bool pred_leaf = false,
bool pred_contribs = false, bool pred_contribs = false,
bool approx_contribs = false, bool approx_contribs = false,
bool pred_interactions = false) const = 0; bool pred_interactions = false) = 0;
/*! /*!
* \brief Set additional attribute to the Booster. * \brief Set additional attribute to the Booster.

View File

@ -329,6 +329,7 @@ class LearnerImpl : public Learner {
const std::string prefix = "SAVED_PARAM_"; const std::string prefix = "SAVED_PARAM_";
if (kv.first.find(prefix) == 0) { if (kv.first.find(prefix) == 0) {
const std::string saved_param = kv.first.substr(prefix.length()); const std::string saved_param = kv.first.substr(prefix.length());
bool is_gpu_predictor = saved_param == "predictor" && kv.second == "gpu_predictor";
#ifdef XGBOOST_USE_CUDA #ifdef XGBOOST_USE_CUDA
if (saved_param == "predictor" || saved_param == "n_gpus" if (saved_param == "predictor" || saved_param == "n_gpus"
|| saved_param == "gpu_id") { || saved_param == "gpu_id") {
@ -346,10 +347,14 @@ class LearnerImpl : public Learner {
<< " * JVM packages: bst.setParam(\"" << " * JVM packages: bst.setParam(\""
<< saved_param << "\", [new value])"; << saved_param << "\", [new value])";
} }
#else
if (is_gpu_predictor) {
cfg_["predictor"] = "cpu_predictor";
kv.second = "cpu_predictor";
}
#endif // XGBOOST_USE_CUDA #endif // XGBOOST_USE_CUDA
// NO visiable GPU on current environment // NO visiable GPU on current environment
if (GPUSet::AllVisible().Size() == 0 && if (is_gpu_predictor && GPUSet::AllVisible().Size() == 0) {
(saved_param == "predictor" && kv.second == "gpu_predictor")) {
cfg_["predictor"] = "cpu_predictor"; cfg_["predictor"] = "cpu_predictor";
kv.second = "cpu_predictor"; kv.second = "cpu_predictor";
} }
@ -543,7 +548,7 @@ class LearnerImpl : public Learner {
void Predict(DMatrix* data, bool output_margin, void Predict(DMatrix* data, bool output_margin,
HostDeviceVector<bst_float>* out_preds, unsigned ntree_limit, HostDeviceVector<bst_float>* out_preds, unsigned ntree_limit,
bool pred_leaf, bool pred_contribs, bool approx_contribs, bool pred_leaf, bool pred_contribs, bool approx_contribs,
bool pred_interactions) const override { bool pred_interactions) override {
bool multiple_predictions = static_cast<int>(pred_leaf) + bool multiple_predictions = static_cast<int>(pred_leaf) +
static_cast<int>(pred_interactions) + static_cast<int>(pred_interactions) +
static_cast<int>(pred_contribs); static_cast<int>(pred_contribs);
@ -712,10 +717,11 @@ class LearnerImpl : public Learner {
* \param ntree_limit limit number of trees used for boosted tree * \param ntree_limit limit number of trees used for boosted tree
* predictor, when it equals 0, this means we are using all the trees * predictor, when it equals 0, this means we are using all the trees
*/ */
inline void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds, void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit = 0) const { unsigned ntree_limit = 0) {
CHECK(gbm_ != nullptr) CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or InitModel"; << "Predict must happen after Load or InitModel";
ConfigurationWithKnownData(data);
gbm_->PredictBatch(data, out_preds, ntree_limit); gbm_->PredictBatch(data, out_preds, ntree_limit);
} }

View File

@ -4,7 +4,9 @@ import unittest
import numpy as np import numpy as np
import subprocess import subprocess
import os import os
import sys
import xgboost as xgb import xgboost as xgb
from xgboost import XGBClassifier
model_path = './model.pkl' model_path = './model.pkl'
@ -17,6 +19,17 @@ def build_dataset():
return x, y return x, y
def save_pickle(bst, path):
with open(path, 'wb') as fd:
pickle.dump(bst, fd)
def load_pickle(path):
with open(path, 'rb') as fd:
bst = pickle.load(fd)
return bst
class TestPickling(unittest.TestCase): class TestPickling(unittest.TestCase):
def test_pickling(self): def test_pickling(self):
x, y = build_dataset() x, y = build_dataset()
@ -27,8 +40,7 @@ class TestPickling(unittest.TestCase):
'verbosity': 1} 'verbosity': 1}
bst = xgb.train(param, train_x) bst = xgb.train(param, train_x)
with open(model_path, 'wb') as fd: save_pickle(bst, model_path)
pickle.dump(bst, fd)
args = ["pytest", args = ["pytest",
"--verbose", "--verbose",
"-s", "-s",
@ -51,3 +63,30 @@ class TestPickling(unittest.TestCase):
status = subprocess.call(command, env=env, shell=True) status = subprocess.call(command, env=env, shell=True)
assert status == 0 assert status == 0
os.remove(model_path) os.remove(model_path)
def test_predict_sklearn_pickle(self):
x, y = build_dataset()
kwargs = {'tree_method': 'gpu_hist',
'predictor': 'gpu_predictor',
'verbosity': 2,
'objective': 'binary:logistic',
'n_estimators': 10}
model = XGBClassifier(**kwargs)
model.fit(x, y)
save_pickle(model, "model.pkl")
del model
# load model
model: xgb.XGBClassifier = load_pickle("model.pkl")
os.remove("model.pkl")
gpu_pred = model.predict(x, output_margin=True)
# Switch to CPU predictor
bst = model.get_booster()
bst.set_param({'predictor': 'cpu_predictor'})
cpu_pred = model.predict(x, output_margin=True)
np.testing.assert_allclose(cpu_pred, gpu_pred, rtol=1e-5)