diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index 9d04610b8..ff8ecfbdf 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -257,21 +257,29 @@ class GPUPredictor : public xgboost::Predictor { }; private: - void DevicePredictInternal(DMatrix* dmat, HostDeviceVector* out_preds, + void DevicePredictInternal(DMatrix* dmat, + HostDeviceVector* out_preds, const gbm::GBTreeModel& model, size_t tree_begin, size_t tree_end) { if (tree_end - tree_begin == 0) { return; } - // Add dmatrix to device if not seen before - if (this->device_matrix_cache_.find(dmat) == - this->device_matrix_cache_.end()) { - this->device_matrix_cache_.emplace( - dmat, std::unique_ptr( - new DeviceMatrix(dmat, param.gpu_id, param.silent))); + std::shared_ptr device_matrix; + // Matrix is not in host cache, create a temporary matrix + if (this->cache_.find(dmat) == this->cache_.end()) { + device_matrix = std::shared_ptr( + new DeviceMatrix(dmat, param.gpu_id, param.silent)); + } else { + // Create this matrix on device if it doesn't exist + if (this->device_matrix_cache_.find(dmat) == + this->device_matrix_cache_.end()) { + this->device_matrix_cache_.emplace( + dmat, std::shared_ptr( + new DeviceMatrix(dmat, param.gpu_id, param.silent))); + } + device_matrix = device_matrix_cache_.find(dmat)->second; } - DeviceMatrix* device_matrix = device_matrix_cache_.find(dmat)->second.get(); dh::safe_cuda(cudaSetDevice(param.gpu_id)); CHECK_EQ(model.param.size_leaf_vector, 0); @@ -328,10 +336,10 @@ class GPUPredictor : public xgboost::Predictor { dh::safe_cuda(cudaDeviceSynchronize()); thrust::copy(device_matrix->predictions.begin(), - device_matrix->predictions.end(), out_preds->tbegin(param.gpu_id)); + device_matrix->predictions.end(), + out_preds->tbegin(param.gpu_id)); } - public: GPUPredictor() : cpu_predictor(Predictor::Create("cpu_predictor")) {} @@ -361,17 +369,16 @@ class GPUPredictor : public xgboost::Predictor { out_preds->resize(n, 0.0f, 
param.gpu_id); if (base_margin.size() != 0) { CHECK_EQ(out_preds->size(), n); - thrust::copy(base_margin.begin(), base_margin.end(), out_preds->tbegin(param.gpu_id)); + thrust::copy(base_margin.begin(), base_margin.end(), + out_preds->tbegin(param.gpu_id)); } else { thrust::fill(out_preds->tbegin(param.gpu_id), out_preds->tend(param.gpu_id), model.base_margin); } } - bool PredictFromCache(DMatrix* dmat, - HostDeviceVector* out_preds, - const gbm::GBTreeModel& model, - unsigned ntree_limit) { + bool PredictFromCache(DMatrix* dmat, HostDeviceVector* out_preds, + const gbm::GBTreeModel& model, unsigned ntree_limit) { if (ntree_limit == 0 || ntree_limit * model.param.num_output_group >= model.trees.size()) { auto it = cache_.find(dmat); @@ -380,9 +387,9 @@ class GPUPredictor : public xgboost::Predictor { if (y.size() != 0) { dh::safe_cuda(cudaSetDevice(param.gpu_id)); out_preds->resize(y.size(), 0.0f, param.gpu_id); - dh::safe_cuda - (cudaMemcpy(out_preds->ptr_d(param.gpu_id), y.ptr_d(param.gpu_id), - out_preds->size() * sizeof(bst_float), cudaMemcpyDefault)); + dh::safe_cuda(cudaMemcpy( + out_preds->ptr_d(param.gpu_id), y.ptr_d(param.gpu_id), + out_preds->size() * sizeof(bst_float), cudaMemcpyDefault)); return true; } } @@ -391,9 +398,10 @@ class GPUPredictor : public xgboost::Predictor { return false; } - void UpdatePredictionCache(const gbm::GBTreeModel& model, - std::vector>* updaters, - int num_new_trees) override { + void UpdatePredictionCache( + const gbm::GBTreeModel& model, + std::vector>* updaters, + int num_new_trees) override { auto old_ntree = model.trees.size() - num_new_trees; // update cache entry for (auto& kv : cache_) { @@ -408,7 +416,8 @@ class GPUPredictor : public xgboost::Predictor { static_cast(model.trees.size())); } else if (model.param.num_output_group == 1 && updaters->size() > 0 && num_new_trees == 1 && - updaters->back()->UpdatePredictionCache(e.data.get(), &predictions)) { + updaters->back()->UpdatePredictionCache(e.data.get(), 
&predictions)) { // do nothing } else { DevicePredictInternal(dmat, &predictions, model, old_ntree, @@ -431,13 +440,12 @@ class GPUPredictor : public xgboost::Predictor { void PredictContribution(DMatrix* p_fmat, std::vector* out_contribs, - const gbm::GBTreeModel& model, - unsigned ntree_limit, - bool approximate, - int condition, + const gbm::GBTreeModel& model, unsigned ntree_limit, + bool approximate, int condition, unsigned condition_feature) override { - cpu_predictor->PredictContribution(p_fmat, out_contribs, model, - ntree_limit, approximate, condition, condition_feature); + cpu_predictor->PredictContribution(p_fmat, out_contribs, model, ntree_limit, + approximate, condition, + condition_feature); } void PredictInteractionContributions(DMatrix* p_fmat, @@ -460,7 +468,7 @@ class GPUPredictor : public xgboost::Predictor { private: GPUPredictionParam param; std::unique_ptr cpu_predictor; - std::unordered_map> + std::unordered_map> device_matrix_cache_; thrust::device_vector nodes; thrust::device_vector tree_segments; diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 27c51010f..4ce2a5be5 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -1,5 +1,4 @@ import numpy as np -import random import xgboost as xgb import testing as tm from nose.tools import raises @@ -91,10 +90,6 @@ def test_feature_importances(): xgb_model = xgb.XGBClassifier(seed=0).fit(X, y) np.testing.assert_almost_equal(xgb_model.feature_importances_, exp) - # string columns, the feature order must be kept - chars = list('abcdefghijklmnopqrstuvwxyz') - X.columns = ["".join(random.sample(chars, 5)) for x in range(64)] - xgb_model = xgb.XGBClassifier(seed=0).fit(X, y) np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)