Fix bug with gpu_predictor caching behaviour (#3177)

* Fixes #3162
This commit is contained in:
Rory Mitchell 2018-03-18 10:35:10 +13:00 committed by GitHub
parent cdc036b752
commit 9fa45d3a9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 37 additions and 34 deletions

View File

@ -257,21 +257,29 @@ class GPUPredictor : public xgboost::Predictor {
}; };
private: private:
void DevicePredictInternal(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds, void DevicePredictInternal(DMatrix* dmat,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, size_t tree_begin, const gbm::GBTreeModel& model, size_t tree_begin,
size_t tree_end) { size_t tree_end) {
if (tree_end - tree_begin == 0) { if (tree_end - tree_begin == 0) {
return; return;
} }
// Add dmatrix to device if not seen before std::shared_ptr<DeviceMatrix> device_matrix;
// Matrix is not in host cache, create a temporary matrix
if (this->cache_.find(dmat) == this->cache_.end()) {
device_matrix = std::shared_ptr<DeviceMatrix>(
new DeviceMatrix(dmat, param.gpu_id, param.silent));
} else {
// Create this matrix on device if doesn't exist
if (this->device_matrix_cache_.find(dmat) == if (this->device_matrix_cache_.find(dmat) ==
this->device_matrix_cache_.end()) { this->device_matrix_cache_.end()) {
this->device_matrix_cache_.emplace( this->device_matrix_cache_.emplace(
dmat, std::unique_ptr<DeviceMatrix>( dmat, std::shared_ptr<DeviceMatrix>(
new DeviceMatrix(dmat, param.gpu_id, param.silent))); new DeviceMatrix(dmat, param.gpu_id, param.silent)));
} }
DeviceMatrix* device_matrix = device_matrix_cache_.find(dmat)->second.get(); device_matrix = device_matrix_cache_.find(dmat)->second;
}
dh::safe_cuda(cudaSetDevice(param.gpu_id)); dh::safe_cuda(cudaSetDevice(param.gpu_id));
CHECK_EQ(model.param.size_leaf_vector, 0); CHECK_EQ(model.param.size_leaf_vector, 0);
@ -328,10 +336,10 @@ class GPUPredictor : public xgboost::Predictor {
dh::safe_cuda(cudaDeviceSynchronize()); dh::safe_cuda(cudaDeviceSynchronize());
thrust::copy(device_matrix->predictions.begin(), thrust::copy(device_matrix->predictions.begin(),
device_matrix->predictions.end(), out_preds->tbegin(param.gpu_id)); device_matrix->predictions.end(),
out_preds->tbegin(param.gpu_id));
} }
public: public:
GPUPredictor() : cpu_predictor(Predictor::Create("cpu_predictor")) {} GPUPredictor() : cpu_predictor(Predictor::Create("cpu_predictor")) {}
@ -361,17 +369,16 @@ class GPUPredictor : public xgboost::Predictor {
out_preds->resize(n, 0.0f, param.gpu_id); out_preds->resize(n, 0.0f, param.gpu_id);
if (base_margin.size() != 0) { if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n); CHECK_EQ(out_preds->size(), n);
thrust::copy(base_margin.begin(), base_margin.end(), out_preds->tbegin(param.gpu_id)); thrust::copy(base_margin.begin(), base_margin.end(),
out_preds->tbegin(param.gpu_id));
} else { } else {
thrust::fill(out_preds->tbegin(param.gpu_id), thrust::fill(out_preds->tbegin(param.gpu_id),
out_preds->tend(param.gpu_id), model.base_margin); out_preds->tend(param.gpu_id), model.base_margin);
} }
} }
bool PredictFromCache(DMatrix* dmat, bool PredictFromCache(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
HostDeviceVector<bst_float>* out_preds, const gbm::GBTreeModel& model, unsigned ntree_limit) {
const gbm::GBTreeModel& model,
unsigned ntree_limit) {
if (ntree_limit == 0 || if (ntree_limit == 0 ||
ntree_limit * model.param.num_output_group >= model.trees.size()) { ntree_limit * model.param.num_output_group >= model.trees.size()) {
auto it = cache_.find(dmat); auto it = cache_.find(dmat);
@ -380,8 +387,8 @@ class GPUPredictor : public xgboost::Predictor {
if (y.size() != 0) { if (y.size() != 0) {
dh::safe_cuda(cudaSetDevice(param.gpu_id)); dh::safe_cuda(cudaSetDevice(param.gpu_id));
out_preds->resize(y.size(), 0.0f, param.gpu_id); out_preds->resize(y.size(), 0.0f, param.gpu_id);
dh::safe_cuda dh::safe_cuda(cudaMemcpy(
(cudaMemcpy(out_preds->ptr_d(param.gpu_id), y.ptr_d(param.gpu_id), out_preds->ptr_d(param.gpu_id), y.ptr_d(param.gpu_id),
out_preds->size() * sizeof(bst_float), cudaMemcpyDefault)); out_preds->size() * sizeof(bst_float), cudaMemcpyDefault));
return true; return true;
} }
@ -391,7 +398,8 @@ class GPUPredictor : public xgboost::Predictor {
return false; return false;
} }
void UpdatePredictionCache(const gbm::GBTreeModel& model, void UpdatePredictionCache(
const gbm::GBTreeModel& model,
std::vector<std::unique_ptr<TreeUpdater>>* updaters, std::vector<std::unique_ptr<TreeUpdater>>* updaters,
int num_new_trees) override { int num_new_trees) override {
auto old_ntree = model.trees.size() - num_new_trees; auto old_ntree = model.trees.size() - num_new_trees;
@ -408,7 +416,8 @@ class GPUPredictor : public xgboost::Predictor {
static_cast<bst_uint>(model.trees.size())); static_cast<bst_uint>(model.trees.size()));
} else if (model.param.num_output_group == 1 && updaters->size() > 0 && } else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
num_new_trees == 1 && num_new_trees == 1 &&
updaters->back()->UpdatePredictionCache(e.data.get(), &predictions)) { updaters->back()->UpdatePredictionCache(e.data.get(),
&predictions)) {
// do nothing // do nothing
} else { } else {
DevicePredictInternal(dmat, &predictions, model, old_ntree, DevicePredictInternal(dmat, &predictions, model, old_ntree,
@ -431,13 +440,12 @@ class GPUPredictor : public xgboost::Predictor {
void PredictContribution(DMatrix* p_fmat, void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs, std::vector<bst_float>* out_contribs,
const gbm::GBTreeModel& model, const gbm::GBTreeModel& model, unsigned ntree_limit,
unsigned ntree_limit, bool approximate, int condition,
bool approximate,
int condition,
unsigned condition_feature) override { unsigned condition_feature) override {
cpu_predictor->PredictContribution(p_fmat, out_contribs, model, cpu_predictor->PredictContribution(p_fmat, out_contribs, model, ntree_limit,
ntree_limit, approximate, condition, condition_feature); approximate, condition,
condition_feature);
} }
void PredictInteractionContributions(DMatrix* p_fmat, void PredictInteractionContributions(DMatrix* p_fmat,
@ -460,7 +468,7 @@ class GPUPredictor : public xgboost::Predictor {
private: private:
GPUPredictionParam param; GPUPredictionParam param;
std::unique_ptr<Predictor> cpu_predictor; std::unique_ptr<Predictor> cpu_predictor;
std::unordered_map<DMatrix*, std::unique_ptr<DeviceMatrix>> std::unordered_map<DMatrix*, std::shared_ptr<DeviceMatrix>>
device_matrix_cache_; device_matrix_cache_;
thrust::device_vector<DevicePredictionNode> nodes; thrust::device_vector<DevicePredictionNode> nodes;
thrust::device_vector<size_t> tree_segments; thrust::device_vector<size_t> tree_segments;

View File

@ -1,5 +1,4 @@
import numpy as np import numpy as np
import random
import xgboost as xgb import xgboost as xgb
import testing as tm import testing as tm
from nose.tools import raises from nose.tools import raises
@ -91,10 +90,6 @@ def test_feature_importances():
xgb_model = xgb.XGBClassifier(seed=0).fit(X, y) xgb_model = xgb.XGBClassifier(seed=0).fit(X, y)
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp) np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
# string columns, the feature order must be kept
chars = list('abcdefghijklmnopqrstuvwxyz')
X.columns = ["".join(random.sample(chars, 5)) for x in range(64)]
xgb_model = xgb.XGBClassifier(seed=0).fit(X, y) xgb_model = xgb.XGBClassifier(seed=0).fit(X, y)
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp) np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)