Fix bug with gpu_predictor caching behaviour (#3177)

* Fixes #3162
Rory Mitchell 2018-03-18 10:35:10 +13:00 committed by GitHub
parent cdc036b752
commit 9fa45d3a9c
2 changed files with 37 additions and 34 deletions


@@ -257,21 +257,29 @@ class GPUPredictor : public xgboost::Predictor {
   };
  private:
-  void DevicePredictInternal(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
+  void DevicePredictInternal(DMatrix* dmat,
+                             HostDeviceVector<bst_float>* out_preds,
                              const gbm::GBTreeModel& model, size_t tree_begin,
                              size_t tree_end) {
-    // Add dmatrix to device if not seen before
-    if (this->device_matrix_cache_.find(dmat) ==
-        this->device_matrix_cache_.end()) {
-      this->device_matrix_cache_.emplace(
-          dmat, std::unique_ptr<DeviceMatrix>(
-                    new DeviceMatrix(dmat, param.gpu_id, param.silent)));
-    }
-    DeviceMatrix* device_matrix = device_matrix_cache_.find(dmat)->second.get();
+    if (tree_end - tree_begin == 0) {
+      return;
+    }
+    std::shared_ptr<DeviceMatrix> device_matrix;
+    // Matrix is not in host cache, create a temporary matrix
+    if (this->cache_.find(dmat) == this->cache_.end()) {
+      device_matrix = std::shared_ptr<DeviceMatrix>(
+          new DeviceMatrix(dmat, param.gpu_id, param.silent));
+    } else {
+      // Create this matrix on device if doesn't exist
+      if (this->device_matrix_cache_.find(dmat) ==
+          this->device_matrix_cache_.end()) {
+        this->device_matrix_cache_.emplace(
+            dmat, std::shared_ptr<DeviceMatrix>(
+                      new DeviceMatrix(dmat, param.gpu_id, param.silent)));
+      }
+      device_matrix = device_matrix_cache_.find(dmat)->second;
+    }
     dh::safe_cuda(cudaSetDevice(param.gpu_id));
     CHECK_EQ(model.param.size_leaf_vector, 0);
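
The hunk above is the substance of the fix: a DeviceMatrix is now kept in device_matrix_cache_ only when the incoming DMatrix is already tracked by the predictor's host-side prediction cache (cache_); any other matrix gets a temporary device copy that is released as soon as the local shared_ptr goes out of scope. Below is a minimal standalone sketch of that ownership pattern; Matrix, DeviceMatrix, and the method names are placeholders, not xgboost's real classes.

#include <iostream>
#include <memory>
#include <unordered_map>
#include <unordered_set>

// Placeholder types standing in for xgboost's DMatrix / DeviceMatrix; only
// the ownership pattern of the fix is modelled here, not the real classes.
struct Matrix {};

struct DeviceMatrix {
  explicit DeviceMatrix(const Matrix*) { std::cout << "copy matrix to GPU\n"; }
  ~DeviceMatrix() { std::cout << "free GPU copy\n"; }
};

class Predictor {
 public:
  // Matrices registered here play the role of entries in Predictor::cache_.
  void RegisterInHostCache(const Matrix* m) { host_cache_.insert(m); }

  // Mirrors the fixed DevicePredictInternal: persist the device copy only
  // for matrices the host prediction cache already knows about.
  std::shared_ptr<DeviceMatrix> GetDeviceMatrix(const Matrix* m) {
    if (host_cache_.find(m) == host_cache_.end()) {
      // Not in the host cache: temporary copy, freed when the caller is done.
      return std::make_shared<DeviceMatrix>(m);
    }
    auto it = device_matrix_cache_.find(m);
    if (it == device_matrix_cache_.end()) {
      it = device_matrix_cache_.emplace(m, std::make_shared<DeviceMatrix>(m)).first;
    }
    return it->second;  // persistent copy, reused across prediction calls
  }

 private:
  std::unordered_set<const Matrix*> host_cache_;
  std::unordered_map<const Matrix*, std::shared_ptr<DeviceMatrix>> device_matrix_cache_;
};

int main() {
  Predictor pred;
  Matrix train, test;
  pred.RegisterInHostCache(&train);
  pred.GetDeviceMatrix(&train);  // uploaded once and kept in the cache
  pred.GetDeviceMatrix(&train);  // reused, no second upload
  pred.GetDeviceMatrix(&test);   // uploaded, then freed immediately
  return 0;
}

The same reasoning explains the unique_ptr-to-shared_ptr change to device_matrix_cache_ in the last hunk of this file: the single local handle has to be able to either share ownership with a cached entry or own a temporary outright.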
@@ -328,10 +336,10 @@ class GPUPredictor : public xgboost::Predictor {
     dh::safe_cuda(cudaDeviceSynchronize());
     thrust::copy(device_matrix->predictions.begin(),
-                 device_matrix->predictions.end(), out_preds->tbegin(param.gpu_id));
+                 device_matrix->predictions.end(),
+                 out_preds->tbegin(param.gpu_id));
   }
  public:
   GPUPredictor() : cpu_predictor(Predictor::Create("cpu_predictor")) {}
@@ -361,17 +369,16 @@ class GPUPredictor : public xgboost::Predictor {
     out_preds->resize(n, 0.0f, param.gpu_id);
     if (base_margin.size() != 0) {
       CHECK_EQ(out_preds->size(), n);
-      thrust::copy(base_margin.begin(), base_margin.end(), out_preds->tbegin(param.gpu_id));
+      thrust::copy(base_margin.begin(), base_margin.end(),
+                   out_preds->tbegin(param.gpu_id));
     } else {
       thrust::fill(out_preds->tbegin(param.gpu_id),
                    out_preds->tend(param.gpu_id), model.base_margin);
     }
   }
-  bool PredictFromCache(DMatrix* dmat,
-                        HostDeviceVector<bst_float>* out_preds,
-                        const gbm::GBTreeModel& model,
-                        unsigned ntree_limit) {
+  bool PredictFromCache(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
+                        const gbm::GBTreeModel& model, unsigned ntree_limit) {
     if (ntree_limit == 0 ||
         ntree_limit * model.param.num_output_group >= model.trees.size()) {
       auto it = cache_.find(dmat);
@@ -380,8 +387,8 @@ class GPUPredictor : public xgboost::Predictor {
       if (y.size() != 0) {
         dh::safe_cuda(cudaSetDevice(param.gpu_id));
         out_preds->resize(y.size(), 0.0f, param.gpu_id);
-        dh::safe_cuda
-            (cudaMemcpy(out_preds->ptr_d(param.gpu_id), y.ptr_d(param.gpu_id),
+        dh::safe_cuda(cudaMemcpy(
+            out_preds->ptr_d(param.gpu_id), y.ptr_d(param.gpu_id),
             out_preds->size() * sizeof(bst_float), cudaMemcpyDefault));
         return true;
       }
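
The two hunks above only re-wrap calls inside PredictFromCache, but they show its structure: when the caller asks for at least the full model (the ntree_limit check) and predictions for this DMatrix are already cached, the cached values are copied straight into out_preds and the full prediction pass is skipped. A rough host-only sketch of that fast path follows; the names are hypothetical and a plain memcpy stands in for the device-to-device cudaMemcpy shown in the diff.

#include <cstring>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in: a cache entry holding previously computed
// predictions for one matrix (the real code keeps these on the device).
struct PredictionCacheEntry {
  std::vector<float> predictions;
};

// Returns true and fills out_preds only if the cached result can be reused:
// the request must cover the whole model and an entry must exist.
bool PredictFromCache(
    const void* dmat, std::vector<float>* out_preds,
    size_t num_trees, unsigned ntree_limit,
    const std::unordered_map<const void*, PredictionCacheEntry>& cache) {
  // num_output_group is ignored here for simplicity; the real check is
  // ntree_limit * num_output_group >= model.trees.size().
  if (ntree_limit != 0 && ntree_limit < num_trees) {
    return false;  // partial prediction requested, cache not applicable
  }
  auto it = cache.find(dmat);
  if (it == cache.end() || it->second.predictions.empty()) {
    return false;  // nothing cached for this matrix
  }
  out_preds->resize(it->second.predictions.size());
  // In the diff this is cudaMemcpy(..., cudaMemcpyDefault) between device
  // pointers; a host memcpy is enough to show the control flow.
  std::memcpy(out_preds->data(), it->second.predictions.data(),
              out_preds->size() * sizeof(float));
  return true;
}

int main() {
  std::unordered_map<const void*, PredictionCacheEntry> cache;
  int dmat_tag = 0;  // any address works as a cache key in this sketch
  cache[&dmat_tag].predictions = {0.1f, 0.9f};

  std::vector<float> out;
  bool hit = PredictFromCache(&dmat_tag, &out, /*num_trees=*/10,
                              /*ntree_limit=*/0, cache);
  return hit ? 0 : 1;
}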
@@ -391,7 +398,8 @@ class GPUPredictor : public xgboost::Predictor {
     return false;
   }
-  void UpdatePredictionCache(const gbm::GBTreeModel& model,
+  void UpdatePredictionCache(
+      const gbm::GBTreeModel& model,
       std::vector<std::unique_ptr<TreeUpdater>>* updaters,
       int num_new_trees) override {
     auto old_ntree = model.trees.size() - num_new_trees;
@@ -408,7 +416,8 @@ class GPUPredictor : public xgboost::Predictor {
                               static_cast<bst_uint>(model.trees.size()));
       } else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
                  num_new_trees == 1 &&
-                 updaters->back()->UpdatePredictionCache(e.data.get(), &predictions)) {
+                 updaters->back()->UpdatePredictionCache(e.data.get(),
+                                                         &predictions)) {
         // do nothing
       } else {
         DevicePredictInternal(dmat, &predictions, model, old_ntree,
@@ -431,13 +440,12 @@ class GPUPredictor : public xgboost::Predictor {
   void PredictContribution(DMatrix* p_fmat,
                            std::vector<bst_float>* out_contribs,
-                           const gbm::GBTreeModel& model,
-                           unsigned ntree_limit,
-                           bool approximate,
-                           int condition,
+                           const gbm::GBTreeModel& model, unsigned ntree_limit,
+                           bool approximate, int condition,
                            unsigned condition_feature) override {
-    cpu_predictor->PredictContribution(p_fmat, out_contribs, model,
-                                       ntree_limit, approximate, condition, condition_feature);
+    cpu_predictor->PredictContribution(p_fmat, out_contribs, model, ntree_limit,
+                                       approximate, condition,
+                                       condition_feature);
   }
   void PredictInteractionContributions(DMatrix* p_fmat,
@@ -460,7 +468,7 @@ class GPUPredictor : public xgboost::Predictor {
  private:
   GPUPredictionParam param;
   std::unique_ptr<Predictor> cpu_predictor;
-  std::unordered_map<DMatrix*, std::unique_ptr<DeviceMatrix>>
+  std::unordered_map<DMatrix*, std::shared_ptr<DeviceMatrix>>
       device_matrix_cache_;
   thrust::device_vector<DevicePredictionNode> nodes;
   thrust::device_vector<size_t> tree_segments;
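
This last hunk is the type change that makes the new control flow possible: device_matrix_cache_ now stores std::shared_ptr<DeviceMatrix> instead of std::unique_ptr<DeviceMatrix>, so the single local handle in DevicePredictInternal can either share ownership with a cached entry or exclusively own a throwaway matrix. A small illustration of the difference, using placeholder types rather than xgboost code:

#include <memory>
#include <unordered_map>

struct DeviceMatrix {};  // placeholder

int main() {
  // After the change: the cache and a local handle can co-own an entry, and
  // the same handle can just as well own a temporary created outside it.
  std::unordered_map<int, std::shared_ptr<DeviceMatrix>> shared_cache;
  shared_cache.emplace(0, std::make_shared<DeviceMatrix>());
  std::shared_ptr<DeviceMatrix> handle = shared_cache.at(0);  // shared with cache
  handle = std::make_shared<DeviceMatrix>();  // temporary, freed with handle

  // Before the change: a unique_ptr cache can only hand out a raw pointer,
  // which cannot also own a temporary matrix.
  std::unordered_map<int, std::unique_ptr<DeviceMatrix>> unique_cache;
  unique_cache.emplace(0, std::unique_ptr<DeviceMatrix>(new DeviceMatrix()));
  DeviceMatrix* raw = unique_cache.at(0).get();
  (void)raw;
  return 0;
}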


@@ -1,5 +1,4 @@
 import numpy as np
-import random
 import xgboost as xgb
 import testing as tm
 from nose.tools import raises
@@ -91,10 +90,6 @@ def test_feature_importances():
     xgb_model = xgb.XGBClassifier(seed=0).fit(X, y)
     np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
-    # string columns, the feature order must be kept
-    chars = list('abcdefghijklmnopqrstuvwxyz')
-    X.columns = ["".join(random.sample(chars, 5)) for x in range(64)]
-    xgb_model = xgb.XGBClassifier(seed=0).fit(X, y)
-    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)