Remove omp_get_max_threads in CPU predictor. (#7519)

This is part of the on going effort to remove the dependency on global omp variables.
This commit is contained in:
Jiaming Yuan
2022-01-04 22:12:15 +08:00
committed by GitHub
parent 5516281881
commit 68cdbc9c16
6 changed files with 68 additions and 68 deletions

View File

@@ -633,12 +633,12 @@ class GPUPredictor : public xgboost::Predictor {
size_t num_features,
HostDeviceVector<bst_float>* predictions,
size_t batch_offset, bool is_dense) const {
batch.offset.SetDevice(generic_param_->gpu_id);
batch.data.SetDevice(generic_param_->gpu_id);
batch.offset.SetDevice(ctx_->gpu_id);
batch.data.SetDevice(ctx_->gpu_id);
const uint32_t BLOCK_THREADS = 128;
size_t num_rows = batch.Size();
auto GRID_SIZE = static_cast<uint32_t>(common::DivRoundUp(num_rows, BLOCK_THREADS));
auto max_shared_memory_bytes = ConfigureDevice(generic_param_->gpu_id);
auto max_shared_memory_bytes = ConfigureDevice(ctx_->gpu_id);
size_t shared_memory_bytes =
SharedMemoryBytes<BLOCK_THREADS>(num_features, max_shared_memory_bytes);
bool use_shared = shared_memory_bytes != 0;
@@ -694,10 +694,10 @@ class GPUPredictor : public xgboost::Predictor {
if (tree_end - tree_begin == 0) {
return;
}
out_preds->SetDevice(generic_param_->gpu_id);
out_preds->SetDevice(ctx_->gpu_id);
auto const& info = dmat->Info();
DeviceModel d_model;
d_model.Init(model, tree_begin, tree_end, generic_param_->gpu_id);
d_model.Init(model, tree_begin, tree_end, ctx_->gpu_id);
if (dmat->PageExists<SparsePage>()) {
size_t batch_offset = 0;
@@ -709,10 +709,10 @@ class GPUPredictor : public xgboost::Predictor {
} else {
size_t batch_offset = 0;
for (auto const& page : dmat->GetBatches<EllpackPage>()) {
dmat->Info().feature_types.SetDevice(generic_param_->gpu_id);
dmat->Info().feature_types.SetDevice(ctx_->gpu_id);
auto feature_types = dmat->Info().feature_types.ConstDeviceSpan();
this->PredictInternal(
page.Impl()->GetDeviceAccessor(generic_param_->gpu_id, feature_types),
page.Impl()->GetDeviceAccessor(ctx_->gpu_id, feature_types),
d_model,
out_preds,
batch_offset);
@@ -726,15 +726,15 @@ class GPUPredictor : public xgboost::Predictor {
Predictor::Predictor{generic_param} {}
~GPUPredictor() override {
if (generic_param_->gpu_id >= 0 && generic_param_->gpu_id < common::AllVisibleGPUs()) {
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
if (ctx_->gpu_id >= 0 && ctx_->gpu_id < common::AllVisibleGPUs()) {
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
}
}
void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts,
const gbm::GBTreeModel& model, uint32_t tree_begin,
uint32_t tree_end = 0) const override {
int device = generic_param_->gpu_id;
int device = ctx_->gpu_id;
CHECK_GE(device, 0) << "Set `gpu_id' to positive value for processing GPU data.";
auto* out_preds = &predts->predictions;
if (tree_end == 0) {
@@ -754,7 +754,7 @@ class GPUPredictor : public xgboost::Predictor {
CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
<< "Number of columns in data must equal to trained model.";
CHECK_EQ(dh::CurrentDevice(), m->DeviceIdx())
<< "XGBoost is running on device: " << this->generic_param_->gpu_id << ", "
<< "XGBoost is running on device: " << this->ctx_->gpu_id << ", "
<< "but data is on: " << m->DeviceIdx();
if (p_m) {
p_m->Info().num_row_ = m->NumRows();
@@ -821,8 +821,8 @@ class GPUPredictor : public xgboost::Predictor {
if (tree_weights != nullptr) {
LOG(FATAL) << "Dart booster feature " << not_implemented;
}
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
out_contribs->SetDevice(generic_param_->gpu_id);
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
out_contribs->SetDevice(ctx_->gpu_id);
if (tree_end == 0 || tree_end > model.trees.size()) {
tree_end = static_cast<uint32_t>(model.trees.size());
}
@@ -840,12 +840,12 @@ class GPUPredictor : public xgboost::Predictor {
dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>>
device_paths;
DeviceModel d_model;
d_model.Init(model, 0, tree_end, generic_param_->gpu_id);
d_model.Init(model, 0, tree_end, ctx_->gpu_id);
dh::device_vector<uint32_t> categories;
ExtractPaths(&device_paths, &d_model, &categories, generic_param_->gpu_id);
ExtractPaths(&device_paths, &d_model, &categories, ctx_->gpu_id);
for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
batch.data.SetDevice(generic_param_->gpu_id);
batch.offset.SetDevice(generic_param_->gpu_id);
batch.data.SetDevice(ctx_->gpu_id);
batch.offset.SetDevice(ctx_->gpu_id);
SparsePageView X(batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
model.learner_model_param->num_feature);
auto begin = dh::tbegin(phis) + batch.base_rowid * contributions_columns;
@@ -854,7 +854,7 @@ class GPUPredictor : public xgboost::Predictor {
dh::tend(phis));
}
// Add the base margin term to last column
p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
float base_score = model.learner_model_param->base_score;
dh::LaunchN(
@@ -879,8 +879,8 @@ class GPUPredictor : public xgboost::Predictor {
if (tree_weights != nullptr) {
LOG(FATAL) << "Dart booster feature " << not_implemented;
}
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
out_contribs->SetDevice(generic_param_->gpu_id);
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
out_contribs->SetDevice(ctx_->gpu_id);
if (tree_end == 0 || tree_end > model.trees.size()) {
tree_end = static_cast<uint32_t>(model.trees.size());
}
@@ -899,12 +899,12 @@ class GPUPredictor : public xgboost::Predictor {
dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>>
device_paths;
DeviceModel d_model;
d_model.Init(model, 0, tree_end, generic_param_->gpu_id);
d_model.Init(model, 0, tree_end, ctx_->gpu_id);
dh::device_vector<uint32_t> categories;
ExtractPaths(&device_paths, &d_model, &categories, generic_param_->gpu_id);
ExtractPaths(&device_paths, &d_model, &categories, ctx_->gpu_id);
for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
batch.data.SetDevice(generic_param_->gpu_id);
batch.offset.SetDevice(generic_param_->gpu_id);
batch.data.SetDevice(ctx_->gpu_id);
batch.offset.SetDevice(ctx_->gpu_id);
SparsePageView X(batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
model.learner_model_param->num_feature);
auto begin = dh::tbegin(phis) + batch.base_rowid * contributions_columns;
@@ -913,7 +913,7 @@ class GPUPredictor : public xgboost::Predictor {
dh::tend(phis));
}
// Add the base margin term to last column
p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
float base_score = model.learner_model_param->base_score;
size_t n_features = model.learner_model_param->num_feature;
@@ -938,8 +938,8 @@ class GPUPredictor : public xgboost::Predictor {
void PredictLeaf(DMatrix *p_fmat, HostDeviceVector<bst_float> *predictions,
const gbm::GBTreeModel &model,
unsigned tree_end) const override {
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
auto max_shared_memory_bytes = ConfigureDevice(generic_param_->gpu_id);
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
auto max_shared_memory_bytes = ConfigureDevice(ctx_->gpu_id);
const MetaInfo& info = p_fmat->Info();
constexpr uint32_t kBlockThreads = 128;
@@ -953,15 +953,15 @@ class GPUPredictor : public xgboost::Predictor {
if (tree_end == 0 || tree_end > model.trees.size()) {
tree_end = static_cast<uint32_t>(model.trees.size());
}
predictions->SetDevice(generic_param_->gpu_id);
predictions->SetDevice(ctx_->gpu_id);
predictions->Resize(num_rows * tree_end);
DeviceModel d_model;
d_model.Init(model, 0, tree_end, this->generic_param_->gpu_id);
d_model.Init(model, 0, tree_end, this->ctx_->gpu_id);
if (p_fmat->PageExists<SparsePage>()) {
for (auto const& batch : p_fmat->GetBatches<SparsePage>()) {
batch.data.SetDevice(generic_param_->gpu_id);
batch.offset.SetDevice(generic_param_->gpu_id);
batch.data.SetDevice(ctx_->gpu_id);
batch.offset.SetDevice(ctx_->gpu_id);
bst_row_t batch_offset = 0;
SparsePageView data{batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
model.learner_model_param->num_feature};
@@ -986,7 +986,7 @@ class GPUPredictor : public xgboost::Predictor {
} else {
for (auto const& batch : p_fmat->GetBatches<EllpackPage>()) {
bst_row_t batch_offset = 0;
EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(generic_param_->gpu_id)};
EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->gpu_id)};
size_t num_rows = batch.Size();
auto grid =
static_cast<uint32_t>(common::DivRoundUp(num_rows, kBlockThreads));