Remove omp_get_max_threads in CPU predictor. (#7519)
This is part of the ongoing effort to remove the dependency on global OpenMP variables.
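The GPU-side changes below follow the same idea: the predictor stops reading shared global state (omp_get_max_threads() on the CPU side, the old generic_param_ member here) and takes its configuration from an explicitly passed context, ctx_. A minimal sketch of that pattern, with illustrative names only (this Context, Threads(), and PredictRows are not the real xgboost API):

// Sketch: thread count comes from a caller-owned context instead of the
// global OpenMP state queried via omp_get_max_threads(). All names here
// are hypothetical stand-ins, not the actual xgboost implementation.
#include <cstdio>
#include <vector>

struct Context {
  int n_threads{1};  // configured once, e.g. from a user-facing parameter
  int Threads() const { return n_threads; }
};

// Before: the predictor called omp_get_max_threads() internally, so its
// behaviour silently depended on whatever the global OpenMP state was.
// After: the context passed in by the caller decides the parallelism.
void PredictRows(Context const* ctx, std::vector<float>* out) {
#pragma omp parallel for num_threads(ctx->Threads())
  for (int i = 0; i < static_cast<int>(out->size()); ++i) {
    (*out)[i] = static_cast<float>(i) * 0.5f;  // stand-in for per-row work
  }
}

int main() {
  Context ctx{4};  // explicit: four threads, regardless of OMP_NUM_THREADS
  std::vector<float> preds(1024);
  PredictRows(&ctx, &preds);
  std::printf("preds[42] = %f\n", preds[42]);
}

This makes the thread count deterministic and testable, since nothing in the call path consults mutable process-wide state.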
@@ -633,12 +633,12 @@ class GPUPredictor : public xgboost::Predictor {
                        size_t num_features,
                        HostDeviceVector<bst_float>* predictions,
                        size_t batch_offset, bool is_dense) const {
-    batch.offset.SetDevice(generic_param_->gpu_id);
-    batch.data.SetDevice(generic_param_->gpu_id);
+    batch.offset.SetDevice(ctx_->gpu_id);
+    batch.data.SetDevice(ctx_->gpu_id);
     const uint32_t BLOCK_THREADS = 128;
     size_t num_rows = batch.Size();
     auto GRID_SIZE = static_cast<uint32_t>(common::DivRoundUp(num_rows, BLOCK_THREADS));
-    auto max_shared_memory_bytes = ConfigureDevice(generic_param_->gpu_id);
+    auto max_shared_memory_bytes = ConfigureDevice(ctx_->gpu_id);
     size_t shared_memory_bytes =
         SharedMemoryBytes<BLOCK_THREADS>(num_features, max_shared_memory_bytes);
     bool use_shared = shared_memory_bytes != 0;
@@ -694,10 +694,10 @@ class GPUPredictor : public xgboost::Predictor {
     if (tree_end - tree_begin == 0) {
       return;
     }
-    out_preds->SetDevice(generic_param_->gpu_id);
+    out_preds->SetDevice(ctx_->gpu_id);
     auto const& info = dmat->Info();
     DeviceModel d_model;
-    d_model.Init(model, tree_begin, tree_end, generic_param_->gpu_id);
+    d_model.Init(model, tree_begin, tree_end, ctx_->gpu_id);
 
     if (dmat->PageExists<SparsePage>()) {
       size_t batch_offset = 0;
@@ -709,10 +709,10 @@ class GPUPredictor : public xgboost::Predictor {
     } else {
       size_t batch_offset = 0;
       for (auto const& page : dmat->GetBatches<EllpackPage>()) {
-        dmat->Info().feature_types.SetDevice(generic_param_->gpu_id);
+        dmat->Info().feature_types.SetDevice(ctx_->gpu_id);
         auto feature_types = dmat->Info().feature_types.ConstDeviceSpan();
         this->PredictInternal(
-            page.Impl()->GetDeviceAccessor(generic_param_->gpu_id, feature_types),
+            page.Impl()->GetDeviceAccessor(ctx_->gpu_id, feature_types),
             d_model,
             out_preds,
             batch_offset);
@@ -726,15 +726,15 @@ class GPUPredictor : public xgboost::Predictor {
       Predictor::Predictor{generic_param} {}
 
   ~GPUPredictor() override {
-    if (generic_param_->gpu_id >= 0 && generic_param_->gpu_id < common::AllVisibleGPUs()) {
-      dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
+    if (ctx_->gpu_id >= 0 && ctx_->gpu_id < common::AllVisibleGPUs()) {
+      dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
     }
   }
 
   void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts,
                     const gbm::GBTreeModel& model, uint32_t tree_begin,
                     uint32_t tree_end = 0) const override {
-    int device = generic_param_->gpu_id;
+    int device = ctx_->gpu_id;
     CHECK_GE(device, 0) << "Set `gpu_id' to positive value for processing GPU data.";
     auto* out_preds = &predts->predictions;
     if (tree_end == 0) {
@@ -754,7 +754,7 @@ class GPUPredictor : public xgboost::Predictor {
     CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
         << "Number of columns in data must equal to trained model.";
     CHECK_EQ(dh::CurrentDevice(), m->DeviceIdx())
-        << "XGBoost is running on device: " << this->generic_param_->gpu_id << ", "
+        << "XGBoost is running on device: " << this->ctx_->gpu_id << ", "
        << "but data is on: " << m->DeviceIdx();
     if (p_m) {
       p_m->Info().num_row_ = m->NumRows();
@@ -821,8 +821,8 @@ class GPUPredictor : public xgboost::Predictor {
     if (tree_weights != nullptr) {
       LOG(FATAL) << "Dart booster feature " << not_implemented;
     }
-    dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
-    out_contribs->SetDevice(generic_param_->gpu_id);
+    dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
+    out_contribs->SetDevice(ctx_->gpu_id);
     if (tree_end == 0 || tree_end > model.trees.size()) {
       tree_end = static_cast<uint32_t>(model.trees.size());
     }
@@ -840,12 +840,12 @@ class GPUPredictor : public xgboost::Predictor {
     dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>>
         device_paths;
     DeviceModel d_model;
-    d_model.Init(model, 0, tree_end, generic_param_->gpu_id);
+    d_model.Init(model, 0, tree_end, ctx_->gpu_id);
     dh::device_vector<uint32_t> categories;
-    ExtractPaths(&device_paths, &d_model, &categories, generic_param_->gpu_id);
+    ExtractPaths(&device_paths, &d_model, &categories, ctx_->gpu_id);
     for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
-      batch.data.SetDevice(generic_param_->gpu_id);
-      batch.offset.SetDevice(generic_param_->gpu_id);
+      batch.data.SetDevice(ctx_->gpu_id);
+      batch.offset.SetDevice(ctx_->gpu_id);
       SparsePageView X(batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                        model.learner_model_param->num_feature);
       auto begin = dh::tbegin(phis) + batch.base_rowid * contributions_columns;
@@ -854,7 +854,7 @@ class GPUPredictor : public xgboost::Predictor {
                    dh::tend(phis));
     }
     // Add the base margin term to last column
-    p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
+    p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
     const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
     float base_score = model.learner_model_param->base_score;
     dh::LaunchN(
@@ -879,8 +879,8 @@ class GPUPredictor : public xgboost::Predictor {
     if (tree_weights != nullptr) {
       LOG(FATAL) << "Dart booster feature " << not_implemented;
     }
-    dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
-    out_contribs->SetDevice(generic_param_->gpu_id);
+    dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
+    out_contribs->SetDevice(ctx_->gpu_id);
     if (tree_end == 0 || tree_end > model.trees.size()) {
       tree_end = static_cast<uint32_t>(model.trees.size());
     }
@@ -899,12 +899,12 @@ class GPUPredictor : public xgboost::Predictor {
     dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>>
         device_paths;
     DeviceModel d_model;
-    d_model.Init(model, 0, tree_end, generic_param_->gpu_id);
+    d_model.Init(model, 0, tree_end, ctx_->gpu_id);
     dh::device_vector<uint32_t> categories;
-    ExtractPaths(&device_paths, &d_model, &categories, generic_param_->gpu_id);
+    ExtractPaths(&device_paths, &d_model, &categories, ctx_->gpu_id);
     for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
-      batch.data.SetDevice(generic_param_->gpu_id);
-      batch.offset.SetDevice(generic_param_->gpu_id);
+      batch.data.SetDevice(ctx_->gpu_id);
+      batch.offset.SetDevice(ctx_->gpu_id);
       SparsePageView X(batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                        model.learner_model_param->num_feature);
       auto begin = dh::tbegin(phis) + batch.base_rowid * contributions_columns;
@@ -913,7 +913,7 @@ class GPUPredictor : public xgboost::Predictor {
                    dh::tend(phis));
     }
     // Add the base margin term to last column
-    p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
+    p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
     const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
     float base_score = model.learner_model_param->base_score;
     size_t n_features = model.learner_model_param->num_feature;
@@ -938,8 +938,8 @@ class GPUPredictor : public xgboost::Predictor {
   void PredictLeaf(DMatrix *p_fmat, HostDeviceVector<bst_float> *predictions,
                    const gbm::GBTreeModel &model,
                    unsigned tree_end) const override {
-    dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
-    auto max_shared_memory_bytes = ConfigureDevice(generic_param_->gpu_id);
+    dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
+    auto max_shared_memory_bytes = ConfigureDevice(ctx_->gpu_id);
 
     const MetaInfo& info = p_fmat->Info();
     constexpr uint32_t kBlockThreads = 128;
@@ -953,15 +953,15 @@ class GPUPredictor : public xgboost::Predictor {
     if (tree_end == 0 || tree_end > model.trees.size()) {
       tree_end = static_cast<uint32_t>(model.trees.size());
     }
-    predictions->SetDevice(generic_param_->gpu_id);
+    predictions->SetDevice(ctx_->gpu_id);
     predictions->Resize(num_rows * tree_end);
     DeviceModel d_model;
-    d_model.Init(model, 0, tree_end, this->generic_param_->gpu_id);
+    d_model.Init(model, 0, tree_end, this->ctx_->gpu_id);
 
     if (p_fmat->PageExists<SparsePage>()) {
       for (auto const& batch : p_fmat->GetBatches<SparsePage>()) {
-        batch.data.SetDevice(generic_param_->gpu_id);
-        batch.offset.SetDevice(generic_param_->gpu_id);
+        batch.data.SetDevice(ctx_->gpu_id);
+        batch.offset.SetDevice(ctx_->gpu_id);
         bst_row_t batch_offset = 0;
         SparsePageView data{batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                             model.learner_model_param->num_feature};
@@ -986,7 +986,7 @@ class GPUPredictor : public xgboost::Predictor {
     } else {
      for (auto const& batch : p_fmat->GetBatches<EllpackPage>()) {
         bst_row_t batch_offset = 0;
-        EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(generic_param_->gpu_id)};
+        EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->gpu_id)};
         size_t num_rows = batch.Size();
         auto grid =
             static_cast<uint32_t>(common::DivRoundUp(num_rows, kBlockThreads));