Remove internal use of gpu_id. (#9568)
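Every hunk below applies the same substitution: raw integer `gpu_id` fields and parameters are replaced by the `DeviceOrd` abstraction, using `ctx_->Device()` where a device handle is expected, `ctx_->Ordinal()` / `device.ordinal` where a plain CUDA ordinal is still required (for example by `cudaSetDevice`), and `Device().IsCUDA()` in place of `gpu_id >= 0` checks. The following is a minimal sketch of that pattern, not taken from the patch itself: `SetDeviceForContext` is a hypothetical helper, while `Context`, `DeviceOrd`, and `dh::safe_cuda` are assumed to behave as in the surrounding XGBoost sources.

#include <xgboost/context.h>               // Context, DeviceOrd (assumed header)
#include "../common/device_helpers.cuh"    // dh::safe_cuda (path as used inside the XGBoost tree)

// Hypothetical helper illustrating the old -> new device handling in this diff.
void SetDeviceForContext(xgboost::Context const* ctx) {
  // Before this change: if (ctx->gpu_id >= 0) { dh::safe_cuda(cudaSetDevice(ctx->gpu_id)); }
  if (ctx->Device().IsCUDA()) {                    // replaces the `gpu_id >= 0` check
    dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));  // replaces `ctx->gpu_id`
  }
}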
@@ -180,33 +180,30 @@ struct DeviceAdapterLoader {
   XGBOOST_DEV_INLINE DeviceAdapterLoader(Batch const batch, bool use_shared,
                                           bst_feature_t num_features, bst_row_t num_rows,
-                                          size_t entry_start, float missing) :
-    batch{batch},
-    columns{num_features},
-    use_shared{use_shared},
-    is_valid{missing} {
-    extern __shared__ float _smem[];
-    smem = _smem;
-    if (use_shared) {
-      uint32_t global_idx = blockDim.x * blockIdx.x + threadIdx.x;
-      size_t shared_elements = blockDim.x * num_features;
-      dh::BlockFill(smem, shared_elements, nanf(""));
-      __syncthreads();
-      if (global_idx < num_rows) {
-        auto beg = global_idx * columns;
-        auto end = (global_idx + 1) * columns;
-        for (size_t i = beg; i < end; ++i) {
-          auto value = batch.GetElement(i).value;
-          if (is_valid(value)) {
-            smem[threadIdx.x * num_features + (i - beg)] = value;
-          }
+                                          size_t entry_start, float missing)
+      : batch{batch}, columns{num_features}, use_shared{use_shared}, is_valid{missing} {
+    extern __shared__ float _smem[];
+    smem = _smem;
+    if (use_shared) {
+      uint32_t global_idx = blockDim.x * blockIdx.x + threadIdx.x;
+      size_t shared_elements = blockDim.x * num_features;
+      dh::BlockFill(smem, shared_elements, nanf(""));
+      __syncthreads();
+      if (global_idx < num_rows) {
+        auto beg = global_idx * columns;
+        auto end = (global_idx + 1) * columns;
+        for (size_t i = beg; i < end; ++i) {
+          auto value = batch.GetElement(i).value;
+          if (is_valid(value)) {
+            smem[threadIdx.x * num_features + (i - beg)] = value;
+          }
         }
       }
       __syncthreads();
     }
     __syncthreads();
   }
 
-  XGBOOST_DEV_INLINE float GetElement(size_t ridx, size_t fidx) const {
+  [[nodiscard]] XGBOOST_DEV_INLINE float GetElement(size_t ridx, size_t fidx) const {
     if (use_shared) {
       return smem[threadIdx.x * columns + fidx];
     }
@@ -340,11 +337,11 @@ class DeviceModel {
   size_t tree_end_;  // NOLINT
   int num_group;
 
-  void Init(const gbm::GBTreeModel& model, size_t tree_begin, size_t tree_end, int32_t gpu_id) {
-    dh::safe_cuda(cudaSetDevice(gpu_id));
+  void Init(const gbm::GBTreeModel& model, size_t tree_begin, size_t tree_end, DeviceOrd device) {
+    dh::safe_cuda(cudaSetDevice(device.ordinal));
 
     // Copy decision trees to device
-    tree_segments = HostDeviceVector<size_t>({}, gpu_id);
+    tree_segments = HostDeviceVector<size_t>({}, device);
     auto& h_tree_segments = tree_segments.HostVector();
     h_tree_segments.reserve((tree_end - tree_begin) + 1);
     size_t sum = 0;
@@ -354,8 +351,8 @@ class DeviceModel {
       h_tree_segments.push_back(sum);
     }
 
-    nodes = HostDeviceVector<RegTree::Node>(h_tree_segments.back(), RegTree::Node(), gpu_id);
-    stats = HostDeviceVector<RTreeNodeStat>(h_tree_segments.back(), RTreeNodeStat(), gpu_id);
+    nodes = HostDeviceVector<RegTree::Node>(h_tree_segments.back(), RegTree::Node(), device);
+    stats = HostDeviceVector<RTreeNodeStat>(h_tree_segments.back(), RTreeNodeStat(), device);
     auto d_nodes = nodes.DevicePointer();
     auto d_stats = stats.DevicePointer();
     for (auto tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) {
@@ -369,12 +366,12 @@ class DeviceModel {
                              sizeof(RTreeNodeStat) * src_stats.size(), cudaMemcpyDefault));
     }
 
-    tree_group = HostDeviceVector<int>(model.tree_info.size(), 0, gpu_id);
+    tree_group = HostDeviceVector<int>(model.tree_info.size(), 0, device);
     auto& h_tree_group = tree_group.HostVector();
     std::memcpy(h_tree_group.data(), model.tree_info.data(), sizeof(int) * model.tree_info.size());
 
     // Initialize categorical splits.
-    split_types.SetDevice(gpu_id);
+    split_types.SetDevice(device);
     std::vector<FeatureType>& h_split_types = split_types.HostVector();
     h_split_types.resize(h_tree_segments.back());
     for (auto tree_idx = tree_begin; tree_idx < tree_end; ++tree_idx) {
@@ -383,8 +380,8 @@ class DeviceModel {
                 h_split_types.begin() + h_tree_segments[tree_idx - tree_begin]);
     }
 
-    categories = HostDeviceVector<uint32_t>({}, gpu_id);
-    categories_tree_segments = HostDeviceVector<uint32_t>(1, 0, gpu_id);
+    categories = HostDeviceVector<uint32_t>({}, device);
+    categories_tree_segments = HostDeviceVector<uint32_t>(1, 0, device);
     std::vector<uint32_t> &h_categories = categories.HostVector();
     std::vector<uint32_t> &h_split_cat_segments = categories_tree_segments.HostVector();
     for (auto tree_idx = tree_begin; tree_idx < tree_end; ++tree_idx) {
@@ -397,7 +394,7 @@ class DeviceModel {
     }
 
     categories_node_segments = HostDeviceVector<RegTree::CategoricalSplitMatrix::Segment>(
-        h_tree_segments.back(), {}, gpu_id);
+        h_tree_segments.back(), {}, device);
     std::vector<RegTree::CategoricalSplitMatrix::Segment>& h_categories_node_segments =
         categories_node_segments.HostVector();
     for (auto tree_idx = tree_begin; tree_idx < tree_end; ++tree_idx) {
@@ -485,8 +482,8 @@ struct PathInfo {
 void ExtractPaths(
     dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>> *paths,
     DeviceModel *model, dh::device_vector<uint32_t> *path_categories,
-    int gpu_id) {
-  dh::safe_cuda(cudaSetDevice(gpu_id));
+    DeviceOrd device) {
+  dh::safe_cuda(cudaSetDevice(device.ordinal));
   auto& device_model = *model;
 
   dh::caching_device_vector<PathInfo> info(device_model.nodes.Size());
@@ -773,12 +770,12 @@ class ColumnSplitHelper {
   template <bool predict_leaf>
   void PredictDMatrix(DMatrix* dmat, HostDeviceVector<float>* out_preds, DeviceModel const& model,
                       bst_feature_t num_features, std::uint32_t num_group) const {
-    dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
+    dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));
     dh::caching_device_vector<BitType> decision_storage{};
     dh::caching_device_vector<BitType> missing_storage{};
 
     auto constexpr kBlockThreads = 128;
-    auto const max_shared_memory_bytes = dh::MaxSharedMemory(ctx_->gpu_id);
+    auto const max_shared_memory_bytes = dh::MaxSharedMemory(ctx_->Ordinal());
     auto const shared_memory_bytes =
         SharedMemoryBytes<kBlockThreads>(num_features, max_shared_memory_bytes);
     auto const use_shared = shared_memory_bytes != 0;
@@ -791,8 +788,8 @@ class ColumnSplitHelper {
     BitVector decision_bits{dh::ToSpan(decision_storage)};
     BitVector missing_bits{dh::ToSpan(missing_storage)};
 
-      batch.offset.SetDevice(ctx_->gpu_id);
-      batch.data.SetDevice(ctx_->gpu_id);
+      batch.offset.SetDevice(ctx_->Device());
+      batch.data.SetDevice(ctx_->Device());
       std::size_t entry_start = 0;
       SparsePageView data(batch.data.DeviceSpan(), batch.offset.DeviceSpan(), num_features);
 
@@ -823,9 +820,9 @@ class ColumnSplitHelper {
   void AllReduceBitVectors(dh::caching_device_vector<BitType>* decision_storage,
                            dh::caching_device_vector<BitType>* missing_storage) const {
     collective::AllReduce<collective::Operation::kBitwiseOR>(
-        ctx_->gpu_id, decision_storage->data().get(), decision_storage->size());
+        ctx_->Ordinal(), decision_storage->data().get(), decision_storage->size());
     collective::AllReduce<collective::Operation::kBitwiseAND>(
-        ctx_->gpu_id, missing_storage->data().get(), missing_storage->size());
+        ctx_->Ordinal(), missing_storage->data().get(), missing_storage->size());
   }
 
   void ResizeBitVectors(dh::caching_device_vector<BitType>* decision_storage,
@@ -853,12 +850,12 @@ class GPUPredictor : public xgboost::Predictor {
                         size_t num_features,
                         HostDeviceVector<bst_float>* predictions,
                         size_t batch_offset, bool is_dense) const {
-    batch.offset.SetDevice(ctx_->gpu_id);
-    batch.data.SetDevice(ctx_->gpu_id);
+    batch.offset.SetDevice(ctx_->Device());
+    batch.data.SetDevice(ctx_->Device());
     const uint32_t BLOCK_THREADS = 128;
     size_t num_rows = batch.Size();
     auto GRID_SIZE = static_cast<uint32_t>(common::DivRoundUp(num_rows, BLOCK_THREADS));
-    auto max_shared_memory_bytes = ConfigureDevice(ctx_->gpu_id);
+    auto max_shared_memory_bytes = ConfigureDevice(ctx_->Device());
     size_t shared_memory_bytes =
         SharedMemoryBytes<BLOCK_THREADS>(num_features, max_shared_memory_bytes);
     bool use_shared = shared_memory_bytes != 0;
@@ -914,10 +911,10 @@ class GPUPredictor : public xgboost::Predictor {
     if (tree_end - tree_begin == 0) {
       return;
     }
-    out_preds->SetDevice(ctx_->gpu_id);
+    out_preds->SetDevice(ctx_->Device());
     auto const& info = dmat->Info();
     DeviceModel d_model;
-    d_model.Init(model, tree_begin, tree_end, ctx_->gpu_id);
+    d_model.Init(model, tree_begin, tree_end, ctx_->Device());
 
     if (info.IsColumnSplit()) {
       column_split_helper_.PredictBatch(dmat, out_preds, model, d_model);
@@ -934,10 +931,10 @@ class GPUPredictor : public xgboost::Predictor {
     } else {
       size_t batch_offset = 0;
       for (auto const& page : dmat->GetBatches<EllpackPage>(ctx_, BatchParam{})) {
-        dmat->Info().feature_types.SetDevice(ctx_->gpu_id);
+        dmat->Info().feature_types.SetDevice(ctx_->Device());
         auto feature_types = dmat->Info().feature_types.ConstDeviceSpan();
         this->PredictInternal(
-            page.Impl()->GetDeviceAccessor(ctx_->gpu_id, feature_types),
+            page.Impl()->GetDeviceAccessor(ctx_->Device(), feature_types),
             d_model,
             out_preds,
             batch_offset);
@@ -951,16 +948,15 @@ class GPUPredictor : public xgboost::Predictor {
       : Predictor::Predictor{ctx}, column_split_helper_{ctx} {}
 
   ~GPUPredictor() override {
-    if (ctx_->gpu_id >= 0 && ctx_->gpu_id < common::AllVisibleGPUs()) {
-      dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
+    if (ctx_->IsCUDA() && ctx_->Ordinal() < common::AllVisibleGPUs()) {
+      dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));
     }
   }
 
   void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts,
                     const gbm::GBTreeModel& model, uint32_t tree_begin,
                     uint32_t tree_end = 0) const override {
-    int device = ctx_->gpu_id;
-    CHECK_GE(device, 0) << "Set `gpu_id' to positive value for processing GPU data.";
+    CHECK(ctx_->Device().IsCUDA()) << "Set `device' to `cuda` for processing GPU data.";
     auto* out_preds = &predts->predictions;
     if (tree_end == 0) {
       tree_end = model.trees.size();
@@ -978,9 +974,9 @@ class GPUPredictor : public xgboost::Predictor {
     auto m = std::any_cast<std::shared_ptr<Adapter>>(x);
     CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
         << "Number of columns in data must equal to trained model.";
-    CHECK_EQ(dh::CurrentDevice(), m->DeviceIdx())
-        << "XGBoost is running on device: " << this->ctx_->gpu_id << ", "
-        << "but data is on: " << m->DeviceIdx();
+    CHECK_EQ(dh::CurrentDevice(), m->Device().ordinal)
+        << "XGBoost is running on device: " << this->ctx_->Device().Name() << ", "
+        << "but data is on: " << m->Device().Name();
     if (p_m) {
       p_m->Info().num_row_ = m->NumRows();
       this->InitOutPredictions(p_m->Info(), &(out_preds->predictions), model);
@@ -989,16 +985,16 @@ class GPUPredictor : public xgboost::Predictor {
       info.num_row_ = m->NumRows();
       this->InitOutPredictions(info, &(out_preds->predictions), model);
     }
-    out_preds->predictions.SetDevice(m->DeviceIdx());
+    out_preds->predictions.SetDevice(m->Device());
 
     const uint32_t BLOCK_THREADS = 128;
     auto GRID_SIZE = static_cast<uint32_t>(common::DivRoundUp(m->NumRows(), BLOCK_THREADS));
 
-    auto max_shared_memory_bytes = dh::MaxSharedMemory(m->DeviceIdx());
+    auto max_shared_memory_bytes = dh::MaxSharedMemory(m->Device().ordinal);
     size_t shared_memory_bytes =
         SharedMemoryBytes<BLOCK_THREADS>(m->NumColumns(), max_shared_memory_bytes);
     DeviceModel d_model;
-    d_model.Init(model, tree_begin, tree_end, m->DeviceIdx());
+    d_model.Init(model, tree_begin, tree_end, m->Device());
 
     bool use_shared = shared_memory_bytes != 0;
     size_t entry_start = 0;
@@ -1050,9 +1046,8 @@ class GPUPredictor : public xgboost::Predictor {
     }
     CHECK(!p_fmat->Info().IsColumnSplit())
         << "Predict contribution support for column-wise data split is not yet implemented.";
-
-    dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
-    out_contribs->SetDevice(ctx_->gpu_id);
+    dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));
+    out_contribs->SetDevice(ctx_->Device());
     if (tree_end == 0 || tree_end > model.trees.size()) {
       tree_end = static_cast<uint32_t>(model.trees.size());
     }
@@ -1070,12 +1065,12 @@ class GPUPredictor : public xgboost::Predictor {
     dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>>
         device_paths;
     DeviceModel d_model;
-    d_model.Init(model, 0, tree_end, ctx_->gpu_id);
+    d_model.Init(model, 0, tree_end, ctx_->Device());
     dh::device_vector<uint32_t> categories;
-    ExtractPaths(&device_paths, &d_model, &categories, ctx_->gpu_id);
+    ExtractPaths(&device_paths, &d_model, &categories, ctx_->Device());
     for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
-      batch.data.SetDevice(ctx_->gpu_id);
-      batch.offset.SetDevice(ctx_->gpu_id);
+      batch.data.SetDevice(ctx_->Device());
+      batch.offset.SetDevice(ctx_->Device());
       SparsePageView X(batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                        model.learner_model_param->num_feature);
       auto begin = dh::tbegin(phis) + batch.base_rowid * contributions_columns;
@@ -1084,7 +1079,7 @@ class GPUPredictor : public xgboost::Predictor {
                                         dh::tend(phis));
     }
     // Add the base margin term to last column
-    p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
+    p_fmat->Info().base_margin_.SetDevice(ctx_->Device());
     const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
 
     auto base_score = model.learner_model_param->BaseScore(ctx_);
@@ -1109,8 +1104,8 @@ class GPUPredictor : public xgboost::Predictor {
     if (tree_weights != nullptr) {
       LOG(FATAL) << "Dart booster feature " << not_implemented;
     }
-    dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
-    out_contribs->SetDevice(ctx_->gpu_id);
+    dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));
+    out_contribs->SetDevice(ctx_->Device());
     if (tree_end == 0 || tree_end > model.trees.size()) {
       tree_end = static_cast<uint32_t>(model.trees.size());
     }
@@ -1129,12 +1124,12 @@ class GPUPredictor : public xgboost::Predictor {
     dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>>
         device_paths;
     DeviceModel d_model;
-    d_model.Init(model, 0, tree_end, ctx_->gpu_id);
+    d_model.Init(model, 0, tree_end, ctx_->Device());
     dh::device_vector<uint32_t> categories;
-    ExtractPaths(&device_paths, &d_model, &categories, ctx_->gpu_id);
+    ExtractPaths(&device_paths, &d_model, &categories, ctx_->Device());
     for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
-      batch.data.SetDevice(ctx_->gpu_id);
-      batch.offset.SetDevice(ctx_->gpu_id);
+      batch.data.SetDevice(ctx_->Device());
+      batch.offset.SetDevice(ctx_->Device());
       SparsePageView X(batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                        model.learner_model_param->num_feature);
       auto begin = dh::tbegin(phis) + batch.base_rowid * contributions_columns;
@@ -1143,7 +1138,7 @@ class GPUPredictor : public xgboost::Predictor {
                                         dh::tend(phis));
     }
     // Add the base margin term to last column
-    p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
+    p_fmat->Info().base_margin_.SetDevice(ctx_->Device());
     const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
 
     auto base_score = model.learner_model_param->BaseScore(ctx_);
@@ -1168,24 +1163,24 @@ class GPUPredictor : public xgboost::Predictor {
   void PredictLeaf(DMatrix *p_fmat, HostDeviceVector<bst_float> *predictions,
                    const gbm::GBTreeModel &model,
                    unsigned tree_end) const override {
-    dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
+    dh::safe_cuda(cudaSetDevice(ctx_->Ordinal()));
+    auto max_shared_memory_bytes = ConfigureDevice(ctx_->Device());
 
     const MetaInfo& info = p_fmat->Info();
     bst_row_t num_rows = info.num_row_;
     if (tree_end == 0 || tree_end > model.trees.size()) {
       tree_end = static_cast<uint32_t>(model.trees.size());
     }
-    predictions->SetDevice(ctx_->gpu_id);
+    predictions->SetDevice(ctx_->Device());
     predictions->Resize(num_rows * tree_end);
     DeviceModel d_model;
-    d_model.Init(model, 0, tree_end, this->ctx_->gpu_id);
+    d_model.Init(model, 0, tree_end, this->ctx_->Device());
 
     if (info.IsColumnSplit()) {
       column_split_helper_.PredictLeaf(p_fmat, predictions, model, d_model);
       return;
     }
 
-    auto max_shared_memory_bytes = ConfigureDevice(ctx_->gpu_id);
     constexpr uint32_t kBlockThreads = 128;
     size_t shared_memory_bytes = SharedMemoryBytes<kBlockThreads>(
         info.num_col_, max_shared_memory_bytes);
@@ -1195,8 +1190,8 @@ class GPUPredictor : public xgboost::Predictor {
 
     if (p_fmat->PageExists<SparsePage>()) {
       for (auto const& batch : p_fmat->GetBatches<SparsePage>()) {
-        batch.data.SetDevice(ctx_->gpu_id);
-        batch.offset.SetDevice(ctx_->gpu_id);
+        batch.data.SetDevice(ctx_->Device());
+        batch.offset.SetDevice(ctx_->Device());
         bst_row_t batch_offset = 0;
         SparsePageView data{batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                             model.learner_model_param->num_feature};
@@ -1221,7 +1216,7 @@ class GPUPredictor : public xgboost::Predictor {
     } else {
       for (auto const& batch : p_fmat->GetBatches<EllpackPage>(ctx_, BatchParam{})) {
         bst_row_t batch_offset = 0;
-        EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->gpu_id)};
+        EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->Device())};
         size_t num_rows = batch.Size();
         auto grid =
             static_cast<uint32_t>(common::DivRoundUp(num_rows, kBlockThreads));
@@ -1249,9 +1244,9 @@ class GPUPredictor : public xgboost::Predictor {
 
  private:
   /*! \brief Reconfigure the device when GPU is changed. */
-  static size_t ConfigureDevice(int device) {
-    if (device >= 0) {
-      return dh::MaxSharedMemory(device);
+  static size_t ConfigureDevice(DeviceOrd device) {
+    if (device.IsCUDA()) {
+      return dh::MaxSharedMemory(device.ordinal);
     }
     return 0;
   }
 
@@ -49,8 +49,8 @@ void Predictor::InitOutPredictions(const MetaInfo& info, HostDeviceVector<bst_fl
   std::size_t n{model.learner_model_param->OutputLength() * info.num_row_};
 
   const HostDeviceVector<bst_float>* base_margin = info.base_margin_.Data();
-  if (ctx_->gpu_id >= 0) {
-    out_preds->SetDevice(ctx_->gpu_id);
+  if (ctx_->Device().IsCUDA()) {
+    out_preds->SetDevice(ctx_->Device());
   }
   if (!base_margin->Empty()) {
     out_preds->Resize(n);