Remove internal use of gpu_id. (#9568)
This commit is contained in:
@@ -413,7 +413,7 @@ void GPUHistEvaluator::EvaluateSplits(
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
dh::TemporaryArray<DeviceSplitCandidate> all_candidate_storage(out_splits.size() * world_size);
|
||||
auto all_candidates = dh::ToSpan(all_candidate_storage);
|
||||
collective::AllGather(device_, out_splits.data(), all_candidates.data(),
|
||||
collective::AllGather(device_.ordinal, out_splits.data(), all_candidates.data(),
|
||||
out_splits.size() * sizeof(DeviceSplitCandidate));
|
||||
|
||||
// Reduce to get the best candidate from all workers.
|
||||
|
||||
@@ -85,7 +85,7 @@ class GPUHistEvaluator {
|
||||
std::size_t node_categorical_storage_size_ = 0;
|
||||
// Is the data split column-wise?
|
||||
bool is_column_split_ = false;
|
||||
int32_t device_;
|
||||
DeviceOrd device_;
|
||||
|
||||
// Copy the categories from device to host asynchronously.
|
||||
void CopyToHost( const std::vector<bst_node_t>& nidx);
|
||||
@@ -133,14 +133,14 @@ class GPUHistEvaluator {
|
||||
}
|
||||
|
||||
public:
|
||||
GPUHistEvaluator(TrainParam const ¶m, bst_feature_t n_features, int32_t device)
|
||||
GPUHistEvaluator(TrainParam const ¶m, bst_feature_t n_features, DeviceOrd device)
|
||||
: tree_evaluator_{param, n_features, device}, param_{param} {}
|
||||
/**
|
||||
* \brief Reset the evaluator, should be called before any use.
|
||||
*/
|
||||
void Reset(common::HistogramCuts const &cuts, common::Span<FeatureType const> ft,
|
||||
bst_feature_t n_features, TrainParam const ¶m, bool is_column_split,
|
||||
int32_t device);
|
||||
DeviceOrd device);
|
||||
|
||||
/**
|
||||
* \brief Get host category storage for nidx. Different from the internal version, this
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
* Copyright 2022-2023 by XGBoost Contributors
|
||||
*
|
||||
* \brief Some components of GPU Hist evaluator, this file only exist to reduce nvcc
|
||||
* compilation time.
|
||||
@@ -12,11 +12,10 @@
|
||||
#include "evaluate_splits.cuh"
|
||||
#include "xgboost/data.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
void GPUHistEvaluator::Reset(common::HistogramCuts const &cuts, common::Span<FeatureType const> ft,
|
||||
bst_feature_t n_features, TrainParam const ¶m,
|
||||
bool is_column_split, int32_t device) {
|
||||
bool is_column_split, DeviceOrd device) {
|
||||
param_ = param;
|
||||
tree_evaluator_ = TreeEvaluator{param, n_features, device};
|
||||
has_categoricals_ = cuts.HasCategorical();
|
||||
@@ -127,6 +126,4 @@ common::Span<bst_feature_t const> GPUHistEvaluator::SortHistogram(
|
||||
});
|
||||
return dh::ToSpan(cat_sorted_idx_);
|
||||
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2020 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef FEATURE_GROUPS_CUH_
|
||||
#define FEATURE_GROUPS_CUH_
|
||||
@@ -102,11 +102,10 @@ struct FeatureGroups {
|
||||
InitSingle(cuts);
|
||||
}
|
||||
|
||||
FeatureGroupsAccessor DeviceAccessor(int device) const {
|
||||
[[nodiscard]] FeatureGroupsAccessor DeviceAccessor(DeviceOrd device) const {
|
||||
feature_segments.SetDevice(device);
|
||||
bin_segments.SetDevice(device);
|
||||
return {feature_segments.ConstDeviceSpan(), bin_segments.ConstDeviceSpan(),
|
||||
max_group_bins};
|
||||
return {feature_segments.ConstDeviceSpan(), bin_segments.ConstDeviceSpan(), max_group_bins};
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@@ -167,10 +167,10 @@ GradientBasedSample ExternalMemoryNoSampling::Sample(Context const* ctx,
|
||||
for (auto& batch : dmat->GetBatches<EllpackPage>(ctx, batch_param_)) {
|
||||
auto page = batch.Impl();
|
||||
if (!page_) {
|
||||
page_ = std::make_unique<EllpackPageImpl>(ctx->gpu_id, page->Cuts(), page->is_dense,
|
||||
page_ = std::make_unique<EllpackPageImpl>(ctx->Device(), page->Cuts(), page->is_dense,
|
||||
page->row_stride, dmat->Info().num_row_);
|
||||
}
|
||||
size_t num_elements = page_->Copy(ctx->gpu_id, page, offset);
|
||||
size_t num_elements = page_->Copy(ctx->Device(), page, offset);
|
||||
offset += num_elements;
|
||||
}
|
||||
page_concatenated_ = true;
|
||||
@@ -228,13 +228,13 @@ GradientBasedSample ExternalMemoryUniformSampling::Sample(Context const* ctx,
|
||||
auto first_page = (*batch_iterator.begin()).Impl();
|
||||
// Create a new ELLPACK page with empty rows.
|
||||
page_.reset(); // Release the device memory first before reallocating
|
||||
page_.reset(new EllpackPageImpl(ctx->gpu_id, first_page->Cuts(), first_page->is_dense,
|
||||
page_.reset(new EllpackPageImpl(ctx->Device(), first_page->Cuts(), first_page->is_dense,
|
||||
first_page->row_stride, sample_rows));
|
||||
|
||||
// Compact the ELLPACK pages into the single sample page.
|
||||
thrust::fill(cuctx->CTP(), dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
|
||||
for (auto& batch : batch_iterator) {
|
||||
page_->Compact(ctx->gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
|
||||
page_->Compact(ctx->Device(), batch.Impl(), dh::ToSpan(sample_row_index_));
|
||||
}
|
||||
|
||||
return {sample_rows, page_.get(), dh::ToSpan(gpair_)};
|
||||
@@ -306,13 +306,13 @@ GradientBasedSample ExternalMemoryGradientBasedSampling::Sample(Context const* c
|
||||
auto first_page = (*batch_iterator.begin()).Impl();
|
||||
// Create a new ELLPACK page with empty rows.
|
||||
page_.reset(); // Release the device memory first before reallocating
|
||||
page_.reset(new EllpackPageImpl(ctx->gpu_id, first_page->Cuts(), first_page->is_dense,
|
||||
page_.reset(new EllpackPageImpl(ctx->Device(), first_page->Cuts(), first_page->is_dense,
|
||||
first_page->row_stride, sample_rows));
|
||||
|
||||
// Compact the ELLPACK pages into the single sample page.
|
||||
thrust::fill(dh::tbegin(page_->gidx_buffer), dh::tend(page_->gidx_buffer), 0);
|
||||
for (auto& batch : batch_iterator) {
|
||||
page_->Compact(ctx->gpu_id, batch.Impl(), dh::ToSpan(sample_row_index_));
|
||||
page_->Compact(ctx->Device(), batch.Impl(), dh::ToSpan(sample_row_index_));
|
||||
}
|
||||
|
||||
return {sample_rows, page_.get(), dh::ToSpan(gpair_)};
|
||||
|
||||
@@ -13,15 +13,15 @@
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
RowPartitioner::RowPartitioner(int device_idx, size_t num_rows)
|
||||
RowPartitioner::RowPartitioner(DeviceOrd device_idx, size_t num_rows)
|
||||
: device_idx_(device_idx), ridx_(num_rows), ridx_tmp_(num_rows) {
|
||||
dh::safe_cuda(cudaSetDevice(device_idx_));
|
||||
dh::safe_cuda(cudaSetDevice(device_idx_.ordinal));
|
||||
ridx_segments_.emplace_back(NodePositionInfo{Segment(0, num_rows)});
|
||||
thrust::sequence(thrust::device, ridx_.data(), ridx_.data() + ridx_.size());
|
||||
}
|
||||
|
||||
RowPartitioner::~RowPartitioner() {
|
||||
dh::safe_cuda(cudaSetDevice(device_idx_));
|
||||
dh::safe_cuda(cudaSetDevice(device_idx_.ordinal));
|
||||
}
|
||||
|
||||
common::Span<const RowPartitioner::RowIndexT> RowPartitioner::GetRows(bst_node_t nidx) {
|
||||
|
||||
@@ -199,7 +199,7 @@ class RowPartitioner {
|
||||
static constexpr bst_node_t kIgnoredTreePosition = -1;
|
||||
|
||||
private:
|
||||
int device_idx_;
|
||||
DeviceOrd device_idx_;
|
||||
/*! \brief In here if you want to find the rows belong to a node nid, first you need to
|
||||
* get the indices segment from ridx_segments[nid], then get the row index that
|
||||
* represents position of row in input data X. `RowPartitioner::GetRows` would be a
|
||||
@@ -223,7 +223,7 @@ class RowPartitioner {
|
||||
dh::PinnedMemory pinned2_;
|
||||
|
||||
public:
|
||||
RowPartitioner(int device_idx, size_t num_rows);
|
||||
RowPartitioner(DeviceOrd device_idx, size_t num_rows);
|
||||
~RowPartitioner();
|
||||
RowPartitioner(const RowPartitioner&) = delete;
|
||||
RowPartitioner& operator=(const RowPartitioner&) = delete;
|
||||
|
||||
Reference in New Issue
Block a user