Use the new DeviceOrd in the linalg module. (#9527)

This commit is contained in:
Jiaming Yuan
2023-08-29 13:37:29 +08:00
committed by GitHub
parent 942b957eef
commit ddf2e68821
43 changed files with 252 additions and 273 deletions

View File

@@ -183,7 +183,7 @@ class GBLinear : public GradientBooster {
bst_layer_t layer_begin, bst_layer_t /*layer_end*/, bool) override {
model_.LazyInitModel();
LinearCheckLayer(layer_begin);
auto base_margin = p_fmat->Info().base_margin_.View(Context::kCpuId);
auto base_margin = p_fmat->Info().base_margin_.View(DeviceOrd::CPU());
const int ngroup = model_.learner_model_param->num_output_group;
const size_t ncolumns = model_.learner_model_param->num_feature + 1;
// allocate space for (#features + bias) times #groups times #rows
@@ -250,10 +250,9 @@ class GBLinear : public GradientBooster {
// The bias is the last weight
out_scores->resize(model_.weight.size() - learner_model_param_->num_output_group, 0);
auto n_groups = learner_model_param_->num_output_group;
linalg::TensorView<float, 2> scores{
*out_scores,
{learner_model_param_->num_feature, n_groups},
Context::kCpuId};
auto scores = linalg::MakeTensorView(DeviceOrd::CPU(),
common::Span{out_scores->data(), out_scores->size()},
learner_model_param_->num_feature, n_groups);
for (size_t i = 0; i < learner_model_param_->num_feature; ++i) {
for (bst_group_t g = 0; g < n_groups; ++g) {
scores(i, g) = model_[i][g];
@@ -275,12 +274,12 @@ class GBLinear : public GradientBooster {
monitor_.Start("PredictBatchInternal");
model_.LazyInitModel();
std::vector<bst_float> &preds = *out_preds;
auto base_margin = p_fmat->Info().base_margin_.View(Context::kCpuId);
auto base_margin = p_fmat->Info().base_margin_.View(DeviceOrd::CPU());
// start collecting the prediction
const int ngroup = model_.learner_model_param->num_output_group;
preds.resize(p_fmat->Info().num_row_ * ngroup);
auto base_score = learner_model_param_->BaseScore(Context::kCpuId);
auto base_score = learner_model_param_->BaseScore(DeviceOrd::CPU());
for (const auto &page : p_fmat->GetBatches<SparsePage>()) {
auto const& batch = page.GetView();
// output convention: nrow * k, where nrow is number of rows

View File

@@ -754,7 +754,7 @@ class Dart : public GBTree {
auto n_groups = model_.learner_model_param->num_output_group;
PredictionCacheEntry predts; // temporary storage for prediction
if (ctx_->gpu_id != Context::kCpuId) {
if (ctx_->IsCUDA()) {
predts.predictions.SetDevice(ctx_->gpu_id);
}
predts.predictions.Resize(p_fmat->Info().num_row_ * n_groups, 0);
@@ -859,12 +859,12 @@ class Dart : public GBTree {
size_t n_rows = p_fmat->Info().num_row_;
if (predts.predictions.DeviceIdx() != Context::kCpuId) {
p_out_preds->predictions.SetDevice(predts.predictions.DeviceIdx());
auto base_score = model_.learner_model_param->BaseScore(predts.predictions.DeviceIdx());
auto base_score = model_.learner_model_param->BaseScore(predts.predictions.Device());
GPUDartInplacePredictInc(p_out_preds->predictions.DeviceSpan(),
predts.predictions.DeviceSpan(), w, n_rows, base_score, n_groups,
group);
} else {
auto base_score = model_.learner_model_param->BaseScore(Context::kCpuId);
auto base_score = model_.learner_model_param->BaseScore(DeviceOrd::CPU());
auto& h_predts = predts.predictions.HostVector();
auto& h_out_predts = p_out_preds->predictions.HostVector();
common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {