Use the new DeviceOrd in the linalg module. (#9527)
This commit is contained in:
@@ -183,7 +183,7 @@ class GBLinear : public GradientBooster {
|
||||
bst_layer_t layer_begin, bst_layer_t /*layer_end*/, bool) override {
|
||||
model_.LazyInitModel();
|
||||
LinearCheckLayer(layer_begin);
|
||||
- auto base_margin = p_fmat->Info().base_margin_.View(Context::kCpuId);
|
||||
+ auto base_margin = p_fmat->Info().base_margin_.View(DeviceOrd::CPU());
|
||||
const int ngroup = model_.learner_model_param->num_output_group;
|
||||
const size_t ncolumns = model_.learner_model_param->num_feature + 1;
|
||||
// allocate space for (#features + bias) times #groups times #rows
|
||||
@@ -250,10 +250,9 @@ class GBLinear : public GradientBooster {
|
||||
// The bias is the last weight
|
||||
out_scores->resize(model_.weight.size() - learner_model_param_->num_output_group, 0);
|
||||
auto n_groups = learner_model_param_->num_output_group;
|
||||
- linalg::TensorView<float, 2> scores{
|
||||
- *out_scores,
|
||||
- {learner_model_param_->num_feature, n_groups},
|
||||
- Context::kCpuId};
|
||||
+ auto scores = linalg::MakeTensorView(DeviceOrd::CPU(),
|
||||
+ common::Span{out_scores->data(), out_scores->size()},
|
||||
+ learner_model_param_->num_feature, n_groups);
|
||||
for (size_t i = 0; i < learner_model_param_->num_feature; ++i) {
|
||||
for (bst_group_t g = 0; g < n_groups; ++g) {
|
||||
scores(i, g) = model_[i][g];
|
||||
@@ -275,12 +274,12 @@ class GBLinear : public GradientBooster {
|
||||
monitor_.Start("PredictBatchInternal");
|
||||
model_.LazyInitModel();
|
||||
std::vector<bst_float> &preds = *out_preds;
|
||||
- auto base_margin = p_fmat->Info().base_margin_.View(Context::kCpuId);
|
||||
+ auto base_margin = p_fmat->Info().base_margin_.View(DeviceOrd::CPU());
|
||||
// start collecting the prediction
|
||||
const int ngroup = model_.learner_model_param->num_output_group;
|
||||
preds.resize(p_fmat->Info().num_row_ * ngroup);
|
||||
|
||||
- auto base_score = learner_model_param_->BaseScore(Context::kCpuId);
|
||||
+ auto base_score = learner_model_param_->BaseScore(DeviceOrd::CPU());
|
||||
for (const auto &page : p_fmat->GetBatches<SparsePage>()) {
|
||||
auto const& batch = page.GetView();
|
||||
// output convention: nrow * k, where nrow is number of rows
|
||||
|
||||
@@ -754,7 +754,7 @@ class Dart : public GBTree {
|
||||
auto n_groups = model_.learner_model_param->num_output_group;
|
||||
|
||||
PredictionCacheEntry predts; // temporary storage for prediction
|
||||
- if (ctx_->gpu_id != Context::kCpuId) {
|
||||
+ if (ctx_->IsCUDA()) {
|
||||
predts.predictions.SetDevice(ctx_->gpu_id);
|
||||
}
|
||||
predts.predictions.Resize(p_fmat->Info().num_row_ * n_groups, 0);
|
||||
@@ -859,12 +859,12 @@ class Dart : public GBTree {
|
||||
size_t n_rows = p_fmat->Info().num_row_;
|
||||
if (predts.predictions.DeviceIdx() != Context::kCpuId) {
|
||||
p_out_preds->predictions.SetDevice(predts.predictions.DeviceIdx());
|
||||
- auto base_score = model_.learner_model_param->BaseScore(predts.predictions.DeviceIdx());
|
||||
+ auto base_score = model_.learner_model_param->BaseScore(predts.predictions.Device());
|
||||
GPUDartInplacePredictInc(p_out_preds->predictions.DeviceSpan(),
|
||||
predts.predictions.DeviceSpan(), w, n_rows, base_score, n_groups,
|
||||
group);
|
||||
} else {
|
||||
- auto base_score = model_.learner_model_param->BaseScore(Context::kCpuId);
|
||||
+ auto base_score = model_.learner_model_param->BaseScore(DeviceOrd::CPU());
|
||||
auto& h_predts = predts.predictions.HostVector();
|
||||
auto& h_out_predts = p_out_preds->predictions.HostVector();
|
||||
common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {
|
||||
|
||||
Reference in New Issue
Block a user