parent
52d44e07fe
commit
a5f232feb8
@ -49,6 +49,7 @@ class SimpleBatchIteratorImpl : public BatchIteratorImpl<T> {
|
||||
};
|
||||
|
||||
BatchSet<SparsePage> SimpleDMatrix::GetRowBatches() {
|
||||
// Since CSR is the default data structure, `source_` is always available.
|
||||
auto cast = dynamic_cast<SimpleCSRSource*>(source_.get());
|
||||
auto begin_iter = BatchIterator<SparsePage>(
|
||||
new SimpleBatchIteratorImpl<SparsePage>(&(cast->page_)));
|
||||
|
||||
@ -191,7 +191,7 @@ class GBTree : public GradientBooster {
|
||||
HostDeviceVector<bst_float>* out_preds,
|
||||
unsigned ntree_limit) override {
|
||||
CHECK(configured_);
|
||||
GetPredictor()->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
|
||||
GetPredictor(out_preds, p_fmat)->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
|
||||
}
|
||||
|
||||
void PredictInstance(const SparsePage::Inst& inst,
|
||||
@ -242,8 +242,22 @@ class GBTree : public GradientBooster {
|
||||
int bst_group,
|
||||
std::vector<std::unique_ptr<RegTree> >* ret);
|
||||
|
||||
std::unique_ptr<Predictor> const& GetPredictor() const {
|
||||
std::unique_ptr<Predictor> const& GetPredictor(HostDeviceVector<float> const* out_pred = nullptr,
|
||||
DMatrix* f_dmat = nullptr) const {
|
||||
CHECK(configured_);
|
||||
// GPU_Hist by default has prediction cache calculated from quantile values, so GPU
|
||||
// Predictor is not used for training dataset. But when XGBoost performs continue
|
||||
// training with an existing model, the prediction cache is not available and number
|
||||
// of trees doesn't equal zero, the whole training dataset gets copied into GPU for
|
||||
// precise prediction. This condition tries to avoid such copy by calling CPU
|
||||
// Predictor.
|
||||
if ((out_pred && out_pred->Size() == 0) &&
|
||||
(model_.param.num_trees != 0) &&
|
||||
// FIXME(trivialfis): Implement a better method for testing whether data is on
|
||||
// device after DMatrix refactoring is done.
|
||||
(f_dmat && !((*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead()))) {
|
||||
return cpu_predictor_;
|
||||
}
|
||||
if (tparam_.predictor == "cpu_predictor") {
|
||||
CHECK(cpu_predictor_);
|
||||
return cpu_predictor_;
|
||||
|
||||
@ -134,7 +134,7 @@ class CPUPredictor : public Predictor {
|
||||
} else {
|
||||
if (!base_margin.empty()) {
|
||||
std::ostringstream oss;
|
||||
oss << "Warning: Ignoring the base margin, since it has incorrect length. "
|
||||
oss << "Ignoring the base margin, since it has incorrect length. "
|
||||
<< "The base margin must be an array of length ";
|
||||
if (model.param.num_output_group > 1) {
|
||||
oss << "[num_class] * [number of data points], i.e. "
|
||||
@ -145,7 +145,7 @@ class CPUPredictor : public Predictor {
|
||||
}
|
||||
oss << "Instead, all data points will use "
|
||||
<< "base_score = " << model.base_margin;
|
||||
LOG(INFO) << oss.str();
|
||||
LOG(WARNING) << oss.str();
|
||||
}
|
||||
std::fill(out_preds_h.begin(), out_preds_h.end(), model.base_margin);
|
||||
}
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <xgboost/generic_parameters.h>
|
||||
|
||||
#include "xgboost/learner.h"
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/gbm/gbtree.h"
|
||||
|
||||
@ -43,4 +46,67 @@ TEST(GBTree, SelectTreeMethod) {
|
||||
ASSERT_EQ(tparam.predictor, "gpu_predictor");
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef XGBOOST_USE_CUDA
|
||||
// Checks where the training data's first SparsePage lives (host vs. device)
// after training with `tree_method=gpu_hist` in three scenarios:
//   1. a fresh learner,
//   2. a learner that continues training from a model saved to and loaded from disk,
//   3. a fresh learner after the page data has been explicitly moved to the device.
// In (1) and (2) the page must remain host-writable after training; in (3) it
// must stay non-host-writable, i.e. training does not pull it back.
TEST(GBTree, ChoosePredictor) {
|
||||
size_t constexpr kNumRows = 17;
|
||||
size_t constexpr kCols = 15;
|
||||
auto pp_mat = CreateDMatrix(kNumRows, kCols, 0);
|
||||
auto& p_mat = *pp_mat;
|
||||

||||
// Alternating 0/1 labels, one per row.
std::vector<bst_float> labels (kNumRows);
|
||||
for (size_t i = 0; i < kNumRows; ++i) {
|
||||
labels[i] = i % 2;
|
||||
}
|
||||
p_mat->Info().SetInfo("label", labels.data(), DataType::kFloat32, kNumRows);
|
||||

||||
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};
|
||||
// NOTE(review): `n_feat`, `args` and `generic_param` are not referenced again
// in this test after being set up here.
std::string n_feat = std::to_string(kCols);
|
||||
Args args {{"tree_method", "approx"}, {"num_feature", n_feat}};
|
||||
GenericParameter generic_param;
|
||||
generic_param.InitAllowUnknown(Args{{"gpu_id", "0"}});
|
||||

||||
// Reference to the entry storage of the first SparsePage batch; every
// assertion below inspects this object.
auto& data = (*(p_mat->GetBatches<SparsePage>().begin())).data;
|
||||

||||
// Scenario 1: fresh learner, four boosting iterations with gpu_hist.
auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
|
||||
learner->SetParams(Args{{"tree_method", "gpu_hist"}});
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
learner->UpdateOneIter(i, p_mat.get());
|
||||
}
|
||||
ASSERT_TRUE(data.HostCanWrite());
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string fname = tempdir.path + "/model_para.bst";
|
||||

||||
// Save the trained model; the inner scope destroys the stream before the
// file is read back.
{
|
||||
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
|
||||
learner->Save(fo.get());
|
||||
}
|
||||

||||
// Scenario 2: continue training from the saved model.
// a new learner
|
||||
learner = std::unique_ptr<Learner>(Learner::Create(mat));
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
|
||||
learner->Load(fi.get());
|
||||
}
|
||||
learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
learner->UpdateOneIter(i, p_mat.get());
|
||||
}
|
||||
ASSERT_TRUE(data.HostCanWrite());
|
||||

||||
// Scenario 3: move the page data to the device first.
// NOTE(review): the second constructor argument `0` is presumably the device
// ordinal — confirm against HostDeviceVector.
// pull data into device.
|
||||
data = HostDeviceVector<Entry>(data.HostVector(), 0);
|
||||
data.DeviceSpan();
|
||||
ASSERT_FALSE(data.HostCanWrite());
|
||||

||||
// another new learner
|
||||
learner = std::unique_ptr<Learner>(Learner::Create(mat));
|
||||
learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
learner->UpdateOneIter(i, p_mat.get());
|
||||
}
|
||||
// data is not pulled back into host
|
||||
ASSERT_FALSE(data.HostCanWrite());
|
||||
}
|
||||
#endif
|
||||
} // namespace xgboost
|
||||
|
||||
@ -241,7 +241,6 @@ TEST(Learner, GPUConfiguration) {
|
||||
|
||||
delete pp_dmat;
|
||||
}
|
||||
|
||||
#endif // XGBOOST_USE_CUDA
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user