parent 52d44e07fe
commit a5f232feb8
@@ -49,6 +49,7 @@ class SimpleBatchIteratorImpl : public BatchIteratorImpl<T> {
 };

 BatchSet<SparsePage> SimpleDMatrix::GetRowBatches() {
+  // Since CSR is the default data structure, `source_` is always available.
   auto cast = dynamic_cast<SimpleCSRSource*>(source_.get());
   auto begin_iter = BatchIterator<SparsePage>(
       new SimpleBatchIteratorImpl<SparsePage>(&(cast->page_)));
@@ -191,7 +191,7 @@ class GBTree : public GradientBooster {
                     HostDeviceVector<bst_float>* out_preds,
                     unsigned ntree_limit) override {
     CHECK(configured_);
-    GetPredictor()->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
+    GetPredictor(out_preds, p_fmat)->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
   }

   void PredictInstance(const SparsePage::Inst& inst,
@@ -242,8 +242,22 @@ class GBTree : public GradientBooster {
                      int bst_group,
                      std::vector<std::unique_ptr<RegTree> >* ret);

-  std::unique_ptr<Predictor> const& GetPredictor() const {
+  std::unique_ptr<Predictor> const& GetPredictor(HostDeviceVector<float> const* out_pred = nullptr,
+                                                 DMatrix* f_dmat = nullptr) const {
     CHECK(configured_);
+    // GPU_Hist by default has a prediction cache calculated from quantile values, so the
+    // GPU predictor is not used for the training dataset.  But when XGBoost performs
+    // continued training with an existing model, the prediction cache is not available
+    // and the number of trees is not zero, so the whole training dataset would be copied
+    // to the GPU for precise prediction.  This condition tries to avoid such a copy by
+    // calling the CPU predictor instead.
+    if ((out_pred && out_pred->Size() == 0) &&
+        (model_.param.num_trees != 0) &&
+        // FIXME(trivialfis): Implement a better method for testing whether data is on
+        // device after DMatrix refactoring is done.
+        (f_dmat && !((*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead()))) {
+      return cpu_predictor_;
+    }
     if (tparam_.predictor == "cpu_predictor") {
       CHECK(cpu_predictor_);
       return cpu_predictor_;
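For readers skimming the hunk above: the new GetPredictor overload falls back to the CPU predictor only when the prediction cache is empty, the loaded model already contains trees (training continuation), and the data has not been copied to the device. Below is a minimal standalone sketch of that dispatch rule; FakeCache, FakeData and ChoosePredictor are hypothetical stand-ins for illustration, not XGBoost types.

// Standalone sketch of the fallback rule, with stand-in types instead of
// HostDeviceVector / SparsePage; compiles with only the standard library.
#include <cstddef>
#include <iostream>
#include <string>

struct FakeCache { std::size_t size; };      // stands in for the prediction cache
struct FakeData  { bool device_can_read; };  // stands in for the SparsePage data

// Return "cpu" when the cache is empty, the model already has trees, and the
// data still lives on the host only; otherwise honour the configured predictor.
std::string ChoosePredictor(const FakeCache* out_pred, int num_trees,
                            const FakeData* dmat, const std::string& configured) {
  if (out_pred && out_pred->size == 0 && num_trees != 0 &&
      dmat && !dmat->device_can_read) {
    return "cpu";  // avoid copying the whole training dataset to the device
  }
  return configured;
}

int main() {
  FakeCache empty_cache{0};
  FakeData host_only{false};
  std::cout << ChoosePredictor(&empty_cache, 10, &host_only, "gpu") << "\n";  // cpu
  std::cout << ChoosePredictor(nullptr, 10, &host_only, "gpu") << "\n";       // gpu
}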
@@ -134,7 +134,7 @@ class CPUPredictor : public Predictor {
     } else {
       if (!base_margin.empty()) {
         std::ostringstream oss;
-        oss << "Warning: Ignoring the base margin, since it has incorrect length. "
+        oss << "Ignoring the base margin, since it has incorrect length. "
             << "The base margin must be an array of length ";
         if (model.param.num_output_group > 1) {
           oss << "[num_class] * [number of data points], i.e. "
@@ -145,7 +145,7 @@ class CPUPredictor : public Predictor {
         }
         oss << "Instead, all data points will use "
             << "base_score = " << model.base_margin;
-        LOG(INFO) << oss.str();
+        LOG(WARNING) << oss.str();
       }
       std::fill(out_preds_h.begin(), out_preds_h.end(), model.base_margin);
     }
@@ -1,5 +1,8 @@
 #include <gtest/gtest.h>
+#include <dmlc/filesystem.h>
 #include <xgboost/generic_parameters.h>

+#include "xgboost/learner.h"
 #include "../helpers.h"
 #include "../../../src/gbm/gbtree.h"
+
@@ -43,4 +46,67 @@ TEST(GBTree, SelectTreeMethod) {
   ASSERT_EQ(tparam.predictor, "gpu_predictor");
 #endif
 }
+
+#ifdef XGBOOST_USE_CUDA
+TEST(GBTree, ChoosePredictor) {
+  size_t constexpr kNumRows = 17;
+  size_t constexpr kCols = 15;
+  auto pp_mat = CreateDMatrix(kNumRows, kCols, 0);
+  auto& p_mat = *pp_mat;
+
+  std::vector<bst_float> labels (kNumRows);
+  for (size_t i = 0; i < kNumRows; ++i) {
+    labels[i] = i % 2;
+  }
+  p_mat->Info().SetInfo("label", labels.data(), DataType::kFloat32, kNumRows);
+
+  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};
+  std::string n_feat = std::to_string(kCols);
+  Args args {{"tree_method", "approx"}, {"num_feature", n_feat}};
+  GenericParameter generic_param;
+  generic_param.InitAllowUnknown(Args{{"gpu_id", "0"}});
+
+  auto& data = (*(p_mat->GetBatches<SparsePage>().begin())).data;
+
+  auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
+  learner->SetParams(Args{{"tree_method", "gpu_hist"}});
+  for (size_t i = 0; i < 4; ++i) {
+    learner->UpdateOneIter(i, p_mat.get());
+  }
+  ASSERT_TRUE(data.HostCanWrite());
+  dmlc::TemporaryDirectory tempdir;
+  const std::string fname = tempdir.path + "/model_para.bst";
+
+  {
+    std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
+    learner->Save(fo.get());
+  }
+
+  // a new learner
+  learner = std::unique_ptr<Learner>(Learner::Create(mat));
+  {
+    std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
+    learner->Load(fi.get());
+  }
+  learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});
+  for (size_t i = 0; i < 4; ++i) {
+    learner->UpdateOneIter(i, p_mat.get());
+  }
+  ASSERT_TRUE(data.HostCanWrite());
+
+  // pull data into device.
+  data = HostDeviceVector<Entry>(data.HostVector(), 0);
+  data.DeviceSpan();
+  ASSERT_FALSE(data.HostCanWrite());
+
+  // another new learner
+  learner = std::unique_ptr<Learner>(Learner::Create(mat));
+  learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});
+  for (size_t i = 0; i < 4; ++i) {
+    learner->UpdateOneIter(i, p_mat.get());
+  }
+  // data is not pulled back into host
+  ASSERT_FALSE(data.HostCanWrite());
+}
+#endif
 } // namespace xgboost
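The new test above asserts on HostDeviceVector access flags: after gpu_hist training, whether fresh or continued from a saved model, the SparsePage data must stay host-writable (it was never copied to the device), and once the data has been explicitly pulled to the device it must not be pulled back to the host. The toy model below sketches that idea under the assumption that host-write access and a valid device copy are mutually exclusive; AccessFlags is a hypothetical stand-in, not xgboost::HostDeviceVector.

// Toy model of the host/device access states the test checks; not the real API.
#include <cassert>

struct AccessFlags {
  bool host_can_write = true;    // data lives on the host, no valid device copy
  bool device_can_read = false;  // a readable device copy exists

  void PullToDevice() { device_can_read = true;  host_can_write = false; }
  void WriteOnHost()  { host_can_write = true;   device_can_read = false; }
};

int main() {
  AccessFlags data;
  assert(data.host_can_write);   // after CPU-side prediction path: still host-writable
  data.PullToDevice();           // explicit copy to the device
  assert(!data.host_can_write);  // further training must not pull it back to host
  return 0;
}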
@@ -241,7 +241,6 @@ TEST(Learner, GPUConfiguration) {

   delete pp_dmat;
 }
-
 #endif // XGBOOST_USE_CUDA

 } // namespace xgboost