[breaking] Add prediction fucntion for DMatrix and use inplace predict for dask. (#6668)
* Add a new API function for predicting on `DMatrix`. This function aligns with rest of the `XGBoosterPredictFrom*` functions on semantic of function arguments. * Purge `ntree_limit` from libxgboost, use iteration instead. * [dask] Use `inplace_predict` by default for dask sklearn models. * [dask] Run prediction shape inference on worker instead of client. The breaking change is in the Python sklearn `apply` function, I made it to be consistent with other prediction functions where `best_iteration` is used by default.
This commit is contained in:
@@ -32,7 +32,7 @@ TEST(CpuPredictor, Basic) {
|
||||
// Test predict batch
|
||||
PredictionCacheEntry out_predictions;
|
||||
cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
|
||||
ASSERT_EQ(model.trees.size(), out_predictions.version);
|
||||
|
||||
std::vector<float>& out_predictions_h = out_predictions.predictions.HostVector();
|
||||
for (size_t i = 0; i < out_predictions.predictions.Size(); i++) {
|
||||
ASSERT_EQ(out_predictions_h[i], 1.5);
|
||||
@@ -215,7 +215,7 @@ TEST(CpuPredictor, UpdatePredictionCache) {
|
||||
|
||||
PredictionCacheEntry out_predictions;
|
||||
// perform fair prediction on the same input data, should be equal to cached result
|
||||
gbm->PredictBatch(dmat.get(), &out_predictions, false, 0);
|
||||
gbm->PredictBatch(dmat.get(), &out_predictions, false, 0, 0);
|
||||
|
||||
std::vector<float> &out_predictions_h = out_predictions.predictions.HostVector();
|
||||
std::vector<float> &predtion_cache_from_train = predtion_cache.predictions.HostVector();
|
||||
|
||||
@@ -45,7 +45,6 @@ TEST(GPUPredictor, Basic) {
|
||||
PredictionCacheEntry cpu_out_predictions;
|
||||
|
||||
gpu_predictor->PredictBatch(dmat.get(), &gpu_out_predictions, model, 0);
|
||||
ASSERT_EQ(model.trees.size(), gpu_out_predictions.version);
|
||||
cpu_predictor->PredictBatch(dmat.get(), &cpu_out_predictions, model, 0);
|
||||
|
||||
std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.predictions.HostVector();
|
||||
|
||||
@@ -64,10 +64,10 @@ void TestTrainingPrediction(size_t rows, size_t bins,
|
||||
}
|
||||
|
||||
HostDeviceVector<float> from_full;
|
||||
learner->Predict(p_full, false, &from_full);
|
||||
learner->Predict(p_full, false, &from_full, 0, 0);
|
||||
|
||||
HostDeviceVector<float> from_hist;
|
||||
learner->Predict(p_hist, false, &from_hist);
|
||||
learner->Predict(p_hist, false, &from_hist, 0, 0);
|
||||
|
||||
for (size_t i = 0; i < rows; ++i) {
|
||||
EXPECT_NEAR(from_hist.ConstHostVector()[i],
|
||||
@@ -157,20 +157,20 @@ void TestPredictionWithLesserFeatures(std::string predictor_name) {
|
||||
learner->SaveConfig(&config);
|
||||
ASSERT_EQ(get<String>(config["learner"]["gradient_booster"]["gbtree_train_param"]["predictor"]), predictor_name);
|
||||
|
||||
learner->Predict(m_test, false, &prediction);
|
||||
learner->Predict(m_test, false, &prediction, 0, 0);
|
||||
ASSERT_EQ(prediction.Size(), kRows);
|
||||
|
||||
auto m_invalid = RandomDataGenerator(kRows, kTrainCols + 1, 0.5).GenerateDMatrix(false);
|
||||
ASSERT_THROW({learner->Predict(m_invalid, false, &prediction);}, dmlc::Error);
|
||||
ASSERT_THROW({learner->Predict(m_invalid, false, &prediction, 0, 0);}, dmlc::Error);
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
HostDeviceVector<float> from_cpu;
|
||||
learner->SetParam("predictor", "cpu_predictor");
|
||||
learner->Predict(m_test, false, &from_cpu);
|
||||
learner->Predict(m_test, false, &from_cpu, 0, 0);
|
||||
|
||||
HostDeviceVector<float> from_cuda;
|
||||
learner->SetParam("predictor", "gpu_predictor");
|
||||
learner->Predict(m_test, false, &from_cuda);
|
||||
learner->Predict(m_test, false, &from_cuda, 0, 0);
|
||||
|
||||
auto const& h_cpu = from_cpu.ConstHostVector();
|
||||
auto const& h_gpu = from_cuda.ConstHostVector();
|
||||
|
||||
Reference in New Issue
Block a user