Fix inplace predict with fallback when base margin is used. (#9536)
- Copy meta info from the proxy DMatrix.
- Use `std::call_once` to emit fewer warnings.
parent d159ee8547
commit adea842c83
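As context for the second bullet, a minimal standalone sketch of the warn-once pattern this commit switches to (plain C++, not taken from the XGBoost sources): a function-local std::once_flag runs its callback exactly once per process, whereas the previous thread_local bool guard emitted one warning per calling thread.

#include <iostream>
#include <mutex>  // for std::call_once, std::once_flag

void WarnOnce() {
  // The flag is static, so every call of WarnOnce() shares it; the lambda
  // body therefore runs on the first call only.
  static std::once_flag flag;
  std::call_once(flag, [] { std::cerr << "This warning is shown once.\n"; });
}

int main() {
  WarnOnce();  // prints the warning
  WarnOnce();  // silent
  WarnOnce();  // silent
  return 0;
}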
--- a/src/common/error_msg.cc
+++ b/src/common/error_msg.cc
@@ -3,9 +3,11 @@
  */
 #include "error_msg.h"
 
+#include <mutex>  // for call_once, once_flag
 #include <sstream>  // for stringstream
 
 #include "../collective/communicator-inl.h"  // for GetRank
+#include "xgboost/context.h"  // for Context
 #include "xgboost/logging.h"
 
 namespace xgboost::error {
@@ -26,34 +28,43 @@ void WarnDeprecatedGPUHist() {
 }
 
 void WarnManualUpdater() {
-  bool static thread_local logged{false};
-  if (logged) {
-    return;
-  }
+  static std::once_flag flag;
+  std::call_once(flag, [] {
   LOG(WARNING)
       << "You have manually specified the `updater` parameter. The `tree_method` parameter "
          "will be ignored. Incorrect sequence of updaters will produce undefined "
          "behavior. For common uses, we recommend using `tree_method` parameter instead.";
-  logged = true;
+  });
 }
 
 void WarnDeprecatedGPUId() {
-  static thread_local bool logged{false};
-  if (logged) {
-    return;
-  }
+  static std::once_flag flag;
+  std::call_once(flag, [] {
   auto msg = DeprecatedFunc("gpu_id", "2.0.0", "device");
   msg += " E.g. device=cpu/cuda/cuda:0";
   LOG(WARNING) << msg;
-  logged = true;
+  });
 }
 
 void WarnEmptyDataset() {
-  static thread_local bool logged{false};
-  if (logged) {
-    return;
-  }
-  LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank();
-  logged = true;
+  static std::once_flag flag;
+  std::call_once(flag,
+                 [] { LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank(); });
+}
+
+void MismatchedDevices(Context const* booster, Context const* data) {
+  static std::once_flag flag;
+  std::call_once(flag, [&] {
+    LOG(WARNING)
+        << "Falling back to prediction using DMatrix due to mismatched devices. This might "
+           "lead to higher memory usage and slower performance. XGBoost is running on: "
+        << booster->DeviceName() << ", while the input data is on: " << data->DeviceName() << ".\n"
+        << R"(Potential solutions:
+- Use a data structure that matches the device ordinal in the booster.
+- Set the device for booster before call to inplace_predict.
+
+This warning will only be shown once.
+)";
+  });
 }
 }  // namespace xgboost::error
--- a/src/common/error_msg.h
+++ b/src/common/error_msg.h
@@ -11,6 +11,7 @@
 #include <string>  // for string
 
 #include "xgboost/base.h"  // for bst_feature_t
+#include "xgboost/context.h"  // for Context
 #include "xgboost/logging.h"
 #include "xgboost/string_view.h"  // for StringView
 
@@ -94,5 +95,7 @@ constexpr StringView InvalidCUDAOrdinal() {
   return "Invalid device. `device` is required to be CUDA and there must be at least one GPU "
          "available for using GPU.";
 }
+
+void MismatchedDevices(Context const* booster, Context const* data);
 }  // namespace xgboost::error
 #endif  // XGBOOST_COMMON_ERROR_MSG_H_
--- a/src/data/proxy_dmatrix.cc
+++ b/src/data/proxy_dmatrix.cc
@@ -55,6 +55,7 @@ std::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const *ctx,
   }
 
   CHECK(p_fmat) << "Failed to fallback.";
+  p_fmat->Info() = proxy->Info().Copy();
   return p_fmat;
 }
 }  // namespace xgboost::data
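Why the one-line copy above matters, as a hypothetical mock-up (the struct and function names below are illustrative stand-ins, not the real XGBoost types): the fallback rebuilds a DMatrix from the proxy's raw data only, so metadata such as the base margin must be carried over explicitly, or prediction silently starts from the default base score.

#include <iostream>
#include <memory>
#include <string>

struct Info {               // stand-in for xgboost's MetaInfo
  std::string base_margin;  // stand-in for the user-supplied margin
  Info Copy() const { return *this; }
};

struct Matrix {  // stand-in for DMatrix / DMatrixProxy
  Info info;
};

std::shared_ptr<Matrix> CreateFromProxy(Matrix const& proxy) {
  auto p_fmat = std::make_shared<Matrix>();  // rebuilt from raw data only
  p_fmat->info = proxy.info.Copy();          // the fix: copy metadata along
  return p_fmat;
}

int main() {
  Matrix proxy{Info{"margin set by the user"}};
  auto fallback = CreateFromProxy(proxy);
  std::cout << fallback->info.base_margin << "\n";  // metadata survived
  return 0;
}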
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -85,25 +85,6 @@ bool UpdatersMatched(std::vector<std::string> updater_seq,
     return name == up->Name();
   });
 }
-
-void MismatchedDevices(Context const* booster, Context const* data) {
-  bool thread_local static logged{false};
-  if (logged) {
-    return;
-  }
-  LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. This might "
-                  "lead to higher memory usage and slower performance. XGBoost is running on: "
-               << booster->DeviceName() << ", while the input data is on: " << data->DeviceName()
-               << ".\n"
-               << R"(Potential solutions:
-- Use a data structure that matches the device ordinal in the booster.
-- Set the device for booster before call to inplace_predict.
-
-This warning will only be shown once for each thread. Subsequent warnings made by the
-current thread will be suppressed.
-)";
-  logged = true;
-}
 }  // namespace
 
 void GBTree::Configure(Args const& cfg) {
@@ -557,7 +538,7 @@ void GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,
   auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
   CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
   if (p_m->Ctx()->Device() != this->ctx_->Device()) {
-    MismatchedDevices(this->ctx_, p_m->Ctx());
+    error::MismatchedDevices(this->ctx_, p_m->Ctx());
     CHECK_EQ(out_preds->version, 0);
     auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
     CHECK(proxy) << error::InplacePredictProxy();
@@ -810,7 +791,7 @@ class Dart : public GBTree {
     auto n_groups = model_.learner_model_param->num_output_group;
 
     if (ctx_->Device() != p_fmat->Ctx()->Device()) {
-      MismatchedDevices(ctx_, p_fmat->Ctx());
+      error::MismatchedDevices(ctx_, p_fmat->Ctx());
      auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_fmat);
      CHECK(proxy) << error::InplacePredictProxy();
      auto p_fmat = data::CreateDMatrixFromProxy(ctx_, proxy, missing);
--- a/tests/cpp/gbm/test_gbtree.cc
+++ b/tests/cpp/gbm/test_gbtree.cc
@@ -58,21 +58,6 @@ void TestInplaceFallback(Context const* ctx) {
   HostDeviceVector<float>* out_predt{nullptr};
   ConsoleLogger::Configure(Args{{"verbosity", "1"}});
   std::string output;
-  // test whether the warning is raised
-#if !defined(_WIN32)
-  // Windows has issue with CUDA and thread local storage. For some reason, on Windows a
-  // cudaInitializationError is raised during destruction of `HostDeviceVector`. This
-  // might be related to https://github.com/dmlc/xgboost/issues/5793
-  ::testing::internal::CaptureStderr();
-  std::thread{[&] {
-    // Launch a new thread to ensure a warning is raised as we prevent over-verbose
-    // warning by using thread-local flags.
-    learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
-                            &out_predt, 0, 0);
-  }}.join();
-  output = testing::internal::GetCapturedStderr();
-  ASSERT_NE(output.find("Falling back"), std::string::npos);
-#endif
 
   learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
                           &out_predt, 0, 0);
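The deleted block above spawned a second thread specifically to defeat the old thread_local guard. A small sketch (plain C++, nothing assumed beyond the standard library) of why it is no longer needed: with std::call_once the flag is process-wide, so a warning already emitted on one thread stays suppressed on every other thread.

#include <iostream>
#include <mutex>
#include <thread>

std::once_flag flag;

void Warn() {
  std::call_once(flag, [] { std::cerr << "Falling back\n"; });
}

int main() {
  std::thread{Warn}.join();  // emits the warning
  std::thread{Warn}.join();  // silent: the once_flag is shared across threads
  return 0;
}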
--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -191,14 +191,32 @@ class TestGPUPredict:
         np.testing.assert_allclose(predt_0, predt_3)
         np.testing.assert_allclose(predt_0, predt_4)
 
-    def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
+    def run_inplace_base_margin(
+        self, device: int, booster: xgb.Booster, dtrain: xgb.DMatrix, X, base_margin
+    ) -> None:
         import cupy as cp
 
+        booster.set_param({"device": f"cuda:{device}"})
         dtrain.set_info(base_margin=base_margin)
         from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
         from_dmatrix = booster.predict(dtrain)
         cp.testing.assert_allclose(from_inplace, from_dmatrix)
 
+        booster = booster.copy()  # clear prediction cache.
+        booster.set_param({"device": "cpu"})
+        from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
+        from_dmatrix = booster.predict(dtrain)
+        cp.testing.assert_allclose(from_inplace, from_dmatrix)
+
+        booster = booster.copy()  # clear prediction cache.
+        base_margin = cp.asnumpy(base_margin)
+        if hasattr(X, "values"):
+            X = cp.asnumpy(X.values)
+        booster.set_param({"device": f"cuda:{device}"})
+        from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
+        from_dmatrix = booster.predict(dtrain)
+        cp.testing.assert_allclose(from_inplace, from_dmatrix, rtol=1e-6)
+
     def run_inplace_predict_cupy(self, device: int) -> None:
         import cupy as cp
 
@@ -244,7 +262,7 @@ class TestGPUPredict:
         run_threaded_predict(X, rows, predict_dense)
 
         base_margin = cp_rng.randn(rows)
-        self.run_inplace_base_margin(booster, dtrain, X, base_margin)
+        self.run_inplace_base_margin(device, booster, dtrain, X, base_margin)
 
         # Create a wide dataset
         X = cp_rng.randn(100, 10000)
@@ -318,7 +336,7 @@ class TestGPUPredict:
         run_threaded_predict(X, rows, predict_df)
 
         base_margin = cudf.Series(rng.randn(rows))
-        self.run_inplace_base_margin(booster, dtrain, X, base_margin)
+        self.run_inplace_base_margin(0, booster, dtrain, X, base_margin)
 
     @given(
         strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy