Disable parameter validation for Scikit-Learn interface. (#5167)
* Disable parameter validation for now. Scikit-Learn passes all parameters down to XGBoost, whether they are used or not. * Add option `validate_parameters`.
This commit is contained in:
parent
2b9a62a806
commit
ebc86a3afa
@ -29,10 +29,16 @@ General Parameters
|
||||
|
||||
* ``verbosity`` [default=1]
|
||||
|
||||
- Verbosity of printing messages. Valid values are 0 (silent),
|
||||
1 (warning), 2 (info), 3 (debug). Sometimes XGBoost tries to change
|
||||
configurations based on heuristics, which is displayed as warning message.
|
||||
If there's unexpected behaviour, please try to increase value of verbosity.
|
||||
- Verbosity of printing messages. Valid values are 0 (silent), 1 (warning), 2 (info), 3
|
||||
(debug). Sometimes XGBoost tries to change configurations based on heuristics, which
|
||||
is displayed as warning message. If there's unexpected behaviour, please try to
|
||||
increase value of verbosity.
|
||||
|
||||
* ``validate_parameters`` [default to false, except for Python ``train`` function]
|
||||
|
||||
- When set to True, XGBoost will perform validation of input parameters to check whether
|
||||
a parameter is used or not. The feature is still experimental. It's expected to have
|
||||
some false positives, especially when used with Scikit-Learn interface.
|
||||
|
||||
* ``nthread`` [default to maximum number of threads available if not set]
|
||||
|
||||
|
||||
@ -28,6 +28,7 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
|
||||
// gpu page size in external memory mode, 0 means using the default.
|
||||
size_t gpu_page_size;
|
||||
bool enable_experimental_json_serialization {false};
|
||||
bool validate_parameters {false};
|
||||
|
||||
void CheckDeprecated() {
|
||||
if (this->n_gpus != 0) {
|
||||
@ -70,6 +71,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
|
||||
.set_default(false)
|
||||
.describe("Enable using JSON for memory serialization (Python Pickle, "
|
||||
"rabit checkpoints etc.).");
|
||||
DMLC_DECLARE_FIELD(validate_parameters)
|
||||
.set_default(false)
|
||||
.describe("Enable to check whether parameters are used or not.");
|
||||
DMLC_DECLARE_FIELD(n_gpus)
|
||||
.set_default(0)
|
||||
.set_range(0, 1)
|
||||
|
||||
@ -1070,6 +1070,10 @@ class Booster(object):
|
||||
self.handle = ctypes.c_void_p()
|
||||
_check_call(_LIB.XGBoosterCreate(dmats, c_bst_ulong(len(cache)),
|
||||
ctypes.byref(self.handle)))
|
||||
|
||||
if isinstance(params, dict) and \
|
||||
'validate_parameters' not in params.keys():
|
||||
params['validate_parameters'] = 1
|
||||
self.set_param(params or {})
|
||||
if (params is not None) and ('booster' in params):
|
||||
self.booster = params['booster']
|
||||
|
||||
@ -224,7 +224,8 @@ class XGBModel(XGBModelBase):
|
||||
def get_params(self, deep=False):
|
||||
"""Get parameters."""
|
||||
params = super(XGBModel, self).get_params(deep=deep)
|
||||
if isinstance(self.kwargs, dict): # if kwargs is a dict, update params accordingly
|
||||
# if kwargs is a dict, update params accordingly
|
||||
if isinstance(self.kwargs, dict):
|
||||
params.update(self.kwargs)
|
||||
if params['missing'] is np.nan:
|
||||
params['missing'] = None # sklearn doesn't handle nan. see #4725
|
||||
@ -233,6 +234,11 @@ class XGBModel(XGBModelBase):
|
||||
if isinstance(params['random_state'], np.random.RandomState):
|
||||
params['random_state'] = params['random_state'].randint(
|
||||
np.iinfo(np.int32).max)
|
||||
# Parameter validation is not working with Scikit-Learn interface, as
|
||||
# it passes all parameters into XGBoost core, whether they are used or
|
||||
# not.
|
||||
if 'validate_parameters' not in params.keys():
|
||||
params['validate_parameters'] = False
|
||||
return params
|
||||
|
||||
def get_xgb_params(self):
|
||||
|
||||
@ -230,7 +230,10 @@ class LearnerImpl : public Learner {
|
||||
obj_->ProbToMargin(mparam_.base_score));
|
||||
|
||||
this->need_configuration_ = false;
|
||||
if (generic_parameters_.validate_parameters) {
|
||||
this->ValidateParameters();
|
||||
}
|
||||
|
||||
// FIXME(trivialfis): Clear the cache once binary IO is gone.
|
||||
monitor_.Stop("Configure");
|
||||
}
|
||||
@ -266,20 +269,21 @@ class LearnerImpl : public Learner {
|
||||
}
|
||||
}
|
||||
}
|
||||
auto learner_model_param = mparam_.ToJson();
|
||||
for (auto const& kv : get<Object>(learner_model_param)) {
|
||||
keys.emplace_back(kv.first);
|
||||
}
|
||||
keys.emplace_back(kEvalMetric);
|
||||
keys.emplace_back("verbosity");
|
||||
keys.emplace_back("num_output_group");
|
||||
|
||||
std::sort(keys.begin(), keys.end());
|
||||
|
||||
std::vector<std::string> provided;
|
||||
for (auto const &kv : cfg_) {
|
||||
// `num_feature` and `num_class` are automatically added due to legacy reason.
|
||||
// `verbosity` in logger is not saved, we should move it into generic_param_.
|
||||
// FIXME(trivialfis): Make eval_metric a training parameter.
|
||||
if (kv.first != "num_feature" && kv.first != "verbosity" &&
|
||||
kv.first != "num_class" && kv.first != "num_output_group" &&
|
||||
kv.first != kEvalMetric) {
|
||||
provided.push_back(kv.first);
|
||||
}
|
||||
}
|
||||
std::sort(provided.begin(), provided.end());
|
||||
|
||||
std::vector<std::string> diff;
|
||||
@ -287,12 +291,18 @@ class LearnerImpl : public Learner {
|
||||
keys.end(), std::back_inserter(diff));
|
||||
if (diff.size() != 0) {
|
||||
std::stringstream ss;
|
||||
ss << "Parameters: { ";
|
||||
ss << "\nParameters: { ";
|
||||
for (size_t i = 0; i < diff.size() - 1; ++i) {
|
||||
ss << diff[i] << ", ";
|
||||
}
|
||||
ss << diff.back();
|
||||
ss << " } are not used.";
|
||||
ss << R"W( } might not be used.
|
||||
|
||||
This may not be accurate due to some parameters are only used in language bindings but
|
||||
passed down to XGBoost core. Or some parameters are not used but slip through this
|
||||
verification. Please open an issue if you find above cases.
|
||||
|
||||
)W";
|
||||
LOG(WARNING) << ss.str();
|
||||
}
|
||||
}
|
||||
|
||||
@ -30,8 +30,6 @@ TEST(Monitor, Logging) {
|
||||
run_monitor();
|
||||
output = testing::internal::GetCapturedStderr();
|
||||
ASSERT_EQ(output.size(), 0);
|
||||
|
||||
ConsoleLogger::Configure(Args{{"verbosity", "1"}});
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
@ -31,12 +31,14 @@ TEST(Learner, Basic) {
|
||||
}
|
||||
|
||||
TEST(Learner, ParameterValidation) {
|
||||
ConsoleLogger::Configure({{"verbosity", "2"}});
|
||||
size_t constexpr kRows = 1;
|
||||
size_t constexpr kCols = 1;
|
||||
auto pp_mat = CreateDMatrix(kRows, kCols, 0);
|
||||
auto& p_mat = *pp_mat;
|
||||
|
||||
auto learner = std::unique_ptr<Learner>(Learner::Create({p_mat}));
|
||||
learner->SetParam("validate_parameters", "1");
|
||||
learner->SetParam("Knock Knock", "Who's there?");
|
||||
learner->SetParam("Silence", "....");
|
||||
learner->SetParam("tree_method", "exact");
|
||||
@ -45,7 +47,7 @@ TEST(Learner, ParameterValidation) {
|
||||
learner->Configure();
|
||||
std::string output = testing::internal::GetCapturedStderr();
|
||||
|
||||
ASSERT_TRUE(output.find("Parameters: { Knock Knock, Silence } are not used.") != std::string::npos);
|
||||
ASSERT_TRUE(output.find("Parameters: { Knock Knock, Silence }") != std::string::npos);
|
||||
delete pp_mat;
|
||||
}
|
||||
|
||||
|
||||
@ -334,7 +334,7 @@ TEST_F(SerializationTest, ConfigurationCount) {
|
||||
}
|
||||
ASSERT_EQ(occureences, 2);
|
||||
|
||||
xgboost::ConsoleLogger::Configure({{"verbosity", "1"}});
|
||||
xgboost::ConsoleLogger::Configure({{"verbosity", "2"}});
|
||||
}
|
||||
|
||||
TEST_F(SerializationTest, GPU_CoordDescent) {
|
||||
|
||||
@ -92,6 +92,8 @@ class TestPickling(unittest.TestCase):
|
||||
status = subprocess.call(args, env=env)
|
||||
assert status == 0
|
||||
|
||||
os.remove(model_path)
|
||||
|
||||
def test_pickled_predictor(self):
|
||||
x, y = build_dataset()
|
||||
train_x = xgb.DMatrix(x, label=y)
|
||||
@ -129,6 +131,8 @@ class TestPickling(unittest.TestCase):
|
||||
status = subprocess.call(args, env=env)
|
||||
assert status == 0
|
||||
|
||||
os.remove(model_path)
|
||||
|
||||
def test_predict_sklearn_pickle(self):
|
||||
x, y = build_dataset()
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user