Save model in ubj as the default. (#9947)

This commit is contained in:
Jiaming Yuan 2024-01-05 17:53:36 +08:00 committed by GitHub
parent c03a4d5088
commit 38dd91f491
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 598 additions and 550 deletions

View File

@ -30,9 +30,6 @@ import org.apache.spark.ml.param.Params
import org.apache.spark.ml.util.DefaultParamsReader.Metadata import org.apache.spark.ml.util.DefaultParamsReader.Metadata
abstract class XGBoostWriter extends MLWriter { abstract class XGBoostWriter extends MLWriter {
/** Currently it's using the "deprecated" format as
* default, which will be changed into `ubj` in future releases. */
def getModelFormat(): String = { def getModelFormat(): String = {
optionMap.getOrElse("format", JBooster.DEFAULT_FORMAT) optionMap.getOrElse("format", JBooster.DEFAULT_FORMAT)
} }

View File

@ -1,5 +1,5 @@
/* /*
Copyright (c) 2014-2022 by Contributors Copyright (c) 2014-2024 by Contributors
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
@ -432,6 +432,7 @@ class XGBoostClassifierSuite extends AnyFunSuite with PerTest with TmpFolderPerS
val xgb = new XGBoostClassifier(paramMap) val xgb = new XGBoostClassifier(paramMap)
val model = xgb.fit(trainingDF) val model = xgb.fit(trainingDF)
// test json
val modelPath = new File(tempDir.toFile, "xgbc").getPath val modelPath = new File(tempDir.toFile, "xgbc").getPath
model.write.option("format", "json").save(modelPath) model.write.option("format", "json").save(modelPath)
val nativeJsonModelPath = new File(tempDir.toFile, "nativeModel.json").getPath val nativeJsonModelPath = new File(tempDir.toFile, "nativeModel.json").getPath
@ -439,21 +440,21 @@ class XGBoostClassifierSuite extends AnyFunSuite with PerTest with TmpFolderPerS
assert(compareTwoFiles(new File(modelPath, "data/XGBoostClassificationModel").getPath, assert(compareTwoFiles(new File(modelPath, "data/XGBoostClassificationModel").getPath,
nativeJsonModelPath)) nativeJsonModelPath))
// test default "deprecated" // test ubj
val modelUbjPath = new File(tempDir.toFile, "xgbcUbj").getPath val modelUbjPath = new File(tempDir.toFile, "xgbcUbj").getPath
model.write.save(modelUbjPath) model.write.save(modelUbjPath)
val nativeDeprecatedModelPath = new File(tempDir.toFile, "nativeModel").getPath val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel.ubj").getPath
model.nativeBooster.saveModel(nativeDeprecatedModelPath) model.nativeBooster.saveModel(nativeUbjModelPath)
assert(compareTwoFiles(new File(modelUbjPath, "data/XGBoostClassificationModel").getPath, assert(compareTwoFiles(new File(modelUbjPath, "data/XGBoostClassificationModel").getPath,
nativeDeprecatedModelPath)) nativeUbjModelPath))
// json file should be indifferent with ubj file // json file should be indifferent with ubj file
val modelJsonPath = new File(tempDir.toFile, "xgbcJson").getPath val modelJsonPath = new File(tempDir.toFile, "xgbcJson").getPath
model.write.option("format", "json").save(modelJsonPath) model.write.option("format", "json").save(modelJsonPath)
val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel1.ubj").getPath val nativeUbjModelPath1 = new File(tempDir.toFile, "nativeModel1.ubj").getPath
model.nativeBooster.saveModel(nativeUbjModelPath) model.nativeBooster.saveModel(nativeUbjModelPath1)
assert(!compareTwoFiles(new File(modelJsonPath, "data/XGBoostClassificationModel").getPath, assert(!compareTwoFiles(new File(modelJsonPath, "data/XGBoostClassificationModel").getPath,
nativeUbjModelPath)) nativeUbjModelPath1))
} }
test("native json model file should store feature_name and feature_type") { test("native json model file should store feature_name and feature_type") {

View File

@ -1,5 +1,5 @@
/* /*
Copyright (c) 2014-2022 by Contributors Copyright (c) 2014-2024 by Contributors
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
@ -333,21 +333,24 @@ class XGBoostRegressorSuite extends AnyFunSuite with PerTest with TmpFolderPerSu
assert(compareTwoFiles(new File(modelPath, "data/XGBoostRegressionModel").getPath, assert(compareTwoFiles(new File(modelPath, "data/XGBoostRegressionModel").getPath,
nativeJsonModelPath)) nativeJsonModelPath))
// test default "deprecated" // test default "ubj"
val modelUbjPath = new File(tempDir.toFile, "xgbcUbj").getPath val modelUbjPath = new File(tempDir.toFile, "xgbcUbj").getPath
model.write.save(modelUbjPath) model.write.save(modelUbjPath)
val nativeDeprecatedModelPath = new File(tempDir.toFile, "nativeModel").getPath
model.nativeBooster.saveModel(nativeDeprecatedModelPath)
assert(compareTwoFiles(new File(modelUbjPath, "data/XGBoostRegressionModel").getPath,
nativeDeprecatedModelPath))
// json file should be indifferent with ubj file val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel.ubj").getPath
val modelJsonPath = new File(tempDir.toFile, "xgbcJson").getPath
model.write.option("format", "json").save(modelJsonPath)
val nativeUbjModelPath = new File(tempDir.toFile, "nativeModel1.ubj").getPath
model.nativeBooster.saveModel(nativeUbjModelPath) model.nativeBooster.saveModel(nativeUbjModelPath)
assert(!compareTwoFiles(new File(modelJsonPath, "data/XGBoostRegressionModel").getPath,
nativeUbjModelPath))
}
assert(compareTwoFiles(new File(modelUbjPath, "data/XGBoostRegressionModel").getPath,
nativeUbjModelPath))
// test the deprecated format
val modelDeprecatedPath = new File(tempDir.toFile, "modelDeprecated").getPath
model.write.option("format", "deprecated").save(modelDeprecatedPath)
val nativeDeprecatedModelPath = new File(tempDir.toFile, "nativeModel.deprecated").getPath
model.nativeBooster.saveModel(nativeDeprecatedModelPath)
assert(compareTwoFiles(new File(modelDeprecatedPath, "data/XGBoostRegressionModel").getPath,
nativeDeprecatedModelPath))
}
} }

View File

@ -34,7 +34,7 @@ import org.apache.commons.logging.LogFactory;
* Booster for xgboost, this is a model API that support interactive build of a XGBoost Model * Booster for xgboost, this is a model API that support interactive build of a XGBoost Model
*/ */
public class Booster implements Serializable, KryoSerializable { public class Booster implements Serializable, KryoSerializable {
public static final String DEFAULT_FORMAT = "deprecated"; public static final String DEFAULT_FORMAT = "ubj";
private static final Log logger = LogFactory.getLog(Booster.class); private static final Log logger = LogFactory.getLog(Booster.class);
// handle to the booster. // handle to the booster.
private long handle = 0; private long handle = 0;
@ -788,8 +788,7 @@ public class Booster implements Serializable, KryoSerializable {
} }
/** /**
* Save model into raw byte array. Currently it's using the deprecated format as * Save model into raw byte array in the UBJSON ("ubj") format.
* default, which will be changed into `ubj` in future releases.
* *
* @return the saved byte array * @return the saved byte array
* @throws XGBoostError native error * @throws XGBoostError native error

View File

@ -337,8 +337,7 @@ class Booster private[xgboost4j](private[xgboost4j] var booster: JBooster)
} }
/** /**
* Save model into a raw byte array. Currently it's using the deprecated format as * Save model into a raw byte array in the UBJSON ("ubj") format.
* default, which will be changed into `ubj` in future releases.
*/ */
@throws(classOf[XGBoostError]) @throws(classOf[XGBoostError])
def toByteArray: Array[Byte] = { def toByteArray: Array[Byte] = {

View File

@ -2613,7 +2613,7 @@ class Booster:
else: else:
raise TypeError("fname must be a string or os PathLike") raise TypeError("fname must be a string or os PathLike")
def save_raw(self, raw_format: str = "deprecated") -> bytearray: def save_raw(self, raw_format: str = "ubj") -> bytearray:
"""Save the model to a in memory buffer representation instead of file. """Save the model to a in memory buffer representation instead of file.
Parameters Parameters

View File

@ -630,7 +630,7 @@ sparse_datasets_strategy = strategies.sampled_from(
def make_datasets_with_margin( def make_datasets_with_margin(
unweighted_strategy: strategies.SearchStrategy, unweighted_strategy: strategies.SearchStrategy,
) -> Callable: ) -> Callable[[], strategies.SearchStrategy[TestDataset]]:
"""Factory function for creating strategies that generates datasets with weight and """Factory function for creating strategies that generates datasets with weight and
base margin. base margin.
@ -668,8 +668,7 @@ def make_datasets_with_margin(
# A strategy for drawing from a set of example datasets. May add random weights to the # A strategy for drawing from a set of example datasets. May add random weights to the
# dataset # dataset
@memory.cache def make_dataset_strategy() -> strategies.SearchStrategy[TestDataset]:
def make_dataset_strategy() -> Callable:
_unweighted_datasets_strategy = strategies.sampled_from( _unweighted_datasets_strategy = strategies.sampled_from(
[ [
TestDataset( TestDataset(

View File

@ -1313,10 +1313,8 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
namespace { namespace {
void WarnOldModel() { void WarnOldModel() {
if (XGBOOST_VER_MAJOR >= 2) {
LOG(WARNING) << "Saving into deprecated binary model format, please consider using `json` or " LOG(WARNING) << "Saving into deprecated binary model format, please consider using `json` or "
"`ubj`. Model format will default to JSON in XGBoost 2.2 if not specified."; "`ubj`. Model format is default to UBJSON in XGBoost 2.1 if not specified.";
}
} }
} // anonymous namespace } // anonymous namespace
@ -1339,14 +1337,14 @@ XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
save_json(std::ios::out); save_json(std::ios::out);
} else if (common::FileExtension(fname) == "ubj") { } else if (common::FileExtension(fname) == "ubj") {
save_json(std::ios::binary); save_json(std::ios::binary);
} else if (XGBOOST_VER_MAJOR == 2 && XGBOOST_VER_MINOR >= 2) { } else if (common::FileExtension(fname) == "deprecated") {
LOG(WARNING) << "Saving model to JSON as default. You can use file extension `json`, `ubj` or "
"`deprecated` to choose between formats.";
save_json(std::ios::out);
} else {
WarnOldModel(); WarnOldModel();
auto *bst = static_cast<Learner *>(handle); auto *bst = static_cast<Learner *>(handle);
bst->SaveModel(fo.get()); bst->SaveModel(fo.get());
} else {
LOG(WARNING) << "Saving model in the UBJSON format as default. You can use file extension:"
" `json`, `ubj` or `deprecated` to choose between formats.";
save_json(std::ios::binary);
} }
API_END(); API_END();
} }

View File

@ -27,6 +27,7 @@ class LintersPaths:
"tests/python/test_quantile_dmatrix.py", "tests/python/test_quantile_dmatrix.py",
"tests/python/test_tree_regularization.py", "tests/python/test_tree_regularization.py",
"tests/python/test_shap.py", "tests/python/test_shap.py",
"tests/python/test_model_io.py",
"tests/python/test_with_pandas.py", "tests/python/test_with_pandas.py",
"tests/python-gpu/", "tests/python-gpu/",
"tests/python-sycl/", "tests/python-sycl/",
@ -83,6 +84,7 @@ class LintersPaths:
"tests/python/test_multi_target.py", "tests/python/test_multi_target.py",
"tests/python-gpu/test_gpu_data_iterator.py", "tests/python-gpu/test_gpu_data_iterator.py",
"tests/python-gpu/load_pickle.py", "tests/python-gpu/load_pickle.py",
"tests/python/test_model_io.py",
"tests/test_distributed/test_with_spark/test_data.py", "tests/test_distributed/test_with_spark/test_data.py",
"tests/test_distributed/test_gpu_with_spark/test_data.py", "tests/test_distributed/test_gpu_with_spark/test_data.py",
"tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py", "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",

View File

@ -10,46 +10,48 @@ import pytest
import xgboost as xgb import xgboost as xgb
from xgboost import testing as tm from xgboost import testing as tm
dpath = 'demo/data/' dpath = "demo/data/"
rng = np.random.RandomState(1994) rng = np.random.RandomState(1994)
class TestBasic: class TestBasic:
def test_compat(self): def test_compat(self):
from xgboost.compat import lazy_isinstance from xgboost.compat import lazy_isinstance
a = np.array([1, 2, 3]) a = np.array([1, 2, 3])
assert lazy_isinstance(a, 'numpy', 'ndarray') assert lazy_isinstance(a, "numpy", "ndarray")
assert not lazy_isinstance(a, 'numpy', 'dataframe') assert not lazy_isinstance(a, "numpy", "dataframe")
def test_basic(self): def test_basic(self):
dtrain, dtest = tm.load_agaricus(__file__) dtrain, dtest = tm.load_agaricus(__file__)
param = {'max_depth': 2, 'eta': 1, param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
'objective': 'binary:logistic'}
# specify validations set to watch performance # specify validations set to watch performance
watchlist = [(dtrain, 'train')] watchlist = [(dtrain, "train")]
num_round = 2 num_round = 2
bst = xgb.train(param, dtrain, num_round, watchlist, verbose_eval=True) bst = xgb.train(param, dtrain, num_round, evals=watchlist, verbose_eval=True)
preds = bst.predict(dtrain) preds = bst.predict(dtrain)
labels = dtrain.get_label() labels = dtrain.get_label()
err = sum(1 for i in range(len(preds)) err = sum(
if int(preds[i] > 0.5) != labels[i]) / float(len(preds)) 1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
) / float(len(preds))
# error must be smaller than 10% # error must be smaller than 10%
assert err < 0.1 assert err < 0.1
preds = bst.predict(dtest) preds = bst.predict(dtest)
labels = dtest.get_label() labels = dtest.get_label()
err = sum(1 for i in range(len(preds)) err = sum(
if int(preds[i] > 0.5) != labels[i]) / float(len(preds)) 1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
) / float(len(preds))
# error must be smaller than 10% # error must be smaller than 10%
assert err < 0.1 assert err < 0.1
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
dtest_path = os.path.join(tmpdir, 'dtest.dmatrix') dtest_path = os.path.join(tmpdir, "dtest.dmatrix")
# save dmatrix into binary buffer # save dmatrix into binary buffer
dtest.save_binary(dtest_path) dtest.save_binary(dtest_path)
# save model # save model
model_path = os.path.join(tmpdir, 'model.booster') model_path = os.path.join(tmpdir, "model.ubj")
bst.save_model(model_path) bst.save_model(model_path)
# load model and data in # load model and data in
bst2 = xgb.Booster(model_file=model_path) bst2 = xgb.Booster(model_file=model_path)
@ -59,17 +61,21 @@ class TestBasic:
assert np.sum(np.abs(preds2 - preds)) == 0 assert np.sum(np.abs(preds2 - preds)) == 0
def test_metric_config(self): def test_metric_config(self):
# Make sure that the metric configuration happens in booster so the # Make sure that the metric configuration happens in booster so the string
# string `['error', 'auc']` doesn't get passed down to core. # `['error', 'auc']` doesn't get passed down to core.
dtrain, dtest = tm.load_agaricus(__file__) dtrain, dtest = tm.load_agaricus(__file__)
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, param = {
'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']} "max_depth": 2,
watchlist = [(dtest, 'eval'), (dtrain, 'train')] "eta": 1,
"objective": "binary:logistic",
"eval_metric": ["error", "auc"],
}
watchlist = [(dtest, "eval"), (dtrain, "train")]
num_round = 2 num_round = 2
booster = xgb.train(param, dtrain, num_round, watchlist) booster = xgb.train(param, dtrain, num_round, evals=watchlist)
predt_0 = booster.predict(dtrain) predt_0 = booster.predict(dtrain)
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, 'model.json') path = os.path.join(tmpdir, "model.json")
booster.save_model(path) booster.save_model(path)
booster = xgb.Booster(params=param, model_file=path) booster = xgb.Booster(params=param, model_file=path)
@ -78,22 +84,23 @@ class TestBasic:
def test_multiclass(self): def test_multiclass(self):
dtrain, dtest = tm.load_agaricus(__file__) dtrain, dtest = tm.load_agaricus(__file__)
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2} param = {"max_depth": 2, "eta": 1, "num_class": 2}
# specify validations set to watch performance # specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')] watchlist = [(dtest, "eval"), (dtrain, "train")]
num_round = 2 num_round = 2
bst = xgb.train(param, dtrain, num_round, watchlist) bst = xgb.train(param, dtrain, num_round, evals=watchlist)
# this is prediction # this is prediction
preds = bst.predict(dtest) preds = bst.predict(dtest)
labels = dtest.get_label() labels = dtest.get_label()
err = sum(1 for i in range(len(preds)) err = sum(1 for i in range(len(preds)) if preds[i] != labels[i]) / float(
if preds[i] != labels[i]) / float(len(preds)) len(preds)
)
# error must be smaller than 10% # error must be smaller than 10%
assert err < 0.1 assert err < 0.1
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
dtest_path = os.path.join(tmpdir, 'dtest.buffer') dtest_path = os.path.join(tmpdir, "dtest.buffer")
model_path = os.path.join(tmpdir, 'xgb.model') model_path = os.path.join(tmpdir, "model.ubj")
# save dmatrix into binary buffer # save dmatrix into binary buffer
dtest.save_binary(dtest_path) dtest.save_binary(dtest_path)
# save model # save model
@ -108,33 +115,39 @@ class TestBasic:
def test_dump(self): def test_dump(self):
data = np.random.randn(100, 2) data = np.random.randn(100, 2)
target = np.array([0, 1] * 50) target = np.array([0, 1] * 50)
features = ['Feature1', 'Feature2'] features = ["Feature1", "Feature2"]
dm = xgb.DMatrix(data, label=target, feature_names=features) dm = xgb.DMatrix(data, label=target, feature_names=features)
params = {'objective': 'binary:logistic', params = {
'eval_metric': 'logloss', "objective": "binary:logistic",
'eta': 0.3, "eval_metric": "logloss",
'max_depth': 1} "eta": 0.3,
"max_depth": 1,
}
bst = xgb.train(params, dm, num_boost_round=1) bst = xgb.train(params, dm, num_boost_round=1)
# number of feature importances should == number of features # number of feature importances should == number of features
dump1 = bst.get_dump() dump1 = bst.get_dump()
assert len(dump1) == 1, 'Expected only 1 tree to be dumped.' assert len(dump1) == 1, "Expected only 1 tree to be dumped."
len(dump1[0].splitlines()) == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.' len(
dump1[0].splitlines()
) == 3, "Expected 1 root and 2 leaves - 3 lines in dump."
dump2 = bst.get_dump(with_stats=True) dump2 = bst.get_dump(with_stats=True)
assert dump2[0].count('\n') == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.' assert (
msg = 'Expected more info when with_stats=True is given.' dump2[0].count("\n") == 3
assert dump2[0].find('\n') > dump1[0].find('\n'), msg ), "Expected 1 root and 2 leaves - 3 lines in dump."
msg = "Expected more info when with_stats=True is given."
assert dump2[0].find("\n") > dump1[0].find("\n"), msg
dump3 = bst.get_dump(dump_format="json") dump3 = bst.get_dump(dump_format="json")
dump3j = json.loads(dump3[0]) dump3j = json.loads(dump3[0])
assert dump3j['nodeid'] == 0, 'Expected the root node on top.' assert dump3j["nodeid"] == 0, "Expected the root node on top."
dump4 = bst.get_dump(dump_format="json", with_stats=True) dump4 = bst.get_dump(dump_format="json", with_stats=True)
dump4j = json.loads(dump4[0]) dump4j = json.loads(dump4[0])
assert 'gain' in dump4j, "Expected 'gain' to be dumped in JSON." assert "gain" in dump4j, "Expected 'gain' to be dumped in JSON."
with pytest.raises(ValueError): with pytest.raises(ValueError):
bst.get_dump(fmap="foo") bst.get_dump(fmap="foo")
@ -163,12 +176,14 @@ class TestBasic:
def test_load_file_invalid(self): def test_load_file_invalid(self):
with pytest.raises(xgb.core.XGBoostError): with pytest.raises(xgb.core.XGBoostError):
xgb.Booster(model_file='incorrect_path') xgb.Booster(model_file="incorrect_path")
with pytest.raises(xgb.core.XGBoostError): with pytest.raises(xgb.core.XGBoostError):
xgb.Booster(model_file=u'不正なパス') xgb.Booster(model_file="不正なパス")
@pytest.mark.parametrize("path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"]) @pytest.mark.parametrize(
"path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"]
)
def test_unicode_path(self, tmpdir, path): def test_unicode_path(self, tmpdir, path):
model_path = pathlib.Path(tmpdir) / path model_path = pathlib.Path(tmpdir) / path
dtrain, _ = tm.load_agaricus(__file__) dtrain, _ = tm.load_agaricus(__file__)
@ -180,12 +195,11 @@ class TestBasic:
assert bst.get_dump(dump_format="text") == bst2.get_dump(dump_format="text") assert bst.get_dump(dump_format="text") == bst2.get_dump(dump_format="text")
def test_dmatrix_numpy_init_omp(self): def test_dmatrix_numpy_init_omp(self):
rows = [1000, 11326, 15000] rows = [1000, 11326, 15000]
cols = 50 cols = 50
for row in rows: for row in rows:
X = np.random.randn(row, cols) X = np.random.randn(row, cols)
y = np.random.randn(row).astype('f') y = np.random.randn(row).astype("f")
dm = xgb.DMatrix(X, y, nthread=0) dm = xgb.DMatrix(X, y, nthread=0)
np.testing.assert_array_equal(dm.get_label(), y) np.testing.assert_array_equal(dm.get_label(), y)
assert dm.num_row() == row assert dm.num_row() == row
@ -198,8 +212,7 @@ class TestBasic:
def test_cv(self): def test_cv(self):
dm, _ = tm.load_agaricus(__file__) dm, _ = tm.load_agaricus(__file__)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
'objective': 'binary:logistic'}
# return np.ndarray # return np.ndarray
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False) cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)
@ -208,19 +221,18 @@ class TestBasic:
def test_cv_no_shuffle(self): def test_cv_no_shuffle(self):
dm, _ = tm.load_agaricus(__file__) dm, _ = tm.load_agaricus(__file__)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
'objective': 'binary:logistic'}
# return np.ndarray # return np.ndarray
cv = xgb.cv(params, dm, num_boost_round=10, shuffle=False, nfold=10, cv = xgb.cv(
as_pandas=False) params, dm, num_boost_round=10, shuffle=False, nfold=10, as_pandas=False
)
assert isinstance(cv, dict) assert isinstance(cv, dict)
assert len(cv) == (4) assert len(cv) == (4)
def test_cv_explicit_fold_indices(self): def test_cv_explicit_fold_indices(self):
dm, _ = tm.load_agaricus(__file__) dm, _ = tm.load_agaricus(__file__)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
'binary:logistic'}
folds = [ folds = [
# Train Test # Train Test
([1, 3], [5, 8]), ([1, 3], [5, 8]),
@ -228,15 +240,13 @@ class TestBasic:
] ]
# return np.ndarray # return np.ndarray
cv = xgb.cv(params, dm, num_boost_round=10, folds=folds, cv = xgb.cv(params, dm, num_boost_round=10, folds=folds, as_pandas=False)
as_pandas=False)
assert isinstance(cv, dict) assert isinstance(cv, dict)
assert len(cv) == (4) assert len(cv) == (4)
@pytest.mark.skipif(**tm.skip_s390x()) @pytest.mark.skipif(**tm.skip_s390x())
def test_cv_explicit_fold_indices_labels(self): def test_cv_explicit_fold_indices_labels(self):
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': params = {"max_depth": 2, "eta": 1, "objective": "reg:squarederror"}
'reg:squarederror'}
N = 100 N = 100
F = 3 F = 3
dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N)) dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N))
@ -252,9 +262,10 @@ class TestBasic:
super().__init__() super().__init__()
def after_iteration( def after_iteration(
self, model, self,
model,
epoch: int, epoch: int,
evals_log: xgb.callback.TrainingCallback.EvalsLog evals_log: xgb.callback.TrainingCallback.EvalsLog,
): ):
print([fold.dtest.get_label() for fold in model.cvfolds]) print([fold.dtest.get_label() for fold in model.cvfolds])
@ -263,12 +274,18 @@ class TestBasic:
# Run cross validation and capture standard out to test callback result # Run cross validation and capture standard out to test callback result
with tm.captured_output() as (out, err): with tm.captured_output() as (out, err):
xgb.cv( xgb.cv(
params, dm, num_boost_round=1, folds=folds, callbacks=[cb], params,
as_pandas=False dm,
num_boost_round=1,
folds=folds,
callbacks=[cb],
as_pandas=False,
) )
output = out.getvalue().strip() output = out.getvalue().strip()
solution = ('[array([5., 8.], dtype=float32), array([23., 43., 11.],' + solution = (
' dtype=float32)]') "[array([5., 8.], dtype=float32), array([23., 43., 11.],"
+ " dtype=float32)]"
)
assert output == solution assert output == solution
@ -285,7 +302,7 @@ class TestBasicPathLike:
"""Saving to a binary file using pathlib from a DMatrix.""" """Saving to a binary file using pathlib from a DMatrix."""
data = np.random.randn(100, 2) data = np.random.randn(100, 2)
target = np.array([0, 1] * 50) target = np.array([0, 1] * 50)
features = ['Feature1', 'Feature2'] features = ["Feature1", "Feature2"]
dm = xgb.DMatrix(data, label=target, feature_names=features) dm = xgb.DMatrix(data, label=target, feature_names=features)
@ -299,42 +316,3 @@ class TestBasicPathLike:
"""An invalid model_file path should raise XGBoostError.""" """An invalid model_file path should raise XGBoostError."""
with pytest.raises(xgb.core.XGBoostError): with pytest.raises(xgb.core.XGBoostError):
xgb.Booster(model_file=Path("invalidpath")) xgb.Booster(model_file=Path("invalidpath"))
def test_Booster_save_and_load(self):
"""Saving and loading model files from paths."""
save_path = Path("saveload.model")
data = np.random.randn(100, 2)
target = np.array([0, 1] * 50)
features = ['Feature1', 'Feature2']
dm = xgb.DMatrix(data, label=target, feature_names=features)
params = {'objective': 'binary:logistic',
'eval_metric': 'logloss',
'eta': 0.3,
'max_depth': 1}
bst = xgb.train(params, dm, num_boost_round=1)
# save, assert exists
bst.save_model(save_path)
assert save_path.exists()
def dump_assertions(dump):
"""Assertions for the expected dump from Booster"""
assert len(dump) == 1, 'Exepcted only 1 tree to be dumped.'
assert len(dump[0].splitlines()) == 3, 'Expected 1 root and 2 leaves - 3 lines.'
# load the model again using Path
bst2 = xgb.Booster(model_file=save_path)
dump2 = bst2.get_dump()
dump_assertions(dump2)
# load again using load_model
bst3 = xgb.Booster()
bst3.load_model(save_path)
dump3 = bst3.get_dump()
dump_assertions(dump3)
# remove file
Path.unlink(save_path)

View File

@ -15,33 +15,9 @@ dpath = tm.data_dir(__file__)
rng = np.random.RandomState(1994) rng = np.random.RandomState(1994)
def json_model(model_path: str, parameters: dict) -> dict:
datasets = pytest.importorskip("sklearn.datasets")
X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)
if parameters.get("objective", None) == "multi:softmax":
parameters["num_class"] = 3
dm1 = xgb.DMatrix(X, y)
bst = xgb.train(parameters, dm1)
bst.save_model(model_path)
if model_path.endswith("ubj"):
import ubjson
with open(model_path, "rb") as ubjfd:
model = ubjson.load(ubjfd)
else:
with open(model_path, "r") as fd:
model = json.load(fd)
return model
class TestModels: class TestModels:
def test_glm(self): def test_glm(self):
param = {'verbosity': 0, 'objective': 'binary:logistic', param = {'objective': 'binary:logistic',
'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1, 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
'nthread': 1} 'nthread': 1}
dtrain, dtest = tm.load_agaricus(__file__) dtrain, dtest = tm.load_agaricus(__file__)
@ -73,7 +49,7 @@ class TestModels:
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
dtest_path = os.path.join(tmpdir, 'dtest.dmatrix') dtest_path = os.path.join(tmpdir, 'dtest.dmatrix')
model_path = os.path.join(tmpdir, 'xgboost.model.dart') model_path = os.path.join(tmpdir, "xgboost.model.dart.ubj")
# save dmatrix into binary buffer # save dmatrix into binary buffer
dtest.save_binary(dtest_path) dtest.save_binary(dtest_path)
model_path = model_path model_path = model_path
@ -101,7 +77,6 @@ class TestModels:
# check whether sample_type and normalize_type work # check whether sample_type and normalize_type work
num_round = 50 num_round = 50
param['verbosity'] = 0
param['learning_rate'] = 0.1 param['learning_rate'] = 0.1
param['rate_drop'] = 0.1 param['rate_drop'] = 0.1
preds_list = [] preds_list = []
@ -214,8 +189,7 @@ class TestModels:
assert set(evals_result['eval'].keys()) == {'auc', 'error', 'logloss'} assert set(evals_result['eval'].keys()) == {'auc', 'error', 'logloss'}
def test_fpreproc(self): def test_fpreproc(self):
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
'objective': 'binary:logistic'}
num_round = 2 num_round = 2
def fpreproc(dtrain, dtest, param): def fpreproc(dtrain, dtest, param):
@ -229,8 +203,7 @@ class TestModels:
metrics={'auc'}, seed=0, fpreproc=fpreproc) metrics={'auc'}, seed=0, fpreproc=fpreproc)
def test_show_stdv(self): def test_show_stdv(self):
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
'objective': 'binary:logistic'}
num_round = 2 num_round = 2
dtrain, _ = tm.load_agaricus(__file__) dtrain, _ = tm.load_agaricus(__file__)
xgb.cv(param, dtrain, num_round, nfold=5, xgb.cv(param, dtrain, num_round, nfold=5,
@ -273,142 +246,6 @@ class TestModels:
bst = xgb.train([], dm2) bst = xgb.train([], dm2)
bst.predict(dm2) # success bst.predict(dm2) # success
def test_model_binary_io(self):
model_path = 'test_model_binary_io.bin'
parameters = {'tree_method': 'hist', 'booster': 'gbtree',
'scale_pos_weight': '0.5'}
X = np.random.random((10, 3))
y = np.random.random((10,))
dtrain = xgb.DMatrix(X, y)
bst = xgb.train(parameters, dtrain, num_boost_round=2)
bst.save_model(model_path)
bst = xgb.Booster(model_file=model_path)
os.remove(model_path)
config = json.loads(bst.save_config())
assert float(config['learner']['objective'][
'reg_loss_param']['scale_pos_weight']) == 0.5
buf = bst.save_raw()
from_raw = xgb.Booster()
from_raw.load_model(buf)
buf_from_raw = from_raw.save_raw()
assert buf == buf_from_raw
def run_model_json_io(self, parameters: dict, ext: str) -> None:
if ext == "ubj" and tm.no_ubjson()["condition"]:
pytest.skip(tm.no_ubjson()["reason"])
loc = locale.getpreferredencoding(False)
model_path = 'test_model_json_io.' + ext
j_model = json_model(model_path, parameters)
assert isinstance(j_model['learner'], dict)
bst = xgb.Booster(model_file=model_path)
bst.save_model(fname=model_path)
if ext == "ubj":
import ubjson
with open(model_path, "rb") as ubjfd:
j_model = ubjson.load(ubjfd)
else:
with open(model_path, 'r') as fd:
j_model = json.load(fd)
assert isinstance(j_model['learner'], dict)
os.remove(model_path)
assert locale.getpreferredencoding(False) == loc
json_raw = bst.save_raw(raw_format="json")
from_jraw = xgb.Booster()
from_jraw.load_model(json_raw)
ubj_raw = bst.save_raw(raw_format="ubj")
from_ubjraw = xgb.Booster()
from_ubjraw.load_model(ubj_raw)
if parameters.get("multi_strategy", None) != "multi_output_tree":
# old binary model is not supported.
old_from_json = from_jraw.save_raw(raw_format="deprecated")
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
assert old_from_json == old_from_ubj
raw_json = bst.save_raw(raw_format="json")
pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
bst.load_model(bytearray(pretty, encoding="ascii"))
if parameters.get("multi_strategy", None) != "multi_output_tree":
# old binary model is not supported.
old_from_json = from_jraw.save_raw(raw_format="deprecated")
old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")
assert old_from_json == old_from_ubj
rng = np.random.default_rng()
X = rng.random(size=from_jraw.num_features() * 10).reshape(
(10, from_jraw.num_features())
)
predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
predt_from_bst = bst.predict(xgb.DMatrix(X))
np.testing.assert_allclose(predt_from_jraw, predt_from_bst)
@pytest.mark.parametrize("ext", ["json", "ubj"])
def test_model_json_io(self, ext: str) -> None:
    """Run the JSON/UBJSON round-trip check for several booster setups."""
    # Each entry is a fresh dict, evaluated in the same order as before:
    # gbtree, vector-leaf gbtree, gblinear, dart.
    configurations = [
        {"booster": "gbtree", "tree_method": "hist"},
        {
            "booster": "gbtree",
            "tree_method": "hist",
            "multi_strategy": "multi_output_tree",
            "objective": "multi:softmax",
        },
        {"booster": "gblinear"},
        {"booster": "dart", "tree_method": "hist"},
    ]
    for parameters in configurations:
        self.run_model_json_io(parameters, ext)
@pytest.mark.skipif(**tm.no_json_schema())
def test_json_io_schema(self):
    """Validate saved JSON models against ``doc/model.schema``.

    Also checks that the objective names listed in the error raised for an
    unknown objective are exactly the objective names declared in the schema.
    """
    import tempfile

    import jsonschema

    # The schema lives in ``doc/`` three directory levels above this file.
    path = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    doc = os.path.join(path, 'doc', 'model.schema')
    with open(doc, 'r') as fd:
        schema = json.load(fd)

    # A temporary directory guarantees cleanup even if validation fails and
    # avoids leaving ``test_json_schema.json`` in the working directory.
    with tempfile.TemporaryDirectory() as tmpdir:
        model_path = os.path.join(tmpdir, 'test_json_schema.json')
        for booster in ('gbtree', 'dart'):
            parameters = {'tree_method': 'hist', 'booster': booster}
            jsonschema.validate(instance=json_model(model_path, parameters),
                                schema=schema)

    dtrain, _ = tm.load_agaricus(__file__)
    # ``pytest.raises`` fails the test explicitly if no error is raised; the
    # previous try/except fell through to a NameError on ``objectives``.
    with pytest.raises(ValueError) as excinfo:
        xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1)

    e_str = str(excinfo.value)
    beg = e_str.find('Objective candidate')
    assert beg != -1, "Error message does not list objective candidates."
    end = e_str.find('Stack trace')
    if end == -1:
        # No stack trace section in the message: keep everything to the end
        # instead of silently dropping the final character.
        end = len(e_str)
    e_str = e_str[beg: end]
    e_str = e_str.strip()
    splited = e_str.splitlines()
    objectives = [s.split(': ')[1] for s in splited]

    j_objectives = schema['properties']['learner']['properties'][
        'objective']['oneOf']
    objectives_from_schema = set()
    for j_obj in j_objectives:
        objectives_from_schema.add(
            j_obj['properties']['name']['const'])

    objectives = set(objectives)
    assert objectives == objectives_from_schema
@pytest.mark.skipif(**tm.no_json_schema()) @pytest.mark.skipif(**tm.no_json_schema())
def test_json_dump_schema(self): def test_json_dump_schema(self):
import jsonschema import jsonschema
@ -470,29 +307,6 @@ class TestModels:
for d in text_dump: for d in text_dump:
assert d.find(r"feature \"2\"") != -1 assert d.find(r"feature \"2\"") != -1
def test_categorical_model_io(self):
    """Categorical models reject the binary file name but round-trip via JSON/UBJSON."""
    X, y = tm.make_categorical(256, 16, 71, False)
    Xy = xgb.DMatrix(X, y, enable_categorical=True)
    booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
    predt_0 = booster.predict(Xy)

    with tempfile.TemporaryDirectory() as tempdir:
        # Saving under the ``.binary`` name is expected to raise.
        binary_path = os.path.join(tempdir, "model.binary")
        with pytest.raises(ValueError, match=r".*JSON/UBJSON.*"):
            booster.save_model(binary_path)

        # JSON first, then UBJSON; each step saves the booster produced by the
        # previous reload and compares predictions with the original ones.
        for file_name in ("model.json", "model.ubj"):
            path = os.path.join(tempdir, file_name)
            booster.save_model(path)
            booster = xgb.Booster(model_file=path)
            predt_1 = booster.predict(Xy)
            np.testing.assert_allclose(predt_0, predt_1)
@pytest.mark.skipif(**tm.no_sklearn()) @pytest.mark.skipif(**tm.no_sklearn())
def test_attributes(self): def test_attributes(self):
from sklearn.datasets import load_iris from sklearn.datasets import load_iris

View File

@ -278,14 +278,18 @@ class TestCallbacks:
dtrain, dtest = tm.load_agaricus(__file__) dtrain, dtest = tm.load_agaricus(__file__)
watchlist = [(dtest, 'eval'), (dtrain, 'train')] watchlist = [(dtest, "eval"), (dtrain, "train")]
num_round = 4 num_round = 4
# learning_rates as a list # learning_rates as a list
# init eta with 0 to check whether learning_rates work # init eta with 0 to check whether learning_rates work
param = {'max_depth': 2, 'eta': 0, 'verbosity': 0, param = {
'objective': 'binary:logistic', 'eval_metric': 'error', "max_depth": 2,
'tree_method': tree_method} "eta": 0,
"objective": "binary:logistic",
"eval_metric": "error",
"tree_method": tree_method,
}
evals_result = {} evals_result = {}
bst = xgb.train( bst = xgb.train(
param, param,
@ -295,15 +299,19 @@ class TestCallbacks:
callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])], callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],
evals_result=evals_result, evals_result=evals_result,
) )
eval_errors_0 = list(map(float, evals_result['eval']['error'])) eval_errors_0 = list(map(float, evals_result["eval"]["error"]))
assert isinstance(bst, xgb.core.Booster) assert isinstance(bst, xgb.core.Booster)
# validation error should decrease, if eta > 0 # validation error should decrease, if eta > 0
assert eval_errors_0[0] > eval_errors_0[-1] assert eval_errors_0[0] > eval_errors_0[-1]
# init learning_rate with 0 to check whether learning_rates work # init learning_rate with 0 to check whether learning_rates work
param = {'max_depth': 2, 'learning_rate': 0, 'verbosity': 0, param = {
'objective': 'binary:logistic', 'eval_metric': 'error', "max_depth": 2,
'tree_method': tree_method} "learning_rate": 0,
"objective": "binary:logistic",
"eval_metric": "error",
"tree_method": tree_method,
}
evals_result = {} evals_result = {}
bst = xgb.train( bst = xgb.train(
@ -314,15 +322,17 @@ class TestCallbacks:
callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])], callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])],
evals_result=evals_result, evals_result=evals_result,
) )
eval_errors_1 = list(map(float, evals_result['eval']['error'])) eval_errors_1 = list(map(float, evals_result["eval"]["error"]))
assert isinstance(bst, xgb.core.Booster) assert isinstance(bst, xgb.core.Booster)
# validation error should decrease, if learning_rate > 0 # validation error should decrease, if learning_rate > 0
assert eval_errors_1[0] > eval_errors_1[-1] assert eval_errors_1[0] > eval_errors_1[-1]
# check if learning_rates override default value of eta/learning_rate # check if learning_rates override default value of eta/learning_rate
param = { param = {
'max_depth': 2, 'verbosity': 0, 'objective': 'binary:logistic', "max_depth": 2,
'eval_metric': 'error', 'tree_method': tree_method "objective": "binary:logistic",
"eval_metric": "error",
"tree_method": tree_method,
} }
evals_result = {} evals_result = {}
bst = xgb.train( bst = xgb.train(

View File

@ -12,6 +12,7 @@ def test_global_config_verbosity(verbosity_level):
return xgb.get_config()["verbosity"] return xgb.get_config()["verbosity"]
old_verbosity = get_current_verbosity() old_verbosity = get_current_verbosity()
assert old_verbosity == 1
with xgb.config_context(verbosity=verbosity_level): with xgb.config_context(verbosity=verbosity_level):
new_verbosity = get_current_verbosity() new_verbosity = get_current_verbosity()
assert new_verbosity == verbosity_level assert new_verbosity == verbosity_level
@ -30,7 +31,10 @@ def test_global_config_use_rmm(use_rmm):
assert old_use_rmm_flag == get_current_use_rmm_flag() assert old_use_rmm_flag == get_current_use_rmm_flag()
def test_nested_config(): def test_nested_config() -> None:
verbosity = xgb.get_config()["verbosity"]
assert verbosity == 1
with xgb.config_context(verbosity=3): with xgb.config_context(verbosity=3):
assert xgb.get_config()["verbosity"] == 3 assert xgb.get_config()["verbosity"] == 3
with xgb.config_context(verbosity=2): with xgb.config_context(verbosity=2):
@ -45,13 +49,15 @@ def test_nested_config():
with xgb.config_context(verbosity=None): with xgb.config_context(verbosity=None):
assert xgb.get_config()["verbosity"] == 3 # None has no effect assert xgb.get_config()["verbosity"] == 3 # None has no effect
verbosity = xgb.get_config()["verbosity"]
xgb.set_config(verbosity=2) xgb.set_config(verbosity=2)
assert xgb.get_config()["verbosity"] == 2 assert xgb.get_config()["verbosity"] == 2
with xgb.config_context(verbosity=3): with xgb.config_context(verbosity=3):
assert xgb.get_config()["verbosity"] == 3 assert xgb.get_config()["verbosity"] == 3
xgb.set_config(verbosity=verbosity) # reset xgb.set_config(verbosity=verbosity) # reset
verbosity = xgb.get_config()["verbosity"]
assert verbosity == 1
def test_thread_safty(): def test_thread_safty():
n_threads = multiprocessing.cpu_count() n_threads = multiprocessing.cpu_count()

View File

@ -1,6 +1,7 @@
import csv import csv
import os import os
import tempfile import tempfile
import warnings
import numpy as np import numpy as np
import pytest import pytest
@ -24,20 +25,18 @@ class TestDMatrix:
with pytest.warns(UserWarning): with pytest.warns(UserWarning):
data._warn_unused_missing("uri", 4) data._warn_unused_missing("uri", 4)
with pytest.warns(None) as record: with warnings.catch_warnings():
warnings.simplefilter("error")
data._warn_unused_missing("uri", None) data._warn_unused_missing("uri", None)
data._warn_unused_missing("uri", np.nan) data._warn_unused_missing("uri", np.nan)
assert len(record) == 0 with warnings.catch_warnings():
warnings.simplefilter("error")
with pytest.warns(None) as record:
x = rng.randn(10, 10) x = rng.randn(10, 10)
y = rng.randn(10) y = rng.randn(10)
xgb.DMatrix(x, y, missing=4) xgb.DMatrix(x, y, missing=4)
assert len(record) == 0
def test_dmatrix_numpy_init(self): def test_dmatrix_numpy_init(self):
data = np.random.randn(5, 5) data = np.random.randn(5, 5)
dm = xgb.DMatrix(data) dm = xgb.DMatrix(data)
@ -264,7 +263,7 @@ class TestDMatrix:
dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow)) dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))
assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol) assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
watchlist = [(dtrain, "train")] watchlist = [(dtrain, "train")]
param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0} param = {"max_depth": 3, "objective": "binary:logistic"}
bst = xgb.train(param, dtrain, 5, watchlist) bst = xgb.train(param, dtrain, 5, watchlist)
bst.predict(dtrain) bst.predict(dtrain)
@ -302,7 +301,7 @@ class TestDMatrix:
dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow)) dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow))
assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol) assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
watchlist = [(dtrain, "train")] watchlist = [(dtrain, "train")]
param = {"max_depth": 3, "objective": "binary:logistic", "verbosity": 0} param = {"max_depth": 3, "objective": "binary:logistic"}
bst = xgb.train(param, dtrain, 5, watchlist) bst = xgb.train(param, dtrain, 5, watchlist)
bst.predict(dtrain) bst.predict(dtrain)
@ -475,8 +474,10 @@ class TestDMatrixColumnSplit:
def test_uri(self): def test_uri(self):
def verify_uri(): def verify_uri():
rank = xgb.collective.get_rank() rank = xgb.collective.get_rank()
with tempfile.TemporaryDirectory() as tmpdir:
filename = os.path.join(tmpdir, f"test_data_{rank}.csv")
data = np.random.rand(5, 5) data = np.random.rand(5, 5)
filename = f"test_data_{rank}.csv"
with open(filename, mode="w", newline="") as file: with open(filename, mode="w", newline="") as file:
writer = csv.writer(file) writer = csv.writer(file)
for row in data: for row in data:

View File

@ -67,8 +67,10 @@ class TestEarlyStopping:
X = digits['data'] X = digits['data']
y = digits['target'] y = digits['target']
dm = xgb.DMatrix(X, label=y) dm = xgb.DMatrix(X, label=y)
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, params = {
'objective': 'binary:logistic', 'eval_metric': 'error'} 'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic',
'eval_metric': 'error'
}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
early_stopping_rounds=10) early_stopping_rounds=10)

View File

@ -9,29 +9,13 @@ rng = np.random.RandomState(1337)
class TestEvalMetrics: class TestEvalMetrics:
xgb_params_01 = { xgb_params_01 = {'nthread': 1, 'eval_metric': 'error'}
'verbosity': 0,
'nthread': 1,
'eval_metric': 'error'
}
xgb_params_02 = { xgb_params_02 = {'nthread': 1, 'eval_metric': ['error']}
'verbosity': 0,
'nthread': 1,
'eval_metric': ['error']
}
xgb_params_03 = { xgb_params_03 = {'nthread': 1, 'eval_metric': ['rmse', 'error']}
'verbosity': 0,
'nthread': 1,
'eval_metric': ['rmse', 'error']
}
xgb_params_04 = { xgb_params_04 = {'nthread': 1, 'eval_metric': ['error', 'rmse']}
'verbosity': 0,
'nthread': 1,
'eval_metric': ['error', 'rmse']
}
def evalerror_01(self, preds, dtrain): def evalerror_01(self, preds, dtrain):
labels = dtrain.get_label() labels = dtrain.get_label()

View File

@ -22,8 +22,14 @@ coord_strategy = strategies.fixed_dictionaries({
def train_result(param, dmat, num_rounds): def train_result(param, dmat, num_rounds):
result = {} result = {}
xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False, xgb.train(
evals_result=result) param,
dmat,
num_rounds,
evals=[(dmat, "train")],
verbose_eval=False,
evals_result=result,
)
return result return result

View File

@ -0,0 +1,406 @@
import json
import locale
import os
import pickle
import tempfile
from pathlib import Path
from typing import List
import numpy as np
import pytest
import xgboost as xgb
from xgboost import testing as tm
def json_model(model_path: str, parameters: dict) -> dict:
    """Train a small classifier, save it to ``model_path`` and return the parsed file.

    Parameters
    ----------
    model_path :
        Destination of the saved model.  A path ending in ``"ubj"`` is read
        back with ``ubjson``; any other path is read back as JSON text.
    parameters :
        Training parameters passed to ``xgb.train``.  When the objective is
        ``"multi:softmax"``, ``num_class=3`` is added to a private copy; the
        caller's dictionary is left untouched.

    Returns
    -------
    The model document loaded from ``model_path``.
    """
    # Skips the calling test when scikit-learn is not installed.
    datasets = pytest.importorskip("sklearn.datasets")
    X, y = datasets.make_classification(64, n_features=8, n_classes=3, n_informative=6)

    # Copy before injecting ``num_class`` so the argument is not mutated.
    params = dict(parameters)
    if params.get("objective", None) == "multi:softmax":
        params["num_class"] = 3

    dm1 = xgb.DMatrix(X, y)
    bst = xgb.train(params, dm1)
    bst.save_model(model_path)

    if model_path.endswith("ubj"):
        import ubjson

        with open(model_path, "rb") as ubjfd:
            model = ubjson.load(ubjfd)
    else:
        with open(model_path, "r") as fd:
            model = json.load(fd)

    return model
class TestBoosterIO:
    """Model save/load tests for the native ``Booster`` interface.

    Every test writes its model files into a private temporary directory, so
    nothing is left behind when an assertion fails and parallel runs do not
    share file names.
    """

    def run_model_json_io(self, parameters: dict, ext: str) -> None:
        """Round-trip a booster through a model file and through raw buffers.

        Parameters
        ----------
        parameters :
            Training parameters forwarded to ``json_model``.
        ext :
            File extension of the model file, ``"json"`` or ``"ubj"``.  The
            test is skipped for ``"ubj"`` when ``tm.no_ubjson()`` reports that
            the reader is unavailable.
        """
        # The global verbosity is expected to still be 1 when the test starts.
        config = xgb.config.get_config()
        assert config["verbosity"] == 1

        # Evaluate the skip condition once instead of calling the helper twice.
        no_ubjson = tm.no_ubjson()
        if ext == "ubj" and no_ubjson["condition"]:
            pytest.skip(no_ubjson["reason"])

        loc = locale.getpreferredencoding(False)

        with tempfile.TemporaryDirectory() as tmpdir:
            model_path = os.path.join(tmpdir, "test_model_json_io." + ext)
            j_model = json_model(model_path, parameters)
            assert isinstance(j_model["learner"], dict)

            # Load the saved file and write it back to the same path.
            bst = xgb.Booster(model_file=model_path)
            bst.save_model(fname=model_path)
            if ext == "ubj":
                import ubjson

                with open(model_path, "rb") as ubjfd:
                    j_model = ubjson.load(ubjfd)
            else:
                with open(model_path, "r") as fd:
                    j_model = json.load(fd)
            assert isinstance(j_model["learner"], dict)

        # Saving/loading must not have changed the preferred encoding.
        assert locale.getpreferredencoding(False) == loc

        # Round-trip through in-memory JSON and UBJSON buffers.
        json_raw = bst.save_raw(raw_format="json")
        from_jraw = xgb.Booster()
        from_jraw.load_model(json_raw)

        ubj_raw = bst.save_raw(raw_format="ubj")
        from_ubjraw = xgb.Booster()
        from_ubjraw.load_model(ubj_raw)

        if parameters.get("multi_strategy", None) != "multi_output_tree":
            # Old binary model is not supported for vector leaf.
            with pytest.warns(Warning, match="Model format is default to UBJSON"):
                old_from_json = from_jraw.save_raw(raw_format="deprecated")
                old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")

            assert old_from_json == old_from_ubj

        # Pretty-printed JSON with trailing newlines must still be loadable.
        raw_json = bst.save_raw(raw_format="json")
        pretty = json.dumps(json.loads(raw_json), indent=2) + "\n\n"
        bst.load_model(bytearray(pretty, encoding="ascii"))

        if parameters.get("multi_strategy", None) != "multi_output_tree":
            # old binary model is not supported.
            # NOTE(review): this repeats the comparison above on the same two
            # boosters; it does not involve the reloaded ``bst`` -- confirm.
            with pytest.warns(Warning, match="Model format is default to UBJSON"):
                old_from_json = from_jraw.save_raw(raw_format="deprecated")
                old_from_ubj = from_ubjraw.save_raw(raw_format="deprecated")

            assert old_from_json == old_from_ubj

        # Both boosters must agree on predictions for the same random input.
        rng = np.random.default_rng()
        X = rng.random(size=from_jraw.num_features() * 10).reshape(
            (10, from_jraw.num_features())
        )
        predt_from_jraw = from_jraw.predict(xgb.DMatrix(X))
        predt_from_bst = bst.predict(xgb.DMatrix(X))
        np.testing.assert_allclose(predt_from_jraw, predt_from_bst)

    @pytest.mark.parametrize("ext", ["json", "ubj"])
    def test_model_json_io(self, ext: str) -> None:
        """Run the JSON/UBJSON round-trip check for several booster setups."""
        parameters = {"booster": "gbtree", "tree_method": "hist"}
        self.run_model_json_io(parameters, ext)
        parameters = {
            "booster": "gbtree",
            "tree_method": "hist",
            "multi_strategy": "multi_output_tree",
            "objective": "multi:softmax",
        }
        self.run_model_json_io(parameters, ext)
        parameters = {"booster": "gblinear"}
        self.run_model_json_io(parameters, ext)
        parameters = {"booster": "dart", "tree_method": "hist"}
        self.run_model_json_io(parameters, ext)

    def test_categorical_model_io(self) -> None:
        """Categorical models reject the legacy file name but round-trip via JSON/UBJSON."""
        X, y = tm.make_categorical(256, 16, 71, False)
        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        booster = xgb.train({"tree_method": "approx"}, Xy, num_boost_round=16)
        predt_0 = booster.predict(Xy)

        with tempfile.TemporaryDirectory() as tempdir:
            path = os.path.join(tempdir, "model.deprecated")
            with pytest.raises(ValueError, match=r".*JSON/UBJSON.*"):
                with pytest.warns(Warning, match="Model format is default to UBJSON"):
                    booster.save_model(path)

            path = os.path.join(tempdir, "model.json")
            booster.save_model(path)
            booster = xgb.Booster(model_file=path)
            predt_1 = booster.predict(Xy)
            np.testing.assert_allclose(predt_0, predt_1)

            path = os.path.join(tempdir, "model.ubj")
            booster.save_model(path)
            booster = xgb.Booster(model_file=path)
            predt_1 = booster.predict(Xy)
            np.testing.assert_allclose(predt_0, predt_1)

    @pytest.mark.skipif(**tm.no_json_schema())
    def test_json_io_schema(self) -> None:
        """Validate saved JSON models against ``doc/model.schema``.

        Also checks that the objective names listed in the error raised for an
        unknown objective are exactly the objective names declared in the
        schema.
        """
        import jsonschema

        # The schema lives in ``doc/`` three directory levels above this file.
        path = os.path.dirname(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        )
        doc = os.path.join(path, "doc", "model.schema")
        with open(doc, "r") as fd:
            schema = json.load(fd)

        with tempfile.TemporaryDirectory() as tmpdir:
            model_path = os.path.join(tmpdir, "test_json_schema.json")
            for booster in ("gbtree", "dart"):
                parameters = {"tree_method": "hist", "booster": booster}
                jsonschema.validate(
                    instance=json_model(model_path, parameters), schema=schema
                )

        dtrain, _ = tm.load_agaricus(__file__)
        # ``pytest.raises`` fails the test explicitly if no error is raised;
        # a bare try/except would fall through to a NameError on ``objectives``.
        with pytest.raises(ValueError) as excinfo:
            xgb.train({"objective": "foo"}, dtrain, num_boost_round=1)

        e_str = str(excinfo.value)
        beg = e_str.find("Objective candidate")
        assert beg != -1, "Error message does not list objective candidates."
        end = e_str.find("Stack trace")
        if end == -1:
            # No stack trace section: keep everything to the end instead of
            # silently dropping the final character.
            end = len(e_str)
        e_str = e_str[beg:end]
        e_str = e_str.strip()
        splited = e_str.splitlines()
        objectives = [s.split(": ")[1] for s in splited]

        j_objectives = schema["properties"]["learner"]["properties"]["objective"][
            "oneOf"
        ]
        objectives_from_schema = set()
        for j_obj in j_objectives:
            objectives_from_schema.add(j_obj["properties"]["name"]["const"])

        assert set(objectives) == objectives_from_schema

    def test_model_binary_io(self) -> None:
        """Save under the ``.deprecated`` file name, reload, and check config and raw buffers."""
        parameters = {
            "tree_method": "hist",
            "booster": "gbtree",
            "scale_pos_weight": "0.5",
        }
        X = np.random.random((10, 3))
        y = np.random.random((10,))
        dtrain = xgb.DMatrix(X, y)
        bst = xgb.train(parameters, dtrain, num_boost_round=2)

        with tempfile.TemporaryDirectory() as tmpdir:
            model_path = os.path.join(tmpdir, "test_model_binary_io.deprecated")
            with pytest.warns(Warning, match="Model format is default to UBJSON"):
                bst.save_model(model_path)

            bst = xgb.Booster(model_file=model_path)

        # ``scale_pos_weight`` must survive the file round trip.
        config = json.loads(bst.save_config())
        assert (
            float(config["learner"]["objective"]["reg_loss_param"]["scale_pos_weight"])
            == 0.5
        )

        # A raw buffer loaded into a new booster must serialize identically.
        buf = bst.save_raw()
        from_raw = xgb.Booster()
        from_raw.load_model(buf)
        buf_from_raw = from_raw.save_raw()
        assert buf == buf_from_raw

    def test_with_pathlib(self) -> None:
        """Saving and loading model files from paths."""
        rng = np.random.default_rng(1994)

        data = rng.normal(size=(100, 2))
        target = np.array([0, 1] * 50)
        features = ["Feature1", "Feature2"]

        dm = xgb.DMatrix(data, label=target, feature_names=features)
        params = {
            "objective": "binary:logistic",
            "eval_metric": "logloss",
            "eta": 0.3,
            "max_depth": 1,
        }

        bst = xgb.train(params, dm, num_boost_round=1)

        def dump_assertions(dump: List[str]) -> None:
            """Assertions for the expected dump from Booster"""
            assert len(dump) == 1, "Expected only 1 tree to be dumped."
            assert (
                len(dump[0].splitlines()) == 3
            ), "Expected 1 root and 2 leaves - 3 lines."

        with tempfile.TemporaryDirectory() as tmpdir:
            save_path = Path(tmpdir) / "model.ubj"

            # save, assert exists
            bst.save_model(save_path)
            assert save_path.exists()

            # load the model again using Path
            bst2 = xgb.Booster(model_file=save_path)
            dump2 = bst2.get_dump()
            dump_assertions(dump2)

            # load again using load_model
            bst3 = xgb.Booster()
            bst3.load_model(save_path)
            dump3 = bst3.get_dump()
            dump_assertions(dump3)
def save_load_model(model_path: str) -> None:
    """Round-trip scikit-learn wrapper models through ``model_path``.

    For each of two shuffled folds of the two-class digits data, a classifier
    is fitted, saved, and reloaded both into a fresh ``XGBClassifier`` and a
    native ``Booster``.  Afterwards a ``gblinear`` classifier with early
    stopping is checked through save/load and through pickling.
    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import KFold

    seed_state = np.random.RandomState(1994)
    binary_digits = load_digits(n_class=2)
    X = binary_digits["data"]
    y = binary_digits["target"]
    splitter = KFold(n_splits=2, shuffle=True, random_state=seed_state)

    for fit_idx, holdout_idx in splitter.split(X, y):
        fitted = xgb.XGBClassifier().fit(X[fit_idx], y[fit_idx])
        fitted.save_model(model_path)

        # Reload into a brand-new estimator and check the restored state.
        xgb_model = xgb.XGBClassifier()
        xgb_model.load_model(model_path)

        assert isinstance(xgb_model.classes_, np.ndarray)
        np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))
        assert isinstance(xgb_model._Booster, xgb.Booster)

        preds = xgb_model.predict(X[holdout_idx])
        labels = y[holdout_idx]
        mismatches = sum(
            1 for guess, truth in zip(preds, labels) if int(guess > 0.5) != truth
        )
        err = mismatches / float(len(preds))
        assert err < 0.1
        assert xgb_model.get_booster().attr("scikit_learn") is None

        # test native booster
        preds = xgb_model.predict(X[holdout_idx], output_margin=True)
        native = xgb.Booster(model_file=model_path)
        native_margin = native.predict(
            xgb.DMatrix(X[holdout_idx]), output_margin=True
        )
        assert np.allclose(preds, native_margin)

        # Loading the classifier file into the generic model must fail.
        with pytest.raises(TypeError):
            xgb_model = xgb.XGBModel()
            xgb_model.load_model(model_path)

    clf = xgb.XGBClassifier(booster="gblinear", early_stopping_rounds=1)
    clf.fit(X, y, eval_set=[(X, y)])
    expected_iteration = clf.best_iteration
    expected_score = clf.best_score
    reference = clf.predict(X)

    # Save and load back into the same estimator instance.
    clf.save_model(model_path)
    clf.load_model(model_path)
    assert clf.booster == "gblinear"
    np.testing.assert_allclose(reference, clf.predict(X))
    assert clf.best_iteration == expected_iteration
    assert clf.best_score == expected_score

    # Pickling must preserve predictions and early-stopping results too.
    clf = pickle.loads(pickle.dumps(clf))
    np.testing.assert_allclose(reference, clf.predict(X))
    assert clf.best_iteration == expected_iteration
    assert clf.best_score == expected_score
@pytest.mark.skipif(**tm.no_sklearn())
def test_sklearn_model() -> None:
    """Save/load checks for the scikit-learn wrappers across file formats."""
    from sklearn.datasets import load_digits
    from sklearn.model_selection import train_test_split

    # The ``.deprecated`` file name is expected to trigger the format warning.
    with tempfile.TemporaryDirectory() as legacy_dir:
        legacy_path = os.path.join(legacy_dir, "digits.deprecated")
        with pytest.warns(Warning, match="Model format is default to UBJSON"):
            save_load_model(legacy_path)

    with tempfile.TemporaryDirectory() as json_dir:
        save_load_model(os.path.join(json_dir, "digits.model.json"))

    with tempfile.TemporaryDirectory() as ubj_dir:
        model_path = os.path.join(ubj_dir, "digits.model.ubj")
        binary_digits = load_digits(n_class=2)
        X = binary_digits["data"]
        y = binary_digits["target"]

        # A natively trained booster must be loadable by the sklearn wrappers.
        booster = xgb.train(
            {"tree_method": "hist", "objective": "binary:logistic"},
            dtrain=xgb.DMatrix(X, y),
            num_boost_round=4,
        )
        native_predt = booster.predict(xgb.DMatrix(X))
        booster.save_model(model_path)

        classifier = xgb.XGBClassifier()
        classifier.load_model(model_path)

        proba = classifier.predict_proba(X)
        assert proba.shape[0] == X.shape[0]
        assert proba.shape[1] == 2  # binary

        positive_proba = classifier.predict_proba(X)[:, 1]
        assert np.allclose(native_predt, positive_proba)

        generic = xgb.XGBModel()
        generic.load_model(model_path)
        generic_predt = generic.predict(X)
        assert np.allclose(native_predt, generic_predt)

        # mclass
        X, y = load_digits(n_class=10, return_X_y=True)
        # small test_size to force early stop
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.01, random_state=1
        )
        clf = xgb.XGBClassifier(
            n_estimators=64, tree_method="hist", early_stopping_rounds=2
        )
        clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
        expected_score = clf.best_score
        clf.save_model(model_path)

        # The reloaded estimator must restore class and early-stopping info.
        clf = xgb.XGBClassifier()
        clf.load_model(model_path)
        assert clf.classes_.size == 10
        assert clf.objective == "multi:softprob"

        np.testing.assert_equal(clf.classes_, np.arange(10))
        assert clf.n_classes_ == 10

        assert clf.best_iteration == 27
        assert clf.best_score == expected_score
@pytest.mark.skipif(**tm.no_sklearn())
def test_with_sklearn_obj_metric() -> None:
    """Callable objective/metric survive pickling but not ``save_model``."""
    from sklearn.metrics import mean_squared_error

    X, y = tm.datasets.make_regression()
    reg = xgb.XGBRegressor(objective=tm.ls_obj, eval_metric=mean_squared_error)
    reg.fit(X, y)

    # Pickle round trip: both callables are still callables afterwards.
    unpickled = pickle.loads(pickle.dumps(reg))
    assert callable(unpickled.objective)
    assert callable(unpickled.eval_metric)

    # Model-file round trip: neither attribute is a callable after loading.
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "model.json")
        reg.save_model(path)

        reloaded = xgb.XGBRegressor()
        reloaded.load_model(path)

    assert not callable(reloaded.objective)
    assert not callable(reloaded.eval_metric)
    assert reloaded.eval_metric is None

View File

@ -1,13 +1,10 @@
import json import json
import os import os
import pickle import pickle
import tempfile
import numpy as np import numpy as np
import pytest
import xgboost as xgb import xgboost as xgb
from xgboost import testing as tm
kRows = 100 kRows = 100
kCols = 10 kCols = 10
@ -64,27 +61,3 @@ class TestPickling:
params = {"nthread": 8, "tree_method": "exact", "subsample": 0.5} params = {"nthread": 8, "tree_method": "exact", "subsample": 0.5}
config = self.run_model_pickling(params) config = self.run_model_pickling(params)
check(config) check(config)
@pytest.mark.skipif(**tm.no_sklearn())
def test_with_sklearn_obj_metric(self) -> None:
    """Callable objective/metric survive pickling but not ``save_model``."""
    from sklearn.metrics import mean_squared_error

    X, y = tm.datasets.make_regression()
    reg = xgb.XGBRegressor(objective=tm.ls_obj, eval_metric=mean_squared_error)
    reg.fit(X, y)

    # Pickle round trip keeps both user-supplied callables.
    serialized = pickle.dumps(reg)
    restored = pickle.loads(serialized)
    for attribute in (restored.objective, restored.eval_metric):
        assert callable(attribute)

    # After a model-file round trip neither attribute is a callable.
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "model.json")
        reg.save_model(path)

        from_file = xgb.XGBRegressor()
        from_file.load_model(path)

    assert not callable(from_file.objective)
    assert not callable(from_file.eval_metric)
    assert from_file.eval_metric is None

View File

@ -49,7 +49,7 @@ class TestSHAP:
def fn(max_depth: int, num_rounds: int) -> None: def fn(max_depth: int, num_rounds: int) -> None:
# train # train
params = {"max_depth": max_depth, "eta": 1, "verbosity": 0} params = {"max_depth": max_depth, "eta": 1}
bst = xgb.train(params, dtrain, num_boost_round=num_rounds) bst = xgb.train(params, dtrain, num_boost_round=num_rounds)
# predict # predict

View File

@ -117,7 +117,6 @@ class TestTreeMethod:
ag_param = {'max_depth': 2, ag_param = {'max_depth': 2,
'tree_method': 'hist', 'tree_method': 'hist',
'eta': 1, 'eta': 1,
'verbosity': 0,
'objective': 'binary:logistic', 'objective': 'binary:logistic',
'eval_metric': 'auc'} 'eval_metric': 'auc'}
hist_res = {} hist_res = {}
@ -340,6 +339,7 @@ class TestTreeMethod:
assert get_score(config_0) == get_score(config_1) assert get_score(config_0) == get_score(config_1)
with pytest.warns(Warning, match="Model format is default to UBJSON"):
raw_booster = booster_1.save_raw(raw_format="deprecated") raw_booster = booster_1.save_raw(raw_format="deprecated")
booster_2 = xgb.Booster(model_file=raw_booster) booster_2 = xgb.Booster(model_file=raw_booster)
config_2 = json.loads(booster_2.save_config()) config_2 = json.loads(booster_2.save_config())

View File

@ -341,7 +341,6 @@ class TestPandas:
params = { params = {
"max_depth": 2, "max_depth": 2,
"eta": 1, "eta": 1,
"verbosity": 0,
"objective": "binary:logistic", "objective": "binary:logistic",
"eval_metric": "error", "eval_metric": "error",
} }
@ -372,7 +371,6 @@ class TestPandas:
params = { params = {
"max_depth": 2, "max_depth": 2,
"eta": 1, "eta": 1,
"verbosity": 0,
"objective": "binary:logistic", "objective": "binary:logistic",
"eval_metric": "auc", "eval_metric": "auc",
} }
@ -383,7 +381,6 @@ class TestPandas:
params = { params = {
"max_depth": 2, "max_depth": 2,
"eta": 1, "eta": 1,
"verbosity": 0,
"objective": "binary:logistic", "objective": "binary:logistic",
"eval_metric": ["auc"], "eval_metric": ["auc"],
} }
@ -394,7 +391,6 @@ class TestPandas:
params = { params = {
"max_depth": 2, "max_depth": 2,
"eta": 1, "eta": 1,
"verbosity": 0,
"objective": "binary:logistic", "objective": "binary:logistic",
"eval_metric": ["auc"], "eval_metric": ["auc"],
} }
@ -413,7 +409,6 @@ class TestPandas:
params = { params = {
"max_depth": 2, "max_depth": 2,
"eta": 1, "eta": 1,
"verbosity": 0,
"objective": "binary:logistic", "objective": "binary:logistic",
} }
cv = xgb.cv( cv = xgb.cv(
@ -424,7 +419,6 @@ class TestPandas:
params = { params = {
"max_depth": 2, "max_depth": 2,
"eta": 1, "eta": 1,
"verbosity": 0,
"objective": "binary:logistic", "objective": "binary:logistic",
} }
cv = xgb.cv( cv = xgb.cv(
@ -435,7 +429,6 @@ class TestPandas:
params = { params = {
"max_depth": 2, "max_depth": 2,
"eta": 1, "eta": 1,
"verbosity": 0,
"objective": "binary:logistic", "objective": "binary:logistic",
"eval_metric": ["auc"], "eval_metric": ["auc"],
} }

View File

@ -678,7 +678,6 @@ def test_split_value_histograms():
params = { params = {
"max_depth": 6, "max_depth": 6,
"eta": 0.01, "eta": 0.01,
"verbosity": 0,
"objective": "binary:logistic", "objective": "binary:logistic",
"base_score": 0.5, "base_score": 0.5,
} }
@ -897,128 +896,6 @@ def test_validation_weights():
run_validation_weights(xgb.XGBClassifier) run_validation_weights(xgb.XGBClassifier)
def save_load_model(model_path):
    """Round-trip scikit-learn wrapper models through ``model_path``.

    Two shuffled folds of the two-class digits data are each fitted, saved,
    and reloaded into a fresh ``XGBClassifier`` and a native ``Booster``; a
    ``gblinear`` classifier with early stopping is then checked through
    save/load and pickling.  The fold shuffling uses the module-level ``rng``.
    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import KFold

    dataset = load_digits(n_class=2)
    X = dataset['data']
    y = dataset['target']
    folds = KFold(n_splits=2, shuffle=True, random_state=rng)

    for train_index, test_index in folds.split(X, y):
        X_holdout = X[test_index]
        labels = y[test_index]

        xgb.XGBClassifier().fit(X[train_index], y[train_index]).save_model(
            model_path
        )

        # Reload into a brand-new estimator and check the restored state.
        xgb_model = xgb.XGBClassifier()
        xgb_model.load_model(model_path)

        assert isinstance(xgb_model.classes_, np.ndarray)
        np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))
        assert isinstance(xgb_model._Booster, xgb.Booster)

        preds = xgb_model.predict(X_holdout)
        wrong = sum(
            1 for pred, label in zip(preds, labels) if int(pred > 0.5) != label
        )
        err = wrong / float(len(preds))
        assert err < 0.1
        assert xgb_model.get_booster().attr('scikit_learn') is None

        # test native booster
        preds = xgb_model.predict(X_holdout, output_margin=True)
        booster = xgb.Booster(model_file=model_path)
        predt_1 = booster.predict(xgb.DMatrix(X_holdout), output_margin=True)
        assert np.allclose(preds, predt_1)

        # Loading the classifier file into the generic model must fail.
        with pytest.raises(TypeError):
            xgb_model = xgb.XGBModel()
            xgb_model.load_model(model_path)

    clf = xgb.XGBClassifier(booster="gblinear", early_stopping_rounds=1)
    clf.fit(X, y, eval_set=[(X, y)])
    best_iteration = clf.best_iteration
    best_score = clf.best_score
    baseline = clf.predict(X)

    # Save and load back into the same estimator instance.
    clf.save_model(model_path)
    clf.load_model(model_path)
    assert clf.booster == "gblinear"
    np.testing.assert_allclose(baseline, clf.predict(X))
    assert clf.best_iteration == best_iteration
    assert clf.best_score == best_score

    # Pickling must preserve predictions and early-stopping results too.
    clf = pickle.loads(pickle.dumps(clf))
    np.testing.assert_allclose(baseline, clf.predict(X))
    assert clf.best_iteration == best_iteration
    assert clf.best_score == best_score
def test_save_load_model():
    """Save/load checks for the scikit-learn wrappers across file formats."""
    from sklearn.datasets import load_digits
    from sklearn.model_selection import train_test_split

    # Run the shared round-trip helper for two file names.
    for file_name in ("digits.model", "digits.model.json"):
        with tempfile.TemporaryDirectory() as tempdir:
            save_load_model(os.path.join(tempdir, file_name))

    with tempfile.TemporaryDirectory() as tempdir:
        model_path = os.path.join(tempdir, "digits.model.ubj")
        two_class = load_digits(n_class=2)
        X = two_class["data"]
        y = two_class["target"]

        # A natively trained booster must be loadable by the sklearn wrappers.
        booster = xgb.train(
            {"tree_method": "hist", "objective": "binary:logistic"},
            dtrain=xgb.DMatrix(X, y),
            num_boost_round=4,
        )
        expected = booster.predict(xgb.DMatrix(X))
        booster.save_model(model_path)

        wrapper = xgb.XGBClassifier()
        wrapper.load_model(model_path)

        proba = wrapper.predict_proba(X)
        assert proba.shape[0] == X.shape[0]
        assert proba.shape[1] == 2  # binary

        from_classifier = wrapper.predict_proba(X)[:, 1]
        assert np.allclose(expected, from_classifier)

        wrapper = xgb.XGBModel()
        wrapper.load_model(model_path)
        from_model = wrapper.predict(X)
        assert np.allclose(expected, from_model)

        # mclass
        X, y = load_digits(n_class=10, return_X_y=True)
        # small test_size to force early stop
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.01, random_state=1
        )
        clf = xgb.XGBClassifier(
            n_estimators=64, tree_method="hist", early_stopping_rounds=2
        )
        clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
        score = clf.best_score
        clf.save_model(model_path)

        # The reloaded estimator must restore class and early-stopping info.
        clf = xgb.XGBClassifier()
        clf.load_model(model_path)
        assert clf.classes_.size == 10
        assert clf.objective == "multi:softprob"

        np.testing.assert_equal(clf.classes_, np.arange(10))
        assert clf.n_classes_ == 10

        assert clf.best_iteration == 27
        assert clf.best_score == score
def test_RFECV(): def test_RFECV():
from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
from sklearn.feature_selection import RFECV from sklearn.feature_selection import RFECV