[Breaking] Require format to be specified in input URI. (#9077)
Previously, we used `libsvm` as the default when the format was not specified. However, the dmlc data parser is not particularly robust against errors, and the most common type of error is an undefined format. In addition, we will recommend that users use other data loaders instead. We will continue to maintain the parsers, as they are currently used for many internal tests, including federated learning.
This commit is contained in:
@@ -88,7 +88,8 @@ inline std::shared_ptr<DMatrix> GetExternalMemoryDMatrixFromData(
|
||||
fo << row_data.str() << "\n";
|
||||
}
|
||||
fo.close();
|
||||
return std::shared_ptr<DMatrix>(DMatrix::Load(tmp_file + "#" + tmp_file + ".cache"));
|
||||
return std::shared_ptr<DMatrix>(
|
||||
DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache"));
|
||||
}
|
||||
|
||||
// Test that elements are approximately equally distributed among bins
|
||||
|
||||
@@ -29,16 +29,16 @@ TEST(FileIterator, Basic) {
|
||||
{
|
||||
auto zpath = tmpdir.path + "/0-based.svm";
|
||||
CreateBigTestData(zpath, 3 * 64, true);
|
||||
zpath += "?indexing_mode=0";
|
||||
FileIterator iter{zpath, 0, 1, "libsvm"};
|
||||
zpath += "?indexing_mode=0&format=libsvm";
|
||||
FileIterator iter{zpath, 0, 1};
|
||||
check_n_features(&iter);
|
||||
}
|
||||
|
||||
{
|
||||
auto opath = tmpdir.path + "/1-based.svm";
|
||||
CreateBigTestData(opath, 3 * 64, false);
|
||||
opath += "?indexing_mode=1";
|
||||
FileIterator iter{opath, 0, 1, "libsvm"};
|
||||
opath += "?indexing_mode=1&format=libsvm";
|
||||
FileIterator iter{opath, 0, 1};
|
||||
check_n_features(&iter);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,8 +157,7 @@ TEST(MetaInfo, LoadQid) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
std::string tmp_file = tempdir.path + "/qid_test.libsvm";
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fs(
|
||||
dmlc::Stream::Create(tmp_file.c_str(), "w"));
|
||||
std::unique_ptr<dmlc::Stream> fs(dmlc::Stream::Create(tmp_file.c_str(), "w"));
|
||||
dmlc::ostream os(fs.get());
|
||||
os << R"qid(3 qid:1 1:1 2:1 3:0 4:0.2 5:0
|
||||
2 qid:1 1:0 2:0 3:1 4:0.1 5:1
|
||||
@@ -175,7 +174,7 @@ TEST(MetaInfo, LoadQid) {
|
||||
os.set_stream(nullptr);
|
||||
}
|
||||
std::unique_ptr<xgboost::DMatrix> dmat(
|
||||
xgboost::DMatrix::Load(tmp_file, true, xgboost::DataSplitMode::kRow, "libsvm"));
|
||||
xgboost::DMatrix::Load(tmp_file + "?format=libsvm", true, xgboost::DataSplitMode::kRow));
|
||||
|
||||
const xgboost::MetaInfo& info = dmat->Info();
|
||||
const std::vector<xgboost::bst_uint> expected_group_ptr{0, 4, 8, 12};
|
||||
|
||||
@@ -17,11 +17,15 @@
|
||||
|
||||
using namespace xgboost; // NOLINT
|
||||
|
||||
namespace {
|
||||
// Appends the explicit libsvm format query ("?format=libsvm") to a file path.
std::string UriSVM(std::string name) {
  std::string uri{name};
  uri.append("?format=libsvm");
  return uri;
}
|
||||
} // namespace
|
||||
|
||||
TEST(SimpleDMatrix, MetaInfo) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
|
||||
|
||||
// Test the metadata that was parsed
|
||||
EXPECT_EQ(dmat->Info().num_row_, 2);
|
||||
@@ -37,7 +41,7 @@ TEST(SimpleDMatrix, RowAccess) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file, false);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file), false);
|
||||
|
||||
// Loop over the batches and count the records
|
||||
int64_t row_count = 0;
|
||||
@@ -60,7 +64,7 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
|
||||
|
||||
ASSERT_TRUE(dmat->SingleColBlock());
|
||||
|
||||
@@ -387,7 +391,7 @@ TEST(SimpleDMatrix, SaveLoadBinary) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(UriSVM(tmp_file));
|
||||
data::SimpleDMatrix *simple_dmat = dynamic_cast<data::SimpleDMatrix*>(dmat);
|
||||
|
||||
const std::string tmp_binfile = tempdir.path + "/csr_source.binary";
|
||||
|
||||
@@ -16,14 +16,19 @@
|
||||
#include "../helpers.h"
|
||||
|
||||
using namespace xgboost; // NOLINT
|
||||
namespace {
|
||||
// Composes a data URI of the form `<name>?format=libsvm#<cache>.cache`:
// the libsvm format query is appended to `name`, followed by a `#` fragment
// built from `cache` with a ".cache" suffix.
std::string UriSVM(std::string name, std::string cache) {
  return name + "?format=libsvm" + "#" + cache + ".cache";
}
|
||||
} // namespace
|
||||
|
||||
template <typename Page>
|
||||
void TestSparseDMatrixLoadFile() {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
auto opath = tmpdir.path + "/1-based.svm";
|
||||
CreateBigTestData(opath, 3 * 64, false);
|
||||
opath += "?indexing_mode=1";
|
||||
data::FileIterator iter{opath, 0, 1, "libsvm"};
|
||||
opath += "?indexing_mode=1&format=libsvm";
|
||||
data::FileIterator iter{opath, 0, 1};
|
||||
auto n_threads = 0;
|
||||
data::SparsePageDMatrix m{&iter,
|
||||
iter.Proxy(),
|
||||
@@ -112,15 +117,13 @@ TEST(SparsePageDMatrix, MetaInfo) {
|
||||
size_t constexpr kEntries = 24;
|
||||
CreateBigTestData(tmp_file, kEntries);
|
||||
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache", false);
|
||||
std::unique_ptr<DMatrix> dmat{xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file), false)};
|
||||
|
||||
// Test the metadata that was parsed
|
||||
EXPECT_EQ(dmat->Info().num_row_, 8ul);
|
||||
EXPECT_EQ(dmat->Info().num_col_, 5ul);
|
||||
EXPECT_EQ(dmat->Info().num_nonzero_, kEntries);
|
||||
EXPECT_EQ(dmat->Info().labels.Size(), dmat->Info().num_row_);
|
||||
|
||||
delete dmat;
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, RowAccess) {
|
||||
@@ -139,7 +142,7 @@ TEST(SparsePageDMatrix, ColAccess) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache");
|
||||
xgboost::DMatrix *dmat = xgboost::DMatrix::Load(UriSVM(tmp_file, tmp_file));
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
size_t iter = 0;
|
||||
@@ -231,7 +234,7 @@ auto TestSparsePageDMatrixDeterminism(int32_t threads) {
|
||||
std::string filename = tempdir.path + "/simple.libsvm";
|
||||
CreateBigTestData(filename, 1 << 16);
|
||||
|
||||
data::FileIterator iter(filename, 0, 1, "auto");
|
||||
data::FileIterator iter(filename + "?format=libsvm", 0, 1);
|
||||
std::unique_ptr<DMatrix> sparse{
|
||||
new data::SparsePageDMatrix{&iter, iter.Proxy(), data::fileiter::Reset, data::fileiter::Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), threads, filename}};
|
||||
|
||||
@@ -13,7 +13,7 @@ TEST(SparsePageDMatrix, EllpackPage) {
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
DMatrix* dmat = DMatrix::Load(tmp_file + "#" + tmp_file + ".cache");
|
||||
DMatrix* dmat = DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache");
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
size_t n = 0;
|
||||
|
||||
@@ -548,7 +548,7 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
|
||||
}
|
||||
fo.close();
|
||||
|
||||
std::string uri = tmp_file;
|
||||
std::string uri = tmp_file + "?format=libsvm";
|
||||
if (page_size > 0) {
|
||||
uri += "#" + tmp_file + ".cache";
|
||||
}
|
||||
|
||||
@@ -126,7 +126,8 @@ TEST(Learner, SLOW_CheckMultiBatch) { // NOLINT
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/big.libsvm";
|
||||
CreateBigTestData(tmp_file, 50000);
|
||||
std::shared_ptr<DMatrix> dmat(xgboost::DMatrix::Load(tmp_file + "#" + tmp_file + ".cache"));
|
||||
std::shared_ptr<DMatrix> dmat(
|
||||
xgboost::DMatrix::Load(tmp_file + "?format=libsvm" + "#" + tmp_file + ".cache"));
|
||||
EXPECT_FALSE(dmat->SingleColBlock());
|
||||
size_t num_row = dmat->Info().num_row_;
|
||||
std::vector<bst_float> labels(num_row);
|
||||
|
||||
@@ -21,8 +21,7 @@ class TestBasic:
|
||||
assert not lazy_isinstance(a, 'numpy', 'dataframe')
|
||||
|
||||
def test_basic(self):
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
param = {'max_depth': 2, 'eta': 1,
|
||||
'objective': 'binary:logistic'}
|
||||
# specify validations set to watch performance
|
||||
@@ -61,8 +60,7 @@ class TestBasic:
|
||||
def test_metric_config(self):
|
||||
# Make sure that the metric configuration happens in booster so the
|
||||
# string `['error', 'auc']` doesn't get passed down to core.
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']}
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
@@ -78,8 +76,7 @@ class TestBasic:
|
||||
np.testing.assert_allclose(predt_0, predt_1)
|
||||
|
||||
def test_multiclass(self):
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2}
|
||||
# specify validations set to watch performance
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
@@ -188,7 +185,7 @@ class TestBasic:
|
||||
assert dm.num_col() == cols
|
||||
|
||||
def test_cv(self):
|
||||
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dm, _ = tm.load_agaricus(__file__)
|
||||
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
'objective': 'binary:logistic'}
|
||||
|
||||
@@ -198,7 +195,7 @@ class TestBasic:
|
||||
assert len(cv) == (4)
|
||||
|
||||
def test_cv_no_shuffle(self):
|
||||
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dm, _ = tm.load_agaricus(__file__)
|
||||
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
'objective': 'binary:logistic'}
|
||||
|
||||
@@ -209,7 +206,7 @@ class TestBasic:
|
||||
assert len(cv) == (4)
|
||||
|
||||
def test_cv_explicit_fold_indices(self):
|
||||
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dm, _ = tm.load_agaricus(__file__)
|
||||
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
|
||||
'binary:logistic'}
|
||||
folds = [
|
||||
@@ -268,8 +265,7 @@ class TestBasicPathLike:
|
||||
|
||||
def test_DMatrix_init_from_path(self):
|
||||
"""Initialization from the data path."""
|
||||
dpath = Path('demo/data')
|
||||
dtrain = xgb.DMatrix(dpath / 'agaricus.txt.train')
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
assert dtrain.num_row() == 6513
|
||||
assert dtrain.num_col() == 127
|
||||
|
||||
|
||||
@@ -42,8 +42,7 @@ class TestModels:
|
||||
param = {'verbosity': 0, 'objective': 'binary:logistic',
|
||||
'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1,
|
||||
'nthread': 1}
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
num_round = 4
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist)
|
||||
@@ -55,8 +54,7 @@ class TestModels:
|
||||
assert err < 0.2
|
||||
|
||||
def test_dart(self):
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
param = {'max_depth': 5, 'objective': 'binary:logistic',
|
||||
'eval_metric': 'logloss', 'booster': 'dart', 'verbosity': 1}
|
||||
# specify validations set to watch performance
|
||||
@@ -122,7 +120,7 @@ class TestModels:
|
||||
|
||||
def test_boost_from_prediction(self):
|
||||
# Re-construct dtrain here to avoid modification
|
||||
margined = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
margined, _ = tm.load_agaricus(__file__)
|
||||
bst = xgb.train({'tree_method': 'hist'}, margined, 1)
|
||||
predt_0 = bst.predict(margined, output_margin=True)
|
||||
margined.set_base_margin(predt_0)
|
||||
@@ -130,13 +128,13 @@ class TestModels:
|
||||
predt_1 = bst.predict(margined)
|
||||
|
||||
assert np.any(np.abs(predt_1 - predt_0) > 1e-6)
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
bst = xgb.train({'tree_method': 'hist'}, dtrain, 2)
|
||||
predt_2 = bst.predict(dtrain)
|
||||
assert np.all(np.abs(predt_2 - predt_1) < 1e-6)
|
||||
|
||||
def test_boost_from_existing_model(self):
|
||||
X = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
X, _ = tm.load_agaricus(__file__)
|
||||
booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4)
|
||||
assert booster.num_boosted_rounds() == 4
|
||||
booster = xgb.train({'tree_method': 'hist'}, X, num_boost_round=4,
|
||||
@@ -156,8 +154,7 @@ class TestModels:
|
||||
'objective': 'reg:logistic',
|
||||
"tree_method": tree_method
|
||||
}
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
num_round = 10
|
||||
|
||||
@@ -203,8 +200,7 @@ class TestModels:
|
||||
self.run_custom_objective()
|
||||
|
||||
def test_multi_eval_metric(self):
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1,
|
||||
'objective': 'binary:logistic'}
|
||||
@@ -226,7 +222,7 @@ class TestModels:
|
||||
param['scale_pos_weight'] = ratio
|
||||
return (dtrain, dtest, param)
|
||||
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
xgb.cv(param, dtrain, num_round, nfold=5,
|
||||
metrics={'auc'}, seed=0, fpreproc=fpreproc)
|
||||
|
||||
@@ -234,7 +230,7 @@ class TestModels:
|
||||
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
'objective': 'binary:logistic'}
|
||||
num_round = 2
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
xgb.cv(param, dtrain, num_round, nfold=5,
|
||||
metrics={'error'}, seed=0, show_stdv=False)
|
||||
|
||||
@@ -392,7 +388,7 @@ class TestModels:
|
||||
os.remove(model_path)
|
||||
|
||||
try:
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
xgb.train({'objective': 'foo'}, dtrain, num_boost_round=1)
|
||||
except ValueError as e:
|
||||
e_str = str(e)
|
||||
|
||||
@@ -275,9 +275,7 @@ class TestCallbacks:
|
||||
"""Test learning rate scheduler, used by both CPU and GPU tests."""
|
||||
scheduler = xgb.callback.LearningRateScheduler
|
||||
|
||||
dpath = tm.data_dir(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
num_round = 4
|
||||
@@ -361,9 +359,7 @@ class TestCallbacks:
|
||||
num_round = 4
|
||||
scheduler = xgb.callback.LearningRateScheduler
|
||||
|
||||
dpath = tm.data_dir(__file__)
|
||||
dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train"))
|
||||
dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test"))
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
|
||||
param = {
|
||||
|
||||
@@ -283,7 +283,7 @@ class TestDMatrix:
|
||||
assert m0.feature_types == m1.feature_types
|
||||
|
||||
def test_get_info(self):
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
dtrain.get_float_info('label')
|
||||
dtrain.get_float_info('weight')
|
||||
dtrain.get_float_info('base_margin')
|
||||
@@ -432,7 +432,9 @@ class TestDMatrix:
|
||||
def test_uri_categorical(self):
|
||||
path = os.path.join(dpath, 'agaricus.txt.train')
|
||||
feature_types = ["q"] * 5 + ["c"] + ["q"] * 120
|
||||
Xy = xgb.DMatrix(path + "?indexing_mode=1", feature_types=feature_types)
|
||||
Xy = xgb.DMatrix(
|
||||
path + "?indexing_mode=1&format=libsvm", feature_types=feature_types
|
||||
)
|
||||
np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))
|
||||
|
||||
def test_base_margin(self):
|
||||
|
||||
@@ -88,8 +88,12 @@ class TestInteractionConstraints:
|
||||
def training_accuracy(self, tree_method):
|
||||
"""Test accuracy, reused by GPU tests."""
|
||||
from sklearn.metrics import accuracy_score
|
||||
dtrain = xgboost.DMatrix(dpath + 'agaricus.txt.train?indexing_mode=1')
|
||||
dtest = xgboost.DMatrix(dpath + 'agaricus.txt.test?indexing_mode=1')
|
||||
dtrain = xgboost.DMatrix(
|
||||
dpath + "agaricus.txt.train?indexing_mode=1&format=libsvm"
|
||||
)
|
||||
dtest = xgboost.DMatrix(
|
||||
dpath + "agaricus.txt.test?indexing_mode=1&format=libsvm"
|
||||
)
|
||||
params = {
|
||||
'eta': 1,
|
||||
'max_depth': 6,
|
||||
|
||||
@@ -134,8 +134,8 @@ class TestMonotoneConstraints:
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_training_accuracy(self):
|
||||
from sklearn.metrics import accuracy_score
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train?indexing_mode=1')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test?indexing_mode=1')
|
||||
dtrain = xgb.DMatrix(dpath + "agaricus.txt.train?indexing_mode=1&format=libsvm")
|
||||
dtest = xgb.DMatrix(dpath + "agaricus.txt.test?indexing_mode=1&format=libsvm")
|
||||
params = {'eta': 1, 'max_depth': 6, 'objective': 'binary:logistic',
|
||||
'tree_method': 'hist', 'monotone_constraints': '(1, 0)'}
|
||||
num_boost_round = 5
|
||||
|
||||
@@ -13,9 +13,7 @@ pytestmark = tm.timeout(10)
|
||||
|
||||
class TestOMP:
|
||||
def test_omp(self):
|
||||
dpath = 'demo/data/'
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
dtrain, dtest = tm.load_agaricus(__file__)
|
||||
|
||||
param = {'booster': 'gbtree',
|
||||
'objective': 'binary:logistic',
|
||||
|
||||
@@ -13,7 +13,7 @@ rng = np.random.RandomState(1994)
|
||||
|
||||
class TestTreesToDataFrame:
|
||||
def build_model(self, max_depth, num_round):
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtrain, _ = tm.load_agaricus(__file__)
|
||||
param = {'max_depth': max_depth, 'objective': 'binary:logistic',
|
||||
'verbosity': 1}
|
||||
num_round = num_round
|
||||
|
||||
@@ -17,12 +17,10 @@ except ImportError:
|
||||
pytestmark = pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(),
|
||||
tm.no_graphviz()))
|
||||
|
||||
dpath = 'demo/data/agaricus.txt.train'
|
||||
|
||||
|
||||
class TestPlotting:
|
||||
def test_plotting(self):
|
||||
m = xgb.DMatrix(dpath)
|
||||
m, _ = tm.load_agaricus(__file__)
|
||||
booster = xgb.train({'max_depth': 2, 'eta': 1,
|
||||
'objective': 'binary:logistic'}, m,
|
||||
num_boost_round=2)
|
||||
|
||||
@@ -46,8 +46,8 @@ class TestSHAP:
|
||||
fscores = bst.get_fscore()
|
||||
assert scores1 == fscores
|
||||
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train?format=libsvm')
|
||||
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test?format=libsvm')
|
||||
|
||||
def fn(max_depth, num_rounds):
|
||||
# train
|
||||
|
||||
@@ -154,9 +154,7 @@ class TestTreeMethod:
|
||||
|
||||
def test_hist_categorical(self):
|
||||
# hist must be same as exact on all-categorial data
|
||||
dpath = 'demo/data/'
|
||||
ag_dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
ag_dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
ag_dtrain, ag_dtest = tm.load_agaricus(__file__)
|
||||
ag_param = {'max_depth': 2,
|
||||
'tree_method': 'hist',
|
||||
'eta': 1,
|
||||
|
||||
@@ -222,7 +222,7 @@ class TestPandas:
|
||||
set_base_margin_info(pd.DataFrame, xgb.DMatrix, "hist")
|
||||
|
||||
def test_cv_as_pandas(self):
|
||||
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
dm, _ = tm.load_agaricus(__file__)
|
||||
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||
'objective': 'binary:logistic', 'eval_metric': 'error'}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user