sync Mar 27 2023
This commit is contained in:
@@ -161,6 +161,7 @@ def main(args: argparse.Namespace) -> None:
|
||||
"demo/guide-python/spark_estimator_examples.py",
|
||||
"demo/guide-python/individual_trees.py",
|
||||
"demo/guide-python/quantile_regression.py",
|
||||
"demo/guide-python/multioutput_regression.py",
|
||||
# CI
|
||||
"tests/ci_build/lint_python.py",
|
||||
"tests/ci_build/test_r_package.py",
|
||||
@@ -204,6 +205,7 @@ def main(args: argparse.Namespace) -> None:
|
||||
"demo/guide-python/feature_weights.py",
|
||||
"demo/guide-python/individual_trees.py",
|
||||
"demo/guide-python/quantile_regression.py",
|
||||
"demo/guide-python/multioutput_regression.py",
|
||||
# tests
|
||||
"tests/python/test_dt.py",
|
||||
"tests/python/test_data_iterator.py",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright (c) by Contributors 2019-2022
|
||||
/**
|
||||
* Copyright (c) 2019-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
@@ -8,7 +8,8 @@
|
||||
|
||||
#include "../../../src/common/charconv.h"
|
||||
#include "../../../src/common/io.h"
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../../../src/common/threading_utils.h" // for ParallelFor
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
#include "dmlc/logging.h"
|
||||
#include "xgboost/json.h"
|
||||
|
||||
@@ -505,7 +505,7 @@ TEST(GBTree, PredictRange) {
|
||||
auto h_out_predt_full = out_predt->HostVector();
|
||||
|
||||
ASSERT_TRUE(std::equal(h_out_predt.begin(), h_out_predt.end(), h_out_predt_full.begin()));
|
||||
|
||||
// Out of range.
|
||||
ASSERT_THROW(learner->InplacePredict(x, PredictionType::kValue,
|
||||
std::numeric_limits<float>::quiet_NaN(), &out_predt, 0, 3),
|
||||
dmlc::Error);
|
||||
|
||||
@@ -557,23 +557,6 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
|
||||
return dmat;
|
||||
}
|
||||
|
||||
gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context const* ctx,
|
||||
size_t n_classes) {
|
||||
gbm::GBTreeModel model(param, ctx);
|
||||
|
||||
for (size_t i = 0; i < n_classes; ++i) {
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
|
||||
if (i == 0) {
|
||||
(*trees.back())[0].SetLeaf(1.5f);
|
||||
(*trees.back()).Stat(0).sum_hess = 1.0f;
|
||||
}
|
||||
model.CommitModel(std::move(trees), i);
|
||||
}
|
||||
|
||||
return model;
|
||||
}
|
||||
|
||||
std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs, size_t kRows,
|
||||
size_t kCols,
|
||||
LearnerModelParam const* learner_model_param,
|
||||
|
||||
@@ -9,8 +9,10 @@
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/context.h>
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/learner.h> // for LearnerModelParam
|
||||
#include <xgboost/model.h> // for Configurable
|
||||
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <cstdint> // std::int32_t
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
@@ -22,7 +24,6 @@
|
||||
#include "../../src/collective/communicator-inl.h"
|
||||
#include "../../src/common/common.h"
|
||||
#include "../../src/data/array_interface.h"
|
||||
#include "../../src/gbm/gbtree_model.h"
|
||||
#include "filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "xgboost/linalg.h"
|
||||
|
||||
@@ -362,9 +363,6 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
|
||||
size_t n_rows, size_t n_cols, size_t page_size, bool deterministic,
|
||||
const dmlc::TemporaryDirectory& tempdir = dmlc::TemporaryDirectory());
|
||||
|
||||
gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context const* ctx,
|
||||
size_t n_classes = 1);
|
||||
|
||||
std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs, size_t kRows,
|
||||
size_t kCols,
|
||||
LearnerModelParam const* learner_model_param,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2017-2023, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/c_api.h>
|
||||
@@ -159,7 +159,7 @@ TEST(GPUPredictor, ShapStump) {
|
||||
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
|
||||
model.CommitModel(std::move(trees), 0);
|
||||
model.CommitModelGroup(std::move(trees), 0);
|
||||
|
||||
auto gpu_lparam = CreateEmptyGenericParam(0);
|
||||
std::unique_ptr<Predictor> gpu_predictor = std::unique_ptr<Predictor>(
|
||||
@@ -187,7 +187,7 @@ TEST(GPUPredictor, Shap) {
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
|
||||
trees[0]->ExpandNode(0, 0, 0.5, true, 1.0, -1.0, 1.0, 0.0, 5.0, 2.0, 3.0);
|
||||
model.CommitModel(std::move(trees), 0);
|
||||
model.CommitModelGroup(std::move(trees), 0);
|
||||
|
||||
auto gpu_lparam = CreateEmptyGenericParam(0);
|
||||
auto cpu_lparam = CreateEmptyGenericParam(-1);
|
||||
|
||||
@@ -209,7 +209,7 @@ void GBTreeModelForTest(gbm::GBTreeModel *model, uint32_t split_ind,
|
||||
p_tree->ExpandCategorical(0, split_ind, split_cats, true, 1.5f,
|
||||
left_weight, right_weight,
|
||||
3.0f, 2.2f, 7.0f, 9.0f);
|
||||
model->CommitModel(std::move(trees), 0);
|
||||
model->CommitModelGroup(std::move(trees), 0);
|
||||
}
|
||||
|
||||
void TestCategoricalPrediction(std::string name) {
|
||||
@@ -445,7 +445,7 @@ void TestVectorLeafPrediction(Context const *ctx) {
|
||||
ASSERT_TRUE(mparam.IsVectorLeaf());
|
||||
|
||||
gbm::GBTreeModel model{&mparam, ctx};
|
||||
model.CommitModel(std::move(trees), 0);
|
||||
model.CommitModelGroup(std::move(trees), 0);
|
||||
|
||||
auto run_test = [&](float expected, HostDeviceVector<float> *p_data) {
|
||||
{
|
||||
|
||||
@@ -14,6 +14,23 @@
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
inline gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, Context const* ctx,
|
||||
size_t n_classes = 1) {
|
||||
gbm::GBTreeModel model(param, ctx);
|
||||
|
||||
for (size_t i = 0; i < n_classes; ++i) {
|
||||
std::vector<std::unique_ptr<RegTree>> trees;
|
||||
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
|
||||
if (i == 0) {
|
||||
(*trees.back())[0].SetLeaf(1.5f);
|
||||
(*trees.back()).Stat(0).sum_hess = 1.0f;
|
||||
}
|
||||
model.CommitModelGroup(std::move(trees), i);
|
||||
}
|
||||
|
||||
return model;
|
||||
}
|
||||
|
||||
template <typename Page>
|
||||
void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
|
||||
std::shared_ptr<DMatrix> p_hist) {
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
// Copyright (c) 2019-2022 by Contributors
|
||||
/**
|
||||
* Copyright (c) 2019-2023, XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/feature_map.h> // for FeatureMap
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/learner.h>
|
||||
|
||||
|
||||
@@ -256,7 +256,7 @@ void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||
hist_maker.Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
|
||||
{tree});
|
||||
auto cache = linalg::VectorView<float>{preds->DeviceSpan(), {preds->Size()}, 0};
|
||||
auto cache = linalg::MakeTensorView(&ctx, preds->DeviceSpan(), preds->Size(), 1);
|
||||
hist_maker.UpdatePredictionCache(dmat, cache);
|
||||
}
|
||||
|
||||
|
||||
@@ -15,15 +15,17 @@ namespace xgboost {
|
||||
|
||||
class TestPredictionCache : public ::testing::Test {
|
||||
std::shared_ptr<DMatrix> Xy_;
|
||||
size_t n_samples_{2048};
|
||||
std::size_t n_samples_{2048};
|
||||
|
||||
protected:
|
||||
void SetUp() override {
|
||||
size_t n_features = 13;
|
||||
Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.GenerateDMatrix(true);
|
||||
std::size_t n_features = 13;
|
||||
bst_target_t n_targets = 3;
|
||||
Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.Targets(n_targets).GenerateDMatrix(true);
|
||||
}
|
||||
|
||||
void RunLearnerTest(std::string updater_name, float subsample, std::string grow_policy) {
|
||||
void RunLearnerTest(std::string updater_name, float subsample, std::string const& grow_policy,
|
||||
std::string const& strategy) {
|
||||
std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
|
||||
if (updater_name == "grow_gpu_hist") {
|
||||
// gpu_id setup
|
||||
@@ -31,6 +33,7 @@ class TestPredictionCache : public ::testing::Test {
|
||||
} else {
|
||||
learner->SetParam("updater", updater_name);
|
||||
}
|
||||
learner->SetParam("multi_strategy", strategy);
|
||||
learner->SetParam("grow_policy", grow_policy);
|
||||
learner->SetParam("subsample", std::to_string(subsample));
|
||||
learner->SetParam("nthread", "0");
|
||||
@@ -62,7 +65,7 @@ class TestPredictionCache : public ::testing::Test {
|
||||
}
|
||||
}
|
||||
|
||||
void RunTest(std::string updater_name) {
|
||||
void RunTest(std::string const& updater_name, std::string const& strategy) {
|
||||
{
|
||||
Context ctx;
|
||||
ctx.InitAllowUnknown(Args{{"nthread", "8"}});
|
||||
@@ -85,28 +88,31 @@ class TestPredictionCache : public ::testing::Test {
|
||||
HostDeviceVector<float> out_prediction_cached;
|
||||
out_prediction_cached.SetDevice(ctx.gpu_id);
|
||||
out_prediction_cached.Resize(n_samples_);
|
||||
auto cache = linalg::VectorView<float>{ctx.gpu_id == Context::kCpuId
|
||||
? out_prediction_cached.HostSpan()
|
||||
: out_prediction_cached.DeviceSpan(),
|
||||
{out_prediction_cached.Size()},
|
||||
ctx.gpu_id};
|
||||
auto cache =
|
||||
linalg::MakeTensorView(&ctx, &out_prediction_cached, out_prediction_cached.Size(), 1);
|
||||
ASSERT_TRUE(updater->UpdatePredictionCache(Xy_.get(), cache));
|
||||
}
|
||||
|
||||
for (auto policy : {"depthwise", "lossguide"}) {
|
||||
for (auto subsample : {1.0f, 0.4f}) {
|
||||
this->RunLearnerTest(updater_name, subsample, policy);
|
||||
this->RunLearnerTest(updater_name, subsample, policy);
|
||||
this->RunLearnerTest(updater_name, subsample, policy, strategy);
|
||||
this->RunLearnerTest(updater_name, subsample, policy, strategy);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(TestPredictionCache, Approx) { this->RunTest("grow_histmaker"); }
|
||||
TEST_F(TestPredictionCache, Approx) { this->RunTest("grow_histmaker", "one_output_per_tree"); }
|
||||
|
||||
TEST_F(TestPredictionCache, Hist) { this->RunTest("grow_quantile_histmaker"); }
|
||||
TEST_F(TestPredictionCache, Hist) {
|
||||
this->RunTest("grow_quantile_histmaker", "one_output_per_tree");
|
||||
}
|
||||
|
||||
TEST_F(TestPredictionCache, HistMulti) {
|
||||
this->RunTest("grow_quantile_histmaker", "multi_output_tree");
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
TEST_F(TestPredictionCache, GpuHist) { this->RunTest("grow_gpu_hist"); }
|
||||
TEST_F(TestPredictionCache, GpuHist) { this->RunTest("grow_gpu_hist", "one_output_per_tree"); }
|
||||
#endif // defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -524,7 +524,7 @@ class TestModels:
|
||||
booster[-1:0]
|
||||
|
||||
# we do not accept empty slice.
|
||||
with pytest.raises(ValueError):
|
||||
with pytest.raises(ValueError, match="Empty slice"):
|
||||
booster[1:1]
|
||||
# stop can not be smaller than begin
|
||||
with pytest.raises(ValueError, match=r"Invalid.*"):
|
||||
@@ -615,6 +615,46 @@ class TestModels:
|
||||
booster = xgb.Booster(model_file=bytesarray)
|
||||
self.run_slice(booster, dtrain, num_parallel_tree, num_classes, num_boost_round)
|
||||
|
||||
def test_slice_multi(self) -> None:
|
||||
from sklearn.datasets import make_classification
|
||||
|
||||
num_classes = 3
|
||||
X, y = make_classification(
|
||||
n_samples=1000, n_informative=5, n_classes=num_classes
|
||||
)
|
||||
Xy = xgb.DMatrix(data=X, label=y)
|
||||
num_parallel_tree = 4
|
||||
num_boost_round = 16
|
||||
|
||||
class ResetStrategy(xgb.callback.TrainingCallback):
|
||||
def after_iteration(self, model, epoch: int, evals_log) -> bool:
|
||||
model.set_param({"multi_strategy": "multi_output_tree"})
|
||||
return False
|
||||
|
||||
booster = xgb.train(
|
||||
{
|
||||
"num_parallel_tree": num_parallel_tree,
|
||||
"num_class": num_classes,
|
||||
"booster": "gbtree",
|
||||
"objective": "multi:softprob",
|
||||
"multi_strategy": "multi_output_tree",
|
||||
"tree_method": "hist",
|
||||
"base_score": 0,
|
||||
},
|
||||
num_boost_round=num_boost_round,
|
||||
dtrain=Xy,
|
||||
callbacks=[ResetStrategy()]
|
||||
)
|
||||
sliced = [t for t in booster]
|
||||
assert len(sliced) == 16
|
||||
|
||||
predt0 = booster.predict(Xy, output_margin=True)
|
||||
predt1 = np.zeros(predt0.shape)
|
||||
for t in booster:
|
||||
predt1 += t.predict(Xy, output_margin=True)
|
||||
|
||||
np.testing.assert_allclose(predt0, predt1, atol=1e-5)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
|
||||
def test_feature_info(self):
|
||||
import pandas as pd
|
||||
|
||||
@@ -66,7 +66,6 @@ def run_scikit_model_check(name, path):
|
||||
cls.load_model(path)
|
||||
if name.find('0.90') == -1:
|
||||
assert len(cls.classes_) == gm.kClasses
|
||||
assert len(cls._le.classes_) == gm.kClasses
|
||||
assert cls.n_classes_ == gm.kClasses
|
||||
assert (len(cls.get_booster().get_dump()) ==
|
||||
gm.kRounds * gm.kForests * gm.kClasses), path
|
||||
|
||||
@@ -38,36 +38,34 @@ def test_binary_classification():
|
||||
assert err < 0.1
|
||||
|
||||
|
||||
@pytest.mark.parametrize('objective', ['multi:softmax', 'multi:softprob'])
|
||||
@pytest.mark.parametrize("objective", ["multi:softmax", "multi:softprob"])
|
||||
def test_multiclass_classification(objective):
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.model_selection import KFold
|
||||
|
||||
def check_pred(preds, labels, output_margin):
|
||||
if output_margin:
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if preds[i].argmax() != labels[i]) / float(len(preds))
|
||||
err = sum(
|
||||
1 for i in range(len(preds)) if preds[i].argmax() != labels[i]
|
||||
) / float(len(preds))
|
||||
else:
|
||||
err = sum(1 for i in range(len(preds))
|
||||
if preds[i] != labels[i]) / float(len(preds))
|
||||
err = sum(1 for i in range(len(preds)) if preds[i] != labels[i]) / float(
|
||||
len(preds)
|
||||
)
|
||||
assert err < 0.4
|
||||
|
||||
iris = load_iris()
|
||||
y = iris['target']
|
||||
X = iris['data']
|
||||
X, y = load_iris(return_X_y=True)
|
||||
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
|
||||
for train_index, test_index in kf.split(X, y):
|
||||
xgb_model = xgb.XGBClassifier(objective=objective).fit(X[train_index], y[train_index])
|
||||
assert (xgb_model.get_booster().num_boosted_rounds() ==
|
||||
xgb_model.n_estimators)
|
||||
xgb_model = xgb.XGBClassifier(objective=objective).fit(
|
||||
X[train_index], y[train_index]
|
||||
)
|
||||
assert xgb_model.get_booster().num_boosted_rounds() == 100
|
||||
preds = xgb_model.predict(X[test_index])
|
||||
# test other params in XGBClassifier().fit
|
||||
preds2 = xgb_model.predict(X[test_index], output_margin=True,
|
||||
ntree_limit=3)
|
||||
preds3 = xgb_model.predict(X[test_index], output_margin=True,
|
||||
ntree_limit=0)
|
||||
preds4 = xgb_model.predict(X[test_index], output_margin=False,
|
||||
ntree_limit=3)
|
||||
preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
|
||||
preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
|
||||
preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
|
||||
labels = y[test_index]
|
||||
|
||||
check_pred(preds, labels, output_margin=False)
|
||||
@@ -761,9 +759,9 @@ def test_parameters_access():
|
||||
clf = save_load(clf)
|
||||
|
||||
assert clf.tree_method is None
|
||||
assert clf.n_estimators == 2
|
||||
assert clf.n_estimators is None
|
||||
assert clf.get_params()["tree_method"] is None
|
||||
assert clf.get_params()["n_estimators"] == 2
|
||||
assert clf.get_params()["n_estimators"] is None
|
||||
assert get_tm(clf) == "auto" # discarded for save/load_model
|
||||
|
||||
clf.set_params(tree_method="hist")
|
||||
@@ -771,9 +769,7 @@ def test_parameters_access():
|
||||
clf = pickle.loads(pickle.dumps(clf))
|
||||
assert clf.get_params()["tree_method"] == "hist"
|
||||
clf = save_load(clf)
|
||||
# FIXME(jiamingy): We should remove this behavior once we remove parameters
|
||||
# serialization for skl save/load_model.
|
||||
assert clf.get_params()["tree_method"] == "hist"
|
||||
assert clf.get_params()["tree_method"] is None
|
||||
|
||||
|
||||
def test_kwargs_error():
|
||||
@@ -902,6 +898,7 @@ def save_load_model(model_path):
|
||||
xgb_model.load_model(model_path)
|
||||
|
||||
assert isinstance(xgb_model.classes_, np.ndarray)
|
||||
np.testing.assert_equal(xgb_model.classes_, np.array([0, 1]))
|
||||
assert isinstance(xgb_model._Booster, xgb.Booster)
|
||||
|
||||
preds = xgb_model.predict(X[test_index])
|
||||
@@ -933,8 +930,10 @@ def test_save_load_model():
|
||||
save_load_model(model_path)
|
||||
|
||||
from sklearn.datasets import load_digits
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
model_path = os.path.join(tempdir, 'digits.model.json')
|
||||
model_path = os.path.join(tempdir, 'digits.model.ubj')
|
||||
digits = load_digits(n_class=2)
|
||||
y = digits['target']
|
||||
X = digits['data']
|
||||
@@ -959,6 +958,28 @@ def test_save_load_model():
|
||||
predt_1 = cls.predict(X)
|
||||
assert np.allclose(predt_0, predt_1)
|
||||
|
||||
# mclass
|
||||
X, y = load_digits(n_class=10, return_X_y=True)
|
||||
# small test_size to force early stop
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.01, random_state=1
|
||||
)
|
||||
clf = xgb.XGBClassifier(
|
||||
n_estimators=64, tree_method="hist", early_stopping_rounds=2
|
||||
)
|
||||
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
|
||||
score = clf.best_score
|
||||
clf.save_model(model_path)
|
||||
|
||||
clf = xgb.XGBClassifier()
|
||||
clf.load_model(model_path)
|
||||
assert clf.classes_.size == 10
|
||||
np.testing.assert_equal(clf.classes_, np.arange(10))
|
||||
assert clf.n_classes_ == 10
|
||||
|
||||
assert clf.best_iteration == 27
|
||||
assert clf.best_score == score
|
||||
|
||||
|
||||
def test_RFECV():
|
||||
from sklearn.datasets import load_breast_cancer, load_diabetes, load_iris
|
||||
|
||||
Reference in New Issue
Block a user