[CI] Refactor tests to reduce CI time. (#8312)
This commit is contained in:
parent
39afdac3be
commit
ce0382dcb0
@ -50,8 +50,8 @@ for train_index, test_index in kf.split(X):
|
|||||||
print("Parameter optimization")
|
print("Parameter optimization")
|
||||||
xgb_model = xgb.XGBRegressor(n_jobs=1)
|
xgb_model = xgb.XGBRegressor(n_jobs=1)
|
||||||
clf = GridSearchCV(xgb_model,
|
clf = GridSearchCV(xgb_model,
|
||||||
{'max_depth': [2, 4, 6],
|
{'max_depth': [2, 4],
|
||||||
'n_estimators': [50, 100, 200]}, verbose=1, n_jobs=1)
|
'n_estimators': [50, 100]}, verbose=1, n_jobs=1, cv=3)
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
print(clf.best_score_)
|
print(clf.best_score_)
|
||||||
print(clf.best_params_)
|
print(clf.best_params_)
|
||||||
|
|||||||
@ -24,7 +24,7 @@ RUN \
|
|||||||
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||||
python=3.9 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
|
python=3.9 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
|
||||||
dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
|
dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
|
||||||
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
|
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
|
||||||
pyspark cloudpickle cuda-python=11.7.0 && \
|
pyspark cloudpickle cuda-python=11.7.0 && \
|
||||||
mamba clean --all && \
|
mamba clean --all && \
|
||||||
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
|
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
|
||||||
|
|||||||
@ -22,6 +22,7 @@ dependencies:
|
|||||||
- sh
|
- sh
|
||||||
- mock
|
- mock
|
||||||
- pytest
|
- pytest
|
||||||
|
- pytest-timeout
|
||||||
- pytest-cov
|
- pytest-cov
|
||||||
- python-kubernetes
|
- python-kubernetes
|
||||||
- urllib3
|
- urllib3
|
||||||
|
|||||||
@ -5,10 +5,9 @@ import pytest
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.path.append("tests/python")
|
sys.path.append("tests/python")
|
||||||
from test_data_iterator import SingleBatch, make_batches
|
|
||||||
from test_data_iterator import test_single_batch as cpu_single_batch
|
from test_data_iterator import test_single_batch as cpu_single_batch
|
||||||
from test_data_iterator import run_data_iterator
|
from test_data_iterator import run_data_iterator
|
||||||
from testing import IteratorForTest, no_cupy
|
from testing import no_cupy
|
||||||
|
|
||||||
|
|
||||||
def test_gpu_single_batch() -> None:
|
def test_gpu_single_batch() -> None:
|
||||||
@ -21,16 +20,14 @@ def test_gpu_single_batch() -> None:
|
|||||||
strategies.integers(1, 7),
|
strategies.integers(1, 7),
|
||||||
strategies.integers(0, 8),
|
strategies.integers(0, 8),
|
||||||
strategies.booleans(),
|
strategies.booleans(),
|
||||||
|
strategies.booleans(),
|
||||||
)
|
)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=10, print_blob=True)
|
||||||
def test_gpu_data_iterator(
|
def test_gpu_data_iterator(
|
||||||
n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool
|
n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool, use_cupy: bool
|
||||||
) -> None:
|
) -> None:
|
||||||
run_data_iterator(
|
run_data_iterator(
|
||||||
n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, True
|
n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, use_cupy
|
||||||
)
|
|
||||||
run_data_iterator(
|
|
||||||
n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, False
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -6,6 +6,8 @@ sys.path.append("tests/python")
|
|||||||
import testing as tm
|
import testing as tm
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.timeout(10)
|
||||||
|
|
||||||
parameter_strategy = strategies.fixed_dictionaries({
|
parameter_strategy = strategies.fixed_dictionaries({
|
||||||
'booster': strategies.just('gblinear'),
|
'booster': strategies.just('gblinear'),
|
||||||
'eta': strategies.floats(0.01, 0.25),
|
'eta': strategies.floats(0.01, 0.25),
|
||||||
@ -30,7 +32,7 @@ def train_result(param, dmat, num_rounds):
|
|||||||
class TestGPULinear:
|
class TestGPULinear:
|
||||||
@given(parameter_strategy, strategies.integers(10, 50),
|
@given(parameter_strategy, strategies.integers(10, 50),
|
||||||
tm.dataset_strategy)
|
tm.dataset_strategy)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
def test_gpu_coordinate(self, param, num_rounds, dataset):
|
def test_gpu_coordinate(self, param, num_rounds, dataset):
|
||||||
assume(len(dataset.y) > 0)
|
assume(len(dataset.y) > 0)
|
||||||
param['updater'] = 'gpu_coord_descent'
|
param['updater'] = 'gpu_coord_descent'
|
||||||
@ -49,7 +51,7 @@ class TestGPULinear:
|
|||||||
strategies.floats(1e-5, 0.8),
|
strategies.floats(1e-5, 0.8),
|
||||||
strategies.floats(1e-5, 0.8)
|
strategies.floats(1e-5, 0.8)
|
||||||
)
|
)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd):
|
def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd):
|
||||||
assume(len(dataset.y) > 0)
|
assume(len(dataset.y) > 0)
|
||||||
param['updater'] = 'gpu_coord_descent'
|
param['updater'] = 'gpu_coord_descent'
|
||||||
|
|||||||
@ -15,6 +15,8 @@ import testing as tm
|
|||||||
model_path = './model.pkl'
|
model_path = './model.pkl'
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.timeout(30)
|
||||||
|
|
||||||
def build_dataset():
|
def build_dataset():
|
||||||
N = 10
|
N = 10
|
||||||
x = np.linspace(0, N*N, N*N)
|
x = np.linspace(0, N*N, N*N)
|
||||||
@ -65,6 +67,7 @@ class TestPickling:
|
|||||||
assert status == 0
|
assert status == 0
|
||||||
os.remove(model_path)
|
os.remove(model_path)
|
||||||
|
|
||||||
|
# TODO: This test is too slow
|
||||||
@pytest.mark.skipif(**tm.no_sklearn())
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
def test_pickling(self):
|
def test_pickling(self):
|
||||||
x, y = build_dataset()
|
x, y = build_dataset()
|
||||||
|
|||||||
@ -32,6 +32,7 @@ predict_parameter_strategy = strategies.fixed_dictionaries({
|
|||||||
'num_parallel_tree': strategies.sampled_from([1, 4]),
|
'num_parallel_tree': strategies.sampled_from([1, 4]),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.timeout(20)
|
||||||
|
|
||||||
class TestGPUPredict:
|
class TestGPUPredict:
|
||||||
def test_predict(self):
|
def test_predict(self):
|
||||||
@ -264,7 +265,7 @@ class TestGPUPredict:
|
|||||||
|
|
||||||
@given(strategies.integers(1, 10),
|
@given(strategies.integers(1, 10),
|
||||||
tm.dataset_strategy, shap_parameter_strategy)
|
tm.dataset_strategy, shap_parameter_strategy)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
def test_shap(self, num_rounds, dataset, param):
|
def test_shap(self, num_rounds, dataset, param):
|
||||||
if dataset.name.endswith("-l1"): # not supported by the exact tree method
|
if dataset.name.endswith("-l1"): # not supported by the exact tree method
|
||||||
return
|
return
|
||||||
@ -280,7 +281,7 @@ class TestGPUPredict:
|
|||||||
|
|
||||||
@given(strategies.integers(1, 10),
|
@given(strategies.integers(1, 10),
|
||||||
tm.dataset_strategy, shap_parameter_strategy)
|
tm.dataset_strategy, shap_parameter_strategy)
|
||||||
@settings(deadline=None, max_examples=20, print_blob=True)
|
@settings(deadline=None, max_examples=10, print_blob=True)
|
||||||
def test_shap_interactions(self, num_rounds, dataset, param):
|
def test_shap_interactions(self, num_rounds, dataset, param):
|
||||||
if dataset.name.endswith("-l1"): # not supported by the exact tree method
|
if dataset.name.endswith("-l1"): # not supported by the exact tree method
|
||||||
return
|
return
|
||||||
@ -333,14 +334,14 @@ class TestGPUPredict:
|
|||||||
np.testing.assert_equal(cpu_leaf, gpu_leaf)
|
np.testing.assert_equal(cpu_leaf, gpu_leaf)
|
||||||
|
|
||||||
@given(predict_parameter_strategy, tm.dataset_strategy)
|
@given(predict_parameter_strategy, tm.dataset_strategy)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
def test_predict_leaf_gbtree(self, param, dataset):
|
def test_predict_leaf_gbtree(self, param, dataset):
|
||||||
param['booster'] = 'gbtree'
|
param['booster'] = 'gbtree'
|
||||||
param['tree_method'] = 'gpu_hist'
|
param['tree_method'] = 'gpu_hist'
|
||||||
self.run_predict_leaf_booster(param, 10, dataset)
|
self.run_predict_leaf_booster(param, 10, dataset)
|
||||||
|
|
||||||
@given(predict_parameter_strategy, tm.dataset_strategy)
|
@given(predict_parameter_strategy, tm.dataset_strategy)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
def test_predict_leaf_dart(self, param, dataset):
|
def test_predict_leaf_dart(self, param, dataset):
|
||||||
param['booster'] = 'dart'
|
param['booster'] = 'dart'
|
||||||
param['tree_method'] = 'gpu_hist'
|
param['tree_method'] = 'gpu_hist'
|
||||||
@ -351,7 +352,7 @@ class TestGPUPredict:
|
|||||||
@given(df=data_frames([column('x0', elements=strategies.integers(min_value=0, max_value=3)),
|
@given(df=data_frames([column('x0', elements=strategies.integers(min_value=0, max_value=3)),
|
||||||
column('x1', elements=strategies.integers(min_value=0, max_value=5))],
|
column('x1', elements=strategies.integers(min_value=0, max_value=5))],
|
||||||
index=range_indexes(min_size=20, max_size=50)))
|
index=range_indexes(min_size=20, max_size=50)))
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
def test_predict_categorical_split(self, df):
|
def test_predict_categorical_split(self, df):
|
||||||
from sklearn.metrics import mean_squared_error
|
from sklearn.metrics import mean_squared_error
|
||||||
|
|
||||||
|
|||||||
@ -6,10 +6,12 @@ import shutil
|
|||||||
import urllib.request
|
import urllib.request
|
||||||
import zipfile
|
import zipfile
|
||||||
import sys
|
import sys
|
||||||
|
import pytest
|
||||||
sys.path.append("tests/python")
|
sys.path.append("tests/python")
|
||||||
|
|
||||||
import testing as tm # noqa
|
import testing as tm # noqa
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.timeout(10)
|
||||||
|
|
||||||
class TestRanking:
|
class TestRanking:
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -96,7 +98,7 @@ class TestRanking:
|
|||||||
# specify validations set to watch performance
|
# specify validations set to watch performance
|
||||||
watchlist = [(cls.dtest, 'eval'), (cls.dtrain, 'train')]
|
watchlist = [(cls.dtest, 'eval'), (cls.dtrain, 'train')]
|
||||||
|
|
||||||
num_trees = 2500
|
num_trees = 100
|
||||||
check_metric_improvement_rounds = 10
|
check_metric_improvement_rounds = 10
|
||||||
|
|
||||||
evals_result = {}
|
evals_result = {}
|
||||||
|
|||||||
@ -7,7 +7,7 @@ import sklearn
|
|||||||
sys.path.append("tests/python")
|
sys.path.append("tests/python")
|
||||||
import testing as tm
|
import testing as tm
|
||||||
|
|
||||||
if tm.no_dask()["condition"]:
|
if tm.no_spark()["condition"]:
|
||||||
pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
|
pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
|
||||||
if sys.platform.startswith("win"):
|
if sys.platform.startswith("win"):
|
||||||
pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)
|
pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)
|
||||||
|
|||||||
@ -1,7 +1,6 @@
|
|||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sys
|
import sys
|
||||||
import gc
|
|
||||||
import pytest
|
import pytest
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
from hypothesis import given, strategies, assume, settings, note
|
from hypothesis import given, strategies, assume, settings, note
|
||||||
@ -10,6 +9,7 @@ sys.path.append("tests/python")
|
|||||||
import testing as tm
|
import testing as tm
|
||||||
import test_updaters as test_up
|
import test_updaters as test_up
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.timeout(30)
|
||||||
|
|
||||||
parameter_strategy = strategies.fixed_dictionaries({
|
parameter_strategy = strategies.fixed_dictionaries({
|
||||||
'max_depth': strategies.integers(0, 11),
|
'max_depth': strategies.integers(0, 11),
|
||||||
@ -46,7 +46,7 @@ class TestGPUUpdaters:
|
|||||||
cputest = test_up.TestTreeMethod()
|
cputest = test_up.TestTreeMethod()
|
||||||
|
|
||||||
@given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
|
@given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=50, print_blob=True)
|
||||||
def test_gpu_hist(self, param, num_rounds, dataset):
|
def test_gpu_hist(self, param, num_rounds, dataset):
|
||||||
param["tree_method"] = "gpu_hist"
|
param["tree_method"] = "gpu_hist"
|
||||||
param = dataset.set_params(param)
|
param = dataset.set_params(param)
|
||||||
@ -73,7 +73,7 @@ class TestGPUUpdaters:
|
|||||||
|
|
||||||
@given(strategies.integers(10, 400), strategies.integers(3, 8),
|
@given(strategies.integers(10, 400), strategies.integers(3, 8),
|
||||||
strategies.integers(1, 2), strategies.integers(4, 7))
|
strategies.integers(1, 2), strategies.integers(4, 7))
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
@pytest.mark.skipif(**tm.no_pandas())
|
@pytest.mark.skipif(**tm.no_pandas())
|
||||||
def test_categorical_ohe(self, rows, cols, rounds, cats):
|
def test_categorical_ohe(self, rows, cols, rounds, cats):
|
||||||
self.cputest.run_categorical_ohe(rows, cols, rounds, cats, "gpu_hist")
|
self.cputest.run_categorical_ohe(rows, cols, rounds, cats, "gpu_hist")
|
||||||
@ -85,7 +85,7 @@ class TestGPUUpdaters:
|
|||||||
test_up.cat_parameter_strategy,
|
test_up.cat_parameter_strategy,
|
||||||
strategies.integers(4, 32),
|
strategies.integers(4, 32),
|
||||||
)
|
)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
@pytest.mark.skipif(**tm.no_pandas())
|
@pytest.mark.skipif(**tm.no_pandas())
|
||||||
def test_categorical(
|
def test_categorical(
|
||||||
self,
|
self,
|
||||||
@ -106,7 +106,7 @@ class TestGPUUpdaters:
|
|||||||
test_up.hist_parameter_strategy,
|
test_up.hist_parameter_strategy,
|
||||||
test_up.cat_parameter_strategy,
|
test_up.cat_parameter_strategy,
|
||||||
)
|
)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=10, print_blob=True)
|
||||||
def test_categorical_ames_housing(
|
def test_categorical_ames_housing(
|
||||||
self,
|
self,
|
||||||
hist_parameters: Dict[str, Any],
|
hist_parameters: Dict[str, Any],
|
||||||
@ -125,7 +125,7 @@ class TestGPUUpdaters:
|
|||||||
strategies.integers(3, 8),
|
strategies.integers(3, 8),
|
||||||
strategies.integers(4, 7)
|
strategies.integers(4, 7)
|
||||||
)
|
)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
@pytest.mark.skipif(**tm.no_pandas())
|
@pytest.mark.skipif(**tm.no_pandas())
|
||||||
def test_categorical_missing(self, rows, cols, cats):
|
def test_categorical_missing(self, rows, cols, cats):
|
||||||
self.cputest.run_categorical_missing(rows, cols, cats, "gpu_hist")
|
self.cputest.run_categorical_missing(rows, cols, cats, "gpu_hist")
|
||||||
@ -149,7 +149,7 @@ class TestGPUUpdaters:
|
|||||||
@pytest.mark.skipif(**tm.no_cupy())
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
@given(parameter_strategy, strategies.integers(1, 20),
|
@given(parameter_strategy, strategies.integers(1, 20),
|
||||||
tm.dataset_strategy)
|
tm.dataset_strategy)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
|
def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
|
||||||
# We cannot handle empty dataset yet
|
# We cannot handle empty dataset yet
|
||||||
assume(len(dataset.y) > 0)
|
assume(len(dataset.y) > 0)
|
||||||
@ -159,9 +159,9 @@ class TestGPUUpdaters:
|
|||||||
note(result)
|
note(result)
|
||||||
assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)
|
assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)
|
||||||
|
|
||||||
@given(parameter_strategy, strategies.integers(1, 20),
|
@given(parameter_strategy, strategies.integers(1, 3),
|
||||||
tm.dataset_strategy)
|
tm.dataset_strategy)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=10, print_blob=True)
|
||||||
def test_external_memory(self, param, num_rounds, dataset):
|
def test_external_memory(self, param, num_rounds, dataset):
|
||||||
if dataset.name.endswith("-l1"):
|
if dataset.name.endswith("-l1"):
|
||||||
return
|
return
|
||||||
@ -172,7 +172,6 @@ class TestGPUUpdaters:
|
|||||||
m = dataset.get_external_dmat()
|
m = dataset.get_external_dmat()
|
||||||
external_result = train_result(param, m, num_rounds)
|
external_result = train_result(param, m, num_rounds)
|
||||||
del m
|
del m
|
||||||
gc.collect()
|
|
||||||
assert tm.non_increasing(external_result['train'][dataset.metric])
|
assert tm.non_increasing(external_result['train'][dataset.metric])
|
||||||
|
|
||||||
def test_empty_dmatrix_prediction(self):
|
def test_empty_dmatrix_prediction(self):
|
||||||
|
|||||||
@ -61,7 +61,7 @@ def test_boost_from_prediction_gpu_hist():
|
|||||||
|
|
||||||
|
|
||||||
def test_num_parallel_tree():
|
def test_num_parallel_tree():
|
||||||
twskl.run_calif_housing_rf_regression("gpu_hist")
|
twskl.run_housing_rf_regression("gpu_hist")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_pandas())
|
@pytest.mark.skipif(**tm.no_pandas())
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import pytest
|
|||||||
from hypothesis import given, strategies, settings
|
from hypothesis import given, strategies, settings
|
||||||
from scipy.sparse import csr_matrix
|
from scipy.sparse import csr_matrix
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.timeout(30)
|
||||||
|
|
||||||
def test_single_batch(tree_method: str = "approx") -> None:
|
def test_single_batch(tree_method: str = "approx") -> None:
|
||||||
from sklearn.datasets import load_breast_cancer
|
from sklearn.datasets import load_breast_cancer
|
||||||
@ -134,7 +135,7 @@ def run_data_iterator(
|
|||||||
strategies.integers(0, 13),
|
strategies.integers(0, 13),
|
||||||
strategies.booleans(),
|
strategies.booleans(),
|
||||||
)
|
)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=10, print_blob=True)
|
||||||
def test_data_iterator(
|
def test_data_iterator(
|
||||||
n_samples_per_batch: int,
|
n_samples_per_batch: int,
|
||||||
n_features: int,
|
n_features: int,
|
||||||
|
|||||||
@ -4,6 +4,7 @@ import pytest
|
|||||||
import testing as tm
|
import testing as tm
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.timeout(30)
|
||||||
|
|
||||||
ROOT_DIR = tm.PROJECT_ROOT
|
ROOT_DIR = tm.PROJECT_ROOT
|
||||||
DEMO_DIR = os.path.join(ROOT_DIR, 'demo')
|
DEMO_DIR = os.path.join(ROOT_DIR, 'demo')
|
||||||
|
|||||||
@ -1,7 +1,10 @@
|
|||||||
import testing as tm
|
import testing as tm
|
||||||
|
import pytest
|
||||||
from hypothesis import strategies, given, settings, note
|
from hypothesis import strategies, given, settings, note
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.timeout(10)
|
||||||
|
|
||||||
parameter_strategy = strategies.fixed_dictionaries({
|
parameter_strategy = strategies.fixed_dictionaries({
|
||||||
'booster': strategies.just('gblinear'),
|
'booster': strategies.just('gblinear'),
|
||||||
'eta': strategies.floats(0.01, 0.25),
|
'eta': strategies.floats(0.01, 0.25),
|
||||||
@ -26,7 +29,7 @@ def train_result(param, dmat, num_rounds):
|
|||||||
class TestLinear:
|
class TestLinear:
|
||||||
@given(parameter_strategy, strategies.integers(10, 50),
|
@given(parameter_strategy, strategies.integers(10, 50),
|
||||||
tm.dataset_strategy, coord_strategy)
|
tm.dataset_strategy, coord_strategy)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
def test_coordinate(self, param, num_rounds, dataset, coord_param):
|
def test_coordinate(self, param, num_rounds, dataset, coord_param):
|
||||||
param['updater'] = 'coord_descent'
|
param['updater'] = 'coord_descent'
|
||||||
param.update(coord_param)
|
param.update(coord_param)
|
||||||
@ -46,7 +49,7 @@ class TestLinear:
|
|||||||
strategies.floats(1e-5, 0.8),
|
strategies.floats(1e-5, 0.8),
|
||||||
strategies.floats(1e-5, 0.8)
|
strategies.floats(1e-5, 0.8)
|
||||||
)
|
)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
def test_coordinate_regularised(self, param, num_rounds, dataset, coord_param, alpha, lambd):
|
def test_coordinate_regularised(self, param, num_rounds, dataset, coord_param, alpha, lambd):
|
||||||
param['updater'] = 'coord_descent'
|
param['updater'] = 'coord_descent'
|
||||||
param['alpha'] = alpha
|
param['alpha'] = alpha
|
||||||
@ -59,7 +62,7 @@ class TestLinear:
|
|||||||
|
|
||||||
@given(parameter_strategy, strategies.integers(10, 50),
|
@given(parameter_strategy, strategies.integers(10, 50),
|
||||||
tm.dataset_strategy)
|
tm.dataset_strategy)
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
def test_shotgun(self, param, num_rounds, dataset):
|
def test_shotgun(self, param, num_rounds, dataset):
|
||||||
param['updater'] = 'shotgun'
|
param['updater'] = 'shotgun'
|
||||||
param = dataset.set_params(param)
|
param = dataset.set_params(param)
|
||||||
@ -76,7 +79,7 @@ class TestLinear:
|
|||||||
@given(parameter_strategy, strategies.integers(10, 50),
|
@given(parameter_strategy, strategies.integers(10, 50),
|
||||||
tm.dataset_strategy, strategies.floats(1e-5, 1.0),
|
tm.dataset_strategy, strategies.floats(1e-5, 1.0),
|
||||||
strategies.floats(1e-5, 1.0))
|
strategies.floats(1e-5, 1.0))
|
||||||
@settings(deadline=None, print_blob=True)
|
@settings(deadline=None, max_examples=20, print_blob=True)
|
||||||
def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd):
|
def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd):
|
||||||
param['updater'] = 'shotgun'
|
param['updater'] = 'shotgun'
|
||||||
param['alpha'] = alpha
|
param['alpha'] = alpha
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import pytest
|
|||||||
|
|
||||||
import testing as tm
|
import testing as tm
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.timeout(10)
|
||||||
|
|
||||||
class TestOMP:
|
class TestOMP:
|
||||||
def test_omp(self):
|
def test_omp(self):
|
||||||
@ -49,14 +50,15 @@ class TestOMP:
|
|||||||
print('test approx ...')
|
print('test approx ...')
|
||||||
param['tree_method'] = 'approx'
|
param['tree_method'] = 'approx'
|
||||||
|
|
||||||
|
n_trials = 10
|
||||||
param['nthread'] = 1
|
param['nthread'] = 1
|
||||||
auc_1, pred_1 = consist_test('approx_thread_1', 100)
|
auc_1, pred_1 = consist_test('approx_thread_1', n_trials)
|
||||||
|
|
||||||
param['nthread'] = 2
|
param['nthread'] = 2
|
||||||
auc_2, pred_2 = consist_test('approx_thread_2', 100)
|
auc_2, pred_2 = consist_test('approx_thread_2', n_trials)
|
||||||
|
|
||||||
param['nthread'] = 3
|
param['nthread'] = 3
|
||||||
auc_3, pred_3 = consist_test('approx_thread_3', 100)
|
auc_3, pred_3 = consist_test('approx_thread_3', n_trials)
|
||||||
|
|
||||||
assert auc_1 == auc_2 == auc_3
|
assert auc_1 == auc_2 == auc_3
|
||||||
assert np.array_equal(auc_1, auc_2)
|
assert np.array_equal(auc_1, auc_2)
|
||||||
@ -66,13 +68,13 @@ class TestOMP:
|
|||||||
param['tree_method'] = 'hist'
|
param['tree_method'] = 'hist'
|
||||||
|
|
||||||
param['nthread'] = 1
|
param['nthread'] = 1
|
||||||
auc_1, pred_1 = consist_test('hist_thread_1', 100)
|
auc_1, pred_1 = consist_test('hist_thread_1', n_trials)
|
||||||
|
|
||||||
param['nthread'] = 2
|
param['nthread'] = 2
|
||||||
auc_2, pred_2 = consist_test('hist_thread_2', 100)
|
auc_2, pred_2 = consist_test('hist_thread_2', n_trials)
|
||||||
|
|
||||||
param['nthread'] = 3
|
param['nthread'] = 3
|
||||||
auc_3, pred_3 = consist_test('hist_thread_3', 100)
|
auc_3, pred_3 = consist_test('hist_thread_3', n_trials)
|
||||||
|
|
||||||
assert auc_1 == auc_2 == auc_3
|
assert auc_1 == auc_2 == auc_3
|
||||||
assert np.array_equal(auc_1, auc_2)
|
assert np.array_equal(auc_1, auc_2)
|
||||||
|
|||||||
@ -16,10 +16,7 @@ if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
|
|||||||
pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)
|
pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)
|
||||||
|
|
||||||
from pyspark.ml import Pipeline, PipelineModel
|
from pyspark.ml import Pipeline, PipelineModel
|
||||||
from pyspark.ml.evaluation import (
|
from pyspark.ml.evaluation import BinaryClassificationEvaluator
|
||||||
BinaryClassificationEvaluator,
|
|
||||||
MulticlassClassificationEvaluator,
|
|
||||||
)
|
|
||||||
from pyspark.ml.feature import VectorAssembler
|
from pyspark.ml.feature import VectorAssembler
|
||||||
from pyspark.ml.functions import vector_to_array
|
from pyspark.ml.functions import vector_to_array
|
||||||
from pyspark.ml.linalg import Vectors
|
from pyspark.ml.linalg import Vectors
|
||||||
@ -40,6 +37,8 @@ from .utils import SparkTestCase
|
|||||||
|
|
||||||
logging.getLogger("py4j").setLevel(logging.INFO)
|
logging.getLogger("py4j").setLevel(logging.INFO)
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.timeout(60)
|
||||||
|
|
||||||
|
|
||||||
class XgboostLocalTest(SparkTestCase):
|
class XgboostLocalTest(SparkTestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
@ -711,17 +710,10 @@ class XgboostLocalTest(SparkTestCase):
|
|||||||
estimatorParamMaps=paramMaps,
|
estimatorParamMaps=paramMaps,
|
||||||
evaluator=BinaryClassificationEvaluator(),
|
evaluator=BinaryClassificationEvaluator(),
|
||||||
seed=1,
|
seed=1,
|
||||||
|
numFolds=2,
|
||||||
)
|
)
|
||||||
cvBinModel = cvBin.fit(self.cls_df_train_large)
|
cvBinModel = cvBin.fit(self.cls_df_train_large)
|
||||||
cvBinModel.transform(self.cls_df_test)
|
cvBinModel.transform(self.cls_df_test)
|
||||||
cvMulti = CrossValidator(
|
|
||||||
estimator=xgb_classifer,
|
|
||||||
estimatorParamMaps=paramMaps,
|
|
||||||
evaluator=MulticlassClassificationEvaluator(),
|
|
||||||
seed=1,
|
|
||||||
)
|
|
||||||
cvMultiModel = cvMulti.fit(self.multi_cls_df_train_large)
|
|
||||||
cvMultiModel.transform(self.multi_cls_df_test)
|
|
||||||
|
|
||||||
def test_callbacks(self):
|
def test_callbacks(self):
|
||||||
from xgboost.callback import LearningRateScheduler
|
from xgboost.callback import LearningRateScheduler
|
||||||
@ -889,35 +881,6 @@ class XgboostLocalTest(SparkTestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def test_classifier_with_weight_eval(self):
|
def test_classifier_with_weight_eval(self):
|
||||||
# with weight
|
|
||||||
classifier_with_weight = SparkXGBClassifier(weight_col="weight")
|
|
||||||
model_with_weight = classifier_with_weight.fit(
|
|
||||||
self.cls_df_train_with_eval_weight
|
|
||||||
)
|
|
||||||
pred_result_with_weight = model_with_weight.transform(
|
|
||||||
self.cls_df_test_with_eval_weight
|
|
||||||
).collect()
|
|
||||||
for row in pred_result_with_weight:
|
|
||||||
self.assertTrue(
|
|
||||||
np.allclose(row.probability, row.expected_prob_with_weight, atol=1e-3)
|
|
||||||
)
|
|
||||||
# with eval
|
|
||||||
classifier_with_eval = SparkXGBClassifier(**self.cls_params_with_eval)
|
|
||||||
model_with_eval = classifier_with_eval.fit(self.cls_df_train_with_eval_weight)
|
|
||||||
self.assertTrue(
|
|
||||||
np.isclose(
|
|
||||||
model_with_eval._xgb_sklearn_model.best_score,
|
|
||||||
self.cls_with_eval_best_score,
|
|
||||||
atol=1e-3,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
pred_result_with_eval = model_with_eval.transform(
|
|
||||||
self.cls_df_test_with_eval_weight
|
|
||||||
).collect()
|
|
||||||
for row in pred_result_with_eval:
|
|
||||||
self.assertTrue(
|
|
||||||
np.allclose(row.probability, row.expected_prob_with_eval, atol=1e-3)
|
|
||||||
)
|
|
||||||
# with weight and eval
|
# with weight and eval
|
||||||
# Added scale_pos_weight because in 1.4.2, the original answer returns 0.5 which
|
# Added scale_pos_weight because in 1.4.2, the original answer returns 0.5 which
|
||||||
# doesn't really indicate this working correctly.
|
# doesn't really indicate this working correctly.
|
||||||
|
|||||||
@ -44,6 +44,8 @@ from xgboost.dask import DaskDMatrix
|
|||||||
|
|
||||||
dask.config.set({"distributed.scheduler.allowed-failures": False})
|
dask.config.set({"distributed.scheduler.allowed-failures": False})
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.timeout(30)
|
||||||
|
|
||||||
if hasattr(HealthCheck, 'function_scoped_fixture'):
|
if hasattr(HealthCheck, 'function_scoped_fixture'):
|
||||||
suppress = [HealthCheck.function_scoped_fixture]
|
suppress = [HealthCheck.function_scoped_fixture]
|
||||||
else:
|
else:
|
||||||
@ -381,7 +383,7 @@ def test_categorical(client: "Client") -> None:
|
|||||||
|
|
||||||
|
|
||||||
def test_dask_predict_shape_infer(client: "Client") -> None:
|
def test_dask_predict_shape_infer(client: "Client") -> None:
|
||||||
X, y = make_classification(n_samples=1000, n_informative=5, n_classes=3)
|
X, y = make_classification(n_samples=kRows, n_informative=5, n_classes=3)
|
||||||
X_ = dd.from_array(X, chunksize=100)
|
X_ = dd.from_array(X, chunksize=100)
|
||||||
y_ = dd.from_array(y, chunksize=100)
|
y_ = dd.from_array(y, chunksize=100)
|
||||||
dtrain = xgb.dask.DaskDMatrix(client, data=X_, label=y_)
|
dtrain = xgb.dask.DaskDMatrix(client, data=X_, label=y_)
|
||||||
@ -522,8 +524,8 @@ def test_boost_from_prediction(tree_method: str, client: "Client") -> None:
|
|||||||
|
|
||||||
|
|
||||||
def test_inplace_predict(client: "Client") -> None:
|
def test_inplace_predict(client: "Client") -> None:
|
||||||
from sklearn.datasets import fetch_california_housing
|
from sklearn.datasets import load_diabetes
|
||||||
X_, y_ = fetch_california_housing(return_X_y=True)
|
X_, y_ = load_diabetes(return_X_y=True)
|
||||||
X, y = dd.from_array(X_, chunksize=32), dd.from_array(y_, chunksize=32)
|
X, y = dd.from_array(X_, chunksize=32), dd.from_array(y_, chunksize=32)
|
||||||
reg = xgb.dask.DaskXGBRegressor(n_estimators=4).fit(X, y)
|
reg = xgb.dask.DaskXGBRegressor(n_estimators=4).fit(X, y)
|
||||||
booster = reg.get_booster()
|
booster = reg.get_booster()
|
||||||
@ -841,7 +843,7 @@ def run_empty_dmatrix_cls(client: "Client", parameters: dict) -> None:
|
|||||||
def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) -> None:
|
def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) -> None:
|
||||||
from sklearn import datasets
|
from sklearn import datasets
|
||||||
n_samples = 100
|
n_samples = 100
|
||||||
n_features = 97
|
n_features = 7
|
||||||
rng = np.random.RandomState(1994)
|
rng = np.random.RandomState(1994)
|
||||||
|
|
||||||
make_classification = partial(
|
make_classification = partial(
|
||||||
@ -894,9 +896,9 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
|
|||||||
|
|
||||||
|
|
||||||
def test_empty_dmatrix_auc() -> None:
|
def test_empty_dmatrix_auc() -> None:
|
||||||
with LocalCluster(n_workers=8, dashboard_address=":0") as cluster:
|
with LocalCluster(n_workers=4, dashboard_address=":0") as cluster:
|
||||||
with Client(cluster) as client:
|
with Client(cluster) as client:
|
||||||
run_empty_dmatrix_auc(client, "hist", 8)
|
run_empty_dmatrix_auc(client, "hist", 4)
|
||||||
|
|
||||||
|
|
||||||
def run_auc(client: "Client", tree_method: str) -> None:
|
def run_auc(client: "Client", tree_method: str) -> None:
|
||||||
@ -1033,7 +1035,7 @@ async def run_dask_classifier_asyncio(scheduler_address: str) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def test_with_asyncio() -> None:
|
def test_with_asyncio() -> None:
|
||||||
with LocalCluster(dashboard_address=":0") as cluster:
|
with LocalCluster(n_workers=2, dashboard_address=":0") as cluster:
|
||||||
with Client(cluster) as client:
|
with Client(cluster) as client:
|
||||||
address = client.scheduler.address
|
address = client.scheduler.address
|
||||||
output = asyncio.run(run_from_dask_array_asyncio(address))
|
output = asyncio.run(run_from_dask_array_asyncio(address))
|
||||||
@ -1420,11 +1422,11 @@ class TestWithDask:
|
|||||||
|
|
||||||
@given(params=hist_parameter_strategy,
|
@given(params=hist_parameter_strategy,
|
||||||
dataset=tm.dataset_strategy)
|
dataset=tm.dataset_strategy)
|
||||||
@settings(deadline=None, suppress_health_check=suppress, print_blob=True)
|
@settings(deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True)
|
||||||
def test_hist(
|
def test_hist(
|
||||||
self, params: Dict, dataset: tm.TestDataset, client: "Client"
|
self, params: Dict, dataset: tm.TestDataset, client: "Client"
|
||||||
) -> None:
|
) -> None:
|
||||||
num_rounds = 30
|
num_rounds = 10
|
||||||
self.run_updater_test(client, params, num_rounds, dataset, 'hist')
|
self.run_updater_test(client, params, num_rounds, dataset, 'hist')
|
||||||
|
|
||||||
def test_quantile_dmatrix(self, client: Client) -> None:
|
def test_quantile_dmatrix(self, client: Client) -> None:
|
||||||
@ -1465,11 +1467,11 @@ class TestWithDask:
|
|||||||
|
|
||||||
@given(params=exact_parameter_strategy,
|
@given(params=exact_parameter_strategy,
|
||||||
dataset=tm.dataset_strategy)
|
dataset=tm.dataset_strategy)
|
||||||
@settings(deadline=None, suppress_health_check=suppress, print_blob=True)
|
@settings(deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True)
|
||||||
def test_approx(
|
def test_approx(
|
||||||
self, client: "Client", params: Dict, dataset: tm.TestDataset
|
self, client: "Client", params: Dict, dataset: tm.TestDataset
|
||||||
) -> None:
|
) -> None:
|
||||||
num_rounds = 30
|
num_rounds = 10
|
||||||
self.run_updater_test(client, params, num_rounds, dataset, 'approx')
|
self.run_updater_test(client, params, num_rounds, dataset, 'approx')
|
||||||
|
|
||||||
def run_quantile(self, name: str) -> None:
|
def run_quantile(self, name: str) -> None:
|
||||||
@ -1773,16 +1775,16 @@ class TestWithDask:
|
|||||||
assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-5, 1e-5)
|
assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-5, 1e-5)
|
||||||
|
|
||||||
def test_shap(self, client: "Client") -> None:
|
def test_shap(self, client: "Client") -> None:
|
||||||
from sklearn.datasets import fetch_california_housing, load_digits
|
from sklearn.datasets import load_diabetes, load_iris
|
||||||
X, y = fetch_california_housing(return_X_y=True)
|
X, y = load_diabetes(return_X_y=True)
|
||||||
params: Dict[str, Any] = {'objective': 'reg:squarederror'}
|
params: Dict[str, Any] = {'objective': 'reg:squarederror'}
|
||||||
self.run_shap(X, y, params, client)
|
self.run_shap(X, y, params, client)
|
||||||
|
|
||||||
X, y = load_digits(return_X_y=True)
|
X, y = load_iris(return_X_y=True)
|
||||||
params = {'objective': 'multi:softmax', 'num_class': 10}
|
params = {'objective': 'multi:softmax', 'num_class': 3}
|
||||||
self.run_shap(X, y, params, client)
|
self.run_shap(X, y, params, client)
|
||||||
|
|
||||||
params = {'objective': 'multi:softprob', 'num_class': 10}
|
params = {'objective': 'multi:softprob', 'num_class': 3}
|
||||||
self.run_shap(X, y, params, client)
|
self.run_shap(X, y, params, client)
|
||||||
|
|
||||||
self.run_shap_cls_sklearn(X, y, client)
|
self.run_shap_cls_sklearn(X, y, client)
|
||||||
@ -1818,8 +1820,8 @@ class TestWithDask:
|
|||||||
1e-5, 1e-5)
|
1e-5, 1e-5)
|
||||||
|
|
||||||
def test_shap_interactions(self, client: "Client") -> None:
|
def test_shap_interactions(self, client: "Client") -> None:
|
||||||
from sklearn.datasets import fetch_california_housing
|
from sklearn.datasets import load_diabetes
|
||||||
X, y = fetch_california_housing(return_X_y=True)
|
X, y = load_diabetes(return_X_y=True)
|
||||||
params = {'objective': 'reg:squarederror'}
|
params = {'objective': 'reg:squarederror'}
|
||||||
self.run_shap_interactions(X, y, params, client)
|
self.run_shap_interactions(X, y, params, client)
|
||||||
|
|
||||||
|
|||||||
@ -14,10 +14,6 @@ except ImportError:
|
|||||||
pytestmark = pytest.mark.skipif(**tm.no_modin())
|
pytestmark = pytest.mark.skipif(**tm.no_modin())
|
||||||
|
|
||||||
|
|
||||||
dpath = 'demo/data/'
|
|
||||||
rng = np.random.RandomState(1994)
|
|
||||||
|
|
||||||
|
|
||||||
class TestModin:
|
class TestModin:
|
||||||
@pytest.mark.xfail
|
@pytest.mark.xfail
|
||||||
def test_modin(self):
|
def test_modin(self):
|
||||||
|
|||||||
@ -12,7 +12,7 @@ import json
|
|||||||
|
|
||||||
rng = np.random.RandomState(1994)
|
rng = np.random.RandomState(1994)
|
||||||
|
|
||||||
pytestmark = pytest.mark.skipif(**tm.no_sklearn())
|
pytestmark = [pytest.mark.skipif(**tm.no_sklearn()), pytest.mark.timeout(30)]
|
||||||
|
|
||||||
from sklearn.utils.estimator_checks import parametrize_with_checks
|
from sklearn.utils.estimator_checks import parametrize_with_checks
|
||||||
|
|
||||||
@ -328,10 +328,10 @@ def test_select_feature():
|
|||||||
|
|
||||||
|
|
||||||
def test_num_parallel_tree():
|
def test_num_parallel_tree():
|
||||||
from sklearn.datasets import fetch_california_housing
|
from sklearn.datasets import load_diabetes
|
||||||
|
|
||||||
reg = xgb.XGBRegressor(n_estimators=4, num_parallel_tree=4, tree_method="hist")
|
reg = xgb.XGBRegressor(n_estimators=4, num_parallel_tree=4, tree_method="hist")
|
||||||
X, y = fetch_california_housing(return_X_y=True)
|
X, y = load_diabetes(return_X_y=True)
|
||||||
bst = reg.fit(X=X, y=y)
|
bst = reg.fit(X=X, y=y)
|
||||||
dump = bst.get_booster().get_dump(dump_format="json")
|
dump = bst.get_booster().get_dump(dump_format="json")
|
||||||
assert len(dump) == 16
|
assert len(dump) == 16
|
||||||
@ -352,7 +352,7 @@ def test_num_parallel_tree():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_calif_housing_regression():
|
def test_regression():
|
||||||
from sklearn.metrics import mean_squared_error
|
from sklearn.metrics import mean_squared_error
|
||||||
from sklearn.datasets import fetch_california_housing
|
from sklearn.datasets import fetch_california_housing
|
||||||
from sklearn.model_selection import KFold
|
from sklearn.model_selection import KFold
|
||||||
@ -381,7 +381,7 @@ def test_calif_housing_regression():
|
|||||||
xgb_model.feature_names_in_
|
xgb_model.feature_names_in_
|
||||||
|
|
||||||
|
|
||||||
def run_calif_housing_rf_regression(tree_method):
|
def run_housing_rf_regression(tree_method):
|
||||||
from sklearn.metrics import mean_squared_error
|
from sklearn.metrics import mean_squared_error
|
||||||
from sklearn.datasets import fetch_california_housing
|
from sklearn.datasets import fetch_california_housing
|
||||||
from sklearn.model_selection import KFold
|
from sklearn.model_selection import KFold
|
||||||
@ -401,8 +401,8 @@ def run_calif_housing_rf_regression(tree_method):
|
|||||||
rfreg.fit(X, y, early_stopping_rounds=10)
|
rfreg.fit(X, y, early_stopping_rounds=10)
|
||||||
|
|
||||||
|
|
||||||
def test_calif_housing_rf_regression():
|
def test_rf_regression():
|
||||||
run_calif_housing_rf_regression("hist")
|
run_housing_rf_regression("hist")
|
||||||
|
|
||||||
|
|
||||||
def test_parameter_tuning():
|
def test_parameter_tuning():
|
||||||
@ -411,9 +411,9 @@ def test_parameter_tuning():
|
|||||||
|
|
||||||
X, y = fetch_california_housing(return_X_y=True)
|
X, y = fetch_california_housing(return_X_y=True)
|
||||||
xgb_model = xgb.XGBRegressor(learning_rate=0.1)
|
xgb_model = xgb.XGBRegressor(learning_rate=0.1)
|
||||||
clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
|
clf = GridSearchCV(xgb_model, {'max_depth': [2, 4],
|
||||||
'n_estimators': [50, 100, 200]},
|
'n_estimators': [50, 200]},
|
||||||
cv=3, verbose=1)
|
cv=2, verbose=1)
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
assert clf.best_score_ < 0.7
|
assert clf.best_score_ < 0.7
|
||||||
assert clf.best_params_ == {'n_estimators': 200, 'max_depth': 4}
|
assert clf.best_params_ == {'n_estimators': 200, 'max_depth': 4}
|
||||||
@ -840,13 +840,13 @@ def test_save_load_model():
|
|||||||
|
|
||||||
|
|
||||||
def test_RFECV():
|
def test_RFECV():
|
||||||
from sklearn.datasets import fetch_california_housing
|
from sklearn.datasets import load_diabetes
|
||||||
from sklearn.datasets import load_breast_cancer
|
from sklearn.datasets import load_breast_cancer
|
||||||
from sklearn.datasets import load_iris
|
from sklearn.datasets import load_iris
|
||||||
from sklearn.feature_selection import RFECV
|
from sklearn.feature_selection import RFECV
|
||||||
|
|
||||||
# Regression
|
# Regression
|
||||||
X, y = fetch_california_housing(return_X_y=True)
|
X, y = load_diabetes(return_X_y=True)
|
||||||
bst = xgb.XGBRegressor(booster='gblinear', learning_rate=0.1,
|
bst = xgb.XGBRegressor(booster='gblinear', learning_rate=0.1,
|
||||||
n_estimators=10,
|
n_estimators=10,
|
||||||
objective='reg:squarederror',
|
objective='reg:squarederror',
|
||||||
@ -861,7 +861,7 @@ def test_RFECV():
|
|||||||
n_estimators=10,
|
n_estimators=10,
|
||||||
objective='binary:logistic',
|
objective='binary:logistic',
|
||||||
random_state=0, verbosity=0)
|
random_state=0, verbosity=0)
|
||||||
rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='roc_auc')
|
rfecv = RFECV(estimator=bst, step=0.5, cv=3, scoring='roc_auc')
|
||||||
rfecv.fit(X, y)
|
rfecv.fit(X, y)
|
||||||
|
|
||||||
# Multi-class classification
|
# Multi-class classification
|
||||||
@ -872,7 +872,7 @@ def test_RFECV():
|
|||||||
objective='multi:softprob',
|
objective='multi:softprob',
|
||||||
random_state=0, reg_alpha=0.001, reg_lambda=0.01,
|
random_state=0, reg_alpha=0.001, reg_lambda=0.01,
|
||||||
scale_pos_weight=0.5, verbosity=0)
|
scale_pos_weight=0.5, verbosity=0)
|
||||||
rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_log_loss')
|
rfecv = RFECV(estimator=bst, step=0.5, cv=3, scoring='neg_log_loss')
|
||||||
rfecv.fit(X, y)
|
rfecv.fit(X, y)
|
||||||
|
|
||||||
X[0:4, :] = np.nan # verify scikit_learn doesn't throw with nan
|
X[0:4, :] = np.nan # verify scikit_learn doesn't throw with nan
|
||||||
@ -881,7 +881,7 @@ def test_RFECV():
|
|||||||
rfecv.fit(X, y)
|
rfecv.fit(X, y)
|
||||||
|
|
||||||
cls = xgb.XGBClassifier()
|
cls = xgb.XGBClassifier()
|
||||||
rfecv = RFECV(estimator=cls, step=1, cv=3,
|
rfecv = RFECV(estimator=cls, step=0.5, cv=3,
|
||||||
scoring='neg_mean_squared_error')
|
scoring='neg_mean_squared_error')
|
||||||
rfecv.fit(X, y)
|
rfecv.fit(X, y)
|
||||||
|
|
||||||
@ -1155,7 +1155,7 @@ def run_boost_from_prediction_multi_clasas(
|
|||||||
|
|
||||||
@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
|
@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
|
||||||
def test_boost_from_prediction(tree_method):
|
def test_boost_from_prediction(tree_method):
|
||||||
from sklearn.datasets import load_breast_cancer, load_digits, make_regression
|
from sklearn.datasets import load_breast_cancer, load_iris, make_regression
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
X, y = load_breast_cancer(return_X_y=True)
|
X, y = load_breast_cancer(return_X_y=True)
|
||||||
@ -1163,7 +1163,7 @@ def test_boost_from_prediction(tree_method):
|
|||||||
run_boost_from_prediction_binary(tree_method, X, y, None)
|
run_boost_from_prediction_binary(tree_method, X, y, None)
|
||||||
run_boost_from_prediction_binary(tree_method, X, y, pd.DataFrame)
|
run_boost_from_prediction_binary(tree_method, X, y, pd.DataFrame)
|
||||||
|
|
||||||
X, y = load_digits(return_X_y=True)
|
X, y = load_iris(return_X_y=True)
|
||||||
|
|
||||||
run_boost_from_prediction_multi_clasas(xgb.XGBClassifier, tree_method, X, y, None)
|
run_boost_from_prediction_multi_clasas(xgb.XGBClassifier, tree_method, X, y, None)
|
||||||
run_boost_from_prediction_multi_clasas(
|
run_boost_from_prediction_multi_clasas(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user