Time the CPU tests on Jenkins. (#6257)
* Time the CPU tests on Jenkins.
* Reduce thread contention.
* Add doc.
* Skip heavy tests on ARM.
parent d1254808d5
commit 81c37c28d5
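The recurring change below is thread-contention control: XGBoost's scikit-learn estimators default to using every available core, so running them inside a parallel meta-estimator such as GridSearchCV multiplies the two levels of parallelism and oversubscribes the machine. A minimal sketch of the pattern the demos now avoid (illustrative only, not part of this diff; the grid and n_jobs values are placeholders):

    import multiprocessing
    import xgboost as xgb
    from sklearn.model_selection import GridSearchCV

    param_grid = {'max_depth': [2, 4, 6]}

    # Oversubscribed: every grid-search worker fits an XGBRegressor that
    # itself spawns one thread per core, so threads pile up and contend.
    oversubscribed = GridSearchCV(xgb.XGBRegressor(), param_grid, n_jobs=4)

    # Balanced: split the core budget between the two levels of parallelism.
    half = max(1, multiprocessing.cpu_count() // 2)
    balanced = GridSearchCV(xgb.XGBRegressor(n_jobs=half), param_grid, n_jobs=2)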
@@ -19,7 +19,7 @@ y = digits['target']
 X = digits['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
+    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))
@@ -30,7 +30,7 @@ y = iris['target']
 X = iris['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
+    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))
@@ -41,7 +41,7 @@ y = boston['target']
 X = boston['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
+    xgb_model = xgb.XGBRegressor(n_jobs=1).fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(mean_squared_error(actuals, predictions))
@@ -49,10 +49,10 @@ for train_index, test_index in kf.split(X):
 print("Parameter optimization")
 y = boston['target']
 X = boston['data']
-xgb_model = xgb.XGBRegressor()
+xgb_model = xgb.XGBRegressor(n_jobs=1)
 clf = GridSearchCV(xgb_model,
                    {'max_depth': [2, 4, 6],
-                    'n_estimators': [50, 100, 200]}, verbose=1)
+                    'n_estimators': [50, 100, 200]}, verbose=1, n_jobs=1)
 clf.fit(X, y)
 print(clf.best_score_)
 print(clf.best_params_)
@@ -69,6 +69,6 @@ print(np.allclose(clf.predict(X), clf2.predict(X)))
 X = digits['data']
 y = digits['target']
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
-clf = xgb.XGBClassifier()
+clf = xgb.XGBClassifier(n_jobs=1)
 clf.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
         eval_set=[(X_test, y_test)])
@@ -1,6 +1,7 @@
 from sklearn.model_selection import GridSearchCV
 from sklearn.datasets import load_boston
 import xgboost as xgb
+import multiprocessing
 
 if __name__ == "__main__":
     print("Parallel Parameter optimization")
@@ -8,7 +9,7 @@ if __name__ == "__main__":
 
     y = boston['target']
     X = boston['data']
-    xgb_model = xgb.XGBRegressor()
+    xgb_model = xgb.XGBRegressor(n_jobs=multiprocessing.cpu_count() // 2)
     clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
                                    'n_estimators': [50, 100, 200]}, verbose=1,
                        n_jobs=2)
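As a rough budget for the hunk above (illustrative numbers, assuming a machine with 16 logical cores): each fit now uses cpu_count() // 2 = 8 threads and GridSearchCV runs n_jobs=2 fits concurrently, so about 8 x 2 = 16 threads run at once and match the hardware, whereas the previous default would have been 16 threads per fit x 2 concurrent fits = 32 threads contending for 16 cores.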
@@ -38,7 +38,8 @@ General Parameters
 
 * ``nthread`` [default to maximum number of threads available if not set]
 
-  - Number of parallel threads used to run XGBoost
+  - Number of parallel threads used to run XGBoost. When choosing it, please keep thread
+    contention and hyperthreading in mind.
 
 * ``disable_default_eval_metric`` [default=``false``]
 
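The same consideration applies when setting nthread through the native API. A small sketch (the dataset and values are illustrative, and halving cpu_count() is only a rough proxy for physical cores on a machine with two hardware threads per core):

    import multiprocessing
    import numpy as np
    import xgboost as xgb

    X = np.random.rand(1000, 10)
    y = np.random.randint(2, size=1000)
    dtrain = xgb.DMatrix(X, label=y)

    params = {
        'objective': 'binary:logistic',
        # Target roughly the physical cores instead of every hyperthread.
        'nthread': max(1, multiprocessing.cpu_count() // 2),
    }
    booster = xgb.train(params, dtrain, num_boost_round=10)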
@@ -73,7 +73,10 @@ __model_doc = '''
         available. It's recommended to study this option from parameters
         document.
     n_jobs : int
-        Number of parallel threads used to run xgboost.
+        Number of parallel threads used to run xgboost. When used with other Scikit-Learn
+        algorithms like grid search, you may choose which algorithm to parallelize and
+        balance the threads. Creating thread contention will significantly slow down both
+        algorithms.
     gamma : float
         Minimum loss reduction required to make a further partition on a leaf
         node of the tree.
@@ -60,7 +60,7 @@ case "$suite" in
   cpu)
     source activate cpu_test
     install_xgboost
-    pytest -v -s -rxXs --fulltrace ${args} tests/python
+    pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/python
     cd tests/distributed
     ./runtests.sh
     uninstall_xgboost
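--durations=N is a stock pytest option that prints the N slowest test durations after the run; N=0 reports the timing of every test, which is what gives Jenkins the per-test timings this commit is after.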
@@ -39,6 +39,7 @@ def test_feature_weights_demo():
 
 
 @pytest.mark.skipif(**tm.no_sklearn())
+@pytest.mark.skipif(**tm.is_arm())
 def test_sklearn_demo():
     script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_examples.py')
     cmd = ['python', script]
@@ -48,6 +49,7 @@ def test_sklearn_demo():
 
 
 @pytest.mark.skipif(**tm.no_sklearn())
+@pytest.mark.skipif(**tm.is_arm())
 def test_sklearn_parallel_demo():
     script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_parallel.py')
     cmd = ['python', script]
@@ -83,6 +83,7 @@ class TestEarlyStopping(unittest.TestCase):
 
     @pytest.mark.skipif(**tm.no_sklearn())
     @pytest.mark.skipif(**tm.no_pandas())
+    @pytest.mark.skipif(**tm.is_arm())
     def test_cv_early_stopping_with_multiple_eval_sets_and_metrics(self):
         from sklearn.datasets import load_breast_cancer
 
@@ -611,10 +611,6 @@ class TestWithDask:
                          tree_method):
         params['tree_method'] = tree_method
         params = dataset.set_params(params)
-        # multi class doesn't handle empty dataset well (empty
-        # means at least 1 worker has data).
-        if params['objective'] == "multi:softmax":
-            return
         # It doesn't make sense to distribute a completely
         # empty dataset.
         if dataset.X.shape[0] == 0:
@@ -640,18 +636,20 @@ class TestWithDask:
         # Make sure that it's decreasing
         assert history[-1] < history[0]
 
+    @pytest.mark.skipif(**tm.is_arm())
     @given(params=hist_parameter_strategy,
-           num_rounds=strategies.integers(20, 30),
            dataset=tm.dataset_strategy)
     @settings(deadline=None)
-    def test_hist(self, params, num_rounds, dataset, client):
+    def test_hist(self, params, dataset, client):
+        num_rounds = 30
         self.run_updater_test(client, params, num_rounds, dataset, 'hist')
 
+    @pytest.mark.skipif(**tm.is_arm())
     @given(params=exact_parameter_strategy,
-           num_rounds=strategies.integers(20, 30),
            dataset=tm.dataset_strategy)
     @settings(deadline=None)
-    def test_approx(self, client, params, num_rounds, dataset):
+    def test_approx(self, client, params, dataset):
+        num_rounds = 30
         self.run_updater_test(client, params, num_rounds, dataset, 'approx')
 
     def run_quantile(self, name):
@@ -1,5 +1,6 @@
 # coding: utf-8
 import os
+import platform
 from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
 from xgboost.compat import DASK_INSTALLED
 import pytest
@@ -22,6 +23,10 @@ except ImportError:
 memory = Memory('./cachedir', verbose=0)
 
 
+def is_arm():
+    # True when 'arm' appears in the machine name, e.g. 'armv7l'.
+    return {'condition': platform.machine().lower().find('arm') != -1,
+            'reason': 'Skipping expensive tests on ARM.'}
+
 def no_sklearn():
     return {'condition': not SKLEARN_INSTALLED,
             'reason': 'Scikit-Learn is not installed'}
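is_arm() follows the same convention as the existing helpers: it returns a dict that is splatted into pytest.mark.skipif, which accepts condition and reason as keyword arguments. A sketch of what the marker expands to (the test name below is a placeholder):

    import platform
    import pytest

    # @pytest.mark.skipif(**tm.is_arm()) is roughly equivalent to:
    @pytest.mark.skipif(condition=platform.machine().lower().find('arm') != -1,
                        reason='Skipping expensive tests on ARM.')
    def test_some_heavy_case():
        ...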