Time the CPU tests on Jenkins. (#6257)

* Time the CPU tests on Jenkins.
* Reduce thread contention.
* Add doc.
* Skip heavy tests on ARM.
Jiaming Yuan 2020-10-21 08:19:07 +08:00 committed by GitHub
parent d1254808d5
commit 81c37c28d5
9 changed files with 29 additions and 18 deletions

demo/guide-python/sklearn_examples.py

@@ -19,7 +19,7 @@ y = digits['target']
 X = digits['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
+    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))
@@ -30,7 +30,7 @@ y = iris['target']
 X = iris['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
+    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))
@@ -41,7 +41,7 @@ y = boston['target']
 X = boston['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
+    xgb_model = xgb.XGBRegressor(n_jobs=1).fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(mean_squared_error(actuals, predictions))
@@ -49,10 +49,10 @@ for train_index, test_index in kf.split(X):
 print("Parameter optimization")
 y = boston['target']
 X = boston['data']
-xgb_model = xgb.XGBRegressor()
+xgb_model = xgb.XGBRegressor(n_jobs=1)
 clf = GridSearchCV(xgb_model,
                    {'max_depth': [2, 4, 6],
-                    'n_estimators': [50, 100, 200]}, verbose=1)
+                    'n_estimators': [50, 100, 200]}, verbose=1, n_jobs=1)
 clf.fit(X, y)
 print(clf.best_score_)
 print(clf.best_params_)
@@ -69,6 +69,6 @@ print(np.allclose(clf.predict(X), clf2.predict(X)))
 X = digits['data']
 y = digits['target']
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
-clf = xgb.XGBClassifier()
+clf = xgb.XGBClassifier(n_jobs=1)
 clf.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
         eval_set=[(X_test, y_test)])

demo/guide-python/sklearn_parallel.py

@@ -1,6 +1,7 @@
 from sklearn.model_selection import GridSearchCV
 from sklearn.datasets import load_boston
 import xgboost as xgb
+import multiprocessing

 if __name__ == "__main__":
     print("Parallel Parameter optimization")
@@ -8,7 +9,7 @@ if __name__ == "__main__":
     y = boston['target']
     X = boston['data']
-    xgb_model = xgb.XGBRegressor()
+    xgb_model = xgb.XGBRegressor(n_jobs=multiprocessing.cpu_count() // 2)
     clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
                                    'n_estimators': [50, 100, 200]}, verbose=1,
                        n_jobs=2)
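The core split in this demo is deliberate: each XGBoost fit gets multiprocessing.cpu_count() // 2 threads while the grid search runs n_jobs=2 fits at a time, so the product of the two levels of parallelism roughly matches the machine's core count instead of oversubscribing it.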

doc/parameter.rst

@@ -38,7 +38,8 @@ General Parameters
 * ``nthread`` [default to maximum number of threads available if not set]
-  - Number of parallel threads used to run XGBoost
+  - Number of parallel threads used to run XGBoost. When choosing it, please keep thread
+    contention and hyperthreading in mind.
 * ``disable_default_eval_metric`` [default=``false``]
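As a minimal sketch of the advice above (synthetic data; the cap of 4 threads is an arbitrary illustration, not a recommendation), ``nthread`` can be set through the parameter dict so the booster leaves cores free for the rest of the program:

import numpy as np
import xgboost as xgb

X = np.random.rand(256, 16)
y = np.random.randint(2, size=256)
dtrain = xgb.DMatrix(X, label=y)
# Cap XGBoost at 4 threads instead of the default (all available cores).
params = {'objective': 'binary:logistic', 'nthread': 4}
booster = xgb.train(params, dtrain, num_boost_round=10)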

python-package/xgboost/sklearn.py

@@ -73,7 +73,10 @@ __model_doc = '''
         available.  It's recommended to study this option from parameters
         document.
     n_jobs : int
-        Number of parallel threads used to run xgboost.
+        Number of parallel threads used to run xgboost.  When used with other Scikit-Learn
+        algorithms like grid search, you may choose which algorithm to parallelize and
+        balance the threads.  Creating thread contention will significantly slow down both
+        algorithms.
     gamma : float
         Minimum loss reduction required to make a further partition on a leaf
         node of the tree.
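A sketch of the balance this docstring describes, choosing to parallelize the Scikit-Learn layer (the tiny synthetic dataset and two-value grid are placeholders): each model fits with a single thread, and GridSearchCV owns the workers.

from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV
import xgboost as xgb

X, y = make_regression(n_samples=200, n_features=10, random_state=0)
search = GridSearchCV(
    xgb.XGBRegressor(n_jobs=1),                        # one thread per fit...
    {'max_depth': [2, 4], 'n_estimators': [50, 100]},
    n_jobs=4)                                          # ...parallelism lives in the search
search.fit(X, y)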

tests/ci_build/test_python.sh

@@ -60,7 +60,7 @@ case "$suite" in
   cpu)
     source activate cpu_test
     install_xgboost
-    pytest -v -s -rxXs --fulltrace ${args} tests/python
+    pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/python
     cd tests/distributed
     ./runtests.sh
     uninstall_xgboost
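``--durations=0`` makes pytest print the runtime of every test (a positive N would list only the N slowest), which is what gives Jenkins the per-test timings. The same report can be produced programmatically; a minimal sketch, assuming the suite lives under tests/python:

import pytest

# Mirrors the shell invocation above; prints each test's duration at the end.
pytest.main(['-v', '-s', '-rxXs', '--fulltrace', '--durations=0', 'tests/python'])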

tests/python/test_demos.py

@@ -39,6 +39,7 @@ def test_feature_weights_demo():

 @pytest.mark.skipif(**tm.no_sklearn())
+@pytest.mark.skipif(**tm.is_arm())
 def test_sklearn_demo():
     script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_examples.py')
     cmd = ['python', script]
@@ -48,6 +49,7 @@ def test_sklearn_demo():

 @pytest.mark.skipif(**tm.no_sklearn())
+@pytest.mark.skipif(**tm.is_arm())
 def test_sklearn_parallel_demo():
     script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_parallel.py')
     cmd = ['python', script]

tests/python/test_early_stopping.py

@@ -83,6 +83,7 @@ class TestEarlyStopping(unittest.TestCase):

     @pytest.mark.skipif(**tm.no_sklearn())
     @pytest.mark.skipif(**tm.no_pandas())
+    @pytest.mark.skipif(**tm.is_arm())
     def test_cv_early_stopping_with_multiple_eval_sets_and_metrics(self):
         from sklearn.datasets import load_breast_cancer

tests/python/test_with_dask.py

@@ -611,10 +611,6 @@ class TestWithDask:
                          tree_method):
         params['tree_method'] = tree_method
         params = dataset.set_params(params)
-        # multi class doesn't handle empty dataset well (empty
-        # means at least 1 worker has data).
-        if params['objective'] == "multi:softmax":
-            return
         # It doesn't make sense to distribute a completely
         # empty dataset.
         if dataset.X.shape[0] == 0:
@@ -640,18 +636,20 @@ class TestWithDask:
         # Make sure that it's decreasing
         assert history[-1] < history[0]

+    @pytest.mark.skipif(**tm.is_arm())
     @given(params=hist_parameter_strategy,
-           num_rounds=strategies.integers(20, 30),
            dataset=tm.dataset_strategy)
     @settings(deadline=None)
-    def test_hist(self, params, num_rounds, dataset, client):
+    def test_hist(self, params, dataset, client):
+        num_rounds = 30
         self.run_updater_test(client, params, num_rounds, dataset, 'hist')

+    @pytest.mark.skipif(**tm.is_arm())
     @given(params=exact_parameter_strategy,
-           num_rounds=strategies.integers(20, 30),
            dataset=tm.dataset_strategy)
     @settings(deadline=None)
-    def test_approx(self, client, params, num_rounds, dataset):
+    def test_approx(self, client, params, dataset):
+        num_rounds = 30
         self.run_updater_test(client, params, num_rounds, dataset, 'approx')

     def run_quantile(self, name):
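Note the shape of the change: ``num_rounds`` is no longer drawn by hypothesis but pinned at 30, so each property-based test explores a smaller example space. A minimal sketch of the pattern, with a hypothetical test body:

from hypothesis import given, settings, strategies

@given(x=strategies.integers(0, 100))
@settings(deadline=None)  # training time varies too much for a per-example deadline
def test_sketch(x):
    num_rounds = 30       # constant, instead of strategies.integers(20, 30)
    assert num_rounds + x >= 30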

tests/python/testing.py

@@ -1,5 +1,6 @@
 # coding: utf-8
 import os
+import platform
 from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
 from xgboost.compat import DASK_INSTALLED
 import pytest
@@ -22,6 +23,10 @@ except ImportError:
 memory = Memory('./cachedir', verbose=0)

+def is_arm():
+    return {'condition': platform.machine().lower().find('arm') != -1,
+            'reason': 'Skipping expensive tests on ARM.'}
+
 def no_sklearn():
     return {'condition': not SKLEARN_INSTALLED,
             'reason': 'Scikit-Learn is not installed'}
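Each helper returns a {'condition': ..., 'reason': ...} dict, so it unpacks straight into ``pytest.mark.skipif``; that is how the test files above consume it. A minimal sketch, with a hypothetical test name:

import pytest

@pytest.mark.skipif(**is_arm())  # same as skipif(condition=..., reason=...)
def test_heavy_training():
    ...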