Time the CPU tests on Jenkins. (#6257)
* Time the CPU tests on Jenkins.
* Reduce thread contention.
* Add doc.
* Skip heavy tests on ARM.
This commit is contained in:
parent d1254808d5
commit 81c37c28d5
@@ -19,7 +19,7 @@ y = digits['target']
 X = digits['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
+    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))
@@ -30,7 +30,7 @@ y = iris['target']
 X = iris['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
+    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))
@@ -41,7 +41,7 @@ y = boston['target']
 X = boston['data']
 kf = KFold(n_splits=2, shuffle=True, random_state=rng)
 for train_index, test_index in kf.split(X):
-    xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
+    xgb_model = xgb.XGBRegressor(n_jobs=1).fit(X[train_index], y[train_index])
     predictions = xgb_model.predict(X[test_index])
     actuals = y[test_index]
     print(mean_squared_error(actuals, predictions))
@@ -49,10 +49,10 @@ for train_index, test_index in kf.split(X):
 print("Parameter optimization")
 y = boston['target']
 X = boston['data']
-xgb_model = xgb.XGBRegressor()
+xgb_model = xgb.XGBRegressor(n_jobs=1)
 clf = GridSearchCV(xgb_model,
                    {'max_depth': [2, 4, 6],
-                    'n_estimators': [50, 100, 200]}, verbose=1)
+                    'n_estimators': [50, 100, 200]}, verbose=1, n_jobs=1)
 clf.fit(X, y)
 print(clf.best_score_)
 print(clf.best_params_)
@@ -69,6 +69,6 @@ print(np.allclose(clf.predict(X), clf2.predict(X)))
 X = digits['data']
 y = digits['target']
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
-clf = xgb.XGBClassifier()
+clf = xgb.XGBClassifier(n_jobs=1)
 clf.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
         eval_set=[(X_test, y_test)])
@@ -1,6 +1,7 @@
 from sklearn.model_selection import GridSearchCV
 from sklearn.datasets import load_boston
 import xgboost as xgb
+import multiprocessing

 if __name__ == "__main__":
     print("Parallel Parameter optimization")
@@ -8,7 +9,7 @@ if __name__ == "__main__":

     y = boston['target']
     X = boston['data']
-    xgb_model = xgb.XGBRegressor()
+    xgb_model = xgb.XGBRegressor(n_jobs=multiprocessing.cpu_count() // 2)
     clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
                                    'n_estimators': [50, 100, 200]}, verbose=1,
                        n_jobs=2)
@@ -38,7 +38,8 @@ General Parameters

 * ``nthread`` [default to maximum number of threads available if not set]

-  - Number of parallel threads used to run XGBoost
+  - Number of parallel threads used to run XGBoost. When choosing it, please keep thread
+    contention and hyperthreading in mind.

 * ``disable_default_eval_metric`` [default=``false``]
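To make that guidance concrete, here is a minimal sketch (editorial illustration, not part of this change; it assumes the native xgb.train API and scikit-learn's make_regression for toy data) of capping nthread when another thread pool will run alongside XGBoost:

import multiprocessing
import xgboost as xgb
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=1000, n_features=20)
dtrain = xgb.DMatrix(X, label=y)
# Use only part of the available threads; oversubscribing the CPU (e.g. another
# library spawning its own pool on top of XGBoost's) causes contention.
params = {'nthread': multiprocessing.cpu_count() // 2, 'tree_method': 'hist'}
booster = xgb.train(params, dtrain, num_boost_round=10)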
@@ -73,7 +73,10 @@ __model_doc = '''
         available. It's recommended to study this option from parameters
         document.
     n_jobs : int
-        Number of parallel threads used to run xgboost.
+        Number of parallel threads used to run xgboost. When used with other Scikit-Learn
+        algorithms like grid search, you may choose which algorithm to parallelize and
+        balance the threads. Creating thread contention will significantly slow down both
+        algorithms.
     gamma : float
         Minimum loss reduction required to make a further partition on a leaf
         node of the tree.
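A rough sketch of the balancing that docstring describes (assumptions: make_regression toy data and a machine with a few spare cores; none of this is in the diff): parallelize the grid search and keep each individual XGBoost fit single-threaded, rather than letting both spawn a full thread pool.

from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_regression
import xgboost as xgb

X, y = make_regression(n_samples=2000, n_features=20)
# The grid search gets the worker processes; each fit stays single-threaded.
# Setting both n_jobs values high would oversubscribe the CPU.
clf = GridSearchCV(xgb.XGBRegressor(n_jobs=1),
                   {'max_depth': [2, 4], 'n_estimators': [50, 100]},
                   n_jobs=4, verbose=1)
clf.fit(X, y)
print(clf.best_params_)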
@@ -60,7 +60,7 @@ case "$suite" in
   cpu)
     source activate cpu_test
     install_xgboost
-    pytest -v -s -rxXs --fulltrace ${args} tests/python
+    pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/python
     cd tests/distributed
     ./runtests.sh
     uninstall_xgboost
@@ -39,6 +39,7 @@ def test_feature_weights_demo():


 @pytest.mark.skipif(**tm.no_sklearn())
+@pytest.mark.skipif(**tm.is_arm())
 def test_sklearn_demo():
     script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_examples.py')
     cmd = ['python', script]
@@ -48,6 +49,7 @@ def test_sklearn_demo():


 @pytest.mark.skipif(**tm.no_sklearn())
+@pytest.mark.skipif(**tm.is_arm())
 def test_sklearn_parallel_demo():
     script = os.path.join(PYTHON_DEMO_DIR, 'sklearn_parallel.py')
     cmd = ['python', script]
@@ -83,6 +83,7 @@ class TestEarlyStopping(unittest.TestCase):

     @pytest.mark.skipif(**tm.no_sklearn())
     @pytest.mark.skipif(**tm.no_pandas())
+    @pytest.mark.skipif(**tm.is_arm())
     def test_cv_early_stopping_with_multiple_eval_sets_and_metrics(self):
         from sklearn.datasets import load_breast_cancer
@@ -611,10 +611,6 @@ class TestWithDask:
                          tree_method):
         params['tree_method'] = tree_method
         params = dataset.set_params(params)
-        # multi class doesn't handle empty dataset well (empty
-        # means at least 1 worker has data).
-        if params['objective'] == "multi:softmax":
-            return
         # It doesn't make sense to distribute a completely
         # empty dataset.
         if dataset.X.shape[0] == 0:
@@ -640,18 +636,20 @@ class TestWithDask:
         # Make sure that it's decreasing
         assert history[-1] < history[0]

+    @pytest.mark.skipif(**tm.is_arm())
     @given(params=hist_parameter_strategy,
-           num_rounds=strategies.integers(20, 30),
            dataset=tm.dataset_strategy)
     @settings(deadline=None)
-    def test_hist(self, params, num_rounds, dataset, client):
+    def test_hist(self, params, dataset, client):
+        num_rounds = 30
         self.run_updater_test(client, params, num_rounds, dataset, 'hist')

+    @pytest.mark.skipif(**tm.is_arm())
     @given(params=exact_parameter_strategy,
-           num_rounds=strategies.integers(20, 30),
            dataset=tm.dataset_strategy)
     @settings(deadline=None)
-    def test_approx(self, client, params, num_rounds, dataset):
+    def test_approx(self, client, params, dataset):
+        num_rounds = 30
         self.run_updater_test(client, params, num_rounds, dataset, 'approx')

     def run_quantile(self, name):
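For context: everything listed in @given is re-sampled by hypothesis for every generated example, so moving num_rounds out of the strategy and fixing it at 30 shrinks the number of combinations each test explores. A minimal sketch of the pattern (hypothetical test, not from the diff):

from hypothesis import given, settings, strategies

# Before: both values were drawn by hypothesis, multiplying the cases explored.
# @given(params=strategies.integers(1, 8), num_rounds=strategies.integers(20, 30))

# After: only `params` is sampled; num_rounds is a constant inside the test body.
@given(params=strategies.integers(1, 8))
@settings(deadline=None)
def test_sketch(params):
    num_rounds = 30
    assert params * num_rounds > 0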
@@ -1,5 +1,6 @@
 # coding: utf-8
 import os
+import platform
 from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
 from xgboost.compat import DASK_INSTALLED
 import pytest
@@ -22,6 +23,10 @@ except ImportError:
 memory = Memory('./cachedir', verbose=0)


+def is_arm():
+    return {'condition': platform.machine().lower().find('arm') != -1,
+            'reason': 'Skipping expensive tests on ARM.'}
+
 def no_sklearn():
     return {'condition': not SKLEARN_INSTALLED,
             'reason': 'Scikit-Learn is not installed'}
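For reference, pytest.mark.skipif accepts the condition/reason pair as keyword arguments, which is why the dict returned by is_arm() can be splatted into the decorator. A minimal, self-contained sketch of the usage (the helper mirrors the one added above; the test body is hypothetical):

import platform
import pytest

def is_arm():  # mirrors the helper added in testing.py
    return {'condition': platform.machine().lower().find('arm') != -1,
            'reason': 'Skipping expensive tests on ARM.'}

@pytest.mark.skipif(**is_arm())  # expands to skipif(condition=..., reason=...)
def test_heavy_example():
    # stand-in for an expensive test that should not run on ARM workers
    assert sum(range(10)) == 45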