[Breaking] Update sklearn interface. (#4929)
* Remove nthread, seed, silent. Add tree_method, gpu_id, num_parallel_tree. Fix #4909. * Check data shape. Fix #4896. * Check that each element of eval_set is a tuple. Fix #4875. * Add doc for random_state with hogwild. Fixes #4919
This commit is contained in:
31
tests/python-gpu/test_gpu_with_sklearn.py
Normal file
31
tests/python-gpu/test_gpu_with_sklearn.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import xgboost as xgb
import pytest
import sys
import numpy as np

# Make the shared test helpers (tests/python/testing.py) importable
# when this file is run from the repository root.
sys.path.append("tests/python")
import testing as tm

# Skip every test in this module when scikit-learn is not installed.
pytestmark = pytest.mark.skipif(**tm.no_sklearn())

# Fixed seed so the KFold shuffling below is reproducible across runs.
rng = np.random.RandomState(1994)
||||
def test_gpu_binary_classification():
    """Smoke-test GPU training via the sklearn wrappers on a binary task.

    Trains both the boosted and random-forest classifier wrappers with
    ``tree_method='gpu_hist'`` and checks the held-out error stays low.
    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import KFold

    digits = load_digits(2)
    y = digits['target']
    X = digits['data']
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
        for train_index, test_index in kf.split(X, y):
            model = cls(random_state=42, tree_method='gpu_hist',
                        n_estimators=4, gpu_id='0')
            model.fit(X[train_index], y[train_index])
            preds = model.predict(X[test_index])
            labels = y[test_index]
            # Error rate: fraction of thresholded predictions that
            # disagree with the true labels.
            wrong = sum(1 for p, t in zip(preds, labels)
                        if int(p > 0.5) != t)
            err = wrong / float(len(preds))
            assert err < 0.1
||||
@@ -175,6 +175,21 @@ def test_feature_importances_gain():
|
||||
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
|
||||
|
||||
|
||||
def test_num_parallel_tree():
    """``num_parallel_tree`` multiplies the trees built per boosting round."""
    from sklearn.datasets import load_boston

    boston = load_boston()
    data, target = boston['data'], boston['target']

    # 4 boosting rounds x 4 parallel trees -> 16 trees in the dump.
    reg = xgb.XGBRegressor(n_estimators=4, num_parallel_tree=4,
                           tree_method='hist')
    dump = reg.fit(X=data, y=target).get_booster().get_dump(
        dump_format='json')
    assert len(dump) == 16

    # The random-forest wrapper: n_estimators=4 yields 4 dumped trees.
    forest = xgb.XGBRFRegressor(n_estimators=4)
    dump = forest.fit(X=data, y=target).get_booster().get_dump(
        dump_format='json')
    assert len(dump) == 4
|
||||
|
||||
|
||||
def test_boston_housing_regression():
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.datasets import load_boston
|
||||
@@ -430,18 +445,18 @@ def test_split_value_histograms():
|
||||
|
||||
def test_sklearn_random_state():
    """``random_state`` must round-trip through ``get_xgb_params``.

    The commit removes the old ``seed`` alias, so only ``random_state``
    is constructed and asserted here; the rendered diff had merged the
    deleted ``seed`` lines with the new ones, which would raise against
    the post-commit interface.
    """
    clf = xgb.XGBClassifier(random_state=402)
    assert clf.get_xgb_params()['random_state'] == 402

    clf = xgb.XGBClassifier(random_state=401)
    assert clf.get_xgb_params()['random_state'] == 401
|
||||
|
||||
|
||||
def test_sklearn_n_jobs():
    """``n_jobs`` must round-trip through ``get_xgb_params``.

    The commit removes the old ``nthread`` alias, so only ``n_jobs`` is
    constructed and asserted here; the rendered diff had merged the
    deleted ``nthread`` lines with the new ones, which would raise
    against the post-commit interface.
    """
    clf = xgb.XGBClassifier(n_jobs=1)
    assert clf.get_xgb_params()['n_jobs'] == 1

    clf = xgb.XGBClassifier(n_jobs=2)
    assert clf.get_xgb_params()['n_jobs'] == 2
|
||||
|
||||
|
||||
def test_kwargs():
|
||||
@@ -482,7 +497,7 @@ def test_kwargs_error():
|
||||
def test_sklearn_clone():
    """A wrapper with a mutated public attribute must survive sklearn clone().

    The duplicate constructor call using the removed ``nthread`` kwarg
    (a deleted diff line merged in by the scrape) is dropped; only the
    post-commit ``n_jobs`` spelling remains.
    """
    from sklearn.base import clone

    clf = xgb.XGBClassifier(n_jobs=2)
    clf.n_jobs = -1
    # clone() re-reads params via get_params(); raises if any are lost.
    clone(clf)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user