EX: Show example of pickling and parallel use.

2015-05-08 14:34:37 -05:00
parent 932af821c5
commit 99c2df9913
1 changed files with 74 additions and 48 deletions
--- a/demo/guide-python/sklearn_examples.py
+++ b/demo/guide-python/sklearn_examples.py
@@ -4,7 +4,16 @@ Created on 1 Apr 2015

@author: Jamie Hall
 '''
+if __name__ == "__main__":
+    # NOTE: This call *has* to be made here, inside the `__name__ == "__main__"`
+    # clause, to run XGBoost in parallel when XGBoost was built with OpenMP
+    # support. If it was built without OpenMP, you can omit this and use fork,
+    # which is the default backend for joblib.
+    from multiprocessing import set_start_method
+    set_start_method("forkserver")

+    import pickle
+    import os
    import xgboost as xgb

    import numpy as np
@@ -15,7 +24,6 @@ from sklearn.datasets import load_iris, load_digits, load_boston

    rng = np.random.RandomState(31337)

-
    print("Zeros and Ones from the Digits dataset: binary classification")
    digits = load_digits(2)
    y = digits['target']
@@ -60,4 +68,22 @@ clf.fit(X,y)
    print(clf.best_score_)
    print(clf.best_params_)

+    # The sklearn API models are picklable
+    print("Pickling sklearn API models")
+    # The file must be opened in binary mode for pickle to work
+    pickle.dump(clf, open("best_boston.pkl", "wb"))
+    clf2 = pickle.load(open("best_boston.pkl", "rb"))
+    print(np.allclose(clf.predict(X), clf2.predict(X)))

+    print("Parallel Parameter optimization")
+    os.environ["OMP_NUM_THREADS"] = "1"
+    y = boston['target']
+    X = boston['data']
+    xgb_model = xgb.XGBRegressor()
+    clf = GridSearchCV(xgb_model,
+                       {'max_depth': [2,4,6],
+                        'n_estimators': [50,100,200]}, verbose=1,
+                       n_jobs=2)
+    clf.fit(X, y)
+    print(clf.best_score_)
+    print(clf.best_params_)