EX: Make separate example for fork issue.

2015-05-11 09:30:51 -05:00 · 2015-05-11 09:30:51 -05:00 · 15ea00540a
commit 15ea00540a
parent fa8c6e2f0b
2 changed files with 89 additions and 76 deletions
--- a/demo/guide-python/sklearn_examples.py
+++ b/demo/guide-python/sklearn_examples.py
@@ -4,22 +4,13 @@ Created on 1 Apr 2015

@author: Jamie Hall
 '''
-if __name__ == "__main__":
-    # NOTE: This *has* to be here and in the `__name__ == "__main__"` clause
-    # to run XGBoost in parallel, if XGBoost was built with OpenMP support.
-    # Otherwise, you can use fork, which is the default backend for joblib,
-    # and omit this.
-    from multiprocessing import set_start_method
-    set_start_method("forkserver")
-
 import pickle
-    import os
 import xgboost as xgb

 import numpy as np
 from sklearn.cross_validation import KFold
-    from sklearn.grid_search import GridSearchCV
 from sklearn.metrics import confusion_matrix, mean_squared_error
+from sklearn.grid_search import GridSearchCV
 from sklearn.datasets import load_iris, load_digits, load_boston

 rng = np.random.RandomState(31337)
@@ -74,16 +65,3 @@ if __name__ == "__main__":
 pickle.dump(clf, open("best_boston.pkl", "wb"))
 clf2 = pickle.load(open("best_boston.pkl", "rb"))
 print(np.allclose(clf.predict(X), clf2.predict(X)))
-
-    print("Parallel Parameter optimization")
-    os.environ["OMP_NUM_THREADS"] = "1"
-    y = boston['target']
-    X = boston['data']
-    xgb_model = xgb.XGBRegressor()
-    clf = GridSearchCV(xgb_model,
-                       {'max_depth': [2,4,6],
-                        'n_estimators': [50,100,200]}, verbose=1,
-                       n_jobs=2)
-    clf.fit(X, y)
-    print(clf.best_score_)
-    print(clf.best_params_)
--- a/demo/guide-python/sklearn_parallel.py
+++ b/demo/guide-python/sklearn_parallel.py
@@ -0,0 +1,35 @@
+import os
+
+if __name__ == "__main__":
+    # NOTE: on POSIX systems, the `set_start_method("forkserver")` call
+    # below *has* to be made here, inside the `__name__ == "__main__"`
+    # clause, to run XGBoost in parallel processes if XGBoost was built with
+    # OpenMP support. Otherwise, if you build XGBoost without OpenMP support,
+    # you can use fork, which is the default backend for joblib, and omit this.
+    try:
+        from multiprocessing import set_start_method
+    except ImportError:
+        raise ImportError("Unable to import multiprocessing.set_start_method."
+                          " This example only runs on Python 3.4")
+    set_start_method("forkserver")
+
+    import numpy as np
+    from sklearn.grid_search import GridSearchCV
+    from sklearn.datasets import load_boston
+    import xgboost as xgb
+
+    rng = np.random.RandomState(31337)
+
+    print("Parallel Parameter optimization")
+    boston = load_boston()
+
+    os.environ["OMP_NUM_THREADS"] = "2"  # or to whatever you want
+    y = boston['target']
+    X = boston['data']
+    xgb_model = xgb.XGBRegressor()
+    clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
+                                   'n_estimators': [50, 100, 200]}, verbose=1,
+                       n_jobs=2)
+    clf.fit(X, y)
+    print(clf.best_score_)
+    print(clf.best_params_)