Merge pull request #254 from lihang00/master

Python: add more params in sklearn wrapper.

commit f28a7a0f8d
@@ -781,31 +781,58 @@ class XGBModel(BaseEstimator):
         Number of boosted trees to fit.
     silent : boolean
         Whether to print messages while running boosting.
+    objective : string
+        Specify the learning task and the corresponding learning objective.
+
+    nthread : int
+        Number of parallel threads used to run xgboost.
+    gamma : float
+        Minimum loss reduction required to make a further partition on a leaf node of the tree.
+    min_child_weight : int
+        Minimum sum of instance weight(hessian) needed in a child.
+    max_delta_step : int
+        Maximum delta step we allow each tree's weight estimation to be.
+    subsample : float
+        Subsample ratio of the training instance.
+    colsample_bytree : float
+        Subsample ratio of columns when constructing each tree.
+
+    base_score:
+        The initial prediction score of all instances, global bias.
+    seed : int
+        Random number seed.
     """
-    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="reg:linear"):
+    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="reg:linear",
+                 nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1,
+                 base_score=0.5, seed=0):
         if not SKLEARN_INSTALLED:
             raise Exception('sklearn needs to be installed in order to use this module')
         self.max_depth = max_depth
         self.learning_rate = learning_rate
-        self.silent = silent
         self.n_estimators = n_estimators
+        self.silent = silent
         self.objective = objective
+
+        self.nthread = nthread
+        self.gamma = gamma
+        self.min_child_weight = min_child_weight
+        self.max_delta_step = max_delta_step
+        self.subsample = subsample
+        self.colsample_bytree = colsample_bytree
+
+        self.base_score = base_score
+        self.seed = seed
+
         self._Booster = Booster()
 
-    def get_params(self, deep=True):
-        return {'max_depth': self.max_depth,
-                'learning_rate': self.learning_rate,
-                'n_estimators': self.n_estimators,
-                'silent': self.silent,
-                'objective': self.objective
-                }
 
     def get_xgb_params(self):
-        return {'eta': self.learning_rate,
-                'max_depth': self.max_depth,
-                'silent': 1 if self.silent else 0,
-                'objective': self.objective
-                }
+        xgb_params = self.get_params()
+        xgb_params['silent'] = 1 if self.silent else 0
+        if self.nthread <= 0:
+            xgb_params.pop('nthread', None)
+        return xgb_params
 
     def fit(self, X, y):
         trainDmatrix = DMatrix(X, label=y)
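The interesting change in this hunk is get_xgb_params: instead of maintaining a hand-written dict of booster parameters, the wrapper now derives them from sklearn's BaseEstimator.get_params() (which is also why the custom get_params override above could be deleted), coercing silent to 0/1 and dropping nthread when it is non-positive so xgboost falls back to its own default. A minimal standalone sketch of that logic, using a hypothetical DummyModel stand-in rather than the real XGBModel:

class DummyModel(object):
    """Hypothetical stand-in for XGBModel, just to exercise the logic."""

    def __init__(self, learning_rate=0.1, silent=True, nthread=-1):
        self.learning_rate = learning_rate
        self.silent = silent
        self.nthread = nthread

    def get_params(self):
        # sklearn's BaseEstimator.get_params() collects constructor
        # arguments automatically; spelled out here to stay standalone.
        return {'learning_rate': self.learning_rate,
                'silent': self.silent,
                'nthread': self.nthread}

    def get_xgb_params(self):
        xgb_params = self.get_params()
        # The booster expects silent as 0/1, not a Python bool.
        xgb_params['silent'] = 1 if self.silent else 0
        if self.nthread <= 0:
            # Omit nthread entirely so xgboost picks its own default.
            xgb_params.pop('nthread', None)
        return xgb_params


print(DummyModel().get_xgb_params())
# -> {'learning_rate': 0.1, 'silent': 1}
print(DummyModel(nthread=4).get_xgb_params())
# -> {'learning_rate': 0.1, 'silent': 1, 'nthread': 4}

The payoff of this design is that every new constructor argument flows into the booster automatically; future parameters only need to be added to __init__.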
@@ -818,8 +845,12 @@ class XGBModel(BaseEstimator):
 
 
 class XGBClassifier(XGBModel, ClassifierMixin):
-    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="binary:logistic"):
-        super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective)
+    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="binary:logistic",
+                 nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1,
+                 base_score=0.5, seed=0):
+        super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective,
+                                            nthread, gamma, min_child_weight, max_delta_step, subsample, colsample_bytree,
+                                            base_score, seed)
 
     def fit(self, X, y, sample_weight=None):
         y_values = list(np.unique(y))
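XGBClassifier simply forwards the new arguments positionally to XGBModel. A usage sketch of the extended constructor (this assumes an xgboost build that includes this sklearn wrapper, importable as xgboost, plus numpy installed; the toy data is made up for illustration):

import numpy as np
import xgboost

# Made-up toy data, purely for illustration.
X = np.random.rand(100, 5)
y = (X[:, 0] > 0.5).astype(int)

# The extra booster knobs now pass straight through the sklearn-style
# constructor instead of requiring the low-level DMatrix/train API.
clf = xgboost.XGBClassifier(max_depth=3, learning_rate=0.1, n_estimators=50,
                            gamma=0.1, min_child_weight=2, subsample=0.8,
                            colsample_bytree=0.8, seed=42)
clf.fit(X, y)
print(clf.get_xgb_params())  # now includes gamma, subsample, etc.

One caveat with the super() call: passing the arguments positionally is fragile if the parameter order ever changes; keyword forwarding would be the safer choice.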