Merge pull request #220 from white1033/master

* Fix XGBClassifier super()
commit e626b62daa
Author: Tianqi Chen
Date:   2015-04-05 09:05:08 -07:00


@@ -26,7 +26,6 @@ except ImportError:
SKLEARN_INSTALLED = False
__all__ = ['DMatrix', 'CVPack', 'Booster', 'aggcv', 'cv', 'mknfold', 'train']
if sys.version_info[0] == 3:
@@ -552,20 +551,20 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
early_stopping_rounds: int
Activates early stopping. Validation error needs to decrease at least
every <early_stopping_rounds> round(s) to continue training.
Requires at least one item in evals.
If there's more than one, will use the last.
Returns the model from the last iteration (not the best one).
If early stopping occurs, the model will have two additional fields:
bst.best_score and bst.best_iteration.
Returns
-------
booster : a trained booster model
"""
evals = list(evals)
bst = Booster(params, [dtrain] + [d[0] for d in evals])
if not early_stopping_rounds:
for i in range(num_boost_round):
bst.update(dtrain, i, obj)
@@ -576,15 +575,15 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
else:
sys.stderr.write(bst_eval_set.decode() + '\n')
return bst
else:
# early stopping
if len(evals) < 1:
raise ValueError('For early stopping you need at least one set in evals.')
sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(evals[-1][1], early_stopping_rounds))
# is params a list of tuples? are we using multiple eval metrics?
if type(params) == list:
if len(params) != len(dict(params).items()):
@@ -597,29 +596,29 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
maximize_metrics = ('auc', 'map', 'ndcg')
if filter(lambda x: params['eval_metric'].startswith(x), maximize_metrics):
maximize_score = True
if maximize_score:
best_score = 0.0
else:
best_score = float('inf')
best_msg = ''
best_score_i = 0
for i in range(num_boost_round):
bst.update(dtrain, i, obj)
bst_eval_set = bst.eval_set(evals, i, feval)
if isinstance(bst_eval_set, string_types):
msg = bst_eval_set
else:
msg = bst_eval_set.decode()
sys.stderr.write(msg + '\n')
score = float(msg.rsplit(':', 1)[1])
if (maximize_score and score > best_score) or \
(not maximize_score and score < best_score):
best_score = score
best_score_i = i
best_msg = msg
@@ -628,10 +627,9 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
bst.best_score = best_score
bst.best_iteration = best_score_i
return bst
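As a quick illustration of the early-stopping path above, here is a minimal usage sketch. It assumes the wrapper module shown in this diff is importable as xgboost; the data and parameter values are made up.

import numpy as np
import xgboost as xgb

# Toy data; the validation set drives early stopping.
X_train, y_train = np.random.rand(200, 10), np.random.randint(0, 2, 200)
X_val, y_val = np.random.rand(50, 10), np.random.randint(0, 2, 50)

dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)

params = {'objective': 'binary:logistic', 'max_depth': 3, 'eval_metric': 'auc'}
# evals takes (DMatrix, name) pairs; the last pair is the one watched for early stopping.
bst = xgb.train(params, dtrain, num_boost_round=100,
                evals=[(dval, 'validation')], early_stopping_rounds=10)
print(bst.best_score, bst.best_iteration)  # populated by the early-stopping branch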
class CVPack(object):
def __init__(self, dtrain, dtest, param):
@@ -770,7 +768,7 @@ class XGBModel(BaseEstimator):
self.n_rounds = n_estimators
self.objective = objective
self._Booster = Booster()
def get_params(self, deep=True):
return {'max_depth': self.max_depth,
'learning_rate': self.eta,
@@ -778,22 +776,24 @@ class XGBModel(BaseEstimator):
'silent': True if self.silent == 1 else False,
'objective': self.objective
}
def get_xgb_params(self):
return {'eta': self.eta, 'max_depth': self.max_depth, 'silent': self.silent, 'objective': self.objective}
def fit(self, X, y):
trainDmatrix = DMatrix(X, label=y)
self._Booster = train(self.get_xgb_params(), trainDmatrix, self.n_rounds)
return self
def predict(self, X):
testDmatrix = DMatrix(X)
return self._Booster.predict(testDmatrix)
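A short sketch of driving the XGBModel wrapper above. This is a sketch only: the import path, constructor keywords, and the 'reg:linear' objective are assumptions inferred from the attributes used in this diff (learning_rate is stored internally as eta).

import numpy as np
import xgboost as xgb

model = xgb.XGBModel(max_depth=4, learning_rate=0.3, n_estimators=20, objective='reg:linear')
print(model.get_params())      # scikit-learn style keys, e.g. 'learning_rate'
print(model.get_xgb_params())  # native xgboost keys, e.g. 'eta'

X, y = np.random.rand(100, 6), np.random.rand(100)
model.fit(X, y)                # wraps X/y in a DMatrix and calls train()
preds = model.predict(X)       # wraps X in a DMatrix and calls Booster.predict()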
class XGBClassifier(XGBModel, ClassifierMixin):
def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True):
- super().__init__(max_depth, learning_rate, n_estimators, silent, objective="binary:logistic")
+ super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective="binary:logistic")
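For context, the zero-argument super() call is Python 3 only syntax; under Python 2 it raises a TypeError, which is what this one-line change addresses. A minimal standalone illustration of the two spellings:

class Base(object):
    def __init__(self, objective):
        self.objective = objective

class Child(Base):
    def __init__(self):
        # Portable form: explicit class and instance, works on Python 2 and 3.
        super(Child, self).__init__(objective="binary:logistic")
        # Python 3 only: bare super() infers both arguments.
        # super().__init__(objective="binary:logistic")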
def fit(self, X, y, sample_weight=None):
y_values = list(np.unique(y))
if len(y_values) > 2:
@@ -803,19 +803,19 @@ class XGBClassifier(XGBModel, ClassifierMixin):
xgb_options['num_class'] = len(y_values)
else:
xgb_options = self.get_xgb_params()
self._le = LabelEncoder().fit(y)
training_labels = self._le.transform(y)
if sample_weight is not None:
trainDmatrix = DMatrix(X, label=training_labels, weight=sample_weight)
else:
trainDmatrix = DMatrix(X, label=training_labels)
self._Booster = train(xgb_options, trainDmatrix, self.n_rounds)
return self
def predict(self, X):
testDmatrix = DMatrix(X)
class_probs = self._Booster.predict(testDmatrix)
@@ -825,7 +825,7 @@ class XGBClassifier(XGBModel, ClassifierMixin):
column_indexes = np.repeat(0, X.shape[0])
column_indexes[class_probs > 0.5] = 1
return self._le.inverse_transform(column_indexes)
def predict_proba(self, X):
testDmatrix = DMatrix(X)
class_probs = self._Booster.predict(testDmatrix)
@@ -834,9 +834,8 @@ class XGBClassifier(XGBModel, ClassifierMixin):
else:
classone_probs = class_probs
classzero_probs = 1.0 - classone_probs
- return np.vstack((classzero_probs,classone_probs)).transpose()
+ return np.vstack((classzero_probs, classone_probs)).transpose()
class XGBRegressor(XGBModel, RegressorMixin):
pass
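Finally, a hedged end-to-end sketch of the scikit-learn-style classifier defined above. Class and method names come from this diff; the import path and parameter values are assumptions.

import numpy as np
import xgboost as xgb

X = np.random.rand(150, 4)
y = np.random.randint(0, 2, 150)   # binary labels; more than two classes takes the multiclass branch in fit()

clf = xgb.XGBClassifier(max_depth=3, learning_rate=0.1, n_estimators=50)
clf.fit(X, y)                      # labels go through LabelEncoder before training
labels = clf.predict(X)            # binary case: probabilities thresholded at 0.5
probs = clf.predict_proba(X)       # shape (n_samples, 2): columns are P(class 0), P(class 1)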