From 402e832ce58690eaa8f3e1046bd7cd15a5d005be Mon Sep 17 00:00:00 2001
From: white1033
Date: Sun, 5 Apr 2015 21:15:09 +0800
Subject: [PATCH 1/2] *Fix XGBClassifier super()

---
 wrapper/xgboost.py | 62 +++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py
index 4a1e7c895..25d2827db 100644
--- a/wrapper/xgboost.py
+++ b/wrapper/xgboost.py
@@ -552,20 +552,20 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
     early_stopping_rounds: int
         Activates early stopping. Validation error needs to decrease at least
         every <early_stopping_rounds> round(s) to continue training.
-        Requires at least one item in evals.
+        Requires at least one item in evals. If there's more than one, will use the last.
         Returns the model from the last iteration (not the best one).
-        If early stopping occurs, the model will have two additional fields: 
+        If early stopping occurs, the model will have two additional fields:
         bst.best_score and bst.best_iteration.
 
     Returns
     -------
     booster : a trained booster model
     """
-    
+    evals = list(evals)
     bst = Booster(params, [dtrain] + [d[0] for d in evals])
-    
+
     if not early_stopping_rounds:
         for i in range(num_boost_round):
             bst.update(dtrain, i, obj)
@@ -576,15 +576,15 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
             else:
                 sys.stderr.write(bst_eval_set.decode() + '\n')
         return bst
-    
+
     else:
         # early stopping
-        
+
         if len(evals) < 1:
-            raise ValueError('For early stopping you need at least on set in evals.') 
-        
+            raise ValueError('For early stopping you need at least on set in evals.')
+
         sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(evals[-1][1], early_stopping_rounds))
-        
+
         # is params a list of tuples? are we using multiple eval metrics?
         if type(params) == list:
             if len(params) != len(dict(params).items()):
@@ -597,26 +597,26 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
             maximize_metrics = ('auc', 'map', 'ndcg')
             if filter(lambda x: params['eval_metric'].startswith(x), maximize_metrics):
                 maximize_score = True
-        
+
         if maximize_score:
             best_score = 0.0
         else:
             best_score = float('inf')
-    
-        best_msg = '' 
+
+        best_msg = ''
         best_score_i = 0
-    
+
         for i in range(num_boost_round):
             bst.update(dtrain, i, obj)
             bst_eval_set = bst.eval_set(evals, i, feval)
-    
+
             if isinstance(bst_eval_set, string_types):
                 msg = bst_eval_set
             else:
                 msg = bst_eval_set.decode()
-    
+
             sys.stderr.write(msg + '\n')
-    
+
             score = float(msg.rsplit(':', 1)[1])
             if (maximize_score and score > best_score) or \
             (not maximize_score and score < best_score):
@@ -628,10 +628,10 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
                 bst.best_score = best_score
                 bst.best_iteration = best_score_i
                 return bst
-    
+
     return bst
-    
+

 
 class CVPack(object):
     def __init__(self, dtrain, dtest, param):
@@ -770,7 +770,7 @@ class XGBModel(BaseEstimator):
         self.n_rounds = n_estimators
         self.objective = objective
         self._Booster = Booster()
-    
+
     def get_params(self, deep=True):
         return {'max_depth': self.max_depth,
                 'learning_rate': self.eta,
@@ -780,20 +780,20 @@ class XGBModel(BaseEstimator):
                 }
     def get_xgb_params(self):
         return {'eta': self.eta, 'max_depth': self.max_depth,
                 'silent': self.silent, 'objective': self.objective}
-    
+
     def fit(self, X, y):
         trainDmatrix = DMatrix(X, label=y)
         self._Booster = train(self.get_xgb_params(), trainDmatrix, self.n_rounds)
         return self
-    
+
     def predict(self, X):
         testDmatrix = DMatrix(X)
         return self._Booster.predict(testDmatrix)
 
-class XGBClassifier(XGBModel, ClassifierMixin): 
+class XGBClassifier(XGBModel, ClassifierMixin):
     def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True):
-        super().__init__(max_depth, learning_rate, n_estimators, silent, objective="binary:logistic")
-    
+        super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective="binary:logistic")
+
     def fit(self, X, y, sample_weight=None):
         y_values = list(np.unique(y))
         if len(y_values) > 2:
@@ -803,19 +803,19 @@ class XGBClassifier(XGBModel, ClassifierMixin):
             xgb_options['num_class'] = len(y_values)
         else:
             xgb_options = self.get_xgb_params()
-    
+
         self._le = LabelEncoder().fit(y)
         training_labels = self._le.transform(y)
-    
+
         if sample_weight is not None:
             trainDmatrix = DMatrix(X, label=training_labels, weight=sample_weight)
         else:
             trainDmatrix = DMatrix(X, label=training_labels)
-    
+
         self._Booster = train(xgb_options, trainDmatrix, self.n_rounds)
-    
+
         return self
-    
+
     def predict(self, X):
         testDmatrix = DMatrix(X)
         class_probs = self._Booster.predict(testDmatrix)
@@ -825,7 +825,7 @@ class XGBClassifier(XGBModel, ClassifierMixin):
             column_indexes = np.repeat(0, X.shape[0])
             column_indexes[class_probs > 0.5] = 1
         return self._le.inverse_transform(column_indexes)
-    
+
     def predict_proba(self, X):
         testDmatrix = DMatrix(X)
         class_probs = self._Booster.predict(testDmatrix)
@@ -836,7 +836,7 @@ class XGBClassifier(XGBModel, ClassifierMixin):
         classzero_probs = 1.0 - classone_probs
         return np.vstack((classzero_probs,classone_probs)).transpose()
 
-class XGBRegressor(XGBModel, RegressorMixin): 
+class XGBRegressor(XGBModel, RegressorMixin):
     pass
 
 

From 18cb8d7de2c5a63fd932d501236e929761aebcbb Mon Sep 17 00:00:00 2001
From: white1033
Date: Sun, 5 Apr 2015 23:22:40 +0800
Subject: [PATCH 2/2] fix indent warning by flake8

---
 wrapper/xgboost.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py
index 25d2827db..33ee9565a 100644
--- a/wrapper/xgboost.py
+++ b/wrapper/xgboost.py
@@ -26,7 +26,6 @@ except ImportError:
     SKLEARN_INSTALLED = False
 
 
-
 __all__ = ['DMatrix', 'CVPack', 'Booster', 'aggcv', 'cv', 'mknfold', 'train']
 
 if sys.version_info[0] == 3:
@@ -619,7 +618,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
 
             score = float(msg.rsplit(':', 1)[1])
             if (maximize_score and score > best_score) or \
-            (not maximize_score and score < best_score):
+                (not maximize_score and score < best_score):
                 best_score = score
                 best_score_i = i
                 best_msg = msg
@@ -632,7 +631,6 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
     return bst
 
 
-
 class CVPack(object):
     def __init__(self, dtrain, dtest, param):
         self.dtrain = dtrain
@@ -778,6 +776,7 @@ class XGBModel(BaseEstimator):
                 'silent': True if self.silent == 1 else False,
                 'objective': self.objective
                 }
+
     def get_xgb_params(self):
         return {'eta': self.eta, 'max_depth': self.max_depth,
                 'silent': self.silent, 'objective': self.objective}
@@ -790,6 +789,7 @@ class XGBModel(BaseEstimator):
         testDmatrix = DMatrix(X)
         return self._Booster.predict(testDmatrix)
 
+
 class XGBClassifier(XGBModel, ClassifierMixin):
     def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True):
         super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective="binary:logistic")
@@ -834,9 +834,8 @@ class XGBClassifier(XGBModel, ClassifierMixin):
         else:
             classone_probs = class_probs
             classzero_probs = 1.0 - classone_probs
-        return np.vstack((classzero_probs,classone_probs)).transpose()
+        return np.vstack((classzero_probs, classone_probs)).transpose()
+
 
 class XGBRegressor(XGBModel, RegressorMixin):
     pass
-
-
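
Background on the fix in PATCH 1/2: the zero-argument super() that it
replaces is syntactically valid on Python 2 but fails at call time, since
only the Python 3 compiler fills in the implicit class/instance arguments;
under Python 2 it raises TypeError, so XGBClassifier could not even be
constructed. A minimal sketch of the difference (Base, Py3Only, and
Portable are illustrative names, not part of the patch):

    class Base(object):
        def __init__(self, objective):
            self.objective = objective

    class Py3Only(Base):
        def __init__(self):
            # Python 3 only: argument-less super() is resolved by the
            # compiler; under Python 2 this line raises TypeError.
            super().__init__(objective="binary:logistic")

    class Portable(Base):
        def __init__(self):
            # Explicit two-argument form works on Python 2 and 3 alike;
            # this is the form the patch gives XGBClassifier.__init__.
            super(Portable, self).__init__(objective="binary:logistic")

    print(Portable().objective)  # binary:logistic on either interpreter

Since the wrapper still targets Python 2 as well (note the
sys.version_info check visible in the second patch), the explicit form is
the portable choice.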
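
On PATCH 2/2: the commit message does not name the specific warnings, but
judging from the hunks they most likely correspond to the standard
flake8/pycodestyle checks (an assumption, not stated in the patch):
W291/W293 for trailing whitespace on code and blank lines, E302/E303 for
the blank-line count around top-level definitions, W391 for blank lines at
the end of the file, and a continuation-line indentation warning
(E127/E128) for the wrapped early-stopping condition in train().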