Merge pull request #220 from white1033/master

* Fix XGBClassifier super()
commit e626b62daa
Author: Tianqi Chen
Date:   2015-04-05 09:05:08 -07:00


@@ -26,7 +26,6 @@ except ImportError:
SKLEARN_INSTALLED = False
__all__ = ['DMatrix', 'CVPack', 'Booster', 'aggcv', 'cv', 'mknfold', 'train']
if sys.version_info[0] == 3:
@@ -552,20 +551,20 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
early_stopping_rounds: int
Activates early stopping. Validation error needs to decrease at least
every <early_stopping_rounds> round(s) to continue training.
Requires at least one item in evals.
If there's more than one, will use the last.
Returns the model from the last iteration (not the best one).
If early stopping occurs, the model will have two additional fields:
bst.best_score and bst.best_iteration.
Returns
-------
booster : a trained booster model
"""
evals = list(evals)
bst = Booster(params, [dtrain] + [d[0] for d in evals])
if not early_stopping_rounds:
for i in range(num_boost_round):
bst.update(dtrain, i, obj)
@@ -576,15 +575,15 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
else:
sys.stderr.write(bst_eval_set.decode() + '\n')
return bst
else:
# early stopping
if len(evals) < 1:
raise ValueError('For early stopping you need at least one set in evals.')
sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(evals[-1][1], early_stopping_rounds))
# is params a list of tuples? are we using multiple eval metrics?
if type(params) == list:
if len(params) != len(dict(params).items()):
@@ -597,29 +596,29 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
maximize_metrics = ('auc', 'map', 'ndcg')
if filter(lambda x: params['eval_metric'].startswith(x), maximize_metrics):
maximize_score = True
if maximize_score:
best_score = 0.0
else:
best_score = float('inf')
best_msg = ''
best_score_i = 0
for i in range(num_boost_round):
bst.update(dtrain, i, obj)
bst_eval_set = bst.eval_set(evals, i, feval)
if isinstance(bst_eval_set, string_types):
msg = bst_eval_set
else:
msg = bst_eval_set.decode()
sys.stderr.write(msg + '\n')
score = float(msg.rsplit(':', 1)[1])
if (maximize_score and score > best_score) or \
(not maximize_score and score < best_score):
best_score = score
best_score_i = i
best_msg = msg
@@ -628,10 +627,9 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
bst.best_score = best_score
bst.best_iteration = best_score_i
return bst
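As a quick illustration of the early-stopping path above, here is a minimal usage sketch. It assumes the wrapper module shown in this diff is importable as xgboost; the data and parameter values are made up.

import numpy as np
import xgboost as xgb

# Toy data; the validation set drives early stopping.
X_train, y_train = np.random.rand(200, 10), np.random.randint(0, 2, 200)
X_val, y_val = np.random.rand(50, 10), np.random.randint(0, 2, 50)

dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)

params = {'objective': 'binary:logistic', 'max_depth': 3, 'eval_metric': 'auc'}
# evals takes (DMatrix, name) pairs; the last pair is the one watched for early stopping.
bst = xgb.train(params, dtrain, num_boost_round=100,
                evals=[(dval, 'validation')], early_stopping_rounds=10)
print(bst.best_score, bst.best_iteration)  # populated by the early-stopping branch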
class CVPack(object):
def __init__(self, dtrain, dtest, param):
@@ -770,7 +768,7 @@ class XGBModel(BaseEstimator):
self.n_rounds = n_estimators
self.objective = objective
self._Booster = Booster()
def get_params(self, deep=True):
return {'max_depth': self.max_depth,
'learning_rate': self.eta,
@@ -778,22 +776,24 @@ class XGBModel(BaseEstimator):
'silent': True if self.silent == 1 else False,
'objective': self.objective
}
def get_xgb_params(self):
return {'eta': self.eta, 'max_depth': self.max_depth, 'silent': self.silent, 'objective': self.objective}
def fit(self, X, y):
trainDmatrix = DMatrix(X, label=y)
self._Booster = train(self.get_xgb_params(), trainDmatrix, self.n_rounds)
return self
def predict(self, X):
testDmatrix = DMatrix(X)
return self._Booster.predict(testDmatrix)
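A short sketch of driving the XGBModel wrapper above. This is a sketch only: the import path, constructor keywords, and the 'reg:linear' objective are assumptions inferred from the attributes used in this diff (learning_rate is stored internally as eta).

import numpy as np
import xgboost as xgb

model = xgb.XGBModel(max_depth=4, learning_rate=0.3, n_estimators=20, objective='reg:linear')
print(model.get_params())      # scikit-learn style keys, e.g. 'learning_rate'
print(model.get_xgb_params())  # native xgboost keys, e.g. 'eta'

X, y = np.random.rand(100, 6), np.random.rand(100)
model.fit(X, y)                # wraps X/y in a DMatrix and calls train()
preds = model.predict(X)       # wraps X in a DMatrix and calls Booster.predict()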
class XGBClassifier(XGBModel, ClassifierMixin):
def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True):
- super().__init__(max_depth, learning_rate, n_estimators, silent, objective="binary:logistic")
+ super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective="binary:logistic")
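For context, the zero-argument super() call is Python 3 only syntax; under Python 2 it raises a TypeError, which is what this one-line change addresses. A minimal standalone illustration of the two spellings:

class Base(object):
    def __init__(self, objective):
        self.objective = objective

class Child(Base):
    def __init__(self):
        # Portable form: explicit class and instance, works on Python 2 and 3.
        super(Child, self).__init__(objective="binary:logistic")
        # Python 3 only: bare super() infers both arguments.
        # super().__init__(objective="binary:logistic")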
def fit(self, X, y, sample_weight=None):
y_values = list(np.unique(y))
if len(y_values) > 2:
@@ -803,19 +803,19 @@ class XGBClassifier(XGBModel, ClassifierMixin):
xgb_options['num_class'] = len(y_values)
else:
xgb_options = self.get_xgb_params()
self._le = LabelEncoder().fit(y)
training_labels = self._le.transform(y)
if sample_weight is not None:
trainDmatrix = DMatrix(X, label=training_labels, weight=sample_weight)
else:
trainDmatrix = DMatrix(X, label=training_labels)
self._Booster = train(xgb_options, trainDmatrix, self.n_rounds)
return self
def predict(self, X):
testDmatrix = DMatrix(X)
class_probs = self._Booster.predict(testDmatrix)
@@ -825,7 +825,7 @@ class XGBClassifier(XGBModel, ClassifierMixin):
column_indexes = np.repeat(0, X.shape[0])
column_indexes[class_probs > 0.5] = 1
return self._le.inverse_transform(column_indexes)
def predict_proba(self, X):
testDmatrix = DMatrix(X)
class_probs = self._Booster.predict(testDmatrix)
@@ -834,9 +834,8 @@ class XGBClassifier(XGBModel, ClassifierMixin):
else:
classone_probs = class_probs
classzero_probs = 1.0 - classone_probs
- return np.vstack((classzero_probs,classone_probs)).transpose()
+ return np.vstack((classzero_probs, classone_probs)).transpose()
class XGBRegressor(XGBModel, RegressorMixin):
pass
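Finally, a hedged end-to-end sketch of the scikit-learn-style classifier defined above. Class and method names come from this diff; the import path and parameter values are assumptions.

import numpy as np
import xgboost as xgb

X = np.random.rand(150, 4)
y = np.random.randint(0, 2, 150)   # binary labels; more than two classes takes the multiclass branch in fit()

clf = xgb.XGBClassifier(max_depth=3, learning_rate=0.1, n_estimators=50)
clf.fit(X, y)                      # labels go through LabelEncoder before training
labels = clf.predict(X)            # binary case: probabilities thresholded at 0.5
probs = clf.predict_proba(X)       # shape (n_samples, 2): columns are P(class 0), P(class 1)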