Merge pull request #220 from white1033/master
* Fix XGBClassifier super()
commit
e626b62daa
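
The fix replaces the zero-argument super() call in XGBClassifier.__init__ with the explicit two-argument form. A minimal sketch of why this matters, using hypothetical Base/Child classes rather than anything from the diff below: the zero-argument form is Python 3 only syntax, while the two-argument form works on both interpreters (and this wrapper still supports Python 2, note the sys.version_info check below).

    # Hypothetical Base/Child classes, not taken from the diff below.
    class Base(object):
        def __init__(self, objective):
            self.objective = objective

    class Child(Base):
        def __init__(self):
            # Works on Python 2 and 3.  The zero-argument call super().__init__(...)
            # works on Python 3 only; Python 2 raises
            # "TypeError: super() takes at least 1 argument (0 given)".
            super(Child, self).__init__(objective="binary:logistic")

    print(Child().objective)  # binary:logistic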
@@ -26,7 +26,6 @@ except ImportError:
    SKLEARN_INSTALLED = False

__all__ = ['DMatrix', 'CVPack', 'Booster', 'aggcv', 'cv', 'mknfold', 'train']

if sys.version_info[0] == 3:
@@ -552,20 +551,20 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
    early_stopping_rounds: int
        Activates early stopping. Validation error needs to decrease at least
        every <early_stopping_rounds> round(s) to continue training.
        Requires at least one item in evals.
        If there's more than one, will use the last.
        Returns the model from the last iteration (not the best one).
        If early stopping occurs, the model will have two additional fields:
        bst.best_score and bst.best_iteration.

    Returns
    -------
    booster : a trained booster model
    """

    evals = list(evals)
    bst = Booster(params, [dtrain] + [d[0] for d in evals])

    if not early_stopping_rounds:
        for i in range(num_boost_round):
            bst.update(dtrain, i, obj)
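
A usage sketch of the early-stopping behaviour documented in the docstring above (synthetic data; assumes this wrapper is importable as xgboost, and that the keyword truncated in the hunk header is early_stopping_rounds, as the docstring suggests):

    import numpy as np
    import xgboost as xgb   # assumes the wrapper module is importable under this name

    # Purely illustrative synthetic data.
    X_train, y_train = np.random.rand(100, 5), np.random.randint(0, 2, 100)
    X_valid, y_valid = np.random.rand(30, 5), np.random.randint(0, 2, 30)

    dtrain = xgb.DMatrix(X_train, label=y_train)
    dvalid = xgb.DMatrix(X_valid, label=y_valid)

    # Training stops once the last (here: only) eval set has not improved for
    # 10 consecutive rounds; bst.best_score / bst.best_iteration are then set
    # on the returned booster, as described in the docstring above.
    bst = xgb.train({'objective': 'binary:logistic'}, dtrain,
                    num_boost_round=100,
                    evals=[(dvalid, 'valid')],
                    early_stopping_rounds=10)
    print(bst.best_score, bst.best_iteration)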
@@ -576,15 +575,15 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
                else:
                    sys.stderr.write(bst_eval_set.decode() + '\n')
        return bst

    else:
        # early stopping

        if len(evals) < 1:
            raise ValueError('For early stopping you need at least one set in evals.')

        sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(evals[-1][1], early_stopping_rounds))

        # is params a list of tuples? are we using multiple eval metrics?
        if type(params) == list:
            if len(params) != len(dict(params).items()):
@@ -597,29 +596,29 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
        maximize_metrics = ('auc', 'map', 'ndcg')
        if filter(lambda x: params['eval_metric'].startswith(x), maximize_metrics):
            maximize_score = True

        if maximize_score:
            best_score = 0.0
        else:
            best_score = float('inf')

        best_msg = ''
        best_score_i = 0

        for i in range(num_boost_round):
            bst.update(dtrain, i, obj)
            bst_eval_set = bst.eval_set(evals, i, feval)

            if isinstance(bst_eval_set, string_types):
                msg = bst_eval_set
            else:
                msg = bst_eval_set.decode()

            sys.stderr.write(msg + '\n')

            score = float(msg.rsplit(':', 1)[1])
            if (maximize_score and score > best_score) or \
                    (not maximize_score and score < best_score):
                best_score = score
                best_score_i = i
                best_msg = msg
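
The rsplit call above extracts the numeric score from the textual evaluation message, keeping only the value after the last colon. A small standalone sketch (the message shown is a made-up example in the conventional "[iteration]\tname-metric:value" layout, not output from this code):

    # Made-up evaluation message in the conventional layout (an assumption here).
    msg = '[7]\ttrain-error:0.021500\tvalid-error:0.048200'

    # rsplit(':', 1) keeps only what follows the last colon, i.e. the score of the
    # last metric on the last eval set -- which is why the docstring says the last
    # item in evals is the one being monitored.
    score = float(msg.rsplit(':', 1)[1])
    print(score)  # 0.0482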
@@ -628,10 +627,9 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, ea
                bst.best_score = best_score
                bst.best_iteration = best_score_i
                return bst

        return bst


class CVPack(object):
    def __init__(self, dtrain, dtest, param):
@@ -770,7 +768,7 @@ class XGBModel(BaseEstimator):
        self.n_rounds = n_estimators
        self.objective = objective
        self._Booster = Booster()

    def get_params(self, deep=True):
        return {'max_depth': self.max_depth,
                'learning_rate': self.eta,
@@ -778,22 +776,24 @@ class XGBModel(BaseEstimator):
                'silent': True if self.silent == 1 else False,
                'objective': self.objective
                }

    def get_xgb_params(self):
        return {'eta': self.eta, 'max_depth': self.max_depth, 'silent': self.silent, 'objective': self.objective}

    def fit(self, X, y):
        trainDmatrix = DMatrix(X, label=y)
        self._Booster = train(self.get_xgb_params(), trainDmatrix, self.n_rounds)
        return self

    def predict(self, X):
        testDmatrix = DMatrix(X)
        return self._Booster.predict(testDmatrix)


class XGBClassifier(XGBModel, ClassifierMixin):
    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True):
-        super().__init__(max_depth, learning_rate, n_estimators, silent, objective="binary:logistic")
+        super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective="binary:logistic")

    def fit(self, X, y, sample_weight=None):
        y_values = list(np.unique(y))
        if len(y_values) > 2:
@@ -803,19 +803,19 @@ class XGBClassifier(XGBModel, ClassifierMixin):
            xgb_options['num_class'] = len(y_values)
        else:
            xgb_options = self.get_xgb_params()

        self._le = LabelEncoder().fit(y)
        training_labels = self._le.transform(y)

        if sample_weight is not None:
            trainDmatrix = DMatrix(X, label=training_labels, weight=sample_weight)
        else:
            trainDmatrix = DMatrix(X, label=training_labels)

        self._Booster = train(xgb_options, trainDmatrix, self.n_rounds)

        return self

    def predict(self, X):
        testDmatrix = DMatrix(X)
        class_probs = self._Booster.predict(testDmatrix)
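
fit() above encodes the raw class labels with a LabelEncoder and keeps the fitted encoder as self._le, so that predict() can map booster output back to the original labels. A standalone sketch of that round trip (made-up labels, plain scikit-learn):

    import numpy as np
    from sklearn.preprocessing import LabelEncoder

    # Made-up string labels; XGBClassifier.fit() stores the fitted encoder as self._le.
    y = np.array(['spam', 'ham', 'spam', 'ham'])
    le = LabelEncoder().fit(y)

    training_labels = le.transform(y)                          # array([1, 0, 1, 0]) -- integers for the booster
    original_labels = le.inverse_transform(np.array([0, 1]))  # array(['ham', 'spam']) -- used by predict()
    print(training_labels, original_labels)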
@@ -825,7 +825,7 @@ class XGBClassifier(XGBModel, ClassifierMixin):
            column_indexes = np.repeat(0, X.shape[0])
            column_indexes[class_probs > 0.5] = 1
        return self._le.inverse_transform(column_indexes)

    def predict_proba(self, X):
        testDmatrix = DMatrix(X)
        class_probs = self._Booster.predict(testDmatrix)
@@ -834,9 +834,8 @@ class XGBClassifier(XGBModel, ClassifierMixin):
        else:
            classone_probs = class_probs
        classzero_probs = 1.0 - classone_probs
-        return np.vstack((classzero_probs,classone_probs)).transpose()
+        return np.vstack((classzero_probs, classone_probs)).transpose()


class XGBRegressor(XGBModel, RegressorMixin):
    pass
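
Taken together, the wrapper classes above can be driven in the usual scikit-learn style. An end-to-end sketch (synthetic data; assumes numpy and scikit-learn are installed and that the wrapper module is importable as xgboost; not part of this change):

    import numpy as np
    from xgboost import XGBClassifier   # assumes the wrapper module is importable under this name

    # Synthetic binary classification data, purely illustrative.
    X = np.random.rand(200, 4)
    y = np.where(X[:, 0] + X[:, 1] > 1.0, 'pos', 'neg')

    clf = XGBClassifier(max_depth=3, learning_rate=0.1, n_estimators=50)
    clf.fit(X, y)                             # labels are LabelEncoder-encoded internally
    predictions = clf.predict(X[:5])          # decoded back to 'pos' / 'neg'
    probabilities = clf.predict_proba(X[:5])  # shape (5, 2): [P(class 0), P(class 1)]
    print(predictions, probabilities.shape)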