Fix coef_ and intercept_ signature to be compatible with sklearn.RFECV (#3873)

* Fix coef_ and intercept_ signature to be compatible with sklearn.RFECV

* Fix lint

* Fix lint
This commit is contained in:
Philip Hyunsu Cho 2018-11-08 19:41:35 -08:00 committed by GitHub
parent 19ee0a3579
commit ad6e0d55f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 45 additions and 4 deletions

View File

@ -535,13 +535,21 @@ class XGBModel(XGBModelBase):
Returns
-------
coef_ : array of shape ``[n_features]``
coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]``
"""
if self.booster != 'gblinear':
raise AttributeError('Coefficients are not defined for Booster type {}'
.format(self.booster))
b = self.get_booster()
return json.loads(b.get_dump(dump_format='json')[0])['weight']
coef = np.array(json.loads(b.get_dump(dump_format='json')[0])['weight'])
# Logic for multiclass classification
n_classes = getattr(self, 'n_classes_', None)
if n_classes is not None:
if n_classes > 2:
assert len(coef.shape) == 1
assert coef.shape[0] % n_classes == 0
coef = coef.reshape((n_classes, -1))
return coef
@property
def intercept_(self):
@ -556,13 +564,13 @@ class XGBModel(XGBModelBase):
Returns
-------
intercept_ : array of shape ``[n_features]``
intercept_ : array of shape ``(1,)`` or ``[n_classes]``
"""
if self.booster != 'gblinear':
raise AttributeError('Intercept (bias) is not defined for Booster type {}'
.format(self.booster))
b = self.get_booster()
return json.loads(b.get_dump(dump_format='json')[0])['bias']
return np.array(json.loads(b.get_dump(dump_format='json')[0])['bias'])
class XGBClassifier(XGBModel, XGBClassifierBase):

View File

@ -544,3 +544,36 @@ def test_save_load_model():
err = sum(1 for i in range(len(preds))
if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
assert err < 0.1
def test_RFECV():
    """Smoke-test gblinear estimators inside scikit-learn's ``RFECV``.

    ``RFECV`` ranks features using the estimator's ``coef_`` attribute, so
    this runs recursive feature elimination with 3-fold cross-validation on
    three problem types: regression, binary classification and multi-class
    classification. There are no explicit assertions; the test passes when
    every ``fit`` call completes without raising.
    """
    # Presumably skips the test when scikit-learn is not installed.
    tm._skip_if_no_sklearn()
    from sklearn.datasets import load_boston
    from sklearn.datasets import load_breast_cancer
    from sklearn.datasets import load_iris
    from sklearn.feature_selection import RFECV

    # Regression: use the regressor wrapper so the continuous target is
    # modelled with the regression objective rather than being treated as
    # a set of class labels.
    X, y = load_boston(return_X_y=True)
    bst = xgb.XGBRegressor(booster='gblinear', learning_rate=0.1,
                           n_estimators=10, n_jobs=1, objective='reg:linear',
                           random_state=0, silent=True)
    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_mean_squared_error')
    rfecv.fit(X, y)

    # Binary classification
    X, y = load_breast_cancer(return_X_y=True)
    bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1,
                            n_estimators=10, n_jobs=1, objective='binary:logistic',
                            random_state=0, silent=True)
    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='roc_auc')
    rfecv.fit(X, y)

    # Multi-class classification
    X, y = load_iris(return_X_y=True)
    bst = xgb.XGBClassifier(base_score=0.4, booster='gblinear', learning_rate=0.1,
                            n_estimators=10, n_jobs=1, objective='multi:softprob',
                            random_state=0, reg_alpha=0.001, reg_lambda=0.01,
                            scale_pos_weight=0.5, silent=True)
    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_log_loss')
    rfecv.fit(X, y)