From ad6e0d55f1e3aa1c2d99197cf98dd3a82abeda06 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Thu, 8 Nov 2018 19:41:35 -0800 Subject: [PATCH] Fix coef_ and intercept_ signature to be compatible with sklearn.RFECV (#3873) * Fix coef_ and intercept_ signature to be compatible with sklearn.RFECV * Fix lint * Fix lint --- python-package/xgboost/sklearn.py | 16 +++++++++++---- tests/python/test_with_sklearn.py | 33 +++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 39bf66703..864da78f3 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -535,13 +535,21 @@ class XGBModel(XGBModelBase): Returns ------- - coef_ : array of shape ``[n_features]`` + coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]`` """ if self.booster != 'gblinear': raise AttributeError('Coefficients are not defined for Booster type {}' .format(self.booster)) b = self.get_booster() - return json.loads(b.get_dump(dump_format='json')[0])['weight'] + coef = np.array(json.loads(b.get_dump(dump_format='json')[0])['weight']) + # Logic for multiclass classification + n_classes = getattr(self, 'n_classes_', None) + if n_classes is not None: + if n_classes > 2: + assert len(coef.shape) == 1 + assert coef.shape[0] % n_classes == 0 + coef = coef.reshape((n_classes, -1)) + return coef @property def intercept_(self): @@ -556,13 +564,13 @@ class XGBModel(XGBModelBase): Returns ------- - intercept_ : array of shape ``[n_features]`` + intercept_ : array of shape ``(1,)`` or ``[n_classes]`` """ if self.booster != 'gblinear': raise AttributeError('Intercept (bias) is not defined for Booster type {}' .format(self.booster)) b = self.get_booster() - return json.loads(b.get_dump(dump_format='json')[0])['bias'] + return np.array(json.loads(b.get_dump(dump_format='json')[0])['bias']) class XGBClassifier(XGBModel, XGBClassifierBase): diff --git 
a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 38e19922a..93c4c4d10 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -544,3 +544,36 @@ def test_save_load_model(): err = sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds)) assert err < 0.1 + + +def test_RFECV(): + tm._skip_if_no_sklearn() + from sklearn.datasets import load_boston + from sklearn.datasets import load_breast_cancer + from sklearn.datasets import load_iris + from sklearn.feature_selection import RFECV + + # Regression (continuous target, so use the XGBRegressor wrapper) + X, y = load_boston(return_X_y=True) + bst = xgb.XGBRegressor(booster='gblinear', learning_rate=0.1, + n_estimators=10, n_jobs=1, objective='reg:linear', + random_state=0, silent=True) + rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_mean_squared_error') + rfecv.fit(X, y) + + # Binary classification + X, y = load_breast_cancer(return_X_y=True) + bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1, + n_estimators=10, n_jobs=1, objective='binary:logistic', + random_state=0, silent=True) + rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='roc_auc') + rfecv.fit(X, y) + + # Multi-class classification + X, y = load_iris(return_X_y=True) + bst = xgb.XGBClassifier(base_score=0.4, booster='gblinear', learning_rate=0.1, + n_estimators=10, n_jobs=1, objective='multi:softprob', + random_state=0, reg_alpha=0.001, reg_lambda=0.01, + scale_pos_weight=0.5, silent=True) + rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_log_loss') + rfecv.fit(X, y)