Disable feature validation on sklearn predict_proba. (#5953)

* Fix issue when the scikit-learn interface receives transformed inputs.
This commit is contained in:
Jiaming Yuan 2020-07-29 19:26:44 +08:00 committed by GitHub
parent 18349a7ccf
commit f5fdcbe194
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 1 deletions

View File

@ -909,7 +909,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
'Label encoder is not defined. Returning class probability.') 'Label encoder is not defined. Returning class probability.')
return class_probs return class_probs
def predict_proba(self, data, ntree_limit=None, validate_features=True, def predict_proba(self, data, ntree_limit=None, validate_features=False,
base_margin=None): base_margin=None):
""" """
Predict the probability of each `data` example being of a given class. Predict the probability of each `data` example being of a given class.

View File

@ -888,6 +888,38 @@ def test_parameter_validation():
assert len(output) == 0 assert len(output) == 0
@pytest.mark.skipif(**tm.no_pandas())
def test_pandas_input():
    """Check that a DataFrame-trained classifier works inside a prefit calibrator.

    A random 0/1 integer table is loaded into a pandas DataFrame whose first
    column, ``status``, is used as the label and whose remaining columns
    (``k1`` .. ``k5``) are the features.  An ``XGBClassifier`` is fitted on
    the frame and then handed to ``CalibratedClassifierCV`` with
    ``cv='prefit'`` and isotonic calibration.  The test asserts that the
    calibrator still wraps an ``XGBClassifier`` and that it reports the
    classes ``[0, 1]``.
    """
    import pandas as pd
    from sklearn.calibration import CalibratedClassifierCV

    n_rows, n_cols = 100, 6
    random_state = np.random.RandomState(1994)

    # Draw a flat vector first and reshape afterwards so the generated
    # values are laid out exactly as rows x columns.
    values = random_state.randint(
        low=0, high=2, size=n_rows * n_cols).reshape(n_rows, n_cols)

    # First column is the label, the rest are named features k1..k{n_cols-1}.
    column_names = ['status'] + ['k' + str(idx) for idx in range(1, n_cols)]
    frame = pd.DataFrame(values, columns=column_names)

    labels = frame['status']
    features = frame.drop(columns=['status'])

    classifier = xgb.XGBClassifier()
    classifier.fit(features, labels)

    calibrated = CalibratedClassifierCV(
        classifier, cv='prefit', method='isotonic')
    calibrated.fit(features, labels)

    wrapped = calibrated.calibrated_classifiers_[0].base_estimator
    assert isinstance(wrapped, xgb.XGBClassifier)
    np.testing.assert_allclose(np.array(calibrated.classes_),
                               np.array([0, 1]))
class TestBoostFromPrediction(unittest.TestCase): class TestBoostFromPrediction(unittest.TestCase):
def run_boost_from_prediction(self, tree_method): def run_boost_from_prediction(self, tree_method):
from sklearn.datasets import load_breast_cancer from sklearn.datasets import load_breast_cancer