[dask, sklearn] Fix predict proba. (#6566)
* For sklearn:
  - Handle user-defined objective functions.
  - Handle `softmax`.
* For dask:
  - Use the implementation from sklearn; the previous implementation doesn't perform any extra handling.
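For context, the point of the fix is that with a user-defined objective (or `multi:softmax`) the booster hands back raw margins, so `predict_proba` has to apply the probability transform itself. Below is a minimal sketch of that kind of transform, not the library's actual code; the helper name `margins_to_proba` and the array shapes are illustrative assumptions only:

```python
import numpy as np

def margins_to_proba(margin: np.ndarray) -> np.ndarray:
    """Illustrative only: turn raw booster margins into class probabilities."""
    if margin.ndim == 1:
        # binary case: sigmoid, then stack P(y=0) and P(y=1) column-wise
        prob = 1.0 / (1.0 + np.exp(-margin))
        return np.vstack([1.0 - prob, prob]).T
    # multiclass case: numerically stable row-wise softmax
    e = np.exp(margin - margin.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)
```

The tests added below exercise this path: `predict_proba` on a classifier trained with `tm.softprob_obj(3)` is expected to return an `(n_samples, n_classes)` probability matrix.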
@@ -160,7 +160,7 @@ def test_boost_from_prediction(tree_method: str) -> None:
         tree_method=tree_method,
     )
     model_0.fit(X=X_, y=y_)
-    margin = model_0.predict_proba(X_, output_margin=True)
+    margin = model_0.predict(X_, output_margin=True)
 
     model_1 = xgb.dask.DaskXGBClassifier(
         learning_rate=0.3,
@@ -79,6 +79,18 @@ def test_multiclass_classification():
     check_pred(preds3, labels, output_margin=True)
     check_pred(preds4, labels, output_margin=False)
 
+    cls = xgb.XGBClassifier(n_estimators=4).fit(X, y)
+    assert cls.n_classes_ == 3
+    proba = cls.predict_proba(X)
+    assert proba.shape[0] == X.shape[0]
+    assert proba.shape[1] == cls.n_classes_
+
+    # custom objective, the default is multi:softprob so no transformation is required.
+    cls = xgb.XGBClassifier(n_estimators=4, objective=tm.softprob_obj(3)).fit(X, y)
+    proba = cls.predict_proba(X)
+    assert proba.shape[0] == X.shape[0]
+    assert proba.shape[1] == cls.n_classes_
+
 
 def test_ranking():
     # generate random data
@@ -788,6 +800,11 @@ def test_save_load_model():
     booster.save_model(model_path)
     cls = xgb.XGBClassifier()
     cls.load_model(model_path)
+
+    proba = cls.predict_proba(X)
+    assert proba.shape[0] == X.shape[0]
+    assert proba.shape[1] == 2  # binary
+
     predt_1 = cls.predict_proba(X)[:, 1]
     assert np.allclose(predt_0, predt_1)
 
@@ -253,6 +253,34 @@ def eval_error_metric(predt, dtrain: xgb.DMatrix):
     return 'CustomErr', np.sum(r)
 
 
+def softmax(x):
+    e = np.exp(x)
+    return e / np.sum(e)
+
+
+def softprob_obj(classes):
+    def objective(labels, predt):
+        rows = labels.shape[0]
+        grad = np.zeros((rows, classes), dtype=float)
+        hess = np.zeros((rows, classes), dtype=float)
+        eps = 1e-6
+        for r in range(predt.shape[0]):
+            target = labels[r]
+            p = softmax(predt[r, :])
+            for c in range(predt.shape[1]):
+                assert target >= 0 or target <= classes
+                g = p[c] - 1.0 if c == target else p[c]
+                h = max((2.0 * p[c] * (1.0 - p[c])).item(), eps)
+                grad[r, c] = g
+                hess[r, c] = h
+
+        grad = grad.reshape((rows * classes, 1))
+        hess = hess.reshape((rows * classes, 1))
+        return grad, hess
+
+    return objective
+
+
 class DirectoryExcursion:
     def __init__(self, path: os.PathLike, cleanup=False):
         '''Change directory. Change back and optionally cleaning up the directory when exit.
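The `softprob_obj` helper added above implements the standard softmax cross-entropy gradient: for each row, the gradient for class `c` is `p[c] - 1` when `c` is the true class and `p[c]` otherwise, with a clipped `p[c] * (1 - p[c])`-style Hessian (the factor of 2 is the convention used in this test code). A quick, self-contained finite-difference check of the gradient formula; the function names here are illustrative and not part of the test suite:

```python
import numpy as np

def softmax_rows(x):
    # row-wise, numerically stable softmax
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def ce_loss(margin, label):
    # cross-entropy of one row of raw margins against an integer label
    p = softmax_rows(margin.reshape(1, -1))[0]
    return -np.log(p[label])

rng = np.random.default_rng(0)
margin = rng.normal(size=3)  # one row of margins for a 3-class problem
label = 1

# analytic gradient: p[c] - 1 for the true class, p[c] otherwise
p = softmax_rows(margin.reshape(1, -1))[0]
analytic = p.copy()
analytic[label] -= 1.0

# central finite-difference check
eps = 1e-6
numeric = np.array([
    (ce_loss(margin + eps * np.eye(3)[c], label)
     - ce_loss(margin - eps * np.eye(3)[c], label)) / (2 * eps)
    for c in range(3)
])
assert np.allclose(analytic, numeric, atol=1e-5)
```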