Fix gamma neg log likelihood. (#7275)
This commit is contained in:
parent
b3b03200e2
commit
d8cb395380
@ -309,10 +309,9 @@ struct EvalGammaNLogLik {
|
|||||||
float constexpr kPsi = 1.0;
|
float constexpr kPsi = 1.0;
|
||||||
bst_float theta = -1. / py;
|
bst_float theta = -1. / py;
|
||||||
bst_float a = kPsi;
|
bst_float a = kPsi;
|
||||||
// b = -std::log(-theta);
|
float b = -std::log(-theta);
|
||||||
float b = 1.0f;
|
// c = 1. / kPsi^2 * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
|
||||||
// c = 1. / kPsi * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
|
// = 1.0f * std::log(y) - std::log(y) - 0 = 0
|
||||||
// = 1.0f * std::log(y) - std::log(y) - 0 = 0
|
|
||||||
float c = 0;
|
float c = 0;
|
||||||
// general form for exponential family.
|
// general form for exponential family.
|
||||||
return -((y * theta - b) / a + c);
|
return -((y * theta - b) / a + c);
|
||||||
|
|||||||
@ -124,6 +124,35 @@ class TestEvalMetrics:
|
|||||||
skl_gamma_dev = mean_gamma_deviance(y, score)
|
skl_gamma_dev = mean_gamma_deviance(y, score)
|
||||||
np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)
|
np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
|
def test_gamma_lik(self) -> None:
|
||||||
|
import scipy.stats as stats
|
||||||
|
rng = np.random.default_rng(1994)
|
||||||
|
n_samples = 32
|
||||||
|
n_features = 10
|
||||||
|
|
||||||
|
X = rng.normal(0, 1, size=n_samples * n_features).reshape((n_samples, n_features))
|
||||||
|
|
||||||
|
alpha, loc, beta = 5.0, 11.1, 22
|
||||||
|
y = stats.gamma.rvs(alpha, loc=loc, scale=beta, size=n_samples, random_state=rng)
|
||||||
|
reg = xgb.XGBRegressor(tree_method="hist", objective="reg:gamma", n_estimators=64)
|
||||||
|
reg.fit(X, y, eval_metric="gamma-nloglik", eval_set=[(X, y)])
|
||||||
|
|
||||||
|
score = reg.predict(X)
|
||||||
|
|
||||||
|
booster = reg.get_booster()
|
||||||
|
nloglik = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1].split(":")[0])
|
||||||
|
|
||||||
|
# \beta_i = - (1 / \theta_i a)
|
||||||
|
# where \theta_i is the canonical parameter
|
||||||
|
# XGBoost uses the canonical link function of gamma in evaluation function.
|
||||||
|
# so \theta = - (1.0 / y)
|
||||||
|
# dispersion is hardcoded as 1.0, so shape (a in scipy parameter) is also 1.0
|
||||||
|
beta = - (1.0 / (- (1.0 / y))) # == y
|
||||||
|
nloglik_stats = -stats.gamma.logpdf(score, a=1.0, scale=beta)
|
||||||
|
|
||||||
|
np.testing.assert_allclose(nloglik, np.mean(nloglik_stats), rtol=1e-3)
|
||||||
|
|
||||||
def run_roc_auc_binary(self, tree_method, n_samples):
|
def run_roc_auc_binary(self, tree_method, n_samples):
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.datasets import make_classification
|
from sklearn.datasets import make_classification
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user