Tweedie Regression Post-Rebase (#1737)
* add support for tweedie regression
* added back readme line that was accidentally deleted
* fixed linting errors
* add support for tweedie regression
* added back readme line that was accidentally deleted
* fixed linting errors
* rebased with upstream master and added R example
* changed parameter name to tweedie_variance_power
* linting error fix
* refactored tweedie-nloglik metric to be more like the other parameterized metrics
* added upper and lower bound check to tweedie metric
* add support for tweedie regression
* added back readme line that was accidentally deleted
* fixed linting errors
* added upper and lower bound check to tweedie metric
* added back readme line that was accidentally deleted
* rebased with upstream master and added R example
* rebased again on top of upstream master
* linting error fix
* added upper and lower bound check to tweedie metric
* rebased with master
* lint fix
* removed whitespace at end of line 186 - elementwise_metric.cc
This commit is contained in:
committed by
Tianqi Chen
parent
52b9867be5
commit
2ad0948444
49
R-package/demo/tweedie_regression.R
Executable file
49
R-package/demo/tweedie_regression.R
Executable file
@@ -0,0 +1,49 @@
|
||||
library(xgboost)
|
||||
library(data.table)
|
||||
library(cplm)
|
||||
|
||||
# Auto insurance dataset analyzed by Yip and Yau (2005); `AutoClaim` is
# made available by data() and converted to a data.table.
data(AutoClaim)
dt <- data.table(AutoClaim)

# Columns that must not enter the model matrix.
exclude <- c(
  "POLICYNO",
  "PLCYDATE",
  "CLM_FREQ5",
  "CLM_AMT5",
  "CLM_FLAG",
  "IN_YY"
)
|
||||
|
||||
# Build the sparse design matrix while retaining rows with missing values.
# NOTE: this dataset comes ready to use out of the box.
# options() returns the previous settings, so the user's original
# `na.action` is restored afterwards instead of being forced to "na.omit".
old_opts <- options(na.action = "na.pass")
x <- sparse.model.matrix(~ . - 1, data = dt[, -exclude, with = FALSE])
options(old_opts)
|
||||
|
||||
# Response variable: the CLM_AMT5 column of the dataset.
y <- dt[["CLM_AMT5"]]

# Training matrix; NA entries are treated as missing values.
d_train <- xgb.DMatrix(
  data = x,
  label = y,
  missing = NA
)
|
||||
|
||||
# tweedie_variance_power determines the shape of the distribution:
#   - values closer to 1 are more Poisson-like, and the mass is more
#     concentrated near zero
#   - values closer to 2 are more gamma-like, and the mass spreads to
#     the right with less concentration near zero
params <- list(
  objective = "reg:tweedie",
  eval_metric = "rmse",
  tweedie_variance_power = 1.4,
  max_depth = 6,
  eta = 1
)
|
||||
|
||||
# Train for 20 rounds, evaluating on the training set via the watchlist.
bst <- xgb.train(
  params = params,
  data = d_train,
  nrounds = 20,
  watchlist = list(train = d_train),
  maximize = FALSE
)
|
||||
|
||||
# Feature importance, labelled with the model-matrix column names.
var_imp <- xgb.importance(colnames(x), model = bst)

# In-sample predictions on the training matrix.
preds <- predict(bst, d_train)

# Root mean squared error of the in-sample predictions. mean() already
# reduces to a scalar, so no additional sum() is needed.
rmse <- sqrt(mean((y - preds)^2))
|
||||
Reference in New Issue
Block a user