* add support for tweedie regression * added back readme line that was accidentally deleted * fixed linting errors * add support for tweedie regression * added back readme line that was accidentally deleted * fixed linting errors * rebased with upstream master and added R example * changed parameter name to tweedie_variance_power * linting error fix * refactored tweedie-nloglik metric to be more like the other parameterized metrics * added upper and lower bound check to tweedie metric * add support for tweedie regression * added back readme line that was accidentally deleted * fixed linting errors * added upper and lower bound check to tweedie metric * added back readme line that was accidentally deleted * rebased with upstream master and added R example * rebased again on top of upstream master * linting error fix * added upper and lower bound check to tweedie metric * rebased with master * lint fix * removed whitespace at end of line 186 - elementwise_metric.cc
49 lines
1.2 KiB
R
Executable File
49 lines
1.2 KiB
R
Executable File
# Tweedie regression demo: fit an xgboost model with the `reg:tweedie`
# objective on the AutoClaim data and compute the in-sample RMSE.
library(xgboost)
library(data.table)
library(cplm)

# auto insurance dataset analyzed by Yip and Yau (2005)
data(AutoClaim)
dt <- data.table(AutoClaim)

# exclude these columns from the model matrix
exclude <- c(
  "POLICYNO", "PLCYDATE", "CLM_FREQ5", "CLM_AMT5", "CLM_FLAG", "IN_YY"
)

# Build the sparse design matrix while retaining rows with missing values.
# `na.action` is switched to "na.pass" only for this call; the previous
# setting is captured and restored afterwards instead of being hard-reset.
# NOTE: this dataset comes ready out of the box
# NOTE(review): sparse.model.matrix() is presumably made available by one of
# the packages attached above (it lives in Matrix) -- confirm.
old_opts <- options(na.action = "na.pass")
x <- sparse.model.matrix(~ . - 1, data = dt[, -exclude, with = FALSE])
options(old_opts)

# response
y <- dt[, CLM_AMT5]

d_train <- xgb.DMatrix(data = x, label = y, missing = NA)

# the tweedie_variance_power parameter determines the shape of the
# distribution
# - closer to 1 is more poisson like and the mass
#   is more concentrated near zero
# - closer to 2 is more gamma like and the mass spreads to
#   the right with less concentration near zero
params <- list(
  objective = "reg:tweedie",
  eval_metric = "rmse",
  tweedie_variance_power = 1.4,
  max_depth = 6,
  eta = 1
)

# Train for 20 rounds, reporting the evaluation metric on the training set.
# `maximize = FALSE` because a lower rmse is better.
bst <- xgb.train(
  data = d_train,
  params = params,
  maximize = FALSE,
  watchlist = list(train = d_train),
  nrounds = 20
)

# Feature importance, labelled with the model-matrix column names.
var_imp <- xgb.importance(feature_names = colnames(x), model = bst)

# In-sample predictions and root mean squared error.
preds <- predict(bst, d_train)

rmse <- sqrt(mean((y - preds)^2))