# xgboost/demo/kaggle-otto/otto_train_pred.R
# 2022-12-17 18:36:14 +08:00
#
# 49 lines
# 1.3 KiB
# R

require(xgboost)
require(methods)
# Load the training and test tables. Strings are kept as plain character
# vectors so the class labels can be parsed below.
train <- read.csv("data/train.csv", header = TRUE, stringsAsFactors = FALSE)
test <- read.csv("data/test.csv", header = TRUE, stringsAsFactors = FALSE)

# Drop the first column of both tables (presumably the row id -- confirm
# against the data files); it is not used as a model input.
train <- train[, -1]
test <- test[, -1]

# The last remaining training column holds the target as "Class_<k>".
# Strip the prefix and shift to zero-based integers, because xgboost expects
# multiclass labels in [0, num_class).
y <- train[, ncol(train)]
y <- gsub("Class_", "", y)
y <- as.integer(y) - 1L
# Fail fast if any label did not parse instead of passing NA labels on.
stopifnot(!anyNA(y))

# Stack training features on top of the test features so both go through the
# same matrix conversion, then force double storage (this also drops the
# dimnames, as the original conversion did).
x <- rbind(train[, -ncol(train)], test)
x <- as.matrix(x)
x <- matrix(as.numeric(x), nrow(x), ncol(x))

# Row indices of the training and test parts inside the stacked matrix.
# seq_along()/length.out keep these correct (empty) when a part has no rows,
# whereas `1:0` style ranges would count backwards.
trind <- seq_along(y)
teind <- seq.int(nrow(train) + 1L, length.out = nrow(test))
# Booster settings: multiclass soft-probability objective over nine classes,
# evaluated with multiclass log-loss, running on eight threads.
param <- list(
  objective = "multi:softprob",
  eval_metric = "mlogloss",
  num_class = 9,
  nthread = 8
)
# Run cross-validation: 3-fold CV on the training rows only, for
# `cv.nrounds` boosting rounds.
cv.nrounds <- 50
bst.cv <- xgb.cv(
  # Spell the argument out in full; `param =` only reached `params` through
  # R's partial argument matching.
  params = param,
  # drop = FALSE keeps a matrix even if only one training row is selected.
  data = x[trind, , drop = FALSE],
  label = y,
  nfold = 3,
  nrounds = cv.nrounds
)
# Train the final model on all training rows for `nrounds` boosting rounds.
nrounds <- 50
bst <- xgboost(
  # Full argument name instead of relying on partial matching of `param`.
  params = param,
  # drop = FALSE keeps a matrix even if only one training row is selected.
  data = x[trind, , drop = FALSE],
  label = y,
  nrounds = nrounds
)
# Make predictions for the test rows.
# Take the class count from the booster settings so the reshape below cannot
# drift from the value the model was trained with.
num_class <- param[["num_class"]]
pred <- predict(bst, x[teind, , drop = FALSE])
# A flat prediction vector lists the `num_class` probabilities of the first
# row, then the second row, and so on; fold it into one row per sample and
# one column per class. If predict() already returned a matrix (presumably
# the case on newer xgboost releases -- confirm against the installed
# version), leave it untouched, since reshaping it again would scramble it.
if (is.null(dim(pred))) {
  pred <- matrix(pred, ncol = num_class, byrow = TRUE)
}
# Write the submission file: an `id` column followed by one probability
# column per class.
# Class count is taken from the prediction matrix rather than hard-coded.
n_class <- ncol(pred)
# Render the probabilities as fixed-notation text to shrink the file.
pred <- format(pred, digits = 2, scientific = FALSE)
# seq_len() stays correct (empty) when there are no prediction rows,
# whereas 1:nrow(pred) would yield c(1, 0).
pred <- data.frame(seq_len(nrow(pred)), pred)
names(pred) <- c("id", paste0("Class_", seq_len(n_class)))
write.csv(pred, file = "submission.csv", quote = FALSE, row.names = FALSE)