# Source listing metadata: 49 lines, 1.3 KiB, R
# Attach dependencies with library() so a missing package stops the script
# immediately; require() would only return FALSE and let it fail later.
library(xgboost)
library(methods)

# Load the training and test tables. Strings are kept as plain characters so
# the class labels can be parsed below.
train <- read.csv('data/train.csv', header = TRUE, stringsAsFactors = FALSE)
test <- read.csv('data/test.csv', header = TRUE, stringsAsFactors = FALSE)

# Drop the first column of each table (presumably a row id -- confirm against
# the data files); it is not used as a feature.
train <- train[, -1]
test <- test[, -1]

# The last training column holds the target as 'Class_<k>'. Strip the prefix
# and shift to zero-based values: xgboost expects class labels in
# [0, num_class).
y <- train[, ncol(train)]
y <- gsub('Class_', '', y, fixed = TRUE)
y <- as.integer(y) - 1
# Fail fast on labels that did not parse instead of handing NA to xgboost.
stopifnot(!anyNA(y), all(y >= 0))

# Stack the train and test features into a single double-precision matrix
# without dimnames.
x <- rbind(train[, -ncol(train)], test)
x <- unname(as.matrix(x))
storage.mode(x) <- "double"

# Row indices of the training and test parts of x. The offset seq_len() form
# yields an empty index for an empty test set, whereas
# (nrow(train) + 1):nrow(x) would count backwards.
trind <- seq_along(y)
teind <- nrow(train) + seq_len(nrow(test))

# Booster settings: multi-class soft-probability objective scored with
# multi-class log loss, 9 target classes, 8 worker threads.
param <- list(
  objective = "multi:softprob",
  eval_metric = "mlogloss",
  num_class = 9,
  nthread = 8
)

# Run cross-validation: 3 folds over the training rows for cv.nrounds
# boosting rounds.
cv.nrounds <- 50
bst.cv <- xgb.cv(
  # Spell out `params`; the abbreviated `param` only worked through partial
  # argument matching.
  params = param,
  # drop = FALSE keeps a matrix even if only one training row is selected.
  data = x[trind, , drop = FALSE],
  label = y,
  nfold = 3,
  nrounds = cv.nrounds
)

# Train the final model on all training rows.
nrounds <- 50
bst <- xgboost(
  # Spell out `params`; the abbreviated `param` only worked through partial
  # argument matching.
  params = param,
  # drop = FALSE keeps a matrix even if only one training row is selected.
  data = x[trind, , drop = FALSE],
  label = y,
  nrounds = nrounds
)

# Predict class probabilities for the test rows.
pred <- predict(bst, x[teind, , drop = FALSE])

# A flat prediction vector stores each row's class probabilities
# consecutively, so fill row-wise to get one row per test case and one column
# per class. If predict() already returned a matrix (newer xgboost releases
# appear to do so -- confirm for the installed version), leave it untouched:
# reshaping it again would scramble the probabilities.
if (is.null(dim(pred))) {
  pred <- matrix(pred, ncol = param[["num_class"]], byrow = TRUE)
}

# Write the submission file.
submission_cols <- c('id', paste0('Class_', 1:9))

# Render the probabilities as fixed-notation text to shrink the submission.
pred <- format(pred, digits = 2, scientific = FALSE)

# Prepend a running row id and label the columns as the submission expects.
pred <- setNames(data.frame(seq_len(nrow(pred)), pred), submission_cols)
write.csv(pred, file = 'submission.csv', quote = FALSE, row.names = FALSE)