require(xgboost)
require(methods)

# Load the data and drop the id column from both sets
train <- read.csv('data/train.csv', header = TRUE, stringsAsFactors = FALSE)
test <- read.csv('data/test.csv', header = TRUE, stringsAsFactors = FALSE)
train <- train[, -1]
test <- test[, -1]

# Convert class labels to integers in [0, num_class),
# as xgboost expects for multi-class objectives
y <- train[, ncol(train)]
y <- gsub('Class_', '', y, fixed = TRUE)
y <- as.integer(y) - 1

# Stack train and test features into a single numeric matrix
x <- rbind(train[, -ncol(train)], test)
x <- as.matrix(x)
x <- matrix(as.numeric(x), nrow(x), ncol(x))
trind <- seq_along(y)
teind <- (nrow(train) + 1):nrow(x)

# Set the necessary parameters
param <- list("objective" = "multi:softprob",
              "eval_metric" = "mlogloss",
              "num_class" = 9,
              "nthread" = 8)

# Run cross-validation
cv.nrounds <- 50
bst.cv <- xgb.cv(params = param,
                 data = x[trind, ],
                 label = y,
                 nfold = 3,
                 nrounds = cv.nrounds)

# Train the model on the full training set
nrounds <- 50
bst <- xgboost(params = param,
               data = x[trind, ],
               label = y,
               nrounds = nrounds)

# Predict class probabilities for the test set. predict() returns a flat
# vector of length 9 * nrow(test), grouped by observation, so reshape it
# into a 9-row matrix and transpose to get one row per observation.
pred <- predict(bst, x[teind, ])
pred <- matrix(pred, 9, length(pred) / 9)
pred <- t(pred)

# Write the submission file; formatting to 2 significant digits
# shrinks its size
pred <- format(pred, digits = 2, scientific = FALSE)
pred <- data.frame(seq_len(nrow(pred)), pred)
names(pred) <- c('id', paste0('Class_', 1:9))
write.csv(pred, file = 'submission.csv', quote = FALSE, row.names = FALSE)
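
# Side note: the cross-validation run above is never fed back into
# training; both stages simply use 50 rounds. A minimal sketch of
# choosing nrounds from the CV output instead, assuming this xgboost
# version returns the per-round means in bst.cv$evaluation_log with a
# test_mlogloss_mean column (older releases return a data.table of the
# same statistics directly, so adjust the accessor accordingly):
best.nrounds <- which.min(bst.cv$evaluation_log$test_mlogloss_mean)
bst.tuned <- xgboost(params = param,
                     data = x[trind, ],
                     label = y,
                     nrounds = best.nrounds)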