# Train a multiclass xgboost model on data/train.csv, predict class
# probabilities for data/test.csv, and write them to submission.csv.

# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library(xgboost)
library(methods)

# Load the raw data and drop the first column of each file
# (presumably a row id -- confirm against the data files).
train <- read.csv("data/train.csv", header = TRUE, stringsAsFactors = FALSE)
test <- read.csv("data/test.csv", header = TRUE, stringsAsFactors = FALSE)
train <- train[, -1]
test <- test[, -1]

# Number of target classes; used for the model parameters, for reshaping the
# predictions, and for the submission column names.
num_class <- 9L

# The last training column holds the label as "Class_<k>". Strip the prefix
# and shift to zero-based, since xgboost expects labels in [0, num_class).
y <- train[, ncol(train)]
y <- gsub("Class_", "", y)
y <- as.integer(y) - 1L

# Stack train features on top of the test rows and coerce everything into a
# plain numeric matrix (dimnames are intentionally discarded here).
x <- rbind(train[, -ncol(train)], test)
x <- as.matrix(x)
x <- matrix(as.numeric(x), nrow(x), ncol(x))

# Row indices of the train and test portions inside x. seq_along()/seq_len()
# stay empty for empty input instead of counting backwards like 1:0.
trind <- seq_along(y)
teind <- nrow(train) + seq_len(nrow(x) - nrow(train))

# Set necessary parameters
param <- list(
  "objective" = "multi:softprob",
  "eval_metric" = "mlogloss",
  "num_class" = num_class,
  "nthread" = 8
)

# Run cross validation
cv.nrounds <- 50
bst.cv <- xgb.cv(
  params = param,
  data = x[trind, , drop = FALSE],
  label = y,
  nfold = 3,
  nrounds = cv.nrounds
)

# Train the model
nrounds <- 50
bst <- xgboost(
  params = param,
  data = x[trind, , drop = FALSE],
  label = y,
  nrounds = nrounds
)

# Make predictions. The flat prediction vector is laid out as num_class
# consecutive values per test row, so fill the matrix row by row to get one
# row per observation and one column per class.
pred <- predict(bst, x[teind, , drop = FALSE])
pred <- matrix(pred, ncol = num_class, byrow = TRUE)

# Output submission
# Two significant digits keep the submission file small.
pred <- format(pred, digits = 2, scientific = FALSE)
pred <- data.frame(seq_len(nrow(pred)), pred)
names(pred) <- c("id", paste0("Class_", seq_len(num_class)))
write.csv(pred, file = "submission.csv", quote = FALSE, row.names = FALSE)