From 4bcc73f0c9d74e19fc8339ce6c69df516ba68bd7 Mon Sep 17 00:00:00 2001
From: hetong007
Date: Fri, 20 Mar 2015 13:34:20 -0700
Subject: [PATCH] add kaggle otto folder

---
 demo/kaggle-otto/benchmark.R | 43 ++++++++++++++++++++++++++++++++++++
 demo/kaggle-otto/readme.md   | 24 ++++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 demo/kaggle-otto/benchmark.R
 create mode 100644 demo/kaggle-otto/readme.md

diff --git a/demo/kaggle-otto/benchmark.R b/demo/kaggle-otto/benchmark.R
new file mode 100644
index 000000000..ffe7a8794
--- /dev/null
+++ b/demo/kaggle-otto/benchmark.R
@@ -0,0 +1,43 @@
+library(xgboost)  # library() stops with an error if the package is missing
+library(methods)
+# Load the Kaggle CSVs and drop the first column of each (presumably the id)
+train <- read.csv('data/train.csv', header = TRUE, stringsAsFactors = FALSE)
+test <- read.csv('data/test.csv', header = TRUE, stringsAsFactors = FALSE)
+train <- train[, -1]
+test <- test[, -1]
+# The last column of train holds the labels, written as 'Class_<k>'
+y <- train[, ncol(train)]
+y <- gsub('Class_', '', y)
+y <- as.integer(y) - 1  # xgboost expects class labels in [0, num_class)
+# Stack train and test features into a single numeric matrix
+x <- rbind(train[, -ncol(train)], test)
+x <- as.matrix(x)
+x <- matrix(as.numeric(x), nrow(x), ncol(x))
+trind <- seq_along(y)  # row indices of the training samples in x
+teind <- (nrow(train) + 1):nrow(x)  # row indices of the test samples in x
+
+# Set necessary parameters
+param <- list("objective" = "multi:softprob",
+              "eval_metric" = "mlogloss",
+              "num_class" = 9,
+              "nthread" = 8)  # number of threads; adjust to your machine
+
+# Run cross-validation
+cv.nround <- 50
+bst.cv <- xgb.cv(params = param, data = x[trind, ], label = y,
+                 nfold = 3, nrounds = cv.nround)
+
+# Train the model
+nround <- 50
+bst <- xgboost(params = param, data = x[trind, ], label = y, nrounds = nround)
+
+# Make prediction
+pred <- predict(bst, x[teind, ])
+pred <- matrix(pred, 9, length(pred) / 9)  # reshape the flat vector to 9 x n
+pred <- t(pred)  # one row per test sample, one column per class
+
+# Output submission
+pred <- format(pred, digits = 2, scientific = FALSE) # shrink the size of submission
+pred <- data.frame(seq_len(nrow(pred)), pred)
+names(pred) <- c('id', paste0('Class_', 1:9))
+write.csv(pred, file = 'submission.csv', quote = FALSE, row.names = FALSE)
diff --git a/demo/kaggle-otto/readme.md b/demo/kaggle-otto/readme.md
new file mode 100644
index 000000000..265ebf1d2
--- /dev/null
+++ b/demo/kaggle-otto/readme.md
@@ -0,0 
+1,24 @@
+Benchmark for Otto Group Competition
+=========
+
+This is a folder containing the benchmark for the [Otto Group Competition on Kaggle](http://www.kaggle.com/c/otto-group-product-classification-challenge).
+
+## Getting started
+
+1. Put `train.csv` and `test.csv` under the `data` folder
+2. Run the script `benchmark.R`
+
+The parameter `nthread` controls the number of cores to run on; please set it to suit your machine.
+
+## R-package
+
+To install the R-package of xgboost, please run
+
+```
+devtools::install_github('tqchen/xgboost',subdir='R-package')
+```
+
+Windows users may need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
+
+
+