change name

This commit is contained in:
hetong007 2015-03-20 18:46:52 -07:00
parent 2e71d2dfe4
commit 7ced224722
2 changed files with 68 additions and 0 deletions

View File

@ -0,0 +1,25 @@
Benchmark for Otto Group Competition
=========
This is a folder containing the benchmark for the [Otto Group Competition on Kaggle](http://www.kaggle.com/c/otto-group-product-classification-challenge).
## Getting started
1. Put `train.csv` and `test.csv` under the `data` folder
2. Run the script
3. Submit the `submission.csv`
The parameter `nthread` controls the number of cores to run on; please set it to suit your machine.
## R-package
To install the R-package of xgboost, please run
```r
devtools::install_github('tqchen/xgboost',subdir='R-package')
```
Windows users may need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.

View File

@ -0,0 +1,43 @@
# Benchmark script: trains a multi-class xgboost model on data/train.csv,
# predicts class probabilities for data/test.csv and writes submission.csv.

# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
library(xgboost)
library(methods)

# Number of target classes. Used for the xgboost `num_class` parameter, for
# reshaping the prediction vector and for the submission column names.
num_class <- 9

train <- read.csv("data/train.csv", header = TRUE, stringsAsFactors = FALSE)
test <- read.csv("data/test.csv", header = TRUE, stringsAsFactors = FALSE)

# Drop the first column of both tables (presumably the row id -- it is not
# used as a feature).
train <- train[, -1]
test <- test[, -1]

# The last column of train holds the target, encoded as "Class_<k>".
y <- train[, ncol(train)]
y <- gsub("Class_", "", y)
y <- as.integer(y) - 1 # xgboost takes labels in [0, num_class)
stopifnot(!anyNA(y), all(y >= 0 & y < num_class))

# Stack train and test features into one plain numeric matrix.
x <- rbind(train[, -ncol(train)], test)
x <- as.matrix(x)
x <- matrix(as.numeric(x), nrow(x), ncol(x))

# Row indices of the training and test parts of x. seq_along()/seq_len()
# stay empty for empty input, whereas 1:0 would yield c(1, 0).
trind <- seq_along(y)
teind <- nrow(train) + seq_len(nrow(test))

# Set necessary parameters
param <- list("objective" = "multi:softprob",
              "eval_metric" = "mlogloss",
              "num_class" = num_class,
              "nthread" = 8)

# Run cross validation
cv.nround <- 50
bst.cv <- xgb.cv(params = param, data = x[trind, , drop = FALSE], label = y,
                 nfold = 3, nrounds = cv.nround)

# Train the model
nround <- 50
bst <- xgboost(params = param, data = x[trind, , drop = FALSE], label = y,
               nrounds = nround)

# Make prediction. drop = FALSE keeps a matrix even for a single test row.
pred <- predict(bst, x[teind, , drop = FALSE])
# The flat prediction vector is laid out with num_class consecutive values
# per test row; reshape it to one row per test case, one column per class.
pred <- matrix(pred, ncol = num_class, byrow = TRUE)

# Output submission
pred <- format(pred, digits = 2, scientific = FALSE) # shrink the size of submission
pred <- data.frame(seq_len(nrow(pred)), pred)
names(pred) <- c("id", paste0("Class_", seq_len(num_class)))
write.csv(pred, file = "submission.csv", quote = FALSE, row.names = FALSE)