Merge branch 'master' of ssh://github.com/dmlc/xgboost
This commit is contained in:
commit
149b43a0a8
12
README.md
12
README.md
@ -74,6 +74,18 @@ Build
|
||||
export CXX = g++-4.9
|
||||
```
|
||||
Then run ```bash build.sh``` normally.
|
||||
|
||||
- For users who want to use [High Performance Computing for Mac OS X](http://hpc.sourceforge.net/), download the GCC 4.9 binary tar ball and follow the installation guidance to install them under `/usr/local`. Then edit [Makefile](Makefile) by replacing:
|
||||
```
|
||||
export CC = gcc
|
||||
export CXX = g++
|
||||
```
|
||||
with
|
||||
```
|
||||
export CC = /usr/local/bin/gcc
|
||||
export CXX = /usr/local/bin/g++
|
||||
```
|
||||
Then run ```bash build.sh``` normally. This solution is given by [Phil Culliton](https://www.kaggle.com/c/otto-group-product-classification-challenge/forums/t/12947/achieve-0-50776-on-the-leaderboard-in-a-minute-with-xgboost/68308#post68308).
|
||||
|
||||
Version
|
||||
=======
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Otto Group benchmark: multi-class classification with xgboost.
# Reads data/train.csv and data/test.csv, trains a softprob model,
# and writes submission.csv.
library(xgboost)
library(methods)

# Load data; keep strings as characters (ids and class labels).
train <- read.csv("data/train.csv", header = TRUE, stringsAsFactors = FALSE)
test  <- read.csv("data/test.csv",  header = TRUE, stringsAsFactors = FALSE)

# Drop the id column from both sets.
train <- train[, -1]
test  <- test[, -1]

# Class labels: "Class_1".."Class_9" -> integers 0..8
# (xgboost expects class labels in [0, num_class)).
y <- train[, ncol(train)]
y <- gsub("Class_", "", y)
y <- as.integer(y) - 1L

# Stack train and test features into a single numeric matrix.
x <- rbind(train[, -ncol(train)], test)
x <- as.matrix(x)
x <- matrix(as.numeric(x), nrow(x), ncol(x))
trind <- seq_along(y)                # row indices of the training set
teind <- (nrow(train) + 1):nrow(x)   # row indices of the test set

# Set necessary parameters. nthread controls the number of cores;
# tune it to your machine.
param <- list("objective" = "multi:softprob",
              "eval_metric" = "mlogloss",
              "num_class" = 9,
              "nthread" = 8)

# Run cross-validation to sanity-check the round count.
cv.nround <- 50
bst.cv <- xgb.cv(param = param, data = x[trind, ], label = y,
                 nfold = 3, nrounds = cv.nround)

# Train the model on the full training set.
nround <- 50
bst <- xgboost(param = param, data = x[trind, ], label = y, nrounds = nround)

# Make predictions: predict() returns a flat vector of 9 class
# probabilities per row; reshape to one row per test example.
pred <- predict(bst, x[teind, ])
pred <- matrix(pred, 9, length(pred) / 9)
pred <- t(pred)

# Output submission.
pred <- format(pred, digits = 2, scientific = FALSE)  # shrink the file size
pred <- data.frame(seq_len(nrow(pred)), pred)
names(pred) <- c("id", paste0("Class_", 1:9))
write.csv(pred, file = "submission.csv", quote = FALSE, row.names = FALSE)
|
||||
@ -1,24 +0,0 @@
|
||||
Benchmark for the Otto Group Competition
|
||||
=========
|
||||
|
||||
This is a folder containing the benchmark for the [Otto Group Competition on Kaggle](http://www.kaggle.com/c/otto-group-product-classification-challenge).
|
||||
|
||||
## Getting started
|
||||
|
||||
1. Put `train.csv` and `test.csv` under the `data` folder
|
||||
2. Run the script
|
||||
|
||||
The parameter `nthread` controls the number of cores to run on, please set it to suit your machine.
|
||||
|
||||
## R-package
|
||||
|
||||
To install the R-package of xgboost, please run
|
||||
|
||||
```r
|
||||
devtools::install_github('tqchen/xgboost',subdir='R-package')
|
||||
```
|
||||
|
||||
Windows users may need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first.
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user