Merge pull request #37 from tqchen/unity

Unity
This commit is contained in:
Tianqi Chen 2014-08-23 20:54:27 -07:00
commit cf274e76f4
4 changed files with 61 additions and 2 deletions

View File

@ -17,3 +17,9 @@ make
Speed
=====
speedtest.py compares xgboost's speed on this dataset with sklearn.GBM
Using R module
=====
* Alternatively, you can run the same experiment with R, using higgs-train.R and higgs-pred.R

View File

@ -0,0 +1,23 @@
# Load the xgboost R wrapper; chdir=TRUE so that relative paths
# inside the wrapper resolve correctly.
source("../../wrapper/xgboost.R", chdir=TRUE)

model_path <- "higgs.model"
pred_path <- "higgs.pred.csv"

# Test set: column 1 is the event id, columns 2-31 are the 30 features.
test_df <- read.csv("data/test.csv", header=TRUE)
feature_mat <- as.matrix(test_df[2:31])
event_id <- test_df[[1]]

# -999.0 encodes missing values in the Higgs challenge data.
dtest_mat <- xgb.DMatrix(feature_mat, missing = -999.0)

# Restore the trained booster from disk and score the test set.
booster <- xgb.Booster(params=list("nthread"=16), modelfile=model_path)
scores <- xgb.predict(booster, dtest_mat)

# Rank every event by score; the top fraction are labelled signal ("s"),
# the rest background ("b").
score_rank <- rank(scores, ties.method="first")
signal_fraction <- 0.15
cutoff <- length(score_rank) - as.integer(signal_fraction*length(score_rank))
class_label <- ifelse(score_rank > cutoff, "s", "b")

# Submission format expected by the Kaggle Higgs challenge.
submission <- list("EventId" = event_id,
                   "RankOrder" = score_rank,
                   "Class" = class_label)
write.csv(submission, file = pred_path, quote=FALSE, row.names=FALSE)

View File

@ -21,8 +21,7 @@ idx = dtest[:,0]
print ('finish loading from csv ')
xgmat = xgb.DMatrix( data, missing = -999.0 )
bst = xgb.Booster({'nthread':16})
bst.load_model( modelfile )
bst = xgb.Booster({'nthread':16}, model_file = modelfile)
ypred = bst.predict( xgmat )
res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]

View File

@ -0,0 +1,31 @@
# Load the xgboost R wrapper; chdir=TRUE so that relative paths
# inside the wrapper resolve correctly.
source("../../wrapper/xgboost.R", chdir=TRUE)

# Size of the (unlabelled) test set; training weights are rescaled so
# that their sum matches the test-set size, as the AMS metric expects.
testsize <- 550000

# Training set layout: col 1 = EventId, cols 2-31 = the 30 features,
# col 32 = event weight, col 33 = label ("s" signal / "b" background).
dtrain <- read.csv("data/training.csv", header=TRUE)
dtrain[33] <- dtrain[33] == "s"
label <- as.numeric(dtrain[[33]])
data <- as.matrix(dtrain[2:31])
weight <- as.numeric(dtrain[[32]]) * testsize / length(label)

# Class-balance statistics; the neg/pos weight ratio feeds
# scale_pos_weight below.
sumwpos <- sum(weight * (label==1.0))
sumwneg <- sum(weight * (label==0.0))
print(paste("weight statistics: wpos=", sumwpos, "wneg=", sumwneg, "ratio=", sumwneg / sumwpos))

# -999.0 encodes missing values in the Higgs challenge data.
xgmat <- xgb.DMatrix(data, info = list(label=label, weight=weight), missing = -999.0)

# The duplicate "eval_metric" entries are intentional: xgboost reports
# every metric listed (AUC and approximate median significance at the
# top-15% threshold).
param <- list("objective" = "binary:logitraw",
              "scale_pos_weight" = sumwneg / sumwpos,
              "bst:eta" = 0.1,
              "bst:max_depth" = 6,
              "eval_metric" = "auc",
              "eval_metric" = "ams@0.15",
              "silent" = 1,
              "nthread" = 16)
watchlist <- list("train" = xgmat)
nround <- 120

print("loading data end, start to boost trees")
bst <- xgb.train(param, xgmat, nround, watchlist)

# Persist the trained booster; higgs-pred.R reloads it for scoring.
xgb.save(bst, "higgs.model")
print("finish training")