commit
cf274e76f4
@ -17,3 +17,9 @@ make
|
||||
Speed
|
||||
=====
|
||||
speedtest.py compares xgboost's speed on this dataset with sklearn.GBM
|
||||
|
||||
|
||||
Using R module
|
||||
=====
|
||||
* Alternatively, you can run the demo in R using higgs-train.R and higgs-pred.R
|
||||
|
||||
|
||||
23
demo/kaggle-higgs/higgs-pred.R
Normal file
23
demo/kaggle-higgs/higgs-pred.R
Normal file
@ -0,0 +1,23 @@
|
||||
# include xgboost library, must set chdir=TRUE
|
||||
# Prediction script: loads a saved booster, scores data/test.csv and writes
# one row per event (id, rank of its score, predicted class) to a CSV file.
# chdir = TRUE makes the wrapper resolve its own relative paths while sourcing.
source("../../wrapper/xgboost.R", chdir = TRUE)

modelfile <- "higgs.model"
outfile <- "higgs.pred.csv"

# Column 1 is used as the event id; columns 2..31 are passed to the model.
dtest <- read.csv("data/test.csv", header = TRUE)
idx <- dtest[[1]]
data <- as.matrix(dtest[2:31])

# -999.0 is passed to the wrapper as the missing-value marker.
xgmat <- xgb.DMatrix(data, missing = -999.0)
bst <- xgb.Booster(params = list(nthread = 16), modelfile = modelfile)
ypred <- xgb.predict(bst, xgmat)

# Rank 1 is the lowest score; ties are broken by position in the input.
rorder <- rank(ypred, ties.method = "first")

threshold <- 0.15
# to be completed
# Events ranked above ntop (the top `threshold` fraction) are labelled "s",
# every other event is labelled "b".
nevent <- length(rorder)
ntop <- nevent - as.integer(threshold * nevent)
plabel <- ifelse(rorder > ntop, "s", "b")

outdata <- list(EventId = idx, RankOrder = rorder, Class = plabel)
write.csv(outdata, file = outfile, quote = FALSE, row.names = FALSE)
|
||||
@ -21,8 +21,7 @@ idx = dtest[:,0]
|
||||
|
||||
print ('finish loading from csv ')
|
||||
xgmat = xgb.DMatrix( data, missing = -999.0 )
|
||||
bst = xgb.Booster({'nthread':16})
|
||||
bst.load_model( modelfile )
|
||||
bst = xgb.Booster({'nthread':16}, model_file = modelfile)
|
||||
ypred = bst.predict( xgmat )
|
||||
|
||||
res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
|
||||
|
||||
31
demo/kaggle-higgs/higgs-train.R
Normal file
31
demo/kaggle-higgs/higgs-train.R
Normal file
@ -0,0 +1,31 @@
|
||||
# include xgboost library, must set chdir=TRUE
|
||||
# Training script: reads data/training.csv, builds a weighted DMatrix, boosts
# trees for a fixed number of rounds and saves the model to "higgs.model".
# chdir = TRUE makes the wrapper resolve its own relative paths while sourcing.
source("../../wrapper/xgboost.R", chdir = TRUE)

testsize <- 550000

dtrain <- read.csv("data/training.csv", header = TRUE)

# Column 33 holds the class; "s" becomes 1, anything else becomes 0.
label <- as.numeric(dtrain[[33]] == "s")
# Columns 2..31 are the model inputs.
data <- as.matrix(dtrain[2:31])
# Column 32 holds the per-row weight, rescaled by testsize / number of rows.
weight <- as.numeric(dtrain[[32]]) * testsize / length(label)

# Total rescaled weight of each class; their ratio feeds scale_pos_weight.
sumwpos <- sum(weight * (label == 1.0))
sumwneg <- sum(weight * (label == 0.0))
print(paste("weight statistics: wpos=", sumwpos, "wneg=", sumwneg, "ratio=", sumwneg / sumwpos))

# -999.0 is passed to the wrapper as the missing-value marker.
xgmat <- xgb.DMatrix(
  data,
  info = list(label = label, weight = weight),
  missing = -999.0
)

# "eval_metric" appears twice on purpose: both entries are handed to the
# wrapper, in this order.
param <- list(
  "objective" = "binary:logitraw",
  "scale_pos_weight" = sumwneg / sumwpos,
  "bst:eta" = 0.1,
  "bst:max_depth" = 6,
  "eval_metric" = "auc",
  "eval_metric" = "ams@0.15",
  "silent" = 1,
  "nthread" = 16
)

# The training matrix itself is the only entry in the watch list.
watchlist <- list(train = xgmat)
nround <- 120

print("loading data end, start to boost trees")
bst <- xgb.train(param, xgmat, nround, watchlist)

# save out model
xgb.save(bst, "higgs.model")
print("finish training")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user