[R] address some lintr warnings (#8609)

This commit is contained in:
James Lamb
2022-12-17 04:36:14 -06:00
committed by GitHub
parent 53e6e32718
commit 17ce1f26c8
18 changed files with 137 additions and 116 deletions

View File

@@ -4,21 +4,21 @@ require(methods)
# Prediction step: load a saved model, score the test CSV, and write a ranked
# submission file.
# NOTE(review): this span comes from a rendered diff. Where two consecutive
# lines differ only in spacing, the first looks like the pre-change line and
# the second its reformatted replacement -- confirm before deduplicating.
modelfile <- "higgs.model"
outfile <- "higgs.pred.csv"
dtest <- read.csv("data/test.csv", header=TRUE)
dtest <- read.csv("data/test.csv", header = TRUE)
# Columns 2..31 form the feature matrix; column 1 is kept as the row id.
data <- as.matrix(dtest[2:31])
idx <- dtest[[1]]
# -999.0 is passed to xgb.DMatrix as the missing-value marker.
xgmat <- xgb.DMatrix(data, missing = -999.0)
bst <- xgb.load(modelfile=modelfile)
bst <- xgb.load(modelfile = modelfile)
ypred <- predict(bst, xgmat)
# Rank predictions in ascending order; ties.method = "first" breaks ties by
# position, so every row gets a distinct rank.
rorder <- rank(ypred, ties.method="first")
rorder <- rank(ypred, ties.method = "first")
threshold <- 0.15
# to be completed
# ntop is the rank cut-off: rows ranked above it (the highest-scoring
# as.integer(threshold * n) rows) are labelled "s", all others "b".
ntop <- length(rorder) - as.integer(threshold*length(rorder))
ntop <- length(rorder) - as.integer(threshold * length(rorder))
plabel <- ifelse(rorder > ntop, "s", "b")
# Output columns: EventId (row id), RankOrder (rank), Class ("s"/"b").
outdata <- list("EventId" = idx,
"RankOrder" = rorder,
"Class" = plabel)
# Written without quoting and without a row-names column.
write.csv(outdata, file = outfile, quote=FALSE, row.names=FALSE)
write.csv(outdata, file = outfile, quote = FALSE, row.names = FALSE)

View File

@@ -4,14 +4,14 @@ require(methods)
# Training data preparation: read the training CSV, build a 0/1 label and
# rescaled weights, and wrap everything in an xgb.DMatrix.
# NOTE(review): this span comes from a rendered diff. Where two consecutive
# lines differ only in spacing, the first looks like the pre-change line and
# the second its reformatted replacement -- confirm before deduplicating.
testsize <- 550000
dtrain <- read.csv("data/training.csv", header=TRUE)
dtrain <- read.csv("data/training.csv", header = TRUE)
# Column 33 becomes logical: TRUE where the value is "s".
dtrain[33] <- dtrain[33] == "s"
# Numeric label: 1 for "s" rows, 0 otherwise.
label <- as.numeric(dtrain[[33]])
# Columns 2..31 form the feature matrix.
data <- as.matrix(dtrain[2:31])
# Column-32 weights are scaled by testsize / (number of training rows).
# presumably to match the total weight of a test set of `testsize` rows -- TODO confirm
weight <- as.numeric(dtrain[[32]]) * testsize / length(label)
# Total weight of the positive (label 1) and negative (label 0) rows.
sumwpos <- sum(weight * (label==1.0))
sumwneg <- sum(weight * (label==0.0))
sumwpos <- sum(weight * (label == 1.0))
sumwneg <- sum(weight * (label == 0.0))
print(paste("weight statistics: wpos=", sumwpos, "wneg=", sumwneg, "ratio=", sumwneg / sumwpos))
# -999.0 is passed to xgb.DMatrix as the missing-value marker.
xgmat <- xgb.DMatrix(data, label = label, weight = weight, missing = -999.0)
@@ -25,7 +25,7 @@ param <- list("objective" = "binary:logitraw",
# Training step: fit the booster on xgmat and save it to disk.
# NOTE(review): `param` is defined in lines not shown in this hunk. The two
# xgb.train lines look like a before/after pair from a rendered diff (the
# second drops the stray space and trailing semicolon) -- confirm before
# deduplicating.
# The training matrix is registered in the watchlist under the name "train".
watchlist <- list("train" = xgmat)
# Passed to xgb.train as its third positional argument.
nrounds <- 120
print ("loading data end, start to boost trees")
bst <- xgb.train(param, xgmat, nrounds, watchlist );
bst <- xgb.train(param, xgmat, nrounds, watchlist)
# save out model
xgb.save(bst, "higgs.model")
print ('finish training')

View File

@@ -5,10 +5,10 @@ require(methods)
# Speed-test data loading: read a capped number of training rows and convert
# the Label column to 0/1.
# NOTE(review): this span comes from a rendered diff. Where two consecutive
# lines differ only in spacing, the first looks like the pre-change line and
# the second its reformatted replacement -- confirm before deduplicating.
testsize <- 550000
# nrows caps how many rows read.csv reads from the file.
dtrain <- read.csv("data/training.csv", header=TRUE, nrows=350001)
dtrain$Label <- as.numeric(dtrain$Label=='s')
dtrain <- read.csv("data/training.csv", header = TRUE, nrows = 350001)
# Label becomes 1 where the original value is 's', 0 otherwise.
dtrain$Label <- as.numeric(dtrain$Label == 's')
# The commented-out block below times a gbm fit; presumably kept as a
# baseline for comparison -- TODO confirm.
# gbm.time = system.time({
# gbm.model <- gbm(Label ~ ., data = dtrain[, -c(1,32)], n.trees = 120,
# gbm.model <- gbm(Label ~ ., data = dtrain[, -c(1,32)], n.trees = 120,
# interaction.depth = 6, shrinkage = 0.1, bag.fraction = 1,
# verbose = TRUE)
# })
@@ -20,12 +20,12 @@ dtrain$Label <- as.numeric(dtrain$Label=='s')
# Speed-test setup: feature matrix, rescaled weights, and the list of thread
# counts to benchmark.
# NOTE(review): `label` is defined in lines not shown in this hunk. Where two
# consecutive lines differ only in spacing, they look like a before/after pair
# from a rendered diff -- confirm before deduplicating.
# Columns 2..31 form the feature matrix.
data <- as.matrix(dtrain[2:31])
# Column-32 weights are scaled by testsize / length(label).
weight <- as.numeric(dtrain[[32]]) * testsize / length(label)
# Total weight of the label-1 and label-0 rows.
sumwpos <- sum(weight * (label==1.0))
sumwneg <- sum(weight * (label==0.0))
sumwpos <- sum(weight * (label == 1.0))
sumwneg <- sum(weight * (label == 0.0))
print(paste("weight statistics: wpos=", sumwpos, "wneg=", sumwneg, "ratio=", sumwneg / sumwpos))
# Holds one system.time() result per entry of `threads`, filled in by the
# loop that follows.
xgboost.time <- list()
threads <- c(1,2,4,8,16)
threads <- c(1, 2, 4, 8, 16)
for (i in 1:length(threads)){
thread <- threads[i]
xgboost.time[[i]] <- system.time({
@@ -40,7 +40,7 @@ for (i in 1:length(threads)){
watchlist <- list("train" = xgmat)
nrounds <- 120
print ("loading data end, start to boost trees")
bst <- xgb.train(param, xgmat, nrounds, watchlist );
bst <- xgb.train(param, xgmat, nrounds, watchlist)
# save out model
xgb.save(bst, "higgs.model")
print ('finish training')
@@ -49,22 +49,21 @@ for (i in 1:length(threads)){
# Bare expression: displays the collected timings when run interactively or
# via Rscript. The commented text that follows appears to be a recorded
# sample of that output -- TODO confirm it is still current.
xgboost.time
# [[1]]
# user system elapsed
# 99.015 0.051 98.982
#
# user system elapsed
# 99.015 0.051 98.982
#
# [[2]]
# user system elapsed
# 100.268 0.317 55.473
#
# user system elapsed
# 100.268 0.317 55.473
#
# [[3]]
# user system elapsed
# 111.682 0.777 35.963
#
# user system elapsed
# 111.682 0.777 35.963
#
# [[4]]
# user system elapsed
# 149.396 1.851 32.661
#
# user system elapsed
# 149.396 1.851 32.661
#
# [[5]]
# user system elapsed
# 157.390 5.988 40.949
# user system elapsed
# 157.390 5.988 40.949