[R] resolve assignment_linter warnings (#8599)

James Lamb 2022-12-16 11:22:41 -06:00 committed by GitHub
parent f6effa1734
commit 53e6e32718
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 43 additions and 43 deletions
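
Every change below swaps `=` assignment for the idiomatic `<-` operator, which is what `{lintr}`'s `assignment_linter()` flags. As a rough sketch (the path and linter configuration here are assumptions, not this repo's actual CI setup), the warnings this commit resolves could be reproduced on a checkout with:

```r
# Sketch only: surface assignment_linter warnings on the R sources.
# "R-package" is an assumed source directory; adjust to the checkout layout.
library(lintr)

lint_dir(
  "R-package",
  linters = linters_with_defaults(assignment_linter())
)
```

As a side note, `styler::style_dir()` under the default tidyverse style guide rewrites `=` assignment to `<-` automatically, which is one way to apply this kind of fix mechanically.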

View File

@@ -156,7 +156,7 @@ head(sparse_matrix)
 Create the output `numeric` vector (not as a sparse `Matrix`):
 ```{r}
-output_vector = df[,Improved] == "Marked"
+output_vector <- df[,Improved] == "Marked"
 ```
 1. set `Y` vector to `0`;

View File

@@ -423,7 +423,7 @@ file.remove("dtrain.buffer")
 Information can be extracted from `xgb.DMatrix` using `getinfo` function. Hereafter we will extract `label` data.
 ```{r getinfo, message=F, warning=F}
-label = getinfo(dtest, "label")
+label <- getinfo(dtest, "label")
 pred <- predict(bst, dtest)
 err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label)
 print(paste("test-error=", err))

View File

@@ -8,11 +8,11 @@ library(dummies)
 library(insuranceData)
 data(AutoClaims)
-data = AutoClaims
-data$STATE = as.factor(data$STATE)
-data$CLASS = as.factor(data$CLASS)
-data$GENDER = as.factor(data$GENDER)
+data <- AutoClaims
+data$STATE <- as.factor(data$STATE)
+data$CLASS <- as.factor(data$CLASS)
+data$GENDER <- as.factor(data$GENDER)
 data.dummy <- dummy.data.frame(data, dummy.class='factor', omit.constants=TRUE);
 write.table(data.dummy, 'autoclaims.csv', sep=',', row.names=F, col.names=F, quote=F)

View File

@@ -23,9 +23,9 @@ param <- list("objective" = "binary:logitraw",
 "eval_metric" = "ams@0.15",
 "nthread" = 16)
 watchlist <- list("train" = xgmat)
-nrounds = 120
+nrounds <- 120
 print ("loading data end, start to boost trees")
-bst = xgb.train(param, xgmat, nrounds, watchlist );
+bst <- xgb.train(param, xgmat, nrounds, watchlist );
 # save out model
 xgb.save(bst, "higgs.model")
 print ('finish training')

View File

@@ -6,7 +6,7 @@ require(methods)
 testsize <- 550000
 dtrain <- read.csv("data/training.csv", header=TRUE, nrows=350001)
-dtrain$Label = as.numeric(dtrain$Label=='s')
+dtrain$Label <- as.numeric(dtrain$Label=='s')
 # gbm.time = system.time({
 # gbm.model <- gbm(Label ~ ., data = dtrain[, -c(1,32)], n.trees = 120,
 # interaction.depth = 6, shrinkage = 0.1, bag.fraction = 1,
@@ -24,11 +24,11 @@ sumwpos <- sum(weight * (label==1.0))
 sumwneg <- sum(weight * (label==0.0))
 print(paste("weight statistics: wpos=", sumwpos, "wneg=", sumwneg, "ratio=", sumwneg / sumwpos))
-xgboost.time = list()
-threads = c(1,2,4,8,16)
+xgboost.time <- list()
+threads <- c(1,2,4,8,16)
 for (i in 1:length(threads)){
-thread = threads[i]
-xgboost.time[[i]] = system.time({
+thread <- threads[i]
+xgboost.time[[i]] <- system.time({
 xgmat <- xgb.DMatrix(data, label = label, weight = weight, missing = -999.0)
 param <- list("objective" = "binary:logitraw",
 "scale_pos_weight" = sumwneg / sumwpos,
@@ -38,9 +38,9 @@ for (i in 1:length(threads)){
 "eval_metric" = "ams@0.15",
 "nthread" = thread)
 watchlist <- list("train" = xgmat)
-nrounds = 120
+nrounds <- 120
 print ("loading data end, start to boost trees")
-bst = xgb.train(param, xgmat, nrounds, watchlist );
+bst <- xgb.train(param, xgmat, nrounds, watchlist );
 # save out model
 xgb.save(bst, "higgs.model")
 print ('finish training')

View File

@@ -1,20 +1,20 @@
 require(xgboost)
 require(methods)
-train = read.csv('data/train.csv',header=TRUE,stringsAsFactors = FALSE)
-test = read.csv('data/test.csv',header=TRUE,stringsAsFactors = FALSE)
-train = train[,-1]
-test = test[,-1]
-y = train[,ncol(train)]
-y = gsub('Class_','',y)
-y = as.integer(y)-1 # xgboost take features in [0,numOfClass)
-x = rbind(train[,-ncol(train)],test)
-x = as.matrix(x)
-x = matrix(as.numeric(x),nrow(x),ncol(x))
-trind = 1:length(y)
-teind = (nrow(train)+1):nrow(x)
+train <- read.csv('data/train.csv',header=TRUE,stringsAsFactors = FALSE)
+test <- read.csv('data/test.csv',header=TRUE,stringsAsFactors = FALSE)
+train <- train[,-1]
+test <- test[,-1]
+y <- train[,ncol(train)]
+y <- gsub('Class_','',y)
+y <- as.integer(y)-1 # xgboost take features in [0,numOfClass)
+x <- rbind(train[,-ncol(train)],test)
+x <- as.matrix(x)
+x <- matrix(as.numeric(x),nrow(x),ncol(x))
+trind <- 1:length(y)
+teind <- (nrow(train)+1):nrow(x)
 # Set necessary parameter
 param <- list("objective" = "multi:softprob",
@@ -23,21 +23,21 @@ param <- list("objective" = "multi:softprob",
 "nthread" = 8)
 # Run Cross Validation
-cv.nrounds = 50
-bst.cv = xgb.cv(param=param, data = x[trind,], label = y,
+cv.nrounds <- 50
+bst.cv <- xgb.cv(param=param, data = x[trind,], label = y,
 nfold = 3, nrounds=cv.nrounds)
 # Train the model
-nrounds = 50
-bst = xgboost(param=param, data = x[trind,], label = y, nrounds=nrounds)
+nrounds <- 50
+bst <- xgboost(param=param, data = x[trind,], label = y, nrounds=nrounds)
 # Make prediction
-pred = predict(bst,x[teind,])
-pred = matrix(pred,9,length(pred)/9)
-pred = t(pred)
+pred <- predict(bst,x[teind,])
+pred <- matrix(pred,9,length(pred)/9)
+pred <- t(pred)
 # Output submission
-pred = format(pred, digits=2,scientific=F) # shrink the size of submission
-pred = data.frame(1:nrow(pred),pred)
-names(pred) = c('id', paste0('Class_',1:9))
+pred <- format(pred, digits=2,scientific=F) # shrink the size of submission
+pred <- data.frame(1:nrow(pred),pred)
+names(pred) <- c('id', paste0('Class_',1:9))
 write.csv(pred,file='submission.csv', quote=FALSE,row.names=FALSE)

View File

@@ -127,7 +127,7 @@ param <- list("objective" = "multi:softprob",
 cv.nrounds <- 5
 cv.nfold <- 3
-bst.cv = xgb.cv(param=param, data = trainMatrix, label = y,
+bst.cv <- xgb.cv(param=param, data = trainMatrix, label = y,
 nfold = cv.nfold, nrounds = cv.nrounds)
 ```
 > As we can see the error rate is low on the test dataset (for a 5mn trained model).
@@ -135,8 +135,8 @@ bst.cv = xgb.cv(param=param, data = trainMatrix, label = y,
 Finally, we are ready to train the real model!!!
 ```{r modelTraining}
-nrounds = 50
-bst = xgboost(param=param, data = trainMatrix, label = y, nrounds=nrounds)
+nrounds <- 50
+bst <- xgboost(param=param, data = trainMatrix, label = y, nrounds=nrounds)
 ```
 Model understanding

View File

@@ -26,7 +26,7 @@ test_x <- test[, 1:34]
 test_y <- test[, V35]
 xg_train <- xgb.DMatrix(data = as.matrix(train_x), label = train_y)
-xg_test = xgb.DMatrix(as.matrix(test_x), label = test_y)
+xg_test <- xgb.DMatrix(as.matrix(test_x), label = test_y)
 params <- list(
 objective = 'multi:softmax',
@@ -36,7 +36,7 @@ params <- list(
 eta = 0.1
 )
-watchlist = list(train = xg_train, test = xg_test)
+watchlist <- list(train = xg_train, test = xg_test)
 bst <- xgb.train(
 params = params,
@@ -60,5 +60,5 @@ pred_mat <- matrix(pred_prob, ncol = 6, byrow = TRUE)
 # rowSums(pred_mat)
 pred_label <- apply(pred_mat, 1, which.max) - 1L
-error_rate = sum(pred_label != test_y) / length(test_y)
+error_rate <- sum(pred_label != test_y) / length(test_y)
 print(paste("Test error using softprob =", error_rate))