[R] Set number of threads in demos and tests. (#9591)

- Restrict the number of threads in IO.
- Specify the number of threads in demos and tests.
- Add helper scripts for checks.
This commit is contained in:
Jiaming Yuan
2023-09-23 21:44:03 +08:00
committed by GitHub
parent def77870f3
commit cac2cd2e94
51 changed files with 714 additions and 296 deletions

View File

@@ -31,6 +31,8 @@ require(data.table)
if (!require('vcd')) {
install.packages('vcd')
}
data.table::setDTthreads(2)
```
> **VCD** package is used for one of its embedded dataset only.
@@ -297,23 +299,25 @@ test <- agaricus.test
#Random Forest - 1000 trees
bst <- xgboost(
data = train$data
, label = train$label
, max_depth = 4
, num_parallel_tree = 1000
, subsample = 0.5
, colsample_bytree = 0.5
, nrounds = 1
, objective = "binary:logistic"
data = train$data,
label = train$label,
max_depth = 4,
num_parallel_tree = 1000,
subsample = 0.5,
colsample_bytree = 0.5,
nrounds = 1,
objective = "binary:logistic",
nthread = 2
)
#Boosting - 3 rounds
bst <- xgboost(
data = train$data
, label = train$label
, max_depth = 4
, nrounds = 3
, objective = "binary:logistic"
data = train$data,
label = train$label,
max_depth = 4,
nrounds = 3,
objective = "binary:logistic",
nthread = 2
)
```

View File

@@ -86,9 +86,10 @@ data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
nrounds = 2, objective = "binary:logistic")
nrounds = 2, objective = "binary:logistic", nthread = 2)
xgb.save(bst, 'model.save')
bst = xgb.load('model.save')
xgb.parameters(bst) <- list(nthread = 2)
pred <- predict(bst, test$data)
@
@@ -127,7 +128,7 @@ training from initial prediction value, weighted training instance.
We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object:
<<xgb.DMatrix>>=
dtrain <- xgb.DMatrix(train$data, label = train$label)
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)
class(dtrain)
head(getinfo(dtrain,'label'))
@
@@ -161,9 +162,9 @@ evalerror <- function(preds, dtrain) {
return(list(metric = "MSE", value = err))
}
dtest <- xgb.DMatrix(test$data, label = test$label)
dtest <- xgb.DMatrix(test$data, label = test$label, nthread = 2)
watchlist <- list(eval = dtest, train = dtrain)
param <- list(max_depth = 2, eta = 1)
param <- list(max_depth = 2, eta = 1, nthread = 2)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
@

View File

@@ -173,13 +173,13 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**
```{r trainingDense, message=F, warning=F}
bstDense <- xgboost(
data = as.matrix(train$data)
, label = train$label
, max_depth = 2
, eta = 1
, nthread = 2
, nrounds = 2
, objective = "binary:logistic"
data = as.matrix(train$data),
label = train$label,
max_depth = 2,
eta = 1,
nthread = 2,
nrounds = 2,
objective = "binary:logistic"
)
```
@@ -188,14 +188,14 @@ bstDense <- xgboost(
**XGBoost** offers a way to group them in a `xgb.DMatrix`. You can even add other meta data in it. It will be useful for the most advanced features we will discover later.
```{r trainingDmatrix, message=F, warning=F}
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
bstDMatrix <- xgboost(
data = dtrain
, max_depth = 2
, eta = 1
, nthread = 2
, nrounds = 2
, objective = "binary:logistic"
data = dtrain,
max_depth = 2,
eta = 1,
nthread = 2,
nrounds = 2,
objective = "binary:logistic"
)
```
@@ -314,8 +314,8 @@ Most of the features below have been implemented to help you to improve your mod
For the following advanced features, we need to put data in `xgb.DMatrix` as explained above.
```{r DMatrix, message=F, warning=F}
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtest <- xgb.DMatrix(data = test$data, label = test$label)
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
dtest <- xgb.DMatrix(data = test$data, label = test$label, nthread = 2)
```
### Measure learning progress with xgb.train
@@ -476,6 +476,7 @@ An interesting test to see how identical our saved model is to the original one
```{r loadModel, message=F, warning=F}
# load binary model to R
bst2 <- xgb.load("xgboost.model")
xgb.parameters(bst2) <- list(nthread = 2)
pred2 <- predict(bst2, test$data)
# And now the test
@@ -500,6 +501,7 @@ print(class(rawVec))
# load binary model to R
bst3 <- xgb.load(rawVec)
xgb.parameters(bst3) <- list(nthread = 2)
pred3 <- predict(bst3, test$data)
# pred2 should be identical to pred

View File

@@ -175,7 +175,7 @@ bst_preds == bst_from_json_preds
None are exactly equal again. What is going on here? Well, since we are using the value `1` in the calculations, we have introduced a double into the calculation. Because of this, all float values are promoted to 64-bit doubles and the 64-bit version of the exponentiation operator `exp` is also used. On the other hand, xgboost uses the 32-bit version of the exponentiation operator in its [sigmoid function](https://github.com/dmlc/xgboost/blob/54980b8959680a0da06a3fc0ec776e47c8cbb0a1/src/common/math.h#L25-L27).
How do we fix this? We have to ensure we use the correct data types everywhere and the correct operators. If we use only floats, the float library that we have loaded will ensure the 32-bit float exponentiation operator is applied.
How do we fix this? We have to ensure we use the correct data types everywhere and the correct operators. If we use only floats, the float library that we have loaded will ensure the 32-bit float exponentiation operator is applied.
```{r}
# calculate the predictions casting doubles to floats
bst_from_json_preds <- ifelse(