[R] Set number of threads in demos and tests. (#9591)
- Restrict the number of threads in IO. - Specify the number of threads in demos and tests. - Add helper scripts for checks.
This commit is contained in:
@@ -31,6 +31,8 @@ require(data.table)
|
||||
if (!require('vcd')) {
|
||||
install.packages('vcd')
|
||||
}
|
||||
|
||||
data.table::setDTthreads(2)
|
||||
```
|
||||
|
||||
> The **VCD** package is used only for one of its embedded datasets.
|
||||
@@ -297,23 +299,25 @@ test <- agaricus.test
|
||||
|
||||
#Random Forest - 1000 trees
|
||||
bst <- xgboost(
|
||||
data = train$data
|
||||
, label = train$label
|
||||
, max_depth = 4
|
||||
, num_parallel_tree = 1000
|
||||
, subsample = 0.5
|
||||
, colsample_bytree = 0.5
|
||||
, nrounds = 1
|
||||
, objective = "binary:logistic"
|
||||
data = train$data,
|
||||
label = train$label,
|
||||
max_depth = 4,
|
||||
num_parallel_tree = 1000,
|
||||
subsample = 0.5,
|
||||
colsample_bytree = 0.5,
|
||||
nrounds = 1,
|
||||
objective = "binary:logistic",
|
||||
nthread = 2
|
||||
)
|
||||
|
||||
#Boosting - 3 rounds
|
||||
bst <- xgboost(
|
||||
data = train$data
|
||||
, label = train$label
|
||||
, max_depth = 4
|
||||
, nrounds = 3
|
||||
, objective = "binary:logistic"
|
||||
data = train$data,
|
||||
label = train$label,
|
||||
max_depth = 4,
|
||||
nrounds = 3,
|
||||
objective = "binary:logistic",
|
||||
nthread = 2
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
@@ -86,9 +86,10 @@ data(agaricus.test, package='xgboost')
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
|
||||
nrounds = 2, objective = "binary:logistic")
|
||||
nrounds = 2, objective = "binary:logistic", nthread = 2)
|
||||
xgb.save(bst, 'model.save')
|
||||
bst = xgb.load('model.save')
|
||||
xgb.parameters(bst) <- list(nthread = 2)
|
||||
pred <- predict(bst, test$data)
|
||||
@
|
||||
|
||||
@@ -127,7 +128,7 @@ training from initial prediction value, weighted training instance.
|
||||
|
||||
We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object:
|
||||
<<xgb.DMatrix>>=
|
||||
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
||||
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)
|
||||
class(dtrain)
|
||||
head(getinfo(dtrain,'label'))
|
||||
@
|
||||
@@ -161,9 +162,9 @@ evalerror <- function(preds, dtrain) {
|
||||
return(list(metric = "MSE", value = err))
|
||||
}
|
||||
|
||||
dtest <- xgb.DMatrix(test$data, label = test$label)
|
||||
dtest <- xgb.DMatrix(test$data, label = test$label, nthread = 2)
|
||||
watchlist <- list(eval = dtest, train = dtrain)
|
||||
param <- list(max_depth = 2, eta = 1)
|
||||
param <- list(max_depth = 2, eta = 1, nthread = 2)
|
||||
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
|
||||
@
|
||||
|
||||
@@ -173,13 +173,13 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**
|
||||
|
||||
```{r trainingDense, message=F, warning=F}
|
||||
bstDense <- xgboost(
|
||||
data = as.matrix(train$data)
|
||||
, label = train$label
|
||||
, max_depth = 2
|
||||
, eta = 1
|
||||
, nthread = 2
|
||||
, nrounds = 2
|
||||
, objective = "binary:logistic"
|
||||
data = as.matrix(train$data),
|
||||
label = train$label,
|
||||
max_depth = 2,
|
||||
eta = 1,
|
||||
nthread = 2,
|
||||
nrounds = 2,
|
||||
objective = "binary:logistic"
|
||||
)
|
||||
```
|
||||
|
||||
@@ -188,14 +188,14 @@ bstDense <- xgboost(
|
||||
**XGBoost** offers a way to group them in an `xgb.DMatrix`. You can even add other metadata to it. This will be useful for the most advanced features we will discover later.
|
||||
|
||||
```{r trainingDmatrix, message=F, warning=F}
|
||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
|
||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
|
||||
bstDMatrix <- xgboost(
|
||||
data = dtrain
|
||||
, max_depth = 2
|
||||
, eta = 1
|
||||
, nthread = 2
|
||||
, nrounds = 2
|
||||
, objective = "binary:logistic"
|
||||
data = dtrain,
|
||||
max_depth = 2,
|
||||
eta = 1,
|
||||
nthread = 2,
|
||||
nrounds = 2,
|
||||
objective = "binary:logistic"
|
||||
)
|
||||
```
|
||||
|
||||
@@ -314,8 +314,8 @@ Most of the features below have been implemented to help you to improve your mod
|
||||
For the following advanced features, we need to put data in `xgb.DMatrix` as explained above.
|
||||
|
||||
```{r DMatrix, message=F, warning=F}
|
||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
|
||||
dtest <- xgb.DMatrix(data = test$data, label = test$label)
|
||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
|
||||
dtest <- xgb.DMatrix(data = test$data, label = test$label, nthread = 2)
|
||||
```
|
||||
|
||||
### Measure learning progress with xgb.train
|
||||
@@ -476,6 +476,7 @@ An interesting test to see how identical our saved model is to the original one
|
||||
```{r loadModel, message=F, warning=F}
|
||||
# load binary model to R
|
||||
bst2 <- xgb.load("xgboost.model")
|
||||
xgb.parameters(bst2) <- list(nthread = 2)
|
||||
pred2 <- predict(bst2, test$data)
|
||||
|
||||
# And now the test
|
||||
@@ -500,6 +501,7 @@ print(class(rawVec))
|
||||
|
||||
# load binary model to R
|
||||
bst3 <- xgb.load(rawVec)
|
||||
xgb.parameters(bst3) <- list(nthread = 2)
|
||||
pred3 <- predict(bst3, test$data)
|
||||
|
||||
# pred2 should be identical to pred
|
||||
|
||||
@@ -175,7 +175,7 @@ bst_preds == bst_from_json_preds
|
||||
|
||||
None are exactly equal again. What is going on here? Well, since we are using the value `1` in the calculations, we have introduced a double into the calculation. Because of this, all float values are promoted to 64-bit doubles and the 64-bit version of the exponentiation operator `exp` is also used. On the other hand, xgboost uses the 32-bit version of the exponentiation operator in its [sigmoid function](https://github.com/dmlc/xgboost/blob/54980b8959680a0da06a3fc0ec776e47c8cbb0a1/src/common/math.h#L25-L27).
|
||||
|
||||
How do we fix this? We have to ensure we use the correct data types everywhere and the correct operators. If we use only floats, the float library that we have loaded will ensure the 32-bit float exponentiation operator is applied.
|
||||
How do we fix this? We have to ensure we use the correct data types everywhere and the correct operators. If we use only floats, the float library that we have loaded will ensure the 32-bit float exponentiation operator is applied.
|
||||
```{r}
|
||||
# calculate the predictions casting doubles to floats
|
||||
bst_from_json_preds <- ifelse(
|
||||
|
||||
Reference in New Issue
Block a user