[R] Redesigned xgboost() interface skeleton (#10456)

---------

Co-authored-by: Michael Mayer <mayermichael79@gmail.com>
This commit is contained in:
david-cortes
2024-07-15 12:44:58 +02:00
committed by GitHub
parent 17c64300e3
commit ab982e7873
35 changed files with 1997 additions and 242 deletions

View File

@@ -173,8 +173,9 @@ Build the model
The code below is very usual. For more information, you can look at the documentation of the `xgboost` function (or at the vignette [XGBoost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)).
```{r}
bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4,
eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic")
bst <- xgboost(x = sparse_matrix, y = output_vector,
params = list(max_depth = 4, eta = 1),
nthread = 2, nrounds = 10)
```
@@ -299,28 +300,28 @@ test <- agaricus.test
#Random Forest - 1000 trees
bst <- xgboost(
data = train$data,
label = train$label,
max_depth = 4,
num_parallel_tree = 1000,
subsample = 0.5,
colsample_bytree = 0.5,
x = train$data,
y = factor(train$label, levels = c(0, 1)),
params = list(
max_depth = 4,
num_parallel_tree = 1000,
subsample = 0.5,
colsample_bytree = 0.5
),
nrounds = 1,
objective = "binary:logistic",
nthread = 2
)
#Boosting - 3 rounds
bst <- xgboost(
data = train$data,
label = train$label,
max_depth = 4,
x = train$data,
y = factor(train$label, levels = c(0, 1)),
params = list(max_depth = 4),
nrounds = 3,
objective = "binary:logistic",
nthread = 2
)
```
> Note that the parameter `round` is set to `1`.
> Note that the parameter `nrounds` is set to `1`.
> [**Random Forests**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software.

View File

@@ -146,22 +146,19 @@ In a *sparse* matrix, cells containing `0` are not stored in memory. Therefore,
We will train a decision tree model using the following parameters:
* `objective = "binary:logistic"`: we will train a binary classification model ;
* `objective = "binary:logistic"`: we will train a binary classification model (note that this is set automatically when `y` is a `factor`) ;
* `max_depth = 2`: the trees won't be deep, because our case is very simple ;
* `nthread = 2`: the number of CPU threads we are going to use;
* `nrounds = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction.
```{r trainingSparse, message=F, warning=F}
bstSparse <- xgboost(
data = train$data
, label = train$label
, params = list(
max_depth = 2
, eta = 1
, nthread = 2
, objective = "binary:logistic"
)
x = train$data
, y = factor(train$label, levels = c(0, 1))
, objective = "binary:logistic"
, params = list(max_depth = 2, eta = 1)
, nrounds = 2
, nthread = 2
)
```
@@ -175,15 +172,11 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**
```{r trainingDense, message=F, warning=F}
bstDense <- xgboost(
data = as.matrix(train$data),
label = train$label,
params = list(
max_depth = 2,
eta = 1,
nthread = 2,
objective = "binary:logistic"
),
nrounds = 2
x = as.matrix(train$data),
y = factor(train$label, levels = c(0, 1)),
params = list(max_depth = 2, eta = 1),
nrounds = 2,
nthread = 2
)
```
@@ -193,7 +186,7 @@ bstDense <- xgboost(
```{r trainingDmatrix, message=F, warning=F}
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
bstDMatrix <- xgboost(
bstDMatrix <- xgb.train(
data = dtrain,
params = list(
max_depth = 2,
@@ -213,7 +206,7 @@ One of the simplest ways to see the training progress is to set the `verbose` opt
```{r trainingVerbose0, message=T, warning=F}
# verbose = 0, no message
bst <- xgboost(
bst <- xgb.train(
data = dtrain
, params = list(
max_depth = 2
@@ -228,7 +221,7 @@ bst <- xgboost(
```{r trainingVerbose1, message=T, warning=F}
# verbose = 1, print evaluation metric
bst <- xgboost(
bst <- xgb.train(
data = dtrain
, params = list(
max_depth = 2
@@ -243,7 +236,7 @@ bst <- xgboost(
```{r trainingVerbose2, message=T, warning=F}
# verbose = 2, also print information about tree
bst <- xgboost(
bst <- xgb.train(
data = dtrain
, params = list(
max_depth = 2