[R] Redesigned xgboost() interface skeleton (#10456)

---------

Co-authored-by: Michael Mayer <mayermichael79@gmail.com>
This commit is contained in:
david-cortes
2024-07-15 12:44:58 +02:00
committed by GitHub
parent 17c64300e3
commit ab982e7873
35 changed files with 1997 additions and 242 deletions

View File

@@ -173,8 +173,9 @@ Build the model
The code below is very usual. For more information, you can look at the documentation of the `xgboost` function (or at the vignette [XGBoost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)).
```{r}
bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4,
eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic")
bst <- xgboost(x = sparse_matrix, y = output_vector,
params = list(max_depth = 4, eta = 1),
nthread = 2, nrounds = 10)
```
@@ -299,28 +300,28 @@ test <- agaricus.test
#Random Forest - 1000 trees
bst <- xgboost(
data = train$data,
label = train$label,
max_depth = 4,
num_parallel_tree = 1000,
subsample = 0.5,
colsample_bytree = 0.5,
x = train$data,
y = factor(train$label, levels = c(0, 1)),
params = list(
max_depth = 4,
num_parallel_tree = 1000,
subsample = 0.5,
colsample_bytree = 0.5
),
nrounds = 1,
objective = "binary:logistic",
nthread = 2
)
#Boosting - 3 rounds
bst <- xgboost(
data = train$data,
label = train$label,
max_depth = 4,
x = train$data,
y = factor(train$label, levels = c(0, 1)),
params = list(max_depth = 4),
nrounds = 3,
objective = "binary:logistic",
nthread = 2
)
```
> Note that the parameter `round` is set to `1`.
> Note that the parameter `nrounds` is set to `1`.
> [**Random Forests**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software.

View File

@@ -146,22 +146,19 @@ In a *sparse* matrix, cells containing `0` are not stored in memory. Therefore,
We will train a decision tree model using the following parameters:
* `objective = "binary:logistic"`: we will train a binary classification model ;
* `objective = "binary:logistic"`: we will train a binary classification model (note that this is set automatically when `y` is a `factor`) ;
* `max_depth = 2`: the trees won't be deep, because our case is very simple ;
* `nthread = 2`: the number of CPU threads we are going to use;
* `nrounds = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction.
```{r trainingSparse, message=F, warning=F}
bstSparse <- xgboost(
data = train$data
, label = train$label
, params = list(
max_depth = 2
, eta = 1
, nthread = 2
, objective = "binary:logistic"
)
x = train$data
, y = factor(train$label, levels = c(0, 1))
, objective = "binary:logistic"
, params = list(max_depth = 2, eta = 1)
, nrounds = 2
, nthread = 2
)
```
@@ -175,15 +172,11 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**
```{r trainingDense, message=F, warning=F}
bstDense <- xgboost(
data = as.matrix(train$data),
label = train$label,
params = list(
max_depth = 2,
eta = 1,
nthread = 2,
objective = "binary:logistic"
),
nrounds = 2
x = as.matrix(train$data),
y = factor(train$label, levels = c(0, 1)),
params = list(max_depth = 2, eta = 1),
nrounds = 2,
nthread = 2
)
```
@@ -193,7 +186,7 @@ bstDense <- xgboost(
```{r trainingDmatrix, message=F, warning=F}
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
bstDMatrix <- xgboost(
bstDMatrix <- xgb.train(
data = dtrain,
params = list(
max_depth = 2,
@@ -213,7 +206,7 @@ One of the simplest ways to see the training progress is to set the `verbose` opt
```{r trainingVerbose0, message=T, warning=F}
# verbose = 0, no message
bst <- xgboost(
bst <- xgb.train(
data = dtrain
, params = list(
max_depth = 2
@@ -228,7 +221,7 @@ bst <- xgboost(
```{r trainingVerbose1, message=T, warning=F}
# verbose = 1, print evaluation metric
bst <- xgboost(
bst <- xgb.train(
data = dtrain
, params = list(
max_depth = 2
@@ -243,7 +236,7 @@ bst <- xgboost(
```{r trainingVerbose2, message=T, warning=F}
# verbose = 2, also print information about tree
bst <- xgboost(
bst <- xgb.train(
data = dtrain
, params = list(
max_depth = 2