Support dataframe data format in native XGBoost. (#9828)

- Implement a columnar adapter.
- Refactor Python pandas handling code to avoid converting into a single numpy array.
- Add support in R for transforming columns.
- Support R data.frame and factor type.
This commit is contained in:
Jiaming Yuan
2023-12-12 09:56:31 +08:00
committed by GitHub
parent b3700bbb3f
commit faf0f2df10
21 changed files with 718 additions and 221 deletions

View File

@@ -322,3 +322,30 @@ test_that("xgb.DMatrix: can get group for both 'qid' and 'group' constructors",
expected_gr <- c(0, 20, 40, 100)
expect_equal(info_gr, expected_gr)
})
test_that("xgb.DMatrix: data.frame", {
  # Frame mixing plain doubles, doubles with NA, integers, logicals with NA,
  # and two factor columns (one converted from strings via
  # `stringsAsFactors`, one built explicitly from numbers).
  mixed_df <- data.frame(
    a = seq_len(4) / 10,
    num = c(1, NA, 3, 4),
    as.int = c(1L, 2L, 3L, 4L),
    lo = c(TRUE, FALSE, NA, TRUE),
    str.fac = c("a", "b", "d", "c"),
    as.fac = factor(c(3, 5, 8, 11)),
    stringsAsFactors = TRUE
  )
  mixed_types <- c("float", "float", "int", "i", "c", "c")

  mixed_dmat <- xgb.DMatrix(mixed_df, enable_categorical = TRUE)
  # Column names should carry over unchanged and each column should be
  # reported with the feature type listed above, in column order.
  expect_equal(colnames(mixed_dmat), colnames(mixed_df))
  expect_equal(getinfo(mixed_dmat, "feature_type"), mixed_types)

  # Building from the same frame without `enable_categorical` must fail.
  # NOTE(review): presumably because of the factor columns -- confirm.
  expect_error(xgb.DMatrix(mixed_df))

  # Factor-only frame where one of the columns contains a missing value.
  factor_df <- data.frame(
    missing = c("a", "b", "d", NA),
    valid = c("a", "b", "d", "c"),
    stringsAsFactors = TRUE
  )
  factor_types <- c("c", "c")

  factor_dmat <- xgb.DMatrix(factor_df, enable_categorical = TRUE)
  expect_equal(getinfo(factor_dmat, "feature_type"), factor_types)
})