Enable ROCm on latest XGBoost

This commit is contained in:
Hui Liu
2023-10-23 11:07:08 -07:00
328 changed files with 8028 additions and 3642 deletions

View File

@@ -3,11 +3,11 @@ find_package(xgboost REQUIRED)
# xgboost is built as static libraries, all cxx dependencies need to be linked into the
# executable.
if (XGBOOST_BUILD_STATIC_LIB)
if(XGBOOST_BUILD_STATIC_LIB)
enable_language(CXX)
# find again for those cxx libraries.
find_package(xgboost REQUIRED)
endif(XGBOOST_BUILD_STATIC_LIB)
endif()
add_executable(api-demo c-api-demo.c)
target_link_libraries(api-demo PRIVATE xgboost::xgboost)

View File

@@ -4,11 +4,11 @@ find_package(xgboost REQUIRED)
# xgboost is built as static libraries, all cxx dependencies need to be linked into the
# executable.
if (XGBOOST_BUILD_STATIC_LIB)
if(XGBOOST_BUILD_STATIC_LIB)
enable_language(CXX)
# find again for those cxx libraries.
find_package(xgboost REQUIRED)
endif(XGBOOST_BUILD_STATIC_LIB)
endif()
add_executable(inference-demo inference.c)
target_link_libraries(inference-demo PRIVATE xgboost::xgboost)

View File

@@ -104,7 +104,7 @@ def check_point_callback():
# Use callback class from xgboost.callback
# Feel free to subclass/customize it to suit your need.
check_point = xgb.callback.TrainingCheckPoint(
directory=tmpdir, iterations=rounds, name="model"
directory=tmpdir, interval=rounds, name="model"
)
xgb.train(
{"objective": "binary:logistic"},
@@ -118,7 +118,7 @@ def check_point_callback():
# This version of checkpoint saves everything including parameters and
# model. See: doc/tutorials/saving_model.rst
check_point = xgb.callback.TrainingCheckPoint(
directory=tmpdir, iterations=rounds, as_pickle=True, name="model"
directory=tmpdir, interval=rounds, as_pickle=True, name="model"
)
xgb.train(
{"objective": "binary:logistic"},

View File

@@ -24,8 +24,8 @@ param <- list("objective" = "binary:logitraw",
"nthread" = 16)
watchlist <- list("train" = xgmat)
nrounds <- 120
print ("loading data end, start to boost trees")
print("loading data end, start to boost trees")
bst <- xgb.train(param, xgmat, nrounds, watchlist)
# save out model
xgb.save(bst, "higgs.model")
print ('finish training')
print('finish training')

View File

@@ -39,11 +39,11 @@ for (i in seq_along(threads)){
"nthread" = thread)
watchlist <- list("train" = xgmat)
nrounds <- 120
print ("loading data end, start to boost trees")
print("loading data end, start to boost trees")
bst <- xgb.train(param, xgmat, nrounds, watchlist)
# save out model
xgb.save(bst, "higgs.model")
print ('finish training')
print('finish training')
})
}

View File

@@ -85,8 +85,8 @@ shutdown server
## Training with GPUs
To demo with Federated Learning using GPUs, make sure your machine has at least 2 GPUs.
Build XGBoost with the federated learning plugin enabled along with CUDA, but with NCCL
turned off (see the [README](../../plugin/federated/README.md)).
Build XGBoost with the federated learning plugin enabled along with CUDA
(see the [README](../../plugin/federated/README.md)).
Modify `config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
Modify `../config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
above.

View File

@@ -67,7 +67,7 @@ class XGBoostTrainer(Executor):
dtest = xgb.DMatrix('agaricus.txt.test?format=libsvm')
# Specify parameters via map, definition are same as c++ version
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
param = {'tree_method': 'hist', 'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
if self._use_gpus:
self.log_info(fl_ctx, f'Training with GPU {rank}')
param['device'] = f"cuda:{rank}"

View File

@@ -56,4 +56,9 @@ shutdown server
## Training with GPUs
Currently GPUs are not yet supported by vertical federated XGBoost.
To demo with Vertical Federated Learning using GPUs, make sure your machine has at least 2 GPUs.
Build XGBoost with the federated learning plugin enabled along with CUDA
(see the [README](../../plugin/federated/README.md)).
Modify `../config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
above.

View File

@@ -77,13 +77,14 @@ class XGBoostTrainer(Executor):
'gamma': 1.0,
'max_depth': 8,
'min_child_weight': 100,
'tree_method': 'approx',
'tree_method': 'hist',
'grow_policy': 'depthwise',
'objective': 'binary:logistic',
'eval_metric': 'auc',
}
if self._use_gpus:
self.log_info(fl_ctx, 'GPUs are not currently supported by vertical federated XGBoost')
self.log_info(fl_ctx, f'Training with GPU {rank}')
param['device'] = f"cuda:{rank}"
# specify validations set to watch performance
watchlist = [(dtest, "eval"), (dtrain, "train")]