[Breaking] Remove rabit support for custom reductions and grow_local_histmaker updater (#7992)

This commit is contained in:
Rong Ou
2022-06-21 00:08:23 -07:00
committed by GitHub
parent 4a87ea49b8
commit e5ec546da5
17 changed files with 36 additions and 1100 deletions

View File

@@ -103,27 +103,5 @@ void Allreduce_(void *sendrecvbuf, // NOLINT
GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun,
prepare_arg);
}
// code for reduce handle
// Default constructor: compiler-generated, this engine does no custom setup here.
// NOTE(review): Init() asserts that redfunc_ is nullptr before first use, so the
// member presumably carries an in-class initializer -- confirm in the header.
ReduceHandle::ReduceHandle() = default;
// Destructor: compiler-generated, no explicit cleanup is performed by this engine.
ReduceHandle::~ReduceHandle() = default;
// Returns the byte size recorded in the datatype descriptor, narrowed to int.
int ReduceHandle::TypeSize(const MPI::Datatype &dtype) {
  const int nbytes = static_cast<int>(dtype.type_size);
  return nbytes;
}
// Registers the user reduction function on this handle.
// The second (size) argument is accepted for interface compatibility and ignored.
// Calling Init on a handle that already holds a function trips the assertion.
void ReduceHandle::Init(IEngine::ReduceFunction redfunc, size_t /*unused*/) {
  const bool not_yet_initialized = (redfunc_ == nullptr);
  utils::Assert(not_yet_initialized, "cannot initialize reduce handle twice");
  redfunc_ = redfunc;
}
// Runs an in-place allreduce over sendrecvbuf using the reduction function that
// was registered through Init(); the work is forwarded to the active engine.
//   sendrecvbuf  buffer handed to the engine for both input and output
//   type_nbytes  size in bytes of one element
//   count        number of elements in the buffer
//   prepare_fun  preprocessing callback, forwarded untouched to the engine
//   prepare_arg  argument forwarded together with prepare_fun
void ReduceHandle::Allreduce(void *sendrecvbuf,
                             size_t type_nbytes, size_t count,
                             IEngine::PreprocFunction prepare_fun,
                             void *prepare_arg) {
  const bool initialized = (redfunc_ != nullptr);
  utils::Assert(initialized, "must initialize handle to call AllReduce");
  GetEngine()->Allreduce(sendrecvbuf,
                         type_nbytes,
                         count,
                         redfunc_,
                         prepare_fun,
                         prepare_arg);
}
} // namespace engine
} // namespace rabit

View File

@@ -158,92 +158,5 @@ void Allreduce_(void *sendrecvbuf,
MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf,
count, GetType(dtype), GetOp(op));
}
// code for reduce handle
// Constructs an empty handle: no MPI op, no reduce function, no MPI datatype.
// created_type_nbytes_ is zeroed as well: Init() leaves it untouched when called
// with type_nbytes == 0, and Allreduce() compares against it, so without this the
// first comparison would read an indeterminate value.
ReduceHandle::ReduceHandle(void)
    : handle_(NULL), redfunc_(NULL), htype_(NULL), created_type_nbytes_(0) {
}
// Releases the MPI op and MPI datatype owned by this handle.
ReduceHandle::~ReduceHandle(void) {
  /* !WARNING!
     A handle may be owned by a tree method/Learner inside xgboost, and the booster
     may stay alive until the program exits, whereas well-behaved users call
     rabit.finalize() before that point. Free() on the op/datatype could therefore
     run after finalization and fail with:
     ```
     Attempting to use an MPI routine after finalizing MPICH
     ```
     As a workaround, Free() is skipped once MPI reports it has been finalized.
     This may leak memory; the proper fix is to get rid of long-living handles.
   */
  int mpi_finalized = 0;
  CHECK_EQ(MPI_Finalized(&mpi_finalized), MPI_SUCCESS);
  const bool mpi_alive = !mpi_finalized;

  // The C++ wrapper objects are always deleted; only the MPI-side release is
  // conditional on MPI still being usable.
  if (handle_ != NULL) {
    MPI::Op *reduce_op = reinterpret_cast<MPI::Op*>(handle_);
    if (mpi_alive) {
      reduce_op->Free();
    }
    delete reduce_op;
  }
  if (htype_ != NULL) {
    MPI::Datatype *elem_type = reinterpret_cast<MPI::Datatype*>(htype_);
    if (mpi_alive) {
      elem_type->Free();
    }
    delete elem_type;
  }
}
// Returns the size of the given MPI datatype as reported by its Get_size().
int ReduceHandle::TypeSize(const MPI::Datatype &dtype) {
  const int nbytes = dtype.Get_size();
  return nbytes;
}
// Prepares the handle for custom reductions over MPI.
//   redfunc      user reduction function, wrapped into a commutative MPI::Op
//   type_nbytes  element size in bytes; when non-zero a matching contiguous MPI
//                datatype is built and committed up front, when zero no datatype
//                is created here
// May be called only once per handle (asserted).
void ReduceHandle::Init(IEngine::ReduceFunction redfunc, size_t type_nbytes) {
  utils::Assert(handle_ == NULL, "cannot initialize reduce handle twice");

  if (type_nbytes != 0) {
    // Use the widest base type whose size divides the element size evenly.
    const bool divisible_by_8 = (type_nbytes % 8 == 0);
    const bool divisible_by_4 = (type_nbytes % 4 == 0);
    MPI::Datatype *elem_type = new MPI::Datatype();
    if (divisible_by_8) {
      *elem_type = MPI::LONG.Create_contiguous(type_nbytes / sizeof(long));  // NOLINT(*)
    } else if (divisible_by_4) {
      *elem_type = MPI::INT.Create_contiguous(type_nbytes / sizeof(int));
    } else {
      *elem_type = MPI::CHAR.Create_contiguous(type_nbytes);
    }
    elem_type->Commit();
    created_type_nbytes_ = type_nbytes;
    htype_ = elem_type;
  }

  // Wrap the reduction function; `true` marks the operation as commutative.
  MPI::User_function *user_fn = redfunc;
  MPI::Op *reduce_op = new MPI::Op();
  reduce_op->Init(user_fn, true);
  handle_ = reduce_op;
}
// Performs an in-place MPI allreduce on sendrecvbuf with the user-defined op.
//   sendrecvbuf  buffer used as both input and output (MPI_IN_PLACE)
//   type_nbytes  size in bytes of one element; the cached MPI datatype is
//                (re)built whenever it is missing or was built for another size
//   count        number of elements in the buffer
//   prepare_fun  optional callback invoked right before the MPI call
//   prepare_arg  argument passed to prepare_fun
// Init() must have been called first (asserted).
void ReduceHandle::Allreduce(void *sendrecvbuf,
                             size_t type_nbytes, size_t count,
                             IEngine::PreprocFunction prepare_fun,
                             void *prepare_arg) {
  utils::Assert(handle_ != NULL, "must initialize handle to call AllReduce");
  MPI::Op *op = reinterpret_cast<MPI::Op*>(handle_);
  MPI::Datatype *dtype = reinterpret_cast<MPI::Datatype*>(htype_);
  // Check the pointer first: when no datatype exists yet, created_type_nbytes_
  // may never have been assigned, so it must not be read.
  if (dtype == NULL || created_type_nbytes_ != type_nbytes) {
    if (dtype == NULL) {
      dtype = new MPI::Datatype();
    } else {
      // Release the datatype built for the previous element size.
      dtype->Free();
    }
    if (type_nbytes % 8 == 0) {
      *dtype = MPI::LONG.Create_contiguous(type_nbytes / sizeof(long));  // NOLINT(*)
    } else if (type_nbytes % 4 == 0) {
      *dtype = MPI::INT.Create_contiguous(type_nbytes / sizeof(int));
    } else {
      *dtype = MPI::CHAR.Create_contiguous(type_nbytes);
    }
    dtype->Commit();
    created_type_nbytes_ = type_nbytes;
    // Keep the datatype on the handle. Previously a datatype created here was
    // never stored, so every call allocated and committed a fresh one that was
    // neither reused nor released by the destructor.
    htype_ = dtype;
  }
  if (prepare_fun != NULL) prepare_fun(prepare_arg);
  MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, *dtype, *op);
}
} // namespace engine
} // namespace rabit