This makes GPU Hist robust in a distributed environment, as some workers might not be associated with any data in either training or evaluation. * Disable rabit mock test for now: See #5012. * Disable dask-cudf test at prediction for now: See #5003. * Launch dask job for all workers even though they might not have any data. * Check 0 rows in elementwise evaluation metrics. Using AUC and AUC-PR still throws an error. See #4663 for a robust fix. * Add tests for edge cases. * Add `LaunchKernel` wrapper handling zero sized grid. * Move some parts of allreducer into a cu file. * Don't validate feature names when the booster is empty. * Sync number of columns in DMatrix, as num_feature is required to be the same across all workers in data split mode. * Filtering in dask interface now by default syncs all boosters that are not empty, instead of using rank 0. * Fix Jenkins' GPU tests. * Install dask-cuda from source in Jenkins' test. Now all tests are actually running. * Restore GPU Hist tree synchronization test. * Check UUID of running devices. The check is only performed on CUDA version >= 10.x, as 9.x doesn't have a UUID field. * Fix CMake policy and project variables. Use xgboost_SOURCE_DIR uniformly, add policy for CMake >= 3.13. * Fix copying data to CPU. * Fix race condition in CPU predictor. * Fix duplicated DMatrix construction. * Don't download extra nccl in CI script.
90 lines
2.3 KiB
C++
90 lines
2.3 KiB
C++
/*!
|
|
* Copyright by Contributors 2017-2019
|
|
*/
|
|
#pragma once
|
|
#include <xgboost/logging.h>
#include <chrono>
#include <cstdio>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
namespace xgboost {
|
|
namespace common {
|
|
|
|
struct Timer {
|
|
using ClockT = std::chrono::high_resolution_clock;
|
|
using TimePointT = std::chrono::high_resolution_clock::time_point;
|
|
using DurationT = std::chrono::high_resolution_clock::duration;
|
|
using SecondsT = std::chrono::duration<double>;
|
|
|
|
TimePointT start;
|
|
DurationT elapsed;
|
|
Timer() { Reset(); }
|
|
void Reset() {
|
|
elapsed = DurationT::zero();
|
|
Start();
|
|
}
|
|
void Start() { start = ClockT::now(); }
|
|
void Stop() { elapsed += ClockT::now() - start; }
|
|
double ElapsedSeconds() const { return SecondsT(elapsed).count(); }
|
|
void PrintElapsed(std::string label) {
|
|
char buffer[255];
|
|
snprintf(buffer, sizeof(buffer), "%s:\t %fs", label.c_str(),
|
|
SecondsT(elapsed).count());
|
|
LOG(CONSOLE) << buffer;
|
|
Reset();
|
|
}
|
|
};
|
|
|
|
/**
|
|
* \struct Monitor
|
|
*
|
|
* \brief Timing utility used to measure total method execution time over the
|
|
* lifetime of the containing object.
|
|
*/
|
|
struct Monitor {
|
|
private:
|
|
struct Statistics {
|
|
Timer timer;
|
|
size_t count{0};
|
|
uint64_t nvtx_id;
|
|
};
|
|
|
|
// from left to right, <name <count, elapsed>>
|
|
using StatMap = std::map<std::string, std::pair<size_t, size_t>>;
|
|
|
|
std::string label = "";
|
|
std::map<std::string, Statistics> statistics_map;
|
|
Timer self_timer;
|
|
|
|
/*! \brief Collect time statistics across all workers. */
|
|
std::vector<StatMap> CollectFromOtherRanks() const;
|
|
void PrintStatistics(StatMap const& statistics) const;
|
|
|
|
public:
|
|
Monitor() { self_timer.Start(); }
|
|
/*\brief Print statistics info during destruction.
|
|
*
|
|
* Please note that this may not work, as with distributed frameworks like Dask, the
|
|
* model is pickled to other workers, and the global parameters like `global_verbosity_`
|
|
* are not included in the pickle.
|
|
*/
|
|
~Monitor() {
|
|
this->Print();
|
|
self_timer.Stop();
|
|
}
|
|
|
|
/*! \brief Print all the statistics. */
|
|
void Print() const;
|
|
|
|
void Init(std::string label) { this->label = label; }
|
|
void Start(const std::string &name);
|
|
void Stop(const std::string &name);
|
|
void StartCuda(const std::string &name);
|
|
void StopCuda(const std::string &name);
|
|
};
|
|
} // namespace common
|
|
} // namespace xgboost
|