/*! * Copyright 2016-2019 XGBoost contributors */ #ifndef XGBOOST_TESTS_CPP_HELPERS_H_ #define XGBOOST_TESTS_CPP_HELPERS_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include "../../src/common/common.h" #include "../../src/gbm/gbtree_model.h" #if defined(__CUDACC__) #define DeclareUnifiedTest(name) GPU ## name #else #define DeclareUnifiedTest(name) name #endif #if defined(__CUDACC__) #define GPUIDX 0 #else #define GPUIDX -1 #endif namespace xgboost { class ObjFunction; class Metric; struct LearnerModelParam; class GradientBooster; } bool FileExists(const std::string& filename); int64_t GetFileSize(const std::string& filename); void CreateSimpleTestData(const std::string& filename); void CreateBigTestData(const std::string& filename, size_t n_entries); void CheckObjFunction(std::unique_ptr const& obj, std::vector preds, std::vector labels, std::vector weights, std::vector out_grad, std::vector out_hess); xgboost::Json CheckConfigReloadImpl(xgboost::Configurable* const configurable, std::string name); template xgboost::Json CheckConfigReload(std::unique_ptr const& configurable, std::string name = "") { return CheckConfigReloadImpl(dynamic_cast(configurable.get()), name); } void CheckRankingObjFunction(std::unique_ptr const& obj, std::vector preds, std::vector labels, std::vector weights, std::vector groups, std::vector out_grad, std::vector out_hess); xgboost::bst_float GetMetricEval( xgboost::Metric * metric, xgboost::HostDeviceVector preds, std::vector labels, std::vector weights = std::vector(), std::vector groups = std::vector()); namespace xgboost { bool IsNear(std::vector::const_iterator _beg1, std::vector::const_iterator _end1, std::vector::const_iterator _beg2); /*! * \brief Linear congruential generator. * * The distribution defined in std is not portable. Given the same seed, it * migth produce different outputs on different platforms or with different * compilers. The SimpleLCG implemented here is to make sure all tests are * reproducible. */ class SimpleLCG { private: using StateType = int64_t; static StateType constexpr default_init_ = 3; static StateType constexpr default_alpha_ = 61; static StateType constexpr max_value_ = ((StateType)1 << 32) - 1; StateType state_; StateType const alpha_; StateType const mod_; StateType const seed_; public: SimpleLCG() : state_{default_init_}, alpha_{default_alpha_}, mod_{max_value_}, seed_{state_}{} /*! * \brief Initialize SimpleLCG. * * \param state Initial state, can also be considered as seed. If set to * zero, SimpleLCG will use internal default value. * \param alpha multiplier * \param mod modulo */ SimpleLCG(StateType state, StateType alpha=default_alpha_, StateType mod=max_value_) : state_{state == 0 ? default_init_ : state}, alpha_{alpha}, mod_{mod} , seed_{state} {} StateType operator()(); StateType Min() const; StateType Max() const; }; template class SimpleRealUniformDistribution { private: ResultT const lower; ResultT const upper; /*! \brief Over-simplified version of std::generate_canonical. */ template ResultT GenerateCanonical(GeneratorT* rng) const { static_assert(std::is_floating_point::value, "Result type must be floating point."); long double const r = (static_cast(rng->Max()) - static_cast(rng->Min())) + 1.0L; auto const log2r = static_cast(std::log(r) / std::log(2.0L)); size_t m = std::max(1UL, (Bits + log2r - 1UL) / log2r); ResultT sum_value = 0, r_k = 1; for (size_t k = m; k != 0; --k) { sum_value += ResultT((*rng)() - rng->Min()) * r_k; r_k *= r; } ResultT res = sum_value / r_k; return res; } public: SimpleRealUniformDistribution(ResultT l, ResultT u) : lower{l}, upper{u} {} template ResultT operator()(GeneratorT* rng) const { ResultT tmp = GenerateCanonical::digits, GeneratorT>(rng); return (tmp * (upper - lower)) + lower; } }; // Generate in-memory random data without using DMatrix. class RandomDataGenerator { bst_row_t rows_; size_t cols_; float sparsity_; float lower_; float upper_; int32_t device_; int32_t seed_; Json ArrayInterfaceImpl(HostDeviceVector *storage, size_t rows, size_t cols) const; public: RandomDataGenerator(bst_row_t rows, size_t cols, float sparsity) : rows_{rows}, cols_{cols}, sparsity_{sparsity}, lower_{0.0f}, upper_{1.0f}, device_{-1}, seed_{0} {} RandomDataGenerator &Lower(float v) { lower_ = v; return *this; } RandomDataGenerator& Upper(float v) { upper_ = v; return *this; } RandomDataGenerator& Device(int32_t d) { device_ = d; return *this; } RandomDataGenerator& Seed(int32_t s) { seed_ = s; return *this; } void GenerateDense(HostDeviceVector* out) const; std::string GenerateArrayInterface(HostDeviceVector* storage) const; std::string GenerateColumnarArrayInterface( std::vector> *data) const; void GenerateCSR(HostDeviceVector* value, HostDeviceVector* row_ptr, HostDeviceVector* columns) const; std::shared_ptr GenerateDMatix(bool with_label = false, bool float_label = true, size_t classes = 1) const; }; std::unique_ptr CreateSparsePageDMatrix( size_t n_entries, size_t page_size, std::string tmp_file); /** * \fn std::unique_ptr CreateSparsePageDMatrixWithRC(size_t n_rows, size_t n_cols, * size_t page_size); * * \brief Creates dmatrix with some records, each record containing random number of * features in [1, n_cols] * * \param n_rows Number of records to create. * \param n_cols Max number of features within that record. * \param page_size Sparse page size for the pages within the dmatrix. If page size is 0 * then the entire dmatrix is resident in memory; else, multiple sparse pages * of page size are created and backed to disk, which would have to be * streamed in at point of use. * \param deterministic The content inside the dmatrix is constant for this configuration, if true; * else, the content changes every time this method is invoked * * \return The new dmatrix. */ std::unique_ptr CreateSparsePageDMatrixWithRC( size_t n_rows, size_t n_cols, size_t page_size, bool deterministic, const dmlc::TemporaryDirectory& tempdir = dmlc::TemporaryDirectory()); gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param, size_t n_classes = 1); std::unique_ptr CreateTrainedGBM( std::string name, Args kwargs, size_t kRows, size_t kCols, LearnerModelParam const* learner_model_param, GenericParameter const* generic_param); inline GenericParameter CreateEmptyGenericParam(int gpu_id) { xgboost::GenericParameter tparam; std::vector> args { {"gpu_id", std::to_string(gpu_id)}}; tparam.Init(args); return tparam; } inline HostDeviceVector GenerateRandomGradients(const size_t n_rows, float lower= 0.0f, float upper = 1.0f) { xgboost::SimpleLCG gen; xgboost::SimpleRealUniformDistribution dist(lower, upper); std::vector h_gpair(n_rows); for (auto &gpair : h_gpair) { bst_float grad = dist(&gen); bst_float hess = dist(&gen); gpair = GradientPair(grad, hess); } HostDeviceVector gpair(h_gpair); return gpair; } } // namespace xgboost #endif