Extract interaction constraint from split evaluator. (#5034)
* Extract interaction constraints from split evaluator. The reason for doing so is mostly for model IO, where num_feature and interaction_constraints are copied in the split evaluator. Also, the interaction constraint by itself is a feature selector, acting like a column sampler, and it's inefficient to bury it deep in the evaluator chain. Lastly, removing yet another copied parameter is a win. * Enable interaction constraints for the approx tree method. As the implementation is now split out from the evaluator class, it's also enabled for the approx method. * Remove obsolete code in colmaker. It was never documented nor actually used in the real world. Also, there isn't a single test for those code blocks. * Unify the types used for row and column. As the size of input datasets is marching towards billions, incorrect use of int is subject to overflow, and signed integer overflow is undefined behaviour. This PR starts the procedure of unifying the index types used to unsigned integers. There are optimizations that can utilize this undefined behaviour, but after some testing I don't see that the optimization is beneficial to XGBoost.
This commit is contained in:
@@ -100,17 +100,30 @@
|
||||
|
||||
/*! \brief namespace of xgboost*/
|
||||
namespace xgboost {
|
||||
/*!
|
||||
* \brief unsigned integer type used in boost,
|
||||
* used for feature index and row index.
|
||||
*/
|
||||
|
||||
/*! \brief unsigned integer type used for feature index. */
|
||||
using bst_uint = uint32_t; // NOLINT
|
||||
/*! \brief integer type. */
|
||||
using bst_int = int32_t; // NOLINT
|
||||
/*! \brief long integers */
|
||||
typedef uint64_t bst_ulong; // NOLINT(*)
|
||||
/*! \brief unsigned long integers */
|
||||
using bst_ulong = uint64_t;
|
||||
/*! \brief float type, used for storing statistics */
|
||||
using bst_float = float; // NOLINT
|
||||
|
||||
/*! \brief Type for data column (feature) index. */
|
||||
using bst_feature_t = uint32_t; // NOLINT
|
||||
/*! \brief Type for data row index.
|
||||
*
|
||||
* Be careful: `std::size_t` is implementation-defined, meaning that the binary
|
||||
* representation of DMatrix might not be portable across platforms. Booster model should
|
||||
* be portable as parameters are floating points.
|
||||
*/
|
||||
using bst_row_t = std::size_t; // NOLINT
|
||||
/*! \brief Type for tree node index. */
|
||||
using bst_node_t = int32_t; // NOLINT
|
||||
/*! \brief Type for ranking group index. */
|
||||
using bst_group_t = uint32_t; // NOLINT
|
||||
|
||||
namespace detail {
|
||||
/*! \brief Implementation of gradient statistics pair. Template specialisation
|
||||
* may be used to overload different gradients types e.g. low precision, high
|
||||
|
||||
@@ -57,7 +57,7 @@ class MetaInfo {
|
||||
* \brief the index of begin and end of a group
|
||||
* needed when the learning task is ranking.
|
||||
*/
|
||||
std::vector<bst_uint> group_ptr_;
|
||||
std::vector<bst_group_t> group_ptr_;
|
||||
/*! \brief weights of each instance, optional */
|
||||
HostDeviceVector<bst_float> weights_;
|
||||
/*!
|
||||
@@ -136,7 +136,7 @@ class MetaInfo {
|
||||
/*! \brief Element from a sparse vector */
|
||||
struct Entry {
|
||||
/*! \brief feature index */
|
||||
bst_uint index;
|
||||
bst_feature_t index;
|
||||
/*! \brief feature value */
|
||||
bst_float fvalue;
|
||||
/*! \brief default constructor */
|
||||
@@ -146,7 +146,7 @@ struct Entry {
|
||||
* \param index The feature or row index.
|
||||
* \param fvalue The feature value.
|
||||
*/
|
||||
Entry(bst_uint index, bst_float fvalue) : index(index), fvalue(fvalue) {}
|
||||
Entry(bst_feature_t index, bst_float fvalue) : index(index), fvalue(fvalue) {}
|
||||
/*! \brief reversely compare feature values */
|
||||
inline static bool CmpValue(const Entry& a, const Entry& b) {
|
||||
return a.fvalue < b.fvalue;
|
||||
@@ -174,7 +174,7 @@ struct BatchParam {
|
||||
class SparsePage {
|
||||
public:
|
||||
// Offset for each row.
|
||||
HostDeviceVector<size_t> offset;
|
||||
HostDeviceVector<bst_row_t> offset;
|
||||
/*! \brief the data of the segments */
|
||||
HostDeviceVector<Entry> data;
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
namespace xgboost {
|
||||
class TreeUpdater;
|
||||
namespace gbm {
|
||||
class GBTreeModel;
|
||||
struct GBTreeModel;
|
||||
} // namespace gbm
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user