Refactor fast-hist, add tests for some updaters. (#3836)

Add unittest for prune.

Add unittest for refresh.

Refactor fast_hist.

* Remove fast_hist_param.
* Rename to quantile_hist.

Add unittests for QuantileHist.

* Refactor QuantileHist into .h and .cc file.
* Remove sync.h.
* Remove MGPU_mock test.

Rename fast hist method to quantile hist.
This commit is contained in:
Jiaming Yuan
2018-11-07 21:15:07 +13:00
committed by GitHub
parent 2b045aa805
commit 19ee0a3579
30 changed files with 1366 additions and 983 deletions

View File

@@ -4,10 +4,11 @@
* \brief Utilities to store histograms
* \author Philip Cho, Tianqi Chen
*/
#include <rabit/rabit.h>
#include <dmlc/omp.h>
#include <numeric>
#include <vector>
#include "./sync.h"
#include "./random.h"
#include "./column_matrix.h"
#include "./hist_util.h"
@@ -216,7 +217,7 @@ FindGroups(const std::vector<unsigned>& feature_list,
const std::vector<size_t>& feature_nnz,
const ColumnMatrix& colmat,
size_t nrow,
const FastHistParam& param) {
const tree::TrainParam& param) {
/* Goal: Bundle features together that have little or no "overlap", i.e.
only a few data points should have nonzero values for
member features.
@@ -278,7 +279,7 @@ FindGroups(const std::vector<unsigned>& feature_list,
inline std::vector<std::vector<unsigned>>
FastFeatureGrouping(const GHistIndexMatrix& gmat,
const ColumnMatrix& colmat,
const FastHistParam& param) {
const tree::TrainParam& param) {
const size_t nrow = gmat.row_ptr.size() - 1;
const size_t nfeature = gmat.cut.row_ptr.size() - 1;
@@ -332,7 +333,7 @@ FastFeatureGrouping(const GHistIndexMatrix& gmat,
void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
const ColumnMatrix& colmat,
const FastHistParam& param) {
const tree::TrainParam& param) {
cut_ = &gmat.cut;
const size_t nrow = gmat.row_ptr.size() - 1;

View File

@@ -11,7 +11,6 @@
#include <limits>
#include <vector>
#include "row_set.h"
#include "../tree/fast_hist_param.h"
#include "../tree/param.h"
#include "./quantile.h"
@@ -19,8 +18,6 @@ namespace xgboost {
namespace common {
using tree::FastHistParam;
/*! \brief sums of gradient statistics corresponding to a histogram bin */
struct GHistEntry {
/*! \brief sum of first-order gradient statistics */
@@ -145,7 +142,7 @@ class GHistIndexBlockMatrix {
public:
void Init(const GHistIndexMatrix& gmat,
const ColumnMatrix& colmat,
const FastHistParam& param);
const tree::TrainParam& param);
inline GHistIndexBlock operator[](size_t i) const {
return {blocks_[i].row_ptr_begin, blocks_[i].index_begin};

View File

@@ -9,9 +9,9 @@
#define XGBOOST_COMMON_IO_H_
#include <dmlc/io.h>
#include <rabit/rabit.h>
#include <string>
#include <cstring>
#include "./sync.h"
namespace xgboost {
namespace common {

View File

@@ -1,13 +0,0 @@
/*!
* Copyright 2014 by Contributors
* \file sync.h
* \brief the synchronization module of rabit;
* redirects to the rabit header
* \author Tianqi Chen
*/
#ifndef XGBOOST_COMMON_SYNC_H_
#define XGBOOST_COMMON_SYNC_H_
#include <rabit/rabit.h>
#endif // XGBOOST_COMMON_SYNC_H_