Extract interaction constraint from split evaluator. (#5034)
* Extract interaction constraints from split evaluator. The reason for doing so is mostly model IO, where num_feature and interaction_constraints are copied in the split evaluator. Also, an interaction constraint is by itself a feature selector, acting like the column sampler, so it's inefficient to bury it deep in the evaluator chain. Lastly, removing another copied parameter is a win. * Enable interaction constraints for the approx tree method. Now that the implementation is split out from the evaluator class, it is also enabled for the approx method. * Remove obsolete code in colmaker. It was never documented nor actually used in the real world, and there isn't a single test for those code blocks. * Unify the types used for rows and columns. As the size of input datasets marches toward billions, incorrect use of int is subject to overflow; also, signed integer overflow is undefined behaviour. This PR starts the procedure of unifying the index type to unsigned integers. There is an optimization that can utilize this undefined behaviour, but after some testing I don't see that the optimization is beneficial to XGBoost.
This commit is contained in:
@@ -229,7 +229,7 @@ DMatrix* DMatrix::Load(const std::string& uri,
|
||||
|
||||
std::unique_ptr<dmlc::Parser<uint32_t> > parser(
|
||||
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str()));
|
||||
DMatrix* dmat;
|
||||
DMatrix* dmat {nullptr};
|
||||
|
||||
try {
|
||||
dmat = DMatrix::Create(parser.get(), cache_file, page_size);
|
||||
@@ -253,9 +253,8 @@ DMatrix* DMatrix::Load(const std::string& uri,
|
||||
<< "Choosing default parser in dmlc-core. "
|
||||
<< "Consider providing a uri parameter like: filename?format=csv";
|
||||
}
|
||||
|
||||
LOG(FATAL) << "Encountered parser error:\n" << e.what();
|
||||
}
|
||||
LOG(FATAL) << "Encountered parser error:\n" << e.what();
|
||||
}
|
||||
|
||||
if (!silent) {
|
||||
@@ -361,8 +360,8 @@ DMatrix* DMatrix::Create(std::unique_ptr<DataSource<SparsePage>>&& source,
|
||||
namespace xgboost {
|
||||
SparsePage SparsePage::GetTranspose(int num_columns) const {
|
||||
SparsePage transpose;
|
||||
common::ParallelGroupBuilder<Entry> builder(&transpose.offset.HostVector(),
|
||||
&transpose.data.HostVector());
|
||||
common::ParallelGroupBuilder<Entry, bst_row_t> builder(&transpose.offset.HostVector(),
|
||||
&transpose.data.HostVector());
|
||||
const int nthread = omp_get_max_threads();
|
||||
builder.InitBudget(num_columns, nthread);
|
||||
long batch_size = static_cast<long>(this->Size()); // NOLINT(*)
|
||||
@@ -424,7 +423,7 @@ void SparsePage::Push(const dmlc::RowBlock<uint32_t>& batch) {
|
||||
|
||||
void SparsePage::PushCSC(const SparsePage &batch) {
|
||||
std::vector<xgboost::Entry>& self_data = data.HostVector();
|
||||
std::vector<size_t>& self_offset = offset.HostVector();
|
||||
std::vector<bst_row_t>& self_offset = offset.HostVector();
|
||||
|
||||
auto const& other_data = batch.data.ConstHostVector();
|
||||
auto const& other_offset = batch.offset.ConstHostVector();
|
||||
@@ -442,7 +441,7 @@ void SparsePage::PushCSC(const SparsePage &batch) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<size_t> offset(other_offset.size());
|
||||
std::vector<bst_row_t> offset(other_offset.size());
|
||||
offset[0] = 0;
|
||||
|
||||
std::vector<xgboost::Entry> data(self_data.size() + other_data.size());
|
||||
|
||||
@@ -29,7 +29,7 @@ namespace data {
|
||||
template <typename T>
|
||||
__global__ void CountValidKernel(Columnar<T> const column,
|
||||
bool has_missing, float missing,
|
||||
int32_t* flag, common::Span<size_t> offsets) {
|
||||
int32_t* flag, common::Span<bst_row_t> offsets) {
|
||||
auto const tid = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
bool const missing_is_nan = common::CheckNAN(missing);
|
||||
|
||||
@@ -59,7 +59,7 @@ __global__ void CountValidKernel(Columnar<T> const column,
|
||||
|
||||
template <typename T>
|
||||
__device__ void AssignValue(T fvalue, int32_t colid,
|
||||
common::Span<size_t> out_offsets, common::Span<Entry> out_data) {
|
||||
common::Span<bst_row_t> out_offsets, common::Span<Entry> out_data) {
|
||||
auto const tid = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
int32_t oid = out_offsets[tid];
|
||||
out_data[oid].fvalue = fvalue;
|
||||
@@ -70,7 +70,7 @@ __device__ void AssignValue(T fvalue, int32_t colid,
|
||||
template <typename T>
|
||||
__global__ void CreateCSRKernel(Columnar<T> const column,
|
||||
int32_t colid, bool has_missing, float missing,
|
||||
common::Span<size_t> offsets, common::Span<Entry> out_data) {
|
||||
common::Span<bst_row_t> offsets, common::Span<Entry> out_data) {
|
||||
auto const tid = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
if (column.size <= tid) {
|
||||
return;
|
||||
@@ -98,7 +98,7 @@ __global__ void CreateCSRKernel(Columnar<T> const column,
|
||||
template <typename T>
|
||||
void CountValid(std::vector<Json> const& j_columns, uint32_t column_id,
|
||||
bool has_missing, float missing,
|
||||
HostDeviceVector<size_t>* out_offset,
|
||||
HostDeviceVector<bst_row_t>* out_offset,
|
||||
dh::caching_device_vector<int32_t>* out_d_flag,
|
||||
uint32_t* out_n_rows) {
|
||||
uint32_t constexpr kThreads = 256;
|
||||
@@ -121,7 +121,7 @@ void CountValid(std::vector<Json> const& j_columns, uint32_t column_id,
|
||||
CHECK_EQ(out_offset->Size(), n_rows + 1)
|
||||
<< "All columns should have same number of rows.";
|
||||
|
||||
common::Span<size_t> s_offsets = out_offset->DeviceSpan();
|
||||
common::Span<bst_row_t> s_offsets = out_offset->DeviceSpan();
|
||||
|
||||
uint32_t const kBlocks = common::DivRoundUp(n_rows, kThreads);
|
||||
dh::LaunchKernel {kBlocks, kThreads} (
|
||||
@@ -135,7 +135,7 @@ void CountValid(std::vector<Json> const& j_columns, uint32_t column_id,
|
||||
template <typename T>
|
||||
void CreateCSR(std::vector<Json> const& j_columns, uint32_t column_id, uint32_t n_rows,
|
||||
bool has_missing, float missing,
|
||||
dh::device_vector<size_t>* tmp_offset, common::Span<Entry> s_data) {
|
||||
dh::device_vector<bst_row_t>* tmp_offset, common::Span<Entry> s_data) {
|
||||
uint32_t constexpr kThreads = 256;
|
||||
auto const& j_column = j_columns[column_id];
|
||||
auto const& column_obj = get<Object const>(j_column);
|
||||
@@ -174,13 +174,13 @@ void SimpleCSRSource::FromDeviceColumnar(std::vector<Json> const& columns,
|
||||
info.num_row_ = n_rows;
|
||||
|
||||
auto s_offsets = this->page_.offset.DeviceSpan();
|
||||
thrust::device_ptr<size_t> p_offsets(s_offsets.data());
|
||||
thrust::device_ptr<bst_row_t> p_offsets(s_offsets.data());
|
||||
CHECK_GE(s_offsets.size(), n_rows + 1);
|
||||
|
||||
thrust::inclusive_scan(p_offsets, p_offsets + n_rows + 1, p_offsets);
|
||||
// Created for building csr matrix, where we need to change index after processing each
|
||||
// column.
|
||||
dh::device_vector<size_t> tmp_offset(this->page_.offset.Size());
|
||||
dh::device_vector<bst_row_t> tmp_offset(this->page_.offset.Size());
|
||||
dh::safe_cuda(cudaMemcpy(tmp_offset.data().get(), s_offsets.data(),
|
||||
s_offsets.size_bytes(), cudaMemcpyDeviceToDevice));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user