Fix CPU hist init for sparse dataset. (#4625)

* Fix CPU hist init for sparse dataset.
* Implement sparse histogram cut.
* Allow empty features.
* Fix windows build, don't use sparse in distributed environment.
* Comments.
* Smaller threshold.
* Fix windows omp.
* Fix msvc lambda capture.
* Fix MSVC macro.
* Fix MSVC initialization list.
* Fix MSVC initialization list x2.
* Preserve categorical feature behavior.
* Rename matrix to sparse cuts.
* Reuse UseGroup.
* Check for categorical data when adding cut.

Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu>

* Sanity check.
* Fix comments.
* Fix comment.
2019-07-04 19:27:03 -04:00
parent b7a1f22d24
commit d9a47794a5
33 changed files with 681 additions and 299 deletions
--- a/tests/cpp/common/test_gpu_hist_util.cu
+++ b/tests/cpp/common/test_gpu_hist_util.cu
@@ -48,11 +48,11 @@ void TestDeviceSketch(const GPUSet& devices, bool use_external_memory) {
  int gpu_batch_nrows = 0;

  // find quantiles on the CPU
-  HistCutMatrix hmat_cpu;
-  hmat_cpu.Init((*dmat).get(), p.max_bin);
+  HistogramCuts hmat_cpu;
+  hmat_cpu.Build((*dmat).get(), p.max_bin);

  // find the cuts on the GPU
-  HistCutMatrix hmat_gpu;
+  HistogramCuts hmat_gpu;
  size_t row_stride = DeviceSketch(p, CreateEmptyGenericParam(0, devices.Size()), gpu_batch_nrows,
                                   dmat->get(), &hmat_gpu);

@@ -69,12 +69,12 @@ void TestDeviceSketch(const GPUSet& devices, bool use_external_memory) {

  // compare the cuts
  double eps = 1e-2;
-  ASSERT_EQ(hmat_gpu.min_val.size(), num_cols);
-  ASSERT_EQ(hmat_gpu.row_ptr.size(), num_cols + 1);
-  ASSERT_EQ(hmat_gpu.cut.size(), hmat_cpu.cut.size());
-  ASSERT_LT(fabs(hmat_cpu.min_val[0] - hmat_gpu.min_val[0]), eps * nrows);
-  for (int i = 0; i < hmat_gpu.cut.size(); ++i) {
-    ASSERT_LT(fabs(hmat_cpu.cut[i] - hmat_gpu.cut[i]), eps * nrows);
+  ASSERT_EQ(hmat_gpu.MinValues().size(), num_cols);
+  ASSERT_EQ(hmat_gpu.Ptrs().size(), num_cols + 1);
+  ASSERT_EQ(hmat_gpu.Values().size(), hmat_cpu.Values().size());
+  ASSERT_LT(fabs(hmat_cpu.MinValues()[0] - hmat_gpu.MinValues()[0]), eps * nrows);
+  for (int i = 0; i < hmat_gpu.Values().size(); ++i) {
+    ASSERT_LT(fabs(hmat_cpu.Values()[i] - hmat_gpu.Values()[i]), eps * nrows);
  }

  delete dmat;