Fix CPU hist init for sparse dataset. (#4625)

* Fix CPU hist init for sparse dataset.

* Implement sparse histogram cut.
* Allow empty features.

* Fix Windows build; don't use sparse in distributed environment.

* Comments.

* Smaller threshold.

* Fix Windows OpenMP.

* Fix MSVC lambda capture.

* Fix MSVC macro.

* Fix MSVC initialization list.

* Fix MSVC initialization list x2.

* Preserve categorical feature behavior.

* Rename matrix to sparse cuts.
* Reuse UseGroup.
* Check for categorical data when adding cut.

Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu>

* Sanity check.

* Fix comments.

* Fix comment.
This commit is contained in:
Jiaming Yuan
2019-07-04 19:27:03 -04:00
committed by Philip Hyunsu Cho
parent b7a1f22d24
commit d9a47794a5
33 changed files with 681 additions and 299 deletions

View File

@@ -60,7 +60,7 @@ class Transform {
Evaluator(Functor func, Range range, GPUSet devices, bool shard) :
func_(func), range_{std::move(range)},
shard_{shard},
distribution_{std::move(GPUDistribution::Block(devices))} {}
distribution_{GPUDistribution::Block(devices)} {}
Evaluator(Functor func, Range range, GPUDistribution dist,
bool shard) :
func_(func), range_{std::move(range)}, shard_{shard},
@@ -142,7 +142,7 @@ class Transform {
Range shard_range {0, static_cast<Range::DifferenceType>(shard_size)};
dh::safe_cuda(cudaSetDevice(device));
const int GRID_SIZE =
static_cast<int>(dh::DivRoundUp(*(range_.end()), kBlockThreads));
static_cast<int>(DivRoundUp(*(range_.end()), kBlockThreads));
detail::LaunchCUDAKernel<<<GRID_SIZE, kBlockThreads>>>(
_func, shard_range, UnpackHDV(_vectors, device)...);
}