[GPU-Plugin] Fix gpu_hist to allow matrices with more than just 2^{32} elements. Also fixed CPU hist algorithm. (#2518)
This commit is contained in:
parent
c85bf9859e
commit
ca7fc9fda3
@ -441,7 +441,7 @@ class bulk_allocator {
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
~bulk_allocator() {
|
~bulk_allocator() {
|
||||||
for (int i = 0; i < d_ptr.size(); i++) {
|
for (size_t i = 0; i < d_ptr.size(); i++) {
|
||||||
if (!(d_ptr[i] == nullptr)) {
|
if (!(d_ptr[i] == nullptr)) {
|
||||||
safe_cuda(cudaSetDevice(_device_idx[i]));
|
safe_cuda(cudaSetDevice(_device_idx[i]));
|
||||||
safe_cuda(cudaFree(d_ptr[i]));
|
safe_cuda(cudaFree(d_ptr[i]));
|
||||||
@ -522,7 +522,7 @@ inline size_t available_memory(int device_idx) {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void print(const thrust::device_vector<T> &v, size_t max_items = 10) {
|
void print(const thrust::device_vector<T> &v, size_t max_items = 10) {
|
||||||
thrust::host_vector<T> h = v;
|
thrust::host_vector<T> h = v;
|
||||||
for (int i = 0; i < std::min(max_items, h.size()); i++) {
|
for (size_t i = 0; i < std::min(max_items, h.size()); i++) {
|
||||||
std::cout << " " << h[i];
|
std::cout << " " << h[i];
|
||||||
}
|
}
|
||||||
std::cout << "\n";
|
std::cout << "\n";
|
||||||
@ -531,7 +531,7 @@ void print(const thrust::device_vector<T> &v, size_t max_items = 10) {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void print(const dvec<T> &v, size_t max_items = 10) {
|
void print(const dvec<T> &v, size_t max_items = 10) {
|
||||||
std::vector<T> h = v.as_vector();
|
std::vector<T> h = v.as_vector();
|
||||||
for (int i = 0; i < std::min(max_items, h.size()); i++) {
|
for (size_t i = 0; i < std::min(max_items, h.size()); i++) {
|
||||||
std::cout << " " << h[i];
|
std::cout << " " << h[i];
|
||||||
}
|
}
|
||||||
std::cout << "\n";
|
std::cout << "\n";
|
||||||
@ -539,10 +539,10 @@ void print(const dvec<T> &v, size_t max_items = 10) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void print(char *label, const thrust::device_vector<T> &v,
|
void print(char *label, const thrust::device_vector<T> &v,
|
||||||
const char *format = "%d ", int max = 10) {
|
const char *format = "%d ", size_t max = 10) {
|
||||||
thrust::host_vector<T> h_v = v;
|
thrust::host_vector<T> h_v = v;
|
||||||
std::cout << label << ":\n";
|
std::cout << label << ":\n";
|
||||||
for (int i = 0; i < std::min(static_cast<int>(h_v.size()), max); i++) {
|
for (size_t i = 0; i < std::min(static_cast<size_t>(h_v.size()), max); i++) {
|
||||||
printf(format, h_v[i]);
|
printf(format, h_v[i]);
|
||||||
}
|
}
|
||||||
std::cout << "\n";
|
std::cout << "\n";
|
||||||
@ -593,6 +593,7 @@ __global__ void launch_n_kernel(int device_idx, size_t begin, size_t end,
|
|||||||
template <int ITEMS_PER_THREAD = 8, int BLOCK_THREADS = 256, typename L>
|
template <int ITEMS_PER_THREAD = 8, int BLOCK_THREADS = 256, typename L>
|
||||||
inline void launch_n(int device_idx, size_t n, L lambda) {
|
inline void launch_n(int device_idx, size_t n, L lambda) {
|
||||||
safe_cuda(cudaSetDevice(device_idx));
|
safe_cuda(cudaSetDevice(device_idx));
|
||||||
|
// TODO: Template on n so GRID_SIZE always fits into int.
|
||||||
const int GRID_SIZE = div_round_up(n, ITEMS_PER_THREAD * BLOCK_THREADS);
|
const int GRID_SIZE = div_round_up(n, ITEMS_PER_THREAD * BLOCK_THREADS);
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__)
|
||||||
launch_n_kernel<<<GRID_SIZE, BLOCK_THREADS>>>(static_cast<size_t>(0), n,
|
launch_n_kernel<<<GRID_SIZE, BLOCK_THREADS>>>(static_cast<size_t>(0), n,
|
||||||
@ -607,6 +608,7 @@ inline void multi_launch_n(size_t n, int n_devices, L lambda) {
|
|||||||
CHECK_LE(n_devices, n_visible_devices()) << "Number of devices requested "
|
CHECK_LE(n_devices, n_visible_devices()) << "Number of devices requested "
|
||||||
"needs to be less than equal to "
|
"needs to be less than equal to "
|
||||||
"number of visible devices.";
|
"number of visible devices.";
|
||||||
|
// TODO: Template on n so GRID_SIZE always fits into int.
|
||||||
const int GRID_SIZE = div_round_up(n, ITEMS_PER_THREAD * BLOCK_THREADS);
|
const int GRID_SIZE = div_round_up(n, ITEMS_PER_THREAD * BLOCK_THREADS);
|
||||||
#if defined(__CUDACC__)
|
#if defined(__CUDACC__)
|
||||||
n_devices = n_devices > n ? n : n_devices;
|
n_devices = n_devices > n ? n : n_devices;
|
||||||
|
|||||||
@ -19,8 +19,8 @@ namespace xgboost {
|
|||||||
namespace tree {
|
namespace tree {
|
||||||
|
|
||||||
void DeviceGMat::Init(int device_idx, const common::GHistIndexMatrix& gmat,
|
void DeviceGMat::Init(int device_idx, const common::GHistIndexMatrix& gmat,
|
||||||
bst_uint element_begin, bst_uint element_end,
|
bst_ulong element_begin, bst_ulong element_end,
|
||||||
bst_uint row_begin, bst_uint row_end, int n_bins) {
|
bst_ulong row_begin, bst_ulong row_end, int n_bins) {
|
||||||
dh::safe_cuda(cudaSetDevice(device_idx));
|
dh::safe_cuda(cudaSetDevice(device_idx));
|
||||||
CHECK(gidx_buffer.size()) << "gidx_buffer must be externally allocated";
|
CHECK(gidx_buffer.size()) << "gidx_buffer must be externally allocated";
|
||||||
CHECK_EQ(row_ptr.size(), (row_end - row_begin) + 1)
|
CHECK_EQ(row_ptr.size(), (row_end - row_begin) + 1)
|
||||||
@ -31,15 +31,15 @@ void DeviceGMat::Init(int device_idx, const common::GHistIndexMatrix& gmat,
|
|||||||
cbw.Write(host_buffer.data(), gmat.index.begin() + element_begin,
|
cbw.Write(host_buffer.data(), gmat.index.begin() + element_begin,
|
||||||
gmat.index.begin() + element_end);
|
gmat.index.begin() + element_end);
|
||||||
gidx_buffer = host_buffer;
|
gidx_buffer = host_buffer;
|
||||||
gidx = common::CompressedIterator<int>(gidx_buffer.data(), n_bins);
|
gidx = common::CompressedIterator<uint32_t>(gidx_buffer.data(), n_bins);
|
||||||
|
|
||||||
// row_ptr
|
// row_ptr
|
||||||
thrust::copy(gmat.row_ptr.data() + row_begin,
|
thrust::copy(gmat.row_ptr.data() + row_begin,
|
||||||
gmat.row_ptr.data() + row_end + 1, row_ptr.tbegin());
|
gmat.row_ptr.data() + row_end + 1, row_ptr.tbegin());
|
||||||
// normalise row_ptr
|
// normalise row_ptr
|
||||||
bst_uint start = gmat.row_ptr[row_begin];
|
size_t start = gmat.row_ptr[row_begin];
|
||||||
thrust::transform(row_ptr.tbegin(), row_ptr.tend(), row_ptr.tbegin(),
|
thrust::transform(row_ptr.tbegin(), row_ptr.tend(), row_ptr.tbegin(),
|
||||||
[=] __device__(int val) { return val - start; });
|
[=] __device__(size_t val) { return val - start; });
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeviceHist::Init(int n_bins_in) {
|
void DeviceHist::Init(int n_bins_in) {
|
||||||
@ -193,7 +193,7 @@ void GPUHistBuilder::InitData(const std::vector<bst_gpair>& gpair,
|
|||||||
device_row_segments.push_back(0);
|
device_row_segments.push_back(0);
|
||||||
device_element_segments.push_back(0);
|
device_element_segments.push_back(0);
|
||||||
bst_uint offset = 0;
|
bst_uint offset = 0;
|
||||||
size_t shard_size = std::ceil(static_cast<double>(num_rows) / n_devices);
|
bst_uint shard_size = std::ceil(static_cast<double>(num_rows) / n_devices);
|
||||||
for (int d_idx = 0; d_idx < n_devices; d_idx++) {
|
for (int d_idx = 0; d_idx < n_devices; d_idx++) {
|
||||||
int device_idx = dList[d_idx];
|
int device_idx = dList[d_idx];
|
||||||
offset += shard_size;
|
offset += shard_size;
|
||||||
@ -261,7 +261,7 @@ void GPUHistBuilder::InitData(const std::vector<bst_gpair>& gpair,
|
|||||||
int device_idx = dList[d_idx];
|
int device_idx = dList[d_idx];
|
||||||
bst_uint num_rows_segment =
|
bst_uint num_rows_segment =
|
||||||
device_row_segments[d_idx + 1] - device_row_segments[d_idx];
|
device_row_segments[d_idx + 1] - device_row_segments[d_idx];
|
||||||
bst_uint num_elements_segment =
|
bst_ulong num_elements_segment =
|
||||||
device_element_segments[d_idx + 1] - device_element_segments[d_idx];
|
device_element_segments[d_idx + 1] - device_element_segments[d_idx];
|
||||||
ba.allocate(
|
ba.allocate(
|
||||||
device_idx, &(hist_vec[d_idx].data),
|
device_idx, &(hist_vec[d_idx].data),
|
||||||
@ -360,7 +360,7 @@ void GPUHistBuilder::BuildHist(int depth) {
|
|||||||
auto hist_builder = hist_vec[d_idx].GetBuilder();
|
auto hist_builder = hist_vec[d_idx].GetBuilder();
|
||||||
dh::TransformLbs(
|
dh::TransformLbs(
|
||||||
device_idx, &temp_memory[d_idx], end - begin, d_row_ptr,
|
device_idx, &temp_memory[d_idx], end - begin, d_row_ptr,
|
||||||
row_end - row_begin, [=] __device__(int local_idx, int local_ridx) {
|
row_end - row_begin, [=] __device__(size_t local_idx, int local_ridx) {
|
||||||
int nidx = d_position[local_ridx]; // OPTMARK: latency
|
int nidx = d_position[local_ridx]; // OPTMARK: latency
|
||||||
if (!is_active(nidx, depth)) return;
|
if (!is_active(nidx, depth)) return;
|
||||||
|
|
||||||
@ -606,7 +606,11 @@ __global__ void find_split_kernel(
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define MIN_BLOCK_THREADS 32
|
#define MIN_BLOCK_THREADS 32
|
||||||
#define MAX_BLOCK_THREADS 1024 // hard-coded maximum block size
|
#define CHUNK_BLOCK_THREADS 32
|
||||||
|
// MAX_BLOCK_THREADS of 1024 is hard-coded maximum block size due
|
||||||
|
// to CUDA compatibility 35 and above requirement
|
||||||
|
// for Maximum number of threads per block
|
||||||
|
#define MAX_BLOCK_THREADS 1024
|
||||||
|
|
||||||
void GPUHistBuilder::FindSplit(int depth) {
|
void GPUHistBuilder::FindSplit(int depth) {
|
||||||
// Specialised based on max_bins
|
// Specialised based on max_bins
|
||||||
@ -622,7 +626,7 @@ void GPUHistBuilder::FindSplitSpecialize(int depth) {
|
|||||||
if (param.max_bin <= BLOCK_THREADS) {
|
if (param.max_bin <= BLOCK_THREADS) {
|
||||||
LaunchFindSplit<BLOCK_THREADS>(depth);
|
LaunchFindSplit<BLOCK_THREADS>(depth);
|
||||||
} else {
|
} else {
|
||||||
this->FindSplitSpecialize<BLOCK_THREADS + 32>(depth);
|
this->FindSplitSpecialize<BLOCK_THREADS + CHUNK_BLOCK_THREADS>(depth);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -885,7 +889,7 @@ void GPUHistBuilder::UpdatePositionDense(int depth) {
|
|||||||
size_t begin = device_row_segments[d_idx];
|
size_t begin = device_row_segments[d_idx];
|
||||||
size_t end = device_row_segments[d_idx + 1];
|
size_t end = device_row_segments[d_idx + 1];
|
||||||
|
|
||||||
dh::launch_n(device_idx, end - begin, [=] __device__(int local_idx) {
|
dh::launch_n(device_idx, end - begin, [=] __device__(size_t local_idx) {
|
||||||
int pos = d_position[local_idx];
|
int pos = d_position[local_idx];
|
||||||
if (!is_active(pos, depth)) {
|
if (!is_active(pos, depth)) {
|
||||||
return;
|
return;
|
||||||
@ -896,7 +900,8 @@ void GPUHistBuilder::UpdatePositionDense(int depth) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int gidx = d_gidx[local_idx * n_columns + node.split.findex];
|
int gidx = d_gidx[local_idx *
|
||||||
|
static_cast<size_t>(n_columns) + static_cast<size_t>(node.split.findex)];
|
||||||
|
|
||||||
float fvalue = d_gidx_fvalue_map[gidx];
|
float fvalue = d_gidx_fvalue_map[gidx];
|
||||||
|
|
||||||
@ -955,7 +960,7 @@ void GPUHistBuilder::UpdatePositionSparse(int depth) {
|
|||||||
|
|
||||||
dh::TransformLbs(
|
dh::TransformLbs(
|
||||||
device_idx, &temp_memory[d_idx], element_end - element_begin, d_row_ptr,
|
device_idx, &temp_memory[d_idx], element_end - element_begin, d_row_ptr,
|
||||||
row_end - row_begin, [=] __device__(int local_idx, int local_ridx) {
|
row_end - row_begin, [=] __device__(size_t local_idx, int local_ridx) {
|
||||||
int pos = d_position[local_ridx];
|
int pos = d_position[local_ridx];
|
||||||
if (!is_active(pos, depth)) {
|
if (!is_active(pos, depth)) {
|
||||||
return;
|
return;
|
||||||
@ -1065,25 +1070,13 @@ void GPUHistBuilder::Update(const std::vector<bst_gpair>& gpair,
|
|||||||
this->InitData(gpair, *p_fmat, *p_tree);
|
this->InitData(gpair, *p_fmat, *p_tree);
|
||||||
this->InitFirstNode(gpair);
|
this->InitFirstNode(gpair);
|
||||||
this->ColSampleTree();
|
this->ColSampleTree();
|
||||||
// long long int elapsed=0;
|
|
||||||
for (int depth = 0; depth < param.max_depth; depth++) {
|
for (int depth = 0; depth < param.max_depth; depth++) {
|
||||||
this->ColSampleLevel();
|
this->ColSampleLevel();
|
||||||
|
|
||||||
// dh::Timer time;
|
|
||||||
this->BuildHist(depth);
|
this->BuildHist(depth);
|
||||||
// elapsed+=time.elapsed();
|
|
||||||
// printf("depth=%d\n",depth);
|
|
||||||
// time.printElapsed("BH Time");
|
|
||||||
|
|
||||||
// dh::Timer timesplit;
|
|
||||||
this->FindSplit(depth);
|
this->FindSplit(depth);
|
||||||
// timesplit.printElapsed("FS Time");
|
|
||||||
|
|
||||||
// dh::Timer timeupdatepos;
|
|
||||||
this->UpdatePosition(depth);
|
this->UpdatePosition(depth);
|
||||||
// timeupdatepos.printElapsed("UP Time");
|
|
||||||
}
|
}
|
||||||
// printf("Total BuildHist Time=%lld\n",elapsed);
|
|
||||||
|
|
||||||
// done with multi-GPU, pass back result from master to tree on host
|
// done with multi-GPU, pass back result from master to tree on host
|
||||||
int master_device = dList[0];
|
int master_device = dList[0];
|
||||||
|
|||||||
@ -18,10 +18,10 @@ namespace tree {
|
|||||||
|
|
||||||
struct DeviceGMat {
|
struct DeviceGMat {
|
||||||
dh::dvec<common::compressed_byte_t> gidx_buffer;
|
dh::dvec<common::compressed_byte_t> gidx_buffer;
|
||||||
common::CompressedIterator<int > gidx;
|
common::CompressedIterator<uint32_t> gidx;
|
||||||
dh::dvec<int> row_ptr;
|
dh::dvec<size_t> row_ptr;
|
||||||
void Init(int device_idx, const common::GHistIndexMatrix &gmat,
|
void Init(int device_idx, const common::GHistIndexMatrix &gmat,
|
||||||
bst_uint begin, bst_uint end, bst_uint row_begin, bst_uint row_end,int n_bins);
|
bst_ulong element_begin, bst_ulong element_end, bst_ulong row_begin, bst_ulong row_end,int n_bins);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct HistBuilder {
|
struct HistBuilder {
|
||||||
@ -99,7 +99,7 @@ class GPUHistBuilder {
|
|||||||
// below vectors are for each devices used
|
// below vectors are for each devices used
|
||||||
std::vector<int> dList;
|
std::vector<int> dList;
|
||||||
std::vector<int> device_row_segments;
|
std::vector<int> device_row_segments;
|
||||||
std::vector<int> device_element_segments;
|
std::vector<size_t> device_element_segments;
|
||||||
|
|
||||||
std::vector<dh::CubMemory> temp_memory;
|
std::vector<dh::CubMemory> temp_memory;
|
||||||
std::vector<DeviceHist> hist_vec;
|
std::vector<DeviceHist> hist_vec;
|
||||||
|
|||||||
134
plugin/updater_gpu/test/python/test_large.py
Normal file
134
plugin/updater_gpu/test/python/test_large.py
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
from __future__ import print_function
|
||||||
|
#pylint: skip-file
|
||||||
|
import sys
|
||||||
|
sys.path.append("../../tests/python")
|
||||||
|
import xgboost as xgb
|
||||||
|
import testing as tm
|
||||||
|
import numpy as np
|
||||||
|
import unittest
|
||||||
|
from sklearn.datasets import make_classification
|
||||||
|
def eprint(*args, **kwargs):
|
||||||
|
print(*args, file=sys.stderr, **kwargs) ; sys.stderr.flush()
|
||||||
|
print(*args, file=sys.stdout, **kwargs) ; sys.stdout.flush()
|
||||||
|
|
||||||
|
eprint("Testing Big Data (this may take a while)")
|
||||||
|
|
||||||
|
rng = np.random.RandomState(1994)
|
||||||
|
|
||||||
|
# "realistic" size based upon http://stat-computing.org/dataexpo/2009/ , which has been processed to one-hot encode categoricalsxsy
|
||||||
|
cols = 31
|
||||||
|
# reduced to fit onto 1 gpu but still be large
|
||||||
|
rows2 = 5000 # medium
|
||||||
|
#rows2 = 4032 # fake large for testing
|
||||||
|
rows1 = 42360032 # large
|
||||||
|
#rows2 = 152360032 # can do this for multi-gpu test (very large)
|
||||||
|
rowslist = [rows1, rows2]
|
||||||
|
|
||||||
|
|
||||||
|
class TestGPU(unittest.TestCase):
|
||||||
|
def test_large(self):
|
||||||
|
eprint("Starting test for large data")
|
||||||
|
tm._skip_if_no_sklearn()
|
||||||
|
from sklearn.datasets import load_digits
|
||||||
|
try:
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
except:
|
||||||
|
from sklearn.cross_validation import train_test_split
|
||||||
|
|
||||||
|
|
||||||
|
for rows in rowslist:
|
||||||
|
|
||||||
|
eprint("Creating train data rows=%d cols=%d" % (rows,cols))
|
||||||
|
X, y = make_classification(rows, n_features=cols, random_state=7)
|
||||||
|
rowstest = int(rows*0.2)
|
||||||
|
eprint("Creating test data rows=%d cols=%d" % (rowstest,cols))
|
||||||
|
# note the new random state. if chose same as train random state, exact methods can memorize and do very well on test even for random data, while hist cannot
|
||||||
|
Xtest, ytest = make_classification(rowstest, n_features=cols, random_state=8)
|
||||||
|
|
||||||
|
eprint("Starting DMatrix(X,y)")
|
||||||
|
ag_dtrain = xgb.DMatrix(X,y)
|
||||||
|
eprint("Starting DMatrix(Xtest,ytest)")
|
||||||
|
ag_dtest = xgb.DMatrix(Xtest,ytest)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
max_depth=6
|
||||||
|
max_bin=1024
|
||||||
|
|
||||||
|
# regression test --- hist must be same as exact on all-categorial data
|
||||||
|
ag_param = {'max_depth': max_depth,
|
||||||
|
'tree_method': 'exact',
|
||||||
|
#'nthread': 1,
|
||||||
|
'eta': 1,
|
||||||
|
'silent': 0,
|
||||||
|
'objective': 'binary:logistic',
|
||||||
|
'eval_metric': 'auc'}
|
||||||
|
ag_paramb = {'max_depth': max_depth,
|
||||||
|
'tree_method': 'hist',
|
||||||
|
#'nthread': 1,
|
||||||
|
'eta': 1,
|
||||||
|
'silent': 0,
|
||||||
|
'objective': 'binary:logistic',
|
||||||
|
'eval_metric': 'auc'}
|
||||||
|
ag_param2 = {'max_depth': max_depth,
|
||||||
|
'tree_method': 'gpu_hist',
|
||||||
|
'eta': 1,
|
||||||
|
'silent': 0,
|
||||||
|
'n_gpus': 1,
|
||||||
|
'objective': 'binary:logistic',
|
||||||
|
'max_bin': max_bin,
|
||||||
|
'eval_metric': 'auc'}
|
||||||
|
ag_param3 = {'max_depth': max_depth,
|
||||||
|
'tree_method': 'gpu_hist',
|
||||||
|
'eta': 1,
|
||||||
|
'silent': 0,
|
||||||
|
'n_gpus': -1,
|
||||||
|
'objective': 'binary:logistic',
|
||||||
|
'max_bin': max_bin,
|
||||||
|
'eval_metric': 'auc'}
|
||||||
|
#ag_param4 = {'max_depth': max_depth,
|
||||||
|
# 'tree_method': 'gpu_exact',
|
||||||
|
# 'eta': 1,
|
||||||
|
# 'silent': 0,
|
||||||
|
# 'n_gpus': 1,
|
||||||
|
# 'objective': 'binary:logistic',
|
||||||
|
# 'max_bin': max_bin,
|
||||||
|
# 'eval_metric': 'auc'}
|
||||||
|
ag_res = {}
|
||||||
|
ag_resb = {}
|
||||||
|
ag_res2 = {}
|
||||||
|
ag_res3 = {}
|
||||||
|
#ag_res4 = {}
|
||||||
|
|
||||||
|
num_rounds = 1
|
||||||
|
|
||||||
|
eprint("normal updater")
|
||||||
|
xgb.train(ag_param, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||||
|
evals_result=ag_res)
|
||||||
|
eprint("hist updater")
|
||||||
|
xgb.train(ag_paramb, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||||
|
evals_result=ag_resb)
|
||||||
|
eprint("gpu_hist updater 1 gpu")
|
||||||
|
xgb.train(ag_param2, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||||
|
evals_result=ag_res2)
|
||||||
|
eprint("gpu_hist updater all gpus")
|
||||||
|
xgb.train(ag_param3, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||||
|
evals_result=ag_res3)
|
||||||
|
#eprint("gpu_exact updater")
|
||||||
|
#xgb.train(ag_param4, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||||
|
# evals_result=ag_res4)
|
||||||
|
|
||||||
|
assert np.fabs(ag_res['train']['auc'][0] - ag_resb['train']['auc'][0])<0.001
|
||||||
|
assert np.fabs(ag_res['train']['auc'][0] - ag_res2['train']['auc'][0])<0.001
|
||||||
|
assert np.fabs(ag_res['train']['auc'][0] - ag_res3['train']['auc'][0])<0.001
|
||||||
|
#assert np.fabs(ag_res['train']['auc'][0] - ag_res4['train']['auc'][0])<0.001
|
||||||
|
|
||||||
|
assert np.fabs(ag_res['test']['auc'][0] - ag_resb['test']['auc'][0])<0.01
|
||||||
|
assert np.fabs(ag_res['test']['auc'][0] - ag_res2['test']['auc'][0])<0.01
|
||||||
|
assert np.fabs(ag_res['test']['auc'][0] - ag_res3['test']['auc'][0])<0.01
|
||||||
|
#assert np.fabs(ag_res['test']['auc'][0] - ag_res4['test']['auc'][0])<0.01
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -57,7 +57,7 @@ class Column {
|
|||||||
ColumnType type;
|
ColumnType type;
|
||||||
const T* index;
|
const T* index;
|
||||||
uint32_t index_base;
|
uint32_t index_base;
|
||||||
const uint32_t* row_ind;
|
const size_t* row_ind;
|
||||||
size_t len;
|
size_t len;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -66,8 +66,8 @@ class Column {
|
|||||||
class ColumnMatrix {
|
class ColumnMatrix {
|
||||||
public:
|
public:
|
||||||
// get number of features
|
// get number of features
|
||||||
inline uint32_t GetNumFeature() const {
|
inline bst_uint GetNumFeature() const {
|
||||||
return type_.size();
|
return static_cast<bst_uint>(type_.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// construct column matrix from GHistIndexMatrix
|
// construct column matrix from GHistIndexMatrix
|
||||||
@ -78,8 +78,8 @@ class ColumnMatrix {
|
|||||||
slot of internal buffer. */
|
slot of internal buffer. */
|
||||||
packing_factor_ = sizeof(uint32_t) / static_cast<size_t>(this->dtype);
|
packing_factor_ = sizeof(uint32_t) / static_cast<size_t>(this->dtype);
|
||||||
|
|
||||||
const uint32_t nfeature = gmat.cut->row_ptr.size() - 1;
|
const bst_uint nfeature = static_cast<bst_uint>(gmat.cut->row_ptr.size() - 1);
|
||||||
const omp_ulong nrow = static_cast<omp_ulong>(gmat.row_ptr.size() - 1);
|
const size_t nrow = gmat.row_ptr.size() - 1;
|
||||||
|
|
||||||
// identify type of each column
|
// identify type of each column
|
||||||
feature_counts_.resize(nfeature);
|
feature_counts_.resize(nfeature);
|
||||||
@ -90,13 +90,13 @@ class ColumnMatrix {
|
|||||||
XGBOOST_TYPE_SWITCH(this->dtype, {
|
XGBOOST_TYPE_SWITCH(this->dtype, {
|
||||||
max_val = static_cast<uint32_t>(std::numeric_limits<DType>::max());
|
max_val = static_cast<uint32_t>(std::numeric_limits<DType>::max());
|
||||||
});
|
});
|
||||||
for (uint32_t fid = 0; fid < nfeature; ++fid) {
|
for (bst_uint fid = 0; fid < nfeature; ++fid) {
|
||||||
CHECK_LE(gmat.cut->row_ptr[fid + 1] - gmat.cut->row_ptr[fid], max_val);
|
CHECK_LE(gmat.cut->row_ptr[fid + 1] - gmat.cut->row_ptr[fid], max_val);
|
||||||
}
|
}
|
||||||
|
|
||||||
gmat.GetFeatureCounts(&feature_counts_[0]);
|
gmat.GetFeatureCounts(&feature_counts_[0]);
|
||||||
// classify features
|
// classify features
|
||||||
for (uint32_t fid = 0; fid < nfeature; ++fid) {
|
for (bst_uint fid = 0; fid < nfeature; ++fid) {
|
||||||
if (static_cast<double>(feature_counts_[fid])
|
if (static_cast<double>(feature_counts_[fid])
|
||||||
< param.sparse_threshold * nrow) {
|
< param.sparse_threshold * nrow) {
|
||||||
type_[fid] = kSparseColumn;
|
type_[fid] = kSparseColumn;
|
||||||
@ -108,13 +108,13 @@ class ColumnMatrix {
|
|||||||
// want to compute storage boundary for each feature
|
// want to compute storage boundary for each feature
|
||||||
// using variants of prefix sum scan
|
// using variants of prefix sum scan
|
||||||
boundary_.resize(nfeature);
|
boundary_.resize(nfeature);
|
||||||
bst_uint accum_index_ = 0;
|
size_t accum_index_ = 0;
|
||||||
bst_uint accum_row_ind_ = 0;
|
size_t accum_row_ind_ = 0;
|
||||||
for (uint32_t fid = 0; fid < nfeature; ++fid) {
|
for (bst_uint fid = 0; fid < nfeature; ++fid) {
|
||||||
boundary_[fid].index_begin = accum_index_;
|
boundary_[fid].index_begin = accum_index_;
|
||||||
boundary_[fid].row_ind_begin = accum_row_ind_;
|
boundary_[fid].row_ind_begin = accum_row_ind_;
|
||||||
if (type_[fid] == kDenseColumn) {
|
if (type_[fid] == kDenseColumn) {
|
||||||
accum_index_ += nrow;
|
accum_index_ += static_cast<size_t>(nrow);
|
||||||
} else {
|
} else {
|
||||||
accum_index_ += feature_counts_[fid];
|
accum_index_ += feature_counts_[fid];
|
||||||
accum_row_ind_ += feature_counts_[fid];
|
accum_row_ind_ += feature_counts_[fid];
|
||||||
@ -129,14 +129,14 @@ class ColumnMatrix {
|
|||||||
|
|
||||||
// store least bin id for each feature
|
// store least bin id for each feature
|
||||||
index_base_.resize(nfeature);
|
index_base_.resize(nfeature);
|
||||||
for (uint32_t fid = 0; fid < nfeature; ++fid) {
|
for (bst_uint fid = 0; fid < nfeature; ++fid) {
|
||||||
index_base_[fid] = gmat.cut->row_ptr[fid];
|
index_base_[fid] = gmat.cut->row_ptr[fid];
|
||||||
}
|
}
|
||||||
|
|
||||||
// fill index_ for dense columns
|
// pre-fill index_ for dense columns
|
||||||
for (uint32_t fid = 0; fid < nfeature; ++fid) {
|
for (bst_uint fid = 0; fid < nfeature; ++fid) {
|
||||||
if (type_[fid] == kDenseColumn) {
|
if (type_[fid] == kDenseColumn) {
|
||||||
const uint32_t ibegin = boundary_[fid].index_begin;
|
const size_t ibegin = boundary_[fid].index_begin;
|
||||||
XGBOOST_TYPE_SWITCH(this->dtype, {
|
XGBOOST_TYPE_SWITCH(this->dtype, {
|
||||||
const size_t block_offset = ibegin / packing_factor_;
|
const size_t block_offset = ibegin / packing_factor_;
|
||||||
const size_t elem_offset = ibegin % packing_factor_;
|
const size_t elem_offset = ibegin % packing_factor_;
|
||||||
@ -150,15 +150,15 @@ class ColumnMatrix {
|
|||||||
|
|
||||||
// loop over all rows and fill column entries
|
// loop over all rows and fill column entries
|
||||||
// num_nonzeros[fid] = how many nonzeros have this feature accumulated so far?
|
// num_nonzeros[fid] = how many nonzeros have this feature accumulated so far?
|
||||||
std::vector<uint32_t> num_nonzeros;
|
std::vector<size_t> num_nonzeros;
|
||||||
num_nonzeros.resize(nfeature);
|
num_nonzeros.resize(nfeature);
|
||||||
std::fill(num_nonzeros.begin(), num_nonzeros.end(), 0);
|
std::fill(num_nonzeros.begin(), num_nonzeros.end(), 0);
|
||||||
for (uint32_t rid = 0; rid < nrow; ++rid) {
|
for (size_t rid = 0; rid < nrow; ++rid) {
|
||||||
const size_t ibegin = static_cast<size_t>(gmat.row_ptr[rid]);
|
const size_t ibegin = gmat.row_ptr[rid];
|
||||||
const size_t iend = static_cast<size_t>(gmat.row_ptr[rid + 1]);
|
const size_t iend = gmat.row_ptr[rid + 1];
|
||||||
size_t fid = 0;
|
size_t fid = 0;
|
||||||
for (size_t i = ibegin; i < iend; ++i) {
|
for (size_t i = ibegin; i < iend; ++i) {
|
||||||
const size_t bin_id = gmat.index[i];
|
const uint32_t bin_id = gmat.index[i];
|
||||||
while (bin_id >= gmat.cut->row_ptr[fid + 1]) {
|
while (bin_id >= gmat.cut->row_ptr[fid + 1]) {
|
||||||
++fid;
|
++fid;
|
||||||
}
|
}
|
||||||
@ -167,14 +167,14 @@ class ColumnMatrix {
|
|||||||
const size_t block_offset = boundary_[fid].index_begin / packing_factor_;
|
const size_t block_offset = boundary_[fid].index_begin / packing_factor_;
|
||||||
const size_t elem_offset = boundary_[fid].index_begin % packing_factor_;
|
const size_t elem_offset = boundary_[fid].index_begin % packing_factor_;
|
||||||
DType* begin = reinterpret_cast<DType*>(&index_[block_offset]) + elem_offset;
|
DType* begin = reinterpret_cast<DType*>(&index_[block_offset]) + elem_offset;
|
||||||
begin[rid] = bin_id - index_base_[fid];
|
begin[rid] = static_cast<DType>(bin_id - index_base_[fid]);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
XGBOOST_TYPE_SWITCH(this->dtype, {
|
XGBOOST_TYPE_SWITCH(this->dtype, {
|
||||||
const size_t block_offset = boundary_[fid].index_begin / packing_factor_;
|
const size_t block_offset = boundary_[fid].index_begin / packing_factor_;
|
||||||
const size_t elem_offset = boundary_[fid].index_begin % packing_factor_;
|
const size_t elem_offset = boundary_[fid].index_begin % packing_factor_;
|
||||||
DType* begin = reinterpret_cast<DType*>(&index_[block_offset]) + elem_offset;
|
DType* begin = reinterpret_cast<DType*>(&index_[block_offset]) + elem_offset;
|
||||||
begin[num_nonzeros[fid]] = bin_id - index_base_[fid];
|
begin[num_nonzeros[fid]] = static_cast<DType>(bin_id - index_base_[fid]);
|
||||||
});
|
});
|
||||||
row_ind_[boundary_[fid].row_ind_begin + num_nonzeros[fid]] = rid;
|
row_ind_[boundary_[fid].row_ind_begin + num_nonzeros[fid]] = rid;
|
||||||
++num_nonzeros[fid];
|
++num_nonzeros[fid];
|
||||||
@ -213,16 +213,16 @@ class ColumnMatrix {
|
|||||||
// indicate where each column's index and row_ind is stored.
|
// indicate where each column's index and row_ind is stored.
|
||||||
// index_begin and index_end are logical offsets, so they should be converted to
|
// index_begin and index_end are logical offsets, so they should be converted to
|
||||||
// actual offsets by scaling with packing_factor_
|
// actual offsets by scaling with packing_factor_
|
||||||
unsigned index_begin;
|
size_t index_begin;
|
||||||
unsigned index_end;
|
size_t index_end;
|
||||||
unsigned row_ind_begin;
|
size_t row_ind_begin;
|
||||||
unsigned row_ind_end;
|
size_t row_ind_end;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<bst_uint> feature_counts_;
|
std::vector<size_t> feature_counts_;
|
||||||
std::vector<ColumnType> type_;
|
std::vector<ColumnType> type_;
|
||||||
std::vector<uint32_t> index_; // index_: may store smaller integers; needs padding
|
std::vector<uint32_t> index_; // index_: may store smaller integers; needs padding
|
||||||
std::vector<uint32_t> row_ind_;
|
std::vector<size_t> row_ind_;
|
||||||
std::vector<ColumnBoundary> boundary_;
|
std::vector<ColumnBoundary> boundary_;
|
||||||
|
|
||||||
size_t packing_factor_; // how many integers are stored in each slot of index_
|
size_t packing_factor_; // how many integers are stored in each slot of index_
|
||||||
|
|||||||
@ -46,11 +46,11 @@ static int SymbolBits(int num_symbols) {
|
|||||||
|
|
||||||
class CompressedBufferWriter {
|
class CompressedBufferWriter {
|
||||||
private:
|
private:
|
||||||
int symbol_bits_;
|
size_t symbol_bits_;
|
||||||
size_t offset_;
|
size_t offset_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit CompressedBufferWriter(int num_symbols) : offset_(0) {
|
explicit CompressedBufferWriter(size_t num_symbols) : offset_(0) {
|
||||||
symbol_bits_ = detail::SymbolBits(num_symbols);
|
symbol_bits_ = detail::SymbolBits(num_symbols);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -70,9 +70,9 @@ class CompressedBufferWriter {
|
|||||||
* \return The calculated buffer size.
|
* \return The calculated buffer size.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static size_t CalculateBufferSize(int num_elements, int num_symbols) {
|
static size_t CalculateBufferSize(size_t num_elements, size_t num_symbols) {
|
||||||
const int bits_per_byte = 8;
|
const int bits_per_byte = 8;
|
||||||
int compressed_size = std::ceil(
|
size_t compressed_size = std::ceil(
|
||||||
static_cast<double>(detail::SymbolBits(num_symbols) * num_elements) /
|
static_cast<double>(detail::SymbolBits(num_symbols) * num_elements) /
|
||||||
bits_per_byte);
|
bits_per_byte);
|
||||||
return compressed_size + detail::padding;
|
return compressed_size + detail::padding;
|
||||||
@ -82,10 +82,10 @@ class CompressedBufferWriter {
|
|||||||
void WriteSymbol(compressed_byte_t *buffer, T symbol, size_t offset) {
|
void WriteSymbol(compressed_byte_t *buffer, T symbol, size_t offset) {
|
||||||
const int bits_per_byte = 8;
|
const int bits_per_byte = 8;
|
||||||
|
|
||||||
for (int i = 0; i < symbol_bits_; i++) {
|
for (size_t i = 0; i < symbol_bits_; i++) {
|
||||||
size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / bits_per_byte;
|
size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / bits_per_byte;
|
||||||
byte_idx += detail::padding;
|
byte_idx += detail::padding;
|
||||||
int bit_idx =
|
size_t bit_idx =
|
||||||
((bits_per_byte + i) - ((offset + 1) * symbol_bits_)) % bits_per_byte;
|
((bits_per_byte + i) - ((offset + 1) * symbol_bits_)) % bits_per_byte;
|
||||||
|
|
||||||
if (detail::CheckBit(symbol, i)) {
|
if (detail::CheckBit(symbol, i)) {
|
||||||
@ -100,14 +100,14 @@ class CompressedBufferWriter {
|
|||||||
uint64_t tmp = 0;
|
uint64_t tmp = 0;
|
||||||
int stored_bits = 0;
|
int stored_bits = 0;
|
||||||
const int max_stored_bits = 64 - symbol_bits_;
|
const int max_stored_bits = 64 - symbol_bits_;
|
||||||
int buffer_position = detail::padding;
|
size_t buffer_position = detail::padding;
|
||||||
const int num_symbols = input_end - input_begin;
|
const size_t num_symbols = input_end - input_begin;
|
||||||
for (int i = 0; i < num_symbols; i++) {
|
for (size_t i = 0; i < num_symbols; i++) {
|
||||||
typename std::iterator_traits<iter_t>::value_type symbol = input_begin[i];
|
typename std::iterator_traits<iter_t>::value_type symbol = input_begin[i];
|
||||||
if (stored_bits > max_stored_bits) {
|
if (stored_bits > max_stored_bits) {
|
||||||
// Eject only full bytes
|
// Eject only full bytes
|
||||||
int tmp_bytes = stored_bits / 8;
|
size_t tmp_bytes = stored_bits / 8;
|
||||||
for (int j = 0; j < tmp_bytes; j++) {
|
for (size_t j = 0; j < tmp_bytes; j++) {
|
||||||
buffer[buffer_position] = tmp >> (stored_bits - (j + 1) * 8);
|
buffer[buffer_position] = tmp >> (stored_bits - (j + 1) * 8);
|
||||||
buffer_position++;
|
buffer_position++;
|
||||||
}
|
}
|
||||||
@ -121,8 +121,8 @@ class CompressedBufferWriter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Eject all bytes
|
// Eject all bytes
|
||||||
int tmp_bytes = std::ceil(static_cast<float>(stored_bits) / 8);
|
size_t tmp_bytes = std::ceil(static_cast<float>(stored_bits) / 8);
|
||||||
for (int j = 0; j < tmp_bytes; j++) {
|
for (size_t j = 0; j < tmp_bytes; j++) {
|
||||||
int shift_bits = stored_bits - (j + 1) * 8;
|
int shift_bits = stored_bits - (j + 1) * 8;
|
||||||
if (shift_bits >= 0) {
|
if (shift_bits >= 0) {
|
||||||
buffer[buffer_position] = tmp >> shift_bits;
|
buffer[buffer_position] = tmp >> shift_bits;
|
||||||
@ -159,7 +159,7 @@ class CompressedIterator {
|
|||||||
/// iterator can point to
|
/// iterator can point to
|
||||||
private:
|
private:
|
||||||
compressed_byte_t *buffer_;
|
compressed_byte_t *buffer_;
|
||||||
int symbol_bits_;
|
size_t symbol_bits_;
|
||||||
size_t offset_;
|
size_t offset_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -189,7 +189,7 @@ class CompressedIterator {
|
|||||||
return static_cast<T>(tmp & mask);
|
return static_cast<T>(tmp & mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
XGBOOST_DEVICE reference operator[](int idx) const {
|
XGBOOST_DEVICE reference operator[](size_t idx) const {
|
||||||
self_type offset = (*this);
|
self_type offset = (*this);
|
||||||
offset.offset_ += idx;
|
offset.offset_ += idx;
|
||||||
return *offset;
|
return *offset;
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace common {
|
namespace common {
|
||||||
|
|
||||||
void HistCutMatrix::Init(DMatrix* p_fmat, size_t max_num_bins) {
|
void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
|
||||||
typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch;
|
typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch;
|
||||||
const MetaInfo& info = p_fmat->info();
|
const MetaInfo& info = p_fmat->info();
|
||||||
|
|
||||||
@ -44,7 +44,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, size_t max_num_bins) {
|
|||||||
unsigned begin = std::min(nstep * tid, ncol);
|
unsigned begin = std::min(nstep * tid, ncol);
|
||||||
unsigned end = std::min(nstep * (tid + 1), ncol);
|
unsigned end = std::min(nstep * (tid + 1), ncol);
|
||||||
for (size_t i = 0; i < batch.size; ++i) { // NOLINT(*)
|
for (size_t i = 0; i < batch.size; ++i) { // NOLINT(*)
|
||||||
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
|
size_t ridx = batch.base_rowid + i;
|
||||||
RowBatch::Inst inst = batch[i];
|
RowBatch::Inst inst = batch[i];
|
||||||
for (bst_uint j = 0; j < inst.length; ++j) {
|
for (bst_uint j = 0; j < inst.length; ++j) {
|
||||||
if (inst[j].index >= begin && inst[j].index < end) {
|
if (inst[j].index >= begin && inst[j].index < end) {
|
||||||
@ -108,7 +108,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat) {
|
|||||||
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
|
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
|
||||||
|
|
||||||
const int nthread = omp_get_max_threads();
|
const int nthread = omp_get_max_threads();
|
||||||
const unsigned nbins = cut->row_ptr.back();
|
const uint32_t nbins = cut->row_ptr.back();
|
||||||
hit_count.resize(nbins, 0);
|
hit_count.resize(nbins, 0);
|
||||||
hit_count_tloc_.resize(nthread * nbins, 0);
|
hit_count_tloc_.resize(nthread * nbins, 0);
|
||||||
|
|
||||||
@ -116,7 +116,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat) {
|
|||||||
row_ptr.push_back(0);
|
row_ptr.push_back(0);
|
||||||
while (iter->Next()) {
|
while (iter->Next()) {
|
||||||
const RowBatch& batch = iter->Value();
|
const RowBatch& batch = iter->Value();
|
||||||
size_t rbegin = row_ptr.size() - 1;
|
const size_t rbegin = row_ptr.size() - 1;
|
||||||
for (size_t i = 0; i < batch.size; ++i) {
|
for (size_t i = 0; i < batch.size; ++i) {
|
||||||
row_ptr.push_back(batch[i].length + row_ptr.back());
|
row_ptr.push_back(batch[i].length + row_ptr.back());
|
||||||
}
|
}
|
||||||
@ -140,7 +140,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat) {
|
|||||||
CHECK(cbegin != cend);
|
CHECK(cbegin != cend);
|
||||||
auto it = std::upper_bound(cbegin, cend, inst[j].fvalue);
|
auto it = std::upper_bound(cbegin, cend, inst[j].fvalue);
|
||||||
if (it == cend) it = cend - 1;
|
if (it == cend) it = cend - 1;
|
||||||
unsigned idx = static_cast<unsigned>(it - cut->cut.begin());
|
uint32_t idx = static_cast<uint32_t>(it - cut->cut.begin());
|
||||||
index[ibegin + j] = idx;
|
index[ibegin + j] = idx;
|
||||||
++hit_count_tloc_[tid * nbins + idx];
|
++hit_count_tloc_[tid * nbins + idx];
|
||||||
}
|
}
|
||||||
@ -148,7 +148,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#pragma omp parallel for num_threads(nthread) schedule(static)
|
#pragma omp parallel for num_threads(nthread) schedule(static)
|
||||||
for (omp_ulong idx = 0; idx < nbins; ++idx) {
|
for (bst_omp_uint idx = 0; idx < nbins; ++idx) {
|
||||||
for (int tid = 0; tid < nthread; ++tid) {
|
for (int tid = 0; tid < nthread; ++tid) {
|
||||||
hit_count[idx] += hit_count_tloc_[tid * nbins + idx];
|
hit_count[idx] += hit_count_tloc_[tid * nbins + idx];
|
||||||
}
|
}
|
||||||
@ -157,10 +157,10 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static unsigned GetConflictCount(const std::vector<bool>& mark,
|
static size_t GetConflictCount(const std::vector<bool>& mark,
|
||||||
const Column<T>& column,
|
const Column<T>& column,
|
||||||
unsigned max_cnt) {
|
size_t max_cnt) {
|
||||||
unsigned ret = 0;
|
size_t ret = 0;
|
||||||
if (column.type == xgboost::common::kDenseColumn) {
|
if (column.type == xgboost::common::kDenseColumn) {
|
||||||
for (size_t i = 0; i < column.len; ++i) {
|
for (size_t i = 0; i < column.len; ++i) {
|
||||||
if (column.index[i] != std::numeric_limits<T>::max() && mark[i]) {
|
if (column.index[i] != std::numeric_limits<T>::max() && mark[i]) {
|
||||||
@ -203,9 +203,9 @@ MarkUsed(std::vector<bool>* p_mark, const Column<T>& column) {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
inline std::vector<std::vector<unsigned>>
|
inline std::vector<std::vector<unsigned>>
|
||||||
FindGroups_(const std::vector<unsigned>& feature_list,
|
FindGroups_(const std::vector<unsigned>& feature_list,
|
||||||
const std::vector<bst_uint>& feature_nnz,
|
const std::vector<size_t>& feature_nnz,
|
||||||
const ColumnMatrix& colmat,
|
const ColumnMatrix& colmat,
|
||||||
unsigned nrow,
|
size_t nrow,
|
||||||
const FastHistParam& param) {
|
const FastHistParam& param) {
|
||||||
/* Goal: Bundle features together that has little or no "overlap", i.e.
|
/* Goal: Bundle features together that has little or no "overlap", i.e.
|
||||||
only a few data points should have nonzero values for
|
only a few data points should have nonzero values for
|
||||||
@ -214,10 +214,10 @@ FindGroups_(const std::vector<unsigned>& feature_list,
|
|||||||
|
|
||||||
std::vector<std::vector<unsigned>> groups;
|
std::vector<std::vector<unsigned>> groups;
|
||||||
std::vector<std::vector<bool>> conflict_marks;
|
std::vector<std::vector<bool>> conflict_marks;
|
||||||
std::vector<unsigned> group_nnz;
|
std::vector<size_t> group_nnz;
|
||||||
std::vector<unsigned> group_conflict_cnt;
|
std::vector<size_t> group_conflict_cnt;
|
||||||
const unsigned max_conflict_cnt
|
const size_t max_conflict_cnt
|
||||||
= static_cast<unsigned>(param.max_conflict_rate * nrow);
|
= static_cast<size_t>(param.max_conflict_rate * nrow);
|
||||||
|
|
||||||
for (auto fid : feature_list) {
|
for (auto fid : feature_list) {
|
||||||
const Column<T>& column = colmat.GetColumn<T>(fid);
|
const Column<T>& column = colmat.GetColumn<T>(fid);
|
||||||
@ -239,8 +239,8 @@ FindGroups_(const std::vector<unsigned>& feature_list,
|
|||||||
|
|
||||||
// examine each candidate group: is it okay to insert fid?
|
// examine each candidate group: is it okay to insert fid?
|
||||||
for (auto gid : search_groups) {
|
for (auto gid : search_groups) {
|
||||||
const unsigned rest_max_cnt = max_conflict_cnt - group_conflict_cnt[gid];
|
const size_t rest_max_cnt = max_conflict_cnt - group_conflict_cnt[gid];
|
||||||
const unsigned cnt = GetConflictCount(conflict_marks[gid], column, rest_max_cnt);
|
const size_t cnt = GetConflictCount(conflict_marks[gid], column, rest_max_cnt);
|
||||||
if (cnt <= rest_max_cnt) {
|
if (cnt <= rest_max_cnt) {
|
||||||
need_new_group = false;
|
need_new_group = false;
|
||||||
groups[gid].push_back(fid);
|
groups[gid].push_back(fid);
|
||||||
@ -267,9 +267,9 @@ FindGroups_(const std::vector<unsigned>& feature_list,
|
|||||||
|
|
||||||
inline std::vector<std::vector<unsigned>>
|
inline std::vector<std::vector<unsigned>>
|
||||||
FindGroups(const std::vector<unsigned>& feature_list,
|
FindGroups(const std::vector<unsigned>& feature_list,
|
||||||
const std::vector<bst_uint>& feature_nnz,
|
const std::vector<size_t>& feature_nnz,
|
||||||
const ColumnMatrix& colmat,
|
const ColumnMatrix& colmat,
|
||||||
unsigned nrow,
|
size_t nrow,
|
||||||
const FastHistParam& param) {
|
const FastHistParam& param) {
|
||||||
XGBOOST_TYPE_SWITCH(colmat.dtype, {
|
XGBOOST_TYPE_SWITCH(colmat.dtype, {
|
||||||
return FindGroups_<DType>(feature_list, feature_nnz, colmat, nrow, param);
|
return FindGroups_<DType>(feature_list, feature_nnz, colmat, nrow, param);
|
||||||
@ -288,11 +288,11 @@ FastFeatureGrouping(const GHistIndexMatrix& gmat,
|
|||||||
std::iota(feature_list.begin(), feature_list.end(), 0);
|
std::iota(feature_list.begin(), feature_list.end(), 0);
|
||||||
|
|
||||||
// sort features by nonzero counts, descending order
|
// sort features by nonzero counts, descending order
|
||||||
std::vector<bst_uint> feature_nnz(nfeature);
|
std::vector<size_t> feature_nnz(nfeature);
|
||||||
std::vector<unsigned> features_by_nnz(feature_list);
|
std::vector<unsigned> features_by_nnz(feature_list);
|
||||||
gmat.GetFeatureCounts(&feature_nnz[0]);
|
gmat.GetFeatureCounts(&feature_nnz[0]);
|
||||||
std::sort(features_by_nnz.begin(), features_by_nnz.end(),
|
std::sort(features_by_nnz.begin(), features_by_nnz.end(),
|
||||||
[&feature_nnz](int a, int b) {
|
[&feature_nnz](unsigned a, unsigned b) {
|
||||||
return feature_nnz[a] > feature_nnz[b];
|
return feature_nnz[a] > feature_nnz[b];
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -307,7 +307,7 @@ FastFeatureGrouping(const GHistIndexMatrix& gmat,
|
|||||||
if (group.size() <= 1 || group.size() >= 5) {
|
if (group.size() <= 1 || group.size() >= 5) {
|
||||||
ret.push_back(group); // keep singleton groups and large (5+) groups
|
ret.push_back(group); // keep singleton groups and large (5+) groups
|
||||||
} else {
|
} else {
|
||||||
unsigned nnz = 0;
|
size_t nnz = 0;
|
||||||
for (auto fid : group) {
|
for (auto fid : group) {
|
||||||
nnz += feature_nnz[fid];
|
nnz += feature_nnz[fid];
|
||||||
}
|
}
|
||||||
@ -338,37 +338,37 @@ void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
|
|||||||
cut = gmat.cut;
|
cut = gmat.cut;
|
||||||
|
|
||||||
const size_t nrow = gmat.row_ptr.size() - 1;
|
const size_t nrow = gmat.row_ptr.size() - 1;
|
||||||
const size_t nbins = gmat.cut->row_ptr.back();
|
const uint32_t nbins = gmat.cut->row_ptr.back();
|
||||||
|
|
||||||
/* step 1: form feature groups */
|
/* step 1: form feature groups */
|
||||||
auto groups = FastFeatureGrouping(gmat, colmat, param);
|
auto groups = FastFeatureGrouping(gmat, colmat, param);
|
||||||
const size_t nblock = groups.size();
|
const uint32_t nblock = static_cast<uint32_t>(groups.size());
|
||||||
|
|
||||||
/* step 2: build a new CSR matrix for each feature group */
|
/* step 2: build a new CSR matrix for each feature group */
|
||||||
std::vector<unsigned> bin2block(nbins); // lookup table [bin id] => [block id]
|
std::vector<uint32_t> bin2block(nbins); // lookup table [bin id] => [block id]
|
||||||
for (size_t group_id = 0; group_id < nblock; ++group_id) {
|
for (uint32_t group_id = 0; group_id < nblock; ++group_id) {
|
||||||
for (auto& fid : groups[group_id]) {
|
for (auto& fid : groups[group_id]) {
|
||||||
const unsigned bin_begin = gmat.cut->row_ptr[fid];
|
const uint32_t bin_begin = gmat.cut->row_ptr[fid];
|
||||||
const unsigned bin_end = gmat.cut->row_ptr[fid + 1];
|
const uint32_t bin_end = gmat.cut->row_ptr[fid + 1];
|
||||||
for (unsigned bin_id = bin_begin; bin_id < bin_end; ++bin_id) {
|
for (uint32_t bin_id = bin_begin; bin_id < bin_end; ++bin_id) {
|
||||||
bin2block[bin_id] = group_id;
|
bin2block[bin_id] = group_id;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::vector<std::vector<unsigned>> index_temp(nblock);
|
std::vector<std::vector<uint32_t>> index_temp(nblock);
|
||||||
std::vector<std::vector<unsigned>> row_ptr_temp(nblock);
|
std::vector<std::vector<size_t>> row_ptr_temp(nblock);
|
||||||
for (size_t block_id = 0; block_id < nblock; ++block_id) {
|
for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
|
||||||
row_ptr_temp[block_id].push_back(0);
|
row_ptr_temp[block_id].push_back(0);
|
||||||
}
|
}
|
||||||
for (size_t rid = 0; rid < nrow; ++rid) {
|
for (size_t rid = 0; rid < nrow; ++rid) {
|
||||||
const size_t ibegin = static_cast<size_t>(gmat.row_ptr[rid]);
|
const size_t ibegin = gmat.row_ptr[rid];
|
||||||
const size_t iend = static_cast<size_t>(gmat.row_ptr[rid + 1]);
|
const size_t iend = gmat.row_ptr[rid + 1];
|
||||||
for (size_t j = ibegin; j < iend; ++j) {
|
for (size_t j = ibegin; j < iend; ++j) {
|
||||||
const size_t bin_id = gmat.index[j];
|
const uint32_t bin_id = gmat.index[j];
|
||||||
const size_t block_id = bin2block[bin_id];
|
const uint32_t block_id = bin2block[bin_id];
|
||||||
index_temp[block_id].push_back(bin_id);
|
index_temp[block_id].push_back(bin_id);
|
||||||
}
|
}
|
||||||
for (size_t block_id = 0; block_id < nblock; ++block_id) {
|
for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
|
||||||
row_ptr_temp[block_id].push_back(index_temp[block_id].size());
|
row_ptr_temp[block_id].push_back(index_temp[block_id].size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -378,7 +378,7 @@ void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
|
|||||||
std::vector<size_t> row_ptr_blk_ptr;
|
std::vector<size_t> row_ptr_blk_ptr;
|
||||||
index_blk_ptr.push_back(0);
|
index_blk_ptr.push_back(0);
|
||||||
row_ptr_blk_ptr.push_back(0);
|
row_ptr_blk_ptr.push_back(0);
|
||||||
for (size_t block_id = 0; block_id < nblock; ++block_id) {
|
for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
|
||||||
index.insert(index.end(), index_temp[block_id].begin(), index_temp[block_id].end());
|
index.insert(index.end(), index_temp[block_id].begin(), index_temp[block_id].end());
|
||||||
row_ptr.insert(row_ptr.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end());
|
row_ptr.insert(row_ptr.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end());
|
||||||
index_blk_ptr.push_back(index.size());
|
index_blk_ptr.push_back(index.size());
|
||||||
@ -386,7 +386,7 @@ void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// save shortcut for each block
|
// save shortcut for each block
|
||||||
for (size_t block_id = 0; block_id < nblock; ++block_id) {
|
for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
|
||||||
Block blk;
|
Block blk;
|
||||||
blk.index_begin = &index[index_blk_ptr[block_id]];
|
blk.index_begin = &index[index_blk_ptr[block_id]];
|
||||||
blk.row_ptr_begin = &row_ptr[row_ptr_blk_ptr[block_id]];
|
blk.row_ptr_begin = &row_ptr[row_ptr_blk_ptr[block_id]];
|
||||||
@ -406,14 +406,14 @@ void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
|
|||||||
|
|
||||||
const int K = 8; // loop unrolling factor
|
const int K = 8; // loop unrolling factor
|
||||||
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
|
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
|
||||||
const bst_omp_uint nrows = row_indices.end - row_indices.begin;
|
const size_t nrows = row_indices.end - row_indices.begin;
|
||||||
const bst_omp_uint rest = nrows % K;
|
const size_t rest = nrows % K;
|
||||||
|
|
||||||
#pragma omp parallel for num_threads(nthread) schedule(guided)
|
#pragma omp parallel for num_threads(nthread) schedule(guided)
|
||||||
for (bst_omp_uint i = 0; i < nrows - rest; i += K) {
|
for (bst_omp_uint i = 0; i < nrows - rest; i += K) {
|
||||||
const bst_omp_uint tid = omp_get_thread_num();
|
const bst_omp_uint tid = omp_get_thread_num();
|
||||||
const size_t off = tid * nbins_;
|
const size_t off = tid * nbins_;
|
||||||
bst_uint rid[K];
|
size_t rid[K];
|
||||||
size_t ibegin[K];
|
size_t ibegin[K];
|
||||||
size_t iend[K];
|
size_t iend[K];
|
||||||
bst_gpair stat[K];
|
bst_gpair stat[K];
|
||||||
@ -421,32 +421,32 @@ void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
|
|||||||
rid[k] = row_indices.begin[i + k];
|
rid[k] = row_indices.begin[i + k];
|
||||||
}
|
}
|
||||||
for (int k = 0; k < K; ++k) {
|
for (int k = 0; k < K; ++k) {
|
||||||
ibegin[k] = static_cast<size_t>(gmat.row_ptr[rid[k]]);
|
ibegin[k] = gmat.row_ptr[rid[k]];
|
||||||
iend[k] = static_cast<size_t>(gmat.row_ptr[rid[k] + 1]);
|
iend[k] = gmat.row_ptr[rid[k] + 1];
|
||||||
}
|
}
|
||||||
for (int k = 0; k < K; ++k) {
|
for (int k = 0; k < K; ++k) {
|
||||||
stat[k] = gpair[rid[k]];
|
stat[k] = gpair[rid[k]];
|
||||||
}
|
}
|
||||||
for (int k = 0; k < K; ++k) {
|
for (int k = 0; k < K; ++k) {
|
||||||
for (size_t j = ibegin[k]; j < iend[k]; ++j) {
|
for (size_t j = ibegin[k]; j < iend[k]; ++j) {
|
||||||
const size_t bin = gmat.index[j];
|
const uint32_t bin = gmat.index[j];
|
||||||
data_[off + bin].Add(stat[k]);
|
data_[off + bin].Add(stat[k]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (bst_omp_uint i = nrows - rest; i < nrows; ++i) {
|
for (bst_omp_uint i = nrows - rest; i < nrows; ++i) {
|
||||||
const bst_uint rid = row_indices.begin[i];
|
const size_t rid = row_indices.begin[i];
|
||||||
const size_t ibegin = static_cast<size_t>(gmat.row_ptr[rid]);
|
const size_t ibegin = gmat.row_ptr[rid];
|
||||||
const size_t iend = static_cast<size_t>(gmat.row_ptr[rid + 1]);
|
const size_t iend = gmat.row_ptr[rid + 1];
|
||||||
const bst_gpair stat = gpair[rid];
|
const bst_gpair stat = gpair[rid];
|
||||||
for (size_t j = ibegin; j < iend; ++j) {
|
for (size_t j = ibegin; j < iend; ++j) {
|
||||||
const size_t bin = gmat.index[j];
|
const uint32_t bin = gmat.index[j];
|
||||||
data_[bin].Add(stat);
|
data_[bin].Add(stat);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* reduction */
|
/* reduction */
|
||||||
const bst_omp_uint nbins = static_cast<bst_omp_uint>(nbins_);
|
const uint32_t nbins = nbins_;
|
||||||
#pragma omp parallel for num_threads(nthread) schedule(static)
|
#pragma omp parallel for num_threads(nthread) schedule(static)
|
||||||
for (bst_omp_uint bin_id = 0; bin_id < nbins; ++bin_id) {
|
for (bst_omp_uint bin_id = 0; bin_id < nbins; ++bin_id) {
|
||||||
for (bst_omp_uint tid = 0; tid < nthread; ++tid) {
|
for (bst_omp_uint tid = 0; tid < nthread; ++tid) {
|
||||||
@ -462,16 +462,16 @@ void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair,
|
|||||||
GHistRow hist) {
|
GHistRow hist) {
|
||||||
const int K = 8; // loop unrolling factor
|
const int K = 8; // loop unrolling factor
|
||||||
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
|
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
|
||||||
const bst_omp_uint nblock = gmatb.GetNumBlock();
|
const uint32_t nblock = gmatb.GetNumBlock();
|
||||||
const bst_omp_uint nrows = row_indices.end - row_indices.begin;
|
const size_t nrows = row_indices.end - row_indices.begin;
|
||||||
const bst_omp_uint rest = nrows % K;
|
const size_t rest = nrows % K;
|
||||||
|
|
||||||
#pragma omp parallel for num_threads(nthread) schedule(guided)
|
#pragma omp parallel for num_threads(nthread) schedule(guided)
|
||||||
for (bst_omp_uint bid = 0; bid < nblock; ++bid) {
|
for (bst_omp_uint bid = 0; bid < nblock; ++bid) {
|
||||||
auto gmat = gmatb[bid];
|
auto gmat = gmatb[bid];
|
||||||
|
|
||||||
for (bst_omp_uint i = 0; i < nrows - rest; i += K) {
|
for (size_t i = 0; i < nrows - rest; i += K) {
|
||||||
bst_uint rid[K];
|
size_t rid[K];
|
||||||
size_t ibegin[K];
|
size_t ibegin[K];
|
||||||
size_t iend[K];
|
size_t iend[K];
|
||||||
bst_gpair stat[K];
|
bst_gpair stat[K];
|
||||||
@ -479,26 +479,26 @@ void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair,
|
|||||||
rid[k] = row_indices.begin[i + k];
|
rid[k] = row_indices.begin[i + k];
|
||||||
}
|
}
|
||||||
for (int k = 0; k < K; ++k) {
|
for (int k = 0; k < K; ++k) {
|
||||||
ibegin[k] = static_cast<size_t>(gmat.row_ptr[rid[k]]);
|
ibegin[k] = gmat.row_ptr[rid[k]];
|
||||||
iend[k] = static_cast<size_t>(gmat.row_ptr[rid[k] + 1]);
|
iend[k] = gmat.row_ptr[rid[k] + 1];
|
||||||
}
|
}
|
||||||
for (int k = 0; k < K; ++k) {
|
for (int k = 0; k < K; ++k) {
|
||||||
stat[k] = gpair[rid[k]];
|
stat[k] = gpair[rid[k]];
|
||||||
}
|
}
|
||||||
for (int k = 0; k < K; ++k) {
|
for (int k = 0; k < K; ++k) {
|
||||||
for (size_t j = ibegin[k]; j < iend[k]; ++j) {
|
for (size_t j = ibegin[k]; j < iend[k]; ++j) {
|
||||||
const size_t bin = gmat.index[j];
|
const uint32_t bin = gmat.index[j];
|
||||||
hist.begin[bin].Add(stat[k]);
|
hist.begin[bin].Add(stat[k]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (bst_omp_uint i = nrows - rest; i < nrows; ++i) {
|
for (bst_omp_uint i = nrows - rest; i < nrows; ++i) {
|
||||||
const bst_uint rid = row_indices.begin[i];
|
const size_t rid = row_indices.begin[i];
|
||||||
const size_t ibegin = static_cast<size_t>(gmat.row_ptr[rid]);
|
const size_t ibegin = gmat.row_ptr[rid];
|
||||||
const size_t iend = static_cast<size_t>(gmat.row_ptr[rid + 1]);
|
const size_t iend = gmat.row_ptr[rid + 1];
|
||||||
const bst_gpair stat = gpair[rid];
|
const bst_gpair stat = gpair[rid];
|
||||||
for (size_t j = ibegin; j < iend; ++j) {
|
for (size_t j = ibegin; j < iend; ++j) {
|
||||||
const size_t bin = gmat.index[j];
|
const uint32_t bin = gmat.index[j];
|
||||||
hist.begin[bin].Add(stat);
|
hist.begin[bin].Add(stat);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -507,9 +507,9 @@ void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair,
|
|||||||
|
|
||||||
void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) {
|
void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) {
|
||||||
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
|
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
|
||||||
const bst_omp_uint nbins = static_cast<bst_omp_uint>(nbins_);
|
const uint32_t nbins = static_cast<bst_omp_uint>(nbins_);
|
||||||
const int K = 8; // loop unrolling factor
|
const int K = 8; // loop unrolling factor
|
||||||
const bst_omp_uint rest = nbins % K;
|
const uint32_t rest = nbins % K;
|
||||||
#pragma omp parallel for num_threads(nthread) schedule(static)
|
#pragma omp parallel for num_threads(nthread) schedule(static)
|
||||||
for (bst_omp_uint bin_id = 0; bin_id < nbins - rest; bin_id += K) {
|
for (bst_omp_uint bin_id = 0; bin_id < nbins - rest; bin_id += K) {
|
||||||
GHistEntry pb[K];
|
GHistEntry pb[K];
|
||||||
@ -524,7 +524,7 @@ void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow pa
|
|||||||
self.begin[bin_id + k].SetSubtract(pb[k], sb[k]);
|
self.begin[bin_id + k].SetSubtract(pb[k], sb[k]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (bst_omp_uint bin_id = nbins - rest; bin_id < nbins; ++bin_id) {
|
for (uint32_t bin_id = nbins - rest; bin_id < nbins; ++bin_id) {
|
||||||
self.begin[bin_id].SetSubtract(parent.begin[bin_id], sibling.begin[bin_id]);
|
self.begin[bin_id].SetSubtract(parent.begin[bin_id], sibling.begin[bin_id]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -56,30 +56,30 @@ struct HistCutUnit {
|
|||||||
/*! \brief the index pointer of each histunit */
|
/*! \brief the index pointer of each histunit */
|
||||||
const bst_float* cut;
|
const bst_float* cut;
|
||||||
/*! \brief number of cutting point, containing the maximum point */
|
/*! \brief number of cutting point, containing the maximum point */
|
||||||
size_t size;
|
uint32_t size;
|
||||||
// default constructor
|
// default constructor
|
||||||
HistCutUnit() {}
|
HistCutUnit() {}
|
||||||
// constructor
|
// constructor
|
||||||
HistCutUnit(const bst_float* cut, unsigned size)
|
HistCutUnit(const bst_float* cut, uint32_t size)
|
||||||
: cut(cut), size(size) {}
|
: cut(cut), size(size) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
/*! \brief cut configuration for all the features */
|
/*! \brief cut configuration for all the features */
|
||||||
struct HistCutMatrix {
|
struct HistCutMatrix {
|
||||||
/*! \brief actual unit pointer */
|
/*! \brief unit pointer to rows by element position */
|
||||||
std::vector<unsigned> row_ptr;
|
std::vector<uint32_t> row_ptr;
|
||||||
/*! \brief minimum value of each feature */
|
/*! \brief minimum value of each feature */
|
||||||
std::vector<bst_float> min_val;
|
std::vector<bst_float> min_val;
|
||||||
/*! \brief the cut field */
|
/*! \brief the cut field */
|
||||||
std::vector<bst_float> cut;
|
std::vector<bst_float> cut;
|
||||||
/*! \brief Get histogram bound for fid */
|
/*! \brief Get histogram bound for fid */
|
||||||
inline HistCutUnit operator[](unsigned fid) const {
|
inline HistCutUnit operator[](bst_uint fid) const {
|
||||||
return HistCutUnit(dmlc::BeginPtr(cut) + row_ptr[fid],
|
return HistCutUnit(dmlc::BeginPtr(cut) + row_ptr[fid],
|
||||||
row_ptr[fid + 1] - row_ptr[fid]);
|
row_ptr[fid + 1] - row_ptr[fid]);
|
||||||
}
|
}
|
||||||
// create histogram cut matrix given statistics from data
|
// create histogram cut matrix given statistics from data
|
||||||
// using approximate quantile sketch approach
|
// using approximate quantile sketch approach
|
||||||
void Init(DMatrix* p_fmat, size_t max_num_bins);
|
void Init(DMatrix* p_fmat, uint32_t max_num_bins);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -89,11 +89,11 @@ struct HistCutMatrix {
|
|||||||
*/
|
*/
|
||||||
struct GHistIndexRow {
|
struct GHistIndexRow {
|
||||||
/*! \brief The index of the histogram */
|
/*! \brief The index of the histogram */
|
||||||
const unsigned* index;
|
const uint32_t* index;
|
||||||
/*! \brief The size of the histogram */
|
/*! \brief The size of the histogram */
|
||||||
unsigned size;
|
size_t size;
|
||||||
GHistIndexRow() {}
|
GHistIndexRow() {}
|
||||||
GHistIndexRow(const unsigned* index, unsigned size)
|
GHistIndexRow(const uint32_t* index, size_t size)
|
||||||
: index(index), size(size) {}
|
: index(index), size(size) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -103,21 +103,21 @@ struct GHistIndexRow {
|
|||||||
* This is a global histogram index.
|
* This is a global histogram index.
|
||||||
*/
|
*/
|
||||||
struct GHistIndexMatrix {
|
struct GHistIndexMatrix {
|
||||||
/*! \brief row pointer */
|
/*! \brief row pointer to rows by element position */
|
||||||
std::vector<unsigned> row_ptr;
|
std::vector<size_t> row_ptr;
|
||||||
/*! \brief The index data */
|
/*! \brief The index data */
|
||||||
std::vector<unsigned> index;
|
std::vector<uint32_t> index;
|
||||||
/*! \brief hit count of each index */
|
/*! \brief hit count of each index */
|
||||||
std::vector<unsigned> hit_count;
|
std::vector<size_t> hit_count;
|
||||||
/*! \brief The corresponding cuts */
|
/*! \brief The corresponding cuts */
|
||||||
const HistCutMatrix* cut;
|
const HistCutMatrix* cut;
|
||||||
// Create a global histogram matrix, given cut
|
// Create a global histogram matrix, given cut
|
||||||
void Init(DMatrix* p_fmat);
|
void Init(DMatrix* p_fmat);
|
||||||
// get i-th row
|
// get i-th row
|
||||||
inline GHistIndexRow operator[](bst_uint i) const {
|
inline GHistIndexRow operator[](size_t i) const {
|
||||||
return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]);
|
return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]);
|
||||||
}
|
}
|
||||||
inline void GetFeatureCounts(bst_uint* counts) const {
|
inline void GetFeatureCounts(size_t* counts) const {
|
||||||
const unsigned nfeature = cut->row_ptr.size() - 1;
|
const unsigned nfeature = cut->row_ptr.size() - 1;
|
||||||
for (unsigned fid = 0; fid < nfeature; ++fid) {
|
for (unsigned fid = 0; fid < nfeature; ++fid) {
|
||||||
const unsigned ibegin = cut->row_ptr[fid];
|
const unsigned ibegin = cut->row_ptr[fid];
|
||||||
@ -129,18 +129,18 @@ struct GHistIndexMatrix {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<unsigned> hit_count_tloc_;
|
std::vector<size_t> hit_count_tloc_;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct GHistIndexBlock {
|
struct GHistIndexBlock {
|
||||||
const unsigned* row_ptr;
|
const size_t* row_ptr;
|
||||||
const unsigned* index;
|
const uint32_t* index;
|
||||||
|
|
||||||
inline GHistIndexBlock(const unsigned* row_ptr, const unsigned* index)
|
inline GHistIndexBlock(const size_t* row_ptr, const uint32_t* index)
|
||||||
: row_ptr(row_ptr), index(index) {}
|
: row_ptr(row_ptr), index(index) {}
|
||||||
|
|
||||||
// get i-th row
|
// get i-th row
|
||||||
inline GHistIndexRow operator[](bst_uint i) const {
|
inline GHistIndexRow operator[](size_t i) const {
|
||||||
return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]);
|
return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -153,23 +153,23 @@ class GHistIndexBlockMatrix {
|
|||||||
const ColumnMatrix& colmat,
|
const ColumnMatrix& colmat,
|
||||||
const FastHistParam& param);
|
const FastHistParam& param);
|
||||||
|
|
||||||
inline GHistIndexBlock operator[](bst_uint i) const {
|
inline GHistIndexBlock operator[](size_t i) const {
|
||||||
return GHistIndexBlock(blocks[i].row_ptr_begin, blocks[i].index_begin);
|
return GHistIndexBlock(blocks[i].row_ptr_begin, blocks[i].index_begin);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline unsigned GetNumBlock() const {
|
inline size_t GetNumBlock() const {
|
||||||
return blocks.size();
|
return blocks.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<unsigned> row_ptr;
|
std::vector<size_t> row_ptr;
|
||||||
std::vector<unsigned> index;
|
std::vector<uint32_t> index;
|
||||||
const HistCutMatrix* cut;
|
const HistCutMatrix* cut;
|
||||||
struct Block {
|
struct Block {
|
||||||
const unsigned* row_ptr_begin;
|
const size_t* row_ptr_begin;
|
||||||
const unsigned* row_ptr_end;
|
const size_t* row_ptr_end;
|
||||||
const unsigned* index_begin;
|
const uint32_t* index_begin;
|
||||||
const unsigned* index_end;
|
const uint32_t* index_end;
|
||||||
};
|
};
|
||||||
std::vector<Block> blocks;
|
std::vector<Block> blocks;
|
||||||
};
|
};
|
||||||
@ -184,10 +184,10 @@ struct GHistRow {
|
|||||||
/*! \brief base pointer to first entry */
|
/*! \brief base pointer to first entry */
|
||||||
GHistEntry* begin;
|
GHistEntry* begin;
|
||||||
/*! \brief number of entries */
|
/*! \brief number of entries */
|
||||||
unsigned size;
|
uint32_t size;
|
||||||
|
|
||||||
GHistRow() {}
|
GHistRow() {}
|
||||||
GHistRow(GHistEntry* begin, unsigned size)
|
GHistRow(GHistEntry* begin, uint32_t size)
|
||||||
: begin(begin), size(size) {}
|
: begin(begin), size(size) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -198,19 +198,19 @@ class HistCollection {
|
|||||||
public:
|
public:
|
||||||
// access histogram for i-th node
|
// access histogram for i-th node
|
||||||
inline GHistRow operator[](bst_uint nid) const {
|
inline GHistRow operator[](bst_uint nid) const {
|
||||||
const size_t kMax = std::numeric_limits<size_t>::max();
|
const uint32_t kMax = std::numeric_limits<uint32_t>::max();
|
||||||
CHECK_NE(row_ptr_[nid], kMax);
|
CHECK_NE(row_ptr_[nid], kMax);
|
||||||
return GHistRow(const_cast<GHistEntry*>(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_);
|
return GHistRow(const_cast<GHistEntry*>(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// have we computed a histogram for i-th node?
|
// have we computed a histogram for i-th node?
|
||||||
inline bool RowExists(bst_uint nid) const {
|
inline bool RowExists(bst_uint nid) const {
|
||||||
const size_t kMax = std::numeric_limits<size_t>::max();
|
const uint32_t kMax = std::numeric_limits<uint32_t>::max();
|
||||||
return (nid < row_ptr_.size() && row_ptr_[nid] != kMax);
|
return (nid < row_ptr_.size() && row_ptr_[nid] != kMax);
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialize histogram collection
|
// initialize histogram collection
|
||||||
inline void Init(size_t nbins) {
|
inline void Init(uint32_t nbins) {
|
||||||
nbins_ = nbins;
|
nbins_ = nbins;
|
||||||
row_ptr_.clear();
|
row_ptr_.clear();
|
||||||
data_.clear();
|
data_.clear();
|
||||||
@ -218,7 +218,7 @@ class HistCollection {
|
|||||||
|
|
||||||
// create an empty histogram for i-th node
|
// create an empty histogram for i-th node
|
||||||
inline void AddHistRow(bst_uint nid) {
|
inline void AddHistRow(bst_uint nid) {
|
||||||
const size_t kMax = std::numeric_limits<size_t>::max();
|
const uint32_t kMax = std::numeric_limits<uint32_t>::max();
|
||||||
if (nid >= row_ptr_.size()) {
|
if (nid >= row_ptr_.size()) {
|
||||||
row_ptr_.resize(nid + 1, kMax);
|
row_ptr_.resize(nid + 1, kMax);
|
||||||
}
|
}
|
||||||
@ -230,12 +230,12 @@ class HistCollection {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
/*! \brief number of all bins over all features */
|
/*! \brief number of all bins over all features */
|
||||||
size_t nbins_;
|
uint32_t nbins_;
|
||||||
|
|
||||||
std::vector<GHistEntry> data_;
|
std::vector<GHistEntry> data_;
|
||||||
|
|
||||||
/*! \brief row_ptr_[nid] locates bin for historgram of node nid */
|
/*! \brief row_ptr_[nid] locates bin for historgram of node nid */
|
||||||
std::vector<size_t> row_ptr_;
|
std::vector<uint32_t> row_ptr_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@ -244,7 +244,7 @@ class HistCollection {
|
|||||||
class GHistBuilder {
|
class GHistBuilder {
|
||||||
public:
|
public:
|
||||||
// initialize builder
|
// initialize builder
|
||||||
inline void Init(size_t nthread, size_t nbins) {
|
inline void Init(size_t nthread, uint32_t nbins) {
|
||||||
nthread_ = nthread;
|
nthread_ = nthread;
|
||||||
nbins_ = nbins;
|
nbins_ = nbins;
|
||||||
}
|
}
|
||||||
@ -268,7 +268,7 @@ class GHistBuilder {
|
|||||||
/*! \brief number of threads for parallel computation */
|
/*! \brief number of threads for parallel computation */
|
||||||
size_t nthread_;
|
size_t nthread_;
|
||||||
/*! \brief number of all bins over all features */
|
/*! \brief number of all bins over all features */
|
||||||
size_t nbins_;
|
uint32_t nbins_;
|
||||||
std::vector<GHistEntry> data_;
|
std::vector<GHistEntry> data_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -21,14 +21,14 @@ class RowSetCollection {
|
|||||||
* rows (instances) associated with a particular node in a decision
|
* rows (instances) associated with a particular node in a decision
|
||||||
* tree. */
|
* tree. */
|
||||||
struct Elem {
|
struct Elem {
|
||||||
const bst_uint* begin;
|
const size_t* begin;
|
||||||
const bst_uint* end;
|
const size_t* end;
|
||||||
int node_id;
|
int node_id;
|
||||||
// id of node associated with this instance set; -1 means uninitialized
|
// id of node associated with this instance set; -1 means uninitialized
|
||||||
Elem(void)
|
Elem(void)
|
||||||
: begin(nullptr), end(nullptr), node_id(-1) {}
|
: begin(nullptr), end(nullptr), node_id(-1) {}
|
||||||
Elem(const bst_uint* begin,
|
Elem(const size_t* begin,
|
||||||
const bst_uint* end,
|
const size_t* end,
|
||||||
int node_id)
|
int node_id)
|
||||||
: begin(begin), end(end), node_id(node_id) {}
|
: begin(begin), end(end), node_id(node_id) {}
|
||||||
|
|
||||||
@ -38,8 +38,8 @@ class RowSetCollection {
|
|||||||
};
|
};
|
||||||
/* \brief specifies how to split a rowset into two */
|
/* \brief specifies how to split a rowset into two */
|
||||||
struct Split {
|
struct Split {
|
||||||
std::vector<bst_uint> left;
|
std::vector<size_t> left;
|
||||||
std::vector<bst_uint> right;
|
std::vector<size_t> right;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline std::vector<Elem>::const_iterator begin() const {
|
inline std::vector<Elem>::const_iterator begin() const {
|
||||||
@ -65,8 +65,8 @@ class RowSetCollection {
|
|||||||
// initialize node id 0->everything
|
// initialize node id 0->everything
|
||||||
inline void Init() {
|
inline void Init() {
|
||||||
CHECK_EQ(elem_of_each_node_.size(), 0U);
|
CHECK_EQ(elem_of_each_node_.size(), 0U);
|
||||||
const bst_uint* begin = dmlc::BeginPtr(row_indices_);
|
const size_t* begin = dmlc::BeginPtr(row_indices_);
|
||||||
const bst_uint* end = dmlc::BeginPtr(row_indices_) + row_indices_.size();
|
const size_t* end = dmlc::BeginPtr(row_indices_) + row_indices_.size();
|
||||||
elem_of_each_node_.emplace_back(Elem(begin, end, 0));
|
elem_of_each_node_.emplace_back(Elem(begin, end, 0));
|
||||||
}
|
}
|
||||||
// split rowset into two
|
// split rowset into two
|
||||||
@ -77,16 +77,15 @@ class RowSetCollection {
|
|||||||
const Elem e = elem_of_each_node_[node_id];
|
const Elem e = elem_of_each_node_[node_id];
|
||||||
const unsigned nthread = row_split_tloc.size();
|
const unsigned nthread = row_split_tloc.size();
|
||||||
CHECK(e.begin != nullptr);
|
CHECK(e.begin != nullptr);
|
||||||
bst_uint* all_begin = dmlc::BeginPtr(row_indices_);
|
size_t* all_begin = dmlc::BeginPtr(row_indices_);
|
||||||
bst_uint* begin = all_begin + (e.begin - all_begin);
|
size_t* begin = all_begin + (e.begin - all_begin);
|
||||||
|
|
||||||
bst_uint* it = begin;
|
size_t* it = begin;
|
||||||
// TODO(hcho3): parallelize this section
|
|
||||||
for (bst_omp_uint tid = 0; tid < nthread; ++tid) {
|
for (bst_omp_uint tid = 0; tid < nthread; ++tid) {
|
||||||
std::copy(row_split_tloc[tid].left.begin(), row_split_tloc[tid].left.end(), it);
|
std::copy(row_split_tloc[tid].left.begin(), row_split_tloc[tid].left.end(), it);
|
||||||
it += row_split_tloc[tid].left.size();
|
it += row_split_tloc[tid].left.size();
|
||||||
}
|
}
|
||||||
bst_uint* split_pt = it;
|
size_t* split_pt = it;
|
||||||
for (bst_omp_uint tid = 0; tid < nthread; ++tid) {
|
for (bst_omp_uint tid = 0; tid < nthread; ++tid) {
|
||||||
std::copy(row_split_tloc[tid].right.begin(), row_split_tloc[tid].right.end(), it);
|
std::copy(row_split_tloc[tid].right.begin(), row_split_tloc[tid].right.end(), it);
|
||||||
it += row_split_tloc[tid].right.size();
|
it += row_split_tloc[tid].right.size();
|
||||||
@ -105,7 +104,7 @@ class RowSetCollection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// stores the row indices in the set
|
// stores the row indices in the set
|
||||||
std::vector<bst_uint> row_indices_;
|
std::vector<size_t> row_indices_;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// vector: node_id -> elements
|
// vector: node_id -> elements
|
||||||
|
|||||||
@ -61,7 +61,7 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
TStats::CheckInfo(dmat->info());
|
TStats::CheckInfo(dmat->info());
|
||||||
if (is_gmat_initialized_ == false) {
|
if (is_gmat_initialized_ == false) {
|
||||||
double tstart = dmlc::GetTime();
|
double tstart = dmlc::GetTime();
|
||||||
hmat_.Init(dmat, param.max_bin);
|
hmat_.Init(dmat, static_cast<uint32_t>(param.max_bin));
|
||||||
gmat_.cut = &hmat_;
|
gmat_.cut = &hmat_;
|
||||||
gmat_.Init(dmat);
|
gmat_.Init(dmat);
|
||||||
column_matrix_.Init(gmat_, fhparam);
|
column_matrix_.Init(gmat_, fhparam);
|
||||||
@ -111,23 +111,6 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
bool is_gmat_initialized_;
|
bool is_gmat_initialized_;
|
||||||
|
|
||||||
// data structure
|
// data structure
|
||||||
/*! \brief per thread x per node entry to store tmp data */
|
|
||||||
struct ThreadEntry {
|
|
||||||
/*! \brief statistics of data */
|
|
||||||
TStats stats;
|
|
||||||
/*! \brief extra statistics of data */
|
|
||||||
TStats stats_extra;
|
|
||||||
/*! \brief last feature value scanned */
|
|
||||||
float last_fvalue;
|
|
||||||
/*! \brief first feature value scanned */
|
|
||||||
float first_fvalue;
|
|
||||||
/*! \brief current best solution */
|
|
||||||
SplitEntry best;
|
|
||||||
// constructor
|
|
||||||
explicit ThreadEntry(const TrainParam& param)
|
|
||||||
: stats(param), stats_extra(param) {
|
|
||||||
}
|
|
||||||
};
|
|
||||||
struct NodeEntry {
|
struct NodeEntry {
|
||||||
/*! \brief statics for node entry */
|
/*! \brief statics for node entry */
|
||||||
TStats stats;
|
TStats stats;
|
||||||
@ -340,7 +323,7 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
}
|
}
|
||||||
leaf_value = (*p_last_tree_)[nid].leaf_value();
|
leaf_value = (*p_last_tree_)[nid].leaf_value();
|
||||||
|
|
||||||
for (const bst_uint* it = rowset.begin; it < rowset.end; ++it) {
|
for (const size_t* it = rowset.begin; it < rowset.end; ++it) {
|
||||||
out_preds[*it] += leaf_value;
|
out_preds[*it] += leaf_value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -372,7 +355,7 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
// clear local prediction cache
|
// clear local prediction cache
|
||||||
leaf_value_cache_.clear();
|
leaf_value_cache_.clear();
|
||||||
// initialize histogram collection
|
// initialize histogram collection
|
||||||
size_t nbins = gmat.cut->row_ptr.back();
|
uint32_t nbins = gmat.cut->row_ptr.back();
|
||||||
hist_.Init(nbins);
|
hist_.Init(nbins);
|
||||||
|
|
||||||
// initialize histogram builder
|
// initialize histogram builder
|
||||||
@ -383,18 +366,18 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
hist_builder_.Init(this->nthread, nbins);
|
hist_builder_.Init(this->nthread, nbins);
|
||||||
|
|
||||||
CHECK_EQ(info.root_index.size(), 0U);
|
CHECK_EQ(info.root_index.size(), 0U);
|
||||||
std::vector<bst_uint>& row_indices = row_set_collection_.row_indices_;
|
std::vector<size_t>& row_indices = row_set_collection_.row_indices_;
|
||||||
// mark subsample and build list of member rows
|
// mark subsample and build list of member rows
|
||||||
if (param.subsample < 1.0f) {
|
if (param.subsample < 1.0f) {
|
||||||
std::bernoulli_distribution coin_flip(param.subsample);
|
std::bernoulli_distribution coin_flip(param.subsample);
|
||||||
auto& rnd = common::GlobalRandom();
|
auto& rnd = common::GlobalRandom();
|
||||||
for (bst_uint i = 0; i < info.num_row; ++i) {
|
for (size_t i = 0; i < info.num_row; ++i) {
|
||||||
if (gpair[i].hess >= 0.0f && coin_flip(rnd)) {
|
if (gpair[i].hess >= 0.0f && coin_flip(rnd)) {
|
||||||
row_indices.push_back(i);
|
row_indices.push_back(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (bst_uint i = 0; i < info.num_row; ++i) {
|
for (size_t i = 0; i < info.num_row; ++i) {
|
||||||
if (gpair[i].hess >= 0.0f) {
|
if (gpair[i].hess >= 0.0f) {
|
||||||
row_indices.push_back(i);
|
row_indices.push_back(i);
|
||||||
}
|
}
|
||||||
@ -405,11 +388,11 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
|
|
||||||
{
|
{
|
||||||
/* determine layout of data */
|
/* determine layout of data */
|
||||||
const auto nrow = info.num_row;
|
const size_t nrow = info.num_row;
|
||||||
const auto ncol = info.num_col;
|
const size_t ncol = info.num_col;
|
||||||
const auto nnz = info.num_nonzero;
|
const size_t nnz = info.num_nonzero;
|
||||||
// number of discrete bins for feature 0
|
// number of discrete bins for feature 0
|
||||||
const unsigned nbins_f0 = gmat.cut->row_ptr[1] - gmat.cut->row_ptr[0];
|
const uint32_t nbins_f0 = gmat.cut->row_ptr[1] - gmat.cut->row_ptr[0];
|
||||||
if (nrow * ncol == nnz) {
|
if (nrow * ncol == nnz) {
|
||||||
// dense data with zero-based indexing
|
// dense data with zero-based indexing
|
||||||
data_layout_ = kDenseDataZeroBased;
|
data_layout_ = kDenseDataZeroBased;
|
||||||
@ -427,19 +410,19 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
// store a pointer to training data
|
// store a pointer to training data
|
||||||
p_last_fmat_ = &fmat;
|
p_last_fmat_ = &fmat;
|
||||||
// initialize feature index
|
// initialize feature index
|
||||||
unsigned ncol = static_cast<unsigned>(info.num_col);
|
bst_uint ncol = static_cast<bst_uint>(info.num_col);
|
||||||
feat_index.clear();
|
feat_index.clear();
|
||||||
if (data_layout_ == kDenseDataOneBased) {
|
if (data_layout_ == kDenseDataOneBased) {
|
||||||
for (unsigned i = 1; i < ncol; ++i) {
|
for (bst_uint i = 1; i < ncol; ++i) {
|
||||||
feat_index.push_back(i);
|
feat_index.push_back(i);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (unsigned i = 0; i < ncol; ++i) {
|
for (bst_uint i = 0; i < ncol; ++i) {
|
||||||
feat_index.push_back(i);
|
feat_index.push_back(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unsigned n = std::max(static_cast<unsigned>(1),
|
bst_uint n = std::max(static_cast<bst_uint>(1),
|
||||||
static_cast<unsigned>(param.colsample_bytree * feat_index.size()));
|
static_cast<bst_uint>(param.colsample_bytree * feat_index.size()));
|
||||||
std::shuffle(feat_index.begin(), feat_index.end(), common::GlobalRandom());
|
std::shuffle(feat_index.begin(), feat_index.end(), common::GlobalRandom());
|
||||||
CHECK_GT(param.colsample_bytree, 0U)
|
CHECK_GT(param.colsample_bytree, 0U)
|
||||||
<< "colsample_bytree cannot be zero.";
|
<< "colsample_bytree cannot be zero.";
|
||||||
@ -450,11 +433,11 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
choose the column that has a least positive number of discrete bins.
|
choose the column that has a least positive number of discrete bins.
|
||||||
For dense data (with no missing value),
|
For dense data (with no missing value),
|
||||||
the sum of gradient histogram is equal to snode[nid] */
|
the sum of gradient histogram is equal to snode[nid] */
|
||||||
const std::vector<unsigned>& row_ptr = gmat.cut->row_ptr;
|
const std::vector<uint32_t>& row_ptr = gmat.cut->row_ptr;
|
||||||
const size_t nfeature = row_ptr.size() - 1;
|
const bst_uint nfeature = static_cast<bst_uint>(row_ptr.size() - 1);
|
||||||
size_t min_nbins_per_feature = 0;
|
uint32_t min_nbins_per_feature = 0;
|
||||||
for (size_t i = 0; i < nfeature; ++i) {
|
for (bst_uint i = 0; i < nfeature; ++i) {
|
||||||
const unsigned nbins = row_ptr[i + 1] - row_ptr[i];
|
const uint32_t nbins = row_ptr[i + 1] - row_ptr[i];
|
||||||
if (nbins > 0) {
|
if (nbins > 0) {
|
||||||
if (min_nbins_per_feature == 0 || min_nbins_per_feature > nbins) {
|
if (min_nbins_per_feature == 0 || min_nbins_per_feature > nbins) {
|
||||||
min_nbins_per_feature = nbins;
|
min_nbins_per_feature = nbins;
|
||||||
@ -485,7 +468,7 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
const std::vector<bst_uint>& feat_set) {
|
const std::vector<bst_uint>& feat_set) {
|
||||||
// start enumeration
|
// start enumeration
|
||||||
const MetaInfo& info = fmat.info();
|
const MetaInfo& info = fmat.info();
|
||||||
const bst_omp_uint nfeature = feat_set.size();
|
const bst_uint nfeature = static_cast<bst_uint>(feat_set.size());
|
||||||
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread);
|
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread);
|
||||||
best_split_tloc_.resize(nthread);
|
best_split_tloc_.resize(nthread);
|
||||||
#pragma omp parallel for schedule(static) num_threads(nthread)
|
#pragma omp parallel for schedule(static) num_threads(nthread)
|
||||||
@ -547,13 +530,17 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
const bool default_left = (*p_tree)[nid].default_left();
|
const bool default_left = (*p_tree)[nid].default_left();
|
||||||
const bst_uint fid = (*p_tree)[nid].split_index();
|
const bst_uint fid = (*p_tree)[nid].split_index();
|
||||||
const bst_float split_pt = (*p_tree)[nid].split_cond();
|
const bst_float split_pt = (*p_tree)[nid].split_cond();
|
||||||
const bst_uint lower_bound = gmat.cut->row_ptr[fid];
|
const uint32_t lower_bound = gmat.cut->row_ptr[fid];
|
||||||
const bst_uint upper_bound = gmat.cut->row_ptr[fid + 1];
|
const uint32_t upper_bound = gmat.cut->row_ptr[fid + 1];
|
||||||
bst_int split_cond = -1;
|
int32_t split_cond = -1;
|
||||||
// convert floating-point split_pt into corresponding bin_id
|
// convert floating-point split_pt into corresponding bin_id
|
||||||
// split_cond = -1 indicates that split_pt is less than all known cut points
|
// split_cond = -1 indicates that split_pt is less than all known cut points
|
||||||
for (unsigned i = gmat.cut->row_ptr[fid]; i < gmat.cut->row_ptr[fid + 1]; ++i) {
|
CHECK_LT(upper_bound,
|
||||||
if (split_pt == gmat.cut->cut[i]) split_cond = static_cast<bst_int>(i);
|
static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
|
||||||
|
for (uint32_t i = lower_bound; i < upper_bound; ++i) {
|
||||||
|
if (split_pt == gmat.cut->cut[i]) {
|
||||||
|
split_cond = static_cast<int32_t>(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto& rowset = row_set_collection_[nid];
|
const auto& rowset = row_set_collection_[nid];
|
||||||
@ -580,15 +567,15 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
bool default_left) {
|
bool default_left) {
|
||||||
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
|
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
|
||||||
const int K = 8; // loop unrolling factor
|
const int K = 8; // loop unrolling factor
|
||||||
const bst_omp_uint nrows = rowset.end - rowset.begin;
|
const size_t nrows = rowset.end - rowset.begin;
|
||||||
const bst_omp_uint rest = nrows % K;
|
const size_t rest = nrows % K;
|
||||||
|
|
||||||
#pragma omp parallel for num_threads(nthread) schedule(static)
|
#pragma omp parallel for num_threads(nthread) schedule(static)
|
||||||
for (bst_omp_uint i = 0; i < nrows - rest; i += K) {
|
for (bst_omp_uint i = 0; i < nrows - rest; i += K) {
|
||||||
const bst_uint tid = omp_get_thread_num();
|
const bst_uint tid = omp_get_thread_num();
|
||||||
auto& left = row_split_tloc[tid].left;
|
auto& left = row_split_tloc[tid].left;
|
||||||
auto& right = row_split_tloc[tid].right;
|
auto& right = row_split_tloc[tid].right;
|
||||||
bst_uint rid[K];
|
size_t rid[K];
|
||||||
T rbin[K];
|
T rbin[K];
|
||||||
for (int k = 0; k < K; ++k) {
|
for (int k = 0; k < K; ++k) {
|
||||||
rid[k] = rowset.begin[i + k];
|
rid[k] = rowset.begin[i + k];
|
||||||
@ -604,7 +591,9 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
right.push_back(rid[k]);
|
right.push_back(rid[k]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (static_cast<bst_int>(rbin[k] + column.index_base) <= split_cond) {
|
CHECK_LT(rbin[k] + column.index_base,
|
||||||
|
static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
|
||||||
|
if (static_cast<int32_t>(rbin[k] + column.index_base) <= split_cond) {
|
||||||
left.push_back(rid[k]);
|
left.push_back(rid[k]);
|
||||||
} else {
|
} else {
|
||||||
right.push_back(rid[k]);
|
right.push_back(rid[k]);
|
||||||
@ -612,10 +601,10 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (bst_omp_uint i = nrows - rest; i < nrows; ++i) {
|
for (size_t i = nrows - rest; i < nrows; ++i) {
|
||||||
auto& left = row_split_tloc[nthread-1].left;
|
auto& left = row_split_tloc[nthread-1].left;
|
||||||
auto& right = row_split_tloc[nthread-1].right;
|
auto& right = row_split_tloc[nthread-1].right;
|
||||||
const bst_uint rid = rowset.begin[i];
|
const size_t rid = rowset.begin[i];
|
||||||
const T rbin = column.index[rid];
|
const T rbin = column.index[rid];
|
||||||
if (rbin == std::numeric_limits<T>::max()) { // missing value
|
if (rbin == std::numeric_limits<T>::max()) { // missing value
|
||||||
if (default_left) {
|
if (default_left) {
|
||||||
@ -624,7 +613,9 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
right.push_back(rid);
|
right.push_back(rid);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (static_cast<bst_int>(rbin + column.index_base) <= split_cond) {
|
CHECK_LT(rbin + column.index_base,
|
||||||
|
static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
|
||||||
|
if (static_cast<int32_t>(rbin + column.index_base) <= split_cond) {
|
||||||
left.push_back(rid);
|
left.push_back(rid);
|
||||||
} else {
|
} else {
|
||||||
right.push_back(rid);
|
right.push_back(rid);
|
||||||
@ -642,13 +633,13 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
bool default_left) {
|
bool default_left) {
|
||||||
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
|
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
|
||||||
const int K = 8; // loop unrolling factor
|
const int K = 8; // loop unrolling factor
|
||||||
const bst_omp_uint nrows = rowset.end - rowset.begin;
|
const size_t nrows = rowset.end - rowset.begin;
|
||||||
const bst_omp_uint rest = nrows % K;
|
const size_t rest = nrows % K;
|
||||||
#pragma omp parallel for num_threads(nthread) schedule(static)
|
#pragma omp parallel for num_threads(nthread) schedule(static)
|
||||||
for (bst_omp_uint i = 0; i < nrows - rest; i += K) {
|
for (bst_omp_uint i = 0; i < nrows - rest; i += K) {
|
||||||
bst_uint rid[K];
|
size_t rid[K];
|
||||||
GHistIndexRow row[K];
|
GHistIndexRow row[K];
|
||||||
const unsigned* p[K];
|
const uint32_t* p[K];
|
||||||
bst_uint tid = omp_get_thread_num();
|
bst_uint tid = omp_get_thread_num();
|
||||||
auto& left = row_split_tloc[tid].left;
|
auto& left = row_split_tloc[tid].left;
|
||||||
auto& right = row_split_tloc[tid].right;
|
auto& right = row_split_tloc[tid].right;
|
||||||
@ -663,7 +654,9 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
}
|
}
|
||||||
for (int k = 0; k < K; ++k) {
|
for (int k = 0; k < K; ++k) {
|
||||||
if (p[k] != row[k].index + row[k].size && *p[k] < upper_bound) {
|
if (p[k] != row[k].index + row[k].size && *p[k] < upper_bound) {
|
||||||
if (static_cast<bst_int>(*p[k]) <= split_cond) {
|
CHECK_LT(*p[k],
|
||||||
|
static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
|
||||||
|
if (static_cast<int32_t>(*p[k]) <= split_cond) {
|
||||||
left.push_back(rid[k]);
|
left.push_back(rid[k]);
|
||||||
} else {
|
} else {
|
||||||
right.push_back(rid[k]);
|
right.push_back(rid[k]);
|
||||||
@ -677,14 +670,15 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (bst_omp_uint i = nrows - rest; i < nrows; ++i) {
|
for (size_t i = nrows - rest; i < nrows; ++i) {
|
||||||
const bst_uint rid = rowset.begin[i];
|
const size_t rid = rowset.begin[i];
|
||||||
const auto row = gmat[rid];
|
const auto row = gmat[rid];
|
||||||
const auto p = std::lower_bound(row.index, row.index + row.size, lower_bound);
|
const auto p = std::lower_bound(row.index, row.index + row.size, lower_bound);
|
||||||
auto& left = row_split_tloc[0].left;
|
auto& left = row_split_tloc[0].left;
|
||||||
auto& right = row_split_tloc[0].right;
|
auto& right = row_split_tloc[0].right;
|
||||||
if (p != row.index + row.size && *p < upper_bound) {
|
if (p != row.index + row.size && *p < upper_bound) {
|
||||||
if (static_cast<bst_int>(*p) <= split_cond) {
|
CHECK_LT(*p, static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
|
||||||
|
if (static_cast<int32_t>(*p) <= split_cond) {
|
||||||
left.push_back(rid);
|
left.push_back(rid);
|
||||||
} else {
|
} else {
|
||||||
right.push_back(rid);
|
right.push_back(rid);
|
||||||
@ -709,26 +703,26 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
bst_int split_cond,
|
bst_int split_cond,
|
||||||
bool default_left) {
|
bool default_left) {
|
||||||
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
|
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
|
||||||
const bst_omp_uint nrows = rowset.end - rowset.begin;
|
const size_t nrows = rowset.end - rowset.begin;
|
||||||
|
|
||||||
#pragma omp parallel num_threads(nthread)
|
#pragma omp parallel num_threads(nthread)
|
||||||
{
|
{
|
||||||
const bst_uint tid = omp_get_thread_num();
|
const size_t tid = static_cast<size_t>(omp_get_thread_num());
|
||||||
const bst_omp_uint ibegin = tid * nrows / nthread;
|
const size_t ibegin = tid * nrows / nthread;
|
||||||
const bst_omp_uint iend = (tid + 1) * nrows / nthread;
|
const size_t iend = (tid + 1) * nrows / nthread;
|
||||||
if (ibegin < iend) { // ensure that [ibegin, iend) is nonempty range
|
if (ibegin < iend) { // ensure that [ibegin, iend) is nonempty range
|
||||||
// search first nonzero row with index >= rowset[ibegin]
|
// search first nonzero row with index >= rowset[ibegin]
|
||||||
const uint32_t* p = std::lower_bound(column.row_ind,
|
const size_t* p = std::lower_bound(column.row_ind,
|
||||||
column.row_ind + column.len,
|
column.row_ind + column.len,
|
||||||
rowset.begin[ibegin]);
|
rowset.begin[ibegin]);
|
||||||
|
|
||||||
auto& left = row_split_tloc[tid].left;
|
auto& left = row_split_tloc[tid].left;
|
||||||
auto& right = row_split_tloc[tid].right;
|
auto& right = row_split_tloc[tid].right;
|
||||||
if (p != column.row_ind + column.len && *p <= rowset.begin[iend - 1]) {
|
if (p != column.row_ind + column.len && *p <= rowset.begin[iend - 1]) {
|
||||||
bst_omp_uint cursor = p - column.row_ind;
|
size_t cursor = p - column.row_ind;
|
||||||
|
|
||||||
for (bst_omp_uint i = ibegin; i < iend; ++i) {
|
for (size_t i = ibegin; i < iend; ++i) {
|
||||||
const bst_uint rid = rowset.begin[i];
|
const size_t rid = rowset.begin[i];
|
||||||
while (cursor < column.len
|
while (cursor < column.len
|
||||||
&& column.row_ind[cursor] < rid
|
&& column.row_ind[cursor] < rid
|
||||||
&& column.row_ind[cursor] <= rowset.begin[iend - 1]) {
|
&& column.row_ind[cursor] <= rowset.begin[iend - 1]) {
|
||||||
@ -736,7 +730,9 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
}
|
}
|
||||||
if (cursor < column.len && column.row_ind[cursor] == rid) {
|
if (cursor < column.len && column.row_ind[cursor] == rid) {
|
||||||
const T rbin = column.index[cursor];
|
const T rbin = column.index[cursor];
|
||||||
if (static_cast<bst_int>(rbin + column.index_base) <= split_cond) {
|
CHECK_LT(rbin + column.index_base,
|
||||||
|
static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
|
||||||
|
if (static_cast<int32_t>(rbin + column.index_base) <= split_cond) {
|
||||||
left.push_back(rid);
|
left.push_back(rid);
|
||||||
} else {
|
} else {
|
||||||
right.push_back(rid);
|
right.push_back(rid);
|
||||||
@ -753,13 +749,13 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
}
|
}
|
||||||
} else { // all rows in [ibegin, iend) have missing values
|
} else { // all rows in [ibegin, iend) have missing values
|
||||||
if (default_left) {
|
if (default_left) {
|
||||||
for (bst_omp_uint i = ibegin; i < iend; ++i) {
|
for (size_t i = ibegin; i < iend; ++i) {
|
||||||
const bst_uint rid = rowset.begin[i];
|
const size_t rid = rowset.begin[i];
|
||||||
left.push_back(rid);
|
left.push_back(rid);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (bst_omp_uint i = ibegin; i < iend; ++i) {
|
for (size_t i = ibegin; i < iend; ++i) {
|
||||||
const bst_uint rid = rowset.begin[i];
|
const size_t rid = rowset.begin[i];
|
||||||
right.push_back(rid);
|
right.push_back(rid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -786,17 +782,17 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
For dense data (with no missing value),
|
For dense data (with no missing value),
|
||||||
the sum of gradient histogram is equal to snode[nid] */
|
the sum of gradient histogram is equal to snode[nid] */
|
||||||
GHistRow hist = hist_[nid];
|
GHistRow hist = hist_[nid];
|
||||||
const std::vector<unsigned>& row_ptr = gmat.cut->row_ptr;
|
const std::vector<uint32_t>& row_ptr = gmat.cut->row_ptr;
|
||||||
|
|
||||||
const size_t ibegin = row_ptr[fid_least_bins_];
|
const uint32_t ibegin = row_ptr[fid_least_bins_];
|
||||||
const size_t iend = row_ptr[fid_least_bins_ + 1];
|
const uint32_t iend = row_ptr[fid_least_bins_ + 1];
|
||||||
for (size_t i = ibegin; i < iend; ++i) {
|
for (uint32_t i = ibegin; i < iend; ++i) {
|
||||||
const GHistEntry et = hist.begin[i];
|
const GHistEntry et = hist.begin[i];
|
||||||
stats.Add(et.sum_grad, et.sum_hess);
|
stats.Add(et.sum_grad, et.sum_hess);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const RowSetCollection::Elem e = row_set_collection_[nid];
|
const RowSetCollection::Elem e = row_set_collection_[nid];
|
||||||
for (const bst_uint* it = e.begin; it < e.end; ++it) {
|
for (const size_t* it = e.begin; it < e.end; ++it) {
|
||||||
stats.Add(gpair[*it]);
|
stats.Add(gpair[*it]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -831,7 +827,7 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
CHECK(d_step == +1 || d_step == -1);
|
CHECK(d_step == +1 || d_step == -1);
|
||||||
|
|
||||||
// aliases
|
// aliases
|
||||||
const std::vector<unsigned>& cut_ptr = gmat.cut->row_ptr;
|
const std::vector<uint32_t>& cut_ptr = gmat.cut->row_ptr;
|
||||||
const std::vector<bst_float>& cut_val = gmat.cut->cut;
|
const std::vector<bst_float>& cut_val = gmat.cut->cut;
|
||||||
|
|
||||||
// statistics on both sides of split
|
// statistics on both sides of split
|
||||||
@ -841,20 +837,25 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
SplitEntry best;
|
SplitEntry best;
|
||||||
|
|
||||||
// bin boundaries
|
// bin boundaries
|
||||||
|
CHECK_LE(cut_ptr[fid],
|
||||||
|
static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
|
||||||
|
CHECK_LE(cut_ptr[fid + 1],
|
||||||
|
static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
|
||||||
// imin: index (offset) of the minimum value for feature fid
|
// imin: index (offset) of the minimum value for feature fid
|
||||||
// need this for backward enumeration
|
// need this for backward enumeration
|
||||||
const int imin = cut_ptr[fid];
|
const int32_t imin = static_cast<int32_t>(cut_ptr[fid]);
|
||||||
// ibegin, iend: smallest/largest cut points for feature fid
|
// ibegin, iend: smallest/largest cut points for feature fid
|
||||||
int ibegin, iend;
|
// use int to allow for value -1
|
||||||
|
int32_t ibegin, iend;
|
||||||
if (d_step > 0) {
|
if (d_step > 0) {
|
||||||
ibegin = cut_ptr[fid];
|
ibegin = static_cast<int32_t>(cut_ptr[fid]);
|
||||||
iend = cut_ptr[fid + 1];
|
iend = static_cast<int32_t>(cut_ptr[fid + 1]);
|
||||||
} else {
|
} else {
|
||||||
ibegin = cut_ptr[fid + 1] - 1;
|
ibegin = static_cast<int32_t>(cut_ptr[fid + 1]) - 1;
|
||||||
iend = cut_ptr[fid] - 1;
|
iend = static_cast<int32_t>(cut_ptr[fid]) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = ibegin; i != iend; i += d_step) {
|
for (int32_t i = ibegin; i != iend; i += d_step) {
|
||||||
// start working
|
// start working
|
||||||
// try to find a split
|
// try to find a split
|
||||||
e.Add(hist.begin[i].sum_grad, hist.begin[i].sum_hess);
|
e.Add(hist.begin[i].sum_grad, hist.begin[i].sum_hess);
|
||||||
@ -930,7 +931,7 @@ class FastHistMaker: public TreeUpdater {
|
|||||||
HistCollection hist_;
|
HistCollection hist_;
|
||||||
/*! \brief feature with least # of bins. to be used for dense specialization
|
/*! \brief feature with least # of bins. to be used for dense specialization
|
||||||
of InitNewNode() */
|
of InitNewNode() */
|
||||||
size_t fid_least_bins_;
|
uint32_t fid_least_bins_;
|
||||||
/*! \brief local prediction cache; maps node id to leaf value */
|
/*! \brief local prediction cache; maps node id to leaf value */
|
||||||
std::vector<float> leaf_value_cache_;
|
std::vector<float> leaf_value_cache_;
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
#include "../../../src/common/compressed_iterator.h"
|
#include "../../../src/common/compressed_iterator.h"
|
||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace common {
|
namespace common {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user