Fix several GPU bugs (#2916)
* Fix #2905
* Fix gpu_exact test failures
* Fix bug in GPU prediction where multiple calls to batch prediction can produce incorrect results
* Fix GPU documentation formatting
This commit is contained in:
@@ -292,11 +292,9 @@ class GPUPredictor : public xgboost::Predictor {
|
||||
thrust::copy(model.tree_info.begin(), model.tree_info.end(),
|
||||
tree_group.begin());
|
||||
|
||||
if (device_matrix->predictions.size() != out_preds->size()) {
|
||||
device_matrix->predictions.resize(out_preds->size());
|
||||
thrust::copy(out_preds->begin(), out_preds->end(),
|
||||
device_matrix->predictions.begin());
|
||||
}
|
||||
device_matrix->predictions.resize(out_preds->size());
|
||||
thrust::copy(out_preds->begin(), out_preds->end(),
|
||||
device_matrix->predictions.begin());
|
||||
|
||||
const int BLOCK_THREADS = 128;
|
||||
const int GRID_SIZE = static_cast<int>(
|
||||
|
||||
@@ -336,8 +336,8 @@ struct XGBOOST_ALIGNAS(16) GradStats {
|
||||
this->Add(b.GetGrad(), b.GetHess());
|
||||
}
|
||||
/*! \brief calculate leaf weight */
|
||||
template <typename param_t>
|
||||
inline double CalcWeight(const param_t& param) const {
|
||||
template <typename param_t>
|
||||
XGBOOST_DEVICE inline double CalcWeight(const param_t ¶m) const {
|
||||
return xgboost::tree::CalcWeight(param, sum_grad, sum_hess);
|
||||
}
|
||||
/*! \brief calculate gain of the solution */
|
||||
|
||||
@@ -302,7 +302,7 @@ DEV_INLINE void argMaxWithAtomics(
|
||||
ExactSplitCandidate s;
|
||||
bst_gpair missing = parentSum - colSum;
|
||||
s.score = loss_chg_missing(gradScans[id], missing, parentSum, parentGain,
|
||||
param, 0, ValueConstraint(), tmp);
|
||||
param, tmp);
|
||||
s.index = id;
|
||||
atomicArgMax(nodeSplits + uid, s);
|
||||
} // end if nodeId != UNUSED_NODE
|
||||
@@ -580,7 +580,7 @@ class GPUMaker : public TreeUpdater {
|
||||
// get the default direction for the current node
|
||||
bst_gpair missing = n.sum_gradients - gradSum;
|
||||
loss_chg_missing(gradScan, missing, n.sum_gradients, n.root_gain,
|
||||
gpu_param, 0, ValueConstraint(), missingLeft);
|
||||
gpu_param, missingLeft);
|
||||
// get the score/weight/id/gradSum for left and right child nodes
|
||||
bst_gpair lGradSum = missingLeft ? gradScan + missing : gradScan;
|
||||
bst_gpair rGradSum = n.sum_gradients - lGradSum;
|
||||
|
||||
@@ -240,6 +240,29 @@ __device__ inline float device_calc_loss_chg(const GPUTrainingParam& param,
|
||||
return left_gain + right_gain - parent_gain;
|
||||
}
|
||||
|
||||
// Without constraints.
// Evaluates the loss change for a split candidate when the direction of
// missing values is not yet decided: tries sending the missing-value
// gradient sum left and right, reports the better direction through
// missing_left_out, and returns the corresponding loss change. Ties break
// toward the left (missing values go left when both directions are equal).
template <typename gpair_t>
__device__ float inline loss_chg_missing(const gpair_t& scan,
                                         const gpair_t& missing,
                                         const gpair_t& parent_sum,
                                         const float& parent_gain,
                                         const GPUTrainingParam& param,
                                         bool& missing_left_out) { // NOLINT
  // Candidate loss when the missing statistics are added to the left child.
  const float left_loss =
      device_calc_loss_chg(param, scan + missing, parent_sum, parent_gain);
  // Candidate loss when the missing statistics stay on the right.
  const float right_loss =
      device_calc_loss_chg(param, scan, parent_sum, parent_gain);

  // >= preserves the original tie-breaking: equal losses send missing left.
  missing_left_out = (left_loss >= right_loss);
  return missing_left_out ? left_loss : right_loss;
}
|
||||
|
||||
// With constraints
|
||||
template <typename gpair_t>
|
||||
__device__ float inline loss_chg_missing(
|
||||
const gpair_t& scan, const gpair_t& missing, const gpair_t& parent_sum,
|
||||
|
||||
@@ -287,6 +287,10 @@ struct DeviceShard {
|
||||
size_t compressed_size_bytes =
|
||||
common::CompressedBufferWriter::CalculateBufferSize(
|
||||
ellpack_matrix.size(), num_symbols);
|
||||
|
||||
CHECK(!(param.max_leaves == 0 && param.max_depth == 0))
|
||||
<< "Max leaves and max depth cannot both be unconstrained for "
|
||||
"gpu_hist.";
|
||||
int max_nodes =
|
||||
param.max_leaves > 0 ? param.max_leaves * 2 : n_nodes(param.max_depth);
|
||||
ba.allocate(device_idx, param.silent, &gidx_buffer, compressed_size_bytes,
|
||||
|
||||
Reference in New Issue
Block a user