[GPU-Plugin] Unify gpu_gpair/bst_gpair. Refactor. (#2477)
This commit is contained in:
@@ -17,8 +17,8 @@
|
||||
|
||||
#include "../../../../src/tree/param.h"
|
||||
#include "../common.cuh"
|
||||
#include "loss_functions.cuh"
|
||||
#include "node.cuh"
|
||||
#include "../types.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
@@ -66,10 +66,10 @@ DEV_INLINE void atomicArgMax(Split* address, Split val) {
|
||||
|
||||
template <typename node_id_t>
|
||||
DEV_INLINE void argMaxWithAtomics(
|
||||
int id, Split* nodeSplits, const gpu_gpair* gradScans,
|
||||
const gpu_gpair* gradSums, const float* vals, const int* colIds,
|
||||
int id, Split* nodeSplits, const bst_gpair* gradScans,
|
||||
const bst_gpair* gradSums, const float* vals, const int* colIds,
|
||||
const node_id_t* nodeAssigns, const Node<node_id_t>* nodes, int nUniqKeys,
|
||||
node_id_t nodeStart, int len, const TrainParam& param) {
|
||||
node_id_t nodeStart, int len, const GPUTrainingParam& param) {
|
||||
int nodeId = nodeAssigns[id];
|
||||
///@todo: this is really a bad check! but will be fixed when we move
|
||||
/// to key-based reduction
|
||||
@@ -78,14 +78,14 @@ DEV_INLINE void argMaxWithAtomics(
|
||||
(vals[id] == vals[id - 1]))) {
|
||||
if (nodeId != UNUSED_NODE) {
|
||||
int sumId = abs2uniqKey(id, nodeAssigns, colIds, nodeStart, nUniqKeys);
|
||||
gpu_gpair colSum = gradSums[sumId];
|
||||
bst_gpair colSum = gradSums[sumId];
|
||||
int uid = nodeId - nodeStart;
|
||||
Node<node_id_t> n = nodes[nodeId];
|
||||
gpu_gpair parentSum = n.gradSum;
|
||||
bst_gpair parentSum = n.gradSum;
|
||||
float parentGain = n.score;
|
||||
bool tmp;
|
||||
Split s;
|
||||
gpu_gpair missing = parentSum - colSum;
|
||||
bst_gpair missing = parentSum - colSum;
|
||||
s.score = loss_chg_missing(gradScans[id], missing, parentSum, parentGain,
|
||||
param, tmp);
|
||||
s.index = id;
|
||||
@@ -96,7 +96,7 @@ DEV_INLINE void argMaxWithAtomics(
|
||||
|
||||
template <typename node_id_t>
|
||||
__global__ void atomicArgMaxByKeyGmem(
|
||||
Split* nodeSplits, const gpu_gpair* gradScans, const gpu_gpair* gradSums,
|
||||
Split* nodeSplits, const bst_gpair* gradScans, const bst_gpair* gradSums,
|
||||
const float* vals, const int* colIds, const node_id_t* nodeAssigns,
|
||||
const Node<node_id_t>* nodes, int nUniqKeys, node_id_t nodeStart, int len,
|
||||
const TrainParam param) {
|
||||
@@ -104,13 +104,13 @@ __global__ void atomicArgMaxByKeyGmem(
|
||||
const int stride = blockDim.x * gridDim.x;
|
||||
for (; id < len; id += stride) {
|
||||
argMaxWithAtomics(id, nodeSplits, gradScans, gradSums, vals, colIds,
|
||||
nodeAssigns, nodes, nUniqKeys, nodeStart, len, param);
|
||||
nodeAssigns, nodes, nUniqKeys, nodeStart, len, GPUTrainingParam(param));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename node_id_t>
|
||||
__global__ void atomicArgMaxByKeySmem(
|
||||
Split* nodeSplits, const gpu_gpair* gradScans, const gpu_gpair* gradSums,
|
||||
Split* nodeSplits, const bst_gpair* gradScans, const bst_gpair* gradSums,
|
||||
const float* vals, const int* colIds, const node_id_t* nodeAssigns,
|
||||
const Node<node_id_t>* nodes, int nUniqKeys, node_id_t nodeStart, int len,
|
||||
const TrainParam param) {
|
||||
@@ -153,8 +153,8 @@ __global__ void atomicArgMaxByKeySmem(
|
||||
* @param algo which algorithm to use for argmax_by_key
|
||||
*/
|
||||
template <typename node_id_t, int BLKDIM = 256, int ITEMS_PER_THREAD = 4>
|
||||
void argMaxByKey(Split* nodeSplits, const gpu_gpair* gradScans,
|
||||
const gpu_gpair* gradSums, const float* vals,
|
||||
void argMaxByKey(Split* nodeSplits, const bst_gpair* gradScans,
|
||||
const bst_gpair* gradSums, const float* vals,
|
||||
const int* colIds, const node_id_t* nodeAssigns,
|
||||
const Node<node_id_t>* nodes, int nUniqKeys,
|
||||
node_id_t nodeStart, int len, const TrainParam param,
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "../common.cuh"
|
||||
#include "gradients.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
@@ -24,11 +23,11 @@ namespace exact {
|
||||
|
||||
/**
|
||||
* @struct Pair fused_scan_reduce_by_key.cuh
|
||||
* @brief Pair used for key based scan operations on gpu_gpair
|
||||
* @brief Pair used for key based scan operations on bst_gpair
|
||||
*/
|
||||
struct Pair {
|
||||
int key;
|
||||
gpu_gpair value;
|
||||
bst_gpair value;
|
||||
};
|
||||
|
||||
/** define a key that's not used at all in the entire boosting process */
|
||||
@@ -61,15 +60,27 @@ struct AddByKey {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * @brief Reads a gradient pair through the instance index buffer
 * @param id position in instIds whose entry selects the gradient to read
 * @param vals gradient pair buffer, indexed by the values stored in instIds
 * @param instIds instance index buffer
 * @return a copy of vals[instIds[id]]
 */
HOST_DEV_INLINE bst_gpair get(int id, const bst_gpair* vals,
                              const int* instIds) {
  // indirect lookup: instIds translates the position into a slot of vals
  const int instance = instIds[id];
  return vals[instance];
}
|
||||
|
||||
template <typename node_id_t, int BLKDIM_L1L3>
|
||||
__global__ void cubScanByKeyL1(gpu_gpair* scans, const gpu_gpair* vals,
|
||||
const int* instIds, gpu_gpair* mScans,
|
||||
__global__ void cubScanByKeyL1(bst_gpair* scans, const bst_gpair* vals,
|
||||
const int* instIds, bst_gpair* mScans,
|
||||
int* mKeys, const node_id_t* keys, int nUniqKeys,
|
||||
const int* colIds, node_id_t nodeStart,
|
||||
const int size) {
|
||||
Pair rootPair = {NONE_KEY, gpu_gpair(0.f, 0.f)};
|
||||
Pair rootPair = {NONE_KEY, bst_gpair(0.f, 0.f)};
|
||||
int myKey;
|
||||
gpu_gpair myValue;
|
||||
bst_gpair myValue;
|
||||
typedef cub::BlockScan<Pair, BLKDIM_L1L3> BlockScan;
|
||||
__shared__ typename BlockScan::TempStorage temp_storage;
|
||||
Pair threadData;
|
||||
@@ -98,14 +109,14 @@ __global__ void cubScanByKeyL1(gpu_gpair* scans, const gpu_gpair* vals,
|
||||
}
|
||||
if (threadIdx.x == BLKDIM_L1L3 - 1) {
|
||||
threadData.value =
|
||||
(myKey == previousKey) ? threadData.value : gpu_gpair(0.0f, 0.0f);
|
||||
(myKey == previousKey) ? threadData.value : bst_gpair(0.0f, 0.0f);
|
||||
mKeys[blockIdx.x] = myKey;
|
||||
mScans[blockIdx.x] = threadData.value + myValue;
|
||||
}
|
||||
}
|
||||
|
||||
template <int BLKSIZE>
|
||||
__global__ void cubScanByKeyL2(gpu_gpair* mScans, int* mKeys, int mLength) {
|
||||
__global__ void cubScanByKeyL2(bst_gpair* mScans, int* mKeys, int mLength) {
|
||||
typedef cub::BlockScan<Pair, BLKSIZE, cub::BLOCK_SCAN_WARP_SCANS> BlockScan;
|
||||
Pair threadData;
|
||||
__shared__ typename BlockScan::TempStorage temp_storage;
|
||||
@@ -119,9 +130,9 @@ __global__ void cubScanByKeyL2(gpu_gpair* mScans, int* mKeys, int mLength) {
|
||||
}
|
||||
|
||||
template <typename node_id_t, int BLKDIM_L1L3>
|
||||
__global__ void cubScanByKeyL3(gpu_gpair* sums, gpu_gpair* scans,
|
||||
const gpu_gpair* vals, const int* instIds,
|
||||
const gpu_gpair* mScans, const int* mKeys,
|
||||
__global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans,
|
||||
const bst_gpair* vals, const int* instIds,
|
||||
const bst_gpair* mScans, const int* mKeys,
|
||||
const node_id_t* keys, int nUniqKeys,
|
||||
const int* colIds, node_id_t nodeStart,
|
||||
const int size) {
|
||||
@@ -130,19 +141,19 @@ __global__ void cubScanByKeyL3(gpu_gpair* sums, gpu_gpair* scans,
|
||||
// to avoid the following warning from nvcc:
|
||||
// __shared__ memory variable with non-empty constructor or destructor
|
||||
// (potential race between threads)
|
||||
__shared__ char gradBuff[sizeof(gpu_gpair)];
|
||||
__shared__ char gradBuff[sizeof(bst_gpair)];
|
||||
__shared__ int s_mKeys;
|
||||
gpu_gpair* s_mScans = (gpu_gpair*)gradBuff;
|
||||
bst_gpair* s_mScans = (bst_gpair*)gradBuff;
|
||||
if (tid >= size) return;
|
||||
// cache block-wide partial scan info
|
||||
if (relId == 0) {
|
||||
s_mKeys = (blockIdx.x > 0) ? mKeys[blockIdx.x - 1] : NONE_KEY;
|
||||
s_mScans[0] = (blockIdx.x > 0) ? mScans[blockIdx.x - 1] : gpu_gpair();
|
||||
s_mScans[0] = (blockIdx.x > 0) ? mScans[blockIdx.x - 1] : bst_gpair();
|
||||
}
|
||||
int myKey = abs2uniqKey(tid, keys, colIds, nodeStart, nUniqKeys);
|
||||
int previousKey = tid == 0 ? NONE_KEY : abs2uniqKey(tid - 1, keys, colIds,
|
||||
nodeStart, nUniqKeys);
|
||||
gpu_gpair myValue = scans[tid];
|
||||
bst_gpair myValue = scans[tid];
|
||||
__syncthreads();
|
||||
if (blockIdx.x > 0 && s_mKeys == previousKey) {
|
||||
myValue += s_mScans[0];
|
||||
@@ -152,7 +163,7 @@ __global__ void cubScanByKeyL3(gpu_gpair* sums, gpu_gpair* scans,
|
||||
}
|
||||
if ((previousKey != myKey) && (previousKey >= 0)) {
|
||||
sums[previousKey] = myValue;
|
||||
myValue = gpu_gpair(0.0f, 0.0f);
|
||||
myValue = bst_gpair(0.0f, 0.0f);
|
||||
}
|
||||
scans[tid] = myValue;
|
||||
}
|
||||
@@ -178,12 +189,12 @@ __global__ void cubScanByKeyL3(gpu_gpair* sums, gpu_gpair* scans,
|
||||
* @param nodeStart index of the leftmost node in the current level
|
||||
*/
|
||||
template <typename node_id_t, int BLKDIM_L1L3 = 256, int BLKDIM_L2 = 512>
|
||||
void reduceScanByKey(gpu_gpair* sums, gpu_gpair* scans, const gpu_gpair* vals,
|
||||
void reduceScanByKey(bst_gpair* sums, bst_gpair* scans, const bst_gpair* vals,
|
||||
const int* instIds, const node_id_t* keys, int size,
|
||||
int nUniqKeys, int nCols, gpu_gpair* tmpScans,
|
||||
int nUniqKeys, int nCols, bst_gpair* tmpScans,
|
||||
int* tmpKeys, const int* colIds, node_id_t nodeStart) {
|
||||
int nBlks = dh::div_round_up(size, BLKDIM_L1L3);
|
||||
cudaMemset(sums, 0, nUniqKeys * nCols * sizeof(gpu_gpair));
|
||||
cudaMemset(sums, 0, nUniqKeys * nCols * sizeof(bst_gpair));
|
||||
cubScanByKeyL1<node_id_t, BLKDIM_L1L3><<<nBlks, BLKDIM_L1L3>>>(
|
||||
scans, vals, instIds, tmpScans, tmpKeys, keys, nUniqKeys, colIds,
|
||||
nodeStart, size);
|
||||
|
||||
@@ -19,13 +19,11 @@
|
||||
#include <vector>
|
||||
#include "../../../../src/tree/param.h"
|
||||
#include "../common.cuh"
|
||||
#include "argmax_by_key.cuh"
|
||||
#include "cub/cub.cuh"
|
||||
#include "fused_scan_reduce_by_key.cuh"
|
||||
#include "gradients.cuh"
|
||||
#include "loss_functions.cuh"
|
||||
#include <vector>
|
||||
#include "node.cuh"
|
||||
#include "split2node.cuh"
|
||||
#include "argmax_by_key.cuh"
|
||||
#include "fused_scan_reduce_by_key.cuh"
|
||||
#include "xgboost/tree_updater.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -33,13 +31,13 @@ namespace tree {
|
||||
namespace exact {
|
||||
|
||||
template <typename node_id_t>
|
||||
__global__ void initRootNode(Node<node_id_t>* nodes, const gpu_gpair* sums,
|
||||
__global__ void initRootNode(Node<node_id_t>* nodes, const bst_gpair* sums,
|
||||
const TrainParam param) {
|
||||
// gradients already evaluated inside transferGrads
|
||||
Node<node_id_t> n;
|
||||
n.gradSum = sums[0];
|
||||
n.score = CalcGain(param, n.gradSum.g, n.gradSum.h);
|
||||
n.weight = CalcWeight(param, n.gradSum.g, n.gradSum.h);
|
||||
n.score = CalcGain(param, n.gradSum.grad , n.gradSum.hess);
|
||||
n.weight = CalcWeight(param, n.gradSum.grad , n.gradSum.hess);
|
||||
n.id = 0;
|
||||
nodes[0] = n;
|
||||
}
|
||||
@@ -198,13 +196,13 @@ class GPUBuilder {
|
||||
dh::dvec<int> instIds_cached;
|
||||
/** column offsets for these feature values */
|
||||
dh::dvec<int> colOffsets;
|
||||
dh::dvec<gpu_gpair> gradsInst;
|
||||
dh::dvec<bst_gpair> gradsInst;
|
||||
dh::dvec2<node_id_t> nodeAssigns;
|
||||
dh::dvec2<int> nodeLocations;
|
||||
dh::dvec<Node<node_id_t>> nodes;
|
||||
dh::dvec<node_id_t> nodeAssignsPerInst;
|
||||
dh::dvec<gpu_gpair> gradSums;
|
||||
dh::dvec<gpu_gpair> gradScans;
|
||||
dh::dvec<bst_gpair> gradSums;
|
||||
dh::dvec<bst_gpair> gradScans;
|
||||
dh::dvec<Split> nodeSplits;
|
||||
int nVals;
|
||||
int nRows;
|
||||
@@ -212,7 +210,7 @@ class GPUBuilder {
|
||||
int maxNodes;
|
||||
int maxLeaves;
|
||||
dh::CubMemory tmp_mem;
|
||||
dh::dvec<gpu_gpair> tmpScanGradBuff;
|
||||
dh::dvec<bst_gpair> tmpScanGradBuff;
|
||||
dh::dvec<int> tmpScanKeyBuff;
|
||||
dh::dvec<int> colIds;
|
||||
dh::bulk_allocator<dh::memory_type::DEVICE> ba;
|
||||
@@ -310,10 +308,10 @@ class GPUBuilder {
|
||||
void transferGrads(const std::vector<bst_gpair>& gpair) {
|
||||
// HACK
|
||||
dh::safe_cuda(cudaMemcpy(gradsInst.data(), &(gpair[0]),
|
||||
sizeof(gpu_gpair) * nRows,
|
||||
sizeof(bst_gpair) * nRows,
|
||||
cudaMemcpyHostToDevice));
|
||||
// evaluate the full-grad reduction for the root node
|
||||
sumReduction<gpu_gpair>(tmp_mem, gradsInst, gradSums, nRows);
|
||||
sumReduction<bst_gpair>(tmp_mem, gradsInst, gradSums, nRows);
|
||||
}
|
||||
|
||||
void initNodeData(int level, node_id_t nodeStart, int nNodes) {
|
||||
@@ -371,13 +369,13 @@ class GPUBuilder {
|
||||
const Node<node_id_t>& n = hNodes[i];
|
||||
if ((i != 0) && hNodes[i].isLeaf()) {
|
||||
tree[nodeId].set_leaf(n.weight * param.learning_rate);
|
||||
tree.stat(nodeId).sum_hess = n.gradSum.h;
|
||||
tree.stat(nodeId).sum_hess = n.gradSum.hess;
|
||||
++nodeId;
|
||||
} else if (!hNodes[i].isUnused()) {
|
||||
tree.AddChilds(nodeId);
|
||||
tree[nodeId].set_split(n.colIdx, n.threshold, n.dir == LeftDir);
|
||||
tree.stat(nodeId).loss_chg = n.score;
|
||||
tree.stat(nodeId).sum_hess = n.gradSum.h;
|
||||
tree.stat(nodeId).sum_hess = n.gradSum.hess;
|
||||
tree.stat(nodeId).base_weight = n.weight;
|
||||
tree[tree[nodeId].cleft()].set_leaf(0);
|
||||
tree[tree[nodeId].cright()].set_leaf(0);
|
||||
|
||||
@@ -1,91 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2017, NVIDIA CORPORATION, Xgboost contributors. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "../common.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace exact {
|
||||
|
||||
/**
 * @struct gpu_gpair gradients.cuh
 * @brief First- and second-order gradient pair used while iteratively
 * building the tree
 */
struct gpu_gpair {
  /** the 'g_i' as it appears in the xgboost paper */
  float g;
  /** the 'h_i' as it appears in the xgboost paper */
  float h;

  /** @brief Creates a pair with both components set to zero */
  HOST_DEV_INLINE gpu_gpair() : g(0.f), h(0.f) {}

  /** @brief Creates a pair from an explicit gradient and hessian */
  HOST_DEV_INLINE gpu_gpair(const float& _g, const float& _h) : g(_g), h(_h) {}

  /** @brief Component-wise copy of another pair */
  HOST_DEV_INLINE gpu_gpair(const gpu_gpair& a) : g(a.g), h(a.h) {}

  /**
   * @brief Creates a pair whose two components both hold the given integer,
   * converted to float
   * @param value integer assigned to both g and h
   */
  HOST_DEV_INLINE gpu_gpair(int value)
      : g(static_cast<float>(value)), h(static_cast<float>(value)) {}

  /**
   * @brief Checks whether the hessian is more than the defined weight
   * @param minWeight minimum weight to be compared against
   * @return true if the hessian is greater than the minWeight
   * @note this is useful in deciding whether to further split to child node
   */
  HOST_DEV_INLINE bool isSplittable(float minWeight) const {
    return minWeight < h;
  }

  /** @brief Adds another pair to this one, component by component */
  HOST_DEV_INLINE gpu_gpair& operator+=(const gpu_gpair& rhs) {
    g += rhs.g;
    h += rhs.h;
    return *this;
  }

  /** @brief Subtracts another pair from this one, component by component */
  HOST_DEV_INLINE gpu_gpair& operator-=(const gpu_gpair& rhs) {
    g -= rhs.g;
    h -= rhs.h;
    return *this;
  }

  /** @brief Component-wise sum of two pairs */
  HOST_DEV_INLINE friend gpu_gpair operator+(const gpu_gpair& a,
                                             const gpu_gpair& b) {
    gpu_gpair sum(a);
    sum += b;
    return sum;
  }

  /** @brief Component-wise difference of two pairs */
  HOST_DEV_INLINE friend gpu_gpair operator-(const gpu_gpair& a,
                                             const gpu_gpair& b) {
    gpu_gpair diff(a);
    diff -= b;
    return diff;
  }
};
|
||||
|
||||
/**
 * @brief Reads a gradient pair through the instance index buffer
 * @param id position in instIds whose entry selects the gradient to read
 * @param vals gradient pair buffer, indexed by the values stored in instIds
 * @param instIds instance index buffer
 * @return a copy of vals[instIds[id]]
 */
HOST_DEV_INLINE gpu_gpair get(int id, const gpu_gpair* vals,
                              const int* instIds) {
  // indirect lookup: instIds translates the position into a slot of vals
  const int instance = instIds[id];
  return vals[instance];
}
|
||||
|
||||
} // namespace exact
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
@@ -1,60 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2017, NVIDIA CORPORATION, Xgboost contributors. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "../common.cuh"
|
||||
#include "gradients.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace exact {
|
||||
|
||||
/**
 * @brief Gain change of a candidate split for one fixed direction of the
 * missing values
 * @param param training parameters forwarded to CalcGain
 * @param scan gradient sum that seeds the left child
 * @param missing gradient sum of the missing values; added to the left child
 * only when missing_left is true
 * @param parent_sum gradient sum of the node being split; the right child
 * receives parent_sum minus the left child
 * @param parent_gain gain of the node being split
 * @param missing_left whether the missing values are sent to the left child
 * @return left gain + right gain - parent_gain
 */
HOST_DEV_INLINE float device_calc_loss_chg(
    const TrainParam &param, const gpu_gpair &scan, const gpu_gpair &missing,
    const gpu_gpair &parent_sum, const float &parent_gain, bool missing_left) {
  gpu_gpair left = scan;
  if (missing_left) {
    left += missing;
  }
  // whatever is not on the left (including missing, if not added above)
  // ends up on the right
  gpu_gpair right = parent_sum - left;
  float left_gain = CalcGain(param, left.g, left.h);
  float right_gain = CalcGain(param, right.g, right.h);
  return left_gain + right_gain - parent_gain;
}
|
||||
|
||||
/**
 * @brief Evaluates a candidate split with the missing values sent left and
 * sent right, and keeps the better of the two
 * @param scan gradient sum passed to device_calc_loss_chg as the left seed
 * @param missing gradient sum of the missing values
 * @param parent_sum gradient sum of the node being split
 * @param parent_gain gain of the node being split
 * @param param training parameters forwarded to device_calc_loss_chg
 * @param missing_left_out set to true when sending the missing values left is
 * at least as good as sending them right, false otherwise
 * @return the larger of the two loss changes (ties resolve to the left)
 */
HOST_DEV_INLINE float loss_chg_missing(const gpu_gpair &scan,
                                       const gpu_gpair &missing,
                                       const gpu_gpair &parent_sum,
                                       const float &parent_gain,
                                       const TrainParam &param,
                                       bool &missing_left_out) {
  float missing_left_loss =
      device_calc_loss_chg(param, scan, missing, parent_sum, parent_gain, true);
  float missing_right_loss = device_calc_loss_chg(
      param, scan, missing, parent_sum, parent_gain, false);
  // >= keeps "left" as the default direction when both choices score equally
  if (missing_left_loss >= missing_right_loss) {
    missing_left_out = true;
    return missing_left_loss;
  } else {
    missing_left_out = false;
    return missing_right_loss;
  }
}
|
||||
|
||||
} // namespace exact
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
@@ -17,7 +17,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "../common.cuh"
|
||||
#include "gradients.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
@@ -67,7 +66,7 @@ template <typename node_id_t>
|
||||
class Node {
|
||||
public:
|
||||
/** sum of gradients across all training samples part of this node */
|
||||
gpu_gpair gradSum;
|
||||
bst_gpair gradSum;
|
||||
/** the optimal score for this node */
|
||||
float score;
|
||||
/** weightage for this node */
|
||||
|
||||
@@ -16,8 +16,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../../../src/tree/param.h"
|
||||
#include "gradients.cuh"
|
||||
#include "loss_functions.cuh"
|
||||
#include "node.cuh"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -37,11 +35,11 @@ namespace exact {
|
||||
*/
|
||||
template <typename node_id_t>
|
||||
DEV_INLINE void updateOneChildNode(Node<node_id_t>* nodes, int nid,
|
||||
const gpu_gpair& grad,
|
||||
const bst_gpair& grad,
|
||||
const TrainParam& param) {
|
||||
nodes[nid].gradSum = grad;
|
||||
nodes[nid].score = CalcGain(param, grad.g, grad.h);
|
||||
nodes[nid].weight = CalcWeight(param, grad.g, grad.h);
|
||||
nodes[nid].score = CalcGain(param, grad.grad, grad.hess);
|
||||
nodes[nid].weight = CalcWeight(param, grad.grad, grad.hess);
|
||||
nodes[nid].id = nid;
|
||||
}
|
||||
|
||||
@@ -56,7 +54,7 @@ DEV_INLINE void updateOneChildNode(Node<node_id_t>* nodes, int nid,
|
||||
*/
|
||||
template <typename node_id_t>
|
||||
DEV_INLINE void updateChildNodes(Node<node_id_t>* nodes, int pid,
|
||||
const gpu_gpair& gradL, const gpu_gpair& gradR,
|
||||
const bst_gpair& gradL, const bst_gpair& gradR,
|
||||
const TrainParam& param) {
|
||||
int childId = (pid * 2) + 1;
|
||||
updateOneChildNode(nodes, childId, gradL, param);
|
||||
@@ -66,15 +64,15 @@ DEV_INLINE void updateChildNodes(Node<node_id_t>* nodes, int pid,
|
||||
template <typename node_id_t>
|
||||
DEV_INLINE void updateNodeAndChildren(Node<node_id_t>* nodes, const Split& s,
|
||||
const Node<node_id_t>& n, int absNodeId,
|
||||
int colId, const gpu_gpair& gradScan,
|
||||
const gpu_gpair& colSum, float thresh,
|
||||
int colId, const bst_gpair& gradScan,
|
||||
const bst_gpair& colSum, float thresh,
|
||||
const TrainParam& param) {
|
||||
bool missingLeft = true;
|
||||
// get the default direction for the current node
|
||||
gpu_gpair missing = n.gradSum - colSum;
|
||||
bst_gpair missing = n.gradSum - colSum;
|
||||
loss_chg_missing(gradScan, missing, n.gradSum, n.score, param, missingLeft);
|
||||
// get the score/weight/id/gradSum for left and right child nodes
|
||||
gpu_gpair lGradSum, rGradSum;
|
||||
bst_gpair lGradSum, rGradSum;
|
||||
if (missingLeft) {
|
||||
lGradSum = gradScan + n.gradSum - colSum;
|
||||
} else {
|
||||
@@ -90,8 +88,8 @@ DEV_INLINE void updateNodeAndChildren(Node<node_id_t>* nodes, const Split& s,
|
||||
|
||||
template <typename node_id_t, int BLKDIM = 256>
|
||||
__global__ void split2nodeKernel(
|
||||
Node<node_id_t>* nodes, const Split* nodeSplits, const gpu_gpair* gradScans,
|
||||
const gpu_gpair* gradSums, const float* vals, const int* colIds,
|
||||
Node<node_id_t>* nodes, const Split* nodeSplits, const bst_gpair* gradScans,
|
||||
const bst_gpair* gradSums, const float* vals, const int* colIds,
|
||||
const int* colOffsets, const node_id_t* nodeAssigns, int nUniqKeys,
|
||||
node_id_t nodeStart, int nCols, const TrainParam param) {
|
||||
int uid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
@@ -132,7 +130,7 @@ __global__ void split2nodeKernel(
|
||||
*/
|
||||
template <typename node_id_t, int BLKDIM = 256>
|
||||
void split2node(Node<node_id_t>* nodes, const Split* nodeSplits,
|
||||
const gpu_gpair* gradScans, const gpu_gpair* gradSums,
|
||||
const bst_gpair* gradScans, const bst_gpair* gradSums,
|
||||
const float* vals, const int* colIds, const int* colOffsets,
|
||||
const node_id_t* nodeAssigns, int nUniqKeys,
|
||||
node_id_t nodeStart, int nCols, const TrainParam param) {
|
||||
|
||||
Reference in New Issue
Block a user