[GPU-Plugin] Resolve double compilation issue (#2479)
This commit is contained in:
@@ -60,7 +60,7 @@ DEV_INLINE void atomicArgMax(Split* address, Split val) {
|
||||
do {
|
||||
assumed = old;
|
||||
Split res = maxSplit(val, *(Split*)&assumed);
|
||||
old = atomicCAS(intAddress, assumed, *(unsigned long long*)&res);
|
||||
old = atomicCAS(intAddress, assumed, *(uint64_t*)&res);
|
||||
} while (assumed != old);
|
||||
}
|
||||
|
||||
@@ -115,7 +115,7 @@ __global__ void atomicArgMaxByKeySmem(
|
||||
const Node<node_id_t>* nodes, int nUniqKeys, node_id_t nodeStart, int len,
|
||||
const TrainParam param) {
|
||||
extern __shared__ char sArr[];
|
||||
Split* sNodeSplits = (Split*)sArr;
|
||||
Split* sNodeSplits = reinterpret_cast<Split*>(sArr);
|
||||
int tid = threadIdx.x;
|
||||
Split defVal;
|
||||
#pragma unroll 1
|
||||
@@ -176,7 +176,7 @@ void argMaxByKey(Split* nodeSplits, const bst_gpair* gradScans,
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("argMaxByKey: Bad algo passed!");
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace exact
|
||||
|
||||
@@ -143,7 +143,7 @@ __global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans,
|
||||
// (potential race between threads)
|
||||
__shared__ char gradBuff[sizeof(bst_gpair)];
|
||||
__shared__ int s_mKeys;
|
||||
bst_gpair* s_mScans = (bst_gpair*)gradBuff;
|
||||
bst_gpair* s_mScans = reinterpret_cast<bst_gpair*>(gradBuff);
|
||||
if (tid >= size) return;
|
||||
// cache block-wide partial scan info
|
||||
if (relId == 0) {
|
||||
|
||||
@@ -16,14 +16,14 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "../../../../src/tree/param.h"
|
||||
#include "../common.cuh"
|
||||
#include <vector>
|
||||
#include "node.cuh"
|
||||
#include "split2node.cuh"
|
||||
#include "argmax_by_key.cuh"
|
||||
#include "fused_scan_reduce_by_key.cuh"
|
||||
#include "node.cuh"
|
||||
#include "split2node.cuh"
|
||||
#include "xgboost/tree_updater.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -36,8 +36,8 @@ __global__ void initRootNode(Node<node_id_t>* nodes, const bst_gpair* sums,
|
||||
// gradients already evaluated inside transferGrads
|
||||
Node<node_id_t> n;
|
||||
n.gradSum = sums[0];
|
||||
n.score = CalcGain(param, n.gradSum.grad , n.gradSum.hess);
|
||||
n.weight = CalcWeight(param, n.gradSum.grad , n.gradSum.hess);
|
||||
n.score = CalcGain(param, n.gradSum.grad, n.gradSum.hess);
|
||||
n.weight = CalcWeight(param, n.gradSum.grad, n.gradSum.hess);
|
||||
n.id = 0;
|
||||
nodes[0] = n;
|
||||
}
|
||||
@@ -173,7 +173,7 @@ class GPUBuilder {
|
||||
}
|
||||
// mark all the used nodes with unused children as leaf nodes
|
||||
markLeaves();
|
||||
dense2sparse(*hTree);
|
||||
dense2sparse(hTree);
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -299,7 +299,8 @@ class GPUBuilder {
|
||||
vals.current_dvec() = fval;
|
||||
instIds.current_dvec() = fId;
|
||||
colOffsets = offset;
|
||||
segmentedSort<float, int>(tmp_mem, vals, instIds, nVals, nCols, colOffsets);
|
||||
segmentedSort<float, int>(&tmp_mem, &vals, &instIds, nVals, nCols,
|
||||
colOffsets);
|
||||
vals_cached = vals.current_dvec();
|
||||
instIds_cached = instIds.current_dvec();
|
||||
assignColIds<node_id_t><<<nCols, 512>>>(colIds.data(), colOffsets.data());
|
||||
@@ -347,8 +348,8 @@ class GPUBuilder {
|
||||
void sortKeys(int level) {
|
||||
// segmented-sort the arrays based on node ids
|
||||
// but we don't need more than level+1 bits for sorting!
|
||||
segmentedSort(tmp_mem, nodeAssigns, nodeLocations, nVals, nCols, colOffsets,
|
||||
0, level + 1);
|
||||
segmentedSort(&tmp_mem, &nodeAssigns, &nodeLocations, nVals, nCols,
|
||||
colOffsets, 0, level + 1);
|
||||
gather<float, int>(dh::get_device_idx(param.gpu_id), vals.other(),
|
||||
vals.current(), instIds.other(), instIds.current(),
|
||||
nodeLocations.current(), nVals);
|
||||
@@ -362,7 +363,8 @@ class GPUBuilder {
|
||||
markLeavesKernel<<<nBlks, BlkDim>>>(nodes.data(), maxNodes);
|
||||
}
|
||||
|
||||
void dense2sparse(RegTree& tree) {
|
||||
void dense2sparse(RegTree* p_tree) {
|
||||
RegTree& tree = *p_tree;
|
||||
std::vector<Node<node_id_t>> hNodes = nodes.as_vector();
|
||||
int nodeId = 0;
|
||||
for (int i = 0; i < maxNodes; ++i) {
|
||||
|
||||
Reference in New Issue
Block a user