[GPU-Plugin] Resolve double compilation issue (#2479)

This commit is contained in:
Rory Mitchell
2017-07-03 13:29:10 +12:00
committed by GitHub
parent 5f1b0bb386
commit ed8bc4521e
11 changed files with 161 additions and 138 deletions

View File

@@ -60,7 +60,7 @@ DEV_INLINE void atomicArgMax(Split* address, Split val) {
do {
assumed = old;
Split res = maxSplit(val, *(Split*)&assumed);
old = atomicCAS(intAddress, assumed, *(unsigned long long*)&res);
old = atomicCAS(intAddress, assumed, *(uint64_t*)&res);
} while (assumed != old);
}
@@ -115,7 +115,7 @@ __global__ void atomicArgMaxByKeySmem(
const Node<node_id_t>* nodes, int nUniqKeys, node_id_t nodeStart, int len,
const TrainParam param) {
extern __shared__ char sArr[];
Split* sNodeSplits = (Split*)sArr;
Split* sNodeSplits = reinterpret_cast<Split*>(sArr);
int tid = threadIdx.x;
Split defVal;
#pragma unroll 1
@@ -176,7 +176,7 @@ void argMaxByKey(Split* nodeSplits, const bst_gpair* gradScans,
break;
default:
throw std::runtime_error("argMaxByKey: Bad algo passed!");
};
}
}
} // namespace exact

View File

@@ -143,7 +143,7 @@ __global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans,
// (potential race between threads)
__shared__ char gradBuff[sizeof(bst_gpair)];
__shared__ int s_mKeys;
bst_gpair* s_mScans = (bst_gpair*)gradBuff;
bst_gpair* s_mScans = reinterpret_cast<bst_gpair*>(gradBuff);
if (tid >= size) return;
// cache block-wide partial scan info
if (relId == 0) {

View File

@@ -16,14 +16,14 @@
*/
#pragma once
#include <string>
#include <vector>
#include "../../../../src/tree/param.h"
#include "../common.cuh"
#include <vector>
#include "node.cuh"
#include "split2node.cuh"
#include "argmax_by_key.cuh"
#include "fused_scan_reduce_by_key.cuh"
#include "node.cuh"
#include "split2node.cuh"
#include "xgboost/tree_updater.h"
namespace xgboost {
@@ -36,8 +36,8 @@ __global__ void initRootNode(Node<node_id_t>* nodes, const bst_gpair* sums,
// gradients already evaluated inside transferGrads
Node<node_id_t> n;
n.gradSum = sums[0];
n.score = CalcGain(param, n.gradSum.grad , n.gradSum.hess);
n.weight = CalcWeight(param, n.gradSum.grad , n.gradSum.hess);
n.score = CalcGain(param, n.gradSum.grad, n.gradSum.hess);
n.weight = CalcWeight(param, n.gradSum.grad, n.gradSum.hess);
n.id = 0;
nodes[0] = n;
}
@@ -173,7 +173,7 @@ class GPUBuilder {
}
// mark all the used nodes with unused children as leaf nodes
markLeaves();
dense2sparse(*hTree);
dense2sparse(hTree);
}
private:
@@ -299,7 +299,8 @@ class GPUBuilder {
vals.current_dvec() = fval;
instIds.current_dvec() = fId;
colOffsets = offset;
segmentedSort<float, int>(tmp_mem, vals, instIds, nVals, nCols, colOffsets);
segmentedSort<float, int>(&tmp_mem, &vals, &instIds, nVals, nCols,
colOffsets);
vals_cached = vals.current_dvec();
instIds_cached = instIds.current_dvec();
assignColIds<node_id_t><<<nCols, 512>>>(colIds.data(), colOffsets.data());
@@ -347,8 +348,8 @@ class GPUBuilder {
void sortKeys(int level) {
// segmented-sort the arrays based on node-id's
// but we don't need more than level+1 bits for sorting!
segmentedSort(tmp_mem, nodeAssigns, nodeLocations, nVals, nCols, colOffsets,
0, level + 1);
segmentedSort(&tmp_mem, &nodeAssigns, &nodeLocations, nVals, nCols,
colOffsets, 0, level + 1);
gather<float, int>(dh::get_device_idx(param.gpu_id), vals.other(),
vals.current(), instIds.other(), instIds.current(),
nodeLocations.current(), nVals);
@@ -362,7 +363,8 @@ class GPUBuilder {
markLeavesKernel<<<nBlks, BlkDim>>>(nodes.data(), maxNodes);
}
void dense2sparse(RegTree& tree) {
void dense2sparse(RegTree* p_tree) {
RegTree& tree = *p_tree;
std::vector<Node<node_id_t>> hNodes = nodes.as_vector();
int nodeId = 0;
for (int i = 0; i < maxNodes; ++i) {