CMake improvements (#2487)
* CMake improvements
* Add Google Test to CMake
This commit is contained in:
@@ -13,11 +13,9 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "gtest/gtest.h"
|
||||
#include <gtest/gtest.h>
|
||||
#include "../../src/exact/argmax_by_key.cuh"
|
||||
#include "../../src/exact/gradients.cuh"
|
||||
#include "../../src/exact/node.cuh"
|
||||
#include "../../src/exact/loss_functions.cuh"
|
||||
#include "utils.cuh"
|
||||
|
||||
|
||||
@@ -56,26 +54,26 @@ void argMaxTest(ArgMaxByKeyAlgo algo) {
|
||||
const int nVals = 1024;
|
||||
const int level = 0;
|
||||
const int nKeys = 1 << level;
|
||||
gpu_gpair* scans = new gpu_gpair[nVals];
|
||||
bst_gpair* scans = new bst_gpair[nVals];
|
||||
float* vals = new float[nVals];
|
||||
int* colIds = new int[nVals];
|
||||
scans[0] = gpu_gpair();
|
||||
scans[0] = bst_gpair();
|
||||
vals[0] = 0.f;
|
||||
colIds[0] = 0;
|
||||
for (int i = 1; i < nVals; ++i) {
|
||||
scans[i].g = scans[i-1].g + (0.1f * 2.f);
|
||||
scans[i].h = scans[i-1].h + (0.1f * 2.f);
|
||||
scans[i].grad = scans[i-1].grad + (0.1f * 2.f);
|
||||
scans[i].hess = scans[i-1].hess + (0.1f * 2.f);
|
||||
vals[i] = static_cast<float>(i) * 0.1f;
|
||||
colIds[i] = 0;
|
||||
}
|
||||
float* dVals;
|
||||
allocateAndUpdateOnGpu<float>(dVals, vals, nVals);
|
||||
gpu_gpair* dScans;
|
||||
allocateAndUpdateOnGpu<gpu_gpair>(dScans, scans, nVals);
|
||||
gpu_gpair* sums = new gpu_gpair[nKeys];
|
||||
sums[0].g = sums[0].h = (0.1f * 2.f * nVals);
|
||||
gpu_gpair* dSums;
|
||||
allocateAndUpdateOnGpu<gpu_gpair>(dSums, sums, nKeys);
|
||||
bst_gpair* dScans;
|
||||
allocateAndUpdateOnGpu<bst_gpair>(dScans, scans, nVals);
|
||||
bst_gpair* sums = new bst_gpair[nKeys];
|
||||
sums[0].grad = sums[0].hess = (0.1f * 2.f * nVals);
|
||||
bst_gpair* dSums;
|
||||
allocateAndUpdateOnGpu<bst_gpair>(dSums, sums, nKeys);
|
||||
int* dColIds;
|
||||
allocateAndUpdateOnGpu<int>(dColIds, colIds, nVals);
|
||||
Split* splits = new Split[nKeys];
|
||||
@@ -93,7 +91,7 @@ void argMaxTest(ArgMaxByKeyAlgo algo) {
|
||||
param.reg_alpha = 0.f;
|
||||
param.reg_lambda = 2.f;
|
||||
param.max_delta_step = 0.f;
|
||||
nodes[0].score = CalcGain(param, sums[0].g, sums[0].h);
|
||||
nodes[0].score = CalcGain(param, sums[0].grad, sums[0].hess);
|
||||
Node<node_id_t>* dNodes;
|
||||
allocateAndUpdateOnGpu<Node<node_id_t> >(dNodes, nodes, nKeys);
|
||||
argMaxByKey<node_id_t>(dSplits, dScans, dSums, dVals, dColIds, dNodeAssigns,
|
||||
|
||||
@@ -31,16 +31,16 @@ class ReduceScanByKey: public Generator<node_id_t> {
|
||||
hSums(nullptr), dSums(nullptr), hScans(nullptr), dScans(nullptr),
|
||||
outSize(this->size), nSegments(this->nKeys*this->nCols),
|
||||
hOffsets(nullptr), dOffsets(nullptr) {
|
||||
hSums = new gpu_gpair[nSegments];
|
||||
allocateOnGpu<gpu_gpair>(dSums, nSegments);
|
||||
hScans = new gpu_gpair[outSize];
|
||||
allocateOnGpu<gpu_gpair>(dScans, outSize);
|
||||
gpu_gpair* buckets = new gpu_gpair[nSegments];
|
||||
hSums = new bst_gpair[nSegments];
|
||||
allocateOnGpu<bst_gpair>(dSums, nSegments);
|
||||
hScans = new bst_gpair[outSize];
|
||||
allocateOnGpu<bst_gpair>(dScans, outSize);
|
||||
bst_gpair* buckets = new bst_gpair[nSegments];
|
||||
for (int i = 0; i < nSegments; i++) {
|
||||
buckets[i] = gpu_gpair();
|
||||
buckets[i] = bst_gpair();
|
||||
}
|
||||
for (int i = 0; i < nSegments; i++) {
|
||||
hSums[i] = gpu_gpair();
|
||||
hSums[i] = bst_gpair();
|
||||
}
|
||||
for (size_t i = 0; i < this->size; i++) {
|
||||
if (this->hKeys[i] >= 0 && this->hKeys[i] < nSegments) {
|
||||
@@ -77,10 +77,10 @@ class ReduceScanByKey: public Generator<node_id_t> {
|
||||
}
|
||||
|
||||
void run() {
|
||||
gpu_gpair* tmpScans;
|
||||
bst_gpair* tmpScans;
|
||||
int* tmpKeys;
|
||||
int tmpSize = scanTempBufferSize(this->size);
|
||||
allocateOnGpu<gpu_gpair>(tmpScans, tmpSize);
|
||||
allocateOnGpu<bst_gpair>(tmpScans, tmpSize);
|
||||
allocateOnGpu<int>(tmpKeys, tmpSize);
|
||||
TIMEIT(reduceScanByKey<node_id_t>
|
||||
(dSums, dScans, this->dVals, this->dInstIds, this->dKeys,
|
||||
@@ -94,10 +94,10 @@ class ReduceScanByKey: public Generator<node_id_t> {
|
||||
}
|
||||
|
||||
private:
|
||||
gpu_gpair* hSums;
|
||||
gpu_gpair* dSums;
|
||||
gpu_gpair* hScans;
|
||||
gpu_gpair* dScans;
|
||||
bst_gpair* hSums;
|
||||
bst_gpair* dSums;
|
||||
bst_gpair* hScans;
|
||||
bst_gpair* dScans;
|
||||
int outSize;
|
||||
int nSegments;
|
||||
int* hOffsets;
|
||||
|
||||
@@ -47,20 +47,20 @@ void testSmallData() {
|
||||
updateHostPtr<float>(tmpVal, builder.vals.current(), builder.nVals);
|
||||
int* tmpInst = new int[builder.nVals];
|
||||
updateHostPtr<int>(tmpInst, builder.instIds.current(), builder.nVals);
|
||||
gpu_gpair* tmpGrad = new gpu_gpair[builder.nRows];
|
||||
updateHostPtr<gpu_gpair>(tmpGrad, builder.gradsInst.data(), builder.nRows);
|
||||
bst_gpair* tmpGrad = new bst_gpair[builder.nRows];
|
||||
updateHostPtr<bst_gpair>(tmpGrad, builder.gradsInst.data(), builder.nRows);
|
||||
EXPECT_EQ(0, tmpInst[0]);
|
||||
EXPECT_FLOAT_EQ(1.f, tmpVal[0]);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).g);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).h);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).grad);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).hess);
|
||||
EXPECT_EQ(2, tmpInst[1]);
|
||||
EXPECT_FLOAT_EQ(1.f, tmpVal[1]);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).g);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).h);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).grad);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).hess);
|
||||
EXPECT_EQ(7, tmpInst[2]);
|
||||
EXPECT_FLOAT_EQ(1.f, tmpVal[2]);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).g);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).h);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).grad);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).hess);
|
||||
delete [] tmpGrad;
|
||||
delete [] tmpOff;
|
||||
delete [] tmpInst;
|
||||
@@ -106,22 +106,22 @@ void testLargeData() {
|
||||
updateHostPtr<float>(tmpVal, builder.vals.current(), builder.nVals);
|
||||
int* tmpInst = new int[builder.nVals];
|
||||
updateHostPtr<int>(tmpInst, builder.instIds.current(), builder.nVals);
|
||||
gpu_gpair* tmpGrad = new gpu_gpair[builder.nRows];
|
||||
updateHostPtr<gpu_gpair>(tmpGrad, builder.gradsInst.data(), builder.nRows);
|
||||
bst_gpair* tmpGrad = new bst_gpair[builder.nRows];
|
||||
updateHostPtr<bst_gpair>(tmpGrad, builder.gradsInst.data(), builder.nRows);
|
||||
// the order of observations is messed up before the convertToCsc call!
|
||||
// hence, the instance IDs have been manually checked and put here.
|
||||
EXPECT_EQ(1164, tmpInst[0]);
|
||||
EXPECT_FLOAT_EQ(1.f, tmpVal[0]);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).g);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).h);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).grad);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).hess);
|
||||
EXPECT_EQ(1435, tmpInst[1]);
|
||||
EXPECT_FLOAT_EQ(1.f, tmpVal[1]);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).g);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).h);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).grad);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).hess);
|
||||
EXPECT_EQ(1421, tmpInst[2]);
|
||||
EXPECT_FLOAT_EQ(1.f, tmpVal[2]);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).g);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).h);
|
||||
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).grad);
|
||||
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).hess);
|
||||
delete [] tmpGrad;
|
||||
delete [] tmpOff;
|
||||
delete [] tmpInst;
|
||||
@@ -164,17 +164,17 @@ void testAllocate() {
|
||||
EXPECT_FALSE(n[i].isUnused());
|
||||
}
|
||||
}
|
||||
gpu_gpair sum;
|
||||
sum.g = 0.f;
|
||||
sum.h = 0.f;
|
||||
bst_gpair sum;
|
||||
sum.grad = 0.f;
|
||||
sum.hess = 0.f;
|
||||
for (int i = 0; i < builder.maxNodes; ++i) {
|
||||
if (!n[i].isUnused()) {
|
||||
sum += n[i].gradSum;
|
||||
}
|
||||
}
|
||||
// law of conservation of gradients! :)
|
||||
EXPECT_FLOAT_EQ(2.f*n[0].gradSum.g, sum.g);
|
||||
EXPECT_FLOAT_EQ(2.f*n[0].gradSum.h, sum.h);
|
||||
EXPECT_FLOAT_EQ(2.f*n[0].gradSum.grad, sum.grad);
|
||||
EXPECT_FLOAT_EQ(2.f*n[0].gradSum.hess, sum.hess);
|
||||
node_id_t* assigns = new node_id_t[builder.nVals];
|
||||
int* offsets = new int[builder.nCols+1];
|
||||
updateHostPtr<node_id_t>(assigns, builder.nodeAssigns.current(),
|
||||
@@ -199,8 +199,8 @@ TEST(CudaGPUBuilderTest, AllocateNodeDataInt32) {
|
||||
template <typename node_id_t>
|
||||
void assign(Node<node_id_t> *n, float g, float h, float sc, float wt,
|
||||
DefaultDirection d, float th, int c, int i) {
|
||||
n->gradSum.g = g;
|
||||
n->gradSum.h = h;
|
||||
n->gradSum.grad = g;
|
||||
n->gradSum.hess = h;
|
||||
n->score = sc;
|
||||
n->weight = wt;
|
||||
n->dir = d;
|
||||
@@ -290,7 +290,7 @@ void testDense2Sparse() {
|
||||
updateDevicePtr<Node<node_id_t> >(builder.nodes.data(), hNodes, builder.maxNodes);
|
||||
builder.markLeaves();
|
||||
RegTree tree;
|
||||
builder.dense2sparse(tree);
|
||||
builder.dense2sparse(&tree);
|
||||
EXPECT_EQ(9, tree.param.num_nodes);
|
||||
delete [] hNodes;
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <random>
|
||||
#include "../../src/exact/gradients.cuh"
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <xgboost/data.h>
|
||||
@@ -95,8 +94,8 @@ protected:
|
||||
int size;
|
||||
T* hKeys;
|
||||
T* dKeys;
|
||||
gpu_gpair* hVals;
|
||||
gpu_gpair* dVals;
|
||||
bst_gpair* hVals;
|
||||
bst_gpair* dVals;
|
||||
std::string testName;
|
||||
int* dColIds;
|
||||
int* hColIds;
|
||||
@@ -132,17 +131,17 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
void compare(gpu_gpair* exp, gpu_gpair* dAct, size_t len) {
|
||||
gpu_gpair* act = new gpu_gpair[len];
|
||||
updateHostPtr<gpu_gpair>(act, dAct, len);
|
||||
void compare(bst_gpair* exp, bst_gpair* dAct, size_t len) {
|
||||
bst_gpair* act = new bst_gpair[len];
|
||||
updateHostPtr<bst_gpair>(act, dAct, len);
|
||||
for (size_t i=0;i<len;++i) {
|
||||
bool isSmall;
|
||||
float ratioG = diffRatio(exp[i].g, act[i].g, isSmall);
|
||||
float ratioH = diffRatio(exp[i].h, act[i].h, isSmall);
|
||||
float ratioG = diffRatio(exp[i].grad, act[i].grad, isSmall);
|
||||
float ratioH = diffRatio(exp[i].hess, act[i].hess, isSmall);
|
||||
float thresh = isSmall? SuperSmallThresh : Thresh;
|
||||
if ((ratioG >= Thresh) || (ratioH >= Thresh)) {
|
||||
printf("(exp) %f %f -> (act) %f %f : rG=%f rH=%f th=%f @%lu\n",
|
||||
exp[i].g, exp[i].h, act[i].g, act[i].h, ratioG, ratioH,
|
||||
exp[i].grad, exp[i].hess, act[i].grad, act[i].hess, ratioG, ratioH,
|
||||
thresh, i);
|
||||
}
|
||||
ASSERT_TRUE(ratioG < thresh);
|
||||
@@ -168,12 +167,12 @@ protected:
|
||||
}
|
||||
|
||||
void generateVals() {
|
||||
hVals = new gpu_gpair[size];
|
||||
hVals = new bst_gpair[size];
|
||||
for (size_t i=0;i<size;++i) {
|
||||
hVals[i].g = randVal(-1.f, 1.f);
|
||||
hVals[i].h = randVal(-1.f, 1.f);
|
||||
hVals[i].grad = randVal(-1.f, 1.f);
|
||||
hVals[i].hess = randVal(-1.f, 1.f);
|
||||
}
|
||||
allocateAndUpdateOnGpu<gpu_gpair>(dVals, hVals, size);
|
||||
allocateAndUpdateOnGpu<bst_gpair>(dVals, hVals, size);
|
||||
}
|
||||
|
||||
void sortKeyValues() {
|
||||
@@ -186,7 +185,7 @@ protected:
|
||||
dh::safe_cuda(cub::DeviceRadixSort::SortPairs(tmpStorage, tmpSize, dKeys,
|
||||
dKeys, dVals, dVals, size));
|
||||
dh::safe_cuda(cudaFree(storage));
|
||||
updateHostPtr<gpu_gpair>(hVals, dVals, size);
|
||||
updateHostPtr<bst_gpair>(hVals, dVals, size);
|
||||
updateHostPtr<T>(hKeys, dKeys, size);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user