CMake improvements (#2487)

* CMake improvements
* Add Google Test to the CMake build
This commit is contained in:
Rory Mitchell
2017-07-06 18:05:11 +12:00
committed by GitHub
parent 8ceeb32bad
commit e939192978
13 changed files with 291 additions and 257 deletions

View File

@@ -13,11 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "gtest/gtest.h"
#include <gtest/gtest.h>
#include "../../src/exact/argmax_by_key.cuh"
#include "../../src/exact/gradients.cuh"
#include "../../src/exact/node.cuh"
#include "../../src/exact/loss_functions.cuh"
#include "utils.cuh"
@@ -56,26 +54,26 @@ void argMaxTest(ArgMaxByKeyAlgo algo) {
const int nVals = 1024;
const int level = 0;
const int nKeys = 1 << level;
gpu_gpair* scans = new gpu_gpair[nVals];
bst_gpair* scans = new bst_gpair[nVals];
float* vals = new float[nVals];
int* colIds = new int[nVals];
scans[0] = gpu_gpair();
scans[0] = bst_gpair();
vals[0] = 0.f;
colIds[0] = 0;
for (int i = 1; i < nVals; ++i) {
scans[i].g = scans[i-1].g + (0.1f * 2.f);
scans[i].h = scans[i-1].h + (0.1f * 2.f);
scans[i].grad = scans[i-1].grad + (0.1f * 2.f);
scans[i].hess = scans[i-1].hess + (0.1f * 2.f);
vals[i] = static_cast<float>(i) * 0.1f;
colIds[i] = 0;
}
float* dVals;
allocateAndUpdateOnGpu<float>(dVals, vals, nVals);
gpu_gpair* dScans;
allocateAndUpdateOnGpu<gpu_gpair>(dScans, scans, nVals);
gpu_gpair* sums = new gpu_gpair[nKeys];
sums[0].g = sums[0].h = (0.1f * 2.f * nVals);
gpu_gpair* dSums;
allocateAndUpdateOnGpu<gpu_gpair>(dSums, sums, nKeys);
bst_gpair* dScans;
allocateAndUpdateOnGpu<bst_gpair>(dScans, scans, nVals);
bst_gpair* sums = new bst_gpair[nKeys];
sums[0].grad = sums[0].hess = (0.1f * 2.f * nVals);
bst_gpair* dSums;
allocateAndUpdateOnGpu<bst_gpair>(dSums, sums, nKeys);
int* dColIds;
allocateAndUpdateOnGpu<int>(dColIds, colIds, nVals);
Split* splits = new Split[nKeys];
@@ -93,7 +91,7 @@ void argMaxTest(ArgMaxByKeyAlgo algo) {
param.reg_alpha = 0.f;
param.reg_lambda = 2.f;
param.max_delta_step = 0.f;
nodes[0].score = CalcGain(param, sums[0].g, sums[0].h);
nodes[0].score = CalcGain(param, sums[0].grad, sums[0].hess);
Node<node_id_t>* dNodes;
allocateAndUpdateOnGpu<Node<node_id_t> >(dNodes, nodes, nKeys);
argMaxByKey<node_id_t>(dSplits, dScans, dSums, dVals, dColIds, dNodeAssigns,

View File

@@ -31,16 +31,16 @@ class ReduceScanByKey: public Generator<node_id_t> {
hSums(nullptr), dSums(nullptr), hScans(nullptr), dScans(nullptr),
outSize(this->size), nSegments(this->nKeys*this->nCols),
hOffsets(nullptr), dOffsets(nullptr) {
hSums = new gpu_gpair[nSegments];
allocateOnGpu<gpu_gpair>(dSums, nSegments);
hScans = new gpu_gpair[outSize];
allocateOnGpu<gpu_gpair>(dScans, outSize);
gpu_gpair* buckets = new gpu_gpair[nSegments];
hSums = new bst_gpair[nSegments];
allocateOnGpu<bst_gpair>(dSums, nSegments);
hScans = new bst_gpair[outSize];
allocateOnGpu<bst_gpair>(dScans, outSize);
bst_gpair* buckets = new bst_gpair[nSegments];
for (int i = 0; i < nSegments; i++) {
buckets[i] = gpu_gpair();
buckets[i] = bst_gpair();
}
for (int i = 0; i < nSegments; i++) {
hSums[i] = gpu_gpair();
hSums[i] = bst_gpair();
}
for (size_t i = 0; i < this->size; i++) {
if (this->hKeys[i] >= 0 && this->hKeys[i] < nSegments) {
@@ -77,10 +77,10 @@ class ReduceScanByKey: public Generator<node_id_t> {
}
void run() {
gpu_gpair* tmpScans;
bst_gpair* tmpScans;
int* tmpKeys;
int tmpSize = scanTempBufferSize(this->size);
allocateOnGpu<gpu_gpair>(tmpScans, tmpSize);
allocateOnGpu<bst_gpair>(tmpScans, tmpSize);
allocateOnGpu<int>(tmpKeys, tmpSize);
TIMEIT(reduceScanByKey<node_id_t>
(dSums, dScans, this->dVals, this->dInstIds, this->dKeys,
@@ -94,10 +94,10 @@ class ReduceScanByKey: public Generator<node_id_t> {
}
private:
gpu_gpair* hSums;
gpu_gpair* dSums;
gpu_gpair* hScans;
gpu_gpair* dScans;
bst_gpair* hSums;
bst_gpair* dSums;
bst_gpair* hScans;
bst_gpair* dScans;
int outSize;
int nSegments;
int* hOffsets;

View File

@@ -47,20 +47,20 @@ void testSmallData() {
updateHostPtr<float>(tmpVal, builder.vals.current(), builder.nVals);
int* tmpInst = new int[builder.nVals];
updateHostPtr<int>(tmpInst, builder.instIds.current(), builder.nVals);
gpu_gpair* tmpGrad = new gpu_gpair[builder.nRows];
updateHostPtr<gpu_gpair>(tmpGrad, builder.gradsInst.data(), builder.nRows);
bst_gpair* tmpGrad = new bst_gpair[builder.nRows];
updateHostPtr<bst_gpair>(tmpGrad, builder.gradsInst.data(), builder.nRows);
EXPECT_EQ(0, tmpInst[0]);
EXPECT_FLOAT_EQ(1.f, tmpVal[0]);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).g);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).h);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).grad);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).hess);
EXPECT_EQ(2, tmpInst[1]);
EXPECT_FLOAT_EQ(1.f, tmpVal[1]);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).g);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).h);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).grad);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).hess);
EXPECT_EQ(7, tmpInst[2]);
EXPECT_FLOAT_EQ(1.f, tmpVal[2]);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).g);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).h);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).grad);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).hess);
delete [] tmpGrad;
delete [] tmpOff;
delete [] tmpInst;
@@ -106,22 +106,22 @@ void testLargeData() {
updateHostPtr<float>(tmpVal, builder.vals.current(), builder.nVals);
int* tmpInst = new int[builder.nVals];
updateHostPtr<int>(tmpInst, builder.instIds.current(), builder.nVals);
gpu_gpair* tmpGrad = new gpu_gpair[builder.nRows];
updateHostPtr<gpu_gpair>(tmpGrad, builder.gradsInst.data(), builder.nRows);
bst_gpair* tmpGrad = new bst_gpair[builder.nRows];
updateHostPtr<bst_gpair>(tmpGrad, builder.gradsInst.data(), builder.nRows);
// the order of observations is messed up before the convertToCsc call!
// hence, the instance IDs have been manually checked and put here.
EXPECT_EQ(1164, tmpInst[0]);
EXPECT_FLOAT_EQ(1.f, tmpVal[0]);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).g);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).h);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).grad);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[0]%10), get(0, tmpGrad, tmpInst).hess);
EXPECT_EQ(1435, tmpInst[1]);
EXPECT_FLOAT_EQ(1.f, tmpVal[1]);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).g);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).h);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).grad);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[1]%10), get(1, tmpGrad, tmpInst).hess);
EXPECT_EQ(1421, tmpInst[2]);
EXPECT_FLOAT_EQ(1.f, tmpVal[2]);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).g);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).h);
EXPECT_FLOAT_EQ(1.f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).grad);
EXPECT_FLOAT_EQ(.5f+(float)(tmpInst[2]%10), get(2, tmpGrad, tmpInst).hess);
delete [] tmpGrad;
delete [] tmpOff;
delete [] tmpInst;
@@ -164,17 +164,17 @@ void testAllocate() {
EXPECT_FALSE(n[i].isUnused());
}
}
gpu_gpair sum;
sum.g = 0.f;
sum.h = 0.f;
bst_gpair sum;
sum.grad = 0.f;
sum.hess = 0.f;
for (int i = 0; i < builder.maxNodes; ++i) {
if (!n[i].isUnused()) {
sum += n[i].gradSum;
}
}
// law of conservation of gradients! :)
EXPECT_FLOAT_EQ(2.f*n[0].gradSum.g, sum.g);
EXPECT_FLOAT_EQ(2.f*n[0].gradSum.h, sum.h);
EXPECT_FLOAT_EQ(2.f*n[0].gradSum.grad, sum.grad);
EXPECT_FLOAT_EQ(2.f*n[0].gradSum.hess, sum.hess);
node_id_t* assigns = new node_id_t[builder.nVals];
int* offsets = new int[builder.nCols+1];
updateHostPtr<node_id_t>(assigns, builder.nodeAssigns.current(),
@@ -199,8 +199,8 @@ TEST(CudaGPUBuilderTest, AllocateNodeDataInt32) {
template <typename node_id_t>
void assign(Node<node_id_t> *n, float g, float h, float sc, float wt,
DefaultDirection d, float th, int c, int i) {
n->gradSum.g = g;
n->gradSum.h = h;
n->gradSum.grad = g;
n->gradSum.hess = h;
n->score = sc;
n->weight = wt;
n->dir = d;
@@ -290,7 +290,7 @@ void testDense2Sparse() {
updateDevicePtr<Node<node_id_t> >(builder.nodes.data(), hNodes, builder.maxNodes);
builder.markLeaves();
RegTree tree;
builder.dense2sparse(tree);
builder.dense2sparse(&tree);
EXPECT_EQ(9, tree.param.num_nodes);
delete [] hNodes;
}

View File

@@ -16,7 +16,6 @@
#pragma once
#include <random>
#include "../../src/exact/gradients.cuh"
#include <memory>
#include <string>
#include <xgboost/data.h>
@@ -95,8 +94,8 @@ protected:
int size;
T* hKeys;
T* dKeys;
gpu_gpair* hVals;
gpu_gpair* dVals;
bst_gpair* hVals;
bst_gpair* dVals;
std::string testName;
int* dColIds;
int* hColIds;
@@ -132,17 +131,17 @@ protected:
}
}
void compare(gpu_gpair* exp, gpu_gpair* dAct, size_t len) {
gpu_gpair* act = new gpu_gpair[len];
updateHostPtr<gpu_gpair>(act, dAct, len);
void compare(bst_gpair* exp, bst_gpair* dAct, size_t len) {
bst_gpair* act = new bst_gpair[len];
updateHostPtr<bst_gpair>(act, dAct, len);
for (size_t i=0;i<len;++i) {
bool isSmall;
float ratioG = diffRatio(exp[i].g, act[i].g, isSmall);
float ratioH = diffRatio(exp[i].h, act[i].h, isSmall);
float ratioG = diffRatio(exp[i].grad, act[i].grad, isSmall);
float ratioH = diffRatio(exp[i].hess, act[i].hess, isSmall);
float thresh = isSmall? SuperSmallThresh : Thresh;
if ((ratioG >= Thresh) || (ratioH >= Thresh)) {
printf("(exp) %f %f -> (act) %f %f : rG=%f rH=%f th=%f @%lu\n",
exp[i].g, exp[i].h, act[i].g, act[i].h, ratioG, ratioH,
exp[i].grad, exp[i].hess, act[i].grad, act[i].hess, ratioG, ratioH,
thresh, i);
}
ASSERT_TRUE(ratioG < thresh);
@@ -168,12 +167,12 @@ protected:
}
void generateVals() {
hVals = new gpu_gpair[size];
hVals = new bst_gpair[size];
for (size_t i=0;i<size;++i) {
hVals[i].g = randVal(-1.f, 1.f);
hVals[i].h = randVal(-1.f, 1.f);
hVals[i].grad = randVal(-1.f, 1.f);
hVals[i].hess = randVal(-1.f, 1.f);
}
allocateAndUpdateOnGpu<gpu_gpair>(dVals, hVals, size);
allocateAndUpdateOnGpu<bst_gpair>(dVals, hVals, size);
}
void sortKeyValues() {
@@ -186,7 +185,7 @@ protected:
dh::safe_cuda(cub::DeviceRadixSort::SortPairs(tmpStorage, tmpSize, dKeys,
dKeys, dVals, dVals, size));
dh::safe_cuda(cudaFree(storage));
updateHostPtr<gpu_gpair>(hVals, dVals, size);
updateHostPtr<bst_gpair>(hVals, dVals, size);
updateHostPtr<T>(hKeys, dKeys, size);
}