[GPU-Plugin] Integration of a faster version of grow_gpu plugin into mainstream (#2360)
* Integrating a faster version of grow_gpu plugin 1. Removed the older files to reduce duplication 2. Moved all of the grow_gpu files under 'exact' folder 3. All of them are inside 'exact' namespace to avoid any conflicts 4. Fixed a bug in benchmark.py while running only 'grow_gpu' plugin 5. Added cub and googletest submodules to ease integration and unit-testing 6. Updates to CMakeLists.txt to directly build cuda objects into libxgboost * Added support for building gpu plugins through make flow 1. updated makefile and config.mk to add right targets 2. added unit-tests for gpu exact plugin code * 1. Added support for building gpu plugin using 'make' flow as well 2. Updated instructions for building and testing gpu plugin * Fix travis-ci errors for PR#2360 1. lint errors on unit-tests 2. removed googletest, instead depended upon dmlc-core provide gtest cache * Some more fixes to travis-ci lint failures PR#2360 * Added Rory's copyrights to the files containing code from both. * updated copyright statement as per Rory's request * moved the static datasets into a script to generate them at runtime * 1. memory usage print when silent=0 2. tests/ and test/ folder organization 3. removal of the dependency of googletest for just building xgboost 4. coding style updates for .cuh as well * Fixes for compilation warnings * add cuda object files as well when JVM_BINDINGS=ON
This commit is contained in:
128
plugin/updater_gpu/test/cpp/argmax_by_key.cu
Normal file
128
plugin/updater_gpu/test/cpp/argmax_by_key.cu
Normal file
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "gtest/gtest.h"
|
||||
#include "../../src/exact/argmax_by_key.cuh"
|
||||
#include "../../src/exact/gradients.cuh"
|
||||
#include "../../src/exact/node.cuh"
|
||||
#include "../../src/exact/loss_functions.cuh"
|
||||
#include "utils.cuh"
|
||||
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace exact {
|
||||
|
||||
TEST(ArgMaxByKey, maxSplit) {
|
||||
Split a, b, out;
|
||||
a.score = 2.f;
|
||||
a.index = 3;
|
||||
b.score = 3.f;
|
||||
b.index = 4;
|
||||
out = maxSplit(a, b);
|
||||
EXPECT_FLOAT_EQ(out.score, b.score);
|
||||
EXPECT_EQ(out.index, b.index);
|
||||
b.score = 2.f;
|
||||
b.index = 4;
|
||||
out = maxSplit(a, b);
|
||||
EXPECT_FLOAT_EQ(out.score, a.score);
|
||||
EXPECT_EQ(out.index, a.index);
|
||||
b.score = 2.f;
|
||||
b.index = 2;
|
||||
out = maxSplit(a, b);
|
||||
EXPECT_FLOAT_EQ(out.score, a.score);
|
||||
EXPECT_EQ(out.index, b.index);
|
||||
b.score = 1.f;
|
||||
b.index = 1;
|
||||
out = maxSplit(a, b);
|
||||
EXPECT_FLOAT_EQ(out.score, a.score);
|
||||
EXPECT_EQ(out.index, a.index);
|
||||
}
|
||||
|
||||
template <typename node_id_t>
|
||||
void argMaxTest(ArgMaxByKeyAlgo algo) {
|
||||
const int nVals = 1024;
|
||||
const int level = 0;
|
||||
const int nKeys = 1 << level;
|
||||
gpu_gpair* scans = new gpu_gpair[nVals];
|
||||
float* vals = new float[nVals];
|
||||
int* colIds = new int[nVals];
|
||||
scans[0] = gpu_gpair();
|
||||
vals[0] = 0.f;
|
||||
colIds[0] = 0;
|
||||
for (int i = 1; i < nVals; ++i) {
|
||||
scans[i].g = scans[i-1].g + (0.1f * 2.f);
|
||||
scans[i].h = scans[i-1].h + (0.1f * 2.f);
|
||||
vals[i] = static_cast<float>(i) * 0.1f;
|
||||
colIds[i] = 0;
|
||||
}
|
||||
float* dVals;
|
||||
allocateAndUpdateOnGpu<float>(dVals, vals, nVals);
|
||||
gpu_gpair* dScans;
|
||||
allocateAndUpdateOnGpu<gpu_gpair>(dScans, scans, nVals);
|
||||
gpu_gpair* sums = new gpu_gpair[nKeys];
|
||||
sums[0].g = sums[0].h = (0.1f * 2.f * nVals);
|
||||
gpu_gpair* dSums;
|
||||
allocateAndUpdateOnGpu<gpu_gpair>(dSums, sums, nKeys);
|
||||
int* dColIds;
|
||||
allocateAndUpdateOnGpu<int>(dColIds, colIds, nVals);
|
||||
Split* splits = new Split[nKeys];
|
||||
Split* dSplits;
|
||||
allocateOnGpu<Split>(dSplits, nKeys);
|
||||
node_id_t* nodeAssigns = new node_id_t[nVals];
|
||||
memset(nodeAssigns, 0, sizeof(node_id_t)*nVals);
|
||||
node_id_t* dNodeAssigns;
|
||||
allocateAndUpdateOnGpu<node_id_t>(dNodeAssigns, nodeAssigns, nVals);
|
||||
Node<node_id_t>* nodes = new Node<node_id_t>[nKeys];
|
||||
nodes[0].gradSum = sums[0];
|
||||
nodes[0].id = 0;
|
||||
TrainParam param;
|
||||
param.min_child_weight = 0.0f;
|
||||
param.reg_alpha = 0.f;
|
||||
param.reg_lambda = 2.f;
|
||||
param.max_delta_step = 0.f;
|
||||
nodes[0].score = CalcGain(param, sums[0].g, sums[0].h);
|
||||
Node<node_id_t>* dNodes;
|
||||
allocateAndUpdateOnGpu<Node<node_id_t> >(dNodes, nodes, nKeys);
|
||||
argMaxByKey<node_id_t>(dSplits, dScans, dSums, dVals, dColIds, dNodeAssigns,
|
||||
dNodes, nKeys, 0, nVals, param, algo);
|
||||
updateHostPtr<Split>(splits, dSplits, nKeys);
|
||||
EXPECT_FLOAT_EQ(0.f, splits->score);
|
||||
EXPECT_EQ(0, splits->index);
|
||||
dh::safe_cuda(cudaFree(dNodeAssigns));
|
||||
delete [] nodeAssigns;
|
||||
dh::safe_cuda(cudaFree(dSplits));
|
||||
delete [] splits;
|
||||
dh::safe_cuda(cudaFree(dColIds));
|
||||
delete [] colIds;
|
||||
dh::safe_cuda(cudaFree(dSums));
|
||||
delete [] sums;
|
||||
dh::safe_cuda(cudaFree(dVals));
|
||||
delete [] vals;
|
||||
dh::safe_cuda(cudaFree(dScans));
|
||||
delete [] scans;
|
||||
}
|
||||
|
||||
TEST(ArgMaxByKey, testOneColGmem) {
|
||||
argMaxTest<int16_t>(ABK_GMEM);
|
||||
}
|
||||
|
||||
TEST(ArgMaxByKey, testOneColSmem) {
|
||||
argMaxTest<int16_t>(ABK_SMEM);
|
||||
}
|
||||
|
||||
} // namespace exact
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
Reference in New Issue
Block a user