[GPU-Plugin] Integration of a faster version of grow_gpu plugin into mainstream (#2360)

* Integrating a faster version of grow_gpu plugin 1. Removed the older files to reduce duplication 2. Moved all of the grow_gpu files under 'exact' folder 3. All of them are inside 'exact' namespace to avoid any conflicts 4. Fixed a bug in benchmark.py while running only 'grow_gpu' plugin 5. Added cub and googletest submodules to ease integration and unit-testing 6. Updates to CMakeLists.txt to directly build cuda objects into libxgboost * Added support for building gpu plugins through make flow 1. updated makefile and config.mk to add right targets 2. added unit-tests for gpu exact plugin code * 1. Added support for building gpu plugin using 'make' flow as well 2. Updated instructions for building and testing gpu plugin * Fix travis-ci errors for PR#2360 1. lint errors on unit-tests 2. removed googletest, instead depended upon dmlc-core provide gtest cache * Some more fixes to travis-ci lint failures PR#2360 * Added Rory's copyrights to the files containing code from both. * updated copyright statement as per Rory's request * moved the static datasets into a script to generate them at runtime * 1. memory usage print when silent=0 2. tests/ and test/ folder organization 3. removal of the dependency of googletest for just building xgboost 4. coding style updates for .cuh as well * Fixes for compilation warnings * add cuda object files as well when JVM_BINDINGS=ON
2017-06-06 03:09:53 +05:30
parent 2d9052bc7d
commit 85b2fb3eee
37 changed files with 4118 additions and 1601 deletions
--- a/34
+++ b/34
@@ -53,7 +53,7 @@ endif

 export LDFLAGS= -pthread -lm $(ADD_LDFLAGS) $(DMLC_LDFLAGS) $(PLUGIN_LDFLAGS)
 export CFLAGS=  -std=c++11 -Wall -Wno-unknown-pragmas -Iinclude $(ADD_CFLAGS) $(PLUGIN_CFLAGS)
-CFLAGS += -I$(DMLC_CORE)/include -I$(RABIT)/include
+CFLAGS += -I$(DMLC_CORE)/include -I$(RABIT)/include -I$(GTEST_PATH)/include
 #java include path
 export JAVAINCFLAGS = -I${JAVA_HOME}/include -I./java

@@ -84,12 +84,28 @@ ifeq ($(UNAME), Darwin)
 	JAVAINCFLAGS += -I${JAVA_HOME}/include/darwin
 endif

+OPENMP_FLAGS =
 ifeq ($(USE_OPENMP), 1)
-	CFLAGS += -fopenmp
+	OPENMP_FLAGS = -fopenmp
 else
-	CFLAGS += -DDISABLE_OPENMP
+	OPENMP_FLAGS = -DDISABLE_OPENMP
 endif
+CFLAGS += $(OPENMP_FLAGS)

+# GPU build settings (CUDA_ROOT uses ':=' so the nvcc lookup runs once, not on every expansion)
+COMPUTE ?= 60 35
+NVCC = nvcc
+INCLUDES = -Iinclude -I$(DMLC_CORE)/include -I$(RABIT)/include
+INCLUDES += -I$(CUB_PATH)
+INCLUDES += -I$(GTEST_PATH)/include
+CODE = $(foreach ver,$(COMPUTE),-gencode arch=compute_$(ver),code=sm_$(ver))
+NVCC_FLAGS = --std=c++11 $(CODE) $(INCLUDES) -lineinfo --expt-extended-lambda
+NVCC_FLAGS += -Xcompiler=$(OPENMP_FLAGS) -Xcompiler=-fPIC
+ifeq ($(PLUGIN_UPDATER_GPU),ON)
+  CUDA_ROOT := $(shell dirname $(shell dirname $(shell which $(NVCC))))
+  INCLUDES += -I$(CUDA_ROOT)/include
+  LDFLAGS += -L$(CUDA_ROOT)/lib64 -lcudart
+endif

 # specify tensor path
 .PHONY: clean all lint clean_all doxygen rcpplint pypack Rpack Rbuild Rcheck java pylint
@@ -113,11 +129,21 @@ ALL_DEP = $(filter-out build/cli_main.o, $(ALL_OBJ)) $(LIB_DEP)
 CLI_OBJ = build/cli_main.o
 include tests/cpp/xgboost_test.mk

+# keep above the %.cc rule: if src/X.cu and src/X.cc both exist, make uses the first matching rule
+build/%.o: src/%.cu
+	@mkdir -p $(@D)
+	$(NVCC) -c $(NVCC_FLAGS) $< -o $@
+
 build/%.o: src/%.cc
 	@mkdir -p $(@D)
 	$(CXX) $(CFLAGS) -MM -MT build/$*.o $< >build/$*.d
 	$(CXX) -c $(CFLAGS) $< -o $@

+# keep above the %.cc rule: if plugin/X.cu and plugin/X.cc both exist, make uses the first matching rule
+build_plugin/%.o: plugin/%.cu
+	@mkdir -p $(@D)
+	$(NVCC) -c $(NVCC_FLAGS) $< -o $@
+
 build_plugin/%.o: plugin/%.cc
 	@mkdir -p $(@D)
 	$(CXX) $(CFLAGS) -MM -MT build_plugin/$*.o $< >build_plugin/$*.d
@@ -158,6 +184,8 @@ pylint:
 	flake8 --ignore E501 tests/python

 test: $(ALL_TEST)
+	./plugin/updater_gpu/test/cpp/generate_data.sh
+	$(ALL_TEST)

 check: test
 	./tests/cpp/xgboost_test