GPU implementation of AFT survival objective and metric (#5714)

* Add interval accuracy
* De-virtualize AFT functions
* Lint
* Refactor AFT metric using GPU-CPU reducer
* Fix R build
* Fix build on Windows
* Fix copyright header
* Clang-tidy
* Fix crashing demo
* Fix typos in comment; explain GPU ID
* Remove unnecessary #include
* Add C++ test for interval accuracy
* Fix a bug in accuracy metric: use log pred
* Refactor AFT objective using GPU-CPU Transform
* Lint
* Fix lint
* Use Ninja to speed up build
* Use time, not /usr/bin/time
* Add cpu_build worker class, with concurrency = 1
* Use concurrency = 1 only for CUDA build
* concurrency = 1 for clang-tidy
* Address reviewer's feedback
* Update link to AFT paper
2020-07-17 01:18:13 -07:00
parent 7c2686146e
commit 71b0528a2f
20 changed files with 1050 additions and 822 deletions
--- a/tests/ci_build/Dockerfile.cpu
+++ b/tests/ci_build/Dockerfile.cpu
@@ -7,7 +7,7 @@ SHELL ["/bin/bash", "-c"]   # Use Bash as shell
 # Install all basic requirements
 RUN \
    apt-get update && \
-    apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 liblz4-dev && \
+    apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 liblz4-dev ninja-build && \
    # CMake
    wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
    bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
--- a/tests/ci_build/Dockerfile.gpu_build
+++ b/tests/ci_build/Dockerfile.gpu_build
@@ -21,7 +21,14 @@ RUN \
    bash Miniconda3.sh -b -p /opt/python && \
    # CMake
    wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
-    bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr
+    bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
+    # Ninja
+    mkdir -p /usr/local && \
+    cd /usr/local/ && \
+    wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
+    tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \
+    cd ninja && \
+    python ./configure.py --bootstrap

 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
@@ -33,7 +40,7 @@ RUN \
    yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
    rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;

-ENV PATH=/opt/python/bin:$PATH
+ENV PATH=/opt/python/bin:/usr/local/ninja:$PATH
 ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc
 ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++
 ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
--- a/tests/ci_build/build_via_cmake.sh
+++ b/tests/ci_build/build_via_cmake.sh
@@ -4,7 +4,7 @@ set -e
 rm -rf build
 mkdir build
 cd build
-cmake .. "$@" -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON
-make clean
-make -j$(nproc)
+cmake .. "$@" -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON -GNinja
+ninja clean
+time ninja -v
 cd ..