GPU implementation of AFT survival objective and metric (#5714)

* Add interval accuracy

* De-virtualize AFT functions

* Lint

* Refactor AFT metric using GPU-CPU reducer

* Fix R build

* Fix build on Windows

* Fix copyright header

* Clang-tidy

* Fix crashing demo

* Fix typos in comment; explain GPU ID

* Remove unnecessary #include

* Add C++ test for interval accuracy

* Fix a bug in accuracy metric: use log pred

* Refactor AFT objective using GPU-CPU Transform

* Lint

* Fix lint

* Use Ninja to speed up build

* Use time, not /usr/bin/time

* Add cpu_build worker class, with concurrency = 1

* Use concurrency = 1 only for CUDA build

* concurrency = 1 for clang-tidy

* Address reviewer's feedback

* Update link to AFT paper
This commit is contained in:
Philip Hyunsu Cho
2020-07-17 01:18:13 -07:00
committed by GitHub
parent 7c2686146e
commit 71b0528a2f
20 changed files with 1050 additions and 822 deletions

View File

@@ -7,7 +7,7 @@ SHELL ["/bin/bash", "-c"] # Use Bash as shell
# Install all basic requirements
RUN \
apt-get update && \
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 liblz4-dev && \
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 liblz4-dev ninja-build && \
# CMake
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \

View File

@@ -21,7 +21,14 @@ RUN \
bash Miniconda3.sh -b -p /opt/python && \
# CMake
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
# Ninja
mkdir -p /usr/local && \
cd /usr/local/ && \
wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \
cd ninja && \
python ./configure.py --bootstrap
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \
@@ -33,7 +40,7 @@ RUN \
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
ENV PATH=/opt/python/bin:$PATH
ENV PATH=/opt/python/bin:/usr/local/ninja:$PATH
ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp

View File

@@ -4,7 +4,7 @@ set -e
rm -rf build
mkdir build
cd build
cmake .. "$@" -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON
make clean
make -j$(nproc)
cmake .. "$@" -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON -GNinja
ninja clean
time ninja -v
cd ..