PySpark XGBoost integration (#8020)
Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
This commit is contained in:
@@ -10,7 +10,7 @@ RUN \
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository ppa:ubuntu-toolchain-r/test && \
|
||||
apt-get update && \
|
||||
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 && \
|
||||
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 openjdk-8-jdk-headless && \
|
||||
# CMake
|
||||
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
|
||||
@@ -24,6 +24,7 @@ ENV CXX=g++-8
|
||||
ENV CPP=cpp-8
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
|
||||
|
||||
# Create new Conda environment
|
||||
COPY conda_env/cpu_test.yml /scripts/
|
||||
|
||||
@@ -10,7 +10,7 @@ SHELL ["/bin/bash", "-c"] # Use Bash as shell
|
||||
RUN \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
|
||||
apt-get update && \
|
||||
apt-get install -y wget unzip bzip2 libgomp1 build-essential && \
|
||||
apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
|
||||
# Python
|
||||
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
|
||||
bash Miniconda3.sh -b -p /opt/python
|
||||
@@ -19,11 +19,14 @@ ENV PATH=/opt/python/bin:$PATH
|
||||
|
||||
# Create new Conda environment with cuDF, Dask, and cuPy
|
||||
RUN \
|
||||
conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||
conda install -c conda-forge mamba && \
|
||||
mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||
python=3.8 cudf=22.04* rmm=22.04* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda=22.04* dask-cudf=22.04* cupy \
|
||||
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis
|
||||
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
|
||||
pyspark cloudpickle cuda-python=11.7.0
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
|
||||
|
||||
# Install lightweight sudo (not bound to TTY)
|
||||
RUN set -ex; \
|
||||
|
||||
@@ -28,6 +28,8 @@ dependencies:
|
||||
- llvmlite
|
||||
- cffi
|
||||
- pyarrow
|
||||
- pyspark
|
||||
- cloudpickle
|
||||
- pip:
|
||||
- shap
|
||||
- awscli
|
||||
|
||||
@@ -36,6 +36,8 @@ dependencies:
|
||||
- cffi
|
||||
- pyarrow
|
||||
- protobuf<=3.20
|
||||
- pyspark
|
||||
- cloudpickle
|
||||
- pip:
|
||||
- shap
|
||||
- ipython # required by shap at import time.
|
||||
|
||||
@@ -35,6 +35,8 @@ dependencies:
|
||||
- py-ubjson
|
||||
- cffi
|
||||
- pyarrow
|
||||
- pyspark
|
||||
- cloudpickle
|
||||
- pip:
|
||||
- sphinx_rtd_theme
|
||||
- datatable
|
||||
|
||||
@@ -34,6 +34,18 @@ function install_xgboost {
|
||||
fi
|
||||
}
|
||||
|
||||
# Export the environment variables the PySpark tests rely on.
#
# Globals written (all exported):
#   PYSPARK_DRIVER_PYTHON - path of the `python` found first on PATH
#   PYSPARK_PYTHON        - same interpreter path as the driver
#   SPARK_TESTING         - set to 1
#
# If no `python` is on PATH both interpreter variables are exported empty,
# and the function still returns success.
function setup_pyspark_envs {
  local python_bin
  # Resolve the interpreter once with the shell builtin instead of spawning
  # the external `which` twice; fall back to an empty value when not found
  # so a failed lookup never aborts a caller running under `set -e`.
  python_bin=$(command -v python) || python_bin=""
  export PYSPARK_DRIVER_PYTHON="${python_bin}"
  export PYSPARK_PYTHON="${python_bin}"
  export SPARK_TESTING=1
}
|
||||
|
||||
# Remove the PySpark-related variables PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON
# and SPARK_TESTING from the current shell environment.
#
# Safe to call when the variables are already unset.
function unset_pyspark_envs {
  # -v restricts unset to variables, so a shell function that happens to
  # share one of these names is never removed by accident.
  unset -v PYSPARK_DRIVER_PYTHON PYSPARK_PYTHON SPARK_TESTING
}
|
||||
|
||||
# Uninstall the xgboost package via pip; -y answers pip's confirmation
# prompt automatically so the call never blocks waiting for input.
function uninstall_xgboost {
|
||||
pip uninstall -y xgboost
|
||||
}
|
||||
@@ -43,14 +55,18 @@ case "$suite" in
|
||||
gpu)
|
||||
source activate gpu_test
|
||||
install_xgboost
|
||||
setup_pyspark_envs
|
||||
pytest -v -s -rxXs --fulltrace --durations=0 -m "not mgpu" ${args} tests/python-gpu
|
||||
unset_pyspark_envs
|
||||
uninstall_xgboost
|
||||
;;
|
||||
|
||||
mgpu)
|
||||
source activate gpu_test
|
||||
install_xgboost
|
||||
setup_pyspark_envs
|
||||
pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/python-gpu
|
||||
unset_pyspark_envs
|
||||
|
||||
cd tests/distributed
|
||||
./runtests-gpu.sh
|
||||
@@ -61,7 +77,9 @@ case "$suite" in
|
||||
source activate cpu_test
|
||||
install_xgboost
|
||||
export RAY_OBJECT_STORE_ALLOW_SLOW_STORAGE=1
|
||||
setup_pyspark_envs
|
||||
pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/python
|
||||
unset_pyspark_envs
|
||||
cd tests/distributed
|
||||
./runtests.sh
|
||||
uninstall_xgboost
|
||||
@@ -70,7 +88,9 @@ case "$suite" in
|
||||
cpu-arm64)
|
||||
source activate aarch64_test
|
||||
install_xgboost
|
||||
setup_pyspark_envs
|
||||
pytest -v -s -rxXs --fulltrace --durations=0 ${args} tests/python/test_basic.py tests/python/test_basic_models.py tests/python/test_model_compatibility.py
|
||||
unset_pyspark_envs
|
||||
uninstall_xgboost
|
||||
;;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user