sync Jun 1

This commit is contained in:
Your Name
2023-06-01 15:55:06 -07:00
76 changed files with 1424 additions and 595 deletions

View File

@@ -1,5 +1,5 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu22.04
ARG CUDA_VERSION_ARG
# Environment
@@ -7,22 +7,21 @@ ENV DEBIAN_FRONTEND noninteractive
# Install all basic requirements
RUN \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
apt-get update && \
apt-get install -y tar unzip wget git build-essential python3 python3-pip software-properties-common \
apt-get install -y wget git python3 python3-pip software-properties-common \
apt-transport-https ca-certificates gnupg-agent && \
wget -nv -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
add-apt-repository -u 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main' && \
apt-get update && \
apt-get install -y llvm-15 clang-tidy-15 clang-15 libomp-15-dev && \
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr
apt-get install -y cmake
# Set default clang-tidy version
RUN \
update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-15 100 && \
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 100
RUN \
apt-get install libgtest-dev libgmock-dev -y
# Install Python packages
RUN \
pip3 install pyyaml

View File

@@ -1,5 +1,5 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu18.04
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
ARG CUDA_VERSION_ARG
ARG RAPIDS_VERSION_ARG
@@ -9,7 +9,7 @@ SHELL ["/bin/bash", "-c"] # Use Bash as shell
# Install all basic requirements
RUN \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
# Python
@@ -25,7 +25,7 @@ RUN \
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
pyspark cloudpickle cuda-python && \
pyspark>=3.4.0 cloudpickle cuda-python && \
mamba clean --all && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector

View File

@@ -1,53 +0,0 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
ARG CUDA_VERSION_ARG
ARG JDK_VERSION=8
ARG SPARK_VERSION=3.0.0
# Environment
ENV DEBIAN_FRONTEND noninteractive
# Install all basic requirements
RUN \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/3bf863cc.pub && \
apt-get update && \
apt-get install -y software-properties-common && \
add-apt-repository ppa:openjdk-r/ppa && \
apt-get update && \
apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \
/opt/mambaforge/bin/pip install awscli && \
# Maven
wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
# Spark
wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \
tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \
ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark
ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH
# Install Python packages
RUN \
pip install numpy scipy pandas scikit-learn
ENV GOSU_VERSION 1.10
# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
chmod +x /usr/local/bin/gosu && \
gosu nobody true
# Set default JDK version
RUN update-java-alternatives -v -s java-1.$JDK_VERSION.0-openjdk-amd64
# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]

View File

@@ -20,10 +20,14 @@ RUN \
wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
# Spark
wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \
tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \
ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark
# Spark with scala 2.12
mkdir -p /opt/spark-scala-2.12 && \
wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz && \
tar xvf spark-$SPARK_VERSION-bin-hadoop3.tgz --strip-components=1 -C /opt/spark-scala-2.12 && \
# Spark with scala 2.13
mkdir -p /opt/spark-scala-2.13 && \
wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz && \
tar xvf spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz --strip-components=1 -C /opt/spark-scala-2.13
ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH

View File

@@ -6,6 +6,7 @@ set -x
spark_version=$1
use_cuda=$2
gpu_arch=$3
use_scala213=$4
gpu_options=""
if [ "x$use_cuda" == "x-Duse.cuda=ON" ]; then
@@ -22,7 +23,13 @@ export RABIT_MOCK=ON
if [ "x$gpu_arch" != "x" ]; then
export GPU_ARCH_FLAG=$gpu_arch
fi
mvn --no-transfer-progress package -Dspark.version=${spark_version} $gpu_options
mvn_profile_string=""
if [ "x$use_scala213" != "x" ]; then
export mvn_profile_string="-Pdefault,scala-2.13"
fi
mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options
set +x
set +e

View File

@@ -28,7 +28,7 @@ dependencies:
- llvmlite
- cffi
- pyarrow
- pyspark
- pyspark>=3.4.0
- cloudpickle
- pip:
- awscli

View File

@@ -38,8 +38,6 @@ dependencies:
- protobuf
- cloudpickle
- modin
# TODO: Replace it with pyspark>=3.4 once 3.4 released.
# - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
- pyspark>=3.3.1
- pyspark>=3.4.0
- pip:
- datatable

View File

@@ -35,7 +35,7 @@ dependencies:
- py-ubjson
- cffi
- pyarrow
- pyspark
- pyspark>=3.4.0
- cloudpickle
- pip:
- sphinx_rtd_theme

View File

@@ -19,6 +19,4 @@ dependencies:
- pytest
- hypothesis
- hatchling
- pip:
# TODO: Replace it with pyspark>=3.4 once 3.4 released.
- https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
- pyspark>=3.4.0

View File

@@ -6,37 +6,56 @@ set -x
# Initialize local Maven repository
./tests/ci_build/initialize_maven.sh
# Get version number of XGBoost4J and other auxiliary information
cd jvm-packages
jvm_packages_dir=`pwd`
# Get version number of XGBoost4J and other auxiliary information
xgboost4j_version=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
maven_compiler_source=$(mvn help:evaluate -Dexpression=maven.compiler.source -q -DforceStdout)
maven_compiler_target=$(mvn help:evaluate -Dexpression=maven.compiler.target -q -DforceStdout)
spark_version=$(mvn help:evaluate -Dexpression=spark.version -q -DforceStdout)
scala_version=$(mvn help:evaluate -Dexpression=scala.version -q -DforceStdout)
scala_binary_version=$(mvn help:evaluate -Dexpression=scala.binary.version -q -DforceStdout)
# Install XGBoost4J JAR into local Maven repository
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
cd xgboost4j-tester
# Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests
python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version}
# Run unit tests with XGBoost4J
mvn --no-transfer-progress package
# Run integration tests with XGBoost4J
java -jar ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar
# Run integration tests with XGBoost4J-Spark
if [ ! -z "$RUN_INTEGRATION_TEST" ]
then
if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
cd $jvm_packages_dir/xgboost4j-tester
python3 get_iris.py
spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv
spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv ${PWD}/native_model ${PWD}/pipeline_model
cd $jvm_packages_dir
fi
# including maven profiles for different scala versions: 2.12 is the default at the moment.
for _maven_profile_string in "" "-Pdefault,scala-2.13"; do
scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout)
scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout)
# Install XGBoost4J JAR into local Maven repository
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
cd xgboost4j-tester
# Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests
python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version}
# Build package and unit tests with XGBoost4J
mvn --no-transfer-progress clean package
xgboost4j_tester_jar="$jvm_packages_dir/xgboost4j-tester/target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar"
# Run integration tests with XGBoost4J
java -jar $xgboost4j_tester_jar
# Run integration tests with XGBoost4J-Spark
if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
# Changing directory so that we do not mix code and resulting files
cd target
if [[ "$scala_binary_version" == "2.12" ]]; then
/opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv
/opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version}
elif [[ "$scala_binary_version" == "2.13" ]]; then
/opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv
/opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version}
else
echo "Unexpected scala version: $scala_version ($scala_binary_version)."
fi
fi
cd $jvm_packages_dir
done
set +x
set +e

View File

@@ -41,7 +41,7 @@ class ClangTidy(object):
def __init__(self, args):
self.cpp_lint = args.cpp
self.cuda_lint = args.cuda
self.use_dmlc_gtest = args.use_dmlc_gtest
self.use_dmlc_gtest: bool = args.use_dmlc_gtest
self.cuda_archs = args.cuda_archs.copy() if args.cuda_archs else []
if args.tidy_version:
@@ -202,6 +202,7 @@ class ClangTidy(object):
cdb_file = os.path.join(self.cdb_path, 'compile_commands.json')
with open(cdb_file, 'r') as fd:
self.compile_commands = json.load(fd)
tidy_file = os.path.join(self.root_path, '.clang-tidy')
with open(tidy_file) as fd:
self.clang_tidy = yaml.safe_load(fd)
@@ -276,16 +277,24 @@ right keywords?
print('clang-tidy is working.')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run clang-tidy.')
parser.add_argument('--cpp', type=int, default=1)
parser.add_argument('--tidy-version', type=int, default=None,
help='Specify the version of preferred clang-tidy.')
parser.add_argument('--cuda', type=int, default=1)
parser.add_argument('--use-dmlc-gtest', type=int, default=1,
help='Whether to use gtest bundled in dmlc-core.')
parser.add_argument('--cuda-archs', action='append',
help='List of CUDA archs to build')
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run clang-tidy.")
parser.add_argument("--cpp", type=int, default=1)
parser.add_argument(
"--tidy-version",
type=int,
default=None,
help="Specify the version of preferred clang-tidy.",
)
parser.add_argument("--cuda", type=int, default=1)
parser.add_argument(
"--use-dmlc-gtest",
action="store_true",
help="Whether to use gtest bundled in dmlc-core.",
)
parser.add_argument(
"--cuda-archs", action="append", help="List of CUDA archs to build"
)
args = parser.parse_args()
test_tidy(args)