FROM ubuntu:18.04

ARG JDK_VERSION=8
# NOTE: the -bin-hadoop3 / -bin-hadoop3-scala2.13 artifact names used below
# only exist on the Apache archive for Spark >= 3.3.0.
ARG SPARK_VERSION=3.4.0

# Environment
ENV DEBIAN_FRONTEND=noninteractive

# Install all basic requirements
RUN \
    apt-get update && \
    apt-get install -y software-properties-common && \
    add-apt-repository ppa:openjdk-r/ppa && \
    apt-get update && \
    apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \
    # Python
    wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
    bash conda.sh -b -p /opt/mambaforge && \
    /opt/mambaforge/bin/pip install awscli && \
    # Maven
    wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
    tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
    ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
    # Spark with Scala 2.12
    mkdir -p /opt/spark-scala-2.12 && \
    wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz && \
    tar xvf spark-$SPARK_VERSION-bin-hadoop3.tgz --strip-components=1 -C /opt/spark-scala-2.12 && \
    # Spark with Scala 2.13
    mkdir -p /opt/spark-scala-2.13 && \
    wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz && \
    tar xvf spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz --strip-components=1 -C /opt/spark-scala-2.13

# /opt/spark/bin is on PATH even though only /opt/spark-scala-2.12 and
# /opt/spark-scala-2.13 are installed; the CI scripts presumably point
# /opt/spark at whichever install is under test.
ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH

# Install Python packages
RUN \
    pip install numpy scipy pandas scikit-learn

ENV GOSU_VERSION=1.10

# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
    wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
    chmod +x /usr/local/bin/gosu && \
    gosu nobody true

# Set the default JDK version
RUN update-java-alternatives -v -s java-1.$JDK_VERSION.0-openjdk-amd64

# Default entry point to use if running locally.
# It preserves the ownership attributes of files created in the mounted
# workspace; a sketch of the script follows the ENTRYPOINT below.
COPY entrypoint.sh /scripts/

WORKDIR /workspace

ENTRYPOINT ["/scripts/entrypoint.sh"]
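
# For reference, a minimal sketch of what entrypoint.sh could look like
# (assumptions: the real script is maintained next to this Dockerfile, and
# CI_BUILD_UID / CI_BUILD_GID are hypothetical variables passed in with
# `docker run -e`):
#
#   #!/usr/bin/env bash
#   set -e
#   if [[ -n "$CI_BUILD_UID" ]] && [[ -n "$CI_BUILD_GID" ]]; then
#       # Re-create the invoking host user inside the container so that files
#       # written to the bind-mounted /workspace keep the host user's ownership.
#       groupadd -o -g "$CI_BUILD_GID" ci_build
#       useradd -o -m -g "$CI_BUILD_GID" -u "$CI_BUILD_UID" ci_build
#       exec gosu ci_build "$@"
#   else
#       exec "$@"
#   fi
#
# Example local usage (the image tag is illustrative):
#   docker build -t jvm-ci --build-arg SPARK_VERSION=3.4.0 .
#   docker run --rm -v "$PWD:/workspace" \
#       -e CI_BUILD_UID="$(id -u)" -e CI_BUILD_GID="$(id -g)" \
#       jvm-ci mvn -version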