diff --git a/jvm-packages/README.md b/jvm-packages/README.md index 0f8cd16ea..9befa9140 100644 --- a/jvm-packages/README.md +++ b/jvm-packages/README.md @@ -98,3 +98,23 @@ spark.read.format("libsvm").load("trainingset_libsvm") ``` Spark assumes that the dataset is 1-based indexed. However, when you do prediction with other bindings of XGBoost (e.g. Python API of XGBoost), XGBoost assumes that the dataset is 0-based indexed. It creates a pitfall for the users who train model with Spark but predict with the dataset in the same format in other bindings of XGBoost. + +## Development + +You can build/package xgboost4j locally with the following steps: + +**Linux:** +1. Ensure [Docker for Linux](https://docs.docker.com/install/) is installed. +2. Clone this repo: `git clone --recursive https://github.com/dmlc/xgboost.git` +3. Run the following command: + - With Tests: `./xgboost/jvm-packages/dev/build-linux.sh` + - Skip Tests: `./xgboost/jvm-packages/dev/build-linux.sh --skip-tests` + +**Windows:** +1. Ensure [Docker for Windows](https://docs.docker.com/docker-for-windows/install/) is installed. +2. Clone this repo: `git clone --recursive https://github.com/dmlc/xgboost.git` +3. Run the following command: + - With Tests: `.\xgboost\jvm-packages\dev\build-linux.cmd` + - Skip Tests: `.\xgboost\jvm-packages\dev\build-linux.cmd --skip-tests` + +*Note: this will create jars for deployment on Linux machines.* diff --git a/jvm-packages/dev/.gitattributes b/jvm-packages/dev/.gitattributes new file mode 100644 index 000000000..ed670eced --- /dev/null +++ b/jvm-packages/dev/.gitattributes @@ -0,0 +1,3 @@ +# Set line endings to LF, even on Windows. Otherwise, execution within Docker fails. 
+# See https://help.github.com/articles/dealing-with-line-endings/ +*.sh text eol=lf diff --git a/jvm-packages/dev/.gitignore b/jvm-packages/dev/.gitignore new file mode 100644 index 000000000..eb713db19 --- /dev/null +++ b/jvm-packages/dev/.gitignore @@ -0,0 +1 @@ +.m2 diff --git a/jvm-packages/dev/Dockerfile b/jvm-packages/dev/Dockerfile new file mode 100644 index 000000000..491fdf73e --- /dev/null +++ b/jvm-packages/dev/Dockerfile @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+#
+FROM centos:6
+
+# Install the base requirements: devtoolset-4 GCC, OpenJDK 8, Miniconda, CMake and Maven
+RUN \
+    yum -y update && \
+    yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \
+    yum-config-manager --enable centos-sclo-rh-testing && \
+    yum -y update && \
+    yum install -y devtoolset-4-gcc devtoolset-4-binutils devtoolset-4-gcc-c++ && \
+    # Python (Miniconda3 4.5.12, installed to /opt/python)
+    wget https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \
+    bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /opt/python && \
+    # CMake 3.12.0, installed under /usr
+    wget -nv -nc https://cmake.org/files/v3.12/cmake-3.12.0-Linux-x86_64.sh --no-check-certificate && \
+    bash cmake-3.12.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
+    # Maven 3.6.1, unpacked into /opt and linked as /opt/maven
+    wget http://apache.osuosl.org/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
+    tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
+    ln -s /opt/apache-maven-3.6.1/ /opt/maven
+
+# Put Python and Maven on PATH and point CC/CXX/CPP at the devtoolset-4 compilers
+ENV PATH=/opt/python/bin:/opt/maven/bin:$PATH
+ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc
+ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++
+ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
+ENV JAVA_HOME=/usr/lib/jvm/java
+
+# Install Python packages with the pip from /opt/python/bin
+RUN \
+    pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22 awscli
+
+WORKDIR /xgboost
diff --git a/jvm-packages/dev/build-docker.sh b/jvm-packages/dev/build-docker.sh
deleted file mode 100755
index 6f8287a5f..000000000
--- a/jvm-packages/dev/build-docker.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/bash
-
-set -x
-
-sudo docker run --rm -m 4g -e JAVA_OPTS='-Xmx6g' --attach stdin --attach stdout --attach stderr --volume `pwd`/../:/xgboost codingcat/xgbrelease:latest /xgboost/jvm-packages/dev/build.sh
diff --git a/jvm-packages/dev/build-linux.cmd b/jvm-packages/dev/build-linux.cmd
new file mode 100644
index 000000000..6e1658588
--- /dev/null
+++ b/jvm-packages/dev/build-linux.cmd
@@ -0,0 +1,44 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one
+rem or more
contributor license agreements. See the NOTICE file
+rem distributed with this work for additional information
+rem regarding copyright ownership. The ASF licenses this file
+rem to you under the Apache License, Version 2.0 (the
+rem "License"); you may not use this file except in compliance
+rem with the License. You may obtain a copy of the License at
+rem
+rem http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing,
+rem software distributed under the License is distributed on an
+rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+rem KIND, either express or implied. See the License for the
+rem specific language governing permissions and limitations
+rem under the License.
+rem
+
+rem The local path of the directory containing this file
+set "BASEDIR=%~dp0"
+
+rem The local path of .m2 directory for maven
+set "M2DIR=%BASEDIR%\.m2\"
+
+rem Create a local .m2 directory if needed
+if not exist "%M2DIR%" mkdir "%M2DIR%"
+
+rem Build and tag the Dockerfile
+docker build -t dmlc/xgboost4j-build %BASEDIR%
+
+docker run^
+  -it^
+  --rm^
+  --memory 8g^
+  --env JAVA_OPTS="-Xmx6g"^
+  --env MAVEN_OPTS="-Xmx2g"^
+  --ulimit core=-1^
+  --volume %BASEDIR%\..\..:/xgboost^
+  --volume %M2DIR%:/root/.m2^
+  dmlc/xgboost4j-build^
+  /xgboost/jvm-packages/dev/package-linux.sh "%*"
diff --git a/jvm-packages/dev/build-linux.sh b/jvm-packages/dev/build-linux.sh
new file mode 100755
index 000000000..f14f10bc6
--- /dev/null
+++ b/jvm-packages/dev/build-linux.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Builds the dmlc/xgboost4j-build image from the Dockerfile next to this script
+# and runs package-linux.sh inside it, forwarding all command-line arguments.
+#
+set -euo pipefail # stop if the image build fails instead of running a previously built image
+
+BASEDIR="$( cd "$( dirname "$0" )" && pwd )" # the directory of this file
+
+# Create the Maven cache directory before it is bind-mounted, as build-linux.cmd
+# does; otherwise Docker creates the missing host path itself.
+mkdir -p "${BASEDIR}/.m2"
+
+docker build -t dmlc/xgboost4j-build "${BASEDIR}" # build and tag the Dockerfile
+
+docker run \
+  -it \
+  --rm \
+  --memory 8g \
+  --env JAVA_OPTS="-Xmx6g" \
+  --env MAVEN_OPTS="-Xmx2g" \
+  --ulimit core=-1 \
+  --volume "${BASEDIR}/../..":/xgboost \
+  --volume "${BASEDIR}/.m2":/root/.m2 \
+  dmlc/xgboost4j-build \
+  /xgboost/jvm-packages/dev/package-linux.sh "$@"
diff --git a/jvm-packages/dev/build.sh b/jvm-packages/dev/build.sh
deleted file mode 100755
index db883908f..000000000
--- a/jvm-packages/dev/build.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env bash
-
-set -x
-
-export JAVA_HOME=/usr/lib/jvm/java-1.8.0
-export MAVEN_OPTS="-Xmx3000m"
-export CMAKE_CXX_COMPILER=/opt/rh/devtoolset-2/root/usr/bin/gcc
-export CXX=/opt/rh/devtoolset-2/root/usr/bin/g++
-export CC=/opt/rh/devtoolset-2/root/usr/bin/gcc
-
-export PATH=$CXX:$CC:/opt/rh/python27/root/usr/bin/python:$PATH
-
-scl enable devtoolset-2 bash
-scl enable python27 bash
-
-rm /usr/bin/python
-ln -s /opt/rh/python27/root/usr/bin/python /usr/bin/python
-
-# build xgboost
-cd /xgboost/jvm-packages;ulimit -c unlimited;mvn package
-
diff --git a/jvm-packages/dev/change_version.sh b/jvm-packages/dev/change_version.sh
index 745c3ded1..0004c99fb 100755
--- a/jvm-packages/dev/change_version.sh
+++ b/jvm-packages/dev/change_version.sh
@@ -1,27 +1,26 @@
-#!/bin/sh
-
-#!/bin/bash
-
+#!/usr/bin/env bash
 #
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. # # (Yizhi) This is mainly inspired by the script in apache/spark. -# I did some modificaiton to get it with our project. +# I did some modifications to get it with our project. 
# (Nan) Modified from MxNet
-
+#
 set -e
 
 if [[ ($# -ne 2) || ( $1 == "--help") || $1 == "-h" ]]; then
diff --git a/jvm-packages/dev/package-linux.sh b/jvm-packages/dev/package-linux.sh
new file mode 100755
index 000000000..1fd777d9b
--- /dev/null
+++ b/jvm-packages/dev/package-linux.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Packages the JVM bindings with Maven; meant to be run inside the build
+# container started by build-linux.sh or build-linux.cmd.
+#
+# Usage: package-linux.sh [--skip-tests]
+#
+set -euo pipefail
+
+# Print the usage line to stderr and abort.
+usage() {
+  echo "Usage: $0 [--skip-tests]" >&2
+  exit 1
+}
+
+# At most one argument is accepted. An empty argument counts as "no option":
+# build-linux.cmd forwards its arguments as a single quoted string, which is
+# empty when the wrapper is called without options.
+(( $# <= 1 )) || usage
+
+mvn_args=(--batch-mode clean package)
+case "${1:-}" in
+  --skip-tests) mvn_args+=(-DskipTests) ;;
+  "") ;;
+  *) usage ;;
+esac
+
+# Enter jvm-packages (the parent of this script's directory) regardless of the
+# caller's working directory.
+cd "$(dirname "${BASH_SOURCE[0]}")/.." || exit 1
+
+mvn "${mvn_args[@]}"