[jvm-packages] update local dev build process (#4640)

This commit is contained in:
Mathew Wicks 2019-07-16 16:23:06 +12:00 committed by Nan Zhu
parent 9975c533c7
commit 6323ef94ad
10 changed files with 205 additions and 44 deletions

View File

@ -98,3 +98,23 @@ spark.read.format("libsvm").load("trainingset_libsvm")
``` ```
Spark assumes that the dataset is 1-based indexed. However, when you do prediction with other bindings of XGBoost (e.g. Python API of XGBoost), XGBoost assumes that the dataset is 0-based indexed. It creates a pitfall for the users who train model with Spark but predict with the dataset in the same format in other bindings of XGBoost. Spark assumes that the dataset is 1-based indexed. However, when you do prediction with other bindings of XGBoost (e.g. Python API of XGBoost), XGBoost assumes that the dataset is 0-based indexed. It creates a pitfall for the users who train model with Spark but predict with the dataset in the same format in other bindings of XGBoost.
## Development
You can build/package xgboost4j locally with the following steps:
**Linux:**
1. Ensure [Docker for Linux](https://docs.docker.com/install/) is installed.
2. Clone this repo: `git clone --recursive https://github.com/dmlc/xgboost.git`
3. Run the following command:
- With Tests: `./xgboost/jvm-packages/dev/build-linux.sh`
- Skip Tests: `./xgboost/jvm-packages/dev/build-linux.sh --skip-tests`
**Windows:**
1. Ensure [Docker for Windows](https://docs.docker.com/docker-for-windows/install/) is installed.
2. Clone this repo: `git clone --recursive https://github.com/dmlc/xgboost.git`
3. Run the following command:
- With Tests: `.\xgboost\jvm-packages\dev\build-linux.cmd`
- Skip Tests: `.\xgboost\jvm-packages\dev\build-linux.cmd --skip-tests`
*Note: this will create jars for deployment on Linux machines.*

3
jvm-packages/dev/.gitattributes vendored Normal file
View File

@ -0,0 +1,3 @@
# Set line endings to LF, even on Windows. Otherwise, execution within Docker fails.
# See https://help.github.com/articles/dealing-with-line-endings/
*.sh text eol=lf

1
jvm-packages/dev/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.m2

View File

@ -0,0 +1,50 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
FROM centos:6

# Install all basic requirements.
#
# Everything is installed in one RUN instruction so that the downloaded
# installers and the yum metadata can be deleted in the same layer; files
# removed in a later layer would still occupy space in the image.
RUN \
    yum -y update && \
    yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \
    yum-config-manager --enable centos-sclo-rh-testing && \
    yum -y update && \
    yum install -y devtoolset-4-gcc devtoolset-4-binutils devtoolset-4-gcc-c++ && \
    yum clean all && \
    # Python
    wget https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \
    bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /opt/python && \
    rm -f Miniconda3-4.5.12-Linux-x86_64.sh && \
    # CMake
    wget -nv -nc https://cmake.org/files/v3.12/cmake-3.12.0-Linux-x86_64.sh --no-check-certificate && \
    bash cmake-3.12.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
    rm -f cmake-3.12.0-Linux-x86_64.sh && \
    # Maven
    # NOTE(review): this is a mirror URL pinned to one release and fetched over
    # plain HTTP without a checksum; confirm the mirror still serves 3.6.1.
    wget http://apache.osuosl.org/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
    tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
    rm -f apache-maven-3.6.1-bin.tar.gz && \
    ln -s /opt/apache-maven-3.6.1/ /opt/maven

# Set the required environment variables:
# - PATH puts the Miniconda Python and Maven ahead of the system tools
# - CC/CXX/CPP select the devtoolset-4 toolchain installed above
ENV PATH=/opt/python/bin:/opt/maven/bin:$PATH
ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
ENV JAVA_HOME=/usr/lib/jvm/java

# Install Python packages (no pip cache is kept, to keep the layer small)
RUN \
    pip install --no-cache-dir numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22 awscli

# The build scripts expect the repository to be mounted here.
WORKDIR /xgboost

View File

@ -1,5 +0,0 @@
#!/bin/bash
# Runs /xgboost/jvm-packages/dev/build.sh inside the codingcat/xgbrelease:latest
# image, with the parent of the current working directory mounted at /xgboost.
# The container is removed when the build finishes (--rm).
set -x

# The volume argument is quoted so that a working directory containing
# whitespace is passed to docker as a single argument.
# NOTE(review): the container memory limit (-m 4g) is lower than the JVM heap
# requested through JAVA_OPTS (-Xmx6g) -- confirm this is intended.
sudo docker run \
  --rm \
  -m 4g \
  -e JAVA_OPTS='-Xmx6g' \
  --attach stdin \
  --attach stdout \
  --attach stderr \
  --volume "$(pwd)/../:/xgboost" \
  codingcat/xgbrelease:latest \
  /xgboost/jvm-packages/dev/build.sh

View File

@ -0,0 +1,44 @@
@echo off
rem
rem Licensed to the Apache Software Foundation (ASF) under one
rem or more contributor license agreements. See the NOTICE file
rem distributed with this work for additional information
rem regarding copyright ownership. The ASF licenses this file
rem to you under the Apache License, Version 2.0 (the
rem "License"); you may not use this file except in compliance
rem with the License. You may obtain a copy of the License at
rem
rem http://www.apache.org/licenses/LICENSE-2.0
rem
rem Unless required by applicable law or agreed to in writing,
rem software distributed under the License is distributed on an
rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
rem KIND, either express or implied. See the License for the
rem specific language governing permissions and limitations
rem under the License.
rem
rem Builds the dmlc/xgboost4j-build Docker image from the directory that
rem contains this file, then runs package-linux.sh inside a container of it.
rem
rem The local path of the directory containing this file
rem NOTE(review): %~dp0 normally ends with a backslash, so the paths derived
rem from BASEDIR below contain a doubled backslash -- confirm this is harmless.
set "BASEDIR=%~dp0"
rem The local path of .m2 directory for maven
set "M2DIR=%BASEDIR%\.m2\"
rem Create a local .m2 directory if needed
if not exist "%M2DIR%" mkdir "%M2DIR%"
rem Build and tag the Dockerfile
rem NOTE(review): BASEDIR is passed unquoted here and in the --volume options
rem below; presumably this breaks when the checkout path contains spaces.
docker build -t dmlc/xgboost4j-build %BASEDIR%
rem Run the packaging script in a throw-away (--rm) interactive container:
rem   - the directory two levels above this file is mounted at /xgboost
rem   - the local .m2 directory is mounted at /root/.m2
rem   - memory is capped at 8g; JAVA_OPTS and MAVEN_OPTS set the JVM heap sizes
rem   - the core file size limit is set to unlimited
rem All arguments given to this file are forwarded as ONE quoted string, so
rem with no arguments the script receives a single empty argument.
docker run^
-it^
--rm^
--memory 8g^
--env JAVA_OPTS="-Xmx6g"^
--env MAVEN_OPTS="-Xmx2g"^
--ulimit core=-1^
--volume %BASEDIR%\..\..:/xgboost^
--volume %M2DIR%:/root/.m2^
dmlc/xgboost4j-build^
/xgboost/jvm-packages/dev/package-linux.sh "%*"

34
jvm-packages/dev/build-linux.sh Executable file
View File

@ -0,0 +1,34 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Builds the dmlc/xgboost4j-build image from the Dockerfile next to this script
# and then runs package-linux.sh inside a container of that image.
#
# Arguments: forwarded unchanged to package-linux.sh (e.g. --skip-tests).
# Exit code: non-zero as soon as any step fails, otherwise that of docker run.

# Stop at the first failing command, so that a failed image build is never
# followed by a run of a stale (or missing) dmlc/xgboost4j-build image.
set -e

# The directory of this file.
BASEDIR="$( cd "$( dirname "$0" )" && pwd )"

# Local Maven repository that is mounted into the container. It is created
# here (as the Windows launcher does) so that the bind-mount source exists
# before docker run and belongs to the invoking user.
mkdir -p "${BASEDIR}/.m2"

# Build and tag the Dockerfile.
docker build -t dmlc/xgboost4j-build "${BASEDIR}"

# Run the packaging script in a throw-away container:
#   - the directory two levels above this script is mounted at /xgboost
#   - ${BASEDIR}/.m2 is mounted at /root/.m2
docker run \
  -it \
  --rm \
  --memory 8g \
  --env JAVA_OPTS="-Xmx6g" \
  --env MAVEN_OPTS="-Xmx2g" \
  --ulimit core=-1 \
  --volume "${BASEDIR}/../..":/xgboost \
  --volume "${BASEDIR}/.m2":/root/.m2 \
  dmlc/xgboost4j-build \
  /xgboost/jvm-packages/dev/package-linux.sh "$@"

View File

@ -1,21 +0,0 @@
#!/usr/bin/env bash
# Sets up a Java / devtoolset-2 / python27 environment and then packages the
# xgboost JVM modules with Maven from /xgboost/jvm-packages.
# Every command is traced (set -x).
set -x
# Toolchain and JVM settings consumed by the build below.
export JAVA_HOME=/usr/lib/jvm/java-1.8.0
export MAVEN_OPTS="-Xmx3000m"
# NOTE(review): CMAKE_CXX_COMPILER points at gcc while CXX points at g++ --
# confirm which one the build is meant to use.
export CMAKE_CXX_COMPILER=/opt/rh/devtoolset-2/root/usr/bin/gcc
export CXX=/opt/rh/devtoolset-2/root/usr/bin/g++
export CC=/opt/rh/devtoolset-2/root/usr/bin/gcc
# NOTE(review): the entries prepended here look like paths to executables
# rather than directories; PATH lookups presumably ignore them -- confirm.
export PATH=$CXX:$CC:/opt/rh/python27/root/usr/bin/python:$PATH
# NOTE(review): `scl enable <collection> bash` presumably starts a child bash
# instead of changing this shell's environment -- confirm these two lines have
# the intended effect.
scl enable devtoolset-2 bash
scl enable python27 bash
# Replace /usr/bin/python with a symlink to the python27 collection's interpreter.
rm /usr/bin/python
ln -s /opt/rh/python27/root/usr/bin/python /usr/bin/python
# build xgboost: enter the JVM packages directory, lift the core file size
# limit, then run the Maven package phase
cd /xgboost/jvm-packages;ulimit -c unlimited;mvn package

View File

@ -1,27 +1,26 @@
#!/bin/sh #!/usr/bin/env bash
#!/bin/bash
# #
# Licensed to the Apache Software Foundation (ASF) under one or more # Licensed to the Apache Software Foundation (ASF) under one
# contributor license agreements. See the NOTICE file distributed with # or more contributor license agreements. See the NOTICE file
# this work for additional information regarding copyright ownership. # distributed with this work for additional information
# The ASF licenses this file to You under the Apache License, Version 2.0 # regarding copyright ownership. The ASF licenses this file
# (the "License"); you may not use this file except in compliance with # to you under the Apache License, Version 2.0 (the
# the License. You may obtain a copy of the License at # "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing,
# distributed under the License is distributed on an "AS IS" BASIS, # software distributed under the License is distributed on an
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# See the License for the specific language governing permissions and # KIND, either express or implied. See the License for the
# limitations under the License. # specific language governing permissions and limitations
# under the License.
# #
# (Yizhi) This is mainly inspired by the script in apache/spark. # (Yizhi) This is mainly inspired by the script in apache/spark.
# I did some modificaiton to get it with our project. # I did some modifications to get it with our project.
# (Nan) Modified from MxNet # (Nan) Modified from MxNet
#
set -e set -e
if [[ ($# -ne 2) || ( $1 == "--help") || $1 == "-h" ]]; then if [[ ($# -ne 2) || ( $1 == "--help") || $1 == "-h" ]]; then

View File

@ -0,0 +1,36 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Packages the xgboost JVM modules with Maven.
#
# Usage: package-linux.sh [--skip-tests]
#   --skip-tests   package without running the tests (-DskipTests)
#   no argument, or an empty first argument: package and run the tests
#
# Must be started from the directory that contains jvm-packages.
# Exit code: 1 for an unrecognised first argument or when jvm-packages cannot
# be entered; otherwise the exit code of mvn.

# Validate the first argument before touching anything. The default arm makes
# the decision depend on the argument alone, so a SKIP_TESTS value inherited
# from the environment can no longer influence (or silently disable) the build.
case "${1:-}" in
  --skip-tests)
    skip_tests=true
    ;;
  "")
    skip_tests=false
    ;;
  *)
    echo "Usage: $0 [--skip-tests]" >&2
    exit 1
    ;;
esac

# Never run Maven from the wrong directory.
cd jvm-packages || {
  echo "$0: cannot enter jvm-packages from $(pwd)" >&2
  exit 1
}

# Assemble the Maven command line as an array so that optional flags are
# appended without string concatenation.
mvn_args=(--batch-mode clean package)
if [[ "${skip_tests}" == "true" ]]; then
  mvn_args+=(-DskipTests)
fi

mvn "${mvn_args[@]}"