[jvm-packages] update local dev build process (#4640)
This commit is contained in:
parent
9975c533c7
commit
6323ef94ad
@ -98,3 +98,23 @@ spark.read.format("libsvm").load("trainingset_libsvm")
|
||||
```
|
||||
|
||||
Spark assumes that the dataset uses 1-based indexing. However, when you run prediction with other bindings of XGBoost (e.g. the Python API of XGBoost), XGBoost assumes that the dataset uses 0-based indexing. This creates a pitfall for users who train a model with Spark but then predict on a dataset in the same format with other bindings of XGBoost.
|
||||
|
||||
## Development
|
||||
|
||||
You can build/package xgboost4j locally with the following steps:
|
||||
|
||||
**Linux:**
|
||||
1. Ensure [Docker for Linux](https://docs.docker.com/install/) is installed.
|
||||
2. Clone this repo: `git clone --recursive https://github.com/dmlc/xgboost.git`
|
||||
3. Run the following command:
|
||||
- With Tests: `./xgboost/jvm-packages/dev/build-linux.sh`
|
||||
- Skip Tests: `./xgboost/jvm-packages/dev/build-linux.sh --skip-tests`
|
||||
|
||||
**Windows:**
|
||||
1. Ensure [Docker for Windows](https://docs.docker.com/docker-for-windows/install/) is installed.
|
||||
2. Clone this repo: `git clone --recursive https://github.com/dmlc/xgboost.git`
|
||||
3. Run the following command:
|
||||
- With Tests: `.\xgboost\jvm-packages\dev\build-linux.cmd`
|
||||
- Skip Tests: `.\xgboost\jvm-packages\dev\build-linux.cmd --skip-tests`
|
||||
|
||||
*Note: this will create jars for deployment on Linux machines.*
|
||||
|
||||
3
jvm-packages/dev/.gitattributes
vendored
Normal file
3
jvm-packages/dev/.gitattributes
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
# Set line endings to LF, even on Windows. Otherwise, execution within Docker fails.
|
||||
# See https://help.github.com/articles/dealing-with-line-endings/
|
||||
*.sh text eol=lf
|
||||
1
jvm-packages/dev/.gitignore
vendored
Normal file
1
jvm-packages/dev/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
.m2
|
||||
50
jvm-packages/dev/Dockerfile
Normal file
50
jvm-packages/dev/Dockerfile
Normal file
@ -0,0 +1,50 @@
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
FROM centos:6

# Install all basic requirements.
# Everything happens in one layer, and the downloaded installers plus the yum
# caches are removed at the end so they do not bloat the resulting image.
RUN \
    yum -y update && \
    yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \
    yum-config-manager --enable centos-sclo-rh-testing && \
    yum -y update && \
    yum install -y devtoolset-4-gcc devtoolset-4-binutils devtoolset-4-gcc-c++ && \
    # Python
    wget https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \
    bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /opt/python && \
    # CMake
    # NOTE(review): --no-check-certificate disables TLS verification for an
    # installer that is executed as root below; confirm whether the CA bundle
    # in this base image still requires it and drop the flag if not.
    wget -nv -nc https://cmake.org/files/v3.12/cmake-3.12.0-Linux-x86_64.sh --no-check-certificate && \
    bash cmake-3.12.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
    # Maven
    # Fetched over HTTPS from the Apache archive: regular mirrors only keep the
    # current release, so a pinned version eventually disappears from them.
    wget https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
    tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
    ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
    # Clean up installers and package caches
    rm -f Miniconda3-4.5.12-Linux-x86_64.sh cmake-3.12.0-Linux-x86_64.sh apache-maven-3.6.1-bin.tar.gz && \
    yum clean all

# Set the required environment variables
ENV PATH=/opt/python/bin:/opt/maven/bin:$PATH
ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++
ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
ENV JAVA_HOME=/usr/lib/jvm/java

# Install Python packages (no pip cache is kept in the image)
RUN \
    pip install --no-cache-dir numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22 awscli

# The build scripts mount the repository checkout at this path
WORKDIR /xgboost
|
||||
@ -1,5 +0,0 @@
|
||||
#!/bin/bash

# Runs the build script inside the release container, with the parent of the
# current directory mounted at /xgboost.

set -x

# $(pwd) replaces the backticks and the whole volume spec is quoted, so a
# checkout path containing spaces is still passed to docker as one argument.
sudo docker run --rm -m 4g -e JAVA_OPTS='-Xmx6g' --attach stdin --attach stdout --attach stderr --volume "$(pwd)/../:/xgboost" codingcat/xgbrelease:latest /xgboost/jvm-packages/dev/build.sh
|
||||
44
jvm-packages/dev/build-linux.cmd
Normal file
44
jvm-packages/dev/build-linux.cmd
Normal file
@ -0,0 +1,44 @@
|
||||
@echo off
|
||||
|
||||
rem
|
||||
rem Licensed to the Apache Software Foundation (ASF) under one
|
||||
rem or more contributor license agreements. See the NOTICE file
|
||||
rem distributed with this work for additional information
|
||||
rem regarding copyright ownership. The ASF licenses this file
|
||||
rem to you under the Apache License, Version 2.0 (the
|
||||
rem "License"); you may not use this file except in compliance
|
||||
rem with the License. You may obtain a copy of the License at
|
||||
rem
|
||||
rem http://www.apache.org/licenses/LICENSE-2.0
|
||||
rem
|
||||
rem Unless required by applicable law or agreed to in writing,
|
||||
rem software distributed under the License is distributed on an
|
||||
rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
rem KIND, either express or implied. See the License for the
|
||||
rem specific language governing permissions and limitations
|
||||
rem under the License.
|
||||
rem
|
||||
|
||||
rem The local path of this file
set "BASEDIR=%~dp0"

rem %~dp0 always ends with a backslash. Strip it so that the quoted paths
rem below do not end in \" (which would be read as an escaped quote).
if "%BASEDIR:~-1%"=="\" set "BASEDIR=%BASEDIR:~0,-1%"

rem The local path of .m2 directory for maven
set "M2DIR=%BASEDIR%\.m2"

rem Create a local .m2 directory if needed
if not exist "%M2DIR%" mkdir "%M2DIR%"

rem Build and tag the Dockerfile; stop here if the image could not be built
docker build -t dmlc/xgboost4j-build "%BASEDIR%"
if errorlevel 1 exit /b 1

rem Host paths are quoted so a checkout under a directory with spaces works.
rem All script arguments are forwarded as a single quoted argument.
docker run^
  -it^
  --rm^
  --memory 8g^
  --env JAVA_OPTS="-Xmx6g"^
  --env MAVEN_OPTS="-Xmx2g"^
  --ulimit core=-1^
  --volume "%BASEDIR%\..\..:/xgboost"^
  --volume "%M2DIR%:/root/.m2"^
  dmlc/xgboost4j-build^
  /xgboost/jvm-packages/dev/package-linux.sh "%*"
|
||||
34
jvm-packages/dev/build-linux.sh
Executable file
34
jvm-packages/dev/build-linux.sh
Executable file
@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
# Builds the xgboost4j build image and runs the packaging script inside it.
# Usage: build-linux.sh [--skip-tests]
# All arguments are forwarded unchanged to package-linux.sh in the container.

# Stop at the first failure, so the container is never started after a failed
# image build.
set -euo pipefail

# Absolute path of the directory containing this file
BASEDIR="$(cd -- "$(dirname -- "$0")" && pwd)"
readonly BASEDIR

# Create the local Maven cache directory up front (the .cmd variant does the
# same). If the bind-mount source is missing, docker creates it itself.
mkdir -p -- "${BASEDIR}/.m2"

# Build and tag the Dockerfile
docker build -t dmlc/xgboost4j-build "${BASEDIR}"

# Mount the repository root at /xgboost and the Maven cache at /root/.m2
docker run \
  -it \
  --rm \
  --memory 8g \
  --env JAVA_OPTS="-Xmx6g" \
  --env MAVEN_OPTS="-Xmx2g" \
  --ulimit core=-1 \
  --volume "${BASEDIR}/../..":/xgboost \
  --volume "${BASEDIR}/.m2":/root/.m2 \
  dmlc/xgboost4j-build \
  /xgboost/jvm-packages/dev/package-linux.sh "$@"
|
||||
@ -1,21 +0,0 @@
|
||||
#!/usr/bin/env bash

# Configures the devtoolset-2 / python27 toolchain and packages the JVM
# modules with Maven from /xgboost/jvm-packages.

set -x

export JAVA_HOME=/usr/lib/jvm/java-1.8.0
export MAVEN_OPTS="-Xmx3000m"
export CMAKE_CXX_COMPILER=/opt/rh/devtoolset-2/root/usr/bin/gcc
export CXX=/opt/rh/devtoolset-2/root/usr/bin/g++
export CC=/opt/rh/devtoolset-2/root/usr/bin/gcc

# PATH entries must be directories; listing the executables themselves has no
# effect on command lookup.
export PATH=/opt/rh/devtoolset-2/root/usr/bin:/opt/rh/python27/root/usr/bin:$PATH

# `scl enable <collection> bash` starts a child shell, so it never changes the
# environment of this script. Source the collections' enable scripts instead.
source /opt/rh/devtoolset-2/enable
source /opt/rh/python27/enable

# Point /usr/bin/python at the python27 collection; -f tolerates a missing link
rm -f /usr/bin/python
ln -s /opt/rh/python27/root/usr/bin/python /usr/bin/python

# build xgboost
cd /xgboost/jvm-packages || exit 1
ulimit -c unlimited
mvn package
|
||||
|
||||
@ -1,27 +1,26 @@
|
||||
#!/bin/sh
|
||||
|
||||
#!/bin/bash
|
||||
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
# (Yizhi) This is mainly inspired by the script in apache/spark.
|
||||
# I did some modificaiton to get it with our project.
|
||||
# I did some modifications to get it with our project.
|
||||
# (Nan) Modified from MxNet
|
||||
|
||||
#
|
||||
set -e
|
||||
|
||||
if [[ ($# -ne 2) || ( $1 == "--help") || $1 == "-h" ]]; then
|
||||
|
||||
36
jvm-packages/dev/package-linux.sh
Executable file
36
jvm-packages/dev/package-linux.sh
Executable file
@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
# Packages the JVM modules with Maven.
# Usage: package-linux.sh [--skip-tests]
#   --skip-tests  package without running the test suite
# Exits with status 1 and prints the usage line on any other first argument.

set -euo pipefail

usage() {
  printf 'Usage: %s [--skip-tests]\n' "$0" >&2
  exit 1
}

# Always derive SKIP_TESTS from the first argument, so a value inherited from
# the environment can never let an invalid argument through.
case "${1:-}" in
  --skip-tests) SKIP_TESTS=true ;;
  "") SKIP_TESTS=false ;;
  *) usage ;;
esac

# This script lives in jvm-packages/dev, so its parent directory is the Maven
# project root. Resolving it from the script path works from any directory.
cd -- "$(dirname -- "$0")/.."

if [[ "${SKIP_TESTS}" == "true" ]]; then
  mvn --batch-mode clean package -DskipTests
else
  mvn --batch-mode clean package
fi
|
||||
Loading…
x
Reference in New Issue
Block a user