[BLOCKING] Adding JVM doc build to Jenkins CI (#3567)

* Adding Java/Scala doc build to Jenkins CI

* Deploy built doc to S3 bucket

* Build doc only for branches

* Build doc first, to get doc faster for branch updates

* Have ReadTheDocs download doc tarball from S3

* Update JVM doc links

* Put doc build commands in a script

* Specify Spark 2.3+ requirement for XGBoost4J-Spark

* Build GPU wheel without NCCL, to reduce binary size
Philip Hyunsu Cho, 2018-08-09 13:27:01 -07:00, committed via GitHub
commit aa4ee6a0e4 (parent bad76048d1)
10 changed files with 89 additions and 9 deletions

Jenkinsfile

@@ -9,7 +9,7 @@ dockerRun = 'tests/ci_build/ci_build.sh'
def buildMatrix = [
    [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "9.2" ],
    [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ],
-   [ "enabled": false, "os" : "linux", "withGpu": false, "withNccl": false, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "" ],
+   [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": false, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ],
]
pipeline {
@@ -34,6 +34,28 @@ pipeline {
milestone label: 'Sources ready', ordinal: 1
}
}
+    stage('Build doc') {
+      agent any
+      steps {
+        script {
+          if (env.CHANGE_ID == null) { // This is a branch
+            def commit_id = "${GIT_COMMIT}"
+            def branch_name = "${GIT_LOCAL_BRANCH}"
+            echo 'Building doc...'
+            dir ('jvm-packages') {
+              sh "bash ./build_doc.sh ${commit_id}"
+              archiveArtifacts artifacts: "${commit_id}.tar.bz2", allowEmptyArchive: true
+              echo 'Deploying doc...'
+              withAWS(credentials: 'xgboost-doc-bucket') {
+                s3Upload file: "${commit_id}.tar.bz2", bucket: 'xgboost-docs', acl: 'PublicRead', path: "${branch_name}.tar.bz2"
+              }
+            }
+          } else { // This is a pull request
+            echo 'Skipping doc build step for pull request'
+          }
+        }
+      }
+    }
stage('Build & Test') {
steps {
script {
@@ -121,7 +143,7 @@ def cmakeOptions(conf) {
}
def getBuildName(conf) {
def gpuLabel = conf['withGpu'] ? "_cuda" + conf['cudaVersion'] : "_cpu"
def gpuLabel = conf['withGpu'] ? ("_cuda" + conf['cudaVersion'] + (conf['withNccl'] ? "_nccl" : "_nonccl")) : "_cpu"
def ompLabel = conf['withOmp'] ? "_omp" : ""
def pyLabel = "_py${conf['pythonVersion']}"
return "${conf['os']}${gpuLabel}${ompLabel}${pyLabel}"

doc/conf.py

@@ -12,11 +12,18 @@
# All configuration values have a default; values that are commented out
# serve to show the default.
+from subprocess import call
+from sh.contrib import git
+import urllib.request
from recommonmark.parser import CommonMarkParser
import sys
import os, subprocess
import shlex
import guzzle_sphinx_theme
+
+git_branch = str(git('rev-parse', '--abbrev-ref', 'HEAD')).rstrip('\n')
+filename, _ = urllib.request.urlretrieve('https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(git_branch))
+call('if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'.format(filename), shell=True)
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
@@ -94,6 +101,7 @@ autoclass_content = 'both'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
+html_extra_path = ['./tmp']
# The reST default role (used for this markup: `text`) to use for all
# documents.
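In effect, every ReadTheDocs build now pulls the pre-built JVM API docs for its branch and ships them alongside the Sphinx output. A rough shell equivalent of the new conf.py logic (a sketch only; jvm_docs.tar.bz2 is a placeholder name, and the real code runs inside conf.py at build time):

    branch=$(git rev-parse --abbrev-ref HEAD)    # e.g. 'master'
    curl -o jvm_docs.tar.bz2 "https://s3-us-west-2.amazonaws.com/xgboost-docs/${branch}.tar.bz2"
    rm -rf tmp && mkdir -p tmp/jvm
    tar xf jvm_docs.tar.bz2 -C tmp/jvm           # unpacks javadocs/ and scaladocs/
    # html_extra_path = ['./tmp'] then copies tmp/* into the HTML output root,
    # so the API docs are served under <site>/jvm/javadocs/ and <site>/jvm/scaladocs/.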

doc/jvm/index.rst

@@ -58,10 +58,9 @@ For sbt, please add the repository and dependency in build.sbt as following:
If you want to use XGBoost4J-Spark, replace ``xgboost4j`` with ``xgboost4j-spark``.
-.. note:: Spark 2.0 Required
-
-   After integrating with Dataframe/Dataset APIs of Spark 2.0, XGBoost4J-Spark only supports compile with Spark 2.x. You can build XGBoost4J-Spark as a component of XGBoost4J by running ``mvn package``, and you can specify the version of spark with ``mvn -Dspark.version=2.0.0 package``. (To continue working with Spark 1.x, the users are supposed to update pom.xml by modifying the properties like ``spark.version``, ``scala.version``, and ``scala.binary.version``. Users also need to change the implementation by replacing ``SparkSession`` with ``SQLContext`` and the type of API parameters from ``Dataset[_]`` to ``Dataframe``)
+.. note:: XGBoost4J-Spark requires Spark 2.3+
+
+   The latest version of XGBoost4J-Spark makes extensive use of ``org.apache.spark.ml.param.shared`` to integrate tightly with the Spark MLlib framework, and these facilities are not fully available in earlier versions of Spark.
Installation from maven repo
============================
@@ -150,7 +149,7 @@ Contents
java_intro
XGBoost4J-Spark Tutorial <xgboost4j_spark_tutorial>
Code Examples <https://github.com/dmlc/xgboost/tree/master/jvm-packages/xgboost4j-example>
-   XGBoost4J Java API <http://dmlc.ml/docs/javadocs/index.html>
-   XGBoost4J Scala API <http://dmlc.ml/docs/scaladocs/xgboost4j/index.html>
-   XGBoost4J-Spark Scala API <http://dmlc.ml/docs/scaladocs/xgboost4j-spark/index.html>
-   XGBoost4J-Flink Scala API <http://dmlc.ml/docs/scaladocs/xgboost4j-flink/index.html>
+   XGBoost4J Java API <javadocs/index>
+   XGBoost4J Scala API <scaladocs/xgboost4j/index>
+   XGBoost4J-Spark Scala API <scaladocs/xgboost4j-spark/index>
+   XGBoost4J-Flink Scala API <scaladocs/xgboost4j-flink/index>
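The new entries reference local stub pages (added below) instead of the external dmlc.ml site; the stubs keep the toctree valid, and the real Javadoc/Scaladoc HTML extracted from the S3 tarball lands at the same jvm/javadocs/ and jvm/scaladocs/ paths via the ``html_extra_path`` setting above. That, at least, appears to be the intent of pairing the two changes.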

doc/jvm/javadocs/index.rst (new file)

@@ -0,0 +1,3 @@
==================
XGBoost4J Java API
==================

doc/jvm/scaladocs/xgboost4j-flink/index.rst (new file)

@@ -0,0 +1,3 @@
=========================
XGBoost4J-Flink Scala API
=========================

doc/jvm/scaladocs/xgboost4j-spark/index.rst (new file)

@@ -0,0 +1,3 @@
=========================
XGBoost4J-Spark Scala API
=========================

doc/jvm/scaladocs/xgboost4j/index.rst (new file)

@@ -0,0 +1,3 @@
===================
XGBoost4J Scala API
===================

doc/jvm/xgboost4j_spark_tutorial.rst

@@ -61,6 +61,10 @@ and then refer to the snapshot dependency by adding:
<version>next_version_num-SNAPSHOT</version>
</dependency>
+.. note:: XGBoost4J-Spark requires Spark 2.3+
+
+   The latest version of XGBoost4J-Spark makes extensive use of ``org.apache.spark.ml.param.shared`` to integrate tightly with the Spark MLlib framework, and these facilities are not fully available in earlier versions of Spark.
Data Preparation
================

doc/requirements.txt

@@ -2,3 +2,4 @@ sphinx
mock
guzzle_sphinx_theme
breathe
+sh>=1.12.14
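The new sh requirement supports the ``from sh.contrib import git`` call added to conf.py above; the ``>=1.12.14`` floor presumably pins the first release whose ``sh.contrib`` git wrapper behaves as that code expects.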

jvm-packages/build_doc.sh (new executable file)

@@ -0,0 +1,34 @@
#!/bin/bash

if [ $# -ne 1 ]; then
  echo "Usage: $0 [commit id]"
  exit 1
fi

set -e
set -x

commit_id=$1

# Install JVM packages in local Maven repository
mvn install -DskipTests
# Build Scaladocs
mvn scala:doc -DskipTests
# Build Javadocs
mvn javadoc:javadoc -DskipTests

# Package JVM docs in a tarball
mkdir -p tmp/scaladocs
cp -rv xgboost4j/target/site/apidocs/ ./tmp/javadocs/
cp -rv xgboost4j/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j/
cp -rv xgboost4j-spark/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-spark/
cp -rv xgboost4j-flink/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-flink/
cd tmp
tar cvjf ${commit_id}.tar.bz2 javadocs/ scaladocs/
mv ${commit_id}.tar.bz2 ..
cd ..
rm -rfv tmp/

set +x
set +e
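For reference, the 'Build doc' stage in the Jenkinsfile runs this script from inside jvm-packages/ with the current commit hash, and the script leaves the tarball in jvm-packages/ itself (it moves the archive up out of tmp/). A manual run would look like this sketch:

    cd jvm-packages
    bash ./build_doc.sh "$(git rev-parse HEAD)"
    ls ./*.tar.bz2    # <commit id>.tar.bz2, ready to archive or upload to S3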