[BLOCKING] Adding JVM doc build to Jenkins CI (#3567)

* Adding Java/Scala doc build to Jenkins CI

* Deploy built doc to S3 bucket

* Build doc only for branches

* Build doc first, to get doc faster for branch updates

* Have ReadTheDocs download doc tarball from S3

* Update JVM doc links

* Put doc build commands in a script

* Specify Spark 2.3+ requirement for XGBoost4J-Spark

* Build GPU wheel without NCCL, to reduce binary size
Philip Hyunsu Cho, 2018-08-09 13:27:01 -07:00 (committed via GitHub)
parent bad76048d1, commit aa4ee6a0e4
10 changed files with 89 additions and 9 deletions
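
Taken together, these changes create the following flow: on branch builds (not pull requests), Jenkins runs jvm-packages/build_doc.sh to produce <commit_id>.tar.bz2, archives it, and uploads it to the xgboost-docs S3 bucket as <branch_name>.tar.bz2; at documentation build time, conf.py downloads the tarball for the current branch and serves its contents alongside the Sphinx output. For illustration only, a rough boto3 equivalent of the pipeline's s3Upload step (the job itself uses the Jenkins Pipeline AWS plugin, not boto3):

    # Hedged sketch: what the pipeline's s3Upload step does, expressed with
    # boto3. Bucket and key naming are taken from the Jenkinsfile diff; the
    # 'public-read' canned ACL corresponds to the plugin's 'PublicRead'.
    import boto3

    def deploy_doc(branch_name, commit_id):
        # Branch builds publish <commit_id>.tar.bz2 under the branch's name,
        # giving ReadTheDocs a stable per-branch URL to fetch.
        s3 = boto3.client('s3')
        s3.upload_file(
            Filename='{}.tar.bz2'.format(commit_id),
            Bucket='xgboost-docs',
            Key='{}.tar.bz2'.format(branch_name),
            ExtraArgs={'ACL': 'public-read'},
        )

    deploy_doc('master', 'aa4ee6a0e4')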

Jenkinsfile

@@ -9,7 +9,7 @@ dockerRun = 'tests/ci_build/ci_build.sh'
 def buildMatrix = [
     [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "9.2" ],
     [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ],
-    [ "enabled": false, "os" : "linux", "withGpu": false, "withNccl": false, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "" ],
+    [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": false, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ],
 ]

 pipeline {
@@ -34,6 +34,28 @@ pipeline {
         milestone label: 'Sources ready', ordinal: 1
       }
     }
+    stage('Build doc') {
+      agent any
+      steps {
+        script {
+          if (env.CHANGE_ID == null) {  // This is a branch
+            def commit_id = "${GIT_COMMIT}"
+            def branch_name = "${GIT_LOCAL_BRANCH}"
+            echo 'Building doc...'
+            dir ('jvm-packages') {
+              sh "bash ./build_doc.sh ${commit_id}"
+              archiveArtifacts artifacts: "${commit_id}.tar.bz2", allowEmptyArchive: true
+              echo 'Deploying doc...'
+              withAWS(credentials:'xgboost-doc-bucket') {
+                s3Upload file: "${commit_id}.tar.bz2", bucket: 'xgboost-docs', acl: 'PublicRead', path: "${branch_name}.tar.bz2"
+              }
+            }
+          } else {  // This is a pull request
+            echo 'Skipping doc build step for pull request'
+          }
+        }
+      }
+    }
     stage('Build &amp; Test') {
       steps {
         script {
@@ -121,7 +143,7 @@ def cmakeOptions(conf) {
 }

 def getBuildName(conf) {
-    def gpuLabel = conf['withGpu'] ? "_cuda" + conf['cudaVersion'] : "_cpu"
+    def gpuLabel = conf['withGpu'] ? ("_cuda" + conf['cudaVersion'] + (conf['withNccl'] ? "_nccl" : "_nonccl")) : "_cpu"
     def ompLabel = conf['withOmp'] ? "_omp" : ""
     def pyLabel = "_py${conf['pythonVersion']}"
     return "${conf['os']}${gpuLabel}${ompLabel}${pyLabel}"
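
The getBuildName() change above matters because the build matrix now contains two CUDA 8.0 rows that differ only in withNccl; without distinct _nccl/_nonccl suffixes, their artifact names would collide. A small Python restatement of the naming rule, for illustration only (the real logic is the Groovy above):

    # Illustrative re-statement of getBuildName() from the Jenkinsfile.
    def build_name(conf):
        if conf['withGpu']:
            gpu = '_cuda' + conf['cudaVersion'] + ('_nccl' if conf['withNccl'] else '_nonccl')
        else:
            gpu = '_cpu'
        omp = '_omp' if conf['withOmp'] else ''
        return '{}{}{}_py{}'.format(conf['os'], gpu, omp, conf['pythonVersion'])

    # The new NCCL-less matrix entry now gets its own label:
    print(build_name({'os': 'linux', 'withGpu': True, 'withNccl': False,
                      'withOmp': True, 'pythonVersion': '2.7', 'cudaVersion': '8.0'}))
    # -> linux_cuda8.0_nonccl_omp_py2.7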

doc/conf.py

@@ -12,11 +12,18 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.

 from subprocess import call
+from sh.contrib import git
+import urllib.request
 from recommonmark.parser import CommonMarkParser
 import sys
 import os, subprocess
 import shlex
 import guzzle_sphinx_theme
+
+git_branch = str(git('rev-parse', '--abbrev-ref', 'HEAD')).rstrip('\n')
+filename, _ = urllib.request.urlretrieve('https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(git_branch))
+call('if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'.format(filename), shell=True)

 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
@@ -94,6 +101,7 @@ autoclass_content = 'both'
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 exclude_patterns = ['_build']
+html_extra_path = ['./tmp']

 # The reST default role (used for this markup: `text`) to use for all
 # documents.
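
The added lines in conf.py shell out to tar and depend on the external sh package (pinned in doc/requirements.txt below). As a point of comparison, here is a minimal sketch of the same fetch-and-extract step using only the Python standard library; this is an alternative for illustration, not what the PR ships:

    # Hedged sketch: fetch the per-branch JVM doc tarball from S3 and unpack
    # it under tmp/jvm, mirroring the committed conf.py logic with stdlib-only
    # calls (the PR itself uses `sh` plus a shell invocation of tar).
    import shutil
    import subprocess
    import tarfile
    import urllib.request

    branch = subprocess.check_output(
        ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).decode().strip()
    url = 'https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(branch)
    filename, _ = urllib.request.urlretrieve(url)

    shutil.rmtree('tmp', ignore_errors=True)      # mirrors `rm -rf tmp`
    with tarfile.open(filename, 'r:bz2') as tar:
        tar.extractall('tmp/jvm')                 # html_extra_path serves ./tmp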

doc/jvm/index.rst

@@ -58,10 +58,9 @@ For sbt, please add the repository and dependency in build.sbt as following:

 If you want to use XGBoost4J-Spark, replace ``xgboost4j`` with ``xgboost4j-spark``.

-.. note:: Spark 2.0 Required
+.. note:: XGBoost4J-Spark requires Spark 2.3+

-  After integrating with Dataframe/Dataset APIs of Spark 2.0, XGBoost4J-Spark only supports compile with Spark 2.x. You can build XGBoost4J-Spark as a component of XGBoost4J by running ``mvn package``, and you can specify the version of spark with ``mvn -Dspark.version=2.0.0 package``. (To continue working with Spark 1.x, the users are supposed to update pom.xml by modifying the properties like ``spark.version``, ``scala.version``, and ``scala.binary.version``. Users also need to change the implementation by replacing ``SparkSession`` with ``SQLContext`` and the type of API parameters from ``Dataset[_]`` to ``Dataframe``)
+  XGBoost4J-Spark now requires Spark 2.3+. The latest version of XGBoost4J-Spark uses facilities of `org.apache.spark.ml.param.shared` extensively to provide tight integration with the Spark MLlib framework; these facilities are not fully available in earlier versions of Spark.

 Installation from maven repo
 ============================
@@ -150,7 +149,7 @@ Contents
   java_intro
   XGBoost4J-Spark Tutorial <xgboost4j_spark_tutorial>
   Code Examples <https://github.com/dmlc/xgboost/tree/master/jvm-packages/xgboost4j-example>
-  XGBoost4J Java API <http://dmlc.ml/docs/javadocs/index.html>
-  XGBoost4J Scala API <http://dmlc.ml/docs/scaladocs/xgboost4j/index.html>
-  XGBoost4J-Spark Scala API <http://dmlc.ml/docs/scaladocs/xgboost4j-spark/index.html>
-  XGBoost4J-Flink Scala API <http://dmlc.ml/docs/scaladocs/xgboost4j-flink/index.html>
+  XGBoost4J Java API <javadocs/index>
+  XGBoost4J Scala API <scaladocs/xgboost4j/index>
+  XGBoost4J-Spark Scala API <scaladocs/xgboost4j-spark/index>
+  XGBoost4J-Flink Scala API <scaladocs/xgboost4j-flink/index>

doc/jvm/javadocs/index.rst (new file)

@@ -0,0 +1,3 @@
+==================
+XGBoost4J Java API
+==================

doc/jvm/scaladocs/xgboost4j-flink/index.rst (new file)

@@ -0,0 +1,3 @@
+=========================
+XGBoost4J-Flink Scala API
+=========================

doc/jvm/scaladocs/xgboost4j-spark/index.rst (new file)

@@ -0,0 +1,3 @@
+=========================
+XGBoost4J-Spark Scala API
+=========================

doc/jvm/scaladocs/xgboost4j/index.rst (new file)

@@ -0,0 +1,3 @@
+===================
+XGBoost4J Scala API
+===================

doc/jvm/xgboost4j_spark_tutorial.rst

@@ -61,6 +61,10 @@ and then refer to the snapshot dependency by adding:
     <version>next_version_num-SNAPSHOT</version>
   </dependency>

+.. note:: XGBoost4J-Spark requires Spark 2.3+
+
+  XGBoost4J-Spark now requires Spark 2.3+. The latest version of XGBoost4J-Spark uses facilities of `org.apache.spark.ml.param.shared` extensively to provide tight integration with the Spark MLlib framework; these facilities are not fully available in earlier versions of Spark.
+
 Data Preparation
 ================

doc/requirements.txt

@@ -2,3 +2,4 @@ sphinx
 mock
 guzzle_sphinx_theme
 breathe
+sh>=1.12.14
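
The sh>=1.12.14 pin exists because conf.py imports git from sh.contrib, which older sh releases do not provide. A quick sanity check that the installed version suffices (the same call conf.py makes):

    # Fails with ImportError if the installed `sh` predates sh.contrib.git.
    from sh.contrib import git

    print(str(git('rev-parse', '--abbrev-ref', 'HEAD')).rstrip('\n'))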

jvm-packages/build_doc.sh (new executable file)

@@ -0,0 +1,34 @@
+#!/bin/bash
+
+if [ $# -ne 1 ]; then
+  echo "Usage: $0 [commit id]"
+  exit 1
+fi
+
+set -e
+set -x
+
+commit_id=$1
+
+# Install JVM packages in local Maven repository
+mvn install -DskipTests
+# Build Scaladocs
+mvn scala:doc -DskipTests
+# Build Javadocs
+mvn javadoc:javadoc -DskipTests
+
+# Package JVM docs in a tarball
+mkdir -p tmp/scaladocs
+cp -rv xgboost4j/target/site/apidocs/ ./tmp/javadocs/
+cp -rv xgboost4j/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j/
+cp -rv xgboost4j-spark/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-spark/
+cp -rv xgboost4j-flink/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-flink/
+
+cd tmp
+tar cvjf ${commit_id}.tar.bz2 javadocs/ scaladocs/
+mv ${commit_id}.tar.bz2 ..
+cd ..
+rm -rfv tmp/
+
+set +x
+set +e
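
Because conf.py extracts this tarball into tmp/jvm and publishes it via html_extra_path, the archive must keep javadocs/ and scaladocs/ at its top level for the new toctree entries (javadocs/index, scaladocs/xgboost4j/index, ...) to resolve. A small hedged check that could be run against the script's output before upload (not part of the PR):

    # Hedged sketch: verify the doc tarball produced by build_doc.sh has the
    # top-level layout that conf.py and the toctree entries expect.
    import sys
    import tarfile

    archive = sys.argv[1]  # e.g. <commit_id>.tar.bz2
    with tarfile.open(archive, 'r:bz2') as tar:
        top = {name.split('/')[0] for name in tar.getnames()}

    assert {'javadocs', 'scaladocs'} <= top, 'unexpected layout: {}'.format(top)
    print('tarball layout OK:', sorted(top))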