[BLOCKING] Adding JVM doc build to Jenkins CI (#3567)

* Adding Java/Scala doc build to Jenkins CI

* Deploy built doc to S3 bucket

* Build doc only for branches

* Build doc first, to get doc faster for branch updates

* Have ReadTheDocs download doc tarball from S3

* Update JVM doc links

* Put doc build commands in a script

* Specify Spark 2.3+ requirement for XGBoost4J-Spark

* Build GPU wheel without NCCL, to reduce binary size
Philip Hyunsu Cho, 2018-08-09 13:27:01 -07:00, committed via GitHub
commit aa4ee6a0e4 (parent bad76048d1)
10 changed files with 89 additions and 9 deletions

Jenkinsfile

@@ -9,7 +9,7 @@ dockerRun = 'tests/ci_build/ci_build.sh'
def buildMatrix = [
    [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "9.2" ],
    [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ],
-   [ "enabled": false, "os" : "linux", "withGpu": false, "withNccl": false, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "" ],
+   [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": false, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ],
]
pipeline {
@@ -34,6 +34,28 @@ pipeline {
milestone label: 'Sources ready', ordinal: 1
}
}
+    stage('Build doc') {
+      agent any
+      steps {
+        script {
+          if (env.CHANGE_ID == null) { // This is a branch
+            def commit_id = "${GIT_COMMIT}"
+            def branch_name = "${GIT_LOCAL_BRANCH}"
+            echo 'Building doc...'
+            dir ('jvm-packages') {
+              sh "bash ./build_doc.sh ${commit_id}"
+              archiveArtifacts artifacts: "${commit_id}.tar.bz2", allowEmptyArchive: true
+              echo 'Deploying doc...'
+              withAWS(credentials: 'xgboost-doc-bucket') {
+                s3Upload file: "${commit_id}.tar.bz2", bucket: 'xgboost-docs', acl: 'PublicRead', path: "${branch_name}.tar.bz2"
+              }
+            }
+          } else { // This is a pull request
+            echo 'Skipping doc build step for pull request'
+          }
+        }
+      }
+    }
stage('Build & Test') {
steps {
script {
@@ -121,7 +143,7 @@ def cmakeOptions(conf) {
}
def getBuildName(conf) {
def gpuLabel = conf['withGpu'] ? "_cuda" + conf['cudaVersion'] : "_cpu"
def gpuLabel = conf['withGpu'] ? ("_cuda" + conf['cudaVersion'] + (conf['withNccl'] ? "_nccl" : "_nonccl")) : "_cpu"
def ompLabel = conf['withOmp'] ? "_omp" : ""
def pyLabel = "_py${conf['pythonVersion']}"
return "${conf['os']}${gpuLabel}${ompLabel}${pyLabel}"

doc/conf.py

@@ -12,11 +12,18 @@
# All configuration values have a default; values that are commented out
# serve to show the default.
+from subprocess import call
+from sh.contrib import git
+import urllib.request
from recommonmark.parser import CommonMarkParser
import sys
import os, subprocess
import shlex
import guzzle_sphinx_theme
+
+git_branch = str(git('rev-parse', '--abbrev-ref', 'HEAD')).rstrip('\n')
+filename, _ = urllib.request.urlretrieve('https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(git_branch))
+call('if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'.format(filename), shell=True)
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
@@ -94,6 +101,7 @@ autoclass_content = 'both'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
+html_extra_path = ['./tmp']
# The reST default role (used for this markup: `text`) to use for all
# documents.
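In effect, every ReadTheDocs build now pulls the pre-built JVM API docs for its branch and ships them alongside the Sphinx output. A rough shell equivalent of the new conf.py logic (a sketch only; jvm_docs.tar.bz2 is a placeholder name, and the real code runs inside conf.py at build time):

    branch=$(git rev-parse --abbrev-ref HEAD)    # e.g. 'master'
    curl -o jvm_docs.tar.bz2 "https://s3-us-west-2.amazonaws.com/xgboost-docs/${branch}.tar.bz2"
    rm -rf tmp && mkdir -p tmp/jvm
    tar xf jvm_docs.tar.bz2 -C tmp/jvm           # unpacks javadocs/ and scaladocs/
    # html_extra_path = ['./tmp'] then copies tmp/* into the HTML output root,
    # so the API docs are served under <site>/jvm/javadocs/ and <site>/jvm/scaladocs/.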

doc/jvm/index.rst

@@ -58,10 +58,9 @@ For sbt, please add the repository and dependency in build.sbt as following:
If you want to use XGBoost4J-Spark, replace ``xgboost4j`` with ``xgboost4j-spark``.
-.. note:: Spark 2.0 Required
-
-   After integrating with Dataframe/Dataset APIs of Spark 2.0, XGBoost4J-Spark only supports compile with Spark 2.x. You can build XGBoost4J-Spark as a component of XGBoost4J by running ``mvn package``, and you can specify the version of spark with ``mvn -Dspark.version=2.0.0 package``. (To continue working with Spark 1.x, the users are supposed to update pom.xml by modifying the properties like ``spark.version``, ``scala.version``, and ``scala.binary.version``. Users also need to change the implementation by replacing ``SparkSession`` with ``SQLContext`` and the type of API parameters from ``Dataset[_]`` to ``Dataframe``)
+.. note:: XGBoost4J-Spark requires Spark 2.3+
+
+   The latest version of XGBoost4J-Spark makes extensive use of ``org.apache.spark.ml.param.shared`` to integrate tightly with the Spark MLlib framework, and these facilities are not fully available in earlier versions of Spark.
Installation from maven repo
============================
@@ -150,7 +149,7 @@ Contents
java_intro
XGBoost4J-Spark Tutorial <xgboost4j_spark_tutorial>
Code Examples <https://github.com/dmlc/xgboost/tree/master/jvm-packages/xgboost4j-example>
-   XGBoost4J Java API <http://dmlc.ml/docs/javadocs/index.html>
-   XGBoost4J Scala API <http://dmlc.ml/docs/scaladocs/xgboost4j/index.html>
-   XGBoost4J-Spark Scala API <http://dmlc.ml/docs/scaladocs/xgboost4j-spark/index.html>
-   XGBoost4J-Flink Scala API <http://dmlc.ml/docs/scaladocs/xgboost4j-flink/index.html>
+   XGBoost4J Java API <javadocs/index>
+   XGBoost4J Scala API <scaladocs/xgboost4j/index>
+   XGBoost4J-Spark Scala API <scaladocs/xgboost4j-spark/index>
+   XGBoost4J-Flink Scala API <scaladocs/xgboost4j-flink/index>
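The new entries reference local stub pages (added below) instead of the external dmlc.ml site; the stubs keep the toctree valid, and the real Javadoc/Scaladoc HTML extracted from the S3 tarball lands at the same jvm/javadocs/ and jvm/scaladocs/ paths via the ``html_extra_path`` setting above. That, at least, appears to be the intent of pairing the two changes.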

doc/jvm/javadocs/index.rst (new file)

@@ -0,0 +1,3 @@
==================
XGBoost4J Java API
==================

doc/jvm/scaladocs/xgboost4j-flink/index.rst (new file)

@@ -0,0 +1,3 @@
=========================
XGBoost4J-Flink Scala API
=========================

doc/jvm/scaladocs/xgboost4j-spark/index.rst (new file)

@@ -0,0 +1,3 @@
=========================
XGBoost4J-Spark Scala API
=========================

doc/jvm/scaladocs/xgboost4j/index.rst (new file)

@@ -0,0 +1,3 @@
===================
XGBoost4J Scala API
===================

doc/jvm/xgboost4j_spark_tutorial.rst

@@ -61,6 +61,10 @@ and then refer to the snapshot dependency by adding:
<version>next_version_num-SNAPSHOT</version>
</dependency>
+.. note:: XGBoost4J-Spark requires Spark 2.3+
+
+   The latest version of XGBoost4J-Spark makes extensive use of ``org.apache.spark.ml.param.shared`` to integrate tightly with the Spark MLlib framework, and these facilities are not fully available in earlier versions of Spark.
Data Preparation
================

doc/requirements.txt

@@ -2,3 +2,4 @@ sphinx
mock
guzzle_sphinx_theme
breathe
+sh>=1.12.14
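The new sh requirement supports the ``from sh.contrib import git`` call added to conf.py above; the ``>=1.12.14`` floor presumably pins the first release whose ``sh.contrib`` git wrapper behaves as that code expects.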

jvm-packages/build_doc.sh (new executable file)

@@ -0,0 +1,34 @@
#!/bin/bash

if [ $# -ne 1 ]; then
  echo "Usage: $0 [commit id]"
  exit 1
fi

set -e
set -x

commit_id=$1

# Install JVM packages in local Maven repository
mvn install -DskipTests
# Build Scaladocs
mvn scala:doc -DskipTests
# Build Javadocs
mvn javadoc:javadoc -DskipTests

# Package JVM docs in a tarball
mkdir -p tmp/scaladocs
cp -rv xgboost4j/target/site/apidocs/ ./tmp/javadocs/
cp -rv xgboost4j/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j/
cp -rv xgboost4j-spark/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-spark/
cp -rv xgboost4j-flink/target/site/scaladocs/ ./tmp/scaladocs/xgboost4j-flink/
cd tmp
tar cvjf ${commit_id}.tar.bz2 javadocs/ scaladocs/
mv ${commit_id}.tar.bz2 ..
cd ..
rm -rfv tmp/

set +x
set +e
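For reference, the 'Build doc' stage in the Jenkinsfile runs this script from inside jvm-packages/ with the current commit hash, and the script leaves the tarball in jvm-packages/ itself (it moves the archive up out of tmp/). A manual run would look like this sketch:

    cd jvm-packages
    bash ./build_doc.sh "$(git rev-parse HEAD)"
    ls ./*.tar.bz2    # <commit id>.tar.bz2, ready to archive or upload to S3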