Compare commits
14 Commits
master-roc
...
release_0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6852d0afd5 | ||
|
|
c0bd296354 | ||
|
|
09142c94f5 | ||
|
|
ba4244ef51 | ||
|
|
a46b0ac2d2 | ||
|
|
4bc7e94603 | ||
|
|
a899e8f4cd | ||
|
|
f9a833f525 | ||
|
|
1afd2f1b2d | ||
|
|
b1d76d533d | ||
|
|
9d70655c42 | ||
|
|
dd1fda449c | ||
|
|
324f3b5259 | ||
|
|
24e08c2638 |
103
Jenkinsfile
vendored
103
Jenkinsfile
vendored
@ -3,8 +3,15 @@
|
||||
// Jenkins pipeline
|
||||
// See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/
|
||||
|
||||
import groovy.transform.Field
|
||||
|
||||
/* Unrestricted tasks: tasks that do NOT generate artifacts */
|
||||
|
||||
// Command to run command inside a docker container
|
||||
dockerRun = 'tests/ci_build/ci_build.sh'
|
||||
def dockerRun = 'tests/ci_build/ci_build.sh'
|
||||
// Utility functions
|
||||
@Field
|
||||
def utils
|
||||
|
||||
def buildMatrix = [
|
||||
[ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "9.2" ],
|
||||
@ -26,42 +33,25 @@ pipeline {
|
||||
|
||||
// Build stages
|
||||
stages {
|
||||
stage('Get sources') {
|
||||
agent any
|
||||
stage('Jenkins: Get sources') {
|
||||
agent {
|
||||
label 'unrestricted'
|
||||
}
|
||||
steps {
|
||||
checkoutSrcs()
|
||||
script {
|
||||
utils = load('tests/ci_build/jenkins_tools.Groovy')
|
||||
utils.checkoutSrcs()
|
||||
}
|
||||
stash name: 'srcs', excludes: '.git/'
|
||||
milestone label: 'Sources ready', ordinal: 1
|
||||
}
|
||||
}
|
||||
stage('Build doc') {
|
||||
agent any
|
||||
steps {
|
||||
script {
|
||||
if (env.CHANGE_ID == null) { // This is a branch
|
||||
def commit_id = "${GIT_COMMIT}"
|
||||
def branch_name = "${GIT_LOCAL_BRANCH}"
|
||||
echo 'Building doc...'
|
||||
dir ('jvm-packages') {
|
||||
sh "bash ./build_doc.sh ${commit_id}"
|
||||
archiveArtifacts artifacts: "${commit_id}.tar.bz2", allowEmptyArchive: true
|
||||
echo 'Deploying doc...'
|
||||
withAWS(credentials:'xgboost-doc-bucket') {
|
||||
s3Upload file: "${commit_id}.tar.bz2", bucket: 'xgboost-docs', acl: 'PublicRead', path: "${branch_name}.tar.bz2"
|
||||
}
|
||||
}
|
||||
} else { // This is a pull request
|
||||
echo 'Skipping doc build step for pull request'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Build & Test') {
|
||||
stage('Jenkins: Build & Test') {
|
||||
steps {
|
||||
script {
|
||||
parallel (buildMatrix.findAll{it['enabled']}.collectEntries{ c ->
|
||||
def buildName = getBuildName(c)
|
||||
buildFactory(buildName, c)
|
||||
def buildName = utils.getBuildName(c)
|
||||
utils.buildFactory(buildName, c, false, this.&buildPlatformCmake)
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -69,37 +59,11 @@ pipeline {
|
||||
}
|
||||
}
|
||||
|
||||
// initialize source codes
|
||||
def checkoutSrcs() {
|
||||
retry(5) {
|
||||
try {
|
||||
timeout(time: 2, unit: 'MINUTES') {
|
||||
checkout scm
|
||||
sh 'git submodule update --init'
|
||||
}
|
||||
} catch (exc) {
|
||||
deleteDir()
|
||||
error "Failed to fetch source codes"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates cmake and make builds
|
||||
*/
|
||||
def buildFactory(buildName, conf) {
|
||||
def os = conf["os"]
|
||||
def nodeReq = conf["withGpu"] ? "${os} && gpu" : "${os}"
|
||||
def dockerTarget = conf["withGpu"] ? "gpu" : "cpu"
|
||||
[ ("${buildName}") : { buildPlatformCmake("${buildName}", conf, nodeReq, dockerTarget) }
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Build platform and test it via cmake.
|
||||
*/
|
||||
def buildPlatformCmake(buildName, conf, nodeReq, dockerTarget) {
|
||||
def opts = cmakeOptions(conf)
|
||||
def opts = utils.cmakeOptions(conf)
|
||||
// Destination dir for artifacts
|
||||
def distDir = "dist/${buildName}"
|
||||
def dockerArgs = ""
|
||||
@ -119,33 +83,6 @@ def buildPlatformCmake(buildName, conf, nodeReq, dockerTarget) {
|
||||
sh """
|
||||
${dockerRun} ${dockerTarget} ${dockerArgs} tests/ci_build/build_via_cmake.sh ${opts}
|
||||
${dockerRun} ${dockerTarget} ${dockerArgs} tests/ci_build/test_${dockerTarget}.sh
|
||||
${dockerRun} ${dockerTarget} ${dockerArgs} bash -c "cd python-package; rm -f dist/*; python setup.py bdist_wheel --universal"
|
||||
rm -rf "${distDir}"; mkdir -p "${distDir}/py"
|
||||
cp xgboost "${distDir}"
|
||||
cp -r lib "${distDir}"
|
||||
cp -r python-package/dist "${distDir}/py"
|
||||
# Test the wheel for compatibility on a barebones CPU container
|
||||
${dockerRun} release ${dockerArgs} bash -c " \
|
||||
auditwheel show xgboost-*-py2-none-any.whl
|
||||
pip install --user python-package/dist/xgboost-*-none-any.whl && \
|
||||
python -m nose tests/python"
|
||||
"""
|
||||
archiveArtifacts artifacts: "${distDir}/**/*.*", allowEmptyArchive: true
|
||||
}
|
||||
}
|
||||
|
||||
def cmakeOptions(conf) {
|
||||
return ([
|
||||
conf["withGpu"] ? '-DUSE_CUDA=ON' : '-DUSE_CUDA=OFF',
|
||||
conf["withNccl"] ? '-DUSE_NCCL=ON' : '-DUSE_NCCL=OFF',
|
||||
conf["withOmp"] ? '-DOPEN_MP:BOOL=ON' : '']
|
||||
).join(" ")
|
||||
}
|
||||
|
||||
def getBuildName(conf) {
|
||||
def gpuLabel = conf['withGpu'] ? ("_cuda" + conf['cudaVersion'] + (conf['withNccl'] ? "_nccl" : "_nonccl")) : "_cpu"
|
||||
def ompLabel = conf['withOmp'] ? "_omp" : ""
|
||||
def pyLabel = "_py${conf['pythonVersion']}"
|
||||
return "${conf['os']}${gpuLabel}${ompLabel}${pyLabel}"
|
||||
}
|
||||
|
||||
|
||||
121
Jenkinsfile-restricted
Normal file
121
Jenkinsfile-restricted
Normal file
@ -0,0 +1,121 @@
|
||||
#!/usr/bin/groovy
|
||||
// -*- mode: groovy -*-
|
||||
// Jenkins pipeline
|
||||
// See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/
|
||||
|
||||
import groovy.transform.Field
|
||||
|
||||
/* Restricted tasks: tasks generating artifacts, such as binary wheels and
|
||||
documentation */
|
||||
|
||||
// Command to run command inside a docker container
|
||||
def dockerRun = 'tests/ci_build/ci_build.sh'
|
||||
// Utility functions
|
||||
@Field
|
||||
def utils
|
||||
|
||||
def buildMatrix = [
|
||||
[ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "9.2" ],
|
||||
[ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ],
|
||||
[ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": false, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ],
|
||||
]
|
||||
|
||||
pipeline {
|
||||
// Each stage specify its own agent
|
||||
agent none
|
||||
|
||||
// Setup common job properties
|
||||
options {
|
||||
ansiColor('xterm')
|
||||
timestamps()
|
||||
timeout(time: 120, unit: 'MINUTES')
|
||||
buildDiscarder(logRotator(numToKeepStr: '10'))
|
||||
}
|
||||
|
||||
// Build stages
|
||||
stages {
|
||||
stage('Jenkins: Get sources') {
|
||||
agent {
|
||||
label 'restricted'
|
||||
}
|
||||
steps {
|
||||
script {
|
||||
utils = load('tests/ci_build/jenkins_tools.Groovy')
|
||||
utils.checkoutSrcs()
|
||||
}
|
||||
stash name: 'srcs', excludes: '.git/'
|
||||
milestone label: 'Sources ready', ordinal: 1
|
||||
}
|
||||
}
|
||||
stage('Jenkins: Build doc') {
|
||||
agent {
|
||||
label 'linux && cpu && restricted'
|
||||
}
|
||||
steps {
|
||||
unstash name: 'srcs'
|
||||
script {
|
||||
def commit_id = "${GIT_COMMIT}"
|
||||
def branch_name = "${GIT_LOCAL_BRANCH}"
|
||||
echo 'Building doc...'
|
||||
dir ('jvm-packages') {
|
||||
sh "bash ./build_doc.sh ${commit_id}"
|
||||
archiveArtifacts artifacts: "${commit_id}.tar.bz2", allowEmptyArchive: true
|
||||
echo 'Deploying doc...'
|
||||
withAWS(credentials:'xgboost-doc-bucket') {
|
||||
s3Upload file: "${commit_id}.tar.bz2", bucket: 'xgboost-docs', acl: 'PublicRead', path: "${branch_name}.tar.bz2"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Jenkins: Build artifacts') {
|
||||
steps {
|
||||
script {
|
||||
parallel (buildMatrix.findAll{it['enabled']}.collectEntries{ c ->
|
||||
def buildName = utils.getBuildName(c)
|
||||
utils.buildFactory(buildName, c, true, this.&buildPlatformCmake)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build platform and test it via cmake.
|
||||
*/
|
||||
def buildPlatformCmake(buildName, conf, nodeReq, dockerTarget) {
|
||||
def opts = utils.cmakeOptions(conf)
|
||||
// Destination dir for artifacts
|
||||
def distDir = "dist/${buildName}"
|
||||
def dockerArgs = ""
|
||||
if(conf["withGpu"]){
|
||||
dockerArgs = "--build-arg CUDA_VERSION=" + conf["cudaVersion"]
|
||||
}
|
||||
// Build node - this is returned result
|
||||
node(nodeReq) {
|
||||
unstash name: 'srcs'
|
||||
echo """
|
||||
|===== XGBoost CMake build =====
|
||||
| dockerTarget: ${dockerTarget}
|
||||
| cmakeOpts : ${opts}
|
||||
|=========================
|
||||
""".stripMargin('|')
|
||||
// Invoke command inside docker
|
||||
sh """
|
||||
${dockerRun} ${dockerTarget} ${dockerArgs} tests/ci_build/build_via_cmake.sh ${opts}
|
||||
${dockerRun} ${dockerTarget} ${dockerArgs} bash -c "cd python-package; rm -f dist/*; python setup.py bdist_wheel --universal"
|
||||
rm -rf "${distDir}"; mkdir -p "${distDir}/py"
|
||||
cp xgboost "${distDir}"
|
||||
cp -r lib "${distDir}"
|
||||
cp -r python-package/dist "${distDir}/py"
|
||||
# Test the wheel for compatibility on a barebones CPU container
|
||||
${dockerRun} release ${dockerArgs} bash -c " \
|
||||
auditwheel show xgboost-*-py2-none-any.whl
|
||||
pip install --user python-package/dist/xgboost-*-none-any.whl && \
|
||||
python -m nose tests/python"
|
||||
"""
|
||||
archiveArtifacts artifacts: "${distDir}/**/*.*", allowEmptyArchive: true
|
||||
}
|
||||
}
|
||||
@ -168,7 +168,7 @@ cb.evaluation.log <- function() {
|
||||
#' at the beginning of each iteration.
|
||||
#'
|
||||
#' Note that when training is resumed from some previous model, and a function is used to
|
||||
#' reset a parameter value, the \code{nround} argument in this function would be the
|
||||
#' reset a parameter value, the \code{nrounds} argument in this function would be the
|
||||
#' the number of boosting rounds in the current training.
|
||||
#'
|
||||
#' Callback function expects the following values to be set in its calling frame:
|
||||
|
||||
@ -52,9 +52,9 @@
|
||||
#' dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
|
||||
#'
|
||||
#' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||
#' nround = 4
|
||||
#' nrounds = 4
|
||||
#'
|
||||
#' bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)
|
||||
#' bst = xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)
|
||||
#'
|
||||
#' # Model accuracy without new features
|
||||
#' accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) /
|
||||
@ -68,7 +68,7 @@
|
||||
#' new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
|
||||
#' new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
|
||||
#' watchlist <- list(train = new.dtrain)
|
||||
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, nthread = 2)
|
||||
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
||||
#'
|
||||
#' # Model accuracy with new features
|
||||
#' accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) /
|
||||
|
||||
@ -22,7 +22,7 @@
|
||||
#' \item \code{gamma} minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.
|
||||
#' \item \code{max_depth} maximum depth of a tree. Default: 6
|
||||
#' \item \code{min_child_weight} minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be. Default: 1
|
||||
#' \item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nround}. Default: 1
|
||||
#' \item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nrounds}. Default: 1
|
||||
#' \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
|
||||
#' \item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1
|
||||
#' \item \code{monotone_constraints} A numerical vector consists of \code{1}, \code{0} and \code{-1} with its length equals to the number of features in the training data. \code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint.
|
||||
|
||||
@ -5,20 +5,20 @@ data(agaricus.test, package='xgboost')
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
|
||||
nround <- 2
|
||||
nrounds <- 2
|
||||
param <- list(max_depth=2, eta=1, silent=1, nthread=2, objective='binary:logistic')
|
||||
|
||||
cat('running cross validation\n')
|
||||
# do cross validation, this will print result out as
|
||||
# [iteration] metric_name:mean_value+std_value
|
||||
# std_value is standard deviation of the metric
|
||||
xgb.cv(param, dtrain, nround, nfold=5, metrics={'error'})
|
||||
xgb.cv(param, dtrain, nrounds, nfold=5, metrics={'error'})
|
||||
|
||||
cat('running cross validation, disable standard deviation display\n')
|
||||
# do cross validation, this will print result out as
|
||||
# [iteration] metric_name:mean_value+std_value
|
||||
# std_value is standard deviation of the metric
|
||||
xgb.cv(param, dtrain, nround, nfold=5,
|
||||
xgb.cv(param, dtrain, nrounds, nfold=5,
|
||||
metrics='error', showsd = FALSE)
|
||||
|
||||
###
|
||||
@ -43,9 +43,9 @@ evalerror <- function(preds, dtrain) {
|
||||
param <- list(max_depth=2, eta=1, silent=1,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
# train with customized objective
|
||||
xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5)
|
||||
xgb.cv(params = param, data = dtrain, nrounds = nrounds, nfold = 5)
|
||||
|
||||
# do cross validation with prediction values for each fold
|
||||
res <- xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5, prediction = TRUE)
|
||||
res <- xgb.cv(params = param, data = dtrain, nrounds = nrounds, nfold = 5, prediction = TRUE)
|
||||
res$evaluation_log
|
||||
length(res$pred)
|
||||
|
||||
@ -7,10 +7,10 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
|
||||
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||
watchlist <- list(eval = dtest, train = dtrain)
|
||||
nround = 2
|
||||
nrounds = 2
|
||||
|
||||
# training the model for two rounds
|
||||
bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist)
|
||||
bst = xgb.train(param, dtrain, nrounds, nthread = 2, watchlist)
|
||||
cat('start testing prediction from first n trees\n')
|
||||
labels <- getinfo(dtest,'label')
|
||||
|
||||
|
||||
@ -11,10 +11,10 @@ dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
|
||||
|
||||
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||
nround = 4
|
||||
nrounds = 4
|
||||
|
||||
# training the model for two rounds
|
||||
bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)
|
||||
bst = xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)
|
||||
|
||||
# Model accuracy without new features
|
||||
accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)
|
||||
@ -43,7 +43,7 @@ new.features.test <- create.new.tree.features(bst, agaricus.test$data)
|
||||
new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
|
||||
new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
|
||||
watchlist <- list(train = new.dtrain)
|
||||
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, nthread = 2)
|
||||
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
||||
|
||||
# Model accuracy with new features
|
||||
accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) / length(agaricus.test$label)
|
||||
|
||||
@ -22,7 +22,7 @@ This is a "pre-iteration" callback function used to reset booster's parameters
|
||||
at the beginning of each iteration.
|
||||
|
||||
Note that when training is resumed from some previous model, and a function is used to
|
||||
reset a parameter value, the \code{nround} argument in this function would be the
|
||||
reset a parameter value, the \code{nrounds} argument in this function would be the
|
||||
the number of boosting rounds in the current training.
|
||||
|
||||
Callback function expects the following values to be set in its calling frame:
|
||||
|
||||
@ -63,9 +63,9 @@ dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(data = agaricus.test$data, label = agaricus.test$label)
|
||||
|
||||
param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
|
||||
nround = 4
|
||||
nrounds = 4
|
||||
|
||||
bst = xgb.train(params = param, data = dtrain, nrounds = nround, nthread = 2)
|
||||
bst = xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)
|
||||
|
||||
# Model accuracy without new features
|
||||
accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) /
|
||||
@ -79,7 +79,7 @@ new.features.test <- xgb.create.features(model = bst, agaricus.test$data)
|
||||
new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
|
||||
new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
|
||||
watchlist <- list(train = new.dtrain)
|
||||
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nround, nthread = 2)
|
||||
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
||||
|
||||
# Model accuracy with new features
|
||||
accuracy.after <- sum((predict(bst, new.dtest) >= 0.5) == agaricus.test$label) /
|
||||
|
||||
@ -35,7 +35,7 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
|
||||
\item \code{gamma} minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.
|
||||
\item \code{max_depth} maximum depth of a tree. Default: 6
|
||||
\item \code{min_child_weight} minimum sum of instance weight (hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be. Default: 1
|
||||
\item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nround}. Default: 1
|
||||
\item \code{subsample} subsample ratio of the training instance. Setting it to 0.5 means that xgboost randomly collected half of the data instances to grow trees and this will prevent overfitting. It makes computation shorter (because less data to analyse). It is advised to use this parameter with \code{eta} and increase \code{nrounds}. Default: 1
|
||||
\item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1
|
||||
\item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1
|
||||
\item \code{monotone_constraints} A numerical vector consists of \code{1}, \code{0} and \code{-1} with its length equals to the number of features in the training data. \code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint.
|
||||
|
||||
@ -12,7 +12,7 @@ XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\
|
||||
|
||||
# disable the use of thread_local for 32 bit windows:
|
||||
ifeq ($(R_OSTYPE)$(WIN),windows)
|
||||
XGB_RFLAGS += -DDMLC_CXX11_THREAD_LOCAL=0 -msse2 -mfpmath=sse
|
||||
XGB_RFLAGS += -DDMLC_CXX11_THREAD_LOCAL=0
|
||||
endif
|
||||
$(foreach v, $(XGB_RFLAGS), $(warning $(v)))
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@ XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\
|
||||
|
||||
# disable the use of thread_local for 32 bit windows:
|
||||
ifeq ($(R_OSTYPE)$(WIN),windows)
|
||||
XGB_RFLAGS += -DDMLC_CXX11_THREAD_LOCAL=0 -msse2 -mfpmath=sse
|
||||
XGB_RFLAGS += -DDMLC_CXX11_THREAD_LOCAL=0
|
||||
endif
|
||||
$(foreach v, $(XGB_RFLAGS), $(warning $(v)))
|
||||
|
||||
|
||||
@ -9,7 +9,7 @@ test_that("train and prediction when gctorture is on", {
|
||||
test <- agaricus.test
|
||||
gctorture(TRUE)
|
||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
||||
eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
pred <- predict(bst, test$data)
|
||||
gctorture(FALSE)
|
||||
})
|
||||
|
||||
@ -7,6 +7,9 @@ require(vcd, quietly = TRUE)
|
||||
|
||||
float_tolerance = 5e-6
|
||||
|
||||
# disable some tests for Win32
|
||||
win32_flag = .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
|
||||
|
||||
set.seed(1982)
|
||||
data(Arthritis)
|
||||
df <- data.table(Arthritis, keep.rownames = F)
|
||||
@ -41,7 +44,8 @@ mbst.GLM <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0,
|
||||
|
||||
|
||||
test_that("xgb.dump works", {
|
||||
expect_length(xgb.dump(bst.Tree), 200)
|
||||
if (!win32_flag)
|
||||
expect_length(xgb.dump(bst.Tree), 200)
|
||||
dump_file = file.path(tempdir(), 'xgb.model.dump')
|
||||
expect_true(xgb.dump(bst.Tree, dump_file, with_stats = T))
|
||||
expect_true(file.exists(dump_file))
|
||||
@ -50,7 +54,8 @@ test_that("xgb.dump works", {
|
||||
# JSON format
|
||||
dmp <- xgb.dump(bst.Tree, dump_format = "json")
|
||||
expect_length(dmp, 1)
|
||||
expect_length(grep('nodeid', strsplit(dmp, '\n')[[1]]), 188)
|
||||
if (!win32_flag)
|
||||
expect_length(grep('nodeid', strsplit(dmp, '\n')[[1]]), 188)
|
||||
})
|
||||
|
||||
test_that("xgb.dump works for gblinear", {
|
||||
@ -210,7 +215,8 @@ test_that("xgb.model.dt.tree works with and without feature names", {
|
||||
names.dt.trees <- c("Tree", "Node", "ID", "Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
|
||||
dt.tree <- xgb.model.dt.tree(feature_names = feature.names, model = bst.Tree)
|
||||
expect_equal(names.dt.trees, names(dt.tree))
|
||||
expect_equal(dim(dt.tree), c(188, 10))
|
||||
if (!win32_flag)
|
||||
expect_equal(dim(dt.tree), c(188, 10))
|
||||
expect_output(str(dt.tree), 'Feature.*\\"Age\\"')
|
||||
|
||||
dt.tree.0 <- xgb.model.dt.tree(model = bst.Tree)
|
||||
@ -236,7 +242,8 @@ test_that("xgb.model.dt.tree throws error for gblinear", {
|
||||
|
||||
test_that("xgb.importance works with and without feature names", {
|
||||
importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree)
|
||||
expect_equal(dim(importance.Tree), c(7, 4))
|
||||
if (!win32_flag)
|
||||
expect_equal(dim(importance.Tree), c(7, 4))
|
||||
expect_equal(colnames(importance.Tree), c("Feature", "Gain", "Cover", "Frequency"))
|
||||
expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')
|
||||
|
||||
|
||||
@ -7,6 +7,10 @@ data(agaricus.test, package = 'xgboost')
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
||||
|
||||
# Disable flaky tests for 32-bit Windows.
|
||||
# See https://github.com/dmlc/xgboost/issues/3720
|
||||
win32_flag = .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
|
||||
|
||||
test_that("updating the model works", {
|
||||
watchlist = list(train = dtrain, test = dtest)
|
||||
|
||||
@ -29,7 +33,9 @@ test_that("updating the model works", {
|
||||
tr1r <- xgb.model.dt.tree(model = bst1r)
|
||||
# all should be the same when no subsampling
|
||||
expect_equal(bst1$evaluation_log, bst1r$evaluation_log)
|
||||
expect_equal(tr1, tr1r, tolerance = 0.00001, check.attributes = FALSE)
|
||||
if (!win32_flag) {
|
||||
expect_equal(tr1, tr1r, tolerance = 0.00001, check.attributes = FALSE)
|
||||
}
|
||||
|
||||
# the same boosting with subsampling with an extra 'refresh' updater:
|
||||
p2r <- modifyList(p2, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
|
||||
@ -38,7 +44,9 @@ test_that("updating the model works", {
|
||||
tr2r <- xgb.model.dt.tree(model = bst2r)
|
||||
# should be the same evaluation but different gains and larger cover
|
||||
expect_equal(bst2$evaluation_log, bst2r$evaluation_log)
|
||||
expect_equal(tr2[Feature == 'Leaf']$Quality, tr2r[Feature == 'Leaf']$Quality)
|
||||
if (!win32_flag) {
|
||||
expect_equal(tr2[Feature == 'Leaf']$Quality, tr2r[Feature == 'Leaf']$Quality)
|
||||
}
|
||||
expect_gt(sum(abs(tr2[Feature != 'Leaf']$Quality - tr2r[Feature != 'Leaf']$Quality)), 100)
|
||||
expect_gt(sum(tr2r$Cover) / sum(tr2$Cover), 1.5)
|
||||
|
||||
@ -61,7 +69,9 @@ test_that("updating the model works", {
|
||||
expect_gt(sum(tr2u$Cover) / sum(tr2$Cover), 1.5)
|
||||
# the results should be the same as for the model with an extra 'refresh' updater
|
||||
expect_equal(bst2r$evaluation_log, bst2u$evaluation_log)
|
||||
expect_equal(tr2r, tr2u, tolerance = 0.00001, check.attributes = FALSE)
|
||||
if (!win32_flag) {
|
||||
expect_equal(tr2r, tr2u, tolerance = 0.00001, check.attributes = FALSE)
|
||||
}
|
||||
|
||||
# process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data:
|
||||
p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
|
||||
|
||||
@ -80,12 +80,6 @@ booster = gblinear
|
||||
# L2 regularization term on weights, default 0
|
||||
lambda = 0.01
|
||||
# L1 regularization term on weights, default 0
|
||||
If ```agaricus.txt.test.buffer``` exists, and automatically loads from binary buffer if possible, this can speedup training process when you do training many times. You can disable it by setting ```use_buffer=0```.
|
||||
- Buffer file can also be used as standalone input, i.e if buffer file exists, but original agaricus.txt.test was removed, xgboost will still run
|
||||
* Deviation from LibSVM input format: xgboost is compatible with LibSVM format, with the following minor differences:
|
||||
- xgboost allows feature index starts from 0
|
||||
- for binary classification, the label is 1 for positive, 0 for negative, instead of +1,-1
|
||||
- the feature indices in each line *do not* need to be sorted
|
||||
alpha = 0.01
|
||||
# L2 regularization term on bias, default 0
|
||||
lambda_bias = 0.01
|
||||
@ -102,7 +96,7 @@ After training, we can use the output model to get the prediction of the test da
|
||||
For binary classification, the output predictions are probability confidence scores in [0,1], corresponds to the probability of the label to be positive.
|
||||
|
||||
#### Dump Model
|
||||
This is a preliminary feature, so far only tree model support text dump. XGBoost can display the tree models in text files and we can scan the model in an easy way:
|
||||
This is a preliminary feature, so only tree models support text dump. XGBoost can display the tree models in text or JSON files, and we can scan the model in an easy way:
|
||||
```
|
||||
../../xgboost mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt
|
||||
../../xgboost mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
|
||||
|
||||
@ -33,10 +33,10 @@ def logregobj(preds, dtrain):
|
||||
# Take this in mind when you use the customization, and maybe you need write customized evaluation function
|
||||
def evalerror(preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
# return a pair metric_name, result
|
||||
# return a pair metric_name, result. The metric name must not contain a colon (:)
|
||||
# since preds are margin(before logistic transformation, cutoff at 0)
|
||||
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
|
||||
|
||||
# training with customized objective, we can also do step by step training
|
||||
# simply look at xgboost.py's implementation of train
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
|
||||
bst = xgb.train(param, dtrain, num_round, watchlist, obj=logregobj, feval=evalerror)
|
||||
|
||||
@ -24,9 +24,9 @@ param <- list("objective" = "binary:logitraw",
|
||||
"silent" = 1,
|
||||
"nthread" = 16)
|
||||
watchlist <- list("train" = xgmat)
|
||||
nround = 120
|
||||
nrounds = 120
|
||||
print ("loading data end, start to boost trees")
|
||||
bst = xgb.train(param, xgmat, nround, watchlist );
|
||||
bst = xgb.train(param, xgmat, nrounds, watchlist );
|
||||
# save out model
|
||||
xgb.save(bst, "higgs.model")
|
||||
print ('finish training')
|
||||
|
||||
@ -39,9 +39,9 @@ for (i in 1:length(threads)){
|
||||
"silent" = 1,
|
||||
"nthread" = thread)
|
||||
watchlist <- list("train" = xgmat)
|
||||
nround = 120
|
||||
nrounds = 120
|
||||
print ("loading data end, start to boost trees")
|
||||
bst = xgb.train(param, xgmat, nround, watchlist );
|
||||
bst = xgb.train(param, xgmat, nrounds, watchlist );
|
||||
# save out model
|
||||
xgb.save(bst, "higgs.model")
|
||||
print ('finish training')
|
||||
|
||||
@ -23,13 +23,13 @@ param <- list("objective" = "multi:softprob",
|
||||
"nthread" = 8)
|
||||
|
||||
# Run Cross Validation
|
||||
cv.nround = 50
|
||||
cv.nrounds = 50
|
||||
bst.cv = xgb.cv(param=param, data = x[trind,], label = y,
|
||||
nfold = 3, nrounds=cv.nround)
|
||||
nfold = 3, nrounds=cv.nrounds)
|
||||
|
||||
# Train the model
|
||||
nround = 50
|
||||
bst = xgboost(param=param, data = x[trind,], label = y, nrounds=nround)
|
||||
nrounds = 50
|
||||
bst = xgboost(param=param, data = x[trind,], label = y, nrounds=nrounds)
|
||||
|
||||
# Make prediction
|
||||
pred = predict(bst,x[teind,])
|
||||
|
||||
@ -121,19 +121,19 @@ param <- list("objective" = "multi:softprob",
|
||||
"eval_metric" = "mlogloss",
|
||||
"num_class" = numberOfClasses)
|
||||
|
||||
cv.nround <- 5
|
||||
cv.nrounds <- 5
|
||||
cv.nfold <- 3
|
||||
|
||||
bst.cv = xgb.cv(param=param, data = trainMatrix, label = y,
|
||||
nfold = cv.nfold, nrounds = cv.nround)
|
||||
nfold = cv.nfold, nrounds = cv.nrounds)
|
||||
```
|
||||
> As we can see the error rate is low on the test dataset (for a 5mn trained model).
|
||||
|
||||
Finally, we are ready to train the real model!!!
|
||||
|
||||
```{r modelTraining}
|
||||
nround = 50
|
||||
bst = xgboost(param=param, data = trainMatrix, label = y, nrounds=nround)
|
||||
nrounds = 50
|
||||
bst = xgboost(param=param, data = trainMatrix, label = y, nrounds=nrounds)
|
||||
```
|
||||
|
||||
Model understanding
|
||||
@ -142,7 +142,7 @@ Model understanding
|
||||
Feature importance
|
||||
------------------
|
||||
|
||||
So far, we have built a model made of **`r nround`** trees.
|
||||
So far, we have built a model made of **`r nrounds`** trees.
|
||||
|
||||
To build a tree, the dataset is divided recursively several times. At the end of the process, you get groups of observations (here, these observations are properties regarding **Otto** products).
|
||||
|
||||
|
||||
@ -222,7 +222,7 @@ The code below is very usual. For more information, you can look at the document
|
||||
|
||||
```r
|
||||
bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 4,
|
||||
eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")
|
||||
eta = 1, nthread = 2, nrounds = 10,objective = "binary:logistic")
|
||||
```
|
||||
|
||||
```
|
||||
@ -244,7 +244,7 @@ A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitti
|
||||
|
||||
> Here you can see the numbers decrease until line 7 and then increase.
|
||||
>
|
||||
> It probably means we are overfitting. To fix that I should reduce the number of rounds to `nround = 4`. I will let things like that because I don't really care for the purpose of this example :-)
|
||||
> It probably means we are overfitting. To fix that I should reduce the number of rounds to `nrounds = 4`. I will let things like that because I don't really care for the purpose of this example :-)
|
||||
|
||||
Feature importance
|
||||
------------------
|
||||
@ -448,7 +448,7 @@ train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
|
||||
#Random Forest™ - 1000 trees
|
||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 4, num_parallel_tree = 1000, subsample = 0.5, colsample_bytree =0.5, nround = 1, objective = "binary:logistic")
|
||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 4, num_parallel_tree = 1000, subsample = 0.5, colsample_bytree =0.5, nrounds = 1, objective = "binary:logistic")
|
||||
```
|
||||
|
||||
```
|
||||
@ -457,7 +457,7 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 4, num_parall
|
||||
|
||||
```r
|
||||
#Boosting - 3 rounds
|
||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 4, nround = 3, objective = "binary:logistic")
|
||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 4, nrounds = 3, objective = "binary:logistic")
|
||||
```
|
||||
|
||||
```
|
||||
|
||||
@ -178,11 +178,11 @@ We will train decision tree model using the following parameters:
|
||||
* `objective = "binary:logistic"`: we will train a binary classification model ;
|
||||
* `max.deph = 2`: the trees won't be deep, because our case is very simple ;
|
||||
* `nthread = 2`: the number of cpu threads we are going to use;
|
||||
* `nround = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction.
|
||||
* `nrounds = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction.
|
||||
|
||||
|
||||
```r
|
||||
bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
|
||||
bstSparse <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
```
|
||||
|
||||
```
|
||||
@ -200,7 +200,7 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**
|
||||
|
||||
|
||||
```r
|
||||
bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
|
||||
bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
```
|
||||
|
||||
```
|
||||
@ -215,7 +215,7 @@ bstDense <- xgboost(data = as.matrix(train$data), label = train$label, max.depth
|
||||
|
||||
```r
|
||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
|
||||
bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
|
||||
bstDMatrix <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
```
|
||||
|
||||
```
|
||||
@ -232,13 +232,13 @@ One of the simplest way to see the training progress is to set the `verbose` opt
|
||||
|
||||
```r
|
||||
# verbose = 0, no message
|
||||
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 0)
|
||||
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
|
||||
```
|
||||
|
||||
|
||||
```r
|
||||
# verbose = 1, print evaluation metric
|
||||
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 1)
|
||||
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 1)
|
||||
```
|
||||
|
||||
```
|
||||
@ -249,7 +249,7 @@ bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, o
|
||||
|
||||
```r
|
||||
# verbose = 2, also print information about tree
|
||||
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nround = 2, objective = "binary:logistic", verbose = 2)
|
||||
bst <- xgboost(data = dtrain, max.depth = 2, eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 2)
|
||||
```
|
||||
|
||||
```
|
||||
@ -372,7 +372,7 @@ For the purpose of this example, we use `watchlist` parameter. It is a list of `
|
||||
```r
|
||||
watchlist <- list(train=dtrain, test=dtest)
|
||||
|
||||
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
|
||||
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nrounds=2, watchlist=watchlist, objective = "binary:logistic")
|
||||
```
|
||||
|
||||
```
|
||||
@ -380,7 +380,7 @@ bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchli
|
||||
## [1] train-error:0.022263 test-error:0.021726
|
||||
```
|
||||
|
||||
**XGBoost** has computed at each round the same average error metric than seen above (we set `nround` to 2, that is why we have two lines). Obviously, the `train-error` number is related to the training dataset (the one the algorithm learns from) and the `test-error` number to the test dataset.
|
||||
**XGBoost** has computed at each round the same average error metric than seen above (we set `nrounds` to 2, that is why we have two lines). Obviously, the `train-error` number is related to the training dataset (the one the algorithm learns from) and the `test-error` number to the test dataset.
|
||||
|
||||
Both training and test error related metrics are very similar, and in some way, it makes sense: what we have learned from the training dataset matches the observations from the test dataset.
|
||||
|
||||
@ -390,7 +390,7 @@ For a better understanding of the learning progression, you may want to have som
|
||||
|
||||
|
||||
```r
|
||||
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
|
||||
bst <- xgb.train(data=dtrain, max.depth=2, eta=1, nthread = 2, nrounds=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
|
||||
```
|
||||
|
||||
```
|
||||
@ -407,7 +407,7 @@ Until now, all the learnings we have performed were based on boosting trees. **X
|
||||
|
||||
|
||||
```r
|
||||
bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nthread = 2, nround=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
|
||||
bst <- xgb.train(data=dtrain, booster = "gblinear", max.depth=2, nthread = 2, nrounds=2, watchlist=watchlist, eval.metric = "error", eval.metric = "logloss", objective = "binary:logistic")
|
||||
```
|
||||
|
||||
```
|
||||
@ -445,7 +445,7 @@ dtrain2 <- xgb.DMatrix("dtrain.buffer")
|
||||
```
|
||||
|
||||
```r
|
||||
bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nthread = 2, nround=2, watchlist=watchlist, objective = "binary:logistic")
|
||||
bst <- xgb.train(data=dtrain2, max.depth=2, eta=1, nthread = 2, nrounds=2, watchlist=watchlist, objective = "binary:logistic")
|
||||
```
|
||||
|
||||
```
|
||||
|
||||
10
doc/conf.py
10
doc/conf.py
@ -14,6 +14,7 @@
|
||||
from subprocess import call
|
||||
from sh.contrib import git
|
||||
import urllib.request
|
||||
from urllib.error import HTTPError
|
||||
from recommonmark.parser import CommonMarkParser
|
||||
import sys
|
||||
import re
|
||||
@ -24,8 +25,11 @@ import guzzle_sphinx_theme
|
||||
git_branch = [re.sub(r'origin/', '', x.lstrip(' ')) for x in str(git.branch('-r', '--contains', 'HEAD')).rstrip('\n').split('\n')]
|
||||
git_branch = [x for x in git_branch if 'HEAD' not in x]
|
||||
print('git_branch = {}'.format(git_branch[0]))
|
||||
filename, _ = urllib.request.urlretrieve('https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(git_branch[0]))
|
||||
call('if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'.format(filename), shell=True)
|
||||
try:
|
||||
filename, _ = urllib.request.urlretrieve('https://s3-us-west-2.amazonaws.com/xgboost-docs/{}.tar.bz2'.format(git_branch[0]))
|
||||
call('if [ -d tmp ]; then rm -rf tmp; fi; mkdir -p tmp/jvm; cd tmp/jvm; tar xvf {}'.format(filename), shell=True)
|
||||
except HTTPError:
|
||||
print('JVM doc not found. Skipping...')
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
@ -146,7 +150,7 @@ extensions.append("guzzle_sphinx_theme")
|
||||
# Guzzle theme options (see theme.conf for more information)
|
||||
html_theme_options = {
|
||||
# Set the name of the project to appear in the sidebar
|
||||
"project_nav_name": "XGBoost"
|
||||
"project_nav_name": "XGBoost (0.80)"
|
||||
}
|
||||
|
||||
html_sidebars = {
|
||||
|
||||
@ -42,7 +42,7 @@ R
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
# fit model
|
||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nround = 2,
|
||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nrounds = 2,
|
||||
nthread = 2, objective = "binary:logistic")
|
||||
# predict
|
||||
pred <- predict(bst, test$data)
|
||||
|
||||
@ -5,6 +5,12 @@ XGBoost GPU Support
|
||||
This page contains information about GPU algorithms supported in XGBoost.
|
||||
To install GPU support, checkout the :doc:`/build`.
|
||||
|
||||
.. note:: CUDA 8.0, Compute Capability 3.5 required
|
||||
|
||||
The GPU algorithms in XGBoost require a graphics card with compute capability 3.5 or higher, with
|
||||
CUDA toolkits 8.0 or later.
|
||||
(See `this list <https://en.wikipedia.org/wiki/CUDA#GPUs_supported>`_ to look up compute capability of your GPU card.)
|
||||
|
||||
*********************************************
|
||||
CUDA Accelerated Tree Construction Algorithms
|
||||
*********************************************
|
||||
|
||||
@ -274,7 +274,7 @@ and then loading the model in another session:
|
||||
With regards to ML pipeline save and load, please refer the next section.
|
||||
|
||||
Interact with Other Bindings of XGBoost
|
||||
------------------------------------
|
||||
---------------------------------------
|
||||
After we train a model with XGBoost4j-Spark on massive dataset, sometimes we want to do model serving in single machine or integrate it with other single node libraries for further processing. XGBoost4j-Spark supports export model to local by:
|
||||
|
||||
.. code-block:: scala
|
||||
|
||||
@ -119,7 +119,7 @@ Parameters for Tree Booster
|
||||
|
||||
* ``scale_pos_weight`` [default=1]
|
||||
|
||||
- Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: ``sum(negative instances) / sum(positive instances)``. See `Parameters Tuning </tutorials/param_tuning>`_ for more discussion. Also, see Higgs Kaggle competition demo for examples: `R <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R>`_, `py1 <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-numpy.py>`_, `py2 <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-cv.py>`_, `py3 <https://github.com/dmlc/xgboost/blob/master/demo/guide-python/cross_validation.py>`_.
|
||||
- Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: ``sum(negative instances) / sum(positive instances)``. See :doc:`Parameters Tuning </tutorials/param_tuning>` for more discussion. Also, see Higgs Kaggle competition demo for examples: `R <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R>`_, `py1 <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-numpy.py>`_, `py2 <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-cv.py>`_, `py3 <https://github.com/dmlc/xgboost/blob/master/demo/guide-python/cross_validation.py>`_.
|
||||
|
||||
* ``updater`` [default= ``grow_colmaker,prune``]
|
||||
|
||||
@ -318,10 +318,6 @@ Command Line Parameters
|
||||
***********************
|
||||
The following parameters are only used in the console version of XGBoost
|
||||
|
||||
* ``use_buffer`` [default=1]
|
||||
|
||||
- Whether to create a binary buffer from text input. Doing so normally will speed up loading times
|
||||
|
||||
* ``num_round``
|
||||
|
||||
- The number of rounds for boosting
|
||||
@ -361,6 +357,10 @@ The following parameters are only used in the console version of XGBoost
|
||||
|
||||
- Feature map, used for dumping model
|
||||
|
||||
* ``dump_format`` [default= ``text``] options: ``text``, ``json``
|
||||
|
||||
- Format of model dump file
|
||||
|
||||
* ``name_dump`` [default= ``dump.txt``]
|
||||
|
||||
- Name of model dump file
|
||||
|
||||
@ -2,6 +2,10 @@ Python API Reference
|
||||
====================
|
||||
This page gives the Python API reference of xgboost, please also refer to Python Package Introduction for more information about python package.
|
||||
|
||||
.. contents::
|
||||
:backlinks: none
|
||||
:local:
|
||||
|
||||
Core Data Structure
|
||||
-------------------
|
||||
.. automodule:: xgboost.core
|
||||
@ -29,9 +33,11 @@ Scikit-Learn API
|
||||
.. automodule:: xgboost.sklearn
|
||||
.. autoclass:: xgboost.XGBRegressor
|
||||
:members:
|
||||
:inherited-members:
|
||||
:show-inheritance:
|
||||
.. autoclass:: xgboost.XGBClassifier
|
||||
:members:
|
||||
:inherited-members:
|
||||
:show-inheritance:
|
||||
|
||||
Plotting API
|
||||
|
||||
@ -13,6 +13,10 @@ The external memory version takes in the following filename format:
|
||||
The ``filename`` is the normal path to libsvm file you want to load in, and ``cacheprefix`` is a
|
||||
path to a cache file that XGBoost will use for external memory cache.
|
||||
|
||||
.. note:: External memory is not available with GPU algorithms
|
||||
|
||||
External memory is not available when ``tree_method`` is set to ``gpu_exact`` or ``gpu_hist``.
|
||||
|
||||
The following code was extracted from `demo/guide-python/external_memory.py <https://github.com/dmlc/xgboost/blob/master/demo/guide-python/external_memory.py>`_:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@ -223,7 +223,7 @@ In this equation, :math:`w_j` are independent with respect to each other, the fo
|
||||
w_j^\ast &= -\frac{G_j}{H_j+\lambda}\\
|
||||
\text{obj}^\ast &= -\frac{1}{2} \sum_{j=1}^T \frac{G_j^2}{H_j+\lambda} + \gamma T
|
||||
|
||||
The last equation measures *how good* a tree structure :math:`$q(x)` is.
|
||||
The last equation measures *how good* a tree structure :math:`q(x)` is.
|
||||
|
||||
.. image:: https://raw.githubusercontent.com/dmlc/web-data/master/xgboost/model/struct_score.png
|
||||
:width: 100%
|
||||
|
||||
@ -38,6 +38,8 @@ class TrackerLogger : public BaseLogger {
|
||||
~TrackerLogger();
|
||||
};
|
||||
|
||||
// custom logging callback; disabled for R wrapper
|
||||
#if !defined(XGBOOST_STRICT_R_MODE) || XGBOOST_STRICT_R_MODE == 0
|
||||
class LogCallbackRegistry {
|
||||
public:
|
||||
using Callback = void (*)(const char*);
|
||||
@ -52,6 +54,17 @@ class LogCallbackRegistry {
|
||||
private:
|
||||
Callback log_callback_;
|
||||
};
|
||||
#else
|
||||
class LogCallbackRegistry {
|
||||
public:
|
||||
using Callback = void (*)(const char*);
|
||||
LogCallbackRegistry() {}
|
||||
inline void Register(Callback log_callback) {}
|
||||
inline Callback Get() const {
|
||||
return nullptr;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
using LogCallbackRegistryStore = dmlc::ThreadLocalStore<LogCallbackRegistry>;
|
||||
|
||||
|
||||
@ -1358,11 +1358,12 @@ class Booster(object):
|
||||
def get_score(self, fmap='', importance_type='weight'):
|
||||
"""Get feature importance of each feature.
|
||||
Importance type can be defined as:
|
||||
'weight' - the number of times a feature is used to split the data across all trees.
|
||||
'gain' - the average gain across all splits the feature is used in.
|
||||
'cover' - the average coverage across all splits the feature is used in.
|
||||
'total_gain' - the total gain across all splits the feature is used in.
|
||||
'total_cover' - the total coverage across all splits the feature is used in.
|
||||
|
||||
* 'weight': the number of times a feature is used to split the data across all trees.
|
||||
* 'gain': the average gain across all splits the feature is used in.
|
||||
* 'cover': the average coverage across all splits the feature is used in.
|
||||
* 'total_gain': the total gain across all splits the feature is used in.
|
||||
* 'total_cover': the total coverage across all splits the feature is used in.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@ -1478,6 +1479,7 @@ class Booster(object):
|
||||
|
||||
def get_split_value_histogram(self, feature, fmap='', bins=None, as_pandas=True):
|
||||
"""Get split value histogram of a feature
|
||||
|
||||
Parameters
|
||||
----------
|
||||
feature: str
|
||||
@ -1488,7 +1490,7 @@ class Booster(object):
|
||||
The maximum number of bins.
|
||||
Number of bins equals number of unique split values n_unique,
|
||||
if bins == None or bins > n_unique.
|
||||
as_pandas : bool, default True
|
||||
as_pandas: bool, default True
|
||||
Return pd.DataFrame when pandas is installed.
|
||||
If False or pandas is not installed, return numpy ndarray.
|
||||
|
||||
|
||||
@ -28,10 +28,11 @@ def plot_importance(booster, ax=None, height=0.2,
|
||||
grid : bool, Turn the axes grids on or off. Default is True (On).
|
||||
importance_type : str, default "weight"
|
||||
How the importance is calculated: either "weight", "gain", or "cover"
|
||||
"weight" is the number of times a feature appears in a tree
|
||||
"gain" is the average gain of splits which use the feature
|
||||
"cover" is the average coverage of splits which use the feature
|
||||
where coverage is defined as the number of samples affected by the split
|
||||
|
||||
* "weight" is the number of times a feature appears in a tree
|
||||
* "gain" is the average gain of splits which use the feature
|
||||
* "cover" is the average coverage of splits which use the feature
|
||||
where coverage is defined as the number of samples affected by the split
|
||||
max_num_features : int, default None
|
||||
Maximum number of top features displayed on plot. If None, all features will be displayed.
|
||||
height : float, default 0.2
|
||||
|
||||
@ -99,14 +99,16 @@ class XGBModel(XGBModelBase):
|
||||
missing : float, optional
|
||||
Value in the data which needs to be present as a missing value. If
|
||||
None, defaults to np.nan.
|
||||
**kwargs : dict, optional
|
||||
\*\*kwargs : dict, optional
|
||||
Keyword arguments for XGBoost Booster object. Full documentation of parameters can
|
||||
be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.md.
|
||||
Attempting to set a parameter via the constructor args and **kwargs dict simultaneously
|
||||
be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst.
|
||||
Attempting to set a parameter via the constructor args and \*\*kwargs dict simultaneously
|
||||
will result in a TypeError.
|
||||
Note:
|
||||
**kwargs is unsupported by Sklearn. We do not guarantee that parameters passed via
|
||||
this argument will interact properly with Sklearn.
|
||||
|
||||
.. note:: \*\*kwargs unsupported by scikit-learn
|
||||
|
||||
\*\*kwargs is unsupported by scikit-learn. We do not guarantee that parameters
|
||||
passed via this argument will interact properly with scikit-learn.
|
||||
|
||||
Note
|
||||
----
|
||||
@ -217,6 +219,7 @@ class XGBModel(XGBModelBase):
|
||||
def save_model(self, fname):
|
||||
"""
|
||||
Save the model to a file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fname : string
|
||||
@ -227,6 +230,7 @@ class XGBModel(XGBModelBase):
|
||||
def load_model(self, fname):
|
||||
"""
|
||||
Load the model from a file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fname : string or a memory buffer
|
||||
@ -259,7 +263,7 @@ class XGBModel(XGBModelBase):
|
||||
instance weights on the i-th validation set.
|
||||
eval_metric : str, callable, optional
|
||||
If a str, should be a built-in evaluation metric to use. See
|
||||
doc/parameter.md. If callable, a custom evaluation metric. The call
|
||||
doc/parameter.rst. If callable, a custom evaluation metric. The call
|
||||
signature is func(y_predicted, y_true) where y_true will be a
|
||||
DMatrix object such that you may need to call the get_label
|
||||
method. It must return a str, value pair where the str is a name
|
||||
@ -336,6 +340,39 @@ class XGBModel(XGBModelBase):
|
||||
return self
|
||||
|
||||
def predict(self, data, output_margin=False, ntree_limit=None):
|
||||
"""
|
||||
Predict with `data`.
|
||||
|
||||
.. note:: This function is not thread safe.
|
||||
|
||||
For each booster object, predict can only be called from one thread.
|
||||
If you want to run prediction using multiple thread, call ``xgb.copy()`` to make copies
|
||||
of model object and then call ``predict()``.
|
||||
|
||||
.. note:: Using ``predict()`` with DART booster
|
||||
|
||||
If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
|
||||
some of the trees will be evaluated. This will produce incorrect results if ``data`` is
|
||||
not the training data. To obtain correct results on test sets, set ``ntree_limit`` to
|
||||
a nonzero value, e.g.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
preds = bst.predict(dtest, ntree_limit=num_round)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : DMatrix
|
||||
The dmatrix storing the input.
|
||||
output_margin : bool
|
||||
Whether to output the raw untransformed margin value.
|
||||
ntree_limit : int
|
||||
Limit number of trees in the prediction; defaults to best_ntree_limit if defined
|
||||
(i.e. it has been trained with early stopping), otherwise 0 (use all trees).
|
||||
Returns
|
||||
-------
|
||||
prediction : numpy array
|
||||
"""
|
||||
# pylint: disable=missing-docstring,invalid-name
|
||||
test_dmatrix = DMatrix(data, missing=self.missing, nthread=self.n_jobs)
|
||||
# get ntree_limit to use - if none specified, default to
|
||||
@ -372,10 +409,10 @@ class XGBModel(XGBModelBase):
def evals_result(self):
"""Return the evaluation results.

If eval_set is passed to the `fit` function, you can call evals_result() to
get evaluation results for all passed eval_sets. When eval_metric is also
passed to the `fit` function, the evals_result will contain the eval_metrics
passed to the `fit` function
If ``eval_set`` is passed to the `fit` function, you can call ``evals_result()`` to
get evaluation results for all passed eval_sets. When ``eval_metric`` is also
passed to the ``fit`` function, the ``evals_result`` will contain the ``eval_metrics``
passed to the ``fit`` function

Returns
-------
@ -383,20 +420,26 @@ class XGBModel(XGBModelBase):

Example
-------
param_dist = {'objective':'binary:logistic', 'n_estimators':2}

clf = xgb.XGBModel(**param_dist)
.. code-block:: python

clf.fit(X_train, y_train,
eval_set=[(X_train, y_train), (X_test, y_test)],
eval_metric='logloss',
verbose=True)
param_dist = {'objective':'binary:logistic', 'n_estimators':2}

evals_result = clf.evals_result()
clf = xgb.XGBModel(**param_dist)

clf.fit(X_train, y_train,
eval_set=[(X_train, y_train), (X_test, y_test)],
eval_metric='logloss',
verbose=True)

evals_result = clf.evals_result()

The variable evals_result will contain:
{'validation_0': {'logloss': ['0.604835', '0.531479']},
'validation_1': {'logloss': ['0.41965', '0.17686']}}

.. code-block:: none

{'validation_0': {'logloss': ['0.604835', '0.531479']},
'validation_1': {'logloss': ['0.41965', '0.17686']}}
"""
if self.evals_result_:
evals_result = self.evals_result_
@ -408,9 +451,11 @@ class XGBModel(XGBModelBase):
@property
def feature_importances_(self):
"""
Feature importances property

Returns
-------
feature_importances_ : array of shape = [n_features]
feature_importances_ : array of shape ``[n_features]``

"""
b = self.get_booster()
@ -422,9 +467,8 @@ class XGBModel(XGBModelBase):

class XGBClassifier(XGBModel, XGBClassifierBase):
# pylint: disable=missing-docstring,too-many-arguments,invalid-name
__doc__ = """Implementation of the scikit-learn API for XGBoost classification.

""" + '\n'.join(XGBModel.__doc__.split('\n')[2:])
__doc__ = "Implementation of the scikit-learn API for XGBoost classification.\n\n" \
+ '\n'.join(XGBModel.__doc__.split('\n')[2:])

def __init__(self, max_depth=3, learning_rate=0.1,
n_estimators=100, silent=True,
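The switch above from a triple-quoted prefix to an explicit ``\n\n`` keeps the blank line that separates the summary from the inherited parameter list. A toy sketch of the same docstring-splicing pattern (the class names here are made up):

.. code-block:: python

    class Base(object):
        """Shared estimator docstring.

        Parameters
        ----------
        max_depth : int
        """

    class Derived(Base):
        # Keep everything after the first two lines of Base's docstring,
        # mirroring what the diff does with XGBModel.__doc__.
        __doc__ = "Derived estimator.\n\n" \
            + '\n'.join(Base.__doc__.split('\n')[2:])

    print(Derived.__doc__)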
@ -465,7 +509,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
instance weights on the i-th validation set.
eval_metric : str, callable, optional
If a str, should be a built-in evaluation metric to use. See
doc/parameter.md. If callable, a custom evaluation metric. The call
doc/parameter.rst. If callable, a custom evaluation metric. The call
signature is func(y_predicted, y_true) where y_true will be a
DMatrix object such that you may need to call the get_label
method. It must return a str, value pair where the str is a name
@ -610,10 +654,13 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
def predict_proba(self, data, ntree_limit=None):
"""
Predict the probability of each `data` example being of a given class.
NOTE: This function is not thread safe.
For each booster object, predict can only be called from one thread.
If you want to run prediction using multiple thread, call xgb.copy() to make copies
of model object and then call predict

.. note:: This function is not thread safe

For each booster object, predict can only be called from one thread.
If you want to run prediction using multiple thread, call ``xgb.copy()`` to make copies
of model object and then call predict

Parameters
----------
data : DMatrix
@ -621,6 +668,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
ntree_limit : int
Limit number of trees in the prediction; defaults to best_ntree_limit if defined
(i.e. it has been trained with early stopping), otherwise 0 (use all trees).

Returns
-------
prediction : numpy array
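A short usage sketch for ``predict_proba`` with the sklearn wrapper; the synthetic data below is only for illustration, and the returned array has one column per class:

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    rng = np.random.RandomState(0)
    X = rng.randn(100, 5)
    y = (X[:, 0] > 0).astype(int)

    clf = xgb.XGBClassifier(n_estimators=5)
    clf.fit(X, y)

    proba = clf.predict_proba(X)  # shape (100, 2): P(class 0), P(class 1)
    assert np.allclose(proba.sum(axis=1), 1.0)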
@ -652,20 +700,26 @@ class XGBClassifier(XGBModel, XGBClassifierBase):

Example
-------
param_dist = {'objective':'binary:logistic', 'n_estimators':2}

clf = xgb.XGBClassifier(**param_dist)
.. code-block:: python

clf.fit(X_train, y_train,
eval_set=[(X_train, y_train), (X_test, y_test)],
eval_metric='logloss',
verbose=True)
param_dist = {'objective':'binary:logistic', 'n_estimators':2}

evals_result = clf.evals_result()
clf = xgb.XGBClassifier(**param_dist)

The variable evals_result will contain:
{'validation_0': {'logloss': ['0.604835', '0.531479']},
'validation_1': {'logloss': ['0.41965', '0.17686']}}
clf.fit(X_train, y_train,
eval_set=[(X_train, y_train), (X_test, y_test)],
eval_metric='logloss',
verbose=True)

evals_result = clf.evals_result()

The variable ``evals_result`` will contain

.. code-block:: none

{'validation_0': {'logloss': ['0.604835', '0.531479']},
'validation_1': {'logloss': ['0.41965', '0.17686']}}
"""
if self.evals_result_:
evals_result = self.evals_result_
@ -677,5 +731,5 @@ class XGBClassifier(XGBModel, XGBClassifierBase):

class XGBRegressor(XGBModel, XGBRegressorBase):
# pylint: disable=missing-docstring
__doc__ = """Implementation of the scikit-learn API for XGBoost regression.
""" + '\n'.join(XGBModel.__doc__.split('\n')[2:])
__doc__ = "Implementation of the scikit-learn API for XGBoost regression.\n\n"\
+ '\n'.join(XGBModel.__doc__.split('\n')[2:])

@ -147,18 +147,24 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
and/or num_class appears in the parameters)
evals_result: dict
This dictionary stores the evaluation results of all the items in watchlist.

Example: with a watchlist containing [(dtest,'eval'), (dtrain,'train')] and
a parameter containing ('eval_metric': 'logloss')
Returns: {'train': {'logloss': ['0.48253', '0.35953']},
'eval': {'logloss': ['0.480385', '0.357756']}}
a parameter containing ('eval_metric': 'logloss'), the **evals_result**
returns

.. code-block:: none

{'train': {'logloss': ['0.48253', '0.35953']},
'eval': {'logloss': ['0.480385', '0.357756']}}

verbose_eval : bool or int
Requires at least one item in evals.
If `verbose_eval` is True then the evaluation metric on the validation set is
If **verbose_eval** is True then the evaluation metric on the validation set is
printed at each boosting stage.
If `verbose_eval` is an integer then the evaluation metric on the validation set
is printed at every given `verbose_eval` boosting stage. The last boosting stage
/ the boosting stage found by using `early_stopping_rounds` is also printed.
Example: with verbose_eval=4 and at least one item in evals, an evaluation metric
If **verbose_eval** is an integer then the evaluation metric on the validation set
is printed at every given **verbose_eval** boosting stage. The last boosting stage
/ the boosting stage found by using **early_stopping_rounds** is also printed.
Example: with ``verbose_eval=4`` and at least one item in evals, an evaluation metric
is printed every 4 boosting stages, instead of every boosting stage.
learning_rates: list or function (deprecated - use callback API instead)
List of learning rate for each boosting round
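Putting the ``evals_result`` and ``verbose_eval`` parameters documented above together, a minimal sketch on synthetic data (illustrative only):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    rng = np.random.RandomState(0)
    X = rng.randn(200, 3)
    y = (X[:, 0] + X[:, 1] > 0).astype(int)

    dtrain = xgb.DMatrix(X[:150], label=y[:150])
    dtest = xgb.DMatrix(X[150:], label=y[150:])

    evals_result = {}
    bst = xgb.train({'objective': 'binary:logistic', 'eval_metric': 'logloss'},
                    dtrain,
                    num_boost_round=20,
                    evals=[(dtest, 'eval'), (dtrain, 'train')],
                    evals_result=evals_result,  # filled in place, keyed by watchlist names
                    verbose_eval=4)             # print the metric every 4 rounds

    print(evals_result['eval']['logloss'])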
@ -328,10 +334,10 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
folds : a KFold or StratifiedKFold instance or list of fold indices
Sklearn KFolds or StratifiedKFolds object.
Alternatively may explicitly pass sample indices for each fold.
For `n` folds, `folds` should be a length `n` list of tuples.
Each tuple is `(in,out)` where `in` is a list of indices to be used
as the training samples for the `n`th fold and `out` is a list of
indices to be used as the testing samples for the `n`th fold.
For ``n`` folds, ``folds`` should be a length ``n`` list of tuples.
Each tuple is ``(in,out)`` where ``in`` is a list of indices to be used
as the training samples for the ``n`` th fold and ``out`` is a list of
indices to be used as the testing samples for the ``n`` th fold.
metrics : string or list of strings
Evaluation metrics to be watched in CV.
obj : function
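The explicit ``(in, out)`` form of ``folds`` described above can be built from scikit-learn's ``KFold``; a minimal sketch on synthetic data:

.. code-block:: python

    import numpy as np
    import xgboost as xgb
    from sklearn.model_selection import KFold

    rng = np.random.RandomState(0)
    X = rng.randn(120, 4)
    y = (X[:, 0] > 0).astype(int)
    dtrain = xgb.DMatrix(X, label=y)

    # One (train_indices, test_indices) tuple per fold, as documented above.
    folds = list(KFold(n_splits=3, shuffle=True, random_state=0).split(X))

    res = xgb.cv({'objective': 'binary:logistic'}, dtrain,
                 num_boost_round=10, folds=folds, metrics='logloss')
    print(res.head())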
@ -363,8 +369,12 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
callbacks : list of callback functions
List of callback functions that are applied at end of each iteration.
It is possible to use predefined callbacks by using xgb.callback module.
Example: [xgb.callback.reset_learning_rate(custom_rates)]
shuffle : bool
Example:

.. code-block:: none

[xgb.callback.reset_learning_rate(custom_rates)]
shuffle : bool
Shuffle data before creating folds.

Returns
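And for ``callbacks``, a hedged sketch using the predefined callback named in the docstring (assuming the callback API of this release; ``dtrain`` is assumed to be an existing DMatrix, so the call is left commented out):

.. code-block:: python

    import xgboost as xgb

    # One learning rate per boosting round, decayed geometrically.
    num_round = 10
    custom_rates = [0.3 * (0.9 ** i) for i in range(num_round)]

    # res = xgb.cv({'objective': 'binary:logistic'}, dtrain,
    #              num_boost_round=num_round,
    #              callbacks=[xgb.callback.reset_learning_rate(custom_rates)])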
52
tests/ci_build/jenkins_tools.Groovy
Normal file
@ -0,0 +1,52 @@
#!/usr/bin/groovy
// -*- mode: groovy -*-

/* Utility functions for Jenkins */

// Command to run command inside a docker container
dockerRun = 'tests/ci_build/ci_build.sh'

// initialize source codes
def checkoutSrcs() {
retry(5) {
try {
timeout(time: 2, unit: 'MINUTES') {
checkout scm
sh 'git submodule update --init'
}
} catch (exc) {
deleteDir()
error "Failed to fetch source codes"
}
}
}

/**
 * Creates cmake and make builds
 */
def buildFactory(buildName, conf, restricted, build_func) {
def os = conf["os"]
def device = conf["withGpu"] ? "gpu" : "cpu"
def restricted_flag = restricted ? "restricted" : "unrestricted"
def nodeReq = "${os} && ${device} && ${restricted_flag}"
def dockerTarget = conf["withGpu"] ? "gpu" : "cpu"
[ ("${buildName}") : { build_func("${buildName}", conf, nodeReq, dockerTarget) }
]
}

def cmakeOptions(conf) {
return ([
conf["withGpu"] ? '-DUSE_CUDA=ON' : '-DUSE_CUDA=OFF',
conf["withNccl"] ? '-DUSE_NCCL=ON' : '-DUSE_NCCL=OFF',
conf["withOmp"] ? '-DOPEN_MP:BOOL=ON' : '']
).join(" ")
}

def getBuildName(conf) {
def gpuLabel = conf['withGpu'] ? ("_cuda" + conf['cudaVersion'] + (conf['withNccl'] ? "_nccl" : "_nonccl")) : "_cpu"
def ompLabel = conf['withOmp'] ? "_omp" : ""
def pyLabel = "_py${conf['pythonVersion']}"
return "${conf['os']}${gpuLabel}${ompLabel}${pyLabel}"
}

return this
@ -4,6 +4,6 @@ set -e
cd python-package
python setup.py install --user
cd ..
python -m nose --attr='!slow' tests/python-gpu/
python -m nose -v --attr='!slow' tests/python-gpu/
./testxgboost


@ -49,7 +49,7 @@ class TestGPUPredict(unittest.TestCase):
# Test case for a bug where multiple batch predictions made on a test set produce incorrect results
def test_multi_predict(self):
from sklearn.datasets import make_regression
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split

n = 1000
X, y = make_regression(n, random_state=rng)

@ -22,21 +22,13 @@ class TemporaryDirectory(object):
def test_binary_classification():
tm._skip_if_no_sklearn()
from sklearn.datasets import load_digits
try:
from sklearn.model_selection import KFold
except:
from sklearn.cross_validation import KFold
from sklearn.model_selection import KFold

digits = load_digits(2)
y = digits['target']
X = digits['data']
try:
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
except TypeError: # sklearn.model_selection.KFold uses n_split
kf = KFold(
n_splits=2, shuffle=True, random_state=rng
).split(np.arange(y.shape[0]))
for train_index, test_index in kf:
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
preds = xgb_model.predict(X[test_index])
labels = y[test_index]
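The test changes above drop the old ``sklearn.cross_validation`` fallback entirely; the essential API difference is sketched below on stand-in data:

.. code-block:: python

    import numpy as np
    from sklearn.model_selection import KFold

    rng = np.random.RandomState(1994)   # same seeding style as the tests
    X = np.arange(40).reshape(20, 2)    # stand-in data, illustrative only
    y = np.tile([0, 1], 10)

    # Old API (sklearn < 0.18): KFold(n, n_folds=...) and the object was iterable.
    # New API (sklearn >= 0.18): KFold(n_splits=...) plus an explicit split(X, y).
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]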
@ -48,10 +40,7 @@ def test_binary_classification():
def test_multiclass_classification():
tm._skip_if_no_sklearn()
from sklearn.datasets import load_iris
try:
from sklearn.cross_validation import KFold
except:
from sklearn.model_selection import KFold
from sklearn.model_selection import KFold

def check_pred(preds, labels):
err = sum(1 for i in range(len(preds))
@ -61,8 +50,8 @@ def test_multiclass_classification():
iris = load_iris()
y = iris['target']
X = iris['data']
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
for train_index, test_index in kf:
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
preds = xgb_model.predict(X[test_index])
# test other params in XGBClassifier().fit
@ -111,13 +100,13 @@ def test_boston_housing_regression():
tm._skip_if_no_sklearn()
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston
from sklearn.cross_validation import KFold
from sklearn.model_selection import KFold

boston = load_boston()
y = boston['target']
X = boston['data']
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
for train_index, test_index in kf:
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])

preds = xgb_model.predict(X[test_index])
@ -135,7 +124,7 @@ def test_boston_housing_regression():

def test_parameter_tuning():
tm._skip_if_no_sklearn()
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_boston

boston = load_boston()
@ -143,7 +132,8 @@ def test_parameter_tuning():
X = boston['data']
xgb_model = xgb.XGBRegressor()
clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
'n_estimators': [50, 100, 200]}, verbose=1)
'n_estimators': [50, 100, 200]},
cv=3, verbose=1, iid=True)
clf.fit(X, y)
assert clf.best_score_ < 0.7
assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
@ -153,7 +143,7 @@ def test_regression_with_custom_objective():
tm._skip_if_no_sklearn()
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston
from sklearn.cross_validation import KFold
from sklearn.model_selection import KFold

def objective_ls(y_true, y_pred):
grad = (y_pred - y_true)
@ -163,8 +153,8 @@ def test_regression_with_custom_objective():
boston = load_boston()
y = boston['target']
X = boston['data']
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
for train_index, test_index in kf:
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBRegressor(objective=objective_ls).fit(
X[train_index], y[train_index]
)
@ -186,7 +176,7 @@ def test_regression_with_custom_objective():
def test_classification_with_custom_objective():
tm._skip_if_no_sklearn()
from sklearn.datasets import load_digits
from sklearn.cross_validation import KFold
from sklearn.model_selection import KFold

def logregobj(y_true, y_pred):
y_pred = 1.0 / (1.0 + np.exp(-y_pred))
@ -197,8 +187,8 @@ def test_classification_with_custom_objective():
digits = load_digits(2)
y = digits['target']
X = digits['data']
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
for train_index, test_index in kf:
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBClassifier(objective=logregobj)
xgb_model.fit(X[train_index], y[train_index])
preds = xgb_model.predict(X[test_index])
@ -225,10 +215,11 @@ def test_classification_with_custom_objective():
def test_sklearn_api():
tm._skip_if_no_sklearn()
from sklearn.datasets import load_iris
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split

iris = load_iris()
tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)
tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target,
train_size=120, test_size=0.2)

classifier = xgb.XGBClassifier(booster='gbtree', n_estimators=10)
classifier.fit(tr_d, tr_l)
@ -242,7 +233,7 @@ def test_sklearn_api_gblinear():
def test_sklearn_api_gblinear():
tm._skip_if_no_sklearn()
from sklearn.datasets import load_iris
from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split

iris = load_iris()
tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)
@ -476,23 +467,15 @@ def test_validation_weights_xgbclassifier():
def test_save_load_model():
tm._skip_if_no_sklearn()
from sklearn.datasets import load_digits
try:
from sklearn.model_selection import KFold
except:
from sklearn.cross_validation import KFold
from sklearn.model_selection import KFold

digits = load_digits(2)
y = digits['target']
X = digits['data']
try:
kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
except TypeError: # sklearn.model_selection.KFold uses n_split
kf = KFold(
n_splits=2, shuffle=True, random_state=rng
).split(np.arange(y.shape[0]))
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
with TemporaryDirectory() as tempdir:
model_path = os.path.join(tempdir, 'digits.model')
for train_index, test_index in kf:
for train_index, test_index in kf.split(X, y):
xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
xgb_model.save_model(model_path)
xgb_model = xgb.XGBModel()

@ -56,7 +56,7 @@ if [ ${TASK} == "python_test" ]; then
python -m pip install datatable --no-binary datatable

python -m pip install graphviz pytest pytest-cov codecov
python -m nose tests/python || exit -1
python -m nose -v tests/python || exit -1
py.test tests/python --cov=python-package/xgboost
codecov
source activate python2
@ -64,7 +64,7 @@ if [ ${TASK} == "python_test" ]; then
python --version
conda install numpy scipy pandas matplotlib nose scikit-learn
python -m pip install graphviz
python -m nose tests/python || exit -1
python -m nose -v tests/python || exit -1
exit 0
fi

@ -75,7 +75,7 @@ if [ ${TASK} == "python_lightweight_test" ]; then
python --version
conda install numpy scipy nose
python -m pip install graphviz pytest pytest-cov codecov
python -m nose tests/python || exit -1
python -m nose -v tests/python || exit -1
py.test tests/python --cov=python-package/xgboost
codecov
source activate python2
@ -83,7 +83,7 @@ if [ ${TASK} == "python_lightweight_test" ]; then
python --version
conda install numpy scipy nose
python -m pip install graphviz
python -m nose tests/python || exit -1
python -m nose -v tests/python || exit -1
python -m pip install flake8==3.4.1
flake8 --ignore E501 python-package || exit -1
flake8 --ignore E501 tests/python || exit -1