Make 1.4.2 patch release. (#6962)

[back port] Fix multiclass auc with empty dataset. (#6947) (#6960)
[CI] Fix CI/CD pipeline broken by latest auditwheel (4.0.0) (#6951) (#6952)
2021-05-13 16:17:14 +08:00 · 2021-05-13 12:31:52 +08:00 · 2021-05-11 20:45:04 +08:00 · 2021-05-04 12:43:42 -07:00 · 2021-05-04 16:10:16 +08:00 · 2021-05-04 16:09:49 +08:00
43 changed files with 200 additions and 105 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -31,7 +31,10 @@ jobs:
    - name: Run gtest binary
      run: |
        cd build
-        ctest --exclude-regex AllTestsInDMLCUnitTests --extra-verbose
+        # libomp internal error:
+        #   OMP: Error #131: Thread identifier invalid.
+        ./testxgboost  --gtest_filter="-HistIndexCreationWithExternalMemory.Test"
+        ctest -R TestXGBoostCLI --extra-verbose

  gtest-cpu-nonomp:
    name: Test Google C++ unittest (CPU Non-OMP)
--- a/.travis.yml
+++ b/.travis.yml
@@ -19,18 +19,10 @@ jobs:
      env: TASK=java_test

 # dependent brew packages
+# the dependencies from homebrew is installed manually from setup script due to outdated image from travis.
 addons:
  homebrew:
-    packages:
-      - cmake
-      - libomp
-      - graphviz
-      - openssl
-      - libgit2
-      - lz4
-      - wget
-      - r
-    update: true
+    update: false
  apt:
    packages:
      - snapd
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.13)
-project(xgboost LANGUAGES CXX C VERSION 1.4.1)
+project(xgboost LANGUAGES CXX C VERSION 1.4.2)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -179,10 +179,10 @@ def BuildCPUARM64() {
    ${dockerRun} ${container_type} ${docker_binary} bash -c "cd build && ctest --extra-verbose"
    ${dockerRun} ${container_type} ${docker_binary} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
    ${dockerRun} ${container_type} ${docker_binary} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} ${wheel_tag}
-    ${dockerRun} ${container_type} ${docker_binary} auditwheel repair --plat ${wheel_tag} python-package/dist/*.whl
+    ${dockerRun} ${container_type} ${docker_binary} bash -c "auditwheel repair --plat ${wheel_tag} python-package/dist/*.whl && python tests/ci_build/rename_whl.py wheelhouse/*.whl ${commit_id} ${wheel_tag}"
    mv -v wheelhouse/*.whl python-package/dist/
    # Make sure that libgomp.so is vendored in the wheel
-    ${dockerRun} ${container_type} ${docker_binary} bash -c "unzip -l python-package/dist/*.whl | grep libgomp  || exit -1"
+    ${dockerRun} ${container_type} ${docker_binary} bash -c "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
    """
    echo 'Stashing Python wheel...'
    stash name: "xgboost_whl_arm64_cpu", includes: 'python-package/dist/*.whl'
@@ -231,6 +231,7 @@ def BuildCUDA(args) {
    if (args.cuda_version == ref_cuda_ver) {
      sh """
      ${dockerRun} auditwheel_x86_64 ${docker_binary} auditwheel repair --plat ${wheel_tag} python-package/dist/*.whl
+      ${dockerRun} ${container_type} ${docker_binary} ${docker_args} python tests/ci_build/rename_whl.py wheelhouse/*.whl ${commit_id} ${wheel_tag}
      mv -v wheelhouse/*.whl python-package/dist/
      # Make sure that libgomp.so is vendored in the wheel
      ${dockerRun} auditwheel_x86_64 ${docker_binary} bash -c "unzip -l python-package/dist/*.whl | grep libgomp  || exit -1"
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 1.4.1.1
+Version: 1.4.2.1
 Date: 2020-08-28
 Authors@R: c(
  person("Tianqi", "Chen", role = c("aut"),
--- a/dev/release-pypi.py
+++ b/dev/release-pypi.py
@@ -49,7 +49,7 @@ def download_wheels(
    dir_URL: str,
    src_filename_prefix: str,
    target_filename_prefix: str,
-) -> None:
+) -> List[str]:
    """Download all binary wheels. dir_URL is the URL for remote directory storing the release
    wheels

@@ -72,6 +72,8 @@ def download_wheels(
        assert stderr.find("warning") == -1, "Unresolved warnings:\n" + stderr
        assert stdout.find("warning") == -1, "Unresolved warnings:\n" + stdout

+    return filenames
+

 def check_path():
    root = os.path.abspath(os.path.curdir)
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@

 #define XGBOOST_VER_MAJOR 1
 #define XGBOOST_VER_MINOR 4
-#define XGBOOST_VER_PATCH 1
+#define XGBOOST_VER_PATCH 2

 #endif  // XGBOOST_VERSION_CONFIG_H_
--- a/jvm-packages/create_jni.py
+++ b/jvm-packages/create_jni.py
@@ -84,8 +84,9 @@ if __name__ == "__main__":

    print("building Java wrapper")
    with cd(".."):
-        maybe_makedirs("build")
-        with cd("build"):
+        build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' else 'build'
+        maybe_makedirs(build_dir)
+        with cd(build_dir):
            if sys.platform == "win32":
                # Force x64 build on Windows.
                maybe_generator = ' -A x64'
@@ -114,6 +115,9 @@ if __name__ == "__main__":
            if gpu_arch_flag is not None:
                args.append("%s" % gpu_arch_flag)

+            lib_dir = os.path.join(os.pardir, 'lib')
+            if os.path.exists(lib_dir):
+                shutil.rmtree(lib_dir)
            run("cmake .. " + " ".join(args) + maybe_generator)
            run("cmake --build . --config Release" + maybe_parallel_build)

--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@

    <groupId>ml.dmlc</groupId>
    <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>1.4.1</version>
+    <version>1.4.2</version>
    <packaging>pom</packaging>
    <name>XGBoost JVM Package</name>
    <description>JVM Package for XGBoost</description>
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.4.1</version>
+        <version>1.4.2</version>
    </parent>
    <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>1.4.1</version>
+    <version>1.4.2</version>
    <packaging>jar</packaging>
    <build>
        <plugins>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
-            <version>1.4.1</version>
+            <version>1.4.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-            <version>1.4.1</version>
+            <version>1.4.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.4.1</version>
+        <version>1.4.2</version>
    </parent>
    <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>1.4.1</version>
+    <version>1.4.2</version>
    <build>
        <plugins>
            <plugin>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.4.1</version>
+            <version>1.4.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.4.1</version>
+        <version>1.4.2</version>
    </parent>
    <artifactId>xgboost4j-gpu_2.12</artifactId>
-    <version>1.4.1</version>
+    <version>1.4.2</version>
    <packaging>jar</packaging>

    <dependencies>
--- a/jvm-packages/xgboost4j-gpu/src
+++ b/jvm-packages/xgboost4j-gpu/src
@@ -1 +0,0 @@
-../xgboost4j/src/
--- a/jvm-packages/xgboost4j-gpu/src/main/java
+++ b/jvm-packages/xgboost4j-gpu/src/main/java
@@ -0,0 +1 @@
+../../../xgboost4j/src/main/java/
--- a/jvm-packages/xgboost4j-gpu/src/main/resources/xgboost4j-version.properties
+++ b/jvm-packages/xgboost4j-gpu/src/main/resources/xgboost4j-version.properties
@@ -0,0 +1 @@
+../../../../xgboost4j/src/main/resources/xgboost4j-version.properties
--- a/jvm-packages/xgboost4j-gpu/src/main/scala
+++ b/jvm-packages/xgboost4j-gpu/src/main/scala
@@ -0,0 +1 @@
+../../../xgboost4j/src/main/scala/
--- a/jvm-packages/xgboost4j-gpu/src/native
+++ b/jvm-packages/xgboost4j-gpu/src/native
@@ -0,0 +1 @@
+../../xgboost4j/src/native
--- a/jvm-packages/xgboost4j-gpu/src/test
+++ b/jvm-packages/xgboost4j-gpu/src/test
@@ -0,0 +1 @@
+../../xgboost4j/src/test
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.4.1</version>
+        <version>1.4.2</version>
    </parent>
    <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-            <version>1.4.1</version>
+            <version>1.4.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j-spark-gpu/src
+++ b/jvm-packages/xgboost4j-spark-gpu/src
@@ -1 +0,0 @@
-../xgboost4j-spark/src/
--- a/jvm-packages/xgboost4j-spark-gpu/src/main/scala
+++ b/jvm-packages/xgboost4j-spark-gpu/src/main/scala
@@ -0,0 +1 @@
+../../../xgboost4j-spark/src/main/scala
--- a/jvm-packages/xgboost4j-spark-gpu/src/test
+++ b/jvm-packages/xgboost4j-spark-gpu/src/test
@@ -0,0 +1 @@
+../../xgboost4j-spark/src/test
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.4.1</version>
+        <version>1.4.2</version>
    </parent>
    <artifactId>xgboost4j-spark_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.4.1</version>
+            <version>1.4.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.4.1</version>
+        <version>1.4.2</version>
    </parent>
    <artifactId>xgboost4j_2.12</artifactId>
-    <version>1.4.1</version>
+    <version>1.4.2</version>
    <packaging>jar</packaging>

    <dependencies>
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-1.4.1
+1.4.2
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -229,6 +229,9 @@ def _numpy2ctypes_type(dtype):


 def _array_interface(data: np.ndarray) -> bytes:
+    assert (
+        data.dtype.hasobject is False
+    ), "Input data contains `object` dtype.  Expecting numeric data."
    interface = data.__array_interface__
    if "mask" in interface:
        interface["mask"] = interface["mask"].__array_interface__
@@ -1841,8 +1844,8 @@ class Booster(object):
                )

        if isinstance(data, np.ndarray):
-            from .data import _maybe_np_slice
-            data = _maybe_np_slice(data, data.dtype)
+            from .data import _ensure_np_dtype
+            data, _ = _ensure_np_dtype(data, data.dtype)
            _check_call(
                _LIB.XGBoosterPredictFromDense(
                    self.handle,
@@ -1872,7 +1875,9 @@ class Booster(object):
                )
            )
            return _prediction_output(shape, dims, preds, False)
-        if lazy_isinstance(data, "cupy.core.core", "ndarray"):
+        if lazy_isinstance(data, "cupy.core.core", "ndarray") or lazy_isinstance(
+            data, "cupy._core.core", "ndarray"
+        ):
            from .data import _transform_cupy_array
            data = _transform_cupy_array(data)
            interface = data.__cuda_array_interface__
@@ -2027,7 +2032,7 @@ class Booster(object):
        """
        if isinstance(fout, (STRING_TYPES, os.PathLike)):
            fout = os.fspath(os.path.expanduser(fout))
-            fout = open(fout, 'w')
+            fout = open(fout, 'w')  # pylint: disable=consider-using-with
            need_close = True
        else:
            need_close = False
--- a/python-package/xgboost/data.py
+++ b/python-package/xgboost/data.py
@@ -104,6 +104,13 @@ def _is_numpy_array(data):
    return isinstance(data, (np.ndarray, np.matrix))


+def _ensure_np_dtype(data, dtype):
+    if data.dtype.hasobject:
+        data = data.astype(np.float32, copy=False)
+        dtype = np.float32
+    return data, dtype
+
+
 def _maybe_np_slice(data, dtype):
    '''Handle numpy slice.  This can be removed if we use __array_interface__.
    '''
@@ -118,6 +125,7 @@ def _maybe_np_slice(data, dtype):
            data = np.array(data, copy=False, dtype=dtype)
    except AttributeError:
        data = np.array(data, copy=False, dtype=dtype)
+    data, dtype = _ensure_np_dtype(data, dtype)
    return data


--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -734,8 +734,9 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr,
  API_BEGIN();
  CHECK_HANDLE();
  std::shared_ptr<xgboost::data::CSRArrayAdapter> x{
-      new xgboost::data::CSRArrayAdapter{
-          StringView{indptr}, StringView{indices}, StringView{data}, cols}};
+      new xgboost::data::CSRArrayAdapter{StringView{indptr},
+                                         StringView{indices}, StringView{data},
+                                         static_cast<size_t>(cols)}};
  std::shared_ptr<DMatrix> p_m {nullptr};
  if (m) {
    p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
--- a/src/c_api/c_api_utils.h
+++ b/src/c_api/c_api_utils.h
@@ -98,6 +98,10 @@ inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows
      forest = std::max(static_cast<decltype(forest)>(1), forest);
      shape[3] = forest;
      *out_dim = shape.size();
+    } else if (chunksize == 1) {
+      *out_dim = 1;
+      shape.resize(*out_dim);
+      shape.front() = rows;
    } else {
      *out_dim = 2;
      shape.resize(*out_dim);
--- a/src/data/array_interface.h
+++ b/src/data/array_interface.h
@@ -229,8 +229,11 @@ class ArrayInterfaceHandler {
      }
      strides[1] = n;
    }
-    auto valid = (rows - 1) * strides[0] + (cols - 1) * strides[1] == (rows * cols) - 1;
-    CHECK(valid) << "Invalid strides in array.";
+
+    auto valid = rows * strides[0] + cols * strides[1] >= (rows * cols);
+    CHECK(valid) << "Invalid strides in array."
+                 << "  strides: (" << strides[0] << "," << strides[1]
+                 << "), shape: (" << rows << ", " << cols << ")";
  }

  static void* ExtractData(std::map<std::string, Json> const &column,
--- a/src/metric/auc.cc
+++ b/src/metric/auc.cc
@@ -87,8 +87,7 @@ std::tuple<float, float, float> BinaryAUC(std::vector<float> const &predts,
 * - Kleiman, Ross and Page, David. $AUC_{\mu}$: A Performance Metric for Multi-Class
 *   Machine Learning Models
 */
-float MultiClassOVR(std::vector<float> const& predts, MetaInfo const& info) {
-  auto n_classes = predts.size() / info.labels_.Size();
+float MultiClassOVR(std::vector<float> const& predts, MetaInfo const& info, size_t n_classes) {
  CHECK_NE(n_classes, 0);
  auto const& labels = info.labels_.ConstHostVector();

@@ -230,6 +229,10 @@ class EvalAUC : public Metric {
      info.labels_.SetDevice(tparam_->gpu_id);
      info.weights_.SetDevice(tparam_->gpu_id);
    }
+    //  We use the global size to handle empty dataset.
+    std::array<size_t, 2> meta{info.labels_.Size(), preds.Size()};
+    rabit::Allreduce<rabit::op::Max>(meta.data(), meta.size());
+
    if (!info.group_ptr_.empty()) {
      /**
       * learning to rank
@@ -261,16 +264,17 @@ class EvalAUC : public Metric {
        CHECK_LE(auc, 1) << "Total AUC across groups: " << auc * valid_groups
                         << ", valid groups: " << valid_groups;
      }
-    } else if (info.labels_.Size() != preds.Size() &&
-               preds.Size() % info.labels_.Size() == 0) {
+    } else if (meta[0] != meta[1] && meta[1] % meta[0] == 0) {
      /**
       * multi class
       */
+      size_t n_classes = meta[1] / meta[0];
+      CHECK_NE(n_classes, 0);
      if (tparam_->gpu_id == GenericParameter::kCpuId) {
-        auc = MultiClassOVR(preds.ConstHostVector(), info);
+        auc = MultiClassOVR(preds.ConstHostVector(), info, n_classes);
      } else {
        auc = GPUMultiClassAUCOVR(preds.ConstDeviceSpan(), info, tparam_->gpu_id,
-                                  &this->d_cache_);
+                                  &this->d_cache_, n_classes);
      }
    } else {
      /**
@@ -323,7 +327,8 @@ GPUBinaryAUC(common::Span<float const> predts, MetaInfo const &info,
 }

 float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info,
-                          int32_t device, std::shared_ptr<DeviceAUCCache>* cache) {
+                          int32_t device, std::shared_ptr<DeviceAUCCache>* cache,
+                          size_t n_classes) {
  common::AssertGPUSupport();
  return 0;
 }
--- a/src/metric/auc.cu
+++ b/src/metric/auc.cu
@@ -61,10 +61,12 @@ struct DeviceAUCCache {
      neg_pos.resize(sorted_idx.size());
      if (is_multi) {
        predts_t.resize(sorted_idx.size());
-        reducer.reset(new dh::AllReducer);
-        reducer->Init(rabit::GetRank());
      }
    }
+    if (is_multi && !reducer) {
+      reducer.reset(new dh::AllReducer);
+      reducer->Init(device);
+    }
  }
 };

@@ -197,12 +199,48 @@ XGBOOST_DEVICE size_t LastOf(size_t group, common::Span<Idx> indptr) {
  return indptr[group + 1] - 1;
 }

+
+float ScaleClasses(common::Span<float> results, common::Span<float> local_area,
+                   common::Span<float> fp, common::Span<float> tp,
+                   common::Span<float> auc, std::shared_ptr<DeviceAUCCache> cache,
+                   size_t n_classes) {
+  dh::XGBDeviceAllocator<char> alloc;
+  if (rabit::IsDistributed()) {
+    CHECK_EQ(dh::CudaGetPointerDevice(results.data()), dh::CurrentDevice());
+    cache->reducer->AllReduceSum(results.data(), results.data(), results.size());
+  }
+  auto reduce_in = dh::MakeTransformIterator<thrust::pair<float, float>>(
+      thrust::make_counting_iterator(0), [=] __device__(size_t i) {
+        if (local_area[i] > 0) {
+          return thrust::make_pair(auc[i] / local_area[i] * tp[i], tp[i]);
+        }
+        return thrust::make_pair(std::numeric_limits<float>::quiet_NaN(), 0.0f);
+      });
+
+  float tp_sum;
+  float auc_sum;
+  thrust::tie(auc_sum, tp_sum) = thrust::reduce(
+      thrust::cuda::par(alloc), reduce_in, reduce_in + n_classes,
+      thrust::make_pair(0.0f, 0.0f),
+      [=] __device__(auto const &l, auto const &r) {
+        return thrust::make_pair(l.first + r.first, l.second + r.second);
+      });
+  if (tp_sum != 0 && !std::isnan(auc_sum)) {
+    auc_sum /= tp_sum;
+  } else {
+    return std::numeric_limits<float>::quiet_NaN();
+  }
+  return auc_sum;
+}
+
 /**
 * MultiClass implementation is similar to binary classification, except we need to split
 * up each class in all kernels.
 */
 float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info,
-                          int32_t device, std::shared_ptr<DeviceAUCCache>* p_cache) {
+                          int32_t device, std::shared_ptr<DeviceAUCCache>* p_cache,
+                          size_t n_classes) {
+  dh::safe_cuda(cudaSetDevice(device));
  auto& cache = *p_cache;
  if (!cache) {
    cache.reset(new DeviceAUCCache);
@@ -213,8 +251,19 @@ float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info
  auto weights = info.weights_.ConstDeviceSpan();

  size_t n_samples = labels.size();
-  size_t n_classes = predts.size() / labels.size();
-  CHECK_NE(n_classes, 0);
+
+  if (n_samples == 0) {
+    dh::TemporaryArray<float> resutls(n_classes * 4, 0.0f);
+    auto d_results = dh::ToSpan(resutls);
+    dh::LaunchN(device, n_classes * 4, [=]__device__(size_t i) {
+      d_results[i] = 0.0f;
+    });
+    auto local_area = d_results.subspan(0, n_classes);
+    auto fp = d_results.subspan(n_classes, n_classes);
+    auto tp = d_results.subspan(2 * n_classes, n_classes);
+    auto auc = d_results.subspan(3 * n_classes, n_classes);
+    return ScaleClasses(d_results, local_area, fp, tp, auc, cache, n_classes);
+  }

  /**
   * Create sorted index for each class
@@ -377,32 +426,7 @@ float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info
    tp[c] = last.second;
    local_area[c] = last.first * last.second;
  });
-  if (rabit::IsDistributed()) {
-    cache->reducer->AllReduceSum(resutls.data().get(), resutls.data().get(),
-                                 resutls.size());
-  }
-  auto reduce_in = dh::MakeTransformIterator<thrust::pair<float, float>>(
-      thrust::make_counting_iterator(0), [=] __device__(size_t i) {
-        if (local_area[i] > 0) {
-          return thrust::make_pair(auc[i] / local_area[i] * tp[i], tp[i]);
-        }
-        return thrust::make_pair(std::numeric_limits<float>::quiet_NaN(), 0.0f);
-      });
-
-  float tp_sum;
-  float auc_sum;
-  thrust::tie(auc_sum, tp_sum) = thrust::reduce(
-      thrust::cuda::par(alloc), reduce_in, reduce_in + n_classes,
-      thrust::make_pair(0.0f, 0.0f),
-      [=] __device__(auto const &l, auto const &r) {
-        return thrust::make_pair(l.first + r.first, l.second + r.second);
-      });
-  if (tp_sum != 0 && !std::isnan(auc_sum)) {
-    auc_sum /= tp_sum;
-  } else {
-    return std::numeric_limits<float>::quiet_NaN();
-  }
-  return auc_sum;
+  return ScaleClasses(d_results, local_area, fp, tp, auc, cache, n_classes);
 }

 namespace {
--- a/src/metric/auc.h
+++ b/src/metric/auc.h
@@ -26,7 +26,8 @@ GPUBinaryAUC(common::Span<float const> predts, MetaInfo const &info,
             int32_t device, std::shared_ptr<DeviceAUCCache> *p_cache);

 float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info,
-                          int32_t device, std::shared_ptr<DeviceAUCCache>* cache);
+                          int32_t device, std::shared_ptr<DeviceAUCCache>* cache,
+                          size_t n_classes);

 std::pair<float, uint32_t>
 GPURankingAUC(common::Span<float const> predts, MetaInfo const &info,
--- a/tests/ci_build/conda_env/macos_cpu_test.yml
+++ b/tests/ci_build/conda_env/macos_cpu_test.yml
@@ -15,6 +15,7 @@ dependencies:
 - matplotlib
 - dask
 - distributed
+- graphviz
 - python-graphviz
 - hypothesis
 - astroid
--- a/tests/ci_build/insert_vcomp140.py
+++ b/tests/ci_build/insert_vcomp140.py
@@ -11,7 +11,7 @@ vcomp140_path = 'C:\\Windows\\System32\\vcomp140.dll'

 for wheel_path in sorted(glob.glob(sys.argv[1])):
    m = re.search(r'xgboost-(.*)-py3', wheel_path)
-    assert m
+    assert m, f'wheel_path = {wheel_path}'
    version = m.group(1)

    with zipfile.ZipFile(wheel_path, 'a') as f:
--- a/tests/ci_build/rename_whl.py
+++ b/tests/ci_build/rename_whl.py
@@ -26,8 +26,9 @@ dirname, basename = os.path.dirname(whl_path), os.path.basename(whl_path)
 with cd(dirname):
    tokens = basename.split('-')
    assert len(tokens) == 5
+    version = tokens[1].split('+')[0]
    keywords = {'pkg_name': tokens[0],
-                'version': tokens[1],
+                'version': version,
                'commit_id': commit_id,
                'platform_tag': platform_tag}
    new_name = '{pkg_name}-{version}+{commit_id}-py3-none-{platform_tag}.whl'.format(**keywords)
--- a/tests/python-gpu/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask.py
@@ -277,7 +277,7 @@ class TestDistributedGPU:
            X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X_))
            y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y_))
            w = dask_cudf.from_dask_dataframe(dd.from_dask_array(w_))
-            run_dask_classifier(X, y, w, model, client, 10)
+            run_dask_classifier(X, y, w, model, "gpu_hist", client, 10)

    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_dask_cuda())
--- a/tests/python/test_linear.py
+++ b/tests/python/test_linear.py
@@ -57,15 +57,13 @@ class TestLinear:
        param['updater'] = 'shotgun'
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
-        # shotgun is non-deterministic, so we relax the test by sampling
-        # result.
+        # shotgun is non-deterministic, so we relax the test by only using first and last
+        # iteration.
        if len(result) > 2:
-            sampled_result = [score for i, score in enumerate(result)
-                              if i % 2 == 0]
-            sampled_result[-1] = result[-1]  # make sure the last one is used
+            sampled_result = (result[0], result[-1])
        else:
            sampled_result = result
-        assert tm.non_increasing(sampled_result, 1e-3)
+        assert tm.non_increasing(sampled_result)

    @given(parameter_strategy, strategies.integers(10, 50),
           tm.dataset_strategy, strategies.floats(1e-5, 2.0),
--- a/tests/python/test_predict.py
+++ b/tests/python/test_predict.py
@@ -75,6 +75,11 @@ def run_predict_leaf(predictor):
    first = sliced[0, ...]

    assert np.prod(first.shape) == classes * num_parallel_tree * ntree_limit
+
+    # When there's only 1 tree, the output is a 1 dim vector
+    booster = xgb.train({"tree_method": "hist"}, num_boost_round=1, dtrain=m)
+    assert booster.predict(m, pred_leaf=True).shape == (rows, )
+
    return leaf


@@ -150,6 +155,14 @@ class TestInplacePredict:
        predt_from_array = booster.inplace_predict(X[:10, ...], missing=self.missing)
        predt_from_dmatrix = booster.predict(test)

+        X_obj = X.copy().astype(object)
+
+        assert X_obj.dtype.hasobject is True
+        assert X.dtype.hasobject is False
+        np.testing.assert_allclose(
+            booster.inplace_predict(X_obj), booster.inplace_predict(X)
+        )
+
        np.testing.assert_allclose(predt_from_dmatrix, predt_from_array)

        predt_from_array = booster.inplace_predict(
@@ -187,8 +200,13 @@ class TestInplacePredict:
        arr_predt = booster.inplace_predict(X)
        dmat_predt = booster.predict(xgb.DMatrix(X))

+        X = df.values
+        X = np.asfortranarray(X)
+        fort_predt = booster.inplace_predict(X)
+
        np.testing.assert_allclose(dmat_predt, arr_predt)
        np.testing.assert_allclose(df_predt, arr_predt)
+        np.testing.assert_allclose(fort_predt, arr_predt)

    def test_base_margin(self):
        booster = self.booster
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -317,6 +317,7 @@ def run_dask_classifier(
    y: xgb.dask._DaskCollection,
    w: xgb.dask._DaskCollection,
    model: str,
+    tree_method: Optional[str],
    client: "Client",
    n_classes,
 ) -> None:
@@ -324,11 +325,11 @@ def run_dask_classifier(

    if model == "boosting":
        classifier = xgb.dask.DaskXGBClassifier(
-            verbosity=1, n_estimators=2, eval_metric=metric
+            verbosity=1, n_estimators=2, eval_metric=metric, tree_method=tree_method
        )
    else:
        classifier = xgb.dask.DaskXGBRFClassifier(
-            verbosity=1, n_estimators=2, eval_metric=metric
+            verbosity=1, n_estimators=2, eval_metric=metric, tree_method=tree_method
        )

    assert classifier._estimator_type == "classifier"
@@ -397,12 +398,12 @@ def run_dask_classifier(
 def test_dask_classifier(model: str, client: "Client") -> None:
    X, y, w = generate_array(with_weights=True)
    y = (y * 10).astype(np.int32)
-    run_dask_classifier(X, y, w, model, client, 10)
+    run_dask_classifier(X, y, w, model, None, client, 10)

    y_bin = y.copy()
    y_bin[y > 5] = 1.0
    y_bin[y <= 5] = 0.0
-    run_dask_classifier(X, y_bin, w, model, client, 2)
+    run_dask_classifier(X, y_bin, w, model, None, client, 2)


@pytest.mark.skipif(**tm.no_sklearn())
@@ -568,22 +569,26 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
    # multiclass
    X_, y_ = make_classification(
        n_samples=n_samples,
-        n_classes=10,
+        n_classes=n_workers,
        n_informative=n_features,
        n_redundant=0,
        n_repeated=0
    )
+    for i in range(y_.shape[0]):
+        y_[i] = i % n_workers
    X = dd.from_array(X_, chunksize=10)
    y = dd.from_array(y_, chunksize=10)

    n_samples = n_workers - 1
    valid_X_, valid_y_ = make_classification(
        n_samples=n_samples,
-        n_classes=10,
+        n_classes=n_workers,
        n_informative=n_features,
        n_redundant=0,
        n_repeated=0
    )
+    for i in range(valid_y_.shape[0]):
+        valid_y_[i] = i % n_workers
    valid_X = dd.from_array(valid_X_, chunksize=n_samples)
    valid_y = dd.from_array(valid_y_, chunksize=n_samples)

@@ -594,9 +599,9 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->


 def test_empty_dmatrix_auc() -> None:
-    with LocalCluster(n_workers=2) as cluster:
+    with LocalCluster(n_workers=8) as cluster:
        with Client(cluster) as client:
-            run_empty_dmatrix_auc(client, "hist", 2)
+            run_empty_dmatrix_auc(client, "hist", 8)


 def run_auc(client: "Client", tree_method: str) -> None:
@@ -1023,7 +1028,17 @@ class TestWithDask:
                                 evals=[(m, 'train')])['history']
        note(history)
        history = history['train'][dataset.metric]
-        assert tm.non_increasing(history)
+
+        def is_stump():
+            return params["max_depth"] == 1 or params["max_leaves"] == 1
+
+        def minimum_bin():
+            return "max_bin" in params and params["max_bin"] == 2
+
+        if minimum_bin() and is_stump():
+            assert tm.non_increasing(history, tolerance=1e-3)
+        else:
+            assert tm.non_increasing(history)
        # Make sure that it's decreasing
        assert history[-1] < history[0]

--- a/tests/python/testing.py
+++ b/tests/python/testing.py
@@ -272,6 +272,8 @@ def eval_error_metric(predt, dtrain: xgb.DMatrix):
    label = dtrain.get_label()
    r = np.zeros(predt.shape)
    gt = predt > 0.5
+    if predt.size == 0:
+        return "CustomErr", 0
    r[gt] = 1 - label[gt]
    le = predt <= 0.5
    r[le] = label[le]
--- a/tests/travis/run_test.sh
+++ b/tests/travis/run_test.sh
@@ -1,7 +1,5 @@
 #!/bin/bash

-make -f dmlc-core/scripts/packages.mk lz4
-
 source $HOME/miniconda/bin/activate

 if [ ${TASK} == "python_sdist_test" ]; then
--- a/tests/travis/setup.sh
+++ b/tests/travis/setup.sh
@@ -1,5 +1,9 @@
 #!/bin/bash

+# https://travis-ci.community/t/macos-build-fails-because-of-homebrew-bundle-unknown-command/7296/27
+brew install cmake libomp lz4
+
+
 if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_sdist_test" ]; then
    if [ ${TRAVIS_OS_NAME} == "osx" ]; then
        wget --no-verbose -O conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
Author	SHA1	Message	Date
Jiaming Yuan	522b8977c2	Make 1.4.2 patch release. (#6962 )	2021-05-13 16:17:14 +08:00
Jiaming Yuan	8147d78b12	[back port] Fix multiclass auc with empty dataset. (#6947 ) (#6960 )	2021-05-13 12:31:52 +08:00
Jiaming Yuan	651c4ac03b	[CI] Fix CI/CD pipeline broken by latest auditwheel (4.0.0) (#6951 ) (#6952 ) Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>	2021-05-11 20:45:04 +08:00
Philip Hyunsu Cho	1fb75d2460	Make it easier to release GPU/CPU code artifacts to Maven Central (#6907 )	2021-05-04 12:43:42 -07:00
Jiaming Yuan	6609211517	[backport] Enforce input data is not `object`. (#6927 ) (#6938 ) * Check for object data type. * Allow strided arrays with greater underlying buffer size.	2021-05-04 16:10:16 +08:00
Jiaming Yuan	b78ad1e623	[backport] Ensure predict leaf output 1-dim vector where there's only 1 tree. (#6889 ) (#6937 )	2021-05-04 16:09:49 +08:00
Jiaming Yuan	96f8843694	[backport] CI fixes (#6933 ) * Relax shotgun test. (#6900) It's non-deterministic algorithm, the test is flaky. * Disable pylint error. (#6911) * [CI] Skip external memory gtest on osx. (#6901) * [CI] Fix custom metric test with empty dataset. (#6917) * Reduce Travis environment setup time. (#6912) * Remove unused r from travis. * Don't update homebrew. * Don't install indirect/unused dependencies like libgit2, wget, openssl. * Move graphviz installation to conda. * Relax shotgun test. (#6918) * Relax test for decision stump in distributed environment. (#6919) * Backport cupy fix.	2021-05-03 21:30:26 +08:00
Jiaming Yuan	a6d1fbf8d1	Fix warning on Windows. (#6883 )	2021-04-22 02:57:23 +08:00
				`@@ -0,0 +1 @@`
				`../../../../xgboost4j/src/main/resources/xgboost4j-version.properties`
@@ -1 +1 @@
-1.4.1
+1.4.2