sync Jun 1

Your Name 2023-06-01 15:55:06 -07:00
commit 42867a4805
76 changed files with 1424 additions and 595 deletions

View File

@ -75,3 +75,13 @@ jobs:
if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
env:
RABIT_MOCK: ON
- name: Build and Test XGBoost4J with scala 2.13
run: |
rm -rfv build/
cd jvm-packages
mvn -B clean install test -Pdefault,scala-2.13
if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
env:
RABIT_MOCK: ON

View File

@ -171,6 +171,8 @@ if (USE_CUDA)
set(GEN_CODE "")
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
find_package(CUDAToolkit REQUIRED)
endif (USE_CUDA)
if (USE_HIP)

View File

@ -124,13 +124,6 @@ function(format_gencode_flags flags out)
endif (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
endfunction(format_gencode_flags flags)
macro(enable_nvtx target)
find_package(NVTX REQUIRED)
target_include_directories(${target} PRIVATE "${NVTX_INCLUDE_DIR}")
target_link_libraries(${target} PRIVATE "${NVTX_LIBRARY}")
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1)
endmacro()
# Set CUDA-related flags on the target. Must be used after `format_gencode_flags`.
function(xgboost_set_cuda_flags target)
target_compile_options(${target} PRIVATE
@ -162,11 +155,14 @@ function(xgboost_set_cuda_flags target)
endif (USE_DEVICE_DEBUG)
if (USE_NVTX)
enable_nvtx(${target})
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1)
endif (USE_NVTX)
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1)
target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/gputreeshap)
target_include_directories(
${target} PRIVATE
${xgboost_SOURCE_DIR}/gputreeshap
${CUDAToolkit_INCLUDE_DIRS})
if (MSVC)
target_compile_options(${target} PRIVATE
@ -314,7 +310,7 @@ macro(xgboost_target_link_libraries target)
endif (USE_NCCL)
if (USE_NVTX)
enable_nvtx(${target})
target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
endif (USE_NVTX)
if (RABIT_BUILD_MPI)

View File

@ -1,26 +0,0 @@
if (NVTX_LIBRARY)
unset(NVTX_LIBRARY CACHE)
endif (NVTX_LIBRARY)
set(NVTX_LIB_NAME nvToolsExt)
find_path(NVTX_INCLUDE_DIR
NAMES nvToolsExt.h
PATHS ${CUDA_HOME}/include ${CUDA_INCLUDE} /usr/local/cuda/include)
find_library(NVTX_LIBRARY
NAMES nvToolsExt
PATHS ${CUDA_HOME}/lib64 /usr/local/cuda/lib64)
message(STATUS "Using nvtx library: ${NVTX_LIBRARY}")
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NVTX DEFAULT_MSG
NVTX_INCLUDE_DIR NVTX_LIBRARY)
mark_as_advanced(
NVTX_INCLUDE_DIR
NVTX_LIBRARY
)

View File

@ -38,19 +38,18 @@ def using_dask_matrix(client: Client, X, y):
def using_quantile_device_dmatrix(client: Client, X, y):
"""`DaskQuantileDMatrix` is a data type specialized for `gpu_hist`, tree
method that reduces memory overhead. When training on GPU pipeline, it's
preferred over `DaskDMatrix`.
"""`DaskQuantileDMatrix` is a data type specialized for `gpu_hist` and `hist` tree
methods for reducing memory usage.
.. versionadded:: 1.2.0
"""
# Input must be on GPU for `DaskQuantileDMatrix`.
X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X))
y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y))
# `DaskQuantileDMatrix` is used instead of `DaskDMatrix`, be careful
# that it can not be used for anything else other than training.
# `DaskQuantileDMatrix` is used instead of `DaskDMatrix`; be careful that it cannot
# be used for anything other than training unless a reference is specified. See the
# `ref` argument of `DaskQuantileDMatrix` and the sketch below.
dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
output = xgb.dask.train(
client, {"verbosity": 2, "tree_method": "gpu_hist"}, dtrain, num_boost_round=4

View File

@ -27,20 +27,29 @@ In the following two sections, we will provide a step-by-step walkthrough of implementing
the ``Squared Log Error (SLE)`` objective function:
.. math::
\frac{1}{2}[log(pred + 1) - log(label + 1)]^2
\frac{1}{2}[\log(pred + 1) - \log(label + 1)]^2
and its default metric ``Root Mean Squared Log Error (RMSLE)``:
.. math::
\sqrt{\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}
\sqrt{\frac{1}{N}[\log(pred + 1) - \log(label + 1)]^2}
Although XGBoost has native support for these functions, using them for demonstration
gives us the opportunity to compare the results from our own implementation with the
ones from XGBoost internals for learning purposes. After finishing this tutorial, we
should be able to provide our own functions for rapid experiments. At the end, we will
provide some notes on non-identity link functions, along with examples of using custom
metrics and objectives with the `scikit-learn` interface.
If we compute the gradient of this objective function:
.. math::
g = \frac{\partial{objective}}{\partial{pred}} = \frac{\log(pred + 1) - \log(label + 1)}{pred + 1}
as well as the hessian (the second derivative of the objective):
.. math::
h = \frac{\partial^2{objective}}{\partial{pred}^2} = \frac{ - \log(pred + 1) + \log(label + 1) + 1}{(pred + 1)^2}
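To make these two formulas concrete, below is a minimal sketch of the pair as callables for ``xgb.train``, along the lines of XGBoost's own demo; the names ``squared_log`` and ``rmsle`` and the guard against invalid predictions are illustrative:

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    def squared_log(predt: np.ndarray, dtrain: xgb.DMatrix):
        """Squared Log Error objective, returning the gradient and hessian above."""
        y = dtrain.get_label()
        predt[predt < -1] = -1 + 1e-6  # keep log(pred + 1) well defined
        grad = (np.log1p(predt) - np.log1p(y)) / (predt + 1)
        hess = (-np.log1p(predt) + np.log1p(y) + 1) / np.power(predt + 1, 2)
        return grad, hess

    def rmsle(predt: np.ndarray, dtrain: xgb.DMatrix):
        """Root Mean Squared Log Error, the matching default metric."""
        y = dtrain.get_label()
        predt[predt < -1] = -1 + 1e-6
        elements = np.power(np.log1p(predt) - np.log1p(y), 2)
        return "RMSLE", float(np.sqrt(np.sum(elements) / len(y)))

Both are then passed to ``xgb.train`` through the ``obj`` and ``custom_metric`` arguments (``feval`` in older releases), typically with ``disable_default_eval_metric`` set.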
*****************************
Customized Objective Function

View File

@ -519,6 +519,9 @@ Troubleshooting
the ``NCCL_SOCKET_IFNAME``. In addition, you can use ``NCCL_DEBUG`` to obtain debug
logs; a sketch of setting these variables from Python follows this list.
- If NCCL fails to initialize in a container environment, it might be caused by limited
system shared memory. With Docker, one can try the flag `--shm-size=4g`.
- MIG (Multi-Instance GPU) is not yet supported by NCCL. You will receive an error message
that includes `Multiple processes within a communication group ...` upon initialization.
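A minimal sketch of setting these variables from Python before the distributed workers start; the interface name is a placeholder for whatever interface your cluster actually uses:

.. code-block:: python

    import os

    os.environ["NCCL_SOCKET_IFNAME"] = "eth0"  # placeholder interface name
    os.environ["NCCL_DEBUG"] = "INFO"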

View File

@ -1,5 +1,5 @@
/*!
* Copyright 2018 XGBoost contributors
/**
* Copyright 2018-2023, XGBoost contributors
* \brief span class based on ISO C++20 span
*
* About NOLINTs in this file:
@ -32,21 +32,17 @@
#include <xgboost/base.h>
#include <xgboost/logging.h>
#include <cinttypes> // size_t
#include <limits> // numeric_limits
#include <iterator>
#include <type_traits>
#include <cinttypes> // size_t
#include <cstdio>
#include <iterator>
#include <limits> // numeric_limits
#include <type_traits>
#include <utility> // for move
#if defined(__CUDACC__)
#include <cuda_runtime.h>
#elif defined(__HIP_PLATFORM_AMD__)
#include <hip/hip_runtime.h>
extern "C" void __assert_fail (const char *__assertion, const char *__file,
unsigned int __line, const char *__function)
noexcept (true) __attribute__ ((__noreturn__));
#endif
/*!
@ -127,7 +123,7 @@ namespace common {
#define __ASSERT_STR_HELPER(x) #x
#if 1
#if 0
#define HIP_KERNEL_CHECK(cond) \
(XGBOOST_EXPECT((cond), true) \
? static_cast<void>(0) \
@ -710,6 +706,44 @@ XGBOOST_DEVICE auto as_writable_bytes(Span<T, E> s) __span_noexcept -> // NOLIN
Span<byte, detail::ExtentAsBytesValue<T, E>::value> {
return {reinterpret_cast<byte*>(s.data()), s.size_bytes()};
}
/**
* \brief A simple custom Span type that uses a general iterator instead of a pointer.
*/
template <typename It>
class IterSpan {
public:
using value_type = typename std::iterator_traits<It>::value_type; // NOLINT
using index_type = std::size_t; // NOLINT
using iterator = It; // NOLINT
private:
It it_;
index_type size_{0};
public:
IterSpan() = default;
XGBOOST_DEVICE IterSpan(It it, index_type size) : it_{std::move(it)}, size_{size} {}
XGBOOST_DEVICE explicit IterSpan(common::Span<It, dynamic_extent> span)
: it_{span.data()}, size_{span.size()} {}
[[nodiscard]] XGBOOST_DEVICE index_type size() const noexcept { return size_; } // NOLINT
[[nodiscard]] XGBOOST_DEVICE decltype(auto) operator[](index_type i) const { return it_[i]; }
[[nodiscard]] XGBOOST_DEVICE decltype(auto) operator[](index_type i) { return it_[i]; }
[[nodiscard]] XGBOOST_DEVICE bool empty() const noexcept { return size() == 0; } // NOLINT
[[nodiscard]] XGBOOST_DEVICE It data() const noexcept { return it_; } // NOLINT
[[nodiscard]] XGBOOST_DEVICE IterSpan<It> subspan( // NOLINT
index_type _offset, index_type _count = dynamic_extent) const {
SPAN_CHECK((_count == dynamic_extent) ? (_offset <= size()) : (_offset + _count <= size()));
return {data() + _offset, _count == dynamic_extent ? size() - _offset : _count};
}
[[nodiscard]] XGBOOST_DEVICE constexpr iterator begin() const noexcept { // NOLINT
return it_;
}
[[nodiscard]] XGBOOST_DEVICE constexpr iterator end() const noexcept { // NOLINT
return it_ + size();
}
};
} // namespace common
} // namespace xgboost

View File

@ -1,2 +1,4 @@
tracker.py
build.sh
xgboost4j-tester/pom.xml
xgboost4j-tester/iris.csv

View File

@ -36,6 +36,19 @@ XGBoost4J, XGBoost4J-Spark, etc. in maven repository is compiled with g++-4.8.5.
<version>latest_version_num</version>
</dependency>
```
or, for Scala 2.13:
```
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_2.13</artifactId>
<version>latest_version_num</version>
</dependency>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_2.13</artifactId>
<version>latest_version_num</version>
</dependency>
```
<b>sbt</b>
```sbt
@ -47,7 +60,6 @@ libraryDependencies ++= Seq(
For the latest release version number, please check [here](https://github.com/dmlc/xgboost/releases).
To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead.
### Access SNAPSHOT version
@ -85,6 +97,19 @@ Then add XGBoost4J as a dependency:
<version>latest_version_num-SNAPSHOT</version>
</dependency>
```
or, with Scala 2.13:
```
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_2.13</artifactId>
<version>latest_version_num-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_2.13</artifactId>
<version>latest_version_num-SNAPSHOT</version>
</dependency>
```
<b>sbt</b>
```sbt
@ -96,7 +121,9 @@ libraryDependencies ++= Seq(
For the latest release version number, please check [the repository listing](https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/list.html).
### GPU algorithm
To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead.
Note that Scala 2.13 is not yet supported by [NVIDIA/spark-rapids](https://github.com/NVIDIA/spark-rapids/issues/1525), so the GPU algorithm can only be used with Scala 2.12.
## Examples

View File

@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<packaging>pom</packaging>
<name>XGBoost JVM Package</name>
@ -33,7 +33,8 @@
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<flink.version>1.17.0</flink.version>
<flink.version>1.17.1</flink.version>
<junit.version>4.13.2</junit.version>
<spark.version>3.4.0</spark.version>
<spark.version.gpu>3.3.2</spark.version.gpu>
<scala.version>2.12.17</scala.version>
@ -45,7 +46,9 @@
<cudf.version>23.04.0</cudf.version>
<spark.rapids.version>23.04.1</spark.rapids.version>
<cudf.classifier>cuda11</cudf.classifier>
</properties>
<scalatest.version>3.2.16</scalatest.version>
<scala-collection-compat.version>2.10.0</scala-collection-compat.version>
</properties>
<repositories>
<repository>
<id>central_maven</id>
@ -71,6 +74,14 @@
</modules>
</profile>
<profile>
<id>scala-2.13</id>
<properties>
<scala.binary.version>2.13</scala.binary.version>
<scala.version>2.13.10</scala.version>
</properties>
</profile>
<!-- gpu profile with both cpu and gpu test suites -->
<profile>
<id>gpu</id>
@ -451,7 +462,7 @@
<plugins>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.4.3</version>
<version>3.4.4</version>
</plugin>
<plugin>
<groupId>net.alchim31.maven</groupId>
@ -467,6 +478,7 @@
</plugins>
</reporting>
<dependencies>
<dependency>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo</artifactId>
@ -483,6 +495,11 @@
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>org.scala-lang.modules</groupId>
<artifactId>scala-collection-compat_${scala.binary.version}</artifactId>
<version>${scala-collection-compat.version}</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
@ -491,13 +508,13 @@
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<version>3.2.16</version>
<version>${scalatest.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalactic</groupId>
<artifactId>scalactic_${scala.binary.version}</artifactId>
<version>3.2.15</version>
<version>${scalatest.version}</version>
<scope>test</scope>
</dependency>
</dependencies>

View File

@ -5,10 +5,11 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>xgboost4j-example_2.12</artifactId>
<name>xgboost4j-example</name>
<artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version>
<packaging>jar</packaging>
<build>

View File

@ -73,12 +73,13 @@ object DistTrainWithFlink {
.map(_.f1.f0)
.returns(testDataTypeHint)
val paramMap = mapAsJavaMap(Map(
("eta", "0.1".asInstanceOf[AnyRef]),
("max_depth", "2"),
("objective", "binary:logistic"),
("verbosity", "1")
))
val paramMap = Map(
("eta", "0.1".asInstanceOf[AnyRef]),
("max_depth", "2"),
("objective", "binary:logistic"),
("verbosity", "1")
)
.asJava
// number of iterations
val round = 2

View File

@ -20,10 +20,9 @@ import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature._
import org.apache.spark.ml.tuning._
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.types._
import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassifier, XGBoostClassificationModel}
import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier}
// this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)
@ -50,6 +49,13 @@ object SparkMLlibPipeline {
.appName("XGBoost4J-Spark Pipeline Example")
.getOrCreate()
run(spark, inputPath, nativeModelPath, pipelineModelPath, treeMethod, numWorkers)
.show(false)
}
private[spark] def run(spark: SparkSession, inputPath: String, nativeModelPath: String,
pipelineModelPath: String, treeMethod: String,
numWorkers: Int): DataFrame = {
// Load dataset
val schema = new StructType(Array(
StructField("sepal length", DoubleType, true),
@ -90,11 +96,11 @@ object SparkMLlibPipeline {
val labelConverter = new IndexToString()
.setInputCol("prediction")
.setOutputCol("realLabel")
.setLabels(labelIndexer.labels)
.setLabels(labelIndexer.labelsArray(0))
val pipeline = new Pipeline()
.setStages(Array(assembler, labelIndexer, booster, labelConverter))
val model = pipeline.fit(training)
val model: PipelineModel = pipeline.fit(training)
// Batch prediction
val prediction = model.transform(test)
@ -136,6 +142,6 @@ object SparkMLlibPipeline {
// Load a saved model and serving
val model2 = PipelineModel.load(pipelineModelPath)
model2.transform(test).show(false)
model2.transform(test)
}
}

View File

@ -17,9 +17,8 @@
package ml.dmlc.xgboost4j.scala.example.spark
import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
// this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)
@ -38,6 +37,12 @@ object SparkTraining {
val spark = SparkSession.builder().getOrCreate()
val inputPath = args(0)
val results: DataFrame = run(spark, inputPath, treeMethod, numWorkers)
results.show()
}
private[spark] def run(spark: SparkSession, inputPath: String,
treeMethod: String, numWorkers: Int): DataFrame = {
val schema = new StructType(Array(
StructField("sepal length", DoubleType, true),
StructField("sepal width", DoubleType, true),
@ -81,7 +86,6 @@ object SparkTraining {
setFeaturesCol("features").
setLabelCol("classIndex")
val xgbClassificationModel = xgbClassifier.fit(train)
val results = xgbClassificationModel.transform(test)
results.show()
xgbClassificationModel.transform(test)
}
}

View File

@ -0,0 +1,123 @@
/*
Copyright (c) 2014-2023 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ml.dmlc.xgboost4j.scala.example.spark
import org.apache.spark.sql.SparkSession
import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite
import org.slf4j.LoggerFactory
import java.io.File
import java.nio.file.{Files, StandardOpenOption}
import scala.jdk.CollectionConverters._
import scala.util.{Random, Try}
class SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll {
private val logger = LoggerFactory.getLogger(classOf[SparkExamplesTest])
private val random = new Random(42)
protected val numWorkers: Int = scala.math.min(Runtime.getRuntime.availableProcessors(), 4)
private val pathToTestDataset = Files.createTempFile("", "iris.csv").toAbsolutePath
private var spark: SparkSession = _
override def beforeAll(): Unit = {
def generateLine(i: Int): String = {
val getIrisName = (int: Int) => {
int % 3 match {
case 0 => "Iris-versicolor"
case 1 => "Iris-virginica"
case 2 => "Iris-setosa"
}
}
val generateValue = () => Math.abs(random.nextInt(99) * 0.1)
val sepalLength = generateValue()
val sepalWidth = generateValue()
val petalLength = generateValue()
val petalWidth = generateValue()
val irisName = getIrisName(Math.abs(random.nextInt()) + i)
s"$sepalLength,$sepalWidth,$petalLength,$petalWidth,$irisName"
}
if (spark == null) {
spark = SparkSession
.builder()
.appName("XGBoost4J-Spark Pipeline Example")
.master(s"local[${numWorkers}]")
.config("spark.ui.enabled", value = false)
.config("spark.driver.memory", "512m")
.config("spark.barrier.sync.timeout", 10)
.config("spark.task.cpus", 1)
.getOrCreate()
spark.sparkContext.setLogLevel("ERROR")
}
val data = (0 until 150)
.map(i => generateLine(i))
.toList
.asJava
Files.write(pathToTestDataset,
data,
StandardOpenOption.CREATE,
StandardOpenOption.WRITE,
StandardOpenOption.TRUNCATE_EXISTING)
logger.info(s"${new String(Files.readAllBytes(pathToTestDataset))}")
}
override def afterAll(): Unit = {
if (spark != null) {
spark.stop()
cleanExternalCache(spark.sparkContext.appName)
spark = null
}
Try(Files.deleteIfExists(pathToTestDataset))
.recover {
case e =>
logger.warn(
s"Could not delete temporary file $pathToTestDataset. Please, remove it manually",
e
)
true
}
}
private def cleanExternalCache(prefix: String): Unit = {
val dir = new File(".")
for (file <- dir.listFiles() if file.getName.startsWith(prefix)) {
file.delete()
}
}
test("Smoke test for SparkMLlibPipeline example") {
SparkMLlibPipeline.run(spark, pathToTestDataset.toString, "target/native-model",
"target/pipeline-model", "auto", 2)
}
test("Smoke test for SparkTraining example") {
val spark = SparkSession
.builder()
.appName("XGBoost4J-Spark Pipeline Example")
.master(s"local[${numWorkers}]")
.config("spark.ui.enabled", value = false)
.config("spark.driver.memory", "512m")
.config("spark.barrier.sync.timeout", 10)
.config("spark.task.cpus", 1)
.getOrCreate()
SparkTraining.run(spark, pathToTestDataset.toString, "auto", 2)
}
}

View File

@ -5,9 +5,11 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<name>xgboost4j-flink</name>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version>
<properties>

View File

@ -5,10 +5,11 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>xgboost4j-gpu_2.12</artifactId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<name>xgboost4j-gpu</name>
<version>2.0.0-SNAPSHOT</version>
<packaging>jar</packaging>
@ -35,13 +36,13 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<version>3.2.15</version>
<version>${scalatest.version}</version>
<scope>provided</scope>
</dependency>
<dependency>

View File

@ -5,10 +5,11 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<name>xgboost4j-spark-gpu</name>
<artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
<build>
<plugins>
<plugin>
@ -24,7 +25,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

View File

@ -5,10 +5,11 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>xgboost4j-spark_2.12</artifactId>
<name>xgboost4j-spark</name>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<build>
<plugins>
<plugin>
@ -24,7 +25,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

View File

@ -8,25 +8,28 @@ pom_template = """
<modelVersion>4.0.0</modelVersion>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-tester_2.12</artifactId>
<artifactId>xgboost4j-tester_{scala_binary_version}</artifactId>
<version>1.0-SNAPSHOT</version>
<name>xgboost4j-tester_2.12</name>
<name>xgboost4j-tester</name>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>{maven_compiler_source}</maven.compiler.source>
<maven.compiler.target>{maven_compiler_target}</maven.compiler.target>
<junit.version>4.13.2</junit.version>
<spark.version>{spark_version}</spark.version>
<scala.version>{scala_version}</scala.version>
<scalatest.version>3.2.15</scalatest.version>
<scala.binary.version>{scala_binary_version}</scala.binary.version>
<kryo.version>5.5.0</kryo.version>
</properties>
<dependencies>
<dependency>
<dependency>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo</artifactId>
<version>4.0.2</version>
<version>${{kryo.version}}</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
@ -48,29 +51,12 @@ pom_template = """
<artifactId>commons-logging</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<artifactId>akka-testkit_${{scala.binary.version}}</artifactId>
<version>2.6.20</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${{scala.binary.version}}</artifactId>
<version>3.0.8</version>
<version>${{scalatest.version}}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalactic</groupId>
<artifactId>scalactic_${{scala.binary.version}}</artifactId>
<version>3.2.15</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.9</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${{scala.binary.version}}</artifactId>
@ -92,7 +78,7 @@ pom_template = """
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<version>${{junit.version}}</version>
<scope>test</scope>
</dependency>
<dependency>
@ -122,36 +108,9 @@ pom_template = """
<build>
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
@ -171,22 +130,12 @@ pom_template = """
</execution>
</executions>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
<configuration>
<dependenciesToScan>
<dependency>ml.dmlc:xgboost4j_2.12</dependency>
<dependency>ml.dmlc:xgboost4j_${{scala.binary.version}}</dependency>
</dependenciesToScan>
</configuration>
</plugin>

View File

@ -1,20 +0,0 @@
package ml.dmlc.xgboost4j.tester;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
/**
* Unit test for simple App.
*/
public class AppTest
{
/**
* Rigorous Test :-)
*/
@Test
public void shouldAnswerWithTrue()
{
assertTrue( true );
}
}

View File

@ -5,10 +5,11 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>xgboost4j_2.12</artifactId>
<name>xgboost4j</name>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version>
<packaging>jar</packaging>
@ -28,13 +29,13 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<version>3.2.16</version>
<version>${scalatest.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>

View File

@ -37,7 +37,7 @@ trait EvalTrait extends IEvaluation {
*/
def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float
private[scala] def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = {
def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = {
require(predicts.length == jdmat.getLabel.length, "predicts size and label size must match " +
s" predicts size: ${predicts.length}, label size: ${jdmat.getLabel.length}")
eval(predicts, new DMatrix(jdmat))

View File

@ -31,7 +31,7 @@ trait ObjectiveTrait extends IObjective {
*/
def getGradient(predicts: Array[Array[Float]], dtrain: DMatrix): List[Array[Float]]
private[scala] def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix):
def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix):
java.util.List[Array[Float]] = {
getGradient(predicts, new DMatrix(dtrain)).asJava
}

View File

@ -17,12 +17,11 @@
package ml.dmlc.xgboost4j.scala
import java.io.InputStream
import ml.dmlc.xgboost4j.java.{XGBoostError, XGBoost => JXGBoost}
import ml.dmlc.xgboost4j.java.{XGBoostError, Booster => JBooster, XGBoost => JXGBoost}
import scala.collection.JavaConverters._
import scala.jdk.CollectionConverters._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.Path
/**
* XGBoost Scala Training function.
@ -40,7 +39,12 @@ object XGBoost {
earlyStoppingRound: Int = 0,
prevBooster: Booster,
checkpointParams: Option[ExternalCheckpointParams]): Booster = {
val jWatches = watches.mapValues(_.jDMatrix).asJava
// we have to filter null value for customized obj and eval
val jParams: java.util.Map[String, AnyRef] =
params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).toMap.asJava
val jWatches = watches.mapValues(_.jDMatrix).toMap.asJava
val jBooster = if (prevBooster == null) {
null
} else {
@ -51,8 +55,7 @@ object XGBoost {
map(cp => {
JXGBoost.trainAndSaveCheckpoint(
dtrain.jDMatrix,
// we have to filter null value for customized obj and eval
params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava,
jParams,
numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster,
cp.checkpointInterval,
cp.checkpointPath,
@ -61,8 +64,7 @@ object XGBoost {
getOrElse(
JXGBoost.train(
dtrain.jDMatrix,
// we have to filter null value for customized obj and eval
params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava,
jParams,
numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster)
)
if (prevBooster == null) {

View File

@ -82,9 +82,10 @@ def from_pystr_to_cstr(data: Union[str, List[str]]) -> Union[bytes, ctypes.Array
if isinstance(data, str):
return bytes(data, "utf-8")
if isinstance(data, list):
pointers: ctypes.Array[ctypes.c_char_p] = (ctypes.c_char_p * len(data))()
data_as_bytes = [bytes(d, "utf-8") for d in data]
pointers[:] = data_as_bytes # type: ignore
data_as_bytes: List[bytes] = [bytes(d, "utf-8") for d in data]
pointers: ctypes.Array[ctypes.c_char_p] = (
ctypes.c_char_p * len(data_as_bytes)
)(*data_as_bytes)
return pointers
raise TypeError()
@ -319,7 +320,7 @@ def _cuda_array_interface(data: DataType) -> bytes:
def ctypes2numpy(cptr: CNumericPtr, length: int, dtype: Type[np.number]) -> np.ndarray:
"""Convert a ctypes pointer array to a numpy array."""
ctype: Type[CNumeric] = _numpy2ctypes_type(dtype)
if not isinstance(cptr, ctypes.POINTER(ctype)): # type: ignore
if not isinstance(cptr, ctypes.POINTER(ctype)):
raise RuntimeError(f"expected {ctype} pointer")
res = np.zeros(length, dtype=dtype)
if not ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0]):
@ -2460,9 +2461,9 @@ class Booster:
raise TypeError("Unknown file type: ", fname)
if self.attr("best_iteration") is not None:
self.best_iteration = int(self.attr("best_iteration")) # type: ignore
self.best_iteration = int(cast(int, self.attr("best_iteration")))
if self.attr("best_score") is not None:
self.best_score = float(self.attr("best_score")) # type: ignore
self.best_score = float(cast(float, self.attr("best_score")))
def num_boosted_rounds(self) -> int:
"""Get number of boosted rounds. For gblinear this is reset to 0 after

View File

@ -882,7 +882,7 @@ def _transform_cupy_array(data: DataType) -> CupyT:
if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"):
data = cupy.array(data, copy=False)
if data.dtype.hasobject or data.dtype in [cupy.float16, cupy.bool_]:
if data.dtype.hasobject or data.dtype in [cupy.bool_]:
data = data.astype(cupy.float32, copy=False)
return data

View File

@ -337,11 +337,9 @@ class _SparkXGBParams(
if self.getOrDefault(self.features_cols):
if not self.getOrDefault(self.use_gpu):
raise ValueError("features_cols param requires enabling use_gpu.")
get_logger(self.__class__.__name__).warning(
"If features_cols param set, then features_col param is ignored."
)
raise ValueError(
"features_col param with list value requires enabling use_gpu."
)
if self.getOrDefault("objective") is not None:
if not isinstance(self.getOrDefault("objective"), str):
@ -547,6 +545,8 @@ FeatureProp = namedtuple(
class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
_input_kwargs: Dict[str, Any]
def __init__(self) -> None:
super().__init__()
self._set_xgb_params_default()
@ -576,6 +576,11 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
raise ValueError("Invalid param name: 'arbitrary_params_dict'.")
for k, v in kwargs.items():
# We're not allowing users to use features_cols directly.
if k == self.features_cols.name:
raise ValueError(
f"Unsupported param '{k}' please use features_col instead."
)
if k in _inverse_pyspark_param_alias_map:
raise ValueError(
f"Please use param name {_inverse_pyspark_param_alias_map[k]} instead."
@ -591,7 +596,10 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
k = real_k
if self.hasParam(k):
self._set(**{str(k): v})
if k == "features_col" and isinstance(v, list):
self._set(**{"features_cols": v})
else:
self._set(**{str(k): v})
else:
if (
k in _unsupported_xgb_params

View File

@ -1,10 +1,13 @@
"""Xgboost pyspark integration submodule for estimator API."""
# pylint: disable=too-many-ancestors
# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
# pylint: disable=unused-argument, too-many-locals
from typing import Any, Type
from typing import Any, Dict, List, Optional, Type, Union
import numpy as np
from pyspark import keyword_only
from pyspark.ml.param import Param, Params
from pyspark.ml.param.shared import HasProbabilityCol, HasRawPredictionCol
@ -83,8 +86,8 @@ class SparkXGBRegressor(_SparkXGBEstimator):
:py:class:`~pyspark.ml.classification.OneVsRest`
SparkXGBRegressor automatically supports most of the parameters in
`xgboost.XGBRegressor` constructor and most of the parameters used in
:py:class:`xgboost.XGBRegressor` fit and predict method.
:py:class:`xgboost.XGBRegressor` constructor and most of the parameters used in
:py:meth:`xgboost.XGBRegressor.fit` and :py:meth:`xgboost.XGBRegressor.predict` method.
SparkXGBRegressor doesn't support setting `gpu_id` but supports another param `use_gpu`;
see the doc below for more details.
@ -97,13 +100,23 @@ class SparkXGBRegressor(_SparkXGBEstimator):
SparkXGBRegressor doesn't support setting the `nthread` xgboost param; instead, the `nthread`
param for each xgboost worker will be set equal to the `spark.task.cpus` config value.
callbacks:
The export and import of the callback functions are at best effort.
For details, see :py:attr:`xgboost.spark.SparkXGBRegressor.callbacks` param doc.
validation_indicator_col
For params related to `xgboost.XGBRegressor` training
with evaluation dataset's supervision, set
:py:attr:`xgboost.spark.SparkXGBRegressor.validation_indicator_col`
Parameters
----------
features_col:
When the value is a string, it requires the features column to be of vector type.
When the value is a list of strings, it requires all the feature columns to be numeric types.
label_col:
Label column name. Defaults to "label".
prediction_col:
Prediction column name. Defaults to "prediction".
pred_contrib_col:
Contribution prediction column name.
validation_indicator_col:
For params related to `xgboost.XGBRegressor` training with
evaluation dataset's supervision,
set :py:attr:`xgboost.spark.SparkXGBRegressor.validation_indicator_col`
parameter instead of setting the `eval_set` parameter in `xgboost.XGBRegressor`
fit method.
weight_col:
@ -111,26 +124,40 @@ class SparkXGBRegressor(_SparkXGBEstimator):
:py:attr:`xgboost.spark.SparkXGBRegressor.weight_col` parameter instead of setting
`sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBRegressor`
fit method.
xgb_model:
Set the value to be the instance returned by
:func:`xgboost.spark.SparkXGBRegressorModel.get_booster`.
num_workers:
Integer that specifies the number of XGBoost workers to use.
Each XGBoost worker corresponds to one spark task.
use_gpu:
Boolean that specifies whether the executors are running on GPU
instances.
base_margin_col:
To specify the base margins of the training and validation
dataset, set :py:attr:`xgboost.spark.SparkXGBRegressor.base_margin_col` parameter
instead of setting `base_margin` and `base_margin_eval_set` in the
`xgboost.XGBRegressor` fit method. Note: this isn't available for distributed
training.
`xgboost.XGBRegressor` fit method.
.. Note:: The Parameters chart above contains parameters that need special handling.
For a full list of parameters, see entries with `Param(parent=...` below.
num_workers:
How many XGBoost workers to use for training.
Each XGBoost worker corresponds to one spark task.
use_gpu:
Boolean value to specify whether the executors are running on GPU
instances.
force_repartition:
Boolean value to specify whether to force repartitioning of the input dataset
before XGBoost training.
repartition_random_shuffle:
Boolean value to specify whether to randomly shuffle the dataset when repartitioning
is required.
enable_sparse_data_optim:
Boolean value to specify whether to enable sparse data optimization. If True, the
XGBoost DMatrix object will be constructed from a sparse matrix instead of a
dense matrix.
kwargs:
A dictionary of xgboost parameters, please refer to
https://xgboost.readthedocs.io/en/stable/parameter.html
Note
----
The Parameters chart above contains parameters that need special handling.
For a full list of parameters, see entries with `Param(parent=...` below.
This API is experimental.
.. Note:: This API is experimental.
Examples
--------
@ -155,9 +182,27 @@ class SparkXGBRegressor(_SparkXGBEstimator):
"""
def __init__(self, **kwargs: Any) -> None:
@keyword_only
def __init__(
self,
*,
features_col: Union[str, List[str]] = "features",
label_col: str = "label",
prediction_col: str = "prediction",
pred_contrib_col: Optional[str] = None,
validation_indicator_col: Optional[str] = None,
weight_col: Optional[str] = None,
base_margin_col: Optional[str] = None,
num_workers: int = 1,
use_gpu: bool = False,
force_repartition: bool = False,
repartition_random_shuffle: bool = False,
enable_sparse_data_optim: bool = False,
**kwargs: Dict[str, Any],
) -> None:
super().__init__()
self.setParams(**kwargs)
input_kwargs = self._input_kwargs
self.setParams(**input_kwargs)
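A minimal usage sketch of this keyword-only constructor; `df_train`/`df_test` and the column names are illustrative, not part of this file:

```python
from xgboost.spark import SparkXGBRegressor

# `df_train` is assumed to have a vector-typed "features" column, a "label"
# column, and a boolean "isVal" column marking the evaluation rows.
regressor = SparkXGBRegressor(
    features_col="features",
    label_col="label",
    validation_indicator_col="isVal",
    num_workers=2,
    max_depth=5,  # plain xgboost parameters pass through **kwargs
)
model = regressor.fit(df_train)
model.transform(df_test).show()
```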
@classmethod
def _xgb_cls(cls) -> Type[XGBRegressor]:
@ -199,8 +244,8 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
:py:class:`~pyspark.ml.classification.OneVsRest`
SparkXGBClassifier automatically supports most of the parameters in
`xgboost.XGBClassifier` constructor and most of the parameters used in
:py:class:`xgboost.XGBClassifier` fit and predict method.
:py:class:`xgboost.XGBClassifier` constructor and most of the parameters used in
:py:meth:`xgboost.XGBClassifier.fit` and :py:meth:`xgboost.XGBClassifier.predict` method.
SparkXGBClassifier doesn't support setting `gpu_id` but supports another param `use_gpu`;
see the doc below for more details.
@ -220,13 +265,21 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
Parameters
----------
callbacks:
The export and import of the callback functions are at best effort. For
details, see :py:attr:`xgboost.spark.SparkXGBClassifier.callbacks` param doc.
features_col:
When the value is a string, it requires the features column to be of vector type.
When the value is a list of strings, it requires all the feature columns to be numeric types.
label_col:
Label column name. Defaults to "label".
prediction_col:
Prediction column name. Defaults to "prediction".
probability_col:
Column name for predicted class conditional probabilities. Defaults to "probability".
raw_prediction_col:
The `output_margin=True` is implicitly supported by the
`rawPredictionCol` output column, which is always returned with the predicted margin
values.
pred_contrib_col:
Contribution prediction column name.
validation_indicator_col:
For params related to `xgboost.XGBClassifier` training with
evaluation dataset's supervision,
@ -238,26 +291,39 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
:py:attr:`xgboost.spark.SparkXGBClassifier.weight_col` parameter instead of setting
`sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBClassifier`
fit method.
xgb_model:
Set the value to be the instance returned by
:func:`xgboost.spark.SparkXGBClassifierModel.get_booster`.
num_workers:
Integer that specifies the number of XGBoost workers to use.
Each XGBoost worker corresponds to one spark task.
use_gpu:
Boolean that specifies whether the executors are running on GPU
instances.
base_margin_col:
To specify the base margins of the training and validation
dataset, set :py:attr:`xgboost.spark.SparkXGBClassifier.base_margin_col` parameter
instead of setting `base_margin` and `base_margin_eval_set` in the
`xgboost.XGBClassifier` fit method. Note: this isn't available for distributed
training.
`xgboost.XGBClassifier` fit method.
.. Note:: The Parameters chart above contains parameters that need special handling.
For a full list of parameters, see entries with `Param(parent=...` below.
num_workers:
How many XGBoost workers to use for training.
Each XGBoost worker corresponds to one spark task.
use_gpu:
Boolean value to specify whether the executors are running on GPU
instances.
force_repartition:
Boolean value to specify whether to force repartitioning of the input dataset
before XGBoost training.
repartition_random_shuffle:
Boolean value to specify whether to randomly shuffle the dataset when repartitioning
is required.
enable_sparse_data_optim:
Boolean value to specify whether to enable sparse data optimization. If True, the
XGBoost DMatrix object will be constructed from a sparse matrix instead of a
dense matrix.
.. Note:: This API is experimental.
kwargs:
A dictionary of xgboost parameters, please refer to
https://xgboost.readthedocs.io/en/stable/parameter.html
Note
----
The Parameters chart above contains parameters that need special handling.
For a full list of parameters, see entries with `Param(parent=...` below.
This API is experimental.
Examples
--------
@ -281,14 +347,34 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
"""
def __init__(self, **kwargs: Any) -> None:
@keyword_only
def __init__(
self,
*,
features_col: Union[str, List[str]] = "features",
label_col: str = "label",
prediction_col: str = "prediction",
probability_col: str = "probability",
raw_prediction_col: str = "rawPrediction",
pred_contrib_col: Optional[str] = None,
validation_indicator_col: Optional[str] = None,
weight_col: Optional[str] = None,
base_margin_col: Optional[str] = None,
num_workers: int = 1,
use_gpu: bool = False,
force_repartition: bool = False,
repartition_random_shuffle: bool = False,
enable_sparse_data_optim: bool = False,
**kwargs: Dict[str, Any],
) -> None:
super().__init__()
# The default 'objective' param value comes from sklearn `XGBClassifier` ctor,
# but in pyspark we will automatically set objective param depending on
# binary or multinomial input dataset, and we need to remove the fixed default
# param value as well to avoid causing ambiguity.
input_kwargs = self._input_kwargs
self.setParams(**input_kwargs)
self._setDefault(objective=None)
self.setParams(**kwargs)
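A sketch of the list-valued `features_col` form which, per the check in `_SparkXGBParams` above, requires `use_gpu=True`; the column names and `df_train` are illustrative:

```python
from xgboost.spark import SparkXGBClassifier

# A list of numeric columns instead of a single vector column; this path
# requires GPU training per the validation logic shown earlier.
classifier = SparkXGBClassifier(
    features_col=["sepal length", "sepal width", "petal length", "petal width"],
    label_col="classIndex",
    use_gpu=True,
    num_workers=2,
)
model = classifier.fit(df_train)
```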
@classmethod
def _xgb_cls(cls) -> Type[XGBClassifier]:
@ -334,8 +420,8 @@ class SparkXGBRanker(_SparkXGBEstimator):
:py:class:`~pyspark.ml.classification.OneVsRest`
SparkXGBRanker automatically supports most of the parameters in
`xgboost.XGBRanker` constructor and most of the parameters used in
:py:class:`xgboost.XGBRanker` fit and predict method.
:py:class:`xgboost.XGBRanker` constructor and most of the parameters used in
:py:meth:`xgboost.XGBRanker.fit` and :py:meth:`xgboost.XGBRanker.predict` method.
SparkXGBRanker doesn't support setting `gpu_id` but supports another param `use_gpu`;
see the doc below for more details.
@ -355,39 +441,53 @@ class SparkXGBRanker(_SparkXGBEstimator):
Parameters
----------
callbacks:
The export and import of the callback functions are at best effort. For
details, see :py:attr:`xgboost.spark.SparkXGBRanker.callbacks` param doc.
features_col:
When the value is a string, it requires the features column to be of vector type.
When the value is a list of strings, it requires all the feature columns to be numeric types.
label_col:
Label column name. Defaults to "label".
prediction_col:
Prediction column name. Defaults to "prediction".
pred_contrib_col:
Contribution prediction column name.
validation_indicator_col:
For params related to `xgboost.XGBRanker` training with
evaluation dataset's supervision,
set :py:attr:`xgboost.spark.XGBRanker.validation_indicator_col`
parameter instead of setting the `eval_set` parameter in `xgboost.XGBRanker`
set :py:attr:`xgboost.spark.SparkXGBRanker.validation_indicator_col`
parameter instead of setting the `eval_set` parameter in :py:class:`xgboost.XGBRanker`
fit method.
weight_col:
To specify the weight of the training and validation dataset, set
:py:attr:`xgboost.spark.SparkXGBRanker.weight_col` parameter instead of setting
`sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBRanker`
`sample_weight` and `sample_weight_eval_set` parameter in :py:class:`xgboost.XGBRanker`
fit method.
xgb_model:
Set the value to be the instance returned by
:func:`xgboost.spark.SparkXGBRankerModel.get_booster`.
num_workers:
Integer that specifies the number of XGBoost workers to use.
Each XGBoost worker corresponds to one spark task.
use_gpu:
Boolean that specifies whether the executors are running on GPU
instances.
base_margin_col:
To specify the base margins of the training and validation
dataset, set :py:attr:`xgboost.spark.SparkXGBRanker.base_margin_col` parameter
instead of setting `base_margin` and `base_margin_eval_set` in the
`xgboost.XGBRanker` fit method.
:py:class:`xgboost.XGBRanker` fit method.
qid_col:
To specify the qid of the training and validation
dataset, set :py:attr:`xgboost.spark.SparkXGBRanker.qid_col` parameter
instead of setting `qid` / `group`, `eval_qid` / `eval_group` in the
`xgboost.XGBRanker` fit method.
Query id column name.
num_workers:
How many XGBoost workers to use for training.
Each XGBoost worker corresponds to one spark task.
use_gpu:
Boolean value to specify whether the executors are running on GPU
instances.
force_repartition:
Boolean value to specify whether to force repartitioning of the input dataset
before XGBoost training.
repartition_random_shuffle:
Boolean value to specify whether to randomly shuffle the dataset when repartitioning
is required.
enable_sparse_data_optim:
Boolean value to specify whether to enable sparse data optimization. If True, the
XGBoost DMatrix object will be constructed from a sparse matrix instead of a
dense matrix.
kwargs:
A dictionary of xgboost parameters, please refer to
https://xgboost.readthedocs.io/en/stable/parameter.html
.. Note:: The Parameters chart above contains parameters that need special handling.
For a full list of parameters, see entries with `Param(parent=...` below.
@ -426,9 +526,28 @@ class SparkXGBRanker(_SparkXGBEstimator):
>>> model.transform(df_test).show()
"""
def __init__(self, **kwargs: Any) -> None:
@keyword_only
def __init__(
self,
*,
features_col: Union[str, List[str]] = "features",
label_col: str = "label",
prediction_col: str = "prediction",
pred_contrib_col: Optional[str] = None,
validation_indicator_col: Optional[str] = None,
weight_col: Optional[str] = None,
base_margin_col: Optional[str] = None,
qid_col: Optional[str] = None,
num_workers: int = 1,
use_gpu: bool = False,
force_repartition: bool = False,
repartition_random_shuffle: bool = False,
enable_sparse_data_optim: bool = False,
**kwargs: Dict[str, Any],
) -> None:
super().__init__()
self.setParams(**kwargs)
input_kwargs = self._input_kwargs
self.setParams(**input_kwargs)
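A sketch of the ranker-specific `qid_col` usage; `df_train` is an assumed Spark DataFrame with a query-id column:

```python
from xgboost.spark import SparkXGBRanker

# Rows sharing the same "qid" value belong to one query group; qid_col
# replaces the `qid`/`group` arguments of xgboost.XGBRanker.fit.
ranker = SparkXGBRanker(
    features_col="features",
    label_col="label",
    qid_col="qid",
    num_workers=2,
)
model = ranker.fit(df_train)
```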
@classmethod
def _xgb_cls(cls) -> Type[XGBRanker]:

View File

@ -3,6 +3,7 @@
*/
#pragma once
#include <string>
#include <vector>
#include "communicator.h"
@ -224,5 +225,46 @@ inline void Allreduce(double *send_receive_buffer, size_t count) {
Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kDouble, op);
}
template <typename T>
struct AllgatherVResult {
std::vector<std::size_t> offsets;
std::vector<std::size_t> sizes;
std::vector<T> result;
};
/**
* @brief Gathers variable-length data from all processes and distributes it to all processes.
*
* We assume each worker has the same number of inputs, but each input may be of a different size.
*
* @param inputs All the inputs from the local worker.
* @param sizes Sizes of each input.
*/
template <typename T>
inline AllgatherVResult<T> AllgatherV(std::vector<T> const &inputs,
std::vector<std::size_t> const &sizes) {
auto num_inputs = sizes.size();
// Gather the sizes across all workers.
std::vector<std::size_t> all_sizes(num_inputs * GetWorldSize());
std::copy_n(sizes.cbegin(), sizes.size(), all_sizes.begin() + num_inputs * GetRank());
collective::Allgather(all_sizes.data(), all_sizes.size() * sizeof(std::size_t));
// Calculate input offsets (std::exclusive_scan).
std::vector<std::size_t> offsets(all_sizes.size());
for (std::size_t i = 1; i < offsets.size(); i++) {
offsets[i] = offsets[i - 1] + all_sizes[i - 1];
}
// Gather all the inputs.
auto total_input_size = offsets.back() + all_sizes.back();
std::vector<T> all_inputs(total_input_size);
std::copy_n(inputs.cbegin(), inputs.size(), all_inputs.begin() + offsets[num_inputs * GetRank()]);
// We cannot use allgather here, since each worker might have a different size.
Allreduce<Operation::kMax>(all_inputs.data(), all_inputs.size());
return {offsets, all_sizes, all_inputs};
}
} // namespace collective
} // namespace xgboost
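For intuition, the zero-fill-then-max trick used by `AllgatherV` above can be sketched in NumPy; this is a toy simulation of three single-input workers, not the actual communicator API, and it relies on the payload being non-negative:

```python
import numpy as np

# Each "worker" owns one variable-length input.
worker_inputs = [[3, 1], [4, 1, 5], [9]]
sizes = [len(x) for x in worker_inputs]
offsets = np.concatenate(([0], np.cumsum(sizes)[:-1]))
total = int(np.sum(sizes))

# Every worker writes its data into a zero-filled buffer at its offset...
buffers = np.zeros((len(worker_inputs), total))
for rank, data in enumerate(worker_inputs):
    buffers[rank, offsets[rank]:offsets[rank] + sizes[rank]] = data

# ...and an elementwise max across workers (Allreduce with kMax) then
# reconstructs the concatenation, since all other entries are zero.
result = buffers.max(axis=0)
print(result)  # [3. 1. 4. 1. 5. 9.]
```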

View File

@ -12,19 +12,22 @@
namespace xgboost {
namespace collective {
thread_local int Communicator::device_ordinal_{-1};
thread_local std::unique_ptr<DeviceCommunicator> Communicator::device_communicator_{};
void Communicator::Finalize() {
communicator_->Shutdown();
communicator_.reset(new NoOpCommunicator());
device_ordinal_ = -1;
device_communicator_.reset(nullptr);
}
DeviceCommunicator* Communicator::GetDevice(int device_ordinal) {
if (!device_communicator_ || device_ordinal_ != device_ordinal) {
device_ordinal_ = device_ordinal;
thread_local auto old_device_ordinal = -1;
// If the number of GPUs changes, we need to re-initialize NCCL.
thread_local auto old_world_size = -1;
if (!device_communicator_ || device_ordinal != old_device_ordinal ||
communicator_->GetWorldSize() != old_world_size) {
old_device_ordinal = device_ordinal;
old_world_size = communicator_->GetWorldSize();
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
if (type_ != CommunicatorType::kFederated) {
device_communicator_.reset(new NcclDeviceCommunicator(device_ordinal, Get()));

View File

@ -229,7 +229,6 @@ class Communicator {
static thread_local std::unique_ptr<Communicator> communicator_;
static thread_local CommunicatorType type_;
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
static thread_local int device_ordinal_;
static thread_local std::unique_ptr<DeviceCommunicator> device_communicator_;
#endif

View File

@ -1,5 +1,5 @@
/**
* Copyright 2022 by XGBoost Contributors
* Copyright 2022-2023, XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_CUDA_CONTEXT_CUH_
#define XGBOOST_COMMON_CUDA_CONTEXT_CUH_
@ -16,21 +16,39 @@ struct CUDAContext {
/**
* \brief Caching thrust policy.
*/
#if defined(XGBOOST_USE_HIP)
auto CTP() const { return thrust::hip::par(caching_alloc_).on(dh::DefaultStream()); }
auto CTP() const {
#if defined(XGBOOST_USE_CUDA)
#if THRUST_MAJOR_VERSION >= 2
return thrust::cuda::par_nosync(caching_alloc_).on(dh::DefaultStream());
#else
auto CTP() const { return thrust::cuda::par(caching_alloc_).on(dh::DefaultStream()); }
return thrust::cuda::par(caching_alloc_).on(dh::DefaultStream());
#endif // THRUST_MAJOR_VERSION >= 2
#elif defined(XGBOOST_USE_HIP)
#if THRUST_MAJOR_VERSION >= 2
return thrust::hip::par_nosync(caching_alloc_).on(dh::DefaultStream());
#else
return thrust::hip::par(caching_alloc_).on(dh::DefaultStream());
#endif // THRUST_MAJOR_VERSION >= 2
#endif
}
/**
* \brief Thrust policy without caching allocator.
*/
#if defined(XGBOOST_USE_HIP)
auto TP() const { return thrust::hip::par(alloc_).on(dh::DefaultStream()); }
auto TP() const {
#if defined(XGBOOST_USE_CUDA)
#if THRUST_MAJOR_VERSION >= 2
return thrust::cuda::par_nosync(alloc_).on(dh::DefaultStream());
#else
auto TP() const { return thrust::cuda::par(alloc_).on(dh::DefaultStream()); }
return thrust::cuda::par(alloc_).on(dh::DefaultStream());
#endif // THRUST_MAJOR_VERSION >= 2
#elif defined(XGBOOST_USE_HIP)
#if THRUST_MAJOR_VERSION >= 2
return thrust::hip::par_nosync(alloc_).on(dh::DefaultStream());
#else
return thrust::hip::par(alloc_).on(dh::DefaultStream());
#endif // THRUST_MAJOR_VERSION >= 2
#endif
}
auto Stream() const { return dh::DefaultStream(); }
};
} // namespace xgboost

View File

@ -227,9 +227,8 @@ void ProcessBatch(int device, MetaInfo const &info, const SparsePage &page,
return {0, e.index, e.fvalue}; // row_idx is not needed for scanning column size.
});
detail::GetColumnSizesScan(device, num_columns, num_cuts_per_feature,
batch_it, dummy_is_valid,
0, sorted_entries.size(),
&cuts_ptr, &column_sizes_scan);
IterSpan{batch_it, sorted_entries.size()}, dummy_is_valid, &cuts_ptr,
&column_sizes_scan);
auto d_cuts_ptr = cuts_ptr.DeviceSpan();
if (sketch_container->HasCategorical()) {
@ -296,9 +295,8 @@ void ProcessWeightedBatch(int device, const SparsePage& page,
return {0, e.index, e.fvalue}; // row_idx is not needed for scanning column size.
});
detail::GetColumnSizesScan(device, num_columns, num_cuts_per_feature,
batch_it, dummy_is_valid,
0, sorted_entries.size(),
&cuts_ptr, &column_sizes_scan);
IterSpan{batch_it, sorted_entries.size()}, dummy_is_valid, &cuts_ptr,
&column_sizes_scan);
auto d_cuts_ptr = cuts_ptr.DeviceSpan();
if (sketch_container->HasCategorical()) {
detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr,

View File

@ -17,6 +17,10 @@
#include "quantile.cuh"
#include "timer.h"
#if defined(XGBOOST_USE_HIP)
namespace cub = hipcub;
#endif
namespace xgboost {
namespace common {
namespace cuda {
@ -53,24 +57,128 @@ struct EntryCompareOp {
};
// Get column size from adapter batch and for output cuts.
template <typename Iter>
void GetColumnSizesScan(int device, size_t num_columns, size_t num_cuts_per_feature,
Iter batch_iter, data::IsValidFunctor is_valid,
size_t begin, size_t end,
HostDeviceVector<SketchContainer::OffsetT> *cuts_ptr,
template <std::uint32_t kBlockThreads, typename CounterT, typename BatchIt>
__global__ void GetColumnSizeSharedMemKernel(IterSpan<BatchIt> batch_iter,
data::IsValidFunctor is_valid,
Span<std::size_t> out_column_size) {
extern __shared__ char smem[];
auto smem_cs_ptr = reinterpret_cast<CounterT*>(smem);
dh::BlockFill(smem_cs_ptr, out_column_size.size(), 0);
cub::CTA_SYNC();
auto n = batch_iter.size();
for (auto idx : dh::GridStrideRange(static_cast<std::size_t>(0), n)) {
auto e = batch_iter[idx];
if (is_valid(e)) {
atomicAdd(&smem_cs_ptr[e.column_idx], static_cast<CounterT>(1));
}
}
cub::CTA_SYNC();
auto out_global_ptr = out_column_size;
for (auto i : dh::BlockStrideRange(static_cast<std::size_t>(0), out_column_size.size())) {
atomicAdd(&out_global_ptr[i], static_cast<std::size_t>(smem_cs_ptr[i]));
}
}
template <std::uint32_t kBlockThreads, typename Kernel>
std::uint32_t EstimateGridSize(std::int32_t device, Kernel kernel, std::size_t shared_mem) {
int n_mps = 0;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipDeviceGetAttribute(&n_mps, hipDeviceAttributeMultiprocessorCount, device));
#endif
int n_blocks_per_mp = 0;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
kBlockThreads, shared_mem));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
kBlockThreads, shared_mem));
#endif
std::uint32_t grid_size = n_blocks_per_mp * n_mps;
return grid_size;
}
/**
* \brief Get the size of each column. This is a histogram with additional handling of
* invalid values.
*
* \tparam BatchIt Type of input adapter batch.
* \tparam force_use_global_memory Used for testing. Force global atomic add.
* \tparam force_use_u64 Used for testing. Force u64 as the counter type in shared memory.
*
* \param device CUDA device ordinal.
* \param batch_iter Iterator for input data from adapter batch.
* \param is_valid Whether an element is considered missing.
* \param out_column_size Output buffer for the size of each column.
*/
template <typename BatchIt, bool force_use_global_memory = false, bool force_use_u64 = false>
void LaunchGetColumnSizeKernel(std::int32_t device, IterSpan<BatchIt> batch_iter,
data::IsValidFunctor is_valid, Span<std::size_t> out_column_size) {
thrust::fill_n(thrust::device, dh::tbegin(out_column_size), out_column_size.size(), 0);
std::size_t max_shared_memory = dh::MaxSharedMemory(device);
// Not strictly correct, as the number of samples should determine the counter
// type. However, the sample size is not known here due to the sliding window
// over the number of elements.
std::size_t n = batch_iter.size();
std::size_t required_shared_memory = 0;
bool use_u32{false};
if (!force_use_u64 && n < static_cast<std::size_t>(std::numeric_limits<std::uint32_t>::max())) {
required_shared_memory = out_column_size.size() * sizeof(std::uint32_t);
use_u32 = true;
} else {
required_shared_memory = out_column_size.size() * sizeof(std::size_t);
use_u32 = false;
}
bool use_shared = required_shared_memory <= max_shared_memory && required_shared_memory != 0;
if (!force_use_global_memory && use_shared) {
CHECK_NE(required_shared_memory, 0);
std::uint32_t constexpr kBlockThreads = 512;
if (use_u32) {
CHECK(!force_use_u64);
auto kernel = GetColumnSizeSharedMemKernel<kBlockThreads, std::uint32_t, BatchIt>;
auto grid_size = EstimateGridSize<kBlockThreads>(device, kernel, required_shared_memory);
dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory, dh::DefaultStream()}(
kernel, batch_iter, is_valid, out_column_size);
} else {
auto kernel = GetColumnSizeSharedMemKernel<kBlockThreads, std::size_t, BatchIt>;
auto grid_size = EstimateGridSize<kBlockThreads>(device, kernel, required_shared_memory);
dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory, dh::DefaultStream()}(
kernel, batch_iter, is_valid, out_column_size);
}
} else {
auto d_out_column_size = out_column_size;
dh::LaunchN(batch_iter.size(), [=] __device__(size_t idx) {
auto e = batch_iter[idx];
if (is_valid(e)) {
atomicAdd(&d_out_column_size[e.column_idx], static_cast<size_t>(1));
}
});
}
}
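For reference, the shared-memory path above follows the standard block-local histogram pattern: each block zeroes a shared-memory copy of the counters, accumulates with cheap shared-memory atomics, then flushes one global atomic per bin per block. The launcher additionally picks a 32- or 64-bit counter based on the element count, and falls back to plain global atomics when the counters do not fit in shared memory. A minimal standalone sketch of that pattern (hypothetical kernel in plain CUDA, no XGBoost helpers):

#include <cstddef>

// Block-local histogram: one cheap shared-memory atomic per element,
// one global atomic per bin per block when flushing.
__global__ void ColumnSizeHistogram(const int* col_idx, std::size_t n,
                                    unsigned int* out, std::size_t n_bins) {
  extern __shared__ unsigned int smem_counts[];
  // Zero the block-local counters cooperatively.
  for (std::size_t i = threadIdx.x; i < n_bins; i += blockDim.x) {
    smem_counts[i] = 0;
  }
  __syncthreads();
  // Grid-stride loop over the input.
  for (std::size_t i = static_cast<std::size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
       i < n; i += static_cast<std::size_t>(gridDim.x) * blockDim.x) {
    atomicAdd(&smem_counts[col_idx[i]], 1u);
  }
  __syncthreads();
  // Flush the block-local counts to the global histogram.
  for (std::size_t i = threadIdx.x; i < n_bins; i += blockDim.x) {
    atomicAdd(&out[i], smem_counts[i]);
  }
}

Such a kernel would be launched with n_bins * sizeof(unsigned int) bytes of dynamic shared memory and a grid sized from occupancy, which is what EstimateGridSize above computes.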
template <typename BatchIt>
void GetColumnSizesScan(int device, size_t num_columns, std::size_t num_cuts_per_feature,
IterSpan<BatchIt> batch_iter, data::IsValidFunctor is_valid,
HostDeviceVector<SketchContainer::OffsetT>* cuts_ptr,
dh::caching_device_vector<size_t>* column_sizes_scan) {
column_sizes_scan->resize(num_columns + 1, 0);
column_sizes_scan->resize(num_columns + 1);
cuts_ptr->SetDevice(device);
cuts_ptr->Resize(num_columns + 1, 0);
dh::XGBCachingDeviceAllocator<char> alloc;
auto d_column_sizes_scan = column_sizes_scan->data().get();
dh::LaunchN(end - begin, [=] __device__(size_t idx) {
auto e = batch_iter[begin + idx];
if (is_valid(e)) {
atomicAdd(&d_column_sizes_scan[e.column_idx], static_cast<size_t>(1));
}
});
auto d_column_sizes_scan = dh::ToSpan(*column_sizes_scan);
LaunchGetColumnSizeKernel(device, batch_iter, is_valid, d_column_sizes_scan);
// Calculate cuts CSC pointer
auto cut_ptr_it = dh::MakeTransformIterator<size_t>(
column_sizes_scan->begin(), [=] __device__(size_t column_size) {
@ -85,8 +193,7 @@ void GetColumnSizesScan(int device, size_t num_columns, size_t num_cuts_per_feat
column_sizes_scan->end(), column_sizes_scan->begin());
#elif defined(XGBOOST_USE_CUDA)
thrust::exclusive_scan(thrust::cuda::par(alloc), cut_ptr_it,
cut_ptr_it + column_sizes_scan->size(),
cuts_ptr->DevicePointer());
cut_ptr_it + column_sizes_scan->size(), cuts_ptr->DevicePointer());
thrust::exclusive_scan(thrust::cuda::par(alloc), column_sizes_scan->begin(),
column_sizes_scan->end(), column_sizes_scan->begin());
#endif
@ -130,29 +237,26 @@ size_t RequiredMemory(bst_row_t num_rows, bst_feature_t num_columns, size_t nnz,
// Count the valid entries in each column and copy them out.
template <typename AdapterBatch, typename BatchIter>
void MakeEntriesFromAdapter(AdapterBatch const& batch, BatchIter batch_iter,
Range1d range, float missing,
size_t columns, size_t cuts_per_feature, int device,
void MakeEntriesFromAdapter(AdapterBatch const& batch, BatchIter batch_iter, Range1d range,
float missing, size_t columns, size_t cuts_per_feature, int device,
HostDeviceVector<SketchContainer::OffsetT>* cut_sizes_scan,
dh::caching_device_vector<size_t>* column_sizes_scan,
dh::device_vector<Entry>* sorted_entries) {
auto entry_iter = dh::MakeTransformIterator<Entry>(
thrust::make_counting_iterator(0llu), [=] __device__(size_t idx) {
return Entry(batch.GetElement(idx).column_idx,
batch.GetElement(idx).value);
return Entry(batch.GetElement(idx).column_idx, batch.GetElement(idx).value);
});
auto n = range.end() - range.begin();
auto span = IterSpan{batch_iter + range.begin(), n};
data::IsValidFunctor is_valid(missing);
// Work out how many valid entries we have in each column
GetColumnSizesScan(device, columns, cuts_per_feature,
batch_iter, is_valid,
range.begin(), range.end(),
cut_sizes_scan,
GetColumnSizesScan(device, columns, cuts_per_feature, span, is_valid, cut_sizes_scan,
column_sizes_scan);
size_t num_valid = column_sizes_scan->back();
// Copy current subset of valid elements into temporary storage and sort
sorted_entries->resize(num_valid);
dh::CopyIf(entry_iter + range.begin(), entry_iter + range.end(),
sorted_entries->begin(), is_valid);
dh::CopyIf(entry_iter + range.begin(), entry_iter + range.end(), sorted_entries->begin(),
is_valid);
}
void SortByWeight(dh::device_vector<float>* weights,

View File

@ -209,7 +209,7 @@ class PartitionBuilder {
BitVector* decision_bits, BitVector* missing_bits) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
std::size_t nid = nodes[node_in_set].nid;
bst_feature_t fid = tree[nid].SplitIndex();
bst_feature_t fid = tree.SplitIndex(nid);
bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical;
auto node_cats = tree.NodeCats(nid);
auto const& cut_values = gmat.cut.Values();
@ -263,14 +263,13 @@ class PartitionBuilder {
template <typename ExpandEntry>
void PartitionByMask(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
const common::Range1d range, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix, const RegTree& tree,
const size_t* rid, BitVector const& decision_bits,
const RegTree& tree, const size_t* rid, BitVector const& decision_bits,
BitVector const& missing_bits) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
common::Span<size_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end());
common::Span<size_t> right = GetRightBuffer(node_in_set, range.begin(), range.end());
std::size_t nid = nodes[node_in_set].nid;
bool default_left = tree[nid].DefaultLeft();
bool default_left = tree.DefaultLeft(nid);
auto pred = [&](auto ridx) {
bool go_left = default_left;

View File

@ -7,7 +7,6 @@
#include <utility>
#include "../collective/aggregator.h"
#include "../collective/communicator-inl.h"
#include "../data/adapter.h"
#include "categorical.h"
#include "hist_util.h"
@ -143,6 +142,7 @@ struct QuantileAllreduce {
template <typename WQSketch>
void SketchContainerImpl<WQSketch>::GatherSketchInfo(
MetaInfo const& info,
std::vector<typename WQSketch::SummaryContainer> const &reduced,
std::vector<size_t> *p_worker_segments, std::vector<bst_row_t> *p_sketches_scan,
std::vector<typename WQSketch::Entry> *p_global_sketches) {
@ -168,7 +168,7 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
std::partial_sum(sketch_size.cbegin(), sketch_size.cend(), sketches_scan.begin() + beg_scan + 1);
// Gather all column pointers
collective::Allreduce<collective::Operation::kSum>(sketches_scan.data(), sketches_scan.size());
collective::GlobalSum(info, sketches_scan.data(), sketches_scan.size());
for (int32_t i = 0; i < world; ++i) {
size_t back = (i + 1) * (n_columns + 1) - 1;
auto n_entries = sketches_scan.at(back);
@ -196,7 +196,8 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
static_assert(sizeof(typename WQSketch::Entry) / 4 == sizeof(float),
"Unexpected size of sketch entry.");
collective::Allreduce<collective::Operation::kSum>(
collective::GlobalSum(
info,
reinterpret_cast<float *>(global_sketches.data()),
global_sketches.size() * sizeof(typename WQSketch::Entry) / sizeof(float));
}
@ -222,8 +223,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(MetaInfo const& info) {
std::vector<size_t> global_feat_ptrs(feature_ptr.size() * world_size, 0);
size_t feat_begin = rank * feature_ptr.size(); // pointer to current worker
std::copy(feature_ptr.begin(), feature_ptr.end(), global_feat_ptrs.begin() + feat_begin);
collective::Allreduce<collective::Operation::kSum>(global_feat_ptrs.data(),
global_feat_ptrs.size());
collective::GlobalSum(info, global_feat_ptrs.data(), global_feat_ptrs.size());
// move all categories into a flattened vector to prepare for allreduce
size_t total = feature_ptr.back();
@ -236,8 +236,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(MetaInfo const& info) {
// indptr for indexing workers
std::vector<size_t> global_worker_ptr(world_size + 1, 0);
global_worker_ptr[rank + 1] = total; // shift right by 1 for constructing the indptr
collective::Allreduce<collective::Operation::kSum>(global_worker_ptr.data(),
global_worker_ptr.size());
collective::GlobalSum(info, global_worker_ptr.data(), global_worker_ptr.size());
std::partial_sum(global_worker_ptr.cbegin(), global_worker_ptr.cend(), global_worker_ptr.begin());
// total number of categories in all workers with all features
auto gtotal = global_worker_ptr.back();
@ -249,8 +248,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(MetaInfo const& info) {
CHECK_EQ(rank_size, total);
std::copy(flatten.cbegin(), flatten.cend(), global_categories.begin() + rank_begin);
// gather values from all workers.
collective::Allreduce<collective::Operation::kSum>(global_categories.data(),
global_categories.size());
collective::GlobalSum(info, global_categories.data(), global_categories.size());
QuantileAllreduce<float> allreduce_result{global_categories, global_worker_ptr, global_feat_ptrs,
categories_.size()};
ParallelFor(categories_.size(), n_threads_, [&](auto fidx) {
@ -323,7 +321,7 @@ void SketchContainerImpl<WQSketch>::AllReduce(
std::vector<bst_row_t> sketches_scan((n_columns + 1) * world, 0);
std::vector<typename WQSketch::Entry> global_sketches;
this->GatherSketchInfo(reduced, &worker_segments, &sketches_scan, &global_sketches);
this->GatherSketchInfo(info, reduced, &worker_segments, &sketches_scan, &global_sketches);
std::vector<typename WQSketch::SummaryContainer> final_sketches(n_columns);
@ -371,7 +369,9 @@ auto AddCategories(std::set<float> const &categories, HistogramCuts *cuts) {
InvalidCategory();
}
auto &cut_values = cuts->cut_values_.HostVector();
auto max_cat = *std::max_element(categories.cbegin(), categories.cend());
// With column-wise data split, the categories may be empty.
auto max_cat =
categories.empty() ? 0.0f : *std::max_element(categories.cbegin(), categories.cend());
CheckMaxCat(max_cat, categories.size());
for (bst_cat_t i = 0; i <= AsCat(max_cat); ++i) {
cut_values.push_back(i);

View File

@ -822,7 +822,8 @@ class SketchContainerImpl {
return group_ind;
}
// Gather sketches from all workers.
void GatherSketchInfo(std::vector<typename WQSketch::SummaryContainer> const &reduced,
void GatherSketchInfo(MetaInfo const& info,
std::vector<typename WQSketch::SummaryContainer> const &reduced,
std::vector<bst_row_t> *p_worker_segments,
std::vector<bst_row_t> *p_sketches_scan,
std::vector<typename WQSketch::Entry> *p_global_sketches);

View File

@ -26,6 +26,12 @@
#include "xgboost/logging.h"
#include "xgboost/span.h"
#if defined(XGBOOST_USE_CUDA)
#include "cuda_fp16.h"
#elif defined(__HIP_PLATFORM_AMD__)
#include <hip/hip_fp16.h>
#endif
namespace xgboost {
// Common errors in parsing columnar format.
struct ArrayInterfaceErrors {
@ -304,12 +310,12 @@ class ArrayInterfaceHandler {
template <typename T, typename E = void>
struct ToDType;
// float
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
template <>
struct ToDType<__half> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;
};
#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
template <>
struct ToDType<float> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF4;
@ -459,11 +465,11 @@ class ArrayInterface {
CHECK(sizeof(long double) == 16) << error::NoF128();
type = T::kF16;
} else if (typestr[1] == 'f' && typestr[2] == '2') {
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
type = T::kF2;
#else
LOG(FATAL) << "Half type is not supported.";
#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
} else if (typestr[1] == 'f' && typestr[2] == '4') {
type = T::kF4;
} else if (typestr[1] == 'f' && typestr[2] == '8') {
@ -490,20 +496,17 @@ class ArrayInterface {
}
}
XGBOOST_DEVICE size_t Shape(size_t i) const { return shape[i]; }
XGBOOST_DEVICE size_t Stride(size_t i) const { return strides[i]; }
[[nodiscard]] XGBOOST_DEVICE std::size_t Shape(size_t i) const { return shape[i]; }
[[nodiscard]] XGBOOST_DEVICE std::size_t Stride(size_t i) const { return strides[i]; }
template <typename Fn>
XGBOOST_HOST_DEV_INLINE decltype(auto) DispatchCall(Fn func) const {
using T = ArrayInterfaceHandler::Type;
switch (type) {
case T::kF2: {
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
return func(reinterpret_cast<__half const *>(data));
#else
SPAN_CHECK(false);
return func(reinterpret_cast<float const *>(data));
#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
}
case T::kF4:
return func(reinterpret_cast<float const *>(data));
@ -540,23 +543,23 @@ class ArrayInterface {
return func(reinterpret_cast<uint64_t const *>(data));
}
XGBOOST_DEVICE std::size_t ElementSize() const {
[[nodiscard]] XGBOOST_DEVICE std::size_t ElementSize() const {
return this->DispatchCall([](auto *typed_data_ptr) {
return sizeof(std::remove_pointer_t<decltype(typed_data_ptr)>);
});
}
XGBOOST_DEVICE std::size_t ElementAlignment() const {
[[nodiscard]] XGBOOST_DEVICE std::size_t ElementAlignment() const {
return this->DispatchCall([](auto *typed_data_ptr) {
return std::alignment_of<std::remove_pointer_t<decltype(typed_data_ptr)>>::value;
});
}
template <typename T = float, typename... Index>
XGBOOST_DEVICE T operator()(Index &&...index) const {
XGBOOST_HOST_DEV_INLINE T operator()(Index &&...index) const {
static_assert(sizeof...(index) <= D, "Invalid index.");
return this->DispatchCall([=](auto const *p_values) -> T {
std::size_t offset = linalg::detail::Offset<0ul>(strides, 0ul, index...);
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
// No operator defined for half -> size_t
using Type = std::conditional_t<
std::is_same<__half,
@ -566,7 +569,7 @@ class ArrayInterface {
return static_cast<T>(static_cast<Type>(p_values[offset]));
#else
return static_cast<T>(p_values[offset]);
#endif
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
});
}
@ -603,7 +606,7 @@ void DispatchDType(ArrayInterface<D> const array, std::int32_t device, Fn fn) {
};
switch (array.type) {
case ArrayInterfaceHandler::kF2: {
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
dispatch(__half{});
#endif
break;

View File

@ -698,6 +698,9 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col
this->feature_type_names = that.feature_type_names;
auto &h_feature_types = feature_types.HostVector();
LoadFeatureType(this->feature_type_names, &h_feature_types);
} else if (!that.feature_types.Empty()) {
this->feature_types.Resize(that.feature_types.Size());
this->feature_types.Copy(that.feature_types);
}
if (!that.feature_weights.Empty()) {
this->feature_weights.Resize(that.feature_weights.Size());

View File

@ -29,7 +29,7 @@ class CudfAdapterBatch : public detail::NoMetaInfo {
: columns_(columns),
num_rows_(num_rows) {}
size_t Size() const { return num_rows_ * columns_.size(); }
__device__ COOTuple GetElement(size_t idx) const {
__device__ __forceinline__ COOTuple GetElement(size_t idx) const {
size_t column_idx = idx % columns_.size();
size_t row_idx = idx / columns_.size();
auto const& column = columns_[column_idx];
@ -39,6 +39,14 @@ class CudfAdapterBatch : public detail::NoMetaInfo {
return {row_idx, column_idx, value};
}
__device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const {
auto const& column = columns_[fidx];
float value = column.valid.Data() == nullptr || column.valid.Check(ridx)
? column(ridx)
: std::numeric_limits<float>::quiet_NaN();
return value;
}
XGBOOST_DEVICE bst_row_t NumRows() const { return num_rows_; }
XGBOOST_DEVICE bst_row_t NumCols() const { return columns_.size(); }
@ -166,6 +174,10 @@ class CupyAdapterBatch : public detail::NoMetaInfo {
float value = array_interface_(row_idx, column_idx);
return {row_idx, column_idx, value};
}
__device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const {
float value = array_interface_(ridx, fidx);
return value;
}
XGBOOST_DEVICE bst_row_t NumRows() const { return array_interface_.Shape(0); }
XGBOOST_DEVICE bst_row_t NumCols() const { return array_interface_.Shape(1); }
@ -202,40 +214,64 @@ class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {
// Returns maximum row length
template <typename AdapterBatchT>
size_t GetRowCounts(const AdapterBatchT batch, common::Span<size_t> offset,
int device_idx, float missing) {
#if defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipSetDevice(device_idx));
#elif defined(XGBOOST_USE_CUDA)
std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offset, int device_idx,
float missing) {
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaSetDevice(device_idx));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipSetDevice(device_idx));
#endif
IsValidFunctor is_valid(missing);
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes()));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipMemsetAsync(offset.data(), '\0', offset.size_bytes()));
#endif
auto n_samples = batch.NumRows();
bst_feature_t n_features = batch.NumCols();
// Use more than one thread per row in case the dataset is too wide.
bst_feature_t stride{0};
if (n_features < 32) {
stride = std::min(n_features, 4u);
} else if (n_features < 64) {
stride = 8;
} else if (n_features < 128) {
stride = 16;
} else {
stride = 32;
}
// Count elements per row
dh::LaunchN(batch.Size(), [=] __device__(size_t idx) {
auto element = batch.GetElement(idx);
if (is_valid(element)) {
atomicAdd(reinterpret_cast<unsigned long long*>( // NOLINT
&offset[element.row_idx]),
static_cast<unsigned long long>(1)); // NOLINT
dh::LaunchN(n_samples * stride, [=] __device__(std::size_t idx) {
bst_row_t cnt{0};
auto [ridx, fbeg] = linalg::UnravelIndex(idx, n_samples, stride);
SPAN_CHECK(ridx < n_samples);
for (bst_feature_t fidx = fbeg; fidx < n_features; fidx += stride) {
if (is_valid(batch.GetElement(ridx, fidx))) {
cnt++;
}
}
atomicAdd(reinterpret_cast<unsigned long long*>( // NOLINT
&offset[ridx]),
static_cast<unsigned long long>(cnt)); // NOLINT
});
dh::XGBCachingDeviceAllocator<char> alloc;
#if defined(XGBOOST_USE_HIP)
size_t row_stride =
dh::Reduce(thrust::hip::par(alloc), thrust::device_pointer_cast(offset.data()),
thrust::device_pointer_cast(offset.data()) + offset.size(),
static_cast<std::size_t>(0), thrust::maximum<size_t>());
#elif defined(XGBOOST_USE_CUDA)
size_t row_stride =
#if defined(XGBOOST_USE_CUDA)
bst_row_t row_stride =
dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
thrust::device_pointer_cast(offset.data()) + offset.size(),
static_cast<std::size_t>(0), thrust::maximum<size_t>());
static_cast<bst_row_t>(0), thrust::maximum<bst_row_t>());
#elif defined(XGBOOST_USE_HIP)
bst_row_t row_stride =
dh::Reduce(thrust::hip::par(alloc), thrust::device_pointer_cast(offset.data()),
thrust::device_pointer_cast(offset.data()) + offset.size(),
static_cast<bst_row_t>(0), thrust::maximum<bst_row_t>());
#endif
return row_stride;
}
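For intuition, the launch above assigns `stride` threads to each row: thread `(ridx, fbeg)` (recovered by `linalg::UnravelIndex`, i.e. `ridx = idx / stride`, `fbeg = idx % stride`) walks columns `fbeg, fbeg + stride, ...`, accumulates a private count, and issues a single `atomicAdd` per thread rather than one per element. This is also why the row-major `GetElement(ridx, fidx)` overloads were added to the adapter batches. A simplified standalone sketch (hypothetical kernel over a dense row-major array, treating NaN as missing):

#include <cstddef>

__global__ void CountValidPerRow(const float* data, std::size_t n_rows,
                                 std::size_t n_cols, std::size_t stride,
                                 unsigned long long* row_counts) {
  std::size_t idx = static_cast<std::size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (idx >= n_rows * stride) return;
  std::size_t ridx = idx / stride;  // the row this thread works on
  std::size_t fbeg = idx % stride;  // the first column for this thread
  unsigned long long cnt = 0;
  for (std::size_t fidx = fbeg; fidx < n_cols; fidx += stride) {
    if (!isnan(data[ridx * n_cols + fidx])) {  // valid == not missing
      ++cnt;
    }
  }
  atomicAdd(&row_counts[ridx], cnt);  // one atomic per thread, not per element
}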
@ -243,13 +279,29 @@ size_t GetRowCounts(const AdapterBatchT batch, common::Span<size_t> offset,
* \brief Check that there is no inf in the data.
*/
template <typename AdapterBatchT>
bool HasInfInData(AdapterBatchT const& batch, IsValidFunctor is_valid) {
bool NoInfInData(AdapterBatchT const& batch, IsValidFunctor is_valid) {
auto counting = thrust::make_counting_iterator(0llu);
auto value_iter = dh::MakeTransformIterator<float>(
counting, [=] XGBOOST_DEVICE(std::size_t idx) { return batch.GetElement(idx).value; });
auto valid =
thrust::none_of(value_iter, value_iter + batch.Size(),
[is_valid] XGBOOST_DEVICE(float v) { return is_valid(v) && std::isinf(v); });
auto value_iter = dh::MakeTransformIterator<bool>(counting, [=] XGBOOST_DEVICE(std::size_t idx) {
auto v = batch.GetElement(idx).value;
if (!is_valid(v)) {
// discard the invalid elements.
return true;
}
// check that there's no inf in data.
return !std::isinf(v);
});
dh::XGBCachingDeviceAllocator<char> alloc;
// The default implementation in thrust optimizes any_of/none_of/all_of by
// processing small intervals so that it can stop early. But since we expect
// all data to be valid here, the small intervals only decrease performance
// due to excessive kernel launches and stream synchronization.
#if defined(XGBOOST_USE_CUDA)
auto valid = dh::Reduce(thrust::cuda::par(alloc), value_iter, value_iter + batch.Size(), true,
thrust::logical_and<>{});
#elif defined(XGBOOST_USE_HIP)
auto valid = dh::Reduce(thrust::hip::par(alloc), value_iter, value_iter + batch.Size(), true,
thrust::logical_and<>{});
#endif
return valid;
}
}  // namespace data
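As a rough illustration of the comment above, the `none_of` check can be expressed as a single full-pass reduction with `logical_and`, avoiding the early-exit kernel launches entirely. A sketch assuming nvcc with extended lambdas (not XGBoost's wrapper API):

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/reduce.h>

// True iff no element is +/-inf; one full pass, no early-exit kernels.
bool NoInf(thrust::device_vector<float> const& values) {
  auto it = thrust::make_transform_iterator(
      values.begin(), [] __device__(float v) { return !isinf(v); });
  return thrust::reduce(thrust::device, it, it + values.size(), true,
                        thrust::logical_and<bool>{});
}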

View File

@ -213,7 +213,7 @@ void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType cons
// correct output position
auto counting = thrust::make_counting_iterator(0llu);
data::IsValidFunctor is_valid(missing);
bool valid = data::HasInfInData(batch, is_valid);
bool valid = data::NoInfInData(batch, is_valid);
CHECK(valid) << error::InfInData();
auto key_iter = dh::MakeTransformIterator<size_t>(

View File

@ -92,7 +92,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
}
auto batch_rows = num_rows();
accumulated_rows += batch_rows;
dh::caching_device_vector<size_t> row_counts(batch_rows + 1, 0);
dh::device_vector<size_t> row_counts(batch_rows + 1, 0);
common::Span<size_t> row_counts_span(row_counts.data().get(), row_counts.size());
row_stride = std::max(row_stride, Dispatch(proxy, [=](auto const& value) {
return GetRowCounts(value, row_counts_span, get_device(), missing);
@ -163,7 +163,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
#endif
auto rows = num_rows();
dh::caching_device_vector<size_t> row_counts(rows + 1, 0);
dh::device_vector<size_t> row_counts(rows + 1, 0);
common::Span<size_t> row_counts_span(row_counts.data().get(), row_counts.size());
Dispatch(proxy, [=](auto const& value) {
return GetRowCounts(value, row_counts_span, get_device(), missing);

View File

@ -92,7 +92,7 @@ void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_row_t> offset,
template <typename AdapterBatchT>
size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missing,
SparsePage* page) {
bool valid = HasInfInData(batch, IsValidFunctor{missing});
bool valid = NoInfInData(batch, IsValidFunctor{missing});
CHECK(valid) << error::InfInData();
page->offset.SetDevice(device);

View File

@ -67,7 +67,7 @@ class ColumnSplitHelper {
const int32_t nid = nodes[node_in_set].nid;
const size_t task_id = partition_builder_->GetTaskIdx(node_in_set, begin);
partition_builder_->AllocateForTask(task_id);
partition_builder_->PartitionByMask(node_in_set, nodes, r, gmat, column_matrix, *p_tree,
partition_builder_->PartitionByMask(node_in_set, nodes, r, gmat, *p_tree,
(*row_set_collection_)[nid].begin, decision_bits_,
missing_bits_);
});

View File

@ -25,7 +25,6 @@
#include "xgboost/linalg.h" // for Constants, Vector
namespace xgboost::tree {
template <typename ExpandEntry>
class HistEvaluator {
private:
struct NodeEntry {
@ -285,10 +284,42 @@ class HistEvaluator {
return left_sum;
}
/**
* @brief Gather the expand entries from all the workers.
* @param entries Local expand entries on this worker.
* @return Global expand entries gathered from all workers.
*/
std::vector<CPUExpandEntry> Allgather(std::vector<CPUExpandEntry> const &entries) {
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
auto const num_entries = entries.size();
// First, gather all the primitive fields.
std::vector<CPUExpandEntry> all_entries(num_entries * world);
std::vector<uint32_t> cat_bits;
std::vector<std::size_t> cat_bits_sizes;
for (std::size_t i = 0; i < num_entries; i++) {
all_entries[num_entries * rank + i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes);
}
collective::Allgather(all_entries.data(), all_entries.size() * sizeof(CPUExpandEntry));
// Gather all the cat_bits.
auto gathered = collective::AllgatherV(cat_bits, cat_bits_sizes);
common::ParallelFor(num_entries * world, ctx_->Threads(), [&] (auto i) {
// Copy the cat_bits back into all expand entries.
all_entries[i].split.cat_bits.resize(gathered.sizes[i]);
std::copy_n(gathered.result.cbegin() + gathered.offsets[i], gathered.sizes[i],
all_entries[i].split.cat_bits.begin());
});
return all_entries;
}
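The pattern used by this helper (and by its multi-target counterpart below) is worth spelling out: the fixed-size fields of each entry travel through a byte-wise `Allgather` into this worker's slot of a pre-sized buffer, while the variable-length `cat_bits` are flattened and exchanged with `AllgatherV` alongside their per-entry sizes; the receiver then scatters them back using the prefix-sum offsets. A host-only sketch of the scatter-back step, with a hypothetical `Gathered` type mirroring what `AllgatherV` returns here:

#include <cstddef>
#include <cstdint>
#include <vector>

struct Gathered {
  std::vector<std::uint32_t> result;  // all workers' cat_bits, concatenated
  std::vector<std::size_t> sizes;     // cat_bits size for each entry
  std::vector<std::size_t> offsets;   // exclusive prefix sum of sizes
};

// Scatter the flattened cat_bits back into one vector per entry.
std::vector<std::vector<std::uint32_t>> Unflatten(Gathered const& g) {
  std::vector<std::vector<std::uint32_t>> out(g.sizes.size());
  for (std::size_t i = 0; i < g.sizes.size(); ++i) {
    out[i].assign(g.result.begin() + g.offsets[i],
                  g.result.begin() + g.offsets[i] + g.sizes[i]);
  }
  return out;
}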
public:
void EvaluateSplits(const common::HistCollection &hist, common::HistogramCuts const &cut,
common::Span<FeatureType const> feature_types, const RegTree &tree,
std::vector<ExpandEntry> *p_entries) {
std::vector<CPUExpandEntry> *p_entries) {
auto n_threads = ctx_->Threads();
auto& entries = *p_entries;
// All nodes are on the same level, so we can store the shared ptr.
@ -306,7 +337,7 @@ class HistEvaluator {
return features[nidx_in_set]->Size();
}, grain_size);
std::vector<ExpandEntry> tloc_candidates(n_threads * entries.size());
std::vector<CPUExpandEntry> tloc_candidates(n_threads * entries.size());
for (size_t i = 0; i < entries.size(); ++i) {
for (decltype(n_threads) j = 0; j < n_threads; ++j) {
tloc_candidates[i * n_threads + j] = entries[i];
@ -365,22 +396,18 @@ class HistEvaluator {
if (is_col_split_) {
// With column-wise data split, we gather the best splits from all the workers and update the
// expand entries accordingly.
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
auto const num_entries = entries.size();
std::vector<ExpandEntry> buffer{num_entries * world};
std::copy_n(entries.cbegin(), num_entries, buffer.begin() + num_entries * rank);
collective::Allgather(buffer.data(), buffer.size() * sizeof(ExpandEntry));
for (auto worker = 0; worker < world; ++worker) {
auto all_entries = Allgather(entries);
for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
entries[nidx_in_set].split.Update(buffer[worker * num_entries + nidx_in_set].split);
entries[nidx_in_set].split.Update(
all_entries[worker * entries.size() + nidx_in_set].split);
}
}
}
}
// Add splits to tree, handles all statistic
void ApplyTreeSplit(ExpandEntry const& candidate, RegTree *p_tree) {
void ApplyTreeSplit(CPUExpandEntry const& candidate, RegTree *p_tree) {
auto evaluator = tree_evaluator_.GetEvaluator();
RegTree &tree = *p_tree;
@ -465,6 +492,7 @@ class HistMultiEvaluator {
FeatureInteractionConstraintHost interaction_constraints_;
std::shared_ptr<common::ColumnSampler> column_sampler_;
Context const *ctx_;
bool is_col_split_{false};
private:
static double MultiCalcSplitGain(TrainParam const &param,
@ -543,6 +571,57 @@ class HistMultiEvaluator {
return false;
}
/**
* @brief Gather the expand entries from all the workers.
* @param entries Local expand entries on this worker.
* @return Global expand entries gathered from all workers.
*/
std::vector<MultiExpandEntry> Allgather(std::vector<MultiExpandEntry> const &entries) {
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
auto const num_entries = entries.size();
// First, gather all the primitive fields.
std::vector<MultiExpandEntry> all_entries(num_entries * world);
std::vector<uint32_t> cat_bits;
std::vector<std::size_t> cat_bits_sizes;
std::vector<GradientPairPrecise> gradients;
for (std::size_t i = 0; i < num_entries; i++) {
all_entries[num_entries * rank + i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes,
&gradients);
}
collective::Allgather(all_entries.data(), all_entries.size() * sizeof(MultiExpandEntry));
// Gather all the cat_bits.
auto gathered_cat_bits = collective::AllgatherV(cat_bits, cat_bits_sizes);
// Gather all the gradients.
auto const num_gradients = gradients.size();
std::vector<GradientPairPrecise> all_gradients(num_gradients * world);
std::copy_n(gradients.cbegin(), num_gradients, all_gradients.begin() + num_gradients * rank);
collective::Allgather(all_gradients.data(), all_gradients.size() * sizeof(GradientPairPrecise));
auto const total_entries = num_entries * world;
auto const gradients_per_entry = num_gradients / num_entries;
auto const gradients_per_side = gradients_per_entry / 2;
common::ParallelFor(total_entries, ctx_->Threads(), [&] (auto i) {
// Copy the cat_bits back into all expand entries.
all_entries[i].split.cat_bits.resize(gathered_cat_bits.sizes[i]);
std::copy_n(gathered_cat_bits.result.cbegin() + gathered_cat_bits.offsets[i],
gathered_cat_bits.sizes[i], all_entries[i].split.cat_bits.begin());
// Copy the gradients back into all expand entries.
all_entries[i].split.left_sum.resize(gradients_per_side);
std::copy_n(all_gradients.cbegin() + i * gradients_per_entry, gradients_per_side,
all_entries[i].split.left_sum.begin());
all_entries[i].split.right_sum.resize(gradients_per_side);
std::copy_n(all_gradients.cbegin() + i * gradients_per_entry + gradients_per_side,
gradients_per_side, all_entries[i].split.right_sum.begin());
});
return all_entries;
}
public:
void EvaluateSplits(RegTree const &tree, common::Span<const common::HistCollection *> hist,
common::HistogramCuts const &cut, std::vector<MultiExpandEntry> *p_entries) {
@ -597,6 +676,18 @@ class HistMultiEvaluator {
entries[nidx_in_set].split.Update(tloc_candidates[n_threads * nidx_in_set + tidx].split);
}
}
if (is_col_split_) {
// With column-wise data split, we gather the best splits from all the workers and update the
// expand entries accordingly.
auto all_entries = Allgather(entries);
for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
entries[nidx_in_set].split.Update(
all_entries[worker * entries.size() + nidx_in_set].split);
}
}
}
}
linalg::Vector<float> InitRoot(linalg::VectorView<GradientPairPrecise const> root_sum) {
@ -660,7 +751,10 @@ class HistMultiEvaluator {
explicit HistMultiEvaluator(Context const *ctx, MetaInfo const &info, TrainParam const *param,
std::shared_ptr<common::ColumnSampler> sampler)
: param_{param}, column_sampler_{std::move(sampler)}, ctx_{ctx} {
: param_{param},
column_sampler_{std::move(sampler)},
ctx_{ctx},
is_col_split_{info.IsColumnSplit()} {
interaction_constraints_.Configure(*param, info.num_col_);
column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(),
param_->colsample_bynode, param_->colsample_bylevel,

View File

@ -70,6 +70,22 @@ struct CPUExpandEntry : public ExpandEntryImpl<CPUExpandEntry> {
os << "split:\n" << e.split << std::endl;
return os;
}
/**
* @brief Copy primitive fields into this, and collect cat_bits into a vector.
*
* This is used for allgather.
*
* @param that The other entry to copy from
* @param collected_cat_bits The vector to collect cat_bits
* @param cat_bits_sizes The sizes of the collected cat_bits
*/
void CopyAndCollect(CPUExpandEntry const& that, std::vector<uint32_t>* collected_cat_bits,
std::vector<std::size_t>* cat_bits_sizes) {
nid = that.nid;
depth = that.depth;
split.CopyAndCollect(that.split, collected_cat_bits, cat_bits_sizes);
}
};
struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
@ -119,6 +135,24 @@ struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
os << "]\n";
return os;
}
/**
* @brief Copy primitive fields into this, and collect cat_bits and gradients into vectors.
*
* This is used for allgather.
*
* @param that The other entry to copy from
* @param collected_cat_bits The vector to collect cat_bits
* @param cat_bits_sizes The sizes of the collected cat_bits
* @param collected_gradients The vector to collect gradients
*/
void CopyAndCollect(MultiExpandEntry const& that, std::vector<uint32_t>* collected_cat_bits,
std::vector<std::size_t>* cat_bits_sizes,
std::vector<GradientPairPrecise>* collected_gradients) {
nid = that.nid;
depth = that.depth;
split.CopyAndCollect(that.split, collected_cat_bits, cat_bits_sizes, collected_gradients);
}
};
} // namespace xgboost::tree
#endif // XGBOOST_TREE_HIST_EXPAND_ENTRY_H_

View File

@ -419,6 +419,60 @@ struct SplitEntryContainer {
<< "right_sum: " << s.right_sum << std::endl;
return os;
}
/**
* @brief Copy primitive fields into this, and collect cat_bits into a vector.
*
* This is used for allgather.
*
* @param that The other entry to copy from
* @param collected_cat_bits The vector to collect cat_bits
* @param cat_bits_sizes The sizes of the collected cat_bits
*/
void CopyAndCollect(SplitEntryContainer<GradientT> const &that,
std::vector<uint32_t> *collected_cat_bits,
std::vector<std::size_t> *cat_bits_sizes) {
loss_chg = that.loss_chg;
sindex = that.sindex;
split_value = that.split_value;
is_cat = that.is_cat;
static_assert(std::is_trivially_copyable_v<GradientT>);
left_sum = that.left_sum;
right_sum = that.right_sum;
collected_cat_bits->insert(collected_cat_bits->end(), that.cat_bits.cbegin(),
that.cat_bits.cend());
cat_bits_sizes->emplace_back(that.cat_bits.size());
}
/**
* @brief Copy primitive fields into this, and collect cat_bits and gradient sums into vectors.
*
* This is used for allgather.
*
* @param that The other entry to copy from
* @param collected_cat_bits The vector to collect cat_bits
* @param cat_bits_sizes The sizes of the collected cat_bits
* @param collected_gradients The vector to collect gradients
*/
template <typename G>
void CopyAndCollect(SplitEntryContainer<GradientT> const &that,
std::vector<uint32_t> *collected_cat_bits,
std::vector<std::size_t> *cat_bits_sizes,
std::vector<G> *collected_gradients) {
loss_chg = that.loss_chg;
sindex = that.sindex;
split_value = that.split_value;
is_cat = that.is_cat;
collected_cat_bits->insert(collected_cat_bits->end(), that.cat_bits.cbegin(),
that.cat_bits.cend());
cat_bits_sizes->emplace_back(that.cat_bits.size());
static_assert(!std::is_trivially_copyable_v<GradientT>);
collected_gradients->insert(collected_gradients->end(), that.left_sum.cbegin(),
that.left_sum.cend());
collected_gradients->insert(collected_gradients->end(), that.right_sum.cbegin(),
that.right_sum.cend());
}
/*!\return feature index to split on */
[[nodiscard]] bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); }
/*!\return whether missing value goes to left branch */

View File

@ -44,7 +44,7 @@ class GloablApproxBuilder {
protected:
TrainParam const *param_;
std::shared_ptr<common::ColumnSampler> col_sampler_;
HistEvaluator<CPUExpandEntry> evaluator_;
HistEvaluator evaluator_;
HistogramBuilder<CPUExpandEntry> histogram_builder_;
Context const *ctx_;
ObjInfo const *const task_;

View File

@ -13,6 +13,7 @@
#include <utility> // for move, swap
#include <vector> // for vector
#include "../collective/aggregator.h" // for GlobalSum
#include "../collective/communicator-inl.h" // for Allreduce, IsDistributed
#include "../collective/communicator.h" // for Operation
#include "../common/hist_util.h" // for HistogramCuts, HistCollection
@ -200,8 +201,8 @@ class MultiTargetHistBuilder {
}
}
CHECK(root_sum.CContiguous());
collective::Allreduce<collective::Operation::kSum>(
reinterpret_cast<double *>(root_sum.Values().data()), root_sum.Size() * 2);
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(root_sum.Values().data()),
root_sum.Size() * 2);
std::vector<MultiExpandEntry> nodes{best};
std::size_t i = 0;
@ -335,7 +336,7 @@ class HistBuilder {
common::Monitor *monitor_;
TrainParam const *param_;
std::shared_ptr<common::ColumnSampler> col_sampler_;
std::unique_ptr<HistEvaluator<CPUExpandEntry>> evaluator_;
std::unique_ptr<HistEvaluator> evaluator_;
std::vector<CommonRowPartitioner> partitioner_;
// back pointers to tree and data matrix
@ -354,7 +355,7 @@ class HistBuilder {
: monitor_{monitor},
param_{param},
col_sampler_{std::move(column_sampler)},
evaluator_{std::make_unique<HistEvaluator<CPUExpandEntry>>(ctx, param, fmat->Info(),
evaluator_{std::make_unique<HistEvaluator>(ctx, param, fmat->Info(),
col_sampler_)},
p_last_fmat_(fmat),
histogram_builder_{new HistogramBuilder<CPUExpandEntry>},
@ -395,8 +396,7 @@ class HistBuilder {
}
histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
collective::IsDistributed(), fmat->Info().IsColumnSplit());
evaluator_ = std::make_unique<HistEvaluator<CPUExpandEntry>>(ctx_, this->param_, fmat->Info(),
col_sampler_);
evaluator_ = std::make_unique<HistEvaluator>(ctx_, this->param_, fmat->Info(), col_sampler_);
p_last_tree_ = p_tree;
monitor_->Stop(__func__);
}
@ -455,8 +455,7 @@ class HistBuilder {
for (auto const &grad : gpair_h) {
grad_stat.Add(grad.GetGrad(), grad.GetHess());
}
collective::Allreduce<collective::Operation::kSum>(reinterpret_cast<double *>(&grad_stat),
2);
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(&grad_stat), 2);
}
auto weight = evaluator_->InitRoot(GradStats{grad_stat});

View File

@ -20,7 +20,7 @@ namespace xgboost::tree {
DMLC_REGISTRY_FILE_TAG(updater_refresh);
/*! \brief pruner that prunes a tree after growing finishs */
/*! \brief pruner that prunes a tree after growing finishes */
class TreeRefresher : public TreeUpdater {
public:
explicit TreeRefresher(Context const *ctx) : TreeUpdater(ctx) {}

View File

@ -4,11 +4,18 @@ set -euo pipefail
source tests/buildkite/conftest.sh
echo "--- Build XGBoost JVM packages"
echo "--- Build XGBoost JVM packages scala 2.12"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION}
echo "--- Build XGBoost JVM packages scala 2.13"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION} "" "" "true"
echo "--- Stash XGBoost4J JARs"
buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar"

View File

@ -25,7 +25,7 @@ set -x
CUDA_VERSION=11.8.0
NCCL_VERSION=2.16.5-1
RAPIDS_VERSION=23.02
SPARK_VERSION=3.1.1
SPARK_VERSION=3.4.0
JDK_VERSION=8
if [[ -z ${BUILDKITE:-} ]]

View File

@ -1,5 +1,5 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu22.04
ARG CUDA_VERSION_ARG
# Environment
@ -7,22 +7,21 @@ ENV DEBIAN_FRONTEND noninteractive
# Install all basic requirements
RUN \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
apt-get update && \
apt-get install -y tar unzip wget git build-essential python3 python3-pip software-properties-common \
apt-get install -y wget git python3 python3-pip software-properties-common \
apt-transport-https ca-certificates gnupg-agent && \
wget -nv -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
add-apt-repository -u 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main' && \
apt-get update && \
apt-get install -y llvm-15 clang-tidy-15 clang-15 libomp-15-dev && \
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr
apt-get install -y cmake
# Set default clang-tidy version
RUN \
update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-15 100 && \
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 100
RUN \
apt-get install libgtest-dev libgmock-dev -y
# Install Python packages
RUN \
pip3 install pyyaml

View File

@ -1,5 +1,5 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu18.04
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
ARG CUDA_VERSION_ARG
ARG RAPIDS_VERSION_ARG
@ -9,7 +9,7 @@ SHELL ["/bin/bash", "-c"] # Use Bash as shell
# Install all basic requirements
RUN \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
# Python
@ -25,7 +25,7 @@ RUN \
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
pyspark cloudpickle cuda-python && \
pyspark>=3.4.0 cloudpickle cuda-python && \
mamba clean --all && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector

View File

@ -1,53 +0,0 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
ARG CUDA_VERSION_ARG
ARG JDK_VERSION=8
ARG SPARK_VERSION=3.0.0
# Environment
ENV DEBIAN_FRONTEND noninteractive
# Install all basic requirements
RUN \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/3bf863cc.pub && \
apt-get update && \
apt-get install -y software-properties-common && \
add-apt-repository ppa:openjdk-r/ppa && \
apt-get update && \
apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \
/opt/mambaforge/bin/pip install awscli && \
# Maven
wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
# Spark
wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \
tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \
ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark
ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH
# Install Python packages
RUN \
pip install numpy scipy pandas scikit-learn
ENV GOSU_VERSION 1.10
# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
chmod +x /usr/local/bin/gosu && \
gosu nobody true
# Set default JDK version
RUN update-java-alternatives -v -s java-1.$JDK_VERSION.0-openjdk-amd64
# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]

View File

@ -20,10 +20,14 @@ RUN \
wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
# Spark
wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \
tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \
ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark
# Spark with scala 2.12
mkdir -p /opt/spark-scala-2.12 && \
wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz && \
tar xvf spark-$SPARK_VERSION-bin-hadoop3.tgz --strip-components=1 -C /opt/spark-scala-2.12 && \
# Spark with scala 2.13
mkdir -p /opt/spark-scala-2.13 && \
wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz && \
tar xvf spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz --strip-components=1 -C /opt/spark-scala-2.13
ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH

View File

@ -6,6 +6,7 @@ set -x
spark_version=$1
use_cuda=$2
gpu_arch=$3
use_scala213=$4
gpu_options=""
if [ "x$use_cuda" == "x-Duse.cuda=ON" ]; then
@ -22,7 +23,13 @@ export RABIT_MOCK=ON
if [ "x$gpu_arch" != "x" ]; then
export GPU_ARCH_FLAG=$gpu_arch
fi
mvn --no-transfer-progress package -Dspark.version=${spark_version} $gpu_options
mvn_profile_string=""
if [ "x$use_scala213" != "x" ]; then
export mvn_profile_string="-Pdefault,scala-2.13"
fi
mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options
set +x
set +e

View File

@ -28,7 +28,7 @@ dependencies:
- llvmlite
- cffi
- pyarrow
- pyspark
- pyspark>=3.4.0
- cloudpickle
- pip:
- awscli

View File

@ -38,8 +38,6 @@ dependencies:
- protobuf
- cloudpickle
- modin
# TODO: Replace it with pyspark>=3.4 once 3.4 released.
# - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
- pyspark>=3.3.1
- pyspark>=3.4.0
- pip:
- datatable

View File

@ -35,7 +35,7 @@ dependencies:
- py-ubjson
- cffi
- pyarrow
- pyspark
- pyspark>=3.4.0
- cloudpickle
- pip:
- sphinx_rtd_theme

View File

@ -19,6 +19,4 @@ dependencies:
- pytest
- hypothesis
- hatchling
- pip:
# TODO: Replace it with pyspark>=3.4 once 3.4 released.
- https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
- pyspark>=3.4.0

View File

@ -6,37 +6,56 @@ set -x
# Initialize local Maven repository
./tests/ci_build/initialize_maven.sh
# Get version number of XGBoost4J and other auxiliary information
cd jvm-packages
jvm_packages_dir=`pwd`
# Get version number of XGBoost4J and other auxiliary information
xgboost4j_version=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
maven_compiler_source=$(mvn help:evaluate -Dexpression=maven.compiler.source -q -DforceStdout)
maven_compiler_target=$(mvn help:evaluate -Dexpression=maven.compiler.target -q -DforceStdout)
spark_version=$(mvn help:evaluate -Dexpression=spark.version -q -DforceStdout)
scala_version=$(mvn help:evaluate -Dexpression=scala.version -q -DforceStdout)
scala_binary_version=$(mvn help:evaluate -Dexpression=scala.binary.version -q -DforceStdout)
# Install XGBoost4J JAR into local Maven repository
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
cd xgboost4j-tester
# Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests
python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version}
# Run unit tests with XGBoost4J
mvn --no-transfer-progress package
# Run integration tests with XGBoost4J
java -jar ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar
# Run integration tests with XGBoost4J-Spark
if [ ! -z "$RUN_INTEGRATION_TEST" ]
then
if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
cd $jvm_packages_dir/xgboost4j-tester
python3 get_iris.py
spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv
spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv ${PWD}/native_model ${PWD}/pipeline_model
cd $jvm_packages_dir
fi
# Iterate over the Maven profiles for the supported Scala versions; 2.12 is the default at the moment.
for _maven_profile_string in "" "-Pdefault,scala-2.13"; do
scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout)
scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout)
# Install XGBoost4J JAR into local Maven repository
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
cd xgboost4j-tester
# Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests
python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version}
# Build package and unit tests with XGBoost4J
mvn --no-transfer-progress clean package
xgboost4j_tester_jar="$jvm_packages_dir/xgboost4j-tester/target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar"
# Run integration tests with XGBoost4J
java -jar $xgboost4j_tester_jar
# Run integration tests with XGBoost4J-Spark
if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
# Change directory so that we do not mix source code with generated files
cd target
if [[ "$scala_binary_version" == "2.12" ]]; then
/opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv
/opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version}
elif [[ "$scala_binary_version" == "2.13" ]]; then
/opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv
/opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version}
else
echo "Unexpected scala version: $scala_version ($scala_binary_version)."
fi
fi
cd $jvm_packages_dir
done
set +x
set +e

View File

@ -41,7 +41,7 @@ class ClangTidy(object):
def __init__(self, args):
self.cpp_lint = args.cpp
self.cuda_lint = args.cuda
self.use_dmlc_gtest = args.use_dmlc_gtest
self.use_dmlc_gtest: bool = args.use_dmlc_gtest
self.cuda_archs = args.cuda_archs.copy() if args.cuda_archs else []
if args.tidy_version:
@ -202,6 +202,7 @@ class ClangTidy(object):
cdb_file = os.path.join(self.cdb_path, 'compile_commands.json')
with open(cdb_file, 'r') as fd:
self.compile_commands = json.load(fd)
tidy_file = os.path.join(self.root_path, '.clang-tidy')
with open(tidy_file) as fd:
self.clang_tidy = yaml.safe_load(fd)
@ -276,16 +277,24 @@ right keywords?
print('clang-tidy is working.')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run clang-tidy.')
parser.add_argument('--cpp', type=int, default=1)
parser.add_argument('--tidy-version', type=int, default=None,
help='Specify the version of preferred clang-tidy.')
parser.add_argument('--cuda', type=int, default=1)
parser.add_argument('--use-dmlc-gtest', type=int, default=1,
help='Whether to use gtest bundled in dmlc-core.')
parser.add_argument('--cuda-archs', action='append',
help='List of CUDA archs to build')
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run clang-tidy.")
parser.add_argument("--cpp", type=int, default=1)
parser.add_argument(
"--tidy-version",
type=int,
default=None,
help="Specify the version of preferred clang-tidy.",
)
parser.add_argument("--cuda", type=int, default=1)
parser.add_argument(
"--use-dmlc-gtest",
action="store_true",
help="Whether to use gtest bundled in dmlc-core.",
)
parser.add_argument(
"--cuda-archs", action="append", help="List of CUDA archs to build"
)
args = parser.parse_args()
test_tidy(args)

View File

@ -497,6 +497,77 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
}
}
namespace {
auto MakeData(Context const* ctx, std::size_t n_samples, bst_feature_t n_features) {
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipSetDevice(ctx->gpu_id));
#endif
auto n = n_samples * n_features;
std::vector<float> x;
x.resize(n);
std::iota(x.begin(), x.end(), 0);
std::int32_t c{0};
float missing = n_samples * n_features;
for (std::size_t i = 0; i < x.size(); ++i) {
if (i % 5 == 0) {
x[i] = missing;
c++;
}
}
thrust::device_vector<float> d_x;
d_x = x;
auto n_invalids = n / 10 * 2 + 1;
auto is_valid = data::IsValidFunctor{missing};
return std::tuple{x, d_x, n_invalids, is_valid};
}
void TestGetColumnSize(std::size_t n_samples) {
auto ctx = MakeCUDACtx(0);
bst_feature_t n_features = 12;
[[maybe_unused]] auto [x, d_x, n_invalids, is_valid] = MakeData(&ctx, n_samples, n_features);
auto adapter = AdapterFromData(d_x, n_samples, n_features);
auto batch = adapter.Value();
auto batch_iter = dh::MakeTransformIterator<data::COOTuple>(
thrust::make_counting_iterator(0llu),
[=] __device__(std::size_t idx) { return batch.GetElement(idx); });
dh::caching_device_vector<std::size_t> column_sizes_scan;
column_sizes_scan.resize(n_features + 1);
std::vector<std::size_t> h_column_size(column_sizes_scan.size());
std::vector<std::size_t> h_column_size_1(column_sizes_scan.size());
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, true>(
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size.begin());
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, false>(
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
ASSERT_EQ(h_column_size, h_column_size_1);
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, true>(
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
ASSERT_EQ(h_column_size, h_column_size_1);
detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, false>(
ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
ASSERT_EQ(h_column_size, h_column_size_1);
}
} // namespace
TEST(HistUtil, GetColumnSize) {
bst_row_t n_samples = 4096;
TestGetColumnSize(n_samples);
}
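For reference, here is a minimal CPU sketch of the per-column counting that the kernel variants above are checked against. The dense row-major layout, the `missing` sentinel handling, and the helper name are assumptions of this sketch, not XGBoost APIs:

#include <cmath>    // std::isnan
#include <cstddef>
#include <vector>

// Count the valid (neither NaN nor equal to `missing`) entries per feature of
// a dense row-major matrix.
std::vector<std::size_t> ColumnSizesSketch(std::vector<float> const& x,
                                           std::size_t n_samples,
                                           std::size_t n_features, float missing) {
  std::vector<std::size_t> sizes(n_features, 0);
  for (std::size_t i = 0; i < n_samples; ++i) {
    for (std::size_t j = 0; j < n_features; ++j) {
      float v = x[i * n_features + j];
      if (!std::isnan(v) && v != missing) {
        sizes[j] += 1;
      }
    }
  }
  return sizes;
}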
// Check that sketching from an adapter and from a DMatrix result in the same answer.
// Consistency here is useful for both testing and user experience.
TEST(HistUtil, SketchingEquivalent) {

View File

@@ -56,7 +56,7 @@ void TestSketchUnique(float sparsity) {
thrust::make_counting_iterator(0llu),
[=] __device__(size_t idx) { return batch.GetElement(idx); });
auto end = kCols * kRows;
detail::GetColumnSizesScan(0, kCols, n_cuts, batch_iter, is_valid, 0, end,
detail::GetColumnSizesScan(0, kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
&cut_sizes_scan, &column_sizes_scan);
auto const& cut_sizes = cut_sizes_scan.HostVector();
ASSERT_LE(sketch.Data().size(), cut_sizes.back());

View File

@@ -1,15 +1,16 @@
/*!
* Copyright 2018 XGBoost contributors
/**
* Copyright 2018-2023, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <vector>
#include <xgboost/span.h>
#include "test_span.h"
namespace xgboost {
namespace common {
#include <gtest/gtest.h>
#include <xgboost/span.h>
#include <vector>
#include "../../../src/common/transform_iterator.h" // for MakeIndexTransformIter
namespace xgboost::common {
TEST(Span, TestStatus) {
int status = 1;
TestTestStatus {&status}();
@@ -526,5 +527,17 @@ TEST(SpanDeathTest, Empty) {
Span<float> s{data.data(), static_cast<Span<float>::index_type>(0)};
EXPECT_DEATH(s[0], ""); // not ok to use it.
}
} // namespace common
} // namespace xgboost
TEST(IterSpan, Basic) {
auto iter = common::MakeIndexTransformIter([](std::size_t i) { return i; });
std::size_t n = 13;
auto span = IterSpan{iter, n};
ASSERT_EQ(span.size(), n);
for (std::size_t i = 0; i < n; ++i) {
ASSERT_EQ(span[i], i);
}
ASSERT_EQ(span.subspan(1).size(), n - 1);
ASSERT_EQ(span.subspan(1)[0], 1);
ASSERT_EQ(span.subspan(1, 2)[1], 2);
}
} // namespace xgboost::common
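For context, a hypothetical sketch of the iterator-plus-size view this test exercises; it mirrors the interface (size, operator[], subspan) but is not the actual IterSpan implementation:

#include <cstddef>

// Minimal view over a random-access iterator plus an element count.
template <typename It>
class IterSpanSketch {
  It it_;
  std::size_t size_;

 public:
  IterSpanSketch(It it, std::size_t n) : it_{it}, size_{n} {}
  std::size_t size() const { return size_; }
  decltype(auto) operator[](std::size_t i) const { return it_[i]; }
  // subspan(offset) keeps the tail; subspan(offset, count) takes a window.
  IterSpanSketch subspan(std::size_t offset) const { return {it_ + offset, size_ - offset}; }
  IterSpanSketch subspan(std::size_t offset, std::size_t count) const {
    return {it_ + offset, count};
  }
};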

View File

@@ -62,3 +62,22 @@ void TestCudfAdapter()
TEST(DeviceAdapter, CudfAdapter) {
TestCudfAdapter();
}
namespace xgboost::data {
TEST(DeviceAdapter, GetRowCounts) {
auto ctx = MakeCUDACtx(0);
for (bst_feature_t n_features : {1, 2, 4, 64, 128, 256}) {
HostDeviceVector<float> storage;
auto str_arr = RandomDataGenerator{8192, n_features, 0.0}
.Device(ctx.gpu_id)
.GenerateArrayInterface(&storage);
auto adapter = CupyAdapter{str_arr};
HostDeviceVector<bst_row_t> offset(adapter.NumRows() + 1, 0);
offset.SetDevice(ctx.gpu_id);
auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.gpu_id,
std::numeric_limits<float>::quiet_NaN());
ASSERT_EQ(rstride, n_features);
}
}
} // namespace xgboost::data
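A CPU sketch of what this test checks: count the valid entries in each row and take the maximum as the row stride, which for a fully dense input equals the number of features. Layout, missing-value handling, and the helper name are assumptions of the sketch:

#include <algorithm>  // std::max_element
#include <cmath>      // std::isnan
#include <cstddef>
#include <vector>

// Hypothetical CPU analogue of the GPU row-count computation for a dense
// row-major matrix.
std::size_t RowStrideSketch(std::vector<float> const& x, std::size_t n_rows,
                            std::size_t n_cols, float missing,
                            std::vector<std::size_t>* p_counts) {
  auto& counts = *p_counts;
  counts.assign(n_rows, 0);
  for (std::size_t i = 0; i < n_rows; ++i) {
    for (std::size_t j = 0; j < n_cols; ++j) {
      float v = x[i * n_cols + j];
      if (!std::isnan(v) && v != missing) {
        counts[i] += 1;  // one more valid entry in row i
      }
    }
  }
  return counts.empty() ? 0 : *std::max_element(counts.cbegin(), counts.cend());
}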

View File

@@ -23,6 +23,7 @@
#include "../../src/collective/communicator-inl.h"
#include "../../src/common/common.h"
#include "../../src/common/threading_utils.h"
#include "../../src/data/array_interface.h"
#include "filesystem.h" // dmlc::TemporaryDirectory
#include "xgboost/linalg.h"
@@ -388,6 +389,23 @@ inline Context CreateEmptyGenericParam(int gpu_id) {
return tparam;
}
inline std::unique_ptr<HostDeviceVector<GradientPair>> GenerateGradients(
std::size_t rows, bst_target_t n_targets = 1) {
auto p_gradients = std::make_unique<HostDeviceVector<GradientPair>>(rows * n_targets);
auto& h_gradients = p_gradients->HostVector();
xgboost::SimpleLCG gen;
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
for (std::size_t i = 0; i < rows * n_targets; ++i) {
auto grad = dist(&gen);
auto hess = dist(&gen);
h_gradients[i] = GradientPair{grad, hess};
}
return p_gradients;
}
/**
* \brief Make a context that uses CUDA.
*/
@@ -509,11 +527,7 @@ void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&&
xgboost::collective::Finalize();
};
#if defined(_OPENMP)
#pragma omp parallel num_threads(world_size)
{
auto rank = omp_get_thread_num();
run(rank);
}
common::ParallelFor(world_size, world_size, run);
#else
std::vector<std::thread> threads;
for (auto rank = 0; rank < world_size; rank++) {
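The common::ParallelFor(world_size, world_size, run) call above replaces the hand-written OpenMP block: it invokes the callback once per rank, with as many threads as ranks. A rough std::thread equivalent, for illustration only (not the threading_utils.h implementation):

#include <cstdint>
#include <thread>
#include <vector>

// Run fn(0) .. fn(n - 1) concurrently, one thread per index, then join them all.
template <typename Fn>
void ParallelForSketch(std::int32_t n, Fn&& fn) {
  std::vector<std::thread> workers;
  for (std::int32_t i = 0; i < n; ++i) {
    workers.emplace_back([&fn, i] { fn(i); });
  }
  for (auto& t : workers) {
    t.join();  // every rank must finish before returning
  }
}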

View File

@@ -13,6 +13,7 @@
#include "../../../plugin/federated/federated_server.h"
#include "../../../src/collective/communicator-inl.h"
#include "../../../src/common/threading_utils.h"
namespace xgboost {
@@ -75,11 +76,7 @@ void RunWithFederatedCommunicator(int32_t world_size, std::string const& server_
xgboost::collective::Finalize();
};
#if defined(_OPENMP)
#pragma omp parallel num_threads(world_size)
{
auto rank = omp_get_thread_num();
run(rank);
}
common::ParallelFor(world_size, world_size, run);
#else
std::vector<std::thread> threads;
for (auto rank = 0; rank < world_size; rank++) {

View File

@@ -15,9 +15,9 @@
namespace xgboost {
namespace {
auto MakeModel(std::string objective, std::shared_ptr<DMatrix> dmat) {
auto MakeModel(std::string tree_method, std::string objective, std::shared_ptr<DMatrix> dmat) {
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
learner->SetParam("tree_method", "approx");
learner->SetParam("tree_method", tree_method);
learner->SetParam("objective", objective);
if (objective.find("quantile") != std::string::npos) {
learner->SetParam("quantile_alpha", "0.5");
@@ -35,7 +35,7 @@ auto MakeModel(std::string objective, std::shared_ptr<DMatrix> dmat) {
}
void VerifyObjective(size_t rows, size_t cols, float expected_base_score, Json expected_model,
std::string objective) {
std::string tree_method, std::string objective) {
auto const world_size = collective::GetWorldSize();
auto const rank = collective::GetRank();
std::shared_ptr<DMatrix> dmat{RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(rank == 0)};
@@ -61,7 +61,7 @@ void VerifyObjective(size_t rows, size_t cols, float expected_base_score, Json e
}
std::shared_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};
auto model = MakeModel(objective, sliced);
auto model = MakeModel(tree_method, objective, sliced);
auto base_score = GetBaseScore(model);
ASSERT_EQ(base_score, expected_base_score);
ASSERT_EQ(model, expected_model);
@@ -76,7 +76,7 @@ class FederatedLearnerTest : public ::testing::TestWithParam<std::string> {
void SetUp() override { server_ = std::make_unique<ServerForTest>(kWorldSize); }
void TearDown() override { server_.reset(nullptr); }
void Run(std::string objective) {
void Run(std::string tree_method, std::string objective) {
static auto constexpr kRows{16};
static auto constexpr kCols{16};
@@ -99,17 +99,22 @@ class FederatedLearnerTest : public ::testing::TestWithParam<std::string> {
}
}
auto model = MakeModel(objective, dmat);
auto model = MakeModel(tree_method, objective, dmat);
auto score = GetBaseScore(model);
RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyObjective, kRows, kCols,
score, model, objective);
score, model, tree_method, objective);
}
};
TEST_P(FederatedLearnerTest, Objective) {
TEST_P(FederatedLearnerTest, Approx) {
std::string objective = GetParam();
this->Run(objective);
this->Run("approx", objective);
}
TEST_P(FederatedLearnerTest, Hist) {
std::string objective = GetParam();
this->Run("hist", objective);
}
INSTANTIATE_TEST_SUITE_P(FederatedLearnerObjective, FederatedLearnerTest,

View File

@@ -33,7 +33,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
auto evaluator = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
common::HistCollection hist;
std::vector<GradientPair> row_gpairs = {
{1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
@@ -167,7 +167,7 @@ TEST(HistEvaluator, Apply) {
param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
auto evaluator_ = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
CPUExpandEntry entry{0, 0};
entry.split.loss_chg = 10.0f;
@@ -195,7 +195,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
// check the evaluator is returning the optimal split
std::vector<FeatureType> ft{FeatureType::kCategorical};
auto sampler = std::make_shared<common::ColumnSampler>();
HistEvaluator<CPUExpandEntry> evaluator{&ctx, &param_, info_, sampler};
HistEvaluator evaluator{&ctx, &param_, info_, sampler};
evaluator.InitRoot(GradStats{total_gpair_});
RegTree tree;
std::vector<CPUExpandEntry> entries(1);
@@ -225,7 +225,7 @@ auto CompareOneHotAndPartition(bool onehot) {
RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();
auto sampler = std::make_shared<common::ColumnSampler>();
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
auto evaluator = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
std::vector<CPUExpandEntry> entries(1);
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>(&ctx, {32, param.sparse_threshold})) {
@@ -276,7 +276,7 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
info.num_col_ = 1;
info.feature_types = {FeatureType::kCategorical};
Context ctx;
auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param_, info, sampler};
auto evaluator = HistEvaluator{&ctx, &param_, info, sampler};
evaluator.InitRoot(GradStats{parent_sum_});
std::vector<CPUExpandEntry> entries(1);

View File

@@ -79,7 +79,7 @@ TEST(CPUMonoConstraint, Basic) {
auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
auto sampler = std::make_shared<common::ColumnSampler>();
HistEvaluator<CPUExpandEntry> evaluator{&ctx, &param, Xy->Info(), sampler};
HistEvaluator evaluator{&ctx, &param, Xy->Info(), sampler};
evaluator.InitRoot(GradStats{2.0, 2.0});
SplitEntry split;

View File

@@ -9,28 +9,20 @@
#include "../helpers.h"
namespace xgboost::tree {
std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols){
return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
}
std::unique_ptr<HostDeviceVector<GradientPair>> GenerateGradients(std::size_t rows) {
auto p_gradients = std::make_unique<HostDeviceVector<GradientPair>>(rows);
auto& h_gradients = p_gradients->HostVector();
xgboost::SimpleLCG gen;
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
for (std::size_t i = 0; i < rows; ++i) {
auto grad = dist(&gen);
auto hess = dist(&gen);
h_gradients[i] = GradientPair{grad, hess};
std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols,
bool categorical = false) {
if (categorical) {
std::vector<FeatureType> ft(cols);
for (size_t i = 0; i < ft.size(); ++i) {
ft[i] = (i % 3 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;
}
return RandomDataGenerator(rows, cols, 0.6f).Seed(3).Type(ft).MaxCategory(17).GenerateDMatrix();
} else {
return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
}
return p_gradients;
}
TEST(GrowHistMaker, InteractionConstraint)
{
TEST(GrowHistMaker, InteractionConstraint) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;
auto p_dmat = GenerateDMatrix(kRows, kCols);
@@ -74,8 +66,9 @@ TEST(GrowHistMaker, InteractionConstraint)
}
namespace {
void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_tree) {
auto p_dmat = GenerateDMatrix(rows, cols);
void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical,
RegTree const& expected_tree) {
auto p_dmat = GenerateDMatrix(rows, cols, categorical);
auto p_gradients = GenerateGradients(rows);
Context ctx;
ObjInfo task{ObjInfo::kRegression};
@@ -90,27 +83,21 @@ void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_t
param.Init(Args{});
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
ASSERT_EQ(tree.NumExtraNodes(), 10);
ASSERT_EQ(tree[0].SplitIndex(), 1);
ASSERT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
FeatureMap fmap;
auto json = tree.DumpModel(fmap, false, "json");
auto expected_json = expected_tree.DumpModel(fmap, false, "json");
Json json{Object{}};
tree.SaveModel(&json);
Json expected_json{Object{}};
expected_tree.SaveModel(&expected_json);
ASSERT_EQ(json, expected_json);
}
} // anonymous namespace
TEST(GrowHistMaker, ColumnSplit) {
void TestColumnSplit(bool categorical) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;
RegTree expected_tree{1u, kCols};
ObjInfo task{ObjInfo::kRegression};
{
auto p_dmat = GenerateDMatrix(kRows, kCols);
auto p_dmat = GenerateDMatrix(kRows, kCols, categorical);
auto p_gradients = GenerateGradients(kRows);
Context ctx;
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
@@ -121,6 +108,12 @@ TEST(GrowHistMaker, ColumnSplit) {
}
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit, kRows, kCols, std::cref(expected_tree));
RunWithInMemoryCommunicator(kWorldSize, VerifyColumnSplit, kRows, kCols, categorical,
std::cref(expected_tree));
}
} // anonymous namespace
TEST(GrowHistMaker, ColumnSplitNumerical) { TestColumnSplit(false); }
TEST(GrowHistMaker, ColumnSplitCategorical) { TestColumnSplit(true); }
} // namespace xgboost::tree
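The column-split checks above compare trees through their JSON serialization rather than a text dump. A standalone sketch of that pattern (the helper name is hypothetical; RegTree::SaveModel and Json equality are exactly what the tests use):

#include <xgboost/json.h>        // Json, Object
#include <xgboost/tree_model.h>  // RegTree

// Two trees are considered equal when their serialized models match.
bool TreesEqualSketch(xgboost::RegTree const& a, xgboost::RegTree const& b) {
  xgboost::Json ja{xgboost::Object{}};
  xgboost::Json jb{xgboost::Object{}};
  a.SaveModel(&ja);
  b.SaveModel(&jb);
  return ja == jb;
}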

View File

@@ -113,7 +113,6 @@ void VerifyColumnSplitPartitioner(bst_target_t n_targets, size_t n_samples,
for (auto const& page : Xy->GetBatches<SparsePage>()) {
GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
bst_feature_t const split_ind = 0;
common::ColumnMatrix column_indices;
column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
{
@@ -194,11 +193,65 @@ void TestColumnSplitPartitioner(bst_target_t n_targets) {
auto constexpr kWorkers = 4;
RunWithInMemoryCommunicator(kWorkers, VerifyColumnSplitPartitioner<ExpandEntry>, n_targets,
n_samples, n_features, base_rowid, Xy, min_value, mid_value, mid_partitioner);
n_samples, n_features, base_rowid, Xy, min_value, mid_value,
mid_partitioner);
}
} // anonymous namespace
TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner<CPUExpandEntry>(1); }
TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner<MultiExpandEntry>(3); }
namespace {
void VerifyColumnSplit(bst_row_t rows, bst_feature_t cols, bst_target_t n_targets,
RegTree const& expected_tree) {
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
auto p_gradients = GenerateGradients(rows, n_targets);
Context ctx;
ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);
std::unique_ptr<DMatrix> sliced{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
RegTree tree{n_targets, cols};
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});
Json json{Object{}};
tree.SaveModel(&json);
Json expected_json{Object{}};
expected_tree.SaveModel(&expected_json);
ASSERT_EQ(json, expected_json);
}
void TestColumnSplit(bst_target_t n_targets) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;
RegTree expected_tree{n_targets, kCols};
ObjInfo task{ObjInfo::kRegression};
{
auto Xy = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
auto p_gradients = GenerateGradients(kRows, n_targets);
Context ctx;
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});
updater->Update(&param, p_gradients.get(), Xy.get(), position, {&expected_tree});
}
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, VerifyColumnSplit, kRows, kCols, n_targets,
std::cref(expected_tree));
}
} // anonymous namespace
TEST(QuantileHist, ColumnSplit) { TestColumnSplit(1); }
TEST(QuantileHist, ColumnSplitMultiTarget) { TestColumnSplit(3); }
} // namespace xgboost::tree