sync Jun 1
commit 42867a4805

.github/workflows/jvm_tests.yml (vendored)
@@ -75,3 +75,13 @@ jobs:
       if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
       env:
         RABIT_MOCK: ON
+
+    - name: Build and Test XGBoost4J with scala 2.13
+      run: |
+        rm -rfv build/
+        cd jvm-packages
+        mvn -B clean install test -Pdefault,scala-2.13
+      if: matrix.os == 'ubuntu-latest' # Distributed training doesn't work on Windows
+      env:
+        RABIT_MOCK: ON
@@ -171,6 +171,8 @@ if (USE_CUDA)
   set(GEN_CODE "")
   format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
   add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
+
+  find_package(CUDAToolkit REQUIRED)
 endif (USE_CUDA)

 if (USE_HIP)

@@ -124,13 +124,6 @@ function(format_gencode_flags flags out)
   endif (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
 endfunction(format_gencode_flags flags)

-macro(enable_nvtx target)
-  find_package(NVTX REQUIRED)
-  target_include_directories(${target} PRIVATE "${NVTX_INCLUDE_DIR}")
-  target_link_libraries(${target} PRIVATE "${NVTX_LIBRARY}")
-  target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1)
-endmacro()
-
 # Set CUDA related flags to target. Must be used after code `format_gencode_flags`.
 function(xgboost_set_cuda_flags target)
   target_compile_options(${target} PRIVATE

@@ -162,11 +155,14 @@ function(xgboost_set_cuda_flags target)
   endif (USE_DEVICE_DEBUG)

   if (USE_NVTX)
-    enable_nvtx(${target})
+    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1)
   endif (USE_NVTX)

   target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1)
-  target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/gputreeshap)
+  target_include_directories(
+    ${target} PRIVATE
+    ${xgboost_SOURCE_DIR}/gputreeshap
+    ${CUDAToolkit_INCLUDE_DIRS})

   if (MSVC)
     target_compile_options(${target} PRIVATE

@@ -314,7 +310,7 @@ macro(xgboost_target_link_libraries target)
   endif (USE_NCCL)

   if (USE_NVTX)
-    enable_nvtx(${target})
+    target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
   endif (USE_NVTX)

   if (RABIT_BUILD_MPI)
@@ -1,26 +0,0 @@
if (NVTX_LIBRARY)
  unset(NVTX_LIBRARY CACHE)
endif (NVTX_LIBRARY)

set(NVTX_LIB_NAME nvToolsExt)

find_path(NVTX_INCLUDE_DIR
  NAMES nvToolsExt.h
  PATHS ${CUDA_HOME}/include ${CUDA_INCLUDE} /usr/local/cuda/include)

find_library(NVTX_LIBRARY
  NAMES nvToolsExt
  PATHS ${CUDA_HOME}/lib64 /usr/local/cuda/lib64)

message(STATUS "Using nvtx library: ${NVTX_LIBRARY}")

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NVTX DEFAULT_MSG
  NVTX_INCLUDE_DIR NVTX_LIBRARY)

mark_as_advanced(
  NVTX_INCLUDE_DIR
  NVTX_LIBRARY
)
@@ -38,19 +38,18 @@ def using_dask_matrix(client: Client, X, y):


 def using_quantile_device_dmatrix(client: Client, X, y):
-    """`DaskQuantileDMatrix` is a data type specialized for `gpu_hist`, tree
-    method that reduces memory overhead. When training on GPU pipeline, it's
-    preferred over `DaskDMatrix`.
+    """`DaskQuantileDMatrix` is a data type specialized for `gpu_hist` and `hist` tree
+    methods for reducing memory usage.

     .. versionadded:: 1.2.0

     """
     # Input must be on GPU for `DaskQuantileDMatrix`.
     X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X))
     y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y))

-    # `DaskQuantileDMatrix` is used instead of `DaskDMatrix`, be careful
-    # that it can not be used for anything else other than training.
+    # `DaskQuantileDMatrix` is used instead of `DaskDMatrix`, be careful that it can not
+    # be used for anything else other than training unless a reference is specified. See
+    # the `ref` argument of `DaskQuantileDMatrix`.
     dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
     output = xgb.dask.train(
         client, {"verbosity": 2, "tree_method": "gpu_hist"}, dtrain, num_boost_round=4
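A hedged sketch of the `ref` mechanism mentioned in the new comment (the validation inputs `X_valid`/`y_valid` and the helper name are assumptions for illustration, not part of this demo): a second `DaskQuantileDMatrix` built with `ref=dtrain` reuses the quantile cuts computed for training, which is what makes it usable beyond plain training.

```python
from xgboost import dask as dxgb

def train_with_validation(client, X, y, X_valid, y_valid):
    dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
    # Sharing quantile information via `ref` keeps the validation matrix
    # consistent with training and avoids recomputing the sketch.
    dvalid = dxgb.DaskQuantileDMatrix(client, X_valid, y_valid, ref=dtrain)
    return dxgb.train(
        client,
        {"tree_method": "gpu_hist"},
        dtrain,
        num_boost_round=4,
        evals=[(dvalid, "validation")],
    )
```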
@@ -27,20 +27,29 @@ In the following two sections, we will provide a step by step walk through of im
the ``Squared Log Error (SLE)`` objective function:

.. math::
-   \frac{1}{2}[log(pred + 1) - log(label + 1)]^2
+   \frac{1}{2}[\log(pred + 1) - \log(label + 1)]^2

and its default metric ``Root Mean Squared Log Error (RMSLE)``:

.. math::
-   \sqrt{\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}
+   \sqrt{\frac{1}{N}[\log(pred + 1) - \log(label + 1)]^2}

Although XGBoost has native support for said functions, using it for demonstration
provides us the opportunity of comparing the result from our own implementation and the
one from XGBoost internal for learning purposes. After finishing this tutorial, we should
be able to provide our own functions for rapid experiments. And at the end, we will
provide some notes on non-identity link functions along with examples of using custom metric
-and objective with `scikit-learn` interface.
+and objective with the `scikit-learn` interface.

If we compute the gradient of said objective function:

.. math::
   g = \frac{\partial{objective}}{\partial{pred}} = \frac{\log(pred + 1) - \log(label + 1)}{pred + 1}

As well as the hessian (the second derivative of the objective):

.. math::
   h = \frac{\partial^2{objective}}{\partial{pred}^2} = \frac{ - \log(pred + 1) + \log(label + 1) + 1}{(pred + 1)^2}
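Taken together, the gradient and hessian above are everything a custom objective must return. A minimal sketch of how they translate into code (the name ``squared_log`` and the clipping guard are illustrative assumptions, not part of this commit):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    def squared_log(predt: np.ndarray, dtrain: xgb.DMatrix):
        """Squared Log Error objective; returns the gradient and hessian."""
        y = dtrain.get_label()
        predt[predt < -1] = -1 + 1e-6  # keep log(pred + 1) well defined
        grad = (np.log1p(predt) - np.log1p(y)) / (predt + 1)
        hess = (-np.log1p(predt) + np.log1p(y) + 1) / np.power(predt + 1, 2)
        return grad, hess

    # e.g. xgb.train({"tree_method": "hist"}, dtrain, num_boost_round=10, obj=squared_log)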
*****************************
Customized Objective Function

@@ -519,6 +519,9 @@ Troubleshooting
  the ``NCCL_SOCKET_IFNAME``. In addition, you can use ``NCCL_DEBUG`` to obtain debug
  logs.

+- If NCCL fails to initialize in a container environment, it might be caused by limited
+  system shared memory. With docker, one can try the flag: `--shm-size=4g`.
+
 - MIG (Multi-Instance GPU) is not yet supported by NCCL. You will receive an error message
   that includes `Multiple processes within a communication group ...` upon initialization.
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2018 XGBoost contributors
+/**
+ * Copyright 2018-2023, XGBoost contributors
  * \brief span class based on ISO++20 span
  *
  * About NOLINTs in this file:

@@ -33,20 +33,16 @@
 #include <xgboost/logging.h>

-#include <cinttypes>   // size_t
-#include <limits>      // numeric_limits
-#include <iterator>
-#include <type_traits>
+#include <cstdio>
+#include <iterator>
+#include <limits>  // numeric_limits
+#include <type_traits>
+#include <utility>  // for move

 #if defined(__CUDACC__)
 #include <cuda_runtime.h>
 #elif defined(__HIP_PLATFORM_AMD__)
 #include <hip/hip_runtime.h>

 extern "C" void __assert_fail (const char *__assertion, const char *__file,
                                unsigned int __line, const char *__function)
   noexcept (true) __attribute__ ((__noreturn__));

 #endif

 /*!

@@ -127,7 +123,7 @@ namespace common {

 #define __ASSERT_STR_HELPER(x) #x

-#if 1
+#if 0
 #define HIP_KERNEL_CHECK(cond) \
   (XGBOOST_EXPECT((cond), true) \
     ? static_cast<void>(0) \
@@ -710,6 +706,44 @@ XGBOOST_DEVICE auto as_writable_bytes(Span<T, E> s) __span_noexcept ->  // NOLIN
     Span<byte, detail::ExtentAsBytesValue<T, E>::value> {
   return {reinterpret_cast<byte*>(s.data()), s.size_bytes()};
 }

+/**
+ * \brief A simple custom Span type that uses general iterator instead of pointer.
+ */
+template <typename It>
+class IterSpan {
+ public:
+  using value_type = typename std::iterator_traits<It>::value_type;  // NOLINT
+  using index_type = std::size_t;                                    // NOLINT
+  using iterator = It;                                               // NOLINT
+
+ private:
+  It it_;
+  index_type size_{0};
+
+ public:
+  IterSpan() = default;
+  XGBOOST_DEVICE IterSpan(It it, index_type size) : it_{std::move(it)}, size_{size} {}
+  XGBOOST_DEVICE explicit IterSpan(common::Span<It, dynamic_extent> span)
+      : it_{span.data()}, size_{span.size()} {}
+
+  [[nodiscard]] XGBOOST_DEVICE index_type size() const noexcept { return size_; }  // NOLINT
+  [[nodiscard]] XGBOOST_DEVICE decltype(auto) operator[](index_type i) const { return it_[i]; }
+  [[nodiscard]] XGBOOST_DEVICE decltype(auto) operator[](index_type i) { return it_[i]; }
+  [[nodiscard]] XGBOOST_DEVICE bool empty() const noexcept { return size() == 0; }  // NOLINT
+  [[nodiscard]] XGBOOST_DEVICE It data() const noexcept { return it_; }  // NOLINT
+  [[nodiscard]] XGBOOST_DEVICE IterSpan<It> subspan(  // NOLINT
+      index_type _offset, index_type _count = dynamic_extent) const {
+    SPAN_CHECK((_count == dynamic_extent) ? (_offset <= size()) : (_offset + _count <= size()));
+    return {data() + _offset, _count == dynamic_extent ? size() - _offset : _count};
+  }
+  [[nodiscard]] XGBOOST_DEVICE constexpr iterator begin() const noexcept {  // NOLINT
+    return {this, 0};
+  }
+  [[nodiscard]] XGBOOST_DEVICE constexpr iterator end() const noexcept {  // NOLINT
+    return {this, size()};
+  }
+};
 }  // namespace common
 }  // namespace xgboost
jvm-packages/.gitignore (vendored)

@@ -1,2 +1,4 @@
 tracker.py
 build.sh
+xgboost4j-tester/pom.xml
+xgboost4j-tester/iris.csv
@@ -36,6 +36,19 @@ XGBoost4J, XGBoost4J-Spark, etc. in maven repository is compiled with g++-4.8.5.
   <version>latest_version_num</version>
 </dependency>
 ```
+or
+```
+<dependency>
+  <groupId>ml.dmlc</groupId>
+  <artifactId>xgboost4j_2.13</artifactId>
+  <version>latest_version_num</version>
+</dependency>
+<dependency>
+  <groupId>ml.dmlc</groupId>
+  <artifactId>xgboost4j-spark_2.13</artifactId>
+  <version>latest_version_num</version>
+</dependency>
+```

<b>sbt</b>
```sbt

@@ -47,7 +60,6 @@ libraryDependencies ++= Seq(

For the latest release version number, please check [here](https://github.com/dmlc/xgboost/releases).

-To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead.

### Access SNAPSHOT version
@@ -85,6 +97,19 @@ Then add XGBoost4J as a dependency:
   <version>latest_version_num-SNAPSHOT</version>
 </dependency>
 ```
+or with Scala 2.13
+```
+<dependency>
+  <groupId>ml.dmlc</groupId>
+  <artifactId>xgboost4j_2.13</artifactId>
+  <version>latest_version_num-SNAPSHOT</version>
+</dependency>
+<dependency>
+  <groupId>ml.dmlc</groupId>
+  <artifactId>xgboost4j-spark_2.13</artifactId>
+  <version>latest_version_num-SNAPSHOT</version>
+</dependency>
+```

<b>sbt</b>
```sbt

@@ -96,7 +121,9 @@ libraryDependencies ++= Seq(

For the latest release version number, please check [the repository listing](https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/list.html).

+### GPU algorithm
 To enable the GPU algorithm (`tree_method='gpu_hist'`), use artifacts `xgboost4j-gpu_2.12` and `xgboost4j-spark-gpu_2.12` instead.
+Note that Scala 2.13 is not yet supported by spark-rapids (see [NVIDIA/spark-rapids#1525](https://github.com/NVIDIA/spark-rapids/issues/1525)), so the GPU algorithm can only be used with Scala 2.12.

## Examples
@@ -5,7 +5,7 @@
   <modelVersion>4.0.0</modelVersion>

   <groupId>ml.dmlc</groupId>
-  <artifactId>xgboost-jvm_2.12</artifactId>
+  <artifactId>xgboost-jvm</artifactId>
   <version>2.0.0-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>XGBoost JVM Package</name>

@@ -33,7 +33,8 @@
     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
     <maven.compiler.source>1.8</maven.compiler.source>
     <maven.compiler.target>1.8</maven.compiler.target>
-    <flink.version>1.17.0</flink.version>
+    <flink.version>1.17.1</flink.version>
     <junit.version>4.13.2</junit.version>
     <spark.version>3.4.0</spark.version>
     <spark.version.gpu>3.3.2</spark.version.gpu>
     <scala.version>2.12.17</scala.version>

@@ -45,6 +46,8 @@
     <cudf.version>23.04.0</cudf.version>
     <spark.rapids.version>23.04.1</spark.rapids.version>
     <cudf.classifier>cuda11</cudf.classifier>
+    <scalatest.version>3.2.16</scalatest.version>
+    <scala-collection-compat.version>2.10.0</scala-collection-compat.version>
   </properties>
   <repositories>
     <repository>

@@ -71,6 +74,14 @@
     </modules>
   </profile>

+  <profile>
+    <id>scala-2.13</id>
+    <properties>
+      <scala.binary.version>2.13</scala.binary.version>
+      <scala.version>2.13.10</scala.version>
+    </properties>
+  </profile>
+
   <!-- gpu profile with both cpu and gpu test suites -->
   <profile>
     <id>gpu</id>

@@ -451,7 +462,7 @@
     <plugins>
       <plugin>
         <artifactId>maven-project-info-reports-plugin</artifactId>
-        <version>3.4.3</version>
+        <version>3.4.4</version>
       </plugin>
       <plugin>
         <groupId>net.alchim31.maven</groupId>

@@ -467,6 +478,7 @@
     </plugins>
   </reporting>
   <dependencies>
+
     <dependency>
       <groupId>com.esotericsoftware</groupId>
       <artifactId>kryo</artifactId>

@@ -483,6 +495,11 @@
       <artifactId>scala-library</artifactId>
       <version>${scala.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.scala-lang.modules</groupId>
+      <artifactId>scala-collection-compat_${scala.binary.version}</artifactId>
+      <version>${scala-collection-compat.version}</version>
+    </dependency>
     <dependency>
       <groupId>commons-logging</groupId>
       <artifactId>commons-logging</artifactId>

@@ -491,13 +508,13 @@
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>
-      <version>3.2.16</version>
+      <version>${scalatest.version}</version>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalactic</groupId>
       <artifactId>scalactic_${scala.binary.version}</artifactId>
-      <version>3.2.15</version>
+      <version>${scalatest.version}</version>
       <scope>test</scope>
     </dependency>
   </dependencies>
@@ -5,10 +5,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm_2.12</artifactId>
+    <artifactId>xgboost-jvm</artifactId>
     <version>2.0.0-SNAPSHOT</version>
   </parent>
-  <artifactId>xgboost4j-example_2.12</artifactId>
   <name>xgboost4j-example</name>
+  <artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
+  <version>2.0.0-SNAPSHOT</version>
   <packaging>jar</packaging>
   <build>
@@ -73,12 +73,13 @@ object DistTrainWithFlink {
       .map(_.f1.f0)
       .returns(testDataTypeHint)

-    val paramMap = mapAsJavaMap(Map(
+    val paramMap = Map(
       ("eta", "0.1".asInstanceOf[AnyRef]),
       ("max_depth", "2"),
       ("objective", "binary:logistic"),
       ("verbosity", "1")
-    ))
+    )
+      .asJava

     // number of iterations
     val round = 2
@@ -20,10 +20,9 @@ import org.apache.spark.ml.{Pipeline, PipelineModel}
 import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
 import org.apache.spark.ml.feature._
 import org.apache.spark.ml.tuning._
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{DataFrame, SparkSession}
 import org.apache.spark.sql.types._

-import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassifier, XGBoostClassificationModel}
+import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier}

 // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)

@@ -50,6 +49,13 @@ object SparkMLlibPipeline {
       .appName("XGBoost4J-Spark Pipeline Example")
       .getOrCreate()

+    run(spark, inputPath, nativeModelPath, pipelineModelPath, treeMethod, numWorkers)
+      .show(false)
+  }
+
+  private[spark] def run(spark: SparkSession, inputPath: String, nativeModelPath: String,
+                         pipelineModelPath: String, treeMethod: String,
+                         numWorkers: Int): DataFrame = {
     // Load dataset
     val schema = new StructType(Array(
       StructField("sepal length", DoubleType, true),

@@ -90,11 +96,11 @@ object SparkMLlibPipeline {
     val labelConverter = new IndexToString()
       .setInputCol("prediction")
       .setOutputCol("realLabel")
-      .setLabels(labelIndexer.labels)
+      .setLabels(labelIndexer.labelsArray(0))

     val pipeline = new Pipeline()
       .setStages(Array(assembler, labelIndexer, booster, labelConverter))
-    val model = pipeline.fit(training)
+    val model: PipelineModel = pipeline.fit(training)

     // Batch prediction
     val prediction = model.transform(test)

@@ -136,6 +142,6 @@ object SparkMLlibPipeline {

     // Load a saved model and serving
     val model2 = PipelineModel.load(pipelineModelPath)
-    model2.transform(test).show(false)
+    model2.transform(test)
   }
 }
@@ -17,9 +17,8 @@
 package ml.dmlc.xgboost4j.scala.example.spark

 import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
-
 import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler}
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{DataFrame, SparkSession}
 import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}

 // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)

@@ -38,6 +37,12 @@ object SparkTraining {

     val spark = SparkSession.builder().getOrCreate()
     val inputPath = args(0)
+    val results: DataFrame = run(spark, inputPath, treeMethod, numWorkers)
+    results.show()
+  }
+
+  private[spark] def run(spark: SparkSession, inputPath: String,
+                         treeMethod: String, numWorkers: Int): DataFrame = {
     val schema = new StructType(Array(
       StructField("sepal length", DoubleType, true),
       StructField("sepal width", DoubleType, true),

@@ -81,7 +86,6 @@ object SparkTraining {
       setFeaturesCol("features").
       setLabelCol("classIndex")
     val xgbClassificationModel = xgbClassifier.fit(train)
-    val results = xgbClassificationModel.transform(test)
-    results.show()
+    xgbClassificationModel.transform(test)
   }
 }
@@ -0,0 +1,123 @@
/*
 Copyright (c) 2014-2023 by Contributors

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
package ml.dmlc.xgboost4j.scala.example.spark

import org.apache.spark.sql.SparkSession
import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite
import org.slf4j.LoggerFactory

import java.io.File
import java.nio.file.{Files, StandardOpenOption}
import scala.jdk.CollectionConverters._
import scala.util.{Random, Try}

class SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll {
  private val logger = LoggerFactory.getLogger(classOf[SparkExamplesTest])
  private val random = new Random(42)
  protected val numWorkers: Int = scala.math.min(Runtime.getRuntime.availableProcessors(), 4)

  private val pathToTestDataset = Files.createTempFile("", "iris.csv").toAbsolutePath
  private var spark: SparkSession = _

  override def beforeAll(): Unit = {

    def generateLine(i: Int): String = {
      val getIrisName = (int: Int) => {
        int % 3 match {
          case 0 => "Iris-versicolor"
          case 1 => "Iris-virginica"
          case 2 => "Iris-setosa"
        }
      }
      val generateValue = () => Math.abs(random.nextInt(99) * 0.1)
      val sepalLength = generateValue()
      val sepalWidth = generateValue()
      val petalLength = generateValue()
      val petalWidth = generateValue()
      val irisName = getIrisName(Math.abs(random.nextInt()) + i)
      s"$sepalLength,$sepalWidth,$petalLength,$petalWidth,$irisName"
    }

    if (spark == null) {
      spark = SparkSession
        .builder()
        .appName("XGBoost4J-Spark Pipeline Example")
        .master(s"local[${numWorkers}]")
        .config("spark.ui.enabled", value = false)
        .config("spark.driver.memory", "512m")
        .config("spark.barrier.sync.timeout", 10)
        .config("spark.task.cpus", 1)
        .getOrCreate()
      spark.sparkContext.setLogLevel("ERROR")
    }
    val data = (0 until 150)
      .map(i => generateLine(i))
      .toList
      .asJava
    Files.write(pathToTestDataset,
      data,
      StandardOpenOption.CREATE,
      StandardOpenOption.WRITE,
      StandardOpenOption.TRUNCATE_EXISTING)
    logger.info(s"${new String(Files.readAllBytes(pathToTestDataset))}")
  }

  override def afterAll(): Unit = {
    if (spark != null) {
      spark.stop()
      cleanExternalCache(spark.sparkContext.appName)
      spark = null
    }

    Try(Files.deleteIfExists(pathToTestDataset))
      .recover {
        case e =>
          logger.warn(
            s"Could not delete temporary file $pathToTestDataset. Please, remove it manually",
            e
          )
          true
      }
  }

  private def cleanExternalCache(prefix: String): Unit = {
    val dir = new File(".")
    for (file <- dir.listFiles() if file.getName.startsWith(prefix)) {
      file.delete()
    }
  }

  test("Smoke test for SparkMLlibPipeline example") {
    SparkMLlibPipeline.run(spark, pathToTestDataset.toString, "target/native-model",
      "target/pipeline-model", "auto", 2)
  }

  test("Smoke test for SparkTraining example") {
    val spark = SparkSession
      .builder()
      .appName("XGBoost4J-Spark Pipeline Example")
      .master(s"local[${numWorkers}]")
      .config("spark.ui.enabled", value = false)
      .config("spark.driver.memory", "512m")
      .config("spark.barrier.sync.timeout", 10)
      .config("spark.task.cpus", 1)
      .getOrCreate()

    SparkTraining.run(spark, pathToTestDataset.toString, "auto", 2)
  }
}
@@ -5,9 +5,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm_2.12</artifactId>
+    <artifactId>xgboost-jvm</artifactId>
     <version>2.0.0-SNAPSHOT</version>
   </parent>

   <name>xgboost4j-flink</name>
+  <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
+  <version>2.0.0-SNAPSHOT</version>
   <properties>
@@ -5,10 +5,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm_2.12</artifactId>
+    <artifactId>xgboost-jvm</artifactId>
     <version>2.0.0-SNAPSHOT</version>
   </parent>
-  <artifactId>xgboost4j-gpu_2.12</artifactId>
+  <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
   <name>xgboost4j-gpu</name>
   <version>2.0.0-SNAPSHOT</version>
   <packaging>jar</packaging>

@@ -35,13 +36,13 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <version>4.13.2</version>
+      <version>${junit.version}</version>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>
-      <version>3.2.15</version>
+      <version>${scalatest.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
@@ -5,10 +5,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm_2.12</artifactId>
+    <artifactId>xgboost-jvm</artifactId>
     <version>2.0.0-SNAPSHOT</version>
   </parent>
-  <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
   <name>xgboost4j-spark-gpu</name>
+  <artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>
   <build>
     <plugins>
       <plugin>

@@ -24,7 +25,7 @@
     <dependency>
       <groupId>ml.dmlc</groupId>
       <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-      <version>2.0.0-SNAPSHOT</version>
+      <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
@@ -5,10 +5,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm_2.12</artifactId>
+    <artifactId>xgboost-jvm</artifactId>
     <version>2.0.0-SNAPSHOT</version>
   </parent>
-  <artifactId>xgboost4j-spark_2.12</artifactId>
   <name>xgboost4j-spark</name>
+  <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
   <build>
     <plugins>
       <plugin>

@@ -24,7 +25,7 @@
     <dependency>
       <groupId>ml.dmlc</groupId>
       <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-      <version>2.0.0-SNAPSHOT</version>
+      <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
@@ -8,25 +8,28 @@ pom_template = """
   <modelVersion>4.0.0</modelVersion>

   <groupId>ml.dmlc</groupId>
-  <artifactId>xgboost4j-tester_2.12</artifactId>
+  <artifactId>xgboost4j-tester_{scala_binary_version}</artifactId>
   <version>1.0-SNAPSHOT</version>
-  <name>xgboost4j-tester_2.12</name>
+  <name>xgboost4j-tester</name>

   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <maven.compiler.source>{maven_compiler_source}</maven.compiler.source>
     <maven.compiler.target>{maven_compiler_target}</maven.compiler.target>
+    <junit.version>4.13.2</junit.version>
     <spark.version>{spark_version}</spark.version>
     <scala.version>{scala_version}</scala.version>
+    <scalatest.version>3.2.15</scalatest.version>
     <scala.binary.version>{scala_binary_version}</scala.binary.version>
+    <kryo.version>5.5.0</kryo.version>
   </properties>

   <dependencies>
     <dependency>
       <groupId>com.esotericsoftware</groupId>
       <artifactId>kryo</artifactId>
-      <version>4.0.2</version>
+      <version>${{kryo.version}}</version>
     </dependency>
     <dependency>
       <groupId>org.scala-lang</groupId>

@@ -48,29 +51,12 @@ pom_template = """
       <artifactId>commons-logging</artifactId>
       <version>1.2</version>
     </dependency>
-    <dependency>
-      <groupId>com.typesafe.akka</groupId>
-      <artifactId>akka-testkit_${{scala.binary.version}}</artifactId>
-      <version>2.6.20</version>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${{scala.binary.version}}</artifactId>
-      <version>3.0.8</version>
+      <version>${{scalatest.version}}</version>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>org.scalactic</groupId>
-      <artifactId>scalactic_${{scala.binary.version}}</artifactId>
-      <version>3.2.15</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-      <version>3.9</version>
-    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${{scala.binary.version}}</artifactId>

@@ -92,7 +78,7 @@ pom_template = """
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <version>4.13.2</version>
+      <version>${{junit.version}}</version>
       <scope>test</scope>
     </dependency>
     <dependency>

@@ -122,36 +108,9 @@ pom_template = """

   <build>
     <plugins>
-      <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
-      <plugin>
-        <artifactId>maven-clean-plugin</artifactId>
-        <version>3.1.0</version>
-      </plugin>
-      <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
-      <plugin>
-        <artifactId>maven-resources-plugin</artifactId>
-        <version>3.0.2</version>
-      </plugin>
-      <plugin>
-        <artifactId>maven-compiler-plugin</artifactId>
-        <version>3.8.0</version>
-      </plugin>
-      <plugin>
-        <artifactId>maven-jar-plugin</artifactId>
-        <version>3.0.2</version>
-      </plugin>
-      <plugin>
-        <artifactId>maven-install-plugin</artifactId>
-        <version>2.5.2</version>
-      </plugin>
-      <plugin>
-        <artifactId>maven-deploy-plugin</artifactId>
-        <version>2.8.2</version>
-      </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-assembly-plugin</artifactId>
         <version>2.4</version>
         <configuration>
           <descriptorRefs>
             <descriptorRef>jar-with-dependencies</descriptorRef>

@@ -171,22 +130,12 @@ pom_template = """
         </execution>
       </executions>
     </plugin>
-    <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
-    <plugin>
-      <artifactId>maven-site-plugin</artifactId>
-      <version>3.7.1</version>
-    </plugin>
-    <plugin>
-      <artifactId>maven-project-info-reports-plugin</artifactId>
-      <version>3.0.0</version>
-    </plugin>
     <plugin>
       <groupId>org.apache.maven.plugins</groupId>
       <artifactId>maven-surefire-plugin</artifactId>
       <version>2.22.1</version>
       <configuration>
         <dependenciesToScan>
-          <dependency>ml.dmlc:xgboost4j_2.12</dependency>
+          <dependency>ml.dmlc:xgboost4j_${{scala.binary.version}}</dependency>
         </dependenciesToScan>
       </configuration>
     </plugin>
@@ -1,20 +0,0 @@
package ml.dmlc.xgboost4j.tester;

import static org.junit.Assert.assertTrue;

import org.junit.Test;

/**
 * Unit test for simple App.
 */
public class AppTest
{
    /**
     * Rigorous Test :-)
     */
    @Test
    public void shouldAnswerWithTrue()
    {
        assertTrue( true );
    }
}
@@ -5,10 +5,11 @@
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>ml.dmlc</groupId>
-    <artifactId>xgboost-jvm_2.12</artifactId>
+    <artifactId>xgboost-jvm</artifactId>
     <version>2.0.0-SNAPSHOT</version>
   </parent>
-  <artifactId>xgboost4j_2.12</artifactId>
   <name>xgboost4j</name>
+  <artifactId>xgboost4j_${scala.binary.version}</artifactId>
   <version>2.0.0-SNAPSHOT</version>
   <packaging>jar</packaging>

@@ -28,13 +29,13 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <version>4.13.2</version>
+      <version>${junit.version}</version>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_${scala.binary.version}</artifactId>
-      <version>3.2.16</version>
+      <version>${scalatest.version}</version>
       <scope>provided</scope>
     </dependency>
   </dependencies>
@@ -37,7 +37,7 @@ trait EvalTrait extends IEvaluation {
    */
  def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float

-  private[scala] def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = {
+  def eval(predicts: Array[Array[Float]], jdmat: java.DMatrix): Float = {
    require(predicts.length == jdmat.getLabel.length, "predicts size and label size must match " +
      s" predicts size: ${predicts.length}, label size: ${jdmat.getLabel.length}")
    eval(predicts, new DMatrix(jdmat))

@@ -31,7 +31,7 @@ trait ObjectiveTrait extends IObjective {
    */
  def getGradient(predicts: Array[Array[Float]], dtrain: DMatrix): List[Array[Float]]

-  private[scala] def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix):
+  def getGradient(predicts: Array[Array[Float]], dtrain: JDMatrix):
      java.util.List[Array[Float]] = {
    getGradient(predicts, new DMatrix(dtrain)).asJava
  }
@@ -17,12 +17,11 @@
 package ml.dmlc.xgboost4j.scala

 import java.io.InputStream
-import ml.dmlc.xgboost4j.java.{XGBoostError, XGBoost => JXGBoost}
+import ml.dmlc.xgboost4j.java.{XGBoostError, Booster => JBooster, XGBoost => JXGBoost}
-import scala.collection.JavaConverters._
+import scala.jdk.CollectionConverters._
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.fs.Path

 /**
  * XGBoost Scala Training function.

@@ -40,7 +39,12 @@ object XGBoost {
       earlyStoppingRound: Int = 0,
       prevBooster: Booster,
       checkpointParams: Option[ExternalCheckpointParams]): Booster = {
-    val jWatches = watches.mapValues(_.jDMatrix).asJava
+
+    // we have to filter null value for customized obj and eval
+    val jParams: java.util.Map[String, AnyRef] =
+      params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).toMap.asJava
+
+    val jWatches = watches.mapValues(_.jDMatrix).toMap.asJava
     val jBooster = if (prevBooster == null) {
       null
     } else {

@@ -51,8 +55,7 @@ object XGBoost {
       map(cp => {
         JXGBoost.trainAndSaveCheckpoint(
           dtrain.jDMatrix,
-          // we have to filter null value for customized obj and eval
-          params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava,
+          jParams,
           numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster,
           cp.checkpointInterval,
           cp.checkpointPath,

@@ -61,8 +64,7 @@ object XGBoost {
       getOrElse(
         JXGBoost.train(
           dtrain.jDMatrix,
-          // we have to filter null value for customized obj and eval
-          params.filter(_._2 != null).mapValues(_.toString.asInstanceOf[AnyRef]).asJava,
+          jParams,
           numRounds, jWatches, metrics, obj, eval, earlyStoppingRound, jBooster)
       )
     if (prevBooster == null) {
@@ -82,9 +82,10 @@ def from_pystr_to_cstr(data: Union[str, List[str]]) -> Union[bytes, ctypes.Array
     if isinstance(data, str):
         return bytes(data, "utf-8")
     if isinstance(data, list):
-        pointers: ctypes.Array[ctypes.c_char_p] = (ctypes.c_char_p * len(data))()
-        data_as_bytes = [bytes(d, "utf-8") for d in data]
-        pointers[:] = data_as_bytes  # type: ignore
+        data_as_bytes: List[bytes] = [bytes(d, "utf-8") for d in data]
+        pointers: ctypes.Array[ctypes.c_char_p] = (
+            ctypes.c_char_p * len(data_as_bytes)
+        )(*data_as_bytes)
         return pointers
     raise TypeError()
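A quick round-trip sketch for the rewritten helper (assuming the private import path from `xgboost.core`; the sample names are illustrative): for a list input the result is a `ctypes` array of `c_char_p` whose elements decode back to the original strings.

```python
import ctypes
from xgboost.core import from_pystr_to_cstr  # private helper, path assumed

names = ["feature_0", "feature_1"]
carr = from_pystr_to_cstr(names)  # ctypes.Array[ctypes.c_char_p]
assert isinstance(carr, ctypes.Array)
assert [c.decode("utf-8") for c in carr] == names
```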
@@ -319,7 +320,7 @@ def _cuda_array_interface(data: DataType) -> bytes:
 def ctypes2numpy(cptr: CNumericPtr, length: int, dtype: Type[np.number]) -> np.ndarray:
     """Convert a ctypes pointer array to a numpy array."""
     ctype: Type[CNumeric] = _numpy2ctypes_type(dtype)
-    if not isinstance(cptr, ctypes.POINTER(ctype)):  # type: ignore
+    if not isinstance(cptr, ctypes.POINTER(ctype)):
         raise RuntimeError(f"expected {ctype} pointer")
     res = np.zeros(length, dtype=dtype)
     if not ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0]):

@@ -2460,9 +2461,9 @@ class Booster:
             raise TypeError("Unknown file type: ", fname)

         if self.attr("best_iteration") is not None:
-            self.best_iteration = int(self.attr("best_iteration"))  # type: ignore
+            self.best_iteration = int(cast(int, self.attr("best_iteration")))
         if self.attr("best_score") is not None:
-            self.best_score = float(self.attr("best_score"))  # type: ignore
+            self.best_score = float(cast(float, self.attr("best_score")))

     def num_boosted_rounds(self) -> int:
         """Get number of boosted rounds. For gblinear this is reset to 0 after
@@ -882,7 +882,7 @@ def _transform_cupy_array(data: DataType) -> CupyT:

     if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"):
         data = cupy.array(data, copy=False)
-    if data.dtype.hasobject or data.dtype in [cupy.float16, cupy.bool_]:
+    if data.dtype.hasobject or data.dtype in [cupy.bool_]:
         data = data.astype(cupy.float32, copy=False)
     return data
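One consequence of dropping `float16` from the conversion list: half-precision cupy inputs now flow through unchanged, while booleans are still upcast. A hedged check (requires a CUDA device; `_transform_cupy_array` is the private helper from this diff and the import path is an assumption):

```python
import cupy as cp
from xgboost.data import _transform_cupy_array

x = cp.ones((4, 2), dtype=cp.float16)
assert _transform_cupy_array(x).dtype == cp.float16  # no longer upcast
assert _transform_cupy_array(x.astype(cp.bool_)).dtype == cp.float32
```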
@@ -337,10 +337,8 @@ class _SparkXGBParams(

         if self.getOrDefault(self.features_cols):
             if not self.getOrDefault(self.use_gpu):
-                raise ValueError("features_cols param requires enabling use_gpu.")
-
-            get_logger(self.__class__.__name__).warning(
-                "If features_cols param set, then features_col param is ignored."
+                raise ValueError(
+                    "features_col param with list value requires enabling use_gpu."
                 )

         if self.getOrDefault("objective") is not None:
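The tightened check above turns what used to be a warning into a hard error. A hypothetical construction that satisfies it (column names are illustrative): passing a list to `features_col` implicitly sets `features_cols`, which requires `use_gpu=True`.

```python
from xgboost.spark import SparkXGBClassifier

clf = SparkXGBClassifier(
    features_col=["sepal_length", "sepal_width", "petal_length", "petal_width"],
    label_col="class_index",
    use_gpu=True,  # mandatory when features_col is a list of columns
    num_workers=2,
)
```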
@@ -547,6 +545,8 @@ FeatureProp = namedtuple(


 class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
+    _input_kwargs: Dict[str, Any]
+
     def __init__(self) -> None:
         super().__init__()
         self._set_xgb_params_default()

@@ -576,6 +576,11 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
             raise ValueError("Invalid param name: 'arbitrary_params_dict'.")

         for k, v in kwargs.items():
+            # We're not allowing user use features_cols directly.
+            if k == self.features_cols.name:
+                raise ValueError(
+                    f"Unsupported param '{k}' please use features_col instead."
+                )
             if k in _inverse_pyspark_param_alias_map:
                 raise ValueError(
                     f"Please use param name {_inverse_pyspark_param_alias_map[k]} instead."

@@ -591,6 +596,9 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
                 k = real_k

             if self.hasParam(k):
+                if k == "features_col" and isinstance(v, list):
+                    self._set(**{"features_cols": v})
+                else:
                     self._set(**{str(k): v})
             else:
                 if (
@@ -1,10 +1,13 @@
 """Xgboost pyspark integration submodule for estimator API."""
-# pylint: disable=too-many-ancestors
+# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
+# pylint: disable=unused-argument, too-many-locals

-from typing import Any, Type
+from typing import Any, Dict, List, Optional, Type, Union

 import numpy as np
 from pyspark import keyword_only
 from pyspark.ml.param import Param, Params
 from pyspark.ml.param.shared import HasProbabilityCol, HasRawPredictionCol
@@ -83,8 +86,8 @@ class SparkXGBRegressor(_SparkXGBEstimator):
     :py:class:`~pyspark.ml.classification.OneVsRest`

     SparkXGBRegressor automatically supports most of the parameters in
-    `xgboost.XGBRegressor` constructor and most of the parameters used in
-    :py:class:`xgboost.XGBRegressor` fit and predict method.
+    :py:class:`xgboost.XGBRegressor` constructor and most of the parameters used in
+    :py:meth:`xgboost.XGBRegressor.fit` and :py:meth:`xgboost.XGBRegressor.predict` method.

     SparkXGBRegressor doesn't support setting `gpu_id` but support another param `use_gpu`,
     see doc below for more details.

@@ -97,13 +100,23 @@ class SparkXGBRegressor(_SparkXGBEstimator):
     SparkXGBRegressor doesn't support setting `nthread` xgboost param, instead, the `nthread`
     param for each xgboost worker will be set equal to `spark.task.cpus` config value.

-    callbacks:
-        The export and import of the callback functions are at best effort.
-        For details, see :py:attr:`xgboost.spark.SparkXGBRegressor.callbacks` param doc.
-    validation_indicator_col
-        For params related to `xgboost.XGBRegressor` training
-        with evaluation dataset's supervision, set
-        :py:attr:`xgboost.spark.SparkXGBRegressor.validation_indicator_col`

     Parameters
     ----------

+    features_col:
+        When the value is string, it requires the features column name to be vector type.
+        When the value is a list of string, it requires all the feature columns to be numeric types.
+    label_col:
+        Label column name. Default to "label".
+    prediction_col:
+        Prediction column name. Default to "prediction"
+    pred_contrib_col:
+        Contribution prediction column name.
+    validation_indicator_col:
+        For params related to `xgboost.XGBRegressor` training with
+        evaluation dataset's supervision,
+        set :py:attr:`xgboost.spark.SparkXGBRegressor.validation_indicator_col`
+        parameter instead of setting the `eval_set` parameter in `xgboost.XGBRegressor`
+        fit method.
     weight_col:

@@ -111,26 +124,40 @@ class SparkXGBRegressor(_SparkXGBEstimator):
         :py:attr:`xgboost.spark.SparkXGBRegressor.weight_col` parameter instead of setting
         `sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBRegressor`
         fit method.
     xgb_model:
         Set the value to be the instance returned by
         :func:`xgboost.spark.SparkXGBRegressorModel.get_booster`.
-    num_workers:
-        Integer that specifies the number of XGBoost workers to use.
-        Each XGBoost worker corresponds to one spark task.
-    use_gpu:
-        Boolean that specifies whether the executors are running on GPU
-        instances.
     base_margin_col:
         To specify the base margins of the training and validation
         dataset, set :py:attr:`xgboost.spark.SparkXGBRegressor.base_margin_col` parameter
         instead of setting `base_margin` and `base_margin_eval_set` in the
-        `xgboost.XGBRegressor` fit method. Note: this isn't available for distributed
-        training.
+        `xgboost.XGBRegressor` fit method.

-    .. Note:: The Parameters chart above contains parameters that need special handling.
+    num_workers:
+        How many XGBoost workers to be used to train.
+        Each XGBoost worker corresponds to one spark task.
+    use_gpu:
+        Boolean value to specify whether the executors are running on GPU
+        instances.
+    force_repartition:
+        Boolean value to specify if forcing the input dataset to be repartitioned
+        before XGBoost training.
+    repartition_random_shuffle:
+        Boolean value to specify if randomly shuffling the dataset when repartitioning is required.
+    enable_sparse_data_optim:
+        Boolean value to specify if enabling sparse data optimization, if True,
+        Xgboost DMatrix object will be constructed from sparse matrix instead of
+        dense matrix.

     kwargs:
         A dictionary of xgboost parameters, please refer to
         https://xgboost.readthedocs.io/en/stable/parameter.html

+    Note
+    ----
+
+    The Parameters chart above contains parameters that need special handling.
+    For a full list of parameters, see entries with `Param(parent=...` below.

-    .. Note:: This API is experimental.
+    This API is experimental.


     Examples
     --------

@@ -155,9 +182,27 @@ class SparkXGBRegressor(_SparkXGBEstimator):

     """

-    def __init__(self, **kwargs: Any) -> None:
+    @keyword_only
+    def __init__(
+        self,
+        *,
+        features_col: Union[str, List[str]] = "features",
+        label_col: str = "label",
+        prediction_col: str = "prediction",
+        pred_contrib_col: Optional[str] = None,
+        validation_indicator_col: Optional[str] = None,
+        weight_col: Optional[str] = None,
+        base_margin_col: Optional[str] = None,
+        num_workers: int = 1,
+        use_gpu: bool = False,
+        force_repartition: bool = False,
+        repartition_random_shuffle: bool = False,
+        enable_sparse_data_optim: bool = False,
+        **kwargs: Dict[str, Any],
+    ) -> None:
         super().__init__()
-        self.setParams(**kwargs)
+        input_kwargs = self._input_kwargs
+        self.setParams(**input_kwargs)

     @classmethod
     def _xgb_cls(cls) -> Type[XGBRegressor]:
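With `@keyword_only`, PySpark records the explicitly passed arguments in `self._input_kwargs`, so only user-supplied values reach `setParams`. A hedged usage sketch (values illustrative):

```python
from xgboost.spark import SparkXGBRegressor

reg = SparkXGBRegressor(
    features_col="features",
    label_col="label",
    validation_indicator_col="is_val",
    num_workers=2,
    max_depth=5,  # any extra xgboost parameter travels through **kwargs
)
```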
@@ -199,8 +244,8 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
     :py:class:`~pyspark.ml.classification.OneVsRest`

     SparkXGBClassifier automatically supports most of the parameters in
-    `xgboost.XGBClassifier` constructor and most of the parameters used in
-    :py:class:`xgboost.XGBClassifier` fit and predict method.
+    :py:class:`xgboost.XGBClassifier` constructor and most of the parameters used in
+    :py:meth:`xgboost.XGBClassifier.fit` and :py:meth:`xgboost.XGBClassifier.predict` method.

     SparkXGBClassifier doesn't support setting `gpu_id` but support another param `use_gpu`,
     see doc below for more details.

@@ -220,13 +265,21 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
     Parameters
     ----------

-    callbacks:
-        The export and import of the callback functions are at best effort. For
-        details, see :py:attr:`xgboost.spark.SparkXGBClassifier.callbacks` param doc.
+    features_col:
+        When the value is string, it requires the features column name to be vector type.
+        When the value is a list of string, it requires all the feature columns to be numeric types.
+    label_col:
+        Label column name. Default to "label".
+    prediction_col:
+        Prediction column name. Default to "prediction"
     probability_col:
         Column name for predicted class conditional probabilities. Default to probabilityCol
     raw_prediction_col:
         The `output_margin=True` is implicitly supported by the
         `rawPredictionCol` output column, which is always returned with the predicted margin
         values.
+    pred_contrib_col:
+        Contribution prediction column name.
     validation_indicator_col:
         For params related to `xgboost.XGBClassifier` training with
         evaluation dataset's supervision,

@@ -238,26 +291,39 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
         :py:attr:`xgboost.spark.SparkXGBClassifier.weight_col` parameter instead of setting
         `sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBClassifier`
         fit method.
     xgb_model:
         Set the value to be the instance returned by
         :func:`xgboost.spark.SparkXGBClassifierModel.get_booster`.
-    num_workers:
-        Integer that specifies the number of XGBoost workers to use.
-        Each XGBoost worker corresponds to one spark task.
-    use_gpu:
-        Boolean that specifies whether the executors are running on GPU
-        instances.
     base_margin_col:
         To specify the base margins of the training and validation
         dataset, set :py:attr:`xgboost.spark.SparkXGBClassifier.base_margin_col` parameter
         instead of setting `base_margin` and `base_margin_eval_set` in the
-        `xgboost.XGBClassifier` fit method. Note: this isn't available for distributed
-        training.
+        `xgboost.XGBClassifier` fit method.

-    .. Note:: The Parameters chart above contains parameters that need special handling.
+    num_workers:
+        How many XGBoost workers to be used to train.
+        Each XGBoost worker corresponds to one spark task.
+    use_gpu:
+        Boolean value to specify whether the executors are running on GPU
+        instances.
+    force_repartition:
+        Boolean value to specify if forcing the input dataset to be repartitioned
+        before XGBoost training.
+    repartition_random_shuffle:
+        Boolean value to specify if randomly shuffling the dataset when repartitioning is required.
+    enable_sparse_data_optim:
+        Boolean value to specify if enabling sparse data optimization, if True,
+        Xgboost DMatrix object will be constructed from sparse matrix instead of
+        dense matrix.

     kwargs:
         A dictionary of xgboost parameters, please refer to
         https://xgboost.readthedocs.io/en/stable/parameter.html

+    Note
+    ----
+
+    The Parameters chart above contains parameters that need special handling.
+    For a full list of parameters, see entries with `Param(parent=...` below.

-    .. Note:: This API is experimental.
+    This API is experimental.

     Examples
     --------

@@ -281,14 +347,34 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction

     """

-    def __init__(self, **kwargs: Any) -> None:
+    @keyword_only
+    def __init__(
+        self,
+        *,
+        features_col: Union[str, List[str]] = "features",
+        label_col: str = "label",
+        prediction_col: str = "prediction",
+        probability_col: str = "probability",
+        raw_prediction_col: str = "rawPrediction",
+        pred_contrib_col: Optional[str] = None,
+        validation_indicator_col: Optional[str] = None,
+        weight_col: Optional[str] = None,
+        base_margin_col: Optional[str] = None,
+        num_workers: int = 1,
+        use_gpu: bool = False,
+        force_repartition: bool = False,
+        repartition_random_shuffle: bool = False,
+        enable_sparse_data_optim: bool = False,
+        **kwargs: Dict[str, Any],
+    ) -> None:
         super().__init__()
         # The default 'objective' param value comes from sklearn `XGBClassifier` ctor,
         # but in pyspark we will automatically set objective param depending on
         # binary or multinomial input dataset, and we need to remove the fixed default
         # param value as well to avoid causing ambiguity.
+        input_kwargs = self._input_kwargs
+        self.setParams(**input_kwargs)
         self._setDefault(objective=None)
-        self.setParams(**kwargs)

     @classmethod
     def _xgb_cls(cls) -> Type[XGBClassifier]:
@@ -334,8 +420,8 @@ class SparkXGBRanker(_SparkXGBEstimator):
     :py:class:`~pyspark.ml.classification.OneVsRest`

     SparkXGBRanker automatically supports most of the parameters in
-    `xgboost.XGBRanker` constructor and most of the parameters used in
-    :py:class:`xgboost.XGBRanker` fit and predict method.
+    :py:class:`xgboost.XGBRanker` constructor and most of the parameters used in
+    :py:meth:`xgboost.XGBRanker.fit` and :py:meth:`xgboost.XGBRanker.predict` method.

     SparkXGBRanker doesn't support setting `gpu_id` but support another param `use_gpu`,
     see doc below for more details.

@@ -355,39 +441,53 @@ class SparkXGBRanker(_SparkXGBEstimator):
     Parameters
     ----------

-    callbacks:
-        The export and import of the callback functions are at best effort. For
-        details, see :py:attr:`xgboost.spark.SparkXGBRanker.callbacks` param doc.
+    features_col:
+        When the value is string, it requires the features column name to be vector type.
+        When the value is a list of string, it requires all the feature columns to be numeric types.
+    label_col:
+        Label column name. Default to "label".
+    prediction_col:
+        Prediction column name. Default to "prediction"
+    pred_contrib_col:
+        Contribution prediction column name.
     validation_indicator_col:
         For params related to `xgboost.XGBRanker` training with
         evaluation dataset's supervision,
-        set :py:attr:`xgboost.spark.XGBRanker.validation_indicator_col`
-        parameter instead of setting the `eval_set` parameter in `xgboost.XGBRanker`
+        set :py:attr:`xgboost.spark.SparkXGBRanker.validation_indicator_col`
+        parameter instead of setting the `eval_set` parameter in :py:class:`xgboost.XGBRanker`
         fit method.
     weight_col:
         To specify the weight of the training and validation dataset, set
         :py:attr:`xgboost.spark.SparkXGBRanker.weight_col` parameter instead of setting
-        `sample_weight` and `sample_weight_eval_set` parameter in `xgboost.XGBRanker`
+        `sample_weight` and `sample_weight_eval_set` parameter in :py:class:`xgboost.XGBRanker`
         fit method.
     xgb_model:
         Set the value to be the instance returned by
         :func:`xgboost.spark.SparkXGBRankerModel.get_booster`.
-    num_workers:
-        Integer that specifies the number of XGBoost workers to use.
-        Each XGBoost worker corresponds to one spark task.
-    use_gpu:
-        Boolean that specifies whether the executors are running on GPU
-        instances.
     base_margin_col:
         To specify the base margins of the training and validation
         dataset, set :py:attr:`xgboost.spark.SparkXGBRanker.base_margin_col` parameter
         instead of setting `base_margin` and `base_margin_eval_set` in the
-        `xgboost.XGBRanker` fit method.
+        :py:class:`xgboost.XGBRanker` fit method.
     qid_col:
-        To specify the qid of the training and validation
-        dataset, set :py:attr:`xgboost.spark.SparkXGBRanker.qid_col` parameter
-        instead of setting `qid` / `group`, `eval_qid` / `eval_group` in the
-        `xgboost.XGBRanker` fit method.
+        Query id column name.

+    num_workers:
+        How many XGBoost workers to be used to train.
+        Each XGBoost worker corresponds to one spark task.
+    use_gpu:
+        Boolean value to specify whether the executors are running on GPU
+        instances.
+    force_repartition:
+        Boolean value to specify if forcing the input dataset to be repartitioned
+        before XGBoost training.
+    repartition_random_shuffle:
+        Boolean value to specify if randomly shuffling the dataset when repartitioning is required.
+    enable_sparse_data_optim:
+        Boolean value to specify if enabling sparse data optimization, if True,
+        Xgboost DMatrix object will be constructed from sparse matrix instead of
+        dense matrix.

     kwargs:
         A dictionary of xgboost parameters, please refer to
         https://xgboost.readthedocs.io/en/stable/parameter.html

     .. Note:: The Parameters chart above contains parameters that need special handling.
        For a full list of parameters, see entries with `Param(parent=...` below.
@@ -426,9 +526,28 @@ class SparkXGBRanker(_SparkXGBEstimator):
     >>> model.transform(df_test).show()
     """

-    def __init__(self, **kwargs: Any) -> None:
+    @keyword_only
+    def __init__(
+        self,
+        *,
+        features_col: Union[str, List[str]] = "features",
+        label_col: str = "label",
+        prediction_col: str = "prediction",
+        pred_contrib_col: Optional[str] = None,
+        validation_indicator_col: Optional[str] = None,
+        weight_col: Optional[str] = None,
+        base_margin_col: Optional[str] = None,
+        qid_col: Optional[str] = None,
+        num_workers: int = 1,
+        use_gpu: bool = False,
+        force_repartition: bool = False,
+        repartition_random_shuffle: bool = False,
+        enable_sparse_data_optim: bool = False,
+        **kwargs: Dict[str, Any],
+    ) -> None:
         super().__init__()
-        self.setParams(**kwargs)
+        input_kwargs = self._input_kwargs
+        self.setParams(**input_kwargs)

     @classmethod
     def _xgb_cls(cls) -> Type[XGBRanker]:
@ -3,6 +3,7 @@
|
||||
*/
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "communicator.h"
|
||||
|
||||
@ -224,5 +225,46 @@ inline void Allreduce(double *send_receive_buffer, size_t count) {
Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kDouble, op);
}

template <typename T>
struct AllgatherVResult {
std::vector<std::size_t> offsets;
std::vector<std::size_t> sizes;
std::vector<T> result;
};

/**
* @brief Gathers variable-length data from all processes and distributes it to all processes.
*
* We assume each worker has the same number of inputs, but each input may be of a different size.
*
* @param inputs All the inputs from the local worker.
* @param sizes Sizes of each input.
*/
template <typename T>
inline AllgatherVResult<T> AllgatherV(std::vector<T> const &inputs,
std::vector<std::size_t> const &sizes) {
auto num_inputs = sizes.size();

// Gather the sizes across all workers.
std::vector<std::size_t> all_sizes(num_inputs * GetWorldSize());
std::copy_n(sizes.cbegin(), sizes.size(), all_sizes.begin() + num_inputs * GetRank());
collective::Allgather(all_sizes.data(), all_sizes.size() * sizeof(std::size_t));

// Calculate input offsets (std::exclusive_scan).
std::vector<std::size_t> offsets(all_sizes.size());
for (std::size_t i = 1; i < offsets.size(); i++) {
offsets[i] = offsets[i - 1] + all_sizes[i - 1];
}

// Gather all the inputs.
auto total_input_size = offsets.back() + all_sizes.back();
std::vector<T> all_inputs(total_input_size);
std::copy_n(inputs.cbegin(), inputs.size(), all_inputs.begin() + offsets[num_inputs * GetRank()]);
// We cannot use allgather here, since each worker might have a different size.
Allreduce<Operation::kMax>(all_inputs.data(), all_inputs.size());

return {offsets, all_sizes, all_inputs};
}

} // namespace collective
} // namespace xgboost
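
For reference, the offsets returned above are just an exclusive scan over the gathered sizes, so input i of worker w starts at offsets[w * num_inputs + i]. A minimal, self-contained sketch of that bookkeeping in plain C++ (the sizes are hypothetical, and no collective calls are made):

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical gathered sizes: 2 workers x 2 inputs each.
  std::vector<std::size_t> all_sizes{3, 1, 4, 2};

  // Exclusive scan, exactly as in AllgatherV above.
  std::vector<std::size_t> offsets(all_sizes.size());
  for (std::size_t i = 1; i < offsets.size(); i++) {
    offsets[i] = offsets[i - 1] + all_sizes[i - 1];
  }

  // The flat gather buffer is the last offset plus the last size long.
  std::size_t total = offsets.back() + all_sizes.back();
  for (auto o : offsets) std::cout << o << ' ';  // prints: 0 3 4 8
  std::cout << "(total " << total << ")\n";      // total 10
}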

@ -12,19 +12,22 @@
namespace xgboost {
namespace collective {

thread_local int Communicator::device_ordinal_{-1};
thread_local std::unique_ptr<DeviceCommunicator> Communicator::device_communicator_{};

void Communicator::Finalize() {
communicator_->Shutdown();
communicator_.reset(new NoOpCommunicator());
device_ordinal_ = -1;
device_communicator_.reset(nullptr);
}

DeviceCommunicator* Communicator::GetDevice(int device_ordinal) {
if (!device_communicator_ || device_ordinal_ != device_ordinal) {
device_ordinal_ = device_ordinal;
thread_local auto old_device_ordinal = -1;
// If the number of GPUs changes, we need to re-initialize NCCL.
thread_local auto old_world_size = -1;
if (!device_communicator_ || device_ordinal != old_device_ordinal ||
communicator_->GetWorldSize() != old_world_size) {
old_device_ordinal = device_ordinal;
old_world_size = communicator_->GetWorldSize();
#if defined(XGBOOST_USE_NCCL) || defined(XGBOOST_USE_RCCL)
if (type_ != CommunicatorType::kFederated) {
device_communicator_.reset(new NcclDeviceCommunicator(device_ordinal, Get()));
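
The new guard keys the cached device communicator on both the device ordinal and the world size. A single-process sketch of the same memoize-and-rebuild pattern, using an illustrative FakeComm stand-in rather than the real NCCL communicator:

#include <iostream>
#include <memory>

// Illustrative stand-in for the cached device communicator.
struct FakeComm {
  int device;
  int world;
};

FakeComm* GetComm(int device, int world) {
  thread_local std::unique_ptr<FakeComm> cached;
  thread_local int old_device = -1;
  thread_local int old_world = -1;
  if (!cached || device != old_device || world != old_world) {
    old_device = device;
    old_world = world;
    cached.reset(new FakeComm{device, world});  // rebuild on any change
    std::cout << "re-initialized: device " << device << ", world " << world << "\n";
  }
  return cached.get();
}

int main() {
  GetComm(0, 2);  // first call: initializes
  GetComm(0, 2);  // cache hit: nothing printed
  GetComm(0, 4);  // world size changed: re-initializes
}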

@ -229,7 +229,6 @@ class Communicator {
static thread_local std::unique_ptr<Communicator> communicator_;
static thread_local CommunicatorType type_;
#if defined(XGBOOST_USE_CUDA) || defined(XGBOOST_USE_HIP)
static thread_local int device_ordinal_;
static thread_local std::unique_ptr<DeviceCommunicator> device_communicator_;
#endif


@ -1,5 +1,5 @@
/**
* Copyright 2022 by XGBoost Contributors
* Copyright 2022-2023, XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_CUDA_CONTEXT_CUH_
#define XGBOOST_COMMON_CUDA_CONTEXT_CUH_
@ -16,21 +16,39 @@ struct CUDAContext {
/**
* \brief Caching thrust policy.
*/
#if defined(XGBOOST_USE_HIP)
auto CTP() const { return thrust::hip::par(caching_alloc_).on(dh::DefaultStream()); }
auto CTP() const {
#if defined(XGBOOST_USE_CUDA)
#if THRUST_MAJOR_VERSION >= 2
return thrust::cuda::par_nosync(caching_alloc_).on(dh::DefaultStream());
#else
auto CTP() const { return thrust::cuda::par(caching_alloc_).on(dh::DefaultStream()); }
return thrust::cuda::par(caching_alloc_).on(dh::DefaultStream());
#endif  // THRUST_MAJOR_VERSION >= 2
#elif defined(XGBOOST_USE_HIP)
#if THRUST_MAJOR_VERSION >= 2
return thrust::hip::par_nosync(caching_alloc_).on(dh::DefaultStream());
#else
return thrust::hip::par(caching_alloc_).on(dh::DefaultStream());
#endif  // THRUST_MAJOR_VERSION >= 2
#endif

}
/**
* \brief Thrust policy without caching allocator.
*/
#if defined(XGBOOST_USE_HIP)
auto TP() const { return thrust::hip::par(alloc_).on(dh::DefaultStream()); }
auto TP() const {
#if defined(XGBOOST_USE_CUDA)
#if THRUST_MAJOR_VERSION >= 2
return thrust::cuda::par_nosync(alloc_).on(dh::DefaultStream());
#else
auto TP() const { return thrust::cuda::par(alloc_).on(dh::DefaultStream()); }
return thrust::cuda::par(alloc_).on(dh::DefaultStream());
#endif  // THRUST_MAJOR_VERSION >= 2
#elif defined(XGBOOST_USE_HIP)
#if THRUST_MAJOR_VERSION >= 2
return thrust::hip::par_nosync(alloc_).on(dh::DefaultStream());
#else
return thrust::hip::par(alloc_).on(dh::DefaultStream());
#endif  // THRUST_MAJOR_VERSION >= 2
#endif

}
auto Stream() const { return dh::DefaultStream(); }
};
} // namespace xgboost
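
The gist of the change: with Thrust 2.x the non-synchronizing par_nosync policy is preferred, so algorithm launches do not eagerly synchronize the stream. A stripped-down, host-compilable sketch of the same version gate (it assumes only that the Thrust headers are on the include path; thrust/version.h is macros only):

#include <thrust/version.h>
#include <iostream>

int main() {
  // Report which policy the gate above would choose for this Thrust.
#if THRUST_MAJOR_VERSION >= 2
  std::cout << "Thrust " << THRUST_MAJOR_VERSION << ": par_nosync available\n";
#else
  std::cout << "Thrust " << THRUST_MAJOR_VERSION << ": falling back to par\n";
#endif
}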

@ -227,9 +227,8 @@ void ProcessBatch(int device, MetaInfo const &info, const SparsePage &page,
return {0, e.index, e.fvalue}; // row_idx is not needed for scanning column size.
});
detail::GetColumnSizesScan(device, num_columns, num_cuts_per_feature,
batch_it, dummy_is_valid,
0, sorted_entries.size(),
&cuts_ptr, &column_sizes_scan);
IterSpan{batch_it, sorted_entries.size()}, dummy_is_valid, &cuts_ptr,
&column_sizes_scan);
auto d_cuts_ptr = cuts_ptr.DeviceSpan();

if (sketch_container->HasCategorical()) {
@ -296,9 +295,8 @@ void ProcessWeightedBatch(int device, const SparsePage& page,
return {0, e.index, e.fvalue}; // row_idx is not needed for scanning column size.
});
detail::GetColumnSizesScan(device, num_columns, num_cuts_per_feature,
batch_it, dummy_is_valid,
0, sorted_entries.size(),
&cuts_ptr, &column_sizes_scan);
IterSpan{batch_it, sorted_entries.size()}, dummy_is_valid, &cuts_ptr,
&column_sizes_scan);
auto d_cuts_ptr = cuts_ptr.DeviceSpan();
if (sketch_container->HasCategorical()) {
detail::RemoveDuplicatedCategories(device, info, d_cuts_ptr,

@ -17,6 +17,10 @@
#include "quantile.cuh"
#include "timer.h"

#if defined(XGBOOST_USE_HIP)
namespace cub = hipcub;
#endif

namespace xgboost {
namespace common {
namespace cuda {
@ -53,24 +57,128 @@ struct EntryCompareOp {
};

// Get column size from adapter batch and for output cuts.
template <typename Iter>
void GetColumnSizesScan(int device, size_t num_columns, size_t num_cuts_per_feature,
Iter batch_iter, data::IsValidFunctor is_valid,
size_t begin, size_t end,
template <std::uint32_t kBlockThreads, typename CounterT, typename BatchIt>
__global__ void GetColumnSizeSharedMemKernel(IterSpan<BatchIt> batch_iter,
data::IsValidFunctor is_valid,
Span<std::size_t> out_column_size) {
extern __shared__ char smem[];

auto smem_cs_ptr = reinterpret_cast<CounterT*>(smem);

dh::BlockFill(smem_cs_ptr, out_column_size.size(), 0);

cub::CTA_SYNC();

auto n = batch_iter.size();

for (auto idx : dh::GridStrideRange(static_cast<std::size_t>(0), n)) {
auto e = batch_iter[idx];
if (is_valid(e)) {
atomicAdd(&smem_cs_ptr[e.column_idx], static_cast<CounterT>(1));
}
}

cub::CTA_SYNC();

auto out_global_ptr = out_column_size;
for (auto i : dh::BlockStrideRange(static_cast<std::size_t>(0), out_column_size.size())) {
atomicAdd(&out_global_ptr[i], static_cast<std::size_t>(smem_cs_ptr[i]));
}
}

template <std::uint32_t kBlockThreads, typename Kernel>
std::uint32_t EstimateGridSize(std::int32_t device, Kernel kernel, std::size_t shared_mem) {
int n_mps = 0;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaDeviceGetAttribute(&n_mps, cudaDevAttrMultiProcessorCount, device));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipDeviceGetAttribute(&n_mps, hipDeviceAttributeMultiprocessorCount, device));
#endif
int n_blocks_per_mp = 0;
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
kBlockThreads, shared_mem));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipOccupancyMaxActiveBlocksPerMultiprocessor(&n_blocks_per_mp, kernel,
kBlockThreads, shared_mem));
#endif
std::uint32_t grid_size = n_blocks_per_mp * n_mps;
return grid_size;
}

/**
* \brief Get the size of each column. This is a histogram with additional handling of
* invalid values.
*
* \tparam BatchIt Type of input adapter batch.
* \tparam force_use_global_memory Used for testing. Force global atomic add.
* \tparam force_use_u64 Used for testing. For u64 as counter in shared memory.
*
* \param device CUDA device ordinal.
* \param batch_iter Iterator for input data from adapter batch.
* \param is_valid Whether an element is considered as missing.
* \param out_column_size Output buffer for the size of each column.
*/
template <typename BatchIt, bool force_use_global_memory = false, bool force_use_u64 = false>
void LaunchGetColumnSizeKernel(std::int32_t device, IterSpan<BatchIt> batch_iter,
data::IsValidFunctor is_valid, Span<std::size_t> out_column_size) {
thrust::fill_n(thrust::device, dh::tbegin(out_column_size), out_column_size.size(), 0);

std::size_t max_shared_memory = dh::MaxSharedMemory(device);
// Not strictly correct as we should use number of samples to determine the type of
// counter. However, the sample size is not known due to sliding window on number of
// elements.
std::size_t n = batch_iter.size();

std::size_t required_shared_memory = 0;
bool use_u32{false};
if (!force_use_u64 && n < static_cast<std::size_t>(std::numeric_limits<std::uint32_t>::max())) {
required_shared_memory = out_column_size.size() * sizeof(std::uint32_t);
use_u32 = true;
} else {
required_shared_memory = out_column_size.size() * sizeof(std::size_t);
use_u32 = false;
}
bool use_shared = required_shared_memory <= max_shared_memory && required_shared_memory != 0;

if (!force_use_global_memory && use_shared) {
CHECK_NE(required_shared_memory, 0);
std::uint32_t constexpr kBlockThreads = 512;
if (use_u32) {
CHECK(!force_use_u64);
auto kernel = GetColumnSizeSharedMemKernel<kBlockThreads, std::uint32_t, BatchIt>;
auto grid_size = EstimateGridSize<kBlockThreads>(device, kernel, required_shared_memory);
dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory, dh::DefaultStream()}(
kernel, batch_iter, is_valid, out_column_size);
} else {
auto kernel = GetColumnSizeSharedMemKernel<kBlockThreads, std::size_t, BatchIt>;
auto grid_size = EstimateGridSize<kBlockThreads>(device, kernel, required_shared_memory);
dh::LaunchKernel{grid_size, kBlockThreads, required_shared_memory, dh::DefaultStream()}(
kernel, batch_iter, is_valid, out_column_size);
}
} else {
auto d_out_column_size = out_column_size;
dh::LaunchN(batch_iter.size(), [=] __device__(size_t idx) {
auto e = batch_iter[idx];
if (is_valid(e)) {
atomicAdd(&d_out_column_size[e.column_idx], static_cast<size_t>(1));
}
});
}
}
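
The launcher picks the counter width and the memory space before launching: 32-bit shared-memory counters when the element count fits, 64-bit otherwise, and global-memory atomics when one counter per column does not fit in shared memory. A host-only sketch of that decision, with hypothetical sizes and an assumed 48 KiB shared-memory budget:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <limits>

int main() {
  std::size_t n_elements = 1000000;    // hypothetical batch size
  std::size_t n_columns = 256;         // hypothetical feature count
  std::size_t max_shared = 48 * 1024;  // assumed 48 KiB shared-memory budget

  // 32-bit counters suffice whenever the element count fits in uint32.
  bool use_u32 =
      n_elements < static_cast<std::size_t>(std::numeric_limits<std::uint32_t>::max());
  std::size_t required =
      n_columns * (use_u32 ? sizeof(std::uint32_t) : sizeof(std::size_t));
  bool use_shared = required != 0 && required <= max_shared;

  std::cout << (use_shared ? "shared" : "global") << " memory, "
            << (use_u32 ? 32 : 64) << "-bit counters, " << required << " bytes\n";
}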

template <typename BatchIt>
void GetColumnSizesScan(int device, size_t num_columns, std::size_t num_cuts_per_feature,
IterSpan<BatchIt> batch_iter, data::IsValidFunctor is_valid,
HostDeviceVector<SketchContainer::OffsetT>* cuts_ptr,
dh::caching_device_vector<size_t>* column_sizes_scan) {
column_sizes_scan->resize(num_columns + 1, 0);
column_sizes_scan->resize(num_columns + 1);
cuts_ptr->SetDevice(device);
cuts_ptr->Resize(num_columns + 1, 0);

dh::XGBCachingDeviceAllocator<char> alloc;
auto d_column_sizes_scan = column_sizes_scan->data().get();
dh::LaunchN(end - begin, [=] __device__(size_t idx) {
auto e = batch_iter[begin + idx];
if (is_valid(e)) {
atomicAdd(&d_column_sizes_scan[e.column_idx], static_cast<size_t>(1));
}
});
auto d_column_sizes_scan = dh::ToSpan(*column_sizes_scan);
LaunchGetColumnSizeKernel(device, batch_iter, is_valid, d_column_sizes_scan);
// Calculate cuts CSC pointer
auto cut_ptr_it = dh::MakeTransformIterator<size_t>(
column_sizes_scan->begin(), [=] __device__(size_t column_size) {
@ -85,8 +193,7 @@ void GetColumnSizesScan(int device, size_t num_columns, size_t num_cuts_per_feat
column_sizes_scan->end(), column_sizes_scan->begin());
#elif defined(XGBOOST_USE_CUDA)
thrust::exclusive_scan(thrust::cuda::par(alloc), cut_ptr_it,
cut_ptr_it + column_sizes_scan->size(),
cuts_ptr->DevicePointer());
cut_ptr_it + column_sizes_scan->size(), cuts_ptr->DevicePointer());
thrust::exclusive_scan(thrust::cuda::par(alloc), column_sizes_scan->begin(),
column_sizes_scan->end(), column_sizes_scan->begin());
#endif
@ -130,29 +237,26 @@ size_t RequiredMemory(bst_row_t num_rows, bst_feature_t num_columns, size_t nnz,

// Count the valid entries in each column and copy them out.
template <typename AdapterBatch, typename BatchIter>
void MakeEntriesFromAdapter(AdapterBatch const& batch, BatchIter batch_iter,
Range1d range, float missing,
size_t columns, size_t cuts_per_feature, int device,
void MakeEntriesFromAdapter(AdapterBatch const& batch, BatchIter batch_iter, Range1d range,
float missing, size_t columns, size_t cuts_per_feature, int device,
HostDeviceVector<SketchContainer::OffsetT>* cut_sizes_scan,
dh::caching_device_vector<size_t>* column_sizes_scan,
dh::device_vector<Entry>* sorted_entries) {
auto entry_iter = dh::MakeTransformIterator<Entry>(
thrust::make_counting_iterator(0llu), [=] __device__(size_t idx) {
return Entry(batch.GetElement(idx).column_idx,
batch.GetElement(idx).value);
return Entry(batch.GetElement(idx).column_idx, batch.GetElement(idx).value);
});
auto n = range.end() - range.begin();
auto span = IterSpan{batch_iter + range.begin(), n};
data::IsValidFunctor is_valid(missing);
// Work out how many valid entries we have in each column
GetColumnSizesScan(device, columns, cuts_per_feature,
batch_iter, is_valid,
range.begin(), range.end(),
cut_sizes_scan,
GetColumnSizesScan(device, columns, cuts_per_feature, span, is_valid, cut_sizes_scan,
column_sizes_scan);
size_t num_valid = column_sizes_scan->back();
// Copy current subset of valid elements into temporary storage and sort
sorted_entries->resize(num_valid);
dh::CopyIf(entry_iter + range.begin(), entry_iter + range.end(),
sorted_entries->begin(), is_valid);
dh::CopyIf(entry_iter + range.begin(), entry_iter + range.end(), sorted_entries->begin(),
is_valid);
}

void SortByWeight(dh::device_vector<float>* weights,

@ -209,7 +209,7 @@ class PartitionBuilder {
BitVector* decision_bits, BitVector* missing_bits) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
std::size_t nid = nodes[node_in_set].nid;
bst_feature_t fid = tree[nid].SplitIndex();
bst_feature_t fid = tree.SplitIndex(nid);
bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical;
auto node_cats = tree.NodeCats(nid);
auto const& cut_values = gmat.cut.Values();
@ -263,14 +263,13 @@ class PartitionBuilder {
template <typename ExpandEntry>
void PartitionByMask(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
const common::Range1d range, GHistIndexMatrix const& gmat,
const common::ColumnMatrix& column_matrix, const RegTree& tree,
const size_t* rid, BitVector const& decision_bits,
const RegTree& tree, const size_t* rid, BitVector const& decision_bits,
BitVector const& missing_bits) {
common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
common::Span<size_t> left = GetLeftBuffer(node_in_set, range.begin(), range.end());
common::Span<size_t> right = GetRightBuffer(node_in_set, range.begin(), range.end());
std::size_t nid = nodes[node_in_set].nid;
bool default_left = tree[nid].DefaultLeft();
bool default_left = tree.DefaultLeft(nid);

auto pred = [&](auto ridx) {
bool go_left = default_left;

@ -7,7 +7,6 @@
#include <utility>

#include "../collective/aggregator.h"
#include "../collective/communicator-inl.h"
#include "../data/adapter.h"
#include "categorical.h"
#include "hist_util.h"
@ -143,6 +142,7 @@ struct QuantileAllreduce {

template <typename WQSketch>
void SketchContainerImpl<WQSketch>::GatherSketchInfo(
MetaInfo const& info,
std::vector<typename WQSketch::SummaryContainer> const &reduced,
std::vector<size_t> *p_worker_segments, std::vector<bst_row_t> *p_sketches_scan,
std::vector<typename WQSketch::Entry> *p_global_sketches) {
@ -168,7 +168,7 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
std::partial_sum(sketch_size.cbegin(), sketch_size.cend(), sketches_scan.begin() + beg_scan + 1);

// Gather all column pointers
collective::Allreduce<collective::Operation::kSum>(sketches_scan.data(), sketches_scan.size());
collective::GlobalSum(info, sketches_scan.data(), sketches_scan.size());
for (int32_t i = 0; i < world; ++i) {
size_t back = (i + 1) * (n_columns + 1) - 1;
auto n_entries = sketches_scan.at(back);
@ -196,7 +196,8 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(

static_assert(sizeof(typename WQSketch::Entry) / 4 == sizeof(float),
"Unexpected size of sketch entry.");
collective::Allreduce<collective::Operation::kSum>(
collective::GlobalSum(
info,
reinterpret_cast<float *>(global_sketches.data()),
global_sketches.size() * sizeof(typename WQSketch::Entry) / sizeof(float));
}
@ -222,8 +223,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(MetaInfo const& info) {
std::vector<size_t> global_feat_ptrs(feature_ptr.size() * world_size, 0);
size_t feat_begin = rank * feature_ptr.size(); // pointer to current worker
std::copy(feature_ptr.begin(), feature_ptr.end(), global_feat_ptrs.begin() + feat_begin);
collective::Allreduce<collective::Operation::kSum>(global_feat_ptrs.data(),
global_feat_ptrs.size());
collective::GlobalSum(info, global_feat_ptrs.data(), global_feat_ptrs.size());

// move all categories into a flatten vector to prepare for allreduce
size_t total = feature_ptr.back();
@ -236,8 +236,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(MetaInfo const& info) {
// indptr for indexing workers
std::vector<size_t> global_worker_ptr(world_size + 1, 0);
global_worker_ptr[rank + 1] = total; // shift 1 to right for constructing the indptr
collective::Allreduce<collective::Operation::kSum>(global_worker_ptr.data(),
global_worker_ptr.size());
collective::GlobalSum(info, global_worker_ptr.data(), global_worker_ptr.size());
std::partial_sum(global_worker_ptr.cbegin(), global_worker_ptr.cend(), global_worker_ptr.begin());
// total number of categories in all workers with all features
auto gtotal = global_worker_ptr.back();
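
The indptr trick here is worth spelling out: each worker writes only its own total at slot rank + 1, the sum-allreduce fills in everyone's totals, and an in-place prefix sum turns them into per-worker offsets. A self-contained sketch with made-up totals for three workers:

#include <cstddef>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  // Hypothetical state right after the (simulated) sum-allreduce:
  // slot 0 stays 0, slot r + 1 holds worker r's total.
  std::vector<std::size_t> global_worker_ptr{0, 4, 2, 5};
  std::partial_sum(global_worker_ptr.cbegin(), global_worker_ptr.cend(),
                   global_worker_ptr.begin());
  for (auto v : global_worker_ptr) std::cout << v << ' ';  // prints: 0 4 6 11
  std::cout << '\n';
}
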
@ -249,8 +248,7 @@ void SketchContainerImpl<WQSketch>::AllreduceCategories(MetaInfo const& info) {
CHECK_EQ(rank_size, total);
std::copy(flatten.cbegin(), flatten.cend(), global_categories.begin() + rank_begin);
// gather values from all workers.
collective::Allreduce<collective::Operation::kSum>(global_categories.data(),
global_categories.size());
collective::GlobalSum(info, global_categories.data(), global_categories.size());
QuantileAllreduce<float> allreduce_result{global_categories, global_worker_ptr, global_feat_ptrs,
categories_.size()};
ParallelFor(categories_.size(), n_threads_, [&](auto fidx) {
@ -323,7 +321,7 @@ void SketchContainerImpl<WQSketch>::AllReduce(
std::vector<bst_row_t> sketches_scan((n_columns + 1) * world, 0);

std::vector<typename WQSketch::Entry> global_sketches;
this->GatherSketchInfo(reduced, &worker_segments, &sketches_scan, &global_sketches);
this->GatherSketchInfo(info, reduced, &worker_segments, &sketches_scan, &global_sketches);

std::vector<typename WQSketch::SummaryContainer> final_sketches(n_columns);

@ -371,7 +369,9 @@ auto AddCategories(std::set<float> const &categories, HistogramCuts *cuts) {
InvalidCategory();
}
auto &cut_values = cuts->cut_values_.HostVector();
auto max_cat = *std::max_element(categories.cbegin(), categories.cend());
// With column-wise data split, the categories may be empty.
auto max_cat =
categories.empty() ? 0.0f : *std::max_element(categories.cbegin(), categories.cend());
CheckMaxCat(max_cat, categories.size());
for (bst_cat_t i = 0; i <= AsCat(max_cat); ++i) {
cut_values.push_back(i);

@ -822,7 +822,8 @@ class SketchContainerImpl {
return group_ind;
}
// Gather sketches from all workers.
void GatherSketchInfo(std::vector<typename WQSketch::SummaryContainer> const &reduced,
void GatherSketchInfo(MetaInfo const& info,
std::vector<typename WQSketch::SummaryContainer> const &reduced,
std::vector<bst_row_t> *p_worker_segments,
std::vector<bst_row_t> *p_sketches_scan,
std::vector<typename WQSketch::Entry> *p_global_sketches);

@ -26,6 +26,12 @@
#include "xgboost/logging.h"
#include "xgboost/span.h"

#if defined(XGBOOST_USE_CUDA)
#include "cuda_fp16.h"
#elif defined(__HIP_PLATFORM_AMD__)
#include <hip/hip_fp16.h>
#endif

namespace xgboost {
// Common errors in parsing columnar format.
struct ArrayInterfaceErrors {
@ -304,12 +310,12 @@ class ArrayInterfaceHandler {
template <typename T, typename E = void>
struct ToDType;
// float
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
template <>
struct ToDType<__half> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF2;
};
#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
template <>
struct ToDType<float> {
static constexpr ArrayInterfaceHandler::Type kType = ArrayInterfaceHandler::kF4;
@ -459,11 +465,11 @@ class ArrayInterface {
CHECK(sizeof(long double) == 16) << error::NoF128();
type = T::kF16;
} else if (typestr[1] == 'f' && typestr[2] == '2') {
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
type = T::kF2;
#else
LOG(FATAL) << "Half type is not supported.";
#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
} else if (typestr[1] == 'f' && typestr[2] == '4') {
type = T::kF4;
} else if (typestr[1] == 'f' && typestr[2] == '8') {
@ -490,20 +496,17 @@ class ArrayInterface {
}
}

XGBOOST_DEVICE size_t Shape(size_t i) const { return shape[i]; }
XGBOOST_DEVICE size_t Stride(size_t i) const { return strides[i]; }
[[nodiscard]] XGBOOST_DEVICE std::size_t Shape(size_t i) const { return shape[i]; }
[[nodiscard]] XGBOOST_DEVICE std::size_t Stride(size_t i) const { return strides[i]; }

template <typename Fn>
XGBOOST_HOST_DEV_INLINE decltype(auto) DispatchCall(Fn func) const {
using T = ArrayInterfaceHandler::Type;
switch (type) {
case T::kF2: {
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
return func(reinterpret_cast<__half const *>(data));
#else
SPAN_CHECK(false);
return func(reinterpret_cast<float const *>(data));
#endif // (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
}
case T::kF4:
return func(reinterpret_cast<float const *>(data));
@ -540,23 +543,23 @@ class ArrayInterface {
return func(reinterpret_cast<uint64_t const *>(data));
}

XGBOOST_DEVICE std::size_t ElementSize() const {
[[nodiscard]] XGBOOST_DEVICE std::size_t ElementSize() const {
return this->DispatchCall([](auto *typed_data_ptr) {
return sizeof(std::remove_pointer_t<decltype(typed_data_ptr)>);
});
}
XGBOOST_DEVICE std::size_t ElementAlignment() const {
[[nodiscard]] XGBOOST_DEVICE std::size_t ElementAlignment() const {
return this->DispatchCall([](auto *typed_data_ptr) {
return std::alignment_of<std::remove_pointer_t<decltype(typed_data_ptr)>>::value;
});
}

template <typename T = float, typename... Index>
XGBOOST_DEVICE T operator()(Index &&...index) const {
XGBOOST_HOST_DEV_INLINE T operator()(Index &&...index) const {
static_assert(sizeof...(index) <= D, "Invalid index.");
return this->DispatchCall([=](auto const *p_values) -> T {
std::size_t offset = linalg::detail::Offset<0ul>(strides, 0ul, index...);
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
// No operator defined for half -> size_t
using Type = std::conditional_t<
std::is_same<__half,
@ -566,7 +569,7 @@ class ArrayInterface {
return static_cast<T>(static_cast<Type>(p_values[offset]));
#else
return static_cast<T>(p_values[offset]);
#endif
#endif // defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
});
}
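
The conditional_t dance exists because __half provides no direct conversion to integral types, so the load is routed through float first. A host-only sketch of the same guard, with a stand-in Half type modeling the limitation:

#include <cstddef>
#include <iostream>
#include <type_traits>

// Stand-in for __half: convertible to float, but not to integral types.
struct Half {
  float v;
  explicit operator float() const { return v; }
};

template <typename T, typename Src>
T Convert(Src x) {
  // If the source is Half, hop through float first; otherwise cast directly.
  using Mid = std::conditional_t<std::is_same<Half, Src>::value, float, Src>;
  return static_cast<T>(static_cast<Mid>(x));
}

int main() {
  std::cout << Convert<std::size_t>(Half{3.0f}) << ' '  // prints: 3
            << Convert<std::size_t>(2.5) << '\n';       // prints: 2
}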

@ -603,7 +606,7 @@ void DispatchDType(ArrayInterface<D> const array, std::int32_t device, Fn fn) {
};
switch (array.type) {
case ArrayInterfaceHandler::kF2: {
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600) || defined(__HIP_PLATFORM_AMD__)
#if defined(XGBOOST_USE_CUDA) || defined(__HIP_PLATFORM_AMD__)
dispatch(__half{});
#endif
break;

@ -698,6 +698,9 @@ void MetaInfo::Extend(MetaInfo const& that, bool accumulate_rows, bool check_col
this->feature_type_names = that.feature_type_names;
auto &h_feature_types = feature_types.HostVector();
LoadFeatureType(this->feature_type_names, &h_feature_types);
} else if (!that.feature_types.Empty()) {
this->feature_types.Resize(that.feature_types.Size());
this->feature_types.Copy(that.feature_types);
}
if (!that.feature_weights.Empty()) {
this->feature_weights.Resize(that.feature_weights.Size());

@ -29,7 +29,7 @@ class CudfAdapterBatch : public detail::NoMetaInfo {
: columns_(columns),
num_rows_(num_rows) {}
size_t Size() const { return num_rows_ * columns_.size(); }
__device__ COOTuple GetElement(size_t idx) const {
__device__ __forceinline__ COOTuple GetElement(size_t idx) const {
size_t column_idx = idx % columns_.size();
size_t row_idx = idx / columns_.size();
auto const& column = columns_[column_idx];
@ -39,6 +39,14 @@ class CudfAdapterBatch : public detail::NoMetaInfo {
return {row_idx, column_idx, value};
}

__device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const {
auto const& column = columns_[fidx];
float value = column.valid.Data() == nullptr || column.valid.Check(ridx)
? column(ridx)
: std::numeric_limits<float>::quiet_NaN();
return value;
}

XGBOOST_DEVICE bst_row_t NumRows() const { return num_rows_; }
XGBOOST_DEVICE bst_row_t NumCols() const { return columns_.size(); }

@ -166,6 +174,10 @@ class CupyAdapterBatch : public detail::NoMetaInfo {
float value = array_interface_(row_idx, column_idx);
return {row_idx, column_idx, value};
}
__device__ float GetElement(bst_row_t ridx, bst_feature_t fidx) const {
float value = array_interface_(ridx, fidx);
return value;
}

XGBOOST_DEVICE bst_row_t NumRows() const { return array_interface_.Shape(0); }
XGBOOST_DEVICE bst_row_t NumCols() const { return array_interface_.Shape(1); }
@ -202,40 +214,64 @@ class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {

// Returns maximum row length
template <typename AdapterBatchT>
size_t GetRowCounts(const AdapterBatchT batch, common::Span<size_t> offset,
int device_idx, float missing) {

#if defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipSetDevice(device_idx));
#elif defined(XGBOOST_USE_CUDA)
std::size_t GetRowCounts(const AdapterBatchT batch, common::Span<bst_row_t> offset, int device_idx,
float missing) {
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaSetDevice(device_idx));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipSetDevice(device_idx));
#endif

IsValidFunctor is_valid(missing);
// Count elements per row
dh::LaunchN(batch.Size(), [=] __device__(size_t idx) {
auto element = batch.GetElement(idx);
if (is_valid(element)) {
atomicAdd(reinterpret_cast<unsigned long long*>( // NOLINT
&offset[element.row_idx]),
static_cast<unsigned long long>(1)); // NOLINT
#if defined(XGBOOST_USE_CUDA)
dh::safe_cuda(cudaMemsetAsync(offset.data(), '\0', offset.size_bytes()));
#elif defined(XGBOOST_USE_HIP)
dh::safe_cuda(hipMemsetAsync(offset.data(), '\0', offset.size_bytes()));
#endif

auto n_samples = batch.NumRows();
bst_feature_t n_features = batch.NumCols();

// Use more than one thread for each row in case of the dataset being too wide.
bst_feature_t stride{0};
if (n_features < 32) {
stride = std::min(n_features, 4u);
} else if (n_features < 64) {
stride = 8;
} else if (n_features < 128) {
stride = 16;
} else {
stride = 32;
}

// Count elements per row
dh::LaunchN(n_samples * stride, [=] __device__(std::size_t idx) {
bst_row_t cnt{0};
auto [ridx, fbeg] = linalg::UnravelIndex(idx, n_samples, stride);
SPAN_CHECK(ridx < n_samples);
for (bst_feature_t fidx = fbeg; fidx < n_features; fidx += stride) {
if (is_valid(batch.GetElement(ridx, fidx))) {
cnt++;
}
}

atomicAdd(reinterpret_cast<unsigned long long*>( // NOLINT
&offset[ridx]),
static_cast<unsigned long long>(cnt)); // NOLINT
});

dh::XGBCachingDeviceAllocator<char> alloc;

#if defined(XGBOOST_USE_HIP)
size_t row_stride =
dh::Reduce(thrust::hip::par(alloc), thrust::device_pointer_cast(offset.data()),
thrust::device_pointer_cast(offset.data()) + offset.size(),
static_cast<std::size_t>(0), thrust::maximum<size_t>());
#elif defined(XGBOOST_USE_CUDA)
size_t row_stride =
#if defined(XGBOOST_USE_CUDA)
bst_row_t row_stride =
dh::Reduce(thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
thrust::device_pointer_cast(offset.data()) + offset.size(),
static_cast<std::size_t>(0), thrust::maximum<size_t>());
static_cast<bst_row_t>(0), thrust::maximum<bst_row_t>());
#elif defined(XGBOOST_USE_HIP)
bst_row_t row_stride =
dh::Reduce(thrust::hip::par(alloc), thrust::device_pointer_cast(offset.data()),
thrust::device_pointer_cast(offset.data()) + offset.size(),
static_cast<bst_row_t>(0), thrust::maximum<bst_row_t>());
#endif

return row_stride;
}
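
The rewritten kernel lets several stride lanes cooperate on one wide row: lane fbeg of row ridx visits every stride-th feature and contributes its partial count with an atomic add. A host-only model of the indexing (NaN stands in for a missing value; the data is made up):

#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::size_t n_samples = 2, n_features = 8, stride = 4;  // 8 features < 32 -> stride 4
  std::vector<float> data{  // NAN marks a missing value
      1, NAN, 3, NAN, 5, 6, NAN, 8,
      NAN, NAN, 1, 1, NAN, 1, 1, NAN};
  std::vector<std::size_t> counts(n_samples, 0);

  // One loop iteration per device thread: idx unravels to (row, first feature).
  for (std::size_t idx = 0; idx < n_samples * stride; ++idx) {
    std::size_t ridx = idx / stride, fbeg = idx % stride, cnt = 0;
    for (std::size_t fidx = fbeg; fidx < n_features; fidx += stride) {
      if (!std::isnan(data[ridx * n_features + fidx])) cnt++;
    }
    counts[ridx] += cnt;  // the device version uses atomicAdd here
  }
  std::cout << counts[0] << ' ' << counts[1] << '\n';  // prints: 5 4
}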

@ -243,13 +279,29 @@ size_t GetRowCounts(const AdapterBatchT batch, common::Span<size_t> offset,
* \brief Check there's no inf in data.
*/
template <typename AdapterBatchT>
bool HasInfInData(AdapterBatchT const& batch, IsValidFunctor is_valid) {
bool NoInfInData(AdapterBatchT const& batch, IsValidFunctor is_valid) {
auto counting = thrust::make_counting_iterator(0llu);
auto value_iter = dh::MakeTransformIterator<float>(
counting, [=] XGBOOST_DEVICE(std::size_t idx) { return batch.GetElement(idx).value; });
auto valid =
thrust::none_of(value_iter, value_iter + batch.Size(),
[is_valid] XGBOOST_DEVICE(float v) { return is_valid(v) && std::isinf(v); });
auto value_iter = dh::MakeTransformIterator<bool>(counting, [=] XGBOOST_DEVICE(std::size_t idx) {
auto v = batch.GetElement(idx).value;
if (!is_valid(v)) {
// discard the invalid elements.
return true;
}
// check that there's no inf in data.
return !std::isinf(v);
});
dh::XGBCachingDeviceAllocator<char> alloc;
// The default implementation in thrust optimizes any_of/none_of/all_of by using small
// intervals to early stop. But we expect all data to be valid here, using small
// intervals only decreases performance due to excessive kernel launch and stream
// synchronization.
#if defined(XGBOOST_USE_CUDA)
auto valid = dh::Reduce(thrust::cuda::par(alloc), value_iter, value_iter + batch.Size(), true,
thrust::logical_and<>{});
#elif defined(XGBOOST_USE_HIP)
auto valid = dh::Reduce(thrust::hip::par(alloc), value_iter, value_iter + batch.Size(), true,
thrust::logical_and<>{});
#endif
return valid;
}
}; // namespace data
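
Replacing none_of with a logical-AND reduction trades early exit for a single kernel launch, as the comment explains. A host-only sketch of the same fold using std::transform_reduce (the sample values are made up; NaN plays the role of an element rejected by is_valid):

#include <cmath>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<float> values{1.f, NAN, 3.f};  // NAN: invalid, simply skipped
  auto ok = [](float v) {
    if (std::isnan(v)) return true;  // discard invalid elements
    return !std::isinf(v);           // valid elements must be finite
  };
  bool valid = std::transform_reduce(values.begin(), values.end(), true,
                                     std::logical_and<>{}, ok);
  std::cout << (valid ? "no inf in data" : "inf found") << '\n';
}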

@ -213,7 +213,7 @@ void CopyDataToEllpack(const AdapterBatchT& batch, common::Span<FeatureType cons
// correct output position
auto counting = thrust::make_counting_iterator(0llu);
data::IsValidFunctor is_valid(missing);
bool valid = data::HasInfInData(batch, is_valid);
bool valid = data::NoInfInData(batch, is_valid);
CHECK(valid) << error::InfInData();

auto key_iter = dh::MakeTransformIterator<size_t>(

@ -92,7 +92,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
}
auto batch_rows = num_rows();
accumulated_rows += batch_rows;
dh::caching_device_vector<size_t> row_counts(batch_rows + 1, 0);
dh::device_vector<size_t> row_counts(batch_rows + 1, 0);
common::Span<size_t> row_counts_span(row_counts.data().get(), row_counts.size());
row_stride = std::max(row_stride, Dispatch(proxy, [=](auto const& value) {
return GetRowCounts(value, row_counts_span, get_device(), missing);
@ -163,7 +163,7 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p,
#endif

auto rows = num_rows();
dh::caching_device_vector<size_t> row_counts(rows + 1, 0);
dh::device_vector<size_t> row_counts(rows + 1, 0);
common::Span<size_t> row_counts_span(row_counts.data().get(), row_counts.size());
Dispatch(proxy, [=](auto const& value) {
return GetRowCounts(value, row_counts_span, get_device(), missing);

@ -92,7 +92,7 @@ void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_row_t> offset,
template <typename AdapterBatchT>
size_t CopyToSparsePage(AdapterBatchT const& batch, int32_t device, float missing,
SparsePage* page) {
bool valid = HasInfInData(batch, IsValidFunctor{missing});
bool valid = NoInfInData(batch, IsValidFunctor{missing});
CHECK(valid) << error::InfInData();

page->offset.SetDevice(device);

@ -67,7 +67,7 @@ class ColumnSplitHelper {
const int32_t nid = nodes[node_in_set].nid;
const size_t task_id = partition_builder_->GetTaskIdx(node_in_set, begin);
partition_builder_->AllocateForTask(task_id);
partition_builder_->PartitionByMask(node_in_set, nodes, r, gmat, column_matrix, *p_tree,
partition_builder_->PartitionByMask(node_in_set, nodes, r, gmat, *p_tree,
(*row_set_collection_)[nid].begin, decision_bits_,
missing_bits_);
});

@ -25,7 +25,6 @@
#include "xgboost/linalg.h" // for Constants, Vector

namespace xgboost::tree {
template <typename ExpandEntry>
class HistEvaluator {
private:
struct NodeEntry {
@ -285,10 +284,42 @@ class HistEvaluator {
return left_sum;
}

/**
* @brief Gather the expand entries from all the workers.
* @param entries Local expand entries on this worker.
* @return Global expand entries gathered from all workers.
*/
std::vector<CPUExpandEntry> Allgather(std::vector<CPUExpandEntry> const &entries) {
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
auto const num_entries = entries.size();

// First, gather all the primitive fields.
std::vector<CPUExpandEntry> all_entries(num_entries * world);
std::vector<uint32_t> cat_bits;
std::vector<std::size_t> cat_bits_sizes;
for (std::size_t i = 0; i < num_entries; i++) {
all_entries[num_entries * rank + i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes);
}
collective::Allgather(all_entries.data(), all_entries.size() * sizeof(CPUExpandEntry));

// Gather all the cat_bits.
auto gathered = collective::AllgatherV(cat_bits, cat_bits_sizes);

common::ParallelFor(num_entries * world, ctx_->Threads(), [&] (auto i) {
// Copy the cat_bits back into all expand entries.
all_entries[i].split.cat_bits.resize(gathered.sizes[i]);
std::copy_n(gathered.result.cbegin() + gathered.offsets[i], gathered.sizes[i],
all_entries[i].split.cat_bits.begin());
});

return all_entries;
}
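
After the two gathers, each entry's variable-length payload is copied back by indexing into the flat result with the returned offsets and sizes. A single-process sketch of just that reassembly step (the gathered buffers are faked; Entry is an illustrative stand-in for CPUExpandEntry):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative stand-in for CPUExpandEntry.
struct Entry {
  int nid;
  std::vector<std::uint32_t> cat_bits;
};

int main() {
  // Pretend AllgatherV already ran: flat payload for 2 entries (sizes 2 and 1).
  std::vector<std::uint32_t> result{7, 8, 9};
  std::vector<std::size_t> sizes{2, 1};
  std::vector<std::size_t> offsets{0, 2};

  std::vector<Entry> entries{{0, {}}, {1, {}}};
  for (std::size_t i = 0; i < entries.size(); ++i) {
    entries[i].cat_bits.resize(sizes[i]);
    std::copy_n(result.cbegin() + offsets[i], sizes[i], entries[i].cat_bits.begin());
  }
  std::cout << entries[0].cat_bits[1] << ' ' << entries[1].cat_bits[0] << '\n';  // 8 9
}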

public:
void EvaluateSplits(const common::HistCollection &hist, common::HistogramCuts const &cut,
common::Span<FeatureType const> feature_types, const RegTree &tree,
std::vector<ExpandEntry> *p_entries) {
std::vector<CPUExpandEntry> *p_entries) {
auto n_threads = ctx_->Threads();
auto& entries = *p_entries;
// All nodes are on the same level, so we can store the shared ptr.
@ -306,7 +337,7 @@ class HistEvaluator {
return features[nidx_in_set]->Size();
}, grain_size);

std::vector<ExpandEntry> tloc_candidates(n_threads * entries.size());
std::vector<CPUExpandEntry> tloc_candidates(n_threads * entries.size());
for (size_t i = 0; i < entries.size(); ++i) {
for (decltype(n_threads) j = 0; j < n_threads; ++j) {
tloc_candidates[i * n_threads + j] = entries[i];
@ -365,22 +396,18 @@ class HistEvaluator {
if (is_col_split_) {
// With column-wise data split, we gather the best splits from all the workers and update the
// expand entries accordingly.
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
auto const num_entries = entries.size();
std::vector<ExpandEntry> buffer{num_entries * world};
std::copy_n(entries.cbegin(), num_entries, buffer.begin() + num_entries * rank);
collective::Allgather(buffer.data(), buffer.size() * sizeof(ExpandEntry));
for (auto worker = 0; worker < world; ++worker) {
auto all_entries = Allgather(entries);
for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
entries[nidx_in_set].split.Update(buffer[worker * num_entries + nidx_in_set].split);
entries[nidx_in_set].split.Update(
all_entries[worker * entries.size() + nidx_in_set].split);
}
}
}
}

// Add splits to tree, handles all statistics
void ApplyTreeSplit(ExpandEntry const& candidate, RegTree *p_tree) {
void ApplyTreeSplit(CPUExpandEntry const& candidate, RegTree *p_tree) {
auto evaluator = tree_evaluator_.GetEvaluator();
RegTree &tree = *p_tree;

@ -465,6 +492,7 @@ class HistMultiEvaluator {
FeatureInteractionConstraintHost interaction_constraints_;
std::shared_ptr<common::ColumnSampler> column_sampler_;
Context const *ctx_;
bool is_col_split_{false};

private:
static double MultiCalcSplitGain(TrainParam const &param,
@ -543,6 +571,57 @@ class HistMultiEvaluator {
return false;
}

/**
* @brief Gather the expand entries from all the workers.
* @param entries Local expand entries on this worker.
* @return Global expand entries gathered from all workers.
*/
std::vector<MultiExpandEntry> Allgather(std::vector<MultiExpandEntry> const &entries) {
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
auto const num_entries = entries.size();

// First, gather all the primitive fields.
std::vector<MultiExpandEntry> all_entries(num_entries * world);
std::vector<uint32_t> cat_bits;
std::vector<std::size_t> cat_bits_sizes;
std::vector<GradientPairPrecise> gradients;
for (std::size_t i = 0; i < num_entries; i++) {
all_entries[num_entries * rank + i].CopyAndCollect(entries[i], &cat_bits, &cat_bits_sizes,
&gradients);
}
collective::Allgather(all_entries.data(), all_entries.size() * sizeof(MultiExpandEntry));

// Gather all the cat_bits.
auto gathered_cat_bits = collective::AllgatherV(cat_bits, cat_bits_sizes);

// Gather all the gradients.
auto const num_gradients = gradients.size();
std::vector<GradientPairPrecise> all_gradients(num_gradients * world);
std::copy_n(gradients.cbegin(), num_gradients, all_gradients.begin() + num_gradients * rank);
collective::Allgather(all_gradients.data(), all_gradients.size() * sizeof(GradientPairPrecise));

auto const total_entries = num_entries * world;
auto const gradients_per_entry = num_gradients / num_entries;
auto const gradients_per_side = gradients_per_entry / 2;
common::ParallelFor(total_entries, ctx_->Threads(), [&] (auto i) {
// Copy the cat_bits back into all expand entries.
all_entries[i].split.cat_bits.resize(gathered_cat_bits.sizes[i]);
std::copy_n(gathered_cat_bits.result.cbegin() + gathered_cat_bits.offsets[i],
gathered_cat_bits.sizes[i], all_entries[i].split.cat_bits.begin());

// Copy the gradients back into all expand entries.
all_entries[i].split.left_sum.resize(gradients_per_side);
std::copy_n(all_gradients.cbegin() + i * gradients_per_entry, gradients_per_side,
all_entries[i].split.left_sum.begin());
all_entries[i].split.right_sum.resize(gradients_per_side);
std::copy_n(all_gradients.cbegin() + i * gradients_per_entry + gradients_per_side,
gradients_per_side, all_entries[i].split.right_sum.begin());
});

return all_entries;
}

public:
void EvaluateSplits(RegTree const &tree, common::Span<const common::HistCollection *> hist,
common::HistogramCuts const &cut, std::vector<MultiExpandEntry> *p_entries) {
@ -597,6 +676,18 @@ class HistMultiEvaluator {
entries[nidx_in_set].split.Update(tloc_candidates[n_threads * nidx_in_set + tidx].split);
}
}

if (is_col_split_) {
// With column-wise data split, we gather the best splits from all the workers and update the
// expand entries accordingly.
auto all_entries = Allgather(entries);
for (auto worker = 0; worker < collective::GetWorldSize(); ++worker) {
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
entries[nidx_in_set].split.Update(
all_entries[worker * entries.size() + nidx_in_set].split);
}
}
}
}

linalg::Vector<float> InitRoot(linalg::VectorView<GradientPairPrecise const> root_sum) {
@ -660,7 +751,10 @@ class HistMultiEvaluator {

explicit HistMultiEvaluator(Context const *ctx, MetaInfo const &info, TrainParam const *param,
std::shared_ptr<common::ColumnSampler> sampler)
: param_{param}, column_sampler_{std::move(sampler)}, ctx_{ctx} {
: param_{param},
column_sampler_{std::move(sampler)},
ctx_{ctx},
is_col_split_{info.IsColumnSplit()} {
interaction_constraints_.Configure(*param, info.num_col_);
column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(),
param_->colsample_bynode, param_->colsample_bylevel,

@ -70,6 +70,22 @@ struct CPUExpandEntry : public ExpandEntryImpl<CPUExpandEntry> {
os << "split:\n" << e.split << std::endl;
return os;
}

/**
* @brief Copy primitive fields into this, and collect cat_bits into a vector.
*
* This is used for allgather.
*
* @param that The other entry to copy from
* @param collected_cat_bits The vector to collect cat_bits
* @param cat_bits_sizes The sizes of the collected cat_bits
*/
void CopyAndCollect(CPUExpandEntry const& that, std::vector<uint32_t>* collected_cat_bits,
std::vector<std::size_t>* cat_bits_sizes) {
nid = that.nid;
depth = that.depth;
split.CopyAndCollect(that.split, collected_cat_bits, cat_bits_sizes);
}
};

struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
@ -119,6 +135,24 @@ struct MultiExpandEntry : public ExpandEntryImpl<MultiExpandEntry> {
os << "]\n";
return os;
}

/**
* @brief Copy primitive fields into this, and collect cat_bits and gradients into vectors.
*
* This is used for allgather.
*
* @param that The other entry to copy from
* @param collected_cat_bits The vector to collect cat_bits
* @param cat_bits_sizes The sizes of the collected cat_bits
* @param collected_gradients The vector to collect gradients
*/
void CopyAndCollect(MultiExpandEntry const& that, std::vector<uint32_t>* collected_cat_bits,
std::vector<std::size_t>* cat_bits_sizes,
std::vector<GradientPairPrecise>* collected_gradients) {
nid = that.nid;
depth = that.depth;
split.CopyAndCollect(that.split, collected_cat_bits, cat_bits_sizes, collected_gradients);
}
};
} // namespace xgboost::tree
#endif // XGBOOST_TREE_HIST_EXPAND_ENTRY_H_

@ -419,6 +419,60 @@ struct SplitEntryContainer {
<< "right_sum: " << s.right_sum << std::endl;
return os;
}

/**
* @brief Copy primitive fields into this, and collect cat_bits into a vector.
*
* This is used for allgather.
*
* @param that The other entry to copy from
* @param collected_cat_bits The vector to collect cat_bits
* @param cat_bits_sizes The sizes of the collected cat_bits
*/
void CopyAndCollect(SplitEntryContainer<GradientT> const &that,
std::vector<uint32_t> *collected_cat_bits,
std::vector<std::size_t> *cat_bits_sizes) {
loss_chg = that.loss_chg;
sindex = that.sindex;
split_value = that.split_value;
is_cat = that.is_cat;
static_assert(std::is_trivially_copyable_v<GradientT>);
left_sum = that.left_sum;
right_sum = that.right_sum;
collected_cat_bits->insert(collected_cat_bits->end(), that.cat_bits.cbegin(),
that.cat_bits.cend());
cat_bits_sizes->emplace_back(that.cat_bits.size());
}

/**
* @brief Copy primitive fields into this, and collect cat_bits and gradient sums into vectors.
*
* This is used for allgather.
*
* @param that The other entry to copy from
* @param collected_cat_bits The vector to collect cat_bits
* @param cat_bits_sizes The sizes of the collected cat_bits
* @param collected_gradients The vector to collect gradients
*/
template <typename G>
void CopyAndCollect(SplitEntryContainer<GradientT> const &that,
std::vector<uint32_t> *collected_cat_bits,
std::vector<std::size_t> *cat_bits_sizes,
std::vector<G> *collected_gradients) {
loss_chg = that.loss_chg;
sindex = that.sindex;
split_value = that.split_value;
is_cat = that.is_cat;
collected_cat_bits->insert(collected_cat_bits->end(), that.cat_bits.cbegin(),
that.cat_bits.cend());
cat_bits_sizes->emplace_back(that.cat_bits.size());
static_assert(!std::is_trivially_copyable_v<GradientT>);
collected_gradients->insert(collected_gradients->end(), that.left_sum.cbegin(),
that.left_sum.cend());
collected_gradients->insert(collected_gradients->end(), that.right_sum.cbegin(),
that.right_sum.cend());
}
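
The two overloads split on exactly this property: a gradient type that is trivially copyable can ride along in the byte-wise Allgather, while vector-valued sums must be collected separately. A compile-time sketch of the distinction (FixedPart and WithPayload are illustrative stand-ins, not the real types):

#include <cstdint>
#include <type_traits>
#include <vector>

// Illustrative stand-ins; the real types are SplitEntryContainer instantiations.
struct FixedPart {
  float loss_chg;
  std::uint32_t sindex;
  float split_value;
  bool is_cat;
};
struct WithPayload {
  FixedPart fixed;
  std::vector<std::uint32_t> cat_bits;
};

static_assert(std::is_trivially_copyable_v<FixedPart>,
              "safe to ship as raw bytes through Allgather");
static_assert(!std::is_trivially_copyable_v<WithPayload>,
              "payload must be collected and gathered separately");

int main() { return 0; }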
|
||||
|
||||
/*!\return feature index to split on */
|
||||
[[nodiscard]] bst_feature_t SplitIndex() const { return sindex & ((1U << 31) - 1U); }
|
||||
/*!\return whether missing value goes to left branch */
|
||||
|
||||
@ -44,7 +44,7 @@ class GloablApproxBuilder {
|
||||
protected:
|
||||
TrainParam const *param_;
|
||||
std::shared_ptr<common::ColumnSampler> col_sampler_;
|
||||
HistEvaluator<CPUExpandEntry> evaluator_;
|
||||
HistEvaluator evaluator_;
|
||||
HistogramBuilder<CPUExpandEntry> histogram_builder_;
|
||||
Context const *ctx_;
|
||||
ObjInfo const *const task_;
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
#include <utility> // for move, swap
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../collective/aggregator.h" // for GlobalSum
|
||||
#include "../collective/communicator-inl.h" // for Allreduce, IsDistributed
|
||||
#include "../collective/communicator.h" // for Operation
|
||||
#include "../common/hist_util.h" // for HistogramCuts, HistCollection
|
||||
@ -200,8 +201,8 @@ class MultiTargetHistBuilder {
|
||||
}
|
||||
}
|
||||
CHECK(root_sum.CContiguous());
|
||||
collective::Allreduce<collective::Operation::kSum>(
|
||||
reinterpret_cast<double *>(root_sum.Values().data()), root_sum.Size() * 2);
|
||||
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(root_sum.Values().data()),
|
||||
root_sum.Size() * 2);
|
||||
|
||||
std::vector<MultiExpandEntry> nodes{best};
|
||||
std::size_t i = 0;
|
||||
@ -335,7 +336,7 @@ class HistBuilder {
|
||||
common::Monitor *monitor_;
|
||||
TrainParam const *param_;
|
||||
std::shared_ptr<common::ColumnSampler> col_sampler_;
|
||||
std::unique_ptr<HistEvaluator<CPUExpandEntry>> evaluator_;
|
||||
std::unique_ptr<HistEvaluator> evaluator_;
|
||||
std::vector<CommonRowPartitioner> partitioner_;
|
||||
|
||||
// back pointers to tree and data matrix
|
||||
@ -354,7 +355,7 @@ class HistBuilder {
|
||||
: monitor_{monitor},
|
||||
param_{param},
|
||||
col_sampler_{std::move(column_sampler)},
|
||||
evaluator_{std::make_unique<HistEvaluator<CPUExpandEntry>>(ctx, param, fmat->Info(),
|
||||
evaluator_{std::make_unique<HistEvaluator>(ctx, param, fmat->Info(),
|
||||
col_sampler_)},
|
||||
p_last_fmat_(fmat),
|
||||
histogram_builder_{new HistogramBuilder<CPUExpandEntry>},
|
||||
@ -395,8 +396,7 @@ class HistBuilder {
|
||||
}
|
||||
histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
|
||||
collective::IsDistributed(), fmat->Info().IsColumnSplit());
|
||||
evaluator_ = std::make_unique<HistEvaluator<CPUExpandEntry>>(ctx_, this->param_, fmat->Info(),
|
||||
col_sampler_);
|
||||
evaluator_ = std::make_unique<HistEvaluator>(ctx_, this->param_, fmat->Info(), col_sampler_);
|
||||
p_last_tree_ = p_tree;
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
@ -455,8 +455,7 @@ class HistBuilder {
|
||||
for (auto const &grad : gpair_h) {
|
||||
grad_stat.Add(grad.GetGrad(), grad.GetHess());
|
||||
}
|
||||
collective::Allreduce<collective::Operation::kSum>(reinterpret_cast<double *>(&grad_stat),
|
||||
2);
|
||||
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(&grad_stat), 2);
|
||||
}
|
||||
|
||||
auto weight = evaluator_->InitRoot(GradStats{grad_stat});
|
||||
|
||||
@ -20,7 +20,7 @@ namespace xgboost::tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_refresh);
|
||||
|
||||
/*! \brief pruner that prunes a tree after growing finishs */
|
||||
/*! \brief pruner that prunes a tree after growing finishes */
|
||||
class TreeRefresher : public TreeUpdater {
|
||||
public:
|
||||
explicit TreeRefresher(Context const *ctx) : TreeUpdater(ctx) {}
|
||||
|
||||
@ -4,11 +4,18 @@ set -euo pipefail

source tests/buildkite/conftest.sh

echo "--- Build XGBoost JVM packages"
echo "--- Build XGBoost JVM packages scala 2.12"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
  ${SPARK_VERSION}


echo "--- Build XGBoost JVM packages scala 2.13"

tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
  ${SPARK_VERSION} "" "" "true"

echo "--- Stash XGBoost4J JARs"
buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar"

@ -25,7 +25,7 @@ set -x
CUDA_VERSION=11.8.0
NCCL_VERSION=2.16.5-1
RAPIDS_VERSION=23.02
SPARK_VERSION=3.1.1
SPARK_VERSION=3.4.0
JDK_VERSION=8

if [[ -z ${BUILDKITE:-} ]]

@ -1,5 +1,5 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu22.04
ARG CUDA_VERSION_ARG

# Environment
@ -7,22 +7,21 @@ ENV DEBIAN_FRONTEND noninteractive

# Install all basic requirements
RUN \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
    apt-get update && \
    apt-get install -y tar unzip wget git build-essential python3 python3-pip software-properties-common \
    apt-get install -y wget git python3 python3-pip software-properties-common \
        apt-transport-https ca-certificates gnupg-agent && \
    wget -nv -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
    add-apt-repository -u 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main' && \
    apt-get update && \
    apt-get install -y llvm-15 clang-tidy-15 clang-15 libomp-15-dev && \
    wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
    bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr
    apt-get install -y cmake

# Set default clang-tidy version
RUN \
    update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-15 100 && \
    update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 100

RUN \
    apt-get install libgtest-dev libgmock-dev -y

# Install Python packages
RUN \
    pip3 install pyyaml

@ -1,5 +1,5 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu18.04
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
ARG CUDA_VERSION_ARG
ARG RAPIDS_VERSION_ARG

@ -9,7 +9,7 @@ SHELL ["/bin/bash", "-c"]   # Use Bash as shell

# Install all basic requirements
RUN \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
    apt-get update && \
    apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
    # Python
@ -25,7 +25,7 @@ RUN \
        python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
        dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
        numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
        pyspark cloudpickle cuda-python && \
        pyspark>=3.4.0 cloudpickle cuda-python && \
    mamba clean --all && \
    conda run --no-capture-output -n gpu_test pip install buildkite-test-collector

@ -1,53 +0,0 @@
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
ARG CUDA_VERSION_ARG
ARG JDK_VERSION=8
ARG SPARK_VERSION=3.0.0

# Environment
ENV DEBIAN_FRONTEND noninteractive

# Install all basic requirements
RUN \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/3bf863cc.pub && \
    apt-get update && \
    apt-get install -y software-properties-common && \
    add-apt-repository ppa:openjdk-r/ppa && \
    apt-get update && \
    apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \
    # Python
    wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
    bash conda.sh -b -p /opt/mambaforge && \
    /opt/mambaforge/bin/pip install awscli && \
    # Maven
    wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
    tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
    ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
    # Spark
    wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \
    tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \
    ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark

ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH

# Install Python packages
RUN \
    pip install numpy scipy pandas scikit-learn

ENV GOSU_VERSION 1.10

# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
    wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
    chmod +x /usr/local/bin/gosu && \
    gosu nobody true

# Set default JDK version
RUN update-java-alternatives -v -s java-1.$JDK_VERSION.0-openjdk-amd64

# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/

WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
@ -20,10 +20,14 @@ RUN \
    wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
    tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
    ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
    # Spark
    wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \
    tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \
    ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark
    # Spark with scala 2.12
    mkdir -p /opt/spark-scala-2.12 && \
    wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz && \
    tar xvf spark-$SPARK_VERSION-bin-hadoop3.tgz --strip-components=1 -C /opt/spark-scala-2.12 && \
    # Spark with scala 2.13
    mkdir -p /opt/spark-scala-2.13 && \
    wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz && \
    tar xvf spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz --strip-components=1 -C /opt/spark-scala-2.13

ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH

@ -6,6 +6,7 @@ set -x
spark_version=$1
use_cuda=$2
gpu_arch=$3
use_scala213=$4

gpu_options=""
if [ "x$use_cuda" == "x-Duse.cuda=ON" ]; then
@ -22,7 +23,13 @@ export RABIT_MOCK=ON
if [ "x$gpu_arch" != "x" ]; then
  export GPU_ARCH_FLAG=$gpu_arch
fi
mvn --no-transfer-progress package -Dspark.version=${spark_version} $gpu_options

mvn_profile_string=""
if [ "x$use_scala213" != "x" ]; then
  export mvn_profile_string="-Pdefault,scala-2.13"
fi

mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options

set +x
set +e

@ -28,7 +28,7 @@ dependencies:
- llvmlite
- cffi
- pyarrow
- pyspark
- pyspark>=3.4.0
- cloudpickle
- pip:
  - awscli

@ -38,8 +38,6 @@ dependencies:
- protobuf
- cloudpickle
- modin
# TODO: Replace it with pyspark>=3.4 once 3.4 released.
# - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
- pyspark>=3.3.1
- pyspark>=3.4.0
- pip:
  - datatable

@ -35,7 +35,7 @@ dependencies:
- py-ubjson
- cffi
- pyarrow
- pyspark
- pyspark>=3.4.0
- cloudpickle
- pip:
  - sphinx_rtd_theme

@ -19,6 +19,4 @@ dependencies:
- pytest
- hypothesis
- hatchling
- pip:
  # TODO: Replace it with pyspark>=3.4 once 3.4 released.
  - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz
- pyspark>=3.4.0

@ -6,14 +6,24 @@ set -x
# Initialize local Maven repository
./tests/ci_build/initialize_maven.sh

# Get version number of XGBoost4J and other auxiliary information
cd jvm-packages
jvm_packages_dir=`pwd`
# Get version number of XGBoost4J and other auxiliary information
xgboost4j_version=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
maven_compiler_source=$(mvn help:evaluate -Dexpression=maven.compiler.source -q -DforceStdout)
maven_compiler_target=$(mvn help:evaluate -Dexpression=maven.compiler.target -q -DforceStdout)
spark_version=$(mvn help:evaluate -Dexpression=spark.version -q -DforceStdout)
scala_version=$(mvn help:evaluate -Dexpression=scala.version -q -DforceStdout)
scala_binary_version=$(mvn help:evaluate -Dexpression=scala.binary.version -q -DforceStdout)

if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
  cd $jvm_packages_dir/xgboost4j-tester
  python3 get_iris.py
  cd $jvm_packages_dir
fi

# including maven profiles for different scala versions: 2.12 is the default at the moment.
for _maven_profile_string in "" "-Pdefault,scala-2.13"; do
  scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout)
  scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout)

# Install XGBoost4J JAR into local Maven repository
mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
@ -24,19 +34,28 @@ mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/targe
cd xgboost4j-tester
# Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests
python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version}
# Run unit tests with XGBoost4J
mvn --no-transfer-progress package

# Build package and unit tests with XGBoost4J
mvn --no-transfer-progress clean package
xgboost4j_tester_jar="$jvm_packages_dir/xgboost4j-tester/target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar"
# Run integration tests with XGBoost4J
java -jar ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar
java -jar $xgboost4j_tester_jar

# Run integration tests with XGBoost4J-Spark
if [ ! -z "$RUN_INTEGRATION_TEST" ]
then
  python3 get_iris.py
  spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv
  spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv ${PWD}/native_model ${PWD}/pipeline_model
if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
  # Changing directory so that we do not mix code and resulting files
  cd target
  if [[ "$scala_binary_version" == "2.12" ]]; then
    /opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv
    /opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version}
  elif [[ "$scala_binary_version" == "2.13" ]]; then
    /opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv
    /opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version}
  else
    echo "Unexpected scala version: $scala_version ($scala_binary_version)."
  fi
fi
cd $jvm_packages_dir
done

set +x
set +e

@ -41,7 +41,7 @@ class ClangTidy(object):
    def __init__(self, args):
        self.cpp_lint = args.cpp
        self.cuda_lint = args.cuda
        self.use_dmlc_gtest = args.use_dmlc_gtest
        self.use_dmlc_gtest: bool = args.use_dmlc_gtest
        self.cuda_archs = args.cuda_archs.copy() if args.cuda_archs else []

        if args.tidy_version:
@ -202,6 +202,7 @@ class ClangTidy(object):
        cdb_file = os.path.join(self.cdb_path, 'compile_commands.json')
        with open(cdb_file, 'r') as fd:
            self.compile_commands = json.load(fd)

        tidy_file = os.path.join(self.root_path, '.clang-tidy')
        with open(tidy_file) as fd:
            self.clang_tidy = yaml.safe_load(fd)
@ -276,16 +277,24 @@ right keywords?
    print('clang-tidy is working.')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run clang-tidy.')
    parser.add_argument('--cpp', type=int, default=1)
    parser.add_argument('--tidy-version', type=int, default=None,
                        help='Specify the version of preferred clang-tidy.')
    parser.add_argument('--cuda', type=int, default=1)
    parser.add_argument('--use-dmlc-gtest', type=int, default=1,
                        help='Whether to use gtest bundled in dmlc-core.')
    parser.add_argument('--cuda-archs', action='append',
                        help='List of CUDA archs to build')
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run clang-tidy.")
    parser.add_argument("--cpp", type=int, default=1)
    parser.add_argument(
        "--tidy-version",
        type=int,
        default=None,
        help="Specify the version of preferred clang-tidy.",
    )
    parser.add_argument("--cuda", type=int, default=1)
    parser.add_argument(
        "--use-dmlc-gtest",
        action="store_true",
        help="Whether to use gtest bundled in dmlc-core.",
    )
    parser.add_argument(
        "--cuda-archs", action="append", help="List of CUDA archs to build"
    )
    args = parser.parse_args()

    test_tidy(args)

@ -497,6 +497,77 @@ TEST(HistUtil, AdapterDeviceSketchBatches) {
  }
}

namespace {
auto MakeData(Context const* ctx, std::size_t n_samples, bst_feature_t n_features) {
#if defined(XGBOOST_USE_CUDA)
  dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
#elif defined(XGBOOST_USE_HIP)
  dh::safe_cuda(hipSetDevice(ctx->gpu_id));
#endif
  auto n = n_samples * n_features;
  std::vector<float> x;
  x.resize(n);

  std::iota(x.begin(), x.end(), 0);
  std::int32_t c{0};
  float missing = n_samples * n_features;
  for (std::size_t i = 0; i < x.size(); ++i) {
    if (i % 5 == 0) {
      x[i] = missing;
      c++;
    }
  }
  thrust::device_vector<float> d_x;
  d_x = x;

  auto n_invalids = n / 10 * 2 + 1;
  auto is_valid = data::IsValidFunctor{missing};
  return std::tuple{x, d_x, n_invalids, is_valid};
}

void TestGetColumnSize(std::size_t n_samples) {
  auto ctx = MakeCUDACtx(0);
  bst_feature_t n_features = 12;
  [[maybe_unused]] auto [x, d_x, n_invalids, is_valid] = MakeData(&ctx, n_samples, n_features);

  auto adapter = AdapterFromData(d_x, n_samples, n_features);
  auto batch = adapter.Value();

  auto batch_iter = dh::MakeTransformIterator<data::COOTuple>(
      thrust::make_counting_iterator(0llu),
      [=] __device__(std::size_t idx) { return batch.GetElement(idx); });

  dh::caching_device_vector<std::size_t> column_sizes_scan;
  column_sizes_scan.resize(n_features + 1);
  std::vector<std::size_t> h_column_size(column_sizes_scan.size());
  std::vector<std::size_t> h_column_size_1(column_sizes_scan.size());

  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, true>(
      ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size.begin());

  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), true, false>(
      ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
  ASSERT_EQ(h_column_size, h_column_size_1);

  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, true>(
      ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
  ASSERT_EQ(h_column_size, h_column_size_1);

  detail::LaunchGetColumnSizeKernel<decltype(batch_iter), false, false>(
      ctx.gpu_id, IterSpan{batch_iter, batch.Size()}, is_valid, dh::ToSpan(column_sizes_scan));
  thrust::copy(column_sizes_scan.begin(), column_sizes_scan.end(), h_column_size_1.begin());
  ASSERT_EQ(h_column_size, h_column_size_1);
}
}  // namespace

TEST(HistUtil, GetColumnSize) {
  bst_row_t n_samples = 4096;
  TestGetColumnSize(n_samples);
}

// Check sketching from adapter or DMatrix results in the same answer
// Consistency here is useful for testing and user experience
TEST(HistUtil, SketchingEquivalent) {

@ -56,7 +56,7 @@ void TestSketchUnique(float sparsity) {
      thrust::make_counting_iterator(0llu),
      [=] __device__(size_t idx) { return batch.GetElement(idx); });
  auto end = kCols * kRows;
  detail::GetColumnSizesScan(0, kCols, n_cuts, batch_iter, is_valid, 0, end,
  detail::GetColumnSizesScan(0, kCols, n_cuts, IterSpan{batch_iter, end}, is_valid,
                             &cut_sizes_scan, &column_sizes_scan);
  auto const& cut_sizes = cut_sizes_scan.HostVector();
  ASSERT_LE(sketch.Data().size(), cut_sizes.back());

@ -1,15 +1,16 @@
/*!
 * Copyright 2018 XGBoost contributors
/**
 * Copyright 2018-2023, XGBoost contributors
 */
#include <gtest/gtest.h>
#include <vector>

#include <xgboost/span.h>
#include "test_span.h"

namespace xgboost {
namespace common {
#include <gtest/gtest.h>
#include <xgboost/span.h>

#include <vector>

#include "../../../src/common/transform_iterator.h"  // for MakeIndexTransformIter

namespace xgboost::common {
TEST(Span, TestStatus) {
  int status = 1;
  TestTestStatus {&status}();
@ -526,5 +527,17 @@ TEST(SpanDeathTest, Empty) {
  Span<float> s{data.data(), static_cast<Span<float>::index_type>(0)};
  EXPECT_DEATH(s[0], "");  // not ok to use it.
}
}  // namespace common
}  // namespace xgboost

TEST(IterSpan, Basic) {
  auto iter = common::MakeIndexTransformIter([](std::size_t i) { return i; });
  std::size_t n = 13;
  auto span = IterSpan{iter, n};
  ASSERT_EQ(span.size(), n);
  for (std::size_t i = 0; i < n; ++i) {
    ASSERT_EQ(span[i], i);
  }
  ASSERT_EQ(span.subspan(1).size(), n - 1);
  ASSERT_EQ(span.subspan(1)[0], 1);
  ASSERT_EQ(span.subspan(1, 2)[1], 2);
}
}  // namespace xgboost::common

@ -62,3 +62,22 @@ void TestCudfAdapter()
TEST(DeviceAdapter, CudfAdapter) {
  TestCudfAdapter();
}

namespace xgboost::data {
TEST(DeviceAdapter, GetRowCounts) {
  auto ctx = MakeCUDACtx(0);

  for (bst_feature_t n_features : {1, 2, 4, 64, 128, 256}) {
    HostDeviceVector<float> storage;
    auto str_arr = RandomDataGenerator{8192, n_features, 0.0}
                       .Device(ctx.gpu_id)
                       .GenerateArrayInterface(&storage);
    auto adapter = CupyAdapter{str_arr};
    HostDeviceVector<bst_row_t> offset(adapter.NumRows() + 1, 0);
    offset.SetDevice(ctx.gpu_id);
    auto rstride = GetRowCounts(adapter.Value(), offset.DeviceSpan(), ctx.gpu_id,
                                std::numeric_limits<float>::quiet_NaN());
    ASSERT_EQ(rstride, n_features);
  }
}
}  // namespace xgboost::data

@ -23,6 +23,7 @@

#include "../../src/collective/communicator-inl.h"
#include "../../src/common/common.h"
#include "../../src/common/threading_utils.h"
#include "../../src/data/array_interface.h"
#include "filesystem.h"  // dmlc::TemporaryDirectory
#include "xgboost/linalg.h"
@ -388,6 +389,23 @@ inline Context CreateEmptyGenericParam(int gpu_id) {
  return tparam;
}

inline std::unique_ptr<HostDeviceVector<GradientPair>> GenerateGradients(
    std::size_t rows, bst_target_t n_targets = 1) {
  auto p_gradients = std::make_unique<HostDeviceVector<GradientPair>>(rows * n_targets);
  auto& h_gradients = p_gradients->HostVector();

  xgboost::SimpleLCG gen;
  xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);

  for (std::size_t i = 0; i < rows * n_targets; ++i) {
    auto grad = dist(&gen);
    auto hess = dist(&gen);
    h_gradients[i] = GradientPair{grad, hess};
  }

  return p_gradients;
}

/**
 * \brief Make a context that uses CUDA.
 */
@ -509,11 +527,7 @@ void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&&
    xgboost::collective::Finalize();
  };
#if defined(_OPENMP)
#pragma omp parallel num_threads(world_size)
  {
    auto rank = omp_get_thread_num();
    run(rank);
  }
  common::ParallelFor(world_size, world_size, run);
#else
  std::vector<std::thread> threads;
  for (auto rank = 0; rank < world_size; rank++) {

@ -13,6 +13,7 @@

#include "../../../plugin/federated/federated_server.h"
#include "../../../src/collective/communicator-inl.h"
#include "../../../src/common/threading_utils.h"

namespace xgboost {

@ -75,11 +76,7 @@ void RunWithFederatedCommunicator(int32_t world_size, std::string const& server_
    xgboost::collective::Finalize();
  };
#if defined(_OPENMP)
#pragma omp parallel num_threads(world_size)
  {
    auto rank = omp_get_thread_num();
    run(rank);
  }
  common::ParallelFor(world_size, world_size, run);
#else
  std::vector<std::thread> threads;
  for (auto rank = 0; rank < world_size; rank++) {

@ -15,9 +15,9 @@

namespace xgboost {
namespace {
auto MakeModel(std::string objective, std::shared_ptr<DMatrix> dmat) {
auto MakeModel(std::string tree_method, std::string objective, std::shared_ptr<DMatrix> dmat) {
  std::unique_ptr<Learner> learner{Learner::Create({dmat})};
  learner->SetParam("tree_method", "approx");
  learner->SetParam("tree_method", tree_method);
  learner->SetParam("objective", objective);
  if (objective.find("quantile") != std::string::npos) {
    learner->SetParam("quantile_alpha", "0.5");
@ -35,7 +35,7 @@ auto MakeModel(std::string objective, std::shared_ptr<DMatrix> dmat) {
}

void VerifyObjective(size_t rows, size_t cols, float expected_base_score, Json expected_model,
                     std::string objective) {
                     std::string tree_method, std::string objective) {
  auto const world_size = collective::GetWorldSize();
  auto const rank = collective::GetRank();
  std::shared_ptr<DMatrix> dmat{RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(rank == 0)};
@ -61,7 +61,7 @@ void VerifyObjective(size_t rows, size_t cols, float expected_base_score, Json e
  }
  std::shared_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};

  auto model = MakeModel(objective, sliced);
  auto model = MakeModel(tree_method, objective, sliced);
  auto base_score = GetBaseScore(model);
  ASSERT_EQ(base_score, expected_base_score);
  ASSERT_EQ(model, expected_model);
@ -76,7 +76,7 @@ class FederatedLearnerTest : public ::testing::TestWithParam<std::string> {
  void SetUp() override { server_ = std::make_unique<ServerForTest>(kWorldSize); }
  void TearDown() override { server_.reset(nullptr); }

  void Run(std::string objective) {
  void Run(std::string tree_method, std::string objective) {
    static auto constexpr kRows{16};
    static auto constexpr kCols{16};

@ -99,17 +99,22 @@ class FederatedLearnerTest : public ::testing::TestWithParam<std::string> {
      }
    }

    auto model = MakeModel(objective, dmat);
    auto model = MakeModel(tree_method, objective, dmat);
    auto score = GetBaseScore(model);

    RunWithFederatedCommunicator(kWorldSize, server_->Address(), &VerifyObjective, kRows, kCols,
                                 score, model, objective);
                                 score, model, tree_method, objective);
  }
};

TEST_P(FederatedLearnerTest, Objective) {
TEST_P(FederatedLearnerTest, Approx) {
  std::string objective = GetParam();
  this->Run(objective);
  this->Run("approx", objective);
}

TEST_P(FederatedLearnerTest, Hist) {
  std::string objective = GetParam();
  this->Run("hist", objective);
}

INSTANTIATE_TEST_SUITE_P(FederatedLearnerObjective, FederatedLearnerTest,

@ -33,7 +33,7 @@ void TestEvaluateSplits(bool force_read_by_column) {

  auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();

  auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
  auto evaluator = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
  common::HistCollection hist;
  std::vector<GradientPair> row_gpairs = {
      {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
@ -167,7 +167,7 @@ TEST(HistEvaluator, Apply) {
  param.UpdateAllowUnknown(Args{{"min_child_weight", "0"}, {"reg_lambda", "0.0"}});
  auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
  auto sampler = std::make_shared<common::ColumnSampler>();
  auto evaluator_ = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
  auto evaluator_ = HistEvaluator{&ctx, &param, dmat->Info(), sampler};

  CPUExpandEntry entry{0, 0};
  entry.split.loss_chg = 10.0f;
@ -195,7 +195,7 @@ TEST_F(TestPartitionBasedSplit, CPUHist) {
  // check the evaluator is returning the optimal split
  std::vector<FeatureType> ft{FeatureType::kCategorical};
  auto sampler = std::make_shared<common::ColumnSampler>();
  HistEvaluator<CPUExpandEntry> evaluator{&ctx, &param_, info_, sampler};
  HistEvaluator evaluator{&ctx, &param_, info_, sampler};
  evaluator.InitRoot(GradStats{total_gpair_});
  RegTree tree;
  std::vector<CPUExpandEntry> entries(1);
@ -225,7 +225,7 @@ auto CompareOneHotAndPartition(bool onehot) {
      RandomDataGenerator(kRows, kCols, 0).Seed(3).Type(ft).MaxCategory(n_cats).GenerateDMatrix();

  auto sampler = std::make_shared<common::ColumnSampler>();
  auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param, dmat->Info(), sampler};
  auto evaluator = HistEvaluator{&ctx, &param, dmat->Info(), sampler};
  std::vector<CPUExpandEntry> entries(1);

  for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>(&ctx, {32, param.sparse_threshold})) {
@ -276,7 +276,7 @@ TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
  info.num_col_ = 1;
  info.feature_types = {FeatureType::kCategorical};
  Context ctx;
  auto evaluator = HistEvaluator<CPUExpandEntry>{&ctx, &param_, info, sampler};
  auto evaluator = HistEvaluator{&ctx, &param_, info, sampler};
  evaluator.InitRoot(GradStats{parent_sum_});

  std::vector<CPUExpandEntry> entries(1);

@ -79,7 +79,7 @@ TEST(CPUMonoConstraint, Basic) {
  auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
  auto sampler = std::make_shared<common::ColumnSampler>();

  HistEvaluator<CPUExpandEntry> evalutor{&ctx, &param, Xy->Info(), sampler};
  HistEvaluator evalutor{&ctx, &param, Xy->Info(), sampler};
  evalutor.InitRoot(GradStats{2.0, 2.0});

  SplitEntry split;

@ -9,28 +9,20 @@
#include "../helpers.h"

namespace xgboost::tree {
std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols){
std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols,
                                         bool categorical = false) {
  if (categorical) {
    std::vector<FeatureType> ft(cols);
    for (size_t i = 0; i < ft.size(); ++i) {
      ft[i] = (i % 3 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;
    }
    return RandomDataGenerator(rows, cols, 0.6f).Seed(3).Type(ft).MaxCategory(17).GenerateDMatrix();
  } else {
    return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
  }

std::unique_ptr<HostDeviceVector<GradientPair>> GenerateGradients(std::size_t rows) {
  auto p_gradients = std::make_unique<HostDeviceVector<GradientPair>>(rows);
  auto& h_gradients = p_gradients->HostVector();

  xgboost::SimpleLCG gen;
  xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);

  for (std::size_t i = 0; i < rows; ++i) {
    auto grad = dist(&gen);
    auto hess = dist(&gen);
    h_gradients[i] = GradientPair{grad, hess};
  }

  return p_gradients;
}

TEST(GrowHistMaker, InteractionConstraint)
{
TEST(GrowHistMaker, InteractionConstraint) {
  auto constexpr kRows = 32;
  auto constexpr kCols = 16;
  auto p_dmat = GenerateDMatrix(kRows, kCols);
@ -74,8 +66,9 @@ TEST(GrowHistMaker, InteractionConstraint)
}

namespace {
void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_tree) {
  auto p_dmat = GenerateDMatrix(rows, cols);
void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical,
                       RegTree const& expected_tree) {
  auto p_dmat = GenerateDMatrix(rows, cols, categorical);
  auto p_gradients = GenerateGradients(rows);
  Context ctx;
  ObjInfo task{ObjInfo::kRegression};
@ -90,27 +83,21 @@ void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_t
  param.Init(Args{});
  updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});

  ASSERT_EQ(tree.NumExtraNodes(), 10);
  ASSERT_EQ(tree[0].SplitIndex(), 1);

  ASSERT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
  ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);

  FeatureMap fmap;
  auto json = tree.DumpModel(fmap, false, "json");
  auto expected_json = expected_tree.DumpModel(fmap, false, "json");
  Json json{Object{}};
  tree.SaveModel(&json);
  Json expected_json{Object{}};
  expected_tree.SaveModel(&expected_json);
  ASSERT_EQ(json, expected_json);
}
}  // anonymous namespace

TEST(GrowHistMaker, ColumnSplit) {
void TestColumnSplit(bool categorical) {
  auto constexpr kRows = 32;
  auto constexpr kCols = 16;

  RegTree expected_tree{1u, kCols};
  ObjInfo task{ObjInfo::kRegression};
  {
    auto p_dmat = GenerateDMatrix(kRows, kCols);
    auto p_dmat = GenerateDMatrix(kRows, kCols, categorical);
    auto p_gradients = GenerateGradients(kRows);
    Context ctx;
    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
@ -121,6 +108,12 @@ TEST(GrowHistMaker, ColumnSplit) {
  }

  auto constexpr kWorldSize = 2;
  RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit, kRows, kCols, std::cref(expected_tree));
  RunWithInMemoryCommunicator(kWorldSize, VerifyColumnSplit, kRows, kCols, categorical,
                              std::cref(expected_tree));
}
}  // anonymous namespace

TEST(GrowHistMaker, ColumnSplitNumerical) { TestColumnSplit(false); }

TEST(GrowHistMaker, ColumnSplitCategorical) { TestColumnSplit(true); }
}  // namespace xgboost::tree

@ -113,7 +113,6 @@ void VerifyColumnSplitPartitioner(bst_target_t n_targets, size_t n_samples,

  for (auto const& page : Xy->GetBatches<SparsePage>()) {
    GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
    bst_feature_t const split_ind = 0;
    common::ColumnMatrix column_indices;
    column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
    {
@ -194,11 +193,65 @@ void TestColumnSplitPartitioner(bst_target_t n_targets) {

  auto constexpr kWorkers = 4;
  RunWithInMemoryCommunicator(kWorkers, VerifyColumnSplitPartitioner<ExpandEntry>, n_targets,
      n_samples, n_features, base_rowid, Xy, min_value, mid_value, mid_partitioner);
                              n_samples, n_features, base_rowid, Xy, min_value, mid_value,
                              mid_partitioner);
}
}  // anonymous namespace

TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner<CPUExpandEntry>(1); }

TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner<MultiExpandEntry>(3); }

namespace {
void VerifyColumnSplit(bst_row_t rows, bst_feature_t cols, bst_target_t n_targets,
                       RegTree const& expected_tree) {
  auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
  auto p_gradients = GenerateGradients(rows, n_targets);
  Context ctx;
  ObjInfo task{ObjInfo::kRegression};
  std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)};
  std::vector<HostDeviceVector<bst_node_t>> position(1);

  std::unique_ptr<DMatrix> sliced{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};

  RegTree tree{n_targets, cols};
  TrainParam param;
  param.Init(Args{});
  updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});

  Json json{Object{}};
  tree.SaveModel(&json);
  Json expected_json{Object{}};
  expected_tree.SaveModel(&expected_json);
  ASSERT_EQ(json, expected_json);
}

void TestColumnSplit(bst_target_t n_targets) {
  auto constexpr kRows = 32;
  auto constexpr kCols = 16;

  RegTree expected_tree{n_targets, kCols};
  ObjInfo task{ObjInfo::kRegression};
  {
    auto Xy = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
    auto p_gradients = GenerateGradients(kRows, n_targets);
    Context ctx;
    std::unique_ptr<TreeUpdater> updater{
        TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)};
    std::vector<HostDeviceVector<bst_node_t>> position(1);
    TrainParam param;
    param.Init(Args{});
    updater->Update(&param, p_gradients.get(), Xy.get(), position, {&expected_tree});
  }

  auto constexpr kWorldSize = 2;
  RunWithInMemoryCommunicator(kWorldSize, VerifyColumnSplit, kRows, kCols, n_targets,
                              std::cref(expected_tree));
}
}  // anonymous namespace

TEST(QuantileHist, ColumnSplit) { TestColumnSplit(1); }

TEST(QuantileHist, ColumnSplitMultiTarget) { TestColumnSplit(3); }

}  // namespace xgboost::tree