sync up May15 2023

commit 8cad8c693c

.github/workflows/python_tests.yml
@@ -66,7 +66,7 @@ jobs:
 cd python-package
 python --version
 python -m build --sdist
-pip install -v ./dist/xgboost-*.tar.gz
+pip install -v ./dist/xgboost-*.tar.gz --config-settings use_openmp=False
 cd ..
 python -c 'import xgboost'
@@ -43,9 +43,38 @@ In the admin CLI, run the following command:
 submit_job horizontal-xgboost
 ```

+Make a note of the job id:
+
+```console
+Submitted job: 28309e77-a7c5-45e6-b2bc-c2e3655122d8
+```
+
+On both workers, you should see train and eval losses printed:
+
+```console
+[10:45:41] [0]  eval-logloss:0.22646  train-logloss:0.23316
+[10:45:41] [1]  eval-logloss:0.13776  train-logloss:0.13654
+[10:45:41] [2]  eval-logloss:0.08036  train-logloss:0.08243
+[10:45:41] [3]  eval-logloss:0.05830  train-logloss:0.05645
+[10:45:41] [4]  eval-logloss:0.03825  train-logloss:0.04148
+[10:45:41] [5]  eval-logloss:0.02660  train-logloss:0.02958
+[10:45:41] [6]  eval-logloss:0.01386  train-logloss:0.01918
+[10:45:41] [7]  eval-logloss:0.01018  train-logloss:0.01331
+[10:45:41] [8]  eval-logloss:0.00847  train-logloss:0.01112
+[10:45:41] [9]  eval-logloss:0.00691  train-logloss:0.00662
+[10:45:41] [10] eval-logloss:0.00543  train-logloss:0.00503
+[10:45:41] [11] eval-logloss:0.00445  train-logloss:0.00420
+[10:45:41] [12] eval-logloss:0.00336  train-logloss:0.00355
+[10:45:41] [13] eval-logloss:0.00277  train-logloss:0.00280
+[10:45:41] [14] eval-logloss:0.00252  train-logloss:0.00244
+[10:45:41] [15] eval-logloss:0.00177  train-logloss:0.00193
+[10:45:41] [16] eval-logloss:0.00156  train-logloss:0.00161
+[10:45:41] [17] eval-logloss:0.00135  train-logloss:0.00142
+[10:45:41] [18] eval-logloss:0.00123  train-logloss:0.00125
+[10:45:41] [19] eval-logloss:0.00106  train-logloss:0.00107
+```
+
 Once the training finishes, the model file should be written into
-`/tmp/nvlfare/poc/site-1/run_1/test.model.json` and `/tmp/nvflare/poc/site-2/run_1/test.model.json`
-respectively.
+`/tmp/nvflare/poc/site-1/${job_id}/test.model.json` and `/tmp/nvflare/poc/site-2/${job_id}/test.model.json`
+respectively, where `job_id` is the UUID printed out when we ran `submit_job`.
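As an aside, the saved file is an ordinary XGBoost JSON model, so it can be loaded back for a quick sanity check; a minimal sketch, with `<job_id>` standing in for the UUID noted above:

```python
import xgboost as xgb

bst = xgb.Booster()
# Path as documented above; replace <job_id> with the submit_job UUID.
bst.load_model("/tmp/nvflare/poc/site-1/<job_id>/test.model.json")
print(bst.num_boosted_rounds())  # should match the 20 rounds in the log
```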

 Finally, shutdown everything from the admin CLI, using `admin` as password:
 ```shell
@@ -63,8 +63,8 @@ class XGBoostTrainer(Executor):
         }
         with xgb.collective.CommunicatorContext(**communicator_env):
             # Load file, file will not be sharded in federated mode.
-            dtrain = xgb.DMatrix('agaricus.txt.train')
-            dtest = xgb.DMatrix('agaricus.txt.test')
+            dtrain = xgb.DMatrix('agaricus.txt.train?format=libsvm')
+            dtest = xgb.DMatrix('agaricus.txt.test?format=libsvm')

             # Specify parameters via map, definition are same as c++ version
             param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
@@ -2,7 +2,7 @@

 set -e

-rm -fr ./agaricus* ./*.pem ./poc
+rm -fr ./agaricus* ./*.pem /tmp/nvflare

 world_size=2
@@ -11,15 +11,15 @@ openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout server-key.pem -out se
 openssl req -x509 -newkey rsa:2048 -days 7 -nodes -keyout client-key.pem -out client-cert.pem -subj "/C=US/CN=localhost"

 # Split train and test files manually to simulate a federated environment.
-split -n l/${world_size} --numeric-suffixes=1 -a 1 ../data/agaricus.txt.train agaricus.txt.train-site-
-split -n l/${world_size} --numeric-suffixes=1 -a 1 ../data/agaricus.txt.test agaricus.txt.test-site-
+split -n l/${world_size} --numeric-suffixes=1 -a 1 ../../data/agaricus.txt.train agaricus.txt.train-site-
+split -n l/${world_size} --numeric-suffixes=1 -a 1 ../../data/agaricus.txt.test agaricus.txt.test-site-

 nvflare poc -n 2 --prepare
 mkdir -p /tmp/nvflare/poc/admin/transfer/horizontal-xgboost
 cp -fr config custom /tmp/nvflare/poc/admin/transfer/horizontal-xgboost
 cp server-*.pem client-cert.pem /tmp/nvflare/poc/server/
-for id in $(eval echo "{1..$world_size}"); do
-  cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"$id"/
-  cp agaricus.txt.train-site-"$id" /tmp/nvflare/poc/site-"$id"/agaricus.txt.train
-  cp agaricus.txt.test-site-"$id" /tmp/nvflare/poc/site-"$id"/agaricus.txt.test
+for (( site=1; site<=world_size; site++ )); do
+  cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"$site"/
+  cp agaricus.txt.train-site-"$site" /tmp/nvflare/poc/site-"$site"/agaricus.txt.train
+  cp agaricus.txt.test-site-"$site" /tmp/nvflare/poc/site-"$site"/agaricus.txt.test
 done
@@ -143,7 +143,7 @@ extensions = [
     "sphinx.ext.intersphinx",
     "sphinx_gallery.gen_gallery",
     "breathe",
-    "recommonmark",
+    "myst_parser",
 ]

 sphinx_gallery_conf = {
@@ -1,14 +1,15 @@
-sphinx>=5.2.1
+sphinx
 mock
 sphinx_rtd_theme>=1.0.0
 breathe
 scikit-learn
-sh>=1.12.14
-matplotlib>=2.1
+sh
+matplotlib
 graphviz
 numpy
-recommonmark
+myst-parser
 xgboost_ray
 sphinx-gallery
 pyspark
 cloudpickle
+setuptools
@@ -150,7 +150,7 @@ inline LINALG_HD int Popc(uint64_t v) {
   return __popcll(v);
 #elif defined(__GNUC__) || defined(__clang__) || defined(__HIP_PLATFORM_AMD__)
   return __builtin_popcountll(v);
-#elif defined(_MSC_VER) && _defined(_M_X64)
+#elif defined(_MSC_VER) && defined(_M_X64)
   return __popcnt64(v);
 #else
   return NativePopc(v);
@@ -129,7 +129,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-gpg-plugin</artifactId>
-        <version>3.0.1</version>
+        <version>3.1.0</version>
         <executions>
           <execution>
             <id>sign-artifacts</id>
@@ -427,7 +427,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-surefire-plugin</artifactId>
-        <version>3.0.0</version>
+        <version>3.1.0</version>
         <configuration>
           <skipTests>false</skipTests>
           <useSystemClassLoader>false</useSystemClassLoader>
@@ -48,12 +48,6 @@ pom_template = """
       <artifactId>commons-logging</artifactId>
       <version>1.2</version>
     </dependency>
-    <dependency>
-      <groupId>com.typesafe.akka</groupId>
-      <artifactId>akka-actor_${{scala.binary.version}}</artifactId>
-      <version>2.6.20</version>
-      <scope>compile</scope>
-    </dependency>
     <dependency>
       <groupId>com.typesafe.akka</groupId>
       <artifactId>akka-testkit_${{scala.binary.version}}</artifactId>
@@ -1,26 +0,0 @@
-[MASTER]
-
-ignore=tests
-
-extension-pkg-whitelist=numpy
-
-disable=unexpected-special-method-signature,too-many-nested-blocks,useless-object-inheritance,import-outside-toplevel,unsubscriptable-object,attribute-defined-outside-init
-
-dummy-variables-rgx=(unused|)_.*
-
-reports=no
-
-[BASIC]
-
-# Enforce naming convention
-const-naming-style=UPPER_CASE
-class-naming-style=PascalCase
-function-naming-style=snake_case
-method-naming-style=snake_case
-attr-naming-style=snake_case
-argument-naming-style=snake_case
-variable-naming-style=snake_case
-class-attribute-naming-style=snake_case
-
-# Allow single-letter variables
-variable-rgx=[a-zA-Z_][a-z0-9_]{0,30}$
@@ -26,23 +26,18 @@ class BuildConfiguration:  # pylint: disable=R0902
     # Special option: See explanation below
     use_system_libxgboost: bool = False

-    def _set_config_setting(
-        self, config_settings: Dict[str, Any], field_name: str
-    ) -> None:
-        if field_name in config_settings:
+    def _set_config_setting(self, config_settings: Dict[str, Any]) -> None:
+        for field_name in config_settings:
             setattr(
                 self,
                 field_name,
                 (config_settings[field_name].lower() in ["true", "1", "on"]),
             )
-        else:
-            raise ValueError(f"Field {field_name} is not a valid config_settings")

     def update(self, config_settings: Optional[Dict[str, Any]]) -> None:
         """Parse config_settings from Pip (or other PEP 517 frontend)"""
         if config_settings is not None:
-            for field_name in [x.name for x in dataclasses.fields(self)]:
-                self._set_config_setting(config_settings, field_name)
+            self._set_config_setting(config_settings)

     def get_cmake_args(self) -> List[str]:
         """Convert build configuration to CMake args"""
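As a sanity sketch of the new flow (values arrive as strings, the way pip forwards `--config-settings KEY=VALUE` to the build backend; the field name below is the one defined in the dataclass above):

```python
config = BuildConfiguration()
# Roughly what `pip install . --config-settings use_system_libxgboost=True` produces:
config.update({"use_system_libxgboost": "True"})
assert config.use_system_libxgboost is True  # "true"/"1"/"on" (case-insensitive) map to True
```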
@@ -130,20 +130,21 @@ def locate_or_build_libxgboost(
     """Locate libxgboost; if not exist, build it"""
     logger = logging.getLogger("xgboost.packager.locate_or_build_libxgboost")

-    libxgboost = locate_local_libxgboost(toplevel_dir, logger=logger)
-    if libxgboost is not None:
-        return libxgboost
     if build_config.use_system_libxgboost:
         # Find libxgboost from system prefix
         sys_prefix = pathlib.Path(sys.prefix).absolute().resolve()
-        libxgboost = sys_prefix / "lib" / _lib_name()
-        if not libxgboost.exists():
+        libxgboost_sys = sys_prefix / "lib" / _lib_name()
+        if not libxgboost_sys.exists():
             raise RuntimeError(
                 f"use_system_libxgboost was specified but {_lib_name()} is "
-                f"not found in {libxgboost.parent}"
+                f"not found in {libxgboost_sys.parent}"
             )

-        logger.info("Using system XGBoost: %s", str(libxgboost))
+        logger.info("Using system XGBoost: %s", str(libxgboost_sys))
+        return libxgboost_sys
+
+    libxgboost = locate_local_libxgboost(toplevel_dir, logger=logger)
+    if libxgboost is not None:
+        return libxgboost

     if toplevel_dir.joinpath("cpp_src").exists():
@@ -79,7 +79,8 @@ def build_wheel(
     libxgboost = locate_or_build_libxgboost(
         TOPLEVEL_DIR, build_dir=build_dir, build_config=build_config
     )
-    copy_with_logging(libxgboost, lib_path, logger=logger)
+    if not build_config.use_system_libxgboost:
+        copy_with_logging(libxgboost, lib_path, logger=logger)

     with cd(workspace):
         wheel_name = hatchling.build.build_wheel(
@@ -9,13 +9,13 @@ build-backend = "packager.pep517"
 name = "xgboost"
 version = "2.0.0-dev"
 authors = [
-    {name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu"},
-    {name = "Jiaming Yuan", email = "jm.yuan@outlook.com"}
+    { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
+    { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }
 ]
 description = "XGBoost Python Package"
-readme = {file = "README.rst", content-type = "text/x-rst"}
+readme = { file = "README.rst", content-type = "text/x-rst" }
 requires-python = ">=3.8"
-license = {text = "Apache-2.0"}
+license = { text = "Apache-2.0" }
 classifiers = [
     "License :: OSI Approved :: Apache Software License",
     "Development Status :: 5 - Production/Stable",
@@ -24,13 +24,18 @@ classifiers = [
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
-    "Programming Language :: Python :: 3.10"
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11"
 ]
 dependencies = [
     "numpy",
     "scipy"
 ]

+[project.urls]
+documentation = "https://xgboost.readthedocs.io/en/stable/"
+repository = "https://github.com/dmlc/xgboost"
+
 [project.optional-dependencies]
 pandas = ["pandas"]
 scikit-learn = ["scikit-learn"]
@@ -40,3 +45,39 @@ plotting = ["graphviz", "matplotlib"]
 pyspark = ["pyspark", "scikit-learn", "cloudpickle"]

 [tool.hatch.build.targets.wheel.hooks.custom]
+
+[tool.isort]
+profile = "black"
+
+[tool.mypy]
+ignore_missing_imports = true
+disallow_untyped_defs = true
+follow_imports = "silent"
+
+[tool.pylint.main]
+ignore = ["tests"]
+extension-pkg-whitelist = ["numpy"]
+disable = [
+    "attribute-defined-outside-init",
+    "import-outside-toplevel",
+    "too-many-nested-blocks",
+    "unexpected-special-method-signature",
+    "unsubscriptable-object",
+    "useless-object-inheritance"
+]
+dummy-variables-rgx = "(unused|)_.*"
+reports = false
+
+[tool.pylint.basic]
+# Enforce naming convention
+const-naming-style = "UPPER_CASE"
+class-naming-style = "PascalCase"
+function-naming-style = "snake_case"
+method-naming-style = "snake_case"
+attr-naming-style = "snake_case"
+argument-naming-style = "snake_case"
+variable-naming-style = "snake_case"
+class-attribute-naming-style = "snake_case"
+
+# Allow single-letter variables
+variable-rgx = "[a-zA-Z_][a-z0-9_]{0,30}$"
@@ -1,7 +0,0 @@
-[metadata]
-description_file = README.rst
-
-[mypy]
-ignore_missing_imports = True
-disallow_untyped_defs = True
-follow_imports = silent
@@ -200,12 +200,6 @@ void AllreduceBase::SetParam(const char *name, const char *val) {
   if (!strcmp(name, "DMLC_WORKER_CONNECT_RETRY")) {
     connect_retry = atoi(val);
   }
-  if (!strcmp(name, "rabit_bootstrap_cache")) {
-    rabit_bootstrap_cache = utils::StringToBool(val);
-  }
-  if (!strcmp(name, "rabit_debug")) {
-    rabit_debug = utils::StringToBool(val);
-  }
   if (!strcmp(name, "rabit_timeout")) {
     rabit_timeout = utils::StringToBool(val);
   }
@@ -487,10 +487,6 @@ class AllreduceBase : public IEngine {
   int world_size;  // NOLINT
   // connect retry time
   int connect_retry;  // NOLINT
-  // enable bootstrap cache 0 false 1 true
-  bool rabit_bootstrap_cache = false;  // NOLINT
-  // enable detailed logging
-  bool rabit_debug = false;  // NOLINT
   // by default, if rabit worker not recover in half an hour exit
   std::chrono::seconds timeout_sec{std::chrono::seconds{1800}};  // NOLINT
   // flag to enable rabit_timeout
@@ -4,9 +4,6 @@
  * \brief The command line interface program of xgboost.
  * This file is not included in dynamic library.
  */
-#define _CRT_SECURE_NO_WARNINGS
-#define _CRT_SECURE_NO_DEPRECATE
-
 #if !defined(NOMINMAX) && defined(_WIN32)
 #define NOMINMAX
 #endif  // !defined(NOMINMAX)
@@ -222,15 +222,15 @@ void InMemoryHandler::Handle(char const* input, std::size_t bytes, std::string*

   std::unique_lock<std::mutex> lock(mutex_);

-  LOG(INFO) << functor.name << " rank " << rank << ": waiting for current sequence number";
+  LOG(DEBUG) << functor.name << " rank " << rank << ": waiting for current sequence number";
   cv_.wait(lock, [this, sequence_number] { return sequence_number_ == sequence_number; });

-  LOG(INFO) << functor.name << " rank " << rank << ": handling request";
+  LOG(DEBUG) << functor.name << " rank " << rank << ": handling request";
   functor(input, bytes, &buffer_);
   received_++;

   if (received_ == world_size_) {
-    LOG(INFO) << functor.name << " rank " << rank << ": all requests received";
+    LOG(DEBUG) << functor.name << " rank " << rank << ": all requests received";
     output->assign(buffer_);
     sent_++;
     lock.unlock();
@@ -238,15 +238,15 @@ void InMemoryHandler::Handle(char const* input, std::size_t bytes, std::string*
     return;
   }

-  LOG(INFO) << functor.name << " rank " << rank << ": waiting for all clients";
+  LOG(DEBUG) << functor.name << " rank " << rank << ": waiting for all clients";
   cv_.wait(lock, [this] { return received_ == world_size_; });

-  LOG(INFO) << functor.name << " rank " << rank << ": sending reply";
+  LOG(DEBUG) << functor.name << " rank " << rank << ": sending reply";
   output->assign(buffer_);
   sent_++;

   if (sent_ == world_size_) {
-    LOG(INFO) << functor.name << " rank " << rank << ": all replies sent";
+    LOG(DEBUG) << functor.name << " rank " << rank << ": all replies sent";
     sent_ = 0;
     received_ = 0;
     buffer_.clear();
@@ -1355,14 +1355,12 @@ class CUDAStream {
   cudaStream_t stream_;

  public:
-  CUDAStream() {
-    dh::safe_cuda(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking));
-  }
-  ~CUDAStream() {
-    dh::safe_cuda(cudaStreamDestroy(stream_));
-  }
+  CUDAStream() { dh::safe_cuda(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); }
+  ~CUDAStream() { dh::safe_cuda(cudaStreamDestroy(stream_)); }

-  CUDAStreamView View() const { return CUDAStreamView{stream_}; }
+  [[nodiscard]] CUDAStreamView View() const { return CUDAStreamView{stream_}; }
+  [[nodiscard]] cudaStream_t Handle() const { return stream_; }

   void Sync() { this->View().Sync(); }
 };
@@ -1273,14 +1273,12 @@ class CUDAStream {
   hipStream_t stream_;

  public:
-  CUDAStream() {
-    dh::safe_cuda(hipStreamCreateWithFlags(&stream_, hipStreamNonBlocking));
-  }
-  ~CUDAStream() {
-    dh::safe_cuda(hipStreamDestroy(stream_));
-  }
+  CUDAStream() { dh::safe_cuda(hipStreamCreateWithFlags(&stream_, hipStreamNonBlocking)); }
+  ~CUDAStream() { dh::safe_cuda(hipStreamDestroy(stream_)); }

-  CUDAStreamView View() const { return CUDAStreamView{stream_}; }
+  [[nodiscard]] CUDAStreamView View() const { return CUDAStreamView{stream_}; }
+  [[nodiscard]] hipStream_t Handle() const { return stream_; }

   void Sync() { this->View().Sync(); }
 };
@@ -183,14 +183,28 @@ class PartitionBuilder {
     SetNRightElems(node_in_set, range.begin(), n_right);
   }

+  template <bool any_missing, typename ColumnType, typename Predicate>
+  void MaskKernel(ColumnType* p_column, common::Span<const size_t> row_indices, size_t base_rowid,
+                  BitVector* decision_bits, BitVector* missing_bits, Predicate&& pred) {
+    auto& column = *p_column;
+    for (auto const row_id : row_indices) {
+      auto const bin_id = column[row_id - base_rowid];
+      if (any_missing && bin_id == ColumnType::kMissingId) {
+        missing_bits->Set(row_id - base_rowid);
+      } else if (pred(row_id, bin_id)) {
+        decision_bits->Set(row_id - base_rowid);
+      }
+    }
+  }
+
   /**
    * @brief When data is split by column, we don't have all the features locally on the current
    * worker, so we go through all the rows and mark the bit vectors on whether the decision is made
    * to go right, or if the feature value used for the split is missing.
    */
-  template <typename ExpandEntry>
+  template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry>
   void MaskRows(const size_t node_in_set, std::vector<ExpandEntry> const& nodes,
-                const common::Range1d range, GHistIndexMatrix const& gmat,
+                const common::Range1d range, bst_bin_t split_cond, GHistIndexMatrix const& gmat,
                 const common::ColumnMatrix& column_matrix, const RegTree& tree, const size_t* rid,
                 BitVector* decision_bits, BitVector* missing_bits) {
     common::Span<const size_t> rid_span(rid + range.begin(), rid + range.end());
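As an aside, the column-split masking above boils down to two bit vectors per node; a conceptual sketch in Python (the set-based bit vectors and toy rows here are illustrative assumptions, not the C++ API):

```python
def mask_rows(rows, split_cond, decision_bits, missing_bits):
    # rows: (row_id, bin_id) pairs held locally; bin_id of None means missing.
    for row_id, bin_id in rows:
        if bin_id is None:
            missing_bits.add(row_id)    # this worker saw a missing value
        elif bin_id <= split_cond:
            decision_bits.add(row_id)   # split condition satisfied locally

decision, missing = set(), set()
mask_rows([(0, 3), (1, None), (2, 9)], split_cond=5,
          decision_bits=decision, missing_bits=missing)
# After OR-aggregating both vectors across workers, each worker can route
# every row: use the decision bit if present, else the node's default direction.
assert decision == {0} and missing == {1}
```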
@@ -204,7 +218,7 @@ class PartitionBuilder {
       for (auto row_id : rid_span) {
         auto gidx = gmat.GetGindex(row_id, fid);
         if (gidx > -1) {
-          bool go_left = false;
+          bool go_left;
           if (is_cat) {
             go_left = Decision(node_cats, cut_values[gidx]);
           } else {
@@ -218,7 +232,27 @@ class PartitionBuilder {
         }
       }
     } else {
-      LOG(FATAL) << "Column data split is only supported for the `approx` tree method";
+      auto pred_hist = [&](auto ridx, auto bin_id) {
+        if (any_cat && is_cat) {
+          auto gidx = gmat.GetGindex(ridx, fid);
+          CHECK_GT(gidx, -1);
+          return Decision(node_cats, cut_values[gidx]);
+        } else {
+          return bin_id <= split_cond;
+        }
+      };
+
+      if (column_matrix.GetColumnType(fid) == xgboost::common::kDenseColumn) {
+        auto column = column_matrix.DenseColumn<BinIdxType, any_missing>(fid);
+        MaskKernel<any_missing>(&column, rid_span, gmat.base_rowid, decision_bits, missing_bits,
+                                pred_hist);
+      } else {
+        CHECK_EQ(any_missing, true);
+        auto column =
+            column_matrix.SparseColumn<BinIdxType>(fid, rid_span.front() - gmat.base_rowid);
+        MaskKernel<any_missing>(&column, rid_span, gmat.base_rowid, decision_bits, missing_bits,
+                                pred_hist);
+      }
     }
   }

@@ -238,7 +272,7 @@ class PartitionBuilder {
     std::size_t nid = nodes[node_in_set].nid;
     bool default_left = tree[nid].DefaultLeft();

-    auto pred_approx = [&](auto ridx) {
+    auto pred = [&](auto ridx) {
       bool go_left = default_left;
       bool is_missing = missing_bits.Check(ridx - gmat.base_rowid);
       if (!is_missing) {
@@ -248,11 +282,7 @@ class PartitionBuilder {
     };

     std::pair<size_t, size_t> child_nodes_sizes;
-    if (!column_matrix.IsInitialized()) {
-      child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred_approx);
-    } else {
-      LOG(FATAL) << "Column data split is only supported for the `approx` tree method";
-    }
+    child_nodes_sizes = PartitionRangeKernel(rid_span, left, right, pred);

     const size_t n_left = child_nodes_sizes.first;
     const size_t n_right = child_nodes_sizes.second;
@@ -26,9 +26,9 @@ class IndexTransformIter {

  public:
   using iterator_category = std::random_access_iterator_tag;  // NOLINT
-  using value_type = std::result_of_t<Fn(std::size_t)>;  // NOLINT
+  using reference = std::result_of_t<Fn(std::size_t)>;  // NOLINT
+  using value_type = std::remove_cv_t<std::remove_reference_t<reference>>;  // NOLINT
   using difference_type = detail::ptrdiff_t;  // NOLINT
-  using reference = std::add_lvalue_reference_t<value_type>;  // NOLINT
   using pointer = std::add_pointer_t<value_type>;  // NOLINT

  public:
@@ -43,8 +43,8 @@ class IndexTransformIter {
     return *this;
   }

-  value_type operator*() const { return fn_(iter_); }
-  value_type operator[](std::size_t i) const {
+  reference operator*() const { return fn_(iter_); }
+  reference operator[](std::size_t i) const {
     auto iter = *this + i;
     return *iter;
   }
@@ -1,11 +1,15 @@
-/*!
- * Copyright 2021 by Contributors
+/**
+ * Copyright 2021-2023, XGBoost Contributors
  */
+#include <cstdint>  // for int64_t
+
 #include "../common/common.h"
+#include "../common/device_helpers.cuh"  // for DefaultStream, CUDAEvent
 #include "array_interface.h"
 #include "xgboost/logging.h"

 namespace xgboost {
-void ArrayInterfaceHandler::SyncCudaStream(int64_t stream) {
+void ArrayInterfaceHandler::SyncCudaStream(std::int64_t stream) {
   switch (stream) {
     case 0:
       /**
@@ -22,12 +26,15 @@ void ArrayInterfaceHandler::SyncCudaStream(int64_t stream) {
       break;
     case 2:
       // default per-thread stream
-    default:
+    default: {
+      dh::CUDAEvent e;
 #if defined(XGBOOST_USE_CUDA)
-      dh::safe_cuda(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
+      e.Record(dh::CUDAStreamView{reinterpret_cast<cudaStream_t>(stream)});
 #elif defined(XGBOOST_USE_HIP)
-      dh::safe_cuda(hipStreamSynchronize(reinterpret_cast<hipStream_t>(stream)));
+      e.Record(dh::CUDAStreamView{reinterpret_cast<hipStream_t>(stream)});
 #endif
+      dh::DefaultStream().Wait(e);
+    }
   }
 }
@@ -166,7 +166,7 @@ BatchSet<GHistIndexMatrix> SimpleDMatrix::GetGradientIndex(Context const* ctx,
   }
   if (!gradient_index_ || detail::RegenGHist(batch_param_, param)) {
     // GIDX page doesn't exist, generate it
-    LOG(INFO) << "Generating new Gradient Index.";
+    LOG(DEBUG) << "Generating new Gradient Index.";
     // These places can ask for a CSR gidx:
     // - CPU Hist: the ctx must be on CPU.
    // - IterativeDMatrix::InitFromCPU: The ctx must be on CPU.
@@ -38,19 +38,21 @@ class ColumnSplitHelper {
     missing_bits_ = BitVector(common::Span<BitVector::value_type>(missing_storage_));
   }

-  template <typename ExpandEntry>
+  template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry>
   void Partition(common::BlockedSpace2d const& space, std::int32_t n_threads,
                  GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix,
-                 std::vector<ExpandEntry> const& nodes, RegTree const* p_tree) {
+                 std::vector<ExpandEntry> const& nodes,
+                 std::vector<int32_t> const& split_conditions, RegTree const* p_tree) {
     // When data is split by column, we don't have all the feature values in the local worker, so
     // we first collect all the decisions and whether the feature is missing into bit vectors.
     std::fill(decision_storage_.begin(), decision_storage_.end(), 0);
     std::fill(missing_storage_.begin(), missing_storage_.end(), 0);
     common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) {
       const int32_t nid = nodes[node_in_set].nid;
-      partition_builder_->MaskRows(node_in_set, nodes, r, gmat, column_matrix, *p_tree,
-                                   (*row_set_collection_)[nid].begin, &decision_bits_,
-                                   &missing_bits_);
+      bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0;
+      partition_builder_->MaskRows<BinIdxType, any_missing, any_cat>(
+          node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree,
+          (*row_set_collection_)[nid].begin, &decision_bits_, &missing_bits_);
     });

     // Then aggregate the bit vectors across all the workers.
@@ -217,7 +219,8 @@ class CommonRowPartitioner {
     // 2.3 Split elements of row_set_collection_ to left and right child-nodes for each node
     // Store results in intermediate buffers from partition_builder_
     if (is_col_split_) {
-      column_split_helper_.Partition(space, ctx->Threads(), gmat, column_matrix, nodes, p_tree);
+      column_split_helper_.Partition<BinIdxType, any_missing, any_cat>(
+          space, ctx->Threads(), gmat, column_matrix, nodes, split_conditions, p_tree);
     } else {
       common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) {
         size_t begin = r.begin();
@@ -412,6 +412,7 @@ class HistEvaluator {
     tree_evaluator_.AddSplit(candidate.nid, left_child, right_child,
                              tree[candidate.nid].SplitIndex(), left_weight,
                              right_weight);
+    evaluator = tree_evaluator_.GetEvaluator();

     snode_.resize(tree.GetNodes().size());
     snode_.at(left_child).stats = candidate.split.left_sum;
@@ -49,6 +49,8 @@ class TreeEvaluator {
     monotone_.HostVector().resize(n_features, 0);
     has_constraint_ = false;
   } else {
+    CHECK_LE(p.monotone_constraints.size(), n_features)
+        << "The size of monotone constraint should be less or equal to the number of features.";
     monotone_.HostVector() = p.monotone_constraints;
     monotone_.HostVector().resize(n_features, 0);
     // Initialised to some small size, can grow if needed
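For context, the new check corresponds to how constraints are passed from the Python side: one entry per feature. A minimal sketch assuming a 3-feature dataset (the data here is random, for illustration only):

```python
import numpy as np
import xgboost as xgb

X, y = np.random.rand(100, 3), np.random.rand(100)
dtrain = xgb.DMatrix(X, label=y)
# One constraint per feature: +1 increasing, -1 decreasing, 0 unconstrained.
params = {"tree_method": "hist", "monotone_constraints": "(1,-1,0)"}
bst = xgb.train(params, dtrain, num_boost_round=10)
```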
@@ -1,227 +1,225 @@
 import argparse
 import os
+import pathlib
 import subprocess
 import sys
+from collections import Counter
 from multiprocessing import Pool, cpu_count
-from typing import Dict, Tuple
+from typing import Dict, List, Tuple

-from pylint import epylint
 from test_utils import PY_PACKAGE, ROOT, cd, print_time, record_time

-CURDIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
-SRCPATH = os.path.normpath(
-    os.path.join(CURDIR, os.path.pardir, os.path.pardir, "python-package")
-)
+
+class LintersPaths:
+    """The paths each linter run on."""
+
+    BLACK = (
+        # core
+        "python-package/",
+        # tests
+        "tests/python/test_config.py",
+        "tests/python/test_data_iterator.py",
+        "tests/python/test_dt.py",
+        "tests/python/test_predict.py",
+        "tests/python/test_quantile_dmatrix.py",
+        "tests/python/test_tree_regularization.py",
+        "tests/python-gpu/test_gpu_data_iterator.py",
+        "tests/test_distributed/test_with_spark/",
+        "tests/test_distributed/test_gpu_with_spark/",
+        # demo
+        "demo/json-model/json_parser.py",
+        "demo/guide-python/cat_in_the_dat.py",
+        "demo/guide-python/categorical.py",
+        "demo/guide-python/feature_weights.py",
+        "demo/guide-python/sklearn_parallel.py",
+        "demo/guide-python/spark_estimator_examples.py",
+        "demo/guide-python/individual_trees.py",
+        "demo/guide-python/quantile_regression.py",
+        "demo/guide-python/multioutput_regression.py",
+        # CI
+        "tests/ci_build/lint_python.py",
+        "tests/ci_build/test_r_package.py",
+        "tests/ci_build/test_utils.py",
+        "tests/ci_build/change_version.py",
+    )
+
+    ISORT = (
+        # core
+        "python-package/",
+        # tests
+        "tests/test_distributed/",
+        "tests/python/",
+        "tests/python-gpu/",
+        "tests/ci_build/",
+        # demo
+        "demo/",
+        # misc
+        "dev/",
+        "doc/",
+    )
+
+    MYPY = (
+        # core
+        "python-package/",
+        # tests
+        "tests/python/test_dt.py",
+        "tests/python/test_data_iterator.py",
+        "tests/python-gpu/test_gpu_data_iterator.py",
+        "tests/test_distributed/test_with_spark/test_data.py",
+        "tests/test_distributed/test_gpu_with_spark/test_data.py",
+        "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
+        # demo
+        "demo/json-model/json_parser.py",
+        "demo/guide-python/external_memory.py",
+        "demo/guide-python/cat_in_the_dat.py",
+        "demo/guide-python/feature_weights.py",
+        "demo/guide-python/individual_trees.py",
+        "demo/guide-python/quantile_regression.py",
+        "demo/guide-python/multioutput_regression.py",
+        # CI
+        "tests/ci_build/lint_python.py",
+        "tests/ci_build/test_r_package.py",
+        "tests/ci_build/test_utils.py",
+        "tests/ci_build/change_version.py",
+    )
+
+
+def check_cmd_print_failure_assistance(cmd: List[str]) -> bool:
+    if subprocess.run(cmd).returncode == 0:
+        return True
+
+    subprocess.run([cmd[0], "--version"])
+    msg = """
+Please run the following command on your machine to address the formatting error:
+
+"""
+    msg += " ".join(cmd)
+    print(msg, file=sys.stderr)
+    return False


 @record_time
 @cd(PY_PACKAGE)
 def run_black(rel_path: str, fix: bool) -> bool:
-    if fix:
-        cmd = ["black", "-q", rel_path]
-    else:
-        cmd = ["black", "-q", "--check", rel_path]
-    ret = subprocess.run(cmd).returncode
-    if ret != 0:
-        subprocess.run(["black", "--version"])
-        msg = """
-Please run the following command on your machine to address the formatting error:
-
-"""
-        msg += " ".join(cmd)
-        print(msg, file=sys.stderr)
-        return False
-    return True
+    cmd = ["black", "-q", os.path.join(ROOT, rel_path)]
+    if not fix:
+        cmd += ["--check"]
+    return check_cmd_print_failure_assistance(cmd)


 @record_time
 @cd(PY_PACKAGE)
 def run_isort(rel_path: str, fix: bool) -> bool:
-    if fix:
-        cmd = ["isort", f"--src={SRCPATH}", "--profile=black", rel_path]
-    else:
-        cmd = ["isort", f"--src={SRCPATH}", "--check", "--profile=black", rel_path]
-    ret = subprocess.run(cmd).returncode
-    if ret != 0:
-        subprocess.run(["isort", "--version"])
-        msg = """
-Please run the following command on your machine to address the formatting error:
-
-"""
-        msg += " ".join(cmd)
-        print(msg, file=sys.stderr)
-        return False
-    return True
+    # Isort gets confused when trying to find the config file, so specified explicitly.
+    cmd = ["isort", "--settings-path", PY_PACKAGE, os.path.join(ROOT, rel_path)]
+    if not fix:
+        cmd += ["--check"]
+    return check_cmd_print_failure_assistance(cmd)


 @record_time
 @cd(PY_PACKAGE)
 def run_mypy(rel_path: str) -> bool:
-    path = os.path.join(ROOT, rel_path)
-    ret = subprocess.run(["mypy", path])
-    if ret.returncode != 0:
-        return False
-    return True
+    cmd = ["mypy", os.path.join(ROOT, rel_path)]
+    return check_cmd_print_failure_assistance(cmd)


 class PyLint:
     """A helper for running pylint, mostly copied from dmlc-core/scripts."""

-    def __init__(self) -> None:
-        self.pypackage_root = os.path.join(ROOT, "python-package/")
-        self.pylint_cats = set(["error", "warning", "convention", "refactor"])
-        self.pylint_opts = [
-            "--extension-pkg-whitelist=numpy",
-            "--rcfile=" + os.path.join(self.pypackage_root, ".pylintrc"),
-        ]
+    MESSAGE_CATEGORIES = {
+        "Fatal",
+        "Error",
+        "Warning",
+        "Convention",
+        "Refactor",
+        "Information",
+    }
+    MESSAGE_PREFIX_TO_CATEGORY = {
+        category[0]: category for category in MESSAGE_CATEGORIES
+    }

-    def run(self, path: str) -> Tuple[Dict, str, str]:
-        (pylint_stdout, pylint_stderr) = epylint.py_run(
-            " ".join([str(path)] + self.pylint_opts), return_std=True
-        )
-        emap = {}
-        err = pylint_stderr.read()
-
-        out = []
-        for line in pylint_stdout:
-            out.append(line)
-            key = line.split(":")[-1].split("(")[0].strip()
-            if key not in self.pylint_cats:
-                continue
-            if key not in emap:
-                emap[key] = 1
-            else:
-                emap[key] += 1
-
-        return {path: emap}, err, "\n".join(out)
+    @classmethod
+    @cd(PY_PACKAGE)
+    def get_summary(cls, path: str) -> Tuple[str, Dict[str, int], str, str, bool]:
+        """Get the summary of pylint's errors, warnings, etc."""
+        ret = subprocess.run(["pylint", path], capture_output=True)
+        stdout = ret.stdout.decode("utf-8")
+
+        emap: Dict[str, int] = Counter()
+        for line in stdout.splitlines():
+            if ":" in line and (
+                category := cls.MESSAGE_PREFIX_TO_CATEGORY.get(
+                    line.split(":")[-2].strip()[0]
+                )
+            ):
+                emap[category] += 1
+
+        return path, emap, stdout, ret.stderr.decode("utf-8"), ret.returncode == 0
+
+    @staticmethod
+    def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int:
+        """Print summary of certain result map."""
+        if len(result_map) == 0:
+            return 0
+
+        ftype = "Python"
+        nfail = sum(map(bool, result_map.values()))
+        print(
+            f"====={len(result_map) - nfail}/{len(result_map)} {ftype} files passed check====="
+        )
+        for fname, emap in result_map.items():
+            if emap:
+                print(
+                    f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={emap}"
+                )
+        return nfail

-    def __call__(self) -> bool:
+    @classmethod
+    def run(cls) -> bool:
         """Run pylint with parallelization on a batch of paths."""
         all_errors: Dict[str, Dict[str, int]] = {}

-        def print_summary_map(result_map: Dict[str, Dict[str, int]]) -> int:
-            """Print summary of certain result map."""
-            if len(result_map) == 0:
-                return 0
-            ftype = "Python"
-            npass = sum(1 for x in result_map.values() if len(x) == 0)
-            print(f"====={npass}/{len(result_map)} {ftype} files passed check=====")
-            for fname, emap in result_map.items():
-                if len(emap) == 0:
-                    continue
-                print(
-                    f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={str(emap)}"
-                )
-            return len(result_map) - npass
-
-        all_scripts = []
-        for root, dirs, files in os.walk(self.pypackage_root):
-            for f in files:
-                if f.endswith(".py"):
-                    all_scripts.append(os.path.join(root, f))
-
         with Pool(cpu_count()) as pool:
-            error_maps = pool.map(self.run, all_scripts)
-            for emap, err, out in error_maps:
+            error_maps = pool.map(
+                cls.get_summary,
+                (os.fspath(file) for file in pathlib.Path(PY_PACKAGE).glob("**/*.py")),
+            )
+            for path, emap, out, err, succeeded in error_maps:
+                all_errors[path] = emap
+                if succeeded:
+                    continue
+
                 print(out)
                 if len(err) != 0:
                     print(err)
-                all_errors.update(emap)

-        nerr = print_summary_map(all_errors)
+        nerr = cls.print_summary_map(all_errors)
         return nerr == 0


 @record_time
 def run_pylint() -> bool:
-    return PyLint()()
+    return PyLint.run()


 @record_time
 def main(args: argparse.Namespace) -> None:
     if args.format == 1:
-        black_results = [
-            run_black(path, args.fix)
-            for path in [
-                # core
-                "python-package/",
-                # tests
-                "tests/python/test_config.py",
-                "tests/python/test_data_iterator.py",
-                "tests/python/test_dt.py",
-                "tests/python/test_predict.py",
-                "tests/python/test_quantile_dmatrix.py",
-                "tests/python/test_tree_regularization.py",
-                "tests/python-gpu/test_gpu_data_iterator.py",
-                "tests/ci_build/lint_python.py",
-                "tests/test_distributed/test_with_spark/",
-                "tests/test_distributed/test_gpu_with_spark/",
-                # demo
-                "demo/json-model/json_parser.py",
-                "demo/guide-python/cat_in_the_dat.py",
-                "demo/guide-python/categorical.py",
-                "demo/guide-python/feature_weights.py",
-                "demo/guide-python/sklearn_parallel.py",
-                "demo/guide-python/spark_estimator_examples.py",
-                "demo/guide-python/individual_trees.py",
-                "demo/guide-python/quantile_regression.py",
-                "demo/guide-python/multioutput_regression.py",
-                # CI
-                "tests/ci_build/lint_python.py",
-                "tests/ci_build/test_r_package.py",
-                "tests/ci_build/test_utils.py",
-                "tests/ci_build/change_version.py",
-            ]
-        ]
+        black_results = [run_black(path, args.fix) for path in LintersPaths.BLACK]
         if not all(black_results):
             sys.exit(-1)

-        isort_results = [
-            run_isort(path, args.fix)
-            for path in [
-                # core
-                "python-package/",
-                # tests
-                "tests/test_distributed/",
-                "tests/python/",
-                "tests/python-gpu/",
-                "tests/ci_build/",
-                # demo
-                "demo/",
-                # misc
-                "dev/",
-                "doc/",
-            ]
-        ]
+        isort_results = [run_isort(path, args.fix) for path in LintersPaths.ISORT]
         if not all(isort_results):
             sys.exit(-1)

     if args.type_check == 1:
-        if not all(
-            run_mypy(path)
-            for path in [
-                # core
-                "python-package/",
-                # demo
-                "demo/json-model/json_parser.py",
-                "demo/guide-python/external_memory.py",
-                "demo/guide-python/cat_in_the_dat.py",
-                "demo/guide-python/feature_weights.py",
-                "demo/guide-python/individual_trees.py",
-                "demo/guide-python/quantile_regression.py",
-                "demo/guide-python/multioutput_regression.py",
-                # tests
-                "tests/python/test_dt.py",
-                "tests/python/test_data_iterator.py",
-                "tests/python-gpu/test_gpu_data_iterator.py",
-                "tests/test_distributed/test_with_spark/test_data.py",
-                "tests/test_distributed/test_gpu_with_spark/test_data.py",
-                "tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py",
-                # CI
-                "tests/ci_build/lint_python.py",
-                "tests/ci_build/test_r_package.py",
-                "tests/ci_build/test_utils.py",
-                "tests/ci_build/change_version.py",
-            ]
-        ):
-            subprocess.check_call(["mypy", "--version"])
+        mypy_results = [run_mypy(path) for path in LintersPaths.MYPY]
+        if not all(mypy_results):
             sys.exit(-1)

     if args.pylint == 1:
|
||||
|
||||
static void Allgather(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
VerifyAllgather(comm, rank);
|
||||
}
|
||||
|
||||
static void AllreduceMax(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
VerifyAllreduceMax(comm, rank);
|
||||
}
|
||||
|
||||
static void AllreduceMin(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
VerifyAllreduceMin(comm, rank);
|
||||
}
|
||||
|
||||
static void AllreduceSum(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
VerifyAllreduceSum(comm);
|
||||
}
|
||||
|
||||
static void AllreduceBitwiseAND(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
VerifyAllreduceBitwiseAND(comm, rank);
|
||||
}
|
||||
|
||||
static void AllreduceBitwiseOR(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
VerifyAllreduceBitwiseOR(comm, rank);
|
||||
}
|
||||
|
||||
static void AllreduceBitwiseXOR(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
VerifyAllreduceBitwiseXOR(comm, rank);
|
||||
}
|
||||
|
||||
static void Broadcast(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
VerifyBroadcast(comm, rank);
|
||||
}
|
||||
|
||||
static void Mixture(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
for (auto i = 0; i < 5; i++) {
|
||||
VerifyAllgather(comm, rank);
|
||||
VerifyAllreduceMax(comm, rank);
|
||||
VerifyAllreduceMin(comm, rank);
|
||||
VerifyAllreduceSum(comm);
|
||||
VerifyAllreduceBitwiseAND(comm, rank);
|
||||
VerifyAllreduceBitwiseOR(comm, rank);
|
||||
VerifyAllreduceBitwiseXOR(comm, rank);
|
||||
VerifyBroadcast(comm, rank);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
static void VerifyAllgather(InMemoryCommunicator &comm, int rank) {
|
||||
char buffer[kWorldSize] = {'a', 'b', 'c'};
|
||||
buffer[rank] = '0' + rank;
|
||||
comm.AllGather(buffer, kWorldSize);
|
||||
@ -34,8 +88,7 @@ class InMemoryCommunicatorTest : public ::testing::Test {
|
||||
}
|
||||
}
|
||||
|
||||
static void AllreduceMax(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
static void VerifyAllreduceMax(InMemoryCommunicator &comm, int rank) {
|
||||
int buffer[] = {1 + rank, 2 + rank, 3 + rank, 4 + rank, 5 + rank};
|
||||
comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kMax);
|
||||
int expected[] = {3, 4, 5, 6, 7};
|
||||
@ -44,8 +97,7 @@ class InMemoryCommunicatorTest : public ::testing::Test {
|
||||
}
|
||||
}
|
||||
|
||||
static void AllreduceMin(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
static void VerifyAllreduceMin(InMemoryCommunicator &comm, int rank) {
|
||||
int buffer[] = {1 + rank, 2 + rank, 3 + rank, 4 + rank, 5 + rank};
|
||||
comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kMin);
|
||||
int expected[] = {1, 2, 3, 4, 5};
|
||||
@ -54,8 +106,7 @@ class InMemoryCommunicatorTest : public ::testing::Test {
|
||||
}
|
||||
}
|
||||
|
||||
static void AllreduceSum(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
static void VerifyAllreduceSum(InMemoryCommunicator &comm) {
|
||||
int buffer[] = {1, 2, 3, 4, 5};
|
||||
comm.AllReduce(buffer, sizeof(buffer) / sizeof(buffer[0]), DataType::kInt32, Operation::kSum);
|
||||
int expected[] = {3, 6, 9, 12, 15};
|
||||
@ -64,16 +115,14 @@ class InMemoryCommunicatorTest : public ::testing::Test {
|
||||
}
|
||||
}
|
||||
|
||||
static void AllreduceBitwiseAND(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
static void VerifyAllreduceBitwiseAND(InMemoryCommunicator &comm, int rank) {
|
||||
std::bitset<2> original(rank);
|
||||
auto buffer = original.to_ulong();
|
||||
comm.AllReduce(&buffer, 1, DataType::kUInt32, Operation::kBitwiseAND);
|
||||
EXPECT_EQ(buffer, 0UL);
|
||||
}
|
||||
|
||||
static void AllreduceBitwiseOR(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
static void VerifyAllreduceBitwiseOR(InMemoryCommunicator &comm, int rank) {
|
||||
std::bitset<2> original(rank);
|
||||
auto buffer = original.to_ulong();
|
||||
comm.AllReduce(&buffer, 1, DataType::kUInt32, Operation::kBitwiseOR);
|
||||
@ -82,8 +131,7 @@ class InMemoryCommunicatorTest : public ::testing::Test {
|
||||
EXPECT_EQ(actual, expected);
|
||||
}
|
||||
|
||||
static void AllreduceBitwiseXOR(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
static void VerifyAllreduceBitwiseXOR(InMemoryCommunicator &comm, int rank) {
|
||||
std::bitset<3> original(rank * 2);
|
||||
auto buffer = original.to_ulong();
|
||||
comm.AllReduce(&buffer, 1, DataType::kUInt32, Operation::kBitwiseXOR);
|
||||
@ -92,8 +140,7 @@ class InMemoryCommunicatorTest : public ::testing::Test {
|
||||
EXPECT_EQ(actual, expected);
|
||||
}
|
||||
|
||||
static void Broadcast(int rank) {
|
||||
InMemoryCommunicator comm{kWorldSize, rank};
|
||||
static void VerifyBroadcast(InMemoryCommunicator &comm, int rank) {
|
||||
if (rank == 0) {
|
||||
std::string buffer{"hello"};
|
||||
comm.Broadcast(&buffer[0], buffer.size(), 0);
|
||||
@ -105,7 +152,6 @@ class InMemoryCommunicatorTest : public ::testing::Test {
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
static int const kWorldSize{3};
|
||||
};
|
||||
|
||||
@ -173,5 +219,7 @@ TEST_F(InMemoryCommunicatorTest, AllreduceBitwiseXOR) { Verify(&AllreduceBitwise
|
||||
|
||||
TEST_F(InMemoryCommunicatorTest, Broadcast) { Verify(&Broadcast); }
|
||||
|
||||
TEST_F(InMemoryCommunicatorTest, Mixture) { Verify(&Mixture); }
|
||||
|
||||
} // namespace collective
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,5 +1,5 @@
-/*!
- * Copyright 2021 by Contributors
+/**
+ * Copyright 2021-2023, XGBoost Contributors
  */
 #include <gtest/gtest.h>
 #include <xgboost/host_device_vector.h>
@@ -22,31 +22,19 @@ TEST(ArrayInterface, Stream) {
   HostDeviceVector<float> storage;
   auto arr_str = RandomDataGenerator{kRows, kCols, 0}.GenerateArrayInterface(&storage);

-#if defined(XGBOOST_USE_CUDA)
-  cudaStream_t stream;
-  cudaStreamCreate(&stream);
-#elif defined(XGBOOST_USE_HIP)
-  hipStream_t stream;
-  hipStreamCreate(&stream);
-#endif
+  dh::CUDAStream stream;

-  auto j_arr =Json::Load(StringView{arr_str});
-  j_arr["stream"] = Integer(reinterpret_cast<int64_t>(stream));
+  auto j_arr = Json::Load(StringView{arr_str});
+  j_arr["stream"] = Integer(reinterpret_cast<int64_t>(stream.Handle()));
   Json::Dump(j_arr, &arr_str);

   dh::caching_device_vector<uint64_t> out(1, 0);
-  uint64_t dur = 1e9;
-  dh::LaunchKernel{1, 1, 0, stream}(SleepForTest, out.data().get(), dur);
+  std::uint64_t dur = 1e9;
+  dh::LaunchKernel{1, 1, 0, stream.View()}(SleepForTest, out.data().get(), dur);
   ArrayInterface<2> arr(arr_str);

   auto t = out[0];
   CHECK_GE(t, dur);
-
-#if defined(XGBOOST_USE_CUDA)
-  cudaStreamDestroy(stream);
-#elif defined(XGBOOST_USE_HIP)
-  hipStreamDestroy(stream);
-#endif
 }

 TEST(ArrayInterface, Ptr) {
@@ -497,23 +497,32 @@ inline std::int32_t AllThreadsForTest() { return Context{}.Threads(); }

 template <typename Function, typename... Args>
 void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&&... args) {
+  auto run = [&](auto rank) {
+    Json config{JsonObject()};
+    config["xgboost_communicator"] = String("in-memory");
+    config["in_memory_world_size"] = world_size;
+    config["in_memory_rank"] = rank;
+    xgboost::collective::Init(config);
+
+    std::forward<Function>(function)(std::forward<Args>(args)...);
+
+    xgboost::collective::Finalize();
+  };
+#if defined(_OPENMP)
+#pragma omp parallel num_threads(world_size)
+  {
+    auto rank = omp_get_thread_num();
+    run(rank);
+  }
+#else
   std::vector<std::thread> threads;
   for (auto rank = 0; rank < world_size; rank++) {
-    threads.emplace_back([&, rank]() {
-      Json config{JsonObject()};
-      config["xgboost_communicator"] = String("in-memory");
-      config["in_memory_world_size"] = world_size;
-      config["in_memory_rank"] = rank;
-      xgboost::collective::Init(config);
-
-      std::forward<Function>(function)(std::forward<Args>(args)...);
-
-      xgboost::collective::Finalize();
-    });
+    threads.emplace_back(run, rank);
   }
   for (auto& thread : threads) {
     thread.join();
   }
+#endif
 }

 class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test {
@@ -3,6 +3,7 @@
  */
 #pragma once

+#include <dmlc/omp.h>
 #include <grpcpp/server_builder.h>
 #include <gtest/gtest.h>
 #include <xgboost/json.h>
@@ -61,24 +62,33 @@ class BaseFederatedTest : public ::testing::Test {
 template <typename Function, typename... Args>
 void RunWithFederatedCommunicator(int32_t world_size, std::string const& server_address,
                                   Function&& function, Args&&... args) {
+  auto run = [&](auto rank) {
+    Json config{JsonObject()};
+    config["xgboost_communicator"] = String("federated");
+    config["federated_server_address"] = String(server_address);
+    config["federated_world_size"] = world_size;
+    config["federated_rank"] = rank;
+    xgboost::collective::Init(config);
+
+    std::forward<Function>(function)(std::forward<Args>(args)...);
+
+    xgboost::collective::Finalize();
+  };
+#if defined(_OPENMP)
+#pragma omp parallel num_threads(world_size)
+  {
+    auto rank = omp_get_thread_num();
+    run(rank);
+  }
+#else
   std::vector<std::thread> threads;
   for (auto rank = 0; rank < world_size; rank++) {
-    threads.emplace_back([&, rank]() {
-      Json config{JsonObject()};
-      config["xgboost_communicator"] = String("federated");
-      config["federated_server_address"] = String(server_address);
-      config["federated_world_size"] = world_size;
-      config["federated_rank"] = rank;
-      xgboost::collective::Init(config);
-
-      std::forward<Function>(function)(std::forward<Args>(args)...);
-
-      xgboost::collective::Finalize();
-    });
+    threads.emplace_back(run, rank);
   }
   for (auto& thread : threads) {
     thread.join();
   }
+#endif
 }

 }  // namespace xgboost
@@ -20,32 +20,6 @@ TEST(AllreduceBase, InitTask)
   EXPECT_EQ(base.task_id, "1");
 }

-TEST(AllreduceBase, InitWithCacheOn)
-{
-  rabit::engine::AllreduceBase base;
-
-  std::string rabit_task_id = "rabit_task_id=1";
-  char cmd[rabit_task_id.size()+1];
-  std::copy(rabit_task_id.begin(), rabit_task_id.end(), cmd);
-  cmd[rabit_task_id.size()] = '\0';
-
-  std::string rabit_bootstrap_cache = "rabit_bootstrap_cache=1";
-  char cmd2[rabit_bootstrap_cache.size()+1];
-  std::copy(rabit_bootstrap_cache.begin(), rabit_bootstrap_cache.end(), cmd2);
-  cmd2[rabit_bootstrap_cache.size()] = '\0';
-
-  std::string rabit_debug = "rabit_debug=1";
-  char cmd3[rabit_debug.size()+1];
-  std::copy(rabit_debug.begin(), rabit_debug.end(), cmd3);
-  cmd3[rabit_debug.size()] = '\0';
-
-  char* argv[] = {cmd, cmd2, cmd3};
-  base.Init(3, argv);
-  EXPECT_EQ(base.task_id, "1");
-  EXPECT_TRUE(base.rabit_bootstrap_cache);
-  EXPECT_EQ(base.rabit_debug, 1);
-}
-
 TEST(AllreduceBase, InitWithRingReduce)
 {
   rabit::engine::AllreduceBase base;
@@ -6,6 +6,8 @@
 #include <string>

 #include "../../../src/tree/constraints.h"
+#include "../../../src/tree/hist/evaluate_splits.h"
 #include "../helpers.h"

 namespace xgboost {
 namespace tree {
@@ -56,5 +58,37 @@ TEST(CPUFeatureInteractionConstraint, Basic) {
   ASSERT_FALSE(constraints.Query(1, 5));
 }

+TEST(CPUMonoConstraint, Basic) {
+  std::size_t kRows{64}, kCols{16};
+  Context ctx;
+
+  TrainParam param;
+  std::vector<std::int32_t> mono(kCols, 1);
+  I32Array arr;
+  for (std::size_t i = 0; i < kCols; ++i) {
+    arr.GetArray().push_back(mono[i]);
+  }
+  Json jarr{std::move(arr)};
+  std::string str_mono;
+  Json::Dump(jarr, &str_mono);
+  str_mono.front() = '(';
+  str_mono.back() = ')';
+
+  param.UpdateAllowUnknown(Args{{"monotone_constraints", str_mono}});
+
+  auto Xy = RandomDataGenerator{kRows, kCols, 0.0}.GenerateDMatrix(true);
+  auto sampler = std::make_shared<common::ColumnSampler>();
+
+  HistEvaluator<CPUExpandEntry> evalutor{&ctx, &param, Xy->Info(), sampler};
+  evalutor.InitRoot(GradStats{2.0, 2.0});
+
+  SplitEntry split;
+  split.Update(1.0f, 0, 3.0, false, false, GradStats{1.0, 1.0}, GradStats{1.0, 1.0});
+  CPUExpandEntry entry{0, 0, split};
+  RegTree tree{1, static_cast<bst_feature_t>(kCols)};
+  evalutor.ApplyTreeSplit(entry, &tree);
+
+  ASSERT_TRUE(evalutor.Evaluator().has_constraint);
+}
 }  // namespace tree
 }  // namespace xgboost
@@ -90,13 +90,16 @@ void TestColumnSplit(int32_t rows, bst_feature_t cols, RegTree const& expected_t
   param.Init(Args{});
   updater->Update(&param, p_gradients.get(), sliced.get(), position, {&tree});

-  EXPECT_EQ(tree.NumExtraNodes(), 10);
-  EXPECT_EQ(tree[0].SplitIndex(), 1);
+  ASSERT_EQ(tree.NumExtraNodes(), 10);
+  ASSERT_EQ(tree[0].SplitIndex(), 1);

-  EXPECT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
-  EXPECT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
+  ASSERT_NE(tree[tree[0].LeftChild()].SplitIndex(), 0);
+  ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);

-  EXPECT_EQ(tree, expected_tree);
+  FeatureMap fmap;
+  auto json = tree.DumpModel(fmap, false, "json");
+  auto expected_json = expected_tree.DumpModel(fmap, false, "json");
+  ASSERT_EQ(json, expected_json);
 }
 }  // anonymous namespace
@@ -19,6 +19,8 @@
 #include "xgboost/data.h"

 namespace xgboost::tree {

 namespace {
 template <typename ExpandEntry>
 void TestPartitioner(bst_target_t n_targets) {
   std::size_t n_samples = 1024, base_rowid = 0;
@@ -86,8 +88,117 @@ void TestPartitioner(bst_target_t n_targets) {
     }
   }
 }
 }  // anonymous namespace

 TEST(QuantileHist, Partitioner) { TestPartitioner<CPUExpandEntry>(1); }

 TEST(QuantileHist, MultiPartitioner) { TestPartitioner<MultiExpandEntry>(3); }

+namespace {
+template <typename ExpandEntry>
+void VerifyColumnSplitPartitioner(bst_target_t n_targets, size_t n_samples,
+                                  bst_feature_t n_features, size_t base_rowid,
+                                  std::shared_ptr<DMatrix> Xy, float min_value, float mid_value,
+                                  CommonRowPartitioner const& expected_mid_partitioner) {
+  auto dmat =
+      std::unique_ptr<DMatrix>{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};
+
+  Context ctx;
+  ctx.InitAllowUnknown(Args{});
+
+  std::vector<ExpandEntry> candidates{{0, 0}};
+  candidates.front().split.loss_chg = 0.4;
+  auto cuts = common::SketchOnDMatrix(&ctx, dmat.get(), 64);
+
+  for (auto const& page : Xy->GetBatches<SparsePage>()) {
+    GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
+    bst_feature_t const split_ind = 0;
+    common::ColumnMatrix column_indices;
+    column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
+    {
+      RegTree tree{n_targets, n_features};
+      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
+      if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
+        GetSplit(&tree, min_value, &candidates);
+      } else {
+        GetMultiSplitForTest(&tree, min_value, &candidates);
+      }
+      partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
+      ASSERT_EQ(partitioner.Size(), 3);
+      ASSERT_EQ(partitioner[1].Size(), 0);
+      ASSERT_EQ(partitioner[2].Size(), n_samples);
+    }
+    {
+      RegTree tree{n_targets, n_features};
+      CommonRowPartitioner partitioner{&ctx, n_samples, base_rowid, true};
+      if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
+        GetSplit(&tree, mid_value, &candidates);
+      } else {
+        GetMultiSplitForTest(&tree, mid_value, &candidates);
+      }
+      auto left_nidx = tree.LeftChild(RegTree::kRoot);
+      partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
+
+      auto elem = partitioner[left_nidx];
+      ASSERT_LT(elem.Size(), n_samples);
+      ASSERT_GT(elem.Size(), 1);
+      auto expected_elem = expected_mid_partitioner[left_nidx];
+      ASSERT_EQ(elem.Size(), expected_elem.Size());
+      for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
+        ASSERT_EQ(*it, *eit);
+      }
+
+      auto right_nidx = tree.RightChild(RegTree::kRoot);
+      elem = partitioner[right_nidx];
+      expected_elem = expected_mid_partitioner[right_nidx];
+      ASSERT_EQ(elem.Size(), expected_elem.Size());
+      for (auto it = elem.begin, eit = expected_elem.begin; it != elem.end; ++it, ++eit) {
+        ASSERT_EQ(*it, *eit);
+      }
+    }
+  }
+}
+
+template <typename ExpandEntry>
+void TestColumnSplitPartitioner(bst_target_t n_targets) {
+  std::size_t n_samples = 1024, base_rowid = 0;
+  bst_feature_t n_features = 16;
+  auto Xy = RandomDataGenerator{n_samples, n_features, 0}.GenerateDMatrix(true);
+  std::vector<ExpandEntry> candidates{{0, 0}};
+  candidates.front().split.loss_chg = 0.4;
+
+  Context ctx;
+  ctx.InitAllowUnknown(Args{});
+  auto cuts = common::SketchOnDMatrix(&ctx, Xy.get(), 64);
+
+  float min_value, mid_value;
+  CommonRowPartitioner mid_partitioner{&ctx, n_samples, base_rowid, false};
+  for (auto const& page : Xy->GetBatches<SparsePage>()) {
+    GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
+    bst_feature_t const split_ind = 0;
+    common::ColumnMatrix column_indices;
+    column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
+    min_value = gmat.cut.MinValues()[split_ind];
+
+    auto ptr = gmat.cut.Ptrs()[split_ind + 1];
+    mid_value = gmat.cut.Values().at(ptr / 2);
+    RegTree tree{n_targets, n_features};
+    if constexpr (std::is_same<ExpandEntry, CPUExpandEntry>::value) {
+      GetSplit(&tree, mid_value, &candidates);
+    } else {
+      GetMultiSplitForTest(&tree, mid_value, &candidates);
+    }
+    mid_partitioner.UpdatePosition<false, true>(&ctx, gmat, column_indices, candidates, &tree);
+  }
+
+  auto constexpr kWorkers = 4;
+  RunWithInMemoryCommunicator(kWorkers, VerifyColumnSplitPartitioner<ExpandEntry>, n_targets,
+                              n_samples, n_features, base_rowid, Xy, min_value, mid_value,
+                              mid_partitioner);
+}
+}  // anonymous namespace
+
+TEST(QuantileHist, PartitionerColSplit) { TestColumnSplitPartitioner<CPUExpandEntry>(1); }
+
+TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner<MultiExpandEntry>(3); }
 }  // namespace xgboost::tree