Compare commits
31 commits: master-roc ... release_1.

| Author | SHA1 | Date |
|---|---|---|
| | 963a17b771 | |
| | 000292ce6d | |
| | d3ec116322 | |
| | a018028471 | |
| | 3e343159ef | |
| | 99e802f2ff | |
| | 6a29afb480 | |
| | 8e321adac8 | |
| | d0ec65520a | |
| | 7aec915dcd | |
| | a78d0d4110 | |
| | 76c361431f | |
| | d95d02132a | |
| | 7109c6c1f2 | |
| | bce7ca313c | |
| | 8be2cd8c91 | |
| | c5f0cdbc72 | |
| | 1bf3899983 | |
| | c39f6b25f0 | |
| | 2b3e301543 | |
| | 10d3419fa6 | |
| | b273e5bd4c | |
| | 3a83fcb0eb | |
| | 3efc4ea0d1 | |
| | a2c778e2d1 | |
| | 8a0db293c5 | |
| | 028ec5f028 | |
| | 38c80bcec4 | |
| | 16ff63905d | |
| | a9b09919f9 | |
| | f3b060401a | |
.github/workflows/main.yml (2 changes; vendored)

@@ -192,7 +192,7 @@ jobs:
run: |
cd build/
tar cvjf ${{ steps.extract_branch.outputs.branch }}.tar.bz2 doc_doxygen/
-python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/ --acl public-read
+python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/doxygen/ --acl public-read
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
@@ -52,6 +52,7 @@ addons:
 apt:
 packages:
 - snapd
+ - unzip

 before_install:
 - source tests/travis/travis_setup_env.sh
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.13)
-project(xgboost LANGUAGES CXX C VERSION 1.3.0)
+project(xgboost LANGUAGES CXX C VERSION 1.3.3)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW)
Jenkinsfile (11 changes; vendored)

@@ -190,11 +190,20 @@ def BuildCUDA(args) {
if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
arch_flag = "-DGPU_COMPUTE_VER=75"
}
+def wheel_tag = "manylinux2010_x86_64"
sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh -DUSE_CUDA=ON -DUSE_NCCL=ON -DOPEN_MP:BOOL=ON -DHIDE_CXX_SYMBOLS=ON ${arch_flag}
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
-${dockerRun} ${container_type} ${docker_binary} ${docker_args} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
+${dockerRun} ${container_type} ${docker_binary} ${docker_args} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} ${wheel_tag}
"""
+if (args.cuda_version == ref_cuda_ver) {
+sh """
+${dockerRun} auditwheel_x86_64 ${docker_binary} auditwheel repair --plat ${wheel_tag} python-package/dist/*.whl
+mv -v wheelhouse/*.whl python-package/dist/
+# Make sure that libgomp.so is vendored in the wheel
+${dockerRun} auditwheel_x86_64 ${docker_binary} bash -c "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
+"""
+}
echo 'Stashing Python wheel...'
stash name: "xgboost_whl_cuda${args.cuda_version}", includes: 'python-package/dist/*.whl'
if (args.cuda_version == ref_cuda_ver && (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release'))) {
@@ -1,7 +1,7 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
-Version: 1.3.0.1
+Version: 1.3.3.1
Date: 2020-08-28
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
@@ -2,7 +2,6 @@
# of saved model files from XGBoost version 0.90 and 1.0.x.
library(xgboost)
library(Matrix)
source('./generate_models_params.R')

set.seed(0)
metadata <- list(

@@ -53,11 +52,16 @@ generate_logistic_model <- function () {
y <- sample(0:1, size = metadata$kRows, replace = TRUE)
stopifnot(max(y) == 1, min(y) == 0)

-data <- xgb.DMatrix(X, label = y, weight = w)
-params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,
-max_depth = metadata$kMaxDepth, objective = 'binary:logistic')
-booster <- xgb.train(params, data, nrounds = metadata$kRounds)
-save_booster(booster, 'logit')
+objective <- c('binary:logistic', 'binary:logitraw')
+name <- c('logit', 'logitraw')

+for (i in seq_len(length(objective))) {
+data <- xgb.DMatrix(X, label = y, weight = w)
+params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,
+max_depth = metadata$kMaxDepth, objective = objective[i])
+booster <- xgb.train(params, data, nrounds = metadata$kRounds)
+save_booster(booster, name[i])
+}
}

generate_classification_model <- function () {
@@ -39,6 +39,10 @@ run_booster_check <- function (booster, name) {
testthat::expect_equal(config$learner$learner_train_param$objective, 'multi:softmax')
testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class),
metadata$kClasses)
+} else if (name == 'logitraw') {
+testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
+testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)
+testthat::expect_equal(config$learner$learner_train_param$objective, 'binary:logitraw')
} else if (name == 'logit') {
testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)
@@ -1 +1 @@
-@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-SNAPSHOT
+@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@
@@ -6,11 +6,11 @@ function(setup_rpackage_install_target rlib_target build_dir)
install(
DIRECTORY "${xgboost_SOURCE_DIR}/R-package"
DESTINATION "${build_dir}"
-REGEX "src/*" EXCLUDE
-REGEX "R-package/configure" EXCLUDE
+PATTERN "src/*" EXCLUDE
+PATTERN "R-package/configure" EXCLUDE
)
install(TARGETS ${rlib_target}
LIBRARY DESTINATION "${build_dir}/R-package/src/"
RUNTIME DESTINATION "${build_dir}/R-package/src/")
install(SCRIPT ${PROJECT_BINARY_DIR}/RPackageInstall.cmake)
endfunction()
@@ -62,7 +62,7 @@ test:data = "agaricus.txt.test"
We use the tree booster and logistic regression objective in our setting. This indicates that we accomplish our task using classic gradient boosting regression tree(GBRT), which is a promising method for binary classification.

The parameters shown in the example gives the most common ones that are needed to use xgboost.
-If you are interested in more parameter settings, the complete parameter settings and detailed descriptions are [here](../../doc/parameter.rst). Besides putting the parameters in the configuration file, we can set them by passing them as arguments as below:
+If you are interested in more parameter settings, the complete parameter settings and detailed descriptions are [here](https://xgboost.readthedocs.io/en/stable/parameter.html). Besides putting the parameters in the configuration file, we can set them by passing them as arguments as below:

```
../../xgboost mushroom.conf max_depth=6

@@ -161,4 +161,3 @@ Eg. ```nthread=10```

Set nthread to be the number of your real cpu (On Unix, this can be found using ```lscpu```)
Some systems will have ```Thread(s) per core = 2```, for example, a 4 core cpu with 8 threads, in such case set ```nthread=4``` and not 8.
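As an illustrative aside (not taken from the diff above): the same parameter and `nthread` guidance from this CLI README can be expressed through the Python package. The file name and parameter values below are hypothetical; the data file is assumed to be the `agaricus.txt.train` file that ships with the demos.

```python
# Minimal sketch, assuming the agaricus demo data is in the working directory.
import xgboost as xgb

params = {
    "booster": "gbtree",
    "objective": "binary:logistic",
    "max_depth": 6,
    "eta": 1.0,
    # match nthread to the number of physical cores, not hyper-threads
    "nthread": 4,
}
dtrain = xgb.DMatrix("agaricus.txt.train")
bst = xgb.train(params, dtrain, num_boost_round=2)
```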
@@ -1,6 +1,6 @@
Regression
====
Using XGBoost for regression is very similar to using it for binary classification. We suggest that you can refer to the [binary classification demo](../binary_classification) first. In XGBoost if we use negative log likelihood as the loss function for regression, the training procedure is same as training binary classifier of XGBoost.

### Tutorial
The dataset we used is the [computer hardware dataset from UCI repository](https://archive.ics.uci.edu/ml/datasets/Computer+Hardware). The demo for regression is almost the same as the [binary classification demo](../binary_classification), except a little difference in general parameter:

@@ -14,4 +14,3 @@ objective = reg:squarederror
```

The input format is same as binary classification, except that the label is now the target regression values. We use linear regression here, if we want use objective = reg:logistic logistic regression, the label needed to be pre-scaled into [0,1].
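Illustrative aside on the label-scaling remark above (values are hypothetical): with `objective = reg:logistic` the labels must lie in [0, 1], so raw targets are typically min-max scaled first.

```python
# Scale raw regression targets into [0, 1] before using reg:logistic.
import numpy as np

y = np.array([18.0, 30.0, 52.0, 100.0])            # raw targets
y_scaled = (y - y.min()) / (y.max() - y.min())      # now within [0, 1]
assert y_scaled.min() >= 0.0 and y_scaled.max() <= 1.0
```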
@@ -60,9 +60,9 @@ This is a list of short codes introducing different functionalities of xgboost packages.
Most of examples in this section are based on CLI or python version.
However, the parameter settings can be applied to all versions

-- [Binary classification](binary_classification)
+- [Binary classification](CLI/binary_classification)
 - [Multiclass classification](multiclass_classification)
-- [Regression](regression)
+- [Regression](CLI/regression)
 - [Learning to Rank](rank)

### Benchmarks
@@ -5,9 +5,9 @@ objective="rank:pairwise"

# Tree Booster Parameters
# step size shrinkage
eta = 0.1
# minimum loss reduction required to make a further partition
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
min_child_weight = 0.1
# maximum depth of a tree

@@ -17,12 +17,10 @@ max_depth = 6
# the number of round to do boosting
num_round = 4
# 0 means do not save any model except the final round model
save_period = 0
# The path of training data
data = "mq2008.train"
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
eval[test] = "mq2008.vali"
# The path of test data
test:data = "mq2008.test"
@@ -2,7 +2,6 @@
Introduction to Boosted Trees
#############################
XGBoost stands for "Extreme Gradient Boosting", where the term "Gradient Boosting" originates from the paper *Greedy Function Approximation: A Gradient Boosting Machine*, by Friedman.
This is a tutorial on gradient boosted trees, and most of the content is based on `these slides <http://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf>`_ by Tianqi Chen, the original author of XGBoost.

The **gradient boosted trees** has been around for a while, and there are a lot of materials on the topic.
This tutorial will explain boosted trees in a self-contained and principled way using the elements of supervised learning.
@@ -55,7 +55,7 @@
#endif  // defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)

#if defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) && \
-!defined(__CUDACC__)
+!defined(__CUDACC__) && !defined(__sun) && !defined(sun)
#include <parallel/algorithm>
#define XGBOOST_PARALLEL_SORT(X, Y, Z) __gnu_parallel::sort((X), (Y), (Z))
#define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) \
@@ -11,6 +11,7 @@
#include <string>

namespace xgboost {

struct GenericParameter : public XGBoostParameter<GenericParameter> {
// Constant representing the device ID of CPU.
static int32_t constexpr kCpuId = -1;

@@ -26,6 +27,8 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
int nthread;
// primary device, -1 means no gpu.
int gpu_id;
+// fail when gpu_id is invalid
+bool fail_on_invalid_gpu_id {false};
// gpu page size in external memory mode, 0 means using the default.
size_t gpu_page_size;
bool enable_experimental_json_serialization {true};

@@ -64,6 +67,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
.set_default(-1)
.set_lower_bound(-1)
.describe("The primary GPU device ordinal.");
+DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
+.set_default(false)
+.describe("Fail with error when gpu_id is invalid.");
DMLC_DECLARE_FIELD(gpu_page_size)
.set_default(0)
.set_lower_bound(0)
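Because `fail_on_invalid_gpu_id` is declared as a regular `GenericParameter` field, it is accepted like any other booster parameter. A hedged usage sketch follows (the GPU ordinal 9999 is deliberately invalid; data and shapes are hypothetical, and the behavior mirrors the Python test added later in this comparison):

```python
# Sketch, assuming a machine where GPU ordinal 9999 does not exist.
import numpy as np
import xgboost as xgb

X = np.random.randn(50, 4)
y = np.random.randint(0, 2, size=50)
dtrain = xgb.DMatrix(X, label=y)

params = {
    "tree_method": "gpu_hist",
    "gpu_id": 9999,                  # out of range on purpose
    "fail_on_invalid_gpu_id": True,  # raise instead of silently falling back
}
try:
    xgb.train(params, dtrain, num_boost_round=1)
except xgb.core.XGBoostError as err:
    print(err)  # expected to mention that gpu_id 9999 is invalid
```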
@@ -6,6 +6,6 @@

#define XGBOOST_VER_MAJOR 1
#define XGBOOST_VER_MINOR 3
-#define XGBOOST_VER_PATCH 0
+#define XGBOOST_VER_PATCH 3

#endif  // XGBOOST_VERSION_CONFIG_H_
@@ -34,9 +34,9 @@ TO_VERSION=$2
sed_i() {
perl -p -000 -e "$1" "$2" > "$2.tmp" && mv "$2.tmp" "$2"
}

export -f sed_i

BASEDIR=$(dirname $0)/..
find "$BASEDIR" -name 'pom.xml' -not -path '*target*' -print \
 -exec bash -c \
@@ -6,7 +6,7 @@

<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
<packaging>pom</packaging>
<name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description>

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
</parent>
<artifactId>xgboost4j-example_2.12</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
<packaging>jar</packaging>
<build>
<plugins>

@@ -26,7 +26,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

@@ -37,7 +37,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
</parent>
<artifactId>xgboost4j-flink_2.12</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
<build>
<plugins>
<plugin>

@@ -26,7 +26,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
</parent>
<artifactId>xgboost4j-gpu_2.12</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
<packaging>jar</packaging>

<dependencies>

@@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
</parent>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<build>

@@ -24,7 +24,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

@@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
</parent>
<artifactId>xgboost4j-spark_2.12</artifactId>
<build>

@@ -24,7 +24,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
</parent>
<artifactId>xgboost4j_2.12</artifactId>
-<version>1.3.0-SNAPSHOT</version>
+<version>1.3.3</version>
<packaging>jar</packaging>

<dependencies>

@@ -1 +1 @@
-1.3.0-SNAPSHOT
+1.3.3
@@ -456,6 +456,7 @@ class LearningRateScheduler(TrainingCallback):

def after_iteration(self, model, epoch, evals_log):
model.set_param('learning_rate', self.learning_rates(epoch))
return False


# pylint: disable=too-many-instance-attributes
@@ -565,7 +566,7 @@ class EarlyStopping(TrainingCallback):
def after_training(self, model: Booster):
try:
if self.save_best:
-model = model[: int(model.attr('best_iteration'))]
+model = model[: int(model.attr('best_iteration')) + 1]
except XGBoostError as e:
raise XGBoostError('`save_best` is not applicable to current booster') from e
return model
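Aside on the change above: `Booster` slicing uses an exclusive upper bound, so without the `+ 1` the best round itself was dropped. A plain-Python sketch of the slicing semantics (values are hypothetical):

```python
# Exclusive upper bound: keeping the best round requires best_iteration + 1.
best_iteration = 3                      # hypothetical value stored by early stopping
rounds = list(range(10))                # stand-in for trained rounds 0..9
kept_old = rounds[:best_iteration]      # [0, 1, 2]      -- best round 3 lost
kept_new = rounds[:best_iteration + 1]  # [0, 1, 2, 3]   -- best round retained
```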
@@ -621,7 +622,7 @@ class EvaluationMonitor(TrainingCallback):
msg += self._fmt_metric(data, metric_name, score, stdv)
msg += '\n'

-if (epoch % self.period) != 0:
+if (epoch % self.period) == 0 or self.period == 1:
rabit.tracker_print(msg)
self._latest = None
else:
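Aside: with the corrected condition a message is printed on epochs that are multiples of `period` (and on every epoch when `period == 1`). A small sketch with a hypothetical period of 2:

```python
# Which epochs the corrected condition prints for, e.g. period = 2.
period = 2
printed = [epoch for epoch in range(6) if epoch % period == 0 or period == 1]
print(printed)  # [0, 2, 4] -- the inverted test previously selected the other epochs
```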
@@ -677,6 +678,7 @@ class TrainingCheckPoint(TrainingCallback):
else:
model.save_model(path)
self._epoch += 1
return False


class LegacyCallbacks:
@@ -1,11 +1,12 @@
# coding: utf-8
# pylint: disable=too-many-arguments, too-many-branches, invalid-name
-# pylint: disable=too-many-lines, too-many-locals
+# pylint: disable=too-many-lines, too-many-locals, no-self-use
"""Core XGBoost Library."""
import collections
# pylint: disable=no-name-in-module,import-error
from collections.abc import Mapping
# pylint: enable=no-name-in-module,import-error
+from typing import Dict, Union, List
import ctypes
import os
import re
@@ -1012,6 +1013,7 @@ class Booster(object):
_check_call(_LIB.XGBoosterCreate(dmats, c_bst_ulong(len(cache)),
ctypes.byref(self.handle)))
params = params or {}
+params = self._configure_metrics(params.copy())
if isinstance(params, list):
params.append(('validate_parameters', True))
else:
@@ -1041,6 +1043,17 @@ class Booster(object):
else:
raise TypeError('Unknown type:', model_file)

+def _configure_metrics(self, params: Union[Dict, List]) -> Union[Dict, List]:
+if isinstance(params, dict) and 'eval_metric' in params \
+and isinstance(params['eval_metric'], list):
+params = dict((k, v) for k, v in params.items())
+eval_metrics = params['eval_metric']
+params.pop("eval_metric", None)
+params = list(params.items())
+for eval_metric in eval_metrics:
+params += [('eval_metric', eval_metric)]
+return params
+
def __del__(self):
if hasattr(self, 'handle') and self.handle is not None:
_check_call(_LIB.XGBoosterFree(self.handle))
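Aside on `_configure_metrics` (moved here from `training.py`, see the later hunk): when `eval_metric` is given as a list, the parameter dict is flattened into a list of tuples so that the `eval_metric` key can repeat. A standalone sketch of the same transformation with hypothetical values:

```python
# Standalone re-implementation of the idea, not the library code itself.
params = {"objective": "binary:logistic", "eval_metric": ["auc", "logloss"]}

metrics = params.pop("eval_metric")
as_list = list(params.items()) + [("eval_metric", m) for m in metrics]
print(as_list)
# [('objective', 'binary:logistic'), ('eval_metric', 'auc'), ('eval_metric', 'logloss')]
```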
@@ -33,7 +33,7 @@ from .compat import lazy_isinstance
from .core import DMatrix, DeviceQuantileDMatrix, Booster, _expect, DataIter
from .core import _deprecate_positional_args
from .training import train as worker_train
-from .tracker import RabitTracker
+from .tracker import RabitTracker, get_host_ip
from .sklearn import XGBModel, XGBRegressorBase, XGBClassifierBase
from .sklearn import xgboost_model_doc

@@ -70,8 +70,7 @@ LOGGER = logging.getLogger('[xgboost.dask]')
def _start_tracker(n_workers):
"""Start Rabit tracker """
env = {'DMLC_NUM_WORKER': n_workers}
-import socket
-host = socket.gethostbyname(socket.gethostname())
+host = get_host_ip('auto')
rabit_context = RabitTracker(hostIP=host, nslave=n_workers)
env.update(rabit_context.slave_envs())
@@ -1211,10 +1210,10 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
early_stopping_rounds=early_stopping_rounds,
verbose=verbose)

-async def _predict_proba_async(self, data, output_margin=False,
+async def _predict_proba_async(self, X, output_margin=False,
base_margin=None):
test_dmatrix = await DaskDMatrix(
-client=self.client, data=data, base_margin=base_margin,
+client=self.client, data=X, base_margin=base_margin,
missing=self.missing
)
pred_probs = await predict(client=self.client,

@@ -1224,11 +1223,11 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
return pred_probs

# pylint: disable=arguments-differ,missing-docstring
-def predict_proba(self, data, output_margin=False, base_margin=None):
+def predict_proba(self, X, output_margin=False, base_margin=None):
_assert_dask_support()
return self.client.sync(
self._predict_proba_async,
-data,
+X=X,
output_margin=output_margin,
base_margin=base_margin
)
@@ -424,6 +424,7 @@ def _transform_cupy_array(data):
data, '__array__'):
import cupy  # pylint: disable=import-error
data = cupy.array(data, copy=False)
+data = data.astype(dtype=data.dtype, order='C', copy=False)
return data
@@ -4,6 +4,7 @@
import copy
import warnings
import json
+from typing import Optional
import numpy as np
from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args
from .training import train
@@ -398,7 +399,7 @@ class XGBModel(XGBModelBase):
'importance_type', 'kwargs', 'missing', 'n_estimators', 'use_label_encoder'}
filtered = dict()
for k, v in params.items():
-if k not in wrapper_specific:
+if k not in wrapper_specific and not callable(v):
filtered[k] = v
return filtered
@@ -494,6 +495,13 @@ class XGBModel(XGBModelBase):
# Delete the attribute after load
self.get_booster().set_attr(scikit_learn=None)

+def _set_evaluation_result(self, evals_result: Optional[dict]) -> None:
+if evals_result:
+for val in evals_result.items():
+evals_result_key = list(val[1].keys())[0]
+evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
+self.evals_result_ = evals_result
+
@_deprecate_positional_args
def fit(self, X, y, *, sample_weight=None, base_margin=None,
eval_set=None, eval_metric=None, early_stopping_rounds=None,
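Aside on the new `_set_evaluation_result` helper: it consolidates the duplicated post-fit bookkeeping (used by `XGBModel`, `XGBClassifier` and `XGBRanker` in the hunks that follow) and stores the per-evaluation-set, per-metric history on the estimator. A hedged usage sketch with random data:

```python
# Sketch of the evals_result_ structure the helper exposes after fitting.
import numpy as np
import xgboost as xgb

X = np.random.randn(100, 4)
y = np.random.randint(0, 2, size=100)

clf = xgb.XGBClassifier(n_estimators=3, use_label_encoder=False)
clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss", verbose=False)
print(clf.evals_result_)
# {'validation_0': {'logloss': [..., ..., ...]}}  -- eval set -> metric -> per-round scores
```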
@@ -565,13 +573,6 @@ class XGBModel(XGBModelBase):

"""
self.n_features_in_ = X.shape[1]

-train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
-base_margin=base_margin,
-missing=self.missing,
-nthread=self.n_jobs)
-train_dmatrix.set_info(feature_weights=feature_weights)
-
evals_result = {}

train_dmatrix, evals = self._wrap_evaluation_matrices(
@@ -601,12 +602,7 @@ class XGBModel(XGBModelBase):
verbose_eval=verbose, xgb_model=xgb_model,
callbacks=callbacks)

-if evals_result:
-for val in evals_result.items():
-evals_result_key = list(val[1].keys())[0]
-evals_result[val[0]][evals_result_key] = val[1][
-evals_result_key]
-self.evals_result_ = evals_result
+self._set_evaluation_result(evals_result)

if early_stopping_rounds is not None:
self.best_score = self._Booster.best_score
@@ -841,14 +837,18 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
self.classes_ = cp.unique(y.values)
self.n_classes_ = len(self.classes_)
can_use_label_encoder = False
-if not cp.array_equal(self.classes_, cp.arange(self.n_classes_)):
+expected_classes = cp.arange(self.n_classes_)
+if (self.classes_.shape != expected_classes.shape or
+not (self.classes_ == expected_classes).all()):
raise ValueError(label_encoding_check_error)
elif _is_cupy_array(y):
import cupy as cp  # pylint: disable=E0401
self.classes_ = cp.unique(y)
self.n_classes_ = len(self.classes_)
can_use_label_encoder = False
-if not cp.array_equal(self.classes_, cp.arange(self.n_classes_)):
+expected_classes = cp.arange(self.n_classes_)
+if (self.classes_.shape != expected_classes.shape or
+not (self.classes_ == expected_classes).all()):
raise ValueError(label_encoding_check_error)
else:
self.classes_ = np.unique(y)
@@ -915,12 +915,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
callbacks=callbacks)

self.objective = xgb_options["objective"]
-if evals_result:
-for val in evals_result.items():
-evals_result_key = list(val[1].keys())[0]
-evals_result[val[0]][
-evals_result_key] = val[1][evals_result_key]
-self.evals_result_ = evals_result
+self._set_evaluation_result(evals_result)

if early_stopping_rounds is not None:
self.best_score = self._Booster.best_score
@@ -991,10 +986,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
return self._le.inverse_transform(column_indexes)
return column_indexes

-def predict_proba(self, data, ntree_limit=None, validate_features=False,
+def predict_proba(self, X, ntree_limit=None, validate_features=False,
base_margin=None):
-"""
-Predict the probability of each `data` example being of a given class.
+""" Predict the probability of each `X` example being of a given class.

.. note:: This function is not thread safe

@@ -1004,21 +998,22 @@ class XGBClassifier(XGBModel, XGBClassifierBase):

Parameters
----------
-data : array_like
+X : array_like
Feature matrix.
ntree_limit : int
-Limit number of trees in the prediction; defaults to best_ntree_limit if defined
-(i.e. it has been trained with early stopping), otherwise 0 (use all trees).
+Limit number of trees in the prediction; defaults to best_ntree_limit if
+defined (i.e. it has been trained with early stopping), otherwise 0 (use all
+trees).
validate_features : bool
-When this is True, validate that the Booster's and data's feature_names are identical.
-Otherwise, it is assumed that the feature_names are the same.
+When this is True, validate that the Booster's and data's feature_names are
+identical. Otherwise, it is assumed that the feature_names are the same.

Returns
-------
prediction : numpy array
a numpy array with the probability of each data example being of a given class.
"""
-test_dmatrix = DMatrix(data, base_margin=base_margin,
+test_dmatrix = DMatrix(X, base_margin=base_margin,
missing=self.missing, nthread=self.n_jobs)
if ntree_limit is None:
ntree_limit = getattr(self, "best_ntree_limit", 0)
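Usage sketch for the renamed signature above (random data, hypothetical values): the first positional argument is now `X`, and `ntree_limit` caps how many trees contribute to the prediction.

```python
# Minimal sketch of calling predict_proba with the renamed argument.
import numpy as np
import xgboost as xgb

X = np.random.randn(100, 4)
y = np.random.randint(0, 2, size=100)
clf = xgb.XGBClassifier(n_estimators=10, use_label_encoder=False).fit(X, y)

proba_all = clf.predict_proba(X)                  # all trees, shape (100, 2)
proba_few = clf.predict_proba(X, ntree_limit=5)   # only the first 5 trees
print(proba_all.shape, proba_few.shape)
```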
@@ -1324,12 +1319,7 @@ class XGBRanker(XGBModel):

self.objective = params["objective"]

-if evals_result:
-for val in evals_result.items():
-evals_result_key = list(val[1].keys())[0]
-evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
-self.evals_result = evals_result
-
+self._set_evaluation_result(evals_result)
if early_stopping_rounds is not None:
self.best_score = self._Booster.best_score
self.best_iteration = self._Booster.best_iteration
@@ -52,6 +52,28 @@ def get_some_ip(host):
return socket.getaddrinfo(host, None)[0][4][0]


+def get_host_ip(hostIP=None):
+if hostIP is None or hostIP == 'auto':
+hostIP = 'ip'
+
+if hostIP == 'dns':
+hostIP = socket.getfqdn()
+elif hostIP == 'ip':
+from socket import gaierror
+try:
+hostIP = socket.gethostbyname(socket.getfqdn())
+except gaierror:
+logging.warning(
+'gethostbyname(socket.getfqdn()) failed... trying on hostname()')
+hostIP = socket.gethostbyname(socket.gethostname())
+if hostIP.startswith("127."):
+s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+# doesn't have to be reachable
+s.connect(('10.255.255.255', 1))
+hostIP = s.getsockname()[0]
+return hostIP
+
+
def get_family(addr):
return socket.getaddrinfo(addr, None)[0][0]
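Aside on the new `get_host_ip` helper: it is what `xgboost.dask._start_tracker` now calls instead of resolving the hostname directly (see the earlier dask.py hunk). A hedged usage sketch, assuming a machine where the resolved address is routable:

```python
# Sketch of resolving the tracker address with the new helper.
from xgboost.tracker import RabitTracker, get_host_ip

host = get_host_ip('auto')                     # 'auto' falls back to the 'ip' strategy
tracker = RabitTracker(hostIP=host, nslave=2)  # same call site the dask module uses
print(host, tracker.slave_envs())
```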
@@ -4,6 +4,7 @@
"""Training Library containing training routines."""
import warnings
import copy
+import json

import numpy as np
from .core import Booster, XGBoostError
@@ -40,18 +41,6 @@ def _is_new_callback(callbacks):
for c in callbacks) or not callbacks


-def _configure_metrics(params):
-if isinstance(params, dict) and 'eval_metric' in params \
-and isinstance(params['eval_metric'], list):
-params = dict((k, v) for k, v in params.items())
-eval_metrics = params['eval_metric']
-params.pop("eval_metric", None)
-params = list(params.items())
-for eval_metric in eval_metrics:
-params += [('eval_metric', eval_metric)]
-return params
-
-
def _train_internal(params, dtrain,
num_boost_round=10, evals=(),
obj=None, feval=None,
@@ -61,7 +50,6 @@ def _train_internal(params, dtrain,
"""internal training function"""
callbacks = [] if callbacks is None else copy.copy(callbacks)
evals = list(evals)
-params = _configure_metrics(params.copy())

bst = Booster(params, [dtrain] + [d[0] for d in evals])
nboost = 0
@@ -136,7 +124,26 @@ def _train_internal(params, dtrain,
bst.best_iteration = int(bst.attr('best_iteration'))
else:
bst.best_iteration = nboost - 1

+config = json.loads(bst.save_config())
+booster = config['learner']['gradient_booster']['name']
+if booster == 'gblinear':
+num_parallel_tree = 0
+elif booster == 'dart':
+num_parallel_tree = int(
+config['learner']['gradient_booster']['gbtree']['gbtree_train_param'][
+'num_parallel_tree'
+]
+)
+elif booster == 'gbtree':
+num_parallel_tree = int(
+config['learner']['gradient_booster']['gbtree_train_param'][
+'num_parallel_tree']
+)
+else:
+raise ValueError(f'Unknown booster: {booster}')
bst.best_ntree_limit = (bst.best_iteration + 1) * num_parallel_tree

# Copy to serialise and unserialise booster to reset state and free
# training memory
return bst.copy()
@@ -175,9 +182,10 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
If there's more than one metric in the **eval_metric** parameter given in
**params**, the last metric will be used for early stopping.
If early stopping occurs, the model will have three additional fields:
-``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.
-(Use ``bst.best_ntree_limit`` to get the correct value if
-``num_parallel_tree`` and/or ``num_class`` appears in the parameters)
+``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``. Use
+``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree`` and/or
+``num_class`` appears in the parameters. ``best_ntree_limit`` is the result of
+``num_parallel_tree * best_iteration``.
evals_result: dict
This dictionary stores the evaluation results of all the items in watchlist.
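Worked aside on the relationship documented above, following the computation added to `_train_internal` earlier in this file (`(best_iteration + 1) * num_parallel_tree`); the numbers are hypothetical:

```python
# Hypothetical values illustrating how best_ntree_limit is derived.
num_parallel_tree = 4        # e.g. a forest of 4 trees trained per round
best_iteration = 9           # 0-based index of the best boosting round
best_ntree_limit = (best_iteration + 1) * num_parallel_tree
print(best_ntree_limit)      # 40 trees are used when predicting at the best round
```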
@@ -25,6 +25,10 @@
#include <sys/socket.h>
#include <sys/ioctl.h>

+#if defined(__sun) || defined(sun)
+#include <sys/sockio.h>
+#endif  // defined(__sun) || defined(sun)
+
#endif  // defined(_WIN32)

#include <string>
@@ -268,7 +268,7 @@ class CLI {
// always save final round
if ((param_.save_period == 0 ||
param_.num_round % param_.save_period != 0) &&
-param_.model_out != CLIParam::kNull && rabit::GetRank() == 0) {
+rabit::GetRank() == 0) {
std::ostringstream os;
if (param_.model_out == CLIParam::kNull) {
os << param_.model_dir << '/' << std::setfill('0') << std::setw(4)
@@ -407,9 +407,14 @@ class HistCollection {
// access histogram for i-th node
GHistRowT operator[](bst_uint nid) const {
constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
-CHECK_NE(row_ptr_[nid], kMax);
-GradientPairT* ptr =
-const_cast<GradientPairT*>(dmlc::BeginPtr(data_) + row_ptr_[nid]);
+const size_t id = row_ptr_[nid];
+CHECK_NE(id, kMax);
+GradientPairT* ptr = nullptr;
+if (contiguous_allocation_) {
+ptr = const_cast<GradientPairT*>(data_[0].data() + nbins_*id);
+} else {
+ptr = const_cast<GradientPairT*>(data_[id].data());
+}
return {ptr, nbins_};
}
@@ -438,21 +443,37 @@ class HistCollection {
}
CHECK_EQ(row_ptr_[nid], kMax);

-if (data_.size() < nbins_ * (nid + 1)) {
-data_.resize(nbins_ * (nid + 1));
+if (data_.size() < (nid + 1)) {
+data_.resize((nid + 1));
}

-row_ptr_[nid] = nbins_ * n_nodes_added_;
+row_ptr_[nid] = n_nodes_added_;
n_nodes_added_++;
}
+// allocate thread local memory i-th node
+void AllocateData(bst_uint nid) {
+if (data_[row_ptr_[nid]].size() == 0) {
+data_[row_ptr_[nid]].resize(nbins_, {0, 0});
+}
+}
+// allocate common buffer contiguously for all nodes, need for single Allreduce call
+void AllocateAllData() {
+const size_t new_size = nbins_*data_.size();
+contiguous_allocation_ = true;
+if (data_[0].size() != new_size) {
+data_[0].resize(new_size);
+}
+}

private:
/*! \brief number of all bins over all features */
uint32_t nbins_ = 0;
/*! \brief amount of active nodes in hist collection */
uint32_t n_nodes_added_ = 0;
+/*! \brief flag to identify contiguous memory allocation */
+bool contiguous_allocation_ = false;

-std::vector<GradientPairT> data_;
+std::vector<std::vector<GradientPairT>> data_;

/*! \brief row_ptr_[nid] locates bin for histogram of node nid */
std::vector<size_t> row_ptr_;
@@ -481,7 +502,6 @@ class ParallelGHistBuilder {
const std::vector<GHistRowT>& targeted_hists) {
hist_buffer_.Init(nbins_);
tid_nid_to_hist_.clear();
-hist_memory_.clear();
threads_to_nids_map_.clear();

targeted_hists_ = targeted_hists;

@@ -504,8 +524,11 @@ class ParallelGHistBuilder {
CHECK_LT(nid, nodes_);
CHECK_LT(tid, nthreads_);

-size_t idx = tid_nid_to_hist_.at({tid, nid});
-GHistRowT hist = hist_memory_[idx];
+int idx = tid_nid_to_hist_.at({tid, nid});
+if (idx >= 0) {
+hist_buffer_.AllocateData(idx);
+}
+GHistRowT hist = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

if (!hist_was_used_[tid * nodes_ + nid]) {
InitilizeHistByZeroes(hist, 0, hist.size());

@@ -526,8 +549,9 @@ class ParallelGHistBuilder {
for (size_t tid = 0; tid < nthreads_; ++tid) {
if (hist_was_used_[tid * nodes_ + nid]) {
is_updated = true;
-const size_t idx = tid_nid_to_hist_.at({tid, nid});
-GHistRowT src = hist_memory_[idx];
+int idx = tid_nid_to_hist_.at({tid, nid});
+GHistRowT src = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

if (dst.data() != src.data()) {
IncrementHist(dst, src, begin, end);

@@ -589,7 +613,6 @@ class ParallelGHistBuilder {
}

void MatchNodeNidPairToHist() {
-size_t hist_total = 0;
size_t hist_allocated_additionally = 0;

for (size_t nid = 0; nid < nodes_; ++nid) {

@@ -597,15 +620,11 @@ class ParallelGHistBuilder {
for (size_t tid = 0; tid < nthreads_; ++tid) {
if (threads_to_nids_map_[tid * nodes_ + nid]) {
if (first_hist) {
-hist_memory_.push_back(targeted_hists_[nid]);
+tid_nid_to_hist_[{tid, nid}] = -1;
first_hist = false;
} else {
-hist_memory_.push_back(hist_buffer_[hist_allocated_additionally]);
-hist_allocated_additionally++;
+tid_nid_to_hist_[{tid, nid}] = hist_allocated_additionally++;
}
-// map pair {tid, nid} to index of allocated histogram from hist_memory_
-tid_nid_to_hist_[{tid, nid}] = hist_total++;
-CHECK_EQ(hist_total, hist_memory_.size());
}
}
}

@@ -630,10 +649,11 @@ class ParallelGHistBuilder {
std::vector<bool> threads_to_nids_map_;
/*! \brief Contains histograms for final results */
std::vector<GHistRowT> targeted_hists_;
-/*! \brief Allocated memory for histograms used for construction */
-std::vector<GHistRowT> hist_memory_;
-/*! \brief map pair {tid, nid} to index of allocated histogram from hist_memory_ */
-std::map<std::pair<size_t, size_t>, size_t> tid_nid_to_hist_;
+/*!
+ * \brief map pair {tid, nid} to index of allocated histogram from hist_buffer_ and targeted_hists_,
+ * -1 is reserved for targeted_hists_
+ */
+std::map<std::pair<size_t, size_t>, int> tid_nid_to_hist_;
};

/*!
@@ -11,6 +11,7 @@
#include <algorithm>
#include <vector>
#include <utility>
+#include <memory>

namespace xgboost {
namespace common {
@@ -150,24 +151,33 @@ class PartitionBuilder {
}
}

+// allocate thread local memory, should be called for each specific task
+void AllocateForTask(size_t id) {
+if (mem_blocks_[id].get() == nullptr) {
+BlockInfo* local_block_ptr = new BlockInfo;
+CHECK_NE(local_block_ptr, (BlockInfo*)nullptr);
+mem_blocks_[id].reset(local_block_ptr);
+}
+}
+
common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
-return { mem_blocks_.at(task_idx).Left(), end - begin };
+return { mem_blocks_.at(task_idx)->Left(), end - begin };
}

common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
-return { mem_blocks_.at(task_idx).Right(), end - begin };
+return { mem_blocks_.at(task_idx)->Right(), end - begin };
}

void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left) {
size_t task_idx = GetTaskIdx(nid, begin);
-mem_blocks_.at(task_idx).n_left = n_left;
+mem_blocks_.at(task_idx)->n_left = n_left;
}

void SetNRightElems(int nid, size_t begin, size_t end, size_t n_right) {
size_t task_idx = GetTaskIdx(nid, begin);
-mem_blocks_.at(task_idx).n_right = n_right;
+mem_blocks_.at(task_idx)->n_right = n_right;
}

@@ -185,13 +195,13 @@ class PartitionBuilder {
for (size_t i = 0; i < blocks_offsets_.size()-1; ++i) {
size_t n_left = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
-mem_blocks_[j].n_offset_left = n_left;
-n_left += mem_blocks_[j].n_left;
+mem_blocks_[j]->n_offset_left = n_left;
+n_left += mem_blocks_[j]->n_left;
}
size_t n_right = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
-mem_blocks_[j].n_offset_right = n_left + n_right;
-n_right += mem_blocks_[j].n_right;
+mem_blocks_[j]->n_offset_right = n_left + n_right;
+n_right += mem_blocks_[j]->n_right;
}
left_right_nodes_sizes_[i] = {n_left, n_right};
}

@@ -200,21 +210,21 @@ class PartitionBuilder {
void MergeToArray(int nid, size_t begin, size_t* rows_indexes) {
size_t task_idx = GetTaskIdx(nid, begin);

-size_t* left_result = rows_indexes + mem_blocks_[task_idx].n_offset_left;
-size_t* right_result = rows_indexes + mem_blocks_[task_idx].n_offset_right;
+size_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left;
+size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right;

-const size_t* left = mem_blocks_[task_idx].Left();
-const size_t* right = mem_blocks_[task_idx].Right();
+const size_t* left = mem_blocks_[task_idx]->Left();
+const size_t* right = mem_blocks_[task_idx]->Right();

-std::copy_n(left, mem_blocks_[task_idx].n_left, left_result);
-std::copy_n(right, mem_blocks_[task_idx].n_right, right_result);
+std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result);
+std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result);
}

protected:
size_t GetTaskIdx(int nid, size_t begin) {
return blocks_offsets_[nid] + begin / BlockSize;
}

protected:
struct BlockInfo{
size_t n_left;
size_t n_right;

@@ -230,12 +240,12 @@ class PartitionBuilder {
return &right_data_[0];
}
private:
-alignas(128) size_t left_data_[BlockSize];
-alignas(128) size_t right_data_[BlockSize];
+size_t left_data_[BlockSize];
+size_t right_data_[BlockSize];
};
std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;
std::vector<size_t> blocks_offsets_;
-std::vector<BlockInfo> mem_blocks_;
+std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;
size_t max_n_tasks_ = 0;
};
@@ -10,10 +10,6 @@ namespace xgboost {
namespace gbm {

void GBLinearModel::SaveModel(Json* p_out) const {
-using WeightType = std::remove_reference<decltype(std::declval<decltype(weight)>().back())>::type;
-using JsonFloat = Number::Float;
-static_assert(std::is_same<WeightType, JsonFloat>::value,
-"Weight type should be of the same type with JSON float");
auto& out = *p_out;

size_t const n_weights = weight.size();
@@ -222,6 +222,10 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
LOG(WARNING) << "No visible GPU is found, setting `gpu_id` to -1";
}
this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
+} else if (fail_on_invalid_gpu_id) {
+CHECK(gpu_id == kCpuId || gpu_id < n_gpus)
+<< "Only " << n_gpus << " GPUs are visible, gpu_id "
+<< gpu_id << " is invalid.";
} else if (gpu_id != kCpuId && gpu_id >= n_gpus) {
LOG(WARNING) << "Only " << n_gpus
<< " GPUs are visible, setting `gpu_id` to " << gpu_id % n_gpus;
@@ -162,6 +162,9 @@ struct LogisticRaw : public LogisticRegression {
predt = common::Sigmoid(predt);
return std::max(predt * (T(1.0f) - predt), eps);
}
+static bst_float ProbToMargin(bst_float base_score) {
+return base_score;
+}
static const char* DefaultEvalMetric() { return "auc"; }

static const char* Name() { return "binary:logitraw"; }
@@ -580,7 +580,7 @@ class GPUPredictor : public xgboost::Predictor {
Predictor::Predictor{generic_param} {}

~GPUPredictor() override {
-if (generic_param_->gpu_id >= 0) {
+if (generic_param_->gpu_id >= 0 && generic_param_->gpu_id < common::AllVisibleGPUs()) {
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
}
}
@@ -182,8 +182,10 @@ void DistributedHistSynchronizer<GradientSumT>::SyncHistograms(BuilderT* builder,
}
});
+builder->builder_monitor_.Start("SyncHistogramsAllreduce");
builder->histred_.Allreduce(builder->hist_[starting_index].data(),
builder->hist_builder_.GetNumBins() * sync_count);
+builder->builder_monitor_.Stop("SyncHistogramsAllreduce");
ParallelSubtractionHist(builder, space, builder->nodes_for_explicit_hist_build_, p_tree);

@@ -232,7 +234,7 @@ void BatchHistRowsAdder<GradientSumT>::AddHistRows(BuilderT *builder,
for (auto const& node : builder->nodes_for_subtraction_trick_) {
builder->hist_.AddHistRow(node.nid);
}
builder->hist_.AllocateAllData();
builder->builder_monitor_.Stop("AddHistRows");
}

@@ -268,6 +270,8 @@ void DistributedHistRowsAdder<GradientSumT>::AddHistRows(BuilderT *builder,
builder->hist_local_worker_.AddHistRow(nid);
}
}
+builder->hist_.AllocateAllData();
+builder->hist_local_worker_.AllocateAllData();
(*sync_count) = std::max(1, n_left);
builder->builder_monitor_.Stop("AddHistRows");
}

@@ -1166,7 +1170,7 @@ template <typename GradientSumT>
void QuantileHistMaker::Builder<GradientSumT>::ApplySplit(const std::vector<ExpandEntry> nodes,
const GHistIndexMatrix& gmat,
const ColumnMatrix& column_matrix,
-const HistCollection<GradientSumT>&,
+const HistCollection<GradientSumT>& hist,
RegTree* p_tree) {
builder_monitor_.Start("ApplySplit");
// 1. Find split condition for each split

@@ -1189,7 +1193,10 @@ void QuantileHistMaker::Builder<GradientSumT>::ApplySplit(const std::vector<ExpandEntry> nodes,
// 2.3 Split elements of row_set_collection_ to left and right child-nodes for each node
// Store results in intermediate buffers from partition_builder_
common::ParallelFor2d(space, this->nthread_, [&](size_t node_in_set, common::Range1d r) {
+size_t begin = r.begin();
const int32_t nid = nodes[node_in_set].nid;
+const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin);
+partition_builder_.AllocateForTask(task_id);
switch (column_matrix.GetTypeSize()) {
case common::kUint8BinsTypeSize:
PartitionKernel<uint8_t>(node_in_set, nid, r,
tests/ci_build/CentOS-Base.repo (new file, 37 lines)

@@ -0,0 +1,37 @@
[base]
name=CentOS-$releasever - Base
baseurl=http://vault.centos.org/centos/$releasever/os/$basearch/
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6

#released updates
[updates]
name=CentOS-$releasever - Updates
baseurl=http://vault.centos.org/centos/$releasever/updates/$basearch/
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6

#additional packages that may be useful
[extras]
name=CentOS-$releasever - Extras
baseurl=http://vault.centos.org/centos/$releasever/extras/$basearch/
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6

#additional packages that extend functionality of existing packages
[centosplus]
name=CentOS-$releasever - Plus
mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=centosplus&infra=$infra
#baseurl=http://mirror.centos.org/centos/$releasever/centosplus/$basearch/
gpgcheck=1
enabled=0
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6

#contrib - packages by Centos Users
[contrib]
name=CentOS-$releasever - Contrib
mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=contrib&infra=$infra
#baseurl=http://mirror.centos.org/centos/$releasever/contrib/$basearch/
gpgcheck=1
enabled=0
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
tests/ci_build/Dockerfile.auditwheel_x86_64 (new file, 15 lines)

@@ -0,0 +1,15 @@
FROM quay.io/pypa/manylinux2010_x86_64

# Install lightweight sudo (not bound to TTY)
ENV GOSU_VERSION 1.10
RUN set -ex; \
curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
chmod +x /usr/local/bin/gosu && \
gosu nobody true

# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/

WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
@@ -19,7 +19,7 @@ ENV PATH=/opt/python/bin:$PATH

# Create new Conda environment with cuDF, Dask, and cuPy
RUN \
conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
-python=3.7 cudf=0.16* rmm=0.16* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda dask-cudf cupy \
+python=3.7 cudf=0.17* rmm=0.17* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda dask-cudf cupy \
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis

ENV GOSU_VERSION 1.10
@@ -6,12 +6,13 @@ ARG CUDA_VERSION_ARG
ENV DEBIAN_FRONTEND noninteractive
ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/

+COPY CentOS-Base.repo /etc/yum.repos.d/

# Install all basic requirements
RUN \
yum install -y epel-release && \
yum -y update && \
yum install -y tar unzip wget xz git centos-release-scl yum-utils && \
yum-config-manager --enable centos-sclo-rh-testing && \
yum -y update && \
yum install -y tar unzip wget xz git patchelf && \
yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \

@@ -20,6 +21,7 @@ RUN \
# Python
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python && \
+/opt/python/bin/python -m pip install auditwheel && \
# CMake
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \

@@ -29,7 +31,7 @@ RUN \
wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \
cd ninja && \
-python ./configure.py --bootstrap
+/opt/python/bin/python ./configure.py --bootstrap

# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \
@@ -2,12 +2,13 @@ FROM centos:6

ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/

+COPY CentOS-Base.repo /etc/yum.repos.d/

# Install all basic requirements
RUN \
yum install -y epel-release && \
yum -y update && \
yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \
yum-config-manager --enable centos-sclo-rh-testing && \
yum -y update && \
yum install -y tar unzip wget xz git java-1.8.0-openjdk-devel && \
yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \

@@ -31,7 +32,7 @@ ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp

# Install Python packages
RUN \
-pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22 awscli
+pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.25.10 awscli

ENV GOSU_VERSION 1.10
@@ -6,12 +6,13 @@ ARG CUDA_VERSION_ARG
ENV DEBIAN_FRONTEND noninteractive
ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/

+COPY CentOS-Base.repo /etc/yum.repos.d/

# Install all basic requirements
RUN \
yum install -y epel-release && \
yum -y update && \
yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \
yum-config-manager --enable centos-sclo-rh-testing && \
yum -y update && \
yum install -y tar unzip wget xz git java-1.8.0-openjdk-devel && \
yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \

@@ -45,7 +46,7 @@ ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp

# Install Python packages
RUN \
-pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22 awscli
+pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.25.10 awscli

ENV GOSU_VERSION 1.10
@@ -29,7 +29,7 @@ ENV PATH=/opt/python/bin:$PATH

# Create new Conda environment with RMM
RUN \
conda create -n gpu_test -c nvidia -c rapidsai-nightly -c rapidsai -c conda-forge -c defaults \
-python=3.7 rmm=0.16* cudatoolkit=$CUDA_VERSION_ARG
+python=3.7 rmm=0.17* cudatoolkit=$CUDA_VERSION_ARG

ENV GOSU_VERSION 1.10
@@ -27,3 +27,4 @@ dependencies:
 - pip:
 - shap
 - awscli
+ - auditwheel
@@ -9,7 +9,6 @@ dependencies:
 - scikit-learn
 - pandas
 - pytest
- - python-graphviz
 - boto3
 - hypothesis
 - jsonschema

@@ -17,3 +16,4 @@ dependencies:
 - pip:
 - cupy-cuda101
 - modin[all]
+ - graphviz
@@ -35,7 +35,7 @@ void ParallelGHistBuilderReset() {
for(size_t inode = 0; inode < kNodesExtended; inode++) {
collection.AddHistRow(inode);
}
collection.AllocateAllData();
ParallelGHistBuilder<GradientSumT> hist_builder;
hist_builder.Init(kBins);
std::vector<GHistRow<GradientSumT>> target_hist(kNodes);

@@ -91,7 +91,7 @@ void ParallelGHistBuilderReduceHist(){
for(size_t inode = 0; inode < kNodes; inode++) {
collection.AddHistRow(inode);
}
collection.AllocateAllData();
ParallelGHistBuilder<GradientSumT> hist_builder;
hist_builder.Init(kBins);
std::vector<GHistRow<GradientSumT>> target_hist(kNodes);
@ -32,6 +32,8 @@ TEST(PartitionBuilder, BasicTest) {
for(size_t j = 0; j < tasks[nid]; ++j) {
size_t begin = kBlockSize*j;
size_t end = kBlockSize*(j+1);
const size_t id = builder.GetTaskIdx(nid, begin);
builder.AllocateForTask(id);

auto left = builder.GetLeftBuffer(nid, begin, end);
auto right = builder.GetRightBuffer(nid, begin, end);
@ -274,6 +274,7 @@ class QuantileHistMock : public QuantileHistMaker {
RealImpl::InitData(gmat, gpair, fmat, tree);
GHistIndexBlockMatrix dummy;
this->hist_.AddHistRow(nid);
this->hist_.AllocateAllData();
this->BuildHist(gpair, this->row_set_collection_[nid],
gmat, dummy, this->hist_[nid]);

@ -315,7 +316,7 @@ class QuantileHistMock : public QuantileHistMaker {

RealImpl::InitData(gmat, row_gpairs, *dmat, tree);
this->hist_.AddHistRow(0);

this->hist_.AllocateAllData();
this->BuildHist(row_gpairs, this->row_set_collection_[0],
gmat, quantile_index_block, this->hist_[0]);

@ -411,7 +412,7 @@ class QuantileHistMock : public QuantileHistMaker {
cm.Init(gmat, 0.0);
RealImpl::InitData(gmat, row_gpairs, *dmat, tree);
this->hist_.AddHistRow(0);

this->hist_.AllocateAllData();
RealImpl::InitNewNode(0, gmat, row_gpairs, *dmat, tree);

const size_t num_row = dmat->Info().num_row_;
@ -449,6 +450,8 @@ class QuantileHistMock : public QuantileHistMaker {
RealImpl::partition_builder_.Init(1, 1, [&](size_t node_in_set) {
return 1;
});
const size_t task_id = RealImpl::partition_builder_.GetTaskIdx(0, 0);
RealImpl::partition_builder_.AllocateForTask(task_id);
this->template PartitionKernel<uint8_t>(0, 0, common::Range1d(0, kNRows),
split, cm, tree);
RealImpl::partition_builder_.CalculateRowOffsets();
@ -52,3 +52,17 @@ class TestGPUBasicModels:

model_0, model_1 = self.run_cls(X, y, False)
assert model_0 != model_1

def test_invalid_gpu_id(self):
X = np.random.randn(10, 5) * 1e4
y = np.random.randint(0, 2, size=10) * 1e4
# should pass with invalid gpu id
cls1 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999)
cls1.fit(X, y)
# should throw error with fail_on_invalid_gpu_id enabled
cls2 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999, fail_on_invalid_gpu_id=True)
try:
cls2.fit(X, y)
assert False, "Should have failed with fail_on_invalid_gpu_id enabled"
except xgb.core.XGBoostError as err:
assert "gpu_id 9999 is invalid" in str(err)
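The new test drives the error path with a manual try/except. An equivalent, slightly tighter formulation (a sketch that assumes the same `fail_on_invalid_gpu_id` parameter and error message introduced above) would lean on `pytest.raises`:

```python
import numpy as np
import pytest
import xgboost as xgb

def test_invalid_gpu_id_raises():
    # Sketch only: fail_on_invalid_gpu_id is the parameter added in this branch.
    X = np.random.randn(10, 5)
    y = np.random.randint(0, 2, size=10)
    cls = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999,
                            fail_on_invalid_gpu_id=True)
    with pytest.raises(xgb.core.XGBoostError, match="gpu_id 9999 is invalid"):
        cls.fit(X, y)
```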
@ -5,8 +5,10 @@ import numpy as np
import asyncio
import xgboost
import subprocess
import hypothesis
from hypothesis import given, strategies, settings, note
from hypothesis._settings import duration
from hypothesis import HealthCheck
from test_gpu_updaters import parameter_strategy

if sys.platform.startswith("win"):
@ -19,6 +21,11 @@ from test_with_dask import _get_client_workers # noqa
from test_with_dask import generate_array # noqa
import testing as tm # noqa

if hasattr(HealthCheck, 'function_scoped_fixture'):
suppress = [HealthCheck.function_scoped_fixture]
else:
suppress = hypothesis.utils.conventions.not_set


try:
import dask.dataframe as dd
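The `suppress` guard exists because newer Hypothesis releases raise a health-check error when a `@given` test also uses a function-scoped pytest fixture (such as `local_cuda_cluster`), while older releases do not define `HealthCheck.function_scoped_fixture` at all. A minimal sketch of how the guard is then consumed, using only the public Hypothesis API:

```python
import hypothesis
from hypothesis import HealthCheck, given, settings, strategies

# Older Hypothesis versions have no function_scoped_fixture member, so fall
# back to "not set" and let @settings keep its default health checks.
if hasattr(HealthCheck, 'function_scoped_fixture'):
    suppress = [HealthCheck.function_scoped_fixture]
else:
    suppress = hypothesis.utils.conventions.not_set

@given(x=strategies.integers(1, 20))
@settings(deadline=None, suppress_health_check=suppress)
def test_with_function_scoped_fixture(x):
    # The suppressed check only matters when the test also takes a
    # function-scoped pytest fixture, as the Dask GPU tests below do.
    assert x >= 1
```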
@ -161,19 +168,24 @@ class TestDistributedGPU:
run_with_dask_dataframe(dxgb.DaskDMatrix, client)
run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client)

@given(params=parameter_strategy, num_rounds=strategies.integers(1, 20),
dataset=tm.dataset_strategy)
@settings(deadline=duration(seconds=120))
@given(
params=parameter_strategy,
num_rounds=strategies.integers(1, 20),
dataset=tm.dataset_strategy,
)
@settings(deadline=duration(seconds=120), suppress_health_check=suppress)
@pytest.mark.skipif(**tm.no_dask())
@pytest.mark.skipif(**tm.no_dask_cuda())
@pytest.mark.parametrize('local_cuda_cluster', [{'n_workers': 2}], indirect=['local_cuda_cluster'])
@pytest.mark.parametrize(
"local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"]
)
@pytest.mark.mgpu
def test_gpu_hist(self, params, num_rounds, dataset, local_cuda_cluster):
with Client(local_cuda_cluster) as client:
run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix,
client)
run_gpu_hist(params, num_rounds, dataset,
dxgb.DaskDeviceQuantileDMatrix, client)
run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, client)
run_gpu_hist(
params, num_rounds, dataset, dxgb.DaskDeviceQuantileDMatrix, client
)

@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_dask())
@ -64,22 +64,24 @@ def generate_logistic_model():
y = np.random.randint(0, 2, size=kRows)
assert y.max() == 1 and y.min() == 0

data = xgboost.DMatrix(X, label=y, weight=w)
booster = xgboost.train({'tree_method': 'hist',
'num_parallel_tree': kForests,
'max_depth': kMaxDepth,
'objective': 'binary:logistic'},
num_boost_round=kRounds, dtrain=data)
booster.save_model(booster_bin('logit'))
booster.save_model(booster_json('logit'))
for objective, name in [('binary:logistic', 'logit'), ('binary:logitraw', 'logitraw')]:
data = xgboost.DMatrix(X, label=y, weight=w)
booster = xgboost.train({'tree_method': 'hist',
'num_parallel_tree': kForests,
'max_depth': kMaxDepth,
'objective': objective},
num_boost_round=kRounds, dtrain=data)
booster.save_model(booster_bin(name))
booster.save_model(booster_json(name))

reg = xgboost.XGBClassifier(tree_method='hist',
num_parallel_tree=kForests,
max_depth=kMaxDepth,
n_estimators=kRounds)
reg.fit(X, y, w)
reg.save_model(skl_bin('logit'))
reg.save_model(skl_json('logit'))
reg = xgboost.XGBClassifier(tree_method='hist',
num_parallel_tree=kForests,
max_depth=kMaxDepth,
n_estimators=kRounds,
objective=objective)
reg.fit(X, y, w)
reg.save_model(skl_bin(name))
reg.save_model(skl_json(name))


def generate_classification_model():
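The loop above now also emits model files trained with `binary:logitraw`, which the compatibility checks later in this diff look up by name. One property worth remembering when reading those checks: with `binary:logitraw` the booster predicts raw margin scores rather than probabilities. A small illustrative sketch (shapes and round counts are arbitrary):

```python
import numpy as np
from scipy.special import expit
import xgboost

X = np.random.randn(64, 4)
y = np.random.randint(0, 2, size=64)
dtrain = xgboost.DMatrix(X, label=y)

raw = xgboost.train({'objective': 'binary:logitraw'}, dtrain, num_boost_round=4)
margins = raw.predict(dtrain)   # unbounded margin scores, not probabilities
probabilities = expit(margins)  # the sigmoid maps margins into (0, 1)
assert ((probabilities > 0) & (probabilities < 1)).all()
```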
@ -57,6 +57,25 @@ class TestBasic:
# assert they are the same
assert np.sum(np.abs(preds2 - preds)) == 0

def test_metric_config(self):
# Make sure that the metric configuration happens in booster so the
# string `['error', 'auc']` doesn't get passed down to core.
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']}
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2
booster = xgb.train(param, dtrain, num_round, watchlist)
predt_0 = booster.predict(dtrain)
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, 'model.json')
booster.save_model(path)

booster = xgb.Booster(params=param, model_file=path)
predt_1 = booster.predict(dtrain)
np.testing.assert_allclose(predt_0, predt_1)

def test_record_results(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
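The point of `test_metric_config` is that a Python list passed as `eval_metric` must survive being pushed into the booster and reloading the model rather than being forwarded verbatim to the C core. A related behaviour that is easy to check in isolation: each metric in the list gets its own history in `evals_result`. A short self-contained sketch (random data, arbitrary sizes):

```python
import numpy as np
import xgboost as xgb

X = np.random.randn(128, 8)
y = np.random.randint(0, 2, size=128)
dtrain = xgb.DMatrix(X, label=y)

param = {'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']}
results = {}
xgb.train(param, dtrain, num_boost_round=2,
          evals=[(dtrain, 'train')], evals_result=results)
# Each metric in the list is recorded separately under the eval set's name.
assert set(results['train']) == {'error', 'auc'}
```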
@ -124,8 +143,8 @@ class TestBasic:

dump2 = bst.get_dump(with_stats=True)
assert dump2[0].count('\n') == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
assert (dump2[0].find('\n') > dump1[0].find('\n'),
'Expected more info when with_stats=True is given.')
msg = 'Expected more info when with_stats=True is given.'
assert dump2[0].find('\n') > dump1[0].find('\n'), msg

dump3 = bst.get_dump(dump_format="json")
dump3j = json.loads(dump3[0])
@ -248,13 +267,11 @@ class TestBasicPathLike:
assert binary_path.exists()
Path.unlink(binary_path)


def test_Booster_init_invalid_path(self):
"""An invalid model_file path should raise XGBoostError."""
with pytest.raises(xgb.core.XGBoostError):
xgb.Booster(model_file=Path("invalidpath"))


def test_Booster_save_and_load(self):
"""Saving and loading model files from paths."""
save_path = Path("saveload.model")
@ -22,6 +22,30 @@ class TestCallbacks:
cls.X_valid = X[split:, ...]
cls.y_valid = y[split:, ...]

def run_evaluation_monitor(self, D_train, D_valid, rounds, verbose_eval):
evals_result = {}
with tm.captured_output() as (out, err):
xgb.train({'objective': 'binary:logistic',
'eval_metric': 'error'}, D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
num_boost_round=rounds,
evals_result=evals_result,
verbose_eval=verbose_eval)
output: str = out.getvalue().strip()

if int(verbose_eval) == 1:
# Should print each iteration info
assert len(output.split('\n')) == rounds
elif int(verbose_eval) > rounds:
# Should print first and latest iteration info
assert len(output.split('\n')) == 2
else:
# Should print info for each period in addition to the first and latest iteration
num_periods = rounds // int(verbose_eval)
# Extra information is required for the latest iteration
is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
assert len(output.split('\n')) == 1 + num_periods + int(is_extra_info_required)

def test_evaluation_monitor(self):
D_train = xgb.DMatrix(self.X_train, self.y_train)
D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
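A worked example of the line count asserted in the final branch above, assuming hypothetical values `rounds=10` and `verbose_eval=4`: messages are printed at iterations 0, 4 and 8, plus the final iteration 9.

```python
rounds, verbose_eval = 10, 4          # hypothetical values for illustration
num_periods = rounds // verbose_eval  # 2 -> periodic messages at iterations 4 and 8
is_extra_info_required = num_periods * verbose_eval < (rounds - 1)  # 8 < 9 -> True
expected_lines = 1 + num_periods + int(is_extra_info_required)      # first + periods + last
assert expected_lines == 4
```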
@ -36,23 +60,10 @@ class TestCallbacks:
assert len(evals_result['Train']['error']) == rounds
assert len(evals_result['Valid']['error']) == rounds

with tm.captured_output() as (out, err):
xgb.train({'objective': 'binary:logistic',
'eval_metric': 'error'}, D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
num_boost_round=rounds,
evals_result=evals_result,
verbose_eval=2)
output: str = out.getvalue().strip()

pos = 0
msg = 'Train-error'
for i in range(rounds // 2):
pos = output.find('Train-error', pos)
assert pos != -1
pos += len(msg)

assert output.find('Train-error', pos) == -1
self.run_evaluation_monitor(D_train, D_valid, rounds, True)
self.run_evaluation_monitor(D_train, D_valid, rounds, 2)
self.run_evaluation_monitor(D_train, D_valid, rounds, 4)
self.run_evaluation_monitor(D_train, D_valid, rounds, rounds + 1)

def test_early_stopping(self):
D_train = xgb.DMatrix(self.X_train, self.y_train)
@ -142,7 +153,7 @@ class TestCallbacks:
eval_metric=tm.eval_error_metric, callbacks=[early_stop])
booster = cls.get_booster()
dump = booster.get_dump(dump_format='json')
assert len(dump) == booster.best_iteration
assert len(dump) == booster.best_iteration + 1

early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
save_best=True)
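The corrected assertion reflects that `best_iteration` is zero-based: a booster trimmed to its best round still contains `best_iteration + 1` trees. A short usage sketch of the callback with `save_best=True` (data, sizes and round counts are placeholders):

```python
import numpy as np
import xgboost as xgb

X = np.random.randn(256, 10)
y = np.random.randint(0, 2, size=256)
D_train = xgb.DMatrix(X[:200], y[:200])
D_valid = xgb.DMatrix(X[200:], y[200:])

early_stop = xgb.callback.EarlyStopping(rounds=5, save_best=True)
booster = xgb.train({'objective': 'binary:logistic'}, D_train,
                    num_boost_round=100,
                    evals=[(D_valid, 'Valid')],
                    callbacks=[early_stop])
# With save_best=True the returned booster keeps trees 0..best_iteration only.
assert len(booster.get_dump()) == booster.best_iteration + 1
```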
@ -22,6 +22,7 @@ model_in = {model_in}
model_out = {model_out}
test_path = {test_path}
name_pred = {name_pred}
model_dir = {model_dir}

num_round = 10
data = {data_path}
@ -59,7 +60,8 @@ eval[test] = {data_path}
model_in='NULL',
model_out=model_out_cli,
test_path='NULL',
name_pred='NULL')
name_pred='NULL',
model_dir='NULL')
with open(config_path, 'w') as fd:
fd.write(train_conf)

@ -73,7 +75,8 @@ eval[test] = {data_path}
model_in=model_out_cli,
model_out='NULL',
test_path=data_path,
name_pred=predict_out)
name_pred=predict_out,
model_dir='NULL')
with open(config_path, 'w') as fd:
fd.write(predict_conf)

@ -145,7 +148,8 @@ eval[test] = {data_path}
model_in='NULL',
model_out=model_out_cli,
test_path='NULL',
name_pred='NULL')
name_pred='NULL',
model_dir='NULL')
with open(config_path, 'w') as fd:
fd.write(train_conf)
@ -154,3 +158,28 @@ eval[test] = {data_path}
model = json.load(fd)

assert model['learner']['gradient_booster']['name'] == 'gbtree'

def test_cli_save_model(self):
'''Test save on final round'''
exe = self.get_exe()
data_path = "{root}/demo/data/agaricus.txt.train?format=libsvm".format(
root=self.PROJECT_ROOT)
seed = 1994

with tempfile.TemporaryDirectory() as tmpdir:
model_out_cli = os.path.join(tmpdir, '0010.model')
config_path = os.path.join(tmpdir, 'test_load_cli_model.conf')

train_conf = self.template.format(data_path=data_path,
seed=seed,
task='train',
model_in='NULL',
model_out='NULL',
test_path='NULL',
name_pred='NULL',
model_dir=tmpdir)
with open(config_path, 'w') as fd:
fd.write(train_conf)

subprocess.run([exe, config_path])
assert os.path.exists(model_out_cli)
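The new `model_dir` key makes the CLI write a checkpoint for the final round into a directory, which is why the test expects a file named `0010.model` after `num_round = 10`. A hypothetical minimal config in the same key=value style as the test template (paths are placeholders, and `NULL` mirrors how the test disables unused keys):

```python
# Sketch: build a CLI config roughly equivalent to the one the test writes.
train_conf = """
task = train
num_round = 10
data = demo/data/agaricus.txt.train?format=libsvm
model_in = NULL
model_out = NULL
model_dir = /tmp/xgb-models
"""
with open('train.conf', 'w') as fd:
    fd.write(train_conf)
# Running the xgboost CLI binary on train.conf should then leave
# 0010.model inside /tmp/xgb-models.
```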
@ -24,6 +24,10 @@ def run_booster_check(booster, name):
config['learner']['learner_model_param']['base_score']) == 0.5
assert config['learner']['learner_train_param'][
'objective'] == 'multi:softmax'
elif name.find('logitraw') != -1:
assert len(booster.get_dump()) == gm.kForests * gm.kRounds
assert config['learner']['learner_model_param']['num_class'] == str(0)
assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
elif name.find('logit') != -1:
assert len(booster.get_dump()) == gm.kForests * gm.kRounds
assert config['learner']['learner_model_param']['num_class'] == str(0)
@ -77,6 +81,13 @@ def run_scikit_model_check(name, path):
assert config['learner']['learner_train_param'][
'objective'] == 'rank:ndcg'
run_model_param_check(config)
elif name.find('logitraw') != -1:
logit = xgboost.XGBClassifier()
logit.load_model(path)
assert (len(logit.get_booster().get_dump()) ==
gm.kRounds * gm.kForests)
config = json.loads(logit.get_booster().save_config())
assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
elif name.find('logit') != -1:
logit = xgboost.XGBClassifier()
logit.load_model(path)
@ -33,9 +33,15 @@ def run_predict_leaf(predictor):
y = rng.randint(low=0, high=classes, size=rows)
m = xgb.DMatrix(X, y)
booster = xgb.train(
{'num_parallel_tree': num_parallel_tree, 'num_class': classes,
'predictor': predictor, 'tree_method': 'hist'}, m,
num_boost_round=num_boost_round)
{
"num_parallel_tree": num_parallel_tree,
"num_class": classes,
"predictor": predictor,
"tree_method": "hist",
},
m,
num_boost_round=num_boost_round,
)

empty = xgb.DMatrix(np.ones(shape=(0, cols)))
empty_leaf = booster.predict(empty, pred_leaf=True)
@ -52,12 +58,19 @@ def run_predict_leaf(predictor):
end = classes * num_parallel_tree * (j + 1)
layer = row[start: end]
for c in range(classes):
tree_group = layer[c * num_parallel_tree:
(c+1) * num_parallel_tree]
tree_group = layer[c * num_parallel_tree: (c + 1) * num_parallel_tree]
assert tree_group.shape[0] == num_parallel_tree
# No subsampling, so trees in the same forest should output the same leaf.
assert np.all(tree_group == tree_group[0])

ntree_limit = 2
sliced = booster.predict(
m, pred_leaf=True, ntree_limit=num_parallel_tree * ntree_limit
)
first = sliced[0, ...]

assert first.shape[0] == classes * num_parallel_tree * ntree_limit
return leaf
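For reference, the width asserted above follows from how leaf predictions are laid out per row: one column per tree, grouped first by boosting round, then by class, then by the trees of each forest. A small worked example with hypothetical sizes (not the ones used in the test):

```python
# Hypothetical sizes, chosen only to illustrate the layout arithmetic.
classes, num_parallel_tree, num_boost_round = 3, 4, 5

# A full pred_leaf row has one entry per tree in the model:
total_trees = classes * num_parallel_tree * num_boost_round  # 60

# Restricting prediction to ntree_limit rounds keeps a prefix of that layout:
ntree_limit = 2
assert classes * num_parallel_tree * ntree_limit == 24
```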
@ -8,7 +8,8 @@ import asyncio
from sklearn.datasets import make_classification
import os
import subprocess
from hypothesis import given, settings, note
import hypothesis
from hypothesis import given, settings, note, HealthCheck
from test_updaters import hist_parameter_strategy, exact_parameter_strategy

if sys.platform.startswith("win"):
@ -17,6 +18,12 @@ if tm.no_dask()['condition']:
pytest.skip(msg=tm.no_dask()['reason'], allow_module_level=True)


if hasattr(HealthCheck, 'function_scoped_fixture'):
suppress = [HealthCheck.function_scoped_fixture]
else:
suppress = hypothesis.utils.conventions.not_set


try:
from distributed import LocalCluster, Client, get_client
from distributed.utils_test import client, loop, cluster_fixture
@ -668,14 +675,14 @@ class TestWithDask:

@given(params=hist_parameter_strategy,
dataset=tm.dataset_strategy)
@settings(deadline=None)
@settings(deadline=None, suppress_health_check=suppress)
def test_hist(self, params, dataset, client):
num_rounds = 30
self.run_updater_test(client, params, num_rounds, dataset, 'hist')

@given(params=exact_parameter_strategy,
dataset=tm.dataset_strategy)
@settings(deadline=None)
@settings(deadline=None, suppress_health_check=suppress)
def test_approx(self, client, params, dataset):
num_rounds = 30
self.run_updater_test(client, params, num_rounds, dataset, 'approx')
@ -795,7 +802,6 @@ class TestDaskCallbacks:
merged = xgb.dask._get_workers_from_data(train, evals=[(valid, 'Valid')])
assert len(merged) == 2


def test_data_initialization(self):
'''Assert each worker has the correct amount of data, and DMatrix initialization doesn't
generate unnecessary copies of data.
@ -78,6 +78,34 @@ def test_multiclass_classification():
check_pred(preds4, labels, output_margin=False)


def test_best_ntree_limit():
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)

def train(booster, forest):
rounds = 4
cls = xgb.XGBClassifier(
n_estimators=rounds, num_parallel_tree=forest, booster=booster
).fit(
X, y, eval_set=[(X, y)], early_stopping_rounds=3
)

if forest:
assert cls.best_ntree_limit == rounds * forest
else:
assert cls.best_ntree_limit == 0

# best_ntree_limit is used by default, assert that under gblinear it's
# automatically ignored due to being 0.
cls.predict(X)

num_parallel_tree = 4
train('gbtree', num_parallel_tree)
train('dart', num_parallel_tree)
train('gblinear', None)


def test_ranking():
# generate random data
x_train = np.random.rand(1000, 10)
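A note on what `best_ntree_limit` counts in the test above: it is a number of individual trees, not boosting rounds, so with `num_parallel_tree` forests and the evaluation set equal to the training set (where the best round is the last one) it comes out as `rounds * forest`. A hedged sketch of the typical prediction-time use, mirroring the test's setup:

```python
import xgboost as xgb
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
cls = xgb.XGBClassifier(n_estimators=4, num_parallel_tree=3).fit(
    X, y, eval_set=[(X, y)], early_stopping_rounds=3
)
# best_ntree_limit counts trees: 4 rounds x 3 parallel trees here.
assert cls.best_ntree_limit == 4 * 3
# Predictions can be restricted to that many trees explicitly
# (ntree_limit is the pre-1.4 way of slicing trees at prediction time).
preds = cls.get_booster().predict(
    xgb.DMatrix(X), ntree_limit=cls.best_ntree_limit
)
assert preds.shape[0] == X.shape[0]
```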
@ -94,6 +122,8 @@ def test_ranking():
model = xgb.sklearn.XGBRanker(**params)
model.fit(x_train, y_train, group=train_group,
eval_set=[(x_valid, y_valid)], eval_group=[valid_group])
assert model.evals_result()

pred = model.predict(x_test)

train_data = xgb.DMatrix(x_train, y_train)
@ -399,6 +429,21 @@ def test_classification_with_custom_objective():
X, y
)

cls = xgb.XGBClassifier(use_label_encoder=False, n_estimators=1)
cls.fit(X, y)

is_called = [False]

def wrapped(y, p):
is_called[0] = True
return logregobj(y, p)

cls.set_params(objective=wrapped)
cls.predict(X)  # no throw
cls.fit(X, y)

assert is_called[0]


def test_sklearn_api():
from sklearn.datasets import load_iris
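The `wrapped` helper above simply delegates to `logregobj`, the custom binary-logistic objective used throughout these sklearn tests, while recording that it was actually invoked after `set_params`. For readers without the test module at hand, a standard formulation of such an objective (gradient and Hessian of the logistic loss with respect to the raw margin) looks roughly like this; the exact helper in the test suite may differ in detail:

```python
import numpy as np

def logregobj(labels, predt):
    """Custom binary logistic objective: per-row gradient and Hessian."""
    preds = 1.0 / (1.0 + np.exp(-predt))  # sigmoid of the raw margin
    grad = preds - labels                 # first derivative of the log loss
    hess = preds * (1.0 - preds)          # second derivative of the log loss
    return grad, hess
```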
@ -34,6 +34,10 @@ if [ ${TASK} == "python_test" ]; then
tests/ci_build/ci_build.sh aarch64 docker bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
TAG=manylinux2014_aarch64
tests/ci_build/ci_build.sh aarch64 docker python tests/ci_build/rename_whl.py python-package/dist/*.whl ${TRAVIS_COMMIT} ${TAG}
tests/ci_build/ci_build.sh aarch64 docker auditwheel repair --plat ${TAG} python-package/dist/*.whl
mv -v wheelhouse/*.whl python-package/dist/
# Make sure that libgomp.so is vendored in the wheel
unzip -l python-package/dist/*.whl | grep libgomp || exit -1
else
rm -rf build
mkdir build && cd build
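Both this Travis step and the Jenkinsfile change earlier in the branch verify the repaired wheel with `unzip -l ... | grep libgomp`. Since a wheel is just a zip archive, an equivalent check can be written in Python (a sketch; the wheel path is a placeholder):

```python
import glob
import zipfile

# Pick up whatever wheel the build produced; the path is illustrative.
wheel_path = glob.glob("python-package/dist/*.whl")[0]
with zipfile.ZipFile(wheel_path) as whl:
    vendored = [name for name in whl.namelist() if "libgomp" in name]
assert vendored, "auditwheel repair should have bundled libgomp.so into the wheel"
```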