Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
34408a7fdc | ||
|
|
f9b246f5ee | ||
|
|
8467880aeb | ||
|
|
e74560c86a | ||
|
|
882b966536 | ||
|
|
115e4c3360 | ||
|
|
f5d4fddafe | ||
|
|
66690f3d07 | ||
|
|
c42f533ae9 | ||
|
|
751160b69c |
@@ -1,5 +1,5 @@
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
project(xgboost LANGUAGES CXX C VERSION 1.1.0)
|
||||
project(xgboost LANGUAGES CXX C VERSION 1.1.1)
|
||||
include(cmake/Utils.cmake)
|
||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||
cmake_policy(SET CMP0022 NEW)
|
||||
@@ -57,6 +57,7 @@ address, leak, undefined and thread.")
|
||||
## Plugins
|
||||
option(PLUGIN_LZ4 "Build lz4 plugin" OFF)
|
||||
option(PLUGIN_DENSE_PARSER "Build dense parser plugin" OFF)
|
||||
option(ADD_PKGCONFIG "Add xgboost.pc into system." ON)
|
||||
|
||||
#-- Checks for building XGBoost
|
||||
if (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
|
||||
@@ -274,3 +275,12 @@ endif (GOOGLE_TEST)
|
||||
# replace /MD with /MT. See https://github.com/dmlc/xgboost/issues/4462
|
||||
# for issues caused by mixing of /MD and /MT flags
|
||||
msvc_use_static_runtime()
|
||||
|
||||
# Add xgboost.pc
|
||||
if (ADD_PKGCONFIG)
|
||||
configure_file(${xgboost_SOURCE_DIR}/cmake/xgboost.pc.in ${xgboost_BINARY_DIR}/xgboost.pc @ONLY)
|
||||
|
||||
install(
|
||||
FILES ${xgboost_BINARY_DIR}/xgboost.pc
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
||||
endif (ADD_PKGCONFIG)
|
||||
|
||||
12
Jenkinsfile
vendored
12
Jenkinsfile
vendored
@@ -48,7 +48,6 @@ pipeline {
|
||||
script {
|
||||
parallel ([
|
||||
'clang-tidy': { ClangTidy() },
|
||||
'lint': { Lint() },
|
||||
'sphinx-doc': { SphinxDoc() },
|
||||
'doxygen': { Doxygen() }
|
||||
])
|
||||
@@ -64,7 +63,6 @@ pipeline {
|
||||
'build-cpu': { BuildCPU() },
|
||||
'build-cpu-rabit-mock': { BuildCPUMock() },
|
||||
'build-cpu-non-omp': { BuildCPUNonOmp() },
|
||||
'build-gpu-cuda9.0': { BuildCUDA(cuda_version: '9.0') },
|
||||
'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
|
||||
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
|
||||
'build-jvm-packages': { BuildJVMPackages(spark_version: '2.4.3') },
|
||||
@@ -251,10 +249,10 @@ def BuildCUDA(args) {
|
||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
|
||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} python3 tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
|
||||
"""
|
||||
// Stash wheel for CUDA 9.0 target
|
||||
if (args.cuda_version == '9.0') {
|
||||
// Stash wheel for CUDA 10.0 target
|
||||
if (args.cuda_version == '10.0') {
|
||||
echo 'Stashing Python wheel...'
|
||||
stash name: 'xgboost_whl_cuda9', includes: 'python-package/dist/*.whl'
|
||||
stash name: 'xgboost_whl_cuda10', includes: 'python-package/dist/*.whl'
|
||||
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
|
||||
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
|
||||
echo 'Stashing C++ test executable (testxgboost)...'
|
||||
@@ -298,7 +296,7 @@ def BuildJVMDoc() {
|
||||
|
||||
def TestPythonCPU() {
|
||||
node('linux && cpu') {
|
||||
unstash name: 'xgboost_whl_cuda9'
|
||||
unstash name: 'xgboost_whl_cuda10'
|
||||
unstash name: 'srcs'
|
||||
unstash name: 'xgboost_cli'
|
||||
echo "Test Python CPU"
|
||||
@@ -315,7 +313,7 @@ def TestPythonCPU() {
|
||||
def TestPythonGPU(args) {
|
||||
nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
|
||||
node(nodeReq) {
|
||||
unstash name: 'xgboost_whl_cuda9'
|
||||
unstash name: 'xgboost_whl_cuda10'
|
||||
unstash name: 'srcs'
|
||||
echo "Test Python GPU: CUDA ${args.cuda_version}"
|
||||
def container_type = "gpu"
|
||||
|
||||
@@ -28,7 +28,7 @@ pipeline {
|
||||
steps {
|
||||
script {
|
||||
parallel ([
|
||||
'build-win64-cuda9.0': { BuildWin64() }
|
||||
'build-win64-cuda10.0': { BuildWin64() }
|
||||
])
|
||||
}
|
||||
milestone ordinal: 2
|
||||
@@ -40,7 +40,6 @@ pipeline {
|
||||
script {
|
||||
parallel ([
|
||||
'test-win64-cpu': { TestWin64CPU() },
|
||||
'test-win64-gpu-cuda9.0': { TestWin64GPU(cuda_target: 'cuda9') },
|
||||
'test-win64-gpu-cuda10.0': { TestWin64GPU(cuda_target: 'cuda10_0') },
|
||||
'test-win64-gpu-cuda10.1': { TestWin64GPU(cuda_target: 'cuda10_1') }
|
||||
])
|
||||
@@ -67,7 +66,7 @@ def checkoutSrcs() {
|
||||
}
|
||||
|
||||
def BuildWin64() {
|
||||
node('win64 && build') {
|
||||
node('win64 && build && cuda10') {
|
||||
unstash name: 'srcs'
|
||||
echo "Building XGBoost for Windows AMD64 target..."
|
||||
bat "nvcc --version"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
Package: xgboost
|
||||
Type: Package
|
||||
Title: Extreme Gradient Boosting
|
||||
Version: 1.1.0.1
|
||||
Version: 1.1.1.1
|
||||
Date: 2020-02-21
|
||||
Authors@R: c(
|
||||
person("Tianqi", "Chen", role = c("aut"),
|
||||
|
||||
2
R-package/configure
vendored
2
R-package/configure
vendored
@@ -2698,7 +2698,7 @@ fi
|
||||
if test `uname -s` = "Darwin"
|
||||
then
|
||||
OPENMP_CXXFLAGS='-Xclang -fopenmp'
|
||||
OPENMP_LIB='/usr/local/lib/libomp.dylib'
|
||||
OPENMP_LIB='-lomp'
|
||||
ac_pkg_openmp=no
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether OpenMP will work in a package" >&5
|
||||
$as_echo_n "checking whether OpenMP will work in a package... " >&6; }
|
||||
|
||||
@@ -29,7 +29,7 @@ fi
|
||||
if test `uname -s` = "Darwin"
|
||||
then
|
||||
OPENMP_CXXFLAGS='-Xclang -fopenmp'
|
||||
OPENMP_LIB='/usr/local/lib/libomp.dylib'
|
||||
OPENMP_LIB='-lomp'
|
||||
ac_pkg_openmp=no
|
||||
AC_MSG_CHECKING([whether OpenMP will work in a package])
|
||||
AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include <omp.h>]], [[ return (omp_get_max_threads() <= 1); ]])])
|
||||
|
||||
@@ -1 +1 @@
|
||||
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@rc2
|
||||
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@
|
||||
|
||||
12
cmake/xgboost.pc.in
Normal file
12
cmake/xgboost.pc.in
Normal file
@@ -0,0 +1,12 @@
|
||||
prefix=@CMAKE_INSTALL_PREFIX@
|
||||
version=@xgboost_VERSION@
|
||||
exec_prefix=${prefix}/bin
|
||||
libdir=${prefix}/lib
|
||||
includedir=${prefix}/include
|
||||
|
||||
Name: xgboost
|
||||
Description: XGBoost - Scalable and Flexible Gradient Boosting.
|
||||
Version: ${version}
|
||||
|
||||
Cflags: -I${includedir}
|
||||
Libs: -L${libdir} -lxgboost
|
||||
55
doc/dump.schema
Normal file
55
doc/dump.schema
Normal file
@@ -0,0 +1,55 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"definitions": {
|
||||
"split_node": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"nodeid": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"depth": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"yes": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"no": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"split": {
|
||||
"type": "string"
|
||||
},
|
||||
"children": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"oneOf": [
|
||||
{"$ref": "#/definitions/split_node"},
|
||||
{"$ref": "#/definitions/leaf_node"}
|
||||
]
|
||||
},
|
||||
"maxItems": 2
|
||||
}
|
||||
},
|
||||
"required": ["nodeid", "depth", "yes", "no", "split", "children"]
|
||||
},
|
||||
"leaf_node": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"nodeid": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"leaf": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": ["nodeid", "leaf"]
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"$ref": "#/definitions/split_node"
|
||||
}
|
||||
@@ -35,7 +35,7 @@ There are four kinds of censoring:
|
||||
|
||||
* **Uncensored**: the label is not censored and given as a single number.
|
||||
* **Right-censored**: the label is of form :math:`[a, +\infty)`, where :math:`a` is the lower bound.
|
||||
* **Left-censored**: the label is of form :math:`(-\infty, b]`, where :math:`b` is the upper bound.
|
||||
* **Left-censored**: the label is of form :math:`[0, b]`, where :math:`b` is the upper bound.
|
||||
* **Interval-censored**: the label is of form :math:`[a, b]`, where :math:`a` and :math:`b` are the lower and upper bounds, respectively.
|
||||
|
||||
Right-censoring is the most commonly used.
|
||||
@@ -83,7 +83,7 @@ Censoring type Interval form Lower bound finite? Upper bound finite?
|
||||
================= ==================== =================== ===================
|
||||
Uncensored :math:`[a, a]` |tick| |tick|
|
||||
Right-censored :math:`[a, +\infty)` |tick| |cross|
|
||||
Left-censored :math:`(-\infty, b]` |cross| |tick|
|
||||
Left-censored :math:`[0, b]` |tick| |tick|
|
||||
Interval-censored :math:`[a, b]` |tick| |tick|
|
||||
================= ==================== =================== ===================
|
||||
|
||||
@@ -102,7 +102,7 @@ Collect the lower bound numbers in one array (let's call it ``y_lower_bound``) a
|
||||
# Associate ranged labels with the data matrix.
|
||||
# This example shows each kind of censored labels.
|
||||
# uncensored right left interval
|
||||
y_lower_bound = np.array([ 2.0, 3.0, -np.inf, 4.0])
|
||||
y_lower_bound = np.array([ 2.0, 3.0, 0.0, 4.0])
|
||||
y_upper_bound = np.array([ 2.0, +np.inf, 4.0, 5.0])
|
||||
dtrain.set_float_info('label_lower_bound', y_lower_bound)
|
||||
dtrain.set_float_info('label_upper_bound', y_upper_bound)
|
||||
@@ -120,7 +120,7 @@ Collect the lower bound numbers in one array (let's call it ``y_lower_bound``) a
|
||||
# Associate ranged labels with the data matrix.
|
||||
# This example shows each kind of censored labels.
|
||||
# uncensored right left interval
|
||||
y_lower_bound <- c( 2., 3., -Inf, 4.)
|
||||
y_lower_bound <- c( 2., 3., 0., 4.)
|
||||
y_upper_bound <- c( 2., +Inf, 4., 5.)
|
||||
setinfo(dtrain, 'label_lower_bound', y_lower_bound)
|
||||
setinfo(dtrain, 'label_upper_bound', y_upper_bound)
|
||||
@@ -136,7 +136,7 @@ Now we are ready to invoke the training API:
|
||||
'aft_loss_distribution_scale': 1.20,
|
||||
'tree_method': 'hist', 'learning_rate': 0.05, 'max_depth': 2}
|
||||
bst = xgb.train(params, dtrain, num_boost_round=5,
|
||||
evals=[(dtrain, 'train'), (dvalid, 'valid')])
|
||||
evals=[(dtrain, 'train')])
|
||||
|
||||
.. code-block:: r
|
||||
:caption: R
|
||||
@@ -165,4 +165,4 @@ Currently, you can choose from three probability distributions for ``aft_loss_di
|
||||
``extreme`` :math:`e^z e^{-\exp{z}}`
|
||||
========================= ===========================================
|
||||
|
||||
Note that it is not yet possible to set the ranged label using the scikit-learn interface (e.g. :class:`xgboost.XGBRegressor`). For now, you should use :class:`xgboost.train` with :class:`xgboost.DMatrix`.
|
||||
Note that it is not yet possible to set the ranged label using the scikit-learn interface (e.g. :class:`xgboost.XGBRegressor`). For now, you should use :class:`xgboost.train` with :class:`xgboost.DMatrix`.
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>XGBoost JVM Package</name>
|
||||
<description>JVM Package for XGBoost</description>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
<packaging>jar</packaging>
|
||||
<build>
|
||||
<plugins>
|
||||
@@ -26,7 +26,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
@@ -37,7 +37,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
@@ -26,7 +26,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||
<build>
|
||||
@@ -24,7 +24,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j_2.12</artifactId>
|
||||
<version>1.1.0-RC2</version>
|
||||
<version>1.1.1</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
|
||||
@@ -1 +1 @@
|
||||
1.1.0rc2
|
||||
1.1.1
|
||||
|
||||
@@ -1467,6 +1467,7 @@ class Booster(object):
|
||||
self._validate_features(data)
|
||||
return self.eval_set([(data, name)], iteration)
|
||||
|
||||
# pylint: disable=too-many-function-args
|
||||
def predict(self,
|
||||
data,
|
||||
output_margin=False,
|
||||
@@ -1588,7 +1589,7 @@ class Booster(object):
|
||||
ngroup = int(chunk_size / ((data.num_col() + 1) *
|
||||
(data.num_col() + 1)))
|
||||
if ngroup == 1:
|
||||
preds = preds.reshape(nrow,
|
||||
preds = preds.reshape(nrow, # pylint: disable=too-many-function-args
|
||||
data.num_col() + 1,
|
||||
data.num_col() + 1)
|
||||
else:
|
||||
|
||||
@@ -82,14 +82,16 @@ template <typename BinIdxType>
|
||||
class DenseColumn: public Column<BinIdxType> {
|
||||
public:
|
||||
DenseColumn(ColumnType type, common::Span<const BinIdxType> index,
|
||||
uint32_t index_base,
|
||||
const std::vector<bool>::const_iterator missing_flags)
|
||||
uint32_t index_base, const std::vector<bool>& missing_flags,
|
||||
size_t feature_offset)
|
||||
: Column<BinIdxType>(type, index, index_base),
|
||||
missing_flags_(missing_flags) {}
|
||||
bool IsMissing(size_t idx) const { return missing_flags_[idx]; }
|
||||
missing_flags_(missing_flags),
|
||||
feature_offset_(feature_offset) {}
|
||||
bool IsMissing(size_t idx) const { return missing_flags_[feature_offset_ + idx]; }
|
||||
private:
|
||||
/* flags for missing values in dense columns */
|
||||
std::vector<bool>::const_iterator missing_flags_;
|
||||
const std::vector<bool>& missing_flags_;
|
||||
size_t feature_offset_;
|
||||
};
|
||||
|
||||
/*! \brief a collection of columns, with support for construction from
|
||||
@@ -208,10 +210,8 @@ class ColumnMatrix {
|
||||
column_size };
|
||||
std::unique_ptr<const Column<BinIdxType> > res;
|
||||
if (type_[fid] == ColumnType::kDenseColumn) {
|
||||
std::vector<bool>::const_iterator column_iterator = missing_flags_.begin();
|
||||
advance(column_iterator, feature_offset); // increment iterator to right position
|
||||
res.reset(new DenseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
|
||||
column_iterator));
|
||||
missing_flags_, feature_offset));
|
||||
} else {
|
||||
res.reset(new SparseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
|
||||
{&row_ind_[feature_offset], column_size}));
|
||||
|
||||
@@ -689,15 +689,23 @@ class LearnerIO : public LearnerConfiguration {
|
||||
warn_old_model = false;
|
||||
}
|
||||
|
||||
if (mparam_.major_version >= 1) {
|
||||
learner_model_param_ = LearnerModelParam(mparam_,
|
||||
obj_->ProbToMargin(mparam_.base_score));
|
||||
} else {
|
||||
if (mparam_.major_version < 1) {
|
||||
// Before 1.0.0, base_score is saved as a transformed value, and there's no version
|
||||
// attribute in the saved model.
|
||||
learner_model_param_ = LearnerModelParam(mparam_, mparam_.base_score);
|
||||
// attribute (saved a 0) in the saved model.
|
||||
std::string multi{"multi:"};
|
||||
if (!std::equal(multi.cbegin(), multi.cend(), tparam_.objective.cbegin())) {
|
||||
HostDeviceVector<float> t;
|
||||
t.HostVector().resize(1);
|
||||
t.HostVector().at(0) = mparam_.base_score;
|
||||
this->obj_->PredTransform(&t);
|
||||
auto base_score = t.HostVector().at(0);
|
||||
mparam_.base_score = base_score;
|
||||
}
|
||||
warn_old_model = true;
|
||||
}
|
||||
|
||||
learner_model_param_ =
|
||||
LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score));
|
||||
if (attributes_.find("objective") != attributes_.cend()) {
|
||||
auto obj_str = attributes_.at("objective");
|
||||
auto j_obj = Json::Load({obj_str.c_str(), obj_str.size()});
|
||||
|
||||
@@ -41,10 +41,11 @@ struct RegLossParam : public XGBoostParameter<RegLossParam> {
|
||||
template<typename Loss>
|
||||
class RegLossObj : public ObjFunction {
|
||||
protected:
|
||||
HostDeviceVector<int> label_correct_;
|
||||
HostDeviceVector<float> additional_input_;
|
||||
|
||||
public:
|
||||
RegLossObj() = default;
|
||||
// 0 - label_correct flag, 1 - scale_pos_weight, 2 - is_null_weight
|
||||
RegLossObj(): additional_input_(3) {}
|
||||
|
||||
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
param_.UpdateAllowUnknown(args);
|
||||
@@ -64,8 +65,7 @@ class RegLossObj : public ObjFunction {
|
||||
size_t const ndata = preds.Size();
|
||||
out_gpair->Resize(ndata);
|
||||
auto device = tparam_->gpu_id;
|
||||
label_correct_.Resize(1);
|
||||
label_correct_.Fill(1);
|
||||
additional_input_.HostVector().begin()[0] = 1; // Fill the label_correct flag
|
||||
|
||||
bool is_null_weight = info.weights_.Size() == 0;
|
||||
if (!is_null_weight) {
|
||||
@@ -73,35 +73,37 @@ class RegLossObj : public ObjFunction {
|
||||
<< "Number of weights should be equal to number of data points.";
|
||||
}
|
||||
auto scale_pos_weight = param_.scale_pos_weight;
|
||||
common::Transform<>::Init(
|
||||
[=] XGBOOST_DEVICE(size_t _idx,
|
||||
common::Span<int> _label_correct,
|
||||
additional_input_.HostVector().begin()[1] = scale_pos_weight;
|
||||
additional_input_.HostVector().begin()[2] = is_null_weight;
|
||||
|
||||
common::Transform<>::Init([] XGBOOST_DEVICE(size_t _idx,
|
||||
common::Span<float> _additional_input,
|
||||
common::Span<GradientPair> _out_gpair,
|
||||
common::Span<const bst_float> _preds,
|
||||
common::Span<const bst_float> _labels,
|
||||
common::Span<const bst_float> _weights) {
|
||||
const float _scale_pos_weight = _additional_input[1];
|
||||
const bool _is_null_weight = _additional_input[2];
|
||||
|
||||
bst_float p = Loss::PredTransform(_preds[_idx]);
|
||||
bst_float w = is_null_weight ? 1.0f : _weights[_idx];
|
||||
bst_float w = _is_null_weight ? 1.0f : _weights[_idx];
|
||||
bst_float label = _labels[_idx];
|
||||
if (label == 1.0f) {
|
||||
w *= scale_pos_weight;
|
||||
w *= _scale_pos_weight;
|
||||
}
|
||||
if (!Loss::CheckLabel(label)) {
|
||||
// If there is an incorrect label, the host code will know.
|
||||
_label_correct[0] = 0;
|
||||
_additional_input[0] = 0;
|
||||
}
|
||||
_out_gpair[_idx] = GradientPair(Loss::FirstOrderGradient(p, label) * w,
|
||||
Loss::SecondOrderGradient(p, label) * w);
|
||||
},
|
||||
common::Range{0, static_cast<int64_t>(ndata)}, device).Eval(
|
||||
&label_correct_, out_gpair, &preds, &info.labels_, &info.weights_);
|
||||
&additional_input_, out_gpair, &preds, &info.labels_, &info.weights_);
|
||||
|
||||
// copy "label correct" flags back to host
|
||||
std::vector<int>& label_correct_h = label_correct_.HostVector();
|
||||
for (auto const flag : label_correct_h) {
|
||||
if (flag == 0) {
|
||||
LOG(FATAL) << Loss::LabelErrorMsg();
|
||||
}
|
||||
auto const flag = additional_input_.HostVector().begin()[0];
|
||||
if (flag == 0) {
|
||||
LOG(FATAL) << Loss::LabelErrorMsg();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -68,20 +68,20 @@ class TreeGenerator {
|
||||
return result;
|
||||
}
|
||||
|
||||
virtual std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) {
|
||||
virtual std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) const {
|
||||
return "";
|
||||
}
|
||||
virtual std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) {
|
||||
virtual std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) const {
|
||||
return "";
|
||||
}
|
||||
virtual std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) {
|
||||
virtual std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) const {
|
||||
return "";
|
||||
}
|
||||
virtual std::string NodeStat(RegTree const& tree, int32_t nid) {
|
||||
virtual std::string NodeStat(RegTree const& tree, int32_t nid) const {
|
||||
return "";
|
||||
}
|
||||
|
||||
virtual std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) = 0;
|
||||
virtual std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const = 0;
|
||||
|
||||
virtual std::string SplitNode(RegTree const& tree, int32_t nid, uint32_t depth) {
|
||||
auto const split_index = tree[nid].SplitIndex();
|
||||
@@ -110,7 +110,7 @@ class TreeGenerator {
|
||||
return result;
|
||||
}
|
||||
|
||||
virtual std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) = 0;
|
||||
virtual std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const = 0;
|
||||
virtual std::string BuildTree(RegTree const& tree, int32_t nid, uint32_t depth) = 0;
|
||||
|
||||
public:
|
||||
@@ -181,7 +181,7 @@ class TextGenerator : public TreeGenerator {
|
||||
TextGenerator(FeatureMap const& fmap, std::string const& attrs, bool with_stats) :
|
||||
TreeGenerator(fmap, with_stats) {}
|
||||
|
||||
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
static std::string kLeafTemplate = "{tabs}{nid}:leaf={leaf}{stats}";
|
||||
static std::string kStatTemplate = ",cover={cover}";
|
||||
std::string result = SuperT::Match(
|
||||
@@ -195,7 +195,7 @@ class TextGenerator : public TreeGenerator {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
static std::string const kIndicatorTemplate = "{nid}:[{fname}] yes={yes},no={no}";
|
||||
int32_t nyes = tree[nid].DefaultLeft() ?
|
||||
tree[nid].RightChild() : tree[nid].LeftChild();
|
||||
@@ -211,7 +211,7 @@ class TextGenerator : public TreeGenerator {
|
||||
|
||||
std::string SplitNodeImpl(
|
||||
RegTree const& tree, int32_t nid, std::string const& template_str,
|
||||
std::string cond, uint32_t depth) {
|
||||
std::string cond, uint32_t depth) const {
|
||||
auto split_index = tree[nid].SplitIndex();
|
||||
std::string const result = SuperT::Match(
|
||||
template_str,
|
||||
@@ -226,7 +226,7 @@ class TextGenerator : public TreeGenerator {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
static std::string const kIntegerTemplate =
|
||||
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
|
||||
auto cond = tree[nid].SplitCond();
|
||||
@@ -238,21 +238,21 @@ class TextGenerator : public TreeGenerator {
|
||||
std::to_string(integer_threshold), depth);
|
||||
}
|
||||
|
||||
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
static std::string const kQuantitiveTemplate =
|
||||
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
|
||||
auto cond = tree[nid].SplitCond();
|
||||
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
|
||||
}
|
||||
|
||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
auto cond = tree[nid].SplitCond();
|
||||
static std::string const kNodeTemplate =
|
||||
"{tabs}{nid}:[f{fname}<{cond}] yes={left},no={right},missing={missing}";
|
||||
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
|
||||
}
|
||||
|
||||
std::string NodeStat(RegTree const& tree, int32_t nid) override {
|
||||
std::string NodeStat(RegTree const& tree, int32_t nid) const override {
|
||||
static std::string const kStatTemplate = ",gain={loss_chg},cover={sum_hess}";
|
||||
std::string const result = SuperT::Match(
|
||||
kStatTemplate,
|
||||
@@ -297,7 +297,7 @@ class JsonGenerator : public TreeGenerator {
|
||||
JsonGenerator(FeatureMap const& fmap, std::string attrs, bool with_stats) :
|
||||
TreeGenerator(fmap, with_stats) {}
|
||||
|
||||
std::string Indent(uint32_t depth) {
|
||||
std::string Indent(uint32_t depth) const {
|
||||
std::string result;
|
||||
for (uint32_t i = 0; i < depth + 1; ++i) {
|
||||
result += " ";
|
||||
@@ -305,7 +305,7 @@ class JsonGenerator : public TreeGenerator {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
static std::string const kLeafTemplate =
|
||||
R"L({ "nodeid": {nid}, "leaf": {leaf} {stat}})L";
|
||||
static std::string const kStatTemplate =
|
||||
@@ -321,11 +321,11 @@ class JsonGenerator : public TreeGenerator {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
int32_t nyes = tree[nid].DefaultLeft() ?
|
||||
tree[nid].RightChild() : tree[nid].LeftChild();
|
||||
static std::string const kIndicatorTemplate =
|
||||
R"ID( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", "yes": {yes}, "no": {no}})ID";
|
||||
R"ID( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", "yes": {yes}, "no": {no})ID";
|
||||
auto split_index = tree[nid].SplitIndex();
|
||||
auto result = SuperT::Match(
|
||||
kIndicatorTemplate,
|
||||
@@ -337,8 +337,9 @@ class JsonGenerator : public TreeGenerator {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string SplitNodeImpl(RegTree const& tree, int32_t nid,
|
||||
std::string const& template_str, std::string cond, uint32_t depth) {
|
||||
std::string SplitNodeImpl(RegTree const &tree, int32_t nid,
|
||||
std::string const &template_str, std::string cond,
|
||||
uint32_t depth) const {
|
||||
auto split_index = tree[nid].SplitIndex();
|
||||
std::string const result = SuperT::Match(
|
||||
template_str,
|
||||
@@ -353,7 +354,7 @@ class JsonGenerator : public TreeGenerator {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
auto cond = tree[nid].SplitCond();
|
||||
const bst_float floored = std::floor(cond);
|
||||
const int32_t integer_threshold
|
||||
@@ -367,7 +368,7 @@ class JsonGenerator : public TreeGenerator {
|
||||
std::to_string(integer_threshold), depth);
|
||||
}
|
||||
|
||||
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
static std::string const kQuantitiveTemplate =
|
||||
R"I( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", )I"
|
||||
R"I("split_condition": {cond}, "yes": {left}, "no": {right}, )I"
|
||||
@@ -376,7 +377,7 @@ class JsonGenerator : public TreeGenerator {
|
||||
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
|
||||
}
|
||||
|
||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
auto cond = tree[nid].SplitCond();
|
||||
static std::string const kNodeTemplate =
|
||||
R"I( "nodeid": {nid}, "depth": {depth}, "split": {fname}, )I"
|
||||
@@ -385,7 +386,7 @@ class JsonGenerator : public TreeGenerator {
|
||||
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
|
||||
}
|
||||
|
||||
std::string NodeStat(RegTree const& tree, int32_t nid) override {
|
||||
std::string NodeStat(RegTree const& tree, int32_t nid) const override {
|
||||
static std::string kStatTemplate =
|
||||
R"S(, "gain": {loss_chg}, "cover": {sum_hess})S";
|
||||
auto result = SuperT::Match(
|
||||
@@ -529,7 +530,7 @@ class GraphvizGenerator : public TreeGenerator {
|
||||
protected:
|
||||
// Only indicator is different, so we combine all different node types into this
|
||||
// function.
|
||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
auto split = tree[nid].SplitIndex();
|
||||
auto cond = tree[nid].SplitCond();
|
||||
static std::string const kNodeTemplate =
|
||||
@@ -563,7 +564,7 @@ class GraphvizGenerator : public TreeGenerator {
|
||||
return result;
|
||||
};
|
||||
|
||||
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
||||
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||
static std::string const kLeafTemplate =
|
||||
" {nid} [ label=\"leaf={leaf-value}\" {params}]\n";
|
||||
auto result = SuperT::Match(kLeafTemplate, {
|
||||
|
||||
@@ -151,6 +151,10 @@ TEST(Tree, DumpJson) {
|
||||
|
||||
str = tree.DumpModel(fmap, false, "json");
|
||||
ASSERT_EQ(str.find("cover"), std::string::npos);
|
||||
|
||||
|
||||
auto j_tree = Json::Load({str.c_str(), str.size()});
|
||||
ASSERT_EQ(get<Array>(j_tree["children"]).size(), 2);
|
||||
}
|
||||
|
||||
TEST(Tree, DumpText) {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
[pytest]
|
||||
markers =
|
||||
mgpu: Mark a test that requires multiple GPUs to run.
|
||||
mgpu: Mark a test that requires multiple GPUs to run.
|
||||
ci: Mark a test that runs only on CI.
|
||||
@@ -12,25 +12,15 @@ rng = np.random.RandomState(1994)
|
||||
class TestGPUBasicModels(unittest.TestCase):
|
||||
cputest = test_bm.TestModels()
|
||||
|
||||
def test_eta_decay_gpu_hist(self):
|
||||
self.cputest.run_eta_decay('gpu_hist')
|
||||
|
||||
def test_deterministic_gpu_hist(self):
|
||||
kRows = 1000
|
||||
kCols = 64
|
||||
kClasses = 4
|
||||
# Create large values to force rounding.
|
||||
X = np.random.randn(kRows, kCols) * 1e4
|
||||
y = np.random.randint(0, kClasses, size=kRows)
|
||||
|
||||
def run_cls(self, X, y, deterministic):
|
||||
cls = xgb.XGBClassifier(tree_method='gpu_hist',
|
||||
deterministic_histogram=True,
|
||||
deterministic_histogram=deterministic,
|
||||
single_precision_histogram=True)
|
||||
cls.fit(X, y)
|
||||
cls.get_booster().save_model('test_deterministic_gpu_hist-0.json')
|
||||
|
||||
cls = xgb.XGBClassifier(tree_method='gpu_hist',
|
||||
deterministic_histogram=True,
|
||||
deterministic_histogram=deterministic,
|
||||
single_precision_histogram=True)
|
||||
cls.fit(X, y)
|
||||
cls.get_booster().save_model('test_deterministic_gpu_hist-1.json')
|
||||
@@ -40,7 +30,24 @@ class TestGPUBasicModels(unittest.TestCase):
|
||||
with open('test_deterministic_gpu_hist-1.json', 'r') as fd:
|
||||
model_1 = fd.read()
|
||||
|
||||
assert hash(model_0) == hash(model_1)
|
||||
|
||||
os.remove('test_deterministic_gpu_hist-0.json')
|
||||
os.remove('test_deterministic_gpu_hist-1.json')
|
||||
|
||||
return hash(model_0), hash(model_1)
|
||||
|
||||
def test_eta_decay_gpu_hist(self):
|
||||
self.cputest.run_eta_decay('gpu_hist')
|
||||
|
||||
def test_deterministic_gpu_hist(self):
|
||||
kRows = 1000
|
||||
kCols = 64
|
||||
kClasses = 4
|
||||
# Create large values to force rounding.
|
||||
X = np.random.randn(kRows, kCols) * 1e4
|
||||
y = np.random.randint(0, kClasses, size=kRows) * 1e4
|
||||
|
||||
model_0, model_1 = self.run_cls(X, y, True)
|
||||
assert model_0 == model_1
|
||||
|
||||
model_0, model_1 = self.run_cls(X, y, False)
|
||||
assert model_0 != model_1
|
||||
|
||||
@@ -325,7 +325,7 @@ class TestModels(unittest.TestCase):
|
||||
assert locale.getpreferredencoding(False) == loc
|
||||
|
||||
@pytest.mark.skipif(**tm.no_json_schema())
|
||||
def test_json_schema(self):
|
||||
def test_json_io_schema(self):
|
||||
import jsonschema
|
||||
model_path = 'test_json_schema.json'
|
||||
path = os.path.dirname(
|
||||
@@ -342,3 +342,35 @@ class TestModels(unittest.TestCase):
|
||||
jsonschema.validate(instance=json_model(model_path, parameters),
|
||||
schema=schema)
|
||||
os.remove(model_path)
|
||||
|
||||
@pytest.mark.skipif(**tm.no_json_schema())
|
||||
def test_json_dump_schema(self):
|
||||
import jsonschema
|
||||
|
||||
def validate_model(parameters):
|
||||
X = np.random.random((100, 30))
|
||||
y = np.random.randint(0, 4, size=(100,))
|
||||
|
||||
parameters['num_class'] = 4
|
||||
m = xgb.DMatrix(X, y)
|
||||
|
||||
booster = xgb.train(parameters, m)
|
||||
dump = booster.get_dump(dump_format='json')
|
||||
|
||||
for i in range(len(dump)):
|
||||
jsonschema.validate(instance=json.loads(dump[i]),
|
||||
schema=schema)
|
||||
|
||||
path = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
doc = os.path.join(path, 'doc', 'dump.schema')
|
||||
with open(doc, 'r') as fd:
|
||||
schema = json.load(fd)
|
||||
|
||||
parameters = {'tree_method': 'hist', 'booster': 'gbtree',
|
||||
'objective': 'multi:softmax'}
|
||||
validate_model(parameters)
|
||||
|
||||
parameters = {'tree_method': 'hist', 'booster': 'dart',
|
||||
'objective': 'multi:softmax'}
|
||||
validate_model(parameters)
|
||||
|
||||
@@ -4,6 +4,7 @@ import generate_models as gm
|
||||
import json
|
||||
import zipfile
|
||||
import pytest
|
||||
import copy
|
||||
|
||||
|
||||
def run_model_param_check(config):
|
||||
@@ -124,6 +125,9 @@ def test_model_compatibility():
|
||||
if name.startswith('xgboost-'):
|
||||
booster = xgboost.Booster(model_file=path)
|
||||
run_booster_check(booster, name)
|
||||
# Do full serialization.
|
||||
booster = copy.copy(booster)
|
||||
run_booster_check(booster, name)
|
||||
elif name.startswith('xgboost_scikit'):
|
||||
run_scikit_model_check(name, path)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user