Compare commits
13 Commits
release_1.
...
v1.1.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f5d4fddafe | ||
|
|
66690f3d07 | ||
|
|
c42f533ae9 | ||
|
|
751160b69c | ||
|
|
8aaabce7c9 | ||
|
|
14543176d1 | ||
|
|
afa6e086cc | ||
|
|
636ab6b522 | ||
|
|
6daa6ee4e0 | ||
|
|
4979991d5b | ||
|
|
02faddc5f3 | ||
|
|
844d7c1d5b | ||
|
|
3728855ce9 |
@@ -6,7 +6,7 @@ os:
|
|||||||
- linux
|
- linux
|
||||||
- osx
|
- osx
|
||||||
|
|
||||||
osx_image: xcode10.3
|
osx_image: xcode10.1
|
||||||
dist: bionic
|
dist: bionic
|
||||||
|
|
||||||
# Use Build Matrix to do lint and build seperately
|
# Use Build Matrix to do lint and build seperately
|
||||||
@@ -21,6 +21,10 @@ env:
|
|||||||
# cmake test
|
# cmake test
|
||||||
- TASK=cmake_test
|
- TASK=cmake_test
|
||||||
|
|
||||||
|
global:
|
||||||
|
- secure: "PR16i9F8QtNwn99C5NDp8nptAS+97xwDtXEJJfEiEVhxPaaRkOp0MPWhogCaK0Eclxk1TqkgWbdXFknwGycX620AzZWa/A1K3gAs+GrpzqhnPMuoBJ0Z9qxXTbSJvCyvMbYwVrjaxc/zWqdMU8waWz8A7iqKGKs/SqbQ3rO6v7c="
|
||||||
|
- secure: "dAGAjBokqm/0nVoLMofQni/fWIBcYSmdq4XvCBX1ZAMDsWnuOfz/4XCY6h2lEI1rVHZQ+UdZkc9PioOHGPZh5BnvE49/xVVWr9c4/61lrDOlkD01ZjSAeoV0fAZq+93V/wPl4QV+MM+Sem9hNNzFSbN5VsQLAiWCSapWsLdKzqA="
|
||||||
|
|
||||||
matrix:
|
matrix:
|
||||||
exclude:
|
exclude:
|
||||||
- os: linux
|
- os: linux
|
||||||
|
|||||||
11
Jenkinsfile
vendored
11
Jenkinsfile
vendored
@@ -64,7 +64,6 @@ pipeline {
|
|||||||
'build-cpu': { BuildCPU() },
|
'build-cpu': { BuildCPU() },
|
||||||
'build-cpu-rabit-mock': { BuildCPUMock() },
|
'build-cpu-rabit-mock': { BuildCPUMock() },
|
||||||
'build-cpu-non-omp': { BuildCPUNonOmp() },
|
'build-cpu-non-omp': { BuildCPUNonOmp() },
|
||||||
'build-gpu-cuda9.0': { BuildCUDA(cuda_version: '9.0') },
|
|
||||||
'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
|
'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
|
||||||
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
|
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
|
||||||
'build-jvm-packages': { BuildJVMPackages(spark_version: '2.4.3') },
|
'build-jvm-packages': { BuildJVMPackages(spark_version: '2.4.3') },
|
||||||
@@ -251,10 +250,10 @@ def BuildCUDA(args) {
|
|||||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
|
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
|
||||||
${dockerRun} ${container_type} ${docker_binary} ${docker_args} python3 tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
|
${dockerRun} ${container_type} ${docker_binary} ${docker_args} python3 tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
|
||||||
"""
|
"""
|
||||||
// Stash wheel for CUDA 9.0 target
|
// Stash wheel for CUDA 10.0 target
|
||||||
if (args.cuda_version == '9.0') {
|
if (args.cuda_version == '10.0') {
|
||||||
echo 'Stashing Python wheel...'
|
echo 'Stashing Python wheel...'
|
||||||
stash name: 'xgboost_whl_cuda9', includes: 'python-package/dist/*.whl'
|
stash name: 'xgboost_whl_cuda10', includes: 'python-package/dist/*.whl'
|
||||||
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
|
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
|
||||||
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
|
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
|
||||||
echo 'Stashing C++ test executable (testxgboost)...'
|
echo 'Stashing C++ test executable (testxgboost)...'
|
||||||
@@ -298,7 +297,7 @@ def BuildJVMDoc() {
|
|||||||
|
|
||||||
def TestPythonCPU() {
|
def TestPythonCPU() {
|
||||||
node('linux && cpu') {
|
node('linux && cpu') {
|
||||||
unstash name: 'xgboost_whl_cuda9'
|
unstash name: 'xgboost_whl_cuda10'
|
||||||
unstash name: 'srcs'
|
unstash name: 'srcs'
|
||||||
unstash name: 'xgboost_cli'
|
unstash name: 'xgboost_cli'
|
||||||
echo "Test Python CPU"
|
echo "Test Python CPU"
|
||||||
@@ -315,7 +314,7 @@ def TestPythonCPU() {
|
|||||||
def TestPythonGPU(args) {
|
def TestPythonGPU(args) {
|
||||||
nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
|
nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
|
||||||
node(nodeReq) {
|
node(nodeReq) {
|
||||||
unstash name: 'xgboost_whl_cuda9'
|
unstash name: 'xgboost_whl_cuda10'
|
||||||
unstash name: 'srcs'
|
unstash name: 'srcs'
|
||||||
echo "Test Python GPU: CUDA ${args.cuda_version}"
|
echo "Test Python GPU: CUDA ${args.cuda_version}"
|
||||||
def container_type = "gpu"
|
def container_type = "gpu"
|
||||||
|
|||||||
@@ -410,7 +410,7 @@ In some very specific cases, like when you want to pilot **XGBoost** from `caret
|
|||||||
|
|
||||||
```{r saveLoadRBinVectorModel, message=F, warning=F}
|
```{r saveLoadRBinVectorModel, message=F, warning=F}
|
||||||
# save model to R's raw vector
|
# save model to R's raw vector
|
||||||
rawVec <- xgb.save.raw(bst)
|
rawVec <- xgb.serialize(bst)
|
||||||
|
|
||||||
# print class
|
# print class
|
||||||
print(class(rawVec))
|
print(class(rawVec))
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-SNAPSHOT
|
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@
|
||||||
|
|||||||
55
doc/dump.schema
Normal file
55
doc/dump.schema
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"definitions": {
|
||||||
|
"split_node": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"nodeid": {
|
||||||
|
"type": "number",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"depth": {
|
||||||
|
"type": "number",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"yes": {
|
||||||
|
"type": "number",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"no": {
|
||||||
|
"type": "number",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"split": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"children": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"oneOf": [
|
||||||
|
{"$ref": "#/definitions/split_node"},
|
||||||
|
{"$ref": "#/definitions/leaf_node"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"maxItems": 2
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["nodeid", "depth", "yes", "no", "split", "children"]
|
||||||
|
},
|
||||||
|
"leaf_node": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"nodeid": {
|
||||||
|
"type": "number",
|
||||||
|
"minimum": 0
|
||||||
|
},
|
||||||
|
"leaf": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["nodeid", "leaf"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "object",
|
||||||
|
"$ref": "#/definitions/split_node"
|
||||||
|
}
|
||||||
@@ -418,7 +418,14 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
|
|||||||
* 4:output feature contributions to individual predictions
|
* 4:output feature contributions to individual predictions
|
||||||
* \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
|
* \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
|
||||||
* when the parameter is set to 0, we will use all the trees
|
* when the parameter is set to 0, we will use all the trees
|
||||||
* \param training Whether the prediction value is used for training.
|
* \param training Whether the prediction function is used as part of a training loop.
|
||||||
|
* Prediction can be run in 2 scenarios:
|
||||||
|
* 1. Given data matrix X, obtain prediction y_pred from the model.
|
||||||
|
* 2. Obtain the prediction for computing gradients. For example, DART booster performs dropout
|
||||||
|
* during training, and the prediction result will be different from the one obtained by normal
|
||||||
|
* inference step due to dropped trees.
|
||||||
|
* Set training=false for the first scenario. Set training=true for the second scenario.
|
||||||
|
* The second scenario applies when you are defining a custom objective function.
|
||||||
* \param out_len used to store length of returning result
|
* \param out_len used to store length of returning result
|
||||||
* \param out_result used to set a pointer to array
|
* \param out_result used to set a pointer to array
|
||||||
* \return 0 when success, -1 when failure happens
|
* \return 0 when success, -1 when failure happens
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
|
|
||||||
#include <dmlc/base.h>
|
#include <dmlc/base.h>
|
||||||
#include <dmlc/data.h>
|
#include <dmlc/data.h>
|
||||||
|
#include <dmlc/serializer.h>
|
||||||
#include <rabit/rabit.h>
|
#include <rabit/rabit.h>
|
||||||
#include <xgboost/base.h>
|
#include <xgboost/base.h>
|
||||||
#include <xgboost/span.h>
|
#include <xgboost/span.h>
|
||||||
@@ -102,7 +103,7 @@ class MetaInfo {
|
|||||||
/*!
|
/*!
|
||||||
* \brief Validate all metainfo.
|
* \brief Validate all metainfo.
|
||||||
*/
|
*/
|
||||||
void Validate() const;
|
void Validate(int32_t device) const;
|
||||||
|
|
||||||
MetaInfo Slice(common::Span<int32_t const> ridxs) const;
|
MetaInfo Slice(common::Span<int32_t const> ridxs) const;
|
||||||
/*!
|
/*!
|
||||||
@@ -554,5 +555,21 @@ inline BatchSet<EllpackPage> DMatrix::GetBatches(const BatchParam& param) {
|
|||||||
|
|
||||||
namespace dmlc {
|
namespace dmlc {
|
||||||
DMLC_DECLARE_TRAITS(is_pod, xgboost::Entry, true);
|
DMLC_DECLARE_TRAITS(is_pod, xgboost::Entry, true);
|
||||||
}
|
|
||||||
|
namespace serializer {
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct Handler<xgboost::Entry> {
|
||||||
|
inline static void Write(Stream* strm, const xgboost::Entry& data) {
|
||||||
|
strm->Write(data.index);
|
||||||
|
strm->Write(data.fvalue);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static bool Read(Stream* strm, xgboost::Entry* data) {
|
||||||
|
return strm->Read(&data->index) && strm->Read(&data->fvalue);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace serializer
|
||||||
|
} // namespace dmlc
|
||||||
#endif // XGBOOST_DATA_H_
|
#endif // XGBOOST_DATA_H_
|
||||||
|
|||||||
@@ -30,6 +30,7 @@
|
|||||||
#define XGBOOST_SPAN_H_
|
#define XGBOOST_SPAN_H_
|
||||||
|
|
||||||
#include <xgboost/base.h>
|
#include <xgboost/base.h>
|
||||||
|
#include <xgboost/logging.h>
|
||||||
|
|
||||||
#include <cinttypes> // size_t
|
#include <cinttypes> // size_t
|
||||||
#include <limits> // numeric_limits
|
#include <limits> // numeric_limits
|
||||||
@@ -85,9 +86,11 @@ namespace common {
|
|||||||
} \
|
} \
|
||||||
} while (0);
|
} while (0);
|
||||||
|
|
||||||
#ifdef __CUDA_ARCH__
|
#if defined(__CUDA_ARCH__)
|
||||||
#define SPAN_CHECK KERNEL_CHECK
|
#define SPAN_CHECK KERNEL_CHECK
|
||||||
#else
|
#elif defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1 // R package
|
||||||
|
#define SPAN_CHECK CHECK // check from dmlc
|
||||||
|
#else // not CUDA, not R
|
||||||
#define SPAN_CHECK(cond) \
|
#define SPAN_CHECK(cond) \
|
||||||
do { \
|
do { \
|
||||||
if (XGBOOST_EXPECT(!(cond), false)) { \
|
if (XGBOOST_EXPECT(!(cond), false)) { \
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>XGBoost JVM Package</name>
|
<name>XGBoost JVM Package</name>
|
||||||
<description>JVM Package for XGBoost</description>
|
<description>JVM Package for XGBoost</description>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
@@ -37,7 +37,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j_2.12</artifactId>
|
<artifactId>xgboost4j_2.12</artifactId>
|
||||||
<version>1.1.0-SNAPSHOT</version>
|
<version>1.1.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
1.1.0-SNAPSHOT
|
1.1.0
|
||||||
|
|||||||
@@ -159,8 +159,9 @@ def _load_lib():
|
|||||||
'XGBoost Library ({}) could not be loaded.\n'.format(libname) +
|
'XGBoost Library ({}) could not be loaded.\n'.format(libname) +
|
||||||
'Likely causes:\n' +
|
'Likely causes:\n' +
|
||||||
' * OpenMP runtime is not installed ' +
|
' * OpenMP runtime is not installed ' +
|
||||||
'(vcomp140.dll or libgomp-1.dll for Windows, ' +
|
'(vcomp140.dll or libgomp-1.dll for Windows, libomp.dylib for Mac OSX, ' +
|
||||||
'libgomp.so for UNIX-like OSes)\n' +
|
'libgomp.so for Linux and other UNIX-like OSes). Mac OSX users: Run ' +
|
||||||
|
'`brew install libomp` to install OpenMP runtime.\n' +
|
||||||
' * You are running 32-bit Python on a 64-bit OS\n' +
|
' * You are running 32-bit Python on a 64-bit OS\n' +
|
||||||
'Error message(s): {}\n'.format(os_error_list))
|
'Error message(s): {}\n'.format(os_error_list))
|
||||||
lib.XGBGetLastError.restype = ctypes.c_char_p
|
lib.XGBGetLastError.restype = ctypes.c_char_p
|
||||||
|
|||||||
@@ -426,7 +426,7 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
|
|||||||
DMatrixHandle dmat,
|
DMatrixHandle dmat,
|
||||||
int option_mask,
|
int option_mask,
|
||||||
unsigned ntree_limit,
|
unsigned ntree_limit,
|
||||||
int32_t training,
|
int training,
|
||||||
xgboost::bst_ulong *len,
|
xgboost::bst_ulong *len,
|
||||||
const bst_float **out_result) {
|
const bst_float **out_result) {
|
||||||
API_BEGIN();
|
API_BEGIN();
|
||||||
|
|||||||
@@ -82,14 +82,16 @@ template <typename BinIdxType>
|
|||||||
class DenseColumn: public Column<BinIdxType> {
|
class DenseColumn: public Column<BinIdxType> {
|
||||||
public:
|
public:
|
||||||
DenseColumn(ColumnType type, common::Span<const BinIdxType> index,
|
DenseColumn(ColumnType type, common::Span<const BinIdxType> index,
|
||||||
uint32_t index_base,
|
uint32_t index_base, const std::vector<bool>& missing_flags,
|
||||||
const std::vector<bool>::const_iterator missing_flags)
|
size_t feature_offset)
|
||||||
: Column<BinIdxType>(type, index, index_base),
|
: Column<BinIdxType>(type, index, index_base),
|
||||||
missing_flags_(missing_flags) {}
|
missing_flags_(missing_flags),
|
||||||
bool IsMissing(size_t idx) const { return missing_flags_[idx]; }
|
feature_offset_(feature_offset) {}
|
||||||
|
bool IsMissing(size_t idx) const { return missing_flags_[feature_offset_ + idx]; }
|
||||||
private:
|
private:
|
||||||
/* flags for missing values in dense columns */
|
/* flags for missing values in dense columns */
|
||||||
std::vector<bool>::const_iterator missing_flags_;
|
const std::vector<bool>& missing_flags_;
|
||||||
|
size_t feature_offset_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*! \brief a collection of columns, with support for construction from
|
/*! \brief a collection of columns, with support for construction from
|
||||||
@@ -208,10 +210,8 @@ class ColumnMatrix {
|
|||||||
column_size };
|
column_size };
|
||||||
std::unique_ptr<const Column<BinIdxType> > res;
|
std::unique_ptr<const Column<BinIdxType> > res;
|
||||||
if (type_[fid] == ColumnType::kDenseColumn) {
|
if (type_[fid] == ColumnType::kDenseColumn) {
|
||||||
std::vector<bool>::const_iterator column_iterator = missing_flags_.begin();
|
|
||||||
advance(column_iterator, feature_offset); // increment iterator to right position
|
|
||||||
res.reset(new DenseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
|
res.reset(new DenseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
|
||||||
column_iterator));
|
missing_flags_, feature_offset));
|
||||||
} else {
|
} else {
|
||||||
res.reset(new SparseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
|
res.reset(new SparseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
|
||||||
{&row_ind_[feature_offset], column_size}));
|
{&row_ind_[feature_offset], column_size}));
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ template <typename T>
|
|||||||
void SaveScalarField(dmlc::Stream *strm, const std::string &name,
|
void SaveScalarField(dmlc::Stream *strm, const std::string &name,
|
||||||
xgboost::DataType type, const T &field) {
|
xgboost::DataType type, const T &field) {
|
||||||
strm->Write(name);
|
strm->Write(name);
|
||||||
strm->Write(type);
|
strm->Write(static_cast<uint8_t>(type));
|
||||||
strm->Write(true); // is_scalar=True
|
strm->Write(true); // is_scalar=True
|
||||||
strm->Write(field);
|
strm->Write(field);
|
||||||
}
|
}
|
||||||
@@ -47,7 +47,7 @@ void SaveVectorField(dmlc::Stream *strm, const std::string &name,
|
|||||||
xgboost::DataType type, std::pair<uint64_t, uint64_t> shape,
|
xgboost::DataType type, std::pair<uint64_t, uint64_t> shape,
|
||||||
const std::vector<T>& field) {
|
const std::vector<T>& field) {
|
||||||
strm->Write(name);
|
strm->Write(name);
|
||||||
strm->Write(type);
|
strm->Write(static_cast<uint8_t>(type));
|
||||||
strm->Write(false); // is_scalar=False
|
strm->Write(false); // is_scalar=False
|
||||||
strm->Write(shape.first);
|
strm->Write(shape.first);
|
||||||
strm->Write(shape.second);
|
strm->Write(shape.second);
|
||||||
@@ -71,7 +71,9 @@ void LoadScalarField(dmlc::Stream* strm, const std::string& expected_name,
|
|||||||
CHECK(strm->Read(&name)) << invalid;
|
CHECK(strm->Read(&name)) << invalid;
|
||||||
CHECK_EQ(name, expected_name)
|
CHECK_EQ(name, expected_name)
|
||||||
<< invalid << " Expected field: " << expected_name << ", got: " << name;
|
<< invalid << " Expected field: " << expected_name << ", got: " << name;
|
||||||
CHECK(strm->Read(&type)) << invalid;
|
uint8_t type_val;
|
||||||
|
CHECK(strm->Read(&type_val)) << invalid;
|
||||||
|
type = static_cast<xgboost::DataType>(type_val);
|
||||||
CHECK(type == expected_type)
|
CHECK(type == expected_type)
|
||||||
<< invalid << "Expected field of type: " << static_cast<int>(expected_type) << ", "
|
<< invalid << "Expected field of type: " << static_cast<int>(expected_type) << ", "
|
||||||
<< "got field type: " << static_cast<int>(type);
|
<< "got field type: " << static_cast<int>(type);
|
||||||
@@ -91,7 +93,9 @@ void LoadVectorField(dmlc::Stream* strm, const std::string& expected_name,
|
|||||||
CHECK(strm->Read(&name)) << invalid;
|
CHECK(strm->Read(&name)) << invalid;
|
||||||
CHECK_EQ(name, expected_name)
|
CHECK_EQ(name, expected_name)
|
||||||
<< invalid << " Expected field: " << expected_name << ", got: " << name;
|
<< invalid << " Expected field: " << expected_name << ", got: " << name;
|
||||||
CHECK(strm->Read(&type)) << invalid;
|
uint8_t type_val;
|
||||||
|
CHECK(strm->Read(&type_val)) << invalid;
|
||||||
|
type = static_cast<xgboost::DataType>(type_val);
|
||||||
CHECK(type == expected_type)
|
CHECK(type == expected_type)
|
||||||
<< invalid << "Expected field of type: " << static_cast<int>(expected_type) << ", "
|
<< invalid << "Expected field of type: " << static_cast<int>(expected_type) << ", "
|
||||||
<< "got field type: " << static_cast<int>(type);
|
<< "got field type: " << static_cast<int>(type);
|
||||||
@@ -338,7 +342,7 @@ void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetaInfo::Validate() const {
|
void MetaInfo::Validate(int32_t device) const {
|
||||||
if (group_ptr_.size() != 0 && weights_.Size() != 0) {
|
if (group_ptr_.size() != 0 && weights_.Size() != 0) {
|
||||||
CHECK_EQ(group_ptr_.size(), weights_.Size() + 1)
|
CHECK_EQ(group_ptr_.size(), weights_.Size() + 1)
|
||||||
<< "Size of weights must equal to number of groups when ranking "
|
<< "Size of weights must equal to number of groups when ranking "
|
||||||
@@ -350,30 +354,44 @@ void MetaInfo::Validate() const {
|
|||||||
<< "Invalid group structure. Number of rows obtained from groups "
|
<< "Invalid group structure. Number of rows obtained from groups "
|
||||||
"doesn't equal to actual number of rows given by data.";
|
"doesn't equal to actual number of rows given by data.";
|
||||||
}
|
}
|
||||||
|
auto check_device = [device](HostDeviceVector<float> const &v) {
|
||||||
|
CHECK(v.DeviceIdx() == GenericParameter::kCpuId ||
|
||||||
|
device == GenericParameter::kCpuId ||
|
||||||
|
v.DeviceIdx() == device)
|
||||||
|
<< "Data is resided on a different device than `gpu_id`. "
|
||||||
|
<< "Device that data is on: " << v.DeviceIdx() << ", "
|
||||||
|
<< "`gpu_id` for XGBoost: " << device;
|
||||||
|
};
|
||||||
|
|
||||||
if (weights_.Size() != 0) {
|
if (weights_.Size() != 0) {
|
||||||
CHECK_EQ(weights_.Size(), num_row_)
|
CHECK_EQ(weights_.Size(), num_row_)
|
||||||
<< "Size of weights must equal to number of rows.";
|
<< "Size of weights must equal to number of rows.";
|
||||||
|
check_device(weights_);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (labels_.Size() != 0) {
|
if (labels_.Size() != 0) {
|
||||||
CHECK_EQ(labels_.Size(), num_row_)
|
CHECK_EQ(labels_.Size(), num_row_)
|
||||||
<< "Size of labels must equal to number of rows.";
|
<< "Size of labels must equal to number of rows.";
|
||||||
|
check_device(labels_);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (labels_lower_bound_.Size() != 0) {
|
if (labels_lower_bound_.Size() != 0) {
|
||||||
CHECK_EQ(labels_lower_bound_.Size(), num_row_)
|
CHECK_EQ(labels_lower_bound_.Size(), num_row_)
|
||||||
<< "Size of label_lower_bound must equal to number of rows.";
|
<< "Size of label_lower_bound must equal to number of rows.";
|
||||||
|
check_device(labels_lower_bound_);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (labels_upper_bound_.Size() != 0) {
|
if (labels_upper_bound_.Size() != 0) {
|
||||||
CHECK_EQ(labels_upper_bound_.Size(), num_row_)
|
CHECK_EQ(labels_upper_bound_.Size(), num_row_)
|
||||||
<< "Size of label_upper_bound must equal to number of rows.";
|
<< "Size of label_upper_bound must equal to number of rows.";
|
||||||
|
check_device(labels_upper_bound_);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
CHECK_LE(num_nonzero_, num_col_ * num_row_);
|
CHECK_LE(num_nonzero_, num_col_ * num_row_);
|
||||||
if (base_margin_.Size() != 0) {
|
if (base_margin_.Size() != 0) {
|
||||||
CHECK_EQ(base_margin_.Size() % num_row_, 0)
|
CHECK_EQ(base_margin_.Size() % num_row_, 0)
|
||||||
<< "Size of base margin must be a multiple of number of rows.";
|
<< "Size of base margin must be a multiple of number of rows.";
|
||||||
|
check_device(base_margin_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -201,6 +201,7 @@ template <typename AdapterT>
|
|||||||
DeviceDMatrix::DeviceDMatrix(AdapterT* adapter, float missing, int nthread, int max_bin) {
|
DeviceDMatrix::DeviceDMatrix(AdapterT* adapter, float missing, int nthread, int max_bin) {
|
||||||
common::HistogramCuts cuts =
|
common::HistogramCuts cuts =
|
||||||
common::AdapterDeviceSketch(adapter, max_bin, missing);
|
common::AdapterDeviceSketch(adapter, max_bin, missing);
|
||||||
|
dh::safe_cuda(cudaSetDevice(adapter->DeviceIdx()));
|
||||||
auto& batch = adapter->Value();
|
auto& batch = adapter->Value();
|
||||||
// Work out how many valid entries we have in each row
|
// Work out how many valid entries we have in each row
|
||||||
dh::caching_device_vector<size_t> row_counts(adapter->NumRows() + 1, 0);
|
dh::caching_device_vector<size_t> row_counts(adapter->NumRows() + 1, 0);
|
||||||
|
|||||||
@@ -99,6 +99,7 @@ void CopyDataRowMajor(AdapterT* adapter, common::Span<Entry> data,
|
|||||||
// be supported in future. Does not currently support inferring row/column size
|
// be supported in future. Does not currently support inferring row/column size
|
||||||
template <typename AdapterT>
|
template <typename AdapterT>
|
||||||
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
|
SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
|
||||||
|
dh::safe_cuda(cudaSetDevice(adapter->DeviceIdx()));
|
||||||
CHECK(adapter->NumRows() != kAdapterUnknownSize);
|
CHECK(adapter->NumRows() != kAdapterUnknownSize);
|
||||||
CHECK(adapter->NumColumns() != kAdapterUnknownSize);
|
CHECK(adapter->NumColumns() != kAdapterUnknownSize);
|
||||||
|
|
||||||
|
|||||||
@@ -1052,7 +1052,7 @@ class LearnerImpl : public LearnerIO {
|
|||||||
|
|
||||||
void ValidateDMatrix(DMatrix* p_fmat) const {
|
void ValidateDMatrix(DMatrix* p_fmat) const {
|
||||||
MetaInfo const& info = p_fmat->Info();
|
MetaInfo const& info = p_fmat->Info();
|
||||||
info.Validate();
|
info.Validate(generic_parameters_.gpu_id);
|
||||||
|
|
||||||
auto const row_based_split = [this]() {
|
auto const row_based_split = [this]() {
|
||||||
return tparam_.dsplit == DataSplitMode::kRow ||
|
return tparam_.dsplit == DataSplitMode::kRow ||
|
||||||
|
|||||||
@@ -70,8 +70,7 @@ struct EvalAFT : public Metric {
|
|||||||
|
|
||||||
double nloglik_sum = 0.0;
|
double nloglik_sum = 0.0;
|
||||||
double weight_sum = 0.0;
|
double weight_sum = 0.0;
|
||||||
#pragma omp parallel for default(none) \
|
#pragma omp parallel for \
|
||||||
firstprivate(nsize, is_null_weight, aft_loss_distribution_scale) \
|
|
||||||
shared(weights, y_lower, y_upper, yhat) reduction(+:nloglik_sum, weight_sum)
|
shared(weights, y_lower, y_upper, yhat) reduction(+:nloglik_sum, weight_sum)
|
||||||
for (omp_ulong i = 0; i < nsize; ++i) {
|
for (omp_ulong i = 0; i < nsize; ++i) {
|
||||||
// If weights are empty, data is unweighted so we use 1.0 everywhere
|
// If weights are empty, data is unweighted so we use 1.0 everywhere
|
||||||
|
|||||||
@@ -56,8 +56,7 @@ class AFTObj : public ObjFunction {
|
|||||||
const omp_ulong nsize = static_cast<omp_ulong>(yhat.size());
|
const omp_ulong nsize = static_cast<omp_ulong>(yhat.size());
|
||||||
const float aft_loss_distribution_scale = param_.aft_loss_distribution_scale;
|
const float aft_loss_distribution_scale = param_.aft_loss_distribution_scale;
|
||||||
|
|
||||||
#pragma omp parallel for default(none) \
|
#pragma omp parallel for \
|
||||||
firstprivate(nsize, is_null_weight, aft_loss_distribution_scale) \
|
|
||||||
shared(weights, y_lower, y_upper, yhat, gpair)
|
shared(weights, y_lower, y_upper, yhat, gpair)
|
||||||
for (omp_ulong i = 0; i < nsize; ++i) {
|
for (omp_ulong i = 0; i < nsize; ++i) {
|
||||||
// If weights are empty, data is unweighted so we use 1.0 everywhere
|
// If weights are empty, data is unweighted so we use 1.0 everywhere
|
||||||
@@ -74,7 +73,7 @@ class AFTObj : public ObjFunction {
|
|||||||
// Trees give us a prediction in log scale, so exponentiate
|
// Trees give us a prediction in log scale, so exponentiate
|
||||||
std::vector<bst_float> &preds = io_preds->HostVector();
|
std::vector<bst_float> &preds = io_preds->HostVector();
|
||||||
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
|
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
|
||||||
#pragma omp parallel for default(none) firstprivate(ndata) shared(preds)
|
#pragma omp parallel for shared(preds)
|
||||||
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
|
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
|
||||||
preds[j] = std::exp(preds[j]);
|
preds[j] = std::exp(preds[j]);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -68,20 +68,20 @@ class TreeGenerator {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) {
|
virtual std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) const {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
virtual std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) {
|
virtual std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) const {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
virtual std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) {
|
virtual std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) const {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
virtual std::string NodeStat(RegTree const& tree, int32_t nid) {
|
virtual std::string NodeStat(RegTree const& tree, int32_t nid) const {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) = 0;
|
virtual std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const = 0;
|
||||||
|
|
||||||
virtual std::string SplitNode(RegTree const& tree, int32_t nid, uint32_t depth) {
|
virtual std::string SplitNode(RegTree const& tree, int32_t nid, uint32_t depth) {
|
||||||
auto const split_index = tree[nid].SplitIndex();
|
auto const split_index = tree[nid].SplitIndex();
|
||||||
@@ -110,7 +110,7 @@ class TreeGenerator {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) = 0;
|
virtual std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const = 0;
|
||||||
virtual std::string BuildTree(RegTree const& tree, int32_t nid, uint32_t depth) = 0;
|
virtual std::string BuildTree(RegTree const& tree, int32_t nid, uint32_t depth) = 0;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@@ -181,7 +181,7 @@ class TextGenerator : public TreeGenerator {
|
|||||||
TextGenerator(FeatureMap const& fmap, std::string const& attrs, bool with_stats) :
|
TextGenerator(FeatureMap const& fmap, std::string const& attrs, bool with_stats) :
|
||||||
TreeGenerator(fmap, with_stats) {}
|
TreeGenerator(fmap, with_stats) {}
|
||||||
|
|
||||||
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
static std::string kLeafTemplate = "{tabs}{nid}:leaf={leaf}{stats}";
|
static std::string kLeafTemplate = "{tabs}{nid}:leaf={leaf}{stats}";
|
||||||
static std::string kStatTemplate = ",cover={cover}";
|
static std::string kStatTemplate = ",cover={cover}";
|
||||||
std::string result = SuperT::Match(
|
std::string result = SuperT::Match(
|
||||||
@@ -195,7 +195,7 @@ class TextGenerator : public TreeGenerator {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
static std::string const kIndicatorTemplate = "{nid}:[{fname}] yes={yes},no={no}";
|
static std::string const kIndicatorTemplate = "{nid}:[{fname}] yes={yes},no={no}";
|
||||||
int32_t nyes = tree[nid].DefaultLeft() ?
|
int32_t nyes = tree[nid].DefaultLeft() ?
|
||||||
tree[nid].RightChild() : tree[nid].LeftChild();
|
tree[nid].RightChild() : tree[nid].LeftChild();
|
||||||
@@ -211,7 +211,7 @@ class TextGenerator : public TreeGenerator {
|
|||||||
|
|
||||||
std::string SplitNodeImpl(
|
std::string SplitNodeImpl(
|
||||||
RegTree const& tree, int32_t nid, std::string const& template_str,
|
RegTree const& tree, int32_t nid, std::string const& template_str,
|
||||||
std::string cond, uint32_t depth) {
|
std::string cond, uint32_t depth) const {
|
||||||
auto split_index = tree[nid].SplitIndex();
|
auto split_index = tree[nid].SplitIndex();
|
||||||
std::string const result = SuperT::Match(
|
std::string const result = SuperT::Match(
|
||||||
template_str,
|
template_str,
|
||||||
@@ -226,7 +226,7 @@ class TextGenerator : public TreeGenerator {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
static std::string const kIntegerTemplate =
|
static std::string const kIntegerTemplate =
|
||||||
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
|
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
|
||||||
auto cond = tree[nid].SplitCond();
|
auto cond = tree[nid].SplitCond();
|
||||||
@@ -238,21 +238,21 @@ class TextGenerator : public TreeGenerator {
|
|||||||
std::to_string(integer_threshold), depth);
|
std::to_string(integer_threshold), depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
static std::string const kQuantitiveTemplate =
|
static std::string const kQuantitiveTemplate =
|
||||||
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
|
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
|
||||||
auto cond = tree[nid].SplitCond();
|
auto cond = tree[nid].SplitCond();
|
||||||
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
|
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
auto cond = tree[nid].SplitCond();
|
auto cond = tree[nid].SplitCond();
|
||||||
static std::string const kNodeTemplate =
|
static std::string const kNodeTemplate =
|
||||||
"{tabs}{nid}:[f{fname}<{cond}] yes={left},no={right},missing={missing}";
|
"{tabs}{nid}:[f{fname}<{cond}] yes={left},no={right},missing={missing}";
|
||||||
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
|
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string NodeStat(RegTree const& tree, int32_t nid) override {
|
std::string NodeStat(RegTree const& tree, int32_t nid) const override {
|
||||||
static std::string const kStatTemplate = ",gain={loss_chg},cover={sum_hess}";
|
static std::string const kStatTemplate = ",gain={loss_chg},cover={sum_hess}";
|
||||||
std::string const result = SuperT::Match(
|
std::string const result = SuperT::Match(
|
||||||
kStatTemplate,
|
kStatTemplate,
|
||||||
@@ -297,7 +297,7 @@ class JsonGenerator : public TreeGenerator {
|
|||||||
JsonGenerator(FeatureMap const& fmap, std::string attrs, bool with_stats) :
|
JsonGenerator(FeatureMap const& fmap, std::string attrs, bool with_stats) :
|
||||||
TreeGenerator(fmap, with_stats) {}
|
TreeGenerator(fmap, with_stats) {}
|
||||||
|
|
||||||
std::string Indent(uint32_t depth) {
|
std::string Indent(uint32_t depth) const {
|
||||||
std::string result;
|
std::string result;
|
||||||
for (uint32_t i = 0; i < depth + 1; ++i) {
|
for (uint32_t i = 0; i < depth + 1; ++i) {
|
||||||
result += " ";
|
result += " ";
|
||||||
@@ -305,7 +305,7 @@ class JsonGenerator : public TreeGenerator {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
static std::string const kLeafTemplate =
|
static std::string const kLeafTemplate =
|
||||||
R"L({ "nodeid": {nid}, "leaf": {leaf} {stat}})L";
|
R"L({ "nodeid": {nid}, "leaf": {leaf} {stat}})L";
|
||||||
static std::string const kStatTemplate =
|
static std::string const kStatTemplate =
|
||||||
@@ -321,11 +321,11 @@ class JsonGenerator : public TreeGenerator {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
int32_t nyes = tree[nid].DefaultLeft() ?
|
int32_t nyes = tree[nid].DefaultLeft() ?
|
||||||
tree[nid].RightChild() : tree[nid].LeftChild();
|
tree[nid].RightChild() : tree[nid].LeftChild();
|
||||||
static std::string const kIndicatorTemplate =
|
static std::string const kIndicatorTemplate =
|
||||||
R"ID( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", "yes": {yes}, "no": {no}})ID";
|
R"ID( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", "yes": {yes}, "no": {no})ID";
|
||||||
auto split_index = tree[nid].SplitIndex();
|
auto split_index = tree[nid].SplitIndex();
|
||||||
auto result = SuperT::Match(
|
auto result = SuperT::Match(
|
||||||
kIndicatorTemplate,
|
kIndicatorTemplate,
|
||||||
@@ -337,8 +337,9 @@ class JsonGenerator : public TreeGenerator {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string SplitNodeImpl(RegTree const& tree, int32_t nid,
|
std::string SplitNodeImpl(RegTree const &tree, int32_t nid,
|
||||||
std::string const& template_str, std::string cond, uint32_t depth) {
|
std::string const &template_str, std::string cond,
|
||||||
|
uint32_t depth) const {
|
||||||
auto split_index = tree[nid].SplitIndex();
|
auto split_index = tree[nid].SplitIndex();
|
||||||
std::string const result = SuperT::Match(
|
std::string const result = SuperT::Match(
|
||||||
template_str,
|
template_str,
|
||||||
@@ -353,7 +354,7 @@ class JsonGenerator : public TreeGenerator {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
auto cond = tree[nid].SplitCond();
|
auto cond = tree[nid].SplitCond();
|
||||||
const bst_float floored = std::floor(cond);
|
const bst_float floored = std::floor(cond);
|
||||||
const int32_t integer_threshold
|
const int32_t integer_threshold
|
||||||
@@ -367,7 +368,7 @@ class JsonGenerator : public TreeGenerator {
|
|||||||
std::to_string(integer_threshold), depth);
|
std::to_string(integer_threshold), depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
static std::string const kQuantitiveTemplate =
|
static std::string const kQuantitiveTemplate =
|
||||||
R"I( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", )I"
|
R"I( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", )I"
|
||||||
R"I("split_condition": {cond}, "yes": {left}, "no": {right}, )I"
|
R"I("split_condition": {cond}, "yes": {left}, "no": {right}, )I"
|
||||||
@@ -376,7 +377,7 @@ class JsonGenerator : public TreeGenerator {
|
|||||||
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
|
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
auto cond = tree[nid].SplitCond();
|
auto cond = tree[nid].SplitCond();
|
||||||
static std::string const kNodeTemplate =
|
static std::string const kNodeTemplate =
|
||||||
R"I( "nodeid": {nid}, "depth": {depth}, "split": {fname}, )I"
|
R"I( "nodeid": {nid}, "depth": {depth}, "split": {fname}, )I"
|
||||||
@@ -385,7 +386,7 @@ class JsonGenerator : public TreeGenerator {
|
|||||||
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
|
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string NodeStat(RegTree const& tree, int32_t nid) override {
|
std::string NodeStat(RegTree const& tree, int32_t nid) const override {
|
||||||
static std::string kStatTemplate =
|
static std::string kStatTemplate =
|
||||||
R"S(, "gain": {loss_chg}, "cover": {sum_hess})S";
|
R"S(, "gain": {loss_chg}, "cover": {sum_hess})S";
|
||||||
auto result = SuperT::Match(
|
auto result = SuperT::Match(
|
||||||
@@ -529,7 +530,7 @@ class GraphvizGenerator : public TreeGenerator {
|
|||||||
protected:
|
protected:
|
||||||
// Only indicator is different, so we combine all different node types into this
|
// Only indicator is different, so we combine all different node types into this
|
||||||
// function.
|
// function.
|
||||||
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
auto split = tree[nid].SplitIndex();
|
auto split = tree[nid].SplitIndex();
|
||||||
auto cond = tree[nid].SplitCond();
|
auto cond = tree[nid].SplitCond();
|
||||||
static std::string const kNodeTemplate =
|
static std::string const kNodeTemplate =
|
||||||
@@ -563,7 +564,7 @@ class GraphvizGenerator : public TreeGenerator {
|
|||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
|
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
|
||||||
static std::string const kLeafTemplate =
|
static std::string const kLeafTemplate =
|
||||||
" {nid} [ label=\"leaf={leaf-value}\" {params}]\n";
|
" {nid} [ label=\"leaf={leaf-value}\" {params}]\n";
|
||||||
auto result = SuperT::Match(kLeafTemplate, {
|
auto result = SuperT::Match(kLeafTemplate, {
|
||||||
|
|||||||
@@ -21,8 +21,6 @@ whl_path = sys.argv[1]
|
|||||||
commit_id = sys.argv[2]
|
commit_id = sys.argv[2]
|
||||||
platform_tag = sys.argv[3]
|
platform_tag = sys.argv[3]
|
||||||
|
|
||||||
assert platform_tag in ['manylinux1_x86_64', 'manylinux2010_x86_64', 'win_amd64']
|
|
||||||
|
|
||||||
dirname, basename = os.path.dirname(whl_path), os.path.basename(whl_path)
|
dirname, basename = os.path.dirname(whl_path), os.path.basename(whl_path)
|
||||||
|
|
||||||
with cd(dirname):
|
with cd(dirname):
|
||||||
|
|||||||
@@ -149,9 +149,17 @@ TEST(MetaInfo, Validate) {
|
|||||||
info.num_col_ = 3;
|
info.num_col_ = 3;
|
||||||
std::vector<xgboost::bst_group_t> groups (11);
|
std::vector<xgboost::bst_group_t> groups (11);
|
||||||
info.SetInfo("group", groups.data(), xgboost::DataType::kUInt32, 11);
|
info.SetInfo("group", groups.data(), xgboost::DataType::kUInt32, 11);
|
||||||
EXPECT_THROW(info.Validate(), dmlc::Error);
|
EXPECT_THROW(info.Validate(0), dmlc::Error);
|
||||||
|
|
||||||
std::vector<float> labels(info.num_row_ + 1);
|
std::vector<float> labels(info.num_row_ + 1);
|
||||||
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1);
|
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_ + 1);
|
||||||
EXPECT_THROW(info.Validate(), dmlc::Error);
|
EXPECT_THROW(info.Validate(0), dmlc::Error);
|
||||||
|
|
||||||
|
#if defined(XGBOOST_USE_CUDA)
|
||||||
|
info.group_ptr_.clear();
|
||||||
|
labels.resize(info.num_row_);
|
||||||
|
info.SetInfo("label", labels.data(), xgboost::DataType::kFloat32, info.num_row_);
|
||||||
|
info.labels_.SetDevice(0);
|
||||||
|
EXPECT_THROW(info.Validate(1), dmlc::Error);
|
||||||
|
#endif // defined(XGBOOST_USE_CUDA)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -151,6 +151,10 @@ TEST(Tree, DumpJson) {
|
|||||||
|
|
||||||
str = tree.DumpModel(fmap, false, "json");
|
str = tree.DumpModel(fmap, false, "json");
|
||||||
ASSERT_EQ(str.find("cover"), std::string::npos);
|
ASSERT_EQ(str.find("cover"), std::string::npos);
|
||||||
|
|
||||||
|
|
||||||
|
auto j_tree = Json::Load({str.c_str(), str.size()});
|
||||||
|
ASSERT_EQ(get<Array>(j_tree["children"]).size(), 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Tree, DumpText) {
|
TEST(Tree, DumpText) {
|
||||||
|
|||||||
@@ -136,3 +136,14 @@ Arrow specification.'''
|
|||||||
n = 100
|
n = 100
|
||||||
X = cp.random.random((n, 2))
|
X = cp.random.random((n, 2))
|
||||||
xgb.DeviceQuantileDMatrix(X.toDlpack())
|
xgb.DeviceQuantileDMatrix(X.toDlpack())
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
|
@pytest.mark.mgpu
|
||||||
|
def test_specified_device(self):
|
||||||
|
import cupy as cp
|
||||||
|
cp.cuda.runtime.setDevice(0)
|
||||||
|
dtrain = dmatrix_from_cupy(
|
||||||
|
np.float32, xgb.DeviceQuantileDMatrix, np.nan)
|
||||||
|
with pytest.raises(xgb.core.XGBoostError):
|
||||||
|
xgb.train({'tree_method': 'gpu_hist', 'gpu_id': 1},
|
||||||
|
dtrain, num_boost_round=10)
|
||||||
|
|||||||
@@ -12,25 +12,15 @@ rng = np.random.RandomState(1994)
|
|||||||
class TestGPUBasicModels(unittest.TestCase):
|
class TestGPUBasicModels(unittest.TestCase):
|
||||||
cputest = test_bm.TestModels()
|
cputest = test_bm.TestModels()
|
||||||
|
|
||||||
def test_eta_decay_gpu_hist(self):
|
def run_cls(self, X, y, deterministic):
|
||||||
self.cputest.run_eta_decay('gpu_hist')
|
|
||||||
|
|
||||||
def test_deterministic_gpu_hist(self):
|
|
||||||
kRows = 1000
|
|
||||||
kCols = 64
|
|
||||||
kClasses = 4
|
|
||||||
# Create large values to force rounding.
|
|
||||||
X = np.random.randn(kRows, kCols) * 1e4
|
|
||||||
y = np.random.randint(0, kClasses, size=kRows)
|
|
||||||
|
|
||||||
cls = xgb.XGBClassifier(tree_method='gpu_hist',
|
cls = xgb.XGBClassifier(tree_method='gpu_hist',
|
||||||
deterministic_histogram=True,
|
deterministic_histogram=deterministic,
|
||||||
single_precision_histogram=True)
|
single_precision_histogram=True)
|
||||||
cls.fit(X, y)
|
cls.fit(X, y)
|
||||||
cls.get_booster().save_model('test_deterministic_gpu_hist-0.json')
|
cls.get_booster().save_model('test_deterministic_gpu_hist-0.json')
|
||||||
|
|
||||||
cls = xgb.XGBClassifier(tree_method='gpu_hist',
|
cls = xgb.XGBClassifier(tree_method='gpu_hist',
|
||||||
deterministic_histogram=True,
|
deterministic_histogram=deterministic,
|
||||||
single_precision_histogram=True)
|
single_precision_histogram=True)
|
||||||
cls.fit(X, y)
|
cls.fit(X, y)
|
||||||
cls.get_booster().save_model('test_deterministic_gpu_hist-1.json')
|
cls.get_booster().save_model('test_deterministic_gpu_hist-1.json')
|
||||||
@@ -40,7 +30,24 @@ class TestGPUBasicModels(unittest.TestCase):
|
|||||||
with open('test_deterministic_gpu_hist-1.json', 'r') as fd:
|
with open('test_deterministic_gpu_hist-1.json', 'r') as fd:
|
||||||
model_1 = fd.read()
|
model_1 = fd.read()
|
||||||
|
|
||||||
assert hash(model_0) == hash(model_1)
|
|
||||||
|
|
||||||
os.remove('test_deterministic_gpu_hist-0.json')
|
os.remove('test_deterministic_gpu_hist-0.json')
|
||||||
os.remove('test_deterministic_gpu_hist-1.json')
|
os.remove('test_deterministic_gpu_hist-1.json')
|
||||||
|
|
||||||
|
return hash(model_0), hash(model_1)
|
||||||
|
|
||||||
|
def test_eta_decay_gpu_hist(self):
|
||||||
|
self.cputest.run_eta_decay('gpu_hist')
|
||||||
|
|
||||||
|
def test_deterministic_gpu_hist(self):
|
||||||
|
kRows = 1000
|
||||||
|
kCols = 64
|
||||||
|
kClasses = 4
|
||||||
|
# Create large values to force rounding.
|
||||||
|
X = np.random.randn(kRows, kCols) * 1e4
|
||||||
|
y = np.random.randint(0, kClasses, size=kRows) * 1e4
|
||||||
|
|
||||||
|
model_0, model_1 = self.run_cls(X, y, True)
|
||||||
|
assert model_0 == model_1
|
||||||
|
|
||||||
|
model_0, model_1 = self.run_cls(X, y, False)
|
||||||
|
assert model_0 != model_1
|
||||||
|
|||||||
@@ -121,6 +121,7 @@ class TestGPUPredict(unittest.TestCase):
|
|||||||
@pytest.mark.skipif(**tm.no_cupy())
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
def test_inplace_predict_cupy(self):
|
def test_inplace_predict_cupy(self):
|
||||||
import cupy as cp
|
import cupy as cp
|
||||||
|
cp.cuda.runtime.setDevice(0)
|
||||||
rows = 1000
|
rows = 1000
|
||||||
cols = 10
|
cols = 10
|
||||||
cp_rng = cp.random.RandomState(1994)
|
cp_rng = cp.random.RandomState(1994)
|
||||||
|
|||||||
@@ -325,7 +325,7 @@ class TestModels(unittest.TestCase):
|
|||||||
assert locale.getpreferredencoding(False) == loc
|
assert locale.getpreferredencoding(False) == loc
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_json_schema())
|
@pytest.mark.skipif(**tm.no_json_schema())
|
||||||
def test_json_schema(self):
|
def test_json_io_schema(self):
|
||||||
import jsonschema
|
import jsonschema
|
||||||
model_path = 'test_json_schema.json'
|
model_path = 'test_json_schema.json'
|
||||||
path = os.path.dirname(
|
path = os.path.dirname(
|
||||||
@@ -342,3 +342,35 @@ class TestModels(unittest.TestCase):
|
|||||||
jsonschema.validate(instance=json_model(model_path, parameters),
|
jsonschema.validate(instance=json_model(model_path, parameters),
|
||||||
schema=schema)
|
schema=schema)
|
||||||
os.remove(model_path)
|
os.remove(model_path)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_json_schema())
|
||||||
|
def test_json_dump_schema(self):
|
||||||
|
import jsonschema
|
||||||
|
|
||||||
|
def validate_model(parameters):
|
||||||
|
X = np.random.random((100, 30))
|
||||||
|
y = np.random.randint(0, 4, size=(100,))
|
||||||
|
|
||||||
|
parameters['num_class'] = 4
|
||||||
|
m = xgb.DMatrix(X, y)
|
||||||
|
|
||||||
|
booster = xgb.train(parameters, m)
|
||||||
|
dump = booster.get_dump(dump_format='json')
|
||||||
|
|
||||||
|
for i in range(len(dump)):
|
||||||
|
jsonschema.validate(instance=json.loads(dump[i]),
|
||||||
|
schema=schema)
|
||||||
|
|
||||||
|
path = os.path.dirname(
|
||||||
|
os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
doc = os.path.join(path, 'doc', 'dump.schema')
|
||||||
|
with open(doc, 'r') as fd:
|
||||||
|
schema = json.load(fd)
|
||||||
|
|
||||||
|
parameters = {'tree_method': 'hist', 'booster': 'gbtree',
|
||||||
|
'objective': 'multi:softmax'}
|
||||||
|
validate_model(parameters)
|
||||||
|
|
||||||
|
parameters = {'tree_method': 'hist', 'booster': 'dart',
|
||||||
|
'objective': 'multi:softmax'}
|
||||||
|
validate_model(parameters)
|
||||||
|
|||||||
@@ -23,18 +23,42 @@ if [ ${TASK} == "python_test" ]; then
|
|||||||
mkdir build && cd build
|
mkdir build && cd build
|
||||||
cmake .. -DUSE_OPENMP=ON -DCMAKE_VERBOSE_MAKEFILE=ON
|
cmake .. -DUSE_OPENMP=ON -DCMAKE_VERBOSE_MAKEFILE=ON
|
||||||
make -j$(nproc)
|
make -j$(nproc)
|
||||||
cd ..
|
|
||||||
|
|
||||||
echo "-------------------------------"
|
echo "-------------------------------"
|
||||||
conda activate python3
|
conda activate python3
|
||||||
conda --version
|
conda --version
|
||||||
python --version
|
python --version
|
||||||
|
|
||||||
|
# Build binary wheel
|
||||||
|
cd ../python-package
|
||||||
|
python setup.py bdist_wheel
|
||||||
|
TAG=macosx_10_13_x86_64.macosx_10_14_x86_64.macosx_10_15_x86_64
|
||||||
|
python ../tests/ci_build/rename_whl.py dist/*.whl ${TRAVIS_COMMIT} ${TAG}
|
||||||
|
python -m pip install ./dist/xgboost-*-py3-none-${TAG}.whl
|
||||||
|
|
||||||
|
# Run unit tests
|
||||||
|
cd ..
|
||||||
python -m pip install graphviz pytest pytest-cov codecov
|
python -m pip install graphviz pytest pytest-cov codecov
|
||||||
python -m pip install datatable
|
python -m pip install datatable
|
||||||
python -m pip install numpy scipy pandas matplotlib scikit-learn dask[complete]
|
python -m pip install numpy scipy pandas matplotlib scikit-learn dask[complete]
|
||||||
python -m pytest -v --fulltrace -s tests/python --cov=python-package/xgboost || exit -1
|
python -m pytest -v --fulltrace -s tests/python --cov=python-package/xgboost || exit -1
|
||||||
codecov
|
codecov
|
||||||
|
|
||||||
|
# Deploy binary wheel to S3
|
||||||
|
python -m pip install awscli
|
||||||
|
if [ "${TRAVIS_PULL_REQUEST}" != "false" ]
|
||||||
|
then
|
||||||
|
S3_DEST="s3://xgboost-nightly-builds/PR-${TRAVIS_PULL_REQUEST}/"
|
||||||
|
else
|
||||||
|
if [ "${TRAVIS_BRANCH}" == "master" ]
|
||||||
|
then
|
||||||
|
S3_DEST="s3://xgboost-nightly-builds/"
|
||||||
|
elif [ -z "${TRAVIS_TAG}" ]
|
||||||
|
then
|
||||||
|
S3_DEST="s3://xgboost-nightly-builds/${TRAVIS_BRANCH}/"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
python -m awscli s3 cp python-package/dist/*.whl "${S3_DEST}" --acl public-read || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ${TASK} == "java_test" ]; then
|
if [ ${TASK} == "java_test" ]; then
|
||||||
|
|||||||
Reference in New Issue
Block a user