Compare commits

..

4 Commits

Author SHA1 Message Date
Hyunsu Cho
f5d4fddafe Release 1.1.0 2020-05-17 00:26:22 -07:00
Jiaming Yuan
66690f3d07 Add JSON schema to model dump. (#5660) 2020-05-15 12:26:49 +08:00
Rory Mitchell
c42f533ae9 Resolve vector<bool>::iterator crash (#5642) 2020-05-11 18:14:41 +08:00
Philip Hyunsu Cho
751160b69c Upgrade to CUDA 10.0 (#5649)
Co-authored-by: fis <jm.yuan@outlook.com>
2020-05-11 18:04:47 +08:00
14 changed files with 167 additions and 69 deletions

11
Jenkinsfile vendored
View File

@@ -64,7 +64,6 @@ pipeline {
'build-cpu': { BuildCPU() },
'build-cpu-rabit-mock': { BuildCPUMock() },
'build-cpu-non-omp': { BuildCPUNonOmp() },
'build-gpu-cuda9.0': { BuildCUDA(cuda_version: '9.0') },
'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
'build-jvm-packages': { BuildJVMPackages(spark_version: '2.4.3') },
@@ -251,10 +250,10 @@ def BuildCUDA(args) {
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
${dockerRun} ${container_type} ${docker_binary} ${docker_args} python3 tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
"""
// Stash wheel for CUDA 9.0 target
if (args.cuda_version == '9.0') {
// Stash wheel for CUDA 10.0 target
if (args.cuda_version == '10.0') {
echo 'Stashing Python wheel...'
stash name: 'xgboost_whl_cuda9', includes: 'python-package/dist/*.whl'
stash name: 'xgboost_whl_cuda10', includes: 'python-package/dist/*.whl'
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
echo 'Stashing C++ test executable (testxgboost)...'
@@ -298,7 +297,7 @@ def BuildJVMDoc() {
def TestPythonCPU() {
node('linux && cpu') {
unstash name: 'xgboost_whl_cuda9'
unstash name: 'xgboost_whl_cuda10'
unstash name: 'srcs'
unstash name: 'xgboost_cli'
echo "Test Python CPU"
@@ -315,7 +314,7 @@ def TestPythonCPU() {
def TestPythonGPU(args) {
nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
node(nodeReq) {
unstash name: 'xgboost_whl_cuda9'
unstash name: 'xgboost_whl_cuda10'
unstash name: 'srcs'
echo "Test Python GPU: CUDA ${args.cuda_version}"
def container_type = "gpu"

View File

@@ -1 +1 @@
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@rc2
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@

55
doc/dump.schema Normal file
View File

@@ -0,0 +1,55 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"split_node": {
"type": "object",
"properties": {
"nodeid": {
"type": "number",
"minimum": 0
},
"depth": {
"type": "number",
"minimum": 0
},
"yes": {
"type": "number",
"minimum": 0
},
"no": {
"type": "number",
"minimum": 0
},
"split": {
"type": "string"
},
"children": {
"type": "array",
"items": {
"oneOf": [
{"$ref": "#/definitions/split_node"},
{"$ref": "#/definitions/leaf_node"}
]
},
"maxItems": 2
}
},
"required": ["nodeid", "depth", "yes", "no", "split", "children"]
},
"leaf_node": {
"type": "object",
"properties": {
"nodeid": {
"type": "number",
"minimum": 0
},
"leaf": {
"type": "number"
}
},
"required": ["nodeid", "leaf"]
}
},
"type": "object",
"$ref": "#/definitions/split_node"
}

View File

@@ -6,7 +6,7 @@
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
<packaging>pom</packaging>
<name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description>

View File

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
</parent>
<artifactId>xgboost4j-example_2.12</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
<packaging>jar</packaging>
<build>
<plugins>
@@ -26,7 +26,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
</parent>
<artifactId>xgboost4j-flink_2.12</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
<build>
<plugins>
<plugin>
@@ -26,7 +26,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

View File

@@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
</parent>
<artifactId>xgboost4j-spark_2.12</artifactId>
<build>
@@ -24,7 +24,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

View File

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
</parent>
<artifactId>xgboost4j_2.12</artifactId>
<version>1.1.0-RC2</version>
<version>1.1.0</version>
<packaging>jar</packaging>
<dependencies>

View File

@@ -1 +1 @@
1.1.0rc2
1.1.0

View File

@@ -82,14 +82,16 @@ template <typename BinIdxType>
class DenseColumn: public Column<BinIdxType> {
public:
DenseColumn(ColumnType type, common::Span<const BinIdxType> index,
uint32_t index_base,
const std::vector<bool>::const_iterator missing_flags)
uint32_t index_base, const std::vector<bool>& missing_flags,
size_t feature_offset)
: Column<BinIdxType>(type, index, index_base),
missing_flags_(missing_flags) {}
bool IsMissing(size_t idx) const { return missing_flags_[idx]; }
missing_flags_(missing_flags),
feature_offset_(feature_offset) {}
bool IsMissing(size_t idx) const { return missing_flags_[feature_offset_ + idx]; }
private:
/* flags for missing values in dense columns */
std::vector<bool>::const_iterator missing_flags_;
const std::vector<bool>& missing_flags_;
size_t feature_offset_;
};
/*! \brief a collection of columns, with support for construction from
@@ -208,10 +210,8 @@ class ColumnMatrix {
column_size };
std::unique_ptr<const Column<BinIdxType> > res;
if (type_[fid] == ColumnType::kDenseColumn) {
std::vector<bool>::const_iterator column_iterator = missing_flags_.begin();
advance(column_iterator, feature_offset); // increment iterator to right position
res.reset(new DenseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
column_iterator));
missing_flags_, feature_offset));
} else {
res.reset(new SparseColumn<BinIdxType>(type_[fid], bin_index, index_base_[fid],
{&row_ind_[feature_offset], column_size}));

View File

@@ -68,20 +68,20 @@ class TreeGenerator {
return result;
}
virtual std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) {
virtual std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) const {
return "";
}
virtual std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) {
virtual std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) const {
return "";
}
virtual std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) {
virtual std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) const {
return "";
}
virtual std::string NodeStat(RegTree const& tree, int32_t nid) {
virtual std::string NodeStat(RegTree const& tree, int32_t nid) const {
return "";
}
virtual std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) = 0;
virtual std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const = 0;
virtual std::string SplitNode(RegTree const& tree, int32_t nid, uint32_t depth) {
auto const split_index = tree[nid].SplitIndex();
@@ -110,7 +110,7 @@ class TreeGenerator {
return result;
}
virtual std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) = 0;
virtual std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const = 0;
virtual std::string BuildTree(RegTree const& tree, int32_t nid, uint32_t depth) = 0;
public:
@@ -181,7 +181,7 @@ class TextGenerator : public TreeGenerator {
TextGenerator(FeatureMap const& fmap, std::string const& attrs, bool with_stats) :
TreeGenerator(fmap, with_stats) {}
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
static std::string kLeafTemplate = "{tabs}{nid}:leaf={leaf}{stats}";
static std::string kStatTemplate = ",cover={cover}";
std::string result = SuperT::Match(
@@ -195,7 +195,7 @@ class TextGenerator : public TreeGenerator {
return result;
}
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) const override {
static std::string const kIndicatorTemplate = "{nid}:[{fname}] yes={yes},no={no}";
int32_t nyes = tree[nid].DefaultLeft() ?
tree[nid].RightChild() : tree[nid].LeftChild();
@@ -211,7 +211,7 @@ class TextGenerator : public TreeGenerator {
std::string SplitNodeImpl(
RegTree const& tree, int32_t nid, std::string const& template_str,
std::string cond, uint32_t depth) {
std::string cond, uint32_t depth) const {
auto split_index = tree[nid].SplitIndex();
std::string const result = SuperT::Match(
template_str,
@@ -226,7 +226,7 @@ class TextGenerator : public TreeGenerator {
return result;
}
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) const override {
static std::string const kIntegerTemplate =
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
auto cond = tree[nid].SplitCond();
@@ -238,21 +238,21 @@ class TextGenerator : public TreeGenerator {
std::to_string(integer_threshold), depth);
}
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) const override {
static std::string const kQuantitiveTemplate =
"{tabs}{nid}:[{fname}<{cond}] yes={left},no={right},missing={missing}";
auto cond = tree[nid].SplitCond();
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
}
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
auto cond = tree[nid].SplitCond();
static std::string const kNodeTemplate =
"{tabs}{nid}:[f{fname}<{cond}] yes={left},no={right},missing={missing}";
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
}
std::string NodeStat(RegTree const& tree, int32_t nid) override {
std::string NodeStat(RegTree const& tree, int32_t nid) const override {
static std::string const kStatTemplate = ",gain={loss_chg},cover={sum_hess}";
std::string const result = SuperT::Match(
kStatTemplate,
@@ -297,7 +297,7 @@ class JsonGenerator : public TreeGenerator {
JsonGenerator(FeatureMap const& fmap, std::string attrs, bool with_stats) :
TreeGenerator(fmap, with_stats) {}
std::string Indent(uint32_t depth) {
std::string Indent(uint32_t depth) const {
std::string result;
for (uint32_t i = 0; i < depth + 1; ++i) {
result += " ";
@@ -305,7 +305,7 @@ class JsonGenerator : public TreeGenerator {
return result;
}
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
static std::string const kLeafTemplate =
R"L({ "nodeid": {nid}, "leaf": {leaf} {stat}})L";
static std::string const kStatTemplate =
@@ -321,11 +321,11 @@ class JsonGenerator : public TreeGenerator {
return result;
}
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string Indicator(RegTree const& tree, int32_t nid, uint32_t depth) const override {
int32_t nyes = tree[nid].DefaultLeft() ?
tree[nid].RightChild() : tree[nid].LeftChild();
static std::string const kIndicatorTemplate =
R"ID( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", "yes": {yes}, "no": {no}})ID";
R"ID( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", "yes": {yes}, "no": {no})ID";
auto split_index = tree[nid].SplitIndex();
auto result = SuperT::Match(
kIndicatorTemplate,
@@ -337,8 +337,9 @@ class JsonGenerator : public TreeGenerator {
return result;
}
std::string SplitNodeImpl(RegTree const& tree, int32_t nid,
std::string const& template_str, std::string cond, uint32_t depth) {
std::string SplitNodeImpl(RegTree const &tree, int32_t nid,
std::string const &template_str, std::string cond,
uint32_t depth) const {
auto split_index = tree[nid].SplitIndex();
std::string const result = SuperT::Match(
template_str,
@@ -353,7 +354,7 @@ class JsonGenerator : public TreeGenerator {
return result;
}
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string Integer(RegTree const& tree, int32_t nid, uint32_t depth) const override {
auto cond = tree[nid].SplitCond();
const bst_float floored = std::floor(cond);
const int32_t integer_threshold
@@ -367,7 +368,7 @@ class JsonGenerator : public TreeGenerator {
std::to_string(integer_threshold), depth);
}
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string Quantitive(RegTree const& tree, int32_t nid, uint32_t depth) const override {
static std::string const kQuantitiveTemplate =
R"I( "nodeid": {nid}, "depth": {depth}, "split": "{fname}", )I"
R"I("split_condition": {cond}, "yes": {left}, "no": {right}, )I"
@@ -376,7 +377,7 @@ class JsonGenerator : public TreeGenerator {
return SplitNodeImpl(tree, nid, kQuantitiveTemplate, SuperT::ToStr(cond), depth);
}
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
auto cond = tree[nid].SplitCond();
static std::string const kNodeTemplate =
R"I( "nodeid": {nid}, "depth": {depth}, "split": {fname}, )I"
@@ -385,7 +386,7 @@ class JsonGenerator : public TreeGenerator {
return SplitNodeImpl(tree, nid, kNodeTemplate, SuperT::ToStr(cond), depth);
}
std::string NodeStat(RegTree const& tree, int32_t nid) override {
std::string NodeStat(RegTree const& tree, int32_t nid) const override {
static std::string kStatTemplate =
R"S(, "gain": {loss_chg}, "cover": {sum_hess})S";
auto result = SuperT::Match(
@@ -529,7 +530,7 @@ class GraphvizGenerator : public TreeGenerator {
protected:
// Only indicator is different, so we combine all different node types into this
// function.
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string PlainNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
auto split = tree[nid].SplitIndex();
auto cond = tree[nid].SplitCond();
static std::string const kNodeTemplate =
@@ -563,7 +564,7 @@ class GraphvizGenerator : public TreeGenerator {
return result;
};
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) override {
std::string LeafNode(RegTree const& tree, int32_t nid, uint32_t depth) const override {
static std::string const kLeafTemplate =
" {nid} [ label=\"leaf={leaf-value}\" {params}]\n";
auto result = SuperT::Match(kLeafTemplate, {

View File

@@ -151,6 +151,10 @@ TEST(Tree, DumpJson) {
str = tree.DumpModel(fmap, false, "json");
ASSERT_EQ(str.find("cover"), std::string::npos);
auto j_tree = Json::Load({str.c_str(), str.size()});
ASSERT_EQ(get<Array>(j_tree["children"]).size(), 2);
}
TEST(Tree, DumpText) {

View File

@@ -12,25 +12,15 @@ rng = np.random.RandomState(1994)
class TestGPUBasicModels(unittest.TestCase):
cputest = test_bm.TestModels()
def test_eta_decay_gpu_hist(self):
self.cputest.run_eta_decay('gpu_hist')
def test_deterministic_gpu_hist(self):
kRows = 1000
kCols = 64
kClasses = 4
# Create large values to force rounding.
X = np.random.randn(kRows, kCols) * 1e4
y = np.random.randint(0, kClasses, size=kRows)
def run_cls(self, X, y, deterministic):
cls = xgb.XGBClassifier(tree_method='gpu_hist',
deterministic_histogram=True,
deterministic_histogram=deterministic,
single_precision_histogram=True)
cls.fit(X, y)
cls.get_booster().save_model('test_deterministic_gpu_hist-0.json')
cls = xgb.XGBClassifier(tree_method='gpu_hist',
deterministic_histogram=True,
deterministic_histogram=deterministic,
single_precision_histogram=True)
cls.fit(X, y)
cls.get_booster().save_model('test_deterministic_gpu_hist-1.json')
@@ -40,7 +30,24 @@ class TestGPUBasicModels(unittest.TestCase):
with open('test_deterministic_gpu_hist-1.json', 'r') as fd:
model_1 = fd.read()
assert hash(model_0) == hash(model_1)
os.remove('test_deterministic_gpu_hist-0.json')
os.remove('test_deterministic_gpu_hist-1.json')
return hash(model_0), hash(model_1)
def test_eta_decay_gpu_hist(self):
self.cputest.run_eta_decay('gpu_hist')
def test_deterministic_gpu_hist(self):
kRows = 1000
kCols = 64
kClasses = 4
# Create large values to force rounding.
X = np.random.randn(kRows, kCols) * 1e4
y = np.random.randint(0, kClasses, size=kRows) * 1e4
model_0, model_1 = self.run_cls(X, y, True)
assert model_0 == model_1
model_0, model_1 = self.run_cls(X, y, False)
assert model_0 != model_1

View File

@@ -325,7 +325,7 @@ class TestModels(unittest.TestCase):
assert locale.getpreferredencoding(False) == loc
@pytest.mark.skipif(**tm.no_json_schema())
def test_json_schema(self):
def test_json_io_schema(self):
import jsonschema
model_path = 'test_json_schema.json'
path = os.path.dirname(
@@ -342,3 +342,35 @@ class TestModels(unittest.TestCase):
jsonschema.validate(instance=json_model(model_path, parameters),
schema=schema)
os.remove(model_path)
@pytest.mark.skipif(**tm.no_json_schema())
def test_json_dump_schema(self):
    """Check that JSON model dumps conform to ``doc/dump.schema``.

    A small 4-class model is trained once with the ``gbtree`` booster and
    once with the ``dart`` booster; every entry returned by
    ``Booster.get_dump(dump_format='json')`` is parsed and validated
    against the schema loaded from the ``doc`` directory.

    Raises
    ------
    jsonschema.ValidationError
        If any dumped entry does not satisfy the schema (raised by
        ``jsonschema.validate``).
    """
    # Imported lazily: the skipif marker above guards this test's
    # dependency on jsonschema.
    import jsonschema

    def validate_model(parameters):
        """Train with ``parameters`` plus ``num_class=4`` and validate the dump."""
        X = np.random.random((100, 30))
        y = np.random.randint(0, 4, size=(100,))

        # Work on a copy so the caller's dict is left untouched.
        params = dict(parameters, num_class=4)
        m = xgb.DMatrix(X, y)

        booster = xgb.train(params, m)
        dump = booster.get_dump(dump_format='json')

        # `schema` is resolved from the enclosing scope at call time; it is
        # bound below before validate_model is first invoked.
        for entry in dump:
            jsonschema.validate(instance=json.loads(entry),
                                schema=schema)

    # Three directory levels above this file, then into `doc/`.
    path = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    doc = os.path.join(path, 'doc', 'dump.schema')
    with open(doc, 'r') as fd:
        schema = json.load(fd)

    parameters = {'tree_method': 'hist', 'booster': 'gbtree',
                  'objective': 'multi:softmax'}
    validate_model(parameters)

    parameters = {'tree_method': 'hist', 'booster': 'dart',
                  'objective': 'multi:softmax'}
    validate_model(parameters)