Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
000292ce6d | ||
|
|
d3ec116322 | ||
|
|
a018028471 | ||
|
|
3e343159ef | ||
|
|
99e802f2ff | ||
|
|
6a29afb480 | ||
|
|
8e321adac8 | ||
|
|
d0ec65520a | ||
|
|
7aec915dcd |
@@ -1,5 +1,5 @@
|
|||||||
cmake_minimum_required(VERSION 3.13)
|
cmake_minimum_required(VERSION 3.13)
|
||||||
project(xgboost LANGUAGES CXX C VERSION 1.3.1)
|
project(xgboost LANGUAGES CXX C VERSION 1.3.3)
|
||||||
include(cmake/Utils.cmake)
|
include(cmake/Utils.cmake)
|
||||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||||
cmake_policy(SET CMP0022 NEW)
|
cmake_policy(SET CMP0022 NEW)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
Package: xgboost
|
Package: xgboost
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: Extreme Gradient Boosting
|
Title: Extreme Gradient Boosting
|
||||||
Version: 1.3.1.1
|
Version: 1.3.3.1
|
||||||
Date: 2020-08-28
|
Date: 2020-08-28
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person("Tianqi", "Chen", role = c("aut"),
|
person("Tianqi", "Chen", role = c("aut"),
|
||||||
|
|||||||
@@ -55,7 +55,7 @@
|
|||||||
#endif // defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
|
#endif // defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
|
||||||
|
|
||||||
#if defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) && \
|
#if defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) && \
|
||||||
!defined(__CUDACC__)
|
!defined(__CUDACC__) && !defined(__sun) && !defined(sun)
|
||||||
#include <parallel/algorithm>
|
#include <parallel/algorithm>
|
||||||
#define XGBOOST_PARALLEL_SORT(X, Y, Z) __gnu_parallel::sort((X), (Y), (Z))
|
#define XGBOOST_PARALLEL_SORT(X, Y, Z) __gnu_parallel::sort((X), (Y), (Z))
|
||||||
#define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) \
|
#define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) \
|
||||||
|
|||||||
@@ -6,6 +6,6 @@
|
|||||||
|
|
||||||
#define XGBOOST_VER_MAJOR 1
|
#define XGBOOST_VER_MAJOR 1
|
||||||
#define XGBOOST_VER_MINOR 3
|
#define XGBOOST_VER_MINOR 3
|
||||||
#define XGBOOST_VER_PATCH 1
|
#define XGBOOST_VER_PATCH 3
|
||||||
|
|
||||||
#endif // XGBOOST_VERSION_CONFIG_H_
|
#endif // XGBOOST_VERSION_CONFIG_H_
|
||||||
|
|||||||
@@ -34,9 +34,9 @@ TO_VERSION=$2
|
|||||||
sed_i() {
|
sed_i() {
|
||||||
perl -p -000 -e "$1" "$2" > "$2.tmp" && mv "$2.tmp" "$2"
|
perl -p -000 -e "$1" "$2" > "$2.tmp" && mv "$2.tmp" "$2"
|
||||||
}
|
}
|
||||||
|
|
||||||
export -f sed_i
|
export -f sed_i
|
||||||
|
|
||||||
BASEDIR=$(dirname $0)/..
|
BASEDIR=$(dirname $0)/..
|
||||||
find "$BASEDIR" -name 'pom.xml' -not -path '*target*' -print \
|
find "$BASEDIR" -name 'pom.xml' -not -path '*target*' -print \
|
||||||
-exec bash -c \
|
-exec bash -c \
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>XGBoost JVM Package</name>
|
<name>XGBoost JVM Package</name>
|
||||||
<description>JVM Package for XGBoost</description>
|
<description>JVM Package for XGBoost</description>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
@@ -37,7 +37,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j_2.12</artifactId>
|
<artifactId>xgboost4j_2.12</artifactId>
|
||||||
<version>1.3.1</version>
|
<version>1.3.3</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
1.3.1
|
1.3.3
|
||||||
|
|||||||
@@ -1210,10 +1210,10 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
|||||||
early_stopping_rounds=early_stopping_rounds,
|
early_stopping_rounds=early_stopping_rounds,
|
||||||
verbose=verbose)
|
verbose=verbose)
|
||||||
|
|
||||||
async def _predict_proba_async(self, data, output_margin=False,
|
async def _predict_proba_async(self, X, output_margin=False,
|
||||||
base_margin=None):
|
base_margin=None):
|
||||||
test_dmatrix = await DaskDMatrix(
|
test_dmatrix = await DaskDMatrix(
|
||||||
client=self.client, data=data, base_margin=base_margin,
|
client=self.client, data=X, base_margin=base_margin,
|
||||||
missing=self.missing
|
missing=self.missing
|
||||||
)
|
)
|
||||||
pred_probs = await predict(client=self.client,
|
pred_probs = await predict(client=self.client,
|
||||||
@@ -1223,11 +1223,11 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
|||||||
return pred_probs
|
return pred_probs
|
||||||
|
|
||||||
# pylint: disable=arguments-differ,missing-docstring
|
# pylint: disable=arguments-differ,missing-docstring
|
||||||
def predict_proba(self, data, output_margin=False, base_margin=None):
|
def predict_proba(self, X, output_margin=False, base_margin=None):
|
||||||
_assert_dask_support()
|
_assert_dask_support()
|
||||||
return self.client.sync(
|
return self.client.sync(
|
||||||
self._predict_proba_async,
|
self._predict_proba_async,
|
||||||
data,
|
X=X,
|
||||||
output_margin=output_margin,
|
output_margin=output_margin,
|
||||||
base_margin=base_margin
|
base_margin=base_margin
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
import copy
|
import copy
|
||||||
import warnings
|
import warnings
|
||||||
import json
|
import json
|
||||||
|
from typing import Optional
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args
|
from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args
|
||||||
from .training import train
|
from .training import train
|
||||||
@@ -494,6 +495,13 @@ class XGBModel(XGBModelBase):
|
|||||||
# Delete the attribute after load
|
# Delete the attribute after load
|
||||||
self.get_booster().set_attr(scikit_learn=None)
|
self.get_booster().set_attr(scikit_learn=None)
|
||||||
|
|
||||||
|
def _set_evaluation_result(self, evals_result: Optional[dict]) -> None:
|
||||||
|
if evals_result:
|
||||||
|
for val in evals_result.items():
|
||||||
|
evals_result_key = list(val[1].keys())[0]
|
||||||
|
evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
|
||||||
|
self.evals_result_ = evals_result
|
||||||
|
|
||||||
@_deprecate_positional_args
|
@_deprecate_positional_args
|
||||||
def fit(self, X, y, *, sample_weight=None, base_margin=None,
|
def fit(self, X, y, *, sample_weight=None, base_margin=None,
|
||||||
eval_set=None, eval_metric=None, early_stopping_rounds=None,
|
eval_set=None, eval_metric=None, early_stopping_rounds=None,
|
||||||
@@ -565,13 +573,6 @@ class XGBModel(XGBModelBase):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
self.n_features_in_ = X.shape[1]
|
self.n_features_in_ = X.shape[1]
|
||||||
|
|
||||||
train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
|
|
||||||
base_margin=base_margin,
|
|
||||||
missing=self.missing,
|
|
||||||
nthread=self.n_jobs)
|
|
||||||
train_dmatrix.set_info(feature_weights=feature_weights)
|
|
||||||
|
|
||||||
evals_result = {}
|
evals_result = {}
|
||||||
|
|
||||||
train_dmatrix, evals = self._wrap_evaluation_matrices(
|
train_dmatrix, evals = self._wrap_evaluation_matrices(
|
||||||
@@ -601,12 +602,7 @@ class XGBModel(XGBModelBase):
|
|||||||
verbose_eval=verbose, xgb_model=xgb_model,
|
verbose_eval=verbose, xgb_model=xgb_model,
|
||||||
callbacks=callbacks)
|
callbacks=callbacks)
|
||||||
|
|
||||||
if evals_result:
|
self._set_evaluation_result(evals_result)
|
||||||
for val in evals_result.items():
|
|
||||||
evals_result_key = list(val[1].keys())[0]
|
|
||||||
evals_result[val[0]][evals_result_key] = val[1][
|
|
||||||
evals_result_key]
|
|
||||||
self.evals_result_ = evals_result
|
|
||||||
|
|
||||||
if early_stopping_rounds is not None:
|
if early_stopping_rounds is not None:
|
||||||
self.best_score = self._Booster.best_score
|
self.best_score = self._Booster.best_score
|
||||||
@@ -919,12 +915,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
|||||||
callbacks=callbacks)
|
callbacks=callbacks)
|
||||||
|
|
||||||
self.objective = xgb_options["objective"]
|
self.objective = xgb_options["objective"]
|
||||||
if evals_result:
|
self._set_evaluation_result(evals_result)
|
||||||
for val in evals_result.items():
|
|
||||||
evals_result_key = list(val[1].keys())[0]
|
|
||||||
evals_result[val[0]][
|
|
||||||
evals_result_key] = val[1][evals_result_key]
|
|
||||||
self.evals_result_ = evals_result
|
|
||||||
|
|
||||||
if early_stopping_rounds is not None:
|
if early_stopping_rounds is not None:
|
||||||
self.best_score = self._Booster.best_score
|
self.best_score = self._Booster.best_score
|
||||||
@@ -995,10 +986,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
|||||||
return self._le.inverse_transform(column_indexes)
|
return self._le.inverse_transform(column_indexes)
|
||||||
return column_indexes
|
return column_indexes
|
||||||
|
|
||||||
def predict_proba(self, data, ntree_limit=None, validate_features=False,
|
def predict_proba(self, X, ntree_limit=None, validate_features=False,
|
||||||
base_margin=None):
|
base_margin=None):
|
||||||
"""
|
""" Predict the probability of each `X` example being of a given class.
|
||||||
Predict the probability of each `data` example being of a given class.
|
|
||||||
|
|
||||||
.. note:: This function is not thread safe
|
.. note:: This function is not thread safe
|
||||||
|
|
||||||
@@ -1008,21 +998,22 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
data : array_like
|
X : array_like
|
||||||
Feature matrix.
|
Feature matrix.
|
||||||
ntree_limit : int
|
ntree_limit : int
|
||||||
Limit number of trees in the prediction; defaults to best_ntree_limit if defined
|
Limit number of trees in the prediction; defaults to best_ntree_limit if
|
||||||
(i.e. it has been trained with early stopping), otherwise 0 (use all trees).
|
defined (i.e. it has been trained with early stopping), otherwise 0 (use all
|
||||||
|
trees).
|
||||||
validate_features : bool
|
validate_features : bool
|
||||||
When this is True, validate that the Booster's and data's feature_names are identical.
|
When this is True, validate that the Booster's and data's feature_names are
|
||||||
Otherwise, it is assumed that the feature_names are the same.
|
identical. Otherwise, it is assumed that the feature_names are the same.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
prediction : numpy array
|
prediction : numpy array
|
||||||
a numpy array with the probability of each data example being of a given class.
|
a numpy array with the probability of each data example being of a given class.
|
||||||
"""
|
"""
|
||||||
test_dmatrix = DMatrix(data, base_margin=base_margin,
|
test_dmatrix = DMatrix(X, base_margin=base_margin,
|
||||||
missing=self.missing, nthread=self.n_jobs)
|
missing=self.missing, nthread=self.n_jobs)
|
||||||
if ntree_limit is None:
|
if ntree_limit is None:
|
||||||
ntree_limit = getattr(self, "best_ntree_limit", 0)
|
ntree_limit = getattr(self, "best_ntree_limit", 0)
|
||||||
@@ -1328,12 +1319,7 @@ class XGBRanker(XGBModel):
|
|||||||
|
|
||||||
self.objective = params["objective"]
|
self.objective = params["objective"]
|
||||||
|
|
||||||
if evals_result:
|
self._set_evaluation_result(evals_result)
|
||||||
for val in evals_result.items():
|
|
||||||
evals_result_key = list(val[1].keys())[0]
|
|
||||||
evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
|
|
||||||
self.evals_result = evals_result
|
|
||||||
|
|
||||||
if early_stopping_rounds is not None:
|
if early_stopping_rounds is not None:
|
||||||
self.best_score = self._Booster.best_score
|
self.best_score = self._Booster.best_score
|
||||||
self.best_iteration = self._Booster.best_iteration
|
self.best_iteration = self._Booster.best_iteration
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
"""Training Library containing training routines."""
|
"""Training Library containing training routines."""
|
||||||
import warnings
|
import warnings
|
||||||
import copy
|
import copy
|
||||||
|
import json
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .core import Booster, XGBoostError
|
from .core import Booster, XGBoostError
|
||||||
@@ -123,7 +124,26 @@ def _train_internal(params, dtrain,
|
|||||||
bst.best_iteration = int(bst.attr('best_iteration'))
|
bst.best_iteration = int(bst.attr('best_iteration'))
|
||||||
else:
|
else:
|
||||||
bst.best_iteration = nboost - 1
|
bst.best_iteration = nboost - 1
|
||||||
|
|
||||||
|
config = json.loads(bst.save_config())
|
||||||
|
booster = config['learner']['gradient_booster']['name']
|
||||||
|
if booster == 'gblinear':
|
||||||
|
num_parallel_tree = 0
|
||||||
|
elif booster == 'dart':
|
||||||
|
num_parallel_tree = int(
|
||||||
|
config['learner']['gradient_booster']['gbtree']['gbtree_train_param'][
|
||||||
|
'num_parallel_tree'
|
||||||
|
]
|
||||||
|
)
|
||||||
|
elif booster == 'gbtree':
|
||||||
|
num_parallel_tree = int(
|
||||||
|
config['learner']['gradient_booster']['gbtree_train_param'][
|
||||||
|
'num_parallel_tree']
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise ValueError(f'Unknown booster: {booster}')
|
||||||
bst.best_ntree_limit = (bst.best_iteration + 1) * num_parallel_tree
|
bst.best_ntree_limit = (bst.best_iteration + 1) * num_parallel_tree
|
||||||
|
|
||||||
# Copy to serialise and unserialise booster to reset state and free
|
# Copy to serialise and unserialise booster to reset state and free
|
||||||
# training memory
|
# training memory
|
||||||
return bst.copy()
|
return bst.copy()
|
||||||
@@ -162,9 +182,10 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
|||||||
If there's more than one metric in the **eval_metric** parameter given in
|
If there's more than one metric in the **eval_metric** parameter given in
|
||||||
**params**, the last metric will be used for early stopping.
|
**params**, the last metric will be used for early stopping.
|
||||||
If early stopping occurs, the model will have three additional fields:
|
If early stopping occurs, the model will have three additional fields:
|
||||||
``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.
|
``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``. Use
|
||||||
(Use ``bst.best_ntree_limit`` to get the correct value if
|
``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree`` and/or
|
||||||
``num_parallel_tree`` and/or ``num_class`` appears in the parameters)
|
``num_class`` appears in the parameters. ``best_ntree_limit`` is the result of
|
||||||
|
``num_parallel_tree * (best_iteration + 1)``.
|
||||||
evals_result: dict
|
evals_result: dict
|
||||||
This dictionary stores the evaluation results of all the items in watchlist.
|
This dictionary stores the evaluation results of all the items in watchlist.
|
||||||
|
|
||||||
|
|||||||
@@ -25,6 +25,10 @@
|
|||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
|
|
||||||
|
#if defined(__sun) || defined(sun)
|
||||||
|
#include <sys/sockio.h>
|
||||||
|
#endif // defined(__sun) || defined(sun)
|
||||||
|
|
||||||
#endif // defined(_WIN32)
|
#endif // defined(_WIN32)
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|||||||
@@ -10,10 +10,6 @@ namespace xgboost {
|
|||||||
namespace gbm {
|
namespace gbm {
|
||||||
|
|
||||||
void GBLinearModel::SaveModel(Json* p_out) const {
|
void GBLinearModel::SaveModel(Json* p_out) const {
|
||||||
using WeightType = std::remove_reference<decltype(std::declval<decltype(weight)>().back())>::type;
|
|
||||||
using JsonFloat = Number::Float;
|
|
||||||
static_assert(std::is_same<WeightType, JsonFloat>::value,
|
|
||||||
"Weight type should be of the same type with JSON float");
|
|
||||||
auto& out = *p_out;
|
auto& out = *p_out;
|
||||||
|
|
||||||
size_t const n_weights = weight.size();
|
size_t const n_weights = weight.size();
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ dependencies:
|
|||||||
- scikit-learn
|
- scikit-learn
|
||||||
- pandas
|
- pandas
|
||||||
- pytest
|
- pytest
|
||||||
- python-graphviz
|
|
||||||
- boto3
|
- boto3
|
||||||
- hypothesis
|
- hypothesis
|
||||||
- jsonschema
|
- jsonschema
|
||||||
@@ -17,3 +16,4 @@ dependencies:
|
|||||||
- pip:
|
- pip:
|
||||||
- cupy-cuda101
|
- cupy-cuda101
|
||||||
- modin[all]
|
- modin[all]
|
||||||
|
- graphviz
|
||||||
|
|||||||
@@ -5,8 +5,10 @@ import numpy as np
|
|||||||
import asyncio
|
import asyncio
|
||||||
import xgboost
|
import xgboost
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import hypothesis
|
||||||
from hypothesis import given, strategies, settings, note
|
from hypothesis import given, strategies, settings, note
|
||||||
from hypothesis._settings import duration
|
from hypothesis._settings import duration
|
||||||
|
from hypothesis import HealthCheck
|
||||||
from test_gpu_updaters import parameter_strategy
|
from test_gpu_updaters import parameter_strategy
|
||||||
|
|
||||||
if sys.platform.startswith("win"):
|
if sys.platform.startswith("win"):
|
||||||
@@ -19,6 +21,11 @@ from test_with_dask import _get_client_workers # noqa
|
|||||||
from test_with_dask import generate_array # noqa
|
from test_with_dask import generate_array # noqa
|
||||||
import testing as tm # noqa
|
import testing as tm # noqa
|
||||||
|
|
||||||
|
if hasattr(HealthCheck, 'function_scoped_fixture'):
|
||||||
|
suppress = [HealthCheck.function_scoped_fixture]
|
||||||
|
else:
|
||||||
|
suppress = hypothesis.utils.conventions.not_set
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import dask.dataframe as dd
|
import dask.dataframe as dd
|
||||||
@@ -161,19 +168,24 @@ class TestDistributedGPU:
|
|||||||
run_with_dask_dataframe(dxgb.DaskDMatrix, client)
|
run_with_dask_dataframe(dxgb.DaskDMatrix, client)
|
||||||
run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client)
|
run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client)
|
||||||
|
|
||||||
@given(params=parameter_strategy, num_rounds=strategies.integers(1, 20),
|
@given(
|
||||||
dataset=tm.dataset_strategy)
|
params=parameter_strategy,
|
||||||
@settings(deadline=duration(seconds=120))
|
num_rounds=strategies.integers(1, 20),
|
||||||
|
dataset=tm.dataset_strategy,
|
||||||
|
)
|
||||||
|
@settings(deadline=duration(seconds=120), suppress_health_check=suppress)
|
||||||
@pytest.mark.skipif(**tm.no_dask())
|
@pytest.mark.skipif(**tm.no_dask())
|
||||||
@pytest.mark.skipif(**tm.no_dask_cuda())
|
@pytest.mark.skipif(**tm.no_dask_cuda())
|
||||||
@pytest.mark.parametrize('local_cuda_cluster', [{'n_workers': 2}], indirect=['local_cuda_cluster'])
|
@pytest.mark.parametrize(
|
||||||
|
"local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"]
|
||||||
|
)
|
||||||
@pytest.mark.mgpu
|
@pytest.mark.mgpu
|
||||||
def test_gpu_hist(self, params, num_rounds, dataset, local_cuda_cluster):
|
def test_gpu_hist(self, params, num_rounds, dataset, local_cuda_cluster):
|
||||||
with Client(local_cuda_cluster) as client:
|
with Client(local_cuda_cluster) as client:
|
||||||
run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix,
|
run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, client)
|
||||||
client)
|
run_gpu_hist(
|
||||||
run_gpu_hist(params, num_rounds, dataset,
|
params, num_rounds, dataset, dxgb.DaskDeviceQuantileDMatrix, client
|
||||||
dxgb.DaskDeviceQuantileDMatrix, client)
|
)
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_cupy())
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
@pytest.mark.skipif(**tm.no_dask())
|
@pytest.mark.skipif(**tm.no_dask())
|
||||||
|
|||||||
@@ -33,9 +33,15 @@ def run_predict_leaf(predictor):
|
|||||||
y = rng.randint(low=0, high=classes, size=rows)
|
y = rng.randint(low=0, high=classes, size=rows)
|
||||||
m = xgb.DMatrix(X, y)
|
m = xgb.DMatrix(X, y)
|
||||||
booster = xgb.train(
|
booster = xgb.train(
|
||||||
{'num_parallel_tree': num_parallel_tree, 'num_class': classes,
|
{
|
||||||
'predictor': predictor, 'tree_method': 'hist'}, m,
|
"num_parallel_tree": num_parallel_tree,
|
||||||
num_boost_round=num_boost_round)
|
"num_class": classes,
|
||||||
|
"predictor": predictor,
|
||||||
|
"tree_method": "hist",
|
||||||
|
},
|
||||||
|
m,
|
||||||
|
num_boost_round=num_boost_round,
|
||||||
|
)
|
||||||
|
|
||||||
empty = xgb.DMatrix(np.ones(shape=(0, cols)))
|
empty = xgb.DMatrix(np.ones(shape=(0, cols)))
|
||||||
empty_leaf = booster.predict(empty, pred_leaf=True)
|
empty_leaf = booster.predict(empty, pred_leaf=True)
|
||||||
@@ -52,12 +58,19 @@ def run_predict_leaf(predictor):
|
|||||||
end = classes * num_parallel_tree * (j + 1)
|
end = classes * num_parallel_tree * (j + 1)
|
||||||
layer = row[start: end]
|
layer = row[start: end]
|
||||||
for c in range(classes):
|
for c in range(classes):
|
||||||
tree_group = layer[c * num_parallel_tree:
|
tree_group = layer[c * num_parallel_tree: (c + 1) * num_parallel_tree]
|
||||||
(c+1) * num_parallel_tree]
|
|
||||||
assert tree_group.shape[0] == num_parallel_tree
|
assert tree_group.shape[0] == num_parallel_tree
|
||||||
# no subsampling so tree in same forest should output same
|
# no subsampling so tree in same forest should output same
|
||||||
# leaf.
|
# leaf.
|
||||||
assert np.all(tree_group == tree_group[0])
|
assert np.all(tree_group == tree_group[0])
|
||||||
|
|
||||||
|
ntree_limit = 2
|
||||||
|
sliced = booster.predict(
|
||||||
|
m, pred_leaf=True, ntree_limit=num_parallel_tree * ntree_limit
|
||||||
|
)
|
||||||
|
first = sliced[0, ...]
|
||||||
|
|
||||||
|
assert first.shape[0] == classes * num_parallel_tree * ntree_limit
|
||||||
return leaf
|
return leaf
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,8 @@ import asyncio
|
|||||||
from sklearn.datasets import make_classification
|
from sklearn.datasets import make_classification
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from hypothesis import given, settings, note
|
import hypothesis
|
||||||
|
from hypothesis import given, settings, note, HealthCheck
|
||||||
from test_updaters import hist_parameter_strategy, exact_parameter_strategy
|
from test_updaters import hist_parameter_strategy, exact_parameter_strategy
|
||||||
|
|
||||||
if sys.platform.startswith("win"):
|
if sys.platform.startswith("win"):
|
||||||
@@ -17,6 +18,12 @@ if tm.no_dask()['condition']:
|
|||||||
pytest.skip(msg=tm.no_dask()['reason'], allow_module_level=True)
|
pytest.skip(msg=tm.no_dask()['reason'], allow_module_level=True)
|
||||||
|
|
||||||
|
|
||||||
|
if hasattr(HealthCheck, 'function_scoped_fixture'):
|
||||||
|
suppress = [HealthCheck.function_scoped_fixture]
|
||||||
|
else:
|
||||||
|
suppress = hypothesis.utils.conventions.not_set
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from distributed import LocalCluster, Client, get_client
|
from distributed import LocalCluster, Client, get_client
|
||||||
from distributed.utils_test import client, loop, cluster_fixture
|
from distributed.utils_test import client, loop, cluster_fixture
|
||||||
@@ -668,14 +675,14 @@ class TestWithDask:
|
|||||||
|
|
||||||
@given(params=hist_parameter_strategy,
|
@given(params=hist_parameter_strategy,
|
||||||
dataset=tm.dataset_strategy)
|
dataset=tm.dataset_strategy)
|
||||||
@settings(deadline=None)
|
@settings(deadline=None, suppress_health_check=suppress)
|
||||||
def test_hist(self, params, dataset, client):
|
def test_hist(self, params, dataset, client):
|
||||||
num_rounds = 30
|
num_rounds = 30
|
||||||
self.run_updater_test(client, params, num_rounds, dataset, 'hist')
|
self.run_updater_test(client, params, num_rounds, dataset, 'hist')
|
||||||
|
|
||||||
@given(params=exact_parameter_strategy,
|
@given(params=exact_parameter_strategy,
|
||||||
dataset=tm.dataset_strategy)
|
dataset=tm.dataset_strategy)
|
||||||
@settings(deadline=None)
|
@settings(deadline=None, suppress_health_check=suppress)
|
||||||
def test_approx(self, client, params, dataset):
|
def test_approx(self, client, params, dataset):
|
||||||
num_rounds = 30
|
num_rounds = 30
|
||||||
self.run_updater_test(client, params, num_rounds, dataset, 'approx')
|
self.run_updater_test(client, params, num_rounds, dataset, 'approx')
|
||||||
@@ -795,7 +802,6 @@ class TestDaskCallbacks:
|
|||||||
merged = xgb.dask._get_workers_from_data(train, evals=[(valid, 'Valid')])
|
merged = xgb.dask._get_workers_from_data(train, evals=[(valid, 'Valid')])
|
||||||
assert len(merged) == 2
|
assert len(merged) == 2
|
||||||
|
|
||||||
|
|
||||||
def test_data_initialization(self):
|
def test_data_initialization(self):
|
||||||
'''Assert each worker has the correct amount of data, and DMatrix initialization doesn't
|
'''Assert each worker has the correct amount of data, and DMatrix initialization doesn't
|
||||||
generate unnecessary copies of data.
|
generate unnecessary copies of data.
|
||||||
|
|||||||
@@ -78,6 +78,34 @@ def test_multiclass_classification():
|
|||||||
check_pred(preds4, labels, output_margin=False)
|
check_pred(preds4, labels, output_margin=False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_best_ntree_limit():
|
||||||
|
from sklearn.datasets import load_iris
|
||||||
|
|
||||||
|
X, y = load_iris(return_X_y=True)
|
||||||
|
|
||||||
|
def train(booster, forest):
|
||||||
|
rounds = 4
|
||||||
|
cls = xgb.XGBClassifier(
|
||||||
|
n_estimators=rounds, num_parallel_tree=forest, booster=booster
|
||||||
|
).fit(
|
||||||
|
X, y, eval_set=[(X, y)], early_stopping_rounds=3
|
||||||
|
)
|
||||||
|
|
||||||
|
if forest:
|
||||||
|
assert cls.best_ntree_limit == rounds * forest
|
||||||
|
else:
|
||||||
|
assert cls.best_ntree_limit == 0
|
||||||
|
|
||||||
|
# best_ntree_limit is used by default, assert that under gblinear it's
|
||||||
|
# automatically ignored due to being 0.
|
||||||
|
cls.predict(X)
|
||||||
|
|
||||||
|
num_parallel_tree = 4
|
||||||
|
train('gbtree', num_parallel_tree)
|
||||||
|
train('dart', num_parallel_tree)
|
||||||
|
train('gblinear', None)
|
||||||
|
|
||||||
|
|
||||||
def test_ranking():
|
def test_ranking():
|
||||||
# generate random data
|
# generate random data
|
||||||
x_train = np.random.rand(1000, 10)
|
x_train = np.random.rand(1000, 10)
|
||||||
@@ -94,6 +122,8 @@ def test_ranking():
|
|||||||
model = xgb.sklearn.XGBRanker(**params)
|
model = xgb.sklearn.XGBRanker(**params)
|
||||||
model.fit(x_train, y_train, group=train_group,
|
model.fit(x_train, y_train, group=train_group,
|
||||||
eval_set=[(x_valid, y_valid)], eval_group=[valid_group])
|
eval_set=[(x_valid, y_valid)], eval_group=[valid_group])
|
||||||
|
assert model.evals_result()
|
||||||
|
|
||||||
pred = model.predict(x_test)
|
pred = model.predict(x_test)
|
||||||
|
|
||||||
train_data = xgb.DMatrix(x_train, y_train)
|
train_data = xgb.DMatrix(x_train, y_train)
|
||||||
|
|||||||
Reference in New Issue
Block a user