Support multi-class with base margin. (#7381)
This is already partially supported but was never properly tested, so the only possible way to use it has been to call `numpy.ndarray.flatten` on `base_margin` before passing it into XGBoost. This PR adds proper support for most of the data types, along with tests.
This commit is contained in:
parent
6295dc3b67
commit
a13321148a
@ -577,7 +577,7 @@ class DMatrix: # pylint: disable=too-many-instance-attributes
|
|||||||
|
|
||||||
# force into void_p, mac need to pass things in as void_p
|
# force into void_p, mac need to pass things in as void_p
|
||||||
if data is None:
|
if data is None:
|
||||||
self.handle = None
|
self.handle: Optional[ctypes.c_void_p] = None
|
||||||
return
|
return
|
||||||
|
|
||||||
from .data import dispatch_data_backend, _is_iter
|
from .data import dispatch_data_backend, _is_iter
|
||||||
|
|||||||
@ -1432,9 +1432,7 @@ def inplace_predict( # pylint: disable=unused-argument
|
|||||||
Value in the input data which needs to be present as a missing
|
Value in the input data which needs to be present as a missing
|
||||||
value. If None, defaults to np.nan.
|
value. If None, defaults to np.nan.
|
||||||
base_margin:
|
base_margin:
|
||||||
See :py:obj:`xgboost.DMatrix` for details. Right now classifier is not well
|
See :py:obj:`xgboost.DMatrix` for details.
|
||||||
supported with base_margin as it requires the size of base margin to be `n_classes
|
|
||||||
* n_samples`.
|
|
||||||
|
|
||||||
.. versionadded:: 1.4.0
|
.. versionadded:: 1.4.0
|
||||||
|
|
||||||
|
|||||||
@ -18,6 +18,11 @@ c_bst_ulong = ctypes.c_uint64 # pylint: disable=invalid-name
|
|||||||
|
|
||||||
CAT_T = "c"
|
CAT_T = "c"
|
||||||
|
|
||||||
|
# meta info that can be a matrix instead of vector.
|
||||||
|
# For now it's base_margin for multi-class, but it can be extended to label once we have
|
||||||
|
# multi-output.
|
||||||
|
_matrix_meta = {"base_margin"}
|
||||||
|
|
||||||
|
|
||||||
def _warn_unused_missing(data, missing):
|
def _warn_unused_missing(data, missing):
|
||||||
if (missing is not None) and (not np.isnan(missing)):
|
if (missing is not None) and (not np.isnan(missing)):
|
||||||
@ -217,7 +222,7 @@ _pandas_dtype_mapper = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _invalid_dataframe_dtype(data) -> None:
|
def _invalid_dataframe_dtype(data: Any) -> None:
|
||||||
# pandas series has `dtypes` but it's just a single object
|
# pandas series has `dtypes` but it's just a single object
|
||||||
# cudf series doesn't have `dtypes`.
|
# cudf series doesn't have `dtypes`.
|
||||||
if hasattr(data, "dtypes") and hasattr(data.dtypes, "__iter__"):
|
if hasattr(data, "dtypes") and hasattr(data.dtypes, "__iter__"):
|
||||||
@ -291,7 +296,7 @@ def _transform_pandas_df(
|
|||||||
else:
|
else:
|
||||||
transformed = data
|
transformed = data
|
||||||
|
|
||||||
if meta and len(data.columns) > 1:
|
if meta and len(data.columns) > 1 and meta not in _matrix_meta:
|
||||||
raise ValueError(f"DataFrame for {meta} cannot have multiple columns")
|
raise ValueError(f"DataFrame for {meta} cannot have multiple columns")
|
||||||
|
|
||||||
dtype = meta_type if meta_type else np.float32
|
dtype = meta_type if meta_type else np.float32
|
||||||
@ -323,6 +328,18 @@ def _is_pandas_series(data):
|
|||||||
return isinstance(data, pd.Series)
|
return isinstance(data, pd.Series)
|
||||||
|
|
||||||
|
|
||||||
|
def _meta_from_pandas_series(
|
||||||
|
data, name: str, dtype: Optional[str], handle: ctypes.c_void_p
|
||||||
|
) -> None:
|
||||||
|
"""Help transform pandas series for meta data like labels"""
|
||||||
|
data = data.values.astype('float')
|
||||||
|
from pandas.api.types import is_sparse
|
||||||
|
if is_sparse(data):
|
||||||
|
data = data.to_dense()
|
||||||
|
assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
|
||||||
|
_meta_from_numpy(data, name, dtype, handle)
|
||||||
|
|
||||||
|
|
||||||
def _is_modin_series(data):
|
def _is_modin_series(data):
|
||||||
try:
|
try:
|
||||||
import modin.pandas as pd
|
import modin.pandas as pd
|
||||||
@ -374,9 +391,9 @@ def _transform_dt_df(
|
|||||||
):
|
):
|
||||||
"""Validate feature names and types if data table"""
|
"""Validate feature names and types if data table"""
|
||||||
if meta and data.shape[1] > 1:
|
if meta and data.shape[1] > 1:
|
||||||
raise ValueError(
|
raise ValueError('DataTable for meta info cannot have multiple columns')
|
||||||
'DataTable for label or weight cannot have multiple columns')
|
|
||||||
if meta:
|
if meta:
|
||||||
|
meta_type = "float" if meta_type is None else meta_type
|
||||||
# below requires new dt version
|
# below requires new dt version
|
||||||
# extract first column
|
# extract first column
|
||||||
data = data.to_numpy()[:, 0].astype(meta_type)
|
data = data.to_numpy()[:, 0].astype(meta_type)
|
||||||
@ -820,19 +837,27 @@ def _to_data_type(dtype: str, name: str):
|
|||||||
return dtype_map[dtype]
|
return dtype_map[dtype]
|
||||||
|
|
||||||
|
|
||||||
def _validate_meta_shape(data, name: str) -> None:
|
def _validate_meta_shape(data: Any, name: str) -> None:
|
||||||
if hasattr(data, "shape"):
|
if hasattr(data, "shape"):
|
||||||
|
msg = f"Invalid shape: {data.shape} for {name}"
|
||||||
|
if name in _matrix_meta:
|
||||||
|
if len(data.shape) > 2:
|
||||||
|
raise ValueError(msg)
|
||||||
|
return
|
||||||
|
|
||||||
if len(data.shape) > 2 or (
|
if len(data.shape) > 2 or (
|
||||||
len(data.shape) == 2 and (data.shape[1] != 0 and data.shape[1] != 1)
|
len(data.shape) == 2 and (data.shape[1] != 0 and data.shape[1] != 1)
|
||||||
):
|
):
|
||||||
raise ValueError(f"Invalid shape: {data.shape} for {name}")
|
raise ValueError(f"Invalid shape: {data.shape} for {name}")
|
||||||
|
|
||||||
|
|
||||||
def _meta_from_numpy(data, field, dtype, handle):
|
def _meta_from_numpy(
|
||||||
|
data: np.ndarray, field: str, dtype, handle: ctypes.c_void_p
|
||||||
|
) -> None:
|
||||||
data = _maybe_np_slice(data, dtype)
|
data = _maybe_np_slice(data, dtype)
|
||||||
interface = data.__array_interface__
|
interface = data.__array_interface__
|
||||||
assert interface.get('mask', None) is None, 'Masked array is not supported'
|
assert interface.get('mask', None) is None, 'Masked array is not supported'
|
||||||
size = data.shape[0]
|
size = data.size
|
||||||
|
|
||||||
c_type = _to_data_type(str(data.dtype), field)
|
c_type = _to_data_type(str(data.dtype), field)
|
||||||
ptr = interface['data'][0]
|
ptr = interface['data'][0]
|
||||||
@ -855,17 +880,13 @@ def _meta_from_tuple(data, field, dtype, handle):
|
|||||||
return _meta_from_list(data, field, dtype, handle)
|
return _meta_from_list(data, field, dtype, handle)
|
||||||
|
|
||||||
|
|
||||||
def _meta_from_cudf_df(data, field, handle):
|
def _meta_from_cudf_df(data, field: str, handle: ctypes.c_void_p) -> None:
|
||||||
if len(data.columns) != 1:
|
if field not in _matrix_meta:
|
||||||
raise ValueError(
|
_meta_from_cudf_series(data.iloc[:, 0], field, handle)
|
||||||
'Expecting meta-info to contain a single column')
|
else:
|
||||||
data = data[data.columns[0]]
|
data = data.values
|
||||||
|
interface = _cuda_array_interface(data)
|
||||||
interface = bytes(json.dumps([data.__cuda_array_interface__],
|
_check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface))
|
||||||
indent=2), 'utf-8')
|
|
||||||
_check_call(_LIB.XGDMatrixSetInfoFromInterface(handle,
|
|
||||||
c_str(field),
|
|
||||||
interface))
|
|
||||||
|
|
||||||
|
|
||||||
def _meta_from_cudf_series(data, field, handle):
|
def _meta_from_cudf_series(data, field, handle):
|
||||||
@ -885,14 +906,15 @@ def _meta_from_cupy_array(data, field, handle):
|
|||||||
interface))
|
interface))
|
||||||
|
|
||||||
|
|
||||||
def _meta_from_dt(data, field, dtype, handle):
|
def _meta_from_dt(data, field: str, dtype, handle: ctypes.c_void_p):
|
||||||
data, _, _ = _transform_dt_df(data, None, None)
|
data, _, _ = _transform_dt_df(data, None, None, field, dtype)
|
||||||
_meta_from_numpy(data, field, dtype, handle)
|
_meta_from_numpy(data, field, dtype, handle)
|
||||||
|
|
||||||
|
|
||||||
def dispatch_meta_backend(matrix: DMatrix, data, name: str, dtype: str = None):
|
def dispatch_meta_backend(matrix: DMatrix, data, name: str, dtype: str = None):
|
||||||
'''Dispatch for meta info.'''
|
'''Dispatch for meta info.'''
|
||||||
handle = matrix.handle
|
handle = matrix.handle
|
||||||
|
assert handle is not None
|
||||||
_validate_meta_shape(data, name)
|
_validate_meta_shape(data, name)
|
||||||
if data is None:
|
if data is None:
|
||||||
return
|
return
|
||||||
@ -911,9 +933,7 @@ def dispatch_meta_backend(matrix: DMatrix, data, name: str, dtype: str = None):
|
|||||||
_meta_from_numpy(data, name, dtype, handle)
|
_meta_from_numpy(data, name, dtype, handle)
|
||||||
return
|
return
|
||||||
if _is_pandas_series(data):
|
if _is_pandas_series(data):
|
||||||
data = data.values.astype('float')
|
_meta_from_pandas_series(data, name, dtype, handle)
|
||||||
assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
|
|
||||||
_meta_from_numpy(data, name, dtype, handle)
|
|
||||||
return
|
return
|
||||||
if _is_dlpack(data):
|
if _is_dlpack(data):
|
||||||
data = _transform_dlpack(data)
|
data = _transform_dlpack(data)
|
||||||
|
|||||||
@ -210,27 +210,28 @@ class ArrayInterfaceHandler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void ExtractStride(std::map<std::string, Json> const &column,
|
static void ExtractStride(std::map<std::string, Json> const &column,
|
||||||
size_t strides[2], size_t rows, size_t cols, size_t itemsize) {
|
size_t *stride_r, size_t *stride_c, size_t rows,
|
||||||
|
size_t cols, size_t itemsize) {
|
||||||
auto strides_it = column.find("strides");
|
auto strides_it = column.find("strides");
|
||||||
if (strides_it == column.cend() || IsA<Null>(strides_it->second)) {
|
if (strides_it == column.cend() || IsA<Null>(strides_it->second)) {
|
||||||
// default strides
|
// default strides
|
||||||
strides[0] = cols;
|
*stride_r = cols;
|
||||||
strides[1] = 1;
|
*stride_c = 1;
|
||||||
} else {
|
} else {
|
||||||
// strides specified by the array interface
|
// strides specified by the array interface
|
||||||
auto const &j_strides = get<Array const>(strides_it->second);
|
auto const &j_strides = get<Array const>(strides_it->second);
|
||||||
CHECK_LE(j_strides.size(), 2) << ArrayInterfaceErrors::Dimension(2);
|
CHECK_LE(j_strides.size(), 2) << ArrayInterfaceErrors::Dimension(2);
|
||||||
strides[0] = get<Integer const>(j_strides[0]) / itemsize;
|
*stride_r = get<Integer const>(j_strides[0]) / itemsize;
|
||||||
size_t n = 1;
|
size_t n = 1;
|
||||||
if (j_strides.size() == 2) {
|
if (j_strides.size() == 2) {
|
||||||
n = get<Integer const>(j_strides[1]) / itemsize;
|
n = get<Integer const>(j_strides[1]) / itemsize;
|
||||||
}
|
}
|
||||||
strides[1] = n;
|
*stride_c = n;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto valid = rows * strides[0] + cols * strides[1] >= (rows * cols);
|
auto valid = rows * (*stride_r) + cols * (*stride_c) >= (rows * cols);
|
||||||
CHECK(valid) << "Invalid strides in array."
|
CHECK(valid) << "Invalid strides in array."
|
||||||
<< " strides: (" << strides[0] << "," << strides[1]
|
<< " strides: (" << (*stride_r) << "," << (*stride_c)
|
||||||
<< "), shape: (" << rows << ", " << cols << ")";
|
<< "), shape: (" << rows << ", " << cols << ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -281,8 +282,8 @@ class ArrayInterface {
|
|||||||
<< "Masked array is not yet supported.";
|
<< "Masked array is not yet supported.";
|
||||||
}
|
}
|
||||||
|
|
||||||
ArrayInterfaceHandler::ExtractStride(array, strides, num_rows, num_cols,
|
ArrayInterfaceHandler::ExtractStride(array, &stride_row, &stride_col,
|
||||||
typestr[2] - '0');
|
num_rows, num_cols, typestr[2] - '0');
|
||||||
|
|
||||||
auto stream_it = array.find("stream");
|
auto stream_it = array.find("stream");
|
||||||
if (stream_it != array.cend() && !IsA<Null>(stream_it->second)) {
|
if (stream_it != array.cend() && !IsA<Null>(stream_it->second)) {
|
||||||
@ -323,8 +324,8 @@ class ArrayInterface {
|
|||||||
num_rows = std::max(num_rows, static_cast<size_t>(num_cols));
|
num_rows = std::max(num_rows, static_cast<size_t>(num_cols));
|
||||||
num_cols = 1;
|
num_cols = 1;
|
||||||
|
|
||||||
strides[0] = std::max(strides[0], strides[1]);
|
stride_row = std::max(stride_row, stride_col);
|
||||||
strides[1] = 1;
|
stride_col = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssignType(StringView typestr) {
|
void AssignType(StringView typestr) {
|
||||||
@ -406,13 +407,14 @@ class ArrayInterface {
|
|||||||
template <typename T = float>
|
template <typename T = float>
|
||||||
XGBOOST_DEVICE T GetElement(size_t r, size_t c) const {
|
XGBOOST_DEVICE T GetElement(size_t r, size_t c) const {
|
||||||
return this->DispatchCall(
|
return this->DispatchCall(
|
||||||
[=](auto *p_values) -> T { return p_values[strides[0] * r + strides[1] * c]; });
|
[=](auto *p_values) -> T { return p_values[stride_row * r + stride_col * c]; });
|
||||||
}
|
}
|
||||||
|
|
||||||
RBitField8 valid;
|
RBitField8 valid;
|
||||||
bst_row_t num_rows;
|
bst_row_t num_rows;
|
||||||
bst_feature_t num_cols;
|
bst_feature_t num_cols;
|
||||||
size_t strides[2]{0, 0};
|
size_t stride_row{0};
|
||||||
|
size_t stride_col{0};
|
||||||
void* data;
|
void* data;
|
||||||
Type type;
|
Type type;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -30,12 +30,16 @@ void CopyInfoImpl(ArrayInterface column, HostDeviceVector<float>* out) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
out->SetDevice(ptr_device);
|
out->SetDevice(ptr_device);
|
||||||
out->Resize(column.num_rows);
|
|
||||||
|
size_t size = column.num_rows * column.num_cols;
|
||||||
|
CHECK_NE(size, 0);
|
||||||
|
out->Resize(size);
|
||||||
|
|
||||||
auto p_dst = thrust::device_pointer_cast(out->DevicePointer());
|
auto p_dst = thrust::device_pointer_cast(out->DevicePointer());
|
||||||
|
dh::LaunchN(size, [=] __device__(size_t idx) {
|
||||||
dh::LaunchN(column.num_rows, [=] __device__(size_t idx) {
|
size_t ridx = idx / column.num_cols;
|
||||||
p_dst[idx] = column.GetElement(idx, 0);
|
size_t cidx = idx - (ridx * column.num_cols);
|
||||||
|
p_dst[idx] = column.GetElement(ridx, cidx);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -126,16 +130,8 @@ void ValidateQueryGroup(std::vector<bst_group_t> const &group_ptr_);
|
|||||||
|
|
||||||
void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
|
void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
|
||||||
Json j_interface = Json::Load({interface_str.c_str(), interface_str.size()});
|
Json j_interface = Json::Load({interface_str.c_str(), interface_str.size()});
|
||||||
auto const& j_arr = get<Array>(j_interface);
|
|
||||||
CHECK_EQ(j_arr.size(), 1)
|
|
||||||
<< "MetaInfo: " << c_key << ". " << ArrayInterfaceErrors::Dimension(1);
|
|
||||||
ArrayInterface array_interface(interface_str);
|
ArrayInterface array_interface(interface_str);
|
||||||
std::string key{c_key};
|
std::string key{c_key};
|
||||||
if (!((array_interface.num_cols == 1 && array_interface.num_rows == 0) ||
|
|
||||||
(array_interface.num_cols == 0 && array_interface.num_rows == 1))) {
|
|
||||||
// Not an empty column, transform it.
|
|
||||||
array_interface.AsColumnVector();
|
|
||||||
}
|
|
||||||
|
|
||||||
CHECK(!array_interface.valid.Data())
|
CHECK(!array_interface.valid.Data())
|
||||||
<< "Meta info " << key << " should be dense, found validity mask";
|
<< "Meta info " << key << " should be dense, found validity mask";
|
||||||
@ -143,6 +139,18 @@ void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (key == "base_margin") {
|
||||||
|
CopyInfoImpl(array_interface, &base_margin_);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
CHECK(array_interface.num_cols == 1 || array_interface.num_rows == 1)
|
||||||
|
<< "MetaInfo: " << c_key << " has invalid shape";
|
||||||
|
if (!((array_interface.num_cols == 1 && array_interface.num_rows == 0) ||
|
||||||
|
(array_interface.num_cols == 0 && array_interface.num_rows == 1))) {
|
||||||
|
// Not an empty column, transform it.
|
||||||
|
array_interface.AsColumnVector();
|
||||||
|
}
|
||||||
if (key == "label") {
|
if (key == "label") {
|
||||||
CopyInfoImpl(array_interface, &labels_);
|
CopyInfoImpl(array_interface, &labels_);
|
||||||
auto ptr = labels_.ConstDevicePointer();
|
auto ptr = labels_.ConstDevicePointer();
|
||||||
@ -155,8 +163,6 @@ void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
|
|||||||
auto valid = thrust::none_of(thrust::device, ptr, ptr + weights_.Size(),
|
auto valid = thrust::none_of(thrust::device, ptr, ptr + weights_.Size(),
|
||||||
WeightsCheck{});
|
WeightsCheck{});
|
||||||
CHECK(valid) << "Weights must be positive values.";
|
CHECK(valid) << "Weights must be positive values.";
|
||||||
} else if (key == "base_margin") {
|
|
||||||
CopyInfoImpl(array_interface, &base_margin_);
|
|
||||||
} else if (key == "group") {
|
} else if (key == "group") {
|
||||||
CopyGroupInfoImpl(array_interface, &group_ptr_);
|
CopyGroupInfoImpl(array_interface, &group_ptr_);
|
||||||
ValidateQueryGroup(group_ptr_);
|
ValidateQueryGroup(group_ptr_);
|
||||||
|
|||||||
@ -290,27 +290,16 @@ class CPUPredictor : public Predictor {
|
|||||||
const auto& base_margin = info.base_margin_.HostVector();
|
const auto& base_margin = info.base_margin_.HostVector();
|
||||||
out_preds->Resize(n);
|
out_preds->Resize(n);
|
||||||
std::vector<bst_float>& out_preds_h = out_preds->HostVector();
|
std::vector<bst_float>& out_preds_h = out_preds->HostVector();
|
||||||
if (base_margin.size() == n) {
|
if (base_margin.empty()) {
|
||||||
CHECK_EQ(out_preds->Size(), n);
|
|
||||||
std::copy(base_margin.begin(), base_margin.end(), out_preds_h.begin());
|
|
||||||
} else {
|
|
||||||
if (!base_margin.empty()) {
|
|
||||||
std::ostringstream oss;
|
|
||||||
oss << "Ignoring the base margin, since it has incorrect length. "
|
|
||||||
<< "The base margin must be an array of length ";
|
|
||||||
if (model.learner_model_param->num_output_group > 1) {
|
|
||||||
oss << "[num_class] * [number of data points], i.e. "
|
|
||||||
<< model.learner_model_param->num_output_group << " * " << info.num_row_
|
|
||||||
<< " = " << n << ". ";
|
|
||||||
} else {
|
|
||||||
oss << "[number of data points], i.e. " << info.num_row_ << ". ";
|
|
||||||
}
|
|
||||||
oss << "Instead, all data points will use "
|
|
||||||
<< "base_score = " << model.learner_model_param->base_score;
|
|
||||||
LOG(WARNING) << oss.str();
|
|
||||||
}
|
|
||||||
std::fill(out_preds_h.begin(), out_preds_h.end(),
|
std::fill(out_preds_h.begin(), out_preds_h.end(),
|
||||||
model.learner_model_param->base_score);
|
model.learner_model_param->base_score);
|
||||||
|
} else {
|
||||||
|
std::string expected{
|
||||||
|
"(" + std::to_string(info.num_row_) + ", " +
|
||||||
|
std::to_string(model.learner_model_param->num_output_group) + ")"};
|
||||||
|
CHECK_EQ(base_margin.size(), n)
|
||||||
|
<< "Invalid shape of base_margin. Expected:" << expected;
|
||||||
|
std::copy(base_margin.begin(), base_margin.end(), out_preds_h.begin());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -938,7 +938,11 @@ class GPUPredictor : public xgboost::Predictor {
|
|||||||
out_preds->SetDevice(generic_param_->gpu_id);
|
out_preds->SetDevice(generic_param_->gpu_id);
|
||||||
out_preds->Resize(n);
|
out_preds->Resize(n);
|
||||||
if (base_margin.Size() != 0) {
|
if (base_margin.Size() != 0) {
|
||||||
CHECK_EQ(base_margin.Size(), n);
|
std::string expected{
|
||||||
|
"(" + std::to_string(info.num_row_) + ", " +
|
||||||
|
std::to_string(model.learner_model_param->num_output_group) + ")"};
|
||||||
|
CHECK_EQ(base_margin.Size(), n)
|
||||||
|
<< "Invalid shape of base_margin. Expected:" << expected;
|
||||||
out_preds->Copy(base_margin);
|
out_preds->Copy(base_margin);
|
||||||
} else {
|
} else {
|
||||||
out_preds->Fill(model.learner_model_param->base_score);
|
out_preds->Fill(model.learner_model_param->base_score);
|
||||||
|
|||||||
@ -252,6 +252,8 @@ TEST(MetaInfo, Validate) {
|
|||||||
EXPECT_THROW(info.Validate(1), dmlc::Error);
|
EXPECT_THROW(info.Validate(1), dmlc::Error);
|
||||||
|
|
||||||
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
|
xgboost::HostDeviceVector<xgboost::bst_group_t> d_groups{groups};
|
||||||
|
d_groups.SetDevice(0);
|
||||||
|
d_groups.DevicePointer(); // pull to device
|
||||||
auto arr_interface = xgboost::GetArrayInterface(&d_groups, 64, 1);
|
auto arr_interface = xgboost::GetArrayInterface(&d_groups, 64, 1);
|
||||||
std::string arr_interface_str;
|
std::string arr_interface_str;
|
||||||
xgboost::Json::Dump(arr_interface, &arr_interface_str);
|
xgboost::Json::Dump(arr_interface, &arr_interface_str);
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import pytest
|
|||||||
|
|
||||||
sys.path.append("tests/python")
|
sys.path.append("tests/python")
|
||||||
import testing as tm
|
import testing as tm
|
||||||
|
from test_dmatrix import set_base_margin_info
|
||||||
|
|
||||||
|
|
||||||
def dmatrix_from_cudf(input_type, DMatrixT, missing=np.NAN):
|
def dmatrix_from_cudf(input_type, DMatrixT, missing=np.NAN):
|
||||||
@ -142,6 +143,8 @@ def _test_cudf_metainfo(DMatrixT):
|
|||||||
dmat_cudf.get_float_info('base_margin'))
|
dmat_cudf.get_float_info('base_margin'))
|
||||||
assert np.array_equal(dmat.get_uint_info('group_ptr'), dmat_cudf.get_uint_info('group_ptr'))
|
assert np.array_equal(dmat.get_uint_info('group_ptr'), dmat_cudf.get_uint_info('group_ptr'))
|
||||||
|
|
||||||
|
set_base_margin_info(df, DMatrixT, "gpu_hist")
|
||||||
|
|
||||||
|
|
||||||
class TestFromColumnar:
|
class TestFromColumnar:
|
||||||
'''Tests for constructing DMatrix from data structure conforming Apache
|
'''Tests for constructing DMatrix from data structure conforming Apache
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import pytest
|
|||||||
|
|
||||||
sys.path.append("tests/python")
|
sys.path.append("tests/python")
|
||||||
import testing as tm
|
import testing as tm
|
||||||
|
from test_dmatrix import set_base_margin_info
|
||||||
|
|
||||||
|
|
||||||
def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
|
def dmatrix_from_cupy(input_type, DMatrixT, missing=np.NAN):
|
||||||
@ -107,6 +108,8 @@ def _test_cupy_metainfo(DMatrixT):
|
|||||||
assert np.array_equal(dmat.get_uint_info('group_ptr'),
|
assert np.array_equal(dmat.get_uint_info('group_ptr'),
|
||||||
dmat_cupy.get_uint_info('group_ptr'))
|
dmat_cupy.get_uint_info('group_ptr'))
|
||||||
|
|
||||||
|
set_base_margin_info(cp.asarray, DMatrixT, "gpu_hist")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_cupy())
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
@pytest.mark.skipif(**tm.no_sklearn())
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
|
|||||||
@ -22,6 +22,7 @@ from test_with_dask import run_empty_dmatrix_reg # noqa
|
|||||||
from test_with_dask import run_empty_dmatrix_auc # noqa
|
from test_with_dask import run_empty_dmatrix_auc # noqa
|
||||||
from test_with_dask import run_auc # noqa
|
from test_with_dask import run_auc # noqa
|
||||||
from test_with_dask import run_boost_from_prediction # noqa
|
from test_with_dask import run_boost_from_prediction # noqa
|
||||||
|
from test_with_dask import run_boost_from_prediction_multi_clasas # noqa
|
||||||
from test_with_dask import run_dask_classifier # noqa
|
from test_with_dask import run_dask_classifier # noqa
|
||||||
from test_with_dask import run_empty_dmatrix_cls # noqa
|
from test_with_dask import run_empty_dmatrix_cls # noqa
|
||||||
from test_with_dask import _get_client_workers # noqa
|
from test_with_dask import _get_client_workers # noqa
|
||||||
@ -297,13 +298,18 @@ def run_gpu_hist(
|
|||||||
@pytest.mark.skipif(**tm.no_cudf())
|
@pytest.mark.skipif(**tm.no_cudf())
|
||||||
def test_boost_from_prediction(local_cuda_cluster: LocalCUDACluster) -> None:
|
def test_boost_from_prediction(local_cuda_cluster: LocalCUDACluster) -> None:
|
||||||
import cudf
|
import cudf
|
||||||
from sklearn.datasets import load_breast_cancer
|
from sklearn.datasets import load_breast_cancer, load_digits
|
||||||
with Client(local_cuda_cluster) as client:
|
with Client(local_cuda_cluster) as client:
|
||||||
X_, y_ = load_breast_cancer(return_X_y=True)
|
X_, y_ = load_breast_cancer(return_X_y=True)
|
||||||
X = dd.from_array(X_, chunksize=100).map_partitions(cudf.from_pandas)
|
X = dd.from_array(X_, chunksize=100).map_partitions(cudf.from_pandas)
|
||||||
y = dd.from_array(y_, chunksize=100).map_partitions(cudf.from_pandas)
|
y = dd.from_array(y_, chunksize=100).map_partitions(cudf.from_pandas)
|
||||||
run_boost_from_prediction(X, y, "gpu_hist", client)
|
run_boost_from_prediction(X, y, "gpu_hist", client)
|
||||||
|
|
||||||
|
X_, y_ = load_digits(return_X_y=True)
|
||||||
|
X = dd.from_array(X_, chunksize=100).map_partitions(cudf.from_pandas)
|
||||||
|
y = dd.from_array(y_, chunksize=100).map_partitions(cudf.from_pandas)
|
||||||
|
run_boost_from_prediction_multi_clasas(X, y, "gpu_hist", client)
|
||||||
|
|
||||||
|
|
||||||
class TestDistributedGPU:
|
class TestDistributedGPU:
|
||||||
@pytest.mark.skipif(**tm.no_dask())
|
@pytest.mark.skipif(**tm.no_dask())
|
||||||
|
|||||||
@ -35,8 +35,25 @@ def test_gpu_binary_classification():
|
|||||||
assert err < 0.1
|
assert err < 0.1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
|
@pytest.mark.skipif(**tm.no_cudf())
|
||||||
def test_boost_from_prediction_gpu_hist():
|
def test_boost_from_prediction_gpu_hist():
|
||||||
twskl.run_boost_from_prediction('gpu_hist')
|
from sklearn.datasets import load_breast_cancer, load_digits
|
||||||
|
import cupy as cp
|
||||||
|
import cudf
|
||||||
|
|
||||||
|
tree_method = "gpu_hist"
|
||||||
|
X, y = load_breast_cancer(return_X_y=True)
|
||||||
|
X, y = cp.array(X), cp.array(y)
|
||||||
|
|
||||||
|
twskl.run_boost_from_prediction_binary(tree_method, X, y, None)
|
||||||
|
twskl.run_boost_from_prediction_binary(tree_method, X, y, cudf.DataFrame)
|
||||||
|
|
||||||
|
X, y = load_digits(return_X_y=True)
|
||||||
|
X, y = cp.array(X), cp.array(y)
|
||||||
|
|
||||||
|
twskl.run_boost_from_prediction_multi_clasas(tree_method, X, y, None)
|
||||||
|
twskl.run_boost_from_prediction_multi_clasas(tree_method, X, y, cudf.DataFrame)
|
||||||
|
|
||||||
|
|
||||||
def test_num_parallel_tree():
|
def test_num_parallel_tree():
|
||||||
|
|||||||
@ -15,6 +15,24 @@ dpath = 'demo/data/'
|
|||||||
rng = np.random.RandomState(1994)
|
rng = np.random.RandomState(1994)
|
||||||
|
|
||||||
|
|
||||||
|
def set_base_margin_info(DType, DMatrixT, tm: str):
|
||||||
|
rng = np.random.default_rng()
|
||||||
|
X = DType(rng.normal(0, 1.0, size=100).reshape(50, 2))
|
||||||
|
if hasattr(X, "iloc"):
|
||||||
|
y = X.iloc[:, 0]
|
||||||
|
else:
|
||||||
|
y = X[:, 0]
|
||||||
|
base_margin = X
|
||||||
|
# no error at set
|
||||||
|
Xy = DMatrixT(X, y, base_margin=base_margin)
|
||||||
|
# Error at train, caused by check in predictor.
|
||||||
|
with pytest.raises(ValueError, match=r".*base_margin.*"):
|
||||||
|
xgb.train({"tree_method": tm}, Xy)
|
||||||
|
|
||||||
|
# FIXME(jiamingy): Currently the metainfo has no concept of shape. If you pass a
|
||||||
|
# base_margin with shape (n_classes, n_samples) to XGBoost the result is undefined.
|
||||||
|
|
||||||
|
|
||||||
class TestDMatrix:
|
class TestDMatrix:
|
||||||
def test_warn_missing(self):
|
def test_warn_missing(self):
|
||||||
from xgboost import data
|
from xgboost import data
|
||||||
@ -122,7 +140,7 @@ class TestDMatrix:
|
|||||||
|
|
||||||
# base margin is per-class in multi-class classifier
|
# base margin is per-class in multi-class classifier
|
||||||
base_margin = rng.randn(100, 3).astype(np.float32)
|
base_margin = rng.randn(100, 3).astype(np.float32)
|
||||||
d.set_base_margin(base_margin.flatten())
|
d.set_base_margin(base_margin)
|
||||||
|
|
||||||
ridxs = [1, 2, 3, 4, 5, 6]
|
ridxs = [1, 2, 3, 4, 5, 6]
|
||||||
sliced = d.slice(ridxs)
|
sliced = d.slice(ridxs)
|
||||||
@ -380,3 +398,6 @@ class TestDMatrix:
|
|||||||
feature_types = ["q"] * 5 + ["c"] + ["q"] * 120
|
feature_types = ["q"] * 5 + ["c"] + ["q"] * 120
|
||||||
Xy = xgb.DMatrix(path + "?indexing_mode=1", feature_types=feature_types)
|
Xy = xgb.DMatrix(path + "?indexing_mode=1", feature_types=feature_types)
|
||||||
np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))
|
np.testing.assert_equal(np.array(Xy.feature_types), np.array(feature_types))
|
||||||
|
|
||||||
|
def test_base_margin(self):
|
||||||
|
set_base_margin_info(np.asarray, xgb.DMatrix, "hist")
|
||||||
|
|||||||
@ -7,7 +7,7 @@ import sys
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy
|
import scipy
|
||||||
import json
|
import json
|
||||||
from typing import List, Tuple, Dict, Optional, Type, Any
|
from typing import List, Tuple, Dict, Optional, Type, Any, Callable
|
||||||
import asyncio
|
import asyncio
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
@ -182,6 +182,50 @@ def test_dask_predict_shape_infer(client: "Client") -> None:
|
|||||||
assert prediction.shape[1] == 3
|
assert prediction.shape[1] == 3
|
||||||
|
|
||||||
|
|
||||||
|
def run_boost_from_prediction_multi_clasas(
|
||||||
|
X: xgb.dask._DaskCollection,
|
||||||
|
y: xgb.dask._DaskCollection,
|
||||||
|
tree_method: str,
|
||||||
|
client: "Client"
|
||||||
|
) -> None:
|
||||||
|
model_0 = xgb.dask.DaskXGBClassifier(
|
||||||
|
learning_rate=0.3, random_state=0, n_estimators=4, tree_method=tree_method
|
||||||
|
)
|
||||||
|
model_0.fit(X=X, y=y)
|
||||||
|
margin = xgb.dask.inplace_predict(
|
||||||
|
client, model_0.get_booster(), X, predict_type="margin"
|
||||||
|
)
|
||||||
|
|
||||||
|
model_1 = xgb.dask.DaskXGBClassifier(
|
||||||
|
learning_rate=0.3, random_state=0, n_estimators=4, tree_method=tree_method
|
||||||
|
)
|
||||||
|
model_1.fit(X=X, y=y, base_margin=margin)
|
||||||
|
predictions_1 = xgb.dask.predict(
|
||||||
|
client,
|
||||||
|
model_1.get_booster(),
|
||||||
|
xgb.dask.DaskDMatrix(client, X, base_margin=margin),
|
||||||
|
output_margin=True
|
||||||
|
)
|
||||||
|
|
||||||
|
model_2 = xgb.dask.DaskXGBClassifier(
|
||||||
|
learning_rate=0.3, random_state=0, n_estimators=8, tree_method=tree_method
|
||||||
|
)
|
||||||
|
model_2.fit(X=X, y=y)
|
||||||
|
predictions_2 = xgb.dask.inplace_predict(
|
||||||
|
client, model_2.get_booster(), X, predict_type="margin"
|
||||||
|
)
|
||||||
|
a = predictions_1.compute()
|
||||||
|
b = predictions_2.compute()
|
||||||
|
# cupy/cudf
|
||||||
|
if hasattr(a, "get"):
|
||||||
|
a = a.get()
|
||||||
|
if hasattr(b, "values"):
|
||||||
|
b = b.values
|
||||||
|
if hasattr(b, "get"):
|
||||||
|
b = b.get()
|
||||||
|
np.testing.assert_allclose(a, b, atol=1e-5)
|
||||||
|
|
||||||
|
|
||||||
def run_boost_from_prediction(
|
def run_boost_from_prediction(
|
||||||
X: xgb.dask._DaskCollection, y: xgb.dask._DaskCollection, tree_method: str, client: "Client"
|
X: xgb.dask._DaskCollection, y: xgb.dask._DaskCollection, tree_method: str, client: "Client"
|
||||||
) -> None:
|
) -> None:
|
||||||
@ -227,11 +271,15 @@ def run_boost_from_prediction(
|
|||||||
|
|
||||||
@pytest.mark.parametrize("tree_method", ["hist", "approx"])
|
@pytest.mark.parametrize("tree_method", ["hist", "approx"])
|
||||||
def test_boost_from_prediction(tree_method: str, client: "Client") -> None:
|
def test_boost_from_prediction(tree_method: str, client: "Client") -> None:
|
||||||
from sklearn.datasets import load_breast_cancer
|
from sklearn.datasets import load_breast_cancer, load_digits
|
||||||
X_, y_ = load_breast_cancer(return_X_y=True)
|
X_, y_ = load_breast_cancer(return_X_y=True)
|
||||||
X, y = dd.from_array(X_, chunksize=100), dd.from_array(y_, chunksize=100)
|
X, y = dd.from_array(X_, chunksize=100), dd.from_array(y_, chunksize=100)
|
||||||
run_boost_from_prediction(X, y, tree_method, client)
|
run_boost_from_prediction(X, y, tree_method, client)
|
||||||
|
|
||||||
|
X_, y_ = load_digits(return_X_y=True)
|
||||||
|
X, y = dd.from_array(X_, chunksize=100), dd.from_array(y_, chunksize=100)
|
||||||
|
run_boost_from_prediction_multi_clasas(X, y, tree_method, client)
|
||||||
|
|
||||||
|
|
||||||
def test_inplace_predict(client: "Client") -> None:
|
def test_inplace_predict(client: "Client") -> None:
|
||||||
from sklearn.datasets import load_boston
|
from sklearn.datasets import load_boston
|
||||||
|
|||||||
@ -3,6 +3,7 @@ import numpy as np
|
|||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
import testing as tm
|
import testing as tm
|
||||||
import pytest
|
import pytest
|
||||||
|
from test_dmatrix import set_base_margin_info
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import modin.pandas as md
|
import modin.pandas as md
|
||||||
@ -144,3 +145,6 @@ class TestModin:
|
|||||||
assert data.num_col() == kCols
|
assert data.num_col() == kCols
|
||||||
|
|
||||||
np.testing.assert_array_equal(data.get_weight(), w)
|
np.testing.assert_array_equal(data.get_weight(), w)
|
||||||
|
|
||||||
|
def test_base_margin(self):
    """Run the shared ``set_base_margin_info`` helper with modin inputs.

    The helper is imported from ``test_dmatrix``; it is handed the modin
    ``DataFrame`` type, ``xgb.DMatrix`` and the ``"hist"`` tree method.
    """
    frame_type, matrix_type = md.DataFrame, xgb.DMatrix
    set_base_margin_info(frame_type, matrix_type, "hist")
|
||||||
|
|||||||
@ -3,6 +3,7 @@ import numpy as np
|
|||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
import testing as tm
|
import testing as tm
|
||||||
import pytest
|
import pytest
|
||||||
|
from test_dmatrix import set_base_margin_info
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@ -205,6 +206,9 @@ class TestPandas:
|
|||||||
|
|
||||||
np.testing.assert_array_equal(data.get_weight(), w)
|
np.testing.assert_array_equal(data.get_weight(), w)
|
||||||
|
|
||||||
|
def test_base_margin(self):
    """Run the shared ``set_base_margin_info`` helper with pandas inputs.

    The helper is imported from ``test_dmatrix``; it is handed the pandas
    ``DataFrame`` type, ``xgb.DMatrix`` and the ``"hist"`` tree method.
    """
    frame_type, matrix_type = pd.DataFrame, xgb.DMatrix
    set_base_margin_info(frame_type, matrix_type, "hist")
|
||||||
|
|
||||||
def test_cv_as_pandas(self):
|
def test_cv_as_pandas(self):
|
||||||
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||||
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
from typing import Callable, Optional
|
||||||
import collections
|
import collections
|
||||||
import importlib.util
|
import importlib.util
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -1147,32 +1148,83 @@ def test_feature_weights():
|
|||||||
assert poly_decreasing[0] < -0.08
|
assert poly_decreasing[0] < -0.08
|
||||||
|
|
||||||
|
|
||||||
def run_boost_from_prediction(tree_method):
|
def run_boost_from_prediction_binary(tree_method, X, y, as_frame: Optional[Callable]):
|
||||||
from sklearn.datasets import load_breast_cancer
|
"""
|
||||||
X, y = load_breast_cancer(return_X_y=True)
|
Parameters
|
||||||
|
----------
|
||||||
|
|
||||||
|
as_frame: A callable function to convert margin into DataFrame, useful for different
|
||||||
|
df implementations.
|
||||||
|
"""
|
||||||
|
|
||||||
model_0 = xgb.XGBClassifier(
|
model_0 = xgb.XGBClassifier(
|
||||||
learning_rate=0.3, random_state=0, n_estimators=4,
|
learning_rate=0.3, random_state=0, n_estimators=4, tree_method=tree_method
|
||||||
tree_method=tree_method)
|
)
|
||||||
model_0.fit(X=X, y=y)
|
model_0.fit(X=X, y=y)
|
||||||
margin = model_0.predict(X, output_margin=True)
|
margin = model_0.predict(X, output_margin=True)
|
||||||
|
if as_frame is not None:
|
||||||
|
margin = as_frame(margin)
|
||||||
|
|
||||||
model_1 = xgb.XGBClassifier(
|
model_1 = xgb.XGBClassifier(
|
||||||
learning_rate=0.3, random_state=0, n_estimators=4,
|
learning_rate=0.3, random_state=0, n_estimators=4, tree_method=tree_method
|
||||||
tree_method=tree_method)
|
)
|
||||||
model_1.fit(X=X, y=y, base_margin=margin)
|
model_1.fit(X=X, y=y, base_margin=margin)
|
||||||
predictions_1 = model_1.predict(X, base_margin=margin)
|
predictions_1 = model_1.predict(X, base_margin=margin)
|
||||||
|
|
||||||
cls_2 = xgb.XGBClassifier(
|
cls_2 = xgb.XGBClassifier(
|
||||||
learning_rate=0.3, random_state=0, n_estimators=8,
|
learning_rate=0.3, random_state=0, n_estimators=8, tree_method=tree_method
|
||||||
tree_method=tree_method)
|
)
|
||||||
cls_2.fit(X=X, y=y)
|
cls_2.fit(X=X, y=y)
|
||||||
predictions_2 = cls_2.predict(X)
|
predictions_2 = cls_2.predict(X)
|
||||||
assert np.all(predictions_1 == predictions_2)
|
np.testing.assert_allclose(predictions_1, predictions_2)
|
||||||
|
|
||||||
|
|
||||||
|
def run_boost_from_prediction_multi_clasas(
    tree_method, X, y, as_frame: Optional[Callable]
):
    """Compare two-stage boosting via ``base_margin`` with one longer run.

    A 4-round classifier is fitted and its margin prediction is fed as
    ``base_margin`` into a second 4-round classifier.  The margin output of
    that second model must match, within ``atol=1e-6``, the margin output of
    a single classifier fitted for 8 rounds.

    Parameters
    ----------
    tree_method :
        Forwarded as ``tree_method`` to every ``xgb.XGBClassifier``.
    X, y :
        Training data; passed unchanged to ``fit`` and to the predictors.
    as_frame :
        Optional callable applied to the first-stage margin before it is
        reused as ``base_margin``.  Skipped when ``None``.
    """
    # Hyper-parameters shared by all three models; only the round count varies.
    shared = {"learning_rate": 0.3, "random_state": 0, "tree_method": tree_method}

    # Stage one: produce the margin that seeds the second model.
    first_stage = xgb.XGBClassifier(n_estimators=4, **shared)
    first_stage.fit(X=X, y=y)
    margin = first_stage.get_booster().inplace_predict(X, predict_type="margin")
    if as_frame is not None:
        margin = as_frame(margin)

    # Stage two: continue boosting on top of the stage-one margin.
    second_stage = xgb.XGBClassifier(n_estimators=4, **shared)
    second_stage.fit(X=X, y=y, base_margin=margin)
    seeded_data = xgb.DMatrix(X, base_margin=margin)
    boosted = second_stage.get_booster().predict(seeded_data, output_margin=True)

    # Reference: a single model trained for the combined number of rounds.
    reference_model = xgb.XGBClassifier(n_estimators=8, **shared)
    reference_model.fit(X=X, y=y)
    reference = reference_model.get_booster().inplace_predict(
        X, predict_type="margin"
    )

    # Results exposing ``.get()`` are unwrapped through it before comparing
    # (presumably device-side arrays -- TODO confirm).
    boosted, reference = (
        result.get() if hasattr(result, "get") else result
        for result in (boosted, reference)
    )
    np.testing.assert_allclose(boosted, reference, atol=1e-6)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
|
@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
|
||||||
def test_boost_from_prediction(tree_method):
|
def test_boost_from_prediction(tree_method):
|
||||||
run_boost_from_prediction(tree_method)
|
from sklearn.datasets import load_breast_cancer, load_digits
|
||||||
|
import pandas as pd
|
||||||
|
X, y = load_breast_cancer(return_X_y=True)
|
||||||
|
|
||||||
|
run_boost_from_prediction_binary(tree_method, X, y, None)
|
||||||
|
run_boost_from_prediction_binary(tree_method, X, y, pd.DataFrame)
|
||||||
|
|
||||||
|
X, y = load_digits(return_X_y=True)
|
||||||
|
|
||||||
|
run_boost_from_prediction_multi_clasas(tree_method, X, y, None)
|
||||||
|
run_boost_from_prediction_multi_clasas(tree_method, X, y, pd.DataFrame)
|
||||||
|
|
||||||
|
|
||||||
def test_estimator_type():
|
def test_estimator_type():
|
||||||
|
|||||||
@ -3,6 +3,7 @@ import os
|
|||||||
import urllib
|
import urllib
|
||||||
import zipfile
|
import zipfile
|
||||||
import sys
|
import sys
|
||||||
|
from typing import Optional
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
|
from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
|
||||||
@ -177,7 +178,7 @@ class TestDataset:
|
|||||||
self.metric = metric
|
self.metric = metric
|
||||||
self.X, self.y = get_dataset()
|
self.X, self.y = get_dataset()
|
||||||
self.w = None
|
self.w = None
|
||||||
self.margin = None
|
self.margin: Optional[np.ndarray] = None
|
||||||
|
|
||||||
def set_params(self, params_in):
|
def set_params(self, params_in):
|
||||||
params_in['objective'] = self.objective
|
params_in['objective'] = self.objective
|
||||||
@ -315,7 +316,7 @@ _unweighted_datasets_strategy = strategies.sampled_from(
|
|||||||
|
|
||||||
@strategies.composite
|
@strategies.composite
|
||||||
def _dataset_weight_margin(draw):
|
def _dataset_weight_margin(draw):
|
||||||
data = draw(_unweighted_datasets_strategy)
|
data: TestDataset = draw(_unweighted_datasets_strategy)
|
||||||
if draw(strategies.booleans()):
|
if draw(strategies.booleans()):
|
||||||
data.w = draw(arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0)))
|
data.w = draw(arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0)))
|
||||||
if draw(strategies.booleans()):
|
if draw(strategies.booleans()):
|
||||||
@ -324,6 +325,8 @@ def _dataset_weight_margin(draw):
|
|||||||
num_class = int(np.max(data.y) + 1)
|
num_class = int(np.max(data.y) + 1)
|
||||||
data.margin = draw(
|
data.margin = draw(
|
||||||
arrays(np.float64, (len(data.y) * num_class), elements=strategies.floats(0.5, 1.0)))
|
arrays(np.float64, (len(data.y) * num_class), elements=strategies.floats(0.5, 1.0)))
|
||||||
|
if num_class != 1:
|
||||||
|
data.margin = data.margin.reshape(data.y.shape[0], num_class)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user