Feature weights (#5962)
This commit is contained in:
@@ -455,7 +455,8 @@ class DMatrix: # pylint: disable=too-many-instance-attributes
|
||||
label_lower_bound=None,
|
||||
label_upper_bound=None,
|
||||
feature_names=None,
|
||||
feature_types=None):
|
||||
feature_types=None,
|
||||
feature_weights=None):
|
||||
'''Set meta info for DMatrix.'''
|
||||
if label is not None:
|
||||
self.set_label(label)
|
||||
@@ -473,6 +474,10 @@ class DMatrix: # pylint: disable=too-many-instance-attributes
|
||||
self.feature_names = feature_names
|
||||
if feature_types is not None:
|
||||
self.feature_types = feature_types
|
||||
if feature_weights is not None:
|
||||
from .data import dispatch_meta_backend
|
||||
dispatch_meta_backend(matrix=self, data=feature_weights,
|
||||
name='feature_weights')
|
||||
|
||||
def get_float_info(self, field):
|
||||
"""Get float property from the DMatrix.
|
||||
|
||||
@@ -530,22 +530,38 @@ def dispatch_data_backend(data, missing, threads,
|
||||
raise TypeError('Not supported type for data.' + str(type(data)))
|
||||
|
||||
|
||||
def _to_data_type(dtype: str, name: str):
|
||||
dtype_map = {'float32': 1, 'float64': 2, 'uint32': 3, 'uint64': 4}
|
||||
if dtype not in dtype_map.keys():
|
||||
raise TypeError(
|
||||
f'Expecting float32, float64, uint32, uint64, got {dtype} ' +
|
||||
f'for {name}.')
|
||||
return dtype_map[dtype]
|
||||
|
||||
|
||||
def _validate_meta_shape(data):
|
||||
if hasattr(data, 'shape'):
|
||||
assert len(data.shape) == 1 or (
|
||||
len(data.shape) == 2 and
|
||||
(data.shape[1] == 0 or data.shape[1] == 1))
|
||||
|
||||
|
||||
def _meta_from_numpy(data, field, dtype, handle):
    """Set meta info *field* on a DMatrix *handle* from a numpy array.

    The rendered block contained two concatenated implementations: the
    legacy typed-copy path (``XGDMatrixSetUIntInfo``/``XGDMatrixSetFloatInfo``
    with an unconditional ``TypeError`` in the else branch) followed by the
    dense-info path.  As written, the dense path was either unreachable
    (the else raised first) or the field was set twice.  Only the
    dense-info path is kept; it hands the array buffer to the C API
    zero-copy and supports every dtype accepted by ``_to_data_type``.

    Parameters
    ----------
    data : numpy.ndarray
        1-D (or column-shaped) array of meta info values.
    field : str
        Name of the meta field, e.g. ``'label'`` or ``'feature_weights'``.
    dtype :
        Requested dtype forwarded to ``_maybe_np_slice`` for normalization.
    handle :
        Raw DMatrix handle passed to the XGBoost C API.
    """
    data = _maybe_np_slice(data, dtype)
    interface = data.__array_interface__
    # A masked array keeps its mask in a separate buffer the C API
    # cannot see, so the values passed would silently include masked
    # entries.
    assert interface.get('mask', None) is None, 'Masked array is not supported'
    size = data.shape[0]

    c_type = _to_data_type(str(data.dtype), field)
    # Pointer to the first element of the underlying buffer; the C side
    # copies from it, so no ownership is transferred.
    ptr = interface['data'][0]
    ptr = ctypes.c_void_p(ptr)
    _check_call(_LIB.XGDMatrixSetDenseInfo(
        handle,
        c_str(field),
        ptr,
        c_bst_ulong(size),
        c_type
    ))
|
||||
|
||||
|
||||
def _meta_from_list(data, field, dtype, handle):
|
||||
@@ -595,6 +611,7 @@ def _meta_from_dt(data, field, dtype, handle):
|
||||
def dispatch_meta_backend(matrix: DMatrix, data, name: str, dtype: str = None):
|
||||
'''Dispatch for meta info.'''
|
||||
handle = matrix.handle
|
||||
_validate_meta_shape(data)
|
||||
if data is None:
|
||||
return
|
||||
if _is_list(data):
|
||||
|
||||
@@ -441,6 +441,7 @@ class XGBModel(XGBModelBase):
|
||||
def fit(self, X, y, sample_weight=None, base_margin=None,
|
||||
eval_set=None, eval_metric=None, early_stopping_rounds=None,
|
||||
verbose=True, xgb_model=None, sample_weight_eval_set=None,
|
||||
feature_weights=None,
|
||||
callbacks=None):
|
||||
# pylint: disable=invalid-name,attribute-defined-outside-init
|
||||
"""Fit gradient boosting model
|
||||
@@ -459,9 +460,6 @@ class XGBModel(XGBModelBase):
|
||||
A list of (X, y) tuple pairs to use as validation sets, for which
|
||||
metrics will be computed.
|
||||
Validation metrics will help us track the performance of the model.
|
||||
sample_weight_eval_set : list, optional
|
||||
A list of the form [L_1, L_2, ..., L_n], where each L_i is a list of
|
||||
instance weights on the i-th validation set.
|
||||
eval_metric : str, list of str, or callable, optional
|
||||
If a str, should be a built-in evaluation metric to use. See
|
||||
doc/parameter.rst.
|
||||
@@ -490,6 +488,13 @@ class XGBModel(XGBModelBase):
|
||||
xgb_model : str
|
||||
file name of stored XGBoost model or 'Booster' instance XGBoost model to be
|
||||
loaded before training (allows training continuation).
|
||||
sample_weight_eval_set : list, optional
|
||||
A list of the form [L_1, L_2, ..., L_n], where each L_i is a list of
|
||||
instance weights on the i-th validation set.
|
||||
feature_weights: array_like
|
||||
Weight for each feature, defines the probability of each feature
|
||||
being selected when colsample is being used. All values must be
|
||||
greater than 0, otherwise a `ValueError` is thrown.
|
||||
callbacks : list of callback functions
|
||||
List of callback functions that are applied at end of each iteration.
|
||||
It is possible to use predefined callbacks by using :ref:`callback_api`.
|
||||
@@ -498,6 +503,7 @@ class XGBModel(XGBModelBase):
|
||||
.. code-block:: python
|
||||
|
||||
[xgb.callback.reset_learning_rate(custom_rates)]
|
||||
|
||||
"""
|
||||
self.n_features_in_ = X.shape[1]
|
||||
|
||||
@@ -505,6 +511,7 @@ class XGBModel(XGBModelBase):
|
||||
base_margin=base_margin,
|
||||
missing=self.missing,
|
||||
nthread=self.n_jobs)
|
||||
train_dmatrix.set_info(feature_weights=feature_weights)
|
||||
|
||||
evals_result = {}
|
||||
|
||||
@@ -759,7 +766,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
def fit(self, X, y, sample_weight=None, base_margin=None,
|
||||
eval_set=None, eval_metric=None,
|
||||
early_stopping_rounds=None, verbose=True, xgb_model=None,
|
||||
sample_weight_eval_set=None, callbacks=None):
|
||||
sample_weight_eval_set=None, feature_weights=None, callbacks=None):
|
||||
# pylint: disable = attribute-defined-outside-init,arguments-differ
|
||||
|
||||
evals_result = {}
|
||||
@@ -821,6 +828,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
train_dmatrix = DMatrix(X, label=training_labels, weight=sample_weight,
|
||||
base_margin=base_margin,
|
||||
missing=self.missing, nthread=self.n_jobs)
|
||||
train_dmatrix.set_info(feature_weights=feature_weights)
|
||||
|
||||
self._Booster = train(xgb_options, train_dmatrix,
|
||||
self.get_num_boosting_rounds(),
|
||||
@@ -1101,10 +1109,10 @@ class XGBRanker(XGBModel):
|
||||
raise ValueError("please use XGBRanker for ranking task")
|
||||
|
||||
def fit(self, X, y, group, sample_weight=None, base_margin=None,
|
||||
eval_set=None,
|
||||
sample_weight_eval_set=None, eval_group=None, eval_metric=None,
|
||||
eval_set=None, sample_weight_eval_set=None,
|
||||
eval_group=None, eval_metric=None,
|
||||
early_stopping_rounds=None, verbose=False, xgb_model=None,
|
||||
callbacks=None):
|
||||
feature_weights=None, callbacks=None):
|
||||
# pylint: disable = attribute-defined-outside-init,arguments-differ
|
||||
"""Fit gradient boosting ranker
|
||||
|
||||
@@ -1170,6 +1178,10 @@ class XGBRanker(XGBModel):
|
||||
xgb_model : str
|
||||
file name of stored XGBoost model or 'Booster' instance XGBoost
|
||||
model to be loaded before training (allows training continuation).
|
||||
feature_weights: array_like
|
||||
Weight for each feature, defines the probability of each feature
|
||||
being selected when colsample is being used. All values must be
|
||||
greater than 0, otherwise a `ValueError` is thrown.
|
||||
callbacks : list of callback functions
|
||||
List of callback functions that are applied at end of each
|
||||
iteration. It is possible to use predefined callbacks by using
|
||||
@@ -1205,6 +1217,7 @@ class XGBRanker(XGBModel):
|
||||
train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
|
||||
base_margin=base_margin,
|
||||
missing=self.missing, nthread=self.n_jobs)
|
||||
train_dmatrix.set_info(feature_weights=feature_weights)
|
||||
train_dmatrix.set_group(group)
|
||||
|
||||
evals_result = {}
|
||||
|
||||
Reference in New Issue
Block a user