Add scikit-learn tests (#3674)
* Add scikit-learn tests
  The goal is to pass scikit-learn's check_estimator() for XGBClassifier, XGBRegressor, and XGBRanker. It is not actually possible to do so entirely, since check_estimator() assumes that NaN is disallowed, whereas XGBoost accepts NaN as a missing value. However, it is always a good idea to add some checks inspired by check_estimator().
* Fix lint
* Fix lint
This commit is contained in:
parent
190d888695
commit
d176a0fbc8
@ -9,7 +9,6 @@ import ctypes
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.sparse
|
import scipy.sparse
|
||||||
|
|
||||||
@ -374,11 +373,15 @@ class DMatrix(object):
|
|||||||
if label is not None:
|
if label is not None:
|
||||||
if isinstance(label, np.ndarray):
|
if isinstance(label, np.ndarray):
|
||||||
self.set_label_npy2d(label)
|
self.set_label_npy2d(label)
|
||||||
|
elif getattr(label, '__array__', None) is not None:
|
||||||
|
self.set_label_npy2d(label.__array__())
|
||||||
else:
|
else:
|
||||||
self.set_label(label)
|
self.set_label(label)
|
||||||
if weight is not None:
|
if weight is not None:
|
||||||
if isinstance(weight, np.ndarray):
|
if isinstance(weight, np.ndarray):
|
||||||
self.set_weight_npy2d(weight)
|
self.set_weight_npy2d(weight)
|
||||||
|
elif getattr(weight, '__array__', None) is not None:
|
||||||
|
self.set_weight_npy2d(weight.__array__())
|
||||||
else:
|
else:
|
||||||
self.set_weight(weight)
|
self.set_weight(weight)
|
||||||
|
|
||||||
@ -428,7 +431,7 @@ class DMatrix(object):
|
|||||||
and type if memory use is a concern.
|
and type if memory use is a concern.
|
||||||
"""
|
"""
|
||||||
if len(mat.shape) != 2:
|
if len(mat.shape) != 2:
|
||||||
raise ValueError('Input numpy.ndarray must be 2 dimensional')
|
raise ValueError('Input numpy.ndarray must be 2 dimensional. Reshape your data.')
|
||||||
# flatten the array by rows and ensure it is float32.
|
# flatten the array by rows and ensure it is float32.
|
||||||
# we try to avoid data copies if possible (reshape returns a view when possible
|
# we try to avoid data copies if possible (reshape returns a view when possible
|
||||||
# and we explicitly tell np.array to try and avoid copying)
|
# and we explicitly tell np.array to try and avoid copying)
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
# pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme, E0012, R0912
|
# pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme, E0012, R0912, C0302
|
||||||
"""Scikit-Learn Wrapper interface for XGBoost."""
|
"""Scikit-Learn Wrapper interface for XGBoost."""
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import warnings
|
import warnings
|
||||||
|
from sklearn.exceptions import NotFittedError
|
||||||
|
from sklearn.exceptions import DataConversionWarning
|
||||||
from .core import Booster, DMatrix, XGBoostError
|
from .core import Booster, DMatrix, XGBoostError
|
||||||
from .training import train
|
from .training import train
|
||||||
|
|
||||||
@ -14,6 +16,16 @@ from .compat import (SKLEARN_INSTALLED, XGBModelBase,
|
|||||||
XGBClassifierBase, XGBRegressorBase, XGBLabelEncoder)
|
XGBClassifierBase, XGBRegressorBase, XGBLabelEncoder)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_label_1d(label):
|
||||||
|
"""Produce warning if label is not 1D array"""
|
||||||
|
label = np.array(label, copy=False, dtype=np.float32)
|
||||||
|
if len(label.shape) == 2 and label.shape[1] == 1:
|
||||||
|
warnings.warn('A column-vector y was passed when a 1d array was'
|
||||||
|
' expected. Please change the shape of y to '
|
||||||
|
'(n_samples, ), for example using ravel().',
|
||||||
|
DataConversionWarning, stacklevel=2)
|
||||||
|
|
||||||
|
|
||||||
def _objective_decorator(func):
|
def _objective_decorator(func):
|
||||||
"""Decorate an objective function
|
"""Decorate an objective function
|
||||||
|
|
||||||
@ -178,7 +190,7 @@ class XGBModel(XGBModelBase):
|
|||||||
booster : a xgboost booster of underlying model
|
booster : a xgboost booster of underlying model
|
||||||
"""
|
"""
|
||||||
if self._Booster is None:
|
if self._Booster is None:
|
||||||
raise XGBoostError('need to call fit or load_model beforehand')
|
raise NotFittedError('need to call fit or load_model beforehand')
|
||||||
return self._Booster
|
return self._Booster
|
||||||
|
|
||||||
def get_params(self, deep=False):
|
def get_params(self, deep=False):
|
||||||
@ -286,6 +298,7 @@ class XGBModel(XGBModelBase):
|
|||||||
file name of stored xgb model or 'Booster' instance Xgb model to be
|
file name of stored xgb model or 'Booster' instance Xgb model to be
|
||||||
loaded before training (allows training continuation).
|
loaded before training (allows training continuation).
|
||||||
"""
|
"""
|
||||||
|
_check_label_1d(label=y)
|
||||||
if sample_weight is not None:
|
if sample_weight is not None:
|
||||||
trainDmatrix = DMatrix(X, label=y, weight=sample_weight,
|
trainDmatrix = DMatrix(X, label=y, weight=sample_weight,
|
||||||
missing=self.missing, nthread=self.n_jobs)
|
missing=self.missing, nthread=self.n_jobs)
|
||||||
@ -536,6 +549,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
|||||||
file name of stored xgb model or 'Booster' instance Xgb model to be
|
file name of stored xgb model or 'Booster' instance Xgb model to be
|
||||||
loaded before training (allows training continuation).
|
loaded before training (allows training continuation).
|
||||||
"""
|
"""
|
||||||
|
_check_label_1d(label=y)
|
||||||
evals_result = {}
|
evals_result = {}
|
||||||
self.classes_ = np.unique(y)
|
self.classes_ = np.unique(y)
|
||||||
self.n_classes_ = len(self.classes_)
|
self.n_classes_ = len(self.classes_)
|
||||||
@ -912,6 +926,7 @@ class XGBRanker(XGBModel):
|
|||||||
file name of stored xgb model or 'Booster' instance Xgb model to be
|
file name of stored xgb model or 'Booster' instance Xgb model to be
|
||||||
loaded before training (allows training continuation).
|
loaded before training (allows training continuation).
|
||||||
"""
|
"""
|
||||||
|
_check_label_1d(label=y)
|
||||||
# check if group information is provided
|
# check if group information is provided
|
||||||
if group is None:
|
if group is None:
|
||||||
raise ValueError("group is required for ranking task")
|
raise ValueError("group is required for ranking task")
|
||||||
|
|||||||
@ -203,6 +203,18 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
|||||||
DeprecationWarning)
|
DeprecationWarning)
|
||||||
callbacks.append(callback.reset_learning_rate(learning_rates))
|
callbacks.append(callback.reset_learning_rate(learning_rates))
|
||||||
|
|
||||||
|
nrow = dtrain.num_row()
|
||||||
|
ncol = dtrain.num_col()
|
||||||
|
if nrow <= 0:
|
||||||
|
raise ValueError('{} row(s) (shape=({}, {})) while a minimum of 1 is required.'
|
||||||
|
.format(nrow, nrow, ncol))
|
||||||
|
if ncol <= 0:
|
||||||
|
raise ValueError('{} feature(s) (shape=({}, {})) while a minimum of 1 is required.'
|
||||||
|
.format(ncol, nrow, ncol))
|
||||||
|
label = dtrain.get_label()
|
||||||
|
if nrow != len(label):
|
||||||
|
raise ValueError('Label must have same length as the number of data rows')
|
||||||
|
|
||||||
return _train_internal(params, dtrain,
|
return _train_internal(params, dtrain,
|
||||||
num_boost_round=num_boost_round,
|
num_boost_round=num_boost_round,
|
||||||
evals=evals,
|
evals=evals,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user