diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 117f672f3..1e02e5343 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -9,7 +9,6 @@ import ctypes import os import re import sys - import numpy as np import scipy.sparse @@ -374,11 +373,15 @@ class DMatrix(object): if label is not None: if isinstance(label, np.ndarray): self.set_label_npy2d(label) + elif getattr(label, '__array__', None) is not None: + self.set_label_npy2d(label.__array__()) else: self.set_label(label) if weight is not None: if isinstance(weight, np.ndarray): self.set_weight_npy2d(weight) + elif getattr(weight, '__array__', None) is not None: + self.set_weight_npy2d(weight.__array__()) else: self.set_weight(weight) @@ -428,7 +431,7 @@ class DMatrix(object): and type if memory use is a concern. """ if len(mat.shape) != 2: - raise ValueError('Input numpy.ndarray must be 2 dimensional') + raise ValueError('Input numpy.ndarray must be 2 dimensional. Reshape your data.') # flatten the array by rows and ensure it is float32. # we try to avoid data copies if possible (reshape returns a view when possible # and we explicitly tell np.array to try and avoid copying) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 2a66959a5..c0f63f76b 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -1,10 +1,12 @@ # coding: utf-8 -# pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme, E0012, R0912 +# pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme, E0012, R0912, C0302 """Scikit-Learn Wrapper interface for XGBoost.""" from __future__ import absolute_import import numpy as np import warnings +from sklearn.exceptions import NotFittedError +from sklearn.exceptions import DataConversionWarning from .core import Booster, DMatrix, XGBoostError from .training import train @@ -14,6 +16,16 @@ from .compat import (SKLEARN_INSTALLED, XGBModelBase, XGBClassifierBase, XGBRegressorBase, XGBLabelEncoder) +def _check_label_1d(label): + """Produce warning if label is not 1D array""" + label = np.array(label, copy=False, dtype=np.float32) + if len(label.shape) == 2 and label.shape[1] == 1: + warnings.warn('A column-vector y was passed when a 1d array was' + ' expected. Please change the shape of y to ' + '(n_samples, ), for example using ravel().', + DataConversionWarning, stacklevel=2) + + def _objective_decorator(func): """Decorate an objective function @@ -178,7 +190,7 @@ class XGBModel(XGBModelBase): booster : a xgboost booster of underlying model """ if self._Booster is None: - raise XGBoostError('need to call fit or load_model beforehand') + raise NotFittedError('need to call fit or load_model beforehand') return self._Booster def get_params(self, deep=False): @@ -286,6 +298,7 @@ class XGBModel(XGBModelBase): file name of stored xgb model or 'Booster' instance Xgb model to be loaded before training (allows training continuation). """ + _check_label_1d(label=y) if sample_weight is not None: trainDmatrix = DMatrix(X, label=y, weight=sample_weight, missing=self.missing, nthread=self.n_jobs) @@ -536,6 +549,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): file name of stored xgb model or 'Booster' instance Xgb model to be loaded before training (allows training continuation). """ + _check_label_1d(label=y) evals_result = {} self.classes_ = np.unique(y) self.n_classes_ = len(self.classes_) @@ -912,6 +926,7 @@ class XGBRanker(XGBModel): file name of stored xgb model or 'Booster' instance Xgb model to be loaded before training (allows training continuation). """ + _check_label_1d(label=y) # check if group information is provided if group is None: raise ValueError("group is required for ranking task") diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py index eb568da65..419bdddd1 100644 --- a/python-package/xgboost/training.py +++ b/python-package/xgboost/training.py @@ -203,6 +203,18 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, DeprecationWarning) callbacks.append(callback.reset_learning_rate(learning_rates)) + nrow = dtrain.num_row() + ncol = dtrain.num_col() + if nrow <= 0: + raise ValueError('{} row(s) (shape=({}, {})) while a minimum of 1 is required.' + .format(nrow, nrow, ncol)) + if ncol <= 0: + raise ValueError('{} feature(s) (shape=({}, {})) while a minimum of 1 is required.' + .format(ncol, nrow, ncol)) + label = dtrain.get_label() + if nrow != len(label): + raise ValueError('Label must have same length as the number of data rows') + return _train_internal(params, dtrain, num_boost_round=num_boost_round, evals=evals,