Fix some stuff

parent 136e902fb2
commit a1a427af37
@@ -11,7 +11,7 @@ import xgboost as xgb
 import numpy as np
 from sklearn.cross_validation import KFold
 from sklearn.grid_search import GridSearchCV
-from sklearn.metrics import confusion_matrix
+from sklearn.metrics import confusion_matrix, mean_squared_error
 from sklearn.datasets import load_iris, load_digits, load_boston
 
 rng = np.random.RandomState(31337)
@@ -39,4 +39,26 @@ for train_index, test_index in kf:
     actuals = y[test_index]
     print(confusion_matrix(actuals, predictions))
 
+print("Boston Housing: regression")
+boston = load_boston()
+y = boston['target']
+X = boston['data']
+kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+for train_index, test_index in kf:
+    xgb_model = xgb.XGBRegressor().fit(X[train_index],y[train_index])
+    predictions = xgb_model.predict(X[test_index])
+    actuals = y[test_index]
+    print(mean_squared_error(actuals, predictions))
+
+print("Parameter optimization")
+y = boston['target']
+X = boston['data']
+xgb_model = xgb.XGBRegressor()
+clf = GridSearchCV(xgb_model,
+                   {'max_depth': [2,4,6],
+                    'n_estimators': [50,100,200]}, verbose=1)
+clf.fit(X,y)
+print(clf.best_score_)
+print(clf.best_params_)
 
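
The block added above extends the demo: a 2-fold cross-validated XGBRegressor on Boston Housing scored with mean_squared_error, followed by a GridSearchCV sweep over max_depth and n_estimators. The sklearn.cross_validation and sklearn.grid_search paths are the pre-0.18 scikit-learn layout; against a later scikit-learn the same demo would target sklearn.model_selection, roughly as in this sketch (an orientation aid only, not part of the commit; load_boston itself was removed in scikit-learn 1.2):

    # Rough modern-API equivalent of the demo added above (scikit-learn
    # >= 0.18, < 1.2). Untested sketch, not part of this commit.
    import xgboost as xgb
    import numpy as np
    from sklearn.model_selection import KFold, GridSearchCV
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston

    rng = np.random.RandomState(31337)
    boston = load_boston()
    X, y = boston['data'], boston['target']

    # KFold now takes n_splits and is split on the data, not on y.shape[0]
    for train_index, test_index in KFold(n_splits=2, shuffle=True, random_state=rng).split(X):
        xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
        print(mean_squared_error(y[test_index], xgb_model.predict(X[test_index])))

    clf = GridSearchCV(xgb.XGBRegressor(),
                       {'max_depth': [2, 4, 6], 'n_estimators': [50, 100, 200]},
                       verbose=1)
    clf.fit(X, y)
    print(clf.best_score_, clf.best_params_)

The remaining hunks move from the demo script to the scikit-learn wrapper module itself.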
@@ -16,6 +16,7 @@ import scipy.sparse
 
 try:
     from sklearn.base import BaseEstimator
+    from sklearn.base import RegressorMixin, ClassifierMixin
     from sklearn.preprocessing import LabelEncoder
     SKLEARN_INSTALLED = True
 except ImportError:
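
The two mixins are imported inside the same try/except guard that sets SKLEARN_INSTALLED, so the module still imports cleanly when scikit-learn is absent. A minimal sketch of that guard pattern; the except body here is an assumed completion, since only the try side appears in this hunk:

    # Optional-dependency guard: record that scikit-learn is missing rather
    # than failing at import time. The fallback below is illustrative, not
    # this file's exact except branch.
    try:
        from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
        from sklearn.preprocessing import LabelEncoder
        SKLEARN_INSTALLED = True
    except ImportError:
        SKLEARN_INSTALLED = False  # wrapper classes can check this and raise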
@@ -716,40 +717,32 @@ class XGBModel(BaseEstimator):
         self._Booster = train(self.get_xgb_params(), trainDmatrix, self.n_rounds)
         return self
 
-class XGBClassifier(XGBModel):
+    def predict(self, X):
+        testDmatrix = DMatrix(X)
+        return self._Booster.predict(testDmatrix)
+
+class XGBClassifier(XGBModel, ClassifierMixin):
     def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True):
         super().__init__(max_depth, learning_rate, n_estimators, silent, objective="binary:logistic")
 
     def fit(self, X, y, sample_weight=None):
         y_values = list(np.unique(y))
-        if len(y_values) == 2:
-            # Map the two classes in the y vector into {0,1}, and record the mapping so that
-            # the predict() method can return results in the original range
-            if not (-1 in y_values and 1 in y_values) or (0 in y_values and 1 in y_values) or (True in y_values and False in y_values):
-                raise ValueError("For a binary classifier, y must be in (0,1), or (-1,1), or (True,False).")
-            if -1 in y_values:
-                self._yspace = "svm_like"
-                training_labels = y.copy()
-                training_labels[training_labels == -1] = 0
-            elif False in y_values:
-                self._yspace = "boolean"
-                training_labels = np.array(y, dtype=int)
-            else:
-                self._yspace = "zero_one"
-                training_labels = y
-            xgb_options = self.get_xgb_params()
-        else:
+        if len(y_values) > 2:
             # Switch to using a multiclass objective in the underlying XGB instance
-            self._yspace = "multiclass"
             self.objective = "multi:softprob"
-            self._le = LabelEncoder().fit(y)
-            training_labels = self._le.transform(y)
             xgb_options = self.get_xgb_params()
             xgb_options['num_class'] = len(y_values)
+        else:
+            xgb_options = self.get_xgb_params()
+
+        self._le = LabelEncoder().fit(y)
+        training_labels = self._le.transform(y)
+
         if sample_weight is not None:
             trainDmatrix = DMatrix(X, label=training_labels, weight=sample_weight)
         else:
             trainDmatrix = DMatrix(X, label=training_labels)
 
         self._Booster = train(xgb_options, trainDmatrix, self.n_rounds)
+
         return self
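
The rewritten fit() drops the _yspace bookkeeping and the hand-rolled {0,1}/(-1,1)/(True,False) validation: every label space, binary or multiclass, now goes through a single LabelEncoder whose mapping is stored in self._le so predict() can invert it. A standalone round-trip sketch (not from the commit) of why that subsumes the deleted special cases:

    # LabelEncoder maps any label set to contiguous ints 0..K-1 and can
    # invert the mapping, covering the {0,1}, {-1,1}, boolean, and
    # arbitrary-multiclass cases the old code handled one by one.
    import numpy as np
    from sklearn.preprocessing import LabelEncoder

    for y in (np.array([0, 1, 1, 0]),
              np.array([-1, 1, 1, -1]),
              np.array([True, False, True]),
              np.array(['cat', 'dog', 'bird'])):
        le = LabelEncoder().fit(y)
        encoded = le.transform(y)
        assert (le.inverse_transform(encoded) == y).all()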
@@ -757,22 +750,12 @@ class XGBClassifier(XGBModel):
     def predict(self, X):
         testDmatrix = DMatrix(X)
         class_probs = self._Booster.predict(testDmatrix)
-        if self._yspace == "multiclass":
+        if len(class_probs.shape) > 1:
             column_indexes = np.argmax(class_probs, axis=1)
-            fitted_values = self._le.inverse_transform(column_indexes)
         else:
-            if self._yspace == "svm_like":
-                base_value = -1
-                one_value = 1
-            elif self._yspace == "boolean":
-                base_value = False
-                one_value = True
-            else:
-                base_value = 0
-                one_value = 1
-            fitted_values = np.repeat(base_value, X.shape[0])
-            fitted_values[class_probs > 0.5] = one_value
-        return fitted_values
+            column_indexes = np.repeat(0, X.shape[0])
+            column_indexes[class_probs > 0.5] = 1
+        return self._le.inverse_transform(column_indexes)
 
     def predict_proba(self, X):
         testDmatrix = DMatrix(X)
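
With labels always LabelEncoder-encoded, predict() can dispatch on the shape of the booster output instead of on _yspace: multi:softprob yields an (n_samples, n_classes) probability matrix, while binary:logistic yields a 1-D vector of class-one probabilities. A standalone sketch of the dispatch on dummy arrays (labels_from_probs is an illustrative name, not in the wrapper):

    import numpy as np

    def labels_from_probs(class_probs, n_rows):
        # 2-D output: per-class probabilities (multi:softprob) -> argmax
        if len(class_probs.shape) > 1:
            return np.argmax(class_probs, axis=1)
        # 1-D output: P(class 1) from binary:logistic -> threshold at 0.5
        column_indexes = np.repeat(0, n_rows)
        column_indexes[class_probs > 0.5] = 1
        return column_indexes

    print(labels_from_probs(np.array([[0.1, 0.7, 0.2]]), 1))  # [1]
    print(labels_from_probs(np.array([0.2, 0.9]), 2))         # [0 1]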
@@ -784,6 +767,7 @@ class XGBClassifier(XGBModel):
         classzero_probs = 1.0 - classone_probs
         return np.vstack((classzero_probs,classone_probs)).transpose()
 
+class XGBRegressor(XGBModel, RegressorMixin):
+    pass
 
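
Mixing RegressorMixin into the otherwise-empty XGBRegressor is enough to pick up scikit-learn's stock score() method (R^2) on top of the fit() and predict() inherited from XGBModel. A hedged usage sketch against this revision of the wrapper:

    import xgboost as xgb
    from sklearn.datasets import load_boston

    boston = load_boston()
    reg = xgb.XGBRegressor().fit(boston['data'], boston['target'])
    # score() comes from RegressorMixin: R^2 of the predictions on (X, y)
    print(reg.score(boston['data'], boston['target']))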