From 6e27d7539f57bfe9f26054221208a56516d71771 Mon Sep 17 00:00:00 2001
From: Alexis Mignon <alexis.mignon@probayes.com>
Date: Tue, 16 Feb 2016 10:59:25 +0100
Subject: [PATCH] - Added test cases for the use of custom objective functions
 - Made the indentation more consistent with pep8

---
 tests/python/test_with_sklearn.py | 164 +++++++++++++++++++++---------
 1 file changed, 116 insertions(+), 48 deletions(-)

diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 3e31ddb65..5cfe40891 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -1,6 +1,6 @@
 import xgboost as xgb
 import numpy as np
-from sklearn.cross_validation import KFold, train_test_split
+from sklearn.cross_validation import KFold
 from sklearn.metrics import mean_squared_error
 from sklearn.grid_search import GridSearchCV
 from sklearn.datasets import load_iris, load_digits, load_boston
@@ -8,57 +8,125 @@ from sklearn.datasets import load_iris, load_digits, load_boston
 rng = np.random.RandomState(1994)
 
 def test_binary_classification():
-	digits = load_digits(2)
-	y = digits['target']
-	X = digits['data']
-	kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-	for train_index, test_index in kf:
-	    xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
-	    preds = xgb_model.predict(X[test_index])
-	    labels = y[test_index]
-	    err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
-	assert err < 0.1
+    """Check XGBClassifier error on a two-class digits task, fold by fold."""
+    digits = load_digits(2)
+    X, y = digits['data'], digits['target']
+    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf:
+        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
+        preds = xgb_model.predict(X[test_index])
+        err = np.mean((preds > 0.5).astype(int) != y[test_index])
+        # Assert inside the loop so every fold is validated, not just the last.
+        assert err < 0.1
 
 def test_multiclass_classification():
-	iris = load_iris()
-	y = iris['target']
-	X = iris['data']
-	kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-	for train_index, test_index in kf:
-	    xgb_model = xgb.XGBClassifier().fit(X[train_index],y[train_index])
-	    preds = xgb_model.predict(X[test_index])
-		# test other params in XGBClassifier().fit
-	    preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
-	    preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
-	    preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
-	    labels = y[test_index]
-	    err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
-	assert err < 0.4
+    """Check XGBClassifier error on iris and exercise predict() options."""
+    iris = load_iris()
+    X, y = iris['data'], iris['target']
+    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf:
+        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
+        preds = xgb_model.predict(X[test_index])
+        # Smoke-test the other predict() parameters; outputs are not checked.
+        preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
+        preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
+        preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
+        # Compare labels directly; a 0.5 threshold cannot encode three classes.
+        err = np.mean(preds != y[test_index])
+        assert err < 0.4
 
 def test_boston_housing_regression():
-	boston = load_boston()
-	y = boston['target']
-	X = boston['data']
-	kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-	for train_index, test_index in kf:
-	    xgb_model = xgb.XGBRegressor().fit(X[train_index],y[train_index])
-	    preds = xgb_model.predict(X[test_index])
-	    # test other params in XGBRegressor().fit
-	    preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
-	    preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
-	    preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
-	    labels = y[test_index]
-	assert mean_squared_error(preds, labels) < 25
+    """Check XGBRegressor MSE on Boston housing, fold by fold."""
+    boston = load_boston()
+    X, y = boston['data'], boston['target']
+    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf:
+        xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
+        preds = xgb_model.predict(X[test_index])
+        # Smoke-test the other predict() parameters; outputs are not checked.
+        preds2 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=3)
+        preds3 = xgb_model.predict(X[test_index], output_margin=True, ntree_limit=0)
+        preds4 = xgb_model.predict(X[test_index], output_margin=False, ntree_limit=3)
+        # Assert inside the loop so every fold is validated, not just the last.
+        assert mean_squared_error(y[test_index], preds) < 25
 
 def test_parameter_tuning():
-	boston = load_boston()
-	y = boston['target']
-	X = boston['data']
-	xgb_model = xgb.XGBRegressor()
-	clf = GridSearchCV(xgb_model,
-	                   {'max_depth': [2,4,6],
-	                    'n_estimators': [50,100,200]}, verbose=1)
-	clf.fit(X,y)
-	assert clf.best_score_ < 0.7
-	assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
+    """Grid-search max_depth and n_estimators for XGBRegressor on Boston."""
+    data = load_boston()
+    features, targets = data['data'], data['target']
+    param_grid = {'max_depth': [2, 4, 6], 'n_estimators': [50, 100, 200]}
+    search = GridSearchCV(xgb.XGBRegressor(), param_grid, verbose=1)
+    search.fit(features, targets)
+    # Expected score bound and winning parameters for the grid above.
+    assert search.best_score_ < 0.7
+    expected_best = {'n_estimators': 100, 'max_depth': 4}
+    assert search.best_params_ == expected_best
+
+def test_regression_with_custom_objective():
+    """Fit XGBRegressor with a callable objective and check it is invoked."""
+    def objective_ls(y_true, y_pred):
+        # Least-squares objective: gradient is the residual, hessian is one.
+        grad = y_pred - y_true
+        hess = np.ones(len(y_true))
+        return grad, hess
+
+    boston = load_boston()
+    X, y = boston['data'], boston['target']
+    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf:
+        xgb_model = xgb.XGBRegressor(objective=objective_ls).fit(
+            X[train_index], y[train_index]
+        )
+        preds = xgb_model.predict(X[test_index])
+        # Assert inside the loop so every fold is validated, not just the last.
+        assert mean_squared_error(y[test_index], preds) < 25
+
+    # Test that the custom objective function is actually used
+    class XGBCustomObjectiveException(Exception):
+        pass
+
+    def dummy_objective(y_true, y_pred):
+        raise XGBCustomObjectiveException()
+
+    xgb_model = xgb.XGBRegressor(objective=dummy_objective)
+    # The test expects fit() to surface the exception raised by the objective.
+    np.testing.assert_raises(
+        XGBCustomObjectiveException, xgb_model.fit, X, y
+    )
+
+def test_classification_with_custom_objective():
+    """Fit XGBClassifier with a callable objective and check it is invoked."""
+    def logregobj(y_true, y_pred):
+        # Logistic loss: squash margins to probabilities, then differentiate.
+        y_pred = 1.0 / (1.0 + np.exp(-y_pred))
+        grad = y_pred - y_true
+        hess = y_pred * (1.0 - y_pred)
+        return grad, hess
+
+    digits = load_digits(2)
+    X, y = digits['data'], digits['target']
+    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf:
+        xgb_model = xgb.XGBClassifier(objective=logregobj).fit(
+            X[train_index], y[train_index]
+        )
+        preds = xgb_model.predict(X[test_index])
+        labels = y[test_index]
+        # Fraction of held-out samples whose thresholded prediction is wrong.
+        err = np.mean((preds > 0.5).astype(int) != labels)
+        # Assert inside the loop so every fold is validated, not just the last.
+        assert err < 0.1
+
+    # Test that the custom objective function is actually used
+    class XGBCustomObjectiveException(Exception):
+        pass
+
+    def dummy_objective(y_true, y_pred):
+        raise XGBCustomObjectiveException()
+
+    xgb_model = xgb.XGBClassifier(objective=dummy_objective)
+    # The test expects fit() to surface the exception raised by the objective.
+    np.testing.assert_raises(
+        XGBCustomObjectiveException, xgb_model.fit, X, y
+    )