diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index 791fcfb41..8229095a3 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -215,7 +215,8 @@ class XGBModel(XGBModelBase):
         return xgb_params
 
     def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,
-            early_stopping_rounds=None, verbose=True, xgb_model=None):
+            early_stopping_rounds=None, verbose=True, xgb_model=None,
+            sample_weight_eval_set=None):
         # pylint: disable=missing-docstring,invalid-name,attribute-defined-outside-init
         """
         Fit the gradient boosting model
@@ -231,6 +232,9 @@ class XGBModel(XGBModelBase):
         eval_set : list, optional
             A list of (X, y) tuple pairs to use as a validation set for
             early-stopping
+        sample_weight_eval_set : list, optional
+            A list of the form [L_1, L_2, ..., L_n], where each L_i is a list of
+            instance weights on the i-th validation set.
         eval_metric : str, callable, optional
             If a str, should be a built-in evaluation metric to use. See
             doc/parameter.md. If callable, a custom evaluation metric. The call
@@ -263,9 +267,14 @@ class XGBModel(XGBModelBase):
         trainDmatrix = DMatrix(X, label=y, missing=self.missing, nthread=self.n_jobs)
 
         evals_result = {}
+
         if eval_set is not None:
-            evals = list(DMatrix(x[0], label=x[1], missing=self.missing,
-                                 nthread=self.n_jobs) for x in eval_set)
+            if sample_weight_eval_set is None:
+                sample_weight_eval_set = [None] * len(eval_set)
+            evals = list(
+                DMatrix(eval_set[i][0], label=eval_set[i][1], missing=self.missing,
+                        weight=sample_weight_eval_set[i], nthread=self.n_jobs)
+                for i in range(len(eval_set)))
             evals = list(zip(evals, ["validation_{}".format(i) for i in
                                      range(len(evals))]))
         else:
@@ -408,7 +417,8 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
                                             random_state, seed, missing, **kwargs)
 
     def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,
-            early_stopping_rounds=None, verbose=True, xgb_model=None):
+            early_stopping_rounds=None, verbose=True, xgb_model=None,
+            sample_weight_eval_set=None):
         # pylint: disable = attribute-defined-outside-init,arguments-differ
         """
         Fit gradient boosting classifier
@@ -424,6 +434,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
         eval_set : list, optional
             A list of (X, y) pairs to use as a validation set for
             early-stopping
+        sample_weight_eval_set : list, optional
+            A list of the form [L_1, L_2, ..., L_n], where each L_i is a list of
+            instance weights on the i-th validation set.
         eval_metric : str, callable, optional
             If a str, should be a built-in evaluation metric to use. See
             doc/parameter.md. If callable, a custom evaluation metric. The call
@@ -478,11 +491,13 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
         training_labels = self._le.transform(y)
 
         if eval_set is not None:
-            # TODO: use sample_weight if given?
+            if sample_weight_eval_set is None:
+                sample_weight_eval_set = [None] * len(eval_set)
             evals = list(
-                DMatrix(x[0], label=self._le.transform(x[1]),
-                        missing=self.missing, nthread=self.n_jobs)
-                for x in eval_set
+                DMatrix(eval_set[i][0], label=self._le.transform(eval_set[i][1]),
+                        missing=self.missing, weight=sample_weight_eval_set[i],
+                        nthread=self.n_jobs)
+                for i in range(len(eval_set))
             )
             nevals = len(evals)
             eval_names = ["validation_{}".format(i) for i in range(nevals)]
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 4ce2a5be5..6fc2eaecb 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -370,3 +370,91 @@ def test_sklearn_clone():
     clf = xgb.XGBClassifier(n_jobs=2, nthread=3)
     clf.n_jobs = -1
     clone(clf)
+
+
+def test_validation_weights_xgbmodel():
+    tm._skip_if_no_sklearn()
+    from sklearn.datasets import make_hastie_10_2
+
+    # prepare training and test data
+    X, y = make_hastie_10_2(n_samples=2000, random_state=42)
+    labels, y = np.unique(y, return_inverse=True)
+    X_train, X_test = X[:1600], X[1600:]
+    y_train, y_test = y[:1600], y[1600:]
+
+    # instantiate model
+    param_dist = {'objective': 'binary:logistic', 'n_estimators': 2,
+                  'random_state': 123}
+    clf = xgb.sklearn.XGBModel(**param_dist)
+
+    # train it using instance weights only in the training set
+    weights_train = np.random.choice([1, 2], len(X_train))
+    clf.fit(X_train, y_train,
+            sample_weight=weights_train,
+            eval_set=[(X_test, y_test)],
+            eval_metric='logloss',
+            verbose=False)
+
+    # evaluate logloss metric on test set *without* using weights
+    evals_result_without_weights = clf.evals_result()
+    logloss_without_weights = evals_result_without_weights["validation_0"]["logloss"]
+
+    # now use weights for the test set
+    np.random.seed(0)
+    weights_test = np.random.choice([1, 2], len(X_test))
+    clf.fit(X_train, y_train,
+            sample_weight=weights_train,
+            eval_set=[(X_test, y_test)],
+            sample_weight_eval_set=[weights_test],
+            eval_metric='logloss',
+            verbose=False)
+    evals_result_with_weights = clf.evals_result()
+    logloss_with_weights = evals_result_with_weights["validation_0"]["logloss"]
+
+    # check that the logloss in the test set is actually different when using weights
+    # than when not using them
+    assert all((logloss_with_weights[i] != logloss_without_weights[i] for i in [0, 1]))
+
+
+def test_validation_weights_xgbclassifier():
+    tm._skip_if_no_sklearn()
+    from sklearn.datasets import make_hastie_10_2
+
+    # prepare training and test data
+    X, y = make_hastie_10_2(n_samples=2000, random_state=42)
+    labels, y = np.unique(y, return_inverse=True)
+    X_train, X_test = X[:1600], X[1600:]
+    y_train, y_test = y[:1600], y[1600:]
+
+    # instantiate model
+    param_dist = {'objective': 'binary:logistic', 'n_estimators': 2,
+                  'random_state': 123}
+    clf = xgb.sklearn.XGBClassifier(**param_dist)
+
+    # train it using instance weights only in the training set
+    weights_train = np.random.choice([1, 2], len(X_train))
+    clf.fit(X_train, y_train,
+            sample_weight=weights_train,
+            eval_set=[(X_test, y_test)],
+            eval_metric='logloss',
+            verbose=False)
+
+    # evaluate logloss metric on test set *without* using weights
+    evals_result_without_weights = clf.evals_result()
+    logloss_without_weights = evals_result_without_weights["validation_0"]["logloss"]
+
+    # now use weights for the test set
+    np.random.seed(0)
+    weights_test = np.random.choice([1, 2], len(X_test))
+    clf.fit(X_train, y_train,
+            sample_weight=weights_train,
+            eval_set=[(X_test, y_test)],
+            sample_weight_eval_set=[weights_test],
+            eval_metric='logloss',
+            verbose=False)
+    evals_result_with_weights = clf.evals_result()
+    logloss_with_weights = evals_result_with_weights["validation_0"]["logloss"]
+
+    # check that the logloss in the test set is actually different when using weights
+    # than when not using them
+    assert all((logloss_with_weights[i] != logloss_without_weights[i] for i in [0, 1]))
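
Not part of the patch: a minimal usage sketch of the new sample_weight_eval_set argument, mirroring the data setup used in the tests above (the weight values and n_estimators here are arbitrary illustration choices):

import numpy as np
import xgboost as xgb
from sklearn.datasets import make_hastie_10_2

# Synthetic binary classification data, split into train and validation parts.
X, y = make_hastie_10_2(n_samples=2000, random_state=42)
y = (y > 0).astype(int)  # map labels from {-1, 1} to {0, 1}
X_train, X_valid = X[:1600], X[1600:]
y_train, y_valid = y[:1600], y[1600:]

# One weight per training instance, and one weight array per eval_set entry.
rng = np.random.RandomState(0)
weights_train = rng.choice([1, 2], len(X_train))
weights_valid = rng.choice([1, 2], len(X_valid))

clf = xgb.XGBClassifier(objective='binary:logistic', n_estimators=10)
clf.fit(X_train, y_train,
        sample_weight=weights_train,             # weights the training set
        eval_set=[(X_valid, y_valid)],
        sample_weight_eval_set=[weights_valid],  # weights validation_0, same order as eval_set
        eval_metric='logloss',
        verbose=False)

# The reported logloss is now a weighted metric over the validation set.
print(clf.evals_result()['validation_0']['logloss'])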