diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in
index 6195ce0e7..5cbfd0684 100644
--- a/R-package/src/Makevars.in
+++ b/R-package/src/Makevars.in
@@ -37,6 +37,7 @@ OBJECTS= \
     $(PKGROOT)/src/objective/aft_obj.o \
     $(PKGROOT)/src/objective/adaptive.o \
     $(PKGROOT)/src/objective/init_estimation.o \
+    $(PKGROOT)/src/objective/quantile_obj.o \
     $(PKGROOT)/src/gbm/gbm.o \
     $(PKGROOT)/src/gbm/gbtree.o \
     $(PKGROOT)/src/gbm/gbtree_model.o \
diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win
index 3a5587190..e8e1579f7 100644
--- a/R-package/src/Makevars.win
+++ b/R-package/src/Makevars.win
@@ -37,6 +37,7 @@ OBJECTS= \
     $(PKGROOT)/src/objective/aft_obj.o \
    $(PKGROOT)/src/objective/adaptive.o \
     $(PKGROOT)/src/objective/init_estimation.o \
+    $(PKGROOT)/src/objective/quantile_obj.o \
     $(PKGROOT)/src/gbm/gbm.o \
     $(PKGROOT)/src/gbm/gbtree.o \
     $(PKGROOT)/src/gbm/gbtree_model.o \
diff --git a/demo/guide-python/quantile_regression.py b/demo/guide-python/quantile_regression.py
new file mode 100644
index 000000000..e8c5486c8
--- /dev/null
+++ b/demo/guide-python/quantile_regression.py
@@ -0,0 +1,124 @@
+"""
+Quantile Regression
+===================
+
+The script is inspired by this awesome example in sklearn:
+https://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html
+
+"""
+import argparse
+from typing import Dict
+
+import numpy as np
+from sklearn.model_selection import train_test_split
+
+import xgboost as xgb
+
+
+def f(x: np.ndarray) -> np.ndarray:
+    """The function to predict."""
+    return x * np.sin(x)
+
+
+def quantile_loss(args: argparse.Namespace) -> None:
+    """Train a quantile regression model."""
+    rng = np.random.RandomState(1994)
+    # Generate a synthetic dataset for the demo; the generating process is taken
+    # from the sklearn example.
+    X = np.atleast_2d(rng.uniform(0, 10.0, size=1000)).T
+    expected_y = f(X).ravel()
+
+    sigma = 0.5 + X.ravel() / 10.0
+    noise = rng.lognormal(sigma=sigma) - np.exp(sigma**2.0 / 2.0)
+    y = expected_y + noise
+
+    # Train on the 0.05, 0.5, and 0.95 quantiles. The model is similar to
+    # multi-class and multi-target models.
+    alpha = np.array([0.05, 0.5, 0.95])
+    evals_result: Dict[str, Dict] = {}
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)
+    # We will be using the `hist` tree method; a quantile DMatrix can be used to
+    # save memory.
+    # Do not use the `exact` tree method for quantile regression, otherwise the
+    # performance might drop.
+    Xy = xgb.QuantileDMatrix(X_train, y_train)
+    # use Xy as a reference
+    Xy_test = xgb.QuantileDMatrix(X_test, y_test, ref=Xy)
+
+    booster = xgb.train(
+        {
+            # Use the quantile objective function.
+            "objective": "reg:quantileerror",
+            "tree_method": "hist",
+            "quantile_alpha": alpha,
+            # Let's try not to overfit.
+            "learning_rate": 0.01,
+            "max_depth": 3,
+            "min_child_weight": 16.0,
+        },
+        Xy,
+        num_boost_round=32,
+        early_stopping_rounds=2,
+        # The evaluation result is a weighted average across multiple quantiles.
+        evals=[(Xy, "Train"), (Xy_test, "Test")],
+        evals_result=evals_result,
+    )
+    xx = np.atleast_2d(np.linspace(0, 10, 1000)).T
+    scores = booster.inplace_predict(xx)
+    # dim 1 is the quantiles
+    assert scores.shape[0] == xx.shape[0]
+    assert scores.shape[1] == alpha.shape[0]
+
+    y_lower = scores[:, 0]  # alpha=0.05
+    y_med = scores[:, 1]  # alpha=0.5, median
+    y_upper = scores[:, 2]  # alpha=0.95
+
+    # Train an MSE model for comparison.
+    booster = xgb.train(
+        {
+            "objective": "reg:squarederror",
+            "tree_method": "hist",
+            # Let's try not to overfit.
+ "learning_rate": 0.01, + "max_depth": 3, + "min_child_weight": 16.0, + }, + Xy, + num_boost_round=32, + early_stopping_rounds=2, + evals=[(Xy, "Train"), (Xy_test, "Test")], + evals_result=evals_result, + ) + xx = np.atleast_2d(np.linspace(0, 10, 1000)).T + y_pred = booster.inplace_predict(xx) + + if args.plot: + from matplotlib import pyplot as plt + + fig = plt.figure(figsize=(10, 10)) + plt.plot(xx, f(xx), "g:", linewidth=3, label=r"$f(x) = x\,\sin(x)$") + plt.plot(X_test, y_test, "b.", markersize=10, label="Test observations") + plt.plot(xx, y_med, "r-", label="Predicted median") + plt.plot(xx, y_pred, "m-", label="Predicted mean") + plt.plot(xx, y_upper, "k-") + plt.plot(xx, y_lower, "k-") + plt.fill_between( + xx.ravel(), y_lower, y_upper, alpha=0.4, label="Predicted 90% interval" + ) + plt.xlabel("$x$") + plt.ylabel("$f(x)$") + plt.ylim(-10, 25) + plt.legend(loc="upper left") + plt.show() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--plot", + action="store_true", + help="Specify it to enable plotting the outputs.", + ) + args = parser.parse_args() + quantile_loss(args) diff --git a/doc/model.schema b/doc/model.schema index d91039db3..07a871820 100644 --- a/doc/model.schema +++ b/doc/model.schema @@ -440,6 +440,20 @@ }, "type": "object" }, + { + "properties": { + "name": { + "const": "reg:quantileerror" + }, + "quantile_loss_param": { + "type": "object", + "properties": { + "quantle_alpha": {"type": "array"} + } + } + }, + "type": "object" + }, { "type": "object", "properties": { diff --git a/doc/parameter.rst b/doc/parameter.rst index 6232884e8..99d6f0585 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -348,6 +348,7 @@ Specify the learning task and the corresponding learning objective. The objectiv - ``reg:logistic``: logistic regression. - ``reg:pseudohubererror``: regression with Pseudo Huber loss, a twice differentiable alternative to absolute loss. - ``reg:absoluteerror``: Regression with L1 error. When tree model is used, leaf value is refreshed after tree construction. If used in distributed training, the leaf value is calculated as the mean value from all workers, which is not guaranteed to be optimal. + - ``reg:quantileerror``: Quantile loss, also known as ``pinball loss``. See later sections for its parameter and :ref:`sphx_glr_python_examples_quantile_regression.py` for a worked example. - ``binary:logistic``: logistic regression for binary classification, output probability - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation - ``binary:hinge``: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities. @@ -441,6 +442,11 @@ Parameter for using Pseudo-Huber (``reg:pseudohubererror``) * ``huber_slope`` : A parameter used for Pseudo-Huber loss to define the :math:`\delta` term. [default = 1.0] +Parameter for using Quantile Loss (``reg:quantileerror``) +========================================================= + +* ``quantile_alpha``: A scala or a list of targeted quantiles. 
+
 ***********************
 Command Line Parameters
 ***********************
diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h
index 489e5e565..58dae68e0 100644
--- a/include/xgboost/linalg.h
+++ b/include/xgboost/linalg.h
@@ -16,6 +16,7 @@
 #include
 #include
 #include   // std::int32_t
+#include <cstddef>  // std::size_t
 #include
 #include
 #include
@@ -552,6 +553,11 @@ LINALG_HD auto UnravelIndex(size_t idx, common::Span<size_t const, D> shape) {
   }
 }
 
+template <std::size_t D>
+LINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) {
+  return UnravelIndex(idx, common::Span<std::size_t const, D>(shape));
+}
+
 /**
  * \brief A view over a vector, specialization of Tensor
 *
diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py
index f3b986e93..a186dc396 100644
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -1926,6 +1926,8 @@ class Booster:
         elif isinstance(params, str) and value is not None:
             params = [(params, value)]
         for key, val in cast(Iterable[Tuple[str, str]], params):
+            if isinstance(val, np.ndarray):
+                val = val.tolist()
             if val is not None:
                 _check_call(
                     _LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val)))
diff --git a/python-package/xgboost/testing/updater.py b/python-package/xgboost/testing/updater.py
index 1b675e51f..05d620088 100644
--- a/python-package/xgboost/testing/updater.py
+++ b/python-package/xgboost/testing/updater.py
@@ -1,7 +1,10 @@
 """Tests for updaters."""
 import json
+from functools import partial, update_wrapper
+from typing import Dict
 
 import numpy as np
+import xgboost.testing as tm
 
 import xgboost as xgb
 
@@ -68,3 +71,90 @@ def check_init_estimation(tree_method: str) -> None:
         n_samples=4096, n_labels=3, n_classes=5, random_state=17
     )
     run_clf(X, y)
+
+
+# pylint: disable=too-many-locals
+def check_quantile_loss(tree_method: str, weighted: bool) -> None:
+    """Test for quantile loss."""
+    from sklearn.datasets import make_regression
+    from sklearn.metrics import mean_pinball_loss
+    from xgboost.sklearn import _metric_decorator
+
+    n_samples = 4096
+    n_features = 8
+    n_estimators = 8
+    # A non-zero base score can cause a floating point difference with the GPU
+    # predictor; multi-class also differs slightly from single-target prediction.
+    base_score = 0.0
+    rng = np.random.RandomState(1994)
+    # pylint: disable=unbalanced-tuple-unpacking
+    X, y = make_regression(
+        n_samples=n_samples,
+        n_features=n_features,
+        random_state=rng,
+    )
+    if weighted:
+        weight = rng.random(size=n_samples)
+    else:
+        weight = None
+
+    Xy = xgb.QuantileDMatrix(X, y, weight=weight)
+
+    alpha = np.array([0.1, 0.5])
+    evals_result: Dict[str, Dict] = {}
+    booster_multi = xgb.train(
+        {
+            "objective": "reg:quantileerror",
+            "tree_method": tree_method,
+            "quantile_alpha": alpha,
+            "base_score": base_score,
+        },
+        Xy,
+        num_boost_round=n_estimators,
+        evals=[(Xy, "Train")],
+        evals_result=evals_result,
+    )
+    predt_multi = booster_multi.predict(Xy, strict_shape=True)
+
+    assert tm.non_increasing(evals_result["Train"]["quantile"])
+    assert evals_result["Train"]["quantile"][-1] < 20.0
+    # Check that the built-in quantile metric matches a custom pinball-loss metric.
+    metrics = [
+        _metric_decorator(
+            update_wrapper(
+                partial(mean_pinball_loss, sample_weight=weight, alpha=alpha[i]),
+                mean_pinball_loss,
+            )
+        )
+        for i in range(alpha.size)
+    ]
+
+    predts = np.empty(predt_multi.shape)
+    for i in range(alpha.shape[0]):
+        a = alpha[i]
+
+        booster_i = xgb.train(
+            {
+                "objective": "reg:quantileerror",
+                "tree_method": tree_method,
+                "quantile_alpha": a,
+                "base_score": base_score,
+            },
+            Xy,
+            num_boost_round=n_estimators,
+            evals=[(Xy, "Train")],
+            custom_metric=metrics[i],
+            evals_result=evals_result,
+        )
+        assert tm.non_increasing(evals_result["Train"]["quantile"])
+        assert evals_result["Train"]["quantile"][-1] < 30.0
+        np.testing.assert_allclose(
+            np.array(evals_result["Train"]["quantile"]),
+            np.array(evals_result["Train"]["mean_pinball_loss"]),
+            atol=1e-6,
+            rtol=1e-6,
+        )
+        predts[:, i] = booster_i.predict(Xy)
+
+    for i in range(alpha.shape[0]):
+        np.testing.assert_allclose(predts[:, i], predt_multi[:, i])
diff --git a/src/common/stats.cc b/src/common/stats.cc
index 1770f521e..80fc2c50d 100644
--- a/src/common/stats.cc
+++ b/src/common/stats.cc
@@ -35,11 +35,11 @@ void Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
     auto iter = linalg::cbegin(ti_v);
     float q{0};
     if (opt_weights.Empty()) {
-      q = common::Quantile(0.5, iter, iter + ti_v.Size());
+      q = common::Quantile(ctx, 0.5, iter, iter + ti_v.Size());
     } else {
       CHECK_NE(t_v.Shape(1), 0);
       auto w_it = common::MakeIndexTransformIter([&](std::size_t i) { return opt_weights[i]; });
-      q = common::WeightedQuantile(0.5, iter, iter + ti_v.Size(), w_it);
+      q = common::WeightedQuantile(ctx, 0.5, iter, iter + ti_v.Size(), w_it);
     }
     h_out(i) = q;
   }
diff --git a/src/common/stats.h b/src/common/stats.h
index 5f7892cb5..639da32ce 100644
--- a/src/common/stats.h
+++ b/src/common/stats.h
@@ -4,43 +4,49 @@
 #ifndef XGBOOST_COMMON_STATS_H_
 #define XGBOOST_COMMON_STATS_H_
 #include <algorithm>
-#include <iterator>
+#include <iterator>  // for distance
 #include <limits>
 #include <vector>
 
+#include "algorithm.h"           // for StableSort
 #include "common.h"              // AssertGPUSupport, OptionalWeights
 #include "optional_weight.h"     // OptionalWeights
 #include "transform_iterator.h"  // MakeIndexTransformIter
 #include "xgboost/context.h"     // Context
-#include "xgboost/linalg.h"
-#include "xgboost/logging.h"  // CHECK_GE
+#include "xgboost/linalg.h"      // TensorView,VectorView
+#include "xgboost/logging.h"     // CHECK_GE
 
 namespace xgboost {
 namespace common {
 /**
- * \brief Percentile with masked array using linear interpolation.
+ * @brief Quantile using linear interpolation.
  *
  *     https://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm
 *
- * \param alpha Percentile, must be in range [0, 1].
+ * \param alpha Quantile, must be in range [0, 1].
 * \param begin Iterator begin for input array.
 * \param end   Iterator end for input array.
 *
 * \return The result of interpolation.
 */
 template <typename Iter>
-float Quantile(double alpha, Iter const& begin, Iter const& end) {
+float Quantile(Context const* ctx, double alpha, Iter const& begin, Iter const& end) {
   CHECK(alpha >= 0 && alpha <= 1);
   auto n = static_cast<double>(std::distance(begin, end));
   if (n == 0) {
     return std::numeric_limits<float>::quiet_NaN();
   }
 
-  std::vector<size_t> sorted_idx(n);
+  std::vector<std::size_t> sorted_idx(n);
   std::iota(sorted_idx.begin(), sorted_idx.end(), 0);
-  std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
-                   [&](size_t l, size_t r) { return *(begin + l) < *(begin + r); });
+  if (omp_in_parallel()) {
+    std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
+                     [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
+  } else {
+    StableSort(ctx, sorted_idx.begin(), sorted_idx.end(),
+               [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
+  }
 
   auto val = [&](size_t i) { return *(begin + sorted_idx[i]); };
   static_assert(std::is_same<decltype(val(0)), float>::value, "");
@@ -51,7 +57,7 @@ float Quantile(double alpha, Iter const& begin, Iter const& end) {
   if (alpha >= (n / (n + 1))) {
     return val(sorted_idx.size() - 1);
   }
-  assert(n != 0 && "The number of rows in a leaf can not be zero.");
+
   double x = alpha * static_cast<double>((n + 1));
   double k = std::floor(x) - 1;
   CHECK_GE(k, 0);
@@ -66,30 +72,35 @@ float Quantile(double alpha, Iter const& begin, Iter const& end) {
 /**
  * \brief Calculate the weighted quantile with step function. Unlike the unweighted
 *        version, no interpolation is used.
 *
- * See https://aakinshin.net/posts/weighted-quantiles/ for some discussion on computing
+ * See https://aakinshin.net/posts/weighted-quantiles/ for some discussions on computing
 * weighted quantile with interpolation.
 */
 template <typename Iter, typename WeightIter>
-float WeightedQuantile(double alpha, Iter begin, Iter end, WeightIter weights) {
+float WeightedQuantile(Context const* ctx, double alpha, Iter begin, Iter end, WeightIter w_begin) {
   auto n = static_cast<std::size_t>(std::distance(begin, end));
   if (n == 0) {
     return std::numeric_limits<float>::quiet_NaN();
   }
   std::vector<std::size_t> sorted_idx(n);
   std::iota(sorted_idx.begin(), sorted_idx.end(), 0);
-  std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
-                   [&](size_t l, size_t r) { return *(begin + l) < *(begin + r); });
+  if (omp_in_parallel()) {
+    std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
+                     [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
+  } else {
+    StableSort(ctx, sorted_idx.begin(), sorted_idx.end(),
+               [&](std::size_t l, std::size_t r) { return *(begin + l) < *(begin + r); });
+  }
   auto val = [&](size_t i) { return *(begin + sorted_idx[i]); };
 
   std::vector<float> weight_cdf(n);  // S_n
   // weighted cdf is sorted during construction
-  weight_cdf[0] = *(weights + sorted_idx[0]);
+  weight_cdf[0] = *(w_begin + sorted_idx[0]);
   for (size_t i = 1; i < n; ++i) {
-    weight_cdf[i] = weight_cdf[i - 1] + *(weights + sorted_idx[i]);
+    weight_cdf[i] = weight_cdf[i - 1] + w_begin[sorted_idx[i]];
   }
   float thresh = weight_cdf.back() * alpha;
-  size_t idx =
+  std::size_t idx =
       std::lower_bound(weight_cdf.cbegin(), weight_cdf.cend(), thresh) - weight_cdf.cbegin();
   idx = std::min(idx, static_cast<size_t>(n - 1));
   return val(idx);
diff --git a/src/objective/adaptive.cc b/src/objective/adaptive.cc
index 9b341f4a7..fb22f0049 100644
--- a/src/objective/adaptive.cc
+++ b/src/objective/adaptive.cc
@@ -3,17 +3,25 @@
 #include "adaptive.h"
 
-#include
-#include
+#include <algorithm>  // std::transform,std::find_if,std::copy,std::unique
+#include <cmath>      // std::isnan
+#include <cstddef>    // std::size_t
+#include <iterator>   // std::distance
+#include <vector>     // std::vector
 
 #include "../common/algorithm.h"           // ArgSort
+#include "../common/common.h"              // AssertGPUSupport
 #include "../common/numeric.h"             // RunLengthEncode
 #include "../common/stats.h"               // Quantile,WeightedQuantile
 #include "../common/threading_utils.h"     // ParallelFor
 #include "../common/transform_iterator.h"  // MakeIndexTransformIter
+#include "xgboost/base.h"                  // bst_node_t
 #include "xgboost/context.h"               // Context
-#include "xgboost/linalg.h"
-#include "xgboost/tree_model.h"
+#include "xgboost/data.h"                  // MetaInfo
+#include "xgboost/host_device_vector.h"    // HostDeviceVector
+#include "xgboost/linalg.h"                // MakeTensorView
+#include "xgboost/span.h"                  // Span
+#include "xgboost/tree_model.h"            // RegTree
 
 namespace xgboost {
 namespace obj {
@@ -100,8 +108,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
     CHECK_LT(k + 1, h_node_ptr.size());
     size_t n = h_node_ptr[k + 1] - h_node_ptr[k];
     auto h_row_set = common::Span<size_t const>{ridx}.subspan(h_node_ptr[k], n);
-    CHECK_LE(group_idx, info.labels.Shape(1));
-    auto h_labels = info.labels.HostView().Slice(linalg::All(), group_idx);
+
+    auto h_labels = info.labels.HostView().Slice(linalg::All(), IdxY(info, group_idx));
     auto h_weights = linalg::MakeVec(&info.weights_);
 
     auto iter = common::MakeIndexTransformIter([&](size_t i) -> float {
@@ -115,9 +123,9 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
 
     float q{0};
     if (info.weights_.Empty()) {
-      q = common::Quantile(alpha, iter, iter + h_row_set.size());
+      q = common::Quantile(ctx, alpha, iter, iter + h_row_set.size());
     } else {
-      q = common::WeightedQuantile(alpha, iter, iter + h_row_set.size(), w_it);
+      q = common::WeightedQuantile(ctx, alpha, iter, iter + h_row_set.size(), w_it);
     }
     if (std::isnan(q)) {
       CHECK(h_row_set.empty());
@@ -127,6 +135,13 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
 
   UpdateLeafValues(&quantiles, nidx, p_tree);
 }
+
+#if !defined(XGBOOST_USE_CUDA)
+void UpdateTreeLeafDevice(Context const*, common::Span<bst_node_t const>, std::int32_t,
+                          MetaInfo const&, HostDeviceVector<float> const&, float, RegTree*) {
+  common::AssertGPUSupport();
+}
+#endif  // !defined(XGBOOST_USE_CUDA)
 }  // namespace detail
 }  // namespace obj
 }  // namespace xgboost
diff --git a/src/objective/adaptive.cu b/src/objective/adaptive.cu
index 5e2f490b7..71731e9c4 100644
--- a/src/objective/adaptive.cu
+++ b/src/objective/adaptive.cu
@@ -20,20 +20,19 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
                           HostDeviceVector<bst_node_t>* p_nidx, RegTree const& tree) {
   // copy position to buffer
   dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
+  auto cuctx = ctx->CUDACtx();
   size_t n_samples = position.size();
-  dh::XGBDeviceAllocator<char> alloc;
   dh::device_vector<bst_node_t> sorted_position(position.size());
   dh::safe_cuda(cudaMemcpyAsync(sorted_position.data().get(), position.data(),
-                                position.size_bytes(), cudaMemcpyDeviceToDevice));
+                                position.size_bytes(), cudaMemcpyDeviceToDevice, cuctx->Stream()));
 
   p_ridx->resize(position.size());
   dh::Iota(dh::ToSpan(*p_ridx));
   // sort row index according to node index
-  thrust::stable_sort_by_key(thrust::cuda::par(alloc), sorted_position.begin(),
+  thrust::stable_sort_by_key(cuctx->TP(), sorted_position.begin(),
                              sorted_position.begin() + n_samples, p_ridx->begin());
-  dh::XGBCachingDeviceAllocator<char> caching;
   size_t beg_pos =
-      thrust::find_if(thrust::cuda::par(caching), sorted_position.cbegin(), sorted_position.cend(),
+      thrust::find_if(cuctx->CTP(), sorted_position.cbegin(), sorted_position.cend(),
                       [] XGBOOST_DEVICE(bst_node_t nidx) { return nidx >= 0; }) -
       sorted_position.cbegin();
   if (beg_pos == sorted_position.size()) {
@@ -72,7 +71,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
   size_t* h_num_runs = reinterpret_cast<size_t*>(pinned.subspan(0, sizeof(size_t)).data());
 
   dh::CUDAEvent e;
-  e.Record(dh::DefaultStream());
+  e.Record(cuctx->Stream());
   copy_stream.View().Wait(e);
   // flag for whether there's ignored position
   bst_node_t* h_first_unique =
@@ -108,7 +107,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
       d_node_ptr[0] = beg_pos;
     }
   });
-  thrust::inclusive_scan(thrust::cuda::par(caching), dh::tbegin(d_node_ptr), dh::tend(d_node_ptr),
+  thrust::inclusive_scan(cuctx->CTP(), dh::tbegin(d_node_ptr), dh::tend(d_node_ptr),
                          dh::tbegin(d_node_ptr));
   copy_stream.View().Sync();
   CHECK_GT(*h_num_runs, 0);
@@ -162,7 +161,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
                                          {info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
   CHECK_LT(group_idx, d_predt.Shape(1));
   auto t_predt = d_predt.Slice(linalg::All(), group_idx);
-  auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), group_idx);
+  auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), IdxY(info, group_idx));
 
   auto d_row_index = dh::ToSpan(ridx);
   auto seg_beg = nptr.DevicePointer();
diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h
index 10486c85c..ca81cac2a 100644
--- a/src/objective/adaptive.h
+++ b/src/objective/adaptive.h
@@ -6,13 +6,15 @@
 #include
 #include <cstdint>  // std::int32_t
 #include
-#include <vector>
+#include <vector>  // std::vector
 
 #include "../collective/communicator-inl.h"
 #include "../common/common.h"
-#include "xgboost/context.h"
-#include "xgboost/host_device_vector.h"
-#include "xgboost/tree_model.h"
+#include "xgboost/base.h"                // bst_node_t
+#include "xgboost/context.h"             // Context
+#include "xgboost/data.h"                // MetaInfo
+#include "xgboost/host_device_vector.h"  // HostDeviceVector
+#include "xgboost/tree_model.h"          // RegTree
 
 namespace xgboost {
 namespace obj {
@@ -73,6 +75,15 @@ inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_n
   }
 }
 
+inline std::size_t IdxY(MetaInfo const& info, std::int32_t group_idx) {
+  std::size_t y_idx{0};
+  if (info.labels.Shape(1) > 1) {
+    y_idx = group_idx;
+  }
+  CHECK_LE(y_idx, info.labels.Shape(1));
+  return y_idx;
+}
+
 void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
                           std::int32_t group_idx, MetaInfo const& info,
                           HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree);
@@ -81,5 +92,18 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
                         std::int32_t group_idx, MetaInfo const& info,
                         HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree);
 }  // namespace detail
+
+inline void UpdateTreeLeaf(Context const* ctx, HostDeviceVector<bst_node_t> const& position,
+                           std::int32_t group_idx, MetaInfo const& info,
+                           HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
+  if (ctx->IsCPU()) {
+    detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, predt, alpha,
+                               p_tree);
+  } else {
+    position.SetDevice(ctx->gpu_id);
+    detail::UpdateTreeLeafDevice(ctx, position.ConstDeviceSpan(), group_idx, info, predt, alpha,
+                                 p_tree);
+  }
+}
 }  // namespace obj
 }  // namespace xgboost
diff --git a/src/objective/objective.cc b/src/objective/objective.cc
index 9512233dc..d3b01d80b 100644
--- a/src/objective/objective.cc
+++ b/src/objective/objective.cc
@@ -44,11 +44,13 @@ namespace obj {
 // List of files that will be force linked in static links.
 #ifdef XGBOOST_USE_CUDA
 DMLC_REGISTRY_LINK_TAG(regression_obj_gpu);
+DMLC_REGISTRY_LINK_TAG(quantile_obj_gpu);
 DMLC_REGISTRY_LINK_TAG(hinge_obj_gpu);
 DMLC_REGISTRY_LINK_TAG(multiclass_obj_gpu);
 DMLC_REGISTRY_LINK_TAG(rank_obj_gpu);
 #else
 DMLC_REGISTRY_LINK_TAG(regression_obj);
+DMLC_REGISTRY_LINK_TAG(quantile_obj);
 DMLC_REGISTRY_LINK_TAG(hinge_obj);
 DMLC_REGISTRY_LINK_TAG(multiclass_obj);
 DMLC_REGISTRY_LINK_TAG(rank_obj);
diff --git a/src/objective/quantile_obj.cc b/src/objective/quantile_obj.cc
new file mode 100644
index 000000000..89e2d6010
--- /dev/null
+++ b/src/objective/quantile_obj.cc
@@ -0,0 +1,18 @@
+/**
+ * Copyright 2023 by XGBoost Contributors
+ */
+
+// Dummy file to enable the CUDA conditional compile trick.
+
+#include <dmlc/registry.h>
+namespace xgboost {
+namespace obj {
+
+DMLC_REGISTRY_FILE_TAG(quantile_obj);
+
+}  // namespace obj
+}  // namespace xgboost
+
+#ifndef XGBOOST_USE_CUDA
+#include "quantile_obj.cu"
+#endif  // !defined(XGBOOST_USE_CUDA)
diff --git a/src/objective/quantile_obj.cu b/src/objective/quantile_obj.cu
new file mode 100644
index 000000000..776d9e08e
--- /dev/null
+++ b/src/objective/quantile_obj.cu
@@ -0,0 +1,226 @@
+/**
+ * Copyright 2023 by XGBoost contributors
+ */
+#include <cstddef>  // std::size_t
+#include <cstdint>  // std::int32_t
+#include <vector>   // std::vector
+
+#include "../common/linalg_op.h"            // ElementWiseKernel,cbegin,cend
+#include "../common/quantile_loss_utils.h"  // QuantileLossParam
+#include "../common/stats.h"                // Quantile,WeightedQuantile
+#include "adaptive.h"                       // UpdateTreeLeaf
+#include "dmlc/parameter.h"                 // DMLC_DECLARE_PARAMETER
+#include "init_estimation.h"                // CheckInitInputs
+#include "xgboost/base.h"                   // GradientPair,XGBOOST_DEVICE,bst_target_t
+#include "xgboost/data.h"                   // MetaInfo
+#include "xgboost/host_device_vector.h"     // HostDeviceVector
+#include "xgboost/json.h"                   // Json,String,ToJson,FromJson
+#include "xgboost/linalg.h"                 // Tensor,MakeTensorView,MakeVec
+#include "xgboost/objective.h"              // ObjFunction
+#include "xgboost/parameter.h"              // XGBoostParameter
+
+#if defined(XGBOOST_USE_CUDA)
+
+#include "../common/linalg_op.cuh"  // ElementWiseKernel
+#include "../common/stats.cuh"      // SegmentedQuantile
+
+#endif  // defined(XGBOOST_USE_CUDA)
+
+namespace xgboost {
+namespace obj {
+class QuantileRegression : public ObjFunction {
+  common::QuantileLossParam param_;
+  HostDeviceVector<float> alpha_;
+
+  bst_target_t Targets(MetaInfo const& info) const override {
+    auto const& alpha = param_.quantile_alpha.Get();
+    CHECK_EQ(alpha.size(), alpha_.Size()) << "The objective is not yet configured.";
+    CHECK_EQ(info.labels.Shape(1), 1) << "Multi-target is not yet supported by the quantile loss.";
+    CHECK(!alpha.empty());
+    // We have some placeholders for multi-target in the quantile loss. But it's not
+    // supported as the gbtree doesn't know how to slice the gradient and there's no 3-dim
+    // model shape in general.
+    auto n_y = std::max(static_cast<std::size_t>(1), info.labels.Shape(1));
+    return alpha_.Size() * n_y;
+  }
+
+ public:
+  void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info, std::int32_t iter,
+                   HostDeviceVector<GradientPair>* out_gpair) override {
+    if (iter == 0) {
+      CheckInitInputs(info);
+    }
+    CHECK_EQ(param_.quantile_alpha.Get().size(), alpha_.Size());
+
+    using SizeT = decltype(info.num_row_);
+    SizeT n_targets = this->Targets(info);
+    SizeT n_alphas = alpha_.Size();
+    CHECK_NE(n_alphas, 0);
+    CHECK_GE(n_targets, n_alphas);
+    CHECK_EQ(preds.Size(), info.num_row_ * n_targets);
+
+    auto labels = info.labels.View(ctx_->gpu_id);
+
+    out_gpair->SetDevice(ctx_->gpu_id);
+    out_gpair->Resize(n_targets * info.num_row_);
+    auto gpair =
+        linalg::MakeTensorView(ctx_->IsCPU() ? out_gpair->HostSpan() : out_gpair->DeviceSpan(),
+                               {info.num_row_, n_alphas, n_targets / n_alphas}, ctx_->gpu_id);
+
+    info.weights_.SetDevice(ctx_->gpu_id);
+    common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
+                                                 : info.weights_.ConstDeviceSpan()};
+
+    preds.SetDevice(ctx_->gpu_id);
+    auto predt = linalg::MakeVec(&preds);
+    auto n_samples = info.num_row_;
+
+    alpha_.SetDevice(ctx_->gpu_id);
+    auto alpha = ctx_->IsCPU() ? alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan();
+
+    linalg::ElementWiseKernel(
+        ctx_, gpair, [=] XGBOOST_DEVICE(std::size_t i, GradientPair const&) mutable {
+          auto idx = linalg::UnravelIndex(
+              i, {n_samples, static_cast<std::size_t>(alpha.size()), n_targets / alpha.size()});
+          // std::tie is not available for cuda kernel.
+          std::size_t sample_id = std::get<0>(idx);
+          std::size_t quantile_id = std::get<1>(idx);
+          std::size_t target_id = std::get<2>(idx);
+
+          auto d = predt(i) - labels(sample_id, target_id);
+          auto h = weight[sample_id];
+          if (d >= 0) {
+            auto g = (1.0f - alpha[quantile_id]) * weight[sample_id];
+            gpair(sample_id, quantile_id, target_id) = GradientPair{g, h};
+          } else {
+            auto g = (-alpha[quantile_id] * weight[sample_id]);
+            gpair(sample_id, quantile_id, target_id) = GradientPair{g, h};
+          }
+        });
+  }
+
+  void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const override {
+    CHECK(!alpha_.Empty());
+
+    auto n_targets = this->Targets(info);
+    base_score->SetDevice(ctx_->gpu_id);
+    base_score->Reshape(n_targets);
+
+    double sw{0};
+    if (ctx_->IsCPU()) {
+      auto quantiles = base_score->HostView();
+      auto h_weights = info.weights_.ConstHostVector();
+      if (info.weights_.Empty()) {
+        sw = info.num_row_;
+      } else {
+        sw = std::accumulate(std::cbegin(h_weights), std::cend(h_weights), 0.0);
+      }
+      for (bst_target_t t{0}; t < n_targets; ++t) {
+        auto alpha = param_.quantile_alpha[t];
+        auto h_labels = info.labels.HostView();
+        if (h_weights.empty()) {
+          quantiles(t) =
+              common::Quantile(ctx_, alpha, linalg::cbegin(h_labels), linalg::cend(h_labels));
+        } else {
+          CHECK_EQ(h_weights.size(), h_labels.Size());
+          quantiles(t) = common::WeightedQuantile(ctx_, alpha, linalg::cbegin(h_labels),
+                                                  linalg::cend(h_labels), std::cbegin(h_weights));
+        }
+      }
+    } else {
+#if defined(XGBOOST_USE_CUDA)
+      alpha_.SetDevice(ctx_->gpu_id);
+      auto d_alpha = alpha_.ConstDeviceSpan();
+      auto d_labels = info.labels.View(ctx_->gpu_id);
+      auto seg_it = dh::MakeTransformIterator<std::size_t>(
+          thrust::make_counting_iterator(0ul),
+          [=] XGBOOST_DEVICE(std::size_t i) { return i * d_labels.Shape(0); });
+      CHECK_EQ(d_labels.Shape(1), 1);
+      auto val_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
+                                                     [=] XGBOOST_DEVICE(std::size_t i) {
+                                                       auto sample_idx = i % d_labels.Shape(0);
+                                                       return d_labels(sample_idx, 0);
+                                                     });
+      auto n = d_labels.Size() * d_alpha.size();
+      CHECK_EQ(base_score->Size(), d_alpha.size());
+      if (info.weights_.Empty()) {
+        common::SegmentedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1, val_it,
+                                  val_it + n, base_score->Data());
+        sw = info.num_row_;
+      } else {
+        info.weights_.SetDevice(ctx_->gpu_id);
+        auto d_weights = info.weights_.ConstDeviceSpan();
+        auto weight_it = dh::MakeTransformIterator<float>(thrust::make_counting_iterator(0ul),
+                                                          [=] XGBOOST_DEVICE(std::size_t i) {
+                                                            auto sample_idx = i % d_labels.Shape(0);
+                                                            return d_weights[sample_idx];
+                                                          });
+        common::SegmentedWeightedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1,
+                                          val_it, val_it + n, weight_it, weight_it + n,
+                                          base_score->Data());
+        sw = dh::Reduce(ctx_->CUDACtx()->CTP(), dh::tcbegin(d_weights), dh::tcend(d_weights), 0.0,
+                        thrust::plus{});
+      }
+#else
+      common::AssertGPUSupport();
+#endif  // defined(XGBOOST_USE_CUDA)
+    }
+
+    // For multiple quantiles, we should extend the base score to a vector instead of
+    // computing the average. For now, this is a workaround.
+    linalg::Vector<float> temp;
+    common::Mean(ctx_, *base_score, &temp);
+    double meanq = temp(0) * sw;
+
+    collective::Allreduce<collective::Operation::kSum>(&meanq, 1);
+    collective::Allreduce<collective::Operation::kSum>(&sw, 1);
+    meanq /= (sw + kRtEps);
+    base_score->Reshape(1);
+    base_score->Data()->Fill(meanq);
+  }
+
+  void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
+                      HostDeviceVector<float> const& prediction, std::int32_t group_idx,
+                      RegTree* p_tree) const override {
+    auto alpha = param_.quantile_alpha[group_idx];
+    ::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, prediction, alpha, p_tree);
+  }
+
+  void Configure(Args const& args) override {
+    param_.UpdateAllowUnknown(args);
+    param_.Validate();
+    this->alpha_.HostVector() = param_.quantile_alpha.Get();
+  }
+  ObjInfo Task() const override { return {ObjInfo::kRegression, true, true}; }
+  static char const* Name() { return "reg:quantileerror"; }
+
+  void SaveConfig(Json* p_out) const override {
+    auto& out = *p_out;
+    out["name"] = String(Name());
+    out["quantile_loss_param"] = ToJson(param_);
+  }
+  void LoadConfig(Json const& in) override {
+    CHECK_EQ(get<String const>(in["name"]), Name());
+    FromJson(in["quantile_loss_param"], &param_);
+    alpha_.HostVector() = param_.quantile_alpha.Get();
+  }
+
+  const char* DefaultEvalMetric() const override { return "quantile"; }
+  Json DefaultMetricConfig() const override {
+    CHECK(param_.GetInitialised());
+    Json config{Object{}};
+    config["name"] = String{this->DefaultEvalMetric()};
+    config["quantile_loss_param"] = ToJson(param_);
+    return config;
+  }
+};
+
+XGBOOST_REGISTER_OBJECTIVE(QuantileRegression, QuantileRegression::Name())
+    .describe("Regression with quantile loss.")
+    .set_body([]() { return new QuantileRegression(); });
+
+#if defined(XGBOOST_USE_CUDA)
+DMLC_REGISTRY_FILE_TAG(quantile_obj_gpu);
+#endif  // defined(XGBOOST_USE_CUDA)
+}  // namespace obj
+}  // namespace xgboost
diff --git a/src/objective/regression_loss.h b/src/objective/regression_loss.h
index 1fd1621af..1ef7106cf 100644
--- a/src/objective/regression_loss.h
+++ b/src/objective/regression_loss.h
@@ -1,15 +1,16 @@
-/*!
- * Copyright 2017-2022 XGBoost contributors
+/**
+ * Copyright 2017-2023 by XGBoost contributors
 */
 #ifndef XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_
 #define XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_
 #include <dmlc/omp.h>
-#include <xgboost/logging.h>
 #include <algorithm>
 
 #include "../common/math.h"
+#include "xgboost/data.h"  // MetaInfo
+#include "xgboost/logging.h"
 #include "xgboost/task.h"  // ObjInfo
 
 namespace xgboost {
@@ -105,7 +106,6 @@ struct LogisticRaw : public LogisticRegression {
   static ObjInfo Info() { return ObjInfo::kRegression; }
 };
 
-
 }  // namespace obj
 }  // namespace xgboost
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index 332646095..2edaff0b0 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -744,18 +744,7 @@ class MeanAbsoluteError : public ObjFunction {
   void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
                       HostDeviceVector<float> const& prediction, std::int32_t group_idx,
                       RegTree* p_tree) const override {
-    if (ctx_->IsCPU()) {
-      auto const& h_position = position.ConstHostVector();
-      detail::UpdateTreeLeafHost(ctx_, h_position, group_idx, info, prediction, 0.5, p_tree);
-    } else {
-#if defined(XGBOOST_USE_CUDA)
-      position.SetDevice(ctx_->gpu_id);
-      auto d_position = position.ConstDeviceSpan();
-      detail::UpdateTreeLeafDevice(ctx_, d_position, group_idx, info, prediction, 0.5, p_tree);
-#else
-      common::AssertGPUSupport();
-#endif  // defined(XGBOOST_USE_CUDA)
-    }
+    ::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, prediction, 0.5, p_tree);
   }
 
   const char* DefaultEvalMetric() const override { return "mae"; }
diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py
index 34eb92fa6..f85fae823 100644
--- a/tests/ci_build/lint_python.py
+++ b/tests/ci_build/lint_python.py
@@ -151,6 +151,7 @@ def main(args: argparse.Namespace) -> None:
             "demo/guide-python/sklearn_parallel.py",
             "demo/guide-python/spark_estimator_examples.py",
             "demo/guide-python/individual_trees.py",
+            "demo/guide-python/quantile_regression.py",
             # CI
             "tests/ci_build/lint_python.py",
             "tests/ci_build/test_r_package.py",
@@ -193,6 +194,7 @@ def main(args: argparse.Namespace) -> None:
             "demo/guide-python/cat_in_the_dat.py",
             "demo/guide-python/feature_weights.py",
             "demo/guide-python/individual_trees.py",
+            "demo/guide-python/quantile_regression.py",
             # tests
             "tests/python/test_dt.py",
             "tests/python/test_data_iterator.py",
diff --git a/tests/cpp/common/test_stats.cc b/tests/cpp/common/test_stats.cc
index 3f3786809..abdf00425 100644
--- a/tests/cpp/common/test_stats.cc
+++ b/tests/cpp/common/test_stats.cc
@@ -11,19 +11,20 @@
 namespace xgboost {
 namespace common {
 TEST(Stats, Quantile) {
+  Context ctx;
   {
     linalg::Tensor<float, 1> arr({20.f, 0.f, 15.f, 50.f, 40.f, 0.f, 35.f}, {7}, Context::kCpuId);
     std::vector<size_t> index{0, 2, 3, 4, 6};
     auto h_arr = arr.HostView();
     auto beg = MakeIndexTransformIter([&](size_t i) { return h_arr(index[i]); });
     auto end = beg + index.size();
-    auto q = Quantile(0.40f, beg, end);
+    auto q = Quantile(&ctx, 0.40f, beg, end);
     ASSERT_EQ(q, 26.0);
 
-    q = Quantile(0.20f, beg, end);
+    q = Quantile(&ctx, 0.20f, beg, end);
     ASSERT_EQ(q, 16.0);
 
-    q = Quantile(0.10f, beg, end);
+    q = Quantile(&ctx, 0.10f, beg, end);
     ASSERT_EQ(q, 15.0);
   }
 
@@ -31,12 +32,13 @@ TEST(Stats, Quantile) {
     std::vector<float> vec{1., 2., 3., 4., 5.};
     auto beg = MakeIndexTransformIter([&](size_t i) { return vec[i]; });
     auto end = beg + vec.size();
-    auto q = Quantile(0.5f, beg, end);
+    auto q = Quantile(&ctx, 0.5f, beg, end);
     ASSERT_EQ(q, 3.);
   }
 }
 
 TEST(Stats, WeightedQuantile) {
+  Context ctx;
   linalg::Tensor<float, 1> arr({1.f, 2.f, 3.f, 4.f, 5.f}, {5}, Context::kCpuId);
   linalg::Tensor<float, 1> weight({1.f, 1.f, 1.f, 1.f, 1.f}, {5}, Context::kCpuId);
@@ -47,13 +49,13 @@ TEST(Stats, WeightedQuantile) {
   auto end = beg + arr.Size();
   auto w = MakeIndexTransformIter([&](size_t i) { return h_weight(i); });
 
-  auto q = WeightedQuantile(0.50f, beg, end, w);
+  auto q = WeightedQuantile(&ctx, 0.50f, beg, end, w);
   ASSERT_EQ(q, 3);
 
-  q = WeightedQuantile(0.0, beg, end, w);
+  q = WeightedQuantile(&ctx, 0.0, beg, end, w);
   ASSERT_EQ(q, 1);
 
-  q = WeightedQuantile(1.0, beg, end, w);
+  q = WeightedQuantile(&ctx, 1.0, beg, end, w);
   ASSERT_EQ(q, 5);
 }
diff --git a/tests/cpp/objective/test_objective.cc b/tests/cpp/objective/test_objective.cc
index 2f13b8bb3..718f8f659 100644
--- a/tests/cpp/objective/test_objective.cc
+++ b/tests/cpp/objective/test_objective.cc
@@ -1,4 +1,6 @@
-// Copyright by Contributors
+/**
+ * Copyright 2016-2023 by XGBoost contributors
+ */
 #include <gtest/gtest.h>
 #include <xgboost/context.h>
 #include <xgboost/objective.h>
@@ -25,11 +27,14 @@ TEST(Objective, PredTransform) {
   tparam.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
   size_t n = 100;
 
-  for (const auto &entry :
-       ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
-    std::unique_ptr<xgboost::ObjFunction> obj{
-        xgboost::ObjFunction::Create(entry->name, &tparam)};
-    obj->Configure(Args{{"num_class", "2"}});
+  for (const auto& entry : ::dmlc::Registry<::xgboost::ObjFunctionReg>::List()) {
+    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create(entry->name, &tparam)};
+    if (entry->name.find("multi") != std::string::npos) {
+      obj->Configure(Args{{"num_class", "2"}});
+    }
+    if (entry->name.find("quantile") != std::string::npos) {
+      obj->Configure(Args{{"quantile_alpha", "0.5"}});
+    }
     HostDeviceVector<float> predts;
     predts.Resize(n, 3.14f);  // prediction is performed on host.
     ASSERT_FALSE(predts.DeviceCanRead());
diff --git a/tests/cpp/objective/test_quantile_obj.cc b/tests/cpp/objective/test_quantile_obj.cc
new file mode 100644
index 000000000..76233975a
--- /dev/null
+++ b/tests/cpp/objective/test_quantile_obj.cc
@@ -0,0 +1,74 @@
+/**
+ * Copyright 2023 by XGBoost contributors
+ */
+#include <gtest/gtest.h>
+#include <xgboost/base.h>       // Args
+#include <xgboost/context.h>    // Context
+#include <xgboost/objective.h>  // ObjFunction
+#include <xgboost/span.h>       // Span
+
+#include <memory>  // std::unique_ptr
+#include <vector>  // std::vector
+
+#include "../helpers.h"  // CheckConfigReload,CreateEmptyGenericParam,DeclareUnifiedTest
+
+namespace xgboost {
+TEST(Objective, DeclareUnifiedTest(Quantile)) {
+  Context ctx = CreateEmptyGenericParam(GPUIDX);
+
+  {
+    Args args{{"quantile_alpha", "[0.6, 0.8]"}};
+    std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
+    obj->Configure(args);
+    CheckConfigReload(obj, "reg:quantileerror");
+  }
+
+  Args args{{"quantile_alpha", "0.6"}};
+  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
+  obj->Configure(args);
+  CheckConfigReload(obj, "reg:quantileerror");
+
+  std::vector<float> predts{1.0f, 2.0f, 3.0f};
+  std::vector<float> labels{3.0f, 2.0f, 1.0f};
+  std::vector<float> weights{1.0f, 1.0f, 1.0f};
+  std::vector<float> grad{-0.6f, 0.4f, 0.4f};
+  std::vector<float> hess = weights;
+  CheckObjFunction(obj, predts, labels, weights, grad, hess);
+}
+
+TEST(Objective, DeclareUnifiedTest(QuantileIntercept)) {
+  Context ctx = CreateEmptyGenericParam(GPUIDX);
+  Args args{{"quantile_alpha", "[0.6, 0.8]"}};
+  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("reg:quantileerror", &ctx)};
+  obj->Configure(args);
+
+  MetaInfo info;
+  info.num_row_ = 10;
+  info.labels.ModifyInplace([&](HostDeviceVector<float>* data, common::Span<std::size_t, 2> shape) {
+    data->SetDevice(ctx.gpu_id);
+    data->Resize(info.num_row_);
+    shape[0] = info.num_row_;
+    shape[1] = 1;
+
auto& h_labels = data->HostVector(); + for (std::size_t i = 0; i < info.num_row_; ++i) { + h_labels[i] = i; + } + }); + + linalg::Vector base_scores; + obj->InitEstimation(info, &base_scores); + ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported."; + // mean([5.6, 7.8]) + ASSERT_NEAR(base_scores(0), 6.7, kRtEps); + + for (std::size_t i = 0; i < info.num_row_; ++i) { + info.weights_.HostVector().emplace_back(info.num_row_ - i - 1.0); + } + + obj->InitEstimation(info, &base_scores); + ASSERT_EQ(base_scores.Size(), 1) << "Vector is not yet supported."; + // mean([3, 5]) + ASSERT_NEAR(base_scores(0), 4.0, kRtEps); +} +} // namespace xgboost diff --git a/tests/cpp/objective/test_quantile_obj_gpu.cu b/tests/cpp/objective/test_quantile_obj_gpu.cu new file mode 100644 index 000000000..518692411 --- /dev/null +++ b/tests/cpp/objective/test_quantile_obj_gpu.cu @@ -0,0 +1,5 @@ +/** + * Copyright 2023 XGBoost contributors + */ +// Dummy file to enable the CUDA tests. +#include "test_quantile_obj.cc" diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index 571c4a171..23b2fdf00 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -5,7 +5,7 @@ import numpy as np import pytest from hypothesis import assume, given, note, settings, strategies from xgboost.testing.params import cat_parameter_strategy, hist_parameter_strategy -from xgboost.testing.updater import check_init_estimation +from xgboost.testing.updater import check_init_estimation, check_quantile_loss import xgboost as xgb from xgboost import testing as tm @@ -209,3 +209,7 @@ class TestGPUUpdaters: def test_init_estimation(self) -> None: check_init_estimation("gpu_hist") + + @pytest.mark.parametrize("weighted", [True, False]) + def test_quantile_loss(self, weighted: bool) -> None: + check_quantile_loss("gpu_hist", weighted) diff --git a/tests/python/test_demos.py b/tests/python/test_demos.py index 28797f160..c54f35046 100644 --- a/tests/python/test_demos.py +++ b/tests/python/test_demos.py @@ -146,6 +146,13 @@ def test_multioutput_reg() -> None: subprocess.check_call(cmd) +@pytest.mark.skipif(**tm.no_sklearn()) +def test_quantile_reg() -> None: + script = os.path.join(PYTHON_DEMO_DIR, "quantile_regression.py") + cmd = ['python', script] + subprocess.check_call(cmd) + + @pytest.mark.skipif(**tm.no_ubjson()) def test_json_model() -> None: script = os.path.join(DEMO_DIR, "json-model", "json_parser.py") diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index 130af619c..6ad6e72de 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -10,7 +10,7 @@ from xgboost.testing.params import ( exact_parameter_strategy, hist_parameter_strategy, ) -from xgboost.testing.updater import check_init_estimation +from xgboost.testing.updater import check_init_estimation, check_quantile_loss import xgboost as xgb from xgboost import testing as tm @@ -469,3 +469,7 @@ class TestTreeMethod: def test_init_estimation(self) -> None: check_init_estimation("hist") + + @pytest.mark.parametrize("weighted", [True, False]) + def test_quantile_loss(self, weighted: bool) -> None: + check_quantile_loss("hist", weighted)