
Commit 201569e

Small cleanup for the objective interface and intercept document. (#11649)
- Clarify the code comments.
- Add test for the effect of intercept and base margin.
1 parent a8924d4 commit 201569e

12 files changed: +179 -91 lines changed


doc/parameter.rst

Lines changed: 1 addition & 1 deletion
@@ -419,7 +419,7 @@ Specify the learning task and the corresponding learning objective. The objectiv
 
 * ``base_score``
 
-  - The initial prediction score of all instances, global bias
+  - The initial prediction score of all instances, global bias.
   - The parameter is automatically estimated for selected objectives before training. To
     disable the estimation, specify a real number argument.
   - If ``base_margin`` is supplied, ``base_score`` will not be added.
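
A minimal sketch of the documented behaviour (the synthetic data and the value 0.3 are
illustrative, not part of the commit): passing a real number pins the intercept instead
of letting XGBoost estimate it.

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X = rng.normal(size=(256, 4))
    y = X.sum(axis=1)

    # A real-number argument disables the automatic estimation of the intercept.
    booster = xgb.train(
        {"objective": "reg:squarederror", "base_score": 0.3},
        xgb.DMatrix(X, label=y),
        num_boost_round=8,
    )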

doc/tutorials/external_memory.rst

Lines changed: 1 addition & 3 deletions
@@ -274,9 +274,7 @@ floating point samples, `512` features (total 1TB) on a GH200 (a H200 GPU connec
 Grace CPU by a chip-to-chip link) system. One can start with:
 - Evenly divide the data into 128 batches with 8GB per batch.
 - Define a custom iterator as previously described.
-- Set the `max_quantile_batches` parameter of the
-  :py:class:`~xgboost.ExtMemQuantileDMatrix` to 32 (256GB per sub-stream for
-  quantization). Load the data.
+- Set the `max_quantile_batches` parameter of the :py:class:`~xgboost.ExtMemQuantileDMatrix` to 32 (256GB per sub-stream for quantization). Load the data.
 - Start training with ``device=cuda``.
 
 To run experiments on these platforms, the open source `NVIDIA Linux driver
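
A sketch of how the steps above fit together, assuming the data has already been
partitioned into ``.npz`` files (the loader and file names are illustrative, not part
of the commit):

.. code-block:: python

    import numpy as np
    import xgboost

    class Batches(xgboost.DataIter):
        """Iterate over pre-partitioned batches stored on disk."""

        def __init__(self, paths):
            self._paths = paths
            self._it = 0
            # Tell XGBoost where to place its external-memory cache.
            super().__init__(cache_prefix="cache")

        def next(self, input_data) -> bool:
            if self._it == len(self._paths):
                return False  # no more batches
            batch = np.load(self._paths[self._it])
            input_data(data=batch["X"], label=batch["y"])
            self._it += 1
            return True

        def reset(self) -> None:
            self._it = 0

    it = Batches([f"batch-{i}.npz" for i in range(128)])
    # Cap each quantization sub-stream at 32 batches (256GB with 8GB batches).
    Xy = xgboost.ExtMemQuantileDMatrix(it, max_quantile_batches=32)
    booster = xgboost.train({"device": "cuda"}, Xy, num_boost_round=10)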

doc/tutorials/intercept.rst

Lines changed: 55 additions & 1 deletion
@@ -136,4 +136,58 @@ We have:
     E[c_i] &= \exp{(F(x_i) + \ln{\gamma_i})} \\
     E[c_i] &= g^{-1}(F(x_i) + g(\gamma_i))
 
-As you can see, we can use the ``base_margin`` for modeling with offset similar to GLMs
+As you can see, we can use the ``base_margin`` for modeling with offset similar to GLMs.
+
+*******
+Example
+*******
+
+The following example shows the relationship between ``base_score`` and ``base_margin``
+using binary logistic with a `logit` link function:
+
+.. code-block:: python
+
+    import numpy as np
+    from scipy.special import logit
+    from sklearn.datasets import make_classification
+    from xgboost import train, DMatrix
+
+    X, y = make_classification(random_state=2025)
+
+The intercept is a valid probability (0.5). It's used as the initial estimation of the
+probability of obtaining a positive sample.
+
+.. code-block:: python
+
+    intercept = 0.5
+
+First, we use the intercept to train a model:
+
+.. code-block:: python
+
+    booster = train(
+        {"base_score": intercept, "objective": "binary:logistic"},
+        dtrain=DMatrix(X, y),
+        num_boost_round=1,
+    )
+    predt_0 = booster.predict(DMatrix(X, y))
+
+Apply :py:func:`~scipy.special.logit` to obtain the "margin":
+
+.. code-block:: python
+
+    margin = np.full(y.shape, fill_value=logit(intercept), dtype=np.float32)
+    Xy = DMatrix(X, y, base_margin=margin)
+    # 0.2 is a dummy value to show that `base_margin` overrides `base_score`.
+    booster = train(
+        {"base_score": 0.2, "objective": "binary:logistic"},
+        dtrain=Xy,
+        num_boost_round=1,
+    )
+    predt_1 = booster.predict(Xy)
+
+Compare the results:
+
+.. code-block:: python
+
+    np.testing.assert_allclose(predt_0, predt_1)
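
The two predictions agree because ``logit`` inverts the logistic inverse link, and
``logit(0.5)`` is exactly ``0.0``, so the supplied margin equals the raw offset that
``base_score=0.5`` produces. A quick check (not part of the commit):

.. code-block:: python

    from scipy.special import expit, logit

    p = 0.5
    assert logit(p) == 0.0       # the margin corresponding to the intercept
    assert expit(logit(p)) == p  # expit is the inverse of logit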

include/xgboost/learner.h

Lines changed: 16 additions & 13 deletions
@@ -11,7 +11,7 @@
 #include <dmlc/io.h>          // for Serializable
 #include <xgboost/base.h>     // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair, ..
 #include <xgboost/context.h>  // for Context
-#include <xgboost/linalg.h>   // for Tensor, TensorView
+#include <xgboost/linalg.h>   // for Vector, VectorView
 #include <xgboost/metric.h>   // for Metric
 #include <xgboost/model.h>    // for Configurable, Model
 #include <xgboost/span.h>     // for Span
@@ -284,58 +284,61 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
 struct LearnerModelParamLegacy;
 
 /**
- * \brief Strategy for building multi-target models.
+ * @brief Strategy for building multi-target models.
  */
 enum class MultiStrategy : std::int32_t {
   kOneOutputPerTree = 0,
   kMultiOutputTree = 1,
 };
 
 /**
- * \brief Basic model parameters, used to describe the booster.
+ * @brief Basic model parameters, used to describe the booster.
  */
 struct LearnerModelParam {
  private:
   /**
-   * \brief Global bias, this is just a scalar value but can be extended to vector when we
+   * @brief Global bias, this is just a scalar value but can be extended to vector when we
    * support multi-class and multi-target.
+   *
+   * The value stored here is the value before applying the inverse link function, used
+   * for initializing the prediction matrix/vector.
    */
-  linalg::Tensor<float, 1> base_score_;
+  linalg::Vector<float> base_score_;
 
  public:
   /**
-   * \brief The number of features.
+   * @brief The number of features.
    */
   bst_feature_t num_feature{0};
   /**
-   * \brief The number of classes or targets.
+   * @brief The number of classes or targets.
    */
   std::uint32_t num_output_group{0};
   /**
-   * \brief Current task, determined by objective.
+   * @brief Current task, determined by objective.
    */
   ObjInfo task{ObjInfo::kRegression};
   /**
-   * \brief Strategy for building multi-target models.
+   * @brief Strategy for building multi-target models.
    */
   MultiStrategy multi_strategy{MultiStrategy::kOneOutputPerTree};
 
   LearnerModelParam() = default;
   // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
   // this one as an immutable copy.
   LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
-                    linalg::Tensor<float, 1> base_margin, ObjInfo t, MultiStrategy multi_strategy);
+                    linalg::Vector<float> base_score, ObjInfo t, MultiStrategy multi_strategy);
   LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t,
                     MultiStrategy multi_strategy);
-  LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_score,
+  LearnerModelParam(bst_feature_t n_features, linalg::Vector<float> base_score,
                     std::uint32_t n_groups, bst_target_t n_targets, MultiStrategy multi_strategy)
       : base_score_{std::move(base_score)},
         num_feature{n_features},
         num_output_group{std::max(n_groups, n_targets)},
         multi_strategy{multi_strategy} {}
 
-  linalg::TensorView<float const, 1> BaseScore(Context const* ctx) const;
-  [[nodiscard]] linalg::TensorView<float const, 1> BaseScore(DeviceOrd device) const;
+  linalg::VectorView<float const> BaseScore(Context const* ctx) const;
+  [[nodiscard]] linalg::VectorView<float const> BaseScore(DeviceOrd device) const;
 
   void Copy(LearnerModelParam const& that);
   [[nodiscard]] bool IsVectorLeaf() const noexcept {
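
The renamed ``base_score`` vector is stored as a raw margin internally, while the saved
configuration exposes the user-facing value. A sketch of the ``get_basescore`` helper
used by the new test, assuming it reads the saved configuration (the in-repo definition
may differ):

.. code-block:: python

    import json
    import xgboost

    def get_basescore(booster: xgboost.Booster) -> float:
        """Read the user-facing intercept from the saved model configuration."""
        config = json.loads(booster.save_config())
        return float(config["learner"]["learner_model_param"]["base_score"])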

include/xgboost/objective.h

Lines changed: 43 additions & 39 deletions
@@ -1,8 +1,8 @@
 /**
- * Copyright 2014-2024, XGBoost Contributors
- * \file objective.h
- * \brief interface of objective function used by xgboost.
- * \author Tianqi Chen, Kailong Chen
+ * Copyright 2014-2025, XGBoost Contributors
+ *
+ * @brief interface of objective function used by xgboost.
+ * @author Tianqi Chen, Kailong Chen
  */
 #ifndef XGBOOST_OBJECTIVE_H_
 #define XGBOOST_OBJECTIVE_H_
@@ -11,19 +11,20 @@
 #include <xgboost/base.h>
 #include <xgboost/data.h>
 #include <xgboost/host_device_vector.h>
+#include <xgboost/linalg.h>  // for Vector
 #include <xgboost/model.h>
 #include <xgboost/task.h>
 
-#include <cstdint>  // std::int32_t
+#include <cstdint>  // for int32_t
 #include <functional>
-#include <string>
+#include <string>  // for string
 
 namespace xgboost {
 
 class RegTree;
 struct Context;
 
-/*! \brief interface of objective function */
+/** @brief The interface of objective function */
 class ObjFunction : public Configurable {
  protected:
   Context const* ctx_;
@@ -32,32 +33,30 @@ class ObjFunction : public Configurable {
   static constexpr float DefaultBaseScore() { return 0.5f; }
 
  public:
-  /*! \brief virtual destructor */
   ~ObjFunction() override = default;
-  /*!
-   * \brief Configure the objective with the specified parameters.
-   * \param args arguments to the objective function.
+  /**
+   * @brief Configure the objective with the specified parameters.
+   *
+   * @param args arguments to the objective function.
    */
   virtual void Configure(Args const& args) = 0;
   /**
    * @brief Get gradient over each of predictions, given existing information.
    *
-   * @param preds prediction of current round
-   * @param info information about labels, weights, groups in rank
+   * @param preds Raw prediction (before applying the inverse link) of the current round.
+   * @param info information about labels, weights, groups in rank.
    * @param iteration current iteration number.
    * @param out_gpair output of get gradient, saves gradient and second order gradient in
    */
   virtual void GetGradient(HostDeviceVector<float> const& preds, MetaInfo const& info,
                            std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) = 0;
 
-  /*! \return the default evaluation metric for the objective */
-  virtual const char* DefaultEvalMetric() const = 0;
+  /** @return the default evaluation metric for the objective */
+  [[nodiscard]] virtual const char* DefaultEvalMetric() const = 0;
   /**
-   * \brief Return the configuration for the default metric.
+   * @brief Return the configuration for the default metric.
    */
-  virtual Json DefaultMetricConfig() const { return Json{Null{}}; }
-
-  // the following functions are optional, most of time default implementation is good enough
+  [[nodiscard]] virtual Json DefaultMetricConfig() const { return Json{Null{}}; }
   /**
    * @brief Apply inverse link (activation) function to prediction values.
    *
@@ -75,25 +74,28 @@ class ObjFunction : public Configurable {
    */
   virtual void EvalTransform(HostDeviceVector<float>* io_preds) { this->PredTransform(io_preds); }
   /**
-   * @brief Apply link function to the intercept.
+   * @brief Apply the link function to the intercept.
    *
-   * This is used to transform user-set base_score back to margin used by gradient
-   * boosting
+   * This is an inverse of `PredTransform` for most of the objectives (if there's a
+   * valid inverse). It's used to transform user-set base_score back to margin used by
+   * gradient boosting. The method converts objective-based valid outputs like
+   * probability back to raw model outputs.
    *
    * @return transformed value
    */
   [[nodiscard]] virtual float ProbToMargin(float base_score) const { return base_score; }
   /**
-   * @brief Obtain the initial estimation of prediction.
+   * @brief Obtain the initial estimation of prediction (intercept).
    *
-   * The output in `base_score` represents prediction after apply the inverse link function.
+   * The output in `base_score` represents prediction after apply the inverse link function
+   * (valid prediction instead of raw).
    *
    * @param info MetaInfo that contains label.
    * @param base_score Output estimation.
    */
-  virtual void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) const;
-  /*!
-   * \brief Return task of this objective.
+  virtual void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const;
+  /**
+   * @brief Return task of this objective.
    */
   [[nodiscard]] virtual struct ObjInfo Task() const = 0;
   /**
@@ -106,31 +108,33 @@ class ObjFunction : public Configurable {
     }
     return 1;
   }
+  /** @brief Getter of the context. */
+  [[nodiscard]] Context const* Ctx() const { return this->ctx_; }
 
   /**
-   * \brief Update the leaf values after a tree is built. Needed for objectives with 0
+   * @brief Update the leaf values after a tree is built. Needed for objectives with 0
    * hessian.
    *
    * Note that the leaf update is not well defined for distributed training as XGBoost
    * computes only an average of quantile between workers. This breaks when some leaf
    * have no sample assigned in a local worker.
    *
-   * \param position The leaf index for each rows.
-   * \param info MetaInfo providing labels and weights.
-   * \param learning_rate The learning rate for current iteration.
-   * \param prediction Model prediction after transformation.
-   * \param group_idx The group index for this tree, 0 when it's not multi-target or multi-class.
-   * \param p_tree Tree that needs to be updated.
+   * @param position The leaf index for each rows.
+   * @param info MetaInfo providing labels and weights.
+   * @param learning_rate The learning rate for current iteration.
+   * @param prediction Model prediction after transformation.
+   * @param group_idx The group index for this tree, 0 when it's not multi-target or multi-class.
+   * @param p_tree Tree that needs to be updated.
    */
   virtual void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& /*position*/,
                               MetaInfo const& /*info*/, float /*learning_rate*/,
                               HostDeviceVector<float> const& /*prediction*/,
                               std::int32_t /*group_idx*/, RegTree* /*p_tree*/) const {}
-
-  /*!
-   * \brief Create an objective function according to name.
-   * \param ctx Pointer to runtime parameters.
-   * \param name Name of the objective.
+  /**
+   * @brief Create an objective function according to the name.
+   *
+   * @param name Name of the objective.
+   * @param ctx Pointer to the context.
    */
   static ObjFunction* Create(const std::string& name, Context const* ctx);
 };
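
The clarified ``@param preds`` comment also describes the Python custom-objective
callback, which receives the raw, pre-inverse-link predictions. A minimal sketch using
the public ``obj`` hook (an analogue of this C++ interface, not the commit's code):

.. code-block:: python

    import numpy as np
    import xgboost as xgb
    from scipy.special import expit

    def logistic_obj(predt: np.ndarray, dtrain: xgb.DMatrix):
        """Binary logistic loss: derivatives w.r.t. the raw margin."""
        y = dtrain.get_label()
        p = expit(predt)  # inverse link: margin -> probability
        grad = p - y
        hess = p * (1.0 - p)
        return grad, hess

    rng = np.random.default_rng(0)
    X = rng.normal(size=(128, 4))
    y = (X[:, 0] > 0).astype(np.float32)
    booster = xgb.train(
        {"tree_method": "hist"},
        xgb.DMatrix(X, label=y),
        num_boost_round=4,
        obj=logistic_obj,
    )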

python-package/xgboost/testing/predict.py

Lines changed: 31 additions & 0 deletions
@@ -3,10 +3,12 @@
 from typing import Type
 
 import numpy as np
+from scipy.special import logit  # pylint: disable=no-name-in-module
 
 from ..core import DMatrix
 from ..training import train
 from .shared import validate_leaf_output
+from .updater import get_basescore
 from .utils import Device
 
 
@@ -63,3 +65,32 @@ def run_predict_leaf(device: Device, DMatrixT: Type[DMatrix]) -> np.ndarray:
     assert booster.predict(m, pred_leaf=True).shape == (rows,)
 
     return leaf
+
+
+def run_base_margin_vs_base_score(device: Device) -> None:
+    """Test for the relation between score and margin."""
+    from sklearn.datasets import make_classification
+
+    intercept = 0.5
+
+    X, y = make_classification(random_state=2025)
+    booster = train(
+        {"base_score": intercept, "objective": "binary:logistic", "device": device},
+        dtrain=DMatrix(X, y),
+        num_boost_round=1,
+    )
+    np.testing.assert_allclose(get_basescore(booster), intercept)
+    predt_0 = booster.predict(DMatrix(X, y))
+
+    margin = np.full(y.shape, fill_value=logit(intercept), dtype=np.float32)
+    Xy = DMatrix(X, y, base_margin=margin)
+    # 0.2 is a dummy value
+    booster = train(
+        {"base_score": 0.2, "objective": "binary:logistic", "device": device},
+        dtrain=Xy,
+        num_boost_round=1,
+    )
+    np.testing.assert_allclose(get_basescore(booster), 0.2)
+    predt_1 = booster.predict(Xy)
+
+    np.testing.assert_allclose(predt_0, predt_1)
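
A usage sketch for the new helper, assuming a checkout where the testing module is
importable:

.. code-block:: python

    from xgboost.testing.predict import run_base_margin_vs_base_score

    run_base_margin_vs_base_score("cpu")   # "cuda" exercises the GPU path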
