Building your own minimizer

Building your own minimizer

from __future__ import annotations

from collections.abc import Sequence

import zfit
from zfit.minimizers.interface import ZfitMinimizer
# Global, experimental zfit run setting; applied once here, before any minimizer is used.
zfit.run.experimental_disable_param_update(True)  # does not update parameters automatically after minimization
<zfit.util.temporary.TemporarilySet at 0x7c96f06d6030>
class ChainedMinimizer(zfit.minimize.BaseMinimizer):
    """Minimizer that runs a list of minimizers one after the other.

    Every minimizer in the chain receives the result of its predecessor as
    ``init``; the result of the last minimizer is what ``_minimize`` returns.
    """

    def __init__(self, minimizers: ZfitMinimizer | Sequence[ZfitMinimizer], tol=None, verbosity=0, name=None):
        """Store the chain and initialise the base minimizer.

        Args:
            minimizers: A single minimizer or a sequence of minimizers. A single
                minimizer is wrapped into a one-element list.
            tol: Tolerance handed to the base class. If ``None``, the tolerance
                of the last minimizer in the chain is used.
            verbosity: Forwarded to the base class; values above 7 make
                ``_minimize`` print every intermediate result.
            name: Forwarded to the base class.

        Raises:
            ValueError: If ``tol`` is given and differs from the tolerance of
                the last minimizer by more than ``1e-6``.
        """
        # A bare minimizer is accepted as shorthand for a chain of length one.
        chain = [minimizers] if isinstance(minimizers, ZfitMinimizer) else minimizers
        self.minimizers = chain

        final_tol = chain[-1].tol
        if tol is not None and abs(tol - final_tol) > 1e-6:
            raise ValueError("The tolerance of the chained minimizer must be the same as the last minimizer.")
        super().__init__(tol=final_tol if tol is None else tol, verbosity=verbosity, name=name)

    @zfit.minimize.minimize_supports(init=True)
    def _minimize(self, loss, params, init):
        """Run all chained minimizers in order and return the last result.

        ``init`` seeds the first minimizer; each later minimizer is seeded with
        the result produced by the one before it.
        """
        outcome = init
        for stage in self.minimizers:
            outcome = stage.minimize(loss, params=params, init=outcome)
            if self.verbosity > 7:
                print(f"Minimizer {stage} finished with result \n{outcome}")
        return outcome
# Chain a Minuit minimizer configured with a loose tolerance (tol=10.) with a
# ScipyTrustConstrV1 minimizer configured with a tight one (tol=1e-3). No `tol` is
# passed to ChainedMinimizer, so it adopts the tolerance of the last minimizer.
minimizer1 = zfit.minimize.Minuit(tol=10., mode=0)
minimizer2 = zfit.minimize.ScipyTrustConstrV1(tol=1e-3)
# verbosity=8 is above the threshold (> 7) at which ChainedMinimizer prints each intermediate result.
minimizer = ChainedMinimizer([minimizer1, minimizer2], verbosity=8)

Create a simple loss and minimize it with the chained minimizer.

# Observable 'obs1', constructed with the limits -10 and 10.
obs = zfit.Space('obs1', -10, 10)
# Signal component: a Gaussian in `obs`, made extended with the yield parameter `sigyield`.
# NOTE(review): the numbers after each parameter name are presumably
# (initial value, lower limit, upper limit) -- confirm against zfit.Parameter.
mu = zfit.Parameter('mu', 1., -1, 5)
sigma = zfit.Parameter('sigma', 1., 0, 10)
sigyield = zfit.Parameter('sigyield', 1000, 0, 10000)
gauss = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sigma, extended=sigyield)

# Background component: an exponential in `obs`, made extended with the yield parameter `bkgyield`.
lamb = zfit.Parameter('lambda', -0.1, -1, -0.01)
bkgyield = zfit.Parameter('bkgyield', 1000, 0, 10000)
exponential = zfit.pdf.Exponential(obs=obs, lambda_=lamb, extended=bkgyield)

# Total model: sum of the two extended PDFs.
model = zfit.pdf.SumPDF([gauss, exponential])

# Draw 5000 events using the parameter values given here instead of the initial
# values above; the two yields (3000 + 2000) add up to n.
data = model.sample(n=5000, params={mu: 0.5, sigma: 1.2, lamb: -0.05, sigyield: 3000, bkgyield: 2000})

# Extended unbinned NLL of the model evaluated on the sampled data.
loss = zfit.loss.ExtendedUnbinnedNLL(model=model, data=data)
# result = minimizer.minimize(loss=loss)

Implementing a custom algorithm

import zfit.z.numpy as znp
from zfit.result import FitResult


class GradientDescentMinimizer(zfit.minimize.BaseMinimizer):
    """Toy gradient-descent minimizer with a slowly decaying step size.

    In iteration ``i`` (counted from 1) the parameter values are updated as
    ``paramvals -= scaling * gradients / i ** 0.1``.
    """

    def __init__(self, scaling, tol=None, verbosity=0, strategy=None, criterion=None, maxiter=None, name=None):
        """Initialise the base minimizer and store the step scaling.

        Args:
            scaling: Factor the gradient is multiplied with in every update step.
            tol: Forwarded to the base class.
            verbosity: Forwarded to the base class; values above 5 print a
                message on convergence, values above 9 print every iteration.
            strategy: Forwarded to the base class.
            criterion: Forwarded to the base class.
            maxiter: Forwarded to the base class.
            name: Forwarded to the base class.
        """
        super().__init__(
            name=name,
            strategy=strategy,
            tol=tol,
            verbosity=verbosity,
            criterion=criterion,
            maxiter=maxiter,
        )
        self.scaling = scaling

    @zfit.minimize.minimize_supports(init=False)  # we could allow the previous result as additional information
    def _minimize(self, loss, params, init):
        """Follow the negative gradient until the convergence criterion is met.

        Returns:
            The ``FitResult`` built in the iteration in which the criterion
            reported convergence (``valid=True``, ``converged=True``).
        """
        criterion = self.create_criterion(loss, params)  # this is to be checked for convergence
        evaluator = self.create_evaluator(loss, params)  # takes into account the strategy, callbacks, maxiter, and so on. A wrapper around the loss
        paramvals = znp.asarray(params)
        i = 1  # iteration counter, drives the step-size decay below
        # The loop itself only exits on convergence; any iteration limit is left
        # to the evaluator (see the comment on its creation above).
        while True:
            value, gradients = evaluator.value_gradient(paramvals)
            # Preliminary, not-yet-converged result: the criterion is evaluated on a FitResult.
            result = FitResult(loss=loss, params=dict(zip(params, paramvals)), minimizer=self, valid=False, converged=False, edm=None, fminopt=None,
                               approx={'gradient': gradients}, criterion=criterion,
                               )
            if criterion.converged(result=result):
                # Rebuild the result with the final flags set.
                result = FitResult(loss=loss, params=dict(zip(params, paramvals)), minimizer=self, valid=True, converged=True, edm=None,
                                   fminopt=None, approx={'gradient': gradients}, criterion=criterion)
                if self.verbosity > 5:
                    print(f"Converged with value {value}, criterion {criterion.last_value}")
                break
            if self.verbosity > 9:
                print(f"Criterion: {criterion.last_value} Loss value: {value}, gradients: {gradients}")
            paramvals -= self.scaling * gradients / i ** 0.1
            # Advance the counter; without this the i ** 0.1 decay would stay at 1 forever.
            i += 1
        return result
gsdminimizer = GradientDescentMinimizer(scaling=0.0001, tol=0.3, verbosity=10, maxiter=10)  # limit maxiter, as it won't converge
# Display the Hessian of the loss with respect to its parameters. In the output below its
# entries differ by many orders of magnitude, which is presumably why a single global
# step scaling struggles here.
loss.hessian(loss.get_params())
<tf.Tensor: shape=(5, 5), dtype=float64, numpy=
array([[ 2.24556304e-03,  4.76924278e-04,  2.70609554e-01,
        -6.73612112e-01,  1.69925600e+00],
       [ 4.76924278e-04,  1.80058840e-03, -2.70609554e-01,
         6.73612112e-01, -1.69925600e+00],
       [ 2.70609554e-01, -2.70609554e-01,  1.57195093e+03,
        -7.82059604e+02,  3.28234441e+01],
       [-6.73612112e-01,  6.73612112e-01, -7.82059604e+02,
         3.45524035e+03,  2.06608068e+03],
       [ 1.69925600e+00, -1.69925600e+00,  3.28234441e+01,
         2.06608068e+03,  5.57939562e+04]])>
# Run the gradient descent; with verbosity=10 (> 9) every iteration prints the
# criterion value, the loss value and the gradients.
gsdresult = gsdminimizer.minimize(loss=loss)
Criterion: 1210.8738690936395 Loss value: 10006.622045953896, gradients: [-1.72248732e+00 -1.27751268e+00  7.49117318e+02 -5.25674511e+02
 -3.66385089e+03]

Criterion: 1099.1713764110482 Loss value: 9870.314487418487, gradients: [-1.67132575e+00 -1.32867350e+00  6.40519879e+02 -2.24127627e+02
  2.47807149e+03]

Criterion: 1051.48834640525 Loss value: 9828.678501070179, gradients: [-1.69612945e+00 -1.30386903e+00  5.37074868e+02 -1.29158016e+02
  2.42558412e+03]

Criterion: 25085.72174492847 Loss value: 10607.409149434148, gradients: [-2.07329502e+00 -9.26702659e-01  3.80884961e+02 -4.67779356e+02
 -7.58425891e+03]

Criterion: 1002.1157378880737 Loss value: 9784.230825256782, gradients: [-1.73774599e+00 -1.26225096e+00  3.84337106e+02  4.59258561e+01
  2.34977344e+03]

Criterion: 985.6485631695613 Loss value: 9770.203732635586, gradients: [-1.74092350e+00 -1.25907269e+00  3.33998141e+02  4.93470348e+01
  2.31621735e+03]

Criterion: 973.4603845646624 Loss value: 9759.541992968283, gradients: [-1.74280084e+00 -1.25719458e+00  2.89790674e+02  4.84308888e+01
  2.28628775e+03]

Criterion: 1650.5122351193213 Loss value: 10125.13598820688, gradients: [-1.97003544e+00 -1.02995913e+00  1.50465444e+02 -1.43412181e+02
 -6.08281776e+03]

Criterion: 961.2169288275082 Loss value: 9748.757420781534, gradients: [-1.75091996e+00 -1.24907392e+00  2.27530991e+02  7.73149212e+01
  2.24747889e+03]

Criterion: 955.3383784621751 Loss value: 9743.36740861339, gradients: [-1.74979446e+00 -1.25019866e+00  1.97585201e+02  6.52815038e+01
  2.22581021e+03]