Building your own minimizer

# Building your own minimizer

from __future__ import annotations

from collections.abc import Sequence

import zfit
from zfit.minimizers.interface import ZfitMinimizer
# Do not update the parameters automatically after a minimization.
zfit.run.experimental_disable_param_update(True)
<zfit.util.temporary.TemporarilySet at 0x75ed70c79e50>
class ChainedMinimizer(zfit.minimize.BaseMinimizer):
    """Minimizer that runs several minimizers one after another.

    Every minimizer in the chain is handed the result of its predecessor as
    ``init``; the result of the last minimizer is what the chain returns.
    """

    def __init__(self, minimizers: ZfitMinimizer | Sequence[ZfitMinimizer], tol=None, verbosity=0, name=None):
        """Store the chain and initialise the base minimizer.

        Args:
            minimizers: A single minimizer or a sequence of minimizers, run in
                the given order.
            tol: Tolerance of the chain. If ``None``, the tolerance of the last
                minimizer in the chain is used.
            verbosity: Forwarded to the base class. Above 7, the result of each
                stage is printed during minimization.
            name: Forwarded to the base class.

        Raises:
            ValueError: If ``tol`` is given and differs from the last
                minimizer's tolerance by more than 1e-6.
        """
        # A lone minimizer is treated as a chain of length one.
        chain = [minimizers] if isinstance(minimizers, ZfitMinimizer) else minimizers
        self.minimizers = chain

        # The last stage determines the final precision, so the chain's
        # tolerance has to agree with it.
        final_tol = chain[-1].tol
        if tol is not None and abs(tol - final_tol) > 1e-6:
            raise ValueError("The tolerance of the chained minimizer must be the same as the last minimizer.")
        super().__init__(tol=final_tol if tol is None else tol, verbosity=verbosity, name=name)

    @zfit.minimize.minimize_supports(init=True)
    def _minimize(self, loss, params, init):
        """Run every minimizer in order, seeding each with the previous result.

        Args:
            loss: The loss to minimize, passed unchanged to every stage.
            params: The parameters to minimize, passed unchanged to every stage.
            init: Starting result for the first stage.

        Returns:
            The result returned by the last minimizer, or ``init`` itself if
            the chain is empty.
        """
        result = init
        for minimizer in self.minimizers:
            # The output of one stage becomes the starting point of the next.
            result = minimizer.minimize(loss, params=params, init=result)
            if self.verbosity > 7:
                print(f"Minimizer {minimizer} finished with result \n{result}")
        return result
# First stage with a loose tolerance (10), second stage with a tight one (1e-3).
minimizer1 = zfit.minimize.Minuit(tol=10.0, mode=0)
minimizer2 = zfit.minimize.ScipyTrustConstrV1(tol=1e-3)
# verbosity=8 is above the chain's print threshold (> 7), so the result of
# every stage is printed.
minimizer = ChainedMinimizer(minimizers=[minimizer1, minimizer2], verbosity=8)

Create a simple loss and minimize it with the chained minimizer.

# Observable space from -10 to 10.
obs = zfit.Space("obs1", -10, 10)

# Gaussian component with its own yield (extended PDF).
mu = zfit.Parameter("mu", 1.0, -1, 5)
sigma = zfit.Parameter("sigma", 1.0, 0, 10)
sigyield = zfit.Parameter("sigyield", 1000, 0, 10000)
gauss = zfit.pdf.Gauss(obs=obs, mu=mu, sigma=sigma, extended=sigyield)

# Exponential component with its own yield (extended PDF).
lamb = zfit.Parameter("lambda", -0.1, -1, -0.01)
bkgyield = zfit.Parameter("bkgyield", 1000, 0, 10000)
exponential = zfit.pdf.Exponential(obs=obs, lambda_=lamb, extended=bkgyield)

# Total model: sum of the two extended components.
model = zfit.pdf.SumPDF([gauss, exponential])

# Sample with parameter values that differ from the initial values above, so
# that the fit has something to do.
sampling_values = {
    mu: 0.5,
    sigma: 1.2,
    lamb: -0.05,
    sigyield: 3000,
    bkgyield: 2000,
}
data = model.sample(n=5000, params=sampling_values)

loss = zfit.loss.ExtendedUnbinnedNLL(model=model, data=data)
# result = minimizer.minimize(loss=loss)

## Implementing a custom algorithm

import zfit.z.numpy as znp
from zfit.result import FitResult


class GradientDescentMinimizer(zfit.minimize.BaseMinimizer):
    """Toy gradient-descent minimizer with a slowly decaying step size.

    In iteration ``i`` (starting at 1) the parameter values are moved against
    the gradient by ``scaling * gradient / i ** 0.1``.
    """

    def __init__(self, scaling, tol=None, verbosity=0, strategy=None, criterion=None, maxiter=None, name=None):
        """Initialise the base minimizer and store the step scaling.

        Args:
            scaling: Factor multiplied with the gradient to obtain the step.
            tol: Forwarded to the base class.
            verbosity: Forwarded to the base class. Above 5, a message is
                printed on convergence; above 9, every iteration is printed.
            strategy: Forwarded to the base class.
            criterion: Forwarded to the base class.
            maxiter: Forwarded to the base class.
            name: Forwarded to the base class.
        """
        super().__init__(
            name=name,
            strategy=strategy,
            tol=tol,
            verbosity=verbosity,
            criterion=criterion,
            maxiter=maxiter,
        )
        self.scaling = scaling

    def _make_result(self, loss, params, paramvals, gradients, criterion, converged):
        """Build a FitResult for the current values; ``converged`` sets both ``valid`` and ``converged``."""
        return FitResult(
            loss=loss,
            params=dict(zip(params, paramvals)),
            minimizer=self,
            valid=converged,
            converged=converged,
            edm=None,
            fminopt=None,
            approx={'gradient': gradients},
            criterion=criterion,
        )

    @zfit.minimize.minimize_supports(init=False)  # we could allow the previous result as additional information
    def _minimize(self, loss, params, init):
        """Iterate gradient-descent steps until the criterion reports convergence.

        Args:
            loss: The loss to minimize.
            params: The parameters to minimize.
            init: Unused; a previous result is not supported as a starting point.

        Returns:
            A FitResult flagged valid and converged, built from the parameter
            values at which the criterion reported convergence.
        """
        criterion = self.create_criterion(loss, params)  # this is to be checked for convergence
        # A wrapper around the loss that takes into account the strategy,
        # callbacks, maxiter, and so on.
        evaluator = self.create_evaluator(loss, params)
        paramvals = znp.asarray(params)
        # NOTE(review): the loop itself only exits on convergence; presumably
        # the evaluator enforces maxiter -- confirm.
        i = 1  # 1-based iteration counter that drives the step-size decay
        while True:
            value, gradients = evaluator.value_gradient(paramvals)
            # The criterion is evaluated on a result object, so build a
            # provisional (not yet converged) one from the current state.
            result = self._make_result(loss, params, paramvals, gradients, criterion, converged=False)
            if criterion.converged(result=result):
                result = self._make_result(loss, params, paramvals, gradients, criterion, converged=True)
                if self.verbosity > 5:
                    print(f"Converged with value {value}, criterion {criterion.last_value}")
                break
            if self.verbosity > 9:
                print(f"Criterion: {criterion.last_value} Loss value: {value}, gradients: {gradients}")
            paramvals -= self.scaling * gradients / i ** 0.1
            # Advance the counter; without this the decay factor stays at 1
            # and the step size never shrinks.
            i += 1
        return result
# limit maxiter, as it won't converge
gsdminimizer = GradientDescentMinimizer(
    scaling=0.0001,
    tol=0.3,
    verbosity=10,
    maxiter=10,
)
# Display the Hessian of the loss with respect to its parameters.
loss.hessian(loss.get_params())
<tf.Tensor: shape=(5, 5), dtype=float64, numpy=
array([[ 2.24748115e-03,  4.74268998e-04,  2.54095872e-01,
        -6.53102731e-01,  1.70480244e+00],
       [ 4.74268998e-04,  1.80398086e-03, -2.54095872e-01,
         6.53102731e-01, -1.70480244e+00],
       [ 2.54095872e-01, -2.54095872e-01,  1.59437841e+03,
        -7.77730960e+02,  7.78661201e+01],
       [-6.53102731e-01,  6.53102731e-01, -7.77730960e+02,
         3.54809212e+03,  2.06409840e+03],
       [ 1.70480244e+00, -1.70480244e+00,  7.78661201e+01,
         2.06409840e+03,  5.57463464e+04]])>
# Run the gradient-descent minimizer on the loss; with verbosity=10 the
# criterion, loss value and gradients are printed in every iteration.
gsdresult = gsdminimizer.minimize(loss=loss)
Criterion: 1167.438510247885 Loss value: 10006.629142882339, gradients: [-1.72175014e+00 -1.27824986e+00  7.05588648e+02 -4.99006544e+02
 -3.32863299e+03]

Criterion: 1087.950169265075 Loss value: 9908.323622283271, gradients: [-1.66485427e+00 -1.33514497e+00  6.05259230e+02 -2.01342703e+02
  2.81872170e+03]

Criterion: 993.0848909276918 Loss value: 9819.720173126341, gradients: [  -1.71664709   -1.2833514   478.17944648 -128.89744582  -71.79063604]

Criterion: 970.4025908376387 Loss value: 9798.51318363582, gradients: [ -1.72293877  -1.27705896 406.78196752 -67.60769559 407.14968464]

Criterion: 986.5324389027087 Loss value: 9822.588581843025, gradients: [-1.78390166e+00 -1.21609529e+00  3.18755489e+02 -7.35257902e+01
 -2.25637476e+03]

Criterion: 988.9244557271205 Loss value: 9820.430934049975, gradients: [-1.71563221e+00 -1.28436400e+00  3.21964927e+02  6.57272541e+00
  2.67127176e+03]

Criterion: 1147.6401141500094 Loss value: 9946.135916656123, gradients: [-1.86798660e+00 -1.13200881e+00  1.95955231e+02 -1.03851021e+02
 -4.41965220e+03]

Criterion: 972.9015878584834 Loss value: 9805.991381363805, gradients: [-1.72481045e+00 -1.27518422e+00  2.45379059e+02  4.35451002e+01
  2.62698694e+03]

Criterion: 966.6093868723038 Loss value: 9800.205825212885, gradients: [-1.72509399e+00 -1.27489992e+00  2.11339312e+02  3.92829902e+01
  2.60414062e+03]

Criterion: 3516.2585275972783 Loss value: 10412.968564910661, gradients: [-2.04759588e+00 -9.52397135e-01  4.83786215e+01 -2.91489069e+02
 -6.95136099e+03]