# Source code for enc.training.quantizemodel

# Software Name: Cool-Chic
# SPDX-FileCopyrightText: Copyright (c) 2023-2025 Orange
# SPDX-License-Identifier: BSD 3-Clause "New"
#
# This software is distributed under the BSD-3-Clause license.
#
# Authors: see CONTRIBUTORS.md

import itertools
import time
from typing import Optional, OrderedDict

import torch
from enc.utils.misc import exp_golomb_nbins
from enc.training.loss import loss_function
from enc.utils.manager import FrameEncoderManager
from enc.component.frame import FrameEncoder
from enc.utils.codingstructure import Frame
from enc.utils.misc import (
    MAX_AC_MAX_VAL,
    POSSIBLE_EXP_GOL_COUNT,
    POSSIBLE_Q_STEP,
    DescriptorNN,
    get_q_step_from_parameter_name,
)
from torch import Tensor


def _quantize_parameters(
    fp_param: OrderedDict[str, Tensor],
    q_step: DescriptorNN,
) -> Optional[OrderedDict[str, Tensor]]:
    """Snap every full precision parameter onto its quantization grid.

    Each tensor of ``fp_param`` is divided by the quantization step obtained
    from ``get_q_step_from_parameter_name`` for its name, rounded to the
    nearest integer level and multiplied back by the same step.

    Args:
        fp_param (OrderedDict[str, Tensor]): Full precision parameters,
            usually the output of ``get_param()`` or ``named_parameters()``.
        q_step (DescriptorNN): A dictionary with one quantization step for
            the weight and one for the bias.

    Returns:
        Optional[OrderedDict[str, Tensor]]: The quantized parameters, stored
            under the same names and in the same order as ``fp_param``, or
            None if quantization failed i.e. if the magnitude of
            ``round(param / q_step)`` exceeds ``MAX_AC_MAX_VAL`` for at least
            one parameter.
    """
    quantized = OrderedDict()

    for name, fp_value in fp_param.items():
        step = get_q_step_from_parameter_name(name, q_step)

        # Integer levels, i.e. the values that would actually be sent.
        levels = torch.round(fp_value / step)

        # A level beyond MAX_AC_MAX_VAL means that the quantization step is
        # too small for this parameter: give up on the whole dictionary.
        largest_level = levels.abs().max()
        if largest_level > MAX_AC_MAX_VAL:
            return None

        quantized[name] = levels * step

    return quantized


# [docs]
@torch.no_grad()
def quantize_model(
    frame_encoder: FrameEncoder,
    frame: Frame,
    frame_encoder_manager: FrameEncoderManager,
) -> FrameEncoder:
    """Quantize a ``FrameEncoder`` compressing a ``Frame`` under a rate
    constraint ``lmbda`` and return it.

    This function iterates on all the neural networks sent from the encoder
    to the decoder, listed in `frame_encoder.coolchic_encoder.modules_to_send`.
    For each module :math:`m`, we want to find the most suited pair of
    quantization steps for the weight and the biases :math:`(\\Delta_w^m,
    \\Delta_b^m)`.

    To do so, a greedy search is used where we quantize the weights and biases
    using all the possible pairs of quantization steps, and we compute the
    :doc:`usual loss function <./loss>`. The loss measures the impact of the NN
    quantization steps :math:`(\\Delta_w^m, \\Delta_b^m)` on the MSE / rate of
    the decoded image and the rate of the NN.

    In the end, we select the pair of quantization step minimizing the loss:

        .. math::

            (\\Delta_w^m, \\Delta_b^m) = \\arg\\min ||\\mathbf{x}
            - \\hat{\\mathbf{x}}||^2 + \\lambda
            (\\mathrm{R}(\\hat{\\mathbf{x}}) + \\mathrm{R}_{NN}), \\text{ with }
            \\begin{cases}
                \\mathbf{x} & \\text{the original image}\\\\ \\hat{\\mathbf{x}} &
                \\text{the coded image}\\\\ \\mathrm{R}(\\hat{\\mathbf{x}}) &
                \\text{A measure of the rate of } \\hat{\\mathbf{x}} \\\\
                    \\mathrm{R}_{NN} & \\text{The rate of the neural networks}
            \\end{cases}

    Then we quantize the next module to be sent. For each tested pair of
    quantization steps, the exp-golomb count giving the smallest rate is
    selected separately for the weights and for the biases.

    .. warning::

        The parameter ``frame_encoder_manager`` is modified **in place** by
        this function: the time spent here is added to its
        ``total_training_time_sec``.


    Args:
        frame_encoder: Model to be compressed. Its modules, ``nn_q_step`` and
            ``nn_expgol_cnt`` are updated in place.
        frame: Original frame to code, including its references.
        frame_encoder_manager: Provides the rate constraint :math:`\\lambda`
            (``lmbda``). It is also used to track the total encoding time.
            Modified in place.

    Returns:
        Model with quantized parameters (the same object as ``frame_encoder``).

    Raises:
        RuntimeError: If, for one module, no pair of quantization steps could
            be selected (every pair failed to quantize the parameters or none
            of them gave a loss lower than infinity).
    """
    start_time = time.time()
    frame_encoder.set_to_eval()

    coolchic_encoder = frame_encoder.coolchic_encoder

    # We have to quantize all the modules that we want to send
    module_to_quantize = {
        module_name: getattr(coolchic_encoder, module_name)
        for module_name in coolchic_encoder.modules_to_send
    }

    for module_name, cur_module in sorted(module_to_quantize.items()):
        # Start the RD optimization for the quantization step of each module
        # with an infinite RD cost, so that any finite loss is an improvement.
        best_loss = float("inf")

        # All possible quantization steps and exp-golomb counts for this module
        all_q_step = POSSIBLE_Q_STEP.get(module_name)
        all_expgol_cnt = POSSIBLE_EXP_GOL_COUNT.get(module_name)

        # Save full precision parameter.
        fp_param = cur_module.get_param()

        best_q_step = {}
        # Overall best expgol count for this module weights and biases
        final_best_expgol_cnt = {}

        for q_step_w, q_step_b in itertools.product(
            all_q_step.get("weight"), all_q_step.get("bias")
        ):
            current_q_step: DescriptorNN = {"weight": q_step_w, "bias": q_step_b}

            # Always quantize from the saved full precision parameters, not
            # from the parameters quantized during a previous iteration.
            q_param = _quantize_parameters(fp_param, current_q_step)

            # Quantization has failed
            if q_param is None:
                continue

            cur_module.set_param(q_param)

            # Plug the quantized module back into Cool-chic
            setattr(coolchic_encoder, module_name, cur_module)
            coolchic_encoder.nn_q_step[module_name] = current_q_step

            # Test Cool-chic performance with this quantization steps pair
            frame_encoder_out = frame_encoder.forward(
                reference_frames=[ref_i.data for ref_i in frame.refs_data],
                quantizer_noise_type="none",
                quantizer_type="hardround",
                AC_MAX_VAL=-1,
                flag_additional_outputs=False,
            )

            # Best exp-golomb count for this quantization step
            best_expgol_cnt = _select_expgol_count(
                cur_module.get_param(), current_q_step, all_expgol_cnt
            )
            # Must be set before get_network_rate() is called below.
            coolchic_encoder.nn_expgol_cnt[module_name] = best_expgol_cnt

            # Total rate of all the networks, weights and biases together
            rate_mlp = 0.0
            for module_rate in coolchic_encoder.get_network_rate().values():
                for param_rate in module_rate.values():  # weight, bias
                    rate_mlp += param_rate

            loss_fn_output = loss_function(
                frame_encoder_out.decoded_image,
                frame_encoder_out.rate,
                frame.data.data,
                lmbda=frame_encoder_manager.lmbda,
                rate_mlp_bit=rate_mlp,
                compute_logs=True,
            )

            # Store best quantization steps
            if loss_fn_output.loss < best_loss:
                best_loss = loss_fn_output.loss
                best_q_step = current_q_step
                final_best_expgol_cnt = best_expgol_cnt

        # Without this check, an empty best_q_step would only fail later, and
        # obscurely, inside _quantize_parameters().
        if not best_q_step:
            raise RuntimeError(
                f"No valid quantization step found for module {module_name}: "
                "all the candidates either failed to quantize the parameters "
                "or did not give a usable loss."
            )

        # Once we've tested all the possible quantization step and expgol_cnt,
        # quantize one last time with the best one we've found to actually use it.
        coolchic_encoder.nn_q_step[module_name] = best_q_step
        coolchic_encoder.nn_expgol_cnt[module_name] = final_best_expgol_cnt

        q_param = _quantize_parameters(fp_param, coolchic_encoder.nn_q_step[module_name])
        assert q_param is not None, (
            "_quantize_parameters() failed with q_step "
            f"{coolchic_encoder.nn_q_step[module_name]}"
        )

        cur_module.set_param(q_param)
        # Plug the quantized module back into Cool-chic
        setattr(coolchic_encoder, module_name, cur_module)

    time_nn_quantization = time.time() - start_time

    print(f"\nTime quantize_model(): {time_nn_quantization:4.1f} seconds\n")
    frame_encoder_manager.total_training_time_sec += time_nn_quantization

    return frame_encoder


def _select_expgol_count(
    param: OrderedDict[str, Tensor],
    q_step: DescriptorNN,
    all_expgol_cnt,
) -> dict:
    """Pick the exp-golomb count minimizing the rate of the weights and of the
    biases of one quantized module.

    Args:
        param: Quantized parameters of the module, indexed by parameter name.
        q_step: Quantization steps used to obtain ``param``, one for the
            ``"weight"`` and one for the ``"bias"``.
        all_expgol_cnt: Candidate exp-golomb counts, with one iterable under
            the key ``"weight"`` and one under the key ``"bias"``.

    Returns:
        A dictionary with the selected count (as an int) for ``"weight"`` and
        for ``"bias"``.
    """
    best_expgol_cnt = {}

    for weight_or_bias in ["weight", "bias"]:
        step = q_step.get(weight_or_bias)

        # Quantization is round(parameter_value / q_step) * q_step so we
        # divide by q_step to obtain the sent latent. Only the parameters
        # whose name contains "weight" (resp. "bias") are considered.
        sent_param = [
            (parameter_value / step).view(-1)
            for parameter_name, parameter_value in param.items()
            if weight_or_bias in parameter_name
        ]

        # Integer, sent parameters
        v = torch.cat(sent_param)

        # Find the best expgol count for this kind of parameter. On a tie,
        # the first candidate is kept.
        cur_best_expgol_cnt = None
        cur_best_rate = float("inf")
        for expgol_cnt in all_expgol_cnt.get(weight_or_bias):
            cur_rate = exp_golomb_nbins(v, count=expgol_cnt)
            if cur_rate < cur_best_rate:
                cur_best_rate = cur_rate
                cur_best_expgol_cnt = expgol_cnt

        best_expgol_cnt[weight_or_bias] = int(cur_best_expgol_cnt)

    return best_expgol_cnt