# Source code for enc.component.core.quantizer

# Software Name: Cool-Chic
# SPDX-FileCopyrightText: Copyright (c) 2023-2025 Orange
# SPDX-License-Identifier: BSD 3-Clause "New"
#
# This software is distributed under the BSD-3-Clause license.
#
# Authors: see CONTRIBUTORS.md


from typing import Literal, Optional

import torch
from torch import Tensor


[docs] def softround(x: Tensor, t: Tensor) -> Tensor: """Perform the softround function as introduced in section 4.1 of the paper `Universally Quantized Neural Compression, Agustsson & Theis <https://arxiv.org/pdf/2006.09952.pdf>`_, defined as follows: .. math:: \\mathrm{softround}(x, t) = \\lfloor x \\rfloor + \\frac{\mathrm{tanh}(\\frac{\\Delta}{t})}{2\\ \\mathrm{\\mathrm{tanh}(\\frac{1}{2t})}} + \\frac{1}{2}, \\text{ with } \\Delta = x - \\lfloor x \\rfloor - \\frac{1}{2}. Args: x: Input tensor to be quantized. t: Soft round temperature :math:`t`. Setting :math:`t = 0` corresponds to the actual quantization i.e. ``round(x)``. As :math:`t` grows bigger, the function approaches identity i.e. :math:`\\lim_{t \\rightarrow \\infty} \\mathrm{softround}(x, t) = x`. In practice :math:`t \geq 1` is already quite close to identity. Returns: Soft-rounded tensor """ floor_x = torch.floor(x) delta = x - floor_x - 0.5 return floor_x + 0.5 * torch.tanh(delta / t) / torch.tanh(1 / (2 * t)) + 0.5
def generate_kumaraswamy_noise(
    uniform_noise: Tensor, kumaraswamy_param: Tensor
) -> Tensor:
    r"""Turn uniform samples into centred Kumaraswamy-distributed samples.

    A random variable ``uniform_noise`` following a uniform distribution
    :math:`\mathcal{U}(0, 1)` is reparameterized into a random variable
    following a `kumaraswamy distribution
    <https://en.wikipedia.org/wiki/Kumaraswamy_distribution>`_ as proposed in
    the paper `C3: High-performance and low-complexity neural compression
    from a single image or video, Kim et al.
    <https://arxiv.org/abs/2312.02753>`_

    The kumaraswamy distribution is defined on the interval :math:`(0, 1)`
    with the following cumulative distribution function, which is the
    function inverted here to draw the samples:

    .. math::

        F(x;a,b) = 1 - (1 - x^a)^b

    Here, it is only parameterized through a single parameter
    ``kumaraswamy_param`` corresponding to :math:`a` in the above equation.
    The second parameter :math:`b` is set as a function of :math:`a` so that
    the mode of the distribution is always :math:`\frac{1}{2}`. Setting
    :math:`a=1` gives the uniform distribution :math:`\mathcal{U}(0, 1)`.
    Increasing the value of :math:`a` gives a more "pointy" distribution.

    The resulting kumaraswamy noise is shifted so that it lies in
    :math:`[-\frac{1}{2}, \frac{1}{2}]`.

    Args:
        uniform_noise: A uniform noise in :math:`[0, 1]` with any size.
        kumaraswamy_param: Parameter :math:`a` of a Kumaraswamy
            distribution. Set it to 1 for a uniform noise.

    Returns:
        A kumaraswamy noise with identical dim to ``uniform_noise`` in
        :math:`[-\frac{1}{2}, \frac{1}{2}]`.
    """
    shape_a = kumaraswamy_param
    # Tying b to a through this relation pins the mode of the distribution
    # at 0.5 whatever the value of a.
    shape_b = (2**shape_a * (shape_a - 1) + 1) / shape_a

    # Inverse-CDF sampling: feeding uniform samples to F^-1 gives samples in
    # [0., 1.] that follow the kumaraswamy distribution.
    inner_term = 1 - (1 - uniform_noise) ** (1 / shape_b)
    unit_interval_sample = inner_term ** (1 / shape_a)

    # Re-centre the samples so that they lie in [-0.5, 0.5].
    return unit_interval_sample - 0.5
POSSIBLE_QUANTIZATION_NOISE_TYPE = Literal["kumaraswamy", "gaussian", "none"] POSSIBLE_QUANTIZER_TYPE = Literal["softround_alone", "softround", "hardround", "ste", "none"]
[docs] def quantize( x: Tensor, quantizer_noise_type: POSSIBLE_QUANTIZATION_NOISE_TYPE = "kumaraswamy", quantizer_type: POSSIBLE_QUANTIZER_TYPE = "softround", soft_round_temperature: Optional[Tensor] = None, noise_parameter: Optional[Tensor] = None, ) -> Tensor: """Quantize an input :math:`x` to an output :math:`y` simulating the quantization. There is different mode possibles, described by ``quantizer_type``: - ``none``: :math:`y = x + n` with :math:`n` a random noise (more details below) - ``softround_alone``: :math:`y = \\mathrm{softround}(x, t)` with :math:`t` the ``soft_round_temperature``. - ``softround``: :math:`y = \\mathrm{softround}(\\mathrm{softround}(x, t) + n, t)` with :math:`t` the ``soft_round_temperature`` and :math:`n` a random noise (more details below) - ``hardround``: :math:`y = \\mathrm{round}(x)` - ``ste``: :math:`y = \\mathrm{round}(x)` (backward done through softround) The noise is parameterized by ``quantizer_noise_type`` and ``noise_parameter``. This last parameter has a different role for the different noise type: - ``gaussian``: ``noise_parameter`` is the standard deviation of the gaussian distribution - ``kumaraswamy``: ``noise_parameter`` corresponds to the :math:`a` parameter of the kumaraswamy distribution. 1 means uniform distribution and increasing it leads to more more and more probability of being into the center. Softround is parameterized by ``soft_round_temperature`` denoted as :math:`t`. Setting :math:`t = 0` corresponds to the actual quantization i.e. ``round(x)``. As :math:`t` grows bigger, the function approaches identity i.e. :math:`\\lim_{t \\rightarrow \\infty} \\mathrm{softround}(x, t) = x`. In practice :math:`t \geq 1` is already quite close to identity., .. note:: Why do we apply twice the softround when ``quantizer_type`` is ``softround``? It follows the operations described in `C3: High-performance and low-complexity neural compression from a single image or video, Kim et al. 
<https://arxiv.org/abs/2312.02753>`_ i.e. 1. Use a soft round function instead of the non-differentiable round function 2. Add a random noise to prevent the network from learning the inverse softround function 3. Re-apply the soft round function as advocated in `Universally Quantized Neural Compression, Agustsson & Theis <https://arxiv.org/pdf/2006.09952.pdf>`_ Args: x: Tensor to be quantized. quantizer_noise_type: noise type. Defaults to ``"kumaraswamy"``. quantizer_type: quantizer type. Defaults to ``"softround"``. soft_round_temperature: Soft round temperature. This is used for softround modes as well as the ste mode to simulate the derivative in the backward. Defaults to 0.3. noise_parameter: noise distribution parameter. Defaults to 1.0. Returns: Quantized tensor """ match quantizer_noise_type: case "none": pass case "gaussian": noise = torch.randn_like(x, requires_grad=False) * noise_parameter case "kumaraswamy": noise = generate_kumaraswamy_noise( torch.rand_like(x, requires_grad=False), noise_parameter ) match quantizer_type: case "none": return x + noise case "softround_alone": return softround(x, soft_round_temperature) case "softround": return softround( softround(x, soft_round_temperature) + noise, soft_round_temperature, ) case "ste": # From the forward point of view (i.e. entering into the torch.no_grad()), we have # y = softround(x) - softround(x) + round(x) = round(x). From the backward point of view # we have y = softround(x) meaning that dy / dx = d softround(x) / dx. y = softround(x, soft_round_temperature) with torch.no_grad(): y = y - softround(x, soft_round_temperature) + torch.round(x) return y case "hardround": return torch.round(x)