Source code for esrgan.models.discriminator

import collections
from typing import Callable, Dict, Iterable, List, Optional, Tuple

import torch
from torch import nn

from esrgan import utils
from esrgan.nn import modules

__all__ = ["StridedConvEncoder", "LinearHead", "VGGConv"]


class StridedConvEncoder(nn.Module):
    """Generalized fully convolutional encoder.

    Args:
        layers: List of feature map sizes of each block.
        layer_order: Ordered list of layers applied within each block.
            For instance, if you don't want to use a normalization layer,
            just exclude it from this list.
        conv: Class constructor or partial object which when called
            should return a convolutional layer e.g., :py:class:`nn.Conv2d`.
        norm: Class constructor or partial object which when called
            should return a normalization layer e.g., :py:class:`nn.BatchNorm2d`.
        activation: Class constructor or partial object which when called
            should return an activation function e.g., :py:class:`nn.ReLU`.
        residual: Class constructor or partial object which when called
            should return a block wrapper module e.g.,
            :py:class:`esrgan.nn.ResidualModule` can be used to add
            residual connections between blocks.

    """

    def __init__(
        self,
        layers: Iterable[int] = (3, 64, 128, 128, 256, 256, 512, 512),
        layer_order: Iterable[str] = ("conv", "norm", "activation"),
        conv: Callable[..., nn.Module] = modules.Conv2d,
        norm: Optional[Callable[..., nn.Module]] = nn.BatchNorm2d,
        activation: Callable[..., nn.Module] = modules.LeakyReLU,
        residual: Optional[Callable[..., nn.Module]] = None,
    ):
        super().__init__()

        name2fn: Dict[str, Callable[..., nn.Module]] = {
            "activation": activation,
            "conv": conv,
            "norm": norm,
        }

        self._layers = list(layers)

        net: List[Tuple[str, nn.Module]] = []

        first_conv = collections.OrderedDict([
            ("conv_0", name2fn["conv"](self._layers[0], self._layers[1])),
            ("act", name2fn["activation"]()),
        ])
        net.append(("block_0", nn.Sequential(first_conv)))

        channels = utils.pairwise(self._layers[1:])
        for i, (in_ch, out_ch) in enumerate(channels, start=1):
            block_list: List[Tuple[str, nn.Module]] = []
            for name in layer_order:
                # `conv + 2x2 pooling` is equivalent to `conv with stride=2`
                kwargs = {"stride": out_ch // in_ch} if name == "conv" else {}

                module = utils.create_layer(
                    layer_name=name,
                    layer=name2fn[name],
                    in_channels=in_ch,
                    out_channels=out_ch,
                    **kwargs
                )
                block_list.append((name, module))
            block = nn.Sequential(collections.OrderedDict(block_list))

            # add a residual connection, as in ResNet blocks
            if residual is not None and in_ch == out_ch:
                block = residual(block)

            net.append((f"block_{i}", block))

        self.net = nn.Sequential(collections.OrderedDict(net))
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass.

        Args:
            x: Batch of inputs.

        Returns:
            Batch of embeddings.

        """
        output = self.net(x)

        return output
    @property
    def in_channels(self) -> int:
        """Number of channels in the input feature map.

        Returns:
            Size of the input feature map.

        """
        return self._layers[0]

    @property
    def out_channels(self) -> int:
        """Number of channels produced by the encoder.

        Returns:
            Size of the output feature map.

        """
        return self._layers[-1]
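# Usage sketch (illustrative, not part of the library API): with the default
# arguments the encoder maps a 3-channel image to a 512-channel feature map,
# halving the spatial resolution whenever the channel count doubles (stride=2).
# The exact spatial size of the output depends on the kernel/padding defaults
# of `modules.Conv2d`, so only the channel dimension is asserted here.
#
#   >>> encoder = StridedConvEncoder()
#   >>> embeddings = encoder(torch.randn(1, 3, 128, 128))
#   >>> embeddings.shape[1] == encoder.out_channels  # 512 channels out
#   True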
class LinearHead(nn.Module):
    """Stack of linear layers used for classification of embeddings.

    Args:
        in_channels: Size of each input sample.
        out_channels: Size of each output sample.
        latent_channels: Sizes of the latent (intermediate) layers.
        layer_order: Ordered list of layers applied within each block.
            For instance, if you don't want to use an activation function,
            just exclude it from this list.
        linear: Class constructor or partial object which when called
            should return a linear layer e.g., :py:class:`nn.Linear`.
        activation: Class constructor or partial object which when called
            should return an activation function layer e.g., :py:class:`nn.ReLU`.
        norm: Class constructor or partial object which when called
            should return a normalization layer e.g., :py:class:`nn.BatchNorm1d`.
        dropout: Class constructor or partial object which when called
            should return a dropout layer e.g., :py:class:`nn.Dropout`.

    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        latent_channels: Optional[Iterable[int]] = None,
        layer_order: Iterable[str] = ("linear", "activation"),
        linear: Callable[..., nn.Module] = nn.Linear,
        activation: Callable[..., nn.Module] = modules.LeakyReLU,
        norm: Optional[Callable[..., nn.Module]] = None,
        dropout: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()

        name2fn: Dict[str, Callable[..., nn.Module]] = {
            "activation": activation,
            "dropout": dropout,
            "linear": linear,
            "norm": norm,
        }

        latent_channels = latent_channels or []
        channels = [in_channels, *latent_channels, out_channels]
        channels_pairs: List[Tuple[int, int]] = list(utils.pairwise(channels))

        net: List[nn.Module] = []
        for in_ch, out_ch in channels_pairs[:-1]:
            for name in layer_order:
                module = utils.create_layer(
                    layer_name=name,
                    layer=name2fn[name],
                    in_channels=in_ch,
                    out_channels=out_ch,
                )
                net.append(module)
        net.append(name2fn["linear"](*channels_pairs[-1]))

        self.net = nn.Sequential(*net)
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass.

        Args:
            x: Batch of inputs e.g. images.

        Returns:
            Batch of logits.

        """
        output = self.net(x)

        return output
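# Usage sketch (illustrative; the channel sizes below are hypothetical choices,
# not library defaults): a head that takes a flattened 512x4x4 embedding,
# projects it through one latent layer of size 100, and emits a single logit,
# mirroring the fully connected tail of a VGG-style discriminator.
#
#   >>> head = LinearHead(in_channels=512 * 4 * 4, out_channels=1, latent_channels=[100])
#   >>> logits = head(torch.randn(8, 512 * 4 * 4))
#   >>> tuple(logits.shape)
#   (8, 1)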
class VGGConv(nn.Module):
    """VGG-like neural network for image classification.

    Args:
        encoder: Image encoder module, usually used for the extraction
            of embeddings from input signals.
        pool: Pooling layer, used to reduce embeddings from the encoder.
        head: Classification head, usually consists of Fully Connected layers.

    """

    def __init__(
        self,
        encoder: nn.Module,
        pool: nn.Module,
        head: nn.Module,
    ) -> None:
        super().__init__()

        self.encoder = encoder
        self.pool = pool
        self.head = head

        utils.net_init_(self)
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward call.

        Args:
            x: Batch of images.

        Returns:
            Batch of logits.

        """
        x = self.pool(self.encoder(x))
        x = x.view(x.shape[0], -1)
        x = self.head(x)

        return x
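# Usage sketch (illustrative): the three modules above compose into a VGG-style
# discriminator. The pooling layer used below (`nn.AdaptiveAvgPool2d`) is an
# assumption made for this example so that the head's input size does not depend
# on the image resolution; any pooling module with a known output size would work.
#
#   >>> discriminator = VGGConv(
#   ...     encoder=StridedConvEncoder(),
#   ...     pool=nn.AdaptiveAvgPool2d(1),
#   ...     head=LinearHead(in_channels=512, out_channels=1),
#   ... )
#   >>> logits = discriminator(torch.randn(4, 3, 128, 128))
#   >>> tuple(logits.shape)
#   (4, 1)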