Source code for magnet.nodes.core

# coding=utf-8
import torch.nn.functional as F

from torch import nn

from .nodes import Node

class Lambda(Node):
    r"""Wraps a Node around any function.

    Args:
        fn (callable): The function which gets called in the forward pass

    Examples::

        >>> import magnet.nodes as mn
        >>> import torch

        >>> model = mn.Lambda(lambda x: x.mean())
        >>> model(torch.arange(5, dtype=torch.float)).item()
        2.0

        >>> def subtract(x, y):
        ...     return x - y

        >>> model = mn.Lambda(subtract)
        >>> model(2 * torch.ones(1), torch.ones(1)).item()
        1.0
    """
    def __init__(self, fn, **kwargs):
        super().__init__(fn, **kwargs)

        # If a name is not supplied, use the function's name instead
        # of the class (Lambda) name.
        if self.name == self.__class__.__name__:
            self.name = self._args['fn'].__name__

    def forward(self, *args, **kwargs):
        return self._args['fn'](*args, **kwargs)

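# A minimal usage sketch (illustrative; assumes `magnet.nodes` is importable as `mn`):
# Lambda can drop an arbitrary tensor operation, such as flattening, into a pipeline.
#
#     import torch
#     import magnet.nodes as mn
#
#     flatten = mn.Lambda(lambda x: x.view(x.size(0), -1))
#     out = flatten(torch.randn(4, 3, 28, 28))
#     # out.shape == torch.Size([4, 2352]); the node takes the wrapped
#     # function's name unless a name is supplied explicitly.
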
class Conv(Node):
    r"""Applies a convolution over an input tensor.

    Args:
        c (int): Number of channels produced by the convolution.
            Default: Inferred
        k (int or tuple): Size of the convolving kernel. Default: ``3``
        p (int, tuple or str): Zero-padding added to both sides of the input.
            Default: ``'half'``
        s (int or tuple): Stride of the convolution. Default: ``1``
        d (int or tuple): Spacing between kernel elements. Default: ``1``
        g (int): Number of blocked connections from input channels to output
            channels. Default: ``1``
        b (bool): If ``True``, adds a learnable bias to the output.
            Default: ``True``
        ic (int): Number of channels in the input image. Default: Inferred
        act (str or None): The activation function to use. Default: ``'relu'``
        bn (bool): Whether to use Batch Normalization immediately after
            the layer. Default: ``False``

    * :attr:`p` can be conveniently set to ``'half'``, ``'same'`` or
      ``'double'`` to halve, keep or double the image size respectively.
      The arguments are inferred accordingly at runtime. For ``'half'``
      padding, the output channels (if not provided) are set to twice the
      input channels to make up for the lost information, and vice-versa for
      ``'double'`` padding. For ``'same'`` padding, the output channels are
      kept equal to the input channels. In all three cases, the dilation is
      set to ``1`` and the stride is modified as required.

    * :attr:`ic` is inferred from the second dimension of the input tensor.

    * :attr:`act` is set to ``'relu'`` by default, unlike the PyTorch
      implementation where activation functions need to be separately
      defined. Take care to manually set the activation to ``None``
      where needed.

    .. note::
        The dimensionality (1, 2 or 3) of the convolutional kernel is
        inferred from the shape of the input tensor.

    .. note::
        One can also create multiple Nodes using the convenient
        multiplication (``*``) operation. Multiplication with an integer
        :math:`n` gives :math:`n` copies of the Node. Multiplication with a
        list or tuple of integers :math:`(c_1, c_2, ..., c_n)` gives :math:`n`
        copies of the Node with :attr:`c` set to :math:`c_i`.

    Shape:
        - Input: :math:`(N, C_{in}, *)` where `*` is any non-zero number of
          trailing dimensions.
        - Output: :math:`(N, C_{out}, *)`

    Attributes:
        layer (nn.Module): The Conv module built from torch.nn

    Examples::

        >>> import torch
        >>> from torch import nn
        >>> import magnet.nodes as mn
        >>> from magnet.utils import summarize

        >>> # A Conv layer with 32 channels and half padding
        >>> model = mn.Conv(32)
        >>> model(torch.randn(4, 16, 28, 28)).shape
        torch.Size([4, 32, 14, 14])
        >>> # Alternatively, the 32 in the constructor may be omitted
        >>> # since it is inferred at runtime.

        >>> # The same conv layer with 'double' padding
        >>> model = mn.Conv(p='double')
        >>> model(torch.randn(4, 16, 28, 28)).shape
        torch.Size([4, 8, 56, 56])

        >>> layers = mn.Conv() * 3
        >>> layers
        [Conv(), Conv(), Conv()]
        >>> model = nn.Sequential(*layers)
        >>> summarize(model)
        +-------+------------+----------------------+
        |  Node |   Shape    | Trainable Parameters |
        +-------+------------+----------------------+
        | input | 16, 28, 28 |          0           |
        +-------+------------+----------------------+
        |  Conv | 32, 14, 14 |        4,640         |
        +-------+------------+----------------------+
        |  Conv |  64, 7, 7  |        18,496        |
        +-------+------------+----------------------+
        |  Conv | 128, 4, 4  |        73,856        |
        +-------+------------+----------------------+
        Total Trainable Parameters: 96,992
    """
    def __init__(self, c=None, k=3, p='half', s=1, d=1, g=1, b=True, ic=None,
                 act='relu', bn=False, **kwargs):
        super().__init__(c, k, p, s, d, g, b, ic, act, bn, **kwargs)

    def build(self, x):
        from magnet.nodes.functional import wiki

        self._set_padding(x)  # Handle 'half', 'same' and 'double' padding

        # Infer the input channels if not given
        if self._args['ic'] is None:
            self._args['ic'] = x.shape[1]

        self._activation = wiki['activations'][self._args['act']]

        layer_class = self._find_layer(x)  # Infer the layer (Conv1d, 2d or 3d)
        self.layer = layer_class(kernel_size=self._args['k'],
                                 out_channels=self._args['c'],
                                 stride=self._args['s'],
                                 padding=self._args['p'],
                                 dilation=self._args['d'],
                                 groups=self._args['g'],
                                 bias=self._args['b'],
                                 in_channels=self._args['ic'])

        if self._args['bn']:
            self._batch_norm = BatchNorm()

        super().build(x)

    def forward(self, x):
        if hasattr(self, '_upsample'):
            x = F.interpolate(x, scale_factor=self._upsample)

        x = self._activation(self.layer(x))
        if self._args['bn']:
            x = self._batch_norm(x)

        return x

    @staticmethod
    def _find_layer(x):
        shape_dict = [nn.Conv1d, nn.Conv2d, nn.Conv3d]
        ndim = len(x.shape) - 2
        return shape_dict[ndim - 1]

    def _set_padding(self, x):
        in_shape = x.shape

        p = self._args['p']
        if p == 'half':
            f = 0.5
        elif p == 'same':
            f = 1
        elif p == 'double':
            self._upsample = 2
            if self._args['c'] is None:
                self._args['c'] = in_shape[1] // 2
            f = 1
        else:
            return

        s = 1 / f
        self._args['d'] = 1
        self._args['s'] = int(s)
        self._args['p'] = int(self._args['k'] // 2)
        if self._args['c'] is None:
            self._args['c'] = self._args['s'] * in_shape[1]

    def _mul_list(self, n):
        convs = [self]
        self._args['c'] = n[0]

        kwargs = self._args.copy()
        for c in n[1:]:
            kwargs['c'] = c
            convs.append(self.__class__(**kwargs))

        return convs

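# A minimal sketch of the 'same' padding mode (illustrative; shapes assume the
# default lazy build on the first call):
#
#     import torch
#     import magnet.nodes as mn
#
#     conv = mn.Conv(p='same')                 # stride 1, channels inferred from the input
#     out = conv(torch.randn(4, 16, 28, 28))
#     # out.shape == torch.Size([4, 16, 28, 28]): with 'same' padding the output
#     # channels default to the input channels and the spatial size is preserved
#     # (k=3 gives p=1, s=1).
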
class Linear(Node):
    r"""Applies a linear transformation to the incoming tensor.

    Args:
        o (int or tuple): Output dimensions. Default: :math:`1`
        b (bool): Whether to include a bias term. Default: ``True``
        flat (bool): Whether to flatten out the input to 2 dimensions.
            Default: ``True``
        i (int): Input dimensions. Default: Inferred
        act (str or None): The activation function to use. Default: ``'relu'``
        bn (bool): Whether to use Batch Normalization immediately after
            the layer. Default: ``False``

    * :attr:`flat` is used by default to flatten the input to a vector.
      This is useful, say, in the case of CNNs where a 3-D image-based output
      with multiple channels needs to be fed to several dense layers.

    * :attr:`i` is inferred from the last dimension of the input tensor
      (or from its flattened size if :attr:`flat` is ``True``).

    * :attr:`act` is set to ``'relu'`` by default, unlike the PyTorch
      implementation where activation functions need to be separately
      defined. Take care to manually set the activation to ``None``
      where needed.

    .. note::
        One can also create multiple Nodes using the convenient
        multiplication (``*``) operation. Multiplication with an integer
        :math:`n` gives :math:`n` copies of the Node. Multiplication with a
        list or tuple of integers :math:`(o_1, o_2, ..., o_n)` gives :math:`n`
        copies of the Node with :attr:`o` set to :math:`o_i`.

    .. note::
        If :attr:`o` is a tuple, the number of output features is its product
        and the output is inflated to this shape.

    Shape:
        If :attr:`flat` is ``True``
            - Input: :math:`(N, *)` where :math:`*` means any number of
              trailing dimensions
            - Output: :math:`(N, out\_features)`
        Else
            - Input: :math:`(N, *, in\_features)` where :math:`*` means any
              number of trailing dimensions
            - Output: :math:`(N, *, out\_features)` where all but the last
              dimension are the same shape as the input.

    Attributes:
        layer (nn.Module): The Linear module built from torch.nn

    Examples::

        >>> import torch
        >>> from torch import nn
        >>> import magnet.nodes as mn
        >>> from magnet.utils import summarize

        >>> # A Linear mapping to 10-dimensional space
        >>> model = mn.Linear(10)
        >>> model(torch.randn(64, 3, 28, 28)).shape
        torch.Size([64, 10])

        >>> # Don't flatten the input
        >>> model = mn.Linear(10, flat=False)
        >>> model(torch.randn(64, 3, 28, 28)).shape
        torch.Size([64, 3, 28, 10])

        >>> # Make a Deep Neural Network
        >>> # Don't forget to turn the activation to None in the final layer
        >>> layers = mn.Linear() * (10, 50) + [mn.Linear(10, act=None)]
        >>> layers
        [Linear(), Linear(), Linear()]
        >>> model = nn.Sequential(*layers)
        >>> summarize(model)
        +--------+-----------+----------------------+-----------------------------------------------------+
        |  Node  |   Shape   | Trainable Parameters |                      Arguments                      |
        +--------+-----------+----------------------+-----------------------------------------------------+
        | input  | 3, 28, 28 |          0           |                                                     |
        +--------+-----------+----------------------+-----------------------------------------------------+
        | Linear |    10     |        23,530        | bn=False, act=relu, i=2352, flat=True, b=True, o=10 |
        +--------+-----------+----------------------+-----------------------------------------------------+
        | Linear |    50     |         550          | bn=False, act=relu, i=10, flat=True, b=True, o=50   |
        +--------+-----------+----------------------+-----------------------------------------------------+
        | Linear |    10     |         510          | bn=False, act=None, i=50, flat=True, b=True, o=10   |
        +--------+-----------+----------------------+-----------------------------------------------------+
        Total Trainable Parameters: 24,590
    """
    def __init__(self, o=1, b=True, flat=True, i=None, act='relu', bn=False,
                 **kwargs):
        super().__init__(o, b, flat, i, act, bn, **kwargs)

    def build(self, x):
        from numpy import prod
        from magnet.nodes.functional import wiki

        # Infer the input shape if not given
        if self._args['i'] is None:
            self._args['i'] = prod(x.shape[1:]) if self._args['flat'] else x.shape[-1]

        # If a tuple is given as the output shape, inflate to that tuple
        if isinstance(self._args['o'], (list, tuple)):
            self._inflate_shape = self._args['o']
            self._args['o'] = prod(self._args['o'])
        else:
            self._inflate_shape = None

        self._activation = wiki['activations'][self._args['act']]

        self.layer = nn.Linear(*[self._args[k] for k in ('i', 'o', 'b')])

        if self._args['bn']:
            self._batch_norm = BatchNorm()

        super().build(x)

    def forward(self, x):
        if self._args['flat']:
            x = x.view(x.size(0), -1)

        x = self._activation(self.layer(x))
        if self._args['bn']:
            x = self._batch_norm(x)

        if self._inflate_shape is not None:
            x = x.view(-1, *self._inflate_shape)

        return x

    def _mul_list(self, n):
        lins = [self]
        self._args['o'] = n[0]

        kwargs = self._args.copy()
        for o in n[1:]:
            kwargs['o'] = o
            lins.append(self.__class__(**kwargs))

        return lins

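# A minimal sketch of the tuple-valued `o` argument (illustrative): the layer maps to
# prod(o) features and the output is then inflated to that shape.
#
#     import torch
#     import magnet.nodes as mn
#
#     lin = mn.Linear((8, 4))                  # 32 output features, viewed as (8, 4)
#     out = lin(torch.randn(64, 100))
#     # out.shape == torch.Size([64, 8, 4])
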
class _RNNBase(Node):
    def __init__(self, mode, h, n=1, b=False, bi=False, act='tanh', d=0,
                 batch_first=False, i=None, **kwargs):
        self.layer = mode
        super().__init__(h, n, b, bi, act, d, batch_first, i, **kwargs)

    def build(self, x, h=None):
        # Infer the input shape if not given
        if self._args['i'] is None:
            self._args['i'] = x.shape[-1]

        self.layer = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}[self.layer.lower()]

        kwargs = {'nonlinearity': self._args['act'], 'bias': self._args['b'],
                  'batch_first': self._args['batch_first'],
                  'dropout': self._args['d'],
                  'bidirectional': self._args['bi']}

        # The 'nonlinearity' / 'act' argument is only accepted by plain RNNs,
        # not by LSTM and GRU. (self.layer is still a class at this point.)
        if self.layer is not nn.RNN:
            kwargs.pop('nonlinearity')

        self.layer = self.layer(*[self._args[k] for k in ('i', 'h', 'n')], **kwargs)

        super().build(x, h)

    def forward(self, x, h=None):
        return self.layer(x, h)

    def _mul_list(self, n):
        rnns = [self]
        self._args['h'] = n[0]

        kwargs = self._args.copy()
        for h in n[1:]:
            kwargs['h'] = h
            rnns.append(self.__class__(**kwargs))

        return rnns

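# A minimal sketch of the mode -> torch.nn mapping used in _RNNBase.build() above
# (illustrative only; the class is normally constructed via the RNN/LSTM/GRU
# subclasses below):
#
#     from torch import nn
#
#     layer_class = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}['gru']
#     layer = layer_class(input_size=300, hidden_size=32, num_layers=1)
#     # The 'nonlinearity' keyword is only accepted by nn.RNN, which is why
#     # build() drops it for LSTM and GRU.
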
class RNN(_RNNBase):
    r"""Applies a multi-layer RNN to an input tensor.

    Args:
        h (int, Required): The number of features in the hidden state `h`
        n (int): Number of layers. Default: ``1``
        b (bool): Whether to include a bias term. Default: ``False``
        bi (bool): If ``True``, becomes a bidirectional RNN. Default: ``False``
        act (str or None): The activation function to use. Default: ``'tanh'``
        d (float): The dropout probability of the outputs of each layer.
            Default: ``0``
        batch_first (bool): If ``True``, then the input and output tensors
            are provided as ``(batch, seq, feature)``. Default: ``False``
        i (int): Input dimensions. Default: Inferred

    * :attr:`i` is inferred from the last dimension of the input tensor.

    .. note::
        One can also create multiple Nodes using the convenient
        multiplication (``*``) operation. Multiplication with an integer
        :math:`n` gives :math:`n` copies of the Node. Multiplication with a
        list or tuple of integers :math:`(h_1, h_2, ..., h_n)` gives :math:`n`
        copies of the Node with :attr:`h` set to :math:`h_i`.

    Attributes:
        layer (nn.Module): The RNN module built from torch.nn

    Examples::

        >>> import torch
        >>> from torch import nn
        >>> import magnet.nodes as mn
        >>> from magnet.utils import summarize

        >>> # A recurrent layer with 32 hidden dimensions
        >>> model = mn.RNN(32)
        >>> model(torch.randn(7, 4, 300))[0].shape
        torch.Size([7, 4, 32])

        >>> # Attach a linear head
        >>> model = nn.Sequential(model, mn.Linear(1000, act=None))
    """
    def __init__(self, h, n=1, b=False, bi=False, act='tanh', d=0,
                 batch_first=False, i=None, **kwargs):
        mode = kwargs.pop('mode', 'rnn')
        super().__init__(mode, h, n, b, bi, act, d, batch_first, i, **kwargs)

class LSTM(_RNNBase):
    r"""Applies a multi-layer LSTM to an input tensor.

    See ``mn.RNN`` for more details.
    """
    def __init__(self, h, n=1, b=False, bi=False, d=0, batch_first=False,
                 i=None, **kwargs):
        act = kwargs.pop('act', None)
        mode = kwargs.pop('mode', 'lstm')
        super().__init__(mode, h, n, b, bi, act, d, batch_first, i, **kwargs)

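# A minimal usage sketch (illustrative; mirrors the mn.RNN example): an LSTM node
# returns the usual (output, (h_n, c_n)) pair from torch.nn.LSTM.
#
#     import torch
#     import magnet.nodes as mn
#
#     lstm = mn.LSTM(32)
#     out, (h_n, c_n) = lstm(torch.randn(7, 4, 300))   # (seq, batch, feature)
#     # out.shape == torch.Size([7, 4, 32])
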
class GRU(_RNNBase):
    r"""Applies a multi-layer GRU to an input tensor.

    See ``mn.RNN`` for more details.
    """
    def __init__(self, h, n=1, b=False, bi=False, d=0, batch_first=False,
                 i=None, **kwargs):
        act = kwargs.pop('act', None)
        mode = kwargs.pop('mode', 'gru')
        super().__init__(mode, h, n, b, bi, act, d, batch_first, i, **kwargs)

class BatchNorm(Node):
    r"""Applies Batch Normalization to the input tensor.

    Args:
        e (float): A small value added to the denominator for numerical
            stability. Default: ``1e-5``
        m (float or None): The value used for the running_mean and running_var
            computation. Can be set to ``None`` for cumulative moving average
            (i.e. simple average). Default: ``0.1``
        a (bool): Whether to have learnable affine parameters.
            Default: ``True``
        track (bool): Whether to track the running mean and variance.
            Default: ``True``
        i (int): Input channels. Default: Inferred

    * :attr:`i` is inferred from the second dimension of the input tensor.

    .. note::
        The dimensionality (1, 2 or 3) of the running mean and variance is
        inferred from the shape of the input tensor.

    .. note::
        One can also create multiple Nodes using the convenient
        multiplication (``*``) operation. Multiplication with an integer
        :math:`n` gives :math:`n` copies of the Node. Multiplication with a
        list or tuple of integers :math:`(i_1, i_2, ..., i_n)` gives :math:`n`
        copies of the Node with :attr:`i` set to :math:`i_i`.

    Shape:
        - Input: :math:`(N, C, *)` where :math:`*` means any number of
          trailing dimensions
        - Output: :math:`(N, C, *)` (same shape as input)

    Attributes:
        layer (nn.Module): The BatchNorm module built from :py:class:`torch.nn`

    Examples::

        >>> import torch
        >>> import magnet.nodes as mn

        >>> # Batch Normalization over a 4-D (image-like) input
        >>> model = mn.BatchNorm()
        >>> model(torch.randn(4, 16, 28, 28)).shape
        torch.Size([4, 16, 28, 28])
    """
    def __init__(self, e=1e-05, m=0.1, a=True, track=True, i=None, **kwargs):
        super().__init__(e, m, a, track, i, **kwargs)

    def build(self, x):
        # Infer the input channels if not given
        if self._args['i'] is None:
            self._args['i'] = x.shape[1]

        layer_class = self._find_layer(x)  # Infer the layer (BatchNorm1d, 2d or 3d)
        self.layer = layer_class(*[self._args[k] for k in ('i', 'e', 'm', 'a', 'track')])

        super().build(x)

    def forward(self, x):
        return self.layer(x)

    @staticmethod
    def _find_layer(x):
        shape_dict = [nn.BatchNorm1d, nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d]
        ndim = len(x.shape) - 1
        return shape_dict[ndim - 1]

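# A minimal sketch of the dimensionality inference in BatchNorm._find_layer
# (illustrative): a 3-D input picks nn.BatchNorm1d, a 4-D input picks nn.BatchNorm2d.
#
#     import torch
#     import magnet.nodes as mn
#
#     bn = mn.BatchNorm()
#     out = bn(torch.randn(8, 16, 100))        # (N, C, L) builds nn.BatchNorm1d(16)
#     # out.shape == torch.Size([8, 16, 100]); the input shape is always preserved.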