Source code for magnet.nodes.core

# coding=utf-8
import torch.nn.functional as F

from torch import nn

from .nodes import Node

class Lambda(Node):
    r"""Wraps a Node around any function.

    Args:
        fn (callable): The function which gets called in the forward pass

    Examples::

        >>> import magnet.nodes as mn
        >>> import torch

        >>> model = mn.Lambda(lambda x: x.mean())
        >>> model(torch.arange(5, dtype=torch.float)).item()
        2.0

        >>> def subtract(x, y):
        ...     return x - y

        >>> model = mn.Lambda(subtract)
        >>> model(2 * torch.ones(1), torch.ones(1)).item()
        1.0
    """
    def __init__(self, fn, **kwargs):
        super().__init__(fn, **kwargs)

        # If a name is not supplied, use the function's name instead
        # of the class (Lambda) name.
        if self.name == self.__class__.__name__:
            self.name = self._args['fn'].__name__

    def forward(self, *args, **kwargs):
        return self._args['fn'](*args, **kwargs)

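# A minimal usage sketch (illustrative; assumes `magnet.nodes` is importable as `mn`):
# Lambda can drop an arbitrary tensor operation, such as flattening, into a pipeline.
#
#     import torch
#     import magnet.nodes as mn
#
#     flatten = mn.Lambda(lambda x: x.view(x.size(0), -1))
#     out = flatten(torch.randn(4, 3, 28, 28))
#     # out.shape == torch.Size([4, 2352]); the node takes the wrapped
#     # function's name unless a name is supplied explicitly.
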
class Conv(Node):
    r"""Applies a convolution over an input tensor.

    Args:
        c (int): Number of channels produced by the convolution.
            Default: Inferred
        k (int or tuple): Size of the convolving kernel. Default: ``3``
        p (int, tuple or str): Zero-padding added to both sides of the input.
            Default: ``'half'``
        s (int or tuple): Stride of the convolution. Default: ``1``
        d (int or tuple): Spacing between kernel elements. Default: ``1``
        g (int): Number of blocked connections from input channels to output
            channels. Default: ``1``
        b (bool): If ``True``, adds a learnable bias to the output.
            Default: ``True``
        ic (int): Number of channels in the input image. Default: Inferred
        act (str or None): The activation function to use. Default: ``'relu'``
        bn (bool): Whether to use Batch Normalization immediately after
            the layer. Default: ``False``

    * :attr:`p` can be conveniently set to ``'half'``, ``'same'`` or
      ``'double'`` to halve, keep or double the image size respectively.
      The arguments are inferred accordingly at runtime. For ``'half'``
      padding, the output channels (if not provided) are set to twice the
      input channels to make up for the lost information, and vice-versa for
      ``'double'`` padding. For ``'same'`` padding, the output channels are
      kept equal to the input channels. In all three cases, the dilation is
      set to ``1`` and the stride is modified as required.

    * :attr:`ic` is inferred from the second dimension of the input tensor.

    * :attr:`act` is set to ``'relu'`` by default, unlike the PyTorch
      implementation where activation functions need to be separately
      defined. Take care to manually set the activation to ``None``
      where needed.

    .. note::
        The dimensionality (1, 2 or 3) of the convolutional kernel is
        inferred from the shape of the input tensor.

    .. note::
        One can also create multiple Nodes using the convenient
        multiplication (``*``) operation. Multiplication with an integer
        :math:`n` gives :math:`n` copies of the Node. Multiplication with a
        list or tuple of integers :math:`(c_1, c_2, ..., c_n)` gives :math:`n`
        copies of the Node with :attr:`c` set to :math:`c_i`.

    Shape:
        - Input: :math:`(N, C_{in}, *)` where `*` is any non-zero number of
          trailing dimensions.
        - Output: :math:`(N, C_{out}, *)`

    Attributes:
        layer (nn.Module): The Conv module built from torch.nn

    Examples::

        >>> import torch
        >>> from torch import nn
        >>> import magnet.nodes as mn
        >>> from magnet.utils import summarize

        >>> # A Conv layer with 32 channels and half padding
        >>> model = mn.Conv(32)
        >>> model(torch.randn(4, 16, 28, 28)).shape
        torch.Size([4, 32, 14, 14])
        >>> # Alternatively, the 32 in the constructor may be omitted
        >>> # since it is inferred at runtime.

        >>> # The same conv layer with 'double' padding
        >>> model = mn.Conv(p='double')
        >>> model(torch.randn(4, 16, 28, 28)).shape
        torch.Size([4, 8, 56, 56])

        >>> layers = mn.Conv() * 3
        >>> layers
        [Conv(), Conv(), Conv()]
        >>> model = nn.Sequential(*layers)
        >>> summarize(model)
        +-------+------------+----------------------+
        |  Node |   Shape    | Trainable Parameters |
        +-------+------------+----------------------+
        | input | 16, 28, 28 |          0           |
        +-------+------------+----------------------+
        |  Conv | 32, 14, 14 |        4,640         |
        +-------+------------+----------------------+
        |  Conv |  64, 7, 7  |        18,496        |
        +-------+------------+----------------------+
        |  Conv | 128, 4, 4  |        73,856        |
        +-------+------------+----------------------+
        Total Trainable Parameters: 96,992
    """
    def __init__(self, c=None, k=3, p='half', s=1, d=1, g=1, b=True, ic=None,
                 act='relu', bn=False, **kwargs):
        super().__init__(c, k, p, s, d, g, b, ic, act, bn, **kwargs)

    def build(self, x):
        from magnet.nodes.functional import wiki

        self._set_padding(x)  # Handle 'half', 'same' and 'double' padding

        # Infer the input channels if not given
        if self._args['ic'] is None:
            self._args['ic'] = x.shape[1]

        self._activation = wiki['activations'][self._args['act']]

        layer_class = self._find_layer(x)  # Infer the layer (Conv1d, 2d or 3d)
        self.layer = layer_class(kernel_size=self._args['k'],
                                 out_channels=self._args['c'],
                                 stride=self._args['s'],
                                 padding=self._args['p'],
                                 dilation=self._args['d'],
                                 groups=self._args['g'],
                                 bias=self._args['b'],
                                 in_channels=self._args['ic'])

        if self._args['bn']:
            self._batch_norm = BatchNorm()

        super().build(x)

    def forward(self, x):
        if hasattr(self, '_upsample'):
            x = F.interpolate(x, scale_factor=self._upsample)

        x = self._activation(self.layer(x))
        if self._args['bn']:
            x = self._batch_norm(x)

        return x

    @staticmethod
    def _find_layer(x):
        shape_dict = [nn.Conv1d, nn.Conv2d, nn.Conv3d]
        ndim = len(x.shape) - 2
        return shape_dict[ndim - 1]

    def _set_padding(self, x):
        in_shape = x.shape

        p = self._args['p']
        if p == 'half':
            f = 0.5
        elif p == 'same':
            f = 1
        elif p == 'double':
            self._upsample = 2
            if self._args['c'] is None:
                self._args['c'] = in_shape[1] // 2
            f = 1
        else:
            return

        s = 1 / f
        self._args['d'] = 1
        self._args['s'] = int(s)
        self._args['p'] = int(self._args['k'] // 2)
        if self._args['c'] is None:
            self._args['c'] = self._args['s'] * in_shape[1]

    def _mul_list(self, n):
        convs = [self]
        self._args['c'] = n[0]

        kwargs = self._args.copy()
        for c in n[1:]:
            kwargs['c'] = c
            convs.append(self.__class__(**kwargs))

        return convs

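# A minimal sketch of the 'same' padding mode (illustrative; shapes assume the
# default lazy build on the first call):
#
#     import torch
#     import magnet.nodes as mn
#
#     conv = mn.Conv(p='same')                 # stride 1, channels inferred from the input
#     out = conv(torch.randn(4, 16, 28, 28))
#     # out.shape == torch.Size([4, 16, 28, 28]): with 'same' padding the output
#     # channels default to the input channels and the spatial size is preserved
#     # (k=3 gives p=1, s=1).
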
class Linear(Node):
    r"""Applies a linear transformation to the incoming tensor.

    Args:
        o (int or tuple): Output dimensions. Default: :math:`1`
        b (bool): Whether to include a bias term. Default: ``True``
        flat (bool): Whether to flatten out the input to 2 dimensions.
            Default: ``True``
        i (int): Input dimensions. Default: Inferred
        act (str or None): The activation function to use. Default: ``'relu'``
        bn (bool): Whether to use Batch Normalization immediately after
            the layer. Default: ``False``

    * :attr:`flat` is used by default to flatten the input to a vector.
      This is useful, say, in the case of CNNs where a 3-D image-based output
      with multiple channels needs to be fed to several dense layers.

    * :attr:`i` is inferred from the last dimension of the input tensor
      (or from its flattened size if :attr:`flat` is ``True``).

    * :attr:`act` is set to ``'relu'`` by default, unlike the PyTorch
      implementation where activation functions need to be separately
      defined. Take care to manually set the activation to ``None``
      where needed.

    .. note::
        One can also create multiple Nodes using the convenient
        multiplication (``*``) operation. Multiplication with an integer
        :math:`n` gives :math:`n` copies of the Node. Multiplication with a
        list or tuple of integers :math:`(o_1, o_2, ..., o_n)` gives :math:`n`
        copies of the Node with :attr:`o` set to :math:`o_i`.

    .. note::
        If :attr:`o` is a tuple, the number of output features is its product
        and the output is inflated to this shape.

    Shape:
        If :attr:`flat` is ``True``
            - Input: :math:`(N, *)` where :math:`*` means any number of
              trailing dimensions
            - Output: :math:`(N, out\_features)`
        Else
            - Input: :math:`(N, *, in\_features)` where :math:`*` means any
              number of trailing dimensions
            - Output: :math:`(N, *, out\_features)` where all but the last
              dimension are the same shape as the input.

    Attributes:
        layer (nn.Module): The Linear module built from torch.nn

    Examples::

        >>> import torch
        >>> from torch import nn
        >>> import magnet.nodes as mn
        >>> from magnet.utils import summarize

        >>> # A Linear mapping to 10-dimensional space
        >>> model = mn.Linear(10)
        >>> model(torch.randn(64, 3, 28, 28)).shape
        torch.Size([64, 10])

        >>> # Don't flatten the input
        >>> model = mn.Linear(10, flat=False)
        >>> model(torch.randn(64, 3, 28, 28)).shape
        torch.Size([64, 3, 28, 10])

        >>> # Make a Deep Neural Network
        >>> # Don't forget to turn the activation to None in the final layer
        >>> layers = mn.Linear() * (10, 50) + [mn.Linear(10, act=None)]
        >>> layers
        [Linear(), Linear(), Linear()]
        >>> model = nn.Sequential(*layers)
        >>> summarize(model)
        +--------+-----------+----------------------+-----------------------------------------------------+
        |  Node  |   Shape   | Trainable Parameters |                      Arguments                      |
        +--------+-----------+----------------------+-----------------------------------------------------+
        | input  | 3, 28, 28 |          0           |                                                     |
        +--------+-----------+----------------------+-----------------------------------------------------+
        | Linear |    10     |        23,530        | bn=False, act=relu, i=2352, flat=True, b=True, o=10 |
        +--------+-----------+----------------------+-----------------------------------------------------+
        | Linear |    50     |         550          | bn=False, act=relu, i=10, flat=True, b=True, o=50   |
        +--------+-----------+----------------------+-----------------------------------------------------+
        | Linear |    10     |         510          | bn=False, act=None, i=50, flat=True, b=True, o=10   |
        +--------+-----------+----------------------+-----------------------------------------------------+
        Total Trainable Parameters: 24,590
    """
    def __init__(self, o=1, b=True, flat=True, i=None, act='relu', bn=False,
                 **kwargs):
        super().__init__(o, b, flat, i, act, bn, **kwargs)

    def build(self, x):
        from numpy import prod
        from magnet.nodes.functional import wiki

        # Infer the input shape if not given
        if self._args['i'] is None:
            self._args['i'] = prod(x.shape[1:]) if self._args['flat'] else x.shape[-1]

        # If a tuple is given as the output shape, inflate to that tuple
        if isinstance(self._args['o'], (list, tuple)):
            self._inflate_shape = self._args['o']
            self._args['o'] = prod(self._args['o'])
        else:
            self._inflate_shape = None

        self._activation = wiki['activations'][self._args['act']]

        self.layer = nn.Linear(*[self._args[k] for k in ('i', 'o', 'b')])

        if self._args['bn']:
            self._batch_norm = BatchNorm()

        super().build(x)

    def forward(self, x):
        if self._args['flat']:
            x = x.view(x.size(0), -1)

        x = self._activation(self.layer(x))
        if self._args['bn']:
            x = self._batch_norm(x)

        if self._inflate_shape is not None:
            x = x.view(-1, *self._inflate_shape)

        return x

    def _mul_list(self, n):
        lins = [self]
        self._args['o'] = n[0]

        kwargs = self._args.copy()
        for o in n[1:]:
            kwargs['o'] = o
            lins.append(self.__class__(**kwargs))

        return lins

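# A minimal sketch of the tuple-valued `o` argument (illustrative): the layer maps to
# prod(o) features and the output is then inflated to that shape.
#
#     import torch
#     import magnet.nodes as mn
#
#     lin = mn.Linear((8, 4))                  # 32 output features, viewed as (8, 4)
#     out = lin(torch.randn(64, 100))
#     # out.shape == torch.Size([64, 8, 4])
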
class _RNNBase(Node):
    def __init__(self, mode, h, n=1, b=False, bi=False, act='tanh', d=0,
                 batch_first=False, i=None, **kwargs):
        self.layer = mode
        super().__init__(h, n, b, bi, act, d, batch_first, i, **kwargs)

    def build(self, x, h=None):
        # Infer the input shape if not given
        if self._args['i'] is None:
            self._args['i'] = x.shape[-1]

        self.layer = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}[self.layer.lower()]

        kwargs = {'nonlinearity': self._args['act'], 'bias': self._args['b'],
                  'batch_first': self._args['batch_first'],
                  'dropout': self._args['d'],
                  'bidirectional': self._args['bi']}

        # The 'nonlinearity' / 'act' argument is only accepted by plain RNNs,
        # not by LSTM and GRU. (self.layer is still a class at this point.)
        if self.layer is not nn.RNN:
            kwargs.pop('nonlinearity')

        self.layer = self.layer(*[self._args[k] for k in ('i', 'h', 'n')], **kwargs)

        super().build(x, h)

    def forward(self, x, h=None):
        return self.layer(x, h)

    def _mul_list(self, n):
        rnns = [self]
        self._args['h'] = n[0]

        kwargs = self._args.copy()
        for h in n[1:]:
            kwargs['h'] = h
            rnns.append(self.__class__(**kwargs))

        return rnns

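# A minimal sketch of the mode -> torch.nn mapping used in _RNNBase.build() above
# (illustrative only; the class is normally constructed via the RNN/LSTM/GRU
# subclasses below):
#
#     from torch import nn
#
#     layer_class = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}['gru']
#     layer = layer_class(input_size=300, hidden_size=32, num_layers=1)
#     # The 'nonlinearity' keyword is only accepted by nn.RNN, which is why
#     # build() drops it for LSTM and GRU.
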
class RNN(_RNNBase):
    r"""Applies a multi-layer RNN to an input tensor.

    Args:
        h (int, Required): The number of features in the hidden state `h`
        n (int): Number of layers. Default: ``1``
        b (bool): Whether to include a bias term. Default: ``False``
        bi (bool): If ``True``, becomes a bidirectional RNN. Default: ``False``
        act (str or None): The activation function to use. Default: ``'tanh'``
        d (float): The dropout probability of the outputs of each layer.
            Default: ``0``
        batch_first (bool): If ``True``, then the input and output tensors
            are provided as ``(batch, seq, feature)``. Default: ``False``
        i (int): Input dimensions. Default: Inferred

    * :attr:`i` is inferred from the last dimension of the input tensor.

    .. note::
        One can also create multiple Nodes using the convenient
        multiplication (``*``) operation. Multiplication with an integer
        :math:`n` gives :math:`n` copies of the Node. Multiplication with a
        list or tuple of integers :math:`(h_1, h_2, ..., h_n)` gives :math:`n`
        copies of the Node with :attr:`h` set to :math:`h_i`.

    Attributes:
        layer (nn.Module): The RNN module built from torch.nn

    Examples::

        >>> import torch
        >>> from torch import nn
        >>> import magnet.nodes as mn
        >>> from magnet.utils import summarize

        >>> # A recurrent layer with 32 hidden dimensions
        >>> model = mn.RNN(32)
        >>> model(torch.randn(7, 4, 300))[0].shape
        torch.Size([7, 4, 32])

        >>> # Attach a linear head
        >>> model = nn.Sequential(model, mn.Linear(1000, act=None))
    """
    def __init__(self, h, n=1, b=False, bi=False, act='tanh', d=0,
                 batch_first=False, i=None, **kwargs):
        mode = kwargs.pop('mode', 'rnn')
        super().__init__(mode, h, n, b, bi, act, d, batch_first, i, **kwargs)

class LSTM(_RNNBase):
    r"""Applies a multi-layer LSTM to an input tensor.

    See ``mn.RNN`` for more details.
    """
    def __init__(self, h, n=1, b=False, bi=False, d=0, batch_first=False,
                 i=None, **kwargs):
        act = kwargs.pop('act', None)
        mode = kwargs.pop('mode', 'lstm')
        super().__init__(mode, h, n, b, bi, act, d, batch_first, i, **kwargs)

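# A minimal usage sketch (illustrative; mirrors the mn.RNN example): an LSTM node
# returns the usual (output, (h_n, c_n)) pair from torch.nn.LSTM.
#
#     import torch
#     import magnet.nodes as mn
#
#     lstm = mn.LSTM(32)
#     out, (h_n, c_n) = lstm(torch.randn(7, 4, 300))   # (seq, batch, feature)
#     # out.shape == torch.Size([7, 4, 32])
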
class GRU(_RNNBase):
    r"""Applies a multi-layer GRU to an input tensor.

    See ``mn.RNN`` for more details.
    """
    def __init__(self, h, n=1, b=False, bi=False, d=0, batch_first=False,
                 i=None, **kwargs):
        act = kwargs.pop('act', None)
        mode = kwargs.pop('mode', 'gru')
        super().__init__(mode, h, n, b, bi, act, d, batch_first, i, **kwargs)

class BatchNorm(Node):
    r"""Applies Batch Normalization to the input tensor.

    Args:
        e (float): A small value added to the denominator for numerical
            stability. Default: ``1e-5``
        m (float or None): The value used for the running_mean and running_var
            computation. Can be set to ``None`` for cumulative moving average
            (i.e. simple average). Default: ``0.1``
        a (bool): Whether to have learnable affine parameters.
            Default: ``True``
        track (bool): Whether to track the running mean and variance.
            Default: ``True``
        i (int): Input channels. Default: Inferred

    * :attr:`i` is inferred from the second dimension of the input tensor.

    .. note::
        The dimensionality (1, 2 or 3) of the running mean and variance is
        inferred from the shape of the input tensor.

    .. note::
        One can also create multiple Nodes using the convenient
        multiplication (``*``) operation. Multiplication with an integer
        :math:`n` gives :math:`n` copies of the Node. Multiplication with a
        list or tuple of integers :math:`(i_1, i_2, ..., i_n)` gives :math:`n`
        copies of the Node with :attr:`i` set to :math:`i_i`.

    Shape:
        - Input: :math:`(N, C, *)` where :math:`*` means any number of
          trailing dimensions
        - Output: :math:`(N, C, *)` (same shape as input)

    Attributes:
        layer (nn.Module): The BatchNorm module built from :py:class:`torch.nn`

    Examples::

        >>> import torch
        >>> import magnet.nodes as mn

        >>> # Batch Normalization over a 4-D (image-like) input
        >>> model = mn.BatchNorm()
        >>> model(torch.randn(4, 16, 28, 28)).shape
        torch.Size([4, 16, 28, 28])
    """
    def __init__(self, e=1e-05, m=0.1, a=True, track=True, i=None, **kwargs):
        super().__init__(e, m, a, track, i, **kwargs)

    def build(self, x):
        # Infer the input channels if not given
        if self._args['i'] is None:
            self._args['i'] = x.shape[1]

        layer_class = self._find_layer(x)  # Infer the layer (BatchNorm1d, 2d or 3d)
        self.layer = layer_class(*[self._args[k] for k in ('i', 'e', 'm', 'a', 'track')])

        super().build(x)

    def forward(self, x):
        return self.layer(x)

    @staticmethod
    def _find_layer(x):
        shape_dict = [nn.BatchNorm1d, nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d]
        ndim = len(x.shape) - 1
        return shape_dict[ndim - 1]

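# A minimal sketch of the dimensionality inference in BatchNorm._find_layer
# (illustrative): a 3-D input picks nn.BatchNorm1d, a 4-D input picks nn.BatchNorm2d.
#
#     import torch
#     import magnet.nodes as mn
#
#     bn = mn.BatchNorm()
#     out = bn(torch.randn(8, 16, 100))        # (N, C, L) builds nn.BatchNorm1d(16)
#     # out.shape == torch.Size([8, 16, 100]); the input shape is always preserved.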