Source code for brainstorm.handlers.base_handler

#!/usr/bin/env python
# coding=utf-8
from __future__ import division, print_function, unicode_literals

import abc

import six

from brainstorm.describable import Describable


@six.add_metaclass(abc.ABCMeta)
class Handler(Describable):
    """Abstract base class for all handlers.

    This base is used mainly to ensure a common interface and provide
    documentation for derived handlers. When implementing new methods
    one should adhere to the naming scheme. Most mathematical operations
    should have a suffix or suffixes indicating the shapes of inputs it
    expects:

    `s` for scalar,
    `v` for vector (a 2D array with at least one dimension equal to 1),
    `m` for matrix (a 2D array),
    `t` for tensor (which means arbitrary shape, synonym for `array`).

    Note that these shapes are not checked by each handler itself. However,
    the DebugHandler can be used to perform these checks to ensure that
    operations are not abused.

    Attributes:
        dtype: Data type that this handler works with.
        context: Context which may be used by this handler for operation.
        EMPTY: An empty array matching this handler's type.
        rnd: A random state maintained by this handler.
        array_type: The type of array object that this handler works with.
    """

    # Names of the lookup tables built in __init__.
    # NOTE(review): presumably Describable skips these attributes when
    # creating a description -- confirm against brainstorm.describable.
    __undescribed__ = {'inplace_act_func', 'inplace_act_func_deriv',
                       'act_func', 'act_func_deriv'}

    def __init__(self):
        """Build the lookup tables from activation name to callable.

        Four dictionaries keyed by activation name ('sigmoid', 'rel',
        'tanh', 'linear', 'el') are created:

        - ``inplace_act_func``: ``f(x)`` applies the activation with ``x``
          used as both input and output.
        - ``inplace_act_func_deriv``: ``f(y, dy)`` calls the derivative
          with ``y`` passed as both x and y, and ``dy`` passed as both
          dy and dx.
        - ``act_func``: ``f(x, y)`` out-of-place activation.
        - ``act_func_deriv``: ``f(x, y, dy, dx)`` out-of-place derivative.
        """
        self.inplace_act_func = {
            'sigmoid': lambda x: self.sigmoid(x, x),
            'rel': lambda x: self.rel(x, x),
            'tanh': lambda x: self.tanh(x, x),
            # identity in place: nothing to do
            'linear': lambda x: None,
            'el': lambda x: self.el(x, x)
        }
        self.inplace_act_func_deriv = {
            'sigmoid': lambda y, dy: self.sigmoid_deriv(y, y, dy, dy),
            'rel': lambda y, dy: self.rel_deriv(y, y, dy, dy),
            'tanh': lambda y, dy: self.tanh_deriv(y, y, dy, dy),
            # derivative of the identity leaves dy untouched
            'linear': lambda y, dy: None,
            'el': lambda y, dy: self.el_deriv(y, y, dy, dy)
        }
        self.act_func = {
            'sigmoid': self.sigmoid,
            'rel': self.rel,
            'tanh': self.tanh,
            # out-of-place identity is a plain copy
            'linear': self.copy_to,
            'el': self.el
        }
        self.act_func_deriv = {
            'sigmoid': self.sigmoid_deriv,
            'rel': self.rel_deriv,
            'tanh': self.tanh_deriv,
            # x and y are ignored; the deltas are copied through unchanged
            'linear': lambda x, y, dy, dx: self.copy_to(dy, dx),
            'el': self.el_deriv
        }

    def __init_from_description__(self, description):
        """Rebuild the activation lookup tables.

        Args:
            description: Not used by this implementation.
        """
        # Called explicitly on Handler (not via super) so that exactly the
        # table setup above is executed.
        Handler.__init__(self)

    # ------------------------- Allocate new memory ------------------------- #

    @abc.abstractmethod
    def allocate(self, shape):
        """Allocate new memory with given shape but arbitrary content.

        Args:
            shape (tuple[int]): Shape of the array.

        Returns:
            object: New array with given shape.
        """

    @abc.abstractmethod
    def ones(self, shape):
        """Allocate new memory with given shape and filled with ones.

        Args:
            shape (tuple[int]): Shape of the array.

        Returns:
            object: New array with given shape filled with ones.
        """

    @abc.abstractmethod
    def zeros(self, shape):
        """Allocate new memory with given shape and filled with zeros.

        Args:
            shape (tuple[int]): Shape of the array.

        Returns:
            object: New array with given shape filled with zeros.
        """

    # ---------------------------- Copy and Fill ---------------------------- #

    @abc.abstractmethod
    def copy_to(self, src, dest):
        """Copy the contents of one array to another.

        Both source and destination arrays must be of this handler's
        supported type and have the same shape.

        Args:
            src (array_type): Source array.
            dest (array_type): Destination array.

        Returns:
            None
        """

    @abc.abstractmethod
    def copy_to_if(self, src, dest, cond):
        """Copy element of 'src' to element of 'dest' if cond is not equal
        to 0.

        Args:
            src (array_type): Source array whose elements (might) be copied
                into `dest`.
            dest (array_type): Destination array.
            cond (array_type): The condition array. Only those `src`
                elements get copied to `dest` whose corresponding `cond`
                elements are non-zero.

        Returns:
            None
        """

    @abc.abstractmethod
    def create_from_numpy(self, arr):
        """Create a new array with the same entries as a Numpy array.

        Args:
            arr (numpy.ndarray): Numpy array whose elements should be used
                to fill the new array.

        Returns:
            array_type: New array with same shape and entries as the given
                Numpy array.
        """

    @abc.abstractmethod
    def fill(self, mem, val):
        """Fill an array with a given value.

        Args:
            mem (array_type): Array to be filled.
            val (dtype): Value to fill.

        Returns:
            None
        """

    @abc.abstractmethod
    def fill_if(self, mem, val, cond):
        """Set the elements of `mem` to `val` if corresponding `cond`
        element is non-zero.

        Args:
            mem (array_type): Array to be filled.
            val (dtype): The scalar which the elements of `mem` (might) be
                set to.
            cond (array_type): The condition array. Only those `mem`
                elements are set to `val` whose corresponding `cond`
                elements are non-zero.

        Returns:
            None
        """

    @abc.abstractmethod
    def get_numpy_copy(self, mem):
        """Return a copy of the given data as a numpy array.

        Args:
            mem (array_type): Source array to be copied.

        Returns:
            numpy.ndarray: Numpy array with same content as mem.
        """

    @abc.abstractmethod
    def set_from_numpy(self, mem, arr):
        """Set the content of an array from a given numpy array.

        Args:
            mem (array_type): Destination array that should be set.
            arr (numpy.ndarray): Source numpy array.

        Returns:
            None
        """

    # ---------------------------- Debug helpers ---------------------------- #

    @abc.abstractmethod
    def is_fully_finite(self, a):
        """Check if all entries of the array are finite (no nans or infs).

        Args:
            a (array_type): Input array to check.

        Returns:
            bool: True if there are no infs or nans, False otherwise.
        """

    # ----------------------- Mathematical operations ----------------------- #

    @abc.abstractmethod
    def abs_t(self, a, out):
        """Compute the element-wise absolute value.

        Args:
            a (array_type): Array whose absolute values are to be computed.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`a`.

        Returns:
            None
        """

    @abc.abstractmethod
    def add_into_if(self, a, out, cond):
        """Add element of `a` to element of `out` if corresponding element
        in `cond` is non-zero.

        Args:
            a (array_type): Array whose elements (might) be added to `out`.
            out (array_type): Output array, whose values might be increased
                by values from `a`.
            cond (array_type): The condition array. Only those entries from
                `a` are added into `out` whose corresponding `cond`
                elements are non-zero.

        Returns:
            None
        """

    @abc.abstractmethod
    def add_mv(self, m, v, out):
        """Add a matrix to a vector with broadcasting.

        Add an (M, N) matrix to a (1, N) or (M, 1) vector using
        broadcasting such that the output is (M, N).

        Args:
            m (array_type): The first array to be added. Must be 2D.
            v (array_type): The second array to be added. Must be 2D with
                at least one dimension of size 1 and the other dimension
                matching the corresponding size of :attr:`m`.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`m`.

        Returns:
            None
        """

    @abc.abstractmethod
    def add_st(self, s, t, out):
        """Add a scalar to each element of a tensor.

        Args:
            s (dtype): The scalar value to be added.
            t (array_type): The array to be added.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`t`.

        Returns:
            None
        """

    @abc.abstractmethod
    def add_tt(self, a, b, out):
        """Add two tensors element-wise.

        Args:
            a (array_type): First array.
            b (array_type): Second array.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`a` and :attr:`b`.

        Returns:
            None
        """

    @abc.abstractmethod
    def avgpool2d_backward_batch(self, inputs, window, outputs, padding,
                                 stride, in_deltas, out_deltas):
        """Computes the gradients for 2D average-pooling on a batch of
        images.

        Args:
            inputs (array_type):
            window (tuple[int]):
            outputs (array_type):
            padding (int):
            stride (tuple[int]):
            in_deltas (array_type):
            out_deltas (array_type):

        Returns:
            None
        """

    @abc.abstractmethod
    def avgpool2d_forward_batch(self, inputs, window, outputs, padding,
                                stride):
        """Performs 2D average-pooling on a batch of images.

        Args:
            inputs (array_type):
            window (tuple[int]):
            outputs (array_type):
            padding (int):
            stride (tuple[int]):

        Returns:
            None
        """

    @abc.abstractmethod
    def binarize_v(self, v, out):
        """Convert a column vector into a matrix of one-hot row vectors.

        Usually used to convert class IDs into one-hot vectors. Therefore,

        `out[i, j] = 1`, if j equals v[i, 0]
        `out[i, j] = 0`, otherwise.

        Note that `out` must have enough columns such that all indices in
        :attr:`v` are valid.

        Args:
            v (array_type): Column vector (2D array with a single column).
            out (array_type): Matrix (2D array) into which the output is
                placed. The number of rows must be the same as :attr:`v`
                and number of columns must be greater than the maximum
                value in :attr:`v`.

        Returns:
            None
        """

    @abc.abstractmethod
    def broadcast_t(self, a, axis, out):
        """Broadcast the given axis of an array by copying elements.

        This function provides a numpy-broadcast-like operation for the
        dimension given by axis. E.g. for axis=3 an array with shape
        (2, 3, 4, 1) may be broadcasted to shape (2, 3, 4, 5), by copying
        all the elements 5 times.

        Args:
            a (array_type): Array whose elements should be broadcasted. The
                dimension corresponding to axis must be of size 1.
            axis (int): The axis along which to broadcast.
            out (array_type): Array into which the output is placed. Must
                have the same number of dimensions as `a`. Only the
                dimension corresponding to axis can differ from `a`.

        Returns:
            None
        """

    @abc.abstractmethod
    def clip_t(self, a, a_min, a_max, out):
        """Clip (limit) the values in an array.

        Given an interval, values outside the interval are clipped to the
        interval edges. For example, if an interval of [0, 1] is specified,
        values smaller than 0 become 0, and values larger than 1 become 1.

        Args:
            a (array_type): Array containing the elements to clip.
            a_min (dtype): Minimum value.
            a_max (dtype): Maximum value.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`a`.

        Returns:
            None
        """

    @abc.abstractmethod
    def conv2d_backward_batch(self, inputs, weights, padding, stride,
                              in_deltas, out_deltas, weight_deltas,
                              bias_deltas):
        """Computes the gradients for a 2D convolution on a batch of images.

        Args:
            inputs (array_type):
            weights (array_type):
            padding (int):
            stride (tuple[int]):
            in_deltas (array_type):
            out_deltas (array_type):
            weight_deltas (array_type):
            bias_deltas (array_type):

        Returns:
            None
        """

    @abc.abstractmethod
    def conv2d_forward_batch(self, inputs, weights, bias, outputs, padding,
                             stride):
        """Performs a 2D convolution on a batch of images.

        Args:
            inputs (array_type):
            weights (array_type):
            bias (array_type):
            outputs (array_type):
            padding (int):
            stride (tuple[int]):

        Returns:
            None
        """

    @abc.abstractmethod
    def dot_add_mm(self, a, b, out, transa=False, transb=False):
        """Multiply two matrices and add to a matrix.

        Only 2D arrays (matrices) are supported.

        Args:
            a (array_type): First matrix.
            b (array_type): Second matrix. Must have compatible shape to be
                right-multiplied with :attr:`a`.
            out (array_type): Array into which the output is added. Must
                have correct shape for the product of the two matrices.
            transa (bool): Flag for using the transpose of :attr:`a` in the
                product. Defaults to False. (Meaning inferred from the
                name; the concrete handlers are authoritative.)
            transb (bool): Flag for using the transpose of :attr:`b` in the
                product. Defaults to False. (Meaning inferred from the
                name; the concrete handlers are authoritative.)

        Returns:
            None
        """

    @abc.abstractmethod
    def dot_mm(self, a, b, out, transa=False, transb=False):
        """Multiply two matrices.

        Only 2D arrays (matrices) are supported.

        Args:
            a (array_type): First matrix.
            b (array_type): Second matrix. Must have compatible shape to be
                right-multiplied with :attr:`a`.
            out (array_type): Array into which the output is placed. Must
                have correct shape for the product of the two matrices.
            transa (bool): Flag for using the transpose of :attr:`a` in the
                product. Defaults to False. (Meaning inferred from the
                name; the concrete handlers are authoritative.)
            transb (bool): Flag for using the transpose of :attr:`b` in the
                product. Defaults to False. (Meaning inferred from the
                name; the concrete handlers are authoritative.)

        Returns:
            None
        """

    @abc.abstractmethod
    def divide_mv(self, m, v, out):
        """Divide a matrix by a vector.

        Divide a (M, N) matrix element-wise by a (1, N) vector using
        broadcasting such that the output is (M, N).

        Args:
            m (array_type): First array (dividend). Must be 2D.
            v (array_type): Second array (divisor). Must be 2D with at
                least one dimension of size 1 and second dimension matching
                the corresponding size of :attr:`m`.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`m`.

        Returns:
            None
        """

    @abc.abstractmethod
    def divide_tt(self, a, b, out):
        """Divide two tensors element-wise.

        Args:
            a (array_type): First array (dividend).
            b (array_type): Second array (divisor). Must have the same
                shape as :attr:`a`.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`a` and :attr:`b`.

        Returns:
            None
        """

    @abc.abstractmethod
    def fill_gaussian(self, mean, std, out):
        """Fill an array with values drawn from a Gaussian distribution.

        Args:
            mean (float): Mean of the Gaussian Distribution.
            std (float): Standard deviation of the Gaussian distribution.
            out (array_type): Target array to fill with values.

        Returns:
            None
        """

    @abc.abstractmethod
    def generate_probability_mask(self, mask, probability):
        """Fill an array with zeros and ones.

        Fill an array with zeros and ones such that the probability of an
        element being one is equal to :attr:`probability`.

        Args:
            mask (array_type): Array that will be filled.
            probability (float): Probability of an element of :attr:`mask`
                being equal to one.

        Returns:
            None
        """

    @abc.abstractmethod
    def index_m_by_v(self, m, v, out):
        """Get elements from a matrix using indices from a vector.

        :attr:`v` and :attr:`out` must be column vectors of the same size.
        Elements from the matrix :attr:`m` are copied using the indices
        given by a column vector. From row `i` of the matrix, the element
        from column `v[i, 0]` is copied to out, such that
        `out[i, 0] = m[i, v[i, 0]]`.

        Note that `m` must have enough columns such that all indices in
        :attr:`v` are valid.

        Args:
            m (array_type): Matrix (2D array) whose elements should be
                copied.
            v (array_type): Column vector (2D array with a single column)
                whose values are used as indices into :attr:`m`. The number
                of rows must be the same as :attr:`m`.
            out (array_type): Array into which the output is placed. Its
                shape must be the same as :attr:`v`.

        Returns:
            None
        """

    @abc.abstractmethod
    def log_t(self, a, out):
        """Compute the element-wise natural logarithm.

        The natural logarithm log is the inverse of the exponential
        function, so that `log(exp(x)) = x`.

        Args:
            a (array_type): Array whose logarithm is to be computed.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`a`.

        Returns:
            None
        """

    @abc.abstractmethod
    def merge_tt(self, a, b, out):
        """Merge arrays a and b along their last axis.

        Args:
            a (array_type): Array to be merged.
            b (array_type): Array to be merged.
            out (array_type): Array into which the output is placed.

        Returns:
            None
        """

    @abc.abstractmethod
    def maxpool2d_backward_batch(self, inputs, window, outputs, padding,
                                 stride, argmax, in_deltas, out_deltas):
        """Computes the gradients for 2D max-pooling on a batch of images.

        Args:
            inputs (array_type):
            window (tuple[int]):
            outputs (array_type):
            padding (int):
            stride (tuple[int]):
            argmax (array_type):
            in_deltas (array_type):
            out_deltas (array_type):

        Returns:
            None
        """

    @abc.abstractmethod
    def maxpool2d_forward_batch(self, inputs, window, outputs, padding,
                                stride, argmax):
        """Performs a 2D max-pooling on a batch of images.

        Args:
            inputs (array_type):
            window (tuple[int]):
            outputs (array_type):
            padding (int):
            stride (tuple[int]):
            argmax (array_type):

        Returns:
            None
        """

    @abc.abstractmethod
    def modulo_tt(self, a, b, out):
        """Take the modulo between two arrays elementwise. (out = a % b)

        Args:
            a (array_type): First array (dividend).
            b (array_type): Second array (divisor). Must have the same
                shape as `a`.
            out (array_type): Array into which the remainder is placed.
                Must have the same shape as :attr:`a` and :attr:`b`.

        Returns:
            None
        """

    @abc.abstractmethod
    def mult_add_st(self, s, t, out):
        """Multiply a scalar with each element of a tensor and add to a
        tensor.

        Args:
            s (dtype): The scalar value to be multiplied.
            t (array_type): The array to be multiplied.
            out (array_type): Array into which the product is added. Must
                have the same shape as :attr:`t`.

        Returns:
            None
        """

    @abc.abstractmethod
    def mult_add_tt(self, a, b, out):
        """Multiply two tensors element-wise and add to a tensor.

        Args:
            a (array_type): First array.
            b (array_type): Second array. Must have the same shape as
                :attr:`a`.
            out (array_type): Array into which the output is added. Must
                have the same shape as :attr:`a` and :attr:`b`.

        Returns:
            None
        """

    @abc.abstractmethod
    def mult_mv(self, m, v, out):
        """Multiply a matrix with a vector.

        Multiply an (M, N) matrix with a (1, N) or (M, 1) vector using
        broadcasting such that the output is (M, N). Also allows the
        "vector" to have the same dimension as the matrix in which case it
        behaves the same as :meth:`.mult_tt`.

        Args:
            m (array_type): The first array. Must be 2D.
            v (array_type): The second array, to be multiplied with
                :attr:`m`. Must be 2D with at least one dimension of size 1
                and the other dimension matching the corresponding size of
                :attr:`m`.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`m`.

        Returns:
            None
        """

    @abc.abstractmethod
    def mult_st(self, s, t, out):
        """Multiply a scalar with each element of a tensor.

        Args:
            s (dtype): The scalar value to be multiplied.
            t (array_type): The array to be multiplied.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`t`.

        Returns:
            None
        """

    @abc.abstractmethod
    def mult_tt(self, a, b, out):
        """Multiply two tensors of the same shape element-wise.

        Args:
            a (array_type): First array.
            b (array_type): Second array. Must have the same shape as
                :attr:`a`.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`a` and :attr:`b`.

        Returns:
            None
        """

    @abc.abstractmethod
    def sign_t(self, a, out):
        """Compute an element-wise indication of the sign of a number.

        Output has the value 1.0 if an element is positive, 0 if it is
        zero, and -1.0 if it is negative.

        Args:
            a (array_type): Array whose sign is to be computed.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`a`.

        Returns:
            None
        """

    @abc.abstractmethod
    def split_add_tt(self, x, out_a, out_b):
        """Split array x along the last axis and add the parts to out_i.

        Args:
            x (array_type): Array to be split.
            out_a (array_type): Array to which 1st part of x is added.
            out_b (array_type): Array to which 2nd part of x is added.

        Returns:
            None
        """

    @abc.abstractmethod
    def sqrt_t(self, a, out):
        """Compute the positive square-root of an array, element-wise.

        Args:
            a (array_type): Array whose square root is to be computed.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`a`.

        Returns:
            None
        """

    @abc.abstractmethod
    def subtract_mv(self, m, v, out):
        """Subtract a vector from a matrix with broadcasting.

        Args:
            m (array_type): The first array. Must be 2D.
            v (array_type): The second array, to be subtracted from
                :attr:`m`. Must be 2D with at least one dimension of size 1
                and second dimension matching the corresponding size of
                :attr:`m`.
            out (array_type): Array into which the output is placed. Must
                have the same shape as :attr:`m`.

        Returns:
            None
        """

    @abc.abstractmethod
    def subtract_tt(self, a, b, out):
        """Subtract a tensor from another element-wise.

        Args:
            a (array_type): First array.
            b (array_type): Second array, to be subtracted from :attr:`a`.
                Must have the same shape as :attr:`a`.
            out (array_type): Array into which the output
                (:attr:`a` - :attr:`b`) is placed. Must have the same shape
                as :attr:`a` and :attr:`b`.

        Returns:
            None
        """

    @abc.abstractmethod
    def sum_t(self, a, axis, out):
        """Sum the elements of an array along a given axis.

        If axis is None, the sum is computed over all elements of the
        array. Otherwise, it is computed along the specified axis.

        Note:
            Only 1D and 2D arrays are currently supported.

        Args:
            a (array_type): Array to be summed.
            axis (int): Axis over which the summation should be done.
            out (array_type): Array into which the output is placed.

        Returns:
            None
        """

    # ------------------------ Activation functions ------------------------- #

    @abc.abstractmethod
    def rel(self, x, y):
        """Compute the rel (rectified linear) function.

        `y = rel(x) = max(0, x)`

        Args:
            x (array_type): Input array.
            y (array_type): Output array.

        Returns:
            None
        """

    @abc.abstractmethod
    def rel_deriv(self, x, y, dy, dx):
        """Backpropagate derivatives through the rectified linear function.

        Args:
            x (array_type): Inputs to the rel function. This argument is
                not used and is present only to conform with other
                activation functions.
            y (array_type): Outputs of the rel function.
            dy (array_type): Derivatives with respect to the outputs.
            dx (array_type): Array in which the derivatives with respect to
                the inputs are placed.

        Returns:
            None
        """

    @abc.abstractmethod
    def sigmoid(self, x, y):
        """Compute the sigmoid function.

        `y = sigmoid(x) = 1 / (1 + exp(-x))`

        Args:
            x (array_type): Input array.
            y (array_type): Output array.

        Returns:
            None
        """

    @abc.abstractmethod
    def sigmoid_deriv(self, x, y, dy, dx):
        """Backpropagate derivatives through the sigmoid function.

        Args:
            x (array_type): Inputs to the sigmoid function. This argument
                is not used and is present only to conform with other
                activation functions.
            y (array_type): Outputs of the sigmoid function.
            dy (array_type): Derivatives with respect to the outputs.
            dx (array_type): Array in which the derivatives with respect to
                the inputs are placed.

        Returns:
            None
        """

    @abc.abstractmethod
    def softmax_m(self, m, out):
        """Compute the softmax function over last dimension of a matrix.

        Args:
            m (array_type): Input array.
            out (array_type): Output array.

        Returns:
            None
        """

    @abc.abstractmethod
    def tanh(self, x, y):
        """Compute the tanh (hyperbolic tangent) function.

        `y = tanh(x) = (e^x - e^-x) / (e^x + e^-x)`

        Args:
            x (array_type): Input array.
            y (array_type): Output array.

        Returns:
            None
        """

    @abc.abstractmethod
    def tanh_deriv(self, x, y, dy, dx):
        """Backpropagate derivatives through the tanh function.

        Args:
            x (array_type): Inputs to the tanh function. This argument is
                not used and is present only to conform with other
                activation functions.
            y (array_type): Outputs of the tanh function.
            dy (array_type): Derivatives with respect to the outputs.
            dx (array_type): Array in which the derivatives with respect to
                the inputs are placed.

        Returns:
            None
        """

    # NOTE(review): unlike the other activation functions, `el` and
    # `el_deriv` are not marked abstract, so a subclass that does not
    # override them inherits these bodies, which do nothing and return
    # None. Confirm whether that is intentional.
    def el(self, x, y):
        """Compute the exponential linear activation function.

        `f(x) = x if x > 0 else exp(x) - 1`

        Note that we chose to fix alpha to 1.

        Args:
            x (array_type): Input array.
            y (array_type): Output array.

        Returns:
            None

        References:
            Clevert, D. A., Unterthiner, T., & Hochreiter, S. (2015).
            Fast and Accurate Deep Network Learning by Exponential Linear
            Units. arXiv preprint arXiv:1511.07289.
        """

    def el_deriv(self, x, y, dy, dx):
        """Backpropagate derivatives through the exponential linear function.

        `f'(x) = 1 if x > 0 else f(x) + 1`

        Note that we chose to fix alpha to 1.

        Args:
            x (array_type): Inputs to the exponential linear function. This
                argument is not used and is present only to conform with
                other activation functions.
            y (array_type): Outputs of the exponential linear function.
            dy (array_type): Derivatives with respect to the outputs.
            dx (array_type): Array in which the derivatives with respect to
                the inputs are placed.

        Returns:
            None

        References:
            Clevert, D. A., Unterthiner, T., & Hochreiter, S. (2015).
            Fast and Accurate Deep Network Learning by Exponential Linear
            Units. arXiv preprint arXiv:1511.07289.
        """