Coverage for /usr/lib/python3/dist-packages/numpy/core/einsumfunc.py: 6%

100

101

102

103

"""

Implementation of optimized einsum.

"""

from __future__ import division, absolute_import, print_function

import itertools

from numpy.compat import basestring

from numpy.core.multiarray import c_einsum

from numpy.core.numeric import asanyarray, tensordot

from numpy.core.overrides import array_function_dispatch

__all__ = ['einsum', 'einsum_path']

# Alphabet of single-character index labels accepted in subscripts. It is
# also indexed by integer position when translating interleaved-format
# index lists into subscript strings.
einsum_symbols = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

# The same labels as a set, used for set arithmetic when looking for labels
# that are not yet taken (ellipsis expansion).
einsum_symbols_set = set(einsum_symbols)

def _flop_count(idx_contraction, inner, num_terms, size_dictionary):
    """
    Computes the number of FLOPS in the contraction.

    Parameters
    ----------
    idx_contraction : iterable
        The indices involved in the contraction
    inner : bool
        Does this contraction require an inner product? Only the truth
        value is used; the path finders in this module pass the set of
        removed indices, which is truthy exactly when something is summed.
    num_terms : int
        The number of terms in a contraction
    size_dictionary : dict
        The size of each of the indices in idx_contraction

    Returns
    -------
    flop_count : int
        The total number of FLOPS required for the contraction.

    Examples
    --------

    >>> _flop_count('abc', False, 1, {'a': 2, 'b':3, 'c':5})
    30

    >>> _flop_count('abc', True, 2, {'a': 2, 'b':3, 'c':5})
    60

    """

    # Number of elements visited: product of the sizes of all indices.
    overall_size = _compute_size_by_dict(idx_contraction, size_dictionary)

    # One multiply per additional term (at least one operation overall) ...
    op_factor = max(1, num_terms - 1)
    # ... plus one extra operation for the summation of an inner product.
    if inner:
        op_factor += 1

    return overall_size * op_factor

def _compute_size_by_dict(indices, idx_dict):

"""

Computes the product of the elements in indices based on the dictionary

idx_dict.

Parameters

----------

indices : iterable

Indices to base the product on.

idx_dict : dictionary

Dictionary of index sizes

Returns

-------

ret : int

The resulting product.

Examples

--------

>>> _compute_size_by_dict('abbc', {'a': 2, 'b':3, 'c':5})

"""

ret = 1

for i in indices:

ret *= idx_dict[i]

return ret

def _find_contraction(positions, input_sets, output_set):

"""

Finds the contraction for a given set of input and output sets.

Parameters

----------

positions : iterable

Integer positions of terms used in the contraction.

input_sets : list

List of sets that represent the lhs side of the einsum subscript

output_set : set

Set that represents the rhs side of the overall einsum subscript

Returns

-------

new_result : set

The indices of the resulting contraction

remaining : list

List of sets that have not been contracted, the new set is appended to

the end of this list

idx_removed : set

Indices removed from the entire contraction

idx_contraction : set

The indices used in the current contraction

Examples

--------

# A simple dot product test case

>>> pos = (0, 1)

>>> isets = [set('ab'), set('bc')]

>>> oset = set('ac')

>>> _find_contraction(pos, isets, oset)

({'a', 'c'}, [{'a', 'c'}], {'b'}, {'a', 'b', 'c'})

# A more complex case with additional terms in the contraction

>>> pos = (0, 2)

>>> isets = [set('abd'), set('ac'), set('bdc')]

>>> oset = set('ac')

>>> _find_contraction(pos, isets, oset)

({'a', 'c'}, [{'a', 'c'}, {'a', 'c'}], {'b', 'd'}, {'a', 'b', 'c', 'd'})

"""

idx_contract = set()

idx_remain = output_set.copy()

remaining = []

for ind, value in enumerate(input_sets):

if ind in positions:

idx_contract |= value

else:

remaining.append(value)

idx_remain |= value

new_result = idx_remain & idx_contract

idx_removed = (idx_contract - new_result)

remaining.append(new_result)

return (new_result, remaining, idx_removed, idx_contract)

def _optimal_path(input_sets, output_set, idx_dict, memory_limit):
    """
    Computes all possible pair contractions, sieves the results based
    on ``memory_limit`` and returns the lowest cost path. This algorithm
    scales factorial with respect to the elements in the list ``input_sets``.

    Parameters
    ----------
    input_sets : list
        List of sets that represent the lhs side of the einsum subscript
    output_set : set
        Set that represents the rhs side of the overall einsum subscript
    idx_dict : dictionary
        Dictionary of index sizes
    memory_limit : int
        The maximum number of elements in a temporary array

    Returns
    -------
    path : list
        The optimal contraction order within the memory limit constraint.
        If at some depth no pair contraction fits within ``memory_limit``,
        the cheapest path found so far is returned with one final entry
        that contracts all remaining terms at once.

    Examples
    --------
    >>> isets = [set('abd'), set('ac'), set('bdc')]
    >>> oset = set()
    >>> idx_sizes = {'a': 1, 'b':2, 'c':3, 'd':4}
    >>> _optimal_path(isets, oset, idx_sizes, 5000)
    [(0, 2), (0, 1)]
    """

    # Each candidate is (accumulated_cost, path_so_far, remaining_index_sets).
    full_results = [(0, [], input_sets)]
    for iteration in range(len(input_sets) - 1):
        iter_results = []

        # Every candidate at this depth holds the same number of terms (each
        # pair contraction replaces two terms by one), so the position pairs
        # are enumerated once per depth rather than once per candidate.
        num_terms = len(input_sets) - iteration
        pairs = list(itertools.combinations(range(num_terms), 2))

        for cost, positions, remaining in full_results:
            for con in pairs:

                # Find the contraction
                cont = _find_contraction(con, remaining, output_set)
                new_result, new_input_sets, idx_removed, idx_contract = cont

                # Sieve the results based on memory_limit
                new_size = _compute_size_by_dict(new_result, idx_dict)
                if new_size > memory_limit:
                    continue

                # Build (total_cost, positions, indices_remaining)
                total_cost = cost + _flop_count(idx_contract, idx_removed,
                                                len(con), idx_dict)
                iter_results.append(
                    (total_cost, positions + [con], new_input_sets))

        if not iter_results:
            # Nothing fits in memory: return the best path found so far plus
            # a single contraction over all remaining terms.
            path = min(full_results, key=lambda x: x[0])[1]
            return path + [tuple(range(num_terms))]

        full_results = iter_results

    # ``full_results`` is never empty here: it starts with one candidate and
    # is only ever replaced by a non-empty list.
    return min(full_results, key=lambda x: x[0])[1]

def _parse_possible_contraction(positions, input_sets, output_set, idx_dict, memory_limit, path_cost, naive_cost):
    """Evaluate the contraction of the tensors at ``positions`` and build the
    sort key used to rank it against other candidate contractions.

    Parameters
    ----------
    positions : tuple of int
        The locations of the proposed tensors to contract.
    input_sets : list of sets
        The indices found on each tensors.
    output_set : set
        The output indices of the expression.
    idx_dict : dict
        Mapping of each index to its size.
    memory_limit : int
        The total allowed size for an intermediary tensor.
    path_cost : int
        The contraction cost so far.
    naive_cost : int
        The cost of the unoptimized expression.

    Returns
    -------
    result : list or None
        ``None`` when the candidate is rejected, i.e. the intermediate would
        exceed ``memory_limit`` or ``path_cost`` plus this contraction's cost
        would exceed ``naive_cost``. Otherwise a three element list:

        cost : (int, int)
            ``(-removed_size, flops)`` where ``removed_size`` is the summed
            size of the contracted inputs minus the size of the result.
        positions : tuple of int
            The locations of the proposed tensors to contract.
        new_input_sets : list of sets
            The resulting new list of indices if this proposed contraction is performed.

    """
    idx_result, new_input_sets, idx_removed, idx_contract = _find_contraction(
        positions, input_sets, output_set)

    # Reject intermediates larger than the memory budget.
    new_size = _compute_size_by_dict(idx_result, idx_dict)
    if new_size > memory_limit:
        return None

    # Reject candidates that would make the path dearer than naive einsum.
    cost = _flop_count(idx_contract, idx_removed, len(positions), idx_dict)
    if (path_cost + cost) > naive_cost:
        return None

    # NB: removed_size used to be just the size of any removed indices i.e.:
    #     helpers.compute_size_by_dict(idx_removed, idx_dict)
    consumed_size = sum(_compute_size_by_dict(input_sets[p], idx_dict)
                        for p in positions)
    removed_size = consumed_size - new_size

    return [(-removed_size, cost), positions, new_input_sets]

def _update_other_results(results, best):

"""Update the positions and provisional input_sets of ``results`` based on

performing the contraction result ``best``. Remove any involving the tensors

contracted.

Parameters

----------

results : list

List of contraction results produced by ``_parse_possible_contraction``.

best : list

The best contraction of ``results`` i.e. the one that will be performed.

Returns

-------

mod_results : list

The list of modifed results, updated with outcome of ``best`` contraction.

"""

best_con = best[1]

bx, by = best_con

mod_results = []

for cost, (x, y), con_sets in results:

# Ignore results involving tensors just contracted

if x in best_con or y in best_con:

continue

# Update the input_sets

del con_sets[by - int(by > x) - int(by > y)]

del con_sets[bx - int(bx > x) - int(bx > y)]

con_sets.insert(-1, best[2][-1])

# Update the position indices

mod_con = x - int(x > bx) - int(x > by), y - int(y > bx) - int(y > by)

mod_results.append((cost, mod_con, con_sets))

return mod_results

def _greedy_path(input_sets, output_set, idx_dict, memory_limit):
    """
    Finds the path by contracting the best pair until the input list is
    exhausted. The best pair is found by minimizing the tuple
    ``(-removed_size, cost)`` built by ``_parse_possible_contraction``, where
    ``removed_size`` is the total size of the two contracted tensors minus
    the size of their result. What this amounts to is prioritizing
    matrix multiplication or inner product operations, then Hadamard like
    operations, and finally outer operations. Outer products are limited by
    ``memory_limit``. This algorithm scales cubically with respect to the
    number of elements in the list ``input_sets``.

    Parameters
    ----------
    input_sets : list
        List of sets that represent the lhs side of the einsum subscript
    output_set : set
        Set that represents the rhs side of the overall einsum subscript
    idx_dict : dictionary
        Dictionary of index sizes
    memory_limit : int
        The maximum number of elements in a temporary array

    Returns
    -------
    path : list
        The greedy contraction order within the memory limit constraint.

    Examples
    --------
    >>> isets = [set('abd'), set('ac'), set('bdc')]
    >>> oset = set()
    >>> idx_sizes = {'a': 1, 'b':2, 'c':3, 'd':4}
    >>> _greedy_path(isets, oset, idx_sizes, 5000)
    [(0, 2), (0, 1)]
    """

    # Handle trivial cases that leaked through
    if len(input_sets) == 1:
        return [(0,)]
    elif len(input_sets) == 2:
        return [(0, 1)]

    # Build up a naive cost: contracting every term in a single step
    contract = _find_contraction(range(len(input_sets)), input_sets, output_set)
    _, _, idx_removed, idx_contract = contract
    naive_cost = _flop_count(idx_contract, idx_removed, len(input_sets), idx_dict)

    # Initially iterate over all pairs
    comb_iter = itertools.combinations(range(len(input_sets)), 2)
    known_contractions = []

    path_cost = 0
    path = []

    # ``input_sets`` is rebound inside the loop; the number of steps is fixed
    # by its initial length because ``range`` is evaluated once.
    for _ in range(len(input_sets) - 1):

        # Iterate over all pairs on first step, only pairs involving the
        # newly created tensor on subsequent steps
        for positions in comb_iter:

            # Always initially ignore outer products
            if input_sets[positions[0]].isdisjoint(input_sets[positions[1]]):
                continue

            result = _parse_possible_contraction(
                positions, input_sets, output_set, idx_dict, memory_limit,
                path_cost, naive_cost)
            if result is not None:
                known_contractions.append(result)

        # If we do not have a inner contraction, rescan pairs including outer products
        if not known_contractions:

            # Then check the outer products
            for positions in itertools.combinations(range(len(input_sets)), 2):
                result = _parse_possible_contraction(
                    positions, input_sets, output_set, idx_dict, memory_limit,
                    path_cost, naive_cost)
                if result is not None:
                    known_contractions.append(result)

            # If we still did not find any remaining contractions, default back to einsum like behavior
            if not known_contractions:
                path.append(tuple(range(len(input_sets))))
                break

        # Pick the candidate with the smallest sort key
        best = min(known_contractions, key=lambda x: x[0])

        # Now propagate as many unused contractions as possible to next iteration
        known_contractions = _update_other_results(known_contractions, best)

        # Next iteration only compute contractions with the new tensor
        # All other contractions have been accounted for
        input_sets = best[2]
        new_tensor_pos = len(input_sets) - 1
        comb_iter = [(i, new_tensor_pos) for i in range(new_tensor_pos)]

        # Update path and total cost
        path.append(best[1])
        path_cost += best[0][1]

    return path

def _can_dot(inputs, result, idx_removed):

"""

Checks if we can use BLAS (np.tensordot) call and its beneficial to do so.

Parameters

----------

inputs : list of str

Specifies the subscripts for summation.

result : str

Resulting summation.

idx_removed : set

Indices that are removed in the summation

Returns

-------

type : bool

Returns true if BLAS should and can be used, else False

Notes

-----

If the operations is BLAS level 1 or 2 and is not already aligned

we default back to einsum as the memory movement to copy is more

costly than the operation itself.

Examples

--------

# Standard GEMM operation

>>> _can_dot(['ij', 'jk'], 'ik', set('j'))

True

# Can use the standard BLAS, but requires odd data movement

>>> _can_dot(['ijj', 'jk'], 'ik', set('j'))

False

# DDOT where the memory is not aligned

>>> _can_dot(['ijk', 'ikj'], '', set('ijk'))

False

"""

# All `dot` calls remove indices

if len(idx_removed) == 0:

return False

# BLAS can only handle two operands

if len(inputs) != 2:

return False

input_left, input_right = inputs

for c in set(input_left + input_right):

# can't deal with repeated indices on same input or more than 2 total

nl, nr = input_left.count(c), input_right.count(c)

if (nl > 1) or (nr > 1) or (nl + nr > 2):

return False

# can't do implicit summation or dimension collapse e.g.

# "ab,bc->c" (implicitly sum over 'a')

# "ab,ca->ca" (take diagonal of 'a')

if nl + nr - 1 == int(c in result):

return False

# Build a few temporaries

set_left = set(input_left)

set_right = set(input_right)

keep_left = set_left - idx_removed

keep_right = set_right - idx_removed

rs = len(idx_removed)

# At this point we are a DOT, GEMV, or GEMM operation

# Handle inner products

# DDOT with aligned data

if input_left == input_right:

return True

# DDOT without aligned data (better to use einsum)

if set_left == set_right:

return False

# Handle the 4 possible (aligned) GEMV or GEMM cases

# GEMM or GEMV no transpose

if input_left[-rs:] == input_right[:rs]:

return True

# GEMM or GEMV transpose both

if input_left[:rs] == input_right[-rs:]:

return True

# GEMM or GEMV transpose right

if input_left[-rs:] == input_right[-rs:]:

return True

# GEMM or GEMV transpose left

if input_left[:rs] == input_right[:rs]:

return True

# Einsum is faster than GEMV if we have to copy data

if not keep_left or not keep_right:

return False

# We are a matrix-matrix product, but we need to copy data

return True

def _interleaved_to_subscript(sublist):
    """Translate one interleaved-format index list (ints and ``Ellipsis``)
    into its subscript string."""
    subscript = ""
    for s in sublist:
        if s is Ellipsis:
            subscript += "..."
        elif isinstance(s, int):
            subscript += einsum_symbols[s]
        else:
            raise TypeError("For this input type lists must contain "
                            "either int or Ellipsis")
    return subscript


def _implicit_output_subscript(tmp_subscripts):
    """Return, in sorted order, the labels that occur exactly once in the
    comma-free input subscripts ``tmp_subscripts``."""
    output_subscript = ""
    for s in sorted(set(tmp_subscripts)):
        if s not in einsum_symbols:
            raise ValueError("Character %s is not a valid symbol." % s)
        if tmp_subscripts.count(s) == 1:
            output_subscript += s
    return output_subscript


def _parse_einsum_input(operands):
    """
    A reproduction of einsum c side einsum parsing in python.

    Parameters
    ----------
    operands : sequence
        Either ``(subscripts_str, op0, op1, ...)`` or the interleaved form
        ``(op0, sublist0, op1, sublist1, ..., [sublistout])`` where each
        sublist holds ints and/or ``Ellipsis``.

    Returns
    -------
    input_strings : str
        Parsed input strings
    output_string : str
        Parsed output string
    operands : list of array_like
        The operands to use in the numpy contraction

    Raises
    ------
    ValueError
        If no operands are given, a subscript character is invalid, the
        ``->`` or ellipsis usage is malformed, an output label is missing
        from the inputs, or the number of subscript terms differs from the
        number of operands.
    TypeError
        If an interleaved index list contains anything other than ints or
        ``Ellipsis``.

    Examples
    --------
    The operand list is simplified to reduce printing:

    >>> a = np.random.rand(4, 4)
    >>> b = np.random.rand(4, 4, 4)
    >>> _parse_einsum_input(('...a,...a->...', a, b))
    ('za,yza', 'yz', [a, b])

    >>> _parse_einsum_input((a, [Ellipsis, 0], b, [Ellipsis, 0]))
    ('za,yza', 'yz', [a, b])
    """

    if len(operands) == 0:
        raise ValueError("No input operands")

    if isinstance(operands[0], basestring):
        subscripts = operands[0].replace(" ", "")
        operands = [asanyarray(v) for v in operands[1:]]

        # Ensure all characters are valid
        for s in subscripts:
            if s in '.,->':
                continue
            if s not in einsum_symbols:
                raise ValueError("Character %s is not a valid symbol." % s)

    else:
        # Interleaved form: (operand, sublist) pairs, optionally followed by
        # one trailing output sublist.
        tmp_operands = list(operands)
        paired = 2 * (len(tmp_operands) // 2)
        operand_list = tmp_operands[0:paired:2]
        subscript_list = tmp_operands[1:paired:2]
        leftover = tmp_operands[paired:]

        output_list = leftover[-1] if leftover else None
        operands = [asanyarray(v) for v in operand_list]

        subscripts = ",".join(_interleaved_to_subscript(sub)
                              for sub in subscript_list)
        if output_list is not None:
            subscripts += "->" + _interleaved_to_subscript(output_list)

    # Check for proper "->"
    if ("-" in subscripts) or (">" in subscripts):
        invalid = (subscripts.count("-") > 1) or (subscripts.count(">") > 1)
        if invalid or (subscripts.count("->") != 1):
            raise ValueError("Subscripts can only contain one '->'.")

    # Parse ellipses
    if "." in subscripts:
        used = subscripts.replace(".", "").replace(",", "").replace("->", "")
        # Sort the free labels so the letters substituted for "..." do not
        # depend on set iteration order and are the same on every run.
        ellipse_inds = "".join(sorted(einsum_symbols_set - set(used)))
        longest = 0

        if "->" in subscripts:
            input_tmp, output_sub = subscripts.split("->")
            split_subscripts = input_tmp.split(",")
            out_sub = True
        else:
            split_subscripts = subscripts.split(',')
            out_sub = False

        for num, sub in enumerate(split_subscripts):
            if "." in sub:
                if (sub.count(".") != 3) or (sub.count("...") != 1):
                    raise ValueError("Invalid Ellipses.")

                # Take into account numerical values
                if operands[num].shape == ():
                    ellipse_count = 0
                else:
                    ellipse_count = max(operands[num].ndim, 1)
                    ellipse_count -= (len(sub) - 3)

                if ellipse_count > longest:
                    longest = ellipse_count

                if ellipse_count < 0:
                    raise ValueError("Ellipses lengths do not match.")
                elif ellipse_count == 0:
                    split_subscripts[num] = sub.replace('...', '')
                else:
                    # Take labels from the end so that operands with fewer
                    # ellipsis dimensions share the trailing labels.
                    rep_inds = ellipse_inds[-ellipse_count:]
                    split_subscripts[num] = sub.replace('...', rep_inds)

        subscripts = ",".join(split_subscripts)
        if longest == 0:
            out_ellipse = ""
        else:
            out_ellipse = ellipse_inds[-longest:]

        if out_sub:
            subscripts += "->" + output_sub.replace("...", out_ellipse)
        else:
            # Special care for outputless ellipses
            output_subscript = _implicit_output_subscript(
                subscripts.replace(",", ""))
            normal_inds = ''.join(sorted(set(output_subscript) -
                                         set(out_ellipse)))

            subscripts += "->" + out_ellipse + normal_inds

    # Build output string if does not exist
    if "->" in subscripts:
        input_subscripts, output_subscript = subscripts.split("->")
    else:
        input_subscripts = subscripts
        # Build output subscripts
        output_subscript = _implicit_output_subscript(
            subscripts.replace(",", ""))

    # Make sure output subscripts are in the input
    for char in output_subscript:
        if char not in input_subscripts:
            raise ValueError("Output character %s did not appear in the input"
                             % char)

    # Make sure number operands is equivalent to the number of terms
    if len(input_subscripts.split(',')) != len(operands):
        raise ValueError("Number of einsum subscripts must be equal to the "
                         "number of operands.")

    return (input_subscripts, output_subscript, operands)

def _einsum_path_dispatcher(*operands, **kwargs):

# NOTE: technically, we should only dispatch on array-like arguments, not

# subscripts (given as strings). But separating operands into

# arrays/subscripts is a little tricky/slow (given einsum's two supported

# signatures), so as a practical shortcut we dispatch on everything.

# Strings will be ignored for dispatching since they don't define

# __array_function__.

return operands

@array_function_dispatch(_einsum_path_dispatcher, module='numpy')

def einsum_path(*operands, **kwargs):

"""

einsum_path(subscripts, *operands, optimize='greedy')

Evaluates the lowest cost contraction order for an einsum expression by

considering the creation of intermediate arrays.

Parameters

----------

subscripts : str

Specifies the subscripts for summation.

*operands : list of array_like

These are the arrays for the operation.

optimize : {bool, list, tuple, 'greedy', 'optimal'}

Choose the type of path. If a tuple is provided, the second argument is

assumed to be the maximum intermediate size created. If only a single

argument is provided the largest input or output array size is used

as a maximum intermediate size.

* if a list is given that starts with ``einsum_path``, uses this as the

contraction path

* if False no optimization is taken

* if True defaults to the 'greedy' algorithm

* 'optimal' An algorithm that combinatorially explores all possible

ways of contracting the listed tensors and chooses the least costly

path. Scales exponentially with the number of terms in the

contraction.

* 'greedy' An algorithm that chooses the best pair contraction

at each step. Effectively, this algorithm searches the largest inner,

Hadamard, and then outer products at each step. Scales cubically with

the number of terms in the contraction. Equivalent to the 'optimal'

path for most contractions.

Default is 'greedy'.

Returns

-------

path : list of tuples

A list representation of the einsum path.

string_repr : str

A printable representation of the einsum path.

Notes

-----

The resulting path indicates which terms of the input contraction should be

contracted first, the result of this contraction is then appended to the

end of the contraction list. This list can then be iterated over until all

intermediate contractions are complete.

Coverage for /usr/lib/python3/dist-packages/numpy/core/einsumfunc.py : 6%

391 statements 22 run 369 missing 17 excluded