I\'m trying to run over the parameters space of a 6 parameter function to study it\'s numerical behavior before trying to do anything complex with it so I\'m searching for a
The following numpy implementation should be approx. 2x the speed of the given answer:
def cartesian2(arrays):
arrays = [np.asarray(a) for a in arrays]
shape = (len(x) for x in arrays)
ix = np.indices(shape, dtype=int)
ix = ix.reshape(len(arrays), -1).T
for n, arr in enumerate(arrays):
ix[:, n] = arrays[n][ix[:, n]]
return ix
It looks like you want a grid to evaluate your function, in which case you can use numpy.ogrid
(open) or numpy.mgrid
(fleshed out):
import numpy
my_grid = numpy.mgrid[[slice(0,1,0.1)]*6]
Here's yet another way, using pure NumPy, no recursion, no list comprehension, and no explicit for loops. It's about 20% slower than the original answer, and it's based on np.meshgrid.
def cartesian(*arrays):
mesh = np.meshgrid(*arrays) # standard numpy meshgrid
dim = len(mesh) # number of dimensions
elements = mesh[0].size # number of elements, any index will do
flat = np.concatenate(mesh).ravel() # flatten the whole meshgrid
reshape = np.reshape(flat, (dim, elements)).T # reshape and transpose
return reshape
For example,
x = np.arange(3)
a = cartesian(x, x, x, x, x)
print(a)
gives
[[0 0 0 0 0]
[0 0 0 0 1]
[0 0 0 0 2]
...,
[2 2 2 2 0]
[2 2 2 2 1]
[2 2 2 2 2]]
Pandas merge
offers a naive, fast solution to the problem:
# given the lists
x, y, z = [1, 2, 3], [4, 5], [6, 7]
# get dfs with same, constant index
x = pd.DataFrame({'x': x}, index=np.repeat(0, len(x))
y = pd.DataFrame({'y': y}, index=np.repeat(0, len(y))
z = pd.DataFrame({'z': z}, index=np.repeat(0, len(z))
# get all permutations stored in a new df
df = pd.merge(x, pd.merge(y, z, left_index=True, righ_index=True),
left_index=True, right_index=True)
In newer version of numpy
(>1.8.x), numpy.meshgrid() provides a much faster implementation:
@pv's solution
In [113]:
%timeit cartesian(([1, 2, 3], [4, 5], [6, 7]))
10000 loops, best of 3: 135 µs per loop
In [114]:
cartesian(([1, 2, 3], [4, 5], [6, 7]))
Out[114]:
array([[1, 4, 6],
[1, 4, 7],
[1, 5, 6],
[1, 5, 7],
[2, 4, 6],
[2, 4, 7],
[2, 5, 6],
[2, 5, 7],
[3, 4, 6],
[3, 4, 7],
[3, 5, 6],
[3, 5, 7]])
numpy.meshgrid() use to be 2D only, now it is capable of ND. In this case, 3D:
In [115]:
%timeit np.array(np.meshgrid([1, 2, 3], [4, 5], [6, 7])).T.reshape(-1,3)
10000 loops, best of 3: 74.1 µs per loop
In [116]:
np.array(np.meshgrid([1, 2, 3], [4, 5], [6, 7])).T.reshape(-1,3)
Out[116]:
array([[1, 4, 6],
[1, 5, 6],
[2, 4, 6],
[2, 5, 6],
[3, 4, 6],
[3, 5, 6],
[1, 4, 7],
[1, 5, 7],
[2, 4, 7],
[2, 5, 7],
[3, 4, 7],
[3, 5, 7]])
Note that the order of the final resultant is slightly different.
Here's a pure-numpy implementation. It's about 5× faster than using itertools.
import numpy as np
def cartesian(arrays, out=None):
"""
Generate a cartesian product of input arrays.
Parameters
----------
arrays : list of array-like
1-D arrays to form the cartesian product of.
out : ndarray
Array to place the cartesian product in.
Returns
-------
out : ndarray
2-D array of shape (M, len(arrays)) containing cartesian products
formed of input arrays.
Examples
--------
>>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
array([[1, 4, 6],
[1, 4, 7],
[1, 5, 6],
[1, 5, 7],
[2, 4, 6],
[2, 4, 7],
[2, 5, 6],
[2, 5, 7],
[3, 4, 6],
[3, 4, 7],
[3, 5, 6],
[3, 5, 7]])
"""
arrays = [np.asarray(x) for x in arrays]
dtype = arrays[0].dtype
n = np.prod([x.size for x in arrays])
if out is None:
out = np.zeros([n, len(arrays)], dtype=dtype)
m = n / arrays[0].size
out[:,0] = np.repeat(arrays[0], m)
if arrays[1:]:
cartesian(arrays[1:], out=out[0:m, 1:])
for j in xrange(1, arrays[0].size):
out[j*m:(j+1)*m, 1:] = out[0:m, 1:]
return out