I have a scatter plot that gets sorted into 4 bins. These are separated by two arcs and a line in the middle (see figure below).
Patches have a test for containing points or not: contains_point
and even for arrays of points:contains_points
Just to play with I have a code snippet for you, which you can add between the part where you're adding your patches and the #Sorting the coordinates into bins
codeblock.
It adds two additional (transparent) ellipses for calculating whether the arcs would contain a point if they were fully closed ellipses. Your bin calculation is then just a boolean combination of tests: whether a point belongs to the big oval, to the left or right ellipse, and whether it has a negative or positive x-coordinate.
# Two invisible (alpha=0) closed ellipses standing in for the arcs, so the
# patch hit-test can be applied to them.
ov1 = mpl.patches.Ellipse(ang1, 70, 110, alpha=0)
ov2 = mpl.patches.Ellipse(ang2, 70, 110, alpha=0)
ax.add_patch(ov1)
ax.add_patch(ov2)

for px, py in zip(X, Y):
    # The hit-test is done on display coordinates, so convert the data point
    # once and reuse it for all three patches.
    display_xy = ax.transData.transform(([px, py]))
    in_oval = Oval.contains_point(display_xy, 0)
    in_left = ov1.contains_point(display_xy, 0)
    in_right = ov2.contains_point(display_xy, 0)

    if not in_oval:
        n_bin = -1
    elif in_left:
        n_bin = 1
    elif in_right:
        n_bin = 4
    elif px < 0:
        n_bin = 2
    elif px > 0:
        n_bin = 3
    else:
        # px == 0 inside the oval but in neither small ellipse: no bin.
        n_bin = -1

    label = 'in Bin ' + str(n_bin) if n_bin > 0 else 'outside'
    print('({:>2}/{:>2}) is {}'.format(px, py, label))
The output is:
(24/94) is in Bin 3
(15/61) is in Bin 3
(71/76) is in Bin 4
(72/83) is in Bin 4
( 6/69) is in Bin 3
(13/86) is in Bin 3
(77/78) is outside
(52/57) is in Bin 4
(52/45) is in Bin 4
(62/94) is in Bin 4
(46/82) is in Bin 4
(43/74) is in Bin 4
(31/56) is in Bin 4
(35/70) is in Bin 4
(41/94) is in Bin 4
Note that you still have to decide which bin a point with x-coordinate 0 belongs to. At the moment such a point is reported as outside (unless it lies in one of the two small ellipses), because neither on_left (x < 0) nor on_right (x > 0) is true for it.
PS: Thanks to @ImportanceOfBeingErnest for the hint to the necessary transformation: https://stackoverflow.com/a/49112347/8300135
Note: for all the following EDITS you'll need to import numpy as np
EDIT:
Function for counting the bin distribution per X, Y
array input:
def bin_counts(X, Y):
    """Count how many of the points (X[i], Y[i]) fall into bins 1 to 4.

    Uses the module-level ``ax``, ``Oval``, ``ov1`` and ``ov2``.

    Returns a dict mapping bin number (1-4) to the number of points in it.
    Points outside the oval, and points with x == 0 that are in neither
    small ellipse, are not counted in any bin.
    """
    x_values = np.array(X)
    # Patch hit-tests are done on display coordinates.
    display_pts = ax.transData.transform(np.array([X, Y]).T)

    inside_oval = Oval.contains_points(display_pts, 0)
    inside_left = ov1.contains_points(display_pts, 0)
    inside_right = ov2.contains_points(display_pts, 0)

    masks = (
        (1, inside_oval & inside_left),
        (2, inside_oval & (x_values < 0) & ~inside_left),
        (3, inside_oval & (x_values > 0) & ~inside_right),
        (4, inside_oval & inside_right),
    )

    bc = dict()
    for bin_no, mask in masks:
        bc[bin_no] = np.sum(mask)
    return bc
Will lead to this result:
bin_counts(X, Y)
Out: {1: 0, 2: 0, 3: 4, 4: 10}
EDIT2: many rows in two 2D-arrays for X and Y:
# Fixed seed so the random test data is the same on every run.
np.random.seed(42)
# 100 rows of 10 integer coordinates each; one row is one bin_counts input.
X = np.random.randint(-80, 80, size=(100, 10))
Y = np.random.randint(0, 120, size=(100, 10))
looping over all the rows:
# One bin-count dict per row of the 2D inputs.
for row_x, row_y in zip(X, Y):
    row_counts = bin_counts(row_x, row_y)
    print(row_counts)
result:
{1: 1, 2: 2, 3: 6, 4: 0}
{1: 1, 2: 0, 3: 4, 4: 2}
{1: 5, 2: 2, 3: 1, 4: 1}
...
{1: 3, 2: 2, 3: 2, 4: 0}
{1: 2, 2: 4, 3: 1, 4: 1}
{1: 1, 2: 1, 3: 6, 4: 2}
EDIT3: for returning not the number of points in each bin, but an array with four arrays containing the x,y-coordinates of the points in each bin, use the following:
# Sample points: X[i] and Y[i] are the coordinates of the i-th point.
X = [24,15,71,72,6,13,77,52,52,62,46,43,31,35,41]
Y = [94,61,76,83,69,86,78,57,45,94,82,74,56,70,94]
def bin_points(X, Y):
    """Split the points (X[i], Y[i]) into the four bins.

    Uses the module-level ``ax``, ``Oval``, ``ov1`` and ``ov2``.

    Returns a list ``[bp1, bp2, bp3, bp4]``; each entry is an array with one
    ``[x, y]`` row per point in that bin (shape ``(0, 2)`` when the bin is
    empty).
    """
    X = np.array(X)
    Y = np.array(Y)
    # Patch hit-tests are done on display coordinates.
    display_pts = ax.transData.transform(np.array([X, Y]).T)

    inside_oval = Oval.contains_points(display_pts, 0)
    inside_left = ov1.contains_points(display_pts, 0)
    inside_right = ov2.contains_points(display_pts, 0)
    left_half = X < 0
    right_half = X > 0

    def select(mask):
        # Pair up the x and y values picked out by the boolean mask.
        return np.array([X[mask], Y[mask]]).T

    return [
        select(inside_oval & inside_left),
        select(inside_oval & left_half & ~inside_left),
        select(inside_oval & right_half & ~inside_right),
        select(inside_oval & inside_right),
    ]
# Show the four per-bin coordinate arrays for the sample points.
print(bin_points(X, Y))
[array([], shape=(0, 2), dtype=int32), array([], shape=(0, 2), dtype=int32), array([[24, 94],
[15, 61],
[ 6, 69],
[13, 86]]), array([[71, 76],
[72, 83],
[52, 57],
[52, 45],
[62, 94],
[46, 82],
[43, 74],
[31, 56],
[35, 70],
[41, 94]])]
...and again, for applying this to the big 2D-arrays, just iterate over them:
# Fixed seed so the random test data is the same on every run.
np.random.seed(42)
X = np.random.randint(-100, 100, size=(100, 10))
Y = np.random.randint(-40, 140, size=(100, 10))
bincol = ['r', 'g', 'b', 'y', 'k']

for row_x, row_y in zip(X, Y):
    per_bin = bin_points(row_x, row_y)
    # NOTE: bin_points (as defined in this file) returns four arrays, so the
    # index stays below 4 here and the 'x' marker / fifth colour are unused.
    for idx, pts in enumerate(per_bin):
        marker_style = 'o' if idx < 4 else 'x'
        ax.scatter(*pts.T, c=bincol[idx], marker=marker_style)
This is a version where I sort it into ellipses. As the OP is using simple geometric shapes one can test this with a simple formula, i.e. not "asking" the patch. I generalized it for n arcs with the small disadvantage that bin numbering is not from left to right, but this can be taken care of elsewhere. Output is of type
[ [ [x,y], [x,y],...], ... ]
i.e. a list of x,y pairs for each bin. Note that the bin numbering here runs from -3 to 3, with 0 meaning outside.
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
def in_ellipse(xy, x0y0ab):
    """Return whether the point ``xy`` lies strictly inside an ellipse.

    ``x0y0ab`` is ``[centre, width, height]``: ``centre`` is an ``(x0, y0)``
    pair, and ``width`` / ``height`` are the full axis lengths (they are
    halved here to get the semi-axes). Points on the boundary count as
    outside.
    """
    px, py = xy
    cx, cy = x0y0ab[0]
    semi_x = x0y0ab[1] / 2.
    semi_y = x0y0ab[2] / 2.
    x_term = (px - cx)**2 / semi_x**2
    y_term = (py - cy)**2 / semi_y**2
    return x_term + y_term < 1
def sort_into_bins(xy, mainE, eList):
    """Return the signed bin number of the point ``xy``.

    The point is mirrored to non-negative x, so the ellipses in ``eList``
    are tested on one side only. The magnitude is 0 outside ``mainE``;
    inside it, it is 1 plus the number of ellipses in ``eList`` tried
    before the first one containing the point (so the order of ``eList``
    matters). The sign is taken from the original x-coordinate.
    """
    mirrored = (np.abs(xy[0]), xy[1])
    if not in_ellipse(mirrored, mainE):
        magnitude = 0
    else:
        magnitude = 1
        for ellipse in eList:
            if in_ellipse(mirrored, ellipse):
                break
            magnitude += 1
    signed = np.copysign(magnitude, xy[0])
    return int(signed)
# Demo: draw the main ellipse, the mirrored arcs and the centre line, then
# scatter 150 random points grouped by the bin sort_into_bins assigns them.
X = 200 * np.random.random(150) - 100
Y = 140 * np.random.random(150) - 70 + 60

fig, ax = plt.subplots()
ax.set_xlim(-100, 100)
ax.set_ylim(-40, 140)
ax.grid(False)

BIN_23_X = 0
# Every ellipse is [centre, width, height].
mainEllipse = [np.array([0, 60]), 160, 130]
# Order matters: sort_into_bins stops at the first ellipse containing the point.
allEllipses = [[np.array([60, 60]), 70., 110.], [np.array([60, 60]), 100, 160]]

Halfway = mpl.lines.Line2D((BIN_23_X, BIN_23_X), (0, 125), color='#808080', lw=1.5, alpha=0.8, zorder=1)
Oval = mpl.patches.Ellipse(mainEllipse[0], mainEllipse[1], mainEllipse[2], lw=3, edgecolor='#808080', facecolor='#808080', alpha=0.2)
ax.add_patch(Oval)
ax.add_line(Halfway)

# Draw every arc twice: as given, and mirrored about x = 0.
for ell in allEllipses:
    for x_sign in (1, -1):
        arc = mpl.patches.Arc(ell[0] * np.array([x_sign, 1]), ell[1], ell[2], angle=0, color='#808080', lw=2, linestyle=':')
        ax.add_patch(arc)

binDict = dict()
for x, y in zip(X, Y):
    binDict[(x, y)] = sort_into_bins((x, y), mainEllipse, allEllipses)

# sort_into_bins returns values in [-maxBin, maxBin]; derive the range from
# the number of ellipses instead of hard-coding 3.
maxBin = len(allEllipses) + 1
rowEval = [[] for _ in range(2 * maxBin + 1)]
# .items() works on Python 2 and 3; .iteritems() does not exist on Python 3.
for key, val in binDict.items():
    rowEval[val + maxBin].append(key)

# One scatter call per bin, from -maxBin to maxBin. Explicit x/y lists keep
# the call valid (and the colour cycle aligned) when a bin has no points.
for points in rowEval:
    xs = [p[0] for p in points]
    ys = [p[1] for p in points]
    plt.scatter(xs, ys)
plt.show()
showing
Note that I used the fact of symmetry with respect to x=0. If the ellipses are shifted with respect to x the code has to be modified a little. Also note that the order in which the ellipses are provided matters!