问题
This is what I want to do:
I have 300 000 points.
I want 10% of the points.
I then want 10% of the remaining 90% of points.
I then want 10% of the remaining 81% of points
I then want 10% of the remaining 73% of points
and so on, until I have gone through all the points.
Is this the fastest way of doing it?
# All the point indices in random order. Every later cluster is a consecutive
# slice of this single permutation, so no "x not in k" membership scan is needed.
s = np.random.permutation(len(c_list))
# First 10%: the head of the shuffled order is already a random sample of all points.
s1 = s[:len(s) * 10 // 100]
k = s1
while len(k) < len(s):
    # The points not taken yet are exactly the tail of the shuffled order.
    r = s[len(k):]
    # Next cluster: 10% of what remains, but never fewer than one point;
    # otherwise the loop would spin forever once fewer than 10 points are left.
    s2 = r[:max(1, len(r) * 10 // 100)]
    # 4th, 5th, 6th, 7th cluster etc.: here I go through each point in this
    # cluster; if I find another point with a c value of 1 within a certain
    # radius I delete the point, otherwise I set its c value to 1.
    k = np.concatenate((k, s2))
Here is my actual, full point cloud simplification algorithm:
from sklearn.neighbors import NearestNeighbors
import numpy as np
###Plane Fit Function
def fitPlaneEigen(XYZ):
    """Return the curvature estimate of a small point neighbourhood.

    The covariance matrix of the points is eigen-decomposed and the share of
    the total variance carried by the smallest eigenvalue is returned. The
    smallest eigenvalue belongs to the direction of least spread, i.e. the
    normal of the best-fit plane, so the result is 0 for perfectly planar
    points and grows as the points leave the plane.

    Parameters
    ----------
    XYZ : array-like of shape (n_points, n_dims)
        One point per row (x, y, z in this script).

    Returns
    -------
    float
        smallest eigenvalue / sum of all eigenvalues. The result is nan when
        every eigenvalue is zero (all points coincide).
    """
    XYZ = np.asarray(XYZ, dtype=float)
    # np.cov expects one variable per row, hence the transpose; it removes the
    # mean itself, so no manual centring is required.
    cov = np.cov(XYZ.T)
    # Eigenvalues of a symmetric matrix come back in ascending order, so
    # index 0 is the smallest one. (The former np.rank(min(e_val)) lookup
    # always evaluated to 0 and np.rank no longer exists in NumPy.)
    e_val = np.linalg.eigvalsh(cov)
    return e_val[0] / e_val.sum()
###Read the point cloud and append the per-point bookkeeping columns
f_name = '10 scan rabit'
c_list = np.genfromtxt(str(f_name)+'.txt', autostrip=True)
##
##c_list = np.array([[-0.0369122 , 0.12751199 , 0.00276757],
## [-0.0398624 , 0.128204 , 0.00299348],
## [-0.0328896 , 0.12613 , 0.00300653],
## [-0.0396095 , 0.12667701 ,-0.00334699],
## [-0.0331765, 0.12681 , 0.00839958],
## [-0.0400911 ,0.128618 , 0.00909496],
## [-0.0328901 , 0.124518 , -0.00282055]])
##XYZ = np.random.randn(100, 3)
##c_list = XYZ
# Append two zero-filled columns in a single step:
#   column 3 -> information content of the point
#   column 4 -> keep/delete flag
c_list = np.hstack((c_list, np.zeros((c_list.shape[0], 2), dtype=c_list.dtype)))
###Assign information content to each point
# Determine the neighbourhood of each point, fit a plane to it and store the
# resulting curvature in column 3.
#
# 7 points per neighbourhood (the query point plus 6 others). A plane fit on
# only 3 points is always exact, which would make every curvature ~0.
n_neighbours = min(7, len(c_list))
# Search on the coordinates only; columns 3 and 4 are bookkeeping, not geometry.
xyz = c_list[:, 0:3]
neigh = NearestNeighbors(n_neighbors=n_neighbours)
neigh.fit(xyz)
# One batched query for the whole cloud instead of one query per point.
neighbour_idx = neigh.kneighbors(xyz, return_distance=False)
for i in range(len(c_list)):
    print(i * 100 // len(c_list))  # progress in percent
    # Skip the first neighbour: it is the query point itself. Index the
    # coordinates directly so that no dummy [0, 0, 0] row enters the fit.
    y = xyz[neighbour_idx[i, 1:]]
    c_list[i, 3] = fitPlaneEigen(y)  # fit plane, keep curvature
###Simplify
# Range of the information content. It is stored in COLUMN 3, so it has to be
# addressed as c_list[:, 3]; c_list[:][3] would be row 3 (a single point).
eig_max = np.max(c_list[:, 3])
eig_min = np.min(c_list[:, 3])
# get the point distance for flat areas from user
flat_dist = 0.01
rad_dif = flat_dist / 2
# Straight line  rad = grad * x + c  mapping information content x to a search
# radius: it passes through (eig_min, rad_dif) and (eig_max, 0), so the
# flattest points get the largest radius.
if eig_max > eig_min:
    grad = -(rad_dif / (eig_max - eig_min))
    c = -(grad) * (eig_max)
else:
    # Every point has the same information content: the line degenerates to
    # the constant flat-area radius (and a division by zero is avoided).
    grad = 0.0
    c = rad_dif
# All the point indices in random order. Each batch below is a consecutive
# slice of this one permutation, which replaces the quadratic
# "[x for x in s if x not in k]" scan.
s = np.random.permutation(len(c_list))

# first seed points - keep - set 1 - 10%
# Taken from the shuffled order so the seeds are a random sample of the cloud.
s1 = s[:len(s) * 10 // 100]
c_list[s1, 4] = 1

# Build the spatial index once, on the xyz columns only. The search radius
# differs per point and is passed at query time.
neigh = NearestNeighbors()
neigh.fit(c_list[:, 0:3])

k = s1  # indices handled so far; always equal to s[:len(k)]
while len(k) < len(s):
    print(len(s) - len(k))  # points still to process
    r = s[len(k):]  # remaining points
    if len(r) < 10:
        # i.e. fewer than 10 points left: keep them all and stop. Testing the
        # remaining count (not the batch size) makes sure no point is skipped.
        c_list[r, 4] = 1
        break
    s2 = r[:len(r) * 10 // 100]  # next batch: 10% of the remaining points
    for j, p in enumerate(s2):
        print(j * 100 // len(s2))  # progress within the batch, in percent
        # Search radius from the point's information content: y = grad*x + c
        rad = grad * c_list[p, 3] + c
        # radius_neighbors expects a 2-D query, hence the p:p + 1 slice.
        near = neigh.radius_neighbors(c_list[p:p + 1, 0:3], radius=rad,
                                      return_distance=False)[0]
        # Keep the point only if no already-kept point lies within the radius.
        if not np.any(c_list[near, 4]):
            c_list[p, 4] = 1
    k = np.concatenate((k, s2))

np.savetxt('letssee.txt', c_list)
回答1:
I think you can simplify things quite a bit:
# Real use: n = 300000 and k = 0.1. Smaller values keep the sample output short.
n = 10
c_list = np.random.rand(n)
k = 0.5

# Size of each successive group: a fraction k of whatever is still left.
take = []
while n > 1:
    # Rounding up means every round takes at least one item.
    take += [int(np.ceil(k * n))]
    n = n - take[-1]

# Shuffle a copy so c_list keeps its original order (shuffle c_list itself
# if you don't need that), then cut it at the cumulative group sizes.
c_copy = c_list.copy()
np.random.shuffle(c_copy)
groups = np.split(c_copy, np.cumsum(take))
>>> c_list
array([ 0.11444327, 0.82500303, 0.03582646, 0.90688504, 0.98204763,
0.34391556, 0.89169497, 0.30899009, 0.50246339, 0.37812873])
>>> c_copy
array([ 0.98204763, 0.34391556, 0.89169497, 0.11444327, 0.82500303,
0.30899009, 0.37812873, 0.90688504, 0.50246339, 0.03582646])
>>> for group in groups: print group
...
[ 0.98204763 0.34391556 0.89169497 0.11444327 0.82500303]
[ 0.30899009 0.37812873 0.90688504]
[ 0.50246339]
[ 0.03582646]
来源:https://stackoverflow.com/questions/18850866/how-to-efficiently-get-10-of-random-numbers-then-10-of-remaining-90-etc-until