How to efficiently get 10% of random numbers, then 10% of the remaining 90%, etc., until all points are allocated

问题

This is what I want to do :

I have 300 000 points.

I want 10% of the points.

I then want 10% of the remaining 90% of points.

I then want 10% of the remaining 81% of points

I then want 10% of the remaining 73% of points

and so on, until I have used up all the points.

Is this the fastest way of doing it?

# All point indices, in random order.
s = np.arange(len(c_list))
np.random.shuffle(s)

# First 10%: a prefix of the shuffled order is already a uniform random
# sample.  (np.arange(n * 10 / 100) would always select indices
# 0 .. n/10 - 1, i.e. the first points of the input, however it is shuffled.)
s1 = s[:len(s) * 10 // 100]

k = s1  # every index handed out so far; always a prefix of s

while len(k) < len(s):
    # s is a permutation and k is its prefix, so the points not handed out
    # yet are simply the tail -- no "x not in k" scan over the whole array.
    r = s[len(k):]

    # Next cluster (third, fourth, fifth, ...): 10% of what is left, but never
    # fewer than one point.  Without the floor of one the slice is empty as
    # soon as fewer than 10 points remain, k stops growing and the loop never
    # ends.
    s2 = r[:max(1, len(r) * 10 // 100)]

    # Per-cluster work goes here: for each point in s2, delete it if another
    # point with a c value of 1 lies within a certain radius; otherwise set
    # its c value to 1.

    k = np.concatenate((k, s2))

Here is my actual, full point cloud simplification algorithm:

from sklearn.neighbors import NearestNeighbors
import numpy as np


###Plane Fit Function
def fitPlaneEigen(XYZ):
    """Return the curvature estimate of a small point neighbourhood.

    The estimate is the smallest eigenvalue of the neighbourhood's
    covariance matrix divided by the sum of all its eigenvalues: 0 for
    perfectly coplanar points, 1/3 for an isotropic scatter in 3-D.

    Parameters
    ----------
    XYZ : array-like, shape (n_points, 3)
        Coordinates of the neighbourhood, one point per row.

    Returns
    -------
    float
        Smallest eigenvalue over the eigenvalue sum, or 0.0 when the
        points have no spread at all (all eigenvalues are zero).
    """
    pts = np.asarray(XYZ, dtype=float)

    # np.cov subtracts the mean itself; rowvar=False treats each column
    # (x, y, z) as a variable and each row as an observation.
    cov = np.cov(pts, rowvar=False)

    # eigvalsh returns the eigenvalues of a symmetric matrix in ascending
    # order, so index 0 is the smallest one.  (The removed np.rank helper
    # returned the number of dimensions of its argument, which is always 0
    # for a scalar, so it never actually searched for the minimum.)
    e_val = np.linalg.eigvalsh(cov)

    total = e_val.sum()
    if total == 0:
        # Identical points: avoid 0/0 and report a flat neighbourhood.
        return 0.0

    return e_val[0] / total


###Input point cloud and add a colum for information content

# Read the scan from '<f_name>.txt'; autostrip removes surrounding
# whitespace from every field before it is parsed.
f_name = '10 scan rabit'
c_list = np.genfromtxt(f_name + '.txt', autostrip=True)

# For a quick experiment without the file, replace the line above with a
# small inline array or random points, e.g.:
##
##c_list = np.array([[-0.0369122 ,  0.12751199 , 0.00276757],
## [-0.0398624 ,  0.128204  ,  0.00299348],
## [-0.0328896  , 0.12613    , 0.00300653],
## [-0.0396095 ,  0.12667701 ,-0.00334699],
## [-0.0331765,   0.12681  ,   0.00839958],
## [-0.0400911   ,0.128618  ,  0.00909496],
## [-0.0328901  , 0.124518 ,  -0.00282055]])

##XYZ = np.random.randn(100, 3)
##c_list = XYZ

# Append two zero-filled columns in a single step: the first holds the
# information content of each point, the second its keep/delete flag.
extra_cols = np.zeros((c_list.shape[0], 2), dtype=c_list.dtype)
c_list = np.hstack((c_list, extra_cols))
###Assign information content to each point

#Determine the neighbourhood at each point, fit a plane and work out curvature at each point.

# Number of neighbours used for each local plane fit (the point itself is
# not counted).  At least four points are needed: three points are always
# coplanar, so their smallest covariance eigenvalue -- and with it the
# curvature -- would be zero for every point of the cloud.
n_neighbours = 7

# Search on the coordinates only.  The information-content column is written
# inside the loop below and must not take part in the distance computation.
xyz = c_list[:, 0:3]

# Ask for one extra neighbour because every point comes back as its own
# nearest neighbour; clamp so a tiny cloud never requests more points than
# it contains.
neigh = NearestNeighbors(n_neighbors=min(n_neighbours + 1, len(c_list)))
neigh.fit(xyz)

# One batched query for the whole cloud instead of one call per point.
neighbour_idx = neigh.kneighbors(xyz, return_distance=False)

for i in range(len(c_list)):
    print(i * 100 // len(c_list))  # progress in percent

    # Column 0 is the query point itself, so skip it.  Only real neighbours
    # are passed to the fit -- no zero-filled placeholder row, which would
    # otherwise drag the origin (0, 0, 0) into every neighbourhood.
    y = xyz[neighbour_idx[i, 1:]]

    c_list[i, 3] = fitPlaneEigen(y)  # curvature from the local plane fit

###Simplify


# Range of the information content (curvature), which is stored in column 3.
# Note that c_list[:][3] is *row* 3 of the array (the fourth point), not the
# column, so the column has to be selected with c_list[:, 3].
info = c_list[:, 3]
eig_max = np.max(info)
eig_min = np.min(info)

#get the point distance for flat areas from user
flat_dist = 0.01
rad_dif = flat_dist/2

# Straight line rad = grad*x + c through (eig_min, rad_dif) and (eig_max, 0):
# the flattest points get the largest search radius, the most curved the
# smallest.
if eig_max == eig_min:
    # Every point has the same curvature: the line is undefined (division by
    # zero), so use the flat-area radius everywhere.
    grad = 0.0
    c = rad_dif
else:
    grad = -(rad_dif / (eig_max - eig_min))
    c = -grad * eig_max
 #y=mx+c

##y=rad_dif
##x = (y-c)/grad
#y = grad*x+c
##print eig_min
##print x
##    

##np.random.shuffle(XYZ)
##points.shape = (10,-1) + points.shape[1:]



##l3 = [x for x in l1 if x not in l2]
#all the points
##s = np.random.choice(len(c_list),size = len(c_list),replace = False)
s = np.arange(len(c_list))
np.random.shuffle(s)

# First seed points - keep - set 1 - 10%.
# Take them from the front of the shuffled order so they are a random sample
# of the cloud.  (np.arange(n * 10 / 100) always yields indices 0 .. n/10 - 1,
# i.e. the first 10% of the input file; shuffling that array only changes the
# order in which the same points are visited.)  Integer division keeps the
# slice bound an int on Python 3 as well.
s1 = s[:len(s) * 10 // 100]

# Flag every seed point as "keep" (column 4) in one vectorised assignment.
c_list[s1, 4] = 1

# k collects every index that has been handed out so far.
k = s1

# Build the spatial index once.  The coordinates never change inside the
# loop, so refitting it for every candidate point only repeats the same work;
# the per-point search radius is passed to the query instead.
neigh = NearestNeighbors()
neigh.fit(c_list[:, 0:3])

# Indices that have not been handed out yet, in shuffled order.  A boolean
# mask replaces the "x not in k" list scan, which is quadratic in the number
# of points.
handed_out = np.zeros(len(c_list), dtype=bool)
handed_out[np.asarray(k, dtype=int)] = True
remaining = s[~handed_out[s]]

while len(remaining) > 0:

    print(len(remaining))  # points still to be processed

    # Next batch: the first 10% of what is left.
    s2 = remaining[:len(remaining) * 10 // 100]

    for j, p in enumerate(s2):

        print(j * 100 // len(s2))  # progress within this batch, in percent

        # Search radius from the straight line rad = grad*x + c, where x is
        # the information content of the point (column 3).
        rad = grad * c_list[p, 3] + c

        # The query must be 2-D (one row per query point).
        query = c_list[p, 0:3].reshape(1, -1)
        near = neigh.radius_neighbors(query, radius=rad,
                                      return_distance=False)[0]

        # Keep the point only if no point inside the radius is kept already.
        if np.sum(c_list[near, 4]) < 1:
            c_list[p, 4] = 1

    if len(s2) < 10:
        # Final batch: keep all of its points and stop.
        # NOTE(review): the rest of `remaining` beyond this batch (up to ~90
        # points) is never evaluated and stays unflagged -- confirm intended.
        c_list[s2, 4] = 1
        break

    remaining = remaining[len(s2):]


np.savetxt('letssee.txt', c_list)

回答1:

I think you can simplify things quite a bit:

# Small demo values for a readable sample output; the real case would be
# n = 300000 and k = 0.1.
n = 10
c_list = np.random.rand(n)
k = 0.5

# Size of each successive group: a fraction k of whatever is still left,
# rounded up so that every group holds at least one item.
take = []
while n > 1:
    step = int(np.ceil(n * k))
    take.append(step)
    n -= step

# Shuffle a copy so c_list keeps its order (shuffle c_list itself if the
# original order does not matter).
c_copy = np.array(c_list, copy=True)
np.random.shuffle(c_copy)

# Cut the shuffled data at the running totals of the group sizes; whatever
# follows the last cut forms the final group.
boundaries = np.cumsum(take)
groups = np.split(c_copy, boundaries)

>>> c_list
array([ 0.11444327,  0.82500303,  0.03582646,  0.90688504,  0.98204763,
        0.34391556,  0.89169497,  0.30899009,  0.50246339,  0.37812873])
>>> c_copy
array([ 0.98204763,  0.34391556,  0.89169497,  0.11444327,  0.82500303,
        0.30899009,  0.37812873,  0.90688504,  0.50246339,  0.03582646])
>>> for group in groups: print group
... 
[ 0.98204763  0.34391556  0.89169497  0.11444327  0.82500303]
[ 0.30899009  0.37812873  0.90688504]
[ 0.50246339]
[ 0.03582646]

来源：https://stackoverflow.com/questions/18850866/how-to-efficiently-get-10-of-random-numbers-then-10-of-remaining-90-etc-until

标签

performance

random

numpy

point-clouds

simplification