"""Apriori frequent-itemset mining over a small in-memory transaction table.

Itemsets are represented as strings whose characters are the items in
sorted order (e.g. ``'bce'``), so every item must be a single character.
Multi-character items such as ``'123'`` are not supported by this
representation.

Running the module prints the candidates and frequent itemsets of every
level; all frequent itemsets found are also collected in
``frequent_patterns``.  The code runs unchanged on Python 2.7 and Python 3.
"""

#=====================================================================
# data and parameters
#=====================================================================
db = {
    10: ('a', 'c', 'd'),
    20: ('b', 'c', 'e'),
    30: ('a', 'b', 'c', 'e'),
    40: ('b', 'e'),
}
# Transactions as sets, so subset tests are cheap and duplicates vanish.
db1 = [set(t) for t in db.values()]
# Minimum number of transactions an itemset must appear in to be frequent.
min_sup = 2


#=====================================================================
# helpers
#=====================================================================
def setToStr(s):
    """Return the items of *s* concatenated in sorted order."""
    return ''.join(sorted(s))


def _show(*parts):
    """Print *parts* separated by single spaces.

    A single parenthesised argument parses the same way as a Python 2
    print statement and as a Python 3 function call, so the output is
    identical on both interpreters.
    """
    print(' '.join(str(p) for p in parts))


def _count_single_items(transactions):
    """Return {item: number of transactions containing that item}."""
    counts = {}
    for transaction in transactions:
        # set() makes an item repeated inside one transaction count once,
        # consistent with the subset-based counting of longer itemsets.
        for item in set(transaction):
            counts[item] = counts.get(item, 0) + 1
    return counts


def _frequent_only(counts, threshold):
    """Return the entries of *counts* whose support is >= *threshold*."""
    return {key: n for key, n in counts.items() if n >= threshold}


def _self_join(itemsets):
    """Join every pair of k-itemsets sharing their first k-1 items.

    Each joined candidate is the common prefix followed by the two
    differing last items in sorted order.
    """
    joined = []
    for i, a in enumerate(itemsets):
        for b in itemsets[i + 1:]:
            if a[:-1] == b[:-1]:
                low, high = sorted((a[-1], b[-1]))
                joined.append(a[:-1] + low + high)
    return joined


def _prune(candidates, frequent):
    """Keep candidates whose every one-item-shorter subset is in *frequent*.

    Survivors are returned as sets of items, ready for subset tests.
    """
    kept = []
    for c in candidates:
        if all(c[:k] + c[k + 1:] in frequent for k in range(len(c))):
            kept.append(set(c))
    return kept


def _count_support(candidate_sets, transactions):
    """Return {itemset string: number of transactions containing it}.

    Candidates contained in no transaction get no entry at all.
    """
    counts = {}
    for c in candidate_sets:
        key = setToStr(c)
        for transaction in transactions:
            if c.issubset(transaction):
                counts[key] = counts.get(key, 0) + 1
    return counts


#=====================================================================
# main
#=====================================================================
#---------------------------------------------------------------------
# scan DB once to get frequent 1-itemsets
#---------------------------------------------------------------------
table = _count_single_items(db1)
ntable = _frequent_only(table, min_sup)

# Every frequent itemset found so far, across all levels.
frequent_patterns = dict(ntable)

#---------------------------------------------------------------------
# generate length (k+1) candidates from the frequent k-itemsets until
# a level produces no frequent itemset
#---------------------------------------------------------------------
# Sorted so the join order and the printed output are deterministic.
nlist = sorted(ntable)
q = 1
while nlist:
    _show("-" * 50)
    _show("this is the ", q, "th iteration.")
    q += 1
    _show("item list: ", nlist)

    # Step 1: self-joining
    candidates = _self_join(nlist)
    _show('candidates(after self-joining):', candidates)

    # Step 2: pruning
    cp = _prune(candidates, ntable)
    _show('candidates(after pruning):', cp)

    # Test the candidates against DB
    table = _count_support(cp, db1)
    _show('candidates with frequency:', table)

    ntable = _frequent_only(table, min_sup)
    _show('current frequent pattern:', ntable)

    frequent_patterns.update(ntable)
    nlist = sorted(ntable)
注意:这个版本的 Apriori 算法只是初级版本,每个项必须是单个字符(例如 'a'、'b');对于 '123'、'234' 这样由多个字符组成的项,该实现不适用。
版权声明:本文为博主原创文章,未经博主允许不得转载。
来源:https://www.cnblogs.com/huaweiquankaiyueweiyuan/p/4964424.html