basic apriori method | 易学教程

#=====================================================================
#                    data and parameters                             |
#=====================================================================
# Transaction database: transaction id -> tuple of single-character items.
db = {
    10: ('a', 'c', 'd'),
    20: ('b', 'c', 'e'),
    30: ('a', 'b', 'c', 'e'),
    40: ('b', 'e'),
}
# Minimum number of supporting transactions for an itemset to count as frequent.
min_sup = 2
# The same transactions as sets (ids dropped), used for subset tests later on.
db1 = list(map(set, db.values()))
#=====================================================================
#                    main function                                   |
#=====================================================================
#---------------------------------------------------------------------
#                    scan DB once to get frequent 1-itemset         |
#---------------------------------------------------------------------
# Count the support of every single item, i.e. the number of transactions
# that contain it.
table = {}
for t in db.values():
    # De-duplicate the transaction before counting: an item repeated inside
    # one transaction must still add only 1 to its support, which is how the
    # longer itemsets are counted later (set-based subset tests).  sorted()
    # keeps the insertion order into `table` deterministic.
    for k in sorted(set(t)):
        table[k] = table.get(k, 0) + 1
# Keep only the frequent 1-itemsets: items whose support reaches min_sup.
ntable = {item: count for item, count in table.items() if count >= min_sup}
#---------------------------------------------------------------------
#                    Generate length (k+1) candidate itemsets       |
#---------------------------------------------------------------------
def setToStr(s):
    """Return the items of set *s* concatenated in sorted order.

    The resulting string is the key under which an itemset is stored in
    ``table`` / ``ntable`` (for example the set of 'c' and 'a' becomes 'ac').
    """
    return ''.join(sorted(s))


# Level-wise search.  Every itemset is encoded as a string of its items in
# sorted order, one character per item, so this code only works when each
# item is a single character.
#
# sorted() yields a real list (dict.keys() is not indexable on Python 3) and
# makes the processing order deterministic.
nlist = sorted(ntable)
q = 1
while nlist:
    #---------------------------------------------------------------------
    #                    #Step 1: self-joining                           |
    #---------------------------------------------------------------------
    # The prints below use a single pre-formatted argument so that they emit
    # the same text under both Python 2 and Python 3.
    print("-" * 50)
    print("this is the  %s th iteration." % q)
    q += 1
    print("item list:  %s" % (nlist,))

    # Join every pair of frequent k-itemsets that agree on their first k-1
    # items; the two differing last items are appended in sorted order so the
    # (k+1)-candidate is again a sorted-character string.
    candidates = []
    for k1, a in enumerate(nlist):
        for b in nlist[k1 + 1:]:
            if a[:-1] == b[:-1]:
                candidates.append(
                    a[:-1] + min(a[-1], b[-1]) + max(a[-1], b[-1]))
    #---------------------------------------------------------------------
    #                    #Step 2: pruning                                |
    #---------------------------------------------------------------------
    print('candidates(after self-joining): %s' % (candidates,))
    # Keep a candidate only if every k-subset obtained by dropping one item
    # is itself frequent (a key of ntable).  Survivors are stored as sets so
    # they can be subset-tested against the transactions.
    cp = [set(c) for c in candidates
          if all(c[:k] + c[k + 1:] in ntable for k in range(len(c)))]
    print('candidates(after pruning): %s' % (cp,))
    #---------------------------------------------------------------------
    #                    Test the candidates against DB                 |
    #---------------------------------------------------------------------
    table = {}
    for c in cp:
        support = sum(1 for t in db1 if c.issubset(t))
        # Candidates that occur in no transaction are left out of the table.
        if support:
            table[setToStr(c)] = support
    print('candidates with frequency: %s' % (table,))

    # Frequent itemsets of this length become the input of the next round;
    # the loop ends once no itemset reaches min_sup.
    ntable = {key: count for key, count in table.items() if count >= min_sup}
    print('current frequent pattern: %s' % (ntable,))
    nlist = sorted(ntable)
注意：这个版本的 Apriori 算法只是初级版本，只支持由单个字符表示的项（例如 'a'、'b'）；对于由多个字符表示的项（例如 '123'、'234'），该实现不能使用。
来源：https://www.cnblogs.com/huaweiquankaiyueweiyuan/p/4964424.html
标签
apriori
table