Python Apriori

夙愿已清 提交于 2019-12-22 17:05:56
class Multi_Item:
    """An itemset paired with a running support counter."""

    def __init__(self):
        """Create an entry with an empty itemset and a support of zero."""
        self.itemset = []
        self.support = 0

    def __str__(self):
        """Render the entry as ``<itemset>:<support>``."""
        template = "{}:{}"
        return template.format(self.itemset, self.support)

    def set_support(self):
        """Increase the support counter by one."""
        self.support = self.support + 1

# Alternative sample dataset with integer items:
# D = [[1, 2, 5], [2, 4], [2, 3], [1, 2, 4], [1, 3], [2, 3], [1, 3], [1, 2, 3, 5], [1, 2, 3]]

# Sample transaction database: one list of single-letter items per transaction.
D = [
    ['M', 'O', 'N', 'K', 'E', 'Y'],
    ['D', 'O', 'N', 'K', 'E', 'Y'],
    ['M', 'A', 'K', 'E'],
    ['M', 'U', 'C', 'K', 'Y'],
    ['C', 'O', 'O', 'K', 'I', 'E'],
]
def create_C(D):
    """Build the candidate 1-itemsets (C1) together with their support counts.

    Args:
        D: Iterable of transactions; each transaction is an iterable of
            hashable items.

    Returns:
        A list of ``Multi_Item`` objects, one per distinct item, in order of
        first appearance in ``D``.  ``itemset`` is a one-element list holding
        the item and ``support`` is the number of transactions that contain
        it.
    """
    # Plain dicts preserve insertion order, which keeps first-seen ordering.
    counts = {}
    for transaction in D:
        # Support counts transactions, not occurrences, so an item repeated
        # inside one transaction must be counted only once.
        for item in dict.fromkeys(transaction):
            counts[item] = counts.get(item, 0) + 1

    C = []
    for item, support in counts.items():
        candidate = Multi_Item()
        # Stored as a list so every level uses the same itemset shape and
        # set(itemset) works for non-iterable items such as integers.
        candidate.itemset = [item]
        candidate.support = support
        C.append(candidate)
    return C

def find_frequent_l_itemsets(C, minsup):
    """Return the candidates whose support reaches the minimum support.

    Args:
        C: Iterable of candidate objects exposing a ``support`` attribute.
        minsup: Minimum support; candidates with ``support < minsup`` are
            dropped.

    Returns:
        A new list with the surviving candidates in their original order.
        ``C`` itself is not modified.
    """
    # Filtering in a single pass avoids the copy-then-remove pattern, which is
    # quadratic and matches elements by equality rather than by position.
    return [candidate for candidate in C if candidate.support >= minsup]


def _as_item_set(itemset):
    """Return *itemset* as a frozenset, treating scalars and strings as one item."""
    if isinstance(itemset, (str, bytes)):
        return frozenset([itemset])
    try:
        return frozenset(itemset)
    except TypeError:
        # Not iterable (e.g. an int): it is a single item.
        return frozenset([itemset])


def L2C(L, item_set_number=2):
    """Generate candidate itemsets of a given size by joining frequent itemsets.

    Args:
        L: Sequence of objects exposing an ``itemset`` attribute.  The
            attribute may be a collection of items or a single item.
        item_set_number: Number of items each generated candidate must have.

    Returns:
        A list of candidates, each a list of items.  Every distinct
        combination appears exactly once, in the order it was first
        generated.  The items inside a candidate are sorted (falling back to
        ordering by ``repr`` when the items are not mutually comparable) so
        the output is reproducible between runs.
    """
    groups = [_as_item_set(entry.itemset) for entry in L]

    seen = set()
    item_set = []
    for position, first in enumerate(groups):
        # Union is symmetric, so pairing each entry only with itself and the
        # entries after it covers every combination once.
        for second in groups[position:]:
            merged = first | second
            if len(merged) != item_set_number or merged in seen:
                continue
            # De-duplicate on the set itself; comparing lists built from sets
            # would depend on arbitrary iteration order.
            seen.add(merged)
            try:
                ordered = sorted(merged)
            except TypeError:
                ordered = sorted(merged, key=repr)
            item_set.append(ordered)
    return item_set


# Compute support counts
def calc_support(item_set, D):
    """Count, for every candidate itemset, the transactions that contain it.

    Args:
        item_set: Iterable of candidate itemsets, each an iterable of
            hashable items.
        D: Iterable of transactions, each an iterable of hashable items.

    Returns:
        A list of ``Multi_Item`` objects in the same order as ``item_set``.
        Each one carries the candidate as ``itemset`` and the number of
        transactions that are a superset of it as ``support``.
    """
    # Convert every transaction once instead of once per candidate.
    transactions = [set(transaction) for transaction in D]

    data_set = []
    for item in item_set:
        wanted = set(item)
        index = Multi_Item()
        # Assigned outside the transaction loop so the candidate is recorded
        # even when there are no transactions at all.
        index.itemset = item
        index.support = sum(1 for transaction in transactions if wanted <= transaction)
        data_set.append(index)

    return data_set

def apriori(min_sup, data=None):
    """Return the largest frequent itemsets found by a level-wise search.

    Starting from the frequent single items, candidates one item larger are
    generated, counted and filtered repeatedly until a level yields no
    frequent itemset.

    Args:
        min_sup: Minimum support, expressed as an absolute transaction count.
        data: Transactions to mine.  Defaults to the module-level ``D``.

    Returns:
        The list of frequent itemset objects of the largest size for which at
        least one itemset reaches ``min_sup``.  An empty list is returned
        when not even a single item is frequent.
    """
    # Materialise so one-shot iterables can be scanned once per level.
    data = D if data is None else list(data)

    frequent = find_frequent_l_itemsets(create_C(data), min_sup)
    item_set_number = 2

    while frequent:
        candidates = calc_support(L2C(frequent, item_set_number), data)
        larger = find_frequent_l_itemsets(candidates, min_sup)
        if not larger:
            # No frequent itemset of this size: the previous level is final.
            break
        frequent = larger
        item_set_number += 1

    return frequent

# C1 = create_C(D)
# for i in C1:
#     print(i)
# print("*"*20)
# L = find_frequent_l_itemsets(C1, 2)
# for i in L:
#     print(i)
# C2 = L2C(L)
# print("*"*20)
# for i in C2:
#     print(i)
# print("*"*20)
# C2 = calc_support(C2, D)
# for i in C2:
#     print(i)
# print("*"*20)
# L = find_frequent_l_itemsets(C2, 2)
# for i in L:
#     print(i)
# print("*"*20)
# L2 = L2C(L, 3)
# for i in L2:
#     print(i)
# print("*"*20)
# C3 = calc_support(L2, D)
# for i in C3:
#     print(i)
# print("*"*20)
# for i in find_frequent_l_itemsets(C3, 2):
#     print(i)

# Mine the sample dataset with a minimum support of 3 and print each result.
result = apriori(3)

for frequent_itemset in result:
    print(frequent_itemset)

 

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!