class Multi_Item:
    """An itemset together with its support count.

    Attributes:
        itemset: The item(s) this record describes. Defaults to an empty list.
        support: How many times the itemset has been counted so far.
    """

    def __init__(self, itemset=None, support=0):
        """Create a record; with no arguments it is empty with support 0."""
        # A fresh list per instance, so records never share one default list.
        self.itemset = [] if itemset is None else itemset
        self.support = support

    def __str__(self):
        """Return the record formatted as ``<itemset>:<support>``."""
        return f"{self.itemset}:{self.support}"

    def __repr__(self):
        """Return an unambiguous representation for debugging and list output."""
        return (
            f"{type(self).__name__}"
            f"(itemset={self.itemset!r}, support={self.support!r})"
        )

    def set_support(self):
        """Increase the support count by one (despite the name, it does not assign)."""
        self.support += 1
# Alternative numeric dataset (disabled):
# D = [[1, 2, 5], [2, 4], [2, 3], [1, 2, 4], [1, 3], [2, 3], [1, 3], [1, 2, 3, 5], [1, 2, 3]]

# Transaction database used by the demo: one transaction per word, and each
# transaction is the list of that word's letters (repeats included).
D = [list(word) for word in ("MONKEY", "DONKEY", "MAKE", "MUCKY", "COOKIE")]
def create_C(D):
    """Build the candidate 1-itemsets of a transaction database.

    Args:
        D: Iterable of transactions, each an iterable of hashable items.

    Returns:
        A list with one ``Multi_Item`` per distinct item, in order of first
        appearance. ``itemset`` holds the bare item and ``support`` the number
        of transactions that contain it.
    """
    counts = {}
    for transaction in D:
        # Support is counted per transaction, so a repeated item inside one
        # transaction counts once. dict.fromkeys drops the repeats while
        # keeping first-seen order.
        for item in dict.fromkeys(transaction):
            counts[item] = counts.get(item, 0) + 1

    C = []
    for item, support in counts.items():
        candidate = Multi_Item()
        candidate.itemset = item
        candidate.support = support
        C.append(candidate)
    return C
def find_frequent_l_itemsets(C, minsup):
    """Keep only the candidates whose support reaches the minimum.

    Args:
        C: Iterable of objects exposing a numeric ``support`` attribute.
        minsup: Minimum support count (inclusive).

    Returns:
        A new list with the entries of ``C`` whose ``support >= minsup``, in
        their original order. ``C`` itself is not modified.
    """
    # One filtering pass builds the new list directly, instead of copying C
    # and then removing the rejected entries one by one.
    return [item for item in C if item.support >= minsup]
def _itemset_members(itemset):
    """Return the members of *itemset* as a duplicate-free list in first-seen order."""
    # A string is a single item, not a collection of characters.
    if isinstance(itemset, (str, bytes)):
        return [itemset]
    try:
        members = iter(itemset)
    except TypeError:
        # Non-iterable value (e.g. an int): a bare single item.
        return [itemset]
    return list(dict.fromkeys(members))


def L2C(L, item_set_number=2):
    """Generate candidate itemsets of a given size from frequent itemsets.

    Every pair of entries in ``L`` is merged; a merged itemset is kept when it
    has exactly ``item_set_number`` distinct items and has not been produced
    before (regardless of item order).

    Args:
        L: Iterable of objects exposing an ``itemset`` attribute. The attribute
            may be a collection of hashable items or a single bare item.
        item_set_number: Required size of each candidate. Defaults to 2.

    Returns:
        A list of candidates, each a list of items, in order of first
        appearance.
    """
    member_lists = [_itemset_members(entry.itemset) for entry in L]
    item_set = []
    seen = set()  # frozensets already emitted: order-independent, O(1) lookup
    for position, first in enumerate(member_lists):
        # Starting at `position` pairs an entry with itself too, so an itemset
        # that already has the requested size is still emitted.
        for second in member_lists[position:]:
            merged = list(dict.fromkeys(first + second))
            if len(merged) != item_set_number:
                continue
            key = frozenset(merged)
            if key not in seen:
                seen.add(key)
                item_set.append(merged)
    return item_set
# Compute support counts.
def calc_support(item_set, D):
    """Count, for every candidate itemset, the transactions that contain it.

    Args:
        item_set: Iterable of candidates, each an iterable of hashable items.
        D: Iterable of transactions, each an iterable of hashable items.

    Returns:
        A list with one ``Multi_Item`` per candidate, in input order.
        ``itemset`` is the candidate itself and ``support`` is the number of
        transactions that include all of its items.
    """
    # Build each transaction's set once instead of once per candidate.
    transactions = [set(transaction) for transaction in D]
    data_set = []
    for item in item_set:
        candidate = Multi_Item()
        # Assigned unconditionally so the itemset is kept even when D is empty.
        candidate.itemset = item
        wanted = set(item)
        candidate.support = sum(1 for transaction in transactions if wanted <= transaction)
        data_set.append(candidate)
    return data_set
def apriori(min_sup, transactions=None):
    """Return the largest frequent itemsets of a transaction database.

    Frequent itemsets are grown one size at a time: the frequent itemsets of
    one size are merged into candidates of the next size, the candidates are
    counted, and those below ``min_sup`` are discarded. The search stops at
    the first size that yields no frequent itemset.

    Args:
        min_sup: Minimum support, as an absolute number of transactions.
        transactions: Transaction database to mine. Defaults to the
            module-level ``D``.

    Returns:
        The frequent itemsets of the largest size that still has any, as
        produced by ``find_frequent_l_itemsets``. This is the frequent single
        items when no larger itemset is frequent, and an empty list when no
        item reaches ``min_sup``.
    """
    if transactions is None:
        transactions = D

    frequent = find_frequent_l_itemsets(create_C(transactions), min_sup)
    item_set_number = 2
    # Stop as soon as a level is empty: nothing larger can be generated from
    # it, and looping on would never terminate.
    while frequent:
        candidates = calc_support(L2C(frequent, item_set_number), transactions)
        next_frequent = find_frequent_l_itemsets(candidates, min_sup)
        if not next_frequent:
            break
        frequent = next_frequent
        item_set_number += 1
    return frequent
# C1 = create_C(D)
# for i in C1:
# print(i)
# print("*"*20)
# L = find_frequent_l_itemsets(C1, 2)
# for i in L:
# print(i)
# C2 = L2C(L)
# print("*"*20)
# for i in C2:
# print(i)
# print("*"*20)
# C2 = calc_support(C2, D)
# for i in C2:
# print(i)
# print("*"*20)
# L = find_frequent_l_itemsets(C2, 2)
# for i in L:
# print(i)
# print("*"*20)
# L2 = L2C(L, 3)
# for i in L2:
# print(i)
# print("*"*20)
# C3 = calc_support(L2, D)
# for i in C3:
# print(i)
# print("*"*20)
# for i in find_frequent_l_itemsets(C3, 2):
# print(i)
# Demo: mine the sample database with a minimum support count of 3 and print
# each itemset that apriori returns, one per line.
result = apriori(3)
for frequent_itemset in result:
    print(frequent_itemset)
# Source: CSDN
# Author: Carpe_D1em
# Link: https://blog.csdn.net/qq_35106907/article/details/103653660