基于python的贝叶斯分类算法(数据集为Iris_data)

匿名 (未验证) 提交于 2019-12-02 22:54:36
# coding: utf-8
"""Gaussian naive Bayes classifier for the Iris data set.

Training rows are read from a comma-separated text file, a per-class mean and
variance is estimated for each of the four features, and every row of a
separate test file is assigned to the class with the largest posterior score.
"""
import math

# Per-class training rows (each row is a list of four feature strings).
# read_train_data() appends to these module-level lists and returns them.
Iris_setosa_data = []
Iris_versicolor_data = []
Iris_virginica_data = []


def read_train_data(filename):
    """Load the first 45 rows of each species from *filename*.

    The remaining 5 rows per species are expected to live in a separate test
    file. Only the first four comma-separated fields (the features) of each
    row are kept.

    NOTE(review): the slice offsets assume the file holds 150 rows, 50 per
    species, ordered setosa / versicolor / virginica with no header line --
    confirm against the actual data file.

    Args:
        filename: Path of the comma-separated training file.

    Returns:
        The three module-level lists ``(Iris_setosa_data,
        Iris_versicolor_data, Iris_virginica_data)`` after appending.
    """
    # A context manager guarantees the handle is closed.
    with open(filename, 'r') as f:
        all_lines = f.readlines()

    # Each species block starts at a multiple of 50. The original slices
    # started at 51 and 101, silently dropping the first row of two classes.
    for line in all_lines[0:45]:
        Iris_setosa_data.append(line.strip().split(',')[0:4])
    for line in all_lines[50:95]:
        Iris_versicolor_data.append(line.strip().split(',')[0:4])
    for line in all_lines[100:145]:
        Iris_virginica_data.append(line.strip().split(',')[0:4])
    return Iris_setosa_data, Iris_versicolor_data, Iris_virginica_data


# Rows of the test file; read_test_data() appends to this list.
test_data = []


def read_test_data(testname):
    """Load every non-blank row of *testname*, split on commas.

    Args:
        testname: Path of the comma-separated test file.

    Returns:
        The module-level ``test_data`` list after appending the new rows.
    """
    with open(testname, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Skip blank lines (e.g. a trailing newline at end of file),
                # which would otherwise fail later in float('').
                continue
            test_data.append(line.split(','))  # split the row on commas
    return test_data


def calculate_junzhi_and_fangcha(train_data):
    """Compute the mean and (population) variance of the four features.

    Args:
        train_data: Non-empty sequence of rows; the first four items of each
            row must be convertible with ``float``.

    Returns:
        An 8-tuple ``(u_x1, u_x2, u_x3, u_x4, variance_x1, variance_x2,
        variance_x3, variance_x4)``.

    Raises:
        ValueError: If *train_data* is empty.
    """
    n = len(train_data)
    if n == 0:
        raise ValueError("train_data must contain at least one sample")

    # Convert once, column by column, instead of re-parsing in each pass.
    columns = [[float(row[i]) for row in train_data] for i in range(4)]

    # Divide by the actual sample count rather than a hard-coded 45.
    means = [sum(col) / n for col in columns]
    variances = [
        sum((value - mean) ** 2 for value in col) / n
        for col, mean in zip(columns, means)
    ]
    return tuple(means + variances)


def _gaussian_pdf(value, mean, variance):
    """Return the normal density N(value; mean, variance)."""
    return (math.exp(-(value - mean) ** 2 / (2 * variance))
            / math.sqrt(2 * math.pi * variance))


def calculate_P_xi_c(u_x1, u_x2, u_x3, u_x4, variance_x1, variance_x2,
                     variance_x3, variance_x4, line_data):
    """Estimate the class-conditional density of each feature of one sample.

    Each feature is modelled as a Gaussian with the given mean and variance.
    The normalisation term includes the variance (1 / sqrt(2*pi*variance));
    omitting it biases the comparison between classes whose variances differ.

    Args:
        u_x1, u_x2, u_x3, u_x4: Per-feature means of the class.
        variance_x1, variance_x2, variance_x3, variance_x4: Per-feature
            variances of the class; each must be non-zero.
        line_data: Row whose first four items are convertible with ``float``.

    Returns:
        A 4-tuple ``(p_x1_c, p_x2_c, p_x3_c, p_x4_c)`` of densities.

    Raises:
        ZeroDivisionError: If any variance is zero.
    """
    p_x1_c = _gaussian_pdf(float(line_data[0]), u_x1, variance_x1)
    p_x2_c = _gaussian_pdf(float(line_data[1]), u_x2, variance_x2)
    p_x3_c = _gaussian_pdf(float(line_data[2]), u_x3, variance_x3)
    p_x4_c = _gaussian_pdf(float(line_data[3]), u_x4, variance_x4)
    return p_x1_c, p_x2_c, p_x3_c, p_x4_c


def main():
    """Train on 'iris_data.txt' and classify each row of 'iris_test_data.txt'."""
    filename = 'iris_data.txt'
    testname = 'iris_test_data.txt'
    setosa_rows, versicolor_rows, virginica_rows = read_train_data(filename)

    # (message printed for the class, training rows of the class)
    classes = [
        (":这行数据属于Iris_setosa类", setosa_rows),
        (":这行数据属于Iris_versicolor类", versicolor_rows),
        (":这行数据属于Iris_virginica类", virginica_rows),
    ]
    total = sum(len(rows) for _, rows in classes)

    # Per class: message, prior probability, and the 8 mean/variance values.
    models = [
        (message, len(rows) / total, calculate_junzhi_and_fangcha(rows))
        for message, rows in classes
    ]

    for x in read_test_data(testname):
        # Unnormalised posterior: prior times the product of the four
        # per-feature densities (naive independence assumption).
        posteriors = []
        for message, prior, params in models:
            p_x1, p_x2, p_x3, p_x4 = calculate_P_xi_c(*(params + (x,)))
            posteriors.append((message, prior * p_x1 * p_x2 * p_x3 * p_x4))

        # Report a class only when it strictly beats both others; on an exact
        # tie nothing is printed.
        for index, (message, score) in enumerate(posteriors):
            if all(score > other
                   for j, (_, other) in enumerate(posteriors) if j != index):
                print(x[0], x[1], x[2], x[3], message)


if __name__ == '__main__':
    main()
具体细节及数据集见我的github:https://github.com/Erikfather/bayes-python
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!