- 引入包,查看数据
import pandas as pd
import matplotlib.pyplot as plt
# Font configuration so matplotlib can render Chinese text.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font used to display Chinese labels
    'axes.unicode_minus': False,    # display the minus sign correctly
})
# Both CSV files use ';' as the field separator.
read_df = pd.read_csv('winequality-red.csv', sep=';')
white_df = pd.read_csv('winequality-white.csv', sep=';')
# Preview the first rows of each frame (only displayed in an interactive session).
white_df.head()
read_df.head()
- 新增颜色列
import numpy as np
# Colour marker for every red-wine row: 0.
color_red = np.zeros(len(read_df), dtype=int)
# Colour marker for every white-wine row: 1.
color_white = np.ones(len(white_df), dtype=int)
# Attach the markers as a new 'color' column on each frame.
read_df['color'] = color_red
white_df['color'] = color_white
- 合并红白葡萄酒数据集
# Stack the red rows on top of the white rows. DataFrame.append was removed
# in pandas 2.0; pd.concat is the supported equivalent and, like append,
# keeps each frame's original row labels.
wine_df = pd.concat([read_df, white_df])
wine_df.info()
# Export twice: once without the row index, once with it (to_csv default).
wine_df.to_csv('winequality_edited.csv', index=False)
wine_df.to_csv('winequality_edited1.csv')
- 可视化:固定酸度、总二氧化硫、pH 值、酒精度的直方图
# Draw one histogram per feature, in this order.
for feature in (
    'fixed acidity',         # fixed acidity
    'total sulfur dioxide',  # total sulfur dioxide
    'pH',                    # pH value
    'alcohol',               # alcohol content
):
    wine_df[feature].plot.hist()
某种类型的葡萄酒(红葡萄酒或白葡萄酒)是否代表更高的品质?
# Mean quality per colour; recorded result: 0 (red) 5.636023, 1 (white) 5.877909.
wine_df.groupby('color')['quality'].mean()
- 查看描述统计(含中位数与四分位数),再按 pH 分级比较平均质量
# Summary statistics for the columns of wine_df.
# NOTE(review): the pH cut-offs 3.11 / 3.21 / 3.32 used by dtype() below
# presumably come from the quartiles shown here - confirm against the output.
wine_df.describe()
def dtype(x: float) -> str:
    """Classify a pH value into one of four level labels.

    Args:
        x: The pH value to classify.

    Returns:
        '高' when x < 3.11, '中等偏高' when 3.11 <= x <= 3.21,
        '中' when 3.21 < x <= 3.32, and '低' for anything else
        (including NaN, for which every comparison is false).
    """
    # NOTE(review): the first bound is strict (<) while the others are
    # inclusive (<=), so exactly 3.11 lands in '中等偏高' - confirm intended.
    if x < 3.11:
        return '高'
    if x <= 3.21:
        return '中等偏高'
    if x <= 3.32:
        return '中'
    return '低'
# Label every row with its pH level, stored in the 'temp' column.
wine_df['temp'] = wine_df['pH'].map(dtype)
# Mean quality for each pH level.
wine_df.groupby('temp')['quality'].mean()
问题
1: 缺陷是否集中在某些区站 (各个区站的缺陷数量)
2: 最常出现缺陷的是哪种设备 (每种设备的缺陷数量统计)
3: 缺陷最集中的区站中最常出现缺陷的是哪种设备
4: 最常出现缺陷的设备经常出现哪种缺陷
- 引入包
import numpy as np
import pandas as pd
# % matplotlib inline
import pymongo
import matplotlib.pyplot as plt
# Font configuration so matplotlib can render Chinese text.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font used to display Chinese labels
    'axes.unicode_minus': False,    # display the minus sign correctly
})
# Client for the MongoDB server at localhost:13300.
myclient = pymongo.MongoClient("mongodb://localhost:13300/")
# Handle to the 'bj' database and its phm_analysis_proproblem collection.
bj_itpsdsc = myclient['bj']
phm_analysis_proproblem = bj_itpsdsc['phm_analysis_proproblem']
- 构造设备类型字典
# Load the spreadsheet that maps device category codes to category names.
devCodeMapDF = pd.read_excel('设备种类编码映射.xlsx')
# Build {category code: category name} from the two columns in one pass
# (replaces the row-by-row iloc loop; later duplicate codes still win).
devCodeMap = dict(zip(devCodeMapDF['种类编码'], devCodeMapDF['设备种类']))
- mongodb 查出数据,根据字典映射设备类型
# Query the collection with no filter, i.e. fetch every document.
proproblem = phm_analysis_proproblem.find()
# Materialise the cursor into a DataFrame.
proproblemDF = pd.DataFrame(list(proproblem))
# Look up each record's device code (converted to int) in devCodeMap;
# a code missing from the dictionary raises KeyError.
proproblemDF['devCodeMap'] = proproblemDF['devCode'].map(
    lambda code: devCodeMap[int(code)]
)
- 设备种类 对应缺陷数
# Number of defect records per mapped device category.
devCodeCounts = proproblemDF['devCodeMap'].value_counts()
devCodeCounts
import matplotlib

# Bar chart keyed by the raw device codes (x tick labels carry no Chinese text).
fig = proproblemDF['devCode'].value_counts().plot.bar(title='缺陷设备种类分类数量直方图')
fig.get_figure().savefig('缺陷设备种类分类数量直方图(无中文).png')

# Bar chart keyed by the mapped category names, on a wider canvas.
fig = devCodeCounts.plot.bar(title='缺陷设备种类分类数量直方图', figsize=(16, 6))
fig.get_figure().savefig('缺陷设备种类分类数量直方图.png')

# Pie chart of the same per-category counts.
fig = devCodeCounts.plot.pie(title='缺陷设备种类分类饼图', figsize=(10, 9))
fig.get_figure().savefig('缺陷设备种类分类饼图.png')
来源:CSDN
作者:宁缺100
链接:https://blog.csdn.net/qq_24434491/article/details/103915463