import pandas as pd

# Location of the Titanic workbook, kept in one named variable so it is easy to change.
file_path = 'C:\\Users\\ASUS\\Desktop\\lw\\python高级设计test\\数据文件\\titanic.xlsx'

# read_excel already returns a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
t = pd.read_excel(file_path)

# value_counts() is indexed by the column's own values, so the counts must be
# looked up by label: indexing with 0 returns the count for survived == 0.
# NOTE(review): assumes 'survived' is coded 1 = survived, 0 = died - confirm against the workbook.
s = t['survived'].value_counts()
# .get(..., 0) keeps the report working when one of the outcomes is absent from the data.
print('存活人数为{}\n死亡人数为{}'.format(s.get(1, 0), s.get(0, 0)))
# Count passengers by sex. value_counts() orders its result by frequency, so
# the counts are looked up by label rather than by position; positional
# integer access on a string-labelled Series is also deprecated in pandas.
s = t['sex'].value_counts()
# .get(..., 0) reports zero instead of failing when a label is missing.
print('male人数为{}\nfemale人数为{}'.format(s.get('male', 0), s.get('female', 0)))
# Count rescued passengers (alive == 'yes') separately for each sex using
# boolean masks instead of a row-by-row Python loop.
rescued = t[t['alive'] == 'yes']
# int(...) turns the numpy integer returned by .sum() into a plain Python int.
a = int((rescued['sex'] == 'male').sum())
b = int((rescued['sex'] == 'female').sum())
print("男的获救人数为{}\n女的获救人数为{}".format(a, b))
# Tally how many rows fall into each value of the 'class' column and print the table.
class_counts = t['class'].value_counts()
print(class_counts)
# Correlation between survival and passenger class. This reuses the DataFrame
# `t` loaded at the top of the script, so no second read of the workbook is needed.
a = t[['survived', 'pclass']]
# DataFrame.corr() prints the pairwise correlation matrix of the two columns.
print(a.corr())
# Box plot of the 'fare' column, grouped by 'pclass'; the object returned by
# DataFrame.boxplot is printed afterwards.
fare_axes = t.boxplot(column=['fare'], by=['pclass'])
print(fare_axes)
# 从图中可以看出：船舱等级为 1 时票价范围较大，船舱等级为 2、3 时票价范围相对较小。