1_info.py
# encoding: utf-8
# Basic information about the rental and second-hand sale datasets.
import pandas as pd

# Rental listings are reported first, then the second-hand sale listings.
for json_path in ("zufang.json", "ershoufang.json"):
    # Load the file into a DataFrame.
    df = pd.read_json(json_path)
    # Summary statistics from pandas' describe().
    print(df.describe())
    # Number of listings per district.
    print(df["district"].value_counts())
2_pie_chart.py
# coding:utf-8
# Side-by-side pie charts: share of sale listings and of rental listings
# per district.
import json
import os

import numpy as np
import pandas as pd

# Font file used by the original author for the chart titles. It only exists
# on that machine, so main() falls back to the "SimHei" family when absent.
FONT_PATH = '/Users/seancheney/.matplotlib/mpl-data/fonts/ttf/SimHei.ttf'

# Districts shown in the sale chart, in wedge order.
SALE_DISTRICTS = (
    '朝阳', '海淀', '昌平', '东城', '大兴', '西城', '丰台', '石景山', '通州', '顺义')
# The rental chart shows the same districts without the last one (顺义).
# NOTE(review): presumably the rental data has no 顺义 listings -- confirm.
RENT_DISTRICTS = SALE_DISTRICTS[:-1]


def district_counts(df, districts):
    """Return the number of rows of *df* in each of *districts*, in order.

    Args:
        df: DataFrame with a ``district`` column.
        districts: Iterable of district names to look up.

    Returns:
        A list of ints aligned with *districts*. A district that does not
        occur in *df* yields 0 instead of raising ``KeyError``.
    """
    # Count once per dataset instead of once per district.
    counts = df['district'].value_counts()
    return counts.reindex(list(districts), fill_value=0).tolist()


def main():
    """Read both JSON datasets and show the two pie charts."""
    # Imported here so district_counts() stays importable on machines
    # without matplotlib or a display backend.
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    from matplotlib.font_manager import FontProperties

    # Select SimHei so the Chinese wedge labels can be rendered.
    mpl.rcParams['font.sans-serif'] = ['SimHei']

    if os.path.exists(FONT_PATH):
        title_font = FontProperties(fname=FONT_PATH)
    else:
        # Look the font up by family name rather than failing on a path
        # that does not exist on this machine.
        title_font = FontProperties(family='SimHei')

    # (subplot position, data file, districts, title)
    charts = (
        (121, "ershoufang.json", SALE_DISTRICTS, "房屋出售分布"),
        (122, "zufang.json", RENT_DISTRICTS, "房屋出租分布"),
    )
    for position, json_path, districts, title in charts:
        sizes = district_counts(pd.read_json(json_path), districts)
        # Pull only the first wedge out of the pie.
        explode = [0.1] + [0] * (len(districts) - 1)

        plt.subplot(position)
        plt.pie(
            sizes,
            explode=explode,
            labels=districts,
            autopct='%1.1f%%',
            shadow=True,
            startangle=-90)
        plt.axis('equal')
        plt.title(title, fontproperties=title_font)

    # Kept from the original script, which set the font family before show().
    plt.rc('font', family=['SimHei'])
    plt.show()


if __name__ == "__main__":
    main()
3_hist.py
# Histogram of the per-square-metre price of the second-hand sale listings.
import json

import numpy as np
import pandas as pd


def main():
    """Read ``ershoufang.json`` and show a 25-bin histogram of ``unitprice``."""
    # Explicit imports replace ``from pylab import *``, which this script
    # relied on only to obtain the name ``mpl``.
    import matplotlib as mpl
    import matplotlib.pyplot as plt

    # Select SimHei so the Chinese title and axis labels can be rendered.
    mpl.rcParams['font.sans-serif'] = ['SimHei']

    df = pd.read_json("ershoufang.json")
    print(df.columns)

    plt.hist(df.unitprice, bins=25)
    plt.xlim(0, 200000)
    plt.title(u"房屋出售每平米价格分布")
    # The values are plotted unscaled on a 0-200000 axis, so the unit shown is
    # yuan per square metre; the label used to say 10,000 yuan per square metre.
    # NOTE(review): assumes unitprice is stored in yuan/m^2 -- confirm against
    # the code that produces ershoufang.json.
    plt.xlabel(u'价格(单位:元/平方米)')
    plt.ylabel(u'套数')
    plt.show()


if __name__ == "__main__":
    main()
4_ratio.py
# Sale-to-rent ratio per district, drawn as a horizontal bar chart.
import numpy as np
import pandas as pd

# Districts shown on the chart, in bar order.
DISTRICTS = ('西城', '石景山', '东城', '海淀', '丰台', '昌平', '大兴', '朝阳', '通州')


def sale_to_rent_ratios(df_sale, df_rent, districts):
    """Return the sale-to-rent ratio for each of *districts*, in order.

    The ratio is the district's mean sale ``unitprice`` divided by its mean
    rental unit price, where a rental listing's unit price is
    ``price / area``.

    Args:
        df_sale: DataFrame with ``district`` and ``unitprice`` columns.
        df_rent: DataFrame with ``district``, ``price`` and ``area`` columns.
            It is not modified.
        districts: Iterable of district names to report.

    Returns:
        A list of floats aligned with *districts*.

    Raises:
        KeyError: If a requested district is missing from either dataset.
    """
    rent_unit_price = df_rent['price'] / df_rent['area']
    # mean() on the single needed column replaces "sum of every column
    # divided by the row count": it does not touch unrelated columns, and
    # rows whose unit price is NaN no longer drag the average down.
    mean_rent = rent_unit_price.groupby(df_rent['district']).mean()
    mean_sale = df_sale.groupby('district')['unitprice'].mean()

    # Division aligns on the district index; .loc picks the display order.
    ratios = mean_sale / mean_rent
    return ratios.loc[list(districts)].tolist()


def main():
    """Read both JSON datasets and show the sale-to-rent bar chart."""
    # Imported here so sale_to_rent_ratios() stays importable on machines
    # without matplotlib or a display backend. The explicit ``mpl`` import
    # replaces ``from pylab import *``.
    import matplotlib as mpl
    import matplotlib.pyplot as plt

    # Select SimHei so the Chinese tick labels and titles can be rendered.
    mpl.rcParams['font.sans-serif'] = ['SimHei']

    # Rental data first, then second-hand sale data.
    df_zf = pd.read_json("zufang.json")
    df_esf = pd.read_json("ershoufang.json")
    ratio = sale_to_rent_ratios(df_esf, df_zf, DISTRICTS)

    _, ax = plt.subplots()
    y_pos = np.arange(len(DISTRICTS))
    ax.barh(y_pos, ratio, align='center', color='green', ecolor='black')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(DISTRICTS)
    ax.set_xlabel('售租比(单位:月)')
    ax.set_title('各区房屋售租比')
    plt.show()


if __name__ == "__main__":
    main()
来源:https://www.cnblogs.com/hankleo/p/10809175.html