In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties # 修改字体
最简单的图表¶
In [3]:
x = [5,7,2,10]
plt.plot(x) ### 画折线图,传入一个值的话,这个值就是y轴值
plt.show()
In [5]:
x = [5,7,2,10]
y = [3,9,10,5]
plt.plot(x,y) ### 画折线图,传入两个值的话,第一个代表x值,第二个代表y值。
plt.show()
In [13]:
x = [1,3,6,9]
y = [12,5,10,20]
font=FontProperties(fname='C:\Windows\Fonts\simhei.ttf')
plt.figure(figsize=(10,6)) #### 设置画板大小,第一个值表示宽度,第二个值表示高度。
plt.title('两个参数的折线图',fontproperties=font, fontsize=20, color='red') ### 设置标题
plt.xlabel('x轴', fontproperties=font,fontsize=15, color='red') ### 设置 x轴的说明
plt.ylabel('y轴', fontproperties=font,fontsize=20, color='blue') ### 设置 y 轴的说明
plt.plot(x, y)
plt.show()
#### 存在的问题:1.中文乱码
解决方案一:修改配置文件¶
matplotlib 从配置文件 matplotlibrc 中读取配置,字体相关内容也在其中。查询当前matplotlibrc 所在目录,可以用 get_configdir()函数:
通常存放位置:lib\site-packages\matplotlib\mpl-data\matplotlibrc
In [ ]:
import matplotlib
matplotlib.get_configdir()
涉及到字体部分的设置内容为:
In [37]:
#font.family : sans-serif
#font.style : normal
#font.variant : normal
#font.weight : normal
#font.stretch : normal
## note that font.size controls default text sizes. To configure
## special text sizes tick labels, axes, labels, title, etc, see the rc
## settings for axes and ticks. Special text sizes can be defined
## relative to font.size, using the following values: xx-small, x-small,
## small, medium, large, x-large, xx-large, larger, or smaller
#font.size : 10.0
#font.serif : DejaVu Serif, Bitstream Vera Serif, Computer Modern Roman, New Century Schoolbook, Century Schoolbook L, Utopia, ITC Bookman, Bookman, Nimbus Roman No9 L, Times New Roman, Times, Palatino, Charter, serif
#font.sans-serif : DejaVu Sans, Bitstream Vera Sans, Computer Modern Sans Serif, Lucida Grande, Verdana, Geneva, Lucid, Arial, Helvetica, Avant Garde, sans-serif
#font.cursive : Apple Chancery, Textile, Zapf Chancery, Sand, Script MT, Felipa, cursive
#font.fantasy : Comic Sans MS, Chicago, Charcoal, ImpactWestern, Humor Sans, xkcd, fantasy
#font.monospace : DejaVu Sans Mono, Bitstream Vera Sans Mono, Computer Modern Typewriter, Andale Mono, Nimbus Mono L, Courier New, Courier, Fixed, Terminal, monospace
matplotlib 默认使用的 font.family 是 sans-serif,即无衬线字体,可以看到在font.sans-serif中设置的全部为西文字体,这里的设置和css样式文件中设置差不多,只需要添加系统存在的字体名称即可(需要注意的是,matplotlib:
只支持ttf格式的字体),设置时需要将注释符号#去除。
解决方案二:重载配置文件¶
In [38]:
import matplotlib as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['font.serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题,或者转换负号为字符串
In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [32]:
#### windows配置
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
In [33]:
x = [1,3,6,9]
y = [12,5,10,20]
plt.figure(figsize=(10,6)) #### 设置画板大小
plt.title('标题',fontsize=20, color='red') ### 设置标题
plt.xlabel('x轴',fontsize=15, color='blue') ### 设置 x轴的说明
plt.ylabel('y轴', fontsize=20, color='blue') ### 设置 y 轴的说明
plt.plot(x, y)
plt.show()
解决方案三:自定义字体¶
In [40]:
import numpy as np
import pylab as pl
import matplotlib.font_manager as fm
myfont = fm.FontProperties(fname=r'C:\Windows\Fonts\simhei.ttf') # 设置字体
t = np.arange(0.0,2.0 * np.pi,0.01) # 自变量取值范围
s = np.sin(t) # 计算正弦函数值
z = np.cos(t) # 计算余弦函数值
pl.plot(t,s,label='正弦')
pl.plot(t,z,label='余弦')
pl.xlabel('x-变量',fontproperties=myfont,fontsize=24) #设置标签
pl.ylabel('y-正弦余弦函数值',fontproperties=myfont,fontsize=24)
pl.title('sin-cos函数图像',fontproperties=myfont,fontsize=32) #图像标题
pl.legend(prop=myfont)
pl.show()
将电影产地和上映电影数量画成柱状图¶
In [20]:
import pandas as pd
movies = pd.read_csv('./douban_movie.csv') ### 只需要read_csv函数, 将csv中的所有数据读出
movies
Out[20]:
名字 | 投票人数 | 类型 | 产地 | 上映时间 | 时长 | 年代 | 评分 | 首映地点 | |
---|---|---|---|---|---|---|---|---|---|
0 | 肖申克的救赎 | 692795.0 | 剧情/犯罪 | 美国 | 1994-09-10 00:00:00 | 142.0 | 1994 | 9.6 | 多伦多电影节 |
1 | 控方证人 | 42995.0 | 剧情/悬疑/犯罪 | 美国 | 1957-12-17 00:00:00 | 116.0 | 1957 | 9.5 | 美国 |
2 | 美丽人生 | 327855.0 | 剧情/喜剧/爱情 | 意大利 | 1997-12-20 00:00:00 | 116.0 | 1997 | 9.5 | 意大利 |
3 | 阿甘正传 | 580897.0 | 剧情/爱情 | 美国 | 1994-06-23 00:00:00 | 142.0 | 1994 | 9.4 | 洛杉矶首映 |
4 | 霸王别姬 | 478523.0 | 剧情/爱情/同性 | 中国大陆 | 1993-01-01 00:00:00 | 171.0 | 1993 | 9.4 | 香港 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
38730 | 神学院 S | 46.0 | Adult | 法国 | 1905-06-05 00:00:00 | 58.0 | 1983 | 8.6 | 美国 |
38731 | 1935年 | 57.0 | 喜剧/歌舞 | 美国 | 1935-03-15 00:00:00 | 98.0 | 1935 | 7.6 | 美国 |
38732 | 血溅画屏 | 95.0 | 剧情/悬疑/犯罪/武侠/古装 | 中国大陆 | 1905-06-08 00:00:00 | 91.0 | 1986 | 7.1 | 美国 |
38733 | 魔窟中的幻想 | 51.0 | 惊悚/恐怖/儿童 | 中国大陆 | 1905-06-08 00:00:00 | 78.0 | 1986 | 8.0 | 美国 |
38734 | 列宁格勒围困之星火战役 Блокада: Фильм 2: Ленинградский ме... | 32.0 | 剧情/战争 | 苏联 | 1905-05-30 00:00:00 | 97.0 | 1977 | 6.6 | 美国 |
38735 rows × 9 columns
In [21]:
movies.groupby('产地').groups
Out[21]:
{'USA': Int64Index([ 704, 883, 1010, 1167, 2046, 2342, 2411, 2626, 2771, 3095, ... 33005, 33248, 34457, 34695, 34721, 34733, 35081, 35778, 36994, 37662], dtype='int64', length=113), '中国台湾': Int64Index([ 153, 185, 222, 333, 345, 385, 389, 391, 442, 588, ... 37902, 37939, 37947, 37995, 38024, 38049, 38470, 38489, 38624, 38722], dtype='int64', length=618), '中国大陆': Int64Index([ 4, 21, 29, 38, 45, 49, 62, 67, 119, 132, ... 38680, 38695, 38699, 38701, 38714, 38724, 38726, 38728, 38732, 38733], dtype='int64', length=3802), '中国香港': Int64Index([ 53, 92, 121, 182, 184, 187, 198, 202, 208, 209, ... 38357, 38421, 38495, 38497, 38498, 38554, 38571, 38691, 38697, 38709], dtype='int64', length=2852), '丹麦': Int64Index([ 266, 428, 1275, 1567, 1664, 1710, 2118, 2318, 2391, 2480, ... 37452, 37628, 37762, 37852, 37889, 38167, 38273, 38292, 38316, 38607], dtype='int64', length=198), '俄罗斯': Int64Index([ 152, 161, 414, 548, 803, 953, 1342, 1795, 1910, 2338, ... 37173, 37347, 37408, 37740, 38006, 38007, 38174, 38563, 38566, 38698], dtype='int64', length=221), '其他': Int64Index([ 116, 128, 169, 170, 190, 216, 218, 263, 288, 301, ... 38375, 38392, 38395, 38396, 38401, 38517, 38575, 38586, 38647, 38653], dtype='int64', length=1920), '加拿大': Int64Index([ 117, 141, 227, 232, 398, 409, 420, 478, 496, 529, ... 38019, 38133, 38180, 38225, 38328, 38522, 38601, 38636, 38679, 38684], dtype='int64', length=723), '印度': Int64Index([ 99, 142, 166, 289, 1036, 1532, 1544, 1680, 1681, 1811, ... 37772, 37773, 37775, 37776, 37936, 38022, 38115, 38346, 38465, 38623], dtype='int64', length=357), '墨西哥': Int64Index([ 763, 1618, 1709, 2269, 2301, 2317, 3051, 3489, 4114, 4238, ... 36949, 36976, 37693, 37777, 38079, 38458, 38599, 38600, 38646, 38681], dtype='int64', length=119), '巴西': Int64Index([ 776, 1568, 1866, 1948, 2711, 2727, 2784, 2892, 3296, 3498, ... 36690, 36756, 36929, 37159, 37273, 37653, 37734, 37779, 37780, 38299], dtype='int64', length=101), '德国': Int64Index([ 97, 148, 234, 261, 306, 344, 558, 561, 845, 960, ... 38296, 38297, 38311, 38518, 38556, 38560, 38578, 38592, 38606, 38608], dtype='int64', length=902), '意大利': Int64Index([ 2, 10, 24, 54, 56, 75, 89, 111, 135, 221, ... 38441, 38448, 38469, 38472, 38506, 38513, 38534, 38651, 38692, 38711], dtype='int64', length=749), '日本': Int64Index([ 7, 8, 12, 14, 28, 34, 35, 40, 41, 42, ... 38656, 38660, 38666, 38668, 38670, 38686, 38688, 38689, 38690, 38712], dtype='int64', length=5053), '比利时': Int64Index([ 503, 620, 1012, 1068, 1564, 1634, 2321, 2336, 2537, 3179, ... 35831, 35918, 35998, 36099, 36211, 36829, 37268, 37726, 37879, 38200], dtype='int64', length=139), '法国': Int64Index([ 9, 23, 25, 33, 39, 51, 55, 96, 112, 127, ... 38595, 38597, 38602, 38657, 38658, 38676, 38678, 38694, 38703, 38730], dtype='int64', length=2817), '波兰': Int64Index([ 120, 614, 1100, 1558, 1815, 2084, 2291, 2566, 2994, 3031, ... 36842, 36894, 37081, 37715, 37733, 37910, 37988, 38459, 38480, 38713], dtype='int64', length=181), '泰国': Int64Index([ 131, 183, 274, 305, 310, 330, 370, 439, 450, 472, ... 36330, 36791, 36922, 37198, 37215, 37506, 37691, 37692, 37745, 38182], dtype='int64', length=294), '澳大利亚': Int64Index([ 139, 171, 470, 583, 730, 810, 830, 925, 934, 969, ... 37786, 37963, 37992, 38045, 38108, 38326, 38373, 38397, 38696, 38700], dtype='int64', length=300), '瑞典': Int64Index([ 836, 1264, 1619, 1825, 1867, 1900, 1971, 2387, 2501, 2907, ... 37425, 37434, 37435, 37497, 37622, 37945, 37946, 37986, 38088, 38727], dtype='int64', length=193), '美国': Int64Index([ 0, 1, 3, 5, 6, 11, 13, 15, 16, 17, ... 38708, 38710, 38716, 38719, 38720, 38721, 38723, 38725, 38729, 38731], dtype='int64', length=11866), '苏联': Int64Index([ 756, 1203, 1278, 1384, 1576, 2077, 2349, 2374, 2379, 2512, ... 37421, 37610, 37629, 38060, 38228, 38371, 38417, 38467, 38717, 38734], dtype='int64', length=256), '英国': Int64Index([ 18, 31, 37, 44, 46, 68, 69, 70, 71, 72, ... 38523, 38537, 38555, 38564, 38638, 38671, 38682, 38706, 38707, 38718], dtype='int64', length=2762), '荷兰': Int64Index([ 205, 448, 833, 875, 1003, 1046, 1276, 1291, 1410, 1610, ... 36831, 36927, 36953, 37141, 37409, 37415, 37875, 38419, 38515, 38533], dtype='int64', length=155), '西德': Int64Index([ 1283, 1670, 1877, 2014, 2381, 2755, 2833, 2908, 2979, 3128, ... 38064, 38068, 38084, 38142, 38149, 38194, 38219, 38411, 38487, 38536], dtype='int64', length=130), '西班牙': Int64Index([ 147, 155, 348, 353, 355, 378, 387, 395, 1235, 1266, ... 37813, 37826, 37978, 38020, 38047, 38132, 38147, 38290, 38609, 38677], dtype='int64', length=447), '阿根廷': Int64Index([ 66, 654, 1376, 1979, 2369, 2655, 2723, 2852, 3056, 3094, ... 37118, 37172, 37197, 37295, 37502, 37542, 37707, 37876, 37997, 38588], dtype='int64', length=116), '韩国': Int64Index([ 48, 50, 98, 100, 102, 105, 113, 154, 158, 215, ... 38293, 38324, 38408, 38423, 38438, 38582, 38593, 38594, 38641, 38715], dtype='int64', length=1351)}
In [22]:
res = movies.groupby('产地').size().sort_values(ascending=False)
res
Out[22]:
产地 美国 11866 日本 5053 中国大陆 3802 中国香港 2852 法国 2817 英国 2762 其他 1920 韩国 1351 德国 902 意大利 749 加拿大 723 中国台湾 618 西班牙 447 印度 357 澳大利亚 300 泰国 294 苏联 256 俄罗斯 221 丹麦 198 瑞典 193 波兰 181 荷兰 155 比利时 139 西德 130 墨西哥 119 阿根廷 116 USA 113 巴西 101 dtype: int64
In [12]:
res[0]
Out[12]:
11866
In [23]:
res.values
Out[23]:
array([11866, 5053, 3802, 2852, 2817, 2762, 1920, 1351, 902, 749, 723, 618, 447, 357, 300, 294, 256, 221, 198, 193, 181, 155, 139, 130, 119, 116, 113, 101], dtype=int64)
In [24]:
res.index
Out[24]:
Index(['美国', '日本', '中国大陆', '中国香港', '法国', '英国', '其他', '韩国', '德国', '意大利', '加拿大', '中国台湾', '西班牙', '印度', '澳大利亚', '泰国', '苏联', '俄罗斯', '丹麦', '瑞典', '波兰', '荷兰', '比利时', '西德', '墨西哥', '阿根廷', 'USA', '巴西'], dtype='object', name='产地')
统计每一年电影的数量的折线图¶
In [104]:
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties # 修改字体
font=FontProperties(fname='C:\Windows\Fonts\simfang.ttf')
plt.style.use('ggplot') # 设置条形图的背景
plt.figure(figsize=(18,6))
Product_origin=res.index
movies_count = res.values
plt.xlabel('产地',FontProperties=font,fontsize=15,color='blue')
plt.ylabel('出影数',FontProperties=font,fontsize=15,color='blue')
plt.title('产地-电影数量',FontProperties=font,fontsize=30)
plt.xticks(fontproperties=font, size=18, rotation=90) ### 对坐标轴上的刻度进行设置 rotation控制字体旋转
for a,b in zip(Product_origin,movies_count):
plt.text(a,b+100,b,ha='center') #### 参数1控制x轴文本,参数2控制柱上的数字,+100是柱子和数字之间的距离,参数3控制文本在柱子上居中,参数4控制字体大小
plt.bar(Product_origin,movies_count,color='darkgreen')
# plt.savefig('./a.png') #### 保存图片
plt.show()
---------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-104-0bef56d17196> in <module> 19 plt.text(a,b+100,b,ha='center') #### 参数1控制x轴文本,参数2控制柱上的数字,+100是柱子和数字之间的距离,参数3控制文本在柱子上居中,参数4控制字体大小 20 ---> 21 plt.bar(Product_origin,movies_count,color='darkgreen') 22 23 # plt.savefig('./a.png') #### 保存图片 g:\python36\lib\site-packages\matplotlib\pyplot.py in bar(x, height, width, bottom, align, data, **kwargs) 2438 return gca().bar( 2439 x, height, width=width, bottom=bottom, align=align, -> 2440 **({"data": data} if data is not None else {}), **kwargs) 2441 2442 g:\python36\lib\site-packages\matplotlib\__init__.py in inner(ax, data, *args, **kwargs) 1599 def inner(ax, *args, data=None, **kwargs): 1600 if data is None: -> 1601 return func(ax, *map(sanitize_sequence, args), **kwargs) 1602 1603 bound = new_sig.bind(ax, *args, **kwargs) g:\python36\lib\site-packages\matplotlib\axes\_axes.py in bar(self, x, height, width, bottom, align, **kwargs) 2436 elif orientation == 'horizontal': 2437 r.sticky_edges.x.append(l) -> 2438 self.add_patch(r) 2439 patches.append(r) 2440 g:\python36\lib\site-packages\matplotlib\axes\_base.py in add_patch(self, p) 1969 if p.get_clip_path() is None: 1970 p.set_clip_path(self.patch) -> 1971 self._update_patch_limits(p) 1972 self.patches.append(p) 1973 p._remove_method = self.patches.remove g:\python36\lib\site-packages\matplotlib\axes\_base.py in _update_patch_limits(self, patch) 1989 vertices = patch.get_path().vertices 1990 if vertices.size > 0: -> 1991 xys = patch.get_patch_transform().transform(vertices) 1992 if patch.get_data_transform() != self.transData: 1993 patch_to_data = (patch.get_data_transform() - g:\python36\lib\site-packages\matplotlib\patches.py in get_patch_transform(self) 756 757 def get_patch_transform(self): --> 758 self._update_patch_transform() 759 return self._rect_transform 760 g:\python36\lib\site-packages\matplotlib\patches.py in _update_patch_transform(self) 733 """ 734 x0, y0, x1, y1 = self._convert_units() --> 735 bbox = transforms.Bbox.from_extents(x0, y0, x1, y1) 736 rot_trans = transforms.Affine2D() 737 rot_trans.rotate_deg_around(x0, y0, self.angle) g:\python36\lib\site-packages\matplotlib\transforms.py in from_extents(*args) 794 The *y*-axis increases upwards. 795 """ --> 796 points = np.array(args, dtype=float).reshape(2, 2) 797 return Bbox(points) 798 TypeError: float() argument must be a string or a number, not 'pandas._libs.interval.Interval'
Error in callback <function install_repl_displayhook.<locals>.post_execute at 0x000000000E3276A8> (for post_execute):
---------------------------------------------------------------------- TypeError Traceback (most recent call last) g:\python36\lib\site-packages\matplotlib\pyplot.py in post_execute() 107 def post_execute(): 108 if matplotlib.is_interactive(): --> 109 draw_all() 110 111 # IPython >= 2 g:\python36\lib\site-packages\matplotlib\_pylab_helpers.py in draw_all(cls, force) 126 for f_mgr in cls.get_all_fig_managers(): 127 if force or f_mgr.canvas.figure.stale: --> 128 f_mgr.canvas.draw_idle() 129 130 atexit.register(Gcf.destroy_all) g:\python36\lib\site-packages\matplotlib\backend_bases.py in draw_idle(self, *args, **kwargs) 1905 if not self._is_idle_drawing: 1906 with self._idle_draw_cntx(): -> 1907 self.draw(*args, **kwargs) 1908 1909 def draw_cursor(self, event): g:\python36\lib\site-packages\matplotlib\backends\backend_agg.py in draw(self) 386 self.renderer = self.get_renderer(cleared=True) 387 with RendererAgg.lock: --> 388 self.figure.draw(self.renderer) 389 # A GUI class may be need to update a window using this draw, so 390 # don't forget to call the superclass. g:\python36\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs) 36 renderer.start_filter() 37 ---> 38 return draw(artist, renderer, *args, **kwargs) 39 finally: 40 if artist.get_agg_filter() is not None: g:\python36\lib\site-packages\matplotlib\figure.py in draw(self, renderer) 1707 self.patch.draw(renderer) 1708 mimage._draw_list_compositing_images( -> 1709 renderer, self, artists, self.suppressComposite) 1710 1711 renderer.close_group('figure') g:\python36\lib\site-packages\matplotlib\image.py in _draw_list_compositing_images(renderer, parent, artists, suppress_composite) 133 if not_composite or not has_images: 134 for a in artists: --> 135 a.draw(renderer) 136 else: 137 # Composite any adjacent images together g:\python36\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs) 36 renderer.start_filter() 37 ---> 38 return draw(artist, renderer, *args, **kwargs) 39 finally: 40 if artist.get_agg_filter() is not None: g:\python36\lib\site-packages\matplotlib\axes\_base.py in draw(self, renderer, inframe) 2645 renderer.stop_rasterizing() 2646 -> 2647 mimage._draw_list_compositing_images(renderer, self, artists) 2648 2649 renderer.close_group('axes') g:\python36\lib\site-packages\matplotlib\image.py in _draw_list_compositing_images(renderer, parent, artists, suppress_composite) 133 if not_composite or not has_images: 134 for a in artists: --> 135 a.draw(renderer) 136 else: 137 # Composite any adjacent images together g:\python36\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs) 36 renderer.start_filter() 37 ---> 38 return draw(artist, renderer, *args, **kwargs) 39 finally: 40 if artist.get_agg_filter() is not None: g:\python36\lib\site-packages\matplotlib\text.py in draw(self, renderer) 668 669 with _wrap_text(self) as textobj: --> 670 bbox, info, descent = textobj._get_layout(renderer) 671 trans = textobj.get_transform() 672 g:\python36\lib\site-packages\matplotlib\text.py in _get_layout(self, renderer) 274 of a rotated text when necessary. 275 """ --> 276 key = self.get_prop_tup(renderer=renderer) 277 if key in self._cached: 278 return self._cached[key] g:\python36\lib\site-packages\matplotlib\text.py in get_prop_tup(self, renderer) 829 need to know if the text has changed. 830 """ --> 831 x, y = self.get_unitless_position() 832 renderer = renderer or self._renderer 833 return (x, y, self.get_text(), self._color, g:\python36\lib\site-packages\matplotlib\text.py in get_unitless_position(self) 811 # This will get the position with all unit information stripped away. 812 # This is here for convenience since it is done in several locations. --> 813 x = float(self.convert_xunits(self._x)) 814 y = float(self.convert_yunits(self._y)) 815 return x, y TypeError: float() argument must be a string or a number, not 'pandas._libs.interval.Interval'
---------------------------------------------------------------------- TypeError Traceback (most recent call last) g:\python36\lib\site-packages\IPython\core\formatters.py in __call__(self, obj) 339 pass 340 else: --> 341 return printer(obj) 342 # Finally look for special method names 343 method = get_real_method(obj, self.print_method) g:\python36\lib\site-packages\IPython\core\pylabtools.py in <lambda>(fig) 242 243 if 'png' in formats: --> 244 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs)) 245 if 'retina' in formats or 'png2x' in formats: 246 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs)) g:\python36\lib\site-packages\IPython\core\pylabtools.py in print_figure(fig, fmt, bbox_inches, **kwargs) 126 127 bytes_io = BytesIO() --> 128 fig.canvas.print_figure(bytes_io, **kw) 129 data = bytes_io.getvalue() 130 if fmt == 'svg': g:\python36\lib\site-packages\matplotlib\backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, **kwargs) 2054 orientation=orientation, 2055 dryrun=True, -> 2056 **kwargs) 2057 renderer = self.figure._cachedRenderer 2058 bbox_artists = kwargs.pop("bbox_extra_artists", None) g:\python36\lib\site-packages\matplotlib\backends\backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args, **kwargs) 525 526 else: --> 527 FigureCanvasAgg.draw(self) 528 renderer = self.get_renderer() 529 with cbook._setattr_cm(renderer, dpi=self.figure.dpi), \ g:\python36\lib\site-packages\matplotlib\backends\backend_agg.py in draw(self) 386 self.renderer = self.get_renderer(cleared=True) 387 with RendererAgg.lock: --> 388 self.figure.draw(self.renderer) 389 # A GUI class may be need to update a window using this draw, so 390 # don't forget to call the superclass. g:\python36\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs) 36 renderer.start_filter() 37 ---> 38 return draw(artist, renderer, *args, **kwargs) 39 finally: 40 if artist.get_agg_filter() is not None: g:\python36\lib\site-packages\matplotlib\figure.py in draw(self, renderer) 1707 self.patch.draw(renderer) 1708 mimage._draw_list_compositing_images( -> 1709 renderer, self, artists, self.suppressComposite) 1710 1711 renderer.close_group('figure') g:\python36\lib\site-packages\matplotlib\image.py in _draw_list_compositing_images(renderer, parent, artists, suppress_composite) 133 if not_composite or not has_images: 134 for a in artists: --> 135 a.draw(renderer) 136 else: 137 # Composite any adjacent images together g:\python36\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs) 36 renderer.start_filter() 37 ---> 38 return draw(artist, renderer, *args, **kwargs) 39 finally: 40 if artist.get_agg_filter() is not None: g:\python36\lib\site-packages\matplotlib\axes\_base.py in draw(self, renderer, inframe) 2645 renderer.stop_rasterizing() 2646 -> 2647 mimage._draw_list_compositing_images(renderer, self, artists) 2648 2649 renderer.close_group('axes') g:\python36\lib\site-packages\matplotlib\image.py in _draw_list_compositing_images(renderer, parent, artists, suppress_composite) 133 if not_composite or not has_images: 134 for a in artists: --> 135 a.draw(renderer) 136 else: 137 # Composite any adjacent images together g:\python36\lib\site-packages\matplotlib\artist.py in draw_wrapper(artist, renderer, *args, **kwargs) 36 renderer.start_filter() 37 ---> 38 return draw(artist, renderer, *args, **kwargs) 39 finally: 40 if artist.get_agg_filter() is not None: g:\python36\lib\site-packages\matplotlib\text.py in draw(self, renderer) 668 669 with _wrap_text(self) as textobj: --> 670 bbox, info, descent = textobj._get_layout(renderer) 671 trans = textobj.get_transform() 672 g:\python36\lib\site-packages\matplotlib\text.py in _get_layout(self, renderer) 274 of a rotated text when necessary. 275 """ --> 276 key = self.get_prop_tup(renderer=renderer) 277 if key in self._cached: 278 return self._cached[key] g:\python36\lib\site-packages\matplotlib\text.py in get_prop_tup(self, renderer) 829 need to know if the text has changed. 830 """ --> 831 x, y = self.get_unitless_position() 832 renderer = renderer or self._renderer 833 return (x, y, self.get_text(), self._color, g:\python36\lib\site-packages\matplotlib\text.py in get_unitless_position(self) 811 # This will get the position with all unit information stripped away. 812 # This is here for convenience since it is done in several locations. --> 813 x = float(self.convert_xunits(self._x)) 814 y = float(self.convert_yunits(self._y)) 815 return x, y TypeError: float() argument must be a string or a number, not 'pandas._libs.interval.Interval'
<Figure size 1296x432 with 1 Axes>
统计每一年电影的数量的折线图¶
In [48]:
movies.groupby('年代').groups
Out[48]:
{1888: Int64Index([1700, 14048], dtype='int64'), 1890: Int64Index([26170], dtype='int64'), 1892: Int64Index([10627], dtype='int64'), 1894: Int64Index([12374, 14455, 21765], dtype='int64'), 1895: Int64Index([590, 616, 932, 3212, 17331, 19648, 29242, 32520], dtype='int64'), 1896: Int64Index([203, 4555, 9140, 19630, 20994, 24283, 31489, 32171], dtype='int64'), 1897: Int64Index([15431, 27085, 29127], dtype='int64'), 1898: Int64Index([1191, 11304], dtype='int64'), 1899: Int64Index([6500, 12115], dtype='int64'), 1900: Int64Index([1052, 5141, 5391, 6360, 18618, 18726, 23634], dtype='int64'), 1901: Int64Index([6820, 14151, 14784, 15407], dtype='int64'), 1902: Int64Index([2564, 12850, 13530, 23510, 24421, 33637], dtype='int64'), 1903: Int64Index([ 1507, 2016, 2758, 3476, 7051, 7740, 8791, 12225, 12349, 12787, 15569, 16190, 25005, 27528, 34814, 37077], dtype='int64'), 1904: Int64Index([2170, 8785, 14828, 20534, 26820, 28591, 33374, 33730], dtype='int64'), 1905: Int64Index([951, 23596, 24317, 29796, 35893], dtype='int64'), 1906: Int64Index([1685, 2223, 7157, 7320, 7733, 9190, 28200], dtype='int64'), 1907: Int64Index([1364, 5361, 11365, 13982, 34134], dtype='int64'), 1908: Int64Index([1040, 1453, 15301, 25657, 28568, 37459], dtype='int64'), 1909: Int64Index([1786, 27785, 29633, 36055, 36056], dtype='int64'), 1910: Int64Index([13810, 14790, 17092, 25326, 25480], dtype='int64'), 1911: Int64Index([17580, 29660, 36235, 36475], dtype='int64'), 1912: Int64Index([1422, 4492, 13395, 23014, 34223], dtype='int64'), 1913: Int64Index([1368, 1922, 3261, 4327, 19323, 23454, 25314, 27211], dtype='int64'), 1914: Int64Index([ 4434, 7907, 8096, 14125, 14214, 14552, 15054, 17156, 17584, 17673, 18922, 20041, 20166, 20369, 24408, 25291, 25523, 27181, 30125, 31866, 32289, 32937, 33191, 33401, 34209, 38479], dtype='int64'), 1915: Int64Index([ 7515, 9382, 9897, 13131, 14597, 16474, 17443, 22118, 22616, 24857, 25292, 28817, 30184, 31657, 31859, 32305, 32902, 33580, 33583, 36849], dtype='int64'), 1916: Int64Index([2931, 4025, 6356, 6376, 14077, 14233, 16402, 16450, 16477, 32332, 33383, 33628], dtype='int64'), 1917: Int64Index([ 2368, 5157, 11276, 11840, 14740, 16178, 17603, 18689, 19979, 26587, 30110, 38333], dtype='int64'), 1918: Int64Index([5276, 5629, 6084, 9550, 16705, 19616, 22217, 23495, 26889, 33633, 36734, 38454], dtype='int64'), 1919: Int64Index([ 650, 4430, 6372, 7509, 8984, 11468, 15162, 18894, 18963, 20922, 23160, 23359, 26143, 26716, 27802, 29238, 33010, 34154, 34188, 34342], dtype='int64'), 1920: Int64Index([ 143, 2372, 6208, 6467, 7190, 9114, 14580, 15373, 16297, 19394, 25177, 31910, 33755, 35293], dtype='int64'), 1921: Int64Index([ 276, 2449, 3250, 5130, 5383, 6419, 9253, 9637, 10142, 12053, 14366, 22119, 25789, 31066, 31421, 32611, 32740, 32870, 33787, 35434, 36464, 38608], dtype='int64'), 1922: Int64Index([ 2835, 2845, 3348, 3891, 5255, 7031, 7947, 8801, 10504, 14076, 15581, 17969, 19725, 19920, 21181, 21607, 26257, 26836, 27926, 29499, 32452, 38607], dtype='int64'), 1923: Int64Index([ 1885, 2413, 4788, 6243, 9320, 12162, 13807, 20646, 22143, 22219, 23868, 24642, 26259, 31575, 35892, 36428, 36733, 38433], dtype='int64'), 1924: Int64Index([ 4749, 7080, 7577, 8807, 9392, 10663, 12200, 12419, 13176, 14239, 14618, 15319, 17358, 19364, 19365, 23553, 25504, 27319, 28326, 33901, 34135, 34152, 34845, 35206, 37220, 37315, 38605, 38606], dtype='int64'), 1925: Int64Index([ 1807, 2795, 3330, 3866, 4870, 9239, 10397, 11856, 11933, 20424, 20526, 22226, 22863, 23754, 24682, 25356, 28766, 30186, 30592, 31865, 33486, 33818, 36476, 38153, 38545], dtype='int64'), 1926: Int64Index([ 1666, 3828, 8538, 8611, 8789, 11418, 13309, 14736, 16391, 17335, 18942, 19128, 19194, 23530, 23648, 24004, 24185, 31669, 32056, 33819, 36349, 37289, 38604], dtype='int64'), 1927: Int64Index([ 2984, 5416, 5856, 6152, 6895, 8119, 16103, 17039, 17504, 19048, 20071, 21542, 21953, 22267, 23546, 26347, 26948, 27396, 27530, 28787, 29039, 29774, 32217, 36848, 38135, 38572, 38573, 38574, 38603], dtype='int64'), 1928: Int64Index([ 423, 1177, 3422, 5389, 6677, 7591, 7753, 8225, 8469, 9603, 10695, 13166, 13751, 17219, 17246, 19622, 19653, 20891, 20955, 21588, 21841, 23013, 23209, 23289, 23641, 23696, 23944, 24726, 26344, 28976, 29292, 29869, 29882, 29949, 31069, 33679, 35276, 36145, 37056, 38134], dtype='int64'), 1929: Int64Index([ 2940, 3001, 3941, 5021, 5754, 7232, 7730, 9466, 15107, 18001, 18066, 18208, 19514, 19914, 20062, 20272, 21208, 21977, 22941, 24670, 25580, 26529, 26549, 27518, 30469, 30709, 30784, 32904, 33038, 33379, 34101, 34835, 35121, 38105, 38569], dtype='int64'), 1930: Int64Index([ 2684, 4301, 5694, 5833, 6197, 6772, 7366, 7486, 9101, 12063, 14658, 15458, 18566, 18913, 19091, 19390, 20654, 21199, 23812, 26880, 27808, 28786, 30350, 36350, 38562], dtype='int64'), 1931: Int64Index([ 20, 2380, 2959, 2997, 7209, 7228, 7609, 7834, 8722, 8886, 10279, 11765, 12289, 13627, 13902, 14789, 16047, 17210, 18368, 18754, 18932, 20527, 21659, 22188, 24466, 25018, 26717, 27850, 28815, 29087, 29556, 29563, 29607, 29689, 32828, 35092, 35470, 36217, 36281, 36312, 37290, 37909], dtype='int64'), 1932: Int64Index([ 611, 2765, 2836, 4220, 4318, 5877, 9174, 9436, 9705, 11259, 11270, 11318, 18982, 18993, 21072, 21858, 22198, 22794, 23882, 23963, 24242, 26055, 26971, 26990, 27534, 28818, 31011, 31423, 32283, 32827, 34427, 35334, 36234, 36280, 36547, 38559, 38560], dtype='int64'), 1933: Int64Index([ 168, 872, 1328, 2018, 2144, 2823, 4042, 4483, 5979, 6655, 6929, 7315, 8168, 8258, 8298, 9085, 9258, 11464, 11735, 13703, 16088, 16343, 16820, 17161, 17162, 19659, 21172, 22083, 22097, 26502, 26713, 26904, 28297, 29523, 29735, 30018, 30279, 30997, 31562, 31822, 32418, 32449, 35891, 36212, 36293, 36388, 37666, 38136, 38179, 38558], dtype='int64'), 1934: Int64Index([ 1691, 1798, 3152, 4157, 5461, 5866, 7909, 8141, 8502, 8771, 9300, 9977, 10332, 10480, 11013, 15149, 15944, 16173, 16199, 16508, 17625, 18363, 19755, 20635, 20664, 21005, 21753, 21788, 23237, 23994, 24445, 25770, 27313, 27903, 28895, 29042, 29627, 30168, 31062, 32944, 33546, 33732, 33884, 34713, 35752, 35754, 36701, 37025, 38223, 38549, 38567], dtype='int64'), 1935: Int64Index([ 2375, 5986, 6271, 7133, 7265, 7363, 8543, 9180, 9245, 10034, 12958, 14383, 14793, 15147, 15850, 16597, 18256, 18265, 19852, 19956, 20080, 22305, 22340, 24545, 25920, 26686, 28153, 28822, 29564, 29665, 29698, 29724, 30290, 32214, 32949, 33317, 34015, 34355, 35105, 35753, 36590, 38030, 38165, 38437, 38546, 38731], dtype='int64'), 1936: Int64Index([ 90, 1080, 1638, 3195, 3415, 5068, 6097, 6339, 6486, 6587, 6617, 7462, 7473, 7622, 7663, 9248, 9432, 10497, 10607, 10841, 11770, 12389, 13125, 13165, 14144, 14821, 14950, 15210, 15387, 15539, 15551, 15979, 16841, 16926, 17656, 17690, 18225, 18684, 20254, 20498, 20593, 22159, 22341, 23256, 25167, 26460, 27578, 29237, 29252, 29491, 29771, 31278, 31568, 31753, 32242, 33150, 34924, 36698, 37932, 38547], dtype='int64'), 1937: Int64Index([ 1484, 2822, 3411, 4543, 5472, 5554, 5832, 5928, 5980, 6261, 7489, 8554, 9667, 10451, 10669, 10875, 10886, 11999, 15648, 16202, 17142, 17218, 18472, 18776, 19475, 19520, 19690, 21771, 23955, 24063, 24464, 25227, 25261, 25975, 27173, 28002, 28721, 30215, 31144, 31146, 31473, 31672, 32473, 33151, 33736, 34329, 35410, 35586, 36819, 38382], dtype='int64'), 1938: Int64Index([ 1213, 2533, 2534, 2939, 3350, 4202, 5687, 7096, 8784, 9742, 10197, 11372, 11388, 11661, 12307, 14430, 15410, 16844, 16871, 17231, 17373, 18264, 19457, 19587, 25465, 25576, 27688, 28695, 30430, 30621, 31045, 31842, 31972, 32691, 33104, 38539, 38543, 38544], dtype='int64'), 1939: Int64Index([ 27, 2129, 2680, 2718, 3637, 4432, 5179, 6380, 8809, 8908, 9383, 9689, 9851, 10608, 10877, 11501, 13298, 13801, 15609, 16203, 17862, 18974, 19363, 19848, 21093, 24129, 24793, 25094, 25112, 25943, 26029, 26125, 26941, 27346, 28539, 29529, 29688, 29960, 31008, 32814, 33211, 33612, 35502, 36216, 36706, 37419, 37586, 38175, 38538], dtype='int64'), 1940: Int64Index([ 962, 2006, 2505, 2941, 7499, 8385, 8458, 8732, 8987, 9984, 10078, 10520, 11910, 13286, 13472, 14558, 14653, 14946, 16554, 17697, 19528, 19857, 19960, 20488, 21097, 22261, 26156, 27541, 27561, 28740, 29321, 30287, 32308, 32310, 32786, 35424, 35961, 36978, 37418], dtype='int64'), 1941: Int64Index([ 2623, 3107, 3634, 4697, 6174, 7524, 10619, 10623, 11135, 11594, 11663, 12058, 12380, 13641, 15512, 17186, 17189, 17725, 18788, 20513, 21107, 22785, 23049, 23586, 24128, 24193, 24425, 24465, 24572, 25173, 25231, 25442, 26296, 26691, 26926, 28545, 29575, 29789, 30074, 30411, 31517, 33332, 33347, 33652, 34251, 38531, 38532], dtype='int64'), 1942: Int64Index([ 2402, 3203, 3232, 3786, 4009, 6040, 7967, 8203, 8303, 8508, 8859, 8920, 9562, 10458, 10722, 10786, 12196, 13115, 13595, 15034, 17176, 17421, 18008, 19455, 21145, 22050, 22547, 23675, 24330, 25104, 25575, 27318, 29126, 29211, 29535, 29826, 31176, 31177, 31509, 32690, 32747, 33011, 33209, 33217, 33436, 34477, 34646, 35116, 38525, 38526, 38527, 38528, 38530], dtype='int64'), 1943: Int64Index([ 379, 813, 2975, 3159, 4106, 4168, 4173, 4861, 5444, 6151, 6888, 8393, 8820, 8914, 8946, 9233, 9593, 10194, 11375, 12071, 12572, 12619, 12956, 13847, 14279, 14637, 15329, 15702, 16369, 18304, 18356, 19099, 23642, 24415, 24547, 25175, 26053, 28175, 28532, 29102, 31178, 32945, 32972, 33785, 33865, 33924, 33959, 34351, 34358, 35012, 35365, 35524, 36213, 36950, 38460, 38518, 38519], dtype='int64'), 1944: Int64Index([ 1697, 2177, 2960, 3187, 4236, 4630, 5478, 7243, 7427, 8120, 8566, 8808, 9491, 10406, 11217, 11415, 11928, 12378, 12392, 15002, 15171, 15689, 16659, 16889, 17059, 17173, 17289, 17496, 17525, 17696, 18562, 18853, 19678, 19926, 22277, 22630, 24612, 25630, 25904, 25919, 27786, 30444, 31560, 31756, 31931, 33000, 33081, 33596, 34010, 34141, 34604, 35057, 35191, 36686], dtype='int64'), 1945: Int64Index([ 1895, 4041, 4347, 5848, 6263, 6448, 7725, 8361, 8481, 11835, 12353, 13668, 14692, 14896, 15013, 15232, 15695, 16893, 17093, 17733, 18414, 20512, 20636, 21345, 21635, 22998, 23626, 23767, 24492, 24583, 24849, 25021, 25672, 26043, 26542, 28116, 28338, 29271, 30813, 31017, 31814, 33139, 33912, 35013, 36058, 38330, 38509, 38510, 38511], dtype='int64'), 1946: Int64Index([ 30, 697, 2397, 3641, 4076, 4380, 5001, 6235, 8507, 8517, 9242, 10383, 11106, 12064, 12679, 12684, 13270, 14606, 14853, 15035, 15423, 16760, 17079, 17550, 17897, 18851, 19265, 20298, 20522, 22926, 24014, 25544, 25696, 25773, 26142, 26600, 26622, 27108, 27258, 27738, 28723, 28996, 29486, 30554, 30656, 31194, 31573, 32251, 32304, 32774, 33278, 33809, 35910, 37189, 37417, 37605, 38501, 38502, 38503], dtype='int64'), 1947: Int64Index([ 315, 1254, 1502, 2343, 2594, 2663, 3968, 4339, 5206, 5865, 6265, 6293, 7850, 8008, 8053, 10233, 10509, 10529, 11096, 11534, 11576, 11755, 12575, 13087, 13201, 14414, 14530, 14654, 15114, 16685, 17411, 17498, 17815, 18087, 18271, 18767, 19654, 20171, 20502, 20757, 21228, 21888, 22233, 22801, 23274, 24607, 25026, 25438, 25649, 25963, 27433, 29245, 29662, 29761, 30708, 30821, 30847, 31085, 31261, 31492, 31725, 32416, 32748, 32773, 33075, 33841, 33955, 33956, 34976, 35432, 35433, 35925, 36705, 37248, 38067], dtype='int64'), 1948: Int64Index([ 1645, 2052, 2377, 2998, 3265, 3636, 4046, 4104, 4113, 4277, 4454, 5335, 5844, 6191, 6512, 7718, 8021, 8073, 8503, 8556, 8982, 8995, 9552, 9765, 10368, 10460, 10926, 11161, 11289, 11307, 11392, 11641, 11737, 12180, 12571, 13777, 14595, 14699, 14826, 15087, 15688, 15982, 16784, 16822, 17356, 18176, 18253, 18266, 18272, 19396, 19746, 21092, 21564, 21658, 22244, 22969, 23637, 24599, 25169, 26103, 26451, 26920, 27182, 28311, 28688, 29532, 30004, 30623, 30700, 30710, 31148, 31156, 31170, 31329, 31355, 31543, 31577, 34095, 35607, 36282, 36292, 36828, 37062, 37945, 38498, 38499, 38500], dtype='int64'), 1949: Int64Index([ 592, 1784, 2015, 3022, 3130, 3450, 3649, 5183, 5392, 6141, 6211, 6340, 6377, 6910, 7351, 7390, 7667, 8513, 9115, 10084, 10230, 10437, 11103, 11120, 11562, 11994, 12047, 12203, 12476, 12798, 13158, 13181, 13191, 13504, 13540, 13733, 13849, 14138, 15150, 16504, 16731, 17128, 17501, 19788, 20485, 21102, 21195, 21339, 21891, 22182, 22210, 22276, 23794, 23954, 24812, 25081, 25276, 26625, 26894, 27023, 27048, 27545, 27907, 28183, 28327, 29522, 29547, 29574, 29839, 30701, 31033, 31401, 31944, 33427, 33714, 33771, 33783, 33936, 36073, 36294, 36947, 37031, 37384, 37623, 38490, 38491, 38493, 38496, 38497], dtype='int64'), 1950: Int64Index([ 2520, 2932, 2974, 3307, 3489, 3984, 4197, 5174, 6472, 7285, 8257, 8846, 8899, 9072, 9108, 9166, 9237, 9275, 10717, 12232, 12382, 13157, 13192, 13372, 13541, 14827, 15385, 15703, 16863, 17381, 18648, 18649, 18930, 19011, 19151, 20806, 20812, 21202, 21383, 23609, 23877, 24554, 24768, 24835, 24984, 25082, 25501, 26237, 26280, 26623, 27114, 27267, 27504, 27654, 28102, 28737, 28980, 29795, 30756, 31333, 31519, 31726, 33136, 33421, 33891, 33966, 33999, 34007, 34269, 34796, 35112, 36496, 37175, 37368, 37626, 37946], dtype='int64'), 1951: Int64Index([ 800, 1139, 1769, 1884, 2017, 2803, 2826, 2902, 3262, 5911, 6118, 6188, 6418, 6690, 7022, 7402, 7425, 7601, 8064, 8249, 8350, 8381, 9046, 9769, 11956, 12350, 13164, 13539, 13677, 14103, 14598, 14604, 14628, 15281, 15492, 16121, 16479, 17247, 17364, 17489, 18254, 20048, 20065, 20144, 21454, 21547, 22580, 23334, 24163, 25182, 26056, 26913, 27020, 27322, 28537, 28592, 28621, 28652, 29081, 29244, 29642, 30096, 30713, 30898, 31013, 31665, 31670, 31971, 32354, 33273, 34824, 35028, 36430, 37040, 37557, 38381, 38481, 38482, 38483], dtype='int64'), 1952: Int64Index([ 1489, 1665, 1683, 2353, 2545, 2563, 2843, 3481, 3651, 4554, 5212, 5527, 6470, 7818, 7931, 8189, 8501, 8561, 9417, 9568, 10199, 10644, 10743, 10764, 11642, 11881, 12057, 12287, 12540, 12807, 13067, 13513, 13730, 13854, 13904, 13979, 14272, 14938, 15383, 15694, 16026, 16270, 16635, 17910, 18306, 18389, 18897, 18943, 19310, 19692, 19708, 19842, 20163, 20618, 21085, 23522, 23608, 25551, 26362, 26638, 27131, 27414, 27856, 28408, 28526, 28655, 28768, 28974, 29024, 29559, 29637, 30535, 31507, 31734, 31988, 32490, 32842, 33080, 33223, 33708, 34267, 37026, 37174, 38407, 38441, 38478, 38480], dtype='int64'), 1953: Int64Index([ 64, 1344, 2914, 3314, 4027, 4169, 4608, 5038, 5619, 5719, 6020, 6758, 7094, 7104, 7407, 8160, 8297, 10544, 10835, 11258, 11503, 11706, 11867, 12282, 12906, 13357, 14688, 14715, 14825, 15041, 15048, 15363, 15454, 15542, 16833, 17252, 17386, 19290, 19391, 19559, 20160, 20495, 20592, 21000, 21923, 22606, 23033, 23643, 23722, 24180, 25284, 26238, 26624, 27118, 27220, 27641, 27879, 28097, 28462, 28685, 29489, 29686, 29719, 29746, 29935, 30586, 31164, 31385, 32919, 33015, 33049, 33236, 33241, 33255, 33315, 33778, 33844, 34041, 34562, 34692, 36710, 36818, 36999, 37426, 38465, 38468, 38471, 38472], dtype='int64'), 1954: Int64Index([ 93, 589, 749, 1548, 1699, 1797, 2668, 3680, 4085, 4848, ... 35514, 36175, 36352, 37629, 37701, 38298, 38462, 38463, 38466, 38467], dtype='int64', length=102), 1955: Int64Index([ 762, 1679, 1875, 2976, 3836, 4075, 4290, 4944, 5163, 5547, ... 35005, 35722, 36000, 36270, 36630, 36949, 37548, 38456, 38457, 38458], dtype='int64', length=115), 1956: Int64Index([ 651, 1141, 1826, 2046, 2411, 2543, 2713, 2812, 2872, 3095, ... 35081, 35643, 36229, 37316, 37364, 37367, 37730, 38442, 38443, 38444], dtype='int64', length=147), 1957: Int64Index([ 1, 16, 819, 1113, 1481, 2315, 2691, 2897, 3457, 3629, ... 36700, 37196, 37611, 38097, 38431, 38432, 38434, 38435, 38436, 38488], dtype='int64', length=107), 1958: Int64Index([ 1180, 2451, 2978, 3318, 3479, 3571, 3748, 3770, 3851, 3897, ... 37544, 37706, 38356, 38418, 38419, 38420, 38426, 38427, 38429, 38487], dtype='int64', length=122), 1959: Int64Index([ 1688, 2249, 2272, 2406, 2721, 2728, 3033, 3271, 3394, 3675, ... 37576, 38321, 38347, 38411, 38412, 38413, 38414, 38415, 38416, 38417], dtype='int64', length=128), 1960: Int64Index([ 39, 140, 1586, 2026, 2519, 2521, 2724, 3426, 3625, 5009, ... 38383, 38384, 38385, 38386, 38387, 38390, 38403, 38406, 38409, 38410], dtype='int64', length=131), 1961: Int64Index([ 21, 956, 2504, 2682, 3253, 4123, 4130, 4161, 4424, 4869, ... 36857, 37705, 37728, 37793, 38122, 38372, 38374, 38375, 38377, 38380], dtype='int64', length=102), 1962: Int64Index([ 83, 1383, 1648, 2418, 2422, 2425, 3112, 3370, 3393, 3579, ... 35900, 35920, 35922, 36308, 36858, 36860, 37126, 38367, 38370, 38461], dtype='int64', length=143), 1963: Int64Index([ 72, 1551, 2025, 3228, 3375, 3543, 3710, 3915, 4043, 4239, ... 36662, 36859, 37219, 37622, 38172, 38353, 38354, 38355, 38357, 38359], dtype='int64', length=143), 1964: Int64Index([ 361, 366, 374, 411, 974, 1746, 1781, 2313, 2341, 2409, ... 36965, 36966, 36971, 37404, 38127, 38348, 38349, 38350, 38351, 38352], dtype='int64', length=182), 1965: Int64Index([ 252, 1304, 1576, 1642, 1671, 2448, 2774, 2935, 2936, 2990, ... 36426, 36431, 36515, 36521, 36678, 37366, 37521, 38208, 38341, 38342], dtype='int64', length=163), 1966: Int64Index([ 75, 756, 1105, 1222, 1646, 1647, 1917, 2233, 2349, 2690, ... 37608, 38032, 38279, 38280, 38281, 38282, 38283, 38334, 38335, 38340], dtype='int64', length=180), 1967: Int64Index([ 69, 78, 948, 952, 1316, 2379, 2546, 2819, 3141, 3452, ... 37416, 37538, 37704, 38148, 38263, 38264, 38265, 38266, 38269, 38270], dtype='int64', length=176), 1968: Int64Index([ 1390, 2310, 2381, 3014, 3442, 4223, 4400, 4508, 4570, 4599, ... 37246, 37390, 37621, 37914, 38150, 38242, 38248, 38249, 38262, 38358], dtype='int64', length=172), 1969: Int64Index([ 84, 311, 788, 1022, 1619, 1802, 1877, 1971, 2021, 2126, ... 38194, 38222, 38224, 38227, 38228, 38229, 38230, 38231, 38240, 38250], dtype='int64', length=207), 1970: Int64Index([ 123, 799, 867, 1283, 1465, 1632, 2071, 2082, 2088, 2755, ... 37901, 38162, 38206, 38215, 38216, 38217, 38218, 38219, 38220, 38221], dtype='int64', length=190), 1971: Int64Index([ 76, 347, 802, 1216, 1859, 1916, 2156, 2360, 2499, 3646, ... 37643, 38160, 38161, 38188, 38198, 38199, 38200, 38207, 38209, 38211], dtype='int64', length=219), 1972: Int64Index([ 26, 169, 961, 1670, 1909, 2043, 2979, 3643, 3827, 3986, ... 38184, 38185, 38186, 38187, 38189, 38190, 38191, 38192, 38193, 38197], dtype='int64', length=207), 1973: Int64Index([ 82, 1117, 1524, 1698, 1711, 2243, 2270, 2388, 2576, 2625, ... 37939, 38101, 38118, 38141, 38142, 38145, 38146, 38147, 38149, 38286], dtype='int64', length=213), 1974: Int64Index([ 77, 81, 1286, 1558, 1717, 1728, 1840, 1900, 2014, 2210, ... 36744, 37516, 37965, 38128, 38131, 38132, 38133, 38138, 38139, 38140], dtype='int64', length=223), 1975: Int64Index([ 360, 816, 1196, 1436, 1625, 1835, 1943, 2212, 2219, 2267, ... 37409, 37580, 37795, 38110, 38114, 38115, 38116, 38119, 38121, 38124], dtype='int64', length=196), 1976: Int64Index([ 1197, 1727, 1962, 1969, 2020, 2077, 2299, 2574, 2580, 2697, ... 37284, 37559, 37900, 38096, 38098, 38099, 38100, 38106, 38107, 38108], dtype='int64', length=233), 1977: Int64Index([ 73, 383, 1186, 1291, 1293, 1608, 1690, 1836, 1837, 1845, ... 38083, 38084, 38085, 38086, 38087, 38088, 38093, 38094, 38095, 38734], dtype='int64', length=220), 1978: Int64Index([ 1122, 1278, 1479, 1953, 2385, 2488, 2712, 2726, 2980, 3137, ... 38069, 38070, 38071, 38072, 38073, 38074, 38075, 38076, 38079, 38081], dtype='int64', length=201), 1979: Int64Index([ 86, 1776, 1777, 1804, 2099, 2104, 2428, 3249, 3472, 3881, ... 37539, 37543, 37733, 38031, 38052, 38054, 38057, 38058, 38060, 38455], dtype='int64', length=227), 1980: Int64Index([ 2252, 2300, 2427, 2512, 2658, 2708, 2827, 3263, 3439, 3455, ... 37563, 37616, 37725, 37873, 38042, 38043, 38045, 38048, 38049, 38051], dtype='int64', length=224), 1981: Int64Index([ 88, 144, 328, 629, 657, 770, 1374, 1606, 2182, 2191, ... 37399, 37450, 37480, 37561, 37649, 37751, 37858, 37886, 38243, 38277], dtype='int64', length=274), 1982: Int64Index([ 38, 216, 340, 367, 401, 957, 1016, 1202, 1244, 1257, ... 35958, 36022, 36684, 36737, 36768, 37490, 37491, 37644, 38177, 38540], dtype='int64', length=266), 1983: Int64Index([ 70, 71, 217, 675, 759, 1043, 1168, 1251, 1621, 2476, ... 36525, 37029, 37165, 37226, 37496, 37814, 37846, 37898, 38244, 38730], dtype='int64', length=277), 1984: Int64Index([ 56, 89, 1169, 1179, 1200, 1545, 1602, 1834, 1918, 1935, ... 36647, 37059, 37067, 37125, 37295, 37485, 37896, 38120, 38312, 38379], dtype='int64', length=294), 1985: Int64Index([ 79, 477, 529, 582, 763, 794, 849, 1008, 1009, 1096, ... 36844, 36879, 37443, 37617, 38050, 38144, 38245, 38448, 38507, 38585], dtype='int64', length=322), 1986: Int64Index([ 429, 451, 519, 913, 1029, 1138, 1376, 1464, 1610, 1886, ... 38241, 38654, 38724, 38725, 38726, 38727, 38728, 38729, 38732, 38733], dtype='int64', length=325), 1987: Int64Index([ 80, 404, 627, 754, 1024, 1074, 1085, 1298, 1411, 1539, ... 38516, 38521, 38536, 38584, 38718, 38719, 38720, 38721, 38722, 38723], dtype='int64', length=343), 1988: Int64Index([ 45, 87, 111, 127, 227, 633, 905, 1131, 1151, 1188, ... 38055, 38062, 38169, 38285, 38397, 38550, 38583, 38689, 38716, 38717], dtype='int64', length=391), 1989: Int64Index([ 85, 391, 527, 1220, 1308, 1387, 1458, 1612, 1704, 2161, ... 38123, 38246, 38453, 38596, 38709, 38710, 38711, 38712, 38714, 38715], dtype='int64', length=393), 1990: Int64Index([ 184, 313, 341, 364, 530, 781, 906, 1069, 1142, 1384, ... 37556, 37560, 37675, 37722, 38089, 38129, 38705, 38706, 38707, 38708], dtype='int64', length=393), 1991: Int64Index([ 124, 384, 694, 745, 858, 1165, 1193, 1385, 1509, 1749, ... 38313, 38317, 38473, 38477, 38695, 38699, 38700, 38701, 38702, 38704], dtype='int64', length=410), 1992: Int64Index([ 110, 132, 178, 179, 286, 453, 454, 455, 479, 547, ... 38143, 38210, 38332, 38464, 38551, 38646, 38694, 38696, 38697, 38698], dtype='int64', length=435), 1993: Int64Index([ 4, 6, 46, 100, 181, 273, 459, 512, 793, 1015, ... 37785, 37903, 37924, 37925, 38113, 38252, 38362, 38495, 38692, 38693], dtype='int64', length=441), 1994: Int64Index([ 0, 3, 9, 314, 402, 424, 548, 852, 876, 1004, ... 37424, 37630, 37748, 37940, 38152, 38214, 38344, 38688, 38690, 38691], dtype='int64', length=494), 1995: Int64Index([ 53, 55, 91, 101, 185, 275, 377, 449, 538, 669, ... 37607, 37736, 38111, 38155, 38226, 38421, 38508, 38533, 38685, 38686], dtype='int64', length=489), 1996: Int64Index([ 54, 92, 112, 327, 368, 393, 631, 804, 831, 939, ... 38554, 38579, 38581, 38609, 38649, 38677, 38681, 38682, 38683, 38684], dtype='int64', length=517), 1997: Int64Index([ 2, 7, 34, 109, 116, 151, 173, 192, 494, 585, ... 38324, 38368, 38376, 38474, 38529, 38552, 38594, 38601, 38675, 38676], dtype='int64', length=537), 1998: Int64Index([ 24, 188, 336, 356, 372, 409, 419, 430, 473, 490, ... 37658, 37679, 37684, 37765, 37834, 37856, 38257, 38423, 38564, 38644], dtype='int64', length=577), 1999: Int64Index([ 198, 210, 228, 309, 324, 370, 398, 643, 667, 693, ... 37928, 38040, 38408, 38445, 38590, 38593, 38616, 38655, 38672, 38673], dtype='int64', length=625), 2000: Int64Index([ 67, 104, 195, 278, 293, 308, 355, 389, 448, 486, ... 38182, 38258, 38271, 38292, 38452, 38476, 38486, 38595, 38670, 38671], dtype='int64', length=731), 2001: Int64Index([ 18, 28, 58, 96, 196, 211, 288, 342, 352, 407, ... 38164, 38274, 38278, 38336, 38400, 38512, 38514, 38561, 38678, 38703], dtype='int64', length=828), 2002: Int64Index([ 48, 115, 187, 209, 213, 224, 282, 285, 300, 348, ... 38517, 38576, 38598, 38599, 38600, 38619, 38647, 38666, 38667, 38668], dtype='int64', length=900), 2003: Int64Index([ 10, 103, 139, 193, 205, 317, 329, 464, 480, 482, ... 38425, 38450, 38459, 38513, 38566, 38575, 38582, 38640, 38652, 38687], dtype='int64', length=951), 2004: Int64Index([ 23, 59, 61, 121, 226, 320, 334, 369, 390, 413, ... 38325, 38343, 38360, 38395, 38396, 38430, 38535, 38557, 38589, 38653], dtype='int64', length=1136), 2005: Int64Index([ 129, 130, 153, 197, 220, 263, 268, 349, 350, 385, ... 38289, 38308, 38314, 38346, 38393, 38520, 38537, 38597, 38663, 38679], dtype='int64', length=1263), 2006: Int64Index([ 40, 44, 60, 97, 106, 113, 114, 131, 147, 158, ... 38296, 38297, 38315, 38392, 38405, 38470, 38534, 38541, 38556, 38610], dtype='int64', length=1515), 2007: Int64Index([ 65, 133, 135, 145, 149, 152, 199, 212, 221, 229, ... 38428, 38447, 38505, 38563, 38568, 38577, 38586, 38587, 38591, 38602], dtype='int64', length=1711), 2008: Int64Index([ 14, 15, 17, 43, 66, 117, 119, 136, 154, 170, ... 38284, 38295, 38361, 38378, 38401, 38469, 38515, 38542, 38555, 38578], dtype='int64', length=1963), 2009: Int64Index([ 12, 13, 19, 25, 36, 99, 142, 146, 163, 164, ... 38306, 38322, 38323, 38329, 38369, 38422, 38440, 38506, 38592, 38618], dtype='int64', length=1862), 2010: Int64Index([ 22, 33, 35, 42, 118, 156, 157, 174, 175, 182, ... 38345, 38364, 38388, 38391, 38446, 38580, 38622, 38634, 38660, 38661], dtype='int64', length=1886), 2011: Int64Index([ 31, 32, 51, 68, 98, 155, 167, 208, 214, 231, ... 38484, 38485, 38492, 38522, 38523, 38548, 38565, 38588, 38627, 38659], dtype='int64', length=1866), 2012: Int64Index([ 5, 41, 108, 134, 138, 148, 161, 166, 206, 207, ... 38489, 38494, 38553, 38630, 38632, 38642, 38643, 38645, 38656, 38657], dtype='int64', length=2042), 2013: Int64Index([ 8, 37, 47, 50, 52, 95, 102, 105, 125, 128, ... 38251, 38256, 38275, 38363, 38475, 38571, 38613, 38635, 38658, 38664], dtype='int64', length=2001), 2014: Int64Index([ 57, 62, 63, 74, 94, 107, 122, 137, 160, 190, ... 38628, 38631, 38636, 38637, 38641, 38651, 38662, 38665, 38680, 38713], dtype='int64', length=1887), 2015: Int64Index([ 29, 120, 126, 162, 176, 183, 253, 254, 255, 256, ... 38623, 38624, 38625, 38626, 38629, 38633, 38638, 38648, 38650, 38669], dtype='int64', length=1592), 2016: Int64Index([ 11, 49, 150, 262, 264, 432, 732, 1184, 1588, 1695, ... 38009, 38011, 38025, 38037, 38038, 38039, 38267, 38570, 38639, 38674], dtype='int64', length=257), 34943: Int64Index([13882], dtype='int64'), 39180: Int64Index([17115], dtype='int64')}
In [62]:
res = movies.groupby('年代').size().sort_index()[:-2] #### 按照年代分组,用size统计每年对应的数量,再以年代为索引排序,[:-2]删除最后两个无效数据
res
Out[62]:
年代 1888 2 1890 1 1892 1 1894 3 1895 8 ... 2012 2042 2013 2001 2014 1887 2015 1592 2016 257 Length: 126, dtype: int64
In [64]:
x = res.index
y = res.values
plt.figure(figsize=(10,6))
plt.title('每年电影数量的折线图',FontProperties=font,color='red',size=20)
plt.xlabel('年代',FontProperties=font,color='blue',size=15)
plt.ylabel('数量',FontProperties=font,color='blue',size=15)
plt.plot(x,y)
plt.show()
根据电影时长 画出一个饼图分布¶
In [65]:
#### 分布的范围:0-70 70-100 100-130 130-----
In [66]:
t = pd.cut([10,5,20,15,8], (2,10,15,50)) ### 第一个参数是待分割的数据 第二个参数是分割的范围
t
Out[66]:
[(2, 10], (2, 10], (15, 50], (10, 15], (2, 10]] Categories (3, interval[int64]): [(2, 10] < (10, 15] < (15, 50]]
In [72]:
res = t.value_counts() ### 将值相同的进行分组并且计算个数
res
Out[72]:
(2, 10] 3 (10, 15] 1 (15, 50] 1 dtype: int64
In [73]:
res.index
Out[73]:
CategoricalIndex([(2, 10], (10, 15], (15, 50]], categories=[(2, 10], (10, 15], (15, 50]], ordered=True, dtype='category')
In [74]:
res.values
Out[74]:
array([3, 1, 1], dtype=int64)
In [76]:
movie_time = movies['时长']
movie_time
Out[76]:
0 142.0 1 116.0 2 116.0 3 142.0 4 171.0 ... 38730 58.0 38731 98.0 38732 91.0 38733 78.0 38734 97.0 Name: 时长, Length: 38735, dtype: float64
In [79]:
res_time = pd.cut(movie_time,(0,70,100,130,1000)).value_counts()
res_time
Out[79]:
(70, 100] 15079 (0, 70] 10838 (100, 130] 10517 (130, 1000] 2299 Name: 时长, dtype: int64
In [85]:
x = res_time.index
y = res_time.values
plt.figure(figsize=(10,6))
plt.title('电影时长分布图', FontProperties=font, color='red', size=20)
patchs, l_text, p_text = plt.pie(y,labels=x, autopct='%.2f%%')
for l in l_text: ### l_text 就是一个lable的列表
l.set_size(15)
l.set_color('red')
for p in p_text: #### p_text: 就是百分比值
p.set_size(15)
p.set_color('white')
plt.show()
matplotlib模块第二部分:matplotlib模块 07
来源:https://www.cnblogs.com/zhangchaocoming/p/12417347.html