一幅可视化图的基本结构
通常,使用 numpy 组织数据, 使用 matplotlib API 进行数据图像绘制。 一幅数据图基本上包括如下结构:
- Data: 数据区,包括数据点、描绘形状
- Axis: 坐标轴,包括 X 轴、 Y 轴及其标签、刻度尺及其标签
- Title: 标题,数据图的描述
- Legend: 图例,区分图中包含的多种曲线或不同分类的数据
其他的还有图形文本 (Text)、注解 (Annotate)等其他描述
画法
下面以常规图为例,详细记录作图流程及技巧。按照绘图结构,可将数据图的绘制分为如下几个步骤:
- 导入 matplotlib 及相关工具包
- 准备数据,numpy 数组存储
- 绘制原始曲线
- 配置标题、坐标轴、刻度、图例
- 添加文字说明、注解
- 显示、保存绘图结果
实战
1.导包
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
2.准备数据
# 定义数据部分
x = np.arange(0., 10, 0.2)
y1 = np.cos(x)
y2 = np.sin(x)
y3 = np.sqrt(x)
3.绘制基本曲线
# 绘制 3 条函数曲线
plt.plot(x, y1, color='blue', linewidth=1.5, linestyle='-', marker='.', label=r'$y = cos{x}$')
plt.plot(x, y2, color='green', linewidth=1.5, linestyle='-', marker='*', label=r'$y = sin{x}$')
plt.plot(x, y3, color='m', linewidth=1.5, linestyle='-', marker='x', label=r'$y = \sqrt{x}$')
颜色
r 红色
g 绿色
b 蓝色
c cyan
m 紫色
y 土黄色
k 黑色
w 白色
linestyle 参数
linestyle 参数主要包含虚线('--')、点划线('-.')、点线(':')、实线('-')
marker 参数
marker参数设定在曲线上标记的特殊符号,以区分不同的线段
设置坐标轴
可通过如下代码,移动坐标轴 spines
# 坐标轴上移
ax = plt.subplot(111)
ax.spines['right'].set_color('none') # 去掉右边的边框线
ax.spines['top'].set_color('none') # 去掉上边的边框线
# 移动下边边框线,相当于移动 X 轴
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
# 移动左边边框线,相当于移动 y 轴
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
设置刻度尺间隔 lim、刻度标签 ticks
# 设置 x, y 轴的刻度取值范围
plt.xlim(x.min()*1.1, x.max()*1.1)
plt.ylim(-1.5, 4.0)
# 设置 x, y 轴的刻度标签值
plt.xticks([2, 4, 6, 8, 10], [r'2', r'4', r'6', r'8', r'10'])
plt.yticks([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
[r'-1.0', r'0.0', r'1.0', r'2.0', r'3.0', r'4.0'])
设置坐标轴和标题的说明
# 设置标题、x轴、y轴
plt.title(r'$the \ function \ figure \ of \ cos(), \ sin() \ and \ sqrt()$', fontsize=19)
plt.xlabel(r'$the \ input \ value \ of \ x$', fontsize=18, labelpad=88.8)
plt.ylabel(r'$y = f(x)$', fontsize=18, labelpad=12.5)
设置文字描述、注解
plt.text(4, 1.68, r'$x \in [0.0, \ 10.0]$', color='k', fontsize=15)
plt.text(4, 1.38, r'$y \in [-1.0, \ 4.0]$', color='k', fontsize=15)
给特殊点添加注解
# 特殊点添加注解
plt.scatter([8,],[np.sqrt(8),], 50, color ='m') # 使用散点图放大当前点
plt.annotate(r'$2\sqrt{2}$', xy=(8, np.sqrt(8)), xytext=(8.5, 2.2), fontsize=16, color='#090909', arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=0.1', color='#090909'))
设置图例
可使用如下两种方式,给绘图设置图例:
- 在 plt.plot 函数中添加 label 参数后,使用 plt.legend(loc='upper right')
- 不使用参数 label, 直接使用如下命令:
# 要和曲线顺序一一对应
plt.legend(['cos(x)', 'sin(x)', 'sqrt(x)'], loc='upper right')
网格线开关
# 显示网格线
plt.grid(True)
显示与图像保存
plt.show() # 显示
# plt.savefig('../figures/plot3d_ex.png', dpi=48) # 保存,前提目录存在;应在 plt.show() 之前调用,否则可能保存为空白图
完整的绘制程序
#coding:utf-8
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
# Sample the three functions on [0, 10) with a step of 0.2.
x = np.arange(0., 10, 0.2)
y1 = np.cos(x)
y2 = np.sin(x)
y3 = np.sqrt(x)

# Draw the three curves; each `label` is picked up by plt.legend() below.
plt.plot(x, y1, color='blue', linewidth=1.5, linestyle='-', marker='.', label=r'$y = cos{x}$')
plt.plot(x, y2, color='green', linewidth=1.5, linestyle='-', marker='*', label=r'$y = sin{x}$')
plt.plot(x, y3, color='m', linewidth=1.5, linestyle='-', marker='x', label=r'$y = \sqrt{x}$')

# Re-position the axes so that they cross at the data origin.
ax = plt.subplot(111)  # rows-cols-index
ax.spines['right'].set_color('none')  # hide the right border line
ax.spines['top'].set_color('none')  # hide the top border line
# Moving the bottom spine is equivalent to moving the x axis.
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
# Moving the left spine is equivalent to moving the y axis.
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))

# Value ranges shown on the x and y axes.
plt.xlim(x.min()*1.1, x.max()*1.1)
plt.ylim(-1.5, 4.0)

# Tick positions and the text shown at each tick.
plt.xticks([2, 4, 6, 8, 10], [r'2', r'4', r'6', r'8', r'10'])
plt.yticks([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
           [r'-1.0', r'0.0', r'1.0', r'2.0', r'3.0', r'4.0'])

# Free-standing text placed at data coordinates.
plt.text(4, 1.68, r'$x \in [0.0, \ 10.0]$', color='k', fontsize=15)
plt.text(4, 1.38, r'$y \in [-1.0, \ 4.0]$', color='k', fontsize=15)

# Highlight one point with a larger scatter marker and annotate it with an arrow.
plt.scatter([8,], [np.sqrt(8),], 50, color='m')
plt.annotate(r'$2\sqrt{2}$', xy=(8, np.sqrt(8)), xytext=(8.5, 2.2), fontsize=16, color='#090909',
             arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=0.1', color='#090909'))

# Title and axis labels.
plt.title(r'$the \ function \ figure \ of \ cos(), \ sin() \ and \ sqrt()$', fontsize=19)
plt.xlabel(r'$the \ input \ value \ of \ x$', fontsize=18, labelpad=88.8)
plt.ylabel(r'$y = f(x)$', fontsize=18, labelpad=12.5)

# Legend and its position. 'up right' is not a valid location string;
# the valid spelling is 'upper right'.
plt.legend(loc='upper right')
# Alternative without per-curve labels (names must follow the plotting order):
# plt.legend(['cos(x)', 'sin(x)', 'sqrt(x)'], loc='upper right')

# Show grid lines.
plt.grid(True)

# Display the figure.
plt.show()
常用图形
细节看:这里
- 曲线图:matplotlib.pyplot.plot(data)
- 直方图:matplotlib.pyplot.hist(data)
- 散点图:matplotlib.pyplot.scatter(data)
- 箱式图:matplotlib.pyplot.boxplot(data)
# Minimal examples of the four chart types listed above.
# NOTE(review): no new figure is created between the calls, so when run as one
# script they presumably all draw onto the same axes — confirm whether each
# example was meant to run in its own notebook cell.

# Line plot: y = x**2 sampled on [-5, 5) with a step of 0.1.
x = np.arange(-5,5,0.1)
y = x ** 2
plt.plot(x,y)
# Histogram of 1000 samples from np.random.normal (default parameters), 10 bins.
x = np.random.normal(size=1000)
plt.hist(x, bins=10)
# Scatter plot of two independently drawn samples of 1000 points each.
x = np.random.normal(size=1000)
y = np.random.normal(size=1000)
plt.scatter(x,y)
# Box plot of the x sample drawn just above.
plt.boxplot(x)
箱式图科普
找离群点,异常点
- 上边缘(Q3+1.5IQR)(最上面黑线)、下边缘(Q1-1.5IQR)(最下面黑线)、IQR=Q3-Q1
- 上四分位数(Q3)(蓝色框上边缘)、下四分位数(Q1)(蓝色框下边缘)
- 中位数(红线)
- 异常值(黑线以外)
- 处理异常值时与 3σ 标准的异同:统计边界是否受异常值影响、容忍度的大小
手册
数据降维
PCA(线性) 、 t-sne(非线性)
案例:自行车租赁数据分析与可视化
# Step 1: download the data set and do some light preprocessing.
import os  # portable path joining
import pandas as pd  # read the CSV into a DataFrame
import urllib.request  # fetch the archive over the network
import tempfile  # temporary working directory
import shutil  # remove the temporary directory afterwards
import zipfile  # unpack the downloaded archive

temp_dir = tempfile.mkdtemp()  # scratch directory for the download
data_source = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip'  # remote archive
try:
    # Build paths with os.path.join: the hard-coded '\' separator only worked
    # on Windows and relied on an invalid escape sequence.
    zipname = os.path.join(temp_dir, 'Bike-Sharing-Dataset.zip')
    urllib.request.urlretrieve(data_source, zipname)  # download the archive
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(zipname, 'r') as zip_ref:
        zip_ref.extractall(temp_dir)
    # Read the CSV from where it was extracted, not from the working directory.
    daily_path = os.path.join(temp_dir, 'day.csv')
    daily_data = pd.read_csv(daily_path)
finally:
    # Always remove the scratch directory, including on download/parse errors.
    shutil.rmtree(temp_dir)

daily_data['dteday'] = pd.to_datetime(daily_data['dteday'])  # parse date strings into datetimes
# Columns that are not used in the analysis below.
drop_list = ['instant', 'season', 'yr', 'mnth', 'holiday', 'workingday', 'weathersit', 'atemp', 'hum']
daily_data.drop(columns=drop_list, inplace=True)  # drop them in place
print(daily_data.head())  # quick look at the first rows
dteday weekday temp windspeed casual registered cnt
0 2011-01-01 6 0.344167 0.160446 331 654 985
1 2011-01-02 0 0.363478 0.248539 131 670 801
2 2011-01-03 1 0.196364 0.248309 120 1229 1349
3 2011-01-04 2 0.200000 0.160296 108 1454 1562
4 2011-01-05 3 0.226957 0.186900 82 1518 1600
# 步骤2:配置参数
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# 设置一些全局的资源参数,可以进行个性化修改
import matplotlib
# Global matplotlib defaults ("rc" = resource configuration) used by every
# figure created afterwards.
# Figure size: 14" x 7".
matplotlib.rc('figure', figsize=(14, 7))
# Base font size: 14.
matplotlib.rc('font', size=14)
# Hide the top and right axis lines.
matplotlib.rc('axes.spines', top=False, right=False)
# No grid, white plot background.
matplotlib.rc('axes', grid=False, facecolor='white')
# 步骤3:关联分析
# 散点图:分析变量关系
# 包装一个散点图的函数便于复用
def scatterplot(x_data, y_data, x_label, y_label, title):
    """Scatter ``y_data`` against ``x_data`` on a newly created figure.

    Args:
        x_data: Values for the horizontal axis.
        y_data: Values for the vertical axis.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the axes title.
    """
    _, axes = plt.subplots()
    # Small, slightly transparent markers in a fixed blue tone.
    axes.scatter(x_data, y_data, s=10, color='#539caf', alpha=0.75)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)


# Scatter plot: daily check-outs against temperature.
scatterplot(
    x_data=daily_data['temp'],
    y_data=daily_data['cnt'],
    x_label='Normalized temperature (C)',
    y_label='Check outs',
    title='Number of Check Outs vs Temperature',
)
# 曲线图:拟合变量关系
# Linear regression of daily check-outs on temperature.
import statsmodels.api as sm # ordinary least squares
from statsmodels.stats.outliers_influence import summary_table # summary information for a fitted model
x = sm.add_constant(daily_data['temp']) # add the constant term for y = kx + b
y = daily_data['cnt']
regr = sm.OLS(y, x) # ordinary least squares model
res = regr.fit()
# Pull the fitted data out of the model.
st, data, ss2 = summary_table(res, alpha=0.05) # alpha = 5%; st: summary table, data: detailed values, ss2: column names
# Column 2 of `data` is used as the fitted values — presumably the
# predicted-value column; TODO confirm against the names in ss2.
fitted_values = data[:,2]
# 包装曲线绘制函数
def lineplot(x_data, y_data, x_label, y_label, title):
    """Draw ``y_data`` against ``x_data`` as a single line on a new figure.

    Args:
        x_data: Values for the horizontal axis.
        y_data: Values for the vertical axis.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the axes title.
    """
    _, axes = plt.subplots()
    # Fully opaque line, width 2.
    axes.plot(x_data, y_data, lw=2, color='#539caf', alpha=1)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)


# Plot the fitted regression values against temperature.
lineplot(
    x_data=daily_data['temp'],
    y_data=fitted_values,
    x_label='Normalized temperature (C)',
    y_label='Check outs',
    title='Line of Best Fit for Number of Check Outs vs Temperature',
)
# 带置信区间的曲线图:评估曲线拟合结果
# Lower and upper bounds of the confidence band, taken from columns 4 and 5 of
# the summary data (the band is labelled '95% CI' when plotted).
predict_mean_ci_low, predict_mean_ci_upp = data[:, 4:6].T
# Collect x values and both bounds in one frame, ordered by x.
CI_df = pd.DataFrame({
    'x_data': daily_data['temp'],
    'low_CI': predict_mean_ci_low,
    'upper_CI': predict_mean_ci_upp,
}).sort_values('x_data')
# 绘制置信区间
def lineplotCI(x_data, y_data, sorted_x, low_CI, upper_CI, x_label, y_label, title):
    """Draw a fitted line together with a shaded confidence band.

    Args:
        x_data: x values of the fitted line.
        y_data: y values of the fitted line.
        sorted_x: x values for the band, in the order matching the bounds.
        low_CI: Lower bound of the band at each ``sorted_x``.
        upper_CI: Upper bound of the band at each ``sorted_x``.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the axes title.
    """
    _, axes = plt.subplots()
    # Fitted line.
    axes.plot(x_data, y_data, lw=1, color='#539caf', alpha=1, label='Fit')
    # Semi-transparent band between the lower and upper bound.
    axes.fill_between(sorted_x, low_CI, upper_CI, color='#539caf', alpha=0.4, label='95% CI')
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)
    # The legend uses the `label` arguments above; 'best' lets matplotlib choose the spot.
    axes.legend(loc='best')


# Fitted line with its confidence band.
lineplotCI(
    x_data=daily_data['temp'],
    y_data=fitted_values,
    sorted_x=CI_df['x_data'],
    low_CI=CI_df['low_CI'],
    upper_CI=CI_df['upper_CI'],
    x_label='Normalized temperature (C)',
    y_label='Check outs',
    title='Line of Best Fit for Number of Check Outs vs Temperature',
)
# 双坐标曲线图:(1)曲线拟合不满足置信阈值时,考虑增加独立变量;(2)分析不同尺度多变量的关系
# 双纵坐标绘图函数
def lineplot2y(x_data, x_label, y1_data, y1_color, y1_label, y2_data, y2_color, y2_label, title):
    """Draw two series over one x axis, each with its own y axis.

    Args:
        x_data: Shared x values.
        x_label: Text for the x-axis label.
        y1_data: Series drawn against the left y axis.
        y1_color: Colour of the first line and of the left y label.
        y1_label: Text for the left y-axis label.
        y2_data: Series drawn against the right y axis.
        y2_color: Colour of the second line and of the right y label.
        y2_label: Text for the right y-axis label.
        title: Text for the axes title.
    """
    _, left_axes = plt.subplots()
    left_axes.plot(x_data, y1_data, color=y1_color)
    left_axes.set_title(title)
    left_axes.set_xlabel(x_label)
    left_axes.set_ylabel(y1_label, color=y1_color)

    # Second axes that shares the x axis with the first one.
    right_axes = left_axes.twinx()
    right_axes.plot(x_data, y2_data, color=y2_color)
    right_axes.set_ylabel(y2_label, color=y2_color)
    # Make the right axis line visible for the second scale.
    right_axes.spines['right'].set_visible(True)


# Check-outs and wind speed over time, each on its own scale.
lineplot2y(
    x_data=daily_data['dteday'],
    x_label='Day',
    y1_data=daily_data['cnt'],
    y1_color='#539caf',
    y1_label='Check outs',
    y2_data=daily_data['windspeed'],
    y2_color='#7663b0',
    y2_label='Normalized windspeed',
    title='Check Outs and Windspeed Over Time',
)
# 步骤4:分布分析
# 直方图:粗略区间计数
# 绘制直方图的函数
def histogram(data, x_label, y_label, title):
    """Draw a 10-bin histogram of ``data`` on a new figure.

    Args:
        data: Values to bin.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the axes title.

    Returns:
        Whatever ``Axes.hist`` returns for this call.
    """
    _, axes = plt.subplots()
    hist_result = axes.hist(data, color='#539caf', bins=10)  # fixed number of bins
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)
    return hist_result


# Distribution of registered check-outs.
res = histogram(
    data=daily_data['registered'],
    x_label='Check outs',
    y_label='Frequency',
    title='Distribution of Registered Check Outs',
)
res[0]  # value of bins
res[1]  # boundary of bins
# 堆叠直方图:比较两个分布
# 绘制堆叠的直方图
def overlaid_histogram(data1, data1_name, data1_color, data2, data2_name, data2_color, x_label, y_label, title):
    """Draw two histograms over the same bin edges on a new figure.

    Args:
        data1: First sample; drawn fully opaque.
        data1_name: Legend label for the first sample.
        data1_color: Bar colour for the first sample.
        data2: Second sample; drawn on top with alpha 0.75.
        data2_name: Legend label for the second sample.
        data2_color: Bar colour for the second sample.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the axes title.
    """
    # Both histograms share one set of edges spanning the combined value range.
    max_nbins = 10
    lowest = min(np.min(data1), np.min(data2))
    highest = max(np.max(data1), np.max(data2))
    # linspace always yields exactly max_nbins + 1 edges and ends exactly on
    # `highest`. A float-step arange could produce an extra edge, or a last
    # edge just below the maximum, which would drop the largest value.
    bins = np.linspace(lowest, highest, max_nbins + 1)

    _, ax = plt.subplots()
    ax.hist(data1, bins=bins, color=data1_color, alpha=1, label=data1_name)
    ax.hist(data2, bins=bins, color=data2_color, alpha=0.75, label=data2_name)
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    ax.legend(loc='best')


# Compare the distributions of registered and casual check-outs.
overlaid_histogram(
    data1=daily_data['registered'],
    data1_name='Registered',
    data1_color='#539caf',
    data2=daily_data['casual'],
    data2_name='Casual',
    data2_color='#7663b0',
    x_label='Check outs',
    y_label='Frequency',
    title='Distribution of Check Outs By Type',
)
# 密度图:精细刻画概率分布
# 计算概率密度
from scipy.stats import gaussian_kde
# NOTE: this rebinds the module-level name `data` (used earlier for the
# regression summary) to the registered check-out counts.
data = daily_data['registered']
# Kernel density estimate: https://en.wikipedia.org/wiki/Kernel_density_estimation
# A scalar `bw_method` is used directly as the bandwidth factor; larger values
# give a smoother curve. This replaces overriding `covariance_factor` and
# calling the private `_compute_covariance()` by hand.
density_est = gaussian_kde(data, bw_method=0.3)
# Evaluation grid: from the smallest to the largest value in steps of 200.
x_data = np.arange(min(data), max(data), 200)
# 绘制密度估计曲线
def densityplot(x_data, density_est, x_label, y_label, title):
    """Draw a density estimate evaluated at ``x_data`` on a new figure.

    Args:
        x_data: Points at which the estimate is evaluated and drawn.
        density_est: Callable that maps ``x_data`` to density values.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the axes title.
    """
    _, axes = plt.subplots()
    density_values = density_est(x_data)
    axes.plot(x_data, density_values, color='#539caf', lw=2)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)


# Density curve of registered check-outs.
densityplot(
    x_data=x_data,
    density_est=density_est,
    x_label='Check outs',
    y_label='Frequency',
    title='Distribution of Registered Check Outs',
)
# 步骤5:组间分析(组间定量比较、分组粒度、组间聚类)
# 柱状图:一级类间均值方差比较
# 分天分析统计特征
# Mean and standard deviation of total check-outs per weekday.
# String aggregator names select pandas' own groupby mean/std. Passing
# np.mean / np.std is deprecated in pandas and will change which
# implementation is used in a future version.
mean_total_co_day = daily_data.groupby('weekday')['cnt'].agg(['mean', 'std'])
# 定义绘制柱状图的函数
def barplot(x_data, y_data, error_data, x_label, y_label, title):
    """Draw a bar chart with an error bar on each bar, on a new figure.

    Args:
        x_data: Bar positions.
        y_data: Bar heights.
        error_data: Error-bar size for each bar (passed as ``yerr``).
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the axes title.
    """
    _, axes = plt.subplots()
    axes.bar(x_data, y_data, color='#539caf', align='center')
    # ls='none' suppresses the line that would otherwise connect the points.
    axes.errorbar(x_data, y_data, yerr=error_data, color='#297083', ls='none', lw=5)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)


# Mean check-outs per weekday, with the standard deviation as error bars.
barplot(
    x_data=mean_total_co_day.index.values,
    y_data=mean_total_co_day['mean'],
    error_data=mean_total_co_day['std'],
    x_label='Day of week',
    y_label='Check outs',
    title='Total Check Outs By Day of Week (0 = Sunday)',
)
mean_total_co_day.columns
daily_data[['weekday', 'cnt']].groupby('weekday').agg([np.mean, np.std])
cnt
mean std
weekday
0 4228.828571 1872.496462
1 4338.123810 1793.073897
2 4510.663462 1826.911602
3 4548.538462 2038.095680
4 4667.259615 1939.433165
5 4690.288462 1874.624762
6 4550.542857 2196.692969
# 堆叠柱状图:多级类间相对占比比较
mean_by_reg_co_day = daily_data[['weekday', 'registered', 'casual']].groupby('weekday').mean()
mean_by_reg_co_day
registered casual
weekday
0 2890.533333 1338.295238
1 3663.990476 674.133333
2 3954.480769 556.182692
3 3997.394231 551.144231
4 4076.298077 590.961538
5 3938.000000 752.288462
6 3085.285714 1465.257143
# Mean registered and casual check-outs per weekday, plus their total and the
# share of each kind within that total.
mean_by_reg_co_day = (
    daily_data[['weekday', 'registered', 'casual']]
    .groupby('weekday')
    .mean()
    .assign(total=lambda df: df['registered'] + df['casual'])
    .assign(
        reg_prop=lambda df: df['registered'] / df['total'],
        casual_prop=lambda df: df['casual'] / df['total'],
    )
)
# 绘制堆积柱状图
def stackedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label, title):
    """Draw a stacked bar chart on a new figure.

    Args:
        x_data: Bar positions shared by every series.
        y_data_list: One sequence of bar heights per series, bottom to top.
        y_data_names: Legend label for each series.
        colors: Bar colour for each series.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the axes title.
    """
    _, ax = plt.subplots()
    # Running top of each stack. Every series starts where all the series
    # below it end, not just the one immediately before it, so three or more
    # series stack correctly.
    stack_top = np.zeros(len(x_data))
    for i, series in enumerate(y_data_list):
        heights = np.asarray(series, dtype=float)
        ax.bar(x_data, heights, color=colors[i], bottom=stack_top, align='center', label=y_data_names[i])
        stack_top = stack_top + heights
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    ax.legend(loc='upper right')  # legend position


# The proportions passed here sum to 1 per weekday, so every stack has height 1.
stackedbarplot(
    x_data=mean_by_reg_co_day.index.values,
    y_data_list=[mean_by_reg_co_day['reg_prop'], mean_by_reg_co_day['casual_prop']],
    y_data_names=['Registered', 'Casual'],
    colors=['#539caf', '#7663b0'],
    x_label='Day of week',
    y_label='Proportion of check outs',
    title='Check Outs By Registration Status and Day of Week (0 = Sunday)',
)
# 分组柱状图:多级类间绝对数值比较
# 绘制分组柱状图的函数
def groupedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label, title):
    """Draw a grouped (side-by-side) bar chart on a new figure.

    Args:
        x_data: Group centre positions; a float offset is added to it, so it
            must support element-wise addition (e.g. a NumPy array).
        y_data_list: One sequence of bar heights per series.
        y_data_names: Legend label for each series.
        colors: Bar colour for each series.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the axes title.
    """
    _, axes = plt.subplots()
    # Each group occupies a total width of 0.8, split evenly between the series.
    total_width = 0.8
    n_series = len(y_data_list)
    ind_width = total_width / n_series
    # Offset of each series' bar centre from the group centre.
    offsets = np.arange(-total_width/2+ind_width/2, total_width/2+ind_width/2, ind_width)
    for i, heights in enumerate(y_data_list):
        # Shift every series sideways by its own offset.
        axes.bar(x_data + offsets[i], heights, color=colors[i], label=y_data_names[i], width=ind_width)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)
    axes.legend(loc='upper right')


# Absolute registered and casual check-outs per weekday, side by side.
groupedbarplot(
    x_data=mean_by_reg_co_day.index.values,
    y_data_list=[mean_by_reg_co_day['registered'], mean_by_reg_co_day['casual']],
    y_data_names=['Registered', 'Casual'],
    colors=['#539caf', '#7663b0'],
    x_label='Day of week',
    y_label='Check outs',
    title='Check Outs By Registration Status and Day of Week (0 = Sunday)',
)
# 箱式图:多级类间数据分布比较(柱状图 + 堆叠直方图)
# 只需要指定分类的依据,就能自动绘制箱式图
# One array of total check-outs per distinct weekday value, in the order of `days`.
days = np.unique(daily_data['weekday'])
bp_data = [
    daily_data.loc[daily_data['weekday'] == day, 'cnt'].values
    for day in days
]
# 定义绘图函数
def boxplot(x_data, y_data, base_color, median_color, x_label, y_label, title):
    """Draw one box per entry of ``y_data`` on a new figure.

    Note that, despite the parameter names, ``base_color`` is applied to the
    median line, the box outline and the whisker caps, while ``median_color``
    is applied to the box fill and the whiskers.

    Args:
        x_data: Tick labels, one per box.
        y_data: Sequence of samples, one per box.
        base_color: Colour for median line, box outline and caps.
        median_color: Colour for box fill and whiskers.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the axes title.
    """
    _, axes = plt.subplots()
    median_style = {'color': base_color}
    # 'color' is the outline, 'facecolor' the fill of each box.
    box_style = {'color': base_color, 'facecolor': median_color}
    whisker_style = {'color': median_color}
    cap_style = {'color': base_color}
    axes.boxplot(
        y_data,
        patch_artist=True,  # draw boxes as patches so they can be filled
        medianprops=median_style,
        boxprops=box_style,
        whiskerprops=whisker_style,
        capprops=cap_style,
    )
    # Label the boxes with the supplied category values.
    axes.set_xticklabels(x_data)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)


# Distribution of total check-outs for each weekday.
boxplot(
    x_data=days,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Day of week',
    y_label='Check outs',
    title='Total Check Outs By Day of Week (0 = Sunday)',
)
散点图:
曲线图:
带置信区间的曲线图(浅蓝色区域为置信区间):
双坐标曲线图:
直方图:
堆叠直方图:
密度图:
柱状图:
堆叠柱状图:
分组柱状图:
箱式图:
简单总结
- 关联分析、数值比较:散点图、曲线图、
- 分布分析:直方图、密度图
- 涉及分类的分析:柱状图、箱式图
来源:CSDN
作者:Glen_Zou
链接:https://blog.csdn.net/qq_36551226/article/details/104461995