matplotlib 画统计图大全

柱状图

适用场景：适用于二维数据集（每个数据点包括两个值 x 和 y），但只有一个维度需要比较；用于显示一段时间内的数据变化，或显示各项之间的比较情况。适用于枚举型的数据，比如地域之间的比较，数据没有必然的连续性。
优势：柱状图利用柱子的高度，反映数据的差异，肉眼对高度差异很敏感。

劣势：柱状图的局限在于只适用中小规模的数据集。

from matplotlib.ticker import FuncFormatter
import matplotlib.pyplot as plt
import numpy as np

# Dollar amounts to plot, and one integer x position per amount.
money = [1.5e5, 2.5e6, 5.5e6, 2.0e7]
x = np.arange(len(money))


def millions(x, pos):
    """Format the tick value ``x`` as millions of dollars, e.g. ``'$2.5M'``.

    The two arguments are the tick value and the tick position; ``pos`` is
    accepted but not used.
    """
    scaled = x * 1e-6
    return f'${scaled:1.1f}M'


# Draw the bar chart through the Axes object, labelling the y ticks with
# the `millions` formatter and the x ticks with the people's names.
fig, ax = plt.subplots()
formatter = FuncFormatter(millions)
ax.yaxis.set_major_formatter(formatter)
ax.bar(x, money)
ax.set_xticks(x)
ax.set_xticklabels(('Bill', 'Fred', 'Mary', 'Sue'))
plt.show()

1.png

堆积柱状图

不仅可以直观地看出每个系列的值，还能够反映出系列的总和；尤其是当需要看某一单位的总和以及各系列值的比重时，最为适合。

import numpy as np
import matplotlib.pyplot as plt


# Per-group means and standard deviations for the two stacked series.
menMeans = (20, 35, 30, 35, 27)
womenMeans = (25, 32, 34, 20, 25)
menStd = (2, 3, 4, 1, 2)
womenStd = (3, 5, 2, 3, 3)

N = len(menMeans)   # number of groups
ind = np.arange(N)  # the x locations for the groups
width = 0.35        # the width of the bars: can also be a len(x) sequence

# Stack the women's bars on top of the men's by using the men's means as
# the `bottom` of the second series; both series carry error bars.
fig, ax = plt.subplots()
p1 = ax.bar(ind, menMeans, width, yerr=menStd)
p2 = ax.bar(ind, womenMeans, width, bottom=menMeans, yerr=womenStd)

ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(ind)
ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
ax.set_yticks(np.arange(0, 81, 10))
# One legend entry per series, taken from the first bar of each container.
ax.legend((p1[0], p2[0]), ('Men', 'Women'))

plt.show()

2.png

条形图（横向柱状图）

适用场景：显示各个项目之间的比较情况，和柱状图类似的作用。
优势：每个条都清晰表示数据，直观。

# Simple demo of a horizontal bar chart.
#
# pyplot was previously imported twice, once on each side of the
# rcdefaults() call; a single import ahead of any executable statement is
# sufficient.
import matplotlib.pyplot as plt
import numpy as np

# Restore Matplotlib's default rc settings before drawing this example.
plt.rcdefaults()

# Example data: one unseeded random score in [3, 13) and one random error
# in [0, 1) per person.
people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
y_pos = np.arange(len(people))
performance = 10 * np.random.rand(y_pos.size) + 3
error = np.random.rand(y_pos.size)

# Horizontal bars centred on each y position, with horizontal error bars
# and the people's names as y tick labels.
fig, ax = plt.subplots()
ax.barh(y_pos, performance, xerr=error, align='center', alpha=0.4)
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Performance')
ax.set_title('How fast do you want to go today?')
plt.show()

3.png

折线图

适用场景: 折线图适合二维的大数据集，还适合多个二维数据集的比较。一般用来表示趋势的变化，横轴一般为日期字段。
优势：容易反映出数据变化的趋势。

import sys

import matplotlib.pyplot as plt
import tushare as ts


# Fetch historical data for the SSE 50 (Shanghai Stock Exchange 50) index.
data = ts.get_hist_data('sz50', start='2018-11-01', end='2019-03-25')

# Sort the rows by index.
# NOTE(review): presumably the index is the trading date, so this puts the
# rows in chronological order — confirm against the tushare documentation.
data = data.sort_index()

# A basic line chart: closing price against the row number.
x = range(len(data))
closing_prices = data['close']
plt.plot(x, closing_prices)
plt.show()

4.png

数据地图

适用场景：适用于有空间位置的数据集，一般分成行政地图（气泡图、面积图）和GIS地图。行政地图一般有省份、城市数据就够了（比如福建-泉州）；而GIS地图则需要经纬度数据，更细化到具体区域，只要有数据，可做区域、全国甚至全球的地图。
优劣势：特殊状况下使用，涉及行政区域。

import requests
from csv import DictReader
# Download the USGS feed of M4.5+ earthquakes for the past month as CSV and
# extract longitude, latitude and a magnitude-derived marker size per quake.
DATA_URL = 'http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_month.csv'
print("Downloading", DATA_URL)
# A timeout keeps the script from hanging forever on an unresponsive server.
resp = requests.get(DATA_URL, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as CSV.
resp.raise_for_status()
quakes = list(DictReader(resp.text.splitlines()))
# Plain Python lists on purpose: this version avoids numpy/pandas.
lngs = [float(q['longitude']) for q in quakes]
lats = [float(q['latitude']) for q in quakes]
# 2 ** magnitude is used as the marker size when the quakes are scattered.
mags = [2 ** float(q['mag']) for q in quakes]

# %matplotlib  # in iPython
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
# World map with a faded "blue marble" background and coastlines, with the
# earthquakes scattered on top (marker size taken from `mags`).
plt.figure(figsize=(14, 8))
earth = Basemap()
earth.bluemarble(alpha=0.42)
earth.drawcoastlines(color='#555566', linewidth=1)
plt.scatter(lngs, lats, s=mags, c='red', alpha=0.5, zorder=10)
# NOTE(review): this label hard-codes a date although the feed is fetched
# live — confirm whether it should be updated.
plt.xlabel("M4.5 earthquakes in the past 30 days from March 18, 2016 (USGS)")

# Save the same figure twice: a high-resolution PNG and an SVG.
plt.savefig('usgs-4.5quakes-bluemarble.png', dpi=350)
plt.savefig('usgs-4.5quakes.svg')

# Same map drawn through an explicit Figure/Axes pair instead of the
# implicit pyplot state: coastlines only, quakes scattered on top.
fig, ax = plt.subplots()
earth = Basemap(ax=ax)
earth.drawcoastlines(color='#555566', linewidth=1)
ax.scatter(lngs, lats, s=mags, c='red', alpha=0.5, zorder=10)
ax.set_xlabel("Hello there")
fig.savefig('hello-ax.png')

import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap


# pandas variant: read the USGS CSV feed straight into a DataFrame and
# scatter the quakes over a coastline map.
DATA_URL = 'http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_month.csv'
print("Downloading", DATA_URL)
df = pd.read_csv(DATA_URL)

fig, ax = plt.subplots()
earth = Basemap(ax=ax)
earth.drawcoastlines(color='#556655', linewidth=0.5)
# NOTE(review): marker size here is mag ** 2, whereas the list-based
# version earlier in this article uses 2 ** mag — confirm which is intended.
ax.scatter(
    df['longitude'],
    df['latitude'],
    s=df['mag'] ** 2,
    c='red',
    alpha=0.5,
    zorder=10,
)
ax.set_xlabel("This month's 4.5M+ earthquakes")
fig.savefig('usgs-monthly-4.5M.png')

real5.png

饼图（环图）

适用场景：显示各项的大小与各项总和的比例。适用于简单的占比比例图，在不要求数据精细的情况下适用。
优势：明确显示数据的比例情况，尤其适合渠道来源等场景。
劣势：不会显示具体的数值，只能反映整体的占比情况。

import matplotlib.pyplot as plt

# Pie chart; the slices are ordered and plotted counter-clockwise.
labels = ('Frogs', 'Hogs', 'Dogs', 'Logs')
sizes = [15, 30, 45, 10]
# Offset only the 2nd slice ('Hogs') from the centre.
explode = (0, 0.1, 0, 0)

fig1, ax1 = plt.subplots()
ax1.pie(
    sizes,
    explode=explode,
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
# Equal aspect ratio ensures that the pie is drawn as a circle.
ax1.axis('equal')

fig1.savefig('Demo_official.jpg')
plt.show()

5.png

雷达图

适用场景：雷达图适用于多维数据（四维以上），一般是用来表示某个数据字段的综合情况，数据点一般6个左右，太多的话辨别起来有困难。
优势：主要用来了解公司各项数据指标的变动情形及其好坏趋向。

劣势：理解成本较高。

# 导入第三方模块
import numpy as np
import matplotlib.pyplot as plt

# Make Chinese text and the minus sign render correctly.
plt.rcParams.update({
    'font.sans-serif': 'Microsoft YaHei',
    'axes.unicode_minus': False,
})

# Use the ggplot plotting style.
plt.style.use('ggplot')

# Build the data: one score per feature.
values = [3.2, 2.1, 3.5, 2.8, 3]
feature = ['personal ability', 'quality control', 'solve', 'serve', 'team']

N = len(values)
# Radar-chart angles: N evenly spaced directions dividing the full circle.
angles = np.linspace(0, 2 * np.pi, N, endpoint=False)

# Repeat the first point at the end so the radar outline closes.
values = np.append(values, values[0])
angles = np.append(angles, angles[0])

# Draw the radar chart.
fig = plt.figure()
# The axes must be polar for a radar chart.
ax = fig.add_subplot(111, polar=True)
# Outline of the closed polygon.
ax.plot(angles, values, 'o-', linewidth=2)
# Fill the polygon.
ax.fill(angles, values, alpha=0.25)
# Label each feature. `angles` holds one extra closing point (a repeat of
# the first angle), so drop it: newer Matplotlib releases raise ValueError
# when the number of tick labels differs from the number of tick locations.
ax.set_thetagrids(angles[:-1] * 180 / np.pi, feature)
# Radial range of the chart.
ax.set_ylim(0, 5)
# Title.
plt.title('behave')
# Grid lines.
ax.grid(True)
# Show the figure.
plt.show()

6.png

漏斗图

适用场景：漏斗图适用于业务流程多的流程分析，显示各流程的转化率。
优势:在网站分析中，通常用于转化率比较，它不仅能展示用户从进入网站到实现购买的最终转化率，还可以展示每个步骤的转化率，能够直观地发现和说明问题所在。
劣势:单一漏斗图无法评价网站某个关键流程中各步骤转化率的好坏。

# %matplotlib inline  # IPython/Jupyter magic; not valid in a plain Python script
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
from matplotlib.offsetbox import (TextArea, DrawingArea, OffsetImage,
                                  AnnotationBbox)
from matplotlib.cbook import get_sample_data

# Font handling for Chinese text and the minus sign.
plt.rcParams.update({
    'font.sans-serif': 'Microsoft YaHei',
    'axes.unicode_minus': False,
})

# Funnel data. Each stage is drawn as a bar from x2 (left edge) to x3
# (right edge), so every bar has length x1 and is centred on x1.max() / 2.
width = 0.55
x1 = np.array([1000, 500, 300, 200, 150])  # count at each funnel stage
N = len(x1)                                # number of stages, from the data
x2 = (x1.max() - x1) / 2                   # spacer left of each bar
x3 = x1 + x2                               # right edge of each bar


# Reversed y positions (N-1 ... 0) so the first stage is drawn at the top.
y = np.arange(N)[::-1]
labels = ['浏览商品', '放入购物车', '生成订单', '支付订单', '完成交易']

# figure
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)

# plot: red bars reach out to x3; white helper bars of length x2 then mask
# the left part, leaving a centred bar of length x1 for every stage.
ax.barh(y, x3, width, tick_label=labels, color='r', alpha=0.85)
ax.plot(x3, y, 'red', alpha=0.7)
ax.barh(y, x2, width, color='w', alpha=1)  # helper (masking) bars
ax.plot(x2, y, 'red', alpha=0.7)

# setting: conversion rate from each stage to the next, as a percentage.
transform = ['%.2f%%' % ((nxt / cur) * 100)
             for cur, nxt in zip(x1[:-1], x1[1:])]
# Anchor each rate at the horizontal centre of the funnel, on the row of
# the stage it leads into (derived from the data instead of hard-coded).
l = [(float(x1.max()) / 2, int(row)) for row in y[1:]]
for a, b in zip(transform, l):
    # `minimumdescent` is no longer passed: it was deprecated and has been
    # removed from current Matplotlib releases, where it raises TypeError.
    offsetbox = TextArea(a)
    ab = AnnotationBbox(offsetbox, b,
                        xybox=(0, 40),
                        boxcoords="offset points",
                        arrowprops=dict(arrowstyle="->"))
    ax.add_artist(ab)
ax.set_xticks([0, x1.max()])
ax.set_yticks(y)

plt.show()

7.png

散点图

适用场景：显示若干数据系列中各数值之间的关系，类似XY轴，判断两变量之间是否存在某种关联。散点图适用于三维数据集，但其中只有两维数据是需要比较的。另外，散点图还可以看出极值的分布情况。
优势：对于处理值的分布和数据点的分簇区域（通过设置横纵项的辅助线），散点图都很理想。如果数据集中包含非常多的点，那么散点图便是最佳图表类型。

劣势：在点状图中显示多个序列看上去非常混乱。

import matplotlib.pyplot as plt
import numpy as np

# Seed the global RNG so the example is reproducible.
np.random.seed(19680801)


# x: 0, 2, ..., 48; y: a power curve plus uniform noise in [0, 30);
# s: marker sizes drawn uniformly from [500, 1300).
x = np.arange(0.0, 50.0, 2.0)
y = np.power(x, 1.3) + 30.0 * np.random.rand(x.size)
s = 500 + 800 * np.random.rand(x.size)

# Scatter the points with club-suit markers sized by `s`, then label the
# axes and add a legend.
fig, ax = plt.subplots()
ax.scatter(
    x,
    y,
    s,
    c="g",
    alpha=0.5,
    marker=r'$\clubsuit$',
    label="Luck",
)
ax.set_xlabel("Leprechauns")
ax.set_ylabel("Gold")
ax.legend(loc='upper left')
plt.show()

8.png

matplotlib 画统计图大全

matplotlib 画统计图大全

推荐阅读更多精彩内容