第 13 章

数据可视化——matplotlib、seaborn 与 Plotly 实战

第13章:数据可视化自动化——matplotlib、plotly 与报告生成

每周手动截图贴进 PPT、每月把数据复制到表格再画图——这些时间完全可以交给 Python。本章从库选型开始,深入 matplotlib 的 Figure/Axes 架构、seaborn 统计图表、plotly 交互可视化,最后教你把所有图表自动合成为可邮件发送的 HTML 周报。学完本章,你能在 3 分钟内自动生成一份包含 5 张专业图表的运营周报。

可视化库选型

定位 输出格式 最适合场景
matplotlib 基础,高度可定制 PNG/PDF/SVG 批量静态图表、出版级质量、精细控制
seaborn 基于 matplotlib 的统计可视化 PNG/PDF 热力图、箱线图、分布图
plotly 交互式可视化 HTML / PNG(需 kaleido) 交互仪表板、嵌入 HTML 报告
altair 声明式,基于 Vega-Lite HTML/SVG 快速原型、数据探索

**选型原则:**批量生成静态图表(邮件/PDF) terminal

pip install matplotlib seaborn plotly kaleido pandas openpyxl jinja2

matplotlib 专业图表

关键概念:Figure(整张画布)和 Axes(单个坐标系/图表)。永远用面向对象接口 fig, ax = plt.subplots(),而非 plt.plot() 全局状态接口——在循环中批量生成图表时后者会导致状态混乱。

sales_chart.py — 月度销售多系列折线图(出版级质量)

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pandas as pd
from pathlib import Path

plt.style.use("seaborn-v0_8-whitegrid")

def plot_monthly_sales(df: pd.DataFrame, output_path: str = "sales_chart.png") -> str:
    """
    绘制多产品线月度销售折线图。
    df 需含列:month(str)、product_line(str)、revenue(float)
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    colors = ["#2563eb", "#16a34a", "#dc2626", "#9333ea", "#ea580c"]

    for i, pl in enumerate(df["product_line"].unique()):
        sub = df[df["product_line"] == pl].sort_values("month")
        ax.plot(sub["month"], sub["revenue"],
                marker="o", linewidth=2.2, markersize=6,
                color=colors[i % len(colors)], label=pl)
        last = sub.iloc[-1]
        ax.annotate(f"¥{last['revenue']/10000:.0f}万",
                    xy=(last["month"], last["revenue"]),
                    xytext=(8, 0), textcoords="offset points",
                    fontsize=9, color=colors[i % len(colors)], va="center")

    ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f"¥{x/10000:.0f}万"))
    ax.set_xlabel("月份", fontsize=12); ax.set_ylabel("销售额", fontsize=12)
    ax.set_title("2024年各产品线月度销售额对比", fontsize=15, fontweight="bold", pad=16)
    ax.legend(loc="upper left", framealpha=0.9, fontsize=10)
    ax.tick_params(axis="x", rotation=45)

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(output_path, dpi=300, bbox_inches="tight")
    plt.close(fig)  # 关键:自动化脚本必须关闭 figure,否则内存泄漏
    return output_path

**中文字体问题:**matplotlib 默认不支持中文。在 macOS 上添加 plt.rcParams["font.family"] = "PingFang SC",在 Windows 上用 "SimHei",在 Linux 上用 "Noto Sans CJK SC"pip install matplotlib-fontja。plotly 无此问题。

seaborn 统计可视化

seaborn_charts.py — 热力图与箱线图

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

def plot_correlation_heatmap(df: pd.DataFrame, output_path: str = "corr.png") -> str:
    """相关系数矩阵热力图——发现变量间关联"""
    corr = df.select_dtypes(include="number").corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(corr, annot=True, fmt=".2f", cmap="RdYlGn",
                center=0, vmin=-1, vmax=1, linewidths=0.5, ax=ax)
    ax.set_title("变量相关系数矩阵", fontsize=14, fontweight="bold")
    fig.tight_layout(); fig.savefig(output_path, dpi=200, bbox_inches="tight"); plt.close(fig)
    return output_path

def plot_performance_boxplot(df: pd.DataFrame, output_path: str = "boxplot.png") -> str:
    """
    员工绩效分布箱线图(含异常值检测)。
    df 需含列:department(str)、score(float)
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    order = df.groupby("department")["score"].median().sort_values(ascending=False).index
    sns.boxplot(data=df, x="department", y="score", order=order, palette="husl",
                width=0.5, flierprops={"marker":"o","markersize":4,"alpha":0.5}, ax=ax)
    sns.stripplot(data=df, x="department", y="score", order=order,
                  color="black", alpha=0.3, size=3, jitter=True, ax=ax)
    ax.set_title("各部门员工绩效分布(含异常值检测)", fontsize=14, fontweight="bold")
    ax.tick_params(axis="x", rotation=30)
    fig.tight_layout(); fig.savefig(output_path, dpi=200, bbox_inches="tight"); plt.close(fig)
    return output_path

plotly 交互图表

plotly_charts.py

import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

def plotly_line(df, x, y, color=None, title="") -> str:
    """返回可嵌入 HTML 的 div 字符串(首次调用加载 CDN)"""
    fig = px.line(df, x=x, y=y, color=color, title=title, template="plotly_dark", markers=True)
    fig.update_traces(line_width=2.5)
    return fig.to_html(full_html=False, include_plotlyjs="cdn")

def plotly_bar(df, x, y, color=None, title="") -> str:
    """交互式条形图,后续图表 include_plotlyjs=False(CDN 只需加载一次)"""
    fig = px.bar(df, x=x, y=y, color=color, title=title, template="plotly_dark",
                 color_discrete_sequence=px.colors.qualitative.Set2, barmode="group")
    fig.update_layout(margin={"t":50,"b":30}, legend_title_text="")
    return fig.to_html(full_html=False, include_plotlyjs=False)

def plotly_funnel(stages: list, values: list, title: str = "") -> str:
    fig = go.Figure(go.Funnel(y=stages, x=values, textinfo="value+percent initial",
        marker_color=["#3b82f6","#6366f1","#8b5cf6","#a855f7","#c026d3"]))
    fig.update_layout(template="plotly_dark", title_text=title)
    return fig.to_html(full_html=False, include_plotlyjs=False)

def save_as_image(fig, path: str):
    """导出静态图片(需要 kaleido)"""
    fig.write_image(path, width=1200, height=600, scale=2)

自动化报告生成方案对比

方案 技术栈 优点 缺点 适合场景
PDF 报告 matplotlib + reportlab 格式固定、可印刷 无交互性、中文字体需手动配置 正式报告、存档
HTML 报告 Jinja2 + plotly 交互图表、可直接邮件发送、无需安装软件 依赖浏览器 管理层周报、数据仪表盘

实战项目:运营数据周报自动化

完整流程:读取多个 Excel 数据源 weekly_report.py(完整实现)

"""运营数据周报自动化生成系统"""
import os, smtplib, logging
from datetime import datetime, timedelta
from pathlib import Path
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from jinja2 import Template
from dotenv import load_dotenv

load_dotenv()
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
DATA_DIR = Path("data"); OUTPUT_DIR = Path("reports"); OUTPUT_DIR.mkdir(exist_ok=True)

# ── Step 1: 读取数据 ─────────────────────────────────────────────────

def load_weekly_data() -> dict[str, pd.DataFrame]:
    return {
        "sales":    pd.read_excel(DATA_DIR / "sales.xlsx",    parse_dates=["date"]),
        "traffic":  pd.read_excel(DATA_DIR / "traffic.xlsx",  parse_dates=["date"]),
        "orders":   pd.read_excel(DATA_DIR / "orders.xlsx",   parse_dates=["order_date"]),
        "products": pd.read_excel(DATA_DIR / "products.xlsx"),
        "regions":  pd.read_excel(DATA_DIR / "regions.xlsx"),
    }

# ── Step 2: 计算 KPI ─────────────────────────────────────────────────

def calc_kpis(data: dict) -> list[dict]:
    now = datetime.now()
    tw = now - timedelta(days=now.weekday())   # 本周一
    lw = tw - timedelta(days=7)                # 上周一
    sales = data["sales"]
    this_rev = sales[sales["date"] >= tw]["revenue"].sum()
    last_rev = sales[(sales["date"] >= lw) & (sales["date"] < tw)]["revenue"].sum()
    delta_rev = round((this_rev - last_rev) / last_rev * 100, 1) if last_rev else 0
    orders = data["orders"]
    this_ord = orders[orders["order_date"] >= tw].shape[0]
    last_ord = orders[(orders["order_date"] >= lw) & (orders["order_date"] < tw)].shape[0]
    delta_ord = round((this_ord - last_ord) / last_ord * 100, 1) if last_ord else 0
    return [
        {"label":"本周销售额", "value":f"¥{this_rev/10000:.1f}万", "delta":delta_rev},
        {"label":"订单量",     "value":str(this_ord),              "delta":delta_ord},
        {"label":"客单价",     "value":f"¥{this_rev/this_ord:.0f}" if this_ord else "—", "delta":0},
    ]

# ── Step 3: 生成5张图表 ──────────────────────────────────────────────

def generate_charts(data: dict) -> list[dict]:
    charts = []

    # 图1:近30天每日销售额折线图
    daily = data["sales"].groupby("date")["revenue"].sum().reset_index().tail(30)
    f1 = px.line(daily, x="date", y="revenue", template="plotly_dark", markers=True, title="近30天每日销售额")
    f1.update_traces(line_color="#60a5fa", line_width=2.5)
    charts.append({"title":"销售趋势", "html":f1.to_html(full_html=False, include_plotlyjs="cdn")})

    # 图2:各大区销售额柱状图
    reg = data["regions"].groupby("region")["revenue"].sum().reset_index().sort_values("revenue", ascending=False)
    f2 = px.bar(reg, x="region", y="revenue", template="plotly_dark", color="revenue", color_continuous_scale="Blues")
    f2.update_layout(showlegend=False, coloraxis_showscale=False)
    charts.append({"title":"各大区销售额", "html":f2.to_html(full_html=False, include_plotlyjs=False)})

    # 图3:商品类目销售占比甜甜圈图
    cat = data["products"].groupby("category")["revenue"].sum().reset_index()
    f3 = go.Figure(go.Pie(labels=cat["category"], values=cat["revenue"], hole=0.4, textinfo="label+percent"))
    f3.update_layout(template="plotly_dark")
    charts.append({"title":"商品类目占比", "html":f3.to_html(full_html=False, include_plotlyjs=False)})

    # 图4:流量 vs 订单量双轴折线图
    tw = data["traffic"].groupby(pd.Grouper(key="date", freq="W"))["pv"].sum().reset_index()
    ow = data["orders"].groupby(pd.Grouper(key="order_date", freq="W")).size().reset_index(name="count")
    f4 = go.Figure()
    f4.add_trace(go.Scatter(x=tw["date"], y=tw["pv"], name="页面访问量", line={"color":"#60a5fa","width":2.5}))
    f4.add_trace(go.Scatter(x=ow["order_date"], y=ow["count"], name="订单量", yaxis="y2",
                            line={"color":"#4ade80","width":2.5,"dash":"dot"}))
    f4.update_layout(template="plotly_dark", yaxis2={"overlaying":"y","side":"right"},
                     legend={"x":0,"y":1.1,"orientation":"h"})
    charts.append({"title":"流量与订单量趋势(双轴)", "html":f4.to_html(full_html=False, include_plotlyjs=False)})

    # 图5:用户转化漏斗
    f5 = go.Figure(go.Funnel(
        y=["访问","加购","下单","支付","完成"], x=[100000,42000,15000,12800,11500],
        textinfo="value+percent initial",
        marker_color=["#3b82f6","#6366f1","#8b5cf6","#a855f7","#c026d3"]))
    f5.update_layout(template="plotly_dark")
    charts.append({"title":"用户转化漏斗", "html":f5.to_html(full_html=False, include_plotlyjs=False)})

    return charts

# ── Step 4: 渲染 HTML 报告 ────────────────────────────────────────────

REPORT_TPL = """<!DOCTYPE html><html><head><meta charset="utf-8"><title>{{"{{"}}title{{"}}"}}</title>
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
<style>body{font-family:system-ui,sans-serif;background:#0f172a;color:#e2e8f0;margin:0;padding:32px}
h1{font-size:1.8rem;color:#f1f5f9;border-bottom:2px solid #334155;padding-bottom:12px}
.meta{font-size:.85rem;color:#94a3b8;margin-bottom:24px}
.kpi-row{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));gap:16px;margin-bottom:28px}
.kpi{background:#1e293b;border:1px solid #334155;border-radius:12px;padding:20px;text-align:center}
.kv{font-size:2rem;font-weight:800;color:#60a5fa}.kl{font-size:.8rem;color:#94a3b8;margin-top:6px}
.kd{font-size:.85rem;margin-top:4px}.up{color:#4ade80}.down{color:#f87171}
.card{background:#1e293b;border:1px solid #334155;border-radius:12px;padding:24px;margin-bottom:20px}
.card-t{font-size:1rem;font-weight:700;color:#f1f5f9;margin-bottom:14px}</style></head><body>
<h1>{{"{{"}}title{{"}}"}}</h1>
<div class="meta">生成时间:{{"{{"}}generated_at{{"}}"}} &nbsp;|&nbsp; 数据周期:{{"{{"}}date_range{{"}}"}}</div>
<div class="kpi-row">{% for k in kpis %}
<div class="kpi"><div class="kv">{{"{{"}}k.value{{"}}"}}</div><div class="kl">{{"{{"}}k.label{{"}}"}}</div>
<div class="kd {{"{{"}}'up' if k.delta > 0 else ('down' if k.delta < 0 else ''){{"}}"}}">{{"{{"}}'+' if k.delta > 0 else ''{{"}}"}}{{"{{"}}k.delta{{"}}"}}% 环比</div>
</div>{% endfor %}</div>
{% for c in charts %}<div class="card"><div class="card-t">{{"{{"}}c.title{{"}}"}}</div>{{"{{"}}c.html{{"}}"}}</div>{% endfor %}
</body></html>"""

def render_report(kpis, charts, date_range) -> str:
    return Template(REPORT_TPL).render(
        title="运营数据周报",
        generated_at=datetime.now().strftime("%Y-%m-%d %H:%M"),
        date_range=date_range, kpis=kpis, charts=charts
    )

# ── Step 5: 保存 + 发送邮件 ───────────────────────────────────────────

def send_email(html: str, recipients: list[str], subject: str):
    msg = MIMEMultipart("alternative")
    msg["From"] = os.getenv("EMAIL_USER"); msg["To"] = ", ".join(recipients); msg["Subject"] = subject
    msg.attach(MIMEText(html, "html", "utf-8"))
    with smtplib.SMTP_SSL(os.getenv("SMTP_HOST","smtp.gmail.com"), 465) as smtp:
        smtp.login(os.getenv("EMAIL_USER"), os.getenv("EMAIL_PASS")); smtp.send_message(msg)
    logging.info(f"报告已发送至 {', '.join(recipients)}")

def main():
    now = datetime.now()
    ws = (now - timedelta(days=now.weekday())).strftime("%m/%d")
    date_range = f"{ws} – {now.strftime('%m/%d')}"

    data   = load_weekly_data()
    kpis   = calc_kpis(data)
    charts = generate_charts(data)
    html   = render_report(kpis, charts, date_range)

    filename = OUTPUT_DIR / f"weekly_report_{now.strftime('%Y%m%d')}.html"
    filename.write_text(html, encoding="utf-8")
    logging.info(f"报告已保存:{filename}")

    recipients = [r.strip() for r in os.getenv("REPORT_RECIPIENTS","").split(",") if r.strip()]
    if recipients:
        send_email(html, recipients, f"运营周报 W{now.isocalendar().week} ({date_range})")

if __name__ == "__main__": main()

**自动化运行:**配合第15章的定时任务,把 main() 设置为每周一早 8:00 自动运行,报告自动生成并发送,无需人工干预。

上一章 下一章第14章:GUI 自动化

本章评分
4.7  / 5  (20 评分)

💬 留言讨论