Chapter 13

Data Visualization — matplotlib, seaborn, and Plotly

Chapter 13: Data Visualization Automation — matplotlib, plotly & Report Generation

Manually screenshotting charts into PowerPoint or copying data into spreadsheets every week is exactly the kind of repetitive work Python was made to replace. This chapter covers library selection, matplotlib's Figure/Axes architecture for publication-quality static charts, seaborn for statistical plots, plotly for interactive HTML output, and Jinja2 for assembling it all into an emailable weekly report with five charts.

Library Selection

Library Role Output Best For
matplotlib Foundation, fully customizable PNG/PDF/SVG Batch static charts, publication quality, precise control
seaborn Statistical plots on matplotlib PNG/PDF Heatmaps, box plots, distribution charts
plotly Interactive visualization HTML / PNG (kaleido) Interactive dashboards, HTML email reports
altair Declarative, Vega-Lite based HTML/SVG Quick prototyping, Jupyter exploration

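Decision rule: Batch static charts for email/PDF → matplotlib. Statistical plots (heatmaps, box plots, distributions) → seaborn. Interactive dashboards and HTML reports → plotly. Quick prototyping and Jupyter exploration → altair.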

pip install matplotlib seaborn plotly kaleido pandas openpyxl jinja2 python-dotenv

matplotlib — Publication-Quality Charts

Figure is the entire canvas. Axes is a single coordinate system on that canvas. Always use the OO interface (fig, ax = plt.subplots()), never the global plt.plot() — the global state is unpredictable in automation loops.

sales_chart.py — Multi-series monthly line chart

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pandas as pd
from pathlib import Path

# Apply the whitegrid theme globally so every figure created after this point shares it.
# NOTE(review): the "seaborn-v0_8-*" style names presumably require matplotlib >= 3.6 —
# confirm against the installed version.
plt.style.use("seaborn-v0_8-whitegrid")

def plot_monthly_sales(df: pd.DataFrame, output_path: str = "sales_chart.png",
                       title: str = "Monthly Revenue by Product Line — 2024") -> str:
    """Draw one revenue line per product line and save the chart as an image.

    Args:
        df: Long-format frame with columns ``month`` (str), ``product_line``
            (str) and ``revenue`` (float).
        output_path: Destination image file. Missing parent directories are
            created before saving.
        title: Chart title. Defaults to the title that used to be hard-coded.

    Returns:
        ``output_path``, unchanged.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        colors = ["#2563eb", "#16a34a", "#dc2626", "#9333ea", "#ea580c"]

        for i, pl in enumerate(df["product_line"].unique()):
            color = colors[i % len(colors)]  # cycle the palette when there are more than 5 lines
            # ``month`` is sorted as text, so the order is lexicographic
            # (chronological only for sortable labels such as "2024-01").
            sub = df[df["product_line"] == pl].sort_values("month")
            ax.plot(sub["month"], sub["revenue"], marker="o", linewidth=2.2, markersize=6,
                    color=color, label=pl)
            # Label the last point of each series with its value in thousands.
            last = sub.iloc[-1]
            ax.annotate(f"${last['revenue']/1000:.0f}K", xy=(last["month"], last["revenue"]),
                        xytext=(8, 0), textcoords="offset points",
                        fontsize=9, color=color, va="center")

        ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f"${x/1000:.0f}K"))
        ax.set_xlabel("Month", fontsize=12)
        ax.set_ylabel("Revenue", fontsize=12)
        ax.set_title(title, fontsize=15, fontweight="bold", pad=16)
        ax.legend(loc="upper left", framealpha=0.9, fontsize=10)
        ax.tick_params(axis="x", rotation=45)

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_path, dpi=300, bbox_inches="tight")
    finally:
        # Close even when plotting or saving raises; otherwise a failing batch
        # job accumulates open figures.
        plt.close(fig)
    return output_path

seaborn — Statistical Visualization

seaborn_charts.py

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

def plot_correlation_heatmap(df: pd.DataFrame, output_path: str = "corr.png") -> str:
    """Save an annotated heatmap of the pairwise correlations of ``df``'s numeric columns.

    Args:
        df: Input frame; non-numeric columns are ignored.
        output_path: Destination image file. Missing parent directories are
            created before saving.

    Returns:
        ``output_path``, unchanged.
    """
    from pathlib import Path  # local import: this listing's import block has no pathlib

    corr = df.select_dtypes(include="number").corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        # Fixed -1..1 colour range centred on 0 so colours are comparable across runs.
        sns.heatmap(corr, annot=True, fmt=".2f", cmap="RdYlGn", center=0,
                    vmin=-1, vmax=1, linewidths=0.5, ax=ax)
        ax.set_title("Variable Correlation Matrix", fontsize=14, fontweight="bold")
        fig.tight_layout()
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_path, dpi=200, bbox_inches="tight")
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)
    return output_path

def plot_performance_boxplot(df: pd.DataFrame, output_path: str = "boxplot.png") -> str:
    """Save per-department box plots of ``score`` with the raw points overlaid.

    Departments are ordered left to right by descending median score.

    Args:
        df: Frame with columns ``department`` (str) and ``score`` (float).
        output_path: Destination image file. Missing parent directories are
            created before saving.

    Returns:
        ``output_path``, unchanged.
    """
    from pathlib import Path  # local import: this listing's import block has no pathlib

    order = list(df.groupby("department")["score"].median().sort_values(ascending=False).index)
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # Passing ``palette`` without ``hue`` is deprecated in recent seaborn.
        # Map hue to the x variable instead; ``dodge=False`` keeps one
        # full-width box per department.
        sns.boxplot(data=df, x="department", y="score", order=order,
                    hue="department", hue_order=order, dodge=False,
                    palette="husl", width=0.5,
                    flierprops={"marker": "o", "markersize": 4, "alpha": 0.5}, ax=ax)
        sns.stripplot(data=df, x="department", y="score", order=order,
                      color="black", alpha=0.3, size=3, jitter=True, ax=ax)
        # The hue legend only repeats the x tick labels; drop it if seaborn drew one.
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()
        ax.set_title("Performance Score Distribution by Department", fontsize=14, fontweight="bold")
        ax.tick_params(axis="x", rotation=30)
        fig.tight_layout()
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_path, dpi=200, bbox_inches="tight")
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)
    return output_path

plotly — Interactive Charts

plotly_charts.py

import plotly.express as px
import plotly.graph_objects as go

def plotly_line(df, x, y, color=None, title="") -> str:
    """Build a dark-themed line chart and return it as an embeddable HTML fragment.

    The fragment is emitted with ``include_plotlyjs="cdn"``, so it carries the
    reference to plotly.js itself.
    """
    line_chart = px.line(
        df,
        x=x,
        y=y,
        color=color,
        title=title,
        markers=True,
        template="plotly_dark",
    )
    line_chart.update_traces(line_width=2.5)
    fragment = line_chart.to_html(include_plotlyjs="cdn", full_html=False)
    return fragment

def plotly_bar(df, x, y, color=None, title="") -> str:
    """Build a grouped, dark-themed bar chart and return it as an HTML fragment.

    Emitted with ``include_plotlyjs=False``: the page is expected to have
    loaded plotly.js already (once is enough).
    """
    bar_options = {
        "x": x,
        "y": y,
        "color": color,
        "title": title,
        "template": "plotly_dark",
        "barmode": "group",
        "color_discrete_sequence": px.colors.qualitative.Set2,
    }
    bar_chart = px.bar(df, **bar_options)
    return bar_chart.to_html(include_plotlyjs=False, full_html=False)

def plotly_funnel(stages: list, values: list, title: str = "") -> str:
    """Build a dark-themed funnel chart and return it as an HTML fragment.

    ``stages`` supplies the funnel labels and ``values`` the matching amounts.
    Emitted with ``include_plotlyjs=False``, so plotly.js must already be on
    the page.
    """
    stage_colors = ["#3b82f6", "#6366f1", "#8b5cf6", "#a855f7", "#c026d3"]
    funnel_trace = go.Funnel(
        x=values,
        y=stages,
        marker_color=stage_colors,
        textinfo="value+percent initial",
    )
    funnel_chart = go.Figure(funnel_trace)
    funnel_chart.update_layout(title_text=title, template="plotly_dark")
    return funnel_chart.to_html(include_plotlyjs=False, full_html=False)

def save_as_image(fig, path: str, width: int = 1200, height: int = 600, scale: float = 2):
    """Export a plotly figure to a static image file (requires kaleido).

    Args:
        fig: Object exposing plotly's ``write_image`` method.
        path: Destination file.
        width: Image width passed to ``write_image``; defaults to 1200.
        height: Image height passed to ``write_image``; defaults to 600.
        scale: Scale factor passed to ``write_image``; defaults to 2.
    """
    fig.write_image(path, width=width, height=height, scale=scale)

Report Generation — HTML vs PDF

Approach Stack Pros Cons Best For
PDF matplotlib + reportlab Fixed layout, printable No interactivity, font setup needed Formal reports, archival
HTML Jinja2 + plotly Interactive charts, email-friendly, no install Browser required Management dashboards, weekly ops

Full Project: Automated Weekly Operations Report

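Pipeline: read Excel data → compute KPIs → generate five plotly charts → render the Jinja2 HTML template → save the report and email it.

weekly_report.py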

"""Automated Weekly Operations Report — Excel data to plotly charts to an HTML email."""
import os, smtplib, logging
from datetime import datetime, timedelta
from pathlib import Path
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from jinja2 import Template
from dotenv import load_dotenv

# Environment: pull settings from a .env file and set up timestamped logging.
load_dotenv()
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")

# Input workbooks are read from DATA_DIR; rendered reports are written to OUTPUT_DIR.
DATA_DIR = Path("data")
OUTPUT_DIR = Path("reports")
OUTPUT_DIR.mkdir(exist_ok=True)

def load_data() -> dict[str, pd.DataFrame]:
    """Read the five input workbooks from ``DATA_DIR`` into a name -> DataFrame dict.

    The sales, traffic and orders workbooks have their date column parsed;
    products and regions are read with pandas' defaults.
    """
    # (result key, workbook file name, columns to parse as dates or None)
    workbooks = (
        ("sales", "sales.xlsx", ["date"]),
        ("traffic", "traffic.xlsx", ["date"]),
        ("orders", "orders.xlsx", ["order_date"]),
        ("products", "products.xlsx", None),
        ("regions", "regions.xlsx", None),
    )
    frames = {}
    for key, filename, date_columns in workbooks:
        # Only pass parse_dates where a date column is declared.
        options = {"parse_dates": date_columns} if date_columns else {}
        frames[key] = pd.read_excel(DATA_DIR / filename, **options)
    return frames

def _pct_change(current, previous):
    """Return the percentage change from ``previous`` to ``current`` (1 decimal), or 0 if ``previous`` is 0."""
    return round((current - previous) / previous * 100, 1) if previous else 0


def calc_kpis(data: dict, now: "datetime | None" = None) -> list[dict]:
    """Compute week-over-week KPI cards for revenue, order count and average order value.

    "This week" runs from Monday 00:00 of the week containing ``now``; "last
    week" is the seven days before that.

    Args:
        data: Must contain ``"sales"`` (columns ``date``, ``revenue``) and
            ``"orders"`` (column ``order_date``) DataFrames.
        now: Reference moment; defaults to ``datetime.now()``. Pass a fixed
            value for reproducible runs.

    Returns:
        Three dicts with keys ``label``, ``value`` (formatted string) and
        ``delta`` (percent change versus last week, 0 when last week is empty).
    """
    now = datetime.now() if now is None else now
    # Anchor the boundary at midnight. Using ``now`` as-is would place it at
    # Monday <current time>, pushing midnight-stamped Monday rows into last week.
    midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
    this_week = midnight - timedelta(days=now.weekday())
    last_week = this_week - timedelta(days=7)

    sales = data["sales"]
    sale_dates = sales["date"]
    this_rev = sales.loc[sale_dates >= this_week, "revenue"].sum()
    last_rev = sales.loc[(sale_dates >= last_week) & (sale_dates < this_week), "revenue"].sum()

    order_dates = data["orders"]["order_date"]
    this_ord = int((order_dates >= this_week).sum())
    last_ord = int(((order_dates >= last_week) & (order_dates < this_week)).sum())

    # Average order value per week; 0 when the week has no orders.
    this_avg = this_rev / this_ord if this_ord else 0
    last_avg = last_rev / last_ord if last_ord else 0

    return [
        {"label": "Weekly Revenue", "value": f"${this_rev/1000:.1f}K",
         "delta": _pct_change(this_rev, last_rev)},
        {"label": "Orders", "value": str(this_ord),
         "delta": _pct_change(this_ord, last_ord)},
        {"label": "Avg Order", "value": f"${this_avg:.0f}" if this_ord else "—",
         "delta": _pct_change(this_avg, last_avg)},
    ]

def generate_charts(data: dict) -> list[dict]:
    """Build the five report charts and return them as embeddable HTML fragments.

    Reads the "sales", "regions", "products", "traffic" and "orders" frames
    from ``data``. Each returned dict has a "title" and an "html" key. Only the
    first fragment references plotly.js (via the CDN); the remaining four are
    emitted with ``include_plotlyjs=False``.
    """
    theme = "plotly_dark"
    rendered = []

    def add_chart(title, figure, plotlyjs=False):
        # Serialise the figure as a <div> fragment and queue it for the report.
        rendered.append({
            "title": title,
            "html": figure.to_html(full_html=False, include_plotlyjs=plotlyjs),
        })

    # 1) Daily revenue trend: last 30 rows of the per-date revenue totals.
    revenue_by_day = data["sales"].groupby("date")["revenue"].sum().reset_index()
    trend_fig = px.line(
        revenue_by_day.tail(30),
        x="date",
        y="revenue",
        title="Daily Revenue (30d)",
        markers=True,
        template=theme,
    )
    trend_fig.update_traces(line_color="#60a5fa", line_width=2.5)
    add_chart("Revenue Trend", trend_fig, plotlyjs="cdn")

    # 2) Revenue by region, largest first, shaded by value.
    revenue_by_region = data["regions"].groupby("region")["revenue"].sum().reset_index()
    revenue_by_region = revenue_by_region.sort_values("revenue", ascending=False)
    region_fig = px.bar(
        revenue_by_region,
        x="region",
        y="revenue",
        color="revenue",
        color_continuous_scale="Blues",
        template=theme,
    )
    region_fig.update_layout(showlegend=False, coloraxis_showscale=False)
    add_chart("Revenue by Region", region_fig)

    # 3) Category share as a donut (pie with a 0.4 hole).
    revenue_by_category = data["products"].groupby("category")["revenue"].sum().reset_index()
    donut = go.Pie(
        labels=revenue_by_category["category"],
        values=revenue_by_category["revenue"],
        hole=0.4,
        textinfo="label+percent",
    )
    category_fig = go.Figure(donut)
    category_fig.update_layout(template=theme)
    add_chart("Revenue by Category", category_fig)

    # 4) Weekly page views against weekly order counts on a secondary y-axis.
    weekly_views = (
        data["traffic"].groupby(pd.Grouper(key="date", freq="W"))["pv"].sum().reset_index()
    )
    weekly_orders = (
        data["orders"]
        .groupby(pd.Grouper(key="order_date", freq="W"))
        .size()
        .reset_index(name="count")
    )
    views_trace = go.Scatter(
        x=weekly_views["date"],
        y=weekly_views["pv"],
        name="Page Views",
        line={"color": "#60a5fa", "width": 2.5},
    )
    orders_trace = go.Scatter(
        x=weekly_orders["order_date"],
        y=weekly_orders["count"],
        name="Orders",
        yaxis="y2",
        line={"color": "#4ade80", "width": 2.5, "dash": "dot"},
    )
    dual_axis_fig = go.Figure(data=[views_trace, orders_trace])
    dual_axis_fig.update_layout(
        template=theme,
        yaxis2={"overlaying": "y", "side": "right"},
        legend={"x": 0, "y": 1.1, "orientation": "h"},
    )
    add_chart("Traffic vs Orders (Dual Axis)", dual_axis_fig)

    # 5) Conversion funnel; the stage values are fixed numbers, not read from ``data``.
    funnel_stages = ["Visit", "Add to Cart", "Order", "Pay", "Complete"]
    funnel_values = [100000, 42000, 15000, 12800, 11500]
    funnel_trace = go.Funnel(
        y=funnel_stages,
        x=funnel_values,
        textinfo="value+percent initial",
        marker_color=["#3b82f6", "#6366f1", "#8b5cf6", "#a855f7", "#c026d3"],
    )
    funnel_fig = go.Figure(funnel_trace)
    funnel_fig.update_layout(template=theme)
    add_chart("Conversion Funnel", funnel_fig)

    return rendered

# Jinja2 template for the weekly report page. It expects: title, generated_at,
# date_range, kpis (items with value/label/delta) and charts (items with
# title/html). Placeholders are plain ``{{ ... }}`` expressions; the previous
# escaped form ``{{"{{"}}name{{"}}"}}`` rendered the literal text "{{name}}"
# instead of the value.
# NOTE(review): the <head> loads plotly-latest.min.js while the first chart
# fragment also embeds its own CDN script tag — likely redundant; confirm
# before removing either.
REPORT_TPL = """<!DOCTYPE html><html><head><meta charset="utf-8"><title>{{ title }}</title>
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
<style>body{font-family:system-ui,sans-serif;background:#0f172a;color:#e2e8f0;margin:0;padding:32px}
h1{font-size:1.8rem;color:#f1f5f9;border-bottom:2px solid #334155;padding-bottom:12px}
.meta{font-size:.85rem;color:#94a3b8;margin-bottom:24px}
.kpi-row{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));gap:16px;margin-bottom:28px}
.kpi{background:#1e293b;border:1px solid #334155;border-radius:12px;padding:20px;text-align:center}
.kv{font-size:2rem;font-weight:800;color:#60a5fa}.kl{font-size:.8rem;color:#94a3b8;margin-top:6px}
.kd{font-size:.85rem;margin-top:4px}.up{color:#4ade80}.down{color:#f87171}
.card{background:#1e293b;border:1px solid #334155;border-radius:12px;padding:24px;margin-bottom:20px}
.card-t{font-size:1rem;font-weight:700;color:#f1f5f9;margin-bottom:14px}</style></head><body>
<h1>{{ title }}</h1><div class="meta">Generated: {{ generated_at }} | Period: {{ date_range }}</div>
<div class="kpi-row">{% for k in kpis %}<div class="kpi"><div class="kv">{{ k.value }}</div>
<div class="kl">{{ k.label }}</div>
<div class="kd {{ 'up' if k.delta > 0 else ('down' if k.delta < 0 else '') }}">{{ '+' if k.delta > 0 else '' }}{{ k.delta }}% WoW</div>
</div>{% endfor %}</div>
{% for c in charts %}<div class="card"><div class="card-t">{{ c.title }}</div>{{ c.html }}</div>{% endfor %}
</body></html>"""

def render_html(kpis, charts, date_range) -> str:
    """Render ``REPORT_TPL`` with Jinja2 and return the finished HTML page.

    The title is fixed to "Weekly Ops Report" and the generation timestamp is
    taken from the current local time.
    """
    template = Template(REPORT_TPL)
    context = {
        "title": "Weekly Ops Report",
        "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M"),
        "date_range": date_range,
        "kpis": kpis,
        "charts": charts,
    }
    return template.render(**context)

def send_email(html: str, recipients: list[str], subject: str):
    """Send ``html`` as an HTML email to ``recipients`` over SMTP-over-SSL on port 465.

    Sender address and password come from the ``EMAIL_USER`` / ``EMAIL_PASS``
    environment variables; the host comes from ``SMTP_HOST`` and falls back to
    "smtp.gmail.com".
    """
    sender = os.getenv("EMAIL_USER")

    message = MIMEMultipart("alternative")
    message["From"] = sender
    message["To"] = ", ".join(recipients)
    message["Subject"] = subject
    message.attach(MIMEText(html, "html", "utf-8"))

    smtp_host = os.getenv("SMTP_HOST", "smtp.gmail.com")
    with smtplib.SMTP_SSL(smtp_host, 465) as smtp:
        smtp.login(sender, os.getenv("EMAIL_PASS"))
        smtp.send_message(message)

def main():
    """Run the pipeline: load data, compute KPIs, build charts, render, save and email.

    The report is written to ``OUTPUT_DIR`` as ``weekly_<YYYYMMDD>.html``. It is
    emailed only when ``REPORT_RECIPIENTS`` (comma-separated) has at least one
    non-blank address.
    """
    now = datetime.now()
    week_start = now - timedelta(days=now.weekday())
    date_range = f"{week_start.strftime('%b %d')} – {now.strftime('%b %d')}"

    data = load_data()
    kpis = calc_kpis(data)
    charts = generate_charts(data)
    html = render_html(kpis, charts, date_range)

    report_path = OUTPUT_DIR / f"weekly_{now.strftime('%Y%m%d')}.html"
    report_path.write_text(html, encoding="utf-8")
    logging.info(f"Saved: {report_path}")

    # Split the comma-separated recipient list and drop blank entries.
    recipients = []
    for address in os.getenv("REPORT_RECIPIENTS", "").split(","):
        if address.strip():
            recipients.append(address.strip())
    if recipients:
        subject = f"Weekly Ops Report — W{now.isocalendar().week} ({date_range})"
        send_email(html, recipients, subject)


if __name__ == "__main__":
    main()

Scheduling: Combine with Chapter 15's scheduler to run main() every Monday at 8 AM automatically. Zero manual work required once configured.

Previous NextChapter 14: GUI Automation

Rate this chapter
4.7  / 5  (20 ratings)

💬 Comments