Data Visualization — matplotlib, seaborn, and Plotly
Chapter 13: Data Visualization Automation — matplotlib, plotly & Report Generation
Manually screenshotting charts into PowerPoint or copying data into spreadsheets every week is exactly the kind of repetitive work Python was made to replace. This chapter covers library selection, matplotlib's Figure/Axes architecture for publication-quality static charts, seaborn for statistical plots, plotly for interactive HTML output, and Jinja2 for assembling it all into an emailable weekly report with five charts.
Library Selection
| Library | Role | Output | Best For |
|---|---|---|---|
| matplotlib | Foundation, fully customizable | PNG/PDF/SVG | Batch static charts, publication quality, precise control |
| seaborn | Statistical plots on matplotlib | PNG/PDF | Heatmaps, box plots, distribution charts |
| plotly | Interactive visualization | HTML / PNG (kaleido) | Interactive dashboards, HTML email reports |
| altair | Declarative, Vega-Lite based | HTML/SVG | Quick prototyping, Jupyter exploration |
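Decision rule: for batch static charts destined for email or PDF, use matplotlib (with seaborn on top for statistical plots); for interactive HTML output, use plotly.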
pip install matplotlib seaborn plotly kaleido pandas openpyxl jinja2
matplotlib — Publication-Quality Charts
Figure is the entire canvas. Axes is a single coordinate system on that canvas. Always use the OO interface (fig, ax = plt.subplots()), never the global plt.plot() — the global state is unpredictable in automation loops.
sales_chart.py — Multi-series monthly line chart
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pandas as pd
from pathlib import Path
# Apply one global matplotlib style to every figure created after this point.
# NOTE(review): the "seaborn-v0_8-*" style names presumably require a recent
# matplotlib release — confirm the minimum supported version.
plt.style.use("seaborn-v0_8-whitegrid")
def plot_monthly_sales(
    df: pd.DataFrame,
    output_path: str = "sales_chart.png",
    title: str = "Monthly Revenue by Product Line — 2024",
) -> str:
    """Draw one revenue line per product line and save the chart to disk.

    Args:
        df: Long-format data with columns ``month`` (str), ``product_line``
            (str) and ``revenue`` (float). Each series is ordered with
            ``sort_values("month")``, so month labels should sort
            chronologically as strings (e.g. ``"2024-01"``).
        output_path: Destination image file; missing parent directories are
            created.
        title: Chart heading. The default is the heading that used to be
            hard-coded.

    Returns:
        ``output_path``, unchanged.
    """
    colors = ["#2563eb", "#16a34a", "#dc2626", "#9333ea", "#ea580c"]
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        # dropna(): a NaN product line never matches `==`, which would leave
        # an empty subset and make `iloc[-1]` raise IndexError.
        for i, pl in enumerate(df["product_line"].dropna().unique()):
            color = colors[i % len(colors)]  # palette repeats after 5 series
            sub = df[df["product_line"] == pl].sort_values("month")
            ax.plot(
                sub["month"], sub["revenue"],
                marker="o", linewidth=2.2, markersize=6,
                color=color, label=pl,
            )
            # Label the final point of each series with its value in $K.
            last = sub.iloc[-1]
            ax.annotate(
                f"${last['revenue']/1000:.0f}K",
                xy=(last["month"], last["revenue"]),
                xytext=(8, 0), textcoords="offset points",
                fontsize=9, color=color, va="center",
            )

        ax.yaxis.set_major_formatter(
            mticker.FuncFormatter(lambda x, _: f"${x/1000:.0f}K")
        )
        ax.set_xlabel("Month", fontsize=12)
        ax.set_ylabel("Revenue", fontsize=12)
        ax.set_title(title, fontsize=15, fontweight="bold", pad=16)
        ax.legend(loc="upper left", framealpha=0.9, fontsize=10)
        ax.tick_params(axis="x", rotation=45)
        fig.savefig(output_path, dpi=300, bbox_inches="tight")
    finally:
        # Close even when plotting or saving fails; otherwise the figure stays
        # registered with pyplot and accumulates across batch runs.
        plt.close(fig)
    return output_path
seaborn — Statistical Visualization
seaborn_charts.py
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
def plot_correlation_heatmap(df: pd.DataFrame, output_path: str = "corr.png") -> str:
    """Save an annotated heatmap of the correlations between numeric columns.

    Args:
        df: Any DataFrame; only columns selected by
            ``select_dtypes(include="number")`` enter the correlation matrix.
        output_path: Destination image file; missing parent directories are
            created.

    Returns:
        ``output_path``, unchanged.
    """
    from pathlib import Path  # local import keeps this function self-contained

    corr = df.select_dtypes(include="number").corr()
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        # Fixed -1..1 colour range centred on 0 so colours are comparable
        # between reports.
        sns.heatmap(
            corr, annot=True, fmt=".2f", cmap="RdYlGn",
            center=0, vmin=-1, vmax=1, linewidths=0.5, ax=ax,
        )
        ax.set_title("Variable Correlation Matrix", fontsize=14, fontweight="bold")
        fig.tight_layout()
        fig.savefig(output_path, dpi=200, bbox_inches="tight")
    finally:
        # Always release the figure, including when heatmap() or savefig() raises.
        plt.close(fig)
    return output_path
def plot_performance_boxplot(df: pd.DataFrame, output_path: str = "boxplot.png") -> str:
    """Save per-department box plots of ``score`` with raw points overlaid.

    Departments are ordered left to right by descending median score.

    Args:
        df: Must have columns ``department`` (str) and ``score`` (float).
        output_path: Destination image file; missing parent directories are
            created.

    Returns:
        ``output_path``, unchanged.
    """
    from pathlib import Path  # local import keeps this function self-contained

    order = (
        df.groupby("department")["score"]
        .median()
        .sort_values(ascending=False)
        .index
    )
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # NOTE(review): newer seaborn releases reportedly warn when `palette`
        # is passed without `hue` — confirm against the pinned seaborn version.
        sns.boxplot(
            data=df, x="department", y="score", order=order,
            palette="husl", width=0.5,
            flierprops={"marker": "o", "markersize": 4, "alpha": 0.5},
            ax=ax,
        )
        # Jittered individual observations drawn on top of the boxes.
        sns.stripplot(
            data=df, x="department", y="score", order=order,
            color="black", alpha=0.3, size=3, jitter=True, ax=ax,
        )
        ax.set_title(
            "Performance Score Distribution by Department",
            fontsize=14, fontweight="bold",
        )
        ax.tick_params(axis="x", rotation=30)
        fig.tight_layout()
        fig.savefig(output_path, dpi=200, bbox_inches="tight")
    finally:
        # Always release the figure, including when drawing or saving raises.
        plt.close(fig)
    return output_path
plotly — Interactive Charts
plotly_charts.py
import plotly.express as px
import plotly.graph_objects as go
def plotly_line(df, x, y, color=None, title="") -> str:
    """Return a dark-themed line chart as an embeddable HTML fragment.

    The fragment is produced with ``include_plotlyjs="cdn"``, so it brings its
    own reference to Plotly.js; use it for the first chart on a page.
    """
    chart = px.line(
        df,
        x=x,
        y=y,
        color=color,
        title=title,
        template="plotly_dark",
        markers=True,
    )
    chart.update_traces(line_width=2.5)
    fragment = chart.to_html(full_html=False, include_plotlyjs="cdn")
    return fragment
def plotly_bar(df, x, y, color=None, title="") -> str:
    """Return a dark-themed grouped bar chart as an embeddable HTML fragment.

    The fragment is produced with ``include_plotlyjs=False``: it does not
    reference Plotly.js itself, so the page must already load the library.
    """
    bar_options = {
        "template": "plotly_dark",
        "color_discrete_sequence": px.colors.qualitative.Set2,
        "barmode": "group",
    }
    chart = px.bar(df, x=x, y=y, color=color, title=title, **bar_options)
    fragment = chart.to_html(full_html=False, include_plotlyjs=False)
    return fragment
def plotly_funnel(stages: list, values: list, title: str = "") -> str:
    """Return a dark-themed funnel chart as an embeddable HTML fragment.

    ``stages`` supplies the labels and ``values`` the matching counts. Each bar
    shows its value and its percentage of the initial stage. The fragment is
    produced with ``include_plotlyjs=False``.
    """
    stage_colors = ["#3b82f6", "#6366f1", "#8b5cf6", "#a855f7", "#c026d3"]
    trace = go.Funnel(
        y=stages,
        x=values,
        textinfo="value+percent initial",
        marker_color=stage_colors,
    )
    chart = go.Figure(trace)
    chart.update_layout(template="plotly_dark", title_text=title)
    return chart.to_html(full_html=False, include_plotlyjs=False)
def save_as_image(fig, path: str, *, width: int = 1200, height: int = 600, scale: float = 2):
    """Export ``fig`` to a static image via ``fig.write_image`` (requires kaleido).

    Args:
        fig: Object exposing ``write_image`` (e.g. a plotly figure).
        path: Destination file passed straight to ``write_image``.
        width: Forwarded as ``width``; defaults to the previous fixed 1200.
        height: Forwarded as ``height``; defaults to the previous fixed 600.
        scale: Forwarded as ``scale``; defaults to the previous fixed 2.
    """
    fig.write_image(path, width=width, height=height, scale=scale)
Report Generation — HTML vs PDF
| Approach | Stack | Pros | Cons | Best For |
|---|---|---|---|---|
| PDF | matplotlib + reportlab | Fixed layout, printable | No interactivity, font setup needed | Formal reports, archival |
| HTML | Jinja2 + plotly | Interactive charts, email-friendly, no install | Browser required | Management dashboards, weekly ops |
Full Project: Automated Weekly Operations Report
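Pipeline: read Excel data → compute KPIs → build five plotly charts → render the HTML report with Jinja2 → save it and email it.
weekly_report.py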
"""Automated Weekly Operations Report — Excel data to an emailed HTML report."""
import os, smtplib, logging
from datetime import datetime, timedelta
from pathlib import Path
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from jinja2 import Template
from dotenv import load_dotenv
# Load variables from a .env file into the process environment, then set up
# timestamped INFO-level logging for the whole script.
load_dotenv()
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")

# Input workbooks are read from DATA_DIR; rendered reports go to OUTPUT_DIR,
# which is created at import time if it does not exist yet.
DATA_DIR = Path("data")
OUTPUT_DIR = Path("reports")
OUTPUT_DIR.mkdir(exist_ok=True)
def load_data() -> dict[str, pd.DataFrame]:
    """Read the five input workbooks from ``DATA_DIR`` into DataFrames.

    Returns a dict keyed ``sales``, ``traffic``, ``orders``, ``products`` and
    ``regions``. Date columns are parsed for the first three only.
    """
    # (result key, workbook file, columns to parse as dates)
    workbooks = (
        ("sales", "sales.xlsx", ["date"]),
        ("traffic", "traffic.xlsx", ["date"]),
        ("orders", "orders.xlsx", ["order_date"]),
        ("products", "products.xlsx", None),
        ("regions", "regions.xlsx", None),
    )
    frames = {}
    for key, filename, date_columns in workbooks:
        workbook_path = DATA_DIR / filename
        if date_columns is None:
            frames[key] = pd.read_excel(workbook_path)
        else:
            frames[key] = pd.read_excel(workbook_path, parse_dates=date_columns)
    return frames
def calc_kpis(data: dict, now: "datetime | None" = None) -> list[dict]:
    """Compute the headline KPIs for the current week and their change vs last week.

    Args:
        data: Must contain ``data["sales"]`` with ``date`` and ``revenue``
            columns and ``data["orders"]`` with an ``order_date`` column.
        now: Reference moment; defaults to ``datetime.now()``. Passing it
            explicitly makes the result reproducible.

    Returns:
        Three dicts with keys ``label``, ``value`` (display string) and
        ``delta`` (percent change week over week, 0 when last week is empty).
        The "Avg Order" delta is always 0; no week-over-week figure is
        computed for it.
    """
    if now is None:
        now = datetime.now()

    # Weeks start Monday at 00:00. Truncating to midnight matters: keeping the
    # current time of day would push rows stamped Monday 00:00 into last week.
    today = now.replace(hour=0, minute=0, second=0, microsecond=0)
    this_week = today - timedelta(days=now.weekday())
    last_week = this_week - timedelta(days=7)

    def pct_change(current, previous):
        # Guard against division by zero when the previous week has no data.
        if not previous:
            return 0
        return round(float((current - previous) / previous * 100), 1)

    sales = data["sales"]
    in_this_week = sales["date"] >= this_week
    in_last_week = (sales["date"] >= last_week) & (sales["date"] < this_week)
    this_rev = sales[in_this_week]["revenue"].sum()
    last_rev = sales[in_last_week]["revenue"].sum()

    orders = data["orders"]
    this_ord = len(orders[orders["order_date"] >= this_week])
    last_ord = len(
        orders[(orders["order_date"] >= last_week) & (orders["order_date"] < this_week)]
    )

    avg_order = f"${this_rev/this_ord:.0f}" if this_ord else "—"
    return [
        {
            "label": "Weekly Revenue",
            "value": f"${this_rev/1000:.1f}K",
            "delta": pct_change(this_rev, last_rev),
        },
        {
            "label": "Orders",
            "value": str(this_ord),
            "delta": pct_change(this_ord, last_ord),
        },
        {"label": "Avg Order", "value": avg_order, "delta": 0},
    ]
def generate_charts(data: dict) -> list[dict]:
    """Build the five report charts as embeddable Plotly HTML fragments.

    Reads ``data["sales"]``, ``data["regions"]``, ``data["products"]``,
    ``data["traffic"]`` and ``data["orders"]``. Only the first fragment is
    rendered with ``include_plotlyjs="cdn"``; the others pass
    ``include_plotlyjs=False``.

    Returns:
        Dicts with keys ``title`` and ``html``, in display order.
    """

    def card(card_title, figure, with_js=False):
        # Wrap one figure as a report card; with_js selects the CDN script tag.
        plotly_js = "cdn" if with_js else False
        return {
            "title": card_title,
            "html": figure.to_html(full_html=False, include_plotlyjs=plotly_js),
        }

    cards = []

    # 1. Daily revenue: the last 30 rows of the per-date totals.
    revenue_by_day = data["sales"].groupby("date")["revenue"].sum().reset_index()
    trend_fig = px.line(
        revenue_by_day.tail(30),
        x="date",
        y="revenue",
        template="plotly_dark",
        markers=True,
        title="Daily Revenue (30d)",
    )
    trend_fig.update_traces(line_color="#60a5fa", line_width=2.5)
    cards.append(card("Revenue Trend", trend_fig, with_js=True))

    # 2. Revenue per region, largest first.
    revenue_by_region = data["regions"].groupby("region")["revenue"].sum().reset_index()
    revenue_by_region = revenue_by_region.sort_values("revenue", ascending=False)
    region_fig = px.bar(
        revenue_by_region,
        x="region",
        y="revenue",
        template="plotly_dark",
        color="revenue",
        color_continuous_scale="Blues",
    )
    region_fig.update_layout(showlegend=False, coloraxis_showscale=False)
    cards.append(card("Revenue by Region", region_fig))

    # 3. Revenue share per product category as a donut (pie with a hole).
    revenue_by_category = data["products"].groupby("category")["revenue"].sum().reset_index()
    donut = go.Pie(
        labels=revenue_by_category["category"],
        values=revenue_by_category["revenue"],
        hole=0.4,
        textinfo="label+percent",
    )
    category_fig = go.Figure(donut)
    category_fig.update_layout(template="plotly_dark")
    cards.append(card("Revenue by Category", category_fig))

    # 4. Weekly page views against weekly order counts, orders on a second y-axis.
    weekly_views = (
        data["traffic"].groupby(pd.Grouper(key="date", freq="W"))["pv"].sum().reset_index()
    )
    weekly_orders = (
        data["orders"]
        .groupby(pd.Grouper(key="order_date", freq="W"))
        .size()
        .reset_index(name="count")
    )
    dual_fig = go.Figure()
    dual_fig.add_trace(
        go.Scatter(
            x=weekly_views["date"],
            y=weekly_views["pv"],
            name="Page Views",
            line={"color": "#60a5fa", "width": 2.5},
        )
    )
    dual_fig.add_trace(
        go.Scatter(
            x=weekly_orders["order_date"],
            y=weekly_orders["count"],
            name="Orders",
            yaxis="y2",
            line={"color": "#4ade80", "width": 2.5, "dash": "dot"},
        )
    )
    dual_fig.update_layout(
        template="plotly_dark",
        yaxis2={"overlaying": "y", "side": "right"},
        legend={"x": 0, "y": 1.1, "orientation": "h"},
    )
    cards.append(card("Traffic vs Orders (Dual Axis)", dual_fig))

    # 5. Conversion funnel. The stage counts are fixed literals here, not
    #    derived from `data`.
    funnel_stages = ["Visit", "Add to Cart", "Order", "Pay", "Complete"]
    funnel_counts = [100000, 42000, 15000, 12800, 11500]
    funnel_colors = ["#3b82f6", "#6366f1", "#8b5cf6", "#a855f7", "#c026d3"]
    funnel_fig = go.Figure(
        go.Funnel(
            y=funnel_stages,
            x=funnel_counts,
            textinfo="value+percent initial",
            marker_color=funnel_colors,
        )
    )
    funnel_fig.update_layout(template="plotly_dark")
    cards.append(card("Conversion Funnel", funnel_fig))

    return cards
# Jinja2 template for the weekly report page.
# Context variables: title, generated_at, date_range, kpis (items with
# .value/.label/.delta) and charts (items with .title/.html).
# Placeholders are plain `{{ name }}` expressions. The earlier
# `{{"{{"}}name{{"}}"}}` form made Jinja2 print the braces literally, so the
# page showed "{{title}}" instead of the value.
# NOTE(review): the <head> script points at "plotly-latest", which reportedly
# stopped tracking new Plotly.js releases — confirm whether it is still
# wanted alongside the CDN tag emitted with the first chart.
REPORT_TPL = """<!DOCTYPE html><html><head><meta charset="utf-8"><title>{{ title }}</title>
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
<style>body{font-family:system-ui,sans-serif;background:#0f172a;color:#e2e8f0;margin:0;padding:32px}
h1{font-size:1.8rem;color:#f1f5f9;border-bottom:2px solid #334155;padding-bottom:12px}
.meta{font-size:.85rem;color:#94a3b8;margin-bottom:24px}
.kpi-row{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));gap:16px;margin-bottom:28px}
.kpi{background:#1e293b;border:1px solid #334155;border-radius:12px;padding:20px;text-align:center}
.kv{font-size:2rem;font-weight:800;color:#60a5fa}.kl{font-size:.8rem;color:#94a3b8;margin-top:6px}
.kd{font-size:.85rem;margin-top:4px}.up{color:#4ade80}.down{color:#f87171}
.card{background:#1e293b;border:1px solid #334155;border-radius:12px;padding:24px;margin-bottom:20px}
.card-t{font-size:1rem;font-weight:700;color:#f1f5f9;margin-bottom:14px}</style></head><body>
<h1>{{ title }}</h1><div class="meta">Generated: {{ generated_at }} | Period: {{ date_range }}</div>
<div class="kpi-row">{% for k in kpis %}<div class="kpi"><div class="kv">{{ k.value }}</div>
<div class="kl">{{ k.label }}</div>
<div class="kd {{ 'up' if k.delta > 0 else ('down' if k.delta < 0 else '') }}">{{ '+' if k.delta > 0 else '' }}{{ k.delta }}% WoW</div>
</div>{% endfor %}</div>
{% for c in charts %}<div class="card"><div class="card-t">{{ c.title }}</div>{{ c.html }}</div>{% endfor %}
</body></html>"""
def render_html(kpis, charts, date_range) -> str:
    """Render ``REPORT_TPL`` into the final HTML page.

    ``kpis`` and ``charts`` are passed to the template as-is; the title is
    fixed and the generation timestamp is taken from the current local time.
    """
    context = {
        "title": "Weekly Ops Report",
        "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M"),
        "date_range": date_range,
        "kpis": kpis,
        "charts": charts,
    }
    template = Template(REPORT_TPL)
    return template.render(**context)
def send_email(html: str, recipients: list[str], subject: str):
    """Send ``html`` as an HTML e-mail over SMTP-over-SSL on port 465.

    Configuration comes from the environment: ``EMAIL_USER`` (login and From
    address), ``EMAIL_PASS`` and ``SMTP_HOST`` (default ``smtp.gmail.com``).

    Args:
        html: Message body, attached as a UTF-8 ``text/html`` part.
        recipients: Addresses joined into the ``To`` header.
        subject: Subject line.

    Raises:
        ValueError: ``recipients`` is empty.
        RuntimeError: ``EMAIL_USER`` or ``EMAIL_PASS`` is not set.

    NOTE(review): many mail clients are believed to strip ``<script>`` tags,
    so script-driven charts in ``html`` may not render in the inbox — confirm.
    """
    # Fail fast with a clear message instead of an obscure error after connecting.
    if not recipients:
        raise ValueError("send_email() needs at least one recipient")
    user = os.getenv("EMAIL_USER")
    password = os.getenv("EMAIL_PASS")
    if not user or not password:
        raise RuntimeError("EMAIL_USER and EMAIL_PASS must be set to send the report")

    msg = MIMEMultipart("alternative")
    msg["From"] = user
    msg["To"] = ", ".join(recipients)
    msg["Subject"] = subject
    msg.attach(MIMEText(html, "html", "utf-8"))

    host = os.getenv("SMTP_HOST", "smtp.gmail.com")
    # timeout: an unattended scheduled run must not hang forever on a dead server.
    with smtplib.SMTP_SSL(host, 465, timeout=30) as smtp:
        smtp.login(user, password)
        smtp.send_message(msg)
def main():
    """Run the pipeline: load data, build KPIs and charts, save the page, mail it.

    The page is written to ``OUTPUT_DIR`` as ``weekly_<YYYYMMDD>.html``. An
    e-mail is sent only when ``REPORT_RECIPIENTS`` (comma-separated) yields at
    least one non-blank address.
    """
    now = datetime.now()
    week_start = now - timedelta(days=now.weekday())
    week_start_label = week_start.strftime("%b %d")
    today_label = now.strftime("%b %d")
    date_range = f"{week_start_label} – {today_label}"

    data = load_data()
    kpis = calc_kpis(data)
    charts = generate_charts(data)
    html = render_html(kpis, charts, date_range)

    stamp = now.strftime("%Y%m%d")
    report_path = OUTPUT_DIR / f"weekly_{stamp}.html"
    report_path.write_text(html, encoding="utf-8")
    logging.info("Saved: %s", report_path)

    # Split the comma-separated list and drop blank entries.
    raw_recipients = os.getenv("REPORT_RECIPIENTS", "")
    recipients = [
        address
        for address in (part.strip() for part in raw_recipients.split(","))
        if address
    ]
    if not recipients:
        return

    week_number = now.isocalendar().week
    send_email(html, recipients, f"Weekly Ops Report — W{week_number} ({date_range})")


if __name__ == "__main__":
    main()
Scheduling: Combine with Chapter 15's scheduler to run main() every Monday at 8 AM automatically. Zero manual work is required once it is configured.
Previous | Next: Chapter 14: GUI Automation