Chapter 10

Functions and Arrays

Chapter 10: Shell Functions, Arrays, and String Processing

Functions are the foundation of script reuse, arrays enable bulk data processing, and advanced string operations let you say goodbye to awk/sed complexity. This chapter starts with function scope and return values, systematically covers indexed arrays, associative arrays, mapfile file reading, printf formatting, nameref, and closes with a complete production-grade function library.

10.1 Function Definition: Two Syntaxes

bash supports two function definition syntaxes that are functionally identical. The function keyword is a bash extension and not POSIX portable; the name() {} syntax works in all POSIX shells:

# Syntax 1: POSIX-compatible form (recommended for scripts that must be portable).
# Prints a greeting for the name passed as the first argument.
greet() {
    local who="$1"
    echo "Hello, ${who}!"
}

# Syntax 2: the `function` keyword form (bash-specific).
# Prints a goodbye message for the name passed as the first argument.
function farewell {
    local who="$1"
    echo "Goodbye, ${who}!"
}

# 两种语法可以混用,行为完全相同
greet "World"    # → Hello, World!
farewell "World" # → Goodbye, World!

# 函数必须先定义再调用(bash 顺序执行)
# 例外:如果函数定义在 source 的文件中,调用时必须先 source

# One-line function: statements inside the braces are separated by semicolons.
# Prints all arguments prefixed with the current time as [HH:MM:SS].
log() { local now; now=$(date +%T); echo "[${now}] $*"; }

# Function names may contain hyphens (a bash feature).
# Returns 0 when `id -u` reports 0; otherwise prints a message to stderr
# and returns 1.
check-root() {
    if [[ $(id -u) -ne 0 ]]; then
        echo "需要 root 权限" >&2
        return 1
    fi
}

# 查看已定义的函数
declare -F            # 列出所有函数名
declare -f greet      # 查看 greet 函数的完整定义
type greet            # 显示 greet 是一个 function

# 删除函数
unset -f greet

10.2 Parameters and Return Values

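Function parameters are accessed through the positional parameters `$1` … `$9` and, from the tenth onward, `${10}`, `${11}`, … (the braces are required). `return` can only return an integer exit status (0–255), so string data is returned in one of two idiomatic ways: print it to stdout and let the caller capture it with `$(...)`, or store it in a variable that the caller reads afterwards: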

#!/bin/bash

# === local 变量作用域 ===
# 函数内不声明 local 的变量是全局的!
x=10

# Scoping demonstration: a plain assignment inside a function writes to the
# global variable, while `local` confines a variable to the function.
modify_global() {
    x=99        # no `local`, so this overwrites the global x
    local y=42  # y is visible only inside the function
}

modify_global
echo $x   # → 99(全局被修改)
echo $y   # → (空,y 在函数外不存在)

# Good practice: declare every variable used inside a function as local.
# Adds the two integer arguments and prints the sum on stdout, so the caller
# can capture it with $(calculate A B).
calculate() {
    local lhs="$1" rhs="$2"
    local total
    total=$(( lhs + rhs ))
    echo "$total"   # the result is returned through stdout
}

# 捕获函数返回的字符串
sum=$(calculate 15 27)
echo "15 + 27 = $sum"   # → 15 + 27 = 42

# === return 退出码 ===
# Succeeds (status 0) when the integer argument is even and fails (status 1)
# when it is odd.
is_even() {
    local value="$1"
    # (( expr )) exits 0 when expr evaluates to non-zero (true) and 1 when it
    # evaluates to zero (false), so it can drive the if directly.
    if (( value % 2 == 0 )); then
        return 0
    fi
    return 1
}

is_even 4 && echo "4 是偶数" || echo "4 是奇数"
is_even 7 && echo "7 是偶数" || echo "7 是奇数"

# === 通过全局变量返回复杂数据 ===
# 惯例:用 __ 前缀的全局变量作为"输出参数"
__split_result=()
split_string() {
    local str="$1"
    local sep="${2:- }"
    IFS="$sep" read -r -a __split_result 
  
## 10.3 Recursive Functions


  
```bash
#!/bin/bash

# 阶乘(递归)
factorial() {
    local n="$1"
    if (( n 
  
## 10.4 Advanced Function Features


  
```bash
# A function can act as a command and override an external command.
# Caution: this shadows the external command of the same name, so use it carefully.
ls() {
    command ls --color=auto -F "$@"  # `command` bypasses the function and runs the real ls
}

# source 导入函数库
source /path/to/lib.sh   # 在当前 Shell 中执行,共享作用域
# 简写
. /path/to/lib.sh

# 检查函数是否已定义
if declare -f my_func &>/dev/null; then
    echo "my_func 已定义"
    my_func
fi

# 列出所有函数
declare -F | awk '{print $3}'  # 只打印函数名

# 删除函数
unset -f ls   # 恢复 ls 为外部命令

# trap: run a handler on a signal or at exit; commonly used for cleanup.
# cleanup removes this process's temporary files (/tmp/myapp.<PID>.*).
cleanup() {
    echo "清理临时文件..."
    rm -f /tmp/myapp.$$.*
}
trap cleanup EXIT      # runs automatically whenever the script exits
# On Ctrl+C just exit: the EXIT trap above then performs the cleanup exactly
# once. A bare `trap cleanup INT` would run cleanup and then keep executing the
# script, because a trapped signal no longer terminates the shell.
# 130 = 128 + 2 (SIGINT), the conventional exit status for an interrupt.
trap 'exit 130' INT

# FUNCNAME array: the function call stack, innermost function first.
inner() {
    local stack="${FUNCNAME[*]}"
    local self="${FUNCNAME[0]}"
    local caller="${FUNCNAME[1]}"
    echo "调用栈: ${stack}"    # → inner outer main
    echo "当前函数: ${self}"   # → inner
    echo "调用者: ${caller}"   # → outer
}
outer() {
    inner
}
outer

10.5 Indexed Arrays (declare -a)

#!/bin/bash

# === 创建数组 ===
fruits=("apple" "banana" "cherry")           # 直接赋值
declare -a colors=("red" "green" "blue")     # 显式声明
nums=(1 2 3 4 5)
mixed=("hello" 42 "world" 3.14)             # 类型混合(全为字符串)

# 按索引赋值(可以不连续)
arr[0]="first"
arr[1]="second"
arr[5]="sixth"   # 中间空洞(3,4 不存在)

# === 访问元素 ===
echo ${fruits[0]}     # → apple(第一个)
echo ${fruits[1]}     # → banana
echo ${fruits[-1]}    # → cherry(最后一个,bash 4.3+)
echo ${fruits[-2]}    # → banana(倒数第二个)

# === 整体操作 ===
echo ${fruits[@]}     # → apple banana cherry(所有元素)
echo ${fruits[*]}     # → apple banana cherry(同上,在双引号内行为不同)
echo ${#fruits[@]}    # → 3(元素个数)
echo ${!fruits[@]}    # → 0 1 2(所有索引)

# $@ 与 $* 在引号内的区别(数组同理)
for item in "${fruits[@]}"; do echo "$item"; done  # 每个元素独立
for item in "${fruits[*]}"; do echo "$item"; done  # 合并为一个字符串

# === 增删改 ===
fruits+=("date" "elderberry")  # 追加元素
fruits[1]="BANANA"             # 修改元素
unset fruits[2]                # 删除元素(留下空洞,索引不重排)
echo ${fruits[@]}              # → apple BANANA date elderberry

# 删除整个数组
# unset fruits

# === 数组切片 ===
echo ${fruits[@]:1:2}   # 从索引1开始取2个元素

# === 遍历(带索引)===
for i in "${!fruits[@]}"; do
    echo "fruits[$i] = ${fruits[$i]}"
done

# === 数组排序 ===
unsorted=(banana apple cherry date elderberry)
IFS=$'\n' sorted=($(sort 
  
## 10.6 Associative Arrays (declare -A) — bash 4+


  
```bash
#!/bin/bash
# 关联数组(字典),bash 4.0+ 才支持
# 检查版本:[[ ${BASH_VERSINFO[0]} -ge 4 ]] || { echo "需要 bash 4+"; exit 1; }

# === 创建关联数组 ===
declare -A user_info
user_info[name]="Alice"
user_info[age]="30"
user_info[role]="admin"

# 一次性初始化(bash 4+)
declare -A config=(
    [host]="localhost"
    [port]="5432"
    [dbname]="myapp"
    [user]="dbuser"
)

# === 访问 ===
echo ${user_info[name]}     # → Alice
echo ${config[port]}        # → 5432

# === 整体操作 ===
echo ${config[@]}           # 所有值(顺序不保证)
echo ${!config[@]}          # 所有键
echo ${#config[@]}          # 键值对数量

# === 遍历键值对 ===
for key in "${!config[@]}"; do
    echo "$key = ${config[$key]}"
done

# === 检查键是否存在 ===
key="host"
if [[ -v config[$key] ]]; then   # -v tests whether the element is set (-v exists since bash 4.2; array subscripts need bash 4.3+)
    echo "键 '$key' 存在: ${config[$key]}"
fi

# 兼容性写法(bash 4.0)
if [[ ${config[$key]+_} ]]; then
    echo "键 '$key' 存在"
fi

# === 删除键 ===
unset config[user]
echo ${#config[@]}    # → 3

# === 实战:统计单词频率 ===
declare -A word_count
text="the quick brown fox jumps over the lazy dog the fox"
for word in $text; do
    (( word_count[$word]++ ))
done

# 按频率降序输出
for word in "${!word_count[@]}"; do
    echo "${word_count[$word]} $word"
done | sort -rn

# === 实战:解析配置文件 ===
declare -A cfg
while IFS='=' read -r key value; do
    [[ $key =~ ^[[:space:]]*# ]] && continue  # 跳过注释
    [[ -z $key ]] && continue                  # 跳过空行
    cfg[${key// /}]="${value// /}"             # 去除空格
done 
  
## 10.7 mapfile / readarray: Reading Files into Arrays


  
```bash
#!/bin/bash

# mapfile(bash 4+,readarray 是其别名)
# 将文件的每一行读入数组,-t 去掉末尾换行符(几乎总是需要)

mapfile -t lines < file.txt
```

**Version Note:** Both `mapfile` and associative arrays (`declare -A`) require bash 4.0+. macOS still ships bash 3.2, because later bash releases are licensed under GPLv3. Install a newer version with `brew install bash`, or check the version at the top of your script: `[[ ${BASH_VERSINFO[0]} -ge 4 ]] || exit 1`.


  
  
## 10.8 Formatted Output with printf


  
```bash
#!/bin/bash

# printf 比 echo 更可控,行为与 C 的 printf 一致

# 基本格式符
printf "%s\n" "hello"           # 字符串
printf "%d\n" 42                # 整数
printf "%f\n" 3.14              # 浮点(默认6位小数)
printf "%.2f\n" 3.14159         # 保留2位小数
printf "%x\n" 255               # 十六进制(ff)
printf "%X\n" 255               # 十六进制大写(FF)
printf "%o\n" 8                 # 八进制(10)
printf "%e\n" 12345.678         # 科学计数法
printf "%b\n" "hello\nworld"    # 解释转义序列

# 宽度与对齐
printf "%-20s %5d\n" "Alice"  30   # 左对齐字符串,右对齐数字
printf "%-20s %5d\n" "Bob"    25
printf "%-20s %5d\n" "Charlie" 28
# 输出:
# Alice                   30
# Bob                     25
# Charlie                 28

# 补零
printf "%05d\n" 42      # → 00042
printf "%08.2f\n" 3.14  # → 00003.14

# 多次重复格式(参数多于占位符时,格式会重复)
printf "%s\n" apple banana cherry
# → apple
#    banana
#    cherry

# 颜色输出(ANSI 转义码)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
BOLD='\033[1m'
RESET='\033[0m'

printf "${RED}错误:${RESET}文件不存在\n"
printf "${GREEN}成功:${RESET}部署完成\n"
printf "${YELLOW}警告:${RESET}磁盘空间不足\n"
printf "${BOLD}${BLUE}信息:${RESET}服务已启动\n"

# 写入文件(不用 echo 和重定向)
printf "Name: %s\nAge: %d\n" "Alice" 30 > /tmp/user.txt

# 生成固定宽度的表格
printf "%-15s %-10s %-10s\n" "Name" "Status" "PID"
printf "%-15s %-10s %-10s\n" "nginx" "running" "1234"
printf "%-15s %-10s %-10s\n" "mysql" "stopped" "—"

# Progress bar: redraws a 40-column bar in place on the current line.
# Arguments: $1 - current step
#            $2 - total number of steps (must be > 0)
# Outputs:   "\r[####    ] NN%" on stdout, plus a newline once current == total
# Returns:   0 on success, 1 when total is not a positive integer
progress_bar() {
    local current="$1"
    local total="$2"
    local width=40

    # Guard against division by zero below.
    if (( total <= 0 )); then
        return 1
    fi

    local pct=$(( current * 100 / total ))
    local filled=$(( current * width / total ))
    if (( filled < 0 )); then
        filled=0
    elif (( filled > width )); then
        filled=$width
    fi

    # Build the bar without seq: pad an empty string to `filled` columns and
    # turn the padding into '#'. This yields an empty bar at 0%, whereas
    # printf '#%.0s' with no arguments would still print one '#'.
    local bar
    printf -v bar '%*s' "$filled" ''
    bar=${bar// /#}

    printf '\r[%-*s] %3d%%' "$width" "$bar" "$pct"
    if (( current == total )); then
        echo
    fi
    # Always succeed so that callers running under `set -e` are not aborted
    # on every intermediate step.
    return 0
}

for i in $(seq 1 20); do
    progress_bar $i 20
    sleep 0.05
done

10.9 Advanced String Processing

#!/bin/bash

# === 正则匹配与 BASH_REMATCH ===
email="user@example.com"
if [[ $email =~ ^([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+)\.([a-zA-Z]{2,})$ ]]; then
    echo "合法邮箱"
    echo "用户名: ${BASH_REMATCH[1]}"   # → user
    echo "域名: ${BASH_REMATCH[2]}"     # → example
    echo "顶级域: ${BASH_REMATCH[3]}"   # → com
fi

# 提取 IP 地址
text="服务器地址:192.168.1.100,备用:10.0.0.1"
if [[ $text =~ ([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) ]]; then
    echo "第一个 IP: ${BASH_REMATCH[1]}"   # → 192.168.1.100
fi

# === 大小写转换(bash 4+)===
str="Hello World"
echo ${str,,}    # → hello world(全小写)
echo ${str^^}    # → HELLO WORLD(全大写)
echo ${str,}     # → hello World (only the first character is lowercased)
echo ${str^}     # → Hello World(仅首字母大写)

# 逐字转换(指定字符)
echo ${str^^[aeiou]}   # → HEllO WOrld(元音转大写)

# === 去除首尾空白(bash 无内置,用参数展开模拟)===
# Strips leading and trailing whitespace from $1 using only parameter
# expansion and prints the result followed by a newline.
trim() {
    local str="$1"
    # ${str%%[![:space:]]*} is the leading run of whitespace; remove it from the front.
    str="${str#"${str%%[![:space:]]*}"}"
    # ${str##*[![:space:]]} is the trailing run of whitespace; remove it from the end.
    str="${str%"${str##*[![:space:]]}"}"
    # printf rather than echo: echo would treat a result such as "-n" or "-e"
    # as an option and print nothing useful.
    printf '%s\n' "$str"
}
result=$(trim "   hello world   ")
echo "[$result]"   # → [hello world]

# A more compact variant built on a regex match.
# All arguments are joined via "$*" (first character of IFS, a space by
# default) before trimming; the result is printed followed by a newline.
trim2() {
    local str="$*"
    local re='^[[:space:]]*(.*[^[:space:]])[[:space:]]*$'
    if [[ $str =~ $re ]]; then
        # printf rather than echo so results like "-n" / "-e" are printed verbatim.
        printf '%s\n' "${BASH_REMATCH[1]}"
    else
        # Empty or whitespace-only input: the pattern needs at least one
        # non-blank character, so print an empty line.
        printf '\n'
    fi
}

# === 字符串重复 ===
# Prints the string $1 repeated $2 times, with no trailing newline.
# Works for strings of any length; the earlier printf | tr approach could
# only repeat a single character, because tr maps one character to one.
# A count that is missing or not positive prints nothing.
repeat() {
    local str="$1"
    local n="${2:-0}"
    local out="" i
    for (( i = 0; i < n; i++ )); do
        out+="$str"
    done
    printf '%s' "$out"
}
repeat '-' 40   # → ----------------------------------------

# === 字符串包含检测 ===
haystack="Hello, World!"
needle="World"
if [[ $haystack == *"$needle"* ]]; then
    echo "包含 '$needle'"
fi

# === 字符串分割 ===
csv="a,b,c,d,e"
IFS=',' read -r -a parts 
  
## 10.10 Name References (declare -n)


  
`declare -n` (nameref, bash 4.3+) creates a variable whose value is the name of another variable. Through nameref, functions can return arrays and other complex data structures:


  
```bash
#!/bin/bash

# 基本 nameref
real_var="Hello"
declare -n alias=real_var   # alias 是 real_var 的别名
echo "$alias"               # → Hello(访问 real_var 的值)
alias="World"               # 通过 alias 修改 real_var
echo "$real_var"            # → World

# === 函数通过 nameref 返回数组 ===
# 这是 bash 返回数组的最佳实践(bash 4.3+)

get_users() {
    declare -n _result="$1"   # $1 是调用者传入的数组名
    _result=()
    while IFS=: read -r name _ uid _; do
        (( uid >= 1000 )) && _result+=("$name")
    done 
  
## 10.11 Function Library Design: Complete Example


  
Below is a complete production-grade shell function library, including logging, colored output, error handling, and duplicate-load prevention:


  
```bash
#!/bin/bash
# lib/common.sh — 通用函数库
# 使用方法:source lib/common.sh

# ============================================================
# 防重复加载(幂等 source)
# ============================================================
[[ -n "${__COMMON_LIB_LOADED:-}" ]] && return 0
readonly __COMMON_LIB_LOADED=1

# ============================================================
# 颜色常量
# ============================================================
readonly _CLR_RESET='\033[0m'
readonly _CLR_RED='\033[0;31m'
readonly _CLR_GREEN='\033[0;32m'
readonly _CLR_YELLOW='\033[1;33m'
readonly _CLR_BLUE='\033[0;34m'
readonly _CLR_CYAN='\033[0;36m'
readonly _CLR_BOLD='\033[1m'

# ============================================================
# 日志函数
# ============================================================

# Internal: writes one timestamped, coloured log line to stderr.
# Arguments: $1 - level tag (e.g. "INFO ")
#            $2 - colour escape sequence for the prefix (may be empty)
#            $3... - message words, joined with spaces
# Globals:   _CLR_RESET (read)
_log() {
    local level="$1"
    local color="$2"
    shift 2
    local msg="$*"
    local timestamp
    timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    # Keep the format string constant: the colour codes go through %b (which
    # interprets the \033 escapes) and everything else through %s, so a '%'
    # or backslash in the level or message can never be read as a directive.
    printf '%b[%s] [%s]%b %s\n' \
        "$color" "$timestamp" "$level" "${_CLR_RESET:-}" "$msg" >&2
}

# Public logging helpers. Each forwards its arguments to _log together with
# a fixed-width level tag and the colour for that level.
log_info() {
    _log "INFO " "${_CLR_GREEN}" "$@"
}

log_warn() {
    _log "WARN " "${_CLR_YELLOW}" "$@"
}

log_error() {
    _log "ERROR" "${_CLR_RED}" "$@"
}

# Debug messages are emitted only when LOG_LEVEL is exactly "debug".
log_debug() {
    if [[ "${LOG_LEVEL:-info}" != "debug" ]]; then
        return 0
    fi
    _log "DEBUG" "${_CLR_CYAN}" "$@"
}

# Fatal error: logs the message at FATAL level (bold red), then terminates
# the shell with exit status 1.
log_fatal() {
    local -r style="${_CLR_RED}${_CLR_BOLD}"
    _log "FATAL" "$style" "$@"
    exit 1
}

# ============================================================
# 错误处理
# ============================================================

# Turns on strict error handling; call this from the main script.
#   -e            exit when a command fails
#   -u            treat expansion of an unset variable as an error
#   -o pipefail   a pipeline fails if any stage fails
#   -E            (errtrace) let shell functions, command substitutions and
#                 subshells inherit the ERR trap; without it, failures inside
#                 functions never reach _handle_error
enable_strict_mode() {
    set -Eeuo pipefail
    # Single quotes defer expansion, so LINENO and BASH_COMMAND are evaluated
    # when the trap fires, not when it is installed.
    trap '_handle_error ${LINENO} "$BASH_COMMAND"' ERR
}

# ERR trap handler: logs the failing command, its line and its exit status.
# Arguments: $1 - line number of the failing command
#            $2 - text of the failing command
#            $3 - (optional) exit status; defaults to $? at function entry
_handle_error() {
    # This must be the first statement: every command, `local` included,
    # overwrites $?, so reading it later would always report 0.
    local status="${3:-$?}"
    local line="$1"
    local cmd="$2"
    log_error "命令失败(第 ${line} 行): ${cmd}"
    log_error "退出码: ${status}"
}

# ============================================================
# 工具函数
# ============================================================

# Verifies that every command named in the arguments can be found by
# `command -v`; calls log_fatal for each one that cannot.
require_cmd() {
    local tool
    for tool; do
        if ! command -v "$tool" &>/dev/null; then
            log_fatal "必须安装命令: ${tool}"
        fi
    done
}

# Calls log_fatal unless `id -u` reports 0, i.e. unless running as root.
require_root() {
    local uid
    uid=$(id -u)
    if [[ $uid -ne 0 ]]; then
        log_fatal "此脚本必须以 root 运行"
    fi
}

# Creates a directory (including parents) if it does not exist yet and
# applies the requested mode to it.
# Arguments: $1 - directory path
#            $2 - permission mode passed to chmod (default 755)
# Calls log_fatal when mkdir or chmod fails; logs an info line on creation.
ensure_dir() {
    local target="$1"
    local perms="${2:-755}"

    # Nothing to do when the directory is already there.
    [[ -d "$target" ]] && return 0

    if ! { mkdir -p "$target" && chmod "$perms" "$target"; }; then
        log_fatal "无法创建目录: ${target}"
    fi
    log_info "创建目录: ${target}"
}

# Confirmation prompt: reads one line from stdin.
# Arguments: $1 - prompt text (optional; a default question is used otherwise)
# Returns:   0 when the reply is "y" or "Y", 1 for anything else
confirm() {
    local prompt="${1:-确认继续?}"
    local reply
    read -r -p "${prompt} [y/N] " reply
    # ${reply,,} lowercases the reply so "Y" is accepted as well.
    case "${reply,,}" in
        y) return 0 ;;
        *) return 1 ;;
    esac
}

# 重试函数(最多 N 次)
retry() {
    local max_attempts="${1:-3}"
    local delay="${2:-2}"
    shift 2
    local attempt=1
    while (( attempt  backup.sql
#     log_info "备份完成:$(format_size $(stat -c%s backup.sql))"
# fi
#
# log_info "部署完成"

Function Library Best Practices: A good shell function library should have: (1) duplicate-load guard (__LIB_LOADED); (2) local for all internal variables; (3) private functions prefixed with _; (4) consistent timestamped log format; (5) all errors written to stderr (>&2); (6) an enable_strict_mode helper for callers to opt in to strict mode.

Chapter Summary: This chapter fully covered the bash function system (definition/scope/recursion/nameref), all operations on indexed and associative arrays, efficient file reading with mapfile, printf formatting and color output, advanced string processing, and a production-ready function library template. The next chapter dives into pipes, process substitution, and the underlying mechanics of file descriptors.

  Previous
  ← Ch9: Variables


  Next
  Ch11: Pipes →
Rate this chapter
4.5  / 5  (31 ratings)

💬 Comments