第 18 章

迷你 Shell

第18章：用 C 实现迷你 Shell

每位真正理解 Linux 的程序员都应该亲手写一个 Shell。它不需要完美，但它会让你真正理解 fork() 和 exec() 的区别，理解为什么管道需要关闭未使用的 fd，理解信号如何在进程组间传递。本章用约 400 行 C 代码，实现一个支持管道、重定向和内置命令的迷你 Shell，并通过 GDB 调试加深理解。

1. 为什么自己实现 Shell

Shell 是操作系统最薄的包装层之一。它几乎不做任何"神奇"的事情——绝大多数功能都是对几个系统调用的组合：fork() 创建子进程，execvp() 替换进程镜像，waitpid() 等待子进程结束，pipe() + dup2() 实现管道和重定向。理解这些之后，bash/zsh 的行为就不再神秘。

**理解原理：**fork/exec 分离设计的历史原因（Unix 哲学）
**学习系统调用：**pipe, dup2, open, waitpid, sigaction 的真实用法
**内核贡献基础：**理解 Shell 是向内核贡献 tty/pty 驱动的前提
**面试必备：**大厂系统编程面试的高频题目

2. Shell 工作循环（REPL）

Shell 的核心是一个 REPL（Read-Eval-Print-Loop）循环：读取用户输入，解析命令，执行命令，显示结果，循环往复。

/* repl.c — 最简 REPL 骨架 */
#include 
#include 
#include 
#include 
#include 

int main(void) {
    /* Prompt until readline() reports end-of-input (Ctrl+D) with NULL. */
    for (;;) {
        char *input = readline("mysh$ ");
        if (input == NULL)
            break;

        /* Empty lines are neither recorded nor echoed. */
        if (input[0] != '\0') {
            add_history(input);     /* make the line reachable via arrow keys */
            /* TODO: parse and execute the line */
            printf("got: %s\n", input);
        }

        free(input);                /* buffer was allocated by readline() */
    }

    printf("\n");                   /* finish the prompt line after Ctrl+D */
    return 0;
}

readline: readline 库提供行编辑（左右箭头、Ctrl+A/E）、历史记录（上下箭头）和 Tab 补全接口。安装：apt install libreadline-dev（Debian）或 yum install readline-devel（RHEL）。编译需链接 -lreadline。

3. fork + execvp + waitpid

Unix 的进程创建采用"fork-then-exec"模式：fork() 复制当前进程（Copy-on-Write，物理内存共享直到写入），子进程调用 execvp() 用新程序替换自己的地址空间，父进程调用 waitpid() 等待并收割子进程（避免僵尸进程）。

/* exec_cmd.c — fork/execvp/waitpid 基础命令执行 */
#include 
#include 
#include 
#include 
#include 
#include 
#include 

/* 执行单条命令（无管道无重定向）
 * argv: NULL结尾的参数数组，如 {"ls", "-la", NULL}
 * 返回值：子进程退出状态，失败返回-1
 */
int exec_simple(char **argv) {
    if (argv == NULL || argv[0] == NULL)
        return 0;

    pid_t pid = fork();
    if (pid 
  
## 4. 内置命令


  
内置命令（built-in）必须在 Shell 进程本身内执行，不能 fork 子进程——因为它们需要改变 Shell 自身的状态（如当前目录、环境变量）。


  
```c
/* builtins.c — 内置命令实现 */
#include 
#include 
#include 
#include 
#include 

#define HISTORY_MAX 100
static char *history[HISTORY_MAX];
static int   history_count = 0;

/* 添加到历史记录 */
void history_add(const char *line) {
    if (history_count 
  
## 5. 信号处理


  
Shell 对信号的处理遵循一个核心原则：**Shell 本身忽略 SIGINT（Ctrl+C），但子进程（前台程序）不忽略它**。这样 Ctrl+C 只中断前台程序，不退出 Shell。这通过 `sigaction()` + 进程组实现。


  
```c
/* signals.c — Shell 信号处理 */
#include 
#include 
#include 
#include 

/* Shell start-up: make the shell process itself ignore the interactive
 * job-control signals, so that they only affect the programs it launches.
 */
void shell_init_signals(void) {
    /* Signals are installed in this exact order. */
    static const int ignored[] = {
        SIGINT,     /* Ctrl+C */
        SIGQUIT,    /* Ctrl+\ */
        SIGTSTP,    /* Ctrl+Z: left to the child to handle */
        SIGTTOU,    /* terminal write from a background process */
        SIGTTIN,    /* terminal read from a background process */
    };
    struct sigaction ignore_action;
    size_t idx;

    memset(&ignore_action, 0, sizeof(ignore_action));
    ignore_action.sa_handler = SIG_IGN;

    for (idx = 0; idx < sizeof(ignored) / sizeof(ignored[0]); idx++)
        sigaction(ignored[idx], &ignore_action, NULL);
}

/* Called in the child right after fork(): join the job's process group,
 * hand the terminal's foreground to that group, then restore the default
 * dispositions for the job-control signals.
 *
 * pgid: process group to join; 0 means "create a new group whose id is
 *       this process's own PID" (setpgid semantics).
 *
 * Ordering matters. Once setpgid() has moved the child into a new group,
 * that group is a background group of the terminal. tcsetpgrp() called
 * from a background group raises SIGTTOU unless the caller ignores or
 * blocks it, and the default action of SIGTTOU is to stop the process.
 * SIGTTOU is therefore kept ignored until tcsetpgrp() is done, and only
 * afterwards are all five signals reset to SIG_DFL.
 *
 * setpgid()/tcsetpgrp() failures are deliberately not fatal: tcsetpgrp()
 * fails whenever stdin is not a terminal (e.g. input from a file or pipe).
 */
void child_init_signals(pid_t pgid) {
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));

    /* Put the child into its process group (pgid == 0 -> own PID). */
    setpgid(0, pgid);

    /* Make sure SIGTTOU is ignored while changing the foreground group,
     * whatever disposition was inherited from the parent. */
    sa.sa_handler = SIG_IGN;
    sigaction(SIGTTOU, &sa, NULL);

    /* Give the terminal to the child's process group. */
    tcsetpgrp(STDIN_FILENO, getpgrp());

    /* Now restore default behaviour (e.g. SIGINT terminates). */
    sa.sa_handler = SIG_DFL;
    sigaction(SIGINT,  &sa, NULL);
    sigaction(SIGQUIT, &sa, NULL);
    sigaction(SIGTSTP, &sa, NULL);
    sigaction(SIGTTOU, &sa, NULL);
    sigaction(SIGTTIN, &sa, NULL);
}

/* sigaction vs signal 的关键区别：
 * signal():   行为在不同 Unix 实现间不一致，信号处理期间不自动屏蔽
 * sigaction(): POSIX标准，行为一致，支持 SA_RESTART（自动重启被中断的系统调用）
 *
 * SA_RESTART 很重要：没有它，信号会导致 read()/write() 返回 EINTR，
 * 需要手动重试。有了 SA_RESTART，系统调用会自动重新执行。
 */

6. 重定向实现

重定向的本质是在 fork 之后、exec 之前，用 dup2() 替换子进程的标准 fd（0/1/2）。dup2(oldfd, newfd) 让 newfd 成为 oldfd 的副本（如果 newfd 原本已打开，会先被关闭），例如 dup2(fd, STDIN_FILENO)；之后程序通过 newfd（即标准 fd）读写，实际上操作的是 oldfd 指向的文件。

/* redirect.c — 重定向实现 */
#include 
#include 
#include 
#include 
#include 
#include 
#include 

/* One command together with its redirections.
 * A NULL file name means "this redirection was not requested".
 */
typedef struct {
    char **argv;             /* NULL-terminated argument vector */
    char  *redir_in;         /* file name for "< file" */
    char  *redir_out;        /* file name for "> file" (truncate) */
    char  *redir_append;     /* file name for ">> file" (append) */
    int    stderr_to_stdout; /* non-zero when "2>&1" was given */
} Cmd;

/* Apply the redirections of `cmd` to the current process.
 * Intended to run in the child, after fork() and before exec().
 *
 * Each requested file is opened and duplicated onto the matching standard
 * descriptor with dup2(); the temporary descriptor is then closed.
 * If a file cannot be opened, a message is printed to stderr and the
 * process exits with status 1.
 */
void apply_redirects(Cmd *cmd) {
    int fd;

    /* Input redirection: < file */
    if (cmd->redir_in) {
        fd = open(cmd->redir_in, O_RDONLY);
        if (fd < 0) {
            fprintf(stderr, "mysh: %s: %s\n",
                    cmd->redir_in, strerror(errno));
            exit(1);
        }
        dup2(fd, STDIN_FILENO);   /* stdin now refers to the file */
        close(fd);                /* original fd no longer needed after dup2 */
    }

    /* Output redirection: > file (truncate) */
    if (cmd->redir_out) {
        fd = open(cmd->redir_out,
                  O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd < 0) {
            fprintf(stderr, "mysh: %s: %s\n",
                    cmd->redir_out, strerror(errno));
            exit(1);
        }
        dup2(fd, STDOUT_FILENO);  /* stdout now refers to the file */
        close(fd);
    }

    /* Append redirection: >> file */
    if (cmd->redir_append) {
        fd = open(cmd->redir_append,
                  O_WRONLY | O_CREAT | O_APPEND, 0644);
        if (fd < 0) {
            fprintf(stderr, "mysh: %s: %s\n",
                    cmd->redir_append, strerror(errno));
            exit(1);
        }
        dup2(fd, STDOUT_FILENO);
        close(fd);
    }

    /* 2>&1: point stderr at whatever stdout refers to at this moment,
     * which is why this step comes after the stdout redirections. */
    if (cmd->stderr_to_stdout)
        dup2(STDOUT_FILENO, STDERR_FILENO);
}

7. 管道实现

先声明 int pipefd[2]，再调用 pipe(pipefd) 创建一个匿名管道：pipefd[0] 是读端，pipefd[1] 是写端。实现 ls | grep txt 需要两个子进程：左边的子进程把 stdout 重定向到 pipefd[1]，右边的子进程把 stdin 重定向到 pipefd[0]。关键：两个子进程都必须关闭自己不用的那端，父进程在 fork 完成后也要把两端都关闭，否则读端永远不会看到 EOF。

/* pipe_demo.c — "ls | grep txt" 的完整 C 实现 */
#include 
#include 
#include 
#include 

int main(void) {
    int pipefd[2];

    /* 创建管道：pipefd[0]=读端  pipefd[1]=写端 */
    if (pipe(pipefd)  **最常见的管道 Bug:**     忘记在父进程（或不使用该端的子进程）中关闭管道的写端。后果是读端的 `read()` 永远阻塞——因为只要写端有任何进程打开，内核就不会发送 EOF。调试时用 `lsof | grep pipe` 查看管道 fd 残留。


  
  
## 8. 完整迷你 Shell 代码


  
以下是完整的迷你 Shell 实现（约 230 行核心代码），支持：简单命令、多级管道（最多 MAX_CMDS 条命令）、输入/输出/追加重定向、内置命令（cd/pwd/history/exit）、Ctrl+C 信号处理。


  
```c
/* mysh.c — 完整迷你 Shell
 * 编译：gcc -Wall -Wextra -g -o mysh mysh.c -lreadline
 * 运行：./mysh
 */
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

/* ────────────── 常量与数据结构 ────────────── */
#define MAX_ARGS    64
#define MAX_CMDS    16     /* 管道中最多命令数 */
#define HIST_MAX   100

/* One command of a pipeline together with its redirections.
 * A NULL file name means "this redirection was not requested".
 */
typedef struct {
    char *argv[MAX_ARGS];  /* argument list, NULL-terminated */
    char *redir_in;        /* <  file */
    char *redir_out;       /* >  file (truncate) */
    char *redir_append;    /* >> file (append) */
    int   stderr_redir;    /* non-zero when 2>&1 was given */
} Cmd;

typedef struct {
    Cmd  cmds[MAX_CMDS];   /* 管道中的各个命令 */
    int  count;            /* 命令数量 */
} Pipeline;

static int last_exit_code = 0;

/* ────────────── 信号初始化 ────────────── */
/* Make the shell process ignore the interactive job-control signals. */
static void init_signals(void) {
    /* Installed in this order: Ctrl+C, Ctrl+\, Ctrl+Z, then the two
     * signals raised by background terminal access. */
    static const int ignored[] = {
        SIGINT, SIGQUIT, SIGTSTP, SIGTTOU, SIGTTIN
    };
    struct sigaction ignore_action = {0};

    ignore_action.sa_handler = SIG_IGN;
    for (size_t idx = 0; idx < sizeof(ignored) / sizeof(ignored[0]); idx++)
        sigaction(ignored[idx], &ignore_action, NULL);
}

/* ────────────── 词法分析（tokenize） ────────────── */
/* 将输入行分割为 token 数组，返回 token 数量 */
static int tokenize(char *line, char **tokens, int max_tokens) {
    int n = 0;
    char *tok = strtok(line, " \t\n");
    while (tok != NULL && n redir_in = tokens[++i];
        } else if (strcmp(t, ">") == 0) {
            if (i + 1 redir_out = tokens[++i];
        } else if (strcmp(t, ">>") == 0) {
            if (i + 1 redir_append = tokens[++i];
        } else if (strcmp(t, "2>&1") == 0) {
            cmd->stderr_redir = 1;
        } else {
            if (argc argv[argc++] = t;
        }
        i++;
    }
    cmd->argv[argc] = NULL;
    return argc;
}

/* 将 token 数组按管道符'|'分割，填充 Pipeline 结构 */
static void parse_pipeline(char **tokens, int ntokens, Pipeline *pl) {
    pl->count = 0;
    int start = 0;

    for (int i = 0; i count cmds[pl->count++]);
            }
            start = i + 1;
        }
    }
}

/* ────────────── 内置命令 ────────────── */
/* Built-in `cd`: change the shell's own working directory.
 * argv[1] is the target; without it, $HOME is used, and "/" if HOME is
 * not set. Returns 0 on success, 1 (after printing the reason) on failure.
 */
static int builtin_cd(char **argv) {
    const char *target = argv[1];

    if (target == NULL) {
        target = getenv("HOME");
        if (target == NULL)
            target = "/";
    }

    if (chdir(target) == 0)
        return 0;

    fprintf(stderr, "cd: %s: %s\n", target, strerror(errno));
    return 1;
}

/* Built-in `pwd`: print the current working directory on stdout.
 * The argument vector is unused. Returns 0 on success, 1 if getcwd() fails.
 */
static int builtin_pwd(char **argv) {
    char cwd[4096];

    (void)argv;

    if (getcwd(cwd, sizeof(cwd)) == NULL) {
        perror("getcwd");
        return 1;
    }

    puts(cwd);
    return 0;
}

/* Run `cmd` inside the shell process if it names a built-in.
 * Returns the built-in's status (>= 0), or -1 when argv[0] is missing or
 * is not a built-in, in which case the caller runs it as an external
 * program. `exit` does not return.
 */
static int run_builtin(Cmd *cmd) {
    const char *name = cmd->argv[0];

    if (name == NULL)
        return -1;

    if (!strcmp(name, "cd"))
        return builtin_cd(cmd->argv);

    if (!strcmp(name, "pwd"))
        return builtin_pwd(cmd->argv);

    if (!strcmp(name, "history")) {
        /* Print readline's history list, numbered from 1. */
        HIST_ENTRY **entries = history_list();
        for (int idx = 0; entries != NULL && entries[idx] != NULL; idx++)
            printf("%4d  %s\n", idx + 1, entries[idx]->line);
        return 0;
    }

    if (!strcmp(name, "exit")) {
        /* Optional first argument is the exit status; default is 0. */
        int code = cmd->argv[1] ? atoi(cmd->argv[1]) : 0;
        exit(code);
    }

    return -1;   /* not a built-in */
}

/* ────────────── 子进程：应用重定向 ────────────── */
/* Apply the redirections of `cmd` to the current process.
 * Intended to run in the child, between fork() and exec().
 *
 * Each requested file is opened and duplicated onto the matching standard
 * descriptor; the temporary descriptor is then closed. If a file cannot
 * be opened, perror() reports it and the process exits with status 1.
 */
static void apply_redirects(Cmd *cmd) {
    int fd;

    /* < file */
    if (cmd->redir_in) {
        fd = open(cmd->redir_in, O_RDONLY);
        if (fd < 0) { perror(cmd->redir_in); exit(1); }
        dup2(fd, STDIN_FILENO); close(fd);
    }

    /* > file (truncate) */
    if (cmd->redir_out) {
        fd = open(cmd->redir_out, O_WRONLY|O_CREAT|O_TRUNC, 0644);
        if (fd < 0) { perror(cmd->redir_out); exit(1); }
        dup2(fd, STDOUT_FILENO); close(fd);
    }

    /* >> file (append) */
    if (cmd->redir_append) {
        fd = open(cmd->redir_append, O_WRONLY|O_CREAT|O_APPEND, 0644);
        if (fd < 0) { perror(cmd->redir_append); exit(1); }
        dup2(fd, STDOUT_FILENO); close(fd);
    }

    /* 2>&1 comes last so stderr follows wherever stdout now points. */
    if (cmd->stderr_redir)
        dup2(STDOUT_FILENO, STDERR_FILENO);
}

/* ────────────── 执行 Pipeline ────────────── */
static int exec_pipeline(Pipeline *pl) {
    /* 单条命令：先尝试内置命令 */
    if (pl->count == 1) {
        int ret = run_builtin(&pl->cmds[0]);
        if (ret >= 0) return ret;
    }

    int n = pl->count;
    int pipes[MAX_CMDS - 1][2];   /* n-1 个管道 */
    pid_t pids[MAX_CMDS];

    /* 预先创建所有管道 */
    for (int i = 0; i  0) {
                dup2(pipes[i-1][0], STDIN_FILENO);
            }
            /* 连接管道：写入下一个命令 */
            if (i cmds[i]);

            /* exec */
            execvp(pl->cmds[i].argv[0], pl->cmds[i].argv);
            fprintf(stderr, "mysh: %s: %s\n",
                    pl->cmds[i].argv[0], strerror(errno));
            exit(127);
        }
    }

    /* 父进程：关闭所有管道 fd */
    for (int i = 0; i  0 && tokens[0][0] == '#') {
            free(line); continue;
        }

        /* 解析管道 */
        Pipeline pl;
        parse_pipeline(tokens, ntok, &pl);

        /* 执行 */
        if (pl.count > 0 && pl.cmds[0].argv[0] != NULL)
            last_exit_code = exec_pipeline(&pl);

        free(line);
    }

    return last_exit_code;
}

9. Makefile 构建

# Makefile — 迷你 Shell 构建文件

# 保存为 Makefile（注意：配方行必须用 Tab 缩进，不能用空格）

CC      = gcc
# -std=c11 alone hides POSIX declarations (struct sigaction, fork, ...)
# on glibc, so request them explicitly with a feature-test macro.
CFLAGS  = -Wall -Wextra -Wpedantic -g -std=c11 -D_POSIX_C_SOURCE=200809L
LDFLAGS = -lreadline
TARGET  = mysh
SRC     = mysh.c

# None of these names is a real file; keep them runnable even if a file
# with the same name appears in the directory.
.PHONY: all clean install bear asan

all: $(TARGET)

$(TARGET): $(SRC)
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

# Generate compile_commands.json with bear (used by clangd / LSP)
bear: $(SRC)
	bear -- $(CC) $(CFLAGS) -o $(TARGET) $(SRC) $(LDFLAGS)

# Install into ~/bin (created first if it does not exist)
install: $(TARGET)
	install -d $(HOME)/bin
	install -m 755 $(TARGET) $(HOME)/bin/$(TARGET)

clean:
	rm -f $(TARGET) compile_commands.json

# Build with AddressSanitizer/UBSan and run the instrumented binary
asan:
	$(CC) $(CFLAGS) -fsanitize=address,undefined \
	    -o $(TARGET)-asan $(SRC) $(LDFLAGS)
	./$(TARGET)-asan

compile_commands.json: 通过 bear -- make 生成编译命令数据库，让 clangd（VSCode/Neovim 的 LSP 服务）提供精确的代码补全、跳转定义和诊断，是现代 C 开发的标配。

10. GDB 调试

调试 Shell 特别需要关注 fork 后的子进程行为。GDB 默认在 fork 后继续跟踪父进程，但可以用 set follow-fork-mode child 切换到子进程。

# 启动 GDB
gdb ./mysh

# 基本命令速查
(gdb) b main              # 在 main 函数设断点
(gdb) b mysh.c:120        # 在第 120 行设断点
(gdb) b exec_pipeline     # 在函数设断点
(gdb) r                   # 运行程序（run）
(gdb) n                   # 单步执行（next，不进入函数）
(gdb) s                   # 单步执行（step，进入函数）
(gdb) c                   # 继续运行（continue）
(gdb) p pid               # 打印变量 pid 的值
(gdb) p *cmd              # 打印结构体内容
(gdb) p cmd->argv[0]      # 打印指针成员
(gdb) x/s buf             # 以字符串格式显示内存
(gdb) bt                  # 显示调用栈（backtrace）
(gdb) frame 2             # 切换到调用栈第2帧
(gdb) info locals         # 显示当前函数所有局部变量
(gdb) info registers      # 显示寄存器值
(gdb) watch pids[0]       # 监视变量（值改变时停下）

# 调试 fork 后的子进程
(gdb) set follow-fork-mode child    # fork 后跟踪子进程
(gdb) set follow-fork-mode parent   # fork 后跟踪父进程（默认）
(gdb) set detach-on-fork off        # fork 后两个进程都调试（复杂）

# 实例：调试管道挂起问题
(gdb) b exec_pipeline
(gdb) r
# 输入 "ls | grep txt" 触发断点
(gdb) n  # 单步到 pipe() 调用
(gdb) p pipes[0][0]  # 查看管道读端 fd 号
(gdb) p pipes[0][1]  # 查看管道写端 fd 号
# 确认 fork 后父进程正确关闭了所有管道 fd

# 调试内存问题（配合 AddressSanitizer）
# 编译：gcc -fsanitize=address -g -o mysh-asan mysh.c -lreadline
./mysh-asan
# ASAN 会在出现 use-after-free/buffer-overflow 时打印完整报告

11. 扩展练习

完成基础版本后，可以尝试以下扩展：

&& ||: 解析 &&（前一命令成功才执行后者）和 ||（失败才执行后者），检查 last_exit_code 决定是否执行下一条
$?: 展开 $? 为上一条命令的退出码，在 tokenize 后、parse 前做字符串替换
Tab 补全: 注册 rl_completion_entry_function，用 glob() 展开路径，用 opendir/readdir 遍历 PATH 中的可执行文件
后台任务 &: 检测命令末尾的 &，fork 后父进程不再阻塞调用 waitpid，而是在 SIGCHLD 处理函数中用 waitpid(-1, &status, WNOHANG) 异步收割已结束的子进程
多级管道: 当前实现已支持多级管道（MAX_CMDS 限制），验证 ls | grep txt | wc -l 工作正常
变量展开: 在 tokenize 后遍历 token，将 $VAR 替换为 getenv("VAR") 的值

本章总结: 迷你 Shell 涵盖了 Unix 系统编程的核心模式：fork-exec-wait（进程创建），pipe+dup2（进程间通信），sigaction（信号处理），open+dup2（I/O 重定向）。掌握这四组系统调用组合，你已经理解了 bash 80% 的核心实现，也为第19章的内核贡献打下了坚实基础。

  上一章
  ← 第17章：系统调用


  下一章
  第19章：内核贡献 →

本章评分

4.7 / 5 (11 评分)