File Operations Mastery
| Algorithm | Extension | Ratio | Speed | Best For |
|---|---|---|---|---|
| gzip | .tar.gz / .tgz | Medium | Fast | Daily use, best compatibility |
| bzip2 | .tar.bz2 | High | Slow | Source releases (legacy) |
| xz | .tar.xz | Highest | Very slow | Kernel/package releases, storage-first |
| zstd | .tar.zst | High | Very fast | Modern default: best speed/ratio balance |
**推荐：**新项目优先选用 zstd。Facebook 开发的 zstd 在压缩率接近 xz 的同时，解压速度与 gzip 相当。Linux 内核自 5.9 起已支持使用 zstd 压缩内核镜像。
rsync：增量同步与远程备份
rsync 是生产环境备份的标准工具。它默认通过比较两端文件的大小与修改时间来判断哪些文件发生了变化，并借助滚动校验和只传输差异部分，极大减少网络开销。
核心选项
- -a：归档模式，等价于 -rlptgoD，保留权限/时间戳/软链接/设备文件。
- -v：显示每个传输的文件名。
- -z：传输时压缩数据，适合慢速网络。
- --delete：删除目标端有但源端没有的文件（镜像同步）。
- --progress：显示每个文件的传输进度。
- --dry-run / -n：模拟执行，只显示将要做什么，不实际修改。
- --exclude：排除匹配的文件/目录。
- --bwlimit：限制带宽，单位 KB/s。
# Local directory sync. A trailing slash on the source means "the contents of
# the directory"; without it, the directory itself is copied into the target.
rsync -avz /var/www/html/ /backup/html/
# Dry run first; run for real once the output looks right
rsync -avz --dry-run /var/www/ /backup/www/
rsync -avz /var/www/ /backup/www/
# Mirror sync (files that exist only at the destination are deleted)
rsync -avz --delete /var/www/ /backup/www/
# Remote sync (rsync over SSH)
rsync -avz -e ssh /local/data/ user@remote:/backup/data/
# Custom SSH port
rsync -avz -e "ssh -p 2222" /local/ user@host:/remote/
# Rate-limited transfer (1 MB/s) to avoid saturating the link
rsync -avz --bwlimit=1024 /data/ user@host:/data/
# Exclude multiple patterns
rsync -avz \
  --exclude='*.log' \
  --exclude='cache/' \
  --exclude='.git/' \
  /var/www/ /backup/www/
# Incremental copy: only files modified within the last day.
# rsync has no modification-time filter rule, so find selects the files and
# hands rsync a NUL-delimited list of paths relative to /data.
(cd /data && find . -type f -mtime -1 -print0) |
  rsync -avz --from0 --files-from=- /data/ /backup/
生产备份脚本示例
#!/bin/bash
# backup.sh - daily snapshot-style incremental backup.
#
# Each run copies ${SRC} into a new ${BACKUP_ROOT}/<timestamp>/ directory,
# hard-linking unchanged files against the previous snapshot, repoints the
# 'latest' symlink at the new snapshot, and prunes snapshots older than 30 days.
# All output is appended to ${LOG}.
set -euo pipefail

TIMESTAMP=$(date +%Y%m%d_%H%M%S)
SRC="/var/www"
BACKUP_ROOT="/mnt/backup"
DEST="${BACKUP_ROOT}/${TIMESTAMP}"
LATEST="${BACKUP_ROOT}/latest"
LOG="/var/log/backup.log"

echo "[${TIMESTAMP}] Starting backup..." | tee -a "$LOG"

# --link-dest gives snapshot-style incremental backups: only files that
# changed since the previous snapshot consume new disk space.
rsync -avz --delete \
  --link-dest="${LATEST}" \
  --exclude='*.tmp' \
  --exclude='cache/' \
  "${SRC}/" "${DEST}/" 2>&1 | tee -a "$LOG"

# rsync -a copies the source directory's mtime onto the snapshot directory.
# Stamp the snapshot with the time it was taken, otherwise the -mtime pruning
# below would judge it by the age of ${SRC} and could delete a fresh snapshot.
touch -- "${DEST}"

# Repoint the 'latest' symlink (-n replaces the link itself instead of
# creating a new link inside the directory it currently points to)
ln -sfn -- "${DEST}" "${LATEST}"

echo "[$(date +%Y%m%d_%H%M%S)] Backup complete: ${DEST}" | tee -a "$LOG"

# Keep the last 30 days of snapshots, delete older ones
find "${BACKUP_ROOT}" -mindepth 1 -maxdepth 1 -type d -name "20*" \
  -mtime +30 -exec rm -rf -- {} +
**--link-dest 的原理：**rsync 会将目标目录中与上一次备份相同的文件创建为硬链接，而不是复制。这样每次备份都是"完整快照"，但实际只占用差异部分的磁盘空间。30 天的每日备份可能只占用 2-3 倍存储空间。
xargs：批量处理的利器
xargs 将标准输入的内容转化为命令参数，弥补了 Linux 管道不能直接传参给命令的局限。
# Basic usage: pass find results to rm. The list is NUL-delimited so that odd
# file names in a world-writable directory cannot be split into extra rm
# arguments; -r skips rm entirely when nothing matched.
find /tmp -name "*.tmp" -print0 | xargs -0 -r rm -f --
# -I{} placeholder: insert the argument in the middle of the command
find . -name "*.log" -print0 | xargs -0 -I{} cp -- {} /backup/
# File names containing spaces: pair find -print0 with xargs -0
find . -name "*.txt" -print0 | xargs -0 wc -l
# -P parallel execution (4 processes at once)
find . -name "*.jpg" -print0 | xargs -0 -P4 -I{} convert {} -resize 800x {}-resized.jpg
# -n: number of arguments per invocation (2 at a time)
echo "a b c d e f" | xargs -n2 echo
# Find files containing TODO and count their lines
# (grep -Z emits NUL-terminated names to match xargs -0)
grep -rlZ "TODO" ./src | xargs -0 -r wc -l
# Safely delete a large number of files (avoids "argument list too long")
find /var/log -name "*.gz" -mtime +90 -print0 | xargs -0 -r rm -f --
# Interactive confirmation (-p asks before each execution)
find . -name "*.bak" -print0 | xargs -0 -r -p rm --
**含空格文件名必须用 -print0 + -0：**默认 xargs 以空白字符（空格、换行）分割参数，文件名含空格时会出错。始终用 find -print0 | xargs -0 的组合处理真实文件路径。
watch：实时动态监控
watch 以固定间隔重复执行命令并刷新屏幕，是实时监控系统状态的简单利器。
# Refresh disk usage every 2 seconds
watch -n 2 df -h
# Highlight what changed between refreshes (-d)
watch -d -n 1 'ss -tnp'
# Monitor processes (a simplified top)
watch -n 1 'ps aux --sort=-%cpu | head -15'
# Monitor the number of entries in a directory
# (ls -A1 prints one name per line; ls -l would add a "total" line to the count)
watch -n 5 'ls -A1 /var/spool/mail/ | wc -l'
# Watch the nginx access log line count
watch -n 2 'wc -l /var/log/nginx/access.log'
# Monitor network connection statistics
watch -n 2 'ss -s'
# Hide the header bar (--no-title)
watch --no-title -n 1 uptime
inotifywait：文件变动实时监听
inotifywait 使用 Linux 内核的 inotify 接口监听文件系统事件，可以在文件被创建、修改、删除时立即触发响应，是自动化部署和配置热重载的基础工具。
# Install
sudo apt install inotify-tools
# Continuously monitor a directory (-m keep running, -r recursive, -e events)
inotifywait -m -r -e create,modify,delete /etc/nginx/
# Monitor specific events with formatted output
inotifywait -m -r \
  --format '%T %w%f %e' \
  --timefmt '%Y-%m-%d %H:%M:%S' \
  -e create,modify,delete \
  /var/www/html/
# Auto-reload nginx when nginx.conf changes.
# Watch the directory rather than the file: a watch on a single file prints no
# file-name column, and editors that save by renaming a new file over the old
# one would end a watch placed on the file itself.
inotifywait -m -e close_write,moved_to --format '%e %f' /etc/nginx/ |
while read -r action file; do
  [ "$file" = "nginx.conf" ] || continue
  echo "Config changed: $file ($action)"
  # Reload only if the new configuration passes the syntax check
  nginx -t && systemctl reload nginx
done
# Auto-sync: trigger rsync whenever the local directory changes.
# Event first, full path last, so paths containing spaces stay intact.
inotifywait -m -r -e create,modify,delete --format '%e %w%f' /var/www/html/ |
while read -r event path; do
  echo "[$event] $path"
  rsync -az /var/www/html/ user@remote:/var/www/html/
done
**inotify 的内核限制：**默认最大监听数历来为 8192（/proc/sys/fs/inotify/max_user_watches，较新的内核可能会按内存大小自动调整）。监听大型代码仓库时需要调大：
echo 524288 | sudo tee /proc/sys/fs/inotify/max_user_watches，并写入 /etc/sysctl.conf 永久生效。
文件查找工具全解
Linux 有多种"查找命令"工具，它们的工作原理和用途各不相同，很多人混淆了它们。
| Command | Search Scope | Speed | Notes |
|---|---|---|---|
| which | PATH 环境变量 | Instant | Finds executable path in PATH |
| type | Shell built-in | Instant | Distinguishes builtins/aliases/functions |
| whereis | Fixed path list | Fast | Finds binary, man page, source |
| locate | Database index | Very fast | Requires updatedb, new files have delay |
| find | Real-time filesystem scan | Slow (large dirs) | Most powerful, supports complex criteria |
# which: find where an executable lives on PATH
which python3
# /usr/bin/python3
# type: classify a command (builtin / alias / function / file)
type ls
# ls is aliased to `ls --color=auto'
type cd
# cd is a shell builtin
# whereis: find the binary, man page, and source locations at once
whereis nginx
# nginx: /usr/sbin/nginx /etc/nginx /usr/share/man/man8/nginx.8.gz
# locate: database lookup, very fast
sudo updatedb # update the database first
locate "*.conf" | grep nginx
# find: precise, real-time search
find /etc -name "*.conf" -mtime -7 # config files modified in the last 7 days
find /var/log -size +100M # log files larger than 100MB
find . -perm /4000 # files with the SUID bit set (security audit)
文本处理基础工具
以下工具是文本处理流水线的基础组件，与 grep/awk/sed 配合使用效果更强（第4章详细讲解）。
# wc: count lines / words / bytes
wc -l access.log # line count
wc -w document.txt # word count
wc -c binary.dat # byte count
# sort
sort -n numbers.txt # numeric sort
sort -rn numbers.txt # reverse numeric sort
sort -t: -k2,2 /etc/passwd # sort by field 2 only, delimiter ":" (-k2 alone keys on field 2 through end of line)
sort -u names.txt # sort and deduplicate
# uniq: only collapses adjacent duplicates, so sort first
sort access.log | uniq -c | sort -rn | head -20 # the 20 most frequent lines
# cut: field extraction
cut -d: -f1 /etc/passwd # first ":"-separated column (user name)
cut -c1-10 file.txt # first 10 characters of each line
# paste: merge files side by side
paste file1.txt file2.txt # join the two files column-wise, tab-separated
paste -d, file1.txt file2.txt # comma-separated instead
# tee: write to the screen and to a file at the same time
ls -la | tee listing.txt # display and save
echo "start" | tee -a build.log # append mode
{{else}}
Chapter 3: File Operations Mastery
Deep options and safe usage of cp/mv/rm, complete tar guide (gzip/bzip2/xz/zstd comparison), rsync incremental sync and remote backup, xargs batch processing, watch periodic monitoring, inotifywait real-time file change detection.
cp Deep Dive
cp is the most commonly used file copy command, but most users ignore its powerful options. Mastering these options lets you copy files more precisely and safely.
Core Options Explained
- -a (archive mode): Equivalent to -dR --preserve=all. Preserves all metadata (permissions, timestamps, ownership, symlink structure). Best for full backups.
- -r / -R (recursive): Copy directories and all their contents. Already included in -a.
- -p (preserve attributes): Preserves permissions, timestamps, and ownership but not symlinks like -a does.
- -u (update only): Only copy when the source is newer than the destination, or when the destination is missing. Great for incremental syncs.
- --backup: Backs up the destination before overwriting. Appends ~ by default; customize with --suffix=.bak.
- -l (hard link): Creates hard links instead of copying — saves disk space.
- -s (symbolic link): Creates symlinks instead of copying.
- -L / -P: How to handle symlinks in the source. -L follows links (copies real files), -P preserves the link itself.
# Archive copy: preserve all attributes (great for server migrations)
cp -a /var/www/html/ /backup/html-20260425/
# Only copy files newer than the destination (incremental)
cp -u /src/*.conf /dst/
# Auto-backup before overwriting: the old file is kept as nginx.conf.bak.
# --suffix only applies to simple backups; numbered backups are always named
# file.~N~ and would ignore it.
cp --backup=simple --suffix=.bak nginx.conf /etc/nginx/nginx.conf
# Copy a directory, following all symlinks (copies the real files)
cp -rL /opt/app/ /backup/app/
# Copy a directory, preserving symlinks as-is (-a already implies recursion)
cp -a /opt/app/ /backup/app/
-a vs -p difference: -p preserves only permissions, timestamps, and ownership. -a additionally preserves symlink structure (-d) and extended attributes (xattr). Use -a for backups, -p for everyday copies.
mv and Renaming
mv is atomic on the same filesystem (just modifies a directory entry, no data movement), but degrades to cp + rm across filesystems. Moving large files across disks saturates I/O and risks data loss if interrupted.
# Move a file (same filesystem: instant rename; across filesystems: real copy + delete)
mv largefile.tar.gz /mnt/backup/
# Rename
mv old-name.txt new-name.txt
# -i: prompt before overwriting an existing target
mv -i source.txt target.txt
# -n: never overwrite an existing destination
mv -n draft.txt production.txt
# Batch rename: change .jpeg to .jpg with a substitution expression
# NOTE(review): the s/// form is the Perl-based rename; other rename
# implementations take different arguments - confirm which one is installed
rename 's/\.jpeg$/.jpg/' *.jpeg
# Batch rename with a bash loop (no extra tools needed):
# ${f%.1} strips the trailing ".1", then ".old" is appended
for f in *.log.1; do mv "$f" "${f%.1}.old"; done
Risk of cross-filesystem mv with large files: mv across disks equals cp + rm. If interrupted by a power cut or Ctrl-C, the source file is untouched but the destination is incomplete. For large cross-disk transfers, use rsync --remove-source-files and verify after completion.
rm Safety Practices
rm -rf is one of the most dangerous commands in Linux. No recycle bin, no confirmation, no undo.
Safer Alternatives
- trash-cli: Moves files to ~/.local/share/Trash. Recoverable with trash-restore.
- safe-rm: Blacklist-based protection to prevent deletion of /, /etc, and other critical paths.
- rm -i: Asks for confirmation per file — good for sensitive operations.
- rmdir: Only removes empty directories — naturally prevents accidental deletion.
- rm -d: Like rmdir for directories — removes a directory only if it is empty.
# Install trash-cli
sudo apt install trash-cli # Debian/Ubuntu
# Safe delete: move the directory to the trash instead of removing it (recoverable)
trash-put /tmp/old-logs/
# List trash contents
trash-list
# Restore a file from the trash
trash-restore
# Empty the trash (permanent)
trash-empty
# Recursive delete, confirming each file first (-r recursive, -i interactive)
rm -ri ./temp-dir/
# Remove only empty directories under the current one (including empty subdirs)
find . -type d -empty -delete
Production rule: Before rm -rf in a script, always print the path or use echo rm -rf to simulate. Never write rm -rf "$VAR/" without first validating that VAR is non-empty — an empty variable turns this into rm -rf /. In bash, rm -rf "${VAR:?}/" makes the script abort instead when VAR is unset or empty.
mkdir and Directory Operations
# Create several sibling directories in one call; -p also creates any missing
# parent directories (brace expansion yields logs, conf, data and tmp)
mkdir -p /opt/app/{logs,conf,data,tmp}
# install -d: create the directory and set mode, owner and group in one step
install -d -m 755 -o www-data -g www-data /var/www/html
# View the directory tree, limited to 2 levels (-L 2)
tree -L 2 /opt/app/
# Show file sizes in human-readable units (-s size, -h human-readable)
tree -sh /opt/app/
# Show only directories (-d)
tree -d /etc/
touch — More Than Creating Files
touch isn't just for creating empty files. It can precisely control file timestamps, which matters for Makefiles and build systems.
# Create an empty file (or refresh the timestamps of an existing one)
touch newfile.txt
# Create multiple files via brace expansion: file01.txt ... file10.txt
touch file{01..10}.txt
# Update timestamps to now only if the file already exists (-c never creates it)
touch -c existing.txt
# Update only the access time (atime)
touch -a logfile.txt
# Update only the modification time (mtime)
touch -m config.conf
# Set a specific timestamp (-d takes a date string)
touch -d "2026-01-01 00:00:00" archive.tar.gz
# Copy timestamps from one file to another (-r reference file)
touch -r reference.txt target.txt
tar Complete Guide
tar (Tape ARchive) is the core Linux archiving tool. It doesn't compress by itself — compression is done by external programs (gzip/bzip2/xz/zstd), invoked via the -z/-j/-J/--zstd flags.
Common Operations Quick Reference
# Create a gzip archive (most common): -c create, -z gzip, -f archive file
tar -czf archive.tar.gz /path/to/dir/
# Create with verbose output (-v lists each file as it is added)
tar -czvf archive.tar.gz /path/to/dir/
# Extract into the current directory (-x extract)
tar -xzf archive.tar.gz
# Extract into a specific directory (-C changes to it first)
tar -xzf archive.tar.gz -C /opt/
# List archive contents without extracting (-t list)
tar -tzf archive.tar.gz
# Extract a single file: name it by its path as stored inside the archive
tar -xzf archive.tar.gz path/inside/archive/file.conf
# Exclude specific directories and patterns
# NOTE(review): some tar versions may apply --exclude only to paths listed
# after it; placing the --exclude options before /var/www/ looks safer - confirm
tar -czf backup.tar.gz /var/www/ \
--exclude='/var/www/cache' \
--exclude='*.log' \
--exclude='.git'
# bzip2 compression (-j: higher ratio, slower)
tar -cjf archive.tar.bz2 /path/to/dir/
# xz compression (-J: best ratio, slowest)
tar -cJf archive.tar.xz /path/to/dir/
# zstd compression (--zstd: modern, fast with a good ratio)
tar --zstd -cf archive.tar.zst /path/to/dir/
# Date-based partial backup: pack only files modified after the given date
tar -czf incremental.tar.gz \
--newer-mtime="2026-04-01" /var/data/
Compression Algorithm Comparison
| Algorithm | Extension | Ratio | Speed | Best For |
|---|---|---|---|---|
| gzip | .tar.gz / .tgz | Medium | Fast | Daily use, best compatibility |
| bzip2 | .tar.bz2 | High | Slow | Source releases (legacy) |
| xz | .tar.xz | Highest | Very slow | Kernel/package releases, storage-first |
| zstd | .tar.zst | High | Very fast | Modern default: best speed/ratio balance |
Recommendation: Choose zstd for new projects. Developed by Facebook, zstd achieves near-xz compression ratios at gzip-comparable decompression speed. Since version 5.9 the Linux kernel supports zstd as a kernel image compression format.
rsync: Incremental Sync and Remote Backup
rsync is the standard tool for production backups. By default it compares file size and modification time on both ends to decide which files changed, then uses rolling checksums to transfer only the differing parts — drastically reducing network overhead.
Key Options
- -a: Archive mode, equivalent to -rlptgoD. Preserves permissions, timestamps, symlinks, device files.
- -v: Verbose — shows each file being transferred.
- -z: Compress during transfer. Useful on slow networks.
- --delete: Remove files at the destination that don't exist at the source (mirror mode).
- --progress: Show per-file transfer progress.
- --dry-run / -n: Simulate execution — show what would happen without making changes.
- --exclude: Exclude matching files/directories.
- --bwlimit: Limit bandwidth in KB/s.
# Local directory sync. A trailing slash on the source means "the contents of
# the directory"; without it, the directory itself is copied into the target.
rsync -avz /var/www/html/ /backup/html/
# Dry run first to review what would change, then execute for real
rsync -avz --dry-run /var/www/ /backup/www/
rsync -avz /var/www/ /backup/www/
# Mirror sync (--delete removes destination files that are not in the source)
rsync -avz --delete /var/www/ /backup/www/
# Remote sync over SSH (-e selects the remote shell)
rsync -avz -e ssh /local/data/ user@remote:/backup/data/
# Custom SSH port: pass the ssh options inside the -e string
rsync -avz -e "ssh -p 2222" /local/ user@host:/remote/
# Rate-limited transfer (1024 KB/s = 1 MB/s) to avoid saturating bandwidth
rsync -avz --bwlimit=1024 /data/ user@host:/data/
# Exclude multiple patterns (one --exclude per pattern)
rsync -avz \
--exclude='*.log' \
--exclude='cache/' \
--exclude='.git/' \
/var/www/ /backup/www/
Production Backup Script Example
#!/bin/bash
# backup.sh - daily snapshot-style incremental backup.
#
# Each run copies ${SRC} into a new ${BACKUP_ROOT}/<timestamp>/ directory,
# hard-linking unchanged files against the previous snapshot, repoints the
# 'latest' symlink at the new snapshot, and prunes snapshots older than 30 days.
# All output is appended to ${LOG}.
set -euo pipefail

TIMESTAMP=$(date +%Y%m%d_%H%M%S)
SRC="/var/www"
BACKUP_ROOT="/mnt/backup"
DEST="${BACKUP_ROOT}/${TIMESTAMP}"
LATEST="${BACKUP_ROOT}/latest"
LOG="/var/log/backup.log"

echo "[${TIMESTAMP}] Starting backup..." | tee -a "$LOG"

# --link-dest gives snapshot-style incremental backups: only files that
# changed since the previous snapshot consume new disk space.
rsync -avz --delete \
  --link-dest="${LATEST}" \
  --exclude='*.tmp' \
  --exclude='cache/' \
  "${SRC}/" "${DEST}/" 2>&1 | tee -a "$LOG"

# rsync -a copies the source directory's mtime onto the snapshot directory.
# Stamp the snapshot with the time it was taken, otherwise the -mtime pruning
# below would judge it by the age of ${SRC} and could delete a fresh snapshot.
touch -- "${DEST}"

# Repoint the 'latest' symlink (-n replaces the link itself instead of
# creating a new link inside the directory it currently points to)
ln -sfn -- "${DEST}" "${LATEST}"

echo "[$(date +%Y%m%d_%H%M%S)] Backup complete: ${DEST}" | tee -a "$LOG"

# Keep the last 30 days of snapshots, delete older ones
find "${BACKUP_ROOT}" -mindepth 1 -maxdepth 1 -type d -name "20*" \
  -mtime +30 -exec rm -rf -- {} +
How --link-dest works: rsync creates hard links for files that are identical to the previous backup, rather than copying them. This means each backup is a "full snapshot" but only the changed files consume additional disk space. 30 daily backups might use only 2–3x the storage of the original data.
xargs: Batch Processing Power
xargs converts standard input into command arguments, bridging the gap where Linux pipes can't pass arguments directly to commands.
# Basic usage: pass find results to rm. The list is NUL-delimited so that odd
# file names in a world-writable directory cannot be split into extra rm
# arguments; -r skips rm entirely when nothing matched.
find /tmp -name "*.tmp" -print0 | xargs -0 -r rm -f --
# -I{} placeholder: insert the argument at a specific position
find . -name "*.log" -print0 | xargs -0 -I{} cp -- {} /backup/
# File names containing spaces: pair find -print0 with xargs -0
find . -name "*.txt" -print0 | xargs -0 wc -l
# -P parallel execution (4 processes simultaneously)
find . -name "*.jpg" -print0 | xargs -0 -P4 -I{} convert {} -resize 800x {}-resized.jpg
# -n: number of arguments per invocation (2 at a time)
echo "a b c d e f" | xargs -n2 echo
# Find files containing TODO and count their lines
# (grep -Z emits NUL-terminated names to match xargs -0)
grep -rlZ "TODO" ./src | xargs -0 -r wc -l
# Safely delete many files (avoids "argument list too long")
find /var/log -name "*.gz" -mtime +90 -print0 | xargs -0 -r rm -f --
# Interactive confirmation (-p prompts before each execution)
find . -name "*.bak" -print0 | xargs -0 -r -p rm --
Filenames with spaces require -print0 + -0: xargs splits on whitespace by default, so filenames with spaces will cause errors. Always use the find -print0 | xargs -0 combination when working with real file paths.
watch: Periodic Real-time Monitoring
watch repeatedly executes a command at a fixed interval and refreshes the screen — a simple but powerful real-time monitoring tool.
# Refresh disk usage every 2 seconds
watch -n 2 df -h
# Highlight what changed between refreshes (-d)
watch -d -n 1 'ss -tnp'
# Monitor processes (simplified top alternative)
watch -n 1 'ps aux --sort=-%cpu | head -15'
# Monitor the number of entries in a directory
# (ls -A1 prints one name per line; ls -l would add a "total" line to the count)
watch -n 5 'ls -A1 /var/spool/mail/ | wc -l'
# Watch the nginx access log line count
watch -n 2 'wc -l /var/log/nginx/access.log'
# Monitor network connection stats
watch -n 2 'ss -s'
# Hide the header bar (--no-title)
watch --no-title -n 1 uptime
inotifywait: Real-time File Change Detection
inotifywait uses the Linux kernel's inotify interface to watch filesystem events, triggering immediately when files are created, modified, or deleted. It's a foundation for automated deployments and hot-reload configuration systems.
# Install
sudo apt install inotify-tools
# Continuously monitor a directory (-m keep running, -r recursive, -e events)
inotifywait -m -r -e create,modify,delete /etc/nginx/
# Monitor with formatted output
inotifywait -m -r \
  --format '%T %w%f %e' \
  --timefmt '%Y-%m-%d %H:%M:%S' \
  -e create,modify,delete \
  /var/www/html/
# Auto-reload nginx when nginx.conf changes.
# Watch the directory rather than the file: a watch on a single file prints no
# file-name column, and editors that save by renaming a new file over the old
# one would end a watch placed on the file itself.
inotifywait -m -e close_write,moved_to --format '%e %f' /etc/nginx/ |
while read -r action file; do
  [ "$file" = "nginx.conf" ] || continue
  echo "Config changed: $file ($action)"
  # Reload only if the new configuration passes the syntax check
  nginx -t && systemctl reload nginx
done
# Auto-sync: trigger rsync whenever the local directory changes.
# Event first, full path last, so paths containing spaces stay intact.
inotifywait -m -r -e create,modify,delete --format '%e %w%f' /var/www/html/ |
while read -r event path; do
  echo "[$event] $path"
  rsync -az /var/www/html/ user@remote:/var/www/html/
done
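inotify kernel limits: Each user may hold only a limited number of watches (/proc/sys/fs/inotify/max_user_watches; historically 8192 by default, while newer kernels may scale it with available memory). For large codebases, increase it:
echo 524288 | sudo tee /proc/sys/fs/inotify/max_user_watches, and persist it by adding fs.inotify.max_user_watches=524288 to /etc/sysctl.conf.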
File Lookup Tools Compared
| Command | Search Scope | Speed | Notes |
|---|---|---|---|
| which | PATH variable | Instant | Finds executable path in PATH only |
| type | Shell built-in | Instant | Distinguishes builtins/aliases/functions/executables |
| whereis | Fixed path list | Fast | Finds binary, man page, and source simultaneously |
| locate | Database index | Very fast | Requires updatedb; new files have a delay |
| find | Real-time filesystem scan | Slow (large dirs) | Most powerful; supports time/permission/size criteria |
# which: find where an executable lives (searches PATH only)
which python3
# /usr/bin/python3
# type: classify a command (builtin / alias / function / executable)
type ls
# ls is aliased to `ls --color=auto'
type cd
# cd is a shell builtin
# whereis: find binary, man page, and source locations at once
whereis nginx
# nginx: /usr/sbin/nginx /etc/nginx /usr/share/man/man8/nginx.8.gz
# locate: database-based search, very fast, but only as fresh as the last updatedb
sudo updatedb # update the database first
locate "*.conf" | grep nginx
# find: real-time, precise search of the live filesystem
find /etc -name "*.conf" -mtime -7 # configs modified in the last 7 days
find /var/log -size +100M # log files over 100MB
find . -perm /4000 # files with the SUID bit set (security audit)
Text Processing Foundation Tools
# wc: count lines / words / bytes
wc -l access.log # line count
wc -w document.txt # word count
wc -c binary.dat # byte count
# sort
sort -n numbers.txt # numeric sort
sort -rn numbers.txt # reverse numeric sort
sort -t: -k2,2 /etc/passwd # sort by field 2 only, delimiter ":" (-k2 alone keys on field 2 through end of line)
sort -u names.txt # sort and deduplicate
# uniq: only collapses adjacent duplicates, so sort first
sort access.log | uniq -c | sort -rn | head -20 # top 20 most frequent lines
# cut: field extraction
cut -d: -f1 /etc/passwd # first column (username) with : delimiter
cut -c1-10 file.txt # first 10 characters of each line
# paste: horizontal file merge
paste file1.txt file2.txt # merge columns, tab-separated
paste -d, file1.txt file2.txt # merge with comma delimiter
# tee: output to both screen and file
ls -la | tee listing.txt # display and save
echo "start" | tee -a build.log # append mode
{{end}}
Previous
← Ch2: Filesystem Deep Dive
Next
Ch4: Text Processing Tools →