Bash Regex Reference

Regular expressions in Bash: =~ operator, POSIX character classes, grep/sed/awk patterns.

=~ Operator

# Match and capture with BASH_REMATCH
str="2024-03-15"
if [[ "$str" =~ ^([0-9]{4})-([0-9]{2})-([0-9]{2})$ ]]; then
  echo "Year:  ${BASH_REMATCH[1]}"   # 2024
  echo "Month: ${BASH_REMATCH[2]}"   # 03
  echo "Day:   ${BASH_REMATCH[3]}"   # 15
fi

# Email validation
email="user@example.com"
if [[ "$email" =~ ^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$ ]]; then
  echo "Valid"
fi

# Negation
[[ ! "$input" =~ ^[0-9]+$ ]] && echo "Not a number"

grep

grep 'pattern' file            # basic match
grep -E '[0-9]+' file          # extended regex
grep -P '\d+' file             # PCRE
grep -i 'pattern' file         # case-insensitive
grep -v 'pattern' file         # invert match
grep -n 'pattern' file         # show line numbers
grep -r 'pattern' ./dir/       # recursive
grep -o 'pattern' file         # print only match
grep -E '^(ERROR|WARN)' app.log

sed

sed 's/foo/bar/g' file                    # replace all
sed -E 's/([0-9]{4})-([0-9]{2})/\2\/\1/' f # swap with groups
sed '/^$/d' file                           # delete empty lines
sed '/pattern/d' file                      # delete matching lines
sed -n '/start/,/end/p' file               # print range

POSIX Character Classes

[:alpha:]   # letters [a-zA-Z]
[:digit:]   # digits [0-9]
[:alnum:]   # alphanumeric
[:space:]   # whitespace
[:upper:]   # uppercase
[:lower:]   # lowercase
[:punct:]   # punctuation

grep '[[:alpha:]]' file
grep '^[[:digit:]]\+$' file
sed 's/[[:space:]]\+/ /g' file

POSIX vs PCRE

# BRE (grep default): . * ^ $ [] \ \(\) \{\}
# ERE (grep -E, awk): . * + ? ^ $ [] | () {}
# PCRE (grep -P):     \d \w \s \b (?:) (?=) (?<=)

# Word boundary
grep -E '\bword\b' file          # ERE
grep -P '\bword\b' file          # PCRE
grep '\<word\>' file             # BRE

# Non-greedy (PCRE only)
grep -oP '<.+?>' file