Python Regex Complete Guide 2026: Patterns, Groups and Real-World Use

⏱️5 min read · 1,073 words

Python’s re module is one of the most powerful tools in a developer’s toolkit. In 2026, regular expressions remain essential for text parsing, validation, data extraction, and transformation. This guide covers Python regex from basics to advanced patterns that every developer should know.

📋 Table of Contents

Core Functions
Groups and Named Groups
Lookahead and Lookbehind
Flags and Compilation
Real-World Patterns

Core Functions

import re

text = "Contact us at support@example.com or sales@company.org"

# One shared pattern for every demo below. The escapes matter: \w is a word
# character and \. is a literal dot; without the backslashes the class would
# only contain the letter "w" and the dot would match any character.
EMAIL_PATTERN = r'[\w.+-]+@[\w-]+\.[a-z]{2,}'

# re.search() — find first match anywhere in string
m = re.search(EMAIL_PATTERN, text)
if m:
    print(m.group())   # "support@example.com"

# re.match() — match at START of string only
m = re.match(r'Contact', text)  # matches
m = re.match(r'support', text)  # None (not at start)

# re.findall() — return all matches as list
emails = re.findall(EMAIL_PATTERN, text)
print(emails)  # ['support@example.com', 'sales@company.org']

# re.finditer() — return match objects (for groups + positions)
for match in re.finditer(EMAIL_PATTERN, text):
    print(f"{match.group()} at position {match.start()}-{match.end()}")

# re.sub() — replace matches
clean = re.sub(EMAIL_PATTERN, '[EMAIL]', text)
print(clean)  # "Contact us at [EMAIL] or [EMAIL]"

# re.sub() with a callable: it receives each match object and returns the
# replacement text for that match
def capitalize_words(m):
    """Return the text matched by ``m`` converted to upper case."""
    matched = m.group()
    return matched.upper()


result = re.sub(r'[a-z]+', capitalize_words, "hello world foo bar")

# re.split() — split on any run of commas, semicolons or whitespace
# (\s is the whitespace class; a bare "s" would split on the letter s)
parts = re.split(r'[,;\s]+', "one, two;three four")
print(parts)  # ['one', 'two', 'three', 'four']

Groups and Named Groups

# Capturing groups () — each (...) is returned by m.groups() in order
# \d{4} means exactly four digits
m = re.search(r'(\d{4})-(\d{2})-(\d{2})', '2026-05-29')
if m:
    year, month, day = m.groups()
    print(year, month, day)  # 2026 05 29

# Named groups (?P<name>...) — look groups up by name instead of position
m = re.search(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})', '2026-05-29')
if m:
    print(m.group('year'))   # 2026
    print(m.groupdict())     # {'year': '2026', 'month': '05', 'day': '29'}

# Non-capturing group (?:...) — group without capturing
# \S+ is a run of non-whitespace characters; because the scheme alternation
# is non-capturing, the URL remainder is group 1.
m = re.search(r'(?:https?|ftp)://(\S+)', 'Visit https://example.com/path')
if m:
    print(m.group(1))  # 'example.com/path' (group 1, not the scheme)

# Backreferences: \1 (or (?P=name)) inside a pattern; \1 or \g<name> inside a
# replacement string
# Match repeated words
# The \b anchors keep the match on whole words, so "this is" is not reported
# as a repeated "is".
m = re.search(r'\b(\w+)\s+\1\b', 'the the cat sat', re.IGNORECASE)
if m:
    print(f"Repeated word: {m.group(1)}")

# Substitution with groups — \1 and \2 in the replacement refer to the first
# and second captured word, so r'\2 \1' swaps them
result = re.sub(r'(\w+)\s(\w+)', r'\2 \1', 'first last')
print(result)  # 'last first'

Lookahead and Lookbehind

text = "price: $100, discount: $20, total: $80"

# Positive lookahead (?=...) — match if followed by
# "\$" is a literal dollar sign; the unescaped "$" inside the lookahead is the
# end-of-string anchor.
prices = re.findall(r'\$\d+(?=,|\s*$)', text)  # prices before comma or end
# prices == ['$100', '$20', '$80']

# Negative lookahead (?!...)
# Match words not followed by '.'
# The \b anchors stop the engine from backtracking to a shorter prefix
# ("Hell" out of "Hello.") just to satisfy the lookahead.
words = re.findall(r'\b\w+\b(?!\.)', 'Hello. World! Python. Language')
# words == ['World', 'Language']

# Positive lookbehind (?<=...) — match if preceded by
amounts = re.findall(r'(?<=\$)\d+', text)  # numbers after $
print(amounts)  # ['100', '20', '80']

# Negative lookbehind (?<!...)
# Match digits not preceded by '-'
# The lookbehind also rejects a preceding digit; otherwise a multi-digit
# negative such as "-45" would still yield its trailing "5".
nums = re.findall(r'(?<![-\d])\d+', "3 -4 10 -5")
print(nums)  # ['3', '10']

# Practical: extract quoted strings without quotes
# A capturing group that consumes both quotes is used here rather than
# (?<=")[^"]+(?="): lookarounds leave the quotes unconsumed, so the text
# between one closing quote and the next opening quote (' and ') would also
# be returned as a match.
strings = re.findall(r'"([^"]+)"', 'He said "hello" and "goodbye"')
print(strings)  # ['hello', 'goodbye']

Flags and Compilation

import re

# Common flags (combine several with |, e.g. re.I | re.M)
re.IGNORECASE  # or re.I — case insensitive
re.MULTILINE   # or re.M — ^ and $ match line boundaries
re.DOTALL      # or re.S — . matches \n too
re.VERBOSE     # or re.X — allow comments and whitespace

# Verbose pattern (readable): with re.VERBOSE, whitespace and "#" comments
# inside the pattern are ignored, so each piece can be annotated in place
email_pattern = re.compile(
    r'''
    [\w.+-]+          # local part
    @
    [\w-]+            # first domain label
    (?:\.[\w-]+)*     # any further labels, e.g. ".co" in "test.co.uk"
    \.[a-z]{2,}       # top-level domain
    ''',
    re.IGNORECASE | re.VERBOSE,
)

emails = email_pattern.findall("Contact: Alice@Example.COM, bob@test.co.uk")
print(emails)  # ['Alice@Example.COM', 'bob@test.co.uk']

# Compile for performance (reuse same pattern)
# The optional country-code prefix is grouped so that its trailing whitespace
# is only consumed when a "1" is actually present; an ungrouped \s* would pull
# the space before "800" into the second match.
phone_re = re.compile(r'(?:\+?1\s*)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}')
numbers = phone_re.findall("Call +1 (555) 123-4567 or 800.555.0100")
# numbers == ['+1 (555) 123-4567', '800.555.0100']

Real-World Patterns

import re

# URL parsing — every component after the scheme is a named, optional group.
# Note that the captured port, query and fragment keep their leading
# ":", "?" and "#" delimiter.
URL_RE = re.compile(
    r'https?://'
    r'(?:www\.)?'                    # optional "www." prefix, not captured
    r'(?P<domain>[a-zA-Z0-9.-]+)'
    r'(?P<port>:\d+)?'
    r'(?P<path>/[^\s?#]*)?'          # up to whitespace, "?" or "#"
    r'(?P<query>\?[^\s#]*)?'         # "\?" is a literal question mark
    r'(?P<fragment>#\S*)?'
)
m = URL_RE.match('https://api.example.com:8080/v1/users?page=1#top')
if m:
    print(m.groupdict())

# Parse log lines of the form "YYYY-MM-DD HH:MM:SS [LEVEL] message"
LOG_RE = re.compile(
    r'(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})'
    # \[ and \] are literal brackets; unescaped they would form a character
    # class instead of delimiting the level name.
    r' \[(?P<level>DEBUG|INFO|WARNING|ERROR)\]'
    # "." does not match a newline, so a trailing "\n" is left out of message.
    r' (?P<message>.+)'
)
for line in log_file:
    # walrus operator: bind the match and test it in a single expression
    if (m := LOG_RE.match(line)) is None:
        continue  # line is not in the expected format; skip it
    process_log(*m.group('level', 'message'))

# Extract code blocks from markdown
# Group 1 is the optional language tag after the opening fence, group 2 the
# body. re.DOTALL lets "." span newlines and the lazy ".*?" stops at the first
# closing fence. The "\n" must stay an escape inside the raw string: a real
# line break would split the literal.
code_blocks = re.findall(r'```(\w+)?\n(.*?)```', markdown_text, re.DOTALL)
for lang, code in code_blocks:
    # lang is '' when the fence carries no language tag
    print(f"Language: {lang}, Code length: {len(code)}")

# Validate passwords (must have upper, lower, digit, special)
def is_strong_password(pwd: str) -> bool:
    """Return True if ``pwd`` meets every strength rule.

    The password must be at least 8 characters long and contain an uppercase
    letter, a lowercase letter, a digit, and one of ``!@#$%^&*``.
    """
    patterns = [
        r'[A-Z]',       # uppercase
        r'[a-z]',       # lowercase
        r'\d',          # digit (a bare 'd' would match the letter d instead)
        r'[!@#$%^&*]',  # special
    ]
    return len(pwd) >= 8 and all(re.search(p, pwd) for p in patterns)

# HTML tag removal: delete every "<...>" run and keep the text in between
clean_html = re.compile(r'<[^>]+>').sub('', '<p>Hello <b>world</b></p>')
print(clean_html)  # "Hello world"

# CamelCase to snake_case
def camel_to_snake(name: str) -> str:
    """Convert a CamelCase or mixedCase identifier to snake_case.

    A run of capitals is kept together as one word, so 'HTTPSConnection'
    becomes 'https_connection' rather than 'h_t_t_p_s_connection'.
    """
    # Pass 1: separate a run of capitals from the capitalised word after it
    # (HTTPSConnection -> HTTPS_Connection). \1 and \2 put the captured text
    # back on either side of the underscore.
    s = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', name)
    # Pass 2: insert an underscore wherever a lowercase letter or digit is
    # followed by a capital, then lowercase the result.
    return re.sub(r'([a-z\d])([A-Z])', r'\1_\2', s).lower()


print(camel_to_snake('CamelCaseString'))  # 'camel_case_string'
print(camel_to_snake('HTTPSConnection'))  # 'https_connection'

In 2026, Python regex remains essential for any text-processing task. Master the core functions (search, findall, sub), understand groups and named groups for extraction, use lookaheads and lookbehinds for context-aware matching, and always compile frequently used patterns. For complex parsing tasks, consider libraries like pyparsing or lark (formerly published as lark-parser) — regex gets unwieldy for deeply nested structures.

📚 You might also like

🔗 Share this article

X / Twitter Facebook WhatsApp LinkedIn Telegram