Python Regex Complete Guide 2026: Muster, Gruppen und reale Verwendung

⏱️5 min read · 956 words

Das re-Modul von Python ist eines der leistungsstärksten Tools im Toolkit eines Entwicklers. Auch im Jahr 2026 bleiben reguläre Ausdrücke für die Textanalyse, Validierung, Datenextraktion und Transformation unverzichtbar. Dieser Leitfaden behandelt Python-Regex von den Grundlagen bis hin zu fortgeschrittenen Mustern, die jeder Entwickler kennen sollte.

📋 Table of Contents

Kernfunktionen
Gruppen und benannte Gruppen
Lookahead und Lookbehind
Flags und Kompilierung
Muster aus der realen Welt

Kernfunktionen

import re

text = "Contact us at support@example.com or sales@company.org"

# The e-mail pattern used throughout this section:
#   [\w.+-]+   local part (word characters, dot, plus, hyphen)
#   @[\w-]+    "@" followed by the first domain label
#   \.[a-z]{2,} a literal dot and a top-level domain of 2+ lowercase letters
# The backslashes are essential: without them "w" and "." are taken literally
# (or as "any character") and nothing in `text` matches.

# re.search() — find first match anywhere in string
m = re.search(r'[\w.+-]+@[\w-]+\.[a-z]{2,}', text)
if m:
    print(m.group())   # "support@example.com"

# re.match() — match at START of string only
m = re.match(r'Contact', text)  # matches
m = re.match(r'support', text)  # None (not at start)

# re.findall() — return all matches as list
emails = re.findall(r'[\w.+-]+@[\w-]+\.[a-z]{2,}', text)
print(emails)  # ['support@example.com', 'sales@company.org']

# re.finditer() — return match objects (for groups + positions)
for match in re.finditer(r'[\w.+-]+@[\w-]+\.[a-z]{2,}', text):
    print(f"{match.group()} at position {match.start()}-{match.end()}")

# re.sub() — replace matches
clean = re.sub(r'[\w.+-]+@[\w-]+\.[a-z]{2,}', '[EMAIL]', text)
print(clean)  # "Contact us at [EMAIL] or [EMAIL]"


# re.sub() with function
def capitalize_words(m):
    """Return the text of match *m* converted to upper case.

    Intended as a replacement callback for re.sub(), which calls it once
    per match and substitutes the returned string.
    """
    return m.group().upper()


result = re.sub(r'[a-z]+', capitalize_words, "hello world foo bar")
# result == "HELLO WORLD FOO BAR"

# re.split() — split on any run of commas, semicolons or whitespace
parts = re.split(r'[,;\s]+', "one, two;three four")
print(parts)  # ['one', 'two', 'three', 'four']

Gruppen und benannte Gruppen

# Capturing groups ()
m = re.search(r'(\d{4})-(\d{2})-(\d{2})', '2026-05-29')
if m:
    year, month, day = m.groups()
    print(year, month, day)  # 2026 05 29

# Named groups (?P<name>...)
m = re.search(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})', '2026-05-29')
if m:
    print(m.group('year'))   # 2026
    print(m.groupdict())     # {'year': '2026', 'month': '05', 'day': '29'}

# Non-capturing group (?:...) — group without capturing
m = re.search(r'(?:https?|ftp)://(\S+)', 'Visit https://example.com/path')
if m:
    print(m.group(1))  # 'example.com/path' (group 1, not the scheme)

# Backreferences \1 or \g<name>
# Match repeated words: \1 must repeat whatever group 1 captured, and the
# \b anchors keep the match on whole words.
m = re.search(r'\b(\w+)\s+\1\b', 'the the cat sat', re.IGNORECASE)
if m:
    print(f"Repeated word: {m.group(1)}")

# Substitution with groups: \2 and \1 in the replacement swap the two words
result = re.sub(r'(\w+)\s(\w+)', r'\2 \1', 'first last')
print(result)  # 'last first'

Lookahead und Lookbehind

text = "price: $100, discount: $20, total: $80"

# Positive lookahead (?=...) — match if followed by
# "$" must be escaped (\$) to mean a literal dollar sign; the trailing,
# unescaped "$" inside the lookahead is the end-of-string anchor.
prices = re.findall(r'\$\d+(?=,|\s*$)', text)  # prices before comma or end
# prices == ['$100', '$20', '$80']

# Negative lookahead (?!...)
# Match words not followed by '.'
# The \b anchors force whole-word matches; without them the engine backtracks
# and returns word fragments such as 'Hell' (from 'Hello.').
words = re.findall(r'\b\w+\b(?!\.)', 'Hello. World! Python. Language')
# words == ['World', 'Language']

# Positive lookbehind (?<=...) — match if preceded by
amounts = re.findall(r'(?<=\$)\d+', text)  # numbers after $
print(amounts)  # ['100', '20', '80']

# Negative lookbehind (?<!...)
# Match digits not preceded by '-'. The \d inside the lookbehind also rejects
# a start position in the middle of a number, so "-45" cannot yield "5".
nums = re.findall(r'(?<![-\d])\d+', "3 -4 10 -5")
print(nums)  # ['3', '10']

# Practical: extract quoted strings without quotes
# A capturing group that consumes both quote characters is used here. The
# pure-lookaround form (?<=")[^"]+(?=") leaves the quotes unconsumed and so
# also returns the text between a closing and the next opening quote
# (' and ' in this example).
strings = re.findall(r'"([^"]+)"', 'He said "hello" and "goodbye"')
print(strings)  # ['hello', 'goodbye']

Flags und Kompilierung

import re

# Common flags
re.IGNORECASE  # or re.I — case insensitive
re.MULTILINE   # or re.M — ^ and $ match line boundaries
re.DOTALL      # or re.S — . matches \n too
re.VERBOSE     # or re.X — allow comments and whitespace

# Verbose pattern (readable): with re.VERBOSE, whitespace and "#" comments
# inside the pattern are ignored, so each part can be documented in place.
email_pattern = re.compile(
    r'''
    [\w.+-]+          # local part
    @
    [\w-]+            # first domain label
    (?:\.[\w-]+)*     # any further labels, e.g. ".co" in "test.co.uk"
    \.[a-z]{2,}       # top-level domain (case-insensitive via the flag)
    ''',
    re.IGNORECASE | re.VERBOSE,
)

emails = email_pattern.findall("Contact: Alice@Example.COM, bob@test.co.uk")
print(emails)  # ['Alice@Example.COM', 'bob@test.co.uk']

# Compile for performance (reuse same pattern)
# The optional country-code prefix is grouped as (?:\+?1\s*)? so that the
# \s* can only match after a "1"; a free-standing leading \s* would pull the
# space before a number into the match.
phone_re = re.compile(r'(?:\+?1\s*)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}')
numbers = phone_re.findall("Call +1 (555) 123-4567 or 800.555.0100")
# numbers == ['+1 (555) 123-4567', '800.555.0100']

Muster aus der realen Welt

import re

# URL parsing
# Each optional component is its own named group; groups that do not take
# part in a match are reported as None by groupdict().
URL_RE = re.compile(
    r'https?://'                      # scheme
    r'(?:www\.)?'                     # optional "www." (not captured)
    r'(?P<domain>[a-zA-Z0-9.-]+)'     # host name
    r'(?P<port>:\d+)?'                # ":8080" (colon included)
    r'(?P<path>/[^\s?#]*)?'           # path up to "?", "#" or whitespace
    r'(?P<query>\?[^\s#]*)?'          # "?..." — the "?" must be escaped
    r'(?P<fragment>#\S*)?'            # "#..." up to whitespace
)
m = URL_RE.match('https://api.example.com:8080/v1/users?page=1#top')
if m:
    print(m.groupdict())
    # {'domain': 'api.example.com', 'port': ':8080', 'path': '/v1/users',
    #  'query': '?page=1', 'fragment': '#top'}

# Parse log lines of the form "YYYY-MM-DD HH:MM:SS [LEVEL] message"
# The square brackets around the level are escaped (\[ ... \]); unescaped
# they would start a character class instead of matching literal brackets.
LOG_RE = re.compile(
    r'(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})'
    r' \[(?P<level>DEBUG|INFO|WARNING|ERROR)\]'
    r' (?P<message>.+)'
)
# Feed every line that matches LOG_RE to process_log(); other lines are
# skipped. `log_file` and `process_log` are not defined in this snippet and
# are expected to be supplied by the surrounding program.
for entry in log_file:
    if (hit := LOG_RE.match(entry)) is None:  # walrus operator
        continue
    process_log(hit.group('level'), hit.group('message'))

# Extract code blocks from markdown
# Group 1 is the optional language tag after the opening fence, group 2 the
# block body. re.DOTALL lets "." span newlines and ".*?" stops at the first
# closing fence. findall() reports a missing language tag as ''.
# `markdown_text` is not defined in this snippet and is expected to be
# supplied by the surrounding program.
code_blocks = re.findall(r'```(\w+)?\n(.*?)```', markdown_text, re.DOTALL)
for lang, code in code_blocks:
    print(f"Language: {lang}, Code length: {len(code)}")

# Validate passwords (must have upper, lower, digit, special)
def is_strong_password(pwd: str) -> bool:
    """Return True if *pwd* meets the minimum strength rules.

    A password is accepted when it is at least 8 characters long and
    contains at least one uppercase ASCII letter, one lowercase ASCII
    letter, one digit, and one of the special characters ``!@#$%^&*``.
    """
    if len(pwd) < 8:
        return False
    patterns = [
        r'[A-Z]',       # uppercase
        r'[a-z]',       # lowercase
        r'\d',          # digit (the backslash matters: r'd' is the letter d)
        r'[!@#$%^&*]',  # special
    ]
    return all(re.search(p, pwd) for p in patterns)

# Strip HTML tags: every "<...>" run is replaced by the empty string,
# leaving only the text between the tags.
clean_html = re.compile(r'<[^>]+>').sub('', '<p>Hello <b>world</b></p>')
print(clean_html)  # "Hello world"

# CamelCase to snake_case
def camel_to_snake(name: str) -> str:
    """Convert a CamelCase identifier to snake_case.

    Two passes insert the underscores, then the result is lower-cased:

    1. Split a run of capitals from a following capitalised word
       ("HTTPSConnection" -> "HTTPS_Connection").
    2. Split a lowercase letter or digit from a following capital
       ("CamelCase" -> "Camel_Case").

    The replacement templates re-insert both captured groups (\\1 and \\2)
    around the underscore; without them the matched characters are lost.
    """
    s = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', name)
    return re.sub(r'([a-z\d])([A-Z])', r'\1_\2', s).lower()


print(camel_to_snake('CamelCaseString'))  # 'camel_case_string'
print(camel_to_snake('HTTPSConnection'))  # 'https_connection'

Python-Regex im Jahr 2026: unverzichtbar für jede Textverarbeitungsaufgabe. Beherrschen Sie die Kernfunktionen (re.search, re.findall, re.sub), verwenden Sie Gruppen und benannte Gruppen für die Extraktion, nutzen Sie Lookaheads und Lookbehinds für kontextbezogenen Abgleich und kompilieren Sie häufig verwendete Muster vorab. Ziehen Sie für komplexe Parsing-Aufgaben Bibliotheken wie pyparsing oder Lark in Betracht – Regex wird bei tief verschachtelten Strukturen schnell unhandlich.

🔗 Share this article

X / Twitter Facebook WhatsApp LinkedIn Telegram

Kernfunktionen

Gruppen und benannte Gruppen

Lookahead und Lookbehind

Flags und Kompilierung

Muster aus der realen Welt

✍️ Leave a Comment

Stay ahead of tech trends