🌐 Detecting your location…
📢 Advertisement — Configure AdSense in Appearance → Customize → AdSense Settings

Guia completo do Python Regex 2026: padrões, grupos e uso no mundo real

⏱️5 min read  ·  983 words

O módulo re do Python é uma das ferramentas mais poderosas no kit de ferramentas de um desenvolvedor. Em 2026, as expressões regulares continuam essenciais para análise, validação, extração e transformação de texto. Este guia cobre regex Python desde padrões básicos até padrões avançados que todo desenvolvedor deve conhecer.

Funções principais

import re

text = "Contact us at support@example.com or sales@company.org"

# re.search() scans the whole string and stops at the first hit.
m = re.search(pattern=r'[\w.+-]+@[\w-]+\.[a-z]{2,}', string=text)
if m is not None:
    print(m[0])  # "support@example.com"

# re.match() only succeeds when the pattern matches at index 0.
m = re.match(pattern=r'Contact', string=text)  # a Match object
m = re.match(pattern=r'support', string=text)  # None: 'support' is not at the start

# re.findall() collects every non-overlapping match into a list of strings.
emails = re.findall(pattern=r'[\w.+-]+@[\w-]+\.[a-z]{2,}', string=text)
print(emails)  # ['support@example.com', 'sales@company.org']

# re.finditer() yields Match objects one by one, exposing groups and offsets.
for match in re.finditer(pattern=r'[\w.+-]+@[\w-]+\.[a-z]{2,}', string=text):
    print(f"{match.group()} at position {match.start()}-{match.end()}")

# re.sub() swaps every match for the replacement text.
clean = re.sub(
    pattern=r'[\w.+-]+@[\w-]+\.[a-z]{2,}',
    repl='[EMAIL]',
    string=text,
)
print(clean)  # "Contact us at [EMAIL] or [EMAIL]"

# re.sub() accepts a callable: it receives each Match and returns the replacement.
def capitalize_words(m):
    """Return the text matched by *m* converted to upper case."""
    matched = m.group()
    return matched.upper()


result = re.sub(
    pattern=r'[a-z]+',
    repl=capitalize_words,
    string="hello world foo bar",
)

# re.split() cuts the string at every run of commas, semicolons or whitespace.
parts = re.split(pattern=r'[,;\s]+', string="one, two;three four")
print(parts)  # ['one', 'two', 'three', 'four']

Grupos e grupos nomeados

# Capturing groups (): each parenthesised part is retrievable by position.
m = re.search(pattern=r'(\d{4})-(\d{2})-(\d{2})', string='2026-05-29')
if m is not None:
    year, month, day = m.group(1, 2, 3)
    print(year, month, day)  # 2026 05 29

# Named groups (?P<name>...): retrievable by name as well as by position.
m = re.search(
    pattern=r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})',
    string='2026-05-29',
)
if m is not None:
    print(m['year'])       # 2026
    print(m.groupdict())   # {'year': '2026', 'month': '05', 'day': '29'}

# Non-capturing group (?:...): groups alternatives without taking a group number.
m = re.search(
    pattern=r'(?:https?|ftp)://(\S+)',
    string='Visit https://example.com/path',
)
if m is not None:
    print(m[1])  # 'example.com/path' (group 1, not the scheme)

# Backreferences: \1 refers to group 1 (in a replacement string, \g<name>
# refers to a named group).
# Match repeated words: \1 must repeat exactly what group 1 captured, and the
# \b anchors keep the match on whole words.
m = re.search(r'\b(\w+)\s+\1\b', 'the the cat sat', re.IGNORECASE)
if m:
    print(f"Repeated word: {m.group(1)}")

# Substitution with groups: \2 and \1 in the replacement swap the two words.
result = re.sub(r'(\w+)\s(\w+)', r'\2 \1', 'first last')
print(result)  # 'last first'

Lookahead e lookbehind (olhar à frente e olhar para trás)

text = "price: $100, discount: $20, total: $80"

# Positive lookahead (?=...) — match if followed by
prices = re.findall(r'\$\d+(?=,|\s*$)', text)  # prices before comma or end

# Negative lookahead (?!...)
# Match whole words not followed by '.'. The \b anchors are required: without
# them \w+ backtracks by one character ('Hell' is not followed by '.') and
# every word matches in truncated form.
words = re.findall(r'\b\w+\b(?!\.)', 'Hello. World! Python. Language')
# words == ['World', 'Language']

# Positive lookbehind (?<=...) — match if preceded by
amounts = re.findall(r'(?<=\$)\d+', text)  # numbers after $
print(amounts)  # ['100', '20', '80']

# Negative lookbehind (?<!...)
# Match numbers not preceded by '-'. Also rejecting a preceding digit stops
# the tail of a multi-digit negative (the '5' in '-45') from matching.
nums = re.findall(r'(?<![-\d])\d+', "3 -4 10 -5")
print(nums)  # ['3', '10']

# Practical: extract quoted strings without quotes
# A capturing group consumes both quotes, so the text lying between two quoted
# strings cannot match. Lookarounds consume nothing, which is why
# (?<=")[^"]+(?=") would also return ' and '.
strings = re.findall(r'"([^"]+)"', 'He said "hello" and "goodbye"')
print(strings)  # ['hello', 'goodbye']

Flags e compilação

import re

# Common flags (combine several with |, e.g. re.I | re.M)
re.IGNORECASE  # or re.I — case insensitive
re.MULTILINE   # or re.M — ^ and $ match line boundaries
re.DOTALL      # or re.S — . matches \n too
re.VERBOSE     # or re.X — allow comments and whitespace

# Verbose pattern (readable): with re.VERBOSE, whitespace and # comments
# inside the pattern are ignored, so each part can be documented in place.
email_pattern = re.compile(
    r'''
    [\w.+-]+          # local part
    @
    [\w-]+            # first domain label
    (?:\.[\w-]+)*     # any further labels (e.g. the 'co' in test.co.uk)
    \.[a-z]{2,}       # top-level domain
    ''',
    re.IGNORECASE | re.VERBOSE,
)

emails = email_pattern.findall("Contact: Alice@Example.COM, bob@test.co.uk")
print(emails)  # ['Alice@Example.COM', 'bob@test.co.uk']

# Compile for performance (reuse same pattern)
# The optional country code owns its trailing separators, so a match can
# never start with stray whitespace (a bare leading \s* would capture the
# space in front of '800.555.0100').
phone_re = re.compile(r'(?:\+?1[\s.-]*)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}')
numbers = phone_re.findall("Call +1 (555) 123-4567 or 800.555.0100")
# numbers == ['+1 (555) 123-4567', '800.555.0100']

Padrões do mundo real

import re

# URL parsing
# The pattern is assembled from one fragment per URL component.
_URL_FRAGMENTS = (
    r'https?://',                     # scheme
    r'(?:www\.)?',                    # optional www prefix, not captured
    r'(?P<domain>[a-zA-Z0-9.-]+)',
    r'(?P<port>:\d+)?',               # includes the leading ':'
    r'(?P<path>/[^\s?#]*)?',
    r'(?P<query>\?[^\s#]*)?',         # includes the leading '?'
    r'(?P<fragment>#\S*)?',           # includes the leading '#'
)
URL_RE = re.compile(''.join(_URL_FRAGMENTS))

m = URL_RE.match('https://api.example.com:8080/v1/users?page=1#top')
if m is not None:
    print(m.groupdict())

# Parse log lines of the form "YYYY-MM-DD HH:MM:SS [LEVEL] message"
_LOG_TIMESTAMP = r'(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})'
_LOG_LEVEL = r' \[(?P<level>DEBUG|INFO|WARNING|ERROR)\]'
_LOG_MESSAGE = r' (?P<message>.+)'
LOG_RE = re.compile(_LOG_TIMESTAMP + _LOG_LEVEL + _LOG_MESSAGE)
for line in log_file:
    # Walrus operator: bind the match and test it in one expression;
    # lines that do not fit the log format are skipped.
    if (m := LOG_RE.match(line)) is None:
        continue
    process_log(m['level'], m['message'])

# Extract code blocks from markdown
# (\w+)? captures the optional language tag after the opening fence (findall
# yields '' for it when the tag is absent). re.DOTALL lets (.*?) span several
# lines, and the lazy quantifier stops at the first closing fence.
code_blocks = re.findall(r'```(\w+)?\n(.*?)```', markdown_text, re.DOTALL)
for lang, code in code_blocks:
    print(f"Language: {lang}, Code length: {len(code)}")

# Validate passwords (must have upper, lower, digit, special)
def is_strong_password(pwd: str) -> bool:
    """Report whether *pwd* is at least 8 characters long and mixes character classes.

    The password must contain an uppercase ASCII letter, a lowercase ASCII
    letter, a digit, and one of the special characters ``!@#$%^&*``.
    """
    if len(pwd) < 8:
        return False

    required = (
        r'[A-Z]',       # uppercase
        r'[a-z]',       # lowercase
        r'\d',          # digit
        r'[!@#$%^&*]',  # special
    )
    for pattern in required:
        if re.search(pattern, pwd) is None:
            return False
    return True

# HTML tag removal: drop everything from a '<' up to the next '>'.
clean_html = re.sub(
    pattern=r'<[^>]+>',
    repl='',
    string='<p>Hello <b>world</b></p>',
)
print(clean_html)  # "Hello world"

# CamelCase to snake_case
def camel_to_snake(name: str) -> str:
    """Convert a CamelCase identifier to snake_case.

    Args:
        name: Identifier such as ``'CamelCaseString'`` or ``'HTTPSConnection'``.

    Returns:
        The lower-cased identifier with ``_`` inserted at each word boundary.
    """
    # Split an acronym from the capitalised word that follows it:
    # 'HTTPSConnection' -> 'HTTPS_Connection'. \1 and \2 put the captured
    # characters back around the inserted underscore.
    s = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', name)
    # Split a lowercase letter or digit from the capital that follows it.
    return re.sub(r'([a-z\d])([A-Z])', r'\1_\2', s).lower()


print(camel_to_snake('CamelCaseString'))  # 'camel_case_string'
print(camel_to_snake('HTTPSConnection'))  # 'https_connection'

Regex em Python em 2026: essencial para qualquer tarefa de processamento de texto. Domine as funções principais (search, findall, sub), entenda grupos e grupos nomeados para extração, use lookaheads e lookbehinds para correspondência baseada no contexto e sempre compile padrões usados com frequência. Para tarefas de análise complexas, considere bibliotecas como pyparsing ou lark-parser — regex fica difícil de manejar para estruturas profundamente aninhadas.

✍️ Leave a Comment

Your email address will not be published. Required fields are marked *

🌐 Read in:🇬🇧 English🇩🇪 Deutsch🇧🇷 Português🇸🇦 العربية🇮🇳 हिन्दी🇧🇩 বাংলা