Geradores e iteradores estão entre os recursos mais poderosos do Python, mas também entre os mais incompreendidos. Em 2026, os geradores alimentam E/S assíncrona, pipelines de dados e processamento com uso eficiente de memória. Este guia completo cobre iteradores, geradores, expressões `yield`, `yield from`, `send()` e aplicações do mundo real.
📋 Table of Contents
Iteradores – O Protocolo
# Any object with __iter__ and __next__ is an iterator
class CountDown:
    """Iterator that counts down from ``start`` to 1, one value per ``next()``.

    Implements the iterator protocol by hand: ``__iter__`` returns the
    object itself and ``__next__`` either produces the next value or raises
    ``StopIteration`` once the counter is no longer positive.
    """

    def __init__(self, start: int):
        # The next value to hand out; decremented after every step.
        self.current = start

    def __iter__(self):
        # An iterator is its own iterable, so hand back the same object.
        return self

    def __next__(self):
        if self.current > 0:
            value = self.current
            self.current = value - 1
            return value
        # Exhausted: signal the end of iteration to the caller.
        raise StopIteration
# Usage
countdown = CountDown(5)
for n in countdown:
    print(n)  # 5, 4, 3, 2, 1

# The for loop above is syntactic sugar for:
#     it = iter(countdown)
#     while True:
#         try:
#             n = next(it)
#         except StopIteration:
#             break
#         print(n)
Geradores – A Maneira Simples
from typing import Generator, Iterator
# Generator function — uses yield instead of return
def count_down(start: int) -> Generator[int, None, None]:
    """Yield ``start``, ``start - 1``, ... down to 1.

    Nothing is yielded when ``start`` is zero or negative.
    """
    remaining = start
    while True:
        if remaining <= 0:
            return
        yield remaining
        remaining -= 1
# Drive the generator with a plain for loop; each pass resumes it at its yield.
for value in count_down(5):
    print(value)
# Generator expression (like a list comprehension, but lazy)
from itertools import islice  # imported here so this snippet can slice lazily

squares_gen = (x ** 2 for x in range(1_000_000))  # nothing is computed yet
# Take only the first ten values. list(squares_gen)[:10] would first build
# all one million squares and only then slice, defeating the lazy generator.
first_ten = list(islice(squares_gen, 10))

# vs list comprehension (computes everything immediately)
squares_list = [x ** 2 for x in range(1_000_000)]  # uses ~8MB memory!

# Memory comparison
import sys

gen = (x ** 2 for x in range(1_000_000))
lst = [x ** 2 for x in range(1_000_000)]
print(sys.getsizeof(gen))  # ~200 bytes
print(sys.getsizeof(lst))  # ~8MB (getsizeof is shallow: the list object only)
Geradores Infinitos
from itertools import islice
def fibonacci() -> Generator[int, None, None]:
    """Yield the Fibonacci sequence 0, 1, 1, 2, 3, 5, ... without end."""
    current, following = 0, 1
    while True:  # infinite: the consumer decides how many values to take
        yield current
        current, following = following, current + following
def primes() -> Generator[int, None, None]:
    """Yield the prime numbers 2, 3, 5, 7, ... in increasing order, forever."""

    def is_prime(n: int) -> bool:
        # Trial division by every integer from 2 up to int(sqrt(n)).
        if n < 2:
            return False
        for divisor in range(2, int(n**0.5) + 1):
            if n % divisor == 0:
                return False
        return True

    candidate = 2
    while True:
        if is_prime(candidate):
            yield candidate
        candidate += 1
# Take the first N values from an infinite generator; islice stops the
# consumption, the generators themselves never terminate.
first_10_fibs = [*islice(fibonacci(), 10)]
first_20_primes = [*islice(primes(), 20)]
for sample in (first_10_fibs, first_20_primes):
    print(sample)
Pipelines de Geradores para Processamento de Dados
import csv
from pathlib import Path
def read_large_csv(filepath: str, encoding: str = "utf-8") -> Generator[dict, None, None]:
    """Stream the rows of a CSV file one at a time as dictionaries.

    The first line of the file is used as the header; each following line
    is yielded as a dict mapping column name to the (string) cell value.
    Rows are produced lazily, so the whole file is never held in memory.

    Args:
        filepath: Path of the CSV file to read.
        encoding: Text encoding used to decode the file. Defaults to
            UTF-8 so the result does not depend on the platform locale.

    Yields:
        One dict per data row, in file order.

    Raises:
        OSError: If the file cannot be opened (raised when iteration starts).
    """
    # newline="" is what the csv module requires so that it can handle
    # newlines embedded in quoted fields itself.
    with open(filepath, newline="", encoding=encoding) as f:
        yield from csv.DictReader(f)
def filter_rows(rows, **criteria) -> Generator[dict, None, None]:
    """Yield only the rows whose fields equal every keyword criterion.

    Each keyword names a field and the value it must equal. A field that is
    missing from a row is compared as ``None``. With no criteria every row
    passes through unchanged.
    """
    for row in rows:
        for field, expected in criteria.items():
            if row.get(field) != expected:
                break  # first mismatch rejects the row
        else:
            yield row
def transform(rows, **transforms) -> Generator[dict, None, None]:
    """Yield a copy of each row with the named fields converted.

    Each keyword maps a field name to a callable; the callable receives the
    row's current value for that field and its result replaces it in the
    copy. The incoming row itself is left untouched. A field named in
    ``transforms`` but absent from a row raises ``KeyError``.
    """
    for row in rows:
        converted = {field: convert(row[field]) for field, convert in transforms.items()}
        # Overlay the converted fields on a shallow copy of the row.
        yield {**row, **converted}
def batch(rows, size: int = 100) -> Generator[list, None, None]:
    """Group an iterable into lists of at most ``size`` items.

    Items are collected in order; a list is yielded as soon as it holds
    ``size`` items, and a final shorter list is yielded for any leftovers.
    An empty input yields nothing.

    Args:
        rows: Any iterable of items.
        size: Maximum number of items per batch; must be at least 1.

    Yields:
        Lists of up to ``size`` consecutive items.

    Raises:
        ValueError: If ``size`` is less than 1 (raised when iteration
            starts, because this is a generator function).
    """
    # A non-positive size used to degrade silently into one-item batches,
    # which hides caller bugs; reject it explicitly instead.
    if size < 1:
        raise ValueError("size must be at least 1")
    batch_data = []
    for row in rows:
        batch_data.append(row)
        if len(batch_data) >= size:
            yield batch_data
            # Start a fresh list so the batch already handed out is not mutated.
            batch_data = []
    if batch_data:
        yield batch_data
# Pipeline processes a 10GB file with constant memory.
# Conceptually (pseudo-code only: Python has no |> pipe operator):
#
#     pipeline = (
#         read_large_csv("10gb_sales.csv")
#         |> filter_rows(status="completed", year="2026")
#         |> transform(amount=float, quantity=int)
#         |> batch(size=1000)
#     )
#
# In actual Python each stage wraps the previous generator; nothing is
# read from disk until the final for loop starts pulling batches.
rows = read_large_csv("10gb_sales.csv")
filtered = filter_rows(rows, status="completed", year="2026")
transformed = transform(filtered, amount=float, quantity=int)
batches = batch(transformed, size=1000)
for batch_data in batches:
    # NOTE(review): `db` is not defined in this snippet; presumably a
    # database client supplied by the surrounding application - confirm.
    db.bulk_insert("sales", batch_data)
    print(f"Inserted {len(batch_data)} rows")
yield from – Delegando a Subgeradores
def flatten(nested) -> Generator:
    """Yield the leaves of arbitrarily nested lists and tuples, left to right.

    Only ``list`` and ``tuple`` items are descended into; every other item
    (including strings) is yielded as-is.
    """
    for item in nested:
        if not isinstance(item, (list, tuple)):
            yield item
            continue
        # Delegate to a sub-generator for the nested sequence.
        yield from flatten(item)
nested = [1, [2, 3, [4, 5]], 6, [[7]]]
flat = list(flatten(nested))
print(flat)  # [1, 2, 3, 4, 5, 6, 7]
# Combine multiple generators
def chain(*iterables) -> Generator:
    """Yield every item of the first iterable, then the second, and so on."""
    for source in iterables:
        # yield from drains `source` completely before moving to the next one.
        yield from source
combined = chain(range(3), range(5, 8), "abc")
print([*combined])  # [0, 1, 2, 5, 6, 7, 'a', 'b', 'c']
send() — Comunicação bidirecional
def running_average() -> Generator[float, float, None]:
    """Coroutine-style generator that reports the mean of the values sent to it.

    Prime it with ``next()`` (which yields the initial ``0.0``), then call
    ``send(value)``; each ``send`` returns the average of everything sent so
    far. Sending ``None`` (or calling ``next()`` again) ends the generator.
    """
    running_sum = 0.0
    samples = 0
    average = 0.0
    while True:
        # Hand out the current average and wait for the next value.
        value = yield average
        if value is None:
            return
        samples += 1
        running_sum += value
        average = running_sum / samples
gen = running_average()
next(gen)  # prime the generator (advance to the first yield)
for sample in (10, 20, 30, 40):
    print(gen.send(sample))  # prints 10.0, 15.0, 20.0, 25.0 in turn
Geradores Assíncronos
import asyncio
from collections.abc import AsyncGenerator

import httpx
async def fetch_pages(urls: list[str]) -> AsyncGenerator[dict, None]:
    """Fetch each URL in order and yield one summary dict per response.

    A single ``httpx.AsyncClient`` is kept open for the whole run and closed
    when the generator finishes. Requests are awaited one at a time, in the
    order given, so a page is only fetched when the consumer asks for it.

    Args:
        urls: URLs to request with GET.

    Yields:
        Dicts with the keys ``"url"`` (the requested URL), ``"status"``
        (the response status code) and ``"data"`` (the result of
        ``response.json()``).

    Raises:
        Whatever ``client.get`` or ``response.json()`` raise; nothing is
        caught here.
    """
    async with httpx.AsyncClient() as client:
        for url in urls:
            response = await client.get(url)
            yield {"url": url, "status": response.status_code, "data": response.json()}
async def main():
    """Fetch three example pages, print each status and process each payload."""
    page_urls = [
        "https://api.example.com/page/1",
        "https://api.example.com/page/2",
        "https://api.example.com/page/3",
    ]
    async for page in fetch_pages(page_urls):
        print(f"Fetched {page['url']}: {page['status']}")
        payload = page['data']
        # NOTE(review): `process` is not defined in this snippet; presumably
        # an application-level handler - confirm.
        process(payload)
asyncio.run(main())


# Async generator expression
async def async_range(stop: int):
    """Asynchronously yield 0, 1, ..., stop - 1 (async counterpart of range)."""
    for value in range(stop):
        yield value


# Lazy, like a regular generator expression, but consumed with `async for`.
squares = (x ** 2 async for x in async_range(100))
itertools — Utilitários para Geradores
from itertools import (
count, cycle, repeat, # infinite
chain, islice, takewhile, # finite
groupby, product, # combinatorics
accumulate, starmap # transforms
)
# Infinite counter, bounded by islice
for i in islice(count(10, 2), 5):
    print(i)  # 10, 12, 14, 16, 18

# Group consecutive elements (groupby only merges neighbours with equal keys)
data = [
    {"date": "2026-01", "v": 10},
    {"date": "2026-01", "v": 20},
    {"date": "2026-02", "v": 30},
]
for month, rows in groupby(data, key=lambda entry: entry["date"]):
    total = sum(entry["v"] for entry in rows)
    print(f"{month}: {total}")

# Running total
sales = [100, 200, 150, 300, 250]
running = [*accumulate(sales)]  # [100, 300, 450, 750, 1000]

# Cartesian product
sizes = ["S", "M", "L"]
colors = ["red", "blue"]
for size, color in product(sizes, colors):
    print(f"{size}-{color}")
Em 2026, os geradores Python são essenciais para o processamento de dados com uso eficiente de memória, para sequências infinitas e para E/S assíncrona. Use funções geradoras para iteradores personalizados, expressões geradoras para transformações preguiçosas (lazy), `yield from` para delegação e `itertools` para padrões comuns de iteração e combinatória. Só os ganhos de memória já justificam aprendê-los para qualquer trabalho de processamento de dados.
🔗 Share this article
✍️ Leave a Comment