Python-Generatoren und -Iteratoren gehören zu den leistungsstärksten, aber oft missverstandenen Sprachfunktionen. Im Jahr 2026 bilden Generatoren die Grundlage für asynchrone E/A, Datenpipelines und speichereffiziente Verarbeitung. Dieser vollständige Leitfaden behandelt Generatoren, Iteratoren, yield-Ausdrücke, send() und praxisnahe Anwendungen.
📋 Table of Contents
Iteratoren – Das Protokoll
# Any object with __iter__ and __next__ is an iterator
class CountDown:
    """Iterator that counts down from ``start`` to 1.

    Implements the iterator protocol by hand: ``__iter__`` returns the
    object itself and ``__next__`` either produces the next value or raises
    ``StopIteration``. An instance is single-use: once exhausted it stays
    exhausted.
    """

    def __init__(self, start: int):
        # Next value to hand out; anything <= 0 means "exhausted".
        self.current = start

    def __iter__(self) -> "CountDown":
        return self  # iterator returns itself

    def __next__(self) -> int:
        if self.current <= 0:
            raise StopIteration
        value = self.current
        self.current -= 1
        return value
# Usage
for n in CountDown(5):
    print(n)  # 5, 4, 3, 2, 1

# Python's for loop is syntactic sugar for:
# it = iter(countdown)
# while True:
#     try:
#         n = next(it)
#     except StopIteration:
#         break
#     print(n)
Generatoren – der einfache Weg
from typing import Generator, Iterator
# Generator function — uses yield instead of return
def count_down(start: int) -> Generator[int, None, None]:
    """Yield ``start``, ``start - 1``, ..., 1.

    Yields nothing when ``start`` is zero or negative.
    """
    while start > 0:
        yield start
        start -= 1
# Generators plug straight into a for loop, just like any other iterator.
for n in count_down(5):
    print(n)  # 5, 4, 3, 2, 1
# Generator expression (like list comprehension but lazy)
from itertools import islice  # imported here so this snippet runs on its own

squares_gen = (x ** 2 for x in range(1_000_000))  # doesn't compute all at once
# Pull only the first ten items. list(squares_gen)[:10] would first build all
# 1,000,000 squares and then slice, defeating the laziness shown here.
first_ten = list(islice(squares_gen, 10))

# vs list comprehension (computes everything immediately)
squares_list = [x ** 2 for x in range(1_000_000)]  # uses ~8MB memory!

# Memory comparison
import sys

gen = (x ** 2 for x in range(1_000_000))
lst = [x ** 2 for x in range(1_000_000)]
print(sys.getsizeof(gen))  # ~200 bytes
print(sys.getsizeof(lst))  # ~8MB (getsizeof is shallow: the list object only)
Unendliche Generatoren
from itertools import islice
def fibonacci() -> Generator[int, None, None]:
    """Yield the Fibonacci sequence 0, 1, 1, 2, 3, 5, ... without end.

    The generator never terminates on its own; bound it with
    ``itertools.islice`` or break out of the consuming loop.
    """
    a, b = 0, 1
    while True:  # infinite!
        yield a
        a, b = b, a + b
def primes() -> Generator[int, None, None]:
    """Yield the prime numbers 2, 3, 5, 7, ... without end.

    Each candidate is checked by trial division up to its integer square
    root. The generator never terminates on its own.
    """
    from math import isqrt  # function-scope import keeps the snippet standalone

    def is_prime(n: int) -> bool:
        if n < 2:
            return False
        # isqrt is exact for arbitrarily large ints, whereas int(n ** 0.5)
        # goes through a float and can be off by one for very large n.
        return all(n % i != 0 for i in range(2, isqrt(n) + 1))

    n = 2
    while True:
        if is_prime(n):
            yield n
        n += 1
# islice caps the otherwise endless generators at a fixed number of items
first_10_fibs = [*islice(fibonacci(), 10)]
first_20_primes = [*islice(primes(), 20)]
print(first_10_fibs)
print(first_20_primes)
Generatorpipelines für die Datenverarbeitung
import csv
from pathlib import Path
def read_large_csv(filepath: str) -> Generator[dict, None, None]:
    """Lazily yield each row of a CSV file as a dict keyed by the header row.

    The file is opened when iteration starts and the ``with`` block closes
    it once the generator is exhausted or closed. Rows are produced one at
    a time rather than loaded up front.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(filepath, newline='') as f:
        yield from csv.DictReader(f)
def filter_rows(rows, **criteria) -> Generator[dict, None, None]:
    """Yield only the rows whose fields equal every given criterion.

    Each keyword argument names a column and the exact value it must hold.
    A row that lacks the column is looked up as ``None`` and therefore only
    matches when the wanted value is ``None``. With no criteria, every row
    passes through.
    """
    for row in rows:
        if all(row.get(k) == v for k, v in criteria.items()):
            yield row
def transform(rows, **transforms) -> Generator[dict, None, None]:
    """Yield a copy of each row with the named columns converted.

    Each keyword argument maps a column name to a callable that is applied
    to that column's value. The input row itself is left untouched. A row
    that lacks one of the named columns raises ``KeyError``.
    """
    for row in rows:
        transformed = {**row}  # shallow copy so the caller's dict is not mutated
        for key, func in transforms.items():
            transformed[key] = func(row[key])
        yield transformed
def batch(rows, size: int = 100) -> Generator[list, None, None]:
    """Group rows into lists of up to ``size`` items.

    For ``size >= 1`` every yielded list holds exactly ``size`` rows except
    possibly the last one, which carries the remainder. Nothing is yielded
    for empty input.
    """
    batch_data = []
    for row in rows:
        batch_data.append(row)
        if len(batch_data) >= size:
            yield batch_data
            batch_data = []  # start a fresh list; the yielded one stays intact
    if batch_data:
        yield batch_data
# Pipeline processes 10GB file with constant memory.
# Conceptual notation only: Python has no `|>` pipe operator, so this
# sketch is kept as a comment.
#
# pipeline = (
#     read_large_csv("10gb_sales.csv")
#     |> filter_rows(status="completed", year="2026")
#     |> transform(amount=float, quantity=int)
#     |> batch(size=1000)
# )

# Actually in Python: each stage wraps the previous generator, so no work
# happens until the final loop starts pulling batches.
rows = read_large_csv("10gb_sales.csv")
filtered = filter_rows(rows, status="completed")
transformed = transform(filtered, amount=float)
batches = batch(transformed, size=1000)
for batch_data in batches:
    # NOTE(review): `db` is not defined in this snippet; presumably a
    # database client supplied by the reader.
    db.bulk_insert("sales", batch_data)
    print(f"Inserted {len(batch_data)} rows")
yield from – Delegieren an Sub-Generatoren
def flatten(nested) -> Generator:
    """Yield the leaf items of arbitrarily nested lists/tuples, depth-first.

    Only ``list`` and ``tuple`` values are descended into; anything else,
    including strings and other iterables, is yielded unchanged.
    """
    for item in nested:
        if isinstance(item, (list, tuple)):
            yield from flatten(item)  # delegate to sub-generator
        else:
            yield item
# Sample input mixing several nesting depths
nested = [1, [2, 3, [4, 5]], 6, [[7]]]
print([*flatten(nested)])  # [1, 2, 3, 4, 5, 6, 7]
# Combine multiple generators
def chain(*iterables) -> Generator:
    """Yield every item of each iterable in turn, left to right.

    A hand-written counterpart of ``itertools.chain`` that illustrates
    ``yield from``.
    """
    for it in iterables:
        yield from it
# Mixed iterable kinds (ranges and a string) are consumed one after another
combined = chain(range(3), range(5, 8), "abc")
print([*combined])  # [0, 1, 2, 5, 6, 7, 'a', 'b', 'c']
send() – Zwei-Wege-Kommunikation
def running_average() -> Generator[float, float, None]:
    """Generator that reports the mean of all values sent to it so far.

    Prime it with ``next()`` (which yields the initial ``0.0``), then call
    ``send(value)``; each send returns the updated average. Sending
    ``None``, or calling ``next()`` again, ends the generator.
    """
    total = 0.0
    count = 0
    average = 0.0
    while True:
        value = yield average  # yield current average, receive new value
        if value is None:
            break
        total += value
        count += 1
        average = total / count
# Drive the generator by hand: one priming next(), then one send() per sample.
gen = running_average()
next(gen) # prime the generator (advance to first yield)
print(gen.send(10)) # 10.0
print(gen.send(20)) # 15.0
print(gen.send(30)) # 20.0
print(gen.send(40)) # 25.0
Asynchrone Generatoren
import asyncio
from typing import AsyncGenerator

import httpx
async def fetch_pages(urls: list[str]) -> AsyncGenerator[dict, None]:
    """Fetch each URL in order and yield a summary dict per response.

    Requests are awaited one after another over a single shared
    ``httpx.AsyncClient``; the ``async with`` block closes the client when
    the generator finishes. Each yielded dict has the keys ``"url"``,
    ``"status"`` (the response's ``status_code``) and ``"data"`` (the
    result of ``r.json()``).
    """
    async with httpx.AsyncClient() as client:
        for url in urls:
            r = await client.get(url)
            yield {"url": url, "status": r.status_code, "data": r.json()}
async def main():
    """Fetch three example pages and print each one's status as it arrives."""
    urls = [
        "https://api.example.com/page/1",
        "https://api.example.com/page/2",
        "https://api.example.com/page/3",
    ]
    async for page in fetch_pages(urls):
        print(f"Fetched {page['url']}: {page['status']}")
        # NOTE(review): `process` is not defined in this snippet; presumably
        # a handler supplied by the reader.
        process(page['data'])
# Run the main() coroutine to completion.
asyncio.run(main())
# Async generator expression
# NOTE(review): `async_range` is not defined in this snippet; presumably an
# async iterable supplied by the reader.
squares = (x ** 2 async for x in async_range(100))
itertools – Generator-Dienstprogramme
from itertools import (
count, cycle, repeat, # infinite
chain, islice, takewhile, # finite
groupby, product, # combinatorics
accumulate, starmap # transforms
)
# Infinite counter
for i in islice(count(10, 2), 5):
    print(i)  # 10, 12, 14, 16, 18

# Group consecutive elements
# groupby only merges *adjacent* items with equal keys, so the input must
# already be ordered by that key (as it is here).
data = [
    {"date": "2026-01", "v": 10},
    {"date": "2026-01", "v": 20},
    {"date": "2026-02", "v": 30},
]
for month, rows in groupby(data, key=lambda x: x["date"]):
    total = sum(r["v"] for r in rows)
    print(f"{month}: {total}")

# Running total
sales = [100, 200, 150, 300, 250]
running = list(accumulate(sales))  # [100, 300, 450, 750, 1000]

# Cartesian product
for size, color in product(["S", "M", "L"], ["red", "blue"]):
    print(f"{size}-{color}")
Python-Generatoren sind im Jahr 2026 für speichereffiziente Datenverarbeitung, unendliche Sequenzen und asynchrone E/A unerlässlich. Verwenden Sie Generatorfunktionen für benutzerdefinierte Iteratoren, Generatorausdrücke für verzögerte (lazy) Transformationen, `yield from` für die Delegation und `itertools` für gängige kombinatorische Muster. Allein die Speichervorteile rechtfertigen es, Generatoren für jede Datenverarbeitungsaufgabe zu erlernen.
🔗 Share this article
✍️ Leave a Comment