Python Generators and Iterators Guide 2026: yield, send and Pipelines

⏱️5 min read · 976 words

Python generators and iterators are among the language's most powerful, yet most often misunderstood, features. In 2026, generators power async I/O, data pipelines, and memory-efficient processing. This complete guide covers generators, iterators, yield expressions, send(), and real-world applications.

📋 Table of Contents

Iterators — The Protocol
Generators — The Simple Way
Infinite Generators
Generator Pipelines for Data Processing
yield from — Delegating Generators
send() — Two-Way Communication
Async Generators
itertools — Generator Utilities

Iterators — The Protocol

# Any object with __iter__ and __next__ is an iterator
class CountDown:
    """Iterator that counts down from ``start`` to 1.

    The object is its own iterator: ``__iter__`` returns ``self`` and
    ``__next__`` hands out the current value before decrementing it.
    Once the counter reaches zero (or if it starts at zero or below),
    every further ``next()`` raises ``StopIteration``.
    """

    def __init__(self, start: int):
        # The next value to hand out; decremented after every step.
        self.current = start

    def __iter__(self):
        # An iterator is iterable by returning itself.
        return self

    def __next__(self):
        if self.current > 0:
            # Right-hand side is evaluated first, so ``emitted`` keeps the
            # pre-decrement value.
            emitted, self.current = self.current, self.current - 1
            return emitted
        raise StopIteration

# Usage
for n in CountDown(5):
    print(n)  # 5, 4, 3, 2, 1

# Python's for loop is syntactic sugar for:
# it = iter(CountDown(5))
# while True:
#     try: n = next(it)
#     except StopIteration: break
#     print(n)

Generators — The Simple Way

from typing import Generator, Iterator

# Generator function — uses yield instead of return
def count_down(start: int) -> Generator[int, None, None]:
    """Lazily yield ``start``, ``start - 1``, ... down to 1.

    Nothing is yielded when ``start`` is zero or negative.
    """
    remaining = start
    while remaining > 0:
        yield remaining
        remaining -= 1

for n in count_down(5):
    print(n)

# Generator expression (like list comprehension but lazy)
from itertools import islice

squares_gen = (x ** 2 for x in range(1_000_000))  # doesn't compute all at once
# islice pulls only the first ten items from the generator;
# list(squares_gen)[:10] would compute all 1,000,000 squares before slicing.
first_ten = list(islice(squares_gen, 10))

# vs list comprehension (computes everything immediately)
squares_list = [x ** 2 for x in range(1_000_000)]  # uses ~8MB memory for the list object alone!

# Memory comparison
# NOTE: sys.getsizeof is shallow — it reports the container itself, not the
# int objects the list refers to, so the list's real footprint is larger.
import sys
gen = (x ** 2 for x in range(1_000_000))
lst = [x ** 2 for x in range(1_000_000)]
print(sys.getsizeof(gen))   # ~200 bytes (fixed-size generator object)
print(sys.getsizeof(lst))   # ~8MB (shallow size of the list)

Infinite Generators

from itertools import islice

def fibonacci() -> Generator[int, None, None]:
    """Yield the Fibonacci sequence 0, 1, 1, 2, 3, ... without end.

    The generator never terminates on its own; callers must bound it,
    for example with ``itertools.islice``.
    """
    prev, curr = 0, 1
    while True:  # infinite by design
        yield prev
        prev, curr = curr, prev + curr

def primes() -> Generator[int, None, None]:
    """Yield the prime numbers 2, 3, 5, 7, ... in ascending order, forever.

    Each candidate is tested by trial division up to its integer square
    root. The generator never terminates on its own; callers must bound
    it, for example with ``itertools.islice``.
    """
    # Function-scope import keeps the snippet self-contained.
    from math import isqrt

    def is_prime(n: int) -> bool:
        """Return True when ``n`` has no divisor in ``[2, isqrt(n)]``."""
        if n < 2:
            return False
        # isqrt is exact integer arithmetic; int(n ** 0.5) goes through a
        # float and can be off by one for very large n.
        return all(n % divisor != 0 for divisor in range(2, isqrt(n) + 1))

    candidate = 2
    while True:
        if is_prime(candidate):
            yield candidate
        candidate += 1

# Take first N from infinite generator
first_10_fibs = list(islice(fibonacci(), 10))
first_20_primes = list(islice(primes(), 20))

print(first_10_fibs)
print(first_20_primes)

Generator Pipelines for Data Processing

import csv
from pathlib import Path

def read_large_csv(filepath: str) -> Generator[dict, None, None]:
    """Lazily yield each data row of the CSV file at ``filepath`` as a dict.

    Rows come from ``csv.DictReader``, so keys are taken from the file's
    header row. The file is opened when iteration starts and closed when
    the generator finishes or is closed.
    """
    with open(filepath, newline='') as handle:
        for record in csv.DictReader(handle):
            yield record

def filter_rows(rows, **criteria) -> Generator[dict, None, None]:
    """Yield only the rows that match every keyword criterion.

    A row matches when ``row.get(key) == value`` holds for each
    ``key=value`` pair; a missing key is therefore compared as ``None``.
    With no criteria every row passes through.
    """
    wanted = criteria.items()
    for candidate in rows:
        for field, expected in wanted:
            if not candidate.get(field) == expected:
                break  # first mismatch rejects the row
        else:
            yield candidate

def transform(rows, **transforms) -> Generator[dict, None, None]:
    """Yield a copy of each row with the named fields converted.

    Every ``field=func`` keyword replaces ``row[field]`` with
    ``func(row[field])`` in the copy; the input row is left untouched.
    A field named in ``transforms`` but absent from a row raises
    ``KeyError``.
    """
    for source in rows:
        converted = {**source}
        converted.update(
            (field, convert(source[field]))
            for field, convert in transforms.items()
        )
        yield converted

def batch(rows, size: int = 100) -> Generator[list, None, None]:
    """Group ``rows`` into lists of up to ``size`` items, yielded lazily.

    Full lists are yielded as soon as they reach ``size`` items; a final
    shorter list carries any leftover rows. Nothing is yielded for empty
    input.
    """
    chunk = []
    for item in rows:
        chunk.append(item)
        if len(chunk) >= size:
            # Hand out the filled list and start a fresh one.
            full, chunk = chunk, []
            yield full
    if chunk:
        yield chunk

# Pipeline processes 10GB file with constant memory.
# Conceptual pipe-style notation — pseudo-code only, Python has no `|>` operator:
#
#   pipeline = (
#       read_large_csv("10gb_sales.csv")
#       |> filter_rows(status="completed", year="2026")
#       |> transform(amount=float, quantity=int)
#       |> batch(size=1000)
#   )

# Actually in Python: each stage wraps the previous generator, so no row is
# read until the final loop starts pulling batches.
rows = read_large_csv("10gb_sales.csv")
filtered = filter_rows(rows, status="completed", year="2026")
transformed = transform(filtered, amount=float, quantity=int)
batches = batch(transformed, size=1000)

# NOTE(review): `db` is not defined in this snippet — presumably an existing
# database client exposing bulk_insert(table, rows); confirm before running.
for batch_data in batches:
    db.bulk_insert("sales", batch_data)
    print(f"Inserted {len(batch_data)} rows")

yield from — Delegating Generators

def flatten(nested) -> Generator:
    """Yield the leaf items of arbitrarily nested lists and tuples, in order.

    Only ``list`` and ``tuple`` instances are descended into; every other
    value (including strings) is yielded as-is.
    """
    for element in nested:
        if not isinstance(element, (list, tuple)):
            yield element
            continue
        # Recurse by delegating to a sub-generator for the nested sequence.
        yield from flatten(element)

nested = [1, [2, 3, [4, 5]], 6, [[7]]]
print(list(flatten(nested)))  # [1, 2, 3, 4, 5, 6, 7]

# Combine multiple generators
def chain(*iterables) -> Generator:
    """Yield every item of each iterable in turn, left to right.

    Each iterable is fully drained, via ``yield from`` delegation, before
    the next one is started.
    """
    for source in iterables:
        yield from source

combined = chain(range(3), range(5, 8), "abc")
print(list(combined))  # [0, 1, 2, 5, 6, 7, 'a', 'b', 'c']

send() — Two-Way Communication

def running_average() -> Generator[float, float, None]:
    """Coroutine-style generator that reports a running mean.

    Prime it with ``next()`` (which yields the initial ``0.0``), then
    ``send()`` each new value; every ``send`` returns the mean of all
    values sent so far. Sending ``None`` ends the generator, which
    surfaces to the caller as ``StopIteration``.
    """
    running_sum, n_seen, mean = 0.0, 0, 0.0
    # Each pass yields the current mean and waits for the next value.
    while (incoming := (yield mean)) is not None:
        running_sum += incoming
        n_seen += 1
        mean = running_sum / n_seen

gen = running_average()
next(gen)  # prime the generator (advance to first yield)

print(gen.send(10))   # 10.0
print(gen.send(20))   # 15.0
print(gen.send(30))   # 20.0
print(gen.send(40))   # 25.0

Async Generators

import asyncio
from typing import AsyncGenerator

import httpx

async def fetch_pages(urls: list[str]) -> AsyncGenerator[dict, None]:
    """Fetch each URL in order and yield one result dict per response.

    A single ``httpx.AsyncClient`` is shared by all requests and is closed
    when the generator finishes. Requests run one after another: each GET
    is awaited before the next URL is requested.

    Args:
        urls: The URLs to request, in the order they should be fetched.

    Yields:
        ``{"url": ..., "status": ..., "data": ...}`` where ``status`` is the
        response's ``status_code`` and ``data`` is the result of
        ``response.json()``.

    Note:
        The return annotation needs ``AsyncGenerator`` in scope (for example
        ``from typing import AsyncGenerator``); without that import the
        ``def`` statement itself raises ``NameError``.
    """
    async with httpx.AsyncClient() as client:
        for url in urls:
            response = await client.get(url)
            # NOTE(review): response.json() presumably raises on a non-JSON
            # body, and the status code is not checked first — confirm that
            # is the intended behaviour.
            yield {"url": url, "status": response.status_code, "data": response.json()}

async def main():
    """Fetch three example pages, report each status, and process each payload."""
    urls = [
        "https://api.example.com/page/1",
        "https://api.example.com/page/2",
        "https://api.example.com/page/3",
    ]

    # Pages arrive one at a time, as fetch_pages yields them.
    async for page in fetch_pages(urls):
        print(f"Fetched {page['url']}: {page['status']}")
        payload = page['data']
        process(payload)

asyncio.run(main())

# Async generator expression
# NOTE(review): async_range is not defined in this snippet — presumably an
# async iterable counterpart of range(); confirm or define it before running.
squares = (x ** 2 async for x in async_range(100))

itertools — Generator Utilities

from itertools import (
    count, cycle, repeat,      # infinite
    chain, islice, takewhile,  # finite
    groupby, product,          # combinatorics
    accumulate, starmap        # transforms
)

# Infinite counter, cut off after its first five values
for i in islice(count(start=10, step=2), 5):
    print(i)  # 10, 12, 14, 16, 18

# Group consecutive elements (groupby only merges adjacent rows whose keys match)
data = [
    {"date": "2026-01", "v": 10},
    {"date": "2026-01", "v": 20},
    {"date": "2026-02", "v": 30},
]
for month, rows in groupby(data, key=lambda row: row["date"]):
    total = sum(map(lambda entry: entry["v"], rows))
    print(f"{month}: {total}")

# Running total
sales = [100, 200, 150, 300, 250]
running = [*accumulate(sales)]  # [100, 300, 450, 750, 1000]

# Cartesian product: every size paired with every color
for size, color in product(["S", "M", "L"], ["red", "blue"]):
    print(f"{size}-{color}")

Python generators in 2026 are essential for memory-efficient data processing, infinite sequences, and async I/O. Use generator functions for custom iterators, generator expressions for lazy transformations, yield from for delegation, and itertools for common iteration patterns such as slicing, grouping, accumulating, and combinatorics. The memory benefits alone justify learning them for any data processing work.

📚 You might also like

🔗 Share this article

X / Twitter Facebook WhatsApp LinkedIn Telegram