Python list comprehensions, generator expressions, and itertools are among the most Pythonic ways to write clean, efficient data transformation code. In 2026, with walrus operator adoption and Python 3.12’s improved syntax, these patterns are more powerful than ever. This guide covers everything from basic comprehensions to advanced itertools patterns.
📋 Table of Contents
List Comprehensions
# Basic — [expression for item in iterable]
squares = [x**2 for x in range(10)]  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

# With condition — only items passing the trailing `if` are kept
evens = [x for x in range(20) if x % 2 == 0]

# Nested — flatten 2D list (the `for` clauses read left to right, outermost first)
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flat = [num for row in matrix for num in row]  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Conditional expression (ternary) — chooses the value, it does not filter
labels = ["even" if x % 2 == 0 else "odd" for x in range(6)]

# Walrus operator — compute once in the filter, reuse the result as the output
data = [1, -2, 3, -4, 5, -6]
results = [y for x in data if (y := x * 2) > 0]  # [2, 6, 10]

# Dict comprehension
word_lengths = {word: len(word) for word in ["python", "code", "list"]}

# Set comprehension — duplicate lengths collapse into one entry
unique_lengths = {len(word) for word in ["python", "code", "list", "data"]}

# Nested dict comprehension — matrix to dict keyed by (row, column).
# enumerate() hands back index and value together, so there is no need for
# range(len(...)) followed by a second lookup into the matrix.
matrix_dict = {
    (i, j): value
    for i, row in enumerate(matrix)
    for j, value in enumerate(row)
}
Generator Expressions
# Generator expression — lazy evaluation, memory efficient
total = sum(x**2 for x in range(1_000_000))  # never creates the full list

# vs list comprehension
total_list = sum([x**2 for x in range(1_000_000)])  # creates full list in RAM

# Chaining generators — each stage pulls items lazily from the one before it,
# so no intermediate list is ever built
data = range(1, 1001)
multiples_of_three = (x for x in data if x % 3 == 0)  # divisible by 3
squared = (x**2 for x in multiples_of_three)
result = sum(squared)  # only here is the chain actually consumed
# Use generators for large file processing
def read_large_file(filepath: str):
with open(filepath) as f:
for line in f:
yield line.strip()
# Process without loading entire file: the generator expression filters lines
# as they are read, so only one line is held at a time
errors = (line for line in read_large_file("app.log") if "ERROR" in line)
for error in errors:
    print(error)
itertools — The Power Tools
from itertools import (
chain, chain_from_iterable,
compress, dropwhile, takewhile, filterfalse,
islice, starmap,
groupby,
combinations, permutations, product,
accumulate, pairwise, # Python 3.10+
batched, # Python 3.12+
count, cycle, repeat
)
# chain — combine iterables
letters = list(chain(['a','b'], ['c','d'], ['e'])) # ['a','b','c','d','e']
# compress — keep the items whose matching selector is truthy
data = [1, 2, 3, 4, 5]
selectors = [True, False, True, False, True]
selected = [*compress(data, selectors)]  # [1, 3, 5]

# takewhile / dropwhile — split a sequence at the first item that fails the test
numbers = [1, 3, 5, 8, 10, 12, 11]  # ascending then descending
ascending = [*takewhile(lambda n: 10 > n, numbers)]  # [1, 3, 5, 8]
# dropwhile stops testing after the first failure, so the trailing 11 is kept
rest = [*dropwhile(lambda n: 10 > n, numbers)]  # [10, 12, 11]
# groupby — group consecutive equal elements
data = [("Alice", "dev"), ("Bob", "dev"), ("Carol", "qa"), ("Dave", "qa")]
for dept, members in groupby(data, key=lambda x: x[1]):
    # `members` is a one-shot iterator tied to the groupby object: consume it
    # before the loop advances to the next group
    print(dept, [m[0] for m in members])
# IMPORTANT: groupby only groups consecutive equal elements!
# Sort first: sorted(data, key=lambda x: x[1])
# combinations, permutations, product — a string is an iterable of characters,
# so "ABC" yields the same tuples as ['A', 'B', 'C']
pairs = [*combinations("ABC", 2)]  # [('A','B'),('A','C'),('B','C')]
perms = [*permutations("AB", 2)]  # [('A','B'),('B','A')]
grid = [*product((1, 2), "ab")]  # [(1,'a'),(1,'b'),(2,'a'),(2,'b')]

# accumulate — running totals (addition by default, or any two-argument function)
sales = [100, 150, 200, 120, 180]
running_total = [*accumulate(sales)]  # [100, 250, 450, 570, 750]
running_max = [*accumulate(sales, func=max)]  # [100, 150, 200, 200, 200]
# pairwise (Python 3.10+)
seq = [1, 2, 3, 4, 5]
pairs = list(pairwise(seq)) # [(1,2),(2,3),(3,4),(4,5)]
# batched (Python 3.12+)
data = range(10)
batches = list(batched(data, 3)) # [(0,1,2),(3,4,5),(6,7,8),(9,)]
# starmap — unpack each tuple into the function's positional arguments
args = [(2, 3), (4, 2), (5, 3)]
results = [*starmap(pow, args)]  # [8, 16, 125]

# islice — lazy slice; works on iterators that cannot be indexed, even endless ones
first_five = [*islice(count(), 5)]  # [0, 1, 2, 3, 4]
every_other = [*islice(range(100), 0, 10, 2)]  # [0, 2, 4, 6, 8]
Real-World Patterns
from itertools import groupby, chain
from operator import attrgetter
# Flatten nested structures
nested = [[1, [2, 3]], [4, [5, [6]]]]


def deep_flatten(lst):
    """Yield the non-list items of an arbitrarily nested list, depth first.

    Only ``list`` instances are descended into; every other value (including
    tuples and strings) is yielded unchanged.

    Args:
        lst: An iterable whose items may themselves be lists.

    Yields:
        The leaf items in left-to-right order.
    """
    for item in lst:
        if isinstance(item, list):
            # Delegate to a recursive generator for the nested level
            yield from deep_flatten(item)
        else:
            yield item


flat = list(deep_flatten(nested))  # [1, 2, 3, 4, 5, 6]
# Group and aggregate
from collections import defaultdict
from itertools import groupby
orders = [
    {"month": "2026-01", "category": "tech", "amount": 150},
    {"month": "2026-01", "category": "books", "amount": 30},
    {"month": "2026-01", "category": "tech", "amount": 200},
    {"month": "2026-02", "category": "tech", "amount": 180},
]


def month_and_category(order):
    """Return the (month, category) pair used both to sort and to group orders."""
    return order["month"], order["category"]


# Group by month and category.
# groupby only merges adjacent items, so sort by the SAME key first; sharing
# one key function keeps the sort and the grouping from drifting apart.
orders.sort(key=month_and_category)
for (month, cat), group in groupby(orders, key=month_and_category):
    total = sum(o["amount"] for o in group)
    print(f"{month} {cat}: ${total}")
# Pipeline with generators — process large datasets efficiently
def parse_records(lines):
    """Lazily turn comma-separated text lines into record dicts.

    Args:
        lines: An iterable of strings such as an open text file. Each
            non-blank line must hold at least two comma-separated fields.

    Yields:
        ``{"name": <first field>, "value": <second field as float>}``.

    Raises:
        IndexError: If a non-blank line has no comma.
        ValueError: If the second field cannot be converted to ``float``.
    """
    for line in lines:
        # Skip empty and whitespace-only lines
        if not line.strip():
            continue
        parts = line.split(',')
        # float() tolerates surrounding whitespace, including the trailing newline
        yield {"name": parts[0], "value": float(parts[1])}
def filter_valid(records):
    """Return a lazy iterator over the records whose ``"value"`` is greater than 0.

    Args:
        records: An iterable of mappings that each have a ``"value"`` key.

    Returns:
        A generator yielding the matching records unchanged, in input order.
    """
    return (r for r in records if r["value"] > 0)
def enrich(records):
    """Return a lazy iterator of records with a ``"category"`` key added.

    The category is ``"high"`` when ``"value"`` is strictly greater than 100
    and ``"low"`` otherwise. Each result is a new dict; the input records are
    not modified.

    Args:
        records: An iterable of mappings that each have a ``"value"`` key.

    Returns:
        A generator yielding one enriched dict per input record.
    """
    return ({**r, "category": "high" if r["value"] > 100 else "low"} for r in records)
# Compose pipeline
with open("data.csv") as f:
    # Composing the stages reads nothing yet: every stage is lazy
    pipeline = enrich(filter_valid(parse_records(f)))
    # Consume INSIDE the with-block. The pipeline pulls lines from `f` on
    # demand, so iterating after the file is closed would fail.
    for record in pipeline:
        # NOTE(review): save_to_db is not defined in this snippet; supply your
        # own persistence function.
        save_to_db(record)
Python comprehensions and itertools are essential for writing idiomatic, efficient Python. Use list comprehensions for simple transformations, generators for large datasets, and itertools for complex iterator operations. The key insight: prefer lazy evaluation (generators, itertools) over eager evaluation (lists) for large data — it dramatically reduces memory usage and can also improve performance when you stop early or never need all the results at once.
📚 You might also like
🔗 Share this article




✍️ Leave a Comment