LangChain Tutorial 2026: RAG, Agents, Memory and Structured Output

⏱️4 min read · 880 words

LangChain is the most widely used framework for building LLM-powered applications in 2026. From simple chatbots to complex multi-agent systems with tool use, memory, and retrieval, LangChain provides the building blocks. This guide covers LangChain v0.3 with practical examples using Claude and OpenAI.

📋 Table of Contents

Installation and Setup
Basic Chain
RAG Chain
Conversation Memory
Tool Use and Agents
Structured Output

Installation and Setup

pip install langchain langchain-community langchain-anthropic langchain-openai
pip install chromadb sentence-transformers pypdf

# .env
ANTHROPIC_API_KEY=sk-ant-...
OPENAI_API_KEY=sk-...

Basic Chain

from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# LLM
llm = ChatAnthropic(model="claude-opus-4-5", temperature=0)

# Prompt template
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful Python expert. Answer concisely."),
    ("human", "{question}")
])

# Chain using LCEL (LangChain Expression Language)
chain = prompt | llm | StrOutputParser()

# Invoke
response = chain.invoke({"question": "What is a Python generator?"})
print(response)

# Stream
for chunk in chain.stream({"question": "Explain async/await in Python"}):
    print(chunk, end="", flush=True)

# Async
# `await` at module level is a SyntaxError in a regular script, so the call
# lives in a coroutine that asyncio.run() drives to completion.
# In a notebook an event loop is already running and asyncio.run() cannot be
# used; there, write `response = await chain.ainvoke(...)` directly instead.
import asyncio


async def main() -> str:
    """Run the chain asynchronously and return its string output."""
    return await chain.ainvoke({"question": "Python type hints?"})


response = asyncio.run(main())

RAG Chain

from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.runnables import RunnablePassthrough

# Load documents
# The handbook PDF is read from a path relative to the working directory.
loader = PyPDFLoader("docs/company_handbook.pdf")
docs = loader.load()

# Split into chunks
# chunk_size / chunk_overlap are passed as 1000 and 200, so consecutive
# chunks share some text and a passage cut at a boundary still appears whole
# in one of them.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(docs)

# Create vector store
# Embeddings come from the all-MiniLM-L6-v2 model; the Chroma store is given
# ./chroma_db as its persist directory.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory="./chroma_db")
# k=5 is forwarded to the search call (presumably the number of chunks
# returned per query -- confirm against the retriever docs).
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# RAG prompt
# Adjacent string literals are concatenated into one template; the explicit
# \n escapes keep each literal on a single source line. {context} and
# {question} are the template variables filled in at invocation time.
rag_prompt = ChatPromptTemplate.from_template(
    "Answer the question based only on the following context:\n"
    "{context}\n\n"
    "Question: {question}\n\n"
    "If the answer is not in the context, say so clearly."
)

def format_docs(docs):
    """Join the page_content of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The contents separated by blank lines; "" when ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)

# RAG chain
# The dict step builds the prompt inputs from the single invoke() argument:
# "context" runs the question through the retriever and format_docs, while
# "question" passes the argument through unchanged.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

# The chain is invoked with the bare question string, not a dict.
answer = rag_chain.invoke("What is the vacation policy?")
print(answer)

Conversation Memory

from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Chat histories keyed by session id, held in process memory
# (use Redis for production)
store: dict[str, ChatMessageHistory] = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the history stored for session_id, creating an empty one on first use."""
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: register a fresh history.
        history = ChatMessageHistory()
        store[session_id] = history
        return history

# Prompt with a slot for earlier turns: the "history" and "input" variable
# names must match the history_messages_key / input_messages_key passed to
# RunnableWithMessageHistory below.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("placeholder", "{history}"),
    ("human", "{input}"),
])

chain = prompt | llm | StrOutputParser()

# Add memory
# get_session_history supplies the per-session message history for the wrapper.
chain_with_memory = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

# Use with session
# Both calls share the same session_id, so they use the same stored history.
config = {"configurable": {"session_id": "user-123"}}

response1 = chain_with_memory.invoke({"input": "My name is Alice"}, config=config)
response2 = chain_with_memory.invoke({"input": "What's my name?"}, config=config)
# response2 remembers "Alice"!

Tool Use and Agents

from langchain.tools import tool
from langchain.agents import create_tool_calling_agent, AgentExecutor

@tool
def get_weather(location: str) -> str:
    """Get current weather for a location.

    Args:
        location: Name of the place to report the weather for.
    """
    # The docstring above is required: @tool uses it as the tool description
    # and raises if the function has none.
    # Real implementation would call a weather API
    return f"Weather in {location}: 22 Celsius, partly cloudy"

@tool
def search_web(query: str) -> str:
    """Search the web for information.

    Args:
        query: The search query text.
    """
    # The docstring above is required: @tool uses it as the tool description
    # and raises if the function has none.
    # Real: use Tavily or SerpAPI
    return f"Search results for '{query}': [Result 1...] [Result 2...]"

@tool
def calculate(expression: str) -> float:
    """Evaluate a mathematical expression.

    Supports numbers, parentheses, the operators + - * / // % ** and unary
    plus/minus.

    Args:
        expression: Arithmetic expression, e.g. "234 * 567".
    """
    # The expression is parsed to an AST and only whitelisted arithmetic
    # nodes are evaluated. eval() is never called, so names, attribute
    # access and function calls in the input are rejected, not executed.
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Cap exponents so inputs like "9**9**9" cannot stall the process.
    max_exponent = 1000

    def evaluate(node):
        # type(...) rather than isinstance so True/False are not accepted as numbers.
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if isinstance(node.op, ast.Pow) and abs(right) > max_exponent:
                raise ValueError(f"Exponent too large in expression: {expression!r}")
            return binary_ops[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError(f"Unsupported expression: {expression!r}")

    # ast.parse raises SyntaxError for input that is not a valid expression.
    return evaluate(ast.parse(expression, mode="eval").body)

# Tools made available to the agent
tools = [get_weather, search_web, calculate]

# Create agent
# The "agent_scratchpad" placeholder is the slot for the agent's intermediate
# tool calls and results (presumably filled in by the executor -- confirm
# against the create_tool_calling_agent docs).
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant with access to tools. Use them when needed."),
    ("human", "{input}"),
    ("placeholder", "{agent_scratchpad}"),
])

agent = create_tool_calling_agent(llm, tools, prompt)
# verbose=True and max_iterations=5 are passed to the executor; the same
# tools list is given to both the agent and the executor.
executor = AgentExecutor(agent=agent, tools=tools, verbose=True, max_iterations=5)

# The result is a dict; the final answer is read from its "output" key.
result = executor.invoke({"input": "What's the weather in Sydney and what is 234 * 567?"})
print(result["output"])

Structured Output

from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser

# Schema for the structured summary requested from the model.
# Each Field description is attached to the field's metadata; the free-form
# hints ("3-5 key takeaways", "positive/negative/neutral") are descriptions
# only and are not enforced by the declared types.
class ArticleSummary(BaseModel):
    title: str = Field(description="Article title")
    summary: str = Field(description="3-sentence summary")
    key_points: list[str] = Field(description="3-5 key takeaways")
    sentiment: str = Field(description="positive/negative/neutral")

# Parser bound to the ArticleSummary schema
parser = PydanticOutputParser(pydantic_object=ArticleSummary)

# Adjacent string literals are concatenated into one template; the explicit
# \n escapes keep each literal on a single source line. {article} and
# {format_instructions} are the template variables filled in at invocation time.
prompt = ChatPromptTemplate.from_template(
    "Analyze this article and provide structured output.\n\n"
    "Article: {article}\n\n"
    "{format_instructions}"
)

# Prompt -> model -> parser
chain = prompt | llm | parser

# NOTE(review): article_text is not defined anywhere in this snippet; assign
# the article's text to it before running.
# The parser's own format instructions are passed in as a prompt variable.
result: ArticleSummary = chain.invoke({
    "article": article_text,
    "format_instructions": parser.get_format_instructions()
})
print(result.title, result.key_points)

# Or use with_structured_output (simpler)
# No parser or format instructions are passed here; the schema class is
# given to the model wrapper directly.
structured_llm = llm.with_structured_output(ArticleSummary)
result = structured_llm.invoke(f"Analyze: {article_text}")

LangChain v0.3 in 2026 is mature and production-ready. Use LCEL chains for simple pipelines, retrieval for knowledge-grounded responses, tool-calling agents for interactive AI systems, and structured output for reliable parsing. For production, consider LangGraph (from the same team) for stateful multi-agent workflows.