Elasticsearch is the most powerful open-source search and analytics engine in 2026. From autocomplete to log analytics to vector search for AI applications, Elasticsearch powers search at Wikipedia, GitHub, and thousands of companies. This guide covers indexing, querying, aggregations, and vector search.
📋 Table of Contents
Setup with Docker
# Start a single-node Elasticsearch 8.13.0 container for local development.
# NOTE: xpack.security.enabled=false turns off authentication and TLS; do not
# use this configuration for anything reachable from outside your machine.
docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" -e "xpack.security.enabled=false" -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" elasticsearch:8.13.0
# Check health (the URL is quoted so shells such as zsh do not treat "?" as a glob)
curl "http://localhost:9200/_cluster/health?pretty"
Python Client
# Quote the requirement: unquoted square brackets are a glob pattern in zsh/fish.
pip install "elasticsearch[async]"
from elasticsearch import AsyncElasticsearch
import asyncio
# Module-level async client shared by the functions in this file; it targets
# the node at http://localhost:9200.
# NOTE(review): nothing visible here closes the client -- presumably the
# application should call `await es.close()` on shutdown; confirm.
es = AsyncElasticsearch("http://localhost:9200")
# Create index with mapping
async def setup_articles_index():
    """Create the ``articles`` index with its field mappings and settings.

    The mapping declares:

    * ``title`` -- English-analyzed text with two sub-fields: ``title.keyword``
      (exact value) and ``title.suggest`` (completion suggester input).
    * ``content`` -- English-analyzed text.
    * ``author`` / ``tags`` -- keyword fields.
    * ``published_at`` -- date; ``views`` -- integer.
    * ``embedding`` -- indexed 384-dimensional dense vector compared with
      cosine similarity.

    The index uses one primary shard and no replicas.

    An HTTP 400 response is ignored so the function can be called repeatedly
    once the index exists.
    """
    mappings = {
        "properties": {
            "title": {
                "type": "text",
                "analyzer": "english",
                "fields": {
                    "keyword": {"type": "keyword"},
                    "suggest": {"type": "completion"},
                },
            },
            "content": {"type": "text", "analyzer": "english"},
            "author": {"type": "keyword"},
            "tags": {"type": "keyword"},
            "published_at": {"type": "date"},
            "views": {"type": "integer"},
            "embedding": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine",
            },
        }
    }
    settings = {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    }
    # Per-request transport options belong on `.options()`; passing `ignore=`
    # straight to the API method is deprecated in the 8.x client.
    # NOTE: 400 is returned when the index already exists, but also for an
    # invalid mapping, so a malformed mapping is silently ignored as well.
    await es.options(ignore_status=400).indices.create(
        index="articles",
        mappings=mappings,
        settings=settings,
    )
# Index documents
async def index_article(article: dict):
    """Index a single article into ``articles``.

    The document id is ``str(article["id"])``. ``title``, ``content``,
    ``author``, ``tags`` and ``published_at`` are required keys of *article*
    (a missing one raises ``KeyError``); ``views`` falls back to ``0``.
    """
    doc_id = str(article["id"])
    required = ("title", "content", "author", "tags", "published_at")
    payload = {key: article[key] for key in required}
    payload["views"] = article.get("views", 0)
    await es.index(index="articles", id=doc_id, document=payload)
# Bulk index
async def bulk_index_articles(articles: list[dict]):
    """Index many articles into ``articles`` with one bulk request.

    Each article must provide ``id``, ``title``, ``content`` and ``tags``.
    ``author`` and ``published_at`` are copied when present, and ``views``
    defaults to ``0``, so bulk-indexed documents carry the same fields as
    those written by ``index_article``. Without ``published_at`` a document
    can never match a date-range filter on that field.

    The request uses ``refresh=True`` so the documents are searchable when
    the call returns. An empty *articles* list is a no-op. Items the server
    rejects are logged as a warning rather than raised.
    """
    import logging

    if not articles:
        # An empty bulk body is rejected by the server; nothing to do anyway.
        return

    optional_fields = ("author", "published_at")
    operations = []
    for article in articles:
        operations.append({"index": {"_index": "articles", "_id": str(article["id"])}})
        document = {
            "title": article["title"],
            "content": article["content"],
            "tags": article["tags"],
            "views": article.get("views", 0),
        }
        document.update(
            {field: article[field] for field in optional_fields if field in article}
        )
        operations.append(document)

    response = await es.bulk(operations=operations, refresh=True)

    # The bulk API reports per-item failures in the body instead of raising.
    if response["errors"]:
        failed = [
            item["index"]
            for item in response["items"]
            if "error" in item.get("index", {})
        ]
        logging.getLogger(__name__).warning(
            "bulk index: %d of %d articles failed: %s",
            len(failed),
            len(articles),
            failed,
        )
Full-Text Search
async def search_articles(query: str, tags: list[str] = None,
                          page: int = 1, size: int = 10) -> dict:
    """Run a paginated full-text search over ``articles``.

    Args:
        query: Free-text query matched against ``title`` (boost 3),
            ``content`` and ``tags`` (boost 2) with automatic fuzziness.
        tags: Optional list of tags; when given, only documents carrying at
            least one of them are returned.
        page: 1-based page number.
        size: Number of hits per page.

    Returns:
        ``{"total": <int>, "hits": [...]}`` where every hit contains the
        selected source fields plus ``id``, ``score`` and ``highlights``.

    Raises:
        ValueError: If ``page`` is below 1 or ``size`` is negative. Such
            values would otherwise be sent to the server as a negative
            ``from``/``size``.

    Only documents whose ``published_at`` lies within the last year are
    considered.
    """
    if page < 1:
        raise ValueError(f"page must be >= 1, got {page}")
    if size < 0:
        raise ValueError(f"size must be >= 0, got {size}")

    must = [
        {
            "multi_match": {
                "query": query,
                "fields": ["title^3", "content", "tags^2"],  # title more important
                "type": "best_fields",
                "fuzziness": "AUTO",  # handle typos
                "minimum_should_match": "75%",
            }
        }
    ]

    # Filters narrow the result set without influencing the relevance score.
    filters = []
    if tags:
        filters.append({"terms": {"tags": tags}})
    filters.append({"range": {"published_at": {"gte": "now-1y"}}})

    body = {
        "query": {
            "bool": {
                "must": must,
                "filter": filters,
            }
        },
        "highlight": {
            "fields": {
                # number_of_fragments=0 highlights the whole title field.
                "title": {"number_of_fragments": 0},
                "content": {"fragment_size": 150, "number_of_fragments": 3},
            },
            "pre_tags": ["<strong>"],
            "post_tags": ["</strong>"],
        },
        "from": (page - 1) * size,
        "size": size,
        "_source": ["title", "author", "tags", "published_at", "views"],
    }
    result = await es.search(index="articles", body=body)

    return {
        "total": result["hits"]["total"]["value"],
        "hits": [
            {
                **hit["_source"],
                "id": hit["_id"],
                "score": hit["_score"],
                "highlights": hit.get("highlight", {}),
            }
            for hit in result["hits"]["hits"]
        ],
    }
# Autocomplete suggest
async def suggest(prefix: str) -> list[str]:
    """Return up to five de-duplicated title completions for *prefix*.

    Uses the completion suggester on the ``title.suggest`` sub-field and
    returns the ``text`` of each option from the first suggestion entry.
    """
    completion_spec = {
        "field": "title.suggest",
        "size": 5,
        "skip_duplicates": True,
    }
    request = {
        "suggest": {
            "title_suggest": {"prefix": prefix, "completion": completion_spec},
        }
    }
    response = await es.search(index="articles", body=request)
    options = response["suggest"]["title_suggest"][0]["options"]
    return [entry["text"] for entry in options]
Aggregations — Analytics
async def analytics_dashboard() -> dict:
    """Return the raw ``aggregations`` section for the analytics dashboard.

    The request asks for no hits (``size: 0``) and four aggregations:
    ``popular_tags``, ``monthly_counts``, ``avg_views_by_author`` and
    ``views_percentiles``.
    """
    # Count by tag: ten buckets, ordered by document count descending.
    tags_agg = {
        "terms": {"field": "tags", "size": 10, "order": {"_count": "desc"}},
    }
    # Articles per month, bucket keys formatted as yyyy-MM.
    monthly_agg = {
        "date_histogram": {
            "field": "published_at",
            "calendar_interval": "month",
            "format": "yyyy-MM",
        },
    }
    # Per-author buckets (up to five), each with average and total views.
    author_agg = {
        "terms": {"field": "author", "size": 5},
        "aggs": {
            "avg_views": {"avg": {"field": "views"}},
            "total_views": {"sum": {"field": "views"}},
        },
    }
    # Distribution of the views field.
    percentile_agg = {
        "percentiles": {"field": "views", "percents": [50, 75, 90, 95, 99]},
    }

    request = {
        "size": 0,  # no hits, only aggregations
        "aggs": {
            "popular_tags": tags_agg,
            "monthly_counts": monthly_agg,
            "avg_views_by_author": author_agg,
            "views_percentiles": percentile_agg,
        },
    }
    response = await es.search(index="articles", body=request)
    return response["aggregations"]
Vector Search (KNN)
from sentence_transformers import SentenceTransformer
# Embedding model shared by the vector-search functions below, loaded once at
# import time. The `embedding` field in the index mapping is declared with
# dims=384, so the vectors this model produces must have that length.
model = SentenceTransformer("all-MiniLM-L6-v2")
async def index_with_embedding(article: dict):
    """Index *article* together with an embedding of its text.

    The embedding is computed from the title followed by the first 500
    characters of the content, and stored under ``embedding`` next to every
    key already present in *article*. The document id is
    ``str(article["id"])``.

    NOTE(review): ``model.encode`` is a synchronous call made directly inside
    this coroutine -- presumably it blocks the event loop while encoding.
    """
    text = " ".join((article["title"], article["content"][:500]))
    vector = model.encode(text).tolist()

    doc = dict(article)
    doc["embedding"] = vector
    await es.index(index="articles", id=str(article["id"]), document=doc)
async def semantic_search(query: str, size: int = 10) -> list[dict]:
    """Return the *size* articles whose embeddings are nearest to *query*.

    Args:
        query: Free text; it is embedded with the shared model and compared
            against the ``embedding`` field.
        size: Number of nearest neighbours to return (at least 1).

    Returns:
        The ``title``, ``content`` and ``tags`` source fields of each hit,
        in the order the server returned them.

    Raises:
        ValueError: If ``size`` is below 1 (kNN requires a positive ``k``).
    """
    if size < 1:
        raise ValueError(f"size must be >= 1, got {size}")

    query_embedding = model.encode(query).tolist()
    result = await es.search(
        index="articles",
        knn={
            "field": "embedding",
            "query_vector": query_embedding,
            "k": size,
            # num_candidates must not be smaller than k, otherwise the
            # request is rejected; keep 100 as the floor.
            "num_candidates": max(100, size),
        },
        # Without an explicit size the search API returns at most 10 hits,
        # regardless of k.
        size=size,
        source=["title", "content", "tags"],
    )
    return [h["_source"] for h in result["hits"]["hits"]]
# Hybrid search: keyword + vector
async def hybrid_search(query: str) -> list[dict]:
    """Combine keyword and vector search for *query* and return hit sources.

    The request carries a ``multi_match`` query over ``title`` (boost 2) and
    ``content``, a kNN clause on ``embedding`` (k=10, 50 candidates), and an
    empty ``rrf`` rank section so the two result lists are fused by
    reciprocal rank fusion. Each returned item is a hit's full ``_source``.
    """
    query_vector = model.encode(query).tolist()

    keyword_clause = {
        "multi_match": {
            "query": query,
            "fields": ["title^2", "content"],
        }
    }
    vector_clause = {
        "field": "embedding",
        "query_vector": query_vector,
        "k": 10,
        "num_candidates": 50,
    }
    request = {
        "query": keyword_clause,
        "knn": vector_clause,
        "rank": {"rrf": {}},  # reciprocal rank fusion to combine scores
    }

    response = await es.search(index="articles", body=request)
    return [hit["_source"] for hit in response["hits"]["hits"]]
Elasticsearch in 2026 is the most versatile search platform: full-text search for content, aggregations for analytics, vector search for AI semantic similarity. The Python client is clean and async-ready. Start with full-text search, add aggregations for analytics dashboards, and add vector search when you need semantic understanding.
📚 You might also like
🔗 Share this article




✍️ Leave a Comment