Elasticsearch Complete Guide 2026: Search, Aggregations and Vector Search

⏱️6 min read · 1,123 words

Elasticsearch is the most powerful open-source search and analytics engine in 2026. From autocomplete to log analytics to vector search for AI applications, Elasticsearch powers search at Wikipedia, GitHub, and thousands of companies. This guide covers indexing, querying, aggregations, and vector search.

📋 Table of Contents

Setup with Docker
Python Client
Full-Text Search
Aggregations — Analytics
Vector Search (KNN)

Setup with Docker

# Single-node Elasticsearch for local development only: security is disabled
# and the JVM heap is capped at 512 MB.
docker run -d \
  --name elasticsearch \
  -p 9200:9200 \
  -e "discovery.type=single-node" \
  -e "xpack.security.enabled=false" \
  -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \
  elasticsearch:8.13.0

# Check health (URL is quoted so shells such as zsh do not glob-expand the `?`)
curl "http://localhost:9200/_cluster/health?pretty"

Python Client

# Quoted so shells such as zsh do not treat the [async] extra as a glob pattern
pip install "elasticsearch[async]"

from elasticsearch import AsyncElasticsearch
import asyncio

# Shared async client used by every helper in this guide; the URL targets the
# single-node container started in "Setup with Docker" (port 9200, security off).
es = AsyncElasticsearch("http://localhost:9200")

# Create index with mapping
async def setup_articles_index():
    """Create the ``articles`` index, tolerating the case where it already exists.

    The mapping declares:

    * ``title`` -- English-analyzed text with a ``keyword`` sub-field and a
      ``completion`` sub-field (``title.suggest``) for autocomplete.
    * ``content`` -- English-analyzed text.
    * ``author`` / ``tags`` -- exact-match keywords (usable in aggregations).
    * ``published_at`` -- date; ``views`` -- integer.
    * ``embedding`` -- indexed 384-dimensional dense vector compared by cosine
      similarity, for kNN search.

    The index is created with one primary shard and no replicas.
    """
    mappings = {
        "properties": {
            "title": {
                "type": "text",
                "analyzer": "english",
                "fields": {
                    "keyword": {"type": "keyword"},
                    "suggest": {"type": "completion"},
                },
            },
            "content": {"type": "text", "analyzer": "english"},
            "author": {"type": "keyword"},
            "tags": {"type": "keyword"},
            "published_at": {"type": "date"},
            "views": {"type": "integer"},
            "embedding": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine",
            },
        }
    }
    settings = {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    }

    # Per-request transport options (the old `ignore=400` keyword) are
    # deprecated in the 8.x client; `.options(ignore_status=...)` is the
    # supported replacement and returns a client bound to those options.
    # NOTE(review): 400 is returned when the index already exists, but also for
    # other bad requests (e.g. an invalid mapping) -- those are ignored too.
    await es.options(ignore_status=400).indices.create(
        index="articles",
        mappings=mappings,
        settings=settings,
    )

# Index documents
async def index_article(article: dict):
    """Index a single article into ``articles`` under its stringified ``id``.

    ``article`` must provide ``id``, ``title``, ``content``, ``author``,
    ``tags`` and ``published_at``; ``views`` is optional and defaults to 0.
    A missing required key raises ``KeyError``.
    """
    doc_id = str(article["id"])

    # Copy only the mapped fields so stray keys on `article` are not indexed.
    required_fields = ("title", "content", "author", "tags", "published_at")
    doc = {name: article[name] for name in required_fields}
    doc["views"] = article.get("views", 0)

    await es.index(index="articles", id=doc_id, document=doc)

# Bulk index
async def bulk_index_articles(articles: list[dict]):
    """Index many articles into ``articles`` with a single bulk request.

    Each article must provide ``id``, ``title``, ``content`` and ``tags``.
    ``author`` and ``published_at`` are copied when present and ``views``
    defaults to 0, so bulk-indexed documents carry the same fields as ones
    written by ``index_article``. In particular, without ``published_at`` a
    document can never match a date-range filter on that field.

    An empty ``articles`` list is a no-op. The request uses ``refresh=True``
    so the documents are searchable as soon as the call returns.
    """
    if not articles:
        # A bulk request with no actions is rejected by the server, so skip
        # the round trip entirely.
        return

    operations = []
    for article in articles:
        # Bulk payloads alternate an action line with the document source.
        operations.append({"index": {"_index": "articles", "_id": str(article["id"])}})
        document = {
            "title": article["title"],
            "content": article["content"],
            "tags": article["tags"],
            "views": article.get("views", 0),
        }
        # Optional here (unlike in the single-document path) to stay
        # compatible with callers that only pass title/content/tags.
        for optional_field in ("author", "published_at"):
            if optional_field in article:
                document[optional_field] = article[optional_field]
        operations.append(document)

    await es.bulk(operations=operations, refresh=True)

Full-Text Search

async def search_articles(query: str, tags: list[str] = None,
                         page: int = 1, size: int = 10) -> dict:
    """Run a paginated, highlighted full-text search over ``articles``.

    Args:
        query: Free-text query matched against ``title`` (boost 3),
            ``tags`` (boost 2) and ``content`` with automatic fuzziness.
        tags: Optional list of tags; when given, only articles carrying at
            least one of them are returned.
        page: 1-based page number. Values below 1 are treated as 1.
        size: Hits per page. Negative values are treated as 0.

    Returns:
        ``{"total": <int>, "hits": [...]}`` where every hit holds the selected
        source fields plus ``id``, ``score`` and ``highlights``.

    Only articles published within the last year are considered; that range
    filter is always applied.
    """
    # A page below 1 or a negative size would produce a negative `from`/`size`,
    # which the server rejects; clamp to the nearest valid value instead.
    page = max(page, 1)
    size = max(size, 0)

    must = [
        {
            "multi_match": {
                "query": query,
                "fields": ["title^3", "content", "tags^2"],  # title more important
                "type": "best_fields",
                "fuzziness": "AUTO",           # handle typos
                "minimum_should_match": "75%"
            }
        }
    ]

    # Filters restrict the result set without contributing to the score.
    filters = []
    if tags:
        filters.append({"terms": {"tags": tags}})
    filters.append({"range": {"published_at": {"gte": "now-1y"}}})

    body = {
        "query": {
            "bool": {
                "must": must,
                "filter": filters,
            }
        },
        "highlight": {
            "fields": {
                # 0 fragments => highlight the whole title rather than snippets
                "title": {"number_of_fragments": 0},
                "content": {"fragment_size": 150, "number_of_fragments": 3}
            },
            "pre_tags": ["<strong>"],
            "post_tags": ["</strong>"]
        },
        "from": (page - 1) * size,
        "size": size,
        "_source": ["title", "author", "tags", "published_at", "views"],
    }

    result = await es.search(index="articles", body=body)
    return {
        "total": result["hits"]["total"]["value"],
        "hits": [
            {
                **hit["_source"],
                "id": hit["_id"],
                "score": hit["_score"],
                "highlights": hit.get("highlight", {})
            }
            for hit in result["hits"]["hits"]
        ]
    }

# Autocomplete suggest
async def suggest(prefix: str) -> list[str]:
    """Return up to five distinct title completions for ``prefix``.

    Uses the completion suggester on the ``title.suggest`` sub-field of the
    ``articles`` index and returns the suggested texts in response order.
    """
    completion_opts = {
        "field": "title.suggest",
        "size": 5,
        "skip_duplicates": True,
    }
    request = {
        "suggest": {
            "title_suggest": {"prefix": prefix, "completion": completion_opts},
        },
    }

    response = await es.search(index="articles", body=request)

    # The first (index 0) suggestion entry is the one read for this prefix.
    options = response["suggest"]["title_suggest"][0]["options"]
    return [opt["text"] for opt in options]

Aggregations — Analytics

async def analytics_dashboard() -> dict:
    """Compute dashboard aggregations over ``articles`` and return them raw.

    The returned mapping is the response's ``aggregations`` section with four
    entries: ``popular_tags``, ``monthly_counts``, ``avg_views_by_author`` and
    ``views_percentiles``.
    """
    # Ten most frequent tags, most common first.
    tag_counts = {
        "terms": {"field": "tags", "size": 10, "order": {"_count": "desc"}},
    }
    # One bucket per calendar month of publication, keyed as yyyy-MM.
    per_month = {
        "date_histogram": {
            "field": "published_at",
            "calendar_interval": "month",
            "format": "yyyy-MM",
        },
    }
    # Five author buckets, each with the mean and the sum of `views`.
    views_per_author = {
        "terms": {"field": "author", "size": 5},
        "aggs": {
            "avg_views": {"avg": {"field": "views"}},
            "total_views": {"sum": {"field": "views"}},
        },
    }
    # Distribution of `views` at the listed percentiles.
    view_percentiles = {
        "percentiles": {"field": "views", "percents": [50, 75, 90, 95, 99]},
    }

    request = {
        "size": 0,  # aggregations only, no document hits
        "aggs": {
            "popular_tags": tag_counts,
            "monthly_counts": per_month,
            "avg_views_by_author": views_per_author,
            "views_percentiles": view_percentiles,
        },
    }
    response = await es.search(index="articles", body=request)
    return response["aggregations"]

Vector Search (KNN)

from sentence_transformers import SentenceTransformer

# Loaded once at import time and reused by every embedding call below.
# NOTE(review): the model's output size must equal the 384 `dims` declared for
# the `embedding` field in the index mapping -- confirm if the model is swapped.
model = SentenceTransformer("all-MiniLM-L6-v2")

async def index_with_embedding(article: dict):
    """Index ``article`` together with an embedding of its title and content.

    The embedded text is the title, a space, and the first 500 characters of
    the content. Every key of ``article`` is stored in the document, plus the
    ``embedding`` vector; the document id is the stringified ``article["id"]``.
    """
    text = article["title"] + " " + article["content"][:500]

    # `model.encode` is synchronous, CPU-bound work; running it in a worker
    # thread keeps the event loop free to serve other coroutines meanwhile.
    vector = await asyncio.to_thread(model.encode, text)
    embedding = vector.tolist()

    await es.index(
        index="articles",
        id=str(article["id"]),
        document={**article, "embedding": embedding}
    )

async def semantic_search(query: str, size: int = 10) -> list[dict]:
    """Return the ``size`` articles whose embeddings are closest to ``query``.

    Args:
        query: Natural-language text; it is embedded with the shared model and
            compared against the ``embedding`` field.
        size: Maximum number of results. A value of 0 or less returns ``[]``
            without contacting the model or the cluster.

    Returns:
        The ``_source`` of each hit, limited to ``title``, ``content`` and
        ``tags``.
    """
    if size <= 0:
        # kNN requires a positive `k`; nothing can be returned anyway.
        return []

    # `model.encode` is synchronous, CPU-bound work; run it in a worker thread
    # so the event loop is not blocked while the embedding is computed.
    query_vector = await asyncio.to_thread(model.encode, query)
    query_embedding = query_vector.tolist()

    result = await es.search(
        index="articles",
        knn={
            "field": "embedding",
            "query_vector": query_embedding,
            "k": size,
            # num_candidates may not be smaller than k, so grow it with `size`.
            "num_candidates": max(100, size),
        },
        # Without an explicit size the response is capped at the default of 10
        # hits, regardless of how large `k` is.
        size=size,
        source=["title", "content", "tags"]
    )
    return [h["_source"] for h in result["hits"]["hits"]]

# Hybrid search: keyword + vector
async def hybrid_search(query: str) -> list[dict]:
    """Combine keyword and vector search for ``query`` and return hit sources.

    The request carries a ``multi_match`` query over ``title`` (boost 2) and
    ``content``, a kNN clause over ``embedding`` (``k`` 10, 50 candidates),
    and asks for the two rankings to be merged with reciprocal rank fusion.

    Returns:
        The full ``_source`` of every hit in the fused ranking.
    """
    # `model.encode` is synchronous, CPU-bound work; run it in a worker thread
    # so the event loop is not blocked while the embedding is computed.
    query_vector = await asyncio.to_thread(model.encode, query)
    embedding = query_vector.tolist()

    # NOTE(review): the top-level `rank` option is tied to the server version
    # and licence level -- confirm it is accepted by the target cluster.
    result = await es.search(
        index="articles",
        body={
            "query": {
                "multi_match": {
                    "query": query,
                    "fields": ["title^2", "content"],
                }
            },
            "knn": {
                "field": "embedding",
                "query_vector": embedding,
                "k": 10,
                "num_candidates": 50,
            },
            "rank": {
                "rrf": {}  # reciprocal rank fusion to combine scores
            }
        }
    )
    return [h["_source"] for h in result["hits"]["hits"]]

Elasticsearch in 2026 is the most versatile search platform: full-text search for content, aggregations for analytics, vector search for AI semantic similarity. The Python client is clean and async-ready. Start with full-text search, add aggregations for analytics dashboards, and add vector search when you need semantic understanding.

📚 You might also like

🔗 Share this article

X / Twitter Facebook WhatsApp LinkedIn Telegram