Elasticsearch ist im Jahr 2026 die leistungsstärkste Open-Source-Such- und Analysemaschine. Sie deckt alles ab – von der automatischen Vervollständigung über Protokollanalysen bis hin zur Vektorsuche für KI-Anwendungen – und steckt hinter der Suche bei Wikipedia, GitHub und Tausenden von Unternehmen. Dieser Leitfaden behandelt Indizierung, Abfragen, Aggregationen und Vektorsuche.
📋 Table of Contents
Einrichtung mit Docker
# Start a detached single-node Elasticsearch 8.13.0 container on port 9200
# with security disabled and a 512 MB JVM heap (local development only).
docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" -e "xpack.security.enabled=false" -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" elasticsearch:8.13.0
# Check health (URL is quoted so the shell does not treat "?" as a glob)
curl "http://localhost:9200/_cluster/health?pretty"
Python-Client
# Quote the requirement so shells such as zsh do not glob-expand "[async]".
pip install "elasticsearch[async]"
from elasticsearch import AsyncElasticsearch
import asyncio
# Module-level async client used by the functions in this file; it talks to
# the node at localhost:9200 over plain HTTP.
es = AsyncElasticsearch("http://localhost:9200")
# Create index with mapping
async def setup_articles_index():
    """Create the ``articles`` index with its field mapping and settings.

    The mapping declares analyzed text fields (``title``, ``content``),
    keyword fields (``author``, ``tags``), a date, an integer view counter and
    a 384-dimensional ``dense_vector`` field for cosine-similarity search.
    ``title`` additionally gets a ``keyword`` and a ``completion`` sub-field.

    A 400 response (for example because the index already exists) is ignored
    so the function can be called repeatedly. Note that this also hides other
    400 errors such as an invalid mapping.
    """
    mappings = {
        "properties": {
            "title": {
                "type": "text",
                "analyzer": "english",
                "fields": {
                    "keyword": {"type": "keyword"},
                    "suggest": {"type": "completion"},
                },
            },
            "content": {"type": "text", "analyzer": "english"},
            "author": {"type": "keyword"},
            "tags": {"type": "keyword"},
            "published_at": {"type": "date"},
            "views": {"type": "integer"},
            "embedding": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine",
            },
        }
    }
    settings = {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    }
    # Transport options such as the ignored status code belong on
    # ``options()``; passing ``ignore=`` to the API method is deprecated in
    # the 8.x client.
    await es.options(ignore_status=400).indices.create(
        index="articles",
        mappings=mappings,
        settings=settings,
    )
# Index documents
async def index_article(article: dict):
    """Index one article into ``articles`` under its stringified ``id``.

    Only ``title``, ``content``, ``author``, ``tags`` and ``published_at`` are
    copied from ``article`` (all required); ``views`` falls back to 0 when the
    key is absent.
    """
    doc_id = str(article["id"])
    required_fields = ("title", "content", "author", "tags", "published_at")
    doc = {name: article[name] for name in required_fields}
    doc["views"] = article.get("views", 0)
    await es.index(index="articles", id=doc_id, document=doc)
# Bulk index
async def bulk_index_articles(articles: list[dict]):
    """Index many articles into ``articles`` with a single bulk request.

    Each article contributes an ``index`` action line keyed by its stringified
    ``id`` followed by the document itself. ``title``, ``content`` and
    ``tags`` are required; ``author`` and ``published_at`` are copied when
    present and ``views`` defaults to 0, so bulk-indexed documents carry the
    same fields as those written by ``index_article``.

    An empty ``articles`` list is a no-op. The request is sent with
    ``refresh=True``.

    NOTE: the bulk response is not inspected here, so per-item failures
    reported in it are not surfaced to the caller.
    """
    operations = []
    for article in articles:
        operations.append({"index": {"_index": "articles", "_id": str(article["id"])}})
        doc = {
            "title": article["title"],
            "content": article["content"],
            "tags": article["tags"],
        }
        # Optional metadata: without ``published_at`` a document can never
        # match a date-range filter on that field.
        for field in ("author", "published_at"):
            if field in article:
                doc[field] = article[field]
        doc["views"] = article.get("views", 0)
        operations.append(doc)

    # An empty bulk body is rejected by the server, so skip the round trip.
    if not operations:
        return
    await es.bulk(operations=operations, refresh=True)
Volltextsuche
async def search_articles(query: str, tags: list[str] = None,
                          page: int = 1, size: int = 10) -> dict:
    """Run a paginated full-text search over the ``articles`` index.

    ``query`` is matched against ``title`` (boost 3), ``content`` and ``tags``
    (boost 2) with automatic fuzziness. Results are always restricted to
    articles published within the last year and, when ``tags`` is given, to
    articles matching the ``terms`` filter on those tags.

    Args:
        query: Free-text search string.
        tags: Optional tag values to filter on; ``None`` or an empty list
            adds no tag filter.
        page: 1-based page number; values below 1 are treated as 1.
        size: Hits per page; negative values are treated as 0.

    Returns:
        A dict with ``total`` (the hit count reported by the response) and
        ``hits``, a list of the selected source fields of each hit extended
        with ``id``, ``score`` and ``highlights``.
    """
    # Clamp paging so the request never carries a negative ``from``/``size``,
    # which the server would reject.
    page = max(page, 1)
    size = max(size, 0)

    must = [
        {
            "multi_match": {
                "query": query,
                "fields": ["title^3", "content", "tags^2"],  # title more important
                "type": "best_fields",
                "fuzziness": "AUTO",  # handle typos
                "minimum_should_match": "75%",
            }
        }
    ]

    filters = []
    if tags:
        filters.append({"terms": {"tags": tags}})
    filters.append({"range": {"published_at": {"gte": "now-1y"}}})

    body = {
        "query": {
            "bool": {
                "must": must,
                "filter": filters,
            }
        },
        "highlight": {
            "fields": {
                "title": {"number_of_fragments": 0},
                "content": {"fragment_size": 150, "number_of_fragments": 3},
            },
            "pre_tags": ["<strong>"],
            "post_tags": ["</strong>"],
        },
        "from": (page - 1) * size,
        "size": size,
        "_source": ["title", "author", "tags", "published_at", "views"],
    }
    result = await es.search(index="articles", body=body)

    return {
        "total": result["hits"]["total"]["value"],
        "hits": [
            {
                **hit["_source"],
                "id": hit["_id"],
                "score": hit["_score"],
                # Hits without any highlighted fragment have no "highlight" key.
                "highlights": hit.get("highlight", {}),
            }
            for hit in result["hits"]["hits"]
        ],
    }
# Autocomplete suggest
async def suggest(prefix: str) -> list[str]:
    """Return up to five de-duplicated title completions for ``prefix``.

    Uses the completion suggester on the ``title.suggest`` sub-field and
    returns the ``text`` of each option from the first suggestion entry.
    """
    completion_spec = {
        "field": "title.suggest",
        "size": 5,
        "skip_duplicates": True,
    }
    request_body = {
        "suggest": {
            "title_suggest": {"prefix": prefix, "completion": completion_spec},
        },
    }
    response = await es.search(index="articles", body=request_body)
    first_entry = response["suggest"]["title_suggest"][0]
    return [candidate["text"] for candidate in first_entry["options"]]
Aggregationen – Analysen
async def analytics_dashboard() -> dict:
    """Compute dashboard aggregations over the ``articles`` index.

    Returns the ``aggregations`` section of the search response, containing
    ``popular_tags``, ``monthly_counts``, ``avg_views_by_author`` and
    ``views_percentiles``.
    """
    # Ten most frequent tags, ordered by document count.
    tag_counts = {
        "terms": {"field": "tags", "size": 10, "order": {"_count": "desc"}},
    }
    # Number of articles per calendar month, keyed as yyyy-MM.
    per_month = {
        "date_histogram": {
            "field": "published_at",
            "calendar_interval": "month",
            "format": "yyyy-MM",
        },
    }
    # Five author buckets, each with the average and the sum of views.
    per_author = {
        "terms": {"field": "author", "size": 5},
        "aggs": {
            "avg_views": {"avg": {"field": "views"}},
            "total_views": {"sum": {"field": "views"}},
        },
    }
    # Distribution of view counts.
    view_percentiles = {
        "percentiles": {"field": "views", "percents": [50, 75, 90, 95, 99]},
    }

    request_body = {
        "size": 0,  # aggregations only, no hits
        "aggs": {
            "popular_tags": tag_counts,
            "monthly_counts": per_month,
            "avg_views_by_author": per_author,
            "views_percentiles": view_percentiles,
        },
    }
    response = await es.search(index="articles", body=request_body)
    return response["aggregations"]
Vektorsuche (KNN)
from sentence_transformers import SentenceTransformer
# Embedding model shared by the indexing and query functions that call
# model.encode(). NOTE(review): the index mapping declares dims=384, so this
# model's output is presumably 384-dimensional — confirm.
model = SentenceTransformer("all-MiniLM-L6-v2")
async def index_with_embedding(article: dict):
    """Index ``article`` together with an embedding of its text.

    The embedding is computed from the title plus the first 500 characters of
    the content. The stored document is the full ``article`` dict with an
    added ``embedding`` list, indexed under the stringified ``id``.
    """
    text = article["title"] + " " + article["content"][:500]
    # model.encode is synchronous; run it in a worker thread so the event
    # loop stays free to serve other coroutines while the model runs.
    vector = await asyncio.to_thread(model.encode, text)
    await es.index(
        index="articles",
        id=str(article["id"]),
        document={**article, "embedding": vector.tolist()},
    )
async def semantic_search(query: str, size: int = 10) -> list[dict]:
    """Return the ``size`` nearest articles to ``query`` by embedding similarity.

    Args:
        query: Text to embed and search for.
        size: Number of nearest neighbours to request and return.

    Returns:
        The ``title``, ``content`` and ``tags`` source fields of each hit.
    """
    # model.encode is synchronous; keep it off the event loop.
    query_vector = (await asyncio.to_thread(model.encode, query)).tolist()
    result = await es.search(
        index="articles",
        knn={
            "field": "embedding",
            "query_vector": query_vector,
            "k": size,
            # num_candidates must not be smaller than k, otherwise the
            # request is rejected.
            "num_candidates": max(100, size),
        },
        # Without an explicit size the search returns at most the default
        # number of hits, even when k is larger.
        size=size,
        source=["title", "content", "tags"],
    )
    return [hit["_source"] for hit in result["hits"]["hits"]]
# Hybrid search: keyword + vector
async def hybrid_search(query: str) -> list[dict]:
    """Search ``articles`` with a keyword query and a kNN query combined.

    The keyword part is a ``multi_match`` on ``title`` (boost 2) and
    ``content``; the vector part is a kNN search on ``embedding`` with
    ``k=10``. The request asks for the two result lists to be merged with
    reciprocal rank fusion.

    Returns:
        The ``_source`` of each hit in the fused result list.
    """
    # model.encode is synchronous; keep it off the event loop.
    embedding = (await asyncio.to_thread(model.encode, query)).tolist()
    result = await es.search(
        index="articles",
        body={
            "query": {
                "multi_match": {
                    "query": query,
                    "fields": ["title^2", "content"],
                }
            },
            "knn": {
                "field": "embedding",
                "query_vector": embedding,
                "k": 10,
                "num_candidates": 50,
            },
            # NOTE(review): availability of the top-level "rank"/"rrf" option
            # depends on the Elasticsearch version and license — confirm
            # against the target cluster.
            "rank": {
                "rrf": {}  # reciprocal rank fusion to combine scores
            },
        },
    )
    return [hit["_source"] for hit in result["hits"]["hits"]]
Elasticsearch ist im Jahr 2026 die vielseitigste Suchplattform: Volltextsuche für Inhalte, Aggregationen für Analysen und Vektorsuche für semantische Ähnlichkeit in KI-Anwendungen. Der Python-Client ist übersichtlich und asynchron nutzbar. Beginnen Sie mit der Volltextsuche, ergänzen Sie Aggregationen für Analyse-Dashboards und nehmen Sie die Vektorsuche hinzu, sobald Sie semantisches Verständnis benötigen.
🔗 Share this article
✍️ Leave a Comment