يُعد Prometheus وGrafana حزمة المراقبة المعيارية في الصناعة في عام 2026. يجمع Prometheus مقاييس السلاسل الزمنية ويخزّنها، ويعرضها Grafana بصريًا في لوحات المعلومات. ويوفران معًا رؤية واضحة لأداء التطبيقات وسلامة البنية التحتية ومقاييس الأعمال. يغطي هذا الدليل التثبيت والتكوين وبناء لوحات معلومات جاهزة لبيئة الإنتاج.
📋 Table of Contents
نظرة عامة على الهندسة المعمارية
Monitoring Stack:
Application
↓ exposes /metrics endpoint
Prometheus (collector + time-series DB)
↓ scrapes every 15s
↓ evaluates alert rules
Alertmanager (routes alerts)
↓ PagerDuty / Slack / Email
Grafana (visualization)
← queries Prometheus (PromQL)
← queries Loki (logs)
← queries Tempo (traces)
إعداد Docker
# compose.yml — complete monitoring stack
#
# Services: Prometheus (metrics collection and storage), Grafana (dashboards),
# node-exporter (host metrics) and Alertmanager (alert routing).
#
# NOTE(review): every image uses the floating `latest` tag; pin explicit
# versions before running this in production so upgrades are deliberate.
services:
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./alert.rules.yml:/etc/prometheus/alert.rules.yml
      # Named volume so the TSDB survives container re-creation.
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.retention.time=30d'
      # Enables the HTTP reload/quit endpoints (POST /-/reload).
      - '--web.enable-lifecycle'

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      # The admin password is read from the shell environment or a .env file
      # instead of being committed here; Compose aborts with this message if
      # the variable is unset or empty.
      GF_SECURITY_ADMIN_PASSWORD: "${GF_SECURITY_ADMIN_PASSWORD:?set GF_SECURITY_ADMIN_PASSWORD in the environment or .env}"
      GF_USERS_ALLOW_SIGN_UP: "false"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards
      - ./grafana/datasources:/etc/grafana/provisioning/datasources

  node-exporter:
    image: prom/node-exporter:latest
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      # Host root filesystem, so node_filesystem_* metrics describe the host
      # rather than the container's own filesystem.
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'

  alertmanager:
    image: prom/alertmanager:latest
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml

volumes:
  prometheus_data:
  grafana_data:
تكوين بروميثيوس
# prometheus.yml
# Global defaults, alert routing and the list of scrape jobs.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  # Labels attached to data leaving this server (alerts, remote write).
  external_labels:
    cluster: production
    env: prod

rule_files:
  - "/etc/prometheus/alert.rules.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "alertmanager:9093"

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: prometheus
    static_configs:
      - targets:
          - "localhost:9090"

  # The instrumented application; scraped more often than the global default.
  - job_name: myapp
    scrape_interval: 10s
    metrics_path: /metrics
    static_configs:
      - targets:
          - "myapp:8000"

  # Host-level metrics from node-exporter.
  - job_name: node
    static_configs:
      - targets:
          - "node-exporter:9100"

  # Kubernetes pod discovery: keep only pods carrying the annotation
  # prometheus.io/scrape: "true".
  - job_name: kubernetes-pods
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      - source_labels:
          - __meta_kubernetes_pod_annotation_prometheus_io_scrape
        action: keep
        regex: 'true'
تجهيز التطبيق الخاص بك
# FastAPI + Prometheus metrics
from prometheus_client import Counter, Histogram, Gauge, generate_latest
from fastapi import FastAPI, Request, Response
import time
app = FastAPI()

# --- Metric definitions ------------------------------------------------------
# All metrics are created once at import time and shared by the middleware,
# the /metrics endpoint and application code.

# Count of handled HTTP requests, partitioned by method, endpoint and status.
REQUEST_COUNT = Counter(
    "http_requests_total",
    "Total HTTP requests",
    labelnames=("method", "endpoint", "status_code"),
)

# Request latency in seconds; explicit buckets span 10 ms to 5 s.
REQUEST_DURATION = Histogram(
    "http_request_duration_seconds",
    "HTTP request duration",
    labelnames=("method", "endpoint"),
    buckets=(0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0),
)

# Number of users currently active; incremented and decremented by app code.
# NOTE(review): the "_total" suffix is conventionally used for counters, not
# gauges; the name is kept as-is so existing queries keep working.
ACTIVE_USERS = Gauge("active_users_total", "Currently active users")

# Database query latency in seconds; no buckets are passed, so the client
# library's defaults apply.
DB_QUERY_DURATION = Histogram(
    "db_query_duration_seconds",
    "Database query duration",
    labelnames=("query_type", "table"),
)
@app.middleware("http")
async def metrics_middleware(request: Request, call_next):
    """Record a request count and a latency observation for each HTTP request.

    Args:
        request: The incoming request.
        call_next: Callable that forwards the request to the rest of the app
            and returns its response.

    Returns:
        The response produced by ``call_next``, unmodified.

    Raises:
        Whatever ``call_next`` raises; the exception is re-raised after the
        metrics have been recorded with status code 500.
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump when the wall clock is adjusted.
    start = time.perf_counter()
    # Used when the handler raises before any response object exists.
    status_code = 500
    try:
        response = await call_next(request)
        status_code = response.status_code
        return response
    finally:
        duration = time.perf_counter() - start
        # Label by the route template (e.g. "/users/{user_id}") instead of the
        # raw URL path: raw paths contain IDs and would create one time series
        # per distinct URL. The matched route is read from the ASGI scope when
        # the router has stored it there; requests that matched no route share
        # a single "unmatched" label value.
        route = request.scope.get("route")
        endpoint = getattr(route, "path", None) or "unmatched"
        REQUEST_COUNT.labels(
            method=request.method,
            endpoint=endpoint,
            status_code=status_code,
        ).inc()
        REQUEST_DURATION.labels(
            method=request.method,
            endpoint=endpoint,
        ).observe(duration)
@app.get("/metrics")
async def metrics():
    """Serve every registered metric in the Prometheus text exposition format.

    Returns:
        A response whose body is the output of ``generate_latest()`` and whose
        Content-Type is the value the client library declares for that output.
    """
    # Local import so this block does not depend on a change to the import
    # lines at the top of the file.
    from prometheus_client import CONTENT_TYPE_LATEST

    # CONTENT_TYPE_LATEST carries the exposition-format version and charset,
    # which a bare "text/plain" omits.
    return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
# Usage in code
def on_user_login():
    """Call when a user logs in: one more active user."""
    ACTIVE_USERS.inc()


def on_user_logout():
    """Call when a user logs out: one fewer active user."""
    ACTIVE_USERS.dec()


async def fetch_users():
    """Load all users while timing the query.

    ``await`` is only valid inside a coroutine, so the timed query lives in an
    ``async def``. ``db`` is the application's database handle and is expected
    to be defined elsewhere (it is not defined in this snippet).
    """
    # The context manager observes the elapsed time when the block exits.
    with DB_QUERY_DURATION.labels(query_type="SELECT", table="users").time():
        users = await db.users.find_all()
    return users
استعلامات PromQL
# Request rate (per second, 5-min window)
rate(http_requests_total[5m])
# 5xx request rate (errors per second)
rate(http_requests_total{status_code=~"5.."}[5m])
# 95th percentile response time
histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))
# CPU usage (%)
100 - (avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
# Memory usage percentage
(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100
# Available disk space (%)
(node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100
# Top 5 endpoints by requests per second.
# Sum per endpoint first, then rank; a trailing "by (endpoint)" on topk would
# instead rank series inside each endpoint separately.
topk(5, sum by (endpoint) (rate(http_requests_total[5m])))
# Alert: error ratio > 5%.
# Both sides are aggregated before dividing. Without sum(), the division
# matches series label-for-label, so each 5xx series is divided by itself and
# the result is always 1.
(
  sum(rate(http_requests_total{status_code=~"5.."}[5m]))
  /
  sum(rate(http_requests_total[5m]))
) > 0.05
قواعد التنبيه
# alert.rules.yml
# Alerting rules evaluated by Prometheus and sent to Alertmanager.
groups:
  - name: application
    rules:
      # More than 5% of requests on an instance return a 5xx status.
      # Both sides are summed per instance before dividing: a plain
      # rate(5xx) / rate(all) matches series label-for-label, so every 5xx
      # series is divided by itself, yields 1, and fires on any error at all.
      # Grouping by instance keeps the label used in the summary below.
      - alert: HighErrorRate
        expr: |
          (
            sum by (instance) (rate(http_requests_total{status_code=~"5.."}[5m]))
            /
            sum by (instance) (rate(http_requests_total[5m]))
          ) > 0.05
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "High error rate on {{ $labels.instance }}"
          description: "Error rate is {{ $value | humanizePercentage }}"

      # 95th percentile latency above one second.
      - alert: HighLatency
        expr: |
          histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1.0
        for: 5m
        labels:
          severity: warning

      # Less than 10% of a filesystem is still available.
      - alert: LowDiskSpace
        expr: |
          (node_filesystem_avail_bytes / node_filesystem_size_bytes) < 0.10
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Low disk space on {{ $labels.instance }}"

      # More than 90% of memory is in use.
      - alert: HighMemoryUsage
        expr: |
          (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) > 0.90
        for: 5m
        labels:
          severity: warning
إعداد لوحة تحكم Grafana
# grafana/datasources/prometheus.yml
# Provisions Prometheus as Grafana's default data source at startup.
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    url: "http://prometheus:9090"
    isDefault: true
    jsonData:
      # Same value as scrape_interval in prometheus.yml.
      timeInterval: "15s"

# Community dashboards that can be imported from grafana.com by ID:
#   1860  - Node Exporter Full
#   16110 - FastAPI
#   9628  - PostgreSQL
يشكّل Prometheus مع Grafana في عام 2026 حزمة المراقبة المثلى. جهّز تطبيقاتك باستخدام مكتبة prometheus_client، واضبط عملية جمع المقاييس (scraping)، واكتب قواعد التنبيه، وأنشئ لوحات معلومات Grafana. استورد لوحات معلومات المجتمع الخاصة بـ Node Exporter والأطر الشائعة – فمعظم الخدمات الشائعة لديها بالفعل لوحات معلومات جاهزة على grafana.com/grafana/dashboards.
🔗 Share this article
✍️ Leave a Comment