Tutorial SQL avançado 2026: funções de janela, CTEs e otimização de consulta

⏱️7 min read · 1,343 words

SQL continua sendo uma das habilidades de maior ROI para desenvolvedores em 2026. SQL avançado desbloqueia análises complexas, consultas otimizadas e padrões que transformam aplicativos lentos em aplicativos rápidos. Este guia cobre funções de janela, CTEs, otimização de consultas e recursos específicos do PostgreSQL.

📋 Table of Contents

Funções de janela
Expressões de tabela comuns (CTEs)
Otimização de consulta
JSON no PostgreSQL
Agregações Avançadas
Transações e bloqueio
Funções úteis do PostgreSQL

Funções de janela

As funções de janela operam em um conjunto de linhas relacionadas à linha atual — sem comprimi-las como GROUP BY faz.

-- Basic window function syntax
SELECT
    name,
    department,
    salary,
    -- Aggregate over window (no GROUP BY needed)
    AVG(salary) OVER (PARTITION BY department) AS dept_avg,
    -- Ranking within partition
    RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS salary_rank,
    -- Row number (no ties)
    ROW_NUMBER() OVER (ORDER BY salary DESC) AS global_rank,
    -- Running total
    SUM(salary) OVER (ORDER BY hire_date ROWS UNBOUNDED PRECEDING) AS running_payroll
FROM employees;

-- Result (example):
-- name    | dept    | salary | dept_avg | salary_rank | global_rank
-- Alice   | Eng     | 90000  | 85000    | 1           | 1
-- Bob     | Eng     | 80000  | 85000    | 2           | 3
-- Carol   | Sales   | 70000  | 65000    | 1           | 5

-- LEAD/LAG: access other rows relative to current
SELECT
    date,
    revenue,
    LAG(revenue, 1) OVER (ORDER BY date) AS prev_day_revenue,
    LEAD(revenue, 1) OVER (ORDER BY date) AS next_day_revenue,
    revenue - LAG(revenue, 1) OVER (ORDER BY date) AS day_over_day_change,
    ROUND(
        100.0 * (revenue - LAG(revenue) OVER (ORDER BY date)) /
        LAG(revenue) OVER (ORDER BY date), 2
    ) AS pct_change
FROM daily_sales
ORDER BY date;

Expressões de tabela comuns (CTEs)

-- Basic CTE
WITH top_customers AS (
    SELECT
        customer_id,
        SUM(order_total) AS lifetime_value,
        COUNT(*) AS order_count
    FROM orders
    WHERE created_at >= NOW() - INTERVAL '1 year'
    GROUP BY customer_id
    HAVING SUM(order_total) > 1000
)
SELECT
    c.name,
    c.email,
    tc.lifetime_value,
    tc.order_count
FROM top_customers tc
JOIN customers c ON c.id = tc.customer_id
ORDER BY tc.lifetime_value DESC;

-- Multiple CTEs
WITH
monthly_revenue AS (
    SELECT
        DATE_TRUNC('month', created_at) AS month,
        SUM(total) AS revenue
    FROM orders
    GROUP BY 1
),
growth AS (
    SELECT
        month,
        revenue,
        LAG(revenue) OVER (ORDER BY month) AS prev_month,
        ROUND(
            100.0 * (revenue - LAG(revenue) OVER (ORDER BY month)) /
            NULLIF(LAG(revenue) OVER (ORDER BY month), 0), 2
        ) AS growth_pct
    FROM monthly_revenue
)
SELECT * FROM growth
WHERE month >= '2026-01-01'
ORDER BY month;

-- Recursive CTE: organizational hierarchy
WITH RECURSIVE org_tree AS (
    -- Anchor: top-level (no manager)
    SELECT id, name, manager_id, 0 AS depth, name::TEXT AS path
    FROM employees
    WHERE manager_id IS NULL

    UNION ALL

    -- Recursive: add reports
    SELECT e.id, e.name, e.manager_id, t.depth + 1, t.path || ' > ' || e.name
    FROM employees e
    JOIN org_tree t ON e.manager_id = t.id
    WHERE t.depth < 10  -- prevent infinite loops
)
SELECT depth, path, name FROM org_tree ORDER BY path;

Otimização de consulta

-- EXPLAIN ANALYZE: understand query plan
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT *
FROM orders o
JOIN customers c ON o.customer_id = c.id
WHERE o.status = 'pending'
  AND c.country = 'AU'
ORDER BY o.created_at DESC
LIMIT 20;

-- Key things to look for in EXPLAIN output:
-- "Seq Scan" = no index (often slow on large tables)
-- "Index Scan" = index used (fast)
-- "Hash Join" = in-memory join (good for large tables)
-- "Nested Loop" = O(n*m) join (good when one side is small)
-- high "rows=X (actual rows=Y)" discrepancy = outdated statistics

-- Fix: Create appropriate indexes
CREATE INDEX CONCURRENTLY idx_orders_status_created
ON orders (status, created_at DESC)
WHERE status = 'pending';  -- partial index: only index pending orders

-- Covering index (includes all needed columns = index-only scan)
CREATE INDEX idx_orders_covering
ON orders (customer_id, status)
INCLUDE (total, created_at);

-- Update statistics after large data changes
ANALYZE orders;
ANALYZE customers;

JSON no PostgreSQL

-- JSONB: binary JSON with indexing support (preferred over JSON)
CREATE TABLE events (
    id BIGSERIAL PRIMARY KEY,
    type VARCHAR(50),
    payload JSONB,
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Insert JSON
INSERT INTO events (type, payload) VALUES
('user_signup', '{"email": "alice@example.com", "plan": "pro", "source": "organic"}'),
('purchase', '{"amount": 99.99, "items": [{"sku": "PRO001", "qty": 1}], "currency": "USD"}');

-- Query JSON fields
SELECT
    payload ->> 'email' AS email,
    payload -> 'plan' AS plan,      -- returns JSON (with quotes)
    payload ->> 'plan' AS plan_text -- returns text (no quotes)
FROM events
WHERE type = 'user_signup'
  AND payload ->> 'plan' = 'pro';

-- Query nested JSON
SELECT payload -> 'items' -> 0 ->> 'sku' AS first_sku
FROM events
WHERE type = 'purchase';

-- JSONB aggregation
SELECT
    payload ->> 'plan' AS plan,
    COUNT(*) AS count,
    AVG((payload ->> 'amount')::numeric) AS avg_spend
FROM events
GROUP BY payload ->> 'plan';

-- Index on JSONB field
CREATE INDEX idx_events_email ON events ((payload ->> 'email'));
CREATE INDEX idx_events_payload ON events USING GIN (payload);  -- for contains operator

Agregações Avançadas

-- FILTER clause: conditional aggregation
SELECT
    DATE_TRUNC('month', created_at) AS month,
    COUNT(*) AS total_orders,
    COUNT(*) FILTER (WHERE status = 'completed') AS completed,
    COUNT(*) FILTER (WHERE status = 'cancelled') AS cancelled,
    SUM(total) FILTER (WHERE status = 'completed') AS revenue,
    ROUND(
        100.0 * COUNT(*) FILTER (WHERE status = 'completed') / COUNT(*), 1
    ) AS completion_rate
FROM orders
GROUP BY 1
ORDER BY 1;

-- DISTINCT ON: return first row per group (PostgreSQL only)
SELECT DISTINCT ON (customer_id)
    customer_id, order_id, total, created_at
FROM orders
ORDER BY customer_id, created_at DESC;  -- returns most recent order per customer

-- Crosstab / Pivot
SELECT
    product_name,
    SUM(CASE WHEN EXTRACT(MONTH FROM sale_date) = 1 THEN quantity ELSE 0 END) AS jan,
    SUM(CASE WHEN EXTRACT(MONTH FROM sale_date) = 2 THEN quantity ELSE 0 END) AS feb,
    SUM(CASE WHEN EXTRACT(MONTH FROM sale_date) = 3 THEN quantity ELSE 0 END) AS mar
FROM sales
GROUP BY product_name;

-- PERCENTILE
SELECT
    department,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary) AS median_salary,
    PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY salary) AS p90_salary
FROM employees
GROUP BY department;

Transações e bloqueio

-- Explicit transaction
BEGIN;

UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;

-- Both succeed or neither does
COMMIT;
-- Or: ROLLBACK;

-- Savepoints for partial rollback
BEGIN;
INSERT INTO orders (customer_id, total) VALUES (1, 99.99);
SAVEPOINT order_saved;

INSERT INTO order_items (order_id, product_id) VALUES (LASTVAL(), 999);
-- If this fails:
ROLLBACK TO SAVEPOINT order_saved;
-- Can try alternative without losing the order insert
COMMIT;

-- SELECT FOR UPDATE: lock rows during read
BEGIN;
SELECT balance FROM accounts WHERE id = 1 FOR UPDATE;
-- Other transactions cannot modify this row until COMMIT/ROLLBACK
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
COMMIT;

-- SKIP LOCKED: process queue items without blocking
DELETE FROM job_queue
WHERE id = (
    SELECT id FROM job_queue
    ORDER BY created_at
    FOR UPDATE SKIP LOCKED
    LIMIT 1
)
RETURNING *;

Funções úteis do PostgreSQL

-- String functions
SELECT
    UPPER(name), LOWER(email),
    LENGTH(description),
    TRIM(name), LTRIM(name), RTRIM(name),
    LEFT(description, 100) || '...' AS truncated,
    REPLACE(phone, '-', '') AS clean_phone,
    REGEXP_REPLACE(phone, '[^0-9]', '', 'g') AS digits_only
FROM customers;

-- Date functions
SELECT
    NOW(),                                        -- current timestamp
    CURRENT_DATE,                                 -- current date
    DATE_TRUNC('week', created_at),               -- start of week
    DATE_PART('year', created_at) AS year,
    AGE(NOW(), created_at) AS account_age,
    created_at + INTERVAL '30 days' AS expires_at,
    TO_CHAR(created_at, 'YYYY-MM-DD HH24:MI') AS formatted
FROM users;

-- Array functions
SELECT
    tags,
    ARRAY_LENGTH(tags, 1) AS tag_count,
    ARRAY_AGG(DISTINCT category) FILTER (WHERE category IS NOT NULL) AS categories
FROM posts
GROUP BY tags;

Habilidades avançadas de SQL estão entre as mais transferíveis e mais valorizadas no desenvolvimento de software. As funções de janela substituem subconsultas complexas, os CTEs tornam consultas legíveis em várias etapas e EXPLAIN ANALYZE revela por que as consultas são lentas. Domine-os e você escreverá consultas poderosas e rápidas.

🔗 Share this article

X / Twitter Facebook WhatsApp LinkedIn Telegram