Advanced PostgreSQL Examples

Advanced PostgreSQL database examples covering optimization strategies, complex queries, indexing, and performance tuning

💻 PostgreSQL Performance Optimization sql

🔴 complex ⭐⭐⭐⭐⭐

Advanced PostgreSQL optimization techniques covering indexing, query optimization, and configuration tuning

⏱️ 60 min 🏷️ postgresql, database, sql, optimization, advanced
Prerequisites: PostgreSQL basics, SQL, Database administration, Performance tuning
-- PostgreSQL Performance Optimization Guide
-- Comprehensive examples for database performance tuning

-- 1. PostgreSQL Configuration (postgresql.conf)
-- ================================================================================

# Memory Settings (shared_buffers, work_mem, maintenance_work_mem)
shared_buffers = 256MB                    # 25% of RAM for shared memory
effective_cache_size = 1GB                # Estimated OS and disk cache
work_mem = 4MB                            # Memory for sort operations
maintenance_work_mem = 64MB              # Memory for maintenance operations
effective_io_concurrency = 200           # For SSD storage

# Connection Settings
max_connections = 100                     # Maximum concurrent connections
superuser_reserved_connections = 3       # Reserved for superusers

# WAL (Write-Ahead Logging) Settings
wal_buffers = 16MB                        # WAL buffer size
checkpoint_completion_target = 0.9       # Target for checkpoint completion
wal_writer_delay = 200ms                 # Delay between WAL writes

# Autovacuum Settings
autovacuum = on                           # Enable autovacuum
autovacuum_max_workers = 3               # Maximum autovacuum workers
autovacuum_naptime = 1min                # Time between autovacuum runs

# Random Page Cost (affects query planner)
random_page_cost = 1.1                    # For SSD storage
seq_page_cost = 1.0                       # Sequential page cost

# Statistics Target
default_statistics_target = 100           # Statistics precision
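
-- Most of these parameters can also be set at runtime with ALTER SYSTEM, which
-- writes postgresql.auto.conf; a minimal sketch (the values are illustrative,
-- not tuning advice):
ALTER SYSTEM SET work_mem = '8MB';
ALTER SYSTEM SET random_page_cost = 1.1;
SELECT pg_reload_conf();   -- applies reloadable settings without a restart
-- NOTE: shared_buffers and max_connections still require a full server restart.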

-- 2. Advanced Indexing Strategies
-- ================================================================================

-- B-Tree Index (default, good for equality and range queries)
CREATE INDEX CONCURRENTLY idx_users_email ON users(email);
CREATE INDEX CONCURRENTLY idx_orders_created_at ON orders(created_at DESC);

-- Composite Indexes (optimize multi-column queries)
CREATE INDEX CONCURRENTLY idx_orders_user_status_created
ON orders(user_id, status, created_at DESC);

-- Partial Indexes (index only specific rows)
CREATE INDEX CONCURRENTLY idx_active_users
ON users(created_at) WHERE status = 'active';

CREATE INDEX CONCURRENTLY idx_high_value_orders
ON orders(customer_id, total_amount) WHERE total_amount > 1000;

-- Unique Indexes
CREATE UNIQUE INDEX CONCURRENTLY idx_users_username_unique
ON users(username) WHERE username IS NOT NULL;

-- Expression Indexes (index computed values)
CREATE INDEX CONCURRENTLY idx_users_lower_email
ON users(LOWER(email));

-- NOTE: EXTRACT on a timestamptz is only STABLE; pin a time zone to make the
-- expression IMMUTABLE and therefore indexable
CREATE INDEX CONCURRENTLY idx_orders_date_year
ON orders(EXTRACT(YEAR FROM created_at AT TIME ZONE 'UTC'));

-- GIN Index (for array and full-text search)
CREATE INDEX CONCURRENTLY idx_products_tags
ON products USING GIN(tags);

CREATE INDEX CONCURRENTLY idx_documents_content
ON documents USING GIN(to_tsvector('english', content));

-- Hash Index (for equality comparisons only)
CREATE INDEX CONCURRENTLY idx_session_hash
ON sessions USING HASH(session_token);

-- BRIN Index (Block Range Index, good for time-series data)
CREATE INDEX CONCURRENTLY idx_metrics_timestamp
ON metrics USING BRIN(timestamp);
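
-- CREATE INDEX CONCURRENTLY can fail partway (e.g. on a deadlock) and leave an
-- INVALID index behind that takes space but is never used; a sketch for finding them:
SELECT n.nspname AS schema_name, c.relname AS index_name
FROM pg_class c
JOIN pg_index i ON i.indexrelid = c.oid
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE NOT i.indisvalid;
-- Any index reported here should be dropped (DROP INDEX CONCURRENTLY) and rebuilt.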

-- 3. Query Optimization Examples
-- ================================================================================

-- EXPLAIN ANALYZE for query performance analysis
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT u.name, COUNT(o.id) as order_count, SUM(o.total_amount) as total_spent
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2023-01-01'
    AND o.status = 'completed'
GROUP BY u.id, u.name
HAVING COUNT(o.id) > 10
ORDER BY total_spent DESC
LIMIT 10;

-- Materialized Views for complex aggregations
CREATE MATERIALIZED VIEW mv_user_order_stats AS
SELECT
    u.id as user_id,
    u.name as user_name,
    u.email,
    COUNT(o.id) as total_orders,
    SUM(CASE WHEN o.status = 'completed' THEN 1 ELSE 0 END) as completed_orders,
    SUM(o.total_amount) as total_spent,
    AVG(o.total_amount) as avg_order_value,
    MAX(o.created_at) as last_order_date
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, u.email
WITH DATA;

-- Create unique index on materialized view
CREATE UNIQUE INDEX idx_mv_user_order_stats_user_id
ON mv_user_order_stats(user_id);

-- Refresh materialized view
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_user_order_stats;
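
-- Refreshes are usually scheduled. A sketch assuming the pg_cron extension is
-- available (it is not part of core PostgreSQL and must be preloaded):
CREATE EXTENSION IF NOT EXISTS pg_cron;
SELECT cron.schedule('0 3 * * *', 'REFRESH MATERIALIZED VIEW CONCURRENTLY mv_user_order_stats');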

-- Common Table Expressions (CTEs) for complex queries
WITH monthly_sales AS (
    SELECT
        DATE_TRUNC('month', created_at) as month,
        SUM(total_amount) as monthly_total,
        COUNT(*) as order_count
    FROM orders
    WHERE created_at >= '2023-01-01'
    GROUP BY DATE_TRUNC('month', created_at)
),
sales_growth AS (
    SELECT
        month,
        monthly_total,
        order_count,
        LAG(monthly_total) OVER (ORDER BY month) as previous_month_total,
        LAG(order_count) OVER (ORDER BY month) as previous_month_count
    FROM monthly_sales
)
SELECT
    month,
    monthly_total,
    order_count,
    previous_month_total,
    previous_month_count,
    CASE
        WHEN previous_month_total > 0
        THEN ROUND((monthly_total - previous_month_total) * 100.0 / previous_month_total, 2)
        ELSE NULL
    END as growth_percentage
FROM sales_growth;

-- Window Functions for efficient analytics
SELECT
    id,
    user_id,
    total_amount,
    created_at,
    ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY total_amount DESC) as order_rank,
    SUM(total_amount) OVER (PARTITION BY user_id ORDER BY created_at
               ROWS UNBOUNDED PRECEDING) as running_total,
    AVG(total_amount) OVER (PARTITION BY user_id ORDER BY created_at
               ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) as moving_avg
FROM orders
WHERE user_id = 123;

-- 4. Partitioning for Large Tables
-- ================================================================================

-- Range Partitioning (time-series data)
CREATE TABLE orders_partitioned (
    id BIGSERIAL,
    user_id INTEGER NOT NULL,
    status VARCHAR(20) NOT NULL,
    total_amount DECIMAL(10,2) NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
) PARTITION BY RANGE (created_at);

-- Create partitions
CREATE TABLE orders_2023_q1 PARTITION OF orders_partitioned
    FOR VALUES FROM ('2023-01-01') TO ('2023-04-01');

CREATE TABLE orders_2023_q2 PARTITION OF orders_partitioned
    FOR VALUES FROM ('2023-04-01') TO ('2023-07-01');

CREATE TABLE orders_2023_q3 PARTITION OF orders_partitioned
    FOR VALUES FROM ('2023-07-01') TO ('2023-10-01');

CREATE TABLE orders_2023_q4 PARTITION OF orders_partitioned
    FOR VALUES FROM ('2023-10-01') TO ('2024-01-01');

-- List Partitioning (categorical data)
CREATE TABLE users_partitioned (
    id BIGSERIAL,
    username VARCHAR(50) NOT NULL,
    email VARCHAR(255) NOT NULL,
    region VARCHAR(50) NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
) PARTITION BY LIST (region);

-- Create partitions by region
CREATE TABLE users_north_america PARTITION OF users_partitioned
    FOR VALUES IN ('USA', 'Canada', 'Mexico');

CREATE TABLE users_europe PARTITION OF users_partitioned
    FOR VALUES IN ('UK', 'Germany', 'France', 'Spain', 'Italy');

CREATE TABLE users_asia PARTITION OF users_partitioned
    FOR VALUES IN ('China', 'Japan', 'India', 'South Korea');

CREATE TABLE users_other PARTITION OF users_partitioned
    DEFAULT;
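
-- Partition maintenance sketch: add the next range before its data arrives, and
-- create indexes on the parent so every partition inherits them (PostgreSQL 11+):
CREATE TABLE orders_2024_q1 PARTITION OF orders_partitioned
    FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');

CREATE INDEX idx_orders_partitioned_user_created
ON orders_partitioned (user_id, created_at DESC);

-- Verify that the planner prunes untouched partitions:
EXPLAIN SELECT count(*)
FROM orders_partitioned
WHERE created_at >= '2023-07-01' AND created_at < '2023-10-01';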

-- 5. Advanced Function and Trigger Examples
-- ================================================================================

-- Search function using full-text search
-- (assumes products has a tsvector column named search_vector; a sketch of adding
--  one follows the function)
CREATE OR REPLACE FUNCTION search_products(query_text TEXT)
RETURNS TABLE(
    id INTEGER,
    name TEXT,
    description TEXT,
    rank REAL
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        p.id,
        p.name::TEXT,          -- cast VARCHAR columns to match the declared TEXT result type
        p.description::TEXT,
        ts_rank(p.search_vector, plainto_tsquery('english', query_text))
    FROM products p
    WHERE p.search_vector @@ plainto_tsquery('english', query_text)
    ORDER BY 4 DESC;           -- order by rank; the bare name would be ambiguous with the OUT parameter
END;
$$ LANGUAGE plpgsql;
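
-- A sketch of one way to add the assumed search_vector column (PostgreSQL 12+
-- generated column; the column name is an assumption of this example):
ALTER TABLE products ADD COLUMN IF NOT EXISTS search_vector tsvector
    GENERATED ALWAYS AS (
        to_tsvector('english', coalesce(name, '') || ' ' || coalesce(description, ''))
    ) STORED;

CREATE INDEX CONCURRENTLY idx_products_search_vector
ON products USING GIN(search_vector);

-- Usage (query text is illustrative):
SELECT * FROM search_products('ergonomic keyboard');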

-- Audit trigger for tracking changes
CREATE OR REPLACE FUNCTION audit_trigger_function()
RETURNS TRIGGER AS $$
BEGIN
    IF TG_OP = 'DELETE' THEN
        INSERT INTO audit_table (
            table_name, operation, old_data, changed_by, changed_at
        ) VALUES (
            TG_TABLE_NAME, TG_OP, to_jsonb(OLD), current_user, NOW()
        );
        RETURN OLD;
    ELSIF TG_OP = 'UPDATE' THEN
        INSERT INTO audit_table (
            table_name, operation, old_data, new_data, changed_by, changed_at
        ) VALUES (
            TG_TABLE_NAME, TG_OP, to_jsonb(OLD), to_jsonb(NEW), current_user, NOW()
        );
        RETURN NEW;
    ELSIF TG_OP = 'INSERT' THEN
        INSERT INTO audit_table (
            table_name, operation, new_data, changed_by, changed_at
        ) VALUES (
            TG_TABLE_NAME, TG_OP, to_jsonb(NEW), current_user, NOW()
        );
        RETURN NEW;
    END IF;
    RETURN NULL;
END;
$$ LANGUAGE plpgsql;

-- Apply audit trigger
CREATE TRIGGER audit_users_trigger
    AFTER INSERT OR UPDATE OR DELETE ON users
    FOR EACH ROW EXECUTE FUNCTION audit_trigger_function();
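
-- The trigger assumes an audit_table shaped like the following (a minimal sketch;
-- adjust types and retention to your needs):
CREATE TABLE IF NOT EXISTS audit_table (
    id BIGSERIAL PRIMARY KEY,
    table_name TEXT NOT NULL,
    operation TEXT NOT NULL,
    old_data JSONB,
    new_data JSONB,
    changed_by TEXT NOT NULL,
    changed_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);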

-- 6. Performance Monitoring and Diagnostics
-- ================================================================================

-- Enable pg_stat_statements extension
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- Query performance statistics
SELECT
    query,
    calls,
    total_exec_time,
    mean_exec_time,
    rows,
    100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 10;
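
-- Statistics accumulate since the last reset; clear them before a benchmark run:
SELECT pg_stat_statements_reset();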

-- Index usage statistics
SELECT
    schemaname,
    relname AS table_name,
    indexrelname AS index_name,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;

-- Table size and row count statistics
SELECT
    schemaname,
    relname AS table_name,
    pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
    pg_size_pretty(pg_relation_size(relid)) AS table_size,
    pg_size_pretty(pg_total_relation_size(relid) -
                 pg_relation_size(relid)) AS index_size,
    n_tup_ins,
    n_tup_upd,
    n_tup_del,
    n_live_tup,
    n_dead_tup,
    last_vacuum,
    last_autovacuum,
    last_analyze,
    last_autoanalyze
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC;

-- Long-running queries
SELECT
    pid,
    now() - pg_stat_activity.query_start AS duration,
    query,
    state,
    wait_event_type,
    wait_event
FROM pg_stat_activity
WHERE (now() - pg_stat_activity.query_start) > interval '5 minutes'
    AND state = 'active'
ORDER BY duration DESC;
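
-- A stuck query can be stopped by pid (12345 is a placeholder taken from the query above):
SELECT pg_cancel_backend(12345);     -- cancel the current query only
SELECT pg_terminate_backend(12345);  -- terminate the whole backend connection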

-- Blocking queries
SELECT
    blocked_locks.pid AS blocked_pid,
    blocked_activity.usename AS blocked_user,
    blocking_locks.pid AS blocking_pid,
    blocking_activity.usename AS blocking_user,
    blocked_activity.query AS blocked_statement,
    blocking_activity.query AS current_statement_in_blocking_process
FROM pg_catalog.pg_locks blocked_locks
    JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
    JOIN pg_catalog.pg_locks blocking_locks
        ON blocking_locks.locktype = blocked_locks.locktype
        AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database
        AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
        AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
        AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
        AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
        AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
        AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
        AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
        AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
        AND blocking_locks.pid != blocked_locks.pid
    JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;

-- 7. Connection Pooling Configuration (pgbouncer.ini)
-- ================================================================================
[pgbouncer]
# Connection settings
listen_port = 6432
listen_addr = 127.0.0.1

# Database connections
pool_mode = transaction
max_client_conn = 100
default_pool_size = 20
min_pool_size = 5
reserve_pool_size = 5

# Timeouts
server_reset_query = DISCARD ALL
server_check_delay = 30
server_check_query = select 1
server_lifetime = 3600
server_idle_timeout = 600

# Logging
log_connections = 1
log_disconnections = 1
log_pooler_errors = 1
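
# A [databases] section mapping logical names to backend servers is also required
# (a minimal sketch; host, port and auth settings are illustrative):
[databases]
your_database = host=127.0.0.1 port=5432 dbname=your_database

# Clients then connect to the pooler port instead of PostgreSQL directly, e.g.:
# psql -h 127.0.0.1 -p 6432 -U app_user your_database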

-- 8. Backup and Recovery Scripts
-- ================================================================================

-- Backup script (bash)
#!/bin/bash
# postgresql_backup.sh

DB_NAME="your_database"
DB_USER="postgres"
BACKUP_DIR="/var/backups/postgresql"
DATE=$(date +%Y%m%d_%H%M%S)

# Create backup directory
mkdir -p $BACKUP_DIR

# Custom format backup with compression
pg_dump -h localhost -U $DB_USER -d $DB_NAME \
    --format=custom \
    --compress=9 \
    --verbose \
    --file=$BACKUP_DIR/${DB_NAME}_${DATE}.backup

# Schema only backup
pg_dump -h localhost -U $DB_USER -d $DB_NAME \
    --schema-only \
    --file=$BACKUP_DIR/${DB_NAME}_schema_${DATE}.sql

# Data only backup
pg_dump -h localhost -U $DB_USER -d $DB_NAME \
    --data-only \
    --compress=9 \
    --file=$BACKUP_DIR/${DB_NAME}_data_${DATE}.sql.gz

# Remove old backups (keep 30 days)
find $BACKUP_DIR -name "*.backup" -mtime +30 -delete
find $BACKUP_DIR -name "*.sql" -mtime +30 -delete
find $BACKUP_DIR -name "*.sql.gz" -mtime +30 -delete

echo "Backup completed: $BACKUP_DIR/${DB_NAME}_${DATE}.backup"

-- Point-in-Time Recovery
-- Step 1: Stop PostgreSQL
sudo systemctl stop postgresql

-- Step 2: Create recovery configuration
-- (recovery.conf was removed in PostgreSQL 12: put the settings in postgresql.conf
--  and create an empty recovery.signal file in the data directory instead)
echo "restore_command = 'cp /var/lib/postgresql/wal_archive/%f %p'" >> /etc/postgresql/12/main/postgresql.conf
echo "recovery_target_time = '2023-12-01 10:00:00'" >> /etc/postgresql/12/main/postgresql.conf
touch /var/lib/postgresql/12/main/recovery.signal

-- Step 3: Restore the base backup into the data directory
-- (PITR replays WAL on top of a file-level base backup, e.g. one taken with
--  pg_basebackup; pg_restore only loads logical dumps and needs a running server)
cp -a /var/backups/postgresql/base_backup/* /var/lib/postgresql/12/main/   # illustrative path

-- Step 4: Start PostgreSQL
sudo systemctl start postgresql

-- 9. Security Configuration
-- ================================================================================

-- Row Level Security (RLS)
ALTER TABLE orders ENABLE ROW LEVEL SECURITY;

-- Policy for users to see only their own orders
CREATE POLICY user_orders_policy ON orders
    FOR ALL
    TO app_user
    USING (user_id = current_setting('app.current_user_id')::INTEGER);

-- Policy for managers to see all orders in their department
CREATE POLICY manager_orders_policy ON orders
    FOR SELECT
    TO manager_role
    USING (department_id = current_setting('app.current_department_id')::INTEGER);

-- Set user context
SET app.current_user_id = '123';
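
-- A quick way to confirm the policy works, assuming a role named app_user with
-- the necessary grants exists:
BEGIN;
SET LOCAL ROLE app_user;
SET LOCAL app.current_user_id = '123';
SELECT count(*) FROM orders;  -- only rows with user_id = 123 are visible
COMMIT;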

-- Encrypted columns using pgcrypto extension
CREATE EXTENSION IF NOT EXISTS pgcrypto;

-- Function to encrypt data
CREATE OR REPLACE FUNCTION encrypt_sensitive_data(data TEXT)
RETURNS TEXT AS $$
BEGIN
    RETURN encode(encrypt(data::bytea, 'encryption_key', 'aes'), 'base64');
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;

-- Function to decrypt data
CREATE OR REPLACE FUNCTION decrypt_sensitive_data(encrypted_data TEXT)
RETURNS TEXT AS $$
BEGIN
    RETURN convert_from(decrypt(decode(encrypted_data, 'base64'), 'encryption_key', 'aes'), 'UTF8');
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
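
-- Round-trip usage of the helpers above (the value is illustrative):
SELECT decrypt_sensitive_data(encrypt_sensitive_data('4111-1111-1111-1111'));
-- NOTE: the key is hard-coded for brevity; in production, supply it via secure
-- configuration, and consider pgcrypto's pgp_sym_encrypt/pgp_sym_decrypt, which
-- handle salting and integrity checking for you.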

-- 10. Monitoring Dashboard Query
-- ================================================================================

-- Comprehensive monitoring dashboard query
WITH
database_stats AS (
    SELECT
        pg_size_pretty(pg_database_size(current_database())) AS database_size,
        (SELECT count(*) FROM pg_stat_activity WHERE state = 'active') AS active_connections,
        (SELECT count(*) FROM pg_stat_activity WHERE state = 'idle') AS idle_connections,
        (SELECT count(*) FROM pg_locks WHERE NOT granted) AS waiting_queries
),
table_stats AS (
    SELECT
        schemaname,
        relname AS tablename,
        pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
        n_live_tup,
        n_dead_tup,
        last_vacuum,
        last_autovacuum,
        (extract(epoch from (now() - greatest(last_vacuum, last_autovacuum))) / 86400)::integer AS days_since_vacuum
    FROM pg_stat_user_tables
    ORDER BY pg_total_relation_size(relid) DESC
    LIMIT 10
),
),
index_stats AS (
    SELECT
        schemaname,
        relname AS tablename,
        indexrelname AS indexname,
        idx_scan,
        idx_tup_read,
        idx_tup_fetch,
        pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
    FROM pg_stat_user_indexes
    WHERE idx_scan > 0
    ORDER BY idx_scan DESC
    LIMIT 10
),
),
slow_queries AS (
    SELECT
        query,
        calls,
        total_exec_time,
        mean_exec_time,
        rows
    FROM pg_stat_statements
    WHERE mean_exec_time > 1000  -- queries taking more than 1 second
    ORDER BY total_exec_time DESC
    LIMIT 5
)
SELECT 'Database Stats' as metric_type, '' as details, '' as value, '' as additional_info
UNION ALL
SELECT
    'Database Stats' as metric_type,
    'Size' as details,
    database_size as value,
    '' as additional_info
FROM database_stats
UNION ALL
SELECT
    'Database Stats' as metric_type,
    'Active Connections' as details,
    active_connections::text as value,
    '' as additional_info
FROM database_stats
UNION ALL
SELECT
    'Database Stats' as metric_type,
    'Idle Connections' as details,
    idle_connections::text as value,
    '' as additional_info
FROM database_stats
UNION ALL
SELECT
    'Database Stats' as metric_type,
    'Waiting Queries' as details,
    waiting_queries::text as value,
    '' as additional_info
FROM database_stats
UNION ALL
SELECT
    'Top Tables' as metric_type,
    tablename as details,
    total_size as value,
    n_live_tup::text || ' live rows, ' || days_since_vacuum::text || ' days since vacuum' as additional_info
FROM table_stats
UNION ALL
SELECT
    'Top Indexes' as metric_type,
    indexname as details,
    idx_scan::text || ' scans' as value,
    index_size as additional_info
FROM index_stats
UNION ALL
SELECT
    'Slow Queries' as metric_type,
    left(query, 50) || '...' as details,
    mean_exec_time::text || ' ms' as value,
    calls::text || ' calls, ' || total_exec_time::text || ' ms total' as additional_info
FROM slow_queries;

💻 Complex PostgreSQL Queries sql

🔴 complex ⭐⭐⭐⭐

Advanced SQL queries demonstrating joins, subqueries, window functions, and analytical techniques

⏱️ 50 min 🏷️ postgresql, sql, complex, analytics, queries
Prerequisites: Advanced SQL, PostgreSQL, Data Analysis, Window Functions
-- Complex PostgreSQL Queries and Analytics
-- Advanced SQL patterns for data analysis and reporting

-- 1. Setup Sample Tables for Demonstrations
-- ================================================================================

CREATE TABLE customers (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100) NOT NULL,
    email VARCHAR(255) UNIQUE NOT NULL,
    registration_date DATE NOT NULL DEFAULT CURRENT_DATE,
    country VARCHAR(50) NOT NULL,
    city VARCHAR(100),
    segment VARCHAR(20) DEFAULT 'regular' CHECK (segment IN ('regular', 'premium', 'vip')),
    last_purchase_date DATE,
    total_purchases DECIMAL(12,2) DEFAULT 0
);

CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(200) NOT NULL,
    category VARCHAR(50) NOT NULL,
    price DECIMAL(10,2) NOT NULL,
    cost DECIMAL(10,2) NOT NULL,
    stock_quantity INTEGER NOT NULL DEFAULT 0,
    created_date DATE NOT NULL DEFAULT CURRENT_DATE,
    is_active BOOLEAN DEFAULT true,
    tags TEXT[] DEFAULT '{}'
);

CREATE TABLE orders (
    id SERIAL PRIMARY KEY,
    customer_id INTEGER REFERENCES customers(id),
    order_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    status VARCHAR(20) NOT NULL CHECK (status IN ('pending', 'confirmed', 'shipped', 'delivered', 'cancelled')),
    total_amount DECIMAL(12,2) NOT NULL,
    shipping_address JSONB,
    payment_method VARCHAR(50),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

CREATE TABLE order_items (
    id SERIAL PRIMARY KEY,
    order_id INTEGER REFERENCES orders(id),
    product_id INTEGER REFERENCES products(id),
    quantity INTEGER NOT NULL CHECK (quantity > 0),
    unit_price DECIMAL(10,2) NOT NULL,
    discount_percent DECIMAL(5,2) DEFAULT 0 CHECK (discount_percent >= 0 AND discount_percent <= 100),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

CREATE TABLE product_reviews (
    id SERIAL PRIMARY KEY,
    customer_id INTEGER REFERENCES customers(id),
    product_id INTEGER REFERENCES products(id),
    rating INTEGER CHECK (rating >= 1 AND rating <= 5),
    review_text TEXT,
    review_date DATE NOT NULL DEFAULT CURRENT_DATE,
    is_verified BOOLEAN DEFAULT false,
    helpful_count INTEGER DEFAULT 0
);
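
-- A few illustrative rows so the queries below return something on a fresh schema
-- (all values are invented sample data, and the ids assume empty tables):
INSERT INTO customers (name, email, country, city, segment) VALUES
    ('Alice Martin', 'alice@example.com', 'France', 'Paris', 'premium'),
    ('Bob Smith', 'bob@example.com', 'USA', 'Chicago', 'regular');

INSERT INTO products (name, category, price, cost, stock_quantity, tags) VALUES
    ('Ergonomic Keyboard', 'electronics', 89.99, 45.00, 120, ARRAY['office', 'ergonomic']),
    ('USB-C Hub', 'electronics', 39.99, 18.00, 300, ARRAY['office', 'accessories']);

INSERT INTO orders (customer_id, status, total_amount, shipping_address, payment_method) VALUES
    (1, 'delivered', 129.98, '{"country": "France", "city": "Paris", "postal_code": "75001"}', 'card');

INSERT INTO order_items (order_id, product_id, quantity, unit_price) VALUES
    (1, 1, 1, 89.99),
    (1, 2, 1, 39.99);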

-- 2. Complex JOIN Queries
-- ================================================================================

-- Customer Lifetime Value (CLV) Analysis
WITH customer_orders AS (
    SELECT
        c.id,
        c.name,
        c.email,
        c.registration_date,
        c.country,
        c.segment,
        COUNT(o.id) AS total_orders,
        SUM(CASE WHEN o.status = 'delivered' THEN 1 ELSE 0 END) AS completed_orders,
        SUM(CASE WHEN o.status = 'delivered' THEN o.total_amount ELSE 0 END) AS total_spent,
        AVG(CASE WHEN o.status = 'delivered' THEN o.total_amount END) AS avg_order_value,
        MIN(o.order_date) AS first_order_date,
        MAX(o.order_date) AS last_order_date,
        EXTRACT(DAY FROM (MAX(o.order_date) - MIN(o.order_date))) AS customer_lifetime_days
    FROM customers c
    LEFT JOIN orders o ON c.id = o.customer_id
    GROUP BY c.id, c.name, c.email, c.registration_date, c.country, c.segment
),
customer_metrics AS (
    SELECT
        *,
        total_spent / NULLIF(total_orders, 0) AS avg_order_value_calculated,
        CASE
            WHEN last_order_date >= CURRENT_DATE - INTERVAL '30 days' THEN 'Active'
            WHEN last_order_date >= CURRENT_DATE - INTERVAL '90 days' THEN 'At Risk'
            WHEN last_order_date < CURRENT_DATE - INTERVAL '180 days' THEN 'Churned'
            ELSE 'Dormant'
        END AS customer_status,
        total_spent / NULLIF((CURRENT_DATE - registration_date), 0) * 365 AS annual_spend_rate  -- date - date is already an integer day count
    FROM customer_orders
)
SELECT
    customer_status,
    country,
    segment,
    COUNT(*) AS customer_count,
    SUM(total_spent) AS total_revenue,
    AVG(avg_order_value_calculated) AS avg_order_value,
    AVG(annual_spend_rate) AS avg_annual_spend_rate,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total_spent) AS median_clv,
    PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_spent) AS p95_clv
FROM customer_metrics
WHERE total_orders > 0
GROUP BY customer_status, country, segment
ORDER BY total_revenue DESC;

-- Product Performance Analysis with Category Comparison
WITH product_sales AS (
    SELECT
        p.id,
        p.name,
        p.category,
        p.price,
        p.cost,
        p.tags,
        COUNT(oi.id) AS times_ordered,
        SUM(oi.quantity) AS total_quantity_sold,
        SUM(oi.quantity * oi.unit_price) AS revenue,
        SUM(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS actual_revenue,
        AVG(oi.discount_percent) AS avg_discount,
        COUNT(DISTINCT oi.order_id) AS unique_orders,
        MIN(o.order_date) AS first_sale_date,
        MAX(o.order_date) AS last_sale_date
    FROM products p
    LEFT JOIN order_items oi ON p.id = oi.product_id
    LEFT JOIN orders o ON oi.order_id = o.id AND o.status = 'delivered'
    WHERE p.is_active = true
    GROUP BY p.id, p.name, p.category, p.price, p.cost, p.tags
),
product_metrics AS (
    SELECT
        *,
        (revenue - COALESCE(cost * total_quantity_sold, 0)) AS profit,
        COALESCE((revenue - COALESCE(cost * total_quantity_sold, 0)) / NULLIF(revenue, 0) * 100, 0) AS profit_margin,
        revenue / NULLIF(times_ordered, 0) AS avg_revenue_per_order,
        EXTRACT(DAY FROM (last_sale_date - first_sale_date)) AS selling_period_days,
        total_quantity_sold / NULLIF(EXTRACT(DAY FROM (last_sale_date - first_sale_date)), 0) AS daily_sales_rate
    FROM product_sales
),
category_benchmarks AS (
    SELECT
        category,
        AVG(profit_margin) AS category_avg_margin,
        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY profit_margin) AS category_p75_margin,
        AVG(daily_sales_rate) AS category_avg_daily_rate
    FROM product_metrics
    WHERE times_ordered > 0
    GROUP BY category
)
SELECT
    pm.name,
    pm.category,
    pm.price,
    pm.times_ordered,
    pm.total_quantity_sold,
    pm.profit,
    ROUND(pm.profit_margin, 2) AS profit_margin,
    pm.daily_sales_rate,
    ROUND(cb.category_avg_margin, 2) AS category_avg_margin,
    ROUND(pm.profit_margin - cb.category_avg_margin, 2) AS margin_vs_category,
    CASE
        WHEN pm.profit_margin > cb.category_p75_margin THEN 'Top Performer'
        WHEN pm.profit_margin > cb.category_avg_margin THEN 'Above Average'
        WHEN pm.profit_margin > 0 THEN 'Below Average'
        ELSE 'Loss Making'
    END AS performance_tier
FROM product_metrics pm
JOIN category_benchmarks cb ON pm.category = cb.category
WHERE pm.times_ordered > 10
ORDER BY pm.profit DESC;

-- 3. Window Functions and Time Series Analysis
-- ================================================================================

-- Monthly Sales Trends with Moving Averages and Growth Rates
WITH monthly_sales AS (
    SELECT
        DATE_TRUNC('month', order_date) AS month,
        COUNT(DISTINCT customer_id) AS unique_customers,
        COUNT(*) AS total_orders,
        SUM(total_amount) AS total_revenue,
        AVG(total_amount) AS avg_order_value
    FROM orders
    WHERE status = 'delivered'
        AND order_date >= CURRENT_DATE - INTERVAL '2 years'
    GROUP BY DATE_TRUNC('month', order_date)
),
sales_trends AS (
    SELECT
        month,
        unique_customers,
        total_orders,
        total_revenue,
        avg_order_value,
        LAG(total_revenue) OVER (ORDER BY month) AS prev_month_revenue,
        LAG(total_orders) OVER (ORDER BY month) AS prev_month_orders,
        LAG(unique_customers) OVER (ORDER BY month) AS prev_month_customers,
        SUM(total_revenue) OVER (ORDER BY month ROWS BETWEEN 11 PRECEDING AND CURRENT ROW) AS rolling_12m_revenue,
        SUM(total_orders) OVER (ORDER BY month ROWS BETWEEN 11 PRECEDING AND CURRENT ROW) AS rolling_12m_orders,
        AVG(total_revenue) OVER (ORDER BY month ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS moving_avg_5m,
        ROW_NUMBER() OVER (ORDER BY total_revenue DESC) AS revenue_rank,
        FIRST_VALUE(month) OVER (ORDER BY total_revenue DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS best_month
    FROM monthly_sales
)
SELECT
    month::DATE AS month_date,
    unique_customers,
    total_orders,
    total_revenue,
    avg_order_value,
    ROUND((total_revenue - prev_month_revenue) * 100.0 / NULLIF(prev_month_revenue, 0), 2) AS revenue_growth_pct,
    ROUND((total_orders - prev_month_orders) * 100.0 / NULLIF(prev_month_orders, 0), 2) AS orders_growth_pct,
    ROUND((unique_customers - prev_month_customers) * 100.0 / NULLIF(prev_month_customers, 0), 2) AS customers_growth_pct,
    moving_avg_5m,
    rolling_12m_revenue,
    revenue_rank,
    CASE
        WHEN month = best_month THEN 'Best Month'
        -- output aliases cannot be referenced in the same SELECT list, so compare the raw values
        WHEN total_revenue > prev_month_revenue AND total_orders > prev_month_orders THEN 'Strong Growth'
        WHEN total_revenue < prev_month_revenue * 0.9 THEN 'Declining'
        ELSE 'Stable'
    END AS performance_indicator
FROM sales_trends
ORDER BY month;

-- Customer Segmentation using RFM (Recency, Frequency, Monetary)
WITH customer_rfm AS (
    SELECT
        c.id,
        c.name,
        c.email,
        c.registration_date,
        -- Recency: Days since last purchase
        EXTRACT(DAY FROM (CURRENT_DATE - MAX(o.order_date))) AS recency_days,
        -- Frequency: Total number of orders
        COUNT(DISTINCT o.id) AS frequency,
        -- Monetary: Total amount spent
        SUM(o.total_amount) AS monetary_value,
        -- Additional metrics
        MIN(o.order_date) AS first_purchase_date,
        AVG(o.total_amount) AS avg_order_value,
        -- Customer tenure in days
        (CURRENT_DATE - c.registration_date) AS tenure_days  -- date - date is already an integer day count
    FROM customers c
    JOIN orders o ON c.id = o.customer_id
    WHERE o.status = 'delivered'
    GROUP BY c.id, c.name, c.email, c.registration_date
),
rfm_scores AS (
    SELECT
        id,
        name,
        email,
        recency_days,
        frequency,
        monetary_value,
        first_purchase_date,
        avg_order_value,
        tenure_days,
        -- Score each dimension (5 = best, 1 = worst); NTILE puts the first rows in
        -- bucket 1, so order each dimension worst-first
        NTILE(5) OVER (ORDER BY recency_days DESC) AS recency_score,    -- fewest days since purchase lands in bucket 5
        NTILE(5) OVER (ORDER BY frequency ASC) AS frequency_score,      -- most orders lands in bucket 5
        NTILE(5) OVER (ORDER BY monetary_value ASC) AS monetary_score,  -- highest spend lands in bucket 5
        -- Combined RFM score
        (NTILE(5) OVER (ORDER BY recency_days DESC) +
         NTILE(5) OVER (ORDER BY frequency ASC) +
         NTILE(5) OVER (ORDER BY monetary_value ASC)) / 3.0 AS rfm_score
    FROM customer_rfm
),
rfm_segments AS (
    SELECT
        *,
        CASE
            WHEN recency_score >= 4 AND frequency_score >= 4 AND monetary_score >= 4 THEN 'Champions'
            WHEN recency_score >= 3 AND frequency_score >= 3 AND monetary_score >= 3 THEN 'Loyal Customers'
            WHEN recency_score >= 4 AND frequency_score <= 2 THEN 'New Customers'
            WHEN recency_score >= 3 AND frequency_score >= 3 AND monetary_score <= 2 THEN 'Potential Loyalists'
            WHEN recency_score <= 2 AND frequency_score >= 4 AND monetary_score >= 4 THEN 'At Risk'
            WHEN recency_score <= 2 AND frequency_score <= 2 AND monetary_score <= 2 THEN 'Lost'
            WHEN recency_score <= 2 AND frequency_score >= 3 THEN 'Cannot Lose Them'
            ELSE 'Others'
        END AS rfm_segment
    FROM rfm_scores
)
SELECT
    rfm_segment,
    COUNT(*) AS customer_count,
    SUM(monetary_value) AS total_revenue,
    AVG(monetary_value) AS avg_customer_value,
    AVG(frequency) AS avg_frequency,
    AVG(recency_days) AS avg_recency_days,
    ROUND(AVG(tenure_days), 0) AS avg_tenure_days,
    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS segment_percentage
FROM rfm_segments
GROUP BY rfm_segment
ORDER BY total_revenue DESC;

-- 4. JSONB and Array Operations
-- ================================================================================

-- Analysis of Product Tags and Shipping Patterns
WITH tag_analysis AS (
    SELECT
        p.category,
        t.tag,
        COUNT(DISTINCT p.id) AS product_count,
        AVG(p.price) AS avg_price,
        SUM(oi.quantity) AS total_quantity_sold
    FROM products p
    -- set-returning functions are not allowed in GROUP BY, so unnest in FROM;
    -- the LATERAL join also skips products with empty tag arrays
    CROSS JOIN LATERAL UNNEST(p.tags) AS t(tag)
    LEFT JOIN order_items oi ON p.id = oi.product_id
    LEFT JOIN orders o ON oi.order_id = o.id AND o.status = 'delivered'
    GROUP BY p.category, t.tag
),
shipping_analysis AS (
    SELECT
        (o.shipping_address->>'country') as country,
        (o.shipping_address->>'city') as city,
        (o.shipping_address->>'postal_code') as postal_code,
        COUNT(*) as order_count,
        SUM(o.total_amount) as total_revenue,
        AVG(o.total_amount) as avg_order_value
    FROM orders o
    WHERE o.status = 'delivered'
        AND o.shipping_address IS NOT NULL
        AND o.shipping_address ? 'country'
    GROUP BY
        o.shipping_address->>'country',
        o.shipping_address->>'city',
        o.shipping_address->>'postal_code'
)
-- Combine tag and shipping analysis
SELECT
    'Product Tags' as analysis_type,
    category,
    tag as dimension,
    product_count,
    ROUND(avg_price, 2) as avg_price,
    total_quantity_sold
FROM tag_analysis
UNION ALL
SELECT
    'Shipping Analysis' as analysis_type,
    country as category,
    city as dimension,
    order_count as product_count,
    ROUND(avg_order_value, 2) as avg_price,
    total_revenue as total_quantity_sold
FROM shipping_analysis
ORDER BY analysis_type, product_count DESC;
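
-- The shipping_address ? 'country' existence check above can use a GIN index on
-- the JSONB column; a sketch (jsonb_ops, the default, supports ?, ?|, ?& and @>):
CREATE INDEX CONCURRENTLY idx_orders_shipping_address
ON orders USING GIN (shipping_address);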

-- 5. Complex Subqueries and CTEs
-- ================================================================================

-- Market Basket Analysis: Products Frequently Bought Together
WITH product_pairs AS (
    SELECT
        oi1.product_id AS product_a,
        oi2.product_id AS product_b,
        COUNT(DISTINCT o.customer_id) AS purchase_count
    FROM order_items oi1
    JOIN order_items oi2 ON oi1.order_id = oi2.order_id AND oi1.product_id < oi2.product_id
    JOIN orders o ON oi1.order_id = o.id   -- order_items has no customer_id; take it from orders
    WHERE o.status = 'delivered'
    GROUP BY oi1.product_id, oi2.product_id
),
product_frequencies AS (
    SELECT
        oi.product_id,
        COUNT(DISTINCT o.customer_id) AS customer_count
    FROM order_items oi
    JOIN orders o ON oi.order_id = o.id
    WHERE o.status = 'delivered'
    GROUP BY oi.product_id
),
basket_analysis AS (
    SELECT
        cpp.product_a,
        cpp.product_b,
        cpp.purchase_count,
        pf_a.customer_count AS product_a_customers,
        pf_b.customer_count AS product_b_customers,
        -- Calculate lift: observed co-occurrence / expected co-occurrence
        -- (NUMERIC rather than FLOAT so ROUND(x, 2) works below)
        (cpp.purchase_count::NUMERIC / NULLIF(pf_a.customer_count, 0) / NULLIF(pf_b.customer_count, 0) *
         (SELECT COUNT(DISTINCT customer_id) FROM orders WHERE status = 'delivered')) AS lift
    FROM product_pairs cpp
    JOIN product_frequencies pf_a ON cpp.product_a = pf_a.product_id
    JOIN product_frequencies pf_b ON cpp.product_b = pf_b.product_id
    WHERE cpp.purchase_count >= 5  -- Minimum number of customers who bought both
)
SELECT
    pa.name as product_a_name,
    pb.name as product_b_name,
    ba.purchase_count as times_purchased_together,
    ba.lift,
    ROUND(ba.lift, 2) as lift_rounded,
    pa.category as product_a_category,
    pb.category as product_b_category,
    CASE
        WHEN ba.lift > 2.0 THEN 'Strong Association'
        WHEN ba.lift > 1.5 THEN 'Moderate Association'
        WHEN ba.lift > 1.0 THEN 'Weak Association'
        ELSE 'No Association'
    END as association_strength
FROM basket_analysis ba
JOIN products pa ON ba.product_a = pa.id
JOIN products pb ON ba.product_b = pb.id
ORDER BY ba.purchase_count DESC;

-- 6. Advanced Aggregation and Statistical Analysis
-- ================================================================================

-- Customer Satisfaction and Purchase Correlation
WITH customer_satisfaction AS (
    SELECT
        c.id,
        c.name,
        c.segment,
        AVG(pr.rating) as avg_rating,
        COUNT(pr.id) as review_count,
        STDDEV(pr.rating) as rating_stddev,
        SUM(pr.helpful_count) as total_helpful_votes
    FROM customers c
    LEFT JOIN product_reviews pr ON c.id = pr.customer_id
    GROUP BY c.id, c.name, c.segment
),
customer_behavior AS (
    SELECT
        c.id,
        COUNT(o.id) as total_orders,
        SUM(CASE WHEN o.status = 'delivered' THEN o.total_amount ELSE 0 END) as total_spent,
        AVG(CASE WHEN o.status = 'delivered' THEN o.total_amount END) as avg_order_value,
        MIN(o.order_date) as first_order_date,
        MAX(o.order_date) as last_order_date,
        EXTRACT(DAY FROM (MAX(o.order_date) - MIN(o.order_date))) as purchase_span_days
    FROM customers c
    LEFT JOIN orders o ON c.id = o.customer_id
    GROUP BY c.id
),
correlation_analysis AS (
    SELECT
        cs.id,
        cs.name,
        cs.segment,
        cs.avg_rating,
        cs.review_count,
        cb.total_orders,
        cb.total_spent,
        cb.avg_order_value,
        cb.purchase_span_days,
        -- Calculate correlation-like metrics
        CASE
            WHEN cs.avg_rating >= 4 AND cb.avg_order_value > (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'High Value, High Satisfaction'
            WHEN cs.avg_rating >= 4 AND cb.avg_order_value <= (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'Low Value, High Satisfaction'
            WHEN cs.avg_rating < 3 AND cb.avg_order_value > (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'High Value, Low Satisfaction'
            WHEN cs.avg_rating < 3 AND cb.avg_order_value <= (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'Low Value, Low Satisfaction'
            ELSE 'Mixed'
        END AS satisfaction_value_segment
    FROM customer_satisfaction cs
    JOIN customer_behavior cb ON cs.id = cb.id
    WHERE cs.review_count > 0 AND cb.total_orders > 0
)
SELECT
    segment,
    satisfaction_value_segment,
    COUNT(*) as customer_count,
    AVG(avg_rating) as avg_rating,
    AVG(total_spent) as avg_total_spent,
    AVG(avg_order_value) as avg_order_value,
    AVG(review_count) as avg_review_count,
    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) as segment_percentage
FROM correlation_analysis
GROUP BY segment, satisfaction_value_segment
ORDER BY customer_count DESC;