🎯 Recommended Examples
Balanced sample collections from various categories for you to explore
Advanced PostgreSQL Examples
Advanced PostgreSQL database examples covering optimization strategies, complex queries, indexing, and performance tuning
💻 PostgreSQL Performance Optimization sql
🔴 complex
⭐⭐⭐⭐⭐
Advanced PostgreSQL optimization techniques covering indexing, query optimization, and configuration tuning
⏱️ 60 min
🏷️ postgresql, database, sql, optimization, advanced
Prerequisites:
PostgreSQL basics, SQL, Database administration, Performance tuning
-- PostgreSQL Performance Optimization Guide
-- Comprehensive examples for database performance tuning
-- 1. PostgreSQL Configuration (postgresql.conf)
-- ================================================================================
# Memory Settings (shared_buffers, work_mem, maintenance_work_mem)
shared_buffers = 256MB # 25% of RAM for shared memory
effective_cache_size = 1GB # Estimated OS and disk cache
work_mem = 4MB # Memory for sort operations
maintenance_work_mem = 64MB # Memory for maintenance operations
effective_io_concurrency = 200 # For SSD storage
# Connection Settings
max_connections = 100 # Maximum concurrent connections
superuser_reserved_connections = 3 # Reserved for superusers
# WAL (Write-Ahead Logging) Settings
wal_buffers = 16MB # WAL buffer size
checkpoint_completion_target = 0.9 # Target for checkpoint completion
wal_writer_delay = 200ms # Delay between WAL writes
# Autovacuum Settings
autovacuum = on # Enable autovacuum
autovacuum_max_workers = 3 # Maximum autovacuum workers
autovacuum_naptime = 1min # Time between autovacuum runs
# Random Page Cost (affects query planner)
random_page_cost = 1.1 # For SSD storage
seq_page_cost = 1.0 # Sequential page cost
# Statistics Target
default_statistics_target = 100 # Statistics precision
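-- The values above are illustrative starting points, not universal recommendations;
-- verify and adjust them at runtime without hand-editing postgresql.conf:
SHOW shared_buffers;
SELECT name, setting, unit FROM pg_settings
WHERE name IN ('shared_buffers', 'work_mem', 'effective_cache_size');
ALTER SYSTEM SET work_mem = '8MB'; -- written to postgresql.auto.conf
SELECT pg_reload_conf(); -- picks up reloadable settings; shared_buffers still needs a restart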
-- 2. Advanced Indexing Strategies
-- ================================================================================
-- B-Tree Index (default, good for equality and range queries)
CREATE INDEX CONCURRENTLY idx_users_email ON users(email);
CREATE INDEX CONCURRENTLY idx_orders_created_at ON orders(created_at DESC);
-- Composite Indexes (optimize multi-column queries)
CREATE INDEX CONCURRENTLY idx_orders_user_status_created
ON orders(user_id, status, created_at DESC);
-- Partial Indexes (index only specific rows)
CREATE INDEX CONCURRENTLY idx_active_users
ON users(created_at) WHERE status = 'active';
CREATE INDEX CONCURRENTLY idx_high_value_orders
ON orders(customer_id, total_amount) WHERE total_amount > 1000;
-- Unique Indexes
CREATE UNIQUE INDEX CONCURRENTLY idx_users_username_unique
ON users(username) WHERE username IS NOT NULL;
-- Expression Indexes (index computed values)
CREATE INDEX CONCURRENTLY idx_users_lower_email
ON users(LOWER(email));
CREATE INDEX CONCURRENTLY idx_orders_date_year
ON orders(EXTRACT(YEAR FROM created_at));
-- GIN Index (for array and full-text search)
CREATE INDEX CONCURRENTLY idx_products_tags
ON products USING GIN(tags);
CREATE INDEX CONCURRENTLY idx_documents_content
ON documents USING GIN(to_tsvector('english', content));
-- Hash Index (for equality comparisons only)
CREATE INDEX CONCURRENTLY idx_session_hash
ON sessions USING HASH(session_token);
-- BRIN Index (Block Range Index, good for time-series data)
CREATE INDEX CONCURRENTLY idx_metrics_timestamp
ON metrics USING BRIN(timestamp);
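-- After creating an index, confirm the planner actually uses it; an illustrative
-- check against the idx_users_lower_email expression index above:
EXPLAIN (ANALYZE, BUFFERS)
SELECT id FROM users WHERE LOWER(email) = 'alice@example.com';
-- CREATE INDEX CONCURRENTLY can leave an invalid index behind if it fails; find them with:
SELECT indexrelid::regclass AS invalid_index
FROM pg_index WHERE NOT indisvalid;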
-- 3. Query Optimization Examples
-- ================================================================================
-- EXPLAIN ANALYZE for query performance analysis
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT u.name, COUNT(o.id) as order_count, SUM(o.total_amount) as total_spent
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2023-01-01'
AND o.status = 'completed'
GROUP BY u.id, u.name
HAVING COUNT(o.id) > 10
ORDER BY total_spent DESC
LIMIT 10;
-- Materialized Views for complex aggregations
CREATE MATERIALIZED VIEW mv_user_order_stats AS
SELECT
u.id as user_id,
u.name as user_name,
u.email,
COUNT(o.id) as total_orders,
SUM(CASE WHEN o.status = 'completed' THEN 1 ELSE 0 END) as completed_orders,
SUM(o.total_amount) as total_spent,
AVG(o.total_amount) as avg_order_value,
MAX(o.created_at) as last_order_date
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, u.email
WITH DATA;
-- Create unique index on materialized view
CREATE UNIQUE INDEX idx_mv_user_order_stats_user_id
ON mv_user_order_stats(user_id);
-- Refresh materialized view
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_user_order_stats;
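-- Concurrent refreshes are typically scheduled. A minimal sketch using the pg_cron
-- extension (an assumption: pg_cron is not part of core PostgreSQL):
-- CREATE EXTENSION pg_cron;
-- SELECT cron.schedule('refresh-user-stats', '*/15 * * * *',
--     'REFRESH MATERIALIZED VIEW CONCURRENTLY mv_user_order_stats');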
-- Common Table Expressions (CTEs) for complex queries
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', created_at) as month,
SUM(total_amount) as monthly_total,
COUNT(*) as order_count
FROM orders
WHERE created_at >= '2023-01-01'
GROUP BY DATE_TRUNC('month', created_at)
),
sales_growth AS (
SELECT
month,
monthly_total,
order_count,
LAG(monthly_total) OVER (ORDER BY month) as previous_month_total,
LAG(order_count) OVER (ORDER BY month) as previous_month_count
FROM monthly_sales
)
SELECT
month,
monthly_total,
order_count,
previous_month_total,
previous_month_count,
CASE
WHEN previous_month_total > 0
THEN ROUND((monthly_total - previous_month_total) * 100.0 / previous_month_total, 2)
ELSE NULL
END as growth_percentage
FROM sales_growth;
-- Window Functions for efficient analytics
SELECT
id,
user_id,
total_amount,
created_at,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY total_amount DESC) as order_rank,
SUM(total_amount) OVER (PARTITION BY user_id ORDER BY created_at
ROWS UNBOUNDED PRECEDING) as running_total,
AVG(total_amount) OVER (PARTITION BY user_id ORDER BY created_at
ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) as moving_avg
FROM orders
WHERE user_id = 123;
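-- Window function results cannot be filtered in the WHERE clause of the same query
-- level; wrap them in a subquery to get, for example, each user's top 3 orders:
SELECT id, user_id, total_amount
FROM (
SELECT id, user_id, total_amount,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY total_amount DESC) AS order_rank
FROM orders
) ranked
WHERE order_rank <= 3;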
-- 4. Partitioning for Large Tables
-- ================================================================================
-- Range Partitioning (time-series data)
CREATE TABLE orders_partitioned (
id BIGSERIAL,
user_id INTEGER NOT NULL,
status VARCHAR(20) NOT NULL,
total_amount DECIMAL(10,2) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
) PARTITION BY RANGE (created_at);
-- Create partitions
CREATE TABLE orders_2023_q1 PARTITION OF orders_partitioned
FOR VALUES FROM ('2023-01-01') TO ('2023-04-01');
CREATE TABLE orders_2023_q2 PARTITION OF orders_partitioned
FOR VALUES FROM ('2023-04-01') TO ('2023-07-01');
CREATE TABLE orders_2023_q3 PARTITION OF orders_partitioned
FOR VALUES FROM ('2023-07-01') TO ('2023-10-01');
CREATE TABLE orders_2023_q4 PARTITION OF orders_partitioned
FOR VALUES FROM ('2023-10-01') TO ('2024-01-01');
-- List Partitioning (categorical data)
CREATE TABLE users_partitioned (
id BIGSERIAL,
username VARCHAR(50) NOT NULL,
email VARCHAR(255) NOT NULL,
region VARCHAR(50) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
) PARTITION BY LIST (region);
-- Create partitions by region
CREATE TABLE users_north_america PARTITION OF users_partitioned
FOR VALUES IN ('USA', 'Canada', 'Mexico');
CREATE TABLE users_europe PARTITION OF users_partitioned
FOR VALUES IN ('UK', 'Germany', 'France', 'Spain', 'Italy');
CREATE TABLE users_asia PARTITION OF users_partitioned
FOR VALUES IN ('China', 'Japan', 'India', 'South Korea');
CREATE TABLE users_other PARTITION OF users_partitioned
DEFAULT;
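-- Indexes declared on the partitioned parent cascade to every partition (PostgreSQL 11+):
CREATE INDEX idx_orders_partitioned_user ON orders_partitioned (user_id, created_at DESC);
-- Old partitions can be detached cheaply for archival instead of running large DELETEs:
ALTER TABLE orders_partitioned DETACH PARTITION orders_2023_q1;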
-- 5. Advanced Function and Trigger Examples
-- ================================================================================
-- Search function using full-text search
CREATE OR REPLACE FUNCTION search_products(query_text TEXT)
RETURNS TABLE(
id INTEGER,
name TEXT,
description TEXT,
rank REAL
) AS $$
BEGIN
RETURN QUERY
SELECT
p.id,
p.name,
p.description,
ts_rank(search_vector, plainto_tsquery('english', query_text)) as rank
FROM products p
WHERE search_vector @@ plainto_tsquery('english', query_text)
ORDER BY rank DESC;
END;
$$ LANGUAGE plpgsql;
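-- Usage sketch; assumes products has a search_vector tsvector column (implied by the
-- function body), typically maintained as a generated column or by a trigger:
SELECT * FROM search_products('wireless keyboard') LIMIT 10;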
-- Audit trigger for tracking changes
CREATE OR REPLACE FUNCTION audit_trigger_function()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'DELETE' THEN
INSERT INTO audit_table (
table_name, operation, old_data, changed_by, changed_at
) VALUES (
TG_TABLE_NAME, TG_OP, to_jsonb(OLD), current_user, NOW()
);
RETURN OLD;
ELSIF TG_OP = 'UPDATE' THEN
INSERT INTO audit_table (
table_name, operation, old_data, new_data, changed_by, changed_at
) VALUES (
TG_TABLE_NAME, TG_OP, to_jsonb(OLD), to_jsonb(NEW), current_user, NOW()
);
RETURN NEW;
ELSIF TG_OP = 'INSERT' THEN
INSERT INTO audit_table (
table_name, operation, new_data, changed_by, changed_at
) VALUES (
TG_TABLE_NAME, TG_OP, to_jsonb(NEW), current_user, NOW()
);
RETURN NEW;
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- Apply audit trigger
CREATE TRIGGER audit_users_trigger
AFTER INSERT OR UPDATE OR DELETE ON users
FOR EACH ROW EXECUTE FUNCTION audit_trigger_function();
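-- A minimal audit_table matching the columns the trigger writes (a sketch; adjust
-- types and retention policy to your environment):
CREATE TABLE IF NOT EXISTS audit_table (
id BIGSERIAL PRIMARY KEY,
table_name TEXT NOT NULL,
operation TEXT NOT NULL,
old_data JSONB,
new_data JSONB,
changed_by TEXT NOT NULL,
changed_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);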
-- 6. Performance Monitoring and Diagnostics
-- ================================================================================
-- Enable pg_stat_statements extension
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
-- Query performance statistics
SELECT
query,
calls,
total_exec_time,
mean_exec_time,
rows,
100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 10;
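-- pg_stat_statements requires shared_preload_libraries = 'pg_stat_statements' in
-- postgresql.conf and a server restart. After a tuning pass, reset the counters so
-- new numbers reflect current behavior:
SELECT pg_stat_statements_reset();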
-- Index usage statistics
SELECT
schemaname,
relname,
indexrelname,
idx_scan,
idx_tup_read,
idx_tup_fetch,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
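-- The same view also surfaces candidate unused indexes (never scanned since the last
-- stats reset); verify against replicas and recent resets before dropping anything:
SELECT schemaname, relname, indexrelname,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;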
-- Table size and row count statistics
SELECT
schemaname,
relname,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
pg_size_pretty(pg_relation_size(relid)) AS table_size,
pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size,
n_tup_ins,
n_tup_upd,
n_tup_del,
n_live_tup,
n_dead_tup,
last_vacuum,
last_autovacuum,
last_analyze,
last_autoanalyze
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC;
-- Long-running queries
SELECT
pid,
now() - pg_stat_activity.query_start AS duration,
query,
state,
wait_event_type,
wait_event
FROM pg_stat_activity
WHERE (now() - pg_stat_activity.query_start) > interval '5 minutes'
AND state = 'active'
ORDER BY duration DESC;
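-- Once a runaway query is identified, cancel it, or terminate its backend if
-- cancellation is not enough (12345 is an illustrative pid):
SELECT pg_cancel_backend(12345);
SELECT pg_terminate_backend(12345);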
-- Blocking queries
SELECT
blocked_locks.pid AS blocked_pid,
blocked_activity.usename AS blocked_user,
blocking_locks.pid AS blocking_pid,
blocking_activity.usename AS blocking_user,
blocked_activity.query AS blocked_statement,
blocking_activity.query AS current_statement_in_blocking_process
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks
ON blocking_locks.locktype = blocked_locks.locktype
AND blocking_locks.DATABASE IS NOT DISTINCT FROM blocked_locks.DATABASE
AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.GRANTED;
-- 7. Connection Pooling Configuration (pgbouncer.ini)
-- ================================================================================
[pgbouncer]
# Connection settings
listen_port = 6432
listen_addr = 127.0.0.1
# Database connections
pool_mode = transaction
max_client_conn = 100
default_pool_size = 20
min_pool_size = 5
reserve_pool_size = 5
# Timeouts
server_reset_query = DISCARD ALL
server_check_delay = 30
server_check_query = select 1
server_lifetime = 3600
server_idle_timeout = 600
# Logging
log_connections = 1
log_disconnections = 1
log_pooler_errors = 1
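# Clients then connect to PgBouncer on port 6432 instead of PostgreSQL directly, e.g.:
#   psql "host=127.0.0.1 port=6432 dbname=your_database user=app_user"
# Note that pool_mode = transaction breaks session state (SET, prepared statements,
# advisory locks), so application code must not rely on it.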
-- 8. Backup and Recovery Scripts
-- ================================================================================
-- Backup script (bash)
#!/bin/bash
# postgresql_backup.sh
DB_NAME="your_database"
DB_USER="postgres"
BACKUP_DIR="/var/backups/postgresql"
DATE=$(date +%Y%m%d_%H%M%S)
# Create backup directory
mkdir -p $BACKUP_DIR
# Custom format backup with compression
pg_dump -h localhost -U $DB_USER -d $DB_NAME \
--format=custom \
--compress=9 \
--verbose \
--file=$BACKUP_DIR/${DB_NAME}_${DATE}.backup
# Schema only backup
pg_dump -h localhost -U $DB_USER -d $DB_NAME \
--schema-only \
--file=$BACKUP_DIR/${DB_NAME}_schema_${DATE}.sql
# Data only backup
pg_dump -h localhost -U $DB_USER -d $DB_NAME \
--data-only \
--compress=9 \
--file=$BACKUP_DIR/${DB_NAME}_data_${DATE}.sql.gz
# Remove old backups (keep 30 days)
find $BACKUP_DIR -name "*.backup" -mtime +30 -delete
find $BACKUP_DIR -name "*.sql" -mtime +30 -delete
find $BACKUP_DIR -name "*.sql.gz" -mtime +30 -delete
echo "Backup completed: $BACKUP_DIR/${DB_NAME}_${DATE}.backup"
-- Point-in-Time Recovery (PITR)
-- PITR replays archived WAL on top of a physical base backup (pg_basebackup);
-- a logical dump restored with pg_restore cannot be rolled forward in time.
-- Step 1: Stop PostgreSQL
sudo systemctl stop postgresql
-- Step 2: Restore the physical base backup into the data directory
-- (e.g. extract a pg_basebackup tarball into /var/lib/postgresql/12/main)
-- Step 3: Configure recovery (recovery.conf was removed in PostgreSQL 12;
-- use postgresql.auto.conf plus a recovery.signal file instead)
echo "restore_command = 'cp /var/lib/postgresql/wal_archive/%f %p'" >> /var/lib/postgresql/12/main/postgresql.auto.conf
echo "recovery_target_time = '2023-12-01 10:00:00'" >> /var/lib/postgresql/12/main/postgresql.auto.conf
touch /var/lib/postgresql/12/main/recovery.signal
-- Step 4: Start PostgreSQL; it replays WAL until the target time is reached
sudo systemctl start postgresql
-- 9. Security Configuration
-- ================================================================================
-- Row Level Security (RLS)
ALTER TABLE orders ENABLE ROW LEVEL SECURITY;
-- Policy for users to see only their own orders
CREATE POLICY user_orders_policy ON orders
FOR ALL
TO app_user
USING (user_id = current_setting('app.current_user_id')::INTEGER);
-- Policy for managers to see all orders in their department
CREATE POLICY manager_orders_policy ON orders
FOR SELECT
TO manager_role
USING (department_id = current_setting('app.current_department_id')::INTEGER);
-- Set user context
SET app.current_user_id = '123';
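-- In application code the context is usually set per transaction via set_config,
-- with is_local = true so it resets automatically at COMMIT/ROLLBACK:
BEGIN;
SELECT set_config('app.current_user_id', '123', true);
SELECT * FROM orders; -- RLS now restricts this to user 123's rows
COMMIT;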
-- Encrypted columns using pgcrypto extension
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- Function to encrypt data
CREATE OR REPLACE FUNCTION encrypt_sensitive_data(data TEXT)
RETURNS TEXT AS $$
BEGIN
RETURN encode(encrypt(convert_to(data, 'UTF8'), 'encryption_key', 'aes'), 'base64');
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
-- Function to decrypt data
CREATE OR REPLACE FUNCTION decrypt_sensitive_data(encrypted_data TEXT)
RETURNS TEXT AS $$
BEGIN
RETURN convert_from(decrypt(decode(encrypted_data, 'base64'), 'encryption_key', 'aes'), 'UTF8');
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
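-- Usage sketch. The hardcoded 'encryption_key' above is for illustration only;
-- real deployments keep keys outside the database. pgcrypto's pgp_sym_encrypt /
-- pgp_sym_decrypt are generally preferred over the raw encrypt/decrypt functions:
SELECT decrypt_sensitive_data(encrypt_sensitive_data('4111-1111-1111-1111'));
-- Equivalent with the PGP functions:
SELECT pgp_sym_decrypt(pgp_sym_encrypt('4111-1111-1111-1111', 'encryption_key'), 'encryption_key');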
-- 10. Monitoring Dashboard Query
-- ================================================================================
-- Comprehensive monitoring dashboard query
WITH
database_stats AS (
SELECT
pg_size_pretty(pg_database_size(current_database())) AS database_size,
(SELECT count(*) FROM pg_stat_activity WHERE state = 'active') AS active_connections,
(SELECT count(*) FROM pg_stat_activity WHERE state = 'idle') AS idle_connections,
(SELECT count(*) FROM pg_locks WHERE NOT granted) AS waiting_queries
),
table_stats AS (
SELECT
schemaname,
relname,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
n_live_tup,
n_dead_tup,
last_vacuum,
last_autovacuum,
(extract(epoch from (now() - greatest(last_vacuum, last_autovacuum))) / 86400)::integer as days_since_vacuum
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC
LIMIT 10
),
index_stats AS (
SELECT
schemaname,
relname,
indexrelname,
idx_scan,
idx_tup_read,
idx_tup_fetch,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan > 0
ORDER BY idx_scan DESC
LIMIT 10
),
slow_queries AS (
SELECT
query,
calls,
total_exec_time,
mean_exec_time,
rows
FROM pg_stat_statements
WHERE mean_exec_time > 1000 -- queries taking more than 1 second
ORDER BY total_exec_time DESC
LIMIT 5
)
SELECT 'Database Stats' as metric_type, '' as details, '' as value, '' as additional_info
UNION ALL
SELECT
'Database Stats' as metric_type,
'Size' as details,
database_size as value,
'' as additional_info
FROM database_stats
UNION ALL
SELECT
'Database Stats' as metric_type,
'Active Connections' as details,
active_connections::text as value,
'' as additional_info
FROM database_stats
UNION ALL
SELECT
'Database Stats' as metric_type,
'Idle Connections' as details,
idle_connections::text as value,
'' as additional_info
FROM database_stats
UNION ALL
SELECT
'Database Stats' as metric_type,
'Waiting Queries' as details,
waiting_queries::text as value,
'' as additional_info
FROM database_stats
UNION ALL
SELECT
'Top Tables' as metric_type,
relname as details,
total_size as value,
n_live_tup::text || ' live rows, ' || days_since_vacuum::text || ' days since vacuum' as additional_info
FROM table_stats
UNION ALL
SELECT
'Top Indexes' as metric_type,
indexrelname as details,
idx_scan::text || ' scans' as value,
index_size as additional_info
FROM index_stats
UNION ALL
SELECT
'Slow Queries' as metric_type,
left(query, 50) || '...' as details,
mean_exec_time::text || ' ms' as value,
calls::text || ' calls, ' || total_exec_time::text || ' ms total' as additional_info
FROM slow_queries;
💻 Complex PostgreSQL Queries sql
🔴 complex
⭐⭐⭐⭐
Advanced SQL queries demonstrating joins, subqueries, window functions, and analytical techniques
⏱️ 50 min
🏷️ postgresql, sql, complex, analytics, queries
Prerequisites:
Advanced SQL, PostgreSQL, Data Analysis, Window Functions
-- Complex PostgreSQL Queries and Analytics
-- Advanced SQL patterns for data analysis and reporting
-- 1. Setup Sample Tables for Demonstrations
-- ================================================================================
CREATE TABLE customers (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
email VARCHAR(255) UNIQUE NOT NULL,
registration_date DATE NOT NULL DEFAULT CURRENT_DATE,
country VARCHAR(50) NOT NULL,
city VARCHAR(100),
segment VARCHAR(20) DEFAULT 'regular' CHECK (segment IN ('regular', 'premium', 'vip')),
last_purchase_date DATE,
total_purchases DECIMAL(12,2) DEFAULT 0
);
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(200) NOT NULL,
category VARCHAR(50) NOT NULL,
price DECIMAL(10,2) NOT NULL,
cost DECIMAL(10,2) NOT NULL,
stock_quantity INTEGER NOT NULL DEFAULT 0,
created_date DATE NOT NULL DEFAULT CURRENT_DATE,
is_active BOOLEAN DEFAULT true,
tags TEXT[] DEFAULT '{}'
);
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
customer_id INTEGER REFERENCES customers(id),
order_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
status VARCHAR(20) NOT NULL CHECK (status IN ('pending', 'confirmed', 'shipped', 'delivered', 'cancelled')),
total_amount DECIMAL(12,2) NOT NULL,
shipping_address JSONB,
payment_method VARCHAR(50),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE TABLE order_items (
id SERIAL PRIMARY KEY,
order_id INTEGER REFERENCES orders(id),
product_id INTEGER REFERENCES products(id),
quantity INTEGER NOT NULL CHECK (quantity > 0),
unit_price DECIMAL(10,2) NOT NULL,
discount_percent DECIMAL(5,2) DEFAULT 0 CHECK (discount_percent >= 0 AND discount_percent <= 100),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE TABLE product_reviews (
id SERIAL PRIMARY KEY,
customer_id INTEGER REFERENCES customers(id),
product_id INTEGER REFERENCES products(id),
rating INTEGER CHECK (rating >= 1 AND rating <= 5),
review_text TEXT,
review_date DATE NOT NULL DEFAULT CURRENT_DATE,
is_verified BOOLEAN DEFAULT false,
helpful_count INTEGER DEFAULT 0
);
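-- A few illustrative rows so the queries below return results (all values invented):
INSERT INTO customers (name, email, country, city, segment)
VALUES ('Alice Martin', 'alice@example.com', 'France', 'Paris', 'premium'),
('Bob Chen', 'bob@example.com', 'USA', 'Seattle', 'regular');
INSERT INTO products (name, category, price, cost, stock_quantity, tags)
VALUES ('Wireless Mouse', 'Electronics', 29.99, 12.50, 100, '{wireless,accessory}'),
('Desk Lamp', 'Home', 49.99, 18.00, 40, '{lighting}');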
-- 2. Complex JOIN Queries
-- ================================================================================
-- Customer Lifetime Value (CLV) Analysis
WITH customer_orders AS (
SELECT
c.id,
c.name,
c.email,
c.registration_date,
c.country,
c.segment,
COUNT(o.id) AS total_orders,
SUM(CASE WHEN o.status = 'delivered' THEN 1 ELSE 0 END) AS completed_orders,
SUM(CASE WHEN o.status = 'delivered' THEN o.total_amount ELSE 0 END) AS total_spent,
AVG(CASE WHEN o.status = 'delivered' THEN o.total_amount END) AS avg_order_value,
MIN(o.order_date) AS first_order_date,
MAX(o.order_date) AS last_order_date,
EXTRACT(DAY FROM (MAX(o.order_date) - MIN(o.order_date))) AS customer_lifetime_days
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id, c.name, c.email, c.registration_date, c.country, c.segment
),
customer_metrics AS (
SELECT
*,
total_spent / NULLIF(total_orders, 0) AS avg_order_value_calculated,
CASE
WHEN last_order_date >= CURRENT_DATE - INTERVAL '30 days' THEN 'Active'
WHEN last_order_date >= CURRENT_DATE - INTERVAL '90 days' THEN 'At Risk'
WHEN last_order_date < CURRENT_DATE - INTERVAL '180 days' THEN 'Churned'
ELSE 'Dormant'
END AS customer_status,
total_spent / NULLIF(CURRENT_DATE - registration_date, 0) * 365 AS annual_spend_rate
FROM customer_orders
)
SELECT
customer_status,
country,
segment,
COUNT(*) AS customer_count,
SUM(total_spent) AS total_revenue,
AVG(avg_order_value_calculated) AS avg_order_value,
AVG(annual_spend_rate) AS avg_annual_spend_rate,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total_spent) AS median_clv,
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_spent) AS p95_clv
FROM customer_metrics
WHERE total_orders > 0
GROUP BY customer_status, country, segment
ORDER BY total_revenue DESC;
-- Product Performance Analysis with Category Comparison
WITH product_sales AS (
SELECT
p.id,
p.name,
p.category,
p.price,
p.cost,
p.tags,
COUNT(oi.id) AS times_ordered,
SUM(oi.quantity) AS total_quantity_sold,
SUM(oi.quantity * oi.unit_price) AS revenue,
SUM(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS actual_revenue,
AVG(oi.discount_percent) AS avg_discount,
COUNT(DISTINCT oi.order_id) AS unique_orders,
MIN(o.order_date) AS first_sale_date,
MAX(o.order_date) AS last_sale_date
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
LEFT JOIN orders o ON oi.order_id = o.id AND o.status = 'delivered'
WHERE p.is_active = true
GROUP BY p.id, p.name, p.category, p.price, p.cost, p.tags
),
product_metrics AS (
SELECT
*,
(revenue - COALESCE(cost * total_quantity_sold, 0)) AS profit,
COALESCE((revenue - COALESCE(cost * total_quantity_sold, 0)) / NULLIF(revenue, 0) * 100, 0) AS profit_margin,
revenue / NULLIF(times_ordered, 0) AS avg_revenue_per_order,
EXTRACT(DAY FROM (last_sale_date - first_sale_date)) AS selling_period_days,
total_quantity_sold / NULLIF(EXTRACT(DAY FROM (last_sale_date - first_sale_date)), 0) AS daily_sales_rate
FROM product_sales
),
category_benchmarks AS (
SELECT
category,
AVG(profit_margin) AS category_avg_margin,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY profit_margin) AS category_p75_margin,
AVG(daily_sales_rate) AS category_avg_daily_rate
FROM product_metrics
WHERE times_ordered > 0
GROUP BY category
)
SELECT
pm.name,
pm.category,
pm.price,
pm.times_ordered,
pm.total_quantity_sold,
pm.profit,
ROUND(pm.profit_margin, 2) AS profit_margin,
pm.daily_sales_rate,
ROUND(cb.category_avg_margin, 2) AS category_avg_margin,
ROUND(pm.profit_margin - cb.category_avg_margin, 2) AS margin_vs_category,
CASE
WHEN pm.profit_margin > cb.category_p75_margin THEN 'Top Performer'
WHEN pm.profit_margin > cb.category_avg_margin THEN 'Above Average'
WHEN pm.profit_margin > 0 THEN 'Below Average'
ELSE 'Loss Making'
END AS performance_tier
FROM product_metrics pm
JOIN category_benchmarks cb ON pm.category = cb.category
WHERE pm.times_ordered > 10
ORDER BY pm.profit DESC;
-- 3. Window Functions and Time Series Analysis
-- ================================================================================
-- Monthly Sales Trends with Moving Averages and Growth Rates
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', order_date) AS month,
COUNT(DISTINCT customer_id) AS unique_customers,
COUNT(*) AS total_orders,
SUM(total_amount) AS total_revenue,
AVG(total_amount) AS avg_order_value
FROM orders
WHERE status = 'delivered'
AND order_date >= CURRENT_DATE - INTERVAL '2 years'
GROUP BY DATE_TRUNC('month', order_date)
),
sales_trends AS (
SELECT
month,
unique_customers,
total_orders,
total_revenue,
avg_order_value,
LAG(total_revenue) OVER (ORDER BY month) AS prev_month_revenue,
LAG(total_orders) OVER (ORDER BY month) AS prev_month_orders,
LAG(unique_customers) OVER (ORDER BY month) AS prev_month_customers,
SUM(total_revenue) OVER (ORDER BY month ROWS BETWEEN 11 PRECEDING AND CURRENT ROW) AS rolling_12m_revenue,
SUM(total_orders) OVER (ORDER BY month ROWS BETWEEN 11 PRECEDING AND CURRENT ROW) AS rolling_12m_orders,
AVG(total_revenue) OVER (ORDER BY month ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS moving_avg_5m,
ROW_NUMBER() OVER (ORDER BY total_revenue DESC) AS revenue_rank,
FIRST_VALUE(month) OVER (ORDER BY total_revenue DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS best_month
FROM monthly_sales
)
SELECT
month::DATE AS month_date,
unique_customers,
total_orders,
total_revenue,
avg_order_value,
ROUND((total_revenue - prev_month_revenue) * 100.0 / NULLIF(prev_month_revenue, 0), 2) AS revenue_growth_pct,
ROUND((total_orders - prev_month_orders) * 100.0 / NULLIF(prev_month_orders, 0), 2) AS orders_growth_pct,
ROUND((unique_customers - prev_month_customers) * 100.0 / NULLIF(prev_month_customers, 0), 2) AS customers_growth_pct,
moving_avg_5m,
rolling_12m_revenue,
revenue_rank,
-- SELECT-list aliases (revenue_growth_pct etc.) cannot be referenced elsewhere in
-- the same SELECT, so the growth expressions are repeated inside the CASE
CASE
WHEN month = best_month THEN 'Best Month'
WHEN total_revenue > prev_month_revenue AND total_orders > prev_month_orders THEN 'Strong Growth'
WHEN (total_revenue - prev_month_revenue) * 100.0 / NULLIF(prev_month_revenue, 0) < -10 THEN 'Declining'
ELSE 'Stable'
END AS performance_indicator
FROM sales_trends
ORDER BY month;
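-- Months with no delivered orders are simply absent above; to fill gaps, LEFT JOIN
-- the aggregate against a generated calendar (a sketch):
SELECT gs.month::date AS month
FROM generate_series(
date_trunc('month', CURRENT_DATE - INTERVAL '2 years'),
date_trunc('month', CURRENT_DATE),
INTERVAL '1 month'
) AS gs(month);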
-- Customer Segmentation using RFM (Recency, Frequency, Monetary)
WITH customer_rfm AS (
SELECT
c.id,
c.name,
c.email,
c.registration_date,
-- Recency: Days since last purchase
EXTRACT(DAY FROM (CURRENT_DATE - MAX(o.order_date))) AS recency_days,
-- Frequency: Total number of orders
COUNT(DISTINCT o.id) AS frequency,
-- Monetary: Total amount spent
SUM(o.total_amount) AS monetary_value,
-- Additional metrics
MIN(o.order_date) AS first_purchase_date,
AVG(o.total_amount) AS avg_order_value,
-- Customer tenure in days
(CURRENT_DATE - c.registration_date) AS tenure_days
FROM customers c
JOIN orders o ON c.id = o.customer_id
WHERE o.status = 'delivered'
GROUP BY c.id, c.name, c.email, c.registration_date
),
rfm_scores AS (
SELECT
id,
name,
email,
recency_days,
frequency,
monetary_value,
first_purchase_date,
avg_order_value,
tenure_days,
-- Score each dimension (5 = best, 1 = worst); NTILE assigns bucket 1 to the first
-- rows in the ordering, so each ORDER BY must put the worst values first
NTILE(5) OVER (ORDER BY recency_days DESC) AS recency_score, -- most recent purchase = highest score
NTILE(5) OVER (ORDER BY frequency ASC) AS frequency_score,
NTILE(5) OVER (ORDER BY monetary_value ASC) AS monetary_score,
-- Combined RFM score
(NTILE(5) OVER (ORDER BY recency_days DESC) +
NTILE(5) OVER (ORDER BY frequency ASC) +
NTILE(5) OVER (ORDER BY monetary_value ASC)) / 3.0 AS rfm_score
FROM customer_rfm
),
rfm_segments AS (
SELECT
*,
CASE
WHEN recency_score >= 4 AND frequency_score >= 4 AND monetary_score >= 4 THEN 'Champions'
WHEN recency_score >= 3 AND frequency_score >= 3 AND monetary_score >= 3 THEN 'Loyal Customers'
WHEN recency_score >= 4 AND frequency_score <= 2 THEN 'New Customers'
WHEN recency_score >= 3 AND frequency_score >= 3 AND monetary_score <= 2 THEN 'Potential Loyalists'
WHEN recency_score <= 2 AND frequency_score >= 4 AND monetary_score >= 4 THEN 'At Risk'
WHEN recency_score <= 2 AND frequency_score <= 2 AND monetary_score <= 2 THEN 'Lost'
WHEN recency_score <= 2 AND frequency_score >= 3 THEN 'Cannot Lose Them'
ELSE 'Others'
END AS rfm_segment
FROM rfm_scores
)
SELECT
rfm_segment,
COUNT(*) AS customer_count,
SUM(monetary_value) AS total_revenue,
AVG(monetary_value) AS avg_customer_value,
AVG(frequency) AS avg_frequency,
AVG(recency_days) AS avg_recency_days,
ROUND(AVG(tenure_days), 0) AS avg_tenure_days,
ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS segment_percentage
FROM rfm_segments
GROUP BY rfm_segment
ORDER BY total_revenue DESC;
-- 4. JSONB and Array Operations
-- ================================================================================
-- Analysis of Product Tags and Shipping Patterns
WITH tag_analysis AS (
SELECT
p.category,
t.tag,
COUNT(DISTINCT p.id) as product_count,
AVG(p.price) as avg_price,
SUM(oi.quantity) as total_quantity_sold
FROM products p
-- set-returning functions are not allowed in GROUP BY, so expand tags via LATERAL
CROSS JOIN LATERAL UNNEST(p.tags) AS t(tag)
LEFT JOIN order_items oi ON p.id = oi.product_id
LEFT JOIN orders o ON oi.order_id = o.id AND o.status = 'delivered'
WHERE array_length(p.tags, 1) > 0
GROUP BY p.category, t.tag
),
shipping_analysis AS (
SELECT
(o.shipping_address->>'country') as country,
(o.shipping_address->>'city') as city,
(o.shipping_address->>'postal_code') as postal_code,
COUNT(*) as order_count,
SUM(o.total_amount) as total_revenue,
AVG(o.total_amount) as avg_order_value
FROM orders o
WHERE o.status = 'delivered'
AND o.shipping_address IS NOT NULL
AND o.shipping_address ? 'country'
GROUP BY
o.shipping_address->>'country',
o.shipping_address->>'city',
o.shipping_address->>'postal_code'
)
-- Combine tag and shipping analysis
SELECT
'Product Tags' as analysis_type,
category,
tag as dimension,
product_count,
ROUND(avg_price, 2) as avg_price,
total_quantity_sold
FROM tag_analysis
UNION ALL
SELECT
'Shipping Analysis' as analysis_type,
country as category,
city as dimension,
order_count as product_count,
ROUND(avg_order_value, 2) as avg_price,
total_revenue as total_quantity_sold
FROM shipping_analysis
ORDER BY analysis_type, product_count DESC;
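-- JSONB filters like the ones above benefit from a GIN index on the column;
-- the @> containment operator can then use it:
CREATE INDEX CONCURRENTLY idx_orders_shipping_address
ON orders USING GIN (shipping_address);
SELECT COUNT(*) FROM orders WHERE shipping_address @> '{"country": "France"}';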
-- 5. Complex Subqueries and CTEs
-- ================================================================================
-- Market Basket Analysis: Products Frequently Bought Together
WITH product_pairs AS (
SELECT
oi1.product_id as product_a,
oi2.product_id as product_b,
COUNT(*) as purchase_count -- delivered orders containing both products
FROM order_items oi1
JOIN order_items oi2 ON oi1.order_id = oi2.order_id AND oi1.product_id < oi2.product_id
JOIN orders o ON oi1.order_id = o.id
WHERE o.status = 'delivered'
GROUP BY oi1.product_id, oi2.product_id
),
product_frequencies AS (
SELECT
oi.product_id,
COUNT(DISTINCT oi.order_id) as order_count
FROM order_items oi
JOIN orders o ON oi.order_id = o.id
WHERE o.status = 'delivered'
GROUP BY oi.product_id
),
basket_analysis AS (
SELECT
cpp.product_a,
cpp.product_b,
cpp.purchase_count,
pf_a.order_count as product_a_orders,
pf_b.order_count as product_b_orders,
-- Lift = P(A and B) / (P(A) * P(B)), with the delivered order as the unit of analysis
(cpp.purchase_count::FLOAT / NULLIF(pf_a.order_count, 0) / NULLIF(pf_b.order_count, 0) *
(SELECT COUNT(*) FROM orders WHERE status = 'delivered')) as lift
FROM product_pairs cpp
JOIN product_frequencies pf_a ON cpp.product_a = pf_a.product_id
JOIN product_frequencies pf_b ON cpp.product_b = pf_b.product_id
WHERE cpp.purchase_count >= 5 -- Minimum co-occurrences
)
SELECT
pa.name as product_a_name,
pb.name as product_b_name,
ba.purchase_count as times_purchased_together,
ba.lift,
ROUND(ba.lift::numeric, 2) as lift_rounded,
pa.category as product_a_category,
pb.category as product_b_category,
CASE
WHEN ba.lift > 2.0 THEN 'Strong Association'
WHEN ba.lift > 1.5 THEN 'Moderate Association'
WHEN ba.lift > 1.0 THEN 'Weak Association'
ELSE 'No Association'
END as association_strength
FROM basket_analysis ba
JOIN products pa ON ba.product_a = pa.id
JOIN products pb ON ba.product_b = pb.id
ORDER BY ba.purchase_count DESC;
-- 6. Advanced Aggregation and Statistical Analysis
-- ================================================================================
-- Customer Satisfaction and Purchase Correlation
WITH customer_satisfaction AS (
SELECT
c.id,
c.name,
c.segment,
AVG(pr.rating) as avg_rating,
COUNT(pr.id) as review_count,
STDDEV(pr.rating) as rating_stddev,
SUM(pr.helpful_count) as total_helpful_votes
FROM customers c
LEFT JOIN product_reviews pr ON c.id = pr.customer_id
GROUP BY c.id, c.name, c.segment
),
customer_behavior AS (
SELECT
c.id,
COUNT(o.id) as total_orders,
SUM(CASE WHEN o.status = 'delivered' THEN o.total_amount ELSE 0 END) as total_spent,
AVG(CASE WHEN o.status = 'delivered' THEN o.total_amount END) as avg_order_value,
MIN(o.order_date) as first_order_date,
MAX(o.order_date) as last_order_date,
EXTRACT(DAY FROM (MAX(o.order_date) - MIN(o.order_date))) as purchase_span_days
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id
),
correlation_analysis AS (
SELECT
cs.id,
cs.name,
cs.segment,
cs.avg_rating,
cs.review_count,
cb.total_orders,
cb.total_spent,
cb.avg_order_value,
cb.purchase_span_days,
-- Calculate correlation-like metrics
CASE
WHEN cs.avg_rating >= 4 AND cb.avg_order_value > (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'High Value, High Satisfaction'
WHEN cs.avg_rating >= 4 AND cb.avg_order_value <= (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'Low Value, High Satisfaction'
WHEN cs.avg_rating < 3 AND cb.avg_order_value > (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'High Value, Low Satisfaction'
WHEN cs.avg_rating < 3 AND cb.avg_order_value <= (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'Low Value, Low Satisfaction'
ELSE 'Mixed'
END AS satisfaction_value_segment
FROM customer_satisfaction cs
JOIN customer_behavior cb ON cs.id = cb.id
WHERE cs.review_count > 0 AND cb.total_orders > 0
)
SELECT
segment,
satisfaction_value_segment,
COUNT(*) as customer_count,
AVG(avg_rating) as avg_rating,
AVG(total_spent) as avg_total_spent,
AVG(avg_order_value) as avg_order_value,
AVG(review_count) as avg_review_count,
ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) as segment_percentage
FROM correlation_analysis
GROUP BY segment, satisfaction_value_segment
ORDER BY customer_count DESC;