🎯 Recommended Collections
Balanced collections of code examples from various categories for you to explore
Advanced PostgreSQL Examples
Advanced PostgreSQL database examples covering optimization strategies, complex queries, indexing, and performance tuning
💻 PostgreSQL Performance Optimization sql
🔴 complex
⭐⭐⭐⭐⭐
Advanced PostgreSQL optimization techniques, including indexing, query optimization, and configuration tuning
⏱️ 60 min
🏷️ postgresql, database, sql, optimization, advanced
Prerequisites:
PostgreSQL basics, SQL, Database administration, Performance tuning
-- PostgreSQL Performance Optimization Guide
-- Comprehensive examples for database performance tuning
-- 1. PostgreSQL Configuration (postgresql.conf)
-- ================================================================================
# Memory Settings (shared_buffers, work_mem, maintenance_work_mem)
shared_buffers = 256MB # 25% of RAM for shared memory
effective_cache_size = 1GB # Estimated OS and disk cache
work_mem = 4MB # Memory for sort operations
maintenance_work_mem = 64MB # Memory for maintenance operations
effective_io_concurrency = 200 # For SSD storage
# Connection Settings
max_connections = 100 # Maximum concurrent connections
superuser_reserved_connections = 3 # Reserved for superusers
# WAL (Write-Ahead Logging) Settings
wal_buffers = 16MB # WAL buffer size
checkpoint_completion_target = 0.9 # Target for checkpoint completion
wal_writer_delay = 200ms # Delay between WAL writes
# Autovacuum Settings
autovacuum = on # Enable autovacuum
autovacuum_max_workers = 3 # Maximum autovacuum workers
autovacuum_naptime = 1min # Time between autovacuum runs
# Random Page Cost (affects query planner)
random_page_cost = 1.1 # For SSD storage
seq_page_cost = 1.0 # Sequential page cost
# Statistics Target
default_statistics_target = 100 # Statistics precision
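-- Per-column override (a sketch in SQL rather than postgresql.conf; table and
-- column names are illustrative): raise the target where the planner
-- misestimates a skewed distribution, then re-analyze.
ALTER TABLE orders ALTER COLUMN status SET STATISTICS 500;
ANALYZE orders;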
-- 2. Advanced Indexing Strategies
-- ================================================================================
-- B-Tree Index (default, good for equality and range queries)
CREATE INDEX CONCURRENTLY idx_users_email ON users(email);
CREATE INDEX CONCURRENTLY idx_orders_created_at ON orders(created_at DESC);
-- Composite Indexes (optimize multi-column queries)
CREATE INDEX CONCURRENTLY idx_orders_user_status_created
ON orders(user_id, status, created_at DESC);
-- Partial Indexes (index only specific rows)
CREATE INDEX CONCURRENTLY idx_active_users
ON users(created_at) WHERE status = 'active';
CREATE INDEX CONCURRENTLY idx_high_value_orders
ON orders(customer_id, total_amount) WHERE total_amount > 1000;
-- Unique Indexes
CREATE UNIQUE INDEX CONCURRENTLY idx_users_username_unique
ON users(username) WHERE username IS NOT NULL;
-- Expression Indexes (index computed values)
CREATE INDEX CONCURRENTLY idx_users_lower_email
ON users(LOWER(email));
CREATE INDEX CONCURRENTLY idx_orders_date_year
ON orders(EXTRACT(YEAR FROM (created_at AT TIME ZONE 'UTC')));
-- (AT TIME ZONE pins the expression to an immutable form; plain EXTRACT on a
-- timestamptz column is only STABLE and is rejected in index expressions)
-- GIN Index (for array and full-text search)
CREATE INDEX CONCURRENTLY idx_products_tags
ON products USING GIN(tags);
CREATE INDEX CONCURRENTLY idx_documents_content
ON documents USING GIN(to_tsvector('english', content));
-- Hash Index (for equality comparisons only)
CREATE INDEX CONCURRENTLY idx_session_hash
ON sessions USING HASH(session_token);
-- BRIN Index (Block Range Index, good for time-series data)
CREATE INDEX CONCURRENTLY idx_metrics_timestamp
ON metrics USING BRIN(timestamp);
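-- CREATE INDEX CONCURRENTLY can fail partway through and leave an INVALID
-- index behind; a quick check (sketch) before relying on the indexes above:
SELECT n.nspname, c.relname
FROM pg_index i
JOIN pg_class c ON c.oid = i.indexrelid
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE NOT i.indisvalid;
-- An invalid index must be dropped and rebuilt (or REINDEX CONCURRENTLY on PostgreSQL 12+)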
-- 3. Query Optimization Examples
-- ================================================================================
-- EXPLAIN ANALYZE for query performance analysis
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT u.name, COUNT(o.id) as order_count, SUM(o.total_amount) as total_spent
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2023-01-01'
AND o.status = 'completed'
GROUP BY u.id, u.name
HAVING COUNT(o.id) > 10
ORDER BY total_spent DESC
LIMIT 10;
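-- Reading the plan: watch for Seq Scan nodes on large tables where an index
-- was expected, big gaps between estimated and actual row counts (stale
-- statistics; run ANALYZE), and Sort nodes showing "external merge" (the sort
-- spilled to disk; consider raising work_mem for that session).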
-- Materialized Views for complex aggregations
CREATE MATERIALIZED VIEW mv_user_order_stats AS
SELECT
u.id as user_id,
u.name as user_name,
u.email,
COUNT(o.id) as total_orders,
SUM(CASE WHEN o.status = 'completed' THEN 1 ELSE 0 END) as completed_orders,
SUM(o.total_amount) as total_spent,
AVG(o.total_amount) as avg_order_value,
MAX(o.created_at) as last_order_date
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, u.email
WITH DATA;
-- Create unique index on materialized view
CREATE UNIQUE INDEX idx_mv_user_order_stats_user_id
ON mv_user_order_stats(user_id);
-- Refresh materialized view
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_user_order_stats;
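-- Refreshes can be scheduled inside the database if the pg_cron extension is
-- available (an assumption; cron.schedule is pg_cron's API, not core PostgreSQL):
CREATE EXTENSION IF NOT EXISTS pg_cron;
SELECT cron.schedule(
'refresh-user-order-stats',   -- job name
'*/30 * * * *',               -- every 30 minutes
$$REFRESH MATERIALIZED VIEW CONCURRENTLY mv_user_order_stats$$
);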
-- Common Table Expressions (CTEs) for complex queries
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', created_at) as month,
SUM(total_amount) as monthly_total,
COUNT(*) as order_count
FROM orders
WHERE created_at >= '2023-01-01'
GROUP BY DATE_TRUNC('month', created_at)
),
sales_growth AS (
SELECT
month,
monthly_total,
order_count,
LAG(monthly_total) OVER (ORDER BY month) as previous_month_total,
LAG(order_count) OVER (ORDER BY month) as previous_month_count
FROM monthly_sales
)
SELECT
month,
monthly_total,
order_count,
previous_month_total,
previous_month_count,
CASE
WHEN previous_month_total > 0
THEN ROUND((monthly_total - previous_month_total) * 100.0 / previous_month_total, 2)
ELSE NULL
END as growth_percentage
FROM sales_growth;
-- Window Functions for efficient analytics
SELECT
id,
user_id,
total_amount,
created_at,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY total_amount DESC) as order_rank,
SUM(total_amount) OVER (PARTITION BY user_id ORDER BY created_at
ROWS UNBOUNDED PRECEDING) as running_total,
AVG(total_amount) OVER (PARTITION BY user_id ORDER BY created_at
ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) as moving_avg
FROM orders
WHERE user_id = 123;
-- 4. Partitioning for Large Tables
-- ================================================================================
-- Range Partitioning (time-series data)
CREATE TABLE orders_partitioned (
id BIGSERIAL,
user_id INTEGER NOT NULL,
status VARCHAR(20) NOT NULL,
total_amount DECIMAL(10,2) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
) PARTITION BY RANGE (created_at);
-- Create partitions
CREATE TABLE orders_2023_q1 PARTITION OF orders_partitioned
FOR VALUES FROM ('2023-01-01') TO ('2023-04-01');
CREATE TABLE orders_2023_q2 PARTITION OF orders_partitioned
FOR VALUES FROM ('2023-04-01') TO ('2023-07-01');
CREATE TABLE orders_2023_q3 PARTITION OF orders_partitioned
FOR VALUES FROM ('2023-07-01') TO ('2023-10-01');
CREATE TABLE orders_2023_q4 PARTITION OF orders_partitioned
FOR VALUES FROM ('2023-10-01') TO ('2024-01-01');
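-- Since PostgreSQL 11 an index created on the partitioned parent cascades to
-- all partitions, including ones attached later:
CREATE INDEX idx_orders_partitioned_user_created
ON orders_partitioned (user_id, created_at DESC);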
-- List Partitioning (categorical data)
CREATE TABLE users_partitioned (
id BIGSERIAL,
username VARCHAR(50) NOT NULL,
email VARCHAR(255) NOT NULL,
region VARCHAR(50) NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
) PARTITION BY LIST (region);
-- Create partitions by region
CREATE TABLE users_north_america PARTITION OF users_partitioned
FOR VALUES IN ('USA', 'Canada', 'Mexico');
CREATE TABLE users_europe PARTITION OF users_partitioned
FOR VALUES IN ('UK', 'Germany', 'France', 'Spain', 'Italy');
CREATE TABLE users_asia PARTITION OF users_partitioned
FOR VALUES IN ('China', 'Japan', 'India', 'South Korea');
CREATE TABLE users_other PARTITION OF users_partitioned
DEFAULT;
-- 5. Advanced Function and Trigger Examples
-- ================================================================================
-- Search function using full-text search (assumes products has a tsvector column search_vector)
CREATE OR REPLACE FUNCTION search_products(query_text TEXT)
RETURNS TABLE(
id INTEGER,
name TEXT,
description TEXT,
rank REAL
) AS $$
BEGIN
RETURN QUERY
SELECT
p.id,
p.name,
p.description,
ts_rank(p.search_vector, plainto_tsquery('english', query_text)) AS rank
FROM products p
WHERE p.search_vector @@ plainto_tsquery('english', query_text)
-- a bare "rank" here would be ambiguous with the function's OUT column in
-- PL/pgSQL, so the expression is repeated
ORDER BY ts_rank(p.search_vector, plainto_tsquery('english', query_text)) DESC;
END;
$$ LANGUAGE plpgsql;
-- Audit trigger for tracking changes
CREATE OR REPLACE FUNCTION audit_trigger_function()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'DELETE' THEN
INSERT INTO audit_table (
table_name, operation, old_data, changed_by, changed_at
) VALUES (
TG_TABLE_NAME, TG_OP, to_jsonb(OLD), current_user, NOW()
);
RETURN OLD;
ELSIF TG_OP = 'UPDATE' THEN
INSERT INTO audit_table (
table_name, operation, old_data, new_data, changed_by, changed_at
) VALUES (
TG_TABLE_NAME, TG_OP, to_jsonb(OLD), to_jsonb(NEW), current_user, NOW()
);
RETURN NEW;
ELSIF TG_OP = 'INSERT' THEN
INSERT INTO audit_table (
table_name, operation, new_data, changed_by, changed_at
) VALUES (
TG_TABLE_NAME, TG_OP, to_jsonb(NEW), current_user, NOW()
);
RETURN NEW;
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- Apply audit trigger
CREATE TRIGGER audit_users_trigger
AFTER INSERT OR UPDATE OR DELETE ON users
FOR EACH ROW EXECUTE FUNCTION audit_trigger_function();
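-- The trigger writes to an audit_table that is not defined above; a minimal
-- sketch compatible with the to_jsonb() payloads:
CREATE TABLE IF NOT EXISTS audit_table (
id BIGSERIAL PRIMARY KEY,
table_name TEXT NOT NULL,
operation TEXT NOT NULL,
old_data JSONB,
new_data JSONB,
changed_by TEXT NOT NULL,
changed_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);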
-- 6. Performance Monitoring and Diagnostics
-- ================================================================================
-- Enable pg_stat_statements extension
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
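-- Note: the module must also be preloaded, or querying the view fails:
-- shared_preload_libraries = 'pg_stat_statements' in postgresql.conf, then restart.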
-- Query performance statistics
SELECT
query,
calls,
total_exec_time,
mean_exec_time,
rows,
100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 10;
-- Index usage statistics
SELECT
schemaname,
relname AS tablename,
indexrelname AS indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
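-- The mirror-image question: indexes that are never scanned are pure write
-- overhead and are candidates for removal (verify usage on replicas first):
SELECT
schemaname,
relname AS tablename,
indexrelname AS indexname,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;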
-- Table size and row count statistics
SELECT
schemaname,
relname AS tablename,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
pg_size_pretty(pg_relation_size(relid)) AS table_size,
pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size,
n_tup_ins,
n_tup_upd,
n_tup_del,
n_live_tup,
n_dead_tup,
last_vacuum,
last_autovacuum,
last_analyze,
last_autoanalyze
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC;
-- Long-running queries
SELECT
pid,
now() - pg_stat_activity.query_start AS duration,
query,
state,
wait_event_type,
wait_event
FROM pg_stat_activity
WHERE (now() - pg_stat_activity.query_start) > interval '5 minutes'
AND state = 'active'
ORDER BY duration DESC;
-- Blocking queries
SELECT
blocked_locks.pid AS blocked_pid,
blocked_activity.usename AS blocked_user,
blocking_locks.pid AS blocking_pid,
blocking_activity.usename AS blocking_user,
blocked_activity.query AS blocked_statement,
blocking_activity.query AS current_statement_in_blocking_process
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks
ON blocking_locks.locktype = blocked_locks.locktype
AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database
AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;
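-- Once a blocking pid is identified it can be cancelled or, if that fails,
-- terminated (both are built-in admin functions; the pid below is illustrative):
SELECT pg_cancel_backend(12345);     -- cancel the current query only
SELECT pg_terminate_backend(12345);  -- terminate the whole session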
-- 7. Connection Pooling Configuration (pgbouncer.ini)
-- ================================================================================
[pgbouncer]
# Connection settings
listen_port = 6432
listen_addr = 127.0.0.1
# Database connections
pool_mode = transaction
max_client_conn = 100
default_pool_size = 20
min_pool_size = 5
reserve_pool_size = 5
# Timeouts
server_reset_query = DISCARD ALL
server_check_delay = 30
server_check_query = select 1
server_lifetime = 3600
server_idle_timeout = 600
# Logging
log_connections = 1
log_disconnections = 1
log_pooler_errors = 1
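# Clients then connect to PgBouncer instead of PostgreSQL directly, e.g.:
#   psql -h 127.0.0.1 -p 6432 -U app_user your_database
# Caveat: pool_mode = transaction breaks session-level state such as SET,
# LISTEN/NOTIFY, and prepared statements held across transactions.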
-- 8. Backup and Recovery Scripts
-- ================================================================================
-- Backup script (bash)
#!/bin/bash
# postgresql_backup.sh
DB_NAME="your_database"
DB_USER="postgres"
BACKUP_DIR="/var/backups/postgresql"
DATE=$(date +%Y%m%d_%H%M%S)
# Create backup directory
mkdir -p $BACKUP_DIR
# Custom format backup with compression
pg_dump -h localhost -U $DB_USER -d $DB_NAME \
--format=custom \
--compress=9 \
--verbose \
--file=$BACKUP_DIR/${DB_NAME}_${DATE}.backup
# Schema only backup
pg_dump -h localhost -U $DB_USER -d $DB_NAME \
--schema-only \
--file=$BACKUP_DIR/${DB_NAME}_schema_${DATE}.sql
# Data only backup
pg_dump -h localhost -U $DB_USER -d $DB_NAME \
--data-only \
--compress=9 \
--file=$BACKUP_DIR/${DB_NAME}_data_${DATE}.sql.gz
# Remove old backups (keep 30 days)
find $BACKUP_DIR -name "*.backup" -mtime +30 -delete
find $BACKUP_DIR -name "*.sql" -mtime +30 -delete
find $BACKUP_DIR -name "*.sql.gz" -mtime +30 -delete
echo "Backup completed: $BACKUP_DIR/${DB_NAME}_${DATE}.backup"
-- Point-in-Time Recovery
-- Step 1: Stop PostgreSQL
sudo systemctl stop postgresql
-- Step 2: Create recovery configuration
-- (recovery.conf was removed in PostgreSQL 12; recovery settings now go in
-- postgresql.conf / postgresql.auto.conf plus an empty recovery.signal file)
echo "restore_command = 'cp /var/lib/postgresql/wal_archive/%f %p'" >> /var/lib/postgresql/12/main/postgresql.auto.conf
echo "recovery_target_time = '2023-12-01 10:00:00'" >> /var/lib/postgresql/12/main/postgresql.auto.conf
touch /var/lib/postgresql/12/main/recovery.signal
-- Step 3: Restore the physical base backup into the data directory
-- (WAL-based PITR replays on top of a base backup, e.g. from pg_basebackup;
-- a logical pg_dump archive is restored differently, against a running server:
-- pg_restore -h localhost -U postgres -d your_database /path/to/backup.backup)
-- Step 4: Start PostgreSQL; it replays archived WAL up to the target time
sudo systemctl start postgresql
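-- Status check (sketch): pg_is_in_recovery() stays true while WAL is replaying
-- or paused at the recovery target; pg_promote() ends recovery (PostgreSQL 12+).
SELECT pg_is_in_recovery();
SELECT pg_promote();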
-- 9. Security Configuration
-- ================================================================================
-- Row Level Security (RLS)
ALTER TABLE orders ENABLE ROW LEVEL SECURITY;
-- Policy for users to see only their own orders
CREATE POLICY user_orders_policy ON orders
FOR ALL
TO app_user
USING (user_id = current_setting('app.current_user_id')::INTEGER);
-- Policy for managers to see all orders in their department
CREATE POLICY manager_orders_policy ON orders
FOR SELECT
TO manager_role
USING (department_id = current_setting('app.current_department_id')::INTEGER);
-- Set user context
SET app.current_user_id = '123';
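-- A per-transaction variant (sketch): the third argument of set_config makes
-- the setting local to the current transaction, which suits pooled connections.
BEGIN;
SELECT set_config('app.current_user_id', '123', true);
SELECT * FROM orders;  -- the RLS policy above now filters to user 123
COMMIT;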
-- Encrypted columns using pgcrypto extension
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- Function to encrypt data
CREATE OR REPLACE FUNCTION encrypt_sensitive_data(data TEXT)
RETURNS TEXT AS $$
BEGIN
RETURN encode(encrypt(data::bytea, 'encryption_key', 'aes'), 'base64');
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
-- Function to decrypt data
CREATE OR REPLACE FUNCTION decrypt_sensitive_data(encrypted_data TEXT)
RETURNS TEXT AS $$
BEGIN
RETURN convert_from(decrypt(decode(encrypted_data, 'base64'), 'encryption_key', 'aes'), 'UTF8');
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
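-- Usage sketch (the hardcoded 'encryption_key' above is illustrative only; a
-- real deployment should fetch the key from outside the SQL, e.g. a secrets manager):
SELECT encrypt_sensitive_data('4111-1111-1111-1111') AS ciphertext;
SELECT decrypt_sensitive_data(encrypt_sensitive_data('hello')) AS roundtrip;  -- 'hello'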
-- 10. Monitoring Dashboard Query
-- ================================================================================
-- Comprehensive monitoring dashboard query
WITH
database_stats AS (
SELECT
pg_size_pretty(pg_database_size(current_database())) AS database_size,
(SELECT count(*) FROM pg_stat_activity WHERE state = 'active') AS active_connections,
(SELECT count(*) FROM pg_stat_activity WHERE state = 'idle') AS idle_connections,
(SELECT count(*) FROM pg_locks WHERE NOT granted) AS waiting_queries
),
table_stats AS (
SELECT
schemaname,
relname AS tablename,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
n_live_tup,
n_dead_tup,
last_vacuum,
last_autovacuum,
(extract(epoch from (now() - greatest(last_vacuum, last_autovacuum))) / 86400)::integer AS days_since_vacuum
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC
LIMIT 10
),
index_stats AS (
SELECT
schemaname,
relname AS tablename,
indexrelname AS indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan > 0
ORDER BY idx_scan DESC
LIMIT 10
),
slow_queries AS (
SELECT
query,
calls,
total_exec_time,
mean_exec_time,
rows
FROM pg_stat_statements
WHERE mean_exec_time > 1000 -- queries taking more than 1 second
ORDER BY total_exec_time DESC
LIMIT 5
)
SELECT 'Database Stats' as metric_type, '' as details, '' as value, '' as additional_info
UNION ALL
SELECT
'Database Stats' as metric_type,
'Size' as details,
database_size as value,
'' as additional_info
FROM database_stats
UNION ALL
SELECT
'Database Stats' as metric_type,
'Active Connections' as details,
active_connections::text as value,
'' as additional_info
FROM database_stats
UNION ALL
SELECT
'Database Stats' as metric_type,
'Idle Connections' as details,
idle_connections::text as value,
'' as additional_info
FROM database_stats
UNION ALL
SELECT
'Database Stats' as metric_type,
'Waiting Queries' as details,
waiting_queries::text as value,
'' as additional_info
FROM database_stats
UNION ALL
SELECT
'Top Tables' as metric_type,
tablename as details,
total_size as value,
n_live_tup::text || ' live rows, ' || COALESCE(days_since_vacuum::text || ' days since vacuum', 'never vacuumed') as additional_info
FROM table_stats
UNION ALL
SELECT
'Top Indexes' as metric_type,
indexname as details,
idx_scan::text || ' scans' as value,
index_size as additional_info
FROM index_stats
UNION ALL
SELECT
'Slow Queries' as metric_type,
left(query, 50) || '...' as details,
mean_exec_time::text || ' ms' as value,
calls::text || ' calls, ' || total_exec_time::text || ' ms total' as additional_info
FROM slow_queries;
💻 Complex PostgreSQL Queries sql
🔴 complex
⭐⭐⭐⭐
Advanced SQL queries demonstrating joins, subqueries, window functions, and analytical techniques
⏱️ 50 min
🏷️ postgresql, sql, complex, analytics, queries
Prerequisites:
Advanced SQL, PostgreSQL, Data Analysis, Window Functions
-- Complex PostgreSQL Queries and Analytics
-- Advanced SQL patterns for data analysis and reporting
-- 1. Setup Sample Tables for Demonstrations
-- ================================================================================
CREATE TABLE customers (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
email VARCHAR(255) UNIQUE NOT NULL,
registration_date DATE NOT NULL DEFAULT CURRENT_DATE,
country VARCHAR(50) NOT NULL,
city VARCHAR(100),
segment VARCHAR(20) DEFAULT 'regular' CHECK (segment IN ('regular', 'premium', 'vip')),
last_purchase_date DATE,
total_purchases DECIMAL(12,2) DEFAULT 0
);
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(200) NOT NULL,
category VARCHAR(50) NOT NULL,
price DECIMAL(10,2) NOT NULL,
cost DECIMAL(10,2) NOT NULL,
stock_quantity INTEGER NOT NULL DEFAULT 0,
created_date DATE NOT NULL DEFAULT CURRENT_DATE,
is_active BOOLEAN DEFAULT true,
tags TEXT[] DEFAULT '{}'
);
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
customer_id INTEGER REFERENCES customers(id),
order_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
status VARCHAR(20) NOT NULL CHECK (status IN ('pending', 'confirmed', 'shipped', 'delivered', 'cancelled')),
total_amount DECIMAL(12,2) NOT NULL,
shipping_address JSONB,
payment_method VARCHAR(50),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE TABLE order_items (
id SERIAL PRIMARY KEY,
order_id INTEGER REFERENCES orders(id),
product_id INTEGER REFERENCES products(id),
quantity INTEGER NOT NULL CHECK (quantity > 0),
unit_price DECIMAL(10,2) NOT NULL,
discount_percent DECIMAL(5,2) DEFAULT 0 CHECK (discount_percent >= 0 AND discount_percent <= 100),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE TABLE product_reviews (
id SERIAL PRIMARY KEY,
customer_id INTEGER REFERENCES customers(id),
product_id INTEGER REFERENCES products(id),
rating INTEGER CHECK (rating >= 1 AND rating <= 5),
review_text TEXT,
review_date DATE NOT NULL DEFAULT CURRENT_DATE,
is_verified BOOLEAN DEFAULT false,
helpful_count INTEGER DEFAULT 0
);
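-- Minimal seed rows (a sketch assuming a fresh schema, so the analytics below
-- return data; values are illustrative):
INSERT INTO customers (name, email, country, city, segment) VALUES
('Alice Example', 'alice@example.com', 'USA', 'Boston', 'premium'),
('Bob Example', 'bob@example.com', 'Germany', 'Berlin', 'regular');
INSERT INTO products (name, category, price, cost, stock_quantity, tags) VALUES
('Laptop', 'electronics', 1200.00, 900.00, 10, '{portable,work}'),
('Mouse', 'electronics', 25.00, 10.00, 100, '{accessory}');
INSERT INTO orders (customer_id, status, total_amount, shipping_address) VALUES
(1, 'delivered', 1225.00, '{"country": "USA", "city": "Boston", "postal_code": "02101"}'),
(2, 'delivered', 25.00, '{"country": "Germany", "city": "Berlin", "postal_code": "10115"}');
INSERT INTO order_items (order_id, product_id, quantity, unit_price) VALUES
(1, 1, 1, 1200.00), (1, 2, 1, 25.00), (2, 2, 1, 25.00);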
-- 2. Complex JOIN Queries
-- ================================================================================
-- Customer Lifetime Value (CLV) Analysis
WITH customer_orders AS (
SELECT
c.id,
c.name,
c.email,
c.registration_date,
c.country,
c.segment,
COUNT(o.id) AS total_orders,
SUM(CASE WHEN o.status = 'delivered' THEN 1 ELSE 0 END) AS completed_orders,
SUM(CASE WHEN o.status = 'delivered' THEN o.total_amount ELSE 0 END) AS total_spent,
AVG(CASE WHEN o.status = 'delivered' THEN o.total_amount END) AS avg_order_value,
MIN(o.order_date) AS first_order_date,
MAX(o.order_date) AS last_order_date,
EXTRACT(DAY FROM (MAX(o.order_date) - MIN(o.order_date))) AS customer_lifetime_days
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id, c.name, c.email, c.registration_date, c.country, c.segment
),
customer_metrics AS (
SELECT
*,
total_spent / NULLIF(total_orders, 0) AS avg_order_value_calculated,
CASE
WHEN last_order_date >= CURRENT_DATE - INTERVAL '30 days' THEN 'Active'
WHEN last_order_date >= CURRENT_DATE - INTERVAL '90 days' THEN 'At Risk'
WHEN last_order_date < CURRENT_DATE - INTERVAL '180 days' THEN 'Churned'
ELSE 'Dormant'
END AS customer_status,
total_spent / NULLIF((CURRENT_DATE - registration_date), 0) * 365 AS annual_spend_rate
FROM customer_orders
)
SELECT
customer_status,
country,
segment,
COUNT(*) AS customer_count,
SUM(total_spent) AS total_revenue,
AVG(avg_order_value_calculated) AS avg_order_value,
AVG(annual_spend_rate) AS avg_annual_spend_rate,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total_spent) AS median_clv,
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_spent) AS p95_clv
FROM customer_metrics
WHERE total_orders > 0
GROUP BY customer_status, country, segment
ORDER BY total_revenue DESC;
-- Product Performance Analysis with Category Comparison
WITH product_sales AS (
SELECT
p.id,
p.name,
p.category,
p.price,
p.cost,
p.tags,
COUNT(oi.id) AS times_ordered,
SUM(oi.quantity) AS total_quantity_sold,
SUM(oi.quantity * oi.unit_price) AS revenue,
SUM(oi.quantity * oi.unit_price * (1 - oi.discount_percent/100)) AS actual_revenue,
AVG(oi.discount_percent) AS avg_discount,
COUNT(DISTINCT oi.order_id) AS unique_orders,
MIN(o.order_date) AS first_sale_date,
MAX(o.order_date) AS last_sale_date
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
LEFT JOIN orders o ON oi.order_id = o.id AND o.status = 'delivered'
WHERE p.is_active = true
GROUP BY p.id, p.name, p.category, p.price, p.cost, p.tags
),
product_metrics AS (
SELECT
*,
(revenue - COALESCE(cost * total_quantity_sold, 0)) AS profit,
COALESCE((revenue - COALESCE(cost * total_quantity_sold, 0)) / NULLIF(revenue, 0) * 100, 0) AS profit_margin,
revenue / NULLIF(times_ordered, 0) AS avg_revenue_per_order,
EXTRACT(DAY FROM (last_sale_date - first_sale_date)) AS selling_period_days,
total_quantity_sold / NULLIF(EXTRACT(DAY FROM (last_sale_date - first_sale_date)), 0) AS daily_sales_rate
FROM product_sales
),
category_benchmarks AS (
SELECT
category,
AVG(profit_margin) AS category_avg_margin,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY profit_margin) AS category_p75_margin,
AVG(daily_sales_rate) AS category_avg_daily_rate
FROM product_metrics
WHERE times_ordered > 0
GROUP BY category
)
SELECT
pm.name,
pm.category,
pm.price,
pm.times_ordered,
pm.total_quantity_sold,
pm.profit,
ROUND(pm.profit_margin, 2) AS profit_margin,
pm.daily_sales_rate,
ROUND(cb.category_avg_margin, 2) AS category_avg_margin,
ROUND(pm.profit_margin - cb.category_avg_margin, 2) AS margin_vs_category,
CASE
WHEN pm.profit_margin > cb.category_p75_margin THEN 'Top Performer'
WHEN pm.profit_margin > cb.category_avg_margin THEN 'Above Average'
WHEN pm.profit_margin > 0 THEN 'Below Average'
ELSE 'Loss Making'
END AS performance_tier
FROM product_metrics pm
JOIN category_benchmarks cb ON pm.category = cb.category
WHERE pm.times_ordered > 10
ORDER BY pm.profit DESC;
-- 3. Window Functions and Time Series Analysis
-- ================================================================================
-- Monthly Sales Trends with Moving Averages and Growth Rates
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', order_date) AS month,
COUNT(DISTINCT customer_id) AS unique_customers,
COUNT(*) AS total_orders,
SUM(total_amount) AS total_revenue,
AVG(total_amount) AS avg_order_value
FROM orders
WHERE status = 'delivered'
AND order_date >= CURRENT_DATE - INTERVAL '2 years'
GROUP BY DATE_TRUNC('month', order_date)
),
sales_trends AS (
SELECT
month,
unique_customers,
total_orders,
total_revenue,
avg_order_value,
LAG(total_revenue) OVER (ORDER BY month) AS prev_month_revenue,
LAG(total_orders) OVER (ORDER BY month) AS prev_month_orders,
LAG(unique_customers) OVER (ORDER BY month) AS prev_month_customers,
SUM(total_revenue) OVER (ORDER BY month ROWS BETWEEN 11 PRECEDING AND CURRENT ROW) AS rolling_12m_revenue,
SUM(total_orders) OVER (ORDER BY month ROWS BETWEEN 11 PRECEDING AND CURRENT ROW) AS rolling_12m_orders,
AVG(total_revenue) OVER (ORDER BY month ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS moving_avg_5m,
ROW_NUMBER() OVER (ORDER BY total_revenue DESC) AS revenue_rank,
FIRST_VALUE(month) OVER (ORDER BY total_revenue DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS best_month
FROM monthly_sales
)
SELECT
month::DATE AS month_date,
unique_customers,
total_orders,
total_revenue,
avg_order_value,
ROUND((total_revenue - prev_month_revenue) * 100.0 / NULLIF(prev_month_revenue, 0), 2) AS revenue_growth_pct,
ROUND((total_orders - prev_month_orders) * 100.0 / NULLIF(prev_month_orders, 0), 2) AS orders_growth_pct,
ROUND((unique_customers - prev_month_customers) * 100.0 / NULLIF(prev_month_customers, 0), 2) AS customers_growth_pct,
moving_avg_5m,
rolling_12m_revenue,
revenue_rank,
-- (growth expressions are inlined: select-list aliases such as
-- revenue_growth_pct cannot be referenced within the same SELECT)
CASE
WHEN month = best_month THEN 'Best Month'
WHEN total_revenue > prev_month_revenue AND total_orders > prev_month_orders THEN 'Strong Growth'
WHEN (total_revenue - prev_month_revenue) * 100.0 / NULLIF(prev_month_revenue, 0) < -10 THEN 'Declining'
ELSE 'Stable'
END AS performance_indicator
FROM sales_trends
ORDER BY month;
-- Customer Segmentation using RFM (Recency, Frequency, Monetary)
WITH customer_rfm AS (
SELECT
c.id,
c.name,
c.email,
c.registration_date,
-- Recency: Days since last purchase
EXTRACT(DAY FROM (CURRENT_DATE - MAX(o.order_date))) AS recency_days,
-- Frequency: Total number of orders
COUNT(DISTINCT o.id) AS frequency,
-- Monetary: Total amount spent
SUM(o.total_amount) AS monetary_value,
-- Additional metrics
MIN(o.order_date) AS first_purchase_date,
AVG(o.total_amount) AS avg_order_value,
-- Customer tenure in days (date minus date yields an integer day count)
(CURRENT_DATE - c.registration_date) AS tenure_days
FROM customers c
JOIN orders o ON c.id = o.customer_id
WHERE o.status = 'delivered'
GROUP BY c.id, c.name, c.email, c.registration_date
),
rfm_scores AS (
SELECT
id,
name,
email,
recency_days,
frequency,
monetary_value,
first_purchase_date,
avg_order_value,
tenure_days,
-- Score each dimension (5 = best, 1 = worst)
NTILE(5) OVER (ORDER BY recency_days ASC) AS recency_score, -- Lower recency_days = higher score
NTILE(5) OVER (ORDER BY frequency DESC) AS frequency_score,
NTILE(5) OVER (ORDER BY monetary_value DESC) AS monetary_score,
-- Combined RFM score
(NTILE(5) OVER (ORDER BY recency_days ASC) +
NTILE(5) OVER (ORDER BY frequency DESC) +
NTILE(5) OVER (ORDER BY monetary_value DESC)) / 3.0 AS rfm_score
FROM customer_rfm
),
rfm_segments AS (
SELECT
*,
CASE
WHEN recency_score >= 4 AND frequency_score >= 4 AND monetary_score >= 4 THEN 'Champions'
WHEN recency_score >= 3 AND frequency_score >= 3 AND monetary_score >= 3 THEN 'Loyal Customers'
WHEN recency_score >= 4 AND frequency_score <= 2 THEN 'New Customers'
WHEN recency_score >= 3 AND frequency_score >= 3 AND monetary_score <= 2 THEN 'Potential Loyalists'
WHEN recency_score <= 2 AND frequency_score >= 4 AND monetary_score >= 4 THEN 'At Risk'
WHEN recency_score <= 2 AND frequency_score <= 2 AND monetary_score <= 2 THEN 'Lost'
WHEN recency_score <= 2 AND frequency_score >= 3 THEN 'Cannot Lose Them'
ELSE 'Others'
END AS rfm_segment
FROM rfm_scores
)
SELECT
rfm_segment,
COUNT(*) AS customer_count,
SUM(monetary_value) AS total_revenue,
AVG(monetary_value) AS avg_customer_value,
AVG(frequency) AS avg_frequency,
AVG(recency_days) AS avg_recency_days,
ROUND(AVG(tenure_days), 0) AS avg_tenure_days,
ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS segment_percentage
FROM rfm_segments
GROUP BY rfm_segment
ORDER BY total_revenue DESC;
-- 4. JSONB and Array Operations
-- ================================================================================
-- Analysis of Product Tags and Shipping Patterns
WITH tag_analysis AS (
SELECT
p.category,
t.tag,
COUNT(*) as product_count,
AVG(p.price) as avg_price,
SUM(oi.quantity) as total_quantity_sold
FROM products p
-- UNNEST moves into a LATERAL join: set-returning functions cannot appear in GROUP BY
CROSS JOIN LATERAL UNNEST(p.tags) AS t(tag)
LEFT JOIN order_items oi ON p.id = oi.product_id
LEFT JOIN orders o ON oi.order_id = o.id AND o.status = 'delivered'
GROUP BY p.category, t.tag
),
),
shipping_analysis AS (
SELECT
(o.shipping_address->>'country') as country,
(o.shipping_address->>'city') as city,
(o.shipping_address->>'postal_code') as postal_code,
COUNT(*) as order_count,
SUM(o.total_amount) as total_revenue,
AVG(o.total_amount) as avg_order_value
FROM orders o
WHERE o.status = 'delivered'
AND o.shipping_address IS NOT NULL
AND o.shipping_address ? 'country'
GROUP BY
o.shipping_address->>'country',
o.shipping_address->>'city',
o.shipping_address->>'postal_code'
)
-- Combine tag and shipping analysis
SELECT
'Product Tags' as analysis_type,
category,
tag as dimension,
product_count,
ROUND(avg_price, 2) as avg_price,
total_quantity_sold
FROM tag_analysis
UNION ALL
SELECT
'Shipping Analysis' as analysis_type,
country as category,
city as dimension,
order_count as product_count,
ROUND(avg_order_value, 2) as avg_price,
total_revenue as total_quantity_sold
FROM shipping_analysis
ORDER BY analysis_type, product_count DESC;
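-- Indexing the JSONB lookups above (a sketch): an expression B-tree covers
-- ->> equality on a single key, while a GIN index with jsonb_path_ops supports
-- containment (@>) queries.
CREATE INDEX CONCURRENTLY idx_orders_shipping_country
ON orders ((shipping_address->>'country'));
CREATE INDEX CONCURRENTLY idx_orders_shipping_gin
ON orders USING GIN (shipping_address jsonb_path_ops);
-- Example containment query that can use the GIN index:
SELECT COUNT(*) FROM orders WHERE shipping_address @> '{"country": "USA"}';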
-- 5. Complex Subqueries and CTEs
-- ================================================================================
-- Market Basket Analysis: Products Frequently Bought Together
WITH customer_product_pairs AS (
SELECT
o.customer_id,  -- customer_id lives on orders, not order_items
o1.product_id as product_a,
o2.product_id as product_b,
COUNT(*) as purchase_count
FROM order_items o1
JOIN order_items o2 ON o1.order_id = o2.order_id AND o1.product_id < o2.product_id
JOIN orders o ON o1.order_id = o.id
WHERE o.status = 'delivered'
GROUP BY o.customer_id, o1.product_id, o2.product_id
),
),
product_frequencies AS (
SELECT
product_id,
COUNT(DISTINCT customer_id) as customer_count
FROM order_items oi
JOIN orders o ON oi.order_id = o.id
WHERE o.status = 'delivered'
GROUP BY product_id
),
basket_analysis AS (
SELECT
cpp.product_a,
cpp.product_b,
cpp.purchase_count,
pf_a.customer_count as product_a_customers,
pf_b.customer_count as product_b_customers,
-- Calculate lift: observed co-occurrence / expected co-occurrence
(cpp.purchase_count::FLOAT / NULLIF(pf_a.customer_count, 0) / NULLIF(pf_b.customer_count, 0) *
(SELECT COUNT(DISTINCT customer_id) FROM orders WHERE status = 'delivered')) as lift
FROM customer_product_pairs cpp
JOIN product_frequencies pf_a ON cpp.product_a = pf_a.product_id
JOIN product_frequencies pf_b ON cpp.product_b = pf_b.product_id
WHERE cpp.purchase_count >= 5 -- Minimum co-occurrences
)
SELECT
pa.name as product_a_name,
pb.name as product_b_name,
ba.purchase_count as times_purchased_together,
ba.lift,
ROUND(ba.lift, 2) as lift_rounded,
pa.category as product_a_category,
pb.category as product_b_category,
CASE
WHEN ba.lift > 2.0 THEN 'Strong Association'
WHEN ba.lift > 1.5 THEN 'Moderate Association'
WHEN ba.lift > 1.0 THEN 'Weak Association'
ELSE 'No Association'
END as association_strength
FROM basket_analysis ba
JOIN products pa ON ba.product_a = pa.id
JOIN products pb ON ba.product_b = pb.id
ORDER BY ba.purchase_count DESC;
-- 6. Advanced Aggregation and Statistical Analysis
-- ================================================================================
-- Customer Satisfaction and Purchase Correlation
WITH customer_satisfaction AS (
SELECT
c.id,
c.name,
c.segment,
AVG(pr.rating) as avg_rating,
COUNT(pr.id) as review_count,
STDDEV(pr.rating) as rating_stddev,
SUM(pr.helpful_count) as total_helpful_votes
FROM customers c
LEFT JOIN product_reviews pr ON c.id = pr.customer_id
GROUP BY c.id, c.name, c.segment
),
customer_behavior AS (
SELECT
c.id,
COUNT(o.id) as total_orders,
SUM(CASE WHEN o.status = 'delivered' THEN o.total_amount ELSE 0 END) as total_spent,
AVG(CASE WHEN o.status = 'delivered' THEN o.total_amount END) as avg_order_value,
MIN(o.order_date) as first_order_date,
MAX(o.order_date) as last_order_date,
EXTRACT(DAY FROM (MAX(o.order_date) - MIN(o.order_date))) as purchase_span_days
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id
),
correlation_analysis AS (
SELECT
cs.id,
cs.name,
cs.segment,
cs.avg_rating,
cs.review_count,
cb.total_orders,
cb.total_spent,
cb.avg_order_value,
cb.purchase_span_days,
-- Calculate correlation-like metrics
CASE
WHEN cs.avg_rating >= 4 AND cb.avg_order_value > (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'High Value, High Satisfaction'
WHEN cs.avg_rating >= 4 AND cb.avg_order_value <= (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'Low Value, High Satisfaction'
WHEN cs.avg_rating < 3 AND cb.avg_order_value > (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'High Value, Low Satisfaction'
WHEN cs.avg_rating < 3 AND cb.avg_order_value <= (SELECT AVG(total_amount) FROM orders WHERE status = 'delivered') THEN 'Low Value, Low Satisfaction'
ELSE 'Mixed'
END AS satisfaction_value_segment
FROM customer_satisfaction cs
JOIN customer_behavior cb ON cs.id = cb.id
WHERE cs.review_count > 0 AND cb.total_orders > 0
)
SELECT
segment,
satisfaction_value_segment,
COUNT(*) as customer_count,
AVG(avg_rating) as avg_rating,
AVG(total_spent) as avg_total_spent,
AVG(avg_order_value) as avg_order_value,
AVG(review_count) as avg_review_count,
ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) as segment_percentage
FROM correlation_analysis
GROUP BY segment, satisfaction_value_segment
ORDER BY customer_count DESC;