SQL 脚本示例

各种场景和用例的 SQL 查询示例和数据库脚本

📝 用户管理系统

🟡 intermediate

用户管理数据库设计和查询

-- User Management Database Schema
-- NOTE(review): despite the original "PostgreSQL and MySQL" claim, this
-- script relies on MySQL-only syntax (CREATE DATABASE/USE, AUTO_INCREMENT,
-- ENUM, inline INDEX, ON UPDATE CURRENT_TIMESTAMP) — treat MySQL as the
-- actual target engine.

-- Create the schema and switch the session to it (MySQL).
CREATE DATABASE user_management;
USE user_management;

-- Users table
-- Users table: identity, credentials (pre-hashed password), profile data,
-- and account-status flags. MySQL dialect (AUTO_INCREMENT, ENUM, REGEXP).
CREATE TABLE users (
    user_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    username VARCHAR(50) UNIQUE NOT NULL,
    email VARCHAR(255) UNIQUE NOT NULL,
    password_hash VARCHAR(255) NOT NULL,
    first_name VARCHAR(100),
    last_name VARCHAR(100),
    phone VARCHAR(20),
    avatar_url TEXT,
    bio TEXT,
    date_of_birth DATE,
    gender ENUM('male', 'female', 'other', 'prefer_not_to_say'),

    -- Status fields
    is_active BOOLEAN DEFAULT TRUE,
    is_verified BOOLEAN DEFAULT FALSE,
    is_premium BOOLEAN DEFAULT FALSE,

    -- Timestamps
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    last_login_at TIMESTAMP,

    -- Constraints
    CONSTRAINT chk_username_length CHECK (LENGTH(username) >= 3),
    -- FIX(review): MySQL's string parser consumes one backslash, so the
    -- original '\.' reached the regex engine as a bare '.' (match any
    -- character). '\\.' delivers a literal-dot match before the TLD.
    CONSTRAINT chk_email_format CHECK (email REGEXP '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$')
);

-- User roles table
-- User roles table: one row per named role; permissions is a JSON
-- capability map (see the seed data below, e.g. '{"read": true}').
CREATE TABLE roles (
    role_id INT PRIMARY KEY AUTO_INCREMENT,
    role_name VARCHAR(50) UNIQUE NOT NULL,
    description TEXT,
    permissions JSON,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- User role assignments
-- User role assignments: many-to-many junction between users and roles.
-- Rows vanish with either side (ON DELETE CASCADE). assigned_by records the
-- granting user; its FK has no ON DELETE action, so the default restrictive
-- behavior blocks deleting an admin who still has grants on record.
CREATE TABLE user_roles (
    user_id BIGINT,
    role_id INT,
    assigned_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    assigned_by BIGINT,
    PRIMARY KEY (user_id, role_id),
    FOREIGN KEY (user_id) REFERENCES users(user_id) ON DELETE CASCADE,
    FOREIGN KEY (role_id) REFERENCES roles(role_id) ON DELETE CASCADE,
    FOREIGN KEY (assigned_by) REFERENCES users(user_id)
);

-- User preferences
-- User preferences: optional 1:1 extension row keyed by user_id,
-- removed together with the user (ON DELETE CASCADE).
CREATE TABLE user_preferences (
    user_id BIGINT PRIMARY KEY,
    language VARCHAR(10) DEFAULT 'en',
    timezone VARCHAR(50) DEFAULT 'UTC',
    theme VARCHAR(20) DEFAULT 'light',
    email_notifications BOOLEAN DEFAULT TRUE,
    push_notifications BOOLEAN DEFAULT TRUE,
    sms_notifications BOOLEAN DEFAULT FALSE,
    privacy_level ENUM('public', 'friends', 'private') DEFAULT 'public',
    two_factor_enabled BOOLEAN DEFAULT FALSE,
    FOREIGN KEY (user_id) REFERENCES users(user_id) ON DELETE CASCADE
);

-- User sessions
-- User sessions: session_id is an opaque application-generated token.
-- ip_address is VARCHAR(45) so a full IPv6 textual address fits.
-- Expired/stale rows are purged by the cleanup DELETE further below.
CREATE TABLE user_sessions (
    session_id VARCHAR(128) PRIMARY KEY,
    user_id BIGINT NOT NULL,
    ip_address VARCHAR(45),
    user_agent TEXT,
    device_type VARCHAR(20),
    is_active BOOLEAN DEFAULT TRUE,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    expires_at TIMESTAMP,
    last_activity TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (user_id) REFERENCES users(user_id) ON DELETE CASCADE
);

-- Login attempts for security
-- Login attempts audit log (both successes and failures are recorded).
-- Keyed by email rather than user_id so attempts against nonexistent
-- accounts are also captured. Indexed for the rate-limit query below.
CREATE TABLE login_attempts (
    attempt_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    email VARCHAR(255) NOT NULL,
    ip_address VARCHAR(45),
    user_agent TEXT,
    success BOOLEAN NOT NULL,
    attempted_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    failure_reason VARCHAR(255),
    INDEX idx_email_attempts (email),
    INDEX idx_attempted_at (attempted_at)
);

-- Password reset tokens
-- Password reset tokens: only the token HASH is stored (the raw token is
-- e-mailed to the user). used_at stays NULL until the token is redeemed,
-- which lets single-use enforcement happen in the application.
CREATE TABLE password_reset_tokens (
    token_id VARCHAR(128) PRIMARY KEY,
    user_id BIGINT NOT NULL,
    token_hash VARCHAR(255) NOT NULL,
    expires_at TIMESTAMP NOT NULL,
    used_at TIMESTAMP,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (user_id) REFERENCES users(user_id) ON DELETE CASCADE
);

-- Insert default roles
-- Seed data: four built-in roles with JSON capability maps. 'admin' and
-- 'super_admin' both carry {"all": true}; super_admin adds system_admin.
INSERT INTO roles (role_name, description, permissions) VALUES
('user', 'Regular user with basic permissions', '{"read": true, "write_own": true}'),
('moderator', 'Can moderate content and users', '{"read": true, "write_own": true, "moderate": true}'),
('admin', 'Full administrative access', '{"all": true}'),
('super_admin', 'Super administrator with all privileges', '{"all": true, "system_admin": true}');

-- Sample queries for user management

-- Create a new user
-- Sample insert: password_hash is a placeholder for a pre-computed bcrypt
-- digest — hashing always happens in the application layer, never in SQL.
INSERT INTO users (username, email, password_hash, first_name, last_name, is_active, is_verified)
VALUES ('johndoe', '[email protected]', '$2b$12$hashedpassword...', 'John', 'Doe', TRUE, FALSE);

-- Fetch one user's profile plus an aggregated role list and the preference
-- row. GROUP_CONCAT (MySQL) collapses the role join-rows into a CSV string;
-- grouping by the primary key lets the other users columns ride along.
SELECT
    usr.user_id,
    usr.username,
    usr.email,
    usr.first_name,
    usr.last_name,
    usr.is_active,
    usr.is_verified,
    usr.created_at,
    usr.last_login_at,
    GROUP_CONCAT(rl.role_name) as roles,
    pref.language,
    pref.timezone,
    pref.theme,
    pref.two_factor_enabled
FROM users usr
LEFT JOIN user_roles ur ON usr.user_id = ur.user_id
LEFT JOIN roles rl ON ur.role_id = rl.role_id
LEFT JOIN user_preferences pref ON usr.user_id = pref.user_id
WHERE usr.user_id = 1
GROUP BY usr.user_id;

-- Case-insensitive substring search across the main identity columns.
-- The pattern literal is already lowercase, so only the columns need
-- LOWER(); note the leading '%' wildcard prevents index use here.
SELECT
    user_id,
    username,
    email,
    first_name,
    last_name,
    is_active,
    is_verified,
    created_at
FROM users
WHERE is_active = TRUE
    AND (
        LOWER(username) LIKE '%john%'
        OR LOWER(first_name) LIKE '%john%'
        OR LOWER(last_name) LIKE '%john%'
        OR LOWER(email) LIKE '%john%'
    )
ORDER BY created_at DESC
LIMIT 20 OFFSET 0;

-- One-row dashboard summary of the whole user base.
-- COALESCE keeps each conditional count at 0 (not NULL) on an empty table,
-- matching COUNT(CASE ...) semantics.
SELECT
    COUNT(*) as total_users,
    COALESCE(SUM(CASE WHEN is_active = TRUE THEN 1 ELSE 0 END), 0) as active_users,
    COALESCE(SUM(CASE WHEN is_verified = TRUE THEN 1 ELSE 0 END), 0) as verified_users,
    COALESCE(SUM(CASE WHEN is_premium = TRUE THEN 1 ELSE 0 END), 0) as premium_users,
    COALESCE(SUM(CASE WHEN created_at >= DATE_SUB(NOW(), INTERVAL 30 DAY) THEN 1 ELSE 0 END), 0) as new_users_30_days
FROM users;

-- Daily registration counts for the last 90 days, with a breakdown of how
-- many of each day's signups are active / recently seen. Every group has at
-- least one row, so SUM(... ELSE 0) matches COUNT(CASE ...) exactly.
SELECT
    DATE(reg.created_at) as registration_date,
    COUNT(*) as new_users,
    SUM(CASE WHEN reg.is_active = TRUE THEN 1 ELSE 0 END) as active_users,
    SUM(CASE WHEN reg.last_login_at >= DATE_SUB(NOW(), INTERVAL 7 DAY) THEN 1 ELSE 0 END) as recently_active
FROM users reg
WHERE reg.created_at >= DATE_SUB(NOW(), INTERVAL 90 DAY)
GROUP BY DATE(reg.created_at)
ORDER BY registration_date DESC;

-- Session housekeeping: drop rows that are past their expiry, or that were
-- deactivated and have been idle for more than a week.
DELETE FROM user_sessions
WHERE expires_at < NOW()
   OR (is_active = FALSE AND last_activity < DATE_SUB(NOW(), INTERVAL 7 DAY));

-- Security monitoring: (email, ip) pairs that accumulated five or more
-- failed logins within the last 24 hours — candidates for lockout/CAPTCHA.
SELECT
    la.email,
    la.ip_address,
    COUNT(*) as failed_attempts,
    MAX(la.attempted_at) as last_attempt
FROM login_attempts la
WHERE la.success = FALSE
    AND la.attempted_at >= DATE_SUB(NOW(), INTERVAL 24 HOUR)
GROUP BY la.email, la.ip_address
HAVING COUNT(*) >= 5
ORDER BY failed_attempts DESC;

📝 分析查询

🟡 intermediate

数据分析和报表查询示例

-- Analytical SQL Queries for Business Intelligence
-- NOTE: these queries mix MySQL date helpers (DATE_FORMAT, DATEDIFF,
-- PERIOD_DIFF) with PostgreSQL-only features (:: casts, PERCENTILE_CONT);
-- pick one engine and port the non-native calls before running.

-- Customer Segmentation Analysis
-- FIX(review): the status filter previously lived in WHERE with an
-- "OR o.customer_id IS NULL" escape hatch. That still dropped customers
-- whose orders exist but are ALL in other statuses (e.g. cancelled), since
-- their joined rows were filtered and no NULL row remained. Filtering in
-- the join's ON clause keeps every customer and counts only
-- delivered/shipped orders.
WITH customer_metrics AS (
    -- Per-customer order rollup; order columns are NULL for non-buyers.
    SELECT
        c.customer_id,
        c.email,
        c.first_name,
        c.last_name,
        c.created_at,
        COUNT(o.order_id) as total_orders,
        SUM(o.total_amount) as total_spent,
        AVG(o.total_amount) as avg_order_value,
        MIN(o.created_at) as first_order_date,
        MAX(o.created_at) as last_order_date,
        DATEDIFF(NOW(), MAX(o.created_at)) as days_since_last_order,
        DATEDIFF(MAX(o.created_at), MIN(o.created_at)) as customer_lifetime_days
    FROM customers c
    LEFT JOIN orders o ON c.customer_id = o.customer_id
        AND o.status IN ('delivered', 'shipped')
    GROUP BY c.customer_id
),
segmentation AS (
    -- Classify by engagement (segment) and by revenue (value tier); the
    -- CASE arms are ordered so the most valuable label wins.
    SELECT *,
        CASE
            WHEN total_orders >= 10 AND total_spent >= 1000 THEN 'VIP'
            WHEN total_orders >= 5 AND total_spent >= 500 THEN 'Loyal'
            WHEN total_orders >= 1 AND days_since_last_order <= 90 THEN 'Active'
            WHEN days_since_last_order > 180 THEN 'Inactive'
            ELSE 'New'
        END as customer_segment,
        CASE
            WHEN total_spent >= 2000 THEN 'High Value'
            WHEN total_spent >= 500 THEN 'Medium Value'
            ELSE 'Low Value'
        END as value_tier
    FROM customer_metrics
)
SELECT
    customer_segment,
    value_tier,
    COUNT(*) as customer_count,
    AVG(total_orders) as avg_orders,
    AVG(total_spent) as avg_revenue,
    AVG(avg_order_value) as avg_order_value,
    AVG(days_since_last_order) as avg_days_since_last_order
FROM segmentation
GROUP BY customer_segment, value_tier
ORDER BY customer_segment, value_tier DESC;

-- Product Performance Analysis
-- FIX(review): two defects in the original:
--   1. Filtering order status only on the orders join still let items of
--      cancelled orders inflate SUMs (the order_items row survived with the
--      orders columns NULL). Joining (order_items x orders) as one unit
--      restricts the item rows themselves.
--   2. Never-sold products have total_sold = NULL, and every comparison in
--      the CASE evaluated UNKNOWN, so they fell through to 'High Sales'.
WITH product_performance AS (
    -- Per-product sales rollup for active products (zero/NULL when unsold).
    SELECT
        p.product_id,
        p.name,
        p.category_id,
        p.price,
        COUNT(DISTINCT oi.order_id) as order_count,
        SUM(oi.quantity) as total_sold,
        SUM(oi.total_price) as total_revenue,
        AVG(oi.total_price) as avg_order_revenue,
        MIN(o.created_at) as first_sale_date,
        MAX(o.created_at) as last_sale_date,
        p.stock_quantity,
        p.created_at as product_created_date
    FROM products p
    LEFT JOIN (order_items oi
        JOIN orders o ON oi.order_id = o.order_id
            AND o.status IN ('delivered', 'shipped'))
        ON p.product_id = oi.product_id
    WHERE p.status = 'active'
    GROUP BY p.product_id
),
category_performance AS (
    -- Category-level rollup built on top of the per-product figures.
    SELECT
        c.name as category_name,
        COUNT(p.product_id) as product_count,
        SUM(pp.total_sold) as category_sold,
        SUM(pp.total_revenue) as category_revenue,
        AVG(pp.total_revenue) as avg_product_revenue
    FROM categories c
    LEFT JOIN products p ON c.category_id = p.category_id
    LEFT JOIN product_performance pp ON p.product_id = pp.product_id
    GROUP BY c.category_id, c.name
)
SELECT
    pp.*,
    cp.category_name,
    CASE
        WHEN COALESCE(pp.total_sold, 0) = 0 THEN 'No Sales'
        WHEN pp.total_sold <= 10 THEN 'Low Sales'
        WHEN pp.total_sold <= 50 THEN 'Medium Sales'
        ELSE 'High Sales'
    END as sales_performance
FROM product_performance pp
LEFT JOIN categories c ON pp.category_id = c.category_id
LEFT JOIN category_performance cp ON c.category_id = cp.category_id
ORDER BY pp.total_revenue DESC;

-- Monthly Revenue Trend Analysis: month-over-month growth via LAG().
WITH monthly_revenue AS (
    -- Revenue / order / customer counts per month over the last 24 months.
    SELECT
        DATE_FORMAT(o.created_at, '%Y-%m') as month,
        SUM(o.total_amount) as revenue,
        COUNT(o.order_id) as order_count,
        COUNT(DISTINCT o.customer_id) as unique_customers,
        AVG(o.total_amount) as avg_order_value
    FROM orders o
    WHERE o.status IN ('delivered', 'shipped')
        AND o.created_at >= DATE_SUB(NOW(), INTERVAL 24 MONTH)
    GROUP BY DATE_FORMAT(o.created_at, '%Y-%m')
),
revenue_growth AS (
    -- Attach each month's previous-month figures for delta computation.
    SELECT
        month,
        revenue,
        order_count,
        unique_customers,
        avg_order_value,
        LAG(revenue) OVER (ORDER BY month) as prev_revenue,
        LAG(order_count) OVER (ORDER BY month) as prev_order_count,
        LAG(unique_customers) OVER (ORDER BY month) as prev_customers
    FROM monthly_revenue
)
SELECT
    month,
    revenue,
    order_count,
    unique_customers,
    avg_order_value,
    prev_revenue,
    -- FIX(review): multiply by 100.0 BEFORE dividing so the integer counts
    -- never hit integer division (silent truncation on engines such as
    -- PostgreSQL), and NULLIF guards a zero previous value. Division by NULL
    -- already yields NULL for the first month, so no explicit CASE is needed.
    ROUND((revenue - prev_revenue) * 100.0 / NULLIF(prev_revenue, 0), 2) as revenue_growth_pct,
    ROUND((order_count - prev_order_count) * 100.0 / NULLIF(prev_order_count, 0), 2) as order_growth_pct,
    ROUND((unique_customers - prev_customers) * 100.0 / NULLIF(prev_customers, 0), 2) as customer_growth_pct
FROM revenue_growth
ORDER BY month DESC;

-- Cohort Analysis (Customer Retention)
WITH customer_cohorts AS (
    -- Each customer's first delivered/shipped order month is their cohort.
    SELECT
        c.customer_id,
        DATE_FORMAT(MIN(o.created_at), '%Y-%m') as cohort_month,
        MIN(o.created_at) as first_order_date
    FROM customers c
    JOIN orders o ON c.customer_id = o.customer_id
    WHERE o.status IN ('delivered', 'shipped')
    GROUP BY c.customer_id
),
cohort_activity AS (
    -- One row per order, tagged with how many months after the customer's
    -- first order it happened (period 0 = cohort month itself).
    -- FIX(review): the original selected ca.customer_id, but no alias "ca"
    -- exists in this CTE (only cc and o) — the query did not parse.
    SELECT
        cc.cohort_month,
        o.customer_id,
        DATE_FORMAT(o.created_at, '%Y-%m') as activity_month,
        PERIOD_DIFF(DATE_FORMAT(o.created_at, '%Y%m'), DATE_FORMAT(cc.first_order_date, '%Y%m')) as period_number
    FROM customer_cohorts cc
    JOIN orders o ON cc.customer_id = o.customer_id
    WHERE o.status IN ('delivered', 'shipped')
        AND o.created_at >= cc.first_order_date
),
cohort_summary AS (
    -- Distinct customers active in each (cohort, period) cell.
    SELECT
        cohort_month,
        period_number,
        COUNT(DISTINCT customer_id) as active_customers
    FROM cohort_activity
    GROUP BY cohort_month, period_number
),
cohort_size AS (
    SELECT
        cohort_month,
        COUNT(DISTINCT customer_id) as cohort_size
    FROM customer_cohorts
    GROUP BY cohort_month
)
SELECT
    cs.cohort_month,
    cs.period_number,
    cs.active_customers,
    csize.cohort_size,
    -- FIX(review): replaced the PostgreSQL-only ::DECIMAL cast with ANSI
    -- CAST() to match the MySQL date functions used above.
    ROUND((CAST(cs.active_customers AS DECIMAL(10,2)) / csize.cohort_size) * 100, 2) as retention_rate
FROM cohort_summary cs
JOIN cohort_size csize ON cs.cohort_month = csize.cohort_month
WHERE cs.period_number <= 12
ORDER BY cs.cohort_month, cs.period_number;

-- Product Affinity Analysis (Products frequently bought together)
WITH product_pairs AS (
    -- Self-join order_items on the same order; "product_id < product_id"
    -- keeps each unordered pair exactly once.
    SELECT
        oi1.product_id as product_a,
        oi2.product_id as product_b,
        COUNT(*) as pair_count
    FROM order_items oi1
    JOIN order_items oi2 ON oi1.order_id = oi2.order_id
        AND oi1.product_id < oi2.product_id
    JOIN orders o ON oi1.order_id = o.order_id
    WHERE o.status IN ('delivered', 'shipped')
    GROUP BY oi1.product_id, oi2.product_id
),
product_popularity AS (
    -- Per-product order counts used to normalize the pair counts.
    SELECT
        oi.product_id,
        COUNT(DISTINCT oi.order_id) as order_count
    FROM order_items oi
    JOIN orders o ON oi.order_id = o.order_id
    WHERE o.status IN ('delivered', 'shipped')
    GROUP BY oi.product_id
)
SELECT
    p1.name as product_a_name,
    p2.name as product_b_name,
    pp.pair_count,
    pop1.order_count as product_a_orders,
    pop2.order_count as product_b_orders,
    -- Affinity = pair count relative to the LESS popular product's orders.
    -- FIX(review): ANSI CAST() replaces the PostgreSQL-only ::DECIMAL cast,
    -- keeping this query runnable on MySQL too.
    ROUND((CAST(pp.pair_count AS DECIMAL(10,2)) / LEAST(pop1.order_count, pop2.order_count)) * 100, 2) as affinity_percentage
FROM product_pairs pp
JOIN products p1 ON pp.product_a = p1.product_id
JOIN products p2 ON pp.product_b = p2.product_id
JOIN product_popularity pop1 ON pp.product_a = pop1.product_id
JOIN product_popularity pop2 ON pp.product_b = pop2.product_id
WHERE pp.pair_count >= 5
ORDER BY affinity_percentage DESC, pp.pair_count DESC
LIMIT 20;

-- Customer Lifetime Value Prediction
-- NOTE(review): mixes MySQL DATEDIFF with PostgreSQL PERCENTILE_CONT — pick
-- one engine and port the other function before running.
-- FIX(review): the status filter moved from WHERE (with the lossy
-- "OR o.customer_id IS NULL" escape hatch) into the join's ON clause, so
-- customers whose orders are all cancelled are no longer dropped.
WITH customer_ltv AS (
    SELECT
        c.customer_id,
        c.created_at as signup_date,
        COUNT(o.order_id) as order_count,
        SUM(o.total_amount) as total_revenue,
        AVG(o.total_amount) as avg_order_value,
        MIN(o.created_at) as first_order_date,
        MAX(o.created_at) as last_order_date,
        DATEDIFF(NOW(), c.created_at) as days_as_customer,
        DATEDIFF(NOW(), MAX(o.created_at)) as days_since_last_order,
        -- Average gap between consecutive orders (0 for single-order buyers).
        CASE
            WHEN COUNT(o.order_id) > 1 THEN
                DATEDIFF(MAX(o.created_at), MIN(o.created_at)) / (COUNT(o.order_id) - 1)
            ELSE 0
        END as avg_days_between_orders,
        -- Observed lifetime: first to last order, in days.
        DATEDIFF(MAX(o.created_at), MIN(o.created_at)) as customer_lifetime_days
    FROM customers c
    LEFT JOIN orders o ON c.customer_id = o.customer_id
        AND o.status IN ('delivered', 'shipped')
    GROUP BY c.customer_id
)
SELECT
    'High Value Customers (> 12 months)' as segment,
    COUNT(*) as customer_count,
    AVG(total_revenue) as avg_ltv,
    AVG(order_count) as avg_orders,
    AVG(avg_order_value) as avg_order_size,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total_revenue) as median_ltv,
    PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY total_revenue) as p90_ltv
FROM customer_ltv
WHERE days_as_customer >= 365 AND total_revenue > 0

UNION ALL

SELECT
    'Medium Value Customers (3-12 months)' as segment,
    COUNT(*) as customer_count,
    AVG(total_revenue) as avg_ltv,
    AVG(order_count) as avg_orders,
    AVG(avg_order_value) as avg_order_size,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total_revenue) as median_ltv,
    PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY total_revenue) as p90_ltv
FROM customer_ltv
WHERE days_as_customer >= 90 AND days_as_customer < 365 AND total_revenue > 0

UNION ALL

SELECT
    'New Customers (< 3 months)' as segment,
    COUNT(*) as customer_count,
    AVG(total_revenue) as avg_ltv,
    AVG(order_count) as avg_orders,
    AVG(avg_order_value) as avg_order_size,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total_revenue) as median_ltv,
    PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY total_revenue) as p90_ltv
FROM customer_ltv
WHERE days_as_customer < 90 AND total_revenue > 0;

📝 存储过程

🟡 intermediate

常用存储过程和函数示例

-- Stored Procedures and Functions for Common Operations

-- Written in PL/pgSQL, so these target PostgreSQL specifically; MySQL's
-- procedural dialect differs and would require a rewrite.

-- Computes the grand total of an order: item subtotal + tax + shipping,
-- minus a flat 10% promotional discount on subtotals above $100.
-- Parameters: p_order_id — target order; p_tax_rate — fractional tax rate
-- (default 8%); p_shipping_rate — flat shipping charge (default free).
CREATE OR REPLACE FUNCTION calculate_order_total(
    p_order_id BIGINT,
    p_tax_rate DECIMAL(5,4) DEFAULT 0.08,
    p_shipping_rate DECIMAL(10,2) DEFAULT 0.00
) RETURNS DECIMAL(12,2) AS $$
DECLARE
    v_items_total DECIMAL(12,2);
    v_tax DECIMAL(12,2);
    v_shipping DECIMAL(12,2);
    v_discount DECIMAL(12,2);
BEGIN
    -- Item subtotal; COALESCE makes an empty (or unknown) order total 0.
    SELECT COALESCE(SUM(total_price), 0)
    INTO v_items_total
    FROM order_items
    WHERE order_id = p_order_id;

    v_tax := v_items_total * p_tax_rate;
    v_shipping := p_shipping_rate;

    -- Promotional rule: 10% off when the subtotal exceeds $100.
    v_discount := CASE
        WHEN v_items_total > 100 THEN v_items_total * 0.10
        ELSE 0
    END;

    RETURN v_items_total + v_tax + v_shipping - v_discount;
END;
$$ LANGUAGE plpgsql;

-- Decrements product/variant stock for every item of a shipped order, emits
-- a low-stock NOTIFY when a product falls to/below its minimum level, and
-- marks the order fulfilled. Raises if the order is not in 'shipped' status.
CREATE OR REPLACE PROCEDURE update_product_stock_after_order(
    p_order_id BIGINT
) AS $$
DECLARE
    v_product_id BIGINT;
    v_variant_id BIGINT;
    v_quantity_ordered INT;
    v_new_stock INT;
    v_order_status TEXT;
BEGIN
    -- Guard: only shipped orders may decrement inventory.
    SELECT status INTO v_order_status
    FROM orders
    WHERE order_id = p_order_id;

    IF v_order_status != 'shipped' THEN
        RAISE EXCEPTION 'Order status must be shipped to update stock';
    END IF;

    -- Process each order item.
    FOR v_product_id, v_variant_id, v_quantity_ordered IN
        SELECT product_id, variant_id, quantity
        FROM order_items
        WHERE order_id = p_order_id
    LOOP
        IF v_variant_id IS NULL THEN
            -- FIX(review): decrement in a single UPDATE instead of the old
            -- SELECT-then-UPDATE; the read-modify-write pattern could lose
            -- concurrent stock changes between the read and the write.
            UPDATE products
            SET stock_quantity = stock_quantity - v_quantity_ordered,
                updated_at = CURRENT_TIMESTAMP
            WHERE product_id = v_product_id
            RETURNING stock_quantity INTO v_new_stock;

            -- Low-stock alert via LISTEN/NOTIFY when at/below the threshold.
            IF v_new_stock <= (SELECT min_stock_level FROM products WHERE product_id = v_product_id) THEN
                PERFORM pg_notify('low_stock_alert',
                    format('Product %s stock is low: %s units remaining', v_product_id, v_new_stock));
            END IF;

        ELSE
            -- Variant-level inventory; same atomic-decrement pattern.
            UPDATE product_variants
            SET inventory_quantity = inventory_quantity - v_quantity_ordered
            WHERE variant_id = v_variant_id;
        END IF;
    END LOOP;

    -- Mark the order as fulfilled once all items are decremented.
    UPDATE orders
    SET fulfillment_status = 'fulfilled',
        updated_at = CURRENT_TIMESTAMP
    WHERE order_id = p_order_id;

END;
$$ LANGUAGE plpgsql;

-- Returns the customer's lifetime value:
--   LTV = avg order value x orders-per-year x years observed.
-- FIX(review): two defects in the original:
--   1. DATEDIFF() is MySQL; in PostgreSQL, subtracting dates yields days.
--   2. "Purchase frequency" was computed as days-per-order, which made the
--      LTV GROW the longer a customer went without ordering. It is now
--      orders per year.
CREATE OR REPLACE FUNCTION get_customer_ltv(
    p_customer_id BIGINT
) RETURNS DECIMAL(12,2) AS $$
DECLARE
    v_total_revenue DECIMAL(12,2);
    v_total_orders INT;
    v_avg_order_value DECIMAL(12,2);
    v_days_as_customer INT;
    v_orders_per_year DECIMAL(10,2);
    v_ltv DECIMAL(12,2);
BEGIN
    -- Revenue and order count over completed orders only.
    SELECT
        COALESCE(SUM(total_amount), 0),
        COUNT(order_id)
    INTO v_total_revenue, v_total_orders
    FROM orders
    WHERE customer_id = p_customer_id
        AND status IN ('delivered', 'shipped');

    -- Tenure in days; GREATEST(...,1) avoids division by zero for
    -- customers who signed up today.
    SELECT GREATEST(CURRENT_DATE - created_at::date, 1)
    INTO v_days_as_customer
    FROM customers
    WHERE customer_id = p_customer_id;

    IF v_total_orders > 0 THEN
        v_avg_order_value := v_total_revenue / v_total_orders;
        v_orders_per_year := v_total_orders / (v_days_as_customer / 365.0);
    ELSE
        v_avg_order_value := 0;
        v_orders_per_year := 0;
    END IF;

    -- Historic LTV over the observed tenure.
    v_ltv := v_avg_order_value * v_orders_per_year * (v_days_as_customer / 365.0);

    RETURN v_ltv;
END;
$$ LANGUAGE plpgsql;

-- Flags carts idle for >24h, queues a reminder email, and logs the event.
-- FIX(review): three defects in the original:
--   1. EXTRACT(HOUR FROM interval) returns only the 0-23 hour COMPONENT;
--      EPOCH/3600 gives true elapsed hours.
--   2. Already-flagged carts were re-selected on every run, re-sending the
--      email; the abandoned_at IS NULL filter makes the job idempotent.
--   3. PostgreSQL format() supports only %s/%I/%L — the printf-style '%.2f'
--      raised "unrecognized format() type specifier".
CREATE OR REPLACE PROCEDURE process_cart_abandonment()
AS $$
DECLARE
    v_cart_id VARCHAR(128);
    v_customer_id BIGINT;
    v_customer_email VARCHAR(255);
    v_cart_total DECIMAL(12,2);
    v_cart_items INT;
    v_hours_abandoned INT;
BEGIN
    -- Find live carts idle for more than 24 hours, not yet flagged.
    FOR v_cart_id, v_customer_id, v_customer_email, v_cart_total, v_cart_items, v_hours_abandoned IN
        SELECT
            c.cart_id,
            c.customer_id,
            cu.email,
            COALESCE(SUM(ci.quantity * p.price), 0) as cart_total,
            COUNT(ci.item_id) as item_count,
            (EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - c.updated_at)) / 3600)::INT as hours_abandoned
        FROM cart c
        JOIN cart_items ci ON c.cart_id = ci.cart_id
        JOIN products p ON ci.product_id = p.product_id
        LEFT JOIN customers cu ON c.customer_id = cu.customer_id
        WHERE c.expires_at > CURRENT_TIMESTAMP
            AND c.updated_at < CURRENT_TIMESTAMP - INTERVAL '24 hours'
            AND c.abandoned_at IS NULL
        GROUP BY c.cart_id, c.customer_id, cu.email
    LOOP
        -- Queue the reminder email (a real system would call an email service).
        INSERT INTO email_notifications (
            customer_id,
            email_type,
            subject,
            message,
            sent_at
        ) VALUES (
            v_customer_id,
            'cart_abandonment',
            'Complete Your Purchase',
            format('You left items in your cart! Complete your purchase within the next 6 hours to save %s', round(v_cart_total, 2)),
            CURRENT_TIMESTAMP
        );

        -- Mark the cart so it is not processed again.
        UPDATE cart
        SET abandoned_at = CURRENT_TIMESTAMP
        WHERE cart_id = v_cart_id;

        -- Audit trail of the abandonment event.
        INSERT INTO cart_abandonment_log (
            cart_id,
            customer_id,
            cart_total,
            item_count,
            hours_abandoned,
            processed_at
        ) VALUES (
            v_cart_id,
            v_customer_id,
            v_cart_total,
            v_cart_items,
            v_hours_abandoned,
            CURRENT_TIMESTAMP
        );
    END LOOP;
END;
$$ LANGUAGE plpgsql;

-- Recommends in-stock products the customer has not bought, scored by how
-- often they co-occur in orders with the customer's past purchases
-- (fallback score 5 for same-category matches).
-- FIX(review): two defects in the original:
--   1. Unqualified references such as "SELECT product_id FROM
--      customer_purchases" and "ORDER BY recommendation_score" collide with
--      the function's OUT columns — PL/pgSQL raises "column reference is
--      ambiguous". All columns are now qualified and the select-list aliases
--      differ from the OUT names.
--   2. COUNT(*) is BIGINT while the declared score column is DECIMAL(5,2);
--      RETURN QUERY rejects the type mismatch at runtime, so the score is
--      cast explicitly.
CREATE OR REPLACE FUNCTION get_product_recommendations(
    p_customer_id BIGINT,
    p_limit INT DEFAULT 10
) RETURNS TABLE(
    product_id BIGINT,
    product_name VARCHAR(255),
    recommendation_score DECIMAL(5,2),
    recommendation_reason TEXT
) AS $$
BEGIN
    RETURN QUERY
    WITH customer_purchases AS (
        -- Products this customer has already bought (completed orders only).
        SELECT DISTINCT oi.product_id
        FROM order_items oi
        JOIN orders o ON oi.order_id = o.order_id
        WHERE o.customer_id = p_customer_id
            AND o.status IN ('delivered', 'shipped')
    ),
    related_products AS (
        -- Products that co-occur in any order with a purchased product.
        SELECT
            oi2.product_id,
            COUNT(*) as frequency,
            'frequently_bought_together' as reason
        FROM order_items oi1
        JOIN order_items oi2 ON oi1.order_id = oi2.order_id
            AND oi1.product_id != oi2.product_id
        WHERE oi1.product_id IN (SELECT cp0.product_id FROM customer_purchases cp0)
        GROUP BY oi2.product_id
    ),
    category_products AS (
        -- Unpurchased products sharing a category with past purchases.
        SELECT
            p.product_id,
            'similar_category' as reason
        FROM products p
        JOIN product_categories pc ON p.product_id = pc.product_id
        WHERE pc.category_id IN (
            SELECT DISTINCT pc2.category_id
            FROM product_categories pc2
            WHERE pc2.product_id IN (SELECT cp1.product_id FROM customer_purchases cp1)
        )
        AND p.product_id NOT IN (SELECT cp2.product_id FROM customer_purchases cp2)
    )
    SELECT
        p.product_id,
        p.name,
        CAST(COALESCE(rp.frequency, 5) AS DECIMAL(5,2)) as rec_score,
        COALESCE(rp.reason, cp.reason) as rec_reason
    FROM products p
    LEFT JOIN related_products rp ON p.product_id = rp.product_id
    LEFT JOIN category_products cp ON p.product_id = cp.product_id
    WHERE p.status = 'active'
        AND p.stock_quantity > 0
        AND p.product_id NOT IN (SELECT cp3.product_id FROM customer_purchases cp3)
    ORDER BY rec_score DESC
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;

-- Computes one day's sales metrics, snapshots them into
-- daily_sales_reports, and logs the run.
-- FIX(review): on a day with no completed orders SUM/AVG return NULL;
-- COALESCE pins revenue/AOV to 0 so the report row is well-defined. The
-- top-product SELECT ... INTO simply leaves its variables NULL when there
-- are no rows (no exception), which is intentional.
CREATE OR REPLACE PROCEDURE generate_daily_sales_report(
    p_report_date DATE DEFAULT CURRENT_DATE
) AS $$
DECLARE
    v_total_orders INT;
    v_total_revenue DECIMAL(12,2);
    v_total_customers INT;
    v_avg_order_value DECIMAL(12,2);
    v_top_selling_product BIGINT;
    v_top_product_name VARCHAR(255);
    v_top_product_quantity INT;
BEGIN
    -- Daily headline metrics over completed orders.
    SELECT
        COUNT(DISTINCT o.order_id),
        COALESCE(SUM(o.total_amount), 0),
        COUNT(DISTINCT o.customer_id),
        COALESCE(AVG(o.total_amount), 0)
    INTO v_total_orders, v_total_revenue, v_total_customers, v_avg_order_value
    FROM orders o
    WHERE DATE(o.created_at) = p_report_date
        AND o.status IN ('delivered', 'shipped');

    -- Top product by units sold that day (NULLs when there were no sales).
    SELECT
        oi.product_id,
        p.name,
        SUM(oi.quantity)
    INTO v_top_selling_product, v_top_product_name, v_top_product_quantity
    FROM order_items oi
    JOIN orders o ON oi.order_id = o.order_id
    JOIN products p ON oi.product_id = p.product_id
    WHERE DATE(o.created_at) = p_report_date
        AND o.status IN ('delivered', 'shipped')
    GROUP BY oi.product_id, p.name
    ORDER BY SUM(oi.quantity) DESC
    LIMIT 1;

    -- Persist the snapshot.
    INSERT INTO daily_sales_reports (
        report_date,
        total_orders,
        total_revenue,
        total_customers,
        avg_order_value,
        top_selling_product_id,
        top_product_name,
        top_product_quantity,
        generated_at
    ) VALUES (
        p_report_date,
        v_total_orders,
        v_total_revenue,
        v_total_customers,
        v_avg_order_value,
        v_top_selling_product,
        v_top_product_name,
        v_top_product_quantity,
        CURRENT_TIMESTAMP
    );

    -- Audit the report run.
    INSERT INTO report_generation_log (
        report_type,
        report_date,
        records_processed,
        generated_at
    ) VALUES (
        'daily_sales',
        p_report_date,
        v_total_orders,
        CURRENT_TIMESTAMP
    );
END;
$$ LANGUAGE plpgsql;

-- Returns TRUE when p_email matches a basic user@domain.tld shape.
-- FIX(review): the dot before the TLD was unescaped ('+.[A-Za-z]{2,}'), so
-- it matched ANY character and accepted malformed addresses; '\.' requires
-- a literal dot. (~* is PostgreSQL's case-insensitive regex operator.)
CREATE OR REPLACE FUNCTION is_valid_email(p_email VARCHAR(255))
RETURNS BOOLEAN AS $$
BEGIN
    RETURN p_email ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$';
END;
$$ LANGUAGE plpgsql;

-- Trigger function maintaining the denormalized customer stats cache
-- (orders_count / total_spent / last_order_id).
-- FIX(review): the old INSERT branch OVERWROTE orders_count with 1 and
-- total_spent with the new order's amount — wiping history for returning
-- customers — and then the UPDATE branch counted the same order AGAIN when
-- it reached 'delivered'. Each order is now counted exactly once: at insert
-- time only if it is already delivered, otherwise on the status transition.
CREATE OR REPLACE FUNCTION update_customer_stats()
RETURNS TRIGGER AS $$
BEGIN
    IF TG_OP = 'INSERT' THEN
        -- Always track the latest order; add to the totals only when the
        -- order arrives already delivered.
        UPDATE customers
        SET orders_count = orders_count + CASE WHEN NEW.status = 'delivered' THEN 1 ELSE 0 END,
            total_spent = total_spent + CASE WHEN NEW.status = 'delivered' THEN NEW.total_amount ELSE 0 END,
            last_order_id = NEW.order_id,
            updated_at = CURRENT_TIMESTAMP
        WHERE customer_id = NEW.customer_id;

        RETURN NEW;

    ELSIF TG_OP = 'UPDATE' THEN
        -- Count the order once, on its transition into 'delivered'.
        IF OLD.status != 'delivered' AND NEW.status = 'delivered' THEN
            UPDATE customers
            SET orders_count = orders_count + 1,
                total_spent = total_spent + NEW.total_amount,
                last_order_id = NEW.order_id,
                updated_at = CURRENT_TIMESTAMP
            WHERE customer_id = NEW.customer_id;
        END IF;

        RETURN NEW;

    ELSIF TG_OP = 'DELETE' THEN
        -- Deletions currently leave the cached stats untouched.
        RETURN OLD;
    END IF;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql;

-- Create trigger for customer stats update
-- Fires update_customer_stats() after every INSERT or UPDATE on orders
-- (PostgreSQL syntax; the function handles TG_OP dispatch internally).
CREATE TRIGGER trigger_update_customer_stats
    AFTER INSERT OR UPDATE ON orders
    FOR EACH ROW
    EXECUTE FUNCTION update_customer_stats();

📝 电商数据库架构

🔴 complex

完整的电商系统数据库设计

-- E-commerce Database Schema
-- NOTE: written in MySQL DDL (AUTO_INCREMENT, ENUM, inline INDEX, FULLTEXT);
-- porting to PostgreSQL needs IDENTITY columns, CHECK constraints instead of
-- ENUM, and separate CREATE INDEX statements.

-- Products table: catalog entry with pricing, inventory, status and SEO
-- fields. compare_price is the strike-through "was" price; cost_price is
-- internal. MySQL dialect (AUTO_INCREMENT, ENUM, inline INDEX, FULLTEXT).
CREATE TABLE products (
    product_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    sku VARCHAR(100) UNIQUE NOT NULL,
    name VARCHAR(255) NOT NULL,
    description TEXT,
    short_description VARCHAR(500),
    price DECIMAL(10,2) NOT NULL,
    compare_price DECIMAL(10,2),
    cost_price DECIMAL(10,2),
    weight DECIMAL(8,3),
    dimensions JSON,

    -- Inventory
    stock_quantity INT DEFAULT 0,
    min_stock_level INT DEFAULT 0,
    track_inventory BOOLEAN DEFAULT TRUE,
    allow_backorder BOOLEAN DEFAULT FALSE,

    -- Status
    status ENUM('active', 'inactive', 'draft', 'archived') DEFAULT 'active',
    featured BOOLEAN DEFAULT FALSE,

    -- SEO
    slug VARCHAR(255),
    meta_title VARCHAR(255),
    meta_description TEXT,

    -- Timestamps
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,

    INDEX idx_name (name),
    INDEX idx_sku (sku),
    INDEX idx_status (status),
    INDEX idx_featured (featured),
    INDEX idx_price (price),
    FULLTEXT idx_search (name, description)
);

-- Categories: self-referencing tree via parent_id (NULL = root category).
-- ON DELETE SET NULL promotes children to root when a parent is removed.
CREATE TABLE categories (
    category_id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(255) NOT NULL,
    slug VARCHAR(255) UNIQUE NOT NULL,
    description TEXT,
    parent_id INT,
    image_url VARCHAR(500),
    sort_order INT DEFAULT 0,
    is_active BOOLEAN DEFAULT TRUE,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (parent_id) REFERENCES categories(category_id) ON DELETE SET NULL,
    INDEX idx_parent (parent_id),
    INDEX idx_slug (slug)
);

-- Product categories mapping: many-to-many junction; rows are removed
-- automatically when either side is deleted.
CREATE TABLE product_categories (
    product_id BIGINT,
    category_id INT,
    PRIMARY KEY (product_id, category_id),
    FOREIGN KEY (product_id) REFERENCES products(product_id) ON DELETE CASCADE,
    FOREIGN KEY (category_id) REFERENCES categories(category_id) ON DELETE CASCADE
);

-- Product variants (for products with different sizes, colors, etc.)
-- option1..option3 are denormalized option-value slots (e.g. size, color);
-- price/compare_price/weight are NULL when the variant inherits the
-- product-level values.
CREATE TABLE product_variants (
    variant_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    product_id BIGINT NOT NULL,
    sku VARCHAR(100) UNIQUE NOT NULL,
    title VARCHAR(255),
    price DECIMAL(10,2),
    compare_price DECIMAL(10,2),
    weight DECIMAL(8,3),
    barcode VARCHAR(100),
    inventory_quantity INT DEFAULT 0,
    position INT DEFAULT 0,
    option1 VARCHAR(100), -- e.g., 'Small', 'Red'
    option2 VARCHAR(100),
    option3 VARCHAR(100),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (product_id) REFERENCES products(product_id) ON DELETE CASCADE,
    INDEX idx_product (product_id),
    INDEX idx_sku (sku)
);

-- Product images
CREATE TABLE product_images (
    image_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    product_id BIGINT,
    variant_id BIGINT,
    url VARCHAR(500) NOT NULL,
    alt_text VARCHAR(255),
    position INT DEFAULT 0,
    is_primary BOOLEAN DEFAULT FALSE,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (product_id) REFERENCES products(product_id) ON DELETE CASCADE,
    FOREIGN KEY (variant_id) REFERENCES product_variants(variant_id) ON DELETE CASCADE,
    INDEX idx_product (product_id),
    INDEX idx_position (position)
);

-- Customers
-- BUG FIX: the original declared an inline FOREIGN KEY on default_address_id
-- referencing addresses(address_id), but addresses is created LATER in this
-- script (and itself references customers), so this CREATE TABLE fails.
-- The constraint must be added with ALTER TABLE once both tables exist.
CREATE TABLE customers (
    customer_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    email VARCHAR(255) UNIQUE NOT NULL,
    first_name VARCHAR(255),
    last_name VARCHAR(255),
    phone VARCHAR(50),
    password_hash VARCHAR(255),

    -- Preferences
    accepts_marketing BOOLEAN DEFAULT FALSE,
    tax_exempt BOOLEAN DEFAULT FALSE,
    tags VARCHAR(500), -- Comma-separated tags; move to a junction table if tags need indexed querying

    -- Default address; FK deliberately deferred (see note above)
    default_address_id BIGINT,

    -- Denormalized statistics; must be kept in sync by application code or triggers
    total_spent DECIMAL(12,2) DEFAULT 0,
    orders_count INT DEFAULT 0,
    last_order_id BIGINT,

    -- Status
    is_active BOOLEAN DEFAULT TRUE,
    is_verified BOOLEAN DEFAULT FALSE,

    -- Timestamps
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    last_login_at TIMESTAMP,

    INDEX idx_email (email),
    INDEX idx_last_name (last_name),
    INDEX idx_total_spent (total_spent),
    INDEX idx_orders_count (orders_count)
);

-- Addresses
-- Shipping/billing addresses owned by a customer; rows are removed with the
-- owning customer (ON DELETE CASCADE).
CREATE TABLE addresses (
    address_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    customer_id BIGINT,
    first_name VARCHAR(255),
    last_name VARCHAR(255),
    company VARCHAR(255),
    address1 VARCHAR(255),
    address2 VARCHAR(255),
    city VARCHAR(100),
    province VARCHAR(100),
    country VARCHAR(100),
    postal_code VARCHAR(20),
    phone VARCHAR(50),
    address_type ENUM('shipping', 'billing') DEFAULT 'shipping',
    is_default BOOLEAN DEFAULT FALSE,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (customer_id) REFERENCES customers(customer_id) ON DELETE CASCADE,
    INDEX idx_customer (customer_id),
    INDEX idx_country (country),
    INDEX idx_postal_code (postal_code)
);

-- Deferred constraint: customers.default_address_id -> addresses(address_id).
-- Declared here, after addresses exists, because customers and addresses
-- reference each other and neither inline FK direction can be satisfied first.
ALTER TABLE customers
    ADD CONSTRAINT fk_customers_default_address
    FOREIGN KEY (default_address_id) REFERENCES addresses(address_id);

-- Orders
-- One row per order. Monetary columns use DECIMAL (exact); total_amount is the
-- charged total (subtotal + tax + shipping - discount). Three independent
-- status dimensions track lifecycle, payment, and fulfillment separately.
CREATE TABLE orders (
    order_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    order_number VARCHAR(50) UNIQUE NOT NULL,
    customer_id BIGINT,
    customer_email VARCHAR(255),

    -- Order details
    currency VARCHAR(3) DEFAULT 'USD',
    subtotal DECIMAL(12,2) NOT NULL,
    tax_amount DECIMAL(12,2) DEFAULT 0,
    shipping_amount DECIMAL(10,2) DEFAULT 0,
    discount_amount DECIMAL(10,2) DEFAULT 0,
    total_amount DECIMAL(12,2) NOT NULL,

    -- Status and tracking
    status ENUM('pending', 'confirmed', 'processing', 'shipped', 'delivered', 'cancelled', 'refunded') DEFAULT 'pending',
    financial_status ENUM('pending', 'authorized', 'captured', 'partially_refunded', 'refunded', 'voided') DEFAULT 'pending',
    fulfillment_status ENUM('unfulfilled', 'partial', 'fulfilled') DEFAULT 'unfulfilled',

    -- Shipping
    shipping_address_id BIGINT,
    billing_address_id BIGINT,
    tracking_number VARCHAR(100),
    carrier VARCHAR(50),

    -- Notes
    notes TEXT,
    customer_notes TEXT,

    -- Timestamps
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    shipped_at TIMESTAMP,
    delivered_at TIMESTAMP,

    FOREIGN KEY (customer_id) REFERENCES customers(customer_id),
    FOREIGN KEY (shipping_address_id) REFERENCES addresses(address_id),
    FOREIGN KEY (billing_address_id) REFERENCES addresses(address_id),
    INDEX idx_customer (customer_id),
    INDEX idx_status (status),
    INDEX idx_created_at (created_at),
    INDEX idx_total_amount (total_amount)
);

-- Order items
-- Line items snapshot product data (sku, product_name, unit_price) at purchase
-- time so order history survives later product edits; product_id/variant_id
-- are nullable soft links without CASCADE for the same reason.
CREATE TABLE order_items (
    item_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    order_id BIGINT NOT NULL,
    product_id BIGINT,
    variant_id BIGINT,
    sku VARCHAR(100),
    product_name VARCHAR(255) NOT NULL,
    variant_title VARCHAR(255),
    quantity INT NOT NULL,
    unit_price DECIMAL(10,2) NOT NULL,
    total_price DECIMAL(12,2) NOT NULL,
    weight DECIMAL(8,3),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (order_id) REFERENCES orders(order_id) ON DELETE CASCADE,
    FOREIGN KEY (product_id) REFERENCES products(product_id),
    FOREIGN KEY (variant_id) REFERENCES product_variants(variant_id),
    INDEX idx_order (order_id),
    INDEX idx_product (product_id)
);

-- Shopping cart
-- cart_id is a session token (VARCHAR PK) so anonymous carts work;
-- customer_id is attached after login. expires_at supports cart cleanup jobs.
CREATE TABLE cart (
    cart_id VARCHAR(128) PRIMARY KEY,
    customer_id BIGINT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    expires_at TIMESTAMP,
    FOREIGN KEY (customer_id) REFERENCES customers(customer_id) ON DELETE CASCADE,
    INDEX idx_customer (customer_id),
    INDEX idx_expires (expires_at)
);

-- Cart items
-- Contents of a cart; removed automatically when the cart is deleted.
CREATE TABLE cart_items (
    item_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    cart_id VARCHAR(128),
    product_id BIGINT,
    variant_id BIGINT,
    quantity INT NOT NULL DEFAULT 1,
    added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (cart_id) REFERENCES cart(cart_id) ON DELETE CASCADE,
    FOREIGN KEY (product_id) REFERENCES products(product_id),
    FOREIGN KEY (variant_id) REFERENCES product_variants(variant_id),
    INDEX idx_cart (cart_id),
    INDEX idx_product (product_id)
);

-- Sample queries for e-commerce operations

-- Get product with variants and images
-- Returns one row for product 1 with its categories concatenated and its
-- variants/images embedded as JSON arrays (MySQL: JSON_ARRAYAGG/JSON_OBJECT).
-- Grouping by the primary key alone is valid in MySQL: the other p.* columns
-- are functionally dependent on p.product_id.
SELECT
    p.product_id,
    p.sku,
    p.name,
    p.description,
    p.price,
    p.stock_quantity,
    p.status,
    GROUP_CONCAT(DISTINCT c.name) as categories,
    -- Correlated subquery: all variants of this product as a JSON array
    (SELECT JSON_ARRAYAGG(
        JSON_OBJECT(
            'variant_id', v.variant_id,
            'sku', v.sku,
            'title', v.title,
            'price', v.price,
            'inventory_quantity', v.inventory_quantity,
            'option1', v.option1,
            'option2', v.option2,
            'option3', v.option3
        )
    ) FROM product_variants v WHERE v.product_id = p.product_id) as variants,
    -- Correlated subquery: all images of this product as a JSON array
    (SELECT JSON_ARRAYAGG(
        JSON_OBJECT(
            'url', i.url,
            'alt_text', i.alt_text,
            'position', i.position,
            'is_primary', i.is_primary
        )
    ) FROM product_images i WHERE i.product_id = p.product_id) as images
FROM products p
LEFT JOIN product_categories pc ON p.product_id = pc.product_id
LEFT JOIN categories c ON pc.category_id = c.category_id
WHERE p.product_id = 1
GROUP BY p.product_id;

-- Advanced product search with filters
-- CLEANUP: DISTINCT was redundant alongside GROUP BY p.product_id and has
-- been removed. The WHERE predicates on c require a matching category row,
-- which already converted the original LEFT JOINs into inner joins; they are
-- written as INNER JOIN here to make that intent explicit.
SELECT
    p.product_id,
    p.name,
    p.sku,
    p.price,
    p.compare_price,
    p.stock_quantity,
    p.status,
    p.featured,
    GROUP_CONCAT(DISTINCT c.name) as categories
FROM products p
INNER JOIN product_categories pc ON p.product_id = pc.product_id
INNER JOIN categories c ON pc.category_id = c.category_id
WHERE
    p.status = 'active'
    -- Leading-wildcard LIKE cannot use a B-tree index; for large catalogs
    -- prefer the FULLTEXT index: MATCH(p.name, p.description) AGAINST ('laptop')
    AND (p.name LIKE '%laptop%' OR p.description LIKE '%laptop%')
    AND p.price BETWEEN 500 AND 2000
    AND p.stock_quantity > 0
    -- In the 'Electronics' category itself or any of its direct children
    AND (c.name = 'Electronics' OR c.parent_id IN (
        SELECT category_id FROM categories WHERE name = 'Electronics'
    ))
GROUP BY p.product_id
ORDER BY p.featured DESC, p.created_at DESC
LIMIT 20;

-- Customer order history with details
-- BUG FIX: the original mixed the aggregate GROUP_CONCAT(oi.product_name)
-- with non-aggregated columns and no GROUP BY, which collapses the whole
-- result set to a single row (or errors under ONLY_FULL_GROUP_BY). This
-- version returns one row per order line item, newest orders first.
SELECT
    o.order_id,
    o.order_number,
    o.total_amount,
    o.status,
    o.created_at,
    oi.quantity,
    oi.unit_price,
    oi.total_price,
    p.name as product_name,
    v.title as variant_title
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
-- LEFT JOINs: the product/variant may have been deleted since purchase;
-- oi carries snapshot columns for that case
LEFT JOIN products p ON oi.product_id = p.product_id
LEFT JOIN product_variants v ON oi.variant_id = v.variant_id
WHERE o.customer_id = 1
ORDER BY o.created_at DESC, o.order_id, oi.item_id;

-- Sales analytics query (last 30 days, daily rollup)
-- BUG FIX: the original joined order_items directly, duplicating every order
-- once per line item and inflating COUNT(*), SUM(total_amount) and AVG.
-- Order-level figures now come from orders alone; the item join is isolated
-- in a pre-aggregated derived table used only for unique_products_sold.
SELECT
    DATE(o.created_at) as order_date,
    COUNT(*) as order_count,
    SUM(o.total_amount) as total_revenue,
    AVG(o.total_amount) as avg_order_value,
    COUNT(DISTINCT o.customer_id) as unique_customers,
    MAX(ps.unique_products_sold) as unique_products_sold
FROM orders o
LEFT JOIN (
    -- Distinct products sold per day, computed separately to avoid fanning
    -- out the order rows above
    SELECT
        DATE(o2.created_at) as order_date,
        COUNT(DISTINCT oi.product_id) as unique_products_sold
    FROM orders o2
    JOIN order_items oi ON o2.order_id = oi.order_id
    WHERE o2.status IN ('delivered', 'shipped')
        AND o2.created_at >= DATE_SUB(NOW(), INTERVAL 30 DAY)
    GROUP BY DATE(o2.created_at)
) ps ON ps.order_date = DATE(o.created_at)
WHERE o.status IN ('delivered', 'shipped')
    AND o.created_at >= DATE_SUB(NOW(), INTERVAL 30 DAY)
GROUP BY DATE(o.created_at)
ORDER BY order_date DESC;

-- Low stock inventory report
-- BUG FIX: in the original, the 30-day/status filters sat in the orders JOIN
-- condition, which only nulls out o.* columns on non-matching rows — every
-- order_items row was still summed, so sold_last_30_days counted all-time
-- sales. Recent demand is now pre-aggregated in a derived table so only
-- qualifying line items contribute.
SELECT
    p.product_id,
    p.sku,
    p.name,
    p.stock_quantity,
    p.min_stock_level,
    recent.sold_qty as sold_last_30_days,
    -- Weeks-of-stock style ratio; NULL when nothing sold in the window
    (p.stock_quantity / NULLIF(recent.sold_qty, 0)) as stock_ratio
FROM products p
LEFT JOIN (
    -- Units sold per product over the last 30 days (shipped/delivered only)
    SELECT oi.product_id, SUM(oi.quantity) as sold_qty
    FROM order_items oi
    JOIN orders o ON oi.order_id = o.order_id
    WHERE o.created_at >= DATE_SUB(NOW(), INTERVAL 30 DAY)
        AND o.status IN ('delivered', 'shipped')
    GROUP BY oi.product_id
) recent ON recent.product_id = p.product_id
WHERE
    p.track_inventory = TRUE
    AND p.status = 'active'
    -- Flag items at/below their reorder point, or holding under 10% of
    -- last month's demand
    AND (p.stock_quantity <= p.min_stock_level
        OR (p.stock_quantity / NULLIF(recent.sold_qty, 0)) < 0.1)
ORDER BY stock_ratio ASC;

📝 性能优化查询

🔴 complex

数据库性能优化和索引查询

-- SQL Performance Optimization Queries and Index Management
-- NOTE: this section targets PostgreSQL (pg_stats, pg_stat_* catalogs),
-- unlike the MySQL-flavored schema above.

-- Analyze table sizes and storage usage
-- Per-column planner statistics: n_distinct (cardinality estimate),
-- correlation (physical vs. logical order, -1..1), most common values.
SELECT
    schemaname,
    tablename,
    attname,
    n_distinct,
    correlation,
    most_common_vals
FROM pg_stats
WHERE schemaname = 'public'
    AND tablename IN ('orders', 'order_items', 'products', 'customers')
ORDER BY tablename, attname;

-- Find missing indexes that could improve performance
-- Heuristic only: columns with high cardinality and poor physical clustering
-- are flagged as index candidates; rows matching no rule produce NULL and
-- are filtered out in the outer SELECT.
WITH missing_indexes AS (
    SELECT
        schemaname,
        tablename,
        attname,
        n_distinct,
        CASE
            WHEN n_distinct > 100 AND correlation < 0.1 THEN 'Consider indexing for high cardinality column'
            WHEN correlation > 0.8 THEN 'Good clustering factor'
            WHEN correlation < 0.1 THEN 'Poor clustering, consider index'
        END as recommendation
    FROM pg_stats
    WHERE schemaname = 'public'
        AND tablename IN ('orders', 'order_items', 'products')
        AND n_distinct > 10
)
SELECT * FROM missing_indexes WHERE recommendation IS NOT NULL;

-- Index usage analysis (PostgreSQL)
-- BUG FIX: pg_stat_user_indexes exposes relname / indexrelname, not
-- tablename / indexname — the original column names do not exist and the
-- query fails. Output column names are preserved via aliases.
SELECT
    schemaname,
    relname AS tablename,
    indexrelname AS indexname,
    idx_scan,          -- number of index scans initiated on this index
    idx_tup_read,      -- index entries returned by scans
    idx_tup_fetch,     -- live table rows fetched by simple index scans
    CASE
        WHEN idx_scan = 0 THEN 'Unused index - consider dropping'
        WHEN idx_scan < 10 THEN 'Low usage - evaluate necessity'
        ELSE 'Active index'
    END as usage_status
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
ORDER BY idx_scan;

-- Query performance analysis (requires the pg_stat_statements extension)
-- CONSISTENCY FIX: total_time / mean_time were renamed to total_exec_time /
-- mean_exec_time in PostgreSQL 13; the monitoring query later in this file
-- already uses the new names, so this query is aligned with it.
SELECT
    query,
    calls,
    total_exec_time,
    mean_exec_time,
    rows,
    -- Shared buffer cache hit rate; NULLIF avoids division by zero
    100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
FROM pg_stat_statements
WHERE query NOT LIKE '%pg_stat_statements%'
ORDER BY mean_exec_time DESC
LIMIT 10;

-- Create optimized indexes for common queries
-- NOTE: CREATE INDEX CONCURRENTLY avoids blocking writes but cannot run
-- inside a transaction block (PostgreSQL).

-- Orders table indexes
CREATE INDEX CONCURRENTLY idx_orders_customer_status ON orders(customer_id, status);
CREATE INDEX CONCURRENTLY idx_orders_created_status ON orders(created_at, status);
-- Partial index: only orders above $100, for high-value-order queries
CREATE INDEX CONCURRENTLY idx_orders_total_amount ON orders(total_amount) WHERE total_amount > 100;
CREATE INDEX CONCURRENTLY idx_orders_customer_date ON orders(customer_id, created_at DESC);

-- Order items indexes
CREATE INDEX CONCURRENTLY idx_order_items_order_product ON order_items(order_id, product_id);
CREATE INDEX CONCURRENTLY idx_order_items_product_quantity ON order_items(product_id, quantity);
CREATE INDEX CONCURRENTLY idx_order_items_price ON order_items(unit_price) WHERE unit_price > 50;

-- Products indexes
CREATE INDEX CONCURRENTLY idx_products_status_price ON products(status, price);
-- NOTE(review): products has no category_id column in this schema (categories
-- are linked via product_categories) -- confirm this index target exists
CREATE INDEX CONCURRENTLY idx_products_category_stock ON products(category_id, stock_quantity) WHERE stock_quantity > 0;
CREATE INDEX CONCURRENTLY idx_products_featured_created ON products(featured, created_at) WHERE featured = true;
-- GIN full-text index over name + description; NOTE(review): '||' yields NULL
-- if description is NULL -- consider coalesce(description, '')
CREATE INDEX CONCURRENTLY idx_products_name_fts ON products USING gin(to_tsvector('english', name || ' ' || description));

-- Customer indexes
CREATE INDEX CONCURRENTLY idx_customers_email_active ON customers(email) WHERE is_active = true;
CREATE INDEX CONCURRENTLY idx_customers_total_spent ON customers(total_spent DESC);
CREATE INDEX CONCURRENTLY idx_customers_orders_created ON customers(orders_count, created_at);

-- Partitioning strategy for large tables (orders by year)
-- NOTE: requires orders to have been created with PARTITION BY RANGE (created_at);
-- upper bound is exclusive, so this covers all of calendar year 2024.
CREATE TABLE orders_y2024 PARTITION OF orders
FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');

-- Materialized view for frequently accessed data
-- Pre-computed per-customer order statistics over completed orders only.
-- LEFT JOIN keeps customers with zero qualifying orders (aggregates NULL/0).
CREATE MATERIALIZED VIEW customer_summary AS
SELECT
    c.customer_id,
    c.email,
    c.first_name,
    c.last_name,
    c.created_at as signup_date,
    COUNT(o.order_id) as total_orders,
    SUM(o.total_amount) as total_spent,
    AVG(o.total_amount) as avg_order_value,
    MAX(o.created_at) as last_order_date,
    MIN(o.created_at) as first_order_date
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
    AND o.status IN ('delivered', 'shipped')
GROUP BY c.customer_id, c.email, c.first_name, c.last_name, c.created_at;

-- The unique index is required for REFRESH ... CONCURRENTLY below
CREATE UNIQUE INDEX idx_customer_summary_id ON customer_summary(customer_id);
CREATE INDEX idx_customer_summary_spent ON customer_summary(total_spent DESC);
CREATE INDEX idx_customer_summary_orders ON customer_summary(total_orders DESC);

-- Refresh materialized view periodically
-- CONCURRENTLY refreshes without blocking readers (needs the unique index
-- above); schedule this via cron/pg_cron.
CREATE OR REPLACE FUNCTION refresh_customer_summary()
RETURNS void AS $$
BEGIN
    REFRESH MATERIALIZED VIEW CONCURRENTLY customer_summary;
END;
$$ LANGUAGE plpgsql;

-- Analyze slow queries with EXPLAIN ANALYZE
-- ANALYZE actually executes the query; BUFFERS adds cache-hit detail;
-- FORMAT JSON produces machine-readable plans for tooling.
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT
    o.order_id,
    o.order_number,
    o.total_amount,
    o.created_at,
    c.first_name,
    c.last_name
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
WHERE o.status = 'delivered'
    AND o.created_at >= '2024-01-01'
ORDER BY o.total_amount DESC
LIMIT 100;

-- Optimized version of the same query
-- BUG FIX: the original inserted MySQL's USE INDEX hints into a PostgreSQL
-- EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) statement — PostgreSQL has no
-- inline index hints, so the query was invalid in both dialects. With the
-- supporting indexes created earlier in this file, the PostgreSQL planner
-- selects them itself; verify index usage in the EXPLAIN output.
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT
    o.order_id,
    o.order_number,
    o.total_amount,
    o.created_at,
    c.first_name,
    c.last_name
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
WHERE o.status = 'delivered'
    AND o.created_at >= '2024-01-01'
ORDER BY o.total_amount DESC
LIMIT 100;

-- Query to identify table bloat and vacuum needs
-- Classic PostgreSQL bloat-estimation query: estimates the optimal page count
-- (otta) per table from pg_stats row widths and compares it to the actual
-- relpages to approximate wasted space. Estimation only — pgstattuple gives
-- exact numbers. Logic left untouched; it is order-sensitive and dense.
SELECT
    schemaname,
    tablename,
    ROUND(CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS bloat,
    CASE WHEN relpages < otta THEN 0 ELSE relpages::bigint - otta END AS wastedpages,
    CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END AS wastedbytes,
    CASE WHEN relpages < otta THEN 0 ELSE (bs*(relpages-otta))::bigint END AS wastedsize
FROM (
    -- otta: estimated page count needed for the live tuples
    SELECT
        schemaname, tablename, cc.reltuples, cc.relpages, bs,
        CEIL((cc.reltuples*((datahdr+ma-
            (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta
    FROM (
        -- datahdr/nullhdr2: per-tuple header sizes adjusted for alignment (ma)
        SELECT
            ma,bs,schemaname,tablename,
            (datawidth+(hdr+ma-(CASE WHEN hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
            (maxfracsum*(nullhdr+ma-(CASE WHEN nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
        FROM (
            -- datawidth: sum of average non-null column widths per table
            SELECT
                schemaname, tablename, hdr, ma, bs,
                SUM((1-null_frac)*avg_width) AS datawidth,
                MAX(null_frac) AS maxfracsum,
                hdr+(
                    SELECT 1+COUNT(*)*(8-CASE WHEN avg_width<=248 THEN 1 ELSE 8 END)
                    FROM pg_stats s2
                    WHERE s2.schemaname=s.schemaname AND s2.tablename=s.tablename AND null_frac<>0
                ) AS nullhdr
            FROM pg_stats s, (
                -- constants: block size (bs), tuple header size (hdr),
                -- max alignment (ma) — hdr/ma vary by server version/platform
                SELECT
                    (SELECT current_setting('block_size')::numeric) AS bs,
                    CASE WHEN substring(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
                    CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
                FROM (SELECT version() AS v) AS foo
            ) AS constants
            WHERE s.schemaname='public'
            GROUP BY 1,2,3,4,5
        ) AS foo
    ) AS rs
    JOIN pg_class cc ON cc.relname = rs.tablename
    JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
) AS sml
WHERE bloat > 1.5
ORDER BY wastedbytes DESC;

-- Automatic vacuum and analyze configuration recommendation
-- Flags tables whose dead-tuple ratio exceeds 10% (the autovacuum default
-- scale factor threshold region) as candidates for tuning.
SELECT
    schemaname,
    tablename,
    n_tup_ins,
    n_tup_upd,
    n_tup_del,
    n_live_tup,
    n_dead_tup,
    last_vacuum,
    last_autovacuum,
    last_analyze,
    last_autoanalyze,
    vacuum_count,
    autovacuum_count,
    analyze_count,
    autoanalyze_count,
    ROUND(n_dead_tup::numeric / NULLIF(n_live_tup, 0) * 100, 2) as dead_tuple_ratio,
    CASE
        WHEN n_dead_tup::numeric / NULLIF(n_live_tup, 0) > 0.1 THEN 'Consider autovacuum tuning'
        ELSE 'Healthy'
    END as recommendation
FROM pg_stat_user_tables
ORDER BY dead_tuple_ratio DESC;

-- Create indexes for frequently used WHERE clauses
-- Partial index: only in-flight orders, keeping the index small and hot
CREATE INDEX CONCURRENTLY idx_orders_status_customer_date
ON orders(status, customer_id, created_at DESC)
WHERE status IN ('pending', 'processing', 'shipped');

-- NOTE(review): assumes products.category_id exists -- this schema links
-- categories via product_categories; confirm the column before creating
CREATE INDEX CONCURRENTLY idx_products_category_status_price
ON products(category_id, status, price DESC)
WHERE status = 'active' AND stock_quantity > 0;

CREATE INDEX CONCURRENTLY idx_customers_active_total_spent
ON customers(is_active, total_spent DESC)
WHERE is_active = true AND total_spent > 0;

-- Composite index for complex queries
-- Column order matters: equality column (status) first, then sort columns
CREATE INDEX CONCURRENTLY idx_orders_composite
ON orders(status, created_at DESC, total_amount DESC, customer_id);

-- Partial indexes for specific conditions
CREATE INDEX CONCURRENTLY idx_orders_high_value
ON orders(created_at, customer_id)
WHERE total_amount > 1000;

CREATE INDEX CONCURRENTLY idx_products_featured
ON products(created_at DESC)
WHERE featured = true AND status = 'active';

-- Index for JOIN operations
CREATE INDEX CONCURRENTLY idx_order_items_product_orders
ON order_items(product_id, order_id, quantity DESC);

-- Covering indexes to avoid table lookups
-- INCLUDE (PostgreSQL 11+) stores payload columns in the leaf pages so the
-- query can be answered by an index-only scan
CREATE INDEX CONCURRENTLY idx_orders_covering
ON orders(customer_id, created_at DESC)
INCLUDE (order_id, total_amount, status);

-- Performance monitoring query
-- Slowest frequently-called statements (pg_stat_statements, PostgreSQL 13+
-- column names); hit_percent is the shared-buffer cache hit rate
SELECT
    query,
    calls,
    total_exec_time,
    mean_exec_time,
    rows,
    100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
FROM pg_stat_statements
WHERE calls > 10
ORDER BY mean_exec_time DESC
LIMIT 20;

📝 数据迁移脚本

🔴 complex

数据库迁移和数据转换脚本

-- Data Migration Scripts and Database Versioning
-- NOTE: this section targets MySQL (AUTO_INCREMENT, ENUM, DELIMITER).

-- Migration versioning table
-- One row per applied migration; version doubles as the primary key so a
-- migration cannot be recorded twice.
CREATE TABLE IF NOT EXISTS schema_migrations (
    version VARCHAR(255) PRIMARY KEY,
    applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    description TEXT
);

-- Example migration: Add user preferences table
-- Version: 20240113_001_add_user_preferences
INSERT INTO schema_migrations (version, description) VALUES ('20240113_001_add_user_preferences', 'Add user preferences table');

-- Key/value preference storage, one row per (user, key) pair.
-- NOTE(review): an earlier section of this file defines user_preferences with
-- a column-per-setting layout; with IF NOT EXISTS this CREATE is silently
-- skipped if that table exists -- confirm which schema is intended.
CREATE TABLE IF NOT EXISTS user_preferences (
    preference_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    user_id BIGINT NOT NULL,
    preference_key VARCHAR(100) NOT NULL,
    preference_value TEXT,
    preference_type ENUM('string', 'integer', 'boolean', 'json') DEFAULT 'string',
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    FOREIGN KEY (user_id) REFERENCES users(user_id) ON DELETE CASCADE,
    UNIQUE KEY unique_user_preference (user_id, preference_key),
    INDEX idx_user_id (user_id),
    -- BUG FIX: original indexed the non-existent column `_preference_key`
    INDEX idx_preference_key (preference_key)
);

-- Migration: Add audit trails for critical tables
-- Version: 20240113_002_add_audit_trails
INSERT INTO schema_migrations (version, description) VALUES ('20240113_002_add_audit_trails', 'Add audit trail tables');

-- Generic audit log: one row per INSERT/UPDATE/DELETE on an audited table.
-- table_name + record_id locate the row; old/new values are JSON snapshots.
-- record_id is VARCHAR so tables with non-numeric keys can be audited too.
CREATE TABLE IF NOT EXISTS audit_trail (
    audit_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    table_name VARCHAR(100) NOT NULL,
    record_id VARCHAR(100) NOT NULL,
    action ENUM('INSERT', 'UPDATE', 'DELETE') NOT NULL,
    old_values JSON,
    new_values JSON,
    changed_by VARCHAR(255),
    changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    ip_address VARCHAR(45),
    user_agent TEXT,
    INDEX idx_table_record (table_name, record_id),
    INDEX idx_changed_at (changed_at),
    INDEX idx_changed_by (changed_by)
);

-- Audit triggers for the orders table
-- BUG FIX: the original used PostgreSQL trigger syntax (a single
-- "AFTER INSERT OR UPDATE OR DELETE" trigger branching on TG_OP) inside a
-- MySQL DELIMITER block — invalid in both engines. MySQL needs one trigger
-- per event and has no TG_OP, so the logic is split into three triggers.
DELIMITER //

CREATE TRIGGER trigger_audit_orders_insert
    AFTER INSERT ON orders
    FOR EACH ROW
BEGIN
    -- Record the newly created order as a JSON snapshot
    INSERT INTO audit_trail (
        table_name, record_id, action, new_values, changed_by
    ) VALUES (
        'orders', NEW.order_id, 'INSERT',
        JSON_OBJECT(
            'order_id', NEW.order_id,
            'customer_id', NEW.customer_id,
            'total_amount', NEW.total_amount,
            'status', NEW.status
        ),
        NEW.customer_id
    );
END//

CREATE TRIGGER trigger_audit_orders_update
    AFTER UPDATE ON orders
    FOR EACH ROW
BEGIN
    -- Record before/after snapshots of the audited columns
    INSERT INTO audit_trail (
        table_name, record_id, action, old_values, new_values, changed_by
    ) VALUES (
        'orders', NEW.order_id, 'UPDATE',
        JSON_OBJECT(
            'order_id', OLD.order_id,
            'status', OLD.status,
            'total_amount', OLD.total_amount
        ),
        JSON_OBJECT(
            'order_id', NEW.order_id,
            'status', NEW.status,
            'total_amount', NEW.total_amount
        ),
        NEW.customer_id
    );
END//

CREATE TRIGGER trigger_audit_orders_delete
    AFTER DELETE ON orders
    FOR EACH ROW
BEGIN
    -- Record the final state of the deleted order
    INSERT INTO audit_trail (
        table_name, record_id, action, old_values, changed_by
    ) VALUES (
        'orders', OLD.order_id, 'DELETE',
        JSON_OBJECT(
            'order_id', OLD.order_id,
            'customer_id', OLD.customer_id,
            'total_amount', OLD.total_amount,
            'status', OLD.status
        ),
        OLD.customer_id
    );
END//

DELIMITER ;

-- Data transformation script: Standardize phone numbers to +1XXXXXXXXXX
-- Version: 20240113_003_standardize_phone_numbers
INSERT INTO schema_migrations (version, description) VALUES ('20240113_003_standardize_phone_numbers', 'Standardize phone number formats');

-- BUG FIX: the original used PostgreSQL's 4-argument REGEXP_REPLACE(..., 'g')
-- form (MySQL treats the 4th argument as a position and errors), and kept a
-- leading '+' before prepending '+1', turning '+1555...' into '+1+1555...'.
-- This version strips every non-digit, drops one leading country-code '1'
-- if present, then prefixes '+1'.
-- NOTE(review): assumes all stored numbers are NANP (+1) -- confirm before
-- running on international data.
UPDATE customers
SET phone = CONCAT(
    '+1',
    REGEXP_REPLACE(REGEXP_REPLACE(phone, '[^0-9]', ''), '^1', '')
)
WHERE phone IS NOT NULL
    AND phone REGEXP '[0-9]';

-- Data migration: Migrate legacy user roles to new permission system
-- Version: 20240113_004_migrate_user_roles
INSERT INTO schema_migrations (version, description) VALUES ('20240113_004_migrate_user_roles', 'Migrate legacy user roles to new permissions');

-- Create new permissions table
-- Fine-grained permissions named '<module>.<action>'; module groups them
-- for bulk assignment below.
CREATE TABLE IF NOT EXISTS permissions (
    permission_id INT PRIMARY KEY AUTO_INCREMENT,
    permission_name VARCHAR(100) UNIQUE NOT NULL,
    description TEXT,
    module VARCHAR(50),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Insert default permissions
-- NOTE: not idempotent -- rerunning fails on the UNIQUE permission_name;
-- use INSERT IGNORE / ON DUPLICATE KEY if the migration may be re-applied.
INSERT INTO permissions (permission_name, description, module) VALUES
('users.read', 'Read user information', 'users'),
('users.write', 'Create and update users', 'users'),
('users.delete', 'Delete users', 'users'),
('orders.read', 'Read order information', 'orders'),
('orders.write', 'Create and update orders', 'orders'),
('orders.delete', 'Delete orders', 'orders'),
('products.read', 'Read product information', 'products'),
('products.write', 'Create and update products', 'products'),
('products.delete', 'Delete products', 'products');

-- Create role permissions mapping table
-- Junction table; composite PK prevents duplicate grants.
CREATE TABLE IF NOT EXISTS role_permissions (
    role_id INT,
    permission_id INT,
    PRIMARY KEY (role_id, permission_id),
    FOREIGN KEY (role_id) REFERENCES roles(role_id) ON DELETE CASCADE,
    FOREIGN KEY (permission_id) REFERENCES permissions(permission_id) ON DELETE CASCADE
);

-- Migrate existing role permissions
-- CROSS JOIN generates every (role, permission) pair; the WHERE clause keeps
-- only the grants each legacy role should receive: admin gets everything,
-- moderator gets users+orders, user gets read-only order/product access.
INSERT INTO role_permissions (role_id, permission_id)
SELECT r.role_id, p.permission_id
FROM roles r
CROSS JOIN permissions p
WHERE
    (r.role_name = 'admin' AND p.module IN ('users', 'orders', 'products')) OR
    (r.role_name = 'moderator' AND p.module IN ('users', 'orders')) OR
    (r.role_name = 'user' AND p.permission_name IN ('orders.read', 'products.read'));

-- Data cleaning script: Remove duplicate records
-- Version: 20240113_005_remove_duplicates
INSERT INTO schema_migrations (version, description) VALUES ('20240113_005_remove_duplicates', 'Remove duplicate customer records');

-- Find and remove duplicate customers (based on email)
-- NOTE: despite the name, this soft-deactivates duplicates (is_active = FALSE)
-- rather than deleting them; the oldest customer_id per email is kept active.
-- Requires MySQL 8.0+ (CTE with UPDATE).
WITH duplicate_customers AS (
    SELECT
        email,
        MIN(customer_id) as keep_id,
        GROUP_CONCAT(customer_id) as all_ids,
        COUNT(*) as duplicate_count
    FROM customers
    WHERE email IS NOT NULL
    GROUP BY email
    HAVING COUNT(*) > 1
)
UPDATE customers c
SET is_active = FALSE
WHERE c.email IN (SELECT email FROM duplicate_customers)
    AND c.customer_id NOT IN (SELECT keep_id FROM duplicate_customers);

-- Data enrichment: Add geolocation data based on addresses
-- Version: 20240113_006_add_geolocation
INSERT INTO schema_migrations (version, description) VALUES ('20240113_006_add_geolocation', 'Add geolocation data to addresses');

-- NOTE(review): ADD COLUMN IF NOT EXISTS is MariaDB/PostgreSQL syntax;
-- stock MySQL does not support it -- confirm target engine.
ALTER TABLE addresses ADD COLUMN IF NOT EXISTS latitude DECIMAL(10,8);
ALTER TABLE addresses ADD COLUMN IF NOT EXISTS longitude DECIMAL(11,8);
ALTER TABLE addresses ADD COLUMN IF NOT EXISTS country_code VARCHAR(2);
ALTER TABLE addresses ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Update addresses with country codes and timezones
-- Best-effort backfill for a handful of known countries/provinces; anything
-- unmatched stays NULL for a later enrichment pass.
UPDATE addresses a
SET
    country_code = CASE
        WHEN a.country LIKE 'United States%' THEN 'US'
        WHEN a.country LIKE 'Canada%' THEN 'CA'
        WHEN a.country LIKE 'United Kingdom%' THEN 'GB'
        WHEN a.country LIKE 'Germany%' THEN 'DE'
        WHEN a.country LIKE 'France%' THEN 'FR'
        ELSE NULL
    END,
    timezone = CASE
        WHEN a.country LIKE 'United States%' AND a.province IN ('California', 'CA') THEN 'America/Los_Angeles'
        WHEN a.country LIKE 'United States%' AND a.province IN ('New York', 'NY') THEN 'America/New_York'
        WHEN a.country LIKE 'Canada%' AND a.province IN ('Ontario', 'ON') THEN 'America/Toronto'
        ELSE NULL
    END
WHERE country_code IS NULL;

-- Data archival: Archive old orders
-- Version: 20240113_007_archive_old_orders
INSERT INTO schema_migrations (version, description) VALUES ('20240113_007_archive_old_orders', 'Archive orders older than 2 years');

-- Create archive table
-- NOTE(review): "(LIKE ... INCLUDING ALL)" is PostgreSQL syntax; the MySQL
-- equivalent is CREATE TABLE orders_archive LIKE orders -- confirm engine.
CREATE TABLE IF NOT EXISTS orders_archive (
    LIKE orders INCLUDING ALL
);

-- Archive old orders (older than 2 years and completed)
-- NOTE: run the INSERT and the DELETE below inside one transaction so a
-- failure between them cannot lose or duplicate orders.
INSERT INTO orders_archive
SELECT * FROM orders
WHERE status IN ('delivered', 'cancelled', 'refunded')
    AND created_at < DATE_SUB(CURRENT_DATE, INTERVAL 2 YEAR);

-- Delete archived orders from main table
DELETE FROM orders
WHERE order_id IN (SELECT order_id FROM orders_archive);

-- Data validation script: Validate data integrity
-- Version: 20240113_008_validate_data_integrity
INSERT INTO schema_migrations (version, description) VALUES ('20240113_008_validate_data_integrity', 'Validate data integrity and fix inconsistencies');

-- Find orders with invalid customer references
-- Anti-join: orders whose customer_id points at no existing customer.
SELECT 'Invalid customer references' as issue_type, COUNT(*) as count
FROM orders o
LEFT JOIN customers c ON o.customer_id = c.customer_id
WHERE o.customer_id IS NOT NULL AND c.customer_id IS NULL;

-- Fix orphaned order items
-- NOT IN is safe here only because products.product_id is a non-null PK;
-- if the subquery could yield NULL, NOT IN would match nothing.
UPDATE order_items oi
SET product_id = NULL
WHERE product_id NOT IN (SELECT product_id FROM products);

-- Fix negative inventory quantities
UPDATE products
SET stock_quantity = 0
WHERE stock_quantity < 0;

UPDATE product_variants
SET inventory_quantity = 0
WHERE inventory_quantity < 0;

-- Data backup: Create backup of critical tables before major changes
-- Version: 20240113_009_create_backups
INSERT INTO schema_migrations (version, description) VALUES ('20240113_009_create_backups', 'Create backups of critical tables');

-- NOTE: CREATE TABLE ... AS SELECT copies data but not indexes, FKs, or
-- AUTO_INCREMENT settings; adequate for a pre-migration data snapshot only.
CREATE TABLE customers_backup_20240113 AS SELECT * FROM customers;
CREATE TABLE orders_backup_20240113 AS SELECT * FROM orders;
CREATE TABLE products_backup_20240113 AS SELECT * FROM products;

-- Rollback script: In case migration needs to be reverted
-- Version: 20240113_rollback
INSERT INTO schema_migrations (version, description) VALUES ('20240113_rollback', 'Rollback script available');

-- Note: In a real migration system, you would have specific rollback procedures
-- This is a placeholder showing the concept

-- Performance optimization after migration
-- Version: 20240113_010_optimize_post_migration
INSERT INTO schema_migrations (version, description) VALUES ('20240113_010_optimize_post_migration', 'Optimize database after migration');

-- Update table statistics
-- ANALYZE TABLE refreshes key distribution stats used by the MySQL optimizer
ANALYZE TABLE customers;
ANALYZE TABLE orders;
ANALYZE TABLE products;
ANALYZE TABLE user_preferences;
ANALYZE TABLE audit_trail;

-- Rebuild indexes if needed
-- OPTIMIZE TABLE rebuilds the table/indexes and reclaims space (MySQL);
-- locks the table for the duration on some storage engines
OPTIMIZE TABLE customers;
OPTIMIZE TABLE orders;
OPTIMIZE TABLE products;

-- Migration status report
-- Buckets applied migrations by recency for a quick operational overview
SELECT
    version,
    description,
    applied_at,
    CASE
        WHEN applied_at >= DATE_SUB(NOW(), INTERVAL 1 DAY) THEN 'Recent'
        WHEN applied_at >= DATE_SUB(NOW(), INTERVAL 7 DAY) THEN 'This Week'
        ELSE 'Older'
    END as status
FROM schema_migrations
ORDER BY applied_at DESC;