Hugging Face Deployment Examples

Hugging Face model deployment examples, including Transformers, Diffusers, and Gradio applications with Docker and cloud deployment

💻 Hugging Face Transformers API Deployment python

🟡 intermediate ⭐⭐⭐⭐

Deploy Hugging Face Transformers models as a REST API with FastAPI and Docker containerization

⏱️ 45 min 🏷️ huggingface, deployment, api, transformers, ml
Prerequisites: Python, Docker, Kubernetes, Machine Learning, Hugging Face
# Hugging Face Transformers API Deployment
# Deploy ML models as REST API services with FastAPI and Docker

# 1. requirements.txt
fastapi==0.104.1
uvicorn[standard]==0.24.0
transformers==4.35.2
torch==2.1.1
torchvision==0.16.1
pillow==10.1.0
numpy==1.24.4
python-multipart==0.0.6
pydantic==2.5.0
aiofiles==23.2.1
python-dotenv==1.0.0
psutil==5.9.6
prometheus-client==0.19.0

# 2. Dockerfile
FROM python:3.11-slim

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    software-properties-common \
    git \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Create non-root user
RUN useradd --create-home --shell /bin/bash app \
    && chown -R app:app /app
USER app

# Copy application code
COPY --chown=app:app . .

# Create cache directory for models
RUN mkdir -p /app/.cache/huggingface \
    && chmod 777 /app/.cache/huggingface

# Expose port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Start the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

# 3. main.py - FastAPI application
from fastapi import FastAPI, HTTPException, File, UploadFile, Form
from fastapi.responses import JSONResponse, FileResponse, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
import torch
import torch.nn.functional as F
from transformers import (
    AutoTokenizer, AutoModel, AutoModelForSequenceClassification,
    AutoModelForQuestionAnswering, AutoModelForCausalLM,
    pipeline, Pipeline
)
import os
import psutil
import time
import uuid
import asyncio
from datetime import datetime
import logging
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Prometheus metrics
REQUEST_COUNT = Counter('hf_requests_total', 'Total requests', ['model', 'endpoint'])
REQUEST_DURATION = Histogram('hf_request_duration_seconds', 'Request duration')
GPU_MEMORY_USAGE = Gauge('hf_gpu_memory_usage_bytes', 'GPU memory usage')
MODEL_LOAD_TIME = Histogram('hf_model_load_duration_seconds', 'Model loading time')

# Initialize FastAPI
app = FastAPI(
    title="Hugging Face Model API",
    description="Deploy Hugging Face models as REST API services",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Model storage
models: Dict[str, Any] = {}

# Request models
class TextClassificationRequest(BaseModel):
    text: str = Field(..., min_length=1, max_length=512)
    model_name: Optional[str] = Field(default="distilbert-base-uncased-finetuned-sst-2-english")

class QuestionAnsweringRequest(BaseModel):
    question: str = Field(..., min_length=1)
    context: str = Field(..., min_length=1)
    model_name: Optional[str] = Field(default="distilbert-base-cased-distilled-squad")

class TextGenerationRequest(BaseModel):
    prompt: str = Field(..., min_length=1)
    max_length: Optional[int] = Field(default=100, ge=1, le=512)
    temperature: Optional[float] = Field(default=1.0, ge=0.0, le=2.0)
    model_name: Optional[str] = Field(default="gpt2")

# Response models
class ClassificationResponse(BaseModel):
    label: str
    score: float
    predictions: List[Dict[str, Any]]  # each entry contains at least 'label' and 'score'

class QAResponse(BaseModel):
    answer: str
    score: float
    start: int
    end: int

class GenerationResponse(BaseModel):
    generated_text: str
    input_tokens: int
    output_tokens: int

class ModelInfo(BaseModel):
    name: str
    type: str
    loaded: bool
    device: str
    memory_usage: str

# Model loading functions
@MODEL_LOAD_TIME.time()
def load_classification_model(model_name: str):
    """Load text classification model"""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)

    if torch.cuda.is_available():
        model = model.to('cuda')

    return {
        'model': model,
        'tokenizer': tokenizer,
        'pipeline': pipeline("text-classification", model=model, tokenizer=tokenizer),
        'device': 'cuda' if torch.cuda.is_available() else 'cpu'
    }

@MODEL_LOAD_TIME.time()
def load_qa_model(model_name: str):
    """Load question answering model"""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForQuestionAnswering.from_pretrained(model_name)

    if torch.cuda.is_available():
        model = model.to('cuda')

    return {
        'model': model,
        'tokenizer': tokenizer,
        'pipeline': pipeline("question-answering", model=model, tokenizer=tokenizer),
        'device': 'cuda' if torch.cuda.is_available() else 'cpu'
    }

@MODEL_LOAD_TIME.time()
def load_generation_model(model_name: str):
    """Load text generation model"""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    if torch.cuda.is_available():
        model = model.to('cuda')

    # Add padding token if not present
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return {
        'model': model,
        'tokenizer': tokenizer,
        'pipeline': pipeline("text-generation", model=model, tokenizer=tokenizer),
        'device': 'cuda' if torch.cuda.is_available() else 'cpu'
    }

# API Routes
@app.on_event("startup")
async def startup_event():
    """Load default models on startup"""
    logger.info("Loading default models...")

    try:
        # Load default models
        models['classification'] = load_classification_model(
            "distilbert-base-uncased-finetuned-sst-2-english"
        )
        models['qa'] = load_qa_model(
            "distilbert-base-cased-distilled-squad"
        )
        models['generation'] = load_generation_model("gpt2")

        logger.info("Default models loaded successfully")
    except Exception as e:
        logger.error(f"Error loading models: {e}")

@app.get("/", response_model=Dict[str, str])
async def root():
    """Root endpoint"""
    return {
        "message": "Hugging Face Model API",
        "docs": "/docs",
        "health": "/health",
        "models": "/models"
    }

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    gpu_available = torch.cuda.is_available()
    gpu_memory = 0

    if gpu_available:
        gpu_memory = torch.cuda.memory_allocated()
        GPU_MEMORY_USAGE.set(gpu_memory)

    return {
        "status": "healthy",
        "timestamp": datetime.utcnow().isoformat(),
        "gpu_available": gpu_available,
        "gpu_memory_mb": gpu_memory / 1024 / 1024,
        "cpu_percent": psutil.cpu_percent(),
        "memory_percent": psutil.virtual_memory().percent,
        "loaded_models": list(models.keys())
    }

@app.get("/models", response_model=List[ModelInfo])
async def get_loaded_models():
    """Get information about loaded models"""
    model_info = []

    for model_type, model_data in models.items():
        memory_usage = 0
        if hasattr(model_data['model'], 'parameters'):
            memory_usage = sum(p.numel() * p.element_size() for p in model_data['model'].parameters())

        model_info.append(ModelInfo(
            name=model_data['pipeline'].model.name_or_path,
            type=model_type,
            loaded=True,
            device=model_data['device'],
            memory_usage=f"{memory_usage / 1024 / 1024:.1f} MB"
        ))

    return model_info

@app.post("/predict/classify", response_model=ClassificationResponse)
@REQUEST_DURATION.time()
async def classify_text(request: TextClassificationRequest):
    """Classify text using Hugging Face models"""
    REQUEST_COUNT.labels(model=request.model_name, endpoint='classify').inc()

    try:
        # Load model if not already loaded
        if 'classification' not in models or request.model_name != models['classification']['pipeline'].model.name_or_path:
            logger.info(f"Loading classification model: {request.model_name}")
            models['classification'] = load_classification_model(request.model_name)

        # Make prediction
        with torch.no_grad():
            result = models['classification']['pipeline'](request.text)

        # Convert to response format
        if isinstance(result, list) and len(result) > 0:
            prediction = result[0]
        else:
            prediction = result

        return ClassificationResponse(
            label=prediction['label'],
            score=prediction['score'],
            predictions=[prediction] if not isinstance(prediction, list) else prediction
        )

    except Exception as e:
        logger.error(f"Classification error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/predict/qa", response_model=QAResponse)
@REQUEST_DURATION.time()
async def question_answering(request: QuestionAnsweringRequest):
    """Answer questions using context"""
    REQUEST_COUNT.labels(model=request.model_name, endpoint='qa').inc()

    try:
        # Load model if not already loaded
        if 'qa' not in models or request.model_name != models['qa']['pipeline'].model.name_or_path:
            logger.info(f"Loading QA model: {request.model_name}")
            models['qa'] = load_qa_model(request.model_name)

        # Make prediction
        with torch.no_grad():
            result = models['qa']['pipeline'](
                question=request.question,
                context=request.context
            )

        return QAResponse(
            answer=result['answer'],
            score=result['score'],
            start=result['start'],
            end=result['end']
        )

    except Exception as e:
        logger.error(f"QA error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/predict/generate", response_model=GenerationResponse)
@REQUEST_DURATION.time()
async def generate_text(request: TextGenerationRequest):
    """Generate text using language models"""
    REQUEST_COUNT.labels(model=request.model_name, endpoint='generate').inc()

    try:
        # Load model if not already loaded
        if 'generation' not in models or request.model_name != models['generation']['pipeline'].model.name_or_path:
            logger.info(f"Loading generation model: {request.model_name}")
            models['generation'] = load_generation_model(request.model_name)

        # Generate text
        with torch.no_grad():
            result = models['generation']['pipeline'](
                request.prompt,
                max_length=request.max_length,
                temperature=request.temperature,
                num_return_sequences=1,
                pad_token_id=models['generation']['tokenizer'].eos_token_id
            )

        generated_text = result[0]['generated_text']
        input_length = len(models['generation']['tokenizer'].encode(request.prompt))
        output_length = len(models['generation']['tokenizer'].encode(generated_text)) - input_length

        return GenerationResponse(
            generated_text=generated_text,
            input_tokens=input_length,
            output_tokens=output_length
        )

    except Exception as e:
        logger.error(f"Generation error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/predict/zero-shot", response_model=Dict[str, float])
async def zero_shot_classification(
    text: str = Form(...),
    labels: str = Form(...),
    model_name: Optional[str] = Form("facebook/bart-large-mnli")
):
    """Zero-shot text classification"""
    try:
        # Load zero-shot classification pipeline
        classifier = pipeline("zero-shot-classification", model=model_name)

        # Parse labels from comma-separated string
        label_list = [label.strip() for label in labels.split(',')]

        # Make prediction
        result = classifier(text, label_list)

        # Return as dictionary
        return {label: score for label, score in zip(result['labels'], result['scores'])}

    except Exception as e:
        logger.error(f"Zero-shot error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/predict/sentiment")
async def sentiment_analysis(text: str = Form(...)):
    """Simple sentiment analysis"""
    try:
        # Use built-in sentiment analysis pipeline
        classifier = pipeline("sentiment-analysis")
        result = classifier(text)
        return result[0]

    except Exception as e:
        logger.error(f"Sentiment analysis error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/metrics")
async def get_metrics():
    """Prometheus metrics endpoint"""
    return generate_latest()

# Batch processing endpoint
@app.post("/predict/batch/classify")
async def batch_classify(
    texts: List[str],
    model_name: Optional[str] = "distilbert-base-uncased-finetuned-sst-2-english"
):
    """Batch text classification"""
    if len(texts) > 100:
        raise HTTPException(status_code=400, detail="Maximum 100 texts allowed per batch")

    try:
        # Load model if needed
        if 'classification' not in models or model_name != models['classification']['pipeline'].model.name_or_path:
            models['classification'] = load_classification_model(model_name)

        # Process batch
        results = models['classification']['pipeline'](texts)

        return {"results": results}

    except Exception as e:
        logger.error(f"Batch classification error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# Model management endpoints
@app.post("/models/load/{model_type}")
async def load_model(model_type: str, model_name: str = Form(...)):
    """Load a specific model"""
    try:
        if model_type == "classification":
            models['classification'] = load_classification_model(model_name)
        elif model_type == "qa":
            models['qa'] = load_qa_model(model_name)
        elif model_type == "generation":
            models['generation'] = load_generation_model(model_name)
        else:
            raise HTTPException(status_code=400, detail="Invalid model type")

        return {"message": f"Model {model_name} loaded successfully for {model_type}"}

    except Exception as e:
        logger.error(f"Model loading error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.delete("/models/unload/{model_type}")
async def unload_model(model_type: str):
    """Unload a model to free memory"""
    if model_type in models:
        del models[model_type]

        # Clear GPU cache if available
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return {"message": f"Model {model_type} unloaded successfully"}
    else:
        raise HTTPException(status_code=404, detail="Model not found")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
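
Once the container is running, the endpoints defined above can be smoke-tested with a small client. A minimal sketch, assuming the API is reachable at http://localhost:8000 and the requests package is installed (it is not part of the server's requirements.txt):

# client_example.py - exercise the API defined in main.py
import requests

BASE_URL = "http://localhost:8000"  # adjust for your deployment

# Text classification (TextClassificationRequest)
resp = requests.post(f"{BASE_URL}/predict/classify", json={"text": "I really enjoyed this movie!"})
print("classify:", resp.json())

# Question answering (QuestionAnsweringRequest)
resp = requests.post(f"{BASE_URL}/predict/qa", json={
    "question": "What does the service expose?",
    "context": "The service exposes Hugging Face models for classification, QA and text generation."
})
print("qa:", resp.json())

# Text generation (TextGenerationRequest)
resp = requests.post(f"{BASE_URL}/predict/generate", json={"prompt": "Once upon a time", "max_length": 60})
print("generate:", resp.json())

# Health check
print("health:", requests.get(f"{BASE_URL}/health").json())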

# 4. docker-compose.yml for development
version: '3.8'

services:
  hf-api:
    build: .
    ports:
      - "8000:8000"
    environment:
      - TRANSFORMERS_CACHE=/app/.cache/transformers
      - HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface
    volumes:
      - hf_cache:/app/.cache
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3001:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards
      - ./grafana/datasources:/etc/grafana/provisioning/datasources

volumes:
  hf_cache:
  grafana_data:
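
The compose file mounts a prometheus.yml (not shown here) that should scrape the API's /metrics endpoint. Once that is in place, the custom counters defined in main.py can be queried through Prometheus' HTTP API. A sketch, assuming Prometheus from the compose file is reachable on localhost:9090 and the requests package is installed:

# check_metrics.py - read the hf_requests_total counter via the Prometheus HTTP API
import requests

PROM_QUERY_URL = "http://localhost:9090/api/v1/query"

resp = requests.get(PROM_QUERY_URL, params={"query": "sum by (endpoint) (hf_requests_total)"})
resp.raise_for_status()

for series in resp.json()["data"]["result"]:
    endpoint = series["metric"].get("endpoint", "unknown")
    value = series["value"][1]
    print(f"{endpoint}: {value} requests")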

# 5. Kubernetes deployment manifest
# k8s-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hf-api-deployment
  labels:
    app: hf-api
spec:
  replicas: 2
  selector:
    matchLabels:
      app: hf-api
  template:
    metadata:
      labels:
        app: hf-api
    spec:
      containers:
      - name: hf-api
        image: your-registry/hf-api:latest
        ports:
        - containerPort: 8000
        env:
        - name: TRANSFORMERS_CACHE
          value: "/app/.cache/transformers"
        - name: HUGGINGFACE_HUB_CACHE
          value: "/app/.cache/huggingface"
        resources:
          requests:
            memory: "2Gi"
            cpu: "1000m"
          limits:
            memory: "8Gi"
            cpu: "2000m"
            nvidia.com/gpu: 1
        volumeMounts:
        - name: model-cache
          mountPath: /app/.cache
        livenessProbe:
          httpGet:
            path: /health
            port: 8000
          initialDelaySeconds: 30
          periodSeconds: 10
        readinessProbe:
          httpGet:
            path: /health
            port: 8000
          initialDelaySeconds: 5
          periodSeconds: 5
      volumes:
      - name: model-cache
        persistentVolumeClaim:
          claimName: model-cache-pvc
      nodeSelector:
        accelerator: nvidia-tesla-v100
      tolerations:
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule

---
apiVersion: v1
kind: Service
metadata:
  name: hf-api-service
spec:
  selector:
    app: hf-api
  ports:
  - protocol: TCP
    port: 80
    targetPort: 8000
  type: LoadBalancer

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: model-cache-pvc
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi

💻 Hugging Face Gradio Deployment python

🟡 intermediate ⭐⭐⭐

Deploy interactive ML applications with Gradio, including model integration and production deployment

⏱️ 40 min 🏷️ huggingface, gradio, deployment, ml, interactive
Prerequisites: Python, Gradio, Transformers, PyTorch, Docker
# Hugging Face Gradio Deployment
# Create interactive ML applications with Gradio

# 1. requirements.txt
gradio==4.7.1
transformers==4.35.2
torch==2.1.1
torchvision==0.16.1
diffusers==0.24.0
accelerate==0.24.1
pillow==10.1.0
numpy==1.24.4
opencv-python==4.8.1.78
matplotlib==3.7.2
soundfile==0.12.1
librosa==0.10.1

# 2. app.py - Multi-purpose Gradio Application
import gradio as gr
import torch
import torch.nn.functional as F
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    AutoModelForCausalLM, AutoImageProcessor, AutoModelForImageClassification,
    BlipProcessor, BlipForConditionalGeneration,
    CLIPProcessor, CLIPModel
)
from diffusers import StableDiffusionPipeline
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import librosa
import soundfile as sf
import time
import os
from typing import List, Dict, Any
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global model cache
models = {}

def load_text_classification_model(model_name):
    """Load text classification model"""
    if 'text_classifier' not in models:
        logger.info(f"Loading text classifier: {model_name}")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)

        if torch.cuda.is_available():
            model = model.to('cuda')

        models['text_classifier'] = {
            'model': model,
            'tokenizer': tokenizer,
            'pipeline': None
        }

    return models['text_classifier']

def load_image_classification_model(model_name):
    """Load image classification model"""
    if 'image_classifier' not in models:
        logger.info(f"Loading image classifier: {model_name}")
        processor = AutoImageProcessor.from_pretrained(model_name)
        model = AutoModelForImageClassification.from_pretrained(model_name)

        if torch.cuda.is_available():
            model = model.to('cuda')

        models['image_classifier'] = {
            'model': model,
            'processor': processor
        }

    return models['image_classifier']

def load_image_captioning_model(model_name):
    """Load image captioning model"""
    if 'image_captioner' not in models:
        logger.info(f"Loading image captioner: {model_name}")
        processor = BlipProcessor.from_pretrained(model_name)
        model = BlipForConditionalGeneration.from_pretrained(model_name)

        if torch.cuda.is_available():
            model = model.to('cuda')

        models['image_captioner'] = {
            'model': model,
            'processor': processor
        }

    return models['image_captioner']

def load_stable_diffusion_model(model_name):
    """Load Stable Diffusion model"""
    if 'stable_diffusion' not in models:
        logger.info(f"Loading Stable Diffusion: {model_name}")
        if torch.cuda.is_available():
            pipe = StableDiffusionPipeline.from_pretrained(
                model_name,
                torch_dtype=torch.float16,
                safety_checker=None
            )
            pipe = pipe.to("cuda")
        else:
            pipe = StableDiffusionPipeline.from_pretrained(model_name)

        models['stable_diffusion'] = pipe

    return models['stable_diffusion']

# Text Functions
def classify_text(text, model_name="cardiffnlp/twitter-roberta-base-sentiment-latest"):
    """Classify text sentiment"""
    try:
        classifier = load_text_classification_model(model_name)

        # Prepare input
        inputs = classifier['tokenizer'](text, return_tensors="pt", truncation=True, padding=True)

        if torch.cuda.is_available():
            inputs = {k: v.to('cuda') for k, v in inputs.items()}

        # Make prediction
        with torch.no_grad():
            outputs = classifier['model'](**inputs)
            predictions = F.softmax(outputs.logits, dim=-1)
            confidence, predicted_class = torch.max(predictions, dim=-1)

        # Map to human-readable labels
        id2label = classifier['model'].config.id2label
        label = id2label[predicted_class.item()]

        return {
            "label": label,
            "confidence": confidence.item(),
            "details": {
                "text": text,
                "model": model_name
            }
        }

    except Exception as e:
        return {"error": str(e)}

def generate_text(prompt, model_name="gpt2", max_length=100, temperature=1.0):
    """Generate text using language models"""
    try:
        if 'text_generator' not in models:
            logger.info(f"Loading text generator: {model_name}")
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(model_name)

            if torch.cuda.is_available():
                model = model.to('cuda')

            models['text_generator'] = {
                'model': model,
                'tokenizer': tokenizer
            }

        generator = models['text_generator']

        # Generate text
        inputs = generator['tokenizer'](prompt, return_tensors="pt")

        if torch.cuda.is_available():
            inputs = {k: v.to('cuda') for k, v in inputs.items()}

        with torch.no_grad():
            outputs = generator['model'].generate(
                inputs['input_ids'],
                max_length=max_length,
                temperature=temperature,
                do_sample=True,
                pad_token_id=generator['tokenizer'].eos_token_id
            )

        generated_text = generator['tokenizer'].decode(outputs[0], skip_special_tokens=True)

        # The gr.Textbox output expects plain text, not a dict
        return generated_text

    except Exception as e:
        # Surface errors in the UI instead of returning a raw dict
        raise gr.Error(str(e))

# Image Functions
def classify_image(image, model_name="google/vit-base-patch16-224"):
    """Classify image content"""
    try:
        classifier = load_image_classification_model(model_name)

        # Process image
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        inputs = classifier['processor'](image, return_tensors="pt")

        if torch.cuda.is_available():
            inputs = {k: v.to('cuda') for k, v in inputs.items()}

        # Make prediction
        with torch.no_grad():
            outputs = classifier['model'](**inputs)
            logits = outputs.logits
            predicted_class_idx = logits.argmax(-1).item()
            confidence = F.softmax(logits, dim=-1).max().item()

        # Get label
        predicted_class = classifier['model'].config.id2label[predicted_class_idx]

        return {
            "label": predicted_class,
            "confidence": confidence,
            "model": model_name
        }

    except Exception as e:
        return {"error": str(e)}

def generate_image_caption(image, model_name="Salesforce/blip-image-captioning-base"):
    """Generate caption for image"""
    try:
        captioner = load_image_captioning_model(model_name)

        # Process image
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        inputs = captioner['processor'](image, return_tensors="pt")

        if torch.cuda.is_available():
            inputs = {k: v.to('cuda') for k, v in inputs.items()}

        # Generate caption
        with torch.no_grad():
            out = captioner['model'].generate(**inputs, max_length=50)

        caption = captioner['processor'].decode(out[0], skip_special_tokens=True)

        # The gr.Textbox output expects the caption string, not a dict
        return caption

    except Exception as e:
        raise gr.Error(str(e))

def generate_image_from_text(prompt, model_name="runwayml/stable-diffusion-v1-5", num_steps=20, guidance_scale=7.5):
    """Generate image from text prompt"""
    try:
        pipe = load_stable_diffusion_model(model_name)

        # Generate image
        result = pipe(
            prompt,
            num_inference_steps=num_steps,
            guidance_scale=guidance_scale,
            width=512,
            height=512
        )

        # The gr.Image output expects the image itself (PIL or numpy), not a dict
        return result.images[0]

    except Exception as e:
        raise gr.Error(str(e))

def enhance_image(image, operation="enhance"):
    """Enhance image using OpenCV"""
    try:
        if isinstance(image, np.ndarray):
            img_array = image
        else:
            img_array = np.array(image)

        if img_array.shape[-1] == 3:  # RGB
            img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)

        if operation == "enhance":
            # Apply histogram equalization
            enhanced = cv2.equalizeHist(cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY))
            enhanced = cv2.cvtColor(enhanced, cv2.COLOR_GRAY2BGR)

        elif operation == "denoise":
            # Apply denoising
            enhanced = cv2.fastNlMeansDenoisingColored(img_array, None, 10, 10, 7, 21)

        elif operation == "sharpen":
            # Apply sharpening
            kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
            enhanced = cv2.filter2D(img_array, -1, kernel)

        elif operation == "blur":
            # Apply blur
            enhanced = cv2.GaussianBlur(img_array, (15, 15), 0)

        else:
            raise gr.Error(f"Invalid operation: {operation}")

        # Convert back to RGB
        if enhanced.shape[-1] == 3:
            enhanced = cv2.cvtColor(enhanced, cv2.COLOR_BGR2RGB)

        # The gr.Image output expects the image array itself, not a dict
        return enhanced

    except Exception as e:
        raise gr.Error(str(e))

# Audio Functions
def analyze_audio(audio, sample_rate=None):
    """Analyze audio properties"""
    try:
        # Convert to numpy array if needed
        if isinstance(audio, tuple):
            # gr.Audio with type="numpy" yields (sample_rate, data)
            sr, audio_array = audio
        else:
            audio_array, sr = audio, sample_rate

        # Work in float to avoid integer overflow when squaring int16 samples
        audio_array = np.asarray(audio_array, dtype=np.float32)

        if len(audio_array.shape) > 1:
            # If stereo, convert to mono
            audio_array = np.mean(audio_array, axis=1)

        # Calculate properties
        duration = len(audio_array) / sr
        rms = np.sqrt(np.mean(audio_array**2))
        zero_crossing_rate = np.mean(np.diff(np.sign(audio_array)) != 0) * sr / 2

        return {
            "duration_seconds": duration,
            "sample_rate": sr,
            "rms": float(rms),
            "zero_crossing_rate": float(zero_crossing_rate),
            "peak": float(np.max(np.abs(audio_array))),
            "shape": audio_array.shape
        }

    except Exception as e:
        return {"error": str(e)}

# Create Gradio Interface
def create_interface():
    """Create comprehensive Gradio interface"""

    # Define custom CSS
    css = """
    .gradio-container {
        max-width: 1200px;
        margin: 0 auto;
    }
    .tab-nav {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    }
    """

    with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Hugging Face Model Playground") as demo:

        gr.Markdown("""
        # 🤗 Hugging Face Model Playground

        Interactive demos of various Hugging Face models including:
        - Text classification and generation
        - Image classification and captioning
        - Image generation with Stable Diffusion
        - Audio analysis and processing

        Select a tab below to explore different model capabilities!
        """)

        with gr.Tabs():
            # Text Tab
            with gr.TabItem("📝 Text Processing"):
                with gr.Tabs():
                    with gr.TabItem("Classification"):
                        gr.Markdown("### Text Classification")
                        with gr.Row():
                            text_input = gr.Textbox(
                                label="Enter text to classify",
                                placeholder="Type or paste your text here...",
                                lines=3
                            )
                            classify_model = gr.Dropdown(
                                choices=[
                                    "cardiffnlp/twitter-roberta-base-sentiment-latest",
                                    "mrm8488/distilbert-base-uncased-finetuned-ag_news",
                                    "facebook/bart-large-mnli"
                                ],
                                value="cardiffnlp/twitter-roberta-base-sentiment-latest",
                                label="Model Selection"
                            )

                        classify_btn = gr.Button("Classify Text", variant="primary")
                        classify_output = gr.JSON(label="Classification Results")

                        # Example inputs
                        gr.Examples(
                            examples=[
                                ["This movie is absolutely fantastic! I loved every minute of it.", "cardiffnlp/twitter-roberta-base-sentiment-latest"],
                                ["The service was terrible and the food was cold.", "cardiffnlp/twitter-roberta-base-sentiment-latest"],
                                ["Today the stock market experienced significant volatility.", "mrm8488/distilbert-base-uncased-finetuned-ag_news"]
                            ],
                            inputs=[text_input, classify_model],
                            outputs=classify_output
                        )

                    with gr.TabItem("Generation"):
                        gr.Markdown("### Text Generation")
                        with gr.Row():
                            prompt_input = gr.Textbox(
                                label="Enter prompt",
                                placeholder="Start writing a story or paragraph...",
                                lines=2
                            )
                            gen_model = gr.Dropdown(
                                choices=[
                                    "gpt2",
                                    "microsoft/DialoGPT-medium",
                                    "distilgpt2"
                                ],
                                value="gpt2",
                                label="Model"
                            )
                            max_length = gr.Slider(
                                minimum=50,
                                maximum=200,
                                value=100,
                                step=10,
                                label="Max Length"
                            )
                            temperature = gr.Slider(
                                minimum=0.1,
                                maximum=2.0,
                                value=1.0,
                                step=0.1,
                                label="Temperature"
                            )

                        generate_btn = gr.Button("Generate Text", variant="primary")
                        gen_output = gr.Textbox(label="Generated Text", lines=5)

                        # Examples
                        gr.Examples(
                            examples=[
                                ["Once upon a time in a magical forest,", "gpt2", 100, 0.8],
                                ["The future of artificial intelligence will be", "distilgpt2", 150, 1.2]
                            ],
                            inputs=[prompt_input, gen_model, max_length, temperature],
                            outputs=gen_output
                        )

            # Image Tab
            with gr.TabItem("🖼️ Image Processing"):
                with gr.Tabs():
                    with gr.TabItem("Classification"):
                        gr.Markdown("### Image Classification")
                        with gr.Row():
                            image_input = gr.Image(
                                label="Upload Image",
                                type="pil"
                            )
                            img_classify_model = gr.Dropdown(
                                choices=[
                                    "google/vit-base-patch16-224",
                                    "microsoft/resnet-50",
                                    "timm/vit_base_patch16_clip_224.laion2b"
                                ],
                                value="google/vit-base-patch16-224",
                                label="Model"
                            )

                        img_classify_btn = gr.Button("Classify Image", variant="primary")
                        img_classify_output = gr.JSON(label="Classification Results")

                        # Example images
                        gr.Examples(
                            examples=[
                                [None, "google/vit-base-patch16-224"]  # User needs to upload
                            ],
                            inputs=[image_input, img_classify_model],
                            outputs=img_classify_output
                        )

                    with gr.TabItem("Captioning"):
                        gr.Markdown("### Image Captioning")
                        caption_image_input = gr.Image(
                            label="Upload Image for Captioning",
                            type="pil"
                        )
                        caption_btn = gr.Button("Generate Caption", variant="primary")
                        caption_output = gr.Textbox(label="Generated Caption")

                    with gr.TabItem("Generation"):
                        gr.Markdown("### Image Generation (Stable Diffusion)")
                        with gr.Row():
                            img_prompt_input = gr.Textbox(
                                label="Enter image prompt",
                                placeholder="A beautiful landscape with mountains...",
                                lines=2
                            )
                            num_steps = gr.Slider(
                                minimum=10,
                                maximum=50,
                                value=20,
                                step=5,
                                label="Inference Steps"
                            )
                            guidance_scale = gr.Slider(
                                minimum=1.0,
                                maximum=20.0,
                                value=7.5,
                                step=0.5,
                                label="Guidance Scale"
                            )

                        generate_img_btn = gr.Button("Generate Image", variant="primary")
                        generate_img_output = gr.Image(label="Generated Image")

                        # Example prompts
                        gr.Examples(
                            examples=[
                                ["A serene sunset over a mountain lake with reflections", 20, 7.5],
                                ["A futuristic city with flying cars and neon lights", 30, 10],
                                ["A cute cat wearing a wizard hat, digital art", 25, 8]
                            ],
                            inputs=[img_prompt_input, num_steps, guidance_scale],
                            outputs=generate_img_output
                        )

                    with gr.TabItem("Enhancement"):
                        gr.Markdown("### Image Enhancement")
                        with gr.Row():
                            enhance_image_input = gr.Image(
                                label="Upload Image to Enhance",
                                type="pil"
                            )
                            enhance_operation = gr.Radio(
                                choices=["enhance", "denoise", "sharpen", "blur"],
                                value="enhance",
                                label="Enhancement Operation"
                            )

                        enhance_btn = gr.Button("Enhance Image", variant="primary")
                        enhance_output = gr.Image(label="Enhanced Image")

            # Audio Tab
            with gr.TabItem("🎵 Audio Analysis"):
                gr.Markdown("### Audio Analysis")
                audio_input = gr.Audio(
                    label="Upload Audio File",
                    type="numpy"
                )
                analyze_btn = gr.Button("Analyze Audio", variant="primary")
                audio_output = gr.JSON(label="Audio Analysis Results")

                # Example audio info
                gr.Markdown("""
                **Supported Audio Formats:**
                - WAV
                - MP3
                - FLAC
                - OGG

                **Analysis Includes:**
                - Duration
                - Sample Rate
                - RMS Level
                - Peak Level
                - Zero Crossing Rate
                """)

        # Event Handlers
        classify_btn.click(
            classify_text,
            inputs=[text_input, classify_model],
            outputs=classify_output
        )

        generate_btn.click(
            generate_text,
            inputs=[prompt_input, gen_model, max_length, temperature],
            outputs=gen_output
        )

        img_classify_btn.click(
            classify_image,
            inputs=[image_input, img_classify_model],
            outputs=img_classify_output
        )

        caption_btn.click(
            generate_image_caption,
            inputs=[caption_image_input],
            outputs=caption_output
        )

        generate_img_btn.click(
            generate_image_from_text,
            inputs=[img_prompt_input, num_steps, guidance_scale],
            outputs=generate_img_output
        )

        enhance_btn.click(
            enhance_image,
            inputs=[enhance_image_input, enhance_operation],
            outputs=enhance_output
        )

        analyze_btn.click(
            analyze_audio,
            inputs=[audio_input],
            outputs=audio_output
        )

    return demo

# Launch the application
if __name__ == "__main__":
    # Create interface
    demo = create_interface()

    # Enable request queueing (the enable_queue launch argument was removed in Gradio 4)
    demo.queue(max_size=64)

    # Configure launch parameters
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True,
        auth=None,  # Add authentication if needed
        favicon_path=None,
        ssl_verify=True,
        quiet=False,
        show_error=True,
        max_threads=40,
        prevent_thread_lock=False
    )
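
With show_api=True, the wired functions are also exposed over Gradio's HTTP API and can be called with the gradio_client package (an extra dependency). A sketch, assuming the app runs on localhost:7860; the api_name values are assumed to follow Gradio's default of using the wired function names, which you can confirm on the app's "Use via API" page:

# client_example.py - call the running Gradio app programmatically
from gradio_client import Client

client = Client("http://localhost:7860")

result = client.predict(
    "This demo is great!",                               # text
    "cardiffnlp/twitter-roberta-base-sentiment-latest",  # model_name
    api_name="/classify_text",
)
print(result)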

# 3. Dockerfile for Gradio App
FROM python:3.11-slim

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    software-properties-common \
    git \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    ffmpeg \
    wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Create non-root user
RUN useradd --create-home --shell /bin/bash gradio \
    && chown -R gradio:gradio /app
USER gradio

# Copy application code
COPY --chown=gradio:gradio . .

# Create cache directory
RUN mkdir -p /app/.cache/huggingface \
    && chmod 777 /app/.cache/huggingface

EXPOSE 7860

CMD ["python", "app.py"]

# 4. docker-compose.yml for Gradio with monitoring
version: '3.8'

services:
  gradio-app:
    build: .
    ports:
      - "7860:7860"
    environment:
      - TRANSFORMERS_CACHE=/app/.cache/transformers
      - HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface
      - GRADIO_SERVER_NAME=0.0.0.0
    volumes:
      - hf_cache:/app/.cache
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    depends_on:
      - redis

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data

  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./ssl:/etc/nginx/ssl
    depends_on:
      - gradio-app

volumes:
  hf_cache:
  redis_data:
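
The compose file also starts a Redis service that app.py does not use yet. One natural use is caching model outputs between requests. A minimal sketch, assuming the redis package is added to requirements.txt and the helper lives alongside classify_text in app.py (the key scheme and TTL are illustrative):

# Cache classification results in the Redis service defined above
import json
import redis

redis_client = redis.Redis(host="redis", port=6379, decode_responses=True)  # "redis" = compose service name

def cached_classify(text: str) -> dict:
    key = f"classify:{text}"
    cached = redis_client.get(key)
    if cached is not None:
        return json.loads(cached)
    result = classify_text(text)                       # defined in app.py above
    redis_client.setex(key, 3600, json.dumps(result))  # keep for one hour
    return result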

# 5. Deployment configuration for cloud services
# Railway deployment (railway.toml)
[build]
builder = "nixpacks"

[deploy]
healthcheckPath = "/"
healthcheckTimeout = 300
restartPolicyType = "on_failure"
restartPolicyMaxRetries = 10

# Hugging Face Spaces configuration (YAML front matter at the top of the Space's README.md)
---
title: Hugging Face Model Playground
emoji: 🤗
colorFrom: yellow
colorTo: purple
sdk: gradio
sdk_version: "4.7.1"
python_version: "3.11"
app_file: app.py
pinned: false
license: mit
---

# Custom configuration (illustrative resource targets; on Hugging Face Spaces, hardware is selected in the Space settings or via the suggested_hardware README key)
[environment]
TRANSFORMERS_CACHE="/app/.cache/transformers"
HUGGINGFACE_HUB_CACHE="/app/.cache/huggingface"

[hardware]
gpu = "T4"  # or "A10G" for more demanding models
cpu = "4"
memory = "16Gi"

[scaling]
min_instances = 1
max_instances = 3
cooldown = 30
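
As an alternative to pushing the repository through git, the app can be deployed to a Space programmatically with the huggingface_hub library. A sketch, assuming you are authenticated (via huggingface-cli login or the HF_TOKEN environment variable) and substitute your own Space name:

# upload_space.py - create/update a Gradio Space from the local project directory
from huggingface_hub import HfApi

api = HfApi()
repo_id = "your-username/hf-model-playground"  # hypothetical Space name

api.create_repo(repo_id=repo_id, repo_type="space", space_sdk="gradio", exist_ok=True)
api.upload_folder(
    folder_path=".",   # directory containing app.py, requirements.txt and README.md
    repo_id=repo_id,
    repo_type="space",
)
print(f"Deployed to https://huggingface.co/spaces/{repo_id}")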