Kubernetes YAML Samples
Complete Kubernetes YAML manifests for Deployment, Service, ConfigMap, Secret, Ingress and other essential resources
Key Facts
- Category
- Container Orchestration
- Items
- 4
- Format Families
- yaml, sql
Sample Overview
Complete Kubernetes YAML manifests for Deployment, Service, ConfigMap, Secret, Ingress and other essential resources. This sample set belongs to Container Orchestration and can be used to test related workflows inside Elysia Tools.
💻 Basic Kubernetes Deployment
Simple deployment manifest for running a web application with multiple replicas
# Basic Kubernetes Deployment Example
# This file creates a simple deployment for a web application
apiVersion: apps/v1
kind: Deployment
metadata:
  name: web-app-deployment
  namespace: default
  labels:
    app: web-app
    version: v1
    environment: production
spec:
  replicas: 3  # Number of pod replicas
  selector:
    matchLabels:
      app: web-app
  template:
    metadata:
      labels:
        app: web-app
        version: v1
    spec:
      containers:
        - name: web-app-container
          image: nginx:1.21-alpine  # Container image
          ports:
            - containerPort: 80
              name: http
              protocol: TCP
          resources:
            requests:
              memory: "64Mi"
              cpu: "250m"  # 0.25 CPU cores
            limits:
              memory: "128Mi"
              cpu: "500m"  # 0.5 CPU cores
          livenessProbe:
            httpGet:
              path: /
              port: 80
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /
              port: 80
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          env:
            - name: APP_NAME
              value: "MyWebApp"
            - name: APP_VERSION
              value: "1.0.0"
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
              readOnly: true
      volumes:
        - name: config-volume
          configMap:
            name: web-app-config
      restartPolicy: Always
      terminationGracePeriodSeconds: 30
      dnsPolicy: ClusterFirst
      # NOTE(review): the stock nginx image listens on port 80, which an
      # unprivileged user cannot bind — with runAsNonRoot this pod may
      # crash-loop; confirm, or use nginxinc/nginx-unprivileged (port 8080).
      securityContext:
        runAsNonRoot: true
        runAsUser: 101
        fsGroup: 101
---
# Service to expose the deployment
apiVersion: v1
kind: Service
metadata:
  name: web-app-service
  namespace: default
  labels:
    app: web-app
spec:
  type: ClusterIP  # Internal service
  ports:
    - port: 80
      targetPort: 80
      protocol: TCP
      name: http
  selector:
    app: web-app
---
# ConfigMap for application configuration
apiVersion: v1
kind: ConfigMap
metadata:
  name: web-app-config
  namespace: default
  labels:
    app: web-app
data:
  app.properties: |
    server.port=8080
    app.name=WebApp
    app.version=1.0.0
    logging.level.root=INFO
  nginx.conf: |
    server {
      listen 80;
      server_name localhost;
      location / {
        root /usr/share/nginx/html;
        index index.html;
      }
      location /api {
        proxy_pass http://backend:3000;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
      }
      location /health {
        access_log off;
        return 200 "healthy\n";
        add_header Content-Type text/plain;
      }
    }
💻 Node.js Application with Database
Complete Node.js application setup with PostgreSQL database, environment variables, and health checks
# Node.js Application with PostgreSQL Database
# This example shows a complete web application setup
apiVersion: v1
kind: Namespace
metadata:
  name: node-app
---
# ConfigMap for application settings
apiVersion: v1
kind: ConfigMap
metadata:
  name: node-app-config
  namespace: node-app
data:
  NODE_ENV: "production"
  LOG_LEVEL: "info"
  API_PORT: "3000"
  DATABASE_NAME: "nodeappdb"
  REDIS_HOST: "redis-service"
  REDIS_PORT: "6379"
  app.conf: |
    {
      "server": {
        "port": 3000,
        "host": "0.0.0.0"
      },
      "database": {
        "host": "$DATABASE_HOST",
        "port": "$DATABASE_PORT",
        "name": "$DATABASE_NAME",
        "ssl": true
      },
      "redis": {
        "host": "$REDIS_HOST",
        "port": "$REDIS_PORT",
        "ttl": 3600
      },
      "logging": {
        "level": "$LOG_LEVEL",
        "format": "json"
      }
    }
---
# Secret for sensitive data
apiVersion: v1
kind: Secret
metadata:
  name: node-app-secrets
  namespace: node-app
type: Opaque
data:
  # Base64 encoded placeholder values — never commit real credentials
  DATABASE_HOST: cG9zdGdyZXNxbC1zZXJ2aWNl  # postgresql-service
  DATABASE_PORT: NTQzMg==  # 5432
  DATABASE_NAME: bm9kZWFwcGRi  # nodeappdb
  DATABASE_USER: YXBwX3VzZXI=  # app_user
  DATABASE_PASSWORD: c3VwZXJfc2VjcmV0X3Bhc3M=  # super_secret_pass
  JWT_SECRET: bm90X3NvX3NlY3JldF9qd3Rfa2V5  # not_so_secret_jwt_key
  REDIS_PASSWORD: cmVkaXNfcGFzc3dvcmQ=  # redis_password
---
# PostgreSQL StatefulSet
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: postgresql
  namespace: node-app
spec:
  serviceName: postgresql-service
  replicas: 1
  selector:
    matchLabels:
      app: postgresql
  template:
    metadata:
      labels:
        app: postgresql
    spec:
      containers:
        - name: postgresql
          image: postgres:15-alpine
          env:
            - name: POSTGRES_DB
              valueFrom:
                configMapKeyRef:
                  name: node-app-config
                  key: DATABASE_NAME
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: node-app-secrets
                  key: DATABASE_USER
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: node-app-secrets
                  key: DATABASE_PASSWORD
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
          ports:
            - containerPort: 5432
              name: postgresql
          volumeMounts:
            - name: postgresql-storage
              mountPath: /var/lib/postgresql/data
            # NOTE(review): mounting postgresql.conf alone does not make the
            # server read it — confirm the container is started with
            # `-c config_file=/etc/postgresql/postgresql.conf` if these
            # custom settings are required.
            - name: postgresql-config
              mountPath: /etc/postgresql/postgresql.conf
              subPath: postgresql.conf
          resources:
            requests:
              memory: "256Mi"
              cpu: "250m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          livenessProbe:
            exec:
              command:
                - pg_isready
                - -U
                - postgres
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            exec:
              command:
                - pg_isready
                - -U
                - postgres
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        - name: postgresql-config
          configMap:
            name: postgresql-config
  volumeClaimTemplates:
    - metadata:
        name: postgresql-storage
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 10Gi
        storageClassName: fast-ssd  # Adjust based on your cluster
---
# PostgreSQL Service
apiVersion: v1
kind: Service
metadata:
  name: postgresql-service
  namespace: node-app
  labels:
    app: postgresql
spec:
  type: ClusterIP
  ports:
    - port: 5432
      targetPort: 5432
      protocol: TCP
      name: postgresql
  selector:
    app: postgresql
---
# PostgreSQL Configuration
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgresql-config
  namespace: node-app
data:
  postgresql.conf: |
    # PostgreSQL Configuration
    listen_addresses = '*'
    port = 5432
    max_connections = 100
    shared_buffers = 128MB
    effective_cache_size = 4GB
    maintenance_work_mem = 64MB
    checkpoint_completion_target = 0.9
    wal_buffers = 16MB
    default_statistics_target = 100
    random_page_cost = 1.1
    effective_io_concurrency = 200
    work_mem = 4MB
    min_wal_size = 1GB
    max_wal_size = 4GB
    logging_collector = on
    log_directory = 'pg_log'
    log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
    log_statement = 'all'
    log_min_duration_statement = 1000
---
# Redis Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
  namespace: node-app
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:7-alpine
          # $(REDIS_PASSWORD) is expanded by the kubelet from the env vars
          # declared below (no shell involved)
          command:
            - redis-server
            - --requirepass
            - $(REDIS_PASSWORD)
          env:
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: node-app-secrets
                  key: REDIS_PASSWORD
          ports:
            - containerPort: 6379
              name: redis
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "256Mi"
              cpu: "200m"
          # NOTE(review): confirm $(VAR) expansion applies to probe exec
          # commands on your cluster version; otherwise wrap the probe in
          # `sh -c 'redis-cli -a "$REDIS_PASSWORD" ping'`.
          livenessProbe:
            exec:
              command:
                - redis-cli
                - -a
                - $(REDIS_PASSWORD)
                - ping
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            exec:
              command:
                - redis-cli
                - -a
                - $(REDIS_PASSWORD)
                - ping
            initialDelaySeconds: 5
            periodSeconds: 5
---
# Redis Service
apiVersion: v1
kind: Service
metadata:
  name: redis-service
  namespace: node-app
  labels:
    app: redis
spec:
  type: ClusterIP
  ports:
    - port: 6379
      targetPort: 6379
      protocol: TCP
      name: redis
  selector:
    app: redis
---
# Node.js Application Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: node-app
  namespace: node-app
  labels:
    app: node-app
    version: v1
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: node-app
  template:
    metadata:
      labels:
        app: node-app
        version: v1
    spec:
      containers:
        - name: node-app
          image: node-app:1.0.0  # Replace with your actual image
          envFrom:
            - configMapRef:
                name: node-app-config
          env:
            - name: DATABASE_HOST
              valueFrom:
                secretKeyRef:
                  name: node-app-secrets
                  key: DATABASE_HOST
            - name: DATABASE_PORT
              valueFrom:
                secretKeyRef:
                  name: node-app-secrets
                  key: DATABASE_PORT
            - name: DATABASE_NAME
              valueFrom:
                configMapKeyRef:
                  name: node-app-config
                  key: DATABASE_NAME
            - name: DATABASE_USER
              valueFrom:
                secretKeyRef:
                  name: node-app-secrets
                  key: DATABASE_USER
            - name: DATABASE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: node-app-secrets
                  key: DATABASE_PASSWORD
            - name: JWT_SECRET
              valueFrom:
                secretKeyRef:
                  name: node-app-secrets
                  key: JWT_SECRET
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: node-app-secrets
                  key: REDIS_PASSWORD
          ports:
            - containerPort: 3000
              name: http
          resources:
            requests:
              memory: "256Mi"
              cpu: "200m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          livenessProbe:
            httpGet:
              path: /health
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ready
              port: 3000
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          startupProbe:
            httpGet:
              path: /health
              port: 3000
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 3
            failureThreshold: 30
          volumeMounts:
            - name: config-volume
              mountPath: /app/config
              readOnly: true
            - name: logs-volume
              mountPath: /app/logs
      volumes:
        - name: config-volume
          configMap:
            name: node-app-config
            items:
              - key: app.conf
                path: app.conf
        - name: logs-volume
          emptyDir: {}
      imagePullSecrets:
        - name: registry-secret  # For private Docker registry
---
# Node.js Application Service
apiVersion: v1
kind: Service
metadata:
  name: node-app-service
  namespace: node-app
  labels:
    app: node-app
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 3000
      protocol: TCP
      name: http
  selector:
    app: node-app
---
# Horizontal Pod Autoscaler
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: node-app-hpa
  namespace: node-app
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: node-app
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100
          periodSeconds: 15
        - type: Pods
          value: 4
          periodSeconds: 15
      selectPolicy: Max
💻 Ingress with SSL Termination
Advanced ingress configuration with SSL certificates, path-based routing, and load balancing
# Ingress Controller with SSL Termination and Path-Based Routing
# This example shows how to expose multiple services with HTTPS
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: web-app-ingress
  namespace: default
  annotations:
    cert-manager.io/cluster-issuer: "letsencrypt-prod"  # For automatic SSL certificates
    nginx.ingress.kubernetes.io/ssl-redirect: "true"  # Redirect HTTP to HTTPS
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/limit-connections: "100"  # Rate limiting
    nginx.ingress.kubernetes.io/limit-rps: "50"
    nginx.ingress.kubernetes.io/limit-burst: "100"
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"  # Max file upload size
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
    nginx.ingress.kubernetes.io/rate-limit: "100"  # Rate limiting per IP
    nginx.ingress.kubernetes.io/rate-limit-window: "1m"
    # CORS settings
    nginx.ingress.kubernetes.io/enable-cors: "true"
    nginx.ingress.kubernetes.io/cors-allow-origin: "*"
    nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, PUT, DELETE, OPTIONS"
    nginx.ingress.kubernetes.io/cors-allow-headers: "DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Authorization"
    # Custom error pages
    nginx.ingress.kubernetes.io/default-backend: "error-page-service"
spec:
  # Use the NGINX Ingress Controller; replaces the deprecated
  # kubernetes.io/ingress.class annotation
  ingressClassName: nginx
  tls:
    - hosts:
        - api.example.com
        - app.example.com
        - admin.example.com
      secretName: example-com-tls  # TLS certificate secret
  rules:
    - host: api.example.com
      http:
        paths:
          - path: /api/v1
            pathType: Prefix
            backend:
              service:
                name: api-v1-service
                port:
                  number: 80
          - path: /api/v2
            pathType: Prefix
            backend:
              service:
                name: api-v2-service
                port:
                  number: 80
          - path: /health
            pathType: Prefix
            backend:
              service:
                name: api-health-service
                port:
                  number: 80
    - host: app.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: frontend-service
                port:
                  number: 80
          - path: /static
            pathType: Prefix
            backend:
              service:
                name: static-assets-service
                port:
                  number: 80
    - host: admin.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: admin-dashboard-service
                port:
                  number: 80
---
# Certificate for Ingress (using cert-manager)
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: [email protected]
    privateKeySecretRef:
      name: letsencrypt-prod-account-key
    solvers:
      - http01:
          ingress:
            class: nginx
---
# TLS Certificate Secret (if not using cert-manager)
apiVersion: v1
kind: Secret
metadata:
  name: example-com-tls
  namespace: default
type: kubernetes.io/tls
data:
  tls.crt: LS0tLS1CRUdJTi...  # Base64 encoded certificate
  tls.key: LS0tLS1CRUdJTi...  # Base64 encoded private key
---
# Backend Services for Ingress
apiVersion: v1
kind: Service
metadata:
  name: api-v1-service
  namespace: default
  labels:
    app: api
    version: v1
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 3000
      protocol: TCP
      name: http
  selector:
    app: api
    version: v1
---
apiVersion: v1
kind: Service
metadata:
  name: frontend-service
  namespace: default
  labels:
    app: frontend
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 80
      protocol: TCP
      name: http
  selector:
    app: frontend
---
# Network Policy for Security
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: ingress-network-policy
  namespace: default
spec:
  podSelector: {}
  policyTypes:
    - Ingress
  ingress:
    - from:
        # Either the ingress-nginx namespace or any pod in this namespace
        - namespaceSelector:
            matchLabels:
              name: ingress-nginx
        - podSelector: {}
      ports:
        - protocol: TCP
          port: 80
        - protocol: TCP
          port: 443
---
# Rate Limiting ConfigMap for Ingress
apiVersion: v1
kind: ConfigMap
metadata:
  name: nginx-configuration
  namespace: ingress-nginx
  labels:
    app.kubernetes.io/name: ingress-nginx
data:
  use-proxy-protocol: "true"
  # Single-quoted: double quotes would treat \[ and \. as (invalid) YAML
  # escape sequences
  proxy-protocol-header-pattern: '^\[?([0-9a-zA-Z\.-]+)\]?:([0-9]+)$'
  limit-connections: "100"
  limit-rps: "50"
  limit-burst: "100"
  client-body-buffer-size: "64k"
  proxy-buffering: "on"
  proxy-buffer-size: "4k"
  proxy-buffers-number: "4"
---
# Custom Error Page Backend
apiVersion: v1
kind: Service
metadata:
  name: error-page-service
  namespace: default
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 80
  selector:
    app: error-pages
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: error-pages
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: error-pages
  template:
    metadata:
      labels:
        app: error-pages
    spec:
      containers:
        - name: error-pages
          image: nginx:alpine
          ports:
            - containerPort: 80
          volumeMounts:
            - name: error-pages-config
              mountPath: /usr/share/nginx/html
      volumes:
        - name: error-pages-config
          configMap:
            name: error-pages-content
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: error-pages-content
  namespace: default
data:
  404.html: |
    <!DOCTYPE html>
    <html>
    <head>
      <title>404 - Page Not Found</title>
      <style>
        body { font-family: Arial, sans-serif; text-align: center; margin-top: 100px; }
        h1 { font-size: 48px; color: #e74c3c; }
        p { font-size: 18px; color: #7f8c8d; }
      </style>
    </head>
    <body>
      <h1>404</h1>
      <p>Oops! The page you're looking for doesn't exist.</p>
      <p><a href="/">Go back home</a></p>
    </body>
    </html>
  500.html: |
    <!DOCTYPE html>
    <html>
    <head>
      <title>500 - Server Error</title>
      <style>
        body { font-family: Arial, sans-serif; text-align: center; margin-top: 100px; }
        h1 { font-size: 48px; color: #e74c3c; }
        p { font-size: 18px; color: #7f8c8d; }
      </style>
    </head>
    <body>
      <h1>500</h1>
      <p>Oops! Something went wrong on our end.</p>
      <p>Please try again later or contact support.</p>
    </body>
    </html>
---
# Pod Disruption Budget for High Availability
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: api-pdb
  namespace: default
spec:
  minAvailable: 2
  selector:
    matchLabels:
      app: api
---
# Resource Quota for Namespace
apiVersion: v1
kind: ResourceQuota
metadata:
  name: default-quota
  namespace: default
spec:
  hard:
    requests.cpu: "2"
    requests.memory: 4Gi
    limits.cpu: "4"
    limits.memory: 8Gi
    persistentvolumeclaims: "10"
    services: "20"
    secrets: "20"
    configmaps: "20"
💻 CronJob with Monitoring
Scheduled jobs with monitoring, alerting, and failure handling mechanisms
# Kubernetes CronJob with Monitoring and Alerting
# This example shows scheduled jobs with comprehensive monitoring
apiVersion: batch/v1
kind: CronJob
metadata:
  name: data-backup-cronjob
  namespace: default
  labels:
    app: backup
    type: database
spec:
  schedule: "0 2 * * *"  # Run at 2 AM every day
  concurrencyPolicy: Forbid  # Don't run concurrent jobs
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 5
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: backup
            type: database-backup
        spec:
          # Bind the RBAC defined later in this file to the job pods
          serviceAccountName: backup-service-account
          restartPolicy: OnFailure
          activeDeadlineSeconds: 3600  # 1 hour timeout
          containers:
            - name: backup-container
              # NOTE(review): postgres:15-alpine ships pg_dump but not the
              # AWS CLI or curl — use/build an image that includes them
              # before relying on the S3 upload and Slack steps below.
              image: postgres:15-alpine
              env:
                - name: PGPASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: database-secrets
                      key: POSTGRES_PASSWORD
                - name: PGHOST
                  value: "postgresql-service.default.svc.cluster.local"
                - name: PGUSER
                  value: "postgres"
                - name: PGDATABASE
                  value: "production"
                - name: S3_BUCKET
                  value: "s3://company-backups/database"
                - name: SLACK_WEBHOOK
                  valueFrom:
                    secretKeyRef:
                      name: notifications-secrets
                      key: SLACK_WEBHOOK_URL
              command:
                - /bin/bash
                - -c
                - |
                  set -e
                  echo "Starting backup process at $(date)"
                  # Kubernetes performs no command substitution in `env:`
                  # values, so derive the backup date in the script itself
                  # (a "$(date ...)" env value would stay a literal string).
                  BACKUP_DATE="$(date +%Y-%m-%d)"
                  # Create backup directory
                  BACKUP_DIR="/tmp/backup-$(date +%Y%m%d-%H%M%S)"
                  mkdir -p "$BACKUP_DIR"
                  # Database backup
                  echo "Creating database backup..."
                  pg_dump -h "$PGHOST" -U "$PGUSER" -d "$PGDATABASE" > "$BACKUP_DIR/database_$(date +%Y%m%d).sql"
                  # Compress backup
                  echo "Compressing backup..."
                  gzip "$BACKUP_DIR/database_$(date +%Y%m%d).sql"
                  # Upload to S3
                  echo "Uploading to S3..."
                  aws s3 cp "$BACKUP_DIR/database_$(date +%Y%m%d).sql.gz" "$S3_BUCKET/$(date +%Y)/"
                  # Verify upload
                  if aws s3 ls "$S3_BUCKET/$(date +%Y)/database_$(date +%Y%m%d).sql.gz"; then
                    echo "Backup completed successfully!"
                    # Send success notification
                    curl -X POST -H 'Content-type: application/json' --data '{"text":"✅ Database backup completed successfully for '$BACKUP_DATE'"}' "$SLACK_WEBHOOK"
                  else
                    echo "Backup verification failed!"
                    # Send failure notification
                    curl -X POST -H 'Content-type: application/json' --data '{"text":"❌ Database backup verification failed for '$BACKUP_DATE'"}' "$SLACK_WEBHOOK"
                    exit 1
                  fi
                  # Cleanup
                  rm -rf "$BACKUP_DIR"
                  echo "Backup process completed at $(date)"
              resources:
                requests:
                  memory: "512Mi"
                  cpu: "250m"
                limits:
                  memory: "1Gi"
                  cpu: "500m"
              volumeMounts:
                - name: backup-temp
                  mountPath: /tmp
                - name: backup-config
                  mountPath: /etc/backup-config
                  readOnly: true
          volumes:
            - name: backup-temp
              emptyDir: {}
            - name: backup-config
              configMap:
                name: backup-config
          # Node selector for backup jobs
          nodeSelector:
            node-type: worker
            backup-capable: "true"
          tolerations:
            - key: "backup-job"
              operator: "Equal"
              value: "true"
              effect: "NoSchedule"
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                  - matchExpressions:
                      - key: node-type
                        operator: In
                        values: ["worker"]
---
# Backup Configuration ConfigMap
apiVersion: v1
kind: ConfigMap
metadata:
  name: backup-config
  namespace: default
data:
  backup.conf: |
    # Backup Configuration
    RETENTION_DAYS=30
    S3_REGION=us-west-2
    S3_STORAGE_CLASS=STANDARD_IA
    BACKUP_TYPE=full
    ENCRYPTION_ENABLED=true
  retention-policy.sh: |
    #!/bin/bash
    # Cleanup old backups (retention policy)
    S3_BUCKET="s3://company-backups/database"
    RETENTION_DAYS=30
    aws s3 ls "$S3_BUCKET/" --recursive | while read -r line; do
      file_date=$(echo "$line" | awk '{print $1, $2}')
      file_path=$(echo "$line" | awk '{print $4}')
      file_timestamp=$(date -d"$file_date" +%s)
      current_timestamp=$(date +%s)
      age_days=$(( (current_timestamp - file_timestamp) / 86400 ))
      if [ "$age_days" -gt "$RETENTION_DAYS" ]; then
        echo "Deleting old backup: $file_path (age: $age_days days)"
        aws s3 rm "$S3_BUCKET/$file_path"
      fi
    done
---
# Service Account for Backup Jobs
apiVersion: v1
kind: ServiceAccount
metadata:
  name: backup-service-account
  namespace: default
automountServiceAccountToken: true
---
# Role for Backup Operations
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: backup-role
  namespace: default
rules:
  - apiGroups: [""]
    resources: ["pods", "pods/log"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["batch"]
    resources: ["jobs", "cronjobs"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "patch"]
---
# Role Binding
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: backup-role-binding
  namespace: default
subjects:
  - kind: ServiceAccount
    name: backup-service-account
    namespace: default
roleRef:
  kind: Role
  name: backup-role
  apiGroup: rbac.authorization.k8s.io
---
# Monitoring Service for Backup Jobs
apiVersion: v1
kind: Service
metadata:
  name: backup-metrics
  namespace: default
  labels:
    app: backup-metrics
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "9090"
    prometheus.io/path: "/metrics"
spec:
  type: ClusterIP
  ports:
    - port: 9090
      targetPort: 9090
      name: metrics
  selector:
    app: backup-metrics
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: backup-metrics
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: backup-metrics
  template:
    metadata:
      labels:
        app: backup-metrics
    spec:
      serviceAccountName: backup-service-account
      containers:
        - name: metrics-exporter
          # NOTE(review): this image does not appear to ship kubectl or jq —
          # confirm, or swap in an image that bundles both before using this
          # export loop.
          image: prometheus-operator/prometheus-config-reloader:v0.43.2
          command:
            - /bin/sh
            - -c
            - |
              # Export backup job metrics
              while true; do
                kubectl get cronjobs -n default -o json | jq '.items[] |
                  select(.metadata.name | startswith("backup")) |
                  {
                    job_name: .metadata.name,
                    last_schedule: .status.lastScheduleTime,
                    successful_jobs: .status.successfulJobsHistoryLimit,
                    failed_jobs: .status.failedJobsHistoryLimit
                  }'
                kubectl get jobs -n default -l type=database-backup -o json | jq '.items[] |
                  {
                    job_name: .metadata.name,
                    start_time: .status.startTime,
                    completion_time: .status.completionTime,
                    succeeded: .status.succeeded,
                    failed: .status.failed,
                    active: .status.active
                  }'
                sleep 30
              done
          ports:
            - containerPort: 9090
              name: metrics
          resources:
            requests:
              memory: "64Mi"
              cpu: "50m"
            limits:
              memory: "128Mi"
              cpu: "100m"
---
# Prometheus Rule for Backup Alerting
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: backup-alerting-rules
  namespace: default
  labels:
    app: prometheus-operator
spec:
  groups:
    - name: backup.rules
      rules:
        - alert: BackupJobFailed
          expr: kube_job_status_failed{job=~".*backup.*"} == 1
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: "Backup job {{ $labels.job_name }} failed"
            description: "Backup job {{ $labels.job_name }} has failed. Check the job logs for details."
        - alert: BackupJobNotRun
          expr: time() - kube_cronjob_status_last_schedule_time{job=~".*backup.*"} > 86400
          for: 15m
          labels:
            severity: warning
          annotations:
            summary: "Backup job {{ $labels.job_name }} hasn't run in 24 hours"
            description: "Backup job {{ $labels.job_name }} was scheduled but hasn't run in the last 24 hours."
        - alert: BackupJobLongRunning
          expr: kube_job_status_active{job=~".*backup.*"} > 0
          for: 2h
          labels:
            severity: warning
          annotations:
            summary: "Backup job {{ $labels.job_name }} is running for more than 2 hours"
            description: "Backup job {{ $labels.job_name }} has been active for more than 2 hours."
---
# Horizontal Pod Autoscaler for Backup Metrics
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: backup-metrics-hpa
  namespace: default
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: backup-metrics
  minReplicas: 1
  maxReplicas: 3
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
---
# PodMonitor for Custom Metrics
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: backup-pods
  namespace: default
  labels:
    app: prometheus-operator
spec:
  selector:
    matchLabels:
      type: database-backup
  namespaceSelector:
    matchNames:
      - default
  podMetricsEndpoints:
    - port: metrics
      interval: 30s
      path: /metrics