Splunk Log Analysis Examples

Complete Splunk configurations and query examples for log analysis, monitoring, and operational intelligence

💻 Splunk Search Queries splunk

🟡 intermediate ⭐⭐⭐

Essential SPL (Search Processing Language) queries for log analysis and troubleshooting

⏱️ 30 min 🏷️ splunk, search, spl, log analysis
Prerequisites: Splunk knowledge, Basic understanding of log analysis
# Splunk Search Processing Language (SPL) Examples

## 1. Basic Search Queries
```splunk
# Search for error logs in application logs
index=application_logs sourcetype=app_error level=ERROR

# Filter by time range
index=web_access earliest=-1h latest=now

# Search for specific HTTP status codes
index=nginx_access status_code>=500

# Wildcard search
index=security "failed login*"
```
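
Relative time ranges also accept snap-to modifiers (`@h`, `@d`), which keep scheduled searches aligned to whole hours or days. A small sketch using the same indexes as above; the `/healthcheck` URI is only an illustrative filter:

```splunk
# Last complete hour, snapped to the hour boundary
index=web_access earliest=-1h@h latest=@h

# Yesterday, midnight to midnight
index=application_logs earliest=-1d@d latest=@d

# Combine boolean operators with field filters (the /healthcheck URI is illustrative)
index=nginx_access (status_code=502 OR status_code=503) NOT uri="/healthcheck"
```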

## 2. Field Extraction and Transformation
```splunk
# Extract fields from log messages
index=application_logs "Processing request"
| rex "Processing request (?<request_id>[w-]+) for user (?<user_id>d+)"

# Use eval to create new fields
index=web_access
| eval response_time_ms = round(response_time * 1000, 2)
| eval request_size_mb = round(request_bytes / 1024 / 1024, 2)

# Parse JSON logs
index=api_logs sourcetype=json
| spath input=_raw output=request
| fields request.method, request.endpoint, request.status

# Conditional field creation
index=database_logs
| eval severity=case(log_level="ERROR", "High", log_level="WARN", "Medium", log_level="INFO", "Low")
```
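
Before pointing a `rex` at production data, it can help to validate the pattern against a synthetic event built with `makeresults`; the sample message below is invented to match the extraction above:

```splunk
# Validate the extraction against a hand-written sample event
| makeresults
| eval _raw="Processing request req-8f2a for user 4217"
| rex "Processing request (?<request_id>[\w-]+) for user (?<user_id>\d+)"
| table request_id, user_id
```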

## 3. Statistical Analysis and Aggregation
```splunk
# Count events by category
index=application_logs
| stats count by level, component

# Average response time by endpoint
index=api_logs
| stats avg(response_time) as avg_response_time by endpoint
| sort -avg_response_time

# Percentile calculation
index=web_access
| stats p50(response_time) as p50, p95(response_time) as p95, p99(response_time) as p99 by uri

# Time-based aggregation
index=security_events
| bucket _time span=1h
| stats count as events by _time, event_type
| rename _time as time_bucket
```
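
On large indexes, `tstats` over index-time metadata (or an accelerated data model) is usually much faster than raw-event `stats`. A minimal sketch against the same `application_logs` index, limited to metadata fields because search-time extractions are not visible to `tstats`:

```splunk
# Hourly event counts from index-time metadata only
| tstats count where index=application_logs by _time span=1h, sourcetype, host
```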

## 4. Transaction Analysis
```splunk
# Create transactions for web sessions
index=web_access user_id=*
| transaction user_id maxpause=5m
| stats count as session_count, avg(duration) as avg_session_duration by user_id

# Track user journey
index=user_events user_id=12345
| sort _time
| streamstats count as event_num
| eval step = "Step " . event_num
| table _time, event_type, step, details

# Monitor API call chains
index=api_logs correlation_id=*
| transaction correlation_id
| stats values(endpoint) as api_calls by correlation_id
| where mvcount(api_calls) > 3
```
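
`transaction` is convenient but memory-hungry; for simple correlation-ID grouping, a `stats`-based rewrite of the last query above is cheaper and yields the same fields:

```splunk
# stats-based alternative to transaction for API call chains
index=api_logs correlation_id=*
| stats earliest(_time) as start_time, latest(_time) as end_time, values(endpoint) as api_calls by correlation_id
| eval duration = end_time - start_time
| where mvcount(api_calls) > 3
```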

## 5. Alert and Anomaly Detection
```splunk
# Error rate monitoring
index=application_logs level=ERROR
| bucket _time span=5m
| stats count as errors by _time
| streamstats avg(errors) as avg_errors, stdev(errors) as stdev_errors
| eval threshold = avg_errors + (2 * stdev_errors)
| where errors > threshold
| table _time, errors, threshold

# Detect unusual login patterns
index=security_events event_type=login
| bucket _time span=1h
| stats count as logins by _time, user_id
| eventstats avg(logins) as avg_logins
| where logins > (avg_logins * 3)
| eval anomaly_type = "Unusual login activity"

# Performance degradation detection
index=api_logs response_time>0
| bucket _time span=10m
| stats avg(response_time) as avg_response_time by _time, endpoint
| streamstats avg(avg_response_time) as historical_avg by endpoint
| where avg_response_time > (historical_avg * 1.5)
```
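
Note that the error-rate baseline above includes the current bucket in its own average; a variant that compares each 5-minute bucket against the trailing hour only (the 12 previous buckets) avoids that bias:

```splunk
# Compare each bucket against a trailing-hour baseline
index=application_logs level=ERROR
| bucket _time span=5m
| stats count as errors by _time
| streamstats window=12 current=f avg(errors) as baseline, stdev(errors) as sd
| where errors > baseline + (2 * sd)
```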

## 6. Security Monitoring Queries
```splunk
# Failed login attempts
index=security_events event_type=login_failed
| stats count by user_id, source_ip
| where count > 10
| eval risk_score = min(count * 10, 100)
| rename count as failed_attempts

# Suspicious IP activity
index=security_events
| stats values(event_type) as activities, dc(user_id) as unique_users by source_ip
| where mvcount(activities) > 5 AND unique_users > 20
| eval risk_level = "High"

# Data access monitoring
index=data_access action=read
| stats count, values(file_name) as accessed_files by user_id
| where count > 1000
| eval concern_level = "Potential data exfiltration"

# Privilege escalation detection
index=audit_logs action=privilege_change
| bucket _time span=1h
| stats count as privilege_changes by _time, user_id
| where count > 5
| eval alert_type = "Privilege escalation"
```
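
Failed-login sources can also be enriched with the built-in `iplocation` command before scoring, which makes geographic outliers easier to spot:

```splunk
# Geo-enrich failed logins (iplocation adds Country, Region and City fields)
index=security_events event_type=login_failed
| iplocation source_ip
| stats count as failed_attempts, dc(user_id) as targeted_users by source_ip, Country
| sort -failed_attempts
```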

## 7. Performance and Capacity Planning
```splunk
# CPU utilization trends
index=metrics host=web*
| timechart avg(cpu_percent) as CPU_Usage by host

# Memory usage analysis
index=metrics
| stats avg(memory_used) as used_memory, avg(memory_total) as total_memory by host
| eval memory_utilization = round((used_memory / total_memory) * 100, 2)
| where memory_utilization > 80
| table host, memory_utilization

# Disk space forecasting (single series; add a host filter to forecast one host at a time)
index=metrics
| timechart span=1d avg(disk_usage_gb) as disk_used
| predict disk_used as prediction algorithm=LLP future_timespan=7
| table _time, disk_used, prediction

# Database connection pool monitoring
index=database_metrics
| bucket _time span=5m
| stats avg(active_connections) as active_connections, avg(max_connections) as max_connections by _time, database_server
| eval connection_utilization = round((active_connections / max_connections) * 100, 2)
```
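
`eventstats` is useful in capacity reviews because it keeps per-host rows while attaching a fleet-wide reference value; a sketch against the same `metrics` index, with the 25% deviation threshold chosen arbitrarily:

```splunk
# Flag hosts whose CPU deviates sharply from the fleet average
index=metrics
| stats avg(cpu_percent) as host_cpu by host
| eventstats avg(host_cpu) as fleet_avg_cpu
| eval deviation_pct = round(((host_cpu - fleet_avg_cpu) / fleet_avg_cpu) * 100, 1)
| where deviation_pct > 25
| sort -deviation_pct
```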

## 8. Business Intelligence and KPI
```splunk
# User registration metrics
index=user_events event_type=registration
| timechart span=1d count as registrations
| trendline sma5(registrations) as registrations_trend

# Conversion funnel analysis (assumes the first stage, view_product, has the most unique users)
index=user_events event_type IN (view_product, add_to_cart, purchase)
| stats dc(user_id) as unique_users by event_type
| eventstats max(unique_users) as funnel_entry
| eval conversion_rate = round((unique_users / funnel_entry) * 100, 2)

# Revenue analysis
index=transactions status=completed
| bucket _time span=1h
| stats sum(amount) as revenue by _time, payment_method
| timechart sum(revenue) by payment_method

# Customer engagement score
index=user_events
| stats dc(event_id) as interactions, sum(event_value) as engagement_score by user_id
| eval engagement_level = case(engagement_score > 1000, "High", engagement_score > 500, "Medium", true(), "Low")
| stats count by engagement_level
```
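
For period-over-period KPI views, `timewrap` overlays the same daily series across weeks, which makes week-over-week comparisons straightforward:

```splunk
# Week-over-week revenue comparison
index=transactions status=completed
| timechart span=1d sum(amount) as revenue
| timewrap 1week
```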

## 9. Advanced Lookup and Joins
```splunk
# Join with external lookup file
index=security_events event_type=login
| lookup user_accounts.csv user_id OUTPUT username, department
| stats count by department
| rename count as login_count

# Self-join for session analysis
index=user_events user_id=*
| stats earliest(_time) as first_event, latest(_time) as last_event, count as event_count by user_id
| eval session_duration = last_event - first_event
| where event_count > 50

# Correlate logs from different sources
index=web_access status_code=500
| join type=inner transaction_id [search index=application_logs level=ERROR]
| table _time, transaction_id, status_code, error_message
```
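
Because `join` is subject to subsearch row and time limits, correlating on a shared ID is often more reliable with a single multi-index search and `stats`:

```splunk
# Correlate web and application errors without join
(index=web_access status_code=500) OR (index=application_logs level=ERROR)
| stats values(status_code) as status_code, values(error_message) as error_message, earliest(_time) as first_seen by transaction_id
| where isnotnull(status_code) AND isnotnull(error_message)
```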

## 10. Machine Learning and Anomaly Detection
```splunk
# Train and apply an anomaly model (requires the Machine Learning Toolkit; IsolationForest may need to be added via the MLTK extensibility API)
| inputlookup training_data.csv
| fit StandardScaler amount age into scaler_model
| fit IsolationForest SS_amount SS_age into anomaly_model
| apply anomaly_model

# Time series anomaly detection
index=metrics cpu_percent=*
| timechart span=1h avg(cpu_percent) as cpu
| fit DensityFunction cpu into cpu_density
| where 'IsOutlier(cpu)' > 0
| eval anomaly_type = "CPU utilization anomaly"

# Predictive analytics
index=server_metrics
| timechart span=1h avg(memory_usage) as memory, avg(cpu_usage) as cpu
| predict memory as predicted_memory algorithm=LLP future_timespan=4
| predict cpu as predicted_cpu algorithm=LLP future_timespan=4
```
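
Where the Machine Learning Toolkit is not available, a plain z-score over the same series catches gross outliers with core SPL only:

```splunk
# Statistical outlier check without MLTK: flag hours more than 3 sigma from the mean
index=metrics
| timechart span=1h avg(cpu_percent) as cpu
| eventstats avg(cpu) as mean_cpu, stdev(cpu) as sd_cpu
| eval zscore = abs(cpu - mean_cpu) / sd_cpu
| where zscore > 3
```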

💻 Splunk Dashboard XML Configuration xml

🟡 intermediate ⭐⭐⭐⭐

Complete dashboard XML configurations for different monitoring scenarios

⏱️ 40 min 🏷️ splunk, dashboard, xml, visualization
Prerequisites: Splunk dashboard knowledge, XML basics
<?xml version="1.0" encoding="UTF-8"?>
<dashboard>
  <label>Application Performance Monitoring</label>
  <description>Real-time monitoring of application performance and health</description>
  <search id="base_search">
    <query>index=application_logs</query>
    <earliest>-1h</earliest>
    <latest>now</latest>
  </search>

  <row>
    <panel>
      <title>Request Rate</title>
      <chart>
        <search>
          <query>index=web_access | timechart count as requests</query>
          <earliest>-1h</earliest>
          <latest>now</latest>
        </search>
        <option name="charting.chart">line</option>
        <option name="charting.legend.placement">bottom</option>
      </chart>
    </panel>

    <panel>
      <title>Error Rate</title>
      <single>
        <search>
          <query>index=application_logs level=ERROR | stats count | eval error_rate = count</query>
          <earliest>-5m</earliest>
          <latest>now</latest>
        </search>
        <option name="colorBy">value</option>
        <option name="colorMode">block</option>
      </single>
    </panel>

    <panel>
      <title>Average Response Time</title>
      <chart>
        <search>
          <query>index=api_logs | timechart avg(response_time) as avg_response_time</query>
          <earliest>-1h</earliest>
          <latest>now</latest>
        </search>
        <option name="charting.chart">area</option>
      </chart>
    </panel>
  </row>

  <row>
    <panel>
      <title>Top Error Messages</title>
      <table>
        <search>
          <query>index=application_logs level=ERROR
          | stats count by message
          | sort -count
          | head 10</query>
          <earliest>-1h</earliest>
          <latest>now</latest>
        </search>
        <option name="count">10</option>
        <option name="drilldown">none</option>
      </table>
    </panel>

    <panel>
      <title>Response Time Distribution</title>
      <chart>
        <search>
          <query>index=api_logs
          | bucket response_time span=100
          | stats count by response_time
          | rename response_time as "Response Time (ms)"</query>
          <earliest>-30m</earliest>
          <latest>now</latest>
        </search>
        <option name="charting.chart">column</option>
      </chart>
    </panel>
  </row>
</dashboard>

<!-- Security Operations Dashboard -->
<dashboard>
  <label>Security Operations Center</label>
  <description>Security monitoring and threat detection dashboard</description>

  <row>
    <panel>
      <title>Security Events Timeline</title>
      <chart>
        <search>
          <query>index=security_events
          | timechart count by event_type</query>
          <earliest>-24h</earliest>
          <latest>now</latest>
        </search>
        <option name="charting.chart">stackedcolumn</option>
      </chart>
    </panel>
  </row>

  <row>
    <panel>
      <title>Threat Intelligence Feed</title>
      <table>
        <search>
          <query>index=threat_intelligence
          | stats latest(first_seen) as last_seen, count by threat_type, severity
          | sort -last_seen</query>
          <earliest>-1h</earliest>
          <latest>now</latest>
        </search>
        <format type="color" field="severity">
          <colorPalette type="list">[#D93F3C,#F7BC38,#65A637]</colorPalette>
          <scale type="threshold">0,30,70</scale>
        </format>
      </table>
    </panel>

    <panel>
      <title>Failed Login Attempts</title>
      <map>
        <search>
          <query>index=security_events event_type=login_failed
          | iplocation source_ip
          | geostats count by source_ip</query>
          <earliest>-6h</earliest>
          <latest>now</latest>
        </search>
      </map>
    </panel>
  </row>
</dashboard>

<!-- Infrastructure Monitoring Dashboard -->
<dashboard>
  <label>Infrastructure Monitoring</label>
  <description>System resource and infrastructure health monitoring</description>

  <row>
    <panel>
      <title>CPU Utilization</title>
      <chart>
        <search>
          <query>index=metrics
          | timechart avg(cpu_percent) as CPU by host</query>
          <earliest>-1h</earliest>
          <latest>now</latest>
        </search>
        <option name="charting.chart">line</option>
      </chart>
    </panel>

    <panel>
      <title>Memory Usage</title>
      <chart>
        <search>
          <query>index=metrics
          | timechart avg(memory_used_percent) as Memory by host</query>
          <earliest>-1h</earliest>
          <latest>now</latest>
        </search>
      </chart>
    </panel>
  </row>

  <row>
    <panel>
      <title>Disk Space Usage</title>
      <table>
        <search>
          <query>index=metrics
          | stats latest(disk_used_percent) as usage, latest(disk_free_gb) as free_gb by host, filesystem
          | eval status = case(usage > 90, "Critical", usage > 80, "Warning", true(), "Normal")</query>
          <earliest>-5m</earliest>
          <latest>now</latest>
        </search>
        <format type="color" field="status">
          <colorPalette type="map">{"Critical":#D93F3C,"Warning":#F7BC38,"Normal":#65A637}</colorPalette>
        </format>
      </table>
    </panel>

    <panel>
      <title>Network Traffic</title>
      <chart>
        <search>
          <query>index=metrics
          | timechart sum(network_bytes_in) as Inbound, sum(network_bytes_out) as Outbound</query>
          <earliest>-1h</earliest>
          <latest>now</latest>
        </search>
        <option name="charting.chart">area</option>
      </chart>
    </panel>
  </row>
</dashboard>

<!-- Business Intelligence Dashboard -->
<dashboard>
  <label>Business Intelligence</label>
  <description>Key business metrics and KPIs</description>

  <row>
    <panel>
      <title>Active Users</title>
      <single>
        <search>
          <query>index=user_events
          | stats dc(user_id) as active_users</query>
          <earliest>-1h</earliest>
          <latest>now</latest>
        </search>
        <option name="numberPrecision">0</option>
      </single>
    </panel>

    <panel>
      <title>Daily Revenue</title>
      <single>
        <search>
          <query>index=transactions status=completed
          | bucket _time span=1d
          | stats sum(amount) as daily_revenue by _time
          | sort -_time
          | head 1
          | fields daily_revenue</query>
          <earliest>-2d</earliest>
          <latest>now</latest>
        </search>
        <option name="unit">USD</option>
      </single>
    </panel>

    <panel>
      <title>Conversion Rate</title>
      <single>
        <search>
          <query>index=user_events event_type=registration
          | stats count as registrations
          | append [search index=user_events event_type=purchase
                   | stats count as purchases]
          | stats values(*) as *
          | eval conversion_rate = round((purchases / registrations) * 100, 2)</query>
          <earliest>-7d</earliest>
          <latest>now</latest>
        </search>
        <option name="unit">%</option>
      </single>
    </panel>
  </row>

  <row>
    <panel>
      <title>User Activity Heatmap</title>
      <heatmap>
        <search>
          <query>index=user_events
          | eval hour=strftime(_time, "%H")
          | eval day=strftime(_time, "%A")
          | stats count by hour, day</query>
          <earliest>-7d</earliest>
          <latest>now</latest>
        </search>
        <option name="heatmap.showLabels">0</option>
      </heatmap>
    </panel>

    <panel>
      <title>Feature Usage</title>
      <chart>
        <search>
          <query>index=user_events event_type=feature_used
          | stats count by feature_name
          | sort -count
          | head 10</query>
          <earliest>-7d</earliest>
          <latest>now</latest>
        </search>
        <option name="charting.chart">bar</option>
      </chart>
    </panel>
  </row>
</dashboard>

<!-- DevOps Monitoring Dashboard -->
<dashboard>
  <label>DevOps Monitoring</label>
  <description>Application deployment and CI/CD pipeline monitoring</description>

  <row>
    <panel>
      <title>Deployment Frequency</title>
      <chart>
        <search>
          <query>index=ci_cd event_type=deployment
          | timechart count as deployments</query>
          <earliest>-30d</earliest>
          <latest>now</latest>
        </search>
        <option name="charting.chart">column</option>
      </chart>
    </panel>

    <panel>
      <title>Build Success Rate</title>
      <single>
        <search>
          <query>index=ci_cd event_type=build
          | stats count as total_builds, count(eval(status="success")) as successful_builds
          | eval success_rate = round((successful_builds / total_builds) * 100, 2)</query>
          <earliest>-7d</earliest>
          <latest>now</latest>
        </search>
        <option name="unit">%</option>
      </single>
    </panel>
  </row>

  <row>
    <panel>
      <title>Code Quality Metrics</title>
      <table>
        <search>
          <query>index=code_quality
          | stats avg(test_coverage) as coverage, avg(code_complexity) as complexity by service_name
          | eval quality_score = case(coverage > 80 AND complexity < 10, "Excellent",
                                  coverage > 70 AND complexity < 15, "Good",
                                  true(), "Needs Improvement")</query>
          <earliest>-24h</earliest>
          <latest>now</latest>
        </search>
        <format type="color" field="quality_score">
          <colorPalette type="map">{"Excellent":#65A637,"Good":#F7BC38,"Needs Improvement":#D93F3C}</colorPalette>
        </format>
      </table>
    </panel>

    <panel>
      <title>Lead Time Analysis</title>
      <chart>
        <search>
          <query>index=ci_cd event_type=commit
          | transaction maxpause=7d
          | rename duration as lead_time
          | timechart avg(lead_time) as avg_lead_time</query>
          <earliest>-30d</earliest>
          <latest>now</latest>
        </search>
        <option name="charting.chart">line</option>
      </chart>
    </panel>
  </row>
</dashboard>
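
<!-- Example: Form with Time Input and Base Search Post-Processing -->
<!-- A minimal sketch: dashboards with inputs use a <form> root instead of <dashboard>.
     The time_range token name and the base_search id are illustrative choices;
     each panel post-processes the shared base search with its own pipeline. -->
<form>
  <label>Shared Base Search Example</label>
  <fieldset submitButton="false">
    <input type="time" token="time_range">
      <label>Time Range</label>
      <default>
        <earliest>-1h</earliest>
        <latest>now</latest>
      </default>
    </input>
  </fieldset>

  <search id="base_search">
    <query>index=application_logs</query>
    <earliest>$time_range.earliest$</earliest>
    <latest>$time_range.latest$</latest>
  </search>

  <row>
    <panel>
      <title>Events by Level (post-processed from base search)</title>
      <chart>
        <search base="base_search">
          <query>| stats count by level</query>
        </search>
        <option name="charting.chart">pie</option>
      </chart>
    </panel>
  </row>
</form>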

💻 Splunk Alerts and Saved Searches json

🟡 intermediate ⭐⭐⭐⭐

Configuration of automated alerts and saved searches for proactive monitoring

⏱️ 35 min 🏷️ splunk, alerts, monitoring, automation
Prerequisites: Splunk knowledge, REST API basics
{
  "saved_searches": [
    {
      "name": "High Error Rate Alert",
      "search": "index=application_logs level=ERROR | bucket _time span=5m | stats count as errors | where errors > 10",
      "dispatch": {
        "earliest_time": "-5m@m",
        "latest_time": "now",
        "time_format": "%s"
      },
      "actions": [
        {
          "email": {
            "to": "[email protected]",
            "subject": "High Error Rate Alert",
            "message": "Error rate exceeded threshold: {{results.errors}} errors in last 5 minutes"
          }
        },
        {
          "slack": {
            "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK",
            "channel": "#alerts",
            "message": "🚨 High error rate detected: {{results.errors}} errors in 5 minutes"
          }
        }
      ],
      "cron_schedule": "*/5 * * * *",
      "is_scheduled": true,
      "alert_type": "number of events",
      "alert_comparator": "greater than",
      "alert_threshold": "10",
      "alert_severity": "high",
      "actions": ["email", "slack"]
    },
    {
      "name": "CPU Utilization Warning",
      "search": "index=metrics host=web* | stats avg(cpu_percent) as cpu by host | where cpu > 80",
      "dispatch": {
        "earliest_time": "-5m@m",
        "latest_time": "now"
      },
      "actions": [
        {
          "email": {
            "to": "[email protected]",
            "subject": "High CPU Utilization Alert",
            "message": "Server {{results.host}} CPU utilization: {{results.cpu}}%"
          }
        }
      ],
      "cron_schedule": "*/2 * * * *",
      "is_scheduled": true,
      "alert_type": "number of events",
      "alert_threshold": "1",
      "alert_severity": "medium",
      "actions": ["email"]
    },
    {
      "name": "Security Event Anomaly",
      "search": "index=security_events event_type=login_failed | bucket _time span=1h | stats count by source_ip | where count > 50",
      "dispatch": {
        "earliest_time": "-1h@m",
        "latest_time": "now"
      },
      "actions": [
        {
          "pagerduty": {
            "service_key": "your-pagerduty-service-key",
            "severity": "high"
          }
        },
        {
          "email": {
            "to": "[email protected]",
            "subject": "Suspicious Login Activity",
            "message": "Multiple failed logins from {{results.source_ip}}: {{results.count}} attempts"
          }
        }
      ],
      "cron_schedule": "0 * * * *",
      "is_scheduled": true,
      "alert_type": "number of events",
      "alert_comparator": "greater than",
      "alert_threshold": "1",
      "alert_severity": "critical"
    },
    {
      "name": "Application Performance Degradation",
      "search": "index=api_logs | bucket _time span=10m | stats avg(response_time) as avg_response_time by endpoint | where avg_response_time > 2000",
      "dispatch": {
        "earliest_time": "-10m@m",
        "latest_time": "now"
      },
      "actions": [
        {
          "slack": {
            "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK",
            "channel": "#performance",
            "message": "⚠️ Performance degradation detected for {{results.endpoint}}: {{results.avg_response_time}}ms"
          }
        }
      ],
      "cron_schedule": "*/10 * * * *",
      "is_scheduled": true,
      "alert_type": "number of events",
      "alert_threshold": "1",
      "alert_severity": "medium"
    },
    {
      "name": "Disk Space Critical",
      "search": "index=metrics | stats latest(disk_used_percent) as usage by host, filesystem | where usage > 90",
      "dispatch": {
        "earliest_time": "-1m@m",
        "latest_time": "now"
      },
      "actions": [
        {
          "email": {
            "to": "[email protected]",
            "subject": "CRITICAL: Disk space alert",
            "message": "Server {{results.host}} filesystem {{results.filesystem}} is {{results.usage}}% full"
          }
        },
        {
          "script": {
            "script_file": "/opt/splunk/bin/scripts/disk_cleanup.sh",
            "script_args": ["{{results.host}}", "{{results.filesystem}}"]
          }
        }
      ],
      "cron_schedule": "*/1 * * * *",
      "is_scheduled": true,
      "alert_type": "number of events",
      "alert_threshold": "1",
      "alert_severity": "critical"
    }
  ],
  "alert_actions": {
    "email": {
      "from": "[email protected]",
      "smtp_server": "smtp.company.com",
      "smtp_port": 587,
      "use_tls": true,
      "auth_username": "[email protected]",
      "auth_password": "encrypted_password_here"
    },
    "slack": {
      "webhook_urls": {
        "alerts": "https://hooks.slack.com/services/YOUR/SLACK/ALERTS",
        "performance": "https://hooks.slack.com/services/YOUR/SLACK/PERFORMANCE",
        "security": "https://hooks.slack.com/services/YOUR/SLACK/SECURITY"
      },
      "default_channel": "#splunk-alerts"
    },
    "pagerduty": {
      "service_key": "your-pagerduty-service-key",
      "integration_key": "your-pagerduty-integration-key"
    },
    "script": {
      "scripts_dir": "/opt/splunk/bin/scripts",
      "execution_timeout": 60
    }
  },
  "escalation_policies": [
    {
      "name": "Critical Infrastructure Alerts",
      "rules": [
        {
          "trigger": "disk_space_critical OR cpu_utilization_critical",
          "actions": ["email", "pagerduty"],
          "escalation_delay": 0
        },
        {
          "trigger": "disk_space_critical AND duration > 15m",
          "actions": ["phone_call", "executive_notification"],
          "escalation_delay": 900
        }
      ]
    },
    {
      "name": "Security Alerts",
      "rules": [
        {
          "trigger": "security_event_anomaly OR high_error_rate",
          "actions": ["email", "slack", "pagerduty"],
          "escalation_delay": 0
        },
        {
          "trigger": "security_event_anomaly AND severity=high",
          "actions": ["security_team_only", "war_room"],
          "escalation_delay": 300
        }
      ]
    }
  ],
  "maintenance_windows": [
    {
      "name": "Weekend Maintenance",
      "schedule": {
        "type": "weekly",
        "days": ["saturday", "sunday"],
        "start_time": "02:00",
        "end_time": "06:00",
        "timezone": "UTC"
      },
      "suppressed_alerts": [
        "CPU Utilization Warning",
        "Application Performance Degradation"
      ],
      "critical_alerts_enabled": true
    },
    {
      "name": "Deployment Window",
      "schedule": {
        "type": "specific_dates",
        "dates": ["2024-01-15", "2024-02-01"],
        "start_time": "00:00",
        "end_time": "04:00"
      },
      "suppressed_alerts": [
        "High Error Rate Alert",
        "CPU Utilization Warning"
      ]
    }
  ],
  "report_schedules": [
    {
      "name": "Daily Operations Report",
      "search": "index=metrics | stats avg(cpu_percent) as avg_cpu, avg(memory_percent) as avg_mem by host",
      "schedule": "0 8 * * *",
      "recipients": ["[email protected]"],
      "format": "pdf",
      "subject": "Daily Infrastructure Report"
    },
    {
      "name": "Weekly Security Summary",
      "search": "index=security_events earliest=-7d latest=now | stats count by event_type",
      "schedule": "0 9 * * 1",
      "recipients": ["[email protected]", "[email protected]"],
      "format": "csv",
      "subject": "Weekly Security Summary"
    }
  ]
}

# Alert Configuration REST API Examples

# Create Alert via API
curl -k -u admin:password https://splunk.company.com:8089/servicesNS/admin/search/savedsearches \
  -d name="Custom Error Alert" \
  -d search="index=application_logs level=ERROR | bucket _time span=5m | stats count" \
  -d "dispatch.earliest_time"="-5m@m" \
  -d "dispatch.latest_time"="now" \
  -d "alert_type"="number of events" \
  -d "alert_comparator"="greater than" \
  -d "alert_threshold"="5" \
  -d "alert_severity"="3" \
  -d "actions.email"="1" \
  -d "action.email.to"="[email protected]"

# Update Alert Configuration
curl -k -u admin:password https://splunk.company.com:8089/servicesNS/admin/search/savedsearches/Custom%20Error%20Alert \
  -d "alert_threshold"="10" \
  -d "actions.slack"="1" \
  -d "action.slack.param.webhook_url"="https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK"

# Create Real-Time Alert
curl -k -u admin:password https://splunk.company.com:8089/servicesNS/admin/search/savedsearches \
  -d name="Real-Time Critical Error" \
  -d search="index=application_logs level=ERROR" \
  -d "dispatch.earliest_time"="rt" \
  -d "dispatch.latest_time"="rt" \
  -d "alert_type"="always" \
  -d "alert_comparator"="greater than" \
  -d "alert_threshold"="0" \
  -d "alert_severity"="6" \
  -d "cron_schedule"="*/1 * * * *"

# Python SDK Example for Alert Management
import splunklib.client as client

# Connect to Splunk
service = client.connect(
    host='splunk.company.com',
    port=8089,
    scheme='https',
    username='admin',
    password='your-password'
)

# Create saved search
savedsearch = service.savedsearches.create(
    name="Python API Alert",
    search="index=application_logs level=ERROR | stats count",
    alert_type='number of events',
    alert_comparator='greater than',
    alert_threshold=10,
    alert_severity=3
)

# Configure email action
savedsearch.update(
    **{
        'actions.email': '1',
        'action.email.to': '[email protected]',
        'action.email.subject': 'Splunk Alert: Error Count'
    }
)

# Schedule the search
savedsearch.update(
    cron_schedule='*/5 * * * *',
    is_scheduled=True
)

# Get all saved searches
for savedsearch in service.savedsearches:
    print(f"Name: {savedsearch.name}")
    print(f"Search: {savedsearch['search']}")
    print(f"Alert Threshold: {savedsearch.get('alert_threshold', 'N/A')}")
    print("---")

💻 Advanced Splunk Data Visualization splunk

🔴 complex ⭐⭐⭐⭐

Advanced visualization and custom charting techniques for data analysis

⏱️ 45 min 🏷️ splunk, visualization, charts, data analysis
Prerequisites: Advanced Splunk knowledge, JavaScript basics
# Advanced Splunk Data Visualization Examples

## 1. Custom Visualization with JSChart
```splunk
# Multi-series line chart with custom formatting
index=metrics
| timechart span=5m avg(cpu_percent) as CPU, avg(memory_percent) as Memory, avg(disk_io_percent) as DiskIO
| rename CPU as "CPU %", Memory as "Memory %", DiskIO as "Disk I/O %"
| eval time=strftime(_time, "%H:%M")
| fields time, "CPU %", "Memory %", "Disk I/O %"

# JavaScript for custom chart options
chartOptions = {
    type: 'line',
    series: [{
        name: 'CPU %',
        color: '#FF6B6B',
        lineWidth: 2
    }, {
        name: 'Memory %',
        color: '#4ECDC4',
        lineWidth: 2
    }, {
        name: 'Disk I/O %',
        color: '#45B7D1',
        lineWidth: 2
    }],
    xAxis: {
        title: 'Time'
    },
    yAxis: {
        title: 'Utilization %',
        min: 0,
        max: 100
    }
}
```

## 2. Heatmap for System Performance
```splunk
# System performance heatmap by hour and day
index=metrics
| eval hour=strftime(_time, "%H")
| eval day=strftime(_time, "%A")
| stats avg(cpu_percent) as avg_cpu by hour, day
| eval cpu_level=case(avg_cpu > 80, "Critical", avg_cpu > 60, "Warning", avg_cpu <= 60, "Normal")
| fields day, hour, cpu_level

# Custom heatmap configuration
heatmapOptions = {
    type: 'heatmap',
    colorPalette: {
        "Critical": "#D93F3C",
        "Warning": "#F7BC38",
        "Normal": "#65A637"
    },
    cellSize: {
        width: 40,
        height: 20
    }
}
```

## 3. Sankey Diagram for User Flow Analysis
```splunk
# User journey flow analysis
index=user_events user_id=*
| sort 0 _time
| streamstats current=f last(event_type) as previous_event by user_id
| eval from_event=coalesce(previous_event, "START")
| eval to_event=event_type
| stats count by from_event, to_event
| rename from_event as source, to_event as target, count as value

# Sankey diagram configuration
sankeyOptions = {
    type: 'sankey',
    nodeWidth: 15,
    nodePadding: 10,
    layout: 32,
    linkColor: 'gradient'
}
```

## 4. Treemap for Resource Usage
```splunk
# Resource usage treemap
index=metrics
| stats avg(cpu_percent) as cpu, avg(memory_percent) as memory, avg(disk_usage_gb) as disk by host, environment
| eval total_resource_usage = cpu + memory + (disk * 10)
| stats values(host) as hosts, sum(total_resource_usage) as total_usage by environment
| rename environment as name, total_usage as size

# Custom treemap colors
treemapOptions = {
    type: 'treemap',
    colorRange: {
        min: '#E3F2FD',
        max: '#1565C0'
    },
    borderWidth: 2
}
```

## 5. Radar Chart for System Health
```splunk
# Multi-dimensional system health radar
index=metrics
| stats avg(cpu_percent) as cpu, avg(memory_percent) as memory,
       avg(network_in_mbps) as network, avg(disk_io_ops) as disk_io,
       avg(error_rate) as errors by host
| eval normalized_cpu = min(cpu, 100)
| eval normalized_memory = min(memory, 100)
| eval normalized_network = min(network * 2, 100)
| eval normalized_disk_io = min(disk_io * 5, 100)
| eval normalized_errors = min(errors * 10, 100)
| table host, normalized_cpu, normalized_memory, normalized_network, normalized_disk_io, normalized_errors

# Radar chart axes configuration
radarOptions = {
    type: 'radar',
    axes: ['CPU %', 'Memory %', 'Network', 'Disk I/O', 'Error Rate'],
    scale: {
        min: 0,
        max: 100
    },
    levels: 5
}
```

## 6. Advanced Time Series Forecasting
```splunk
# Sales forecasting with confidence intervals
index=sales
| timechart span=1d sum(amount) as daily_sales
| predict daily_sales as predicted algorithm=LLP future_timespan=30 upper95=predicted_upper lower95=predicted_lower
| eval actual_lower = if(_time < now(), daily_sales, null())
| eval actual_upper = if(_time < now(), daily_sales, null())

# Forecast chart with confidence bands
forecastOptions = {
    type: 'line',
    series: [
        {name: 'actual_lower', lineStyle: 'dashed'},
        {name: 'daily_sales', lineStyle: 'solid'},
        {name: 'actual_upper', lineStyle: 'dashed'},
        {name: 'predicted_lower', lineStyle: 'dotted'},
        {name: 'predicted', lineStyle: 'solid'},
        {name: 'predicted_upper', lineStyle: 'dotted'}
    ],
    legend: {
        position: 'top'
    }
}
```
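
A quick way to sanity-check a forecast is to hold back recent data and measure the prediction error; the `holdback` option and the MAPE calculation below are a sketch, assuming the same `sales` index:

```splunk
# Rough forecast accuracy check: hold back the last 7 days and measure percentage error
index=sales
| timechart span=1d sum(amount) as daily_sales
| predict daily_sales as predicted algorithm=LLP holdback=7
| eval abs_pct_error = round(abs(daily_sales - predicted) / daily_sales * 100, 2)
| stats avg(abs_pct_error) as mape
```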

## 7. Custom Gauge Charts
```splunk
# Performance score gauge
index=application_metrics
| stats avg(response_time) as avg_response,
       avg(error_rate) as avg_error_rate,
       avg(throughput) as avg_throughput
| eval response_score = max(0, min(100, 100 - (avg_response / 10)))
| eval error_score = max(0, min(100, 100 - (avg_error_rate * 10)))
| eval throughput_score = min(100, avg_throughput / 100)
| eval overall_score = round((response_score + error_score + throughput_score) / 3, 2)

# Gauge configuration
gaugeOptions = {
    type: 'gauge',
    ranges: {
        'Excellent': {min: 80, max: 100, color: '#65A637'},
        'Good': {min: 60, max: 80, color: '#F7BC38'},
        'Fair': {min: 40, max: 60, color: '#FF8C00'},
        'Poor': {min: 0, max: 40, color: '#D93F3C'}
    },
    threshold: 75
}
```

## 8. Network Topology Visualization
```splunk
# Service dependency network graph
index=api_logs correlation_id=*
| eval service=case(match(endpoint, "^/api/user/"), "UserService",
                     match(endpoint, "^/api/order/"), "OrderService",
                     match(endpoint, "^/api/payment/"), "PaymentService",
                     true(), "Unknown")
| sort 0 _time
| streamstats current=f last(service) as previous_service by correlation_id
| where isnotnull(previous_service) AND previous_service != service
| stats count as weight by previous_service, service
| rename previous_service as source, service as target

# Network graph options
networkOptions = {
    type: 'network',
    nodeSize: 'weight',
    linkWidth: 'weight',
    colorScheme: 'category10',
    layout: 'force',
    physics: {
        enabled: true,
        stabilization: {iterations: 100}
    }
}
```

## 9. Correlation Matrix
```splunk
# Feature correlation matrix for anomaly detection
index=application_metrics
| table cpu_percent, memory_percent, disk_io_percent, network_mbps, response_time, error_rate
| correlate
# Note: correlate reports field co-occurrence rather than a numeric correlation coefficient

# Heatmap for correlation matrix
correlationMatrixOptions = {
    type: 'heatmap',
    colorScale: {
        'type': 'diverging',
        'domain': [-1, 0, 1],
        'colors': ['#D93F3C', '#FFFFFF', '#65A637']
    },
    showValues: true,
    valueFormat: '.2f'
}
```

## 10. Custom Bubble Chart
```splunk
# Multi-dimensional bubble chart for application analysis
index=application_metrics
| stats avg(response_time) as avg_response,
       avg(throughput) as avg_throughput,
       count as request_count by endpoint
| eval size = log(request_count) * 10
| eval color_code = case(avg_response < 100, "#65A637",
                          avg_response < 500, "#F7BC38",
                          true(), "#D93F3C")
| fields endpoint, avg_throughput, avg_response, size, color_code

# Bubble chart configuration
bubbleChartOptions = {
    type: 'bubble',
    xAxis: 'avg_throughput',
    yAxis: 'avg_response',
    sizeField: 'size',
    colorField: 'color_code',
    opacity: 0.8,
    borderWidth: 1
}
```