R Language Samples

Essential R programming examples for statistical analysis and data science

💻 R Hello World r

🟢 simple

Basic R Hello World program and fundamental syntax examples

⏱️ 15 min 🏷️ r, programming, beginner, statistics, data science
Prerequisites: Basic programming concepts, Understanding of statistical concepts
# R Hello World Examples

# 1. Basic Hello World
print("Hello, World!")

# 2. Hello World with variable
message <- "Hello, World!"
print(message)
cat("Using cat:", message, "\n")

# 3. Hello World with function
say_hello <- function() {
  return("Hello, World!")
}

print(say_hello())

# 4. Hello World with function parameters
greet <- function(name) {
  return(paste("Hello,", name, "!"))
}

print(greet("World"))
print(greet("R"))

# 5. Hello World with default parameter
greet_default <- function(name = "World") {
  return(paste("Hello,", name, "!"))
}

print(greet_default())
print(greet_default("R Programming"))

# 6. Hello World with user input
# Note: readLine() is interactive, so here's a simulation
simulate_input <- function() {
  user_name <- "R User"  # Simulated input
  return(paste("Hello,", user_name, "!"))
}

print(simulate_input())

# 7. Hello World multiple times
for (i in 1:5) {
  print(paste("Hello, World!", i))
}

# 8. Hello World with vector
greetings <- c("Hello", "Bonjour", "Hola", "Ciao", "こんにちは")

for (greeting in greetings) {
  print(paste(greeting, "World!"))
}

# 9. Hello World with list
greetings_list <- list(
  english = "Hello",
  spanish = "Hola",
  french = "Bonjour",
  german = "Hallo",
  japanese = "こんにちは"
)

for (lang in names(greetings_list)) {
  print(paste(greetings_list[[lang]], "World! (", lang, ")"))
}

# 10. Hello World with data frame
hello_data <- data.frame(
  language = c("English", "Spanish", "French", "German"),
  greeting = c("Hello", "Hola", "Bonjour", "Hallo"),
  stringsAsFactors = FALSE
)

print(hello_data)
print(paste(hello_data$greeting, "World!"))

# Basic data types examples
# Numeric
numeric_var <- 42
double_var <- 3.14159

# Integer
integer_var <- 42L

# Character
string_var <- "R Programming"

# Logical
logical_var <- TRUE
logical_var2 <- FALSE

# Complex
complex_var <- 3 + 2i

# Print data types
print(paste("Numeric:", numeric_var, "Type:", class(numeric_var)))
print(paste("Double:", double_var, "Type:", class(double_var)))
print(paste("Integer:", integer_var, "Type:", class(integer_var)))
print(paste("Character:", string_var, "Type:", class(string_var)))
print(paste("Logical:", logical_var, "Type:", class(logical_var)))
print(paste("Complex:", complex_var, "Type:", class(complex_var)))

# Control flow examples
age <- 18

# If-else
if (age >= 18) {
  print("You are an adult")
} else {
  print("You are a minor")
}

# If-else if-else
score <- 85
if (score >= 90) {
  grade <- "A"
} else if (score >= 80) {
  grade <- "B"
} else if (score >= 70) {
  grade <- "C"
} else {
  grade <- "F"
}

print(paste("Score:", score, "Grade:", grade))

# For loop
fruits <- c("apple", "banana", "cherry")
for (fruit in fruits) {
  print(paste("I like", fruit))
}

# While loop
count <- 0
while (count < 3) {
  print(paste("Count:", count))
  count <- count + 1
}

# Vector operations
# Creating vectors
numbers <- c(1, 2, 3, 4, 5)
strings <- c("a", "b", "c", "d")
logicals <- c(TRUE, FALSE, TRUE, TRUE)

# Vector operations
print("Original numbers:")
print(numbers)

print("Numbers + 10:")
print(numbers + 10)

print("Numbers * 2:")
print(numbers * 2)

print("Numbers > 3:")
print(numbers > 3)

# Vector indexing
print("First element:")
print(numbers[1])

print("First three elements:")
print(numbers[1:3])

print("Elements greater than 3:")
print(numbers[numbers > 3])

# Basic statistical functions
data_vector <- c(10, 15, 12, 18, 22, 14, 16, 20, 11, 13)

print("Statistical summary:")
print(paste("Mean:", mean(data_vector)))
print(paste("Median:", median(data_vector)))
print(paste("Standard deviation:", sd(data_vector)))
print(paste("Variance:", var(data_vector)))
print(paste("Min:", min(data_vector)))
print(paste("Max:", max(data_vector)))
print(paste("Range:", range(data_vector)))
print(paste("Sum:", sum(data_vector)))

# Summary function
print("Full summary:")
summary(data_vector)

# List examples
student_list <- list(
  name = "John Doe",
  age = 25,
  grades = c(85, 92, 78, 95),
  is_student = TRUE,
  address = list(
    street = "123 Main St",
    city = "New York",
    zip = "10001"
  )
)

print("Student list:")
print(student_list)

print("Student name:")
print(student_list$name)

print("Student grades:")
print(student_list$grades)

print("Average grade:")
print(mean(student_list$grades))

print("Student address:")
print(student_list$address)

💻 R Data Frames and Data Manipulation r

🟡 intermediate ⭐⭐⭐

Comprehensive examples of data frames, dplyr, and data manipulation in R

⏱️ 30 min 🏷️ r, data frames, dplyr, tidyverse, data manipulation
Prerequisites: R basics, Understanding of data structures, Basic statistics
# R Data Frames and Data Manipulation Examples

# Load required libraries
library(dplyr)
library(tidyr)
library(readr)

# 1. Creating Data Frames
# Create a data frame from scratch
students <- data.frame(
  id = 1:5,
  name = c("Alice", "Bob", "Charlie", "Diana", "Eve"),
  age = c(22, 25, 21, 23, 24),
  grade = c(85.5, 92.0, 78.5, 88.0, 91.5),
  major = c("CS", "Math", "Physics", "CS", "Math"),
  graduated = c(FALSE, FALSE, TRUE, FALSE, FALSE),
  stringsAsFactors = FALSE
)

print("Original students data frame:")
print(students)

# 2. Basic Data Frame Operations
print("Data frame structure:")
str(students)

print("Data frame dimensions:")
print(dim(students))

print("Column names:")
print(names(students))

print("Row names:")
print(rownames(students))

print("First 3 rows:")
print(head(students, 3))

print("Last 2 rows:")
print(tail(students, 2))

# 3. Accessing Data Frame Elements
print("Access by column name:")
print(students$name)

print("Access multiple columns:")
print(students[, c("name", "age", "grade")])

print("Access by row and column:")
print(students[1, ])  # First row, all columns
print(students[1, 2]) # First row, second column
print(students[, "name"]) # All rows, name column

# 4. Filtering and Subsetting
print("Students older than 22:")
print(students[students$age > 22, ])

print("CS majors:")
print(students[students$major == "CS", ])

print("Students with grade > 85:")
print(students[students$grade > 85, ])

print("Multiple conditions (age > 22 AND CS major):")
print(students[students$age > 22 & students$major == "CS", ])

print("Age 21 OR 22:")
print(students[students$age %in% c(21, 22), ])

# 5. Adding and Modifying Columns
students$gpa <- students$grade / 20  # Convert to GPA scale
students$age_group <- ifelse(students$age >= 23, "Senior", "Junior")

print("Added GPA and age group:")
print(students)

# Modify existing column
students$grade <- round(students$grade)
print("After rounding grades:")
print(students)

# 6. Using dplyr for Data Manipulation
library(dplyr)

# Select columns
selected <- students %>%
  select(name, age, grade, major)

print("Selected columns:")
print(selected)

# Filter rows
filtered <- students %>%
  filter(age >= 22, grade > 80)

print("Filtered data (age >= 22 AND grade > 80):")
print(filtered)

# Arrange (sort)
arranged <- students %>%
  arrange(desc(grade))

print("Arranged by grade (descending):")
print(arranged)

# Mutate (add/modify columns)
mutated <- students %>%
  mutate(
    grade_letter = case_when(
      grade >= 90 ~ "A",
      grade >= 80 ~ "B",
      grade >= 70 ~ "C",
      TRUE ~ "D"
    ),
    age_squared = age ^ 2
  )

print("With letter grades and age squared:")
print(mutated)

# 7. Grouping and Summarizing
# Create a larger dataset for grouping
expanded_data <- data.frame(
  department = rep(c("CS", "Math", "Physics", "Chemistry"), each = 10),
  score = c(rnorm(10, 85, 5), rnorm(10, 88, 4), rnorm(10, 82, 6), rnorm(10, 86, 5)),
  year = rep(2021:2023, length.out = 40)
)

print("Expanded data:")
print(head(expanded_data))

# Group by department
department_stats <- expanded_data %>%
  group_by(department) %>%
  summarise(
    count = n(),
    mean_score = mean(score),
    median_score = median(score),
    sd_score = sd(score),
    min_score = min(score),
    max_score = max(score)
  )

print("Department statistics:")
print(department_stats)

# Group by multiple variables
year_dept_stats <- expanded_data %>%
  group_by(year, department) %>%
  summarise(
    count = n(),
    mean_score = mean(score),
    .groups = "drop"
  )

print("Year and department statistics:")
print(year_dept_stats)

# 8. Joining Data Frames
# Create two related data frames
student_info <- data.frame(
  id = 1:5,
  name = c("Alice", "Bob", "Charlie", "Diana", "Eve"),
  major = c("CS", "Math", "Physics", "CS", "Math")
)

course_info <- data.frame(
  course_id = c("CS101", "CS102", "MATH101", "PHYS101"),
  course_name = c("Intro to CS", "Data Structures", "Calculus", "Mechanics"),
  department = c("CS", "CS", "Math", "Physics")
)

enrollments <- data.frame(
  student_id = c(1, 1, 2, 2, 3, 4, 4, 5),
  course_id = c("CS101", "CS102", "CS101", "MATH101", "PHYS101", "CS101", "CS102", "MATH101")
)

print("Student info:")
print(student_info)

print("Course info:")
print(course_info)

print("Enrollments:")
print(enrollments)

# Inner join
enrolled_details <- enrollments %>%
  left_join(student_info, by = c("student_id" = "id")) %>%
  left_join(course_info, by = "course_id")

print("Detailed enrollments:")
print(enrolled_details)

# 9. Reshaping Data (Tidy Data)
# Create wide format data
wide_data <- data.frame(
  student = c("Alice", "Bob", "Charlie"),
  math_score = c(85, 92, 78),
  science_score = c(88, 84, 90),
  english_score = c(92, 89, 85)
)

print("Wide format data:")
print(wide_data)

# Convert to long format
long_data <- wide_data %>%
  pivot_longer(
    cols = ends_with("_score"),
    names_to = "subject",
    values_to = "score",
    names_pattern = "(.*)_score"
  )

print("Long format data:")
print(long_data)

# Convert back to wide format
wide_again <- long_data %>%
  pivot_wider(
    names_from = subject,
    values_from = score,
    names_prefix = "_score"
  )

print("Wide format again:")
print(wide_again)

# 10. Handling Missing Values
# Create data with missing values
data_with_na <- data.frame(
  id = 1:6,
  name = c("Alice", "Bob", "Charlie", "Diana", "Eve", NA),
  score = c(85, NA, 92, 88, NA, 95),
  grade = c("B", NA, "A", "B", "A", "A")
)

print("Data with missing values:")
print(data_with_na)

print("Check for missing values:")
print(is.na(data_with_na))

print("Number of missing values per column:")
print(colSums(is.na(data_with_na)))

# Remove rows with missing values
complete_cases <- data_with_na %>%
  drop_na()

print("Complete cases only:")
print(complete_cases)

# Fill missing values
filled_data <- data_with_na %>%
  mutate(
    score = ifelse(is.na(score), mean(score, na.rm = TRUE), score),
    grade = ifelse(is.na(grade), "Unknown", grade)
  )

print("Data with filled missing values:")
print(filled_data)

# 11. Data Frame Operations with Base R
# Apply functions
print("Row means for score columns:")
print(apply(wide_data[, 2:4], 1, mean))

print("Column summaries:")
print(apply(wide_data[, 2:4], 2, function(x) c(min = min(x), max = max(x), mean = mean(x))))

# Using sapply
numeric_columns <- sapply(students, is.numeric)
print("Numeric columns:")
print(numeric_columns)

# 12. Advanced Data Manipulation
# Window functions with dplyr
students_ranked <- students %>%
  arrange(desc(grade)) %>%
  mutate(
    rank = row_number(),
    percentile = percent_rank(grade),
    lead_name = lead(name),
    lag_name = lag(name)
  )

print("Students with rank and percentiles:")
print(students_ranked)

# Cumulative calculations
running_totals <- expanded_data %>%
  arrange(year) %>%
  group_by(department) %>%
  mutate(
    cumulative_mean = cummean(score),
    cumulative_sum = cumsum(score),
    running_count = row_number()
  )

print("Running calculations:")
print(head(running_totals, 10))

# 13. Exporting Data
# Save to CSV
write_csv(students, "students.csv")

# Save to different formats
write.table(students, "students.txt", sep = "\t", row.names = FALSE)
saveRDS(students, "students.rds")

print("Data exported successfully!")

# Clean up (in real usage, you might want to keep the files)
if (file.exists("students.csv")) {
  file.remove("students.csv")
}

💻 R Statistical Analysis and Visualization r

🟡 intermediate ⭐⭐⭐⭐

Statistical tests, data analysis, and visualization with ggplot2

⏱️ 35 min 🏷️ r, statistics, visualization, ggplot2, data analysis
Prerequisites: R basics, Statistics fundamentals, Understanding of ggplot2
# R Statistical Analysis and Visualization Examples

# Load required libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
library(broom)
library(corrr)

# 1. Exploratory Data Analysis (EDA)
# Create sample datasets
set.seed(123)  # For reproducible results

# Student performance data
n_students <- 200
student_data <- data.frame(
  student_id = 1:n_students,
  age = sample(18:25, n_students, replace = TRUE),
  gpa = rnorm(n_students, mean = 3.2, sd = 0.5),
  study_hours = rnorm(n_students, mean = 20, sd = 5),
  extracurricular = sample(0:10, n_students, replace = TRUE),
  sleep_hours = rnorm(n_students, mean = 7, sd = 1.5),
  gender = sample(c("Male", "Female"), n_students, replace = TRUE),
  major = sample(c("CS", "Math", "Physics", "Biology", "Chemistry"),
                 n_students, replace = TRUE, prob = c(0.3, 0.25, 0.2, 0.15, 0.1))
)

# Clip values to reasonable ranges
student_data$gpa <- pmax(0, pmin(4.0, student_data$gpa))
student_data$study_hours <- pmax(5, pmin(40, student_data$study_hours))
student_data$sleep_hours <- pmax(4, pmin(12, student_data$sleep_hours))

print("Sample student data:")
print(head(student_data))

# Basic summary statistics
print("Summary statistics:")
summary(student_data)

# 2. Descriptive Statistics
# Numerical summaries
numeric_vars <- c("age", "gpa", "study_hours", "extracurricular", "sleep_hours")

print("Descriptive statistics:")
descriptive_stats <- student_data %>%
  select(all_of(numeric_vars)) %>%
  summarise(
    across(everything(), list(
      mean = ~mean(.x, na.rm = TRUE),
      median = ~median(.x, na.rm = TRUE),
      sd = ~sd(.x, na.rm = TRUE),
      min = ~min(.x, na.rm = TRUE),
      max = ~max(.x, na.rm = TRUE),
      q25 = ~quantile(.x, 0.25, na.rm = TRUE),
      q75 = ~quantile(.x, 0.75, na.rm = TRUE)
    ))
  )

print(descriptive_stats)

# Categorical variable summaries
print("Gender distribution:")
table(student_data$gender)

print("Major distribution:")
table(student_data$major)

# 3. Basic Visualizations with ggplot2

# Histogram of GPA
p1 <- ggplot(student_data, aes(x = gpa)) +
  geom_histogram(binwidth = 0.2, fill = "steelblue", alpha = 0.7, color = "black") +
  labs(title = "Distribution of GPA", x = "GPA", y = "Frequency") +
  theme_minimal()

print(p1)

# Box plot by major
p2 <- ggplot(student_data, aes(x = major, y = gpa, fill = major)) +
  geom_boxplot(alpha = 0.7) +
  labs(title = "GPA Distribution by Major", x = "Major", y = "GPA") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = "none")

print(p2)

# Scatter plot: Study Hours vs GPA
p3 <- ggplot(student_data, aes(x = study_hours, y = gpa)) +
  geom_point(alpha = 0.6, color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(title = "Study Hours vs GPA", x = "Study Hours per Week", y = "GPA") +
  theme_minimal()

print(p3)

# Faceted scatter plot by gender
p4 <- ggplot(student_data, aes(x = study_hours, y = gpa, color = gender)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~gender) +
  labs(title = "Study Hours vs GPA by Gender", x = "Study Hours", y = "GPA") +
  theme_minimal()

print(p4)

# 4. Correlation Analysis
# Calculate correlations
correlation_matrix <- student_data %>%
  select(all_of(numeric_vars)) %>%
  cor()

print("Correlation matrix:")
print(round(correlation_matrix, 3))

# Visualize correlations
library(reshape2)
cor_melt <- melt(correlation_matrix)

p5 <- ggplot(cor_melt, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = round(value, 2)), color = "white", size = 4) +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white",
                       midpoint = 0, limit = c(-1, 1)) +
  labs(title = "Correlation Matrix", x = "", y = "") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(p5)

# 5. Statistical Tests

# T-test: GPA difference between genders
t_test_result <- t.test(gpa ~ gender, data = student_data)
print("T-test for GPA difference by gender:")
print(t_test_result)

# ANOVA: GPA difference across majors
anova_result <- aov(gpa ~ major, data = student_data)
print("ANOVA for GPA difference by major:")
print(summary(anova_result))

# Post-hoc test
if (summary(anova_result)[[1]][["Pr(>F)"]][1] < 0.05) {
  post_hoc <- TukeyHSD(anova_result)
  print("Post-hoc Tukey test:")
  print(post_hoc)
}

# Chi-square test: Gender vs Major
chi_square_result <- chisq.test(table(student_data$gender, student_data$major))
print("Chi-square test for Gender vs Major:")
print(chi_square_result)

# 6. Linear Regression
# Simple linear regression
simple_lm <- lm(gpa ~ study_hours, data = student_data)
print("Simple linear regression (GPA ~ Study Hours):")
print(summary(simple_lm))

# Multiple linear regression
multiple_lm <- lm(gpa ~ study_hours + sleep_hours + extracurricular + age + gender, data = student_data)
print("Multiple linear regression:")
print(summary(multiple_lm))

# Model diagnostics
par(mfrow = c(2, 2))
plot(multiple_lm)
par(mfrow = c(1, 1))

# 7. Advanced Visualizations

# Density plot
p6 <- ggplot(student_data, aes(x = gpa, fill = gender)) +
  geom_density(alpha = 0.5) +
  labs(title = "GPA Density by Gender", x = "GPA", y = "Density") +
  theme_minimal()

print(p6)

# Violin plot
p7 <- ggplot(student_data, aes(x = major, y = gpa, fill = gender)) +
  geom_violin(alpha = 0.6) +
  geom_boxplot(width = 0.1, alpha = 0.8) +
  labs(title = "GPA Distribution by Major and Gender", x = "Major", y = "GPA") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(p7)

# Heatmap of correlations using corrplot style
library(ggplot2)

# Create correlation plot
cor_df <- as.data.frame(as.table(correlation_matrix))
colnames(cor_df) <- c("Var1", "Var2", "Correlation")

p8 <- ggplot(cor_df, aes(x = Var1, y = Var2, fill = Correlation)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white",
                       midpoint = 0, space = "Lab",
                       name = "Correlation") +
  theme_minimal() +
  labs(title = "Correlation Heatmap") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(p8)

# 8. Statistical Summaries by Group
grouped_stats <- student_data %>%
  group_by(major, gender) %>%
  summarise(
    count = n(),
    mean_gpa = mean(gpa),
    sd_gpa = sd(gpa),
    median_study_hours = median(study_hours),
    mean_sleep_hours = mean(sleep_hours),
    .groups = "drop"
  )

print("Grouped statistics by major and gender:")
print(grouped_stats)

# 9. Probability Distributions
# Normal distribution checks
print("Shapiro-Wilk test for normality of GPA:")
shapiro_test <- shapiro.test(student_data$gpa)
print(shapiro_test)

# 10. Advanced Statistical Plots

# Q-Q plot
p9 <- ggplot(student_data, aes(sample = gpa)) +
  stat_qq() +
  stat_qq_line(color = "red") +
  labs(title = "Q-Q Plot for GPA", x = "Theoretical Quantiles", y = "Sample Quantiles") +
  theme_minimal()

print(p9)

# Residual plots for regression
residual_data <- data.frame(
  fitted = fitted(multiple_lm),
  residuals = resid(multiple_lm),
  standardized_residuals = rstandard(multiple_lm)
)

p10 <- ggplot(residual_data, aes(x = fitted, y = residuals)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
  labs(title = "Residuals vs Fitted Values", x = "Fitted Values", y = "Residuals") +
  theme_minimal()

print(p10)

p11 <- ggplot(residual_data, aes(x = fitted, y = standardized_residuals)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
  geom_hline(yintercept = -2, color = "blue", linetype = "dashed") +
  geom_hline(yintercept = 2, color = "blue", linetype = "dashed") +
  labs(title = "Standardized Residuals vs Fitted",
       x = "Fitted Values", y = "Standardized Residuals") +
  theme_minimal()

print(p11)

# 11. Model Comparison
# Compare nested models
reduced_lm <- lm(gpa ~ study_hours + sleep_hours, data = student_data)
full_lm <- lm(gpa ~ study_hours + sleep_hours + extracurricular + age + gender, data = student_data)

anova_comparison <- anova(reduced_lm, full_lm)
print("ANOVA for model comparison:")
print(anova_comparison)

# AIC comparison
print("Model AIC comparison:")
print(paste("Reduced model AIC:", round(AIC(reduced_lm), 2)))
print(paste("Full model AIC:", round(AIC(full_lm), 2)))

# 12. Power Analysis Simulation
# Simple power analysis for t-test
simulate_power <- function(n_per_group, effect_size, n_simulations = 1000) {
  significant_results <- 0

  for (i in 1:n_simulations) {
    # Simulate two groups
    group1 <- rnorm(n_per_group, mean = 0, sd = 1)
    group2 <- rnorm(n_per_group, mean = effect_size, sd = 1)

    # Perform t-test
    test_result <- t.test(group1, group2)

    if (test_result$p.value < 0.05) {
      significant_results <- significant_results + 1
    }
  }

  return(significant_results / n_simulations)
}

# Calculate power for different sample sizes
sample_sizes <- c(10, 20, 30, 50, 100)
power_results <- data.frame(
  sample_size = sample_sizes,
  power = sapply(sample_sizes, function(n) simulate_power(n, 0.5))
)

print("Power analysis results:")
print(power_results)

# Plot power curve
p12 <- ggplot(power_results, aes(x = sample_size, y = power)) +
  geom_line(color = "blue", size = 1) +
  geom_point(color = "blue", size = 2) +
  geom_hline(yintercept = 0.8, color = "red", linetype = "dashed") +
  labs(title = "Power Analysis Curve (Effect Size = 0.5)",
       x = "Sample Size per Group", y = "Statistical Power") +
  theme_minimal() +
  scale_y_continuous(labels = scales::percent_format())

print(p12)

# 13. Export Results and Plots

# Save plots
ggsave("gpa_distribution.png", p1, width = 8, height = 6, dpi = 300)
ggsave("study_hours_vs_gpa.png", p3, width = 8, height = 6, dpi = 300)
ggsave("correlation_heatmap.png", p8, width = 8, height = 6, dpi = 300)

# Save analysis results
write.csv(descriptive_stats, "descriptive_statistics.csv", row.names = FALSE)
write.csv(grouped_stats, "grouped_statistics.csv", row.names = FALSE)

# Save regression results
library(stargazer)
# stargazer(multiple_lm, type = "text", out = "regression_results.txt")

print("Analysis completed! Results and plots saved.")