🎯 Recommended Examples
Balanced sample collections from various categories for you to explore
Mojo Examples
Mojo programming language examples - a language specialized for AI development, combining Python's simplicity with C++ performance
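As a quick illustration of that blend, here is a minimal sketch (the function names are hypothetical, and Mojo's syntax is still evolving, so details may differ between releases): `def` accepts Python-style dynamically typed arguments, while `fn` enforces declared types for compiled, systems-level performance.

# Minimal sketch of Mojo's two function styles (details vary by release)
def flexible_add(a, b):
    # Python-style: argument types are dynamic
    return a + b

fn strict_add(a: Int, b: Int) -> Int:
    # Systems-style: types are declared and checked at compile time
    return a + b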
💻 Mojo Hello World mojo
🟢 simple
⭐⭐
Mojo Hello World program demonstrating basic syntax and AI/ML-oriented features
⏱️ 20 min
🏷️ mojo, ai, machine learning, performance
Prerequisites:
Basic programming concepts, Python familiarity helpful
# Mojo Hello World Examples - AI Development Language
# NOTE: Mojo's standard library evolves quickly; these listings follow early
# Mojo releases, and names such as Tensor, List, or parallelize may differ on
# newer toolchains.
from math import sqrt
from tensor import Tensor
from collections import List
from algorithm import parallelize

# 1. Basic Hello World
fn basic_hello():
    print("Hello, Mojo World!")

# 2. Hello World with Mojo types and variables
fn hello_with_types():
    # Mojo supports Python-like syntax with explicit type annotations
    var name: String = "Mojo"
    var version: Float64 = 0.1
    var is_ready: Bool = True
    var year: Int = 2024
    # Mojo has no f-strings; print accepts multiple arguments instead
    print("Language:", name)
    print("Version:", version)
    print("Ready:", is_ready)
    print("Year:", year)

# 3. Hello World with AI context
fn ai_hello_world():
    # Mojo is designed for AI/ML development
    var framework: String = "Mojo AI"
    var purpose: String = "High-performance AI development"
    var speedup: Float64 = 3500.0  # speedup over Python reported for some workloads
    print(framework + ": " + purpose)
    print("Performance improvement:", speedup, "x")

# 4. Hello World with functions and type inference
fn greet(name: String) -> String:
    return "Hello, " + name + "!"

fn demonstrate_functions():
    var greeting = greet("Mojo Developer")
    print(greeting)
    # Type inference example
    var x = 42       # Inferred as Int
    var y = 3.14     # Inferred as Float64
    var z = "AI/ML"  # Inferred as a string
    print("x =", x, " y =", y, " z =", z)

# 5. Hello World with Mojo structs
struct Point:
    var x: Float64
    var y: Float64

    fn __init__(inout self, x: Float64, y: Float64):
        self.x = x
        self.y = y

    fn distance(self, other: Point) -> Float64:
        return sqrt((self.x - other.x)**2 + (self.y - other.y)**2)

fn demonstrate_structs():
    var p1 = Point(0.0, 0.0)
    var p2 = Point(3.0, 4.0)
    var dist = p1.distance(p2)
    print("Distance between points:", dist)

# 6. Hello World with collections
fn demonstrate_collections():
    # Mojo does not (yet) support Python list comprehensions; explicit loops
    # over a List express the same idea
    var numbers = List[Int](1, 2, 3, 4, 5)
    var squares = List[Int]()
    for i in range(len(numbers)):
        squares.append(numbers[i] ** 2)
    var even_squares = List[Int]()
    for i in range(len(squares)):
        if squares[i] % 2 == 0:
            even_squares.append(squares[i])
    print("Original numbers:", len(numbers))
    print("First square:", squares[0], "- last square:", squares[len(squares) - 1])
    print("Even squares found:", len(even_squares))

# 7. Hello World with Mojo tensors (AI/ML focus)
fn tensor_hello_world():
    # Create a simple 2x3 tensor (the Tensor API has moved between releases;
    # flat indexing is used here: element [i, j] lives at i * cols + j)
    var data = Tensor[DType.float32](2, 3)
    # Fill with values
    for i in range(2):
        for j in range(3):
            data[i * 3 + j] = Float32(i * 3 + j)
    print("Tensor data:")
    print(data)
    print("Tensor rows:", data.shape()[0], "cols:", data.shape()[1])

# 8. Hello World with Mojo's value semantics
@value
struct SafeString:
    var data: String

    # @value synthesizes the memberwise initializer, so no __init__ is needed
    fn greet(self) -> String:
        return "Safe hello: " + self.data

fn demonstrate_memory_safety():
    var safe_msg = SafeString("Memory-safe Mojo!")
    print(safe_msg.greet())

# 9. Hello World with parallelism
fn concurrent_hello():
    # Parallel processing example; the helper name and signature
    # (parallelize vs. parallel_for) vary across Mojo releases
    @parameter
    fn print_message(i: Int):
        print("Hello from task", i)
    # Execute 4 work items in parallel
    parallelize[print_message](4)

# 10. Hello World with ML concepts
fn ml_hello_world():
    # A single linear prediction: y = w . x + b
    var weights = Tensor[DType.float32](3)
    var input = Tensor[DType.float32](3)
    var bias: Float32 = 0.5
    for i in range(3):
        weights[i] = Float32(i + 1)
        input[i] = Float32(i) * 0.5
    var prediction: Float32 = bias
    for i in range(3):
        prediction += weights[i] * input[i]
    print("ML prediction:", prediction)

# 11. Hello World with Mojo's SIMD capabilities
fn simd_hello():
    # SIMD is a builtin type; vector operations map to hardware instructions
    var vec = SIMD[DType.float32, 4](1.0, 2.0, 3.0, 4.0)
    var result = vec * 2.0
    print("SIMD vector operations:")
    print("Original:", vec)
    print("Result:", result)

# 12. Hello World with string building
fn advanced_string_formatting():
    var name: String = "Mojo"
    var version: Float64 = 0.1
    # Without f-strings, multi-argument print stands in for formatting
    print(name, "v", version, "- high performance, Python compatibility, AI/ML focus")

# Main function demonstrating all examples
fn main():
    print("=== Mojo Hello World Examples ===\n")
    print("1. Basic Hello World:")
    basic_hello()
    print()
    print("2. Types and Variables:")
    hello_with_types()
    print()
    print("3. AI/ML Context:")
    ai_hello_world()
    print()
    print("4. Functions:")
    demonstrate_functions()
    print()
    print("5. Structs:")
    demonstrate_structs()
    print()
    print("6. Collections:")
    demonstrate_collections()
    print()
    print("7. Tensors (AI focus):")
    tensor_hello_world()
    print()
    print("8. Value Semantics:")
    demonstrate_memory_safety()
    print()
    print("9. Parallelism:")
    concurrent_hello()
    print()
    print("10. ML Concepts:")
    ml_hello_world()
    print()
    print("11. SIMD Operations:")
    simd_hello()
    print()
    print("12. String Formatting:")
    advanced_string_formatting()
    print()
    print("=== All Mojo Examples Completed ===")
💻 Mojo Machine Learning Basics mojo
🟡 intermediate
⭐⭐⭐⭐
Basic ML operations in Mojo, including tensors, neural networks, and training loops
⏱️ 35 min
🏷️ mojo, ml, ai, neural networks
Prerequisites:
Basic ML concepts, Linear algebra, Mojo basic syntax
# Mojo Machine Learning Basics
# NOTE: as in the previous listing, the Tensor, random, and parallelize APIs
# follow early Mojo releases and may need adjustment on newer toolchains.
from tensor import Tensor
from random import random_float64  # random() does not exist; random_float64 does
from math import sqrt, exp, log, sin
from algorithm import parallelize

# 1. Tensor Operations Foundation
fn tensor_basics():
    print("=== Tensor Basics ===")
    # Create tensors of different element types
    var float_tensor = Tensor[DType.float32](3, 4)
    var int_tensor = Tensor[DType.int32](2, 3)
    # Initialize with values (flat indexing: element [i, j] lives at i * cols + j)
    for i in range(3):
        for j in range(4):
            float_tensor[i * 4 + j] = Float32(i * 4 + j)
    for i in range(2):
        for j in range(3):
            int_tensor[i * 3 + j] = Int32(i * 3 + j)
    print("Float tensor:")
    print(float_tensor)
    print("Size:", float_tensor.num_elements())
    print("\nInt tensor:")
    print(int_tensor)
    print("Size:", int_tensor.num_elements())

# 2. Basic Neural Network Layer
@value
struct LinearLayer:
    var weights: Tensor[DType.float32]
    var bias: Tensor[DType.float32]
    var input_size: Int
    var output_size: Int

    fn __init__(inout self, input_size: Int, output_size: Int):
        self.input_size = input_size
        self.output_size = output_size
        # Allocate weights and bias
        self.weights = Tensor[DType.float32](output_size, input_size)
        self.bias = Tensor[DType.float32](output_size)
        # Random initialization (He/Xavier-style scale: sqrt(2 / fan_in))
        var scale = sqrt(2.0 / Float64(input_size))
        for i in range(output_size):
            for j in range(input_size):
                self.weights[i * input_size + j] = Float32((random_float64() - 0.5) * 2.0 * scale)
            self.bias[i] = 0.0

    fn forward(self, input: Tensor[DType.float32]) -> Tensor[DType.float32]:
        # Linear transformation: output = weights @ input + bias
        var output = Tensor[DType.float32](self.output_size)
        for i in range(self.output_size):
            var sum: Float32 = 0.0
            for j in range(self.input_size):
                sum += self.weights[i * self.input_size + j] * input[j]
            output[i] = sum + self.bias[i]
        return output

# 3. Activation Functions
fn relu(x: Tensor[DType.float32]) -> Tensor[DType.float32]:
    var result = Tensor[DType.float32](x.num_elements())
    for i in range(x.num_elements()):
        result[i] = x[i] if x[i] > 0.0 else 0.0
    return result

fn sigmoid(x: Tensor[DType.float32]) -> Tensor[DType.float32]:
    var result = Tensor[DType.float32](x.num_elements())
    for i in range(x.num_elements()):
        result[i] = 1.0 / (1.0 + exp(-x[i]))
    return result

fn softmax(x: Tensor[DType.float32]) -> Tensor[DType.float32]:
    # Subtract the max before exponentiating: exp(x - max) / sum is
    # mathematically identical to exp(x) / sum but cannot overflow
    var max_val = x[0]
    for i in range(x.num_elements()):
        if x[i] > max_val:
            max_val = x[i]
    var exp_values = Tensor[DType.float32](x.num_elements())
    var exp_sum: Float32 = 0.0
    for i in range(x.num_elements()):
        exp_values[i] = exp(x[i] - max_val)
        exp_sum += exp_values[i]
    for i in range(x.num_elements()):
        exp_values[i] /= exp_sum
    return exp_values

# 4. Loss Functions
fn mse_loss(predicted: Tensor[DType.float32], target: Tensor[DType.float32]) -> Float32:
    var sum: Float32 = 0.0
    for i in range(predicted.num_elements()):
        var diff = predicted[i] - target[i]
        sum += diff * diff
    return sum / Float32(predicted.num_elements())

fn cross_entropy_loss(predicted: Tensor[DType.float32], target: Int) -> Float32:
    # `predicted` should hold softmax probabilities; the epsilon guards log(0)
    return -log(predicted[target] + 1e-10)

# 5. Simple Neural Network
@value
struct SimpleNeuralNetwork:
    var layer1: LinearLayer
    var layer2: LinearLayer
    var layer3: LinearLayer

    fn __init__(inout self, input_size: Int, hidden_size: Int, output_size: Int):
        self.layer1 = LinearLayer(input_size, hidden_size)
        self.layer2 = LinearLayer(hidden_size, hidden_size)
        self.layer3 = LinearLayer(hidden_size, output_size)

    fn forward(self, input: Tensor[DType.float32]) -> Tensor[DType.float32]:
        # Forward pass through the three layers
        var hidden1 = relu(self.layer1.forward(input))
        var hidden2 = relu(self.layer2.forward(hidden1))
        return softmax(self.layer3.forward(hidden2))

# 6. Training Data Generator
fn generate_xor_data(num_samples: Int, inout inputs: Tensor[DType.float32],
                     inout targets: Tensor[DType.int32]):
    # Fill pre-allocated tensors with an XOR dataset (returning a tuple of
    # tensors is awkward in early Mojo, so the caller allocates)
    for i in range(num_samples):
        # Random binary inputs
        var x1: Float32 = 1.0 if random_float64() > 0.5 else 0.0
        var x2: Float32 = 1.0 if random_float64() > 0.5 else 0.0
        inputs[i * 2 + 0] = x1
        inputs[i * 2 + 1] = x2
        targets[i] = 1 if x1 != x2 else 0  # XOR logic

# 7. Training Loop
fn train_network():
    print("\n=== Training Neural Network ===")
    # Create network
    var network = SimpleNeuralNetwork(2, 4, 2)
    # Generate training data
    var inputs = Tensor[DType.float32](1000, 2)
    var targets = Tensor[DType.int32](1000)
    generate_xor_data(1000, inputs, targets)
    # Training parameters
    var epochs = 100
    print("Training for", epochs, "epochs...")
    # NOTE: this loop only evaluates the randomly initialized network; a full
    # trainer would also backpropagate and update the weights (see the
    # gradient-descent sketch after this listing)
    var correct = 0
    for epoch in range(epochs):
        var total_loss: Float32 = 0.0
        correct = 0
        for i in range(100):  # Use a subset for speed
            # Get a single sample
            var input = Tensor[DType.float32](2)
            input[0] = inputs[i * 2 + 0]
            input[1] = inputs[i * 2 + 1]
            var target = Int(targets[i])
            # Forward pass
            var output = network.forward(input)
            # Loss
            total_loss += cross_entropy_loss(output, target)
            # Accuracy
            var predicted_class = 1 if output[1] > output[0] else 0
            if predicted_class == target:
                correct += 1
    var accuracy = Float64(correct) / 100.0
    print("Training completed! Accuracy:", accuracy)
# 8. Batch Processing
fn batch_operations():
    print("\n=== Batch Operations ===")
    # Create a batch of data
    var batch_size = 32
    var features = 10
    var batch_data = Tensor[DType.float32](batch_size, features)
    # Fill with random data
    for i in range(batch_size):
        for j in range(features):
            batch_data[i * features + j] = Float32(random_float64())
    # Parallel batch processing: normalize each row so it sums to 1
    @parameter
    fn process_row(i: Int):
        var sum: Float32 = 0.0
        for j in range(features):
            sum += batch_data[i * features + j]
        for j in range(features):
            batch_data[i * features + j] /= sum
    # One work item per row (the parallelize signature varies by release)
    parallelize[process_row](batch_size)
    print("Processed batch of size", batch_size, "with", features, "features")
    print("Sample row after processing:", batch_data[0], batch_data[1], batch_data[2])

# 9. Optimization Algorithms
struct GradientDescent:
    var learning_rate: Float32

    fn __init__(inout self, learning_rate: Float32):
        self.learning_rate = learning_rate

    fn update(self, inout param: Tensor[DType.float32], gradient: Tensor[DType.float32]):
        # In-place gradient-descent step: param <- param - lr * grad
        for i in range(param.num_elements()):
            param[i] -= self.learning_rate * gradient[i]

# 10. Simple CNN Example (1D convolution)
fn simple_convolution():
    print("\n=== Simple 1D Convolution ===")
    # Input signal
    var signal = Tensor[DType.float32](10)
    for i in range(10):
        signal[i] = Float32(sin(Float64(i) * 0.5))
    # Smoothing kernel
    var kernel = Tensor[DType.float32](3)
    kernel[0] = 0.25
    kernel[1] = 0.5
    kernel[2] = 0.25
    # "Valid" convolution: output length = len(signal) - len(kernel) + 1
    var output_size = signal.num_elements() - kernel.num_elements() + 1
    var conv_output = Tensor[DType.float32](output_size)
    for i in range(output_size):
        var sum: Float32 = 0.0
        for j in range(kernel.num_elements()):
            sum += signal[i + j] * kernel[j]
        conv_output[i] = sum
    print("Original signal (first 5):", signal[0], signal[1], signal[2], signal[3], signal[4])
    print("Convolved signal (first 5):", conv_output[0], conv_output[1], conv_output[2], conv_output[3], conv_output[4])

# 11. Data Normalization
fn normalize_data(data: Tensor[DType.float32]) -> Tensor[DType.float32]:
    # Z-score normalization: (x - mean) / std
    var sum: Float32 = 0.0
    var sum_sq: Float32 = 0.0
    var n = Float32(data.num_elements())
    for i in range(data.num_elements()):
        sum += data[i]
        sum_sq += data[i] * data[i]
    var mean = sum / n
    var variance = (sum_sq / n) - (mean * mean)
    if variance < 1e-8:
        variance = 1e-8  # Guard against division by zero
    var std = Float32(sqrt(Float64(variance)))
    # Normalize
    var normalized = Tensor[DType.float32](data.num_elements())
    for i in range(data.num_elements()):
        normalized[i] = (data[i] - mean) / std
    return normalized

fn demonstrate_normalization():
    print("\n=== Data Normalization ===")
    # Sample data with a mean well away from zero
    var data = Tensor[DType.float32](10)
    for i in range(10):
        data[i] = Float32(i) * 2.0 + 5.0
    print("Original data (first 5):", data[0], data[1], data[2], data[3], data[4])
    var normalized = normalize_data(data)
    print("Normalized data (first 5):", normalized[0], normalized[1], normalized[2], normalized[3], normalized[4])

# Main function demonstrating all ML concepts
fn main():
    print("=== Mojo Machine Learning Basics ===\n")
    tensor_basics()
    # Demonstrate a neural network layer
    print("\n=== Neural Network Layer ===")
    var layer = LinearLayer(3, 4)
    var input = Tensor[DType.float32](3)
    for i in range(3):
        input[i] = Float32(i + 1)
    var output = layer.forward(input)
    print("Layer input:", input)
    print("Layer output:", output)
    # Demonstrate activation functions
    print("\n=== Activation Functions ===")
    var test_input = Tensor[DType.float32](5)
    for i in range(5):
        test_input[i] = Float32(i - 2)  # [-2, -1, 0, 1, 2]
    print("Input:", test_input)
    print("ReLU:", relu(test_input))
    print("Sigmoid:", sigmoid(test_input))
    print("Softmax:", softmax(test_input))
    # Demonstrate loss functions
    print("\n=== Loss Functions ===")
    var pred = Tensor[DType.float32](3)
    var target = Tensor[DType.float32](3)
    for i in range(3):
        pred[i] = Float32(i)
        target[i] = Float32(i + 1)
    print("MSE Loss:", mse_loss(pred, target))
    # Training demonstration
    train_network()
    # Batch processing
    batch_operations()
    # Convolution
    simple_convolution()
    # Normalization
    demonstrate_normalization()
    print("\n=== All ML Basics Completed ===")
💻 Mojo High-Performance Computing mojo
🔴 complex
⭐⭐⭐⭐⭐
Advanced Mojo features including SIMD, parallel processing, and memory optimization
⏱️ 40 min
🏷️ mojo, performance, simd, parallel, ai
Prerequisites:
Advanced Mojo, Computer architecture, Parallel programming concepts
# Mojo High-Performance Computing for AI
# NOTE: as in the previous listings, these APIs (Tensor, DTypePointer,
# parallelize, time.now) follow early Mojo releases and may differ on newer
# toolchains; time.now() reports nanoseconds.
from tensor import Tensor
from algorithm import parallelize
from memory import memcpy
from memory.unsafe import DTypePointer
from math import sin, exp
from time import now

# 1. SIMD Vector Operations
fn simd_vector_operations():
    print("=== SIMD Vector Operations ===")
    # SIMD is a builtin type; no import is required
    var vec_a = SIMD[DType.float32, 8](1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)
    var vec_b = SIMD[DType.float32, 8](8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0)
    # Vector arithmetic: each operation works on all 8 lanes at once
    var vec_sum = vec_a + vec_b
    var vec_product = vec_a * vec_b
    var vec_diff = vec_a - vec_b
    print("Vector A:", vec_a)
    print("Vector B:", vec_b)
    print("Sum:", vec_sum)
    print("Product:", vec_product)
    print("Difference:", vec_diff)
    # Horizontal reduction across the lanes
    print("Sum of A elements:", vec_a.reduce_add())

# 2. Matrix Multiplication (4x4, SIMD-backed storage)
@value
struct Matrix4x4:
    var data: SIMD[DType.float32, 16]  # 4x4 matrix stored as one SIMD vector

    fn __init__(inout self):
        self.data = SIMD[DType.float32, 16](0.0)

    fn set(inout self, row: Int, col: Int, value: Float32):
        self.data[row * 4 + col] = value

    fn get(self, row: Int, col: Int) -> Float32:
        return self.data[row * 4 + col]

    fn __mul__(self, other: Matrix4x4) -> Matrix4x4:
        var result = Matrix4x4()
        # Scalar reference implementation; a SIMD row-based variant is
        # sketched below
        for i in range(4):
            for j in range(4):
                var sum: Float32 = 0.0
                for k in range(4):
                    sum += self.get(i, k) * other.get(k, j)
                result.set(i, j, sum)
        return result
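The `__mul__` above is intentionally scalar. As a hedged sketch of the SIMD-style inner step (`simd_row_multiply` is a hypothetical helper; it assumes only lane indexing and scalar-times-vector broadcast, which Mojo's SIMD type supports), one output row of the 4x4 product can be built from the rows of B:

# Sketch: one output row of the 4x4 product, C[i, :] = sum_k A[i, k] * B[k, :]
fn simd_row_multiply(a_row: SIMD[DType.float32, 4],
                     b0: SIMD[DType.float32, 4], b1: SIMD[DType.float32, 4],
                     b2: SIMD[DType.float32, 4], b3: SIMD[DType.float32, 4]) -> SIMD[DType.float32, 4]:
    # Each a_row[k] is a scalar broadcast across a whole row of B,
    # so the four multiply-adds run lane-parallel instead of element by element
    return a_row[0] * b0 + a_row[1] * b1 + a_row[2] * b2 + a_row[3] * b3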
fn demonstrate_matrix_ops():
    print("\n=== Optimized Matrix Operations ===")
    var mat_a = Matrix4x4()
    var mat_b = Matrix4x4()
    # Fill matrices: fixed diagonal plus index-derived off-diagonal values
    for i in range(4):
        for j in range(4):
            mat_a.set(i, j, Float32(1.0) if i == j else Float32(i + j))
            mat_b.set(i, j, Float32(2.0) if i == j else Float32(i * j))
    # Compute the diagonal sums rather than hard-coding them
    var diag_a: Float32 = 0.0
    var diag_b: Float32 = 0.0
    for i in range(4):
        diag_a += mat_a.get(i, i)
        diag_b += mat_b.get(i, i)
    print("Matrix A diagonal sum:", diag_a)
    print("Matrix B diagonal sum:", diag_b)
    var mat_c = mat_a * mat_b
    print("Matrix multiplication completed; C[0, 0] =", mat_c.get(0, 0))

# 3. Parallel Processing Example
fn parallel_processing():
    print("\n=== Parallel Processing ===")
    # Large dataset for parallel processing
    var data_size = 1000000
    var data = Tensor[DType.float32](data_size)
    # Initialize data
    for i in range(data_size):
        data[i] = Float32(sin(Float64(i) * 0.001))
    # Sequential sum of squares
    var start_time = now()
    var sequential_sum: Float32 = 0.0
    for i in range(data_size):
        sequential_sum += data[i] * data[i]
    var sequential_time = now() - start_time
    # Parallel version: 8 chunks, one partial sum per chunk
    start_time = now()
    var num_chunks = 8
    var chunk_size = data_size // num_chunks  # data_size divides evenly by 8
    var partial_sums = Tensor[DType.float32](num_chunks)
    @parameter
    fn process_chunk(chunk_id: Int):
        var local_sum: Float32 = 0.0
        for i in range(chunk_id * chunk_size, (chunk_id + 1) * chunk_size):
            local_sum += data[i] * data[i]
        partial_sums[chunk_id] = local_sum
    parallelize[process_chunk](num_chunks)
    # Combine partial results
    var parallel_sum: Float32 = 0.0
    for i in range(num_chunks):
        parallel_sum += partial_sums[i]
    var parallel_time = now() - start_time
    print("Sequential result:", sequential_sum)
    print("Parallel result:", parallel_sum)
    print("Sequential time:", sequential_time, "ns")
    print("Parallel time:", parallel_time, "ns")
    print("Speedup:", Float64(sequential_time) / Float64(parallel_time), "x")

# 4. Memory-Optimized Tensor Operations
fn memory_optimized_operations():
    print("\n=== Memory-Optimized Operations ===")
    # Create large tensors
    var size = 1000
    var tensor_a = Tensor[DType.float32](size, size)
    var tensor_b = Tensor[DType.float32](size, size)
    # Fill with test data
    for i in range(size):
        for j in range(size):
            tensor_a[i * size + j] = Float32(i * j) / 1000.0
            tensor_b[i * size + j] = Float32(i + j) / 1000.0
    # Element-wise addition processed in blocks to improve cache locality
    var start_time = now()
    var block_size = 64
    var result = Tensor[DType.float32](size, size)
    for block_i in range(0, size, block_size):
        for block_j in range(0, size, block_size):
            # Process one block, clamped to the tensor edge
            var i_end = block_i + block_size
            if i_end > size:
                i_end = size
            var j_end = block_j + block_size
            if j_end > size:
                j_end = size
            for i in range(block_i, i_end):
                for j in range(block_j, j_end):
                    result[i * size + j] = tensor_a[i * size + j] + tensor_b[i * size + j]
    var processing_time = now() - start_time
    print("Block-based processing time:", processing_time, "ns")
    print("Result[500, 500] =", result[500 * size + 500])

# 5. Vectorized Operations
fn vectorized_operations():
    print("\n=== Vectorized Operations ===")
    # Create large arrays
    var n = 100000
    var array_a = Tensor[DType.float32](n)
    var array_b = Tensor[DType.float32](n)
    var array_c = Tensor[DType.float32](n)
    # Initialize
    for i in range(n):
        array_a[i] = Float32(i)
        array_b[i] = Float32(n - i)
    # Mojo's algorithm.vectorize applies a width-parameterized closure across
    # SIMD lanes; `@vectorize` as a decorator is not Mojo syntax, and the
    # exact Tensor load/store helpers vary by release, so this elementwise
    # loop is the portable fallback (the compiler can still auto-vectorize it)
    var start_time = now()
    for i in range(n):
        array_c[i] = array_a[i] + array_b[i]
    var elapsed = now() - start_time
    # Verify: every element should equal n
    print("Addition result[0]:", array_c[0], "(expected:", Float32(n), ")")
    print("Processing time:", elapsed, "ns")

# 6. Custom Kernels for AI Operations
fn custom_ai_kernels():
    print("\n=== Custom AI Kernels ===")
    # Kernels operate on raw DTypePointer buffers (the pointer API differs
    # across Mojo releases)
    # ReLU activation kernel
    @always_inline
    fn relu_kernel(input: DTypePointer[DType.float32], output: DTypePointer[DType.float32], size: Int):
        for i in range(size):
            output[i] = input[i] if input[i] > 0.0 else 0.0
    # Softmax kernel (numerically stable)
    @always_inline
    fn softmax_kernel(input: DTypePointer[DType.float32], output: DTypePointer[DType.float32], size: Int):
        # Find the maximum
        var max_val = input[0]
        for i in range(1, size):
            if input[i] > max_val:
                max_val = input[i]
        # Exponentiate and accumulate the sum
        var sum: Float32 = 0.0
        for i in range(size):
            var exp_val = exp(input[i] - max_val)
            output[i] = exp_val
            sum += exp_val
        # Normalize
        var inv_sum = 1.0 / sum
        for i in range(size):
            output[i] *= inv_sum
    # Test the kernels
    var data_size = 1000
    var input_data = Tensor[DType.float32](data_size)
    var relu_output = Tensor[DType.float32](data_size)
    var softmax_output = Tensor[DType.float32](data_size)
    # Fill with values centered on zero
    for i in range(data_size):
        input_data[i] = (Float32(i) - Float32(data_size // 2)) / 100.0
    var start_time = now()
    relu_kernel(input_data.data(), relu_output.data(), data_size)
    var relu_time = now() - start_time
    start_time = now()
    softmax_kernel(input_data.data(), softmax_output.data(), data_size)
    var softmax_time = now() - start_time
    # A softmax output should sum to 1
    var softmax_sum: Float32 = 0.0
    for i in range(data_size):
        softmax_sum += softmax_output[i]
    print("ReLU kernel time:", relu_time, "ns")
    print("Softmax kernel time:", softmax_time, "ns")
    print("ReLU output[500]:", relu_output[500])
    print("Softmax sum:", softmax_sum)

# 7. Tree-Based Reduction (GPU-style)
fn parallel_reduction():
    print("\n=== Tree-Based Reduction ===")
    var data_size = 1048576  # 2^20 elements
    var data = Tensor[DType.float32](data_size)
    # Fill with data
    for i in range(data_size):
        data[i] = Float32(i % 100) / 100.0
    # Sequential reduction
    var start_time = now()
    var sequential_sum: Float32 = 0.0
    for i in range(data_size):
        sequential_sum += data[i]
    var sequential_time = now() - start_time
    # Tree-shaped reduction: pair elements up level by level, as a GPU would
    # (shown sequentially here; each level is embarrassingly parallel)
    start_time = now()
    var temp_data = Tensor[DType.float32](data_size)
    memcpy(temp_data.data(), data.data(), data_size)
    var active_size = data_size
    while active_size > 1:
        for i in range(active_size // 2):
            temp_data[i] = temp_data[2 * i] + temp_data[2 * i + 1]
        if active_size % 2 == 1:
            # Carry the unpaired last element into the next level
            temp_data[active_size // 2] = temp_data[active_size - 1]
        active_size = (active_size + 1) // 2
    var tree_sum = temp_data[0]
    var tree_time = now() - start_time
    print("Sequential sum:", sequential_sum)
    print("Tree sum:", tree_sum)
    print("Sequential time:", sequential_time, "ns")
    print("Tree time:", tree_time, "ns")

# 8. Cache-Oblivious Matrix Multiplication
fn cache_oblivious_multiply(A: DTypePointer[DType.float32], B: DTypePointer[DType.float32],
                            C: DTypePointer[DType.float32], n: Int, stride: Int,
                            block_size: Int = 64):
    # Recursive divide-and-conquer matmul that accumulates C += A * B.
    # `stride` is the row stride of the full matrices, so sub-blocks can be
    # addressed in place without copying.
    if n <= block_size:
        # Base case: straightforward triple loop on one block
        for i in range(n):
            for j in range(n):
                var sum: Float32 = 0.0
                for k in range(n):
                    sum += A[i * stride + k] * B[k * stride + j]
                C[i * stride + j] += sum
        return
    # Recursive case: split each matrix into four n/2 x n/2 blocks
    var h = n // 2
    var off12 = h               # top-right block
    var off21 = h * stride      # bottom-left block
    var off22 = h * stride + h  # bottom-right block
    # C11 += A11*B11 + A12*B21
    cache_oblivious_multiply(A, B, C, h, stride, block_size)
    cache_oblivious_multiply(A.offset(off12), B.offset(off21), C, h, stride, block_size)
    # C12 += A11*B12 + A12*B22
    cache_oblivious_multiply(A, B.offset(off12), C.offset(off12), h, stride, block_size)
    cache_oblivious_multiply(A.offset(off12), B.offset(off22), C.offset(off12), h, stride, block_size)
    # C21 += A21*B11 + A22*B21
    cache_oblivious_multiply(A.offset(off21), B, C.offset(off21), h, stride, block_size)
    cache_oblivious_multiply(A.offset(off22), B.offset(off21), C.offset(off21), h, stride, block_size)
    # C22 += A21*B12 + A22*B22
    cache_oblivious_multiply(A.offset(off21), B.offset(off12), C.offset(off22), h, stride, block_size)
    cache_oblivious_multiply(A.offset(off22), B.offset(off22), C.offset(off22), h, stride, block_size)

fn demonstrate_cache_optimization():
    print("\n=== Cache-Oblivious Matrix Multiplication ===")
    var size = 512  # Power of 2 keeps the recursive splits even
    var A = Tensor[DType.float32](size, size)
    var B = Tensor[DType.float32](size, size)
    var C = Tensor[DType.float32](size, size)
    # Initialize matrices; C starts at zero because the algorithm accumulates
    for i in range(size):
        for j in range(size):
            A[i * size + j] = Float32((i + j) % 100) / 100.0
            B[i * size + j] = Float32((i * j) % 100) / 100.0
            C[i * size + j] = 0.0
    var start_time = now()
    cache_oblivious_multiply(A.data(), B.data(), C.data(), size, size)
    var cache_time = now() - start_time
    print("Cache-oblivious multiplication time:", cache_time, "ns")
    print("Result[256, 256] =", C[256 * size + 256])

# 9. Memory Pool Allocator for AI Workloads
struct MemoryPool:
    # A simple bump allocator. Note: no @value here, because the synthesized
    # copy would lead to the buffer being freed twice.
    var buffer: DTypePointer[DType.uint8]
    var size: Int
    var offset: Int

    fn __init__(inout self, size: Int):
        self.size = size
        self.buffer = DTypePointer[DType.uint8].alloc(size)
        self.offset = 0

    fn __del__(owned self):
        self.buffer.free()

    fn allocate_f32(inout self, count: Int) -> DTypePointer[DType.float32]:
        var required_bytes = count * 4  # sizeof float32
        var aligned_offset = (self.offset + 7) & ~7  # round up to 8-byte alignment
        if aligned_offset + required_bytes > self.size:
            # Out of memory: hand back a null pointer (assuming default
            # construction yields null, as in early releases)
            return DTypePointer[DType.float32]()
        var result = self.buffer.offset(aligned_offset).bitcast[DType.float32]()
        self.offset = aligned_offset + required_bytes
        return result

    fn reset(inout self):
        self.offset = 0
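For intuition about the bump allocator's alignment step, a worked example of the round-up arithmetic (my annotation, not part of the original listing):

# (offset + 7) & ~7 rounds offset up to the next multiple of 8:
#   offset = 13  ->  (13 + 7) & ~7  =  20 & ~7  =  16
#   offset = 16  ->  (16 + 7) & ~7  =  23 & ~7  =  16  (already aligned)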
fn demonstrate_memory_pool():
    print("\n=== Memory Pool Allocator ===")
    var pool = MemoryPool(1024 * 1024)  # 1 MB pool
    # Allocate two float32 buffers from the pool
    var tensor1 = pool.allocate_f32(1000)
    var tensor2 = pool.allocate_f32(2000)
    # 1000 and 2000 floats = 4000 and 8000 bytes, both already 8-byte aligned
    print("Pool offset after two allocations:", pool.offset, "bytes")
    tensor1[0] = 1.0
    tensor2[0] = 2.0
    print("tensor1[0] =", tensor1[0], " tensor2[0] =", tensor2[0])
    # Reset and reuse the same underlying buffer
    pool.reset()
    var tensor3 = pool.allocate_f32(5000)
    tensor3[0] = 3.0
    print("After reset - pool offset:", pool.offset, "bytes")

# Main function demonstrating all high-performance features
fn main():
    print("=== Mojo High-Performance Computing ===\n")
    simd_vector_operations()
    demonstrate_matrix_ops()
    parallel_processing()
    memory_optimized_operations()
    vectorized_operations()
    custom_ai_kernels()
    parallel_reduction()
    demonstrate_cache_optimization()
    demonstrate_memory_pool()
    print("\n=== All High-Performance Features Completed ===")