Functions in Python
Write reusable, modular code for ML pipelines and experiments
Function Basics
Functions are reusable blocks of code. In ML, you'll use them for:

- Data preprocessing steps
- Model training and evaluation
- Feature engineering
- Metrics calculation
### Defining Functions
# Basic function
def greet():
    """Print a fixed greeting to standard output."""
    print("Hello, ML Engineer!")


greet()  # Call the function
# Function with parameters
def greet_user(name):
    """Print a personalized greeting to standard output.

    Args:
        name: Value interpolated into the greeting text.
    """
    print(f"Hello, {name}!")


greet_user("Alice")  # Hello, Alice!
# Function with return value
def square(x):
    """Return x raised to the power of 2.

    Args:
        x: Value to square.

    Returns:
        The result of x ** 2.
    """
    return x ** 2


result = square(5)
print(result)  # 25
### Multiple Parameters and Returns
# Multiple parameters
def calculate_mse(y_true, y_pred):
    """Calculate Mean Squared Error.

    Args:
        y_true: Sequence of ground-truth values.
        y_pred: Sequence of predicted values, same length as y_true.

    Returns:
        Mean of the squared element-wise differences.

    Raises:
        ValueError: If the inputs differ in length or are empty.
    """
    n = len(y_true)
    if n != len(y_pred):
        # zip() stops at the shorter input, which would silently drop the
        # unmatched tail while still dividing by len(y_true).
        raise ValueError(
            f"y_true and y_pred must have the same length, got {n} and {len(y_pred)}"
        )
    if n == 0:
        raise ValueError("Cannot compute MSE of empty inputs")
    return sum((t - p) ** 2 for t, p in zip(y_true, y_pred)) / n


true_values = [1, 2, 3, 4, 5]
predictions = [1.1, 2.2, 2.8, 4.1, 5.2]
mse = calculate_mse(true_values, predictions)
print(f"MSE: {mse:.4f}")
# Multiple return values
def train_test_split(data, test_ratio=0.2):
    """Split data into train and test sets.

    The first part of data becomes the train set and the remainder the
    test set; the order of elements is preserved (no shuffling).

    Args:
        data: Sliceable sequence to split.
        test_ratio: Fraction of data assigned to the test set, in [0, 1].

    Returns:
        Tuple (train, test) of two slices of data.

    Raises:
        ValueError: If test_ratio is outside the range [0, 1].
    """
    if not 0 <= test_ratio <= 1:
        # An out-of-range ratio would produce a negative or oversized split
        # index and a silently meaningless split.
        raise ValueError(f"test_ratio must be between 0 and 1, got {test_ratio}")
    split_idx = int(len(data) * (1 - test_ratio))
    train = data[:split_idx]
    test = data[split_idx:]
    return train, test  # Returns a tuple


data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
train_data, test_data = train_test_split(data)
print(f"Train: {train_data}, Test: {test_data}")
### Docstrings
Document your functions:
def normalize(data, method="minmax"):
    """
    Normalize data using specified method.

    Args:
        data: List of numerical values
        method: Normalization method ('minmax' or 'zscore')

    Returns:
        List of normalized values. An empty input yields an empty list,
        and an input whose values are all equal yields all zeros.

    Raises:
        ValueError: If method is not 'minmax' or 'zscore'.

    Example:
        >>> normalize([1, 2, 3], method='minmax')
        [0.0, 0.5, 1.0]
    """
    # Validate the method first so a bad method is reported even for empty data.
    if method not in ("minmax", "zscore"):
        raise ValueError(f"Unknown method: {method}")
    if not data:
        return []

    min_val = min(data)
    max_val = max(data)
    if min_val == max_val:
        # Zero spread: both formulas would divide by zero, so map every
        # value to 0.0 instead.
        return [0.0] * len(data)

    if method == "minmax":
        spread = max_val - min_val
        return [(x - min_val) / spread for x in data]

    # method == "zscore": population standard deviation (divides by N).
    mean = sum(data) / len(data)
    std = (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
    return [(x - mean) / std for x in data]


# Use the function
normalized = normalize([10, 20, 30, 40, 50])
print(normalized)  # [0.0, 0.25, 0.5, 0.75, 1.0]
Hands-On Exercise: Build a Metrics Calculator
Create a flexible metrics calculation system for ML evaluation
def create_evaluator(metrics_dict):
    """
    Create an evaluator function from a dictionary of metrics.

    Args:
        metrics_dict: Dict mapping metric names to functions
            Each function takes (y_true, y_pred)

    Returns:
        Function that calculates all metrics. Called as
        evaluate(y_true, y_pred), it returns a dict mapping each metric
        name to that metric's value, in the order of metrics_dict.

    Example:
        evaluator = create_evaluator({
            "mse": lambda t, p: sum((a-b)**2 for a,b in zip(t,p))/len(t),
            "mae": lambda t, p: sum(abs(a-b) for a,b in zip(t,p))/len(t)
        })
        results = evaluator([1,2,3], [1.1, 2.2, 2.8])
        # Returns: {"mse": 0.03, "mae": 0.1666...}
    """
    # Copy the mapping so that later changes to the caller's dict do not
    # alter an evaluator that has already been created.
    metrics = dict(metrics_dict)

    def evaluate(y_true, y_pred):
        """Apply every registered metric to (y_true, y_pred)."""
        return {name: metric(y_true, y_pred) for name, metric in metrics.items()}

    return evaluate
# Test your function
# Each metric takes (y_true, y_pred) and returns a single number.
metrics = {
    "mse": lambda t, p: sum((a - b) ** 2 for a, b in zip(t, p)) / len(t),
    "mae": lambda t, p: sum(abs(a - b) for a, b in zip(t, p)) / len(t),
    "max_error": lambda t, p: max(abs(a - b) for a, b in zip(t, p)),
}
evaluator = create_evaluator(metrics)

y_true = [1.0, 2.0, 3.0, 4.0, 5.0]
y_pred = [1.1, 2.0, 3.2, 3.8, 5.1]
results = evaluator(y_true, y_pred)

# The loop relies on results being a dict of metric name -> numeric value.
print("Evaluation Results:")
for name, value in results.items():
    print(f" {name}: {value:.4f}")