Python for ML
50 min

Functions in Python

Write reusable, modular code for ML pipelines and experiments

Progress: 20%

Function Basics

Functions are reusable blocks of code. In ML, you'll use them for:

- Data preprocessing steps
- Model training and evaluation
- Feature engineering
- Metrics calculation

### Defining Functions

# Basic function
def greet():
    """Print a fixed greeting; takes no arguments and returns None."""
    greeting = "Hello, ML Engineer!"
    print(greeting)


# Call the function
greet()

# Function with parameters
def greet_user(name):
    """Print a greeting addressed to `name`.

    Args:
        name: Value interpolated into the greeting text
    """
    print(f"Hello, {name}!")


greet_user("Alice")  # Hello, Alice!

# Function with return value
def square(x):
    """Return x raised to the power of two.

    Args:
        x: Number to square

    Returns:
        x ** 2
    """
    return x ** 2


result = square(5)
print(result)  # 25

### Multiple Parameters and Returns

# Multiple parameters
def calculate_mse(y_true, y_pred):
    """Calculate Mean Squared Error.

    Args:
        y_true: Sequence of ground-truth values
        y_pred: Sequence of predicted values, same length as y_true

    Returns:
        Mean of the squared differences between paired values

    Raises:
        ValueError: If the inputs differ in length or are empty
    """
    n = len(y_true)
    if n != len(y_pred):
        # zip() would silently drop the unmatched tail while the sum is
        # still divided by n, producing a wrong result instead of an error.
        raise ValueError(
            f"Length mismatch: {n} true values vs {len(y_pred)} predictions"
        )
    if n == 0:
        raise ValueError("Cannot calculate MSE of empty input")
    return sum((t - p) ** 2 for t, p in zip(y_true, y_pred)) / n

true_values = [1, 2, 3, 4, 5]
predictions = [1.1, 2.2, 2.8, 4.1, 5.2]
mse = calculate_mse(true_values, predictions)
print(f"MSE: {mse:.4f}")  # MSE: 0.0280

# Multiple return values
def train_test_split(data, test_ratio=0.2):
    """Split data into train and test sets.

    Args:
        data: Sequence supporting len() and slicing
        test_ratio: Fraction of the items placed in the test set (0 to 1)

    Returns:
        Tuple (train, test): train holds the leading items, test the rest

    Raises:
        ValueError: If test_ratio is outside the range 0 to 1
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError(f"test_ratio must be between 0 and 1, got {test_ratio}")
    # Round away float noise before truncating: 10 * (1 - 0.9) evaluates to
    # 0.9999999999999998, which int() alone would truncate to 0.
    split_idx = int(round(len(data) * (1 - test_ratio), 9))
    train = data[:split_idx]
    test = data[split_idx:]
    return train, test  # Returns a tuple


data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
train_data, test_data = train_test_split(data)
print(f"Train: {train_data}, Test: {test_data}")

### Docstrings

Document your functions:

def normalize(data, method="minmax"):
    """
    Normalize data using specified method.

    Args:
        data: List of numerical values
        method: Normalization method ('minmax' or 'zscore')

    Returns:
        List of normalized values. Empty input gives an empty list, and
        input whose values are all equal gives a list of zeros.

    Raises:
        ValueError: If method is not 'minmax' or 'zscore'

    Example:
        >>> normalize([1, 2, 3], method='minmax')
        [0.0, 0.5, 1.0]
    """
    # Validate the method first so a bad name is reported even for empty data
    if method not in ("minmax", "zscore"):
        raise ValueError(f"Unknown method: {method}")

    # len() instead of a truthiness test, in case data is not a plain list
    if len(data) == 0:
        return []

    if method == "minmax":
        min_val = min(data)
        span = max(data) - min_val
        if span == 0:
            # All values identical: there is no range to scale by
            return [0.0 for _ in data]
        return [(x - min_val) / span for x in data]

    # method == "zscore": population standard deviation (divides by len(data))
    mean = sum(data) / len(data)
    std = (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
    if std == 0:
        # Zero spread: every value sits exactly on the mean
        return [0.0 for _ in data]
    return [(x - mean) / std for x in data]

# Use the function
normalized = normalize([10, 20, 30, 40, 50])
print(normalized)  # [0.0, 0.25, 0.5, 0.75, 1.0]

Hands-On Exercise: Build a Metrics Calculator

Create a flexible metrics calculation system for ML evaluation

def create_evaluator(metrics_dict):
    """
    Create an evaluator function from a dictionary of metrics.

    Args:
        metrics_dict: Dict mapping metric names to functions
                      Each function takes (y_true, y_pred)

    Returns:
        Function that calculates all metrics. It takes (y_true, y_pred)
        and returns a dict mapping each metric name to that metric's
        value, in the same order as metrics_dict.

    Example:
        evaluator = create_evaluator({
            "mse": lambda t, p: sum((a-b)**2 for a,b in zip(t,p))/len(t),
            "mae": lambda t, p: sum(abs(a-b) for a,b in zip(t,p))/len(t)
        })
        results = evaluator([1,2,3], [1.1, 2.2, 2.8])
        # Returns: {"mse": 0.03, "mae": 0.1666...}
    """
    # Copy the mapping so later changes to the caller's dict do not alter
    # an evaluator that has already been created.
    metric_fns = dict(metrics_dict)

    def evaluate(y_true, y_pred):
        """Apply every registered metric to (y_true, y_pred)."""
        return {
            name: metric_fn(y_true, y_pred)
            for name, metric_fn in metric_fns.items()
        }

    return evaluate


# Test your function
# Three error metrics, each taking (y_true, y_pred) as (t, p)
metrics = dict(
    mse=lambda t, p: sum((a - b) ** 2 for a, b in zip(t, p)) / len(t),
    mae=lambda t, p: sum(abs(a - b) for a, b in zip(t, p)) / len(t),
    max_error=lambda t, p: max(abs(a - b) for a, b in zip(t, p)),
)

evaluator = create_evaluator(metrics)

# Sample targets and predictions
y_true = [1.0, 2.0, 3.0, 4.0, 5.0]
y_pred = [1.1, 2.0, 3.2, 3.8, 5.1]

results = evaluator(y_true, y_pred)

# One line per metric, value shown to four decimal places
print("Evaluation Results:")
for name, value in results.items():
    print(f"  {name}: {value:.4f}")

Knowledge Check

Quiz

Question 1 of 5

What does `*args` capture in a function definition?

Next Lesson

Data Structures

Lists, dictionaries, sets for ML data handling