Python for ML
55 min

Data Structures in Python

Lists, dictionaries, sets, and tuples for ML data handling

Progress: 25%

Lists

Lists are ordered, mutable sequences. Essential for storing datasets, features, and predictions.

### Creating and Accessing Lists

# Creating lists
numbers = [1, 2, 3, 4, 5]
mixed = [1, "hello", 3.14, True]   # a list may hold values of different types
nested = [[1, 2], [3, 4], [5, 6]]  # a list of lists

# Accessing elements (0-indexed)
print(numbers[0])    # 1 (first element)
print(numbers[-1])   # 5 (last element)
print(numbers[1:4])  # [2, 3, 4] (slicing)
print(numbers[::2])  # [1, 3, 5] (every 2nd)

# Nested access
print(nested[0][1])  # 2

### List Operations

features = [0.5, 0.8, 0.3]

# Adding elements
features.append(0.9)         # [0.5, 0.8, 0.3, 0.9]
features.insert(0, 0.1)      # [0.1, 0.5, 0.8, 0.3, 0.9]
features.extend([0.6, 0.7])  # [0.1, 0.5, 0.8, 0.3, 0.9, 0.6, 0.7]

# Removing elements
features.pop()        # Removes and returns last: 0.7
features.remove(0.8)  # Removes first occurrence of 0.8
del features[0]       # Removes element at index 0

# Useful methods
nums = [3, 1, 4, 1, 5, 9, 2, 6]
print(len(nums))      # 8
print(min(nums))      # 1
print(max(nums))      # 9
print(sum(nums))      # 31
print(nums.count(1))  # 2 (count occurrences)
print(nums.index(4))  # 2 (find index)

# Sorting
nums.sort()                               # In-place: [1, 1, 2, 3, 4, 5, 6, 9]
sorted_desc = sorted(nums, reverse=True)  # New list, descending

### List Comprehensions for ML

# Feature scaling
raw_features = [10, 20, 30, 40, 50]
max_val = max(raw_features)
scaled = [x / max_val for x in raw_features]
print(scaled)  # [0.2, 0.4, 0.6, 0.8, 1.0]

# Filtering
predictions = [0.1, 0.6, 0.3, 0.8, 0.55, 0.9]
positive = [p for p in predictions if p >= 0.5]
print(positive)  # [0.6, 0.8, 0.55, 0.9]

# Transforming with condition
labels = [1 if p >= 0.5 else 0 for p in predictions]
print(labels)  # [0, 1, 0, 1, 1, 1]

# Flattening nested lists
batches = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]
# The outer loop comes first: for each batch, for each item in that batch
flat = [item for batch in batches for item in batch]
print(flat)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

Hands-On Exercise: Build a Feature Store

Create a class that efficiently stores and retrieves ML features

class FeatureStore:
    """
    A feature store that:
    1. Stores features as (entity_id, feature_name) -> value
    2. Tracks which features exist
    3. Can retrieve all features for an entity
    4. Can find entities with a specific feature value

    Values are kept in a nested dict (entity_id -> {feature_name: value}) so
    that fetching every feature of one entity does not scan the whole store.
    """

    def __init__(self):
        """Create an empty store."""
        # entity_id -> {feature_name: value}
        self._features = {}
        # Every feature name that has been stored, across all entities
        self._feature_names = set()

    def set_feature(self, entity_id, feature_name, value):
        """Store a feature value, overwriting any previous value for the pair."""
        # setdefault creates the per-entity dict the first time an entity is seen
        self._features.setdefault(entity_id, {})[feature_name] = value
        self._feature_names.add(feature_name)

    def get_feature(self, entity_id, feature_name, default=None):
        """Get a feature value, return default if not found

        `default` is returned both when the entity is unknown and when the
        entity exists but has no such feature.
        """
        entity_features = self._features.get(entity_id)
        if entity_features is None:
            return default
        return entity_features.get(feature_name, default)

    def get_entity_features(self, entity_id):
        """Get all features for an entity as a dict

        Returns an empty dict for an unknown entity. The result is a copy, so
        mutating it does not change the store.
        """
        return dict(self._features.get(entity_id, {}))

    def get_all_feature_names(self):
        """Get set of all feature names

        The result is a copy, so mutating it does not change the store.
        """
        return set(self._feature_names)

    def find_entities(self, feature_name, value):
        """Get a list of entity ids whose `feature_name` equals `value`

        Entities that do not have the feature at all are never matched.
        Ids appear in the order the entities were first stored.
        """
        return [
            entity_id
            for entity_id, entity_features in self._features.items()
            if feature_name in entity_features
            and entity_features[feature_name] == value
        ]


# Exercise the FeatureStore implementation
store = FeatureStore()

# Populate the store: one (entity_id, feature_name, value) row per feature
sample_rows = [
    ("user_1", "age", 25),
    ("user_1", "income", 50000),
    ("user_2", "age", 30),
    ("user_2", "income", 75000),
    ("user_2", "score", 0.85),
]
for entity_id, feature_name, value in sample_rows:
    store.set_feature(entity_id, feature_name, value)

# Read the values back; "score" was never set for user_1, so the
# "N/A" default is passed for that lookup
user_1_age = store.get_feature("user_1", "age")
user_1_score = store.get_feature("user_1", "score", "N/A")
user_2_features = store.get_entity_features("user_2")
all_feature_names = store.get_all_feature_names()

print("User 1 age:", user_1_age)
print("User 1 score:", user_1_score)
print("User 2 features:", user_2_features)
print("All feature names:", all_feature_names)

Knowledge Check

Quiz

Question 1 of 5

Which data structure allows duplicate values and maintains order?

Next Lesson

NumPy Fundamentals

Master numerical computing with NumPy arrays