# Data Structures in Python
Lists, dictionaries, sets, and tuples for ML data handling
## Lists
Lists are ordered, mutable sequences, which makes them essential for storing datasets, features, and predictions.
### Creating and Accessing Lists
# Creating lists
numbers = [1, 2, 3, 4, 5]
mixed = [1, "hello", 3.14, True]  # a single list may hold values of different types
nested = [[1, 2], [3, 4], [5, 6]]  # a list whose elements are themselves lists
# Accessing elements (0-indexed)
print(numbers[0])  # 1 (first element)
print(numbers[-1])  # 5 (last element; negative indices count from the end)
print(numbers[1:4])  # [2, 3, 4] (slicing: start index included, stop index excluded)
print(numbers[::2])  # [1, 3, 5] (every 2nd element)
# Nested access: the first index selects the inner list, the second its element
print(nested[0][1])  # 2
### List Operations
features = [0.5, 0.8, 0.3]
# Adding elements
features.append(0.9)  # [0.5, 0.8, 0.3, 0.9]
features.insert(0, 0.1)  # [0.1, 0.5, 0.8, 0.3, 0.9]
features.extend([0.6, 0.7])  # [0.1, 0.5, 0.8, 0.3, 0.9, 0.6, 0.7]
# Removing elements
features.pop()  # Removes and returns last: 0.7 -> [0.1, 0.5, 0.8, 0.3, 0.9, 0.6]
features.remove(0.8)  # Removes first occurrence of 0.8 -> [0.1, 0.5, 0.3, 0.9, 0.6]
del features[0]  # Removes element at index 0 -> [0.5, 0.3, 0.9, 0.6]
# Useful methods
nums = [3, 1, 4, 1, 5, 9, 2, 6]
print(len(nums))  # 8
print(min(nums))  # 1
print(max(nums))  # 9
print(sum(nums))  # 31
print(nums.count(1))  # 2 (count occurrences)
print(nums.index(4))  # 2 (index of the first occurrence)
# Sorting
nums.sort()  # In-place (returns None): [1, 1, 2, 3, 4, 5, 6, 9]
sorted_desc = sorted(nums, reverse=True)  # New list, descending: [9, 6, 5, 4, 3, 2, 1, 1]
### List Comprehensions for ML
# Feature scaling: divide every value by the maximum so the largest becomes 1.0
raw_features = [10, 20, 30, 40, 50]
max_val = max(raw_features)
scaled = [x / max_val for x in raw_features]
print(scaled)  # [0.2, 0.4, 0.6, 0.8, 1.0]
# Filtering: keep only the predictions at or above the 0.5 threshold
predictions = [0.1, 0.6, 0.3, 0.8, 0.55, 0.9]
positive = [p for p in predictions if p >= 0.5]
print(positive)  # [0.6, 0.8, 0.55, 0.9]
# Transforming with condition: map each prediction to a 0/1 label
labels = [1 if p >= 0.5 else 0 for p in predictions]
print(labels)  # [0, 1, 0, 1, 1, 1]
# Flattening nested lists: the outer loop walks batches, the inner loop walks items
batches = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]
flat = [item for batch in batches for item in batch]
print(flat)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]
## Hands-On Exercise: Build a Feature Store
Create a class that efficiently stores and retrieves ML features.
class FeatureStore:
    """In-memory store for ML feature values.

    A feature store that:
    1. Stores features as (entity_id, feature_name) -> value
    2. Tracks which features exist
    3. Can retrieve all features for an entity
    4. Can find entities with a specific feature value
    """

    def __init__(self):
        """Create an empty store."""
        # (entity_id, feature_name) -> value; a tuple key gives O(1) lookup
        # of a single feature without nesting dicts.
        self._values = {}
        # Every feature name that has been stored at least once.
        self._feature_names = set()

    def set_feature(self, entity_id, feature_name, value):
        """Store a feature value, overwriting any previous value.

        Args:
            entity_id: Hashable identifier of the entity (e.g. a user id).
            feature_name: Hashable name of the feature.
            value: The value to store.
        """
        self._values[(entity_id, feature_name)] = value
        self._feature_names.add(feature_name)

    def get_feature(self, entity_id, feature_name, default=None):
        """Get a feature value, return ``default`` if not found."""
        return self._values.get((entity_id, feature_name), default)

    def get_entity_features(self, entity_id):
        """Get all features for an entity as a ``{feature_name: value}`` dict.

        Returns an empty dict when nothing is stored for ``entity_id``.
        Features appear in the order they were first stored.
        """
        return {
            name: value
            for (stored_id, name), value in self._values.items()
            if stored_id == entity_id
        }

    def get_all_feature_names(self):
        """Get the set of all feature names stored so far.

        A copy is returned so callers cannot mutate the store's own set.
        """
        return set(self._feature_names)

    def find_entities(self, feature_name, value):
        """Get the set of entity ids whose ``feature_name`` equals ``value``."""
        return {
            stored_id
            for (stored_id, name), stored_value in self._values.items()
            if name == feature_name and stored_value == value
        }
# Try out the FeatureStore implementation
store = FeatureStore()
# Populate the store: one (entity_id, feature_name, value) row per feature,
# inserted in the order listed
for _row in (
    ("user_1", "age", 25),
    ("user_1", "income", 50000),
    ("user_2", "age", 30),
    ("user_2", "income", 75000),
    ("user_2", "score", 0.85),
):
    store.set_feature(*_row)
# Read back a stored value, a missing value with a fallback, and the bulk views
print("User 1 age:", store.get_feature("user_1", "age"))
print("User 1 score:", store.get_feature("user_1", "score", "N/A"))
print("User 2 features:", store.get_entity_features("user_2"))
print("All feature names:", store.get_all_feature_names())