Code Snippet Intermediate
Python • Updated Jan 13, 2024
Data Structures Helper Functions
Utility functions for working with lists, dictionaries, sets, and other Python data structures
Table of Contents (18 sections)
Reading Progress
0%
Data Structures Helper Functions
List Utilities #
Remove duplicates while preserving order #
def remove_duplicates(lst):
    """Return a new list without repeated items, keeping first occurrences.

    Items must be hashable because membership is tracked in a set.
    """
    already_seen = set()
    # set.add() returns None, so a first sighting is recorded and still
    # passes the filter; later sightings short-circuit on the `in` test.
    return [
        element
        for element in lst
        if not (element in already_seen or already_seen.add(element))
    ]
# Alternative using dict (Python 3.7+)
def remove_duplicates_dict(lst):
    """Return the unique items of ``lst`` in first-seen order.

    Relies on dict keys being unique and insertion-ordered.
    """
    ordered_keys = dict.fromkeys(lst)
    return list(ordered_keys)
# Example: repeated values are dropped and first-occurrence order is kept.
numbers = [1, 2, 2, 3, 1, 4, 3]
unique = remove_duplicates(numbers) # [1, 2, 3, 4]
Flatten nested lists #
def flatten(nested_list):
    """Flatten a nested list of any depth into a single flat list.

    Only ``list`` instances are expanded; tuples, strings and other
    iterables are kept as single items. Traversal uses an explicit stack
    instead of recursion, so nesting depth is not limited by the
    interpreter's recursion limit.

    Args:
        nested_list: An iterable whose items may themselves be lists.

    Returns:
        A new list with every non-list item, in left-to-right order.
    """
    result = []
    # Each stack entry is a partially consumed iterator over one nesting level.
    stack = [iter(nested_list)]
    while stack:
        for item in stack[-1]:
            if isinstance(item, list):
                # Descend; the parent iterator stays on the stack and is
                # resumed exactly where it stopped once the child is done.
                stack.append(iter(item))
                break
            result.append(item)
        else:
            # The current level is exhausted: return to its parent.
            stack.pop()
    return result
# Alternative using itertools
from itertools import chain
def flatten_one_level(nested_list):
    """Concatenate the items of each inner iterable, removing one nesting level."""
    return [element for inner in nested_list for element in inner]
# Examples: flatten() expands every nested list; flatten_one_level() removes
# only the outermost level of nesting.
nested = [1, [2, 3], [4, [5, 6]], 7]
flat = flatten(nested) # [1, 2, 3, 4, 5, 6, 7]
one_level = [[1, 2], [3, 4], [5]]
flat_one = flatten_one_level(one_level) # [1, 2, 3, 4, 5]
Chunk list into smaller lists #
def chunk_list(lst, chunk_size):
    """Split ``lst`` into consecutive slices of at most ``chunk_size`` items.

    The final slice is shorter when the length is not a multiple of
    ``chunk_size``.
    """
    chunks = []
    for start in range(0, len(lst), chunk_size):
        chunks.append(lst[start:start + chunk_size])
    return chunks
def chunk_list_generator(lst, chunk_size):
    """Lazily yield consecutive slices of ``lst`` holding at most ``chunk_size`` items."""
    starts = range(0, len(lst), chunk_size)
    yield from (lst[start:start + chunk_size] for start in starts)
# Example: ten items in chunks of three leave a shorter final chunk.
numbers = list(range(10))
chunks = chunk_list(numbers, 3) # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
Find common and different elements #
def list_intersection(list1, list2):
    """Return the distinct elements present in both lists.

    The result is built from sets, so duplicates are collapsed and the
    order of the returned list is not defined.
    """
    left = set(list1)
    right = set(list2)
    return list(left.intersection(right))
def list_difference(list1, list2):
    """Return the distinct elements of ``list1`` that do not occur in ``list2``.

    The result is built from sets, so duplicates are collapsed and the
    order of the returned list is not defined.
    """
    left = set(list1)
    right = set(list2)
    return list(left.difference(right))
def list_symmetric_difference(list1, list2):
    """Return the distinct elements found in exactly one of the two lists.

    The result is built from sets, so duplicates are collapsed and the
    order of the returned list is not defined.
    """
    left = set(list1)
    right = set(list2)
    return list(left.symmetric_difference(right))
# Examples: all three helpers go through sets, so the element order of the
# returned lists is not guaranteed; the comments show the contents only.
list1 = [1, 2, 3, 4, 5]
list2 = [4, 5, 6, 7, 8]
common = list_intersection(list1, list2) # contains 4, 5
diff = list_difference(list1, list2) # contains 1, 2, 3
sym_diff = list_symmetric_difference(list1, list2) # contains 1, 2, 3, 6, 7, 8
Dictionary Utilities #
Merge dictionaries #
def merge_dicts(*dicts):
    """Combine any number of dictionaries into one new dictionary.

    Arguments are applied left to right, so when a key appears more than
    once the value from the later argument wins. Inputs are not modified.
    """
    merged = {}
    for source in dicts:
        merged.update(source)
    return merged
# Python 3.9+ using union operator
def merge_dicts_modern(dict1, dict2):
    """Merge two dictionaries with the ``|`` union operator (Python 3.9+).

    Values from ``dict2`` take precedence for keys present in both.
    """
    combined = dict1 | dict2
    return combined
# Deep merge for nested dictionaries
def deep_merge(dict1, dict2):
    """Recursively merge ``dict2`` into a copy of ``dict1``.

    When both sides hold a dict under the same key the two are merged
    recursively; in every other case the value from ``dict2`` replaces the
    one from ``dict1``. The top-level copy is shallow, so nested values
    that are not merged are shared with the inputs.
    """
    merged = dict1.copy()
    for key, incoming in dict2.items():
        both_are_dicts = (
            key in merged
            and isinstance(merged[key], dict)
            and isinstance(incoming, dict)
        )
        merged[key] = deep_merge(merged[key], incoming) if both_are_dicts else incoming
    return merged
# Example: for the shared key 'b' the value from the later dictionary wins.
dict1 = {'a': 1, 'b': 2}
dict2 = {'b': 3, 'c': 4}
merged = merge_dicts(dict1, dict2) # {'a': 1, 'b': 3, 'c': 4}
Filter and transform dictionaries #
def filter_dict(d, condition):
    """Return a new dict holding the entries for which ``condition(key, value)`` is truthy."""
    kept = {}
    for key, value in d.items():
        if condition(key, value):
            kept[key] = value
    return kept
def transform_dict_values(d, transform_func):
    """Return a new dict with the same keys and ``transform_func`` applied to each value."""
    converted = {}
    for key, value in d.items():
        converted[key] = transform_func(value)
    return converted
def transform_dict_keys(d, transform_func):
    """Return a new dict with ``transform_func`` applied to each key.

    If two keys map to the same transformed key, the entry seen later in
    iteration order overwrites the earlier one.
    """
    renamed = {}
    for key, value in d.items():
        renamed[transform_func(key)] = value
    return renamed
# Examples: each helper returns a new dictionary built from `data`.
data = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
# Keep only the entries whose value is even
even_values = filter_dict(data, lambda k, v: v % 2 == 0) # {'b': 2, 'd': 4}
# Square every value; keys are unchanged
squared = transform_dict_values(data, lambda x: x ** 2) # {'a': 1, 'b': 4, 'c': 9, 'd': 16}
# Uppercase every key; values are unchanged
upper_keys = transform_dict_keys(data, str.upper) # {'A': 1, 'B': 2, 'C': 3, 'D': 4}
Invert dictionary #
def invert_dict(d):
    """Return a new dict mapping each value of ``d`` back to its key.

    Values must be hashable. When several keys share a value, the key
    seen last in iteration order is the one that is kept.
    """
    flipped = {}
    for key, value in d.items():
        flipped[value] = key
    return flipped
def invert_dict_with_lists(d):
    """Invert ``d``, collecting every key that shares a value into a list.

    Returns a dict mapping each value to the list of keys that held it,
    in the order the keys were encountered.
    """
    keys_by_value = {}
    for key, value in d.items():
        # setdefault creates the bucket on first sight of a value.
        keys_by_value.setdefault(value, []).append(key)
    return keys_by_value
# Examples: invert_dict() suits unique values; invert_dict_with_lists()
# keeps every key when values repeat.
original = {'a': 1, 'b': 2, 'c': 3}
inverted = invert_dict(original) # {1: 'a', 2: 'b', 3: 'c'}
with_duplicates = {'a': 1, 'b': 1, 'c': 2}
inverted_with_lists = invert_dict_with_lists(with_duplicates) # {1: ['a', 'b'], 2: ['c']}
Set Operations #
Set utilities #
def powerset(s):
    """Return every subset of ``s`` as a list of sets.

    Subsets are listed by increasing size, starting with the empty set
    and ending with the full collection.
    """
    from itertools import combinations

    return [
        set(members)
        for size in range(len(s) + 1)
        for members in combinations(s, size)
    ]
def jaccard_similarity(set1, set2):
    """Return the Jaccard similarity: shared elements divided by distinct elements.

    Returns 0 when both sets are empty, which avoids dividing by zero.
    """
    shared_count = len(set1 & set2)
    combined_count = len(set1 | set2)
    if combined_count > 0:
        return shared_count / combined_count
    return 0
# Examples
s = {1, 2, 3}
# Subsets come out by increasing size; the order within each size follows the
# iteration order of `s`, so the listing below shows the typical result.
subsets = powerset(s) # [set(), {1}, {2}, {3}, {1, 2}, {1, 3}, {2, 3}, {1, 2, 3}]
set1 = {1, 2, 3, 4}
set2 = {3, 4, 5, 6}
similarity = jaccard_similarity(set1, set2) # 0.333... (2 shared elements out of 6 distinct)
Advanced Data Structure Operations #
Group items #
def group_by(items, key_func):
    """Bucket ``items`` by the value ``key_func`` returns for each one.

    Returns a plain dict mapping each key to the list of items that
    produced it; items keep their input order within a bucket.
    """
    buckets = {}
    for element in items:
        buckets.setdefault(key_func(element), []).append(element)
    return buckets
def group_by_attribute(objects, attribute):
    """Group ``objects`` by the value of the attribute named ``attribute``.

    Delegates to ``group_by`` with a key function that reads the attribute
    via ``getattr``.
    """
    def read_attribute(obj):
        return getattr(obj, attribute)

    return group_by(objects, read_attribute)
# Examples: group keys appear in the order they are first produced.
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9]
grouped = group_by(numbers, lambda x: x % 3) # {1: [1, 4, 7], 2: [2, 5, 8], 0: [3, 6, 9]}
words = ['apple', 'banana', 'apricot', 'cherry']
by_first_letter = group_by(words, lambda word: word[0]) # {'a': ['apple', 'apricot'], 'b': ['banana'], 'c': ['cherry']}
Counter utilities #
from collections import Counter
def most_common_items(items, n=5):
    """Return the ``n`` most frequent items as ``(item, count)`` pairs, most frequent first."""
    tally = Counter(items)
    return tally.most_common(n)
def least_common_items(items, n=5):
    """Return the ``n`` least frequent items as ``(item, count)`` pairs, rarest first."""
    ranked = Counter(items).most_common()
    # Reverse the most-common ranking so the rarest entries lead, then cut.
    ranked.reverse()
    return ranked[:n]
def item_frequencies(items):
    """Return each distinct item's share of ``items`` as a percentage (0-100).

    An empty input produces an empty dict.
    """
    tally = Counter(items)
    total = sum(tally.values())
    percentages = {}
    for item, count in tally.items():
        percentages[item] = (count / total) * 100
    return percentages
# Examples
data = ['a', 'b', 'a', 'c', 'b', 'a', 'd']
most_common = most_common_items(data, 2) # [('a', 3), ('b', 2)]
# The values are unrounded floats; the comment shows them to two decimals.
frequencies = item_frequencies(data) # approx. {'a': 42.86, 'b': 28.57, 'c': 14.29, 'd': 14.29}
Nested data access #
def get_nested_value(data, path, default=None):
    """Look up a value in nested dicts/lists using a dot-separated path.

    Each path segment is used as a dict key, or converted to an integer
    index when the current container is a list. ``default`` is returned
    when a segment is missing, an index is invalid, or a non-container is
    reached before the path is exhausted.
    """
    node = data
    for segment in path.split('.'):
        try:
            if isinstance(node, dict):
                node = node[segment]
            elif isinstance(node, list):
                node = node[int(segment)]
            else:
                # Reached a scalar while path segments remain.
                return default
        except (KeyError, IndexError, ValueError, TypeError):
            return default
    return node
def set_nested_value(data, path, value):
    """Set a value inside nested dicts/lists using a dot-separated path.

    Missing intermediate dict keys are created as empty dicts. When the
    current container is a list, the path segment is converted to an
    integer index, mirroring how ``get_nested_value`` reads paths such as
    ``'user.posts.0.title'``. ``data`` is modified in place.

    Args:
        data: The nested structure to modify.
        path: Dot-separated keys and list indices, e.g. ``'a.b.0.c'``.
        value: The value to store at the final path segment.

    Returns:
        None.

    Raises:
        ValueError: A segment addressing a list is not an integer.
        IndexError: A list index is out of range (lists are not extended).
        TypeError: An intermediate value is neither a dict nor a list.
    """
    keys = path.split('.')
    current = data
    for key in keys[:-1]:
        if isinstance(current, list):
            # Lists are addressed by position; never auto-create entries.
            current = current[int(key)]
            continue
        if key not in current:
            current[key] = {}
        current = current[key]
    last_key = keys[-1]
    if isinstance(current, list):
        current[int(last_key)] = value
    else:
        current[last_key] = value
# Examples: path segments address dict keys; numeric segments address list
# positions.
nested_data = {
    'user': {
        'profile': {
            'name': 'Alice',
            'settings': {
                'theme': 'dark'
            }
        },
        'posts': [
            {'title': 'First Post'},
            {'title': 'Second Post'}
        ]
    }
}
name = get_nested_value(nested_data, 'user.profile.name') # 'Alice'
theme = get_nested_value(nested_data, 'user.profile.settings.theme') # 'dark'
first_post = get_nested_value(nested_data, 'user.posts.0.title') # 'First Post'
# Modifies nested_data in place: adds 'age': 30 under user -> profile.
set_nested_value(nested_data, 'user.profile.age', 30)
Data validation #
def validate_schema(data, schema):
    """Check that ``data`` is a dict containing every schema key with an allowed type.

    Each schema value is either the string ``'any'`` (accepts anything), a
    type (checked with ``isinstance``), or a list of such specs (the value
    must satisfy at least one of them). Any other spec never matches.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` is ``"Valid"`` on
        success, otherwise a description of the first problem found.
    """
    def matches(value, spec):
        # 'any' is a wildcard that accepts every value.
        if spec == 'any':
            return True
        if isinstance(spec, type):
            return isinstance(value, spec)
        if isinstance(spec, list):
            # A list of specs means "one of these".
            for option in spec:
                if matches(value, option):
                    return True
            return False
        return False

    if not isinstance(data, dict):
        return False, "Data must be a dictionary"

    for key, expected_type in schema.items():
        if key not in data:
            return False, f"Missing required key: {key}"
        actual = data[key]
        if matches(actual, expected_type):
            continue
        return False, f"Invalid type for {key}: expected {expected_type}, got {type(actual)}"

    return True, "Valid"
# Example: every key in user_schema must be present in user_data with a
# value of the listed type.
user_schema = {
    'name': str,
    'age': int,
    'email': str,
    'is_active': bool,
    'tags': list
}
user_data = {
    'name': 'Alice',
    'age': 30,
    # NOTE(review): this value looks like a placeholder left by e-mail
    # obfuscation when the page was extracted, not a real address - confirm.
    'email': '[email protected]',
    'is_active': True,
    'tags': ['python', 'developer']
}
is_valid, message = validate_schema(user_data, user_schema) # (True, "Valid")
Performance utilities #
import time
from functools import wraps
def memoize(func):
    """Decorator that caches results of ``func`` per distinct call arguments.

    The cache key is the string form of the positional arguments followed
    by the string form of the sorted keyword items, so arguments need not
    be hashable. The cache dict is exposed as ``wrapper.cache``.
    """
    results = {}

    @wraps(func)
    def wrapper(*args, **kwargs):
        positional_part = str(args)
        keyword_part = str(sorted(kwargs.items()))
        key = positional_part + keyword_part
        try:
            return results[key]
        except KeyError:
            pass
        # Cache miss: compute once and remember the outcome.
        results[key] = func(*args, **kwargs)
        return results[key]

    wrapper.cache = results
    return wrapper
def time_function(func):
    """Decorator that prints how long each call to ``func`` takes.

    After every successful call a line of the form
    ``"<name> took <seconds> seconds"`` is printed, with the duration shown
    to four decimal places. Nothing is printed if ``func`` raises.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all arguments to ``func`` and returns its
        result unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring intervals; time.time() can jump when the system clock
        # is adjusted.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"{func.__name__} took {elapsed:.4f} seconds")
        return result

    return wrapper
# Examples
@memoize
def fibonacci(n):
    """Return the n-th Fibonacci number; any ``n <= 1`` is returned unchanged.

    Memoized, so each distinct ``n`` is computed only once.
    """
    return n if n <= 1 else fibonacci(n - 1) + fibonacci(n - 2)
@time_function
def slow_operation():
    """Sleep for one second and return "Done"; demonstrates the timing decorator."""
    outcome = "Done"
    time.sleep(1)
    return outcome