Data Structures Helper Functions

List Utilities #

Remove duplicates while preserving order #

def remove_duplicates(lst):
    """Return the items of ``lst`` with repeats removed, keeping first-seen order.

    Hashable items are tracked in a set. Unhashable items (for example lists
    or dicts) cannot be stored in a set, so they are tracked in a separate
    list and compared with ``==`` instead of raising ``TypeError``.

    Args:
        lst: Any iterable of items.

    Returns:
        A new list holding the first occurrence of each distinct item.
    """
    seen_hashable = set()
    seen_unhashable = []
    result = []
    for item in lst:
        try:
            if item in seen_hashable:
                continue
            seen_hashable.add(item)
        except TypeError:
            # Unhashable item: fall back to a linear equality scan.
            if item in seen_unhashable:
                continue
            seen_unhashable.append(item)
        result.append(item)
    return result

# Alternative that relies on dict preserving insertion order (Python 3.7+)
def remove_duplicates_dict(lst):
    """Return the distinct items of ``lst`` in first-seen order.

    The items become dictionary keys, so every item must be hashable.
    """
    ordered_keys = dict.fromkeys(lst)
    return [*ordered_keys]

# Example: repeated values are dropped and the first-seen order is kept
numbers = [1, 2, 2, 3, 1, 4, 3]
unique = remove_duplicates(numbers)  # [1, 2, 3, 4]

Flatten nested lists #

def flatten(nested_list):
    """Flatten a nested list of any depth into a single flat list.

    Only ``list`` instances are descended into; tuples, strings and every
    other value are kept as single items. The traversal uses an explicit
    stack rather than recursion, so nesting depth is not bounded by the
    interpreter's recursion limit.

    Args:
        nested_list: An iterable whose items may themselves be lists.

    Returns:
        A new list with all non-list items in left-to-right order.

    Raises:
        RecursionError: If a list (directly or indirectly) contains itself.
    """
    result = []
    # Each stack entry pairs a container's id with its partly consumed iterator.
    stack = [(id(nested_list), iter(nested_list))]
    # Ids of the containers currently being walked, used to detect cycles.
    active = {id(nested_list)}
    while stack:
        _, pending = stack[-1]
        for item in pending:
            if isinstance(item, list):
                if id(item) in active:
                    raise RecursionError(
                        "cannot flatten a list that contains itself"
                    )
                active.add(id(item))
                stack.append((id(item), iter(item)))
                # Descend now; the parent iterator resumes where it stopped.
                break
            result.append(item)
        else:
            # Current container is exhausted: return to its parent.
            finished_id, _ = stack.pop()
            active.discard(finished_id)
    return result

# Alternative using itertools
from itertools import chain

def flatten_one_level(nested_list):
    """Concatenate the immediate sub-iterables of ``nested_list`` into one list.

    Exactly one level of nesting is removed; anything nested deeper is kept
    as is. Every item of ``nested_list`` must itself be iterable.
    """
    return [element for inner in nested_list for element in inner]

# Examples
# Nesting of any depth is collapsed into a single flat list.
nested = [1, [2, 3], [4, [5, 6]], 7]
flat = flatten(nested)  # [1, 2, 3, 4, 5, 6, 7]

# Only the outermost level of nesting is removed here.
one_level = [[1, 2], [3, 4], [5]]
flat_one = flatten_one_level(one_level)  # [1, 2, 3, 4, 5]

Chunk list into smaller lists #

def chunk_list(lst, chunk_size):
    """Split a sequence into consecutive chunks of at most ``chunk_size`` items.

    Args:
        lst: A sliceable sequence with a length (list, tuple, str, ...).
        chunk_size: Positive number of items per chunk.

    Returns:
        A list of slices of ``lst``; the last one may be shorter.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative. Without this check
            a negative size produces an empty range and silently yields ``[]``.
    """
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]

def chunk_list_generator(lst, chunk_size):
    """Lazily yield consecutive chunks of at most ``chunk_size`` items.

    Generator counterpart of a list-returning chunker: only one slice is
    built at a time.

    Args:
        lst: A sliceable sequence with a length (list, tuple, str, ...).
        chunk_size: Positive number of items per chunk.

    Yields:
        Slices of ``lst``; the last one may be shorter.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative. Because this is a
            generator, the check runs when iteration starts, not at call time.
    """
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    for start in range(0, len(lst), chunk_size):
        yield lst[start:start + chunk_size]

# Example: ten items in chunks of three; the final chunk holds the remainder
numbers = list(range(10))
chunks = chunk_list(numbers, 3)  # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]

Find common and different elements #

def list_intersection(list1, list2):
    """Return the distinct elements of ``list1`` that also occur in ``list2``.

    The result follows the first-seen order of ``list1``, so it is the same
    on every run (iterating a set of strings is not). Elements must be
    hashable.

    Args:
        list1: Iterable that determines the order of the result.
        list2: Iterable of elements to match against.

    Returns:
        A list without duplicates.
    """
    members = set(list2)
    # dict.fromkeys de-duplicates list1 while keeping its order.
    return [item for item in dict.fromkeys(list1) if item in members]

def list_difference(list1, list2):
    """Return the distinct elements of ``list1`` that do not occur in ``list2``.

    The result follows the first-seen order of ``list1``, so it is the same
    on every run (iterating a set of strings is not). Elements must be
    hashable.

    Args:
        list1: Iterable to take elements from.
        list2: Iterable of elements to exclude.

    Returns:
        A list without duplicates.
    """
    excluded = set(list2)
    # dict.fromkeys de-duplicates list1 while keeping its order.
    return [item for item in dict.fromkeys(list1) if item not in excluded]

def list_symmetric_difference(list1, list2):
    """Return the distinct elements found in exactly one of the two inputs.

    Elements unique to ``list1`` come first, in their first-seen order,
    followed by the elements unique to ``list2`` in theirs. The order is
    therefore the same on every run (iterating a set of strings is not).
    Elements must be hashable.

    Args:
        list1: First iterable.
        list2: Second iterable.

    Returns:
        A list without duplicates.
    """
    # Dicts act as ordered sets: O(1) membership and stable iteration order.
    first = dict.fromkeys(list1)
    second = dict.fromkeys(list2)
    only_first = [item for item in first if item not in second]
    only_second = [item for item in second if item not in first]
    return only_first + only_second

# Examples
# The expected contents are listed in ascending order; an implementation that
# builds its result from set() operations may return them in another order.
list1 = [1, 2, 3, 4, 5]
list2 = [4, 5, 6, 7, 8]

common = list_intersection(list1, list2)  # [4, 5]
diff = list_difference(list1, list2)      # [1, 2, 3]
sym_diff = list_symmetric_difference(list1, list2)  # [1, 2, 3, 6, 7, 8]

Dictionary Utilities #

Merge dictionaries #

def merge_dicts(*dicts):
    """Combine any number of dictionaries into one new dictionary.

    The inputs are applied left to right, so when a key appears more than
    once the value from the rightmost input wins. The inputs themselves are
    not modified. Calling with no arguments returns an empty dict.
    """
    merged = {}
    for source in dicts:
        # update() overwrites keys that are already present.
        merged.update(source)
    return merged

# Python 3.9+ variant built on the dict union operator
def merge_dicts_modern(dict1, dict2):
    """Return a new dictionary holding the entries of both arguments.

    For keys present in both, the value from ``dict2`` is used. Neither
    argument is modified.
    """
    combined = dict1 | dict2
    return combined

# Deep merge for nested dictionaries
def deep_merge(dict1, dict2):
    """Recursively merge ``dict2`` into a copy of ``dict1``.

    When both sides hold a dict under the same key the two are merged
    recursively; in every other case the value from ``dict2`` replaces the
    one from ``dict1``.

    Nested dictionaries in the result are always fresh copies, so changing
    the merged structure never alters ``dict1`` or ``dict2``. Non-dict values
    (including lists) are shared with the inputs, not copied.

    Args:
        dict1: Base dictionary.
        dict2: Dictionary whose entries take precedence.

    Returns:
        A new merged dictionary.
    """
    result = dict1.copy()
    # Nested dicts that dict2 does not touch would otherwise be shared with dict1.
    for key, value in dict1.items():
        if key not in dict2 and isinstance(value, dict):
            result[key] = deep_merge(value, {})
    for key, value in dict2.items():
        if isinstance(value, dict):
            base = result.get(key)
            if isinstance(base, dict):
                result[key] = deep_merge(base, value)
            else:
                # Nothing to merge with: store a structural copy of dict2's value.
                result[key] = deep_merge(value, {})
        else:
            result[key] = value
    return result

# Example: 'b' is present in both inputs, so the value from the later dict wins
dict1 = {'a': 1, 'b': 2}
dict2 = {'b': 3, 'c': 4}
merged = merge_dicts(dict1, dict2)  # {'a': 1, 'b': 3, 'c': 4}

Filter and transform dictionaries #

def filter_dict(d, condition):
    """Return a new dict with the entries of ``d`` accepted by ``condition``.

    ``condition`` is called as ``condition(key, value)`` for every entry; the
    entry is kept when the call returns a truthy value.
    """
    kept = {}
    for key, value in d.items():
        if condition(key, value):
            kept[key] = value
    return kept

def transform_dict_values(d, transform_func):
    """Return a new dict with the same keys and ``transform_func`` applied to each value."""
    converted = {}
    for key, value in d.items():
        converted[key] = transform_func(value)
    return converted

def transform_dict_keys(d, transform_func):
    """Return a new dict with ``transform_func`` applied to each key.

    Values are carried over unchanged. If two keys map to the same new key,
    the entry that comes later in ``d`` overwrites the earlier one.
    """
    renamed = {}
    for key, value in d.items():
        renamed[transform_func(key)] = value
    return renamed

# Examples: all three helpers return a new dict
data = {'a': 1, 'b': 2, 'c': 3, 'd': 4}

# Keep only the entries whose value is even (this predicate ignores the key)
even_values = filter_dict(data, lambda k, v: v % 2 == 0)  # {'b': 2, 'd': 4}

# Square every value; keys are unchanged
squared = transform_dict_values(data, lambda x: x ** 2)  # {'a': 1, 'b': 4, 'c': 9, 'd': 16}

# Uppercase every key; values are unchanged
upper_keys = transform_dict_keys(data, str.upper)  # {'A': 1, 'B': 2, 'C': 3, 'D': 4}

Invert dictionary #

def invert_dict(d):
    """Return a new dict in which every value of ``d`` maps back to its key.

    Values must be hashable because they become keys. If a value occurs more
    than once, the key that comes last in ``d`` is the one kept.
    """
    return dict((value, key) for key, value in d.items())

def invert_dict_with_lists(d):
    """Invert ``d``, collecting every key that shares a value into a list.

    Each value of ``d`` becomes a key of the result and maps to the list of
    original keys that held it, in the order they appear in ``d``. Values
    must be hashable.
    """
    keys_by_value = {}
    for original_key, original_value in d.items():
        keys_by_value.setdefault(original_value, []).append(original_key)
    return keys_by_value

# Examples
# Every value is unique, so a plain key/value swap loses nothing.
original = {'a': 1, 'b': 2, 'c': 3}
inverted = invert_dict(original)  # {1: 'a', 2: 'b', 3: 'c'}

# 'a' and 'b' share the value 1, so their keys are collected in one list.
with_duplicates = {'a': 1, 'b': 1, 'c': 2}
inverted_with_lists = invert_dict_with_lists(with_duplicates)  # {1: ['a', 'b'], 2: ['c']}

Set Operations #

Set utilities #

def powerset(s):
    """Return every subset of ``s`` as a list of sets.

    Subsets are listed by increasing size, starting with the empty set and
    ending with the full collection. ``s`` must support ``len()`` and
    repeated iteration, and its elements must be hashable.
    """
    from itertools import combinations

    return [
        set(members)
        for size in range(len(s) + 1)
        for members in combinations(s, size)
    ]

def jaccard_similarity(set1, set2):
    """Return the Jaccard similarity of two sets.

    The similarity is the number of shared elements divided by the number of
    distinct elements across both sets. When both sets are empty the result
    is ``0``.
    """
    shared = set1 & set2
    combined = set1 | set2
    if len(combined) == 0:
        # Avoid dividing by zero when there are no elements at all.
        return 0
    return len(shared) / len(combined)

# Examples
# A 3-element set has 2 ** 3 = 8 subsets, listed here by increasing size.
s = {1, 2, 3}
subsets = powerset(s)  # [set(), {1}, {2}, {3}, {1, 2}, {1, 3}, {2, 3}, {1, 2, 3}]

set1 = {1, 2, 3, 4}
set2 = {3, 4, 5, 6}
similarity = jaccard_similarity(set1, set2)  # 2 / 6, about 0.33 (2 shared elements out of 6 distinct)

Advanced Data Structure Operations #

Group items #

def group_by(items, key_func):
    """Bucket ``items`` by the result of ``key_func``.

    Returns a plain dict that maps each key to the list of items producing
    it. Keys appear in the order they are first produced, and items keep
    their input order inside each list. Keys must be hashable.
    """
    buckets = {}
    for element in items:
        buckets.setdefault(key_func(element), []).append(element)
    return buckets

def group_by_attribute(objects, attribute):
    """Group ``objects`` by the value of the attribute named ``attribute``.

    Delegates to ``group_by``, using ``getattr(obj, attribute)`` as the key
    for each object.
    """
    def read_attribute(obj):
        return getattr(obj, attribute)

    return group_by(objects, read_attribute)

# Examples
# Group by remainder modulo 3; keys appear in the order they are first seen.
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9]
grouped = group_by(numbers, lambda x: x % 3)  # {1: [1, 4, 7], 2: [2, 5, 8], 0: [3, 6, 9]}

# Group words by their first character.
words = ['apple', 'banana', 'apricot', 'cherry']
by_first_letter = group_by(words, lambda word: word[0])  # {'a': ['apple', 'apricot'], 'b': ['banana'], 'c': ['cherry']}

Counter utilities #

from collections import Counter

def most_common_items(items, n=5):
    """Return the ``n`` most frequent items as ``(item, count)`` pairs.

    The pairs are ordered from most to least frequent, exactly as produced
    by ``Counter.most_common``.
    """
    tally = Counter(items)
    top_pairs = tally.most_common(n)
    return top_pairs

def least_common_items(items, n=5):
    """Return the ``n`` least frequent items as ``(item, count)`` pairs.

    The pairs are ordered from least to most frequent (the reverse of
    ``Counter.most_common()``).

    Args:
        items: Iterable of hashable items to count.
        n: How many pairs to return. ``None`` returns every pair; zero or a
            negative number returns an empty list, matching what
            ``Counter.most_common`` does for those arguments.

    Returns:
        A list of ``(item, count)`` tuples.
    """
    ranked = Counter(items).most_common()
    if n is None:
        return ranked[::-1]
    if n <= 0:
        # A negative n would otherwise turn into an unrelated slice bound.
        return []
    return ranked[:-n - 1:-1]

def item_frequencies(items):
    """Return each distinct item's share of ``items`` as a percentage.

    The result maps every item to ``count / total * 100`` as an unrounded
    float. An empty input yields an empty dict.
    """
    tally = Counter(items)
    total = sum(tally.values())
    percentages = {}
    for item, count in tally.items():
        percentages[item] = (count / total) * 100
    return percentages

# Examples
data = ['a', 'b', 'a', 'c', 'b', 'a', 'd']
most_common = most_common_items(data, 2)  # [('a', 3), ('b', 2)]
# The percentages below are rounded to two decimals for display; the function
# itself returns unrounded floats (for example 3 / 7 * 100 for 'a').
frequencies = item_frequencies(data)  # {'a': 42.86, 'b': 28.57, 'c': 14.29, 'd': 14.29}

Nested data access #

def get_nested_value(data, path, default=None):
    """Look up a value inside nested dicts and lists using a dotted path.

    Each path segment is used as a key when the current node is a dict and
    converted to an integer index when it is a list.

    Args:
        data: The nested structure to read from.
        path: Dot-separated segments, e.g. ``'user.posts.0.title'``.
        default: Returned when the path cannot be followed to the end.

    Returns:
        The value at ``path``, or ``default`` if a key is missing, an index
        is out of range or not an integer, or a node along the way is
        neither a dict nor a list.
    """
    segments = path.split('.')
    node = data
    try:
        for segment in segments:
            if isinstance(node, dict):
                node = node[segment]
                continue
            if not isinstance(node, list):
                # Anything that is not a dict or list cannot be descended into.
                return default
            node = node[int(segment)]
    except (KeyError, IndexError, ValueError, TypeError):
        return default
    return node

def set_nested_value(data, path, value):
    """Set a value inside nested dicts and lists using a dotted path.

    Missing intermediate dictionary keys are created as empty dicts. When a
    node along the path is a list, the segment is converted to an integer
    index, mirroring how ``get_nested_value`` reads such paths; lists are
    never extended.

    Args:
        data: The nested structure to modify in place.
        path: Dot-separated segments, e.g. ``'user.posts.0.title'``.
        value: The value to store at the final segment.

    Raises:
        ValueError: If a segment addressing a list is not an integer.
        IndexError: If a list index is out of range.
        TypeError: If a node along the path does not support item assignment.
    """
    keys = path.split('.')
    current = data
    for key in keys[:-1]:
        if isinstance(current, list):
            current = current[int(key)]
        else:
            if key not in current:
                current[key] = {}
            current = current[key]
    final_key = keys[-1]
    if isinstance(current, list):
        current[int(final_key)] = value
    else:
        current[final_key] = value

# Examples
# Sample structure mixing nested dicts with a list of dicts.
nested_data = {
    'user': {
        'profile': {
            'name': 'Alice',
            'settings': {
                'theme': 'dark'
            }
        },
        'posts': [
            {'title': 'First Post'},
            {'title': 'Second Post'}
        ]
    }
}

name = get_nested_value(nested_data, 'user.profile.name')  # 'Alice'
theme = get_nested_value(nested_data, 'user.profile.settings.theme')  # 'dark'
# A numeric segment ('0') is used as an index when the current node is a list.
first_post = get_nested_value(nested_data, 'user.posts.0.title')  # 'First Post'

# Modifies nested_data in place: adds the key 'age' to the existing 'profile' dict.
set_nested_value(nested_data, 'user.profile.age', 30)

Data validation #

def validate_schema(data, schema):
    """Check that ``data`` has every key in ``schema`` with an allowed type.

    Each schema value may be:
      * the string ``'any'``, which accepts every value;
      * a type, checked with ``isinstance``;
      * a list or tuple of the above, where one match is enough.

    Booleans are not accepted where ``int`` is expected: ``bool`` is a
    subclass of ``int``, so a plain ``isinstance`` check would let ``True``
    pass as a number. Keys in ``data`` that the schema does not mention are
    ignored.

    Args:
        data: The object to validate; must be a dict to be valid.
        schema: Mapping of required key to expected type specification.

    Returns:
        A ``(is_valid, message)`` tuple. The message is ``"Valid"`` on
        success and otherwise describes the first problem found.
    """
    def validate_value(value, expected_type):
        if expected_type == 'any':
            return True
        if isinstance(expected_type, type):
            if expected_type is int and isinstance(value, bool):
                # isinstance(True, int) is True; reject it explicitly.
                return False
            return isinstance(value, expected_type)
        if isinstance(expected_type, (list, tuple)):
            return any(validate_value(value, t) for t in expected_type)
        return False

    if not isinstance(data, dict):
        return False, "Data must be a dictionary"

    for key, expected_type in schema.items():
        if key not in data:
            return False, f"Missing required key: {key}"

        if not validate_value(data[key], expected_type):
            return False, f"Invalid type for {key}: expected {expected_type}, got {type(data[key])}"

    return True, "Valid"

# Example: every key in the schema is required and must have the listed type
user_schema = {
    'name': str,
    'age': int,
    'email': str,
    'is_active': bool,
    'tags': list
}

# The e-mail value is a placeholder address on the reserved example.com domain.
user_data = {
    'name': 'Alice',
    'age': 30,
    'email': 'alice@example.com',
    'is_active': True,
    'tags': ['python', 'developer']
}

is_valid, message = validate_schema(user_data, user_schema)  # (True, "Valid")

Performance utilities #

import time
from functools import wraps

def memoize(func):
    """Cache the results of ``func`` so repeated calls are not recomputed.

    The cache key is the string form of the positional arguments followed by
    the string form of the sorted keyword arguments. Arguments therefore do
    not need to be hashable, but two different arguments whose ``str()``
    output is identical share one cache entry.

    The cache dict is exposed as ``wrapper.cache`` and grows without bound.
    """
    cache = {}

    @wraps(func)
    def wrapper(*args, **kwargs):
        cache_key = str(args) + str(sorted(kwargs.items()))
        if cache_key in cache:
            return cache[cache_key]
        outcome = func(*args, **kwargs)
        cache[cache_key] = outcome
        return outcome

    wrapper.cache = cache
    return wrapper

def time_function(func):
    """Decorator that prints how long each call to ``func`` takes.

    The duration is measured with ``time.perf_counter()``, a monotonic
    high-resolution clock intended for timing intervals; unlike
    ``time.time()`` it is not affected by system clock adjustments. The
    message is printed only when ``func`` returns normally.

    Returns:
        A wrapper with the same signature and metadata as ``func`` that
        returns whatever ``func`` returns.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"{func.__name__} took {elapsed:.4f} seconds")
        return result
    return wrapper

# Examples
@memoize
def fibonacci(n):
    """Return the n-th Fibonacci number (``fibonacci(0) == 0``, ``fibonacci(1) == 1``).

    Any ``n`` of 1 or less is returned unchanged. The ``memoize`` decorator
    caches results, so each value is computed only once.
    """
    return n if n <= 1 else fibonacci(n - 1) + fibonacci(n - 2)

@time_function
def slow_operation():
    """Sleep for one second and return ``"Done"``; used to demonstrate the timing decorator."""
    time.sleep(1)
    return "Done"

PyGuide

PyGuide

Data Structures Helper Functions

Data Structures Helper Functions

List Utilities #

Remove duplicates while preserving order #

Flatten nested lists #

Chunk list into smaller lists #

Find common and different elements #

Dictionary Utilities #

Merge dictionaries #

Filter and transform dictionaries #

Invert dictionary #

Set Operations #

Set utilities #

Advanced Data Structure Operations #

Group items #

Counter utilities #

Nested data access #

Data validation #

Performance utilities #

Related Snippets

Python Dictionary Iteration Examples: Ready-to-Use Code

Python Compilation Code Examples and Scripts

Python Enumerate Zip: Ready-to-Use Code Examples

Python Error Frame Inspection Code Examples