Code Snippet Beginner
python • Updated Jan 14, 2024
String Manipulation Utilities
Useful string manipulation functions for common text processing tasks
Table of Contents: 24 sections
Reading Progress
0%
String Manipulation Utilities
Basic String Operations #
Case conversions #
text = "Hello World"

# Case conversions return new strings; `text` itself is never modified.
upper, lower = text.upper(), text.lower()  # "HELLO WORLD", "hello world"
title = text.title()                       # "Hello World" (every word capitalized)
capitalize = text.capitalize()             # "Hello world" (first letter only)

# Case predicates: mixed-case text is neither all-upper nor all-lower.
is_upper, is_lower = text.isupper(), text.islower()  # False, False
is_title = text.istitle()                            # True
String formatting #
name, age = "Alice", 30

# Preferred since Python 3.6: expressions are interpolated directly.
message = f"Hello, {name}! You are {age} years old."

# str.format() filling positional fields in order.
message = "Hello, {}! You are {} years old.".format(name, age)

# str.format() filling fields by keyword.
message = "Hello, {name}! You are {age} years old.".format(age=age, name=name)

# Legacy printf-style formatting with the % operator.
message = "Hello, %s! You are %d years old." % (name, age)
String Cleaning and Validation #
Remove whitespace #
text = " Hello World \n"

# Without an argument the strip family removes whitespace (spaces, tabs, newlines).
right_strip = text.rstrip()  # " Hello World"    (trailing end only)
left_strip = text.lstrip()   # "Hello World \n"  (leading end only)
stripped = text.strip()      # "Hello World"     (both ends)

# The argument is a set of characters to trim from both ends, not a substring.
cleaned = text.strip(' \n')  # "Hello World"
Validate string content #
def is_email(email):
    """Return True if *email* looks like a plausible e-mail address.

    This is a deliberately simple structural check, not full RFC validation.
    The string must:

    * contain exactly one "@",
    * have a non-empty part before the "@",
    * contain no whitespace, and
    * have a domain with a "." that is neither its first nor last character.
    """
    local, sep, domain = email.partition('@')
    # No "@" at all, nothing before it, or a second "@" after it.
    if not sep or not local or '@' in domain:
        return False
    if any(ch.isspace() for ch in email):
        return False
    # Slicing off both ends rejects domains such as ".com" and "example.".
    return '.' in domain[1:-1]
def is_phone_number(phone):
    """Return True if *phone* is made up of digits and common phone punctuation.

    Allowed characters are the ASCII digits, "+", "-", "(", ")" and space.
    At least one digit is required, so an empty string or a string of pure
    punctuation is not reported as a phone number.
    """
    digits = set('0123456789')
    allowed = digits | set('+-() ')
    # all() alone is vacuously true for "", hence the explicit digit check.
    return any(char in digits for char in phone) and all(
        char in allowed for char in phone
    )
def is_alphanumeric(text):
    """Report whether *text* consists solely of letters and digits.

    Delegates to ``text.isalnum()``, so an empty string yields False.
    """
    only_letters_and_digits = text.isalnum()
    return only_letters_and_digits
# Examples
# The e-mail literal is a real address shape: a scraper-obfuscated placeholder
# would not contain a "." after the "@" and would print False.
print(is_email("user@example.com"))  # True
print(is_phone_number("+1 (555) 123-4567"))  # True
print(is_alphanumeric("Hello123"))  # True
Text Search and Replace #
Find and replace #
text = "The quick brown fox jumps over the lazy dog"

# Plain substring replacement (every occurrence, case-sensitive).
new_text = text.replace("fox", "cat")

# A third argument caps the number of replacements; here only the first
# lowercase "the" is touched.
new_text = text.replace("the", "a", 1)

# str.replace() has no case-insensitive mode; use a regex with IGNORECASE.
import re
any_case_the = re.compile(r'the', flags=re.IGNORECASE)
new_text = any_case_the.sub('a', text)
Extract information #
import re
def extract_emails(text):
    """Return all e-mail-like substrings of *text*, in order of appearance.

    A match is a run of letters, digits and ``._%+-``, then "@", then a
    domain of letters, digits, dots and hyphens ending in a dot followed by
    a top-level domain of at least two ASCII letters.
    """
    # The TLD class is [A-Za-z]: inside a character class "|" is a literal,
    # so writing [A-Z|a-z] would wrongly accept "|" as part of the TLD.
    pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    return re.findall(pattern, text)
def extract_phone_numbers(text):
    """Find ten-digit phone numbers in *text* and return them as a list.

    Digits are grouped 3-3-4; each group boundary may carry one "-" or "."
    separator or none at all.
    """
    phone_re = re.compile(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b')
    return phone_re.findall(text)
def extract_urls(text):
    """Return the http/https URLs found in *text*, in order of appearance.

    A URL is "http://" or "https://" followed by one or more characters that
    RFC 3986 permits in a URI. Sentence punctuation directly after a URL
    (``. , ; : ! ?``) is not included in the result.
    """
    pattern = (
        # Unreserved + reserved URI characters and "%", listed explicitly
        # rather than through an accidental "$-_" range that also admitted
        # characters such as "<", ">", "\" and "^" while excluding "#" and "~".
        r"https?://[A-Za-z0-9\-._~:/?#\[\]@!$&'()*+,;=%]+"
        # Lookbehind: force the match to back off trailing punctuation.
        r"(?<![.,;:!?])"
    )
    return re.findall(pattern, text)
# Example usage
# The address is spelled out in full; a scraper-obfuscated placeholder would
# leave extract_emails() with nothing to find.
text = "Contact us at info@example.com or call 555-123-4567. Visit https://example.com"
emails = extract_emails(text)  # ['info@example.com']
phones = extract_phone_numbers(text)  # ['555-123-4567']
urls = extract_urls(text)  # ['https://example.com']
String Splitting and Joining #
Split strings #
text = "apple,banana,cherry,date"

# Every comma is a split point.
fruits = text.split(',')  # ['apple', 'banana', 'cherry', 'date']

# maxsplit caps the number of cuts; the remainder stays in the last item.
parts = text.split(',', maxsplit=2)  # ['apple', 'banana', 'cherry,date']

# With no separator, split() cuts on runs of whitespace.
sentence = "The quick brown fox"
words = sentence.split()  # ['The', 'quick', 'brown', 'fox']

# splitlines() cuts at line boundaries and drops the line endings.
multiline = "Line 1\nLine 2\nLine 3"
lines = multiline.splitlines()  # ['Line 1', 'Line 2', 'Line 3']
Join strings #
words = ['apple', 'banana', 'cherry']

# The separator is the receiver of join(); the pieces are its argument.
result = ','.join(words)      # "apple,banana,cherry"
result = ' and '.join(words)  # "apple and banana and cherry"

# A newline separator puts each item on its own line.
result = '\n'.join(words)     # "apple\nbanana\ncherry"
Advanced String Manipulation #
Pad strings #
text = "Hello"

# Space padding to a total width of 10 (the fill character defaults to ' ').
left_padded = text.ljust(10, ' ')     # "Hello     "
right_padded = text.rjust(10, ' ')    # "     Hello"
center_padded = text.center(10, ' ')  # "  Hello   "

# Padding with another character.
dash_padded = text.center(10, '-')  # "--Hello---"
zero_padded = text.zfill(10)        # "00000Hello"
Reverse strings #
text = "Hello World"

# Character-level reversal of the whole string.
reversed_text = ''.join(reversed(text))  # "dlroW olleH"

# Word-level reversal: the words swap places, their letters stay in order.
words = text.split()
reversed_words = ' '.join(words[::-1])  # "World Hello"

# Per-word reversal: the words keep their places, their letters flip.
reversed_each = ' '.join([word[::-1] for word in words])  # "olleH dlroW"
Generate strings #
import string
import random
def generate_password(length=12):
    """Return a random password of *length* characters.

    Characters are drawn from ASCII letters, digits and ``!@#$%^&*``.
    A *length* of zero or less yields an empty string.
    """
    # Passwords are security-sensitive: draw from the OS CSPRNG via `secrets`
    # rather than the predictable, seedable generator in `random`.
    import secrets

    characters = string.ascii_letters + string.digits + "!@#$%^&*"
    return ''.join(secrets.choice(characters) for _ in range(length))
def generate_slug(text):
    """Turn *text* into a lowercase, hyphen-separated URL slug."""
    import re

    lowered = text.lower()
    # Drop everything that is not a word character, whitespace or a hyphen.
    kept = re.sub(r'[^\w\s-]', '', lowered)
    # Collapse each run of whitespace, underscores and hyphens into one hyphen.
    hyphenated = re.sub(r'[\s_-]+', '-', kept)
    # No hyphens at either end of the finished slug.
    return hyphenated.strip('-')
# Examples: a 16-character password and a slug for a post title.
password = generate_password(length=16)
slug = generate_slug(text="My Blog Post Title!")  # "my-blog-post-title"
Text Processing Utilities #
Count occurrences #
text = "The quick brown fox jumps over the lazy dog"

# str.count() is case-sensitive, so the leading "The" is not counted.
count = text.count("the")  # 1

# Word frequencies by hand: lowercase first so "The" and "the" are one word.
words = text.lower().split()
word_count = {}
for word in words:
    if word in word_count:
        word_count[word] += 1
    else:
        word_count[word] = 1

# The same tally with collections.Counter.
from collections import Counter
word_count = Counter(words)
Text statistics #
def text_stats(text):
    """Return basic statistics about *text* as a dict.

    Keys:
        characters: total length of the text.
        characters_no_spaces: length with " " (space) characters removed;
            newlines and tabs are still counted.
        words: number of whitespace-separated tokens.
        sentences: number of non-blank segments between ".", "!" or "?".
        paragraphs: number of non-blank blocks separated by a blank line.
        lines: number of lines as reported by ``str.splitlines()``.

    Empty text yields 0 for every entry.
    """
    import re

    # Filter blank segments: splitting "One. Two." leaves an empty tail that
    # must not be counted as a third sentence.
    sentences = [part for part in re.split(r'[.!?]+', text) if part.strip()]
    paragraphs = [block for block in text.split('\n\n') if block.strip()]
    return {
        'characters': len(text),
        'characters_no_spaces': len(text.replace(' ', '')),
        'words': len(text.split()),
        'sentences': len(sentences),
        'paragraphs': len(paragraphs),
        'lines': len(text.splitlines()),
    }
# Example: three sentences spread over two paragraphs.
text = (
    "Hello world.\n"
    "This is a test.\n"
    "\n"
    "New paragraph."
)
stats = text_stats(text)
Clean and normalize text #
import re
def clean_text(text):
    """Return a normalized copy of *text*.

    The result contains only lowercase ASCII letters, digits and single
    spaces, with no leading or trailing whitespace.
    """
    # Remove special characters first (keep letters, numbers, whitespace).
    # Doing this before collapsing whitespace matters: deleting the "-" in
    # "a - b" leaves two adjacent spaces that still need to be merged.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    # Collapse every run of whitespace (spaces, tabs, newlines) to one space.
    text = re.sub(r'\s+', ' ', text)
    return text.lower().strip()
def remove_html_tags(text):
    """Return *text* with everything between "<" and the next ">" removed.

    This is a plain regex strip for simple markup. It does not parse HTML,
    decode entities, or make untrusted input safe to render.
    """
    # [^>]* (unlike .*?) also matches newlines, so a tag whose attributes
    # wrap onto the next line is removed as well.
    tag = re.compile(r'<[^>]*>')
    return tag.sub('', text)
# Examples: the sample inputs first, then what each helper makes of them.
dirty_text = " Hello, World!!! "
html_text = "<p>Hello <b>World</b></p>"

clean = clean_text(dirty_text)         # "hello world"
no_html = remove_html_tags(html_text)  # "Hello World"
String Templates #
Template strings #
from string import Template
# Placeholders are written as $identifier inside the template text.
template = Template("Hello, $name! Welcome to $place.")

# substitute() needs a value for every placeholder.
result = template.substitute({"name": "Alice", "place": "Python"})

# safe_substitute() leaves placeholders without a value untouched instead of
# raising, so $place survives in the output here.
result = template.safe_substitute({"name": "Alice"})
# A subclass can swap the "$" placeholder prefix for another character.
class MyTemplate(Template):
    """Template variant whose placeholders start with "%"."""

    delimiter = '%'


template = MyTemplate("Hello, %name!")
result = template.substitute({"name": "Alice"})  # "Hello, Alice!"
Format specifications #
# Numbers: "," adds thousands separators, ".2f" fixes two decimals,
# ".2%" multiplies by 100 and appends a percent sign.
number = 1234.5678
formatted = f"{number:,.2f}"  # "1,234.57"
percentage = f"{0.1234:.2%}"  # "12.34%"

# Dates: for datetime objects the format spec is a strftime pattern.
from datetime import datetime
now = datetime.now()
formatted_date = f"{now:%Y-%m-%d %H:%M:%S}"

# Alignment within a field of 10 characters: ">" right, "<" left, "^" center.
# A character placed before the alignment sign replaces the space fill.
name = "Alice"
formatted = f"{name:>10}"   # "     Alice"
formatted = f"{name:<10}"   # "Alice     "
formatted = f"{name:^10}"   # "  Alice   "
formatted = f"{name:*^10}"  # "**Alice***"