File Handling in Python: A Complete Guide
File handling is a crucial skill in Python programming. Whether you're processing data, creating logs, or building applications that need to persist information, understanding how to work with files is essential. This tutorial will guide you through everything you need to know about file operations in Python.
What is File Handling? #
File handling refers to the process of working with files on your computer's storage system. This includes:
- Reading data from files
- Writing data to files
- Modifying existing files
- Managing file operations safely
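As a quick preview of these operations, here is a minimal sketch that writes a small file and reads it back; the filename example.txt is just an illustrative placeholder, and each step is covered in detail in the sections below.
# Minimal sketch: write a file, then read it back
with open("example.txt", "w") as file:
    file.write("Hello from Python!\n")

with open("example.txt", "r") as file:
    print(file.read())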
Opening and Closing Files #
The Basic Syntax #
# Opening a file
file = open("filename.txt", "mode")
# Always close the file when done
file.close()
File Modes #
Mode | Description | Can Read | Can Write | Creates New File |
---|---|---|---|---|
'r' | Read only (default) | ✓ | ✗ | ✗ |
'w' | Write only (truncates existing content) | ✗ | ✓ | ✓ |
'a' | Append only | ✗ | ✓ | ✓ |
'r+' | Read and write | ✓ | ✓ | ✗ |
'w+' | Write and read (truncates existing content) | ✓ | ✓ | ✓ |
'a+' | Append and read | ✓ | ✓ | ✓ |
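To make the differences concrete, here is a small sketch (using modes_demo.txt as a throwaway, illustrative filename) showing that 'w' replaces existing content, 'a' adds to the end, and 'r' raises FileNotFoundError when the file is missing.
# 'w' truncates: after these two writes the file contains only "second"
with open("modes_demo.txt", "w") as f:
    f.write("first\n")
with open("modes_demo.txt", "w") as f:
    f.write("second\n")

# 'a' appends: "third" is added after "second"
with open("modes_demo.txt", "a") as f:
    f.write("third\n")

# 'r' never creates a file: opening a missing file raises FileNotFoundError
try:
    with open("no_such_file.txt", "r") as f:
        pass
except FileNotFoundError:
    print("'r' mode does not create missing files")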
The with Statement (Recommended) #
The with statement automatically handles file closing, even if an error occurs:
# Recommended approach
with open("filename.txt", "r") as file:
content = file.read()
# File is automatically closed when exiting the block
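If you want to confirm that the with statement really closed the file, you can check the file object's closed attribute after the block; a minimal sketch, assuming filename.txt exists as in the example above:
with open("filename.txt", "r") as file:
    print(file.closed)  # False - the file is open inside the block
print(file.closed)      # True - closed automatically when the block ended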
Reading Files #
Reading the Entire File #
# Method 1: read() - entire file as string
with open("sample.txt", "r") as file:
    content = file.read()
    print(content)

# Method 2: readlines() - list of lines
with open("sample.txt", "r") as file:
    lines = file.readlines()
    for line in lines:
        print(line.strip())  # strip() removes newline characters
Reading Line by Line #
# Method 1: readline() - one line at a time
with open("sample.txt", "r") as file:
    first_line = file.readline()
    second_line = file.readline()
    print(f"First: {first_line.strip()}")
    print(f"Second: {second_line.strip()}")

# Method 2: Iterate directly over the file object
with open("sample.txt", "r") as file:
    for line in file:
        print(line.strip())
Reading with Error Handling #
try:
    with open("data.txt", "r") as file:
        content = file.read()
        print(content)
except FileNotFoundError:
    print("File not found. Please check the filename.")
except IOError:
    print("Error reading file.")
Writing Files #
Writing Text to Files #
# Writing (overwrites existing content)
with open("output.txt", "w") as file:
    file.write("Hello, World!\n")
    file.write("This is a new line.\n")

# Writing multiple lines
lines = ["Line 1\n", "Line 2\n", "Line 3\n"]
with open("output.txt", "w") as file:
    file.writelines(lines)
Appending to Files #
# Appending to an existing file
with open("log.txt", "a") as file:
    file.write("New log entry\n")

# Appending with a timestamp
import datetime

with open("log.txt", "a") as file:
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    file.write(f"[{timestamp}] Application started\n")
Working with Different File Types #
Text Files #
# Reading a text file
with open("story.txt", "r") as file:
    story = file.read()

word_count = len(story.split())
print(f"Word count: {word_count}")
CSV Files (without pandas) #
# Reading CSV manually
with open("data.csv", "r") as file:
    for line in file:
        columns = line.strip().split(",")
        print(columns)

# Writing CSV
data = [
    ["Name", "Age", "City"],
    ["Alice", "25", "New York"],
    ["Bob", "30", "San Francisco"]
]
with open("people.csv", "w") as file:
    for row in data:
        file.write(",".join(row) + "\n")
CSV Files (with csv module) #
import csv
# Reading CSV with the csv module (newline="" is recommended for csv files)
with open("data.csv", "r", newline="") as file:
    csv_reader = csv.reader(file)
    for row in csv_reader:
        print(row)

# Writing CSV with the csv module
data = [
    ["Name", "Age", "City"],
    ["Alice", "25", "New York"],
    ["Bob", "30", "San Francisco"]
]
with open("people.csv", "w", newline="") as file:
    csv_writer = csv.writer(file)
    csv_writer.writerows(data)
JSON Files #
import json
# Reading JSON
with open("data.json", "r") as file:
    data = json.load(file)
    print(data)

# Writing JSON
data = {
    "name": "Alice",
    "age": 25,
    "hobbies": ["reading", "swimming", "coding"]
}
with open("person.json", "w") as file:
    json.dump(data, file, indent=4)
File Operations #
Checking File Existence #
import os
# Check if file exists
if os.path.exists("data.txt"):
    print("File exists")
else:
    print("File not found")

# Check if it's a file or directory
if os.path.isfile("data.txt"):
    print("It's a file")
elif os.path.isdir("data.txt"):
    print("It's a directory")
Getting File Information #
import os
filename = "data.txt"
if os.path.exists(filename):
# File size in bytes
size = os.path.getsize(filename)
print(f"File size: {size} bytes")
# Last modification time
import datetime
mod_time = os.path.getmtime(filename)
readable_time = datetime.datetime.fromtimestamp(mod_time)
print(f"Last modified: {readable_time}")
Working with Directories #
import os
# Get current directory
current_dir = os.getcwd()
print(f"Current directory: {current_dir}")
# List files in directory
files = os.listdir(".")
print("Files in current directory:")
for file in files:
    print(f"  {file}")

# Create directory
if not os.path.exists("new_folder"):
    os.makedirs("new_folder")
Advanced File Operations #
File Copying and Moving #
import shutil
# Copy file
shutil.copy("source.txt", "destination.txt")
# Move file
shutil.move("old_location.txt", "new_location.txt")
# Copy entire directory (the destination folder must not already exist)
shutil.copytree("source_folder", "destination_folder")
Working with File Paths #
import os
# Join paths properly
file_path = os.path.join("folder", "subfolder", "file.txt")
print(file_path)
# Get directory and filename
directory = os.path.dirname(file_path)
filename = os.path.basename(file_path)
print(f"Directory: {directory}")
print(f"Filename: {filename}")
# Split filename and extension
name, extension = os.path.splitext(filename)
print(f"Name: {name}, Extension: {extension}")
Temporary Files #
import tempfile
import os

# Create a temporary file
with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp_file:
    temp_file.write("Temporary data")
    temp_filename = temp_file.name

print(f"Temporary file: {temp_filename}")

# Clean up
os.unlink(temp_filename)
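If the temporary file does not need to outlive the with block, you can keep the default delete=True so it is removed automatically when it is closed; a minimal sketch:
import tempfile

# With delete=True (the default), the file is deleted as soon as it is closed
with tempfile.NamedTemporaryFile(mode='w+', delete=True) as temp_file:
    temp_file.write("Scratch data")
    temp_file.seek(0)        # Rewind before reading the data back
    print(temp_file.read())  # Scratch data
# No manual os.unlink() is needed here - the file is already gone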
Error Handling and Best Practices #
Comprehensive Error Handling #
def read_file_safely(filename):
    try:
        with open(filename, 'r') as file:
            return file.read()
    except FileNotFoundError:
        print(f"Error: File '{filename}' not found.")
        return None
    except PermissionError:
        print(f"Error: Permission denied to read '{filename}'.")
        return None
    except IOError as e:
        print(f"Error reading file: {e}")
        return None

# Usage
content = read_file_safely("data.txt")
if content:
    print(content)
Best Practices #
- Always use with statements for automatic file closing
- Handle exceptions appropriately
- Use appropriate file modes for your needs
- Validate file paths before operations
- Consider encoding for text files
# Good practice example
def process_file(filename, encoding='utf-8'):
    """Process a text file with proper error handling."""
    try:
        with open(filename, 'r', encoding=encoding) as file:
            lines = file.readlines()

        # Process lines
        processed_lines = []
        for line in lines:
            processed_lines.append(line.strip().upper())

        # Write processed data
        output_filename = f"processed_{filename}"
        with open(output_filename, 'w', encoding=encoding) as file:
            file.writelines(line + '\n' for line in processed_lines)

        print(f"File processed successfully. Output: {output_filename}")
        return True
    except Exception as e:
        print(f"Error processing file: {e}")
        return False
Practical Examples #
Example 1: Log File Analyzer #
def analyze_log_file(filename):
    """Analyze a log file and extract statistics."""
    try:
        with open(filename, 'r') as file:
            lines = file.readlines()

        stats = {
            'total_lines': len(lines),
            'error_count': 0,
            'warning_count': 0,
            'info_count': 0
        }

        for line in lines:
            line = line.lower()
            if 'error' in line:
                stats['error_count'] += 1
            elif 'warning' in line:
                stats['warning_count'] += 1
            elif 'info' in line:
                stats['info_count'] += 1

        return stats
    except FileNotFoundError:
        print(f"Log file '{filename}' not found.")
        return None

# Usage
stats = analyze_log_file("application.log")
if stats:
    print("Log Analysis Results:")
    print(f"Total lines: {stats['total_lines']}")
    print(f"Errors: {stats['error_count']}")
    print(f"Warnings: {stats['warning_count']}")
    print(f"Info: {stats['info_count']}")
Example 2: Configuration File Manager #
import json
class ConfigManager:
    def __init__(self, config_file):
        self.config_file = config_file
        self.config = self.load_config()

    def load_config(self):
        """Load configuration from file."""
        try:
            with open(self.config_file, 'r') as file:
                return json.load(file)
        except FileNotFoundError:
            print("Config file not found. Creating default config.")
            return self.create_default_config()
        except json.JSONDecodeError:
            print("Invalid JSON in config file. Using default config.")
            return self.create_default_config()

    def create_default_config(self):
        """Create default configuration."""
        default_config = {
            "database": {
                "host": "localhost",
                "port": 5432
            },
            "logging": {
                "level": "INFO",
                "file": "app.log"
            }
        }
        self.save_config(default_config)
        return default_config

    def save_config(self, config=None):
        """Save configuration to file."""
        if config is None:
            config = self.config
        try:
            with open(self.config_file, 'w') as file:
                json.dump(config, file, indent=4)
            print("Configuration saved successfully.")
        except IOError as e:
            print(f"Error saving config: {e}")

    def get(self, key, default=None):
        """Get a configuration value using dot notation (e.g. 'database.host')."""
        keys = key.split('.')
        value = self.config
        for k in keys:
            value = value.get(k, default)
            if value is None:
                break
        return value

    def set(self, key, value):
        """Set a configuration value using dot notation and save the file."""
        keys = key.split('.')
        config = self.config
        for k in keys[:-1]:
            config = config.setdefault(k, {})
        config[keys[-1]] = value
        self.save_config()

# Usage
config = ConfigManager("app_config.json")
print(f"Database host: {config.get('database.host')}")
config.set('database.host', '192.168.1.100')
Example 3: File Backup System #
import shutil
import datetime
import os
def backup_files(source_dir, backup_dir):
    """Create timestamped backup of files."""
    # Create backup directory if it doesn't exist
    if not os.path.exists(backup_dir):
        os.makedirs(backup_dir)

    # Create timestamp
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_folder = os.path.join(backup_dir, f"backup_{timestamp}")

    try:
        # Copy files
        shutil.copytree(source_dir, backup_folder)
        print(f"Backup created: {backup_folder}")

        # Create backup log
        log_file = os.path.join(backup_folder, "backup_log.txt")
        with open(log_file, 'w') as file:
            file.write(f"Backup created: {datetime.datetime.now()}\n")
            file.write(f"Source: {source_dir}\n")
            file.write(f"Destination: {backup_folder}\n")

        return True
    except Exception as e:
        print(f"Backup failed: {e}")
        return False

# Usage
success = backup_files("important_data", "backups")
Common File Operations Patterns #
Reading Large Files Efficiently #
def process_large_file(filename, chunk_size=1024):
    """Process large file in chunks to save memory."""
    try:
        with open(filename, 'r') as file:
            while True:
                chunk = file.read(chunk_size)
                if not chunk:
                    break
                # Hand the chunk back to the caller
                yield chunk
    except IOError as e:
        print(f"Error reading file: {e}")

# Usage
for chunk in process_large_file("large_file.txt"):
    # Process each chunk
    pass
File Filtering and Processing #
def filter_file_content(input_file, output_file, filter_func):
    """Filter file content based on a function."""
    try:
        with open(input_file, 'r') as infile, open(output_file, 'w') as outfile:
            for line in infile:
                if filter_func(line):
                    outfile.write(line)
        print(f"Filtered content saved to {output_file}")
    except IOError as e:
        print(f"Error processing files: {e}")

# Usage: Filter lines containing "error"
filter_file_content("log.txt", "errors.txt", lambda line: "error" in line.lower())
Summary #
In this comprehensive tutorial, we covered:
- Basic file operations: Opening, reading, writing, and closing files
- File modes: Understanding different ways to open files
- The with statement: Best practice for file handling
- Different file types: Text, CSV, JSON files
- File system operations: Checking existence, getting info, working with directories
- Error handling: Proper exception handling for file operations
- Advanced operations: Copying, moving, temporary files
- Practical examples: Real-world applications and patterns
Best Practices Summary #
- Always use with statements for automatic file closing
- Handle exceptions appropriately with try-except blocks
- Use appropriate file modes for your specific needs
- Validate file paths before performing operations
- Consider file encoding when working with text files
- Process large files in chunks to conserve memory
- Create backups before modifying important files
- Use proper path handling with os.path functions
Next Steps #
Now that you understand file handling, explore:
- Database operations: Working with SQLite, PostgreSQL, etc.
- Web scraping: Downloading and processing files from the internet
- Data processing: Using pandas for advanced file operations
- Automation: Creating scripts for file management tasks
- Security: Handling file permissions and secure file operations
File handling is a fundamental skill that you'll use in many Python applications. Practice with different file types and scenarios to become proficient in managing data persistence in your programs!