文件操作
2026/2/1大约 5 分钟
文件操作
文件操作是处理持久化数据的基础,Python 提供了丰富的文件 I/O 功能。
基本文件操作
打开和读取文件
# 打开文件
file = open("example.txt", "r") # "r" = 读取模式
content = file.read()
print(content)
file.close()
# 使用 with 语句(推荐)
with open("example.txt", "r") as file:
content = file.read()
# 文件自动关闭
# 读取模式
mode = {
"r": "读取(默认)",
"w": "写入(覆盖)",
"a": "追加",
"r+": "读写(文件必须存在)",
"w+": "读写(创建或覆盖)",
"a+": "读写追加",
"x": "独占创建(文件存在则失败)"
}
# 二进制模式
"rb", "wb", "ab", "rb+", "wb+", "ab+"读取方法
# read():读取全部
with open("example.txt", "r") as file:
content = file.read() # 整个文件作为字符串
# read(size):读取指定字节数
with open("example.txt", "r") as file:
chunk = file.read(1024) # 读取 1024 字节
# readline():读取一行
with open("example.txt", "r") as file:
line = file.readline()
while line:
print(line.strip())
line = file.readline()
# readlines():读取所有行
with open("example.txt", "r") as file:
lines = file.readlines() # 返回列表
for line in lines:
print(line.strip())
# 遍历文件(推荐)
with open("example.txt", "r") as file:
for line in file:
print(line.strip())
写入文件
# write():写入字符串
with open("output.txt", "w") as file:
file.write("Hello, World!\n")
file.write("Second line\n")
# writelines():写入字符串列表
lines = ["Line 1\n", "Line 2\n", "Line 3\n"]
with open("output.txt", "w") as file:
file.writelines(lines)
# 追加模式
with open("output.txt", "a") as file:
file.write("This will be appended\n")文件路径
pathlib(推荐)
from pathlib import Path
# 路径创建
path = Path("documents/report.txt")
absolute = Path("/home/user/docs")
home = Path.home() # 用户主目录
current = Path.cwd() # 当前工作目录
# 路径操作
path = Path("docs") / "report.txt" # 拼接路径
path = path.parent # 父目录
path = path.name # 文件名
path = path.stem # 文件名(无扩展名)
path = path.suffix # 扩展名
# 路径检查
path.exists() # 是否存在
path.is_file() # 是否是文件
path.is_dir() # 是否是目录
# 创建目录
Path("new_dir").mkdir(exist_ok=True)
Path("a/b/c").mkdir(parents=True, exist_ok=True)
# 遍历目录
for item in Path(".").iterdir():
print(f"{'DIR ' if item.is_dir() else 'FILE'} {item.name}")
# glob 模式匹配
for py_file in Path(".").glob("*.py"):
print(py_file)
for file in Path(".").rglob("*.md"): # 递归
print(file)
os.path(传统方式)
import os
# 路径拼接
path = os.path.join("docs", "report.txt")
# 路径信息
os.path.exists(path)
os.path.isfile(path)
os.path.isdir(path)
os.path.basename(path) # 文件名
os.path.dirname(path) # 目录名
os.path.splitext(path) # (root, ext)
# 获取绝对路径
os.path.abspath("file.txt")
# 当前工作目录
os.getcwd()
os.chdir("/path/to/dir")
# 目录操作
os.mkdir("new_dir")
os.makedirs("a/b/c", exist_ok=True)
# 列出目录
os.listdir(".")文件属性
import os
from pathlib import Path
from datetime import datetime
path = Path("example.txt")
# 文件大小
size = path.stat().st_size
# 时间戳
created = datetime.fromtimestamp(path.stat().st_ctime)
modified = datetime.fromtimestamp(path.stat().st_mtime)
accessed = datetime.fromtimestamp(path.stat().st_atime)
# 权限
mode = path.stat().st_mode
# 使用 os 模块
size = os.path.getsize("example.txt")
mtime = os.path.getmtime("example.txt")
# 文件详细信息
import os
def get_file_info(filepath):
    """Return basic metadata about *filepath* as a dict.

    Keys: size (bytes), created, modified (datetime), is_file, is_dir.

    NOTE(review): st_ctime is creation time on Windows but *metadata
    change* time on Unix — the "created" key is only accurate on Windows.
    Use st_birthtime where available for a true creation time.
    """
    stat = os.stat(filepath)
    return {
        "size": stat.st_size,
        "created": datetime.fromtimestamp(stat.st_ctime),
        "modified": datetime.fromtimestamp(stat.st_mtime),
        "is_file": os.path.isfile(filepath),
        "is_dir": os.path.isdir(filepath),
    }
文件搜索
from pathlib import Path
# 查找文件
def find_file(name, start_path="."):
"""递归查找文件"""
for path in Path(start_path).rglob(name):
if path.is_file():
return path
return None
# 查找匹配模式的文件
def find_files(pattern, start_path="."):
"""查找匹配模式的文件"""
return list(Path(start_path).rglob(pattern))
# 使用
result = find_file("config.json")
py_files = find_files("*.py")JSON 文件
import json
# 读取 JSON
with open("data.json", "r") as file:
data = json.load(file)
# 写入 JSON
data = {"name": "Alice", "age": 25}
with open("output.json", "w") as file:
json.dump(data, file, indent=2)
# 美化输出
with open("output.json", "w") as file:
json.dump(data, file, indent=4, sort_keys=True, ensure_ascii=False)
# 从字符串
json_string = '{"name": "Bob"}'
data = json.loads(json_string)
# 转为字符串
data = {"name": "Charlie"}
json_string = json.dumps(data)
CSV 文件
import csv
# 读取 CSV
with open("data.csv", "r") as file:
reader = csv.reader(file)
for row in reader:
print(row)
# 读取为字典
with open("data.csv", "r") as file:
reader = csv.DictReader(file)
for row in reader:
print(row["column_name"])
# 写入 CSV
data = [
["Name", "Age", "City"],
["Alice", 25, "NYC"],
["Bob", 30, "LA"]
]
with open("output.csv", "w", newline="") as file:
writer = csv.writer(file)
writer.writerows(data)
# 写入字典
with open("output.csv", "w", newline="") as file:
fieldnames = ["name", "age", "city"]
writer = csv.DictWriter(file, fieldnames=fieldnames)
writer.writeheader()
writer.writerow({"name": "Alice", "age": 25, "city": "NYC"})文件编码
# 检测文件编码
import chardet
with open("unknown.txt", "rb") as file:
raw_data = file.read()
result = chardet.detect(raw_data)
encoding = result["encoding"]
confidence = result["confidence"]
# 指定编码读取
with open("file.txt", "r", encoding="utf-8") as file:
content = file.read()
# 处理编码错误
with open("file.txt", "r", encoding="utf-8", errors="ignore") as file:
content = file.read() # 忽略错误
with open("file.txt", "r", encoding="utf-8", errors="replace") as file:
content = file.read() # 替换错误字符
临时文件
import tempfile
import os
# 临时文件
with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp:
temp_name = temp.name
temp.write("Temporary data")
print(f"Temp file: {temp_name}")
# 手动删除
os.unlink(temp_name)
# 临时目录
with tempfile.TemporaryDirectory() as temp_dir:
print(f"Temp dir: {temp_dir}")
# 在此使用临时目录
# 自动清理
# 获取系统临时目录
tempfile.gettempdir() # 如 /tmp
文件锁定
import fcntl # Unix/Linux
# Windows 使用 msvcrt 或 pywin32
def file_lock(filepath):
    """Write to *filepath* while holding an exclusive flock (Unix only).

    Bug fixed: the original opened the file with mode "w", which truncates
    the file *before* the lock is acquired — another process holding the
    lock (or reading) could observe an empty file. Open in append mode,
    acquire the lock, and only then truncate and write.
    """
    with open(filepath, "a+") as f:
        fcntl.flock(f, fcntl.LOCK_EX)  # exclusive lock, blocks until free
        try:
            # Critical section: we own the lock, now it is safe to rewrite.
            f.seek(0)
            f.truncate()
            f.write("Exclusive access")
        finally:
            fcntl.flock(f, fcntl.LOCK_UN)  # release the lock
# 跨平台方案:使用 portalocker
import portalocker
with open("file.txt", "w") as f:
portalocker.lock(f, portalocker.LOCK_EX)
# 写入操作
portalocker.unlock(f)
高级操作
内存文件
from io import StringIO, BytesIO
# StringIO:文本内存文件
output = StringIO()
output.write("Hello")
output.write(" World")
content = output.getvalue()
output.close()
# BytesIO:二进制内存文件
output = BytesIO()
output.write(b"Binary data")
content = output.getvalue()
# 读取
input_stream = StringIO("Line 1\nLine 2\n")
for line in input_stream:
print(line.strip())
文件压缩
import gzip
import bz2
import zipfile
# Gzip 压缩
with gzip.open("file.txt.gz", "wt") as f:
f.write("Compressed content")
# Gzip 解压
with gzip.open("file.txt.gz", "rt") as f:
content = f.read()
# Zip 文件
with zipfile.ZipFile("archive.zip", "w") as zipf:
zipf.write("file1.txt")
zipf.write("file2.txt")
# 读取 Zip
with zipfile.ZipFile("archive.zip", "r") as zipf:
# 列出文件
print(zipf.namelist())
# 提取文件
zipf.extractall("extracted/")
# 读取特定文件
with zipf.open("file1.txt") as f:
content = f.read()
文件比较
import filecmp
# 比较文件
same = filecmp.cmp("file1.txt", "file2.txt")
# 比较目录
comparison = filecmp.dircmp("dir1", "dir2")
comparison.same_files # 相同文件
comparison.diff_files # 不同文件
comparison.left_only # 只在左边
comparison.right_only # 只在右边
最佳实践
文件操作建议
- 使用 with 语句:自动关闭文件
- 指定编码:避免平台差异
- 使用 pathlib:更现代的路径处理
- 处理异常:文件操作可能失败
- 检查路径:操作前验证路径存在
性能优化
# 大文件分块读取
def read_in_chunks(file_path, chunk_size=4096):
    """Yield successive blocks of at most *chunk_size* bytes from *file_path*."""
    with open(file_path, "rb") as handle:
        # iter() with a sentinel stops when read() returns b"" (EOF).
        for block in iter(lambda: handle.read(chunk_size), b""):
            yield block
# 使用生成器处理大文件
def process_large_file(file_path):
    """Lazily process *file_path* one line at a time.

    Yields the result of ``process_line`` for each line, so the whole
    file is never held in memory. NOTE: relies on a caller-provided
    ``process_line`` function being in scope — it is not defined here.
    """
    with open(file_path, "r") as f:
        for line in f:
            yield process_line(line)
常见陷阱
# ❌ 忘记关闭文件
file = open("data.txt", "r")
content = file.read()
# 如果这里出错,文件不会关闭
# ✅ 使用 with
with open("data.txt", "r") as file:
content = file.read()
# ❌ Windows 路径问题
path = "C:\new\test.txt" # \n 是换行符
# ✅ 使用原始字符串或 pathlib
path = r"C:\new\test.txt"
path = Path("C:/new/test.txt")