本文目录导读:

可以的!批量解码是一个非常实用的需求。根据你的具体场景,下面提供几个常见的批量解码脚本示例:
批量URL解码(Python)
import urllib.parse
import os
def batch_url_decode(input_dir, output_dir=None):
    """Percent-decode every ``.txt`` file in a directory.

    Each regular file in ``input_dir`` whose name ends with ``.txt`` is read
    as UTF-8, passed through ``urllib.parse.unquote`` and written, under the
    same file name, to ``output_dir``.

    Args:
        input_dir: Directory containing the URL-encoded text files.
        output_dir: Destination directory. When omitted (or empty) a sibling
            directory named ``<input_dir>_decoded`` is used. It is created
            if it does not exist.

    Returns:
        None. One progress line is printed per decoded file.
    """
    if not output_dir:
        # normpath drops a trailing separator, so "./url_files/" yields the
        # sibling "url_files_decoded" instead of "./url_files/_decoded"
        # nested inside the directory being read.
        output_dir = os.path.normpath(input_dir) + "_decoded"
    os.makedirs(output_dir, exist_ok=True)

    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith('.txt'):
            continue
        input_path = os.path.join(input_dir, filename)
        # A sub-directory that happens to be called "something.txt" cannot
        # be opened as a file; skip it rather than abort the whole batch.
        if not os.path.isfile(input_path):
            continue
        output_path = os.path.join(output_dir, filename)

        with open(input_path, 'r', encoding='utf-8') as f:
            content = f.read()
        decoded_content = urllib.parse.unquote(content)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(decoded_content)
        print(f"已解码: {filename}")
# 使用
# batch_url_decode("./url_files/")
批量Base64解码(Bash脚本)
#!/bin/bash
# batch_decode_base64.sh
#
# Decode every line of each *.txt file in a directory as Base64.
#
# Usage: batch_decode_base64.sh <input_dir>
#
# For each <input_dir>/NAME.txt the decoded lines are written to
# <input_dir>/decoded/NAME.txt, one decoded value per line.

input_dir="${1:-}"

# Without this guard an empty argument would expand output_dir to "/decoded"
# and the script would try to create and write into the filesystem root.
if [ -z "$input_dir" ] || [ ! -d "$input_dir" ]; then
    echo "用法: $0 <input_dir>" >&2
    exit 1
fi

output_dir="${input_dir}/decoded"
mkdir -p "$output_dir"

for file in "$input_dir"/*.txt; do
    if [ -f "$file" ]; then
        filename=$(basename "$file")
        out_file="${output_dir}/${filename}"

        # Start from an empty file so that re-running the script does not
        # append a second copy of the decoded data.
        : > "$out_file"

        # "|| [ -n "$line" ]" keeps a final line that has no trailing newline.
        while IFS= read -r line || [ -n "$line" ]; do
            # printf instead of echo: echo would treat a line such as "-n"
            # or "-e" as an option instead of data.
            printf '%s\n' "$line" | base64 -d >> "$out_file"
            echo "" >> "$out_file"
        done < "$file"

        echo "已解码: $filename"
    fi
done
批量多种编码格式解码(Python通用版)
import base64
import urllib.parse
import json
import os
def auto_decode(text):
    """Try several decoders on ``text`` and collect the ones that succeed.

    The decoders are tried in this order: URL percent-decoding, Base64
    (decoded bytes interpreted as UTF-8), and backslash/Unicode escape
    sequences.

    Args:
        text: The encoded string.

    Returns:
        A list of ``(decoder_name, decoded_text)`` tuples, in the order
        above, for every decoder that did not fail. A decoder that leaves
        the text unchanged (for example URL decoding of text without ``%``)
        still counts as a success and is included.
    """
    decoders = [
        ('URL', urllib.parse.unquote),
        # validate=True rejects characters outside the Base64 alphabet
        # instead of silently discarding them, which avoids "decoding"
        # ordinary text that merely contains some Base64-looking characters.
        ('Base64',
         lambda raw: base64.b64decode(raw, validate=True).decode('utf-8')),
        # Encoding to Latin-1 with backslashreplace keeps non-ASCII input
        # intact: characters outside Latin-1 become \uXXXX escapes that the
        # unicode_escape codec turns back into the same characters. A plain
        # UTF-8 encode here would be re-read as Latin-1 and produce mojibake.
        ('Unicode转义',
         lambda raw: raw.encode('latin-1', 'backslashreplace')
                        .decode('unicode_escape')),
    ]
    results = []
    for name, decoder in decoders:
        try:
            decoded = decoder(text)
        except ValueError:
            # binascii.Error and UnicodeError are both ValueError subclasses;
            # they mean "this encoding does not apply to this text".
            continue
        results.append((name, decoded))
    return results
def batch_decode_files(input_dir, output_dir="decoded"):
    """Decode every non-empty line of every file in a directory.

    Each regular file in ``input_dir`` is read as UTF-8 line by line. Every
    non-blank line is passed to ``auto_decode`` and the original line plus
    the chosen decoding are written to ``output_dir/decoded_<filename>`` as
    a three-line record (original, decoded, ``---`` separator).

    Args:
        input_dir: Directory containing the encoded files.
        output_dir: Directory for the reports; created if missing.

    Returns:
        None. One progress line is printed per processed file.
    """
    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(input_dir):
        input_path = os.path.join(input_dir, filename)
        if not os.path.isfile(input_path):
            continue

        output_data = []
        with open(input_path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                results = auto_decode(line)
                if not results:
                    continue
                # URL decoding is tried first and succeeds on any text, so
                # blindly taking results[0] would never use the other
                # decoders. Prefer the first decoder that actually changed
                # the line; fall back to the first result otherwise.
                decoded = next(
                    (value for _, value in results if value != line),
                    results[0][1],
                )
                output_data.append(f"原文: {line}")
                output_data.append(f"解码: {decoded}")
                output_data.append("---")

        # Write the report for this file.
        output_path = os.path.join(output_dir, f"decoded_{filename}")
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(output_data))
        print(f"已处理: {filename} -> {output_path}")
# 使用示例
# batch_decode_files("./encoded_files/")
批量文件名解码(修复乱码文件名)
import os
import urllib.parse
def fix_mojibake_filenames(directory):
    """Rename files in ``directory`` whose names look encoded or garbled.

    For every regular file the following repairs are tried, in order, and
    the first one that changes the name is used:

    1. URL percent-decoding (only if the escapes form valid UTF-8).
    2. Re-reading the name's Latin-1 bytes as UTF-8.
    3. Re-reading the name's Latin-1 bytes as GBK.

    UTF-8 is tried before GBK because UTF-8 validation is strict, whereas
    UTF-8 byte sequences are frequently also valid GBK and would be
    "repaired" into different garbage.

    A file is left untouched (with a message) when the repaired name would
    contain a path separator, or when a file with that name already exists,
    so no existing file is overwritten.

    Args:
        directory: Directory whose immediate files are examined.

    Returns:
        None. One line is printed per rename, skip or failure.
    """
    for filename in os.listdir(directory):
        old_path = os.path.join(directory, filename)
        if not os.path.isfile(old_path):
            continue

        new_name = filename

        # Step 1: percent-decoding. errors='strict' makes invalid UTF-8
        # escapes raise instead of being replaced by U+FFFD placeholders.
        try:
            new_name = urllib.parse.unquote(filename, errors='strict')
        except UnicodeDecodeError:
            new_name = filename

        # Steps 2 and 3: undo "bytes were decoded as Latin-1" mojibake.
        if new_name == filename:
            for codec in ('utf-8', 'gbk'):
                try:
                    candidate = filename.encode('latin1').decode(codec)
                except UnicodeError:
                    continue
                if candidate != filename:
                    new_name = candidate
                    break

        if not new_name or new_name == filename:
            continue

        # A decoded "%2F" (or NUL, or "..") must not move the file out of
        # the directory or make the rename call fail half-way through.
        has_separator = os.sep in new_name or (
            os.altsep is not None and os.altsep in new_name
        )
        if has_separator or '\x00' in new_name or new_name in ('.', '..'):
            print(f"跳过(文件名无效): {filename}")
            continue

        new_path = os.path.join(directory, new_name)
        # os.rename silently replaces an existing file on POSIX systems.
        if os.path.lexists(new_path):
            print(f"跳过(目标已存在): {filename} -> {new_name}")
            continue

        try:
            os.rename(old_path, new_path)
        except OSError as exc:
            print(f"重命名失败: {filename} -> {new_name} ({exc})")
            continue
        print(f"重命名: {filename} -> {new_name}")
# 使用
# fix_mojibake_filenames("./乱码文件目录/")
批量JSON/XML转义字符解码
import html
import json
import re
# Matches exactly one JSON string escape: a UTF-16 surrogate pair written as
# two \u escapes, a single \uXXXX escape, or one of the short escapes
# (\" \\ \/ \b \f \n \r \t). The pair alternative must come first so that
# both halves are consumed together.
_JSON_ESCAPE_RE = re.compile(
    r'\\(?:'
    r'u[dD][89abAB][0-9a-fA-F]{2}\\u[dD][c-fC-F][0-9a-fA-F]{2}'
    r'|u[0-9a-fA-F]{4}'
    r'|["\\/bfnrt]'
    r')'
)


def _decode_json_escape(match):
    """Return the character for one escape matched by ``_JSON_ESCAPE_RE``."""
    escape = match.group(0)
    decoded = json.loads(f'"{escape}"')
    # An unpaired surrogate cannot be encoded as UTF-8 when the text is
    # written out later, so leave it in its escaped form.
    if len(decoded) == 1 and 0xD800 <= ord(decoded) <= 0xDFFF:
        return escape
    return decoded


def decode_escaped_text(text):
    """Decode HTML entities and JSON-style backslash escapes in ``text``.

    HTML/XML character references (``&lt;``, ``&#20013;`` ...) are resolved
    first. Then every JSON string escape (``\\uXXXX`` including surrogate
    pairs, ``\\n``, ``\\t``, ``\\"``, ``\\\\`` ...) is replaced in a single
    pass, so the text may span several lines and may contain bare quotes.
    Backslash sequences that are not valid JSON escapes are left unchanged.

    Args:
        text: The escaped text.

    Returns:
        The decoded text.
    """
    text = html.unescape(text)
    return _JSON_ESCAPE_RE.sub(_decode_json_escape, text)
def batch_decode_escaped(input_file, output_file):
    """Decode the escape sequences of one text file into another file.

    The whole of ``input_file`` is read as UTF-8, run through
    ``decode_escaped_text`` and the result is written as UTF-8 to
    ``output_file``. A confirmation line is printed afterwards.

    Args:
        input_file: Path of the file containing escaped text.
        output_file: Path the decoded text is written to.
    """
    with open(input_file, encoding='utf-8') as source:
        raw_text = source.read()

    # Decode before opening the destination so a failure leaves it untouched.
    result = decode_escaped_text(raw_text)

    with open(output_file, mode='w', encoding='utf-8') as target:
        target.write(result)

    print(f"已解码: {input_file} -> {output_file}")
# 使用
# batch_decode_escaped("input.txt", "output.txt")
使用建议
- 选择合适的脚本:根据你的具体编码类型选择相应的脚本
- 备份原文件:解码前建议备份原始文件
- 测试小文件:先对少量文件测试,确认解码效果
- 编码检测:如果不知道编码格式,可以先用 chardet 库检测
需要我针对某个特定场景(如HTML实体、Unicode转义等)提供更精确的解码脚本吗?