实用脚本能批量解码吗?

wen 实用脚本 11

本文目录导读:

实用脚本能批量解码吗?

  1. 批量URL解码(Python)
  2. 批量Base64解码(Bash脚本)
  3. 批量多种编码格式解码(Python通用版)
  4. 批量文件名解码(修复乱码文件名)
  5. 批量JSON/XML转义字符解码
  6. 使用建议

可以的!用脚本批量解码是很常见的需求。下面按不同的使用场景,提供几个常用的批量解码脚本示例:

批量URL解码(Python)

import urllib.parse
import os
def batch_url_decode(input_dir, output_dir=None):
    """Percent-decode every ``.txt`` file found directly inside a directory.

    Each regular file in *input_dir* whose name ends with ``.txt`` is read as
    UTF-8, passed through ``urllib.parse.unquote`` and written under the same
    file name into *output_dir*. One progress line is printed per file.

    Args:
        input_dir: Directory containing the URL-encoded text files.
        output_dir: Destination directory; it is created when missing. When
            omitted or empty, ``input_dir + "_decoded"`` is used, ignoring any
            trailing path separator on *input_dir* (``"./files/"`` maps to
            ``"./files_decoded"``).
    """
    if not output_dir:
        # os.path.split() yields an empty tail for paths ending in a
        # separator; using the head keeps the default a sibling of input_dir
        # instead of a "_decoded" folder nested inside it.
        head, tail = os.path.split(input_dir)
        output_dir = (input_dir if tail else head) + "_decoded"
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(input_dir):
        input_path = os.path.join(input_dir, filename)
        # The isfile() check skips directories that merely end in ".txt",
        # which would otherwise make open() raise and abort the batch.
        if not filename.endswith('.txt') or not os.path.isfile(input_path):
            continue
        output_path = os.path.join(output_dir, filename)
        with open(input_path, 'r', encoding='utf-8') as f:
            content = f.read()
        decoded_content = urllib.parse.unquote(content)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(decoded_content)
        print(f"已解码: {filename}")
# 使用
# batch_url_decode("./url_files/")

批量Base64解码(Bash脚本)

#!/bin/bash
# batch_decode_base64.sh
#
# Decode every line of each *.txt file in a directory as Base64.
#
# Usage:  batch_decode_base64.sh <input_dir>
# Output: <input_dir>/decoded/<same file name>, one decoded value per line.

input_dir="$1"
# Without this guard an empty argument would turn output_dir into "/decoded".
if [ -z "$input_dir" ] || [ ! -d "$input_dir" ]; then
    echo "用法: $0 <输入目录>" >&2
    exit 1
fi
output_dir="${input_dir}/decoded"
mkdir -p "$output_dir" || exit 1
for file in "$input_dir"/*.txt; do
    if [ -f "$file" ]; then
        filename=$(basename "$file")
        out_file="${output_dir}/${filename}"
        # Truncate first so that re-running the script does not append a
        # second copy of the decoded data to an existing output file.
        : > "$out_file"
        # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
        while IFS= read -r line || [ -n "$line" ]; do
            # Drop a trailing CR so files with Windows line endings decode.
            line=${line%$'\r'}
            # printf instead of echo: echo would treat a line such as "-n"
            # as an option rather than as data.
            if ! printf '%s\n' "$line" | base64 -d >> "$out_file"; then
                echo "警告: 无法解码 ${filename} 中的一行: ${line}" >&2
            fi
            echo "" >> "$out_file"
        done < "$file"
        echo "已解码: $filename"
    fi
done

批量多种编码格式解码(Python通用版)

import base64
import urllib.parse
import json
import os
def auto_decode(text):
    """Run several decoders over *text* and collect every one that succeeds.

    Args:
        text: The possibly-encoded string to examine.

    Returns:
        A list of ``(decoder_name, decoded_text)`` tuples in the fixed order
        URL, Base64, Unicode-escape. Decoders that raise are left out. A
        decoder "succeeding" does not prove the text used that encoding: URL
        decoding, for example, returns a string without percent-escapes
        unchanged, so callers should compare the result with the input.
    """
    decoders = [
        ('URL', urllib.parse.unquote),
        ('Base64', lambda x: base64.b64decode(x).decode('utf-8')),
        # backslashreplace turns characters outside Latin-1 into \uXXXX
        # escapes before the unicode_escape pass, so text that already
        # contains real non-ASCII characters survives the round trip instead
        # of being turned into mojibake.
        ('Unicode转义',
         lambda x: x.encode('latin-1', 'backslashreplace').decode('unicode_escape')),
    ]
    results = []
    for name, decoder in decoders:
        try:
            results.append((name, decoder(text)))
        except Exception:
            # Best effort: any failure just means this encoding does not
            # apply. Unlike a bare except, this lets KeyboardInterrupt and
            # SystemExit propagate.
            continue
    return results
def batch_decode_files(input_dir, output_dir="decoded"):
    """Decode every line of every regular file in a directory.

    Each file directly inside *input_dir* is read as UTF-8. Every non-blank
    line is stripped and passed to ``auto_decode``; the original line, the
    chosen decoding and a ``---`` separator are collected and written to
    ``decoded_<filename>`` inside *output_dir*. Progress is printed per file.

    Args:
        input_dir: Directory whose files are processed (sub-directories are
            skipped).
        output_dir: Destination directory, created when missing.
    """
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(input_dir):
        input_path = os.path.join(input_dir, filename)
        if not os.path.isfile(input_path):
            continue
        try:
            with open(input_path, 'r', encoding='utf-8') as f:
                lines = [line.strip() for line in f]
        except UnicodeDecodeError:
            # One binary or non-UTF-8 file must not abort the whole batch.
            print(f"已跳过(非UTF-8文本): {filename}")
            continue
        output_data = []
        for line in lines:
            if not line:
                continue
            results = auto_decode(line)
            if not results:
                continue
            # A decoder may "succeed" without changing anything (URL decoding
            # of a line without percent-escapes), so prefer the first result
            # that differs from the input and only fall back to the first
            # result when no decoder changed the line.
            decoded = next(
                (text for _, text in results if text != line),
                results[0][1],
            )
            output_data.append(f"原文: {line}")
            output_data.append(f"解码: {decoded}")
            output_data.append("---")
        # Save the collected results for this file.
        output_path = os.path.join(output_dir, f"decoded_{filename}")
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(output_data))
        print(f"已处理: {filename} -> {output_path}")
# 使用示例
# batch_decode_files("./encoded_files/")

批量文件名解码(修复乱码文件名)

import os
import urllib.parse
def _repair_filename(filename):
    """Return a repaired candidate for *filename*, or None if nothing applies."""
    # Method 1: percent-decoding. Strict error handling makes escapes that
    # are not valid UTF-8 raise instead of silently becoming U+FFFD, so GBK
    # can be tried for them.
    for encoding in ('utf-8', 'gbk'):
        try:
            candidate = urllib.parse.unquote(
                filename, encoding=encoding, errors='strict')
        except UnicodeDecodeError:
            continue
        if candidate != filename:
            return candidate
        break  # nothing was percent-encoded; another encoding cannot differ

    # Methods 2 and 3: the name's bytes were wrongly decoded as Latin-1.
    # Recover the bytes and re-decode them. UTF-8 goes first because it
    # rejects most byte sequences that are not UTF-8, whereas GBK accepts
    # many UTF-8 sequences and would yield wrong characters.
    try:
        raw = filename.encode('latin1')
    except UnicodeEncodeError:
        return None
    for encoding in ('utf-8', 'gbk'):
        try:
            candidate = raw.decode(encoding)
        except UnicodeDecodeError:
            continue
        if candidate != filename:
            return candidate
    return None


def fix_mojibake_filenames(directory):
    """Rename files in *directory* whose names look percent-encoded or garbled.

    For every regular file directly inside *directory* a repaired name is
    derived (percent-decoding first, then re-decoding Latin-1 mojibake as
    UTF-8 or GBK). The file is renamed only when the repaired name is usable
    as a single path component and no entry with that name already exists.
    One line is printed per rename or skipped rename.

    Args:
        directory: Directory whose file names should be repaired in place.
    """
    for filename in os.listdir(directory):
        old_path = os.path.join(directory, filename)
        if not os.path.isfile(old_path):
            continue
        new_name = _repair_filename(filename)
        if not new_name or new_name == filename:
            continue
        # A decoded name such as "a%2Fb" would contain a path separator and
        # move the file elsewhere; NUL cannot be part of a path at all.
        has_separator = os.sep in new_name or (
            os.altsep is not None and os.altsep in new_name)
        if has_separator or '\0' in new_name or new_name in ('.', '..'):
            print(f"跳过(新文件名无效): {filename}")
            continue
        new_path = os.path.join(directory, new_name)
        # os.rename() silently replaces an existing file on POSIX systems.
        if os.path.lexists(new_path):
            print(f"跳过(目标已存在): {filename} -> {new_name}")
            continue
        os.rename(old_path, new_path)
        print(f"重命名: {filename} -> {new_name}")
# 使用
# fix_mojibake_filenames("./乱码文件目录/")

批量JSON/XML转义字符解码

import html
import json
import re
# Single-character JSON escapes and the characters they stand for.
_JSON_SIMPLE_ESCAPES = {
    '"': '"', '\\': '\\', '/': '/',
    'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}

# One pattern for all supported escapes so the text is scanned exactly once:
# group 1/2 = UTF-16 surrogate pair, group 3 = single \uXXXX,
# group 4 = single-character JSON escape.
_ESCAPE_PATTERN = re.compile(
    r'\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})'
    r'|\\u([0-9a-fA-F]{4})'
    r'|\\(["\\/bfnrt])'
)


def _decode_escape(match):
    """Return the character represented by one matched escape sequence."""
    high, low, single, simple = match.groups()
    if high is not None:
        # Combine the surrogate pair into the code point it encodes.
        code_point = 0x10000 + ((int(high, 16) - 0xD800) << 10) \
            + (int(low, 16) - 0xDC00)
        return chr(code_point)
    if single is not None:
        return chr(int(single, 16))
    return _JSON_SIMPLE_ESCAPES[simple]


def decode_escaped_text(text):
    """Decode HTML entities and JSON-style backslash escapes in *text*.

    HTML entities are resolved first. Then ``\\uXXXX`` sequences (including
    surrogate pairs, which become a single character) and the JSON escapes
    ``\\"``, ``\\\\``, ``\\/``, ``\\b``, ``\\f``, ``\\n``, ``\\r`` and ``\\t``
    are replaced in one left-to-right pass. Any other backslash sequence is
    left untouched, and raw newlines or quotes in the text do not prevent the
    remaining escapes from being decoded.

    Args:
        text: The escaped string.

    Returns:
        The decoded string.
    """
    text = html.unescape(text)
    # A single pass avoids decoding the same characters twice, e.g. the
    # escaped backslash in "\\u0041" must yield the literal text "\u0041".
    return _ESCAPE_PATTERN.sub(_decode_escape, text)
def batch_decode_escaped(input_file, output_file):
    """Decode the escaped text of one file and store the result in another.

    The whole of *input_file* is read as UTF-8, run through
    ``decode_escaped_text`` and written as UTF-8 to *output_file*; a
    confirmation line is printed afterwards.

    Args:
        input_file: Path of the file holding the escaped text.
        output_file: Path the decoded text is written to.
    """
    with open(input_file, mode='r', encoding='utf-8') as source:
        escaped = source.read()
    # Decode before opening the destination so that a decoding failure does
    # not leave a truncated output file behind.
    plain = decode_escaped_text(escaped)
    with open(output_file, mode='w', encoding='utf-8') as sink:
        sink.write(plain)
    print(f"已解码: {input_file} -> {output_file}")
# 使用
# batch_decode_escaped("input.txt", "output.txt")

使用建议

  1. 选择合适的脚本:根据你的具体编码类型选择相应的脚本
  2. 备份原文件:解码前建议备份原始文件
  3. 测试小文件:先对少量文件测试,确认解码效果
  4. 编码检测:如果不知道文件的编码格式,可以先用第三方库chardet检测(需先执行 pip install chardet 安装)

需要我针对某个特定场景(如HTML实体、Unicode转义等)提供更精确的解码脚本吗?

抱歉,评论功能暂时关闭!