本文目录导读:

我来介绍Python实现音频处理的几种常见方式和相关案例。
基础音频处理库
安装必要的库
pip install librosa numpy scipy matplotlib soundfile pydub pyaudio tqdm
# 注意:pydub 读写 MP3 等压缩格式时,需要系统中已安装 ffmpeg
常见音频处理案例
案例1:音频读取与基础信息获取
import librosa
import numpy as np
# Load the audio file.
audio_path = "example.wav"
y, sr = librosa.load(audio_path, sr=None)  # sr=None keeps the file's native sample rate

# The sample axis is the last one: a mono signal is 1-D, a multi-channel one is
# (channels, samples). `y.ndim` is therefore the array rank, not the channel
# count, and `len(y)` is only the sample count for 1-D input.
# NOTE: with librosa's default mono=True the signal is already downmixed, so
# this describes the loaded array rather than the file on disk.
num_samples = y.shape[-1]
num_channels = 1 if y.ndim == 1 else y.shape[0]

print(f"音频时长: {num_samples / sr:.2f}秒")
print(f"采样率: {sr}Hz")
print(f"声道数: {num_channels}")
print(f"音频形状: {y.shape}")
print(f"音频范围: [{y.min():.3f}, {y.max():.3f}]")
案例2:音频可视化
import matplotlib.pyplot as plt
import librosa.display
def visualize_audio(y, sr):
    """Show the waveform, STFT spectrogram and mel spectrogram of a signal.

    Draws three vertically stacked panels in a single 12x8 inch figure and
    then displays it with ``plt.show()``.

    Args:
        y: Audio samples to plot.
        sr: Sample rate of ``y`` in Hz.
    """
    db_label_format = '%+2.0f dB'
    plt.figure(figsize=(12, 8))

    # Panel 1: time-domain waveform.
    plt.subplot(3, 1, 1)
    librosa.display.waveshow(y, sr=sr)
    plt.title('波形图')
    plt.xlabel('时间 (秒)')
    plt.ylabel('振幅')

    # Panel 2: STFT magnitude in dB, referenced to its own maximum.
    plt.subplot(3, 1, 2)
    stft_magnitude = np.abs(librosa.stft(y))
    stft_db = librosa.amplitude_to_db(stft_magnitude, ref=np.max)
    librosa.display.specshow(stft_db, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar(format=db_label_format)
    plt.title('频谱图')

    # Panel 3: 128-band mel power spectrogram in dB, referenced to its maximum.
    plt.subplot(3, 1, 3)
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    librosa.display.specshow(mel_db, sr=sr, x_axis='time', y_axis='mel')
    plt.colorbar(format=db_label_format)
    plt.title('梅尔频谱图')

    plt.tight_layout()
    plt.show()
# Usage example: load the clip with librosa's default settings and plot it.
y, sr = librosa.load("example.wav")
visualize_audio(y, sr)
案例3:音频效果处理
import librosa
import numpy as np
from scipy import signal
class AudioProcessor:
    """Apply simple effects to an in-memory audio signal.

    No method modifies ``self.y``; each one returns its result.

    Args:
        y: Sample array. The last axis is treated as the sample axis.
        sr: Sample rate of ``y`` in Hz.
    """

    def __init__(self, y, sr):
        self.y = y
        self.sr = sr

    def time_stretch(self, rate=1.5):
        """Change the playback speed (``rate`` > 1 is faster)."""
        return librosa.effects.time_stretch(self.y, rate=rate)

    def pitch_shift(self, n_steps=2):
        """Shift the pitch by ``n_steps`` semitones."""
        return librosa.effects.pitch_shift(self.y, sr=self.sr, n_steps=n_steps)

    def apply_eq(self, low_cut=200, high_cut=8000):
        """Keep only the ``low_cut``..``high_cut`` band (simple equalizer).

        Uses a 4th-order Butterworth design applied forwards and backwards
        (zero phase) along the last axis.

        Args:
            low_cut: Lower band edge in Hz. A value <= 0 removes the lower
                edge, turning the filter into a low-pass.
            high_cut: Upper band edge in Hz. A value at or above the Nyquist
                frequency (``sr / 2``) removes the upper edge, turning the
                filter into a high-pass. This keeps the default of 8000 Hz
                usable at sample rates of 16 kHz and below.

        Returns:
            The filtered signal, same shape as ``self.y``.

        Raises:
            ValueError: If ``low_cut >= high_cut`` or the band does not
                overlap the representable range (0, Nyquist).
        """
        # True division: `//` would be 0.5 Hz short for odd sample rates.
        nyquist = self.sr / 2
        if low_cut >= high_cut:
            raise ValueError(
                f"low_cut ({low_cut}) must be smaller than high_cut ({high_cut})"
            )
        if high_cut <= 0 or low_cut >= nyquist:
            raise ValueError(
                f"band [{low_cut}, {high_cut}] Hz lies outside (0, {nyquist}) Hz"
            )

        has_low_edge = low_cut > 0
        has_high_edge = high_cut < nyquist
        if has_low_edge and has_high_edge:
            cutoff, kind = [low_cut, high_cut], 'bandpass'
        elif has_low_edge:
            cutoff, kind = low_cut, 'highpass'
        elif has_high_edge:
            cutoff, kind = high_cut, 'lowpass'
        else:
            # The band covers everything the signal can contain.
            return np.array(self.y, copy=True)

        # Second-order sections stay numerically stable where the (b, a)
        # polynomial form of a narrow or low-frequency band-pass may not.
        sos = signal.butter(4, cutoff, btype=kind, fs=self.sr, output='sos')
        return signal.sosfiltfilt(sos, self.y)

    def add_noise(self, noise_level=0.005):
        """Add white Gaussian noise scaled by ``noise_level``."""
        # Match the full shape so multi-channel input gets independent noise
        # per channel (len(y) is only the sample count for 1-D input).
        noise = np.random.standard_normal(np.shape(self.y))
        return self.y + noise_level * noise

    def normalize(self):
        """Scale the signal so its peak absolute value is 1.

        Empty or all-zero input is returned unchanged.
        """
        if np.size(self.y) == 0:
            return self.y
        peak = np.max(np.abs(self.y))
        if peak > 0:
            return self.y / peak
        return self.y
# Usage example
processor = AudioProcessor(y, sr)
stretched = processor.time_stretch(1.2) # 20% faster
shifted = processor.pitch_shift(3) # up 3 semitones
eq_audio = processor.apply_eq(500, 5000) # band-pass filtering
noisy = processor.add_noise(0.01) # add noise
案例4:音频特征提取
import librosa
import numpy as np
def extract_audio_features(y, sr, n_mfcc=13):
    """Summarise a signal as a flat dict of scalar features.

    Frame-wise features are averaged over all frames.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y`` in Hz.
        n_mfcc: Number of MFCC coefficients to compute (default 13).

    Returns:
        A dict of Python floats with the keys ``rms``,
        ``zero_crossing_rate``, ``spectral_centroid``, ``spectral_rolloff``,
        ``mfcc_1`` .. ``mfcc_<n_mfcc>``, ``chroma_0`` .. and ``tempo``.
    """
    features = {}

    # Time-domain features.
    features['rms'] = float(np.sqrt(np.mean(y**2)))
    features['zero_crossing_rate'] = float(librosa.feature.zero_crossing_rate(y).mean())

    # Frequency-domain features.
    spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    features['spectral_centroid'] = float(spectral_centroids.mean())
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
    features['spectral_rolloff'] = float(spectral_rolloff.mean())

    # Mel-frequency cepstral coefficients; keys are 1-based.
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    for index, coefficient in enumerate(mfccs, start=1):
        features[f'mfcc_{index}'] = float(coefficient.mean())

    # Chroma features; keys are 0-based, one per row returned.
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    for index, pitch_class in enumerate(chroma):
        features[f'chroma_{index}'] = float(pitch_class.mean())

    # Rhythm. The tempo may come back as a one-element array; calling float()
    # on such an array is deprecated in NumPy, so unwrap it explicitly.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    features['tempo'] = float(np.atleast_1d(tempo)[0])
    return features
# Usage example: print every extracted feature with four decimal places.
features = extract_audio_features(y=y, sr=sr)
for key, value in features.items():
    print(f"{key}: {value:.4f}")
案例5:音频分割与拼接
from pydub import AudioSegment
import os
class AudioEditor:
    """Convenience wrapper around a pydub ``AudioSegment``.

    Every editing method returns a new segment (or list of segments); the
    segment held in ``self.audio`` is never replaced.

    Args:
        audio_path: Path of the audio file to load.
    """

    def __init__(self, audio_path):
        self.audio = AudioSegment.from_file(audio_path)

    def trim(self, start_ms, end_ms):
        """Return the part of the audio between ``start_ms`` and ``end_ms``."""
        return self.audio[start_ms:end_ms]

    def fade_in(self, duration_ms=2000):
        """Return the audio with a fade-in of ``duration_ms`` milliseconds."""
        return self.audio.fade_in(duration_ms)

    def fade_out(self, duration_ms=2000):
        """Return the audio with a fade-out of ``duration_ms`` milliseconds."""
        return self.audio.fade_out(duration_ms)

    def change_volume(self, db_change):
        """Return the audio with ``db_change`` added to its gain."""
        return self.audio + db_change

    def concatenate(self, audio_files):
        """Return this audio followed by each file in ``audio_files``, in order."""
        combined = self.audio
        for path in audio_files:
            combined = combined + AudioSegment.from_file(path)
        return combined

    def split_by_silence(self, silence_thresh=-40, min_silence_len=1000):
        """Split the audio wherever it stays silent for long enough.

        Args:
            silence_thresh: Level threshold passed to pydub's
                ``split_on_silence``.
            min_silence_len: Minimum silence length passed to pydub's
                ``split_on_silence``.

        Returns:
            The chunks returned by ``split_on_silence``.
        """
        from pydub.silence import split_on_silence
        return split_on_silence(
            self.audio,
            min_silence_len=min_silence_len,
            silence_thresh=silence_thresh,
        )

    def save(self, audio_segment, output_path, format='wav'):
        """Write ``audio_segment`` to ``output_path`` in the given format."""
        handle = audio_segment.export(output_path, format=format)
        # export() returns the still-open output file. Close it when it was
        # opened from a path, so saving many chunks does not pile up open
        # handles. A caller-supplied file object is left for the caller.
        if handle is not None and isinstance(output_path, (str, os.PathLike)):
            handle.close()
# Usage example
editor = AudioEditor(audio_path="example.wav")

# Keep only the first 30 seconds.
trimmed = editor.trim(start_ms=0, end_ms=30000)

# Fade in over 1 s, then fade out over 2 s.
faded = trimmed.fade_in(1000).fade_out(2000)

# Change the volume by +5.
louder = editor.change_volume(db_change=5)

# Split on silence and write every chunk to its own file.
chunks = editor.split_by_silence()
for i, chunk in enumerate(chunks):
    editor.save(audio_segment=chunk, output_path=f"chunk_{i}.wav")
案例6:实时音频处理
import pyaudio
import numpy as np
import threading
from queue import Queue
class RealTimeAudioProcessor:
    """Capture mono float32 audio on a background thread and process it.

    Each captured chunk is run through ``process_audio`` and the result is
    put on ``audio_queue`` as raw float32 bytes.

    Args:
        sample_rate: Capture rate in Hz.
        chunk_size: Number of frames read per chunk.
    """

    def __init__(self, sample_rate=44100, chunk_size=1024):
        self.sample_rate = sample_rate
        self.chunk_size = chunk_size
        self.audio = pyaudio.PyAudio()
        self.is_recording = False
        self.audio_queue = Queue()

    def start_recording(self):
        """Start the capture thread; does nothing if already recording."""
        if self.is_recording:
            return
        self.is_recording = True
        self.recording_thread = threading.Thread(target=self._record)
        self.recording_thread.start()

    def _record(self):
        """Capture-thread body: read, process and enqueue chunks until stopped."""
        stream = None
        try:
            # Blocking mode only. A stream opened with stream_callback must
            # not be read with read(), so no callback is registered here.
            stream = self.audio.open(
                format=pyaudio.paFloat32,
                channels=1,
                rate=self.sample_rate,
                input=True,
                frames_per_buffer=self.chunk_size,
            )
            while self.is_recording:
                # Processing may run slower than capture; drop overflowed
                # input instead of killing the thread with an exception.
                raw = stream.read(self.chunk_size, exception_on_overflow=False)
                processed_bytes, _ = self._audio_callback(
                    raw, self.chunk_size, None, 0
                )
                self.audio_queue.put(processed_bytes)
        finally:
            # Clear the flag so a failed capture can be restarted, and always
            # release the stream.
            self.is_recording = False
            if stream is not None:
                stream.stop_stream()
                stream.close()

    def _audio_callback(self, in_data, frame_count, time_info, status):
        """Process one raw chunk; has the PyAudio stream-callback signature.

        Returns:
            ``(processed_bytes, pyaudio.paContinue)`` where the bytes are
            float32 samples.
        """
        audio_data = np.frombuffer(in_data, dtype=np.float32)
        processed_data = self.process_audio(audio_data)
        # Force float32 so the byte layout matches paFloat32 even when an
        # overriding process_audio returns another dtype.
        processed_data = np.asarray(processed_data, dtype=np.float32)
        return (processed_data.tobytes(), pyaudio.paContinue)

    def process_audio(self, audio_data):
        """Process one chunk of samples (override to customise).

        The default halves the gain and applies a 4th-order Butterworth
        low-pass at 0.1 of the Nyquist frequency. Each chunk is filtered on
        its own with zero phase; no filter state is carried between chunks.

        Returns:
            A float32 array with the same length as ``audio_data``.
        """
        from scipy import signal

        # Example: simple gain.
        gain = 0.5
        processed = audio_data * gain
        if processed.size == 0:
            return processed.astype(np.float32)

        # Example: simple noise reduction (low-pass filter).
        b, a = signal.butter(4, 0.1, btype='low')
        # filtfilt needs more samples than its padding; shrink the padding
        # for very short chunks instead of raising.
        padlen = min(3 * max(len(a), len(b)), processed.size - 1)
        filtered = signal.filtfilt(b, a, processed, padlen=padlen)
        # filtfilt computes in float64; convert back to the stream format.
        return filtered.astype(np.float32)

    def stop_recording(self):
        """Stop capturing, wait for the thread to finish, release PortAudio."""
        self.is_recording = False
        thread = getattr(self, 'recording_thread', None)
        if thread is not None:
            thread.join()
        self.audio.terminate()
# Usage example
processor = RealTimeAudioProcessor()
# processor.start_recording()
# ... processing ...
# processor.stop_recording()
完整实战:音频文件批处理
import os
import librosa
import soundfile as sf
import numpy as np
from tqdm import tqdm
class AudioBatchProcessor:
    """Normalise, resample and trim every audio file in a directory.

    Args:
        input_dir: Directory scanned for audio files.
        output_dir: Directory for the processed files; created if missing.
        target_sr: Sample rate in Hz that every file is resampled to.
        top_db: Threshold passed to ``librosa.effects.trim``.
    """

    # File-name suffixes treated as audio, matched case-insensitively.
    SUPPORTED_EXTENSIONS = ('.wav', '.mp3', '.flac')

    def __init__(self, input_dir, output_dir, target_sr=16000, top_db=20):
        self.input_dir = input_dir
        self.output_dir = output_dir
        self.target_sr = target_sr
        self.top_db = top_db
        os.makedirs(output_dir, exist_ok=True)

    def process_file(self, audio_path, output_path):
        """Process one file and write the result to ``output_path``.

        Returns:
            A dict with ``original_duration`` and ``processed_duration`` in
            seconds, plus ``original_sr`` and ``processed_sr`` in Hz.
        """
        y, sr = librosa.load(audio_path, sr=None)
        # Measure before processing: after resampling and trimming, len(y)
        # no longer describes the original file.
        original_duration = len(y) / sr

        # 1. Peak-normalise; silent or empty input is left as is to avoid a
        #    division by zero.
        peak = np.max(np.abs(y)) if y.size else 0.0
        if peak > 0:
            y = y / peak

        # 2. Resample to the target rate.
        y = librosa.resample(y, orig_sr=sr, target_sr=self.target_sr)

        # 3. Trim leading and trailing silence.
        y, _ = librosa.effects.trim(y, top_db=self.top_db)

        # 4. Save the processed audio.
        sf.write(output_path, y, self.target_sr)

        return {
            'original_duration': original_duration,
            'processed_duration': len(y) / self.target_sr,
            'original_sr': sr,
            'processed_sr': self.target_sr,
        }

    def batch_process(self):
        """Process every supported file in ``input_dir``.

        A file that fails does not stop the batch; the failure is recorded.

        Returns:
            One dict per audio file, in sorted file-name order. Each has
            ``filename`` and ``status`` (``'success'`` or ``'error'``), plus
            either the ``process_file`` info or an ``error`` message.
        """
        results = []
        # Sorted so the order of results does not depend on the file system.
        for filename in tqdm(sorted(os.listdir(self.input_dir))):
            if not filename.lower().endswith(self.SUPPORTED_EXTENSIONS):
                continue
            input_path = os.path.join(self.input_dir, filename)
            output_path = os.path.join(self.output_dir, f"processed_{filename}")
            try:
                info = self.process_file(input_path, output_path)
            except Exception as e:
                # Batch boundary: record the failure and carry on.
                results.append({
                    'filename': filename,
                    'status': 'error',
                    'error': str(e),
                })
            else:
                results.append({
                    'filename': filename,
                    'status': 'success',
                    **info,
                })
        return results
# Usage example: process a directory and report the status of each file.
processor = AudioBatchProcessor(input_dir="input_audio", output_dir="output_audio")
results = processor.batch_process()
for result in results:
    print(f"{result['filename']}: {result['status']}")
实用技巧
音频格式转换
from pydub import AudioSegment
# Format conversion
audio = AudioSegment.from_mp3("input.mp3")
audio.export("output.wav", format="wav")
# Change the sample rate
audio = audio.set_frame_rate(16000)
# Channel conversion
audio_mono = audio.set_channels(1) # convert to mono
audio_stereo = audio.set_channels(2) # convert to stereo
音频质量评估
import numpy as np
from scipy import stats
def calculate_audio_quality(y):
    """Compute simple level statistics for a signal.

    Args:
        y: Audio samples (any array-like of real numbers).

    Returns:
        A dict with three float entries:

        * ``snr`` - 10*log10 of total power over mean-removed power, in dB.
        * ``crest_factor`` - peak magnitude divided by the RMS level.
        * ``dynamic_range`` - 20*log10 of the largest over the smallest
          non-zero magnitude, in dB.

        Metrics that are undefined (empty or all-zero input) are NaN.
    """
    samples = np.asarray(y, dtype=float)
    undefined = float('nan')
    metrics = {
        'snr': undefined,
        'crest_factor': undefined,
        'dynamic_range': undefined,
    }
    if samples.size == 0:
        return metrics

    signal_power = float(np.mean(samples**2))
    if signal_power == 0:
        # Silence: every ratio below would be 0/0.
        return metrics

    magnitude = np.abs(samples)
    peak = float(magnitude.max())

    # NOTE(review): the "noise" here is just the signal with its mean
    # removed, so this value only reflects the DC offset and is about 0 dB
    # for any zero-mean signal. It is not a true signal-to-noise estimate;
    # the formula is kept as is for compatibility.
    noise_power = float(np.mean((samples - samples.mean())**2))
    if noise_power > 0:
        metrics['snr'] = float(10 * np.log10(signal_power / noise_power))
    else:
        metrics['snr'] = float('inf')

    # Crest factor.
    metrics['crest_factor'] = float(peak / np.sqrt(signal_power))

    # Dynamic range. The floor is the smallest non-zero magnitude of either
    # sign; selecting only y > 0 would ignore negative samples and fail when
    # there are no positive ones.
    floor = float(magnitude[magnitude > 0].min())
    metrics['dynamic_range'] = float(20 * np.log10(peak / floor))
    return metrics
本教程涵盖了 Python 音频处理的主要方面,你可以根据具体需求选择和组合这些功能。如果需要处理特定类型的音频(如语音、音乐或环境音),可能还需要相应地调整参数和方法。