"""WebRTC回声消除(AEC)演示脚本.
|
||
|
||
该脚本用于演示WebRTC APM库的回声消除功能:
|
||
1. 播放指定的音频文件(作为参考信号)
|
||
2. 同时录制麦克风输入(包含回声和环境声音)
|
||
3. 应用WebRTC回声消除处理
|
||
4. 保存原始录音和处理后的录音,以便比较
|
||
|
||
用法:
|
||
python webrtc_aec_demo.py [音频文件路径]
|
||
|
||
示例:
|
||
python webrtc_aec_demo.py 鞠婧祎.wav
|
||
"""
|
||
|
||
import ctypes
import os
import sys
import threading
import time
import wave
from ctypes import POINTER, Structure, byref, c_bool, c_float, c_int, c_short, c_void_p

import numpy as np
import pyaudio
import pygame
import soundfile as sf
from pygame import mixer

# Absolute path to the DLL
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(current_dir)
dll_path = os.path.join(
    project_root, "libs", "webrtc_apm", "win", "x86_64", "libwebrtc_apm.dll"
)

# Load the DLL
try:
    apm_lib = ctypes.CDLL(dll_path)
    print(f"Loaded WebRTC APM library: {dll_path}")
except Exception as e:
    print(f"Failed to load WebRTC APM library: {e}")
    sys.exit(1)


# Structure and enum definitions. The classes below act as integer constant
# namespaces matching the native library's enums.
class DownmixMethod(ctypes.c_int):
    AverageChannels = 0
    UseFirstChannel = 1


class NoiseSuppressionLevel(ctypes.c_int):
    Low = 0
    Moderate = 1
    High = 2
    VeryHigh = 3


class GainControllerMode(ctypes.c_int):
    AdaptiveAnalog = 0
    AdaptiveDigital = 1
    FixedDigital = 2


class ClippingPredictorMode(ctypes.c_int):
    ClippingEventPrediction = 0
    AdaptiveStepClippingPeakPrediction = 1
    FixedStepClippingPeakPrediction = 2


# Pipeline structure
class Pipeline(Structure):
    _fields_ = [
        ("MaximumInternalProcessingRate", c_int),
        ("MultiChannelRender", c_bool),
        ("MultiChannelCapture", c_bool),
        ("CaptureDownmixMethod", c_int),
    ]


# PreAmplifier structure
class PreAmplifier(Structure):
    _fields_ = [("Enabled", c_bool), ("FixedGainFactor", c_float)]


# AnalogMicGainEmulation structure
class AnalogMicGainEmulation(Structure):
    _fields_ = [("Enabled", c_bool), ("InitialLevel", c_int)]


# CaptureLevelAdjustment structure
class CaptureLevelAdjustment(Structure):
    _fields_ = [
        ("Enabled", c_bool),
        ("PreGainFactor", c_float),
        ("PostGainFactor", c_float),
        ("MicGainEmulation", AnalogMicGainEmulation),
    ]


# HighPassFilter structure
class HighPassFilter(Structure):
    _fields_ = [("Enabled", c_bool), ("ApplyInFullBand", c_bool)]


# EchoCanceller structure
class EchoCanceller(Structure):
    _fields_ = [
        ("Enabled", c_bool),
        ("MobileMode", c_bool),
        ("ExportLinearAecOutput", c_bool),
        ("EnforceHighPassFiltering", c_bool),
    ]


# NoiseSuppression structure
class NoiseSuppression(Structure):
    _fields_ = [
        ("Enabled", c_bool),
        ("NoiseLevel", c_int),
        ("AnalyzeLinearAecOutputWhenAvailable", c_bool),
    ]


# TransientSuppression structure
class TransientSuppression(Structure):
    _fields_ = [("Enabled", c_bool)]


# ClippingPredictor structure
class ClippingPredictor(Structure):
    _fields_ = [
        ("Enabled", c_bool),
        ("PredictorMode", c_int),
        ("WindowLength", c_int),
        ("ReferenceWindowLength", c_int),
        ("ReferenceWindowDelay", c_int),
        ("ClippingThreshold", c_float),
        ("CrestFactorMargin", c_float),
        ("UsePredictedStep", c_bool),
    ]


# AnalogGainController structure
class AnalogGainController(Structure):
    _fields_ = [
        ("Enabled", c_bool),
        ("StartupMinVolume", c_int),
        ("ClippedLevelMin", c_int),
        ("EnableDigitalAdaptive", c_bool),
        ("ClippedLevelStep", c_int),
        ("ClippedRatioThreshold", c_float),
        ("ClippedWaitFrames", c_int),
        ("Predictor", ClippingPredictor),
    ]


# GainController1 structure
class GainController1(Structure):
    _fields_ = [
        ("Enabled", c_bool),
        ("ControllerMode", c_int),
        ("TargetLevelDbfs", c_int),
        ("CompressionGainDb", c_int),
        ("EnableLimiter", c_bool),
        ("AnalogController", AnalogGainController),
    ]


# InputVolumeController structure
class InputVolumeController(Structure):
    _fields_ = [("Enabled", c_bool)]


# AdaptiveDigital structure
class AdaptiveDigital(Structure):
    _fields_ = [
        ("Enabled", c_bool),
        ("HeadroomDb", c_float),
        ("MaxGainDb", c_float),
        ("InitialGainDb", c_float),
        ("MaxGainChangeDbPerSecond", c_float),
        ("MaxOutputNoiseLevelDbfs", c_float),
    ]


# FixedDigital structure
class FixedDigital(Structure):
    _fields_ = [("GainDb", c_float)]


# GainController2 structure
class GainController2(Structure):
    _fields_ = [
        ("Enabled", c_bool),
        ("VolumeController", InputVolumeController),
        ("AdaptiveController", AdaptiveDigital),
        ("FixedController", FixedDigital),
    ]


# The full Config structure
class Config(Structure):
    _fields_ = [
        ("PipelineConfig", Pipeline),
        ("PreAmp", PreAmplifier),
        ("LevelAdjustment", CaptureLevelAdjustment),
        ("HighPass", HighPassFilter),
        ("Echo", EchoCanceller),
        ("NoiseSuppress", NoiseSuppression),
        ("TransientSuppress", TransientSuppression),
        ("GainControl1", GainController1),
        ("GainControl2", GainController2),
    ]


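# The ctypes layout above must match the C side of libwebrtc_apm field for
# field, or WebRTC_APM_ApplyConfig will read garbage. The helper below is a
# small debugging sketch added for illustration (it is not called anywhere in
# this demo): it prints each field's offset and size so they can be compared
# against the native definition.
def _dump_config_layout(struct=Config, indent=""):
    """Recursively print field offsets/sizes of a ctypes Structure."""
    for name, ctype in struct._fields_:
        descriptor = getattr(struct, name)
        print(f"{indent}{name}: offset={descriptor.offset}, size={descriptor.size}")
        if issubclass(ctype, Structure):
            _dump_config_layout(ctype, indent + "    ")

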
# DLL function prototypes
apm_lib.WebRTC_APM_Create.restype = c_void_p
apm_lib.WebRTC_APM_Create.argtypes = []

apm_lib.WebRTC_APM_Destroy.restype = None
apm_lib.WebRTC_APM_Destroy.argtypes = [c_void_p]

apm_lib.WebRTC_APM_CreateStreamConfig.restype = c_void_p
apm_lib.WebRTC_APM_CreateStreamConfig.argtypes = [c_int, c_int]

apm_lib.WebRTC_APM_DestroyStreamConfig.restype = None
apm_lib.WebRTC_APM_DestroyStreamConfig.argtypes = [c_void_p]

apm_lib.WebRTC_APM_ApplyConfig.restype = c_int
apm_lib.WebRTC_APM_ApplyConfig.argtypes = [c_void_p, POINTER(Config)]

apm_lib.WebRTC_APM_ProcessReverseStream.restype = c_int
apm_lib.WebRTC_APM_ProcessReverseStream.argtypes = [
    c_void_p,
    POINTER(c_short),
    c_void_p,
    c_void_p,
    POINTER(c_short),
]

apm_lib.WebRTC_APM_ProcessStream.restype = c_int
apm_lib.WebRTC_APM_ProcessStream.argtypes = [
    c_void_p,
    POINTER(c_short),
    c_void_p,
    c_void_p,
    POINTER(c_short),
]

apm_lib.WebRTC_APM_SetStreamDelayMs.restype = None
apm_lib.WebRTC_APM_SetStreamDelayMs.argtypes = [c_void_p, c_int]


def create_apm_config():
    """Create the WebRTC APM config, tuned to keep speech natural and reduce error code -11."""
    config = Config()

    # Pipeline: use a standard rate to avoid resampling issues
    config.PipelineConfig.MaximumInternalProcessingRate = 16000  # rate WebRTC is optimized for
    config.PipelineConfig.MultiChannelRender = False
    config.PipelineConfig.MultiChannelCapture = False
    config.PipelineConfig.CaptureDownmixMethod = DownmixMethod.AverageChannels

    # PreAmplifier: avoid pre-amplification artifacts
    config.PreAmp.Enabled = False  # disabled to avoid distortion
    config.PreAmp.FixedGainFactor = 1.0  # unity gain

    # LevelAdjustment: keep level adjustment simple
    config.LevelAdjustment.Enabled = False  # disabled to avoid conflicting processing
    config.LevelAdjustment.PreGainFactor = 1.0
    config.LevelAdjustment.PostGainFactor = 1.0
    config.LevelAdjustment.MicGainEmulation.Enabled = False
    config.LevelAdjustment.MicGainEmulation.InitialLevel = 100  # modest initial level to avoid saturation

    # HighPassFilter: standard high-pass filtering
    config.HighPass.Enabled = True  # remove low-frequency noise
    config.HighPass.ApplyInFullBand = True  # apply across the full band for better compatibility

    # EchoCanceller
    config.Echo.Enabled = True  # enable echo cancellation
    config.Echo.MobileMode = False  # standard (desktop) mode rather than mobile mode for better quality
    config.Echo.ExportLinearAecOutput = False
    config.Echo.EnforceHighPassFiltering = True  # enforce high-pass filtering to help remove low-frequency echo

    # NoiseSuppression: moderate-strength suppression
    config.NoiseSuppress.Enabled = True
    config.NoiseSuppress.NoiseLevel = NoiseSuppressionLevel.Moderate
    config.NoiseSuppress.AnalyzeLinearAecOutputWhenAvailable = True

    # TransientSuppression
    config.TransientSuppress.Enabled = False  # disabled so speech is not chopped up

    # GainController1: light gain control
    config.GainControl1.Enabled = True
    config.GainControl1.ControllerMode = GainControllerMode.AdaptiveDigital
    config.GainControl1.TargetLevelDbfs = 3  # lower target level (more aggressive control)
    config.GainControl1.CompressionGainDb = 9  # moderate compression gain
    config.GainControl1.EnableLimiter = True  # enable the limiter

    # AnalogGainController
    config.GainControl1.AnalogController.Enabled = False  # analog gain control off
    config.GainControl1.AnalogController.StartupMinVolume = 0
    config.GainControl1.AnalogController.ClippedLevelMin = 70
    config.GainControl1.AnalogController.EnableDigitalAdaptive = False
    config.GainControl1.AnalogController.ClippedLevelStep = 15
    config.GainControl1.AnalogController.ClippedRatioThreshold = 0.1
    config.GainControl1.AnalogController.ClippedWaitFrames = 300

    # ClippingPredictor
    predictor = config.GainControl1.AnalogController.Predictor
    predictor.Enabled = False
    predictor.PredictorMode = ClippingPredictorMode.ClippingEventPrediction
    predictor.WindowLength = 5
    predictor.ReferenceWindowLength = 5
    predictor.ReferenceWindowDelay = 5
    predictor.ClippingThreshold = -1.0
    predictor.CrestFactorMargin = 3.0
    predictor.UsePredictedStep = True

    # GainController2: disabled to avoid conflicts
    config.GainControl2.Enabled = False
    config.GainControl2.VolumeController.Enabled = False
    config.GainControl2.AdaptiveController.Enabled = False
    config.GainControl2.AdaptiveController.HeadroomDb = 5.0
    config.GainControl2.AdaptiveController.MaxGainDb = 30.0
    config.GainControl2.AdaptiveController.InitialGainDb = 15.0
    config.GainControl2.AdaptiveController.MaxGainChangeDbPerSecond = 6.0
    config.GainControl2.AdaptiveController.MaxOutputNoiseLevelDbfs = -50.0
    config.GainControl2.FixedController.GainDb = 0.0

    return config


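# A minimal, self-contained sketch of the expected APM call sequence (added
# for illustration; the demo below does the same thing interleaved with real
# audio I/O). It pushes one 10 ms frame of silence through the render path and
# then the capture path, and returns the capture-path error code.
def _apm_call_sequence_sketch(sample_rate=16000, channels=1):
    frame = np.zeros(sample_rate // 100, dtype=np.int16)  # one 10 ms frame
    out = np.zeros_like(frame)
    apm = apm_lib.WebRTC_APM_Create()
    stream_cfg = apm_lib.WebRTC_APM_CreateStreamConfig(sample_rate, channels)
    try:
        cfg = create_apm_config()
        apm_lib.WebRTC_APM_ApplyConfig(apm, byref(cfg))
        apm_lib.WebRTC_APM_SetStreamDelayMs(apm, 50)
        # The render (far-end/reference) frame must be fed before the matching
        # capture (near-end/microphone) frame.
        apm_lib.WebRTC_APM_ProcessReverseStream(
            apm,
            frame.ctypes.data_as(POINTER(c_short)),
            stream_cfg,
            stream_cfg,
            out.ctypes.data_as(POINTER(c_short)),
        )
        return apm_lib.WebRTC_APM_ProcessStream(
            apm,
            frame.ctypes.data_as(POINTER(c_short)),
            stream_cfg,
            stream_cfg,
            out.ctypes.data_as(POINTER(c_short)),
        )
    finally:
        apm_lib.WebRTC_APM_DestroyStreamConfig(stream_cfg)
        apm_lib.WebRTC_APM_Destroy(apm)

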
# Reference audio buffer (holds the speaker output)
reference_buffer = []
reference_lock = threading.Lock()


def record_playback_audio(chunk_size, sample_rate, channels):
    """Record the speaker output (a more accurate reference signal)."""
    global reference_buffer

    # Note: this is the ideal implementation, but on Windows stock PyAudio
    # usually cannot record the speaker output directly. In practice another
    # mechanism is needed to capture the system audio output.
    try:
        p = pyaudio.PyAudio()

        # Try to open a stream recording from the default device (works on a
        # few systems only; on most it does not, and this is just an example)
        loopback_stream = p.open(
            format=pyaudio.paInt16,
            channels=channels,
            rate=sample_rate,
            input=True,
            frames_per_buffer=chunk_size,
            input_device_index=None,  # None selects the default *input* device
        )

        # Start recording
        while True:
            try:
                data = loopback_stream.read(chunk_size, exception_on_overflow=False)
                with reference_lock:
                    reference_buffer.append(data)
                    # Keep the buffer bounded to roughly the 100 most recent frames
                    if len(reference_buffer) > 100:
                        reference_buffer = reference_buffer[-100:]
            except OSError:
                break
    except Exception as e:
        print(f"Unable to record system audio: {e}")
    finally:
        try:
            if "loopback_stream" in locals() and loopback_stream:
                loopback_stream.stop_stream()
                loopback_stream.close()
            if "p" in locals() and p:
                p.terminate()
        except Exception:
            pass


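# On Windows, a workable way to get a true loopback reference (an assumption
# added for illustration; this demo does not depend on it) is the third-party
# `soundcard` package, which can open the default speaker as a loopback
# microphone. A minimal sketch, kept commented out:
#
#     import soundcard
#
#     speaker = soundcard.default_speaker()
#     loopback_mic = soundcard.get_microphone(speaker.name, include_loopback=True)
#     with loopback_mic.recorder(samplerate=16000, channels=1) as rec:
#         frames = rec.record(numframes=160)  # one 10 ms frame, float32
#         pcm16 = (frames[:, 0] * 32767).astype(np.int16)

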
def aec_demo(audio_file):
    """Main routine of the WebRTC echo cancellation demo."""
    # Make sure the audio file exists
    if not os.path.exists(audio_file):
        print(f"Error: audio file not found: {audio_file}")
        return

    # Audio parameters tuned for WebRTC
    SAMPLE_RATE = 16000  # 16 kHz (the sample rate WebRTC AEC is optimized for)
    CHANNELS = 1  # mono
    CHUNK = 160  # samples per frame (10 ms @ 16 kHz, WebRTC's standard frame size)
    FORMAT = pyaudio.paInt16  # 16-bit PCM

    # Initialize PyAudio
    p = pyaudio.PyAudio()

    # List all available audio devices for reference
    print("\nAvailable audio devices:")
    for i in range(p.get_device_count()):
        dev_info = p.get_device_info_by_index(i)
        print(f"Device {i}: {dev_info['name']}")
        print(f"  - input channels: {dev_info['maxInputChannels']}")
        print(f"  - output channels: {dev_info['maxOutputChannels']}")
        print(f"  - default sample rate: {dev_info['defaultSampleRate']}")
        print("")

    # Open the microphone input stream
    input_stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=SAMPLE_RATE,
        input=True,
        frames_per_buffer=CHUNK,
    )

    # Initialize pygame for playback
    pygame.init()
    mixer.init(frequency=SAMPLE_RATE, size=-16, channels=CHANNELS, buffer=CHUNK * 4)

    # Load the reference audio file
    print(f"Loading audio file: {audio_file}")

    # Read the reference audio and convert its sample rate/channel count.
    # soundfile is used here so that multiple formats are supported.
    try:
        print("Loading reference audio...")
        # Read the original audio with soundfile
        ref_audio_data, orig_sr = sf.read(audio_file, dtype="int16")
        print(
            f"Original audio: sample rate={orig_sr}, channels="
            f"{ref_audio_data.shape[1] if len(ref_audio_data.shape) > 1 else 1}"
        )

        # Downmix to mono if the file is stereo
        if len(ref_audio_data.shape) > 1 and ref_audio_data.shape[1] > 1:
            ref_audio_data = ref_audio_data.mean(axis=1).astype(np.int16)

        # Resample if needed
        if orig_sr != SAMPLE_RATE:
            print(f"Resampling reference audio from {orig_sr} Hz to {SAMPLE_RATE} Hz...")
            # Resample with scipy
            from scipy import signal

            ref_audio_data = signal.resample(
                ref_audio_data, int(len(ref_audio_data) * SAMPLE_RATE / orig_sr)
            ).astype(np.int16)
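            # Note: scipy.signal.resample is FFT-based, which assumes the
            # signal is periodic and can ring at the edges. A polyphase
            # alternative (an equivalent sketch, not what this script uses):
            #     from math import gcd
            #     g = gcd(SAMPLE_RATE, orig_sr)
            #     ref_audio_data = signal.resample_poly(
            #         ref_audio_data, SAMPLE_RATE // g, orig_sr // g
            #     ).astype(np.int16)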

        # Save to a temporary wav file for pygame playback
        temp_wav_path = os.path.join(current_dir, "temp_reference.wav")
        with wave.open(temp_wav_path, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)  # 2 bytes (16-bit)
            wf.setframerate(SAMPLE_RATE)
            wf.writeframes(ref_audio_data.tobytes())

        # Split the reference audio into CHUNK-sized frames
        ref_audio_frames = []
        for i in range(0, len(ref_audio_data), CHUNK):
            if i + CHUNK <= len(ref_audio_data):
                ref_audio_frames.append(ref_audio_data[i : i + CHUNK])
            else:
                # Zero-pad the final frame if it is shorter than CHUNK
                last_frame = np.zeros(CHUNK, dtype=np.int16)
                last_frame[: len(ref_audio_data) - i] = ref_audio_data[i:]
                ref_audio_frames.append(last_frame)

        print(f"Reference audio ready: {len(ref_audio_frames)} frames")

        # Load the prepared temporary WAV file
        mixer.music.load(temp_wav_path)
    except Exception as e:
        print(f"Error while loading reference audio: {e}")
        sys.exit(1)

    # Create the WebRTC APM instance
    apm = apm_lib.WebRTC_APM_Create()

    # Apply the APM configuration
    config = create_apm_config()
    result = apm_lib.WebRTC_APM_ApplyConfig(apm, byref(config))
    if result != 0:
        print(f"Warning: failed to apply APM config, error code: {result}")

    # Create the stream configuration
    stream_config = apm_lib.WebRTC_APM_CreateStreamConfig(SAMPLE_RATE, CHANNELS)

    # Use a small stream delay so the reference and microphone signals align more accurately
    apm_lib.WebRTC_APM_SetStreamDelayMs(apm, 50)

    # Recording buffers
    original_frames = []
    processed_frames = []
    reference_frames = []

    # Give the audio system a moment to get ready
    time.sleep(0.5)

    print("Starting recording and processing...")
    print("Playing reference audio...")

    mixer.music.play()

    # Recording duration (based on the audio file length)
    try:
        sound_length = mixer.Sound(temp_wav_path).get_length()
        recording_time = sound_length if sound_length > 0 else 10
    except Exception:
        recording_time = 10  # default to 10 seconds if the length is unavailable

    recording_time += 1  # one extra second to make sure everything is captured

    start_time = time.time()
    current_ref_frame_index = 0
    try:
        while time.time() - start_time < recording_time:
            # Read one frame from the microphone
            input_data = input_stream.read(CHUNK, exception_on_overflow=False)

            # Keep the raw recording
            original_frames.append(input_data)

            # View the input bytes as a short array
            input_array = np.frombuffer(input_data, dtype=np.int16)
            input_ptr = input_array.ctypes.data_as(POINTER(c_short))

            # Fetch the current reference frame
            if current_ref_frame_index < len(ref_audio_frames):
                ref_array = ref_audio_frames[current_ref_frame_index]
                reference_frames.append(ref_array.tobytes())
                current_ref_frame_index += 1
            else:
                # Use silence once the reference audio has finished
                ref_array = np.zeros(CHUNK, dtype=np.int16)
                reference_frames.append(ref_array.tobytes())

            ref_ptr = ref_array.ctypes.data_as(POINTER(c_short))

            # Output buffer
            output_array = np.zeros(CHUNK, dtype=np.int16)
            output_ptr = output_array.ctypes.data_as(POINTER(c_short))

            # Important: process the reference (speaker) signal first.
            # An output buffer must be supplied even though it is unused.
            ref_output_array = np.zeros(CHUNK, dtype=np.int16)
            ref_output_ptr = ref_output_array.ctypes.data_as(POINTER(c_short))

            result_reverse = apm_lib.WebRTC_APM_ProcessReverseStream(
                apm, ref_ptr, stream_config, stream_config, ref_output_ptr
            )

            if result_reverse != 0:
                print(f"\rWarning: reference frame processing failed, error code: {result_reverse}")

            # Then process the microphone signal with echo cancellation applied
            result = apm_lib.WebRTC_APM_ProcessStream(
                apm, input_ptr, stream_config, stream_config, output_ptr
            )

            if result != 0:
                print(f"\rWarning: processing failed, error code: {result}")

            # Keep the processed frame
            processed_frames.append(output_array.tobytes())

            # Compute and display progress
            progress = (time.time() - start_time) / recording_time * 100
            sys.stdout.write(f"\rProgress: {progress:.1f}%")
            sys.stdout.flush()

    except KeyboardInterrupt:
        print("\nRecording interrupted by user")
    finally:
        print("\nRecording and processing finished")

        # Stop playback
        mixer.music.stop()

        # Close the audio stream
        input_stream.stop_stream()
        input_stream.close()

        # Release APM resources
        apm_lib.WebRTC_APM_DestroyStreamConfig(stream_config)
        apm_lib.WebRTC_APM_Destroy(apm)

        # Shut down PyAudio
        p.terminate()

        # Save the raw recording
        original_output_path = os.path.join(current_dir, "original_recording.wav")
        save_wav(original_output_path, original_frames, SAMPLE_RATE, CHANNELS)

        # Save the processed recording
        processed_output_path = os.path.join(current_dir, "processed_recording.wav")
        save_wav(processed_output_path, processed_frames, SAMPLE_RATE, CHANNELS)

        # Save the reference audio (what was played)
        reference_output_path = os.path.join(current_dir, "reference_playback.wav")
        save_wav(reference_output_path, reference_frames, SAMPLE_RATE, CHANNELS)

        # Remove the temporary file
        if os.path.exists(temp_wav_path):
            try:
                os.remove(temp_wav_path)
            except Exception:
                pass

        print(f"Raw recording saved to: {original_output_path}")
        print(f"Processed recording saved to: {processed_output_path}")
        print(f"Reference audio saved to: {reference_output_path}")

        # Quit pygame
        pygame.quit()


def save_wav(file_path, frames, sample_rate, channels):
    """Save audio frames to a WAV file."""
    with wave.open(file_path, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(2)  # 2 bytes (16-bit)
        wf.setframerate(sample_rate)
        # Frames may be raw bytes or int16 numpy arrays; normalize everything
        # to bytes instead of silently dropping non-bytes frames.
        wf.writeframes(
            b"".join(f if isinstance(f, bytes) else f.tobytes() for f in frames)
        )


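# A rough way to compare the raw and processed recordings (added as an
# illustrative helper; exact numbers depend on levels and timing). ERLE
# (echo return loss enhancement) is estimated as the power ratio of the raw
# capture to the processed capture, in dB; it is only meaningful while the
# reference audio is actually playing into the room.
def estimate_erle_db(original_path, processed_path):
    """Estimate ERLE as 10*log10(P_original / P_processed) over the overlap."""
    orig, _ = sf.read(original_path, dtype="float32")
    proc, _ = sf.read(processed_path, dtype="float32")
    n = min(len(orig), len(proc))
    p_orig = float(np.mean(orig[:n] ** 2)) + 1e-12  # epsilon guards the ratio
    p_proc = float(np.mean(proc[:n] ** 2)) + 1e-12
    return 10.0 * np.log10(p_orig / p_proc)

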
if __name__ == "__main__":
    # Parse the command-line argument
    if len(sys.argv) > 1:
        audio_file = sys.argv[1]
    else:
        # Default to 鞠婧祎.wav in the scripts directory
        audio_file = os.path.join(current_dir, "鞠婧祎.wav")

        # If the default file does not exist, try the MP3 version
        if not os.path.exists(audio_file):
            audio_file = os.path.join(current_dir, "鞠婧祎.mp3")
            if not os.path.exists(audio_file):
                print("Error: default audio file not found; please specify an audio file to play")
                print("Usage: python webrtc_aec_demo.py [audio file path]")
                sys.exit(1)

    # Run the demo
    aec_demo(audio_file)
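    # Optional follow-up (commented out): put a number on the AEC run using
    # the estimate_erle_db helper defined above.
    # erle = estimate_erle_db(
    #     os.path.join(current_dir, "original_recording.wav"),
    #     os.path.join(current_dir, "processed_recording.wav"),
    # )
    # print(f"Estimated ERLE: {erle:.1f} dB")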