"""WebRTC回声消除(AEC)演示脚本. 该脚本用于演示WebRTC APM库的回声消除功能: 1. 播放指定的音频文件(作为参考信号) 2. 同时录制麦克风输入(包含回声和环境声音) 3. 应用WebRTC回声消除处理 4. 保存原始录音和处理后的录音,以便比较 用法: python webrtc_aec_demo.py [音频文件路径] 示例: python webrtc_aec_demo.py 鞠婧祎.wav """ import ctypes import os import sys import threading import time import wave from ctypes import POINTER, Structure, byref, c_bool, c_float, c_int, c_short, c_void_p import numpy as np import pyaudio import pygame import soundfile as sf from pygame import mixer # 获取DLL文件的绝对路径 current_dir = os.path.dirname(os.path.abspath(__file__)) project_root = os.path.dirname(current_dir) dll_path = os.path.join( project_root, "libs", "webrtc_apm", "win", "x86_64", "libwebrtc_apm.dll" ) # 加载DLL try: apm_lib = ctypes.CDLL(dll_path) print(f"成功加载WebRTC APM库: {dll_path}") except Exception as e: print(f"加载WebRTC APM库失败: {e}") sys.exit(1) # 定义结构体和枚举类型 class DownmixMethod(ctypes.c_int): AverageChannels = 0 UseFirstChannel = 1 class NoiseSuppressionLevel(ctypes.c_int): Low = 0 Moderate = 1 High = 2 VeryHigh = 3 class GainControllerMode(ctypes.c_int): AdaptiveAnalog = 0 AdaptiveDigital = 1 FixedDigital = 2 class ClippingPredictorMode(ctypes.c_int): ClippingEventPrediction = 0 AdaptiveStepClippingPeakPrediction = 1 FixedStepClippingPeakPrediction = 2 # 定义Pipeline结构体 class Pipeline(Structure): _fields_ = [ ("MaximumInternalProcessingRate", c_int), ("MultiChannelRender", c_bool), ("MultiChannelCapture", c_bool), ("CaptureDownmixMethod", c_int), ] # 定义PreAmplifier结构体 class PreAmplifier(Structure): _fields_ = [("Enabled", c_bool), ("FixedGainFactor", c_float)] # 定义AnalogMicGainEmulation结构体 class AnalogMicGainEmulation(Structure): _fields_ = [("Enabled", c_bool), ("InitialLevel", c_int)] # 定义CaptureLevelAdjustment结构体 class CaptureLevelAdjustment(Structure): _fields_ = [ ("Enabled", c_bool), ("PreGainFactor", c_float), ("PostGainFactor", c_float), ("MicGainEmulation", AnalogMicGainEmulation), ] # 定义HighPassFilter结构体 class HighPassFilter(Structure): _fields_ = [("Enabled", c_bool), ("ApplyInFullBand", c_bool)] # 定义EchoCanceller结构体 class EchoCanceller(Structure): _fields_ = [ ("Enabled", c_bool), ("MobileMode", c_bool), ("ExportLinearAecOutput", c_bool), ("EnforceHighPassFiltering", c_bool), ] # 定义NoiseSuppression结构体 class NoiseSuppression(Structure): _fields_ = [ ("Enabled", c_bool), ("NoiseLevel", c_int), ("AnalyzeLinearAecOutputWhenAvailable", c_bool), ] # 定义TransientSuppression结构体 class TransientSuppression(Structure): _fields_ = [("Enabled", c_bool)] # 定义ClippingPredictor结构体 class ClippingPredictor(Structure): _fields_ = [ ("Enabled", c_bool), ("PredictorMode", c_int), ("WindowLength", c_int), ("ReferenceWindowLength", c_int), ("ReferenceWindowDelay", c_int), ("ClippingThreshold", c_float), ("CrestFactorMargin", c_float), ("UsePredictedStep", c_bool), ] # 定义AnalogGainController结构体 class AnalogGainController(Structure): _fields_ = [ ("Enabled", c_bool), ("StartupMinVolume", c_int), ("ClippedLevelMin", c_int), ("EnableDigitalAdaptive", c_bool), ("ClippedLevelStep", c_int), ("ClippedRatioThreshold", c_float), ("ClippedWaitFrames", c_int), ("Predictor", ClippingPredictor), ] # 定义GainController1结构体 class GainController1(Structure): _fields_ = [ ("Enabled", c_bool), ("ControllerMode", c_int), ("TargetLevelDbfs", c_int), ("CompressionGainDb", c_int), ("EnableLimiter", c_bool), ("AnalogController", AnalogGainController), ] # 定义InputVolumeController结构体 class InputVolumeController(Structure): _fields_ = [("Enabled", c_bool)] # 定义AdaptiveDigital结构体 class AdaptiveDigital(Structure): _fields_ = [ ("Enabled", c_bool), 
("HeadroomDb", c_float), ("MaxGainDb", c_float), ("InitialGainDb", c_float), ("MaxGainChangeDbPerSecond", c_float), ("MaxOutputNoiseLevelDbfs", c_float), ] # 定义FixedDigital结构体 class FixedDigital(Structure): _fields_ = [("GainDb", c_float)] # 定义GainController2结构体 class GainController2(Structure): _fields_ = [ ("Enabled", c_bool), ("VolumeController", InputVolumeController), ("AdaptiveController", AdaptiveDigital), ("FixedController", FixedDigital), ] # 定义完整的Config结构体 class Config(Structure): _fields_ = [ ("PipelineConfig", Pipeline), ("PreAmp", PreAmplifier), ("LevelAdjustment", CaptureLevelAdjustment), ("HighPass", HighPassFilter), ("Echo", EchoCanceller), ("NoiseSuppress", NoiseSuppression), ("TransientSuppress", TransientSuppression), ("GainControl1", GainController1), ("GainControl2", GainController2), ] # 定义DLL函数原型 apm_lib.WebRTC_APM_Create.restype = c_void_p apm_lib.WebRTC_APM_Create.argtypes = [] apm_lib.WebRTC_APM_Destroy.restype = None apm_lib.WebRTC_APM_Destroy.argtypes = [c_void_p] apm_lib.WebRTC_APM_CreateStreamConfig.restype = c_void_p apm_lib.WebRTC_APM_CreateStreamConfig.argtypes = [c_int, c_int] apm_lib.WebRTC_APM_DestroyStreamConfig.restype = None apm_lib.WebRTC_APM_DestroyStreamConfig.argtypes = [c_void_p] apm_lib.WebRTC_APM_ApplyConfig.restype = c_int apm_lib.WebRTC_APM_ApplyConfig.argtypes = [c_void_p, POINTER(Config)] apm_lib.WebRTC_APM_ProcessReverseStream.restype = c_int apm_lib.WebRTC_APM_ProcessReverseStream.argtypes = [ c_void_p, POINTER(c_short), c_void_p, c_void_p, POINTER(c_short), ] apm_lib.WebRTC_APM_ProcessStream.restype = c_int apm_lib.WebRTC_APM_ProcessStream.argtypes = [ c_void_p, POINTER(c_short), c_void_p, c_void_p, POINTER(c_short), ] apm_lib.WebRTC_APM_SetStreamDelayMs.restype = None apm_lib.WebRTC_APM_SetStreamDelayMs.argtypes = [c_void_p, c_int] def create_apm_config(): """创建WebRTC APM配置 - 优化为保留自然语音,减少错误码-11问题""" config = Config() # 设置Pipeline配置 - 使用标准采样率避免重采样问题 config.PipelineConfig.MaximumInternalProcessingRate = 16000 # WebRTC优化频率 config.PipelineConfig.MultiChannelRender = False config.PipelineConfig.MultiChannelCapture = False config.PipelineConfig.CaptureDownmixMethod = DownmixMethod.AverageChannels # 设置PreAmplifier配置 - 减少预放大干扰 config.PreAmp.Enabled = False # 关闭预放大,避免失真 config.PreAmp.FixedGainFactor = 1.0 # 不增益 # 设置LevelAdjustment配置 - 简化电平调整 config.LevelAdjustment.Enabled = False # 禁用电平调整以减少处理冲突 config.LevelAdjustment.PreGainFactor = 1.0 config.LevelAdjustment.PostGainFactor = 1.0 config.LevelAdjustment.MicGainEmulation.Enabled = False config.LevelAdjustment.MicGainEmulation.InitialLevel = 100 # 降低初始电平避免过饱和 # 设置HighPassFilter配置 - 使用标准高通滤波 config.HighPass.Enabled = True # 启用高通滤波器移除低频噪声 config.HighPass.ApplyInFullBand = True # 在全频段应用,更好的兼容性 # 设置EchoCanceller配置 - 优化回声消除 config.Echo.Enabled = True # 启用回声消除 config.Echo.MobileMode = False # 使用标准模式而非移动模式以获取更好效果 config.Echo.ExportLinearAecOutput = False config.Echo.EnforceHighPassFiltering = True # 启用强制高通滤波,帮助消除低频回声 # 设置NoiseSuppression配置 - 中等强度噪声抑制 config.NoiseSuppress.Enabled = True config.NoiseSuppress.NoiseLevel = NoiseSuppressionLevel.Moderate # 中等级别抑制 config.NoiseSuppress.AnalyzeLinearAecOutputWhenAvailable = True # 设置TransientSuppression配置 config.TransientSuppress.Enabled = False # 关闭瞬态抑制,避免切割语音 # 设置GainController1配置 - 轻度增益控制 config.GainControl1.Enabled = True # 启用增益控制 config.GainControl1.ControllerMode = GainControllerMode.AdaptiveDigital config.GainControl1.TargetLevelDbfs = 3 # 降低目标电平(更积极的控制) config.GainControl1.CompressionGainDb = 9 # 适中的压缩增益 config.GainControl1.EnableLimiter = True # 启用限制器 # 
def create_apm_config():
    """Create the WebRTC APM config, tuned to keep speech natural and reduce error -11."""
    config = Config()

    # Pipeline: use a standard rate to avoid resampling problems
    config.PipelineConfig.MaximumInternalProcessingRate = 16000  # WebRTC's preferred rate
    config.PipelineConfig.MultiChannelRender = False
    config.PipelineConfig.MultiChannelCapture = False
    config.PipelineConfig.CaptureDownmixMethod = DownmixMethod.AverageChannels

    # PreAmplifier: disabled to avoid pre-gain distortion
    config.PreAmp.Enabled = False
    config.PreAmp.FixedGainFactor = 1.0  # unity gain

    # CaptureLevelAdjustment: disabled to reduce conflicts between stages
    config.LevelAdjustment.Enabled = False
    config.LevelAdjustment.PreGainFactor = 1.0
    config.LevelAdjustment.PostGainFactor = 1.0
    config.LevelAdjustment.MicGainEmulation.Enabled = False
    config.LevelAdjustment.MicGainEmulation.InitialLevel = 100  # moderate level to avoid saturation

    # HighPassFilter: remove low-frequency noise
    config.HighPass.Enabled = True
    config.HighPass.ApplyInFullBand = True  # full-band application for better compatibility

    # EchoCanceller: the core of this demo
    config.Echo.Enabled = True
    config.Echo.MobileMode = False  # standard mode gives better results than mobile mode
    config.Echo.ExportLinearAecOutput = False
    config.Echo.EnforceHighPassFiltering = True  # helps suppress low-frequency echo

    # NoiseSuppression: moderate strength
    config.NoiseSuppress.Enabled = True
    config.NoiseSuppress.NoiseLevel = NoiseSuppressionLevel.Moderate
    config.NoiseSuppress.AnalyzeLinearAecOutputWhenAvailable = True

    # TransientSuppression: off, to avoid chopping speech
    config.TransientSuppress.Enabled = False

    # GainController1: light gain control
    config.GainControl1.Enabled = True
    config.GainControl1.ControllerMode = GainControllerMode.AdaptiveDigital
    config.GainControl1.TargetLevelDbfs = 3  # lower target level (more aggressive control)
    config.GainControl1.CompressionGainDb = 9  # moderate compression gain
    config.GainControl1.EnableLimiter = True

    # AnalogGainController: off (digital-only pipeline)
    config.GainControl1.AnalogController.Enabled = False
    config.GainControl1.AnalogController.StartupMinVolume = 0
    config.GainControl1.AnalogController.ClippedLevelMin = 70
    config.GainControl1.AnalogController.EnableDigitalAdaptive = False
    config.GainControl1.AnalogController.ClippedLevelStep = 15
    config.GainControl1.AnalogController.ClippedRatioThreshold = 0.1
    config.GainControl1.AnalogController.ClippedWaitFrames = 300

    # ClippingPredictor
    predictor = config.GainControl1.AnalogController.Predictor
    predictor.Enabled = False
    predictor.PredictorMode = ClippingPredictorMode.ClippingEventPrediction
    predictor.WindowLength = 5
    predictor.ReferenceWindowLength = 5
    predictor.ReferenceWindowDelay = 5
    predictor.ClippingThreshold = -1.0
    predictor.CrestFactorMargin = 3.0
    predictor.UsePredictedStep = True

    # GainController2: disabled to avoid conflicts with GainController1
    config.GainControl2.Enabled = False
    config.GainControl2.VolumeController.Enabled = False
    config.GainControl2.AdaptiveController.Enabled = False
    config.GainControl2.AdaptiveController.HeadroomDb = 5.0
    config.GainControl2.AdaptiveController.MaxGainDb = 30.0
    config.GainControl2.AdaptiveController.InitialGainDb = 15.0
    config.GainControl2.AdaptiveController.MaxGainChangeDbPerSecond = 6.0
    config.GainControl2.AdaptiveController.MaxOutputNoiseLevelDbfs = -50.0
    config.GainControl2.FixedController.GainDb = 0.0

    return config


# Reference audio buffer (holds what the speaker is playing)
reference_buffer = []
reference_lock = threading.Lock()


def record_playback_audio(chunk_size, sample_rate, channels):
    """Record the speaker output (a more accurate reference signal).

    Note: this is the ideal implementation, but on Windows PyAudio usually
    cannot record the speaker output directly; a real application needs
    another mechanism to capture the system audio output.
    """
    global reference_buffer

    try:
        p = pyaudio.PyAudio()

        # Try to open a stream that records from the default output device.
        # Note: this does not work on most systems; it is shown only as an example.
        loopback_stream = p.open(
            format=pyaudio.paInt16,
            channels=channels,
            rate=sample_rate,
            input=True,
            frames_per_buffer=chunk_size,
            input_device_index=None,  # try the default output device as the input source
        )

        # Record continuously
        while True:
            try:
                data = loopback_stream.read(chunk_size, exception_on_overflow=False)
                with reference_lock:
                    reference_buffer.append(data)
            except OSError:
                break

            # Keep the buffer at a reasonable size
            with reference_lock:
                if len(reference_buffer) > 100:  # keep roughly 2 seconds of audio
                    reference_buffer = reference_buffer[-100:]
    except Exception as e:
        print(f"Unable to record system audio: {e}")
    finally:
        try:
            if "loopback_stream" in locals() and loopback_stream:
                loopback_stream.stop_stream()
                loopback_stream.close()
            if "p" in locals() and p:
                p.terminate()
        except Exception:
            pass


def aec_demo(audio_file):
    """Main entry point for the WebRTC echo cancellation demo."""
    # Make sure the audio file exists
    if not os.path.exists(audio_file):
        print(f"Error: audio file not found: {audio_file}")
        return

    # Audio parameters tuned for WebRTC
    SAMPLE_RATE = 16000  # 16 kHz, the sample rate the WebRTC AEC is tuned for
    CHANNELS = 1  # mono
    CHUNK = 160  # samples per frame (10 ms @ 16 kHz, WebRTC's standard frame size)
    FORMAT = pyaudio.paInt16  # 16-bit PCM
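
    # NOTE: added sanity check, not in the original script. The APM consumes
    # audio in exact 10 ms frames, so CHUNK must equal SAMPLE_RATE / 100;
    # any other frame size makes ProcessStream report an error.
    assert CHUNK == SAMPLE_RATE // 100, "WebRTC APM requires 10 ms frames"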
    # Initialize PyAudio
    p = pyaudio.PyAudio()

    # List the available audio devices for reference
    print("\nAvailable audio devices:")
    for i in range(p.get_device_count()):
        dev_info = p.get_device_info_by_index(i)
        print(f"Device {i}: {dev_info['name']}")
        print(f"  - input channels: {dev_info['maxInputChannels']}")
        print(f"  - output channels: {dev_info['maxOutputChannels']}")
        print(f"  - default sample rate: {dev_info['defaultSampleRate']}")
    print("")

    # Open the microphone input stream
    input_stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=SAMPLE_RATE,
        input=True,
        frames_per_buffer=CHUNK,
    )

    # Initialize pygame for playback
    pygame.init()
    mixer.init(frequency=SAMPLE_RATE, size=-16, channels=CHANNELS, buffer=CHUNK * 4)

    # Load the reference audio file
    print(f"Loading audio file: {audio_file}")

    # Read the reference audio and convert its sample rate / channel count.
    # soundfile is used here so that multiple formats are supported.
    try:
        print("Loading reference audio...")
        # Read the original audio with soundfile
        ref_audio_data, orig_sr = sf.read(audio_file, dtype="int16")
        n_channels = ref_audio_data.shape[1] if len(ref_audio_data.shape) > 1 else 1
        print(f"Original audio: sample rate={orig_sr}, channels={n_channels}")

        # Downmix to mono if the file is stereo
        if len(ref_audio_data.shape) > 1 and ref_audio_data.shape[1] > 1:
            ref_audio_data = ref_audio_data.mean(axis=1).astype(np.int16)

        # Resample with scipy if necessary
        if orig_sr != SAMPLE_RATE:
            print(f"Resampling reference audio from {orig_sr} Hz to {SAMPLE_RATE} Hz...")
            from scipy import signal

            ref_audio_data = signal.resample(
                ref_audio_data, int(len(ref_audio_data) * SAMPLE_RATE / orig_sr)
            ).astype(np.int16)

        # Write a temporary WAV file for pygame to play
        temp_wav_path = os.path.join(current_dir, "temp_reference.wav")
        with wave.open(temp_wav_path, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)  # 2 bytes (16-bit)
            wf.setframerate(SAMPLE_RATE)
            wf.writeframes(ref_audio_data.tobytes())

        # Split the reference audio into CHUNK-sized frames
        ref_audio_frames = []
        for i in range(0, len(ref_audio_data), CHUNK):
            if i + CHUNK <= len(ref_audio_data):
                ref_audio_frames.append(ref_audio_data[i : i + CHUNK])
            else:
                # Zero-pad the final partial frame
                last_frame = np.zeros(CHUNK, dtype=np.int16)
                last_frame[: len(ref_audio_data) - i] = ref_audio_data[i:]
                ref_audio_frames.append(last_frame)

        print(f"Reference audio ready: {len(ref_audio_frames)} frames")

        # Load the converted temporary WAV file for playback
        mixer.music.load(temp_wav_path)
    except Exception as e:
        print(f"Error loading reference audio: {e}")
        sys.exit(1)

    # Create the WebRTC APM instance
    apm = apm_lib.WebRTC_APM_Create()

    # Apply the APM configuration
    config = create_apm_config()
    result = apm_lib.WebRTC_APM_ApplyConfig(apm, byref(config))
    if result != 0:
        print(f"Warning: applying the APM config failed, error code: {result}")

    # Create the stream configuration
    stream_config = apm_lib.WebRTC_APM_CreateStreamConfig(SAMPLE_RATE, CHANNELS)

    # Use a small delay so the reference and microphone signals line up better
    apm_lib.WebRTC_APM_SetStreamDelayMs(apm, 50)
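
    # NOTE: added explanation. The stream delay tells the AEC how far the
    # capture signal lags the reference fed to ProcessReverseStream (playback
    # buffering plus the speaker-to-microphone path); 50 ms is a rough guess
    # for consumer hardware, not a measured value. In the WebRTC sources,
    # error code -11 corresponds to kStreamParameterNotSetError, which
    # ProcessStream can return when the AEC needs a delay that was never set,
    # so setting it once before the loop also helps avoid that error.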
    # Recording buffers
    original_frames = []
    processed_frames = []
    reference_frames = []

    # Give the audio system a moment to get ready
    time.sleep(0.5)

    print("Starting recording and processing...")
    print("Playing reference audio...")
    mixer.music.play()

    # Recording duration (based on the audio file length)
    try:
        sound_length = mixer.Sound(temp_wav_path).get_length()
        recording_time = sound_length if sound_length > 0 else 10
    except Exception:
        recording_time = 10  # default to 10 seconds if the length is unavailable
    recording_time += 1  # one extra second to make sure all audio is captured

    start_time = time.time()
    current_ref_frame_index = 0

    try:
        while time.time() - start_time < recording_time:
            # Read one frame from the microphone
            input_data = input_stream.read(CHUNK, exception_on_overflow=False)

            # Keep the raw recording
            original_frames.append(input_data)

            # View the input bytes as a short array
            input_array = np.frombuffer(input_data, dtype=np.int16)
            input_ptr = input_array.ctypes.data_as(POINTER(c_short))

            # Fetch the current reference frame
            if current_ref_frame_index < len(ref_audio_frames):
                ref_array = ref_audio_frames[current_ref_frame_index]
                reference_frames.append(ref_array.tobytes())
                current_ref_frame_index += 1
            else:
                # Use silence once the reference audio has finished
                ref_array = np.zeros(CHUNK, dtype=np.int16)
                reference_frames.append(ref_array.tobytes())

            ref_ptr = ref_array.ctypes.data_as(POINTER(c_short))

            # Output buffer for the processed microphone frame
            output_array = np.zeros(CHUNK, dtype=np.int16)
            output_ptr = output_array.ctypes.data_as(POINTER(c_short))

            # Important: feed the reference (speaker) signal first.
            # An output buffer must be provided even though it is unused.
            ref_output_array = np.zeros(CHUNK, dtype=np.int16)
            ref_output_ptr = ref_output_array.ctypes.data_as(POINTER(c_short))

            result_reverse = apm_lib.WebRTC_APM_ProcessReverseStream(
                apm, ref_ptr, stream_config, stream_config, ref_output_ptr
            )
            if result_reverse != 0:
                print(f"\rWarning: reference processing failed, error code: {result_reverse}")

            # Then process the microphone signal with echo cancellation applied
            result = apm_lib.WebRTC_APM_ProcessStream(
                apm, input_ptr, stream_config, stream_config, output_ptr
            )
            if result != 0:
                print(f"\rWarning: processing failed, error code: {result}")

            # Keep the processed frame
            processed_frames.append(output_array.tobytes())

            # Report progress
            progress = (time.time() - start_time) / recording_time * 100
            sys.stdout.write(f"\rProcessing: {progress:.1f}%")
            sys.stdout.flush()
    except KeyboardInterrupt:
        print("\nRecording interrupted by user")
    finally:
        print("\nRecording and processing finished")

        # Stop playback
        mixer.music.stop()

        # Close the audio stream
        input_stream.stop_stream()
        input_stream.close()

        # Release the APM resources
        apm_lib.WebRTC_APM_DestroyStreamConfig(stream_config)
        apm_lib.WebRTC_APM_Destroy(apm)

        # Shut down PyAudio
        p.terminate()

    # Save the raw recording
    original_output_path = os.path.join(current_dir, "original_recording.wav")
    save_wav(original_output_path, original_frames, SAMPLE_RATE, CHANNELS)

    # Save the processed recording
    processed_output_path = os.path.join(current_dir, "processed_recording.wav")
    save_wav(processed_output_path, processed_frames, SAMPLE_RATE, CHANNELS)

    # Save the reference audio (what was played)
    reference_output_path = os.path.join(current_dir, "reference_playback.wav")
    save_wav(reference_output_path, reference_frames, SAMPLE_RATE, CHANNELS)

    # Remove the temporary file
    if os.path.exists(temp_wav_path):
        try:
            os.remove(temp_wav_path)
        except Exception:
            pass

    print(f"Raw recording saved to: {original_output_path}")
    print(f"Processed recording saved to: {processed_output_path}")
    print(f"Reference audio saved to: {reference_output_path}")

    # Quit pygame
    pygame.quit()


def save_wav(file_path, frames, sample_rate, channels):
    """Save audio frames to a WAV file."""
    with wave.open(file_path, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(2)  # 2 bytes (16-bit)
        wf.setframerate(sample_rate)
        # Join only bytes frames; this also handles an empty frame list safely
        wf.writeframes(b"".join(f for f in frames if isinstance(f, bytes)))


if __name__ == "__main__":
    # Parse command-line arguments
    if len(sys.argv) > 1:
        audio_file = sys.argv[1]
    else:
        # Default to 鞠婧祎.wav in the scripts directory
        audio_file = os.path.join(current_dir, "鞠婧祎.wav")
        # Fall back to the MP3 version if the default WAV does not exist
        if not os.path.exists(audio_file):
            audio_file = os.path.join(current_dir, "鞠婧祎.mp3")
        if not os.path.exists(audio_file):
            print("Error: default audio file not found; please pass an audio file path")
            print("Usage: python webrtc_aec_demo.py [audio file path]")
            sys.exit(1)

    # Run the demo
    aec_demo(audio_file)
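

# ---------------------------------------------------------------------------
# NOTE: added post-analysis sketch, not part of the original demo. Comparing
# the RMS level of the raw and processed recordings gives a rough indicator of
# how much energy the AEC removed; it only approximates echo reduction when
# the microphone signal was dominated by the playback. Call it manually after
# a run, e.g. from a Python shell:
#   >>> from webrtc_aec_demo import rms_reduction_db
#   >>> print(f"{rms_reduction_db():.1f} dB")
# ---------------------------------------------------------------------------
def rms_reduction_db(
    original_path=os.path.join(current_dir, "original_recording.wav"),
    processed_path=os.path.join(current_dir, "processed_recording.wav"),
):
    """Return the drop in RMS level (dB) from the raw to the processed recording."""

    def rms(x):
        # Small epsilon avoids log-of-zero on silent recordings
        return float(np.sqrt(np.mean(np.square(x)) + 1e-12))

    orig, _ = sf.read(original_path, dtype="float32")
    proc, _ = sf.read(processed_path, dtype="float32")
    n = min(len(orig), len(proc))  # compare the overlapping portion only
    return 20.0 * np.log10(rms(orig[:n]) / rms(proc[:n]))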