651 lines
22 KiB
Python
651 lines
22 KiB
Python
![]() |
"""WebRTC回声消除(AEC)演示脚本.
|
|||
|
|
|||
|
该脚本用于演示WebRTC APM库的回声消除功能:
|
|||
|
1. 播放指定的音频文件(作为参考信号)
|
|||
|
2. 同时录制麦克风输入(包含回声和环境声音)
|
|||
|
3. 应用WebRTC回声消除处理
|
|||
|
4. 保存原始录音和处理后的录音,以便比较
|
|||
|
|
|||
|
用法:
|
|||
|
python webrtc_aec_demo.py [音频文件路径]
|
|||
|
|
|||
|
示例:
|
|||
|
python webrtc_aec_demo.py 鞠婧祎.wav
|
|||
|
"""
|
|||
|
|
|||
|
import ctypes
|
|||
|
import os
|
|||
|
import sys
|
|||
|
import threading
|
|||
|
import time
|
|||
|
import wave
|
|||
|
from ctypes import POINTER, Structure, byref, c_bool, c_float, c_int, c_short, c_void_p
|
|||
|
|
|||
|
import numpy as np
|
|||
|
import pyaudio
|
|||
|
import pygame
|
|||
|
import soundfile as sf
|
|||
|
from pygame import mixer
|
|||
|
|
|||
|
# 获取DLL文件的绝对路径
|
|||
|
current_dir = os.path.dirname(os.path.abspath(__file__))
|
|||
|
project_root = os.path.dirname(current_dir)
|
|||
|
dll_path = os.path.join(
|
|||
|
project_root, "libs", "webrtc_apm", "win", "x86_64", "libwebrtc_apm.dll"
|
|||
|
)
|
|||
|
|
|||
|
# 加载DLL
|
|||
|
try:
|
|||
|
apm_lib = ctypes.CDLL(dll_path)
|
|||
|
print(f"成功加载WebRTC APM库: {dll_path}")
|
|||
|
except Exception as e:
|
|||
|
print(f"加载WebRTC APM库失败: {e}")
|
|||
|
sys.exit(1)
|
|||
|
|
|||
|
|
|||
|
# 定义结构体和枚举类型
|
|||
|
class DownmixMethod(ctypes.c_int):
|
|||
|
AverageChannels = 0
|
|||
|
UseFirstChannel = 1
|
|||
|
|
|||
|
|
|||
|
class NoiseSuppressionLevel(ctypes.c_int):
|
|||
|
Low = 0
|
|||
|
Moderate = 1
|
|||
|
High = 2
|
|||
|
VeryHigh = 3
|
|||
|
|
|||
|
|
|||
|
class GainControllerMode(ctypes.c_int):
|
|||
|
AdaptiveAnalog = 0
|
|||
|
AdaptiveDigital = 1
|
|||
|
FixedDigital = 2
|
|||
|
|
|||
|
|
|||
|
class ClippingPredictorMode(ctypes.c_int):
|
|||
|
ClippingEventPrediction = 0
|
|||
|
AdaptiveStepClippingPeakPrediction = 1
|
|||
|
FixedStepClippingPeakPrediction = 2
|
|||
|
|
|||
|
|
|||
|
# 定义Pipeline结构体
|
|||
|
class Pipeline(Structure):
|
|||
|
_fields_ = [
|
|||
|
("MaximumInternalProcessingRate", c_int),
|
|||
|
("MultiChannelRender", c_bool),
|
|||
|
("MultiChannelCapture", c_bool),
|
|||
|
("CaptureDownmixMethod", c_int),
|
|||
|
]
|
|||
|
|
|||
|
|
|||
|
# 定义PreAmplifier结构体
|
|||
|
class PreAmplifier(Structure):
|
|||
|
_fields_ = [("Enabled", c_bool), ("FixedGainFactor", c_float)]
|
|||
|
|
|||
|
|
|||
|
# 定义AnalogMicGainEmulation结构体
|
|||
|
class AnalogMicGainEmulation(Structure):
|
|||
|
_fields_ = [("Enabled", c_bool), ("InitialLevel", c_int)]
|
|||
|
|
|||
|
|
|||
|
# 定义CaptureLevelAdjustment结构体
|
|||
|
class CaptureLevelAdjustment(Structure):
|
|||
|
_fields_ = [
|
|||
|
("Enabled", c_bool),
|
|||
|
("PreGainFactor", c_float),
|
|||
|
("PostGainFactor", c_float),
|
|||
|
("MicGainEmulation", AnalogMicGainEmulation),
|
|||
|
]
|
|||
|
|
|||
|
|
|||
|
# 定义HighPassFilter结构体
|
|||
|
class HighPassFilter(Structure):
|
|||
|
_fields_ = [("Enabled", c_bool), ("ApplyInFullBand", c_bool)]
|
|||
|
|
|||
|
|
|||
|
# 定义EchoCanceller结构体
|
|||
|
class EchoCanceller(Structure):
|
|||
|
_fields_ = [
|
|||
|
("Enabled", c_bool),
|
|||
|
("MobileMode", c_bool),
|
|||
|
("ExportLinearAecOutput", c_bool),
|
|||
|
("EnforceHighPassFiltering", c_bool),
|
|||
|
]
|
|||
|
|
|||
|
|
|||
|
# 定义NoiseSuppression结构体
|
|||
|
class NoiseSuppression(Structure):
|
|||
|
_fields_ = [
|
|||
|
("Enabled", c_bool),
|
|||
|
("NoiseLevel", c_int),
|
|||
|
("AnalyzeLinearAecOutputWhenAvailable", c_bool),
|
|||
|
]
|
|||
|
|
|||
|
|
|||
|
# 定义TransientSuppression结构体
|
|||
|
class TransientSuppression(Structure):
|
|||
|
_fields_ = [("Enabled", c_bool)]
|
|||
|
|
|||
|
|
|||
|
# 定义ClippingPredictor结构体
|
|||
|
class ClippingPredictor(Structure):
|
|||
|
_fields_ = [
|
|||
|
("Enabled", c_bool),
|
|||
|
("PredictorMode", c_int),
|
|||
|
("WindowLength", c_int),
|
|||
|
("ReferenceWindowLength", c_int),
|
|||
|
("ReferenceWindowDelay", c_int),
|
|||
|
("ClippingThreshold", c_float),
|
|||
|
("CrestFactorMargin", c_float),
|
|||
|
("UsePredictedStep", c_bool),
|
|||
|
]
|
|||
|
|
|||
|
|
|||
|
# 定义AnalogGainController结构体
|
|||
|
class AnalogGainController(Structure):
|
|||
|
_fields_ = [
|
|||
|
("Enabled", c_bool),
|
|||
|
("StartupMinVolume", c_int),
|
|||
|
("ClippedLevelMin", c_int),
|
|||
|
("EnableDigitalAdaptive", c_bool),
|
|||
|
("ClippedLevelStep", c_int),
|
|||
|
("ClippedRatioThreshold", c_float),
|
|||
|
("ClippedWaitFrames", c_int),
|
|||
|
("Predictor", ClippingPredictor),
|
|||
|
]
|
|||
|
|
|||
|
|
|||
|
# 定义GainController1结构体
|
|||
|
class GainController1(Structure):
|
|||
|
_fields_ = [
|
|||
|
("Enabled", c_bool),
|
|||
|
("ControllerMode", c_int),
|
|||
|
("TargetLevelDbfs", c_int),
|
|||
|
("CompressionGainDb", c_int),
|
|||
|
("EnableLimiter", c_bool),
|
|||
|
("AnalogController", AnalogGainController),
|
|||
|
]
|
|||
|
|
|||
|
|
|||
|
# 定义InputVolumeController结构体
|
|||
|
class InputVolumeController(Structure):
|
|||
|
_fields_ = [("Enabled", c_bool)]
|
|||
|
|
|||
|
|
|||
|
# 定义AdaptiveDigital结构体
|
|||
|
class AdaptiveDigital(Structure):
|
|||
|
_fields_ = [
|
|||
|
("Enabled", c_bool),
|
|||
|
("HeadroomDb", c_float),
|
|||
|
("MaxGainDb", c_float),
|
|||
|
("InitialGainDb", c_float),
|
|||
|
("MaxGainChangeDbPerSecond", c_float),
|
|||
|
("MaxOutputNoiseLevelDbfs", c_float),
|
|||
|
]
|
|||
|
|
|||
|
|
|||
|
# 定义FixedDigital结构体
|
|||
|
class FixedDigital(Structure):
|
|||
|
_fields_ = [("GainDb", c_float)]
|
|||
|
|
|||
|
|
|||
|
# 定义GainController2结构体
|
|||
|
class GainController2(Structure):
|
|||
|
_fields_ = [
|
|||
|
("Enabled", c_bool),
|
|||
|
("VolumeController", InputVolumeController),
|
|||
|
("AdaptiveController", AdaptiveDigital),
|
|||
|
("FixedController", FixedDigital),
|
|||
|
]
|
|||
|
|
|||
|
|
|||
|
# 定义完整的Config结构体
|
|||
|
class Config(Structure):
|
|||
|
_fields_ = [
|
|||
|
("PipelineConfig", Pipeline),
|
|||
|
("PreAmp", PreAmplifier),
|
|||
|
("LevelAdjustment", CaptureLevelAdjustment),
|
|||
|
("HighPass", HighPassFilter),
|
|||
|
("Echo", EchoCanceller),
|
|||
|
("NoiseSuppress", NoiseSuppression),
|
|||
|
("TransientSuppress", TransientSuppression),
|
|||
|
("GainControl1", GainController1),
|
|||
|
("GainControl2", GainController2),
|
|||
|
]
|
|||
|
|
|||
|
|
|||
|
# 定义DLL函数原型
|
|||
|
apm_lib.WebRTC_APM_Create.restype = c_void_p
|
|||
|
apm_lib.WebRTC_APM_Create.argtypes = []
|
|||
|
|
|||
|
apm_lib.WebRTC_APM_Destroy.restype = None
|
|||
|
apm_lib.WebRTC_APM_Destroy.argtypes = [c_void_p]
|
|||
|
|
|||
|
apm_lib.WebRTC_APM_CreateStreamConfig.restype = c_void_p
|
|||
|
apm_lib.WebRTC_APM_CreateStreamConfig.argtypes = [c_int, c_int]
|
|||
|
|
|||
|
apm_lib.WebRTC_APM_DestroyStreamConfig.restype = None
|
|||
|
apm_lib.WebRTC_APM_DestroyStreamConfig.argtypes = [c_void_p]
|
|||
|
|
|||
|
apm_lib.WebRTC_APM_ApplyConfig.restype = c_int
|
|||
|
apm_lib.WebRTC_APM_ApplyConfig.argtypes = [c_void_p, POINTER(Config)]
|
|||
|
|
|||
|
apm_lib.WebRTC_APM_ProcessReverseStream.restype = c_int
|
|||
|
apm_lib.WebRTC_APM_ProcessReverseStream.argtypes = [
|
|||
|
c_void_p,
|
|||
|
POINTER(c_short),
|
|||
|
c_void_p,
|
|||
|
c_void_p,
|
|||
|
POINTER(c_short),
|
|||
|
]
|
|||
|
|
|||
|
apm_lib.WebRTC_APM_ProcessStream.restype = c_int
|
|||
|
apm_lib.WebRTC_APM_ProcessStream.argtypes = [
|
|||
|
c_void_p,
|
|||
|
POINTER(c_short),
|
|||
|
c_void_p,
|
|||
|
c_void_p,
|
|||
|
POINTER(c_short),
|
|||
|
]
|
|||
|
|
|||
|
apm_lib.WebRTC_APM_SetStreamDelayMs.restype = None
|
|||
|
apm_lib.WebRTC_APM_SetStreamDelayMs.argtypes = [c_void_p, c_int]
|
|||
|
|
|||
|
|
|||
|
def create_apm_config():
|
|||
|
"""创建WebRTC APM配置 - 优化为保留自然语音,减少错误码-11问题"""
|
|||
|
config = Config()
|
|||
|
|
|||
|
# 设置Pipeline配置 - 使用标准采样率避免重采样问题
|
|||
|
config.PipelineConfig.MaximumInternalProcessingRate = 16000 # WebRTC优化频率
|
|||
|
config.PipelineConfig.MultiChannelRender = False
|
|||
|
config.PipelineConfig.MultiChannelCapture = False
|
|||
|
config.PipelineConfig.CaptureDownmixMethod = DownmixMethod.AverageChannels
|
|||
|
|
|||
|
# 设置PreAmplifier配置 - 减少预放大干扰
|
|||
|
config.PreAmp.Enabled = False # 关闭预放大,避免失真
|
|||
|
config.PreAmp.FixedGainFactor = 1.0 # 不增益
|
|||
|
|
|||
|
# 设置LevelAdjustment配置 - 简化电平调整
|
|||
|
config.LevelAdjustment.Enabled = False # 禁用电平调整以减少处理冲突
|
|||
|
config.LevelAdjustment.PreGainFactor = 1.0
|
|||
|
config.LevelAdjustment.PostGainFactor = 1.0
|
|||
|
config.LevelAdjustment.MicGainEmulation.Enabled = False
|
|||
|
config.LevelAdjustment.MicGainEmulation.InitialLevel = 100 # 降低初始电平避免过饱和
|
|||
|
|
|||
|
# 设置HighPassFilter配置 - 使用标准高通滤波
|
|||
|
config.HighPass.Enabled = True # 启用高通滤波器移除低频噪声
|
|||
|
config.HighPass.ApplyInFullBand = True # 在全频段应用,更好的兼容性
|
|||
|
|
|||
|
# 设置EchoCanceller配置 - 优化回声消除
|
|||
|
config.Echo.Enabled = True # 启用回声消除
|
|||
|
config.Echo.MobileMode = False # 使用标准模式而非移动模式以获取更好效果
|
|||
|
config.Echo.ExportLinearAecOutput = False
|
|||
|
config.Echo.EnforceHighPassFiltering = True # 启用强制高通滤波,帮助消除低频回声
|
|||
|
|
|||
|
# 设置NoiseSuppression配置 - 中等强度噪声抑制
|
|||
|
config.NoiseSuppress.Enabled = True
|
|||
|
config.NoiseSuppress.NoiseLevel = NoiseSuppressionLevel.Moderate # 中等级别抑制
|
|||
|
config.NoiseSuppress.AnalyzeLinearAecOutputWhenAvailable = True
|
|||
|
|
|||
|
# 设置TransientSuppression配置
|
|||
|
config.TransientSuppress.Enabled = False # 关闭瞬态抑制,避免切割语音
|
|||
|
|
|||
|
# 设置GainController1配置 - 轻度增益控制
|
|||
|
config.GainControl1.Enabled = True # 启用增益控制
|
|||
|
config.GainControl1.ControllerMode = GainControllerMode.AdaptiveDigital
|
|||
|
config.GainControl1.TargetLevelDbfs = 3 # 降低目标电平(更积极的控制)
|
|||
|
config.GainControl1.CompressionGainDb = 9 # 适中的压缩增益
|
|||
|
config.GainControl1.EnableLimiter = True # 启用限制器
|
|||
|
|
|||
|
# AnalogGainController
|
|||
|
config.GainControl1.AnalogController.Enabled = False # 关闭模拟增益控制
|
|||
|
config.GainControl1.AnalogController.StartupMinVolume = 0
|
|||
|
config.GainControl1.AnalogController.ClippedLevelMin = 70
|
|||
|
config.GainControl1.AnalogController.EnableDigitalAdaptive = False
|
|||
|
config.GainControl1.AnalogController.ClippedLevelStep = 15
|
|||
|
config.GainControl1.AnalogController.ClippedRatioThreshold = 0.1
|
|||
|
config.GainControl1.AnalogController.ClippedWaitFrames = 300
|
|||
|
|
|||
|
# ClippingPredictor
|
|||
|
predictor = config.GainControl1.AnalogController.Predictor
|
|||
|
predictor.Enabled = False
|
|||
|
predictor.PredictorMode = ClippingPredictorMode.ClippingEventPrediction
|
|||
|
predictor.WindowLength = 5
|
|||
|
predictor.ReferenceWindowLength = 5
|
|||
|
predictor.ReferenceWindowDelay = 5
|
|||
|
predictor.ClippingThreshold = -1.0
|
|||
|
predictor.CrestFactorMargin = 3.0
|
|||
|
predictor.UsePredictedStep = True
|
|||
|
|
|||
|
# 设置GainController2配置 - 禁用以避免冲突
|
|||
|
config.GainControl2.Enabled = False
|
|||
|
config.GainControl2.VolumeController.Enabled = False
|
|||
|
config.GainControl2.AdaptiveController.Enabled = False
|
|||
|
config.GainControl2.AdaptiveController.HeadroomDb = 5.0
|
|||
|
config.GainControl2.AdaptiveController.MaxGainDb = 30.0
|
|||
|
config.GainControl2.AdaptiveController.InitialGainDb = 15.0
|
|||
|
config.GainControl2.AdaptiveController.MaxGainChangeDbPerSecond = 6.0
|
|||
|
config.GainControl2.AdaptiveController.MaxOutputNoiseLevelDbfs = -50.0
|
|||
|
config.GainControl2.FixedController.GainDb = 0.0
|
|||
|
|
|||
|
return config
|
|||
|
|
|||
|
|
|||
|
# 参考音频缓冲区(用于存储扬声器输出)
|
|||
|
reference_buffer = []
|
|||
|
reference_lock = threading.Lock()
|
|||
|
|
|||
|
|
|||
|
def record_playback_audio(chunk_size, sample_rate, channels):
|
|||
|
"""录制扬声器输出的音频(更准确的参考信号)"""
|
|||
|
global reference_buffer
|
|||
|
|
|||
|
# 注:这是理想情况下的实现,但Windows下PyAudio通常无法直接录制扬声器输出
|
|||
|
# 实际应用中,需要使用其他方法捕获系统音频输出
|
|||
|
try:
|
|||
|
p = pyaudio.PyAudio()
|
|||
|
|
|||
|
# 尝试创建一个从默认输出设备录制的流(部分系统支持)
|
|||
|
# 注意:这在大多数系统上不起作用,这里只是作为示例
|
|||
|
loopback_stream = p.open(
|
|||
|
format=pyaudio.paInt16,
|
|||
|
channels=channels,
|
|||
|
rate=sample_rate,
|
|||
|
input=True,
|
|||
|
frames_per_buffer=chunk_size,
|
|||
|
input_device_index=None, # 尝试使用默认输出设备作为输入源
|
|||
|
)
|
|||
|
|
|||
|
# 开始录制
|
|||
|
while True:
|
|||
|
try:
|
|||
|
data = loopback_stream.read(chunk_size, exception_on_overflow=False)
|
|||
|
with reference_lock:
|
|||
|
reference_buffer.append(data)
|
|||
|
except OSError:
|
|||
|
break
|
|||
|
|
|||
|
# 保持缓冲区大小合理
|
|||
|
with reference_lock:
|
|||
|
if len(reference_buffer) > 100: # 保持约2秒的缓冲
|
|||
|
reference_buffer = reference_buffer[-100:]
|
|||
|
except Exception as e:
|
|||
|
print(f"无法录制系统音频: {e}")
|
|||
|
finally:
|
|||
|
try:
|
|||
|
if "loopback_stream" in locals() and loopback_stream:
|
|||
|
loopback_stream.stop_stream()
|
|||
|
loopback_stream.close()
|
|||
|
if "p" in locals() and p:
|
|||
|
p.terminate()
|
|||
|
except Exception:
|
|||
|
pass
|
|||
|
|
|||
|
|
|||
|
def aec_demo(audio_file):
|
|||
|
"""WebRTC回声消除演示主函数."""
|
|||
|
# 检查音频文件是否存在
|
|||
|
if not os.path.exists(audio_file):
|
|||
|
print(f"错误: 找不到音频文件 {audio_file}")
|
|||
|
return
|
|||
|
|
|||
|
# 音频参数设置 - 使用WebRTC优化的音频参数
|
|||
|
SAMPLE_RATE = 16000 # 采样率16kHz (WebRTC AEC优化采样率)
|
|||
|
CHANNELS = 1 # 单声道
|
|||
|
CHUNK = 160 # 每帧样本数(10ms @ 16kHz,WebRTC的标准帧大小)
|
|||
|
FORMAT = pyaudio.paInt16 # 16位PCM格式
|
|||
|
|
|||
|
# 初始化PyAudio
|
|||
|
p = pyaudio.PyAudio()
|
|||
|
|
|||
|
# 列出所有可用的音频设备信息供参考
|
|||
|
print("\n可用音频设备:")
|
|||
|
for i in range(p.get_device_count()):
|
|||
|
dev_info = p.get_device_info_by_index(i)
|
|||
|
print(f"设备 {i}: {dev_info['name']}")
|
|||
|
print(f" - 输入通道: {dev_info['maxInputChannels']}")
|
|||
|
print(f" - 输出通道: {dev_info['maxOutputChannels']}")
|
|||
|
print(f" - 默认采样率: {dev_info['defaultSampleRate']}")
|
|||
|
print("")
|
|||
|
|
|||
|
# 打开麦克风输入流
|
|||
|
input_stream = p.open(
|
|||
|
format=FORMAT,
|
|||
|
channels=CHANNELS,
|
|||
|
rate=SAMPLE_RATE,
|
|||
|
input=True,
|
|||
|
frames_per_buffer=CHUNK,
|
|||
|
)
|
|||
|
|
|||
|
# 初始化pygame用于播放音频
|
|||
|
pygame.init()
|
|||
|
mixer.init(frequency=SAMPLE_RATE, size=-16, channels=CHANNELS, buffer=CHUNK * 4)
|
|||
|
|
|||
|
# 加载参考音频文件
|
|||
|
print(f"加载音频文件: {audio_file}")
|
|||
|
|
|||
|
# 读取参考音频文件并转换采样率/通道数
|
|||
|
# 注意:这里使用soundfile库加载音频文件以支持多种格式并进行重采样
|
|||
|
try:
|
|||
|
print("加载参考音频...")
|
|||
|
# 使用soundfile库读取原始音频
|
|||
|
ref_audio_data, orig_sr = sf.read(audio_file, dtype="int16")
|
|||
|
print(
|
|||
|
f"原始音频: 采样率={orig_sr}, 通道数="
|
|||
|
f"{ref_audio_data.shape[1] if len(ref_audio_data.shape) > 1 else 1}"
|
|||
|
)
|
|||
|
|
|||
|
# 转换为单声道(如果是立体声)
|
|||
|
if len(ref_audio_data.shape) > 1 and ref_audio_data.shape[1] > 1:
|
|||
|
ref_audio_data = ref_audio_data.mean(axis=1).astype(np.int16)
|
|||
|
|
|||
|
# 转换采样率(如果需要)
|
|||
|
if orig_sr != SAMPLE_RATE:
|
|||
|
print(f"重采样参考音频从{orig_sr}Hz到{SAMPLE_RATE}Hz...")
|
|||
|
# 使用librosa或scipy进行重采样
|
|||
|
from scipy import signal
|
|||
|
|
|||
|
ref_audio_data = signal.resample(
|
|||
|
ref_audio_data, int(len(ref_audio_data) * SAMPLE_RATE / orig_sr)
|
|||
|
).astype(np.int16)
|
|||
|
|
|||
|
# 保存为临时wav文件供pygame播放
|
|||
|
temp_wav_path = os.path.join(current_dir, "temp_reference.wav")
|
|||
|
with wave.open(temp_wav_path, "wb") as wf:
|
|||
|
wf.setnchannels(1)
|
|||
|
wf.setsampwidth(2) # 2字节(16位)
|
|||
|
wf.setframerate(SAMPLE_RATE)
|
|||
|
wf.writeframes(ref_audio_data.tobytes())
|
|||
|
|
|||
|
# 将参考音频分成CHUNK大小的帧
|
|||
|
ref_audio_frames = []
|
|||
|
for i in range(0, len(ref_audio_data), CHUNK):
|
|||
|
if i + CHUNK <= len(ref_audio_data):
|
|||
|
ref_audio_frames.append(ref_audio_data[i : i + CHUNK])
|
|||
|
else:
|
|||
|
# 最后一帧不足CHUNK大小,补零
|
|||
|
last_frame = np.zeros(CHUNK, dtype=np.int16)
|
|||
|
last_frame[: len(ref_audio_data) - i] = ref_audio_data[i:]
|
|||
|
ref_audio_frames.append(last_frame)
|
|||
|
|
|||
|
print(f"参考音频准备完成,共{len(ref_audio_frames)}帧")
|
|||
|
|
|||
|
# 加载处理后的临时WAV文件
|
|||
|
mixer.music.load(temp_wav_path)
|
|||
|
except Exception as e:
|
|||
|
print(f"加载参考音频时出错: {e}")
|
|||
|
sys.exit(1)
|
|||
|
|
|||
|
# 创建WebRTC APM实例
|
|||
|
apm = apm_lib.WebRTC_APM_Create()
|
|||
|
|
|||
|
# 应用APM配置
|
|||
|
config = create_apm_config()
|
|||
|
result = apm_lib.WebRTC_APM_ApplyConfig(apm, byref(config))
|
|||
|
if result != 0:
|
|||
|
print(f"警告: APM配置应用失败,错误码: {result}")
|
|||
|
|
|||
|
# 创建流配置
|
|||
|
stream_config = apm_lib.WebRTC_APM_CreateStreamConfig(SAMPLE_RATE, CHANNELS)
|
|||
|
|
|||
|
# 设置较小的延迟时间以更准确匹配参考信号和麦克风信号
|
|||
|
apm_lib.WebRTC_APM_SetStreamDelayMs(apm, 50)
|
|||
|
|
|||
|
# 创建录音缓冲区
|
|||
|
original_frames = []
|
|||
|
processed_frames = []
|
|||
|
reference_frames = []
|
|||
|
|
|||
|
# 等待一会让音频系统准备好
|
|||
|
time.sleep(0.5)
|
|||
|
|
|||
|
print("开始录制和处理...")
|
|||
|
print("播放参考音频...")
|
|||
|
|
|||
|
mixer.music.play()
|
|||
|
|
|||
|
# 录制持续时间(根据音频文件长度)
|
|||
|
try:
|
|||
|
sound_length = mixer.Sound(temp_wav_path).get_length()
|
|||
|
recording_time = sound_length if sound_length > 0 else 10
|
|||
|
except Exception:
|
|||
|
recording_time = 10 # 如果无法获取长度,默认10秒
|
|||
|
|
|||
|
recording_time += 1 # 额外1秒确保捕获所有音频
|
|||
|
|
|||
|
start_time = time.time()
|
|||
|
current_ref_frame_index = 0
|
|||
|
try:
|
|||
|
while time.time() - start_time < recording_time:
|
|||
|
# 从麦克风读取一帧数据
|
|||
|
input_data = input_stream.read(CHUNK, exception_on_overflow=False)
|
|||
|
|
|||
|
# 保存原始录音
|
|||
|
original_frames.append(input_data)
|
|||
|
|
|||
|
# 将输入数据转换为short数组
|
|||
|
input_array = np.frombuffer(input_data, dtype=np.int16)
|
|||
|
input_ptr = input_array.ctypes.data_as(POINTER(c_short))
|
|||
|
|
|||
|
# 获取当前参考音频帧
|
|||
|
if current_ref_frame_index < len(ref_audio_frames):
|
|||
|
ref_array = ref_audio_frames[current_ref_frame_index]
|
|||
|
reference_frames.append(ref_array.tobytes())
|
|||
|
current_ref_frame_index += 1
|
|||
|
else:
|
|||
|
# 如果参考音频播放完毕,使用静音帧
|
|||
|
ref_array = np.zeros(CHUNK, dtype=np.int16)
|
|||
|
reference_frames.append(ref_array.tobytes())
|
|||
|
|
|||
|
ref_ptr = ref_array.ctypes.data_as(POINTER(c_short))
|
|||
|
|
|||
|
# 创建输出缓冲区
|
|||
|
output_array = np.zeros(CHUNK, dtype=np.int16)
|
|||
|
output_ptr = output_array.ctypes.data_as(POINTER(c_short))
|
|||
|
|
|||
|
# 重要:先处理参考信号(扬声器输出)
|
|||
|
# 创建参考信号的输出缓冲区(虽然不使用但必须提供)
|
|||
|
ref_output_array = np.zeros(CHUNK, dtype=np.int16)
|
|||
|
ref_output_ptr = ref_output_array.ctypes.data_as(POINTER(c_short))
|
|||
|
|
|||
|
result_reverse = apm_lib.WebRTC_APM_ProcessReverseStream(
|
|||
|
apm, ref_ptr, stream_config, stream_config, ref_output_ptr
|
|||
|
)
|
|||
|
|
|||
|
if result_reverse != 0:
|
|||
|
print(f"\r警告: 参考信号处理失败,错误码: {result_reverse}")
|
|||
|
|
|||
|
# 然后处理麦克风信号,应用回声消除
|
|||
|
result = apm_lib.WebRTC_APM_ProcessStream(
|
|||
|
apm, input_ptr, stream_config, stream_config, output_ptr
|
|||
|
)
|
|||
|
|
|||
|
if result != 0:
|
|||
|
print(f"\r警告: 处理失败,错误码: {result}")
|
|||
|
|
|||
|
# 保存处理后的音频帧
|
|||
|
processed_frames.append(output_array.tobytes())
|
|||
|
|
|||
|
# 计算并显示进度
|
|||
|
progress = (time.time() - start_time) / recording_time * 100
|
|||
|
sys.stdout.write(f"\r处理进度: {progress:.1f}%")
|
|||
|
sys.stdout.flush()
|
|||
|
|
|||
|
except KeyboardInterrupt:
|
|||
|
print("\n录制被用户中断")
|
|||
|
finally:
|
|||
|
print("\n录制和处理完成")
|
|||
|
|
|||
|
# 停止播放
|
|||
|
mixer.music.stop()
|
|||
|
|
|||
|
# 关闭音频流
|
|||
|
input_stream.stop_stream()
|
|||
|
input_stream.close()
|
|||
|
|
|||
|
# 释放APM资源
|
|||
|
apm_lib.WebRTC_APM_DestroyStreamConfig(stream_config)
|
|||
|
apm_lib.WebRTC_APM_Destroy(apm)
|
|||
|
|
|||
|
# 关闭PyAudio
|
|||
|
p.terminate()
|
|||
|
|
|||
|
# 保存原始录音
|
|||
|
original_output_path = os.path.join(current_dir, "original_recording.wav")
|
|||
|
save_wav(original_output_path, original_frames, SAMPLE_RATE, CHANNELS)
|
|||
|
|
|||
|
# 保存处理后的录音
|
|||
|
processed_output_path = os.path.join(current_dir, "processed_recording.wav")
|
|||
|
save_wav(processed_output_path, processed_frames, SAMPLE_RATE, CHANNELS)
|
|||
|
|
|||
|
# 保存参考音频(播放的音频)
|
|||
|
reference_output_path = os.path.join(current_dir, "reference_playback.wav")
|
|||
|
save_wav(reference_output_path, reference_frames, SAMPLE_RATE, CHANNELS)
|
|||
|
|
|||
|
# 删除临时文件
|
|||
|
if os.path.exists(temp_wav_path):
|
|||
|
try:
|
|||
|
os.remove(temp_wav_path)
|
|||
|
except Exception:
|
|||
|
pass
|
|||
|
|
|||
|
print(f"原始录音已保存至: {original_output_path}")
|
|||
|
print(f"处理后的录音已保存至: {processed_output_path}")
|
|||
|
print(f"参考音频已保存至: {reference_output_path}")
|
|||
|
|
|||
|
# 退出pygame
|
|||
|
pygame.quit()
|
|||
|
|
|||
|
|
|||
|
def save_wav(file_path, frames, sample_rate, channels):
|
|||
|
"""将音频帧保存为WAV文件."""
|
|||
|
with wave.open(file_path, "wb") as wf:
|
|||
|
wf.setnchannels(channels)
|
|||
|
wf.setsampwidth(2) # 2字节(16位)
|
|||
|
wf.setframerate(sample_rate)
|
|||
|
if isinstance(frames[0], bytes):
|
|||
|
wf.writeframes(b"".join(frames))
|
|||
|
else:
|
|||
|
wf.writeframes(b"".join([f for f in frames if isinstance(f, bytes)]))
|
|||
|
|
|||
|
|
|||
|
if __name__ == "__main__":
|
|||
|
# 获取命令行参数
|
|||
|
if len(sys.argv) > 1:
|
|||
|
audio_file = sys.argv[1]
|
|||
|
else:
|
|||
|
# 默认使用scripts目录下的鞠婧祎.wav
|
|||
|
audio_file = os.path.join(current_dir, "鞠婧祎.wav")
|
|||
|
|
|||
|
# 如果默认文件不存在,尝试MP3版本
|
|||
|
if not os.path.exists(audio_file):
|
|||
|
audio_file = os.path.join(current_dir, "鞠婧祎.mp3")
|
|||
|
if not os.path.exists(audio_file):
|
|||
|
print("错误: 找不到默认音频文件,请指定要播放的音频文件路径")
|
|||
|
print("用法: python webrtc_aec_demo.py [音频文件路径]")
|
|||
|
sys.exit(1)
|
|||
|
|
|||
|
# 运行演示
|
|||
|
aec_demo(audio_file)
|