“wangquan” 57fdf656b9 add_xiaozhi
2025-07-18 13:12:09 +08:00

177 lines
5.3 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import asyncio
import time
import os
#from datetime import datetime
from core.providers.tts.dto.dto import SentenceType
from core.utils.util import get_string_no_punctuation_or_emoji, analyze_emotion
from loguru import logger
TAG = __name__
emoji_map = {
"neutral": "😶",
"happy": "🙂",
"laughing": "😆",
"funny": "😂",
"sad": "😔",
"angry": "😠",
"crying": "😭",
"loving": "😍",
"embarrassed": "😳",
"surprised": "😲",
"shocked": "😱",
"thinking": "🤔",
"winking": "😉",
"cool": "😎",
"relaxed": "😌",
"delicious": "🤤",
"kissy": "😘",
"confident": "😏",
"sleepy": "😴",
"silly": "😜",
"confused": "🙄",
}
def save_text_to_file(text, filename="audio_logs.txt"):
"""
将文本内容保存到文件中
参数:
text: 要保存的文本内容
filename: 要保存的文件名(默认为 audio_logs.txt
"""
try:
# 创建日志目录(如果不存在)
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
filepath = os.path.join(log_dir, filename)
# 获取当前时间戳
#timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# 使用 with 语句确保文件正确关闭
with open(filepath, 'a', encoding='utf-8') as file:
file.write(f"{text}\n")
#logger.info(f"已将文本保存到 {filepath}")
return True
except Exception as e:
#logger.error(f"保存文件时出错: {str(e)}")
return False
async def sendAudioMessage(conn, sentenceType, audios, text):
if text is not None:
save_text_to_file(text)
# 发送句子开始消息
conn.logger.bind(tag=TAG).info(f"发送音频消息: {sentenceType}, {text}")
if text is not None:
emotion = analyze_emotion(text)
emoji = emoji_map.get(emotion, "🙂") # 默认使用笑脸
await conn.websocket.send(
json.dumps(
{
"type": "llm",
"text": emoji,
"emotion": emotion,
"session_id": conn.session_id,
}
)
)
pre_buffer = False
if conn.tts.tts_audio_first_sentence and text is not None:
conn.logger.bind(tag=TAG).info(f"发送第一段语音: {text}")
conn.tts.tts_audio_first_sentence = False
pre_buffer = True
await send_tts_message(conn, "sentence_start", text)
await sendAudio(conn, audios, pre_buffer)
await send_tts_message(conn, "sentence_end", text)
# 发送结束消息(如果是最后一个文本)
if conn.llm_finish_task and sentenceType == SentenceType.LAST:
await send_tts_message(conn, "stop", None)
conn.client_is_speaking = False
if conn.close_after_chat:
await conn.close()
# 播放音频
async def sendAudio(conn, audios, pre_buffer=True):
if audios is None or len(audios) == 0:
return
# 流控参数优化
frame_duration = 60 # 帧时长(毫秒),匹配 Opus 编码
start_time = time.perf_counter()
play_position = 0
last_reset_time = time.perf_counter() # 记录最后的重置时间
# 仅当第一句话时执行预缓冲
if pre_buffer:
pre_buffer_frames = min(3, len(audios))
for i in range(pre_buffer_frames):
await conn.websocket.send(audios[i])
remaining_audios = audios[pre_buffer_frames:]
else:
remaining_audios = audios
# 播放剩余音频帧
for opus_packet in remaining_audios:
if conn.client_abort:
break
# 重置没有声音的状态
conn.last_activity_time = time.time() * 1000
# 计算预期发送时间
expected_time = start_time + (play_position / 1000)
current_time = time.perf_counter()
delay = expected_time - current_time
if delay > 0:
await asyncio.sleep(delay)
await conn.websocket.send(opus_packet)
play_position += frame_duration
async def send_tts_message(conn, state, text=None):
"""发送 TTS 状态消息"""
message = {"type": "tts", "state": state, "session_id": conn.session_id}
if text is not None:
message["text"] = text
# TTS播放结束
if state == "stop":
# 播放提示音
tts_notify = conn.config.get("enable_stop_tts_notify", False)
if tts_notify:
stop_tts_notify_voice = conn.config.get(
"stop_tts_notify_voice", "config/assets/tts_notify.mp3"
)
audios, _ = conn.tts.audio_to_opus_data(stop_tts_notify_voice)
await sendAudio(conn, audios)
# 清除服务端讲话状态
conn.clearSpeakStatus()
# 发送消息到客户端
await conn.websocket.send(json.dumps(message))
async def send_stt_message(conn, text):
end_prompt_str = conn.config.get("end_prompt", {}).get("prompt")
if end_prompt_str and end_prompt_str == text:
await send_tts_message(conn, "start")
return
"""发送 STT 状态消息"""
stt_text = get_string_no_punctuation_or_emoji(text)
await conn.websocket.send(
json.dumps({"type": "stt", "text": stt_text, "session_id": conn.session_id})
)
conn.client_is_speaking = True
await send_tts_message(conn, "start")