add llm stream func

This commit is contained in:
lipku 2024-10-05 17:25:01 +08:00
parent 5e8884fcf3
commit 959ecf9be8
4 changed files with 141 additions and 82 deletions

README.md

@ -3,6 +3,8 @@ Real-time interactive streaming digital human with synchronized audio and video
[ernerf demo](https://www.bilibili.com/video/BV1PM4m1y7Q2/) [musetalk demo](https://www.bilibili.com/video/BV1gm421N7vQ/) [wav2lip demo](https://www.bilibili.com/video/BV1Bw4m1e74P/)
## To avoid confusion with 3D digital humans, the original project metahuman-stream has been renamed to livetalking; the original links remain valid
## Features
1. Supports multiple digital-human models: ernerf, musetalk, wav2lip
2. Supports voice cloning

app.py

@ -24,6 +24,7 @@ import argparse
import shutil
import asyncio
import string
app = Flask(__name__)
@ -52,14 +53,58 @@ def echo_socket(ws):
nerfreal.put_msg_txt(message)
def llm_response(message):
    from llm.LLM import LLM
    # llm = LLM().init_model('Gemini', model_path= 'gemini-pro',api_key='Your API Key', proxy_url=None)
    # llm = LLM().init_model('ChatGPT', model_path= 'gpt-3.5-turbo',api_key='Your API Key')
    llm = LLM().init_model('VllmGPT', model_path= 'THUDM/chatglm3-6b')
    response = llm.chat(message)
    print(response)
    return response
# def llm_response(message):
#     from llm.LLM import LLM
#     # llm = LLM().init_model('Gemini', model_path= 'gemini-pro',api_key='Your API Key', proxy_url=None)
#     # llm = LLM().init_model('ChatGPT', model_path= 'gpt-3.5-turbo',api_key='Your API Key')
#     llm = LLM().init_model('VllmGPT', model_path= 'THUDM/chatglm3-6b')
#     response = llm.chat(message)
#     print(response)
#     return response
def llm_response(message,nerfreal):
    start = time.perf_counter()
    from openai import OpenAI
    client = OpenAI(
        # if the DASHSCOPE_API_KEY environment variable is not set, replace this with your API key
        api_key=os.getenv("DASHSCOPE_API_KEY"),
        # base_url of the DashScope OpenAI-compatible endpoint
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )
    end = time.perf_counter()
    print(f"llm Time init: {end-start}s")
    completion = client.chat.completions.create(
        model="qwen-plus",
        messages=[{'role': 'system', 'content': 'You are a helpful assistant.'},
                  {'role': 'user', 'content': message}],
        stream=True,
        # report token usage in the final chunk of the stream
        stream_options={"include_usage": True}
    )
    result=""
    first = True
    for chunk in completion:
        if len(chunk.choices)>0:
            #print(chunk.choices[0].delta.content)
            if first:
                end = time.perf_counter()
                print(f"llm Time to first chunk: {end-start}s")
                first = False
            msg = chunk.choices[0].delta.content
            if msg is None:  # role-only or final deltas may carry no content
                continue
            lastpos=0
            #msglist = re.split('[,.!;:,。!?]',msg)
            for i, char in enumerate(msg):
                if char in ",.!;:,。!?:;" :
                    result = result+msg[lastpos:i+1]
                    lastpos = i+1
                    if len(result)>10:
                        print(result)
                        nerfreal.put_msg_txt(result)
                        result=""
            result = result+msg[lastpos:]
    end = time.perf_counter()
    print(f"llm Time to last chunk: {end-start}s")
    nerfreal.put_msg_txt(result)
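For illustration, here is a minimal standalone sketch (not part of this commit; segment_stream and the sample deltas are hypothetical) of the buffering rule above: deltas accumulate until a punctuation mark arrives with more than 10 characters buffered, at which point the fragment is flushed to TTS, so synthesis starts long before the LLM finishes.

# Hypothetical illustration of llm_response's sentence buffering; `flush`
# stands in for nerfreal.put_msg_txt.
def segment_stream(deltas, flush):
    result = ""
    for msg in deltas:
        lastpos = 0
        for i, char in enumerate(msg):
            if char in ",.!;:,。!?:;":
                result += msg[lastpos:i+1]
                lastpos = i + 1
                if len(result) > 10:    # same threshold as llm_response
                    flush(result)
                    result = ""
        result += msg[lastpos:]
    if result:
        flush(result)                   # flush the tail, as llm_response does

segment_stream(["Hello there, this is", " a streaming reply. Bye!"], print)
# -> "Hello there,", " this is a streaming reply.", " Bye!"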
@sockets.route('/humanchat')
def chat_socket(ws):
@ -147,8 +192,8 @@ async def human(request):
        if params['type']=='echo':
            nerfreals[sessionid].put_msg_txt(params['text'])
        elif params['type']=='chat':
            res=await asyncio.get_event_loop().run_in_executor(None, llm_response(params['text']))
            nerfreals[sessionid].put_msg_txt(res)
            res=await asyncio.get_event_loop().run_in_executor(None, llm_response, params['text'],nerfreals[sessionid])
            #nerfreals[sessionid].put_msg_txt(res)
        return web.Response(
            content_type="application/json",
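The fix in this hunk matters: the removed line called llm_response(params['text']) directly, so the blocking LLM round trip still ran on the asyncio event loop and its return value, a string, was handed to run_in_executor as if it were callable. The new line passes the function plus its arguments, which then run on a worker thread. A minimal sketch of the pattern (illustrative only; blocking_llm is a hypothetical stand-in):

import asyncio, time

def blocking_llm(prompt):   # hypothetical stand-in for llm_response
    time.sleep(1)           # simulates a slow LLM round trip
    return f"reply to {prompt!r}"

async def handler():
    loop = asyncio.get_running_loop()
    # Wrong: blocking_llm("hi") executes here, on the event loop, and
    # run_in_executor then fails because a str is not callable:
    #   res = await loop.run_in_executor(None, blocking_llm("hi"))
    # Right: the callable and its args are shipped to a worker thread:
    res = await loop.run_in_executor(None, blocking_llm, "hi")
    print(res)

asyncio.run(handler())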

requirements.txt

@ -40,3 +40,4 @@ diffusers
accelerate
librosa
openai

ttsreal.py

@ -37,6 +37,7 @@ class BaseTTS:
        self.state = State.PAUSE
    def put_msg_txt(self,msg):
        if len(msg)>0:
            self.msgqueue.put(msg)
    def render(self,quit_event):
@ -99,6 +100,7 @@ class EdgeTTS(BaseTTS):
        return stream
    async def __main(self,voicename: str, text: str):
        try:
            communicate = edge_tts.Communicate(text, voicename)
            #with open(OUTPUT_FILE, "wb") as file:
@ -112,6 +114,8 @@ class EdgeTTS(BaseTTS):
                    #file.write(chunk["data"])
                elif chunk["type"] == "WordBoundary":
                    pass
        except Exception as e:
            print(e)
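For reference, edge-tts streams synthesis results as dict chunks whose "type" is "audio" (encoded bytes in chunk["data"], MP3 by default) or "WordBoundary" (word-timing metadata), which is what the loop above consumes. A minimal standalone sketch (the voice name and output path are arbitrary choices, not from this commit):

import asyncio
import edge_tts

async def demo():
    communicate = edge_tts.Communicate("hello world", "en-US-AriaNeural")
    with open("out.mp3", "wb") as f:
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                f.write(chunk["data"])                  # encoded audio bytes
            elif chunk["type"] == "WordBoundary":
                print(chunk["offset"], chunk["text"])   # word timing metadata

asyncio.run(demo())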
###########################################################################################
class VoitsTTS(BaseTTS):
@ -143,6 +147,7 @@ class VoitsTTS(BaseTTS):
        # req["emotion"] = emotion
        # #req["stream_chunk_size"] = stream_chunk_size  # you can reduce it for a faster response, at the cost of quality
        # req["streaming_mode"] = True
        try:
            res = requests.post(
                f"{server_url}/tts",
                json=req,
@ -156,15 +161,17 @@ class VoitsTTS(BaseTTS):
                return
            first = True
            for chunk in res.iter_content(chunk_size=16000): # 1280 32K*20ms*2
            for chunk in res.iter_content(chunk_size=12800): # 12800 = 10 x 20ms frames at 32kHz, 16-bit mono
                if first:
                    end = time.perf_counter()
                    print(f"gpt_sovits Time to first chunk: {end-start}s")
                    first = False
                if chunk and self.state==State.RUNNING:
                    yield chunk
            print("gpt_sovits response.elapsed:", res.elapsed)
            #print("gpt_sovits response.elapsed:", res.elapsed)
        except Exception as e:
            print(e)
    def stream_tts(self,audio_stream):
        for chunk in audio_stream:
@ -199,6 +206,7 @@ class CosyVoiceTTS(BaseTTS):
            'tts_text': text,
            'prompt_text': reftext
        }
        try:
            files = [('prompt_wav', ('prompt_wav', open(reffile, 'rb'), 'application/octet-stream'))]
            res = requests.request("GET", f"{server_url}/inference_zero_shot", data=payload, files=files, stream=True)
@ -210,15 +218,16 @@ class CosyVoiceTTS(BaseTTS):
                return
            first = True
            for chunk in res.iter_content(chunk_size=16000): # 1280 32K*20ms*2
            for chunk in res.iter_content(chunk_size=8820): # 8820 = 10 x 20ms frames at 22.05kHz, 16-bit mono
                if first:
                    end = time.perf_counter()
                    print(f"cosy_voice Time to first chunk: {end-start}s")
                    first = False
                if chunk and self.state==State.RUNNING:
                    yield chunk
            print("cosy_voice response.elapsed:", res.elapsed)
        except Exception as e:
            print(e)
    def stream_tts(self,audio_stream):
        for chunk in audio_stream:
@ -261,6 +270,7 @@ class XTTS(BaseTTS):
speaker["text"] = text
speaker["language"] = language
speaker["stream_chunk_size"] = stream_chunk_size # you can reduce it to get faster response, but degrade quality
try:
res = requests.post(
f"{server_url}/tts_stream",
json=speaker,
@ -274,15 +284,16 @@ class XTTS(BaseTTS):
return
first = True
for chunk in res.iter_content(chunk_size=960): #24K*20ms*2
for chunk in res.iter_content(chunk_size=9600): #24K*20ms*2
if first:
end = time.perf_counter()
print(f"xtts Time to first chunk: {end-start}s")
first = False
if chunk:
yield chunk
print("xtts response.elapsed:", res.elapsed)
except Exception as e:
print(e)
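The three new chunk_size values all follow the same rule: 10 frames of 20 ms, 16-bit mono PCM at the service's output sample rate (the old inline comments described a single frame). A quick sketch of the arithmetic (illustrative only):

# Bytes per iter_content chunk: sample_rate * 20ms * 2 bytes/sample * 10 frames.
def chunk_bytes(sample_rate_hz, frame_ms=20, frames=10, bytes_per_sample=2):
    return int(sample_rate_hz * frame_ms / 1000) * bytes_per_sample * frames

print(chunk_bytes(32000))  # 12800 -> gpt_sovits (32 kHz)
print(chunk_bytes(22050))  # 8820  -> cosy_voice (22.05 kHz)
print(chunk_bytes(24000))  # 9600  -> xtts (24 kHz)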
    def stream_tts(self,audio_stream):
        for chunk in audio_stream: