add llm stream func
commit 959ecf9be8
parent 5e8884fcf3

README.md
@@ -3,6 +3,8 @@ Real time interactive streaming digital human, realize audio video synchronous
 [ernerf demo](https://www.bilibili.com/video/BV1PM4m1y7Q2/) [musetalk demo](https://www.bilibili.com/video/BV1gm421N7vQ/) [wav2lip demo](https://www.bilibili.com/video/BV1Bw4m1e74P/)
 
+## To avoid confusion with 3D digital humans, the original project metahuman-stream has been renamed to livetalking; the existing links remain valid
+
 ## Features
 1. Supports multiple digital-human models: ernerf, musetalk, wav2lip
 2. Supports voice cloning

app.py (65 lines changed)
@@ -24,6 +24,7 @@ import argparse
 import shutil
 import asyncio
+import string
 
 app = Flask(__name__)
@@ -52,14 +53,58 @@ def echo_socket(ws):
     nerfreal.put_msg_txt(message)
 
-def llm_response(message):
-    from llm.LLM import LLM
-    # llm = LLM().init_model('Gemini', model_path= 'gemini-pro',api_key='Your API Key', proxy_url=None)
-    # llm = LLM().init_model('ChatGPT', model_path= 'gpt-3.5-turbo',api_key='Your API Key')
-    llm = LLM().init_model('VllmGPT', model_path= 'THUDM/chatglm3-6b')
-    response = llm.chat(message)
-    print(response)
-    return response
+# def llm_response(message):
+#     from llm.LLM import LLM
+#     # llm = LLM().init_model('Gemini', model_path= 'gemini-pro',api_key='Your API Key', proxy_url=None)
+#     # llm = LLM().init_model('ChatGPT', model_path= 'gpt-3.5-turbo',api_key='Your API Key')
+#     llm = LLM().init_model('VllmGPT', model_path= 'THUDM/chatglm3-6b')
+#     response = llm.chat(message)
+#     print(response)
+#     return response
+
+def llm_response(message,nerfreal):
+    start = time.perf_counter()
+    from openai import OpenAI
+    client = OpenAI(
+        # If the environment variable is not set, replace this with your API key
+        api_key=os.getenv("DASHSCOPE_API_KEY"),
+        # base_url of the DashScope OpenAI-compatible endpoint
+        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+    )
+    end = time.perf_counter()
+    print(f"llm Time init: {end-start}s")
+    completion = client.chat.completions.create(
+        model="qwen-plus",
+        messages=[{'role': 'system', 'content': 'You are a helpful assistant.'},
+                  {'role': 'user', 'content': message}],
+        stream=True,
+        # With this option, token usage is reported in the last chunk of the stream
+        stream_options={"include_usage": True}
+    )
+    result=""
+    first = True
+    for chunk in completion:
+        if len(chunk.choices)>0:
+            #print(chunk.choices[0].delta.content)
+            if first:
+                end = time.perf_counter()
+                print(f"llm Time to first chunk: {end-start}s")
+                first = False
+            msg = chunk.choices[0].delta.content
+            lastpos=0
+            #msglist = re.split('[,.!;:,。!?]',msg)
+            for i, char in enumerate(msg):
+                if char in ",.!;:,。!?:;" :
+                    result = result+msg[lastpos:i+1]
+                    lastpos = i+1
+                    if len(result)>10:
+                        print(result)
+                        nerfreal.put_msg_txt(result)
+                        result=""
+            result = result+msg[lastpos:]
+    end = time.perf_counter()
+    print(f"llm Time to last chunk: {end-start}s")
+    nerfreal.put_msg_txt(result)
 
 @sockets.route('/humanchat')
 def chat_socket(ws):
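The heart of the change: instead of waiting for the full completion, the new llm_response scans each streamed delta for sentence-ending punctuation and flushes clauses longer than 10 characters straight into the digital human's TTS queue. A minimal standalone sketch of that chunking logic, assuming only that `flush` stands in for nerfreal.put_msg_txt (all names here are illustrative):

```python
# Sketch of the punctuation-based chunking in llm_response (illustrative names).
SENTENCE_BREAKS = ",.!;:,。!?:;"

def chunk_stream(deltas, flush, min_len=10):
    result = ""
    for msg in deltas:                  # each msg is one streamed delta
        lastpos = 0
        for i, char in enumerate(msg):
            if char in SENTENCE_BREAKS:
                result += msg[lastpos:i + 1]
                lastpos = i + 1
                if len(result) > min_len:   # flush once a clause is long enough
                    flush(result)
                    result = ""
        result += msg[lastpos:]         # keep the unterminated tail buffered
    if result:
        flush(result)                   # flush whatever remains at stream end

# A clause split across two deltas is still assembled before flushing:
chunk_stream(["你好,今天天气", "不错。我们现在就出门吧!"], print)
```

One consequence of min_len is that very short clauses ride along with the next one, so tiny TTS jobs never reach the queue.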
@@ -147,8 +192,8 @@ async def human(request):
     if params['type']=='echo':
         nerfreals[sessionid].put_msg_txt(params['text'])
     elif params['type']=='chat':
-        res=await asyncio.get_event_loop().run_in_executor(None, llm_response(params['text']))
-        nerfreals[sessionid].put_msg_txt(res)
+        res=await asyncio.get_event_loop().run_in_executor(None, llm_response, params['text'],nerfreals[sessionid])
+        #nerfreals[sessionid].put_msg_txt(res)
 
     return web.Response(
         content_type="application/json",
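This also fixes a latent bug: the old line called llm_response(params['text']) immediately, so the blocking LLM round-trip ran on the event loop and run_in_executor received its return value rather than a callable. The corrected form passes the function and its arguments separately, and since sentences are now pushed to nerfreal from inside llm_response, the return value is no longer needed. A small self-contained illustration of the two call shapes:

```python
import asyncio, time

def blocking_work(msg):
    time.sleep(1)                       # stands in for the blocking LLM call
    return msg.upper()

async def main():
    loop = asyncio.get_event_loop()
    # Wrong: blocking_work("hi") executes right here, blocking the event loop,
    # and run_in_executor is then handed a string rather than a callable.
    # res = await loop.run_in_executor(None, blocking_work("hi"))

    # Right: pass the callable and its arguments; it runs in a worker thread.
    res = await loop.run_in_executor(None, blocking_work, "hi")
    print(res)

asyncio.run(main())
```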

requirements.txt
@@ -40,3 +40,4 @@ diffusers
 accelerate
 
 librosa
+openai

ttsreal.py (155 lines changed)
@@ -37,7 +37,8 @@ class BaseTTS:
         self.state = State.PAUSE
 
     def put_msg_txt(self,msg):
-        self.msgqueue.put(msg)
+        if len(msg)>0:
+            self.msgqueue.put(msg)
 
     def render(self,quit_event):
         process_thread = Thread(target=self.process_tts, args=(quit_event,))
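The guard matters because the streaming llm_response above always flushes its final result buffer, which can be empty when the last delta ended exactly on punctuation; without the check, empty jobs would reach the TTS worker. A tiny self-contained sketch of the new behavior (Demo is an illustrative stand-in for BaseTTS):

```python
from queue import Queue

class Demo:
    def __init__(self):
        self.msgqueue = Queue()
    def put_msg_txt(self, msg):
        if len(msg) > 0:            # same guard as BaseTTS.put_msg_txt
            self.msgqueue.put(msg)

d = Demo()
d.put_msg_txt("")                   # dropped: e.g. llm_response's empty tail flush
d.put_msg_txt("你好。")              # queued as before
print(d.msgqueue.qsize())           # -> 1
```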
@@ -99,19 +100,22 @@ class EdgeTTS(BaseTTS):
         return stream
 
     async def __main(self,voicename: str, text: str):
-        communicate = edge_tts.Communicate(text, voicename)
-
-        #with open(OUTPUT_FILE, "wb") as file:
-        first = True
-        async for chunk in communicate.stream():
-            if first:
-                first = False
-            if chunk["type"] == "audio" and self.state==State.RUNNING:
-                #self.push_audio(chunk["data"])
-                self.input_stream.write(chunk["data"])
-                #file.write(chunk["data"])
-            elif chunk["type"] == "WordBoundary":
-                pass
+        try:
+            communicate = edge_tts.Communicate(text, voicename)
+
+            #with open(OUTPUT_FILE, "wb") as file:
+            first = True
+            async for chunk in communicate.stream():
+                if first:
+                    first = False
+                if chunk["type"] == "audio" and self.state==State.RUNNING:
+                    #self.push_audio(chunk["data"])
+                    self.input_stream.write(chunk["data"])
+                    #file.write(chunk["data"])
+                elif chunk["type"] == "WordBoundary":
+                    pass
+        except Exception as e:
+            print(e)
 
 ###########################################################################################
 class VoitsTTS(BaseTTS):
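Each TTS backend now wraps its network streaming in a broad try/except that only logs the error. The point is resilience of the long-lived worker: a transient failure on one utterance should cost that utterance, not the whole render loop. A minimal sketch of the failure mode and the guard (names illustrative):

```python
import asyncio

async def speak_once(text):
    if "boom" in text:                  # stand-in for a dropped edge-tts stream
        raise ConnectionError("edge-tts stream dropped")
    print("audio for:", text)

async def worker(messages):
    for m in messages:
        try:
            await speak_once(m)         # same guard shape as EdgeTTS.__main
        except Exception as e:
            print(e)                    # log and keep serving later messages

asyncio.run(worker(["hello", "boom", "world"]))
```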
@@ -143,28 +147,31 @@ class VoitsTTS(BaseTTS):
         # req["emotion"] = emotion
         # #req["stream_chunk_size"] = stream_chunk_size # you can reduce it to get faster response, but degrade quality
         # req["streaming_mode"] = True
-        res = requests.post(
-            f"{server_url}/tts",
-            json=req,
-            stream=True,
-        )
-        end = time.perf_counter()
-        print(f"gpt_sovits Time to make POST: {end-start}s")
-
-        if res.status_code != 200:
-            print("Error:", res.text)
-            return
-
-        first = True
-        for chunk in res.iter_content(chunk_size=16000): # 1280 32K*20ms*2
-            if first:
-                end = time.perf_counter()
-                print(f"gpt_sovits Time to first chunk: {end-start}s")
-                first = False
-            if chunk and self.state==State.RUNNING:
-                yield chunk
-
-        print("gpt_sovits response.elapsed:", res.elapsed)
+        try:
+            res = requests.post(
+                f"{server_url}/tts",
+                json=req,
+                stream=True,
+            )
+            end = time.perf_counter()
+            print(f"gpt_sovits Time to make POST: {end-start}s")
+
+            if res.status_code != 200:
+                print("Error:", res.text)
+                return
+
+            first = True
+            for chunk in res.iter_content(chunk_size=12800): # 1280 32K*20ms*2
+                if first:
+                    end = time.perf_counter()
+                    print(f"gpt_sovits Time to first chunk: {end-start}s")
+                    first = False
+                if chunk and self.state==State.RUNNING:
+                    yield chunk
+            #print("gpt_sovits response.elapsed:", res.elapsed)
+        except Exception as e:
+            print(e)
 
     def stream_tts(self,audio_stream):
         for chunk in audio_stream:
@@ -199,26 +206,28 @@ class CosyVoiceTTS(BaseTTS):
             'tts_text': text,
             'prompt_text': reftext
         }
-        files = [('prompt_wav', ('prompt_wav', open(reffile, 'rb'), 'application/octet-stream'))]
-        res = requests.request("GET", f"{server_url}/inference_zero_shot", data=payload, files=files, stream=True)
-
-        end = time.perf_counter()
-        print(f"cosy_voice Time to make POST: {end-start}s")
-
-        if res.status_code != 200:
-            print("Error:", res.text)
-            return
-
-        first = True
-        for chunk in res.iter_content(chunk_size=16000): # 1280 32K*20ms*2
-            if first:
-                end = time.perf_counter()
-                print(f"cosy_voice Time to first chunk: {end-start}s")
-                first = False
-            if chunk and self.state==State.RUNNING:
-                yield chunk
-
-        print("cosy_voice response.elapsed:", res.elapsed)
+        try:
+            files = [('prompt_wav', ('prompt_wav', open(reffile, 'rb'), 'application/octet-stream'))]
+            res = requests.request("GET", f"{server_url}/inference_zero_shot", data=payload, files=files, stream=True)
+
+            end = time.perf_counter()
+            print(f"cosy_voice Time to make POST: {end-start}s")
+
+            if res.status_code != 200:
+                print("Error:", res.text)
+                return
+
+            first = True
+            for chunk in res.iter_content(chunk_size=8820): # 882 22.05K*20ms*2
+                if first:
+                    end = time.perf_counter()
+                    print(f"cosy_voice Time to first chunk: {end-start}s")
+                    first = False
+                if chunk and self.state==State.RUNNING:
+                    yield chunk
+        except Exception as e:
+            print(e)
 
     def stream_tts(self,audio_stream):
         for chunk in audio_stream:
@@ -261,28 +270,30 @@ class XTTS(BaseTTS):
         speaker["text"] = text
         speaker["language"] = language
         speaker["stream_chunk_size"] = stream_chunk_size # you can reduce it to get faster response, but degrade quality
-        res = requests.post(
-            f"{server_url}/tts_stream",
-            json=speaker,
-            stream=True,
-        )
-        end = time.perf_counter()
-        print(f"xtts Time to make POST: {end-start}s")
-
-        if res.status_code != 200:
-            print("Error:", res.text)
-            return
-
-        first = True
-        for chunk in res.iter_content(chunk_size=960): #24K*20ms*2
-            if first:
-                end = time.perf_counter()
-                print(f"xtts Time to first chunk: {end-start}s")
-                first = False
-            if chunk:
-                yield chunk
-
-        print("xtts response.elapsed:", res.elapsed)
+        try:
+            res = requests.post(
+                f"{server_url}/tts_stream",
+                json=speaker,
+                stream=True,
+            )
+            end = time.perf_counter()
+            print(f"xtts Time to make POST: {end-start}s")
+
+            if res.status_code != 200:
+                print("Error:", res.text)
+                return
+
+            first = True
+            for chunk in res.iter_content(chunk_size=9600): #24K*20ms*2
+                if first:
+                    end = time.perf_counter()
+                    print(f"xtts Time to first chunk: {end-start}s")
+                    first = False
+                if chunk:
+                    yield chunk
+        except Exception as e:
+            print(e)
 
     def stream_tts(self,audio_stream):
         for chunk in audio_stream:
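Across the three HTTP backends the iter_content chunk size was retuned. The inline comments give the bytes in one 20 ms frame of 16-bit mono PCM (sample_rate × 0.02 s × 2 bytes), and each new value is exactly ten such frames, i.e. roughly 200 ms of audio per read; that batching rationale is inferred from the numbers, not stated in the commit. A quick check of the arithmetic:

```python
# bytes per frame = sample_rate * frame_seconds * bytes_per_sample (16-bit PCM = 2)
def frame_bytes(sample_rate_hz, frame_ms=20, sample_bytes=2):
    return int(sample_rate_hz * frame_ms / 1000 * sample_bytes)

print(frame_bytes(32000))   # 1280 -> gpt_sovits now reads 12800 (10 frames)
print(frame_bytes(22050))   # 882  -> cosy_voice now reads 8820  (10 frames)
print(frame_bytes(24000))   # 960  -> xtts now reads 9600        (10 frames)
```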