fix musetalk for windows

lipku 2024-06-10 13:10:21 +08:00
parent 58e763fdb6
commit 6fb8a19fd5
4 changed files with 109 additions and 24 deletions


@@ -54,7 +54,7 @@ python app.py
 export HF_ENDPOINT=https://hf-mirror.com
 ```
-Open http://serverip:8010/rtcpush.html in a browser, enter any text in the text box, and submit; the digital human reads the text aloud.
+Open http://serverip:8010/rtcpushapi.html in a browser, enter any text in the text box, and submit; the digital human reads the text aloud.
 Note: the server needs ports tcp:8000,8010,1985 and udp:8000 open.

 ## 3. More Usage
@@ -128,7 +128,7 @@ python app.py --customvideo --customvideo_img data/customvideo/img --customvideo
 ```
 python app.py --transport webrtc
 ```
-Open http://serverip:8010/webrtc.html in a browser.
+Open http://serverip:8010/webrtcapi.html in a browser.

 ### 3.8 Pushing rtmp to srs
 - Install the rtmpstream library
@@ -142,7 +142,7 @@ docker run --rm -it -p 1935:1935 -p 1985:1985 -p 8080:8080 registry.cn-hangzhou.
 ```python
 python app.py --transport rtmp --push_url 'rtmp://localhost/live/livestream'
 ```
-Open http://serverip:8010/echo.html in a browser.
+Open http://serverip:8010/echoapi.html in a browser.

 ### 3.9 Using musetalk as the model
 rtmp push is not supported yet.
@@ -161,7 +161,7 @@ mim install "mmpose>=1.1.0"
 Download the digital-human model from https://caiyun.139.com/m/i?2eAjs8optksop (extraction code: 3mkt); after unzipping, copy the whole folder into data/avatars in this project.
 - Run
   python app.py --model musetalk --transport webrtc
-Open http://serverip:8010/webrtc.html in a browser.
+Open http://serverip:8010/webrtcapi.html in a browser.
 You can set --batch_size to improve GPU utilization, and --avatar_id to run a different digital human.
 #### Replacing with your own digital human
 ```bash


@@ -21,7 +21,7 @@ import multiprocessing as mp
 from musetalk.utils.utils import get_file_type,get_video_fps,datagen
 #from musetalk.utils.preprocessing import get_landmark_and_bbox,read_imgs,coord_placeholder
 from musetalk.utils.blending import get_image,get_image_prepare_material,get_image_blending
-from musetalk.utils.utils import load_all_model
+from musetalk.utils.utils import load_all_model,load_diffusion_model,load_audio_model
 from ttsreal import EdgeTTS,VoitsTTS,XTTS

 from museasr import MuseASR
@@ -46,17 +46,17 @@ def __mirror_index(size, index):
     else:
         return size - res - 1

-def inference(render_event,batch_size,input_latent_list_cycle,audio_feat_queue,audio_out_queue,res_frame_queue,
-              vae, unet, pe,timesteps):
-    # _, vae, unet, pe = load_all_model()
-    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # timesteps = torch.tensor([0], device=device)
-    # pe = pe.half()
-    # vae.vae = vae.vae.half()
-    # unet.model = unet.model.half()
-    #input_latent_list_cycle = torch.load(latents_out_path)
+def inference(render_event,batch_size,latents_out_path,audio_feat_queue,audio_out_queue,res_frame_queue,
+              ): #vae, unet, pe,timesteps
+    vae, unet, pe = load_diffusion_model()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    timesteps = torch.tensor([0], device=device)
+    pe = pe.half()
+    vae.vae = vae.vae.half()
+    unet.model = unet.model.half()
+    input_latent_list_cycle = torch.load(latents_out_path)
     length = len(input_latent_list_cycle)
     index = 0
     count=0
@@ -119,7 +119,7 @@ def inference(render_event,batch_size,input_latent_list_cycle,audio_feat_queue,a
                 #self.__pushmedia(res_frame,loop,audio_track,video_track)
                 res_frame_queue.put((res_frame,__mirror_index(length,index),audio_frames[i*2:i*2+2]))
                 index = index + 1
-            print('total batch time:',time.perf_counter()-starttime)
+            #print('total batch time:',time.perf_counter()-starttime)
         else:
             time.sleep(1)
     print('musereal inference processor stop')
@@ -166,21 +166,22 @@ class MuseReal:
         #self.__warm_up()

         self.render_event = mp.Event()
-        mp.Process(target=inference, args=(self.render_event,self.batch_size,self.input_latent_list_cycle,
+        mp.Process(target=inference, args=(self.render_event,self.batch_size,self.latents_out_path,
                            self.asr.feat_queue,self.asr.output_queue,self.res_frame_queue,
-                           self.vae, self.unet, self.pe,self.timesteps)).start()
+                           )).start() #self.vae, self.unet, self.pe,self.timesteps

     def __loadmodels(self):
         # load model weights
-        self.audio_processor, self.vae, self.unet, self.pe = load_all_model()
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.timesteps = torch.tensor([0], device=device)
-        self.pe = self.pe.half()
-        self.vae.vae = self.vae.vae.half()
-        self.unet.model = self.unet.model.half()
+        self.audio_processor= load_audio_model()
+        # self.audio_processor, self.vae, self.unet, self.pe = load_all_model()
+        # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # self.timesteps = torch.tensor([0], device=device)
+        # self.pe = self.pe.half()
+        # self.vae.vae = self.vae.vae.half()
+        # self.unet.model = self.unet.model.half()

     def __loadavatar(self):
-        self.input_latent_list_cycle = torch.load(self.latents_out_path)
+        #self.input_latent_list_cycle = torch.load(self.latents_out_path)
         with open(self.coords_path, 'rb') as f:
             self.coord_list_cycle = pickle.load(f)
         input_img_list = glob.glob(os.path.join(self.full_imgs_path, '*.[jpJP][pnPN]*[gG]'))
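
This is the heart of the Windows fix. Windows has no fork(), so multiprocessing must spawn the render child, and every argument handed to mp.Process gets pickled; the half-precision CUDA-resident vae/unet/pe models and the preloaded latent tensors generally cannot be shared that way on Windows, while a file path, queues, and events can. The hunks above therefore stop passing loaded models into the child and let inference() rebuild them itself. A minimal sketch of the pattern, with hypothetical names rather than project code:

```python
import multiprocessing as mp
import torch

def worker(weights_path):
    # Under the spawn start method (the only one Windows supports), the child
    # begins with a fresh interpreter, so GPU state is created here rather
    # than inherited from the parent.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    weights = torch.load(weights_path, map_location=device)  # hypothetical checkpoint

if __name__ == "__main__":  # this guard is mandatory under spawn
    # Hand the child only small, picklable things: paths, queues, events.
    mp.Process(target=worker, args=("model.pt",)).start()
```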


@@ -62,3 +62,14 @@ def datagen(whisper_chunks,
         latent_batch = torch.cat(latent_batch, dim=0)

         yield whisper_batch, latent_batch
+
+def load_audio_model():
+    audio_processor = Audio2Feature(model_path="./models/whisper/tiny.pt")
+    return audio_processor
+
+def load_diffusion_model():
+    vae = VAE(model_path = "./models/sd-vae-ft-mse/")
+    unet = UNet(unet_config="./models/musetalk/musetalk.json",
+                model_path ="./models/musetalk/pytorch_model.bin")
+    pe = PositionalEncoding(d_model=384)
+    return vae,unet,pe

web/echoapi.html (new file, 73 lines)

@@ -0,0 +1,73 @@
+<!-- index.html -->
+<html>
+<head>
+  <script type="text/javascript" src="mpegts-1.7.3.min.js"></script>
+  <script type="text/javascript" src="http://cdn.sockjs.org/sockjs-0.3.4.js"></script>
+  <script type="text/javascript" src="https://ajax.aspnetcdn.com/ajax/jquery/jquery-2.1.1.min.js"></script>
+</head>
+<body>
+  <div class="container">
+    <h1>WebSocket Test</h1>
+    <form class="form-inline" id="echo-form">
+      <div class="form-group">
+        <p>input text</p>
+        <textarea cols="2" rows="3" style="width:600px;height:50px;" class="form-control" id="message">test</textarea>
+      </div>
+      <button type="submit" class="btn btn-default">Send</button>
+    </form>
+    <div id="log">
+    </div>
+    <video id="video_player" width="40%" controls autoplay muted></video>
+  </div>
+</body>
+<script type="text/javascript" charset="utf-8">
+  $(document).ready(function() {
+    var host = window.location.hostname
+    // var ws = new WebSocket("ws://"+host+":8000/humanecho");
+    // //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
+    // ws.onopen = function() {
+    //   console.log('Connected');
+    // };
+    // ws.onmessage = function(e) {
+    //   console.log('Received: ' + e.data);
+    //   data = e
+    //   var vid = JSON.parse(data.data);
+    //   console.log(typeof(vid),vid)
+    //   //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
+    // };
+    // ws.onclose = function(e) {
+    //   console.log('Closed');
+    // };
+
+    flvPlayer = mpegts.createPlayer({type: 'flv', url: "http://"+host+":8080/live/livestream.flv", isLive: true, enableStashBuffer: false});
+    flvPlayer.attachMediaElement(document.getElementById('video_player'));
+    flvPlayer.load();
+    flvPlayer.play();
+
+    $('#echo-form').on('submit', function(e) {
+      e.preventDefault();
+      var message = $('#message').val();
+      console.log('Sending: ' + message);
+      fetch('/human', {
+        body: JSON.stringify({
+          text: message,
+          type: 'echo',
+        }),
+        headers: {
+          'Content-Type': 'application/json'
+        },
+        method: 'POST'
+      });
+      //ws.send(message);
+      $('#message').val('');
+    });
+  });
+</script>
+</html>
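
Since the page talks to the server with an ordinary JSON POST, the digital human can also be driven without a browser. A sketch of the same request from Python, assuming app.py is listening on port 8010 as in the README:

```python
import json
import urllib.request

# Same payload the echoapi.html form posts to /human.
req = urllib.request.Request(
    "http://localhost:8010/human",
    data=json.dumps({"text": "hello", "type": "echo"}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    print(resp.status)
```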