fix musetalk for windows
commit 6fb8a19fd5 (parent 58e763fdb6)
@@ -54,7 +54,7 @@ python app.py
 export HF_ENDPOINT=https://hf-mirror.com
 ```

-Open http://serverip:8010/rtcpush.html in a browser, type any text into the text box, and submit it. The digital human reads the text aloud.
+Open http://serverip:8010/rtcpushapi.html in a browser, type any text into the text box, and submit it. The digital human reads the text aloud.
 Note: the server needs to open ports tcp:8000,8010,1985 and udp:8000.

 ## 3. More Usage
@@ -128,7 +128,7 @@ python app.py --customvideo --customvideo_img data/customvideo/img --customvideo
 ```
 python app.py --transport webrtc
 ```
-Open http://serverip:8010/webrtc.html in a browser.
+Open http://serverip:8010/webrtcapi.html in a browser.

 ### 3.8 Pushing RTMP to SRS
 - Install the rtmpstream library
@@ -142,7 +142,7 @@ docker run --rm -it -p 1935:1935 -p 1985:1985 -p 8080:8080 registry.cn-hangzhou.
 ```python
 python app.py --transport rtmp --push_url 'rtmp://localhost/live/livestream'
 ```
-Open http://serverip:8010/echo.html in a browser.
+Open http://serverip:8010/echoapi.html in a browser.

 ### 3.9 Using musetalk as the model
 RTMP push is not supported yet
@@ -161,7 +161,7 @@ mim install "mmpose>=1.1.0"
 Download the digital human model (link: https://caiyun.139.com/m/i?2eAjs8optksop, extraction code: 3mkt), unzip it, and copy the whole folder into this project's data/avatars directory.
 - Run
 python app.py --model musetalk --transport webrtc
-Open http://serverip:8010/webrtc.html in a browser.
+Open http://serverip:8010/webrtcapi.html in a browser.
 You can set --batch_size to improve GPU utilization and --avatar_id to run a different digital human.
 #### Replacing it with your own digital human
 ```bash
musereal.py (41 changed lines)
@@ -21,7 +21,7 @@ import multiprocessing as mp
 from musetalk.utils.utils import get_file_type,get_video_fps,datagen
 #from musetalk.utils.preprocessing import get_landmark_and_bbox,read_imgs,coord_placeholder
 from musetalk.utils.blending import get_image,get_image_prepare_material,get_image_blending
-from musetalk.utils.utils import load_all_model
+from musetalk.utils.utils import load_all_model,load_diffusion_model,load_audio_model
 from ttsreal import EdgeTTS,VoitsTTS,XTTS

 from museasr import MuseASR
@@ -46,17 +46,17 @@ def __mirror_index(size, index):
     else:
         return size - res - 1

-def inference(render_event,batch_size,input_latent_list_cycle,audio_feat_queue,audio_out_queue,res_frame_queue,
-              vae, unet, pe,timesteps):
+def inference(render_event,batch_size,latents_out_path,audio_feat_queue,audio_out_queue,res_frame_queue,
+              ): #vae, unet, pe,timesteps

-    # _, vae, unet, pe = load_all_model()
-    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # timesteps = torch.tensor([0], device=device)
-    # pe = pe.half()
-    # vae.vae = vae.vae.half()
-    # unet.model = unet.model.half()
+    vae, unet, pe = load_diffusion_model()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    timesteps = torch.tensor([0], device=device)
+    pe = pe.half()
+    vae.vae = vae.vae.half()
+    unet.model = unet.model.half()

-    #input_latent_list_cycle = torch.load(latents_out_path)
+    input_latent_list_cycle = torch.load(latents_out_path)
     length = len(input_latent_list_cycle)
     index = 0
     count=0
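A note between these hunks: `__mirror_index`, whose closing branch is visible at the top of this hunk and which the next hunk uses to pick frames, turns a monotonically increasing frame counter into a ping-pong index, so the avatar's prerecorded frame cycle plays forward, then backward, instead of jumping at the loop point. Below is a minimal sketch reconstructed around the visible `else` branch; the exact body in musereal.py may differ.

```python
def mirror_index(size, index):
    # Map 0,1,2,... onto 0..size-1, size-1..0, 0..size-1, ... (ping-pong).
    turn = index // size   # which pass over the frame list we are on
    res = index % size     # position within the current pass
    if turn % 2 == 0:
        return res                 # even pass: play frames forward
    else:
        return size - res - 1      # odd pass: play backward (the branch shown above)

# e.g. size=3 cycles 0,1,2,2,1,0,0,...
assert [mirror_index(3, i) for i in range(7)] == [0, 1, 2, 2, 1, 0, 0]
```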
@@ -119,7 +119,7 @@ def inference(render_event,batch_size,input_latent_list_cycle,audio_feat_queue,a
                 #self.__pushmedia(res_frame,loop,audio_track,video_track)
                 res_frame_queue.put((res_frame,__mirror_index(length,index),audio_frames[i*2:i*2+2]))
                 index = index + 1
-            print('total batch time:',time.perf_counter()-starttime)
+            #print('total batch time:',time.perf_counter()-starttime)
         else:
             time.sleep(1)
     print('musereal inference processor stop')
@@ -166,21 +166,22 @@ class MuseReal:
         #self.__warm_up()

         self.render_event = mp.Event()
-        mp.Process(target=inference, args=(self.render_event,self.batch_size,self.input_latent_list_cycle,
+        mp.Process(target=inference, args=(self.render_event,self.batch_size,self.latents_out_path,
                            self.asr.feat_queue,self.asr.output_queue,self.res_frame_queue,
-                           self.vae, self.unet, self.pe,self.timesteps)).start()
+                           )).start() #self.vae, self.unet, self.pe,self.timesteps

     def __loadmodels(self):
         # load model weights
-        self.audio_processor, self.vae, self.unet, self.pe = load_all_model()
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.timesteps = torch.tensor([0], device=device)
-        self.pe = self.pe.half()
-        self.vae.vae = self.vae.vae.half()
-        self.unet.model = self.unet.model.half()
+        self.audio_processor= load_audio_model()
+        # self.audio_processor, self.vae, self.unet, self.pe = load_all_model()
+        # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # self.timesteps = torch.tensor([0], device=device)
+        # self.pe = self.pe.half()
+        # self.vae.vae = self.vae.vae.half()
+        # self.unet.model = self.unet.model.half()

     def __loadavatar(self):
-        self.input_latent_list_cycle = torch.load(self.latents_out_path)
+        #self.input_latent_list_cycle = torch.load(self.latents_out_path)
         with open(self.coords_path, 'rb') as f:
             self.coord_list_cycle = pickle.load(f)
         input_img_list = glob.glob(os.path.join(self.full_imgs_path, '*.[jpJP][pnPN]*[gG]'))
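These hunks carry the actual Windows fix. On Windows, Python's `multiprocessing` starts children with the spawn method, which pickles every argument passed to `mp.Process`; the CUDA-resident models and latent tensors previously passed in (`self.vae`, `self.unet`, `self.pe`, `self.input_latent_list_cycle`) generally cannot be pickled across that boundary. The commit therefore hands the child only the picklable `latents_out_path` string and lets the child build the models and load the latents itself. A minimal, self-contained sketch of the pattern, with illustrative names not taken from the repository:

```python
import multiprocessing as mp
import torch

def worker(latents_path: str, batch_size: int) -> None:
    # Heavy, unpicklable state is created inside the child process,
    # so nothing CUDA-related ever crosses the spawn pickle boundary.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    latents = torch.load(latents_path, map_location=device)
    print(f"child loaded {len(latents)} latents, batch_size={batch_size}")

if __name__ == "__main__":  # required under the Windows spawn start method
    torch.save([torch.zeros(4)], "latents.pt")  # illustrative stand-in data
    p = mp.Process(target=worker, args=("latents.pt", 16))  # picklable args only
    p.start()
    p.join()
```

On Linux the old code worked because fork shares the parent's memory without pickling, which is why the breakage only shows up under spawn-based platforms such as Windows.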
@@ -62,3 +62,14 @@ def datagen(whisper_chunks,
         latent_batch = torch.cat(latent_batch, dim=0)

         yield whisper_batch, latent_batch
+
+def load_audio_model():
+    audio_processor = Audio2Feature(model_path="./models/whisper/tiny.pt")
+    return audio_processor
+
+def load_diffusion_model():
+    vae = VAE(model_path = "./models/sd-vae-ft-mse/")
+    unet = UNet(unet_config="./models/musetalk/musetalk.json",
+                model_path ="./models/musetalk/pytorch_model.bin")
+    pe = PositionalEncoding(d_model=384)
+    return vae,unet,pe
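These helpers split the monolithic `load_all_model()` along the new process boundary: `load_audio_model()` builds only the Whisper feature extractor for the parent, while `load_diffusion_model()` builds the VAE/UNet/positional-encoding stack inside the spawned worker. A short usage sketch mirroring the call sites in the musereal.py hunks above; actually running it requires the MuseTalk checkpoints under ./models:

```python
from musetalk.utils.utils import load_audio_model, load_diffusion_model

# Parent process (__loadmodels): only the audio feature extractor stays here.
audio_processor = load_audio_model()

# Child process (inference): the rendering stack is built after spawn.
vae, unet, pe = load_diffusion_model()
```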
@@ -0,0 +1,73 @@
+<!-- index.html -->
+<html>
+<head>
+    <script type="text/javascript" src="mpegts-1.7.3.min.js"></script>
+    <script type="text/javascript" src="http://cdn.sockjs.org/sockjs-0.3.4.js"></script>
+    <script type="text/javascript" src="https://ajax.aspnetcdn.com/ajax/jquery/jquery-2.1.1.min.js"></script>
+
+
+
+</head>
+<body>
+<div class="container">
+    <h1>WebSocket Test</h1>
+    <form class="form-inline" id="echo-form">
+        <div class="form-group">
+            <p>input text</p>
+
+            <textarea cols="2" rows="3" style="width:600px;height:50px;" class="form-control" id="message">test</textarea>
+        </div>
+        <button type="submit" class="btn btn-default">Send</button>
+    </form>
+    <div id="log">
+
+    </div>
+    <video id="video_player" width="40%" controls autoplay muted></video>
+</div>
+</body>
+<script type="text/javascript" charset="utf-8">
+
+    $(document).ready(function() {
+        var host = window.location.hostname
+        // var ws = new WebSocket("ws://"+host+":8000/humanecho");
+        // //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
+        // ws.onopen = function() {
+        //     console.log('Connected');
+        // };
+        // ws.onmessage = function(e) {
+        //     console.log('Received: ' + e.data);
+        //     data = e
+        //     var vid = JSON.parse(data.data);
+        //     console.log(typeof(vid),vid)
+        //     //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
+
+        // };
+        // ws.onclose = function(e) {
+        //     console.log('Closed');
+        // };
+
+        flvPlayer = mpegts.createPlayer({type: 'flv', url: "http://"+host+":8080/live/livestream.flv", isLive: true, enableStashBuffer: false});
+        flvPlayer.attachMediaElement(document.getElementById('video_player'));
+        flvPlayer.load();
+        flvPlayer.play();
+
+        $('#echo-form').on('submit', function(e) {
+            e.preventDefault();
+            var message = $('#message').val();
+            console.log('Sending: ' + message);
+            fetch('/human', {
+                body: JSON.stringify({
+                    text: message,
+                    type: 'echo',
+                }),
+                headers: {
+                    'Content-Type': 'application/json'
+                },
+                method: 'POST'
+            });
+            //ws.send(message);
+            $('#message').val('');
+        });
+    });
+</script>
+</html>
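The new page drives the avatar over plain HTTP, so the same request can be issued without a browser. A minimal sketch using the `requests` library; the host and port are assumptions (the README examples above serve pages on port 8010), while the `/human` endpoint and JSON body are copied from the page's `fetch` call:

```python
import requests

# Same payload the page's form submits: the server speaks `text` through the avatar.
resp = requests.post(
    "http://localhost:8010/human",  # host/port assumed; match your deployment
    json={"text": "hello from python", "type": "echo"},
    timeout=10,
)
print(resp.status_code)
```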