add wav2lip customvideo
parent 0c63e9a11b
commit 391512f68c
app.py (22 lines changed)

@@ -140,7 +140,7 @@ async def human(request):
     if params['type']=='echo':
         nerfreals[sessionid].put_msg_txt(params['text'])
     elif params['type']=='chat':
-        res=await asyncio.get_event_loop().run_in_executor(None, llm_response(params['text']))
+        res=await asyncio.get_event_loop().run_in_executor(None, llm_response(params['text']))
         nerfreals[sessionid].put_msg_txt(res)

     return web.Response(
@@ -150,6 +150,19 @@ async def human(request):
         ),
     )

+async def set_audiotype(request):
+    params = await request.json()
+
+    sessionid = params.get('sessionid',0)
+    nerfreals[sessionid].set_curr_state(params['audiotype'],params['reinit'])
+
+    return web.Response(
+        content_type="application/json",
+        text=json.dumps(
+            {"code": 0, "data":"ok"}
+        ),
+    )
+
 async def on_shutdown(app):
     # close peer connections
     coros = [pc.close() for pc in pcs]
@@ -307,6 +320,8 @@ if __name__ == '__main__':
     parser.add_argument('--customvideo_img', type=str, default='data/customvideo/img')
     parser.add_argument('--customvideo_imgnum', type=int, default=1)

+    parser.add_argument('--customvideo_config', type=str, default='')
+
     parser.add_argument('--tts', type=str, default='edgetts') #xtts gpt-sovits
     parser.add_argument('--REF_FILE', type=str, default=None)
     parser.add_argument('--REF_TEXT', type=str, default=None)
@@ -325,6 +340,10 @@ if __name__ == '__main__':
     opt = parser.parse_args()
     #app.config.from_object(opt)
     #print(app.config)
+    opt.customopt = []
+    if opt.customvideo_config!='':
+        with open(opt.customvideo_config,'r') as file:
+            opt.customopt = json.load(file)

     if opt.model == 'ernerf':
         from ernerf.nerf_triplane.provider import NeRFDataset_Test
@@ -402,6 +421,7 @@ if __name__ == '__main__':
     appasync.on_shutdown.append(on_shutdown)
     appasync.router.add_post("/offer", offer)
     appasync.router.add_post("/human", human)
+    appasync.router.add_post("/set_audiotype", set_audiotype)
     appasync.router.add_static('/',path='web')

     # Configure default CORS settings.
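The new /set_audiotype route can be driven from any HTTP client. A minimal sketch using only the Python standard library (the host and port here are assumptions, not part of the commit; use whatever address app.py is serving on):

import json
from urllib.request import Request, urlopen

# Switch session 0 to the custom clip registered as audiotype 2 and restart
# it from the beginning; the server replies {"code": 0, "data": "ok"}.
payload = {"sessionid": 0, "audiotype": 2, "reinit": True}
req = Request(
    "http://127.0.0.1:8010/set_audiotype",  # assumed host/port
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urlopen(req) as resp:
    print(resp.read().decode())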
baseasr.py (11 lines changed)
@@ -7,8 +7,9 @@ import multiprocessing as mp


 class BaseASR:
-    def __init__(self, opt):
+    def __init__(self, opt, parent=None):
         self.opt = opt
+        self.parent = parent

         self.fps = opt.fps # 20 ms per frame
         self.sample_rate = 16000
@@ -38,8 +39,12 @@ class BaseASR:
             type = 0
             #print(f'[INFO] get frame {frame.shape}')
         except queue.Empty:
-            frame = np.zeros(self.chunk, dtype=np.float32)
-            type = 1
+            if self.parent and self.parent.curr_state>1: # play custom audio
+                frame = self.parent.get_audio_stream(self.parent.curr_state)
+                type = self.parent.curr_state
+            else:
+                frame = np.zeros(self.chunk, dtype=np.float32)
+                type = 1

         return frame,type

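After this change, get_audio_frame returns a (frame, type) pair where type 0 is live TTS audio, 1 is generated silence, and anything above 1 is the audiotype of a custom clip streamed from the parent. A stand-in parent (illustrative only, not part of the commit) shows the interface BaseASR now relies on:

import numpy as np

# Illustrative stub of the parent interface BaseASR expects: a curr_state
# attribute plus get_audio_stream(audiotype) returning one 20 ms chunk
# (320 samples at 16 kHz) of float32 audio.
class StubParent:
    curr_state = 2  # >1 means "play the custom clip with this audiotype"

    def get_audio_stream(self, audiotype):
        return np.zeros(320, dtype=np.float32)  # placeholder chunk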
basereal.py (new file)

@@ -0,0 +1,81 @@
+import math
+import torch
+import numpy as np
+
+import os
+import time
+import cv2
+import glob
+import pickle
+import copy
+
+import queue
+from queue import Queue
+from threading import Thread, Event
+from io import BytesIO
+import soundfile as sf
+
+from tqdm import tqdm
+def read_imgs(img_list):
+    frames = []
+    print('reading images...')
+    for img_path in tqdm(img_list):
+        frame = cv2.imread(img_path)
+        frames.append(frame)
+    return frames
+
+class BaseReal:
+    def __init__(self, opt):
+        self.opt = opt
+        self.sample_rate = 16000
+        self.chunk = self.sample_rate // opt.fps # 320 samples per chunk (20ms * 16000 / 1000)
+
+        self.curr_state=0
+        self.custom_img_cycle = {}
+        self.custom_audio_cycle = {}
+        self.custom_audio_index = {}
+        self.custom_index = {}
+        self.custom_opt = {}
+        self.__loadcustom()
+
+    def __loadcustom(self):
+        for item in self.opt.customopt:
+            print(item)
+            input_img_list = glob.glob(os.path.join(item['imgpath'], '*.[jpJP][pnPN]*[gG]'))
+            input_img_list = sorted(input_img_list, key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))
+            self.custom_img_cycle[item['audiotype']] = read_imgs(input_img_list)
+            self.custom_audio_cycle[item['audiotype']], sample_rate = sf.read(item['audiopath'], dtype='float32')
+            self.custom_audio_index[item['audiotype']] = 0
+            self.custom_index[item['audiotype']] = 0
+            self.custom_opt[item['audiotype']] = item
+
+    def mirror_index(self,size, index):
+        #size = len(self.coord_list_cycle)
+        turn = index // size
+        res = index % size
+        if turn % 2 == 0:
+            return res
+        else:
+            return size - res - 1
+
+    def get_audio_stream(self,audiotype):
+        idx = self.custom_audio_index[audiotype]
+        stream = self.custom_audio_cycle[audiotype][idx:idx+self.chunk]
+        self.custom_audio_index[audiotype] += self.chunk
+        if self.custom_audio_index[audiotype]>=stream.shape[0]:
+            self.curr_state = 1 # the current clip does not loop; switch to the silent state
+        return stream
+
+    def set_curr_state(self,audiotype, reinit):
+        self.curr_state = audiotype
+        if reinit:
+            self.custom_audio_index[audiotype] = 0
+            self.custom_index[audiotype] = 0
+
+    # def process_custom(self,audiotype:int,idx:int):
+    #     if self.curr_state!=audiotype: # switching from inference to the scripted clip
+    #         if idx in self.switch_pos: # switching is allowed at a sync point
+    #             self.curr_state=audiotype
+    #             self.custom_index=0
+    #     else:
+    #         self.custom_index+=1
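mirror_index plays an image cycle forward on even passes and backward on odd ones, so a looping clip has no visible jump at the wrap point. A standalone check (illustrative, not part of the commit):

def mirror_index(size, index):
    # even pass: walk forward; odd pass: walk backward
    turn, res = divmod(index, size)
    return res if turn % 2 == 0 else size - res - 1

print([mirror_index(3, i) for i in range(9)])  # -> [0, 1, 2, 2, 1, 0, 0, 1, 2]

One caveat, as committed: get_audio_stream compares the advanced index against stream.shape[0], the length of the 20 ms slice just returned, so the check fires after the very first chunk; comparing against the full clip length (self.custom_audio_cycle[audiotype].shape[0]) is presumably the intent.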
new file: custom video config JSON

@@ -0,0 +1,7 @@
+[
+    {
+        "audiotype":2,
+        "imgpath":"data/customvideo/image",
+        "audiopath":"data/customvideo/audio.wav"
+    }
+]
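Each entry binds an audiotype (anything above the reserved 0/1) to an image directory and a wav file; the path passed via --customvideo_config is loaded verbatim into opt.customopt in app.py. Note that the frame images must have integer basenames (they are sorted with int(...)), and the wav should be 16 kHz since BaseReal slices it into 320-sample chunks without resampling. A small sanity-check sketch (the config path here is hypothetical; use whatever you pass on the command line):

import json, os

CONFIG = 'data/custom_config.json'  # hypothetical path; match --customvideo_config
with open(CONFIG) as f:
    for item in json.load(f):
        # 0 and 1 are reserved for live speech and silence in BaseASR/BaseReal
        assert item['audiotype'] > 1, 'audiotype must be > 1'
        assert os.path.isdir(item['imgpath']), item['imgpath']
        assert os.path.isfile(item['audiopath']), item['audiopath']
        print('ok:', item)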
lipreal.py (21 lines changed)
@@ -23,8 +23,8 @@ from ttsreal import EdgeTTS,VoitsTTS,XTTS
 from lipasr import LipASR
 import asyncio
 from av import AudioFrame, VideoFrame

 from wav2lip.models import Wav2Lip
+from basereal import BaseReal

 from tqdm import tqdm
@@ -143,9 +143,10 @@ def inference(render_event,batch_size,face_imgs_path,audio_feat_queue,audio_out_
     print('musereal inference processor stop')

 @torch.no_grad()
-class LipReal:
+class LipReal(BaseReal):
     def __init__(self, opt):
-        self.opt = opt # shared with the trainer's opt to support in-place modification of rendering parameters.
+        super().__init__(opt)
+        #self.opt = opt # shared with the trainer's opt to support in-place modification of rendering parameters.
         self.W = opt.W
         self.H = opt.H
@@ -163,7 +164,7 @@ class LipReal:
         #self.__loadmodels()
         self.__loadavatar()

-        self.asr = LipASR(opt)
+        self.asr = LipASR(opt,self)
         self.asr.warm_up()
         if opt.tts == "edgetts":
             self.tts = EdgeTTS(opt,self)
@@ -213,8 +214,16 @@ class LipReal:
                 res_frame,idx,audio_frames = self.res_frame_queue.get(block=True, timeout=1)
             except queue.Empty:
                 continue
-            if audio_frames[0][1]==1 and audio_frames[1][1]==1: # all silent data, just take the full image
-                combine_frame = self.frame_list_cycle[idx]
+            if audio_frames[0][1]!=0 and audio_frames[1][1]!=0: # all silent data, just take the full image
+                audiotype = audio_frames[0][1]
+                if self.custom_index.get(audiotype) is not None: # a custom video is configured for this audiotype
+                    mirindex = self.mirror_index(len(self.custom_img_cycle[audiotype]),self.custom_index[audiotype])
+                    combine_frame = self.custom_img_cycle[audiotype][mirindex]
+                    self.custom_index[audiotype] += 1
+                    # if not self.custom_opt[audiotype].loop and self.custom_index[audiotype]>=len(self.custom_img_cycle[audiotype]):
+                    #     self.curr_state = 1 # the clip does not loop; switch to the silent state
+                else:
+                    combine_frame = self.frame_list_cycle[idx]
             else:
                 bbox = self.coord_list_cycle[idx]
                 combine_frame = copy.deepcopy(self.frame_list_cycle[idx])
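The heart of the change is the frame-selection branch: each video frame carries two 20 ms audio frames, and only when neither holds live speech (type != 0) does the renderer skip Wav2Lip and show either the custom clip's ping-ponged frame or the plain idle frame. A distilled version of that branch (illustrative sketch, not the committed code):

# Assumes the attributes introduced by BaseReal (custom_img_cycle,
# custom_index, mirror_index, frame_list_cycle) are present on self.
def pick_frame(self, idx, audio_frames):
    if audio_frames[0][1] != 0 and audio_frames[1][1] != 0:  # no live speech
        audiotype = audio_frames[0][1]
        if audiotype in self.custom_img_cycle:  # custom clip configured
            mirindex = self.mirror_index(len(self.custom_img_cycle[audiotype]),
                                         self.custom_index[audiotype])
            self.custom_index[audiotype] += 1
            return self.custom_img_cycle[audiotype][mirindex]
        return self.frame_list_cycle[idx]  # plain silence: full idle frame
    return None  # speech: run Wav2Lip and paste res_frame into the bbox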
@@ -15,7 +15,7 @@ class VllmGPT:
         self.__URL = "http://{}:{}/v1/completions".format(self.host, self.port)
         self.__URL2 = "http://{}:{}/v1/chat/completions".format(self.host, self.port)

-    def question(self,cont):
+    def chat(self,cont):
         chat_list = []
         # contentdb = content_db.new_instance()
         # list = contentdb.get_list('all','desc',11)
@@ -77,5 +77,5 @@ class VllmGPT:

 if __name__ == "__main__":
     vllm = VllmGPT('192.168.1.3','8101')
-    req = vllm.question("你叫什么名字啊今年多大了")
+    req = vllm.chat("你叫什么名字啊今年多大了")
     print(req)
new file: WebRTC demo page with a custom-video switch button

@@ -0,0 +1,113 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8"/>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>WebRTC webcam</title>
+    <style>
+    button {
+        padding: 8px 16px;
+    }
+
+    video {
+        width: 100%;
+    }
+
+    .option {
+        margin-bottom: 8px;
+    }
+
+    #media {
+        max-width: 1280px;
+    }
+    </style>
+</head>
+<body>
+
+<div class="option">
+    <input id="use-stun" type="checkbox"/>
+    <label for="use-stun">Use STUN server</label>
+</div>
+<button id="start" onclick="start()">Start</button>
+<button id="stop" style="display: none" onclick="stop()">Stop</button>
+<input type="hidden" id="sessionid" value="0">
+<form class="form-inline" id="echo-form">
+    <div class="form-group">
+        <p>input text</p>
+        <textarea cols="2" rows="3" style="width:600px;height:50px;" class="form-control" id="message">test</textarea>
+    </div>
+    <button type="submit" class="btn btn-default">Send</button>
+</form>
+
+<div id="media">
+    <h2>Media</h2>
+
+    <audio id="audio" autoplay="true"></audio>
+    <video id="video" style="width:600px;" autoplay="true" playsinline="true"></video>
+</div>
+<button id="custom" onclick="custom()">切换视频</button>
+<input type="text" id="audiotype" value="0">
+
+<script src="client.js"></script>
+<script type="text/javascript" src="http://cdn.sockjs.org/sockjs-0.3.4.js"></script>
+<script type="text/javascript" src="https://ajax.aspnetcdn.com/ajax/jquery/jquery-2.1.1.min.js"></script>
+</body>
+<script type="text/javascript" charset="utf-8">
+
+    $(document).ready(function() {
+        // var host = window.location.hostname
+        // var ws = new WebSocket("ws://"+host+":8000/humanecho");
+        // //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
+        // ws.onopen = function() {
+        //     console.log('Connected');
+        // };
+        // ws.onmessage = function(e) {
+        //     console.log('Received: ' + e.data);
+        //     data = e
+        //     var vid = JSON.parse(data.data);
+        //     console.log(typeof(vid),vid)
+        //     //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
+
+        // };
+        // ws.onclose = function(e) {
+        //     console.log('Closed');
+        // };
+
+        $('#echo-form').on('submit', function(e) {
+            e.preventDefault();
+            var message = $('#message').val();
+            console.log('Sending: ' + message);
+            console.log('sessionid: ',document.getElementById('sessionid').value);
+            fetch('/human', {
+                body: JSON.stringify({
+                    text: message,
+                    type: 'echo',
+                    interrupt: true,
+                    sessionid:parseInt(document.getElementById('sessionid').value),
+                }),
+                headers: {
+                    'Content-Type': 'application/json'
+                },
+                method: 'POST'
+            });
+            //ws.send(message);
+            $('#message').val('');
+        });
+
+        function custom() {
+            fetch('/set_audiotype', {
+                body: JSON.stringify({
+                    audiotype: parseInt(document.getElementById('audiotype').value),
+                    reinit: false,
+                    sessionid:parseInt(document.getElementById('sessionid').value),
+                }),
+                headers: {
+                    'Content-Type': 'application/json'
+                },
+                method: 'POST'
+            });
+        }
+    });
+</script>
+</html>
@@ -30,7 +30,7 @@
     </div>
     <button id="start" onclick="start()">Start</button>
     <button id="stop" style="display: none" onclick="stop()">Stop</button>
-    <input type="hidden" id="sessionid" value="1234">
+    <input type="hidden" id="sessionid" value="0">
     <form class="form-inline" id="echo-form">
         <div class="form-group">
             <p>input text</p>