add wav2lip customvideo
This commit is contained in:
parent
0c63e9a11b
commit
391512f68c
22
app.py
22
app.py
|
@ -140,7 +140,7 @@ async def human(request):
|
||||||
if params['type']=='echo':
|
if params['type']=='echo':
|
||||||
nerfreals[sessionid].put_msg_txt(params['text'])
|
nerfreals[sessionid].put_msg_txt(params['text'])
|
||||||
elif params['type']=='chat':
|
elif params['type']=='chat':
|
||||||
res=await asyncio.get_event_loop().run_in_executor(None, llm_response(params['text']))
|
res=await asyncio.get_event_loop().run_in_executor(None, llm_response(params['text']))
|
||||||
nerfreals[sessionid].put_msg_txt(res)
|
nerfreals[sessionid].put_msg_txt(res)
|
||||||
|
|
||||||
return web.Response(
|
return web.Response(
|
||||||
|
@ -150,6 +150,19 @@ async def human(request):
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def set_audiotype(request):
    """aiohttp handler for POST /set_audiotype.

    Expects a JSON body with 'audiotype' (int state id) and 'reinit' (bool);
    switches the session's renderer to that custom audio/video state and
    answers with a JSON {"code": 0, "data": "ok"} envelope.
    """
    params = await request.json()

    # Fall back to session 0 when the client omits 'sessionid'.
    session_id = params.get('sessionid', 0)
    nerfreals[session_id].set_curr_state(params['audiotype'], params['reinit'])

    payload = json.dumps({"code": 0, "data": "ok"})
    return web.Response(content_type="application/json", text=payload)
|
||||||
|
|
||||||
async def on_shutdown(app):
|
async def on_shutdown(app):
|
||||||
# close peer connections
|
# close peer connections
|
||||||
coros = [pc.close() for pc in pcs]
|
coros = [pc.close() for pc in pcs]
|
||||||
|
@ -307,6 +320,8 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('--customvideo_img', type=str, default='data/customvideo/img')
|
parser.add_argument('--customvideo_img', type=str, default='data/customvideo/img')
|
||||||
parser.add_argument('--customvideo_imgnum', type=int, default=1)
|
parser.add_argument('--customvideo_imgnum', type=int, default=1)
|
||||||
|
|
||||||
|
parser.add_argument('--customvideo_config', type=str, default='')
|
||||||
|
|
||||||
parser.add_argument('--tts', type=str, default='edgetts') #xtts gpt-sovits
|
parser.add_argument('--tts', type=str, default='edgetts') #xtts gpt-sovits
|
||||||
parser.add_argument('--REF_FILE', type=str, default=None)
|
parser.add_argument('--REF_FILE', type=str, default=None)
|
||||||
parser.add_argument('--REF_TEXT', type=str, default=None)
|
parser.add_argument('--REF_TEXT', type=str, default=None)
|
||||||
|
@ -325,6 +340,10 @@ if __name__ == '__main__':
|
||||||
opt = parser.parse_args()
|
opt = parser.parse_args()
|
||||||
#app.config.from_object(opt)
|
#app.config.from_object(opt)
|
||||||
#print(app.config)
|
#print(app.config)
|
||||||
|
opt.customopt = []
|
||||||
|
if opt.customvideo_config!='':
|
||||||
|
with open(opt.customvideo_config,'r') as file:
|
||||||
|
opt.customopt = json.load(file)
|
||||||
|
|
||||||
if opt.model == 'ernerf':
|
if opt.model == 'ernerf':
|
||||||
from ernerf.nerf_triplane.provider import NeRFDataset_Test
|
from ernerf.nerf_triplane.provider import NeRFDataset_Test
|
||||||
|
@ -402,6 +421,7 @@ if __name__ == '__main__':
|
||||||
appasync.on_shutdown.append(on_shutdown)
|
appasync.on_shutdown.append(on_shutdown)
|
||||||
appasync.router.add_post("/offer", offer)
|
appasync.router.add_post("/offer", offer)
|
||||||
appasync.router.add_post("/human", human)
|
appasync.router.add_post("/human", human)
|
||||||
|
appasync.router.add_post("/set_audiotype", set_audiotype)
|
||||||
appasync.router.add_static('/',path='web')
|
appasync.router.add_static('/',path='web')
|
||||||
|
|
||||||
# Configure default CORS settings.
|
# Configure default CORS settings.
|
||||||
|
|
11
baseasr.py
11
baseasr.py
|
@ -7,8 +7,9 @@ import multiprocessing as mp
|
||||||
|
|
||||||
|
|
||||||
class BaseASR:
|
class BaseASR:
|
||||||
def __init__(self, opt):
|
def __init__(self, opt, parent=None):
|
||||||
self.opt = opt
|
self.opt = opt
|
||||||
|
self.parent = parent
|
||||||
|
|
||||||
self.fps = opt.fps # 20 ms per frame
|
self.fps = opt.fps # 20 ms per frame
|
||||||
self.sample_rate = 16000
|
self.sample_rate = 16000
|
||||||
|
@ -38,8 +39,12 @@ class BaseASR:
|
||||||
type = 0
|
type = 0
|
||||||
#print(f'[INFO] get frame {frame.shape}')
|
#print(f'[INFO] get frame {frame.shape}')
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
frame = np.zeros(self.chunk, dtype=np.float32)
|
if self.parent and self.parent.curr_state>1: #播放自定义音频
|
||||||
type = 1
|
frame = self.parent.get_audio_stream(self.parent.curr_state)
|
||||||
|
type = self.parent.curr_state
|
||||||
|
else:
|
||||||
|
frame = np.zeros(self.chunk, dtype=np.float32)
|
||||||
|
type = 1
|
||||||
|
|
||||||
return frame,type
|
return frame,type
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,81 @@
|
||||||
|
import math
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import cv2
|
||||||
|
import glob
|
||||||
|
import pickle
|
||||||
|
import copy
|
||||||
|
|
||||||
|
import queue
|
||||||
|
from queue import Queue
|
||||||
|
from threading import Thread, Event
|
||||||
|
from io import BytesIO
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
def read_imgs(img_list):
    """Load every image path in *img_list* with OpenCV and return the frames.

    Frames come back in the same order as the input paths; tqdm reports
    loading progress on long lists.
    """
    print('reading images...')
    return [cv2.imread(img_path) for img_path in tqdm(img_list)]
|
||||||
|
|
||||||
|
class BaseReal:
    """Shared base for the "real" renderers.

    Loads optional custom audio/video clips described by ``opt.customopt``
    (one dict per clip with 'audiotype', 'imgpath', 'audiopath' keys) and
    tracks which state is currently playing.  State convention: 0/1 are the
    normal inference / silent states; values > 1 select a custom clip keyed
    by its 'audiotype'.
    """

    def __init__(self, opt):
        self.opt = opt
        self.sample_rate = 16000
        # Samples per audio chunk, e.g. 320 per 20 ms chunk at opt.fps == 50.
        self.chunk = self.sample_rate // opt.fps

        self.curr_state = 0           # active state id (see class docstring)
        self.custom_img_cycle = {}    # audiotype -> list of frames (BGR ndarrays)
        self.custom_audio_cycle = {}  # audiotype -> float32 sample array
        self.custom_audio_index = {}  # audiotype -> next sample offset into the clip
        self.custom_index = {}        # audiotype -> next video frame counter
        self.custom_opt = {}          # audiotype -> raw config item
        self.__loadcustom()

    def __loadcustom(self):
        # Load every configured custom clip; image files are sorted by their
        # numeric basename so frame order matches the recording.
        for item in self.opt.customopt:
            print(item)
            input_img_list = glob.glob(os.path.join(item['imgpath'], '*.[jpJP][pnPN]*[gG]'))
            input_img_list = sorted(input_img_list, key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))
            self.custom_img_cycle[item['audiotype']] = read_imgs(input_img_list)
            self.custom_audio_cycle[item['audiotype']], sample_rate = sf.read(item['audiopath'], dtype='float32')
            self.custom_audio_index[item['audiotype']] = 0
            self.custom_index[item['audiotype']] = 0
            self.custom_opt[item['audiotype']] = item

    def mirror_index(self, size, index):
        """Map a monotonically increasing *index* onto 0..size-1 ping-pong
        style, so a frame cycle plays forward then backward seamlessly."""
        turn = index // size
        res = index % size
        if turn % 2 == 0:
            return res
        return size - res - 1

    def get_audio_stream(self, audiotype):
        """Return the next chunk (up to ``self.chunk`` samples) of the custom
        audio clip for *audiotype*, advancing the read position.

        When the clip is exhausted, ``curr_state`` falls back to 1 (silent)
        because custom clips do not loop.
        """
        idx = self.custom_audio_index[audiotype]
        stream = self.custom_audio_cycle[audiotype][idx:idx + self.chunk]
        self.custom_audio_index[audiotype] += self.chunk
        # Bug fix: compare against the FULL clip length.  The old code compared
        # against stream.shape[0] (the chunk just sliced, <= self.chunk), which
        # made this fire after the very first chunk.
        if self.custom_audio_index[audiotype] >= self.custom_audio_cycle[audiotype].shape[0]:
            self.curr_state = 1  # clip finished, does not loop: switch to silent state
        return stream

    def set_curr_state(self, audiotype, reinit):
        """Switch to *audiotype*; when *reinit* is true, rewind that clip's
        audio and video positions to the beginning."""
        self.curr_state = audiotype
        if reinit:
            self.custom_audio_index[audiotype] = 0
            self.custom_index[audiotype] = 0
|
|
@ -0,0 +1,7 @@
|
||||||
|
[
    {
        "audiotype": 2,
        "imgpath": "data/customvideo/image",
        "audiopath": "data/customvideo/audio.wav"
    }
]
|
21
lipreal.py
21
lipreal.py
|
@ -23,8 +23,8 @@ from ttsreal import EdgeTTS,VoitsTTS,XTTS
|
||||||
from lipasr import LipASR
|
from lipasr import LipASR
|
||||||
import asyncio
|
import asyncio
|
||||||
from av import AudioFrame, VideoFrame
|
from av import AudioFrame, VideoFrame
|
||||||
|
|
||||||
from wav2lip.models import Wav2Lip
|
from wav2lip.models import Wav2Lip
|
||||||
|
from basereal import BaseReal
|
||||||
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
@ -143,9 +143,10 @@ def inference(render_event,batch_size,face_imgs_path,audio_feat_queue,audio_out_
|
||||||
print('musereal inference processor stop')
|
print('musereal inference processor stop')
|
||||||
|
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
class LipReal:
|
class LipReal(BaseReal):
|
||||||
def __init__(self, opt):
|
def __init__(self, opt):
|
||||||
self.opt = opt # shared with the trainer's opt to support in-place modification of rendering parameters.
|
super().__init__(opt)
|
||||||
|
#self.opt = opt # shared with the trainer's opt to support in-place modification of rendering parameters.
|
||||||
self.W = opt.W
|
self.W = opt.W
|
||||||
self.H = opt.H
|
self.H = opt.H
|
||||||
|
|
||||||
|
@ -163,7 +164,7 @@ class LipReal:
|
||||||
#self.__loadmodels()
|
#self.__loadmodels()
|
||||||
self.__loadavatar()
|
self.__loadavatar()
|
||||||
|
|
||||||
self.asr = LipASR(opt)
|
self.asr = LipASR(opt,self)
|
||||||
self.asr.warm_up()
|
self.asr.warm_up()
|
||||||
if opt.tts == "edgetts":
|
if opt.tts == "edgetts":
|
||||||
self.tts = EdgeTTS(opt,self)
|
self.tts = EdgeTTS(opt,self)
|
||||||
|
@ -213,8 +214,16 @@ class LipReal:
|
||||||
res_frame,idx,audio_frames = self.res_frame_queue.get(block=True, timeout=1)
|
res_frame,idx,audio_frames = self.res_frame_queue.get(block=True, timeout=1)
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
continue
|
continue
|
||||||
if audio_frames[0][1]==1 and audio_frames[1][1]==1: #全为静音数据,只需要取fullimg
|
if audio_frames[0][1]!=0 and audio_frames[1][1]!=0: #全为静音数据,只需要取fullimg
|
||||||
combine_frame = self.frame_list_cycle[idx]
|
audiotype = audio_frames[0][1]
|
||||||
|
if self.custom_index.get(audiotype) is not None: #有自定义视频
|
||||||
|
mirindex = self.mirror_index(len(self.custom_img_cycle[audiotype]),self.custom_index[audiotype])
|
||||||
|
combine_frame = self.custom_img_cycle[audiotype][mirindex]
|
||||||
|
self.custom_index[audiotype] += 1
|
||||||
|
# if not self.custom_opt[audiotype].loop and self.custom_index[audiotype]>=len(self.custom_img_cycle[audiotype]):
|
||||||
|
# self.curr_state = 1 #当前视频不循环播放,切换到静音状态
|
||||||
|
else:
|
||||||
|
combine_frame = self.frame_list_cycle[idx]
|
||||||
else:
|
else:
|
||||||
bbox = self.coord_list_cycle[idx]
|
bbox = self.coord_list_cycle[idx]
|
||||||
combine_frame = copy.deepcopy(self.frame_list_cycle[idx])
|
combine_frame = copy.deepcopy(self.frame_list_cycle[idx])
|
||||||
|
|
|
@ -15,7 +15,7 @@ class VllmGPT:
|
||||||
self.__URL = "http://{}:{}/v1/completions".format(self.host, self.port)
|
self.__URL = "http://{}:{}/v1/completions".format(self.host, self.port)
|
||||||
self.__URL2 = "http://{}:{}/v1/chat/completions".format(self.host, self.port)
|
self.__URL2 = "http://{}:{}/v1/chat/completions".format(self.host, self.port)
|
||||||
|
|
||||||
def question(self,cont):
|
def chat(self,cont):
|
||||||
chat_list = []
|
chat_list = []
|
||||||
# contentdb = content_db.new_instance()
|
# contentdb = content_db.new_instance()
|
||||||
# list = contentdb.get_list('all','desc',11)
|
# list = contentdb.get_list('all','desc',11)
|
||||||
|
@ -77,5 +77,5 @@ class VllmGPT:
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
vllm = VllmGPT('192.168.1.3','8101')
|
vllm = VllmGPT('192.168.1.3','8101')
|
||||||
req = vllm.question("你叫什么名字啊今年多大了")
|
req = vllm.chat("你叫什么名字啊今年多大了")
|
||||||
print(req)
|
print(req)
|
||||||
|
|
|
@ -0,0 +1,113 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8"/>
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
|
<title>WebRTC webcam</title>
|
||||||
|
<style>
|
||||||
|
button {
|
||||||
|
padding: 8px 16px;
|
||||||
|
}
|
||||||
|
|
||||||
|
video {
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.option {
|
||||||
|
margin-bottom: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
#media {
|
||||||
|
max-width: 1280px;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<div class="option">
|
||||||
|
<input id="use-stun" type="checkbox"/>
|
||||||
|
<label for="use-stun">Use STUN server</label>
|
||||||
|
</div>
|
||||||
|
<button id="start" onclick="start()">Start</button>
|
||||||
|
<button id="stop" style="display: none" onclick="stop()">Stop</button>
|
||||||
|
<input type="hidden" id="sessionid" value="0">
|
||||||
|
<form class="form-inline" id="echo-form">
|
||||||
|
<div class="form-group">
|
||||||
|
<p>input text</p>
|
||||||
|
|
||||||
|
<textarea cols="2" rows="3" style="width:600px;height:50px;" class="form-control" id="message">test</textarea>
|
||||||
|
</div>
|
||||||
|
<button type="submit" class="btn btn-default">Send</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<div id="media">
|
||||||
|
<h2>Media</h2>
|
||||||
|
|
||||||
|
<audio id="audio" autoplay="true"></audio>
|
||||||
|
<video id="video" style="width:600px;" autoplay="true" playsinline="true"></video>
|
||||||
|
</div>
|
||||||
|
<button id="custom" onclick="custom()">切换视频</button>
|
||||||
|
<input type="text" id="audiotype" value="0">
|
||||||
|
|
||||||
|
<script src="client.js"></script>
|
||||||
|
<script type="text/javascript" src="http://cdn.sockjs.org/sockjs-0.3.4.js"></script>
|
||||||
|
<script type="text/javascript" src="https://ajax.aspnetcdn.com/ajax/jquery/jquery-2.1.1.min.js"></script>
|
||||||
|
</body>
|
||||||
|
<script type="text/javascript" charset="utf-8">
|
||||||
|
|
||||||
|
$(document).ready(function() {
|
||||||
|
// var host = window.location.hostname
|
||||||
|
// var ws = new WebSocket("ws://"+host+":8000/humanecho");
|
||||||
|
// //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
|
||||||
|
// ws.onopen = function() {
|
||||||
|
// console.log('Connected');
|
||||||
|
// };
|
||||||
|
// ws.onmessage = function(e) {
|
||||||
|
// console.log('Received: ' + e.data);
|
||||||
|
// data = e
|
||||||
|
// var vid = JSON.parse(data.data);
|
||||||
|
// console.log(typeof(vid),vid)
|
||||||
|
// //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
|
||||||
|
|
||||||
|
// };
|
||||||
|
// ws.onclose = function(e) {
|
||||||
|
// console.log('Closed');
|
||||||
|
// };
|
||||||
|
|
||||||
|
$('#echo-form').on('submit', function(e) {
|
||||||
|
e.preventDefault();
|
||||||
|
var message = $('#message').val();
|
||||||
|
console.log('Sending: ' + message);
|
||||||
|
console.log('sessionid: ',document.getElementById('sessionid').value);
|
||||||
|
fetch('/human', {
|
||||||
|
body: JSON.stringify({
|
||||||
|
text: message,
|
||||||
|
type: 'echo',
|
||||||
|
interrupt: true,
|
||||||
|
sessionid:parseInt(document.getElementById('sessionid').value),
|
||||||
|
}),
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
},
|
||||||
|
method: 'POST'
|
||||||
|
});
|
||||||
|
//ws.send(message);
|
||||||
|
$('#message').val('');
|
||||||
|
});
|
||||||
|
|
||||||
|
// POST the chosen audiotype to the server so it switches the session's
// custom video/audio state; fire-and-forget (response is ignored).
function custom() {
    const payload = {
        audiotype: parseInt(document.getElementById('audiotype').value),
        reinit: false,
        sessionid: parseInt(document.getElementById('sessionid').value),
    };
    fetch('/set_audiotype', {
        body: JSON.stringify(payload),
        headers: { 'Content-Type': 'application/json' },
        method: 'POST'
    });
}
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
</html>
|
|
@ -30,7 +30,7 @@
|
||||||
</div>
|
</div>
|
||||||
<button id="start" onclick="start()">Start</button>
|
<button id="start" onclick="start()">Start</button>
|
||||||
<button id="stop" style="display: none" onclick="stop()">Stop</button>
|
<button id="stop" style="display: none" onclick="stop()">Stop</button>
|
||||||
<input type="hidden" id="sessionid" value="1234">
|
<input type="hidden" id="sessionid" value="0">
|
||||||
<form class="form-inline" id="echo-form">
|
<form class="form-inline" id="echo-form">
|
||||||
<div class="form-group">
|
<div class="form-group">
|
||||||
<p>input text</p>
|
<p>input text</p>
|
||||||
|
|
Loading…
Reference in New Issue