feat: switch avatar handling to automatically resolved absolute paths and add an avatar-generation API
parent 18d7db35a7
commit cd7d5f31b5

README.md (15 changed lines)
@@ -6,11 +6,10 @@ Real time interactive streaming digital human, realize audio video synchronous
## Features
1. Supports multiple digital-human models: ernerf, musetalk, wav2lip
2. Supports voice cloning
3. Supports multiple audio feature extractors: wav2vec, hubert
3. Supports interrupting the digital human while it is speaking
4. Supports full-body video stitching
5. Supports rtmp and webrtc
6. Supports video orchestration: plays a custom video when the avatar is not speaking
7. Supports dialogue with large language models

## 1. Installation
@@ -171,13 +170,11 @@ cd MuseTalk
Edit configs/inference/realtime.yaml and set preparation to True
python -m scripts.realtime_inference --inference_config configs/inference/realtime.yaml
When it finishes, copy the files under results/avatars into this project's data/avatars directory
```

```bash
You can also use simple_musetalk.py in the local directory
Method 2
Run
cd musetalk
python simple_musetalk.py --avatar_id 2 --file D:\\ok\\test.mp4
The avatar is written directly to data/avatars
python simple_musetalk.py --avatar_id 4 --file D:\\ok\\test.mp4
Both video and image input are supported; results are generated under the data/avatars directory automatically
```

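This commit also exposes avatar generation over HTTP: app.py (later in this diff) registers a `/create_musetalk` endpoint that accepts an uploaded video or image plus an avatar id and runs `create_musetalk_human` on the server. A minimal client sketch, assuming the aiohttp server is running on its default port 8010 and `serverip` is your host; the part order (file first, then avatar_id) matches the handler shown below:

```python
# Hedged example: upload a video to the new /create_musetalk endpoint.
# "serverip", the port (default --listenport 8010) and the local file name are assumptions.
import requests

with open("test.mp4", "rb") as f:
    resp = requests.post(
        "http://serverip:8010/create_musetalk",
        files=[
            ("file", ("test.mp4", f, "video/mp4")),  # the handler reads the file part first
            ("avatar_id", (None, "3")),              # then the integer avatar id
        ],
    )
print(resp.json())  # e.g. {'status': 'success', 'filename': 'test.mp4', 'int_value': 3}
```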
### 3.10 Using the wav2lip model
@@ -185,7 +182,7 @@ python simple_musetalk.py --avatar_id 2 --file D:\\ok\\test.mp4
- Download the models
Download the models wav2lip needs from the network drive: https://drive.uc.cn/s/551be97d7cfa4
Copy s3fd.pth to wav2lip/face_detection/detection/sfd/s3fd.pth in this project, and copy wav2lip.pth into this project's models directory
Digital-human avatar file wav2lip_avatar1.tar.gz; after extracting, copy the whole folder into this project's data/avatars directory
Digital-human avatar file wav2lip_avatar1.tar.gz, network drive: https://drive.uc.cn/s/5bd0cde0b0774; after extracting, copy the whole folder into this project's data/avatars directory
- Run
python app.py --transport webrtc --model wav2lip --avatar_id wav2lip_avatar1
Open http://serverip:8010/webrtcapi.html in a browser

app.py (135 changed lines)

@@ -1,22 +1,30 @@
# server.py
import argparse
import asyncio
import json
import multiprocessing
from threading import Thread, Event

import aiohttp
import aiohttp_cors
from aiohttp import web
from aiortc import RTCPeerConnection, RTCSessionDescription
from flask import Flask
from flask import Flask, render_template,send_from_directory,request, jsonify
from flask_sockets import Sockets
import base64
import time
import json
import gevent
from gevent import pywsgi
from geventwebsocket.handler import WebSocketHandler
import os
import re
import numpy as np
from threading import Thread,Event
import multiprocessing

from musetalk.simple_musetalk import create_musetalk_human
from aiohttp import web
import aiohttp
import aiohttp_cors
from aiortc import RTCPeerConnection, RTCSessionDescription
from webrtc import HumanPlayer

import argparse

import shutil
import asyncio


app = Flask(__name__)
sockets = Sockets(app)
global nerfreal

@@ -51,7 +59,6 @@ def llm_response(message):
    print(response)
    return response


@sockets.route('/humanchat')
def chat_socket(ws):
    # get the WebSocket object

@@ -72,11 +79,9 @@ def chat_socket(ws):
        res=llm_response(message)
        nerfreal.put_msg_txt(res)


#####webrtc###############################
pcs = set()


#@app.route('/offer', methods=['POST'])
async def offer(request):
    params = await request.json()

@@ -110,10 +115,12 @@ async def offer(request):
        ),
    )


async def human(request):
    params = await request.json()

    if params.get('interrupt'):
        nerfreal.pause_talk()

    if params['type']=='echo':
        nerfreal.put_msg_txt(params['text'])
    elif params['type']=='chat':

@@ -127,35 +134,12 @@ async def human(request):
        ),
    )


async def handle_create_musetalk(request):
    reader = await request.multipart()
    # file part
    file_part = await reader.next()
    filename = file_part.filename
    file_data = await file_part.read()  # read the uploaded file content
    # note: make sure this path is writable
    with open(filename, 'wb') as f:
        f.write(file_data)
    # integer part (the avatar id)
    part = await reader.next()
    avatar_id = int(await part.text())
    create_musetalk_human(filename, avatar_id)
    os.remove(filename)
    return web.json_response({
        'status': 'success',
        'filename': filename,
        'int_value': avatar_id,
    })
|
||||
|
||||
|
||||
async def on_shutdown(app):
|
||||
# close peer connections
|
||||
coros = [pc.close() for pc in pcs]
|
||||
await asyncio.gather(*coros)
|
||||
pcs.clear()
|
||||
|
||||
|
||||
async def post(url,data):
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
|
@ -164,7 +148,6 @@ async def post(url, data):
|
|||
except aiohttp.ClientError as e:
|
||||
print(f'Error: {e}')
|
||||
|
||||
|
||||
async def run(push_url):
|
||||
pc = RTCPeerConnection()
|
||||
pcs.add(pc)
|
||||
|
@ -183,8 +166,6 @@ async def run(push_url):
|
|||
await pc.setLocalDescription(await pc.createOffer())
|
||||
answer = await post(push_url,pc.localDescription.sdp)
|
||||
await pc.setRemoteDescription(RTCSessionDescription(sdp=answer,type='answer'))
|
||||
|
||||
|
||||
##########################################
|
||||
# os.environ['MKL_SERVICE_FORCE_INTEL'] = '1'
|
||||
# os.environ['MULTIPROCESSING_METHOD'] = 'forkserver'
|
||||
|
@ -204,19 +185,13 @@ if __name__ == '__main__':
|
|||
### training options
|
||||
parser.add_argument('--ckpt', type=str, default='data/pretrained/ngp_kf.pth')
|
||||
|
||||
parser.add_argument('--num_rays', type=int, default=4096 * 16,
|
||||
help="num rays sampled per image for each training step")
|
||||
parser.add_argument('--num_rays', type=int, default=4096 * 16, help="num rays sampled per image for each training step")
|
||||
parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
|
||||
parser.add_argument('--max_steps', type=int, default=16,
|
||||
help="max num steps sampled per ray (only valid when using --cuda_ray)")
|
||||
parser.add_argument('--num_steps', type=int, default=16,
|
||||
help="num steps sampled per ray (only valid when NOT using --cuda_ray)")
|
||||
parser.add_argument('--upsample_steps', type=int, default=0,
|
||||
help="num steps up-sampled per ray (only valid when NOT using --cuda_ray)")
|
||||
parser.add_argument('--update_extra_interval', type=int, default=16,
|
||||
help="iter interval to update extra status (only valid when using --cuda_ray)")
|
||||
parser.add_argument('--max_ray_batch', type=int, default=4096,
|
||||
help="batch size of rays at inference to avoid OOM (only valid when NOT using --cuda_ray)")
|
||||
parser.add_argument('--max_steps', type=int, default=16, help="max num steps sampled per ray (only valid when using --cuda_ray)")
|
||||
parser.add_argument('--num_steps', type=int, default=16, help="num steps sampled per ray (only valid when NOT using --cuda_ray)")
|
||||
parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when NOT using --cuda_ray)")
|
||||
parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
|
||||
parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when NOT using --cuda_ray)")
|
||||
|
||||
### loss set
|
||||
parser.add_argument('--warmup_step', type=int, default=10000, help="warm up steps")
|
||||
|
@ -231,31 +206,23 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--bg_img', type=str, default='white', help="background image")
|
||||
parser.add_argument('--fbg', action='store_true', help="frame-wise bg")
|
||||
parser.add_argument('--exp_eye', action='store_true', help="explicitly control the eyes")
|
||||
parser.add_argument('--fix_eye', type=float, default=-1,
|
||||
help="fixed eye area, negative to disable, set to 0-0.3 for a reasonable eye")
|
||||
parser.add_argument('--fix_eye', type=float, default=-1, help="fixed eye area, negative to disable, set to 0-0.3 for a reasonable eye")
|
||||
parser.add_argument('--smooth_eye', action='store_true', help="smooth the eye area sequence")
|
||||
|
||||
parser.add_argument('--torso_shrink', type=float, default=0.8,
|
||||
help="shrink bg coords to allow more flexibility in deform")
|
||||
parser.add_argument('--torso_shrink', type=float, default=0.8, help="shrink bg coords to allow more flexibility in deform")
|
||||
|
||||
### dataset options
|
||||
parser.add_argument('--color_space', type=str, default='srgb', help="Color space, supports (linear, srgb)")
|
||||
parser.add_argument('--preload', type=int, default=0,
|
||||
help="0 means load data from disk on-the-fly, 1 means preload to CPU, 2 means GPU.")
|
||||
parser.add_argument('--preload', type=int, default=0, help="0 means load data from disk on-the-fly, 1 means preload to CPU, 2 means GPU.")
|
||||
# (the default value is for the fox dataset)
|
||||
parser.add_argument('--bound', type=float, default=1,
|
||||
help="assume the scene is bounded in box[-bound, bound]^3, if > 1, will invoke adaptive ray marching.")
|
||||
parser.add_argument('--bound', type=float, default=1, help="assume the scene is bounded in box[-bound, bound]^3, if > 1, will invoke adaptive ray marching.")
|
||||
parser.add_argument('--scale', type=float, default=4, help="scale camera location into box[-bound, bound]^3")
|
||||
parser.add_argument('--offset', type=float, nargs='*', default=[0, 0, 0], help="offset of camera location")
|
||||
parser.add_argument('--dt_gamma', type=float, default=1 / 256,
|
||||
help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")
|
||||
parser.add_argument('--dt_gamma', type=float, default=1/256, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")
|
||||
parser.add_argument('--min_near', type=float, default=0.05, help="minimum near distance for camera")
|
||||
parser.add_argument('--density_thresh', type=float, default=10,
|
||||
help="threshold for density grid to be occupied (sigma)")
|
||||
parser.add_argument('--density_thresh_torso', type=float, default=0.01,
|
||||
help="threshold for density grid to be occupied (alpha)")
|
||||
parser.add_argument('--patch_size', type=int, default=1,
|
||||
help="[experimental] render patches in training, so as to apply LPIPS loss. 1 means disabled, use [64, 32, 16] to enable")
|
||||
parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied (sigma)")
|
||||
parser.add_argument('--density_thresh_torso', type=float, default=0.01, help="threshold for density grid to be occupied (alpha)")
|
||||
parser.add_argument('--patch_size', type=int, default=1, help="[experimental] render patches in training, so as to apply LPIPS loss. 1 means disabled, use [64, 32, 16] to enable")
|
||||
|
||||
parser.add_argument('--init_lips', action='store_true', help="init lips region")
|
||||
parser.add_argument('--finetune_lips', action='store_true', help="use LPIPS and landmarks to fine tune lips region")
|
||||
|
@ -273,15 +240,12 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--max_spp', type=int, default=1, help="GUI rendering max sample per pixel")
|
||||
|
||||
### else
|
||||
parser.add_argument('--att', type=int, default=2,
|
||||
help="audio attention mode (0 = turn off, 1 = left-direction, 2 = bi-direction)")
|
||||
parser.add_argument('--aud', type=str, default='',
|
||||
help="audio source (empty will load the default, else should be a path to a npy file)")
|
||||
parser.add_argument('--att', type=int, default=2, help="audio attention mode (0 = turn off, 1 = left-direction, 2 = bi-direction)")
|
||||
parser.add_argument('--aud', type=str, default='', help="audio source (empty will load the default, else should be a path to a npy file)")
|
||||
parser.add_argument('--emb', action='store_true', help="use audio class + embedding instead of logits")
|
||||
|
||||
parser.add_argument('--ind_dim', type=int, default=4, help="individual code dim, 0 to turn off")
|
||||
parser.add_argument('--ind_num', type=int, default=10000,
|
||||
help="number of individual codes, should be larger than training dataset size")
|
||||
parser.add_argument('--ind_num', type=int, default=10000, help="number of individual codes, should be larger than training dataset size")
|
||||
|
||||
parser.add_argument('--ind_dim_torso', type=int, default=8, help="individual code dim, 0 to turn off")
|
||||
|
||||
|
@ -290,8 +254,7 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--part2', action='store_true', help="use partial training data (first 15s)")
|
||||
|
||||
parser.add_argument('--train_camera', action='store_true', help="optimize camera pose")
|
||||
parser.add_argument('--smooth_path', action='store_true',
|
||||
help="brute-force smooth camera pose trajectory with a window size")
|
||||
parser.add_argument('--smooth_path', action='store_true', help="brute-force smooth camera pose trajectory with a window size")
|
||||
parser.add_argument('--smooth_path_window', type=int, default=7, help="smoothing window size")
|
||||
|
||||
# asr
|
||||
|
@ -325,7 +288,6 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--batch_size', type=int, default=16)
|
||||
|
||||
parser.add_argument('--customvideo', action='store_true', help="custom video")
|
||||
parser.add_argument('--static_img', action='store_true', help="Use the first photo as a time of rest")
|
||||
parser.add_argument('--customvideo_img', type=str, default='data/customvideo/img')
|
||||
parser.add_argument('--customvideo_imgnum', type=int, default=1)
|
||||
|
||||
|
@ -339,8 +301,7 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--model', type=str, default='ernerf') #musetalk wav2lip
|
||||
|
||||
parser.add_argument('--transport', type=str, default='rtcpush') #rtmp webrtc rtcpush
|
||||
parser.add_argument('--push_url', type=str,
|
||||
default='http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream') # rtmp://localhost/live/livestream
|
||||
parser.add_argument('--push_url', type=str, default='http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream') #rtmp://localhost/live/livestream
|
||||
|
||||
parser.add_argument('--listenport', type=int, default=8010)
|
||||
|
||||
|
@ -353,7 +314,6 @@ if __name__ == '__main__':
|
|||
from ernerf.nerf_triplane.utils import *
|
||||
from ernerf.nerf_triplane.network import NeRFNetwork
|
||||
from nerfreal import NeRFReal
|
||||
|
||||
# assert test mode
|
||||
opt.test = True
|
||||
opt.test_train = False
|
||||
|
@ -388,8 +348,7 @@ if __name__ == '__main__':
|
|||
criterion = torch.nn.MSELoss(reduction='none')
|
||||
metrics = [] # use no metric in GUI for faster initialization...
|
||||
print(model)
|
||||
trainer = Trainer('ngp', opt, model, device=device, workspace=opt.workspace, criterion=criterion, fp16=opt.fp16,
|
||||
metrics=metrics, use_checkpoint=opt.ckpt)
|
||||
trainer = Trainer('ngp', opt, model, device=device, workspace=opt.workspace, criterion=criterion, fp16=opt.fp16, metrics=metrics, use_checkpoint=opt.ckpt)
|
||||
|
||||
test_loader = NeRFDataset_Test(opt, device=device).dataloader()
|
||||
model.aud_features = test_loader._data.auds
|
||||
|
@ -399,12 +358,10 @@ if __name__ == '__main__':
|
|||
nerfreal = NeRFReal(opt, trainer, test_loader)
|
||||
elif opt.model == 'musetalk':
|
||||
from musereal import MuseReal
|
||||
|
||||
print(opt)
|
||||
nerfreal = MuseReal(opt)
|
||||
elif opt.model == 'wav2lip':
|
||||
from lipreal import LipReal
|
||||
|
||||
print(opt)
|
||||
nerfreal = LipReal(opt)
|
||||
|
||||
|
@ -419,7 +376,6 @@ if __name__ == '__main__':
|
|||
appasync.on_shutdown.append(on_shutdown)
|
||||
appasync.router.add_post("/offer", offer)
|
||||
appasync.router.add_post("/human", human)
|
||||
appasync.router.add_post("/create_musetalk", handle_create_musetalk)
|
||||
appasync.router.add_static('/',path='web')
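With the `/create_musetalk` route registered above, the aiohttp app now exposes three POST endpoints plus the static web/ pages. A summary sketch; the paths come from the hunk above, while the payload shapes are inferred from the handlers and the bundled HTML pages, and the port is the default `--listenport` 8010:

```python
# Assumed HTTP surface after this commit (shapes inferred, not authoritative).
ROUTES = {
    "POST /offer":           "WebRTC signalling: JSON {sdp, type} in, JSON {sdp, type} out",
    "POST /human":           "speak text: JSON {text, type: 'echo' | 'chat', interrupt: bool}",
    "POST /create_musetalk": "multipart upload: file part first, then an integer avatar_id part",
    "GET  /...":             "static files served from the local web/ directory",
}
```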
|
||||
|
||||
# Configure default CORS settings.
|
||||
|
@ -434,7 +390,6 @@ if __name__ == '__main__':
|
|||
for route in list(appasync.router.routes()):
|
||||
cors.add(route)
|
||||
|
||||
|
||||
def run_server(runner):
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
@ -444,8 +399,6 @@ if __name__ == '__main__':
|
|||
if opt.transport=='rtcpush':
|
||||
loop.run_until_complete(run(opt.push_url))
|
||||
loop.run_forever()
|
||||
|
||||
|
||||
Thread(target=run_server, args=(web.AppRunner(appasync),)).start()
|
||||
|
||||
print('start websocket server')
|
||||
|
@ -453,3 +406,5 @@ if __name__ == '__main__':
|
|||
#app.router.add_post("/offer", offer)
|
||||
server = pywsgi.WSGIServer(('0.0.0.0', 8000), app, handler_class=WebSocketHandler)
|
||||
server.serve_forever()
|
||||
|
||||
|
asrreal.py (129 changed lines)
|
@ -4,29 +4,19 @@ import torch
|
|||
import torch.nn.functional as F
|
||||
from transformers import AutoModelForCTC, AutoProcessor, Wav2Vec2Processor, HubertModel
|
||||
|
||||
#import pyaudio
|
||||
import soundfile as sf
|
||||
import resampy
|
||||
|
||||
import queue
|
||||
from queue import Queue
|
||||
#from collections import deque
|
||||
from threading import Thread, Event
|
||||
from io import BytesIO
|
||||
|
||||
class ASR:
|
||||
from baseasr import BaseASR
|
||||
|
||||
class ASR(BaseASR):
|
||||
def __init__(self, opt):
|
||||
|
||||
self.opt = opt
|
||||
|
||||
self.play = opt.asr_play #false
|
||||
super().__init__(opt)
|
||||
|
||||
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
||||
self.fps = opt.fps # 20 ms per frame
|
||||
self.sample_rate = 16000
|
||||
self.chunk = self.sample_rate // self.fps # 320 samples per chunk (20ms * 16000 / 1000)
|
||||
self.mode = 'live' if opt.asr_wav == '' else 'file'
|
||||
|
||||
if 'esperanto' in self.opt.asr_model:
|
||||
self.audio_dim = 44
|
||||
elif 'deepspeech' in self.opt.asr_model:
|
||||
|
@ -41,30 +31,11 @@ class ASR:
|
|||
self.context_size = opt.m
|
||||
self.stride_left_size = opt.l
|
||||
self.stride_right_size = opt.r
|
||||
self.text = '[START]\n'
|
||||
self.terminated = False
|
||||
self.frames = []
|
||||
self.inwarm = False
|
||||
|
||||
# pad left frames
|
||||
if self.stride_left_size > 0:
|
||||
self.frames.extend([np.zeros(self.chunk, dtype=np.float32)] * self.stride_left_size)
|
||||
|
||||
|
||||
self.exit_event = Event()
|
||||
#self.audio_instance = pyaudio.PyAudio() #not need
|
||||
|
||||
# create input stream
|
||||
self.queue = Queue()
|
||||
self.output_queue = Queue()
|
||||
# start a background process to read frames
|
||||
#self.input_stream = self.audio_instance.open(format=pyaudio.paInt16, channels=1, rate=self.sample_rate, input=True, output=False, frames_per_buffer=self.chunk)
|
||||
#self.queue = Queue()
|
||||
#self.process_read_frame = Thread(target=_read_frame, args=(self.input_stream, self.exit_event, self.queue, self.chunk))
|
||||
|
||||
# current location of audio
|
||||
self.idx = 0
|
||||
|
||||
# create wav2vec model
|
||||
print(f'[INFO] loading ASR model {self.opt.asr_model}...')
|
||||
if 'hubert' in self.opt.asr_model:
|
||||
|
@ -74,10 +45,6 @@ class ASR:
|
|||
self.processor = AutoProcessor.from_pretrained(opt.asr_model)
|
||||
self.model = AutoModelForCTC.from_pretrained(opt.asr_model).to(self.device)
|
||||
|
||||
# prepare to save logits
|
||||
if self.opt.asr_save_feats:
|
||||
self.all_feats = []
|
||||
|
||||
# the extracted features
|
||||
# use a loop queue to efficiently record endless features: [f--t---][-------][-------]
|
||||
self.feat_buffer_size = 4
|
||||
|
@ -93,8 +60,16 @@ class ASR:
|
|||
# warm up steps needed: mid + right + window_size + attention_size
|
||||
self.warm_up_steps = self.context_size + self.stride_left_size + self.stride_right_size #+ self.stride_left_size #+ 8 + 2 * 3
|
||||
|
||||
self.listening = False
|
||||
self.playing = False
|
||||
def get_audio_frame(self):
|
||||
try:
|
||||
frame = self.queue.get(block=False)
|
||||
type = 0
|
||||
#print(f'[INFO] get frame {frame.shape}')
|
||||
except queue.Empty:
|
||||
frame = np.zeros(self.chunk, dtype=np.float32)
|
||||
type = 1
|
||||
|
||||
return frame,type
|
||||
|
||||
def get_next_feat(self): #get audio embedding to nerf
|
||||
# return a [1/8, 16] window, for the next input to nerf side.
|
||||
|
@ -136,17 +111,8 @@ class ASR:
|
|||
|
||||
def run_step(self):
|
||||
|
||||
if self.terminated:
|
||||
return
|
||||
|
||||
# get a frame of audio
|
||||
frame,type = self.__get_audio_frame()
|
||||
|
||||
# the last frame
|
||||
if frame is None:
|
||||
# terminate, but always run the network for the left frames
|
||||
self.terminated = True
|
||||
else:
|
||||
frame,type = self.get_audio_frame()
|
||||
self.frames.append(frame)
|
||||
# put to output
|
||||
self.output_queue.put((frame,type))
|
||||
|
@ -157,7 +123,6 @@ class ASR:
|
|||
inputs = np.concatenate(self.frames) # [N * chunk]
|
||||
|
||||
# discard the old part to save memory
|
||||
if not self.terminated:
|
||||
self.frames = self.frames[-(self.stride_left_size + self.stride_right_size):]
|
||||
|
||||
#print(f'[INFO] frame_to_text... ')
|
||||
|
@ -166,10 +131,6 @@ class ASR:
|
|||
#print(f'-------wav2vec time:{time.time()-t:.4f}s')
|
||||
feats = logits # better lips-sync than labels
|
||||
|
||||
# save feats
|
||||
if self.opt.asr_save_feats:
|
||||
self.all_feats.append(feats)
|
||||
|
||||
# record the feats efficiently.. (no concat, constant memory)
|
||||
start = self.feat_buffer_idx * self.context_size
|
||||
end = start + feats.shape[0]
|
||||
|
@ -203,24 +164,6 @@ class ASR:
|
|||
# np.save(output_path, unfold_feats.cpu().numpy())
|
||||
# print(f"[INFO] saved logits to {output_path}")
|
||||
|
||||
def put_audio_frame(self,audio_chunk): #16khz 20ms pcm
|
||||
self.queue.put(audio_chunk)
|
||||
|
||||
def __get_audio_frame(self):
|
||||
if self.inwarm: # warm up
|
||||
return np.zeros(self.chunk, dtype=np.float32),1
|
||||
|
||||
try:
|
||||
frame = self.queue.get(block=False)
|
||||
type = 0
|
||||
print(f'[INFO] get frame {frame.shape}')
|
||||
except queue.Empty:
|
||||
frame = np.zeros(self.chunk, dtype=np.float32)
|
||||
type = 1
|
||||
|
||||
self.idx = self.idx + self.chunk
|
||||
|
||||
return frame,type
|
||||
|
||||
|
||||
def __frame_to_text(self, frame):
|
||||
|
@ -241,8 +184,8 @@ class ASR:
|
|||
right = min(logits.shape[1], logits.shape[1] - self.stride_right_size + 1) # +1 to make sure output is the same length as input.
|
||||
|
||||
# do not cut right if terminated.
|
||||
if self.terminated:
|
||||
right = logits.shape[1]
|
||||
# if self.terminated:
|
||||
# right = logits.shape[1]
|
||||
|
||||
logits = logits[:, left:right]
|
||||
|
||||
|
@ -263,9 +206,22 @@ class ASR:
|
|||
return logits[0], None,None #predicted_ids[0], transcription # [N,]
|
||||
|
||||
|
||||
def get_audio_out(self): #get origin audio pcm to nerf
|
||||
return self.output_queue.get()
|
||||
def warm_up(self):
|
||||
print(f'[INFO] warm up ASR live model, expected latency = {self.warm_up_steps / self.fps:.6f}s')
|
||||
t = time.time()
|
||||
#for _ in range(self.stride_left_size):
|
||||
# self.frames.append(np.zeros(self.chunk, dtype=np.float32))
|
||||
for _ in range(self.warm_up_steps):
|
||||
self.run_step()
|
||||
#if torch.cuda.is_available():
|
||||
# torch.cuda.synchronize()
|
||||
t = time.time() - t
|
||||
print(f'[INFO] warm-up done, actual latency = {t:.6f}s')
|
||||
|
||||
#self.clear_queue()
|
||||
|
||||
#####not used function#####################################
|
||||
'''
|
||||
def __init_queue(self):
|
||||
self.frames = []
|
||||
self.queue.queue.clear()
|
||||
|
@ -290,26 +246,6 @@ class ASR:
|
|||
if self.play:
|
||||
self.output_queue.queue.clear()
|
||||
|
||||
def warm_up(self):
|
||||
|
||||
#self.listen()
|
||||
|
||||
self.inwarm = True
|
||||
print(f'[INFO] warm up ASR live model, expected latency = {self.warm_up_steps / self.fps:.6f}s')
|
||||
t = time.time()
|
||||
#for _ in range(self.stride_left_size):
|
||||
# self.frames.append(np.zeros(self.chunk, dtype=np.float32))
|
||||
for _ in range(self.warm_up_steps):
|
||||
self.run_step()
|
||||
#if torch.cuda.is_available():
|
||||
# torch.cuda.synchronize()
|
||||
t = time.time() - t
|
||||
print(f'[INFO] warm-up done, actual latency = {t:.6f}s')
|
||||
self.inwarm = False
|
||||
|
||||
#self.clear_queue()
|
||||
|
||||
#####not used function#####################################
|
||||
def listen(self):
|
||||
# start
|
||||
if self.mode == 'live' and not self.listening:
|
||||
|
@ -405,3 +341,4 @@ if __name__ == '__main__':
|
|||
|
||||
with ASR(opt) as asr:
|
||||
asr.run()
|
||||
'''
|
lipasr.py (54 changed lines)
|
@ -6,60 +6,16 @@ import queue
|
|||
from queue import Queue
|
||||
import multiprocessing as mp
|
||||
|
||||
from baseasr import BaseASR
|
||||
from wav2lip import audio
|
||||
|
||||
class LipASR:
|
||||
def __init__(self, opt):
|
||||
self.opt = opt
|
||||
|
||||
self.fps = opt.fps # 20 ms per frame
|
||||
self.sample_rate = 16000
|
||||
self.chunk = self.sample_rate // self.fps # 320 samples per chunk (20ms * 16000 / 1000)
|
||||
self.queue = Queue()
|
||||
# self.input_stream = BytesIO()
|
||||
self.output_queue = mp.Queue()
|
||||
|
||||
#self.audio_processor = audio_processor
|
||||
self.batch_size = opt.batch_size
|
||||
|
||||
self.frames = []
|
||||
self.stride_left_size = opt.l
|
||||
self.stride_right_size = opt.r
|
||||
#self.context_size = 10
|
||||
self.feat_queue = mp.Queue(5)
|
||||
|
||||
self.warm_up()
|
||||
|
||||
def put_audio_frame(self,audio_chunk): #16khz 20ms pcm
|
||||
self.queue.put(audio_chunk)
|
||||
|
||||
def __get_audio_frame(self):
|
||||
try:
|
||||
frame = self.queue.get(block=True,timeout=0.01)
|
||||
type = 0
|
||||
#print(f'[INFO] get frame {frame.shape}')
|
||||
except queue.Empty:
|
||||
frame = np.zeros(self.chunk, dtype=np.float32)
|
||||
type = 1
|
||||
|
||||
return frame,type
|
||||
|
||||
def get_audio_out(self): #get origin audio pcm to nerf
|
||||
return self.output_queue.get()
|
||||
|
||||
def warm_up(self):
|
||||
for _ in range(self.stride_left_size + self.stride_right_size):
|
||||
audio_frame,type=self.__get_audio_frame()
|
||||
self.frames.append(audio_frame)
|
||||
self.output_queue.put((audio_frame,type))
|
||||
for _ in range(self.stride_left_size):
|
||||
self.output_queue.get()
|
||||
class LipASR(BaseASR):
|
||||
|
||||
def run_step(self):
|
||||
############################################## extract audio feature ##############################################
|
||||
# get a frame of audio
|
||||
for _ in range(self.batch_size*2):
|
||||
frame,type = self.__get_audio_frame()
|
||||
frame,type = self.get_audio_frame()
|
||||
self.frames.append(frame)
|
||||
# put to output
|
||||
self.output_queue.put((frame,type))
|
||||
|
@ -89,7 +45,3 @@ class LipASR:
|
|||
|
||||
# discard the old part to save memory
|
||||
self.frames = self.frames[-(self.stride_left_size + self.stride_right_size):]
|
||||
|
||||
|
||||
def get_next_feat(self,block,timeout):
|
||||
return self.feat_queue.get(block,timeout)
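The common plumbing that lipasr.py (above), museasr.py, and asrreal.py used to duplicate now lives in a shared `BaseASR` class imported from baseasr.py, which is not part of this diff. A minimal reconstruction of what the subclasses appear to rely on, inferred from the removed code; the real implementation may differ:

```python
# Hedged reconstruction of baseasr.BaseASR (baseasr.py is not shown in this diff).
# Attribute and method names match the call sites in the subclasses; details are assumptions.
import queue
from queue import Queue
import multiprocessing as mp

import numpy as np


class BaseASR:
    def __init__(self, opt):
        self.opt = opt
        self.fps = opt.fps                          # 20 ms per frame
        self.sample_rate = 16000
        self.chunk = self.sample_rate // self.fps   # 320 samples per 20 ms chunk
        self.queue = Queue()                        # incoming 20 ms PCM chunks
        self.output_queue = mp.Queue()              # original PCM handed back to the renderer
        self.batch_size = opt.batch_size
        self.frames = []
        self.stride_left_size = opt.l
        self.stride_right_size = opt.r
        self.feat_queue = mp.Queue(5)               # extracted audio features

    def pause_talk(self):
        # assumption: an interrupt simply drops any audio that has not been consumed yet
        self.queue.queue.clear()

    def put_audio_frame(self, audio_chunk):         # 16 kHz, 20 ms PCM
        self.queue.put(audio_chunk)

    def get_audio_frame(self):
        try:
            frame = self.queue.get(block=True, timeout=0.01)
            type = 0                                # real audio
        except queue.Empty:
            frame = np.zeros(self.chunk, dtype=np.float32)
            type = 1                                # silence filler
        return frame, type

    def get_audio_out(self):                        # original PCM for the audio track
        return self.output_queue.get()

    def warm_up(self):
        for _ in range(self.stride_left_size + self.stride_right_size):
            audio_frame, type = self.get_audio_frame()
            self.frames.append(audio_frame)
            self.output_queue.put((audio_frame, type))
        for _ in range(self.stride_left_size):
            self.output_queue.get()

    def get_next_feat(self, block, timeout):
        return self.feat_queue.get(block, timeout)
```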
|
lipreal.py (12 changed lines)
|
@ -164,6 +164,7 @@ class LipReal:
|
|||
self.__loadavatar()
|
||||
|
||||
self.asr = LipASR(opt)
|
||||
self.asr.warm_up()
|
||||
if opt.tts == "edgetts":
|
||||
self.tts = EdgeTTS(opt,self)
|
||||
elif opt.tts == "gpt-sovits":
|
||||
|
@ -200,6 +201,10 @@ class LipReal:
|
|||
def put_audio_frame(self,audio_chunk): #16khz 20ms pcm
|
||||
self.asr.put_audio_frame(audio_chunk)
|
||||
|
||||
def pause_talk(self):
|
||||
self.tts.pause_talk()
|
||||
self.asr.pause_talk()
|
||||
|
||||
|
||||
def process_frames(self,quit_event,loop=None,audio_track=None,video_track=None):
|
||||
|
||||
|
@ -257,9 +262,12 @@ class LipReal:
|
|||
t = time.perf_counter()
|
||||
self.asr.run_step()
|
||||
|
||||
if video_track._queue.qsize()>=2*self.opt.batch_size:
|
||||
# if video_track._queue.qsize()>=2*self.opt.batch_size:
|
||||
# print('sleep qsize=',video_track._queue.qsize())
|
||||
# time.sleep(0.04*video_track._queue.qsize()*0.8)
|
||||
if video_track._queue.qsize()>=5:
|
||||
print('sleep qsize=',video_track._queue.qsize())
|
||||
time.sleep(0.04*self.opt.batch_size*1.5)
|
||||
time.sleep(0.04*video_track._queue.qsize()*0.8)
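The replacement logic above throttles on the actual queue depth instead of a fixed multiple of batch_size: once roughly five 40 ms frames are queued, the render loop sleeps proportionally so the WebRTC track can drain. A small sketch of the pattern, with the threshold and factor taken from the lines above:

```python
# Backpressure sketch mirroring the change above: sleep in proportion to the number of
# queued 40 ms video frames once a small threshold is exceeded.
import time

def throttle(queue_size, frame_interval=0.04, threshold=5, drain_factor=0.8):
    if queue_size >= threshold:
        time.sleep(frame_interval * queue_size * drain_factor)
```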
|
||||
|
||||
# delay = _starttime+_totalframe*0.04-time.perf_counter() #40ms
|
||||
# if delay > 0:
|
||||
|
|
museasr.py (54 changed lines)
|
@ -1,65 +1,22 @@
|
|||
import time
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
import queue
|
||||
from queue import Queue
|
||||
import multiprocessing as mp
|
||||
|
||||
from baseasr import BaseASR
|
||||
from musetalk.whisper.audio2feature import Audio2Feature
|
||||
|
||||
class MuseASR:
|
||||
class MuseASR(BaseASR):
|
||||
def __init__(self, opt, audio_processor:Audio2Feature):
|
||||
self.opt = opt
|
||||
|
||||
self.fps = opt.fps # 20 ms per frame
|
||||
self.sample_rate = 16000
|
||||
self.chunk = self.sample_rate // self.fps # 320 samples per chunk (20ms * 16000 / 1000)
|
||||
self.queue = Queue()
|
||||
# self.input_stream = BytesIO()
|
||||
self.output_queue = mp.Queue()
|
||||
|
||||
super().__init__(opt)
|
||||
self.audio_processor = audio_processor
|
||||
self.batch_size = opt.batch_size
|
||||
|
||||
self.frames = []
|
||||
self.stride_left_size = opt.l
|
||||
self.stride_right_size = opt.r
|
||||
self.feat_queue = mp.Queue(5)
|
||||
|
||||
self.warm_up()
|
||||
|
||||
def put_audio_frame(self,audio_chunk): #16khz 20ms pcm
|
||||
self.queue.put(audio_chunk)
|
||||
|
||||
def __get_audio_frame(self):
|
||||
try:
|
||||
frame = self.queue.get(block=True,timeout=0.01)
|
||||
type = 0
|
||||
#print(f'[INFO] get frame {frame.shape}')
|
||||
except queue.Empty:
|
||||
frame = np.zeros(self.chunk, dtype=np.float32)
|
||||
type = 1
|
||||
|
||||
return frame,type
|
||||
|
||||
def get_audio_out(self): #get origin audio pcm to nerf
|
||||
return self.output_queue.get()
|
||||
|
||||
def warm_up(self):
|
||||
for _ in range(self.stride_left_size + self.stride_right_size):
|
||||
audio_frame,type=self.__get_audio_frame()
|
||||
self.frames.append(audio_frame)
|
||||
self.output_queue.put((audio_frame,type))
|
||||
|
||||
for _ in range(self.stride_left_size):
|
||||
self.output_queue.get()
|
||||
|
||||
def run_step(self):
|
||||
############################################## extract audio feature ##############################################
|
||||
start_time = time.time()
|
||||
for _ in range(self.batch_size*2):
|
||||
audio_frame,type=self.__get_audio_frame()
|
||||
audio_frame,type=self.get_audio_frame()
|
||||
self.frames.append(audio_frame)
|
||||
self.output_queue.put((audio_frame,type))
|
||||
|
||||
|
@ -77,6 +34,3 @@ class MuseASR:
|
|||
self.feat_queue.put(whisper_chunks)
|
||||
# discard the old part to save memory
|
||||
self.frames = self.frames[-(self.stride_left_size + self.stride_right_size):]
|
||||
|
||||
def get_next_feat(self,block,timeout):
|
||||
return self.feat_queue.get(block,timeout)
|
musereal.py (25 changed lines)
|
@ -29,8 +29,6 @@ import asyncio
|
|||
from av import AudioFrame, VideoFrame
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def read_imgs(img_list):
|
||||
frames = []
|
||||
print('reading images...')
|
||||
|
@ -39,7 +37,6 @@ def read_imgs(img_list):
|
|||
frames.append(frame)
|
||||
return frames
|
||||
|
||||
|
||||
def __mirror_index(size, index):
|
||||
#size = len(self.coord_list_cycle)
|
||||
turn = index // size
|
||||
|
@ -49,7 +46,6 @@ def __mirror_index(size, index):
|
|||
else:
|
||||
return size - res - 1
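`__mirror_index` is what lets the avatar's frame list loop back and forth instead of jumping from the last frame straight to the first. A quick illustration of the intended behaviour, assuming the standard even/odd-pass form (the `res` computation and the parity test fall outside the lines shown here):

```python
# Illustration only: mirror indexing as used for looping avatar frames.
def mirror_index(size, index):
    turn = index // size
    res = index % size
    if turn % 2 == 0:
        return res             # even pass: walk the frames forward
    return size - res - 1      # odd pass: walk them backward

print([mirror_index(4, i) for i in range(10)])  # [0, 1, 2, 3, 3, 2, 1, 0, 0, 1]
```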
|
||||
|
||||
|
||||
def inference(render_event,batch_size,latents_out_path,audio_feat_queue,audio_out_queue,res_frame_queue,
|
||||
): #vae, unet, pe,timesteps
|
||||
|
||||
|
@ -128,7 +124,6 @@ def inference(render_event, batch_size, latents_out_path, audio_feat_queue, audi
|
|||
time.sleep(1)
|
||||
print('musereal inference processor stop')
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
class MuseReal:
|
||||
def __init__(self, opt):
|
||||
|
@ -140,7 +135,6 @@ class MuseReal:
|
|||
|
||||
#### musetalk
|
||||
self.avatar_id = opt.avatar_id
|
||||
self.static_img = opt.static_img
|
||||
self.video_path = '' #video_path
|
||||
self.bbox_shift = opt.bbox_shift
|
||||
self.avatar_path = f"./data/avatars/{self.avatar_id}"
|
||||
|
@ -163,6 +157,7 @@ class MuseReal:
|
|||
self.__loadavatar()
|
||||
|
||||
self.asr = MuseASR(opt,self.audio_processor)
|
||||
self.asr.warm_up()
|
||||
if opt.tts == "edgetts":
|
||||
self.tts = EdgeTTS(opt,self)
|
||||
elif opt.tts == "gpt-sovits":
|
||||
|
@ -199,12 +194,18 @@ class MuseReal:
|
|||
input_mask_list = sorted(input_mask_list, key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))
|
||||
self.mask_list_cycle = read_imgs(input_mask_list)
|
||||
|
||||
|
||||
def put_msg_txt(self,msg):
|
||||
self.tts.put_msg_txt(msg)
|
||||
|
||||
def put_audio_frame(self,audio_chunk): #16khz 20ms pcm
|
||||
self.asr.put_audio_frame(audio_chunk)
|
||||
|
||||
def pause_talk(self):
|
||||
self.tts.pause_talk()
|
||||
self.asr.pause_talk()
|
||||
|
||||
|
||||
def __mirror_index(self, index):
|
||||
size = len(self.coord_list_cycle)
|
||||
turn = index // size
|
||||
|
@ -237,6 +238,7 @@ class MuseReal:
|
|||
encoder_hidden_states=audio_feature_batch).sample
|
||||
recon = self.vae.decode_latents(pred_latents)
|
||||
|
||||
|
||||
def process_frames(self,quit_event,loop=None,audio_track=None,video_track=None):
|
||||
|
||||
while not quit_event.is_set():
|
||||
|
@ -245,9 +247,6 @@ class MuseReal:
|
|||
except queue.Empty:
|
||||
continue
|
||||
if audio_frames[0][1]==1 and audio_frames[1][1]==1: # all-silence data, just use the full image
|
||||
if self.static_img:
|
||||
combine_frame = self.frame_list_cycle[0]
|
||||
else:
|
||||
combine_frame = self.frame_list_cycle[idx]
|
||||
else:
|
||||
bbox = self.coord_list_cycle[idx]
|
||||
|
@ -304,12 +303,16 @@ class MuseReal:
|
|||
# print(f"------actual avg infer fps:{count/totaltime:.4f}")
|
||||
# count=0
|
||||
# totaltime=0
|
||||
if video_track._queue.qsize() >= 2 * self.opt.batch_size:
|
||||
if video_track._queue.qsize()>=1.5*self.opt.batch_size:
|
||||
print('sleep qsize=',video_track._queue.qsize())
|
||||
time.sleep(0.04 * self.opt.batch_size * 1.5)
|
||||
time.sleep(0.04*video_track._queue.qsize()*0.8)
|
||||
# if video_track._queue.qsize()>=5:
|
||||
# print('sleep qsize=',video_track._queue.qsize())
|
||||
# time.sleep(0.04*video_track._queue.qsize()*0.8)
|
||||
|
||||
# delay = _starttime+_totalframe*0.04-time.perf_counter() #40ms
|
||||
# if delay > 0:
|
||||
# time.sleep(delay)
|
||||
self.render_event.clear() #end infer process render
|
||||
print('musereal thread stop')
|
||||
|
|
@ -7,14 +7,15 @@ from PIL import Image
|
|||
from .model import BiSeNet
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
|
||||
class FaceParsing():
|
||||
def __init__(self,resnet_path='./models/face-parse-bisent/resnet18-5c106cde.pth',
|
||||
model_pth='./models/face-parse-bisent/79999_iter.pth'):
|
||||
self.net = self.model_init(resnet_path,model_pth)
|
||||
self.preprocess = self.image_preprocess()
|
||||
|
||||
def model_init(self,resnet_path, model_pth):
|
||||
def model_init(self,
|
||||
resnet_path,
|
||||
model_pth):
|
||||
net = BiSeNet(resnet_path)
|
||||
if torch.cuda.is_available():
|
||||
net.cuda()
|
||||
|
@ -49,8 +50,8 @@ class FaceParsing():
|
|||
parsing = Image.fromarray(parsing.astype(np.uint8))
|
||||
return parsing
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
fp = FaceParsing()
|
||||
segmap = fp('154_small.png')
|
||||
segmap.save('res.png')
|
||||
|
||||
|
|
nerfreal.py (12 changed lines)
|
@ -20,9 +20,6 @@ class NeRFReal:
|
|||
self.opt = opt # shared with the trainer's opt to support in-place modification of rendering parameters.
|
||||
self.W = opt.W
|
||||
self.H = opt.H
|
||||
self.debug = debug
|
||||
self.training = False
|
||||
self.step = 0 # training step
|
||||
|
||||
self.trainer = trainer
|
||||
self.data_loader = data_loader
|
||||
|
@ -44,7 +41,6 @@ class NeRFReal:
|
|||
#self.eye_area = None if not self.opt.exp_eye else data_loader._data.eye_area.mean().item()
|
||||
|
||||
# playing seq from dataloader, or pause.
|
||||
self.playing = True #False todo
|
||||
self.loader = iter(data_loader)
|
||||
|
||||
#self.render_buffer = np.zeros((self.W, self.H, 3), dtype=np.float32)
|
||||
|
@ -62,7 +58,6 @@ class NeRFReal:
|
|||
self.customimg_index = 0
|
||||
|
||||
# build asr
|
||||
if self.opt.asr:
|
||||
self.asr = ASR(opt)
|
||||
self.asr.warm_up()
|
||||
if opt.tts == "edgetts":
|
||||
|
@ -124,6 +119,10 @@ class NeRFReal:
|
|||
def put_audio_frame(self,audio_chunk): #16khz 20ms pcm
|
||||
self.asr.put_audio_frame(audio_chunk)
|
||||
|
||||
def pause_talk(self):
|
||||
self.tts.pause_talk()
|
||||
self.asr.pause_talk()
|
||||
|
||||
|
||||
def mirror_index(self, index):
|
||||
size = self.opt.customvideo_imgnum
|
||||
|
@ -248,7 +247,6 @@ class NeRFReal:
|
|||
# update texture every frame
|
||||
# audio stream thread...
|
||||
t = time.perf_counter()
|
||||
if self.opt.asr and self.playing:
|
||||
# run 2 ASR steps (audio is at 50FPS, video is at 25FPS)
|
||||
for _ in range(2):
|
||||
self.asr.run_step()
|
||||
|
@ -267,7 +265,7 @@ class NeRFReal:
|
|||
else:
|
||||
if video_track._queue.qsize()>=5:
|
||||
#print('sleep qsize=',video_track._queue.qsize())
|
||||
time.sleep(0.1)
|
||||
time.sleep(0.04*video_track._queue.qsize()*0.8)
|
||||
print('nerfreal thread stop')
|
||||
|
||||
|
ttsreal.py (17 changed lines)
|
@ -13,6 +13,11 @@ import queue
|
|||
from queue import Queue
|
||||
from io import BytesIO
|
||||
from threading import Thread, Event
|
||||
from enum import Enum
|
||||
|
||||
class State(Enum):
|
||||
RUNNING=0
|
||||
PAUSE=1
|
||||
|
||||
class BaseTTS:
|
||||
def __init__(self, opt, parent):
|
||||
|
@ -25,6 +30,11 @@ class BaseTTS:
|
|||
self.input_stream = BytesIO()
|
||||
|
||||
self.msgqueue = Queue()
|
||||
self.state = State.RUNNING
|
||||
|
||||
def pause_talk(self):
|
||||
self.msgqueue.queue.clear()
|
||||
self.state = State.PAUSE
|
||||
|
||||
def put_msg_txt(self,msg):
|
||||
self.msgqueue.put(msg)
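The new `State` enum and `pause_talk` are what make interruption work end to end: a `/human` request with `interrupt: true` makes app.py call `nerfreal.pause_talk()`, which clears this message queue and flips the state to `PAUSE`, so audio chunks already being streamed stop being pushed. A hedged client sketch of triggering an interruption, assuming the aiohttp server from app.py on its default port 8010:

```python
# Hypothetical interrupt-and-speak request against the /human endpoint (host/port assumed).
import requests

requests.post(
    "http://serverip:8010/human",
    json={
        "type": "echo",       # 'echo' speaks the text verbatim; 'chat' routes it through the LLM
        "text": "hello",
        "interrupt": True,    # triggers nerfreal.pause_talk() before the new text is queued
    },
)
```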
|
||||
|
@ -37,6 +47,7 @@ class BaseTTS:
|
|||
while not quit_event.is_set():
|
||||
try:
|
||||
msg = self.msgqueue.get(block=True, timeout=1)
|
||||
self.state=State.RUNNING
|
||||
except queue.Empty:
|
||||
continue
|
||||
self.txt_to_audio(msg)
|
||||
|
@ -59,7 +70,7 @@ class EdgeTTS(BaseTTS):
|
|||
stream = self.__create_bytes_stream(self.input_stream)
|
||||
streamlen = stream.shape[0]
|
||||
idx=0
|
||||
while streamlen >= self.chunk:
|
||||
while streamlen >= self.chunk and self.state==State.RUNNING:
|
||||
self.parent.put_audio_frame(stream[idx:idx+self.chunk])
|
||||
streamlen -= self.chunk
|
||||
idx += self.chunk
|
||||
|
@ -92,7 +103,7 @@ class EdgeTTS(BaseTTS):
|
|||
async for chunk in communicate.stream():
|
||||
if first:
|
||||
first = False
|
||||
if chunk["type"] == "audio":
|
||||
if chunk["type"] == "audio" and self.state==State.RUNNING:
|
||||
#self.push_audio(chunk["data"])
|
||||
self.input_stream.write(chunk["data"])
|
||||
#file.write(chunk["data"])
|
||||
|
@ -147,7 +158,7 @@ class VoitsTTS(BaseTTS):
|
|||
end = time.perf_counter()
|
||||
print(f"gpt_sovits Time to first chunk: {end-start}s")
|
||||
first = False
|
||||
if chunk:
|
||||
if chunk and self.state==State.RUNNING:
|
||||
yield chunk
|
||||
|
||||
print("gpt_sovits response.elapsed:", res.elapsed)
|
||||
|
|
|
@ -29,22 +29,22 @@
|
|||
|
||||
$(document).ready(function() {
|
||||
var host = window.location.hostname
|
||||
var ws = new WebSocket("ws://"+host+":8000/humanchat");
|
||||
//document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
|
||||
ws.onopen = function() {
|
||||
console.log('Connected');
|
||||
};
|
||||
ws.onmessage = function(e) {
|
||||
console.log('Received: ' + e.data);
|
||||
data = e
|
||||
var vid = JSON.parse(data.data);
|
||||
console.log(typeof(vid),vid)
|
||||
//document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
|
||||
// var ws = new WebSocket("ws://"+host+":8000/humanecho");
|
||||
// //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
|
||||
// ws.onopen = function() {
|
||||
// console.log('Connected');
|
||||
// };
|
||||
// ws.onmessage = function(e) {
|
||||
// console.log('Received: ' + e.data);
|
||||
// data = e
|
||||
// var vid = JSON.parse(data.data);
|
||||
// console.log(typeof(vid),vid)
|
||||
// //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
|
||||
|
||||
};
|
||||
ws.onclose = function(e) {
|
||||
console.log('Closed');
|
||||
};
|
||||
// };
|
||||
// ws.onclose = function(e) {
|
||||
// console.log('Closed');
|
||||
// };
|
||||
|
||||
flvPlayer = mpegts.createPlayer({type: 'flv', url: "http://"+host+":8080/live/livestream.flv", isLive: true, enableStashBuffer: false});
|
||||
flvPlayer.attachMediaElement(document.getElementById('video_player'));
|
||||
|
@ -55,7 +55,17 @@
|
|||
e.preventDefault();
|
||||
var message = $('#message').val();
|
||||
console.log('Sending: ' + message);
|
||||
ws.send(message);
|
||||
fetch('/human', {
|
||||
body: JSON.stringify({
|
||||
text: message,
|
||||
type: 'chat',
|
||||
}),
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
method: 'POST'
|
||||
});
|
||||
//ws.send(message);
|
||||
$('#message').val('');
|
||||
});
|
||||
});
|
||||
|
|
|
@ -51,29 +51,39 @@
|
|||
<script type="text/javascript" charset="utf-8">
|
||||
|
||||
$(document).ready(function() {
|
||||
var host = window.location.hostname
|
||||
var ws = new WebSocket("ws://"+host+":8000/humanchat");
|
||||
//document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
|
||||
ws.onopen = function() {
|
||||
console.log('Connected');
|
||||
};
|
||||
ws.onmessage = function(e) {
|
||||
console.log('Received: ' + e.data);
|
||||
data = e
|
||||
var vid = JSON.parse(data.data);
|
||||
console.log(typeof(vid),vid)
|
||||
//document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
|
||||
// var host = window.location.hostname
|
||||
// var ws = new WebSocket("ws://"+host+":8000/humanecho");
|
||||
// //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
|
||||
// ws.onopen = function() {
|
||||
// console.log('Connected');
|
||||
// };
|
||||
// ws.onmessage = function(e) {
|
||||
// console.log('Received: ' + e.data);
|
||||
// data = e
|
||||
// var vid = JSON.parse(data.data);
|
||||
// console.log(typeof(vid),vid)
|
||||
// //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
|
||||
|
||||
};
|
||||
ws.onclose = function(e) {
|
||||
console.log('Closed');
|
||||
};
|
||||
// };
|
||||
// ws.onclose = function(e) {
|
||||
// console.log('Closed');
|
||||
// };
|
||||
|
||||
$('#echo-form').on('submit', function(e) {
|
||||
e.preventDefault();
|
||||
var message = $('#message').val();
|
||||
console.log('Sending: ' + message);
|
||||
ws.send(message);
|
||||
fetch('/human', {
|
||||
body: JSON.stringify({
|
||||
text: message,
|
||||
type: 'chat',
|
||||
}),
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
method: 'POST'
|
||||
});
|
||||
//ws.send(message);
|
||||
$('#message').val('');
|
||||
});
|
||||
});
|
||||
|
|
|
@ -79,6 +79,7 @@
|
|||
body: JSON.stringify({
|
||||
text: message,
|
||||
type: 'echo',
|
||||
interrupt: true,
|
||||
}),
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
|
|
|
@ -53,29 +53,40 @@
|
|||
<script type="text/javascript" charset="utf-8">
|
||||
|
||||
$(document).ready(function() {
|
||||
var host = window.location.hostname
|
||||
var ws = new WebSocket("ws://"+host+":8000/humanchat");
|
||||
//document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
|
||||
ws.onopen = function() {
|
||||
console.log('Connected');
|
||||
};
|
||||
ws.onmessage = function(e) {
|
||||
console.log('Received: ' + e.data);
|
||||
data = e
|
||||
var vid = JSON.parse(data.data);
|
||||
console.log(typeof(vid),vid)
|
||||
//document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
|
||||
// var host = window.location.hostname
|
||||
// var ws = new WebSocket("ws://"+host+":8000/humanecho");
|
||||
// //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
|
||||
// ws.onopen = function() {
|
||||
// console.log('Connected');
|
||||
// };
|
||||
// ws.onmessage = function(e) {
|
||||
// console.log('Received: ' + e.data);
|
||||
// data = e
|
||||
// var vid = JSON.parse(data.data);
|
||||
// console.log(typeof(vid),vid)
|
||||
// //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
|
||||
|
||||
};
|
||||
ws.onclose = function(e) {
|
||||
console.log('Closed');
|
||||
};
|
||||
// };
|
||||
// ws.onclose = function(e) {
|
||||
// console.log('Closed');
|
||||
// };
|
||||
|
||||
$('#echo-form').on('submit', function(e) {
|
||||
e.preventDefault();
|
||||
var message = $('#message').val();
|
||||
console.log('Sending: ' + message);
|
||||
ws.send(message);
|
||||
fetch('/human', {
|
||||
body: JSON.stringify({
|
||||
text: message,
|
||||
type: 'chat',
|
||||
interrupt: true,
|
||||
}),
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
method: 'POST'
|
||||
});
|
||||
//ws.send(message);
|
||||
$('#message').val('');
|
||||
});
|
||||
});
|
||||
|
|