From 677227145e4aaa4ea522c000380950f4003a21ac Mon Sep 17 00:00:00 2001 From: lipku Date: Fri, 31 May 2024 22:39:03 +0800 Subject: [PATCH] improve nerf audio video sync --- app.py | 10 +++++----- musereal.py | 1 + nerfreal.py | 9 ++++++--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/app.py b/app.py index 9a7c80c..c1a099b 100644 --- a/app.py +++ b/app.py @@ -20,10 +20,6 @@ from aiortc import RTCPeerConnection, RTCSessionDescription from webrtc import HumanPlayer import argparse -from ernerf.nerf_triplane.provider import NeRFDataset_Test -from ernerf.nerf_triplane.utils import * -from ernerf.nerf_triplane.network import NeRFNetwork -from nerfreal import NeRFReal import shutil import asyncio @@ -437,7 +433,7 @@ if __name__ == '__main__': #musetalk opt parser.add_argument('--avatar_id', type=str, default='avator_1') parser.add_argument('--bbox_shift', type=int, default=5) - parser.add_argument('--batch_size', type=int, default=4) + parser.add_argument('--batch_size', type=int, default=16) parser.add_argument('--customvideo', action='store_true', help="custom video") parser.add_argument('--customvideo_img', type=str, default='data/customvideo/img') @@ -466,6 +462,10 @@ if __name__ == '__main__': gspeaker = get_speaker(opt.REF_FILE, opt.TTS_SERVER) if opt.model == 'ernerf': + from ernerf.nerf_triplane.provider import NeRFDataset_Test + from ernerf.nerf_triplane.utils import * + from ernerf.nerf_triplane.network import NeRFNetwork + from nerfreal import NeRFReal # assert test mode opt.test = True opt.test_train = False diff --git a/musereal.py b/musereal.py index 6e6dc09..385b47d 100644 --- a/musereal.py +++ b/musereal.py @@ -26,6 +26,7 @@ from museasr import MuseASR import asyncio from av import AudioFrame, VideoFrame +@torch.no_grad() class MuseReal: def __init__(self, opt): self.opt = opt # shared with the trainer's opt to support in-place modification of rendering parameters. diff --git a/nerfreal.py b/nerfreal.py index 9c3e271..d138480 100644 --- a/nerfreal.py +++ b/nerfreal.py @@ -262,7 +262,10 @@ class NeRFReal: print(f"------actual avg infer fps:{count/totaltime:.4f}") count=0 totaltime=0 - delay = _starttime+_totalframe*0.04-time.perf_counter() #40ms - if delay > 0: - time.sleep(delay) + if video_track._queue.qsize()>=5: + #print('sleep qsize=',video_track._queue.qsize()) + time.sleep(0.1) + # delay = _starttime+_totalframe*0.04-time.perf_counter() #40ms + # if delay > 0: + # time.sleep(delay) \ No newline at end of file