diff --git a/app.py b/app.py
index 33172b9..03e91d1 100644
--- a/app.py
+++ b/app.py
@@ -15,6 +15,7 @@ import multiprocessing
 
 from aiohttp import web
 import aiohttp
+import aiohttp_cors
 
 from aiortc import RTCPeerConnection, RTCSessionDescription
 from webrtc import HumanPlayer
@@ -106,9 +107,9 @@ def gpt_sovits(text, character, language, server_url, emotion) -> Iterator[bytes
     if res.status_code != 200:
         print("Error:", res.text)
         return
-        
+
     first = True
-    for chunk in res.iter_content(chunk_size=1280): #32K*20ms*2
+    for chunk in res.iter_content(chunk_size=32000): # was 1280 (32K*20ms*2 bytes)
         if first:
             end = time.perf_counter()
             print(f"gpt_sovits Time to first chunk: {end-start}s")
@@ -512,6 +513,18 @@ if __name__ == '__main__':
     appasync.router.add_post("/human", human)
     appasync.router.add_static('/',path='web')
 
+    # Configure default CORS settings.
+    cors = aiohttp_cors.setup(appasync, defaults={
+        "*": aiohttp_cors.ResourceOptions(
+            allow_credentials=True,
+            expose_headers="*",
+            allow_headers="*",
+        )
+    })
+    # Configure CORS on all routes.
+    for route in list(appasync.router.routes()):
+        cors.add(route)
+
     def run_server(runner):
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
diff --git a/nerfreal.py b/nerfreal.py
index 074219d..9c3e271 100644
--- a/nerfreal.py
+++ b/nerfreal.py
@@ -162,6 +162,8 @@ class NeRFReal:
             new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0])
             new_frame.planes[0].update(frame.tobytes())
             new_frame.sample_rate=16000
+            # if audio_track._queue.qsize()>10:
+            #     time.sleep(0.1)
             asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop)
             #t = time.time()
             if self.opt.customvideo and audiotype!=0:
@@ -215,9 +217,7 @@ class NeRFReal:
     def render(self,quit_event,loop=None,audio_track=None,video_track=None):
         #if self.opt.asr:
         #    self.asr.warm_up()
-        count=0
-        totaltime=0
-        
+        
         if self.opt.transport=='rtmp':
             from rtmp_streaming import StreamerConfig, Streamer
             fps=25
@@ -242,6 +242,10 @@ class NeRFReal:
             self.streamer.init(sc)
             #self.streamer.enable_av_debug_log()
 
+        count=0
+        totaltime=0
+        _starttime=time.perf_counter()
+        _totalframe=0
         while not quit_event.is_set(): #todo
             # update texture every frame
             # audio stream thread...
@@ -253,11 +257,12 @@ class NeRFReal:
 
             self.test_step(loop,audio_track,video_track)
             totaltime += (time.perf_counter() - t)
             count += 1
+            _totalframe += 1
             if count==100:
                 print(f"------actual avg infer fps:{count/totaltime:.4f}")
                 count=0
                 totaltime=0
-            delay = 0.04 - (time.perf_counter() - t) #40ms
+            delay = _starttime+_totalframe*0.04-time.perf_counter() #40ms
             if delay > 0:
                 time.sleep(delay)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 3fd1921..c3a3f0a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -32,3 +32,4 @@ flask
 flask_sockets
 opencv-python-headless
 aiortc
+aiohttp_cors
diff --git a/webrtc.py b/webrtc.py
index 4cca086..9fc94d1 100644
--- a/webrtc.py
+++ b/webrtc.py
@@ -7,7 +7,9 @@ import time
 from typing import Tuple, Dict, Optional, Set, Union
 from av.frame import Frame
 from av.packet import Packet
+from av import AudioFrame
 import fractions
+import numpy as np
 
 AUDIO_PTIME = 0.020 # 20ms audio packetization
 VIDEO_CLOCK_RATE = 90000
@@ -52,9 +54,9 @@ class PlayerStreamTrack(MediaStreamTrack):
         if hasattr(self, "_timestamp"):
             # self._timestamp = (time.time()-self._start) * VIDEO_CLOCK_RATE
             self._timestamp += int(VIDEO_PTIME * VIDEO_CLOCK_RATE)
-            # wait = self._start + (self._timestamp / VIDEO_CLOCK_RATE) - time.time()
-            # if wait>0:
-            #     await asyncio.sleep(wait)
+            wait = self._start + (self._timestamp / VIDEO_CLOCK_RATE) - time.time()
+            if wait>0:
+                await asyncio.sleep(wait)
         else:
             self._start = time.time()
             self._timestamp = 0
@@ -63,9 +65,9 @@ class PlayerStreamTrack(MediaStreamTrack):
         if hasattr(self, "_timestamp"):
             # self._timestamp = (time.time()-self._start) * SAMPLE_RATE
             self._timestamp += int(AUDIO_PTIME * SAMPLE_RATE)
-            # wait = self._start + (self._timestamp / SAMPLE_RATE) - time.time()
-            # if wait>0:
-            #     await asyncio.sleep(wait)
+            wait = self._start + (self._timestamp / SAMPLE_RATE) - time.time()
+            if wait>0:
+                await asyncio.sleep(wait)
         else:
             self._start = time.time()
             self._timestamp = 0
@@ -74,6 +76,22 @@ class PlayerStreamTrack(MediaStreamTrack):
     async def recv(self) -> Union[Frame, Packet]:
         # frame = self.frames[self.counter % 30]
         self._player._start(self)
+        # if self.kind == 'video':
+        #     frame = await self._queue.get()
+        # else: #audio
+        #     if hasattr(self, "_timestamp"):
+        #         wait = self._start + self._timestamp / SAMPLE_RATE + AUDIO_PTIME - time.time()
+        #         if wait>0:
+        #             await asyncio.sleep(wait)
+        #         if self._queue.qsize()<1:
+        #             #frame = AudioFrame(format='s16', layout='mono', samples=320)
+        #             audio = np.zeros((1, 320), dtype=np.int16)
+        #             frame = AudioFrame.from_ndarray(audio, layout='mono', format='s16')
+        #             frame.sample_rate=16000
+        #         else:
+        #             frame = await self._queue.get()
+        #     else:
+        #         frame = await self._queue.get()
         frame = await self._queue.get()
         pts, time_base = await self.next_timestamp()
         frame.pts = pts