improve audio quality
This commit is contained in:
parent
3e702b8a12
commit
14b7772475
15
app.py
15
app.py
|
@ -15,6 +15,7 @@ import multiprocessing
|
||||||
|
|
||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
import aiohttp_cors
|
||||||
from aiortc import RTCPeerConnection, RTCSessionDescription
|
from aiortc import RTCPeerConnection, RTCSessionDescription
|
||||||
from webrtc import HumanPlayer
|
from webrtc import HumanPlayer
|
||||||
|
|
||||||
|
@ -108,7 +109,7 @@ def gpt_sovits(text, character, language, server_url, emotion) -> Iterator[bytes
|
||||||
return
|
return
|
||||||
|
|
||||||
first = True
|
first = True
|
||||||
for chunk in res.iter_content(chunk_size=1280): #32K*20ms*2
|
for chunk in res.iter_content(chunk_size=32000): # 1280 32K*20ms*2
|
||||||
if first:
|
if first:
|
||||||
end = time.perf_counter()
|
end = time.perf_counter()
|
||||||
print(f"gpt_sovits Time to first chunk: {end-start}s")
|
print(f"gpt_sovits Time to first chunk: {end-start}s")
|
||||||
|
@ -512,6 +513,18 @@ if __name__ == '__main__':
|
||||||
appasync.router.add_post("/human", human)
|
appasync.router.add_post("/human", human)
|
||||||
appasync.router.add_static('/',path='web')
|
appasync.router.add_static('/',path='web')
|
||||||
|
|
||||||
|
# Configure default CORS settings.
|
||||||
|
cors = aiohttp_cors.setup(appasync, defaults={
|
||||||
|
"*": aiohttp_cors.ResourceOptions(
|
||||||
|
allow_credentials=True,
|
||||||
|
expose_headers="*",
|
||||||
|
allow_headers="*",
|
||||||
|
)
|
||||||
|
})
|
||||||
|
# Configure CORS on all routes.
|
||||||
|
for route in list(appasync.router.routes()):
|
||||||
|
cors.add(route)
|
||||||
|
|
||||||
def run_server(runner):
|
def run_server(runner):
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
asyncio.set_event_loop(loop)
|
asyncio.set_event_loop(loop)
|
||||||
|
|
11
nerfreal.py
11
nerfreal.py
|
@ -162,6 +162,8 @@ class NeRFReal:
|
||||||
new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0])
|
new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0])
|
||||||
new_frame.planes[0].update(frame.tobytes())
|
new_frame.planes[0].update(frame.tobytes())
|
||||||
new_frame.sample_rate=16000
|
new_frame.sample_rate=16000
|
||||||
|
# if audio_track._queue.qsize()>10:
|
||||||
|
# time.sleep(0.1)
|
||||||
asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop)
|
asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop)
|
||||||
#t = time.time()
|
#t = time.time()
|
||||||
if self.opt.customvideo and audiotype!=0:
|
if self.opt.customvideo and audiotype!=0:
|
||||||
|
@ -215,8 +217,6 @@ class NeRFReal:
|
||||||
def render(self,quit_event,loop=None,audio_track=None,video_track=None):
|
def render(self,quit_event,loop=None,audio_track=None,video_track=None):
|
||||||
#if self.opt.asr:
|
#if self.opt.asr:
|
||||||
# self.asr.warm_up()
|
# self.asr.warm_up()
|
||||||
count=0
|
|
||||||
totaltime=0
|
|
||||||
|
|
||||||
if self.opt.transport=='rtmp':
|
if self.opt.transport=='rtmp':
|
||||||
from rtmp_streaming import StreamerConfig, Streamer
|
from rtmp_streaming import StreamerConfig, Streamer
|
||||||
|
@ -242,6 +242,10 @@ class NeRFReal:
|
||||||
self.streamer.init(sc)
|
self.streamer.init(sc)
|
||||||
#self.streamer.enable_av_debug_log()
|
#self.streamer.enable_av_debug_log()
|
||||||
|
|
||||||
|
count=0
|
||||||
|
totaltime=0
|
||||||
|
_starttime=time.perf_counter()
|
||||||
|
_totalframe=0
|
||||||
while not quit_event.is_set(): #todo
|
while not quit_event.is_set(): #todo
|
||||||
# update texture every frame
|
# update texture every frame
|
||||||
# audio stream thread...
|
# audio stream thread...
|
||||||
|
@ -253,11 +257,12 @@ class NeRFReal:
|
||||||
self.test_step(loop,audio_track,video_track)
|
self.test_step(loop,audio_track,video_track)
|
||||||
totaltime += (time.perf_counter() - t)
|
totaltime += (time.perf_counter() - t)
|
||||||
count += 1
|
count += 1
|
||||||
|
_totalframe += 1
|
||||||
if count==100:
|
if count==100:
|
||||||
print(f"------actual avg infer fps:{count/totaltime:.4f}")
|
print(f"------actual avg infer fps:{count/totaltime:.4f}")
|
||||||
count=0
|
count=0
|
||||||
totaltime=0
|
totaltime=0
|
||||||
delay = 0.04 - (time.perf_counter() - t) #40ms
|
delay = _starttime+_totalframe*0.04-time.perf_counter() #40ms
|
||||||
if delay > 0:
|
if delay > 0:
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
|
|
|
@ -32,3 +32,4 @@ flask
|
||||||
flask_sockets
|
flask_sockets
|
||||||
opencv-python-headless
|
opencv-python-headless
|
||||||
aiortc
|
aiortc
|
||||||
|
aiohttp_cors
|
||||||
|
|
30
webrtc.py
30
webrtc.py
|
@ -7,7 +7,9 @@ import time
|
||||||
from typing import Tuple, Dict, Optional, Set, Union
|
from typing import Tuple, Dict, Optional, Set, Union
|
||||||
from av.frame import Frame
|
from av.frame import Frame
|
||||||
from av.packet import Packet
|
from av.packet import Packet
|
||||||
|
from av import AudioFrame
|
||||||
import fractions
|
import fractions
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
AUDIO_PTIME = 0.020 # 20ms audio packetization
|
AUDIO_PTIME = 0.020 # 20ms audio packetization
|
||||||
VIDEO_CLOCK_RATE = 90000
|
VIDEO_CLOCK_RATE = 90000
|
||||||
|
@ -52,9 +54,9 @@ class PlayerStreamTrack(MediaStreamTrack):
|
||||||
if hasattr(self, "_timestamp"):
|
if hasattr(self, "_timestamp"):
|
||||||
# self._timestamp = (time.time()-self._start) * VIDEO_CLOCK_RATE
|
# self._timestamp = (time.time()-self._start) * VIDEO_CLOCK_RATE
|
||||||
self._timestamp += int(VIDEO_PTIME * VIDEO_CLOCK_RATE)
|
self._timestamp += int(VIDEO_PTIME * VIDEO_CLOCK_RATE)
|
||||||
# wait = self._start + (self._timestamp / VIDEO_CLOCK_RATE) - time.time()
|
wait = self._start + (self._timestamp / VIDEO_CLOCK_RATE) - time.time()
|
||||||
# if wait>0:
|
if wait>0:
|
||||||
# await asyncio.sleep(wait)
|
await asyncio.sleep(wait)
|
||||||
else:
|
else:
|
||||||
self._start = time.time()
|
self._start = time.time()
|
||||||
self._timestamp = 0
|
self._timestamp = 0
|
||||||
|
@ -63,9 +65,9 @@ class PlayerStreamTrack(MediaStreamTrack):
|
||||||
if hasattr(self, "_timestamp"):
|
if hasattr(self, "_timestamp"):
|
||||||
# self._timestamp = (time.time()-self._start) * SAMPLE_RATE
|
# self._timestamp = (time.time()-self._start) * SAMPLE_RATE
|
||||||
self._timestamp += int(AUDIO_PTIME * SAMPLE_RATE)
|
self._timestamp += int(AUDIO_PTIME * SAMPLE_RATE)
|
||||||
# wait = self._start + (self._timestamp / SAMPLE_RATE) - time.time()
|
wait = self._start + (self._timestamp / SAMPLE_RATE) - time.time()
|
||||||
# if wait>0:
|
if wait>0:
|
||||||
# await asyncio.sleep(wait)
|
await asyncio.sleep(wait)
|
||||||
else:
|
else:
|
||||||
self._start = time.time()
|
self._start = time.time()
|
||||||
self._timestamp = 0
|
self._timestamp = 0
|
||||||
|
@ -74,6 +76,22 @@ class PlayerStreamTrack(MediaStreamTrack):
|
||||||
async def recv(self) -> Union[Frame, Packet]:
|
async def recv(self) -> Union[Frame, Packet]:
|
||||||
# frame = self.frames[self.counter % 30]
|
# frame = self.frames[self.counter % 30]
|
||||||
self._player._start(self)
|
self._player._start(self)
|
||||||
|
# if self.kind == 'video':
|
||||||
|
# frame = await self._queue.get()
|
||||||
|
# else: #audio
|
||||||
|
# if hasattr(self, "_timestamp"):
|
||||||
|
# wait = self._start + self._timestamp / SAMPLE_RATE + AUDIO_PTIME - time.time()
|
||||||
|
# if wait>0:
|
||||||
|
# await asyncio.sleep(wait)
|
||||||
|
# if self._queue.qsize()<1:
|
||||||
|
# #frame = AudioFrame(format='s16', layout='mono', samples=320)
|
||||||
|
# audio = np.zeros((1, 320), dtype=np.int16)
|
||||||
|
# frame = AudioFrame.from_ndarray(audio, layout='mono', format='s16')
|
||||||
|
# frame.sample_rate=16000
|
||||||
|
# else:
|
||||||
|
# frame = await self._queue.get()
|
||||||
|
# else:
|
||||||
|
# frame = await self._queue.get()
|
||||||
frame = await self._queue.get()
|
frame = await self._queue.get()
|
||||||
pts, time_base = await self.next_timestamp()
|
pts, time_base = await self.next_timestamp()
|
||||||
frame.pts = pts
|
frame.pts = pts
|
||||||
|
|
Loading…
Reference in New Issue