improve audio quality

This commit is contained in:
lipku 2024-05-12 10:30:47 +08:00
parent 3e702b8a12
commit 14b7772475
4 changed files with 49 additions and 12 deletions

17
app.py
View File

@@ -15,6 +15,7 @@ import multiprocessing
from aiohttp import web
import aiohttp
import aiohttp_cors
from aiortc import RTCPeerConnection, RTCSessionDescription
from webrtc import HumanPlayer
@@ -106,9 +107,9 @@ def gpt_sovits(text, character, language, server_url, emotion) -> Iterator[bytes
if res.status_code != 200:
print("Error:", res.text)
return
first = True
for chunk in res.iter_content(chunk_size=1280): #32K*20ms*2
for chunk in res.iter_content(chunk_size=32000): # 1280 32K*20ms*2
if first:
end = time.perf_counter()
print(f"gpt_sovits Time to first chunk: {end-start}s")
@@ -512,6 +513,18 @@ if __name__ == '__main__':
appasync.router.add_post("/human", human)
appasync.router.add_static('/',path='web')
# Configure default CORS settings.
cors = aiohttp_cors.setup(appasync, defaults={
"*": aiohttp_cors.ResourceOptions(
allow_credentials=True,
expose_headers="*",
allow_headers="*",
)
})
# Configure CORS on all routes.
for route in list(appasync.router.routes()):
cors.add(route)
def run_server(runner):
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

View File

@@ -162,6 +162,8 @@ class NeRFReal:
new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0])
new_frame.planes[0].update(frame.tobytes())
new_frame.sample_rate=16000
# if audio_track._queue.qsize()>10:
# time.sleep(0.1)
asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop)
#t = time.time()
if self.opt.customvideo and audiotype!=0:
@@ -215,9 +217,7 @@ class NeRFReal:
def render(self,quit_event,loop=None,audio_track=None,video_track=None):
#if self.opt.asr:
# self.asr.warm_up()
count=0
totaltime=0
if self.opt.transport=='rtmp':
from rtmp_streaming import StreamerConfig, Streamer
fps=25
@@ -242,6 +242,10 @@
self.streamer.init(sc)
#self.streamer.enable_av_debug_log()
count=0
totaltime=0
_starttime=time.perf_counter()
_totalframe=0
while not quit_event.is_set(): #todo
# update texture every frame
# audio stream thread...
@@ -253,11 +257,12 @@
self.test_step(loop,audio_track,video_track)
totaltime += (time.perf_counter() - t)
count += 1
_totalframe += 1
if count==100:
print(f"------actual avg infer fps:{count/totaltime:.4f}")
count=0
totaltime=0
delay = 0.04 - (time.perf_counter() - t) #40ms
delay = _starttime+_totalframe*0.04-time.perf_counter() #40ms
if delay > 0:
time.sleep(delay)

View File

@@ -32,3 +32,4 @@ flask
flask_sockets
opencv-python-headless
aiortc
aiohttp_cors

View File

@@ -7,7 +7,9 @@ import time
from typing import Tuple, Dict, Optional, Set, Union
from av.frame import Frame
from av.packet import Packet
from av import AudioFrame
import fractions
import numpy as np
AUDIO_PTIME = 0.020 # 20ms audio packetization
VIDEO_CLOCK_RATE = 90000
@@ -52,9 +54,9 @@ class PlayerStreamTrack(MediaStreamTrack):
if hasattr(self, "_timestamp"):
# self._timestamp = (time.time()-self._start) * VIDEO_CLOCK_RATE
self._timestamp += int(VIDEO_PTIME * VIDEO_CLOCK_RATE)
# wait = self._start + (self._timestamp / VIDEO_CLOCK_RATE) - time.time()
# if wait>0:
# await asyncio.sleep(wait)
wait = self._start + (self._timestamp / VIDEO_CLOCK_RATE) - time.time()
if wait>0:
await asyncio.sleep(wait)
else:
self._start = time.time()
self._timestamp = 0
@@ -63,9 +65,9 @@ class PlayerStreamTrack(MediaStreamTrack):
if hasattr(self, "_timestamp"):
# self._timestamp = (time.time()-self._start) * SAMPLE_RATE
self._timestamp += int(AUDIO_PTIME * SAMPLE_RATE)
# wait = self._start + (self._timestamp / SAMPLE_RATE) - time.time()
# if wait>0:
# await asyncio.sleep(wait)
wait = self._start + (self._timestamp / SAMPLE_RATE) - time.time()
if wait>0:
await asyncio.sleep(wait)
else:
self._start = time.time()
self._timestamp = 0
@@ -74,6 +76,22 @@
async def recv(self) -> Union[Frame, Packet]:
# frame = self.frames[self.counter % 30]
self._player._start(self)
# if self.kind == 'video':
# frame = await self._queue.get()
# else: #audio
# if hasattr(self, "_timestamp"):
# wait = self._start + self._timestamp / SAMPLE_RATE + AUDIO_PTIME - time.time()
# if wait>0:
# await asyncio.sleep(wait)
# if self._queue.qsize()<1:
# #frame = AudioFrame(format='s16', layout='mono', samples=320)
# audio = np.zeros((1, 320), dtype=np.int16)
# frame = AudioFrame.from_ndarray(audio, layout='mono', format='s16')
# frame.sample_rate=16000
# else:
# frame = await self._queue.get()
# else:
# frame = await self._queue.get()
frame = await self._queue.get()
pts, time_base = await self.next_timestamp()
frame.pts = pts