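Improve audio quality: pace WebRTC audio/video frames against the wall clock, fetch TTS audio in larger chunks, and enable CORS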

This commit is contained in:
lipku 2024-05-12 10:30:47 +08:00
parent 3e702b8a12
commit 14b7772475
4 changed files with 49 additions and 12 deletions

17
app.py
View File

@ -15,6 +15,7 @@ import multiprocessing
from aiohttp import web from aiohttp import web
import aiohttp import aiohttp
import aiohttp_cors
from aiortc import RTCPeerConnection, RTCSessionDescription from aiortc import RTCPeerConnection, RTCSessionDescription
from webrtc import HumanPlayer from webrtc import HumanPlayer
@ -106,9 +107,9 @@ def gpt_sovits(text, character, language, server_url, emotion) -> Iterator[bytes
if res.status_code != 200: if res.status_code != 200:
print("Error:", res.text) print("Error:", res.text)
return return
first = True first = True
for chunk in res.iter_content(chunk_size=1280): #32K*20ms*2 for chunk in res.iter_content(chunk_size=32000): # 1280 32K*20ms*2
if first: if first:
end = time.perf_counter() end = time.perf_counter()
print(f"gpt_sovits Time to first chunk: {end-start}s") print(f"gpt_sovits Time to first chunk: {end-start}s")
@ -512,6 +513,18 @@ if __name__ == '__main__':
appasync.router.add_post("/human", human) appasync.router.add_post("/human", human)
appasync.router.add_static('/',path='web') appasync.router.add_static('/',path='web')
# Configure default CORS settings.
cors = aiohttp_cors.setup(appasync, defaults={
"*": aiohttp_cors.ResourceOptions(
allow_credentials=True,
expose_headers="*",
allow_headers="*",
)
})
# Configure CORS on all routes.
for route in list(appasync.router.routes()):
cors.add(route)
def run_server(runner): def run_server(runner):
loop = asyncio.new_event_loop() loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop) asyncio.set_event_loop(loop)

View File

@ -162,6 +162,8 @@ class NeRFReal:
new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0]) new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0])
new_frame.planes[0].update(frame.tobytes()) new_frame.planes[0].update(frame.tobytes())
new_frame.sample_rate=16000 new_frame.sample_rate=16000
# if audio_track._queue.qsize()>10:
# time.sleep(0.1)
asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop) asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop)
#t = time.time() #t = time.time()
if self.opt.customvideo and audiotype!=0: if self.opt.customvideo and audiotype!=0:
@ -215,9 +217,7 @@ class NeRFReal:
def render(self,quit_event,loop=None,audio_track=None,video_track=None): def render(self,quit_event,loop=None,audio_track=None,video_track=None):
#if self.opt.asr: #if self.opt.asr:
# self.asr.warm_up() # self.asr.warm_up()
count=0
totaltime=0
if self.opt.transport=='rtmp': if self.opt.transport=='rtmp':
from rtmp_streaming import StreamerConfig, Streamer from rtmp_streaming import StreamerConfig, Streamer
fps=25 fps=25
@ -242,6 +242,10 @@ class NeRFReal:
self.streamer.init(sc) self.streamer.init(sc)
#self.streamer.enable_av_debug_log() #self.streamer.enable_av_debug_log()
count=0
totaltime=0
_starttime=time.perf_counter()
_totalframe=0
while not quit_event.is_set(): #todo while not quit_event.is_set(): #todo
# update texture every frame # update texture every frame
# audio stream thread... # audio stream thread...
@ -253,11 +257,12 @@ class NeRFReal:
self.test_step(loop,audio_track,video_track) self.test_step(loop,audio_track,video_track)
totaltime += (time.perf_counter() - t) totaltime += (time.perf_counter() - t)
count += 1 count += 1
_totalframe += 1
if count==100: if count==100:
print(f"------actual avg infer fps:{count/totaltime:.4f}") print(f"------actual avg infer fps:{count/totaltime:.4f}")
count=0 count=0
totaltime=0 totaltime=0
delay = 0.04 - (time.perf_counter() - t) #40ms delay = _starttime+_totalframe*0.04-time.perf_counter() #40ms
if delay > 0: if delay > 0:
time.sleep(delay) time.sleep(delay)

View File

@ -32,3 +32,4 @@ flask
flask_sockets flask_sockets
opencv-python-headless opencv-python-headless
aiortc aiortc
aiohttp_cors

View File

@ -7,7 +7,9 @@ import time
from typing import Tuple, Dict, Optional, Set, Union from typing import Tuple, Dict, Optional, Set, Union
from av.frame import Frame from av.frame import Frame
from av.packet import Packet from av.packet import Packet
from av import AudioFrame
import fractions import fractions
import numpy as np
AUDIO_PTIME = 0.020 # 20ms audio packetization AUDIO_PTIME = 0.020 # 20ms audio packetization
VIDEO_CLOCK_RATE = 90000 VIDEO_CLOCK_RATE = 90000
@ -52,9 +54,9 @@ class PlayerStreamTrack(MediaStreamTrack):
if hasattr(self, "_timestamp"): if hasattr(self, "_timestamp"):
# self._timestamp = (time.time()-self._start) * VIDEO_CLOCK_RATE # self._timestamp = (time.time()-self._start) * VIDEO_CLOCK_RATE
self._timestamp += int(VIDEO_PTIME * VIDEO_CLOCK_RATE) self._timestamp += int(VIDEO_PTIME * VIDEO_CLOCK_RATE)
# wait = self._start + (self._timestamp / VIDEO_CLOCK_RATE) - time.time() wait = self._start + (self._timestamp / VIDEO_CLOCK_RATE) - time.time()
# if wait>0: if wait>0:
# await asyncio.sleep(wait) await asyncio.sleep(wait)
else: else:
self._start = time.time() self._start = time.time()
self._timestamp = 0 self._timestamp = 0
@ -63,9 +65,9 @@ class PlayerStreamTrack(MediaStreamTrack):
if hasattr(self, "_timestamp"): if hasattr(self, "_timestamp"):
# self._timestamp = (time.time()-self._start) * SAMPLE_RATE # self._timestamp = (time.time()-self._start) * SAMPLE_RATE
self._timestamp += int(AUDIO_PTIME * SAMPLE_RATE) self._timestamp += int(AUDIO_PTIME * SAMPLE_RATE)
# wait = self._start + (self._timestamp / SAMPLE_RATE) - time.time() wait = self._start + (self._timestamp / SAMPLE_RATE) - time.time()
# if wait>0: if wait>0:
# await asyncio.sleep(wait) await asyncio.sleep(wait)
else: else:
self._start = time.time() self._start = time.time()
self._timestamp = 0 self._timestamp = 0
@ -74,6 +76,22 @@ class PlayerStreamTrack(MediaStreamTrack):
async def recv(self) -> Union[Frame, Packet]: async def recv(self) -> Union[Frame, Packet]:
# frame = self.frames[self.counter % 30] # frame = self.frames[self.counter % 30]
self._player._start(self) self._player._start(self)
# if self.kind == 'video':
# frame = await self._queue.get()
# else: #audio
# if hasattr(self, "_timestamp"):
# wait = self._start + self._timestamp / SAMPLE_RATE + AUDIO_PTIME - time.time()
# if wait>0:
# await asyncio.sleep(wait)
# if self._queue.qsize()<1:
# #frame = AudioFrame(format='s16', layout='mono', samples=320)
# audio = np.zeros((1, 320), dtype=np.int16)
# frame = AudioFrame.from_ndarray(audio, layout='mono', format='s16')
# frame.sample_rate=16000
# else:
# frame = await self._queue.get()
# else:
# frame = await self._queue.get()
frame = await self._queue.get() frame = await self._queue.get()
pts, time_base = await self.next_timestamp() pts, time_base = await self.next_timestamp()
frame.pts = pts frame.pts = pts