From 5340e77e76fecb06878fe336a8e9121888842de2 Mon Sep 17 00:00:00 2001 From: lipku Date: Sun, 8 Sep 2024 22:53:37 +0800 Subject: [PATCH] webrtc prefer h264 codec --- README.md | 15 ++------------- app.py | 9 ++++++++- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 9bdc6e7..79d22fc 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ conda create -n nerfstream python=3.10 conda activate nerfstream conda install pytorch==1.12.1 torchvision==0.13.1 cudatoolkit=11.3 -c pytorch pip install -r requirements.txt -#如果只用musetalk或者wav2lip模型,不需要安装下面的库 +#如果不训练ernerf模型,不需要安装下面的库 pip install "git+https://github.com/facebookresearch/pytorch3d.git" pip install tensorflow-gpu==2.8.0 pip install --upgrade "protobuf<=3.20.1" @@ -70,22 +70,11 @@ docker run --gpus all -it --network=host --rm registry.cn-beijing.aliyuncs.com/c 提供如下镜像 - autodl镜像: [autodl教程](autodl/README.md) -- 好易智算镜像(不需要单独部srs服务器), -在后台运行如下命令 -``` -nginx -cd ~/srs/trunk -./objs/srs -c conf/rtc.tcp.udp.conf -cd ~/metahuman-stream -python app.py -``` -浏览器打开http:/rtcpushapi.html,如http://www.haoee.com:25232/rtcpushapi.html ## 5. 性能分析 1. 帧率 -在Tesla T4显卡上测试整体fps为18左右,如果去掉音视频编码推流,帧率在20左右。用4090显卡可以达到40多帧/秒。 -优化:新开一个线程运行音视频编码推流 +在Tesla T4显卡上测试整体fps为18左右,如果去掉音视频编码推流,帧率在20左右。用4090显卡可以达到40多帧/秒。 2. 延时 整体延时3s左右 (1)tts延时1.7s左右,目前用的edgetts,需要将每句话转完后一次性输入,可以优化tts改成流式输入 diff --git a/app.py b/app.py index 6591f5c..45281bb 100644 --- a/app.py +++ b/app.py @@ -17,6 +17,7 @@ from aiohttp import web import aiohttp import aiohttp_cors from aiortc import RTCPeerConnection, RTCSessionDescription +from aiortc.rtcrtpsender import RTCRtpSender from webrtc import HumanPlayer import argparse @@ -115,6 +116,12 @@ async def offer(request): player = HumanPlayer(nerfreals[sessionid]) audio_sender = pc.addTrack(player.audio) video_sender = pc.addTrack(player.video) + capabilities = RTCRtpSender.getCapabilities("video") + preferences = list(filter(lambda x: x.name == "H264", capabilities.codecs)) + preferences += list(filter(lambda x: x.name == "VP8", capabilities.codecs)) + preferences += list(filter(lambda x: x.name == "rtx", capabilities.codecs)) + transceiver = pc.getTransceivers()[1] + transceiver.setCodecPreferences(preferences) await pc.setRemoteDescription(offer) @@ -338,7 +345,7 @@ if __name__ == '__main__': parser.add_argument('--customvideo_config', type=str, default='') - parser.add_argument('--tts', type=str, default='edgetts') #xtts gpt-sovits + parser.add_argument('--tts', type=str, default='edgetts') #xtts gpt-sovits cosyvoice parser.add_argument('--REF_FILE', type=str, default=None) parser.add_argument('--REF_TEXT', type=str, default=None) parser.add_argument('--TTS_SERVER', type=str, default='http://127.0.0.1:9880') # http://localhost:9000