default transport use rtcpush

This commit is contained in:
lipku 2024-05-02 20:32:28 +08:00
parent 4137e5bce6
commit 71009f9f28
7 changed files with 323 additions and 33 deletions

View File

@ -26,15 +26,16 @@ pip install tensorflow-gpu==2.8.0
```
linux cuda环境搭建可以参考这篇文章 https://zhuanlan.zhihu.com/p/674972886
### 1.2 安装rtmpstream库
参照 https://github.com/lipku/python_rtmpstream
## 2. Run
## 2. Quick Start
默认采用webrtc推流到srs
### 2.1 运行rtmpserver (srs)
```
docker run --rm -it -p 1935:1935 -p 1985:1985 -p 8080:8080 registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5
export CANDIDATE='<服务器外网ip>'
docker run --rm --env CANDIDATE=$CANDIDATE \
-p 1935:1935 -p 8080:8080 -p 1985:1985 -p 8000:8000/udp \
registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5 \
objs/srs -c conf/rtc.conf
```
### 2.2 启动数字人:
@ -48,16 +49,15 @@ python app.py
export HF_ENDPOINT=https://hf-mirror.com
```
运行成功后用vlc访问rtmp://serverip/live/livestream
用浏览器打开http://serverip:8010/echo.html, 在文本框输入任意文字,提交。数字人播报该段文字
用浏览器打开http://serverip:8010/rtcpush.html, 在文本框输入任意文字,提交。数字人播报该段文字
备注:服务端需要开放端口 tcp:8000,8010,1985; udp:8000
## 3. More Usage
### 3.1 使用LLM模型进行数字人对话
目前借鉴数字人对话系统[LinlyTalker](https://github.com/Kedreamix/Linly-Talker)的方式,LLM模型支持Chatgpt,Qwen和GeminiPro。需要在app.py中填入自己的api_key。
用浏览器打开http://serverip:8010/chat.html
用浏览器打开http://serverip:8010/rtcpushchat.html
### 3.2 声音克隆
可以任意选用下面两种服务,推荐用gpt-sovits
@ -106,28 +106,26 @@ python app.py --fullbody --fullbody_img data/fullbody/img --fullbody_offset_x 10
- --W、--H 训练视频的宽、高
- ernerf训练第三步torso如果训练的不好,在拼接处会有接缝。可以在上面的命令加上--torso_imgs data/xxx/torso_imgs,torso不用模型推理,直接用训练数据集里的torso图片。这种方式可能头颈处会有些人工痕迹。
### 3.6 webrtc
#### 3.6.1 p2p模式
### 3.6 webrtc p2p
此种模式不需要srs
```
python app.py --transport webrtc
```
用浏览器打开http://serverip:8010/webrtc.html
#### 3.6.2 通过srs一对多
启动srs
### 3.7 rtmp推送到srs
- 安装rtmpstream库
参照 https://github.com/lipku/python_rtmpstream
- 启动srs
```
export CANDIDATE='<服务器外网ip>'
docker run --rm --env CANDIDATE=$CANDIDATE \
-p 1935:1935 -p 8080:8080 -p 1985:1985 -p 8000:8000/udp \
registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5 \
objs/srs -c conf/rtc.conf
docker run --rm -it -p 1935:1935 -p 1985:1985 -p 8080:8080 registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5
```
然后运行
- 然后运行
```python
python app.py --transport rtmp --push_url 'rtmp://localhost/live/livestream'
```
python app.py --transport rtcpush --push_url 'http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream'
```
用浏览器打开http://serverip:8010/rtcpush.html
用浏览器打开http://serverip:8010/echo.html
## 4. Docker Run
不需要第1步的安装,直接运行。
@ -159,10 +157,7 @@ docker版本已经不是最新代码可以作为一个空环境把最新
整体延时3s左右
(1)tts延时1.7s左右,目前用的edgetts,需要将每句话转完后一次性输入,可以优化tts改成流式输入
(2)wav2vec延时0.4s,需要缓存18帧音频做计算
(3)srs转发延时,设置srs服务器减少缓冲延时。具体配置可看 https://ossrs.net/lts/zh-cn/docs/v5/doc/low-latency, 配置了一个低延时版本
```python
docker run --rm -it -p 1935:1935 -p 1985:1985 -p 8080:8080 registry.cn-hangzhou.aliyuncs.com/lipku/srs:v1.1
```
(3)srs转发延时,设置srs服务器减少缓冲延时。具体配置可看 https://ossrs.net/lts/zh-cn/docs/v5/doc/low-latency
## 8. TODO
- [x] 添加chatgpt实现数字人对话

6
app.py
View File

@ -372,8 +372,8 @@ if __name__ == '__main__':
# parser.add_argument('--asr_model', type=str, default='facebook/wav2vec2-large-960h-lv60-self')
# parser.add_argument('--asr_model', type=str, default='facebook/hubert-large-ls960-ft')
parser.add_argument('--transport', type=str, default='rtmp') #rtmp webrtc rtcpush
parser.add_argument('--push_url', type=str, default='rtmp://localhost/live/livestream') #http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream
parser.add_argument('--transport', type=str, default='rtcpush') #rtmp webrtc rtcpush
parser.add_argument('--push_url', type=str, default='http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream') #rtmp://localhost/live/livestream
parser.add_argument('--asr_save_feats', action='store_true')
# audio FPS
@ -403,7 +403,7 @@ if __name__ == '__main__':
tts_type = opt.tts
if tts_type == "xtts":
print("Computing the latents for a new reference...")
gspeaker = get_speaker(opt.REF_FILE, opt.tts_server)
gspeaker = get_speaker(opt.REF_FILE, opt.TTS_SERVER)
# assert test mode
opt.test = True

View File

@ -11,7 +11,6 @@ import cv2
from asrreal import ASR
import asyncio
from rtmp_streaming import StreamerConfig, Streamer
from av import AudioFrame, VideoFrame
class NeRFReal:
@ -202,6 +201,7 @@ class NeRFReal:
totaltime=0
if self.opt.transport=='rtmp':
from rtmp_streaming import StreamerConfig, Streamer
fps=25
#push_url='rtmp://localhost/live/livestream' #'data/video/output_0.mp4'
sc = StreamerConfig()
@ -236,7 +236,7 @@ class NeRFReal:
totaltime += (time.perf_counter() - t)
count += 1
if count==100:
print(f"------actual avg fps:{count/totaltime:.4f}")
print(f"------actual avg infer fps:{count/totaltime:.4f}")
count=0
totaltime=0
delay = 0.04 - (time.perf_counter() - t) #40ms

125
web/rtcpushchat.html Normal file
View File

@ -0,0 +1,125 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>WebRTC webcam</title>
<style>
button {
padding: 8px 16px;
}
video {
width: 100%;
}
.option {
margin-bottom: 8px;
}
#media {
max-width: 1280px;
}
</style>
</head>
<body>
<div class="option">
<input id="use-stun" type="checkbox"/>
<label for="use-stun">Use STUN server</label>
</div>
<button class="btn btn-primary" id="btn_play">Start</button>
<form class="form-inline" id="echo-form">
<div class="form-group">
<p>input text</p>
<textarea cols="2" rows="3" style="width:600px;height:50px;" class="form-control" id="message">test</textarea>
</div>
<button type="submit" class="btn btn-default">Send</button>
</form>
<div id="media">
<h2>Media</h2>
<video id="rtc_media_player" style="width:600px;" controls autoplay></video>
</div>
<script src="srs.sdk.js"></script>
<script type="text/javascript" src="http://cdn.sockjs.org/sockjs-0.3.4.js"></script>
<script type="text/javascript" src="https://ajax.aspnetcdn.com/ajax/jquery/jquery-2.1.1.min.js"></script>
</body>
<script type="text/javascript" charset="utf-8">
// Chat channel: opens a WebSocket to ws://<page host>:8000/humanchat and sends
// the contents of the #message textarea each time #echo-form is submitted.
$(document).ready(function() {
    var host = window.location.hostname;
    var ws = new WebSocket("ws://" + host + ":8000/humanchat");
    //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);

    ws.onopen = function() {
        console.log('Connected');
    };

    ws.onmessage = function(e) {
        console.log('Received: ' + e.data);
        // Parse into a local variable (the original assigned to an undeclared
        // `data`, leaking an implicit global) and tolerate frames that are not
        // valid JSON instead of throwing out of the handler.
        var vid;
        try {
            vid = JSON.parse(e.data);
        } catch (err) {
            console.warn('Ignoring non-JSON message', err);
            return;
        }
        console.log(typeof(vid), vid);
        //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
    };

    ws.onclose = function(e) {
        console.log('Closed');
    };

    $('#echo-form').on('submit', function(e) {
        // Always suppress the native form submission; the text goes over the socket.
        e.preventDefault();
        // send() throws while the socket is still connecting and silently
        // discards data once it is closing/closed, so keep the user's text in
        // the box unless the socket is actually open.
        if (ws.readyState !== WebSocket.OPEN) {
            console.warn('WebSocket is not open; message not sent');
            return;
        }
        var message = $('#message').val();
        console.log('Sending: ' + message);
        ws.send(message);
        $('#message').val('');
    });
});
// Player setup: wires the #btn_play button to start (or restart) playback of
// the WHEP stream in the #rtc_media_player video element.
$(function(){
    // Current player instance; kept between clicks so a replay can close the
    // previous session first.
    var sdk = null;
    var $player = $('#rtc_media_player');

    function startPlay() {
        $player.show();

        // Tear down the previous session when the user presses Start again.
        if (sdk) {
            sdk.close();
        }
        // SrsRtcWhipWhepAsync is presumably provided by srs.sdk.js — confirm.
        sdk = new SrsRtcWhipWhepAsync();

        // Attach the SDK's stream object to the <video> element up front.
        // See https://webrtc.org/getting-started/remote-streams
        $player.prop('srcObject', sdk.stream);
        // Optional callback, SDK will add track to stream.
        // sdk.ontrack = function (event) { console.log('Got track', event); sdk.stream.addTrack(event.track); };

        // WHEP endpoint on port 1985 of the host that served this page.
        var whepUrl = "http://" + window.location.hostname + ":1985/rtc/v1/whep/?app=live&stream=livestream";

        sdk.play(whepUrl).then(function(session){
            // Nothing to do on success; the session info is currently unused.
        }).catch(function (reason) {
            // On failure release the session and hide the player again.
            sdk.close();
            $player.hide();
            console.error(reason);
        });
    }

    // The player stays hidden until the user explicitly starts playback.
    $player.hide();
    $("#btn_play").click(startPlay);

    // Autostart is intentionally not wired up here; see
    // https://github.com/ossrs/srs/issues/2732 (never play until window loaded)
    // and browser autoplay policies, which require a muted element.
});
</script>
</html>

83
web/webrtcchat.html Normal file
View File

@ -0,0 +1,83 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>WebRTC webcam</title>
<style>
button {
padding: 8px 16px;
}
video {
width: 100%;
}
.option {
margin-bottom: 8px;
}
#media {
max-width: 1280px;
}
</style>
</head>
<body>
<div class="option">
<input id="use-stun" type="checkbox"/>
<label for="use-stun">Use STUN server</label>
</div>
<button id="start" onclick="start()">Start</button>
<button id="stop" style="display: none" onclick="stop()">Stop</button>
<form class="form-inline" id="echo-form">
<div class="form-group">
<p>input text</p>
<textarea cols="2" rows="3" style="width:600px;height:50px;" class="form-control" id="message">test</textarea>
</div>
<button type="submit" class="btn btn-default">Send</button>
</form>
<div id="media">
<h2>Media</h2>
<audio id="audio" autoplay="true"></audio>
<video id="video" style="width:600px;" autoplay="true" playsinline="true"></video>
</div>
<script src="client.js"></script>
<script type="text/javascript" src="http://cdn.sockjs.org/sockjs-0.3.4.js"></script>
<script type="text/javascript" src="https://ajax.aspnetcdn.com/ajax/jquery/jquery-2.1.1.min.js"></script>
</body>
<script type="text/javascript" charset="utf-8">
// Chat channel: opens a WebSocket to ws://<page host>:8000/humanchat and sends
// the contents of the #message textarea each time #echo-form is submitted.
$(document).ready(function() {
    var host = window.location.hostname;
    var ws = new WebSocket("ws://" + host + ":8000/humanchat");
    //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);

    ws.onopen = function() {
        console.log('Connected');
    };

    ws.onmessage = function(e) {
        console.log('Received: ' + e.data);
        // Parse into a local variable (the original assigned to an undeclared
        // `data`, leaking an implicit global) and tolerate frames that are not
        // valid JSON instead of throwing out of the handler.
        var vid;
        try {
            vid = JSON.parse(e.data);
        } catch (err) {
            console.warn('Ignoring non-JSON message', err);
            return;
        }
        console.log(typeof(vid), vid);
        //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
    };

    ws.onclose = function(e) {
        console.log('Closed');
    };

    $('#echo-form').on('submit', function(e) {
        // Always suppress the native form submission; the text goes over the socket.
        e.preventDefault();
        // send() throws while the socket is still connecting and silently
        // discards data once it is closing/closed, so keep the user's text in
        // the box unless the socket is actually open.
        if (ws.readyState !== WebSocket.OPEN) {
            console.warn('WebSocket is not open; message not sent');
            return;
        }
        var message = $('#message').val();
        console.log('Sending: ' + message);
        ws.send(message);
        $('#message').val('');
    });
});
</script>
</html>

75
web/whep.js Normal file
View File

@ -0,0 +1,75 @@
var pc = null;
/**
 * Negotiate a receive-only WebRTC session over WHEP using the global `pc`.
 *
 * Adds recvonly video and audio transceivers, creates and applies the local
 * offer, waits until ICE gathering is complete, POSTs the offer SDP to the
 * WHEP URL on port 1985 of the page's host, and applies the response body as
 * the remote answer.
 *
 * @returns {Promise<void>} Resolves when the chain finishes. Any failure is
 *     shown with alert() and is not rethrown to the caller.
 */
function negotiate() {
    var host = window.location.hostname;
    pc.addTransceiver('video', { direction: 'recvonly' });
    pc.addTransceiver('audio', { direction: 'recvonly' });
    return pc.createOffer().then((offer) => {
        return pc.setLocalDescription(offer);
    }).then(() => {
        // wait for ICE gathering to complete
        return new Promise((resolve) => {
            if (pc.iceGatheringState === 'complete') {
                resolve();
            } else {
                const checkState = () => {
                    if (pc.iceGatheringState === 'complete') {
                        pc.removeEventListener('icegatheringstatechange', checkState);
                        resolve();
                    }
                };
                pc.addEventListener('icegatheringstatechange', checkState);
            }
        });
    }).then(() => {
        var offer = pc.localDescription;
        return fetch("http://" + host + ":1985/rtc/v1/whep/?app=live&stream=livestream", {
            body: offer.sdp,
            headers: {
                'Content-Type': 'application/sdp'
            },
            method: 'POST'
        });
    }).then((response) => {
        console.log(response);
        // Surface HTTP-level failures instead of feeding an error page to
        // setRemoteDescription.
        if (!response.ok) {
            throw new Error('WHEP request failed: ' + response.status + ' ' + response.statusText);
        }
        // A fetch Response has no `data` property (that is the axios shape);
        // the SDP answer has to be read from the body as text.
        return response.text();
    }).then((answer) => {
        return pc.setRemoteDescription({ sdp: answer, type: 'answer' });
    }).catch((e) => {
        alert(e);
    });
}
/**
 * Create the global peer connection, route incoming tracks to the page's
 * media elements, and kick off negotiation.
 *
 * Reads the #use-stun checkbox to decide whether to configure Google's public
 * STUN server. Swaps the #start button for the #stop button.
 */
function start() {
    var useStun = document.getElementById('use-stun').checked;

    var config = { sdpSemantics: 'unified-plan' };
    if (useStun) {
        config.iceServers = [{ urls: ['stun:stun.l.google.com:19302'] }];
    }

    pc = new RTCPeerConnection(config);

    // connect audio / video: video tracks go to #video, everything else to #audio
    pc.addEventListener('track', function (evt) {
        var targetId = evt.track.kind == 'video' ? 'video' : 'audio';
        document.getElementById(targetId).srcObject = evt.streams[0];
    });

    var startButton = document.getElementById('start');
    startButton.style.display = 'none';

    negotiate();

    var stopButton = document.getElementById('stop');
    stopButton.style.display = 'inline-block';
}
/**
 * Hide the #stop button and close the global peer connection after a 500 ms
 * delay.
 */
function stop() {
    var stopButton = document.getElementById('stop');
    stopButton.style.display = 'none';

    // close peer connection; `pc` is looked up when the timer fires
    setTimeout(function () {
        pc.close();
    }, 500);
}

View File

@ -36,6 +36,10 @@ class PlayerStreamTrack(MediaStreamTrack):
self.kind = kind
self._player = player
self._queue = asyncio.Queue()
if self.kind == 'video':
self.framecount = 0
self.lasttime = time.perf_counter()
self.totaltime = 0
_start: float
_timestamp: int
@ -68,7 +72,7 @@ class PlayerStreamTrack(MediaStreamTrack):
return self._timestamp, AUDIO_TIME_BASE
async def recv(self) -> Union[Frame, Packet]:
# frame = self.frames[self.counter % 30]
# frame = self.frames[self.counter % 30]
self._player._start(self)
frame = await self._queue.get()
pts, time_base = await self.next_timestamp()
@ -77,6 +81,14 @@ class PlayerStreamTrack(MediaStreamTrack):
if frame is None:
self.stop()
raise Exception
if self.kind == 'video':
self.totaltime += (time.perf_counter() - self.lasttime)
self.framecount += 1
self.lasttime = time.perf_counter()
if self.framecount==100:
print(f"------actual avg final fps:{self.framecount/self.totaltime:.4f}")
self.framecount = 0
self.totaltime=0
return frame
def stop(self):