default transport use rtcpush
This commit is contained in:
parent
4137e5bce6
commit
71009f9f28
49
README.md
49
README.md
|
@ -26,15 +26,16 @@ pip install tensorflow-gpu==2.8.0
|
|||
```
|
||||
linux cuda环境搭建可以参考这篇文章 https://zhuanlan.zhihu.com/p/674972886
|
||||
|
||||
### 1.2 安装rtmpstream库
|
||||
参照 https://github.com/lipku/python_rtmpstream
|
||||
|
||||
|
||||
## 2. Run
|
||||
|
||||
## 2. Quick Start
|
||||
默认采用webrtc推流到srs
|
||||
### 2.1 运行rtmpserver (srs)
|
||||
```
|
||||
docker run --rm -it -p 1935:1935 -p 1985:1985 -p 8080:8080 registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5
|
||||
export CANDIDATE='<服务器外网ip>'
|
||||
docker run --rm --env CANDIDATE=$CANDIDATE \
|
||||
-p 1935:1935 -p 8080:8080 -p 1985:1985 -p 8000:8000/udp \
|
||||
registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5 \
|
||||
objs/srs -c conf/rtc.conf
|
||||
```
|
||||
|
||||
### 2.2 启动数字人:
|
||||
|
@ -48,16 +49,15 @@ python app.py
|
|||
export HF_ENDPOINT=https://hf-mirror.com
|
||||
```
|
||||
|
||||
运行成功后,用vlc访问rtmp://serverip/live/livestream
|
||||
|
||||
用浏览器打开http://serverip:8010/echo.html, 在文本框输入任意文字,提交。数字人播报该段文字
|
||||
用浏览器打开http://serverip:8010/rtcpush.html, 在文本框输入任意文字,提交。数字人播报该段文字
|
||||
备注:服务端需要开放端口 tcp:8000,8010,1985; udp:8000
|
||||
|
||||
## 3. More Usage
|
||||
### 3.1 使用LLM模型进行数字人对话
|
||||
|
||||
目前借鉴数字人对话系统[LinlyTalker](https://github.com/Kedreamix/Linly-Talker)的方式,LLM模型支持Chatgpt,Qwen和GeminiPro。需要在app.py中填入自己的api_key。
|
||||
|
||||
用浏览器打开http://serverip:8010/chat.html
|
||||
用浏览器打开http://serverip:8010/rtcpushchat.html
|
||||
|
||||
### 3.2 声音克隆
|
||||
可以任意选用下面两种服务,推荐用gpt-sovits
|
||||
|
@ -106,28 +106,26 @@ python app.py --fullbody --fullbody_img data/fullbody/img --fullbody_offset_x 10
|
|||
- --W、--H 训练视频的宽、高
|
||||
- ernerf训练第三步torso如果训练的不好,在拼接处会有接缝。可以在上面的命令加上--torso_imgs data/xxx/torso_imgs,torso不用模型推理,直接用训练数据集里的torso图片。这种方式可能头颈处会有些人工痕迹。
|
||||
|
||||
### 3.6 webrtc
|
||||
#### 3.6.1 p2p模式
|
||||
### 3.6 webrtc p2p
|
||||
此种模式不需要srs
|
||||
```
|
||||
python app.py --transport webrtc
|
||||
```
|
||||
用浏览器打开http://serverip:8010/webrtc.html
|
||||
|
||||
#### 3.6.2 通过srs一对多
|
||||
启动srs
|
||||
### 3.7 rtmp推送到srs
|
||||
- 安装rtmpstream库
|
||||
参照 https://github.com/lipku/python_rtmpstream
|
||||
|
||||
- 启动srs
|
||||
```
|
||||
export CANDIDATE='<服务器外网ip>'
|
||||
docker run --rm --env CANDIDATE=$CANDIDATE \
|
||||
-p 1935:1935 -p 8080:8080 -p 1985:1985 -p 8000:8000/udp \
|
||||
registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5 \
|
||||
objs/srs -c conf/rtc.conf
|
||||
docker run --rm -it -p 1935:1935 -p 1985:1985 -p 8080:8080 registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5
|
||||
```
|
||||
然后运行
|
||||
- 然后运行
|
||||
```python
|
||||
python app.py --transport rtmp --push_url 'rtmp://localhost/live/livestream'
|
||||
```
|
||||
python app.py --transport rtcpush --push_url 'http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream'
|
||||
```
|
||||
用浏览器打开http://serverip:8010/rtcpush.html
|
||||
用浏览器打开http://serverip:8010/echo.html
|
||||
|
||||
## 4. Docker Run
|
||||
不需要第1步的安装,直接运行。
|
||||
|
@ -159,10 +157,7 @@ docker版本已经不是最新代码,可以作为一个空环境,把最新
|
|||
整体延时3s左右
|
||||
(1)tts延时1.7s左右,目前用的edgetts,需要将每句话转完后一次性输入,可以优化tts改成流式输入
|
||||
(2)wav2vec延时0.4s,需要缓存18帧音频做计算
|
||||
(3)srs转发延时,设置srs服务器减少缓冲延时。具体配置可看 https://ossrs.net/lts/zh-cn/docs/v5/doc/low-latency, 配置了一个低延时版本
|
||||
```python
|
||||
docker run --rm -it -p 1935:1935 -p 1985:1985 -p 8080:8080 registry.cn-hangzhou.aliyuncs.com/lipku/srs:v1.1
|
||||
```
|
||||
(3)srs转发延时,设置srs服务器减少缓冲延时。具体配置可看 https://ossrs.net/lts/zh-cn/docs/v5/doc/low-latency
|
||||
|
||||
## 8. TODO
|
||||
- [x] 添加chatgpt实现数字人对话
|
||||
|
|
6
app.py
6
app.py
|
@ -372,8 +372,8 @@ if __name__ == '__main__':
|
|||
# parser.add_argument('--asr_model', type=str, default='facebook/wav2vec2-large-960h-lv60-self')
|
||||
# parser.add_argument('--asr_model', type=str, default='facebook/hubert-large-ls960-ft')
|
||||
|
||||
parser.add_argument('--transport', type=str, default='rtmp') #rtmp webrtc rtcpush
|
||||
parser.add_argument('--push_url', type=str, default='rtmp://localhost/live/livestream') #http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream
|
||||
parser.add_argument('--transport', type=str, default='rtcpush') #rtmp webrtc rtcpush
|
||||
parser.add_argument('--push_url', type=str, default='http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream') #rtmp://localhost/live/livestream
|
||||
|
||||
parser.add_argument('--asr_save_feats', action='store_true')
|
||||
# audio FPS
|
||||
|
@ -403,7 +403,7 @@ if __name__ == '__main__':
|
|||
tts_type = opt.tts
|
||||
if tts_type == "xtts":
|
||||
print("Computing the latents for a new reference...")
|
||||
gspeaker = get_speaker(opt.REF_FILE, opt.tts_server)
|
||||
gspeaker = get_speaker(opt.REF_FILE, opt.TTS_SERVER)
|
||||
|
||||
# assert test mode
|
||||
opt.test = True
|
||||
|
|
|
@ -11,7 +11,6 @@ import cv2
|
|||
|
||||
from asrreal import ASR
|
||||
import asyncio
|
||||
from rtmp_streaming import StreamerConfig, Streamer
|
||||
from av import AudioFrame, VideoFrame
|
||||
|
||||
class NeRFReal:
|
||||
|
@ -202,6 +201,7 @@ class NeRFReal:
|
|||
totaltime=0
|
||||
|
||||
if self.opt.transport=='rtmp':
|
||||
from rtmp_streaming import StreamerConfig, Streamer
|
||||
fps=25
|
||||
#push_url='rtmp://localhost/live/livestream' #'data/video/output_0.mp4'
|
||||
sc = StreamerConfig()
|
||||
|
@ -236,7 +236,7 @@ class NeRFReal:
|
|||
totaltime += (time.perf_counter() - t)
|
||||
count += 1
|
||||
if count==100:
|
||||
print(f"------actual avg fps:{count/totaltime:.4f}")
|
||||
print(f"------actual avg infer fps:{count/totaltime:.4f}")
|
||||
count=0
|
||||
totaltime=0
|
||||
delay = 0.04 - (time.perf_counter() - t) #40ms
|
||||
|
|
|
@ -0,0 +1,125 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8"/>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>WebRTC webcam</title>
|
||||
<style>
|
||||
button {
|
||||
padding: 8px 16px;
|
||||
}
|
||||
|
||||
video {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.option {
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
#media {
|
||||
max-width: 1280px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="option">
|
||||
<input id="use-stun" type="checkbox"/>
|
||||
<label for="use-stun">Use STUN server</label>
|
||||
</div>
|
||||
<button class="btn btn-primary" id="btn_play">Start</button>
|
||||
<form class="form-inline" id="echo-form">
|
||||
<div class="form-group">
|
||||
<p>input text</p>
|
||||
|
||||
<textarea cols="2" rows="3" style="width:600px;height:50px;" class="form-control" id="message">test</textarea>
|
||||
</div>
|
||||
<button type="submit" class="btn btn-default">Send</button>
|
||||
</form>
|
||||
|
||||
<div id="media">
|
||||
<h2>Media</h2>
|
||||
|
||||
<video id="rtc_media_player" style="width:600px;" controls autoplay></video>
|
||||
</div>
|
||||
|
||||
<script src="srs.sdk.js"></script>
|
||||
<script type="text/javascript" src="http://cdn.sockjs.org/sockjs-0.3.4.js"></script>
|
||||
<script type="text/javascript" src="https://ajax.aspnetcdn.com/ajax/jquery/jquery-2.1.1.min.js"></script>
|
||||
</body>
|
||||
<script type="text/javascript" charset="utf-8">
|
||||
|
||||
// Page bootstrap: open the chat WebSocket to the server on port 8000 and
// forward the text typed into #message to it when #echo-form is submitted.
$(document).ready(function() {
    var host = window.location.hostname
    var ws = new WebSocket("ws://"+host+":8000/humanchat");
    //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
    ws.onopen = function() {
        console.log('Connected');
    };
    ws.onmessage = function(e) {
        console.log('Received: ' + e.data);
        // Parse directly from the event. The previous code copied the event
        // into an undeclared variable, which leaked a global named `data`.
        var vid;
        try {
            vid = JSON.parse(e.data);
        } catch (err) {
            // A non-JSON frame is only logged; it must not raise an uncaught
            // exception inside the message handler.
            console.error('Received non-JSON message', err);
            return;
        }
        console.log(typeof(vid),vid)
        //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
    };
    ws.onclose = function(e) {
        console.log('Closed');
    };

    $('#echo-form').on('submit', function(e) {
        e.preventDefault();
        var message = $('#message').val();
        // send() throws InvalidStateError while the socket is still
        // connecting; keep the typed text so the user can retry.
        if (ws.readyState !== WebSocket.OPEN) {
            console.error('WebSocket is not open; message not sent');
            return;
        }
        console.log('Sending: ' + message);
        ws.send(message);
        $('#message').val('');
    });
});
|
||||
|
||||
$(function(){
|
||||
var sdk = null; // Global handler to do cleanup when republishing.
|
||||
var startPlay = function() {
|
||||
$('#rtc_media_player').show();
|
||||
|
||||
// Close PC when user replay.
|
||||
if (sdk) {
|
||||
sdk.close();
|
||||
}
|
||||
sdk = new SrsRtcWhipWhepAsync();
|
||||
|
||||
// User should set the stream when publish is done, @see https://webrtc.org/getting-started/media-devices
|
||||
// However SRS SDK provides a consistent API like https://webrtc.org/getting-started/remote-streams
|
||||
$('#rtc_media_player').prop('srcObject', sdk.stream);
|
||||
// Optional callback, SDK will add track to stream.
|
||||
// sdk.ontrack = function (event) { console.log('Got track', event); sdk.stream.addTrack(event.track); };
|
||||
|
||||
var host = window.location.hostname
|
||||
// For example: webrtc://r.ossrs.net/live/livestream
|
||||
var url = "http://"+host+":1985/rtc/v1/whep/?app=live&stream=livestream"
|
||||
sdk.play(url).then(function(session){
|
||||
//$('#sessionid').html(session.sessionid);
|
||||
//$('#simulator-drop').attr('href', session.simulator + '?drop=1&username=' + session.sessionid);
|
||||
}).catch(function (reason) {
|
||||
sdk.close();
|
||||
$('#rtc_media_player').hide();
|
||||
console.error(reason);
|
||||
});
|
||||
};
|
||||
|
||||
$('#rtc_media_player').hide();
|
||||
// var query = parse_query_string();
|
||||
// srs_init_whep("#txt_url", query);
|
||||
|
||||
$("#btn_play").click(startPlay);
|
||||
// Never play until the window is loaded @see https://github.com/ossrs/srs/issues/2732
|
||||
// if (query.autostart === 'true') {
|
||||
// $('#rtc_media_player').prop('muted', true);
|
||||
// console.warn('For autostart, we should mute it, see https://www.jianshu.com/p/c3c6944eed5a ' +
|
||||
// 'or https://developers.google.com/web/updates/2017/09/autoplay-policy-changes#audiovideo_elements');
|
||||
// window.addEventListener("load", function(){ startPlay(); });
|
||||
// }
|
||||
});
|
||||
</script>
|
||||
</html>
|
|
@ -0,0 +1,83 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8"/>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>WebRTC webcam</title>
|
||||
<style>
|
||||
button {
|
||||
padding: 8px 16px;
|
||||
}
|
||||
|
||||
video {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.option {
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
#media {
|
||||
max-width: 1280px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="option">
|
||||
<input id="use-stun" type="checkbox"/>
|
||||
<label for="use-stun">Use STUN server</label>
|
||||
</div>
|
||||
<button id="start" onclick="start()">Start</button>
|
||||
<button id="stop" style="display: none" onclick="stop()">Stop</button>
|
||||
<form class="form-inline" id="echo-form">
|
||||
<div class="form-group">
|
||||
<p>input text</p>
|
||||
|
||||
<textarea cols="2" rows="3" style="width:600px;height:50px;" class="form-control" id="message">test</textarea>
|
||||
</div>
|
||||
<button type="submit" class="btn btn-default">Send</button>
|
||||
</form>
|
||||
|
||||
<div id="media">
|
||||
<h2>Media</h2>
|
||||
|
||||
<audio id="audio" autoplay="true"></audio>
|
||||
<video id="video" style="width:600px;" autoplay="true" playsinline="true"></video>
|
||||
</div>
|
||||
|
||||
<script src="client.js"></script>
|
||||
<script type="text/javascript" src="http://cdn.sockjs.org/sockjs-0.3.4.js"></script>
|
||||
<script type="text/javascript" src="https://ajax.aspnetcdn.com/ajax/jquery/jquery-2.1.1.min.js"></script>
|
||||
</body>
|
||||
<script type="text/javascript" charset="utf-8">
|
||||
|
||||
// Page bootstrap: open the chat WebSocket to the server on port 8000 and
// forward the text typed into #message to it when #echo-form is submitted.
$(document).ready(function() {
    var host = window.location.hostname
    var ws = new WebSocket("ws://"+host+":8000/humanchat");
    //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
    ws.onopen = function() {
        console.log('Connected');
    };
    ws.onmessage = function(e) {
        console.log('Received: ' + e.data);
        // Parse directly from the event. The previous code copied the event
        // into an undeclared variable, which leaked a global named `data`.
        var vid;
        try {
            vid = JSON.parse(e.data);
        } catch (err) {
            // A non-JSON frame is only logged; it must not raise an uncaught
            // exception inside the message handler.
            console.error('Received non-JSON message', err);
            return;
        }
        console.log(typeof(vid),vid)
        //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
    };
    ws.onclose = function(e) {
        console.log('Closed');
    };

    $('#echo-form').on('submit', function(e) {
        e.preventDefault();
        var message = $('#message').val();
        // send() throws InvalidStateError while the socket is still
        // connecting; keep the typed text so the user can retry.
        if (ws.readyState !== WebSocket.OPEN) {
            console.error('WebSocket is not open; message not sent');
            return;
        }
        console.log('Sending: ' + message);
        ws.send(message);
        $('#message').val('');
    });
});
|
||||
</script>
|
||||
</html>
|
|
@ -0,0 +1,75 @@
|
|||
var pc = null;
|
||||
|
||||
/**
 * Negotiate a receive-only audio/video session for the global peer
 * connection `pc`.
 *
 * Creates an SDP offer, waits until ICE gathering has completed, POSTs the
 * offer SDP to the WHEP URL on port 1985 of the page's host, and applies the
 * response body as the remote answer.
 *
 * @returns {Promise<void>} Settles when negotiation finishes. Failures are
 *     reported with alert() and are not re-thrown.
 */
function negotiate() {
    var host = window.location.hostname
    pc.addTransceiver('video', { direction: 'recvonly' });
    pc.addTransceiver('audio', { direction: 'recvonly' });
    return pc.createOffer().then((offer) => {
        return pc.setLocalDescription(offer);
    }).then(() => {
        // wait for ICE gathering to complete
        return new Promise((resolve) => {
            if (pc.iceGatheringState === 'complete') {
                resolve();
            } else {
                const checkState = () => {
                    if (pc.iceGatheringState === 'complete') {
                        pc.removeEventListener('icegatheringstatechange', checkState);
                        resolve();
                    }
                };
                pc.addEventListener('icegatheringstatechange', checkState);
            }
        });
    }).then(() => {
        var offer = pc.localDescription;
        return fetch("http://"+host+":1985/rtc/v1/whep/?app=live&stream=livestream", {
            body: offer.sdp,
            headers: {
                'Content-Type': 'application/sdp'
            },
            method: 'POST'
        });
    }).then((response) => {
        console.log(response)
        // Surface HTTP errors instead of feeding an error page to
        // setRemoteDescription as if it were SDP.
        if (!response.ok) {
            throw new Error('WHEP request failed: HTTP ' + response.status);
        }
        // A fetch Response has no `data` property (that is an axios idiom);
        // the answer SDP has to be read from the body as text.
        return response.text();
    }).then((answer) => {
        return pc.setRemoteDescription({sdp:answer,type:'answer'});
    }).catch((e) => {
        alert(e);
    });
}
|
||||
|
||||
/**
 * Create the global peer connection, route incoming tracks to the page's
 * <video> / <audio> elements, and begin negotiation.
 *
 * Hides the Start button and shows the Stop button.
 */
function start() {
    var rtcConfig = { sdpSemantics: 'unified-plan' };

    // Only add a STUN server when the user ticked the checkbox.
    var useStun = document.getElementById('use-stun').checked;
    if (useStun) {
        rtcConfig.iceServers = [{ urls: ['stun:stun.l.google.com:19302'] }];
    }

    pc = new RTCPeerConnection(rtcConfig);

    // connect audio / video: the element id matches the track kind for
    // video; every other kind goes to the audio element.
    pc.addEventListener('track', function (evt) {
        var targetId = (evt.track.kind == 'video') ? 'video' : 'audio';
        document.getElementById(targetId).srcObject = evt.streams[0];
    });

    document.getElementById('start').style.display = 'none';
    negotiate();
    document.getElementById('stop').style.display = 'inline-block';
}
|
||||
|
||||
/**
 * Hide the Stop button and close the global peer connection after a
 * 500 ms delay.
 */
function stop() {
    var stopButton = document.getElementById('stop');
    stopButton.style.display = 'none';

    // close peer connection
    setTimeout(function () {
        pc.close();
    }, 500);
}
|
14
webrtc.py
14
webrtc.py
|
@ -36,6 +36,10 @@ class PlayerStreamTrack(MediaStreamTrack):
|
|||
self.kind = kind
|
||||
self._player = player
|
||||
self._queue = asyncio.Queue()
|
||||
if self.kind == 'video':
|
||||
self.framecount = 0
|
||||
self.lasttime = time.perf_counter()
|
||||
self.totaltime = 0
|
||||
|
||||
_start: float
|
||||
_timestamp: int
|
||||
|
@ -68,7 +72,7 @@ class PlayerStreamTrack(MediaStreamTrack):
|
|||
return self._timestamp, AUDIO_TIME_BASE
|
||||
|
||||
async def recv(self) -> Union[Frame, Packet]:
|
||||
# frame = self.frames[self.counter % 30]
|
||||
# frame = self.frames[self.counter % 30]
|
||||
self._player._start(self)
|
||||
frame = await self._queue.get()
|
||||
pts, time_base = await self.next_timestamp()
|
||||
|
@ -77,6 +81,14 @@ class PlayerStreamTrack(MediaStreamTrack):
|
|||
if frame is None:
|
||||
self.stop()
|
||||
raise Exception
|
||||
if self.kind == 'video':
|
||||
self.totaltime += (time.perf_counter() - self.lasttime)
|
||||
self.framecount += 1
|
||||
self.lasttime = time.perf_counter()
|
||||
if self.framecount==100:
|
||||
print(f"------actual avg final fps:{self.framecount/self.totaltime:.4f}")
|
||||
self.framecount = 0
|
||||
self.totaltime=0
|
||||
return frame
|
||||
|
||||
def stop(self):
|
||||
|
|
Loading…
Reference in New Issue