remove unused code
This commit is contained in:
parent
a8b40fa813
commit
00dbc71db9
@ -1,57 +0,0 @@
# autodl tutorial

## autodl image address
https://www.codewithgpu.com/i/lipku/metahuman-stream/base

## Code directory
/root/metahuman-stream/

## Install dependencies
```
cd metahuman-stream
git pull
pip install -r requirements.txt
```

## Running
Using rtcpush directly is recommended; the plain webrtc mode sometimes fails to display.

### Start the srs service on any host with a public IP
```
export CANDIDATE='<server public IP>'
docker run --rm --env CANDIDATE=$CANDIDATE \
  -p 1935:1935 -p 8080:8080 -p 1985:1985 -p 8000:8000/udp \
  registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5 \
  objs/srs -c conf/rtc.conf
```

### Push the stream to the srs server
```
python app.py --listenport 6006 --transport rtcpush --push_url 'http://<Aliyun server public IP>:1985/rtc/v1/whip/?app=live&stream=livestream'
```
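Before wiring up the push it helps to confirm the srs HTTP API is reachable from the autodl instance. A minimal sketch, assuming the default srs API port 1985, its `/api/v1/versions` endpoint, and the `requests` package; the host is a placeholder:

```python
import requests

SRS_HOST = "<Aliyun server public IP>"   # placeholder: same host as in --push_url above

# The srs HTTP API listens on port 1985 by default; /api/v1/versions just reports the server version.
try:
    r = requests.get(f"http://{SRS_HOST}:1985/api/v1/versions", timeout=5)
    print("srs API reachable:", r.status_code, r.text[:120])
except requests.RequestException as exc:
    print("srs API not reachable - check CANDIDATE, ports and firewall:", exc)
```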

### Access
The page served is the static rtcpushapi.html:
http://<autodl public address>/rtcpushapi.html
You need to edit web/rtcpushapi.html in the project directory.

Change
```
var url = "http://"+host+":1985/rtc/v1/whep/?app=live&stream=livestream"
```

to
```
var url = "http://<public IP>:1985/rtc/v1/whep/?app=live&stream=livestream"
```

The edit looks like this:
<img src="./img/20240530112922.jpg">

If it works, it looks like this:


## Notes
1. On autodl, personal accounts must use the official ssh proxy tool to forward the port before port 6006 can be accessed.
2. Audio latency still needs srs-side tuning on the backend.
3. musetalk does not support rtmp streaming yet, but it does support rtcpush.
4. A musetalk tutorial will be published soon.
Binary file not shown.
Before Width: | Height: | Size: 389 KiB |
Binary file not shown.
Before Width: | Height: | Size: 1.1 MiB |
|
@ -1,315 +0,0 @@
|
|||
import asyncio
|
||||
import json
|
||||
import websockets
|
||||
import time
|
||||
import logging
|
||||
import tracemalloc
|
||||
import numpy as np
|
||||
import argparse
|
||||
import ssl
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--host",
|
||||
type=str,
|
||||
default="0.0.0.0",
|
||||
required=False,
|
||||
help="host ip, localhost, 0.0.0.0")
|
||||
parser.add_argument("--port",
|
||||
type=int,
|
||||
default=10095,
|
||||
required=False,
|
||||
help="grpc server port")
|
||||
parser.add_argument("--asr_model",
|
||||
type=str,
|
||||
default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
|
||||
help="model from modelscope")
|
||||
parser.add_argument("--asr_model_revision",
|
||||
type=str,
|
||||
default="v2.0.4",
|
||||
help="")
|
||||
parser.add_argument("--asr_model_online",
|
||||
type=str,
|
||||
default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online",
|
||||
help="model from modelscope")
|
||||
parser.add_argument("--asr_model_online_revision",
|
||||
type=str,
|
||||
default="v2.0.4",
|
||||
help="")
|
||||
parser.add_argument("--vad_model",
|
||||
type=str,
|
||||
default="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
|
||||
help="model from modelscope")
|
||||
parser.add_argument("--vad_model_revision",
|
||||
type=str,
|
||||
default="v2.0.4",
|
||||
help="")
|
||||
parser.add_argument("--punc_model",
|
||||
type=str,
|
||||
default="iic/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727",
|
||||
help="model from modelscope")
|
||||
parser.add_argument("--punc_model_revision",
|
||||
type=str,
|
||||
default="v2.0.4",
|
||||
help="")
|
||||
parser.add_argument("--ngpu",
|
||||
type=int,
|
||||
default=1,
|
||||
help="0 for cpu, 1 for gpu")
|
||||
parser.add_argument("--device",
|
||||
type=str,
|
||||
default="cuda",
|
||||
help="cuda, cpu")
|
||||
parser.add_argument("--ncpu",
|
||||
type=int,
|
||||
default=4,
|
||||
help="cpu cores")
|
||||
parser.add_argument("--certfile",
|
||||
type=str,
|
||||
default="ssl_key/server.crt",
|
||||
required=False,
|
||||
help="certfile for ssl")
|
||||
|
||||
parser.add_argument("--keyfile",
|
||||
type=str,
|
||||
default="ssl_key/server.key",
|
||||
required=False,
|
||||
help="keyfile for ssl")
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
websocket_users = set()
|
||||
|
||||
print("model loading")
|
||||
from funasr import AutoModel
|
||||
|
||||
# asr
|
||||
model_asr = AutoModel(model=args.asr_model,
|
||||
model_revision=args.asr_model_revision,
|
||||
ngpu=args.ngpu,
|
||||
ncpu=args.ncpu,
|
||||
device=args.device,
|
||||
disable_pbar=True,
|
||||
disable_log=True,
|
||||
)
|
||||
# asr
|
||||
model_asr_streaming = AutoModel(model=args.asr_model_online,
|
||||
model_revision=args.asr_model_online_revision,
|
||||
ngpu=args.ngpu,
|
||||
ncpu=args.ncpu,
|
||||
device=args.device,
|
||||
disable_pbar=True,
|
||||
disable_log=True,
|
||||
)
|
||||
# vad
|
||||
model_vad = AutoModel(model=args.vad_model,
|
||||
model_revision=args.vad_model_revision,
|
||||
ngpu=args.ngpu,
|
||||
ncpu=args.ncpu,
|
||||
device=args.device,
|
||||
disable_pbar=True,
|
||||
disable_log=True,
|
||||
# chunk_size=60,
|
||||
)
|
||||
|
||||
if args.punc_model != "":
|
||||
model_punc = AutoModel(model=args.punc_model,
|
||||
model_revision=args.punc_model_revision,
|
||||
ngpu=args.ngpu,
|
||||
ncpu=args.ncpu,
|
||||
device=args.device,
|
||||
disable_pbar=True,
|
||||
disable_log=True,
|
||||
)
|
||||
else:
|
||||
model_punc = None
|
||||
|
||||
|
||||
|
||||
print("model loaded! only support one client at the same time now!!!!")
|
||||
|
||||
async def ws_reset(websocket):
|
||||
print("ws reset now, total num is ",len(websocket_users))
|
||||
|
||||
websocket.status_dict_asr_online["cache"] = {}
|
||||
websocket.status_dict_asr_online["is_final"] = True
|
||||
websocket.status_dict_vad["cache"] = {}
|
||||
websocket.status_dict_vad["is_final"] = True
|
||||
websocket.status_dict_punc["cache"] = {}
|
||||
|
||||
await websocket.close()
|
||||
|
||||
|
||||
async def clear_websocket():
|
||||
for websocket in websocket_users:
|
||||
await ws_reset(websocket)
|
||||
websocket_users.clear()
|
||||
|
||||
|
||||
|
||||
async def ws_serve(websocket, path):
|
||||
frames = []
|
||||
frames_asr = []
|
||||
frames_asr_online = []
|
||||
global websocket_users
|
||||
# await clear_websocket()
|
||||
websocket_users.add(websocket)
|
||||
websocket.status_dict_asr = {}
|
||||
websocket.status_dict_asr_online = {"cache": {}, "is_final": False}
|
||||
websocket.status_dict_vad = {'cache': {}, "is_final": False}
|
||||
websocket.status_dict_punc = {'cache': {}}
|
||||
websocket.chunk_interval = 10
|
||||
websocket.vad_pre_idx = 0
|
||||
speech_start = False
|
||||
speech_end_i = -1
|
||||
websocket.wav_name = "microphone"
|
||||
websocket.mode = "2pass"
|
||||
print("new user connected", flush=True)
|
||||
|
||||
try:
|
||||
async for message in websocket:
|
||||
if isinstance(message, str):
|
||||
messagejson = json.loads(message)
|
||||
|
||||
if "is_speaking" in messagejson:
|
||||
websocket.is_speaking = messagejson["is_speaking"]
|
||||
websocket.status_dict_asr_online["is_final"] = not websocket.is_speaking
|
||||
if "chunk_interval" in messagejson:
|
||||
websocket.chunk_interval = messagejson["chunk_interval"]
|
||||
if "wav_name" in messagejson:
|
||||
websocket.wav_name = messagejson.get("wav_name")
|
||||
if "chunk_size" in messagejson:
|
||||
chunk_size = messagejson["chunk_size"]
|
||||
if isinstance(chunk_size, str):
|
||||
chunk_size = chunk_size.split(',')
|
||||
websocket.status_dict_asr_online["chunk_size"] = [int(x) for x in chunk_size]
|
||||
if "encoder_chunk_look_back" in messagejson:
|
||||
websocket.status_dict_asr_online["encoder_chunk_look_back"] = messagejson["encoder_chunk_look_back"]
|
||||
if "decoder_chunk_look_back" in messagejson:
|
||||
websocket.status_dict_asr_online["decoder_chunk_look_back"] = messagejson["decoder_chunk_look_back"]
|
||||
if "hotword" in messagejson:
|
||||
websocket.status_dict_asr["hotword"] = messagejson["hotword"]
|
||||
if "mode" in messagejson:
|
||||
websocket.mode = messagejson["mode"]
|
||||
|
||||
websocket.status_dict_vad["chunk_size"] = int(websocket.status_dict_asr_online["chunk_size"][1]*60/websocket.chunk_interval)
|
||||
if len(frames_asr_online) > 0 or len(frames_asr) > 0 or not isinstance(message, str):
|
||||
if not isinstance(message, str):
|
||||
frames.append(message)
|
||||
duration_ms = len(message)//32
|
||||
websocket.vad_pre_idx += duration_ms
|
||||
|
||||
# asr online
|
||||
frames_asr_online.append(message)
|
||||
websocket.status_dict_asr_online["is_final"] = speech_end_i != -1
|
||||
if len(frames_asr_online) % websocket.chunk_interval == 0 or websocket.status_dict_asr_online["is_final"]:
|
||||
if websocket.mode == "2pass" or websocket.mode == "online":
|
||||
audio_in = b"".join(frames_asr_online)
|
||||
try:
|
||||
await async_asr_online(websocket, audio_in)
|
||||
except:
|
||||
print(f"error in asr streaming, {websocket.status_dict_asr_online}")
|
||||
frames_asr_online = []
|
||||
if speech_start:
|
||||
frames_asr.append(message)
|
||||
# vad online
|
||||
try:
|
||||
speech_start_i, speech_end_i = await async_vad(websocket, message)
|
||||
except:
|
||||
print("error in vad")
|
||||
if speech_start_i != -1:
|
||||
speech_start = True
|
||||
beg_bias = (websocket.vad_pre_idx-speech_start_i)//duration_ms
|
||||
frames_pre = frames[-beg_bias:]
|
||||
frames_asr = []
|
||||
frames_asr.extend(frames_pre)
|
||||
# asr punc offline
|
||||
if speech_end_i != -1 or not websocket.is_speaking:
|
||||
# print("vad end point")
|
||||
if websocket.mode == "2pass" or websocket.mode == "offline":
|
||||
audio_in = b"".join(frames_asr)
|
||||
try:
|
||||
await async_asr(websocket, audio_in)
|
||||
except:
|
||||
print("error in asr offline")
|
||||
frames_asr = []
|
||||
speech_start = False
|
||||
frames_asr_online = []
|
||||
websocket.status_dict_asr_online["cache"] = {}
|
||||
if not websocket.is_speaking:
|
||||
websocket.vad_pre_idx = 0
|
||||
frames = []
|
||||
websocket.status_dict_vad["cache"] = {}
|
||||
else:
|
||||
frames = frames[-20:]
|
||||
|
||||
|
||||
except websockets.ConnectionClosed:
|
||||
print("ConnectionClosed...", websocket_users,flush=True)
|
||||
await ws_reset(websocket)
|
||||
websocket_users.remove(websocket)
|
||||
except websockets.InvalidState:
|
||||
print("InvalidState...")
|
||||
except Exception as e:
|
||||
print("Exception:", e)
|
||||
|
||||
|
||||
async def async_vad(websocket, audio_in):
|
||||
|
||||
segments_result = model_vad.generate(input=audio_in, **websocket.status_dict_vad)[0]["value"]
|
||||
# print(segments_result)
|
||||
|
||||
speech_start = -1
|
||||
speech_end = -1
|
||||
|
||||
if len(segments_result) == 0 or len(segments_result) > 1:
|
||||
return speech_start, speech_end
|
||||
if segments_result[0][0] != -1:
|
||||
speech_start = segments_result[0][0]
|
||||
if segments_result[0][1] != -1:
|
||||
speech_end = segments_result[0][1]
|
||||
return speech_start, speech_end
|
||||
|
||||
|
||||
async def async_asr(websocket, audio_in):
|
||||
if len(audio_in) > 0:
|
||||
# print(len(audio_in))
|
||||
rec_result = model_asr.generate(input=audio_in, **websocket.status_dict_asr)[0]
|
||||
# print("offline_asr, ", rec_result)
|
||||
if model_punc is not None and len(rec_result["text"])>0:
|
||||
# print("offline, before punc", rec_result, "cache", websocket.status_dict_punc)
|
||||
rec_result = model_punc.generate(input=rec_result['text'], **websocket.status_dict_punc)[0]
|
||||
# print("offline, after punc", rec_result)
|
||||
if len(rec_result["text"])>0:
|
||||
# print("offline", rec_result)
|
||||
mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode
|
||||
message = json.dumps({"mode": mode, "text": rec_result["text"], "wav_name": websocket.wav_name,"is_final":websocket.is_speaking})
|
||||
await websocket.send(message)
|
||||
|
||||
|
||||
async def async_asr_online(websocket, audio_in):
|
||||
if len(audio_in) > 0:
|
||||
# print(websocket.status_dict_asr_online.get("is_final", False))
|
||||
rec_result = model_asr_streaming.generate(input=audio_in, **websocket.status_dict_asr_online)[0]
|
||||
# print("online, ", rec_result)
|
||||
if websocket.mode == "2pass" and websocket.status_dict_asr_online.get("is_final", False):
|
||||
return
|
||||
# websocket.status_dict_asr_online["cache"] = dict()
|
||||
if len(rec_result["text"]):
|
||||
mode = "2pass-online" if "2pass" in websocket.mode else websocket.mode
|
||||
message = json.dumps({"mode": mode, "text": rec_result["text"], "wav_name": websocket.wav_name,"is_final":websocket.is_speaking})
|
||||
await websocket.send(message)
|
||||
|
||||
if len(args.certfile)>0:
|
||||
ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
|
||||
|
||||
# Generate with Lets Encrypt, copied to this location, chown to current user and 400 permissions
|
||||
ssl_cert = args.certfile
|
||||
ssl_key = args.keyfile
|
||||
|
||||
ssl_context.load_cert_chain(ssl_cert, keyfile=ssl_key)
|
||||
start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None,ssl=ssl_context)
|
||||
else:
|
||||
start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None)
|
||||
asyncio.get_event_loop().run_until_complete(start_server)
|
||||
asyncio.get_event_loop().run_forever()
|
@ -1 +0,0 @@
Open samples/html/static/index.html in a browser and enter the ASR server address; both microphone input and file input are supported.
|
@ -1,132 +0,0 @@
|
|||
<!-- index.html -->
|
||||
<html>
|
||||
<head>
|
||||
<script type="text/javascript" src="mpegts-1.7.3.min.js"></script>
|
||||
<script type="text/javascript" src="http://cdn.sockjs.org/sockjs-0.3.4.js"></script>
|
||||
<script src="http://code.jquery.com/jquery-2.1.1.min.js"></script>
|
||||
<script src="recorder-core.js" charset="UTF-8"></script>
|
||||
<script src="wav.js" charset="UTF-8"></script>
|
||||
<script src="pcm.js" charset="UTF-8"></script>
|
||||
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>metahuman voice test</h1>
|
||||
<form class="form-inline" id="echo-form" name="ssbtn">
|
||||
<div class="form-group">
|
||||
<p>input text</p>
|
||||
|
||||
<textarea cols="2" rows="3" style="width:600px;height:50px;" class="form-control" id="message"></textarea>
|
||||
</div>
|
||||
<button type="submit" class="btn btn-default">Send</button>
|
||||
</form>
|
||||
<div id="log">
|
||||
|
||||
</div>
|
||||
<video id="video_player" width="40%" controls autoplay muted></video>
|
||||
</div>
|
||||
|
||||
<div>------------------------------------------------------------------------------------------------------------------------------</div>
|
||||
<div class="div_class_topArea">
|
||||
|
||||
<div class="div_class_recordControl">
|
||||
asr服务器地址(必填):
|
||||
<br>
|
||||
<input id="wssip" type="text" onchange="addresschange()" style="width: 500px;" value="wss://127.0.0.1:10095/"/>
|
||||
<br>
|
||||
<a id="wsslink" style="display: none;" href="#" onclick="window.open('https://127.0.0.1:10095/', '_blank')"><div id="info_wslink">点此处手工授权wss://127.0.0.1:10095/</div></a>
|
||||
<br>
|
||||
<br>
|
||||
<div style="border:2px solid #ccc;display: none;">
|
||||
选择录音模式:<br/>
|
||||
|
||||
<label ><input name="recoder_mode" onclick="on_recoder_mode_change()" type="radio" value="mic" checked="true"/>麦克风 </label>
|
||||
<label><input name="recoder_mode" onclick="on_recoder_mode_change()" type="radio" value="file" />文件 </label>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div id="mic_mode_div" style="border:2px solid #ccc;display:none;">
|
||||
选择asr模型模式:<br/>
|
||||
|
||||
<label><input name="asr_mode" type="radio" value="2pass" />2pass </label>
|
||||
<label><input name="asr_mode" type="radio" value="online" checked="true"/>online </label>
|
||||
<label><input name="asr_mode" type="radio" value="2pass-offline" />2pass-offline </label>
|
||||
<label><input name="asr_mode" type="radio" value="offline" />offline </label>
|
||||
|
||||
</div>
|
||||
|
||||
<div id="rec_mode_div" style="border:2px solid #ccc;display:none;">
|
||||
|
||||
|
||||
<input type="file" id="upfile">
|
||||
|
||||
</div>
|
||||
|
||||
<div style="border:2px solid #ccc;display: none;">
|
||||
热词设置(一行一个关键字,空格隔开权重,如"阿里巴巴 20"):
|
||||
|
||||
<textarea rows="1" id="varHot" style=" width: 100%;height:auto" >阿里巴巴 20 hello world 40</textarea>
|
||||
|
||||
</div>
|
||||
<div style="display: none;">语音识别结果显示:</div>
|
||||
<br>
|
||||
|
||||
<textarea rows="10" id="varArea" readonly="true" style=" width: 100%;height:auto;display: none;" ></textarea>
|
||||
<br>
|
||||
<div id="info_div">请点击开始</div>
|
||||
<div class="div_class_buttons">
|
||||
<button id="btnConnect">连接</button>
|
||||
<button id="btnStart">开始</button>
|
||||
<button id="btnStop">停止</button>
|
||||
|
||||
</div>
|
||||
|
||||
<audio id="audio_record" type="audio/wav" controls style="margin-top: 2px; width: 100%;display: none;"></audio>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="wsconnecter.js" charset="utf-8"></script>
|
||||
<script src="main.js" charset="utf-8"></script>
|
||||
|
||||
</body>
|
||||
<script type="text/javascript" charset="utf-8">
|
||||
|
||||
// $(document).ready(function() {
|
||||
// var host = window.location.hostname
|
||||
// var ws = new WebSocket("ws://"+host+":8000/humanecho");
|
||||
// //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
|
||||
// ws.onopen = function() {
|
||||
// console.log('Connected');
|
||||
// };
|
||||
// ws.onmessage = function(e) {
|
||||
// console.log('Received: ' + e.data);
|
||||
// data = e
|
||||
// var vid = JSON.parse(data.data);
|
||||
// console.log(typeof(vid),vid)
|
||||
// //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
|
||||
|
||||
// };
|
||||
// ws.onclose = function(e) {
|
||||
// console.log('Closed');
|
||||
// };
|
||||
|
||||
// flvPlayer = mpegts.createPlayer({type: 'flv', url: "http://"+host+":8080/live/livestream.flv", isLive: true, enableStashBuffer: false});
|
||||
// flvPlayer.attachMediaElement(document.getElementById('video_player'));
|
||||
// flvPlayer.load();
|
||||
// flvPlayer.play();
|
||||
|
||||
// $('#echo-form').on('submit', function(e) {
|
||||
// e.preventDefault();
|
||||
// var message = $('#message').val();
|
||||
// console.log('Sending: ' + message);
|
||||
// ws.send(message);
|
||||
// $('#message').val('');
|
||||
// });
|
||||
// });
|
||||
|
||||
|
||||
|
||||
</script>
|
||||
</html>
|
|
@ -1,87 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
||||
<title>语音识别</title>
|
||||
|
||||
</head>
|
||||
<body style="margin-left: 3%">
|
||||
<script src="recorder-core.js" charset="UTF-8"></script>
|
||||
<script src="wav.js" charset="UTF-8"></script>
|
||||
<script src="pcm.js" charset="UTF-8"></script>
|
||||
|
||||
|
||||
|
||||
<h1>FunASR Demo</h1>
|
||||
<h3>这里是FunASR开源项目体验demo,集成了VAD、ASR与标点等工业级别的模型,支持长音频离线文件转写,实时语音识别等,开源项目地址:https://github.com/alibaba-damo-academy/FunASR</h3>
|
||||
|
||||
<div class="div_class_topArea">
|
||||
|
||||
<div class="div_class_recordControl">
|
||||
asr服务器地址(必填):
|
||||
<br>
|
||||
<input id="wssip" type="text" onchange="addresschange()" style=" width: 100%;height:100%" value="wss://127.0.0.1:10095/"/>
|
||||
<br>
|
||||
<a id="wsslink" href="#" onclick="window.open('https://127.0.0.1:10095/', '_blank')"><div id="info_wslink">点此处手工授权wss://127.0.0.1:10095/</div></a>
|
||||
<br>
|
||||
<br>
|
||||
<div style="border:2px solid #ccc;">
|
||||
选择录音模式:<br/>
|
||||
|
||||
<label><input name="recoder_mode" onclick="on_recoder_mode_change()" type="radio" value="mic" checked="true"/>麦克风 </label>
|
||||
<label><input name="recoder_mode" onclick="on_recoder_mode_change()" type="radio" value="file" />文件 </label>
|
||||
|
||||
</div>
|
||||
|
||||
<br>
|
||||
<div id="mic_mode_div" style="border:2px solid #ccc;display:block;">
|
||||
选择asr模型模式:<br/>
|
||||
|
||||
<label><input name="asr_mode" type="radio" value="2pass" checked="true"/>2pass </label>
|
||||
<label><input name="asr_mode" type="radio" value="online" />online </label>
|
||||
<label><input name="asr_mode" type="radio" value="offline" />offline </label>
|
||||
|
||||
</div>
|
||||
|
||||
<div id="rec_mode_div" style="border:2px solid #ccc;display:none;">
|
||||
|
||||
|
||||
<input type="file" id="upfile">
|
||||
|
||||
</div>
|
||||
<br>
|
||||
<div style="border:2px solid #ccc;">
|
||||
热词设置(一行一个关键字,空格隔开权重,如"阿里巴巴 20"):
|
||||
<br>
|
||||
|
||||
|
||||
<textarea rows="3" id="varHot" style=" width: 100%;height:100%" >阿里巴巴 20 hello world 40</textarea>
|
||||
<br>
|
||||
|
||||
</div>
|
||||
语音识别结果显示:
|
||||
<br>
|
||||
|
||||
<textarea rows="10" id="varArea" readonly="true" style=" width: 100%;height:100%" ></textarea>
|
||||
<br>
|
||||
<div id="info_div">请点击开始</div>
|
||||
<div class="div_class_buttons">
|
||||
<button id="btnConnect">连接</button>
|
||||
<button id="btnStart">开始</button>
|
||||
<button id="btnStop">停止</button>
|
||||
|
||||
</div>
|
||||
|
||||
<audio id="audio_record" type="audio/wav" controls style="margin-top: 12px; width: 100%;"></audio>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="wsconnecter.js" charset="utf-8"></script>
|
||||
<script src="main.js" charset="utf-8"></script>
|
||||
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -1,637 +0,0 @@
|
|||
/**
|
||||
* Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
|
||||
* Reserved. MIT License (https://opensource.org/licenses/MIT)
|
||||
*/
|
||||
/* 2022-2023 by zhaoming,mali aihealthx.com */
|
||||
|
||||
|
||||
// 连接; 定义socket连接类对象与语音对象
|
||||
var wsconnecter = new WebSocketConnectMethod({msgHandle:getJsonMessage,stateHandle:getConnState});
|
||||
var audioBlob;
|
||||
|
||||
// 录音; 定义录音对象,wav格式
|
||||
var rec = Recorder({
|
||||
type:"pcm",
|
||||
bitRate:16,
|
||||
sampleRate:16000,
|
||||
onProcess:recProcess
|
||||
});
|
||||
|
||||
|
||||
|
||||
|
||||
var sampleBuf=new Int16Array();
|
||||
// 定义按钮响应事件
|
||||
var btnStart = document.getElementById('btnStart');
|
||||
btnStart.onclick = record;
|
||||
var btnStop = document.getElementById('btnStop');
|
||||
btnStop.onclick = stop;
|
||||
btnStop.disabled = true;
|
||||
btnStart.disabled = true;
|
||||
|
||||
btnConnect= document.getElementById('btnConnect');
|
||||
btnConnect.onclick = start;
|
||||
|
||||
var awsslink= document.getElementById('wsslink');
|
||||
|
||||
|
||||
var rec_text=""; // for online rec asr result
|
||||
var offline_text=""; // for offline rec asr result
|
||||
var info_div = document.getElementById('info_div');
|
||||
|
||||
var upfile = document.getElementById('upfile');
|
||||
|
||||
|
||||
|
||||
var isfilemode=false; // if it is in file mode
|
||||
var file_ext="";
|
||||
var file_sample_rate=16000; //for wav file sample rate
|
||||
var file_data_array; // array to save file data
|
||||
|
||||
var totalsend=0;
|
||||
|
||||
var startTime = Date.now();
|
||||
|
||||
var now_ipaddress=window.location.href;
|
||||
now_ipaddress=now_ipaddress.replace("https://","wss://");
|
||||
now_ipaddress=now_ipaddress.replace("static/index.html","");
|
||||
// var localport=window.location.port;
|
||||
// now_ipaddress=now_ipaddress.replace(localport,"10095");
|
||||
// document.getElementById('wssip').value=now_ipaddress;
|
||||
// addresschange();
|
||||
function addresschange()
|
||||
{
|
||||
|
||||
var Uri = document.getElementById('wssip').value;
|
||||
document.getElementById('info_wslink').innerHTML="点此处手工授权(IOS手机)";
|
||||
Uri=Uri.replace(/wss/g,"https");
|
||||
console.log("addresschange uri=",Uri);
|
||||
|
||||
awsslink.onclick=function(){
|
||||
window.open(Uri, '_blank');
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
upfile.onclick=function()
|
||||
{
|
||||
btnStart.disabled = true;
|
||||
btnStop.disabled = true;
|
||||
btnConnect.disabled=false;
|
||||
|
||||
}
|
||||
|
||||
// from https://github.com/xiangyuecn/Recorder/tree/master
|
||||
var readWavInfo=function(bytes){
|
||||
//读取wav文件头,统一成44字节的头
|
||||
if(bytes.byteLength<44){
|
||||
return null;
|
||||
};
|
||||
var wavView=bytes;
|
||||
var eq=function(p,s){
|
||||
for(var i=0;i<s.length;i++){
|
||||
if(wavView[p+i]!=s.charCodeAt(i)){
|
||||
return false;
|
||||
};
|
||||
};
|
||||
return true;
|
||||
};
|
||||
|
||||
if(eq(0,"RIFF")&&eq(8,"WAVEfmt ")){
|
||||
|
||||
var numCh=wavView[22];
|
||||
if(wavView[20]==1 && (numCh==1||numCh==2)){//raw pcm 单或双声道
|
||||
var sampleRate=wavView[24]+(wavView[25]<<8)+(wavView[26]<<16)+(wavView[27]<<24);
|
||||
var bitRate=wavView[34]+(wavView[35]<<8);
|
||||
var heads=[wavView.subarray(0,12)],headSize=12;//head只保留必要的块
|
||||
//搜索data块的位置
|
||||
var dataPos=0; // 44 或有更多块
|
||||
for(var i=12,iL=wavView.length-8;i<iL;){
|
||||
if(wavView[i]==100&&wavView[i+1]==97&&wavView[i+2]==116&&wavView[i+3]==97){//eq(i,"data")
|
||||
heads.push(wavView.subarray(i,i+8));
|
||||
headSize+=8;
|
||||
dataPos=i+8;break;
|
||||
}
|
||||
var i0=i;
|
||||
i+=4;
|
||||
i+=4+wavView[i]+(wavView[i+1]<<8)+(wavView[i+2]<<16)+(wavView[i+3]<<24);
|
||||
if(i0==12){//fmt
|
||||
heads.push(wavView.subarray(i0,i));
|
||||
headSize+=i-i0;
|
||||
}
|
||||
}
|
||||
if(dataPos){
|
||||
var wavHead=new Uint8Array(headSize);
|
||||
for(var i=0,n=0;i<heads.length;i++){
|
||||
wavHead.set(heads[i],n);n+=heads[i].length;
|
||||
}
|
||||
return {
|
||||
sampleRate:sampleRate
|
||||
,bitRate:bitRate
|
||||
,numChannels:numCh
|
||||
,wavHead44:wavHead
|
||||
,dataPos:dataPos
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
return null;
|
||||
};
|
||||
|
||||
upfile.onchange = function () {
|
||||
var len = this.files.length;
|
||||
for(let i = 0; i < len; i++) {
|
||||
|
||||
let fileAudio = new FileReader();
|
||||
fileAudio.readAsArrayBuffer(this.files[i]);
|
||||
|
||||
file_ext=this.files[i].name.split('.').pop().toLowerCase();
|
||||
var audioblob;
|
||||
fileAudio.onload = function() {
|
||||
audioblob = fileAudio.result;
|
||||
|
||||
|
||||
file_data_array=audioblob;
|
||||
|
||||
|
||||
info_div.innerHTML='请点击连接进行识别';
|
||||
|
||||
}
|
||||
|
||||
fileAudio.onerror = function(e) {
|
||||
console.log('error' + e);
|
||||
}
|
||||
}
|
||||
// for wav file, we get the sample rate
|
||||
if(file_ext=="wav")
|
||||
for(let i = 0; i < len; i++) {
|
||||
|
||||
let fileAudio = new FileReader();
|
||||
fileAudio.readAsArrayBuffer(this.files[i]);
|
||||
fileAudio.onload = function() {
|
||||
audioblob = new Uint8Array(fileAudio.result);
|
||||
|
||||
// for wav file, we can get the sample rate
|
||||
var info=readWavInfo(audioblob);
|
||||
console.log(info);
|
||||
file_sample_rate=info.sampleRate;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
function play_file()
|
||||
{
|
||||
var audioblob=new Blob( [ new Uint8Array(file_data_array)] , {type :"audio/wav"});
|
||||
var audio_record = document.getElementById('audio_record');
|
||||
audio_record.src = (window.URL||webkitURL).createObjectURL(audioblob);
|
||||
audio_record.controls=true;
|
||||
//audio_record.play(); //not auto play
|
||||
}
|
||||
function start_file_send()
|
||||
{
|
||||
sampleBuf=new Uint8Array( file_data_array );
|
||||
|
||||
var chunk_size=960; // for asr chunk_size [5, 10, 5]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
while(sampleBuf.length>=chunk_size){
|
||||
|
||||
sendBuf=sampleBuf.slice(0,chunk_size);
|
||||
totalsend=totalsend+sampleBuf.length;
|
||||
sampleBuf=sampleBuf.slice(chunk_size,sampleBuf.length);
|
||||
wsconnecter.wsSend(sendBuf);
|
||||
|
||||
|
||||
}
|
||||
|
||||
stop();
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
function on_recoder_mode_change()
|
||||
{
|
||||
var item = null;
|
||||
var obj = document.getElementsByName("recoder_mode");
|
||||
for (var i = 0; i < obj.length; i++) { //遍历Radio
|
||||
if (obj[i].checked) {
|
||||
item = obj[i].value;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
if(item=="mic")
|
||||
{
|
||||
document.getElementById("mic_mode_div").style.display = 'block';
|
||||
document.getElementById("rec_mode_div").style.display = 'none';
|
||||
|
||||
|
||||
btnStart.disabled = true;
|
||||
btnStop.disabled = true;
|
||||
btnConnect.disabled=false;
|
||||
isfilemode=false;
|
||||
}
|
||||
else
|
||||
{
|
||||
document.getElementById("mic_mode_div").style.display = 'none';
|
||||
document.getElementById("rec_mode_div").style.display = 'block';
|
||||
|
||||
btnStart.disabled = true;
|
||||
btnStop.disabled = true;
|
||||
btnConnect.disabled=true;
|
||||
isfilemode=true;
|
||||
info_div.innerHTML='请点击选择文件';
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function getHotwords(){
|
||||
|
||||
var obj = document.getElementById("varHot");
|
||||
|
||||
if(typeof(obj) == 'undefined' || obj==null || obj.value.length<=0){
|
||||
return null;
|
||||
}
|
||||
let val = obj.value.toString();
|
||||
|
||||
console.log("hotwords="+val);
|
||||
let items = val.split(/[(\r\n)\r\n]+/); //split by \r\n
|
||||
var jsonresult = {};
|
||||
const regexNum = /^[0-9]*$/; // test number
|
||||
for (item of items) {
|
||||
|
||||
let result = item.split(" ");
|
||||
if(result.length>=2 && regexNum.test(result[result.length-1]))
|
||||
{
|
||||
var wordstr="";
|
||||
for(var i=0;i<result.length-1;i++)
|
||||
wordstr=wordstr+result[i]+" ";
|
||||
|
||||
jsonresult[wordstr.trim()]= parseInt(result[result.length-1]);
|
||||
}
|
||||
}
|
||||
console.log("jsonresult="+JSON.stringify(jsonresult));
|
||||
return JSON.stringify(jsonresult);
|
||||
|
||||
}
|
||||
function getAsrMode(){
|
||||
|
||||
var item = null;
|
||||
var obj = document.getElementsByName("asr_mode");
|
||||
for (var i = 0; i < obj.length; i++) { //遍历Radio
|
||||
if (obj[i].checked) {
|
||||
item = obj[i].value;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
if(isfilemode)
|
||||
{
|
||||
item= "offline";
|
||||
}
|
||||
console.log("asr mode"+item);
|
||||
|
||||
return item;
|
||||
}
|
||||
|
||||
function handleWithTimestamp(tmptext,tmptime)
|
||||
{
|
||||
console.log( "tmptext: " + tmptext);
|
||||
console.log( "tmptime: " + tmptime);
|
||||
if(tmptime==null || tmptime=="undefined" || tmptext.length<=0)
|
||||
{
|
||||
return tmptext;
|
||||
}
|
||||
tmptext=tmptext.replace(/。|?|,|、|\?|\.|\ /g, ","); // in case there are a lot of "。"
|
||||
var words=tmptext.split(","); // split to chinese sentence or english words
|
||||
var jsontime=JSON.parse(tmptime); //JSON.parse(tmptime.replace(/\]\]\[\[/g, "],[")); // in case there are a lot segments by VAD
|
||||
var char_index=0; // index for timestamp
|
||||
var text_withtime="";
|
||||
for(var i=0;i<words.length;i++)
|
||||
{
|
||||
if(words[i]=="undefined" || words[i].length<=0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
console.log("words===",words[i]);
|
||||
console.log( "words: " + words[i]+",time="+jsontime[char_index][0]/1000);
|
||||
if (/^[a-zA-Z]+$/.test(words[i]))
|
||||
{ // if it is english
|
||||
text_withtime=text_withtime+jsontime[char_index][0]/1000+":"+words[i]+"\n";
|
||||
char_index=char_index+1; //for english, timestamp unit is about a word
|
||||
}
|
||||
else{
|
||||
// if it is chinese
|
||||
text_withtime=text_withtime+jsontime[char_index][0]/1000+":"+words[i]+"\n";
|
||||
char_index=char_index+words[i].length; //for chinese, timestamp unit is about a char
|
||||
}
|
||||
}
|
||||
return text_withtime;
|
||||
|
||||
|
||||
}
|
||||
|
||||
// 语音识别结果; 对jsonMsg数据解析,将识别结果附加到编辑框中
|
||||
function getJsonMessage( jsonMsg ) {
|
||||
if(!recive_msg) return;
|
||||
var currentTime = Date.now();
|
||||
res_time = currentTime-startTime;
|
||||
console.log(res_time)
|
||||
//时间之差在4秒则发送消息
|
||||
let waitTime = 15000;
|
||||
if(res_time>waitTime){
|
||||
//自动发送消息
|
||||
var f = document.getElementById("echo-form");
|
||||
f.submit = function(e){
|
||||
e.preventDefault();
|
||||
var message=document.getElementById('message').value;
|
||||
console.log('Sending: ' + message);
|
||||
ws.send(message);
|
||||
document.getElementById('message').value='';
|
||||
}
|
||||
|
||||
recive_msg = false;
|
||||
startTime = currentTime;
|
||||
// rec_text="";
|
||||
// var varArea_message=document.getElementById('message');
|
||||
// varArea_message.value="";
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
//console.log(jsonMsg);
|
||||
console.log( "message: " + JSON.parse(jsonMsg.data)['text'] );
|
||||
var rectxt=""+JSON.parse(jsonMsg.data)['text'];
|
||||
|
||||
|
||||
var asrmodel=JSON.parse(jsonMsg.data)['mode'];
|
||||
var is_final=JSON.parse(jsonMsg.data)['is_final'];
|
||||
var timestamp=JSON.parse(jsonMsg.data)['timestamp'];
|
||||
if(asrmodel=="2pass-offline" || asrmodel=="offline")
|
||||
{
|
||||
|
||||
offline_text=offline_text+handleWithTimestamp(rectxt,timestamp); //rectxt; //.replace(/ +/g,"");
|
||||
rec_text=offline_text;
|
||||
}
|
||||
else
|
||||
{
|
||||
rec_text=rec_text+rectxt; //.replace(/ +/g,"");
|
||||
}
|
||||
var varArea=document.getElementById('varArea');
|
||||
var varArea_message=document.getElementById('message');
|
||||
|
||||
varArea.value=rec_text;
|
||||
varArea_message.value=rec_text;
|
||||
console.log( "offline_text: " + asrmodel+","+offline_text);
|
||||
console.log( "rec_text: " + rec_text);
|
||||
console.log( "isfilemode: " + isfilemode);
|
||||
console.log( "is_final: " + is_final);
|
||||
if (isfilemode==true && is_final==false){
|
||||
console.log("call stop ws!");
|
||||
play_file();
|
||||
wsconnecter.wsStop();
|
||||
|
||||
info_div.innerHTML="请点击连接";
|
||||
|
||||
btnStart.disabled = true;
|
||||
btnStop.disabled = true;
|
||||
btnConnect.disabled=false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
// 连接状态响应
|
||||
function getConnState( connState ) {
|
||||
if ( connState === 0 ) { //on open
|
||||
|
||||
|
||||
info_div.innerHTML='连接成功!请点击开始';
|
||||
if (isfilemode==true){
|
||||
info_div.innerHTML='请耐心等待,大文件等待时间更长';
|
||||
start_file_send();
|
||||
}
|
||||
else
|
||||
{
|
||||
btnStart.disabled = false;
|
||||
btnStop.disabled = true;
|
||||
btnConnect.disabled=true;
|
||||
}
|
||||
} else if ( connState === 1 ) {
|
||||
//stop();
|
||||
} else if ( connState === 2 ) {
|
||||
stop();
|
||||
console.log( 'connecttion error' );
|
||||
|
||||
alert("连接地址"+document.getElementById('wssip').value+"失败,请检查asr地址和端口。或试试界面上手动授权,再连接。");
|
||||
btnStart.disabled = true;
|
||||
btnStop.disabled = true;
|
||||
btnConnect.disabled=false;
|
||||
|
||||
|
||||
info_div.innerHTML='请点击连接';
|
||||
}
|
||||
}
|
||||
|
||||
function record()
|
||||
{
|
||||
|
||||
rec.open( function(){
|
||||
rec.start();
|
||||
console.log("开始");
|
||||
btnStart.disabled = true;
|
||||
btnStop.disabled = false;
|
||||
btnConnect.disabled=true;
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
// 识别启动、停止、清空操作
|
||||
function start() {
|
||||
|
||||
// 清除显示
|
||||
clear();
|
||||
//控件状态更新
|
||||
console.log("isfilemode"+isfilemode);
|
||||
|
||||
//启动连接
|
||||
var ret=wsconnecter.wsStart();
|
||||
// 1 is ok, 0 is error
|
||||
if(ret==1){
|
||||
info_div.innerHTML="正在连接asr服务器,请等待...";
|
||||
isRec = true;
|
||||
btnStart.disabled = true;
|
||||
btnStop.disabled = true;
|
||||
btnConnect.disabled=true;
|
||||
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
info_div.innerHTML="请点击开始";
|
||||
btnStart.disabled = true;
|
||||
btnStop.disabled = true;
|
||||
btnConnect.disabled=false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function stop() {
|
||||
var chunk_size = new Array( 5, 10, 5 );
|
||||
var request = {
|
||||
"chunk_size": chunk_size,
|
||||
"wav_name": "h5",
|
||||
"is_speaking": false,
|
||||
"chunk_interval":10,
|
||||
"mode":getAsrMode(),
|
||||
};
|
||||
console.log(request);
|
||||
if(sampleBuf.length>0){
|
||||
wsconnecter.wsSend(sampleBuf);
|
||||
console.log("sampleBuf.length"+sampleBuf.length);
|
||||
sampleBuf=new Int16Array();
|
||||
}
|
||||
wsconnecter.wsSend( JSON.stringify(request) );
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// 控件状态更新
|
||||
|
||||
isRec = false;
|
||||
info_div.innerHTML="发送完数据,请等候,正在识别...";
|
||||
|
||||
if(isfilemode==false){
|
||||
btnStop.disabled = true;
|
||||
btnStart.disabled = true;
|
||||
btnConnect.disabled=true;
|
||||
//wait 3s for asr result
|
||||
setTimeout(function(){
|
||||
console.log("call stop ws!");
|
||||
wsconnecter.wsStop();
|
||||
btnConnect.disabled=false;
|
||||
info_div.innerHTML="请点击连接";}, 3000 );
|
||||
|
||||
|
||||
|
||||
rec.stop(function(blob,duration){
|
||||
|
||||
console.log(blob);
|
||||
var audioBlob = Recorder.pcm2wav(data = {sampleRate:16000, bitRate:16, blob:blob},
|
||||
function(theblob,duration){
|
||||
console.log(theblob);
|
||||
var audio_record = document.getElementById('audio_record');
|
||||
audio_record.src = (window.URL||webkitURL).createObjectURL(theblob);
|
||||
audio_record.controls=true;
|
||||
//audio_record.play();
|
||||
|
||||
|
||||
} ,function(msg){
|
||||
console.log(msg);
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
|
||||
},function(errMsg){
|
||||
console.log("errMsg: " + errMsg);
|
||||
});
|
||||
}
|
||||
// 停止连接
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
function clear() {
|
||||
|
||||
var varArea=document.getElementById('varArea');
|
||||
|
||||
varArea.value="";
|
||||
rec_text="";
|
||||
offline_text="";
|
||||
|
||||
}
|
||||
|
||||
|
||||
function recProcess( buffer, powerLevel, bufferDuration, bufferSampleRate,newBufferIdx,asyncEnd ) {
|
||||
if ( isRec === true ) {
|
||||
var data_48k = buffer[buffer.length-1];
|
||||
|
||||
var array_48k = new Array(data_48k);
|
||||
var data_16k=Recorder.SampleData(array_48k,bufferSampleRate,16000).data;
|
||||
|
||||
sampleBuf = Int16Array.from([...sampleBuf, ...data_16k]);
|
||||
var chunk_size=960; // for asr chunk_size [5, 10, 5]
|
||||
info_div.innerHTML=""+bufferDuration/1000+"s";
|
||||
while(sampleBuf.length>=chunk_size){
|
||||
sendBuf=sampleBuf.slice(0,chunk_size);
|
||||
sampleBuf=sampleBuf.slice(chunk_size,sampleBuf.length);
|
||||
wsconnecter.wsSend(sendBuf);
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
var recive_msg = true;
|
||||
$(document).ready(function() {
|
||||
var host = window.location.hostname
|
||||
var ws = new WebSocket("ws://"+host+":8000/humanecho");
|
||||
//document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
|
||||
ws.onopen = function() {
|
||||
console.log('Connected');
|
||||
};
|
||||
ws.onmessage = function(e) {
|
||||
console.log('Received: ' + e.data);
|
||||
recive_msg = true;
|
||||
data = e
|
||||
var vid = JSON.parse(data.data);
|
||||
console.log(typeof(vid),vid)
|
||||
//document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
|
||||
|
||||
};
|
||||
ws.onclose = function(e) {
|
||||
console.log('Closed');
|
||||
};
|
||||
|
||||
flvPlayer = mpegts.createPlayer({type: 'flv', url: "http://"+host+":8080/live/livestream.flv", isLive: true, enableStashBuffer: false});
|
||||
flvPlayer.attachMediaElement(document.getElementById('video_player'));
|
||||
flvPlayer.load();
|
||||
flvPlayer.play();
|
||||
|
||||
$('#echo-form').on('submit', function(e) {
|
||||
e.preventDefault();
|
||||
var message = $('#message').val();
|
||||
console.log('Sending: ' + message);
|
||||
ws.send(message);
|
||||
$('#message').val('');
|
||||
});
|
||||
});
|
File diff suppressed because one or more lines are too long
|
@ -1,96 +0,0 @@
|
|||
/*
|
||||
pcm编码器+编码引擎
|
||||
https://github.com/xiangyuecn/Recorder
|
||||
|
||||
编码原理:本编码器输出的pcm格式数据其实就是Recorder中的buffers原始数据(经过了重新采样),16位时为LE小端模式(Little Endian),并未经过任何编码处理
|
||||
|
||||
编码的代码和wav.js区别不大,pcm加上一个44字节wav头即成wav文件;所以要播放pcm就很简单了,直接转成wav文件来播放,已提供转换函数 Recorder.pcm2wav
|
||||
*/
|
||||
(function(){
|
||||
"use strict";
|
||||
|
||||
Recorder.prototype.enc_pcm={
|
||||
stable:true
|
||||
,testmsg:"pcm为未封装的原始音频数据,pcm数据文件无法直接播放;支持位数8位、16位(填在比特率里面),采样率取值无限制"
|
||||
};
|
||||
Recorder.prototype.pcm=function(res,True,False){
|
||||
var This=this,set=This.set
|
||||
,size=res.length
|
||||
,bitRate=set.bitRate==8?8:16;
|
||||
|
||||
var buffer=new ArrayBuffer(size*(bitRate/8));
|
||||
var data=new DataView(buffer);
|
||||
var offset=0;
|
||||
|
||||
// 写入采样数据
|
||||
if(bitRate==8) {
|
||||
for(var i=0;i<size;i++,offset++) {
|
||||
//16转8据说是雷霄骅的 https://blog.csdn.net/sevennight1989/article/details/85376149 细节比blqw的按比例的算法清晰点,虽然都有明显杂音
|
||||
var val=(res[i]>>8)+128;
|
||||
data.setInt8(offset,val,true);
|
||||
};
|
||||
}else{
|
||||
for (var i=0;i<size;i++,offset+=2){
|
||||
data.setInt16(offset,res[i],true);
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
True(new Blob([data.buffer],{type:"audio/pcm"}));
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**pcm直接转码成wav,可以直接用来播放;需同时引入wav.js
|
||||
data: {
|
||||
sampleRate:16000 pcm的采样率
|
||||
bitRate:16 pcm的位数 取值:8 或 16
|
||||
blob:blob对象
|
||||
}
|
||||
data如果直接提供的blob将默认使用16位16khz的配置,仅用于测试
|
||||
True(wavBlob,duration)
|
||||
False(msg)
|
||||
**/
|
||||
Recorder.pcm2wav=function(data,True,False){
|
||||
if(data.slice && data.type!=null){//Blob 测试用
|
||||
data={blob:data};
|
||||
};
|
||||
var sampleRate=data.sampleRate||16000,bitRate=data.bitRate||16;
|
||||
if(!data.sampleRate || !data.bitRate){
|
||||
console.warn("pcm2wav必须提供sampleRate和bitRate");
|
||||
};
|
||||
if(!Recorder.prototype.wav){
|
||||
False("pcm2wav必须先加载wav编码器wav.js");
|
||||
return;
|
||||
};
|
||||
|
||||
var reader=new FileReader();
|
||||
reader.onloadend=function(){
|
||||
var pcm;
|
||||
if(bitRate==8){
|
||||
//8位转成16位
|
||||
var u8arr=new Uint8Array(reader.result);
|
||||
pcm=new Int16Array(u8arr.length);
|
||||
for(var j=0;j<u8arr.length;j++){
|
||||
pcm[j]=(u8arr[j]-128)<<8;
|
||||
};
|
||||
}else{
|
||||
pcm=new Int16Array(reader.result);
|
||||
};
|
||||
|
||||
Recorder({
|
||||
type:"wav"
|
||||
,sampleRate:sampleRate
|
||||
,bitRate:bitRate
|
||||
}).mock(pcm,sampleRate).stop(function(wavBlob,duration){
|
||||
True(wavBlob,duration);
|
||||
},False);
|
||||
};
|
||||
reader.readAsArrayBuffer(data.blob);
|
||||
};
|
||||
|
||||
|
||||
|
||||
})();
|
File diff suppressed because it is too large
|
@ -1,86 +0,0 @@
|
|||
/*
|
||||
wav编码器+编码引擎
|
||||
https://github.com/xiangyuecn/Recorder
|
||||
|
||||
当然最佳推荐使用mp3、wav格式,代码也是优先照顾这两种格式
|
||||
浏览器支持情况
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTML/Supported_media_formats
|
||||
|
||||
编码原理:给pcm数据加上一个44直接的wav头即成wav文件;pcm数据就是Recorder中的buffers原始数据(重新采样),16位时为LE小端模式(Little Endian),实质上是未经过任何编码处理
|
||||
*/
|
||||
(function(){
|
||||
"use strict";
|
||||
|
||||
Recorder.prototype.enc_wav={
|
||||
stable:true
|
||||
,testmsg:"支持位数8位、16位(填在比特率里面),采样率取值无限制"
|
||||
};
|
||||
Recorder.prototype.wav=function(res,True,False){
|
||||
var This=this,set=This.set
|
||||
,size=res.length
|
||||
,sampleRate=set.sampleRate
|
||||
,bitRate=set.bitRate==8?8:16;
|
||||
|
||||
//编码数据 https://github.com/mattdiamond/Recorderjs https://www.cnblogs.com/blqw/p/3782420.html https://www.cnblogs.com/xiaoqi/p/6993912.html
|
||||
var dataLength=size*(bitRate/8);
|
||||
var buffer=new ArrayBuffer(44+dataLength);
|
||||
var data=new DataView(buffer);
|
||||
|
||||
var offset=0;
|
||||
var writeString=function(str){
|
||||
for (var i=0;i<str.length;i++,offset++) {
|
||||
data.setUint8(offset,str.charCodeAt(i));
|
||||
};
|
||||
};
|
||||
var write16=function(v){
|
||||
data.setUint16(offset,v,true);
|
||||
offset+=2;
|
||||
};
|
||||
var write32=function(v){
|
||||
data.setUint32(offset,v,true);
|
||||
offset+=4;
|
||||
};
|
||||
|
||||
/* RIFF identifier */
|
||||
writeString('RIFF');
|
||||
/* RIFF chunk length */
|
||||
write32(36+dataLength);
|
||||
/* RIFF type */
|
||||
writeString('WAVE');
|
||||
/* format chunk identifier */
|
||||
writeString('fmt ');
|
||||
/* format chunk length */
|
||||
write32(16);
|
||||
/* sample format (raw) */
|
||||
write16(1);
|
||||
/* channel count */
|
||||
write16(1);
|
||||
/* sample rate */
|
||||
write32(sampleRate);
|
||||
/* byte rate (sample rate * block align) */
|
||||
write32(sampleRate*(bitRate/8));// *1 声道
|
||||
/* block align (channel count * bytes per sample) */
|
||||
write16(bitRate/8);// *1 声道
|
||||
/* bits per sample */
|
||||
write16(bitRate);
|
||||
/* data chunk identifier */
|
||||
writeString('data');
|
||||
/* data chunk length */
|
||||
write32(dataLength);
|
||||
// 写入采样数据
|
||||
if(bitRate==8) {
|
||||
for(var i=0;i<size;i++,offset++) {
|
||||
//16转8据说是雷霄骅的 https://blog.csdn.net/sevennight1989/article/details/85376149 细节比blqw的按比例的算法清晰点,虽然都有明显杂音
|
||||
var val=(res[i]>>8)+128;
|
||||
data.setInt8(offset,val,true);
|
||||
};
|
||||
}else{
|
||||
for (var i=0;i<size;i++,offset+=2){
|
||||
data.setInt16(offset,res[i],true);
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
True(new Blob([data.buffer],{type:"audio/wav"}));
|
||||
}
|
||||
})();
|
|
@ -1,119 +0,0 @@
|
|||
/**
|
||||
* Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
|
||||
* Reserved. MIT License (https://opensource.org/licenses/MIT)
|
||||
*/
|
||||
/* 2021-2023 by zhaoming,mali aihealthx.com */
|
||||
|
||||
function WebSocketConnectMethod( config ) { //定义socket连接方法类
|
||||
|
||||
|
||||
var speechSokt;
|
||||
var connKeeperID;
|
||||
|
||||
var msgHandle = config.msgHandle;
|
||||
var stateHandle = config.stateHandle;
|
||||
|
||||
this.wsStart = function () {
|
||||
var Uri = document.getElementById('wssip').value; //"wss://111.205.137.58:5821/wss/" //设置wss asr online接口地址 如 wss://X.X.X.X:port/wss/
|
||||
if(Uri.match(/wss:\S*|ws:\S*/))
|
||||
{
|
||||
console.log("Uri"+Uri);
|
||||
}
|
||||
else
|
||||
{
|
||||
alert("请检查wss地址正确性");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( 'WebSocket' in window ) {
|
||||
speechSokt = new WebSocket( Uri ); // 定义socket连接对象
|
||||
speechSokt.onopen = function(e){onOpen(e);}; // 定义响应函数
|
||||
speechSokt.onclose = function(e){
|
||||
console.log("onclose ws!");
|
||||
//speechSokt.close();
|
||||
onClose(e);
|
||||
};
|
||||
speechSokt.onmessage = function(e){onMessage(e);};
|
||||
speechSokt.onerror = function(e){onError(e);};
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
alert('当前浏览器不支持 WebSocket');
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// 定义停止与发送函数
|
||||
this.wsStop = function () {
|
||||
if(speechSokt != undefined) {
|
||||
console.log("stop ws!");
|
||||
speechSokt.close();
|
||||
}
|
||||
};
|
||||
|
||||
this.wsSend = function ( oneData ) {
|
||||
|
||||
if(speechSokt == undefined) return;
|
||||
if ( speechSokt.readyState === 1 ) { // 0:CONNECTING, 1:OPEN, 2:CLOSING, 3:CLOSED
|
||||
|
||||
speechSokt.send( oneData );
|
||||
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
// SOCEKT连接中的消息与状态响应
|
||||
function onOpen( e ) {
|
||||
// 发送json
|
||||
var chunk_size = new Array( 5, 10, 5 );
|
||||
var request = {
|
||||
"chunk_size": chunk_size,
|
||||
"wav_name": "h5",
|
||||
"is_speaking": true,
|
||||
"chunk_interval":10,
|
||||
"itn":false,
|
||||
"mode":getAsrMode(),
|
||||
|
||||
};
|
||||
if(isfilemode)
|
||||
{
|
||||
request.wav_format=file_ext;
|
||||
if(file_ext=="wav")
|
||||
{
|
||||
request.wav_format="PCM";
|
||||
request.audio_fs=file_sample_rate;
|
||||
}
|
||||
}
|
||||
|
||||
var hotwords=getHotwords();
|
||||
|
||||
if(hotwords!=null )
|
||||
{
|
||||
request.hotwords=hotwords;
|
||||
}
|
||||
console.log(JSON.stringify(request));
|
||||
speechSokt.send(JSON.stringify(request));
|
||||
console.log("连接成功");
|
||||
stateHandle(0);
|
||||
|
||||
}
|
||||
|
||||
function onClose( e ) {
|
||||
stateHandle(1);
|
||||
}
|
||||
|
||||
function onMessage( e ) {
|
||||
|
||||
msgHandle( e );
|
||||
}
|
||||
|
||||
function onError( e ) {
|
||||
|
||||
info_div.innerHTML="连接"+e;
|
||||
console.log(e);
|
||||
stateHandle(2);
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -1,24 +0,0 @@
1. Start the speech recognition server
Create a virtual environment:
conda create -n funasr
conda activate funasr
Install the dependencies:
pip install torch
pip install modelscope
pip install testresources
pip install websockets
pip install torchaudio
pip install FunASR
pip install pyaudio

python funasr_wss_server.py --port 10095
or
python funasr_wss_server.py --host "0.0.0.0" --port 10197 --ngpu 0

https://github.com/alibaba-damo-academy/FunASR
https://zhuanlan.zhihu.com/p/649935170
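funasr_wss_server.py (removed in this commit) expects one JSON control message, then raw 16 kHz / 16-bit PCM chunks, then a final {"is_speaking": false} message, which is what the deleted web client (main.js / wsconnecter.js) sends. A minimal command-line sketch of that handshake, assuming the websockets package and a test.pcm file you supply:

```python
import asyncio
import json
import websockets

async def recognize(pcm_path="test.pcm", uri="ws://127.0.0.1:10095"):
    # Use wss:// plus an ssl.SSLContext instead if the server was started with its default certfile.
    async with websockets.connect(uri, subprotocols=["binary"]) as ws:
        # First message: the same control fields the deleted web client sends.
        await ws.send(json.dumps({
            "mode": "2pass",
            "chunk_size": [5, 10, 5],
            "chunk_interval": 10,
            "wav_name": "test",
            "is_speaking": True,
        }))
        with open(pcm_path, "rb") as f:              # 16 kHz, 16-bit mono PCM
            while chunk := f.read(1920):             # 1920 bytes = 60 ms at 16 kHz / 16-bit
                await ws.send(chunk)
        await ws.send(json.dumps({"is_speaking": False}))   # flush the final offline result
        try:
            while True:
                msg = await asyncio.wait_for(ws.recv(), timeout=5)
                print(json.loads(msg))
        except asyncio.TimeoutError:
            pass

asyncio.run(recognize())
```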
|
@ -1,17 +0,0 @@
## certificate generation by yourself
A self-generated certificate may not be accepted by all browsers due to security concerns; it is better to buy or download an authenticated SSL certificate from an authorized agency.

```shell
### 1) Generate a private key
openssl genrsa -des3 -out server.key 2048

### 2) Generate a csr file
openssl req -new -key server.key -out server.csr

### 3) Remove the passphrase
cp server.key server.key.org
openssl rsa -in server.key.org -out server.key

### 4) Generate a crt file, valid for 1 year
openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt
```
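The deleted funasr_wss_server.py loads the resulting pair with Python's ssl module. A quick sanity check that the files generated above are usable, assuming the ssl_key/ layout that script uses and that the passphrase was removed in step 3:

```python
import ssl

# Same call funasr_wss_server.py makes before serving wss.
ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
ctx.load_cert_chain("ssl_key/server.crt", keyfile="ssl_key/server.key")
print("certificate and key load cleanly")
```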
@ -1,17 +0,0 @@
## Generate the certificate yourself
Generate a certificate (note that such a certificate is not trusted by all browsers; some require manual authorization before access works, so an officially issued SSL certificate is preferable).

```shell
### 1) Generate a private key, filling in the prompts
openssl genrsa -des3 -out server.key 1024

### 2) Generate a csr file, filling in the prompts
openssl req -new -key server.key -out server.csr

### Remove the passphrase
cp server.key server.key.org
openssl rsa -in server.key.org -out server.key

### Generate a crt file, valid for 1 year (365 days)
openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt
```
@ -1,21 +0,0 @@
|
|||
-----BEGIN CERTIFICATE-----
|
||||
MIIDhTCCAm0CFGB0Po2IZ0hESavFpcSGRNb9xrNXMA0GCSqGSIb3DQEBCwUAMH8x
|
||||
CzAJBgNVBAYTAkNOMRAwDgYDVQQIDAdiZWlqaW5nMRAwDgYDVQQHDAdiZWlqaW5n
|
||||
MRAwDgYDVQQKDAdhbGliYWJhMRAwDgYDVQQLDAdhbGliYWJhMRAwDgYDVQQDDAdh
|
||||
bGliYWJhMRYwFAYJKoZIhvcNAQkBFgdhbGliYWJhMB4XDTIzMDYxODA2NTcxM1oX
|
||||
DTI0MDYxNzA2NTcxM1owfzELMAkGA1UEBhMCQ04xEDAOBgNVBAgMB2JlaWppbmcx
|
||||
EDAOBgNVBAcMB2JlaWppbmcxEDAOBgNVBAoMB2FsaWJhYmExEDAOBgNVBAsMB2Fs
|
||||
aWJhYmExEDAOBgNVBAMMB2FsaWJhYmExFjAUBgkqhkiG9w0BCQEWB2FsaWJhYmEw
|
||||
ggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDH9Np1oBunQKMt5M/nU2nD
|
||||
qVHojXwKKwyiK9DSeGikKwArH2S9NUZNu5RDg46u0iWmT+Vz+toQhkJnfatOVskW
|
||||
f2bsI54n5eOvmoWOKDXYm2MscvjkuNiYRbqzgUuP9ZSx8k3uyRs++wvmwIoU+PV1
|
||||
EYFcjk1P2jUGUvKaUlmIDsjs1wOMIbKO6I0UX20FNKlGWacqMR/Dx2ltmGKT1Kaz
|
||||
Y335lor0bcfQtH542rGS7PDz6JMRNjFT1VFcmnrjRElf4STbaOiIfOjMVZ/9O8Hr
|
||||
LFItyvkb01Mt7O0jhAXHuE1l/8Y0N3MCYkELG9mQA0BYCFHY0FLuJrGoU03b8KWj
|
||||
AgMBAAEwDQYJKoZIhvcNAQELBQADggEBAEjC9jB1WZe2ki2JgCS+eAMFsFegiNEz
|
||||
D0klVB3kiCPK0g7DCxvfWR6kAgEynxRxVX6TN9QcLr4paZItC1Fu2gUMTteNqEuc
|
||||
dcixJdu9jumuUMBlAKgL5Yyk3alSErsn9ZVF/Q8Kx5arMO/TW3Ulsd8SWQL5C/vq
|
||||
Fe0SRhpKKoADPfl8MT/XMfB/MwNxVhYDSHzJ1EiN8O5ce6q2tTdi1mlGquzNxhjC
|
||||
7Q0F36V1HksfzolrlRWRKYP16isnaKUdFfeAzaJsYw33o6VRbk6fo2fTQDHS0wOs
|
||||
Q48Moc5UxKMLaMMCqLPpWu0TZse+kIw1nTWXk7yJtK0HK5PN3rTocEw=
|
||||
-----END CERTIFICATE-----
|
|
@ -1,27 +0,0 @@
|
|||
-----BEGIN RSA PRIVATE KEY-----
|
||||
MIIEowIBAAKCAQEAx/TadaAbp0CjLeTP51Npw6lR6I18CisMoivQ0nhopCsAKx9k
|
||||
vTVGTbuUQ4OOrtIlpk/lc/raEIZCZ32rTlbJFn9m7COeJ+Xjr5qFjig12JtjLHL4
|
||||
5LjYmEW6s4FLj/WUsfJN7skbPvsL5sCKFPj1dRGBXI5NT9o1BlLymlJZiA7I7NcD
|
||||
jCGyjuiNFF9tBTSpRlmnKjEfw8dpbZhik9Sms2N9+ZaK9G3H0LR+eNqxkuzw8+iT
|
||||
ETYxU9VRXJp640RJX+Ek22joiHzozFWf/TvB6yxSLcr5G9NTLeztI4QFx7hNZf/G
|
||||
NDdzAmJBCxvZkANAWAhR2NBS7iaxqFNN2/ClowIDAQABAoIBAQC1/STX6eFBWJMs
|
||||
MhUHdePNMU5bWmqK1qOo9jgZV33l7T06Alit3M8f8JoA2LwEYT/jHtS3upi+cXP+
|
||||
vWIs6tAaqdoDEmff6FxSd1EXEYHwo3yf+ASQJ6z66nwC5KrhW6L6Uo6bxm4F5Hfw
|
||||
jU0fyXeeFVCn7Nxw0SlxmA02Z70VFsL8BK9i3kajU18y6drf4VUm55oMEtdEmOh2
|
||||
eKn4qspBcNblbw+L0QJ+5kN1iRUyJHesQ1GpS+L3yeMVFCW7ctL4Bgw8Z7LE+z7i
|
||||
C0Weyhul8vuT+7nfF2T37zsSa8iixqpkTokeYh96CZ5nDqa2IDx3oNHWSlkIsV6g
|
||||
6EUEl9gBAoGBAPIw/M6fIDetMj8f1wG7mIRgJsxI817IS6aBSwB5HkoCJFfrR9Ua
|
||||
jMNCFIWNs/Om8xeGhq/91hbnCYDNK06V5CUa/uk4CYRs2eQZ3FKoNowtp6u/ieuU
|
||||
qg8bXM/vR2VWtWVixAMdouT3+KtvlgaVmSnrPiwO4pecGrwu5NW1oJCFAoGBANNb
|
||||
aE3AcwTDYsqh0N/75G56Q5s1GZ6MCDQGQSh8IkxL6Vg59KnJiIKQ7AxNKFgJZMtY
|
||||
zZHaqjazeHjOGTiYiC7MMVJtCcOBEfjCouIG8btNYv7Y3dWnOXRZni2telAsRrH9
|
||||
xS5LaFdCRTjVAwSsppMGwiQtyl6sGLMyz0SXoYoHAoGAKdkFFb6xFm26zOV3hTkg
|
||||
9V6X1ZyVUL9TMwYMK5zB+w+7r+VbmBrqT6LPYPRHL8adImeARlCZ+YMaRUMuRHnp
|
||||
3e94NFwWaOdWDu/Y/f9KzZXl7us9rZMWf12+/77cm0oMNeSG8fLg/qdKNHUneyPG
|
||||
P1QCfiJkTMYQaIvBxpuHjvECgYAKlZ9JlYOtD2PZJfVh4il0ZucP1L7ts7GNeWq1
|
||||
7lGBZKPQ6UYZYqBVeZB4pTyJ/B5yGIZi8YJoruAvnJKixPC89zjZGeDNS59sx8KE
|
||||
cziT2rJEdPPXCULVUs+bFf70GOOJcl33jYsyI3139SLrjwHghwwd57UkvJWYE8lR
|
||||
dA6A7QKBgEfTC+NlzqLPhbB+HPl6CvcUczcXcI9M0heVz/DNMA+4pjxPnv2aeIwh
|
||||
cL2wq2xr+g1wDBWGVGkVSuZhXm5E6gDetdyVeJnbIUhVjBblnbhHV6GrudjbXGnJ
|
||||
W9cBgu6DswyHU2cOsqmimu8zLmG6/dQYFHt+kUWGxN8opCzVjgWa
|
||||
-----END RSA PRIVATE KEY-----
|
|
@ -1,139 +0,0 @@
# The gpt-sovits approach is used: bert-sovits suits training on long audio, while gpt-sovits gives fast inference on short audio
## Deploy TTS inference
git clone https://github.com/X-T-E-R/GPT-SoVITS-Inference.git

## 1. Install dependencies
```
conda create -n GPTSoVits python=3.9
conda activate GPTSoVits
bash install.sh
```
Download the pretrained models from [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) and place them in `GPT_SoVITS\pretrained_models`.

Note
```
Put the GPT-SoVITS model files into the pretrained_models directory
```
like this
```
pretrained_models/
--chinese-hubert-base
--chinese-roberta-wwm-ext-large
s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
s2D488k.pth
s2G488k.pth
```

## 2. Model Folder Format
Model files can be downloaded from https://www.yuque.com/xter/zibxlp/gsximn7ditzgispg
Put the downloaded model files under the trained directory, e.g. `trained/Character1/`
Put the pth / ckpt / wav files in it; the wav should be named as the prompt text
Like:

```
trained
--hutao
----hutao-e75.ckpt
----hutao_e60_s3360.pth
----hutao said something.wav
```

## 3. Start
### 3.1 Start the webui
python webuis/character_manager/webui.py
Here you can configure the uploaded model data.
### 3.2 Start the api service:
python app.py

If you get an error that cmudict cannot be found, download it from https://github.com/nltk/nltk_data, rename the packages directory to nltk_data and put it in your home directory.
### 3.3 TTS test
Open http://127.0.0.1:5000 to test.

### 3.4 API test
Open http://127.0.0.1:5000/character_list to check that it works.
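The same check can be scripted; a small sketch with requests, assuming the api service from 3.2 is running locally:

```python
import requests

# /character_list returns a JSON object mapping character names to their emotions (see 4.1).
chars = requests.get("http://127.0.0.1:5000/character_list", timeout=10).json()
for name, emotions in chars.items():
    print(name, "->", ", ".join(emotions))
```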

## 4. API reference
### 4.1 Character and Emotion List
To obtain the supported characters and their corresponding emotions, please visit the following URL:
- URL: `http://127.0.0.1:5000/character_list`
- Returns: A JSON format list of characters and corresponding emotions
- Method: `GET`

```
{
    "Hanabi": [
        "default",
        "Normal",
        "Yandere"
    ],
    "Hutao": [
        "default"
    ]
}
```

### 4.2 Text-to-Speech

- URL: `http://127.0.0.1:5000/tts`
- Returns: Audio on success. Error message on failure.
- Method: `GET`/`POST`
```
{
    "method": "POST",
    "body": {
        "character": "${chaName}",
        "emotion": "${Emotion}",
        "text": "${speakText}",
        "text_language": "${textLanguage}",
        "batch_size": ${batch_size},
        "speed": ${speed},
        "top_k": ${topK},
        "top_p": ${topP},
        "temperature": ${temperature},
        "stream": "${stream}",
        "format": "${Format}",
        "save_temp": "${saveTemp}"
    }
}
```

##### Parameter Explanation

- **text**: The text to be converted, URL encoding is recommended.
- **character**: Character folder name, pay attention to case sensitivity, full/half width, and language.
- **emotion**: Character emotion; it must be an emotion the character actually supports, otherwise the default emotion is used.
- **text_language**: Text language (auto / zh / en / ja), default is multilingual mixed.
- **top_k**, **top_p**, **temperature**: GPT model parameters, no need to modify if unfamiliar.
- **batch_size**: How many batches at a time; can be increased for faster processing on a powerful machine, integer, default is 1.
- **speed**: Speech speed, default is 1.0.
- **save_temp**: Whether to save temporary files; when true, the backend saves the generated audio and identical subsequent requests return that data directly, default is false.
- **stream**: Whether to stream; when true, audio is returned sentence by sentence, default is false.
- **format**: Format, default is WAV, allows MP3 / WAV / OGG.
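A sketch of the POST call from Python, in the same spirit as the streaming test script at the end of this diff; the character name and text are placeholders, and the request body is assumed to be accepted as plain JSON exactly as documented above:

```python
import requests

payload = {
    "character": "Hutao",          # placeholder: must match a folder under trained/
    "emotion": "default",
    "text": "Hello from GPT-SoVITS-Inference.",
    "text_language": "auto",
    "batch_size": 1,
    "speed": 1.0,
    "stream": "false",
    "format": "wav",
}
resp = requests.post("http://127.0.0.1:5000/tts", json=payload, timeout=120)
resp.raise_for_status()
with open("out.wav", "wb") as f:   # non-streaming: the whole wav arrives in one response
    f.write(resp.content)
```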

## Deploy TTS training
https://github.com/RVC-Boss/GPT-SoVITS
Deploy it as described in its documentation, then copy the trained models into the trained directory of the inference service.

## If you deploy on autodl
Use https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS as the base image for a quick deployment.
### Download
```
https://github.com/X-T-E-R/GPT-SoVITS-Inference
```
### Install
```
cd GPT-SoVITS-Inference
pip3 install -r requirements.txt
cp -r GPT_SoVITS/pretrained_models/ ./GPT_SoVITS/pretrained_models
```

### Start the api
```
python3 app.py
```

### Start the webui
```
python3 webuis/character_manager/webui.py
```
tts/README.md
@ -1,103 +0,0 @@
# The gpt-sovits approach is used: bert-sovits suits training on long audio, while gpt-sovits gives fast inference on short audio
## Deploy TTS inference
git clone https://github.com/RVC-Boss/GPT-SoVITS.git
git checkout fast_inference_
## 1. Install dependencies
```
conda create -n GPTSoVits python=3.9
conda activate GPTSoVits
bash install.sh
```
Download the pretrained models from [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) and place them in `GPT_SoVITS/GPT_SoVITS/pretrained_models`.

Note
```
Put the GPT-SoVITS model files into the pretrained_models directory
```
like this
```
pretrained_models/
--chinese-hubert-base
--chinese-roberta-wwm-ext-large
s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
s2D488k.pth
s2G488k.pth
```

## 3. Start
### 3.1 Start the webui (to check the results)
python GPT_SoVITS/inference_webui.py

### 3.2 Start the api service:
python api_v3.py


## 4. API reference

### 4.1 Text-to-Speech

endpoint: `/tts`
GET:
```
http://127.0.0.1:9880/tts?text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_lang=zh&ref_audio_path=archive_jingyuan_1.wav&prompt_lang=zh&prompt_text=我是「罗浮」云骑将军景元。不必拘谨,「将军」只是一时的身份,你称呼我景元便可&text_split_method=cut5&batch_size=1&media_type=wav&streaming_mode=true
```

POST:
```json
{
    "text": "",                   # str.(required) text to be synthesized
    "text_lang": "",              # str.(required) language of the text to be synthesized
    "ref_audio_path": "",         # str.(required) reference audio path
    "prompt_text": "",            # str.(optional) prompt text for the reference audio
    "prompt_lang": "",            # str.(required) language of the prompt text for the reference audio
    "top_k": 5,                   # int.(optional) top k sampling
    "top_p": 1,                   # float.(optional) top p sampling
    "temperature": 1,             # float.(optional) temperature for sampling
    "text_split_method": "cut5",  # str.(optional) text split method, see text_segmentation_method.py for details
    "batch_size": 1,              # int.(optional) batch size for inference
    "batch_threshold": 0.75,      # float.(optional) threshold for batch splitting
    "split_bucket": true,         # bool.(optional) whether to split the batch into multiple buckets
    "speed_factor": 1.0,          # float.(optional) control the speed of the synthesized audio
    "fragment_interval": 0.3,     # float.(optional) control the interval of the audio fragments
    "seed": -1,                   # int.(optional) random seed for reproducibility
    "media_type": "wav",          # str.(optional) media type of the output audio, supports "wav", "raw", "ogg", "aac"
    "streaming_mode": false,      # bool.(optional) whether to return a streaming response
    "parallel_infer": true,       # bool.(optional) whether to use parallel inference
    "repetition_penalty": 1.35,   # float.(optional) repetition penalty for the T2S model
    "tts_infer_yaml_path": "GPT_SoVITS/configs/tts_infer.yaml"  # str.(optional) tts infer yaml path
}
```
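A sketch of the streaming GET call from Python, reusing the parameters of the example URL above; the reference audio path is a placeholder on the server side and api_v3.py is assumed to be running on its default port 9880:

```python
import requests

params = {
    "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
    "text_lang": "zh",
    "ref_audio_path": "archive_jingyuan_1.wav",   # placeholder reference audio on the server
    "prompt_lang": "zh",
    "prompt_text": "我是「罗浮」云骑将军景元。不必拘谨,「将军」只是一时的身份,你称呼我景元便可",
    "text_split_method": "cut5",
    "batch_size": 1,
    "media_type": "wav",
    "streaming_mode": "true",
}
with requests.get("http://127.0.0.1:9880/tts", params=params, stream=True, timeout=300) as resp:
    resp.raise_for_status()
    with open("out.wav", "wb") as f:
        for chunk in resp.iter_content(chunk_size=1024):   # audio arrives fragment by fragment
            f.write(chunk)
```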

## Deploy TTS training
https://github.com/RVC-Boss/GPT-SoVITS
Switching to your own trained models:
### Switch the GPT model

endpoint: `/set_gpt_weights`

GET:
```
http://127.0.0.1:9880/set_gpt_weights?weights_path=GPT_SoVITS/pretrained_models/xxx.ckpt
```
RESP:
success: returns "success", http code 200
failure: returns a json with the error message, http code 400


### Switch the SoVITS model

endpoint: `/set_sovits_weights`

GET:
```
http://127.0.0.1:9880/set_sovits_weights?weights_path=GPT_SoVITS/pretrained_models/xxx.pth
```

RESP:
success: returns "success", http code 200
failure: returns a json with the error message, http code 400

## If you deploy on autodl
Use https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS as the base image for a quick deployment.
@ -1,28 +0,0 @@
import requests
import pyaudio

# URL for streaming audio; feel free to change this to a POST request
stream_url = 'http://127.0.0.1:5000/tts?text=这是一段测试文本,旨在通过多种语言风格和复杂性的内容来全面检验文本到语音系统的性能。接下来,我们会探索各种主题和语言结构,包括文学引用、技术性描述、日常会话以及诗歌等。首先,让我们从一段简单的描述性文本开始:“在一个阳光明媚的下午,一位年轻的旅者站在山顶上,眺望着下方那宽广而繁忙的城市。他的心中充满了对未来的憧憬和对旅途的期待。”这段文本测试了系统对自然景观描写的处理能力和情感表达的细腻程度。&stream=true'

# Initialize pyaudio
p = pyaudio.PyAudio()

# Open an audio output stream (16-bit, mono, 32 kHz)
stream = p.open(format=p.get_format_from_width(2),
                channels=1,
                rate=32000,
                output=True)

# Fetch the audio stream with requests; feel free to change this to a POST request
response = requests.get(stream_url, stream=True)

# Read chunks and play them
for data in response.iter_content(chunk_size=1024):
    stream.write(data)

# Stop and close the stream
stream.stop_stream()
stream.close()

# Terminate pyaudio
p.terminate()