remove unused code

This commit is contained in:
lipku 2024-09-20 21:25:07 +08:00
parent a8b40fa813
commit 00dbc71db9
21 changed files with 0 additions and 3408 deletions


@ -1,57 +0,0 @@
# autodl tutorial
## autodl image address
https://www.codewithgpu.com/i/lipku/metahuman-stream/base
## Code directory
/root/metahuman-stream/
## Installing dependencies
```
cd metahuman-stream
git pull
pip install -r requirements.txt
```
## Running
rtcpush is recommended; in plain webrtc mode the video sometimes fails to display.
### Start an srs service on any host with a public ip
```
export CANDIDATE='<public ip of the server>'
docker run --rm --env CANDIDATE=$CANDIDATE \
-p 1935:1935 -p 8080:8080 -p 1985:1985 -p 8000:8000/udp \
registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5 \
objs/srs -c conf/rtc.conf
```
### Push the stream to the srs server
If the push fails, first check that srs is reachable (see the sketch at the end of this page).
```
python app.py --listenport 6006 --transport rtcpush --push_url 'http://<public ip of the Alibaba Cloud server>:1985/rtc/v1/whip/?app=live&stream=livestream'
```
### Access
The page to open is the static rtcpushapi.html:
http://<autodl public address>/rtcpushapi.html
You need to edit web/rtcpushapi.html in the project directory, replacing
```
var url = "http://"+host+":1985/rtc/v1/whep/?app=live&stream=livestream"
```
with
```
var url = "http://<public ip>:1985/rtc/v1/whep/?app=live&stream=livestream"
```
The change looks like this:
<img src="./img/20240530112922.jpg">
On success you should see:
![img.png](./img/success.png)
## Notes
1. On autodl, personal accounts must use the official ssh proxy tool to forward port 6006 before the page can be reached.
2. Audio latency still needs srs-side tuning.
3. musetalk does not support rtmp streaming yet, but it does support rtcpush.
4. A musetalk tutorial is coming soon.
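
Before starting app.py, it can help to confirm the srs HTTP API is reachable from the autodl box. A minimal sketch (not part of the original tutorial), assuming the default srs API endpoint `/api/v1/versions` on port 1985; `SRS_HOST` is a placeholder:
```python
import requests

SRS_HOST = "1.2.3.4"  # placeholder: public ip of your srs server

# Default srs HTTP API; a 200 response means srs is up and port 1985 is open.
resp = requests.get(f"http://{SRS_HOST}:1985/api/v1/versions", timeout=5)
resp.raise_for_status()
print(resp.json())
```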

Binary image file removed (389 KiB).

Binary image file removed (1.1 MiB).


@ -1,315 +0,0 @@
import asyncio
import json
import websockets
import time
import logging
import tracemalloc
import numpy as np
import argparse
import ssl

parser = argparse.ArgumentParser()
parser.add_argument("--host",
                    type=str,
                    default="0.0.0.0",
                    required=False,
                    help="host ip, localhost, 0.0.0.0")
parser.add_argument("--port",
                    type=int,
                    default=10095,
                    required=False,
                    help="grpc server port")
parser.add_argument("--asr_model",
                    type=str,
                    default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
                    help="model from modelscope")
parser.add_argument("--asr_model_revision",
                    type=str,
                    default="v2.0.4",
                    help="")
parser.add_argument("--asr_model_online",
                    type=str,
                    default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online",
                    help="model from modelscope")
parser.add_argument("--asr_model_online_revision",
                    type=str,
                    default="v2.0.4",
                    help="")
parser.add_argument("--vad_model",
                    type=str,
                    default="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
                    help="model from modelscope")
parser.add_argument("--vad_model_revision",
                    type=str,
                    default="v2.0.4",
                    help="")
parser.add_argument("--punc_model",
                    type=str,
                    default="iic/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727",
                    help="model from modelscope")
parser.add_argument("--punc_model_revision",
                    type=str,
                    default="v2.0.4",
                    help="")
parser.add_argument("--ngpu",
                    type=int,
                    default=1,
                    help="0 for cpu, 1 for gpu")
parser.add_argument("--device",
                    type=str,
                    default="cuda",
                    help="cuda, cpu")
parser.add_argument("--ncpu",
                    type=int,
                    default=4,
                    help="cpu cores")
parser.add_argument("--certfile",
                    type=str,
                    default="ssl_key/server.crt",
                    required=False,
                    help="certfile for ssl")
parser.add_argument("--keyfile",
                    type=str,
                    default="ssl_key/server.key",
                    required=False,
                    help="keyfile for ssl")
args = parser.parse_args()

websocket_users = set()

print("model loading")
from funasr import AutoModel

# asr
model_asr = AutoModel(model=args.asr_model,
                      model_revision=args.asr_model_revision,
                      ngpu=args.ngpu,
                      ncpu=args.ncpu,
                      device=args.device,
                      disable_pbar=True,
                      disable_log=True,
                      )
# asr
model_asr_streaming = AutoModel(model=args.asr_model_online,
                                model_revision=args.asr_model_online_revision,
                                ngpu=args.ngpu,
                                ncpu=args.ncpu,
                                device=args.device,
                                disable_pbar=True,
                                disable_log=True,
                                )
# vad
model_vad = AutoModel(model=args.vad_model,
                      model_revision=args.vad_model_revision,
                      ngpu=args.ngpu,
                      ncpu=args.ncpu,
                      device=args.device,
                      disable_pbar=True,
                      disable_log=True,
                      # chunk_size=60,
                      )

if args.punc_model != "":
    model_punc = AutoModel(model=args.punc_model,
                           model_revision=args.punc_model_revision,
                           ngpu=args.ngpu,
                           ncpu=args.ncpu,
                           device=args.device,
                           disable_pbar=True,
                           disable_log=True,
                           )
else:
    model_punc = None

print("model loaded! only support one client at the same time now!!!!")


async def ws_reset(websocket):
    print("ws reset now, total num is ", len(websocket_users))

    websocket.status_dict_asr_online["cache"] = {}
    websocket.status_dict_asr_online["is_final"] = True
    websocket.status_dict_vad["cache"] = {}
    websocket.status_dict_vad["is_final"] = True
    websocket.status_dict_punc["cache"] = {}

    await websocket.close()


async def clear_websocket():
    for websocket in websocket_users:
        await ws_reset(websocket)
    websocket_users.clear()


async def ws_serve(websocket, path):
    frames = []
    frames_asr = []
    frames_asr_online = []
    global websocket_users
    # await clear_websocket()
    websocket_users.add(websocket)
    websocket.status_dict_asr = {}
    websocket.status_dict_asr_online = {"cache": {}, "is_final": False}
    websocket.status_dict_vad = {'cache': {}, "is_final": False}
    websocket.status_dict_punc = {'cache': {}}
    websocket.chunk_interval = 10
    websocket.vad_pre_idx = 0
    speech_start = False
    speech_end_i = -1
    websocket.wav_name = "microphone"
    websocket.mode = "2pass"
    print("new user connected", flush=True)

    try:
        async for message in websocket:
            if isinstance(message, str):
                messagejson = json.loads(message)

                if "is_speaking" in messagejson:
                    websocket.is_speaking = messagejson["is_speaking"]
                    websocket.status_dict_asr_online["is_final"] = not websocket.is_speaking
                if "chunk_interval" in messagejson:
                    websocket.chunk_interval = messagejson["chunk_interval"]
                if "wav_name" in messagejson:
                    websocket.wav_name = messagejson.get("wav_name")
                if "chunk_size" in messagejson:
                    chunk_size = messagejson["chunk_size"]
                    if isinstance(chunk_size, str):
                        chunk_size = chunk_size.split(',')
                    websocket.status_dict_asr_online["chunk_size"] = [int(x) for x in chunk_size]
                if "encoder_chunk_look_back" in messagejson:
                    websocket.status_dict_asr_online["encoder_chunk_look_back"] = messagejson["encoder_chunk_look_back"]
                if "decoder_chunk_look_back" in messagejson:
                    websocket.status_dict_asr_online["decoder_chunk_look_back"] = messagejson["decoder_chunk_look_back"]
                if "hotword" in messagejson:
                    websocket.status_dict_asr["hotword"] = messagejson["hotword"]
                if "mode" in messagejson:
                    websocket.mode = messagejson["mode"]

            websocket.status_dict_vad["chunk_size"] = int(websocket.status_dict_asr_online["chunk_size"][1] * 60 / websocket.chunk_interval)
            if len(frames_asr_online) > 0 or len(frames_asr) > 0 or not isinstance(message, str):
                if not isinstance(message, str):
                    frames.append(message)
                    duration_ms = len(message) // 32
                    websocket.vad_pre_idx += duration_ms

                    # asr online
                    frames_asr_online.append(message)
                    websocket.status_dict_asr_online["is_final"] = speech_end_i != -1
                    if len(frames_asr_online) % websocket.chunk_interval == 0 or websocket.status_dict_asr_online["is_final"]:
                        if websocket.mode == "2pass" or websocket.mode == "online":
                            audio_in = b"".join(frames_asr_online)
                            try:
                                await async_asr_online(websocket, audio_in)
                            except:
                                print(f"error in asr streaming, {websocket.status_dict_asr_online}")
                        frames_asr_online = []
                    if speech_start:
                        frames_asr.append(message)
                    # vad online
                    try:
                        speech_start_i, speech_end_i = await async_vad(websocket, message)
                    except:
                        print("error in vad")
                    if speech_start_i != -1:
                        speech_start = True
                        beg_bias = (websocket.vad_pre_idx - speech_start_i) // duration_ms
                        frames_pre = frames[-beg_bias:]
                        frames_asr = []
                        frames_asr.extend(frames_pre)
                # asr punc offline
                if speech_end_i != -1 or not websocket.is_speaking:
                    # print("vad end point")
                    if websocket.mode == "2pass" or websocket.mode == "offline":
                        audio_in = b"".join(frames_asr)
                        try:
                            await async_asr(websocket, audio_in)
                        except:
                            print("error in asr offline")
                    frames_asr = []
                    speech_start = False
                    frames_asr_online = []
                    websocket.status_dict_asr_online["cache"] = {}
                    if not websocket.is_speaking:
                        websocket.vad_pre_idx = 0
                        frames = []
                        websocket.status_dict_vad["cache"] = {}
                    else:
                        frames = frames[-20:]
    except websockets.ConnectionClosed:
        print("ConnectionClosed...", websocket_users, flush=True)
        await ws_reset(websocket)
        websocket_users.remove(websocket)
    except websockets.InvalidState:
        print("InvalidState...")
    except Exception as e:
        print("Exception:", e)


async def async_vad(websocket, audio_in):
    segments_result = model_vad.generate(input=audio_in, **websocket.status_dict_vad)[0]["value"]
    # print(segments_result)

    speech_start = -1
    speech_end = -1

    if len(segments_result) == 0 or len(segments_result) > 1:
        return speech_start, speech_end
    if segments_result[0][0] != -1:
        speech_start = segments_result[0][0]
    if segments_result[0][1] != -1:
        speech_end = segments_result[0][1]
    return speech_start, speech_end


async def async_asr(websocket, audio_in):
    if len(audio_in) > 0:
        # print(len(audio_in))
        rec_result = model_asr.generate(input=audio_in, **websocket.status_dict_asr)[0]
        # print("offline_asr, ", rec_result)
        if model_punc is not None and len(rec_result["text"]) > 0:
            # print("offline, before punc", rec_result, "cache", websocket.status_dict_punc)
            rec_result = model_punc.generate(input=rec_result['text'], **websocket.status_dict_punc)[0]
            # print("offline, after punc", rec_result)
        if len(rec_result["text"]) > 0:
            # print("offline", rec_result)
            mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode
            message = json.dumps({"mode": mode, "text": rec_result["text"], "wav_name": websocket.wav_name, "is_final": websocket.is_speaking})
            await websocket.send(message)


async def async_asr_online(websocket, audio_in):
    if len(audio_in) > 0:
        # print(websocket.status_dict_asr_online.get("is_final", False))
        rec_result = model_asr_streaming.generate(input=audio_in, **websocket.status_dict_asr_online)[0]
        # print("online, ", rec_result)
        if websocket.mode == "2pass" and websocket.status_dict_asr_online.get("is_final", False):
            return
            # websocket.status_dict_asr_online["cache"] = dict()
        if len(rec_result["text"]):
            mode = "2pass-online" if "2pass" in websocket.mode else websocket.mode
            message = json.dumps({"mode": mode, "text": rec_result["text"], "wav_name": websocket.wav_name, "is_final": websocket.is_speaking})
            await websocket.send(message)


if len(args.certfile) > 0:
    ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)

    # Generate with Lets Encrypt, copied to this location, chown to current user and 400 permissions
    ssl_cert = args.certfile
    ssl_key = args.keyfile

    ssl_context.load_cert_chain(ssl_cert, keyfile=ssl_key)
    start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None, ssl=ssl_context)
else:
    start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None)
asyncio.get_event_loop().run_until_complete(start_server)
asyncio.get_event_loop().run_forever()
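
The wire protocol above (a JSON handshake, then binary pcm frames, with JSON results coming back) can be exercised with a short client. A minimal sketch, not part of the original file, assuming a non-TLS server on 127.0.0.1:10095 and a raw 16 kHz / 16-bit mono pcm file named `test.pcm` (both placeholders):
```python
import asyncio
import json
import websockets

async def run():
    async with websockets.connect("ws://127.0.0.1:10095",
                                  subprotocols=["binary"]) as ws:
        # Handshake: the same fields ws_serve() parses above.
        await ws.send(json.dumps({
            "mode": "2pass",
            "chunk_size": [5, 10, 5],
            "chunk_interval": 10,
            "wav_name": "demo",
            "is_speaking": True,
        }))
        with open("test.pcm", "rb") as f:  # raw 16 kHz, 16-bit mono pcm
            pcm = f.read()
        for i in range(0, len(pcm), 960):
            await ws.send(pcm[i:i + 960])  # binary frames feed the VAD/ASR path
        await ws.send(json.dumps({"is_speaking": False}))  # flush the offline pass
        async for msg in ws:  # Ctrl-C to stop
            print(json.loads(msg))

asyncio.run(run())
```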


@ -1 +0,0 @@
Open samples/html/static/index.html in a browser and enter the ASR server address; both microphone input and file input are supported.


@ -1,132 +0,0 @@
<!-- index.html -->
<html>
<head>
<script type="text/javascript" src="mpegts-1.7.3.min.js"></script>
<script type="text/javascript" src="http://cdn.sockjs.org/sockjs-0.3.4.js"></script>
<script src="http://code.jquery.com/jquery-2.1.1.min.js"></script>
<script src="recorder-core.js" charset="UTF-8"></script>
<script src="wav.js" charset="UTF-8"></script>
<script src="pcm.js" charset="UTF-8"></script>
</head>
<body>
<div class="container">
<h1>metahuman voice test</h1>
<form class="form-inline" id="echo-form" name="ssbtn">
<div class="form-group">
<p>input text</p>
<textarea cols="2" rows="3" style="width:600px;height:50px;" class="form-control" id="message"></textarea>
</div>
<button type="submit" class="btn btn-default">Send</button>
</form>
<div id="log">
</div>
<video id="video_player" width="40%" controls autoplay muted></video>
</div>
<div>------------------------------------------------------------------------------------------------------------------------------</div>
<div class="div_class_topArea">
<div class="div_class_recordControl">
ASR server address (required):
<br>
<input id="wssip" type="text" onchange="addresschange()" style="width: 500px;" value="wss://127.0.0.1:10095/"/>
<br>
<a id="wsslink" style="display: none;" href="#" onclick="window.open('https://127.0.0.1:10095/', '_blank')"><div id="info_wslink">点此处手工授权wss://127.0.0.1:10095/</div></a>
<br>
<br>
<div style="border:2px solid #ccc;display: none;">
Recording mode:<br/>
<label ><input name="recoder_mode" onclick="on_recoder_mode_change()" type="radio" value="mic" checked="true"/>Microphone </label>&nbsp;&nbsp;
<label><input name="recoder_mode" onclick="on_recoder_mode_change()" type="radio" value="file" />File </label>
</div>
<div id="mic_mode_div" style="border:2px solid #ccc;display:none;">
ASR model mode:<br/>
<label><input name="asr_mode" type="radio" value="2pass" />2pass </label>&nbsp;&nbsp;
<label><input name="asr_mode" type="radio" value="online" checked="true"/>online </label>&nbsp;&nbsp;
<label><input name="asr_mode" type="radio" value="2pass-offline" />2pass-offline </label>&nbsp;&nbsp;
<label><input name="asr_mode" type="radio" value="offline" />offline </label>
</div>
<div id="rec_mode_div" style="border:2px solid #ccc;display:none;">
<input type="file" id="upfile">
</div>
<div style="border:2px solid #ccc;display: none;">
Hotword settings (one keyword per line, weight after a space, e.g. "阿里巴巴 20")
<textarea rows="1" id="varHot" style=" width: 100%;height:auto" >阿里巴巴 20&#13;hello world 40</textarea>
</div>
<div style="display: none;">语音识别结果显示:</div>
<br>
<textarea rows="10" id="varArea" readonly="true" style=" width: 100%;height:auto;display: none;" ></textarea>
<br>
<div id="info_div">请点击开始</div>
<div class="div_class_buttons">
<button id="btnConnect">连接</button>
<button id="btnStart">开始</button>
<button id="btnStop">停止</button>
</div>
<audio id="audio_record" type="audio/wav" controls style="margin-top: 2px; width: 100%;display: none;"></audio>
</div>
</div>
<script src="wsconnecter.js" charset="utf-8"></script>
<script src="main.js" charset="utf-8"></script>
</body>
<script type="text/javascript" charset="utf-8">
// $(document).ready(function() {
// var host = window.location.hostname
// var ws = new WebSocket("ws://"+host+":8000/humanecho");
// //document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
// ws.onopen = function() {
// console.log('Connected');
// };
// ws.onmessage = function(e) {
// console.log('Received: ' + e.data);
// data = e
// var vid = JSON.parse(data.data);
// console.log(typeof(vid),vid)
// //document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
// };
// ws.onclose = function(e) {
// console.log('Closed');
// };
// flvPlayer = mpegts.createPlayer({type: 'flv', url: "http://"+host+":8080/live/livestream.flv", isLive: true, enableStashBuffer: false});
// flvPlayer.attachMediaElement(document.getElementById('video_player'));
// flvPlayer.load();
// flvPlayer.play();
// $('#echo-form').on('submit', function(e) {
// e.preventDefault();
// var message = $('#message').val();
// console.log('Sending: ' + message);
// ws.send(message);
// $('#message').val('');
// });
// });
</script>
</html>


@ -1,87 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title>Speech Recognition</title>
</head>
<body style="margin-left: 3%">
<script src="recorder-core.js" charset="UTF-8"></script>
<script src="wav.js" charset="UTF-8"></script>
<script src="pcm.js" charset="UTF-8"></script>
<h1>FunASR Demo</h1>
<h3>This is an experience demo of the FunASR open-source project, which integrates industrial-grade models for VAD, ASR and punctuation. It supports offline transcription of long audio files as well as real-time speech recognition. Project: https://github.com/alibaba-damo-academy/FunASR</h3>
<div class="div_class_topArea">
<div class="div_class_recordControl">
ASR server address (required):
<br>
<input id="wssip" type="text" onchange="addresschange()" style=" width: 100%;height:100%" value="wss://127.0.0.1:10095/"/>
<br>
<a id="wsslink" href="#" onclick="window.open('https://127.0.0.1:10095/', '_blank')"><div id="info_wslink">点此处手工授权wss://127.0.0.1:10095/</div></a>
<br>
<br>
<div style="border:2px solid #ccc;">
Recording mode:<br/>
<label><input name="recoder_mode" onclick="on_recoder_mode_change()" type="radio" value="mic" checked="true"/>Microphone </label>&nbsp;&nbsp;
<label><input name="recoder_mode" onclick="on_recoder_mode_change()" type="radio" value="file" />File </label>
</div>
<br>
<div id="mic_mode_div" style="border:2px solid #ccc;display:block;">
ASR model mode:<br/>
<label><input name="asr_mode" type="radio" value="2pass" checked="true"/>2pass </label>&nbsp;&nbsp;
<label><input name="asr_mode" type="radio" value="online" />online </label>&nbsp;&nbsp;
<label><input name="asr_mode" type="radio" value="offline" />offline </label>
</div>
<div id="rec_mode_div" style="border:2px solid #ccc;display:none;">
<input type="file" id="upfile">
</div>
<br>
<div style="border:2px solid #ccc;">
Hotword settings (one keyword per line, weight after a space, e.g. "阿里巴巴 20")
<br>
<textarea rows="3" id="varHot" style=" width: 100%;height:100%" >阿里巴巴 20&#13;hello world 40</textarea>
<br>
</div>
Recognition results:
<br>
<textarea rows="10" id="varArea" readonly="true" style=" width: 100%;height:100%" ></textarea>
<br>
<div id="info_div">请点击开始</div>
<div class="div_class_buttons">
<button id="btnConnect">连接</button>
<button id="btnStart">开始</button>
<button id="btnStop">停止</button>
</div>
<audio id="audio_record" type="audio/wav" controls style="margin-top: 12px; width: 100%;"></audio>
</div>
</div>
<script src="wsconnecter.js" charset="utf-8"></script>
<script src="main.js" charset="utf-8"></script>
</body>
</html>


@ -1,637 +0,0 @@
/**
* Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
* Reserved. MIT License (https://opensource.org/licenses/MIT)
*/
/* 2022-2023 by zhaoming,mali aihealthx.com */
// Connection: create the socket connection object and the speech objects
var wsconnecter = new WebSocketConnectMethod({msgHandle:getJsonMessage,stateHandle:getConnState});
var audioBlob;
// Recording: create the recorder object (pcm format)
var rec = Recorder({
type:"pcm",
bitRate:16,
sampleRate:16000,
onProcess:recProcess
});
var sampleBuf=new Int16Array();
// Wire up the button handlers
var btnStart = document.getElementById('btnStart');
btnStart.onclick = record;
var btnStop = document.getElementById('btnStop');
btnStop.onclick = stop;
btnStop.disabled = true;
btnStart.disabled = true;
btnConnect= document.getElementById('btnConnect');
btnConnect.onclick = start;
var awsslink= document.getElementById('wsslink');
var rec_text=""; // for online rec asr result
var offline_text=""; // for offline rec asr result
var info_div = document.getElementById('info_div');
var upfile = document.getElementById('upfile');
var isfilemode=false; // if it is in file mode
var file_ext="";
var file_sample_rate=16000; //for wav file sample rate
var file_data_array; // array to save file data
var totalsend=0;
var startTime = Date.now();
var now_ipaddress=window.location.href;
now_ipaddress=now_ipaddress.replace("https://","wss://");
now_ipaddress=now_ipaddress.replace("static/index.html","");
// var localport=window.location.port;
// now_ipaddress=now_ipaddress.replace(localport,"10095");
// document.getElementById('wssip').value=now_ipaddress;
// addresschange();
function addresschange()
{
var Uri = document.getElementById('wssip').value;
document.getElementById('info_wslink').innerHTML="Click here to manually authorize (iOS)";
Uri=Uri.replace(/wss/g,"https");
console.log("addresschange uri=",Uri);
awsslink.onclick=function(){
window.open(Uri, '_blank');
}
}
upfile.onclick=function()
{
btnStart.disabled = true;
btnStop.disabled = true;
btnConnect.disabled=false;
}
// from https://github.com/xiangyuecn/Recorder/tree/master
var readWavInfo=function(bytes){
//read the wav file header (normalized to a 44-byte header)
if(bytes.byteLength<44){
return null;
};
var wavView=bytes;
var eq=function(p,s){
for(var i=0;i<s.length;i++){
if(wavView[p+i]!=s.charCodeAt(i)){
return false;
};
};
return true;
};
if(eq(0,"RIFF")&&eq(8,"WAVEfmt ")){
var numCh=wavView[22];
if(wavView[20]==1 && (numCh==1||numCh==2)){//raw pcm, mono or stereo
var sampleRate=wavView[24]+(wavView[25]<<8)+(wavView[26]<<16)+(wavView[27]<<24);
var bitRate=wavView[34]+(wavView[35]<<8);
var heads=[wavView.subarray(0,12)],headSize=12;//keep only the required chunks in the head
//locate the data chunk
var dataPos=0; // 44, or later if there are extra chunks
for(var i=12,iL=wavView.length-8;i<iL;){
if(wavView[i]==100&&wavView[i+1]==97&&wavView[i+2]==116&&wavView[i+3]==97){//eq(i,"data")
heads.push(wavView.subarray(i,i+8));
headSize+=8;
dataPos=i+8;break;
}
var i0=i;
i+=4;
i+=4+wavView[i]+(wavView[i+1]<<8)+(wavView[i+2]<<16)+(wavView[i+3]<<24);
if(i0==12){//fmt
heads.push(wavView.subarray(i0,i));
headSize+=i-i0;
}
}
if(dataPos){
var wavHead=new Uint8Array(headSize);
for(var i=0,n=0;i<heads.length;i++){
wavHead.set(heads[i],n);n+=heads[i].length;
}
return {
sampleRate:sampleRate
,bitRate:bitRate
,numChannels:numCh
,wavHead44:wavHead
,dataPos:dataPos
};
};
};
};
return null;
};
upfile.onchange = function () {
var len = this.files.length;
for(let i = 0; i < len; i++) {
let fileAudio = new FileReader();
fileAudio.readAsArrayBuffer(this.files[i]);
file_ext=this.files[i].name.split('.').pop().toLowerCase();
var audioblob;
fileAudio.onload = function() {
audioblob = fileAudio.result;
file_data_array=audioblob;
info_div.innerHTML='Click Connect to start recognition';
}
fileAudio.onerror = function(e) {
console.log('error' + e);
}
}
// for wav file, we get the sample rate
if(file_ext=="wav")
for(let i = 0; i < len; i++) {
let fileAudio = new FileReader();
fileAudio.readAsArrayBuffer(this.files[i]);
fileAudio.onload = function() {
audioblob = new Uint8Array(fileAudio.result);
// for wav file, we can get the sample rate
var info=readWavInfo(audioblob);
console.log(info);
file_sample_rate=info.sampleRate;
}
}
}
function play_file()
{
var audioblob=new Blob( [ new Uint8Array(file_data_array)] , {type :"audio/wav"});
var audio_record = document.getElementById('audio_record');
audio_record.src = (window.URL||webkitURL).createObjectURL(audioblob);
audio_record.controls=true;
//audio_record.play(); //not auto play
}
function start_file_send()
{
sampleBuf=new Uint8Array( file_data_array );
var chunk_size=960; // for asr chunk_size [5, 10, 5]
while(sampleBuf.length>=chunk_size){
sendBuf=sampleBuf.slice(0,chunk_size);
totalsend=totalsend+sampleBuf.length;
sampleBuf=sampleBuf.slice(chunk_size,sampleBuf.length);
wsconnecter.wsSend(sendBuf);
}
stop();
}
function on_recoder_mode_change()
{
var item = null;
var obj = document.getElementsByName("recoder_mode");
for (var i = 0; i < obj.length; i++) { //iterate over the radio group
if (obj[i].checked) {
item = obj[i].value;
break;
}
}
if(item=="mic")
{
document.getElementById("mic_mode_div").style.display = 'block';
document.getElementById("rec_mode_div").style.display = 'none';
btnStart.disabled = true;
btnStop.disabled = true;
btnConnect.disabled=false;
isfilemode=false;
}
else
{
document.getElementById("mic_mode_div").style.display = 'none';
document.getElementById("rec_mode_div").style.display = 'block';
btnStart.disabled = true;
btnStop.disabled = true;
btnConnect.disabled=true;
isfilemode=true;
info_div.innerHTML='Please choose a file';
}
}
function getHotwords(){
var obj = document.getElementById("varHot");
if(typeof(obj) == 'undefined' || obj==null || obj.value.length<=0){
return null;
}
let val = obj.value.toString();
console.log("hotwords="+val);
let items = val.split(/[(\r\n)\r\n]+/); //split by \r\n
var jsonresult = {};
const regexNum = /^[0-9]*$/; // test number
for (item of items) {
let result = item.split(" ");
if(result.length>=2 && regexNum.test(result[result.length-1]))
{
var wordstr="";
for(var i=0;i<result.length-1;i++)
wordstr=wordstr+result[i]+" ";
jsonresult[wordstr.trim()]= parseInt(result[result.length-1]);
}
}
console.log("jsonresult="+JSON.stringify(jsonresult));
return JSON.stringify(jsonresult);
}
function getAsrMode(){
var item = null;
var obj = document.getElementsByName("asr_mode");
for (var i = 0; i < obj.length; i++) { //iterate over the radio group
if (obj[i].checked) {
item = obj[i].value;
break;
}
}
if(isfilemode)
{
item= "offline";
}
console.log("asr mode"+item);
return item;
}
function handleWithTimestamp(tmptext,tmptime)
{
console.log( "tmptext: " + tmptext);
console.log( "tmptime: " + tmptime);
if(tmptime==null || tmptime=="undefined" || tmptext.length<=0)
{
return tmptext;
}
tmptext=tmptext.replace(/。|？|！|、|\?|\.|\ /g, ","); // in case there are a lot of "。"
var words=tmptext.split(","); // split to chinese sentence or english words
var jsontime=JSON.parse(tmptime); //JSON.parse(tmptime.replace(/\]\]\[\[/g, "],[")); // in case there are a lot segments by VAD
var char_index=0; // index for timestamp
var text_withtime="";
for(var i=0;i<words.length;i++)
{
if(words[i]=="undefined" || words[i].length<=0)
{
continue;
}
console.log("words===",words[i]);
console.log( "words: " + words[i]+",time="+jsontime[char_index][0]/1000);
if (/^[a-zA-Z]+$/.test(words[i]))
{ // if it is english
text_withtime=text_withtime+jsontime[char_index][0]/1000+":"+words[i]+"\n";
char_index=char_index+1; //for english, timestamp unit is about a word
}
else{
// if it is chinese
text_withtime=text_withtime+jsontime[char_index][0]/1000+":"+words[i]+"\n";
char_index=char_index+words[i].length; //for chinese, timestamp unit is about a char
}
}
return text_withtime;
}
// Recognition results: parse jsonMsg and append the text to the edit box
function getJsonMessage( jsonMsg ) {
if(!recive_msg) return;
var currentTime = Date.now();
res_time = currentTime-startTime;
console.log(res_time)
//once the elapsed time exceeds the wait, auto-send the message
let waitTime = 15000;
if(res_time>waitTime){
//auto-send the message
var f = document.getElementById("echo-form");
f.submit = function(e){
e.preventDefault();
var message=document.getElementById('message').value;
console.log('Sending: ' + message);
ws.send(message);
document.getElementById('message').value='';
}
recive_msg = false;
startTime = currentTime;
// rec_text="";
// var varArea_message=document.getElementById('message');
// varArea_message.value="";
return;
}
//console.log(jsonMsg);
console.log( "message: " + JSON.parse(jsonMsg.data)['text'] );
var rectxt=""+JSON.parse(jsonMsg.data)['text'];
var asrmodel=JSON.parse(jsonMsg.data)['mode'];
var is_final=JSON.parse(jsonMsg.data)['is_final'];
var timestamp=JSON.parse(jsonMsg.data)['timestamp'];
if(asrmodel=="2pass-offline" || asrmodel=="offline")
{
offline_text=offline_text+handleWithTimestamp(rectxt,timestamp); //rectxt; //.replace(/ +/g,"");
rec_text=offline_text;
}
else
{
rec_text=rec_text+rectxt; //.replace(/ +/g,"");
}
var varArea=document.getElementById('varArea');
var varArea_message=document.getElementById('message');
varArea.value=rec_text;
varArea_message.value=rec_text;
console.log( "offline_text: " + asrmodel+","+offline_text);
console.log( "rec_text: " + rec_text);
console.log( "isfilemode: " + isfilemode);
console.log( "is_final: " + is_final);
if (isfilemode==true && is_final==false){
console.log("call stop ws!");
play_file();
wsconnecter.wsStop();
info_div.innerHTML="请点击连接";
btnStart.disabled = true;
btnStop.disabled = true;
btnConnect.disabled=false;
}
}
// Connection state callback
function getConnState( connState ) {
if ( connState === 0 ) { //on open
info_div.innerHTML='Connected! Click Start';
if (isfilemode==true){
info_div.innerHTML='Please wait; large files take longer';
start_file_send();
}
else
{
btnStart.disabled = false;
btnStop.disabled = true;
btnConnect.disabled=true;
}
} else if ( connState === 1 ) {
//stop();
} else if ( connState === 2 ) {
stop();
console.log( 'connection error' );
alert("Failed to connect to "+document.getElementById('wssip').value+". Check the asr address and port, or try manual authorization in the page first.");
btnStart.disabled = true;
btnStop.disabled = true;
btnConnect.disabled=false;
info_div.innerHTML='Click Connect';
}
}
function record()
{
rec.open( function(){
rec.start();
console.log("开始");
btnStart.disabled = true;
btnStop.disabled = false;
btnConnect.disabled=true;
});
}
// Start, stop and clear operations for recognition
function start() {
// clear the display
clear();
// update control states
console.log("isfilemode"+isfilemode);
// open the connection
var ret=wsconnecter.wsStart();
// 1 is ok, 0 is error
if(ret==1){
info_div.innerHTML="Connecting to the asr server, please wait...";
isRec = true;
btnStart.disabled = true;
btnStop.disabled = true;
btnConnect.disabled=true;
return 1;
}
else
{
info_div.innerHTML="Click Start";
btnStart.disabled = true;
btnStop.disabled = true;
btnConnect.disabled=false;
return 0;
}
}
function stop() {
var chunk_size = new Array( 5, 10, 5 );
var request = {
"chunk_size": chunk_size,
"wav_name": "h5",
"is_speaking": false,
"chunk_interval":10,
"mode":getAsrMode(),
};
console.log(request);
if(sampleBuf.length>0){
wsconnecter.wsSend(sampleBuf);
console.log("sampleBuf.length"+sampleBuf.length);
sampleBuf=new Int16Array();
}
wsconnecter.wsSend( JSON.stringify(request) );
// update control states
isRec = false;
info_div.innerHTML="Data sent; recognizing, please wait...";
if(isfilemode==false){
btnStop.disabled = true;
btnStart.disabled = true;
btnConnect.disabled=true;
//wait 3s for asr result
setTimeout(function(){
console.log("call stop ws!");
wsconnecter.wsStop();
btnConnect.disabled=false;
info_div.innerHTML="请点击连接";}, 3000 );
rec.stop(function(blob,duration){
console.log(blob);
var audioBlob = Recorder.pcm2wav(data = {sampleRate:16000, bitRate:16, blob:blob},
function(theblob,duration){
console.log(theblob);
var audio_record = document.getElementById('audio_record');
audio_record.src = (window.URL||webkitURL).createObjectURL(theblob);
audio_record.controls=true;
//audio_record.play();
} ,function(msg){
console.log(msg);
}
);
},function(errMsg){
console.log("errMsg: " + errMsg);
});
}
// stop the connection
}
function clear() {
var varArea=document.getElementById('varArea');
varArea.value="";
rec_text="";
offline_text="";
}
function recProcess( buffer, powerLevel, bufferDuration, bufferSampleRate,newBufferIdx,asyncEnd ) {
if ( isRec === true ) {
var data_48k = buffer[buffer.length-1];
var array_48k = new Array(data_48k);
var data_16k=Recorder.SampleData(array_48k,bufferSampleRate,16000).data;
sampleBuf = Int16Array.from([...sampleBuf, ...data_16k]);
var chunk_size=960; // for asr chunk_size [5, 10, 5]
info_div.innerHTML=""+bufferDuration/1000+"s";
while(sampleBuf.length>=chunk_size){
sendBuf=sampleBuf.slice(0,chunk_size);
sampleBuf=sampleBuf.slice(chunk_size,sampleBuf.length);
wsconnecter.wsSend(sendBuf);
}
}
}
var recive_msg = true;
$(document).ready(function() {
var host = window.location.hostname
var ws = new WebSocket("ws://"+host+":8000/humanecho");
//document.getElementsByTagName("video")[0].setAttribute("src", aa["video"]);
ws.onopen = function() {
console.log('Connected');
};
ws.onmessage = function(e) {
console.log('Received: ' + e.data);
recive_msg = true;
data = e
var vid = JSON.parse(data.data);
console.log(typeof(vid),vid)
//document.getElementsByTagName("video")[0].setAttribute("src", vid["video"]);
};
ws.onclose = function(e) {
console.log('Closed');
};
flvPlayer = mpegts.createPlayer({type: 'flv', url: "http://"+host+":8080/live/livestream.flv", isLive: true, enableStashBuffer: false});
flvPlayer.attachMediaElement(document.getElementById('video_player'));
flvPlayer.load();
flvPlayer.play();
$('#echo-form').on('submit', function(e) {
e.preventDefault();
var message = $('#message').val();
console.log('Sending: ' + message);
ws.send(message);
$('#message').val('');
});
});

File diff suppressed because one or more lines are too long


@ -1,96 +0,0 @@
/*
pcm encoder + encoding engine
https://github.com/xiangyuecn/Recorder

How it works: the pcm data this encoder outputs is simply the raw buffers data from Recorder, resampled (16-bit samples stored as LE, Little Endian), with no actual encoding applied.
The code differs little from wav.js: pcm plus a 44-byte wav header makes a wav file, so playing pcm is easy: convert it to a wav file first. A conversion helper is provided: Recorder.pcm2wav.
*/
(function(){
"use strict";
Recorder.prototype.enc_pcm={
stable:true
,testmsg:"pcm is raw, unwrapped audio data; a pcm data file cannot be played directly. Supported bit depths: 8 and 16 (set via bitRate); any sample rate is allowed"
};
Recorder.prototype.pcm=function(res,True,False){
var This=this,set=This.set
,size=res.length
,bitRate=set.bitRate==8?8:16;
var buffer=new ArrayBuffer(size*(bitRate/8));
var data=new DataView(buffer);
var offset=0;
// write the sample data
if(bitRate==8) {
for(var i=0;i<size;i++,offset++) {
//16->8 bit conversion, attributed to Lei Xiaohua: https://blog.csdn.net/sevennight1989/article/details/85376149 cleaner in detail than blqw's proportional method, though both leave audible noise
var val=(res[i]>>8)+128;
data.setInt8(offset,val,true);
};
}else{
for (var i=0;i<size;i++,offset+=2){
data.setInt16(offset,res[i],true);
};
};
True(new Blob([data.buffer],{type:"audio/pcm"}));
};
/** Convert pcm to wav; requires the wav encoder (wav.js) to be loaded.
data: {
sampleRate:16000, the sample rate of the pcm data
bitRate:16, the bit depth of the pcm data (8 or 16)
blob: blob object
}
If data is given as a bare blob, a 16-bit 16 kHz config is assumed (testing only).
True(wavBlob,duration)
False(msg)
**/
Recorder.pcm2wav=function(data,True,False){
if(data.slice && data.type!=null){//bare Blob, for testing
data={blob:data};
};
var sampleRate=data.sampleRate||16000,bitRate=data.bitRate||16;
if(!data.sampleRate || !data.bitRate){
console.warn("pcm2wav必须提供sampleRate和bitRate");
};
if(!Recorder.prototype.wav){
False("pcm2wav必须先加载wav编码器wav.js");
return;
};
var reader=new FileReader();
reader.onloadend=function(){
var pcm;
if(bitRate==8){
//widen 8-bit samples to 16-bit
var u8arr=new Uint8Array(reader.result);
pcm=new Int16Array(u8arr.length);
for(var j=0;j<u8arr.length;j++){
pcm[j]=(u8arr[j]-128)<<8;
};
}else{
pcm=new Int16Array(reader.result);
};
Recorder({
type:"wav"
,sampleRate:sampleRate
,bitRate:bitRate
}).mock(pcm,sampleRate).stop(function(wavBlob,duration){
True(wavBlob,duration);
},False);
};
reader.readAsArrayBuffer(data.blob);
};
})();
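
The 8/16-bit conversions above map directly onto array operations; a minimal numpy sketch (not part of the original file) mirroring the two loops in the pcm encoder and pcm2wav:
```python
import numpy as np

# 16-bit -> 8-bit, as in the pcm encoder: keep the high byte, bias by 128.
pcm16 = np.array([-32768, 0, 32767], dtype=np.int16)
pcm8 = ((pcm16.astype(np.int32) >> 8) + 128).astype(np.uint8)  # [0, 128, 255]

# 8-bit -> 16-bit, as in pcm2wav: remove the bias, shift back up.
restored = ((pcm8.astype(np.int16) - 128) << 8).astype(np.int16)
print(pcm8, restored)  # lossy round trip: the low byte is gone
```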

File diff suppressed because it is too large Load Diff


@ -1,86 +0,0 @@
/*
wav encoder + encoding engine
https://github.com/xiangyuecn/Recorder

mp3 and wav are the recommended formats, and the code favors these two.
Browser support:
https://developer.mozilla.org/en-US/docs/Web/HTML/Supported_media_formats

How it works: prepending a 44-byte wav header to the pcm data yields a wav file; the pcm data is just the raw buffers data from Recorder, resampled (16-bit samples stored as LE, Little Endian), with no actual encoding applied.
*/
(function(){
"use strict";
Recorder.prototype.enc_wav={
stable:true
,testmsg:"supported bit depths: 8 and 16 (set via bitRate); any sample rate is allowed"
};
Recorder.prototype.wav=function(res,True,False){
var This=this,set=This.set
,size=res.length
,sampleRate=set.sampleRate
,bitRate=set.bitRate==8?8:16;
//encoding references: https://github.com/mattdiamond/Recorderjs https://www.cnblogs.com/blqw/p/3782420.html https://www.cnblogs.com/xiaoqi/p/6993912.html
var dataLength=size*(bitRate/8);
var buffer=new ArrayBuffer(44+dataLength);
var data=new DataView(buffer);
var offset=0;
var writeString=function(str){
for (var i=0;i<str.length;i++,offset++) {
data.setUint8(offset,str.charCodeAt(i));
};
};
var write16=function(v){
data.setUint16(offset,v,true);
offset+=2;
};
var write32=function(v){
data.setUint32(offset,v,true);
offset+=4;
};
/* RIFF identifier */
writeString('RIFF');
/* RIFF chunk length */
write32(36+dataLength);
/* RIFF type */
writeString('WAVE');
/* format chunk identifier */
writeString('fmt ');
/* format chunk length */
write32(16);
/* sample format (raw) */
write16(1);
/* channel count */
write16(1);
/* sample rate */
write32(sampleRate);
/* byte rate (sample rate * block align) */
write32(sampleRate*(bitRate/8));// *1 channel
/* block align (channel count * bytes per sample) */
write16(bitRate/8);// *1 channel
/* bits per sample */
write16(bitRate);
/* data chunk identifier */
writeString('data');
/* data chunk length */
write32(dataLength);
// write the sample data
if(bitRate==8) {
for(var i=0;i<size;i++,offset++) {
//16->8 bit conversion, attributed to Lei Xiaohua: https://blog.csdn.net/sevennight1989/article/details/85376149 cleaner in detail than blqw's proportional method, though both leave audible noise
var val=(res[i]>>8)+128;
data.setInt8(offset,val,true);
};
}else{
for (var i=0;i<size;i++,offset+=2){
data.setInt16(offset,res[i],true);
};
};
True(new Blob([data.buffer],{type:"audio/wav"}));
}
})();
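
For comparison, Python's stdlib wave module emits the same 44-byte RIFF/fmt/data header that wav.js assembles by hand. A minimal sketch (not part of the original file; file names are placeholders) wrapping raw 16 kHz / 16-bit mono pcm into a wav container:
```python
import wave

with open("input.pcm", "rb") as f:   # raw 16 kHz, 16-bit mono pcm
    pcm = f.read()

with wave.open("output.wav", "wb") as w:
    w.setnchannels(1)      # channel count, write16(1) above
    w.setsampwidth(2)      # bytes per sample, 16-bit
    w.setframerate(16000)  # sample rate
    w.writeframes(pcm)     # the header fields are filled in automatically
```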


@ -1,119 +0,0 @@
/**
* Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
* Reserved. MIT License (https://opensource.org/licenses/MIT)
*/
/* 2021-2023 by zhaoming,mali aihealthx.com */
function WebSocketConnectMethod( config ) { //socket connection wrapper class
var speechSokt;
var connKeeperID;
var msgHandle = config.msgHandle;
var stateHandle = config.stateHandle;
this.wsStart = function () {
var Uri = document.getElementById('wssip').value; //"wss://111.205.137.58:5821/wss/" //wss asr online endpoint address, e.g. wss://X.X.X.X:port/wss/
if(Uri.match(/wss:\S*|ws:\S*/))
{
console.log("Uri"+Uri);
}
else
{
alert("Check that the wss address is valid");
return 0;
}
if ( 'WebSocket' in window ) {
speechSokt = new WebSocket( Uri ); // the socket connection object
speechSokt.onopen = function(e){onOpen(e);}; // attach the handlers
speechSokt.onclose = function(e){
console.log("onclose ws!");
//speechSokt.close();
onClose(e);
};
speechSokt.onmessage = function(e){onMessage(e);};
speechSokt.onerror = function(e){onError(e);};
return 1;
}
else {
alert('This browser does not support WebSocket');
return 0;
}
};
// stop and send helpers
this.wsStop = function () {
if(speechSokt != undefined) {
console.log("stop ws!");
speechSokt.close();
}
};
this.wsSend = function ( oneData ) {
if(speechSokt == undefined) return;
if ( speechSokt.readyState === 1 ) { // 0:CONNECTING, 1:OPEN, 2:CLOSING, 3:CLOSED
speechSokt.send( oneData );
}
};
// Socket message and state callbacks
function onOpen( e ) {
// send the json handshake
var chunk_size = new Array( 5, 10, 5 );
var request = {
"chunk_size": chunk_size,
"wav_name": "h5",
"is_speaking": true,
"chunk_interval":10,
"itn":false,
"mode":getAsrMode(),
};
if(isfilemode)
{
request.wav_format=file_ext;
if(file_ext=="wav")
{
request.wav_format="PCM";
request.audio_fs=file_sample_rate;
}
}
var hotwords=getHotwords();
if(hotwords!=null )
{
request.hotwords=hotwords;
}
console.log(JSON.stringify(request));
speechSokt.send(JSON.stringify(request));
console.log("连接成功");
stateHandle(0);
}
function onClose( e ) {
stateHandle(1);
}
function onMessage( e ) {
msgHandle( e );
}
function onError( e ) {
info_div.innerHTML="连接"+e;
console.log(e);
stateHandle(2);
}
}


@ -1,24 +0,0 @@
1. Start the speech recognition server

Create a virtual environment:
conda create -n funasr
conda activate funasr

Install the dependencies:
pip install torch
pip install modelscope
pip install testresources
pip install websockets
pip install torchaudio
pip install FunASR
pip install pyaudio

python funasr_wss_server.py --port 10095
or
python funasr_wss_server.py --host "0.0.0.0" --port 10197 --ngpu 0

https://github.com/alibaba-damo-academy/FunASR
https://zhuanlan.zhihu.com/p/649935170


@ -1,17 +0,0 @@
## Generating a certificate yourself
A self-generated certificate may not be accepted by all browsers due to security concerns; you had better buy or download an authenticated ssl certificate from an authorized agency.
```shell
### 1) Generate a private key
openssl genrsa -des3 -out server.key 2048
### 2) Generate a csr file
openssl req -new -key server.key -out server.csr
### 3) Remove pass
cp server.key server.key.org
openssl rsa -in server.key.org -out server.key
### 4) Generated a crt file, valid for 1 year
openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt
```
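
A quick sanity check that the generated key and certificate actually match is to load them into an ssl context; a minimal Python sketch (not part of the original instructions):
```python
import ssl

# load_cert_chain raises ssl.SSLError if server.crt and server.key
# do not match or cannot be parsed.
ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
ctx.load_cert_chain("server.crt", keyfile="server.key")
print("certificate and key load cleanly")
```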


@ -1,17 +0,0 @@
## Generating a certificate yourself
Generate the certificate yourself (note that such a certificate is not accepted by every browser; some allow access after manual authorization. It is best to use an officially authenticated ssl certificate.)
```shell
### 1) Generate a private key, filling in the prompts
openssl genrsa -des3 -out server.key 1024
### 2) Generate a csr file, filling in the prompts
openssl req -new -key server.key -out server.csr
### Remove the passphrase
cp server.key server.key.org
openssl rsa -in server.key.org -out server.key
### Generate a crt file, valid for 1 year (365 days)
openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt
```


@ -1,21 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIDhTCCAm0CFGB0Po2IZ0hESavFpcSGRNb9xrNXMA0GCSqGSIb3DQEBCwUAMH8x
CzAJBgNVBAYTAkNOMRAwDgYDVQQIDAdiZWlqaW5nMRAwDgYDVQQHDAdiZWlqaW5n
MRAwDgYDVQQKDAdhbGliYWJhMRAwDgYDVQQLDAdhbGliYWJhMRAwDgYDVQQDDAdh
bGliYWJhMRYwFAYJKoZIhvcNAQkBFgdhbGliYWJhMB4XDTIzMDYxODA2NTcxM1oX
DTI0MDYxNzA2NTcxM1owfzELMAkGA1UEBhMCQ04xEDAOBgNVBAgMB2JlaWppbmcx
EDAOBgNVBAcMB2JlaWppbmcxEDAOBgNVBAoMB2FsaWJhYmExEDAOBgNVBAsMB2Fs
aWJhYmExEDAOBgNVBAMMB2FsaWJhYmExFjAUBgkqhkiG9w0BCQEWB2FsaWJhYmEw
ggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDH9Np1oBunQKMt5M/nU2nD
qVHojXwKKwyiK9DSeGikKwArH2S9NUZNu5RDg46u0iWmT+Vz+toQhkJnfatOVskW
f2bsI54n5eOvmoWOKDXYm2MscvjkuNiYRbqzgUuP9ZSx8k3uyRs++wvmwIoU+PV1
EYFcjk1P2jUGUvKaUlmIDsjs1wOMIbKO6I0UX20FNKlGWacqMR/Dx2ltmGKT1Kaz
Y335lor0bcfQtH542rGS7PDz6JMRNjFT1VFcmnrjRElf4STbaOiIfOjMVZ/9O8Hr
LFItyvkb01Mt7O0jhAXHuE1l/8Y0N3MCYkELG9mQA0BYCFHY0FLuJrGoU03b8KWj
AgMBAAEwDQYJKoZIhvcNAQELBQADggEBAEjC9jB1WZe2ki2JgCS+eAMFsFegiNEz
D0klVB3kiCPK0g7DCxvfWR6kAgEynxRxVX6TN9QcLr4paZItC1Fu2gUMTteNqEuc
dcixJdu9jumuUMBlAKgL5Yyk3alSErsn9ZVF/Q8Kx5arMO/TW3Ulsd8SWQL5C/vq
Fe0SRhpKKoADPfl8MT/XMfB/MwNxVhYDSHzJ1EiN8O5ce6q2tTdi1mlGquzNxhjC
7Q0F36V1HksfzolrlRWRKYP16isnaKUdFfeAzaJsYw33o6VRbk6fo2fTQDHS0wOs
Q48Moc5UxKMLaMMCqLPpWu0TZse+kIw1nTWXk7yJtK0HK5PN3rTocEw=
-----END CERTIFICATE-----


@ -1,27 +0,0 @@
-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEAx/TadaAbp0CjLeTP51Npw6lR6I18CisMoivQ0nhopCsAKx9k
vTVGTbuUQ4OOrtIlpk/lc/raEIZCZ32rTlbJFn9m7COeJ+Xjr5qFjig12JtjLHL4
5LjYmEW6s4FLj/WUsfJN7skbPvsL5sCKFPj1dRGBXI5NT9o1BlLymlJZiA7I7NcD
jCGyjuiNFF9tBTSpRlmnKjEfw8dpbZhik9Sms2N9+ZaK9G3H0LR+eNqxkuzw8+iT
ETYxU9VRXJp640RJX+Ek22joiHzozFWf/TvB6yxSLcr5G9NTLeztI4QFx7hNZf/G
NDdzAmJBCxvZkANAWAhR2NBS7iaxqFNN2/ClowIDAQABAoIBAQC1/STX6eFBWJMs
MhUHdePNMU5bWmqK1qOo9jgZV33l7T06Alit3M8f8JoA2LwEYT/jHtS3upi+cXP+
vWIs6tAaqdoDEmff6FxSd1EXEYHwo3yf+ASQJ6z66nwC5KrhW6L6Uo6bxm4F5Hfw
jU0fyXeeFVCn7Nxw0SlxmA02Z70VFsL8BK9i3kajU18y6drf4VUm55oMEtdEmOh2
eKn4qspBcNblbw+L0QJ+5kN1iRUyJHesQ1GpS+L3yeMVFCW7ctL4Bgw8Z7LE+z7i
C0Weyhul8vuT+7nfF2T37zsSa8iixqpkTokeYh96CZ5nDqa2IDx3oNHWSlkIsV6g
6EUEl9gBAoGBAPIw/M6fIDetMj8f1wG7mIRgJsxI817IS6aBSwB5HkoCJFfrR9Ua
jMNCFIWNs/Om8xeGhq/91hbnCYDNK06V5CUa/uk4CYRs2eQZ3FKoNowtp6u/ieuU
qg8bXM/vR2VWtWVixAMdouT3+KtvlgaVmSnrPiwO4pecGrwu5NW1oJCFAoGBANNb
aE3AcwTDYsqh0N/75G56Q5s1GZ6MCDQGQSh8IkxL6Vg59KnJiIKQ7AxNKFgJZMtY
zZHaqjazeHjOGTiYiC7MMVJtCcOBEfjCouIG8btNYv7Y3dWnOXRZni2telAsRrH9
xS5LaFdCRTjVAwSsppMGwiQtyl6sGLMyz0SXoYoHAoGAKdkFFb6xFm26zOV3hTkg
9V6X1ZyVUL9TMwYMK5zB+w+7r+VbmBrqT6LPYPRHL8adImeARlCZ+YMaRUMuRHnp
3e94NFwWaOdWDu/Y/f9KzZXl7us9rZMWf12+/77cm0oMNeSG8fLg/qdKNHUneyPG
P1QCfiJkTMYQaIvBxpuHjvECgYAKlZ9JlYOtD2PZJfVh4il0ZucP1L7ts7GNeWq1
7lGBZKPQ6UYZYqBVeZB4pTyJ/B5yGIZi8YJoruAvnJKixPC89zjZGeDNS59sx8KE
cziT2rJEdPPXCULVUs+bFf70GOOJcl33jYsyI3139SLrjwHghwwd57UkvJWYE8lR
dA6A7QKBgEfTC+NlzqLPhbB+HPl6CvcUczcXcI9M0heVz/DNMA+4pjxPnv2aeIwh
cL2wq2xr+g1wDBWGVGkVSuZhXm5E6gDetdyVeJnbIUhVjBblnbhHV6GrudjbXGnJ
W9cBgu6DswyHU2cOsqmimu8zLmG6/dQYFHt+kUWGxN8opCzVjgWa
-----END RSA PRIVATE KEY-----


@ -1,139 +0,0 @@
# Using the gpt-sovits approach (bert-sovits suits long-audio training; gpt-sovits gives fast inference on short audio)
## Deploying tts inference
git clone https://github.com/X-T-E-R/GPT-SoVITS-Inference.git
## 1. Install dependencies
```
conda create -n GPTSoVits python=3.9
conda activate GPTSoVits
bash install.sh
```
Download the pretrained models from [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) and place them in `GPT_SoVITS\pretrained_models`.
Note:
```
the GPT-SoVITS model files go into the pretrained_models directory
```
like this:
```
pretrained_models/
--chinese-hubert-base
--chinese-roberta-wwm-ext-large
s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
s2D488k.pth
s2G488k.pth
```
## 2. Model Folder Format
Model files can be downloaded from https://www.yuque.com/xter/zibxlp/gsximn7ditzgispg
Put the downloaded model files under the trained directory, e.g. `trained/Character1/`.
Put the pth / ckpt / wav files in it; the wav should be named as the prompt text.
Like:
```
trained
--hutao
----hutao-e75.ckpt
----hutao_e60_s3360.pth
----hutao said something.wav
```
## 3. Launch
### 3.1 Launch the webui
python webuis/character_manager/webui.py
Use it to manage the uploaded model data.
### 3.2 Launch the api service:
python app.py
If it complains that cmudict cannot be found, download https://github.com/nltk/nltk_data, rename packages to nltk_data and place it in your home directory.
### 3.3 tts test
Open http://127.0.0.1:5000 to test.
### 3.4 api test
Open http://127.0.0.1:5000/character_list to check that the service responds.
## 4. API reference
### 4.1 Character and Emotion List
To obtain the supported characters and their corresponding emotions, please visit the following URL:
- URL: `http://127.0.0.1:5000/character_list`
- Returns: A JSON format list of characters and corresponding emotions
- Method: `GET`
```
{
"Hanabi": [
"default",
"Normal",
"Yandere",
],
"Hutao": [
"default"
]
}
```
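A quick way to fetch this list, assuming the service runs locally on port 5000 as above (a sketch, not part of the original document):
```python
import requests

chars = requests.get("http://127.0.0.1:5000/character_list", timeout=10).json()
print(chars)  # e.g. {"Hanabi": ["default", ...], "Hutao": ["default"]}
```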
### 4.2 Text-to-Speech
- URL: `http://127.0.0.1:5000/tts`
- Returns: Audio on success. Error message on failure.
- Method: `GET`/`POST`
```
{
"method": "POST",
"body": {
"character": "${chaName}",
"emotion": "${Emotion}",
"text": "${speakText}",
"text_language": "${textLanguage}",
"batch_size": ${batch_size},
"speed": ${speed},
"top_k": ${topK},
"top_p": ${topP},
"temperature": ${temperature},
"stream": "${stream}",
"format": "${Format}",
"save_temp": "${saveTemp}"
}
}
```
##### Parameter Explanation
- **text**: The text to be converted, URL encoding is recommended.
- **character**: Character folder name, pay attention to case sensitivity, full/half width, and language.
- **emotion**: Character emotion, must be an actually supported emotion of the character, otherwise, the default emotion will be used.
- **text_language**: Text language (auto / zh / en / ja), default is multilingual mixed.
- **top_k**, **top_p**, **temperature**: GPT model parameters, no need to modify if unfamiliar.
- **batch_size**: How many batches at a time, can be increased for faster processing if you have a powerful computer, integer, default is 1.
- **speed**: Speech speed, default is 1.0.
- **save_temp**: Whether to save temporary files, when true, the backend will save the generated audio, and subsequent identical requests will directly return that data, default is false.
- **stream**: Whether to stream, when true, audio will be returned sentence by sentence, default is false.
- **format**: Format, default is WAV, allows MP3/ WAV/ OGG.
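
As a concrete illustration of the parameters above, a minimal sketch (not part of the original document) posting a request with Python's requests; the character name and text are placeholders, and whether your build accepts a JSON body exactly this way should be checked against the service's own docs:
```python
import requests

payload = {
    "character": "Hutao",  # placeholder: must exist under trained/
    "emotion": "default",
    "text": "Hello, this is a synthesis test.",
    "text_language": "auto",
    "batch_size": 1,
    "speed": 1.0,
    "stream": False,
    "format": "wav",
}

resp = requests.post("http://127.0.0.1:5000/tts", json=payload, timeout=60)
resp.raise_for_status()
with open("out.wav", "wb") as f:
    f.write(resp.content)
```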
## 部署tts训练
https://github.com/RVC-Boss/GPT-SoVITS
根据文档说明部署将训练后的模型拷到推理服务的trained目录下
## 如果你需要使用autodl 进行部署
请使用 https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS 作为基础镜像你能快速进行部署
### 下载
```
https://github.com/X-T-E-R/GPT-SoVITS-Inference
```
### 安装
```
cd GPT-SoVITS-Inference
pip3 install -r requirements.txt
cp -r GPT_SoVITS/pretrained_models/ ./GPT_SoVITS/pretrained_models
```
### 启动api
```
python3 app.py
```
### 启动webui
```
python3 webuis/character_manager/webui.py
```


@ -1,103 +0,0 @@
# Using the gpt-sovits approach (bert-sovits suits long-audio training; gpt-sovits gives fast inference on short audio)
## Deploying tts inference
git clone https://github.com/RVC-Boss/GPT-SoVITS.git
git checkout fast_inference_
## 1. Install dependencies
```
conda create -n GPTSoVits python=3.9
conda activate GPTSoVits
bash install.sh
```
Download the pretrained models from [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) and place them in `GPT_SoVITS/GPT_SoVITS/pretrained_models`.
Note:
```
the GPT-SoVITS model files go into the pretrained_models directory
```
like this:
```
pretrained_models/
--chinese-hubert-base
--chinese-roberta-wwm-ext-large
s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt
s2D488k.pth
s2G488k.pth
```
## 3. Launch
### 3.1 Launch the webui (for testing)
python GPT_SoVITS/inference_webui.py
### 3.2 Launch the api service:
python api_v3.py
## 4. API reference
### 4.1 Text-to-Speech
endpoint: `/tts`
GET:
```
http://127.0.0.1:9880/tts?text=先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。&text_lang=zh&ref_audio_path=archive_jingyuan_1.wav&prompt_lang=zh&prompt_text=我是「罗浮」云骑将军景元。不必拘谨,「将军」只是一时的身份,你称呼我景元便可&text_split_method=cut5&batch_size=1&media_type=wav&streaming_mode=true
```
POST:
```json
{
"text": "", # str.(required) text to be synthesized
"text_lang": "", # str.(required) language of the text to be synthesized
"ref_audio_path": "", # str.(required) reference audio path.
"prompt_text": "", # str.(optional) prompt text for the reference audio
"prompt_lang": "", # str.(required) language of the prompt text for the reference audio
"top_k": 5, # int.(optional) top k sampling
"top_p": 1, # float.(optional) top p sampling
"temperature": 1, # float.(optional) temperature for sampling
"text_split_method": "cut5", # str.(optional) text split method, see text_segmentation_method.py for details.
"batch_size": 1, # int.(optional) batch size for inference
"batch_threshold": 0.75, # float.(optional) threshold for batch splitting.
"split_bucket": true, # bool.(optional) whether to split the batch into multiple buckets.
"speed_factor":1.0, # float.(optional) control the speed of the synthesized audio.
"fragment_interval":0.3, # float.(optional) to control the interval of the audio fragment.
"seed": -1, # int.(optional) random seed for reproducibility.
"media_type": "wav", # str.(optional) media type of the output audio, support "wav", "raw", "ogg", "aac".
"streaming_mode": false, # bool.(optional) whether to return a streaming response.
"parallel_infer": True, # bool.(optional) whether to use parallel inference.
"repetition_penalty": 1.35, # float.(optional) repetition penalty for T2S model.
"tts_infer_yaml_path": “GPT_SoVITS/configs/tts_infer.yaml” # str.(optional) tts infer yaml path
}
```
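
A minimal sketch (not part of the original document) of a streaming request against this endpoint, reusing the reference audio and prompt text from the GET example above (placeholders for your own files):
```python
import requests

params = {
    "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。",
    "text_lang": "zh",
    "ref_audio_path": "archive_jingyuan_1.wav",  # placeholder reference audio
    "prompt_lang": "zh",
    "prompt_text": "我是「罗浮」云骑将军景元。不必拘谨,「将军」只是一时的身份,你称呼我景元便可",
    "text_split_method": "cut5",
    "batch_size": 1,
    "media_type": "wav",
    "streaming_mode": "true",
}

with requests.get("http://127.0.0.1:9880/tts", params=params,
                  stream=True, timeout=60) as resp:
    resp.raise_for_status()
    with open("out.wav", "wb") as f:
        for chunk in resp.iter_content(chunk_size=4096):
            f.write(chunk)  # audio arrives incrementally in streaming mode
```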
## Deploying tts training
https://github.com/RVC-Boss/GPT-SoVITS
Switching to your own trained models:
### Switch the GPT model
endpoint: `/set_gpt_weights`
GET:
```
http://127.0.0.1:9880/set_gpt_weights?weights_path=GPT_SoVITS/pretrained_models/xxx.ckpt
```
RESP:
Success: returns "success", http code 200
Failure: returns json with the error message, http code 400
### Switch the Sovits model
endpoint: `/set_sovits_weights`
GET:
```
http://127.0.0.1:9880/set_sovits_weights?weights_path=GPT_SoVITS/pretrained_models/xxx.pth
```
RESP:
Success: returns "success", http code 200
Failure: returns json with the error message, http code 400
## Deploying on autodl
Use https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS as the base image for a quick deployment.


@ -1,28 +0,0 @@
import requests
import pyaudio

# URL for the streamed audio (feel free to change this to a POST)
stream_url = 'http://127.0.0.1:5000/tts?text=这是一段测试文本,旨在通过多种语言风格和复杂性的内容来全面检验文本到语音系统的性能。接下来,我们会探索各种主题和语言结构,包括文学引用、技术性描述、日常会话以及诗歌等。首先,让我们从一段简单的描述性文本开始:“在一个阳光明媚的下午,一位年轻的旅者站在山顶上,眺望着下方那宽广而繁忙的城市。他的心中充满了对未来的憧憬和对旅途的期待。”这段文本测试了系统对自然景观描写的处理能力和情感表达的细腻程度。&stream=true'

# Initialize pyaudio
p = pyaudio.PyAudio()

# Open an output audio stream
stream = p.open(format=p.get_format_from_width(2),
                channels=1,
                rate=32000,
                output=True)

# Fetch the audio stream with requests (feel free to change this to a POST)
response = requests.get(stream_url, stream=True)

# Read chunks and play them
for data in response.iter_content(chunk_size=1024):
    stream.write(data)

# Stop and close the stream
stream.stop_stream()
stream.close()

# Terminate pyaudio
p.terminate()