support fullbody human
This commit is contained in:
parent
df6b9d3c97
commit
b33e7ec9e2
22
README.md
22
README.md
|
@ -76,6 +76,28 @@ python app.py --tts xtts --ref_file data/ref.wav
|
||||||
```
|
```
|
||||||
python app.py --asr_model facebook/hubert-large-ls960-ft
|
python app.py --asr_model facebook/hubert-large-ls960-ft
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### 3.4 设置背景图片
|
||||||
|
```
|
||||||
|
python app.py --bg_img bg.jpg
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.5 全身视频拼接
|
||||||
|
#### 3.5.1 切割训练用的视频
|
||||||
|
```
|
||||||
|
ffmpeg -i fullbody.mp4 -vf crop="400:400:100:5" train.mp4
|
||||||
|
```
|
||||||
|
用train.mp4训练模型
|
||||||
|
#### 3.5.2 提取全身图片
|
||||||
|
```
|
||||||
|
ffmpeg -i fullbody.mp4 -vf fps=25 -qmin 1 -q:v 1 -start_number 0 data/fullbody/img/%d.jpg
|
||||||
|
```
|
||||||
|
#### 3.5.3 启动数字人
|
||||||
|
```
|
||||||
|
python app.py --fullbody --fullbody_img data/fullbody/img --fullbody_offset_x 100 --fullbody_offset_y 5 --fullbody_width 580 --fullbody_height 1080 --W 400 --H 400
|
||||||
|
```
|
||||||
|
- --fullbody_width、--fullbody_height 全身视频的宽、高
|
||||||
|
- --W、--H 训练视频的宽、高
- --fullbody_offset_x、--fullbody_offset_y 训练视频在全身视频中的左上角坐标,对应 3.5.1 中 crop 参数的 x、y(示例中为 100、5)
|
||||||
|
|
||||||
## 4. Docker Run
|
## 4. Docker Run
|
||||||
不需要第1步的安装,直接运行。
|
不需要第1步的安装,直接运行。
|
||||||
|
|
7
app.py
7
app.py
|
@ -260,6 +260,13 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('-m', type=int, default=50)
|
parser.add_argument('-m', type=int, default=50)
|
||||||
parser.add_argument('-r', type=int, default=10)
|
parser.add_argument('-r', type=int, default=10)
|
||||||
|
|
||||||
|
parser.add_argument('--fullbody', action='store_true', help="fullbody human")
|
||||||
|
parser.add_argument('--fullbody_img', type=str, default='data/fullbody/img')
|
||||||
|
parser.add_argument('--fullbody_width', type=int, default=580)
|
||||||
|
parser.add_argument('--fullbody_height', type=int, default=1080)
|
||||||
|
parser.add_argument('--fullbody_offset_x', type=int, default=0)
|
||||||
|
parser.add_argument('--fullbody_offset_y', type=int, default=0)
|
||||||
|
|
||||||
parser.add_argument('--tts', type=str, default='edgetts') #xtts
|
parser.add_argument('--tts', type=str, default='edgetts') #xtts
|
||||||
parser.add_argument('--ref_file', type=str, default=None)
|
parser.add_argument('--ref_file', type=str, default=None)
|
||||||
parser.add_argument('--xtts_server', type=str, default='http://localhost:9000')
|
parser.add_argument('--xtts_server', type=str, default='http://localhost:9000')
|
||||||
|
|
21
nerfreal.py
21
nerfreal.py
|
@ -7,6 +7,7 @@ import subprocess
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
import cv2
|
||||||
|
|
||||||
from asrreal import ASR
|
from asrreal import ASR
|
||||||
from rtmp_streaming import StreamerConfig, Streamer
|
from rtmp_streaming import StreamerConfig, Streamer
|
||||||
|
@ -33,6 +34,10 @@ class NeRFReal:
|
||||||
self.audio_features = data_loader._data.auds # [N, 29, 16]
|
self.audio_features = data_loader._data.auds # [N, 29, 16]
|
||||||
self.audio_idx = 0
|
self.audio_idx = 0
|
||||||
|
|
||||||
|
self.frame_total_num = data_loader._data.end_index
|
||||||
|
print("frame_total_num:",self.frame_total_num)
|
||||||
|
self.frame_index=0
|
||||||
|
|
||||||
# control eye
|
# control eye
|
||||||
self.eye_area = None if not self.opt.exp_eye else data_loader._data.eye_area.mean().item()
|
self.eye_area = None if not self.opt.exp_eye else data_loader._data.eye_area.mean().item()
|
||||||
|
|
||||||
|
@ -132,7 +137,15 @@ class NeRFReal:
|
||||||
#print('-------ernerf time: ',time.time()-t)
|
#print('-------ernerf time: ',time.time()-t)
|
||||||
#print(f'[INFO] outputs shape ',outputs['image'].shape)
|
#print(f'[INFO] outputs shape ',outputs['image'].shape)
|
||||||
image = (outputs['image'] * 255).astype(np.uint8)
|
image = (outputs['image'] * 255).astype(np.uint8)
|
||||||
self.streamer.stream_frame(image)
|
if not self.opt.fullbody:
|
||||||
|
self.streamer.stream_frame(image)
|
||||||
|
else: #fullbody human
|
||||||
|
image_fullbody = cv2.imread(os.path.join(self.opt.fullbody_img, str(self.frame_index%self.frame_total_num)+'.jpg'))
|
||||||
|
image_fullbody = cv2.cvtColor(image_fullbody, cv2.COLOR_BGR2RGB)
|
||||||
|
start_x = self.opt.fullbody_offset_x # 合并后小图片的起始x坐标
|
||||||
|
start_y = self.opt.fullbody_offset_y # 合并后小图片的起始y坐标
|
||||||
|
image_fullbody[start_y:start_y+image.shape[0], start_x:start_x+image.shape[1]] = image
|
||||||
|
self.streamer.stream_frame(image_fullbody)
|
||||||
#self.pipe.stdin.write(image.tostring())
|
#self.pipe.stdin.write(image.tostring())
|
||||||
for _ in range(2):
|
for _ in range(2):
|
||||||
frame = self.asr.get_audio_out()
|
frame = self.asr.get_audio_out()
|
||||||
|
@ -164,6 +177,11 @@ class NeRFReal:
|
||||||
sc.source_height = self.H
|
sc.source_height = self.H
|
||||||
sc.stream_width = self.W
|
sc.stream_width = self.W
|
||||||
sc.stream_height = self.H
|
sc.stream_height = self.H
|
||||||
|
if self.opt.fullbody:
|
||||||
|
sc.source_width = self.opt.fullbody_width
|
||||||
|
sc.source_height = self.opt.fullbody_height
|
||||||
|
sc.stream_width = self.opt.fullbody_width
|
||||||
|
sc.stream_height = self.opt.fullbody_height
|
||||||
sc.stream_fps = fps
|
sc.stream_fps = fps
|
||||||
sc.stream_bitrate = 1000000
|
sc.stream_bitrate = 1000000
|
||||||
sc.stream_profile = 'baseline' #'high444' # 'main'
|
sc.stream_profile = 'baseline' #'high444' # 'main'
|
||||||
|
@ -183,6 +201,7 @@ class NeRFReal:
|
||||||
for _ in range(2):
|
for _ in range(2):
|
||||||
self.asr.run_step()
|
self.asr.run_step()
|
||||||
self.test_step()
|
self.test_step()
|
||||||
|
self.frame_index = (self.frame_index+1)%self.frame_total_num
|
||||||
totaltime += (time.time() - t)
|
totaltime += (time.time() - t)
|
||||||
count += 1
|
count += 1
|
||||||
if count==100:
|
if count==100:
|
||||||
|
|
Loading…
Reference in New Issue