support fullbody human

This commit is contained in:
lipku 2024-03-23 21:13:21 +08:00
parent df6b9d3c97
commit b33e7ec9e2
3 changed files with 49 additions and 1 deletions

View File

@ -76,6 +76,28 @@ python app.py --tts xtts --ref_file data/ref.wav
``` ```
python app.py --asr_model facebook/hubert-large-ls960-ft python app.py --asr_model facebook/hubert-large-ls960-ft
``` ```
### 3.4 设置背景图片
```
python app.py --bg_img bg.jpg
```
### 3.5 全身视频拼接
#### 3.5.1 切割训练用的视频
```
ffmpeg -i fullbody.mp4 -vf crop="400:400:100:5" train.mp4 
```
用train.mp4训练模型
#### 3.5.2 提取全身图片
```
ffmpeg -i fullbody.mp4 -vf fps=25 -qmin 1 -q:v 1 -start_number 0 data/fullbody/img/%d.jpg
```
#### 3.5.3 启动数字人
```
python app.py --fullbody --fullbody_img data/fullbody/img --fullbody_offset_x 100 --fullbody_offset_y 5 --fullbody_width 580 --fullbody_height 1080 --W 400 --H 400
```
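- --fullbody_width、--fullbody_height 全身视频的宽、高
- --fullbody_offset_x、--fullbody_offset_y 训练视频画面在全身视频中的左上角坐标，需与 3.5.1 中 crop 参数的 x、y 偏移一致（示例中为 100、5）
- --W、--H 训练视频的宽、高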
## 4. Docker Run ## 4. Docker Run
不需要第1步的安装直接运行。 不需要第1步的安装直接运行。

7
app.py
View File

@ -260,6 +260,13 @@ if __name__ == '__main__':
parser.add_argument('-m', type=int, default=50) parser.add_argument('-m', type=int, default=50)
parser.add_argument('-r', type=int, default=10) parser.add_argument('-r', type=int, default=10)
parser.add_argument('--fullbody', action='store_true', help="fullbody human")
parser.add_argument('--fullbody_img', type=str, default='data/fullbody/img')
parser.add_argument('--fullbody_width', type=int, default=580)
parser.add_argument('--fullbody_height', type=int, default=1080)
parser.add_argument('--fullbody_offset_x', type=int, default=0)
parser.add_argument('--fullbody_offset_y', type=int, default=0)
parser.add_argument('--tts', type=str, default='edgetts') #xtts parser.add_argument('--tts', type=str, default='edgetts') #xtts
parser.add_argument('--ref_file', type=str, default=None) parser.add_argument('--ref_file', type=str, default=None)
parser.add_argument('--xtts_server', type=str, default='http://localhost:9000') parser.add_argument('--xtts_server', type=str, default='http://localhost:9000')

View File

@ -7,6 +7,7 @@ import subprocess
import os import os
import time import time
import torch.nn.functional as F import torch.nn.functional as F
import cv2
from asrreal import ASR from asrreal import ASR
from rtmp_streaming import StreamerConfig, Streamer from rtmp_streaming import StreamerConfig, Streamer
@ -33,6 +34,10 @@ class NeRFReal:
self.audio_features = data_loader._data.auds # [N, 29, 16] self.audio_features = data_loader._data.auds # [N, 29, 16]
self.audio_idx = 0 self.audio_idx = 0
self.frame_total_num = data_loader._data.end_index
print("frame_total_num:",self.frame_total_num)
self.frame_index=0
# control eye # control eye
self.eye_area = None if not self.opt.exp_eye else data_loader._data.eye_area.mean().item() self.eye_area = None if not self.opt.exp_eye else data_loader._data.eye_area.mean().item()
@ -132,7 +137,15 @@ class NeRFReal:
#print('-------ernerf time: ',time.time()-t) #print('-------ernerf time: ',time.time()-t)
#print(f'[INFO] outputs shape ',outputs['image'].shape) #print(f'[INFO] outputs shape ',outputs['image'].shape)
image = (outputs['image'] * 255).astype(np.uint8) image = (outputs['image'] * 255).astype(np.uint8)
self.streamer.stream_frame(image) if not self.opt.fullbody:
self.streamer.stream_frame(image)
else: #fullbody human
image_fullbody = cv2.imread(os.path.join(self.opt.fullbody_img, str(self.frame_index%self.frame_total_num)+'.jpg'))
image_fullbody = cv2.cvtColor(image_fullbody, cv2.COLOR_BGR2RGB)
start_x = self.opt.fullbody_offset_x # 合并后小图片的起始x坐标
start_y = self.opt.fullbody_offset_y # 合并后小图片的起始y坐标
image_fullbody[start_y:start_y+image.shape[0], start_x:start_x+image.shape[1]] = image
self.streamer.stream_frame(image_fullbody)
#self.pipe.stdin.write(image.tostring()) #self.pipe.stdin.write(image.tostring())
for _ in range(2): for _ in range(2):
frame = self.asr.get_audio_out() frame = self.asr.get_audio_out()
@ -164,6 +177,11 @@ class NeRFReal:
sc.source_height = self.H sc.source_height = self.H
sc.stream_width = self.W sc.stream_width = self.W
sc.stream_height = self.H sc.stream_height = self.H
if self.opt.fullbody:
sc.source_width = self.opt.fullbody_width
sc.source_height = self.opt.fullbody_height
sc.stream_width = self.opt.fullbody_width
sc.stream_height = self.opt.fullbody_height
sc.stream_fps = fps sc.stream_fps = fps
sc.stream_bitrate = 1000000 sc.stream_bitrate = 1000000
sc.stream_profile = 'baseline' #'high444' # 'main' sc.stream_profile = 'baseline' #'high444' # 'main'
@ -183,6 +201,7 @@ class NeRFReal:
for _ in range(2): for _ in range(2):
self.asr.run_step() self.asr.run_step()
self.test_step() self.test_step()
self.frame_index = (self.frame_index+1)%self.frame_total_num
totaltime += (time.time() - t) totaltime += (time.time() - t)
count += 1 count += 1
if count==100: if count==100: