diff --git a/README.md b/README.md index 8616e7b..783fc06 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,28 @@ python app.py --tts xtts --ref_file data/ref.wav ``` python app.py --asr_model facebook/hubert-large-ls960-ft ``` + +### 3.4 设置背景图片 +``` +python app.py --bg_img bg.jpg +``` + +### 3.5 全身视频拼接 +#### 3.5.1 切割训练用的视频 +``` +ffmpeg -i fullbody.mp4 -vf crop="400:400:100:5" train.mp4  +``` +用train.mp4训练模型 +#### 3.5.2 提取全身图片 +``` +ffmpeg -i fullbody.mp4 -vf fps=25 -qmin 1 -q:v 1 -start_number 0 data/fullbody/img/%d.jpg +``` +#### 3.5.3 启动数字人 +``` +python app.py --fullbody --fullbody_img data/fullbody/img --fullbody_offset_x 100 --fullbody_offset_y 5 --fullbody_width 580 --fullbody_height 1080 --W 400 --H 400 +``` +- --fullbody_width、--fullbody_height 全身视频的宽、高 +- --W、--H 训练视频的宽、高 ## 4. Docker Run 不需要第1步的安装,直接运行。 diff --git a/app.py b/app.py index 2e5c5b2..ef3fcfb 100644 --- a/app.py +++ b/app.py @@ -260,6 +260,13 @@ if __name__ == '__main__': parser.add_argument('-m', type=int, default=50) parser.add_argument('-r', type=int, default=10) + parser.add_argument('--fullbody', action='store_true', help="fullbody human") + parser.add_argument('--fullbody_img', type=str, default='data/fullbody/img') + parser.add_argument('--fullbody_width', type=int, default=580) + parser.add_argument('--fullbody_height', type=int, default=1080) + parser.add_argument('--fullbody_offset_x', type=int, default=0) + parser.add_argument('--fullbody_offset_y', type=int, default=0) + parser.add_argument('--tts', type=str, default='edgetts') #xtts parser.add_argument('--ref_file', type=str, default=None) parser.add_argument('--xtts_server', type=str, default='http://localhost:9000') diff --git a/nerfreal.py b/nerfreal.py index 4e4b7c0..08c3d9b 100644 --- a/nerfreal.py +++ b/nerfreal.py @@ -7,6 +7,7 @@ import subprocess import os import time import torch.nn.functional as F +import cv2 from asrreal import ASR from rtmp_streaming import StreamerConfig, Streamer @@ -33,6 +34,10 @@ class NeRFReal:
self.audio_features = data_loader._data.auds # [N, 29, 16] self.audio_idx = 0 + self.frame_total_num = data_loader._data.end_index + print("frame_total_num:",self.frame_total_num) + self.frame_index=0 + # control eye self.eye_area = None if not self.opt.exp_eye else data_loader._data.eye_area.mean().item() @@ -132,7 +137,15 @@ class NeRFReal: #print('-------ernerf time: ',time.time()-t) #print(f'[INFO] outputs shape ',outputs['image'].shape) image = (outputs['image'] * 255).astype(np.uint8) - self.streamer.stream_frame(image) + if not self.opt.fullbody: + self.streamer.stream_frame(image) + else: #fullbody human + image_fullbody = cv2.imread(os.path.join(self.opt.fullbody_img, str(self.frame_index%self.frame_total_num)+'.jpg')) + image_fullbody = cv2.cvtColor(image_fullbody, cv2.COLOR_BGR2RGB) + start_x = self.opt.fullbody_offset_x # 合并后小图片的起始x坐标 + start_y = self.opt.fullbody_offset_y # 合并后小图片的起始y坐标 + image_fullbody[start_y:start_y+image.shape[0], start_x:start_x+image.shape[1]] = image + self.streamer.stream_frame(image_fullbody) #self.pipe.stdin.write(image.tostring()) for _ in range(2): frame = self.asr.get_audio_out() @@ -164,6 +177,11 @@ class NeRFReal: sc.source_height = self.H sc.stream_width = self.W sc.stream_height = self.H + if self.opt.fullbody: + sc.source_width = self.opt.fullbody_width + sc.source_height = self.opt.fullbody_height + sc.stream_width = self.opt.fullbody_width + sc.stream_height = self.opt.fullbody_height sc.stream_fps = fps sc.stream_bitrate = 1000000 sc.stream_profile = 'baseline' #'high444' # 'main' @@ -183,6 +201,7 @@ class NeRFReal: for _ in range(2): self.asr.run_step() self.test_step() + self.frame_index = (self.frame_index+1)%self.frame_total_num totaltime += (time.time() - t) count += 1 if count==100: