126 lines
4.2 KiB
Python
126 lines
4.2 KiB
Python
from os import listdir, path
|
|
import numpy as np
|
|
import scipy, cv2, os, sys, argparse
|
|
import json, subprocess, random, string
|
|
from tqdm import tqdm
|
|
from glob import glob
|
|
import torch
|
|
import pickle
|
|
import face_detection
|
|
|
|
|
|
parser = argparse.ArgumentParser(description='Inference code to lip-sync videos in the wild using Wav2Lip models')
|
|
parser.add_argument('--img_size', default=96, type=int)
|
|
parser.add_argument('--avatar_id', default='wav2lip_avatar1', type=str)
|
|
parser.add_argument('--video_path', default='', type=str)
|
|
parser.add_argument('--nosmooth', default=False, action='store_true',
|
|
help='Prevent smoothing face detections over a short temporal window')
|
|
parser.add_argument('--pads', nargs='+', type=int, default=[0, 10, 0, 0],
|
|
help='Padding (top, bottom, left, right). Please adjust to include chin at least')
|
|
parser.add_argument('--face_det_batch_size', type=int,
|
|
help='Batch size for face detection', default=16)
|
|
args = parser.parse_args()
|
|
|
|
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
print('Using {} for inference.'.format(device))
|
|
|
|
def osmakedirs(path_list):
|
|
for path in path_list:
|
|
os.makedirs(path) if not os.path.exists(path) else None
|
|
|
|
def video2imgs(vid_path, save_path, ext = '.png',cut_frame = 10000000):
|
|
cap = cv2.VideoCapture(vid_path)
|
|
count = 0
|
|
while True:
|
|
if count > cut_frame:
|
|
break
|
|
ret, frame = cap.read()
|
|
if ret:
|
|
cv2.imwrite(f"{save_path}/{count:08d}.png", frame)
|
|
count += 1
|
|
else:
|
|
break
|
|
|
|
def read_imgs(img_list):
|
|
frames = []
|
|
print('reading images...')
|
|
for img_path in tqdm(img_list):
|
|
frame = cv2.imread(img_path)
|
|
frames.append(frame)
|
|
return frames
|
|
|
|
def get_smoothened_boxes(boxes, T):
|
|
for i in range(len(boxes)):
|
|
if i + T > len(boxes):
|
|
window = boxes[len(boxes) - T:]
|
|
else:
|
|
window = boxes[i : i + T]
|
|
boxes[i] = np.mean(window, axis=0)
|
|
return boxes
|
|
|
|
def face_detect(images):
|
|
detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D,
|
|
flip_input=False, device=device)
|
|
|
|
batch_size = args.face_det_batch_size
|
|
|
|
while 1:
|
|
predictions = []
|
|
try:
|
|
for i in tqdm(range(0, len(images), batch_size)):
|
|
predictions.extend(detector.get_detections_for_batch(np.array(images[i:i + batch_size])))
|
|
except RuntimeError:
|
|
if batch_size == 1:
|
|
raise RuntimeError('Image too big to run face detection on GPU. Please use the --resize_factor argument')
|
|
batch_size //= 2
|
|
print('Recovering from OOM error; New batch size: {}'.format(batch_size))
|
|
continue
|
|
break
|
|
|
|
results = []
|
|
pady1, pady2, padx1, padx2 = args.pads
|
|
for rect, image in zip(predictions, images):
|
|
if rect is None:
|
|
cv2.imwrite('temp/faulty_frame.jpg', image) # check this frame where the face was not detected.
|
|
raise ValueError('Face not detected! Ensure the video contains a face in all the frames.')
|
|
|
|
y1 = max(0, rect[1] - pady1)
|
|
y2 = min(image.shape[0], rect[3] + pady2)
|
|
x1 = max(0, rect[0] - padx1)
|
|
x2 = min(image.shape[1], rect[2] + padx2)
|
|
|
|
results.append([x1, y1, x2, y2])
|
|
|
|
boxes = np.array(results)
|
|
if not args.nosmooth: boxes = get_smoothened_boxes(boxes, T=5)
|
|
results = [[image[y1: y2, x1:x2], (y1, y2, x1, x2)] for image, (x1, y1, x2, y2) in zip(images, boxes)]
|
|
|
|
del detector
|
|
return results
|
|
|
|
if __name__ == "__main__":
|
|
avatar_path = f"./results/avatars/{args.avatar_id}"
|
|
full_imgs_path = f"{avatar_path}/full_imgs"
|
|
face_imgs_path = f"{avatar_path}/face_imgs"
|
|
coords_path = f"{avatar_path}/coords.pkl"
|
|
osmakedirs([avatar_path,full_imgs_path,face_imgs_path])
|
|
print(args)
|
|
|
|
#if os.path.isfile(args.video_path):
|
|
video2imgs(args.video_path, full_imgs_path, ext = 'png')
|
|
input_img_list = sorted(glob(os.path.join(full_imgs_path, '*.[jpJP][pnPN]*[gG]')))
|
|
|
|
frames = read_imgs(input_img_list)
|
|
face_det_results = face_detect(frames)
|
|
coord_list = []
|
|
idx = 0
|
|
for frame,coords in face_det_results:
|
|
#x1, y1, x2, y2 = bbox
|
|
resized_crop_frame = cv2.resize(frame,(args.img_size, args.img_size)) #,interpolation = cv2.INTER_LANCZOS4)
|
|
cv2.imwrite(f"{face_imgs_path}/{idx:08d}.png", resized_crop_frame)
|
|
coord_list.append(coords)
|
|
idx = idx + 1
|
|
|
|
with open(coords_path, 'wb') as f:
|
|
pickle.dump(coord_list, f)
|