1
0
Fork 0
mirror of https://github.com/deepfakes/faceswap synced 2025-06-07 10:43:27 -04:00
faceswap/tools/alignments/jobs_frames.py
2024-04-05 13:51:57 +01:00

485 lines
21 KiB
Python

#!/usr/bin/env python3
""" Tools for manipulating the alignments using Frames as a source """
from __future__ import annotations
import logging
import os
import sys
import typing as T
from datetime import datetime
import cv2
import numpy as np
from tqdm import tqdm
from lib.align import DetectedFace, _EXTRACT_RATIOS
from lib.align.alignments import _VERSION, PNGHeaderDict
from lib.image import encode_image, generate_thumbnail, ImagesSaver
from plugins.extract.pipeline import Extractor, ExtractMedia
from .media import ExtractedFaces, Frames
if T.TYPE_CHECKING:
from argparse import Namespace
from .media import AlignmentData
logger = logging.getLogger(__name__)
class Draw():
    """ Annotate the original frames with the contents of an alignments file and write the
    annotated copies to a new, time-stamped folder.

    For every face held against a frame the bounding box, the landmark mesh and points, the
    "face" and "head" extract boxes and the pose axes are drawn on top of the frame.

    Parameters
    ----------
    alignments: :class:`tools.alignments.media.AlignmentsData`
        The loaded alignments corresponding to the frames to be annotated
    arguments: :class:`argparse.Namespace`
        The command line arguments that have called this job
    """
    def __init__(self, alignments: AlignmentData, arguments: Namespace) -> None:
        logger.debug("Initializing %s: (arguments: %s)", self.__class__.__name__, arguments)
        self._alignments = alignments
        # The frames must be loaded before the output folder can be derived from them
        self._frames = Frames(arguments.frames_dir)
        self._output_folder = self._set_output()
        # (start, end) slice bounds into the landmarks array for each part of the mesh
        self._mesh_areas = dict(mouth=(48, 68),
                                right_eyebrow=(17, 22),
                                left_eyebrow=(22, 27),
                                right_eye=(36, 42),
                                left_eye=(42, 48),
                                nose=(27, 36),
                                jaw=(0, 17),
                                chin=(8, 11))
        logger.debug("Initialized %s", self.__class__.__name__)

    def _set_output(self) -> str:
        """ Create the time-stamped output folder and return its location.

        For a folder of frames the output is a sub-folder inside the frames folder. For a video
        the output is a folder in the directory that is the parent of the frames location.

        Returns
        -------
        str
            Full path to the newly created output folder
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        is_video = self._frames.is_video
        source = self._frames.folder
        parent = os.path.dirname(source) if is_video else source
        retval = os.path.join(parent, f"drawn_landmarks_{timestamp}")
        logger.debug("Creating folder: '%s'", retval)
        os.makedirs(retval)
        return retval

    def process(self) -> None:
        """ Draw the face annotations onto every source frame that exists in the alignments
        file. Frames without alignments are skipped. """
        logger.info("[DRAW LANDMARKS]")  # Tidy up cli output
        drawn = 0
        progress = tqdm(self._frames.file_list_sorted, desc="Drawing landmarks", leave=False)
        for item in progress:
            name = item["frame_fullname"]
            if self._alignments.frame_exists(name):
                self._annotate_image(name)
                drawn += 1
            else:
                logger.verbose("Skipping '%s' - Alignments not found", name)  # type:ignore
        logger.info("%s Frame(s) output", drawn)

    def _annotate_image(self, frame_name: str) -> None:
        """ Draw all annotations for every face in a single frame and save the result to the
        output folder.

        Parameters
        ----------
        frame_name: str
            The name of the frame, as it is keyed in the alignments file
        """
        logger.trace("Annotating frame: '%s'", frame_name)  # type:ignore
        frame = self._frames.load_image(frame_name)
        alignments = self._alignments.get_faces_in_frame(frame_name)
        for face_idx, alignment in enumerate(alignments):
            detected = DetectedFace()
            detected.from_alignment(alignment, image=frame)
            # Bounding Box
            assert detected.left is not None
            assert detected.top is not None
            box_start = (detected.left, detected.top)
            box_end = (detected.right, detected.bottom)
            cv2.rectangle(frame, box_start, box_end, (255, 0, 0), 1)
            points = np.rint(detected.landmarks_xy).astype("int32")
            self._annotate_landmarks(frame, points)
            self._annotate_extract_boxes(frame, detected, face_idx)
            # The extract boxes leave the "head" aligned face loaded, which the pose needs
            self._annotate_pose(frame, detected)
        self._frames.save_image(self._output_folder, frame_name, frame)

    def _annotate_landmarks(self, image: np.ndarray, landmarks: np.ndarray) -> None:
        """ Draw the landmark mesh and the individual landmark points onto the frame.

        Parameters
        ----------
        image: :class:`numpy.ndarray`
            The frame that the mesh and landmarks are to be annotated on to
        landmarks: :class:`numpy.ndarray`
            The 68 point landmarks that are to be annotated onto the frame
        """
        # Mesh: only the eyes and the mouth are drawn as closed shapes
        closed_areas = ("right_eye", "left_eye", "mouth")
        for name, (start, end) in self._mesh_areas.items():
            is_closed = name in closed_areas
            cv2.polylines(image, [landmarks[start:end]], is_closed, (255, 255, 0), 1)
        # Landmarks
        for point in landmarks:
            pos_x, pos_y = point
            cv2.circle(image, (pos_x, pos_y), 1, (0, 255, 255), -1)

    @classmethod
    def _annotate_extract_boxes(cls, image: np.ndarray, face: DetectedFace, index: int) -> None:
        """ Draw the "face" and "head" extract boxes, labelled with the face index, onto the
        frame.

        The aligned face is re-loaded for each centering in turn, so the face is left loaded
        with "head" centering when this method returns.

        Parameters
        ----------
        image: :class:`numpy.ndarray`
            The frame that the extract boxes are to be annotated on to
        face: :class:`lib.align.DetectedFace`
            The detected face to draw the extract boxes for
        index: int
            The face index for the given face
        """
        box_colors = (("face", (0, 255, 0)), ("head", (0, 0, 255)))
        for centering, color in box_colors:
            face.load_aligned(image, centering=centering, force=True)
            roi = face.aligned.original_roi
            corner = roi[0]
            # Place the index label just above the first corner of the box
            label_pos = (corner[0], corner[1] - 10)
            cv2.putText(image, str(index), label_pos, cv2.FONT_HERSHEY_DUPLEX, 1.0, color, 1)
            cv2.polylines(image, [roi], True, color, 1)

    @classmethod
    def _annotate_pose(cls, image: np.ndarray, face: DetectedFace) -> None:
        """ Draw the three pose axes onto the frame, starting from the center of the aligned
        face.

        Parameters
        ----------
        image: :class:`numpy.ndarray`
            The frame that pose is to be annotated on to
        face: :class:`lib.align.DetectedFace`
            The aligned face loaded for head centering
        """
        aligned = face.aligned
        half = aligned.size / 2
        # Center of the aligned image, mapped back into frame space
        center = np.array((half, half)).astype("int32").reshape(1, 2)
        center = np.rint(aligned.transform_points(center, invert=True)).astype("int32")
        # Pose points are scaled to the aligned image size, then mapped back into frame space
        points = aligned.pose.xyz_2d * aligned.size
        points = np.rint(aligned.transform_points(points, invert=True)).astype("int32")
        origin = tuple(center)
        axes = ((1, (0, 255, 0)), (0, (255, 0, 0)), (2, (0, 0, 255)))
        for point_idx, color in axes:
            cv2.line(image, origin, tuple(points[point_idx]), color, 2)
class Extract():
    """ Re-extract faces from source frames based on Alignment data

    Parameters
    ----------
    alignments: :class:`tools.alignments.media.AlignmentData`
        The alignments data loaded from an alignments file for this extract job
    arguments: :class:`argparse.Namespace`
        The :mod:`argparse` arguments as passed in from :mod:`tools.py`
    """
    def __init__(self, alignments: AlignmentData, arguments: Namespace) -> None:
        logger.debug("Initializing %s: (arguments: %s)", self.__class__.__name__, arguments)
        self._arguments = arguments
        self._alignments = alignments
        # Version 1.0 alignments files are handled as "legacy" extractions by this job
        self._is_legacy = self._alignments.version == 1.0
        # Only launched by _legacy_check, when landmark based masks need regenerating
        self._mask_pipeline: Extractor | None = None
        self._faces_dir = arguments.faces_dir
        self._min_size = self._get_min_size(arguments.size, arguments.min_size)
        self._frames = Frames(arguments.frames_dir, self._get_count())
        self._extracted_faces = ExtractedFaces(self._frames,
                                               self._alignments,
                                               size=arguments.size)
        # Created in process(), once the output folder has been validated
        self._saver: ImagesSaver | None = None
        logger.debug("Initialized %s", self.__class__.__name__)

    @classmethod
    def _get_min_size(cls, extract_size: int, min_size: int) -> int:
        """ Obtain the minimum size that a face has been resized from to be included as a valid
        extract.

        Parameters
        ----------
        extract_size: int
            The requested size of the extracted images
        min_size: int
            The percentage amount that has been supplied for valid faces (as a percentage of
            extract size)

        Returns
        -------
        int
            The minimum size, in pixels, that a face is resized from to be considered valid. ``0``
            when no minimum has been requested, otherwise never less than 4 pixels
        """
        retval = 0 if min_size == 0 else max(4, int(extract_size * (min_size / 100.)))
        logger.debug("Extract size: %s, min percentage size: %s, min_size: %s",
                     extract_size, min_size, retval)
        return retval

    def _get_count(self) -> int | None:
        """ If the alignments file has been run through the manual tool, then it will hold video
        meta information, meaning that the count of frames in the alignment file can be relied
        on to be accurate.

        Returns
        -------
        int or ``None``
            For video input which contain video meta-data in the alignments file then the count of
            frames is returned. In all other cases ``None`` is returned
        """
        meta = self._alignments.video_meta_data
        has_meta = all(val is not None for val in meta.values())
        if has_meta:
            # One presentation timestamp is held per frame, so its length is the frame count
            retval: int | None = len(T.cast(list[int] | list[float], meta["pts_time"]))
        else:
            retval = None
        logger.debug("Frame count from alignments file: (has_meta: %s, count: %s)",
                     has_meta, retval)
        return retval

    def process(self) -> None:
        """ Run the re-extraction from Alignments file process"""
        logger.info("[EXTRACT FACES]")  # Tidy up cli output
        self._check_folder()
        if self._is_legacy:
            self._legacy_check()
        self._saver = ImagesSaver(self._faces_dir, as_bytes=True)

        if self._min_size > 0:
            logger.info("Only selecting faces that have been resized from a minimum resolution "
                        "of %spx", self._min_size)

        self._export_faces()

    def _check_folder(self) -> None:
        """ Check that an output faces folder has been provided and that it is empty, creating
        it when it does not yet exist.

        Raises
        ------
        SystemExit
            With exit status ``1`` if no output folder has been provided or the output folder
            already contains files
        """
        err = None
        if not self._faces_dir:
            err = "ERROR: Output faces folder not provided."
        elif not os.path.isdir(self._faces_dir):
            logger.debug("Creating folder: '%s'", self._faces_dir)
            os.makedirs(self._faces_dir)
        elif os.listdir(self._faces_dir):
            err = f"ERROR: Output faces folder should be empty: '{self._faces_dir}'"
        if err:
            logger.error(err)
            # A non-zero status lets the calling shell/process see that the job failed
            sys.exit(1)
        logger.verbose("Creating output folder at '%s'", self._faces_dir)  # type:ignore

    def _legacy_check(self) -> None:
        """ Check whether the alignments file was created with the legacy extraction method.

        If so, force user to re-extract all faces if any options have been specified, otherwise
        raise the appropriate warnings and set the legacy options.
        """
        if self._min_size > 0 or self._arguments.extract_every_n != 1:
            logger.warning("This alignments file was generated with the legacy extraction method.")
            logger.warning("You should run this extraction job, but with 'min_size' set to 0 and "
                           "'extract-every-n' set to 1 to update the alignments file.")
            logger.warning("You can then re-run this extraction job with your chosen options.")
            sys.exit(0)

        # Landmark based masks are regenerated below. Any other stored mask is treated as a
        # neural network based mask that can only be re-cropped
        maskers = ["components", "extended"]
        nn_masks = [mask for mask in self._alignments.mask_summary if mask not in maskers]
        logtype = logger.warning if nn_masks else logger.info
        logtype("This alignments file was created with the legacy extraction method and will be "
                "updated.")
        logtype("Faces will be extracted using the new method and landmarks based masks will be "
                "regenerated.")
        if nn_masks:
            logtype("However, the NN based masks '%s' will be cropped to the legacy extraction "
                    "method, so you may want to run the mask tool to regenerate these "
                    "masks.", "', '".join(nn_masks))
        self._mask_pipeline = Extractor(None, None, maskers, multiprocess=True)
        self._mask_pipeline.launch()
        # Update alignments versioning
        self._alignments._io._version = _VERSION  # pylint:disable=protected-access

    def _export_faces(self) -> None:
        """ Export the faces to the output folder. """
        extracted_faces = 0
        skip_list = self._set_skip_list()
        count = self._frames.count if skip_list is None else self._frames.count - len(skip_list)

        for filename, image in tqdm(self._frames.stream(skip_list=skip_list),
                                    total=count, desc="Saving extracted faces",
                                    leave=False):
            frame_name = os.path.basename(filename)
            if not self._alignments.frame_exists(frame_name):
                logger.verbose("Skipping '%s' - Alignments not found", frame_name)  # type:ignore
                continue
            extracted_faces += self._output_faces(frame_name, image)

        # A legacy file only has its alignments rewritten when every face was re-extracted
        if self._is_legacy and extracted_faces != 0 and self._min_size == 0:
            self._alignments.save()
        logger.info("%s face(s) extracted", extracted_faces)

    def _set_skip_list(self) -> list[int] | None:
        """ Set the indices for frames that should be skipped based on the `extract_every_n`
        command line option.

        Returns
        -------
        list or ``None``
            A list of indices to be skipped if extract_every_n is not `1` otherwise
            returns ``None``
        """
        skip_num = self._arguments.extract_every_n
        if skip_num == 1:
            logger.debug("Not skipping any frames")
            return None
        skip_list = []
        for idx, item in enumerate(T.cast(list[dict[str, str]], self._frames.file_list_sorted)):
            if idx % skip_num != 0:
                logger.trace("Adding image '%s' to skip list due to "  # type:ignore
                             "extract_every_n = %s", item["frame_fullname"], skip_num)
                skip_list.append(idx)
        logger.debug("Adding skip list: %s", skip_list)
        return skip_list

    def _output_faces(self, filename: str, image: np.ndarray) -> int:
        """ For each frame save out the faces

        Parameters
        ----------
        filename: str
            The filename (without the full path) of the current frame
        image: :class:`numpy.ndarray`
            The full frame that faces are to be extracted from

        Returns
        -------
        int
            The total number of faces that have been extracted
        """
        logger.trace("Outputting frame: %s", filename)  # type:ignore
        face_count = 0
        frame_name = os.path.splitext(filename)[0]
        faces = self._select_valid_faces(filename, image)
        assert self._saver is not None
        if not faces:
            return face_count
        if self._is_legacy:
            faces = self._process_legacy(filename, image, faces)

        for idx, face in enumerate(faces):
            output = f"{frame_name}_{idx}.png"
            meta: PNGHeaderDict = {
                "alignments": face.to_png_meta(),
                "source": {"alignments_version": self._alignments.version,
                           "original_filename": output,
                           "face_index": idx,
                           "source_filename": filename,
                           "source_is_video": self._frames.is_video,
                           "source_frame_dims": T.cast(tuple[int, int], image.shape[:2])}}
            assert face.aligned.face is not None
            self._saver.save(output, encode_image(face.aligned.face, ".png", metadata=meta))
            if self._min_size == 0 and self._is_legacy:
                # Write the updated face back so the legacy alignments file can be re-saved
                face.thumbnail = generate_thumbnail(face.aligned.face, size=96, quality=60)
                self._alignments.data[filename]["faces"][idx] = face.to_alignment()
            face_count += 1
        self._saver.close()
        return face_count

    def _select_valid_faces(self, frame: str, image: np.ndarray) -> list[DetectedFace]:
        """ Return the aligned faces from a frame that meet the selection criteria.

        Parameters
        ----------
        frame: str
            The filename (without the full path) of the current frame
        image: :class:`numpy.ndarray`
            The full frame that faces are to be extracted from

        Returns
        -------
        list:
            List of valid :class:`lib.align.DetectedFace` objects
        """
        faces = self._extracted_faces.get_faces_in_frame(frame, image=image)
        if self._min_size == 0:
            valid_faces = faces
        else:
            # Sizes are matched to faces by position in the two lists
            sizes = self._extracted_faces.get_roi_size_for_frame(frame)
            valid_faces = [faces[idx] for idx, size in enumerate(sizes)
                           if size >= self._min_size]
        logger.trace("frame: '%s', total_faces: %s, valid_faces: %s",  # type:ignore
                     frame, len(faces), len(valid_faces))
        return valid_faces

    def _process_legacy(self,
                        filename: str,
                        image: np.ndarray,
                        detected_faces: list[DetectedFace]) -> list[DetectedFace]:
        """ Process legacy face extractions to new extraction method.

        Updates stored masks to new extract size

        Parameters
        ----------
        filename: str
            The current frame filename
        image: :class:`numpy.ndarray`
            The current image the contains the faces
        detected_faces: list
            list of :class:`lib.align.DetectedFace` objects for the current frame

        Returns
        -------
        list
            The updated list of :class:`lib.align.DetectedFace` objects for the current frame
        """
        # Update landmarks based masks for face centering
        assert self._mask_pipeline is not None
        mask_item = ExtractMedia(filename, image, detected_faces=detected_faces)
        self._mask_pipeline.input_queue.put(mask_item)
        faces = next(self._mask_pipeline.detected_faces()).detected_faces

        # Pad and shift Neural Network based masks to face centering
        for face in faces:
            self._pad_legacy_masks(face)
        return faces

    @classmethod
    def _pad_legacy_masks(cls, detected_face: DetectedFace) -> None:
        """ Recenter legacy Neural Network based masks from legacy centering to face centering
        and pad accordingly.

        Update the masks back into the detected face objects.

        Parameters
        ----------
        detected_face: :class:`lib.align.DetectedFace`
            The detected face to update the masks for
        """
        offset = detected_face.aligned.pose.offset["face"]
        for name, mask in detected_face.mask.items():  # Re-center mask and pad to face size
            if name in ("components", "extended"):
                # Landmark based masks are skipped here
                continue
            old_mask = mask.mask.astype("float32") / 255.0
            size = old_mask.shape[0]
            new_size = int(size + (size * _EXTRACT_RATIOS["face"]) / 2)

            shift = np.rint(offset * (size - (size * _EXTRACT_RATIOS["face"]))).astype("int32")
            # Placement of the old mask on the new canvas: centered, then moved by the shift
            pos = np.array([(new_size // 2 - size // 2) - shift[1],
                            (new_size // 2) + (size // 2) - shift[1],
                            (new_size // 2 - size // 2) - shift[0],
                            (new_size // 2) + (size // 2) - shift[0]])
            # Clip the placement to the canvas
            bounds = np.array([max(0, pos[0]), min(new_size, pos[1]),
                               max(0, pos[2]), min(new_size, pos[3])])

            # Drop the part of the old mask that the clipping pushed outside the canvas
            slice_in = [slice(0 - (pos[0] - bounds[0]), size - (pos[1] - bounds[1])),
                        slice(0 - (pos[2] - bounds[2]), size - (pos[3] - bounds[3]))]
            slice_out = [slice(bounds[0], bounds[1]), slice(bounds[2], bounds[3])]

            new_mask = np.zeros((new_size, new_size, 1), dtype="float32")
            new_mask[slice_out[0], slice_out[1], :] = old_mask[slice_in[0], slice_in[1], :]

            mask.replace_mask(new_mask)
            # Get the affine matrix from recently generated components mask
            # pylint:disable=protected-access
            mask._affine_matrix = detected_face.mask["components"].affine_matrix