mirror of
https://github.com/deepfakes/faceswap
synced 2025-06-07 10:43:27 -04:00
519 lines
20 KiB
Python
519 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
""" Output processing for faceswap's mask tool """
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import sys
|
|
import typing as T
|
|
from argparse import Namespace
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from tqdm import tqdm
|
|
|
|
from lib.align import AlignedFace
|
|
from lib.align.alignments import AlignmentDict
|
|
|
|
from lib.image import ImagesSaver, read_image_meta_batch
|
|
from lib.utils import get_folder
|
|
from scripts.fsmedia import Alignments as ExtractAlignments
|
|
|
|
if T.TYPE_CHECKING:
|
|
from lib.align import Alignments, DetectedFace
|
|
from lib.align.aligned_face import CenteringType
|
|
|
|
# Module level logger, named after this module's import path
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class Output:
    """ Handles outputting of masks for preview/editing to disk

    Parameters
    ----------
    arguments: :class:`argparse.Namespace`
        The command line arguments that the mask tool was called with
    alignments: :class:`~lib.align.alignments.Alignments` | None
        The alignments file object (or ``None`` if not provided and input is faces)
    file_list: list[str]
        Full file list for the loader. Used for extracting alignments from faces
    """
    def __init__(self, arguments: Namespace,
                 alignments: Alignments | None,
                 file_list: list[str]) -> None:
        logger.debug("Initializing %s (arguments: %s, alignments: %s, file_list: %s)",
                     self.__class__.__name__, arguments, alignments, len(file_list))

        self._blur_kernel: int = arguments.blur_kernel
        self._threshold: int = arguments.threshold
        self._type: T.Literal["combined", "masked", "mask"] = arguments.output_type
        self._full_frame: bool = arguments.full_frame
        self._mask_type = arguments.masker
        self._centering: CenteringType = arguments.centering

        self._input_is_faces = arguments.input_type == "faces"
        # NB: The saver and alignments helpers read the attributes set above, so must come last
        self._saver = self._set_saver(arguments.output, arguments.processing)
        self._alignments = self._get_alignments(alignments, file_list)

        # Faces held back for full frame output until every face for the frame has been seen
        self._full_frame_cache: dict[str, list[tuple[int, DetectedFace]]] = {}

        logger.debug("Initialized %s", self.__class__.__name__)

    @property
    def should_save(self) -> bool:
        """bool: ``True`` if mask images should be output otherwise ``False`` """
        return self._saver is not None

    def _get_subfolder(self, output: str) -> str:
        """ Obtain a subfolder within the output folder to save the output based on selected
        output options.

        Parameters
        ----------
        output: str
            Full path to the root output folder

        Returns
        -------
        str:
            The full path to where masks should be saved
        """
        out_type = "frame" if self._full_frame else "face"
        retval = os.path.join(output,
                              f"{self._mask_type}_{out_type}_{self._type}")
        logger.info("Saving masks to '%s'", retval)
        return retval

    def _set_saver(self, output: str | None, processing: str) -> ImagesSaver | None:
        """ Set up the image saver, if an output location has been provided

        Parameters
        ----------
        output: str | None
            Full path to the root output folder if provided
        processing: str
            The processing that has been selected

        Returns
        -------
        ``None`` or :class:`lib.image.ImagesSaver`:
            If output is requested, returns a :class:`lib.image.ImagesSaver` otherwise
            returns ``None``

        Raises
        ------
        SystemExit
            If the selected processing is 'output' but no output folder has been provided
        """
        if not output:  # Covers both ``None`` and an empty string
            if processing == "output":
                logger.error("Processing set as 'output' but no output folder provided.")
                # Exit with a failure code so that calling scripts can detect the error
                sys.exit(1)
            logger.debug("No output provided. Not creating saver")
            return None
        output_dir = get_folder(self._get_subfolder(output), make_folder=True)
        retval = ImagesSaver(output_dir)
        logger.debug(retval)
        return retval

    def _get_alignments(self,
                        alignments: Alignments | None,
                        file_list: list[str]) -> Alignments | None:
        """ Obtain the alignments file. If input is faces and full frame output is requested then
        the file needs to be generated from the input faces, if not provided

        Parameters
        ----------
        alignments: :class:`~lib.align.alignments.Alignments` | None
            The alignments file object (or ``None`` if not provided and input is faces)
        file_list: list[str]
            Full paths to the mask tool input files

        Returns
        -------
        :class:`~lib.align.alignments.Alignments` | None
            The alignments file if provided and/or is required otherwise ``None``
        """
        if alignments is not None or not self._full_frame:
            return alignments
        logger.debug("Generating alignments from faces")

        # Group the alignment data stored in each face's metadata by its source frame
        data = T.cast(dict[str, AlignmentDict], {})
        for _, meta in tqdm(read_image_meta_batch(file_list),
                            desc="Reading alignments from faces",
                            total=len(file_list),
                            leave=False):
            fname = meta["itxt"]["source"]["source_filename"]
            aln = meta["itxt"]["alignments"]
            data.setdefault(fname, {}).setdefault("faces",  # type:ignore[typeddict-item]
                                                  []).append(aln)

        # Placeholder path: the object is populated from ``data`` rather than this location
        dummy_args = Namespace(alignments_path="/dummy/alignments.fsa")
        retval = ExtractAlignments(dummy_args, is_extract=True)
        retval.update_from_dict(data)
        return retval

    def _get_background_frame(self, detected_faces: list[DetectedFace], frame_dims: tuple[int, int]
                              ) -> np.ndarray:
        """ Obtain the background image when final output is in full frame format. There will only
        ever be one background, even when there are multiple faces

        The output image will depend on the requested output type and whether the input is faces
        or frames

        Parameters
        ----------
        detected_faces: list[:class:`~lib.align.detected_face.DetectedFace`]
            Detected face objects for the output image
        frame_dims: tuple[int, int]
            The size of the original frame

        Returns
        -------
        :class:`numpy.ndarray`
            The full frame background image for applying masks to
        """
        if self._type == "mask":
            return np.zeros(frame_dims, dtype="uint8")

        if not self._input_is_faces:  # Frame is in the detected faces object
            assert detected_faces[0].image is not None
            return np.ascontiguousarray(detected_faces[0].image)

        # Outputting to frames, but input is faces. Apply the face patches to an empty canvas
        retval = np.zeros((*frame_dims, 3), dtype="uint8")
        # The border mode depends only on the number of faces, so select it once up front
        border = cv2.BORDER_TRANSPARENT if len(detected_faces) > 1 else cv2.BORDER_CONSTANT
        for detected_face in detected_faces:
            assert detected_face.image is not None
            face = AlignedFace(detected_face.landmarks_xy,
                               image=detected_face.image,
                               centering="head",
                               size=detected_face.image.shape[0],
                               is_aligned=True)
            assert face.face is not None
            # ``retval`` is passed as the destination so each face is warped onto the same canvas
            cv2.warpAffine(face.face,
                           face.adjusted_matrix,
                           tuple(reversed(frame_dims)),
                           retval,
                           flags=cv2.WARP_INVERSE_MAP | face.interpolators[1],
                           borderMode=border)
        return retval

    def _get_background_face(self,
                             detected_face: DetectedFace,
                             mask_centering: CenteringType,
                             mask_size: int) -> np.ndarray:
        """ Obtain the background image when the output is faces

        The output image will depend on the requested output type and whether the input is faces
        or frames

        Parameters
        ----------
        detected_face: :class:`~lib.align.detected_face.DetectedFace`
            Detected face object for the output image
        mask_centering: Literal["face", "head", "legacy"]
            The centering of the stored mask
        mask_size: int
            The pixel size of the stored mask

        Returns
        -------
        :class:`numpy.ndarray`
            The face background image for applying the mask to
        """
        if self._type == "mask":
            return np.zeros((mask_size, mask_size), dtype="uint8")

        assert detected_face.image is not None

        if self._input_is_faces:
            retval = AlignedFace(detected_face.landmarks_xy,
                                 image=detected_face.image,
                                 centering=mask_centering,
                                 size=mask_size,
                                 is_aligned=True).face
        else:
            # Version 1.0 alignments files are handled with legacy centering
            centering: CenteringType = ("legacy" if self._alignments is not None and
                                        self._alignments.version == 1.0
                                        else mask_centering)
            detected_face.load_aligned(detected_face.image,
                                       size=mask_size,
                                       centering=centering,
                                       force=True)
            retval = detected_face.aligned.face

        assert retval is not None
        return retval

    def _get_background(self,
                        detected_faces: list[DetectedFace],
                        frame_dims: tuple[int, int],
                        mask_centering: CenteringType,
                        mask_size: int) -> np.ndarray:
        """ Obtain the background image that the final output will be placed on

        Parameters
        ----------
        detected_faces: list[:class:`~lib.align.detected_face.DetectedFace`]
            Detected face objects for the output image
        frame_dims: tuple[int, int]
            The size of the original frame
        mask_centering: Literal["face", "head", "legacy"]
            The centering of the stored mask
        mask_size: int
            The pixel size of the stored mask

        Returns
        -------
        :class:`numpy.ndarray`
            The background image for the mask output
        """
        if self._full_frame:
            retval = self._get_background_frame(detected_faces, frame_dims)
        else:
            assert len(detected_faces) == 1  # If outputting faces, we should only receive 1 face
            retval = self._get_background_face(detected_faces[0], mask_centering, mask_size)

        logger.trace("Background image (size: %s, dtype: %s)",  # type:ignore[attr-defined]
                     retval.shape, retval.dtype)
        return retval

    def _get_mask(self,
                  detected_faces: list[DetectedFace],
                  mask_type: str,
                  mask_dims: tuple[int, int]) -> np.ndarray:
        """ Generate the mask to be applied to the final output frame

        Parameters
        ----------
        detected_faces: list[:class:`~lib.align.detected_face.DetectedFace`]
            Detected face objects to generate the masks from
        mask_type: str
            The mask-type to use
        mask_dims : tuple[int, int]
            The size of the mask to output

        Returns
        -------
        :class:`numpy.ndarray`
            The final mask to apply to the output image
        """
        retval = np.zeros(mask_dims, dtype="uint8")
        for face in detected_faces:
            mask_object = face.mask[mask_type]
            mask_object.set_blur_and_threshold(blur_kernel=self._blur_kernel,
                                               threshold=self._threshold)
            if self._full_frame:
                mask = mask_object.get_full_frame_mask(*reversed(mask_dims))
            else:
                mask = mask_object.mask[..., 0]
            # Merge each face's mask into the output by keeping the per-pixel maximum
            np.maximum(retval, mask, out=retval)
        logger.trace("Final mask (shape: %s, dtype: %s)",  # type:ignore[attr-defined]
                     retval.shape, retval.dtype)
        return retval

    def _build_output_image(self, background: np.ndarray, mask: np.ndarray) -> np.ndarray:
        """ Collate the mask and images for the final output image, depending on selected output
        type

        Parameters
        ----------
        background: :class:`numpy.ndarray`
            The image that the mask will be applied to
        mask: :class:`numpy.ndarray`
            The mask to output

        Returns
        -------
        :class:`numpy.ndarray`
            The final output image
        """
        if self._type == "mask":
            return mask

        mask = mask[..., None]
        if self._type == "masked":
            # Append the mask as an additional final channel of the background image
            return np.concatenate([background, mask], axis=-1)

        # "combined" output: background, masked background and mask placed side by side
        height, width = background.shape[:2]
        masked = (background.astype("float32") * mask.astype("float32") / 255.).astype("uint8")
        mask = np.tile(mask, 3)
        for img in (background, masked, mask):
            # Draw a 1px white outline around each sub image (drawn in place)
            cv2.rectangle(img, (0, 0), (width - 1, height - 1), (255, 255, 255), 1)
        # Stack along the shorter dimension: vertically for landscape, otherwise horizontally
        axis = 0 if background.shape[0] < background.shape[1] else 1
        retval = np.concatenate((background, masked, mask), axis=axis)

        return retval

    def _create_image(self,
                      detected_faces: list[DetectedFace],
                      mask_type: str,
                      frame_dims: tuple[int, int] | None) -> np.ndarray:
        """ Create a mask preview image for saving out to disk

        Parameters
        ----------
        detected_faces: list[:class:`~lib.align.detected_face.DetectedFace`]
            Detected face objects for the output image
        mask_type: str
            The mask_type to process
        frame_dims: tuple[int, int] | None
            The size of the original frame, if input is faces otherwise ``None``

        Returns
        -------
        :class:`numpy.ndarray`:
            A preview image depending on the output type in one of the following forms:
              - Containing 3 sub images: The original face, the masked face and the mask
              - The mask only
              - The masked face
        """
        assert detected_faces[0].image is not None
        # For frames input the dimensions are taken from the loaded image itself
        dims = T.cast(tuple[int, int],
                      frame_dims if self._input_is_faces else detected_faces[0].image.shape[:2])
        assert dims is not None and len(dims) == 2

        mask_centering = detected_faces[0].mask[mask_type].stored_centering
        mask_size = detected_faces[0].mask[mask_type].stored_size

        background = self._get_background(detected_faces, dims, mask_centering, mask_size)
        mask = self._get_mask(detected_faces,
                              mask_type,
                              dims if self._full_frame else (mask_size, mask_size))
        retval = self._build_output_image(background, mask)

        logger.trace("Output image (shape: %s, dtype: %s)",  # type:ignore[attr-defined]
                     retval.shape, retval.dtype)
        return retval

    def _handle_cache(self,
                      frame: str,
                      idx: int,
                      detected_face: DetectedFace) -> list[tuple[int, DetectedFace]]:
        """ For full frame output, cache any faces until all detected faces have been seen. For
        face output, just return the detected_face object inside a list

        Parameters
        ----------
        frame: str
            The frame name in the alignments file
        idx: int
            The index of the face for this frame in the alignments file
        detected_face: :class:`~lib.align.detected_face.DetectedFace`
            A detected_face object for a face

        Returns
        -------
        list[tuple[int, :class:`~lib.align.detected_face.DetectedFace`]]
            Face index and detected face objects to be processed for this output, if any
        """
        if not self._full_frame:
            return [(idx, detected_face)]

        assert self._alignments is not None
        faces_in_frame = self._alignments.count_faces_in_frame(frame)
        if faces_in_frame == 1:  # Nothing to wait for, so no need to cache
            return [(idx, detected_face)]

        self._full_frame_cache.setdefault(frame, []).append((idx, detected_face))

        if len(self._full_frame_cache[frame]) != faces_in_frame:
            logger.trace("Caching face for frame '%s'", frame)  # type:ignore[attr-defined]
            return []

        # All faces for the frame have now been collected. Release them from the cache
        retval = self._full_frame_cache.pop(frame)
        logger.trace("Processing '%s' from cache: %s", frame, retval)  # type:ignore[attr-defined]
        return retval

    def _get_mask_types(self,
                        frame: str,
                        detected_faces: list[tuple[int, DetectedFace]]) -> list[str]:
        """ Get the mask type names for the selected mask type. Remove any detected faces where
        the selected mask does not exist

        Parameters
        ----------
        frame: str
            The frame name in the alignments file
        detected_faces: list[tuple[int, :class:`~lib.align.detected_face.DetectedFace`]]
            The face index and detected_face objects for output. Faces which do not contain the
            selected mask are removed from this list in place

        Returns
        -------
        list[str]
            List of mask type names to be processed
        """
        if self._mask_type == "bisenet-fp":
            mask_types = [f"{self._mask_type}_{area}" for area in ("face", "head")]
        else:
            mask_types = [self._mask_type]

        if self._mask_type == "custom":
            mask_types.append(f"{self._mask_type}_{self._centering}")

        final_masks = set()
        # Iterate in reverse so that deleting an entry does not shift the pending indices
        for idx in reversed(range(len(detected_faces))):
            face_idx, detected_face = detected_faces[idx]
            if detected_face.mask is None or not any(mask in detected_face.mask
                                                     for mask in mask_types):
                logger.warning("Mask type '%s' does not exist for frame '%s' index %s. Skipping",
                               self._mask_type, frame, face_idx)
                del detected_faces[idx]
                continue
            final_masks.update([m for m in detected_face.mask if m in mask_types])

        retval = list(final_masks)
        logger.trace("Handling mask types: %s", retval)  # type:ignore[attr-defined]
        return retval

    def save(self,
             frame: str,
             idx: int,
             detected_face: DetectedFace,
             frame_dims: tuple[int, int] | None = None) -> None:
        """ Build the mask preview image and save

        Parameters
        ----------
        frame: str
            The frame name in the alignments file
        idx: int
            The index of the face for this frame in the alignments file
        detected_face: :class:`~lib.align.detected_face.DetectedFace`
            A detected_face object for a face
        frame_dims: tuple[int, int] | None, optional
            The size of the original frame, if input is faces otherwise ``None``. Default: ``None``
        """
        assert self._saver is not None

        faces = self._handle_cache(frame, idx, detected_face)
        if not faces:  # Full frame output that is still waiting on further faces
            return

        # NB: This removes any faces that do not hold the selected mask from ``faces``
        mask_types = self._get_mask_types(frame, faces)
        if not faces or not mask_types:
            logger.debug("No valid faces/masks to process for '%s'", frame)
            return

        for mask_type in mask_types:
            detected_faces = [f[1] for f in faces if mask_type in f[1].mask]
            # Test the filtered list, not the single ``detected_face`` that was passed in
            if not detected_faces:
                logger.warning("No '%s' masks to output for '%s'", mask_type, frame)
                continue
            if len(detected_faces) != len(faces):
                logger.warning("Some '%s' masks are missing for '%s'", mask_type, frame)

            image = self._create_image(detected_faces, mask_type, frame_dims)
            filename = os.path.splitext(frame)[0]
            if len(mask_types) > 1:
                filename += f"_{mask_type}"
            if not self._full_frame:
                filename += f"_{idx}"
            filename = os.path.join(self._saver.location, f"{filename}.png")
            logger.trace("filename: '%s', image_shape: %s", filename, image.shape)  # type: ignore
            self._saver.save(filename, image)

    def close(self) -> None:
        """ Shut down the image saver if it is open """
        if self._saver is None:
            return
        logger.debug("Shutting down saver")
        self._saver.close()
|