1
0
Fork 0
mirror of https://github.com/deepfakes/faceswap synced 2025-06-07 10:43:27 -04:00
faceswap/tools/mask/mask_output.py

519 lines
20 KiB
Python

#!/usr/bin/env python3
""" Output processing for faceswap's mask tool """
from __future__ import annotations
import logging
import os
import sys
import typing as T
from argparse import Namespace
import cv2
import numpy as np
from tqdm import tqdm
from lib.align import AlignedFace
from lib.align.alignments import AlignmentDict
from lib.image import ImagesSaver, read_image_meta_batch
from lib.utils import get_folder
from scripts.fsmedia import Alignments as ExtractAlignments
if T.TYPE_CHECKING:
from lib.align import Alignments, DetectedFace
from lib.align.aligned_face import CenteringType
# Module level logger, named after this module.
# NOTE(review): code in this file calls ``logger.trace``, which is not part of the standard
# library ``logging.Logger`` API - presumably a custom TRACE level is registered elsewhere in
# the project. Confirm before using this module standalone.
logger = logging.getLogger(__name__)
class Output:
    """ Handles outputting of masks for preview/editing to disk

    Parameters
    ----------
    arguments: :class:`argparse.Namespace`
        The command line arguments that the mask tool was called with
    alignments: :class:`~lib.align.alignments.Alignments` | None
        The alignments file object (or ``None`` if not provided and input is faces)
    file_list: list[str]
        Full file list for the loader. Used for extracting alignments from faces
    """
    def __init__(self, arguments: Namespace,
                 alignments: Alignments | None,
                 file_list: list[str]) -> None:
        logger.debug("Initializing %s (arguments: %s, alignments: %s, file_list: %s)",
                     self.__class__.__name__, arguments, alignments, len(file_list))

        self._blur_kernel: int = arguments.blur_kernel
        self._threshold: int = arguments.threshold
        self._type: T.Literal["combined", "masked", "mask"] = arguments.output_type
        self._full_frame: bool = arguments.full_frame
        self._mask_type = arguments.masker
        self._centering: CenteringType = arguments.centering

        self._input_is_faces = arguments.input_type == "faces"
        # NOTE: The saver and alignments rely on the options stored above, so must be set last
        self._saver = self._set_saver(arguments.output, arguments.processing)
        self._alignments = self._get_alignments(alignments, file_list)

        # Faces held back for full frame output until every face for the frame has been seen
        self._full_frame_cache: dict[str, list[tuple[int, DetectedFace]]] = {}

        logger.debug("Initialized %s", self.__class__.__name__)

    @property
    def should_save(self) -> bool:
        """bool: ``True`` if mask images should be output otherwise ``False`` """
        return self._saver is not None

    def _get_subfolder(self, output: str) -> str:
        """ Obtain a subfolder within the output folder to save the output based on selected
        output options.

        Parameters
        ----------
        output: str
            Full path to the root output folder

        Returns
        -------
        str:
            The full path to where masks should be saved
        """
        out_type = "frame" if self._full_frame else "face"
        retval = os.path.join(output,
                              f"{self._mask_type}_{out_type}_{self._type}")
        logger.info("Saving masks to '%s'", retval)
        return retval

    def _set_saver(self, output: str | None, processing: str) -> ImagesSaver | None:
        """ Set the saver in a background thread

        Parameters
        ----------
        output: str | None
            Full path to the root output folder if provided
        processing: str
            The processing that has been selected

        Returns
        -------
        ``None`` or :class:`lib.image.ImagesSaver`:
            If output is requested, returns a :class:`lib.image.ImagesSaver` otherwise
            returns ``None``
        """
        if not output:  # Covers both ``None`` and an empty string
            if processing == "output":
                logger.error("Processing set as 'output' but no output folder provided.")
                # This is an error condition, so report failure to the calling process
                sys.exit(1)
            logger.debug("No output provided. Not creating saver")
            return None
        output_dir = get_folder(self._get_subfolder(output), make_folder=True)
        retval = ImagesSaver(output_dir)
        logger.debug(retval)
        return retval

    def _get_alignments(self,
                        alignments: Alignments | None,
                        file_list: list[str]) -> Alignments | None:
        """ Obtain the alignments file. If input is faces and full frame output is requested then
        the file needs to be generated from the input faces, if not provided

        Parameters
        ----------
        alignments: :class:`~lib.align.alignments.Alignments` | None
            The alignments file object (or ``None`` if not provided and input is faces)
        file_list: list[str]
            Full paths to the mask tool input files

        Returns
        -------
        :class:`~lib.align.alignments.Alignments` | None
            The alignments file if provided and/or is required otherwise ``None``
        """
        if alignments is not None or not self._full_frame:
            return alignments

        logger.debug("Generating alignments from faces")

        data = T.cast(dict[str, AlignmentDict], {})
        for _, meta in tqdm(read_image_meta_batch(file_list),
                            desc="Reading alignments from faces",
                            total=len(file_list),
                            leave=False):
            # Group each face's alignment under the frame that it was extracted from
            fname = meta["itxt"]["source"]["source_filename"]
            aln = meta["itxt"]["alignments"]
            data.setdefault(fname, {}).setdefault("faces",  # type:ignore[typeddict-item]
                                                  []).append(aln)

        # The path is a placeholder. The alignments are populated from the collected data
        dummy_args = Namespace(alignments_path="/dummy/alignments.fsa")
        retval = ExtractAlignments(dummy_args, is_extract=True)
        retval.update_from_dict(data)
        return retval

    def _get_background_frame(self, detected_faces: list[DetectedFace], frame_dims: tuple[int, int]
                              ) -> np.ndarray:
        """ Obtain the background image when final output is in full frame format. There will only
        ever be one background, even when there are multiple faces

        The output image will depend on the requested output type and whether the input is faces
        or frames

        Parameters
        ----------
        detected_faces: list[:class:`~lib.align.detected_face.DetectedFace`]
            Detected face objects for the output image
        frame_dims: tuple[int, int]
            The size of the original frame

        Returns
        -------
        :class:`numpy.ndarray`
            The full frame background image for applying masks to
        """
        if self._type == "mask":
            return np.zeros(frame_dims, dtype="uint8")

        if not self._input_is_faces:  # Frame is in the detected faces object
            assert detected_faces[0].image is not None
            return np.ascontiguousarray(detected_faces[0].image)

        # Outputting to frames, but input is faces. Apply the face patches to an empty canvas
        retval = np.zeros((*frame_dims, 3), dtype="uint8")
        # With more than one face, a transparent border is requested so that warping a patch
        # leaves the rest of the canvas untouched
        border = cv2.BORDER_TRANSPARENT if len(detected_faces) > 1 else cv2.BORDER_CONSTANT
        for detected_face in detected_faces:
            assert detected_face.image is not None
            face = AlignedFace(detected_face.landmarks_xy,
                               image=detected_face.image,
                               centering="head",
                               size=detected_face.image.shape[0],
                               is_aligned=True)
            assert face.face is not None
            cv2.warpAffine(face.face,
                           face.adjusted_matrix,
                           tuple(reversed(frame_dims)),
                           retval,
                           flags=cv2.WARP_INVERSE_MAP | face.interpolators[1],
                           borderMode=border)
        return retval

    def _get_background_face(self,
                             detected_face: DetectedFace,
                             mask_centering: CenteringType,
                             mask_size: int) -> np.ndarray:
        """ Obtain the background image when the output is faces

        The output image will depend on the requested output type and whether the input is faces
        or frames

        Parameters
        ----------
        detected_face: :class:`~lib.align.detected_face.DetectedFace`
            Detected face object for the output image
        mask_centering: Literal["face", "head", "legacy"]
            The centering of the stored mask
        mask_size: int
            The pixel size of the stored mask

        Returns
        -------
        :class:`numpy.ndarray`
            The face background image for applying masks to
        """
        if self._type == "mask":
            return np.zeros((mask_size, mask_size), dtype="uint8")

        assert detected_face.image is not None

        if self._input_is_faces:
            retval = AlignedFace(detected_face.landmarks_xy,
                                 image=detected_face.image,
                                 centering=mask_centering,
                                 size=mask_size,
                                 is_aligned=True).face
        else:
            # Version 1.0 alignments files are loaded with legacy centering
            centering: CenteringType = ("legacy" if self._alignments is not None and
                                        self._alignments.version == 1.0
                                        else mask_centering)
            detected_face.load_aligned(detected_face.image,
                                       size=mask_size,
                                       centering=centering,
                                       force=True)
            retval = detected_face.aligned.face

        assert retval is not None
        return retval

    def _get_background(self,
                        detected_faces: list[DetectedFace],
                        frame_dims: tuple[int, int],
                        mask_centering: CenteringType,
                        mask_size: int) -> np.ndarray:
        """ Obtain the background image that the final output will be placed on

        Parameters
        ----------
        detected_faces: list[:class:`~lib.align.detected_face.DetectedFace`]
            Detected face objects for the output image
        frame_dims: tuple[int, int]
            The size of the original frame
        mask_centering: Literal["face", "head", "legacy"]
            The centering of the stored mask
        mask_size: int
            The pixel size of the stored mask

        Returns
        -------
        :class:`numpy.ndarray`
            The background image for the mask output
        """
        if self._full_frame:
            retval = self._get_background_frame(detected_faces, frame_dims)
        else:
            assert len(detected_faces) == 1  # If outputting faces, we should only receive 1 face
            retval = self._get_background_face(detected_faces[0], mask_centering, mask_size)

        logger.trace("Background image (size: %s, dtype: %s)",  # type:ignore[attr-defined]
                     retval.shape, retval.dtype)
        return retval

    def _get_mask(self,
                  detected_faces: list[DetectedFace],
                  mask_type: str,
                  mask_dims: tuple[int, int]) -> np.ndarray:
        """ Generate the mask to be applied to the final output frame

        Parameters
        ----------
        detected_faces: list[:class:`~lib.align.detected_face.DetectedFace`]
            Detected face objects to generate the masks from
        mask_type: str
            The mask-type to use
        mask_dims : tuple[int, int]
            The size of the mask to output

        Returns
        -------
        :class:`numpy.ndarray`
            The final mask to apply to the output image
        """
        retval = np.zeros(mask_dims, dtype="uint8")
        for face in detected_faces:
            mask_object = face.mask[mask_type]
            mask_object.set_blur_and_threshold(blur_kernel=self._blur_kernel,
                                               threshold=self._threshold)
            if self._full_frame:
                mask = mask_object.get_full_frame_mask(*reversed(mask_dims))
            else:
                mask = mask_object.mask[..., 0]
            # Merge each face's mask into the output by taking the per-pixel maximum
            np.maximum(retval, mask, out=retval)
        logger.trace("Final mask (shape: %s, dtype: %s)",  # type:ignore[attr-defined]
                     retval.shape, retval.dtype)
        return retval

    def _build_output_image(self, background: np.ndarray, mask: np.ndarray) -> np.ndarray:
        """ Collate the mask and images for the final output image, depending on selected output
        type

        Parameters
        ----------
        background: :class:`numpy.ndarray`
            The image that the mask will be applied to
        mask: :class:`numpy.ndarray`
            The mask to output

        Returns
        -------
        :class:`numpy.ndarray`
            The final output image
        """
        if self._type == "mask":
            return mask

        mask = mask[..., None]
        if self._type == "masked":
            # Append the mask to the background as an additional final channel
            return np.concatenate([background, mask], axis=-1)

        # Combined output: original, masked image and mask placed alongside each other
        height, width = background.shape[:2]
        masked = (background.astype("float32") * mask.astype("float32") / 255.).astype("uint8")
        mask = np.tile(mask, 3)
        # The border is drawn directly onto each of the arrays, including the given background
        for img in (background, masked, mask):
            cv2.rectangle(img, (0, 0), (width - 1, height - 1), (255, 255, 255), 1)
        # Join along the shorter dimension of the background
        axis = 0 if height < width else 1
        retval = np.concatenate((background, masked, mask), axis=axis)
        return retval

    def _create_image(self,
                      detected_faces: list[DetectedFace],
                      mask_type: str,
                      frame_dims: tuple[int, int] | None) -> np.ndarray:
        """ Create a mask preview image for saving out to disk

        Parameters
        ----------
        detected_faces: list[:class:`~lib.align.detected_face.DetectedFace`]
            Detected face objects for the output image
        mask_type: str
            The mask_type to process
        frame_dims: tuple[int, int] | None
            The size of the original frame, if input is faces otherwise ``None``

        Returns
        -------
        :class:`numpy.ndarray`:
            A preview image depending on the output type in one of the following forms:
              - Containing 3 sub images: The original face, the masked face and the mask
              - The mask only
              - The masked face
        """
        assert detected_faces[0].image is not None
        # When input is frames the dimensions are taken from the image held by the first face
        dims = T.cast(tuple[int, int],
                      frame_dims if self._input_is_faces else detected_faces[0].image.shape[:2])
        assert dims is not None and len(dims) == 2

        mask_centering = detected_faces[0].mask[mask_type].stored_centering
        mask_size = detected_faces[0].mask[mask_type].stored_size

        background = self._get_background(detected_faces, dims, mask_centering, mask_size)
        mask = self._get_mask(detected_faces,
                              mask_type,
                              dims if self._full_frame else (mask_size, mask_size))
        retval = self._build_output_image(background, mask)

        logger.trace("Output image (shape: %s, dtype: %s)",  # type:ignore[attr-defined]
                     retval.shape, retval.dtype)
        return retval

    def _handle_cache(self,
                      frame: str,
                      idx: int,
                      detected_face: DetectedFace) -> list[tuple[int, DetectedFace]]:
        """ For full frame output, cache any faces until all detected faces have been seen. For
        face output, just return the detected_face object inside a list

        Parameters
        ----------
        frame: str
            The frame name in the alignments file
        idx: int
            The index of the face for this frame in the alignments file
        detected_face: :class:`~lib.align.detected_face.DetectedFace`
            A detected_face object for a face

        Returns
        -------
        list[tuple[int, :class:`~lib.align.detected_face.DetectedFace`]]
            Face index and detected face objects to be processed for this output, if any
        """
        if not self._full_frame:
            return [(idx, detected_face)]

        assert self._alignments is not None
        faces_in_frame = self._alignments.count_faces_in_frame(frame)
        if faces_in_frame == 1:  # Nothing to wait for, so no need to cache
            return [(idx, detected_face)]

        self._full_frame_cache.setdefault(frame, []).append((idx, detected_face))

        if len(self._full_frame_cache[frame]) != faces_in_frame:
            logger.trace("Caching face for frame '%s'", frame)  # type:ignore[attr-defined]
            return []

        # All faces for the frame have been collected. Release them from the cache
        retval = self._full_frame_cache.pop(frame)
        logger.trace("Processing '%s' from cache: %s", frame, retval)  # type:ignore[attr-defined]
        return retval

    def _get_mask_types(self,
                        frame: str,
                        detected_faces: list[tuple[int, DetectedFace]]) -> list[str]:
        """ Get the mask type names for the selected mask type. Remove any detected faces where
        the selected mask does not exist

        Parameters
        ----------
        frame: str
            The frame name in the alignments file
        detected_faces: list[tuple[int, :class:`~lib.align.detected_face.DetectedFace`]]
            The face index and detected_face object for output. Faces which do not contain the
            selected mask are removed from this list in place

        Returns
        -------
        list[str]
            List of mask type names to be processed
        """
        if self._mask_type == "bisenet-fp":
            mask_types = [f"{self._mask_type}_{area}" for area in ("face", "head")]
        else:
            mask_types = [self._mask_type]

        if self._mask_type == "custom":
            mask_types.append(f"{self._mask_type}_{self._centering}")

        final_masks = set()
        # Iterate backwards so that deleting an entry does not shift the indices still to come
        for idx in reversed(range(len(detected_faces))):
            face_idx, detected_face = detected_faces[idx]
            if detected_face.mask is None or not any(mask in detected_face.mask
                                                     for mask in mask_types):
                logger.warning("Mask type '%s' does not exist for frame '%s' index %s. Skipping",
                               self._mask_type, frame, face_idx)
                del detected_faces[idx]
                continue
            final_masks.update([m for m in detected_face.mask if m in mask_types])

        retval = list(final_masks)
        logger.trace("Handling mask types: %s", retval)  # type:ignore[attr-defined]
        return retval

    def save(self,
             frame: str,
             idx: int,
             detected_face: DetectedFace,
             frame_dims: tuple[int, int] | None = None) -> None:
        """ Build the mask preview image and save

        Parameters
        ----------
        frame: str
            The frame name in the alignments file
        idx: int
            The index of the face for this frame in the alignments file
        detected_face: :class:`~lib.align.detected_face.DetectedFace`
            A detected_face object for a face
        frame_dims: tuple[int, int] | None, optional
            The size of the original frame, if input is faces otherwise ``None``. Default: ``None``
        """
        assert self._saver is not None

        faces = self._handle_cache(frame, idx, detected_face)
        if not faces:  # Still waiting on further faces for this frame
            return

        # NOTE: This also strips any faces without the selected mask from ``faces``
        mask_types = self._get_mask_types(frame, faces)
        if not faces or not mask_types:
            logger.debug("No valid faces/masks to process for '%s'", frame)
            return

        for mask_type in mask_types:
            detected_faces = [f[1] for f in faces if mask_type in f[1].mask]
            # Test the filtered list for this mask type, not the single incoming face
            if not detected_faces:
                logger.warning("No '%s' masks to output for '%s'", mask_type, frame)
                continue
            if len(detected_faces) != len(faces):
                logger.warning("Some '%s' masks are missing for '%s'", mask_type, frame)

            image = self._create_image(detected_faces, mask_type, frame_dims)
            filename = os.path.splitext(frame)[0]
            if len(mask_types) > 1:  # Keep the output for each mask type distinct
                filename += f"_{mask_type}"
            if not self._full_frame:  # One output per face, so the face index is required
                filename += f"_{idx}"
            filename = os.path.join(self._saver.location, f"{filename}.png")
            logger.trace("filename: '%s', image_shape: %s", filename, image.shape)  # type: ignore
            self._saver.save(filename, image)

    def close(self) -> None:
        """ Shut down the image saver if it is open """
        if self._saver is None:
            return
        logger.debug("Shutting down saver")
        self._saver.close()