#!/usr/bin python3 """ Face and landmarks detection for faceswap.py """ from __future__ import annotations import logging import os import typing as T from hashlib import sha1 from zlib import compress, decompress import numpy as np from lib.image import encode_image, read_image from lib.logger import parse_class_init from lib.utils import FaceswapError from .alignments import (Alignments, AlignmentFileDict, PNGHeaderAlignmentsDict, PNGHeaderDict, PNGHeaderSourceDict) from .aligned_face import AlignedFace from .aligned_mask import LandmarksMask, Mask from .constants import LANDMARK_PARTS if T.TYPE_CHECKING: from .aligned_face import CenteringType logger = logging.getLogger(__name__) class DetectedFace(): """ Detected face and landmark information Holds information about a detected face, it's location in a source image and the face's 68 point landmarks. Methods for aligning a face are also callable from here. Parameters ---------- image: numpy.ndarray, optional Original frame that holds this face. Optional (not required if just storing coordinates) left: int The left most point (in pixels) of the face's bounding box as discovered in :mod:`plugins.extract.detect` width: int The width (in pixels) of the face's bounding box as discovered in :mod:`plugins.extract.detect` top: int The top most point (in pixels) of the face's bounding box as discovered in :mod:`plugins.extract.detect` height: int The height (in pixels) of the face's bounding box as discovered in :mod:`plugins.extract.detect` landmarks_xy: list The 68 point landmarks as discovered in :mod:`plugins.extract.align`. Should be a ``list`` of 68 `(x, y)` ``tuples`` with each of the landmark co-ordinates. mask: dict The generated mask(s) for the face as generated in :mod:`plugins.extract.mask`. Must be a dict of {**name** (`str`): :class:`~lib.align.aligned_mask.Mask`}. Attributes ---------- image: numpy.ndarray, optional This is a generic image placeholder that should not be relied on to be holding a particular image. It may hold the source frame that holds the face, a cropped face or a scaled image depending on the method using this object. left: int The left most point (in pixels) of the face's bounding box as discovered in :mod:`plugins.extract.detect` width: int The width (in pixels) of the face's bounding box as discovered in :mod:`plugins.extract.detect` top: int The top most point (in pixels) of the face's bounding box as discovered in :mod:`plugins.extract.detect` height: int The height (in pixels) of the face's bounding box as discovered in :mod:`plugins.extract.detect` landmarks_xy: list The 68 point landmarks as discovered in :mod:`plugins.extract.align`. mask: dict The generated mask(s) for the face as generated in :mod:`plugins.extract.mask`. Is a dict of {**name** (`str`): :class:`~lib.align.aligned_mask.Mask`}. """ def __init__(self, image: np.ndarray | None = None, left: int | None = None, width: int | None = None, top: int | None = None, height: int | None = None, landmarks_xy: np.ndarray | None = None, mask: dict[str, Mask] | None = None, filename: str | None = None) -> None: logger.trace(parse_class_init(locals())) # type:ignore[attr-defined] self.image = image self.left = left self.width = width self.top = top self.height = height self._landmarks_xy = landmarks_xy self._identity: dict[str, np.ndarray] = {} self.thumbnail: np.ndarray | None = None self.mask = {} if mask is None else mask self._training_masks: tuple[bytes, tuple[int, int, int]] | None = None self._aligned: AlignedFace | None = None logger.trace("Initialized %s", self.__class__.__name__) # type:ignore[attr-defined] @property def aligned(self) -> AlignedFace: """ The aligned face connected to this detected face. """ assert self._aligned is not None return self._aligned @property def landmarks_xy(self) -> np.ndarray: """ The aligned face connected to this detected face. """ assert self._landmarks_xy is not None return self._landmarks_xy @property def right(self) -> int: """int: Right point (in pixels) of face detection bounding box within the parent image """ assert self.left is not None and self.width is not None return self.left + self.width @property def bottom(self) -> int: """int: Bottom point (in pixels) of face detection bounding box within the parent image """ assert self.top is not None and self.height is not None return self.top + self.height @property def identity(self) -> dict[str, np.ndarray]: """ dict: Identity mechanism as key, identity embedding as value. """ return self._identity def add_mask(self, name: str, mask: np.ndarray, affine_matrix: np.ndarray, interpolator: int, storage_size: int = 128, storage_centering: CenteringType = "face") -> None: """ Add a :class:`~lib.align.aligned_mask.Mask` to this detected face The mask should be the original output from :mod:`plugins.extract.mask` If a mask with this name already exists it will be overwritten by the given mask. Parameters ---------- name: str The name of the mask as defined by the :attr:`plugins.extract.mask._base.name` parameter. mask: numpy.ndarray The mask that is to be added as output from :mod:`plugins.extract.mask` It should be in the range 0.0 - 1.0 ideally with a ``dtype`` of ``float32`` affine_matrix: numpy.ndarray The transformation matrix required to transform the mask to the original frame. interpolator, int: The CV2 interpolator required to transform this mask to it's original frame. storage_size, int (optional): The size the mask is to be stored at. Default: 128 storage_centering, str (optional): The centering to store the mask at. One of `"legacy"`, `"face"`, `"head"`. Default: `"face"` """ logger.trace("name: '%s', mask shape: %s, affine_matrix: %s, " # type:ignore[attr-defined] "interpolator: %s, storage_size: %s, storage_centering: %s)", name, mask.shape, affine_matrix, interpolator, storage_size, storage_centering) fsmask = Mask(storage_size=storage_size, storage_centering=storage_centering) fsmask.add(mask, affine_matrix, interpolator) self.mask[name] = fsmask def add_landmarks_xy(self, landmarks: np.ndarray) -> None: """ Add landmarks to the detected face object. If landmarks alread exist, they will be overwritten. Parameters ---------- landmarks: :class:`numpy.ndarray` The 68 point face landmarks to add for the face """ logger.trace("landmarks shape: '%s'", landmarks.shape) # type:ignore[attr-defined] self._landmarks_xy = landmarks def add_identity(self, name: str, embedding: np.ndarray, ) -> None: """ Add an identity embedding to this detected face. If an identity already exists for the given :attr:`name` it will be overwritten Parameters ---------- name: str The name of the mechanism that calculated the identity embedding: numpy.ndarray The identity embedding """ logger.trace("name: '%s', embedding shape: %s", # type:ignore[attr-defined] name, embedding.shape) assert name == "vggface2" assert embedding.shape[0] == 512 self._identity[name] = embedding def clear_all_identities(self) -> None: """ Remove all stored identity embeddings """ self._identity = {} def get_landmark_mask(self, area: T.Literal["eye", "face", "mouth"], blur_kernel: int, dilation: float) -> np.ndarray: """ Add a :class:`L~lib.align.aligned_mask.LandmarksMask` to this detected face Landmark based masks are generated from face Aligned Face landmark points. An aligned face must be loaded. As the data is coming from the already aligned face, no further mask cropping is required. Parameters ---------- area: ["face", "mouth", "eye"] The type of mask to obtain. `face` is a full face mask the others are masks for those specific areas blur_kernel: int The size of the kernel for blurring the mask edges dilation: float The amount of dilation to apply to the mask. as a percentage of the mask size Returns ------- :class:`numpy.ndarray` The generated landmarks mask for the selected area Raises ------ FaceSwapError If the aligned face does not contain the correct landmarks to generate a landmark mask """ # TODO Face mask generation from landmarks logger.trace("area: %s, dilation: %s", area, dilation) # type:ignore[attr-defined] lm_type = self.aligned.landmark_type if lm_type not in LANDMARK_PARTS: raise FaceswapError(f"Landmark based masks cannot be created for {lm_type.name}") lm_parts = LANDMARK_PARTS[self.aligned.landmark_type] mapped = {"mouth": ["mouth_outer"], "eye": ["right_eye", "left_eye"]} if not all(part in lm_parts for parts in mapped.values() for part in parts): raise FaceswapError(f"Landmark based masks cannot be created for {lm_type.name}") areas = {key: [slice(*lm_parts[v][:2]) for v in val]for key, val in mapped.items()} points = [self.aligned.landmarks[zone] for zone in areas[area]] lmmask = LandmarksMask(points, storage_size=self.aligned.size, storage_centering=self.aligned.centering, dilation=dilation) lmmask.set_blur_and_threshold(blur_kernel=blur_kernel) lmmask.generate_mask( self.aligned.adjusted_matrix, self.aligned.interpolators[1]) return lmmask.mask def store_training_masks(self, masks: list[np.ndarray | None], delete_masks: bool = False) -> None: """ Concatenate and compress the given training masks and store for retrieval. Parameters ---------- masks: list A list of training mask. Must be all be uint-8 3D arrays of the same size in 0-255 range delete_masks: bool, optional ``True`` to delete any of the :class:`~lib.align.aligned_mask.Mask` objects owned by this detected face. Use to free up unrequired memory usage. Default: ``False`` """ if delete_masks: del self.mask self.mask = {} valid = [msk for msk in masks if msk is not None] if not valid: return combined = np.concatenate(valid, axis=-1) self._training_masks = (compress(combined), combined.shape) def get_training_masks(self) -> np.ndarray | None: """ Obtain the decompressed combined training masks. Returns ------- :class:`numpy.ndarray` A 3D array containing the decompressed training masks as uint8 in 0-255 range if training masks are present otherwise ``None`` """ if not self._training_masks: return None return np.frombuffer(decompress(self._training_masks[0]), dtype="uint8").reshape(self._training_masks[1]) def to_alignment(self) -> AlignmentFileDict: """ Return the detected face formatted for an alignments file returns ------- alignment: dict The alignment dict will be returned with the keys ``x``, ``w``, ``y``, ``h``, ``landmarks_xy``, ``mask``. The additional key ``thumb`` will be provided if the detected face object contains a thumbnail. """ if (self.left is None or self.width is None or self.top is None or self.height is None): raise AssertionError("Some detected face variables have not been initialized") alignment = AlignmentFileDict(x=self.left, w=self.width, y=self.top, h=self.height, landmarks_xy=self.landmarks_xy, mask={name: mask.to_dict() for name, mask in self.mask.items()}, identity={k: v.tolist() for k, v in self._identity.items()}, thumb=self.thumbnail) logger.trace("Returning: %s", alignment) # type:ignore[attr-defined] return alignment def from_alignment(self, alignment: AlignmentFileDict, image: np.ndarray | None = None, with_thumb: bool = False) -> None: """ Set the attributes of this class from an alignments file and optionally load the face into the ``image`` attribute. Parameters ---------- alignment: dict A dictionary entry for a face from an alignments file containing the keys ``x``, ``w``, ``y``, ``h``, ``landmarks_xy``. Optionally the key ``thumb`` will be provided. This is for use in the manual tool and contains the compressed jpg thumbnail of the face to be allocated to :attr:`thumbnail. Optionally the key ``mask`` will be provided, but legacy alignments will not have this key. image: numpy.ndarray, optional If an image is passed in, then the ``image`` attribute will be set to the cropped face based on the passed in bounding box co-ordinates with_thumb: bool, optional Whether to load the jpg thumbnail into the detected face object, if provided. Default: ``False`` """ logger.trace("Creating from alignment: (alignment: %s," # type:ignore[attr-defined] " has_image: %s)", alignment, bool(image is not None)) self.left = alignment["x"] self.width = alignment["w"] self.top = alignment["y"] self.height = alignment["h"] landmarks = alignment["landmarks_xy"] if not isinstance(landmarks, np.ndarray): landmarks = np.array(landmarks, dtype="float32") self._identity = {T.cast(T.Literal["vggface2"], k): np.array(v, dtype="float32") for k, v in alignment.get("identity", {}).items()} self._landmarks_xy = landmarks.copy() if with_thumb: # Thumbnails currently only used for manual tool. Default to None self.thumbnail = alignment.get("thumb") # Manual tool and legacy alignments will not have a mask self._aligned = None if alignment.get("mask", None) is not None: self.mask = {} for name, mask_dict in alignment["mask"].items(): self.mask[name] = Mask() self.mask[name].from_dict(mask_dict) if image is not None and image.any(): self._image_to_face(image) logger.trace("Created from alignment: (left: %s, width: %s, " # type:ignore[attr-defined] "top: %s, height: %s, landmarks: %s, mask: %s)", self.left, self.width, self.top, self.height, self.landmarks_xy, self.mask) def to_png_meta(self) -> PNGHeaderAlignmentsDict: """ Return the detected face formatted for insertion into a png itxt header. returns: dict The alignments dict will be returned with the keys ``x``, ``w``, ``y``, ``h``, ``landmarks_xy`` and ``mask`` """ if (self.left is None or self.width is None or self.top is None or self.height is None): raise AssertionError("Some detected face variables have not been initialized") alignment = PNGHeaderAlignmentsDict( x=self.left, w=self.width, y=self.top, h=self.height, landmarks_xy=self.landmarks_xy.tolist(), mask={name: mask.to_png_meta() for name, mask in self.mask.items()}, identity={k: v.tolist() for k, v in self._identity.items()}) return alignment def from_png_meta(self, alignment: PNGHeaderAlignmentsDict) -> None: """ Set the attributes of this class from alignments stored in a png exif header. Parameters ---------- alignment: dict A dictionary entry for a face from alignments stored in a png exif header containing the keys ``x``, ``w``, ``y``, ``h``, ``landmarks_xy`` and ``mask`` """ self.left = alignment["x"] self.width = alignment["w"] self.top = alignment["y"] self.height = alignment["h"] self._landmarks_xy = np.array(alignment["landmarks_xy"], dtype="float32") self.mask = {} for name, mask_dict in alignment["mask"].items(): self.mask[name] = Mask() self.mask[name].from_dict(mask_dict) self._identity = {} for key, val in alignment.get("identity", {}).items(): assert key in ["vggface2"] self._identity[T.cast(T.Literal["vggface2"], key)] = np.array(val, dtype="float32") logger.trace("Created from png exif header: (left: %s, " # type:ignore[attr-defined] "width: %s, top: %s height: %s, landmarks: %s, mask: %s, identity: %s)", self.left, self.width, self.top, self.height, self.landmarks_xy, self.mask, {k: v.shape for k, v in self._identity.items()}) def _image_to_face(self, image: np.ndarray) -> None: """ set self.image to be the cropped face from detected bounding box """ logger.trace("Cropping face from image") # type:ignore[attr-defined] self.image = image[self.top: self.bottom, self.left: self.right] # <<< Aligned Face methods and properties >>> # def load_aligned(self, image: np.ndarray | None, size: int = 256, dtype: str | None = None, centering: CenteringType = "head", coverage_ratio: float = 1.0, force: bool = False, is_aligned: bool = False, is_legacy: bool = False) -> None: """ Align a face from a given image. Aligning a face is a relatively expensive task and is not required for all uses of the :class:`~lib.align.DetectedFace` object, so call this function explicitly to load an aligned face. This method plugs into :mod:`lib.align.AlignedFace` to perform face alignment based on this face's ``landmarks_xy``. If the face has already been aligned, then this function will return having performed no action. Parameters ---------- image: numpy.ndarray The image that contains the face to be aligned size: int The size of the output face in pixels dtype: str, optional Optionally set a ``dtype`` for the final face to be formatted in. Default: ``None`` centering: ["legacy", "face", "head"], optional The type of extracted face that should be loaded. "legacy" places the nose in the center of the image (the original method for aligning). "face" aligns for the nose to be in the center of the face (top to bottom) but the center of the skull for left to right. "head" aligns for the center of the skull (in 3D space) being the center of the extracted image, with the crop holding the full head. Default: `"head"` coverage_ratio: float, optional The amount of the aligned image to return. A ratio of 1.0 will return the full contents of the aligned image. A ratio of 0.5 will return an image of the given size, but will crop to the central 50%% of the image. Default: `1.0` force: bool, optional Force an update of the aligned face, even if it is already loaded. Default: ``False`` is_aligned: bool, optional Indicates that the :attr:`image` is an aligned face rather than a frame. Default: ``False`` is_legacy: bool, optional Only used if `is_aligned` is ``True``. ``True`` indicates that the aligned image being loaded is a legacy extracted face rather than a current head extracted face Notes ----- This method must be executed to get access to the following an :class:`AlignedFace` object """ if self._aligned and not force: # Don't reload an already aligned face logger.trace("Skipping alignment calculation for already " # type:ignore[attr-defined] "aligned face") else: logger.trace("Loading aligned face: (size: %s, " # type:ignore[attr-defined] "dtype: %s)", size, dtype) self._aligned = AlignedFace(self.landmarks_xy, image=image, centering=centering, size=size, coverage_ratio=coverage_ratio, dtype=dtype, is_aligned=is_aligned, is_legacy=is_aligned and is_legacy) _HASHES_SEEN: dict[str, dict[str, int]] = {} def update_legacy_png_header(filename: str, alignments: Alignments ) -> PNGHeaderDict | None: """ Update a legacy extracted face from pre v2.1 alignments by placing the alignment data for the face in the png exif header for the given filename with the given alignment data. If the given file is not a .png then a png is created and the original file is removed Parameters ---------- filename: str The image file to update alignments: :class:`lib.align.alignments.Alignments` The alignments data the contains the information to store in the image header. This must be a v2.0 or less alignments file as later versions no longer store the face hash (not required) Returns ------- dict The metadata that has been applied to the given image """ if alignments.version > 2.0: raise FaceswapError("The faces being passed in do not correspond to the given Alignments " "file. Please double check your sources and try again.") # Track hashes for multiple files with the same hash. Not the most robust but should be # effective enough folder = os.path.dirname(filename) if folder not in _HASHES_SEEN: _HASHES_SEEN[folder] = {} hashes_seen = _HASHES_SEEN[folder] in_image = read_image(filename, raise_error=True) in_hash = sha1(in_image).hexdigest() hashes_seen[in_hash] = hashes_seen.get(in_hash, -1) + 1 alignment = alignments.hashes_to_alignment.get(in_hash) if not alignment: logger.debug("Alignments not found for image: '%s'", filename) return None detected_face = DetectedFace() detected_face.from_alignment(alignment) # For dupe hash handling, make sure we get a different filename for repeat hashes src_fname, face_idx = list(alignments.hashes_to_frame[in_hash].items())[hashes_seen[in_hash]] orig_filename = f"{os.path.splitext(src_fname)[0]}_{face_idx}.png" meta = PNGHeaderDict(alignments=detected_face.to_png_meta(), source=PNGHeaderSourceDict( alignments_version=alignments.version, original_filename=orig_filename, face_index=face_idx, source_filename=src_fname, source_is_video=False, # Can't check so set false source_frame_dims=None)) out_filename = f"{os.path.splitext(filename)[0]}.png" # Make sure saved file is png out_image = encode_image(in_image, ".png", metadata=meta) with open(out_filename, "wb") as out_file: out_file.write(out_image) if filename != out_filename: # Remove the old non-png: logger.debug("Removing replaced face with deprecated extension: '%s'", filename) os.remove(filename) return meta