1
0
Fork 0
mirror of https://github.com/deepfakes/faceswap synced 2025-06-07 10:43:27 -04:00
faceswap/plugins/extract/detect/mtcnn.py
torzdf 6a3b674bef
Rebase code (#1326)
* Remove tensorflow_probability requirement

* setup.py - fix progress bars

* requirements.txt: Remove pre python 3.9 packages

* update apple requirements.txt

* update INSTALL.md

* Remove python<3.9 code

* setup.py - fix Windows Installer

* typing: python3.9 compliant

* Update pytest and readthedocs python versions

* typing fixes

* Python Version updates
  - Reduce max version to 3.10
  - Default to 3.10 in installers
  - Remove incompatible 3.11 tests

* Update dependencies

* Downgrade imageio dep for Windows

* typing: merge optional unions and fixes

* Updates
  - min python version 3.10
  - typing to python 3.10 spec
  - remove pre-tf2.10 code
  - Add conda tests

* train: re-enable optimizer saving

* Update dockerfiles

* Update setup.py
  - Apple Conda deps to setup.py
  - Better Cuda + dependency handling

* bugfix: Patch logging to prevent Autograph errors

* Update dockerfiles

* Setup.py - stdout to utf-8

* Add more OSes to github Actions

* suppress mac-os end to end test
2023-06-27 11:27:47 +01:00

754 lines
29 KiB
Python

#!/usr/bin/env python3
""" MTCNN Face detection plugin """
from __future__ import annotations
import logging
import typing as T
import cv2
import numpy as np
# Ignore linting errors from Tensorflow's thoroughly broken import system
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPool2D, Permute, PReLU # noqa:E501 # pylint:disable=import-error
from lib.model.session import KSession
from ._base import BatchType, Detector
if T.TYPE_CHECKING:
from tensorflow import Tensor
logger = logging.getLogger(__name__)
class Detect(Detector):
    """ Face detection plugin that wraps the :class:`MTCNN` detector. """

    def __init__(self, **kwargs) -> None:
        super().__init__(git_model_id=2,
                         model_filename=["mtcnn_det_v2.1.h5",
                                         "mtcnn_det_v2.2.h5",
                                         "mtcnn_det_v2.3.h5"],
                         **kwargs)
        self.name = "MTCNN"
        self.input_size = 640

        # No VRAM is requested at all when the plugin is configured to run on the CPU
        on_gpu = not self.config["cpu"]
        self.vram = 320 if on_gpu else 0
        self.vram_warnings = 64 if on_gpu else 0  # Will run at this with warnings
        self.vram_per_batch = 32 if on_gpu else 0

        self.batchsize = self.config["batch-size"]
        self.kwargs = self._validate_kwargs()
        self.color_format = "RGB"

    def _validate_kwargs(self) -> dict[str, int | float | list[float]]:
        """ Collect the MTCNN options from the plugin config and sanity check them.

        Returns
        -------
        dict
            The keyword arguments to pass to :class:`MTCNN`. An empty dictionary (so that MTCNN
            falls back to its own defaults) if any of the configured options are out of range
        """
        kwargs: dict[str, int | float | list[float]] = {
            "minsize": self.config["minsize"],
            "threshold": [self.config[f"threshold_{stage}"] for stage in (1, 2, 3)],
            "factor": self.config["scalefactor"],
            "input_size": self.input_size}

        # Checks are evaluated left to right and stop at the first failure
        is_valid = (not kwargs["minsize"] < 10
                    and all(0.0 < thresh <= 1.0 for thresh in kwargs["threshold"])
                    and 0.0 < kwargs["factor"] < 1.0)

        if not is_valid:
            kwargs = {}
            logger.warning("Invalid MTCNN options in config. Running with defaults")

        logger.debug("Using mtcnn kwargs: %s", kwargs)
        return kwargs

    def init_model(self) -> None:
        """ Create the :class:`MTCNN` model from the three downloaded weights files. """
        assert isinstance(self.model_path, list)
        self.model = MTCNN(self.model_path,
                           self.config["allow_growth"],
                           self._exclude_gpus,
                           self.config["cpu"],
                           **self.kwargs)  # type:ignore

    def process_input(self, batch: BatchType) -> None:
        """ Compile the detection image(s) for prediction

        The images are converted to float32 and scaled with ``(x - 127.5) / 127.5``.

        Parameters
        ----------
        batch: :class:`~plugins.extract.detect._base.DetectorBatch`
            Contains the batch that is currently being passed through the plugin process
        """
        pixels = np.array(batch.image, dtype="float32")
        batch.feed = (pixels - 127.5) / 127.5

    def predict(self, feed: np.ndarray) -> np.ndarray:
        """ Run the model to get predictions

        Parameters
        ----------
        feed: :class:`numpy.ndarray`
            The batch of images to pass through the MTCNN model

        Returns
        -------
        :class:`numpy.ndarray`
            The bounding box predictions from MTCNN. The landmark points that MTCNN also
            produces are logged at trace level and then discarded
        """
        assert isinstance(self.model, MTCNN)
        prediction, points = self.model.detect_faces(feed)
        logger.trace("prediction: %s, mtcnn_points: %s",  # type:ignore
                     prediction, points)
        return prediction

    def process_output(self, batch: BatchType) -> None:
        """ MTCNN performs no post processing so the batch is left untouched

        Parameters
        ----------
        batch: :class:`~plugins.extract.detect._base.DetectorBatch`
            Contains the batch to apply postprocessing to
        """
        return
# MTCNN Detector
# Code adapted from: https://github.com/xiangrufan/keras-mtcnn
#
# Keras implementation of the face detection / alignment algorithm
# found at
# https://github.com/kpzhang93/MTCNN_face_detection_alignment
#
# MIT License
#
# Copyright (c) 2016 Kaipeng Zhang
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class PNet(KSession):
    """ Keras P-Net model for MTCNN

    Parameters
    ----------
    model_path: str
        The path to the keras model file
    allow_growth: bool
        Enable the Tensorflow GPU allow_growth configuration option. This option prevents
        Tensorflow from allocating all of the GPU VRAM, but can lead to higher fragmentation and
        slower performance
    exclude_gpus: list or ``None``
        A list of indices correlating to connected GPUs that Tensorflow should not use. Pass
        ``None`` to not exclude any GPUs
    cpu_mode: bool
        ``True`` run the model on CPU
    input_size: int
        The input size of the model
    min_size: int
        The minimum size of a face to accept as a detection
    factor: float
        The factor used to create the scaling pyramid of image sizes fed to the network
    threshold: float
        Threshold for P-Net
    """
    def __init__(self,
                 model_path: str,
                 allow_growth: bool,
                 exclude_gpus: list[int] | None,
                 cpu_mode: bool,
                 input_size: int,
                 min_size: int,
                 factor: float,
                 threshold: float) -> None:
        super().__init__("MTCNN-PNet",
                         model_path,
                         allow_growth=allow_growth,
                         exclude_gpus=exclude_gpus,
                         cpu_mode=cpu_mode)

        self.define_model(self.model_definition)
        self.load_model_weights()

        self._input_size = input_size
        self._threshold = threshold

        self._pnet_scales = self._calculate_scales(min_size, factor)
        self._pnet_sizes = [(int(input_size * scale), int(input_size * scale))
                            for scale in self._pnet_scales]
        # Resize buffers (one per pyramid level). Allocated lazily on the first call
        self._pnet_input: list[np.ndarray] | None = None

    @staticmethod
    def model_definition() -> tuple[list[Tensor], list[Tensor]]:
        """ Keras P-Network Definition for MTCNN

        Returns
        -------
        tuple
            The list of input tensors and the list of output tensors (classifier, bounding box
            regression) for the network
        """
        input_ = Input(shape=(None, None, 3))
        var_x = Conv2D(10, (3, 3), strides=1, padding='valid', name='conv1')(input_)
        var_x = PReLU(shared_axes=[1, 2], name='PReLU1')(var_x)
        var_x = MaxPool2D(pool_size=2)(var_x)
        var_x = Conv2D(16, (3, 3), strides=1, padding='valid', name='conv2')(var_x)
        var_x = PReLU(shared_axes=[1, 2], name='PReLU2')(var_x)
        var_x = Conv2D(32, (3, 3), strides=1, padding='valid', name='conv3')(var_x)
        var_x = PReLU(shared_axes=[1, 2], name='PReLU3')(var_x)
        classifier = Conv2D(2, (1, 1), activation='softmax', name='conv4-1')(var_x)
        bbox_regress = Conv2D(4, (1, 1), name='conv4-2')(var_x)
        return [input_], [classifier, bbox_regress]

    def _calculate_scales(self,
                          minsize: int,
                          factor: float) -> list[float]:
        """ Calculate multi-scale

        Parameters
        ----------
        minsize: int
            Minimum size for a face to be accepted
        factor: float
            Scaling factor

        Returns
        -------
        list
            List of scale floats

        Raises
        ------
        ValueError
            If the scaling factor would never shrink the image, meaning that the pyramid could
            not terminate
        """
        factor_count = 0
        var_m = 12.0 / minsize
        minl = self._input_size * var_m
        if minl >= 12 and factor >= 1:
            # The loop below only exits once repeated multiplication by factor takes minl
            # below 12, which cannot happen for a factor of 1 or more
            raise ValueError(f"MTCNN scale factor must be less than 1.0. Got: {factor}")
        # create scale pyramid
        scales = []
        while minl >= 12:
            scales += [var_m * np.power(factor, factor_count)]
            minl = minl * factor
            factor_count += 1
        logger.trace(scales)  # type:ignore
        return scales

    def __call__(self, images: np.ndarray) -> list[np.ndarray]:
        """ first stage - fast proposal network (p-net) to obtain face candidates

        Parameters
        ----------
        images: :class:`numpy.ndarray`
            The batch of images to detect faces in

        Returns
        -------
        List
            List of face candidates from P-Net
        """
        batch_size = images.shape[0]
        rectangles: list[list[list[int | float]]] = [[] for _ in range(batch_size)]
        scores: list[list[np.ndarray]] = [[] for _ in range(batch_size)]

        # The buffers are sized by the batch that created them. A smaller batch re-uses the
        # leading rows (any trailing rows are predicted on but never read back). A larger batch
        # would index past the end of the buffer, so the buffers are re-created in that case
        if self._pnet_input is None or any(buffer.shape[0] < batch_size
                                           for buffer in self._pnet_input):
            self._pnet_input = [np.empty((batch_size, rheight, rwidth, 3), dtype="float32")
                                for rheight, rwidth in self._pnet_sizes]

        for scale, batch, (rheight, rwidth) in zip(self._pnet_scales,
                                                   self._pnet_input,
                                                   self._pnet_sizes):
            # Resize each image directly into its slot of the pre-allocated buffer
            for idx in range(batch_size):
                cv2.resize(images[idx], (rwidth, rheight), dst=batch[idx])

            cls_prob, roi = self.predict(batch)
            # Only channel 1 of the softmax classifier output is used as the score
            cls_prob = cls_prob[..., 1]
            out_side = max(cls_prob.shape[1:3])
            cls_prob = np.swapaxes(cls_prob, 1, 2)
            roi = np.swapaxes(roi, 1, 3)
            for idx in range(batch_size):
                rect, score = self._detect_face_12net(cls_prob[idx, ...],
                                                      roi[idx, ...],
                                                      out_side,
                                                      1 / scale)
                rectangles[idx].extend(rect)
                scores[idx].extend(score)

        # Merge the candidates from all pyramid levels for each image
        return [nms(np.array(rect), np.array(score), 0.7, "iou")[0]  # don't output scores
                for rect, score in zip(rectangles, scores)]

    def _detect_face_12net(self,
                           class_probabilities: np.ndarray,
                           roi: np.ndarray,
                           size: int,
                           scale: float) -> tuple[np.ndarray, np.ndarray]:
        """ Detect face position and calibrate bounding box on 12net feature map(matrix version)

        Parameters
        ----------
        class_probabilities: :class:`numpy.ndarray`
            softmax feature map for face classify
        roi: :class:`numpy.ndarray`
            feature map for regression
        size: int
            feature map's largest size
        scale: float
            The multiplier that maps coordinates at the current pyramid level back to the
            original input image (the caller passes the reciprocal of the pyramid scale)

        Returns
        -------
        rectangles: :class:`numpy.ndarray`
            The calibrated, integer face candidate boxes that survived non-maximum suppression
        scores: :class:`numpy.ndarray`
            The associated scores for the rectangles
        """
        in_side = 2 * size + 11
        stride = 0. if size == 1 else float(in_side - 12) / (size - 1)
        # Feature map positions whose score meets the threshold
        (var_x, var_y) = np.nonzero(class_probabilities >= self._threshold)
        boundingbox = np.array([var_x, var_y]).T

        # Each position becomes a box starting at the strided position and ending 11 pixels
        # further on, scaled back to the input image
        boundingbox = np.concatenate((np.fix((stride * (boundingbox) + 0) * scale),
                                      np.fix((stride * (boundingbox) + 11) * scale)), axis=1)
        # Shift the box corners by the regression output for the same positions
        offset = roi[:4, var_x, var_y].T
        boundingbox = boundingbox + offset * 12.0 * scale

        rectangles = np.concatenate((boundingbox,
                                     np.array([class_probabilities[var_x, var_y]]).T), axis=1)
        rectangles = rect2square(rectangles)

        np.clip(rectangles[..., :4], 0., self._input_size, out=rectangles[..., :4])
        # Discard any boxes that have collapsed to zero width or height
        pick = np.where(np.logical_and(rectangles[..., 2] > rectangles[..., 0],
                                       rectangles[..., 3] > rectangles[..., 1]))[0]
        rects = rectangles[pick, :4].astype("int")
        scores = rectangles[pick, 4]

        return nms(rects, scores, 0.3, "iou")
class RNet(KSession):
    """ Keras R-Net model Definition for MTCNN

    Parameters
    ----------
    model_path: str
        The path to the keras model file
    allow_growth: bool
        Enable the Tensorflow GPU allow_growth configuration option. This option prevents
        Tensorflow from allocating all of the GPU VRAM, but can lead to higher fragmentation and
        slower performance
    exclude_gpus: list or ``None``
        A list of indices correlating to connected GPUs that Tensorflow should not use. Pass
        ``None`` to not exclude any GPUs
    cpu_mode: bool
        ``True`` run the model on CPU
    input_size: int
        The input size of the model
    threshold: float
        Threshold for R-Net
    """
    def __init__(self,
                 model_path: str,
                 allow_growth: bool,
                 exclude_gpus: list[int] | None,
                 cpu_mode: bool,
                 input_size: int,
                 threshold: float) -> None:
        super().__init__("MTCNN-RNet",
                         model_path,
                         allow_growth=allow_growth,
                         exclude_gpus=exclude_gpus,
                         cpu_mode=cpu_mode)
        self.define_model(self.model_definition)
        self.load_model_weights()

        self._input_size = input_size
        self._threshold = threshold

    @staticmethod
    def model_definition() -> tuple[list[Tensor], list[Tensor]]:
        """ Keras R-Network Definition for MTCNN

        Returns
        -------
        tuple
            The list of input tensors and the list of output tensors (classifier, bounding box
            regression) for the network
        """
        inputs = Input(shape=(24, 24, 3))

        net = Conv2D(28, (3, 3), strides=1, padding='valid', name='conv1')(inputs)
        net = PReLU(shared_axes=[1, 2], name='prelu1')(net)
        net = MaxPool2D(pool_size=3, strides=2, padding='same')(net)

        net = Conv2D(48, (3, 3), strides=1, padding='valid', name='conv2')(net)
        net = PReLU(shared_axes=[1, 2], name='prelu2')(net)
        net = MaxPool2D(pool_size=3, strides=2)(net)

        net = Conv2D(64, (2, 2), strides=1, padding='valid', name='conv3')(net)
        net = PReLU(shared_axes=[1, 2], name='prelu3')(net)

        net = Permute((3, 2, 1))(net)
        net = Flatten()(net)
        net = Dense(128, name='conv4')(net)
        net = PReLU(name='prelu4')(net)

        face_classifier = Dense(2, activation='softmax', name='conv5-1')(net)
        box_regression = Dense(4, name='conv5-2')(net)
        return [inputs], [face_classifier, box_regression]

    def __call__(self,
                 images: np.ndarray,
                 rectangle_batch: list[np.ndarray],
                 ) -> list[np.ndarray]:
        """ second stage - refinement of face candidates with r-net

        Parameters
        ----------
        images: :class:`numpy.ndarray`
            The batch of images to detect faces in
        rectangle_batch:
            List of :class:`numpy.ndarray` face candidates from P-Net

        Returns
        -------
        List
            List of :class:`numpy.ndarray` refined face candidates from R-Net
        """
        refined: list[np.ndarray] = []
        for candidates, image in zip(rectangle_batch, images):
            if not np.any(candidates):
                # Nothing to refine for this image
                refined.append(np.array([]))
                continue

            crops = np.empty((candidates.shape[0], 24, 24, 3), dtype="float32")
            for row, rect in enumerate(candidates):
                # Cut each candidate out of the image and resize it straight into the feed
                cv2.resize(image[rect[1]: rect[3], rect[0]: rect[2]],
                           (24, 24),
                           dst=crops[row])

            cls_prob, roi_prob = self.predict(crops)
            refined.append(self._filter_face_24net(cls_prob, roi_prob, candidates))
        return refined

    def _filter_face_24net(self,
                           class_probabilities: np.ndarray,
                           roi: np.ndarray,
                           rectangles: np.ndarray,
                           ) -> np.ndarray:
        """ Filter face position and calibrate bounding box on 12net's output

        Parameters
        ----------
        class_probabilities: class:`np.ndarray`
            Softmax output of the face classifier
        roi: :class:`numpy.ndarray`
            Output of the bounding box regression
        rectangles: :class:`numpy.ndarray`
            The candidate boxes that were fed to the network

        Returns
        -------
        :class:`numpy.ndarray`
            The calibrated, squared and clipped integer boxes that survived non-maximum
            suppression
        """
        face_prob = class_probabilities[:, 1]
        # NB: this is deliberately left as the tuple returned from nonzero. Indexing with it
        # below inserts an extra axis of length 1, which is removed again by the final reshape
        keep = np.nonzero(face_prob >= self._threshold)

        boxes = rectangles.T[:4, keep]
        kept_scores = np.array([face_prob[keep]]).T.ravel()
        offsets = roi.T[:4, keep]

        # Regression offsets are relative to the width and height of the candidate box
        widths = boxes[2] - boxes[0]
        heights = boxes[3] - boxes[1]
        sizes = np.tile([widths, heights], (2, 1, 1))

        boxes = np.transpose(boxes + offsets * sizes).reshape(-1, 4)
        squared = rect2square(boxes)
        clipped = np.clip(squared, 0, self._input_size).astype("int")
        return nms(clipped, kept_scores, 0.3, "iou")[0]
class ONet(KSession):
    """ Keras O-Net model for MTCNN

    Parameters
    ----------
    model_path: str
        The path to the keras model file
    allow_growth: bool
        Enable the Tensorflow GPU allow_growth configuration option. This option prevents
        Tensorflow from allocating all of the GPU VRAM, but can lead to higher fragmentation and
        slower performance
    exclude_gpus: list or ``None``
        A list of indices correlating to connected GPUs that Tensorflow should not use. Pass
        ``None`` to not exclude any GPUs
    cpu_mode: bool
        ``True`` run the model on CPU
    input_size: int
        The input size of the model
    threshold: float
        Threshold for O-Net
    """
    def __init__(self,
                 model_path: str,
                 allow_growth: bool,
                 exclude_gpus: list[int] | None,
                 cpu_mode: bool,
                 input_size: int,
                 threshold: float) -> None:
        super().__init__("MTCNN-ONet",
                         model_path,
                         allow_growth=allow_growth,
                         exclude_gpus=exclude_gpus,
                         cpu_mode=cpu_mode)
        self.define_model(self.model_definition)
        self.load_model_weights()

        self._input_size = input_size
        self._threshold = threshold

    @staticmethod
    def model_definition() -> tuple[list[Tensor], list[Tensor]]:
        """ Keras O-Network for MTCNN

        Returns
        -------
        tuple
            The list of input tensors and the list of output tensors (classifier, bounding box
            regression, landmark regression) for the network
        """
        input_ = Input(shape=(48, 48, 3))
        var_x = Conv2D(32, (3, 3), strides=1, padding='valid', name='conv1')(input_)
        var_x = PReLU(shared_axes=[1, 2], name='prelu1')(var_x)
        var_x = MaxPool2D(pool_size=3, strides=2, padding='same')(var_x)
        var_x = Conv2D(64, (3, 3), strides=1, padding='valid', name='conv2')(var_x)
        var_x = PReLU(shared_axes=[1, 2], name='prelu2')(var_x)
        var_x = MaxPool2D(pool_size=3, strides=2)(var_x)
        var_x = Conv2D(64, (3, 3), strides=1, padding='valid', name='conv3')(var_x)
        var_x = PReLU(shared_axes=[1, 2], name='prelu3')(var_x)
        var_x = MaxPool2D(pool_size=2)(var_x)
        var_x = Conv2D(128, (2, 2), strides=1, padding='valid', name='conv4')(var_x)
        var_x = PReLU(shared_axes=[1, 2], name='prelu4')(var_x)
        var_x = Permute((3, 2, 1))(var_x)
        var_x = Flatten()(var_x)
        var_x = Dense(256, name='conv5')(var_x)
        var_x = PReLU(name='prelu5')(var_x)

        classifier = Dense(2, activation='softmax', name='conv6-1')(var_x)
        bbox_regress = Dense(4, name='conv6-2')(var_x)
        landmark_regress = Dense(10, name='conv6-3')(var_x)
        return [input_], [classifier, bbox_regress, landmark_regress]

    def __call__(self,
                 images: np.ndarray,
                 rectangle_batch: list[np.ndarray]
                 ) -> list[tuple[np.ndarray, np.ndarray]]:
        """ Third stage - further refinement and facial landmarks positions with o-net

        Parameters
        ----------
        images: :class:`numpy.ndarray`
            The batch of images to detect faces in
        rectangle_batch:
            List of :class:`numpy.ndarray` face candidates from R-Net

        Returns
        -------
        List
            List of refined final candidates, scores and landmark points from O-Net
        """
        ret: list[tuple[np.ndarray, np.ndarray]] = []
        for image, rectangles in zip(images, rectangle_batch):
            if not np.any(rectangles):
                # No candidates for this image: empty boxes and empty landmarks
                ret.append((np.empty((0, 5)), np.empty(0)))
                continue

            feed_batch = np.empty((rectangles.shape[0], 48, 48, 3), dtype="float32")
            for row, rect in enumerate(rectangles):
                # Cut each candidate out of the image and resize it straight into the feed
                cv2.resize(image[rect[1]: rect[3], rect[0]: rect[2]],
                           (48, 48),
                           dst=feed_batch[row])

            cls_probs, roi_probs, pts_probs = self.predict(feed_batch)
            ret.append(self._filter_face_48net(cls_probs, roi_probs, pts_probs, rectangles))
        return ret

    def _filter_face_48net(self, class_probabilities: np.ndarray,
                           roi: np.ndarray,
                           points: np.ndarray,
                           rectangles: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """ Filter face position and calibrate bounding box on 24net's output

        Parameters
        ----------
        class_probabilities: :class:`numpy.ndarray`
            Softmax output of the face classifier. Column 1 is used as the face score
        roi: :class:`numpy.ndarray`
            Bounding box regression offsets
        points: :class:`numpy.ndarray`
            5 point face landmark regression output
        rectangles: :class:`numpy.ndarray`
            The candidate boxes that were fed to the network

        Returns
        -------
        boxes: :class:`numpy.ndarray`
            The [l, t, r, b, score] bounding boxes
        points: :class:`numpy.ndarray`
            The 5 point landmarks
        """
        prob = class_probabilities[:, 1]
        pick = np.nonzero(prob >= self._threshold)[0]
        scores = prob[pick]

        bbox = rectangles[pick]
        dims = np.array([bbox[..., 2] - bbox[..., 0], bbox[..., 3] - bbox[..., 1]]).T

        # Re-order each row of landmarks from two blocks of 5 into 5 interleaved pairs, so
        # that they line up with the tiled (width, height) and (left, top) values below
        pts = np.vstack(
            np.hsplit(points[pick], 2)).reshape(2, -1, 5).transpose(1, 2, 0).reshape(-1, 10)
        pts = np.tile(dims, (1, 5)) * pts + np.tile(bbox[..., :2], (1, 5))

        bbox = np.clip(np.floor(bbox + roi[pick] * np.tile(dims, (1, 2))),
                       0.,
                       self._input_size)

        # Discard any boxes that have collapsed to zero width or height
        indices = np.where(
            np.logical_and(bbox[..., 2] > bbox[..., 0], bbox[..., 3] > bbox[..., 1]))[0]
        picks = np.concatenate([bbox[indices], pts[indices]], axis=-1)
        # The scores must be filtered with the same indices as the boxes, otherwise the two
        # arrays passed to nms no longer describe the same candidates
        scores = scores[indices]

        results, scores = nms(picks, scores, 0.3, "iom")
        return np.concatenate([results[..., :4], scores[..., None]], axis=-1), results[..., 4:].T
class MTCNN():  # pylint: disable=too-few-public-methods
    """ MTCNN Detector for face alignment

    Chains the three sub-networks (P-Net, R-Net and O-Net) together.

    Parameters
    ----------
    model_path: list
        List of paths to the 3 MTCNN subnet weights
    allow_growth: bool
        Enable the Tensorflow GPU allow_growth configuration option. This option prevents
        Tensorflow from allocating all of the GPU VRAM, but can lead to higher fragmentation and
        slower performance
    exclude_gpus: list or ``None``
        A list of indices correlating to connected GPUs that Tensorflow should not use. Pass
        ``None`` to not exclude any GPUs
    cpu_mode: bool
        ``True`` run the model on CPU
    input_size: int, optional
        The height, width input size to the model. Default: 640
    minsize: int, optional
        The minimum size of a face to accept as a detection. Default: `20`
    threshold: list, optional
        List of floats for the three steps, Default: `[0.6, 0.7, 0.7]`
    factor: float, optional
        The factor used to create a scaling pyramid of face sizes to detect in the image.
        Default: `0.709`
    """
    def __init__(self,
                 model_path: list[str],
                 allow_growth: bool,
                 exclude_gpus: list[int] | None,
                 cpu_mode: bool,
                 input_size: int = 640,
                 minsize: int = 20,
                 threshold: list[float] | None = None,
                 factor: float = 0.709) -> None:
        logger.debug("Initializing: %s: (model_path: '%s', allow_growth: %s, exclude_gpus: %s, "
                     "input_size: %s, minsize: %s, threshold: %s, factor: %s)",
                     self.__class__.__name__, model_path, allow_growth, exclude_gpus,
                     input_size, minsize, threshold, factor)

        if threshold is None:
            threshold = [0.6, 0.7, 0.7]

        # Positional arguments that every sub-network receives after its own model path
        shared = (allow_growth, exclude_gpus, cpu_mode, input_size)
        pnet_path, rnet_path, onet_path = model_path[0], model_path[1], model_path[2]

        self._pnet = PNet(pnet_path, *shared, minsize, factor, threshold[0])
        self._rnet = RNet(rnet_path, *shared, threshold[1])
        self._onet = ONet(onet_path, *shared, threshold[2])

        logger.debug("Initialized: %s", self.__class__.__name__)

    def detect_faces(self, batch: np.ndarray) -> tuple[np.ndarray, tuple[np.ndarray]]:
        """Detects faces in an image, and returns bounding boxes and points for them.

        Parameters
        ----------
        batch: :class:`numpy.ndarray`
            The input batch of images to detect face in

        Returns
        -------
        boxes: :class:`numpy.ndarray`
            Object array holding the bounding boxes found for each image in the batch
        points: tuple
            The 5 point landmarks found for each image in the batch
        """
        proposals = self._pnet(batch)
        refined = self._rnet(batch, proposals)
        # O-Net returns one (boxes, points) pair per image. Split them into two sequences
        boxes, points = zip(*self._onet(batch, refined))
        return np.array(boxes, dtype="object"), points
def nms(rectangles: np.ndarray,
        scores: np.ndarray,
        threshold: float,
        method: str = "iom") -> tuple[np.ndarray, np.ndarray]:
    """ Apply non-maximum suppression to a set of candidate boxes (matrix version)

    The highest scoring box is repeatedly selected and every remaining box that overlaps it by
    more than the threshold is discarded.

    Parameters
    ----------
    rectangles: :class:`np.ndarray`
        The detection candidates. The first 4 columns of each row are the
        [left, top, right, bottom] box. Any further columns are carried through untouched
    scores: :class:`np.ndarray`
        The score for each of the rectangles
    threshold: float
        Boxes overlapping a selected box by more than this amount are discarded
    method: str, optional
        "iom" to measure overlap as intersection over the smaller of the 2 areas. Any other
        value measures overlap as intersection over union. Default: "iom"

    Returns
    -------
    rectangles: :class:`np.ndarray`
        The selected rectangles, highest score first
    scores :class:`np.ndarray`
        The associated scores for the rectangles
    """
    if not np.any(rectangles):
        # Nothing to suppress. Inputs are handed straight back
        return rectangles, scores

    coords = rectangles[..., :4].T
    area = np.multiply(coords[2] - coords[0] + 1, coords[3] - coords[1] + 1)

    remaining = scores.argsort()  # ascending, so the best candidate is always last
    selected = []
    while len(remaining) > 0:
        best = remaining[-1]
        others = remaining[0:-1]

        # Corners of the intersection between the best box and each of the other boxes
        top_left = np.maximum(coords[:2, best, None], coords[:2, others])
        bottom_right = np.minimum(coords[2:, best, None], coords[2:, others])
        overlap = np.concatenate([top_left, bottom_right], axis=0)

        inter = (np.maximum(0.0, overlap[2] - overlap[0] + 1) *
                 np.maximum(0.0, overlap[3] - overlap[1] + 1))

        if method == "iom":
            ratio = inter / np.minimum(area[best], area[others])
        else:
            ratio = inter / (area[best] + area[others] - inter)

        selected.append(best)
        remaining = others[ratio <= threshold]

    return rectangles[selected], scores[selected]
def rect2square(rectangles: np.ndarray) -> np.ndarray:
    """ Expand rectangles into squares about their center points (matrix version)

    The side of each square is the larger of the rectangle's width and height. The array that
    is passed in is updated in place.

    Parameters
    ----------
    rectangles: :class:`numpy.ndarray`
        The rectangles to update. The first 4 columns of each row are [x, y, x1, y1]. Any
        further columns are left untouched

    Return
    ------
    :class:`numpy.ndarray`
        The array that was passed in, with each rectangle changed to a square
    """
    left = rectangles[:, 0]
    top = rectangles[:, 1]
    half_width = (rectangles[:, 2] - left) * 0.5
    half_height = (rectangles[:, 3] - top) * 0.5
    side = np.maximum(rectangles[:, 2] - left, rectangles[:, 3] - top)
    half_side = side * 0.5

    # Move the top left corner so that the square shares the rectangle's center
    rectangles[:, 0] = left + half_width - half_side
    rectangles[:, 1] = top + half_height - half_side
    # The bottom right corner is one side length on from the new top left corner
    rectangles[:, 2:4] = rectangles[:, 0:2] + side[:, None]
    return rectangles