1
0
Fork 0
mirror of https://github.com/deepfakes/faceswap synced 2025-06-07 10:43:27 -04:00
faceswap/plugins/train/model/phaze_a.py
torzdf 6a3b674bef
Rebase code (#1326)
* Remove tensorflow_probability requirement

* setup.py - fix progress bars

* requirements.txt: Remove pre python 3.9 packages

* update apple requirements.txt

* update INSTALL.md

* Remove python<3.9 code

* setup.py - fix Windows Installer

* typing: python3.9 compliant

* Update pytest and readthedocs python versions

* typing fixes

* Python Version updates
  - Reduce max version to 3.10
  - Default to 3.10 in installers
  - Remove incompatible 3.11 tests

* Update dependencies

* Downgrade imageio dep for Windows

* typing: merge optional unions and fixes

* Updates
  - min python version 3.10
  - typing to python 3.10 spec
  - remove pre-tf2.10 code
  - Add conda tests

* train: re-enable optimizer saving

* Update dockerfiles

* Update setup.py
  - Apple Conda deps to setup.py
  - Better Cuda + dependency handling

* bugfix: Patch logging to prevent Autograph errors

* Update dockerfiles

* Setup.py - Setup.py - stdout to utf-8

* Add more OSes to github Actions

* suppress mac-os end to end test
2023-06-27 11:27:47 +01:00

1321 lines
53 KiB
Python

#!/usr/bin/env python3
""" Phaze-A Model by TorzDF with thanks to BirbFakes and the myriad of testers. """
# pylint: disable=too-many-lines
from __future__ import annotations
import logging
import typing as T
from dataclasses import dataclass
import numpy as np
# Ignore linting errors from Tensorflow's thoroughly broken import system
from tensorflow.keras.layers import LayerNormalization # pylint:disable=import-error
from tensorflow.keras import applications as kapp, backend as K # noqa:E501 # pylint:disable=import-error
from tensorflow.keras.layers import ( # pylint:disable=import-error
Add, BatchNormalization, Concatenate, Dense, Dropout, Flatten, GaussianNoise, MaxPool2D,
GlobalAveragePooling2D, GlobalMaxPooling2D, Input, LeakyReLU, Reshape, UpSampling2D,
Conv2D as KConv2D)
from tensorflow.keras.models import clone_model, Model as KModel # noqa:E501 # pylint:disable=import-error
from lib.model.nn_blocks import (
Conv2D, Conv2DBlock, Conv2DOutput, ResidualBlock, UpscaleBlock, Upscale2xBlock,
UpscaleResizeImagesBlock, UpscaleDNYBlock)
from lib.model.normalization import (
AdaInstanceNormalization, GroupNormalization, InstanceNormalization, RMSNormalization)
from lib.utils import get_tf_version, FaceswapError
from ._base import ModelBase, get_all_sub_models
if T.TYPE_CHECKING:
from tensorflow import keras
from tensorflow import Tensor
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
@dataclass
class _EncoderInfo:
""" Contains model configuration options for various Phaze-A Encoders.
Parameters
----------
keras_name: str
The name of the encoder in Keras Applications. Empty string `""` if the encoder does not
exist in Keras Applications
default_size: int
The default input size of the encoder
tf_min: float, optional
The lowest version of Tensorflow that the encoder can be used for. Default: `2.0`
scaling: tuple, optional
The float scaling that the encoder expects. Default: `(0, 1)`
min_size: int, optional
The minimum input size that the encoder will allow. Default: 32
enforce_for_weights: bool, optional
``True`` if the input size for the model must be forced to the default size when loading
imagenet weights, otherwise ``False``. Default: ``False``
color_order: str, optional
The color order that the model expects (`"bgr"` or `"rgb"`). Default: `"rgb"`
"""
keras_name: str
default_size: int
tf_min: tuple[int, int] = (2, 0)
scaling: tuple[int, int] = (0, 1)
min_size: int = 32
enforce_for_weights: bool = False
color_order: T.Literal["bgr", "rgb"] = "rgb"
_MODEL_MAPPING: dict[str, _EncoderInfo] = {
"densenet121": _EncoderInfo(
keras_name="DenseNet121", default_size=224),
"densenet169": _EncoderInfo(
keras_name="DenseNet169", default_size=224),
"densenet201": _EncoderInfo(
keras_name="DenseNet201", default_size=224),
"efficientnet_b0": _EncoderInfo(
keras_name="EfficientNetB0", tf_min=(2, 3), scaling=(0, 255), default_size=224),
"efficientnet_b1": _EncoderInfo(
keras_name="EfficientNetB1", tf_min=(2, 3), scaling=(0, 255), default_size=240),
"efficientnet_b2": _EncoderInfo(
keras_name="EfficientNetB2", tf_min=(2, 3), scaling=(0, 255), default_size=260),
"efficientnet_b3": _EncoderInfo(
keras_name="EfficientNetB3", tf_min=(2, 3), scaling=(0, 255), default_size=300),
"efficientnet_b4": _EncoderInfo(
keras_name="EfficientNetB4", tf_min=(2, 3), scaling=(0, 255), default_size=380),
"efficientnet_b5": _EncoderInfo(
keras_name="EfficientNetB5", tf_min=(2, 3), scaling=(0, 255), default_size=456),
"efficientnet_b6": _EncoderInfo(
keras_name="EfficientNetB6", tf_min=(2, 3), scaling=(0, 255), default_size=528),
"efficientnet_b7": _EncoderInfo(
keras_name="EfficientNetB7", tf_min=(2, 3), scaling=(0, 255), default_size=600),
"efficientnet_v2_b0": _EncoderInfo(
keras_name="EfficientNetV2B0", tf_min=(2, 8), scaling=(-1, 1), default_size=224),
"efficientnet_v2_b1": _EncoderInfo(
keras_name="EfficientNetV2B1", tf_min=(2, 8), scaling=(-1, 1), default_size=240),
"efficientnet_v2_b2": _EncoderInfo(
keras_name="EfficientNetV2B2", tf_min=(2, 8), scaling=(-1, 1), default_size=260),
"efficientnet_v2_b3": _EncoderInfo(
keras_name="EfficientNetV2B3", tf_min=(2, 8), scaling=(-1, 1), default_size=300),
"efficientnet_v2_s": _EncoderInfo(
keras_name="EfficientNetV2S", tf_min=(2, 8), scaling=(-1, 1), default_size=384),
"efficientnet_v2_m": _EncoderInfo(
keras_name="EfficientNetV2M", tf_min=(2, 8), scaling=(-1, 1), default_size=480),
"efficientnet_v2_l": _EncoderInfo(
keras_name="EfficientNetV2L", tf_min=(2, 8), scaling=(-1, 1), default_size=480),
"inception_resnet_v2": _EncoderInfo(
keras_name="InceptionResNetV2", scaling=(-1, 1), min_size=75, default_size=299),
"inception_v3": _EncoderInfo(
keras_name="InceptionV3", scaling=(-1, 1), min_size=75, default_size=299),
"mobilenet": _EncoderInfo(
keras_name="MobileNet", scaling=(-1, 1), default_size=224),
"mobilenet_v2": _EncoderInfo(
keras_name="MobileNetV2", scaling=(-1, 1), default_size=224),
"mobilenet_v3_large": _EncoderInfo(
keras_name="MobileNetV3Large", tf_min=(2, 4), scaling=(-1, 1), default_size=224),
"mobilenet_v3_small": _EncoderInfo(
keras_name="MobileNetV3Small", tf_min=(2, 4), scaling=(-1, 1), default_size=224),
"nasnet_large": _EncoderInfo(
keras_name="NASNetLarge", scaling=(-1, 1), default_size=331, enforce_for_weights=True),
"nasnet_mobile": _EncoderInfo(
keras_name="NASNetMobile", scaling=(-1, 1), default_size=224, enforce_for_weights=True),
"resnet50": _EncoderInfo(
keras_name="ResNet50", scaling=(-1, 1), min_size=32, default_size=224),
"resnet50_v2": _EncoderInfo(
keras_name="ResNet50V2", scaling=(-1, 1), default_size=224),
"resnet101": _EncoderInfo(
keras_name="ResNet101", scaling=(-1, 1), default_size=224),
"resnet101_v2": _EncoderInfo(
keras_name="ResNet101V2", scaling=(-1, 1), default_size=224),
"resnet152": _EncoderInfo(
keras_name="ResNet152", scaling=(-1, 1), default_size=224),
"resnet152_v2": _EncoderInfo(
keras_name="ResNet152V2", scaling=(-1, 1), default_size=224),
"vgg16": _EncoderInfo(
keras_name="VGG16", color_order="bgr", scaling=(0, 255), default_size=224),
"vgg19": _EncoderInfo(
keras_name="VGG19", color_order="bgr", scaling=(0, 255), default_size=224),
"xception": _EncoderInfo(
keras_name="Xception", scaling=(-1, 1), min_size=71, default_size=299),
"fs_original": _EncoderInfo(
keras_name="", color_order="bgr", min_size=32, default_size=1024)}
class Model(ModelBase):
""" Phaze-A Faceswap Model.
An highly adaptable and configurable model by torzDF
Parameters
----------513
args: varies
The default command line arguments passed in from :class:`~scripts.train.Train` or
:class:`~scripts.train.Convert`
kwargs: varies
The default keyword arguments passed in from :class:`~scripts.train.Train` or
:class:`~scripts.train.Convert`
"""
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
if self.config["output_size"] % 64 != 0:
raise FaceswapError("Phaze-A output shape must be a multiple of 64")
self._validate_encoder_architecture()
self.config["freeze_layers"] = self._select_freeze_layers()
self.input_shape = self._get_input_shape()
self.color_order = _MODEL_MAPPING[self.config["enc_architecture"]].color_order
def build(self) -> None:
""" Build the model and assign to :attr:`model`.
Override's the default build function for allowing the setting of dropout rate for pre-
existing models.
"""
is_summary = hasattr(self._args, "summary") and self._args.summary
if not self._io.model_exists or self._is_predict or is_summary:
logger.debug("New model, inference or summary. Falling back to default build: "
"(exists: %s, inference: %s, is_summary: %s)",
self._io.model_exists, self._is_predict, is_summary)
super().build()
return
with self._settings.strategy_scope():
model = self._io._load() # pylint:disable=protected-access
model = self._update_dropouts(model)
self._model = model
self._compile_model()
self._output_summary()
def _update_dropouts(self, model: keras.models.Model) -> keras.models.Model:
""" Update the saved model with new dropout rates.
Keras, annoyingly, does not actually change the dropout of the underlying layer, so we need
to update the rate, then clone the model into a new model and reload weights.
Parameters
----------
model: :class:`keras.models.Model`
The loaded saved Keras Model to update the dropout rates for
Returns
-------
:class:`keras.models.Model`
The loaded Keras Model with the dropout rates updated
"""
dropouts = {"fc": self.config["fc_dropout"],
"gblock": self.config["fc_gblock_dropout"]}
logger.debug("Config dropouts: %s", dropouts)
updated = False
for mod in get_all_sub_models(model):
if not mod.name.startswith("fc_"):
continue
key = "gblock" if "gblock" in mod.name else mod.name.split("_")[0]
rate = dropouts[key]
log_once = False
for layer in mod.layers:
if not isinstance(layer, Dropout):
continue
if layer.rate != rate:
logger.debug("Updating dropout rate for %s from %s to %s",
f"{mod.name} - {layer.name}", layer.rate, rate)
if not log_once:
logger.info("Updating Dropout Rate for '%s' from %s to %s",
mod.name, layer.rate, rate)
log_once = True
layer.rate = rate
updated = True
if updated:
logger.debug("Dropout rate updated. Cloning model")
new_model = clone_model(model)
new_model.set_weights(model.get_weights())
del model
model = new_model
return model
def _select_freeze_layers(self) -> list[str]:
""" Process the selected frozen layers and replace the `keras_encoder` option with the
actual keras model name
Returns
-------
list
The selected layers for weight freezing
"""
arch = self.config["enc_architecture"]
layers = self.config["freeze_layers"]
# EfficientNetV2 is inconsistent with other model's naming conventions
keras_name = _MODEL_MAPPING[arch].keras_name.replace("EfficientNetV2", "EfficientNetV2-")
if "keras_encoder" not in self.config["freeze_layers"]:
retval = layers
elif keras_name:
retval = [layer.replace("keras_encoder", keras_name.lower()) for layer in layers]
logger.debug("Substituting 'keras_encoder' for '%s'", arch)
else:
retval = [layer for layer in layers if layer != "keras_encoder"]
logger.debug("Removing 'keras_encoder' for '%s'", arch)
return retval
def _get_input_shape(self) -> tuple[int, int, int]:
""" Obtain the input shape for the model.
Input shape is calculated from the selected Encoder's input size, scaled to the user
selected Input Scaling, rounded down to the nearest 16 pixels.
Notes
-----
Some models (NasNet) require the input size to be of a certain dimension if loading
imagenet weights. In these instances resize inputs and raise warning message
Returns
-------
tuple
The shape tuple for the input size to the Phaze-A model
"""
arch = self.config["enc_architecture"]
enforce_size = _MODEL_MAPPING[arch].enforce_for_weights
default_size = _MODEL_MAPPING[arch].default_size
scaling = self.config["enc_scaling"] / 100
min_size = _MODEL_MAPPING[arch].min_size
size = int(max(min_size, ((default_size * scaling) // 16) * 16))
if self.config["enc_load_weights"] and enforce_size and scaling != 1.0:
logger.warning("%s requires input size to be %spx when loading imagenet weights. "
"Adjusting input size from %spx to %spx",
arch, default_size, size, default_size)
retval = (default_size, default_size, 3)
else:
retval = (size, size, 3)
logger.debug("Encoder input set to: %s", retval)
return retval
def _validate_encoder_architecture(self) -> None:
""" Validate that the requested architecture is a valid choice for the running system
configuration.
If the selection is not valid, an error is logged and system exits.
"""
arch = self.config["enc_architecture"].lower()
model = _MODEL_MAPPING.get(arch)
if not model:
raise FaceswapError(f"'{arch}' is not a valid choice for encoder architecture. Choose "
f"one of {list(_MODEL_MAPPING.keys())}.")
tf_ver = get_tf_version()
tf_min = model.tf_min
if tf_ver < tf_min:
raise FaceswapError(f"{arch}' is not compatible with your version of Tensorflow. The "
f"minimum version required is {tf_min} whilst you have version "
f"{tf_ver} installed.")
def build_model(self, inputs: list[Tensor]) -> keras.models.Model:
""" Create the model's structure.
Parameters
----------
inputs: list
A list of input tensors for the model. This will be a list of 2 tensors of
shape :attr:`input_shape`, the first for side "a", the second for side "b".
Returns
-------
:class:`keras.models.Model`
The generated model
"""
# Create sub-Models
encoders = self._build_encoders(inputs)
inters = self._build_fully_connected(encoders)
g_blocks = self._build_g_blocks(inters)
decoders = self._build_decoders(g_blocks)
# Create Autoencoder
outputs = [decoders["a"], decoders["b"]]
autoencoder = KModel(inputs, outputs, name=self.model_name)
return autoencoder
def _build_encoders(self, inputs: list[Tensor]) -> dict[str, keras.models.Model]:
""" Build the encoders for Phaze-A
Parameters
----------
inputs: list
A list of input tensors for the model. This will be a list of 2 tensors of
shape :attr:`input_shape`, the first for side "a", the second for side "b".
Returns
-------
dict
side as key ('a' or 'b'), encoder for side as value
"""
encoder = Encoder(self.input_shape, self.config)()
retval = {"a": encoder(inputs[0]), "b": encoder(inputs[1])}
logger.debug("Encoders: %s", retval)
return retval
def _build_fully_connected(
self,
inputs: dict[str, keras.models.Model]) -> dict[str, list[keras.models.Model]]:
""" Build the fully connected layers for Phaze-A
Parameters
----------
inputs: dict
The compiled encoder models that act as inputs to the fully connected layers
Returns
-------
dict
side as key ('a' or 'b'), fully connected model for side as value
"""
input_shapes = K.int_shape(inputs["a"])[1:]
if self.config["split_fc"]:
fc_a = FullyConnected("a", input_shapes, self.config)()
inter_a = [fc_a(inputs["a"])]
inter_b = [FullyConnected("b", input_shapes, self.config)()(inputs["b"])]
else:
fc_both = FullyConnected("both", input_shapes, self.config)()
inter_a = [fc_both(inputs["a"])]
inter_b = [fc_both(inputs["b"])]
if self.config["shared_fc"]:
if self.config["shared_fc"] == "full":
fc_shared = FullyConnected("shared", input_shapes, self.config)()
elif self.config["split_fc"]:
fc_shared = fc_a
else:
fc_shared = fc_both
inter_a = [Concatenate(name="inter_a")([inter_a[0], fc_shared(inputs["a"])])]
inter_b = [Concatenate(name="inter_b")([inter_b[0], fc_shared(inputs["b"])])]
if self.config["enable_gblock"]:
fc_gblock = FullyConnected("gblock", input_shapes, self.config)()
inter_a.append(fc_gblock(inputs["a"]))
inter_b.append(fc_gblock(inputs["b"]))
retval = {"a": inter_a, "b": inter_b}
logger.debug("Fully Connected: %s", retval)
return retval
def _build_g_blocks(
self,
inputs: dict[str, list[keras.models.Model]]
) -> dict[str, list[keras.models.Model] | keras.models.Model]:
""" Build the g-block layers for Phaze-A.
If a g-block has not been selected for this model, then the original `inters` models are
returned for passing straight to the decoder
Parameters
----------
inputs: dict
The compiled inter models that act as inputs to the g_blocks
Returns
-------
dict
side as key ('a' or 'b'), g-block model for side as value. If g-block has been disabled
then the values will be the fully connected layers
"""
if not self.config["enable_gblock"]:
logger.debug("No G-Block selected, returning Inters: %s", inputs)
return inputs
input_shapes = [K.int_shape(inter)[1:] for inter in inputs["a"]]
if self.config["split_gblock"]:
retval = {"a": GBlock("a", input_shapes, self.config)()(inputs["a"]),
"b": GBlock("b", input_shapes, self.config)()(inputs["b"])}
else:
g_block = GBlock("both", input_shapes, self.config)()
retval = {"a": g_block((inputs["a"])), "b": g_block((inputs["b"]))}
logger.debug("G-Blocks: %s", retval)
return retval
def _build_decoders(self,
inputs: dict[str, list[keras.models.Model] | keras.models.Model]
) -> dict[str, keras.models.Model]:
""" Build the encoders for Phaze-A
Parameters
----------
inputs: dict
A dict of inputs to the decoder. This will either be g-block output (if g-block is
enabled) or fully connected layers output (if g-block is disabled).
Returns
-------
dict
side as key ('a' or 'b'), decoder for side as value
"""
input_ = inputs["a"]
# If input is inters, shapes will be a list.
# There will only ever be 1 input. For inters: either inter out, or concatenate of inters
# For g-block, this only ever has one output
input_ = input_[0] if isinstance(input_, list) else input_
# If learning a mask and upscales have been placed into FC layer, then the mask will also
# come as an input
if self.config["learn_mask"] and self.config["dec_upscales_in_fc"]:
input_ = input_[0]
input_shape = K.int_shape(input_)[1:]
if self.config["split_decoders"]:
retval = {"a": Decoder("a", input_shape, self.config)()(inputs["a"]),
"b": Decoder("b", input_shape, self.config)()(inputs["b"])}
else:
decoder = Decoder("both", input_shape, self.config)()
retval = {"a": decoder(inputs["a"]), "b": decoder(inputs["b"])}
logger.debug("Decoders: %s", retval)
return retval
def _bottleneck(inputs: Tensor, bottleneck: str, size: int, normalization: str) -> Tensor:
""" The bottleneck fully connected layer. Can be called from Encoder or FullyConnected layers.
Parameters
----------
inputs: tensor
The input to the bottleneck layer
bottleneck: str
The type of layer to use for the bottleneck
size: int
The number of nodes for the dense layer (if selected)
normalization: str
The normalization method to use prior to the bottleneck layer
Returns
-------
tensor
The output from the bottleneck
"""
norms = {"layer": LayerNormalization,
"rms": RMSNormalization,
"instance": InstanceNormalization}
bottlenecks = {"average_pooling": GlobalAveragePooling2D(),
"dense": Dense(size),
"max_pooling": GlobalMaxPooling2D()}
var_x = inputs
if normalization:
var_x = norms[normalization]()(var_x)
if bottleneck == "dense" and len(K.int_shape(var_x)[1:]) > 1:
# Flatten non-1D inputs for dense bottleneck
var_x = Flatten()(var_x)
var_x = bottlenecks[bottleneck](var_x)
if len(K.int_shape(var_x)[1:]) > 1:
# Flatten prior to fc layers
var_x = Flatten()(var_x)
return var_x
def _get_upscale_layer(method: T.Literal["resize_images", "subpixel", "upscale_dny",
"upscale_fast", "upscale_hybrid", "upsample2d"],
filters: int,
activation: str | None = None,
upsamples: int | None = None,
interpolation: str | None = None) -> keras.layers.Layer:
""" Obtain an instance of the requested upscale method.
Parameters
----------
method: str
The user selected upscale method to use. One of `"resize_images"`, `"subpixel"`,
`"upscale_dny"`, `"upscale_fast"`, `"upscale_hybrid"`, `"upsample2d"`
filters: int
The number of filters to use in the upscale layer
activation: str, optional
The activation function to use in the upscale layer. ``None`` to use no activation.
Default: ``None``
upsamples: int, optional
Only used for UpSampling2D. If provided, then this is passed to the layer as the ``size``
parameter. Default: ``None``
interpolation: str, optional
Only used for UpSampling2D. If provided, then this is passed to the layer as the
``interpolation`` parameter. Default: ``None``
Returns
-------
:class:`keras.layers.Layer`
The selected configured upscale layer
"""
if method == "upsample2d":
kwargs: dict[str, str | int] = {}
if upsamples:
kwargs["size"] = upsamples
if interpolation:
kwargs["interpolation"] = interpolation
return UpSampling2D(**kwargs)
if method == "subpixel":
return UpscaleBlock(filters, activation=activation)
if method == "upscale_fast":
return Upscale2xBlock(filters, activation=activation, fast=True)
if method == "upscale_hybrid":
return Upscale2xBlock(filters, activation=activation, fast=False)
if method == "upscale_dny":
return UpscaleDNYBlock(filters, activation=activation)
return UpscaleResizeImagesBlock(filters, activation=activation)
def _get_curve(start_y: int,
end_y: int,
num_points: int,
scale: float,
mode: T.Literal["full", "cap_max", "cap_min"] = "full") -> list[int]:
""" Obtain a curve.
For the given start and end y values, return the y co-ordinates of a curve for the given
number of points. The points are rounded down to the nearest 8.
Parameters
----------
start_y: int
The y co-ordinate for the starting point of the curve
end_y: int
The y co-ordinate for the end point of the curve
num_points: int
The number of data points to plot on the x-axis
scale: float
The scale of the curve (from -.99 to 0.99)
slope_mode: str, optional
The method to generate the curve. One of `"full"`, `"cap_max"` or `"cap_min"`. `"full"`
mode generates a curve from the `"start_y"` to the `"end_y"` values. `"cap_max"` pads the
earlier points with the `"start_y"` value before filling out the remaining points at a
fixed divider to the `"end_y"` value. `"cap_min"` starts at the `"start_y" filling points
at a fixed divider until the `"end_y"` value is reached and pads the remaining points with
the `"end_y"` value. Default: `"full"`
Returns
-------
list
List of ints of points for the given curve
"""
scale = min(.99, max(-.99, scale))
logger.debug("Obtaining curve: (start_y: %s, end_y: %s, num_points: %s, scale: %s, mode: %s)",
start_y, end_y, num_points, scale, mode)
if mode == "full":
x_axis = np.linspace(0., 1., num=num_points)
y_axis = (x_axis - x_axis * scale) / (scale - abs(x_axis) * 2 * scale + 1)
y_axis = y_axis * (end_y - start_y) + start_y
retval = [int((y // 8) * 8) for y in y_axis]
else:
y_axis = [start_y]
scale = 1. - abs(scale)
for _ in range(num_points - 1):
current_value = max(end_y, int(((y_axis[-1] * scale) // 8) * 8))
y_axis.append(current_value)
if current_value == end_y:
break
pad = [start_y if mode == "cap_max" else end_y for _ in range(num_points - len(y_axis))]
retval = pad + y_axis if mode == "cap_max" else y_axis + pad
logger.debug("Returning curve: %s", retval)
return retval
def _scale_dim(target_resolution: int, original_dim: int) -> int:
""" Scale a given `original_dim` so that it is a factor of the target resolution.
Parameters
----------
target_resolution: int
The output resolution that is being targetted
original_dim: int
The dimension that needs to be checked for compatibility for upscaling to the
target resolution
Returns
-------
int
The highest dimension below or equal to `original_dim` that is a factor of the
target resolution.
"""
new_dim = target_resolution
while new_dim > original_dim:
next_dim = new_dim / 2
if not next_dim.is_integer():
break
new_dim = int(next_dim)
logger.debug("target_resolution: %s, original_dim: %s, new_dim: %s",
target_resolution, original_dim, new_dim)
return new_dim
class Encoder(): # pylint:disable=too-few-public-methods
""" Encoder. Uses one of pre-existing Keras/Faceswap models or custom encoder.
Parameters
----------
input_shape: tuple
The shape tuple for the input tensor
config: dict
The model configuration options
"""
def __init__(self, input_shape: tuple[int, ...], config: dict) -> None:
self.input_shape = input_shape
self._config = config
self._input_shape = input_shape
@property
def _model_kwargs(self) -> dict[str, dict[str, str | bool]]:
""" dict: Configuration option for architecture mapped to optional kwargs. """
return {"mobilenet": {"alpha": self._config["mobilenet_width"],
"depth_multiplier": self._config["mobilenet_depth"],
"dropout": self._config["mobilenet_dropout"]},
"mobilenet_v2": {"alpha": self._config["mobilenet_width"]},
"mobilenet_v3": {"alpha": self._config["mobilenet_width"],
"minimalist": self._config["mobilenet_minimalistic"],
"include_preprocessing": False}}
@property
def _selected_model(self) -> tuple[_EncoderInfo, dict]:
""" tuple(dict, :class:`_EncoderInfo`): The selected encoder model and it's associated
keyword arguments """
arch = self._config["enc_architecture"]
model = _MODEL_MAPPING[arch]
kwargs = self._model_kwargs.get(arch, {})
if arch.startswith("efficientnet_v2"):
kwargs["include_preprocessing"] = False
return model, kwargs
def __call__(self) -> keras.models.Model:
""" Create the Phaze-A Encoder Model.
Returns
-------
:class:`keras.models.Model`
The selected Encoder Model
"""
input_ = Input(shape=self._input_shape)
var_x = input_
scaling = self._selected_model[0].scaling
if scaling:
# Some models expect different scaling.
logger.debug("Scaling to %s for '%s'", scaling, self._config["enc_architecture"])
if scaling == (0, 255):
# models expecting inputs from 0 to 255.
var_x = var_x * 255.
if scaling == (-1, 1):
# models expecting inputs from -1 to 1.
var_x = var_x * 2.
var_x = var_x - 1.0
if (self._config["enc_architecture"].startswith("efficientnet_b")
and self._config["mixed_precision"]):
# There is a bug in EfficientNet pre-processing where the normalized mean for the
# imagenet rgb values are not cast to float16 when mixed precision is enabled.
# We monkeypatch in a cast constant until the issue is resolved
# TODO revert if/when applying Imagenet Normalization works with mixed precision
# confirmed bugged: TF2.10
logger.debug("Patching efficientnet.IMAGENET_STDDEV_RGB to float16 constant")
from keras.applications import efficientnet # pylint:disable=import-outside-toplevel
setattr(efficientnet,
"IMAGENET_STDDEV_RGB",
K.constant(efficientnet.IMAGENET_STDDEV_RGB, dtype="float16"))
var_x = self._get_encoder_model()(var_x)
if self._config["bottleneck_in_encoder"]:
var_x = _bottleneck(var_x,
self._config["bottleneck_type"],
self._config["bottleneck_size"],
self._config["bottleneck_norm"])
return KModel(input_, var_x, name="encoder")
def _get_encoder_model(self) -> keras.models.Model:
""" Return the model defined by the selected architecture.
Returns
-------
:class:`keras.Model`
The selected keras model for the chosen encoder architecture
"""
model, kwargs = self._selected_model
if model.keras_name:
kwargs["input_shape"] = self._input_shape
kwargs["include_top"] = False
kwargs["weights"] = "imagenet" if self._config["enc_load_weights"] else None
retval = getattr(kapp, model.keras_name)(**kwargs)
else:
retval = _EncoderFaceswap(self._config)
return retval
class _EncoderFaceswap(): # pylint:disable=too-few-public-methods
""" A configurable standard Faceswap encoder based off Original model.
Parameters
----------
config: dict
The model configuration options
"""
def __init__(self, config: dict) -> None:
self._config = config
self._type = self._config["enc_architecture"]
self._depth = config[f"{self._type}_depth"]
self._min_filters = config["fs_original_min_filters"]
self._max_filters = config["fs_original_max_filters"]
self._is_alt = config["fs_original_use_alt"]
self._relu_alpha = 0.2 if self._is_alt else 0.1
self._kernel_size = 3 if self._is_alt else 5
self._strides = 1 if self._is_alt else 2
def __call__(self, inputs: Tensor) -> Tensor:
""" Call the original Faceswap Encoder
Parameters
----------
inputs: tensor
The input tensor to the Faceswap Encoder
Returns
-------
tensor
The output tensor from the Faceswap Encoder
"""
var_x = inputs
filters = self._config["fs_original_min_filters"]
if self._is_alt:
var_x = Conv2DBlock(filters,
kernel_size=1,
strides=self._strides,
relu_alpha=self._relu_alpha)(var_x)
for i in range(self._depth):
name = f"fs_{'dny_' if self._is_alt else ''}enc"
var_x = Conv2DBlock(filters,
kernel_size=self._kernel_size,
strides=self._strides,
relu_alpha=self._relu_alpha,
name=f"{name}_convblk_{i}")(var_x)
filters = min(self._config["fs_original_max_filters"], filters * 2)
if self._is_alt and i == self._depth - 1:
var_x = Conv2DBlock(filters,
kernel_size=4,
strides=self._strides,
padding="valid",
relu_alpha=self._relu_alpha,
name=f"{name}_convblk_{i}_1")(var_x)
elif self._is_alt:
var_x = Conv2DBlock(filters,
kernel_size=self._kernel_size,
strides=self._strides,
relu_alpha=self._relu_alpha,
name=f"{name}_convblk_{i}_1")(var_x)
var_x = MaxPool2D(2, name=f"{name}_pool_{i}")(var_x)
return var_x
class FullyConnected(): # pylint:disable=too-few-public-methods
""" Intermediate Fully Connected layers for Phaze-A Model.
Parameters
----------
side: ["a", "b", "both", "gblock", "shared"]
The side of the model that the fully connected layers belong to. Used for naming
input_shape: tuple
The input shape for the fully connected layers
config: dict
The user configuration dictionary
"""
def __init__(self,
side: T.Literal["a", "b", "both", "gblock", "shared"],
input_shape: tuple,
config: dict) -> None:
logger.debug("Initializing: %s (side: %s, input_shape: %s)",
self.__class__.__name__, side, input_shape)
self._side = side
self._input_shape = input_shape
self._config = config
self._final_dims = self._config["fc_dimensions"] * (self._config["fc_upsamples"] + 1)
self._prefix = "fc_gblock" if self._side == "gblock" else "fc"
logger.debug("Initialized: %s (side: %s, min_nodes: %s, max_nodes: %s)",
self.__class__.__name__, self._side, self._min_nodes, self._max_nodes)
@property
def _min_nodes(self) -> int:
""" int: The number of nodes for the first Dense. For non g-block layers this will be the
given minimum filters multiplied by the dimensions squared. For g-block layers, this is the
given value """
if self._side == "gblock":
return self._config["fc_gblock_min_nodes"]
retval = self._scale_filters(self._config["fc_min_filters"])
retval = int(retval * self._config["fc_dimensions"] ** 2)
return retval
@property
def _max_nodes(self) -> int:
""" int: The number of nodes for the final Dense. For non g-block layers this will be the
given maximum filters multiplied by the dimensions squared. This number will be scaled down
if the final shape can not be mapped to the requested output size.
For g-block layers, this is the given config value.
"""
if self._side == "gblock":
return self._config["fc_gblock_max_nodes"]
retval = self._scale_filters(self._config["fc_max_filters"])
retval = int(retval * self._config["fc_dimensions"] ** 2)
return retval
def _scale_filters(self, original_filters: int) -> int:
""" Scale the filters to be compatible with the model's selected output size.
Parameters
----------
original_filters: int
The original user selected number of filters
Returns
-------
int
The number of filters scaled down for output size
"""
scaled_dim = _scale_dim(self._config["output_size"], self._final_dims)
if scaled_dim == self._final_dims:
logger.debug("filters don't require scaling. Returning: %s", original_filters)
return original_filters
flat = self._final_dims ** 2 * original_filters
modifier = self._final_dims ** 2 * scaled_dim ** 2
retval = int((flat // modifier) * modifier)
retval = int(retval / self._final_dims ** 2)
logger.debug("original_filters: %s, scaled_filters: %s", original_filters, retval)
return retval
def _do_upsampling(self, inputs: Tensor) -> Tensor:
""" Perform the upsampling at the end of the fully connected layers.
Parameters
----------
inputs: Tensor
The input to the upsample layers
Returns
-------
Tensor
The output from the upsample layers
"""
upsample_filts = self._scale_filters(self._config["fc_upsample_filters"])
upsampler = self._config["fc_upsampler"].lower()
num_upsamples = self._config["fc_upsamples"]
var_x = inputs
if upsampler == "upsample2d" and num_upsamples > 1:
upscaler = _get_upscale_layer(upsampler,
upsample_filts, # Not used but required
upsamples=2 ** num_upsamples,
interpolation="bilinear")
var_x = upscaler(var_x)
else:
for _ in range(num_upsamples):
upscaler = _get_upscale_layer(upsampler,
upsample_filts,
activation="leakyrelu")
var_x = upscaler(var_x)
if upsampler == "upsample2d":
var_x = LeakyReLU(alpha=0.1)(var_x)
return var_x
def __call__(self) -> keras.models.Model:
""" Call the intermediate layer.
Returns
-------
:class:`keras.models.Model`
The Fully connected model
"""
input_ = Input(shape=self._input_shape)
var_x = input_
node_curve = _get_curve(self._min_nodes,
self._max_nodes,
self._config[f"{self._prefix}_depth"],
self._config[f"{self._prefix}_filter_slope"])
if not self._config["bottleneck_in_encoder"]:
var_x = _bottleneck(var_x,
self._config["bottleneck_type"],
self._config["bottleneck_size"],
self._config["bottleneck_norm"])
dropout = f"{self._prefix}_dropout"
for idx, nodes in enumerate(node_curve):
var_x = Dropout(self._config[dropout], name=f"{dropout}_{idx + 1}")(var_x)
var_x = Dense(nodes)(var_x)
if self._side != "gblock":
dim = self._config["fc_dimensions"]
var_x = Reshape((dim, dim, int(self._max_nodes / (dim ** 2))))(var_x)
var_x = self._do_upsampling(var_x)
num_upscales = self._config["dec_upscales_in_fc"]
if num_upscales:
var_x = UpscaleBlocks(self._side,
self._config,
layer_indicies=(0, num_upscales))(var_x)
return KModel(input_, var_x, name=f"fc_{self._side}")
class UpscaleBlocks(): # pylint: disable=too-few-public-methods
""" Obtain a block of upscalers.
This class exists outside of the :class:`Decoder` model, as it is possible to place some of
the upscalers at the end of the Fully Connected Layers, so the upscale chain needs to be able
to be calculated by both the Fully Connected Layers and by the Decoder if required.
For this reason, the Upscale Filter list is created as a class attribute of the
:class:`UpscaleBlocks` layers for reference by either the Decoder or Fully Connected models
Parameters
----------
side: ["a", "b", "both", "shared"]
The side of the model that the Decoder belongs to. Used for naming
config: dict
The user configuration dictionary
layer_indices: tuple, optional
The tuple indicies indicating the starting layer index and the ending layer index to
generate upscales for. Used for when splitting upscales between the Fully Connected Layers
and the Decoder. ``None`` will generate the full Upscale chain. An end index of -1 will
generate the layers from the starting index to the final upscale. Default: ``None``
"""
_filters: list[int] = []
def __init__(self,
side: T.Literal["a", "b", "both", "shared"],
config: dict,
layer_indicies: tuple[int, int] | None = None) -> None:
logger.debug("Initializing: %s (side: %s, layer_indicies: %s)",
self.__class__.__name__, side, layer_indicies)
self._side = side
self._config = config
self._is_dny = self._config["dec_upscale_method"].lower() == "upscale_dny"
self._layer_indicies = layer_indicies
logger.debug("Initialized: %s", self.__class__.__name__,)
def _reshape_for_output(self, inputs: Tensor) -> Tensor:
""" Reshape the input for arbitrary output sizes.
The number of filters in the input will have been scaled to the model output size allowing
us to scale the dimensions to the requested output size.
Parameters
----------
inputs: tensor
The tensor that is to be reshaped
Returns
-------
tensor
The tensor shaped correctly to upscale to output size
"""
var_x = inputs
old_dim = K.int_shape(inputs)[1]
new_dim = _scale_dim(self._config["output_size"], old_dim)
if new_dim != old_dim:
old_shape = K.int_shape(inputs)[1:]
new_shape = (new_dim, new_dim, np.prod(old_shape) // new_dim ** 2)
logger.debug("Reshaping tensor from %s to %s for output size %s",
K.int_shape(inputs)[1:], new_shape, self._config["output_size"])
var_x = Reshape(new_shape)(var_x)
return var_x
def _upscale_block(self,
inputs: Tensor,
filters: int,
skip_residual: bool = False,
is_mask: bool = False) -> Tensor:
""" Upscale block for Phaze-A Decoder.
Uses requested upscale method, adds requested regularization and activation function.
Parameters
----------
inputs: tensor
The input tensor for the upscale block
filters: int
The number of filters to use for the upscale
skip_residual: bool, optional
``True`` if a residual block should not be placed in the upscale block, otherwise
``False``. Default ``False``
is_mask: bool, optional
``True`` if the input is a mask. ``False`` if the input is a face. Default: ``False``
Returns
-------
tensor
The output tensor from the upscale block
"""
upscaler = _get_upscale_layer(self._config["dec_upscale_method"].lower(),
filters,
activation="leakyrelu",
upsamples=2,
interpolation="bilinear")
var_x = upscaler(inputs)
if not is_mask and self._config["dec_gaussian"]:
var_x = GaussianNoise(1.0)(var_x)
if not is_mask and self._config["dec_res_blocks"] and not skip_residual:
var_x = self._normalization(var_x)
var_x = LeakyReLU(alpha=0.2)(var_x)
for _ in range(self._config["dec_res_blocks"]):
var_x = ResidualBlock(filters)(var_x)
else:
var_x = self._normalization(var_x)
if not self._is_dny:
var_x = LeakyReLU(alpha=0.1)(var_x)
return var_x
def _normalization(self, inputs: Tensor) -> Tensor:
""" Add a normalization layer if requested.
Parameters
----------
inputs: tensor
The input tensor to apply normalization to.
Returns
--------
tensor
The tensor with any normalization applied
"""
if not self._config["dec_norm"]:
return inputs
norms = {"batch": BatchNormalization,
"group": GroupNormalization,
"instance": InstanceNormalization,
"layer": LayerNormalization,
"rms": RMSNormalization}
return norms[self._config["dec_norm"]]()(inputs)
def _dny_entry(self, inputs: Tensor) -> Tensor:
""" Entry convolutions for using the upscale_dny method.
Parameters
----------
inputs: Tensor
The inputs to the dny entry block
Returns
-------
Tensor
The output from the dny entry block
"""
var_x = Conv2DBlock(self._config["dec_max_filters"],
kernel_size=4,
strides=1,
padding="same",
relu_alpha=0.2)(inputs)
var_x = Conv2DBlock(self._config["dec_max_filters"],
kernel_size=3,
strides=1,
padding="same",
relu_alpha=0.2)(var_x)
return var_x
def __call__(self, inputs: Tensor | list[Tensor]) -> Tensor | list[Tensor]:
""" Upscale Network.
Parameters
inputs: Tensor or list of tensors
Input tensor(s) to upscale block. This will be a single tensor if learn mask is not
selected or if this is the first call to the upscale blocks. If learn mask is selected
and this is not the first call to upscale blocks, then this will be a list of the face
and mask tensors.
Returns
-------
Tensor or list of tensors
The output of encoder blocks. Either a single tensor (if learn mask is not enabled) or
list of tensors (if learn mask is enabled)
"""
start_idx, end_idx = (0, None) if self._layer_indicies is None else self._layer_indicies
end_idx = None if end_idx == -1 else end_idx
if self._config["learn_mask"] and start_idx == 0:
# Mask needs to be created
var_x = inputs
var_y = inputs
elif self._config["learn_mask"]:
# Mask has already been created and is an input to upscale blocks
var_x, var_y = inputs
else:
# No mask required
var_x = inputs
if start_idx == 0:
var_x = self._reshape_for_output(var_x)
if self._config["learn_mask"]:
var_y = self._reshape_for_output(var_y)
if self._is_dny:
var_x = self._dny_entry(var_x)
if self._is_dny and self._config["learn_mask"]:
var_y = self._dny_entry(var_y)
# De-convolve
if not self._filters:
upscales = int(np.log2(self._config["output_size"] / K.int_shape(var_x)[1]))
self._filters.extend(_get_curve(self._config["dec_max_filters"],
self._config["dec_min_filters"],
upscales,
self._config["dec_filter_slope"],
mode=self._config["dec_slope_mode"]))
logger.debug("Generated class filters: %s", self._filters)
filters = self._filters[start_idx: end_idx]
for idx, filts in enumerate(filters):
skip_res = idx == len(filters) - 1 and self._config["dec_skip_last_residual"]
var_x = self._upscale_block(var_x, filts, skip_residual=skip_res)
if self._config["learn_mask"]:
var_y = self._upscale_block(var_y, filts, is_mask=True)
retval = [var_x, var_y] if self._config["learn_mask"] else var_x
return retval
class GBlock(): # pylint:disable=too-few-public-methods
""" G-Block model, borrowing from Adain StyleGAN.
Parameters
----------
side: ["a", "b", "both"]
The side of the model that the fully connected layers belong to. Used for naming
input_shapes: list or tuple
The shape tuples for the input to the G-Block. The first item is the input from each side's
fully connected model, the second item is the input shape from the combined fully connected
model.
config: dict
The user configuration dictionary
"""
def __init__(self,
side: T.Literal["a", "b", "both"],
input_shapes: list | tuple,
config: dict) -> None:
logger.debug("Initializing: %s (side: %s, input_shapes: %s)",
self.__class__.__name__, side, input_shapes)
self._side = side
self._config = config
self._inputs = [Input(shape=shape) for shape in input_shapes]
self._dense_nodes = 512
self._dense_recursions = 3
logger.debug("Initialized: %s", self.__class__.__name__)
@classmethod
def _g_block(cls, inputs: Tensor, style: Tensor, filters: int, recursions: int = 2) -> Tensor:
""" G_block adapted from ADAIN StyleGAN.
Parameters
----------
inputs: tensor
The input tensor to the G-Block model
style: tensor
The input combined 'style' tensor to the G-Block model
filters: int
The number of filters to use for the G-Block Convolutional layers
recursions: int, optional
The number of recursive Convolutions to process. Default: `2`
Returns
-------
tensor
The output tensor from the G-Block model
"""
var_x = inputs
for i in range(recursions):
styles = [Reshape([1, 1, filters])(Dense(filters)(style)) for _ in range(2)]
noise = KConv2D(filters, 1, padding="same")(GaussianNoise(1.0)(var_x))
if i == recursions - 1:
var_x = KConv2D(filters, 3, padding="same")(var_x)
var_x = AdaInstanceNormalization(dtype="float32")([var_x, *styles])
var_x = Add()([var_x, noise])
var_x = LeakyReLU(0.2)(var_x)
return var_x
def __call__(self) -> keras.models.Model:
""" G-Block Network.
Returns
-------
:class:`keras.models.Model`
The G-Block model
"""
var_x, style = self._inputs
for i in range(self._dense_recursions):
style = Dense(self._dense_nodes, kernel_initializer="he_normal")(style)
if i != self._dense_recursions - 1: # Don't add leakyReLu to final output
style = LeakyReLU(0.1)(style)
# Scale g_block filters to side dense
g_filts = K.int_shape(var_x)[-1]
var_x = Conv2D(g_filts, 3, strides=1, padding="same")(var_x)
var_x = GaussianNoise(1.0)(var_x)
var_x = self._g_block(var_x, style, g_filts)
return KModel(self._inputs, var_x, name=f"g_block_{self._side}")
class Decoder(): # pylint:disable=too-few-public-methods
""" Decoder Network.
Parameters
----------
side: ["a", "b", "both"]
The side of the model that the Decoder belongs to. Used for naming
input_shape: tuple
The shape tuple for the input to the decoder.
config: dict
The user configuration dictionary
"""
def __init__(self,
side: T.Literal["a", "b", "both"],
input_shape: tuple[int, int, int],
config: dict) -> None:
logger.debug("Initializing: %s (side: %s, input_shape: %s)",
self.__class__.__name__, side, input_shape)
self._side = side
self._input_shape = input_shape
self._config = config
logger.debug("Initialized: %s", self.__class__.__name__,)
def __call__(self) -> keras.models.Model:
""" Decoder Network.
Returns
-------
:class:`keras.models.Model`
The Decoder model
"""
inputs = Input(shape=self._input_shape)
num_ups_in_fc = self._config["dec_upscales_in_fc"]
if self._config["learn_mask"] and num_ups_in_fc:
# Mask has already been created in FC and is an output of that model
inputs = [inputs, Input(shape=self._input_shape)]
indicies = None if not num_ups_in_fc else (num_ups_in_fc, -1)
upscales = UpscaleBlocks(self._side,
self._config,
layer_indicies=indicies)(inputs)
if self._config["learn_mask"]:
var_x, var_y = upscales
else:
var_x = upscales
outputs = [Conv2DOutput(3, self._config["dec_output_kernel"], name="face_out")(var_x)]
if self._config["learn_mask"]:
outputs.append(Conv2DOutput(1,
self._config["dec_output_kernel"],
name="mask_out")(var_y))
return KModel(inputs, outputs=outputs, name=f"decoder_{self._side}")