mirror of
https://github.com/deepfakes/faceswap
synced 2025-06-07 10:43:27 -04:00
Remove plaidML support (#1325)
* Remove PlaidML reference from readme files * Remove AMD option from installers * remove amd requirements and update setup.py * remove plaidml test from CI workflow * gpustats: remove plaidml backend * plaid removals: - faceswap.py - python version check - setup.cfg - plaidml typing ignore - lib.keras_utils - All plaid code - lib.launcher.py - All plaidml checks and configuration * remove tf2.2 specific code from GUI event reader * lib.model - remove all plaidml implementations * plugins.extract - remove plaidml code * plugins.train remove plaidml code * lib.convert - remove plaidml code * tools.model: remove plaidml code * Remove plaidML tests from unit tests * remove plaidml_utils and docsting cleanups * Remove plaidML refs from configs * fix keras imports
This commit is contained in:
parent
82e927d3e7
commit
03f5c671bc
90 changed files with 2043 additions and 5547 deletions
18
.github/workflows/pytest.yml
vendored
18
.github/workflows/pytest.yml
vendored
|
@ -3,7 +3,7 @@ name: ci/build
|
|||
on:
|
||||
push:
|
||||
pull_request:
|
||||
paths-ignore:
|
||||
paths-ignore:
|
||||
- docs/**
|
||||
- "**/README.md"
|
||||
|
||||
|
@ -15,18 +15,13 @@ jobs:
|
|||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.7", "3.8", "3.9"]
|
||||
backend: ["amd", "cpu"]
|
||||
backend: ["cpu"]
|
||||
include:
|
||||
- kbackend: "plaidml.keras.backend"
|
||||
backend: "amd"
|
||||
- kbackend: "tensorflow"
|
||||
backend: "cpu"
|
||||
exclude:
|
||||
- python-version: 3.9
|
||||
backend: amd
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v3
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
@ -50,14 +45,11 @@ jobs:
|
|||
mypy .
|
||||
- name: Simple Tests
|
||||
run: |
|
||||
if [ "${{ matrix.backend }}" == "amd" ] ; then echo "{\"PLAIDML_DEVICE_IDS\":[\"llvm_cpu.0\"],\"PLAIDML_EXPERIMENTAL\":true}" > ~/.plaidml; fi ;
|
||||
echo "{\"PLAIDML_DEVICE_IDS\":[\"llvm_cpu.0\"],\"PLAIDML_EXPERIMENTAL\":true}" > ~/.plaidml;
|
||||
FACESWAP_BACKEND="${{ matrix.backend }}" KERAS_BACKEND="${{ matrix.kbackend }}" py.test -v tests/;
|
||||
- name: End to End Tests
|
||||
run: |
|
||||
FACESWAP_BACKEND="${{ matrix.backend }}" KERAS_BACKEND="${{ matrix.kbackend }}" python tests/simple_tests.py;
|
||||
if [ "${{ matrix.backend }}" == "amd" ] ; then rm -f ~/.plaidml; fi ;
|
||||
|
||||
|
||||
build_windows:
|
||||
runs-on: windows-latest
|
||||
strategy:
|
||||
|
@ -70,7 +62,7 @@ jobs:
|
|||
- backend: "directml"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v3
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
|
|
@ -134,13 +134,12 @@ ask_version() {
|
|||
# Ask which version of faceswap to install
|
||||
while true; do
|
||||
default=1
|
||||
read -rp $'\e[36mSelect:\t1: NVIDIA\n\t2: AMD (ROCm)\n\t3: CPU\n\t4: AMD (PlaidML) - deprecated\n'"[default: $default]: "$'\e[97m' vers
|
||||
read -rp $'\e[36mSelect:\t1: NVIDIA\n\t2: AMD (ROCm)\n\t3: CPU\n'"[default: $default]: "$'\e[97m' vers
|
||||
vers="${vers:-${default}}"
|
||||
case $vers in
|
||||
1) VERSION="nvidia" ; break ;;
|
||||
2) VERSION="rocm" ; break ;;
|
||||
3) VERSION="cpu" ; break ;;
|
||||
4) VERSION="amd" ; PYENV_VERSION="3.8" ; break ;;
|
||||
* ) echo "Invalid selection." ;;
|
||||
esac
|
||||
done
|
||||
|
@ -281,11 +280,6 @@ faceswap_opts () {
|
|||
latest graphics card drivers installed from the relevant vendor. Please select the version\
|
||||
of Faceswap you wish to install."
|
||||
ask_version
|
||||
if [ $VERSION == "amd" ] ; then
|
||||
warn "PlaidML support is deprecated and will be removed in a future update. If possible \
|
||||
please consider using the ROCm version"
|
||||
sleep 2
|
||||
fi
|
||||
if [ $VERSION == "rocm" ] ; then
|
||||
warn "ROCm support is experimental. Please make sure that your GPU is supported by ROCm and that \
|
||||
ROCm has been installed on your system before proceeding. Installation instructions: \
|
||||
|
@ -328,10 +322,6 @@ review() {
|
|||
fi
|
||||
echo " - Faceswap will be installed in '$DIR_FACESWAP'"
|
||||
echo " - Installing for '$VERSION'"
|
||||
if [ $VERSION == "amd" ] ; then
|
||||
echo -e " \e[33m- Note: '$VERSION' is deprecated and will be removed in a\e[97m"
|
||||
echo -e " \e[33m future update. Consider using the ROCm version.\e[97m"
|
||||
fi
|
||||
if [ $VERSION == "rocm" ] ; then
|
||||
echo -e " \e[33m- Note: Please ensure that ROCm is supported by your GPU\e[97m"
|
||||
echo -e " \e[33m and is installed prior to proceeding.\e[97m"
|
||||
|
|
|
@ -22,7 +22,7 @@ InstallDir $PROFILE\faceswap
|
|||
# Install cli flags
|
||||
!define flagsConda "/S /RegisterPython=0 /AddToPath=0 /D=$PROFILE\MiniConda3"
|
||||
!define flagsRepo "--depth 1 --no-single-branch ${wwwRepo}"
|
||||
!define flagsEnv "-y python=3."
|
||||
!define flagsEnv "-y python=3.9"
|
||||
|
||||
# Folders
|
||||
Var ProgramData
|
||||
|
@ -129,32 +129,25 @@ Function pgPrereqCreate
|
|||
${NSD_CreateLabel} 10% $lblPos% 80% 14u "Faceswap"
|
||||
Pop $0
|
||||
|
||||
StrCpy $lblPos 50
|
||||
StrCpy $lblPos 46
|
||||
# Info Custom Options
|
||||
${NSD_CreateGroupBox} 5% 40% 90% 120% "Custom Items"
|
||||
${NSD_CreateGroupBox} 5% 40% 90% 60% "Custom Items"
|
||||
Pop $0
|
||||
${NSD_CreateRadioButton} 10% $lblPos% 27% 11u "Setup for NVIDIA GPU"
|
||||
Pop $ctlRadio
|
||||
${NSD_AddStyle} $ctlRadio ${WS_GROUP}
|
||||
nsDialogs::SetUserData $ctlRadio "nvidia"
|
||||
${NSD_OnClick} $ctlRadio RadioClick
|
||||
${NSD_CreateRadioButton} 50% $lblPos% 30% 11u "Setup for DirectML"
|
||||
${NSD_CreateRadioButton} 40% $lblPos% 25% 11u "Setup for DirectML"
|
||||
Pop $ctlRadio
|
||||
nsDialogs::SetUserData $ctlRadio "directml"
|
||||
${NSD_OnClick} $ctlRadio RadioClick
|
||||
|
||||
intOp $lblPos $lblPos + 10
|
||||
|
||||
${NSD_CreateRadioButton} 10% $lblPos% 25% 11u "Setup for CPU"
|
||||
${NSD_CreateRadioButton} 70% $lblPos% 20% 11u "Setup for CPU"
|
||||
Pop $ctlRadio
|
||||
nsDialogs::SetUserData $ctlRadio "cpu"
|
||||
${NSD_OnClick} $ctlRadio RadioClick
|
||||
${NSD_CreateRadioButton} 50% $lblPos% 40% 11u "Setup for AMD (deprecated)"
|
||||
Pop $ctlRadio
|
||||
nsDialogs::SetUserData $ctlRadio "amd"
|
||||
${NSD_OnClick} $ctlRadio RadioClick
|
||||
|
||||
intOp $lblPos $lblPos + 12
|
||||
intOp $lblPos $lblPos + 10
|
||||
|
||||
${NSD_CreateLabel} 10% $lblPos% 80% 10u "Environment Name (NB: Existing envs with this name will be deleted):"
|
||||
pop $0
|
||||
|
@ -404,11 +397,6 @@ Function SetEnvironment
|
|||
|
||||
CreateEnv:
|
||||
SetDetailsPrint listonly
|
||||
${If} $setupType == "amd"
|
||||
StrCpy $0 "${flagsEnv}8"
|
||||
${else}
|
||||
StrCpy $0 "${flagsEnv}9"
|
||||
${EndIf}
|
||||
ExecDos::exec /NOUNLOAD /ASYNC /DETAILED "$\"$dirConda\scripts\activate.bat$\" && conda create $0 -n $\"$envName$\" && conda deactivate"
|
||||
pop $0
|
||||
ExecDos::wait $0
|
||||
|
|
|
@ -62,7 +62,6 @@ The type of computations that the process does are well suited for graphics card
|
|||
- DirectX 12 AMD GPUs are supported on Windows through DirectML.
|
||||
- More modern AMD GPUs are supported on Linux through ROCm.
|
||||
- M-series Macs are supported through Tensorflow-Metal
|
||||
- OpenCL 1.2 support through PlaidML is deprecated and will be removed in a future update
|
||||
- If using an Nvidia GPU, then it needs to support at least CUDA Compute Capability 3.5. (Release 1.0 will work on Compute Capability 3.0)
|
||||
To see which version your GPU supports, consult this list: https://developer.nvidia.com/cuda-gpus
|
||||
Desktop cards later than the 7xx series are most likely supported.
|
||||
|
@ -142,7 +141,6 @@ If you are using an Nvidia card make sure you have the correct versions of Cuda/
|
|||
- Install tkinter (required for the GUI) by typing: `conda install tk`
|
||||
- Install requirements:
|
||||
- For Nvidia GPU users: `pip install -r ./requirements/requirements_nvidia.txt`
|
||||
- For AMD GPU users: `pip install -r ./requirements/requirements_amd.txt`
|
||||
- For CPU users: `pip install -r ./requirements/requirements_cpu.txt`
|
||||
|
||||
## Running faceswap
|
||||
|
|
|
@ -79,7 +79,7 @@ We are very troubled by the fact that FaceSwap can be used for unethical and dis
|
|||
# How To setup and run the project
|
||||
FaceSwap is a Python program that will run on multiple Operating Systems including Windows, Linux, and MacOS.
|
||||
|
||||
See [INSTALL.md](INSTALL.md) for full installation instructions. You will need a modern GPU with CUDA support for best performance. AMD GPUs are partially supported.
|
||||
See [INSTALL.md](INSTALL.md) for full installation instructions. You will need a modern GPU with CUDA support for best performance. Many AMD GPUs are supported through DirectML (Windows) and ROCm (Linux).
|
||||
|
||||
# Overview
|
||||
The project has multiple entry points. You will have to:
|
||||
|
|
|
@ -11,7 +11,6 @@ if sys.platform.startswith("win"):
|
|||
|
||||
from lib.cli import args as cli_args # pylint:disable=wrong-import-position
|
||||
from lib.config import generate_configs # pylint:disable=wrong-import-position
|
||||
from lib.utils import get_backend # pylint:disable=wrong-import-position
|
||||
|
||||
# LOCALES
|
||||
_LANG = gettext.translation("faceswap", localedir="locales", fallback=True)
|
||||
|
@ -19,9 +18,6 @@ _ = _LANG.gettext
|
|||
|
||||
if sys.version_info < (3, 7):
|
||||
raise ValueError("This program requires at least python3.7")
|
||||
if get_backend() == "amd" and sys.version_info >= (3, 9):
|
||||
raise ValueError("The AMD version of Faceswap cannot run on versions of Python higher "
|
||||
"than 3.8")
|
||||
|
||||
|
||||
_PARSER = cli_args.FullHelpArgumentParser()
|
||||
|
|
|
@ -10,7 +10,7 @@ from typing import Callable, TYPE_CHECKING
|
|||
|
||||
from lib.gpu_stats import set_exclude_devices, GPUStats
|
||||
from lib.logger import crash_log, log_setup
|
||||
from lib.utils import (deprecation_warning, FaceswapError, get_backend, get_tf_version,
|
||||
from lib.utils import (FaceswapError, get_backend, get_tf_version,
|
||||
safe_shutdown, set_backend, set_system_verbosity)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -99,7 +99,6 @@ class ScriptExecutor(): # pylint:disable=too-few-public-methods
|
|||
FaceswapError
|
||||
If Tensorflow is not found, or is not between versions 2.4 and 2.9
|
||||
"""
|
||||
amd_ver = (2, 2)
|
||||
directml_ver = rocm_ver = (2, 10)
|
||||
min_ver = (2, 7)
|
||||
max_ver = (2, 10)
|
||||
|
@ -122,18 +121,14 @@ class ScriptExecutor(): # pylint:disable=too-few-public-methods
|
|||
|
||||
tf_ver = get_tf_version()
|
||||
backend = get_backend()
|
||||
if backend != "amd" and tf_ver < min_ver:
|
||||
if tf_ver < min_ver:
|
||||
msg = (f"The minimum supported Tensorflow is version {min_ver} but you have version "
|
||||
f"{tf_ver} installed. Please upgrade Tensorflow.")
|
||||
self._handle_import_error(msg)
|
||||
if backend != "amd" and tf_ver > max_ver:
|
||||
if tf_ver > max_ver:
|
||||
msg = (f"The maximum supported Tensorflow is version {max_ver} but you have version "
|
||||
f"{tf_ver} installed. Please downgrade Tensorflow.")
|
||||
self._handle_import_error(msg)
|
||||
if backend == "amd" and tf_ver != amd_ver:
|
||||
msg = (f"The supported Tensorflow version for AMD cards is {amd_ver} but you have "
|
||||
f"version {tf_ver} installed. Please install the correct version.")
|
||||
self._handle_import_error(msg)
|
||||
if backend == "directml" and tf_ver != directml_ver:
|
||||
msg = (f"The supported Tensorflow version for DirectML cards is {directml_ver} but "
|
||||
f"you have version {tf_ver} installed. Please install the correct version.")
|
||||
|
@ -283,42 +278,7 @@ class ScriptExecutor(): # pylint:disable=too-few-public-methods
|
|||
|
||||
if GPUStats().exclude_all_devices:
|
||||
msg = "Switching backend to CPU"
|
||||
if get_backend() == "amd":
|
||||
msg += (". Using Tensorflow for CPU operations.")
|
||||
os.environ["KERAS_BACKEND"] = "tensorflow"
|
||||
set_backend("cpu")
|
||||
logger.info(msg)
|
||||
|
||||
logger.debug("Executing: %s. PID: %s", self._command, os.getpid())
|
||||
|
||||
if get_backend() == "amd" and not self._setup_amd(arguments):
|
||||
safe_shutdown(got_error=True)
|
||||
|
||||
@classmethod
|
||||
def _setup_amd(cls, arguments: "argparse.Namespace") -> bool:
|
||||
""" Test for plaidml and perform setup for AMD.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
arguments: :class:`argparse.Namespace`
|
||||
The command line arguments passed to Faceswap.
|
||||
|
||||
Returns
|
||||
-------
|
||||
bool
|
||||
``True`` if AMD was set up succesfully otherwise ``False``
|
||||
"""
|
||||
logger.debug("Setting up for AMD")
|
||||
if platform.system() == "Windows":
|
||||
deprecation_warning("The AMD backend",
|
||||
additional_info="Please consider re-installing using the "
|
||||
"'DirectML' backend")
|
||||
try:
|
||||
import plaidml # noqa pylint:disable=unused-import,import-outside-toplevel
|
||||
except ImportError:
|
||||
logger.error("PlaidML not found. Run `pip install plaidml-keras` for AMD support")
|
||||
return False
|
||||
from lib.gpu_stats import setup_plaidml # pylint:disable=import-outside-toplevel
|
||||
setup_plaidml(arguments.loglevel, arguments.exclude_gpus)
|
||||
logger.debug("setup up for PlaidML")
|
||||
return True
|
||||
|
|
|
@ -14,8 +14,6 @@ if backend == "nvidia" and platform.system().lower() == "darwin":
|
|||
from .nvidia_apple import NvidiaAppleStats as GPUStats # type:ignore
|
||||
elif backend == "nvidia":
|
||||
from .nvidia import NvidiaStats as GPUStats # type:ignore
|
||||
elif backend == "amd":
|
||||
from .amd import AMDStats as GPUStats, setup_plaidml # type:ignore
|
||||
elif backend == "apple_silicon":
|
||||
from .apple_silicon import AppleSiliconStats as GPUStats # type:ignore
|
||||
elif backend == "directml":
|
||||
|
|
|
@ -1,400 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Collects and returns Information on available AMD GPUs. """
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
import plaidml
|
||||
|
||||
from ._base import _GPUStats, _EXCLUDE_DEVICES
|
||||
|
||||
|
||||
_PLAIDML_INITIALIZED: bool = False
|
||||
|
||||
|
||||
def setup_plaidml(log_level: str, exclude_devices: List[int]) -> None:
|
||||
""" Setup PlaidML for AMD Cards.
|
||||
|
||||
Sets the Keras backend to PlaidML, loads the plaidML backend and makes GPU Device information
|
||||
from PlaidML available to :class:`AMDStats`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
log_level: str
|
||||
Faceswap's log level. Used for setting the log level inside PlaidML
|
||||
exclude_devices: list
|
||||
A list of integers of device IDs that should not be used by Faceswap
|
||||
"""
|
||||
logger = logging.getLogger(__name__) # pylint:disable=invalid-name
|
||||
logger.info("Setting up for PlaidML")
|
||||
logger.verbose("Setting Keras Backend to PlaidML") # type:ignore
|
||||
# Add explicitly excluded devices to list. The contents are checked in AMDstats
|
||||
if exclude_devices:
|
||||
_EXCLUDE_DEVICES.extend(int(idx) for idx in exclude_devices)
|
||||
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
|
||||
stats = AMDStats(log_level=log_level)
|
||||
logger.info("Using GPU(s): %s", [stats.names[i] for i in stats.active_devices])
|
||||
logger.info("Successfully set up for PlaidML")
|
||||
|
||||
|
||||
class AMDStats(_GPUStats):
|
||||
""" Holds information and statistics about AMD GPU(s) available on the currently
|
||||
running system.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The quality of data that returns is very much dependent on the OpenCL implementation used
|
||||
for a particular OS. Some data is just not available at all, so assumptions and substitutions
|
||||
are made where required. PlaidML is used as an interface into OpenCL to obtain the required
|
||||
information.
|
||||
|
||||
PlaidML is explicitly initialized inside this class, as it can be called from the command line
|
||||
arguments to list available GPUs. PlaidML needs to be set up and configured to obtain reliable
|
||||
information. As the function :func:`setup_plaidml` is called very early within the Faceswap
|
||||
and launch process and it references this class, initial PlaidML setup can all be handled here.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
log: bool, optional
|
||||
Whether the class should output information to the logger. There may be occasions where the
|
||||
logger has not yet been set up when this class is queried. Attempting to log in these
|
||||
instances will raise an error. If GPU stats are being queried prior to the logger being
|
||||
available then this parameter should be set to ``False``. Otherwise set to ``True``.
|
||||
Default: ``True``
|
||||
"""
|
||||
def __init__(self, log: bool = True, log_level: str = "INFO") -> None:
|
||||
|
||||
self._log_level: str = log_level.upper()
|
||||
|
||||
# Following attributes are set in :func:``_initialize``
|
||||
self._ctx: Optional[plaidml.Context] = None
|
||||
self._supported_devices: List[plaidml._DeviceConfig] = []
|
||||
self._all_devices: List[plaidml._DeviceConfig] = []
|
||||
self._device_details: List[dict] = []
|
||||
|
||||
super().__init__(log=log)
|
||||
|
||||
@property
|
||||
def active_devices(self) -> List[int]:
|
||||
""" list: The active device ids in use. """
|
||||
return self._active_devices
|
||||
|
||||
@property
|
||||
def _plaid_ids(self) -> List[str]:
|
||||
""" list: The device identification for each GPU device that PlaidML has discovered. """
|
||||
return [device.id.decode("utf-8", errors="replace") for device in self._all_devices]
|
||||
|
||||
@property
|
||||
def _experimental_indices(self) -> List[int]:
|
||||
""" list: The indices corresponding to :attr:`_ids` of GPU devices marked as
|
||||
"experimental". """
|
||||
retval = [idx for idx, device in enumerate(self._all_devices)
|
||||
if device not in self._supported_indices]
|
||||
return retval
|
||||
|
||||
@property
|
||||
def _supported_indices(self) -> List[int]:
|
||||
""" list: The indices corresponding to :attr:`_ids` of GPU devices marked as
|
||||
"supported". """
|
||||
retval = [idx for idx, device in enumerate(self._all_devices)
|
||||
if device in self._supported_devices]
|
||||
return retval
|
||||
|
||||
@property
|
||||
def _all_vram(self) -> List[int]:
|
||||
""" list: The VRAM of each GPU device that PlaidML has discovered. """
|
||||
return [int(int(device.get("globalMemSize", 0)) / (1024 * 1024))
|
||||
for device in self._device_details]
|
||||
|
||||
@property
|
||||
def names(self) -> List[str]:
|
||||
""" list: The name of each GPU device that PlaidML has discovered. """
|
||||
return [f"{device.get('vendor', 'unknown')} - {device.get('name', 'unknown')} "
|
||||
f"({ 'supported' if idx in self._supported_indices else 'experimental'})"
|
||||
for idx, device in enumerate(self._device_details)]
|
||||
|
||||
def _initialize(self) -> None:
|
||||
""" Initialize PlaidML for AMD GPUs.
|
||||
|
||||
If :attr:`_is_initialized` is ``True`` then this function just returns performing no
|
||||
action.
|
||||
|
||||
if ``False`` then PlaidML is setup, if not already, and GPU information is extracted
|
||||
from the PlaidML context.
|
||||
"""
|
||||
if self._is_initialized:
|
||||
return
|
||||
self._log("debug", "Initializing PlaidML for AMD GPU.")
|
||||
|
||||
self._initialize_plaidml()
|
||||
|
||||
self._ctx = plaidml.Context()
|
||||
self._supported_devices = self._get_supported_devices()
|
||||
self._all_devices = self._get_all_devices()
|
||||
self._device_details = self._get_device_details()
|
||||
self._select_device()
|
||||
|
||||
super()._initialize()
|
||||
|
||||
def _initialize_plaidml(self) -> None:
|
||||
""" Initialize PlaidML on first call to this class and set global
|
||||
:attr:``_PLAIDML_INITIALIZED`` to ``True``. If PlaidML has already been initialized then
|
||||
return performing no action. """
|
||||
global _PLAIDML_INITIALIZED # pylint:disable=global-statement
|
||||
|
||||
if _PLAIDML_INITIALIZED:
|
||||
return
|
||||
|
||||
self._log("debug", "Performing first time PlaidML setup.")
|
||||
self._set_plaidml_logger()
|
||||
|
||||
_PLAIDML_INITIALIZED = True
|
||||
|
||||
def _set_plaidml_logger(self) -> None:
|
||||
""" Set PlaidMLs default logger to Faceswap Logger, prevent propagation and set the correct
|
||||
log level. """
|
||||
self._log("debug", "Setting PlaidML Default Logger")
|
||||
|
||||
plaidml.DEFAULT_LOG_HANDLER = logging.getLogger("plaidml_root")
|
||||
plaidml.DEFAULT_LOG_HANDLER.propagate = False
|
||||
|
||||
numeric_level = getattr(logging, self._log_level, None)
|
||||
assert numeric_level is not None
|
||||
if numeric_level < 10: # DEBUG Logging
|
||||
plaidml._internal_set_vlog(1) # pylint:disable=protected-access
|
||||
elif numeric_level < 20: # INFO Logging
|
||||
plaidml._internal_set_vlog(0) # pylint:disable=protected-access
|
||||
else: # WARNING LOGGING
|
||||
plaidml.quiet()
|
||||
|
||||
def _get_supported_devices(self) -> List[plaidml._DeviceConfig]:
|
||||
""" Obtain GPU devices from PlaidML that are marked as "supported".
|
||||
|
||||
Returns
|
||||
-------
|
||||
list_LOGGER.
|
||||
The :class:`plaidml._DeviceConfig` objects for all supported GPUs that PlaidML has
|
||||
discovered.
|
||||
"""
|
||||
experimental_setting = plaidml.settings.experimental
|
||||
|
||||
plaidml.settings.experimental = False
|
||||
devices = plaidml.devices(self._ctx, limit=100, return_all=True)[0]
|
||||
plaidml.settings.experimental = experimental_setting
|
||||
|
||||
supported = [d for d in devices
|
||||
if d.details
|
||||
and json.loads(
|
||||
d.details.decode("utf-8",
|
||||
errors="replace")).get("type", "cpu").lower() == "gpu"]
|
||||
|
||||
self._log("debug", f"Obtained supported devices: {supported}")
|
||||
return supported
|
||||
|
||||
def _get_all_devices(self) -> List[plaidml._DeviceConfig]:
|
||||
""" Obtain all available (experimental and supported) GPU devices from PlaidML.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list
|
||||
The :class:`pladml._DeviceConfig` objects for GPUs that PlaidML has discovered.
|
||||
"""
|
||||
experimental_setting = plaidml.settings.experimental
|
||||
plaidml.settings.experimental = True
|
||||
devices = plaidml.devices(self._ctx, limit=100, return_all=True)[0]
|
||||
plaidml.settings.experimental = experimental_setting
|
||||
|
||||
experi = [d for d in devices
|
||||
if d.details
|
||||
and json.loads(
|
||||
d.details.decode("utf-8",
|
||||
errors="replace")).get("type", "cpu").lower() == "gpu"]
|
||||
|
||||
self._log("debug", f"Obtained experimental Devices: {experi}")
|
||||
|
||||
all_devices = experi + self._supported_devices
|
||||
all_devices = all_devices if all_devices else self._get_fallback_devices() # Use CPU
|
||||
|
||||
self._log("debug", f"Obtained all Devices: {all_devices}")
|
||||
return all_devices
|
||||
|
||||
def _get_fallback_devices(self) -> List[plaidml._DeviceConfig]:
|
||||
""" Called if a GPU has not been discovered. Return any devices we can run on.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list:
|
||||
The :class:`pladml._DeviceConfig` fallaback objects that PlaidML has discovered.
|
||||
"""
|
||||
# Try get a supported device
|
||||
experimental_setting = plaidml.settings.experimental
|
||||
plaidml.settings.experimental = False
|
||||
devices = plaidml.devices(self._ctx, limit=100, return_all=True)[0]
|
||||
|
||||
# Try get any device
|
||||
if not devices:
|
||||
plaidml.settings.experimental = True
|
||||
devices = plaidml.devices(self._ctx, limit=100, return_all=True)[0]
|
||||
|
||||
plaidml.settings.experimental = experimental_setting
|
||||
|
||||
if not devices:
|
||||
raise RuntimeError("No valid devices could be found for plaidML.")
|
||||
|
||||
self._log("warning", f"PlaidML could not find a GPU. Falling back to: "
|
||||
f"{[d.id.decode('utf-8', errors='replace') for d in devices]}")
|
||||
return devices
|
||||
|
||||
def _get_device_details(self) -> List[dict]:
|
||||
""" Obtain the device details for all connected AMD GPUS.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list
|
||||
The `dict` device detail for all GPUs that PlaidML has discovered.
|
||||
"""
|
||||
details = []
|
||||
for dev in self._all_devices:
|
||||
if dev.details:
|
||||
details.append(json.loads(dev.details.decode("utf-8", errors="replace")))
|
||||
else:
|
||||
details.append(dict(vendor=dev.id.decode("utf-8", errors="replace"),
|
||||
name=dev.description.decode("utf-8", errors="replace"),
|
||||
globalMemSize=4 * 1024 * 1024 * 1024)) # 4GB dummy ram
|
||||
self._log("debug", f"Obtained Device details: {details}")
|
||||
return details
|
||||
|
||||
def _select_device(self) -> None:
|
||||
"""
|
||||
If the plaidml user configuration settings exist, then set the default GPU from the
|
||||
settings file, Otherwise set the GPU to be the one with most VRAM. """
|
||||
if os.path.exists(plaidml.settings.user_settings): # pylint:disable=no-member
|
||||
self._log("debug", "Setting PlaidML devices from user_settings")
|
||||
else:
|
||||
self._select_largest_gpu()
|
||||
|
||||
def _select_largest_gpu(self) -> None:
|
||||
""" Set the default GPU to be a supported device with the most available VRAM. If no
|
||||
supported device is available, then set the GPU to be an experimental device with the
|
||||
most VRAM available. """
|
||||
category = "supported" if self._supported_devices else "experimental"
|
||||
self._log("debug", f"Obtaining largest {category} device")
|
||||
|
||||
indices = getattr(self, f"_{category}_indices")
|
||||
if not indices:
|
||||
self._log("error", "Failed to automatically detect your GPU.")
|
||||
self._log("error", "Please run `plaidml-setup` to set up your GPU.")
|
||||
sys.exit(1)
|
||||
|
||||
max_vram = max(self._all_vram[idx] for idx in indices)
|
||||
self._log("debug", f"Max VRAM: {max_vram}")
|
||||
|
||||
gpu_idx = min(idx for idx, vram in enumerate(self._all_vram)
|
||||
if vram == max_vram and idx in indices)
|
||||
self._log("debug", f"GPU IDX: {gpu_idx}")
|
||||
|
||||
selected_gpu = self._plaid_ids[gpu_idx]
|
||||
self._log("info", f"Setting GPU to largest available {category} device. If you want to "
|
||||
"override this selection, run `plaidml-setup` from the command line.")
|
||||
|
||||
plaidml.settings.experimental = category == "experimental"
|
||||
plaidml.settings.device_ids = [selected_gpu]
|
||||
|
||||
def _get_device_count(self) -> int:
|
||||
""" Detect the number of AMD GPUs available from PlaidML.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int
|
||||
The total number of AMD GPUs available
|
||||
"""
|
||||
retval = len(self._all_devices)
|
||||
self._log("debug", f"GPU Device count: {retval}")
|
||||
return retval
|
||||
|
||||
def _get_active_devices(self) -> List[int]:
|
||||
""" Obtain the indices of active GPUs (those that have not been explicitly excluded by
|
||||
PlaidML or explicitly excluded in the command line arguments).
|
||||
|
||||
Returns
|
||||
-------
|
||||
list
|
||||
The list of device indices that are available for Faceswap to use
|
||||
"""
|
||||
devices = [idx for idx, d_id in enumerate(self._plaid_ids)
|
||||
if d_id in plaidml.settings.device_ids and idx not in _EXCLUDE_DEVICES]
|
||||
self._log("debug", f"Active GPU Devices: {devices}")
|
||||
return devices
|
||||
|
||||
def _get_handles(self) -> list:
|
||||
""" AMD Doesn't really use device handles, so we just return the all devices list
|
||||
|
||||
Returns
|
||||
-------
|
||||
list
|
||||
The list of all AMD discovered GPUs
|
||||
"""
|
||||
handles = self._all_devices
|
||||
self._log("debug", f"AMD GPU Handles found: {handles}")
|
||||
return handles
|
||||
|
||||
def _get_driver(self) -> str:
|
||||
""" Obtain the AMD driver version currently in use.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
The current AMD GPU driver versions
|
||||
"""
|
||||
drivers = "|".join([device.get("driverVersion", "No Driver Found")
|
||||
for device in self._device_details])
|
||||
self._log("debug", f"GPU Drivers: {drivers}")
|
||||
return drivers
|
||||
|
||||
def _get_device_names(self) -> List[str]:
|
||||
""" Obtain the list of names of connected AMD GPUs as identified in :attr:`_handles`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list
|
||||
The list of connected Nvidia GPU names
|
||||
"""
|
||||
names = self.names
|
||||
self._log("debug", f"GPU Devices: {names}")
|
||||
return names
|
||||
|
||||
def _get_vram(self) -> List[int]:
|
||||
""" Obtain the VRAM in Megabytes for each connected AMD GPU as identified in
|
||||
:attr:`_handles`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list
|
||||
The VRAM in Megabytes for each connected Nvidia GPU
|
||||
"""
|
||||
vram = self._all_vram
|
||||
self._log("debug", f"GPU VRAM: {vram}")
|
||||
return vram
|
||||
|
||||
def _get_free_vram(self) -> List[int]:
|
||||
""" Obtain the amount of VRAM that is available, in Megabytes, for each connected AMD
|
||||
GPU.
|
||||
|
||||
Notes
|
||||
-----
|
||||
There is no useful way to get free VRAM on PlaidML. OpenCL loads and unloads VRAM as
|
||||
required, so this returns the total memory available per card for AMD GPUs, which is
|
||||
not particularly useful.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list
|
||||
List of `float`s containing the amount of VRAM available, in Megabytes, for each
|
||||
connected GPU as corresponding to the values in :attr:`_handles
|
||||
"""
|
||||
vram = self._all_vram
|
||||
self._log("debug", f"GPU VRAM free: {vram}")
|
||||
return vram
|
|
@ -10,7 +10,7 @@ from dataclasses import dataclass
|
|||
from enum import Enum, IntEnum
|
||||
from typing import Any, Callable, cast, List
|
||||
|
||||
from comtypes import COMError, IUnknown, GUID, STDMETHOD, HRESULT
|
||||
from comtypes import COMError, IUnknown, GUID, STDMETHOD, HRESULT # pylint:disable=import-error
|
||||
|
||||
from ._base import _GPUStats
|
||||
|
||||
|
@ -168,7 +168,7 @@ class DXGIQueryVideoMemoryInfo(StructureRepr): # pylint:disable=too-few-public-
|
|||
|
||||
|
||||
# COM OBjects
|
||||
class IDXObject(IUnknown):
|
||||
class IDXObject(IUnknown): # pylint:disable=too-few-public-methods
|
||||
""" Base interface for all DXGI objects.
|
||||
|
||||
Reference
|
||||
|
@ -184,7 +184,7 @@ class IDXObject(IUnknown):
|
|||
STDMETHOD(HRESULT, "GetParent", [GUID, POINTER(POINTER(ctypes.c_void_p))])]
|
||||
|
||||
|
||||
class IDXGIFactory6(IDXObject):
|
||||
class IDXGIFactory6(IDXObject): # pylint:disable=too-few-public-methods
|
||||
""" Implements methods for generating DXGI objects
|
||||
|
||||
Reference
|
||||
|
@ -224,7 +224,7 @@ class IDXGIFactory6(IDXObject):
|
|||
POINTER(ctypes.c_void_p)])]
|
||||
|
||||
|
||||
class IDXGIAdapter3(IDXObject):
|
||||
class IDXGIAdapter3(IDXObject): # pylint:disable=too-few-public-methods
|
||||
""" Represents a display sub-system (including one or more GPU's, DACs and video memory).
|
||||
|
||||
Reference
|
||||
|
@ -536,8 +536,8 @@ class DirectML(_GPUStats):
|
|||
If :attr:`_is_initialized` is ``True`` then this function just returns performing no
|
||||
action.
|
||||
|
||||
if ``False`` then PlaidML is setup, if not already, and GPU information is extracted
|
||||
from the PlaidML context.
|
||||
if ``False`` then DirectML is setup, if not already, and GPU information is extracted
|
||||
from the DirectML context.
|
||||
"""
|
||||
if self._is_initialized:
|
||||
return
|
||||
|
|
|
@ -16,7 +16,6 @@ from tensorflow.python.framework import ( # pylint:disable=no-name-in-module
|
|||
errors_impl as tf_errors)
|
||||
|
||||
from lib.serializer import get_serializer
|
||||
from lib.utils import get_backend
|
||||
|
||||
if sys.version_info < (3, 8):
|
||||
from typing_extensions import Literal
|
||||
|
@ -692,9 +691,6 @@ class _EventParser(): # pylint:disable=too-few-public-methods
|
|||
continue
|
||||
if event.summary.value[0].tag == "keras":
|
||||
self._parse_outputs(event)
|
||||
if get_backend() == "amd":
|
||||
# No model is logged for AMD so need to get loss labels from state file
|
||||
self._add_amd_loss_labels(session_id)
|
||||
if event.summary.value[0].tag.startswith("batch_"):
|
||||
data[event.step] = self._process_event(event,
|
||||
data.get(event.step, EventData()))
|
||||
|
@ -771,28 +767,6 @@ class _EventParser(): # pylint:disable=too-few-public-methods
|
|||
outputs, outputs.shape)
|
||||
return outputs
|
||||
|
||||
def _add_amd_loss_labels(self, session_id: int) -> None:
|
||||
""" It is not possible to store the model config in the Tensorboard logs for AMD so we
|
||||
need to obtain the loss labels from the model's state file. This is called now so we know
|
||||
event data is being written, and therefore the most current loss label data is available
|
||||
in the state file.
|
||||
|
||||
Loss names are added to :attr:`_loss_labels`
|
||||
|
||||
Parameters
|
||||
----------
|
||||
session_id: int
|
||||
The session id that the data is being cached for
|
||||
|
||||
"""
|
||||
if self._cache._loss_labels: # pylint:disable=protected-access
|
||||
return
|
||||
# Import global session here to prevent circular import
|
||||
from . import Session # pylint:disable=import-outside-toplevel
|
||||
loss_labels = sorted(Session.get_loss_keys(session_id=session_id))
|
||||
self._loss_labels = loss_labels
|
||||
logger.debug("Collated loss labels: %s", self._loss_labels)
|
||||
|
||||
@classmethod
|
||||
def _process_event(cls, event: event_pb2.Event, step: EventData) -> EventData:
|
||||
""" Process a single Tensorflow event.
|
||||
|
@ -815,7 +789,7 @@ class _EventParser(): # pylint:disable=too-few-public-methods
|
|||
"""
|
||||
summary = event.summary.value[0]
|
||||
|
||||
if summary.tag in ("batch_loss", "batch_total"): # Pre tf2.3 totals were "batch_total"
|
||||
if summary.tag == "batch_loss":
|
||||
step.timestamp = event.wall_time
|
||||
return step
|
||||
|
||||
|
|
|
@ -18,7 +18,6 @@ from typing import Any, cast, Dict, List, Optional, overload, Tuple, Union
|
|||
import numpy as np
|
||||
|
||||
from lib.serializer import get_serializer
|
||||
from lib.utils import get_backend
|
||||
|
||||
from .event_reader import TensorBoardLogs
|
||||
|
||||
|
@ -263,15 +262,10 @@ class GlobalSession():
|
|||
The loss keys for the given session. If ``None`` is passed as session_id then a unique
|
||||
list of all loss keys for all sessions is returned
|
||||
"""
|
||||
if get_backend() == "amd":
|
||||
# We can't log the graph in Tensorboard logs for AMD so need to obtain from state file
|
||||
loss_keys = {int(sess_id): [name for name in session["loss_names"] if name != "total"]
|
||||
for sess_id, session in self._state["sessions"].items()}
|
||||
else:
|
||||
assert self._tb_logs is not None
|
||||
loss_keys = {sess_id: list(logs.keys())
|
||||
for sess_id, logs
|
||||
in self._tb_logs.get_loss(session_id=session_id).items()}
|
||||
assert self._tb_logs is not None
|
||||
loss_keys = {sess_id: list(logs.keys())
|
||||
for sess_id, logs
|
||||
in self._tb_logs.get_loss(session_id=session_id).items()}
|
||||
|
||||
if session_id is None:
|
||||
retval: List[str] = list(set(loss_key
|
||||
|
@ -339,9 +333,9 @@ class SessionsSummary(): # pylint:disable=too-few-public-methods
|
|||
logger.debug("Collating summary time stamps")
|
||||
|
||||
self._time_stats = {
|
||||
sess_id: dict(start_time=np.min(timestamps) if np.any(timestamps) else 0,
|
||||
end_time=np.max(timestamps) if np.any(timestamps) else 0,
|
||||
iterations=timestamps.shape[0] if np.any(timestamps) else 0)
|
||||
sess_id: {"start_time": np.min(timestamps) if np.any(timestamps) else 0,
|
||||
"end_time": np.max(timestamps) if np.any(timestamps) else 0,
|
||||
"iterations": timestamps.shape[0] if np.any(timestamps) else 0}
|
||||
for sess_id, timestamps in cast(Dict[int, np.ndarray],
|
||||
self._session.get_timestamps(None)).items()}
|
||||
|
||||
|
@ -351,10 +345,10 @@ class SessionsSummary(): # pylint:disable=too-few-public-methods
|
|||
session_id = _SESSION.session_ids[-1]
|
||||
latest = cast(np.ndarray, self._session.get_timestamps(session_id))
|
||||
|
||||
self._time_stats[session_id] = dict(
|
||||
start_time=np.min(latest) if np.any(latest) else 0,
|
||||
end_time=np.max(latest) if np.any(latest) else 0,
|
||||
iterations=latest.shape[0] if np.any(latest) else 0)
|
||||
self._time_stats[session_id] = {
|
||||
"start_time": np.min(latest) if np.any(latest) else 0,
|
||||
"end_time": np.max(latest) if np.any(latest) else 0,
|
||||
"iterations": latest.shape[0] if np.any(latest) else 0}
|
||||
|
||||
logger.debug("time_stats: %s", self._time_stats)
|
||||
|
||||
|
@ -416,14 +410,15 @@ class SessionsSummary(): # pylint:disable=too-few-public-methods
|
|||
end = np.nan_to_num(timestamps["end_time"])
|
||||
elapsed = int(end - start)
|
||||
batchsize = self._session.batch_sizes.get(session_id, 0)
|
||||
retval = dict(
|
||||
session=session_id,
|
||||
start=start,
|
||||
end=end,
|
||||
elapsed=elapsed,
|
||||
rate=(((batchsize * 2) * timestamps["iterations"]) / elapsed if elapsed != 0 else 0),
|
||||
batch=batchsize,
|
||||
iterations=timestamps["iterations"])
|
||||
retval = {
|
||||
"session": session_id,
|
||||
"start": start,
|
||||
"end": end,
|
||||
"elapsed": elapsed,
|
||||
"rate": (((batchsize * 2) * timestamps["iterations"]) / elapsed
|
||||
if elapsed != 0 else 0),
|
||||
"batch": batchsize,
|
||||
"iterations": timestamps["iterations"]}
|
||||
logger.debug(retval)
|
||||
return retval
|
||||
|
||||
|
@ -557,9 +552,9 @@ class Calculations():
|
|||
self._loss_keys = loss_keys if isinstance(loss_keys, list) else [loss_keys]
|
||||
self._selections = selections if isinstance(selections, list) else [selections]
|
||||
self._is_totals = session_id is None
|
||||
self._args: Dict[str, Union[int, float]] = dict(avg_samples=avg_samples,
|
||||
smooth_amount=smooth_amount,
|
||||
flatten_outliers=flatten_outliers)
|
||||
self._args: Dict[str, Union[int, float]] = {"avg_samples": avg_samples,
|
||||
"smooth_amount": smooth_amount,
|
||||
"flatten_outliers": flatten_outliers}
|
||||
self._iterations = 0
|
||||
self._limit = 0
|
||||
self._start_iteration = 0
|
||||
|
|
|
@ -1,17 +1,14 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Common multi-backend Keras utilities """
|
||||
from typing import Optional, Tuple
|
||||
from __future__ import annotations
|
||||
import typing as T
|
||||
|
||||
import numpy as np
|
||||
|
||||
from lib.utils import get_backend
|
||||
import tensorflow.keras.backend as K # pylint:disable=import-error
|
||||
|
||||
if get_backend() == "amd":
|
||||
from plaidml.tile import Value as Tensor # pylint:disable=import-error
|
||||
from keras import backend as K
|
||||
else:
|
||||
if T.TYPE_CHECKING:
|
||||
from tensorflow import Tensor
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
|
||||
|
||||
def frobenius_norm(matrix: Tensor,
|
||||
|
@ -46,7 +43,7 @@ def replicate_pad(image: Tensor, padding: int) -> Tensor:
|
|||
-----
|
||||
At the time of writing Keras/Tensorflow does not have a native replication padding method.
|
||||
The implementation here is probably not the most efficient, but it is a pure keras method
|
||||
which should work on both TF and Plaid.
|
||||
which should work on TF.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
@ -91,28 +88,22 @@ class ColorSpaceConvert(): # pylint:disable=too-few-public-methods
|
|||
One of `"srgb"`, `"rgb"`, `"xyz"`
|
||||
to_space: str
|
||||
One of `"lab"`, `"rgb"`, `"ycxcz"`, `"xyz"`
|
||||
batch_shape: Tuple, optional
|
||||
Shape tuple (b, h, w, c) if the image being processed. Required for PlaidML backend.
|
||||
Optional. Default = ``None``
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If the requested color space conversion is not defined
|
||||
"""
|
||||
def __init__(self,
|
||||
from_space: str,
|
||||
to_space: str,
|
||||
batch_shape: Optional[Tuple[int, int, int, int]] = None) -> None:
|
||||
functions = dict(rgb_lab=self._rgb_to_lab,
|
||||
rgb_xyz=self._rgb_to_xyz,
|
||||
srgb_rgb=self._srgb_to_rgb,
|
||||
srgb_ycxcz=self._srgb_to_ycxcz,
|
||||
xyz_ycxcz=self._xyz_to_ycxcz,
|
||||
xyz_lab=self._xyz_to_lab,
|
||||
xyz_to_rgb=self._xyz_to_rgb,
|
||||
ycxcz_rgb=self._ycxcz_to_rgb,
|
||||
ycxcz_xyz=self._ycxcz_to_xyz)
|
||||
def __init__(self, from_space: str, to_space: str) -> None:
|
||||
functions = {"rgb_lab": self._rgb_to_lab,
|
||||
"rgb_xyz": self._rgb_to_xyz,
|
||||
"srgb_rgb": self._srgb_to_rgb,
|
||||
"srgb_ycxcz": self._srgb_to_ycxcz,
|
||||
"xyz_ycxcz": self._xyz_to_ycxcz,
|
||||
"xyz_lab": self._xyz_to_lab,
|
||||
"xyz_to_rgb": self._xyz_to_rgb,
|
||||
"ycxcz_rgb": self._ycxcz_to_rgb,
|
||||
"ycxcz_xyz": self._ycxcz_to_xyz}
|
||||
func_name = f"{from_space.lower()}_{to_space.lower()}"
|
||||
if func_name not in functions:
|
||||
raise ValueError(f"The color transform {from_space} to {to_space} is not defined.")
|
||||
|
@ -124,10 +115,9 @@ class ColorSpaceConvert(): # pylint:disable=too-few-public-methods
|
|||
|
||||
self._rgb_xyz_map = self._get_rgb_xyz_map()
|
||||
self._xyz_multipliers = K.constant([116, 500, 200], dtype="float32")
|
||||
self._batch_shape = batch_shape
|
||||
|
||||
@classmethod
|
||||
def _get_rgb_xyz_map(cls) -> Tuple[Tensor, Tensor]:
|
||||
def _get_rgb_xyz_map(cls) -> T.Tuple[Tensor, Tensor]:
|
||||
""" Obtain the mapping and inverse mapping for rgb to xyz color space conversion.
|
||||
|
||||
Returns
|
||||
|
@ -198,7 +188,7 @@ class ColorSpaceConvert(): # pylint:disable=too-few-public-methods
|
|||
Tensor
|
||||
The image tensor in XYZ format
|
||||
"""
|
||||
dim = K.int_shape(image) if self._batch_shape is None else self._batch_shape
|
||||
dim = K.int_shape(image)
|
||||
image = K.permute_dimensions(image, (0, 3, 1, 2))
|
||||
image = K.reshape(image, (dim[0], dim[3], dim[1] * dim[2]))
|
||||
converted = K.permute_dimensions(K.dot(mapping, image), (1, 2, 0))
|
||||
|
@ -278,7 +268,7 @@ class ColorSpaceConvert(): # pylint:disable=too-few-public-methods
|
|||
factor = 1 / (3 * (delta ** 2))
|
||||
|
||||
clamped_term = K.pow(K.clip(image, delta_cube, None), 1.0 / 3.0)
|
||||
div = (factor * image + (4 / 29))
|
||||
div = factor * image + (4 / 29)
|
||||
|
||||
image = K.switch(image > delta_cube, clamped_term, div)
|
||||
return K.concatenate([self._xyz_multipliers[0] * image[..., 1:2] - 16.,
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Conditional imports depending on whether the AMD version is installed or not """
|
||||
|
||||
from lib.utils import get_backend
|
||||
|
||||
from .normalization import (AdaInstanceNormalization, GroupNormalization, # noqa
|
||||
InstanceNormalization, LayerNormalization, RMSNormalization)
|
||||
from .loss import losses # noqa
|
||||
|
||||
if get_backend() == "amd":
|
||||
from . import optimizers_plaid as optimizers # noqa
|
||||
else:
|
||||
from . import optimizers_tf as optimizers #type:ignore # noqa
|
|
@ -1,7 +1,4 @@
|
|||
""" Auto clipper for clipping gradients.
|
||||
|
||||
Non AMD Only
|
||||
"""
|
||||
""" Auto clipper for clipping gradients. """
|
||||
from typing import List
|
||||
|
||||
import tensorflow as tf
|
||||
|
|
|
@ -8,16 +8,10 @@ import inspect
|
|||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from lib.utils import get_backend
|
||||
# Fix intellisense/linting for tf.keras' thoroughly broken import system
|
||||
keras = tf.keras
|
||||
K = keras.backend
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.utils import get_custom_objects # pylint:disable=no-name-in-module
|
||||
from keras import backend as K
|
||||
from keras import initializers
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.utils import get_custom_objects # noqa pylint:disable=no-name-in-module,import-error
|
||||
from tensorflow.keras import initializers, backend as K # noqa pylint:disable=no-name-in-module,import-error
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
@ -70,7 +64,7 @@ def compute_fans(shape, data_format='channels_last'):
|
|||
return fan_in, fan_out
|
||||
|
||||
|
||||
class ICNR(initializers.Initializer): # pylint: disable=invalid-name,no-member
|
||||
class ICNR(keras.initializers.Initializer): # type:ignore[name-defined]
|
||||
""" ICNR initializer for checkerboard artifact free sub pixel convolution
|
||||
|
||||
Parameters
|
||||
|
@ -100,7 +94,7 @@ class ICNR(initializers.Initializer): # pylint: disable=invalid-name,no-member
|
|||
self.scale = scale
|
||||
self.initializer = initializer
|
||||
|
||||
def __call__(self, shape, dtype="float32"):
|
||||
def __call__(self, shape, dtype="float32", **kwargs):
|
||||
""" Call function for the ICNR initializer.
|
||||
|
||||
Parameters
|
||||
|
@ -120,7 +114,7 @@ class ICNR(initializers.Initializer): # pylint: disable=invalid-name,no-member
|
|||
return self.initializer(shape)
|
||||
new_shape = shape[:3] + [shape[3] // (self.scale ** 2)]
|
||||
if isinstance(self.initializer, dict):
|
||||
self.initializer = initializers.deserialize(self.initializer)
|
||||
self.initializer = keras.initializers.deserialize(self.initializer)
|
||||
var_x = self.initializer(new_shape, dtype)
|
||||
var_x = K.permute_dimensions(var_x, [2, 0, 1, 3])
|
||||
var_x = K.resize_images(var_x,
|
||||
|
@ -136,9 +130,6 @@ class ICNR(initializers.Initializer): # pylint: disable=invalid-name,no-member
|
|||
def _space_to_depth(self, input_tensor):
|
||||
""" Space to depth implementation.
|
||||
|
||||
PlaidML does not have a space to depth operation, so calculate if backend is amd
|
||||
otherwise returns the :func:`tensorflow.space_to_depth` operation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_tensor: tensor
|
||||
|
@ -149,16 +140,7 @@ class ICNR(initializers.Initializer): # pylint: disable=invalid-name,no-member
|
|||
tensor
|
||||
The manipulated input tensor
|
||||
"""
|
||||
if get_backend() == "amd":
|
||||
batch, height, width, depth = input_tensor.shape.dims
|
||||
new_height = height // self.scale
|
||||
new_width = width // self.scale
|
||||
reshaped = K.reshape(input_tensor,
|
||||
(batch, new_height, self.scale, new_width, self.scale, depth))
|
||||
retval = K.reshape(K.permute_dimensions(reshaped, [0, 1, 3, 2, 4, 5]),
|
||||
(batch, new_height, new_width, -1))
|
||||
else:
|
||||
retval = tf.nn.space_to_depth(input_tensor, block_size=self.scale, data_format="NHWC")
|
||||
retval = tf.nn.space_to_depth(input_tensor, block_size=self.scale, data_format="NHWC")
|
||||
logger.debug("Input shape: %s, Output shape: %s", input_tensor.shape, retval.shape)
|
||||
return retval
|
||||
|
||||
|
@ -177,7 +159,7 @@ class ICNR(initializers.Initializer): # pylint: disable=invalid-name,no-member
|
|||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class ConvolutionAware(initializers.Initializer): # pylint: disable=no-member
|
||||
class ConvolutionAware(keras.initializers.Initializer): # type:ignore[name-defined]
|
||||
"""
|
||||
Initializer that generates orthogonal convolution filters in the Fourier space. If this
|
||||
initializer is passed a shape that is not 3D or 4D, orthogonal initialization will be used.
|
||||
|
@ -210,11 +192,11 @@ class ConvolutionAware(initializers.Initializer): # pylint: disable=no-member
|
|||
def __init__(self, eps_std=0.05, seed=None, initialized=False):
|
||||
self.eps_std = eps_std
|
||||
self.seed = seed
|
||||
self.orthogonal = initializers.Orthogonal() # pylint:disable=no-member
|
||||
self.he_uniform = initializers.he_uniform() # pylint:disable=no-member
|
||||
self.orthogonal = keras.initializers.Orthogonal()
|
||||
self.he_uniform = keras.initializers.he_uniform()
|
||||
self.initialized = initialized
|
||||
|
||||
def __call__(self, shape, dtype=None):
|
||||
def __call__(self, shape, dtype=None, **kwargs):
|
||||
""" Call function for the ICNR initializer.
|
||||
|
||||
Parameters
|
||||
|
@ -248,7 +230,7 @@ class ConvolutionAware(initializers.Initializer): # pylint: disable=no-member
|
|||
|
||||
transpose_dimensions = (2, 1, 0)
|
||||
kernel_shape = (row,)
|
||||
correct_ifft = lambda shape, s=[None]: np.fft.irfft(shape, s[0]) # noqa
|
||||
correct_ifft = lambda shape, s=[None]: np.fft.irfft(shape, s[0]) # noqa:E501,E731 # pylint:disable=unnecessary-lambda-assignment
|
||||
correct_fft = np.fft.rfft
|
||||
|
||||
elif rank == 4:
|
||||
|
@ -317,12 +299,12 @@ class ConvolutionAware(initializers.Initializer): # pylint: disable=no-member
|
|||
dict
|
||||
The configuration for ICNR Initialization
|
||||
"""
|
||||
return dict(eps_std=self.eps_std,
|
||||
seed=self.seed,
|
||||
initialized=self.initialized)
|
||||
return {"eps_std": self.eps_std,
|
||||
"seed": self.seed,
|
||||
"initialized": self.initialized}
|
||||
|
||||
|
||||
# Update initializers into Keras custom objects
|
||||
for name, obj in inspect.getmembers(sys.modules[__name__]):
|
||||
if inspect.isclass(obj) and obj.__module__ == __name__:
|
||||
get_custom_objects().update({name: obj})
|
||||
keras.utils.get_custom_objects().update({name: obj})
|
||||
|
|
|
@ -8,23 +8,14 @@ import inspect
|
|||
|
||||
import tensorflow as tf
|
||||
|
||||
from lib.utils import get_backend
|
||||
|
||||
if get_backend() == "amd":
|
||||
from lib.plaidml_utils import pad
|
||||
from keras.utils import get_custom_objects, conv_utils # pylint:disable=no-name-in-module
|
||||
import keras.backend as K
|
||||
from keras.layers import InputSpec, Layer
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.utils import get_custom_objects # noqa pylint:disable=no-name-in-module,import-error
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import InputSpec, Layer # noqa pylint:disable=no-name-in-module,import-error
|
||||
from tensorflow import pad # type:ignore
|
||||
from tensorflow.python.keras.utils import conv_utils # pylint:disable=no-name-in-module
|
||||
# Fix intellisense/linting for tf.keras' thoroughly broken import system
|
||||
from tensorflow.python.keras.utils import conv_utils # pylint:disable=no-name-in-module
|
||||
keras = tf.keras
|
||||
layers = keras.layers
|
||||
K = keras.backend
|
||||
|
||||
|
||||
class PixelShuffler(Layer):
|
||||
class PixelShuffler(keras.layers.Layer): # type:ignore[name-defined]
|
||||
""" PixelShuffler layer for Keras.
|
||||
|
||||
This layer requires a Convolution2D prior to it, having output filters computed according to
|
||||
|
@ -65,10 +56,7 @@ class PixelShuffler(Layer):
|
|||
"""
|
||||
def __init__(self, size=(2, 2), data_format=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
if get_backend() == "amd":
|
||||
self.data_format = K.normalize_data_format(data_format) # pylint:disable=no-member
|
||||
else:
|
||||
self.data_format = conv_utils.normalize_data_format(data_format)
|
||||
self.data_format = conv_utils.normalize_data_format(data_format)
|
||||
self.size = conv_utils.normalize_tuple(size, 2, 'size')
|
||||
|
||||
def call(self, inputs, *args, **kwargs):
|
||||
|
@ -195,7 +183,7 @@ class PixelShuffler(Layer):
|
|||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class KResizeImages(Layer):
|
||||
class KResizeImages(keras.layers.Layer): # type:ignore[name-defined]
|
||||
""" A custom upscale function that uses :class:`keras.backend.resize_images` to upsample.
|
||||
|
||||
Parameters
|
||||
|
@ -238,10 +226,7 @@ class KResizeImages(Layer):
|
|||
else:
|
||||
# Arbitrary resizing
|
||||
size = int(round(K.int_shape(inputs)[1] * self.size))
|
||||
if get_backend() != "amd":
|
||||
retval = tf.image.resize(inputs, (size, size), method=self.interpolation)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
retval = tf.image.resize(inputs, (size, size), method=self.interpolation)
|
||||
return retval
|
||||
|
||||
def compute_output_shape(self, input_shape):
|
||||
|
@ -271,12 +256,12 @@ class KResizeImages(Layer):
|
|||
dict
|
||||
A python dictionary containing the layer configuration
|
||||
"""
|
||||
config = dict(size=self.size, interpolation=self.interpolation)
|
||||
config = {"size": self.size, "interpolation": self.interpolation}
|
||||
base_config = super().get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class SubPixelUpscaling(Layer):
|
||||
class SubPixelUpscaling(keras.layers.Layer): # type:ignore[name-defined]
|
||||
""" Sub-pixel convolutional up-scaling layer.
|
||||
|
||||
This layer requires a Convolution2D prior to it, having output filters computed according to
|
||||
|
@ -325,10 +310,7 @@ class SubPixelUpscaling(Layer):
|
|||
super().__init__(**kwargs)
|
||||
|
||||
self.scale_factor = scale_factor
|
||||
if get_backend() == "amd":
|
||||
self.data_format = K.normalize_data_format(data_format) # pylint:disable=no-member
|
||||
else:
|
||||
self.data_format = conv_utils.normalize_data_format(data_format)
|
||||
self.data_format = conv_utils.normalize_data_format(data_format)
|
||||
|
||||
def build(self, input_shape):
|
||||
"""Creates the layer weights.
|
||||
|
@ -472,7 +454,7 @@ class SubPixelUpscaling(Layer):
|
|||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class ReflectionPadding2D(Layer):
|
||||
class ReflectionPadding2D(keras.layers.Layer): # type:ignore[name-defined]
|
||||
"""Reflection-padding layer for 2D input (e.g. picture).
|
||||
|
||||
This layer can add rows and columns at the top, bottom, left and right side of an image tensor.
|
||||
|
@ -506,7 +488,7 @@ class ReflectionPadding2D(Layer):
|
|||
Keras tensor (future input to layer) or ``list``/``tuple`` of Keras tensors to
|
||||
reference for weight shape computations.
|
||||
"""
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
self.input_spec = [keras.layers.InputSpec(shape=input_shape)]
|
||||
super().build(input_shape)
|
||||
|
||||
def compute_output_shape(self, input_shape):
|
||||
|
@ -543,7 +525,7 @@ class ReflectionPadding2D(Layer):
|
|||
input_shape[2] + padding_width,
|
||||
input_shape[3])
|
||||
|
||||
def call(self, var_x, mask=None): # pylint:disable=unused-argument,arguments-differ
|
||||
def call(self, inputs, *args, **kwargs):
|
||||
"""This is where the layer's logic lives.
|
||||
|
||||
Parameters
|
||||
|
@ -576,12 +558,12 @@ class ReflectionPadding2D(Layer):
|
|||
padding_left = padding_width // 2
|
||||
padding_right = padding_width - padding_left
|
||||
|
||||
return pad(var_x,
|
||||
[[0, 0],
|
||||
[padding_top, padding_bot],
|
||||
[padding_left, padding_right],
|
||||
[0, 0]],
|
||||
'REFLECT')
|
||||
return tf.pad(inputs,
|
||||
[[0, 0],
|
||||
[padding_top, padding_bot],
|
||||
[padding_left, padding_right],
|
||||
[0, 0]],
|
||||
'REFLECT')
|
||||
|
||||
def get_config(self):
|
||||
"""Returns the config of the layer.
|
||||
|
@ -604,18 +586,15 @@ class ReflectionPadding2D(Layer):
|
|||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class _GlobalPooling2D(Layer):
|
||||
class _GlobalPooling2D(keras.layers.Layer): # type:ignore[name-defined]
|
||||
"""Abstract class for different global pooling 2D layers.
|
||||
|
||||
From keras as access to pooling is trickier in tensorflow.keras
|
||||
"""
|
||||
def __init__(self, data_format=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
if get_backend() == "amd":
|
||||
self.data_format = K.normalize_data_format(data_format) # pylint:disable=no-member
|
||||
else:
|
||||
self.data_format = conv_utils.normalize_data_format(data_format)
|
||||
self.input_spec = InputSpec(ndim=4)
|
||||
self.data_format = conv_utils.normalize_data_format(data_format)
|
||||
self.input_spec = keras.layers.InputSpec(ndim=4)
|
||||
|
||||
def compute_output_shape(self, input_shape):
|
||||
""" Compute the output shape based on the input shape.
|
||||
|
@ -704,7 +683,7 @@ class GlobalStdDevPooling2D(_GlobalPooling2D):
|
|||
return pooled
|
||||
|
||||
|
||||
class L2_normalize(Layer): # pylint:disable=invalid-name
|
||||
class L2_normalize(keras.layers.Layer): # type:ignore[name-defined] # pylint:disable=invalid-name
|
||||
""" Normalizes a tensor w.r.t. the L2 norm alongside the specified axis.
|
||||
|
||||
Parameters
|
||||
|
@ -755,7 +734,7 @@ class L2_normalize(Layer): # pylint:disable=invalid-name
|
|||
return config
|
||||
|
||||
|
||||
class Swish(Layer):
|
||||
class Swish(keras.layers.Layer): # type:ignore[name-defined]
|
||||
""" Swish Activation Layer implementation for Keras.
|
||||
|
||||
Parameters
|
||||
|
@ -781,9 +760,6 @@ class Swish(Layer):
|
|||
inputs: tensor
|
||||
Input tensor, or list/tuple of input tensors
|
||||
"""
|
||||
if get_backend() == "amd":
|
||||
return inputs * K.sigmoid(inputs * self.beta)
|
||||
# Native TF Implementation has more memory-efficient gradients
|
||||
return tf.nn.swish(inputs * self.beta)
|
||||
|
||||
def get_config(self):
|
||||
|
@ -804,4 +780,4 @@ class Swish(Layer):
|
|||
# Update layers into Keras custom objects
|
||||
for name, obj in inspect.getmembers(sys.modules[__name__]):
|
||||
if inspect.isclass(obj) and obj.__module__ == __name__:
|
||||
get_custom_objects().update({name: obj})
|
||||
keras.utils.get_custom_objects().update({name: obj})
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Conditional imports depending on whether the AMD version is installed or not """
|
||||
|
||||
from lib.utils import get_backend
|
||||
|
||||
if get_backend() == "amd":
|
||||
from . import loss_plaid as losses # noqa
|
||||
else:
|
||||
from . import loss_tf as losses # type:ignore # noqa
|
|
@ -1,381 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Custom Feature Map Loss Functions for faceswap.py """
|
||||
from dataclasses import dataclass, field
|
||||
import logging
|
||||
|
||||
from typing import Any, Callable, Dict, Optional, List, Tuple
|
||||
|
||||
import plaidml
|
||||
from keras import applications as kapp
|
||||
from keras.layers import Dropout, Conv2D, Input, Layer
|
||||
from keras.models import Model
|
||||
import keras.backend as K
|
||||
|
||||
import numpy as np
|
||||
|
||||
from lib.model.nets import AlexNet, SqueezeNet
|
||||
from lib.utils import GetModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class NetInfo:
|
||||
""" Data class for holding information about Trunk and Linear Layer nets.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
model_id: int
|
||||
The model ID for the model stored in the deepfakes Model repo
|
||||
model_name: str
|
||||
The filename of the decompressed model/weights file
|
||||
net: callable, Optional
|
||||
The net definition to load, if any. Default:``None``
|
||||
init_kwargs: dict, optional
|
||||
Keyword arguments to initialize any :attr:`net`. Default: empty ``dict``
|
||||
needs_init: bool, optional
|
||||
True if the net needs initializing otherwise False. Default: ``True``
|
||||
"""
|
||||
model_id: int = 0
|
||||
model_name: str = ""
|
||||
net: Optional[Callable] = None
|
||||
init_kwargs: Dict[str, Any] = field(default_factory=dict)
|
||||
needs_init: bool = True
|
||||
outputs: List[Layer] = field(default_factory=list)
|
||||
|
||||
|
||||
class _TrunkNormLayer(Layer):
|
||||
""" Create a layer for normalizing the output of the trunk model.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
epsilon: float, optional
|
||||
A small number to add to the normalization. Default=`1e-10`
|
||||
"""
|
||||
def __init__(self, epsilon: float = 1e-10, **kwargs):
|
||||
super().__init__(*kwargs)
|
||||
self._epsilon = epsilon
|
||||
|
||||
def call(self, inputs: plaidml.tile.Value, **kwargs) -> plaidml.tile.Value:
|
||||
""" Call the trunk normalization layer.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
inputs: :class:`plaidml.tile.Value`
|
||||
Input to the trunk output normalization layer
|
||||
|
||||
Returns
|
||||
-------
|
||||
:class:`plaidml.tile.Value`
|
||||
The output from the layer
|
||||
"""
|
||||
norm_factor = K.sqrt(K.sum(K.square(inputs), axis=-1, keepdims=True))
|
||||
return inputs / (norm_factor + self._epsilon)
|
||||
|
||||
|
||||
class _LPIPSTrunkNet(): # pylint:disable=too-few-public-methods
|
||||
""" Trunk neural network loader for LPIPS Loss function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
net_name: str
|
||||
The name of the trunk network to load. One of "alex", "squeeze" or "vgg16"
|
||||
"""
|
||||
def __init__(self, net_name: str) -> None:
|
||||
logger.debug("Initializing: %s (net_name '%s')",
|
||||
self.__class__.__name__, net_name)
|
||||
self._net = self._nets[net_name]
|
||||
logger.debug("Initialized: %s ", self.__class__.__name__)
|
||||
|
||||
@property
|
||||
def _nets(self) -> Dict[str, NetInfo]:
|
||||
""" :class:`NetInfo`: The Information about the requested net."""
|
||||
return dict(
|
||||
alex=NetInfo(model_id=15,
|
||||
model_name="alexnet_imagenet_no_top_v1.h5",
|
||||
net=AlexNet,
|
||||
outputs=[f"features.{idx}" for idx in (0, 3, 6, 8, 10)]),
|
||||
squeeze=NetInfo(model_id=16,
|
||||
model_name="squeezenet_imagenet_no_top_v1.h5",
|
||||
net=SqueezeNet,
|
||||
outputs=[f"features.{idx}" for idx in (0, 4, 7, 9, 10, 11, 12)]),
|
||||
vgg16=NetInfo(model_id=17,
|
||||
model_name="vgg16_imagenet_no_top_v1.h5",
|
||||
net=kapp.vgg16.VGG16,
|
||||
init_kwargs=dict(include_top=False, weights=None),
|
||||
outputs=[f"block{i + 1}_conv{2 if i < 2 else 3}" for i in range(5)]))
|
||||
|
||||
def _process_weights(self, model: Model) -> Model:
|
||||
""" Save and lock weights if requested.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
model :class:`keras.models.Model`
|
||||
The loaded trunk or linear network
|
||||
|
||||
layers: list, optional
|
||||
A list of layer names to explicitly load/freeze. If ``None`` then all model
|
||||
layers will be processed
|
||||
|
||||
Returns
|
||||
-------
|
||||
:class:`keras.models.Model`
|
||||
The network with weights loaded/not loaded and layers locked/unlocked
|
||||
"""
|
||||
weights = GetModel(self._net.model_name, self._net.model_id).model_path
|
||||
model.load_weights(weights)
|
||||
model.trainable = False
|
||||
for layer in model.layers:
|
||||
layer.trainable = False
|
||||
return model
|
||||
|
||||
def __call__(self) -> Model:
|
||||
""" Load the Trunk net, add normalization to feature outputs, load weights and set
|
||||
trainable state.
|
||||
|
||||
Returns
|
||||
-------
|
||||
:class:`tensorflow.keras.models.Model`
|
||||
The trunk net with normalized feature output layers
|
||||
"""
|
||||
if self._net.net is None:
|
||||
raise ValueError("No net loaded")
|
||||
|
||||
model = self._net.net(**self._net.init_kwargs)
|
||||
model = model if self._net.init_kwargs else model() # Non vgg need init
|
||||
out_layers = [_TrunkNormLayer()(model.get_layer(name).output)
|
||||
for name in self._net.outputs]
|
||||
model = Model(inputs=model.input, outputs=out_layers)
|
||||
model = self._process_weights(model)
|
||||
return model
|
||||
|
||||
|
||||
class _LinearLayer(Layer):
    """ A single-channel linear projection (1x1 un-biased convolution) applied to a trunk
    network output, optionally preceded by dropout.

    Parameters
    ----------
    use_dropout: bool, optional
        Apply a dropout layer prior to the linear layer. Default: ``False``
    """
    def __init__(self, use_dropout: bool = False, **kwargs):
        # Whether a 50% dropout layer is inserted ahead of the 1x1 convolution
        self._use_dropout = use_dropout
        super().__init__(**kwargs)

    def call(self, inputs: plaidml.tile.Value, **kwargs) -> plaidml.tile.Value:
        """ Call the linear layer.

        Parameters
        ----------
        inputs: :class:`plaidml.tile.Value`
            Input to the linear layer

        Returns
        -------
        :class:`plaidml.tile.Value`
            The output from the layer
        """
        # NOTE(review): a fresh ``Input`` is created from the *shape* of ``inputs`` rather
        # than operating on ``inputs`` itself, so the returned graph is disconnected from
        # the incoming tensor. This looks related to the known PlaidML disconnected-graph
        # issue flagged in ``LPIPSLoss._process_diffs`` — confirm before relying on it
        input_ = Input(K.int_shape(inputs)[1:])
        var_x = Dropout(rate=0.5)(input_) if self._use_dropout else input_
        var_x = Conv2D(1, 1, strides=1, padding="valid", use_bias=False)(var_x)
        return var_x
|
||||
|
||||
|
||||
class _LPIPSLinearNet(_LPIPSTrunkNet):  # pylint:disable=too-few-public-methods
    """ The Linear Network to be applied to the difference between the true and predicted outputs
    of the trunk network.

    Parameters
    ----------
    net_name: str
        The name of the trunk network in use. One of "alex", "squeeze" or "vgg16"
    trunk_net: :class:`keras.models.Model`
        The trunk net to place the linear layer on.
    use_dropout: bool
        ``True`` if a dropout layer should be used in the Linear network otherwise ``False``
    """
    def __init__(self,
                 net_name: str,
                 trunk_net: Model,
                 use_dropout: bool) -> None:
        logger.debug(
            "Initializing: %s (trunk_net: %s, use_dropout: %s)", self.__class__.__name__,
            trunk_net, use_dropout)
        super().__init__(net_name=net_name)

        # The already-built trunk model whose outputs the linear blocks attach to
        self._trunk = trunk_net
        # Whether each linear block is preceded by a 50% dropout layer
        self._use_dropout = use_dropout

        logger.debug("Initialized: %s", self.__class__.__name__)

    @property
    def _nets(self) -> Dict[str, NetInfo]:
        """ :class:`NetInfo`: The Information about the requested net."""
        # Only the weight-file ids/names are needed here: the architecture itself is built
        # dynamically in :func:`_linear_block` from the trunk's output shapes
        return dict(
            alex=NetInfo(model_id=18,
                         model_name="alexnet_lpips_v1.h5",),
            squeeze=NetInfo(model_id=19,
                            model_name="squeezenet_lpips_v1.h5"),
            vgg16=NetInfo(model_id=20,
                          model_name="vgg16_lpips_v1.h5"))

    def _linear_block(self, net_output_layer: plaidml.tile.Value) -> Tuple[plaidml.tile.Value,
                                                                           plaidml.tile.Value]:
        """ Build a linear block for a trunk network output.

        A linear block is an optional 50% dropout followed by a single-channel 1x1
        un-biased convolution.

        Parameters
        ----------
        net_output_layer: :class:`plaidml.tile.Value`
            An output from the selected trunk network

        Returns
        -------
        :class:`plaidml.tile.Value`
            The input to the linear block
        :class:`plaidml.tile.Value`
            The output from the linear block
        """
        in_shape = K.int_shape(net_output_layer)[1:]
        input_ = Input(in_shape)
        var_x = Dropout(rate=0.5)(input_) if self._use_dropout else input_
        var_x = Conv2D(1, 1, strides=1, padding="valid", use_bias=False)(var_x)
        return input_, var_x

    def __call__(self) -> Model:
        """ Build the linear network for the given trunk network's outputs. Load in trained weights
        and set the model's trainable parameters.

        Returns
        -------
        :class:`tensorflow.keras.models.Model`
            The compiled Linear Net model
        """
        inputs = []
        outputs = []
        # One linear block per trunk feature output; the block inputs become model inputs
        for layer in self._trunk.outputs:
            inp, out = self._linear_block(layer)
            inputs.append(inp)
            outputs.append(out)

        linear_model = Model(inputs=inputs, outputs=outputs)
        # Load the pre-trained weights listed in :attr:`_nets` and freeze the model
        linear_model = self._process_weights(linear_model)

        return linear_model
|
||||
|
||||
|
||||
class LPIPSLoss():  # pylint:disable=too-few-public-methods
    """ LPIPS Loss Function.

    A perceptual loss function that uses linear outputs from pretrained CNNs feature layers.

    Notes
    -----
    Channels Last implementation. All trunks implemented from the original paper.

    References
    ----------
    https://richzhang.github.io/PerceptualSimilarity/

    Parameters
    ----------
    trunk_network: str
        The name of the trunk network to use. One of "alex", "squeeze" or "vgg16"
    linear_use_dropout: bool, optional
        ``True`` if a dropout layer should be used in the Linear network otherwise ``False``.
        Default: ``True``
    lpips: bool, optional
        ``True`` to use linear network on top of the trunk network. ``False`` to just average the
        output from the trunk network. Default ``True``
    normalize: bool, optional
        ``True`` if the input Tensor needs to be normalized from the 0. to 1. range to the -1. to
        1. range. Default: ``True``
    ret_per_layer: bool, optional
        ``True`` to return the loss value per feature output layer otherwise ``False``.
        Default: ``False``
    """
    def __init__(self,
                 trunk_network: str,
                 linear_use_dropout: bool = True,
                 lpips: bool = False,  # TODO This should be True
                 normalize: bool = True,
                 ret_per_layer: bool = False) -> None:
        logger.debug(
            "Initializing: %s (trunk_network '%s', linear_use_dropout: %s, lpips: %s, "
            "normalize: %s, ret_per_layer: %s)", self.__class__.__name__, trunk_network,
            linear_use_dropout, lpips, normalize, ret_per_layer)

        self._use_lpips = lpips
        self._normalize = normalize
        self._ret_per_layer = ret_per_layer
        # Per-channel shift/scale constants from the reference implementation, used to
        # standardize inputs to the statistics the trunk networks were trained on
        self._shift = K.constant(np.array([-.030, -.088, -.188],
                                          dtype="float32")[None, None, None, :])
        self._scale = K.constant(np.array([.458, .448, .450],
                                          dtype="float32")[None, None, None, :])

        self._trunk_net = _LPIPSTrunkNet(trunk_network)()
        self._linear_net = _LPIPSLinearNet(trunk_network, self._trunk_net, linear_use_dropout)()

        logger.debug("Initialized: %s", self.__class__.__name__)

    def _process_diffs(self, inputs: List[plaidml.tile.Value]) -> List[plaidml.tile.Value]:
        """ Perform processing on the Trunk Network outputs.

        If :attr:`_use_lpips` is enabled, process the diff values through the linear network,
        otherwise return the diff values summed on the channels axis.

        Parameters
        ----------
        inputs: list
            List of the squared difference of the true and predicted outputs from the trunk network

        Returns
        -------
        list
            List of either the linear network outputs (when using lpips) or summed network outputs

        Raises
        ------
        NotImplementedError
            If :attr:`_use_lpips` is enabled (not supported under PlaidML)
        """
        if self._use_lpips:
            # TODO Fix. Whilst the linear layer compiles and the weights load, PlaidML will
            # error out as the graph is disconnected.
            # The trunk output can be plugged straight into Linear input, but then weights for
            # linear cannot be loaded, and this input would be incorrect (as linear input should
            # be the diff between y_true and y_pred)
            raise NotImplementedError
            return self._linear_net(inputs)  # pylint:disable=unreachable
        return [K.sum(x, axis=-1) for x in inputs]

    def __call__(self,
                 y_true: plaidml.tile.Value,
                 y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Perform the LPIPS Loss Function.

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth batch of images
        y_pred: :class:`plaidml.tile.Value`
            The predicted batch of images

        Returns
        -------
        :class:`plaidml.tile.Value`
            The final loss value, or a ``(loss, per_layer)`` tuple when
            :attr:`_ret_per_layer` is ``True``
        """
        if self._normalize:
            # Rescale inputs from [0, 1] to the [-1, 1] range expected by the trunks
            y_true = (y_true * 2.0) - 1.0
            y_pred = (y_pred * 2.0) - 1.0

        y_true = (y_true - self._shift) / self._scale
        y_pred = (y_pred - self._shift) / self._scale

        net_true = self._trunk_net(y_true)
        net_pred = self._trunk_net(y_pred)

        # Squared difference per feature output layer
        diffs = [K.pow((out_true - out_pred), 2)
                 for out_true, out_pred in zip(net_true, net_pred)]

        # Spatial average per layer
        res = [K.mean(diff, axis=(1, 2), keepdims=True) for diff in self._process_diffs(diffs)]

        val = K.sum(K.concatenate(res), axis=None) / 10.0  # Reduce by factor of 10 'cos this
        # loss is STRONG
        # Bug fix: the original packed (val, res) into a tuple *before* dividing by 10, which
        # raised TypeError (tuple / float) whenever ret_per_layer was True. Scale the scalar
        # first, then pack.
        return (val, res) if self._ret_per_layer else val
|
|
@ -1,562 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Custom Loss Functions for faceswap.py """
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import logging
|
||||
from typing import Callable, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
import plaidml
|
||||
|
||||
from keras import backend as K
|
||||
from lib.plaidml_utils import pad
|
||||
from lib.utils import FaceswapError
|
||||
|
||||
from .feature_loss_plaid import LPIPSLoss #pylint:disable=unused-import # noqa
|
||||
from .perceptual_loss_plaid import DSSIMObjective, GMSDLoss, LDRFLIPLoss, MSSIMLoss #pylint:disable=unused-import # noqa
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint:disable=invalid-name
|
||||
|
||||
|
||||
class FocalFrequencyLoss():  # pylint:disable=too-few-public-methods
    """ Focal Frequencey Loss Function.

    A channels last implementation.

    Notes
    -----
    There is a bug in this implementation that will do an incorrect FFT if
    :attr:`patch_factor` > ``1``, which means incorrect loss will be returned, so keep
    patch factor at 1.

    Parameters
    ----------
    alpha: float, Optional
        Scaling factor of the spectrum weight matrix for flexibility. Default: ``1.0``
    patch_factor: int, Optional
        Factor to crop image patches for patch-based focal frequency loss.
        Default: ``1``
    ave_spectrum: bool, Optional
        ``True`` to use minibatch average spectrum otherwise ``False``. Default: ``False``
    log_matrix: bool, Optional
        ``True`` to adjust the spectrum weight matrix by logarithm otherwise ``False``.
        Default: ``False``
    batch_matrix: bool, Optional
        ``True`` to calculate the spectrum weight matrix using batch-based statistics otherwise
        ``False``. Default: ``False``

    References
    ----------
    https://arxiv.org/pdf/2012.12821.pdf
    https://github.com/EndlessSora/focal-frequency-loss
    """

    def __init__(self,
                 alpha: float = 1.0,
                 patch_factor: int = 1,
                 ave_spectrum: bool = False,
                 log_matrix: bool = False,
                 batch_matrix: bool = False) -> None:
        # Configuration is retained for parity with the TensorFlow implementation even
        # though the PlaidML backend cannot currently execute this loss
        self._alpha = alpha
        self._patch_factor = patch_factor
        self._ave_spectrum = ave_spectrum
        self._log_matrix = log_matrix
        self._batch_matrix = batch_matrix
        self._dims: Tuple[int, int] = (0, 0)

    def __call__(self,
                 y_true: plaidml.tile.Value,
                 y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Call the Focal Frequency Loss Function.

        # TODO Not implemented as:
        - We need a PlaidML replacement for tf.signal
        - The dimensions do not appear to be readable for y_pred

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth batch of images
        y_pred: :class:`plaidml.tile.Value`
            The predicted batch of images

        Returns
        -------
        :class:`plaidml.tile.Value`
            The loss for this batch of images

        Raises
        ------
        FaceswapError
            Always raised: this loss is not implemented for the PlaidML backend
        """
        raise FaceswapError("Focal Frequency Loss is not currently compatible with PlaidML. "
                            "Please select a different Loss method.")
|
||||
|
||||
|
||||
class GeneralizedLoss():  # pylint:disable=too-few-public-methods
    """ Generalized function used to return a large variety of mathematical loss functions.

    The primary benefit is a smooth, differentiable version of L1 loss.

    References
    ----------
    Barron, J. A More General Robust Loss Function - https://arxiv.org/pdf/1701.03077.pdf

    Example
    -------
    >>> a=1.0, x>>c , c=1.0/255.0 # will give a smoothly differentiable version of L1 / MAE loss
    >>> a=1.999999 (limit as a->2), beta=1.0/255.0 # will give L2 / RMSE loss

    Parameters
    ----------
    alpha: float, optional
        Penalty factor. Larger number give larger weight to large deviations. Default: `1.0`
    beta: float, optional
        Scale factor used to adjust to the input scale (i.e. inputs of mean `1e-4` or `256`).
        Default: `1.0/255.0`
    """
    def __init__(self, alpha: float = 1.0, beta: float = 1.0/255.0) -> None:
        self._alpha = alpha
        self._beta = beta

    def __call__(self,
                 y_true: plaidml.tile.Value,
                 y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Call the Generalized Loss Function

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth value
        y_pred: :class:`plaidml.tile.Value`
            The predicted value

        Returns
        -------
        :class:`plaidml.tile.Value`
            The loss value from the results of function(y_pred - y_true)
        """
        alpha = self._alpha
        delta = y_pred - y_true
        # |2 - alpha| appears in both the inner scaling and the outer multiplier
        abs_term = K.abs(2. - alpha)
        inner = K.pow(delta / self._beta, 2.) / abs_term + 1.
        penalty = K.pow(inner, alpha / 2.) - 1.
        return K.mean((abs_term / alpha) * penalty, axis=-1) * self._beta
|
||||
|
||||
|
||||
class GradientLoss():  # pylint:disable=too-few-public-methods
    """ Gradient Loss Function.

    Calculates the first and second order gradient difference between pixels of an image in the x
    and y dimensions. These gradients are then compared between the ground truth and the predicted
    image and the difference is taken. When used as a loss, its minimization will result in
    predicted images approaching the same level of sharpness / blurriness as the ground truth.

    References
    ----------
    TV+TV2 Regularization with Non-Convex Sparseness-Inducing Penalty for Image Restoration,
    Chengwu Lu & Hua Huang, 2014 - http://downloads.hindawi.com/journals/mpe/2014/790547.pdf
    """
    def __init__(self):
        # alpha close to 2 makes the generalized loss approximate L2
        self.generalized_loss = GeneralizedLoss(alpha=1.9999)

    def __call__(self,
                 y_true: plaidml.tile.Value,
                 y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Call the gradient loss function.

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth value
        y_pred: :class:`plaidml.tile.Value`
            The predicted value

        Returns
        -------
        :class:`plaidml.tile.Value`
            The loss value
        """
        tv_weight = 1.0
        tv2_weight = 1.0
        loss = 0.0
        # First order (TV) term: x and y gradients
        loss += tv_weight * (self.generalized_loss(self._diff_x(y_true), self._diff_x(y_pred)) +
                             self.generalized_loss(self._diff_y(y_true), self._diff_y(y_pred)))
        # Second order (TV2) term: xx, yy and (doubled) xy gradients
        loss += tv2_weight * (self.generalized_loss(self._diff_xx(y_true), self._diff_xx(y_pred)) +
                              self.generalized_loss(self._diff_yy(y_true), self._diff_yy(y_pred)) +
                              self.generalized_loss(self._diff_xy(y_true), self._diff_xy(y_pred))
                              * 2.)
        loss = loss / (tv_weight + tv2_weight)
        # TODO simplify to use MSE instead
        return loss

    @classmethod
    def _diff_x(cls, img: plaidml.tile.Value) -> plaidml.tile.Value:
        """ X Difference """
        x_left = img[:, :, 1:2, :] - img[:, :, 0:1, :]
        x_inner = img[:, :, 2:, :] - img[:, :, :-2, :]
        x_right = img[:, :, -1:, :] - img[:, :, -2:-1, :]
        x_out = K.concatenate([x_left, x_inner, x_right], axis=2)
        return x_out * 0.5

    @classmethod
    def _diff_y(cls, img: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Y Difference """
        y_top = img[:, 1:2, :, :] - img[:, 0:1, :, :]
        y_inner = img[:, 2:, :, :] - img[:, :-2, :, :]
        y_bot = img[:, -1:, :, :] - img[:, -2:-1, :, :]
        y_out = K.concatenate([y_top, y_inner, y_bot], axis=1)
        return y_out * 0.5

    @classmethod
    def _diff_xx(cls, img: plaidml.tile.Value) -> plaidml.tile.Value:
        """ X-X Difference """
        x_left = img[:, :, 1:2, :] + img[:, :, 0:1, :]
        x_inner = img[:, :, 2:, :] + img[:, :, :-2, :]
        x_right = img[:, :, -1:, :] + img[:, :, -2:-1, :]
        x_out = K.concatenate([x_left, x_inner, x_right], axis=2)
        return x_out - 2.0 * img

    @classmethod
    def _diff_yy(cls, img: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Y-Y Difference """
        y_top = img[:, 1:2, :, :] + img[:, 0:1, :, :]
        y_inner = img[:, 2:, :, :] + img[:, :-2, :, :]
        y_bot = img[:, -1:, :, :] + img[:, -2:-1, :, :]
        y_out = K.concatenate([y_top, y_inner, y_bot], axis=1)
        return y_out - 2.0 * img

    @classmethod
    def _diff_xy(cls, img: plaidml.tile.Value) -> plaidml.tile.Value:
        """ X-Y Difference

        Bug fix: the original implementation reused the names ``xy_left``/``xy_mid``/
        ``xy_right`` for the second pass, so ``xy_out1`` and ``xy_out2`` were built from
        identical tensors and the mixed-derivative term was always zero. The two passes now
        use distinct names.
        """
        # xout1
        # Left
        top = img[:, 1:2, 1:2, :] + img[:, 0:1, 0:1, :]
        inner = img[:, 2:, 1:2, :] + img[:, :-2, 0:1, :]
        bottom = img[:, -1:, 1:2, :] + img[:, -2:-1, 0:1, :]
        xy1_left = K.concatenate([top, inner, bottom], axis=1)
        # Mid
        top = img[:, 1:2, 2:, :] + img[:, 0:1, :-2, :]
        mid = img[:, 2:, 2:, :] + img[:, :-2, :-2, :]
        bottom = img[:, -1:, 2:, :] + img[:, -2:-1, :-2, :]
        xy1_mid = K.concatenate([top, mid, bottom], axis=1)
        # Right
        top = img[:, 1:2, -1:, :] + img[:, 0:1, -2:-1, :]
        inner = img[:, 2:, -1:, :] + img[:, :-2, -2:-1, :]
        bottom = img[:, -1:, -1:, :] + img[:, -2:-1, -2:-1, :]
        xy1_right = K.concatenate([top, inner, bottom], axis=1)

        # Xout2
        # Left
        top = img[:, 0:1, 1:2, :] + img[:, 1:2, 0:1, :]
        inner = img[:, :-2, 1:2, :] + img[:, 2:, 0:1, :]
        bottom = img[:, -2:-1, 1:2, :] + img[:, -1:, 0:1, :]
        xy2_left = K.concatenate([top, inner, bottom], axis=1)
        # Mid
        top = img[:, 0:1, 2:, :] + img[:, 1:2, :-2, :]
        mid = img[:, :-2, 2:, :] + img[:, 2:, :-2, :]
        bottom = img[:, -2:-1, 2:, :] + img[:, -1:, :-2, :]
        xy2_mid = K.concatenate([top, mid, bottom], axis=1)
        # Right
        top = img[:, 0:1, -1:, :] + img[:, 1:2, -2:-1, :]
        inner = img[:, :-2, -1:, :] + img[:, 2:, -2:-1, :]
        bottom = img[:, -2:-1, -1:, :] + img[:, -1:, -2:-1, :]
        xy2_right = K.concatenate([top, inner, bottom], axis=1)

        xy_out1 = K.concatenate([xy1_left, xy1_mid, xy1_right], axis=2)
        xy_out2 = K.concatenate([xy2_left, xy2_mid, xy2_right], axis=2)
        return (xy_out1 - xy_out2) * 0.25
|
||||
|
||||
|
||||
class LaplacianPyramidLoss():  # pylint:disable=too-few-public-methods
    """ Laplacian Pyramid Loss Function

    Notes
    -----
    Channels last implementation on square images only.

    Parameters
    ----------
    max_levels: int, Optional
        The max number of laplacian pyramid levels to use. Default: `5`
    gaussian_size: int, Optional
        The size of the gaussian kernel. Default: `5`
    gaussian_sigma: float, optional
        The gaussian sigma. Default: 1.0

    References
    ----------
    https://arxiv.org/abs/1707.05776
    https://github.com/nathanaelbosch/generative-latent-optimization/blob/master/utils.py
    """
    def __init__(self,
                 max_levels: int = 5,
                 gaussian_size: int = 5,
                 gaussian_sigma: float = 1.0) -> None:
        self._max_levels = max_levels
        # Per-level weights: each pyramid level is down-weighted by a factor of 4
        self._weights = K.constant([np.power(2., -2 * idx) for idx in range(max_levels + 1)])
        self._gaussian_kernel = self._get_gaussian_kernel(gaussian_size, gaussian_sigma)
        # Cached input shape; populated lazily on the first call
        self._shape: Tuple[int, ...] = ()

    @classmethod
    def _get_gaussian_kernel(cls, size: int, sigma: float) -> plaidml.tile.Value:
        """ Obtain the base gaussian kernel for the Laplacian Pyramid.

        Parameters
        ----------
        size: int, Optional
            The size of the gaussian kernel. Must be an odd number
        sigma: float
            The gaussian sigma

        Returns
        -------
        :class:`plaidml.tile.Value`
            The base single channel Gaussian kernel
        """
        assert size % 2 == 1, ("kernel size must be uneven")
        x_1 = np.linspace(- (size // 2), size // 2, size, dtype="float32")
        x_1 /= np.sqrt(2)*sigma
        x_2 = x_1 ** 2
        # Outer sum of the squared coordinates gives the 2D gaussian exponent
        kernel = np.exp(- x_2[:, None] - x_2[None, :])
        # Normalize so kernel weights sum to 1
        kernel /= kernel.sum()
        kernel = np.reshape(kernel, (size, size, 1, 1))
        return K.constant(kernel)

    def _conv_gaussian(self, inputs: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Perform Gaussian convolution on a batch of images.

        Parameters
        ----------
        inputs: :class:`plaidml.tile.Value`
            The input batch of images to perform Gaussian convolution on.

        Returns
        -------
        :class:`plaidml.tile.Value`
            The convolved images
        """
        channels = self._shape[-1]
        # Tile the single-channel kernel across all input channels
        gauss = K.tile(self._gaussian_kernel, (1, 1, 1, channels))

        # PlaidML doesn't implement replication padding like pytorch. This is an inefficient way to
        # implement it for a square guassian kernel
        size = K.int_shape(self._gaussian_kernel)[1] // 2
        padded_inputs = inputs
        # One pixel of reflect padding per iteration, up to the kernel's half-width
        for _ in range(size):
            padded_inputs = pad(padded_inputs,  # noqa,pylint:disable=no-value-for-parameter,unexpected-keyword-arg
                                ([0, 0], [1, 1], [1, 1], [0, 0]),
                                mode="REFLECT")

        retval = K.conv2d(padded_inputs, gauss, strides=(1, 1), padding="valid")
        return retval

    def _get_laplacian_pyramid(self, inputs: plaidml.tile.Value) -> List[plaidml.tile.Value]:
        """ Obtain the Laplacian Pyramid.

        Parameters
        ----------
        inputs: :class:`plaidml.tile.Value`
            The input batch of images to run through the Laplacian Pyramid

        Returns
        -------
        list
            The tensors produced from the Laplacian Pyramid: one high-frequency residual per
            level, plus the final low-resolution image
        """
        pyramid = []
        current = inputs
        for _ in range(self._max_levels):
            gauss = self._conv_gaussian(current)
            # High-frequency residual for this level
            diff = current - gauss
            pyramid.append(diff)
            # Downsample by 2 for the next level
            current = K.pool2d(gauss, (2, 2), strides=(2, 2), padding="valid", pool_mode="avg")
        pyramid.append(current)
        return pyramid

    def __call__(self,
                 y_true: plaidml.tile.Value,
                 y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Calculate the Laplacian Pyramid Loss.

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth value
        y_pred: :class:`plaidml.tile.Value`
            The predicted value

        Returns
        -------
        :class: `plaidml.tile.Value`
            The loss value
        """
        # Cache the input shape on first call (required by _conv_gaussian)
        if not self._shape:
            self._shape = K.int_shape(y_pred)
        pyramid_true = self._get_laplacian_pyramid(y_true)
        pyramid_pred = self._get_laplacian_pyramid(y_pred)

        # Size-normalized L1 distance per pyramid level
        losses = K.stack([K.sum(K.abs(ppred - ptrue)) / K.cast(K.prod(K.shape(ptrue)), "float32")
                          for ptrue, ppred in zip(pyramid_true, pyramid_pred)])
        loss = K.sum(losses * self._weights)
        return loss
|
||||
|
||||
|
||||
class LInfNorm():  # pylint:disable=too-few-public-methods
    """ Calculate the L-inf norm as a loss function. """

    def __call__(self,
                 y_true: plaidml.tile.Value,
                 y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Call the L-inf norm loss function.

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth value
        y_pred: :class:`plaidml.tile.Value`
            The predicted value

        Returns
        -------
        :class:`plaidml.tile.Value`
            The loss value
        """
        # Largest absolute deviation over the spatial axes, averaged over channels
        abs_diff = K.abs(y_true - y_pred)
        spatial_max = K.max(abs_diff, axis=(1, 2), keepdims=True)
        return K.mean(spatial_max, axis=-1)
|
||||
|
||||
|
||||
class LogCosh():  # pylint:disable=too-few-public-methods
    """Logarithm of the hyperbolic cosine of the prediction error.

    `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and to `abs(x) - log(2)`
    for large `x`. This means that 'logcosh' works mostly like the mean squared error, but will not
    be so strongly affected by the occasional wildly incorrect prediction.
    """
    def __call__(self,
                 y_true: plaidml.tile.Value,
                 y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Call the LogCosh loss function.
        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth value
        y_pred: :class:`plaidml.tile.Value`
            The predicted value

        Returns
        -------
        :class:`plaidml.tile.Value`
            The loss value
        """
        error = y_pred - y_true
        # Numerically stable form: log(cosh(x)) = x + softplus(-2x) - log(2)
        log_two = K.log(K.constant(2., dtype="float32"))
        log_cosh = error + K.softplus(-2. * error) - log_two
        return K.mean(log_cosh, axis=-1)
|
||||
|
||||
|
||||
class LossWrapper():  # pylint:disable=too-few-public-methods
    """ A wrapper class for multiple keras losses to enable multiple weighted loss functions on a
    single output and masking.
    """
    def __init__(self) -> None:
        self.__name__ = "LossWrapper"
        logger.debug("Initializing: %s", self.__class__.__name__)
        # Parallel lists: function, weight and mask channel at the same index belong together
        self._loss_functions: List[Callable] = []
        self._loss_weights: List[float] = []
        self._mask_channels: List[int] = []
        logger.debug("Initialized: %s", self.__class__.__name__)

    def add_loss(self,
                 function: Callable,
                 weight: float = 1.0,
                 mask_channel: int = -1) -> None:
        """ Add the given loss function with the given weight to the loss function chain.

        Parameters
        ----------
        function: :class:`keras.losses.Loss`
            The loss function to add to the loss chain
        weight: float, optional
            The weighting to apply to the loss function. Default: `1.0`
        mask_channel: int, optional
            The channel in the `y_true` image that the mask exists in. Set to `-1` if there is no
            mask for the given loss function. Default: `-1`
        """
        logger.debug("Adding loss: (function: %s, weight: %s, mask_channel: %s)",
                     function, weight, mask_channel)
        self._loss_functions.append(function)
        self._loss_weights.append(weight)
        self._mask_channels.append(mask_channel)

    def __call__(self,
                 y_true: plaidml.tile.Value,
                 y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Call the sub loss functions for the loss wrapper.

        Weights are returned as the weighted sum of the chosen losses.

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth value
        y_pred: :class:`plaidml.tile.Value`
            The predicted value

        Returns
        -------
        :class:`plaidml.tile.Value`
            The final loss value
        """
        loss = 0.0
        for func, weight, mask_channel in zip(self._loss_functions,
                                              self._loss_weights,
                                              self._mask_channels):
            logger.debug("Processing loss function: (func: %s, weight: %s, mask_channel: %s)",
                         func, weight, mask_channel)
            n_true, n_pred = self._apply_mask(y_true, y_pred, mask_channel)
            # Some loss functions requires that y_pred be of a known shape, so specifically
            # reshape the tensor.
            n_pred = K.reshape(n_pred, K.int_shape(y_pred))
            this_loss = func(n_true, n_pred)
            # Reduce each sub-loss to per-sample before applying its weight
            loss_dims = K.ndim(this_loss)
            loss += (K.mean(this_loss, axis=list(range(1, loss_dims))) * weight)
        return loss

    @classmethod
    def _apply_mask(cls,
                    y_true: plaidml.tile.Value,
                    y_pred: plaidml.tile.Value,
                    mask_channel: int,
                    mask_prop: float = 1.0) -> Tuple[plaidml.tile.Value, plaidml.tile.Value]:
        """ Apply the mask to the input y_true and y_pred. If a mask is not required then
        return the unmasked inputs.

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth value
        y_pred: :class:`plaidml.tile.Value`
            The predicted value
        mask_channel: int
            The channel within y_true that the required mask resides in
        mask_prop: float, optional
            The amount of mask propagation. Default: `1.0`

        Returns
        -------
        tuple
            (n_true, n_pred): The ground truth and predicted value tensors with the mask applied
        """
        if mask_channel == -1:
            logger.debug("No mask to apply")
            # Only the first 3 (color) channels take part in the loss
            return y_true[..., :3], y_pred[..., :3]

        logger.debug("Applying mask from channel %s", mask_channel)

        # Broadcast the single-channel mask across the 3 color channels
        mask = K.tile(K.expand_dims(y_true[..., mask_channel], axis=-1), (1, 1, 1, 3))
        # Blend between a full mask (mask_prop=1.0) and no mask (mask_prop=0.0)
        mask_as_k_inv_prop = 1 - mask_prop
        mask = (mask * mask_prop) + mask_as_k_inv_prop

        m_true = y_true[..., :3] * mask
        m_pred = y_pred[..., :3] * mask

        return m_true, m_pred
|
|
@ -1,849 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
""" PlaidML Keras implementation of Perceptual Loss Functions for faceswap.py """
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import plaidml
|
||||
|
||||
from keras import backend as K
|
||||
|
||||
from lib.keras_utils import ColorSpaceConvert, frobenius_norm, replicate_pad
|
||||
from lib.plaidml_utils import pad
|
||||
from lib.utils import FaceswapError
|
||||
|
||||
if sys.version_info < (3, 8):
|
||||
from typing_extensions import Literal
|
||||
else:
|
||||
from typing import Literal
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DSSIMObjective(): # pylint:disable=too-few-public-methods
|
||||
""" DSSIM Loss Function
|
||||
|
||||
Difference of Structural Similarity (DSSIM loss function).
|
||||
|
||||
Adapted from :func:`tensorflow.image.ssim` for a pure keras implentation.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Channels last only. Assumes all input images are the same size and square
|
||||
|
||||
Parameters
|
||||
----------
|
||||
k_1: float, optional
|
||||
Parameter of the SSIM. Default: `0.01`
|
||||
k_2: float, optional
|
||||
Parameter of the SSIM. Default: `0.03`
|
||||
filter_size: int, optional
|
||||
size of gaussian filter Default: `11`
|
||||
filter_sigma: float, optional
|
||||
Width of gaussian filter Default: `1.5`
|
||||
max_value: float, optional
|
||||
Max value of the output. Default: `1.0`
|
||||
|
||||
Notes
|
||||
------
|
||||
You should add a regularization term like a l2 loss in addition to this one.
|
||||
"""
|
||||
def __init__(self,
|
||||
k_1: float = 0.01,
|
||||
k_2: float = 0.03,
|
||||
filter_size: int = 11,
|
||||
filter_sigma: float = 1.5,
|
||||
max_value: float = 1.0) -> None:
|
||||
self._filter_size = filter_size
|
||||
self._filter_sigma = filter_sigma
|
||||
self._kernel = self._get_kernel()
|
||||
|
||||
compensation = 1.0
|
||||
self._c1 = (k_1 * max_value) ** 2
|
||||
self._c2 = ((k_2 * max_value) ** 2) * compensation
|
||||
|
||||
def _get_kernel(self) -> plaidml.tile.Value:
|
||||
""" Obtain the base kernel for performing depthwise convolution.
|
||||
|
||||
Returns
|
||||
-------
|
||||
:class:`plaidml.tile.Value`
|
||||
The gaussian kernel based on selected size and sigma
|
||||
"""
|
||||
coords = np.arange(self._filter_size, dtype="float32")
|
||||
coords -= (self._filter_size - 1) / 2.
|
||||
|
||||
kernel = np.square(coords)
|
||||
kernel *= -0.5 / np.square(self._filter_sigma)
|
||||
kernel = np.reshape(kernel, (1, -1)) + np.reshape(kernel, (-1, 1))
|
||||
kernel = K.constant(np.reshape(kernel, (1, -1)))
|
||||
kernel = K.softmax(kernel)
|
||||
kernel = K.reshape(kernel, (self._filter_size, self._filter_size, 1, 1))
|
||||
return kernel
|
||||
|
||||
@classmethod
|
||||
def _depthwise_conv2d(cls,
|
||||
image: plaidml.tile.Value,
|
||||
kernel: plaidml.tile.Value) -> plaidml.tile.Value:
|
||||
""" Perform a standardized depthwise convolution.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
image: :class:`plaidml.tile.Value`
|
||||
Batch of images, channels last, to perform depthwise convolution
|
||||
kernel: :class:`plaidml.tile.Value`
|
||||
convolution kernel
|
||||
|
||||
Returns
|
||||
-------
|
||||
:class:`plaidml.tile.Value`
|
||||
The output from the convolution
|
||||
"""
|
||||
return K.depthwise_conv2d(image, kernel, strides=(1, 1), padding="valid")
|
||||
|
||||
def _get_ssim(self,
|
||||
y_true: plaidml.tile.Value,
|
||||
y_pred: plaidml.tile.Value) -> Tuple[plaidml.tile.Value, plaidml.tile.Value]:
|
||||
""" Obtain the structural similarity between a batch of true and predicted images.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true: :class:`plaidml.tile.Value`
|
||||
The input batch of ground truth images
|
||||
y_pred: :class:`plaidml.tile.Value`
|
||||
The input batch of predicted images
|
||||
|
||||
Returns
|
||||
-------
|
||||
:class:`plaidml.tile.Value`
|
||||
The SSIM for the given images
|
||||
:class:`plaidml.tile.Value`
|
||||
The Contrast for the given images
|
||||
"""
|
||||
channels = K.int_shape(y_pred)[-1]
|
||||
kernel = K.tile(self._kernel, (1, 1, channels, 1))
|
||||
|
||||
# SSIM luminance measure is (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1)
|
||||
mean_true = self._depthwise_conv2d(y_true, kernel)
|
||||
mean_pred = self._depthwise_conv2d(y_pred, kernel)
|
||||
num_lum = mean_true * mean_pred * 2.0
|
||||
den_lum = K.square(mean_true) + K.square(mean_pred)
|
||||
luminance = (num_lum + self._c1) / (den_lum + self._c1)
|
||||
|
||||
# SSIM contrast-structure measure is (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2)
|
||||
num_con = self._depthwise_conv2d(y_true * y_pred, kernel) * 2.0
|
||||
den_con = self._depthwise_conv2d(K.square(y_true) + K.square(y_pred), kernel)
|
||||
|
||||
contrast = (num_con - num_lum + self._c2) / (den_con - den_lum + self._c2)
|
||||
|
||||
# Average over the height x width dimensions
|
||||
axes = (-3, -2)
|
||||
ssim = K.mean(luminance * contrast, axis=axes)
|
||||
contrast = K.mean(contrast, axis=axes)
|
||||
|
||||
return ssim, contrast
|
||||
|
||||
def __call__(self,
|
||||
y_true: plaidml.tile.Value,
|
||||
y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
|
||||
""" Call the DSSIM or MS-DSSIM Loss Function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true: :class:`plaidml.tile.Value`
|
||||
The input batch of ground truth images
|
||||
y_pred: :class:`plaidml.tile.Value`
|
||||
The input batch of predicted images
|
||||
|
||||
Returns
|
||||
-------
|
||||
:class:`plaidml.tile.Value`
|
||||
The DSSIM or MS-DSSIM for the given images
|
||||
"""
|
||||
ssim = self._get_ssim(y_true, y_pred)[0]
|
||||
retval = (1. - ssim) / 2.0
|
||||
return K.mean(retval)
|
||||
|
||||
|
||||
class GMSDLoss():  # pylint:disable=too-few-public-methods
    """ Gradient Magnitude Similarity Deviation Loss.

    An image quality metric that improves on MS-SSIM whilst being simpler to calculate.

    References
    ----------
    http://www4.comp.polyu.edu.hk/~cslzhang/IQA/GMSD/GMSD.htm
    https://arxiv.org/ftp/arxiv/papers/1308/1308.3052.pdf
    """
    def __call__(self,
                 y_true: plaidml.tile.Value,
                 y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Calculate the Gradient Magnitude Similarity Deviation Loss.

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth value
        y_pred: :class:`plaidml.tile.Value`
            The predicted value

        Returns
        -------
        :class:`plaidml.tile.Value`
            The loss value
        """
        shape = K.int_shape(y_pred)
        edges_true = self._scharr_edges(y_true, True, shape)
        edges_pred = self._scharr_edges(y_pred, True, shape)

        # Gradient magnitude similarity map, with a small constant for stability
        epsilon = 0.0025
        numerator = 2.0 * edges_true * edges_pred
        denominator = K.square(edges_true) + K.square(edges_pred)
        similarity = (numerator + epsilon) / (denominator + epsilon)

        # The deviation (standard deviation) of the similarity map is the loss
        deviation = K.std(similarity, axis=(1, 2, 3), keepdims=True)
        return K.squeeze(deviation, axis=-1)

    @classmethod
    def _scharr_edges(cls,
                      image: plaidml.tile.Value,
                      magnitude: bool,
                      image_shape: Tuple[None, int, int, int]) -> plaidml.tile.Value:
        """ Returns a tensor holding modified Scharr edge maps.

        Parameters
        ----------
        image: :class:`plaidml.tile.Value`
            Image tensor with shape [batch_size, h, w, d] and type float32. The image(s) must be
            2x2 or larger.
        magnitude: bool
            Boolean to determine if the edge magnitude or edge direction is returned. Only
            magnitude (``True``) is implemented for PlaidML
        image_shape: tuple
            The shape of the incoming image

        Returns
        -------
        :class:`plaidml.tile.Value`
            Tensor holding per-channel edge maps with shape [batch_size, h, w, d * 2], where the
            final axis interleaves the dy and dx responses of the modified Scharr filter

        Raises
        ------
        FaceswapError
            If edge direction (``magnitude=False``) is requested
        """
        # 5x5 modified Scharr kernels for dy and dx, shaped (5, 5, 1, 2)
        matrix = np.array([[[[0.00070, 0.00070]],
                            [[0.00520, 0.00370]],
                            [[0.03700, 0.00000]],
                            [[0.00520, -0.0037]],
                            [[0.00070, -0.0007]]],
                           [[[0.00370, 0.00520]],
                            [[0.11870, 0.11870]],
                            [[0.25890, 0.00000]],
                            [[0.11870, -0.1187]],
                            [[0.00370, -0.0052]]],
                           [[[0.00000, 0.03700]],
                            [[0.00000, 0.25890]],
                            [[0.00000, 0.00000]],
                            [[0.00000, -0.2589]],
                            [[0.00000, -0.0370]]],
                           [[[-0.0037, 0.00520]],
                            [[-0.1187, 0.11870]],
                            [[-0.2589, 0.00000]],
                            [[-0.1187, -0.1187]],
                            [[-0.0037, -0.0052]]],
                           [[[-0.0007, 0.00070]],
                            [[-0.0052, 0.00370]],
                            [[-0.0370, 0.00000]],
                            [[-0.0052, -0.0037]],
                            [[-0.0007, -0.0007]]]])
        kernels = K.constant(matrix, dtype='float32')
        kernels = K.tile(kernels, [1, 1, image_shape[-1], 1])

        # Reflect-pad by the kernel radius so the depthwise convolution keeps the
        # input's spatial size. Output shape: [batch_size, h, w, d * 2]
        padded = pad(image, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='REFLECT')
        output = K.depthwise_conv2d(padded, kernels)

        # TODO magnitude not implemented for plaidml
        if not magnitude:  # direction of edges
            raise FaceswapError("Magnitude for GMSD Loss is not implemented in PlaidML")
        # magnitude of edges -- unified x & y edges don't work well with Neural Networks
        return output
|
||||
|
||||
|
||||
class LDRFLIPLoss():  # pylint:disable=too-few-public-methods
    """ Computes the LDR-FLIP error map between two LDR images, assuming the images are observed
    at a certain number of pixels per degree of visual angle.

    References
    ----------
    https://research.nvidia.com/sites/default/files/node/3260/FLIP_Paper.pdf
    https://github.com/NVlabs/flip

    License
    -------
    BSD 3-Clause License
    Copyright (c) 2020-2022, NVIDIA Corporation & AFFILIATES. All rights reserved.
    Redistribution and use in source and binary forms, with or without modification, are permitted
    provided that the following conditions are met:
    Redistributions of source code must retain the above copyright notice, this list of conditions
    and the following disclaimer.
    Redistributions in binary form must reproduce the above copyright notice, this list of
    conditions and the following disclaimer in the documentation and/or other materials provided
    with the distribution.
    Neither the name of the copyright holder nor the names of its contributors may be used to
    endorse or promote products derived from this software without specific prior written
    permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.

    Parameters
    ----------
    computed_distance_exponent: float, Optional
        The computed distance exponent to apply to Hunt adjusted, filtered colors.
        (`qc` in original paper). Default: `0.7`
    feature_exponent: float, Optional
        The feature exponent to apply for increasing the impact of feature difference on the
        final loss value. (`qf` in original paper). Default: `0.5`
    lower_threshold_exponent: float, Optional
        The `pc` exponent for the color pipeline as described in the original paper: Default: `0.4`
    upper_threshold_exponent: float, Optional
        The `pt` exponent for the color pipeline as described in the original paper.
        Default: `0.95`
    epsilon: float
        A small value to improve training stability. Default: `1e-15`
    pixels_per_degree: float, Optional
        The estimated number of pixels per degree of visual angle of the observer. This effectively
        impacts the tolerance when calculating loss. The default corresponds to viewing images on a
        0.7m wide 4K monitor at 0.7m from the display. Default: ``None``
    color_order: str
        The `"BGR"` or `"RGB"` color order of the incoming images
    """
    def __init__(self,
                 computed_distance_exponent: float = 0.7,
                 feature_exponent: float = 0.5,
                 lower_threshold_exponent: float = 0.4,
                 upper_threshold_exponent: float = 0.95,
                 epsilon: float = 1e-15,
                 pixels_per_degree: Optional[float] = None,
                 color_order: Literal["bgr", "rgb"] = "bgr") -> None:
        logger.debug("Initializing: %s (computed_distance_exponent '%s', feature_exponent: %s, "
                     "lower_threshold_exponent: %s, upper_threshold_exponent: %s, epsilon: %s, "
                     "pixels_per_degree: %s, color_order: %s)", self.__class__.__name__,
                     computed_distance_exponent, feature_exponent, lower_threshold_exponent,
                     upper_threshold_exponent, epsilon, pixels_per_degree, color_order)

        self._computed_distance_exponent = computed_distance_exponent
        self._feature_exponent = feature_exponent
        self._pc = lower_threshold_exponent  # `pc` in the original paper
        self._pt = upper_threshold_exponent  # `pt` in the original paper
        self._epsilon = epsilon
        self._color_order = color_order.lower()

        if pixels_per_degree is None:
            # Default: a 0.7m wide 4K (3840px) monitor viewed from 0.7m
            pixels_per_degree = (0.7 * 3840 / 0.7) * np.pi / 180
        self._pixels_per_degree = pixels_per_degree
        self._spatial_filters = _SpatialFilters(pixels_per_degree)
        self._feature_detector = _FeatureDetection(pixels_per_degree)
        logger.debug("Initialized: %s ", self.__class__.__name__)

    def __call__(self,
                 y_true: plaidml.tile.Value,
                 y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Call the LDR Flip Loss Function

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth batch of images
        y_pred: :class:`plaidml.tile.Value`
            The predicted batch of images

        Returns
        -------
        :class:`plaidml.tile.Value`
            The calculated Flip loss value
        """
        # TODO Fix for AMD. This loss function runs fine under plaidML end to end, but the output
        # is NaN when tested on CPU. I cannot find a way to debug the values in plaidML tensors
        # so cannot investigate where the NaNs are getting introduced.
        # This may be a CPU issue (I cannot get plaidML to detect my Nvidia GPU) so currently this
        # loss is enabled. If reports of NaNs then raise a NotImplementedError until issue can be
        # properly addressed
        if self._color_order == "bgr":  # Switch models training in bgr order to rgb
            y_true = y_true[..., 2::-1]
            y_pred = y_pred[..., 2::-1]

        y_true = K.clip(y_true, 0, 1.)
        y_pred = K.clip(y_pred, 0, 1.)

        # FLIP operates in the YCxCz opponent color space
        rgb2ycxcz = ColorSpaceConvert("srgb", "ycxcz", batch_shape=K.int_shape(y_pred))
        true_ycxcz = rgb2ycxcz(y_true)
        pred_ycxcz = rgb2ycxcz(y_pred)

        delta_e_color = self._color_pipeline(true_ycxcz, pred_ycxcz)
        delta_e_features = self._process_features(true_ycxcz, pred_ycxcz)

        # Feature differences amplify the color error (FLIP paper, final error combination)
        loss = K.pow(delta_e_color, 1 - delta_e_features)
        return loss

    def _color_pipeline(self,
                        y_true: plaidml.tile.Value,
                        y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Perform the color processing part of the FLIP loss function

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth batch of images in YCxCz color space
        y_pred: :class:`plaidml.tile.Value`
            The predicted batch of images in YCxCz color space

        Returns
        -------
        :class:`plaidml.tile.Value`
            The exponentiated, maximum HyAB difference between two colors in Hunt-adjusted
            L*A*B* space
        """
        filtered_true = self._spatial_filters(y_true)
        filtered_pred = self._spatial_filters(y_pred)

        rgb2lab = ColorSpaceConvert(from_space="rgb",
                                    to_space="lab",
                                    batch_shape=K.int_shape(filtered_pred))
        preprocessed_true = self._hunt_adjustment(rgb2lab(filtered_true))
        preprocessed_pred = self._hunt_adjustment(rgb2lab(filtered_pred))
        # Pure green and blue provide the reference maximum distance (cmax) used to
        # redistribute the error into the [0, 1] range
        hunt_adjusted_green = self._hunt_adjustment(
            rgb2lab(K.constant(np.array([[[[0.0, 1.0, 0.0]]]]), dtype="float32")))
        hunt_adjusted_blue = self._hunt_adjustment(
            rgb2lab(K.constant(np.array([[[[0.0, 0.0, 1.0]]]]), dtype="float32")))

        delta = self._hyab(preprocessed_true, preprocessed_pred)
        power_delta = K.pow(delta, self._computed_distance_exponent)
        cmax = K.pow(self._hyab(hunt_adjusted_green, hunt_adjusted_blue),
                     self._computed_distance_exponent)
        return self._redistribute_errors(power_delta, cmax)

    def _process_features(self,
                          y_true: plaidml.tile.Value,
                          y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Perform the feature processing part of the FLIP loss function

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth batch of images in YCxCz color space
        y_pred: :class:`plaidml.tile.Value`
            The predicted batch of images in YCxCz color space

        Returns
        -------
        :class:`plaidml.tile.Value`
            The exponentiated features delta
        """
        # Normalize the achromatic (Y) channel to [0, 1]
        col_y_true = (y_true[..., 0:1] + 16) / 116.
        col_y_pred = (y_pred[..., 0:1] + 16) / 116.

        edges_true = self._feature_detector(col_y_true, "edge")
        points_true = self._feature_detector(col_y_true, "point")
        edges_pred = self._feature_detector(col_y_pred, "edge")
        points_pred = self._feature_detector(col_y_pred, "point")

        # The larger of the edge and point differences drives the feature delta
        delta = K.maximum(K.abs(frobenius_norm(edges_true) - frobenius_norm(edges_pred)),
                          K.abs(frobenius_norm(points_pred) - frobenius_norm(points_true)))

        # Clip away from zero for stability before exponentiation
        delta = K.clip(delta, self._epsilon, None)
        return K.pow(((1 / np.sqrt(2)) * delta), self._feature_exponent)

    @classmethod
    def _hunt_adjustment(cls, image: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Apply Hunt-adjustment to an image in L*a*b* color space

        Parameters
        ----------
        image: :class:`plaidml.tile.Value`
            The batch of images in L*a*b* to adjust

        Returns
        -------
        :class:`plaidml.tile.Value`
            The hunt adjusted batch of images in L*a*b color space
        """
        # Scale the chroma channels by luminance (L is in 0-100 hence the 0.01 factor)
        ch_l = image[..., 0:1]
        adjusted = K.concatenate([ch_l, image[..., 1:] * (ch_l * 0.01)], axis=-1)
        return adjusted

    def _hyab(self,
              y_true: plaidml.tile.Value,
              y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Compute the HyAB distance between true and predicted images.

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth batch of images in standard or Hunt-adjusted L*A*B* color space
        y_pred: :class:`plaidml.tile.Value`
            The predicted batch of images in in standard or Hunt-adjusted L*A*B* color space

        Returns
        -------
        :class:`plaidml.tile.Value`
            image tensor containing the per-pixel HyAB distances between true and predicted images
        """
        delta = y_true - y_pred
        # HyAB: city-block distance on luminance + Euclidean distance on chroma
        root = K.sqrt(K.clip(K.pow(delta[..., 0:1], 2), self._epsilon, None))
        delta_norm = frobenius_norm(delta[..., 1:3])
        return root + delta_norm

    def _redistribute_errors(self,
                             power_delta_e_hyab: plaidml.tile.Value,
                             cmax: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Redistribute exponentiated HyAB errors to the [0,1] range

        Parameters
        ----------
        power_delta_e_hyab: :class:`plaidml.tile.Value`
            The exponentiated HyAb distance
        cmax: :class:`plaidml.tile.Value`
            The exponentiated, maximum HyAB difference between two colors in Hunt-adjusted
            L*A*B* space

        Returns
        -------
        :class:`plaidml.tile.Value`
            The redistributed per-pixel HyAB distances (in range [0,1])
        """
        # Piecewise linear remap: errors below pc*cmax scale to [0, pt]; the remainder
        # scales to [pt, 1] (see section 4.2 of the FLIP paper)
        pccmax = self._pc * cmax
        delta_e_c = K.switch(
            power_delta_e_hyab < pccmax,
            (self._pt / pccmax) * power_delta_e_hyab,
            self._pt + ((power_delta_e_hyab - pccmax) / (cmax - pccmax)) * (1.0 - self._pt))
        return delta_e_c
|
||||
|
||||
|
||||
class _SpatialFilters():  # pylint:disable=too-few-public-methods
    """ Filters an image with channel specific spatial contrast sensitivity functions and clips
    result to the unit cube in linear RGB.

    For use with LDRFlipLoss.

    Parameters
    ----------
    pixels_per_degree: float
        The estimated number of pixels per degree of visual angle of the observer. This effectively
        impacts the tolerance when calculating loss.
    """
    def __init__(self, pixels_per_degree: float) -> None:
        self._pixels_per_degree = pixels_per_degree
        self._spatial_filters, self._radius = self._generate_spatial_filters()
        self._ycxcz2rgb = ColorSpaceConvert(from_space="ycxcz", to_space="rgb")

    def _generate_spatial_filters(self) -> Tuple[plaidml.tile.Value, int]:
        """ Generates spatial contrast sensitivity filters with width depending on the number of
        pixels per degree of visual angle of the observer for channels "A" (Achromatic CSF),
        "RG" (Red-Green CSF) and "BY" (Blue-Yellow CSF)

        Returns
        -------
        :class:`plaidml.tile.Value`
            The filter kernels for the 3 channels stacked on the final axis
        int
            The radius of the kernels
        """
        # a1/b1/a2/b2 are the per-channel CSF parameters from the FLIP reference implementation
        mapping = dict(A=dict(a1=1, b1=0.0047, a2=0, b2=1e-5),
                       RG=dict(a1=1, b1=0.0053, a2=0, b2=1e-5),
                       BY=dict(a1=34.1, b1=0.04, a2=13.5, b2=0.025))

        domain, radius = self._get_evaluation_domain(mapping["A"]["b1"],
                                                     mapping["A"]["b2"],
                                                     mapping["RG"]["b1"],
                                                     mapping["RG"]["b2"],
                                                     mapping["BY"]["b1"],
                                                     mapping["BY"]["b2"])

        weights = np.array([self._generate_weights(mapping[channel], domain)
                            for channel in ("A", "RG", "BY")])
        # Move the channel axis last so the kernel is (h, w, channels, 1) for conv2d
        weights = K.constant(np.moveaxis(weights, 0, -1), dtype="float32")

        return weights, radius

    def _get_evaluation_domain(self,
                               b1_a: float,
                               b2_a: float,
                               b1_rg: float,
                               b2_rg: float,
                               b1_by: float,
                               b2_by: float) -> Tuple[np.ndarray, int]:
        """ Obtain the squared-distance grid over which the CSF filters are evaluated.

        Parameters
        ----------
        b1_a, b2_a, b1_rg, b2_rg, b1_by, b2_by: float
            The `b` scale parameters for the "A", "RG" and "BY" channels respectively

        Returns
        -------
        :class:`numpy.ndarray`
            Grid of squared distances (in degrees of visual angle) from the kernel center
        int
            The kernel radius, wide enough for the largest scale parameter
        """
        max_scale_parameter = max([b1_a, b2_a, b1_rg, b2_rg, b1_by, b2_by])
        delta_x = 1.0 / self._pixels_per_degree
        # 3 standard deviations of the widest gaussian, in pixels
        radius = int(np.ceil(3 * np.sqrt(max_scale_parameter / (2 * np.pi**2))
                             * self._pixels_per_degree))
        ax_x, ax_y = np.meshgrid(range(-radius, radius + 1), range(-radius, radius + 1))
        domain = (ax_x * delta_x) ** 2 + (ax_y * delta_x) ** 2
        return domain, radius

    @classmethod
    def _generate_weights(cls,
                          channel: Dict[str, float],
                          domain: np.ndarray) -> np.ndarray:
        """ Evaluate a channel's contrast sensitivity function over the given domain.

        Parameters
        ----------
        channel: dict
            The `a1`, `b1`, `a2`, `b2` CSF parameters for the channel
        domain: :class:`numpy.ndarray`
            Grid of squared distances from the kernel center

        Returns
        -------
        :class:`numpy.ndarray`
            The normalized filter kernel for the channel with a trailing singleton axis
        """
        a_1, b_1, a_2, b_2 = channel["a1"], channel["b1"], channel["a2"], channel["b2"]
        # Sum of two gaussians (the CSF in the spatial domain)
        grad = (a_1 * np.sqrt(np.pi / b_1) * np.exp(-np.pi ** 2 * domain / b_1) +
                a_2 * np.sqrt(np.pi / b_2) * np.exp(-np.pi ** 2 * domain / b_2))
        grad = grad / np.sum(grad)  # normalize so the kernel sums to 1
        grad = np.reshape(grad, (*grad.shape, 1))
        return grad

    def __call__(self, image: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Call the spacial filtering.

        Parameters
        ----------
        image: Tensor
            Image tensor to filter in YCxCz color space

        Returns
        -------
        Tensor
            The input image transformed to linear RGB after filtering with spatial contrast
            sensitivity functions
        """
        # Replicate-pad so the "valid" convolution preserves the spatial size
        padded_image = replicate_pad(image, self._radius)
        image_tilde_opponent = K.conv2d(padded_image,
                                        self._spatial_filters,
                                        strides=(1, 1),
                                        padding="valid")
        rgb = K.clip(self._ycxcz2rgb(image_tilde_opponent), 0., 1.)
        return rgb
|
||||
|
||||
|
||||
class _FeatureDetection():  # pylint:disable=too-few-public-methods
    """ Detect features (i.e. edges and points) in an achromatic YCxCz image.

    For use with LDRFlipLoss.

    Parameters
    ----------
    pixels_per_degree: float
        The number of pixels per degree of visual angle of the observer
    """
    def __init__(self, pixels_per_degree: float) -> None:
        feature_width = 0.082
        self._std = 0.5 * feature_width * pixels_per_degree
        self._radius = int(np.ceil(3 * self._std))
        axis = range(-self._radius, self._radius + 1)
        self._grid = np.meshgrid(axis, axis)
        # Gaussian envelope over the detector grid
        self._gradient = np.exp(-(self._grid[0] ** 2 + self._grid[1] ** 2)
                                / (2 * (self._std ** 2)))

    def __call__(self, image: plaidml.tile.Value, feature_type: str) -> plaidml.tile.Value:
        """ Run the feature detection

        Parameters
        ----------
        image: Tensor
            Batch of images in YCxCz color space with normalized Y values
        feature_type: str
            Type of features to detect (`"edge"` or `"point"`)

        Returns
        -------
        Tensor
            Detected features in the 0-1 range
        """
        if feature_type.lower() == 'edge':
            weights = np.multiply(-self._grid[0], self._gradient)
        else:
            weights = np.multiply(self._grid[0] ** 2 / (self._std ** 2) - 1, self._gradient)

        # Normalize positive and negative weights independently so each half sums to one
        scale_negative = -np.sum(weights[weights < 0])
        scale_positive = np.sum(weights[weights > 0])

        weights = K.constant(weights)
        weights = K.switch(weights < 0, weights / scale_negative, weights / scale_positive)
        kernel = K.expand_dims(K.expand_dims(weights, axis=-1), axis=-1)

        padded = replicate_pad(image, self._radius)
        features_x = K.conv2d(padded, kernel, strides=(1, 1), padding="valid")

        # The transposed kernel detects the same features along the other axis
        kernel = K.permute_dimensions(kernel, (1, 0, 2, 3))
        features_y = K.conv2d(padded, kernel, strides=(1, 1), padding="valid")

        return K.concatenate([features_x, features_y], axis=-1)
|
||||
|
||||
|
||||
class MSSIMLoss(DSSIMObjective):  # pylint:disable=too-few-public-methods
    """ Multiscale Structural Similarity Loss Function

    Parameters
    ----------
    k_1: float, optional
        Parameter of the SSIM. Default: `0.01`
    k_2: float, optional
        Parameter of the SSIM. Default: `0.03`
    filter_size: int, optional
        size of gaussian filter Default: `11`
    filter_sigma: float, optional
        Width of gaussian filter Default: `1.5`
    max_value: float, optional
        Max value of the output. Default: `1.0`
    power_factors: tuple, optional
        Iterable of weights for each of the scales. The number of scales used is the length of the
        list. Index 0 is the unscaled resolution's weight and each increasing scale corresponds to
        the image being downsampled by 2. Defaults to the values obtained in the original paper.
        Default: (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)

    Notes
    ------
    You should add a regularization term like a l2 loss in addition to this one.
    """
    def __init__(self,
                 k_1: float = 0.01,
                 k_2: float = 0.03,
                 filter_size: int = 11,
                 filter_sigma: float = 1.5,
                 max_value: float = 1.0,
                 power_factors: Tuple[float, ...] = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)
                 ) -> None:
        super().__init__(k_1=k_1,
                         k_2=k_2,
                         filter_size=filter_size,
                         filter_sigma=filter_sigma,
                         max_value=max_value)
        self._power_factors = K.constant(power_factors)

    def _get_smallest_size(self, size: int, idx: int) -> int:
        """ Recursive function to obtain the smallest size that the image will be scaled to.
        for MS-SSIM

        Parameters
        ----------
        size: int
            The current scaled size to iterate through
        idx: int
            The current iteration to be performed. When iteration hits zero the value will
            be returned

        Returns
        -------
        int
            The smallest size the image will be scaled to based on the original image size and
            the amount of scaling factors that will occur
        """
        logger.debug("scale id: %s, size: %s", idx, size)
        # Each recursion halves the size, mirroring the 2x downsample per scale
        if idx > 0:
            size = self._get_smallest_size(size // 2, idx - 1)
        return size

    @classmethod
    def _shrink_images(cls, images: List[plaidml.tile.Value]) -> List[plaidml.tile.Value]:
        """ Reduce the dimensional space of a batch of images in half. If the images are an odd
        number of pixels then pad them to an even dimension prior to shrinking

        All incoming images are assumed square.

        Parameters
        ----------
        images: list
            The y_true, y_pred batch of images to be shrunk

        Returns
        -------
        list
            The y_true, y_pred batch shrunk by half
        """
        # NOTE(review): the [1:2] slice inspects the height dimension only. Images are
        # documented as square (see class docstring) so this holds, but [1:3] was
        # presumably intended -- confirm before relying on non-square input
        if any(x % 2 != 0 for x in K.int_shape(images[1])[1:2]):
            images = [pad(img,
                          [[0, 0], [0, 1], [0, 1], [0, 0]],
                          mode="REFLECT")
                      for img in images]

        # 2x2 average pooling halves the spatial dimensions
        images = [K.pool2d(img, (2, 2), strides=(2, 2), padding="valid", pool_mode="avg")
                  for img in images]

        return images

    def _get_ms_ssim(self,
                     y_true: plaidml.tile.Value,
                     y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Obtain the Multiscale Structural Similarity metric.

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The input batch of ground truth images
        y_pred: :class:`plaidml.tile.Value`
            The input batch of predicted images

        Returns
        -------
        :class:`plaidml.tile.Value`
            The MS-SSIM for the given images
        """
        im_size = K.int_shape(y_pred)[1]
        # filter size cannot be larger than the smallest scale, so shrink the inherited
        # gaussian kernel if the deepest scale would be smaller than it
        recursions = K.int_shape(self._power_factors)[0]
        smallest_scale = self._get_smallest_size(im_size, recursions - 1)
        if smallest_scale < self._filter_size:
            self._filter_size = smallest_scale
            self._kernel = self._get_kernel()

        images = [y_true, y_pred]
        contrasts = []

        for idx in range(recursions):
            images = self._shrink_images(images) if idx > 0 else images
            ssim, contrast = self._get_ssim(*images)

            # All scales except the last contribute their contrast measure; the final
            # scale contributes the full SSIM (appended after the loop)
            if idx < recursions - 1:
                contrasts.append(K.relu(K.expand_dims(contrast, axis=-1)))

        contrasts.append(K.relu(K.expand_dims(ssim, axis=-1)))
        mcs_and_ssim = K.concatenate(contrasts, axis=-1)
        ms_ssim = K.pow(mcs_and_ssim, self._power_factors)

        # K.prod does not work in plaidml so slow recursion it is
        out = ms_ssim[..., 0]
        for idx in range(1, recursions):
            out *= ms_ssim[..., idx]
        return out

    def __call__(self,
                 y_true: plaidml.tile.Value,
                 y_pred: plaidml.tile.Value) -> plaidml.tile.Value:
        """ Call the MS-SSIM Loss Function.

        Parameters
        ----------
        y_true: :class:`plaidml.tile.Value`
            The ground truth value
        y_pred: :class:`plaidml.tile.Value`
            The predicted value

        Returns
        -------
        :class:`plaidml.tile.Value`
            The MS-SSIM Loss value
        """
        ms_ssim = self._get_ms_ssim(y_true, y_pred)
        retval = 1. - ms_ssim
        return K.mean(retval)
|
7
lib/model/losses/__init__.py
Normal file
7
lib/model/losses/__init__.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Custom Loss Functions for Faceswap """
|
||||
|
||||
from .feature_loss import LPIPSLoss
|
||||
from .loss import (FocalFrequencyLoss, GeneralizedLoss, GradientLoss,
|
||||
LaplacianPyramidLoss, LInfNorm, LossWrapper)
|
||||
from .perceptual_loss import DSSIMObjective, GMSDLoss, LDRFLIPLoss, MSSIMLoss
|
|
@ -69,20 +69,20 @@ class _LPIPSTrunkNet(): # pylint:disable=too-few-public-methods
|
|||
@property
|
||||
def _nets(self) -> Dict[str, NetInfo]:
|
||||
""" :class:`NetInfo`: The Information about the requested net."""
|
||||
return dict(
|
||||
alex=NetInfo(model_id=15,
|
||||
model_name="alexnet_imagenet_no_top_v1.h5",
|
||||
net=AlexNet,
|
||||
outputs=[f"features.{idx}" for idx in (0, 3, 6, 8, 10)]),
|
||||
squeeze=NetInfo(model_id=16,
|
||||
model_name="squeezenet_imagenet_no_top_v1.h5",
|
||||
net=SqueezeNet,
|
||||
outputs=[f"features.{idx}" for idx in (0, 4, 7, 9, 10, 11, 12)]),
|
||||
vgg16=NetInfo(model_id=17,
|
||||
model_name="vgg16_imagenet_no_top_v1.h5",
|
||||
net=kapp.vgg16.VGG16,
|
||||
init_kwargs=dict(include_top=False, weights=None),
|
||||
outputs=[f"block{i + 1}_conv{2 if i < 2 else 3}" for i in range(5)]))
|
||||
return {
|
||||
"alex": NetInfo(model_id=15,
|
||||
model_name="alexnet_imagenet_no_top_v1.h5",
|
||||
net=AlexNet,
|
||||
outputs=[f"features.{idx}" for idx in (0, 3, 6, 8, 10)]),
|
||||
"squeeze": NetInfo(model_id=16,
|
||||
model_name="squeezenet_imagenet_no_top_v1.h5",
|
||||
net=SqueezeNet,
|
||||
outputs=[f"features.{idx}" for idx in (0, 4, 7, 9, 10, 11, 12)]),
|
||||
"vgg16": NetInfo(model_id=17,
|
||||
model_name="vgg16_imagenet_no_top_v1.h5",
|
||||
net=kapp.vgg16.VGG16,
|
||||
init_kwargs={"include_top": False, "weights": None},
|
||||
outputs=[f"block{i + 1}_conv{2 if i < 2 else 3}" for i in range(5)])}
|
||||
|
||||
@classmethod
|
||||
def _normalize_output(cls, inputs: tf.Tensor, epsilon: float = 1e-10) -> tf.Tensor:
|
||||
|
@ -178,13 +178,13 @@ class _LPIPSLinearNet(_LPIPSTrunkNet): # pylint:disable=too-few-public-methods
|
|||
@property
|
||||
def _nets(self) -> Dict[str, NetInfo]:
|
||||
""" :class:`NetInfo`: The Information about the requested net."""
|
||||
return dict(
|
||||
alex=NetInfo(model_id=18,
|
||||
model_name="alexnet_lpips_v1.h5",),
|
||||
squeeze=NetInfo(model_id=19,
|
||||
model_name="squeezenet_lpips_v1.h5"),
|
||||
vgg16=NetInfo(model_id=20,
|
||||
model_name="vgg16_lpips_v1.h5"))
|
||||
return {
|
||||
"alex": NetInfo(model_id=18,
|
||||
model_name="alexnet_lpips_v1.h5",),
|
||||
"squeeze": NetInfo(model_id=19,
|
||||
model_name="squeezenet_lpips_v1.h5"),
|
||||
"vgg16": NetInfo(model_id=20,
|
||||
model_name="vgg16_lpips_v1.h5")}
|
||||
|
||||
def _linear_block(self, net_output_layer: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
|
||||
""" Build a linear block for a trunk network output.
|
||||
|
@ -275,7 +275,7 @@ class LPIPSLoss(): # pylint:disable=too-few-public-methods
|
|||
``True`` to return the loss value per feature output layer otherwise ``False``.
|
||||
Default: ``False``
|
||||
"""
|
||||
def __init__(self,
|
||||
def __init__(self, # pylint:disable=too-many-arguments
|
||||
trunk_network: str,
|
||||
trunk_pretrained: bool = True,
|
||||
trunk_eval_mode: bool = True,
|
|
@ -10,12 +10,9 @@ import numpy as np
|
|||
import tensorflow as tf
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.python.keras.engine import compile_utils # noqa pylint:disable=no-name-in-module,import-error
|
||||
from tensorflow.python.keras.engine import compile_utils # pylint:disable=no-name-in-module
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
|
||||
from .feature_loss_tf import LPIPSLoss #pylint:disable=unused-import # noqa
|
||||
from .perceptual_loss_tf import DSSIMObjective, GMSDLoss, LDRFLIPLoss, MSSIMLoss #pylint:disable=unused-import # noqa
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -523,7 +520,7 @@ class LaplacianPyramidLoss(): # pylint:disable=too-few-public-methods
|
|||
|
||||
class LInfNorm(): # pylint:disable=too-few-public-methods
|
||||
""" Calculate the L-inf norm as a loss function. """
|
||||
def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor: # noqa,pylint:disable=no-self-use
|
||||
def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
|
||||
""" Call the L-inf norm loss function.
|
||||
|
||||
Parameters
|
|
@ -536,9 +536,9 @@ class _SpatialFilters(): # pylint:disable=too-few-public-methods
|
|||
key with the Filter kernel corresponding to the spatial contrast sensitivity function
|
||||
of channel and kernel's radius
|
||||
"""
|
||||
mapping = dict(A=dict(a1=1, b1=0.0047, a2=0, b2=1e-5),
|
||||
RG=dict(a1=1, b1=0.0053, a2=0, b2=1e-5),
|
||||
BY=dict(a1=34.1, b1=0.04, a2=13.5, b2=0.025))
|
||||
mapping = {"A": {"a1": 1, "b1": 0.0047, "a2": 0, "b2": 1e-5},
|
||||
"RG": {"a1": 1, "b1": 0.0053, "a2": 0, "b2": 1e-5},
|
||||
"BY": {"a1": 34.1, "b1": 0.04, "a2": 13.5, "b2": 0.025}}
|
||||
|
||||
domain, radius = self._get_evaluation_domain(mapping["A"]["b1"],
|
||||
mapping["A"]["b2"],
|
||||
|
@ -603,7 +603,7 @@ class _SpatialFilters(): # pylint:disable=too-few-public-methods
|
|||
|
||||
|
||||
class _FeatureDetection(): # pylint:disable=too-few-public-methods
|
||||
""" Detect features (i.e. edges amd points) in an achromatic YCxCz image.
|
||||
""" Detect features (i.e. edges and points) in an achromatic YCxCz image.
|
||||
|
||||
For use with LDRFlipLoss.
|
||||
|
|
@ -1,18 +1,17 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Ports of existing NN Architecture for use in faceswap.py """
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
from typing import Optional, Tuple
|
||||
import typing as T
|
||||
|
||||
from lib.utils import get_backend
|
||||
import tensorflow as tf
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.layers import Concatenate, Conv2D, Input, MaxPool2D, ZeroPadding2D
|
||||
from keras.models import Model
|
||||
from plaidml.tile import Value as Tensor
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Concatenate, Conv2D, Input, MaxPool2D, ZeroPadding2D # noqa pylint:disable=no-name-in-module,import-error
|
||||
from tensorflow.keras.models import Model # noqa pylint:disable=no-name-in-module,import-error
|
||||
# Fix intellisense/linting for tf.keras' thoroughly broken import system
|
||||
keras = tf.keras
|
||||
layers = keras.layers
|
||||
Model = keras.models.Model
|
||||
|
||||
if T.TYPE_CHECKING:
|
||||
from tensorflow import Tensor
|
||||
|
||||
|
||||
|
@ -32,7 +31,7 @@ class _net(): # pylint:disable=too-few-public-methods
|
|||
The input shape for the model. Default: ``None``
|
||||
"""
|
||||
def __init__(self,
|
||||
input_shape: Optional[Tuple[int, int, int]] = None) -> None:
|
||||
input_shape: T.Optional[T.Tuple[int, int, int]] = None) -> None:
|
||||
logger.debug("Initializing: %s (input_shape: %s)", self.__class__.__name__, input_shape)
|
||||
self._input_shape = (None, None, 3) if input_shape is None else input_shape
|
||||
assert len(self._input_shape) == 3 and self._input_shape[-1] == 3, (
|
||||
|
@ -57,7 +56,7 @@ class AlexNet(_net): # pylint:disable=too-few-public-methods
|
|||
input_shape, Tuple, optional
|
||||
The input shape for the model. Default: ``None``
|
||||
"""
|
||||
def __init__(self, input_shape: Optional[Tuple[int, int, int]] = None) -> None:
|
||||
def __init__(self, input_shape: T.Optional[T.Tuple[int, int, int]] = None) -> None:
|
||||
super().__init__(input_shape)
|
||||
self._feature_indices = [0, 3, 6, 8, 10] # For naming equivalent to PyTorch
|
||||
self._filters = [64, 192, 384, 256, 256] # Filters at each block
|
||||
|
@ -76,7 +75,7 @@ class AlexNet(_net): # pylint:disable=too-few-public-methods
|
|||
|
||||
Parameters
|
||||
----------
|
||||
inputs: :class:`plaidml.tile.Value` or :class:`tf.Tensor`
|
||||
inputs: :class:`tf.Tensor`
|
||||
The input tensor to the block
|
||||
padding: int
|
||||
The amount of zero paddin to apply prior to convolution
|
||||
|
@ -93,20 +92,20 @@ class AlexNet(_net): # pylint:disable=too-few-public-methods
|
|||
|
||||
Returns
|
||||
-------
|
||||
:class:`plaidml.tile.Value` or :class:`tf.Tensor`
|
||||
:class:`tf.Tensor`
|
||||
The output of the Convolutional block
|
||||
"""
|
||||
name = f"features.{block_idx}"
|
||||
var_x = inputs
|
||||
if max_pool:
|
||||
var_x = MaxPool2D(pool_size=3, strides=2, name=f"{name}.pool")(var_x)
|
||||
var_x = ZeroPadding2D(padding=padding, name=f"{name}.pad")(var_x)
|
||||
var_x = Conv2D(filters,
|
||||
kernel_size=kernel_size,
|
||||
strides=strides,
|
||||
padding="valid",
|
||||
activation="relu",
|
||||
name=name)(var_x)
|
||||
var_x = layers.MaxPool2D(pool_size=3, strides=2, name=f"{name}.pool")(var_x)
|
||||
var_x = layers.ZeroPadding2D(padding=padding, name=f"{name}.pad")(var_x)
|
||||
var_x = layers.Conv2D(filters,
|
||||
kernel_size=kernel_size,
|
||||
strides=strides,
|
||||
padding="valid",
|
||||
activation="relu",
|
||||
name=name)(var_x)
|
||||
return var_x
|
||||
|
||||
def __call__(self) -> Model:
|
||||
|
@ -117,7 +116,7 @@ class AlexNet(_net): # pylint:disable=too-few-public-methods
|
|||
:class:`keras.models.Model`
|
||||
The compiled AlexNet model
|
||||
"""
|
||||
inputs = Input(self._input_shape)
|
||||
inputs = layers.Input(self._input_shape)
|
||||
var_x = inputs
|
||||
kernel_size = 11
|
||||
strides = 4
|
||||
|
@ -164,7 +163,7 @@ class SqueezeNet(_net): # pylint:disable=too-few-public-methods
|
|||
|
||||
Parameters
|
||||
----------
|
||||
inputs: :class:`plaidml.tile.Value` or :class:`tf.Tensor`
|
||||
inputs: :class:`tf.Tensor`
|
||||
The input to the fire block
|
||||
squeeze_planes: int
|
||||
The number of filters for the squeeze convolution
|
||||
|
@ -175,15 +174,20 @@ class SqueezeNet(_net): # pylint:disable=too-few-public-methods
|
|||
|
||||
Returns
|
||||
-------
|
||||
:class:`plaidml.tile.Value` or :class:`tf.Tensor`
|
||||
:class:`tf.Tensor`
|
||||
The output of the SqueezeNet fire block
|
||||
"""
|
||||
name = f"features.{block_idx}"
|
||||
squeezed = Conv2D(squeeze_planes, 1, activation="relu", name=f"{name}.squeeze")(inputs)
|
||||
expand1 = Conv2D(expand_planes, 1, activation="relu", name=f"{name}.expand1x1")(squeezed)
|
||||
expand3 = Conv2D(expand_planes, 3,
|
||||
activation="relu", padding="same", name=f"{name}.expand3x3")(squeezed)
|
||||
return Concatenate(axis=-1, name=name)([expand1, expand3])
|
||||
squeezed = layers.Conv2D(squeeze_planes, 1,
|
||||
activation="relu", name=f"{name}.squeeze")(inputs)
|
||||
expand1 = layers.Conv2D(expand_planes, 1,
|
||||
activation="relu", name=f"{name}.expand1x1")(squeezed)
|
||||
expand3 = layers.Conv2D(expand_planes,
|
||||
3,
|
||||
activation="relu",
|
||||
padding="same",
|
||||
name=f"{name}.expand3x3")(squeezed)
|
||||
return layers.Concatenate(axis=-1, name=name)([expand1, expand3])
|
||||
|
||||
def __call__(self) -> Model:
|
||||
""" Create the SqueezeNet Model
|
||||
|
@ -193,15 +197,15 @@ class SqueezeNet(_net): # pylint:disable=too-few-public-methods
|
|||
:class:`keras.models.Model`
|
||||
The compiled SqueezeNet model
|
||||
"""
|
||||
inputs = Input(self._input_shape)
|
||||
var_x = Conv2D(64, 3, strides=2, activation="relu", name="features.0")(inputs)
|
||||
inputs = layers.Input(self._input_shape)
|
||||
var_x = layers.Conv2D(64, 3, strides=2, activation="relu", name="features.0")(inputs)
|
||||
|
||||
block_idx = 2
|
||||
squeeze = 16
|
||||
expand = 64
|
||||
for idx in range(4):
|
||||
if idx < 3:
|
||||
var_x = MaxPool2D(pool_size=3, strides=2)(var_x)
|
||||
var_x = layers.MaxPool2D(pool_size=3, strides=2)(var_x)
|
||||
block_idx += 1
|
||||
var_x = self._fire(var_x, squeeze, expand, block_idx)
|
||||
block_idx += 1
|
||||
|
|
|
@ -1,30 +1,20 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Neural Network Blocks for faceswap.py. """
|
||||
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
from typing import Dict, Optional, Tuple, Union
|
||||
import typing as T
|
||||
|
||||
from lib.utils import get_backend
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import ( # pylint:disable=import-error
|
||||
Activation, Add, BatchNormalization, Concatenate, Conv2D as KConv2D, Conv2DTranspose,
|
||||
DepthwiseConv2D as KDepthwiseConv2d, LeakyReLU, PReLU, SeparableConv2D, UpSampling2D)
|
||||
from tensorflow.keras.initializers import he_uniform, VarianceScaling # noqa:E501 # pylint:disable=import-error
|
||||
|
||||
from .initializers import ICNR, ConvolutionAware
|
||||
from .layers import PixelShuffler, ReflectionPadding2D, Swish, KResizeImages
|
||||
from .normalization import InstanceNormalization
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.layers import (
|
||||
Activation, Add, BatchNormalization, Concatenate, Conv2D as KConv2D, Conv2DTranspose,
|
||||
DepthwiseConv2D as KDepthwiseConv2d, LeakyReLU, PReLU, SeparableConv2D, UpSampling2D)
|
||||
from keras.initializers import he_uniform, VarianceScaling # pylint:disable=no-name-in-module
|
||||
# type checking:
|
||||
import keras
|
||||
from plaidml.tile import Value as Tensor # pylint:disable=import-error
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import ( # noqa pylint:disable=no-name-in-module,import-error
|
||||
Activation, Add, BatchNormalization, Concatenate, Conv2D as KConv2D, Conv2DTranspose,
|
||||
DepthwiseConv2D as KDepthwiseConv2d, LeakyReLU, PReLU, SeparableConv2D, UpSampling2D)
|
||||
from tensorflow.keras.initializers import he_uniform, VarianceScaling # noqa pylint:disable=no-name-in-module,import-error
|
||||
# type checking:
|
||||
if T.TYPE_CHECKING:
|
||||
from tensorflow import keras
|
||||
from tensorflow import Tensor
|
||||
|
||||
|
@ -33,7 +23,7 @@ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
|||
|
||||
|
||||
_CONFIG: dict = {}
|
||||
_NAMES: Dict[str, int] = {}
|
||||
_NAMES: T.Dict[str, int] = {}
|
||||
|
||||
|
||||
def set_config(configuration: dict) -> None:
|
||||
|
@ -199,7 +189,7 @@ class Conv2DOutput(): # pylint:disable=too-few-public-methods
|
|||
"""
|
||||
def __init__(self,
|
||||
filters: int,
|
||||
kernel_size: Union[int, Tuple[int]],
|
||||
kernel_size: T.Union[int, T.Tuple[int]],
|
||||
activation: str = "sigmoid",
|
||||
padding: str = "same", **kwargs) -> None:
|
||||
self._name = kwargs.pop("name") if "name" in kwargs else _get_name(
|
||||
|
@ -275,11 +265,11 @@ class Conv2DBlock(): # pylint:disable=too-few-public-methods
|
|||
"""
|
||||
def __init__(self,
|
||||
filters: int,
|
||||
kernel_size: Union[int, Tuple[int, int]] = 5,
|
||||
strides: Union[int, Tuple[int, int]] = 2,
|
||||
kernel_size: T.Union[int, T.Tuple[int, int]] = 5,
|
||||
strides: T.Union[int, T.Tuple[int, int]] = 2,
|
||||
padding: str = "same",
|
||||
normalization: Optional[str] = None,
|
||||
activation: Optional[str] = "leakyrelu",
|
||||
normalization: T.Optional[str] = None,
|
||||
activation: T.Optional[str] = "leakyrelu",
|
||||
use_depthwise: bool = False,
|
||||
relu_alpha: float = 0.1,
|
||||
**kwargs) -> None:
|
||||
|
@ -372,8 +362,8 @@ class SeparableConv2DBlock(): # pylint:disable=too-few-public-methods
|
|||
"""
|
||||
def __init__(self,
|
||||
filters: int,
|
||||
kernel_size: Union[int, Tuple[int, int]] = 5,
|
||||
strides: Union[int, Tuple[int, int]] = 2, **kwargs) -> None:
|
||||
kernel_size: T.Union[int, T.Tuple[int, int]] = 5,
|
||||
strides: T.Union[int, T.Tuple[int, int]] = 2, **kwargs) -> None:
|
||||
self._name = _get_name(f"separableconv2d_{filters}")
|
||||
logger.debug("name: %s, filters: %s, kernel_size: %s, strides: %s, kwargs: %s)",
|
||||
self._name, filters, kernel_size, strides, kwargs)
|
||||
|
@ -444,11 +434,11 @@ class UpscaleBlock(): # pylint:disable=too-few-public-methods
|
|||
|
||||
def __init__(self,
|
||||
filters: int,
|
||||
kernel_size: Union[int, Tuple[int, int]] = 3,
|
||||
kernel_size: T.Union[int, T.Tuple[int, int]] = 3,
|
||||
padding: str = "same",
|
||||
scale_factor: int = 2,
|
||||
normalization: Optional[str] = None,
|
||||
activation: Optional[str] = "leakyrelu",
|
||||
normalization: T.Optional[str] = None,
|
||||
activation: T.Optional[str] = "leakyrelu",
|
||||
**kwargs) -> None:
|
||||
self._name = _get_name(f"upscale_{filters}")
|
||||
logger.debug("name: %s. filters: %s, kernel_size: %s, padding: %s, scale_factor: %s, "
|
||||
|
@ -531,9 +521,9 @@ class Upscale2xBlock(): # pylint:disable=too-few-public-methods
|
|||
"""
|
||||
def __init__(self,
|
||||
filters: int,
|
||||
kernel_size: Union[int, Tuple[int, int]] = 3,
|
||||
kernel_size: T.Union[int, T.Tuple[int, int]] = 3,
|
||||
padding: str = "same",
|
||||
activation: Optional[str] = "leakyrelu",
|
||||
activation: T.Optional[str] = "leakyrelu",
|
||||
interpolation: str = "bilinear",
|
||||
sr_ratio: float = 0.5,
|
||||
scale_factor: int = 2,
|
||||
|
@ -625,9 +615,9 @@ class UpscaleResizeImagesBlock(): # pylint:disable=too-few-public-methods
|
|||
"""
|
||||
def __init__(self,
|
||||
filters: int,
|
||||
kernel_size: Union[int, Tuple[int, int]] = 3,
|
||||
kernel_size: T.Union[int, T.Tuple[int, int]] = 3,
|
||||
padding: str = "same",
|
||||
activation: Optional[str] = "leakyrelu",
|
||||
activation: T.Optional[str] = "leakyrelu",
|
||||
scale_factor: int = 2,
|
||||
interpolation: str = "bilinear") -> None:
|
||||
self._name = _get_name(f"upscale_ri_{filters}")
|
||||
|
@ -710,9 +700,9 @@ class UpscaleDNYBlock(): # pylint:disable=too-few-public-methods
|
|||
"""
|
||||
def __init__(self,
|
||||
filters: int,
|
||||
kernel_size: Union[int, Tuple[int, int]] = 3,
|
||||
kernel_size: T.Union[int, T.Tuple[int, int]] = 3,
|
||||
padding: str = "same",
|
||||
activation: Optional[str] = "leakyrelu",
|
||||
activation: T.Optional[str] = "leakyrelu",
|
||||
size: int = 2,
|
||||
interpolation: str = "bilinear",
|
||||
**kwargs) -> None:
|
||||
|
@ -767,7 +757,7 @@ class ResidualBlock(): # pylint:disable=too-few-public-methods
|
|||
"""
|
||||
def __init__(self,
|
||||
filters: int,
|
||||
kernel_size: Union[int, Tuple[int, int]] = 3,
|
||||
kernel_size: T.Union[int, T.Tuple[int, int]] = 3,
|
||||
padding: str = "same",
|
||||
**kwargs) -> None:
|
||||
self._name = _get_name(f"residual_{filters}")
|
||||
|
|
|
@ -1,205 +1,18 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Normalization methods for faceswap.py common to both Plaid and Tensorflow Backends """
|
||||
|
||||
import sys
|
||||
""" Normalization methods for faceswap.py specific to Tensorflow backend """
|
||||
import inspect
|
||||
import sys
|
||||
|
||||
from lib.utils import get_backend
|
||||
import tensorflow as tf
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.utils import get_custom_objects # pylint:disable=no-name-in-module
|
||||
from keras.layers import Layer, InputSpec
|
||||
from keras import initializers, regularizers, constraints, backend as K
|
||||
from keras.backend import normalize_data_format # pylint:disable=no-name-in-module
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.utils import get_custom_objects # noqa pylint:disable=no-name-in-module,import-error
|
||||
from tensorflow.keras.layers import Layer, InputSpec # noqa pylint:disable=no-name-in-module,import-error
|
||||
from tensorflow.keras import initializers, regularizers, constraints, backend as K # noqa pylint:disable=no-name-in-module,import-error
|
||||
from tensorflow.python.keras.utils.conv_utils import normalize_data_format # noqa pylint:disable=no-name-in-module
|
||||
# Fix intellisense/linting for tf.keras' thoroughly broken import system
|
||||
from tensorflow.python.keras.utils.conv_utils import normalize_data_format # noqa:E501 # pylint:disable=no-name-in-module
|
||||
keras = tf.keras
|
||||
layers = keras.layers
|
||||
K = keras.backend
|
||||
|
||||
|
||||
class InstanceNormalization(Layer):
|
||||
"""Instance normalization layer (Lei Ba et al, 2016, Ulyanov et al., 2016).
|
||||
|
||||
Normalize the activations of the previous layer at each step, i.e. applies a transformation
|
||||
that maintains the mean activation close to 0 and the activation standard deviation close to 1.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
axis: int, optional
|
||||
The axis that should be normalized (typically the features axis). For instance, after a
|
||||
`Conv2D` layer with `data_format="channels_first"`, set `axis=1` in
|
||||
:class:`InstanceNormalization`. Setting `axis=None` will normalize all values in each
|
||||
instance of the batch. Axis 0 is the batch dimension. `axis` cannot be set to 0 to avoid
|
||||
errors. Default: ``None``
|
||||
epsilon: float, optional
|
||||
Small float added to variance to avoid dividing by zero. Default: `1e-3`
|
||||
center: bool, optional
|
||||
If ``True``, add offset of `beta` to normalized tensor. If ``False``, `beta` is ignored.
|
||||
Default: ``True``
|
||||
scale: bool, optional
|
||||
If ``True``, multiply by `gamma`. If ``False``, `gamma` is not used. When the next layer
|
||||
is linear (also e.g. `relu`), this can be disabled since the scaling will be done by
|
||||
the next layer. Default: ``True``
|
||||
beta_initializer: str, optional
|
||||
Initializer for the beta weight. Default: `"zeros"`
|
||||
gamma_initializer: str, optional
|
||||
Initializer for the gamma weight. Default: `"ones"`
|
||||
beta_regularizer: str, optional
|
||||
Optional regularizer for the beta weight. Default: ``None``
|
||||
gamma_regularizer: str, optional
|
||||
Optional regularizer for the gamma weight. Default: ``None``
|
||||
beta_constraint: float, optional
|
||||
Optional constraint for the beta weight. Default: ``None``
|
||||
gamma_constraint: float, optional
|
||||
Optional constraint for the gamma weight. Default: ``None``
|
||||
|
||||
References
|
||||
----------
|
||||
- Layer Normalization - https://arxiv.org/abs/1607.06450
|
||||
|
||||
- Instance Normalization: The Missing Ingredient for Fast Stylization - \
|
||||
https://arxiv.org/abs/1607.08022
|
||||
"""
|
||||
# pylint:disable=too-many-instance-attributes,too-many-arguments
|
||||
def __init__(self,
|
||||
axis=None,
|
||||
epsilon=1e-3,
|
||||
center=True,
|
||||
scale=True,
|
||||
beta_initializer="zeros",
|
||||
gamma_initializer="ones",
|
||||
beta_regularizer=None,
|
||||
gamma_regularizer=None,
|
||||
beta_constraint=None,
|
||||
gamma_constraint=None,
|
||||
**kwargs):
|
||||
self.beta = None
|
||||
self.gamma = None
|
||||
super().__init__(**kwargs)
|
||||
self.supports_masking = True
|
||||
self.axis = axis
|
||||
self.epsilon = epsilon
|
||||
self.center = center
|
||||
self.scale = scale
|
||||
self.beta_initializer = initializers.get(beta_initializer)
|
||||
self.gamma_initializer = initializers.get(gamma_initializer)
|
||||
self.beta_regularizer = regularizers.get(beta_regularizer)
|
||||
self.gamma_regularizer = regularizers.get(gamma_regularizer)
|
||||
self.beta_constraint = constraints.get(beta_constraint)
|
||||
self.gamma_constraint = constraints.get(gamma_constraint)
|
||||
|
||||
def build(self, input_shape):
|
||||
"""Creates the layer weights.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_shape: tensor
|
||||
Keras tensor (future input to layer) or ``list``/``tuple`` of Keras tensors to
|
||||
reference for weight shape computations.
|
||||
"""
|
||||
ndim = len(input_shape)
|
||||
if self.axis == 0:
|
||||
raise ValueError("Axis cannot be zero")
|
||||
|
||||
if (self.axis is not None) and (ndim == 2):
|
||||
raise ValueError("Cannot specify axis for rank 1 tensor")
|
||||
|
||||
self.input_spec = InputSpec(ndim=ndim) # pylint:disable=attribute-defined-outside-init
|
||||
|
||||
if self.axis is None:
|
||||
shape = (1,)
|
||||
else:
|
||||
shape = (input_shape[self.axis],)
|
||||
|
||||
if self.scale:
|
||||
self.gamma = self.add_weight(shape=shape,
|
||||
name="gamma",
|
||||
initializer=self.gamma_initializer,
|
||||
regularizer=self.gamma_regularizer,
|
||||
constraint=self.gamma_constraint)
|
||||
else:
|
||||
self.gamma = None
|
||||
if self.center:
|
||||
self.beta = self.add_weight(shape=shape,
|
||||
name="beta",
|
||||
initializer=self.beta_initializer,
|
||||
regularizer=self.beta_regularizer,
|
||||
constraint=self.beta_constraint)
|
||||
else:
|
||||
self.beta = None
|
||||
self.built = True # pylint:disable=attribute-defined-outside-init
|
||||
|
||||
def call(self, inputs, training=None): # pylint:disable=arguments-differ,unused-argument
|
||||
"""This is where the layer's logic lives.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
inputs: tensor
|
||||
Input tensor, or list/tuple of input tensors
|
||||
|
||||
Returns
|
||||
-------
|
||||
tensor
|
||||
A tensor or list/tuple of tensors
|
||||
"""
|
||||
input_shape = K.int_shape(inputs)
|
||||
reduction_axes = list(range(0, len(input_shape)))
|
||||
|
||||
if self.axis is not None:
|
||||
del reduction_axes[self.axis]
|
||||
|
||||
del reduction_axes[0]
|
||||
|
||||
mean = K.mean(inputs, reduction_axes, keepdims=True)
|
||||
stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon
|
||||
normed = (inputs - mean) / stddev
|
||||
|
||||
broadcast_shape = [1] * len(input_shape)
|
||||
if self.axis is not None:
|
||||
broadcast_shape[self.axis] = input_shape[self.axis]
|
||||
|
||||
if self.scale:
|
||||
broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
|
||||
normed = normed * broadcast_gamma
|
||||
if self.center:
|
||||
broadcast_beta = K.reshape(self.beta, broadcast_shape)
|
||||
normed = normed + broadcast_beta
|
||||
return normed
|
||||
|
||||
def get_config(self):
|
||||
"""Returns the config of the layer.
|
||||
|
||||
A layer config is a Python dictionary (serializable) containing the configuration of a
|
||||
layer. The same layer can be reinstated later (without its trained weights) from this
|
||||
configuration.
|
||||
|
||||
The configuration of a layer does not include connectivity information, nor the layer
|
||||
class name. These are handled by `Network` (one layer of abstraction above).
|
||||
|
||||
Returns
|
||||
--------
|
||||
dict
|
||||
A python dictionary containing the layer configuration
|
||||
"""
|
||||
config = {
|
||||
"axis": self.axis,
|
||||
"epsilon": self.epsilon,
|
||||
"center": self.center,
|
||||
"scale": self.scale,
|
||||
"beta_initializer": initializers.serialize(self.beta_initializer),
|
||||
"gamma_initializer": initializers.serialize(self.gamma_initializer),
|
||||
"beta_regularizer": regularizers.serialize(self.beta_regularizer),
|
||||
"gamma_regularizer": regularizers.serialize(self.gamma_regularizer),
|
||||
"beta_constraint": constraints.serialize(self.beta_constraint),
|
||||
"gamma_constraint": constraints.serialize(self.gamma_constraint)
|
||||
}
|
||||
base_config = super().get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class AdaInstanceNormalization(Layer):
|
||||
class AdaInstanceNormalization(layers.Layer): # type:ignore[name-defined]
|
||||
""" Adaptive Instance Normalization Layer for Keras.
|
||||
|
||||
Parameters
|
||||
|
@ -302,7 +115,7 @@ class AdaInstanceNormalization(Layer):
|
|||
base_config = super().get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
def compute_output_shape(self, input_shape): # pylint:disable=no-self-use
|
||||
def compute_output_shape(self, input_shape):
|
||||
""" Calculate the output shape from this layer.
|
||||
|
||||
Parameters
|
||||
|
@ -318,7 +131,7 @@ class AdaInstanceNormalization(Layer):
|
|||
return input_shape[0]
|
||||
|
||||
|
||||
class GroupNormalization(Layer):
|
||||
class GroupNormalization(layers.Layer): # type:ignore[name-defined]
|
||||
""" Group Normalization
|
||||
|
||||
Parameters
|
||||
|
@ -357,10 +170,10 @@ class GroupNormalization(Layer):
|
|||
self.gamma = None
|
||||
super().__init__(**kwargs)
|
||||
self.axis = axis if isinstance(axis, (list, tuple)) else [axis]
|
||||
self.gamma_init = initializers.get(gamma_init)
|
||||
self.beta_init = initializers.get(beta_init)
|
||||
self.gamma_regularizer = regularizers.get(gamma_regularizer)
|
||||
self.beta_regularizer = regularizers.get(beta_regularizer)
|
||||
self.gamma_init = keras.initializers.get(gamma_init)
|
||||
self.beta_init = keras.initializers.get(beta_init)
|
||||
self.gamma_regularizer = keras.regularizers.get(gamma_regularizer)
|
||||
self.beta_regularizer = keras.regularizers.get(beta_regularizer)
|
||||
self.epsilon = epsilon
|
||||
self.group = group
|
||||
self.data_format = normalize_data_format(data_format)
|
||||
|
@ -376,7 +189,7 @@ class GroupNormalization(Layer):
|
|||
Keras tensor (future input to layer) or ``list``/``tuple`` of Keras tensors to
|
||||
reference for weight shape computations.
|
||||
"""
|
||||
input_spec = [InputSpec(shape=input_shape)]
|
||||
input_spec = [layers.InputSpec(shape=input_shape)]
|
||||
self.input_spec = input_spec # pylint:disable=attribute-defined-outside-init
|
||||
shape = [1 for _ in input_shape]
|
||||
if self.data_format == 'channels_last':
|
||||
|
@ -397,7 +210,7 @@ class GroupNormalization(Layer):
|
|||
name='beta')
|
||||
self.built = True # pylint:disable=attribute-defined-outside-init
|
||||
|
||||
def call(self, inputs, mask=None): # pylint:disable=unused-argument,arguments-differ
|
||||
def call(self, inputs, *args, **kwargs): # noqa:C901
|
||||
"""This is where the layer's logic lives.
|
||||
|
||||
Parameters
|
||||
|
@ -486,16 +299,351 @@ class GroupNormalization(Layer):
|
|||
"""
|
||||
config = {'epsilon': self.epsilon,
|
||||
'axis': self.axis,
|
||||
'gamma_init': initializers.serialize(self.gamma_init),
|
||||
'beta_init': initializers.serialize(self.beta_init),
|
||||
'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
|
||||
'beta_regularizer': regularizers.serialize(self.gamma_regularizer),
|
||||
'gamma_init': keras.initializers.serialize(self.gamma_init),
|
||||
'beta_init': keras.initializers.serialize(self.beta_init),
|
||||
'gamma_regularizer': keras.regularizers.serialize(self.gamma_regularizer),
|
||||
'beta_regularizer': keras.regularizers.serialize(self.gamma_regularizer),
|
||||
'group': self.group}
|
||||
base_config = super().get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class InstanceNormalization(layers.Layer): # type:ignore[name-defined]
|
||||
"""Instance normalization layer (Lei Ba et al, 2016, Ulyanov et al., 2016).
|
||||
|
||||
Normalize the activations of the previous layer at each step, i.e. applies a transformation
|
||||
that maintains the mean activation close to 0 and the activation standard deviation close to 1.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
axis: int, optional
|
||||
The axis that should be normalized (typically the features axis). For instance, after a
|
||||
`Conv2D` layer with `data_format="channels_first"`, set `axis=1` in
|
||||
:class:`InstanceNormalization`. Setting `axis=None` will normalize all values in each
|
||||
instance of the batch. Axis 0 is the batch dimension. `axis` cannot be set to 0 to avoid
|
||||
errors. Default: ``None``
|
||||
epsilon: float, optional
|
||||
Small float added to variance to avoid dividing by zero. Default: `1e-3`
|
||||
center: bool, optional
|
||||
If ``True``, add offset of `beta` to normalized tensor. If ``False``, `beta` is ignored.
|
||||
Default: ``True``
|
||||
scale: bool, optional
|
||||
If ``True``, multiply by `gamma`. If ``False``, `gamma` is not used. When the next layer
|
||||
is linear (also e.g. `relu`), this can be disabled since the scaling will be done by
|
||||
the next layer. Default: ``True``
|
||||
beta_initializer: str, optional
|
||||
Initializer for the beta weight. Default: `"zeros"`
|
||||
gamma_initializer: str, optional
|
||||
Initializer for the gamma weight. Default: `"ones"`
|
||||
beta_regularizer: str, optional
|
||||
Optional regularizer for the beta weight. Default: ``None``
|
||||
gamma_regularizer: str, optional
|
||||
Optional regularizer for the gamma weight. Default: ``None``
|
||||
beta_constraint: float, optional
|
||||
Optional constraint for the beta weight. Default: ``None``
|
||||
gamma_constraint: float, optional
|
||||
Optional constraint for the gamma weight. Default: ``None``
|
||||
|
||||
References
|
||||
----------
|
||||
- Layer Normalization - https://arxiv.org/abs/1607.06450
|
||||
|
||||
- Instance Normalization: The Missing Ingredient for Fast Stylization - \
|
||||
https://arxiv.org/abs/1607.08022
|
||||
"""
|
||||
# pylint:disable=too-many-instance-attributes,too-many-arguments
|
||||
def __init__(self,
|
||||
axis=None,
|
||||
epsilon=1e-3,
|
||||
center=True,
|
||||
scale=True,
|
||||
beta_initializer="zeros",
|
||||
gamma_initializer="ones",
|
||||
beta_regularizer=None,
|
||||
gamma_regularizer=None,
|
||||
beta_constraint=None,
|
||||
gamma_constraint=None,
|
||||
**kwargs):
|
||||
self.beta = None
|
||||
self.gamma = None
|
||||
super().__init__(**kwargs)
|
||||
self.supports_masking = True
|
||||
self.axis = axis
|
||||
self.epsilon = epsilon
|
||||
self.center = center
|
||||
self.scale = scale
|
||||
self.beta_initializer = keras.initializers.get(beta_initializer)
|
||||
self.gamma_initializer = keras.initializers.get(gamma_initializer)
|
||||
self.beta_regularizer = keras.regularizers.get(beta_regularizer)
|
||||
self.gamma_regularizer = keras.regularizers.get(gamma_regularizer)
|
||||
self.beta_constraint = keras.constraints.get(beta_constraint)
|
||||
self.gamma_constraint = keras.constraints.get(gamma_constraint)
|
||||
|
||||
def build(self, input_shape):
|
||||
"""Creates the layer weights.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_shape: tensor
|
||||
Keras tensor (future input to layer) or ``list``/``tuple`` of Keras tensors to
|
||||
reference for weight shape computations.
|
||||
"""
|
||||
ndim = len(input_shape)
|
||||
if self.axis == 0:
|
||||
raise ValueError("Axis cannot be zero")
|
||||
|
||||
if (self.axis is not None) and (ndim == 2):
|
||||
raise ValueError("Cannot specify axis for rank 1 tensor")
|
||||
|
||||
self.input_spec = layers.InputSpec(ndim=ndim) # noqa:E501 pylint:disable=attribute-defined-outside-init
|
||||
|
||||
if self.axis is None:
|
||||
shape = (1,)
|
||||
else:
|
||||
shape = (input_shape[self.axis],)
|
||||
|
||||
if self.scale:
|
||||
self.gamma = self.add_weight(shape=shape,
|
||||
name="gamma",
|
||||
initializer=self.gamma_initializer,
|
||||
regularizer=self.gamma_regularizer,
|
||||
constraint=self.gamma_constraint)
|
||||
else:
|
||||
self.gamma = None
|
||||
if self.center:
|
||||
self.beta = self.add_weight(shape=shape,
|
||||
name="beta",
|
||||
initializer=self.beta_initializer,
|
||||
regularizer=self.beta_regularizer,
|
||||
constraint=self.beta_constraint)
|
||||
else:
|
||||
self.beta = None
|
||||
self.built = True # pylint:disable=attribute-defined-outside-init
|
||||
|
||||
def call(self, inputs, training=None): # pylint:disable=arguments-differ,unused-argument
|
||||
"""This is where the layer's logic lives.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
inputs: tensor
|
||||
Input tensor, or list/tuple of input tensors
|
||||
|
||||
Returns
|
||||
-------
|
||||
tensor
|
||||
A tensor or list/tuple of tensors
|
||||
"""
|
||||
input_shape = K.int_shape(inputs)
|
||||
reduction_axes = list(range(0, len(input_shape)))
|
||||
|
||||
if self.axis is not None:
|
||||
del reduction_axes[self.axis]
|
||||
|
||||
del reduction_axes[0]
|
||||
|
||||
mean = K.mean(inputs, reduction_axes, keepdims=True)
|
||||
stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon
|
||||
normed = (inputs - mean) / stddev
|
||||
|
||||
broadcast_shape = [1] * len(input_shape)
|
||||
if self.axis is not None:
|
||||
broadcast_shape[self.axis] = input_shape[self.axis]
|
||||
|
||||
if self.scale:
|
||||
broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
|
||||
normed = normed * broadcast_gamma
|
||||
if self.center:
|
||||
broadcast_beta = K.reshape(self.beta, broadcast_shape)
|
||||
normed = normed + broadcast_beta
|
||||
return normed
|
||||
|
||||
def get_config(self):
|
||||
"""Returns the config of the layer.
|
||||
|
||||
A layer config is a Python dictionary (serializable) containing the configuration of a
|
||||
layer. The same layer can be reinstated later (without its trained weights) from this
|
||||
configuration.
|
||||
|
||||
The configuration of a layer does not include connectivity information, nor the layer
|
||||
class name. These are handled by `Network` (one layer of abstraction above).
|
||||
|
||||
Returns
|
||||
--------
|
||||
dict
|
||||
A python dictionary containing the layer configuration
|
||||
"""
|
||||
config = {
|
||||
"axis": self.axis,
|
||||
"epsilon": self.epsilon,
|
||||
"center": self.center,
|
||||
"scale": self.scale,
|
||||
"beta_initializer": keras.initializers.serialize(self.beta_initializer),
|
||||
"gamma_initializer": keras.initializers.serialize(self.gamma_initializer),
|
||||
"beta_regularizer": keras.regularizers.serialize(self.beta_regularizer),
|
||||
"gamma_regularizer": keras.regularizers.serialize(self.gamma_regularizer),
|
||||
"beta_constraint": keras.constraints.serialize(self.beta_constraint),
|
||||
"gamma_constraint": keras.constraints.serialize(self.gamma_constraint)
|
||||
}
|
||||
base_config = super().get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class RMSNormalization(layers.Layer): # type:ignore[name-defined]
|
||||
""" Root Mean Square Layer Normalization (Biao Zhang, Rico Sennrich, 2019)
|
||||
|
||||
RMSNorm is a simplification of the original layer normalization (LayerNorm). LayerNorm is a
|
||||
regularization technique that might handle the internal covariate shift issue so as to
|
||||
stabilize the layer activations and improve model convergence. It has been proved quite
|
||||
successful in NLP-based model. In some cases, LayerNorm has become an essential component
|
||||
to enable model optimization, such as in the SOTA NMT model Transformer.
|
||||
|
||||
RMSNorm simplifies LayerNorm by removing the mean-centering operation, or normalizing layer
|
||||
activations with RMS statistic.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
axis: int
|
||||
The axis to normalize across. Typically this is the features axis. The left-out axes are
|
||||
typically the batch axis/axes. This argument defaults to `-1`, the last dimension in the
|
||||
input.
|
||||
epsilon: float, optional
|
||||
Small float added to variance to avoid dividing by zero. Default: `1e-8`
|
||||
partial: float, optional
|
||||
Partial multiplier for calculating pRMSNorm. Valid values are between `0.0` and `1.0`.
|
||||
Setting to `0.0` or `1.0` disables. Default: `0.0`
|
||||
bias: bool, optional
|
||||
Whether to use a bias term for RMSNorm. Disabled by default because RMSNorm does not
|
||||
enforce re-centering invariance. Default ``False``
|
||||
kwargs: dict
|
||||
Standard keras layer kwargs
|
||||
|
||||
References
|
||||
----------
|
||||
- RMS Normalization - https://arxiv.org/abs/1910.07467
|
||||
- Official implementation - https://github.com/bzhangGo/rmsnorm
|
||||
"""
|
||||
def __init__(self, axis=-1, epsilon=1e-8, partial=0.0, bias=False, **kwargs):
|
||||
self.scale = None
|
||||
self.offset = 0
|
||||
super().__init__(**kwargs)
|
||||
|
||||
# Checks
|
||||
if not isinstance(axis, int):
|
||||
raise TypeError(f"Expected an int for the argument 'axis', but received: {axis}")
|
||||
|
||||
if not 0.0 <= partial <= 1.0:
|
||||
raise ValueError(f"partial must be between 0.0 and 1.0, but received {partial}")
|
||||
|
||||
self.axis = axis
|
||||
self.epsilon = epsilon
|
||||
self.partial = partial
|
||||
self.bias = bias
|
||||
self.offset = 0.
|
||||
|
||||
def build(self, input_shape):
|
||||
""" Validate and populate :attr:`axis`
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_shape: tensor
|
||||
Keras tensor (future input to layer) or ``list``/``tuple`` of Keras tensors to
|
||||
reference for weight shape computations.
|
||||
"""
|
||||
ndims = len(input_shape)
|
||||
if ndims is None:
|
||||
raise ValueError(f"Input shape {input_shape} has undefined rank.")
|
||||
|
||||
# Resolve negative axis
|
||||
if self.axis < 0:
|
||||
self.axis += ndims
|
||||
|
||||
# Validate axes
|
||||
if self.axis < 0 or self.axis >= ndims:
|
||||
raise ValueError(f"Invalid axis: {self.axis}")
|
||||
|
||||
param_shape = [input_shape[self.axis]]
|
||||
self.scale = self.add_weight(
|
||||
name="scale",
|
||||
shape=param_shape,
|
||||
initializer="ones")
|
||||
if self.bias:
|
||||
self.offset = self.add_weight(
|
||||
name="offset",
|
||||
shape=param_shape,
|
||||
initializer="zeros")
|
||||
|
||||
self.built = True # pylint:disable=attribute-defined-outside-init
|
||||
|
||||
def call(self, inputs, *args, **kwargs):
|
||||
""" Call Root Mean Square Layer Normalization
|
||||
|
||||
Parameters
|
||||
----------
|
||||
inputs: tensor
|
||||
Input tensor, or list/tuple of input tensors
|
||||
|
||||
Returns
|
||||
-------
|
||||
tensor
|
||||
A tensor or list/tuple of tensors
|
||||
"""
|
||||
# Compute the axes along which to reduce the mean / variance
|
||||
input_shape = K.int_shape(inputs)
|
||||
layer_size = input_shape[self.axis]
|
||||
|
||||
if self.partial in (0.0, 1.0):
|
||||
mean_square = K.mean(K.square(inputs), axis=self.axis, keepdims=True)
|
||||
else:
|
||||
partial_size = int(layer_size * self.partial)
|
||||
partial_x, _ = tf.split( # pylint:disable=redundant-keyword-arg,no-value-for-parameter
|
||||
inputs,
|
||||
[partial_size, layer_size - partial_size],
|
||||
axis=self.axis)
|
||||
mean_square = K.mean(K.square(partial_x), axis=self.axis, keepdims=True)
|
||||
|
||||
recip_square_root = tf.math.rsqrt(mean_square + self.epsilon)
|
||||
output = self.scale * inputs * recip_square_root + self.offset
|
||||
return output
|
||||
|
||||
def compute_output_shape(self, input_shape):
|
||||
""" The output shape of the layer is the same as the input shape.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_shape: tuple
|
||||
The input shape to the layer
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple
|
||||
The output shape to the layer
|
||||
"""
|
||||
return input_shape
|
||||
|
||||
def get_config(self):
|
||||
"""Returns the config of the layer.
|
||||
|
||||
A layer config is a Python dictionary (serializable) containing the configuration of a
|
||||
layer. The same layer can be reinstated later (without its trained weights) from this
|
||||
configuration.
|
||||
|
||||
The configuration of a layer does not include connectivity information, nor the layer
|
||||
class name. These are handled by `Network` (one layer of abstraction above).
|
||||
|
||||
Returns
|
||||
--------
|
||||
dict
|
||||
A python dictionary containing the layer configuration
|
||||
"""
|
||||
base_config = super().get_config()
|
||||
config = {"axis": self.axis,
|
||||
"epsilon": self.epsilon,
|
||||
"partial": self.partial,
|
||||
"bias": self.bias}
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
# Update normalization into Keras custom objects
|
||||
for name, obj in inspect.getmembers(sys.modules[__name__]):
|
||||
if inspect.isclass(obj) and obj.__module__ == __name__:
|
||||
get_custom_objects().update({name: obj})
|
||||
keras.utils.get_custom_objects().update({name: obj})
|
|
@ -1,13 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Conditional imports depending on whether the AMD version is installed or not """
|
||||
|
||||
from lib.utils import get_backend
|
||||
from .normalization_common import AdaInstanceNormalization # noqa
|
||||
from .normalization_common import GroupNormalization # noqa
|
||||
from .normalization_common import InstanceNormalization # noqa
|
||||
|
||||
|
||||
if get_backend() == "amd":
|
||||
from .normalization_plaid import LayerNormalization, RMSNormalization # noqa
|
||||
else:
|
||||
from .normalization_tf import LayerNormalization, RMSNormalization # noqa
|
|
@ -1,384 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Normalization methods for faceswap.py. """
|
||||
|
||||
import sys
|
||||
import inspect
|
||||
|
||||
from plaidml.op import slice_tensor
|
||||
from keras.layers import Layer
|
||||
from keras import initializers, regularizers, constraints
|
||||
from keras import backend as K
|
||||
from keras.utils import get_custom_objects
|
||||
|
||||
|
||||
class LayerNormalization(Layer):
|
||||
"""Instance normalization layer (Lei Ba et al, 2016). Implementation adapted from
|
||||
tensorflow.keras implementation and https://github.com/CyberZHG/keras-layer-normalization
|
||||
|
||||
Normalize the activations of the previous layer for each given example in a batch
|
||||
independently, rather than across a batch like Batch Normalization. i.e. applies a
|
||||
transformation that maintains the mean activation within each example close to 0 and the
|
||||
activation standard deviation close to 1.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
axis: int or list/tuple
|
||||
The axis or axes to normalize across. Typically this is the features axis/axes.
|
||||
The left-out axes are typically the batch axis/axes. This argument defaults to `-1`, the
|
||||
last dimension in the input.
|
||||
epsilon: float, optional
|
||||
Small float added to variance to avoid dividing by zero. Default: `1e-3`
|
||||
center: bool, optional
|
||||
If ``True``, add offset of `beta` to normalized tensor. If ``False``, `beta` is ignored.
|
||||
Default: ``True``
|
||||
scale: bool, optional
|
||||
If ``True``, multiply by `gamma`. If ``False``, `gamma` is not used. When the next layer
|
||||
is linear (also e.g. `relu`), this can be disabled since the scaling will be done by
|
||||
the next layer. Default: ``True``
|
||||
beta_initializer: str, optional
|
||||
Initializer for the beta weight. Default: `"zeros"`
|
||||
gamma_initializer: str, optional
|
||||
Initializer for the gamma weight. Default: `"ones"`
|
||||
beta_regularizer: str, optional
|
||||
Optional regularizer for the beta weight. Default: ``None``
|
||||
gamma_regularizer: str, optional
|
||||
Optional regularizer for the gamma weight. Default: ``None``
|
||||
beta_constraint: float, optional
|
||||
Optional constraint for the beta weight. Default: ``None``
|
||||
gamma_constraint: float, optional
|
||||
Optional constraint for the gamma weight. Default: ``None``
|
||||
kwargs: dict
|
||||
Standard keras layer kwargs
|
||||
|
||||
References
|
||||
----------
|
||||
- Layer Normalization - https://arxiv.org/abs/1607.06450
|
||||
- Keras implementation - https://github.com/CyberZHG/keras-layer-normalization
|
||||
"""
|
||||
def __init__(self,
|
||||
axis=-1,
|
||||
epsilon=1e-3,
|
||||
center=True,
|
||||
scale=True,
|
||||
beta_initializer="zeros",
|
||||
gamma_initializer="ones",
|
||||
beta_regularizer=None,
|
||||
gamma_regularizer=None,
|
||||
beta_constraint=None,
|
||||
gamma_constraint=None,
|
||||
**kwargs):
|
||||
|
||||
self.gamma = None
|
||||
self.beta = None
|
||||
super().__init__(**kwargs)
|
||||
|
||||
if isinstance(axis, (list, tuple)):
|
||||
self.axis = axis[:]
|
||||
elif isinstance(axis, int):
|
||||
self.axis = axis
|
||||
else:
|
||||
raise TypeError("Expected an int or a list/tuple of ints for the argument 'axis', "
|
||||
f"but received: {axis}")
|
||||
|
||||
self.epsilon = epsilon
|
||||
self.center = center
|
||||
self.scale = scale
|
||||
self.beta_initializer = initializers.get(beta_initializer)
|
||||
self.gamma_initializer = initializers.get(gamma_initializer)
|
||||
self.beta_regularizer = regularizers.get(beta_regularizer)
|
||||
self.gamma_regularizer = regularizers.get(gamma_regularizer)
|
||||
self.beta_constraint = constraints.get(beta_constraint)
|
||||
self.gamma_constraint = constraints.get(gamma_constraint)
|
||||
self.supports_masking = True
|
||||
|
||||
def build(self, input_shape):
|
||||
"""Creates the layer weights.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_shape: tensor
|
||||
Keras tensor (future input to layer) or ``list``/``tuple`` of Keras tensors to
|
||||
reference for weight shape computations.
|
||||
"""
|
||||
ndims = len(input_shape)
|
||||
if ndims is None:
|
||||
raise ValueError(f"Input shape {input_shape} has undefined rank.")
|
||||
|
||||
# Convert axis to list and resolve negatives
|
||||
if isinstance(self.axis, int):
|
||||
self.axis = [self.axis]
|
||||
elif isinstance(self.axis, tuple):
|
||||
self.axis = list(self.axis)
|
||||
for idx, axs in enumerate(self.axis):
|
||||
if axs < 0:
|
||||
self.axis[idx] = ndims + axs
|
||||
|
||||
# Validate axes
|
||||
for axs in self.axis:
|
||||
if axs < 0 or axs >= ndims:
|
||||
raise ValueError(f"Invalid axis: {axs}")
|
||||
if len(self.axis) != len(set(self.axis)):
|
||||
raise ValueError("Duplicate axis: {}".format(tuple(self.axis)))
|
||||
|
||||
param_shape = [input_shape[dim] for dim in self.axis]
|
||||
if self.scale:
|
||||
self.gamma = self.add_weight(
|
||||
name="gamma",
|
||||
shape=param_shape,
|
||||
initializer=self.gamma_initializer,
|
||||
regularizer=self.gamma_regularizer,
|
||||
constraint=self.gamma_constraint)
|
||||
if self.center:
|
||||
self.beta = self.add_weight(
|
||||
name='beta',
|
||||
shape=param_shape,
|
||||
initializer=self.beta_initializer,
|
||||
regularizer=self.beta_regularizer,
|
||||
constraint=self.beta_constraint)
|
||||
|
||||
self.built = True # pylint:disable=attribute-defined-outside-init
|
||||
|
||||
def call(self, inputs, **kwargs): # pylint:disable=unused-argument
|
||||
"""This is where the layer's logic lives.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
inputs: tensor
|
||||
Input tensor, or list/tuple of input tensors
|
||||
|
||||
Returns
|
||||
-------
|
||||
tensor
|
||||
A tensor or list/tuple of tensors
|
||||
"""
|
||||
# Compute the axes along which to reduce the mean / variance
|
||||
input_shape = K.int_shape(inputs)
|
||||
ndims = len(input_shape)
|
||||
|
||||
# Broadcasting only necessary for norm when the axis is not just the last dimension
|
||||
broadcast_shape = [1] * ndims
|
||||
for dim in self.axis:
|
||||
broadcast_shape[dim] = input_shape[dim]
|
||||
|
||||
def _broadcast(var):
|
||||
if (var is not None and len(var.shape) != ndims and self.axis != [ndims - 1]):
|
||||
return K.reshape(var, broadcast_shape)
|
||||
return var
|
||||
|
||||
# Calculate the moments on the last axis (layer activations).
|
||||
mean = K.mean(inputs, self.axis, keepdims=True)
|
||||
variance = K.mean(K.square(inputs - mean), axis=self.axis, keepdims=True)
|
||||
std = K.sqrt(variance + self.epsilon)
|
||||
outputs = (inputs - mean) / std
|
||||
|
||||
scale, offset = _broadcast(self.gamma), _broadcast(self.beta)
|
||||
if self.scale:
|
||||
outputs *= scale
|
||||
if self.center:
|
||||
outputs *= offset
|
||||
|
||||
return outputs
|
||||
|
||||
def compute_output_shape(self, input_shape): # pylint:disable=no-self-use
|
||||
""" The output shape of the layer is the same as the input shape.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_shape: tuple
|
||||
The input shape to the layer
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple
|
||||
The output shape to the layer
|
||||
"""
|
||||
return input_shape
|
||||
|
||||
def get_config(self):
|
||||
"""Returns the config of the layer.
|
||||
|
||||
A layer config is a Python dictionary (serializable) containing the configuration of a
|
||||
layer. The same layer can be reinstated later (without its trained weights) from this
|
||||
configuration.
|
||||
|
||||
The configuration of a layer does not include connectivity information, nor the layer
|
||||
class name. These are handled by `Network` (one layer of abstraction above).
|
||||
|
||||
Returns
|
||||
--------
|
||||
dict
|
||||
A python dictionary containing the layer configuration
|
||||
"""
|
||||
base_config = super().get_config()
|
||||
config = dict(axis=self.axis,
|
||||
epsilon=self.epsilon,
|
||||
center=self.center,
|
||||
scale=self.scale,
|
||||
beta_initializer=initializers.serialize(self.beta_initializer),
|
||||
gamma_initializer=initializers.serialize(self.gamma_initializer),
|
||||
beta_regularizer=regularizers.serialize(self.beta_regularizer),
|
||||
gamma_regularizer=regularizers.serialize(self.gamma_regularizer),
|
||||
beta_constraint=constraints.serialize(self.beta_constraint),
|
||||
gamma_constraint=constraints.serialize(self.gamma_constraint))
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
class RMSNormalization(Layer):
|
||||
""" Root Mean Square Layer Normalization (Biao Zhang, Rico Sennrich, 2019)
|
||||
|
||||
RMSNorm is a simplification of the original layer normalization (LayerNorm). LayerNorm is a
|
||||
regularization technique that might handle the internal covariate shift issue so as to
|
||||
stabilize the layer activations and improve model convergence. It has been proved quite
|
||||
successful in NLP-based model. In some cases, LayerNorm has become an essential component
|
||||
to enable model optimization, such as in the SOTA NMT model Transformer.
|
||||
|
||||
RMSNorm simplifies LayerNorm by removing the mean-centering operation, or normalizing layer
|
||||
activations with RMS statistic.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
axis: int
|
||||
The axis to normalize across. Typically this is the features axis. The left-out axes are
|
||||
typically the batch axis/axes. This argument defaults to `-1`, the last dimension in the
|
||||
input.
|
||||
epsilon: float, optional
|
||||
Small float added to variance to avoid dividing by zero. Default: `1e-8`
|
||||
partial: float, optional
|
||||
Partial multiplier for calculating pRMSNorm. Valid values are between `0.0` and `1.0`.
|
||||
Setting to `0.0` or `1.0` disables. Default: `0.0`
|
||||
bias: bool, optional
|
||||
Whether to use a bias term for RMSNorm. Disabled by default because RMSNorm does not
|
||||
enforce re-centering invariance. Default ``False``
|
||||
kwargs: dict
|
||||
Standard keras layer kwargs
|
||||
|
||||
References
|
||||
----------
|
||||
- RMS Normalization - https://arxiv.org/abs/1910.07467
|
||||
- Official implementation - https://github.com/bzhangGo/rmsnorm
|
||||
"""
|
||||
def __init__(self, axis=-1, epsilon=1e-8, partial=0.0, bias=False, **kwargs):
|
||||
self.scale = None
|
||||
self.offset = 0
|
||||
super().__init__(**kwargs)
|
||||
|
||||
# Checks
|
||||
if not isinstance(axis, int):
|
||||
raise TypeError(f"Expected an int for the argument 'axis', but received: {axis}")
|
||||
|
||||
if not 0.0 <= partial <= 1.0:
|
||||
raise ValueError(f"partial must be between 0.0 and 1.0, but received {partial}")
|
||||
|
||||
self.axis = axis
|
||||
self.epsilon = epsilon
|
||||
self.partial = partial
|
||||
self.bias = bias
|
||||
self.offset = 0.
|
||||
|
||||
def build(self, input_shape):
|
||||
""" Validate and populate :attr:`axis`
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_shape: tensor
|
||||
Keras tensor (future input to layer) or ``list``/``tuple`` of Keras tensors to
|
||||
reference for weight shape computations.
|
||||
"""
|
||||
ndims = len(input_shape)
|
||||
if ndims is None:
|
||||
raise ValueError(f"Input shape {input_shape} has undefined rank.")
|
||||
|
||||
# Resolve negative axis
|
||||
if self.axis < 0:
|
||||
self.axis += ndims
|
||||
|
||||
# Validate axes
|
||||
if self.axis < 0 or self.axis >= ndims:
|
||||
raise ValueError(f"Invalid axis: {self.axis}")
|
||||
|
||||
param_shape = [input_shape[self.axis]]
|
||||
self.scale = self.add_weight(
|
||||
name="scale",
|
||||
shape=param_shape,
|
||||
initializer="ones")
|
||||
if self.bias:
|
||||
self.offset = self.add_weight(
|
||||
name="offset",
|
||||
shape=param_shape,
|
||||
initializer="zeros")
|
||||
|
||||
self.built = True # pylint:disable=attribute-defined-outside-init
|
||||
|
||||
def call(self, inputs, **kwargs): # pylint:disable=unused-argument
|
||||
""" Call Root Mean Square Layer Normalization
|
||||
|
||||
Parameters
|
||||
----------
|
||||
inputs: tensor
|
||||
Input tensor, or list/tuple of input tensors
|
||||
|
||||
Returns
|
||||
-------
|
||||
tensor
|
||||
A tensor or list/tuple of tensors
|
||||
"""
|
||||
# Compute the axes along which to reduce the mean / variance
|
||||
input_shape = K.int_shape(inputs)
|
||||
layer_size = input_shape[self.axis]
|
||||
|
||||
if self.partial in (0.0, 1.0):
|
||||
mean_square = K.mean(K.square(inputs), axis=self.axis, keepdims=True)
|
||||
else:
|
||||
partial_size = int(layer_size * self.partial)
|
||||
partial_x = slice_tensor(inputs,
|
||||
axes=[self.axis],
|
||||
starts=[0],
|
||||
ends=[partial_size])
|
||||
mean_square = K.mean(K.square(partial_x), axis=self.axis, keepdims=True)
|
||||
|
||||
recip_square_root = 1. / K.sqrt(mean_square + self.epsilon)
|
||||
output = self.scale * inputs * recip_square_root + self.offset
|
||||
return output
|
||||
|
||||
def compute_output_shape(self, input_shape): # pylint:disable=no-self-use
|
||||
""" The output shape of the layer is the same as the input shape.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_shape: tuple
|
||||
The input shape to the layer
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple
|
||||
The output shape to the layer
|
||||
"""
|
||||
return input_shape
|
||||
|
||||
def get_config(self):
|
||||
"""Returns the config of the layer.
|
||||
|
||||
A layer config is a Python dictionary (serializable) containing the configuration of a
|
||||
layer. The same layer can be reinstated later (without its trained weights) from this
|
||||
configuration.
|
||||
|
||||
The configuration of a layer does not include connectivity information, nor the layer
|
||||
class name. These are handled by `Network` (one layer of abstraction above).
|
||||
|
||||
Returns
|
||||
--------
|
||||
dict
|
||||
A python dictionary containing the layer configuration
|
||||
"""
|
||||
base_config = super().get_config()
|
||||
config = dict(axis=self.axis,
|
||||
epsilon=self.epsilon,
|
||||
partial=self.partial,
|
||||
bias=self.bias)
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
# Update normalization into Keras custom objects
|
||||
for name, obj in inspect.getmembers(sys.modules[__name__]):
|
||||
if inspect.isclass(obj) and obj.__module__ == __name__:
|
||||
get_custom_objects().update({name: obj})
|
|
@ -1,171 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Normalization methods for faceswap.py specific to Tensorflow backend """
|
||||
import inspect
|
||||
import sys
|
||||
|
||||
import tensorflow as tf
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import Layer, LayerNormalization # noqa pylint:disable=no-name-in-module,unused-import,import-error
|
||||
from tensorflow.keras.utils import get_custom_objects # noqa pylint:disable=no-name-in-module,import-error
|
||||
|
||||
|
||||
class RMSNormalization(Layer):
|
||||
""" Root Mean Square Layer Normalization (Biao Zhang, Rico Sennrich, 2019)
|
||||
|
||||
RMSNorm is a simplification of the original layer normalization (LayerNorm). LayerNorm is a
|
||||
regularization technique that might handle the internal covariate shift issue so as to
|
||||
stabilize the layer activations and improve model convergence. It has been proved quite
|
||||
successful in NLP-based model. In some cases, LayerNorm has become an essential component
|
||||
to enable model optimization, such as in the SOTA NMT model Transformer.
|
||||
|
||||
RMSNorm simplifies LayerNorm by removing the mean-centering operation, or normalizing layer
|
||||
activations with RMS statistic.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
axis: int
|
||||
The axis to normalize across. Typically this is the features axis. The left-out axes are
|
||||
typically the batch axis/axes. This argument defaults to `-1`, the last dimension in the
|
||||
input.
|
||||
epsilon: float, optional
|
||||
Small float added to variance to avoid dividing by zero. Default: `1e-8`
|
||||
partial: float, optional
|
||||
Partial multiplier for calculating pRMSNorm. Valid values are between `0.0` and `1.0`.
|
||||
Setting to `0.0` or `1.0` disables. Default: `0.0`
|
||||
bias: bool, optional
|
||||
Whether to use a bias term for RMSNorm. Disabled by default because RMSNorm does not
|
||||
enforce re-centering invariance. Default ``False``
|
||||
kwargs: dict
|
||||
Standard keras layer kwargs
|
||||
|
||||
References
|
||||
----------
|
||||
- RMS Normalization - https://arxiv.org/abs/1910.07467
|
||||
- Official implementation - https://github.com/bzhangGo/rmsnorm
|
||||
"""
|
||||
def __init__(self, axis=-1, epsilon=1e-8, partial=0.0, bias=False, **kwargs):
|
||||
self.scale = None
|
||||
self.offset = 0
|
||||
super().__init__(**kwargs)
|
||||
|
||||
# Checks
|
||||
if not isinstance(axis, int):
|
||||
raise TypeError(f"Expected an int for the argument 'axis', but received: {axis}")
|
||||
|
||||
if not 0.0 <= partial <= 1.0:
|
||||
raise ValueError(f"partial must be between 0.0 and 1.0, but received {partial}")
|
||||
|
||||
self.axis = axis
|
||||
self.epsilon = epsilon
|
||||
self.partial = partial
|
||||
self.bias = bias
|
||||
self.offset = 0.
|
||||
|
||||
def build(self, input_shape):
|
||||
""" Validate and populate :attr:`axis`
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_shape: tensor
|
||||
Keras tensor (future input to layer) or ``list``/``tuple`` of Keras tensors to
|
||||
reference for weight shape computations.
|
||||
"""
|
||||
ndims = len(input_shape)
|
||||
if ndims is None:
|
||||
raise ValueError(f"Input shape {input_shape} has undefined rank.")
|
||||
|
||||
# Resolve negative axis
|
||||
if self.axis < 0:
|
||||
self.axis += ndims
|
||||
|
||||
# Validate axes
|
||||
if self.axis < 0 or self.axis >= ndims:
|
||||
raise ValueError(f"Invalid axis: {self.axis}")
|
||||
|
||||
param_shape = [input_shape[self.axis]]
|
||||
self.scale = self.add_weight(
|
||||
name="scale",
|
||||
shape=param_shape,
|
||||
initializer="ones")
|
||||
if self.bias:
|
||||
self.offset = self.add_weight(
|
||||
name="offset",
|
||||
shape=param_shape,
|
||||
initializer="zeros")
|
||||
|
||||
self.built = True # pylint:disable=attribute-defined-outside-init
|
||||
|
||||
def call(self, inputs, **kwargs): # pylint:disable=unused-argument
|
||||
""" Call Root Mean Square Layer Normalization
|
||||
|
||||
Parameters
|
||||
----------
|
||||
inputs: tensor
|
||||
Input tensor, or list/tuple of input tensors
|
||||
|
||||
Returns
|
||||
-------
|
||||
tensor
|
||||
A tensor or list/tuple of tensors
|
||||
"""
|
||||
# Compute the axes along which to reduce the mean / variance
|
||||
input_shape = K.int_shape(inputs)
|
||||
layer_size = input_shape[self.axis]
|
||||
|
||||
if self.partial in (0.0, 1.0):
|
||||
mean_square = K.mean(K.square(inputs), axis=self.axis, keepdims=True)
|
||||
else:
|
||||
partial_size = int(layer_size * self.partial)
|
||||
partial_x, _ = tf.split( # pylint:disable=redundant-keyword-arg,no-value-for-parameter
|
||||
inputs,
|
||||
[partial_size, layer_size - partial_size],
|
||||
axis=self.axis)
|
||||
mean_square = K.mean(K.square(partial_x), axis=self.axis, keepdims=True)
|
||||
|
||||
recip_square_root = tf.math.rsqrt(mean_square + self.epsilon)
|
||||
output = self.scale * inputs * recip_square_root + self.offset
|
||||
return output
|
||||
|
||||
def compute_output_shape(self, input_shape): # pylint:disable=no-self-use
|
||||
""" The output shape of the layer is the same as the input shape.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_shape: tuple
|
||||
The input shape to the layer
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple
|
||||
The output shape to the layer
|
||||
"""
|
||||
return input_shape
|
||||
|
||||
def get_config(self):
|
||||
"""Returns the config of the layer.
|
||||
|
||||
A layer config is a Python dictionary (serializable) containing the configuration of a
|
||||
layer. The same layer can be reinstated later (without its trained weights) from this
|
||||
configuration.
|
||||
|
||||
The configuration of a layer does not include connectivity information, nor the layer
|
||||
class name. These are handled by `Network` (one layer of abstraction above).
|
||||
|
||||
Returns
|
||||
--------
|
||||
dict
|
||||
A python dictionary containing the layer configuration
|
||||
"""
|
||||
base_config = super().get_config()
|
||||
config = dict(axis=self.axis,
|
||||
epsilon=self.epsilon,
|
||||
partial=self.partial,
|
||||
bias=self.bias)
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
# Update normalization into Keras custom objects
|
||||
for name, obj in inspect.getmembers(sys.modules[__name__]):
|
||||
if inspect.isclass(obj) and obj.__module__ == __name__:
|
||||
get_custom_objects().update({name: obj})
|
|
@ -1,8 +1,5 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Custom Optimizers for TensorFlow 2.x/tf.keras """
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import inspect
|
||||
import sys
|
||||
|
@ -10,8 +7,8 @@ import sys
|
|||
import tensorflow as tf
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.optimizers import (Adam, Nadam, RMSprop) # noqa pylint:disable=no-name-in-module,unused-import,import-error
|
||||
from tensorflow.keras.utils import get_custom_objects # noqa pylint:disable=no-name-in-module,import-error
|
||||
from tensorflow.keras.optimizers import Adam, Nadam, RMSprop # noqa:E501,F401 pylint:disable=import-error,unused-import
|
||||
keras = tf.keras
|
||||
|
||||
|
||||
class AdaBelief(tf.keras.optimizers.Optimizer):
|
||||
|
@ -381,22 +378,22 @@ class AdaBelief(tf.keras.optimizers.Optimizer):
|
|||
The optimizer configuration.
|
||||
"""
|
||||
config = super().get_config()
|
||||
config.update(dict(learning_rate=self._serialize_hyperparameter("learning_rate"),
|
||||
beta_1=self._serialize_hyperparameter("beta_1"),
|
||||
beta_2=self._serialize_hyperparameter("beta_2"),
|
||||
decay=self._serialize_hyperparameter("decay"),
|
||||
weight_decay=self._serialize_hyperparameter("weight_decay"),
|
||||
sma_threshold=self._serialize_hyperparameter("sma_threshold"),
|
||||
epsilon=self.epsilon,
|
||||
amsgrad=self.amsgrad,
|
||||
rectify=self.rectify,
|
||||
total_steps=self._serialize_hyperparameter("total_steps"),
|
||||
warmup_proportion=self._serialize_hyperparameter("warmup_proportion"),
|
||||
min_lr=self._serialize_hyperparameter("min_lr")))
|
||||
config.update({"learning_rate": self._serialize_hyperparameter("learning_rate"),
|
||||
"beta_1": self._serialize_hyperparameter("beta_1"),
|
||||
"beta_2": self._serialize_hyperparameter("beta_2"),
|
||||
"decay": self._serialize_hyperparameter("decay"),
|
||||
"weight_decay": self._serialize_hyperparameter("weight_decay"),
|
||||
"sma_threshold": self._serialize_hyperparameter("sma_threshold"),
|
||||
"epsilon": self.epsilon,
|
||||
"amsgrad": self.amsgrad,
|
||||
"rectify": self.rectify,
|
||||
"total_steps": self._serialize_hyperparameter("total_steps"),
|
||||
"warmup_proportion": self._serialize_hyperparameter("warmup_proportion"),
|
||||
"min_lr": self._serialize_hyperparameter("min_lr")})
|
||||
return config
|
||||
|
||||
|
||||
# Update layers into Keras custom objects
|
||||
for _name, obj in inspect.getmembers(sys.modules[__name__]):
|
||||
if inspect.isclass(obj) and obj.__module__ == __name__:
|
||||
get_custom_objects().update({_name: obj})
|
||||
keras.utils.get_custom_objects().update({_name: obj})
|
|
@ -1,156 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Custom Optimizers for PlaidML/Keras 2.2. """
|
||||
import inspect
|
||||
import sys
|
||||
|
||||
from keras import backend as K
|
||||
from keras.optimizers import Optimizer, Adam, Nadam, RMSprop # noqa pylint:disable=unused-import
|
||||
from keras.utils import get_custom_objects
|
||||
|
||||
|
||||
class AdaBelief(Optimizer):
|
||||
"""AdaBelief optimizer.
|
||||
|
||||
Default parameters follow those provided in the original paper.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
learning_rate: float
|
||||
The learning rate.
|
||||
beta_1: float
|
||||
The exponential decay rate for the 1st moment estimates.
|
||||
beta_2: float
|
||||
The exponential decay rate for the 2nd moment estimates.
|
||||
epsilon: float, optional
|
||||
A small constant for numerical stability. Default: `K.epsilon()`.
|
||||
amsgrad: bool
|
||||
Whether to apply AMSGrad variant of this algorithm from the paper "On the Convergence
|
||||
of Adam and beyond".
|
||||
|
||||
References
|
||||
----------
|
||||
AdaBelief - A Method for Stochastic Optimization - https://arxiv.org/abs/1412.6980v8
|
||||
On the Convergence of AdaBelief and Beyond - https://openreview.net/forum?id=ryQu7f-RZ
|
||||
|
||||
Adapted from https://github.com/liaoxuanzhi/adabelief
|
||||
|
||||
BSD 2-Clause License
|
||||
|
||||
Copyright (c) 2021, Juntang Zhuang
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
|
||||
epsilon=None, decay=0., weight_decay=0.0, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
with K.name_scope(self.__class__.__name__):
|
||||
self.iterations = K.variable(0, dtype='int64', name='iterations')
|
||||
self.lr = K.variable(lr, name='lr')
|
||||
self.beta_1 = K.variable(beta_1, name='beta_1')
|
||||
self.beta_2 = K.variable(beta_2, name='beta_2')
|
||||
self.decay = K.variable(decay, name='decay')
|
||||
if epsilon is None:
|
||||
epsilon = K.epsilon()
|
||||
self.epsilon = float(epsilon)
|
||||
self.initial_decay = decay
|
||||
self.weight_decay = float(weight_decay)
|
||||
|
||||
def get_updates(self, loss, params): # pylint:disable=too-many-locals
|
||||
""" Get the weight updates
|
||||
|
||||
Parameters
|
||||
----------
|
||||
loss: list
|
||||
The loss to update
|
||||
params: list
|
||||
The variables
|
||||
"""
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
l_r = self.lr
|
||||
if self.initial_decay > 0:
|
||||
l_r = l_r * (1. / (1. + self.decay * K.cast(self.iterations,
|
||||
K.dtype(self.decay))))
|
||||
|
||||
var_t = K.cast(self.iterations, K.floatx()) + 1
|
||||
# bias correction
|
||||
bias_correction1 = 1. - K.pow(self.beta_1, var_t)
|
||||
bias_correction2 = 1. - K.pow(self.beta_2, var_t)
|
||||
|
||||
m_s = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
|
||||
v_s = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
|
||||
|
||||
self.weights = [self.iterations] + m_s + v_s
|
||||
|
||||
for param, grad, var_m, var_v in zip(params, grads, m_s, v_s):
|
||||
if self.weight_decay != 0.:
|
||||
grad += self.weight_decay * K.stop_gradient(param)
|
||||
|
||||
m_t = (self.beta_1 * var_m) + (1. - self.beta_1) * grad
|
||||
m_corr_t = m_t / bias_correction1
|
||||
|
||||
v_t = (self.beta_2 * var_v) + (1. - self.beta_2) * K.square(grad - m_t) + self.epsilon
|
||||
v_corr_t = K.sqrt(v_t / bias_correction2)
|
||||
|
||||
p_t = param - l_r * m_corr_t / (v_corr_t + self.epsilon)
|
||||
|
||||
self.updates.append(K.update(var_m, m_t))
|
||||
self.updates.append(K.update(var_v, v_t))
|
||||
new_param = p_t
|
||||
|
||||
# Apply constraints.
|
||||
if getattr(param, 'constraint', None) is not None:
|
||||
new_param = param.constraint(new_param)
|
||||
|
||||
self.updates.append(K.update(param, new_param))
|
||||
return self.updates
|
||||
|
||||
def get_config(self):
|
||||
""" Returns the config of the optimizer.
|
||||
|
||||
An optimizer config is a Python dictionary (serializable) containing the configuration of
|
||||
an optimizer. The same optimizer can be re-instantiated later (without any saved state)
|
||||
from this configuration.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict
|
||||
The optimizer configuration.
|
||||
"""
|
||||
config = dict(lr=float(K.get_value(self.lr)),
|
||||
beta_1=float(K.get_value(self.beta_1)),
|
||||
beta_2=float(K.get_value(self.beta_2)),
|
||||
decay=float(K.get_value(self.decay)),
|
||||
epsilon=self.epsilon,
|
||||
weight_decay=self.weight_decay)
|
||||
base_config = super().get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
# Update layers into Keras custom objects
|
||||
for name, obj in inspect.getmembers(sys.modules[__name__]):
|
||||
if inspect.isclass(obj) and obj.__module__ == __name__:
|
||||
get_custom_objects().update({name: obj})
|
|
@ -8,15 +8,11 @@ from typing import Callable, ContextManager, List, Optional, Union
|
|||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from lib.utils import get_backend
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Activation # pylint:disable=import-error
|
||||
from tensorflow.keras.models import load_model as k_load_model, Model # noqa:E501 # pylint:disable=import-error
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.layers import Activation
|
||||
from keras.models import load_model as k_load_model, Model
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Activation # noqa pylint:disable=no-name-in-module,import-error
|
||||
from tensorflow.keras.models import load_model as k_load_model, Model # noqa pylint:disable=no-name-in-module,import-error
|
||||
from lib.utils import get_backend
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint:disable=invalid-name
|
||||
|
||||
|
@ -28,8 +24,7 @@ class KSession():
|
|||
actions performed on a model are handled consistently and can be performed in parallel in
|
||||
separate threads.
|
||||
|
||||
This is an early implementation of this class, and should be expanded out over time
|
||||
with relevant `AMD`, `CPU` and `NVIDIA` backend methods.
|
||||
This is an early implementation of this class, and should be expanded out over time.
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
@ -81,9 +76,7 @@ class KSession():
|
|||
""" Get predictions from the model.
|
||||
|
||||
This method is a wrapper for :func:`keras.predict()` function. For Tensorflow backends
|
||||
this is a straight call to the predict function. For PlaidML backends, this attempts
|
||||
to optimize the inference batch sizes to reduce the number of kernels that need to be
|
||||
compiled.
|
||||
this is a straight call to the predict function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
@ -100,53 +93,14 @@ class KSession():
|
|||
"""
|
||||
assert self._model is not None
|
||||
with self._context:
|
||||
if self._backend == "amd" and batch_size is not None:
|
||||
return self._amd_predict_with_optimized_batchsizes(feed, batch_size)
|
||||
return self._model.predict(feed, verbose=0, batch_size=batch_size)
|
||||
|
||||
def _amd_predict_with_optimized_batchsizes(
|
||||
self,
|
||||
feed: Union[List[np.ndarray], np.ndarray],
|
||||
batch_size: int) -> Union[List[np.ndarray], np.ndarray]:
|
||||
""" Minimizes the amount of kernels to be compiled when using the ``amd`` backend with
|
||||
varying batch sizes while trying to keep the batchsize as high as possible.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
feed: numpy.ndarray or list
|
||||
The feed to be provided to the model as input. This should be a ``numpy.ndarray``
|
||||
for single inputs or a ``list`` of ``numpy.ndarray`` objects for multiple inputs.
|
||||
batch_size: int
|
||||
The upper batchsize to use.
|
||||
"""
|
||||
assert self._model is not None
|
||||
if isinstance(feed, np.ndarray):
|
||||
feed = [feed]
|
||||
items = feed[0].shape[0]
|
||||
done_items = 0
|
||||
results = []
|
||||
while done_items < items:
|
||||
if batch_size < 4: # Not much difference in BS < 4
|
||||
batch_size = 1
|
||||
batch_items = ((items - done_items) // batch_size) * batch_size
|
||||
if batch_items:
|
||||
pred_data = [x[done_items:done_items + batch_items] for x in feed]
|
||||
pred = self._model.predict(pred_data, batch_size=batch_size)
|
||||
done_items += batch_items
|
||||
results.append(pred)
|
||||
batch_size //= 2
|
||||
if isinstance(results[0], np.ndarray):
|
||||
return np.concatenate(results)
|
||||
return [np.concatenate(x) for x in zip(*results)]
|
||||
|
||||
def _set_session(self,
|
||||
allow_growth: bool,
|
||||
exclude_gpus: list,
|
||||
cpu_mode: bool) -> ContextManager:
|
||||
""" Sets the backend session options.
|
||||
|
||||
For AMD backend this does nothing.
|
||||
|
||||
For CPU backends, this hides any GPUs from Tensorflow.
|
||||
|
||||
For Nvidia backends, this hides any GPUs that Tensorflow should not use and applies
|
||||
|
@ -165,8 +119,6 @@ class KSession():
|
|||
``True`` run the model on CPU. Default: ``False``
|
||||
"""
|
||||
retval = nullcontext()
|
||||
if self._backend == "amd":
|
||||
return retval
|
||||
if self._backend == "cpu":
|
||||
logger.verbose("Hiding GPUs from Tensorflow") # type:ignore
|
||||
tf.config.set_visible_devices([], "GPU")
|
||||
|
@ -201,8 +153,7 @@ class KSession():
|
|||
logger.verbose("Initializing plugin model: %s", self._name) # type:ignore
|
||||
with self._context:
|
||||
self._model = k_load_model(self._model_path, compile=False, **self._model_kwargs)
|
||||
if self._backend != "amd":
|
||||
self._model.make_predict_function()
|
||||
self._model.make_predict_function()
|
||||
|
||||
def define_model(self, function: Callable) -> None:
|
||||
""" Defines a model from the given function.
|
||||
|
@ -233,8 +184,7 @@ class KSession():
|
|||
assert self._model is not None
|
||||
with self._context:
|
||||
self._model.load_weights(self._model_path)
|
||||
if self._backend != "amd":
|
||||
self._model.make_predict_function()
|
||||
self._model.make_predict_function()
|
||||
|
||||
def append_softmax_activation(self, layer_index: int = -1) -> None:
|
||||
""" Append a softmax activation layer to a model
|
||||
|
|
|
@ -1,59 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
""" PlaidML helper Utilities """
|
||||
from typing import Optional
|
||||
|
||||
import plaidml
|
||||
|
||||
|
||||
def pad(data: plaidml.tile.Value,
|
||||
paddings,
|
||||
mode: str = "CONSTANT",
|
||||
name: Optional[str] = None, # pylint:disable=unused-argument
|
||||
constant_value: int = 0) -> plaidml.tile.Value:
|
||||
""" PlaidML Pad
|
||||
|
||||
Notes
|
||||
-----
|
||||
Currently only Reflect padding is supported.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data :class:`plaidm.tile.Value`
|
||||
The tensor to pad
|
||||
mode: str, optional
|
||||
The padding mode to use. Default: `"CONSTANT"`
|
||||
name: str, optional
|
||||
The name for the operation. Unused but kept for consistency with tf.pad. Default: ``None``
|
||||
constant_value: int, optional
|
||||
The value to pad the Tensor with. Default: `0`
|
||||
|
||||
Returns
|
||||
-------
|
||||
:class:`plaidm.tile.Value`
|
||||
The padded tensor
|
||||
"""
|
||||
# TODO: use / implement other padding method when required
|
||||
# CONSTANT -> SpatialPadding ? | Doesn't support first and last axis +
|
||||
# no support for constant_value
|
||||
# SYMMETRIC -> Requires implement ?
|
||||
if mode.upper() != "REFLECT":
|
||||
raise NotImplementedError("pad only supports mode == 'REFLECT'")
|
||||
if constant_value != 0:
|
||||
raise NotImplementedError("pad does not support constant_value != 0")
|
||||
return plaidml.op.reflection_padding(data, paddings)
|
||||
|
||||
|
||||
def is_plaidml_error(error: Exception) -> bool:
|
||||
""" Test whether the given exception is a plaidml Exception.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
error: :class:`Exception`
|
||||
The generated error
|
||||
|
||||
Returns
|
||||
-------
|
||||
bool
|
||||
``True`` if the given error has been generated from plaidML otherwise ``False``
|
||||
"""
|
||||
return isinstance(error, plaidml.exceptions.PlaidMLError)
|
11
lib/utils.py
11
lib/utils.py
|
@ -35,7 +35,7 @@ _video_extensions = [ # pylint:disable=invalid-name
|
|||
".avi", ".flv", ".mkv", ".mov", ".mp4", ".mpeg", ".mpg", ".webm", ".wmv",
|
||||
".ts", ".vob"]
|
||||
_TF_VERS: Optional[Tuple[int, int]] = None
|
||||
ValidBackends = Literal["amd", "nvidia", "cpu", "apple_silicon", "directml", "rocm"]
|
||||
ValidBackends = Literal["nvidia", "cpu", "apple_silicon", "directml", "rocm"]
|
||||
|
||||
|
||||
class _Backend(): # pylint:disable=too-few-public-methods
|
||||
|
@ -48,8 +48,7 @@ class _Backend(): # pylint:disable=too-few-public-methods
|
|||
"2": "directml",
|
||||
"3": "nvidia",
|
||||
"4": "apple_silicon",
|
||||
"5": "rocm",
|
||||
"6": "amd"}
|
||||
"5": "rocm"}
|
||||
self._valid_backends = list(self._backends.values())
|
||||
self._config_file = self._get_config_file()
|
||||
self.backend = self._get_backend()
|
||||
|
@ -138,7 +137,7 @@ def get_backend() -> ValidBackends:
|
|||
Returns
|
||||
-------
|
||||
str
|
||||
The backend configuration in use by Faceswap. One of ["amd", "cpu", "directml", "nvidia",
|
||||
The backend configuration in use by Faceswap. One of ["cpu", "directml", "nvidia", "rocm",
|
||||
"apple_silicon"]
|
||||
|
||||
Example
|
||||
|
@ -155,7 +154,7 @@ def set_backend(backend: str) -> None:
|
|||
|
||||
Parameters
|
||||
----------
|
||||
backend: ["amd", "cpu", "directml", "nvidia", "apple_silicon"]
|
||||
backend: ["cpu", "directml", "nvidia", "rocm", "apple_silicon"]
|
||||
The backend to set faceswap to
|
||||
|
||||
Example
|
||||
|
@ -766,7 +765,7 @@ class DebugTimes():
|
|||
self._times: Dict[str, List[float]] = {}
|
||||
self._steps: Dict[str, float] = {}
|
||||
self._interval = 1
|
||||
self._display = dict(min=show_min, mean=show_mean, max=show_max)
|
||||
self._display = {"min": show_min, "mean": show_mean, "max": show_max}
|
||||
|
||||
def step_start(self, name: str, record: bool = True) -> None:
|
||||
""" Start the timer for the given step name.
|
||||
|
|
|
@ -27,7 +27,7 @@ msgstr ""
|
|||
|
||||
#: plugins/extract/_config.py:39
|
||||
msgid ""
|
||||
"[Nvidia Only]. Enable the Tensorflow GPU `allow_growth` configuration "
|
||||
"Enable the Tensorflow GPU `allow_growth` configuration "
|
||||
"option. This option prevents Tensorflow from allocating all of the GPU VRAM "
|
||||
"at launch but can lead to higher VRAM fragmentation and slower performance. "
|
||||
"Should only be enabled if you are having problems running extraction."
|
||||
|
|
|
@ -276,7 +276,7 @@ msgstr ""
|
|||
|
||||
#: plugins/train/_config.py:258
|
||||
msgid ""
|
||||
"[Not PlaidML] Apply AutoClipping to the gradients. AutoClip analyzes the "
|
||||
"Apply AutoClipping to the gradients. AutoClip analyzes the "
|
||||
"gradient weights and adjusts the normalization value dynamically to fit the "
|
||||
"data. Can help prevent NaNs and improve model optimization at the expense of "
|
||||
"VRAM. Ref: AutoClip: Adaptive Gradient Clipping for Source Separation "
|
||||
|
@ -299,7 +299,7 @@ msgstr ""
|
|||
|
||||
#: plugins/train/_config.py:286
|
||||
msgid ""
|
||||
"[Nvidia Only]. Enable the Tensorflow GPU 'allow_growth' configuration "
|
||||
"Enable the Tensorflow GPU 'allow_growth' configuration "
|
||||
"option. This option prevents Tensorflow from allocating all of the GPU VRAM "
|
||||
"at launch but can lead to higher VRAM fragmentation and slower performance. "
|
||||
"Should only be enabled if you are receiving errors regarding 'cuDNN fails to "
|
||||
|
@ -308,7 +308,7 @@ msgstr ""
|
|||
|
||||
#: plugins/train/_config.py:299
|
||||
msgid ""
|
||||
"[Not PlaidML], NVIDIA GPUs can run operations in float16 faster than in "
|
||||
"NVIDIA GPUs can run operations in float16 faster than in "
|
||||
"float32. Mixed precision allows you to use a mix of float16 with float32, to "
|
||||
"get the performance benefits from float16 and the numeric stability benefits "
|
||||
"from float32.\n"
|
||||
|
|
|
@ -27,12 +27,12 @@ msgstr "настройки"
|
|||
|
||||
#: plugins/extract/_config.py:39
|
||||
msgid ""
|
||||
"[Nvidia Only]. Enable the Tensorflow GPU `allow_growth` configuration "
|
||||
"Enable the Tensorflow GPU `allow_growth` configuration "
|
||||
"option. This option prevents Tensorflow from allocating all of the GPU VRAM "
|
||||
"at launch but can lead to higher VRAM fragmentation and slower performance. "
|
||||
"Should only be enabled if you are having problems running extraction."
|
||||
msgstr ""
|
||||
"[Только для Nvidia]. Включите опцию конфигурации Tensorflow GPU "
|
||||
"Включите опцию конфигурации Tensorflow GPU "
|
||||
"`allow_growth`. Эта опция не позволяет Tensorflow выделять всю видеопамять "
|
||||
"видеокарты при запуске, но может привести к повышенной фрагментации "
|
||||
"видеопамяти и снижению производительности. Следует включать только в том "
|
||||
|
|
Binary file not shown.
|
@ -8,7 +8,7 @@ msgstr ""
|
|||
"Project-Id-Version: \n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-11 23:20+0100\n"
|
||||
"PO-Revision-Date: 2023-06-12 21:23+0700\n"
|
||||
"PO-Revision-Date: 2023-06-20 17:06+0100\n"
|
||||
"Last-Translator: \n"
|
||||
"Language-Team: \n"
|
||||
"Language: ru_RU\n"
|
||||
|
@ -437,18 +437,17 @@ msgstr ""
|
|||
|
||||
#: plugins/train/_config.py:258
|
||||
msgid ""
|
||||
"[Not PlaidML] Apply AutoClipping to the gradients. AutoClip analyzes the "
|
||||
"gradient weights and adjusts the normalization value dynamically to fit the "
|
||||
"data. Can help prevent NaNs and improve model optimization at the expense of "
|
||||
"VRAM. Ref: AutoClip: Adaptive Gradient Clipping for Source Separation "
|
||||
"Networks https://arxiv.org/abs/2007.14469"
|
||||
"Apply AutoClipping to the gradients. AutoClip analyzes the gradient weights "
|
||||
"and adjusts the normalization value dynamically to fit the data. Can help "
|
||||
"prevent NaNs and improve model optimization at the expense of VRAM. Ref: "
|
||||
"AutoClip: Adaptive Gradient Clipping for Source Separation Networks https://"
|
||||
"arxiv.org/abs/2007.14469"
|
||||
msgstr ""
|
||||
"[Не для PlaidML] Применить AutoClipping к градиентам. AutoClip анализирует "
|
||||
"веса градиентов и динамически корректирует значение нормализации, чтобы оно "
|
||||
"подходило к данным. Может помочь избежать NaN('не число') и улучшить "
|
||||
"оптимизацию модели ценой видеопамяти. Ссылка: AutoClip: Adaptive Gradient "
|
||||
"Clipping for Source Separation Networks [ТОЛЬКО на английском] https://arxiv."
|
||||
"org/abs/2007.14469"
|
||||
"Применить AutoClipping к градиентам. AutoClip анализирует веса градиентов и "
|
||||
"динамически корректирует значение нормализации, чтобы оно подходило к "
|
||||
"данным. Может помочь избежать NaN('не число') и улучшить оптимизацию модели "
|
||||
"ценой видеопамяти. Ссылка: AutoClip: Adaptive Gradient Clipping for Source "
|
||||
"Separation Networks [ТОЛЬКО на английском] https://arxiv.org/abs/2007.14469"
|
||||
|
||||
#: plugins/train/_config.py:271 plugins/train/_config.py:283
|
||||
#: plugins/train/_config.py:297 plugins/train/_config.py:314
|
||||
|
@ -471,11 +470,11 @@ msgstr ""
|
|||
|
||||
#: plugins/train/_config.py:286
|
||||
msgid ""
|
||||
"[Nvidia Only]. Enable the Tensorflow GPU 'allow_growth' configuration "
|
||||
"option. This option prevents Tensorflow from allocating all of the GPU VRAM "
|
||||
"at launch but can lead to higher VRAM fragmentation and slower performance. "
|
||||
"Should only be enabled if you are receiving errors regarding 'cuDNN fails to "
|
||||
"initialize' when commencing training."
|
||||
"Enable the Tensorflow GPU 'allow_growth' configuration option. This option "
|
||||
"prevents Tensorflow from allocating all of the GPU VRAM at launch but can "
|
||||
"lead to higher VRAM fragmentation and slower performance. Should only be "
|
||||
"enabled if you are receiving errors regarding 'cuDNN fails to initialize' "
|
||||
"when commencing training."
|
||||
msgstr ""
|
||||
"[Только для Nvidia]. Включите опцию конфигурации Tensorflow GPU "
|
||||
"`allow_growth`. Эта опция не позволяет Tensorflow выделять всю видеопамять "
|
||||
|
@ -486,10 +485,10 @@ msgstr ""
|
|||
|
||||
#: plugins/train/_config.py:299
|
||||
msgid ""
|
||||
"[Not PlaidML], NVIDIA GPUs can run operations in float16 faster than in "
|
||||
"float32. Mixed precision allows you to use a mix of float16 with float32, to "
|
||||
"get the performance benefits from float16 and the numeric stability benefits "
|
||||
"from float32.\n"
|
||||
"NVIDIA GPUs can run operations in float16 faster than in float32. Mixed "
|
||||
"precision allows you to use a mix of float16 with float32, to get the "
|
||||
"performance benefits from float16 and the numeric stability benefits from "
|
||||
"float32.\n"
|
||||
"\n"
|
||||
"This is untested on DirectML backend, but will run on most Nvidia models. it "
|
||||
"will only speed up training on more recent GPUs. Those with compute "
|
||||
|
@ -499,10 +498,10 @@ msgid ""
|
|||
"savings can enable some speedups. Generally RTX GPUs and later will offer "
|
||||
"the most benefit."
|
||||
msgstr ""
|
||||
"[Не для PlaidML], Видеокарты от NVIDIA могут оперировать в 'float16' "
|
||||
"быстрее, чем в 'float32'. Смешанная точность позволяет вам использовать микс "
|
||||
"float16 с float32, чтобы получить улучшение производительности от float16 и "
|
||||
"числовую стабильность от float32.\n"
|
||||
"Видеокарты от NVIDIA могут оперировать в 'float16' быстрее, чем в 'float32'. "
|
||||
"Смешанная точность позволяет вам использовать микс float16 с float32, чтобы "
|
||||
"получить улучшение производительности от float16 и числовую стабильность от "
|
||||
"float32.\n"
|
||||
"\n"
|
||||
"Это не было проверено на DirectML, но будет работать на большенстве моделей "
|
||||
"Nvidia. Оно только ускорит тренировку на более недавних видеокартах. Те, что "
|
||||
|
|
|
@ -11,7 +11,7 @@ from lib.config import FaceswapConfig
|
|||
_LANG = gettext.translation("plugins.extract._config", localedir="locales", fallback=True)
|
||||
_ = _LANG.gettext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Config(FaceswapConfig):
|
||||
|
@ -36,7 +36,7 @@ class Config(FaceswapConfig):
|
|||
datatype=bool,
|
||||
default=False,
|
||||
group=_("settings"),
|
||||
info=_("[Nvidia Only]. Enable the Tensorflow GPU `allow_growth` configuration option. "
|
||||
info=_("Enable the Tensorflow GPU `allow_growth` configuration option. "
|
||||
"This option prevents Tensorflow from allocating all of the GPU VRAM at launch "
|
||||
"but can lead to higher VRAM fragmentation and slower performance. Should only "
|
||||
"be enabled if you are having problems running extraction."))
|
||||
|
|
|
@ -24,7 +24,7 @@ import numpy as np
|
|||
|
||||
from tensorflow.python.framework import errors_impl as tf_errors # pylint:disable=no-name-in-module # noqa
|
||||
|
||||
from lib.utils import get_backend, FaceswapError
|
||||
from lib.utils import FaceswapError
|
||||
from plugins.extract._base import BatchType, Extractor, ExtractMedia, ExtractorBatch
|
||||
from .processing import AlignedFilter, ReAlign
|
||||
|
||||
|
@ -547,23 +547,6 @@ class Aligner(Extractor): # pylint:disable=abstract-method
|
|||
"CLI: Edit the file faceswap/config/extract.ini)."
|
||||
"\n3) Enable 'Single Process' mode.")
|
||||
raise FaceswapError(msg) from err
|
||||
except Exception as err:
|
||||
if get_backend() == "amd":
|
||||
# pylint:disable=import-outside-toplevel
|
||||
from lib.plaidml_utils import is_plaidml_error
|
||||
if (is_plaidml_error(err) and (
|
||||
"CL_MEM_OBJECT_ALLOCATION_FAILURE" in str(err).upper() or
|
||||
"enough memory for the current schedule" in str(err).lower())):
|
||||
msg = ("You do not have enough GPU memory available to run detection at "
|
||||
"the selected batch size. You can try a number of things:"
|
||||
"\n1) Close any other application that is using your GPU (web "
|
||||
"browsers are particularly bad for this)."
|
||||
"\n2) Lower the batchsize (the amount of images fed into the "
|
||||
"model) by editing the plugin settings (GUI: Settings > Configure "
|
||||
"extract settings, CLI: Edit the file "
|
||||
"faceswap/config/extract.ini).")
|
||||
raise FaceswapError(msg) from err
|
||||
raise
|
||||
|
||||
def _process_refeeds(self, batch: AlignerBatch) -> List[AlignerBatch]:
|
||||
""" Process the output for each selected re-feed
|
||||
|
|
|
@ -25,7 +25,7 @@ import numpy as np
|
|||
from tensorflow.python.framework import errors_impl as tf_errors # pylint:disable=no-name-in-module # noqa
|
||||
|
||||
from lib.align import DetectedFace
|
||||
from lib.utils import get_backend, FaceswapError
|
||||
from lib.utils import FaceswapError
|
||||
|
||||
from plugins.extract._base import BatchType, Extractor, ExtractorBatch
|
||||
from plugins.extract.pipeline import ExtractMedia
|
||||
|
@ -295,23 +295,6 @@ class Detector(Extractor): # pylint:disable=abstract-method
|
|||
"CLI: Edit the file faceswap/config/extract.ini)."
|
||||
"\n3) Enable 'Single Process' mode.")
|
||||
raise FaceswapError(msg) from err
|
||||
except Exception as err:
|
||||
if get_backend() == "amd":
|
||||
# pylint:disable=import-outside-toplevel
|
||||
from lib.plaidml_utils import is_plaidml_error
|
||||
if (is_plaidml_error(err) and (
|
||||
"CL_MEM_OBJECT_ALLOCATION_FAILURE" in str(err).upper() or
|
||||
"enough memory for the current schedule" in str(err).lower())):
|
||||
msg = ("You do not have enough GPU memory available to run detection at "
|
||||
"the selected batch size. You can try a number of things:"
|
||||
"\n1) Close any other application that is using your GPU (web "
|
||||
"browsers are particularly bad for this)."
|
||||
"\n2) Lower the batchsize (the amount of images fed into the "
|
||||
"model) by editing the plugin settings (GUI: Settings > Configure "
|
||||
"extract settings, CLI: Edit the file "
|
||||
"faceswap/config/extract.ini).")
|
||||
raise FaceswapError(msg) from err
|
||||
raise
|
||||
|
||||
if angle != 0 and any(face.any() for face in batch.prediction):
|
||||
logger.verbose("found face(s) by rotating image %s " # type:ignore[attr-defined]
|
||||
|
|
|
@ -1,22 +1,19 @@
|
|||
#!/usr/bin/env python3
|
||||
""" MTCNN Face detection plugin """
|
||||
from __future__ import absolute_import, division, print_function
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Tuple, Union
|
||||
import typing as T
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPool2D, Permute, PReLU # noqa:E501 # pylint:disable=import-error
|
||||
|
||||
from lib.model.session import KSession
|
||||
from lib.utils import get_backend
|
||||
from ._base import BatchType, Detector
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.layers import Conv2D, Dense, Flatten, Input, MaxPool2D, Permute, PReLU
|
||||
from plaidml.tile import Value as Tensor # pylint:disable=import-error
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPool2D, Permute, PReLU # noqa pylint:disable=no-name-in-module,import-error
|
||||
if T.TYPE_CHECKING:
|
||||
from tensorflow import Tensor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -37,7 +34,7 @@ class Detect(Detector):
|
|||
self.kwargs = self._validate_kwargs()
|
||||
self.color_format = "RGB"
|
||||
|
||||
def _validate_kwargs(self) -> Dict[str, Union[int, float, List[float]]]:
|
||||
def _validate_kwargs(self) -> T.Dict[str, T.Union[int, float, T.List[float]]]:
|
||||
""" Validate that config options are correct. If not reset to default """
|
||||
valid = True
|
||||
threshold = [self.config["threshold_1"],
|
||||
|
@ -167,7 +164,7 @@ class PNet(KSession):
|
|||
def __init__(self,
|
||||
model_path: str,
|
||||
allow_growth: bool,
|
||||
exclude_gpus: Optional[List[int]],
|
||||
exclude_gpus: T.Optional[T.List[int]],
|
||||
cpu_mode: bool,
|
||||
input_size: int,
|
||||
min_size: int,
|
||||
|
@ -188,10 +185,10 @@ class PNet(KSession):
|
|||
self._pnet_scales = self._calculate_scales(min_size, factor)
|
||||
self._pnet_sizes = [(int(input_size * scale), int(input_size * scale))
|
||||
for scale in self._pnet_scales]
|
||||
self._pnet_input: Optional[List[np.ndarray]] = None
|
||||
self._pnet_input: T.Optional[T.List[np.ndarray]] = None
|
||||
|
||||
@staticmethod
|
||||
def model_definition() -> Tuple[List[Tensor], List[Tensor]]:
|
||||
def model_definition() -> T.Tuple[T.List[Tensor], T.List[Tensor]]:
|
||||
""" Keras P-Network Definition for MTCNN """
|
||||
input_ = Input(shape=(None, None, 3))
|
||||
var_x = Conv2D(10, (3, 3), strides=1, padding='valid', name='conv1')(input_)
|
||||
|
@ -207,7 +204,7 @@ class PNet(KSession):
|
|||
|
||||
def _calculate_scales(self,
|
||||
minsize: int,
|
||||
factor: float) -> List[float]:
|
||||
factor: float) -> T.List[float]:
|
||||
""" Calculate multi-scale
|
||||
|
||||
Parameters
|
||||
|
@ -234,7 +231,7 @@ class PNet(KSession):
|
|||
logger.trace(scales) # type:ignore
|
||||
return scales
|
||||
|
||||
def __call__(self, images: np.ndarray) -> List[np.ndarray]:
|
||||
def __call__(self, images: np.ndarray) -> T.List[np.ndarray]:
|
||||
""" first stage - fast proposal network (p-net) to obtain face candidates
|
||||
|
||||
Parameters
|
||||
|
@ -248,8 +245,8 @@ class PNet(KSession):
|
|||
List of face candidates from P-Net
|
||||
"""
|
||||
batch_size = images.shape[0]
|
||||
rectangles: List[List[List[Union[int, float]]]] = [[] for _ in range(batch_size)]
|
||||
scores: List[List[np.ndarray]] = [[] for _ in range(batch_size)]
|
||||
rectangles: T.List[T.List[T.List[T.Union[int, float]]]] = [[] for _ in range(batch_size)]
|
||||
scores: T.List[T.List[np.ndarray]] = [[] for _ in range(batch_size)]
|
||||
|
||||
if self._pnet_input is None:
|
||||
self._pnet_input = [np.empty((batch_size, rheight, rwidth, 3), dtype="float32")
|
||||
|
@ -281,7 +278,7 @@ class PNet(KSession):
|
|||
class_probabilities: np.ndarray,
|
||||
roi: np.ndarray,
|
||||
size: int,
|
||||
scale: float) -> Tuple[np.ndarray, np.ndarray]:
|
||||
scale: float) -> T.Tuple[np.ndarray, np.ndarray]:
|
||||
""" Detect face position and calibrate bounding box on 12net feature map(matrix version)
|
||||
|
||||
Parameters
|
||||
|
@ -347,7 +344,7 @@ class RNet(KSession):
|
|||
def __init__(self,
|
||||
model_path: str,
|
||||
allow_growth: bool,
|
||||
exclude_gpus: Optional[List[int]],
|
||||
exclude_gpus: T.Optional[T.List[int]],
|
||||
cpu_mode: bool,
|
||||
input_size: int,
|
||||
threshold: float) -> None:
|
||||
|
@ -363,7 +360,7 @@ class RNet(KSession):
|
|||
self._threshold = threshold
|
||||
|
||||
@staticmethod
|
||||
def model_definition() -> Tuple[List[Tensor], List[Tensor]]:
|
||||
def model_definition() -> T.Tuple[T.List[Tensor], T.List[Tensor]]:
|
||||
""" Keras R-Network Definition for MTCNN """
|
||||
input_ = Input(shape=(24, 24, 3))
|
||||
var_x = Conv2D(28, (3, 3), strides=1, padding='valid', name='conv1')(input_)
|
||||
|
@ -386,8 +383,8 @@ class RNet(KSession):
|
|||
|
||||
def __call__(self,
|
||||
images: np.ndarray,
|
||||
rectangle_batch: List[np.ndarray],
|
||||
) -> List[np.ndarray]:
|
||||
rectangle_batch: T.List[np.ndarray],
|
||||
) -> T.List[np.ndarray]:
|
||||
""" second stage - refinement of face candidates with r-net
|
||||
|
||||
Parameters
|
||||
|
@ -402,7 +399,7 @@ class RNet(KSession):
|
|||
List
|
||||
List of :class:`numpy.ndarray` refined face candidates from R-Net
|
||||
"""
|
||||
ret: List[np.ndarray] = []
|
||||
ret: T.List[np.ndarray] = []
|
||||
for idx, (rectangles, image) in enumerate(zip(rectangle_batch, images)):
|
||||
if not np.any(rectangles):
|
||||
ret.append(np.array([]))
|
||||
|
@ -415,8 +412,7 @@ class RNet(KSession):
|
|||
dst=feed_batch[idx])
|
||||
for idx, rect in enumerate(rectangles)]
|
||||
|
||||
cls_prob, roi_prob = self.predict(feed_batch,
|
||||
batch_size=128 if get_backend() == "amd" else None)
|
||||
cls_prob, roi_prob = self.predict(feed_batch)
|
||||
ret.append(self._filter_face_24net(cls_prob, roi_prob, rectangles))
|
||||
return ret
|
||||
|
||||
|
@ -478,7 +474,7 @@ class ONet(KSession):
|
|||
def __init__(self,
|
||||
model_path: str,
|
||||
allow_growth: bool,
|
||||
exclude_gpus: Optional[List[int]],
|
||||
exclude_gpus: T.Optional[T.List[int]],
|
||||
cpu_mode: bool,
|
||||
input_size: int,
|
||||
threshold: float) -> None:
|
||||
|
@ -494,7 +490,7 @@ class ONet(KSession):
|
|||
self._threshold = threshold
|
||||
|
||||
@staticmethod
|
||||
def model_definition() -> Tuple[List[Tensor], List[Tensor]]:
|
||||
def model_definition() -> T.Tuple[T.List[Tensor], T.List[Tensor]]:
|
||||
""" Keras O-Network for MTCNN """
|
||||
input_ = Input(shape=(48, 48, 3))
|
||||
var_x = Conv2D(32, (3, 3), strides=1, padding='valid', name='conv1')(input_)
|
||||
|
@ -520,8 +516,8 @@ class ONet(KSession):
|
|||
|
||||
def __call__(self,
|
||||
images: np.ndarray,
|
||||
rectangle_batch: List[np.ndarray]
|
||||
) -> List[Tuple[np.ndarray, np.ndarray]]:
|
||||
rectangle_batch: T.List[np.ndarray]
|
||||
) -> T.List[T.Tuple[np.ndarray, np.ndarray]]:
|
||||
""" Third stage - further refinement and facial landmarks positions with o-net
|
||||
|
||||
Parameters
|
||||
|
@ -536,7 +532,7 @@ class ONet(KSession):
|
|||
List
|
||||
List of refined final candidates, scores and landmark points from O-Net
|
||||
"""
|
||||
ret: List[Tuple[np.ndarray, np.ndarray]] = []
|
||||
ret: T.List[T.Tuple[np.ndarray, np.ndarray]] = []
|
||||
for idx, rectangles in enumerate(rectangle_batch):
|
||||
if not np.any(rectangles):
|
||||
ret.append((np.empty((0, 5)), np.empty(0)))
|
||||
|
@ -549,16 +545,14 @@ class ONet(KSession):
|
|||
dst=feed_batch[idx])
|
||||
for idx, rect in enumerate(rectangles)]
|
||||
|
||||
cls_probs, roi_probs, pts_probs = self.predict(
|
||||
feed_batch,
|
||||
batch_size=128 if get_backend() == "amd" else None)
|
||||
cls_probs, roi_probs, pts_probs = self.predict(feed_batch)
|
||||
ret.append(self._filter_face_48net(cls_probs, roi_probs, pts_probs, rectangles))
|
||||
return ret
|
||||
|
||||
def _filter_face_48net(self, class_probabilities: np.ndarray,
|
||||
roi: np.ndarray,
|
||||
points: np.ndarray,
|
||||
rectangles: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
|
||||
rectangles: np.ndarray) -> T.Tuple[np.ndarray, np.ndarray]:
|
||||
""" Filter face position and calibrate bounding box on 12net's output
|
||||
|
||||
Parameters
|
||||
|
@ -629,13 +623,13 @@ class MTCNN(): # pylint: disable=too-few-public-methods
|
|||
Default: `0.709`
|
||||
"""
|
||||
def __init__(self,
|
||||
model_path: List[str],
|
||||
model_path: T.List[str],
|
||||
allow_growth: bool,
|
||||
exclude_gpus: Optional[List[int]],
|
||||
exclude_gpus: T.Optional[T.List[int]],
|
||||
cpu_mode: bool,
|
||||
input_size: int = 640,
|
||||
minsize: int = 20,
|
||||
threshold: Optional[List[float]] = None,
|
||||
threshold: T.Optional[T.List[float]] = None,
|
||||
factor: float = 0.709) -> None:
|
||||
logger.debug("Initializing: %s: (model_path: '%s', allow_growth: %s, exclude_gpus: %s, "
|
||||
"input_size: %s, minsize: %s, threshold: %s, factor: %s)",
|
||||
|
@ -666,7 +660,7 @@ class MTCNN(): # pylint: disable=too-few-public-methods
|
|||
|
||||
logger.debug("Initialized: %s", self.__class__.__name__)
|
||||
|
||||
def detect_faces(self, batch: np.ndarray) -> Tuple[np.ndarray, Tuple[np.ndarray]]:
|
||||
def detect_faces(self, batch: np.ndarray) -> T.Tuple[np.ndarray, T.Tuple[np.ndarray]]:
|
||||
"""Detects faces in an image, and returns bounding boxes and points for them.
|
||||
|
||||
Parameters
|
||||
|
@ -690,7 +684,7 @@ class MTCNN(): # pylint: disable=too-few-public-methods
|
|||
def nms(rectangles: np.ndarray,
|
||||
scores: np.ndarray,
|
||||
threshold: float,
|
||||
method: str = "iom") -> Tuple[np.ndarray, np.ndarray]:
|
||||
method: str = "iom") -> T.Tuple[np.ndarray, np.ndarray]:
|
||||
""" apply non-maximum suppression on ROIs in same scale(matrix version)
|
||||
|
||||
Parameters
|
||||
|
|
|
@ -90,7 +90,7 @@ _DEFAULTS = {
|
|||
),
|
||||
"cpu": dict(
|
||||
default=True,
|
||||
info="[Not PlaidML] MTCNN detector still runs fairly quickly on CPU on some setups. "
|
||||
info="MTCNN detector still runs fairly quickly on CPU on some setups. "
|
||||
"Enable CPU mode here to use the CPU for this detector to save some VRAM at a speed "
|
||||
"cost.",
|
||||
datatype=bool,
|
||||
|
|
|
@ -5,27 +5,23 @@ https://arxiv.org/abs/1708.05237
|
|||
Adapted from S3FD Port in FAN:
|
||||
https://github.com/1adrianb/face-alignment
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
from typing import List, Optional, Tuple
|
||||
import typing as T
|
||||
|
||||
from scipy.special import logsumexp
|
||||
import numpy as np
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow import keras
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import ( # pylint:disable=import-error
|
||||
Concatenate, Conv2D, Input, Maximum, MaxPooling2D, ZeroPadding2D)
|
||||
|
||||
from lib.model.session import KSession
|
||||
from lib.utils import get_backend
|
||||
from ._base import BatchType, Detector
|
||||
|
||||
if get_backend() == "amd":
|
||||
import keras
|
||||
from keras import backend as K
|
||||
from keras.layers import Concatenate, Conv2D, Input, Maximum, MaxPooling2D, ZeroPadding2D
|
||||
from plaidml.tile import Value as Tensor # pylint:disable=import-error
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow import keras
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import ( # pylint:disable=no-name-in-module,import-error
|
||||
Concatenate, Conv2D, Input, Maximum, MaxPooling2D, ZeroPadding2D)
|
||||
if T.TYPE_CHECKING:
|
||||
from tensorflow import Tensor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -48,7 +44,7 @@ class Detect(Detector):
|
|||
""" Initialize S3FD Model"""
|
||||
assert isinstance(self.model_path, str)
|
||||
confidence = self.config["confidence"] / 100
|
||||
model_kwargs = dict(custom_objects=dict(L2Norm=L2Norm, SliceO2K=SliceO2K))
|
||||
model_kwargs = {"custom_objects": {"L2Norm": L2Norm, "SliceO2K": SliceO2K}}
|
||||
self.model = S3fd(self.model_path,
|
||||
model_kwargs,
|
||||
self.config["allow_growth"],
|
||||
|
@ -129,10 +125,10 @@ class L2Norm(keras.layers.Layer):
|
|||
class SliceO2K(keras.layers.Layer):
|
||||
""" Custom Keras Slice layer generated by onnx2keras. """
|
||||
def __init__(self,
|
||||
starts: List[int],
|
||||
ends: List[int],
|
||||
axes: Optional[List[int]] = None,
|
||||
steps: Optional[List[int]] = None,
|
||||
starts: T.List[int],
|
||||
ends: T.List[int],
|
||||
axes: T.Optional[T.List[int]] = None,
|
||||
steps: T.Optional[T.List[int]] = None,
|
||||
**kwargs) -> None:
|
||||
self._starts = starts
|
||||
self._ends = ends
|
||||
|
@ -140,7 +136,7 @@ class SliceO2K(keras.layers.Layer):
|
|||
self._steps = steps
|
||||
super().__init__(**kwargs)
|
||||
|
||||
def _get_slices(self, dimensions: int) -> List[Tuple[int, ...]]:
|
||||
def _get_slices(self, dimensions: int) -> T.List[T.Tuple[int, ...]]:
|
||||
""" Obtain slices for the given number of dimensions.
|
||||
|
||||
Parameters
|
||||
|
@ -158,7 +154,7 @@ class SliceO2K(keras.layers.Layer):
|
|||
assert len(axes) == len(steps) == len(self._starts) == len(self._ends)
|
||||
return list(zip(axes, self._starts, self._ends, steps))
|
||||
|
||||
def compute_output_shape(self, input_shape: Tuple[int, ...]) -> Tuple[int, ...]:
|
||||
def compute_output_shape(self, input_shape: T.Tuple[int, ...]) -> T.Tuple[int, ...]:
|
||||
"""Computes the output shape of the layer.
|
||||
|
||||
Assumes that the layer will be built to match that input shape provided.
|
||||
|
@ -234,7 +230,7 @@ class S3fd(KSession):
|
|||
model_path: str,
|
||||
model_kwargs: dict,
|
||||
allow_growth: bool,
|
||||
exclude_gpus: Optional[List[int]],
|
||||
exclude_gpus: T.Optional[T.List[int]],
|
||||
confidence: float) -> None:
|
||||
logger.debug("Initializing: %s: (model_path: '%s', model_kwargs: %s, allow_growth: %s, "
|
||||
"exclude_gpus: %s, confidence: %s)", self.__class__.__name__, model_path,
|
||||
|
@ -250,7 +246,7 @@ class S3fd(KSession):
|
|||
self.average_img = np.array([104.0, 117.0, 123.0])
|
||||
logger.debug("Initialized: %s", self.__class__.__name__)
|
||||
|
||||
def model_definition(self) -> Tuple[List[Tensor], List[Tensor]]:
|
||||
def model_definition(self) -> T.Tuple[T.List[Tensor], T.List[Tensor]]:
|
||||
""" Keras S3FD Model Definition, adapted from FAN pytorch implementation. """
|
||||
input_ = Input(shape=(640, 640, 3))
|
||||
var_x = self.conv_block(input_, 64, 1, 2)
|
||||
|
@ -400,7 +396,7 @@ class S3fd(KSession):
|
|||
batch = batch - self.average_img
|
||||
return batch
|
||||
|
||||
def finalize_predictions(self, bounding_boxes_scales: List[np.ndarray]) -> np.ndarray:
|
||||
def finalize_predictions(self, bounding_boxes_scales: T.List[np.ndarray]) -> np.ndarray:
|
||||
""" Process the output from the model to obtain faces
|
||||
|
||||
Parameters
|
||||
|
@ -417,7 +413,7 @@ class S3fd(KSession):
|
|||
ret.append(finallist)
|
||||
return np.array(ret, dtype="object")
|
||||
|
||||
def _post_process(self, bboxlist: List[np.ndarray]) -> np.ndarray:
|
||||
def _post_process(self, bboxlist: T.List[np.ndarray]) -> np.ndarray:
|
||||
""" Perform post processing on output
|
||||
TODO: do this on the batch.
|
||||
"""
|
||||
|
|
|
@ -22,7 +22,7 @@ import numpy as np
|
|||
from tensorflow.python.framework import errors_impl as tf_errors # pylint:disable=no-name-in-module # noqa
|
||||
|
||||
from lib.align import AlignedFace, transform_image
|
||||
from lib.utils import get_backend, FaceswapError
|
||||
from lib.utils import FaceswapError
|
||||
from plugins.extract._base import BatchType, Extractor, ExtractorBatch, ExtractMedia
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -222,23 +222,6 @@ class Masker(Extractor): # pylint:disable=abstract-method
|
|||
"CLI: Edit the file faceswap/config/extract.ini)."
|
||||
"\n3) Enable 'Single Process' mode.")
|
||||
raise FaceswapError(msg) from err
|
||||
except Exception as err:
|
||||
if get_backend() == "amd":
|
||||
# pylint:disable=import-outside-toplevel
|
||||
from lib.plaidml_utils import is_plaidml_error
|
||||
if (is_plaidml_error(err) and (
|
||||
"CL_MEM_OBJECT_ALLOCATION_FAILURE" in str(err).upper() or
|
||||
"enough memory for the current schedule" in str(err).lower())):
|
||||
msg = ("You do not have enough GPU memory available to run detection at "
|
||||
"the selected batch size. You can try a number of things:"
|
||||
"\n1) Close any other application that is using your GPU (web "
|
||||
"browsers are particularly bad for this)."
|
||||
"\n2) Lower the batchsize (the amount of images fed into the "
|
||||
"model) by editing the plugin settings (GUI: Settings > Configure "
|
||||
"extract settings, CLI: Edit the file "
|
||||
"faceswap/config/extract.ini).")
|
||||
raise FaceswapError(msg) from err
|
||||
raise
|
||||
|
||||
def finalize(self, batch: BatchType) -> Generator[ExtractMedia, None, None]:
|
||||
""" Finalize the output from Masker
|
||||
|
|
|
@ -4,28 +4,23 @@
|
|||
Architecture and Pre-Trained Model ported from PyTorch to Keras by TorzDF from
|
||||
https://github.com/zllrunning/face-parsing.PyTorch
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
from typing import cast, List, Optional, Tuple
|
||||
import typing as T
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import ( # pylint:disable=import-error
|
||||
Activation, Add, BatchNormalization, Concatenate, Conv2D, GlobalAveragePooling2D, Input,
|
||||
MaxPooling2D, Multiply, Reshape, UpSampling2D, ZeroPadding2D)
|
||||
|
||||
from lib.model.session import KSession
|
||||
from lib.utils import get_backend
|
||||
from plugins.extract._base import _get_config
|
||||
from ._base import BatchType, Masker, MaskerBatch
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras import backend as K
|
||||
from keras.layers import (
|
||||
Activation, Add, BatchNormalization, Concatenate, Conv2D, GlobalAveragePooling2D, Input,
|
||||
MaxPooling2D, Multiply, Reshape, UpSampling2D, ZeroPadding2D)
|
||||
from plaidml.tile import Value as Tensor # pylint:disable=import-error
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import ( # pylint:disable=no-name-in-module,import-error
|
||||
Activation, Add, BatchNormalization, Concatenate, Conv2D, GlobalAveragePooling2D, Input,
|
||||
MaxPooling2D, Multiply, Reshape, UpSampling2D, ZeroPadding2D)
|
||||
if T.TYPE_CHECKING:
|
||||
from tensorflow import Tensor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -54,7 +49,7 @@ class Mask(Masker):
|
|||
# Separate storage for face and head masks
|
||||
self._storage_name = f"{self._storage_name}_{self._storage_centering}"
|
||||
|
||||
def _check_weights_selection(self, configfile: Optional[str]) -> Tuple[bool, int]:
|
||||
def _check_weights_selection(self, configfile: T.Optional[str]) -> T.Tuple[bool, int]:
|
||||
""" Check which weights have been selected.
|
||||
|
||||
This is required for passing along the correct file name for the corresponding weights
|
||||
|
@ -78,7 +73,7 @@ class Mask(Masker):
|
|||
version = 1 if not is_faceswap else 2 if config.get("include_hair") else 3
|
||||
return is_faceswap, version
|
||||
|
||||
def _get_segment_indices(self) -> List[int]:
|
||||
def _get_segment_indices(self) -> T.List[int]:
|
||||
""" Obtain the segment indices to include within the face mask area based on user
|
||||
configuration settings.
|
||||
|
||||
|
@ -129,7 +124,7 @@ class Mask(Masker):
|
|||
mean = (0.384, 0.314, 0.279) if self._is_faceswap else (0.485, 0.456, 0.406)
|
||||
std = (0.324, 0.286, 0.275) if self._is_faceswap else (0.229, 0.224, 0.225)
|
||||
|
||||
batch.feed = ((np.array([cast(np.ndarray, feed.face)[..., :3]
|
||||
batch.feed = ((np.array([T.cast(np.ndarray, feed.face)[..., :3]
|
||||
for feed in batch.feed_faces],
|
||||
dtype="float32") / 255.0) - mean) / std
|
||||
logger.trace("feed shape: %s", batch.feed.shape) # type:ignore
|
||||
|
@ -168,7 +163,7 @@ class Mask(Masker):
|
|||
# SOFTWARE.
|
||||
|
||||
|
||||
_NAME_TRACKER = set()
|
||||
_NAME_TRACKER: T.Set[str] = set()
|
||||
|
||||
|
||||
def _get_name(name: str, start_idx: int = 1) -> str:
|
||||
|
@ -559,7 +554,7 @@ class BiSeNet(KSession):
|
|||
def __init__(self,
|
||||
model_path: str,
|
||||
allow_growth: bool,
|
||||
exclude_gpus: Optional[List[int]],
|
||||
exclude_gpus: T.Optional[T.List[int]],
|
||||
input_size: int,
|
||||
num_classes: int,
|
||||
cpu_mode: bool) -> None:
|
||||
|
@ -574,7 +569,7 @@ class BiSeNet(KSession):
|
|||
self.define_model(self._model_definition)
|
||||
self.load_model_weights()
|
||||
|
||||
def _model_definition(self) -> Tuple[Tensor, List[Tensor]]:
|
||||
def _model_definition(self) -> T.Tuple[Tensor, T.List[Tensor]]:
|
||||
""" Definition of the VGG Obstructed Model.
|
||||
|
||||
Returns
|
||||
|
|
|
@ -65,7 +65,7 @@ _DEFAULTS = {
|
|||
fixed=True),
|
||||
"cpu": dict(
|
||||
default=False,
|
||||
info="[Not PlaidML] BiseNet mask still runs fairly quickly on CPU on some setups. Enable "
|
||||
info="BiseNet mask still runs fairly quickly on CPU on some setups. Enable "
|
||||
"CPU mode here to use the CPU for this masker to save some VRAM at a speed cost.",
|
||||
datatype=bool,
|
||||
group="settings"),
|
||||
|
|
|
@ -1,24 +1,20 @@
|
|||
#!/usr/bin/env python3
|
||||
""" VGG Clear face mask plugin. """
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
from typing import cast, List, Optional, Tuple
|
||||
import typing as T
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import ( # pylint:disable=import-error
|
||||
Add, Conv2D, Conv2DTranspose, Cropping2D, Dropout, Input, Lambda, MaxPooling2D,
|
||||
ZeroPadding2D)
|
||||
|
||||
from lib.model.session import KSession
|
||||
from lib.utils import get_backend
|
||||
from ._base import BatchType, Masker, MaskerBatch
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.layers import (
|
||||
Add, Conv2D, Conv2DTranspose, Cropping2D, Dropout, Input, Lambda, MaxPooling2D,
|
||||
ZeroPadding2D)
|
||||
from plaidml.tile import Value as Tensor # pylint:disable=import-error
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import ( # pylint:disable=no-name-in-module,import-error
|
||||
Add, Conv2D, Conv2DTranspose, Cropping2D, Dropout, Input, Lambda, MaxPooling2D,
|
||||
ZeroPadding2D)
|
||||
if T.TYPE_CHECKING:
|
||||
from tensorflow import Tensor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -51,7 +47,7 @@ class Mask(Masker):
|
|||
def process_input(self, batch: BatchType) -> None:
|
||||
""" Compile the detected faces for prediction """
|
||||
assert isinstance(batch, MaskerBatch)
|
||||
input_ = np.array([cast(np.ndarray, feed.face)[..., :3]
|
||||
input_ = np.array([T.cast(np.ndarray, feed.face)[..., :3]
|
||||
for feed in batch.feed_faces], dtype="float32")
|
||||
batch.feed = input_ - np.mean(input_, axis=(1, 2))[:, None, None, :]
|
||||
logger.trace("feed shape: %s", batch.feed.shape) # type: ignore
|
||||
|
@ -98,7 +94,7 @@ class VGGClear(KSession):
|
|||
def __init__(self,
|
||||
model_path: str,
|
||||
allow_growth: bool,
|
||||
exclude_gpus: Optional[List[int]]):
|
||||
exclude_gpus: T.Optional[T.List[int]]):
|
||||
super().__init__("VGG Obstructed",
|
||||
model_path,
|
||||
allow_growth=allow_growth,
|
||||
|
@ -107,7 +103,7 @@ class VGGClear(KSession):
|
|||
self.load_model_weights()
|
||||
|
||||
@classmethod
|
||||
def _model_definition(cls) -> Tuple[Tensor, Tensor]:
|
||||
def _model_definition(cls) -> T.Tuple[Tensor, Tensor]:
|
||||
""" Definition of the VGG Obstructed Model.
|
||||
|
||||
Returns
|
||||
|
@ -214,7 +210,7 @@ class _ScorePool(): # pylint:disable=too-few-public-methods
|
|||
crop: tuple
|
||||
The amount of 2D cropping to apply. Tuple of `ints`
|
||||
"""
|
||||
def __init__(self, level: int, scale: float, crop: Tuple[int, int]):
|
||||
def __init__(self, level: int, scale: float, crop: T.Tuple[int, int]):
|
||||
self._name = f"_pool{level}"
|
||||
self._cropping = (crop, crop)
|
||||
self._scale = scale
|
||||
|
|
|
@ -1,29 +1,24 @@
|
|||
#!/usr/bin/env python3
|
||||
""" VGG Obstructed face mask plugin """
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
from typing import cast, List, Optional, Tuple
|
||||
import typing as T
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import ( # pylint:disable=import-error
|
||||
Add, Conv2D, Conv2DTranspose, Cropping2D, Dropout, Input, Lambda, MaxPooling2D,
|
||||
ZeroPadding2D)
|
||||
|
||||
from lib.model.session import KSession
|
||||
from lib.utils import get_backend
|
||||
from ._base import BatchType, Masker, MaskerBatch
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.layers import (
|
||||
Add, Conv2D, Conv2DTranspose, Cropping2D, Dropout, Input, Lambda, MaxPooling2D,
|
||||
ZeroPadding2D)
|
||||
from plaidml.tile import Value as Tensor # pylint:disable=import-error
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import ( # pylint:disable=no-name-in-module,import-error
|
||||
Add, Conv2D, Conv2DTranspose, Cropping2D, Dropout, Input, Lambda, MaxPooling2D,
|
||||
ZeroPadding2D)
|
||||
if T.TYPE_CHECKING:
|
||||
from tensorflow import Tensor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Mask(Masker):
|
||||
""" Neural network to process face image into a segmentation mask of the face """
|
||||
|
@ -52,7 +47,7 @@ class Mask(Masker):
|
|||
def process_input(self, batch: BatchType) -> None:
|
||||
""" Compile the detected faces for prediction """
|
||||
assert isinstance(batch, MaskerBatch)
|
||||
input_ = [cast(np.ndarray, feed.face)[..., :3] for feed in batch.feed_faces]
|
||||
input_ = [T.cast(np.ndarray, feed.face)[..., :3] for feed in batch.feed_faces]
|
||||
batch.feed = input_ - np.mean(input_, axis=(1, 2))[:, None, None, :]
|
||||
logger.trace("feed shape: %s", batch.feed.shape) # type:ignore
|
||||
|
||||
|
@ -95,7 +90,7 @@ class VGGObstructed(KSession):
|
|||
def __init__(self,
|
||||
model_path: str,
|
||||
allow_growth: bool,
|
||||
exclude_gpus: Optional[List[int]]) -> None:
|
||||
exclude_gpus: T.Optional[T.List[int]]) -> None:
|
||||
super().__init__("VGG Obstructed",
|
||||
model_path,
|
||||
allow_growth=allow_growth,
|
||||
|
@ -104,7 +99,7 @@ class VGGObstructed(KSession):
|
|||
self.load_model_weights()
|
||||
|
||||
@classmethod
|
||||
def _model_definition(cls) -> Tuple[Tensor, Tensor]:
|
||||
def _model_definition(cls) -> T.Tuple[Tensor, Tensor]:
|
||||
""" Definition of the VGG Obstructed Model.
|
||||
|
||||
Returns
|
||||
|
|
|
@ -494,10 +494,10 @@ class Extractor():
|
|||
vram_buffer = 256 # Leave a buffer for VRAM allocation
|
||||
gpu_stats = GPUStats()
|
||||
stats = gpu_stats.get_card_most_free()
|
||||
retval: Dict[str, Union[int, str]] = dict(count=gpu_stats.device_count,
|
||||
device=stats.device,
|
||||
vram_free=int(stats.free - vram_buffer),
|
||||
vram_total=int(stats.total))
|
||||
retval: Dict[str, Union[int, str]] = {"count": gpu_stats.device_count,
|
||||
"device": stats.device,
|
||||
"vram_free": int(stats.free - vram_buffer),
|
||||
"vram_total": int(stats.total)}
|
||||
logger.debug(retval)
|
||||
return retval
|
||||
|
||||
|
@ -517,10 +517,6 @@ class Extractor():
|
|||
logger.debug("No GPU detected. Enabling parallel processing.")
|
||||
return True
|
||||
|
||||
if get_backend() == "amd":
|
||||
logger.debug("Parallel processing disabled by amd")
|
||||
return False
|
||||
|
||||
logger.verbose("%s - %sMB free of %sMB", # type: ignore
|
||||
self._vram_stats["device"],
|
||||
self._vram_stats["vram_free"],
|
||||
|
@ -545,7 +541,6 @@ class Extractor():
|
|||
list:
|
||||
The jobs to be undertaken split into phases that fit into GPU RAM
|
||||
"""
|
||||
force_single_process = not multiprocess or get_backend() == "amd"
|
||||
phases: List[List[str]] = []
|
||||
current_phase: List[str] = []
|
||||
available = cast(int, self._vram_stats["vram_free"])
|
||||
|
@ -556,11 +551,11 @@ class Extractor():
|
|||
required = sum(self._vram_per_phase[p] for p in current_phase + [phase]) * scaling
|
||||
logger.debug("Num plugins for phase: %s, scaling: %s, vram required: %s",
|
||||
num_plugins, scaling, required)
|
||||
if required <= available and not force_single_process:
|
||||
if required <= available and multiprocess:
|
||||
logger.debug("Required: %s, available: %s. Adding phase '%s' to current phase: %s",
|
||||
required, available, phase, current_phase)
|
||||
current_phase.append(phase)
|
||||
elif len(current_phase) == 0 or force_single_process:
|
||||
elif len(current_phase) == 0 or not multiprocess:
|
||||
# Amount of VRAM required to run a single plugin is greater than available. We add
|
||||
# it anyway, and hope it will run with warnings, as the alternative is to not run
|
||||
# at all.
|
||||
|
@ -692,7 +687,7 @@ class Extractor():
|
|||
next_phase = self._flow[self._flow.index(phase) + 1]
|
||||
out_qname = f"extract{self._instance}_{next_phase}_in"
|
||||
logger.debug("in_qname: %s, out_qname: %s", in_qname, out_qname)
|
||||
kwargs = dict(in_queue=self._queues[in_qname], out_queue=self._queues[out_qname])
|
||||
kwargs = {"in_queue": self._queues[in_qname], "out_queue": self._queues[out_qname]}
|
||||
|
||||
plugin_type, idx = self._get_plugin_type_and_index(phase)
|
||||
plugin = getattr(self, f"_{plugin_type}")
|
||||
|
|
|
@ -15,18 +15,19 @@ To get a :class:`~lib.align.DetectedFace` object use the function:
|
|||
|
||||
>>> face = self.to_detected_face(<face left>, <face top>, <face right>, <face bottom>)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
import sys
|
||||
import typing as T
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Generator, List, Optional, Tuple, TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
from tensorflow.python.framework import errors_impl as tf_errors # pylint:disable=no-name-in-module # noqa
|
||||
|
||||
from lib.align import AlignedFace, DetectedFace
|
||||
from lib.image import read_image_meta
|
||||
from lib.utils import FaceswapError, get_backend
|
||||
from lib.utils import FaceswapError
|
||||
from plugins.extract._base import BatchType, Extractor, ExtractorBatch
|
||||
from plugins.extract.pipeline import ExtractMedia
|
||||
|
||||
|
@ -36,7 +37,7 @@ else:
|
|||
from typing import get_args, Literal
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
if T.TYPE_CHECKING:
|
||||
from queue import Queue
|
||||
from lib.align.aligned_face import CenteringType
|
||||
|
||||
|
@ -49,8 +50,8 @@ class RecogBatch(ExtractorBatch):
|
|||
|
||||
Inherits from :class:`~plugins.extract._base.ExtractorBatch`
|
||||
"""
|
||||
detected_faces: List["DetectedFace"] = field(default_factory=list)
|
||||
feed_faces: List[AlignedFace] = field(default_factory=list)
|
||||
detected_faces: T.List["DetectedFace"] = field(default_factory=list)
|
||||
feed_faces: T.List[AlignedFace] = field(default_factory=list)
|
||||
|
||||
|
||||
class Identity(Extractor): # pylint:disable=abstract-method
|
||||
|
@ -81,9 +82,9 @@ class Identity(Extractor): # pylint:disable=abstract-method
|
|||
"""
|
||||
|
||||
def __init__(self,
|
||||
git_model_id: Optional[int] = None,
|
||||
model_filename: Optional[str] = None,
|
||||
configfile: Optional[str] = None,
|
||||
git_model_id: T.Optional[int] = None,
|
||||
model_filename: T.Optional[str] = None,
|
||||
configfile: T.Optional[str] = None,
|
||||
instance: int = 0,
|
||||
**kwargs):
|
||||
logger.debug("Initializing %s", self.__class__.__name__)
|
||||
|
@ -93,7 +94,7 @@ class Identity(Extractor): # pylint:disable=abstract-method
|
|||
instance=instance,
|
||||
**kwargs)
|
||||
self.input_size = 256 # Override for model specific input_size
|
||||
self.centering: "CenteringType" = "legacy" # Override for model specific centering
|
||||
self.centering: CenteringType = "legacy" # Override for model specific centering
|
||||
self.coverage_ratio = 1.0 # Override for model specific coverage_ratio
|
||||
|
||||
self._plugin_type = "recognition"
|
||||
|
@ -118,7 +119,7 @@ class Identity(Extractor): # pylint:disable=abstract-method
|
|||
logger.debug("Obtained detected face: (filename: %s, detected_face: %s)",
|
||||
item.filename, item.detected_faces)
|
||||
|
||||
def get_batch(self, queue: "Queue") -> Tuple[bool, RecogBatch]:
|
||||
def get_batch(self, queue: Queue) -> T.Tuple[bool, RecogBatch]:
|
||||
""" Get items for inputting into the recognition from the queue in batches
|
||||
|
||||
Items are returned from the ``queue`` in batches of
|
||||
|
@ -224,25 +225,8 @@ class Identity(Extractor): # pylint:disable=abstract-method
|
|||
"CLI: Edit the file faceswap/config/extract.ini)."
|
||||
"\n3) Enable 'Single Process' mode.")
|
||||
raise FaceswapError(msg) from err
|
||||
except Exception as err:
|
||||
if get_backend() == "amd":
|
||||
# pylint:disable=import-outside-toplevel
|
||||
from lib.plaidml_utils import is_plaidml_error
|
||||
if (is_plaidml_error(err) and (
|
||||
"CL_MEM_OBJECT_ALLOCATION_FAILURE" in str(err).upper() or
|
||||
"enough memory for the current schedule" in str(err).lower())):
|
||||
msg = ("You do not have enough GPU memory available to run detection at "
|
||||
"the selected batch size. You can try a number of things:"
|
||||
"\n1) Close any other application that is using your GPU (web "
|
||||
"browsers are particularly bad for this)."
|
||||
"\n2) Lower the batchsize (the amount of images fed into the "
|
||||
"model) by editing the plugin settings (GUI: Settings > Configure "
|
||||
"extract settings, CLI: Edit the file "
|
||||
"faceswap/config/extract.ini).")
|
||||
raise FaceswapError(msg) from err
|
||||
raise
|
||||
|
||||
def finalize(self, batch: BatchType) -> Generator[ExtractMedia, None, None]:
|
||||
def finalize(self, batch: BatchType) -> T.Generator[ExtractMedia, None, None]:
|
||||
""" Finalize the output from Masker
|
||||
|
||||
This should be called as the final task of each `plugin`.
|
||||
|
@ -317,8 +301,8 @@ class IdentityFilter():
|
|||
def __init__(self, save_output: bool) -> None:
|
||||
logger.debug("Initializing %s: (save_output: %s)", self.__class__.__name__, save_output)
|
||||
self._save_output = save_output
|
||||
self._filter: Optional[np.ndarray] = None
|
||||
self._nfilter: Optional[np.ndarray] = None
|
||||
self._filter: T.Optional[np.ndarray] = None
|
||||
self._nfilter: T.Optional[np.ndarray] = None
|
||||
self._threshold = 0.0
|
||||
self._filter_enabled: bool = False
|
||||
self._nfilter_enabled: bool = False
|
||||
|
@ -402,9 +386,9 @@ class IdentityFilter():
|
|||
return retval
|
||||
|
||||
def _filter_faces(self,
|
||||
faces: List[DetectedFace],
|
||||
sub_folders: List[Optional[str]],
|
||||
should_filter: List[bool]) -> List[DetectedFace]:
|
||||
faces: T.List[DetectedFace],
|
||||
sub_folders: T.List[T.Optional[str]],
|
||||
should_filter: T.List[bool]) -> T.List[DetectedFace]:
|
||||
""" Filter the detected faces, either removing filtered faces from the list of detected
|
||||
faces or setting the output subfolder to `"_identity_filt"` for any filtered faces if
|
||||
saving output is enabled.
|
||||
|
@ -426,7 +410,7 @@ class IdentityFilter():
|
|||
The filtered list of detected face objects, if saving filtered faces has not been
|
||||
selected or the full list of detected faces
|
||||
"""
|
||||
retval: List[DetectedFace] = []
|
||||
retval: T.List[DetectedFace] = []
|
||||
self._counts += sum(should_filter)
|
||||
for idx, face in enumerate(faces):
|
||||
fldr = sub_folders[idx]
|
||||
|
@ -445,8 +429,8 @@ class IdentityFilter():
|
|||
return retval
|
||||
|
||||
def __call__(self,
|
||||
faces: List[DetectedFace],
|
||||
sub_folders: List[Optional[str]]) -> List[DetectedFace]:
|
||||
faces: T.List[DetectedFace],
|
||||
sub_folders: T.List[T.Optional[str]]) -> T.List[DetectedFace]:
|
||||
""" Call the identity filter function
|
||||
|
||||
Parameters
|
||||
|
@ -475,14 +459,14 @@ class IdentityFilter():
|
|||
logger.trace("All faces already filtered: %s", sub_folders) # type: ignore
|
||||
return faces
|
||||
|
||||
should_filter: List[np.ndarray] = []
|
||||
should_filter: T.List[np.ndarray] = []
|
||||
for f_type in get_args(Literal["filter", "nfilter"]):
|
||||
if not getattr(self, f"_{f_type}_enabled"):
|
||||
continue
|
||||
should_filter.append(self._get_matches(f_type, identities))
|
||||
|
||||
# If any of the filter or nfilter evaluate to 'should filter' then filter out face
|
||||
final_filter: List[bool] = np.array(should_filter).max(axis=0).tolist()
|
||||
final_filter: T.List[bool] = np.array(should_filter).max(axis=0).tolist()
|
||||
logger.trace("should_filter: %s, final_filter: %s", # type: ignore
|
||||
should_filter, final_filter)
|
||||
return self._filter_faces(faces, sub_folders, final_filter)
|
||||
|
|
|
@ -66,7 +66,7 @@ _DEFAULTS = {
|
|||
fixed=True),
|
||||
"cpu": dict(
|
||||
default=False,
|
||||
info="[Not PlaidML] VGG Face2 still runs fairly quickly on CPU on some setups. Enable "
|
||||
info="VGG Face2 still runs fairly quickly on CPU on some setups. Enable "
|
||||
"CPU mode here to use the CPU for this plugin to save some VRAM at a speed cost.",
|
||||
datatype=bool,
|
||||
group="settings"),
|
||||
|
|
|
@ -255,7 +255,7 @@ class Config(FaceswapConfig):
|
|||
datatype=bool,
|
||||
default=False,
|
||||
info=_(
|
||||
"[Not PlaidML] Apply AutoClipping to the gradients. AutoClip analyzes the "
|
||||
"Apply AutoClipping to the gradients. AutoClip analyzes the "
|
||||
"gradient weights and adjusts the normalization value dynamically to fit the "
|
||||
"data. Can help prevent NaNs and improve model optimization at the expense of "
|
||||
"VRAM. Ref: AutoClip: Adaptive Gradient Clipping for Source Separation Networks "
|
||||
|
@ -283,7 +283,7 @@ class Config(FaceswapConfig):
|
|||
group=_("network"),
|
||||
fixed=False,
|
||||
info=_(
|
||||
"[Nvidia Only]. Enable the Tensorflow GPU 'allow_growth' configuration option. "
|
||||
"Enable the Tensorflow GPU 'allow_growth' configuration option. "
|
||||
"This option prevents Tensorflow from allocating all of the GPU VRAM at launch "
|
||||
"but can lead to higher VRAM fragmentation and slower performance. Should only "
|
||||
"be enabled if you are receiving errors regarding 'cuDNN fails to initialize' "
|
||||
|
@ -296,7 +296,7 @@ class Config(FaceswapConfig):
|
|||
fixed=False,
|
||||
group=_("network"),
|
||||
info=_(
|
||||
"[Not PlaidML], NVIDIA GPUs can run operations in float16 faster than in "
|
||||
"NVIDIA GPUs can run operations in float16 faster than in "
|
||||
"float32. Mixed precision allows you to use a mix of float16 with float32, to "
|
||||
"get the performance benefits from float16 and the numeric stability benefits "
|
||||
"from float32.\n\nThis is untested on DirectML backend, but will run on most "
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Base class for Models plugins ALL Models should at least inherit from this class. """
|
||||
|
||||
from .model import get_all_sub_models, KerasModel, ModelBase # noqa
|
||||
from .model import get_all_sub_models, ModelBase
|
||||
|
|
|
@ -9,29 +9,25 @@ This module handles:
|
|||
- The loading, saving and backing up of keras models to and from disk.
|
||||
- The loading and freezing of weights for model plugins.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import typing as T
|
||||
|
||||
from typing import List, Optional, TYPE_CHECKING
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.models import load_model, Model as KModel # noqa:E501 # pylint:disable=import-error
|
||||
|
||||
from lib.model.backup_restore import Backup
|
||||
from lib.utils import FaceswapError, get_backend
|
||||
from lib.utils import FaceswapError
|
||||
|
||||
if sys.version_info < (3, 8):
|
||||
from typing_extensions import Literal
|
||||
else:
|
||||
from typing import Literal
|
||||
|
||||
if get_backend() == "amd":
|
||||
import keras
|
||||
from keras.models import load_model, Model as KModel
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow import keras # pylint:disable=import-error,no-name-in-module
|
||||
from tensorflow.keras.models import load_model, Model as KModel # noqa pylint:disable=import-error,no-name-in-module
|
||||
|
||||
if TYPE_CHECKING:
|
||||
if T.TYPE_CHECKING:
|
||||
from tensorflow import keras
|
||||
from .model import ModelBase
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
@ -39,7 +35,7 @@ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
|||
|
||||
def get_all_sub_models(
|
||||
model: keras.models.Model,
|
||||
models: Optional[List[keras.models.Model]] = None) -> List[keras.models.Model]:
|
||||
models: T.Optional[T.List[keras.models.Model]] = None) -> T.List[keras.models.Model]:
|
||||
""" For a given model, return all sub-models that occur (recursively) as children.
|
||||
|
||||
Parameters
|
||||
|
@ -86,7 +82,7 @@ class IO():
|
|||
request.
|
||||
"""
|
||||
def __init__(self,
|
||||
plugin: "ModelBase",
|
||||
plugin: ModelBase,
|
||||
model_dir: str,
|
||||
is_predict: bool,
|
||||
save_optimizer: Literal["never", "always", "exit"]) -> None:
|
||||
|
@ -94,7 +90,7 @@ class IO():
|
|||
self._is_predict = is_predict
|
||||
self._model_dir = model_dir
|
||||
self._save_optimizer = save_optimizer
|
||||
self._history: List[List[float]] = [[], []] # Loss histories per save iteration
|
||||
self._history: T.List[T.List[float]] = [[], []] # Loss histories per save iteration
|
||||
self._backup = Backup(self._model_dir, self._plugin.name)
|
||||
|
||||
@property
|
||||
|
@ -110,12 +106,12 @@ class IO():
|
|||
return os.path.isfile(self._filename)
|
||||
|
||||
@property
|
||||
def history(self) -> List[List[float]]:
|
||||
def history(self) -> T.List[T.List[float]]:
|
||||
""" list: list of loss histories per side for the current save iteration. """
|
||||
return self._history
|
||||
|
||||
@property
|
||||
def multiple_models_in_folder(self) -> Optional[List[str]]:
|
||||
def multiple_models_in_folder(self) -> T.Optional[T.List[str]]:
|
||||
""" :list: or ``None`` If there are multiple model types in the requested folder, or model
|
||||
types that don't correspond to the requested plugin type, then returns the list of plugin
|
||||
names that exist in the folder, otherwise returns ``None`` """
|
||||
|
@ -214,7 +210,7 @@ class IO():
|
|||
msg += f" - Average loss since last save: {', '.join(lossmsg)}"
|
||||
logger.info(msg)
|
||||
|
||||
def _get_save_averages(self) -> List[float]:
|
||||
def _get_save_averages(self) -> T.List[float]:
|
||||
""" Return the average loss since the last save iteration and reset historical loss """
|
||||
logger.debug("Getting save averages")
|
||||
if not all(loss for loss in self._history):
|
||||
|
@ -226,7 +222,7 @@ class IO():
|
|||
logger.debug("Average losses since last save: %s", retval)
|
||||
return retval
|
||||
|
||||
def _should_backup(self, save_averages: List[float]) -> bool:
|
||||
def _should_backup(self, save_averages: T.List[float]) -> bool:
|
||||
""" Check whether the loss averages for this save iteration is the lowest that has been
|
||||
seen.
|
||||
|
||||
|
@ -291,7 +287,7 @@ class Weights():
|
|||
plugin: :class:`Model`
|
||||
The parent plugin class that owns the IO functions.
|
||||
"""
|
||||
def __init__(self, plugin: "ModelBase") -> None:
|
||||
def __init__(self, plugin: ModelBase) -> None:
|
||||
logger.debug("Initializing %s: (plugin: %s)", self.__class__.__name__, plugin)
|
||||
self._model = plugin.model
|
||||
self._name = plugin.model_name
|
||||
|
@ -305,7 +301,7 @@ class Weights():
|
|||
logger.debug("Initialized %s", self.__class__.__name__)
|
||||
|
||||
@classmethod
|
||||
def _check_weights_file(cls, weights_file: str) -> Optional[str]:
|
||||
def _check_weights_file(cls, weights_file: str) -> T.Optional[str]:
|
||||
""" Validate that we have a valid path to a .h5 file.
|
||||
|
||||
Parameters
|
||||
|
@ -407,7 +403,7 @@ class Weights():
|
|||
"different settings than you have set for your current model.",
|
||||
skipped_ops)
|
||||
|
||||
def _get_weights_model(self) -> List[keras.models.Model]:
|
||||
def _get_weights_model(self) -> T.List[keras.models.Model]:
|
||||
""" Obtain a list of all sub-models contained within the weights model.
|
||||
|
||||
Returns
|
||||
|
|
|
@ -4,78 +4,43 @@ Base class for Models. ALL Models should at least inherit from this class.
|
|||
|
||||
See :mod:`~plugins.train.model.original` for an annotated example for how to create model plugins.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import typing as T
|
||||
|
||||
from collections import OrderedDict
|
||||
from typing import cast, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import Input # pylint:disable=import-error
|
||||
from tensorflow.keras.models import load_model, Model as KModel # noqa:E501 # pylint:disable=import-error
|
||||
|
||||
from lib.serializer import get_serializer
|
||||
from lib.model.nn_blocks import set_config as set_nnblock_config
|
||||
from lib.utils import get_backend, FaceswapError
|
||||
from lib.utils import FaceswapError
|
||||
from plugins.train._config import Config
|
||||
|
||||
from .io import IO, get_all_sub_models, Weights
|
||||
from .settings import Loss, Optimizer, Settings
|
||||
|
||||
if get_backend() == "amd":
|
||||
import keras
|
||||
from keras import backend as K
|
||||
from keras.layers import Input
|
||||
from keras.models import load_model, Model as KModel
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow import keras # pylint:disable=import-error
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import Input # pylint:disable=import-error,no-name-in-module
|
||||
from tensorflow.keras.models import load_model, Model as KModel # noqa pylint:disable=import-error,no-name-in-module
|
||||
|
||||
if sys.version_info < (3, 8):
|
||||
from typing_extensions import Literal
|
||||
else:
|
||||
from typing import Literal
|
||||
|
||||
if TYPE_CHECKING:
|
||||
if T.TYPE_CHECKING:
|
||||
import argparse
|
||||
from lib.config import ConfigValueType
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
_CONFIG: Dict[str, "ConfigValueType"] = {}
|
||||
|
||||
|
||||
def KerasModel(inputs: list, outputs: list, name: str) -> keras.models.Model: # noqa, pylint:disable=invalid-name
|
||||
""" wrapper for :class:`keras.models.Model`.
|
||||
|
||||
There are some minor foibles between Keras 2.2 and the Tensorflow version of Keras, so this
|
||||
catches potential issues and fixes prior to returning the requested model.
|
||||
|
||||
All models created within plugins should use this method, and should not call keras directly
|
||||
for a model.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
inputs: a keras.Input object or list of keras.Input objects.
|
||||
The input(s) of the model
|
||||
outputs: keras objects
|
||||
The output(s) of the model.
|
||||
name: str
|
||||
The name of the model.
|
||||
|
||||
Returns
|
||||
-------
|
||||
:class:`keras.models.Model`
|
||||
A Keras Model
|
||||
"""
|
||||
if get_backend() == "amd":
|
||||
logger.debug("Flattening inputs (%s) and outputs (%s) for AMD", inputs, outputs)
|
||||
inputs = np.array(inputs).flatten().tolist()
|
||||
outputs = np.array(outputs).flatten().tolist()
|
||||
logger.debug("Flattened inputs (%s) and outputs (%s)", inputs, outputs)
|
||||
return KModel(inputs, outputs, name=name)
|
||||
_CONFIG: T.Dict[str, ConfigValueType] = {}
|
||||
|
||||
|
||||
class ModelBase():
|
||||
|
@ -108,19 +73,19 @@ class ModelBase():
|
|||
"""
|
||||
def __init__(self,
|
||||
model_dir: str,
|
||||
arguments: "argparse.Namespace",
|
||||
arguments: argparse.Namespace,
|
||||
predict: bool = False) -> None:
|
||||
logger.debug("Initializing ModelBase (%s): (model_dir: '%s', arguments: %s, predict: %s)",
|
||||
self.__class__.__name__, model_dir, arguments, predict)
|
||||
|
||||
# Input shape must be set within the plugin after initializing
|
||||
self.input_shape: Tuple[int, ...] = ()
|
||||
self.input_shape: T.Tuple[int, ...] = ()
|
||||
self.trainer = "original" # Override for plugin specific trainer
|
||||
self.color_order: Literal["bgr", "rgb"] = "bgr" # Override for image color channel order
|
||||
|
||||
self._args = arguments
|
||||
self._is_predict = predict
|
||||
self._model: Optional[keras.models.Model] = None
|
||||
self._model: T.Optional[tf.keras.models.Model] = None
|
||||
|
||||
self._configfile = arguments.configfile if hasattr(arguments, "configfile") else None
|
||||
self._load_config()
|
||||
|
@ -134,7 +99,7 @@ class ModelBase():
|
|||
raise FaceswapError("'Learn Mask' has been selected but you have not chosen a Mask to "
|
||||
"use. Please select a mask or disable 'Learn Mask'.")
|
||||
|
||||
self._mixed_precision = self.config["mixed_precision"] and get_backend() != "amd"
|
||||
self._mixed_precision = self.config["mixed_precision"]
|
||||
# self._io = IO(self, model_dir, self._is_predict, self.config["save_optimizer"])
|
||||
# TODO - Re-enable saving of optimizer once this bug is fixed:
|
||||
# File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
|
||||
|
@ -158,12 +123,12 @@ class ModelBase():
|
|||
logger.debug("Initialized ModelBase (%s)", self.__class__.__name__)
|
||||
|
||||
@property
|
||||
def model(self) -> keras.models.Model:
|
||||
def model(self) -> tf.keras.models.Model:
|
||||
""":class:`Keras.models.Model`: The compiled model for this plugin. """
|
||||
return self._model
|
||||
|
||||
@property
|
||||
def command_line_arguments(self) -> "argparse.Namespace":
|
||||
def command_line_arguments(self) -> argparse.Namespace:
|
||||
""" :class:`argparse.Namespace`: The command line arguments passed to the model plugin from
|
||||
either the train or convert script """
|
||||
return self._args
|
||||
|
@ -210,16 +175,16 @@ class ModelBase():
|
|||
return self.name
|
||||
|
||||
@property
|
||||
def input_shapes(self) -> List[Tuple[None, int, int, int]]:
|
||||
def input_shapes(self) -> T.List[T.Tuple[None, int, int, int]]:
|
||||
""" list: A flattened list corresponding to all of the inputs to the model. """
|
||||
shapes = [cast(Tuple[None, int, int, int], K.int_shape(inputs))
|
||||
shapes = [T.cast(T.Tuple[None, int, int, int], K.int_shape(inputs))
|
||||
for inputs in self.model.inputs]
|
||||
return shapes
|
||||
|
||||
@property
|
||||
def output_shapes(self) -> List[Tuple[None, int, int, int]]:
|
||||
def output_shapes(self) -> T.List[T.Tuple[None, int, int, int]]:
|
||||
""" list: A flattened list corresponding to all of the outputs of the model. """
|
||||
shapes = [cast(Tuple[None, int, int, int], K.int_shape(output))
|
||||
shapes = [T.cast(T.Tuple[None, int, int, int], K.int_shape(output))
|
||||
for output in self.model.outputs]
|
||||
return shapes
|
||||
|
||||
|
@ -342,13 +307,13 @@ class ModelBase():
|
|||
os.mkdir(self.model_dir)
|
||||
new_model = self.build_model(self._get_inputs())
|
||||
for model_name, layer_name in legacy_mapping.items():
|
||||
old_model: keras.models.Model = load_model(os.path.join(archive_dir, model_name),
|
||||
compile=False)
|
||||
old_model: tf.keras.models.Model = load_model(os.path.join(archive_dir, model_name),
|
||||
compile=False)
|
||||
layer = [layer for layer in new_model.layers if layer.name == layer_name]
|
||||
if not layer:
|
||||
logger.warning("Skipping legacy weights from '%s'...", model_name)
|
||||
continue
|
||||
klayer: keras.layers.Layer = layer[0]
|
||||
klayer: tf.keras.layers.Layer = layer[0]
|
||||
logger.info("Updating legacy weights from '%s'...", model_name)
|
||||
klayer.set_weights(old_model.get_weights())
|
||||
filename = self._io._filename # pylint:disable=protected-access
|
||||
|
@ -368,7 +333,7 @@ class ModelBase():
|
|||
a list of 2 shape tuples of 3 dimensions. """
|
||||
assert len(self.input_shape) == 3, "Input shape should be a 3 dimensional shape tuple"
|
||||
|
||||
def _get_inputs(self) -> List[keras.layers.Input]:
|
||||
def _get_inputs(self) -> T.List[tf.keras.layers.Input]:
|
||||
""" Obtain the standardized inputs for the model.
|
||||
|
||||
The inputs will be returned for the "A" and "B" sides in the shape as defined by
|
||||
|
@ -387,7 +352,7 @@ class ModelBase():
|
|||
logger.debug("inputs: %s", inputs)
|
||||
return inputs
|
||||
|
||||
def build_model(self, inputs: List[keras.layers.Input]) -> keras.models.Model:
|
||||
def build_model(self, inputs: T.List[tf.keras.layers.Input]) -> tf.keras.models.Model:
|
||||
""" Override for Model Specific autoencoder builds.
|
||||
|
||||
Parameters
|
||||
|
@ -399,11 +364,9 @@ class ModelBase():
|
|||
Returns
|
||||
-------
|
||||
:class:`keras.models.Model`
|
||||
The output of this function must be a keras model generated from
|
||||
:class:`plugins.train.model._base.KerasModel`. See Keras documentation for the correct
|
||||
structure, but note that parameter :attr:`name` is a required rather than an optional
|
||||
argument in Faceswap. You should assign this to the attribute ``self.name`` that is
|
||||
automatically generated from the plugin's filename.
|
||||
See Keras documentation for the correct structure, but note that parameter :attr:`name`
|
||||
is a required rather than an optional argument in Faceswap. You should assign this to
|
||||
the attribute ``self.name`` that is automatically generated from the plugin's filename.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
@ -448,15 +411,12 @@ class ModelBase():
|
|||
if self.state.model_needs_rebuild:
|
||||
self._model = self._settings.check_model_precision(self._model, self._state)
|
||||
|
||||
autoclip = get_backend() != "amd" and self.config["autoclip"]
|
||||
optimizer = Optimizer(self.config["optimizer"],
|
||||
self.config["learning_rate"],
|
||||
autoclip,
|
||||
self.config["autoclip"],
|
||||
10 ** int(self.config["epsilon_exponent"])).optimizer
|
||||
if self._settings.use_mixed_precision:
|
||||
optimizer = self._settings.loss_scale_optimizer(optimizer)
|
||||
if get_backend() == "amd":
|
||||
self._rewrite_plaid_outputs()
|
||||
|
||||
weights = Weights(self)
|
||||
weights.load(self._io.model_exists)
|
||||
|
@ -467,29 +427,7 @@ class ModelBase():
|
|||
self._state.add_session_loss_names(self._loss.names)
|
||||
logger.debug("Compiled Model: %s", self.model)
|
||||
|
||||
def _rewrite_plaid_outputs(self) -> None:
|
||||
""" Rewrite the output names for models using the PlaidML (Keras 2.2.4) backend
|
||||
|
||||
Keras 2.2.4 duplicates model output names if any of the models have multiple outputs
|
||||
so we need to rename the outputs so we can successfully map the loss dictionaries.
|
||||
|
||||
This is a bit of a hack, but it does work.
|
||||
"""
|
||||
# TODO Remove this rewrite code if PlaidML updates to a version of Keras where this is
|
||||
# no longer necessary
|
||||
if len(self.model.output_names) == len(set(self.model.output_names)):
|
||||
logger.debug("Output names are unique, not rewriting: %s", self.model.output_names)
|
||||
return
|
||||
seen = {name: 0 for name in set(self.model.output_names)}
|
||||
new_names = []
|
||||
for name in self.model.output_names:
|
||||
new_names.append(f"{name}_{seen[name]}")
|
||||
seen[name] += 1
|
||||
logger.debug("Output names rewritten: (old: %s, new: %s)",
|
||||
self.model.output_names, new_names)
|
||||
self.model.output_names = new_names
|
||||
|
||||
def _legacy_mapping(self) -> Optional[dict]:
|
||||
def _legacy_mapping(self) -> T.Optional[dict]:
|
||||
""" The mapping of separate model files to single model layers for transferring of legacy
|
||||
weights.
|
||||
|
||||
|
@ -501,7 +439,7 @@ class ModelBase():
|
|||
"""
|
||||
return None
|
||||
|
||||
def add_history(self, loss: List[float]) -> None:
|
||||
def add_history(self, loss: T.List[float]) -> None:
|
||||
""" Add the current iteration's loss history to :attr:`_io.history`.
|
||||
|
||||
Called from the trainer after each iteration, for tracking loss drop over time between
|
||||
|
@ -544,18 +482,18 @@ class State():
|
|||
self._filename = os.path.join(model_dir, filename)
|
||||
self._name = model_name
|
||||
self._iterations = 0
|
||||
self._mixed_precision_layers: List[str] = []
|
||||
self._mixed_precision_layers: T.List[str] = []
|
||||
self._rebuild_model = False
|
||||
self._sessions: Dict[int, dict] = {}
|
||||
self._lowest_avg_loss: Dict[str, float] = {}
|
||||
self._config: Dict[str, "ConfigValueType"] = {}
|
||||
self._sessions: T.Dict[int, dict] = {}
|
||||
self._lowest_avg_loss: T.Dict[str, float] = {}
|
||||
self._config: T.Dict[str, ConfigValueType] = {}
|
||||
self._load(config_changeable_items)
|
||||
self._session_id = self._new_session_id()
|
||||
self._create_new_session(no_logs, config_changeable_items)
|
||||
logger.debug("Initialized %s:", self.__class__.__name__)
|
||||
|
||||
@property
|
||||
def loss_names(self) -> List[str]:
|
||||
def loss_names(self) -> T.List[str]:
|
||||
""" list: The loss names for the current session """
|
||||
return self._sessions[self._session_id]["loss_names"]
|
||||
|
||||
|
@ -580,7 +518,7 @@ class State():
|
|||
return self._session_id
|
||||
|
||||
@property
|
||||
def mixed_precision_layers(self) -> List[str]:
|
||||
def mixed_precision_layers(self) -> T.List[str]:
|
||||
"""list: Layers that can be switched between mixed-float16 and float32. """
|
||||
return self._mixed_precision_layers
|
||||
|
||||
|
@ -619,14 +557,14 @@ class State():
|
|||
values
|
||||
"""
|
||||
logger.debug("Creating new session. id: %s", self._session_id)
|
||||
self._sessions[self._session_id] = dict(timestamp=time.time(),
|
||||
no_logs=no_logs,
|
||||
loss_names=[],
|
||||
batchsize=0,
|
||||
iterations=0,
|
||||
config=config_changeable_items)
|
||||
self._sessions[self._session_id] = {"timestamp": time.time(),
|
||||
"no_logs": no_logs,
|
||||
"loss_names": [],
|
||||
"batchsize": 0,
|
||||
"iterations": 0,
|
||||
"config": config_changeable_items}
|
||||
|
||||
def add_session_loss_names(self, loss_names: List[str]) -> None:
|
||||
def add_session_loss_names(self, loss_names: T.List[str]) -> None:
|
||||
""" Add the session loss names to the sessions dictionary.
|
||||
|
||||
The loss names are used for Tensorboard logging
|
||||
|
@ -655,7 +593,7 @@ class State():
|
|||
self._iterations += 1
|
||||
self._sessions[self._session_id]["iterations"] += 1
|
||||
|
||||
def add_mixed_precision_layers(self, layers: List[str]) -> None:
|
||||
def add_mixed_precision_layers(self, layers: T.List[str]) -> None:
|
||||
""" Add the list of model's layers that are compatible for mixed precision to the
|
||||
state dictionary """
|
||||
logger.debug("Storing mixed precision layers: %s", layers)
|
||||
|
@ -717,14 +655,14 @@ class State():
|
|||
legacy_update = self._update_legacy_config()
|
||||
# Add any new items to state config for legacy purposes where the new default may be
|
||||
# detrimental to an existing model.
|
||||
legacy_defaults: Dict[str, Union[str, int, bool]] = dict(centering="legacy",
|
||||
mask_loss_function="mse",
|
||||
l2_reg_term=100,
|
||||
optimizer="adam",
|
||||
mixed_precision=False)
|
||||
legacy_defaults: T.Dict[str, T.Union[str, int, bool]] = {"centering": "legacy",
|
||||
"mask_loss_function": "mse",
|
||||
"l2_reg_term": 100,
|
||||
"optimizer": "adam",
|
||||
"mixed_precision": False}
|
||||
for key, val in _CONFIG.items():
|
||||
if key not in self._config.keys():
|
||||
setting: "ConfigValueType" = legacy_defaults.get(key, val)
|
||||
setting: ConfigValueType = legacy_defaults.get(key, val)
|
||||
logger.info("Adding new config item to state file: '%s': '%s'", key, setting)
|
||||
self._config[key] = setting
|
||||
self._update_changed_config_items(config_changeable_items)
|
||||
|
@ -852,7 +790,7 @@ class _Inference(): # pylint:disable=too-few-public-methods
|
|||
``True`` if the swap should be performed "B" > "A" ``False`` if the swap should be
|
||||
"A" > "B"
|
||||
"""
|
||||
def __init__(self, saved_model: keras.models.Model, switch_sides: bool) -> None:
|
||||
def __init__(self, saved_model: tf.keras.models.Model, switch_sides: bool) -> None:
|
||||
logger.debug("Initializing: %s (saved_model: %s, switch_sides: %s)",
|
||||
self.__class__.__name__, saved_model, switch_sides)
|
||||
self._config = saved_model.get_config()
|
||||
|
@ -865,11 +803,11 @@ class _Inference(): # pylint:disable=too-few-public-methods
|
|||
logger.debug("Initialized: %s", self.__class__.__name__)
|
||||
|
||||
@property
|
||||
def model(self) -> keras.models.Model:
|
||||
def model(self) -> tf.keras.models.Model:
|
||||
""" :class:`keras.models.Model`: The Faceswap model, compiled for inference. """
|
||||
return self._model
|
||||
|
||||
def _get_nodes(self, nodes: np.ndarray) -> List[Tuple[str, int]]:
|
||||
def _get_nodes(self, nodes: np.ndarray) -> T.List[T.Tuple[str, int]]:
|
||||
""" Given in input list of nodes from a :attr:`keras.models.Model.get_config` dictionary,
|
||||
filters the layer name(s) and output index of the node, splitting to the correct output
|
||||
index in the event of multiple inputs.
|
||||
|
@ -895,7 +833,7 @@ class _Inference(): # pylint:disable=too-few-public-methods
|
|||
retval = [(node[0], node[2]) for node in anodes]
|
||||
return retval
|
||||
|
||||
def _make_inference_model(self, saved_model: keras.models.Model) -> keras.models.Model:
|
||||
def _make_inference_model(self, saved_model: tf.keras.models.Model) -> tf.keras.models.Model:
|
||||
""" Extract the sub-models from the saved model that are required for inference.
|
||||
|
||||
Parameters
|
||||
|
@ -911,7 +849,7 @@ class _Inference(): # pylint:disable=too-few-public-methods
|
|||
logger.debug("Compiling inference model. saved_model: %s", saved_model)
|
||||
struct = self._get_filtered_structure()
|
||||
model_inputs = self._get_inputs(saved_model.inputs)
|
||||
compiled_layers: Dict[str, keras.layers.Layer] = {}
|
||||
compiled_layers: T.Dict[str, tf.keras.layers.Layer] = {}
|
||||
for layer in saved_model.layers:
|
||||
if layer.name not in struct:
|
||||
logger.debug("Skipping unused layer: '%s'", layer.name)
|
||||
|
@ -936,16 +874,12 @@ class _Inference(): # pylint:disable=too-few-public-methods
|
|||
else:
|
||||
next_input = inbound_layer
|
||||
|
||||
if get_backend() == "amd" and isinstance(next_input, list):
|
||||
# tensorflow.keras and keras 2.2 behave differently for layer inputs
|
||||
layer_inputs.extend(next_input)
|
||||
else:
|
||||
layer_inputs.append(next_input)
|
||||
layer_inputs.append(next_input)
|
||||
|
||||
logger.debug("Compiling layer '%s': layer inputs: %s", layer.name, layer_inputs)
|
||||
model = layer(layer_inputs)
|
||||
compiled_layers[layer.name] = model
|
||||
retval = KerasModel(model_inputs, model, name=f"{saved_model.name}_inference")
|
||||
retval = KModel(model_inputs, model, name=f"{saved_model.name}_inference")
|
||||
logger.debug("Compiled inference model '%s': %s", retval.name, retval)
|
||||
return retval
|
||||
|
||||
|
|
|
@ -10,42 +10,36 @@ Handles configuration of model plugins for:
|
|||
- Optimizer settings
|
||||
- General global model configuration settings
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
import logging
|
||||
import platform
|
||||
import sys
|
||||
import typing as T
|
||||
|
||||
from contextlib import nullcontext
|
||||
from typing import Any, Callable, ContextManager, Dict, List, Optional, TYPE_CHECKING, Union
|
||||
|
||||
import tensorflow as tf
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import losses as k_losses # pylint:disable=import-error
|
||||
import tensorflow.keras.mixed_precision as mixedprecision # noqa pylint:disable=import-error
|
||||
|
||||
from lib.model import losses, optimizers
|
||||
from lib.model.autoclip import AutoClipper
|
||||
from lib.utils import get_backend
|
||||
|
||||
if get_backend() == "amd":
|
||||
import keras
|
||||
from keras import losses as k_losses
|
||||
from keras import backend as K
|
||||
import tensorflow.keras.mixed_precision.experimental as mixedprecision # noqa pylint:disable=import-error,no-name-in-module,ungrouped-imports
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow import keras
|
||||
from tensorflow.keras import losses as k_losses # pylint:disable=import-error
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
import tensorflow.keras.mixed_precision as mixedprecision # noqa pylint:disable=import-error,no-name-in-module
|
||||
from lib.model.autoclip import AutoClipper # pylint:disable=ungrouped-imports
|
||||
|
||||
|
||||
if sys.version_info < (3, 8):
|
||||
from typing_extensions import Literal
|
||||
else:
|
||||
from typing import Literal
|
||||
|
||||
if TYPE_CHECKING:
|
||||
if T.TYPE_CHECKING:
|
||||
from argparse import Namespace
|
||||
from .model import State
|
||||
|
||||
keras = tf.keras
|
||||
K = keras.backend
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
||||
|
@ -64,9 +58,9 @@ class LossClass:
|
|||
kwargs: dict
|
||||
Any keyword arguments to supply to the loss function at initialization.
|
||||
"""
|
||||
function: Union[Callable[[tf.Tensor, tf.Tensor], tf.Tensor], Any] = k_losses.mae
|
||||
function: T.Union[T.Callable[[tf.Tensor, tf.Tensor], tf.Tensor], T.Any] = k_losses.mae
|
||||
init: bool = True
|
||||
kwargs: Dict[str, Any] = field(default_factory=dict)
|
||||
kwargs: T.Dict[str, T.Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
class Loss():
|
||||
|
@ -83,38 +77,34 @@ class Loss():
|
|||
logger.debug("Initializing %s: (color_order: %s)", self.__class__.__name__, color_order)
|
||||
self._config = config
|
||||
self._mask_channels = self._get_mask_channels()
|
||||
self._inputs: List[keras.layers.Layer] = []
|
||||
self._names: List[str] = []
|
||||
self._funcs: Dict[str, Callable] = {}
|
||||
self._inputs: T.List[tf.keras.layers.Layer] = []
|
||||
self._names: T.List[str] = []
|
||||
self._funcs: T.Dict[str, T.Callable] = {}
|
||||
|
||||
logcosh = losses.LogCosh() if get_backend() == "amd" else k_losses.logcosh
|
||||
self._loss_dict = dict(ffl=LossClass(function=losses.FocalFrequencyLoss),
|
||||
flip=LossClass(function=losses.LDRFLIPLoss,
|
||||
kwargs=dict(color_order=color_order)),
|
||||
gmsd=LossClass(function=losses.GMSDLoss),
|
||||
l_inf_norm=LossClass(function=losses.LInfNorm),
|
||||
laploss=LossClass(function=losses.LaplacianPyramidLoss),
|
||||
logcosh=LossClass(function=logcosh,
|
||||
init=False),
|
||||
lpips_alex=LossClass(function=losses.LPIPSLoss,
|
||||
kwargs=dict(trunk_network="alex")),
|
||||
lpips_squeeze=LossClass(function=losses.LPIPSLoss,
|
||||
kwargs=dict(trunk_network="squeeze")),
|
||||
lpips_vgg16=LossClass(function=losses.LPIPSLoss,
|
||||
kwargs=dict(trunk_network="vgg16")),
|
||||
ms_ssim=LossClass(function=losses.MSSIMLoss),
|
||||
mae=LossClass(function=k_losses.mean_absolute_error,
|
||||
init=False),
|
||||
mse=LossClass(function=k_losses.mean_squared_error,
|
||||
init=False),
|
||||
pixel_gradient_diff=LossClass(function=losses.GradientLoss),
|
||||
ssim=LossClass(function=losses.DSSIMObjective),
|
||||
smooth_loss=LossClass(function=losses.GeneralizedLoss))
|
||||
self._loss_dict = {"ffl": LossClass(function=losses.FocalFrequencyLoss),
|
||||
"flip": LossClass(function=losses.LDRFLIPLoss,
|
||||
kwargs={"color_order": color_order}),
|
||||
"gmsd": LossClass(function=losses.GMSDLoss),
|
||||
"l_inf_norm": LossClass(function=losses.LInfNorm),
|
||||
"laploss": LossClass(function=losses.LaplacianPyramidLoss),
|
||||
"logcosh": LossClass(function=k_losses.logcosh, init=False),
|
||||
"lpips_alex": LossClass(function=losses.LPIPSLoss,
|
||||
kwargs={"trunk_network": "alex"}),
|
||||
"lpips_squeeze": LossClass(function=losses.LPIPSLoss,
|
||||
kwargs={"trunk_network": "squeeze"}),
|
||||
"lpips_vgg16": LossClass(function=losses.LPIPSLoss,
|
||||
kwargs={"trunk_network": "vgg16"}),
|
||||
"ms_ssim": LossClass(function=losses.MSSIMLoss),
|
||||
"mae": LossClass(function=k_losses.mean_absolute_error, init=False),
|
||||
"mse": LossClass(function=k_losses.mean_squared_error, init=False),
|
||||
"pixel_gradient_diff": LossClass(function=losses.GradientLoss),
|
||||
"ssim": LossClass(function=losses.DSSIMObjective),
|
||||
"smooth_loss": LossClass(function=losses.GeneralizedLoss)}
|
||||
|
||||
logger.debug("Initialized: %s", self.__class__.__name__)
|
||||
|
||||
@property
|
||||
def names(self) -> List[str]:
|
||||
def names(self) -> T.List[str]:
|
||||
""" list: The list of loss names for the model. """
|
||||
return self._names
|
||||
|
||||
|
@ -124,21 +114,21 @@ class Loss():
|
|||
return self._funcs
|
||||
|
||||
@property
|
||||
def _mask_inputs(self) -> Optional[list]:
|
||||
def _mask_inputs(self) -> T.Optional[list]:
|
||||
""" list: The list of input tensors to the model that contain the mask. Returns ``None``
|
||||
if there is no mask input to the model. """
|
||||
mask_inputs = [inp for inp in self._inputs if inp.name.startswith("mask")]
|
||||
return None if not mask_inputs else mask_inputs
|
||||
|
||||
@property
|
||||
def _mask_shapes(self) -> Optional[List[tuple]]:
|
||||
def _mask_shapes(self) -> T.Optional[T.List[tuple]]:
|
||||
""" list: The list of shape tuples for the mask input tensors for the model. Returns
|
||||
``None`` if there is no mask input. """
|
||||
if self._mask_inputs is None:
|
||||
return None
|
||||
return [K.int_shape(mask_input) for mask_input in self._mask_inputs]
|
||||
|
||||
def configure(self, model: keras.models.Model) -> None:
|
||||
def configure(self, model: tf.keras.models.Model) -> None:
|
||||
""" Configure the loss functions for the given inputs and outputs.
|
||||
|
||||
Parameters
|
||||
|
@ -151,7 +141,7 @@ class Loss():
|
|||
self._set_loss_functions(model.output_names)
|
||||
self._names.insert(0, "total")
|
||||
|
||||
def _set_loss_names(self, outputs: List[tf.Tensor]) -> None:
|
||||
def _set_loss_names(self, outputs: T.List[tf.Tensor]) -> None:
|
||||
""" Name the losses based on model output.
|
||||
|
||||
This is used for correct naming in the state file, for display purposes only.
|
||||
|
@ -183,7 +173,7 @@ class Loss():
|
|||
self._names.append(f"{name}_{side}{suffix}")
|
||||
logger.debug(self._names)
|
||||
|
||||
def _get_function(self, name: str) -> Callable[[tf.Tensor, tf.Tensor], tf.Tensor]:
|
||||
def _get_function(self, name: str) -> T.Callable[[tf.Tensor, tf.Tensor], tf.Tensor]:
|
||||
""" Obtain the requested Loss function
|
||||
|
||||
Parameters
|
||||
|
@ -201,7 +191,7 @@ class Loss():
|
|||
logger.debug("Obtained loss function `%s` (%s)", name, retval)
|
||||
return retval
|
||||
|
||||
def _set_loss_functions(self, output_names: List[str]):
|
||||
def _set_loss_functions(self, output_names: T.List[str]):
|
||||
""" Set the loss functions and their associated weights.
|
||||
|
||||
Adds the loss functions to the :attr:`functions` dictionary.
|
||||
|
@ -261,7 +251,7 @@ class Loss():
|
|||
mask_channel=mask_channel)
|
||||
channel_idx += 1
|
||||
|
||||
def _get_mask_channels(self) -> List[int]:
|
||||
def _get_mask_channels(self) -> T.List[int]:
|
||||
""" Obtain the channels from the face targets that the masks reside in from the training
|
||||
data generator.
|
||||
|
||||
|
@ -314,22 +304,22 @@ class Optimizer(): # pylint:disable=too-few-public-methods
|
|||
", epsilon: %s)", self.__class__.__name__, optimizer, learning_rate,
|
||||
autoclip, epsilon)
|
||||
valid_optimizers = {"adabelief": (optimizers.AdaBelief,
|
||||
dict(beta_1=0.5, beta_2=0.99, epsilon=epsilon)),
|
||||
{"beta_1": 0.5, "beta_2": 0.99, "epsilon": epsilon}),
|
||||
"adam": (optimizers.Adam,
|
||||
dict(beta_1=0.5, beta_2=0.99, epsilon=epsilon)),
|
||||
{"beta_1": 0.5, "beta_2": 0.99, "epsilon": epsilon}),
|
||||
"nadam": (optimizers.Nadam,
|
||||
dict(beta_1=0.5, beta_2=0.99, epsilon=epsilon)),
|
||||
"rms-prop": (optimizers.RMSprop, dict(epsilon=epsilon))}
|
||||
{"beta_1": 0.5, "beta_2": 0.99, "epsilon": epsilon}),
|
||||
"rms-prop": (optimizers.RMSprop, {"epsilon": epsilon})}
|
||||
optimizer_info = valid_optimizers[optimizer]
|
||||
self._optimizer: Callable = optimizer_info[0]
|
||||
self._kwargs: Dict[str, Any] = optimizer_info[1]
|
||||
self._optimizer: T.Callable = optimizer_info[0]
|
||||
self._kwargs: T.Dict[str, T.Any] = optimizer_info[1]
|
||||
|
||||
self._configure(learning_rate, autoclip)
|
||||
logger.verbose("Using %s optimizer", optimizer.title()) # type:ignore
|
||||
logger.verbose("Using %s optimizer", optimizer.title()) # type:ignore[attr-defined]
|
||||
logger.debug("Initialized: %s", self.__class__.__name__)
|
||||
|
||||
@property
|
||||
def optimizer(self) -> keras.optimizers.Optimizer:
|
||||
def optimizer(self) -> tf.keras.optimizers.Optimizer:
|
||||
""" :class:`keras.optimizers.Optimizer`: The requested optimizer. """
|
||||
return self._optimizer(**self._kwargs)
|
||||
|
||||
|
@ -344,19 +334,8 @@ class Optimizer(): # pylint:disable=too-few-public-methods
|
|||
The selected learning rate to use
|
||||
autoclip: bool
|
||||
``True`` if AutoClip should be enabled otherwise ``False``
|
||||
|
||||
Notes
|
||||
-----
|
||||
Clip-norm is ballooning VRAM usage, which is not expected behavior and may be a bug in
|
||||
Keras/Tensorflow.
|
||||
|
||||
PlaidML has a bug regarding the clip-norm parameter See:
|
||||
https://github.com/plaidml/plaidml/issues/228. We workaround by simply not adding this
|
||||
parameter for AMD backend users.
|
||||
"""
|
||||
lr_key = "lr" if get_backend() == "amd" else "learning_rate"
|
||||
self._kwargs[lr_key] = learning_rate
|
||||
|
||||
self._kwargs["learning_rate"] = learning_rate
|
||||
if not autoclip:
|
||||
return
|
||||
|
||||
|
@ -371,9 +350,7 @@ class Settings():
|
|||
Sets backend tensorflow settings prior to launching the model.
|
||||
|
||||
Tensorflow 2 uses distribution strategies for multi-GPU/system training. These are context
|
||||
managers. To enable the code to be more readable, we handle strategies the same way for Nvidia
|
||||
and AMD backends. PlaidML does not support strategies, but we need to still create a context
|
||||
manager so that we don't need branching logic.
|
||||
managers.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
@ -389,7 +366,7 @@ class Settings():
|
|||
for training. Default: ``False``
|
||||
"""
|
||||
def __init__(self,
|
||||
arguments: "Namespace",
|
||||
arguments: Namespace,
|
||||
mixed_precision: bool,
|
||||
allow_growth: bool,
|
||||
is_predict: bool) -> None:
|
||||
|
@ -418,7 +395,7 @@ class Settings():
|
|||
@classmethod
|
||||
def loss_scale_optimizer(
|
||||
cls,
|
||||
optimizer: keras.optimizers.Optimizer) -> mixedprecision.LossScaleOptimizer:
|
||||
optimizer: tf.keras.optimizers.Optimizer) -> mixedprecision.LossScaleOptimizer:
|
||||
""" Optimize loss scaling for mixed precision training.
|
||||
|
||||
Parameters
|
||||
|
@ -431,10 +408,10 @@ class Settings():
|
|||
:class:`tf.keras.mixed_precision.loss_scale_optimizer.LossScaleOptimizer`
|
||||
The original optimizer with loss scaling applied
|
||||
"""
|
||||
return mixedprecision.LossScaleOptimizer(optimizer)
|
||||
return mixedprecision.LossScaleOptimizer(optimizer) # pylint:disable=no-member
|
||||
|
||||
@classmethod
|
||||
def _set_tf_settings(cls, allow_growth: bool, exclude_devices: List[int]) -> None:
|
||||
def _set_tf_settings(cls, allow_growth: bool, exclude_devices: T.List[int]) -> None:
|
||||
""" Specify Devices to place operations on and Allow TensorFlow to manage VRAM growth.
|
||||
|
||||
Enables the Tensorflow allow_growth option if requested in the command line arguments
|
||||
|
@ -448,10 +425,8 @@ class Settings():
|
|||
``None`` if all devices should be made available
|
||||
"""
|
||||
backend = get_backend()
|
||||
if backend == "amd":
|
||||
return # No settings for AMD
|
||||
if backend == "cpu":
|
||||
logger.verbose("Hiding GPUs from Tensorflow") # type:ignore
|
||||
logger.verbose("Hiding GPUs from Tensorflow") # type:ignore[attr-defined]
|
||||
tf.config.set_visible_devices([], "GPU")
|
||||
return
|
||||
|
||||
|
@ -491,27 +466,22 @@ class Settings():
|
|||
``True`` if mixed precision has been enabled otherwise ``False``
|
||||
"""
|
||||
logger.debug("use_mixed_precision: %s", use_mixed_precision)
|
||||
if get_backend() == "amd":
|
||||
logger.debug("No action to perform for 'mixed_precision' on backend '%s': "
|
||||
"use_mixed_precision: %s)", get_backend(), use_mixed_precision)
|
||||
return False
|
||||
|
||||
if not use_mixed_precision:
|
||||
policy = mixedprecision.Policy('float32')
|
||||
mixedprecision.set_global_policy(policy)
|
||||
policy = mixedprecision.Policy('float32') # pylint:disable=no-member
|
||||
mixedprecision.set_global_policy(policy) # pylint:disable=no-member
|
||||
logger.debug("Disabling mixed precision. (Compute dtype: %s, variable_dtype: %s)",
|
||||
policy.compute_dtype, policy.variable_dtype)
|
||||
return False
|
||||
|
||||
policy = mixedprecision.Policy('mixed_float16')
|
||||
mixedprecision.set_global_policy(policy)
|
||||
policy = mixedprecision.Policy('mixed_float16') # pylint:disable=no-member
|
||||
mixedprecision.set_global_policy(policy) # pylint:disable=no-member
|
||||
logger.debug("Enabled mixed precision. (Compute dtype: %s, variable_dtype: %s)",
|
||||
policy.compute_dtype, policy.variable_dtype)
|
||||
return True
|
||||
|
||||
def _get_strategy(self,
|
||||
strategy: Literal["default", "central-storage", "mirrored"]
|
||||
) -> Optional[tf.distribute.Strategy]:
|
||||
) -> T.Optional[tf.distribute.Strategy]:
|
||||
""" If we are running on Nvidia backend and the strategy is not ``None`` then return
|
||||
the correct tensorflow distribution strategy, otherwise return ``None``.
|
||||
|
||||
|
@ -595,7 +565,7 @@ class Settings():
|
|||
|
||||
return tf.distribute.experimental.CentralStorageStrategy(parameter_device="/cpu:0")
|
||||
|
||||
def _get_mixed_precision_layers(self, layers: List[dict]) -> List[str]:
|
||||
def _get_mixed_precision_layers(self, layers: T.List[dict]) -> T.List[str]:
|
||||
""" Obtain the names of the layers in a mixed precision model that have their dtype policy
|
||||
explicitly set to mixed-float16.
|
||||
|
||||
|
@ -625,7 +595,7 @@ class Settings():
|
|||
logger.debug("Skipping unsupported layer: %s %s", layer["name"], dtype)
|
||||
return retval
|
||||
|
||||
def _switch_precision(self, layers: List[dict], compatible: List[str]) -> None:
|
||||
def _switch_precision(self, layers: T.List[dict], compatible: T.List[str]) -> None:
|
||||
""" Switch a model's datatype between mixed-float16 and float32.
|
||||
|
||||
Parameters
|
||||
|
@ -636,7 +606,7 @@ class Settings():
|
|||
A list of layer names that are compatible to have their datatype switched
|
||||
"""
|
||||
dtype = "mixed_float16" if self.use_mixed_precision else "float32"
|
||||
policy = dict(class_name="Policy", config=dict(name=dtype))
|
||||
policy = {"class_name": "Policy", "config": {"name": dtype}}
|
||||
|
||||
for layer in layers:
|
||||
config = layer["config"]
|
||||
|
@ -654,9 +624,9 @@ class Settings():
|
|||
config["dtype"] = policy
|
||||
|
||||
def get_mixed_precision_layers(self,
|
||||
build_func: Callable[[List[keras.layers.Layer]],
|
||||
keras.models.Model],
|
||||
inputs: List[keras.layers.Layer]) -> List[str]:
|
||||
build_func: T.Callable[[T.List[tf.keras.layers.Layer]],
|
||||
tf.keras.models.Model],
|
||||
inputs: T.List[tf.keras.layers.Layer]) -> T.List[str]:
|
||||
""" Get and store the mixed precision layers from a full precision enabled model.
|
||||
|
||||
Parameters
|
||||
|
@ -674,9 +644,6 @@ class Settings():
|
|||
"""
|
||||
logger.info("Storing Mixed Precision compatible layers. Please ignore any following "
|
||||
"warnings about using mixed precision.")
|
||||
if get_backend() == "amd":
|
||||
logger.debug("Mixed Precision not supported for AMD. Returning empty list")
|
||||
return []
|
||||
self._set_keras_mixed_precision(True)
|
||||
model = build_func(inputs)
|
||||
layers = self._get_mixed_precision_layers(model.get_config()["layers"])
|
||||
|
@ -685,8 +652,8 @@ class Settings():
|
|||
return layers
|
||||
|
||||
def check_model_precision(self,
|
||||
model: keras.models.Model,
|
||||
state: "State") -> keras.models.Model:
|
||||
model: tf.keras.models.Model,
|
||||
state: "State") -> tf.keras.models.Model:
|
||||
""" Check the model's precision.
|
||||
|
||||
If this is a new model, then
|
||||
|
@ -709,9 +676,6 @@ class Settings():
|
|||
:class:`keras.models.Model`
|
||||
The original model with the datatype updated
|
||||
"""
|
||||
if get_backend() == "amd": # Mixed precision not supported on amd
|
||||
return model
|
||||
|
||||
if self.use_mixed_precision and not state.mixed_precision_layers:
|
||||
# Switching to mixed precision on a model which was started in FP32 prior to the
|
||||
# ability to switch between precisions on a saved model is not supported as we
|
||||
|
@ -735,7 +699,7 @@ class Settings():
|
|||
del model
|
||||
return new_model
|
||||
|
||||
def strategy_scope(self) -> ContextManager:
|
||||
def strategy_scope(self) -> T.ContextManager:
|
||||
""" Return the strategy scope if we have set a strategy, otherwise return a null
|
||||
context.
|
||||
|
||||
|
|
|
@ -4,18 +4,13 @@
|
|||
import logging
|
||||
import sys
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.initializers import RandomNormal # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import Input, LeakyReLU # pylint:disable=import-error
|
||||
from tensorflow.keras.models import Model as KModel # pylint:disable=import-error
|
||||
|
||||
from lib.model.nn_blocks import Conv2DOutput, UpscaleBlock, ResidualBlock
|
||||
from lib.utils import get_backend
|
||||
from .original import Model as OriginalModel, KerasModel
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.initializers import RandomNormal # pylint:disable=no-name-in-module
|
||||
from keras.layers import Input, LeakyReLU
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.initializers import RandomNormal # noqa pylint:disable=import-error,no-name-in-module
|
||||
from tensorflow.keras.layers import Input, LeakyReLU # noqa pylint:disable=import-error,no-name-in-module
|
||||
|
||||
from .original import Model as OriginalModel
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
@ -64,4 +59,4 @@ class Model(OriginalModel):
|
|||
var_y = UpscaleBlock(64, activation="leakyrelu")(var_y)
|
||||
var_y = Conv2DOutput(1, 5, name=f"mask_out_{side}")(var_y)
|
||||
outputs.append(var_y)
|
||||
return KerasModel([input_], outputs=outputs, name=f"decoder_{side}")
|
||||
return KModel([input_], outputs=outputs, name=f"decoder_{side}")
|
||||
|
|
|
@ -3,15 +3,12 @@
|
|||
Based on https://github.com/iperov/DeepFaceLab
|
||||
"""
|
||||
|
||||
from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, UpscaleBlock
|
||||
from lib.utils import get_backend
|
||||
from .original import Model as OriginalModel, KerasModel
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Dense, Flatten, Input, Reshape # noqa:E501 # pylint:disable=import-error
|
||||
from tensorflow.keras.models import Model as KModel # pylint:disable=import-error
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.layers import Dense, Flatten, Input, Reshape
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Dense, Flatten, Input, Reshape # noqa pylint:disable=import-error,no-name-in-module
|
||||
from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, UpscaleBlock
|
||||
from .original import Model as OriginalModel
|
||||
|
||||
|
||||
class Model(OriginalModel):
|
||||
|
@ -32,7 +29,7 @@ class Model(OriginalModel):
|
|||
var_x = Dense(8 * 8 * self.encoder_dim)(var_x)
|
||||
var_x = Reshape((8, 8, self.encoder_dim))(var_x)
|
||||
var_x = UpscaleBlock(self.encoder_dim, activation="leakyrelu")(var_x)
|
||||
return KerasModel(input_, var_x, name="encoder")
|
||||
return KModel(input_, var_x, name="encoder")
|
||||
|
||||
def decoder(self, side):
|
||||
""" DFL H128 Decoder """
|
||||
|
@ -51,4 +48,4 @@ class Model(OriginalModel):
|
|||
var_y = UpscaleBlock(self.encoder_dim // 4, activation="leakyrelu")(var_y)
|
||||
var_y = Conv2DOutput(1, 5, name=f"mask_out_{side}")(var_y)
|
||||
outputs.append(var_y)
|
||||
return KerasModel(input_, outputs=outputs, name=f"decoder_{side}")
|
||||
return KModel(input_, outputs=outputs, name=f"decoder_{side}")
|
||||
|
|
|
@ -3,18 +3,16 @@
|
|||
Based on https://github.com/iperov/DeepFaceLab
|
||||
"""
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Concatenate, Dense, Flatten, Input, LeakyReLU, Reshape # noqa:E501 # pylint:disable=import-error
|
||||
from tensorflow.keras.models import Model as KModel # pylint:disable=import-error
|
||||
|
||||
from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, ResidualBlock, UpscaleBlock
|
||||
from lib.utils import get_backend
|
||||
|
||||
from ._base import ModelBase, KerasModel
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.layers import Concatenate, Dense, Flatten, Input, LeakyReLU, Reshape
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Concatenate, Dense, Flatten, Input, LeakyReLU, Reshape # noqa pylint:disable=import-error,no-name-in-module
|
||||
from ._base import ModelBase
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
@ -75,9 +73,7 @@ class Model(ModelBase):
|
|||
else:
|
||||
outputs = [self.decoder("a", enc_output_shape)(encoder_a),
|
||||
self.decoder("b", enc_output_shape)(encoder_b)]
|
||||
autoencoder = KerasModel(inputs,
|
||||
outputs,
|
||||
name=self.model_name)
|
||||
autoencoder = KModel(inputs, outputs, name=self.model_name)
|
||||
return autoencoder
|
||||
|
||||
def encoder_df(self):
|
||||
|
@ -93,7 +89,7 @@ class Model(ModelBase):
|
|||
var_x = Dense(lowest_dense_res * lowest_dense_res * self.ae_dims)(var_x)
|
||||
var_x = Reshape((lowest_dense_res, lowest_dense_res, self.ae_dims))(var_x)
|
||||
var_x = UpscaleBlock(self.ae_dims, activation="leakyrelu")(var_x)
|
||||
return KerasModel(input_, var_x, name="encoder_df")
|
||||
return KModel(input_, var_x, name="encoder_df")
|
||||
|
||||
def encoder_liae(self):
|
||||
""" DFL SAE LIAE Encoder Network """
|
||||
|
@ -104,7 +100,7 @@ class Model(ModelBase):
|
|||
var_x = Conv2DBlock(dims * 4, activation="leakyrelu")(var_x)
|
||||
var_x = Conv2DBlock(dims * 8, activation="leakyrelu")(var_x)
|
||||
var_x = Flatten()(var_x)
|
||||
return KerasModel(input_, var_x, name="encoder_liae")
|
||||
return KModel(input_, var_x, name="encoder_liae")
|
||||
|
||||
def inter_liae(self, side, input_shape):
|
||||
""" DFL SAE LIAE Intermediate Network """
|
||||
|
@ -115,7 +111,7 @@ class Model(ModelBase):
|
|||
var_x = Dense(lowest_dense_res * lowest_dense_res * self.ae_dims * 2)(var_x)
|
||||
var_x = Reshape((lowest_dense_res, lowest_dense_res, self.ae_dims * 2))(var_x)
|
||||
var_x = UpscaleBlock(self.ae_dims * 2, activation="leakyrelu")(var_x)
|
||||
return KerasModel(input_, var_x, name=f"intermediate_{side}")
|
||||
return KModel(input_, var_x, name=f"intermediate_{side}")
|
||||
|
||||
def decoder(self, side, input_shape):
|
||||
""" DFL SAE Decoder Network"""
|
||||
|
@ -153,15 +149,15 @@ class Model(ModelBase):
|
|||
var_y = UpscaleBlock(self.decoder_dim * 2, activation="leakyrelu")(var_y)
|
||||
var_y = Conv2DOutput(1, 5, name=f"mask_out_{side}")(var_y)
|
||||
outputs.append(var_y)
|
||||
return KerasModel(input_, outputs=outputs, name=f"decoder_{side}")
|
||||
return KModel(input_, outputs=outputs, name=f"decoder_{side}")
|
||||
|
||||
def _legacy_mapping(self):
|
||||
""" The mapping of legacy separate model names to single model names """
|
||||
mappings = dict(df={f"{self.name}_encoder.h5": "encoder_df",
|
||||
f"{self.name}_decoder_A.h5": "decoder_a",
|
||||
f"{self.name}_decoder_B.h5": "decoder_b"},
|
||||
liae={f"{self.name}_encoder.h5": "encoder_liae",
|
||||
f"{self.name}_intermediate_B.h5": "intermediate_both",
|
||||
f"{self.name}_intermediate.h5": "intermediate_b",
|
||||
f"{self.name}_decoder.h5": "decoder_both"})
|
||||
mappings = {"df": {f"{self.name}_encoder.h5": "encoder_df",
|
||||
f"{self.name}_decoder_A.h5": "decoder_a",
|
||||
f"{self.name}_decoder_B.h5": "decoder_b"},
|
||||
"liae": {f"{self.name}_encoder.h5": "encoder_liae",
|
||||
f"{self.name}_intermediate_B.h5": "intermediate_both",
|
||||
f"{self.name}_intermediate.h5": "intermediate_b",
|
||||
f"{self.name}_decoder.h5": "decoder_both"}}
|
||||
return mappings[self.config["architecture"]]
|
||||
|
|
|
@ -9,21 +9,18 @@
|
|||
"""
|
||||
import logging
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import ( # pylint:disable=import-error
|
||||
AveragePooling2D, BatchNormalization, Concatenate, Dense, Dropout, Flatten, Input, Reshape,
|
||||
LeakyReLU, UpSampling2D)
|
||||
from tensorflow.keras.models import Model as KModel # pylint:disable=import-error
|
||||
|
||||
from lib.model.nn_blocks import (Conv2DOutput, Conv2DBlock, ResidualBlock, UpscaleBlock,
|
||||
Upscale2xBlock)
|
||||
from lib.utils import FaceswapError, get_backend
|
||||
from lib.utils import FaceswapError
|
||||
|
||||
from ._base import ModelBase, KerasModel
|
||||
from ._base import ModelBase
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.layers import (
|
||||
AveragePooling2D, BatchNormalization, Concatenate, Dense, Dropout, Flatten, Input, Reshape,
|
||||
LeakyReLU, UpSampling2D)
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import ( # pylint:disable=import-error,no-name-in-module
|
||||
AveragePooling2D, BatchNormalization, Concatenate, Dense, Dropout, Flatten, Input, Reshape,
|
||||
LeakyReLU, UpSampling2D)
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
@ -35,21 +32,22 @@ class Model(ModelBase):
|
|||
super().__init__(*args, **kwargs)
|
||||
self.input_shape = (128, 128, 3)
|
||||
|
||||
self.features = dict(lowmem=0, fair=1, best=2)[self.config["features"]]
|
||||
self.features = {"lowmem": 0, "fair": 1, "best": 2}[self.config["features"]]
|
||||
self.encoder_filters = 64 if self.features > 0 else 48
|
||||
|
||||
bonum_fortunam = 128
|
||||
self.encoder_dim = {0: 512 + bonum_fortunam,
|
||||
1: 1024 + bonum_fortunam,
|
||||
2: 1536 + bonum_fortunam}[self.features]
|
||||
self.details = dict(fast=0, good=1)[self.config["details"]]
|
||||
self.details = {"fast": 0, "good": 1}[self.config["details"]]
|
||||
try:
|
||||
self.upscale_ratio = {128: 2,
|
||||
256: 4,
|
||||
384: 6}[self.config["output_size"]]
|
||||
except KeyError:
|
||||
except KeyError as err:
|
||||
logger.error("Config error: output_size must be one of: 128, 256, or 384.")
|
||||
raise FaceswapError("Config error: output_size must be one of: 128, 256, or 384.")
|
||||
raise FaceswapError("Config error: output_size must be one of: "
|
||||
"128, 256, or 384.") from err
|
||||
|
||||
logger.debug("output_size: %s, features: %s, encoder_filters: %s, encoder_dim: %s, "
|
||||
" details: %s, upscale_ratio: %s", self.config["output_size"], self.features,
|
||||
|
@ -65,7 +63,7 @@ class Model(ModelBase):
|
|||
|
||||
outputs = [self.decoder_a()(encoder_a), decoder_b()(encoder_b)]
|
||||
|
||||
autoencoder = KerasModel(inputs, outputs, name=self.model_name)
|
||||
autoencoder = KModel(inputs, outputs, name=self.model_name)
|
||||
return autoencoder
|
||||
|
||||
def encoder(self):
|
||||
|
@ -104,7 +102,7 @@ class Model(ModelBase):
|
|||
var_x = Dropout(0.05)(var_x)
|
||||
var_x = Reshape((4, 4, 1024))(var_x)
|
||||
|
||||
return KerasModel(input_, var_x, name="encoder")
|
||||
return KModel(input_, var_x, name="encoder")
|
||||
|
||||
def decoder_a(self):
|
||||
""" DeLight Decoder A(old face) Network """
|
||||
|
@ -136,7 +134,7 @@ class Model(ModelBase):
|
|||
|
||||
outputs.append(var_y)
|
||||
|
||||
return KerasModel([input_], outputs=outputs, name="decoder_a")
|
||||
return KModel([input_], outputs=outputs, name="decoder_a")
|
||||
|
||||
def decoder_b_fast(self):
|
||||
""" DeLight Fast Decoder B(new face) Network """
|
||||
|
@ -171,7 +169,7 @@ class Model(ModelBase):
|
|||
|
||||
outputs.append(var_y)
|
||||
|
||||
return KerasModel([input_], outputs=outputs, name="decoder_b_fast")
|
||||
return KModel([input_], outputs=outputs, name="decoder_b_fast")
|
||||
|
||||
def decoder_b(self):
|
||||
""" DeLight Decoder B(new face) Network """
|
||||
|
@ -223,7 +221,7 @@ class Model(ModelBase):
|
|||
|
||||
outputs.append(var_y)
|
||||
|
||||
return KerasModel([input_], outputs=outputs, name="decoder_b")
|
||||
return KModel([input_], outputs=outputs, name="decoder_b")
|
||||
|
||||
def _legacy_mapping(self):
|
||||
""" The mapping of legacy separate model names to single model names """
|
||||
|
|
|
@ -1,17 +1,13 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Improved autoencoder for faceswap """
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Concatenate, Dense, Flatten, Input, Reshape # noqa:E501 # pylint:disable=import-error
|
||||
from tensorflow.keras.models import Model as KModel # pylint:disable=import-error
|
||||
|
||||
from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, UpscaleBlock
|
||||
from lib.utils import get_backend
|
||||
|
||||
from ._base import ModelBase, KerasModel
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.layers import Concatenate, Dense, Flatten, Input, Reshape
|
||||
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Concatenate, Dense, Flatten, Input, Reshape # noqa pylint:disable=import-error,no-name-in-module
|
||||
from ._base import ModelBase
|
||||
|
||||
|
||||
class Model(ModelBase):
|
||||
|
@ -35,7 +31,7 @@ class Model(ModelBase):
|
|||
outputs = [decoder(Concatenate()([inter_a(encoder_a), inter_both(encoder_a)])),
|
||||
decoder(Concatenate()([inter_b(encoder_b), inter_both(encoder_b)]))]
|
||||
|
||||
autoencoder = KerasModel(inputs, outputs, name=self.model_name)
|
||||
autoencoder = KModel(inputs, outputs, name=self.model_name)
|
||||
return autoencoder
|
||||
|
||||
def encoder(self):
|
||||
|
@ -47,7 +43,7 @@ class Model(ModelBase):
|
|||
var_x = Conv2DBlock(512, activation="leakyrelu")(var_x)
|
||||
var_x = Conv2DBlock(1024, activation="leakyrelu")(var_x)
|
||||
var_x = Flatten()(var_x)
|
||||
return KerasModel(input_, var_x, name="encoder")
|
||||
return KModel(input_, var_x, name="encoder")
|
||||
|
||||
def intermediate(self, side):
|
||||
""" Intermediate Network """
|
||||
|
@ -55,7 +51,7 @@ class Model(ModelBase):
|
|||
var_x = Dense(self.encoder_dim)(input_)
|
||||
var_x = Dense(4 * 4 * int(self.encoder_dim/2))(var_x)
|
||||
var_x = Reshape((4, 4, int(self.encoder_dim/2)))(var_x)
|
||||
return KerasModel(input_, var_x, name=f"inter_{side}")
|
||||
return KModel(input_, var_x, name=f"inter_{side}")
|
||||
|
||||
def decoder(self):
|
||||
""" Decoder Network """
|
||||
|
@ -76,7 +72,7 @@ class Model(ModelBase):
|
|||
var_y = UpscaleBlock(64, activation="leakyrelu")(var_y)
|
||||
var_y = Conv2DOutput(1, 5, name="mask_out")(var_y)
|
||||
outputs.append(var_y)
|
||||
return KerasModel(input_, outputs=outputs, name="decoder")
|
||||
return KModel(input_, outputs=outputs, name="decoder")
|
||||
|
||||
def _legacy_mapping(self):
|
||||
""" The mapping of legacy separate model names to single model names """
|
||||
|
|
|
@ -4,8 +4,10 @@
|
|||
Based on the original https://www.reddit.com/r/deepfakes/
|
||||
code sample + contributions """
|
||||
|
||||
from tensorflow.keras.models import Model as KModel # pylint:disable=import-error
|
||||
|
||||
from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, UpscaleBlock
|
||||
from .original import Model as OriginalModel, KerasModel, Dense, Flatten, Input, Reshape
|
||||
from .original import Model as OriginalModel, Dense, Flatten, Input, Reshape
|
||||
|
||||
|
||||
class Model(OriginalModel):
|
||||
|
@ -25,7 +27,7 @@ class Model(OriginalModel):
|
|||
var_x = Dense(4 * 4 * 512)(var_x)
|
||||
var_x = Reshape((4, 4, 512))(var_x)
|
||||
var_x = UpscaleBlock(256, activation="leakyrelu")(var_x)
|
||||
return KerasModel(input_, var_x, name="encoder")
|
||||
return KModel(input_, var_x, name="encoder")
|
||||
|
||||
def decoder(self, side):
|
||||
""" Decoder Network """
|
||||
|
@ -46,4 +48,4 @@ class Model(OriginalModel):
|
|||
activation="sigmoid",
|
||||
name=f"mask_out_{side}")(var_y)
|
||||
outputs.append(var_y)
|
||||
return KerasModel(input_, outputs=outputs, name=f"decoder_{side}")
|
||||
return KModel(input_, outputs=outputs, name=f"decoder_{side}")
|
||||
|
|
|
@ -6,15 +6,12 @@ This model is heavily documented as it acts as a template that other model plugi
|
|||
from.
|
||||
"""
|
||||
|
||||
from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, UpscaleBlock
|
||||
from lib.utils import get_backend
|
||||
from ._base import KerasModel, ModelBase
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Dense, Flatten, Reshape, Input # noqa:E501 # pylint:disable=import-error
|
||||
from tensorflow.keras.models import Model as KModel # pylint:disable=import-error
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.layers import Dense, Flatten, Reshape, Input
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import Dense, Flatten, Reshape, Input # noqa pylint:disable=import-error,no-name-in-module
|
||||
from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, UpscaleBlock
|
||||
from ._base import ModelBase
|
||||
|
||||
|
||||
class Model(ModelBase):
|
||||
|
@ -66,12 +63,6 @@ class Model(ModelBase):
|
|||
2 Decoders are then defined (one for each side) with the encoder instances passed in as
|
||||
input to the corresponding decoders.
|
||||
|
||||
It is important to note that any models and sub-models should not call
|
||||
:class:`keras.models.Model` directly, but rather call
|
||||
:class:`plugins.train.model._base.KerasModel`. This acts as a wrapper for Keras' Model
|
||||
class, but handles some minor differences which need to be handled between Nvidia and AMD
|
||||
backends.
|
||||
|
||||
The final output of the model should always call :class:`lib.model.nn_blocks.Conv2DOutput`
|
||||
so that the correct data type is set for the final activation, to support Mixed Precision
|
||||
Training. Failure to do so is likely to lead to issues when Mixed Precision is enabled.
|
||||
|
@ -85,8 +76,7 @@ class Model(ModelBase):
|
|||
Returns
|
||||
-------
|
||||
:class:`keras.models.Model`
|
||||
The output of this function must be a keras model generated from
|
||||
:class:`plugins.train.model._base.KerasModel`. See Keras documentation for the correct
|
||||
See Keras documentation for the correct
|
||||
structure, but note that parameter :attr:`name` is a required rather than an optional
|
||||
argument in Faceswap. You should assign this to the attribute ``self.name`` that is
|
||||
automatically generated from the plugin's filename.
|
||||
|
@ -100,7 +90,7 @@ class Model(ModelBase):
|
|||
|
||||
outputs = [self.decoder("a")(encoder_a), self.decoder("b")(encoder_b)]
|
||||
|
||||
autoencoder = KerasModel(inputs, outputs, name=self.model_name)
|
||||
autoencoder = KModel(inputs, outputs, name=self.model_name)
|
||||
return autoencoder
|
||||
|
||||
def encoder(self):
|
||||
|
@ -127,7 +117,7 @@ class Model(ModelBase):
|
|||
var_x = Dense(4 * 4 * 1024)(var_x)
|
||||
var_x = Reshape((4, 4, 1024))(var_x)
|
||||
var_x = UpscaleBlock(512, activation="leakyrelu")(var_x)
|
||||
return KerasModel(input_, var_x, name="encoder")
|
||||
return KModel(input_, var_x, name="encoder")
|
||||
|
||||
def decoder(self, side):
|
||||
""" The original Faceswap Decoder Network.
|
||||
|
@ -160,7 +150,7 @@ class Model(ModelBase):
|
|||
var_y = UpscaleBlock(64, activation="leakyrelu")(var_y)
|
||||
var_y = Conv2DOutput(1, 5, name=f"mask_out_{side}")(var_y)
|
||||
outputs.append(var_y)
|
||||
return KerasModel(input_, outputs=outputs, name=f"decoder_{side}")
|
||||
return KModel(input_, outputs=outputs, name=f"decoder_{side}")
|
||||
|
||||
def _legacy_mapping(self):
|
||||
""" The mapping of legacy separate model names to single model names """
|
||||
|
|
|
@ -2,54 +2,44 @@
|
|||
""" Phaze-A Model by TorzDF with thanks to BirbFakes and the myriad of testers. """
|
||||
|
||||
# pylint: disable=too-many-lines
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
import sys
|
||||
import typing as T
|
||||
from dataclasses import dataclass
|
||||
|
||||
from typing import Dict, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.layers import LayerNormalization # pylint:disable=import-error
|
||||
from tensorflow.keras import applications as kapp, backend as K # noqa:E501 # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import ( # pylint:disable=import-error
|
||||
Add, BatchNormalization, Concatenate, Dense, Dropout, Flatten, GaussianNoise, MaxPool2D,
|
||||
GlobalAveragePooling2D, GlobalMaxPooling2D, Input, LeakyReLU, Reshape, UpSampling2D,
|
||||
Conv2D as KConv2D)
|
||||
from tensorflow.keras.models import clone_model, Model as KModel # noqa:E501 # pylint:disable=import-error
|
||||
|
||||
from lib.model.nn_blocks import (
|
||||
Conv2D, Conv2DBlock, Conv2DOutput, ResidualBlock, UpscaleBlock, Upscale2xBlock,
|
||||
UpscaleResizeImagesBlock, UpscaleDNYBlock)
|
||||
from lib.model.normalization import (
|
||||
AdaInstanceNormalization, GroupNormalization, InstanceNormalization, LayerNormalization,
|
||||
RMSNormalization)
|
||||
from lib.utils import get_backend, get_tf_version, FaceswapError
|
||||
AdaInstanceNormalization, GroupNormalization, InstanceNormalization, RMSNormalization)
|
||||
from lib.utils import get_tf_version, FaceswapError
|
||||
|
||||
from ._base import KerasModel, ModelBase, get_all_sub_models
|
||||
from ._base import ModelBase, get_all_sub_models
|
||||
|
||||
if sys.version_info < (3, 8):
|
||||
from typing_extensions import Literal
|
||||
else:
|
||||
from typing import Literal
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras import applications as kapp, backend as K
|
||||
from keras.layers import (
|
||||
Add, BatchNormalization, Concatenate, Dense, Dropout, Flatten, GaussianNoise, MaxPool2D,
|
||||
GlobalAveragePooling2D, GlobalMaxPooling2D, Input, LeakyReLU, Reshape, UpSampling2D,
|
||||
Conv2D as KConv2D)
|
||||
from keras.models import clone_model
|
||||
# typing checks
|
||||
import keras
|
||||
from plaidml.tile import Value as Tensor # pylint:disable=import-error
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import applications as kapp, backend as K # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import ( # pylint:disable=import-error,no-name-in-module
|
||||
Add, BatchNormalization, Concatenate, Dense, Dropout, Flatten, GaussianNoise, MaxPool2D,
|
||||
GlobalAveragePooling2D, GlobalMaxPooling2D, Input, LeakyReLU, Reshape, UpSampling2D,
|
||||
Conv2D as KConv2D)
|
||||
from tensorflow.keras.models import clone_model # noqa pylint:disable=import-error,no-name-in-module
|
||||
# typing checks
|
||||
if T.TYPE_CHECKING:
|
||||
from tensorflow import keras
|
||||
from tensorflow import Tensor
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
||||
@dataclass
|
||||
class _EncoderInfo:
|
||||
""" Contains model configuration options for various Phaze-A Encoders.
|
||||
|
@ -61,9 +51,6 @@ class _EncoderInfo:
|
|||
exist in Keras Applications
|
||||
default_size: int
|
||||
The default input size of the encoder
|
||||
no_amd: bool, optional
|
||||
``True`` if the encoder is not compatible with the PlaidML backend otherwise ``False``.
|
||||
Default: ``False``
|
||||
tf_min: float, optional
|
||||
The lowest version of Tensorflow that the encoder can be used for. Default: `2.0`
|
||||
scaling: tuple, optional
|
||||
|
@ -78,104 +65,86 @@ class _EncoderInfo:
|
|||
"""
|
||||
keras_name: str
|
||||
default_size: int
|
||||
no_amd: bool = False
|
||||
tf_min: Tuple[int, int] = (2, 0)
|
||||
scaling: Tuple[int, int] = (0, 1)
|
||||
tf_min: T.Tuple[int, int] = (2, 0)
|
||||
scaling: T.Tuple[int, int] = (0, 1)
|
||||
min_size: int = 32
|
||||
enforce_for_weights: bool = False
|
||||
color_order: Literal["bgr", "rgb"] = "rgb"
|
||||
|
||||
|
||||
_MODEL_MAPPING: Dict[str, _EncoderInfo] = dict(
|
||||
densenet121=_EncoderInfo(
|
||||
_MODEL_MAPPING: T.Dict[str, _EncoderInfo] = {
|
||||
"densenet121": _EncoderInfo(
|
||||
keras_name="DenseNet121", default_size=224),
|
||||
densenet169=_EncoderInfo(
|
||||
"densenet169": _EncoderInfo(
|
||||
keras_name="DenseNet169", default_size=224),
|
||||
densenet201=_EncoderInfo(
|
||||
"densenet201": _EncoderInfo(
|
||||
keras_name="DenseNet201", default_size=224),
|
||||
efficientnet_b0=_EncoderInfo(
|
||||
keras_name="EfficientNetB0",
|
||||
no_amd=True, tf_min=(2, 3), scaling=(0, 255), default_size=224),
|
||||
efficientnet_b1=_EncoderInfo(
|
||||
keras_name="EfficientNetB1",
|
||||
no_amd=True, tf_min=(2, 3), scaling=(0, 255), default_size=240),
|
||||
efficientnet_b2=_EncoderInfo(
|
||||
keras_name="EfficientNetB2",
|
||||
no_amd=True, tf_min=(2, 3), scaling=(0, 255), default_size=260),
|
||||
efficientnet_b3=_EncoderInfo(
|
||||
keras_name="EfficientNetB3",
|
||||
no_amd=True, tf_min=(2, 3), scaling=(0, 255), default_size=300),
|
||||
efficientnet_b4=_EncoderInfo(
|
||||
keras_name="EfficientNetB4",
|
||||
no_amd=True, tf_min=(2, 3), scaling=(0, 255), default_size=380),
|
||||
efficientnet_b5=_EncoderInfo(
|
||||
keras_name="EfficientNetB5",
|
||||
no_amd=True, tf_min=(2, 3), scaling=(0, 255), default_size=456),
|
||||
efficientnet_b6=_EncoderInfo(
|
||||
keras_name="EfficientNetB6",
|
||||
no_amd=True, tf_min=(2, 3), scaling=(0, 255), default_size=528),
|
||||
efficientnet_b7=_EncoderInfo(
|
||||
keras_name="EfficientNetB7",
|
||||
no_amd=True, tf_min=(2, 3), scaling=(0, 255), default_size=600),
|
||||
efficientnet_v2_b0=_EncoderInfo(
|
||||
keras_name="EfficientNetV2B0",
|
||||
no_amd=True, tf_min=(2, 8), scaling=(-1, 1), default_size=224),
|
||||
efficientnet_v2_b1=_EncoderInfo(
|
||||
keras_name="EfficientNetV2B1",
|
||||
no_amd=True, tf_min=(2, 8), scaling=(-1, 1), default_size=240),
|
||||
efficientnet_v2_b2=_EncoderInfo(
|
||||
keras_name="EfficientNetV2B2",
|
||||
no_amd=True, tf_min=(2, 8), scaling=(-1, 1), default_size=260),
|
||||
efficientnet_v2_b3=_EncoderInfo(
|
||||
keras_name="EfficientNetV2B3",
|
||||
no_amd=True, tf_min=(2, 8), scaling=(-1, 1), default_size=300),
|
||||
efficientnet_v2_s=_EncoderInfo(
|
||||
keras_name="EfficientNetV2S",
|
||||
no_amd=True, tf_min=(2, 8), scaling=(-1, 1), default_size=384),
|
||||
efficientnet_v2_m=_EncoderInfo(
|
||||
keras_name="EfficientNetV2M",
|
||||
no_amd=True, tf_min=(2, 8), scaling=(-1, 1), default_size=480),
|
||||
efficientnet_v2_l=_EncoderInfo(
|
||||
keras_name="EfficientNetV2L",
|
||||
no_amd=True, tf_min=(2, 8), scaling=(-1, 1), default_size=480),
|
||||
inception_resnet_v2=_EncoderInfo(
|
||||
"efficientnet_b0": _EncoderInfo(
|
||||
keras_name="EfficientNetB0", tf_min=(2, 3), scaling=(0, 255), default_size=224),
|
||||
"efficientnet_b1": _EncoderInfo(
|
||||
keras_name="EfficientNetB1", tf_min=(2, 3), scaling=(0, 255), default_size=240),
|
||||
"efficientnet_b2": _EncoderInfo(
|
||||
keras_name="EfficientNetB2", tf_min=(2, 3), scaling=(0, 255), default_size=260),
|
||||
"efficientnet_b3": _EncoderInfo(
|
||||
keras_name="EfficientNetB3", tf_min=(2, 3), scaling=(0, 255), default_size=300),
|
||||
"efficientnet_b4": _EncoderInfo(
|
||||
keras_name="EfficientNetB4", tf_min=(2, 3), scaling=(0, 255), default_size=380),
|
||||
"efficientnet_b5": _EncoderInfo(
|
||||
keras_name="EfficientNetB5", tf_min=(2, 3), scaling=(0, 255), default_size=456),
|
||||
"efficientnet_b6": _EncoderInfo(
|
||||
keras_name="EfficientNetB6", tf_min=(2, 3), scaling=(0, 255), default_size=528),
|
||||
"efficientnet_b7": _EncoderInfo(
|
||||
keras_name="EfficientNetB7", tf_min=(2, 3), scaling=(0, 255), default_size=600),
|
||||
"efficientnet_v2_b0": _EncoderInfo(
|
||||
keras_name="EfficientNetV2B0", tf_min=(2, 8), scaling=(-1, 1), default_size=224),
|
||||
"efficientnet_v2_b1": _EncoderInfo(
|
||||
keras_name="EfficientNetV2B1", tf_min=(2, 8), scaling=(-1, 1), default_size=240),
|
||||
"efficientnet_v2_b2": _EncoderInfo(
|
||||
keras_name="EfficientNetV2B2", tf_min=(2, 8), scaling=(-1, 1), default_size=260),
|
||||
"efficientnet_v2_b3": _EncoderInfo(
|
||||
keras_name="EfficientNetV2B3", tf_min=(2, 8), scaling=(-1, 1), default_size=300),
|
||||
"efficientnet_v2_s": _EncoderInfo(
|
||||
keras_name="EfficientNetV2S", tf_min=(2, 8), scaling=(-1, 1), default_size=384),
|
||||
"efficientnet_v2_m": _EncoderInfo(
|
||||
keras_name="EfficientNetV2M", tf_min=(2, 8), scaling=(-1, 1), default_size=480),
|
||||
"efficientnet_v2_l": _EncoderInfo(
|
||||
keras_name="EfficientNetV2L", tf_min=(2, 8), scaling=(-1, 1), default_size=480),
|
||||
"inception_resnet_v2": _EncoderInfo(
|
||||
keras_name="InceptionResNetV2", scaling=(-1, 1), min_size=75, default_size=299),
|
||||
inception_v3=_EncoderInfo(
|
||||
"inception_v3": _EncoderInfo(
|
||||
keras_name="InceptionV3", scaling=(-1, 1), min_size=75, default_size=299),
|
||||
mobilenet=_EncoderInfo(
|
||||
"mobilenet": _EncoderInfo(
|
||||
keras_name="MobileNet", scaling=(-1, 1), default_size=224),
|
||||
mobilenet_v2=_EncoderInfo(
|
||||
"mobilenet_v2": _EncoderInfo(
|
||||
keras_name="MobileNetV2", scaling=(-1, 1), default_size=224),
|
||||
mobilenet_v3_large=_EncoderInfo(
|
||||
keras_name="MobileNetV3Large",
|
||||
no_amd=True, tf_min=(2, 4), scaling=(-1, 1), default_size=224),
|
||||
mobilenet_v3_small=_EncoderInfo(
|
||||
keras_name="MobileNetV3Small",
|
||||
no_amd=True, tf_min=(2, 4), scaling=(-1, 1), default_size=224),
|
||||
nasnet_large=_EncoderInfo(
|
||||
"mobilenet_v3_large": _EncoderInfo(
|
||||
keras_name="MobileNetV3Large", tf_min=(2, 4), scaling=(-1, 1), default_size=224),
|
||||
"mobilenet_v3_small": _EncoderInfo(
|
||||
keras_name="MobileNetV3Small", tf_min=(2, 4), scaling=(-1, 1), default_size=224),
|
||||
"nasnet_large": _EncoderInfo(
|
||||
keras_name="NASNetLarge", scaling=(-1, 1), default_size=331, enforce_for_weights=True),
|
||||
nasnet_mobile=_EncoderInfo(
|
||||
"nasnet_mobile": _EncoderInfo(
|
||||
keras_name="NASNetMobile", scaling=(-1, 1), default_size=224, enforce_for_weights=True),
|
||||
resnet50=_EncoderInfo(
|
||||
"resnet50": _EncoderInfo(
|
||||
keras_name="ResNet50", scaling=(-1, 1), min_size=32, default_size=224),
|
||||
resnet50_v2=_EncoderInfo(
|
||||
keras_name="ResNet50V2", no_amd=True, scaling=(-1, 1), default_size=224),
|
||||
resnet101=_EncoderInfo(
|
||||
keras_name="ResNet101", no_amd=True, scaling=(-1, 1), default_size=224),
|
||||
resnet101_v2=_EncoderInfo(
|
||||
keras_name="ResNet101V2", no_amd=True, scaling=(-1, 1), default_size=224),
|
||||
resnet152=_EncoderInfo(
|
||||
keras_name="ResNet152", no_amd=True, scaling=(-1, 1), default_size=224),
|
||||
resnet152_v2=_EncoderInfo(
|
||||
keras_name="ResNet152V2", no_amd=True, scaling=(-1, 1), default_size=224),
|
||||
vgg16=_EncoderInfo(
|
||||
"resnet50_v2": _EncoderInfo(
|
||||
keras_name="ResNet50V2", scaling=(-1, 1), default_size=224),
|
||||
"resnet101": _EncoderInfo(
|
||||
keras_name="ResNet101", scaling=(-1, 1), default_size=224),
|
||||
"resnet101_v2": _EncoderInfo(
|
||||
keras_name="ResNet101V2", scaling=(-1, 1), default_size=224),
|
||||
"resnet152": _EncoderInfo(
|
||||
keras_name="ResNet152", scaling=(-1, 1), default_size=224),
|
||||
"resnet152_v2": _EncoderInfo(
|
||||
keras_name="ResNet152V2", scaling=(-1, 1), default_size=224),
|
||||
"vgg16": _EncoderInfo(
|
||||
keras_name="VGG16", color_order="bgr", scaling=(0, 255), default_size=224),
|
||||
vgg19=_EncoderInfo(
|
||||
"vgg19": _EncoderInfo(
|
||||
keras_name="VGG19", color_order="bgr", scaling=(0, 255), default_size=224),
|
||||
xception=_EncoderInfo(
|
||||
"xception": _EncoderInfo(
|
||||
keras_name="Xception", scaling=(-1, 1), min_size=71, default_size=299),
|
||||
fs_original=_EncoderInfo(
|
||||
keras_name="", color_order="bgr", min_size=32, default_size=1024))
|
||||
"fs_original": _EncoderInfo(
|
||||
keras_name="", color_order="bgr", min_size=32, default_size=1024)}
|
||||
|
||||
|
||||
class Model(ModelBase):
|
||||
|
@ -239,8 +208,8 @@ class Model(ModelBase):
|
|||
:class:`keras.models.Model`
|
||||
The loaded Keras Model with the dropout rates updated
|
||||
"""
|
||||
dropouts = dict(fc=self.config["fc_dropout"],
|
||||
gblock=self.config["fc_gblock_dropout"])
|
||||
dropouts = {"fc": self.config["fc_dropout"],
|
||||
"gblock": self.config["fc_gblock_dropout"]}
|
||||
logger.debug("Config dropouts: %s", dropouts)
|
||||
updated = False
|
||||
for mod in get_all_sub_models(model):
|
||||
|
@ -269,7 +238,7 @@ class Model(ModelBase):
|
|||
model = new_model
|
||||
return model
|
||||
|
||||
def _select_freeze_layers(self) -> List[str]:
|
||||
def _select_freeze_layers(self) -> T.List[str]:
|
||||
""" Process the selected frozen layers and replace the `keras_encoder` option with the
|
||||
actual keras model name
|
||||
|
||||
|
@ -293,7 +262,7 @@ class Model(ModelBase):
|
|||
logger.debug("Removing 'keras_encoder' for '%s'", arch)
|
||||
return retval
|
||||
|
||||
def _get_input_shape(self) -> Tuple[int, int, int]:
|
||||
def _get_input_shape(self) -> T.Tuple[int, int, int]:
|
||||
""" Obtain the input shape for the model.
|
||||
|
||||
Input shape is calculated from the selected Encoder's input size, scaled to the user
|
||||
|
@ -340,19 +309,14 @@ class Model(ModelBase):
|
|||
raise FaceswapError(f"'{arch}' is not a valid choice for encoder architecture. Choose "
|
||||
f"one of {list(_MODEL_MAPPING.keys())}.")
|
||||
|
||||
if get_backend() == "amd" and model.no_amd:
|
||||
valid = [k for k, v in _MODEL_MAPPING.items() if not v.no_amd]
|
||||
raise FaceswapError(f"'{arch}' is not compatible with the AMD backend. Choose one of "
|
||||
f"{valid}.")
|
||||
|
||||
tf_ver = get_tf_version()
|
||||
tf_min = model.tf_min
|
||||
if get_backend() != "amd" and tf_ver < tf_min:
|
||||
if tf_ver < tf_min:
|
||||
raise FaceswapError(f"{arch}' is not compatible with your version of Tensorflow. The "
|
||||
f"minimum version required is {tf_min} whilst you have version "
|
||||
f"{tf_ver} installed.")
|
||||
|
||||
def build_model(self, inputs: List[Tensor]) -> keras.models.Model:
|
||||
def build_model(self, inputs: T.List[Tensor]) -> keras.models.Model:
|
||||
""" Create the model's structure.
|
||||
|
||||
Parameters
|
||||
|
@ -364,11 +328,7 @@ class Model(ModelBase):
|
|||
Returns
|
||||
-------
|
||||
:class:`keras.models.Model`
|
||||
The output of this function must be a keras model generated from
|
||||
:class:`plugins.train.model._base.KerasModel`. See Keras documentation for the correct
|
||||
structure, but note that parameter :attr:`name` is a required rather than an optional
|
||||
argument in Faceswap. You should assign this to the attribute ``self.name`` that is
|
||||
automatically generated from the plugin's filename.
|
||||
The generated model
|
||||
"""
|
||||
# Create sub-Models
|
||||
encoders = self._build_encoders(inputs)
|
||||
|
@ -378,10 +338,10 @@ class Model(ModelBase):
|
|||
|
||||
# Create Autoencoder
|
||||
outputs = [decoders["a"], decoders["b"]]
|
||||
autoencoder = KerasModel(inputs, outputs, name=self.model_name)
|
||||
autoencoder = KModel(inputs, outputs, name=self.model_name)
|
||||
return autoencoder
|
||||
|
||||
def _build_encoders(self, inputs: List[Tensor]) -> Dict[str, keras.models.Model]:
|
||||
def _build_encoders(self, inputs: T.List[Tensor]) -> T.Dict[str, keras.models.Model]:
|
||||
""" Build the encoders for Phaze-A
|
||||
|
||||
Parameters
|
||||
|
@ -396,13 +356,13 @@ class Model(ModelBase):
|
|||
side as key ('a' or 'b'), encoder for side as value
|
||||
"""
|
||||
encoder = Encoder(self.input_shape, self.config)()
|
||||
retval = dict(a=encoder(inputs[0]), b=encoder(inputs[1]))
|
||||
retval = {"a": encoder(inputs[0]), "b": encoder(inputs[1])}
|
||||
logger.debug("Encoders: %s", retval)
|
||||
return retval
|
||||
|
||||
def _build_fully_connected(
|
||||
self,
|
||||
inputs: Dict[str, keras.models.Model]) -> Dict[str, List[keras.models.Model]]:
|
||||
inputs: T.Dict[str, keras.models.Model]) -> T.Dict[str, T.List[keras.models.Model]]:
|
||||
""" Build the fully connected layers for Phaze-A
|
||||
|
||||
Parameters
|
||||
|
@ -441,14 +401,14 @@ class Model(ModelBase):
|
|||
inter_a.append(fc_gblock(inputs["a"]))
|
||||
inter_b.append(fc_gblock(inputs["b"]))
|
||||
|
||||
retval = dict(a=inter_a, b=inter_b)
|
||||
retval = {"a": inter_a, "b": inter_b}
|
||||
logger.debug("Fully Connected: %s", retval)
|
||||
return retval
|
||||
|
||||
def _build_g_blocks(
|
||||
self,
|
||||
inputs: Dict[str, List[keras.models.Model]]
|
||||
) -> Dict[str, Union[List[keras.models.Model], keras.models.Model]]:
|
||||
inputs: T.Dict[str, T.List[keras.models.Model]]
|
||||
) -> T.Dict[str, T.Union[T.List[keras.models.Model], keras.models.Model]]:
|
||||
""" Build the g-block layers for Phaze-A.
|
||||
|
||||
If a g-block has not been selected for this model, then the original `inters` models are
|
||||
|
@ -471,19 +431,19 @@ class Model(ModelBase):
|
|||
|
||||
input_shapes = [K.int_shape(inter)[1:] for inter in inputs["a"]]
|
||||
if self.config["split_gblock"]:
|
||||
retval = dict(a=GBlock("a", input_shapes, self.config)()(inputs["a"]),
|
||||
b=GBlock("b", input_shapes, self.config)()(inputs["b"]))
|
||||
retval = {"a": GBlock("a", input_shapes, self.config)()(inputs["a"]),
|
||||
"b": GBlock("b", input_shapes, self.config)()(inputs["b"])}
|
||||
else:
|
||||
g_block = GBlock("both", input_shapes, self.config)()
|
||||
retval = dict(a=g_block((inputs["a"])), b=g_block((inputs["b"])))
|
||||
retval = {"a": g_block((inputs["a"])), "b": g_block((inputs["b"]))}
|
||||
|
||||
logger.debug("G-Blocks: %s", retval)
|
||||
return retval
|
||||
|
||||
def _build_decoders(
|
||||
self,
|
||||
inputs: Dict[str, Union[List[keras.models.Model], keras.models.Model]]
|
||||
) -> Dict[str, keras.models.Model]:
|
||||
inputs: T.Dict[str, T.Union[T.List[keras.models.Model], keras.models.Model]]
|
||||
) -> T.Dict[str, keras.models.Model]:
|
||||
""" Build the encoders for Phaze-A
|
||||
|
||||
Parameters
|
||||
|
@ -511,11 +471,11 @@ class Model(ModelBase):
|
|||
input_shape = K.int_shape(input_)[1:]
|
||||
|
||||
if self.config["split_decoders"]:
|
||||
retval = dict(a=Decoder("a", input_shape, self.config)()(inputs["a"]),
|
||||
b=Decoder("b", input_shape, self.config)()(inputs["b"]))
|
||||
retval = {"a": Decoder("a", input_shape, self.config)()(inputs["a"]),
|
||||
"b": Decoder("b", input_shape, self.config)()(inputs["b"])}
|
||||
else:
|
||||
decoder = Decoder("both", input_shape, self.config)()
|
||||
retval = dict(a=decoder(inputs["a"]), b=decoder(inputs["b"]))
|
||||
retval = {"a": decoder(inputs["a"]), "b": decoder(inputs["b"])}
|
||||
|
||||
logger.debug("Decoders: %s", retval)
|
||||
return retval
|
||||
|
@ -540,12 +500,12 @@ def _bottleneck(inputs: Tensor, bottleneck: str, size: int, normalization: str)
|
|||
tensor
|
||||
The output from the bottleneck
|
||||
"""
|
||||
norms = dict(layer=LayerNormalization,
|
||||
rms=RMSNormalization,
|
||||
instance=InstanceNormalization)
|
||||
bottlenecks = dict(average_pooling=GlobalAveragePooling2D(),
|
||||
dense=Dense(size),
|
||||
max_pooling=GlobalMaxPooling2D())
|
||||
norms = {"layer": LayerNormalization,
|
||||
"rms": RMSNormalization,
|
||||
"instance": InstanceNormalization}
|
||||
bottlenecks = {"average_pooling": GlobalAveragePooling2D(),
|
||||
"dense": Dense(size),
|
||||
"max_pooling": GlobalMaxPooling2D()}
|
||||
var_x = inputs
|
||||
if normalization:
|
||||
var_x = norms[normalization]()(var_x)
|
||||
|
@ -562,9 +522,9 @@ def _bottleneck(inputs: Tensor, bottleneck: str, size: int, normalization: str)
|
|||
def _get_upscale_layer(method: Literal["resize_images", "subpixel", "upscale_dny", "upscale_fast",
|
||||
"upscale_hybrid", "upsample2d"],
|
||||
filters: int,
|
||||
activation: Optional[str] = None,
|
||||
upsamples: Optional[int] = None,
|
||||
interpolation: Optional[str] = None) -> keras.layers.Layer:
|
||||
activation: T.Optional[str] = None,
|
||||
upsamples: T.Optional[int] = None,
|
||||
interpolation: T.Optional[str] = None) -> keras.layers.Layer:
|
||||
""" Obtain an instance of the requested upscale method.
|
||||
|
||||
Parameters
|
||||
|
@ -590,7 +550,7 @@ def _get_upscale_layer(method: Literal["resize_images", "subpixel", "upscale_dny
|
|||
The selected configured upscale layer
|
||||
"""
|
||||
if method == "upsample2d":
|
||||
kwargs: Dict[str, Union[str, int]] = {}
|
||||
kwargs: T.Dict[str, T.Union[str, int]] = {}
|
||||
if upsamples:
|
||||
kwargs["size"] = upsamples
|
||||
if interpolation:
|
||||
|
@ -611,7 +571,7 @@ def _get_curve(start_y: int,
|
|||
end_y: int,
|
||||
num_points: int,
|
||||
scale: float,
|
||||
mode: Literal["full", "cap_max", "cap_min"] = "full") -> List[int]:
|
||||
mode: Literal["full", "cap_max", "cap_min"] = "full") -> T.List[int]:
|
||||
""" Obtain a curve.
|
||||
|
||||
For the given start and end y values, return the y co-ordinates of a curve for the given
|
||||
|
@ -700,24 +660,24 @@ class Encoder(): # pylint:disable=too-few-public-methods
|
|||
config: dict
|
||||
The model configuration options
|
||||
"""
|
||||
def __init__(self, input_shape: Tuple[int, ...], config: dict) -> None:
|
||||
def __init__(self, input_shape: T.Tuple[int, ...], config: dict) -> None:
|
||||
self.input_shape = input_shape
|
||||
self._config = config
|
||||
self._input_shape = input_shape
|
||||
|
||||
@property
|
||||
def _model_kwargs(self) -> Dict[str, Dict[str, Union[str, bool]]]:
|
||||
def _model_kwargs(self) -> T.Dict[str, T.Dict[str, T.Union[str, bool]]]:
|
||||
""" dict: Configuration option for architecture mapped to optional kwargs. """
|
||||
return dict(mobilenet=dict(alpha=self._config["mobilenet_width"],
|
||||
depth_multiplier=self._config["mobilenet_depth"],
|
||||
dropout=self._config["mobilenet_dropout"]),
|
||||
mobilenet_v2=dict(alpha=self._config["mobilenet_width"]),
|
||||
mobilenet_v3=dict(alpha=self._config["mobilenet_width"],
|
||||
minimalist=self._config["mobilenet_minimalistic"],
|
||||
include_preprocessing=False))
|
||||
return {"mobilenet": {"alpha": self._config["mobilenet_width"],
|
||||
"depth_multiplier": self._config["mobilenet_depth"],
|
||||
"dropout": self._config["mobilenet_dropout"]},
|
||||
"mobilenet_v2": {"alpha": self._config["mobilenet_width"]},
|
||||
"mobilenet_v3": {"alpha": self._config["mobilenet_width"],
|
||||
"minimalist": self._config["mobilenet_minimalistic"],
|
||||
"include_preprocessing": False}}
|
||||
|
||||
@property
|
||||
def _selected_model(self) -> Tuple[_EncoderInfo, dict]:
|
||||
def _selected_model(self) -> T.Tuple[_EncoderInfo, dict]:
|
||||
""" tuple(dict, :class:`_EncoderInfo`): The selected encoder model and it's associated
|
||||
keyword arguments """
|
||||
arch = self._config["enc_architecture"]
|
||||
|
@ -772,7 +732,7 @@ class Encoder(): # pylint:disable=too-few-public-methods
|
|||
self._config["bottleneck_size"],
|
||||
self._config["bottleneck_norm"])
|
||||
|
||||
return KerasModel(input_, var_x, name="encoder")
|
||||
return KModel(input_, var_x, name="encoder")
|
||||
|
||||
def _get_encoder_model(self) -> keras.models.Model:
|
||||
""" Return the model defined by the selected architecture.
|
||||
|
@ -1007,7 +967,7 @@ class FullyConnected(): # pylint:disable=too-few-public-methods
|
|||
self._config,
|
||||
layer_indicies=(0, num_upscales))(var_x)
|
||||
|
||||
return KerasModel(input_, var_x, name=f"fc_{self._side}")
|
||||
return KModel(input_, var_x, name=f"fc_{self._side}")
|
||||
|
||||
|
||||
class UpscaleBlocks(): # pylint: disable=too-few-public-methods
|
||||
|
@ -1032,12 +992,12 @@ class UpscaleBlocks(): # pylint: disable=too-few-public-methods
|
|||
and the Decoder. ``None`` will generate the full Upscale chain. An end index of -1 will
|
||||
generate the layers from the starting index to the final upscale. Default: ``None``
|
||||
"""
|
||||
_filters: List[int] = []
|
||||
_filters: T.List[int] = []
|
||||
|
||||
def __init__(self,
|
||||
side: Literal["a", "b", "both", "shared"],
|
||||
config: dict,
|
||||
layer_indicies: Optional[Tuple[int, int]] = None) -> None:
|
||||
layer_indicies: T.Optional[T.Tuple[int, int]] = None) -> None:
|
||||
logger.debug("Initializing: %s (side: %s, layer_indicies: %s)",
|
||||
self.__class__.__name__, side, layer_indicies)
|
||||
self._side = side
|
||||
|
@ -1134,11 +1094,11 @@ class UpscaleBlocks(): # pylint: disable=too-few-public-methods
|
|||
"""
|
||||
if not self._config["dec_norm"]:
|
||||
return inputs
|
||||
norms = dict(batch=BatchNormalization,
|
||||
group=GroupNormalization,
|
||||
instance=InstanceNormalization,
|
||||
layer=LayerNormalization,
|
||||
rms=RMSNormalization)
|
||||
norms = {"batch": BatchNormalization,
|
||||
"group": GroupNormalization,
|
||||
"instance": InstanceNormalization,
|
||||
"layer": LayerNormalization,
|
||||
"rms": RMSNormalization}
|
||||
return norms[self._config["dec_norm"]]()(inputs)
|
||||
|
||||
def _dny_entry(self, inputs: Tensor) -> Tensor:
|
||||
|
@ -1166,7 +1126,7 @@ class UpscaleBlocks(): # pylint: disable=too-few-public-methods
|
|||
relu_alpha=0.2)(var_x)
|
||||
return var_x
|
||||
|
||||
def __call__(self, inputs: Union[Tensor, List[Tensor]]) -> Union[Tensor, List[Tensor]]:
|
||||
def __call__(self, inputs: T.Union[Tensor, T.List[Tensor]]) -> T.Union[Tensor, T.List[Tensor]]:
|
||||
""" Upscale Network.
|
||||
|
||||
Parameters
|
||||
|
@ -1244,7 +1204,7 @@ class GBlock(): # pylint:disable=too-few-public-methods
|
|||
"""
|
||||
def __init__(self,
|
||||
side: Literal["a", "b", "both"],
|
||||
input_shapes: Union[list, tuple],
|
||||
input_shapes: T.Union[list, tuple],
|
||||
config: dict) -> None:
|
||||
logger.debug("Initializing: %s (side: %s, input_shapes: %s)",
|
||||
self.__class__.__name__, side, input_shapes)
|
||||
|
@ -1308,7 +1268,7 @@ class GBlock(): # pylint:disable=too-few-public-methods
|
|||
var_x = Conv2D(g_filts, 3, strides=1, padding="same")(var_x)
|
||||
var_x = GaussianNoise(1.0)(var_x)
|
||||
var_x = self._g_block(var_x, style, g_filts)
|
||||
return KerasModel(self._inputs, var_x, name=f"g_block_{self._side}")
|
||||
return KModel(self._inputs, var_x, name=f"g_block_{self._side}")
|
||||
|
||||
|
||||
class Decoder(): # pylint:disable=too-few-public-methods
|
||||
|
@ -1325,7 +1285,7 @@ class Decoder(): # pylint:disable=too-few-public-methods
|
|||
"""
|
||||
def __init__(self,
|
||||
side: Literal["a", "b", "both"],
|
||||
input_shape: Tuple[int, int, int],
|
||||
input_shape: T.Tuple[int, int, int],
|
||||
config: dict) -> None:
|
||||
logger.debug("Initializing: %s (side: %s, input_shape: %s)",
|
||||
self.__class__.__name__, side, input_shape)
|
||||
|
@ -1366,4 +1326,4 @@ class Decoder(): # pylint:disable=too-few-public-methods
|
|||
self._config["dec_output_kernel"],
|
||||
name="mask_out")(var_y))
|
||||
|
||||
return KerasModel(inputs, outputs=outputs, name=f"decoder_{self._side}")
|
||||
return KModel(inputs, outputs=outputs, name=f"decoder_{self._side}")
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -10,17 +10,13 @@
|
|||
import logging
|
||||
import sys
|
||||
|
||||
from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, ResidualBlock, UpscaleBlock
|
||||
from lib.utils import get_backend
|
||||
from ._base import ModelBase, KerasModel
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.initializers import RandomNormal # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import Dense, Flatten, Input, LeakyReLU, Reshape # noqa:E501 # pylint:disable=import-error
|
||||
from tensorflow.keras.models import Model as KModel # pylint:disable=import-error
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.initializers import RandomNormal # pylint:disable=no-name-in-module
|
||||
from keras.layers import Dense, Flatten, Input, LeakyReLU, Reshape
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.initializers import RandomNormal # noqa pylint:disable=import-error,no-name-in-module
|
||||
from tensorflow.keras.layers import Dense, Flatten, Input, LeakyReLU, Reshape # noqa pylint:disable=import-error,no-name-in-module
|
||||
from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, ResidualBlock, UpscaleBlock
|
||||
from ._base import ModelBase
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
|
@ -80,7 +76,7 @@ class Model(ModelBase):
|
|||
|
||||
outputs = [self.decoder_a()(encoder_a), self.decoder_b()(encoder_b)]
|
||||
|
||||
autoencoder = KerasModel(inputs, outputs, name=self.model_name)
|
||||
autoencoder = KModel(inputs, outputs, name=self.model_name)
|
||||
return autoencoder
|
||||
|
||||
def encoder(self):
|
||||
|
@ -98,7 +94,7 @@ class Model(ModelBase):
|
|||
|
||||
var_x = Conv2DBlock(encoder_complexity * 2**(idx + 1), activation="leakyrelu")(var_x)
|
||||
|
||||
return KerasModel(input_, var_x, name="encoder")
|
||||
return KModel(input_, var_x, name="encoder")
|
||||
|
||||
def decoder_b(self):
|
||||
""" RealFace Decoder Network """
|
||||
|
@ -142,7 +138,7 @@ class Model(ModelBase):
|
|||
|
||||
outputs += [var_y]
|
||||
|
||||
return KerasModel(input_, outputs=outputs, name="decoder_b")
|
||||
return KModel(input_, outputs=outputs, name="decoder_b")
|
||||
|
||||
def decoder_a(self):
|
||||
""" RealFace Decoder (A) Network """
|
||||
|
@ -187,7 +183,7 @@ class Model(ModelBase):
|
|||
|
||||
outputs += [var_y]
|
||||
|
||||
return KerasModel(input_, outputs=outputs, name="decoder_a")
|
||||
return KModel(input_, outputs=outputs, name="decoder_a")
|
||||
|
||||
def _legacy_mapping(self):
|
||||
""" The mapping of legacy separate model names to single model names """
|
||||
|
|
|
@ -3,17 +3,14 @@
|
|||
Based on the original https://www.reddit.com/r/deepfakes/
|
||||
code sample + contributions """
|
||||
|
||||
from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, ResidualBlock, UpscaleBlock
|
||||
from lib.utils import get_backend
|
||||
from ._base import ModelBase, KerasModel
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.initializers import RandomNormal # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import ( # pylint:disable=import-error
|
||||
Dense, Flatten, Input, LeakyReLU, Reshape, SpatialDropout2D)
|
||||
from tensorflow.keras.models import Model as KModel # pylint:disable=import-error
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.initializers import RandomNormal # pylint:disable=no-name-in-module
|
||||
from keras.layers import Dense, Flatten, Input, LeakyReLU, Reshape, SpatialDropout2D
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.initializers import RandomNormal # noqa pylint:disable=import-error,no-name-in-module
|
||||
from tensorflow.keras.layers import Dense, Flatten, Input, LeakyReLU, Reshape, SpatialDropout2D # noqa pylint:disable=import-error,no-name-in-module
|
||||
from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, ResidualBlock, UpscaleBlock
|
||||
from ._base import ModelBase
|
||||
|
||||
|
||||
class Model(ModelBase):
|
||||
|
@ -33,12 +30,12 @@ class Model(ModelBase):
|
|||
|
||||
outputs = [self.decoder_a()(encoder_a), self.decoder_b()(encoder_b)]
|
||||
|
||||
autoencoder = KerasModel(inputs, outputs, name=self.model_name)
|
||||
autoencoder = KModel(inputs, outputs, name=self.model_name)
|
||||
return autoencoder
|
||||
|
||||
def encoder(self):
|
||||
""" Unbalanced Encoder """
|
||||
kwargs = dict(kernel_initializer=self.kernel_initializer)
|
||||
kwargs = {"kernel_initializer": self.kernel_initializer}
|
||||
encoder_complexity = 128 if self.low_mem else self.config["complexity_encoder"]
|
||||
dense_dim = 384 if self.low_mem else 512
|
||||
dense_shape = self.input_shape[0] // 16
|
||||
|
@ -61,11 +58,11 @@ class Model(ModelBase):
|
|||
var_x = Dense(dense_shape * dense_shape * dense_dim,
|
||||
kernel_initializer=self.kernel_initializer)(var_x)
|
||||
var_x = Reshape((dense_shape, dense_shape, dense_dim))(var_x)
|
||||
return KerasModel(input_, var_x, name="encoder")
|
||||
return KModel(input_, var_x, name="encoder")
|
||||
|
||||
def decoder_a(self):
|
||||
""" Decoder for side A """
|
||||
kwargs = dict(kernel_size=5, kernel_initializer=self.kernel_initializer)
|
||||
kwargs = {"kernel_size": 5, "kernel_initializer": self.kernel_initializer}
|
||||
decoder_complexity = 320 if self.low_mem else self.config["complexity_decoder_a"]
|
||||
dense_dim = 384 if self.low_mem else 512
|
||||
decoder_shape = self.input_shape[0] // 16
|
||||
|
@ -93,11 +90,11 @@ class Model(ModelBase):
|
|||
var_y = UpscaleBlock(decoder_complexity // 4, activation="leakyrelu")(var_y)
|
||||
var_y = Conv2DOutput(1, 5, name="mask_out_a")(var_y)
|
||||
outputs.append(var_y)
|
||||
return KerasModel(input_, outputs=outputs, name="decoder_a")
|
||||
return KModel(input_, outputs=outputs, name="decoder_a")
|
||||
|
||||
def decoder_b(self):
|
||||
""" Decoder for side B """
|
||||
kwargs = dict(kernel_size=5, kernel_initializer=self.kernel_initializer)
|
||||
kwargs = {"kernel_size": 5, "kernel_initializer": self.kernel_initializer}
|
||||
decoder_complexity = 384 if self.low_mem else self.config["complexity_decoder_b"]
|
||||
dense_dim = 384 if self.low_mem else 512
|
||||
decoder_shape = self.input_shape[0] // 16
|
||||
|
@ -137,7 +134,7 @@ class Model(ModelBase):
|
|||
var_y = UpscaleBlock(decoder_complexity // 8, activation="leakyrelu")(var_y)
|
||||
var_y = Conv2DOutput(1, 5, name="mask_out_b")(var_y)
|
||||
outputs.append(var_y)
|
||||
return KerasModel(input_, outputs=outputs, name="decoder_b")
|
||||
return KModel(input_, outputs=outputs, name="decoder_b")
|
||||
|
||||
def _legacy_mapping(self):
|
||||
""" The mapping of legacy separate model names to single model names """
|
||||
|
|
|
@ -3,20 +3,16 @@
|
|||
Based on the original https://www.reddit.com/r/deepfakes/ code sample + contributions
|
||||
Adapted from a model by VillainGuy (https://github.com/VillainGuy) """
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.initializers import RandomNormal # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import add, Dense, Flatten, Input, LeakyReLU, Reshape # noqa:E501 # pylint:disable=import-error
|
||||
from tensorflow.keras.models import Model as KModel # pylint:disable=import-error
|
||||
|
||||
from lib.model.layers import PixelShuffler
|
||||
from lib.model.nn_blocks import (Conv2DOutput, Conv2DBlock, ResidualBlock, SeparableConv2DBlock,
|
||||
UpscaleBlock)
|
||||
from lib.utils import get_backend
|
||||
|
||||
from .original import Model as OriginalModel, KerasModel
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras.initializers import RandomNormal # pylint:disable=no-name-in-module
|
||||
from keras.layers import add, Dense, Flatten, Input, LeakyReLU, Reshape
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras.initializers import RandomNormal # noqa pylint:disable=import-error,no-name-in-module
|
||||
from tensorflow.keras.layers import add, Dense, Flatten, Input, LeakyReLU, Reshape # noqa pylint:disable=import-error,no-name-in-module
|
||||
from .original import Model as OriginalModel
|
||||
|
||||
|
||||
class Model(OriginalModel):
|
||||
|
@ -29,7 +25,7 @@ class Model(OriginalModel):
|
|||
|
||||
def encoder(self):
|
||||
""" Encoder Network """
|
||||
kwargs = dict(kernel_initializer=self.kernel_initializer)
|
||||
kwargs = {"kernel_initializer": self.kernel_initializer}
|
||||
input_ = Input(shape=self.input_shape)
|
||||
in_conv_filters = self.input_shape[0]
|
||||
if self.input_shape[0] > 128:
|
||||
|
@ -61,11 +57,11 @@ class Model(OriginalModel):
|
|||
var_x = Dense(dense_shape * dense_shape * 1024, **kwargs)(var_x)
|
||||
var_x = Reshape((dense_shape, dense_shape, 1024))(var_x)
|
||||
var_x = UpscaleBlock(512, activation="leakyrelu", **kwargs)(var_x)
|
||||
return KerasModel(input_, var_x, name="encoder")
|
||||
return KModel(input_, var_x, name="encoder")
|
||||
|
||||
def decoder(self, side):
|
||||
""" Decoder Network """
|
||||
kwargs = dict(kernel_initializer=self.kernel_initializer)
|
||||
kwargs = {"kernel_initializer": self.kernel_initializer}
|
||||
decoder_shape = self.input_shape[0] // 8
|
||||
input_ = Input(shape=(decoder_shape, decoder_shape, 512))
|
||||
|
||||
|
@ -89,4 +85,4 @@ class Model(OriginalModel):
|
|||
var_y = UpscaleBlock(self.input_shape[0], activation="leakyrelu")(var_y)
|
||||
var_y = Conv2DOutput(1, 5, name=f"mask_out_{side}")(var_y)
|
||||
outputs.append(var_y)
|
||||
return KerasModel(input_, outputs=outputs, name=f"decoder_{side}")
|
||||
return KModel(input_, outputs=outputs, name=f"decoder_{side}")
|
||||
|
|
|
@ -6,12 +6,12 @@ At present there is only the :class:`~plugins.train.trainer.original` plugin, so
|
|||
inherits from this class. If further plugins are developed, then common code should be kept here,
|
||||
with "original" unique code split out to the original plugin.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from typing import Callable, cast, Dict, Generator, List, Optional, Tuple, TYPE_CHECKING, Union
|
||||
import typing as T
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
@ -23,10 +23,10 @@ from tensorflow.python.framework import ( # pylint:disable=no-name-in-module
|
|||
from lib.image import hex_to_rgb
|
||||
from lib.training import PreviewDataGenerator, TrainingDataGenerator
|
||||
from lib.training.generator import BatchType, DataGenerator
|
||||
from lib.utils import FaceswapError, get_backend, get_folder, get_image_paths, get_tf_version
|
||||
from lib.utils import FaceswapError, get_folder, get_image_paths, get_tf_version
|
||||
from plugins.train._config import Config
|
||||
|
||||
if TYPE_CHECKING:
|
||||
if T.TYPE_CHECKING:
|
||||
from plugins.train.model._base import ModelBase
|
||||
from lib.config import ConfigValueType
|
||||
|
||||
|
@ -39,7 +39,7 @@ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
|||
|
||||
|
||||
def _get_config(plugin_name: str,
|
||||
configfile: Optional[str] = None) -> Dict[str, "ConfigValueType"]:
|
||||
configfile: T.Optional[str] = None) -> T.Dict[str, ConfigValueType]:
|
||||
""" Return the configuration for the requested trainer.
|
||||
|
||||
Parameters
|
||||
|
@ -79,10 +79,10 @@ class TrainerBase():
|
|||
"""
|
||||
|
||||
def __init__(self,
|
||||
model: "ModelBase",
|
||||
images: Dict[Literal["a", "b"], List[str]],
|
||||
model: ModelBase,
|
||||
images: T.Dict[Literal["a", "b"], T.List[str]],
|
||||
batch_size: int,
|
||||
configfile: Optional[str]) -> None:
|
||||
configfile: T.Optional[str]) -> None:
|
||||
logger.debug("Initializing %s: (model: '%s', batch_size: %s)",
|
||||
self.__class__.__name__, model, batch_size)
|
||||
self._model = model
|
||||
|
@ -97,21 +97,21 @@ class TrainerBase():
|
|||
self._tensorboard = self._set_tensorboard()
|
||||
self._samples = _Samples(self._model,
|
||||
self._model.coverage_ratio,
|
||||
cast(int, self._config["mask_opacity"]),
|
||||
cast(str, self._config["mask_color"]))
|
||||
T.cast(int, self._config["mask_opacity"]),
|
||||
T.cast(str, self._config["mask_color"]))
|
||||
|
||||
num_images = self._config.get("preview_images", 14)
|
||||
assert isinstance(num_images, int)
|
||||
self._timelapse = _Timelapse(self._model,
|
||||
self._model.coverage_ratio,
|
||||
num_images,
|
||||
cast(int, self._config["mask_opacity"]),
|
||||
cast(str, self._config["mask_color"]),
|
||||
T.cast(int, self._config["mask_opacity"]),
|
||||
T.cast(str, self._config["mask_color"]),
|
||||
self._feeder,
|
||||
self._images)
|
||||
logger.debug("Initialized %s", self.__class__.__name__)
|
||||
|
||||
def _get_config(self, configfile: Optional[str]) -> Dict[str, "ConfigValueType"]:
|
||||
def _get_config(self, configfile: T.Optional[str]) -> T.Dict[str, ConfigValueType]:
|
||||
""" Get the saved training config options. Override any global settings with the setting
|
||||
provided from the model's saved config.
|
||||
|
||||
|
@ -157,7 +157,7 @@ class TrainerBase():
|
|||
f"session_{self._model.state.session_id}")
|
||||
tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
|
||||
histogram_freq=0, # Must be 0 or hangs
|
||||
write_graph=get_backend() != "amd",
|
||||
write_graph=True,
|
||||
write_images=False,
|
||||
update_freq="batch",
|
||||
profile_batch=0,
|
||||
|
@ -173,10 +173,10 @@ class TrainerBase():
|
|||
self._samples.toggle_mask_display()
|
||||
|
||||
def train_one_step(self,
|
||||
viewer: Optional[Callable[[np.ndarray, str], None]],
|
||||
timelapse_kwargs: Optional[Dict[Literal["input_a",
|
||||
"input_b",
|
||||
"output"], str]]) -> None:
|
||||
viewer: T.Optional[T.Callable[[np.ndarray, str], None]],
|
||||
timelapse_kwargs: T.Optional[T.Dict[Literal["input_a",
|
||||
"input_b",
|
||||
"output"], str]]) -> None:
|
||||
""" Running training on a batch of images for each side.
|
||||
|
||||
Triggered from the training cycle in :class:`scripts.train.Train`.
|
||||
|
@ -215,12 +215,9 @@ class TrainerBase():
|
|||
(self._model.iterations - 1) % snapshot_interval == 0)
|
||||
|
||||
model_inputs, model_targets = self._feeder.get_batch()
|
||||
if get_backend() == "amd": # Expand out AMD inputs + targets
|
||||
model_inputs = [inp for side in model_inputs for inp in side] # type: ignore
|
||||
model_targets = [tgt for side in model_targets for tgt in side] # type: ignore
|
||||
|
||||
try:
|
||||
loss: List[float] = self._model.model.train_on_batch(model_inputs, y=model_targets)
|
||||
loss: T.List[float] = self._model.model.train_on_batch(model_inputs, y=model_targets)
|
||||
except tf_errors.ResourceExhaustedError as err:
|
||||
msg = ("You do not have enough GPU memory available to train the selected model at "
|
||||
"the selected settings. You can try a number of things:"
|
||||
|
@ -232,23 +229,6 @@ class TrainerBase():
|
|||
"\n4) Use a more lightweight model, or select the model's 'LowMem' option "
|
||||
"(in config) if it has one.")
|
||||
raise FaceswapError(msg) from err
|
||||
except Exception as err:
|
||||
if get_backend() == "amd":
|
||||
# pylint:disable=import-outside-toplevel
|
||||
from lib.plaidml_utils import is_plaidml_error
|
||||
if (is_plaidml_error(err) and (
|
||||
"CL_MEM_OBJECT_ALLOCATION_FAILURE" in str(err).upper() or
|
||||
"enough memory for the current schedule" in str(err).lower())):
|
||||
msg = ("You do not have enough GPU memory available to train the selected "
|
||||
"model at the selected settings. You can try a number of things:"
|
||||
"\n1) Close any other application that is using your GPU (web browsers "
|
||||
"are particularly bad for this)."
|
||||
"\n2) Lower the batchsize (the amount of images fed into the model "
|
||||
"each iteration)."
|
||||
"\n3) Use a more lightweight model, or select the model's 'LowMem' "
|
||||
"option (in config) if it has one.")
|
||||
raise FaceswapError(msg) from err
|
||||
raise
|
||||
self._log_tensorboard(loss)
|
||||
loss = self._collate_and_store_loss(loss[1:])
|
||||
self._print_loss(loss)
|
||||
|
@ -256,7 +236,7 @@ class TrainerBase():
|
|||
self._model.snapshot()
|
||||
self._update_viewers(viewer, timelapse_kwargs)
|
||||
|
||||
def _log_tensorboard(self, loss: List[float]) -> None:
|
||||
def _log_tensorboard(self, loss: T.List[float]) -> None:
|
||||
""" Log current loss to Tensorboard log files
|
||||
|
||||
Parameters
|
||||
|
@ -282,7 +262,7 @@ class TrainerBase():
|
|||
else:
|
||||
self._tensorboard.on_train_batch_end(self._model.iterations, logs=logs)
|
||||
|
||||
def _collate_and_store_loss(self, loss: List[float]) -> List[float]:
|
||||
def _collate_and_store_loss(self, loss: T.List[float]) -> T.List[float]:
|
||||
""" Collate the loss into totals for each side.
|
||||
|
||||
The losses are summed into a total for each side. Loss totals are added to
|
||||
|
@ -318,7 +298,7 @@ class TrainerBase():
|
|||
logger.trace("original loss: %s, combined_loss: %s", loss, combined_loss) # type: ignore
|
||||
return combined_loss
|
||||
|
||||
def _print_loss(self, loss: List[float]) -> None:
|
||||
def _print_loss(self, loss: T.List[float]) -> None:
|
||||
""" Outputs the loss for the current iteration to the console.
|
||||
|
||||
Parameters
|
||||
|
@ -338,10 +318,10 @@ class TrainerBase():
|
|||
"line: %s, error: %s", output, str(err))
|
||||
|
||||
def _update_viewers(self,
|
||||
viewer: Optional[Callable[[np.ndarray, str], None]],
|
||||
timelapse_kwargs: Optional[Dict[Literal["input_a",
|
||||
"input_b",
|
||||
"output"], str]]) -> None:
|
||||
viewer: T.Optional[T.Callable[[np.ndarray, str], None]],
|
||||
timelapse_kwargs: T.Optional[T.Dict[Literal["input_a",
|
||||
"input_b",
|
||||
"output"], str]]) -> None:
|
||||
""" Update the preview viewer and timelapse output
|
||||
|
||||
Parameters
|
||||
|
@ -391,10 +371,10 @@ class _Feeder():
|
|||
The configuration for this trainer
|
||||
"""
|
||||
def __init__(self,
|
||||
images: Dict[Literal["a", "b"], List[str]],
|
||||
model: 'ModelBase',
|
||||
images: T.Dict[Literal["a", "b"], T.List[str]],
|
||||
model: ModelBase,
|
||||
batch_size: int,
|
||||
config: Dict[str, "ConfigValueType"]) -> None:
|
||||
config: T.Dict[str, ConfigValueType]) -> None:
|
||||
logger.debug("Initializing %s: num_images: %s, batch_size: %s, config: %s)",
|
||||
self.__class__.__name__, {k: len(v) for k, v in images.items()}, batch_size,
|
||||
config)
|
||||
|
@ -405,14 +385,14 @@ class _Feeder():
|
|||
self._feeds = {side: self._load_generator(side, False).minibatch_ab()
|
||||
for side in get_args(Literal["a", "b"])}
|
||||
|
||||
self._display_feeds = dict(preview=self._set_preview_feed(), timelapse={})
|
||||
self._display_feeds = {"preview": self._set_preview_feed(), "timelapse": {}}
|
||||
logger.debug("Initialized %s:", self.__class__.__name__)
|
||||
|
||||
def _load_generator(self,
|
||||
side: Literal["a", "b"],
|
||||
is_display: bool,
|
||||
batch_size: Optional[int] = None,
|
||||
images: Optional[List[str]] = None) -> DataGenerator:
|
||||
batch_size: T.Optional[int] = None,
|
||||
images: T.Optional[T.List[str]] = None) -> DataGenerator:
|
||||
""" Load the :class:`~lib.training_data.TrainingDataGenerator` for this feeder.
|
||||
|
||||
Parameters
|
||||
|
@ -444,7 +424,7 @@ class _Feeder():
|
|||
self._batch_size if batch_size is None else batch_size)
|
||||
return retval
|
||||
|
||||
def _set_preview_feed(self) -> Dict[Literal["a", "b"], Generator[BatchType, None, None]]:
|
||||
def _set_preview_feed(self) -> T.Dict[Literal["a", "b"], T.Generator[BatchType, None, None]]:
|
||||
""" Set the preview feed for this feeder.
|
||||
|
||||
Creates a generator from :class:`lib.training_data.PreviewDataGenerator` specifically
|
||||
|
@ -456,7 +436,7 @@ class _Feeder():
|
|||
The side ("a" or "b") as key, :class:`~lib.training_data.PreviewDataGenerator` as
|
||||
value.
|
||||
"""
|
||||
retval: Dict[Literal["a", "b"], Generator[BatchType, None, None]] = {}
|
||||
retval: T.Dict[Literal["a", "b"], T.Generator[BatchType, None, None]] = {}
|
||||
num_images = self._config.get("preview_images", 14)
|
||||
assert isinstance(num_images, int)
|
||||
for side in get_args(Literal["a", "b"]):
|
||||
|
@ -468,7 +448,7 @@ class _Feeder():
|
|||
batch_size=batchsize).minibatch_ab()
|
||||
return retval
|
||||
|
||||
def get_batch(self) -> Tuple[List[List[np.ndarray]], ...]:
|
||||
def get_batch(self) -> T.Tuple[T.List[T.List[np.ndarray]], ...]:
|
||||
""" Get the feed data and the targets for each training side for feeding into the model's
|
||||
train function.
|
||||
|
||||
|
@ -479,8 +459,8 @@ class _Feeder():
|
|||
model_targets: list
|
||||
The targets for the model for each side A and B
|
||||
"""
|
||||
model_inputs: List[List[np.ndarray]] = []
|
||||
model_targets: List[List[np.ndarray]] = []
|
||||
model_inputs: T.List[T.List[np.ndarray]] = []
|
||||
model_targets: T.List[T.List[np.ndarray]] = []
|
||||
for side in ("a", "b"):
|
||||
side_feed, side_targets = next(self._feeds[side])
|
||||
if self._model.config["learn_mask"]: # Add the face mask as it's own target
|
||||
|
@ -492,8 +472,8 @@ class _Feeder():
|
|||
|
||||
return model_inputs, model_targets
|
||||
|
||||
def generate_preview(self,
|
||||
is_timelapse: bool = False) -> Dict[Literal["a", "b"], List[np.ndarray]]:
|
||||
def generate_preview(self, is_timelapse: bool = False
|
||||
) -> T.Dict[Literal["a", "b"], T.List[np.ndarray]]:
|
||||
""" Generate the images for preview window or timelapse
|
||||
|
||||
Parameters
|
||||
|
@ -510,14 +490,14 @@ class _Feeder():
|
|||
"""
|
||||
logger.debug("Generating preview (is_timelapse: %s)", is_timelapse)
|
||||
|
||||
batchsizes: List[int] = []
|
||||
feed: Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
samples: Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
masks: Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
batchsizes: T.List[int] = []
|
||||
feed: T.Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
samples: T.Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
masks: T.Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
|
||||
# MyPy can't recurse into nested dicts to get the type :(
|
||||
iterator = cast(Dict[Literal["a", "b"], Generator[BatchType, None, None]],
|
||||
self._display_feeds["timelapse" if is_timelapse else "preview"])
|
||||
iterator = T.cast(T.Dict[Literal["a", "b"], T.Generator[BatchType, None, None]],
|
||||
self._display_feeds["timelapse" if is_timelapse else "preview"])
|
||||
for side in get_args(Literal["a", "b"]):
|
||||
side_feed, side_samples = next(iterator[side])
|
||||
batchsizes.append(len(side_samples[0]))
|
||||
|
@ -533,10 +513,10 @@ class _Feeder():
|
|||
|
||||
def compile_sample(self,
|
||||
image_count: int,
|
||||
feed: Dict[Literal["a", "b"], np.ndarray],
|
||||
samples: Dict[Literal["a", "b"], np.ndarray],
|
||||
masks: Dict[Literal["a", "b"], np.ndarray]
|
||||
) -> Dict[Literal["a", "b"], List[np.ndarray]]:
|
||||
feed: T.Dict[Literal["a", "b"], np.ndarray],
|
||||
samples: T.Dict[Literal["a", "b"], np.ndarray],
|
||||
masks: T.Dict[Literal["a", "b"], np.ndarray]
|
||||
) -> T.Dict[Literal["a", "b"], T.List[np.ndarray]]:
|
||||
""" Compile the preview samples for display.
|
||||
|
||||
Parameters
|
||||
|
@ -562,7 +542,7 @@ class _Feeder():
|
|||
num_images = self._config.get("preview_images", 14)
|
||||
assert isinstance(num_images, int)
|
||||
num_images = min(image_count, num_images)
|
||||
retval: Dict[Literal["a", "b"], List[np.ndarray]] = {}
|
||||
retval: T.Dict[Literal["a", "b"], T.List[np.ndarray]] = {}
|
||||
for side in get_args(Literal["a", "b"]):
|
||||
logger.debug("Compiling samples: (side: '%s', samples: %s)", side, num_images)
|
||||
retval[side] = [feed[side][0:num_images],
|
||||
|
@ -572,7 +552,7 @@ class _Feeder():
|
|||
return retval
|
||||
|
||||
def set_timelapse_feed(self,
|
||||
images: Dict[Literal["a", "b"], List[str]],
|
||||
images: T.Dict[Literal["a", "b"], T.List[str]],
|
||||
batch_size: int) -> None:
|
||||
""" Set the time-lapse feed for this feeder.
|
||||
|
||||
|
@ -590,8 +570,8 @@ class _Feeder():
|
|||
images, batch_size)
|
||||
|
||||
# MyPy can't recurse into nested dicts to get the type :(
|
||||
iterator = cast(Dict[Literal["a", "b"], Generator[BatchType, None, None]],
|
||||
self._display_feeds["timelapse"])
|
||||
iterator = T.cast(T.Dict[Literal["a", "b"], T.Generator[BatchType, None, None]],
|
||||
self._display_feeds["timelapse"])
|
||||
|
||||
for side in get_args(Literal["a", "b"]):
|
||||
imgs = images[side]
|
||||
|
@ -626,7 +606,7 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
for generating samples corresponding to each side.
|
||||
"""
|
||||
def __init__(self,
|
||||
model: "ModelBase",
|
||||
model: ModelBase,
|
||||
coverage_ratio: float,
|
||||
mask_opacity: int,
|
||||
mask_color: str) -> None:
|
||||
|
@ -635,7 +615,7 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
self.__class__.__name__, model, coverage_ratio, mask_opacity, mask_color)
|
||||
self._model = model
|
||||
self._display_mask = model.config["learn_mask"] or model.config["penalized_mask_loss"]
|
||||
self.images: Dict[Literal["a", "b"], List[np.ndarray]] = {}
|
||||
self.images: T.Dict[Literal["a", "b"], T.List[np.ndarray]] = {}
|
||||
self._coverage_ratio = coverage_ratio
|
||||
self._mask_opacity = mask_opacity / 100.0
|
||||
self._mask_color = np.array(hex_to_rgb(mask_color))[..., 2::-1] / 255.
|
||||
|
@ -659,7 +639,7 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
A compiled preview image ready for display or saving
|
||||
"""
|
||||
logger.debug("Showing sample")
|
||||
feeds: Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
feeds: T.Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
for idx, side in enumerate(get_args(Literal["a", "b"])):
|
||||
feed = self.images[side][0]
|
||||
input_shape = self._model.model.input_shape[idx][1:]
|
||||
|
@ -704,7 +684,7 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
logger.debug("Resized sample: (side: '%s' shape: %s)", side, retval.shape)
|
||||
return retval
|
||||
|
||||
def _get_predictions(self, feed_a: np.ndarray, feed_b: np.ndarray) -> Dict[str, np.ndarray]:
|
||||
def _get_predictions(self, feed_a: np.ndarray, feed_b: np.ndarray) -> T.Dict[str, np.ndarray]:
|
||||
""" Feed the samples to the model and return predictions
|
||||
|
||||
Parameters
|
||||
|
@ -720,16 +700,10 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
List of :class:`numpy.ndarray` of predictions received from the model
|
||||
"""
|
||||
logger.debug("Getting Predictions")
|
||||
preds: Dict[str, np.ndarray] = {}
|
||||
preds: T.Dict[str, np.ndarray] = {}
|
||||
standard = self._model.model.predict([feed_a, feed_b], verbose=0)
|
||||
swapped = self._model.model.predict([feed_b, feed_a], verbose=0)
|
||||
|
||||
if self._model.config["learn_mask"] and get_backend() == "amd":
|
||||
# Ravel results for plaidml
|
||||
split = len(standard) // 2
|
||||
standard = [standard[:split], standard[split:]]
|
||||
swapped = [swapped[:split], swapped[split:]]
|
||||
|
||||
if self._model.config["learn_mask"]: # Add mask to 4th channel of final output
|
||||
standard = [np.concatenate(side[-2:], axis=-1) for side in standard]
|
||||
swapped = [np.concatenate(side[-2:], axis=-1) for side in swapped]
|
||||
|
@ -745,7 +719,7 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
logger.debug("Returning predictions: %s", {key: val.shape for key, val in preds.items()})
|
||||
return preds
|
||||
|
||||
def _compile_preview(self, predictions: Dict[str, np.ndarray]) -> np.ndarray:
|
||||
def _compile_preview(self, predictions: T.Dict[str, np.ndarray]) -> np.ndarray:
|
||||
""" Compile predictions and images into the final preview image.
|
||||
|
||||
Parameters
|
||||
|
@ -758,8 +732,8 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
:class:`numpy.ndarry`
|
||||
A compiled preview image ready for display or saving
|
||||
"""
|
||||
figures: Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
headers: Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
figures: T.Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
headers: T.Dict[Literal["a", "b"], np.ndarray] = {}
|
||||
|
||||
for side, samples in self.images.items():
|
||||
other_side = "a" if side == "b" else "b"
|
||||
|
@ -788,8 +762,8 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
|
||||
def _to_full_frame(self,
|
||||
side: Literal["a", "b"],
|
||||
samples: List[np.ndarray],
|
||||
predictions: List[np.ndarray]) -> List[np.ndarray]:
|
||||
samples: T.List[np.ndarray],
|
||||
predictions: T.List[np.ndarray]) -> T.List[np.ndarray]:
|
||||
""" Patch targets and prediction images into images of model output size.
|
||||
|
||||
Parameters
|
||||
|
@ -832,7 +806,7 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
side: Literal["a", "b"],
|
||||
images: np.ndarray,
|
||||
prediction_size: int,
|
||||
color: Tuple[float, float, float]) -> np.ndarray:
|
||||
color: T.Tuple[float, float, float]) -> np.ndarray:
|
||||
""" Add a frame overlay to preview images indicating the region of interest.
|
||||
|
||||
This applies the red border that appears in the preview images.
|
||||
|
@ -873,7 +847,7 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
logger.debug("Overlayed background. Shape: %s", images.shape)
|
||||
return images
|
||||
|
||||
def _compile_masked(self, faces: List[np.ndarray], masks: np.ndarray) -> List[np.ndarray]:
|
||||
def _compile_masked(self, faces: T.List[np.ndarray], masks: np.ndarray) -> T.List[np.ndarray]:
|
||||
""" Add the mask to the faces for masked preview.
|
||||
|
||||
Places an opaque red layer over areas of the face that are masked out.
|
||||
|
@ -892,7 +866,7 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
List of :class:`numpy.ndarray` faces with the opaque mask layer applied
|
||||
"""
|
||||
orig_masks = 1 - np.rint(masks)
|
||||
masks3: Union[List[np.ndarray], np.ndarray] = []
|
||||
masks3: T.Union[T.List[np.ndarray], np.ndarray] = []
|
||||
|
||||
if faces[-1].shape[-1] == 4: # Mask contained in alpha channel of predictions
|
||||
pred_masks = [1 - np.rint(face[..., -1])[..., None] for face in faces[-2:]]
|
||||
|
@ -901,7 +875,7 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
else:
|
||||
masks3 = np.repeat(np.expand_dims(orig_masks, axis=0), 3, axis=0)
|
||||
|
||||
retval: List[np.ndarray] = []
|
||||
retval: T.List[np.ndarray] = []
|
||||
alpha = 1.0 - self._mask_opacity
|
||||
for previews, compiled_masks in zip(faces, masks3):
|
||||
overlays = previews.copy()
|
||||
|
@ -984,8 +958,8 @@ class _Samples(): # pylint:disable=too-few-public-methods
|
|||
|
||||
@classmethod
|
||||
def _duplicate_headers(cls,
|
||||
headers: Dict[Literal["a", "b"], np.ndarray],
|
||||
columns: int) -> Dict[Literal["a", "b"], np.ndarray]:
|
||||
headers: T.Dict[Literal["a", "b"], np.ndarray],
|
||||
columns: int) -> T.Dict[Literal["a", "b"], np.ndarray]:
|
||||
""" Duplicate headers for the number of columns displayed for each side.
|
||||
|
||||
Parameters
|
||||
|
@ -1028,13 +1002,13 @@ class _Timelapse(): # pylint:disable=too-few-public-methods
|
|||
The full paths to the training images for each side of the model
|
||||
"""
|
||||
def __init__(self,
|
||||
model: "ModelBase",
|
||||
model: ModelBase,
|
||||
coverage_ratio: float,
|
||||
image_count: int,
|
||||
mask_opacity: int,
|
||||
mask_color: str,
|
||||
feeder: _Feeder,
|
||||
image_paths: Dict[Literal["a", "b"], List[str]]) -> None:
|
||||
image_paths: T.Dict[Literal["a", "b"], T.List[str]]) -> None:
|
||||
logger.debug("Initializing %s: model: %s, coverage_ratio: %s, image_count: %s, "
|
||||
"mask_opacity: %s, mask_color: %s, feeder: %s, image_paths: %s)",
|
||||
self.__class__.__name__, model, coverage_ratio, image_count, mask_opacity,
|
||||
|
@ -1068,7 +1042,7 @@ class _Timelapse(): # pylint:disable=too-few-public-methods
|
|||
logger.debug("Time-lapse output set to '%s'", self._output_file)
|
||||
|
||||
# Rewrite paths to pull from the training images so mask and face data can be accessed
|
||||
images: Dict[Literal["a", "b"], List[str]] = {}
|
||||
images: T.Dict[Literal["a", "b"], T.List[str]] = {}
|
||||
for side, input_ in zip(get_args(Literal["a", "b"]), (input_a, input_b)):
|
||||
training_path = os.path.dirname(self._image_paths[side][0])
|
||||
images[side] = [os.path.join(training_path, os.path.basename(pth))
|
||||
|
@ -1080,9 +1054,9 @@ class _Timelapse(): # pylint:disable=too-few-public-methods
|
|||
self._feeder.set_timelapse_feed(images, batchsize)
|
||||
logger.debug("Set up time-lapse")
|
||||
|
||||
def output_timelapse(self, timelapse_kwargs: Dict[Literal["input_a",
|
||||
"input_b",
|
||||
"output"], str]) -> None:
|
||||
def output_timelapse(self, timelapse_kwargs: T.Dict[Literal["input_a",
|
||||
"input_b",
|
||||
"output"], str]) -> None:
|
||||
""" Generate the time-lapse samples and output the created time-lapse to the specified
|
||||
output folder.
|
||||
|
||||
|
@ -1094,7 +1068,7 @@ class _Timelapse(): # pylint:disable=too-few-public-methods
|
|||
"""
|
||||
logger.debug("Ouputting time-lapse")
|
||||
if not self._output_file:
|
||||
self._setup(**cast(Dict[str, str], timelapse_kwargs))
|
||||
self._setup(**T.cast(T.Dict[str, str], timelapse_kwargs))
|
||||
|
||||
logger.debug("Getting time-lapse samples")
|
||||
self._samples.images = self._feeder.generate_preview(is_timelapse=True)
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
-r _requirements_base.txt
|
||||
# tf2.2 is last version that tensorboard logging works with old Keras
|
||||
numpy>=1.18.0,<1.19.0 # TF Will uninstall anything equal or over 1.19.0
|
||||
protobuf>= 3.19.0,<3.20.0 # TF has started pulling in incompatible protobuf
|
||||
tensorflow>=2.2.0,<2.3.0
|
||||
plaidml-keras==0.7.0
|
|
@ -1,14 +1,14 @@
|
|||
#!/usr/bin python3
|
||||
""" Main entry point to the convert process of FaceSwap """
|
||||
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
import logging
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
import typing as T
|
||||
from threading import Event
|
||||
from time import sleep
|
||||
from typing import Callable, cast, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
@ -22,7 +22,7 @@ from lib.gpu_stats import GPUStats
|
|||
from lib.image import read_image_meta_batch, ImagesLoader
|
||||
from lib.multithreading import MultiThread, total_cpus
|
||||
from lib.queue_manager import queue_manager
|
||||
from lib.utils import FaceswapError, get_backend, get_folder, get_image_paths
|
||||
from lib.utils import FaceswapError, get_folder, get_image_paths
|
||||
from plugins.extract.pipeline import Extractor, ExtractMedia
|
||||
from plugins.plugin_loader import PluginLoader
|
||||
|
||||
|
@ -31,7 +31,7 @@ if sys.version_info < (3, 8):
|
|||
else:
|
||||
from typing import get_args, Literal
|
||||
|
||||
if TYPE_CHECKING:
|
||||
if T.TYPE_CHECKING:
|
||||
from argparse import Namespace
|
||||
from plugins.convert.writer._base import Output
|
||||
from plugins.train.model._base import ModelBase
|
||||
|
@ -61,8 +61,8 @@ class ConvertItem:
|
|||
The swapped faces returned from the model's predict function
|
||||
"""
|
||||
inbound: ExtractMedia
|
||||
feed_faces: List[AlignedFace] = field(default_factory=list)
|
||||
reference_faces: List[AlignedFace] = field(default_factory=list)
|
||||
feed_faces: T.List[AlignedFace] = field(default_factory=list)
|
||||
reference_faces: T.List[AlignedFace] = field(default_factory=list)
|
||||
swapped_faces: np.ndarray = np.array([])
|
||||
|
||||
|
||||
|
@ -84,7 +84,7 @@ class Convert(): # pylint:disable=too-few-public-methods
|
|||
The arguments to be passed to the convert process as generated from Faceswap's command
|
||||
line arguments
|
||||
"""
|
||||
def __init__(self, arguments: "Namespace") -> None:
|
||||
def __init__(self, arguments: Namespace) -> None:
|
||||
logger.debug("Initializing %s: (args: %s)", self.__class__.__name__, arguments)
|
||||
self._args = arguments
|
||||
|
||||
|
@ -290,7 +290,7 @@ class DiskIO():
|
|||
"""
|
||||
|
||||
def __init__(self,
|
||||
alignments: Alignments, images: ImagesLoader, arguments: "Namespace") -> None:
|
||||
alignments: Alignments, images: ImagesLoader, arguments: Namespace) -> None:
|
||||
logger.debug("Initializing %s: (alignments: %s, images: %s, arguments: %s)",
|
||||
self.__class__.__name__, alignments, images, arguments)
|
||||
self._alignments = alignments
|
||||
|
@ -307,8 +307,8 @@ class DiskIO():
|
|||
# Extractor for on the fly detection
|
||||
self._extractor = self._load_extractor()
|
||||
|
||||
self._queues: Dict[Literal["load", "save"], "EventQueue"] = {}
|
||||
self._threads: Dict[Literal["load", "save"], MultiThread] = {}
|
||||
self._queues: T.Dict[Literal["load", "save"], EventQueue] = {}
|
||||
self._threads: T.Dict[Literal["load", "save"], MultiThread] = {}
|
||||
self._init_threads()
|
||||
logger.debug("Initialized %s", self.__class__.__name__)
|
||||
|
||||
|
@ -324,13 +324,13 @@ class DiskIO():
|
|||
return self._writer.config.get("draw_transparent", False)
|
||||
|
||||
@property
|
||||
def pre_encode(self) -> Optional[Callable[[np.ndarray], List[bytes]]]:
|
||||
def pre_encode(self) -> T.Optional[T.Callable[[np.ndarray], T.List[bytes]]]:
|
||||
""" python function: Selected writer's pre-encode function, if it has one,
|
||||
otherwise ``None`` """
|
||||
dummy = np.zeros((20, 20, 3), dtype="uint8")
|
||||
test = self._writer.pre_encode(dummy)
|
||||
retval: Optional[Callable[[np.ndarray],
|
||||
List[bytes]]] = None if test is None else self._writer.pre_encode
|
||||
retval: T.Optional[T.Callable[[np.ndarray],
|
||||
T.List[bytes]]] = None if test is None else self._writer.pre_encode
|
||||
logger.debug("Writer pre_encode function: %s", retval)
|
||||
return retval
|
||||
|
||||
|
@ -347,7 +347,7 @@ class DiskIO():
|
|||
return self._threads["load"]
|
||||
|
||||
@property
|
||||
def load_queue(self) -> "EventQueue":
|
||||
def load_queue(self) -> EventQueue:
|
||||
""" :class:`~lib.queue_manager.EventQueue`: The queue that images and detected faces are "
|
||||
"loaded into. """
|
||||
return self._queues["load"]
|
||||
|
@ -363,7 +363,7 @@ class DiskIO():
|
|||
return retval
|
||||
|
||||
# Initialization
|
||||
def _get_writer(self) -> "Output":
|
||||
def _get_writer(self) -> Output:
|
||||
""" Load the selected writer plugin.
|
||||
|
||||
Returns
|
||||
|
@ -384,7 +384,7 @@ class DiskIO():
|
|||
return PluginLoader.get_converter("writer", self._args.writer)(*args,
|
||||
configfile=configfile)
|
||||
|
||||
def _get_frame_ranges(self) -> Optional[List[Tuple[int, int]]]:
|
||||
def _get_frame_ranges(self) -> T.Optional[T.List[T.Tuple[int, int]]]:
|
||||
""" Obtain the frame ranges that are to be converted.
|
||||
|
||||
If frame ranges have been specified, then split the command line formatted arguments into
|
||||
|
@ -422,7 +422,7 @@ class DiskIO():
|
|||
logger.debug("frame ranges: %s", retval)
|
||||
return retval
|
||||
|
||||
def _load_extractor(self) -> Optional[Extractor]:
|
||||
def _load_extractor(self) -> T.Optional[Extractor]:
|
||||
""" Load the CV2-DNN Face Extractor Chain.
|
||||
|
||||
For On-The-Fly conversion we use a CPU based extractor to avoid stacking the GPU.
|
||||
|
@ -571,7 +571,7 @@ class DiskIO():
|
|||
logger.trace("idx: %s, skipframe: %s", idx, skipframe) # type: ignore
|
||||
return skipframe
|
||||
|
||||
def _get_detected_faces(self, filename: str, image: np.ndarray) -> List[DetectedFace]:
|
||||
def _get_detected_faces(self, filename: str, image: np.ndarray) -> T.List[DetectedFace]:
|
||||
""" Return the detected faces for the given image.
|
||||
|
||||
If we have an alignments file, then the detected faces are created from that file. If
|
||||
|
@ -597,7 +597,7 @@ class DiskIO():
|
|||
logger.trace("Got %s faces for: '%s'", len(detected_faces), filename) # type:ignore
|
||||
return detected_faces
|
||||
|
||||
def _alignments_faces(self, frame_name: str, image: np.ndarray) -> List[DetectedFace]:
|
||||
def _alignments_faces(self, frame_name: str, image: np.ndarray) -> T.List[DetectedFace]:
|
||||
""" Return detected faces from an alignments file.
|
||||
|
||||
Parameters
|
||||
|
@ -644,7 +644,7 @@ class DiskIO():
|
|||
tqdm.write(f"No alignment found for {frame_name}, skipping")
|
||||
return have_alignments
|
||||
|
||||
def _detect_faces(self, filename: str, image: np.ndarray) -> List[DetectedFace]:
|
||||
def _detect_faces(self, filename: str, image: np.ndarray) -> T.List[DetectedFace]:
|
||||
""" Extract the face from a frame for On-The-Fly conversion.
|
||||
|
||||
Pulls detected faces out of the Extraction pipeline.
|
||||
|
@ -714,7 +714,7 @@ class Predict():
|
|||
The arguments that were passed to the convert process as generated from Faceswap's command
|
||||
line arguments
|
||||
"""
|
||||
def __init__(self, in_queue: "EventQueue", queue_size: int, arguments: "Namespace") -> None:
|
||||
def __init__(self, in_queue: EventQueue, queue_size: int, arguments: Namespace) -> None:
|
||||
logger.debug("Initializing %s: (args: %s, queue_size: %s, in_queue: %s)",
|
||||
self.__class__.__name__, arguments, queue_size, in_queue)
|
||||
self._args = arguments
|
||||
|
@ -740,12 +740,12 @@ class Predict():
|
|||
return self._thread
|
||||
|
||||
@property
|
||||
def in_queue(self) -> "EventQueue":
|
||||
def in_queue(self) -> EventQueue:
|
||||
""" :class:`~lib.queue_manager.EventQueue`: The input queue to the predictor. """
|
||||
return self._in_queue
|
||||
|
||||
@property
|
||||
def out_queue(self) -> "EventQueue":
|
||||
def out_queue(self) -> EventQueue:
|
||||
""" :class:`~lib.queue_manager.EventQueue`: The output queue from the predictor. """
|
||||
return self._out_queue
|
||||
|
||||
|
@ -765,7 +765,7 @@ class Predict():
|
|||
return self._coverage_ratio
|
||||
|
||||
@property
|
||||
def centering(self) -> "CenteringType":
|
||||
def centering(self) -> CenteringType:
|
||||
""" str: The centering that the model was trained on (`"head", "face"` or `"legacy"`) """
|
||||
return self._centering
|
||||
|
||||
|
@ -779,7 +779,7 @@ class Predict():
|
|||
""" int: The size in pixels of the Faceswap model output. """
|
||||
return self._sizes["output"]
|
||||
|
||||
def _get_io_sizes(self) -> Dict[str, int]:
|
||||
def _get_io_sizes(self) -> T.Dict[str, int]:
|
||||
""" Obtain the input size and output size of the model.
|
||||
|
||||
Returns
|
||||
|
@ -795,7 +795,7 @@ class Predict():
|
|||
logger.debug(retval)
|
||||
return retval
|
||||
|
||||
def _load_model(self) -> "ModelBase":
|
||||
def _load_model(self) -> ModelBase:
|
||||
""" Load the Faceswap model.
|
||||
|
||||
Returns
|
||||
|
@ -896,9 +896,9 @@ class Predict():
|
|||
"""
|
||||
faces_seen = 0
|
||||
consecutive_no_faces = 0
|
||||
batch: List[ConvertItem] = []
|
||||
batch: T.List[ConvertItem] = []
|
||||
while True:
|
||||
item: Union[Literal["EOF"], ConvertItem] = self._in_queue.get()
|
||||
item: T.Union[Literal["EOF"], ConvertItem] = self._in_queue.get()
|
||||
if item == "EOF":
|
||||
logger.debug("EOF Received")
|
||||
if batch: # Process out any remaining items
|
||||
|
@ -938,7 +938,7 @@ class Predict():
|
|||
self._out_queue.put("EOF")
|
||||
logger.debug("Load queue complete")
|
||||
|
||||
def _process_batch(self, batch: List[ConvertItem], faces_seen: int):
|
||||
def _process_batch(self, batch: T.List[ConvertItem], faces_seen: int):
|
||||
""" Predict faces on the given batch of images and queue out to patch thread
|
||||
|
||||
Parameters
|
||||
|
@ -959,9 +959,6 @@ class Predict():
|
|||
if faces_seen != 0:
|
||||
feed_faces = self._compile_feed_faces(feed_batch)
|
||||
batch_size = None
|
||||
if get_backend() == "amd" and feed_faces.shape[0] != self._batchsize:
|
||||
logger.verbose("Fallback to BS=1") # type:ignore
|
||||
batch_size = 1
|
||||
predicted = self._predict(feed_faces, batch_size)
|
||||
else:
|
||||
predicted = np.array([])
|
||||
|
@ -1004,7 +1001,7 @@ class Predict():
|
|||
logger.trace("Loaded aligned faces: '%s'", item.inbound.filename) # type:ignore
|
||||
|
||||
@staticmethod
|
||||
def _compile_feed_faces(feed_faces: List[AlignedFace]) -> np.ndarray:
|
||||
def _compile_feed_faces(feed_faces: T.List[AlignedFace]) -> np.ndarray:
|
||||
""" Compile a batch of faces for feeding into the Predictor.
|
||||
|
||||
Parameters
|
||||
|
@ -1018,12 +1015,12 @@ class Predict():
|
|||
A batch of faces ready for feeding into the Faceswap model.
|
||||
"""
|
||||
logger.trace("Compiling feed face. Batchsize: %s", len(feed_faces)) # type:ignore
|
||||
retval = np.stack([cast(np.ndarray, feed_face.face)[..., :3]
|
||||
retval = np.stack([T.cast(np.ndarray, feed_face.face)[..., :3]
|
||||
for feed_face in feed_faces]) / 255.0
|
||||
logger.trace("Compiled Feed faces. Shape: %s", retval.shape) # type:ignore
|
||||
return retval
|
||||
|
||||
def _predict(self, feed_faces: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray:
|
||||
def _predict(self, feed_faces: np.ndarray, batch_size: T.Optional[int] = None) -> np.ndarray:
|
||||
""" Run the Faceswap models' prediction function.
|
||||
|
||||
Parameters
|
||||
|
@ -1048,7 +1045,7 @@ class Predict():
|
|||
logger.trace("Input shape(s): %s", [item.shape for item in feed]) # type:ignore
|
||||
|
||||
inbound = self._model.model.predict(feed, verbose=0, batch_size=batch_size)
|
||||
predicted: List[np.ndarray] = inbound if isinstance(inbound, list) else [inbound]
|
||||
predicted: T.List[np.ndarray] = inbound if isinstance(inbound, list) else [inbound]
|
||||
|
||||
if self._model.color_order.lower() == "rgb":
|
||||
predicted[0] = predicted[0][..., ::-1]
|
||||
|
@ -1065,7 +1062,7 @@ class Predict():
|
|||
logger.trace("Final shape: %s", retval.shape) # type:ignore
|
||||
return retval
|
||||
|
||||
def _queue_out_frames(self, batch: List[ConvertItem], swapped_faces: np.ndarray) -> None:
|
||||
def _queue_out_frames(self, batch: T.List[ConvertItem], swapped_faces: np.ndarray) -> None:
|
||||
""" Compile the batch back to original frames and put to the Out Queue.
|
||||
|
||||
For batching, faces are split away from their frames. This compiles all detected faces
|
||||
|
@ -1110,8 +1107,8 @@ class OptionalActions(): # pylint:disable=too-few-public-methods
|
|||
The alignments file for this conversion
|
||||
"""
|
||||
def __init__(self,
|
||||
arguments: "Namespace",
|
||||
input_images: List[np.ndarray],
|
||||
arguments: Namespace,
|
||||
input_images: T.List[np.ndarray],
|
||||
alignments: Alignments) -> None:
|
||||
logger.debug("Initializing %s", self.__class__.__name__)
|
||||
self._args = arguments
|
||||
|
@ -1134,7 +1131,7 @@ class OptionalActions(): # pylint:disable=too-few-public-methods
|
|||
self._alignments.filter_faces(accept_dict, filter_out=False)
|
||||
logger.info("Faces filtered out: %s", pre_face_count - self._alignments.faces_count)
|
||||
|
||||
def _get_face_metadata(self) -> Dict[str, List[int]]:
|
||||
def _get_face_metadata(self) -> T.Dict[str, T.List[int]]:
|
||||
""" Check for the existence of an aligned directory for identifying which faces in the
|
||||
target frames should be swapped. If it exists, scan the folder for face's metadata
|
||||
|
||||
|
@ -1143,7 +1140,7 @@ class OptionalActions(): # pylint:disable=too-few-public-methods
|
|||
dict
|
||||
Dictionary of source frame names with a list of associated face indices to be skipped
|
||||
"""
|
||||
retval: Dict[str, List[int]] = {}
|
||||
retval: T.Dict[str, T.List[int]] = {}
|
||||
input_aligned_dir = self._args.input_aligned_dir
|
||||
|
||||
if input_aligned_dir is None:
|
||||
|
|
|
@ -35,8 +35,6 @@ ignore_missing_imports = True
|
|||
ignore_missing_imports = True
|
||||
[mypy-PIL.*]
|
||||
ignore_missing_imports = True
|
||||
[mypy-plaidml.*]
|
||||
ignore_missing_imports = True
|
||||
[mypy-psutil.*]
|
||||
ignore_missing_imports = True
|
||||
[mypy-pynvml.*]
|
||||
|
|
65
setup.py
65
setup.py
|
@ -60,13 +60,13 @@ class Environment():
|
|||
setup is running. Default: ``False``
|
||||
"""
|
||||
|
||||
_backends = (("nvidia", "amd", "apple_silicon", "directml", "rocm", "cpu"))
|
||||
_backends = (("nvidia", "apple_silicon", "directml", "rocm", "cpu"))
|
||||
|
||||
def __init__(self, updater: bool = False) -> None:
|
||||
self.updater = updater
|
||||
# Flag that setup is being run by installer so steps can be skipped
|
||||
self.is_installer: bool = False
|
||||
self.backend: Optional[Literal["nvidia", "amd", "apple_silicon",
|
||||
self.backend: Optional[Literal["nvidia", "apple_silicon",
|
||||
"directml", "cpu", "rocm"]] = None
|
||||
self.enable_docker: bool = False
|
||||
self.cuda_cudnn = ["", ""]
|
||||
|
@ -130,7 +130,7 @@ class Environment():
|
|||
(hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix))
|
||||
else:
|
||||
prefix = os.path.dirname(sys.prefix)
|
||||
retval = (os.path.basename(prefix) == "envs")
|
||||
retval = os.path.basename(prefix) == "envs"
|
||||
return retval
|
||||
|
||||
def _process_arguments(self) -> None:
|
||||
|
@ -185,10 +185,6 @@ class Environment():
|
|||
logger.error("Please run this script with Python version 3.7 to 3.9 64bit and try "
|
||||
"again.")
|
||||
sys.exit(1)
|
||||
if self.backend == "amd" and sys.version_info >= (3, 9):
|
||||
logger.error("The AMD version of Faceswap cannot be installed on versions of Python "
|
||||
"higher than 3.8")
|
||||
sys.exit(1)
|
||||
|
||||
def _output_runtime_info(self) -> None:
|
||||
""" Output run time info """
|
||||
|
@ -240,10 +236,6 @@ class Environment():
|
|||
Update the LD_LIBRARY_PATH environment variable when activating a conda environment
|
||||
and revert it when deactivating.
|
||||
|
||||
Windows + AMD + Python 3.8:
|
||||
Add CONDA_DLL_SEARCH_MODIFICATION_ENABLE=1 environment variable to get around a bug which
|
||||
prevents SciPy from loading in this config: https://github.com/scipy/scipy/issues/14002
|
||||
|
||||
Notes
|
||||
-----
|
||||
From Tensorflow 2.7, installing Cuda Toolkit from conda-forge and tensorflow from pip
|
||||
|
@ -255,10 +247,8 @@ class Environment():
|
|||
return
|
||||
|
||||
linux_update = self.os_version[0].lower() == "linux" and self.backend == "nvidia"
|
||||
windows_update = (self.os_version[0].lower() == "windows" and
|
||||
self.backend == "amd" and (3, 8) <= sys.version_info < (3, 9))
|
||||
|
||||
if not linux_update and not windows_update:
|
||||
if not linux_update:
|
||||
return
|
||||
|
||||
conda_prefix = os.environ["CONDA_PREFIX"]
|
||||
|
@ -267,9 +257,8 @@ class Environment():
|
|||
os.makedirs(activate_folder, exist_ok=True)
|
||||
os.makedirs(deactivate_folder, exist_ok=True)
|
||||
|
||||
ext = ".bat" if windows_update else ".sh"
|
||||
activate_script = os.path.join(conda_prefix, activate_folder, f"env_vars{ext}")
|
||||
deactivate_script = os.path.join(conda_prefix, deactivate_folder, f"env_vars{ext}")
|
||||
activate_script = os.path.join(conda_prefix, activate_folder, "env_vars.sh")
|
||||
deactivate_script = os.path.join(conda_prefix, deactivate_folder, "env_vars.sh")
|
||||
|
||||
if os.path.isfile(activate_script):
|
||||
# Only create file if it does not already exist. There may be instances where people
|
||||
|
@ -277,22 +266,14 @@ class Environment():
|
|||
# people should already know what they are doing.
|
||||
return
|
||||
|
||||
if linux_update:
|
||||
conda_libs = os.path.join(conda_prefix, "lib")
|
||||
activate = ["#!/bin/sh\n\n",
|
||||
"export OLD_LD_LIBRARY_PATH=${LD_LIBRARY_PATH}\n",
|
||||
f"export LD_LIBRARY_PATH='{conda_libs}':${{LD_LIBRARY_PATH}}\n"]
|
||||
deactivate = ["#!/bin/sh\n\n",
|
||||
"export LD_LIBRARY_PATH=${OLD_LD_LIBRARY_PATH}\n",
|
||||
"unset OLD_LD_LIBRARY_PATH\n"]
|
||||
logger.info("Cuda search path set to '%s'", conda_libs)
|
||||
|
||||
if windows_update:
|
||||
activate = ["@ECHO OFF\n",
|
||||
"set CONDA_DLL_SEARCH_MODIFICATION_ENABLE=1\n"]
|
||||
deactivate = ["@ECHO OFF\n",
|
||||
"set CONDA_DLL_SEARCH_MODIFICATION_ENABLE=\n"]
|
||||
logger.verbose("CONDA_DLL_SEARCH_MODIFICATION_ENABLE set to 1") # type: ignore
|
||||
conda_libs = os.path.join(conda_prefix, "lib")
|
||||
activate = ["#!/bin/sh\n\n",
|
||||
"export OLD_LD_LIBRARY_PATH=${LD_LIBRARY_PATH}\n",
|
||||
f"export LD_LIBRARY_PATH='{conda_libs}':${{LD_LIBRARY_PATH}}\n"]
|
||||
deactivate = ["#!/bin/sh\n\n",
|
||||
"export LD_LIBRARY_PATH=${OLD_LD_LIBRARY_PATH}\n",
|
||||
"unset OLD_LD_LIBRARY_PATH\n"]
|
||||
logger.info("Cuda search path set to '%s'", conda_libs)
|
||||
|
||||
with open(activate_script, "w", encoding="utf8") as afile:
|
||||
afile.writelines(activate)
|
||||
|
@ -628,28 +609,10 @@ class Checks(): # pylint:disable=too-few-public-methods
|
|||
logger.info("DirectML Support Enabled")
|
||||
self._env.backend = "directml"
|
||||
|
||||
def _amd_ask_enable(self) -> None:
|
||||
""" Set backend to 'amd' to use plaidML if AMD support required """
|
||||
msg = ""
|
||||
if self._env.os_version[0] == "Windows":
|
||||
msg = "AMD users should select 'DirectML support' if possible.\r\n"
|
||||
if self._env.os_version[0] == "Linux":
|
||||
msg = "AMD users should select 'ROCm support' if possible.\r\n"
|
||||
|
||||
logger.info("AMD Support:\r\nThis version is deprecated and will be removed from a future "
|
||||
"update.\r\n%s"
|
||||
"Nvidia Users MUST answer 'no' to this option.", msg)
|
||||
i = input("Enable AMD Support? [y/N] ")
|
||||
if i in ("Y", "y"):
|
||||
logger.info("AMD Support Enabled")
|
||||
self._env.backend = "amd"
|
||||
|
||||
def _user_input(self) -> None:
|
||||
""" Get user input for AMD/DirectML/ROCm/Cuda/Docker """
|
||||
self._directml_ask_enable()
|
||||
self._rocm_ask_enable()
|
||||
if not self._env.backend:
|
||||
self._amd_ask_enable()
|
||||
if not self._env.backend:
|
||||
self._docker_ask_enable()
|
||||
self._cuda_ask_enable()
|
||||
|
|
|
@ -624,11 +624,10 @@ class Test_EventParser: # pylint:disable=invalid-name
|
|||
monkeypatch: :class:`pytest.MonkeyPatch`
|
||||
For patching different iterators for testing output
|
||||
"""
|
||||
monkeypatch.setattr("lib.utils._FS_BACKEND", "cpu") # We'll test AMD separately
|
||||
monkeypatch.setattr("lib.utils._FS_BACKEND", "cpu")
|
||||
|
||||
event_parse = event_parser_instance
|
||||
event_parse._parse_outputs = cast(MagicMock, mocker.MagicMock()) # type:ignore
|
||||
event_parse._add_amd_loss_labels = cast(MagicMock, mocker.MagicMock()) # type:ignore
|
||||
event_parse._process_event = cast(MagicMock, mocker.MagicMock()) # type:ignore
|
||||
event_parse._cache.cache_data = cast(MagicMock, mocker.MagicMock()) # type:ignore
|
||||
|
||||
|
@ -638,11 +637,9 @@ class Test_EventParser: # pylint:disable=invalid-name
|
|||
iter([self._create_example_event(0, 1., time())]))
|
||||
event_parse.cache_events(1)
|
||||
assert event_parse._parse_outputs.called
|
||||
assert not event_parse._add_amd_loss_labels.called
|
||||
assert not event_parse._process_event.called
|
||||
assert event_parse._cache.cache_data.called
|
||||
event_parse._parse_outputs.reset_mock()
|
||||
event_parse._add_amd_loss_labels.reset_mock()
|
||||
event_parse._process_event.reset_mock()
|
||||
event_parse._cache.cache_data.reset_mock()
|
||||
|
||||
|
@ -652,11 +649,9 @@ class Test_EventParser: # pylint:disable=invalid-name
|
|||
iter([self._create_example_event(1, 1., time())]))
|
||||
event_parse.cache_events(1)
|
||||
assert not event_parse._parse_outputs.called
|
||||
assert not event_parse._add_amd_loss_labels.called
|
||||
assert event_parse._process_event.called
|
||||
assert event_parse._cache.cache_data.called
|
||||
event_parse._parse_outputs.reset_mock()
|
||||
event_parse._add_amd_loss_labels.reset_mock()
|
||||
event_parse._process_event.reset_mock()
|
||||
event_parse._cache.cache_data.reset_mock()
|
||||
|
||||
|
@ -665,25 +660,12 @@ class Test_EventParser: # pylint:disable=invalid-name
|
|||
"_iterator",
|
||||
iter([event_pb2.Event(step=1).SerializeToString()]))
|
||||
assert not event_parse._parse_outputs.called
|
||||
assert not event_parse._add_amd_loss_labels.called
|
||||
assert not event_parse._process_event.called
|
||||
assert not event_parse._cache.cache_data.called
|
||||
event_parse._parse_outputs.reset_mock()
|
||||
event_parse._add_amd_loss_labels.reset_mock()
|
||||
event_parse._process_event.reset_mock()
|
||||
event_parse._cache.cache_data.reset_mock()
|
||||
|
||||
# AMD + batch item 2
|
||||
monkeypatch.setattr("lib.utils._FS_BACKEND", "amd")
|
||||
monkeypatch.setattr(event_parse,
|
||||
"_iterator",
|
||||
iter([self._create_example_event(2, 1., time())]))
|
||||
event_parse.cache_events(1)
|
||||
assert not event_parse._parse_outputs.called
|
||||
assert event_parse._add_amd_loss_labels.called
|
||||
assert event_parse._process_event.called
|
||||
assert event_parse._cache.cache_data.called
|
||||
|
||||
def test__parse_outputs(self,
|
||||
event_parser_instance: _EventParser,
|
||||
mocker: pytest_mock.MockerFixture) -> None:
|
||||
|
@ -729,34 +711,6 @@ class Test_EventParser: # pylint:disable=invalid-name
|
|||
assert actual.shape == (2, 1, 3)
|
||||
np.testing.assert_equal(expected, actual)
|
||||
|
||||
def test__add_amd_loss_labels(self,
|
||||
event_parser_instance: _EventParser,
|
||||
mocker: pytest_mock.MockerFixture) -> None:
|
||||
""" Test _add_amd_loss_labels works correctly
|
||||
|
||||
Parameters
|
||||
----------
|
||||
event_parser_instance: :class:`lib.gui.analysis.event_reader._EventParser`
|
||||
The class instance to test
|
||||
mocker: :class:`pytest_mock.MockerFixture`
|
||||
Mocker for checking Session data
|
||||
"""
|
||||
event_parse = event_parser_instance
|
||||
|
||||
# Already collected
|
||||
assert not event_parse._cache._loss_labels
|
||||
event_parse._cache._loss_labels.extend(["label_a", "label_b"])
|
||||
event_parse._add_amd_loss_labels(1)
|
||||
assert not event_parse._loss_labels
|
||||
|
||||
# New labels
|
||||
event_parse._cache._loss_labels = []
|
||||
mock_session = mocker.patch("lib.gui.analysis.Session")
|
||||
mock_session.get_loss_keys.return_value = ["label_c", "label_d"]
|
||||
assert not event_parse._cache._loss_labels
|
||||
event_parse._add_amd_loss_labels(1)
|
||||
assert event_parse._loss_labels == ["label_c", "label_d"]
|
||||
|
||||
def test__process_event(self, event_parser_instance: _EventParser) -> None:
|
||||
""" Test _process_event works correctly
|
||||
|
||||
|
|
|
@ -7,18 +7,12 @@ Adapted from Keras tests.
|
|||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
from tensorflow.keras import initializers as k_initializers # noqa:E501 # pylint:disable=import-error
|
||||
|
||||
from lib.model import initializers
|
||||
from lib.utils import get_backend
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras import backend as K
|
||||
from keras import initializers as k_initializers
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
from tensorflow.keras import initializers as k_initializers # pylint:disable=import-error
|
||||
|
||||
|
||||
CONV_SHAPE = (3, 3, 256, 2048)
|
||||
CONV_ID = get_backend().upper()
|
||||
|
||||
|
@ -49,8 +43,11 @@ def test_icnr(tensor_shape):
|
|||
"""
|
||||
fan_in, _ = initializers.compute_fans(tensor_shape)
|
||||
std = np.sqrt(2. / fan_in)
|
||||
_runner(initializers.ICNR(initializer=k_initializers.he_uniform(), scale=2), tensor_shape,
|
||||
target_mean=0, target_std=std)
|
||||
_runner(initializers.ICNR(initializer=k_initializers.he_uniform(), # pylint:disable=no-member
|
||||
scale=2),
|
||||
tensor_shape,
|
||||
target_mean=0,
|
||||
target_std=std)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('tensor_shape', [CONV_SHAPE], ids=[CONV_ID])
|
||||
|
|
|
@ -10,17 +10,13 @@ import numpy as np
|
|||
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import Input, Model, backend as K # pylint:disable=import-error
|
||||
|
||||
from lib.model import layers
|
||||
from lib.utils import get_backend
|
||||
from tests.utils import has_arg
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras import Input, Model, backend as K
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import Input, Model, backend as K # pylint:disable=import-error
|
||||
|
||||
|
||||
CONV_SHAPE = (3, 3, 256, 2048)
|
||||
CONV_ID = get_backend().upper()
|
||||
|
||||
|
@ -40,7 +36,7 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None, # noqa
|
|||
for i, var_e in enumerate(input_data_shape):
|
||||
if var_e is None:
|
||||
input_data_shape[i] = np.random.randint(1, 4)
|
||||
input_data = (10 * np.random.random(input_data_shape))
|
||||
input_data = 10 * np.random.random(input_data_shape)
|
||||
input_data = input_data.astype(input_dtype)
|
||||
else:
|
||||
if input_shape is None:
|
||||
|
@ -111,7 +107,6 @@ def test_pixel_shuffler(dummy): # pylint:disable=unused-argument
|
|||
layer_test(layers.PixelShuffler, input_shape=(2, 4, 4, 1024))
|
||||
|
||||
|
||||
@pytest.mark.skipif(get_backend() == "amd", reason="amd does not support this layer")
|
||||
@pytest.mark.parametrize('dummy', [None], ids=[get_backend().upper()])
|
||||
def test_subpixel_upscaling(dummy): # pylint:disable=unused-argument
|
||||
""" Sub Pixel up-scaling layer test """
|
||||
|
|
|
@ -7,16 +7,13 @@ Adapted from Keras tests.
|
|||
import pytest
|
||||
import numpy as np
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import backend as K, losses as k_losses # noqa:E501 # pylint:disable=import-error
|
||||
|
||||
|
||||
from lib.model import losses
|
||||
from lib.utils import get_backend
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras import backend as K, losses as k_losses
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import backend as K, losses as k_losses # pylint:disable=import-error
|
||||
|
||||
|
||||
_PARAMS = [(losses.GeneralizedLoss(), (2, 16, 16)),
|
||||
(losses.GradientLoss(), (2, 16, 16)),
|
||||
# TODO Make sure these output dimensions are correct
|
||||
|
@ -33,11 +30,8 @@ def test_loss_output(loss_func, output_shape):
|
|||
y_a = K.variable(np.random.random((2, 16, 16, 3)))
|
||||
y_b = K.variable(np.random.random((2, 16, 16, 3)))
|
||||
objective_output = loss_func(y_a, y_b)
|
||||
if get_backend() == "amd":
|
||||
assert K.eval(objective_output).shape == output_shape
|
||||
else:
|
||||
output = objective_output.numpy()
|
||||
assert output.dtype == "float32" and not np.any(np.isnan(output))
|
||||
output = objective_output.numpy()
|
||||
assert output.dtype == "float32" and not np.any(np.isnan(output))
|
||||
|
||||
|
||||
_LWPARAMS = [losses.DSSIMObjective(),
|
||||
|
@ -48,7 +42,7 @@ _LWPARAMS = [losses.DSSIMObjective(),
|
|||
losses.LaplacianPyramidLoss(),
|
||||
losses.LDRFLIPLoss(),
|
||||
losses.LInfNorm(),
|
||||
losses.LogCosh() if get_backend() == "amd" else k_losses.logcosh,
|
||||
k_losses.logcosh, # pylint:disable=no-member
|
||||
k_losses.mean_absolute_error,
|
||||
k_losses.mean_squared_error,
|
||||
losses.MSSIMLoss()]
|
||||
|
@ -60,17 +54,11 @@ _LWIDS = [f"{loss}[{get_backend().upper()}]" for loss in _LWIDS]
|
|||
@pytest.mark.parametrize("loss_func", _LWPARAMS, ids=_LWIDS)
|
||||
def test_loss_wrapper(loss_func):
|
||||
""" Test penalized loss wrapper works as expected """
|
||||
if get_backend() == "amd":
|
||||
if isinstance(loss_func, losses.FocalFrequencyLoss):
|
||||
pytest.skip("FocalFrequencyLoss Loss is not currently compatible with PlaidML")
|
||||
y_a = K.variable(np.random.random((2, 64, 64, 4)))
|
||||
y_b = K.variable(np.random.random((2, 64, 64, 3)))
|
||||
p_loss = losses.LossWrapper()
|
||||
p_loss.add_loss(loss_func, 1.0, -1)
|
||||
p_loss.add_loss(k_losses.mean_squared_error, 2.0, 3)
|
||||
output = p_loss(y_a, y_b)
|
||||
if get_backend() == "amd":
|
||||
assert K.dtype(output) == "float32" and K.eval(output).shape == (2, )
|
||||
else:
|
||||
output = output.numpy()
|
||||
assert output.dtype == "float32" and not np.any(np.isnan(output))
|
||||
output = output.numpy()
|
||||
assert output.dtype == "float32" and not np.any(np.isnan(output))
|
||||
|
|
|
@ -9,25 +9,17 @@ from itertools import product
|
|||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import Input, Model, backend as K # pylint:disable=import-error
|
||||
|
||||
from lib.model import nn_blocks
|
||||
from lib.utils import get_backend
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras import Input, Model, backend as K
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import Input, Model, backend as K # pylint:disable=import-error
|
||||
|
||||
|
||||
def block_test(layer_func, kwargs={}, input_shape=None):
|
||||
"""Test routine for faceswap neural network blocks.
|
||||
|
||||
Tests are simple and are to ensure that the blocks compile on both tensorflow
|
||||
and plaidml backends
|
||||
"""
|
||||
"""Test routine for faceswap neural network blocks. """
|
||||
# generate input data
|
||||
assert input_shape
|
||||
input_dtype = K.floatx()
|
||||
|
@ -35,7 +27,7 @@ def block_test(layer_func, kwargs={}, input_shape=None):
|
|||
for i, var_e in enumerate(input_data_shape):
|
||||
if var_e is None:
|
||||
input_data_shape[i] = np.random.randint(1, 4)
|
||||
input_data = (10 * np.random.random(input_data_shape))
|
||||
input_data = 10 * np.random.random(input_data_shape)
|
||||
input_data = input_data.astype(input_dtype)
|
||||
expected_output_dtype = input_dtype
|
||||
|
||||
|
@ -64,16 +56,16 @@ def block_test(layer_func, kwargs={}, input_shape=None):
|
|||
|
||||
_PARAMS = ["use_icnr_init", "use_convaware_init", "use_reflect_padding"]
|
||||
_VALUES = list(product([True, False], repeat=len(_PARAMS)))
|
||||
_IDS = ["{}[{}]".format("|".join([_PARAMS[idx] for idx, b in enumerate(v) if b]),
|
||||
get_backend().upper()) for v in _VALUES]
|
||||
_IDS = [f"{'|'.join([_PARAMS[idx] for idx, b in enumerate(v) if b])}[{get_backend().upper()}]"
|
||||
for v in _VALUES]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(_PARAMS, _VALUES, ids=_IDS)
|
||||
def test_blocks(use_icnr_init, use_convaware_init, use_reflect_padding):
|
||||
""" Test for all blocks contained within the NNBlocks Class """
|
||||
config = dict(icnr_init=use_icnr_init,
|
||||
conv_aware_init=use_convaware_init,
|
||||
reflect_padding=use_reflect_padding)
|
||||
config = {"icnr_init": use_icnr_init,
|
||||
"conv_aware_init": use_convaware_init,
|
||||
"reflect_padding": use_reflect_padding}
|
||||
nn_blocks.set_config(config)
|
||||
block_test(nn_blocks.Conv2DOutput(64, 3), input_shape=(2, 8, 8, 32))
|
||||
block_test(nn_blocks.Conv2DBlock(64), input_shape=(2, 8, 8, 32))
|
||||
|
|
|
@ -8,17 +8,13 @@ from itertools import product
|
|||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from tensorflow.keras import regularizers, models, layers # noqa:E501 # pylint:disable=import-error
|
||||
|
||||
from lib.model import normalization
|
||||
from lib.utils import get_backend
|
||||
|
||||
from tests.lib.model.layers_test import layer_test
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras import regularizers, models, layers
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import regularizers, models, layers # pylint:disable=import-error
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dummy', [None], ids=[get_backend().upper()])
|
||||
def test_instance_normalization(dummy): # pylint:disable=unused-argument
|
||||
|
@ -69,8 +65,8 @@ def test_group_normalization(dummy): # pylint:disable=unused-argument
|
|||
|
||||
_PARAMS = ["center", "scale"]
|
||||
_VALUES = list(product([True, False], repeat=len(_PARAMS)))
|
||||
_IDS = ["{}[{}]".format("|".join([_PARAMS[idx] for idx, b in enumerate(v) if b]),
|
||||
get_backend().upper()) for v in _VALUES]
|
||||
_IDS = [f"{'|'.join([_PARAMS[idx] for idx, b in enumerate(v) if b])}[{get_backend().upper()}]"
|
||||
for v in _VALUES]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(_PARAMS, _VALUES, ids=_IDS)
|
||||
|
@ -95,14 +91,6 @@ def test_adain_normalization(center, scale):
|
|||
assert expected_dim == actual_dim
|
||||
|
||||
|
||||
@pytest.mark.parametrize(_PARAMS, _VALUES, ids=_IDS)
|
||||
def test_layer_normalization(center, scale):
|
||||
""" Basic test for layer normalization. """
|
||||
layer_test(normalization.LayerNormalization,
|
||||
kwargs={"center": center, "scale": scale},
|
||||
input_shape=(4, 512))
|
||||
|
||||
|
||||
_PARAMS = ["partial", "bias"]
|
||||
_VALUES = [(0.0, False), (0.25, False), (0.5, True), (0.75, False), (1.0, True)] # type:ignore
|
||||
_IDS = [f"partial={v[0]}|bias={v[1]}[{get_backend().upper()}]" for v in _VALUES]
|
||||
|
|
|
@ -7,22 +7,16 @@ import pytest
|
|||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import optimizers as k_optimizers # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import Dense, Activation # pylint:disable=import-error
|
||||
from tensorflow.keras.models import Sequential # pylint:disable=import-error
|
||||
|
||||
from lib.model import optimizers
|
||||
from lib.utils import get_backend
|
||||
|
||||
from tests.utils import generate_test_data, to_categorical
|
||||
|
||||
if get_backend() == "amd":
|
||||
from keras import optimizers as k_optimizers
|
||||
from keras.layers import Dense, Activation
|
||||
from keras.models import Sequential
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow.keras import optimizers as k_optimizers # pylint:disable=import-error
|
||||
from tensorflow.keras.layers import Dense, Activation # noqa pylint:disable=import-error,no-name-in-module
|
||||
from tensorflow.keras.models import Sequential # pylint:disable=import-error,no-name-in-module
|
||||
|
||||
|
||||
def get_test_data():
|
||||
""" Obtain randomized test data for training """
|
||||
|
@ -49,8 +43,7 @@ def _test_optimizer(optimizer, target=0.75):
|
|||
metrics=["accuracy"])
|
||||
|
||||
history = model.fit(x_train, y_train, epochs=2, batch_size=16, verbose=0)
|
||||
accuracy = "acc" if get_backend() == "amd" else "accuracy"
|
||||
assert history.history[accuracy][-1] >= target
|
||||
assert history.history["accuracy"][-1] >= target
|
||||
config = k_optimizers.serialize(optimizer)
|
||||
optim = k_optimizers.deserialize(config)
|
||||
new_config = k_optimizers.serialize(optim)
|
||||
|
@ -59,9 +52,6 @@ def _test_optimizer(optimizer, target=0.75):
|
|||
assert config == new_config
|
||||
|
||||
# Test constraints.
|
||||
if get_backend() == "amd":
|
||||
# NB: PlaidML does not support constraints, so this test skipped for AMD backends
|
||||
return
|
||||
model = Sequential()
|
||||
dense = Dense(10,
|
||||
input_shape=(x_train.shape[1],),
|
||||
|
@ -83,8 +73,8 @@ def _test_optimizer(optimizer, target=0.75):
|
|||
@pytest.mark.parametrize("dummy", [None], ids=[get_backend().upper()])
|
||||
def test_adam(dummy): # pylint:disable=unused-argument
|
||||
""" Test for custom Adam optimizer """
|
||||
_test_optimizer(k_optimizers.Adam(), target=0.45)
|
||||
_test_optimizer(k_optimizers.Adam(decay=1e-3), target=0.45)
|
||||
_test_optimizer(k_optimizers.Adam(), target=0.45) # pylint:disable=no-member
|
||||
_test_optimizer(k_optimizers.Adam(decay=1e-3), target=0.45) # pylint:disable=no-member
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dummy", [None], ids=[get_backend().upper()])
|
||||
|
|
|
@ -51,17 +51,17 @@ def test_init(sys_info_instance: _SysInfo) -> None:
|
|||
|
||||
assert hasattr(sys_info_instance, "_system")
|
||||
assert isinstance(sys_info_instance._system, dict)
|
||||
assert sys_info_instance._system == dict(platform=platform.platform(),
|
||||
system=platform.system().lower(),
|
||||
machine=platform.machine(),
|
||||
release=platform.release(),
|
||||
processor=platform.processor(),
|
||||
cpu_count=os.cpu_count())
|
||||
assert sys_info_instance._system == {"platform": platform.platform(),
|
||||
"system": platform.system().lower(),
|
||||
"machine": platform.machine(),
|
||||
"release": platform.release(),
|
||||
"processor": platform.processor(),
|
||||
"cpu_count": os.cpu_count()}
|
||||
|
||||
assert hasattr(sys_info_instance, "_python")
|
||||
assert isinstance(sys_info_instance._python, dict)
|
||||
assert sys_info_instance._python == dict(implementation=platform.python_implementation(),
|
||||
version=platform.python_version())
|
||||
assert sys_info_instance._python == {"implementation": platform.python_implementation(),
|
||||
"version": platform.python_version()}
|
||||
|
||||
assert hasattr(sys_info_instance, "_gpu")
|
||||
assert isinstance(sys_info_instance._gpu, GPUInfo)
|
||||
|
@ -302,7 +302,7 @@ def test__configs__parse_json(configs_instance: _Configs,
|
|||
|
||||
"""
|
||||
assert hasattr(configs_instance, "_parse_json")
|
||||
file = ('{"test": "param"}')
|
||||
file = '{"test": "param"}'
|
||||
monkeypatch.setattr("builtins.open", lambda *args, **kwargs: StringIO(file))
|
||||
|
||||
converted = configs_instance._parse_json(".file")
|
||||
|
|
|
@ -42,8 +42,8 @@ def test_set_backend(monkeypatch: pytest.MonkeyPatch) -> None:
|
|||
set_backend("directml")
|
||||
assert utils._FS_BACKEND == "directml"
|
||||
monkeypatch.delattr(utils, "_FS_BACKEND") # _FS_BACKEND is not already defined
|
||||
set_backend("amd")
|
||||
assert utils._FS_BACKEND == "amd"
|
||||
set_backend("rocm")
|
||||
assert utils._FS_BACKEND == "rocm"
|
||||
|
||||
|
||||
def test_get_backend(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
|
@ -73,9 +73,9 @@ def test__backend(monkeypatch: pytest.MonkeyPatch) -> None:
|
|||
|
||||
monkeypatch.setattr("os.environ", {}) # Environment variable not set, dummy in config file
|
||||
monkeypatch.setattr("os.path.isfile", lambda x: True)
|
||||
monkeypatch.setattr("builtins.open", lambda *args, **kwargs: StringIO('{"backend": "amd"}'))
|
||||
monkeypatch.setattr("builtins.open", lambda *args, **kwargs: StringIO('{"backend": "cpu"}'))
|
||||
backend = _Backend()
|
||||
assert backend.backend == "amd"
|
||||
assert backend.backend == "cpu"
|
||||
|
||||
monkeypatch.setattr("os.path.isfile", lambda x: False) # no config file, dummy in user input
|
||||
monkeypatch.setattr("builtins.input", lambda x: "3")
|
||||
|
|
|
@ -14,7 +14,6 @@ from urllib.request import urlretrieve
|
|||
import os
|
||||
from os.path import join as pathjoin, expanduser
|
||||
|
||||
_TRAIN_ARGS = (1, 1) if os.environ.get("FACESWAP_BACKEND", "cpu").lower() == "amd" else (4, 4)
|
||||
FAIL_COUNT = 0
|
||||
TEST_COUNT = 0
|
||||
_COLORS = {
|
||||
|
@ -202,8 +201,8 @@ def main():
|
|||
train_args("lightweight",
|
||||
pathjoin(vid_base, "model"),
|
||||
pathjoin(vid_base, "faces"),
|
||||
iterations=_TRAIN_ARGS[0],
|
||||
batchsize=_TRAIN_ARGS[1],
|
||||
iterations=1,
|
||||
batchsize=1,
|
||||
extra_args="-wl"))
|
||||
|
||||
set_train_config(False)
|
||||
|
@ -212,8 +211,8 @@ def main():
|
|||
train_args("lightweight",
|
||||
pathjoin(vid_base, "model"),
|
||||
pathjoin(vid_base, "faces"),
|
||||
iterations=_TRAIN_ARGS[0],
|
||||
batchsize=_TRAIN_ARGS[1],
|
||||
iterations=1,
|
||||
batchsize=1,
|
||||
extra_args="-wl"))
|
||||
|
||||
if was_trained:
|
||||
|
|
|
@ -2,20 +2,14 @@
|
|||
""" Sanity checks for Faceswap. """
|
||||
|
||||
import inspect
|
||||
|
||||
import pytest
|
||||
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow import keras
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
|
||||
from lib.utils import get_backend
|
||||
|
||||
if get_backend() == "amd":
|
||||
import keras
|
||||
from keras import backend as K
|
||||
else:
|
||||
# Ignore linting errors from Tensorflow's thoroughly broken import system
|
||||
from tensorflow import keras
|
||||
from tensorflow.keras import backend as K # pylint:disable=import-error
|
||||
|
||||
|
||||
_BACKEND = get_backend()
|
||||
|
||||
|
||||
|
@ -24,14 +18,11 @@ def test_backend(dummy): # pylint:disable=unused-argument
|
|||
""" Sanity check to ensure that Keras backend is returning the correct object type. """
|
||||
test_var = K.variable((1, 1, 4, 4))
|
||||
lib = inspect.getmodule(test_var).__name__.split(".")[0]
|
||||
assert ((_BACKEND in ("cpu", "directml") and lib == "tensorflow")
|
||||
or (_BACKEND == "amd" and lib == "plaidml"))
|
||||
assert _BACKEND in ("cpu", "directml") and lib == "tensorflow"
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dummy', [None], ids=[get_backend().upper()])
|
||||
def test_keras(dummy): # pylint:disable=unused-argument
|
||||
""" Sanity check to ensure that tensorflow keras is being used for CPU and standard
|
||||
keras for AMD. """
|
||||
assert ((_BACKEND in ("cpu", "directml")
|
||||
and keras.__version__ in ("2.7.0", "2.8.0", "2.9.0", "2.10.0"))
|
||||
or (_BACKEND == "amd" and keras.__version__ == "2.2.4"))
|
||||
""" Sanity check to ensure that tensorflow keras is being used for CPU """
|
||||
assert (_BACKEND in ("cpu", "directml")
|
||||
and keras.__version__ in ("2.7.0", "2.8.0", "2.9.0", "2.10.0"))
|
||||
|
|
|
@ -101,25 +101,19 @@ def has_arg(func, name, accept_all=False):
|
|||
bool
|
||||
Whether `func` accepts a `name` keyword argument.
|
||||
"""
|
||||
if sys.version_info < (3,):
|
||||
arg_spec = inspect.getargspec(func)
|
||||
if accept_all and arg_spec.keywords is not None:
|
||||
return True
|
||||
return (name in arg_spec.args)
|
||||
elif sys.version_info < (3, 3):
|
||||
if sys.version_info < (3, 3):
|
||||
arg_spec = inspect.getfullargspec(func)
|
||||
if accept_all and arg_spec.varkw is not None:
|
||||
return True
|
||||
return (name in arg_spec.args or
|
||||
name in arg_spec.kwonlyargs)
|
||||
else:
|
||||
signature = inspect.signature(func)
|
||||
parameter = signature.parameters.get(name)
|
||||
if parameter is None:
|
||||
if accept_all:
|
||||
for param in signature.parameters.values():
|
||||
if param.kind == inspect.Parameter.VAR_KEYWORD:
|
||||
return True
|
||||
return False
|
||||
return (parameter.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
||||
inspect.Parameter.KEYWORD_ONLY))
|
||||
signature = inspect.signature(func)
|
||||
parameter = signature.parameters.get(name)
|
||||
if parameter is None:
|
||||
if accept_all:
|
||||
for param in signature.parameters.values():
|
||||
if param.kind == inspect.Parameter.VAR_KEYWORD:
|
||||
return True
|
||||
return False
|
||||
return (parameter.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
||||
inspect.Parameter.KEYWORD_ONLY))
|
||||
|
|
|
@ -1,28 +1,23 @@
|
|||
#!/usr/bin/env python3
|
||||
""" Tool to restore models from backup """
|
||||
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Tuple, TYPE_CHECKING, Union
|
||||
import typing as T
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow import keras
|
||||
|
||||
from lib.model.backup_restore import Backup
|
||||
from lib.utils import get_backend
|
||||
|
||||
# Import the following libs for custom objects
|
||||
from lib.model import initializers, layers, normalization # noqa # pylint:disable=unused-import
|
||||
from plugins.train.model._base.model import _Inference
|
||||
|
||||
if get_backend() == "amd":
|
||||
import keras
|
||||
else:
|
||||
from tensorflow import keras
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
if T.TYPE_CHECKING:
|
||||
import argparse
|
||||
|
||||
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
@ -36,7 +31,7 @@ class Model(): # pylint:disable=too-few-public-methods
|
|||
:class:`argparse.Namespace`
|
||||
The command line arguments calling the model tool
|
||||
"""
|
||||
def __init__(self, arguments: 'argparse.Namespace') -> None:
|
||||
def __init__(self, arguments: argparse.Namespace) -> None:
|
||||
logger.debug("Initializing %s: (arguments: '%s'", self.__class__.__name__, arguments)
|
||||
self._configure_tensorflow()
|
||||
self._model_dir = self._check_folder(arguments.model_dir)
|
||||
|
@ -45,13 +40,11 @@ class Model(): # pylint:disable=too-few-public-methods
|
|||
@classmethod
|
||||
def _configure_tensorflow(cls) -> None:
|
||||
""" Disable eager execution and force Tensorflow into CPU mode. """
|
||||
if get_backend() == "amd":
|
||||
return
|
||||
tf.config.set_visible_devices([], device_type="GPU")
|
||||
tf.compat.v1.disable_eager_execution()
|
||||
|
||||
@classmethod
|
||||
def _get_job(cls, arguments: "argparse.Namespace") -> Any:
|
||||
def _get_job(cls, arguments: argparse.Namespace) -> T.Any:
|
||||
""" Get the correct object that holds the selected job.
|
||||
|
||||
Parameters
|
||||
|
@ -120,12 +113,12 @@ class Inference(): # pylint:disable=too-few-public-methods
|
|||
:class:`argparse.Namespace`
|
||||
The command line arguments calling the model tool
|
||||
"""
|
||||
def __init__(self, arguments: "argparse.Namespace") -> None:
|
||||
def __init__(self, arguments: argparse.Namespace) -> None:
|
||||
self._switch = arguments.swap_model
|
||||
self._format = arguments.format
|
||||
self._input_file, self._output_file = self._get_output_file(arguments.model_dir)
|
||||
|
||||
def _get_output_file(self, model_dir: str) -> Tuple[str, str]:
|
||||
def _get_output_file(self, model_dir: str) -> T.Tuple[str, str]:
|
||||
""" Obtain the full path for the output model file/folder
|
||||
|
||||
Parameters
|
||||
|
@ -168,7 +161,7 @@ class NaNScan(): # pylint:disable=too-few-public-methods
|
|||
:class:`argparse.Namespace`
|
||||
The command line arguments calling the model tool
|
||||
"""
|
||||
def __init__(self, arguments: "argparse.Namespace") -> None:
|
||||
def __init__(self, arguments: argparse.Namespace) -> None:
|
||||
logger.debug("Initializing %s: (arguments: '%s'", self.__class__.__name__, arguments)
|
||||
self._model_file = self._get_model_filename(arguments.model_dir)
|
||||
|
||||
|
@ -190,7 +183,7 @@ class NaNScan(): # pylint:disable=too-few-public-methods
|
|||
return os.path.join(model_dir, model_file)
|
||||
|
||||
def _parse_weights(self,
|
||||
layer: Union[keras.models.Model, keras.layers.Layer]) -> dict:
|
||||
layer: T.Union[keras.models.Model, keras.layers.Layer]) -> dict:
|
||||
""" Recursively pass through sub-models to scan layer weights"""
|
||||
weights = layer.get_weights()
|
||||
logger.debug("Processing weights for layer '%s', length: '%s'",
|
||||
|
@ -214,7 +207,7 @@ class NaNScan(): # pylint:disable=too-few-public-methods
|
|||
|
||||
if nans + infs == 0:
|
||||
return {}
|
||||
return dict(nans=nans, infs=infs)
|
||||
return {"nans": nans, "infs": infs}
|
||||
|
||||
def _parse_output(self, errors: dict, indent: int = 0) -> None:
|
||||
""" Parse the output of the errors dictionary and print a pretty summary.
|
||||
|
@ -260,7 +253,7 @@ class Restore(): # pylint:disable=too-few-public-methods
|
|||
:class:`argparse.Namespace`
|
||||
The command line arguments calling the model tool
|
||||
"""
|
||||
def __init__(self, arguments: "argparse.Namespace") -> None:
|
||||
def __init__(self, arguments: argparse.Namespace) -> None:
|
||||
logger.debug("Initializing %s: (arguments: '%s'", self.__class__.__name__, arguments)
|
||||
self._model_dir = arguments.model_dir
|
||||
self._model_name = self._get_model_name()
|
||||
|
|
Loading…
Add table
Reference in a new issue