1
0
Fork 0
mirror of https://github.com/deepfakes/faceswap synced 2025-06-08 20:13:52 -04:00
faceswap/lib/gpu_stats/amd.py
2022-12-20 15:30:26 +00:00

400 lines
15 KiB
Python

#!/usr/bin/env python3
""" Collects and returns Information on available AMD GPUs. """
import json
import logging
import os
import sys
from typing import List, Optional
import plaidml
from ._base import _GPUStats, _EXCLUDE_DEVICES
_PLAIDML_INITIALIZED: bool = False
def setup_plaidml(log_level: str, exclude_devices: List[int]) -> None:
    """ Set up PlaidML so that AMD cards can be used for training and inference.

    Switches the Keras backend over to PlaidML, registers any explicitly excluded
    devices and instantiates :class:`AMDStats` so that GPU device information from
    PlaidML is collected and logged.

    Parameters
    ----------
    log_level: str
        Faceswap's log level. Used for setting the log level inside PlaidML
    exclude_devices: list
        A list of integers of device IDs that should not be used by Faceswap
    """
    logger = logging.getLogger(__name__)  # pylint:disable=invalid-name
    logger.info("Setting up for PlaidML")
    logger.verbose("Setting Keras Backend to PlaidML")  # type:ignore

    # Devices excluded on the command line go into the shared exclusion list. The
    # contents are checked in AMDStats when the active devices are calculated.
    for device_id in exclude_devices:
        _EXCLUDE_DEVICES.append(int(device_id))

    os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
    stats = AMDStats(log_level=log_level)
    active_names = [stats.names[idx] for idx in stats.active_devices]
    logger.info("Using GPU(s): %s", active_names)
    logger.info("Successfully set up for PlaidML")
class AMDStats(_GPUStats):
    """ Holds information and statistics about AMD GPU(s) available on the currently
    running system.

    Notes
    -----
    The quality of data that returns is very much dependent on the OpenCL implementation used
    for a particular OS. Some data is just not available at all, so assumptions and substitutions
    are made where required. PlaidML is used as an interface into OpenCL to obtain the required
    information.

    PlaidML is explicitly initialized inside this class, as it can be called from the command line
    arguments to list available GPUs. PlaidML needs to be set up and configured to obtain reliable
    information. As the function :func:`setup_plaidml` is called very early within the Faceswap
    and launch process and it references this class, initial PlaidML setup can all be handled here.

    Parameters
    ----------
    log: bool, optional
        Whether the class should output information to the logger. There may be occasions where the
        logger has not yet been set up when this class is queried. Attempting to log in these
        instances will raise an error. If GPU stats are being queried prior to the logger being
        available then this parameter should be set to ``False``. Otherwise set to ``True``.
        Default: ``True``
    log_level: str, optional
        Faceswap's log level. Used for setting the log level inside PlaidML.
        Default: ``"INFO"``
    """
    def __init__(self, log: bool = True, log_level: str = "INFO") -> None:
        self._log_level: str = log_level.upper()

        # Following attributes are set in :func:``_initialize``
        self._ctx: Optional[plaidml.Context] = None
        self._supported_devices: List[plaidml._DeviceConfig] = []
        self._all_devices: List[plaidml._DeviceConfig] = []
        self._device_details: List[dict] = []

        super().__init__(log=log)

    @property
    def active_devices(self) -> List[int]:
        """ list: The active device ids in use. """
        return self._active_devices

    @property
    def _plaid_ids(self) -> List[str]:
        """ list: The device identification for each GPU device that PlaidML has discovered. """
        return [device.id.decode("utf-8", errors="replace") for device in self._all_devices]

    @property
    def _experimental_indices(self) -> List[int]:
        """ list: The indices corresponding to :attr:`_ids` of GPU devices marked as
        "experimental". """
        # NB: Fixed to test membership of the supported *devices* list. The original compared
        # _DeviceConfig objects against _supported_indices (a list of ints), which could never
        # match, so every device was reported as experimental.
        retval = [idx for idx, device in enumerate(self._all_devices)
                  if device not in self._supported_devices]
        return retval

    @property
    def _supported_indices(self) -> List[int]:
        """ list: The indices corresponding to :attr:`_ids` of GPU devices marked as
        "supported". """
        retval = [idx for idx, device in enumerate(self._all_devices)
                  if device in self._supported_devices]
        return retval

    @property
    def _all_vram(self) -> List[int]:
        """ list: The VRAM of each GPU device that PlaidML has discovered, in Megabytes. """
        return [int(int(device.get("globalMemSize", 0)) / (1024 * 1024))
                for device in self._device_details]

    @property
    def names(self) -> List[str]:
        """ list: The name of each GPU device that PlaidML has discovered. """
        return [f"{device.get('vendor', 'unknown')} - {device.get('name', 'unknown')} "
                f"({ 'supported' if idx in self._supported_indices else 'experimental'})"
                for idx, device in enumerate(self._device_details)]

    def _initialize(self) -> None:
        """ Initialize PlaidML for AMD GPUs.

        If :attr:`_is_initialized` is ``True`` then this function just returns performing no
        action.

        if ``False`` then PlaidML is setup, if not already, and GPU information is extracted
        from the PlaidML context.
        """
        if self._is_initialized:
            return
        self._log("debug", "Initializing PlaidML for AMD GPU.")
        self._initialize_plaidml()
        self._ctx = plaidml.Context()
        # Order matters: the supported list must exist before _get_all_devices and the
        # device details before _select_device
        self._supported_devices = self._get_supported_devices()
        self._all_devices = self._get_all_devices()
        self._device_details = self._get_device_details()
        self._select_device()
        super()._initialize()

    def _initialize_plaidml(self) -> None:
        """ Initialize PlaidML on first call to this class and set global
        :attr:``_PLAIDML_INITIALIZED`` to ``True``. If PlaidML has already been initialized then
        return performing no action. """
        global _PLAIDML_INITIALIZED  # pylint:disable=global-statement
        if _PLAIDML_INITIALIZED:
            return
        self._log("debug", "Performing first time PlaidML setup.")
        self._set_plaidml_logger()
        _PLAIDML_INITIALIZED = True

    def _set_plaidml_logger(self) -> None:
        """ Set PlaidMLs default logger to Faceswap Logger, prevent propagation and set the correct
        log level. """
        self._log("debug", "Setting PlaidML Default Logger")

        plaidml.DEFAULT_LOG_HANDLER = logging.getLogger("plaidml_root")
        plaidml.DEFAULT_LOG_HANDLER.propagate = False

        numeric_level = getattr(logging, self._log_level, None)
        assert numeric_level is not None
        if numeric_level < 10:  # DEBUG Logging
            plaidml._internal_set_vlog(1)  # pylint:disable=protected-access
        elif numeric_level < 20:  # INFO Logging
            plaidml._internal_set_vlog(0)  # pylint:disable=protected-access
        else:  # WARNING LOGGING
            plaidml.quiet()

    def _get_supported_devices(self) -> List[plaidml._DeviceConfig]:
        """ Obtain GPU devices from PlaidML that are marked as "supported".

        Returns
        -------
        list
            The :class:`plaidml._DeviceConfig` objects for all supported GPUs that PlaidML has
            discovered.
        """
        # Temporarily disable the experimental flag so that only supported devices are listed,
        # then restore the user's original setting
        experimental_setting = plaidml.settings.experimental
        plaidml.settings.experimental = False
        devices = plaidml.devices(self._ctx, limit=100, return_all=True)[0]
        plaidml.settings.experimental = experimental_setting

        supported = [d for d in devices
                     if d.details
                     and json.loads(
                         d.details.decode("utf-8",
                                          errors="replace")).get("type", "cpu").lower() == "gpu"]

        self._log("debug", f"Obtained supported devices: {supported}")
        return supported

    def _get_all_devices(self) -> List[plaidml._DeviceConfig]:
        """ Obtain all available (experimental and supported) GPU devices from PlaidML.

        Returns
        -------
        list
            The :class:`plaidml._DeviceConfig` objects for GPUs that PlaidML has discovered.
        """
        # Temporarily enable the experimental flag to pick up experimental devices, then
        # restore the user's original setting
        experimental_setting = plaidml.settings.experimental
        plaidml.settings.experimental = True
        devices = plaidml.devices(self._ctx, limit=100, return_all=True)[0]
        plaidml.settings.experimental = experimental_setting

        experi = [d for d in devices
                  if d.details
                  and json.loads(
                      d.details.decode("utf-8",
                                       errors="replace")).get("type", "cpu").lower() == "gpu"]

        self._log("debug", f"Obtained experimental Devices: {experi}")

        # NOTE(review): presumably the experimental query excludes already-supported devices,
        # otherwise this concatenation would duplicate them — confirm against PlaidML behavior
        all_devices = experi + self._supported_devices
        all_devices = all_devices if all_devices else self._get_fallback_devices()  # Use CPU

        self._log("debug", f"Obtained all Devices: {all_devices}")
        return all_devices

    def _get_fallback_devices(self) -> List[plaidml._DeviceConfig]:
        """ Called if a GPU has not been discovered. Return any devices we can run on.

        Returns
        -------
        list:
            The :class:`plaidml._DeviceConfig` fallback objects that PlaidML has discovered.
        """
        # Try get a supported device
        experimental_setting = plaidml.settings.experimental
        plaidml.settings.experimental = False
        devices = plaidml.devices(self._ctx, limit=100, return_all=True)[0]

        # Try get any device
        if not devices:
            plaidml.settings.experimental = True
            devices = plaidml.devices(self._ctx, limit=100, return_all=True)[0]
        plaidml.settings.experimental = experimental_setting

        if not devices:
            raise RuntimeError("No valid devices could be found for plaidML.")

        self._log("warning", f"PlaidML could not find a GPU. Falling back to: "
                             f"{[d.id.decode('utf-8', errors='replace') for d in devices]}")
        return devices

    def _get_device_details(self) -> List[dict]:
        """ Obtain the device details for all connected AMD GPUS.

        Returns
        -------
        list
            The `dict` device detail for all GPUs that PlaidML has discovered.
        """
        details = []
        for dev in self._all_devices:
            if dev.details:
                details.append(json.loads(dev.details.decode("utf-8", errors="replace")))
            else:
                # No details available from OpenCL, so substitute placeholder values
                details.append(dict(vendor=dev.id.decode("utf-8", errors="replace"),
                                    name=dev.description.decode("utf-8", errors="replace"),
                                    globalMemSize=4 * 1024 * 1024 * 1024))  # 4GB dummy ram
        self._log("debug", f"Obtained Device details: {details}")
        return details

    def _select_device(self) -> None:
        """
        If the plaidml user configuration settings exist, then set the default GPU from the
        settings file, Otherwise set the GPU to be the one with most VRAM. """
        if os.path.exists(plaidml.settings.user_settings):  # pylint:disable=no-member
            self._log("debug", "Setting PlaidML devices from user_settings")
        else:
            self._select_largest_gpu()

    def _select_largest_gpu(self) -> None:
        """ Set the default GPU to be a supported device with the most available VRAM. If no
        supported device is available, then set the GPU to be an experimental device with the
        most VRAM available. """
        category = "supported" if self._supported_devices else "experimental"
        self._log("debug", f"Obtaining largest {category} device")
        indices = getattr(self, f"_{category}_indices")
        if not indices:
            self._log("error", "Failed to automatically detect your GPU.")
            self._log("error", "Please run `plaidml-setup` to set up your GPU.")
            sys.exit(1)

        max_vram = max(self._all_vram[idx] for idx in indices)
        self._log("debug", f"Max VRAM: {max_vram}")

        # Lowest index wins on a VRAM tie, and only indices in the chosen category count
        gpu_idx = min(idx for idx, vram in enumerate(self._all_vram)
                      if vram == max_vram and idx in indices)
        self._log("debug", f"GPU IDX: {gpu_idx}")

        selected_gpu = self._plaid_ids[gpu_idx]
        self._log("info", f"Setting GPU to largest available {category} device. If you want to "
                          "override this selection, run `plaidml-setup` from the command line.")

        plaidml.settings.experimental = category == "experimental"
        plaidml.settings.device_ids = [selected_gpu]

    def _get_device_count(self) -> int:
        """ Detect the number of AMD GPUs available from PlaidML.

        Returns
        -------
        int
            The total number of AMD GPUs available
        """
        retval = len(self._all_devices)
        self._log("debug", f"GPU Device count: {retval}")
        return retval

    def _get_active_devices(self) -> List[int]:
        """ Obtain the indices of active GPUs (those that have not been explicitly excluded by
        PlaidML or explicitly excluded in the command line arguments).

        Returns
        -------
        list
            The list of device indices that are available for Faceswap to use
        """
        devices = [idx for idx, d_id in enumerate(self._plaid_ids)
                   if d_id in plaidml.settings.device_ids and idx not in _EXCLUDE_DEVICES]
        self._log("debug", f"Active GPU Devices: {devices}")
        return devices

    def _get_handles(self) -> list:
        """ AMD Doesn't really use device handles, so we just return the all devices list

        Returns
        -------
        list
            The list of all AMD discovered GPUs
        """
        handles = self._all_devices
        self._log("debug", f"AMD GPU Handles found: {handles}")
        return handles

    def _get_driver(self) -> str:
        """ Obtain the AMD driver version currently in use.

        Returns
        -------
        str
            The current AMD GPU driver versions
        """
        drivers = "|".join([device.get("driverVersion", "No Driver Found")
                            for device in self._device_details])
        self._log("debug", f"GPU Drivers: {drivers}")
        return drivers

    def _get_device_names(self) -> List[str]:
        """ Obtain the list of names of connected AMD GPUs as identified in :attr:`_handles`.

        Returns
        -------
        list
            The list of connected AMD GPU names
        """
        names = self.names
        self._log("debug", f"GPU Devices: {names}")
        return names

    def _get_vram(self) -> List[int]:
        """ Obtain the VRAM in Megabytes for each connected AMD GPU as identified in
        :attr:`_handles`.

        Returns
        -------
        list
            The VRAM in Megabytes for each connected AMD GPU
        """
        vram = self._all_vram
        self._log("debug", f"GPU VRAM: {vram}")
        return vram

    def _get_free_vram(self) -> List[int]:
        """ Obtain the amount of VRAM that is available, in Megabytes, for each connected AMD
        GPU.

        Notes
        -----
        There is no useful way to get free VRAM on PlaidML. OpenCL loads and unloads VRAM as
        required, so this returns the total memory available per card for AMD GPUs, which is
        not particularly useful.

        Returns
        -------
        list
            List of `float`s containing the amount of VRAM available, in Megabytes, for each
            connected GPU as corresponding to the values in :attr:`_handles`
        """
        vram = self._all_vram
        self._log("debug", f"GPU VRAM free: {vram}")
        return vram