#!/usr/bin/env python3
""" Collects and returns Information on available AMD GPUs. """
import json
import logging
import os
import sys

from typing import List, Optional

import plaidml

from ._base import _GPUStats, _EXCLUDE_DEVICES

# Module level flag so that the one-off PlaidML setup (logger configuration) is only performed
# the first time an :class:`AMDStats` object is initialized.
_PLAIDML_INITIALIZED: bool = False


def setup_plaidml(log_level: str, exclude_devices: List[int]) -> None:
    """ Setup PlaidML for AMD Cards.

    Sets the Keras backend environment variable to PlaidML, records any explicitly excluded
    devices and instantiates :class:`AMDStats` so that GPU Device information from PlaidML is
    collected and logged.

    Parameters
    ----------
    log_level: str
        Faceswap's log level. Used for setting the log level inside PlaidML
    exclude_devices: list
        A list of integers of device IDs that should not be used by Faceswap
    """
    logger = logging.getLogger(__name__)  # pylint:disable=invalid-name
    logger.info("Setting up for PlaidML")
    logger.verbose("Setting Keras Backend to PlaidML")  # type:ignore
    # Add explicitly excluded devices to list. The contents are checked in AMDstats
    if exclude_devices:
        _EXCLUDE_DEVICES.extend(int(idx) for idx in exclude_devices)
    os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
    stats = AMDStats(log_level=log_level)
    # ``names`` is rebuilt on every access, so fetch it once rather than once per device
    names = stats.names
    logger.info("Using GPU(s): %s", [names[i] for i in stats.active_devices])
    logger.info("Successfully set up for PlaidML")


class AMDStats(_GPUStats):
    """ Holds information and statistics about AMD GPU(s) available on the currently
    running system.

    Notes
    -----
    The quality of data that returns is very much dependent on the OpenCL implementation used
    for a particular OS. Some data is just not available at all, so assumptions and substitutions
    are made where required. PlaidML is used as an interface into OpenCL to obtain the required
    information.

    PlaidML is explicitly initialized inside this class, as it can be called from the command line
    arguments to list available GPUs. PlaidML needs to be set up and configured to obtain reliable
    information. As the function :func:`setup_plaidml` is called very early within the Faceswap
    and launch process and it references this class, initial PlaidML setup can all be handled here.

    Parameters
    ----------
    log: bool, optional
        Whether the class should output information to the logger. There may be occasions where the
        logger has not yet been set up when this class is queried. Attempting to log in these
        instances will raise an error. If GPU stats are being queried prior to the logger being
        available then this parameter should be set to ``False``. Otherwise set to ``True``.
        Default: ``True``
    log_level: str, optional
        Faceswap's log level name. It is upper-cased and used to select the verbosity of PlaidML's
        own logging. Default: ``"INFO"``
    """
    def __init__(self, log: bool = True, log_level: str = "INFO") -> None:

        self._log_level: str = log_level.upper()

        # Following attributes are set in :func:``_initialize``
        self._ctx: Optional[plaidml.Context] = None
        self._supported_devices: List[plaidml._DeviceConfig] = []
        self._all_devices: List[plaidml._DeviceConfig] = []
        self._device_details: List[dict] = []

        super().__init__(log=log)

    @property
    def active_devices(self) -> List[int]:
        """ list: The active device ids in use. """
        return self._active_devices

    @property
    def _plaid_ids(self) -> List[str]:
        """ list: The device identification for each GPU device that PlaidML has discovered. """
        return [device.id.decode("utf-8", errors="replace") for device in self._all_devices]

    @property
    def _experimental_indices(self) -> List[int]:
        """ list: The indices corresponding to :attr:`_all_devices` of GPU devices marked as
        "experimental" (every device that is not in :attr:`_supported_indices`). """
        # Compare indices with indices. Testing the device object against the list of integer
        # indices can never match and would report every device as experimental.
        supported = set(self._supported_indices)
        return [idx for idx in range(len(self._all_devices)) if idx not in supported]

    @property
    def _supported_indices(self) -> List[int]:
        """ list: The indices corresponding to :attr:`_all_devices` of GPU devices marked as
        "supported". """
        retval = [idx for idx, device in enumerate(self._all_devices)
                  if device in self._supported_devices]
        return retval

    @property
    def _all_vram(self) -> List[int]:
        """ list: The VRAM, in Megabytes, of each GPU device that PlaidML has discovered. Devices
        that do not report ``globalMemSize`` are given ``0``. """
        return [int(int(device.get("globalMemSize", 0)) / (1024 * 1024))
                for device in self._device_details]

    @property
    def names(self) -> List[str]:
        """ list: The name of each GPU device that PlaidML has discovered. """
        # Build the supported lookup once rather than once per device
        supported = set(self._supported_indices)
        return [f"{device.get('vendor', 'unknown')} - {device.get('name', 'unknown')} "
                f"({'supported' if idx in supported else 'experimental'})"
                for idx, device in enumerate(self._device_details)]

    def _initialize(self) -> None:
        """ Initialize PlaidML for AMD GPUs.

        If :attr:`_is_initialized` is ``True`` then this function just returns performing no
        action.

        if ``False`` then PlaidML is setup, if not already, and GPU information is extracted
        from the PlaidML context.
        """
        if self._is_initialized:
            return
        self._log("debug", "Initializing PlaidML for AMD GPU.")

        self._initialize_plaidml()

        # Order matters: all devices are built from the supported devices, and the device details
        # are built from all devices.
        self._ctx = plaidml.Context()
        self._supported_devices = self._get_supported_devices()
        self._all_devices = self._get_all_devices()
        self._device_details = self._get_device_details()
        self._select_device()

        super()._initialize()

    def _initialize_plaidml(self) -> None:
        """ Initialize PlaidML on first call to this class and set global
        :attr:``_PLAIDML_INITIALIZED`` to ``True``. If PlaidML has already been initialized then
        return performing no action. """
        global _PLAIDML_INITIALIZED  # pylint:disable=global-statement

        if _PLAIDML_INITIALIZED:
            return

        self._log("debug", "Performing first time PlaidML setup.")
        self._set_plaidml_logger()

        _PLAIDML_INITIALIZED = True

    def _set_plaidml_logger(self) -> None:
        """ Set PlaidMLs default logger to Faceswap Logger, prevent propagation and set the
        correct log level. """
        self._log("debug", "Setting PlaidML Default Logger")

        plaidml.DEFAULT_LOG_HANDLER = logging.getLogger("plaidml_root")
        plaidml.DEFAULT_LOG_HANDLER.propagate = False

        numeric_level = getattr(logging, self._log_level, None)
        assert numeric_level is not None
        # logging.DEBUG is 10 and logging.INFO is 20, so the thresholds below select:
        if numeric_level < 10:  # Levels more verbose than DEBUG
            plaidml._internal_set_vlog(1)  # pylint:disable=protected-access
        elif numeric_level < 20:  # DEBUG up to, but not including, INFO
            plaidml._internal_set_vlog(0)  # pylint:disable=protected-access
        else:  # INFO and above
            plaidml.quiet()

    def _plaidml_devices(self, experimental: bool) -> List[plaidml._DeviceConfig]:
        """ Enumerate PlaidML devices with the global "experimental" setting temporarily set.

        Parameters
        ----------
        experimental: bool
            The value to assign to :attr:`plaidml.settings.experimental` for the duration of the
            enumeration

        Returns
        -------
        list
            The first element of the result of :func:`plaidml.devices`
        """
        previous = plaidml.settings.experimental
        plaidml.settings.experimental = experimental
        try:
            return plaidml.devices(self._ctx, limit=100, return_all=True)[0]
        finally:
            # Always put the user's setting back, even if enumeration raises
            plaidml.settings.experimental = previous

    @staticmethod
    def _is_gpu_device(device: plaidml._DeviceConfig) -> bool:
        """ Test whether a PlaidML device reports itself as a GPU.

        Parameters
        ----------
        device: :class:`plaidml._DeviceConfig`
            The device to test

        Returns
        -------
        bool
            ``True`` if the device has details and the "type" entry of its JSON details is "gpu"
            (case insensitive). A missing "type" is treated as "cpu"
        """
        if not device.details:
            return False
        details = json.loads(device.details.decode("utf-8", errors="replace"))
        return details.get("type", "cpu").lower() == "gpu"

    def _get_supported_devices(self) -> List[plaidml._DeviceConfig]:
        """ Obtain GPU devices from PlaidML that are marked as "supported".

        Returns
        -------
        list
            The :class:`plaidml._DeviceConfig` objects for all supported GPUs that PlaidML has
            discovered.
        """
        supported = [device for device in self._plaidml_devices(experimental=False)
                     if self._is_gpu_device(device)]
        self._log("debug", f"Obtained supported devices: {supported}")
        return supported

    def _get_all_devices(self) -> List[plaidml._DeviceConfig]:
        """ Obtain all available (experimental and supported) GPU devices from PlaidML.

        The GPUs found with the experimental setting enabled are followed by the previously
        collected :attr:`_supported_devices`. If that leaves nothing, any device that PlaidML can
        run on is used instead.

        Returns
        -------
        list
            The :class:`plaidml._DeviceConfig` objects for GPUs that PlaidML has discovered.
        """
        # NOTE(review): if the experimental enumeration also returns supported devices they will
        # appear twice in the combined list - confirm against PlaidML's behaviour.
        experi = [device for device in self._plaidml_devices(experimental=True)
                  if self._is_gpu_device(device)]
        self._log("debug", f"Obtained experimental Devices: {experi}")

        all_devices = experi + self._supported_devices
        if not all_devices:
            all_devices = self._get_fallback_devices()  # Use CPU

        self._log("debug", f"Obtained all Devices: {all_devices}")
        return all_devices

    def _get_fallback_devices(self) -> List[plaidml._DeviceConfig]:
        """ Called if a GPU has not been discovered. Return any devices we can run on.

        Returns
        -------
        list:
            The :class:`plaidml._DeviceConfig` fallback objects that PlaidML has discovered.

        Raises
        ------
        RuntimeError
            If PlaidML cannot find any device at all
        """
        # Try get a supported device, then any device
        devices = self._plaidml_devices(experimental=False)
        if not devices:
            devices = self._plaidml_devices(experimental=True)

        if not devices:
            raise RuntimeError("No valid devices could be found for plaidML.")

        self._log("warning", f"PlaidML could not find a GPU. Falling back to: "
                  f"{[d.id.decode('utf-8', errors='replace') for d in devices]}")
        return devices

    def _get_device_details(self) -> List[dict]:
        """ Obtain the device details for all connected AMD GPUS.

        Returns
        -------
        list
            The `dict` device detail for all GPUs that PlaidML has discovered. Devices that
            provide no details receive a placeholder entry built from their id and description.
        """
        details = []
        for dev in self._all_devices:
            if dev.details:
                details.append(json.loads(dev.details.decode("utf-8", errors="replace")))
            else:
                details.append(dict(vendor=dev.id.decode("utf-8", errors="replace"),
                                    name=dev.description.decode("utf-8", errors="replace"),
                                    globalMemSize=4 * 1024 * 1024 * 1024))  # 4GB dummy ram
        self._log("debug", f"Obtained Device details: {details}")
        return details

    def _select_device(self) -> None:
        """ If the plaidml user configuration settings exist, then leave device selection to the
        settings file, Otherwise set the GPU to be the one with most VRAM.

        Notes
        -----
        When the settings file exists this method only logs; presumably PlaidML loads the file
        itself - verify against PlaidML.
        """
        if os.path.exists(plaidml.settings.user_settings):  # pylint:disable=no-member
            self._log("debug", "Setting PlaidML devices from user_settings")
        else:
            self._select_largest_gpu()

    def _select_largest_gpu(self) -> None:
        """ Set the default GPU to be a supported device with the most available VRAM. If no
        supported device is available, then set the GPU to be an experimental device with the
        most VRAM available.

        Exits the process with status ``1`` if there is no device in the chosen category.
        """
        category = "supported" if self._supported_devices else "experimental"
        self._log("debug", f"Obtaining largest {category} device")

        indices = getattr(self, f"_{category}_indices")
        if not indices:
            self._log("error", "Failed to automatically detect your GPU.")
            self._log("error", "Please run `plaidml-setup` to set up your GPU.")
            sys.exit(1)

        # The property rebuilds the list on every access, so read it once
        all_vram = self._all_vram
        max_vram = max(all_vram[idx] for idx in indices)
        self._log("debug", f"Max VRAM: {max_vram}")

        # On a tie, the lowest index wins
        gpu_idx = min(idx for idx in indices if all_vram[idx] == max_vram)
        self._log("debug", f"GPU IDX: {gpu_idx}")

        selected_gpu = self._plaid_ids[gpu_idx]
        self._log("info", f"Setting GPU to largest available {category} device. If you want to "
                          "override this selection, run `plaidml-setup` from the command line.")

        plaidml.settings.experimental = category == "experimental"
        plaidml.settings.device_ids = [selected_gpu]

    def _get_device_count(self) -> int:
        """ Detect the number of AMD GPUs available from PlaidML.

        Returns
        -------
        int
            The total number of AMD GPUs available
        """
        retval = len(self._all_devices)
        self._log("debug", f"GPU Device count: {retval}")
        return retval

    def _get_active_devices(self) -> List[int]:
        """ Obtain the indices of active GPUs (those that have not been explicitly excluded by
        PlaidML or explicitly excluded in the command line arguments).

        Returns
        -------
        list
            The list of device indices that are available for Faceswap to use
        """
        devices = [idx for idx, d_id in enumerate(self._plaid_ids)
                   if d_id in plaidml.settings.device_ids and idx not in _EXCLUDE_DEVICES]
        self._log("debug", f"Active GPU Devices: {devices}")
        return devices

    def _get_handles(self) -> list:
        """ AMD Doesn't really use device handles, so we just return the all devices list

        Returns
        -------
        list
            The list of all AMD discovered GPUs
        """
        handles = self._all_devices
        self._log("debug", f"AMD GPU Handles found: {handles}")
        return handles

    def _get_driver(self) -> str:
        """ Obtain the AMD driver version currently in use.

        Returns
        -------
        str
            The current AMD GPU driver versions, one per device, separated by ``|``
        """
        drivers = "|".join([device.get("driverVersion", "No Driver Found")
                            for device in self._device_details])
        self._log("debug", f"GPU Drivers: {drivers}")
        return drivers

    def _get_device_names(self) -> List[str]:
        """ Obtain the list of names of connected AMD GPUs as identified in :attr:`_handles`.

        Returns
        -------
        list
            The list of connected AMD GPU names
        """
        names = self.names
        self._log("debug", f"GPU Devices: {names}")
        return names

    def _get_vram(self) -> List[int]:
        """ Obtain the VRAM in Megabytes for each connected AMD GPU as identified in
        :attr:`_handles`.

        Returns
        -------
        list
            The VRAM in Megabytes for each connected AMD GPU
        """
        vram = self._all_vram
        self._log("debug", f"GPU VRAM: {vram}")
        return vram

    def _get_free_vram(self) -> List[int]:
        """ Obtain the amount of VRAM that is available, in Megabytes, for each connected AMD
        GPU.

        Notes
        -----
        There is no useful way to get free VRAM on PlaidML. OpenCL loads and unloads VRAM as
        required, so this returns the total memory available per card for AMD GPUs, which is
        not particularly useful.

        Returns
        -------
        list
            List of `int`s containing the amount of VRAM available, in Megabytes, for each
            connected GPU as corresponding to the values in :attr:`_handles`
        """
        vram = self._all_vram
        self._log("debug", f"GPU VRAM free: {vram}")
        return vram