faceswap/lib/gpu_stats.py
#!/usr/bin/env python3
""" Collects and returns Information on available GPUs.
The information returned from this module provides information for both Nvidia and AMD GPUs.
However, the information available for Nvidia is far more thorough than what is available for
AMD, where we need to plug into plaidML to pull stats. The quality of this data will vary
depending on the OS' particular OpenCL implementation.
"""
import logging
import os
import platform
from lib.utils import get_backend
if platform.system() == 'Darwin':
import pynvx # pylint: disable=import-error
IS_MACOS = True
else:
import pynvml
IS_MACOS = False
# Limited PlaidML/AMD Stats
try:
from lib.plaidml_tools import PlaidMLStats as plaidlib # pylint:disable=ungrouped-imports
except ImportError:
plaidlib = None
class GPUStats():
""" Holds information and statistics about the GPU(s) available on the currently
running system.
Parameters
----------
log: bool, optional
Whether the class should output information to the logger. There may be occasions where the
logger has not yet been set up when this class is queried. Attempting to log in these
instances will raise an error. If GPU stats are being queried prior to the logger being
available then this parameter should be set to ``False``. Otherwise set to ``True``.
Default: ``True``
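    Example
    -------
    A minimal, illustrative query (the values returned will depend on the installed hardware):
    >>> from lib.gpu_stats import GPUStats
    >>> gpu_stats = GPUStats(log=False)
    >>> gpu_stats.device_count
    1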
"""
def __init__(self, log=True):
# Logger is held internally, as we don't want to log when obtaining system stats on crash
self._logger = logging.getLogger(__name__) if log else None
self._log("debug", "Initializing {}".format(self.__class__.__name__))
self._plaid = None
self._initialized = False
self._device_count = 0
self._active_devices = list()
self._handles = list()
self._driver = None
self._devices = list()
self._vram = None
self._initialize(log)
self._driver = self._get_driver()
self._devices = self._get_devices()
self._vram = self._get_vram()
if not self._active_devices:
self._log("warning", "No GPU detected. Switching to CPU mode")
return
self._shutdown()
self._log("debug", "Initialized {}".format(self.__class__.__name__))
@property
def device_count(self):
"""int: The number of GPU devices discovered on the system. """
return self._device_count
@property
def _is_plaidml(self):
""" bool: ``True`` if the backend is plaidML otherwise ``False``. """
return self._plaid is not None
@property
def sys_info(self):
""" dict: GPU Stats that are required for system information logging.
The dictionary contains the following data:
**vram** (`list`): the total amount of VRAM in Megabytes for each GPU as pertaining to
:attr:`_handles`
**driver** (`str`): The GPU driver version that is installed on the OS
**devices** (`list`): The device name of each GPU on the system as pertaining
to :attr:`_handles`
        **devices_active** (`list`): The index of each active GPU on the system as pertaining
        to :attr:`_handles`
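        Example
        -------
        Illustrative output for a hypothetical single Nvidia GPU system (actual values depend
        on the installed hardware and drivers):
        >>> GPUStats(log=False).sys_info
        {'vram': [8192.0], 'driver': '450.51.06',
         'devices': ['GeForce GTX 1080'], 'devices_active': [0]}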
"""
return dict(vram=self._vram,
driver=self._driver,
devices=self._devices,
devices_active=self._active_devices)
def _log(self, level, message):
""" If the class has been initialized with :attr:`log` as `True` then log the message
otherwise skip logging.
Parameters
----------
level: str
The log level to log at
message: str
The message to log
"""
if self._logger is None:
return
logger = getattr(self._logger, level.lower())
logger(message)
def _initialize(self, log=False):
""" Initialize the library that will be returning stats for the system's GPU(s).
For Nvidia (on Linux and Windows) the library is `pynvml`. For Nvidia (on macOS) the
library is `pynvx`. For AMD `plaidML` is used.
Parameters
----------
log: bool, optional
Whether the class should output information to the logger. There may be occasions where
the logger has not yet been set up when this class is queried. Attempting to log in
these instances will raise an error. If GPU stats are being queried prior to the
logger being available then this parameter should be set to ``False``. Otherwise set
to ``True``. Default: ``False``
"""
if not self._initialized:
if get_backend() == "amd":
self._log("debug", "AMD Detected. Using plaidMLStats")
loglevel = "INFO" if self._logger is None else self._logger.getEffectiveLevel()
self._plaid = plaidlib(loglevel=loglevel, log=log)
elif IS_MACOS:
self._log("debug", "macOS Detected. Using pynvx")
try:
pynvx.cudaInit()
except RuntimeError:
self._initialized = True
return
else:
try:
self._log("debug", "OS is not macOS. Trying pynvml")
pynvml.nvmlInit()
except (pynvml.NVMLError_LibraryNotFound, # pylint: disable=no-member
pynvml.NVMLError_DriverNotLoaded, # pylint: disable=no-member
pynvml.NVMLError_NoPermission) as err: # pylint: disable=no-member
if plaidlib is not None:
self._log("debug", "pynvml errored. Trying plaidML")
self._plaid = plaidlib(log=log)
else:
msg = ("There was an error reading from the Nvidia Machine Learning "
"Library. Either you do not have an Nvidia GPU (in which case "
"this warning can be ignored) or the most likely cause is "
"incorrectly installed drivers. If this is the case, Please remove "
"and reinstall your Nvidia drivers before reporting."
"Original Error: {}".format(str(err)))
self._log("warning", msg)
self._initialized = True
return
except Exception as err: # pylint: disable=broad-except
msg = ("An unhandled exception occured loading pynvml. "
"Original error: {}".format(str(err)))
if self._logger:
self._logger.error(msg)
else:
print(msg)
self._initialized = True
return
self._initialized = True
self._get_device_count()
self._get_active_devices()
self._get_handles()
def _shutdown(self):
""" Shutdown pynvml if it was the library used for obtaining stats and set
:attr:`_initialized` back to ``False``. """
if self._initialized:
self._handles = list()
if not IS_MACOS and not self._is_plaidml:
pynvml.nvmlShutdown()
self._initialized = False
def _get_device_count(self):
""" Detect the number of GPUs attached to the system and allocate to
:attr:`_device_count`. """
if self._is_plaidml:
self._device_count = self._plaid.device_count
elif IS_MACOS:
self._device_count = pynvx.cudaDeviceGetCount(ignore=True)
else:
try:
self._device_count = pynvml.nvmlDeviceGetCount()
except pynvml.NVMLError:
self._device_count = 0
self._log("debug", "GPU Device count: {}".format(self._device_count))
def _get_active_devices(self):
""" Obtain the indices of active GPUs (those that have not been explicitly excluded by
CUDA_VISIBLE_DEVICES or plaidML) and allocate to :attr:`_active_devices`. """
if self._is_plaidml:
self._active_devices = self._plaid.active_devices
else:
devices = os.environ.get("CUDA_VISIBLE_DEVICES", None)
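            # A CUDA_VISIBLE_DEVICES of, for example, "0,2" maps to active devices [0, 2]. An
            # unset variable means all detected devices are active, whilst an empty string
            # excludes them all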
if self._device_count == 0:
self._active_devices = list()
elif devices is not None:
self._active_devices = [int(i) for i in devices.split(",") if devices]
else:
self._active_devices = list(range(self._device_count))
self._log("debug", "Active GPU Devices: {}".format(self._active_devices))
def _get_handles(self):
""" Obtain the internal handle identifiers for the system GPUs and allocate to
:attr:`_handles`. """
if self._is_plaidml:
self._handles = self._plaid.devices
elif IS_MACOS:
self._handles = pynvx.cudaDeviceGetHandles(ignore=True)
else:
self._handles = [pynvml.nvmlDeviceGetHandleByIndex(i)
for i in range(self._device_count)]
self._log("debug", "GPU Handles found: {}".format(len(self._handles)))
def _get_driver(self):
""" Obtain and return the installed driver version for the system's GPUs.
Returns
-------
str
The currently installed GPU driver version
"""
if self._is_plaidml:
driver = self._plaid.drivers
elif IS_MACOS:
driver = pynvx.cudaSystemGetDriverVersion(ignore=True)
else:
try:
driver = pynvml.nvmlSystemGetDriverVersion().decode("utf-8")
except pynvml.NVMLError:
driver = "No Nvidia driver found"
self._log("debug", "GPU Driver: {}".format(driver))
return driver
def _get_devices(self):
""" Obtain the name of the installed devices. The quality of this information depends on
the backend and OS being used, but it should be sufficient for identifying cards.
Returns
-------
list
List of device names for connected GPUs as corresponding to the values in
:attr:`_handles`
"""
self._initialize()
        if self._device_count == 0:
            names = list()
        elif self._is_plaidml:
names = self._plaid.names
elif IS_MACOS:
names = [pynvx.cudaGetName(handle, ignore=True)
for handle in self._handles]
else:
names = [pynvml.nvmlDeviceGetName(handle).decode("utf-8")
for handle in self._handles]
self._log("debug", "GPU Devices: {}".format(names))
return names
def _get_vram(self):
""" Obtain the total VRAM in Megabytes for each connected GPU.
Returns
-------
list
List of floats containing the total amount of VRAM in Megabytes for each connected GPU
            as corresponding to the values in :attr:`_handles`
"""
self._initialize()
if self._device_count == 0:
vram = list()
elif self._is_plaidml:
vram = self._plaid.vram
elif IS_MACOS:
vram = [pynvx.cudaGetMemTotal(handle, ignore=True) / (1024 * 1024)
for handle in self._handles]
else:
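            # nvmlDeviceGetMemoryInfo reports sizes in bytes, so convert to Megabytes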
vram = [pynvml.nvmlDeviceGetMemoryInfo(handle).total /
(1024 * 1024)
for handle in self._handles]
self._log("debug", "GPU VRAM: {}".format(vram))
return vram
def _get_free_vram(self):
""" Obtain the amount of VRAM that is available, in Megabytes, for each connected GPU.
Returns
-------
list
List of floats containing the amount of VRAM available, in Megabytes, for each
            connected GPU as corresponding to the values in :attr:`_handles`
Notes
-----
There is no useful way to get free VRAM on PlaidML. OpenCL loads and unloads VRAM as
        required, so this returns the total memory available per card for AMD cards, which is
        not particularly useful.
"""
self._initialize()
if self._is_plaidml:
vram = self._plaid.vram
elif IS_MACOS:
vram = [pynvx.cudaGetMemFree(handle, ignore=True) / (1024 * 1024)
for handle in self._handles]
else:
vram = [pynvml.nvmlDeviceGetMemoryInfo(handle).free / (1024 * 1024)
for handle in self._handles]
self._shutdown()
self._log("debug", "GPU VRAM free: {}".format(vram))
return vram
def get_card_most_free(self):
""" Obtain statistics for the GPU with the most available free VRAM.
Returns
-------
dict
The dictionary contains the following data:
**card_id** (`int`): The index of the card as pertaining to :attr:`_handles`
**device** (`str`): The name of the device
**free** (`float`): The amount of available VRAM on the GPU
**total** (`float`): the total amount of VRAM on the GPU
If a GPU is not detected then the **card_id** is returned as ``-1`` and the amount
        of free and total VRAM available is fixed to 2048 Megabytes.
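        Example
        -------
        Illustrative output for a hypothetical system with a single active GPU (the values
        will vary depending on the installed hardware):
        >>> GPUStats(log=False).get_card_most_free()
        {'card_id': 0, 'device': 'GeForce GTX 1080', 'free': 7000.0, 'total': 8192.0}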
"""
if self._device_count == 0:
return {"card_id": -1,
"device": "No GPU devices found",
"free": 2048,
"total": 2048}
free_vram = [self._get_free_vram()[i] for i in self._active_devices]
vram_free = max(free_vram)
card_id = self._active_devices[free_vram.index(vram_free)]
retval = {"card_id": card_id,
"device": self._devices[card_id],
"free": vram_free,
"total": self._vram[card_id]}
self._log("debug", "Active GPU Card with most free VRAM: {}".format(retval))
return retval