locales: Train settings

2025-06-07 10:43:27 -04:00 · 2023-06-11 23:30:05 +01:00 · 2023-06-11 23:30:05 +01:00 · 02239d4183
commit 02239d4183
parent a66214c58e
5 changed files with 938 additions and 287 deletions
--- a/lib/config.py
+++ b/lib/config.py
@ -3,6 +3,7 @@
    Extends out :class:`configparser.ConfigParser` functionality by checking for default
    configuration updates and returning data in it's correct format """

+import gettext
 import logging
 import os
 import sys
@ -16,6 +17,10 @@ from typing import Dict, List, Optional, Tuple, Union

 from lib.utils import full_path_split

+# LOCALES
+_LANG = gettext.translation("lib.config", localedir="locales", fallback=True)
+_ = _LANG.gettext
+
 # Can't type OrderedDict fully on Python 3.8 or lower
 if sys.version_info < (3, 9):
    OrderedDictSectionType = OrderedDict
@ -282,7 +287,9 @@ class FaceswapConfig():
                logger.error(err)
                raise ValueError(err)
            return configfile
-        dirname = os.path.dirname(sys.modules[self.__module__].__file__)
+        filepath = sys.modules[self.__module__].__file__
+        assert filepath is not None
+        dirname = os.path.dirname(filepath)
        folder, fname = os.path.split(dirname)
        retval = os.path.join(os.path.dirname(folder), "config", f"{fname}.ini")
        logger.debug("Config File location: '%s'", retval)
@ -383,23 +390,23 @@ class FaceswapConfig():
        """ Add extra helptext info from parameters """
        helptext += "\n"
        if not fixed:
-            helptext += "\nThis option can be updated for existing models.\n"
+            helptext += _("\nThis option can be updated for existing models.\n")
        if datatype == list:
-            helptext += ("\nIf selecting multiple options then each option should be separated "
-                         "by a space or a comma (e.g. item1, item2, item3)\n")
+            helptext += _("\nIf selecting multiple options then each option should be separated "
+                          "by a space or a comma (e.g. item1, item2, item3)\n")
        if choices and choices != "colorchooser":
-            helptext += f"\nChoose from: {choices}"
+            helptext += _("\nChoose from: {}").format(choices)
        elif datatype == bool:
-            helptext += "\nChoose from: True, False"
+            helptext += _("\nChoose from: True, False")
        elif datatype == int:
            assert min_max is not None
            cmin, cmax = min_max
-            helptext += f"\nSelect an integer between {cmin} and {cmax}"
+            helptext += _("\nSelect an integer between {} and {}").format(cmin, cmax)
        elif datatype == float:
            assert min_max is not None
            cmin, cmax = min_max
-            helptext += f"\nSelect a decimal number between {cmin} and {cmax}"
-        helptext += f"\n[Default: {default}]"
+            helptext += _("\nSelect a decimal number between {} and {}").format(cmin, cmax)
+        helptext += _("\n[Default: {}]").format(default)
        return helptext

    def _check_exists(self) -> bool:
--- a/locales/lib.config.pot
+++ b/locales/lib.config.pot
@ -0,0 +1,61 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2023-06-11 23:28+0100\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: lib/config.py:393
+msgid ""
+"\n"
+"This option can be updated for existing models.\n"
+msgstr ""
+
+#: lib/config.py:395
+msgid ""
+"\n"
+"If selecting multiple options then each option should be separated by a "
+"space or a comma (e.g. item1, item2, item3)\n"
+msgstr ""
+
+#: lib/config.py:398
+msgid ""
+"\n"
+"Choose from: {}"
+msgstr ""
+
+#: lib/config.py:400
+msgid ""
+"\n"
+"Choose from: True, False"
+msgstr ""
+
+#: lib/config.py:404
+msgid ""
+"\n"
+"Select an integer between {} and {}"
+msgstr ""
+
+#: lib/config.py:408
+msgid ""
+"\n"
+"Select a decimal number between {} and {}"
+msgstr ""
+
+#: lib/config.py:409
+msgid ""
+"\n"
+"[Default: {}]"
+msgstr ""
--- a/locales/plugins.train._config.pot
+++ b/locales/plugins.train._config.pot
@ -0,0 +1,552 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2023-06-11 23:20+0100\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: plugins/train/_config.py:17
+msgid ""
+"\n"
+"NB: Unless specifically stated, values changed here will only take effect "
+"when creating a new model."
+msgstr ""
+
+#: plugins/train/_config.py:22
+msgid ""
+"Focal Frequency Loss. Analyzes the frequency spectrum of the images rather "
+"than the images themselves. This loss function can be used on its own, but "
+"the original paper found increased benefits when using it as a complementary "
+"loss to another spacial loss function (e.g. MSE). Ref: Focal Frequency Loss "
+"for Image Reconstruction and Synthesis https://arxiv.org/pdf/2012.12821.pdf "
+"NB: This loss does not currently work on AMD cards."
+msgstr ""
+
+#: plugins/train/_config.py:29
+msgid ""
+"Nvidia FLIP. A perceptual loss measure that approximates the difference "
+"perceived by humans as they alternate quickly (or flip) between two images. "
+"Used on its own and this loss function creates a distinct grid on the "
+"output. However it can be helpful when used as a complimentary loss "
+"function. Ref: FLIP: A Difference Evaluator for Alternating Images: https://"
+"research.nvidia.com/sites/default/files/node/3260/FLIP_Paper.pdf"
+msgstr ""
+
+#: plugins/train/_config.py:36
+msgid ""
+"Gradient Magnitude Similarity Deviation seeks to match the global standard "
+"deviation of the pixel to pixel differences between two images. Similar in "
+"approach to SSIM. Ref: Gradient Magnitude Similarity Deviation: An Highly "
+"Efficient Perceptual Image Quality Index https://arxiv.org/ftp/arxiv/"
+"papers/1308/1308.3052.pdf"
+msgstr ""
+
+#: plugins/train/_config.py:41
+msgid ""
+"The L_inf norm will reduce the largest individual pixel error in an image. "
+"As each largest error is minimized sequentially, the overall error is "
+"improved. This loss will be extremely focused on outliers."
+msgstr ""
+
+#: plugins/train/_config.py:45
+msgid ""
+"Laplacian Pyramid Loss. Attempts to improve results by focussing on edges "
+"using Laplacian Pyramids. As this loss function gives priority to edges over "
+"other low-frequency information, like color, it should not be used on its "
+"own. The original implementation uses this loss as a complimentary function "
+"to MSE. Ref: Optimizing the Latent Space of Generative Networks https://"
+"arxiv.org/abs/1707.05776"
+msgstr ""
+
+#: plugins/train/_config.py:52
+msgid ""
+"LPIPS is a perceptual loss that uses the feature outputs of other pretrained "
+"models as a loss metric. Be aware that this loss function will use more "
+"VRAM. Used on its own and this loss will create a distinct moire pattern on "
+"the output, however it can be helpful as a complimentary loss function. The "
+"output of this function is strong, so depending on your chosen primary loss "
+"function, you are unlikely going to want to set the weight above about 25%. "
+"Ref: The Unreasonable Effectiveness of Deep Features as a Perceptual Metric "
+"http://arxiv.org/abs/1801.03924\n"
+"This variant uses the AlexNet backbone. A fairly light and old model which "
+"performed best in the paper's original implementation.\n"
+"NB: For AMD Users the final linear layer is not implemented."
+msgstr ""
+
+#: plugins/train/_config.py:62
+msgid ""
+"Same as lpips_alex, but using the SqueezeNet backbone. A more lightweight "
+"version of AlexNet.\n"
+"NB: For AMD Users the final linear layer is not implemented."
+msgstr ""
+
+#: plugins/train/_config.py:65
+msgid ""
+"Same as lpips_alex, but using the VGG16 backbone. A more heavyweight model.\n"
+"NB: For AMD Users the final linear layer is not implemented."
+msgstr ""
+
+#: plugins/train/_config.py:68
+msgid ""
+"log(cosh(x)) acts similar to MSE for small errors and to MAE for large "
+"errors. Like MSE, it is very stable and prevents overshoots when errors are "
+"near zero. Like MAE, it is robust to outliers."
+msgstr ""
+
+#: plugins/train/_config.py:72
+msgid ""
+"Mean absolute error will guide reconstructions of each pixel towards its "
+"median value in the training dataset. Robust to outliers but as a median, it "
+"can potentially ignore some infrequent image types in the dataset."
+msgstr ""
+
+#: plugins/train/_config.py:76
+msgid ""
+"Mean squared error will guide reconstructions of each pixel towards its "
+"average value in the training dataset. As an avg, it will be susceptible to "
+"outliers and typically produces slightly blurrier results. Ref: Multi-Scale "
+"Structural Similarity for Image Quality Assessment https://www.cns.nyu.edu/"
+"pub/eero/wang03b.pdf"
+msgstr ""
+
+#: plugins/train/_config.py:81
+msgid ""
+"Multiscale Structural Similarity Index Metric is similar to SSIM except that "
+"it performs the calculations along multiple scales of the input image."
+msgstr ""
+
+#: plugins/train/_config.py:84
+msgid ""
+"Smooth_L1 is a modification of the MAE loss to correct two of its "
+"disadvantages. This loss has improved stability and guidance for small "
+"errors. Ref: A General and Adaptive Robust Loss Function https://arxiv.org/"
+"pdf/1701.03077.pdf"
+msgstr ""
+
+#: plugins/train/_config.py:88
+msgid ""
+"Structural Similarity Index Metric is a perception-based loss that considers "
+"changes in texture, luminance, contrast, and local spatial statistics of an "
+"image. Potentially delivers more realistic looking images. Ref: Image "
+"Quality Assessment: From Error Visibility to Structural Similarity http://"
+"www.cns.nyu.edu/pub/eero/wang03-reprint.pdf"
+msgstr ""
+
+#: plugins/train/_config.py:93
+msgid ""
+"Instead of minimizing the difference between the absolute value of each "
+"pixel in two reference images, compute the pixel to pixel spatial difference "
+"in each image and then minimize that difference between two images. Allows "
+"for large color shifts, but maintains the structure of the image."
+msgstr ""
+
+#: plugins/train/_config.py:97
+msgid "Do not use an additional loss function."
+msgstr ""
+
+#: plugins/train/_config.py:117
+msgid "Options that apply to all models"
+msgstr ""
+
+#: plugins/train/_config.py:126 plugins/train/_config.py:150
+msgid "face"
+msgstr ""
+
+#: plugins/train/_config.py:128
+msgid ""
+"How to center the training image. The extracted images are centered on the "
+"middle of the skull based on the face's estimated pose. A subsection of "
+"these images are used for training. The centering used dictates how this "
+"subsection will be cropped from the aligned images.\n"
+"\tface: Centers the training image on the center of the face, adjusting for "
+"pitch and yaw.\n"
+"\thead: Centers the training image on the center of the head, adjusting for "
+"pitch and yaw. NB: You should only select head centering if you intend to "
+"include the full head (including hair) in the final swap. This may give "
+"mixed results. Additionally, it is only worth choosing head centering if you "
+"are training with a mask that includes the hair (e.g. BiSeNet-FP-Head).\n"
+"\tlegacy: The 'original' extraction technique. Centers the training image "
+"near the tip of the nose with no adjustment. Can result in the edges of the "
+"face appearing outside of the training area."
+msgstr ""
+
+#: plugins/train/_config.py:152
+msgid ""
+"How much of the extracted image to train on. A lower coverage will limit the "
+"model's scope to a zoomed-in central area while higher amounts can include "
+"the entire face. A trade-off exists between lower amounts given more detail "
+"versus higher amounts avoiding noticeable swap transitions. For 'Face' "
+"centering you will want to leave this above 75%. For Head centering you will "
+"most likely want to set this to 100%. Sensible values for 'Legacy' centering "
+"are:\n"
+"\t62.5% spans from eyebrow to eyebrow.\n"
+"\t75.0% spans from temple to temple.\n"
+"\t87.5% spans from ear to ear.\n"
+"\t100.0% is a mugshot."
+msgstr ""
+
+#: plugins/train/_config.py:168 plugins/train/_config.py:179
+msgid "initialization"
+msgstr ""
+
+#: plugins/train/_config.py:170
+msgid ""
+"Use ICNR to tile the default initializer in a repeating pattern. This "
+"strategy is designed for pairing with sub-pixel / pixel shuffler to reduce "
+"the 'checkerboard effect' in image reconstruction. \n"
+"\t https://arxiv.org/ftp/arxiv/papers/1707/1707.02937.pdf"
+msgstr ""
+
+#: plugins/train/_config.py:181
+msgid ""
+"Use Convolution Aware Initialization for convolutional layers. This can help "
+"eradicate the vanishing and exploding gradient problem as well as lead to "
+"higher accuracy, lower loss and faster convergence.\n"
+"NB:\n"
+"\t This can use more VRAM when creating a new model so you may want to lower "
+"the batch size for the first run. The batch size can be raised again when "
+"reloading the model. \n"
+"\t Multi-GPU is not supported for this option, so you should start the model "
+"on a single GPU. Once training has started, you can stop training, enable "
+"multi-GPU and resume.\n"
+"\t Building the model will likely take several minutes as the calculations "
+"for this initialization technique are expensive. This will only impact "
+"starting a new model."
+msgstr ""
+
+#: plugins/train/_config.py:198 plugins/train/_config.py:223
+#: plugins/train/_config.py:238 plugins/train/_config.py:265
+msgid "optimizer"
+msgstr ""
+
+#: plugins/train/_config.py:202
+msgid ""
+"The optimizer to use.\n"
+"\t adabelief - Adapting Stepsizes by the Belief in Observed Gradients. An "
+"optimizer with the aim to converge faster, generalize better and remain more "
+"stable. (https://arxiv.org/abs/2010.07468). NB: Epsilon for AdaBelief needs "
+"to be set to a smaller value than other Optimizers. Generally setting the "
+"'Epsilon Exponent' to around '-16' should work.\n"
+"\t adam - Adaptive Moment Optimization. A stochastic gradient descent method "
+"that is based on adaptive estimation of first-order and second-order "
+"moments.\n"
+"\t nadam - Adaptive Moment Optimization with Nesterov Momentum. Much like "
+"Adam but uses a different formula for calculating momentum.\n"
+"\t rms-prop - Root Mean Square Propagation. Maintains a moving (discounted) "
+"average of the square of the gradients. Divides the gradient by the root of "
+"this average."
+msgstr ""
+
+#: plugins/train/_config.py:225
+msgid ""
+"Learning rate - how fast your network will learn (how large are the "
+"modifications to the model weights after one batch of training). Values that "
+"are too large might result in model crashes and the inability of the model "
+"to find the best solution. Values that are too small might be unable to "
+"escape from dead-ends and find the best global minimum."
+msgstr ""
+
+#: plugins/train/_config.py:240
+msgid ""
+"The epsilon adds a small constant to weight updates to attempt to avoid "
+"'divide by zero' errors. Unless you are using the AdaBelief Optimizer, then "
+"Generally this option should be left at default value, For AdaBelief, "
+"setting this to around '-16' should work.\n"
+"In all instances if you are getting 'NaN' loss values, and have been unable "
+"to resolve the issue any other way (for example, increasing batch size, or "
+"lowering learning rate), then raising the epsilon can lead to a more stable "
+"model. It may, however, come at the cost of slower training and a less "
+"accurate final result.\n"
+"NB: The value given here is the 'exponent' to the epsilon. For example, "
+"choosing '-7' will set the epsilon to 1e-7. Choosing '-3' will set the "
+"epsilon to 0.001 (1e-3)."
+msgstr ""
+
+#: plugins/train/_config.py:258
+msgid ""
+"[Not PlaidML] Apply AutoClipping to the gradients. AutoClip analyzes the "
+"gradient weights and adjusts the normalization value dynamically to fit the "
+"data. Can help prevent NaNs and improve model optimization at the expense of "
+"VRAM. Ref: AutoClip: Adaptive Gradient Clipping for Source Separation "
+"Networks https://arxiv.org/abs/2007.14469"
+msgstr ""
+
+#: plugins/train/_config.py:271 plugins/train/_config.py:283
+#: plugins/train/_config.py:297 plugins/train/_config.py:314
+msgid "network"
+msgstr ""
+
+#: plugins/train/_config.py:273
+msgid ""
+"Use reflection padding rather than zero padding with convolutions. Each "
+"convolution must pad the image boundaries to maintain the proper sizing. "
+"More complex padding schemes can reduce artifacts at the border of the "
+"image.\n"
+"\t http://www-cs.engr.ccny.cuny.edu/~wolberg/cs470/hw/hw2_pad.txt"
+msgstr ""
+
+#: plugins/train/_config.py:286
+msgid ""
+"[Nvidia Only]. Enable the Tensorflow GPU 'allow_growth' configuration "
+"option. This option prevents Tensorflow from allocating all of the GPU VRAM "
+"at launch but can lead to higher VRAM fragmentation and slower performance. "
+"Should only be enabled if you are receiving errors regarding 'cuDNN fails to "
+"initialize' when commencing training."
+msgstr ""
+
+#: plugins/train/_config.py:299
+msgid ""
+"[Not PlaidML], NVIDIA GPUs can run operations in float16 faster than in "
+"float32. Mixed precision allows you to use a mix of float16 with float32, to "
+"get the performance benefits from float16 and the numeric stability benefits "
+"from float32.\n"
+"\n"
+"This is untested on DirectML backend, but will run on most Nvidia models. it "
+"will only speed up training on more recent GPUs. Those with compute "
+"capability 7.0 or higher will see the greatest performance benefit from "
+"mixed precision because they have Tensor Cores. Older GPUs offer no math "
+"performance benefit for using mixed precision, however memory and bandwidth "
+"savings can enable some speedups. Generally RTX GPUs and later will offer "
+"the most benefit."
+msgstr ""
+
+#: plugins/train/_config.py:316
+msgid ""
+"If a 'NaN' is generated in the model, this means that the model has "
+"corrupted and the model is likely to start deteriorating from this point on. "
+"Enabling NaN protection will stop training immediately in the event of a "
+"NaN. The last save will not contain the NaN, so you may still be able to "
+"rescue your model."
+msgstr ""
+
+#: plugins/train/_config.py:329
+msgid "convert"
+msgstr ""
+
+#: plugins/train/_config.py:331
+msgid ""
+"[GPU Only]. The number of faces to feed through the model at once when "
+"running the Convert process.\n"
+"\n"
+"NB: Increasing this figure is unlikely to improve convert speed, however, if "
+"you are getting Out of Memory errors, then you may want to reduce the batch "
+"size."
+msgstr ""
+
+#: plugins/train/_config.py:350
+msgid ""
+"Loss configuration options\n"
+"Loss is the mechanism by which a Neural Network judges how well it thinks "
+"that it is recreating a face."
+msgstr ""
+
+#: plugins/train/_config.py:357 plugins/train/_config.py:369
+#: plugins/train/_config.py:382 plugins/train/_config.py:402
+#: plugins/train/_config.py:414 plugins/train/_config.py:434
+#: plugins/train/_config.py:446 plugins/train/_config.py:466
+#: plugins/train/_config.py:482 plugins/train/_config.py:498
+#: plugins/train/_config.py:515
+msgid "loss"
+msgstr ""
+
+#: plugins/train/_config.py:361
+msgid "The loss function to use."
+msgstr ""
+
+#: plugins/train/_config.py:373
+msgid ""
+"The second loss function to use. If using a structural based loss (such as "
+"SSIM, MS-SSIM or GMSD) it is common to add an L1 regularization(MAE) or L2 "
+"regularization (MSE) function. You can adjust the weighting of this loss "
+"function with the loss_weight_2 option."
+msgstr ""
+
+#: plugins/train/_config.py:388
+msgid ""
+"The amount of weight to apply to the second loss function.\n"
+"\n"
+"\n"
+"\n"
+"The value given here is as a percentage denoting how much the selected "
+"function should contribute to the overall loss cost of the model. For "
+"example:\n"
+"\t 100 - The loss calculated for the second loss function will be applied at "
+"its full amount towards the overall loss score. \n"
+"\t 25 - The loss calculated for the second loss function will be reduced by "
+"a quarter prior to adding to the overall loss score. \n"
+"\t 400 - The loss calculated for the second loss function will be mulitplied "
+"4 times prior to adding to the overall loss score. \n"
+"\t 0 - Disables the second loss function altogether."
+msgstr ""
+
+#: plugins/train/_config.py:406
+msgid ""
+"The third loss function to use. You can adjust the weighting of this loss "
+"function with the loss_weight_3 option."
+msgstr ""
+
+#: plugins/train/_config.py:420
+msgid ""
+"The amount of weight to apply to the third loss function.\n"
+"\n"
+"\n"
+"\n"
+"The value given here is as a percentage denoting how much the selected "
+"function should contribute to the overall loss cost of the model. For "
+"example:\n"
+"\t 100 - The loss calculated for the third loss function will be applied at "
+"its full amount towards the overall loss score. \n"
+"\t 25 - The loss calculated for the third loss function will be reduced by a "
+"quarter prior to adding to the overall loss score. \n"
+"\t 400 - The loss calculated for the third loss function will be mulitplied "
+"4 times prior to adding to the overall loss score. \n"
+"\t 0 - Disables the third loss function altogether."
+msgstr ""
+
+#: plugins/train/_config.py:438
+msgid ""
+"The fourth loss function to use. You can adjust the weighting of this loss "
+"function with the loss_weight_3 option."
+msgstr ""
+
+#: plugins/train/_config.py:452
+msgid ""
+"The amount of weight to apply to the fourth loss function.\n"
+"\n"
+"\n"
+"\n"
+"The value given here is as a percentage denoting how much the selected "
+"function should contribute to the overall loss cost of the model. For "
+"example:\n"
+"\t 100 - The loss calculated for the fourth loss function will be applied at "
+"its full amount towards the overall loss score. \n"
+"\t 25 - The loss calculated for the fourth loss function will be reduced by "
+"a quarter prior to adding to the overall loss score. \n"
+"\t 400 - The loss calculated for the fourth loss function will be mulitplied "
+"4 times prior to adding to the overall loss score. \n"
+"\t 0 - Disables the fourth loss function altogether."
+msgstr ""
+
+#: plugins/train/_config.py:471
+msgid ""
+"The loss function to use when learning a mask.\n"
+"\t MAE - Mean absolute error will guide reconstructions of each pixel "
+"towards its median value in the training dataset. Robust to outliers but as "
+"a median, it can potentially ignore some infrequent image types in the "
+"dataset.\n"
+"\t MSE - Mean squared error will guide reconstructions of each pixel towards "
+"its average value in the training dataset. As an average, it will be "
+"susceptible to outliers and typically produces slightly blurrier results."
+msgstr ""
+
+#: plugins/train/_config.py:488
+msgid ""
+"The amount of priority to give to the eyes.\n"
+"\n"
+"The value given here is as a multiplier of the main loss score. For "
+"example:\n"
+"\t 1 - The eyes will receive the same priority as the rest of the face. \n"
+"\t 10 - The eyes will be given a score 10 times higher than the rest of the "
+"face.\n"
+"\n"
+"NB: Penalized Mask Loss must be enable to use this option."
+msgstr ""
+
+#: plugins/train/_config.py:504
+msgid ""
+"The amount of priority to give to the mouth.\n"
+"\n"
+"The value given here is as a multiplier of the main loss score. For "
+"Example:\n"
+"\t 1 - The mouth will receive the same priority as the rest of the face. \n"
+"\t 10 - The mouth will be given a score 10 times higher than the rest of the "
+"face.\n"
+"\n"
+"NB: Penalized Mask Loss must be enable to use this option."
+msgstr ""
+
+#: plugins/train/_config.py:517
+msgid ""
+"Image loss function is weighted by mask presence. For areas of the image "
+"without the facial mask, reconstruction errors will be ignored while the "
+"masked face area is prioritized. May increase overall quality by focusing "
+"attention on the core face area."
+msgstr ""
+
+#: plugins/train/_config.py:528 plugins/train/_config.py:570
+#: plugins/train/_config.py:584 plugins/train/_config.py:593
+msgid "mask"
+msgstr ""
+
+#: plugins/train/_config.py:531
+msgid ""
+"The mask to be used for training. If you have selected 'Learn Mask' or "
+"'Penalized Mask Loss' you must select a value other than 'none'. The "
+"required mask should have been selected as part of the Extract process. If "
+"it does not exist in the alignments file then it will be generated prior to "
+"training commencing.\n"
+"\tnone: Don't use a mask.\n"
+"\tbisenet-fp_face: Relatively lightweight NN based mask that provides more "
+"refined control over the area to be masked (configurable in mask settings). "
+"Use this version of bisenet-fp if your model is trained with 'face' or "
+"'legacy' centering.\n"
+"\tbisenet-fp_head: Relatively lightweight NN based mask that provides more "
+"refined control over the area to be masked (configurable in mask settings). "
+"Use this version of bisenet-fp if your model is trained with 'head' "
+"centering.\n"
+"\tcomponents: Mask designed to provide facial segmentation based on the "
+"positioning of landmark locations. A convex hull is constructed around the "
+"exterior of the landmarks to create a mask.\n"
+"\tcustom_face: Custom user created, face centered mask.\n"
+"\tcustom_head: Custom user created, head centered mask.\n"
+"\textended: Mask designed to provide facial segmentation based on the "
+"positioning of landmark locations. A convex hull is constructed around the "
+"exterior of the landmarks and the mask is extended upwards onto the "
+"forehead.\n"
+"\tvgg-clear: Mask designed to provide smart segmentation of mostly frontal "
+"faces clear of obstructions. Profile faces and obstructions may result in "
+"sub-par performance.\n"
+"\tvgg-obstructed: Mask designed to provide smart segmentation of mostly "
+"frontal faces. The mask model has been specifically trained to recognize "
+"some facial obstructions (hands and eyeglasses). Profile faces may result in "
+"sub-par performance.\n"
+"\tunet-dfl: Mask designed to provide smart segmentation of mostly frontal "
+"faces. The mask model has been trained by community members and will need "
+"testing for further description. Profile faces may result in sub-par "
+"performance."
+msgstr ""
+
+#: plugins/train/_config.py:572
+msgid ""
+"Apply gaussian blur to the mask input. This has the effect of smoothing the "
+"edges of the mask, which can help with poorly calculated masks and give less "
+"of a hard edge to the predicted mask. The size is in pixels (calculated from "
+"a 128px mask). Set to 0 to not apply gaussian blur. This value should be "
+"odd, if an even number is passed in then it will be rounded to the next odd "
+"number."
+msgstr ""
+
+#: plugins/train/_config.py:586
+msgid ""
+"Sets pixels that are near white to white and near black to black. Set to 0 "
+"for off."
+msgstr ""
+
+#: plugins/train/_config.py:595
+msgid ""
+"Dedicate a portion of the model to learning how to duplicate the input mask. "
+"Increases VRAM usage in exchange for learning a quick ability to try to "
+"replicate more complex mask models."
+msgstr ""
--- a/plugins/extract/_config.py
+++ b/plugins/extract/_config.py
@ -11,7 +11,7 @@ from lib.config import FaceswapConfig
 _LANG = gettext.translation("plugins.extract._config", localedir="locales", fallback=True)
 _ = _LANG.gettext

-logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
+logger = logging.getLogger(__name__) 


 class Config(FaceswapConfig):
--- a/plugins/train/_config.py
+++ b/plugins/train/_config.py
@ -1,46 +1,54 @@
 #!/usr/bin/env python3
 """ Default configurations for models """

+import gettext
 import logging
 import os

 from lib.config import FaceswapConfig
 from plugins.plugin_loader import PluginLoader

-logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
+# LOCALES
+_LANG = gettext.translation("plugins.train._config", localedir="locales", fallback=True)
+_ = _LANG.gettext

-ADDITIONAL_INFO = ("\nNB: Unless specifically stated, values changed here will only take effect "
-                   "when creating a new model.")
+logger = logging.getLogger(__name__)

-_LOSS_HELP = dict(
-    ffl="Focal Frequency Loss. Analyzes the frequency spectrum of the images rather than the "
+ADDITIONAL_INFO = _("\nNB: Unless specifically stated, values changed here will only take effect "
+                    "when creating a new model.")
+
+_LOSS_HELP = {
+    "ffl": _(
+        "Focal Frequency Loss. Analyzes the frequency spectrum of the images rather than the "
        "images themselves. This loss function can be used on its own, but the original paper "
        "found increased benefits when using it as a complementary loss to another spacial loss "
        "function (e.g. MSE). Ref: Focal Frequency Loss for Image Reconstruction and Synthesis "
-        "https://arxiv.org/pdf/2012.12821.pdf NB: This loss does not currently work on AMD cards.",
-    flip="Nvidia FLIP. A perceptual loss measure that approximates the difference perceived by "
-         "humans as they alternate quickly (or flip) between two images. Used on its own and this "
-         "loss function creates a distinct grid on the output. However it can be helpful when "
-         "used as a complimentary loss function. Ref: FLIP: A Difference Evaluator for "
-         "Alternating Images: "
-         "https://research.nvidia.com/sites/default/files/node/3260/FLIP_Paper.pdf",
-    gmsd=(
+        "https://arxiv.org/pdf/2012.12821.pdf NB: This loss does not currently work on AMD "
+        "cards."),
+    "flip": _(
+        "Nvidia FLIP. A perceptual loss measure that approximates the difference perceived by "
+        "humans as they alternate quickly (or flip) between two images. Used on its own and this "
+        "loss function creates a distinct grid on the output. However it can be helpful when "
+        "used as a complimentary loss function. Ref: FLIP: A Difference Evaluator for "
+        "Alternating Images: "
+        "https://research.nvidia.com/sites/default/files/node/3260/FLIP_Paper.pdf"),
+    "gmsd": _(
        "Gradient Magnitude Similarity Deviation seeks to match the global standard deviation of "
        "the pixel to pixel differences between two images. Similar in approach to SSIM. Ref: "
        "Gradient Magnitude Similarity Deviation: An Highly Efficient Perceptual Image Quality "
        "Index https://arxiv.org/ftp/arxiv/papers/1308/1308.3052.pdf"),
-    l_inf_norm=(
+    "l_inf_norm": _(
        "The L_inf norm will reduce the largest individual pixel error in an image. As "
        "each largest error is minimized sequentially, the overall error is improved. This loss "
        "will be extremely focused on outliers."),
-    laploss=(
+    "laploss": _(
        "Laplacian Pyramid Loss. Attempts to improve results by focussing on edges using "
        "Laplacian Pyramids. As this loss function gives priority to edges over other low-"
        "frequency information, like color, it should not be used on its own. The original "
        "implementation uses this loss as a complimentary function to MSE. "
        "Ref: Optimizing the Latent Space of Generative Networks "
        "https://arxiv.org/abs/1707.05776"),
-    lpips_alex=(
+    "lpips_alex": _(
        "LPIPS is a perceptual loss that uses the feature outputs of other pretrained models as a "
        "loss metric. Be aware that this loss function will use more VRAM. Used on its own and "
        "this loss will create a distinct moire pattern on the output, however it can be helpful "
@ -50,42 +58,43 @@ _LOSS_HELP = dict(
        "Metric http://arxiv.org/abs/1801.03924\nThis variant uses the AlexNet backbone. A fairly "
        "light and old model which performed best in the paper's original implementation.\nNB: "
        "For AMD Users the final linear layer is not implemented."),
-    lpips_squeeze=(
+    "lpips_squeeze": _(
        "Same as lpips_alex, but using the SqueezeNet backbone. A more lightweight "
        "version of AlexNet.\nNB: For AMD Users the final linear layer is not implemented."),
-    lpips_vgg16="Same as lpips_alex, but using the VGG16 backbone. A more heavyweight model.\n"
-                "NB: For AMD Users the final linear layer is not implemented.",
-    logcosh=(
+    "lpips_vgg16": _(
+        "Same as lpips_alex, but using the VGG16 backbone. A more heavyweight model.\n"
+        "NB: For AMD Users the final linear layer is not implemented."),
+    "logcosh": _(
        "log(cosh(x)) acts similar to MSE for small errors and to MAE for large errors. Like "
        "MSE, it is very stable and prevents overshoots when errors are near zero. Like MAE, it "
        "is robust to outliers."),
-    mae=(
+    "mae": _(
        "Mean absolute error will guide reconstructions of each pixel towards its median value in "
        "the training dataset. Robust to outliers but as a median, it can potentially ignore some "
        "infrequent image types in the dataset."),
-    mse=(
+    "mse": _(
        "Mean squared error will guide reconstructions of each pixel towards its average value in "
        "the training dataset. As an avg, it will be susceptible to outliers and typically "
        "produces slightly blurrier results. Ref: Multi-Scale Structural Similarity for Image "
        "Quality Assessment https://www.cns.nyu.edu/pub/eero/wang03b.pdf"),
-    ms_ssim=(
+    "ms_ssim": _(
        "Multiscale Structural Similarity Index Metric is similar to SSIM except that it "
        "performs the calculations along multiple scales of the input image."),
-    smooth_loss=(
+    "smooth_loss": _(
        "Smooth_L1 is a modification of the MAE loss to correct two of its disadvantages. "
        "This loss has improved stability and guidance for small errors. Ref: A General and "
        "Adaptive Robust Loss Function https://arxiv.org/pdf/1701.03077.pdf"),
-    ssim=(
+    "ssim": _(
        "Structural Similarity Index Metric is a perception-based loss that considers changes in "
        "texture, luminance, contrast, and local spatial statistics of an image. Potentially "
        "delivers more realistic looking images. Ref: Image Quality Assessment: From Error "
        "Visibility to Structural Similarity http://www.cns.nyu.edu/pub/eero/wang03-reprint.pdf"),
-    pixel_gradient_diff=(
+    "pixel_gradient_diff": _(
        "Instead of minimizing the difference between the absolute value of each "
        "pixel in two reference images, compute the pixel to pixel spatial difference in each "
        "image and then minimize that difference between two images. Allows for large color "
        "shifts, but maintains the structure of the image."),
-    none="Do not use an additional loss function.")
+    "none": _("Do not use an additional loss function.")}

 _NON_PRIMARY_LOSS = ["flip", "lpips_alex", "lpips_squeeze", "lpips_vgg16", "none"]

@ -105,7 +114,7 @@ class Config(FaceswapConfig):
        logger.debug("Setting global config")
        section = "global"
        self.add_section(section,
-                         "Options that apply to all models" + ADDITIONAL_INFO)
+                         _("Options that apply to all models") + ADDITIONAL_INFO)
        self.add_item(
            section=section,
            title="centering",
@ -114,21 +123,22 @@ class Config(FaceswapConfig):
            default="face",
            choices=["face", "head", "legacy"],
            fixed=True,
-            group="face",
-            info="How to center the training image. The extracted images are centered on the "
-                 "middle of the skull based on the face's estimated pose. A subsection of these "
-                 "images are used for training. The centering used dictates how this subsection "
-                 "will be cropped from the aligned images."
-                 "\n\tface: Centers the training image on the center of the face, adjusting for "
-                 "pitch and yaw."
-                 "\n\thead: Centers the training image on the center of the head, adjusting for "
-                 "pitch and yaw. NB: You should only select head centering if you intend to "
-                 "include the full head (including hair) in the final swap. This may give mixed "
-                 "results. Additionally, it is only worth choosing head centering if you are "
-                 "training with a mask that includes the hair (e.g. BiSeNet-FP-Head)."
-                 "\n\tlegacy: The 'original' extraction technique. Centers the training image "
-                 "near the tip of the nose with no adjustment. Can result in the edges of the "
-                 "face appearing outside of the training area.")
+            group=_("face"),
+            info=_(
+                "How to center the training image. The extracted images are centered on the "
+                "middle of the skull based on the face's estimated pose. A subsection of these "
+                "images are used for training. The centering used dictates how this subsection "
+                "will be cropped from the aligned images."
+                "\n\tface: Centers the training image on the center of the face, adjusting for "
+                "pitch and yaw."
+                "\n\thead: Centers the training image on the center of the head, adjusting for "
+                "pitch and yaw. NB: You should only select head centering if you intend to "
+                "include the full head (including hair) in the final swap. This may give mixed "
+                "results. Additionally, it is only worth choosing head centering if you are "
+                "training with a mask that includes the hair (e.g. BiSeNet-FP-Head)."
+                "\n\tlegacy: The 'original' extraction technique. Centers the training image "
+                "near the tip of the nose with no adjustment. Can result in the edges of the "
+                "face appearing outside of the training area."))
        self.add_item(
            section=section,
            title="coverage",
@ -137,68 +147,71 @@ class Config(FaceswapConfig):
            min_max=(62.5, 100.0),
            rounding=2,
            fixed=True,
-            group="face",
-            info="How much of the extracted image to train on. A lower coverage will limit the "
-                 "model's scope to a zoomed-in central area while higher amounts can include the "
-                 "entire face. A trade-off exists between lower amounts given more detail "
-                 "versus higher amounts avoiding noticeable swap transitions. For 'Face' "
-                 "centering you will want to leave this above 75%. For Head centering you will "
-                 "most likely want to set this to 100%. Sensible values for 'Legacy' "
-                 "centering are:"
-                 "\n\t62.5% spans from eyebrow to eyebrow."
-                 "\n\t75.0% spans from temple to temple."
-                 "\n\t87.5% spans from ear to ear."
-                 "\n\t100.0% is a mugshot.")
-
+            group=_("face"),
+            info=_(
+                "How much of the extracted image to train on. A lower coverage will limit the "
+                "model's scope to a zoomed-in central area while higher amounts can include the "
+                "entire face. A trade-off exists between lower amounts given more detail "
+                "versus higher amounts avoiding noticeable swap transitions. For 'Face' "
+                "centering you will want to leave this above 75%. For Head centering you will "
+                "most likely want to set this to 100%. Sensible values for 'Legacy' "
+                "centering are:"
+                "\n\t62.5% spans from eyebrow to eyebrow."
+                "\n\t75.0% spans from temple to temple."
+                "\n\t87.5% spans from ear to ear."
+                "\n\t100.0% is a mugshot."))
        self.add_item(
            section=section,
            title="icnr_init",
            datatype=bool,
            default=False,
-            group="initialization",
-            info="Use ICNR to tile the default initializer in a repeating pattern. "
-                 "This strategy is designed for pairing with sub-pixel / pixel shuffler "
-                 "to reduce the 'checkerboard effect' in image reconstruction. "
-                 "\n\t https://arxiv.org/ftp/arxiv/papers/1707/1707.02937.pdf")
+            group=_("initialization"),
+            info=_(
+                "Use ICNR to tile the default initializer in a repeating pattern. "
+                "This strategy is designed for pairing with sub-pixel / pixel shuffler "
+                "to reduce the 'checkerboard effect' in image reconstruction. "
+                "\n\t https://arxiv.org/ftp/arxiv/papers/1707/1707.02937.pdf"))
        self.add_item(
            section=section,
            title="conv_aware_init",
            datatype=bool,
            default=False,
-            group="initialization",
-            info="Use Convolution Aware Initialization for convolutional layers. "
-                 "This can help eradicate the vanishing and exploding gradient problem "
-                 "as well as lead to higher accuracy, lower loss and faster convergence.\nNB:"
-                 "\n\t This can use more VRAM when creating a new model so you may want to "
-                 "lower the batch size for the first run. The batch size can be raised "
-                 "again when reloading the model. "
-                 "\n\t Multi-GPU is not supported for this option, so you should start the model "
-                 "on a single GPU. Once training has started, you can stop training, enable "
-                 "multi-GPU and resume."
-                 "\n\t Building the model will likely take several minutes as the calculations "
-                 "for this initialization technique are expensive. This will only impact starting "
-                 "a new model.")
+            group=_("initialization"),
+            info=_(
+                "Use Convolution Aware Initialization for convolutional layers. "
+                "This can help eradicate the vanishing and exploding gradient problem "
+                "as well as lead to higher accuracy, lower loss and faster convergence.\nNB:"
+                "\n\t This can use more VRAM when creating a new model so you may want to "
+                "lower the batch size for the first run. The batch size can be raised "
+                "again when reloading the model. "
+                "\n\t Multi-GPU is not supported for this option, so you should start the model "
+                "on a single GPU. Once training has started, you can stop training, enable "
+                "multi-GPU and resume."
+                "\n\t Building the model will likely take several minutes as the calculations "
+                "for this initialization technique are expensive. This will only impact starting "
+                "a new model."))
        self.add_item(
            section=section,
            title="optimizer",
            datatype=str,
            gui_radio=True,
-            group="optimizer",
+            group=_("optimizer"),
            default="adam",
            choices=["adabelief", "adam", "nadam", "rms-prop"],
-            info="The optimizer to use."
-                 "\n\t adabelief - Adapting Stepsizes by the Belief in Observed Gradients. An "
-                 "optimizer with the aim to converge faster, generalize better and remain more "
-                 "stable. (https://arxiv.org/abs/2010.07468). NB: Epsilon for AdaBelief needs to "
-                 "be set to a smaller value than other Optimizers. Generally setting the 'Epsilon "
-                 "Exponent' to around '-16' should work."
-                 "\n\t adam - Adaptive Moment Optimization. A stochastic gradient descent method "
-                 "that is based on adaptive estimation of first-order and second-order moments."
-                 "\n\t nadam - Adaptive Moment Optimization with Nesterov Momentum. Much like "
-                 "Adam but uses a different formula for calculating momentum."
-                 "\n\t rms-prop - Root Mean Square Propagation. Maintains a moving (discounted) "
-                 "average of the square of the gradients. Divides the gradient by the root of "
-                 "this average.")
+            info=_(
+                "The optimizer to use."
+                "\n\t adabelief - Adapting Stepsizes by the Belief in Observed Gradients. An "
+                "optimizer with the aim to converge faster, generalize better and remain more "
+                "stable. (https://arxiv.org/abs/2010.07468). NB: Epsilon for AdaBelief needs to "
+                "be set to a smaller value than other Optimizers. Generally setting the 'Epsilon "
+                "Exponent' to around '-16' should work."
+                "\n\t adam - Adaptive Moment Optimization. A stochastic gradient descent method "
+                "that is based on adaptive estimation of first-order and second-order moments."
+                "\n\t nadam - Adaptive Moment Optimization with Nesterov Momentum. Much like "
+                "Adam but uses a different formula for calculating momentum."
+                "\n\t rms-prop - Root Mean Square Propagation. Maintains a moving (discounted) "
+                "average of the square of the gradients. Divides the gradient by the root of "
+                "this average."))
        self.add_item(
            section=section,
            title="learning_rate",
@ -207,12 +220,13 @@ class Config(FaceswapConfig):
            min_max=(1e-6, 1e-4),
            rounding=6,
            fixed=False,
-            group="optimizer",
-            info="Learning rate - how fast your network will learn (how large are the "
-                 "modifications to the model weights after one batch of training). Values that "
-                 "are too large might result in model crashes and the inability of the model to "
-                 "find the best solution. Values that are too small might be unable to escape "
-                 "from dead-ends and find the best global minimum.")
+            group=_("optimizer"),
+            info=_(
+                "Learning rate - how fast your network will learn (how large are the "
+                "modifications to the model weights after one batch of training). Values that "
+                "are too large might result in model crashes and the inability of the model to "
+                "find the best solution. Values that are too small might be unable to escape "
+                "from dead-ends and find the best global minimum."))
        self.add_item(
            section=section,
            title="epsilon_exponent",
@ -221,82 +235,88 @@ class Config(FaceswapConfig):
            min_max=(-20, 0),
            rounding=1,
            fixed=False,
-            group="optimizer",
-            info="The epsilon adds a small constant to weight updates to attempt to avoid 'divide "
-                 "by zero' errors. Unless you are using the AdaBelief Optimizer, then Generally "
-                 "this option should be left at default value, For AdaBelief, setting this to "
-                 "around '-16' should work.\n"
-                 "In all instances if you are getting 'NaN' loss values, and have been unable to "
-                 "resolve the issue any other way (for example, increasing batch size, or "
-                 "lowering learning rate), then raising the epsilon can lead to a more stable "
-                 "model. It may, however, come at the cost of slower training and a less accurate "
-                 "final result.\n"
-                 "NB: The value given here is the 'exponent' to the epsilon. For example, "
-                 "choosing '-7' will set the epsilon to 1e-7. Choosing '-3' will set the epsilon "
-                 "to 0.001 (1e-3).")
+            group=_("optimizer"),
+            info=_(
+                "The epsilon adds a small constant to weight updates to attempt to avoid 'divide "
+                "by zero' errors. Unless you are using the AdaBelief Optimizer, then Generally "
+                "this option should be left at default value, For AdaBelief, setting this to "
+                "around '-16' should work.\n"
+                "In all instances if you are getting 'NaN' loss values, and have been unable to "
+                "resolve the issue any other way (for example, increasing batch size, or "
+                "lowering learning rate), then raising the epsilon can lead to a more stable "
+                "model. It may, however, come at the cost of slower training and a less accurate "
+                "final result.\n"
+                "NB: The value given here is the 'exponent' to the epsilon. For example, "
+                "choosing '-7' will set the epsilon to 1e-7. Choosing '-3' will set the epsilon "
+                "to 0.001 (1e-3)."))
        self.add_item(
            section=section,
            title="autoclip",
            datatype=bool,
            default=False,
-            info="[Not PlaidML] Apply AutoClipping to the gradients. AutoClip analyzes the "
-                 "gradient weights and adjusts the normalization value dynamically to fit the "
-                 "data. Can help prevent NaNs and improve model optimization at the expense of "
-                 "VRAM. Ref: AutoClip: Adaptive Gradient Clipping for Source Separation Networks "
-                 "https://arxiv.org/abs/2007.14469",
+            info=_(
+                "[Not PlaidML] Apply AutoClipping to the gradients. AutoClip analyzes the "
+                "gradient weights and adjusts the normalization value dynamically to fit the "
+                "data. Can help prevent NaNs and improve model optimization at the expense of "
+                "VRAM. Ref: AutoClip: Adaptive Gradient Clipping for Source Separation Networks "
+                "https://arxiv.org/abs/2007.14469"),
            fixed=False,
            gui_radio=True,
-            group="optimizer")
+            group=_("optimizer"))
        self.add_item(
            section=section,
            title="reflect_padding",
            datatype=bool,
            default=False,
-            group="network",
-            info="Use reflection padding rather than zero padding with convolutions. "
-                 "Each convolution must pad the image boundaries to maintain the proper "
-                 "sizing. More complex padding schemes can reduce artifacts at the "
-                 "border of the image."
-                 "\n\t http://www-cs.engr.ccny.cuny.edu/~wolberg/cs470/hw/hw2_pad.txt")
+            group=_("network"),
+            info=_(
+                "Use reflection padding rather than zero padding with convolutions. "
+                "Each convolution must pad the image boundaries to maintain the proper "
+                "sizing. More complex padding schemes can reduce artifacts at the "
+                "border of the image."
+                "\n\t http://www-cs.engr.ccny.cuny.edu/~wolberg/cs470/hw/hw2_pad.txt"))
        self.add_item(
            section=section,
            title="allow_growth",
            datatype=bool,
            default=False,
-            group="network",
+            group=_("network"),
            fixed=False,
-            info="[Nvidia Only]. Enable the Tensorflow GPU 'allow_growth' configuration option. "
-                 "This option prevents Tensorflow from allocating all of the GPU VRAM at launch "
-                 "but can lead to higher VRAM fragmentation and slower performance. Should only "
-                 "be enabled if you are receiving errors regarding 'cuDNN fails to initialize' "
-                 "when commencing training.")
+            info=_(
+                "[Nvidia Only]. Enable the Tensorflow GPU 'allow_growth' configuration option. "
+                "This option prevents Tensorflow from allocating all of the GPU VRAM at launch "
+                "but can lead to higher VRAM fragmentation and slower performance. Should only "
+                "be enabled if you are receiving errors regarding 'cuDNN fails to initialize' "
+                "when commencing training."))
        self.add_item(
            section=section,
            title="mixed_precision",
            datatype=bool,
            default=False,
            fixed=False,
-            group="network",
-            info="[Not PlaidML], NVIDIA GPUs can run operations in float16 faster than in "
-                 "float32. Mixed precision allows you to use a mix of float16 with float32, to "
-                 "get the performance benefits from float16 and the numeric stability benefits "
-                 "from float32.\n\nThis is untested on DirectML backend, but will run on most "
-                 "Nvidia models. it will only speed up training on more recent GPUs. Those with "
-                 "compute capability 7.0 or higher will see the greatest performance benefit from "
-                 "mixed precision because they have Tensor Cores. Older GPUs offer no math "
-                 "performance benefit for using mixed precision, however memory and bandwidth "
-                 "savings can enable some speedups. Generally RTX GPUs and later will offer the "
-                 "most benefit.")
+            group=_("network"),
+            info=_(
+                "[Not PlaidML], NVIDIA GPUs can run operations in float16 faster than in "
+                "float32. Mixed precision allows you to use a mix of float16 with float32, to "
+                "get the performance benefits from float16 and the numeric stability benefits "
+                "from float32.\n\nThis is untested on DirectML backend, but will run on most "
+                "Nvidia models. it will only speed up training on more recent GPUs. Those with "
+                "compute capability 7.0 or higher will see the greatest performance benefit from "
+                "mixed precision because they have Tensor Cores. Older GPUs offer no math "
+                "performance benefit for using mixed precision, however memory and bandwidth "
+                "savings can enable some speedups. Generally RTX GPUs and later will offer the "
+                "most benefit."))
        self.add_item(
            section=section,
            title="nan_protection",
            datatype=bool,
            default=True,
-            group="network",
-            info="If a 'NaN' is generated in the model, this means that the model has corrupted "
-                 "and the model is likely to start deteriorating from this point on. Enabling NaN "
-                 "protection will stop training immediately in the event of a NaN. The last save "
-                 "will not contain the NaN, so you may still be able to rescue your model.",
+            group=_("network"),
+            info=_(
+                "If a 'NaN' is generated in the model, this means that the model has corrupted "
+                "and the model is likely to start deteriorating from this point on. Enabling NaN "
+                "protection will stop training immediately in the event of a NaN. The last save "
+                "will not contain the NaN, so you may still be able to rescue your model."),
            fixed=False)
        self.add_item(
            section=section,
@ -306,11 +326,12 @@ class Config(FaceswapConfig):
            min_max=(1, 32),
            rounding=1,
            fixed=False,
-            group="convert",
-            info="[GPU Only]. The number of faces to feed through the model at once when running "
-                 "the Convert process.\n\nNB: Increasing this figure is unlikely to improve "
-                 "convert speed, however, if you are getting Out of Memory errors, then you may "
-                 "want to reduce the batch size.")
+            group=_("convert"),
+            info=_(
+                "[GPU Only]. The number of faces to feed through the model at once when running "
+                "the Convert process.\n\nNB: Increasing this figure is unlikely to improve "
+                "convert speed, however, if you are getting Out of Memory errors, then you may "
+                "want to reduce the batch size."))

    def _set_loss(self) -> None:
        # pylint:disable=line-too-long
@ -326,171 +347,177 @@ class Config(FaceswapConfig):
        logger.debug("Setting Loss config")
        section = "global.loss"
        self.add_section(section,
-                         "Loss configuration options\n"
-                         "Loss is the mechanism by which a Neural Network judges how well it "
-                         "thinks that it is recreating a face." + ADDITIONAL_INFO)
+                         _("Loss configuration options\n"
+                           "Loss is the mechanism by which a Neural Network judges how well it "
+                           "thinks that it is recreating a face.") + ADDITIONAL_INFO)
        self.add_item(
            section=section,
            title="loss_function",
            datatype=str,
-            group="loss",
+            group=_("loss"),
            default="ssim",
            fixed=False,
            choices=[x for x in sorted(_LOSS_HELP) if x not in _NON_PRIMARY_LOSS],
-            info="The loss function to use.\n\n\t" +
-                 "\n\n\t".join(f"{k}: {v}"
-                               for k, v in sorted(_LOSS_HELP.items())
-                               if k not in _NON_PRIMARY_LOSS))
+            info=(_("The loss function to use.") +
+                  "\n\n\t" + "\n\n\t".join(f"{k}: {v}"
+                                           for k, v in sorted(_LOSS_HELP.items())
+                                           if k not in _NON_PRIMARY_LOSS)))
        self.add_item(
            section=section,
            title="loss_function_2",
            datatype=str,
-            group="loss",
+            group=_("loss"),
            default="mse",
            fixed=False,
            choices=list(sorted(_LOSS_HELP)),
-            info="The second loss function to use. If using a structural based loss (such as "
-                 "SSIM, MS-SSIM or GMSD) it is common to add an L1 regularization(MAE) or L2 "
-                 "regularization (MSE) function. You can adjust the weighting of this loss "
-                 "function with the loss_weight_2 option.\n\n\t" +
-                 "\n\n\t".join(f"{k}: {v}"
-                               for k, v in sorted(_LOSS_HELP.items())))
+            info=(_("The second loss function to use. If using a structural based loss (such as "
+                    "SSIM, MS-SSIM or GMSD) it is common to add an L1 regularization(MAE) or L2 "
+                    "regularization (MSE) function. You can adjust the weighting of this loss "
+                    "function with the loss_weight_2 option.") +
+                  "\n\n\t" + "\n\n\t".join(f"{k}: {v}" for k, v in sorted(_LOSS_HELP.items()))))
        self.add_item(
            section=section,
            title="loss_weight_2",
            datatype=int,
-            group="loss",
+            group=_("loss"),
            min_max=(0, 400),
            rounding=1,
            default=100,
            fixed=False,
-            info="The amount of weight to apply to the second loss function.\n\n"
-                 "\n\nThe value given here is as a percentage denoting how much the selected "
-                 "function should contribute to the overall loss cost of the model. For example:"
-                 "\n\t 100 - The loss calculated for the second loss function will be applied at "
-                 "its full amount towards the overall loss score. "
-                 "\n\t 25 - The loss calculated for the second loss function will be reduced by a "
-                 "quarter prior to adding to the overall loss score. "
-                 "\n\t 400 - The loss calculated for the second loss function will be mulitplied "
-                 "4 times prior to adding to the overall loss score. "
-                 "\n\t 0 - Disables the second loss function altogether.")
+            info=_(
+                "The amount of weight to apply to the second loss function.\n\n"
+                "\n\nThe value given here is as a percentage denoting how much the selected "
+                "function should contribute to the overall loss cost of the model. For example:"
+                "\n\t 100 - The loss calculated for the second loss function will be applied at "
+                "its full amount towards the overall loss score. "
+                "\n\t 25 - The loss calculated for the second loss function will be reduced by a "
+                "quarter prior to adding to the overall loss score. "
+                "\n\t 400 - The loss calculated for the second loss function will be mulitplied "
+                "4 times prior to adding to the overall loss score. "
+                "\n\t 0 - Disables the second loss function altogether."))
        self.add_item(
            section=section,
            title="loss_function_3",
            datatype=str,
-            group="loss",
+            group=_("loss"),
            default="none",
            fixed=False,
            choices=list(sorted(_LOSS_HELP)),
-            info="The third loss function to use. You can adjust the weighting of this loss "
-                 "function with the loss_weight_3 option.\n\n\t" +
-                 "\n\n\t".join(f"{k}: {v}"
-                               for k, v in sorted(_LOSS_HELP.items())))
+            info=(_("The third loss function to use. You can adjust the weighting of this loss "
+                    "function with the loss_weight_3 option.") +
+                  "\n\n\t" +
+                  "\n\n\t".join(f"{k}: {v}" for k, v in sorted(_LOSS_HELP.items()))))
        self.add_item(
            section=section,
            title="loss_weight_3",
            datatype=int,
-            group="loss",
+            group=_("loss"),
            min_max=(0, 400),
            rounding=1,
            default=0,
            fixed=False,
-            info="The amount of weight to apply to the third loss function.\n\n"
-                 "\n\nThe value given here is as a percentage denoting how much the selected "
-                 "function should contribute to the overall loss cost of the model. For example:"
-                 "\n\t 100 - The loss calculated for the third loss function will be applied at "
-                 "its full amount towards the overall loss score. "
-                 "\n\t 25 - The loss calculated for the third loss function will be reduced by a "
-                 "quarter prior to adding to the overall loss score. "
-                 "\n\t 400 - The loss calculated for the third loss function will be mulitplied 4 "
-                 "times prior to adding to the overall loss score. "
-                 "\n\t 0 - Disables the third loss function altogether.")
+            info=_(
+                "The amount of weight to apply to the third loss function.\n\n"
+                "\n\nThe value given here is as a percentage denoting how much the selected "
+                "function should contribute to the overall loss cost of the model. For example:"
+                "\n\t 100 - The loss calculated for the third loss function will be applied at "
+                "its full amount towards the overall loss score. "
+                "\n\t 25 - The loss calculated for the third loss function will be reduced by a "
+                "quarter prior to adding to the overall loss score. "
+                "\n\t 400 - The loss calculated for the third loss function will be mulitplied 4 "
+                "times prior to adding to the overall loss score. "
+                "\n\t 0 - Disables the third loss function altogether."))
        self.add_item(
            section=section,
            title="loss_function_4",
            datatype=str,
-            group="loss",
+            group=_("loss"),
            default="none",
            fixed=False,
            choices=list(sorted(_LOSS_HELP)),
-            info="The fourth loss function to use. You can adjust the weighting of this loss "
-                 "function with the loss_weight_3 option.\n\n\t" +
-                 "\n\n\t".join(f"{k}: {v}"
-                               for k, v in sorted(_LOSS_HELP.items())))
+            info=(_("The fourth loss function to use. You can adjust the weighting of this loss "
+                    "function with the loss_weight_3 option.") +
+                  "\n\n\t" +
+                  "\n\n\t".join(f"{k}: {v}" for k, v in sorted(_LOSS_HELP.items()))))
        self.add_item(
            section=section,
            title="loss_weight_4",
            datatype=int,
-            group="loss",
+            group=_("loss"),
            min_max=(0, 400),
            rounding=1,
            default=0,
            fixed=False,
-            info="The amount of weight to apply to the fourth loss function.\n\n"
-                 "\n\nThe value given here is as a percentage denoting how much the selected "
-                 "function should contribute to the overall loss cost of the model. For example:"
-                 "\n\t 100 - The loss calculated for the fourth loss function will be applied at "
-                 "its full amount towards the overall loss score. "
-                 "\n\t 25 - The loss calculated for the fourth loss function will be reduced by a "
-                 "quarter prior to adding to the overall loss score. "
-                 "\n\t 400 - The loss calculated for the fourth loss function will be mulitplied "
-                 "4 times prior to adding to the overall loss score. "
-                 "\n\t 0 - Disables the fourth loss function altogether.")
+            info=_(
+                "The amount of weight to apply to the fourth loss function.\n\n"
+                "\n\nThe value given here is as a percentage denoting how much the selected "
+                "function should contribute to the overall loss cost of the model. For example:"
+                "\n\t 100 - The loss calculated for the fourth loss function will be applied at "
+                "its full amount towards the overall loss score. "
+                "\n\t 25 - The loss calculated for the fourth loss function will be reduced by a "
+                "quarter prior to adding to the overall loss score. "
+                "\n\t 400 - The loss calculated for the fourth loss function will be mulitplied "
+                "4 times prior to adding to the overall loss score. "
+                "\n\t 0 - Disables the fourth loss function altogether."))
        self.add_item(
            section=section,
            title="mask_loss_function",
            datatype=str,
-            group="loss",
+            group=_("loss"),
            default="mse",
            fixed=False,
            choices=["mae", "mse"],
-            info="The loss function to use when learning a mask."
-                 "\n\t MAE - Mean absolute error will guide reconstructions of each pixel "
-                 "towards its median value in the training dataset. Robust to outliers but as "
-                 "a median, it can potentially ignore some infrequent image types in the dataset."
-                 "\n\t MSE - Mean squared error will guide reconstructions of each pixel "
-                 "towards its average value in the training dataset. As an average, it will be "
-                 "susceptible to outliers and typically produces slightly blurrier results.")
+            info=_(
+                "The loss function to use when learning a mask."
+                "\n\t MAE - Mean absolute error will guide reconstructions of each pixel "
+                "towards its median value in the training dataset. Robust to outliers but as "
+                "a median, it can potentially ignore some infrequent image types in the dataset."
+                "\n\t MSE - Mean squared error will guide reconstructions of each pixel "
+                "towards its average value in the training dataset. As an average, it will be "
+                "susceptible to outliers and typically produces slightly blurrier results."))
        self.add_item(
            section=section,
            title="eye_multiplier",
            datatype=int,
-            group="loss",
+            group=_("loss"),
            min_max=(1, 40),
            rounding=1,
            default=3,
            fixed=False,
-            info="The amount of priority to give to the eyes.\n\nThe value given here is as a "
-                 "multiplier of the main loss score. For example:"
-                 "\n\t 1 - The eyes will receive the same priority as the rest of the face. "
-                 "\n\t 10 - The eyes will be given a score 10 times higher than the rest of the "
-                 "face."
-                 "\n\nNB: Penalized Mask Loss must be enable to use this option.")
+            info=_(
+                "The amount of priority to give to the eyes.\n\nThe value given here is as a "
+                "multiplier of the main loss score. For example:"
+                "\n\t 1 - The eyes will receive the same priority as the rest of the face. "
+                "\n\t 10 - The eyes will be given a score 10 times higher than the rest of the "
+                "face."
+                "\n\nNB: Penalized Mask Loss must be enable to use this option."))
        self.add_item(
            section=section,
            title="mouth_multiplier",
            datatype=int,
-            group="loss",
+            group=_("loss"),
            min_max=(1, 40),
            rounding=1,
            default=2,
            fixed=False,
-            info="The amount of priority to give to the mouth.\n\nThe value given here is as a "
-                 "multiplier of the main loss score. For Example:"
-                 "\n\t 1 - The mouth will receive the same priority as the rest of the face. "
-                 "\n\t 10 - The mouth will be given a score 10 times higher than the rest of the "
-                 "face."
-                 "\n\nNB: Penalized Mask Loss must be enable to use this option.")
+            info=_(
+                "The amount of priority to give to the mouth.\n\nThe value given here is as a "
+                "multiplier of the main loss score. For Example:"
+                "\n\t 1 - The mouth will receive the same priority as the rest of the face. "
+                "\n\t 10 - The mouth will be given a score 10 times higher than the rest of the "
+                "face."
+                "\n\nNB: Penalized Mask Loss must be enable to use this option."))
        self.add_item(
            section=section,
            title="penalized_mask_loss",
            datatype=bool,
            default=True,
-            group="loss",
-            info="Image loss function is weighted by mask presence. For areas of "
-                 "the image without the facial mask, reconstruction errors will be "
-                 "ignored while the masked face area is prioritized. May increase "
-                 "overall quality by focusing attention on the core face area.")
+            group=_("loss"),
+            info=_(
+                "Image loss function is weighted by mask presence. For areas of "
+                "the image without the facial mask, reconstruction errors will be "
+                "ignored while the masked face area is prioritized. May increase "
+                "overall quality by focusing attention on the core face area."))
        self.add_item(
            section=section,
            title="mask_type",
@ -498,40 +525,41 @@ class Config(FaceswapConfig):
            default="extended",
            choices=PluginLoader.get_available_extractors("mask",
                                                          add_none=True, extend_plugin=True),
-            group="mask",
+            group=_("mask"),
            gui_radio=True,
-            info="The mask to be used for training. If you have selected 'Learn Mask' or "
-                 "'Penalized Mask Loss' you must select a value other than 'none'. The required "
-                 "mask should have been selected as part of the Extract process. If it does not "
-                 "exist in the alignments file then it will be generated prior to training "
-                 "commencing."
-                 "\n\tnone: Don't use a mask."
-                 "\n\tbisenet-fp_face: Relatively lightweight NN based mask that provides more "
-                 "refined control over the area to be masked (configurable in mask settings). "
-                 "Use this version of bisenet-fp if your model is trained with 'face' or "
-                 "'legacy' centering."
-                 "\n\tbisenet-fp_head: Relatively lightweight NN based mask that provides more "
-                 "refined control over the area to be masked (configurable in mask settings). "
-                 "Use this version of bisenet-fp if your model is trained with 'head' centering."
-                 "\n\tcomponents: Mask designed to provide facial segmentation based on the "
-                 "positioning of landmark locations. A convex hull is constructed around the "
-                 "exterior of the landmarks to create a mask."
-                 "\n\tcustom_face: Custom user created, face centered mask."
-                 "\n\tcustom_head: Custom user created, head centered mask."
-                 "\n\textended: Mask designed to provide facial segmentation based on the "
-                 "positioning of landmark locations. A convex hull is constructed around the "
-                 "exterior of the landmarks and the mask is extended upwards onto the forehead."
-                 "\n\tvgg-clear: Mask designed to provide smart segmentation of mostly frontal "
-                 "faces clear of obstructions. Profile faces and obstructions may result in "
-                 "sub-par performance."
-                 "\n\tvgg-obstructed: Mask designed to provide smart segmentation of mostly "
-                 "frontal faces. The mask model has been specifically trained to recognize "
-                 "some facial obstructions (hands and eyeglasses). Profile faces may result in "
-                 "sub-par performance."
-                 "\n\tunet-dfl: Mask designed to provide smart segmentation of mostly frontal "
-                 "faces. The mask model has been trained by community members and will need "
-                 "testing for further description. Profile faces may result in sub-par "
-                 "performance.")
+            info=_(
+                "The mask to be used for training. If you have selected 'Learn Mask' or "
+                "'Penalized Mask Loss' you must select a value other than 'none'. The required "
+                "mask should have been selected as part of the Extract process. If it does not "
+                "exist in the alignments file then it will be generated prior to training "
+                "commencing."
+                "\n\tnone: Don't use a mask."
+                "\n\tbisenet-fp_face: Relatively lightweight NN based mask that provides more "
+                "refined control over the area to be masked (configurable in mask settings). "
+                "Use this version of bisenet-fp if your model is trained with 'face' or "
+                "'legacy' centering."
+                "\n\tbisenet-fp_head: Relatively lightweight NN based mask that provides more "
+                "refined control over the area to be masked (configurable in mask settings). "
+                "Use this version of bisenet-fp if your model is trained with 'head' centering."
+                "\n\tcomponents: Mask designed to provide facial segmentation based on the "
+                "positioning of landmark locations. A convex hull is constructed around the "
+                "exterior of the landmarks to create a mask."
+                "\n\tcustom_face: Custom user created, face centered mask."
+                "\n\tcustom_head: Custom user created, head centered mask."
+                "\n\textended: Mask designed to provide facial segmentation based on the "
+                "positioning of landmark locations. A convex hull is constructed around the "
+                "exterior of the landmarks and the mask is extended upwards onto the forehead."
+                "\n\tvgg-clear: Mask designed to provide smart segmentation of mostly frontal "
+                "faces clear of obstructions. Profile faces and obstructions may result in "
+                "sub-par performance."
+                "\n\tvgg-obstructed: Mask designed to provide smart segmentation of mostly "
+                "frontal faces. The mask model has been specifically trained to recognize "
+                "some facial obstructions (hands and eyeglasses). Profile faces may result in "
+                "sub-par performance."
+                "\n\tunet-dfl: Mask designed to provide smart segmentation of mostly frontal "
+                "faces. The mask model has been trained by community members and will need "
+                "testing for further description. Profile faces may result in sub-par "
+                "performance."))
        self.add_item(
            section=section,
            title="mask_blur_kernel",
@ -539,12 +567,13 @@ class Config(FaceswapConfig):
            min_max=(0, 9),
            rounding=1,
            default=3,
-            group="mask",
-            info="Apply gaussian blur to the mask input. This has the effect of smoothing the "
-                 "edges of the mask, which can help with poorly calculated masks and give less "
-                 "of a hard edge to the predicted mask. The size is in pixels (calculated from "
-                 "a 128px mask). Set to 0 to not apply gaussian blur. This value should be odd, "
-                 "if an even number is passed in then it will be rounded to the next odd number.")
+            group=_("mask"),
+            info=_(
+                "Apply gaussian blur to the mask input. This has the effect of smoothing the "
+                "edges of the mask, which can help with poorly calculated masks and give less "
+                "of a hard edge to the predicted mask. The size is in pixels (calculated from "
+                "a 128px mask). Set to 0 to not apply gaussian blur. This value should be odd, "
+                "if an even number is passed in then it will be rounded to the next odd number."))
        self.add_item(
            section=section,
            title="mask_threshold",
@ -552,15 +581,17 @@ class Config(FaceswapConfig):
            default=4,
            min_max=(0, 50),
            rounding=1,
-            group="mask",
-            info="Sets pixels that are near white to white and near black to black. Set to 0 for "
-                 "off.")
+            group=_("mask"),
+            info=_(
+                "Sets pixels that are near white to white and near black to black. Set to 0 for "
+                "off."))
        self.add_item(
            section=section,
            title="learn_mask",
            datatype=bool,
            default=False,
-            group="mask",
-            info="Dedicate a portion of the model to learning how to duplicate the input "
-                 "mask. Increases VRAM usage in exchange for learning a quick ability to try "
-                 "to replicate more complex mask models.")
+            group=_("mask"),
+            info=_(
+                "Dedicate a portion of the model to learning how to duplicate the input "
+                "mask. Increases VRAM usage in exchange for learning a quick ability to try "
+                "to replicate more complex mask models."))