update transcribe other language fnc

2025-06-07 06:05:52 -04:00 · 2024-03-19 03:52:27 -07:00 · 2024-03-19 03:52:27 -07:00 · 0d2bbdb836
commit 0d2bbdb836
parent a29fe161bc
2 changed files with 33 additions and 6 deletions
--- a/changelog.md
+++ b/changelog.md
@ -9,6 +9,7 @@ AR Quantization
 - ggml - https://github.com/ggerganov/ggml/issues/59
 - TortoiseCPP https://github.com/balisujohn/tortoise.cpp

+## 3/19/2024

 ## 3/17/2024
 - Adding in other language capability training to the repo; a few files are modified in the dlas and tortoise-tts modules for the cleaners to allow this to happen.
--- a/src/webui.py
+++ b/src/webui.py
@ -313,9 +313,22 @@ def prepare_dataset_proxy(voice, language, validation_text_length, validation_au
    return "\n".join(messages)


-def transcribe_other_language_proxy(voice, language, chunk_size, continuation_directory, align):
+def transcribe_other_language_proxy(voice, language, chunk_size, continuation_directory, align, rename):
    indir = f'./training/{voice}/processed'
    dataset_dir = os.path.join(indir, "run")
+    merge_dir = os.path.join(dataset_dir, "dataset/wav_splits")
+    audio_dataset_path = os.path.join(merge_dir, 'audio')
+    train_text_path = os.path.join(dataset_dir, 'dataset/train.txt')
+    validation_text_path = os.path.join(dataset_dir, 'dataset/validation.txt')
+    voice_root_path = os.path.join ("./training", voice)
+    
+    items_to_move = [audio_dataset_path, train_text_path, validation_text_path]
+    
+    for item in items_to_move:
+        if os.path.exists(os.path.join(voice_root_path, os.path.basename(item))):
+            raise gr.Error(f"Before running this button again, please remove the following from training/{voice}:\ntrain.txt\nvalidation.txt\naudio")
+            
+    
    if continuation_directory:
        dataset_dir = f'./training/{voice}/processed/{continuation_directory}'

@ -324,18 +337,27 @@ def transcribe_other_language_proxy(voice, language, chunk_size, continuation_di
        new_dataset_dir = os.path.join(indir, f"run_{current_datetime}")
        os.rename(dataset_dir, new_dataset_dir)

-    chosen_directory = f'./voices/{voice}'
+    chosen_directory = os.path.join("./voices", voice)

    process_audio_files(base_directory=dataset_dir,
                        language=language,
                        audio_dir=chosen_directory,
                        chunk_size=chunk_size,
-                        no_align=align)
-    merge_dir = os.path.join(dataset_dir, "dataset/wav_splits")
-    
+                        no_align=align,
+                        rename_files=rename)
+
    merge_segments(merge_dir)
    

+    
+    for item in items_to_move:
+        if os.path.exists(os.path.join(voice_root_path, os.path.basename(item))):
+            print("Already exists")
+        else:
+            shutil.move(item, voice_root_path)
+    
+    
+

 def update_args_proxy(*args):
    kwargs = {}
@ -754,6 +776,9 @@ def setup_gradio():
                            DATASET2_SETTINGS['align'] = gr.Checkbox(
                                label="Disable Alignment", value=False   
                            )
+                            DATASET2_SETTINGS['rename'] = gr.Checkbox(
+                                label="Rename Audio Files", value=True
+                            )
                        transcribe2_button = gr.Button(
                            value="Transcribe and Process")
                        # dataset2_settings = list(DATASET2_SETTINGS.values()) # Really only need this for tqdm to extract values
@ -1248,7 +1273,8 @@ def setup_gradio():
                DATASET2_SETTINGS['language'],
                DATASET2_SETTINGS['chunk_size'],
                DATASET2_SETTINGS['continue_directory'],
-                DATASET2_SETTINGS["align"]
+                DATASET2_SETTINGS["align"],
+                DATASET2_SETTINGS["rename"]
            ]
        )