mirror of
https://github.com/JarodMica/ai-voice-cloning.git
synced 2025-06-07 06:05:52 -04:00
save changes and move back to master branch
This commit is contained in:
parent
c9552688c6
commit
8c66ab3e5b
5 changed files with 81 additions and 31 deletions
|
@ -9,6 +9,12 @@ AR Quantization
|
|||
- ggml - https://github.com/ggerganov/ggml/issues/59
|
||||
- TortoiseCPP https://github.com/balisujohn/tortoise.cpp
|
||||
|
||||
## 8/24/2024
|
||||
Haven't done work here in awhile, with that being the case, I've forgotten quite a bit of the things that I was working on
|
||||
- The purpose of this repo was to explore the idea of expanding the embedding table for vocabulary in tortoise, this is very much possible and how I did it is in expand_tortoise.py
|
||||
- This branch removes the clvp model and allows for 1 sample inference (the updates are done in the tortoise repo not this one though)
|
||||
- Looking at the changes, added advanced settings inside of the webui
|
||||
|
||||
## 5/3/2024
|
||||
### Oops
|
||||
- I've been training on the multilingual dataset for the past few days... and I forgot to convert the train.txt kanji into hiragana... oops.
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit a01bb6f7f47f37cba10de6d25136b20b80a56e8b
|
||||
Subproject commit f117be60a71501f0399522cd17f945cfea73cdb3
|
|
@ -1,5 +1,7 @@
|
|||
# Moved all of the imports into __name__ == "__main__" due to how multiprocessing spawns instances, makes multiprocessing faster as it reduces import overhead
|
||||
|
||||
# Need to check hz of dataset prep
|
||||
|
||||
if __name__ == "__main__":
|
||||
import os
|
||||
import sys
|
||||
|
|
|
@ -4028,6 +4028,7 @@ def load_whisper_model(language=None, model_name=None, progress=None):
|
|||
whisper_align_model = whisperx.load_align_model(model_name="WAV2VEC2_ASR_LARGE_LV60K_960H" if language=="en" else None, language_code=language, device=device)
|
||||
|
||||
print("Loaded Whisper model")
|
||||
return whisper_model
|
||||
|
||||
def unload_whisper():
|
||||
global whisper_model
|
||||
|
|
101
src/webui.py
101
src/webui.py
|
@ -314,7 +314,9 @@ def prepare_dataset_proxy(voice, language, validation_text_length, validation_au
|
|||
return "\n".join(messages)
|
||||
|
||||
|
||||
def transcribe_other_language_proxy(voice, language, chunk_size, continuation_directory, align, rename, num_processes, keep_originals, progress=gr.Progress(track_tqdm=True)):
|
||||
def transcribe_other_language_proxy(voice, language, chunk_size, continuation_directory, align, rename, num_processes, keep_originals,
|
||||
srt_multiprocessing, ext, speaker_id, progress=gr.Progress(track_tqdm=True)):
|
||||
whisper_model = load_whisper_model(language=language)
|
||||
num_processes = int(num_processes)
|
||||
training_folder = get_training_folder(voice)
|
||||
processed_folder = os.path.join(training_folder,"processed")
|
||||
|
@ -369,33 +371,35 @@ def transcribe_other_language_proxy(voice, language, chunk_size, continuation_di
|
|||
pass
|
||||
|
||||
progress(0.0, desc="Converting to MP3 files") # add tqdm later
|
||||
import modules.tortoise_dataset_tools.audio_conversion_tools.convert_to_mp3 as c2mp3
|
||||
|
||||
# Hacky way to get the functions working without changing where they output to...
|
||||
for item in os.listdir(chosen_directory):
|
||||
if os.path.isfile(os.path.join(chosen_directory, item)):
|
||||
original_dir = os.path.join(chosen_directory, "original_files")
|
||||
if not os.path.exists(original_dir):
|
||||
os.makedirs(original_dir)
|
||||
item_path = os.path.join(chosen_directory, item)
|
||||
try:
|
||||
shutil.move(item_path, original_dir)
|
||||
except:
|
||||
os.remove(item_path)
|
||||
|
||||
try:
|
||||
c2mp3.process_folder(original_dir, large_file_num_processes)
|
||||
except:
|
||||
raise gr.Error('No files found in the voice folder specified, make sure it is not empty. If you interrupted the process, the files may be in the "original_files" folder')
|
||||
|
||||
# Hacky way to move the files back into the main voice folder
|
||||
for item in os.listdir(os.path.join(original_dir, "converted")):
|
||||
item_path = os.path.join(original_dir, "converted", item)
|
||||
if os.path.isfile(item_path):
|
||||
try:
|
||||
shutil.move(item_path, chosen_directory)
|
||||
except:
|
||||
os.remove(item_path)
|
||||
if ext=="mp3":
|
||||
import modules.tortoise_dataset_tools.audio_conversion_tools.convert_to_mp3 as c2mp3
|
||||
|
||||
# Hacky way to get the functions working without changing where they output to...
|
||||
for item in os.listdir(chosen_directory):
|
||||
if os.path.isfile(os.path.join(chosen_directory, item)):
|
||||
original_dir = os.path.join(chosen_directory, "original_files")
|
||||
if not os.path.exists(original_dir):
|
||||
os.makedirs(original_dir)
|
||||
item_path = os.path.join(chosen_directory, item)
|
||||
try:
|
||||
shutil.move(item_path, original_dir)
|
||||
except:
|
||||
os.remove(item_path)
|
||||
|
||||
try:
|
||||
c2mp3.process_folder(original_dir, large_file_num_processes)
|
||||
except:
|
||||
raise gr.Error('No files found in the voice folder specified, make sure it is not empty. If you interrupted the process, the files may be in the "original_files" folder')
|
||||
|
||||
# Hacky way to move the files back into the main voice folder
|
||||
for item in os.listdir(os.path.join(original_dir, "converted")):
|
||||
item_path = os.path.join(original_dir, "converted", item)
|
||||
if os.path.isfile(item_path):
|
||||
try:
|
||||
shutil.move(item_path, chosen_directory)
|
||||
except:
|
||||
os.remove(item_path)
|
||||
|
||||
if not keep_originals:
|
||||
originals_files = os.path.join(chosen_directory, "original_files")
|
||||
|
@ -412,7 +416,13 @@ def transcribe_other_language_proxy(voice, language, chunk_size, continuation_di
|
|||
chunk_size=chunk_size,
|
||||
no_align=align,
|
||||
rename_files=rename,
|
||||
num_processes=num_processes)
|
||||
num_processes=num_processes,
|
||||
whisper_model=whisper_model,
|
||||
srt_multiprocessing=srt_multiprocessing,
|
||||
ext=ext,
|
||||
speaker_id=speaker_id,
|
||||
sr_rate
|
||||
)
|
||||
progress(0.7, desc="Audio processing completed")
|
||||
|
||||
progress(0.7, desc="Merging segments")
|
||||
|
@ -875,9 +885,11 @@ def setup_gradio():
|
|||
DATASET2_SETTINGS['chunk_size'] = gr.Textbox(
|
||||
label="Chunk Size", value="15")
|
||||
DATASET2_SETTINGS['num_processes'] = gr.Textbox(
|
||||
label="Processes to Use", value=int(max(1, multiprocessing.cpu_count())))
|
||||
label="Processes to Use", value=int(max(1, multiprocessing.cpu_count())-2))
|
||||
|
||||
with gr.Row():
|
||||
EXEC_SETTINGS['whisper_model'] = gr.Dropdown(
|
||||
WHISPER_MODELS, label="Whisperx Model", value=args.whisper_model)
|
||||
DATASET2_SETTINGS['align'] = gr.Checkbox(
|
||||
label="Disable WhisperX Alignment", value=False
|
||||
)
|
||||
|
@ -887,6 +899,19 @@ def setup_gradio():
|
|||
DATASET2_SETTINGS['keep_originals'] = gr.Checkbox(
|
||||
label="Keep Original Files", value=True
|
||||
)
|
||||
|
||||
advanced_toggle = gr.Button(value="Show Advanced Settings")
|
||||
|
||||
with gr.Row(visible=False) as advanced_settings_row:
|
||||
DATASET2_SETTINGS["srt_multiprocessing"] = gr.Checkbox(
|
||||
label="Disable for Files < 20s", value=True
|
||||
)
|
||||
DATASET2_SETTINGS["ext"] = gr.Dropdown(
|
||||
label="Audio Extension", value="mp3", choices=["wav", "mp3"]
|
||||
)
|
||||
DATASET2_SETTINGS["speaker_id"] = gr.Checkbox(
|
||||
label="Speaker ID", value=False
|
||||
)
|
||||
transcribe2_button = gr.Button(
|
||||
value="Transcribe and Process")
|
||||
|
||||
|
@ -1397,7 +1422,10 @@ def setup_gradio():
|
|||
DATASET2_SETTINGS["align"],
|
||||
DATASET2_SETTINGS["rename"],
|
||||
DATASET2_SETTINGS['num_processes'],
|
||||
DATASET2_SETTINGS['keep_originals']
|
||||
DATASET2_SETTINGS['keep_originals'],
|
||||
DATASET2_SETTINGS["srt_multiprocessing"],
|
||||
DATASET2_SETTINGS['ext'],
|
||||
DATASET2_SETTINGS['speaker_id']
|
||||
],
|
||||
outputs=transcribe2_output
|
||||
)
|
||||
|
@ -1417,6 +1445,19 @@ def setup_gradio():
|
|||
],
|
||||
outputs=transcribe2_output
|
||||
)
|
||||
# Function to toggle advanced settings visibility
|
||||
def toggle_advanced_settings(show):
|
||||
if show == "Show Advanced Settings":
|
||||
return gr.update(value="Hide Advanced Settings"), gr.update(visible=True)
|
||||
else:
|
||||
return gr.update(value="Show Advanced Settings"), gr.update(visible=False)
|
||||
|
||||
# Connect the toggle button to the toggle function
|
||||
advanced_toggle.click(
|
||||
fn=toggle_advanced_settings,
|
||||
inputs=[advanced_toggle],
|
||||
outputs=[advanced_toggle, advanced_settings_row]
|
||||
)
|
||||
|
||||
transcribe_all_button.click(
|
||||
prepare_all_datasets,
|
||||
|
|
Loading…
Add table
Reference in a new issue