mirror of
https://github.com/JarodMica/ai-voice-cloning.git
synced 2025-06-07 06:05:52 -04:00
update transcribe other language fnc
This commit is contained in:
parent
a29fe161bc
commit
0d2bbdb836
2 changed files with 33 additions and 6 deletions
|
@ -9,6 +9,7 @@ AR Quantization
|
|||
- ggml - https://github.com/ggerganov/ggml/issues/59
|
||||
- TortoiseCPP https://github.com/balisujohn/tortoise.cpp
|
||||
|
||||
## 3/19/2024
|
||||
|
||||
## 3/17/2024
|
||||
- Adding in other-language training capability to the repo; a few files are modified in the dlas and tortoise-tts modules for the cleaners to allow this to happen.
|
||||
|
|
38
src/webui.py
38
src/webui.py
|
@ -313,9 +313,22 @@ def prepare_dataset_proxy(voice, language, validation_text_length, validation_au
|
|||
return "\n".join(messages)
|
||||
|
||||
|
||||
def transcribe_other_language_proxy(voice, language, chunk_size, continuation_directory, align):
|
||||
def transcribe_other_language_proxy(voice, language, chunk_size, continuation_directory, align, rename):
|
||||
indir = f'./training/{voice}/processed'
|
||||
dataset_dir = os.path.join(indir, "run")
|
||||
merge_dir = os.path.join(dataset_dir, "dataset/wav_splits")
|
||||
audio_dataset_path = os.path.join(merge_dir, 'audio')
|
||||
train_text_path = os.path.join(dataset_dir, 'dataset/train.txt')
|
||||
validation_text_path = os.path.join(dataset_dir, 'dataset/validation.txt')
|
||||
voice_root_path = os.path.join ("./training", voice)
|
||||
|
||||
items_to_move = [audio_dataset_path, train_text_path, validation_text_path]
|
||||
|
||||
for item in items_to_move:
|
||||
if os.path.exists(os.path.join(voice_root_path, os.path.basename(item))):
|
||||
raise gr.Error(f"Before running this button again, please remove the following from training/{voice}:\ntrain.txt\nvalidation.txt\naudio")
|
||||
|
||||
|
||||
if continuation_directory:
|
||||
dataset_dir = f'./training/{voice}/processed/{continuation_directory}'
|
||||
|
||||
|
@ -324,18 +337,27 @@ def transcribe_other_language_proxy(voice, language, chunk_size, continuation_di
|
|||
new_dataset_dir = os.path.join(indir, f"run_{current_datetime}")
|
||||
os.rename(dataset_dir, new_dataset_dir)
|
||||
|
||||
chosen_directory = f'./voices/{voice}'
|
||||
chosen_directory = os.path.join("./voices", voice)
|
||||
|
||||
process_audio_files(base_directory=dataset_dir,
|
||||
language=language,
|
||||
audio_dir=chosen_directory,
|
||||
chunk_size=chunk_size,
|
||||
no_align=align)
|
||||
merge_dir = os.path.join(dataset_dir, "dataset/wav_splits")
|
||||
|
||||
no_align=align,
|
||||
rename_files=rename)
|
||||
|
||||
merge_segments(merge_dir)
|
||||
|
||||
|
||||
|
||||
for item in items_to_move:
|
||||
if os.path.exists(os.path.join(voice_root_path, os.path.basename(item))):
|
||||
print("Already exists")
|
||||
else:
|
||||
shutil.move(item, voice_root_path)
|
||||
|
||||
|
||||
|
||||
|
||||
def update_args_proxy(*args):
|
||||
kwargs = {}
|
||||
|
@ -754,6 +776,9 @@ def setup_gradio():
|
|||
DATASET2_SETTINGS['align'] = gr.Checkbox(
|
||||
label="Disable Alignment", value=False
|
||||
)
|
||||
DATASET2_SETTINGS['rename'] = gr.Checkbox(
|
||||
label="Rename Audio Files", value=True
|
||||
)
|
||||
transcribe2_button = gr.Button(
|
||||
value="Transcribe and Process")
|
||||
# dataset2_settings = list(DATASET2_SETTINGS.values()) # Really only need this for tqdm to extract values
|
||||
|
@ -1248,7 +1273,8 @@ def setup_gradio():
|
|||
DATASET2_SETTINGS['language'],
|
||||
DATASET2_SETTINGS['chunk_size'],
|
||||
DATASET2_SETTINGS['continue_directory'],
|
||||
DATASET2_SETTINGS["align"]
|
||||
DATASET2_SETTINGS["align"],
|
||||
DATASET2_SETTINGS["rename"]
|
||||
]
|
||||
)
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue