cleaning up things

This commit is contained in:
Jarod Mica 2024-03-25 11:28:39 -07:00
parent 8e01cd8fad
commit acbf1b2b2f
3 changed files with 108 additions and 33 deletions

View file

@ -9,6 +9,17 @@ AR Quantization
- ggml - https://github.com/ggerganov/ggml/issues/59
- TortoiseCPP https://github.com/balisujohn/tortoise.cpp
## 3/24/2024
Just cleaning up some things and running tests on the code to make sure it functions as it should. I should think of maybe a way to automate this... but that's a problem for another time.
- Added some values, like the number of processes (num_processes) to spawn based on your CPU cores, for conversion tasks
- Changed tab "Prepare Other Language" to "Prepare Dataset for Large Files"
- Moved all of the imports inside of main.py into the __name__ check to reduce overhead of multiprocessing
- Ironing out continuation of transcription in case it is interrupted; so far, in the cases I've tested, I've fixed the issues and added appropriate code to accommodate these situations. The only test case that doesn't work correctly would be if a file is interrupted in the middle of splitting segments based on the srt script, since the segments never get written to train.txt...
- Maybe have a way of mapping what has already been segmented to the srt file that exists there? I'll have to think about this one.
Other stuff
- Removes the "temp" file that is created for rename
- Modified the dataset script maker to ignore folders that contain mp3 segments already
## 3/23/2024
- Comment out valle and bark instantiations to clean up console

View file

@ -1,29 +1,29 @@
import os
import sys
if os.path.exists("runtime"):
# Get the directory where the script is located
script_dir = os.path.dirname(os.path.abspath(__file__))
# Add this directory to sys.path
if script_dir not in sys.path:
sys.path.insert(0, script_dir)
if 'TORTOISE_MODELS_DIR' not in os.environ:
os.environ['TORTOISE_MODELS_DIR'] = os.path.realpath(os.path.join(os.getcwd(), './models/tortoise/'))
if 'TRANSFORMERS_CACHE' not in os.environ:
os.environ['TRANSFORMERS_CACHE'] = os.path.realpath(os.path.join(os.getcwd(), './models/transformers/'))
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
from utils import *
from webui import *
from api.websocket_server import start_websocket_server
# Moved all of the imports into __name__ == "__main__" due to how multiprocessing spawns instances, makes multiprocessing faster as it reduces import overhead
if __name__ == "__main__":
import os
import sys
if os.path.exists("runtime"):
# Get the directory where the script is located
script_dir = os.path.dirname(os.path.abspath(__file__))
# Add this directory to sys.path
if script_dir not in sys.path:
sys.path.insert(0, script_dir)
if 'TORTOISE_MODELS_DIR' not in os.environ:
os.environ['TORTOISE_MODELS_DIR'] = os.path.realpath(os.path.join(os.getcwd(), './models/tortoise/'))
if 'TRANSFORMERS_CACHE' not in os.environ:
os.environ['TRANSFORMERS_CACHE'] = os.path.realpath(os.path.join(os.getcwd(), './models/transformers/'))
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
from utils import *
from webui import *
from api.websocket_server import start_websocket_server
args = setup_args()
if args.listen_path is not None and args.listen_path != "/":

View file

@ -314,7 +314,8 @@ def prepare_dataset_proxy(voice, language, validation_text_length, validation_au
return "\n".join(messages)
def transcribe_other_language_proxy(voice, language, chunk_size, continuation_directory, align, rename, progress=gr.Progress(track_tqdm=True)):
def transcribe_other_language_proxy(voice, language, chunk_size, continuation_directory, align, rename, num_processes, keep_originals, progress=gr.Progress(track_tqdm=True)):
num_processes = int(num_processes)
training_folder = get_training_folder(voice)
processed_folder = os.path.join(training_folder,"processed")
dataset_dir = os.path.join(processed_folder, "run")
@ -323,6 +324,8 @@ def transcribe_other_language_proxy(voice, language, chunk_size, continuation_di
train_text_path = os.path.join(dataset_dir, 'dataset/train.txt')
validation_text_path = os.path.join(dataset_dir, 'dataset/validation.txt')
large_file_num_processes = int(num_processes/2) # Used for instances where larger files are being processed, as to not run out of RAM
items_to_move = [audio_dataset_path, train_text_path, validation_text_path]
for item in items_to_move:
@ -339,21 +342,74 @@ def transcribe_other_language_proxy(voice, language, chunk_size, continuation_di
from modules.tortoise_dataset_tools.audio_conversion_tools.split_long_file import get_duration, process_folder
chosen_directory = os.path.join("./voices", voice)
file_durations = [get_duration(os.path.join(chosen_directory, item)) for item in os.listdir(chosen_directory) if os.path.isfile(os.path.join(chosen_directory, item))]
items = os.listdir(chosen_directory)
# In case of sudden restart, removes this intermediary file used for rename
for file in items:
if "file___" in file:
os.remove(os.path.join(chosen_directory, file))
file_durations = [get_duration(os.path.join(chosen_directory, item)) for item in items if os.path.isfile(os.path.join(chosen_directory, item))]
progress(0.0, desc="Splitting long files")
if any(duration > 3600*2 for duration in file_durations):
process_folder(chosen_directory)
process_folder(chosen_directory, large_file_num_processes)
if not keep_originals:
originals_pre_split_path = os.path.join(chosen_directory, "original_pre_split")
try:
shutil.rmtree(originals_pre_split_path)
except:
# There is no directory to delete
pass
progress(0.0, desc="Converting to MP3 files") # add tqdm later
import modules.tortoise_dataset_tools.audio_conversion_tools.convert_to_mp3 as c2mp3
# Hacky way to get the functions working without changing where they output to...
for item in os.listdir(chosen_directory):
if os.path.isfile(os.path.join(chosen_directory, item)):
original_dir = os.path.join(chosen_directory, "original_files")
if not os.path.exists(original_dir):
os.makedirs(original_dir)
item_path = os.path.join(chosen_directory, item)
try:
shutil.move(item_path, original_dir)
except:
os.remove(item_path)
try:
c2mp3.process_folder(original_dir, large_file_num_processes)
except:
raise gr.Error('No files found in the voice folder specified, make sure it is not empty. If you interrupted the process, the files may be in the "original_files" folder')
# Hacky way to move the files back into the main voice folder
for item in os.listdir(os.path.join(original_dir, "converted")):
item_path = os.path.join(original_dir, "converted", item)
if os.path.isfile(item_path):
try:
shutil.move(item_path, chosen_directory)
except:
os.remove(item_path)
if not keep_originals:
originals_files = os.path.join(chosen_directory, "original_files")
try:
shutil.rmtree(originals_files)
except:
# There is no directory to delete
pass
progress(0.1, desc="Processing audio files")
progress(0.4, desc="Processing audio files")
process_audio_files(base_directory=dataset_dir,
language=language,
audio_dir=chosen_directory,
chunk_size=chunk_size,
no_align=align,
rename_files=rename)
progress(0.5, desc="Audio processing completed")
rename_files=rename,
num_processes=num_processes)
progress(0.7, desc="Audio processing completed")
progress(0.5, desc="Merging segments")
progress(0.7, desc="Merging segments")
merge_segments(merge_dir)
progress(0.9, desc="Segment merging completed")
@ -791,7 +847,7 @@ def setup_gradio():
with gr.Column():
prepare_dataset_output = gr.TextArea(
label="Console Output", interactive=False, max_lines=8)
with gr.Tab("Prepare Large Files"):
with gr.Tab("Prepare Dataset for Large Files"):
with gr.Row():
with gr.Column():
DATASET2_SETTINGS = {}
@ -810,6 +866,9 @@ def setup_gradio():
label="Language", value="en")
DATASET2_SETTINGS['chunk_size'] = gr.Textbox(
label="Chunk Size", value="20")
DATASET2_SETTINGS['num_processes'] = gr.Textbox(
label="Processes to Use", value=int(max(1, multiprocessing.cpu_count())))
with gr.Row():
DATASET2_SETTINGS['align'] = gr.Checkbox(
label="Disable WhisperX Alignment", value=False
@ -817,6 +876,9 @@ def setup_gradio():
DATASET2_SETTINGS['rename'] = gr.Checkbox(
label="Rename Audio Files", value=True
)
DATASET2_SETTINGS['keep_originals'] = gr.Checkbox(
label="Keep Original Files", value=True
)
transcribe2_button = gr.Button(
value="Transcribe and Process")
@ -1323,7 +1385,9 @@ def setup_gradio():
DATASET2_SETTINGS['chunk_size'],
DATASET2_SETTINGS['continue_directory'],
DATASET2_SETTINGS["align"],
DATASET2_SETTINGS["rename"]
DATASET2_SETTINGS["rename"],
DATASET2_SETTINGS['num_processes'],
DATASET2_SETTINGS['keep_originals']
],
outputs=transcribe2_output
)