From b1807b8168019b39d29eb6c5065c147193bb0eec Mon Sep 17 00:00:00 2001 From: Fmstrat Date: Mon, 15 Apr 2024 07:59:32 -0400 Subject: [PATCH] added linux support via docker --- .dockerignore | 1 + .gitignore | 4 +- Dockerfile | 74 +++++++++++++++++++++--- README.md | 47 ++++++++++----- setup-cuda.sh | 0 setup-docker.sh | 17 +++++- setup-rocm-bnb.sh | 0 setup-rocm.sh | 0 src/utils.py | 7 ++- start-docker.sh | 25 ++++---- start.sh | 8 ++- train-docker.sh | 0 train.sh | 0 update-force.sh | 0 update.sh | 0 voices/random/cond_latents_d1f79232.pth | Bin 920 -> 920 bytes 16 files changed, 142 insertions(+), 41 deletions(-) mode change 100644 => 100755 setup-cuda.sh mode change 100644 => 100755 setup-docker.sh mode change 100644 => 100755 setup-rocm-bnb.sh mode change 100644 => 100755 setup-rocm.sh mode change 100644 => 100755 start-docker.sh mode change 100644 => 100755 start.sh mode change 100644 => 100755 train-docker.sh mode change 100644 => 100755 train.sh mode change 100644 => 100755 update-force.sh mode change 100644 => 100755 update.sh diff --git a/.dockerignore b/.dockerignore index d316141..d6d63f6 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,3 +2,4 @@ /training /voices /bin +Dockerfile diff --git a/.gitignore b/.gitignore index 83b7e01..6e46296 100644 --- a/.gitignore +++ b/.gitignore @@ -148,4 +148,6 @@ dmypy.json .custom/* results/* -debug_states/* \ No newline at end of file +debug_states/* +bin/* + diff --git a/Dockerfile b/Dockerfile index 7ff4dd3..0a87fba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,36 @@ -FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 +FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 ARG DEBIAN_FRONTEND=noninteractive ARG TZ=UTC ARG MINICONDA_VERSION=23.1.0-1 -ARG PYTHON_VERSION=3.9.13 +ARG PYTHON_VERSION=3.11 +ARG UID=1000 +ARG GID=1000 +# TZ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +# Prereqs RUN apt-get update -RUN apt install -y curl wget git ffmpeg -RUN adduser --disabled-password --gecos '' --shell /bin/bash user +RUN apt-get install -y \ + curl \ + wget \ + git \ + ffmpeg \ + p7zip-full \ + gcc \ + g++ \ + vim + +# User +RUN groupadd --gid $GID user +RUN useradd --no-log-init --create-home --shell /bin/bash --uid $UID --gid $GID user USER user ENV HOME=/home/user WORKDIR $HOME RUN mkdir $HOME/.cache $HOME/.config && chmod -R 777 $HOME + +# Python RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh RUN chmod +x Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh RUN ./Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh -b -p /home/user/miniconda @@ -20,18 +38,60 @@ ENV PATH="$HOME/miniconda/bin:$PATH" RUN conda init RUN conda install python=$PYTHON_VERSION RUN python3 -m pip install --upgrade pip -RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 +RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 +# Base path RUN mkdir $HOME/ai-voice-cloning WORKDIR $HOME/ai-voice-cloning -COPY --chown=user:user modules modules +# Built in modules +COPY --chown=user:user modules modules RUN python3 -m pip install -r ./modules/tortoise-tts/requirements.txt RUN python3 -m pip install -e ./modules/tortoise-tts/ RUN python3 -m pip install -r ./modules/dlas/requirements.txt RUN python3 -m pip install -e ./modules/dlas/ + +# RVC +RUN \ + curl -L -o /tmp/rvc.zip https://huggingface.co/Jmica/rvc/resolve/main/rvc_lightweight.zip?download=true &&\ + 7z x /tmp/rvc.zip &&\ + rm -f /tmp/rvc.zip +USER root +RUN \ + chown user:user rvc -R &&\ + chmod -R u+rwX,go+rX,go-w rvc +USER user +RUN python3 -m pip install -r ./rvc/requirements.txt + +# Fairseq +# Using patched version for Python 3.11 due to https://github.com/facebookresearch/fairseq/issues/5012 +RUN python3 -m pip install git+https://github.com/liyaodev/fairseq + +# RVC Pipeline +RUN python3 -m pip install git+https://github.com/JarodMica/rvc-tts-pipeline.git@lightweight#egg=rvc_tts_pipe + +# Deepspeed +RUN python3 -m pip install deepspeed + +# PyFastMP3Decoder +RUN python3 -m pip install cython +RUN git clone https://github.com/neonbjb/pyfastmp3decoder.git +RUN \ + cd pyfastmp3decoder &&\ + git submodule update --init --recursive &&\ + python setup.py install &&\ + cd .. + +# WhisperX +RUN python3 -m pip install git+https://github.com/m-bain/whisperx.git + +# Main requirements ADD requirements.txt requirements.txt RUN python3 -m pip install -r ./requirements.txt + +# The app ADD --chown=user:user . $HOME/ai-voice-cloning -CMD ["python", "./src/main.py", "--listen", "0.0.0.0:7680"] +ENV IN_DOCKER=true + +CMD ["./start.sh"] diff --git a/README.md b/README.md index 0565001..b16d8c5 100644 --- a/README.md +++ b/README.md @@ -14,21 +14,20 @@ That being said, some enhancements added compared to the original repo: This is a fork of the repo originally located here: https://git.ecker.tech/mrq/ai-voice-cloning. All of the work that was put into it to incoporate training with DLAS and inference with Tortoise belong to mrq, the author of the original ai-voice-cloning repo. -## Setup (Windows + Nvidia) -This repo only works on **Windows with NVIDIA GPUs**. I don't have any plans on making it compatible with other systems, but it shouldn't be too difficult to port if you have experience in coding or are an expert level ChatGPT user. If you do successfully do this and want to share, pull requests are always welcome! -> **Tips for developers:** setup-cuda.bat should have everything that you need for the packages to be installed. All of the different requirements files make it quite a mess in the script, but each repo has their requirements installed, and then at the end, the requirements.txt in the root is needed to change the version *back* to compatible versions for this repo. +## Setup +This repo works on **Windows with NVIDIA GPUs** and **Linux running Docker with NVIDIA GPUs**. -### Package Installation (Recommended) -Install 7zip on your computer: https://www.7-zip.org/ +### Windows +1. Optional, but recommended: Install 7zip on your computer: https://www.7-zip.org/ - If you run into any extraction issues, most likely it's due to your 7zip being out-of-date OR you're using a different extractor. +2. Head over to the releases tab and download the latest package on Hugging Face: https://github.com/JarodMica/ai-voice-cloning/releases/tag/v2.0 +3. Extract the 7zip archive. +4. Open up ai-voice-cloning and then run ```start.bat``` -1. Head over to the releases tab and download the latest package on Hugging Face: https://github.com/JarodMica/ai-voice-cloning/releases/tag/v2.0 -2. Extract the 7zip archive. -3. Open up ai-voice-cloning and then run ```start.bat``` +#### Alternative Manual Installation -### Manual Installation If you are installing this manually, you will need: -- Python 3.9: https://www.python.org/downloads/release/python-3913/ +- Python 3.11: https://www.python.org/downloads/release/python-311/ - Git: https://www.git-scm.com/downloads 1. Clone the repository @@ -36,12 +35,19 @@ If you are installing this manually, you will need: git clone https://github.com/JarodMica/ai-voice-cloning.git ``` 2. Run the ```setup-cuda.bat``` file and it will start running through all of the python packages needed - - If you don't have python 3.9, it won't work and you'll need to go download it + - If you don't have python 3.11, it won't work and you'll need to go download it 3. After it finishes, run ```start.bat``` and this will start downloading most of the models you'll need. - Some models are downloaded when you first use them. You'll incur additional downloads during generation and when training (for whisper). However, once they are finished, you won't ever have to download them again as long as you don't delete them. They are located in the ```models``` folder of the root. 4. **(Optional)** You can opt to install whisperx for training by running ```setup-whipserx.bat``` - Check out the whisperx github page for more details, but it's much faster for longer audio files. If you're processing one-by-one with an already split dataset, it doesn't improve speeds that much. + +### Linux via Docker +1. Clone the repository: `git clone https://github.com/JarodMica/ai-voice-cloning.git && cd ai-voice-cloning` +2. Build the image with `./setup-docker.sh` +3. Start the container with `./start-docker.sh` + + ## Instructions Checkout the YouTube video: Watch First: https://www.youtube.com/watch?v=p31Ax_A5VKA&t=158s @@ -57,12 +63,12 @@ You will now have access to parameters you could adjust in RVC for the RVC voice ## Updating Your Installation Below are how you can update the package for the latest updates -### Package +### Windows >**NOTE:** If there are major feature change, check the latest release to see if ```update_package.bat``` will work. If NOT, you will need to re-download and re-extract the package from Hugging Face. -1. Run the update_package.bat file +1. Run the `update_package.bat `file - It will clone the repo and will copy the src folder from the repo to the package. -### Manual Installation +#### Alternative Manual Installation You should be able to navigate into the folder and then pull the repo to update it. ``` cd ai-voice-cloning @@ -70,6 +76,14 @@ git pull ``` If there are large features added, you may need to delete the venv and the re-run the setup-cuda script to make sure there are no package issues +### Linux via Docker +You should be able to navigate into the folder and then pull the repo to update it, then rebuild your Docker image. +``` +cd ai-voice-cloning +git pull +./setup-docker.sh +``` + ## Documentation ### Troubleshooting Manual Installation @@ -82,8 +96,9 @@ pip uninstall torch pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 ``` -Other documentation tbd - ## Bug Reporting If you run into any problems, please open up a new issue on the issues tab. + +## Tips for developers +`setup-cuda.bat` should have everything that you need for the packages to be installed. All of the different requirements files make it quite a mess in the script, but each repo has their requirements installed, and then at the end, the `requirements.txt` in the root is needed to change the version *back* to compatible versions for this repo. diff --git a/setup-cuda.sh b/setup-cuda.sh old mode 100644 new mode 100755 diff --git a/setup-docker.sh b/setup-docker.sh old mode 100644 new mode 100755 index c4ca008..14aab64 --- a/setup-docker.sh +++ b/setup-docker.sh @@ -1,4 +1,15 @@ #!/bin/bash -git submodule init -git submodule update --remote -docker build -t ai-voice-cloning . + +function main() { + if [ ! -f modules/tortoise-tts/README.md ]; then + git submodule init + git submodule update --remote + fi + docker build \ + --build-arg UID=$(id -u) \ + --build-arg GID=$(id -g) \ + -t ai-voice-cloning \ + . +} + +main diff --git a/setup-rocm-bnb.sh b/setup-rocm-bnb.sh old mode 100644 new mode 100755 diff --git a/setup-rocm.sh b/setup-rocm.sh old mode 100644 new mode 100755 diff --git a/src/utils.py b/src/utils.py index ce158ee..a9c0f4c 100644 --- a/src/utils.py +++ b/src/utils.py @@ -43,6 +43,8 @@ from tortoise.api_fast import TextToSpeech as Toroise_TTS_Hifi from tortoise.utils.audio import load_audio, load_voice, load_voices, get_voice_dir, get_voices from tortoise.utils.text import split_and_recombine_text from tortoise.utils.device import get_device_name, set_device_name, get_device_count, get_device_vram, get_device_batch_size, do_gc +# TODO: The below import blocks any CLI parameters. +# Try running with --low-vram from rvc_pipe.rvc_infer import rvc_convert MODELS['dvae.pth'] = "https://huggingface.co/jbetker/tortoise-tts-v2/resolve/3704aea61678e7e468a06d8eea121dba368a798e/.models/dvae.pth" @@ -1181,6 +1183,7 @@ def generate_tortoise(**kwargs): model_hash = settings["model_hash"][:8] if settings is not None and "model_hash" in settings else tts.autoregressive_model_hash[:8] dir = f'{get_voice_dir()}/{voice}/' + # TODO: Use of model_hash here causes issues in development as new hashes are added to the repo. latents_path = f'{dir}/cond_latents_{model_hash}.pth' if voice == "random" or voice == "microphone": @@ -3789,7 +3792,9 @@ def unload_tts(): do_gc() def reload_tts(): - subprocess.Popen(["start.bat"]) + in_docker = os.environ.get("IN_DOCKER", "false") + if in_docker == "false": + subprocess.Popen(["start.bat"]) with open("reload_flag.txt", "w") as f: f.write("reload") os.kill(os.getpid(), signal.SIGTERM) # Or signal.SIGKILL for an even harder kill diff --git a/start-docker.sh b/start-docker.sh old mode 100644 new mode 100755 index b11279f..0d64f44 --- a/start-docker.sh +++ b/start-docker.sh @@ -1,14 +1,19 @@ #!/bin/bash -CMD="python3 ./src/main.py $@" -# CMD="bash" -CPATH="/home/user/ai-voice-cloning" -docker run --rm --gpus all \ - --mount "type=bind,src=$PWD/models,dst=$CPATH/models" \ - --mount "type=bind,src=$PWD/training,dst=$CPATH/training" \ - --mount "type=bind,src=$PWD/voices,dst=$CPATH/voices" \ - --mount "type=bind,src=$PWD/bin,dst=$CPATH/bin" \ - --workdir $CPATH \ + +docker run \ + -ti \ + --rm \ + --gpus all \ + --name ai-voice-cloning \ + -v "${PWD}/models:/home/user/ai-voice-cloning/models" \ + -v "${PWD}/training:/home/user/ai-voice-cloning/training" \ + -v "${PWD}/voices:/home/user/ai-voice-cloning/voices" \ + -v "${PWD}/bin:/home/user/ai-voice-cloning/bin" \ + -v "${PWD}/config:/home/user/ai-voice-cloning/config" \ --user "$(id -u):$(id -g)" \ --net host \ - -it ai-voice-cloning $CMD + ai-voice-cloning +# For dev: +# -v "${PWD}/src:/home/user/ai-voice-cloning/src" \ +# -v "/home/user/ai-voice-cloning/src/__pycache__" \ diff --git a/start.sh b/start.sh old mode 100644 new mode 100755 index e0ac548..489948e --- a/start.sh +++ b/start.sh @@ -1,5 +1,7 @@ #!/bin/bash ulimit -Sn `ulimit -Hn` # ROCm is a bitch -source ./venv/bin/activate -python3 ./src/main.py "$@" -deactivate +while [ true ]; do + python3 ./src/main.py "$@" + echo "Press Cntrl-C to quit or application will restart... (5s)" + sleep 5 +done diff --git a/train-docker.sh b/train-docker.sh old mode 100644 new mode 100755 diff --git a/train.sh b/train.sh old mode 100644 new mode 100755 diff --git a/update-force.sh b/update-force.sh old mode 100644 new mode 100755 diff --git a/update.sh b/update.sh old mode 100644 new mode 100755 diff --git a/voices/random/cond_latents_d1f79232.pth b/voices/random/cond_latents_d1f79232.pth index a5527c40236ee42703931e1c4b73593577a1d7c9..b1d917c33a419559731435fd47b80e76ea785314 100644 GIT binary patch delta 68 zcmbQiK7)P3K}LNuV>1&|b8};JOA|8_149E-Lt_IlFtju_Ff=kVGB-0dH?R!wX6LBl QSKd7N1LGeS2!m+_07{?|)Bpeg delta 68 zcmbQiK7)P3K}LN;Q%iF*a|1JDLjwy#3sZALQ)2@#Ftju_Ff=kVGB-0dH?R!wX6I0- QvG1Myf$