Merge pull request #81 from Fmstrat/linux-and-docker

Add Linux support via Docker
This commit is contained in:
Jarod Mica 2024-04-23 13:25:12 -07:00 committed by GitHub
commit 24892dc2cc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 174 additions and 45 deletions

View file

@ -2,3 +2,4 @@
/training
/voices
/bin
Dockerfile

4
.gitignore vendored
View file

@ -148,4 +148,6 @@ dmypy.json
.custom/*
results/*
debug_states/*
debug_states/*
bin/*

View file

@ -1,18 +1,36 @@
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
ARG DEBIAN_FRONTEND=noninteractive
ARG TZ=UTC
ARG MINICONDA_VERSION=23.1.0-1
ARG PYTHON_VERSION=3.9.13
ARG PYTHON_VERSION=3.11
ARG UID=1000
ARG GID=1000
# TZ
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
# Prereqs
RUN apt-get update
RUN apt install -y curl wget git ffmpeg
RUN adduser --disabled-password --gecos '' --shell /bin/bash user
# Install OS-level build and runtime tools. The package index is refreshed in
# the same layer as the install so a cached `apt-get update` layer can never be
# paired with an install that expects a newer index.
RUN apt-get update && apt-get install -y \
    curl \
    wget \
    git \
    ffmpeg \
    p7zip-full \
    gcc \
    g++ \
    vim
# User
RUN groupadd --gid $GID user
RUN useradd --no-log-init --create-home --shell /bin/bash --uid $UID --gid $GID user
USER user
ENV HOME=/home/user
WORKDIR $HOME
RUN mkdir $HOME/.cache $HOME/.config && chmod -R 777 $HOME
# Python
# Download the Miniconda installer, run it in batch mode (-b) with the install
# prefix /home/user/miniconda (-p), and delete the installer in the same RUN so
# the script is not kept in an image layer.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh && \
    chmod +x Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh && \
    ./Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh -b -p /home/user/miniconda && \
    rm -f Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh
@ -20,18 +38,60 @@ ENV PATH="$HOME/miniconda/bin:$PATH"
RUN conda init
RUN conda install python=$PYTHON_VERSION
RUN python3 -m pip install --upgrade pip
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# Base path
RUN mkdir $HOME/ai-voice-cloning
WORKDIR $HOME/ai-voice-cloning
COPY --chown=user:user modules modules
# Built in modules
COPY --chown=user:user modules modules
RUN python3 -m pip install -r ./modules/tortoise-tts/requirements.txt
RUN python3 -m pip install -e ./modules/tortoise-tts/
RUN python3 -m pip install -r ./modules/dlas/requirements.txt
RUN python3 -m pip install -e ./modules/dlas/
# RVC
# Download the RVC bundle, unpack it into the working directory and normalise
# its permissions in a single layer.
# - The URL is quoted so the shell never treats `?` as a glob character.
# - `-f` makes curl fail on an HTTP error instead of saving the error page.
# - The archive is extracted by the non-root `user` account selected earlier
#   with USER, so the files are already owned by that account; the owner can
#   chmod them directly and no root chown pass (which would copy the whole tree
#   into another layer) is required.
RUN \
    curl -fL -o /tmp/rvc.zip "https://huggingface.co/Jmica/rvc/resolve/main/rvc_lightweight.zip?download=true" &&\
    7z x /tmp/rvc.zip &&\
    rm -f /tmp/rvc.zip &&\
    chmod -R u+rwX,go+rX,go-w rvc
RUN python3 -m pip install -r ./rvc/requirements.txt
# Fairseq
# Using patched version for Python 3.11 due to https://github.com/facebookresearch/fairseq/issues/5012
RUN python3 -m pip install git+https://github.com/liyaodev/fairseq
# RVC Pipeline
RUN python3 -m pip install git+https://github.com/JarodMica/rvc-tts-pipeline.git@lightweight#egg=rvc_tts_pipe
# Deepspeed
RUN python3 -m pip install deepspeed
# PyFastMP3Decoder
RUN python3 -m pip install cython
# Clone pyfastmp3decoder, pull its submodules and build/install it in a single
# layer so a cached clone is never paired with a fresh build step. No trailing
# `cd ..` is needed: every RUN starts again in WORKDIR.
RUN \
    git clone https://github.com/neonbjb/pyfastmp3decoder.git &&\
    cd pyfastmp3decoder &&\
    git submodule update --init --recursive &&\
    python setup.py install
# WhisperX
RUN python3 -m pip install git+https://github.com/m-bain/whisperx.git
# Main requirements
# Copy only the dependency manifest before the application source so this pip
# layer stays cached until requirements.txt itself changes. COPY is used
# because none of ADD's extra behaviour (URL fetch, archive extraction) is
# wanted for a plain local file.
COPY requirements.txt requirements.txt
RUN python3 -m pip install -r ./requirements.txt
# The app
ADD --chown=user:user . $HOME/ai-voice-cloning
CMD ["python", "./src/main.py", "--listen", "0.0.0.0:7680"]
ENV IN_DOCKER=true
CMD ["./start.sh"]

View file

@ -14,21 +14,20 @@ That being said, some enhancements added compared to the original repo:
This is a fork of the repo originally located here: https://git.ecker.tech/mrq/ai-voice-cloning. All of the work that was put into it to incorporate training with DLAS and inference with Tortoise belongs to mrq, the author of the original ai-voice-cloning repo.
## Setup (Windows + Nvidia)
This repo only works on **Windows with NVIDIA GPUs**. I don't have any plans on making it compatible with other systems, but it shouldn't be too difficult to port if you have experience in coding or are an expert level ChatGPT user. If you do successfully do this and want to share, pull requests are always welcome!
> **Tips for developers:** setup-cuda.bat should have everything that you need for the packages to be installed. All of the different requirements files make it quite a mess in the script, but each repo has their requirements installed, and then at the end, the requirements.txt in the root is needed to change the version *back* to compatible versions for this repo.
## Setup
This repo works on **Windows with NVIDIA GPUs** and **Linux running Docker with NVIDIA GPUs**.
### Package Installation (Recommended)
Install 7zip on your computer: https://www.7-zip.org/
### Windows Package (Recommended)
1. Optional, but recommended: Install 7zip on your computer: https://www.7-zip.org/
- If you run into any extraction issues, most likely it's due to your 7zip being out-of-date OR you're using a different extractor.
2. Head over to the releases tab and download the latest package on Hugging Face: https://github.com/JarodMica/ai-voice-cloning/releases/tag/v3.0
3. Extract the 7zip archive.
4. Open up ai-voice-cloning and then run ```start.bat```
1. Head over to the releases tab and download the latest package on Hugging Face: https://github.com/JarodMica/ai-voice-cloning/releases/tag/v2.0
2. Extract the 7zip archive.
3. Open up ai-voice-cloning and then run ```start.bat```
#### Alternative Manual Installation
### Manual Installation
If you are installing this manually, you will need:
- Python 3.9: https://www.python.org/downloads/release/python-3913/
- Python 3.11: https://www.python.org/downloads/release/python-311/
- Git: https://www.git-scm.com/downloads
1. Clone the repository
@ -36,15 +35,45 @@ If you are installing this manually, you will need:
git clone https://github.com/JarodMica/ai-voice-cloning.git
```
2. Run the ```setup-cuda.bat``` file and it will start running through all of the python packages needed
- If you don't have python 3.9, it won't work and you'll need to go download it
- If you don't have python 3.11, it won't work and you'll need to go download it
3. After it finishes, run ```start.bat``` and this will start downloading most of the models you'll need.
- Some models are downloaded when you first use them. You'll incur additional downloads during generation and when training (for whisper). However, once they are finished, you won't ever have to download them again as long as you don't delete them. They are located in the ```models``` folder of the root.
4. **(Optional)** You can opt to install whisperx for training by running ```setup-whipserx.bat```
- Check out the whisperx github page for more details, but it's much faster for longer audio files. If you're processing one-by-one with an already split dataset, it doesn't improve speeds that much.
### Docker for Linux (or WSL2)
#### Linux Specific Setup
1. Make sure the latest nvidia drivers are installed: `sudo ubuntu-drivers install`
2. Install Docker your preferred way
#### Windows Specific Setup
> Make sure your Nvidia drivers are up to date: https://www.nvidia.com/download/index.aspx
1. Install WSL2 in PowerShell with `wsl --install` and restart
2. Open PowerShell, type and enter ```ubuntu```. It should now load you into wsl2
3. Remove the original nvidia cache key: `sudo apt-key del 7fa2af80`
4. Download CUDA toolkit keyring: `wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-keyring_1.1-1_all.deb`
5. Install keyring: `sudo dpkg -i cuda-keyring_1.1-1_all.deb`
6. Update package list: `sudo apt-get update`
7. Install CUDA toolkit: `sudo apt-get -y install cuda-toolkit-12-4`
8. Install Docker Desktop using WSL2 as the backend
9. Restart
10. If you wish to monitor the terminal remotely via SSH, follow [this guide](https://www.hanselman.com/blog/how-to-ssh-into-wsl2-on-windows-10-from-an-external-machine).
11. Open PowerShell, type ```ubuntu```, [then follow below](#building-and-running-in-docker)
#### Building and Running in Docker
1. Open a terminal (or Ubuntu WSL)
2. Clone the repository: `git clone https://github.com/JarodMica/ai-voice-cloning.git && cd ai-voice-cloning`
3. Build the image with `./setup-docker.sh`
4. Start the container with `./start-docker.sh`
5. Visit `http://localhost:7860` or remotely with `http://<ip>:7860`
## Instructions
Checkout the YouTube video:
Watch First: https://www.youtube.com/watch?v=p31Ax_A5VKA&t=158s
Watch First: https://youtu.be/WWhNqJEmF9M?si=RhUZhYersAvSZ4wf
Watch Second (RVC update): https://www.youtube.com/watch?v=7tpWH8_S8es&t=504s
Everything is pretty much the same as before if you've used this repository in the past, however, there is a new option to convert text output using ```rvc```. Before you can use it, you will need a **trained** RVC .pth file that you get from RVC or online, and then you will need to place it in ```models/rvc_models/```. Both .index and .pth files can be placed in here and they'll show up correctly in their respective dropdown menus.
@ -57,12 +86,12 @@ You will now have access to parameters you could adjust in RVC for the RVC voice
## Updating Your Installation
Below are how you can update the package for the latest updates
### Package
### Windows
>**NOTE:** If there are major feature changes, check the latest release to see if ```update_package.bat``` will work. If NOT, you will need to re-download and re-extract the package from Hugging Face.
1. Run the update_package.bat file
1. Run the `update_package.bat` file
- It will clone the repo and will copy the src folder from the repo to the package.
### Manual Installation
#### Alternative Manual Installation
You should be able to navigate into the folder and then pull the repo to update it.
```
cd ai-voice-cloning
@ -70,6 +99,14 @@ git pull
```
If there are large features added, you may need to delete the venv and then re-run the setup-cuda script to make sure there are no package issues
### Linux via Docker
You should be able to navigate into the folder and then pull the repo to update it, then rebuild your Docker image.
```
cd ai-voice-cloning
git pull
./setup-docker.sh
```
## Documentation
### Troubleshooting Manual Installation
@ -82,8 +119,9 @@ pip uninstall torch
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
```
Other documentation tbd
## Bug Reporting
If you run into any problems, please open up a new issue on the issues tab.
## Tips for developers
`setup-cuda.bat` should have everything that you need for the packages to be installed. All of the different requirements files make it quite a mess in the script, but each repo has their requirements installed, and then at the end, the `requirements.txt` in the root is needed to change the version *back* to compatible versions for this repo.

0
setup-cuda.sh Normal file → Executable file
View file

17
setup-docker.sh Normal file → Executable file
View file

@ -1,4 +1,15 @@
#!/bin/bash
git submodule init
git submodule update --remote
docker build -t ai-voice-cloning .
# Build the `ai-voice-cloning` Docker image from the current directory.
#
# Submodules are initialised and updated only when the tortoise-tts checkout
# is missing its README.md, i.e. when they have not been fetched yet.
# The invoking user's numeric UID/GID are passed as build args so the account
# created inside the image matches the host user.
function main() {
    if [ ! -f modules/tortoise-tts/README.md ]; then
        git submodule init
        git submodule update --remote
    fi
    # Quote the command substitutions so their output is always passed as a
    # single word to --build-arg.
    docker build \
        --build-arg "UID=$(id -u)" \
        --build-arg "GID=$(id -g)" \
        -t ai-voice-cloning \
        .
}
main

0
setup-rocm-bnb.sh Normal file → Executable file
View file

0
setup-rocm.sh Normal file → Executable file
View file

View file

@ -43,6 +43,8 @@ from tortoise.api_fast import TextToSpeech as Toroise_TTS_Hifi
from tortoise.utils.audio import load_audio, load_voice, load_voices, get_voice_dir, get_voices
from tortoise.utils.text import split_and_recombine_text
from tortoise.utils.device import get_device_name, set_device_name, get_device_count, get_device_vram, get_device_batch_size, do_gc
# TODO: The below import blocks any CLI parameters.
# Try running with --low-vram
from rvc_pipe.rvc_infer import rvc_convert
MODELS['dvae.pth'] = "https://huggingface.co/jbetker/tortoise-tts-v2/resolve/3704aea61678e7e468a06d8eea121dba368a798e/.models/dvae.pth"
@ -1181,6 +1183,7 @@ def generate_tortoise(**kwargs):
model_hash = settings["model_hash"][:8] if settings is not None and "model_hash" in settings else tts.autoregressive_model_hash[:8]
dir = f'{get_voice_dir()}/{voice}/'
# TODO: Use of model_hash here causes issues in development as new hashes are added to the repo.
latents_path = f'{dir}/cond_latents_{model_hash}.pth'
if voice == "random" or voice == "microphone":
@ -3485,7 +3488,7 @@ def setup_args(cli=False):
try:
match = re.findall(r"^(?:(.+?):(\d+))?(\/.*?)?$", args.listen)[0]
args.listen_host = match[0] if match[0] != "" else "127.0.0.1"
args.listen_host = match[0] if match[0] != "" else "0.0.0.0"
args.listen_port = match[1] if match[1] != "" else None
args.listen_path = match[2] if match[2] != "" else "/"
except Exception as e:
@ -3789,7 +3792,9 @@ def unload_tts():
do_gc()
def reload_tts():
subprocess.Popen(["start.bat"])
in_docker = os.environ.get("IN_DOCKER", "false")
if in_docker == "false":
subprocess.Popen(["start.bat"])
with open("reload_flag.txt", "w") as f:
f.write("reload")
os.kill(os.getpid(), signal.SIGTERM) # Or signal.SIGKILL for an even harder kill

34
start-docker.sh Normal file → Executable file
View file

@ -1,14 +1,24 @@
#!/bin/bash
CMD="python3 ./src/main.py $@"
# CMD="bash"
CPATH="/home/user/ai-voice-cloning"
docker run --rm --gpus all \
--mount "type=bind,src=$PWD/models,dst=$CPATH/models" \
--mount "type=bind,src=$PWD/training,dst=$CPATH/training" \
--mount "type=bind,src=$PWD/voices,dst=$CPATH/voices" \
--mount "type=bind,src=$PWD/bin,dst=$CPATH/bin" \
--workdir $CPATH \
--user "$(id -u):$(id -g)" \
--net host \
-it ai-voice-cloning $CMD
# Make sure every bind-mount source exists before starting the container.
# With `-v`, Docker creates a missing host directory itself, and that directory
# ends up owned by root, which the non-root container user cannot write to.
mkdir -p \
    "${PWD}/models" \
    "${PWD}/training" \
    "${PWD}/voices" \
    "${PWD}/bin" \
    "${PWD}/config"

# Start the app interactively with all GPUs, running as the invoking host user
# and publishing the web UI on port 7860. Any arguments given to this script
# replace the image's default command; "$@" keeps each argument intact even
# when it contains spaces.
docker run \
    -ti \
    --rm \
    --gpus all \
    --name ai-voice-cloning \
    -v "${PWD}/models:/home/user/ai-voice-cloning/models" \
    -v "${PWD}/training:/home/user/ai-voice-cloning/training" \
    -v "${PWD}/voices:/home/user/ai-voice-cloning/voices" \
    -v "${PWD}/bin:/home/user/ai-voice-cloning/bin" \
    -v "${PWD}/config:/home/user/ai-voice-cloning/config" \
    --user "$(id -u):$(id -g)" \
    -p "7860:7860" \
    ai-voice-cloning "$@"
# For dev:
# -v "${PWD}/src:/home/user/ai-voice-cloning/src" \
# -v "${PWD}/modules/tortoise_dataset_tools/dataset_whisper_tools:/home/user/ai-voice-cloning/modules/tortoise_dataset_tools/dataset_whisper_tools" \
# -v "${PWD}/modules/dlas/dlas:/home/user/ai-voice-cloning/modules/dlas/dlas" \
# -v "/home/user/ai-voice-cloning/src/__pycache__" \
# For testing:
# -e "PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True" \

8
start.sh Normal file → Executable file
View file

@ -1,5 +1,7 @@
#!/bin/bash
ulimit -Sn `ulimit -Hn` # ROCm is a bitch
source ./venv/bin/activate
python3 ./src/main.py "$@"
deactivate
# Keep the application running: whenever main.py exits, wait five seconds
# (giving the operator time to interrupt) and launch it again with the same
# arguments. `while true` is used instead of `[ true ]`, which is merely a
# non-empty-string test and would loop just the same for `[ false ]`.
while true; do
    python3 ./src/main.py "$@"
    echo "Press Cntrl-C to quit or application will restart... (5s)"
    sleep 5
done

0
train-docker.sh Normal file → Executable file
View file

0
train.sh Normal file → Executable file
View file

0
update-force.sh Normal file → Executable file
View file

0
update.sh Normal file → Executable file
View file