Mirror of https://github.com/JarodMica/ai-voice-cloning.git (synced 2025-06-07 14:15:57 -04:00)

Merge pull request #81 from Fmstrat/linux-and-docker: Add Linux support via Docker
Commit 24892dc2cc
15 changed files with 174 additions and 45 deletions
@@ -2,3 +2,4 @@
 /training
 /voices
 /bin
+Dockerfile
.gitignore (vendored): 2 changes
@@ -149,3 +149,5 @@ dmypy.json
 .custom/*
 results/*
 debug_states/*
+bin/*
Dockerfile: 74 changes
@@ -1,18 +1,36 @@
-FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
+FROM nvidia/cuda:12.2.0-devel-ubuntu22.04

 ARG DEBIAN_FRONTEND=noninteractive
 ARG TZ=UTC
 ARG MINICONDA_VERSION=23.1.0-1
-ARG PYTHON_VERSION=3.9.13
+ARG PYTHON_VERSION=3.11
+ARG UID=1000
+ARG GID=1000

 # TZ
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

 # Prereqs
 RUN apt-get update
-RUN apt install -y curl wget git ffmpeg
-RUN adduser --disabled-password --gecos '' --shell /bin/bash user
+RUN apt-get install -y \
+    curl \
+    wget \
+    git \
+    ffmpeg \
+    p7zip-full \
+    gcc \
+    g++ \
+    vim
+
+# User
+RUN groupadd --gid $GID user
+RUN useradd --no-log-init --create-home --shell /bin/bash --uid $UID --gid $GID user
 USER user
 ENV HOME=/home/user
 WORKDIR $HOME
 RUN mkdir $HOME/.cache $HOME/.config && chmod -R 777 $HOME

 # Python
 RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh
 RUN chmod +x Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh
 RUN ./Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh -b -p /home/user/miniconda

@@ -20,18 +38,60 @@ ENV PATH="$HOME/miniconda/bin:$PATH"
 RUN conda init
 RUN conda install python=$PYTHON_VERSION
 RUN python3 -m pip install --upgrade pip
-RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

 # Base path
 RUN mkdir $HOME/ai-voice-cloning
 WORKDIR $HOME/ai-voice-cloning
-COPY --chown=user:user modules modules

 # Built in modules
+COPY --chown=user:user modules modules
 RUN python3 -m pip install -r ./modules/tortoise-tts/requirements.txt
 RUN python3 -m pip install -e ./modules/tortoise-tts/
 RUN python3 -m pip install -r ./modules/dlas/requirements.txt
 RUN python3 -m pip install -e ./modules/dlas/

+# RVC
+RUN \
+    curl -L -o /tmp/rvc.zip https://huggingface.co/Jmica/rvc/resolve/main/rvc_lightweight.zip?download=true &&\
+    7z x /tmp/rvc.zip &&\
+    rm -f /tmp/rvc.zip
+USER root
+RUN \
+    chown user:user rvc -R &&\
+    chmod -R u+rwX,go+rX,go-w rvc
+USER user
+RUN python3 -m pip install -r ./rvc/requirements.txt
+
+# Fairseq
+# Using patched version for Python 3.11 due to https://github.com/facebookresearch/fairseq/issues/5012
+RUN python3 -m pip install git+https://github.com/liyaodev/fairseq
+
+# RVC Pipeline
+RUN python3 -m pip install git+https://github.com/JarodMica/rvc-tts-pipeline.git@lightweight#egg=rvc_tts_pipe
+
+# Deepspeed
+RUN python3 -m pip install deepspeed
+
+# PyFastMP3Decoder
+RUN python3 -m pip install cython
+RUN git clone https://github.com/neonbjb/pyfastmp3decoder.git
+RUN \
+    cd pyfastmp3decoder &&\
+    git submodule update --init --recursive &&\
+    python setup.py install &&\
+    cd ..
+
+# WhisperX
+RUN python3 -m pip install git+https://github.com/m-bain/whisperx.git
+
 # Main requirements
 ADD requirements.txt requirements.txt
 RUN python3 -m pip install -r ./requirements.txt

 # The app
 ADD --chown=user:user . $HOME/ai-voice-cloning

-CMD ["python", "./src/main.py", "--listen", "0.0.0.0:7680"]
+ENV IN_DOCKER=true
+
+CMD ["./start.sh"]
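Since both the base image and the PyTorch wheels move to CUDA 12.x in this Dockerfile, a quick sanity check after building is to confirm that torch inside the container actually sees the GPU. A minimal sketch, not part of the PR: the `check_gpu.py` filename and the `docker run` invocation in the comment are illustrative, while the `ai-voice-cloning` tag and `--gpus all` flag follow the scripts later in this diff.

```
# check_gpu.py - sanity check for the CUDA 12.x PyTorch install inside the image.
# Run with something like (image name and flags mirror setup-docker.sh / start-docker.sh):
#   docker run --rm --gpus all ai-voice-cloning python3 check_gpu.py
import torch

print("torch version:", torch.__version__)
print("built against CUDA:", torch.version.cuda)   # expect 12.1 for the cu121 wheels
print("GPU visible:", torch.cuda.is_available())   # False usually means --gpus all was missing
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
```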
README.md: 72 changes
@@ -14,21 +14,20 @@ That being said, some enhancements added compared to the original repo:

 This is a fork of the repo originally located here: https://git.ecker.tech/mrq/ai-voice-cloning. All of the work that was put into it to incorporate training with DLAS and inference with Tortoise belongs to mrq, the author of the original ai-voice-cloning repo.

-## Setup (Windows + Nvidia)
-This repo only works on **Windows with NVIDIA GPUs**. I don't have any plans on making it compatible with other systems, but it shouldn't be too difficult to port if you have experience in coding or are an expert level ChatGPT user. If you do successfully do this and want to share, pull requests are always welcome!
-> **Tips for developers:** setup-cuda.bat should have everything that you need for the packages to be installed. All of the different requirements files make it quite a mess in the script, but each repo has its requirements installed, and then at the end, the requirements.txt in the root is needed to change the version *back* to compatible versions for this repo.
+## Setup
+This repo works on **Windows with NVIDIA GPUs** and **Linux running Docker with NVIDIA GPUs**.

-### Package Installation (Recommended)
-Install 7zip on your computer: https://www.7-zip.org/
+### Windows Package (Recommended)
+1. Optional, but recommended: Install 7zip on your computer: https://www.7-zip.org/
+    - If you run into any extraction issues, most likely it's due to your 7zip being out-of-date OR you're using a different extractor.
+2. Head over to the releases tab and download the latest package on Hugging Face: https://github.com/JarodMica/ai-voice-cloning/releases/tag/v3.0
+3. Extract the 7zip archive.
+4. Open up ai-voice-cloning and then run ```start.bat```

-1. Head over to the releases tab and download the latest package on Hugging Face: https://github.com/JarodMica/ai-voice-cloning/releases/tag/v2.0
-2. Extract the 7zip archive.
-3. Open up ai-voice-cloning and then run ```start.bat```
-#### Alternative Manual Installation
+### Manual Installation
 If you are installing this manually, you will need:
-- Python 3.9: https://www.python.org/downloads/release/python-3913/
+- Python 3.11: https://www.python.org/downloads/release/python-311/
 - Git: https://www.git-scm.com/downloads

 1. Clone the repository
@@ -36,15 +35,45 @@
    git clone https://github.com/JarodMica/ai-voice-cloning.git
    ```
 2. Run the ```setup-cuda.bat``` file and it will start running through all of the python packages needed
-    - If you don't have python 3.9, it won't work and you'll need to go download it
+    - If you don't have python 3.11, it won't work and you'll need to go download it
 3. After it finishes, run ```start.bat``` and this will start downloading most of the models you'll need.
     - Some models are downloaded when you first use them. You'll incur additional downloads during generation and when training (for whisper). However, once they are finished, you won't ever have to download them again as long as you don't delete them. They are located in the ```models``` folder of the root.
 4. **(Optional)** You can opt to install whisperx for training by running ```setup-whipserx.bat```
     - Check out the whisperx github page for more details, but it's much faster for longer audio files. If you're processing one-by-one with an already split dataset, it doesn't improve speeds that much.

+### Docker for Linux (or WSL2)
+
+#### Linux Specific Setup
+1. Make sure the latest nvidia drivers are installed: `sudo ubuntu-drivers install`
+2. Install Docker your preferred way
+
+#### Windows Specific Setup
+> Make sure your Nvidia drivers are up to date: https://www.nvidia.com/download/index.aspx
+1. Install WSL2 in PowerShell with `wsl --install` and restart
+2. Open PowerShell, type and enter ```ubuntu```. It should now load you into WSL2
+3. Remove the original nvidia cache key: `sudo apt-key del 7fa2af80`
+4. Download the CUDA toolkit keyring: `wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-keyring_1.1-1_all.deb`
+5. Install the keyring: `sudo dpkg -i cuda-keyring_1.1-1_all.deb`
+6. Update the package list: `sudo apt-get update`
+7. Install the CUDA toolkit: `sudo apt-get -y install cuda-toolkit-12-4`
+8. Install Docker Desktop using WSL2 as the backend
+9. Restart
+10. If you wish to monitor the terminal remotely via SSH, follow [this guide](https://www.hanselman.com/blog/how-to-ssh-into-wsl2-on-windows-10-from-an-external-machine).
+11. Open PowerShell, type ```ubuntu```, [then follow below](#building-and-running-in-docker)
+
+#### Building and Running in Docker
+1. Open a terminal (or Ubuntu WSL)
+2. Clone the repository: `git clone https://github.com/JarodMica/ai-voice-cloning.git && cd ai-voice-cloning`
+3. Build the image with `./setup-docker.sh`
+4. Start the container with `./start-docker.sh`
+5. Visit `http://localhost:7860` or remotely with `http://<ip>:7860`

 ## Instructions
 Check out the YouTube video:
-Watch First: https://www.youtube.com/watch?v=p31Ax_A5VKA&t=158s
+Watch First: https://youtu.be/WWhNqJEmF9M?si=RhUZhYersAvSZ4wf

 Watch Second (RVC update): https://www.youtube.com/watch?v=7tpWH8_S8es&t=504s

 Everything is pretty much the same as before if you've used this repository in the past, however, there is a new option to convert text output using ```rvc```. Before you can use it, you will need a **trained** RVC .pth file that you get from RVC or online, and then you will need to place it in ```models/rvc_models/```. Both .index and .pth files can be placed in here and they'll show up correctly in their respective dropdown menus.
@@ -57,12 +86,12 @@ You will now have access to parameters you could adjust in RVC for the RVC voice
 ## Updating Your Installation
 Below is how you can update the package for the latest updates

-### Package
+### Windows
 >**NOTE:** If there are major feature changes, check the latest release to see if ```update_package.bat``` will work. If NOT, you will need to re-download and re-extract the package from Hugging Face.
-1. Run the update_package.bat file
+1. Run the `update_package.bat` file
     - It will clone the repo and will copy the src folder from the repo to the package.

-### Manual Installation
+#### Alternative Manual Installation
 You should be able to navigate into the folder and then pull the repo to update it.
 ```
 cd ai-voice-cloning

@@ -70,6 +99,14 @@ git pull
 ```
 If there are large features added, you may need to delete the venv and then re-run the setup-cuda script to make sure there are no package issues

+### Linux via Docker
+You should be able to navigate into the folder, pull the repo to update it, and then rebuild your Docker image.
+```
+cd ai-voice-cloning
+git pull
+./setup-docker.sh
+```

 ## Documentation

 ### Troubleshooting Manual Installation

@@ -82,8 +119,9 @@ pip uninstall torch
 pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
 ```

 Other documentation tbd

 ## Bug Reporting

 If you run into any problems, please open up a new issue on the issues tab.

+## Tips for developers
+`setup-cuda.bat` should have everything that you need for the packages to be installed. All of the different requirements files make it quite a mess in the script, but each repo has its requirements installed, and then at the end, the `requirements.txt` in the root is needed to change the version *back* to compatible versions for this repo.
setup-cuda.sh: 0 changes (Normal file → Executable file)

setup-docker.sh: 17 changes (Normal file → Executable file)
@@ -1,4 +1,15 @@
 #!/bin/bash
-git submodule init
-git submodule update --remote
-docker build -t ai-voice-cloning .
+
+function main() {
+    if [ ! -f modules/tortoise-tts/README.md ]; then
+        git submodule init
+        git submodule update --remote
+    fi
+    docker build \
+        --build-arg UID=$(id -u) \
+        --build-arg GID=$(id -g) \
+        -t ai-voice-cloning \
+        .
+}
+
+main
setup-rocm-bnb.sh: 0 changes (Normal file → Executable file)
setup-rocm.sh: 0 changes (Normal file → Executable file)
@@ -43,6 +43,8 @@ from tortoise.api_fast import TextToSpeech as Toroise_TTS_Hifi
 from tortoise.utils.audio import load_audio, load_voice, load_voices, get_voice_dir, get_voices
 from tortoise.utils.text import split_and_recombine_text
 from tortoise.utils.device import get_device_name, set_device_name, get_device_count, get_device_vram, get_device_batch_size, do_gc
+# TODO: The below import blocks any CLI parameters.
+# Try running with --low-vram
 from rvc_pipe.rvc_infer import rvc_convert

 MODELS['dvae.pth'] = "https://huggingface.co/jbetker/tortoise-tts-v2/resolve/3704aea61678e7e468a06d8eea121dba368a798e/.models/dvae.pth"

@@ -1181,6 +1183,7 @@ def generate_tortoise(**kwargs):
     model_hash = settings["model_hash"][:8] if settings is not None and "model_hash" in settings else tts.autoregressive_model_hash[:8]

     dir = f'{get_voice_dir()}/{voice}/'
+    # TODO: Use of model_hash here causes issues in development as new hashes are added to the repo.
     latents_path = f'{dir}/cond_latents_{model_hash}.pth'

     if voice == "random" or voice == "microphone":
@@ -3485,7 +3488,7 @@ def setup_args(cli=False):
     try:
         match = re.findall(r"^(?:(.+?):(\d+))?(\/.*?)?$", args.listen)[0]

-        args.listen_host = match[0] if match[0] != "" else "127.0.0.1"
+        args.listen_host = match[0] if match[0] != "" else "0.0.0.0"
         args.listen_port = match[1] if match[1] != "" else None
         args.listen_path = match[2] if match[2] != "" else "/"
     except Exception as e:
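For reference, the regex above is what splits the `--listen` value into host, port, and path; the only change here is that an omitted host now falls back to `0.0.0.0` instead of `127.0.0.1`, so the web UI is reachable through the port published by `start-docker.sh`. A standalone sketch of that parsing (not the repo's code, just the same pattern and defaults):

```
# Sketch of how the --listen value is split; pattern and fallbacks match the diff above.
import re

LISTEN_RE = r"^(?:(.+?):(\d+))?(\/.*?)?$"

def parse_listen(listen: str):
    host, port, path = re.findall(LISTEN_RE, listen)[0]
    return (
        host if host != "" else "0.0.0.0",  # was 127.0.0.1 before this change
        port if port != "" else None,
        path if path != "" else "/",
    )

print(parse_listen("0.0.0.0:7860"))  # ('0.0.0.0', '7860', '/')
print(parse_listen("/subpath"))      # ('0.0.0.0', None, '/subpath')
```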
@@ -3789,6 +3792,8 @@ def unload_tts():
     do_gc()

 def reload_tts():
+    in_docker = os.environ.get("IN_DOCKER", "false")
+    if in_docker == "false":
         subprocess.Popen(["start.bat"])
     with open("reload_flag.txt", "w") as f:
         f.write("reload")
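This is where the `ENV IN_DOCKER=true` set in the Dockerfile comes into play: outside Docker the function spawns a fresh `start.bat`, while inside the container it only writes `reload_flag.txt`, presumably letting the restart loop added to `start.sh` (its diff appears further down) bring the app back up. A small self-contained sketch of that pattern; the `request_restart` name is illustrative, not the repo's.

```
import os
import subprocess

def request_restart() -> None:
    """Illustrative restart request mirroring the reload_tts() branch above."""
    in_docker = os.environ.get("IN_DOCKER", "false")
    if in_docker == "false":
        # Native Windows install: spawn a new start.bat so a fresh process takes over.
        subprocess.Popen(["start.bat"])
    # In Docker, no new process is spawned here; the `while` loop in the new
    # start.sh restarts main.py after it exits, and the flag file signals the reload.
    with open("reload_flag.txt", "w") as f:
        f.write("reload")
```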
start-docker.sh: 34 changes (Normal file → Executable file)
@@ -1,14 +1,24 @@
 #!/bin/bash
-CMD="python3 ./src/main.py $@"
-# CMD="bash"
-CPATH="/home/user/ai-voice-cloning"
-docker run --rm --gpus all \
-    --mount "type=bind,src=$PWD/models,dst=$CPATH/models" \
-    --mount "type=bind,src=$PWD/training,dst=$CPATH/training" \
-    --mount "type=bind,src=$PWD/voices,dst=$CPATH/voices" \
-    --mount "type=bind,src=$PWD/bin,dst=$CPATH/bin" \
-    --workdir $CPATH \
-    --user "$(id -u):$(id -g)" \
-    --net host \
-    -it ai-voice-cloning $CMD
+
+docker run \
+    -ti \
+    --rm \
+    --gpus all \
+    --name ai-voice-cloning \
+    -v "${PWD}/models:/home/user/ai-voice-cloning/models" \
+    -v "${PWD}/training:/home/user/ai-voice-cloning/training" \
+    -v "${PWD}/voices:/home/user/ai-voice-cloning/voices" \
+    -v "${PWD}/bin:/home/user/ai-voice-cloning/bin" \
+    -v "${PWD}/config:/home/user/ai-voice-cloning/config" \
+    --user "$(id -u):$(id -g)" \
+    -p "7860:7860" \
+    ai-voice-cloning $@
+
+# For dev:
+# -v "${PWD}/src:/home/user/ai-voice-cloning/src" \
+# -v "${PWD}/modules/tortoise_dataset_tools/dataset_whisper_tools:/home/user/ai-voice-cloning/modules/tortoise_dataset_tools/dataset_whisper_tools" \
+# -v "${PWD}/modules/dlas/dlas:/home/user/ai-voice-cloning/modules/dlas/dlas" \
+# -v "/home/user/ai-voice-cloning/src/__pycache__" \
+
+# For testing:
+# -e "PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True" \
start.sh: 8 changes (Normal file → Executable file)
@@ -1,5 +1,7 @@
 #!/bin/bash
-ulimit -Sn `ulimit -Hn` # ROCm is a bitch
-source ./venv/bin/activate
-python3 ./src/main.py "$@"
-deactivate
+while [ true ]; do
+    python3 ./src/main.py "$@"
+    echo "Press Cntrl-C to quit or application will restart... (5s)"
+    sleep 5
+done
train-docker.sh: 0 changes (Normal file → Executable file)
train.sh: 0 changes (Normal file → Executable file)
update-force.sh: 0 changes (Normal file → Executable file)
update.sh: 0 changes (Normal file → Executable file)