mirror of
https://github.com/JarodMica/ai-voice-cloning.git
synced 2025-06-07 06:05:52 -04:00
added linux support via docker
This commit is contained in:
parent
9f212832b2
commit
b1807b8168
16 changed files with 142 additions and 41 deletions
|
@ -2,3 +2,4 @@
|
|||
/training
|
||||
/voices
|
||||
/bin
|
||||
Dockerfile
|
||||
|
|
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -148,4 +148,6 @@ dmypy.json
|
|||
|
||||
.custom/*
|
||||
results/*
|
||||
debug_states/*
|
||||
debug_states/*
|
||||
bin/*
|
||||
|
||||
|
|
74
Dockerfile
74
Dockerfile
|
@ -1,18 +1,36 @@
|
|||
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
|
||||
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
ARG TZ=UTC
|
||||
ARG MINICONDA_VERSION=23.1.0-1
|
||||
ARG PYTHON_VERSION=3.9.13
|
||||
ARG PYTHON_VERSION=3.11
|
||||
ARG UID=1000
|
||||
ARG GID=1000
|
||||
|
||||
# TZ
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
# Prereqs
|
||||
RUN apt-get update
|
||||
RUN apt install -y curl wget git ffmpeg
|
||||
RUN adduser --disabled-password --gecos '' --shell /bin/bash user
|
||||
RUN apt-get install -y \
|
||||
curl \
|
||||
wget \
|
||||
git \
|
||||
ffmpeg \
|
||||
p7zip-full \
|
||||
gcc \
|
||||
g++ \
|
||||
vim
|
||||
|
||||
# User
|
||||
RUN groupadd --gid $GID user
|
||||
RUN useradd --no-log-init --create-home --shell /bin/bash --uid $UID --gid $GID user
|
||||
USER user
|
||||
ENV HOME=/home/user
|
||||
WORKDIR $HOME
|
||||
RUN mkdir $HOME/.cache $HOME/.config && chmod -R 777 $HOME
|
||||
|
||||
# Python
|
||||
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh
|
||||
RUN chmod +x Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh
|
||||
RUN ./Miniconda3-py39_$MINICONDA_VERSION-Linux-x86_64.sh -b -p /home/user/miniconda
|
||||
|
@ -20,18 +38,60 @@ ENV PATH="$HOME/miniconda/bin:$PATH"
|
|||
RUN conda init
|
||||
RUN conda install python=$PYTHON_VERSION
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
||||
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
|
||||
|
||||
# Base path
|
||||
RUN mkdir $HOME/ai-voice-cloning
|
||||
WORKDIR $HOME/ai-voice-cloning
|
||||
COPY --chown=user:user modules modules
|
||||
|
||||
# Built in modules
|
||||
COPY --chown=user:user modules modules
|
||||
RUN python3 -m pip install -r ./modules/tortoise-tts/requirements.txt
|
||||
RUN python3 -m pip install -e ./modules/tortoise-tts/
|
||||
RUN python3 -m pip install -r ./modules/dlas/requirements.txt
|
||||
RUN python3 -m pip install -e ./modules/dlas/
|
||||
|
||||
# RVC
|
||||
RUN \
|
||||
curl -L -o /tmp/rvc.zip https://huggingface.co/Jmica/rvc/resolve/main/rvc_lightweight.zip?download=true &&\
|
||||
7z x /tmp/rvc.zip &&\
|
||||
rm -f /tmp/rvc.zip
|
||||
USER root
|
||||
RUN \
|
||||
chown user:user rvc -R &&\
|
||||
chmod -R u+rwX,go+rX,go-w rvc
|
||||
USER user
|
||||
RUN python3 -m pip install -r ./rvc/requirements.txt
|
||||
|
||||
# Fairseq
|
||||
# Using patched version for Python 3.11 due to https://github.com/facebookresearch/fairseq/issues/5012
|
||||
RUN python3 -m pip install git+https://github.com/liyaodev/fairseq
|
||||
|
||||
# RVC Pipeline
|
||||
RUN python3 -m pip install git+https://github.com/JarodMica/rvc-tts-pipeline.git@lightweight#egg=rvc_tts_pipe
|
||||
|
||||
# Deepspeed
|
||||
RUN python3 -m pip install deepspeed
|
||||
|
||||
# PyFastMP3Decoder
|
||||
RUN python3 -m pip install cython
|
||||
RUN git clone https://github.com/neonbjb/pyfastmp3decoder.git
|
||||
RUN \
|
||||
cd pyfastmp3decoder &&\
|
||||
git submodule update --init --recursive &&\
|
||||
python setup.py install &&\
|
||||
cd ..
|
||||
|
||||
# WhisperX
|
||||
RUN python3 -m pip install git+https://github.com/m-bain/whisperx.git
|
||||
|
||||
# Main requirements
|
||||
ADD requirements.txt requirements.txt
|
||||
RUN python3 -m pip install -r ./requirements.txt
|
||||
|
||||
# The app
|
||||
ADD --chown=user:user . $HOME/ai-voice-cloning
|
||||
|
||||
CMD ["python", "./src/main.py", "--listen", "0.0.0.0:7680"]
|
||||
ENV IN_DOCKER=true
|
||||
|
||||
CMD ["./start.sh"]
|
||||
|
|
47
README.md
47
README.md
|
@ -14,21 +14,20 @@ That being said, some enhancements added compared to the original repo:
|
|||
|
||||
This is a fork of the repo originally located here: https://git.ecker.tech/mrq/ai-voice-cloning. All of the work that was put into it to incorporate training with DLAS and inference with Tortoise belongs to mrq, the author of the original ai-voice-cloning repo.
|
||||
|
||||
## Setup (Windows + Nvidia)
|
||||
This repo only works on **Windows with NVIDIA GPUs**. I don't have any plans on making it compatible with other systems, but it shouldn't be too difficult to port if you have experience in coding or are an expert level ChatGPT user. If you do successfully do this and want to share, pull requests are always welcome!
|
||||
> **Tips for developers:** setup-cuda.bat should have everything that you need for the packages to be installed. All of the different requirements files make it quite a mess in the script, but each repo has their requirements installed, and then at the end, the requirements.txt in the root is needed to change the version *back* to compatible versions for this repo.
|
||||
## Setup
|
||||
This repo works on **Windows with NVIDIA GPUs** and **Linux running Docker with NVIDIA GPUs**.
|
||||
|
||||
### Package Installation (Recommended)
|
||||
Install 7zip on your computer: https://www.7-zip.org/
|
||||
### Windows
|
||||
1. Optional, but recommended: Install 7zip on your computer: https://www.7-zip.org/
|
||||
- If you run into any extraction issues, most likely it's due to your 7zip being out-of-date OR you're using a different extractor.
|
||||
2. Head over to the releases tab and download the latest package on Hugging Face: https://github.com/JarodMica/ai-voice-cloning/releases/tag/v2.0
|
||||
3. Extract the 7zip archive.
|
||||
4. Open up ai-voice-cloning and then run ```start.bat```
|
||||
|
||||
1. Head over to the releases tab and download the latest package on Hugging Face: https://github.com/JarodMica/ai-voice-cloning/releases/tag/v2.0
|
||||
2. Extract the 7zip archive.
|
||||
3. Open up ai-voice-cloning and then run ```start.bat```
|
||||
#### Alternative Manual Installation
|
||||
|
||||
### Manual Installation
|
||||
If you are installing this manually, you will need:
|
||||
- Python 3.9: https://www.python.org/downloads/release/python-3913/
|
||||
- Python 3.11: https://www.python.org/downloads/release/python-311/
|
||||
- Git: https://www.git-scm.com/downloads
|
||||
|
||||
1. Clone the repository
|
||||
|
@ -36,12 +35,19 @@ If you are installing this manually, you will need:
|
|||
git clone https://github.com/JarodMica/ai-voice-cloning.git
|
||||
```
|
||||
2. Run the ```setup-cuda.bat``` file and it will start running through all of the python packages needed
|
||||
- If you don't have python 3.9, it won't work and you'll need to go download it
|
||||
- If you don't have python 3.11, it won't work and you'll need to go download it
|
||||
3. After it finishes, run ```start.bat``` and this will start downloading most of the models you'll need.
|
||||
- Some models are downloaded when you first use them. You'll incur additional downloads during generation and when training (for whisper). However, once they are finished, you won't ever have to download them again as long as you don't delete them. They are located in the ```models``` folder of the root.
|
||||
4. **(Optional)** You can opt to install whisperx for training by running ```setup-whipserx.bat```
|
||||
- Check out the whisperx github page for more details, but it's much faster for longer audio files. If you're processing one-by-one with an already split dataset, it doesn't improve speeds that much.
|
||||
|
||||
|
||||
### Linux via Docker
|
||||
1. Clone the repository: `git clone https://github.com/JarodMica/ai-voice-cloning.git && cd ai-voice-cloning`
|
||||
2. Build the image with `./setup-docker.sh`
|
||||
3. Start the container with `./start-docker.sh`
|
||||
|
||||
|
||||
## Instructions
|
||||
Check out the YouTube video:
|
||||
Watch First: https://www.youtube.com/watch?v=p31Ax_A5VKA&t=158s
|
||||
|
@ -57,12 +63,12 @@ You will now have access to parameters you could adjust in RVC for the RVC voice
|
|||
## Updating Your Installation
|
||||
Below is how you can update the package to get the latest updates
|
||||
|
||||
### Package
|
||||
### Windows
|
||||
>**NOTE:** If there are major feature changes, check the latest release to see if ```update_package.bat``` will work. If NOT, you will need to re-download and re-extract the package from Hugging Face.
|
||||
1. Run the update_package.bat file
|
||||
1. Run the `update_package.bat` file
|
||||
- It will clone the repo and will copy the src folder from the repo to the package.
|
||||
|
||||
### Manual Installation
|
||||
#### Alternative Manual Installation
|
||||
You should be able to navigate into the folder and then pull the repo to update it.
|
||||
```
|
||||
cd ai-voice-cloning
|
||||
|
@ -70,6 +76,14 @@ git pull
|
|||
```
|
||||
If there are large features added, you may need to delete the venv and then re-run the setup-cuda script to make sure there are no package issues.
|
||||
|
||||
### Linux via Docker
|
||||
You should be able to navigate into the folder and then pull the repo to update it, then rebuild your Docker image.
|
||||
```
|
||||
cd ai-voice-cloning
|
||||
git pull
|
||||
./setup-docker.sh
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
### Troubleshooting Manual Installation
|
||||
|
@ -82,8 +96,9 @@ pip uninstall torch
|
|||
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
||||
```
|
||||
|
||||
Other documentation tbd
|
||||
|
||||
## Bug Reporting
|
||||
|
||||
If you run into any problems, please open up a new issue on the issues tab.
|
||||
|
||||
## Tips for developers
|
||||
`setup-cuda.bat` should have everything that you need for the packages to be installed. All of the different requirements files make it quite a mess in the script, but each repo has their requirements installed, and then at the end, the `requirements.txt` in the root is needed to change the version *back* to compatible versions for this repo.
|
||||
|
|
0
setup-cuda.sh
Normal file → Executable file
0
setup-cuda.sh
Normal file → Executable file
17
setup-docker.sh
Normal file → Executable file
17
setup-docker.sh
Normal file → Executable file
|
@ -1,4 +1,15 @@
|
|||
#!/bin/bash
|
||||
git submodule init
|
||||
git submodule update --remote
|
||||
docker build -t ai-voice-cloning .
|
||||
|
||||
function main() {
|
||||
if [ ! -f modules/tortoise-tts/README.md ]; then
|
||||
git submodule init
|
||||
git submodule update --remote
|
||||
fi
|
||||
docker build \
|
||||
--build-arg UID=$(id -u) \
|
||||
--build-arg GID=$(id -g) \
|
||||
-t ai-voice-cloning \
|
||||
.
|
||||
}
|
||||
|
||||
main
|
||||
|
|
0
setup-rocm-bnb.sh
Normal file → Executable file
0
setup-rocm-bnb.sh
Normal file → Executable file
0
setup-rocm.sh
Normal file → Executable file
0
setup-rocm.sh
Normal file → Executable file
|
@ -43,6 +43,8 @@ from tortoise.api_fast import TextToSpeech as Toroise_TTS_Hifi
|
|||
from tortoise.utils.audio import load_audio, load_voice, load_voices, get_voice_dir, get_voices
|
||||
from tortoise.utils.text import split_and_recombine_text
|
||||
from tortoise.utils.device import get_device_name, set_device_name, get_device_count, get_device_vram, get_device_batch_size, do_gc
|
||||
# TODO: The below import blocks any CLI parameters.
|
||||
# Try running with --low-vram
|
||||
from rvc_pipe.rvc_infer import rvc_convert
|
||||
|
||||
MODELS['dvae.pth'] = "https://huggingface.co/jbetker/tortoise-tts-v2/resolve/3704aea61678e7e468a06d8eea121dba368a798e/.models/dvae.pth"
|
||||
|
@ -1181,6 +1183,7 @@ def generate_tortoise(**kwargs):
|
|||
model_hash = settings["model_hash"][:8] if settings is not None and "model_hash" in settings else tts.autoregressive_model_hash[:8]
|
||||
|
||||
dir = f'{get_voice_dir()}/{voice}/'
|
||||
# TODO: Use of model_hash here causes issues in development as new hashes are added to the repo.
|
||||
latents_path = f'{dir}/cond_latents_{model_hash}.pth'
|
||||
|
||||
if voice == "random" or voice == "microphone":
|
||||
|
@ -3789,7 +3792,9 @@ def unload_tts():
|
|||
do_gc()
|
||||
|
||||
def reload_tts():
|
||||
subprocess.Popen(["start.bat"])
|
||||
in_docker = os.environ.get("IN_DOCKER", "false")
|
||||
if in_docker == "false":
|
||||
subprocess.Popen(["start.bat"])
|
||||
with open("reload_flag.txt", "w") as f:
|
||||
f.write("reload")
|
||||
os.kill(os.getpid(), signal.SIGTERM) # Or signal.SIGKILL for an even harder kill
|
||||
|
|
25
start-docker.sh
Normal file → Executable file
25
start-docker.sh
Normal file → Executable file
|
@ -1,14 +1,19 @@
|
|||
#!/bin/bash
|
||||
CMD="python3 ./src/main.py $@"
|
||||
# CMD="bash"
|
||||
CPATH="/home/user/ai-voice-cloning"
|
||||
docker run --rm --gpus all \
|
||||
--mount "type=bind,src=$PWD/models,dst=$CPATH/models" \
|
||||
--mount "type=bind,src=$PWD/training,dst=$CPATH/training" \
|
||||
--mount "type=bind,src=$PWD/voices,dst=$CPATH/voices" \
|
||||
--mount "type=bind,src=$PWD/bin,dst=$CPATH/bin" \
|
||||
--workdir $CPATH \
|
||||
|
||||
docker run \
|
||||
-ti \
|
||||
--rm \
|
||||
--gpus all \
|
||||
--name ai-voice-cloning \
|
||||
-v "${PWD}/models:/home/user/ai-voice-cloning/models" \
|
||||
-v "${PWD}/training:/home/user/ai-voice-cloning/training" \
|
||||
-v "${PWD}/voices:/home/user/ai-voice-cloning/voices" \
|
||||
-v "${PWD}/bin:/home/user/ai-voice-cloning/bin" \
|
||||
-v "${PWD}/config:/home/user/ai-voice-cloning/config" \
|
||||
--user "$(id -u):$(id -g)" \
|
||||
--net host \
|
||||
-it ai-voice-cloning $CMD
|
||||
ai-voice-cloning
|
||||
|
||||
# For dev:
|
||||
# -v "${PWD}/src:/home/user/ai-voice-cloning/src" \
|
||||
# -v "/home/user/ai-voice-cloning/src/__pycache__" \
|
||||
|
|
8
start.sh
Normal file → Executable file
8
start.sh
Normal file → Executable file
|
@ -1,5 +1,7 @@
|
|||
#!/bin/bash
|
||||
ulimit -Sn `ulimit -Hn` # ROCm is a bitch
|
||||
source ./venv/bin/activate
|
||||
python3 ./src/main.py "$@"
|
||||
deactivate
|
||||
while [ true ]; do
|
||||
python3 ./src/main.py "$@"
|
||||
echo "Press Cntrl-C to quit or application will restart... (5s)"
|
||||
sleep 5
|
||||
done
|
||||
|
|
0
train-docker.sh
Normal file → Executable file
0
train-docker.sh
Normal file → Executable file
0
train.sh
Normal file → Executable file
0
train.sh
Normal file → Executable file
0
update-force.sh
Normal file → Executable file
0
update-force.sh
Normal file → Executable file
0
update.sh
Normal file → Executable file
0
update.sh
Normal file → Executable file
Binary file not shown.
Loading…
Add table
Reference in a new issue