diff --git a/.env b/.env new file mode 100644 index 0000000..e789dca --- /dev/null +++ b/.env @@ -0,0 +1,27 @@ +TORCH_CUDA_ARCH_LIST=8.6 + +# these commands worked for me with roughly 4.5GB of vram +CLI_ARGS=--model airoboros-13B-gpt4-1.2-GPTQ --wbits 4 --listen --auto-devices --extension api --gpu-memory 20 --load-in-8bit + +# the following examples have been tested with the files linked in docs/README_docker.md: +# example running 13b with 4bit/128 groupsize : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25 +# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share +# example running 7b with 8bit groupsize : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices + +# the port the webui binds to on the host +HOST_PORT=7860 +# the port the webui binds to inside the container +CONTAINER_PORT=7860 + +# the port the api binds to on the host +HOST_API_PORT=5000 +# the port the api binds to inside the container +CONTAINER_API_PORT=5000 + +# the port the api stream endpoint binds to on the host +HOST_API_STREAM_PORT=5005 +# the port the api stream endpoint binds to inside the container +CONTAINER_API_STREAM_PORT=5005 + +# the version used to install text-generation-webui from +WEBUI_VERSION=HEAD diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7cc0ff1 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,68 @@ +FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder + +RUN apt-get update && \ + apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \ + rm -rf /var/lib/apt/lists/* + +RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build + +WORKDIR /build + +RUN python3 -m venv /build/venv +RUN . /build/venv/bin/activate && \ + pip3 install --upgrade pip setuptools wheel && \ + pip3 install torch torchvision torchaudio && \ + pip3 install -r requirements.txt + +# https://developer.nvidia.com/cuda-gpus +# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5" +ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX" +RUN . /build/venv/bin/activate && \ + python3 setup_cuda.py bdist_wheel -d . + +FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04 + +LABEL maintainer="Your Name " +LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI" + +RUN apt-get update && \ + apt-get install --no-install-recommends -y python3-dev libportaudio2 libasound-dev git python3 python3-pip make g++ && \ + rm -rf /var/lib/apt/lists/* + +RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv +RUN mkdir /app + +WORKDIR /app + +ARG WEBUI_VERSION +RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source" + +RUN virtualenv /app/venv +RUN . /app/venv/bin/activate && \ + pip3 install --upgrade pip setuptools wheel && \ + pip3 install torch torchvision torchaudio + +COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa +RUN . /app/venv/bin/activate && \ + pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl + +COPY extensions/api/requirements.txt /app/extensions/api/requirements.txt +COPY extensions/elevenlabs_tts/requirements.txt /app/extensions/elevenlabs_tts/requirements.txt +COPY extensions/google_translate/requirements.txt /app/extensions/google_translate/requirements.txt +COPY extensions/silero_tts/requirements.txt /app/extensions/silero_tts/requirements.txt +COPY extensions/whisper_stt/requirements.txt /app/extensions/whisper_stt/requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/api && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/elevenlabs_tts && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/google_translate && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/silero_tts && pip3 install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/whisper_stt && pip3 install -r requirements.txt + +COPY requirements.txt /app/requirements.txt +RUN . /app/venv/bin/activate && \ + pip3 install -r requirements.txt + +RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so + +COPY . /app/ +ENV CLI_ARGS="" +CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..8b022d1 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,32 @@ +version: "3.3" +services: + text-generation-webui: + build: + context: . + args: + # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus + TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST} + WEBUI_VERSION: ${WEBUI_VERSION} + env_file: .env + ports: + - "${HOST_PORT}:${CONTAINER_PORT}" + - "${HOST_API_PORT}:${CONTAINER_API_PORT}" + - "${HOST_API_STREAM_PORT}:${CONTAINER_API_STREAM_PORT}" + stdin_open: true + tty: true + volumes: + - ./characters:/app/characters + - ./extensions:/app/extensions + - ./loras:/app/loras + - /mnt/ml_models/text-gen/:/app/models + - ./presets:/app/presets + - ./prompts:/app/prompts + - ./softprompts:/app/softprompts + - ./training:/app/training + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu]