Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-06-07 06:06:20 -04:00)

Commit 9bb9ce079e: 158 changed files with 893 additions and 732 deletions
@@ -101,7 +101,7 @@ jobs:
      - name: Build Package
        shell: bash
        run: |
-          rm -rf .git cmd* update_wizard* start_wsl.bat wsl.sh Colab-TextGen-GPU.ipynb docker
+          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker

          # Define common variables
          CUDA_VERSION="${{ matrix.cuda }}"

@@ -100,7 +100,7 @@ jobs:
      - name: Build Package
        shell: bash
        run: |
-          rm -rf .git cmd* update_wizard* start_wsl.bat wsl.sh Colab-TextGen-GPU.ipynb docker
+          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker

          # Define common variables
          AVX_SUPPORT="${{ matrix.avx }}"
.github/workflows/build-portable-release.yml (vendored, 2 changes)

@@ -100,7 +100,7 @@ jobs:
      - name: Build Package
        shell: bash
        run: |
-          rm -rf .git cmd* update_wizard* start_wsl.bat wsl.sh Colab-TextGen-GPU.ipynb docker
+          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker

          # Define common variables
          AVX_SUPPORT="${{ matrix.avx }}"
.gitignore (vendored, 20 changes)

@@ -1,26 +1,8 @@
/cache
/characters
/css
/extensions
/grammars
/installer_files
/logs
/loras
/models
/presets
/prompts
/repositories
/softprompts
/torch-dumps
/training/datasets

/CMD_FLAGS.txt
/img_bot*
/img_me*
/models/config-user.yaml
/notification.mp3
/settings*.json
/settings*.yaml
/user_data

.chroma
.DS_Store

@@ -1,3 +0,0 @@
-# Only used by the one-click installer.
-# Example:
-# --listen --api

README.md (242 changes)
@@ -43,7 +43,7 @@ Download from: https://github.com/oobabooga/text-generation-webui/releases

To restart the web UI later, just run the same `start_` script. If you need to reinstall, delete the `installer_files` folder created during setup and run the script again.

-You can use command-line flags, like `./start_linux.sh --help`, or add them to `CMD_FLAGS.txt` (such as `--api` to enable API use). To update the project, run `update_wizard_linux.sh`, `update_wizard_windows.bat`, `update_wizard_macos.sh`, or `update_wizard_wsl.bat`.
+You can use command-line flags, like `./start_linux.sh --help`, or add them to `user_data/CMD_FLAGS.txt` (such as `--api` to enable API use). To update the project, run `update_wizard_linux.sh`, `update_wizard_windows.bat`, `update_wizard_macos.sh`, or `update_wizard_wsl.bat`.

<details>
<summary>

@@ -157,7 +157,7 @@ mkdir -p logs cache
# TORCH_CUDA_ARCH_LIST based on your GPU model
# APP_RUNTIME_GID your host user's group id (run `id -g` in a terminal)
# BUILD_EXTENIONS optionally add comma separated list of extensions to build
-# Edit CMD_FLAGS.txt and add in it the options you want to execute (like --listen --cpu)
+# Edit user_data/CMD_FLAGS.txt and add in it the options you want to execute (like --listen --cpu)
#
docker compose up --build
```
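For reference, `user_data/CMD_FLAGS.txt` referenced in the two hunks above is just a plain text file of extra launch flags. A minimal sketch of creating it programmatically, assuming you are in the repository root; the specific flags are only examples taken from the documented flag list:

```python
from pathlib import Path

# user_data/CMD_FLAGS.txt holds flags that the start_ scripts pass to server.py.
flags_file = Path("user_data/CMD_FLAGS.txt")
flags_file.parent.mkdir(parents=True, exist_ok=True)

# Space-separated flags on one line, matching the "--listen --api" example
# from the old CMD_FLAGS.txt shown earlier in this diff.
flags_file.write_text("--api --listen\n", encoding="utf-8")

print(flags_file.read_text())
```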
@@ -182,131 +182,139 @@ List of command-line flags
</summary>

```txt
-usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--settings SETTINGS]
-                 [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices] [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]]
-                 [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast]
-                 [--use_flash_attention_2] [--use_eager_attention] [--torch-compile] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn]
-                 [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--batch-size BATCH_SIZE] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS] [--tensor-split TENSOR_SPLIT]
-                 [--numa] [--no-kv-offload] [--row-split] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa]
-                 [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--enable_tp] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR]
-                 [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT]
-                 [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE]
-                 [--subpath SUBPATH] [--old-colors] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--api-enable-ipv6]
-                 [--api-disable-ipv4] [--nowebui]
+usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS]
+                 [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR]
+                 [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--torch-compile] [--load-in-4bit]
+                 [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--threads THREADS] [--threads-batch THREADS_BATCH] [--batch-size BATCH_SIZE] [--no-mmap]
+                 [--mlock] [--n-gpu-layers N_GPU_LAYERS] [--tensor-split TENSOR_SPLIT] [--numa] [--no-kv-offload] [--row-split] [--extra-flags EXTRA_FLAGS] [--streaming-llm] [--ctx-size N]
+                 [--model-draft MODEL_DRAFT] [--draft-max DRAFT_MAX] [--gpu-layers-draft GPU_LAYERS_DRAFT] [--device-draft DEVICE_DRAFT] [--ctx-size-draft CTX_SIZE_DRAFT] [--gpu-split GPU_SPLIT]
+                 [--autosplit] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa] [--num_experts_per_token N] [--enable_tp] [--hqq-backend HQQ_BACKEND] [--cpp-runner]
+                 [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE]
+                 [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH]
+                 [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] [--subpath SUBPATH] [--old-colors] [--api] [--public-api]
+                 [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--api-enable-ipv6] [--api-disable-ipv4] [--nowebui]

Text generation web UI

options:
  -h, --help                                     show this help message and exit

Basic settings:
  --multi-user                                   Multi-user mode. Chat histories are not saved or automatically loaded. Warning: this is likely not safe for sharing publicly.
  --character CHARACTER                          The name of the character to load in chat mode by default.
  --model MODEL                                  Name of the model to load by default.
  --lora LORA [LORA ...]                         The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.
  --model-dir MODEL_DIR                          Path to directory with all the models.
  --lora-dir LORA_DIR                            Path to directory with all the loras.
+  --model-menu                                   Show a model menu in the terminal when the web UI is first launched.
-  --settings SETTINGS                            Load the default interface settings from this yaml file. See settings-template.yaml for an example. If you create a file called settings.yaml, this
-                                                 file will be loaded by default without the need to use the --settings flag.
+  --settings SETTINGS                            Load the default interface settings from this yaml file. See user_data/settings-template.yaml for an example. If you create a file called
+                                                 user_data/settings.yaml, this file will be loaded by default without the need to use the --settings flag.
  --extensions EXTENSIONS [EXTENSIONS ...]       The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.
  --verbose                                      Print the prompts to the terminal.
  --idle-timeout IDLE_TIMEOUT                    Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.

Model loader:
  --loader LOADER                                Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, HQQ,
                                                 TensorRT-LLM.

Transformers/Accelerate:
  --cpu                                          Use the CPU to generate text. Warning: Training on CPU is extremely slow.
-  --auto-devices                                 Automatically split the model across the available GPU(s) and CPU.
-  --gpu-memory GPU_MEMORY [GPU_MEMORY ...]       Maximum GPU memory in GiB to be allocated per GPU. Example: --gpu-memory 10 for a single GPU, --gpu-memory 10 5 for two GPUs. You can also set values
-                                                 in MiB like --gpu-memory 3500MiB.
-  --cpu-memory CPU_MEMORY                        Maximum CPU memory in GiB to allocate for offloaded weights. Same as above.
+  --cpu-memory CPU_MEMORY                        Maximum CPU memory in GiB. Use this for CPU offloading.
  --disk                                         If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.
-  --disk-cache-dir DISK_CACHE_DIR                Directory to save the disk cache to. Defaults to "cache".
+  --disk-cache-dir DISK_CACHE_DIR                Directory to save the disk cache to. Defaults to "user_data/cache".
  --load-in-8bit                                 Load the model with 8-bit precision (using bitsandbytes).
  --bf16                                         Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.
  --no-cache                                     Set use_cache to False while generating text. This reduces VRAM usage slightly, but it comes at a performance cost.
  --trust-remote-code                            Set trust_remote_code=True while loading the model. Necessary for some models.
  --force-safetensors                            Set use_safetensors=True while loading the model. This prevents arbitrary code execution.
  --no_use_fast                                  Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast.
  --use_flash_attention_2                        Set use_flash_attention_2=True while loading the model.
  --use_eager_attention                          Set attn_implementation= eager while loading the model.
  --torch-compile                                Compile the model with torch.compile for improved performance.

bitsandbytes 4-bit:
  --load-in-4bit                                 Load the model with 4-bit precision (using bitsandbytes).
  --use_double_quant                             use_double_quant for 4-bit.
  --compute_dtype COMPUTE_DTYPE                  compute dtype for 4-bit. Valid options: bfloat16, float16, float32.
  --quant_type QUANT_TYPE                        quant_type for 4-bit. Valid options: nf4, fp4.

llama.cpp:
  --flash-attn                                   Use flash-attention.
-  --n_ctx N_CTX                                  Size of the prompt context.
  --threads THREADS                              Number of threads to use.
  --threads-batch THREADS_BATCH                  Number of threads to use for batches/prompt processing.
  --batch-size BATCH_SIZE                        Maximum number of prompt tokens to batch together when calling llama_eval.
  --no-mmap                                      Prevent mmap from being used.
  --mlock                                        Force the system to keep the model in RAM.
  --n-gpu-layers N_GPU_LAYERS                    Number of layers to offload to the GPU.
  --tensor-split TENSOR_SPLIT                    Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.
  --numa                                         Activate NUMA task allocation for llama.cpp.
  --no-kv-offload                                Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.
  --row-split                                    Split the model by rows across GPUs. This may improve multi-gpu performance.
+  --extra-flags EXTRA_FLAGS                      Extra flags to pass to llama-server. Format: "flag1=value1;flag2;flag3=value3". Example: "override-tensor=exps=CPU"
+  --streaming-llm                                Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.

+Context and cache management:
+  --ctx-size N, --n_ctx N, --max_seq_len N       Context size in tokens.

+Speculative decoding:
+  --model-draft MODEL_DRAFT                      Path to the draft model for speculative decoding.
+  --draft-max DRAFT_MAX                          Number of tokens to draft for speculative decoding.
+  --gpu-layers-draft GPU_LAYERS_DRAFT            Number of layers to offload to the GPU for the draft model.
+  --device-draft DEVICE_DRAFT                    Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1
+  --ctx-size-draft CTX_SIZE_DRAFT                Size of the prompt context for the draft model. If 0, uses the same as the main model.

ExLlamaV2:
  --gpu-split GPU_SPLIT                          Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.
  --autosplit                                    Autosplit the model tensors across the available GPUs. This causes --gpu-split to be ignored.
-  --max_seq_len MAX_SEQ_LEN                      Maximum sequence length.
  --cfg-cache                                    ExLlamav2_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.
  --no_flash_attn                                Force flash-attention to not be used.
  --no_xformers                                  Force xformers to not be used.
  --no_sdpa                                      Force Torch SDPA to not be used.
-  --num_experts_per_token NUM_EXPERTS_PER_TOKEN  Number of experts to use for generation. Applies to MoE models like Mixtral.
+  --num_experts_per_token N                      Number of experts to use for generation. Applies to MoE models like Mixtral.
  --enable_tp                                    Enable Tensor Parallelism (TP) in ExLlamaV2.

HQQ:
  --hqq-backend HQQ_BACKEND                      Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.

TensorRT-LLM:
  --cpp-runner                                   Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet.

Cache:
  --cache_type CACHE_TYPE                        KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.

DeepSpeed:
  --deepspeed                                    Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.
  --nvme-offload-dir NVME_OFFLOAD_DIR            DeepSpeed: Directory to use for ZeRO-3 NVME offloading.
  --local_rank LOCAL_RANK                        DeepSpeed: Optional argument for distributed setups.

RoPE:
  --alpha_value ALPHA_VALUE                      Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both.
  --rope_freq_base ROPE_FREQ_BASE                If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63).
  --compress_pos_emb COMPRESS_POS_EMB            Positional embeddings compression factor. Should be set to (context length) / (model's original context length). Equal to 1/rope_freq_scale.

Gradio:
  --listen                                       Make the web UI reachable from your local network.
  --listen-port LISTEN_PORT                      The listening port that the server will use.
  --listen-host LISTEN_HOST                      The hostname that the server will use.
  --share                                        Create a public URL. This is useful for running the web UI on Google Colab or similar.
  --auto-launch                                  Open the web UI in the default browser upon launch.
  --gradio-auth GRADIO_AUTH                      Set Gradio authentication password in the format "username:password". Multiple credentials can also be supplied with "u1:p1,u2:p2,u3:p3".
  --gradio-auth-path GRADIO_AUTH_PATH            Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.
  --ssl-keyfile SSL_KEYFILE                      The path to the SSL certificate key file.
  --ssl-certfile SSL_CERTFILE                    The path to the SSL certificate cert file.
  --subpath SUBPATH                              Customize the subpath for gradio, use with reverse proxy
  --old-colors                                   Use the legacy Gradio colors, before the December/2024 update.

API:
  --api                                          Enable the API extension.
  --public-api                                   Create a public URL for the API using Cloudfare.
  --public-api-id PUBLIC_API_ID                  Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.
  --api-port API_PORT                            The listening port for the API.
  --api-key API_KEY                              API authentication key.
  --admin-key ADMIN_KEY                          API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key.
  --api-enable-ipv6                              Enable IPv6 for the API
  --api-disable-ipv4                             Disable IPv4 for the API
  --nowebui                                      Do not launch the Gradio UI. Useful for launching the API in standalone mode.
```

</details>
@@ -317,35 +325,37 @@ https://github.com/oobabooga/text-generation-webui/wiki

## Downloading models

-Models should be placed in the folder `text-generation-webui/models`. They are usually downloaded from [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads).
+Models should be placed in the folder `text-generation-webui/user_data/models`. They are usually downloaded from [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads).

-* GGUF models are a single file and should be placed directly into `models`. Example:
+* GGUF models are a single file and should be placed directly into `user_data/models`. Example:

```
text-generation-webui
-└── models
-    └── llama-2-13b-chat.Q4_K_M.gguf
+└── user_data
+    └── models
+        └── llama-2-13b-chat.Q4_K_M.gguf
```

* The remaining model types (like 16-bit Transformers models and EXL2 models) are made of several files and must be placed in a subfolder. Example:

```
text-generation-webui
-├── models
-│   ├── lmsys_vicuna-33b-v1.3
-│   │   ├── config.json
-│   │   ├── generation_config.json
-│   │   ├── pytorch_model-00001-of-00007.bin
-│   │   ├── pytorch_model-00002-of-00007.bin
-│   │   ├── pytorch_model-00003-of-00007.bin
-│   │   ├── pytorch_model-00004-of-00007.bin
-│   │   ├── pytorch_model-00005-of-00007.bin
-│   │   ├── pytorch_model-00006-of-00007.bin
-│   │   ├── pytorch_model-00007-of-00007.bin
-│   │   ├── pytorch_model.bin.index.json
-│   │   ├── special_tokens_map.json
-│   │   ├── tokenizer_config.json
-│   │   └── tokenizer.model
+└── user_data
+    └── models
+        └── lmsys_vicuna-33b-v1.3
+            ├── config.json
+            ├── generation_config.json
+            ├── pytorch_model-00001-of-00007.bin
+            ├── pytorch_model-00002-of-00007.bin
+            ├── pytorch_model-00003-of-00007.bin
+            ├── pytorch_model-00004-of-00007.bin
+            ├── pytorch_model-00005-of-00007.bin
+            ├── pytorch_model-00006-of-00007.bin
+            ├── pytorch_model-00007-of-00007.bin
+            ├── pytorch_model.bin.index.json
+            ├── special_tokens_map.json
+            ├── tokenizer_config.json
+            └── tokenizer.model
```

In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download it via the command-line with:
cmd_wsl.bat (11 changes)

@@ -1,11 +0,0 @@
-@echo off
-
-cd /D "%~dp0"
-
-set PATH=%PATH%;%SystemRoot%\system32
-
-@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script
-call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh cmd"
-
-:end
-pause

css/main.css (99 changes)

@@ -625,19 +625,19 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
    width: 100%;
    overflow-y: visible;
}

.message {
    break-inside: avoid;
}

.gradio-container {
    overflow: visible;
}

.tab-nav {
    display: none !important;
}

#chat-tab > :first-child {
    max-width: unset;
}

@@ -1291,3 +1291,94 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
.dark .footer-button:hover svg {
    stroke: rgb(209 213 219);
}
+
+.tgw-accordion {
+    padding: 10px 12px !important;
+}
+
+.dark .tgw-accordion {
+    border: 1px solid var(--border-color-dark);
+}
+
+.welcome-greeting {
+    text-align: center;
+    margin-top: 40vh;
+    font-size: 24px;
+    opacity: 0.7;
+    padding-left: 1rem;
+    padding-right: 1rem;
+}
+
+/* Thinking blocks styling */
+.thinking-block {
+    margin-bottom: 12px;
+    border-radius: 8px;
+    border: 1px solid rgb(0 0 0 / 10%);
+    background-color: var(--light-theme-gray);
+    overflow: hidden;
+}
+
+.dark .thinking-block {
+    background-color: var(--darker-gray);
+}
+
+.thinking-header {
+    display: flex;
+    align-items: center;
+    padding: 10px 16px;
+    cursor: pointer;
+    user-select: none;
+    font-size: 14px;
+    color: rgb(0 0 0 / 70%);
+    transition: background-color 0.2s;
+}
+
+.thinking-header:hover {
+    background-color: rgb(0 0 0 / 3%);
+}
+
+.thinking-header::-webkit-details-marker {
+    display: none;
+}
+
+.thinking-icon {
+    margin-right: 8px;
+    color: rgb(0 0 0 / 50%);
+}
+
+.thinking-title {
+    font-weight: 500;
+}
+
+.thinking-content {
+    padding: 12px 16px;
+    border-top: 1px solid rgb(0 0 0 / 7%);
+    color: rgb(0 0 0 / 70%);
+    font-size: 14px;
+    line-height: 1.5;
+    overflow-wrap: break-word;
+    max-height: 250px;
+    overflow-y: scroll;
+    contain: layout;
+}
+
+/* Animation for opening thinking blocks */
+@keyframes fadeIn {
+    from { opacity: 0; }
+    to { opacity: 1; }
+}
+
+.thinking-block[open] .thinking-content {
+    animation: fadeIn 0.3s ease-out;
+}
+
+/* Additional style for in-progress thinking */
+.thinking-block[data-streaming="true"] .thinking-title {
+    animation: pulse 1.5s infinite;
+}
+
+@keyframes pulse {
+    0% { opacity: 0.6; }
+    50% { opacity: 1; }
+    100% { opacity: 0.6; }
+}
docs/10 - WSL.md (146 changes)

@@ -1,146 +0,0 @@
-## WSL instructions
-
-If you do not have WSL installed, follow the [instructions below](https://github.com/oobabooga/text-generation-webui/wiki/10-%E2%80%90-WSL#wsl-installation) first.
-
-### Additional WSL setup info
-
-If you want to install Linux to a drive other than C, open powershell and enter these commands:
-
-```
-cd D:\Path\To\Linux
-$ProgressPreference = 'SilentlyContinue'
-Invoke-WebRequest -Uri <LinuxDistroURL> -OutFile Linux.appx -UseBasicParsing
-mv Linux.appx Linux.zip
-```
-
-Then open Linux.zip and you should see several .appx files inside.
-
-The one with _x64.appx contains the exe installer that you need.
-
-Extract the contents of that _x64.appx file and run <distro>.exe to install.
-
-Linux Distro URLs: https://learn.microsoft.com/en-us/windows/wsl/install-manual#downloading-distributions
-
-**ENSURE THAT THE WSL LINUX DISTRO THAT YOU WISH TO USE IS SET AS THE DEFAULT!**
-
-Do this by using these commands:
-
-```
-wsl -l
-wsl -s <DistroName>
-```
-
-### Web UI Installation
-
-Run the "start" script. By default it will install the web UI in WSL:
-/home/{username}/text-gen-install
-
-To launch the web UI in the future after it is already installed, run
-the same "start" script. Ensure that one_click.py and wsl.sh are next to it!
-
-### Updating the web UI
-
-As an alternative to running the "update" script, you can also run "wsl.sh update" in WSL.
-
-### Running an interactive shell
-
-As an alternative to running the "cmd" script, you can also run "wsl.sh cmd" in WSL.
-
-### Changing the default install location
-
-To change this, you will need to edit the scripts as follows:
-wsl.sh: line ~22 INSTALL_DIR="/path/to/install/dir"
-
-Keep in mind that there is a long-standing bug in WSL that significantly
-slows drive read/write speeds when using a physical drive as opposed to
-the virtual one that Linux is installed in.
-
-## WSL installation
-
-Guide created by [@jfryton](https://github.com/jfryton). Thank you jfryton.
-
------
-
-Here's an easy-to-follow, step-by-step guide for installing Windows Subsystem for Linux (WSL) with Ubuntu on Windows 10/11:
-
-### Step 1: Enable WSL
-
-1. Press the Windows key + X and click on "Windows PowerShell (Admin)" or "Windows Terminal (Admin)" to open PowerShell or Terminal with administrator privileges.
-2. In the PowerShell window, type the following command and press Enter:
-
-```
-wsl --install
-```
-
-If this command doesn't work, you can enable WSL with the following command for Windows 10:
-
-```
-wsl --set-default-version 1
-```
-
-For Windows 11, you can use:
-
-```
-wsl --set-default-version 2
-```
-
-You may be prompted to restart your computer. If so, save your work and restart.
-
-### Step 2: Install Ubuntu
-
-1. Open the Microsoft Store.
-2. Search for "Ubuntu" in the search bar.
-3. Choose the desired Ubuntu version (e.g., Ubuntu 20.04 LTS) and click "Get" or "Install" to download and install the Ubuntu app.
-4. Once the installation is complete, click "Launch" or search for "Ubuntu" in the Start menu and open the app.
-
-### Step 3: Set up Ubuntu
-
-1. When you first launch the Ubuntu app, it will take a few minutes to set up. Be patient as it installs the necessary files and sets up your environment.
-2. Once the setup is complete, you will be prompted to create a new UNIX username and password. Choose a username and password, and make sure to remember them, as you will need them for future administrative tasks within the Ubuntu environment.
-
-### Step 4: Update and upgrade packages
-
-1. After setting up your username and password, it's a good idea to update and upgrade your Ubuntu system. Run the following commands in the Ubuntu terminal:
-
-```
-sudo apt update
-sudo apt upgrade
-```
-
-2. Enter your password when prompted. This will update the package list and upgrade any outdated packages.
-
-Congratulations! You have now installed WSL with Ubuntu on your Windows 10/11 system. You can use the Ubuntu terminal for various tasks, like running Linux commands, installing packages, or managing files.
-
-You can launch your WSL Ubuntu installation by selecting the Ubuntu app (like any other program installed on your computer) or typing 'ubuntu' into Powershell or Terminal.
-
-### Step 5: Proceed with Linux instructions
-
-1. You can now follow the Linux setup instructions. If you receive any error messages about a missing tool or package, just install them using apt:
-
-```
-sudo apt install [missing package]
-```
-
-You will probably need to install build-essential
-
-```
-sudo apt install build-essential
-```
-
-If you face any issues or need to troubleshoot, you can always refer to the official Microsoft documentation for WSL: https://docs.microsoft.com/en-us/windows/wsl/
-
-### WSL2 performance using /mnt:
-
-When you git clone a repository, put it inside WSL and not outside. To understand more, take a look at this [issue](https://github.com/microsoft/WSL/issues/4197#issuecomment-604592340)
-
-### Bonus: Port Forwarding
-
-By default, you won't be able to access the webui from another device on your local network. You will need to setup the appropriate port forwarding using the following steps:
-
-1. First, get the IP address of the WSL by typing `wsl hostname -I`. This will output the IP address, for example `172.20.134.111`.
-2. Then, use the following command (using PowerShell or Terminal with administrator privileges) to set up port forwarding, replacing `172.20.134.111` with the IP address you obtained in step 1:
-
-```
-netsh interface portproxy add v4tov4 listenaddress=0.0.0.0 listenport=7860 connectaddress=172.20.134.111 connectport=7860
-```
@@ -1,5 +1,5 @@
'''
-Downloads models from Hugging Face to models/username_modelname.
+Downloads models from Hugging Face to user_data/models/username_modelname.

Example:
python download-model.py facebook/opt-1.3b

@@ -175,7 +175,7 @@ class ModelDownloader:
        if model_dir:
            base_folder = model_dir
        else:
-            base_folder = 'models' if not is_lora else 'loras'
+            base_folder = 'user_data/models' if not is_lora else 'user_data/loras'

        # If the model is of type GGUF, save directly in the base_folder
        if is_llamacpp:

@@ -356,7 +356,7 @@ if __name__ == '__main__':
    parser.add_argument('--specific-file', type=str, default=None, help='Name of the specific file to download (if not provided, downloads all).')
    parser.add_argument('--exclude-pattern', type=str, default=None, help='Regex pattern to exclude files from download.')
    parser.add_argument('--output', type=str, default=None, help='Save the model files to this folder.')
-    parser.add_argument('--model-dir', type=str, default=None, help='Save the model files to a subfolder of this folder instead of the default one (text-generation-webui/models).')
+    parser.add_argument('--model-dir', type=str, default=None, help='Save the model files to a subfolder of this folder instead of the default one (text-generation-webui/user_data/models).')
    parser.add_argument('--clean', action='store_true', help='Does not resume the previous download.')
    parser.add_argument('--check', action='store_true', help='Validates the checksums of model files.')
    parser.add_argument('--max-retries', type=int, default=7, help='Max retries count when get error in download time.')
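With the new default, files downloaded by this script land under `user_data/models`. A minimal usage sketch, assuming it is run from the repository root; the repository id and file name are illustrative only, and the flags used are the ones listed in the argparse hunk above:

```python
import subprocess
from pathlib import Path

# Download a single GGUF file; without --output or --model-dir it goes to the
# default folder, which after this commit is user_data/models.
subprocess.run(
    [
        "python", "download-model.py",
        "TheBloke/Llama-2-13B-chat-GGUF",                     # example repo id
        "--specific-file", "llama-2-13b-chat.Q4_K_M.gguf",    # example file
        "--max-retries", "7",
    ],
    check=True,
)

# GGUF files are placed directly in the models folder (see the README layout above).
print(sorted(Path("user_data/models").glob("*.gguf")))
```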
@@ -59,4 +59,4 @@ def create_graph(lora_path, lora_name):
            print(f"File 'training_graph.json' does not exist in the {lora_path}")

    except ImportError:
        print("matplotlib is not installed. Please install matplotlib to create PNG graphs")

@@ -175,23 +175,23 @@ def ui():
    with gr.Row():
        with gr.Column():
            with gr.Row():
-                dataset = gr.Dropdown(choices=get_datasets('training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.', elem_classes=['slim-dropdown'])
-                create_refresh_button(dataset, lambda: None, lambda: {'choices': get_datasets('training/datasets', 'json')}, 'refresh-button')
+                dataset = gr.Dropdown(choices=get_datasets('user_data/training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.', elem_classes=['slim-dropdown'])
+                create_refresh_button(dataset, lambda: None, lambda: {'choices': get_datasets('user_data/training/datasets', 'json')}, 'refresh-button')
            with gr.Row():
-                eval_dataset = gr.Dropdown(choices=get_datasets('training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The (optional) dataset file used to evaluate the model after training.', elem_classes=['slim-dropdown'])
-                create_refresh_button(eval_dataset, lambda: None, lambda: {'choices': get_datasets('training/datasets', 'json')}, 'refresh-button')
+                eval_dataset = gr.Dropdown(choices=get_datasets('user_data/training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The (optional) dataset file used to evaluate the model after training.', elem_classes=['slim-dropdown'])
+                create_refresh_button(eval_dataset, lambda: None, lambda: {'choices': get_datasets('user_data/training/datasets', 'json')}, 'refresh-button')

        with gr.Column():
            with gr.Row():
-                format = gr.Dropdown(choices=get_datasets('training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.', elem_classes=['slim-dropdown'])
-                create_refresh_button(format, lambda: None, lambda: {'choices': get_datasets('training/formats', 'json')}, 'refresh-button')
+                format = gr.Dropdown(choices=get_datasets('user_data/training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.', elem_classes=['slim-dropdown'])
+                create_refresh_button(format, lambda: None, lambda: {'choices': get_datasets('user_data/training/formats', 'json')}, 'refresh-button')
            with gr.Row():
                eval_steps = gr.Number(label='Evaluate every n steps', value=100, info='If an evaluation dataset is given, test it every time this many steps pass.')

    with gr.Tab(label="Text file"):
        with gr.Row():
-            raw_text_file = gr.Dropdown(choices=get_datasets('training/datasets', 'txt'), value='None', label='Text file', info='The text file to use for training.', elem_classes=['slim-dropdown'])
-            create_refresh_button(raw_text_file, lambda: None, lambda: {'choices': get_datasets('training/datasets', 'txt')}, 'refresh-button')
+            raw_text_file = gr.Dropdown(choices=get_datasets('user_data/training/datasets', 'txt'), value='None', label='Text file', info='The text file to use for training.', elem_classes=['slim-dropdown'])
+            create_refresh_button(raw_text_file, lambda: None, lambda: {'choices': get_datasets('user_data/training/datasets', 'txt')}, 'refresh-button')

        with gr.Row():
            with gr.Column():

@@ -208,7 +208,7 @@ def ui():
            download_file_url = gr.Textbox(label='Download JSON or txt file to datasets (or formats) folder', value='',info='The URL of a file to download. If on github, make sure you get url of the raw file (https://raw.githubusercontent.com/...). If huggin face, make sure the url has /resolve/ in it not /blob/')
            with gr.Row():
                download_check_overwrite = gr.Checkbox(label='Overwrite', value=False, info='Overwrite if file exist')
-                download_folder = gr.Radio(label="Destination", value='training/datasets', choices=['training/datasets', 'training/formats'], interactive=True)
+                download_folder = gr.Radio(label="Destination", value='user_data/training/datasets', choices=['user_data/training/datasets', 'user_data/training/formats'], interactive=True)
                download_button = gr.Button('Download')
                download_status = gr.Textbox(label='Download Status', value='', interactive=False)
            with gr.Row():

@@ -235,7 +235,7 @@ def ui():
    with gr.Row():
        with gr.Column():
            models = gr.Dropdown(utils.get_available_models(), label='Models', multiselect=True)
-            evaluate_text_file = gr.Dropdown(choices=['wikitext', 'ptb', 'ptb_new'] + get_datasets('training/datasets', 'txt')[1:], value='wikitext', label='Input dataset', info='The text file on which the model will be evaluated. The first options are automatically downloaded: wikitext, ptb, and ptb_new. The next options are your local text files under training/datasets.')
+            evaluate_text_file = gr.Dropdown(choices=['wikitext', 'ptb', 'ptb_new'] + get_datasets('user_data/training/datasets', 'txt')[1:], value='wikitext', label='Input dataset', info='The text file on which the model will be evaluated. The first options are automatically downloaded: wikitext, ptb, and ptb_new. The next options are your local text files under user_data/training/datasets.')
        with gr.Row():
            with gr.Column():
                stride_length = gr.Slider(label='Stride', minimum=1, maximum=2048, value=512, step=1, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')

@@ -310,7 +310,7 @@ def ui():
        if raw_text_file not in ['None', '']:
            logger.info("Loading Text file...")
-            fullpath = clean_path('training/datasets', f'{raw_text_file}')
+            fullpath = clean_path('user_data/training/datasets', f'{raw_text_file}')
            fullpath = Path(fullpath)
            if fullpath.is_dir():
                logger.info('Training path directory {}'.format(raw_text_file))

@@ -324,10 +324,10 @@ def ui():
                logger.info(f"Loaded training file: {file_path.name}")
            else:
                try:
-                    with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
+                    with open(clean_path('user_data/training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
                        raw_text = file.read().replace('\r', '')
                except:
-                    yield f"{raw_text_file}.txt doesn't seem to exsist anymore... check your training/datasets folder"
+                    yield f"{raw_text_file}.txt doesn't seem to exsist anymore... check your user_data/training/datasets folder"
                    return

@@ -353,7 +353,7 @@ def ui():
            yield "Select format choice for dataset."
            return

-        with open(clean_path('training/formats', f'{format}.json'), 'r', encoding='utf-8-sig') as formatFile:
+        with open(clean_path('user_data/training/formats', f'{format}.json'), 'r', encoding='utf-8-sig') as formatFile:
            format_data: dict[str, str] = json.load(formatFile)

        def generate_prompt(data_point: dict[str, str]):

@@ -381,7 +381,7 @@ def ui():
            return tokenize_dummy(prompt)

        logger.info("Loading JSON datasets...")
-        data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json'))
+        data = load_dataset("json", data_files=clean_path('user_data/training/datasets', f'{dataset}.json'))

        data_keys = []
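The practical effect of the path changes above is that training data now lives under `user_data/training/datasets`. A minimal sketch of preparing such a file and loading it with the same `load_dataset("json", ...)` call used in the hunk; the file name and record keys are made up for illustration, since the real keys depend on the chosen format file:

```python
import json
from pathlib import Path

from datasets import load_dataset

# Hypothetical instruction-style records.
records = [
    {"instruction": "Translate to French", "input": "Good morning", "output": "Bonjour"},
    {"instruction": "Summarize", "input": "A long paragraph...", "output": "A short summary."},
]

dataset_dir = Path("user_data/training/datasets")
dataset_dir.mkdir(parents=True, exist_ok=True)
(dataset_dir / "my_dataset.json").write_text(json.dumps(records, indent=2), encoding="utf-8")

# Same call pattern as in the diff above.
data = load_dataset("json", data_files=str(dataset_dir / "my_dataset.json"))
print(data["train"][0])
```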
@@ -456,7 +456,7 @@ def ui():
    #debug_slicer.change(lambda x: non_serialized_params.update({"debug_slicer": x}), debug_slicer, None)

    def update_dataset():
-        return gr.update(choices=get_datasets('training/datasets', 'json')), gr.update(choices=get_datasets('training/datasets', 'txt'))
+        return gr.update(choices=get_datasets('user_data/training/datasets', 'json')), gr.update(choices=get_datasets('user_data/training/datasets', 'txt'))

    download_button.click(download_file_from_url, [download_file_url,download_check_overwrite,download_folder] , download_status).then(update_dataset,None,[dataset , raw_text_file])

@@ -670,7 +670,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
    if raw_text_file not in ['None', '']:
        train_template["template_type"] = "raw_text"
        logger.info("Loading text file...")
-        fullpath = clean_path('training/datasets', f'{raw_text_file}')
+        fullpath = clean_path('user_data/training/datasets', f'{raw_text_file}')
        fullpath = Path(fullpath)
        if fullpath.is_dir():
            logger.info('Training path directory {}'.format(raw_text_file))

@@ -683,7 +683,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch

            logger.info(f"Loaded training file: {file_path.name}")
        else:
-            with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
+            with open(clean_path('user_data/training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
                raw_text = file.read().replace('\r', '')

        # FPHAM PRECISE SLICING

@@ -720,7 +720,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch

        train_template["template_type"] = "dataset"

-        with open(clean_path('training/formats', f'{format}.json'), 'r', encoding='utf-8-sig') as formatFile:
+        with open(clean_path('user_data/training/formats', f'{format}.json'), 'r', encoding='utf-8-sig') as formatFile:
            format_data: dict[str, str] = json.load(formatFile)

        # == store training prompt ==

@@ -742,7 +742,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
            return tokenize(prompt, add_eos_token, add_bos_token)

        logger.info("Loading JSON datasets...")
-        data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json'))
+        data = load_dataset("json", data_files=clean_path('user_data/training/datasets', f'{dataset}.json'))
        train_data = data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))

        print(f"BOS: {add_bos_token} EOS: {add_eos_token}")

@@ -751,7 +751,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
        if eval_dataset == 'None':
            eval_data = None
        else:
-            eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json'))
+            eval_data = load_dataset("json", data_files=clean_path('user_data/training/datasets', f'{eval_dataset}.json'))
            eval_data = eval_data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))

        # == We MUST reload model if it went through any previous training, even failed one ==

@@ -1157,11 +1157,11 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
                decoded_entries.append({"value": decoded_text})

            # Write the log file
-            Path('logs').mkdir(exist_ok=True)
-            with open(Path('logs/train_dataset_sample.json'), 'w') as json_file:
+            Path('user_data/logs').mkdir(exist_ok=True)
+            with open(Path('user_data/logs/train_dataset_sample.json'), 'w') as json_file:
                json.dump(decoded_entries, json_file, indent=4)

-            logger.info("Log file 'train_dataset_sample.json' created in the 'logs' directory.")
+            logger.info("Log file 'train_dataset_sample.json' created in the 'user_data/logs' directory.")
        except Exception as e:
            logger.error(f"Failed to create log file due to error: {e}")

@@ -194,13 +194,13 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c

    if debug_slicer:
        # Write the log file
-        Path('logs').mkdir(exist_ok=True)
+        Path('user_data/logs').mkdir(exist_ok=True)
        sentencelist_dict = {index: sentence for index, sentence in enumerate(sentencelist)}
-        output_file = "logs/sentencelist.json"
+        output_file = "user_data/logs/sentencelist.json"
        with open(output_file, 'w') as f:
            json.dump(sentencelist_dict, f,indent=2)

-        print("Saved sentencelist.json in logs folder")
+        print("Saved sentencelist.json in user_data/logs folder")

    return sentencelist

@@ -281,13 +281,13 @@ def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len

    if debug_slicer:
        # Write the log file
-        Path('logs').mkdir(exist_ok=True)
+        Path('user_data/logs').mkdir(exist_ok=True)
        sentencelist_dict = {index: sentence for index, sentence in enumerate(sentencelist)}
-        output_file = "logs/sentencelist.json"
+        output_file = "user_data/logs/sentencelist.json"
        with open(output_file, 'w') as f:
            json.dump(sentencelist_dict, f,indent=2)

-        print("Saved sentencelist.json in logs folder")
+        print("Saved sentencelist.json in user_data/logs folder")

    return sentencelist
@@ -72,13 +72,13 @@ def generate_html():
    global cards
    cards = []
    # Iterate through files in image folder
-    for file in sorted(Path("characters").glob("*")):
+    for file in sorted(Path("user_data/characters").glob("*")):
        if file.suffix in [".json", ".yml", ".yaml"]:
            character = file.stem
            container_html = '<div class="character-container">'
            image_html = "<div class='placeholder'></div>"

-            for path in [Path(f"characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
+            for path in [Path(f"user_data/characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
                if path.exists():
                    image_html = f'<img src="file/{get_image_cache(path)}">'
                    break
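The gallery code above only looks at file names under `user_data/characters`: any `.json`/`.yml`/`.yaml` file is a character, and an image with the same stem replaces the placeholder. A minimal sketch of creating such a pair; the field names inside the YAML are a common convention and are assumed here rather than taken from this diff:

```python
from pathlib import Path

import yaml  # PyYAML, assumed to be available in the web UI's environment

char_dir = Path("user_data/characters")
char_dir.mkdir(parents=True, exist_ok=True)

# The gallery uses the file stem as the character name, so this shows up as "Example".
character = {
    "name": "Example",                          # assumed field names, for illustration
    "greeting": "Hello! How can I help?",
    "context": "Example is a friendly assistant.",
}
(char_dir / "Example.yaml").write_text(yaml.safe_dump(character), encoding="utf-8")

# Optionally drop an Example.png next to it; generate_html() will then render
# the image instead of the placeholder div.
```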
@@ -86,6 +86,20 @@ app.add_middleware(
)


+@app.middleware("http")
+async def validate_host_header(request: Request, call_next):
+    # Be strict about only approving access to localhost by default
+    if not (shared.args.listen or shared.args.public_api):
+        host = request.headers.get("host", "").split(":")[0]
+        if host not in ["localhost", "127.0.0.1"]:
+            return JSONResponse(
+                status_code=400,
+                content={"detail": "Invalid host header"}
+            )
+
+    return await call_next(request)
+
+
@app.options("/", dependencies=check_key)
async def options_route():
    return JSONResponse(content="OK")

@@ -236,6 +250,11 @@ async def handle_moderations(request: Request):
    return JSONResponse(response)


+@app.get("/v1/internal/health", dependencies=check_key)
+async def handle_health_check():
+    return JSONResponse(content={"status": "ok"})
+
+
@app.post("/v1/internal/encode", response_model=EncodeResponse, dependencies=check_key)
async def handle_token_encode(request_data: EncodeRequest):
    response = token_encode(request_data.text)
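A quick way to exercise both additions from a client, assuming the API extension is running locally on its default port 5000 and no `--api-key` is set (both assumptions, not stated in this diff):

```python
import requests

BASE = "http://127.0.0.1:5000"

# New health endpoint: should return {"status": "ok"} while the server is up.
print(requests.get(f"{BASE}/v1/internal/health").json())

# The new middleware rejects non-local Host headers unless --listen or
# --public-api is used, so a spoofed header should come back as HTTP 400.
spoofed = requests.get(f"{BASE}/v1/internal/health", headers={"Host": "evil.example.com"})
print(spoofed.status_code, spoofed.json())  # expected: 400 {'detail': 'Invalid host header'}
```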
@@ -6,7 +6,7 @@ from pydantic import BaseModel, Field


class GenerationOptions(BaseModel):
-    preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
+    preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/user_data/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
    dynatemp_low: float = 1
    dynatemp_high: float = 1
    dynatemp_exponent: float = 1

@@ -103,10 +103,10 @@ class ChatCompletionRequestParams(BaseModel):

    mode: str = Field(default='instruct', description="Valid options: instruct, chat, chat-instruct.")

-    instruction_template: str | None = Field(default=None, description="An instruction template defined under text-generation-webui/instruction-templates. If not set, the correct template will be automatically obtained from the model metadata.")
+    instruction_template: str | None = Field(default=None, description="An instruction template defined under text-generation-webui/user_data/instruction-templates. If not set, the correct template will be automatically obtained from the model metadata.")
    instruction_template_str: str | None = Field(default=None, description="A Jinja2 instruction template. If set, will take precedence over everything else.")

-    character: str | None = Field(default=None, description="A character defined under text-generation-webui/characters. If not set, the default \"Assistant\" character will be used.")
+    character: str | None = Field(default=None, description="A character defined under text-generation-webui/user_data/characters. If not set, the default \"Assistant\" character will be used.")
    bot_name: str | None = Field(default=None, description="Overwrites the value set by character field.", alias="name2")
    context: str | None = Field(default=None, description="Overwrites the value set by character field.")
    greeting: str | None = Field(default=None, description="Overwrites the value set by character field.")
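These schema fields are what API clients send in the request body, so after this change `preset` and `character` resolve against `user_data/`. A minimal sketch of a chat-completions call exercising `mode`, `character`, and `preset`, assuming the server was started with `--api` on the default port 5000, that the default "Assistant" character exists, and that the preset name (made up here) matches a file under `user_data/presets`:

```python
import requests

payload = {
    "messages": [{"role": "user", "content": "Give me a one-line summary of RoPE scaling."}],
    "mode": "chat",            # instruct, chat, or chat-instruct (see the schema above)
    "character": "Assistant",
    "preset": "my_preset",     # hypothetical user_data/presets/my_preset.yaml
    "max_tokens": 200,
}

response = requests.post(
    "http://127.0.0.1:5000/v1/chat/completions",
    json=payload,
    timeout=120,
)
print(response.json()["choices"][0]["message"]["content"])
```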
@@ -148,7 +148,7 @@ class ChromaCollector():
            id_ = new_ids[i]
            metadata = metadatas[i] if metadatas is not None else None
            embedding = self.embeddings_cache.get(text)
-            if embedding is not None and embedding.any():
+            if embedding is not None and any(embedding):
                existing_texts.append(text)
                existing_embeddings.append(embedding)
                existing_ids.append(id_)
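The small change above matters because `.any()` is a NumPy array method, while the built-in `any()` also accepts plain Python lists, so a cached embedding stored as a list would otherwise raise `AttributeError`. A standalone illustration of the difference (not taken from the extension):

```python
import numpy as np

as_array = np.array([0.0, 0.1, 0.2])
as_list = [0.0, 0.1, 0.2]

print(as_array.any())   # True: NumPy method, arrays only
print(any(as_array))    # True: built-in any() works on 1-D arrays too
print(any(as_list))     # True: and on plain lists

try:
    as_list.any()       # plain lists have no .any() method
except AttributeError as e:
    print("AttributeError:", e)
```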
@@ -31,24 +31,94 @@ function removeLastClick() {
}

function handleMorphdomUpdate(text) {
  // Track closed blocks
  const closedBlocks = new Set();
  document.querySelectorAll(".thinking-block").forEach(block => {
    const blockId = block.getAttribute("data-block-id");

    // If block exists and is not open, add to closed set
    if (blockId && !block.hasAttribute("open")) {
      closedBlocks.add(blockId);
    }
  });

  // Store scroll positions for any open blocks
  const scrollPositions = {};
  document.querySelectorAll(".thinking-block[open]").forEach(block => {
    const content = block.querySelector(".thinking-content");
    const blockId = block.getAttribute("data-block-id");
    if (content && blockId) {
      const isAtBottom = Math.abs((content.scrollHeight - content.scrollTop) - content.clientHeight) < 5;
      scrollPositions[blockId] = {
        position: content.scrollTop,
        isAtBottom: isAtBottom
      };
    }
  });

  morphdom(
    document.getElementById("chat").parentNode,
    "<div class=\"prose svelte-1ybaih5\">" + text + "</div>",
    {
      onBeforeElUpdated: function(fromEl, toEl) {
        // Preserve code highlighting
        if (fromEl.tagName === "PRE" && fromEl.querySelector("code[data-highlighted]")) {
          const fromCode = fromEl.querySelector("code");
          const toCode = toEl.querySelector("code");

          if (fromCode && toCode && fromCode.textContent === toCode.textContent) {
            // If the <code> content is the same, preserve the entire <pre> element
            toEl.className = fromEl.className;
            toEl.innerHTML = fromEl.innerHTML;
-            return false; // Skip updating the <pre> element
+            return false;
          }
        }

        // For thinking blocks, respect closed state
        if (fromEl.classList && fromEl.classList.contains("thinking-block") &&
            toEl.classList && toEl.classList.contains("thinking-block")) {
          const blockId = toEl.getAttribute("data-block-id");
          // If this block was closed by user, keep it closed
          if (blockId && closedBlocks.has(blockId)) {
            toEl.removeAttribute("open");
          }
        }

        return !fromEl.isEqualNode(toEl);
      },

      onElUpdated: function(el) {
        // Restore scroll positions for open thinking blocks
        if (el.classList && el.classList.contains("thinking-block") && el.hasAttribute("open")) {
          const blockId = el.getAttribute("data-block-id");
          const content = el.querySelector(".thinking-content");

          if (content && blockId && scrollPositions[blockId]) {
            setTimeout(() => {
              if (scrollPositions[blockId].isAtBottom) {
                content.scrollTop = content.scrollHeight;
              } else {
                content.scrollTop = scrollPositions[blockId].position;
              }
            }, 0);
          }
        }
-        return !fromEl.isEqualNode(toEl); // Update only if nodes differ
      }
    }
  );

  // Add toggle listeners for new blocks
  document.querySelectorAll(".thinking-block").forEach(block => {
    if (!block._hasToggleListener) {
      block.addEventListener("toggle", function(e) {
        if (this.open) {
          const content = this.querySelector(".thinking-content");
          if (content) {
            setTimeout(() => {
              content.scrollTop = content.scrollHeight;
            }, 0);
          }
        }
      });
      block._hasToggleListener = true;
    }
  });
}

@@ -395,7 +395,7 @@ let bigPictureVisible = false;
function addBigPicture() {
  var imgElement = document.createElement("img");
  var timestamp = new Date().getTime();
-  imgElement.src = "/file/cache/pfp_character.png?time=" + timestamp;
+  imgElement.src = "/file/user_data/cache/pfp_character.png?time=" + timestamp;
  imgElement.classList.add("bigProfilePicture");
  imgElement.addEventListener("load", function () {
|
||||
this.style.visibility = "visible";
|
||||
|
|
|
@ -2,6 +2,6 @@ function updateBigPicture() {
|
|||
var existingElement = document.querySelector(".bigProfilePicture");
|
||||
if (existingElement) {
|
||||
var timestamp = new Date().getTime();
|
||||
existingElement.src = "/file/cache/pfp_character.png?time=" + timestamp;
|
||||
existingElement.src = "/file/user_data/cache/pfp_character.png?time=" + timestamp;
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -417,16 +417,8 @@ def generate_chat_reply(text, state, regenerate=False, _continue=False, loading_
        yield history
        return

-    show_after = html.escape(state.get("show_after")) if state.get("show_after") else None
    for history in chatbot_wrapper(text, state, regenerate=regenerate, _continue=_continue, loading_message=loading_message, for_ui=for_ui):
-        if show_after:
-            after = history["visible"][-1][1].partition(show_after)[2] or "*Is thinking...*"
-            yield {
-                'internal': history['internal'],
-                'visible': history['visible'][:-1] + [[history['visible'][-1][0], after]]
-            }
-        else:
-            yield history
+        yield history


def character_is_loaded(state, raise_exception=False):

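For context on what the removed `show_after` branch used to do (behaviour now superseded by the dedicated thinking-block rendering added elsewhere in this commit): `str.partition` splits on the first occurrence of the marker and index `[2]` keeps only the text after it, falling back to a placeholder while the marker has not appeared yet. A small illustration with a hypothetical marker:

```python
# Hypothetical marker value; the old code read it from state["show_after"].
show_after = "</think>"

for visible in ("Let me reason about this", "reasoning...</think>The answer is 42."):
    after = visible.partition(show_after)[2] or "*Is thinking...*"
    print(after)
# *Is thinking...*
# The answer is 42.
```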
@@ -533,9 +525,9 @@ def start_new_chat(state):

def get_history_file_path(unique_id, character, mode):
    if mode == 'instruct':
-        p = Path(f'logs/instruct/{unique_id}.json')
+        p = Path(f'user_data/logs/instruct/{unique_id}.json')
    else:
-        p = Path(f'logs/chat/{character}/{unique_id}.json')
+        p = Path(f'user_data/logs/chat/{character}/{unique_id}.json')

    return p

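A quick sanity check of the new on-disk layout, re-running the function exactly as shown above (the IDs and character name are only examples):

```python
from pathlib import Path


def get_history_file_path(unique_id, character, mode):
    if mode == 'instruct':
        p = Path(f'user_data/logs/instruct/{unique_id}.json')
    else:
        p = Path(f'user_data/logs/chat/{character}/{unique_id}.json')

    return p


print(get_history_file_path("20240101-12-00-00", "Assistant", "chat"))
# user_data/logs/chat/Assistant/20240101-12-00-00.json
print(get_history_file_path("20240101-12-00-00", "Assistant", "instruct"))
# user_data/logs/instruct/20240101-12-00-00.json
```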
@@ -571,13 +563,13 @@ def rename_history(old_id, new_id, character, mode):

def get_paths(state):
    if state['mode'] == 'instruct':
-        return Path('logs/instruct').glob('*.json')
+        return Path('user_data/logs/instruct').glob('*.json')
    else:
        character = state['character_menu']

        # Handle obsolete filenames and paths
-        old_p = Path(f'logs/{character}_persistent.json')
-        new_p = Path(f'logs/persistent_{character}.json')
+        old_p = Path(f'user_data/logs/{character}_persistent.json')
+        new_p = Path(f'user_data/logs/persistent_{character}.json')
        if old_p.exists():
            logger.warning(f"Renaming \"{old_p}\" to \"{new_p}\"")
            old_p.rename(new_p)

@@ -589,7 +581,7 @@ def get_paths(state):
            p.parent.mkdir(exist_ok=True)
            new_p.rename(p)

-        return Path(f'logs/chat/{character}').glob('*.json')
+        return Path(f'user_data/logs/chat/{character}').glob('*.json')


def find_all_histories(state):

@ -740,7 +732,7 @@ def generate_pfp_cache(character):
|
|||
if not cache_folder.exists():
|
||||
cache_folder.mkdir()
|
||||
|
||||
for path in [Path(f"characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
|
||||
for path in [Path(f"user_data/characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
|
||||
if path.exists():
|
||||
original_img = Image.open(path)
|
||||
original_img.save(Path(f'{cache_folder}/pfp_character.png'), format='PNG')
|
||||
|
@ -760,12 +752,12 @@ def load_character(character, name1, name2):
|
|||
|
||||
filepath = None
|
||||
for extension in ["yml", "yaml", "json"]:
|
||||
filepath = Path(f'characters/{character}.{extension}')
|
||||
filepath = Path(f'user_data/characters/{character}.{extension}')
|
||||
if filepath.exists():
|
||||
break
|
||||
|
||||
if filepath is None or not filepath.exists():
|
||||
logger.error(f"Could not find the character \"{character}\" inside characters/. No character has been loaded.")
|
||||
logger.error(f"Could not find the character \"{character}\" inside user_data/characters. No character has been loaded.")
|
||||
raise ValueError
|
||||
|
||||
file_contents = open(filepath, 'r', encoding='utf-8').read()
|
||||
|
@ -804,7 +796,7 @@ def load_instruction_template(template):
|
|||
if template == 'None':
|
||||
return ''
|
||||
|
||||
for filepath in [Path(f'instruction-templates/{template}.yaml'), Path('instruction-templates/Alpaca.yaml')]:
|
||||
for filepath in [Path(f'user_data/instruction-templates/{template}.yaml'), Path('user_data/instruction-templates/Alpaca.yaml')]:
|
||||
if filepath.exists():
|
||||
break
|
||||
else:
|
||||
|
@ -846,17 +838,17 @@ def upload_character(file, img, tavern=False):
|
|||
|
||||
outfile_name = name
|
||||
i = 1
|
||||
while Path(f'characters/{outfile_name}.yaml').exists():
|
||||
while Path(f'user_data/characters/{outfile_name}.yaml').exists():
|
||||
outfile_name = f'{name}_{i:03d}'
|
||||
i += 1
|
||||
|
||||
with open(Path(f'characters/{outfile_name}.yaml'), 'w', encoding='utf-8') as f:
|
||||
with open(Path(f'user_data/characters/{outfile_name}.yaml'), 'w', encoding='utf-8') as f:
|
||||
f.write(yaml_data)
|
||||
|
||||
if img is not None:
|
||||
img.save(Path(f'characters/{outfile_name}.png'))
|
||||
img.save(Path(f'user_data/characters/{outfile_name}.png'))
|
||||
|
||||
logger.info(f'New character saved to "characters/{outfile_name}.yaml".')
|
||||
logger.info(f'New character saved to "user_data/characters/{outfile_name}.yaml".')
|
||||
return gr.update(value=outfile_name, choices=get_available_characters())
|
||||
|
||||
|
||||
|
@ -931,9 +923,9 @@ def save_character(name, greeting, context, picture, filename):
|
|||
return
|
||||
|
||||
data = generate_character_yaml(name, greeting, context)
|
||||
filepath = Path(f'characters/{filename}.yaml')
|
||||
filepath = Path(f'user_data/characters/{filename}.yaml')
|
||||
save_file(filepath, data)
|
||||
path_to_img = Path(f'characters/{filename}.png')
|
||||
path_to_img = Path(f'user_data/characters/{filename}.png')
|
||||
if picture is not None:
|
||||
picture.save(path_to_img)
|
||||
logger.info(f'Saved {path_to_img}.')
|
||||
|
@ -941,9 +933,9 @@ def save_character(name, greeting, context, picture, filename):
|
|||
|
||||
def delete_character(name, instruct=False):
|
||||
for extension in ["yml", "yaml", "json"]:
|
||||
delete_file(Path(f'characters/{name}.{extension}'))
|
||||
delete_file(Path(f'user_data/characters/{name}.{extension}'))
|
||||
|
||||
delete_file(Path(f'characters/{name}.png'))
|
||||
delete_file(Path(f'user_data/characters/{name}.png'))
|
||||
|
||||
|
||||
def jinja_template_from_old_format(params, verbose=False):
|
||||
|
@ -1246,7 +1238,7 @@ def handle_save_template_click(instruction_template_str):
|
|||
contents = generate_instruction_template_yaml(instruction_template_str)
|
||||
return [
|
||||
"My Template.yaml",
|
||||
"instruction-templates/",
|
||||
"user_data/instruction-templates/",
|
||||
contents,
|
||||
gr.update(visible=True)
|
||||
]
|
||||
|
@ -1255,7 +1247,7 @@ def handle_save_template_click(instruction_template_str):
|
|||
def handle_delete_template_click(template):
|
||||
return [
|
||||
f"{template}.yaml",
|
||||
"instruction-templates/",
|
||||
"user_data/instruction-templates/",
|
||||
gr.update(visible=False)
|
||||
]
|
||||
|
||||
|
|
|
@ -12,8 +12,8 @@ from modules.text_generation import encode
|
|||
|
||||
|
||||
def load_past_evaluations():
|
||||
if Path('logs/evaluations.csv').exists():
|
||||
df = pd.read_csv(Path('logs/evaluations.csv'), dtype=str)
|
||||
if Path('user_data/logs/evaluations.csv').exists():
|
||||
df = pd.read_csv(Path('user_data/logs/evaluations.csv'), dtype=str)
|
||||
df['Perplexity'] = pd.to_numeric(df['Perplexity'])
|
||||
return df
|
||||
else:
|
||||
|
@ -26,7 +26,7 @@ past_evaluations = load_past_evaluations()
|
|||
def save_past_evaluations(df):
|
||||
global past_evaluations
|
||||
past_evaluations = df
|
||||
filepath = Path('logs/evaluations.csv')
|
||||
filepath = Path('user_data/logs/evaluations.csv')
|
||||
filepath.parent.mkdir(parents=True, exist_ok=True)
|
||||
df.to_csv(filepath, index=False)
|
||||
|
||||
|
@ -69,7 +69,7 @@ def calculate_perplexity(models, input_dataset, stride, _max_length):
|
|||
data = load_dataset('ptb_text_only', 'penn_treebank', split='test')
|
||||
text = " ".join(data['sentence'])
|
||||
else:
|
||||
with open(Path(f'training/datasets/{input_dataset}.txt'), 'r', encoding='utf-8') as f:
|
||||
with open(Path(f'user_data/training/datasets/{input_dataset}.txt'), 'r', encoding='utf-8') as f:
|
||||
text = f.read()
|
||||
|
||||
for model in models:
|
||||
|
|
|
@ -40,7 +40,7 @@ class Exllamav2Model:
|
|||
config.model_dir = str(path_to_model)
|
||||
config.prepare()
|
||||
|
||||
config.max_seq_len = shared.args.max_seq_len
|
||||
config.max_seq_len = shared.args.ctx_size
|
||||
config.scale_pos_emb = shared.args.compress_pos_emb
|
||||
config.scale_alpha_value = shared.args.alpha_value
|
||||
config.no_flash_attn = shared.args.no_flash_attn
|
||||
|
@ -85,7 +85,44 @@ class Exllamav2Model:
|
|||
model.load_autosplit(cache)
|
||||
|
||||
tokenizer = ExLlamaV2Tokenizer(config)
|
||||
generator = ExLlamaV2StreamingGenerator(model, cache, tokenizer)
|
||||
|
||||
# Initialize draft model for speculative decoding
|
||||
draft_model = None
|
||||
draft_cache = None
|
||||
|
||||
if shared.args.model_draft and shared.args.model_draft.lower() not in ["none", ""]:
|
||||
logger.info(f"Loading draft model for speculative decoding: {shared.args.model_draft}")
|
||||
|
||||
# Find the draft model path
|
||||
draft_path = Path(shared.args.model_draft)
|
||||
if not draft_path.exists():
|
||||
draft_path = Path(f'{shared.args.model_dir}') / Path(shared.args.model_draft)
|
||||
|
||||
draft_config = ExLlamaV2Config()
|
||||
draft_config.model_dir = str(draft_path)
|
||||
draft_config.prepare()
|
||||
draft_config.arch_compat_overrides()
|
||||
|
||||
# Set context size for draft model
|
||||
if shared.args.ctx_size_draft > 0:
|
||||
draft_config.max_seq_len = shared.args.ctx_size_draft
|
||||
else:
|
||||
draft_config.max_seq_len = config.max_seq_len
|
||||
|
||||
draft_model = ExLlamaV2(draft_config)
|
||||
draft_cache = cache_type(draft_model, lazy=True)
|
||||
draft_model.load_autosplit(draft_cache)
|
||||
|
||||
logger.info(f"Draft model loaded successfully with max_draft={shared.args.draft_max}")
|
||||
|
||||
generator = ExLlamaV2StreamingGenerator(
|
||||
model,
|
||||
cache,
|
||||
tokenizer,
|
||||
draft_model=draft_model,
|
||||
draft_cache=draft_cache,
|
||||
num_speculative_tokens=shared.args.draft_max if draft_model is not None else 0
|
||||
)
|
||||
|
||||
result = self()
|
||||
result.model = model
|
||||
|
@ -93,6 +130,8 @@ class Exllamav2Model:
|
|||
result.tokenizer = tokenizer
|
||||
result.generator = generator
|
||||
result.loras = None
|
||||
result.draft_model = draft_model
|
||||
result.draft_cache = draft_cache
|
||||
return result, result
|
||||
|
||||
def encode(self, string, **kwargs):
|
||||
|
@ -179,6 +218,10 @@ class Exllamav2Model:
|
|||
else:
|
||||
max_new_tokens = state['max_new_tokens']
|
||||
|
||||
# Reset speculative decoding stats if using a draft model
|
||||
if hasattr(self, 'draft_model') and self.draft_model is not None:
|
||||
self.generator.reset_sd_stats()
|
||||
|
||||
self.generator.begin_stream(ids, settings, loras=self.loras)
|
||||
|
||||
decoded_text = ''
|
||||
|
@ -190,6 +233,11 @@ class Exllamav2Model:
|
|||
decoded_text += chunk
|
||||
yield decoded_text
|
||||
|
||||
# Log speculative decoding stats if using draft model
|
||||
if hasattr(self, 'draft_model') and self.draft_model is not None:
|
||||
efficiency, accuracy, total_tokens, total_draft_tokens, accepted_draft_tokens = self.generator.get_sd_stats()
|
||||
logger.info(f"Speculative decoding: accepted={accepted_draft_tokens}/{total_draft_tokens} tokens")
|
||||
|
||||
def generate(self, prompt, state):
|
||||
output = ''
|
||||
for output in self.generate_with_streaming(prompt, state):
|
||||
|
|
|
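For readers unfamiliar with these counters, here is a hedged sketch of how the logged numbers relate to each other; it is plain arithmetic on the tuple unpacked above, with made-up values, not code from the repository:

```python
# Hypothetical values for the stats unpacked from get_sd_stats() above.
total_draft_tokens = 128      # tokens proposed by the draft model
accepted_draft_tokens = 96    # proposals the main model agreed with

acceptance_rate = accepted_draft_tokens / total_draft_tokens if total_draft_tokens else 0.0
print(f"accepted={accepted_draft_tokens}/{total_draft_tokens} ({acceptance_rate:.0%})")
# accepted=96/128 (75%)
```

A higher acceptance rate generally means the draft model is a good match for the main model, which is where the speed-up of speculative decoding comes from.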
@ -192,7 +192,7 @@ class Exllamav2HF(PreTrainedModel, GenerationMixin):
|
|||
config.model_dir = str(pretrained_model_name_or_path)
|
||||
config.prepare()
|
||||
|
||||
config.max_seq_len = shared.args.max_seq_len
|
||||
config.max_seq_len = shared.args.ctx_size
|
||||
config.scale_pos_emb = shared.args.compress_pos_emb
|
||||
config.scale_alpha_value = shared.args.alpha_value
|
||||
config.no_flash_attn = shared.args.no_flash_attn
|
||||
|
|
|
@@ -5,6 +5,7 @@ from typing import Any, Dict, Optional, Union

import torch
from exllamav3 import Cache, Config, Model
+from exllamav3.cache import CacheLayer_fp16, CacheLayer_quant
from torch.nn import CrossEntropyLoss
from transformers import (
    GenerationConfig,

@@ -33,13 +34,39 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
        self.ex_model = Model.from_config(config)

        # Calculate the closest multiple of 256 at or above the chosen value
-        max_tokens = shared.args.max_seq_len
+        max_tokens = shared.args.ctx_size
        if max_tokens % 256 != 0:
            adjusted_tokens = ((max_tokens // 256) + 1) * 256
            logger.warning(f"max_num_tokens must be a multiple of 256. Adjusting from {max_tokens} to {adjusted_tokens}")
            max_tokens = adjusted_tokens

-        self.ex_cache = Cache(self.ex_model, max_num_tokens=max_tokens)
+        # Parse cache type
+        cache_type = shared.args.cache_type.lower()
+        cache_kwargs = {}
+        if cache_type == 'fp16':
+            layer_type = CacheLayer_fp16
+        elif cache_type.startswith('q'):
+            layer_type = CacheLayer_quant
+            if '_' in cache_type:
+                # Different bits for k and v (e.g., q4_q8)
+                k_part, v_part = cache_type.split('_')
+                k_bits = int(k_part[1:])
+                v_bits = int(v_part[1:])
+            else:
+                # Same bits for k and v (e.g., q4)
+                k_bits = v_bits = int(cache_type[1:])
+
+            # Validate bit ranges
+            if not (2 <= k_bits <= 8 and 2 <= v_bits <= 8):
+                logger.warning(f"Invalid quantization bits: k_bits={k_bits}, v_bits={v_bits}. Must be between 2 and 8. Falling back to fp16.")
+                layer_type = CacheLayer_fp16
+            else:
+                cache_kwargs = {'k_bits': k_bits, 'v_bits': v_bits}
+        else:
+            logger.warning(f"Unrecognized cache type: {cache_type}. Falling back to fp16.")
+            layer_type = CacheLayer_fp16
+
+        self.ex_cache = Cache(self.ex_model, max_num_tokens=max_tokens, layer_type=layer_type, **cache_kwargs)

        # Create load parameters dictionary
        load_params = {'progressbar': True}

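The cache-type grammar accepted above ('fp16', 'q4', 'q4_q8', ...) is easy to check in isolation. A self-contained sketch of the same parsing rules; the helper name is ours, not the repository's:

```python
def parse_cache_type(cache_type: str):
    """Return ('fp16', None) or ('quant', (k_bits, v_bits)) following the rules above."""
    cache_type = cache_type.lower()
    if cache_type == 'fp16':
        return 'fp16', None
    if cache_type.startswith('q'):
        if '_' in cache_type:
            k_part, v_part = cache_type.split('_')
            k_bits, v_bits = int(k_part[1:]), int(v_part[1:])
        else:
            k_bits = v_bits = int(cache_type[1:])
        if not (2 <= k_bits <= 8 and 2 <= v_bits <= 8):
            return 'fp16', None  # out-of-range bits fall back to fp16
        return 'quant', (k_bits, v_bits)
    return 'fp16', None  # unrecognized strings also fall back to fp16


print(parse_cache_type('q4_q8'))  # ('quant', (4, 8))
print(parse_cache_type('q6'))     # ('quant', (6, 6))
print(parse_cache_type('bogus'))  # ('fp16', None)
```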
@ -1,5 +1,6 @@
|
|||
'''
|
||||
Copied from: https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14184
|
||||
Most of the code here was adapted from:
|
||||
https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14184
|
||||
'''
|
||||
|
||||
import inspect
|
||||
|
@ -7,6 +8,30 @@ import warnings
|
|||
from functools import wraps
|
||||
|
||||
import gradio as gr
|
||||
import gradio.routes
|
||||
import gradio.utils
|
||||
from starlette.middleware.trustedhost import TrustedHostMiddleware
|
||||
|
||||
from modules import shared
|
||||
|
||||
orig_create_app = gradio.routes.App.create_app
|
||||
|
||||
|
||||
# Be strict about only approving access to localhost by default
|
||||
def create_app_with_trustedhost(*args, **kwargs):
|
||||
app = orig_create_app(*args, **kwargs)
|
||||
|
||||
if not (shared.args.listen or shared.args.share):
|
||||
app.add_middleware(
|
||||
TrustedHostMiddleware,
|
||||
allowed_hosts=["localhost", "127.0.0.1"]
|
||||
)
|
||||
|
||||
return app
|
||||
|
||||
|
||||
gradio.routes.App.create_app = create_app_with_trustedhost
|
||||
gradio.utils.launch_counter = lambda: None
|
||||
|
||||
|
||||
class GradioDeprecationWarning(DeprecationWarning):
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import datetime
|
||||
import functools
|
||||
import html
|
||||
import os
|
||||
|
@@ -106,8 +107,87 @@ def replace_blockquote(m):
    return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')


def extract_thinking_block(string):
    """Extract thinking blocks from the beginning of a string."""
    if not string:
        return None, string

    THINK_START_TAG = "<think>"
    THINK_END_TAG = "</think>"

    # Look for opening tag
    start_pos = string.lstrip().find(THINK_START_TAG)
    if start_pos == -1:
        return None, string

    # Adjust start position to account for any leading whitespace
    start_pos = string.find(THINK_START_TAG)

    # Find the content after the opening tag
    content_start = start_pos + len(THINK_START_TAG)

    # Look for closing tag
    end_pos = string.find(THINK_END_TAG, content_start)

    if end_pos != -1:
        # Both tags found - extract content between them
        thinking_content = string[content_start:end_pos]
        remaining_content = string[end_pos + len(THINK_END_TAG):]
        return thinking_content, remaining_content
    else:
        # Only opening tag found - everything else is thinking content
        thinking_content = string[content_start:]
        return thinking_content, ""


@functools.lru_cache(maxsize=None)
-def convert_to_markdown(string):
+def convert_to_markdown(string, message_id=None):
    if not string:
        return ""

    # Use a default message ID if none provided
    if message_id is None:
        message_id = "unknown"

    # Extract thinking block if present
    thinking_content, remaining_content = extract_thinking_block(string)

    # Process the main content
    html_output = process_markdown_content(remaining_content)

    # If thinking content was found, process it using the same function
    if thinking_content is not None:
        thinking_html = process_markdown_content(thinking_content)

        # Generate unique ID for the thinking block
        block_id = f"thinking-{message_id}-0"

        # Check if thinking is complete or still in progress
        is_streaming = not remaining_content
        title_text = "Thinking..." if is_streaming else "Thought"

        thinking_block = f'''
        <details class="thinking-block" data-block-id="{block_id}" data-streaming="{str(is_streaming).lower()}" open>
            <summary class="thinking-header">
                <svg class="thinking-icon" width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
                    <path d="M8 1.33334C4.31868 1.33334 1.33334 4.31868 1.33334 8.00001C1.33334 11.6813 4.31868 14.6667 8 14.6667C11.6813 14.6667 14.6667 11.6813 14.6667 8.00001C14.6667 4.31868 11.6813 1.33334 8 1.33334Z" stroke="currentColor" stroke-width="1.33" stroke-linecap="round" stroke-linejoin="round"/>
                    <path d="M8 10.6667V8.00001" stroke="currentColor" stroke-width="1.33" stroke-linecap="round" stroke-linejoin="round"/>
                    <path d="M8 5.33334H8.00667" stroke="currentColor" stroke-width="1.33" stroke-linecap="round" stroke-linejoin="round"/>
                </svg>
                <span class="thinking-title">{title_text}</span>
            </summary>
            <div class="thinking-content pretty_scrollbar">{thinking_html}</div>
        </details>
        '''

        # Prepend the thinking block to the message HTML
        html_output = thinking_block + html_output

    return html_output


def process_markdown_content(string):
    """Process a string through the markdown conversion pipeline."""
    if not string:
        return ""
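To make the streaming behaviour of `extract_thinking_block` concrete, here is a condensed, behaviour-equivalent restatement plus a few calls. This is an illustration only; the real helper above is the authoritative version:

```python
# Condensed restatement of extract_thinking_block above, just to show its contract.
def extract_thinking_block(string):
    if not string:
        return None, string
    start = string.find("<think>")
    if start == -1:
        return None, string
    content_start = start + len("<think>")
    end = string.find("</think>", content_start)
    if end != -1:
        return string[content_start:end], string[end + len("</think>"):]
    return string[content_start:], ""


print(extract_thinking_block("<think>pondering</think>Final answer."))
# ('pondering', 'Final answer.')
print(extract_thinking_block("<think>still pondering"))   # closing tag not streamed yet
# ('still pondering', '')
print(extract_thinking_block("no tags here"))
# (None, 'no tags here')
```

The second case is what drives the "Thinking..." title while a reply is still streaming: the closing tag has not arrived, so everything after `<think>` is treated as thinking content and the remaining content is empty.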
||||
|
@ -208,15 +288,15 @@ def convert_to_markdown(string):
|
|||
return html_output
|
||||
|
||||
|
||||
def convert_to_markdown_wrapped(string, use_cache=True):
|
||||
def convert_to_markdown_wrapped(string, message_id=None, use_cache=True):
|
||||
'''
|
||||
Used to avoid caching convert_to_markdown calls during streaming.
|
||||
'''
|
||||
|
||||
if use_cache:
|
||||
return convert_to_markdown(string)
|
||||
return convert_to_markdown(string, message_id=message_id)
|
||||
|
||||
return convert_to_markdown.__wrapped__(string)
|
||||
return convert_to_markdown.__wrapped__(string, message_id=message_id)
|
||||
|
||||
|
||||
def generate_basic_html(string):
|
||||
|
@ -272,7 +352,7 @@ def generate_instruct_html(history):
|
|||
for i in range(len(history['visible'])):
|
||||
row_visible = history['visible'][i]
|
||||
row_internal = history['internal'][i]
|
||||
converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
||||
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
||||
|
||||
if converted_visible[0]: # Don't display empty user messages
|
||||
output += (
|
||||
|
@ -307,19 +387,19 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
|
|||
|
||||
# We use ?character and ?time.time() to force the browser to reset caches
|
||||
img_bot = (
|
||||
f'<img src="file/cache/pfp_character_thumb.png?{character}" class="pfp_character">'
|
||||
if Path("cache/pfp_character_thumb.png").exists() else ''
|
||||
f'<img src="file/user_data/cache/pfp_character_thumb.png?{character}" class="pfp_character">'
|
||||
if Path("user_data/cache/pfp_character_thumb.png").exists() else ''
|
||||
)
|
||||
|
||||
img_me = (
|
||||
f'<img src="file/cache/pfp_me.png?{time.time() if reset_cache else ""}">'
|
||||
if Path("cache/pfp_me.png").exists() else ''
|
||||
f'<img src="file/user_data/cache/pfp_me.png?{time.time() if reset_cache else ""}">'
|
||||
if Path("user_data/cache/pfp_me.png").exists() else ''
|
||||
)
|
||||
|
||||
for i in range(len(history['visible'])):
|
||||
row_visible = history['visible'][i]
|
||||
row_internal = history['internal'][i]
|
||||
converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
||||
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
||||
|
||||
if converted_visible[0]: # Don't display empty user messages
|
||||
output += (
|
||||
|
@ -359,7 +439,7 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
|
|||
for i in range(len(history['visible'])):
|
||||
row_visible = history['visible'][i]
|
||||
row_internal = history['internal'][i]
|
||||
converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
||||
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
|
||||
|
||||
if converted_visible[0]: # Don't display empty user messages
|
||||
output += (
|
||||
|
@ -389,8 +469,21 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
|
|||
return output
|
||||
|
||||
|
||||
def time_greeting():
|
||||
current_hour = datetime.datetime.now().hour
|
||||
if 5 <= current_hour < 12:
|
||||
return "Good morning!"
|
||||
elif 12 <= current_hour < 18:
|
||||
return "Good afternoon!"
|
||||
else:
|
||||
return "Good evening!"
|
||||
|
||||
|
||||
def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
|
||||
if mode == 'instruct':
|
||||
if len(history['visible']) == 0:
|
||||
greeting = f"<div class=\"welcome-greeting\">{time_greeting()} How can I help you today?</div>"
|
||||
result = f'<div class="chat" id="chat">{greeting}</div>'
|
||||
elif mode == 'instruct':
|
||||
result = generate_instruct_html(history)
|
||||
elif style == 'wpp':
|
||||
result = generate_chat_html(history, name1, name2)
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
import json
|
||||
import os
|
||||
import pprint
|
||||
import re
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import llama_cpp_binaries
|
||||
import requests
|
||||
|
@ -251,7 +253,7 @@ class LlamaServer:
|
|||
cmd = [
|
||||
self.server_path,
|
||||
"--model", self.model_path,
|
||||
"--ctx-size", str(shared.args.n_ctx),
|
||||
"--ctx-size", str(shared.args.ctx_size),
|
||||
"--n-gpu-layers", str(shared.args.n_gpu_layers),
|
||||
"--batch-size", str(shared.args.batch_size),
|
||||
"--port", str(self.port),
|
||||
|
@@ -281,6 +283,41 @@ class LlamaServer:
            cmd += ["--rope-freq-scale", str(1.0 / shared.args.compress_pos_emb)]
        if shared.args.rope_freq_base > 0:
            cmd += ["--rope-freq-base", str(shared.args.rope_freq_base)]
        if shared.args.model_draft not in [None, 'None']:
            path = Path(shared.args.model_draft)
            if not path.exists():
                path = Path(f'{shared.args.model_dir}/{shared.args.model_draft}')

            if path.is_file():
                model_file = path
            else:
                model_file = sorted(Path(f'{shared.args.model_dir}/{shared.args.model_draft}').glob('*.gguf'))[0]

            cmd += ["--model-draft", model_file]
            if shared.args.draft_max > 0:
                cmd += ["--draft-max", str(shared.args.draft_max)]
            if shared.args.gpu_layers_draft > 0:
                cmd += ["--gpu-layers-draft", str(shared.args.gpu_layers_draft)]
            if shared.args.device_draft:
                cmd += ["--device-draft", shared.args.device_draft]
            if shared.args.ctx_size_draft > 0:
                cmd += ["--ctx-size-draft", str(shared.args.ctx_size_draft)]
        if shared.args.streaming_llm:
            cmd += ["--cache-reuse", "1"]
        if shared.args.extra_flags:
            # Clean up the input
            extra_flags = shared.args.extra_flags.strip()
            if extra_flags.startswith('"') and extra_flags.endswith('"'):
                extra_flags = extra_flags[1:-1].strip()
            elif extra_flags.startswith("'") and extra_flags.endswith("'"):
                extra_flags = extra_flags[1:-1].strip()

            for flag_item in extra_flags.split(','):
                if '=' in flag_item:
                    flag, value = flag_item.split('=', 1)
                    cmd += [f"--{flag}", value]
                else:
                    cmd.append(f"--{flag_item}")

        env = os.environ.copy()
        if os.name == 'posix':

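The `--extra-flags` format ("flag1=value1,flag2,flag3=value3") maps onto llama-server arguments through the loop above. A standalone sketch; the specific flag names are only examples:

```python
# How an --extra-flags string expands into llama-server arguments,
# mirroring the parsing loop above (standalone illustration).
extra_flags = "override-tensor=exps=CPU,no-warmup"

cmd = []
for flag_item in extra_flags.split(','):
    if '=' in flag_item:
        flag, value = flag_item.split('=', 1)
        cmd += [f"--{flag}", value]
    else:
        cmd.append(f"--{flag_item}")

print(cmd)  # ['--override-tensor', 'exps=CPU', '--no-warmup']
```

Note that `split('=', 1)` only splits on the first `=`, so values that themselves contain `=` (like `exps=CPU`) pass through intact.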
@@ -299,17 +336,7 @@ class LlamaServer:
            env=env
        )

-        def filter_stderr(process_stderr):
-            try:
-                for line in iter(process_stderr.readline, ''):
-                    if not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line:
-                        sys.stderr.write(line)
-                        sys.stderr.flush()
-            except (ValueError, IOError):
-                # Handle pipe closed exceptions
-                pass
-
-        threading.Thread(target=filter_stderr, args=(self.process.stderr,), daemon=True).start()
+        threading.Thread(target=filter_stderr_with_progress, args=(self.process.stderr,), daemon=True).start()

        # Wait for server to be healthy
        health_url = f"http://127.0.0.1:{self.port}/health"

@@ -360,3 +387,18 @@ class LlamaServer:
            self.process.kill()

        self.process = None


+def filter_stderr_with_progress(process_stderr):
+    progress_pattern = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
+    try:
+        for line in iter(process_stderr.readline, ''):
+            progress_match = progress_pattern.search(line)
+            if progress_match:
+                sys.stderr.write(line)
+                sys.stderr.flush()
+            elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line:
+                sys.stderr.write(line)
+                sys.stderr.flush()
+    except (ValueError, IOError):
+        pass

|
|||
'threads',
|
||||
'threads_batch',
|
||||
'batch_size',
|
||||
'n_ctx',
|
||||
'ctx_size',
|
||||
'cache_type',
|
||||
'tensor_split',
|
||||
'extra_flags',
|
||||
'streaming_llm',
|
||||
'rope_freq_base',
|
||||
'compress_pos_emb',
|
||||
'flash_attn',
|
||||
|
@ -20,6 +22,12 @@ loaders_and_params = OrderedDict({
|
|||
'no_mmap',
|
||||
'mlock',
|
||||
'numa',
|
||||
'model_draft',
|
||||
'draft_max',
|
||||
'gpu_layers_draft',
|
||||
'device_draft',
|
||||
'ctx_size_draft',
|
||||
'speculative_decoding_accordion',
|
||||
],
|
||||
'Transformers': [
|
||||
'gpu_split',
|
||||
|
@ -41,14 +49,15 @@ loaders_and_params = OrderedDict({
|
|||
'no_use_fast',
|
||||
],
|
||||
'ExLlamav3_HF': [
|
||||
'max_seq_len',
|
||||
'ctx_size',
|
||||
'cache_type',
|
||||
'gpu_split',
|
||||
'cfg_cache',
|
||||
'trust_remote_code',
|
||||
'no_use_fast',
|
||||
],
|
||||
'ExLlamav2_HF': [
|
||||
'max_seq_len',
|
||||
'ctx_size',
|
||||
'cache_type',
|
||||
'gpu_split',
|
||||
'alpha_value',
|
||||
|
@ -64,7 +73,7 @@ loaders_and_params = OrderedDict({
|
|||
'no_use_fast',
|
||||
],
|
||||
'ExLlamav2': [
|
||||
'max_seq_len',
|
||||
'ctx_size',
|
||||
'cache_type',
|
||||
'gpu_split',
|
||||
'alpha_value',
|
||||
|
@ -76,6 +85,10 @@ loaders_and_params = OrderedDict({
|
|||
'no_xformers',
|
||||
'no_sdpa',
|
||||
'exllamav2_info',
|
||||
'model_draft',
|
||||
'draft_max',
|
||||
'ctx_size_draft',
|
||||
'speculative_decoding_accordion',
|
||||
],
|
||||
'HQQ': [
|
||||
'hqq_backend',
|
||||
|
@ -83,7 +96,7 @@ loaders_and_params = OrderedDict({
|
|||
'no_use_fast',
|
||||
],
|
||||
'TensorRT-LLM': [
|
||||
'max_seq_len',
|
||||
'ctx_size',
|
||||
'cpp_runner',
|
||||
'tensorrt_llm_info',
|
||||
]
|
||||
|
|
|
@ -52,10 +52,8 @@ def load_model(model_name, loader=None):
|
|||
tokenizer = load_tokenizer(model_name)
|
||||
|
||||
shared.settings.update({k: v for k, v in metadata.items() if k in shared.settings})
|
||||
if loader.lower().startswith('exllama') or loader.lower().startswith('tensorrt'):
|
||||
shared.settings['truncation_length'] = shared.args.max_seq_len
|
||||
elif loader == 'llama.cpp':
|
||||
shared.settings['truncation_length'] = shared.args.n_ctx
|
||||
if loader.lower().startswith('exllama') or loader.lower().startswith('tensorrt') or loader == 'llama.cpp':
|
||||
shared.settings['truncation_length'] = shared.args.ctx_size
|
||||
|
||||
logger.info(f"Loaded \"{model_name}\" in {(time.time()-t0):.2f} seconds.")
|
||||
logger.info(f"LOADER: \"{loader}\"")
|
||||
|
|
|
@ -11,8 +11,7 @@ def get_fallback_settings():
|
|||
return {
|
||||
'bf16': False,
|
||||
'use_eager_attention': False,
|
||||
'max_seq_len': 2048,
|
||||
'n_ctx': 2048,
|
||||
'ctx_size': 2048,
|
||||
'rope_freq_base': 0,
|
||||
'compress_pos_emb': 1,
|
||||
'alpha_value': 1,
|
||||
|
@ -26,7 +25,7 @@ def get_fallback_settings():
|
|||
def get_model_metadata(model):
|
||||
model_settings = {}
|
||||
|
||||
# Get settings from models/config.yaml and models/config-user.yaml
|
||||
# Get settings from user_data/models/config.yaml and user_data/models/config-user.yaml
|
||||
settings = shared.model_config
|
||||
for pat in settings:
|
||||
if re.match(pat.lower(), Path(model).name.lower()):
|
||||
|
@ -59,7 +58,7 @@ def get_model_metadata(model):
|
|||
|
||||
for k in metadata:
|
||||
if k.endswith('context_length'):
|
||||
model_settings['n_ctx'] = min(metadata[k], 8192)
|
||||
model_settings['ctx_size'] = min(metadata[k], 8192)
|
||||
model_settings['truncation_length_info'] = metadata[k]
|
||||
elif k.endswith('rope.freq_base'):
|
||||
model_settings['rope_freq_base'] = metadata[k]
|
||||
|
@ -97,7 +96,7 @@ def get_model_metadata(model):
|
|||
if k in metadata:
|
||||
model_settings['truncation_length'] = metadata[k]
|
||||
model_settings['truncation_length_info'] = metadata[k]
|
||||
model_settings['max_seq_len'] = min(metadata[k], 8192)
|
||||
model_settings['ctx_size'] = min(metadata[k], 8192)
|
||||
|
||||
if 'rope_theta' in metadata:
|
||||
model_settings['rope_freq_base'] = metadata['rope_theta']
|
||||
|
@ -145,7 +144,7 @@ def get_model_metadata(model):
|
|||
if 'rope_freq_base' in model_settings and model_settings['rope_freq_base'] == 10000:
|
||||
model_settings.pop('rope_freq_base')
|
||||
|
||||
# Apply user settings from models/config-user.yaml
|
||||
# Apply user settings from user_data/models/config-user.yaml
|
||||
settings = shared.user_config
|
||||
for pat in settings:
|
||||
if re.match(pat.lower(), Path(model).name.lower()):
|
||||
|
@ -224,7 +223,7 @@ def apply_model_settings_to_state(model, state):
|
|||
|
||||
def save_model_settings(model, state):
|
||||
'''
|
||||
Save the settings for this model to models/config-user.yaml
|
||||
Save the settings for this model to user_data/models/config-user.yaml
|
||||
'''
|
||||
if model == 'None':
|
||||
yield ("Not saving the settings because no model is selected in the menu.")
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
from pathlib import Path
|
||||
|
||||
from modules.logging_colors import logger
|
||||
|
||||
if Path('../webui.py').exists():
|
||||
logger.warning('\nIt looks like you are running an outdated version of '
|
||||
'the one-click-installers.\n'
|
||||
'Please migrate your installation following the instructions here:\n'
|
||||
'https://github.com/oobabooga/text-generation-webui/wiki/Migrating-an-old-one%E2%80%90click-install')
|
|
@ -58,7 +58,7 @@ def presets_params():
|
|||
def load_preset(name, verbose=False):
|
||||
generate_params = default_preset()
|
||||
if name not in ['None', None, '']:
|
||||
path = Path(f'presets/{name}.yaml')
|
||||
path = Path(f'user_data/presets/{name}.yaml')
|
||||
if path.exists():
|
||||
with open(path, 'r') as infile:
|
||||
preset = yaml.safe_load(infile)
|
||||
|
|
|
@ -7,7 +7,7 @@ def load_prompt(fname):
|
|||
if fname in ['None', '']:
|
||||
return ''
|
||||
else:
|
||||
file_path = Path(f'prompts/{fname}.txt')
|
||||
file_path = Path(f'user_data/prompts/{fname}.txt')
|
||||
if not file_path.exists():
|
||||
return ''
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import argparse
|
||||
import copy
|
||||
import os
|
||||
import shlex
|
||||
import sys
|
||||
from collections import OrderedDict
|
||||
from pathlib import Path
|
||||
|
@ -31,7 +32,7 @@ need_restart = False
|
|||
settings = {
|
||||
'show_controls': True,
|
||||
'start_with': '',
|
||||
'mode': 'chat-instruct',
|
||||
'mode': 'instruct',
|
||||
'chat_style': 'cai-chat',
|
||||
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
|
||||
'prompt-default': 'QA',
|
||||
|
@ -57,7 +58,6 @@ settings = {
|
|||
'seed': -1,
|
||||
'custom_stopping_strings': '',
|
||||
'custom_token_bans': '',
|
||||
'show_after': '',
|
||||
'negative_prompt': '',
|
||||
'autoload_model': False,
|
||||
'dark_theme': True,
|
||||
|
@ -77,10 +77,10 @@ group.add_argument('--multi-user', action='store_true', help='Multi-user mode. C
|
|||
group.add_argument('--character', type=str, help='The name of the character to load in chat mode by default.')
|
||||
group.add_argument('--model', type=str, help='Name of the model to load by default.')
|
||||
group.add_argument('--lora', type=str, nargs='+', help='The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.')
|
||||
group.add_argument('--model-dir', type=str, default='models/', help='Path to directory with all the models.')
|
||||
group.add_argument('--lora-dir', type=str, default='loras/', help='Path to directory with all the loras.')
|
||||
group.add_argument('--model-dir', type=str, default='user_data/models', help='Path to directory with all the models.')
|
||||
group.add_argument('--lora-dir', type=str, default='user_data/loras', help='Path to directory with all the loras.')
|
||||
group.add_argument('--model-menu', action='store_true', help='Show a model menu in the terminal when the web UI is first launched.')
|
||||
group.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See settings-template.yaml for an example. If you create a file called settings.yaml, this file will be loaded by default without the need to use the --settings flag.')
|
||||
group.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See user_data/settings-template.yaml for an example. If you create a file called user_data/settings.yaml, this file will be loaded by default without the need to use the --settings flag.')
|
||||
group.add_argument('--extensions', type=str, nargs='+', help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.')
|
||||
group.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
|
||||
group.add_argument('--idle-timeout', type=int, default=0, help='Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.')
|
||||
|
@ -94,7 +94,7 @@ group = parser.add_argument_group('Transformers/Accelerate')
|
|||
group.add_argument('--cpu', action='store_true', help='Use the CPU to generate text. Warning: Training on CPU is extremely slow.')
|
||||
group.add_argument('--cpu-memory', type=float, default=0, help='Maximum CPU memory in GiB. Use this for CPU offloading.')
|
||||
group.add_argument('--disk', action='store_true', help='If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.')
|
||||
group.add_argument('--disk-cache-dir', type=str, default='cache', help='Directory to save the disk cache to. Defaults to "cache".')
|
||||
group.add_argument('--disk-cache-dir', type=str, default='user_data/cache', help='Directory to save the disk cache to. Defaults to "user_data/cache".')
|
||||
group.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision (using bitsandbytes).')
|
||||
group.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
|
||||
group.add_argument('--no-cache', action='store_true', help='Set use_cache to False while generating text. This reduces VRAM usage slightly, but it comes at a performance cost.')
|
||||
|
@@ -115,10 +115,9 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for
# llama.cpp
group = parser.add_argument_group('llama.cpp')
group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.')
-group.add_argument('--n_ctx', type=int, default=8192, help='Size of the prompt context.')
group.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.')
-group.add_argument('--batch-size', type=int, default=2048, help='Maximum number of prompt tokens to batch together when calling llama_eval.')
+group.add_argument('--batch-size', type=int, default=256, help='Maximum number of prompt tokens to batch together when calling llama_eval.')
group.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.')
group.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
group.add_argument('--n-gpu-layers', type=int, default=0, help='Number of layers to offload to the GPU.')

@@ -126,17 +125,31 @@ group.add_argument('--tensor-split', type=str, default=None, help='Split the mod
group.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.')
group.add_argument('--no-kv-offload', action='store_true', help='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
group.add_argument('--row-split', action='store_true', help='Split the model by rows across GPUs. This may improve multi-gpu performance.')
+group.add_argument('--extra-flags', type=str, default=None, help='Extra flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"')
group.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')

+# Cache
+group = parser.add_argument_group('Context and cache management')
+group.add_argument('--ctx-size', '--n_ctx', '--max_seq_len', type=int, default=8192, metavar='N', help='Context size in tokens.')
+group.add_argument('--cache_type', type=str, default='fp16', help='KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8 (can specify k_bits and v_bits separately, e.g. q4_q8).')
+
+# Speculative decoding
+group = parser.add_argument_group('Speculative decoding')
+group.add_argument('--model-draft', type=str, default=None, help='Path to the draft model for speculative decoding.')
+group.add_argument('--draft-max', type=int, default=4, help='Number of tokens to draft for speculative decoding.')
+group.add_argument('--gpu-layers-draft', type=int, default=256, help='Number of layers to offload to the GPU for the draft model.')
+group.add_argument('--device-draft', type=str, default=None, help='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
+group.add_argument('--ctx-size-draft', type=int, default=0, help='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
+
# ExLlamaV2
group = parser.add_argument_group('ExLlamaV2')
group.add_argument('--gpu-split', type=str, help='Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.')
group.add_argument('--autosplit', action='store_true', help='Autosplit the model tensors across the available GPUs. This causes --gpu-split to be ignored.')
-group.add_argument('--max_seq_len', type=int, default=8192, help='Maximum sequence length.')
group.add_argument('--cfg-cache', action='store_true', help='ExLlamav2_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.')
group.add_argument('--no_flash_attn', action='store_true', help='Force flash-attention to not be used.')
group.add_argument('--no_xformers', action='store_true', help='Force xformers to not be used.')
group.add_argument('--no_sdpa', action='store_true', help='Force Torch SDPA to not be used.')
-group.add_argument('--num_experts_per_token', type=int, default=2, help='Number of experts to use for generation. Applies to MoE models like Mixtral.')
+group.add_argument('--num_experts_per_token', type=int, default=2, metavar='N', help='Number of experts to use for generation. Applies to MoE models like Mixtral.')
group.add_argument('--enable_tp', action='store_true', help='Enable Tensor Parallelism (TP) in ExLlamaV2.')

# HQQ
|
||||
@@ -192,12 +205,36 @@ group.add_argument('--nowebui', action='store_true', help='Do not launch the Gra
# Deprecated parameters
group = parser.add_argument_group('Deprecated')

+# Handle CMD_FLAGS.txt
+cmd_flags_path = Path(__file__).parent.parent / "user_data" / "CMD_FLAGS.txt"
+if cmd_flags_path.exists():
+    with cmd_flags_path.open('r', encoding='utf-8') as f:
+        cmd_flags = ' '.join(
+            line.strip().rstrip('\\').strip()
+            for line in f
+            if line.strip().rstrip('\\').strip() and not line.strip().startswith('#')
+        )
+
+    if cmd_flags:
+        # Command-line takes precedence over CMD_FLAGS.txt
+        sys.argv = [sys.argv[0]] + shlex.split(cmd_flags) + sys.argv[1:]
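The effect of this block is that flags written in user_data/CMD_FLAGS.txt are spliced into `sys.argv` before argparse runs, with real command-line flags kept after them so they win on conflicts. A standalone sketch using an in-memory stand-in for the file (the flag values are just examples):

```python
import shlex
import sys

# Simulating how user_data/CMD_FLAGS.txt is folded into sys.argv by the code above.
file_lines = [
    "# comment lines are ignored\n",
    "--listen --api \\\n",     # trailing backslash continuations are stripped
    "--verbose\n",
]

cmd_flags = ' '.join(
    line.strip().rstrip('\\').strip()
    for line in file_lines
    if line.strip().rstrip('\\').strip() and not line.strip().startswith('#')
)

sys.argv = [sys.argv[0]] + shlex.split(cmd_flags) + sys.argv[1:]
print(sys.argv[1:4])  # ['--listen', '--api', '--verbose'], followed by any real CLI flags
```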
args = parser.parse_args()
args_defaults = parser.parse_args([])

+# Create a mapping of all argument aliases to their canonical names
+alias_to_dest = {}
+for action in parser._actions:
+    for opt in action.option_strings:
+        alias_to_dest[opt.lstrip('-').replace('-', '_')] = action.dest
+
provided_arguments = []
for arg in sys.argv[1:]:
    arg = arg.lstrip('-').replace('-', '_')
-    if hasattr(args, arg):
+    if arg in alias_to_dest:
+        provided_arguments.append(alias_to_dest[arg])
+    elif hasattr(args, arg):
        provided_arguments.append(arg)
||||
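This alias map is what lets the old spellings keep working after the rename to `--ctx-size`: every option string resolves to the canonical argparse dest. A tiny reproduction of the idea, mirroring the loop above (which relies on argparse's private `_actions` list); the single argument here is the same alias set introduced earlier in this commit:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--ctx-size', '--n_ctx', '--max_seq_len', type=int, default=8192, dest='ctx_size')

# Map every option string (alias) to its canonical dest, as in the code above.
alias_to_dest = {}
for action in parser._actions:
    for opt in action.option_strings:
        alias_to_dest[opt.lstrip('-').replace('-', '_')] = action.dest

print(alias_to_dest['n_ctx'], alias_to_dest['max_seq_len'], alias_to_dest['ctx_size'])
# ctx_size ctx_size ctx_size
```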
|
||||
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
from pathlib import Path
|
||||
|
||||
import tensorrt_llm
|
||||
import torch
|
||||
from tensorrt_llm.runtime import ModelRunner, ModelRunnerCpp
|
||||
|
||||
import tensorrt_llm
|
||||
from modules import shared
|
||||
from modules.logging_colors import logger
|
||||
from modules.text_generation import (
|
||||
get_max_prompt_length,
|
||||
get_reply_from_output_ids
|
||||
)
|
||||
from tensorrt_llm.runtime import ModelRunner, ModelRunnerCpp
|
||||
|
||||
|
||||
class TensorRTLLMModel:
|
||||
|
@ -35,7 +35,7 @@ class TensorRTLLMModel:
|
|||
logger.info("TensorRT-LLM: Using \"ModelRunnerCpp\"")
|
||||
runner_kwargs.update(
|
||||
max_batch_size=1,
|
||||
max_input_len=shared.args.max_seq_len - 512,
|
||||
max_input_len=shared.args.ctx_size - 512,
|
||||
max_output_len=512,
|
||||
max_beam_width=1,
|
||||
max_attention_window_size=None,
|
||||
|
|
|
@ -264,6 +264,11 @@ def apply_stopping_strings(reply, all_stop_strings):
|
|||
|
||||
|
||||
def get_reply_from_output_ids(output_ids, state=None, starting_from=0):
|
||||
import torch
|
||||
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.synchronize()
|
||||
|
||||
reply = decode(output_ids[starting_from:], state['skip_special_tokens'] if state else True)
|
||||
|
||||
# Handle tokenizers that do not add the leading space for the first token
|
||||
|
|
|
@ -52,7 +52,7 @@ def create_ui():
|
|||
with gr.Column():
|
||||
always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
|
||||
|
||||
with gr.Accordion(label='Target Modules', open=False):
|
||||
with gr.Accordion(label='Target Modules', open=False, elem_classes='tgw-accordion'):
|
||||
gr.Markdown("Selects which modules to target in training. Targeting more modules is closer to a full fine-tune at the cost of increased VRAM requirements and adapter size.\nNOTE: Only works for model_id='llama', other types will retain default training behavior and not use these settings.")
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
|
@ -86,7 +86,7 @@ def create_ui():
|
|||
with gr.Row():
|
||||
lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='linear', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. "Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.', elem_classes=['slim-dropdown'])
|
||||
|
||||
with gr.Accordion(label='Advanced Options', open=False):
|
||||
with gr.Accordion(label='Advanced Options', open=False, elem_classes='tgw-accordion'):
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers. This can help reduce overfitting. Most users should leave at default.')
|
||||
|
@ -106,23 +106,23 @@ def create_ui():
|
|||
with gr.Column():
|
||||
with gr.Tab(label='Formatted Dataset'):
|
||||
with gr.Row():
|
||||
format = gr.Dropdown(choices=utils.get_datasets('training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.', elem_classes=['slim-dropdown'], interactive=not mu)
|
||||
ui.create_refresh_button(format, lambda: None, lambda: {'choices': utils.get_datasets('training/formats', 'json')}, 'refresh-button', interactive=not mu)
|
||||
format = gr.Dropdown(choices=utils.get_datasets('user_data/training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.', elem_classes=['slim-dropdown'], interactive=not mu)
|
||||
ui.create_refresh_button(format, lambda: None, lambda: {'choices': utils.get_datasets('user_data/training/formats', 'json')}, 'refresh-button', interactive=not mu)
|
||||
|
||||
with gr.Row():
|
||||
dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.', elem_classes=['slim-dropdown'], interactive=not mu)
|
||||
ui.create_refresh_button(dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button', interactive=not mu)
|
||||
dataset = gr.Dropdown(choices=utils.get_datasets('user_data/training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.', elem_classes=['slim-dropdown'], interactive=not mu)
|
||||
ui.create_refresh_button(dataset, lambda: None, lambda: {'choices': utils.get_datasets('user_data/training/datasets', 'json')}, 'refresh-button', interactive=not mu)
|
||||
|
||||
with gr.Row():
|
||||
eval_dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The (optional) dataset file used to evaluate the model after training.', elem_classes=['slim-dropdown'], interactive=not mu)
|
||||
ui.create_refresh_button(eval_dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button', interactive=not mu)
|
||||
eval_dataset = gr.Dropdown(choices=utils.get_datasets('user_data/training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The (optional) dataset file used to evaluate the model after training.', elem_classes=['slim-dropdown'], interactive=not mu)
|
||||
ui.create_refresh_button(eval_dataset, lambda: None, lambda: {'choices': utils.get_datasets('user_data/training/datasets', 'json')}, 'refresh-button', interactive=not mu)
eval_steps = gr.Number(label='Evaluate every n steps', value=100, info='If an evaluation dataset is given, test it every time this many steps pass.')
with gr.Tab(label="Raw text file"):
with gr.Row():
raw_text_file = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'txt'), value='None', label='Text file', info='The raw text file to use for training.', elem_classes=['slim-dropdown'], interactive=not mu)
ui.create_refresh_button(raw_text_file, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'txt')}, 'refresh-button', interactive=not mu)
raw_text_file = gr.Dropdown(choices=utils.get_datasets('user_data/training/datasets', 'txt'), value='None', label='Text file', info='The raw text file to use for training.', elem_classes=['slim-dropdown'], interactive=not mu)
ui.create_refresh_button(raw_text_file, lambda: None, lambda: {'choices': utils.get_datasets('user_data/training/datasets', 'txt')}, 'refresh-button', interactive=not mu)
with gr.Row():
with gr.Column():

@@ -143,7 +143,7 @@ def create_ui():
with gr.Row():
with gr.Column():
models = gr.Dropdown(utils.get_available_models(), label='Models', multiselect=True, interactive=not mu)
evaluate_text_file = gr.Dropdown(choices=['wikitext', 'ptb', 'ptb_new'] + utils.get_datasets('training/datasets', 'txt')[1:], value='wikitext', label='Input dataset', info='The raw text file on which the model will be evaluated. The first options are automatically downloaded: wikitext, ptb, and ptb_new. The next options are your local text files under training/datasets.', interactive=not mu)
evaluate_text_file = gr.Dropdown(choices=['wikitext', 'ptb', 'ptb_new'] + utils.get_datasets('user_data/training/datasets', 'txt')[1:], value='wikitext', label='Input dataset', info='The raw text file on which the model will be evaluated. The first options are automatically downloaded: wikitext, ptb, and ptb_new. The next options are your local text files under user_data/training/datasets.', interactive=not mu)
with gr.Row():
with gr.Column():
stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')

@@ -402,7 +402,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
if raw_text_file not in ['None', '']:
train_template["template_type"] = "raw_text"
logger.info("Loading raw text file dataset")
fullpath = clean_path('training/datasets', f'{raw_text_file}')
fullpath = clean_path('user_data/training/datasets', f'{raw_text_file}')
fullpath = Path(fullpath)
if fullpath.is_dir():
logger.info('Training path directory {}'.format(raw_text_file))

@@ -415,7 +415,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
logger.info(f"Loaded training file: {file_path.name}")
else:
with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
with open(clean_path('user_data/training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
raw_text = file.read().replace('\r', '')
cut_string = hard_cut_string.replace('\\n', '\n')

@@ -460,7 +460,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
train_template["template_type"] = "dataset"
with open(clean_path('training/formats', f'{format}.json'), 'r', encoding='utf-8-sig') as formatFile:
with open(clean_path('user_data/training/formats', f'{format}.json'), 'r', encoding='utf-8-sig') as formatFile:
format_data: dict[str, str] = json.load(formatFile)
# == store training prompt ==

@@ -482,13 +482,13 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
return tokenize(prompt, add_eos_token)
logger.info("Loading JSON datasets")
data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json'))
data = load_dataset("json", data_files=clean_path('user_data/training/datasets', f'{dataset}.json'))
train_data = data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))
if eval_dataset == 'None':
eval_data = None
else:
eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json'))
eval_data = load_dataset("json", data_files=clean_path('user_data/training/datasets', f'{eval_dataset}.json'))
eval_data = eval_data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))
# == We MUST reload model if it went through any previous training, even failed one ==

@@ -676,11 +676,11 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
decoded_entries.append({"value": decoded_text})
# Write the log file
Path('logs').mkdir(exist_ok=True)
with open(Path('logs/train_dataset_sample.json'), 'w') as json_file:
Path('user_data/logs').mkdir(exist_ok=True)
with open(Path('user_data/logs/train_dataset_sample.json'), 'w') as json_file:
json.dump(decoded_entries, json_file, indent=4)
logger.info("Log file 'train_dataset_sample.json' created in the 'logs' directory.")
logger.info("Log file 'train_dataset_sample.json' created in the 'user_data/logs' directory.")
except Exception as e:
logger.error(f"Failed to create log file due to error: {e}")
@@ -249,7 +249,7 @@ def load_model_HF(model_name):
)
if shared.args.disk:
params['offload_folder'] = shared.args.disk_cache_dir
params['offload_folder'] = str(Path(shared.args.disk_cache_dir))
if shared.args.compress_pos_emb > 1:
params['rope_scaling'] = {'type': 'linear', 'factor': shared.args.compress_pos_emb}
@@ -94,7 +94,7 @@ if not shared.args.old_colors:
input_radius='0.375rem',
)
if Path("notification.mp3").exists():
if Path("user_data/notification.mp3").exists():
audio_notification_js = "document.querySelector('#audio_notification audio')?.play();"
else:
audio_notification_js = ""

@@ -110,10 +110,10 @@ def list_model_elements():
'threads_batch',
'batch_size',
'hqq_backend',
'n_ctx',
'max_seq_len',
'ctx_size',
'cache_type',
'tensor_split',
'extra_flags',
'gpu_split',
'alpha_value',
'rope_freq_base',

@@ -145,6 +145,11 @@ def list_model_elements():
'cpp_runner',
'trust_remote_code',
'no_use_fast',
'model_draft',
'draft_max',
'gpu_layers_draft',
'device_draft',
'ctx_size_draft',
]
return elements

@@ -201,7 +206,6 @@ def list_interface_input_elements():
'sampler_priority',
'custom_stopping_strings',
'custom_token_bans',
'show_after',
'negative_prompt',
'dry_sequence_breakers',
'grammar_string',

@@ -262,7 +266,7 @@ def apply_interface_values(state, use_persistent=False):
if 'textbox-default' in state and 'prompt_menu-default' in state:
state.pop('prompt_menu-default')
if 'textbox-notebook' and 'prompt_menu-notebook' in state:
if 'textbox-notebook' in state and 'prompt_menu-notebook' in state:
state.pop('prompt_menu-notebook')
elements = list_interface_input_elements()
@@ -88,7 +88,7 @@ def create_ui():
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
with gr.Row():
shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
with gr.Row():
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')

@@ -146,7 +146,7 @@ def create_chat_settings_ui():
with gr.Column(scale=1):
shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil', interactive=not mu)
shared.gradio['your_picture'] = gr.Image(label='Your picture', type='pil', value=Image.open(Path('cache/pfp_me.png')) if Path('cache/pfp_me.png').exists() else None, interactive=not mu)
shared.gradio['your_picture'] = gr.Image(label='Your picture', type='pil', value=Image.open(Path('user_data/cache/pfp_me.png')) if Path('user_data/cache/pfp_me.png').exists() else None, interactive=not mu)
with gr.Tab('Instruction template'):
with gr.Row():
@@ -102,7 +102,7 @@ def handle_save_prompt(text):
return [
text,
utils.current_time() + ".txt",
"prompts/",
"user_data/prompts/",
gr.update(visible=True)
]

@@ -110,6 +110,6 @@ def handle_save_prompt(text):
def handle_delete_prompt(prompt):
return [
prompt + ".txt",
"prompts/",
"user_data/prompts/",
gr.update(visible=True)
]
@@ -28,7 +28,7 @@ def create_ui():
# Character saver/deleter
with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['character_saver']:
shared.gradio['save_character_filename'] = gr.Textbox(lines=1, label='File name', info='The character will be saved to your characters/ folder with this base filename.')
shared.gradio['save_character_filename'] = gr.Textbox(lines=1, label='File name', info='The character will be saved to your user_data/characters folder with this base filename.')
with gr.Row():
shared.gradio['save_character_cancel'] = gr.Button('Cancel', elem_classes="small-button")
shared.gradio['save_character_confirm'] = gr.Button('Save', elem_classes="small-button", variant='primary', interactive=not mu)

@@ -41,7 +41,7 @@ def create_ui():
# Preset saver
with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['preset_saver']:
shared.gradio['save_preset_filename'] = gr.Textbox(lines=1, label='File name', info='The preset will be saved to your presets/ folder with this base filename.')
shared.gradio['save_preset_filename'] = gr.Textbox(lines=1, label='File name', info='The preset will be saved to your user_data/presets folder with this base filename.')
shared.gradio['save_preset_contents'] = gr.Textbox(lines=10, label='File contents')
with gr.Row():
shared.gradio['save_preset_cancel'] = gr.Button('Cancel', elem_classes="small-button")

@@ -72,7 +72,7 @@ def create_event_handlers():
def handle_save_preset_confirm_click(filename, contents):
try:
utils.save_file(f"presets/{filename}.yaml", contents)
utils.save_file(f"user_data/presets/{filename}.yaml", contents)
available_presets = utils.get_available_presets()
output = gr.update(choices=available_presets, value=filename)
except Exception:

@@ -145,7 +145,7 @@ def handle_save_preset_click(state):
def handle_delete_preset_click(preset):
return [
f"{preset}.yaml",
"presets/",
"user_data/presets/",
gr.update(visible=True)
]

@@ -154,7 +154,7 @@ def handle_save_grammar_click(grammar_string):
return [
grammar_string,
"My Fancy Grammar.gbnf",
"grammars/",
"user_data/grammars/",
gr.update(visible=True)
]

@@ -162,6 +162,6 @@ def handle_save_grammar_click(grammar_string):
def handle_delete_grammar_click(grammar_file):
return [
grammar_file,
"grammars/",
"user_data/grammars/",
gr.update(visible=True)
]
@@ -51,11 +51,11 @@ def create_ui():
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['batch_size'] = gr.Slider(label="batch_size", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.')
shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.')
shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q6', 'q4'], value=shared.args.cache_type, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.')
shared.gradio['ctx_size'] = gr.Number(label='ctx_size', precision=0, step=256, value=shared.args.ctx_size, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768, 65536.')
shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Additional flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"', value=shared.args.extra_flags)
shared.gradio['cpu_memory'] = gr.Number(label="Maximum CPU memory in GiB. Use this for CPU offloading.", value=shared.args.cpu_memory)
shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')

@@ -70,6 +70,7 @@ def create_ui():
shared.gradio['torch_compile'] = gr.Checkbox(label="torch-compile", value=shared.args.torch_compile, info='Compile the model with torch.compile for improved performance.')
shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.')
shared.gradio['use_flash_attention_2'] = gr.Checkbox(label="use_flash_attention_2", value=shared.args.use_flash_attention_2, info='Set use_flash_attention_2=True while loading the model.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')

@@ -90,7 +91,18 @@ def create_ui():
shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='Set trust_remote_code=True while loading the tokenizer/model. To enable this option, start the web UI with the --trust-remote-code flag.', interactive=shared.args.trust_remote_code)
shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `max_seq_len` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')
shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `ctx_size` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')
# Speculative decoding
with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
with gr.Row():
shared.gradio['model_draft'] = gr.Dropdown(label="model-draft", choices=utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['model_draft'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Number of tokens to draft for speculative decoding.')
shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')
shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
with gr.Column():
with gr.Row():

@@ -211,9 +223,9 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
model_dir=shared.args.model_dir if shared.args.model_dir != shared.args_defaults.model_dir else None
)
if output_folder == Path("models"):
if output_folder == Path("user_data/models"):
output_folder = Path(shared.args.model_dir)
elif output_folder == Path("loras"):
elif output_folder == Path("user_data/loras"):
output_folder = Path(shared.args.lora_dir)
if check:

@@ -234,10 +246,8 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
def update_truncation_length(current_length, state):
if 'loader' in state:
if state['loader'].lower().startswith('exllama'):
return state['max_seq_len']
elif state['loader'] == 'llama.cpp':
return state['n_ctx']
if state['loader'].lower().startswith('exllama') or state['loader'] == 'llama.cpp':
return state['ctx_size']
return current_length
@@ -93,7 +93,6 @@ def create_ui(default_preset):
shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.')
shared.gradio['show_after'] = gr.Textbox(value=shared.settings['show_after'] or None, label='Show after', info='Hide the reply before this text.', placeholder="</think>")
shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar'])
shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
with gr.Row() as shared.gradio['grammar_file_row']:

@@ -122,16 +121,14 @@ def create_event_handlers():
def get_truncation_length():
if 'max_seq_len' in shared.provided_arguments or shared.args.max_seq_len != shared.args_defaults.max_seq_len:
return shared.args.max_seq_len
elif 'n_ctx' in shared.provided_arguments or shared.args.n_ctx != shared.args_defaults.n_ctx:
return shared.args.n_ctx
if 'ctx_size' in shared.provided_arguments or shared.args.ctx_size != shared.args_defaults.ctx_size:
return shared.args.ctx_size
else:
return shared.settings['truncation_length']
def load_grammar(name):
p = Path(f'grammars/{name}')
p = Path(f'user_data/grammars/{name}')
if p.exists():
return open(p, 'r', encoding='utf-8').read()
else:
@@ -13,7 +13,7 @@ def create_ui():
shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
with gr.Row():
shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡')
shared.gradio['save_settings'] = gr.Button('Save UI defaults to settings.yaml', interactive=not mu)
shared.gradio['save_settings'] = gr.Button('Save UI defaults to user_data/settings.yaml', interactive=not mu)
with gr.Row():
with gr.Column():

@@ -48,7 +48,7 @@ def handle_save_settings(state, preset, extensions, show_controls, theme):
return [
contents,
"settings.yaml",
"./",
"user_data/",
gr.update(visible=True)
]
@@ -76,44 +76,54 @@ def get_available_models():
# Get all GGUF files
gguf_files = get_available_ggufs()
# Filter out non-first parts of multipart GGUF files
filtered_gguf_files = []
for gguf_path in gguf_files:
filename = os.path.basename(gguf_path)
match = re.search(r'-(\d+)-of-\d+\.gguf$', filename)
if match:
part_number = match.group(1)
# Keep only if it's part 1
if part_number.lstrip("0") == "1":
filtered_gguf_files.append(gguf_path)
else:
# Not a multi-part file
filtered_gguf_files.append(gguf_path)
model_dir = Path(shared.args.model_dir)
# Find top-level directories containing GGUF files
dirs_with_gguf = set()
for gguf_path in gguf_files:
path = Path(gguf_path)
if path.parts: # If in a subdirectory
dirs_with_gguf.add(path.parts[0]) # Add top-level directory
if len(path.parts) > 0:
dirs_with_gguf.add(path.parts[0])
# Find directories with safetensors files directly under them
# Find directories with safetensors files
dirs_with_safetensors = set()
for item in os.listdir(model_dir):
item_path = model_dir / item
if item_path.is_dir():
# Check if there are safetensors files directly under this directory
if any(file.lower().endswith(('.safetensors', '.pt')) for file in os.listdir(item_path) if (item_path / file).is_file()):
dirs_with_safetensors.add(item)
# Find valid model directories
model_dirs = []
for item in os.listdir(model_dir):
item_path = model_dir / item
# Skip if not a directory
if not item_path.is_dir():
continue
# Include directory if it either:
# 1. Doesn't contain GGUF files, OR
# 2. Contains both GGUF and safetensors files
# Include directory if it either doesn't contain GGUF files
# or contains both GGUF and safetensors files
if item not in dirs_with_gguf or item in dirs_with_safetensors:
model_dirs.append(item)
model_dirs = sorted(model_dirs, key=natural_keys)
# Combine all models
return ['None'] + gguf_files + model_dirs
return ['None'] + filtered_gguf_files + model_dirs

def get_available_ggufs():

@@ -131,11 +141,11 @@ def get_available_ggufs():
def get_available_presets():
return sorted(set((k.stem for k in Path('presets').glob('*.yaml'))), key=natural_keys)
return sorted(set((k.stem for k in Path('user_data/presets').glob('*.yaml'))), key=natural_keys)
def get_available_prompts():
prompt_files = list(Path('prompts').glob('*.txt'))
prompt_files = list(Path('user_data/prompts').glob('*.txt'))
sorted_files = sorted(prompt_files, key=lambda x: x.stat().st_mtime, reverse=True)
prompts = [file.stem for file in sorted_files]
prompts.append('None')

@@ -143,12 +153,12 @@ def get_available_prompts():
def get_available_characters():
paths = (x for x in Path('characters').iterdir() if x.suffix in ('.json', '.yaml', '.yml'))
paths = (x for x in Path('user_data/characters').iterdir() if x.suffix in ('.json', '.yaml', '.yml'))
return sorted(set((k.stem for k in paths)), key=natural_keys)
def get_available_instruction_templates():
path = "instruction-templates"
path = "user_data/instruction-templates"
paths = []
if os.path.exists(path):
paths = (x for x in Path(path).iterdir() if x.suffix in ('.json', '.yaml', '.yml'))

@@ -179,4 +189,4 @@ def get_available_chat_styles():
def get_available_grammars():
return ['None'] + sorted([item.name for item in list(Path('grammars').glob('*.gbnf'))], key=natural_keys)
return ['None'] + sorted([item.name for item in list(Path('user_data/grammars').glob('*.gbnf'))], key=natural_keys)
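For reference, the multipart-GGUF filtering added to `get_available_models` above can be exercised on its own. The following is a minimal illustrative sketch: the regex and the "keep only part 1" check mirror the diff, while the helper name and the sample filenames are invented for the example.

```python
import re


def keep_first_parts(gguf_files):
    """Return single-file GGUFs plus only the first shard of multipart GGUFs."""
    filtered = []
    for filename in gguf_files:
        match = re.search(r'-(\d+)-of-\d+\.gguf$', filename)
        if match:
            # Multipart file: keep it only if it is shard 1 (e.g. "-00001-of-00003.gguf")
            if match.group(1).lstrip("0") == "1":
                filtered.append(filename)
        else:
            # Not a multipart file: always keep it
            filtered.append(filename)
    return filtered


# Hypothetical filenames, for illustration only
print(keep_first_parts([
    'model-q4.gguf',
    'big-model-00001-of-00003.gguf',
    'big-model-00002-of-00003.gguf',
]))
# -> ['model-q4.gguf', 'big-model-00001-of-00003.gguf']
```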
14 one_click.py
@@ -28,14 +28,7 @@ conda_env_path = os.path.join(script_dir, "installer_files", "env")
state_file = '.installer_state.json'
# Command-line flags
cmd_flags_path = os.path.join(script_dir, "CMD_FLAGS.txt")
if os.path.exists(cmd_flags_path):
with open(cmd_flags_path, 'r') as f:
CMD_FLAGS = ' '.join(line.strip().rstrip('\\').strip() for line in f if line.strip().rstrip('\\').strip() and not line.strip().startswith('#'))
else:
CMD_FLAGS = ''
flags = f"{' '.join([flag for flag in sys.argv[1:] if flag != '--update-wizard'])} {CMD_FLAGS}"
flags = f"{' '.join([flag for flag in sys.argv[1:] if flag != '--update-wizard'])}"
def signal_handler(sig, frame):

@@ -300,9 +293,10 @@ def install_webui():
# Write a flag to CMD_FLAGS.txt for CPU mode
if selected_gpu == "NONE":
cmd_flags_path = os.path.join(script_dir, "user_data", "CMD_FLAGS.txt")
with open(cmd_flags_path, 'r+') as cmd_flags_file:
if "--cpu" not in cmd_flags_file.read():
print_big_message("Adding the --cpu flag to CMD_FLAGS.txt.")
print_big_message("Adding the --cpu flag to user_data/CMD_FLAGS.txt.")
cmd_flags_file.write("\n--cpu\n")
# Handle CUDA version display

@@ -538,7 +532,7 @@ if __name__ == "__main__":
flags_list = re.split(' +(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)|=', flags)
model_dir = [flags_list[(flags_list.index(flag) + 1)] for flag in flags_list if flag == '--model-dir'][0].strip('"\'')
else:
model_dir = 'models'
model_dir = 'user_data/models'
if len([item for item in glob.glob(f'{model_dir}/*') if not item.endswith(('.txt', '.yaml'))]) == 0:
print_big_message("You haven't downloaded any model yet.\nOnce the web UI launches, head over to the \"Model\" tab and download one.")
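The block removed from one_click.py above built `CMD_FLAGS` by joining the non-empty, non-comment lines of CMD_FLAGS.txt into a single flag string. As a reference, here is a standalone sketch of that same parsing logic, pointed at the new user_data/CMD_FLAGS.txt location. It is illustrative only: it reproduces the removed code rather than whatever part of the project now reads this file, and the function name is made up.

```python
import os


def read_cmd_flags(path=os.path.join("user_data", "CMD_FLAGS.txt")):
    """Join the non-empty, non-comment lines of the flags file into one string."""
    if not os.path.exists(path):
        return ''
    with open(path, 'r') as f:
        return ' '.join(
            line.strip().rstrip('\\').strip()
            for line in f
            if line.strip().rstrip('\\').strip() and not line.strip().startswith('#')
        )


# With the default user_data/CMD_FLAGS.txt shown at the end of this diff,
# every line is a comment, so this returns an empty string.
print(repr(read_cmd_flags()))
```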
@@ -30,12 +30,12 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

@@ -29,6 +29,6 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+rocm6.1.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

@@ -29,6 +29,6 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+rocm6.1.2avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+rocm6.1.2avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

@@ -29,7 +29,7 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3-py3-none-any.whl
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4-py3-none-any.whl
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl

@@ -29,8 +29,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3-py3-none-any.whl
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4-py3-none-any.whl
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl

@@ -29,5 +29,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

@@ -29,5 +29,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

@@ -30,12 +30,12 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -15,4 +15,4 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+rocm6.1.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -15,4 +15,4 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+rocm6.1.2avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+rocm6.1.2avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

@@ -15,6 +15,6 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"

@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"

@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"

@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+vulkan-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+vulkan-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+vulkanavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+vulkanavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
19 server.py
@@ -1,7 +1,6 @@
import os
import warnings
import modules.one_click_installer_check
from modules import shared
from modules.block_requests import OpenMonkeyPatch, RequestBlocker
from modules.logging_colors import logger

@@ -94,8 +93,8 @@ def create_interface():
'filter_by_loader': shared.args.loader or 'All'
})
if Path("cache/pfp_character.png").exists():
Path("cache/pfp_character.png").unlink()
if Path("user_data/cache/pfp_character.png").exists():
Path("user_data/cache/pfp_character.png").unlink()
# css/js strings
css = ui.css

@@ -112,8 +111,8 @@ def create_interface():
shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements})
# Audio notification
if Path("notification.mp3").exists():
shared.gradio['audio_notification'] = gr.Audio(interactive=False, value="notification.mp3", elem_id="audio_notification", visible=False)
if Path("user_data/notification.mp3").exists():
shared.gradio['audio_notification'] = gr.Audio(interactive=False, value="user_data/notification.mp3", elem_id="audio_notification", visible=False)
# Floating menus for saving/deleting files
ui_file_saving.create_ui()

@@ -179,7 +178,7 @@ def create_interface():
ssl_keyfile=shared.args.ssl_keyfile,
ssl_certfile=shared.args.ssl_certfile,
root_path=shared.args.subpath,
allowed_paths=["cache", "css", "extensions", "js"]
allowed_paths=["css", "js", "extensions", "user_data/cache"]
)

@@ -192,10 +191,10 @@ if __name__ == "__main__":
settings_file = None
if shared.args.settings is not None and Path(shared.args.settings).exists():
settings_file = Path(shared.args.settings)
elif Path('settings.yaml').exists():
settings_file = Path('settings.yaml')
elif Path('settings.json').exists():
settings_file = Path('settings.json')
elif Path('user_data/settings.yaml').exists():
settings_file = Path('user_data/settings.yaml')
elif Path('user_data/settings.json').exists():
settings_file = Path('user_data/settings.json')
if settings_file is not None:
logger.info(f"Loading settings from \"{settings_file}\"")
@@ -1,11 +0,0 @@
@echo off
cd /D "%~dp0"
set PATH=%PATH%;%SystemRoot%\system32
@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script
call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh %*"
:end
pause

@@ -1,11 +0,0 @@
@echo off
cd /D "%~dp0"
set PATH=%PATH%;%SystemRoot%\system32
@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script calling wsl.sh with 'update' will run updater
call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh update-wizard"
:end
pause
3 user_data/CMD_FLAGS.txt (new file)
@@ -0,0 +1,3 @@
# Add persistent flags here to use every time you launch the web UI.
# Example:
# --listen --api
Some files were not shown because too many files have changed in this diff.