From 3361728da154ab6ed9b472b529bc06a0a01e633c Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 26 Aug 2023 22:24:44 -0700
Subject: [PATCH] Change some comments

---
 README.md                | 16 ++++++++--------
 modules/ui_model_menu.py |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index f527cd55..77d67fa9 100644
--- a/README.md
+++ b/README.md
@@ -269,16 +269,16 @@ Optionally, you can use the following command-line flags:
 
 #### llama.cpp
 
-| Flag        | Description |
-|-------------|-------------|
-| `--no-mmap` | Prevent mmap from being used. |
-| `--mlock`   | Force the system to keep the model in RAM. |
+| Flag          | Description   |
+|---------------|---------------|
+| `--no-mmap`   | Prevent mmap from being used. |
+| `--mlock`     | Force the system to keep the model in RAM. |
 | `--mul_mat_q` | Activate new mulmat kernels. |
 | `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. |
-| `--tensor_split TENSOR_SPLIT` | Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17 |
-| `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default 0 (random). |
-| `--n_gqa N_GQA` | grouped-query attention. Must be 8 for llama-2 70b. |
-| `--rms_norm_eps RMS_NORM_EPS` | 5e-6 is a good value for llama-2 models. |
+| `--tensor_split TENSOR_SPLIT` | Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17 |
+| `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default 0 (random). |
+| `--n_gqa N_GQA` | GGML only (not used by GGUF): Grouped-Query Attention. Must be 8 for llama-2 70b. |
+| `--rms_norm_eps RMS_NORM_EPS` | GGML only (not used by GGUF): 5e-6 is a good value for llama-2 models. |
 | `--cpu` | Use the CPU version of llama-cpp-python instead of the GPU-accelerated version. |
 |`--cfg-cache` | llamacpp_HF: Create an additional cache for CFG negative prompts. |
 

diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index fde2fb38..5c945def 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -80,8 +80,8 @@ def create_ui():
             shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=16384, step=256, label="n_ctx", value=shared.args.n_ctx)
             shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=32, value=shared.args.threads)
             shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, value=shared.args.n_batch)
-            shared.gradio['n_gqa'] = gr.Slider(minimum=0, maximum=16, step=1, label="n_gqa", value=shared.args.n_gqa, info='grouped-query attention. Must be 8 for llama-2 70b.')
-            shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.rms_norm_eps, info='5e-6 is a good value for llama-2 models.')
+            shared.gradio['n_gqa'] = gr.Slider(minimum=0, maximum=16, step=1, label="n_gqa", value=shared.args.n_gqa, info='GGML only (not used by GGUF): Grouped-Query Attention. Must be 8 for llama-2 70b.')
+            shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.rms_norm_eps, info='GGML only (not used by GGUF): 5e-6 is a good value for llama-2 models.')
             shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=str(shared.args.wbits) if shared.args.wbits > 0 else "None")
             shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=str(shared.args.groupsize) if shared.args.groupsize > 0 else "None")
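
---

Note (below the diff, so it is not part of the applied patch): the two updated `info` strings can be previewed without loading a model. A minimal standalone sketch, assuming only that the `gradio` package is installed; this is not the project's actual UI layout, and the slider values here are placeholders rather than `shared.args` defaults:

    # Hypothetical preview of the two sliders this patch retouches;
    # not part of the repository. Opens a local Gradio page.
    import gradio as gr

    with gr.Blocks() as demo:
        gr.Slider(minimum=0, maximum=16, step=1, label="n_gqa", value=0,
                  info='GGML only (not used by GGUF): Grouped-Query Attention. Must be 8 for llama-2 70b.')
        gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=0,
                  info='GGML only (not used by GGUF): 5e-6 is a good value for llama-2 models.')

    demo.launch()

On the command line, the same settings correspond to passing `--n_gqa 8 --rms_norm_eps 5e-6` when loading a GGML llama-2 70b model; per the updated README text, both flags are ignored for GGUF models.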