Adding lobe chat support

commit ea8b7fccbe, parent d385272ab6
4 changed files with 183 additions and 60 deletions
build.pl (2 changes)

@@ -37,4 +37,4 @@ for my $file (path("files")->children(qr/\.tmpl/)) {
     $output_file->spew_utf8($data);
 }
 
-data_print($conf->text_gen->models->{general1}->gen_cli($conf))
+#data_print($conf->text_gen->models->{general1}->gen_cli($conf))
config.toml (221 changes)

@@ -6,40 +6,24 @@
 # Todo mounts?
 model_dir = "/app/models/TheBloke" # should this be automatic? part of the top level?
 
-#[text_gen.models.highcontext]
-# api = true
-# auto_devices = true
-# chat_buttons = true
-# desc_act = true
-# disable_exllama = true
-# extensions = ["api", "openai", "superboogav2"] # TODO this way? or flags for each named extension in each one?
-# families = ["highcontext"]
-# gpu_list = []
-# listen = true
-# loader = "llamacpp"
-# memory_split = [0]
-# model = "Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-GPTQ"
-# no_inject_fused_attention = true
-# no_use_cuda_fp16 = true
-# trust_remote_code = true
-
-[text_gen.models.general1]
+[text_gen.models.guard1]
 api = true
 auto_devices = true
 chat_buttons = true
 desc_act = true
 disable_exllama = true
-extensions = ["api", "openai", "superboogav2"] # TODO this way? or flags for each named extension in each one?
-families = ["general", "embeddings"]
-gpu_list = ["3"]
+extensions = ["api", "openai"] # TODO this way? or flags for each named extension in each one?
+families = ["guard"]
+gpu_list = ["2"]
 listen = true
 loader = "llamacpp"
 memory_split = [20]
-model = "LlongOrca-13B-16K-GGUF"
+model = "LlamaGuard-7B-GGUF"
 no_inject_fused_attention = true
 no_use_cuda_fp16 = true
+openai_api = true
 trust_remote_code = true
-no_mul_mat_q = true
+no_mul_mat_q = false
 no_mmap = true
 n_gpu_layers = 128
 numa = true
@@ -52,24 +36,49 @@
 chat_buttons = true
 desc_act = true
 disable_exllama = true
-extensions = ["api", "openai", "superboogav2"] # TODO this way? or flags for each named extension in each one?
-families = ["chat", "embeddings"]
+extensions = ["api", "openai"] # TODO this way? or flags for each named extension in each one?
+families = ["chat"]
 gpu_list = ["0"]
 listen = true
 loader = "llamacpp"
 memory_split = [20]
-model = "Chronos-Hermes-13b-v2-GGUF"
+model = "dolphin-2.6-mistral-7B-GGUF"
 no_inject_fused_attention = true
 no_use_cuda_fp16 = true
+openai_api = true
 trust_remote_code = true
-no_mul_mat_q = true
+no_mul_mat_q = false
 no_mmap = true
 n_gpu_layers = 128
 numa = true
 logits_all = true
 tensor_split = [1]
 
-[text_gen.models.guard1]
+[text_gen.models.guard3]
+api = true
+auto_devices = true
+chat_buttons = true
+desc_act = true
+disable_exllama = true
+extensions = ["api", "openai"] # TODO this way? or flags for each named extension in each one?
+families = ["guard"]
+gpu_list = ["2"]
+listen = true
+loader = "llamacpp"
+memory_split = [20]
+model = "LlamaGuard-7B-GGUF"
+no_inject_fused_attention = true
+no_use_cuda_fp16 = true
+openai_api = true
+trust_remote_code = true
+no_mul_mat_q = false
+no_mmap = true
+n_gpu_layers = 128
+numa = true
+logits_all = true
+tensor_split = [1]
+
+[text_gen.models.guard4]
 api = true
 auto_devices = true
 chat_buttons = true
@@ -86,35 +95,12 @@
 no_use_cuda_fp16 = true
 openai_api = true
 trust_remote_code = true
-no_mul_mat_q = true
+no_mul_mat_q = false
 no_mmap = true
 n_gpu_layers = 128
 numa = true
 logits_all = true
 tensor_split = [1]
-#[text_gen.models.censored1]
-# api = true
-# auto_devices = true
-# chat_buttons = true
-# desc_act = true
-# disable_exllama = true
-# extensions = ["api", "openai", "superboogav2"] # TODO this way? or flags for each named extension in each one?
-# families = ["censored", "embeddings"]
-# gpu_list = ["1"]
-# listen = true
-# loader = "llamacpp"
-# memory_split = [20]
-# model = "WizardLM-13B-V1.2-GGUF"
-# no_inject_fused_attention = true
-# no_use_cuda_fp16 = true
-# openai_api = true
-# trust_remote_code = true
-# no_mul_mat_q = true
-# no_mmap = true
-# n_gpu_layers = 128
-# numa = true
-# logits_all = true
-# tensor_split = [1]
 
 [text_gen.models.coder1]
 api = true
@@ -124,7 +110,7 @@
 disable_exllama = true
 extensions = ["api", "openai", "superboogav2"] # TODO this way? or flags for each named extension in each one?
 families = ["coder", "embeddings"]
-gpu_list = [0, 1, 3, 4]
+gpu_list = [2,3]
 listen = true
 loader = "llamacpp"
 memory_split = [20, 20]
@@ -133,13 +119,12 @@
 no_use_cuda_fp16 = true
 openai_api = true
 trust_remote_code = true
-no_mul_mat_q = true
+no_mul_mat_q = false
 no_mmap = true
 n_gpu_layers = 128
 numa = true
 logits_all = true
-tensor_split = [1,1,1,5]
+tensor_split = [1,3]
 
-
 [text_gen.models.embedding1]
 api = true
@@ -158,13 +143,135 @@
 no_use_cuda_fp16 = true
 openai_api = true
 trust_remote_code = true
-no_mul_mat_q = true
+no_mul_mat_q = false
 no_mmap = true
 n_gpu_layers = 128
 numa = true
 logits_all = true
 tensor_split = [1]
 
+#dolphin-2.5-mixtral-8x7b-GGUF
+[text_gen.models.mixtral1]
+api = true
+auto_devices = true
+chat_buttons = true
+desc_act = true
+disable_exllama = true
+extensions = ["api", "openai", "superboogav2"] # TODO this way? or flags for each named extension in each one?
+families = ["mixtral", "embeddings"]
+gpu_list = [0,1]
+listen = true
+loader = "llamacpp"
+memory_split = [20, 20]
+model = "dolphin-2.5-mixtral-8x7b-GGUF"
+no_inject_fused_attention = true
+no_use_cuda_fp16 = true
+openai_api = true
+trust_remote_code = true
+no_mul_mat_q = false
+no_mmap = true
+n_gpu_layers = 128
+numa = true
+logits_all = true
+tensor_split = [2,4]
+
+[text_gen.models.tiny1]
+api = true
+auto_devices = true
+chat_buttons = true
+desc_act = true
+disable_exllama = true
+extensions = ["api", "openai", "superboogav2"] # TODO this way? or flags for each named extension in each one?
+families = ["tiny", "embeddings"]
+gpu_list = [3]
+listen = true
+loader = "llamacpp"
+memory_split = [20, 20]
+model = "TinyLlama-1.1B-Chat-v1.0-GGUF"
+no_inject_fused_attention = true
+no_use_cuda_fp16 = true
+openai_api = true
+trust_remote_code = true
+no_mul_mat_q = false
+no_mmap = true
+n_gpu_layers = 128
+numa = true
+logits_all = true
+tensor_split = [1]
+
+[text_gen.models.tiny2]
+api = true
+auto_devices = true
+chat_buttons = true
+desc_act = true
+disable_exllama = true
+extensions = ["api", "openai", "superboogav2"] # TODO this way? or flags for each named extension in each one?
+families = ["tiny", "embeddings"]
+gpu_list = [3]
+listen = true
+loader = "llamacpp"
+memory_split = [20, 20]
+model = "TinyLlama-1.1B-Chat-v1.0-GGUF"
+no_inject_fused_attention = true
+no_use_cuda_fp16 = true
+openai_api = true
+trust_remote_code = true
+no_mul_mat_q = false
+no_mmap = true
+n_gpu_layers = 128
+numa = true
+logits_all = true
+tensor_split = [1]
+
+[text_gen.models.phi1]
+api = true
+auto_devices = true
+chat_buttons = true
+desc_act = true
+disable_exllama = true
+extensions = ["api", "openai", "superboogav2"] # TODO this way? or flags for each named extension in each one?
+families = ["phi", "embeddings"]
+gpu_list = [3]
+listen = true
+loader = "llamacpp"
+memory_split = [20, 20]
+model = "dolphin-2_6-phi-2-GGUF"
+no_inject_fused_attention = true
+no_use_cuda_fp16 = true
+openai_api = true
+trust_remote_code = true
+no_mul_mat_q = false
+no_mmap = true
+n_gpu_layers = 128
+numa = true
+logits_all = true
+tensor_split = [1]
+
+[text_gen.models.phi2]
+api = true
+auto_devices = true
+chat_buttons = true
+desc_act = true
+disable_exllama = true
+extensions = ["api", "openai", "superboogav2"] # TODO this way? or flags for each named extension in each one?
+families = ["phi", "embeddings"]
+gpu_list = [0]
+listen = true
+loader = "llamacpp"
+memory_split = [20, 20]
+model = "dolphin-2_6-phi-2-GGUF"
+no_inject_fused_attention = true
+no_use_cuda_fp16 = true
+openai_api = true
+trust_remote_code = true
+no_mul_mat_q = false
+no_mmap = true
+n_gpu_layers = 128
+numa = true
+logits_all = true
+tensor_split = [1]
+
+
 [agnai]
 anonymous = false
 
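Several of the model tables above turn on text-generation-webui's OpenAI-compatible API (`openai_api = true` plus the "openai" extension), which is what downstream consumers such as lobe-chat talk to. As a rough illustration only, here is a minimal Perl sketch of a chat-completion request against such an endpoint; the base URL and the model string are placeholders, not values taken from this commit.

#!/usr/bin/env perl
use strict;
use warnings;
use HTTP::Tiny;
use JSON::PP qw(encode_json decode_json);

# Placeholder: point this at wherever a text-gen container's
# OpenAI-compatible API port is actually exposed.
my $base = "http://localhost:5000/v1";

my $res = HTTP::Tiny->new->post("$base/chat/completions", {
    headers => { "Content-Type" => "application/json" },
    content => encode_json({
        model    => "dolphin-2.6-mistral-7B-GGUF",   # illustrative; the server answers with whatever model it has loaded
        messages => [ { role => "user", content => "Say hello." } ],
    }),
});
die "request failed: $res->{status} $res->{reason}" unless $res->{success};

# Print the assistant's reply from the first choice.
my $reply = decode_json($res->{content});
print $reply->{choices}[0]{message}{content}, "\n";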
docker-compose template (file name not shown in the captured diff)

@@ -2,7 +2,7 @@ version: "3.9"
 services:
 <% for my ($name, $gen_config) ($config->text_gen->models->%*) { =%>
   text-<%= $name %>:
-    image: gitea.simcop2387.info/simcop2387/text-gen-python-base:<%= $gen_config->image_tag %>
+    image: <%= $gen_config->image_name %>:<%= $gen_config->image_tag %>
     restart: unless-stopped
     environment:
       CONTAINER_PORT: 7860
@@ -35,6 +35,19 @@ services:
              device_ids: ['<%= join(',', $gen_config->gpu_list->@*) %>']
              capabilities: [gpu]
 <% } =%>
+
+<% for my ($name, $lobe_config) ($config->lobe_chat->%*) { =%>
+  lobe-<%= $name %>:
+    image: <%= $lobe_config->image_name %>:<%= $lobe_config->image_tag %>
+    restart: unless-stopped
+    environment: # TODO This needs the proxy really, and config in the thing
+      - OPENAI_API_KEY=11111111
+      - OPENAI_PROXY_URL=http://openai.mixtral1-model.brainiac.ai.simcop2387.info/v1
+      - ACCESS_CODE=12345
+    ports:
+      - "<%= $lobe_config->lobe_host_port %>:3210"
+<% } =%>
+
   nginx:
     image: nginx:latest
     restart: unless-stopped
@@ -42,3 +55,4 @@ services:
       - 80:80
     volumes:
       - ./nginx/nginx.conf:/etc/nginx/conf.d/default.conf:ro
+
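The new loop reads per-instance settings from a `lobe_chat` table in config.toml, with one sub-table per instance exposing at least the accessors the template calls (`image_name`, `image_tag`, `lobe_host_port`). That table is not part of this diff, so the block below is only a sketch of the assumed shape; the instance name and every value are placeholders.

# Assumed shape only -- not part of this commit.
[lobe_chat.main]
image_name = "lobehub/lobe-chat"   # placeholder image reference
image_tag = "latest"               # placeholder tag
lobe_host_port = 3210              # host port published to the container's 3210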
Perl configuration module defining class AIConfig (file name not shown in the captured diff)

@@ -182,7 +182,9 @@ class AIConfig :does(Object::PadX::Role::AutoMarshal) :Struct {
   method from_file :common ($file) {
     my $file_p = path($file);
 
-    my $hr = from_toml($file_p->slurp_utf8);
+    my ($hr, $toml_error) = from_toml($file_p->slurp_utf8);
 
+    croak $toml_error if $toml_error;
+
     my ($conf, $error) = AIConfig->new($hr->%*);
 
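For reference, here is a self-contained sketch of the parse-error handling pattern the new lines adopt, assuming a `from_toml` that returns a `(data, error)` pair in list context (as the classic TOML CPAN module does); the file name and the lookup at the end are illustrative only.

use strict;
use warnings;
use Carp qw(croak);
use Path::Tiny qw(path);
use TOML qw(from_toml);   # assumed parser; returns (data, error) in list context

# Read and parse the config, dying with the parser's message instead of
# continuing with an undefined hashref when the TOML is malformed.
my ($hr, $toml_error) = from_toml(path("config.toml")->slurp_utf8);
croak "config.toml failed to parse: $toml_error" if $toml_error;

# On success $hr is a plain hashref mirroring the TOML structure.
print "loaded ", scalar keys %{ $hr->{text_gen}{models} }, " model definitions\n";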