From fb380fb9c2c4b011b68a620e065415e3639647e2 Mon Sep 17 00:00:00 2001
From: LaaZa
Date: Fri, 12 May 2023 14:46:52 +0300
Subject: [PATCH 1/2] Add initial support for MPT

---
 auto_gptq/modeling/__init__.py |  1 +
 auto_gptq/modeling/_const.py   |  2 +-
 auto_gptq/modeling/auto.py     |  4 +++-
 auto_gptq/modeling/mpt.py      | 18 ++++++++++++++++++
 4 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 auto_gptq/modeling/mpt.py

diff --git a/auto_gptq/modeling/__init__.py b/auto_gptq/modeling/__init__.py
index 1e5566d..9425ef0 100644
--- a/auto_gptq/modeling/__init__.py
+++ b/auto_gptq/modeling/__init__.py
@@ -7,3 +7,4 @@ from .gptj import *
 from .llama import *
 from .moss import *
 from .opt import *
+from .mpt import *
\ No newline at end of file
diff --git a/auto_gptq/modeling/_const.py b/auto_gptq/modeling/_const.py
index ac7e6f0..299c979 100644
--- a/auto_gptq/modeling/_const.py
+++ b/auto_gptq/modeling/_const.py
@@ -6,7 +6,7 @@ from transformers import __version__ as transformers_version
 CPU = device("cpu")
 CUDA_0 = device("cuda:0")
 
-SUPPORTED_MODELS = ["bloom", "gptj", "gpt2", "gpt_neox", "opt", "moss"]
+SUPPORTED_MODELS = ["bloom", "gptj", "gpt2", "gpt_neox", "opt", "moss", "mpt"]
 if parse_version(transformers_version) >= parse_version("v4.28.0"):
     SUPPORTED_MODELS.append("llama")
 
diff --git a/auto_gptq/modeling/auto.py b/auto_gptq/modeling/auto.py
index 42cca45..3b9dc5c 100644
--- a/auto_gptq/modeling/auto.py
+++ b/auto_gptq/modeling/auto.py
@@ -9,6 +9,7 @@ from .gpt2 import GPT2GPTQForCausalLM
 from .llama import LlamaGPTQForCausalLM
 from .moss import MOSSGPTQForCausalLM
 from .opt import OPTGPTQForCausalLM
+from .mpt import MPTGPTQForCausalLM
 
 
 GPTQ_CAUSAL_LM_MODEL_MAP = {
@@ -18,7 +19,8 @@ GPTQ_CAUSAL_LM_MODEL_MAP = {
     "gpt2": GPT2GPTQForCausalLM,
     "llama": LlamaGPTQForCausalLM,
     "opt": OPTGPTQForCausalLM,
-    "moss": MOSSGPTQForCausalLM
+    "moss": MOSSGPTQForCausalLM,
+    "mpt": MPTGPTQForCausalLM
 }
 
 
diff --git a/auto_gptq/modeling/mpt.py b/auto_gptq/modeling/mpt.py
new file mode 100644
index 0000000..fd36c37
--- /dev/null
+++ b/auto_gptq/modeling/mpt.py
@@ -0,0 +1,18 @@
+from auto_gptq.modeling import BaseGPTQForCausalLM
+
+
+class MPTGPTQForCausalLM(BaseGPTQForCausalLM):
+    layer_type = "MPTBlock"
+    layers_block_name = "transformer.blocks"
+    outside_layer_modules = [
+        "transformer.wte", "transformer.norm_f"
+    ]
+
+    inside_layer_modules = [
+        ["attn.Wqkv"],
+        ["attn.out_proj"],
+        ["ffn.up_proj"],
+        ["ffn.down_proj"]
+    ]
+
+__all__ = ["MPTGPTQForCausalLM"]
\ No newline at end of file
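
With the "mpt" entry registered in GPTQ_CAUSAL_LM_MODEL_MAP, an MPT checkpoint should be quantizable through the usual AutoGPTQ flow. A minimal sketch modelled on the project's quick-start usage; the model id, calibration text, and output directory are placeholders and not part of this patch:

from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

# Placeholder checkpoint: any model whose config reports model_type == "mpt"
# should now resolve to MPTGPTQForCausalLM via GPTQ_CAUSAL_LM_MODEL_MAP.
pretrained_model_dir = "mosaicml/mpt-7b"
quantized_model_dir = "mpt-7b-4bit-gptq"

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, trust_remote_code=True)
# One or more tokenized calibration examples, as in the quick-start example.
examples = [tokenizer("auto-gptq calibration sample for MPT.")]

quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
model = AutoGPTQForCausalLM.from_pretrained(
    pretrained_model_dir, quantize_config, trust_remote_code=True
)
model.quantize(examples)                   # GPTQ runs layer by layer over transformer.blocks
model.save_quantized(quantized_model_dir)  # writes the quantized weights and config
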
From 99acbead4206ee3ff11e3ac31fc4d0a40359afe9 Mon Sep 17 00:00:00 2001
From: LaaZa
Date: Wed, 4 Oct 2023 01:07:55 +0300
Subject: [PATCH 2/2] Add support for Mistral models.

---
 auto_gptq/modeling/__init__.py |  3 ++-
 auto_gptq/modeling/_const.py   |  3 +++
 auto_gptq/modeling/auto.py     |  2 ++
 auto_gptq/modeling/mistral.py  | 16 ++++++++++++++++
 4 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 auto_gptq/modeling/mistral.py

diff --git a/auto_gptq/modeling/__init__.py b/auto_gptq/modeling/__init__.py
index fd40631..24f78e4 100644
--- a/auto_gptq/modeling/__init__.py
+++ b/auto_gptq/modeling/__init__.py
@@ -12,4 +12,5 @@ from .gpt_bigcode import *
 from .codegen import *
 from .baichuan import *
 from .internlm import *
-from .qwen import *
\ No newline at end of file
+from .qwen import *
+from .mistral import *
\ No newline at end of file
diff --git a/auto_gptq/modeling/_const.py b/auto_gptq/modeling/_const.py
index bfb0cf1..e478bf8 100644
--- a/auto_gptq/modeling/_const.py
+++ b/auto_gptq/modeling/_const.py
@@ -26,6 +26,9 @@ if compare_transformers_version("v4.28.0", op="ge"):
     SUPPORTED_MODELS.append("llama")
 if compare_transformers_version("v4.33.0", op="ge"):
     SUPPORTED_MODELS.append("falcon")
+if compare_transformers_version("v4.34.0", op="ge"):
+    SUPPORTED_MODELS.append("mistral")
+
 
 EXLLAMA_DEFAULT_MAX_INPUT_LENGTH = 2048
 
diff --git a/auto_gptq/modeling/auto.py b/auto_gptq/modeling/auto.py
index 0155e99..7f869f8 100644
--- a/auto_gptq/modeling/auto.py
+++ b/auto_gptq/modeling/auto.py
@@ -16,6 +16,7 @@ from .gpt_bigcode import GPTBigCodeGPTQForCausalLM
 from .baichuan import BaiChuanGPTQForCausalLM
 from .internlm import InternLMGPTQForCausalLM
 from .qwen import QwenGPTQForCausalLM
+from .mistral import MistralGPTQForCausalLM
 
 GPTQ_CAUSAL_LM_MODEL_MAP = {
     "bloom": BloomGPTQForCausalLM,
@@ -33,6 +34,7 @@ GPTQ_CAUSAL_LM_MODEL_MAP = {
     "baichuan": BaiChuanGPTQForCausalLM,
     "internlm": InternLMGPTQForCausalLM,
     "qwen": QwenGPTQForCausalLM,
+    "mistral": MistralGPTQForCausalLM,
 }
 
 
diff --git a/auto_gptq/modeling/mistral.py b/auto_gptq/modeling/mistral.py
new file mode 100644
index 0000000..5a36d89
--- /dev/null
+++ b/auto_gptq/modeling/mistral.py
@@ -0,0 +1,16 @@
+from ._base import *
+
+
+class MistralGPTQForCausalLM(BaseGPTQForCausalLM):
+    layer_type = "MistralDecoderLayer"
+    layers_block_name = "model.layers"
+    outside_layer_modules = ["model.embed_tokens", "model.norm"]
+    inside_layer_modules = [
+        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
+        ["self_attn.o_proj"],
+        ["mlp.up_proj", "mlp.gate_proj"],
+        ["mlp.down_proj"],
+    ]
+
+
+__all__ = ["MistralGPTQForCausalLM"]
\ No newline at end of file
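
With the "mistral" entry registered, an already-quantized Mistral checkpoint should load through the same auto class, provided transformers >= 4.34.0 is installed (the guard added to _const.py above). A rough sketch; the repository id and prompt are placeholders, not part of this patch:

from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

# Placeholder repo id: any GPTQ checkpoint whose config reports
# model_type == "mistral" should dispatch to MistralGPTQForCausalLM.
quantized_model_dir = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir)
model = AutoGPTQForCausalLM.from_quantized(quantized_model_dir, device="cuda:0")

inputs = tokenizer("Mistral uses grouped-query attention, so", return_tensors="pt").to("cuda:0")
output_ids = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
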