diff --git a/auto_gptq/modeling/__init__.py b/auto_gptq/modeling/__init__.py
index fd40631..cf5ab4e 100644
--- a/auto_gptq/modeling/__init__.py
+++ b/auto_gptq/modeling/__init__.py
@@ -12,4 +12,6 @@ from .gpt_bigcode import *
 from .codegen import *
 from .baichuan import *
 from .internlm import *
-from .qwen import *
\ No newline at end of file
+from .qwen import *
+from .mistral import *
+from .mpt import *
diff --git a/auto_gptq/modeling/_const.py b/auto_gptq/modeling/_const.py
index bfb0cf1..c0ab77f 100644
--- a/auto_gptq/modeling/_const.py
+++ b/auto_gptq/modeling/_const.py
@@ -21,11 +21,15 @@ SUPPORTED_MODELS = [
     "baichuan",
     "internlm",
     "qwen",
+    "mpt",
 ]
 
 if compare_transformers_version("v4.28.0", op="ge"):
     SUPPORTED_MODELS.append("llama")
 if compare_transformers_version("v4.33.0", op="ge"):
     SUPPORTED_MODELS.append("falcon")
+if compare_transformers_version("v4.34.0", op="ge"):
+    SUPPORTED_MODELS.append("mistral")
+
 
 EXLLAMA_DEFAULT_MAX_INPUT_LENGTH = 2048
diff --git a/auto_gptq/modeling/auto.py b/auto_gptq/modeling/auto.py
index 0155e99..550862a 100644
--- a/auto_gptq/modeling/auto.py
+++ b/auto_gptq/modeling/auto.py
@@ -16,6 +16,8 @@ from .gpt_bigcode import GPTBigCodeGPTQForCausalLM
 from .baichuan import BaiChuanGPTQForCausalLM
 from .internlm import InternLMGPTQForCausalLM
 from .qwen import QwenGPTQForCausalLM
+from .mistral import MistralGPTQForCausalLM
+from .mpt import MPTGPTQForCausalLM
 
 GPTQ_CAUSAL_LM_MODEL_MAP = {
     "bloom": BloomGPTQForCausalLM,
@@ -33,6 +35,8 @@ GPTQ_CAUSAL_LM_MODEL_MAP = {
     "baichuan": BaiChuanGPTQForCausalLM,
     "internlm": InternLMGPTQForCausalLM,
     "qwen": QwenGPTQForCausalLM,
+    "mistral": MistralGPTQForCausalLM,
+    "mpt": MPTGPTQForCausalLM,
 }
 
 
diff --git a/auto_gptq/modeling/mistral.py b/auto_gptq/modeling/mistral.py
new file mode 100644
index 0000000..5a36d89
--- /dev/null
+++ b/auto_gptq/modeling/mistral.py
@@ -0,0 +1,16 @@
+from ._base import *
+
+
+class MistralGPTQForCausalLM(BaseGPTQForCausalLM):
+    layer_type = "MistralDecoderLayer"
+    layers_block_name = "model.layers"
+    outside_layer_modules = ["model.embed_tokens", "model.norm"]
+    inside_layer_modules = [
+        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
+        ["self_attn.o_proj"],
+        ["mlp.up_proj", "mlp.gate_proj"],
+        ["mlp.down_proj"],
+    ]
+
+
+__all__ = ["MistralGPTQForCausalLM"]
\ No newline at end of file
diff --git a/auto_gptq/modeling/mpt.py b/auto_gptq/modeling/mpt.py
new file mode 100644
index 0000000..fd36c37
--- /dev/null
+++ b/auto_gptq/modeling/mpt.py
@@ -0,0 +1,18 @@
+from auto_gptq.modeling import BaseGPTQForCausalLM
+
+
+class MPTGPTQForCausalLM(BaseGPTQForCausalLM):
+    layer_type = "MPTBlock"
+    layers_block_name = "transformer.blocks"
+    outside_layer_modules = [
+        "transformer.wte", "transformer.norm_f"
+    ]
+
+    inside_layer_modules = [
+        ["attn.Wqkv"],
+        ["attn.out_proj"],
+        ["ffn.up_proj"],
+        ["ffn.down_proj"]
+    ]
+
+__all__ = ["MPTGPTQForCausalLM"]
\ No newline at end of file
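
For reference, a minimal usage sketch of quantizing one of the newly supported architectures, following AutoGPTQ's existing basic-usage flow (AutoGPTQForCausalLM.from_pretrained, quantize, save_quantized). The checkpoint id and output directory below are placeholders, not part of this diff; note that per the _const.py change, Mistral support additionally requires transformers >= 4.34.

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

# Placeholder names; any Mistral or MPT checkpoint should dispatch to the new
# MistralGPTQForCausalLM / MPTGPTQForCausalLM classes via GPTQ_CAUSAL_LM_MODEL_MAP.
pretrained_model_id = "mistralai/Mistral-7B-v0.1"
quantized_model_dir = "mistral-7b-4bit-128g"

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_id, use_fast=True)
# Calibration examples for GPTQ; a real run would use a larger, representative set.
examples = [
    tokenizer(
        "auto-gptq is an easy-to-use model quantization library with "
        "user-friendly apis, based on GPTQ algorithm."
    )
]

quantize_config = BaseQuantizeConfig(
    bits=4,          # quantize weights to 4-bit
    group_size=128,  # groupwise quantization
    desc_act=False,  # skip activation-order reordering for faster inference
)

# from_pretrained resolves config.model_type ("mistral" / "mpt") through the
# GPTQ_CAUSAL_LM_MODEL_MAP entries added in this diff.
model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_id, quantize_config)
model.quantize(examples)
model.save_quantized(quantized_model_dir, use_safetensors=True)
```

Loading the result back would go through AutoGPTQForCausalLM.from_quantized(quantized_model_dir) as with the other supported model types.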