AutoGPTQ/auto_gptq/modeling/gptj.py
2023-05-14 16:17:21 +08:00

19 lines
550 B
Python

from ._base import *
from ..nn_modules.fused_gptj_attn import FusedGPTJAttentionForQuantizedModel
class GPTJGPTQForCausalLM(BaseGPTQForCausalLM):
    """GPTQ quantization adapter for GPT-J causal language models.

    This class carries no logic of its own; it configures the inherited
    ``BaseGPTQForCausalLM`` machinery with the module layout of the
    Hugging Face GPT-J architecture — presumably so the base class can
    locate the repeated transformer blocks and the linear sub-modules to
    quantize (TODO confirm against ``_base``, which is outside this view).
    """

    # Class name of one transformer block in the HF GPT-J implementation.
    layer_type = "GPTJBlock"
    # Attribute path (from the model root) to the stack of transformer blocks.
    layers_block_name = "transformer.h"
    # Modules living outside the repeated blocks: token embedding and
    # final layer norm.
    outside_layer_modules = ["transformer.wte", "transformer.ln_f"]
    # Linear sub-modules inside each block, grouped — the grouping appears
    # to follow execution order (QKV projections, attention output
    # projection, then the two MLP linears).
    inside_layer_modules = [
        ["attn.k_proj", "attn.v_proj", "attn.q_proj"],
        ["attn.out_proj"],
        ["mlp.fc_in"],
        ["mlp.fc_out"],
    ]

    # Replacement attention module used when fused attention is enabled.
    fused_attn_module_type = FusedGPTJAttentionForQuantizedModel
# Explicit public API: only the model adapter class is re-exported.
__all__ = ["GPTJGPTQForCausalLM"]