AutoGPTQ/auto_gptq/modeling/opt.py
from ._base import *


class OPTGPTQForCausalLM(BaseGPTQForCausalLM):
    # name of the ModuleList holding the transformer decoder layers that get quantized
    layers_block_name = "model.decoder.layers"

    # modules outside the decoder layers; these are not quantized
    outside_layer_modules = [
        "model.decoder.embed_tokens", "model.decoder.embed_positions", "model.decoder.project_out",
        "model.decoder.project_in", "model.decoder.final_layer_norm"
    ]

    # linear sub-modules inside each decoder layer, grouped in the order they are processed
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.out_proj"],
        ["fc1"],
        ["fc2"]
    ]

    @staticmethod
    def _resize_attention_mask(attention_mask):
        # add a singleton dimension at axis 1 to each captured attention mask before the
        # masks are replayed through the decoder layers during quantization
        attention_mask = [mask.unsqueeze(1) for mask in attention_mask]
        return attention_mask


__all__ = ["OPTGPTQForCausalLM"]
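
For context, a minimal usage sketch follows (not part of opt.py). It assumes AutoGPTQ's documented public API (AutoGPTQForCausalLM, BaseQuantizeConfig, quantize, save_quantized, from_quantized) together with a Hugging Face tokenizer; the checkpoint name, calibration sentence, bit width, and output directory are illustrative choices, not values taken from this file.

# Usage sketch (assumption: AutoGPTQ's public quantization API as documented in the project README).
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

pretrained_model_dir = "facebook/opt-125m"   # illustrative OPT checkpoint
quantized_model_dir = "opt-125m-4bit"        # illustrative output directory

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)

# calibration examples: a list of dict-like objects with "input_ids" and "attention_mask" tensors
examples = [
    tokenizer(
        "auto-gptq is an easy-to-use model quantization library based on the GPTQ algorithm.",
        return_tensors="pt",
    )
]

quantize_config = BaseQuantizeConfig(bits=4, group_size=128)

# from_pretrained resolves the checkpoint's model_type ("opt") to the OPTGPTQForCausalLM class above
model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, quantize_config)
model.quantize(examples)
model.save_quantized(quantized_model_dir)

# later, reload the quantized weights for inference
model = AutoGPTQForCausalLM.from_quantized(quantized_model_dir)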