diff --git a/auto_gptq/modeling/llama.py b/auto_gptq/modeling/llama.py
index 52b94f1..6321199 100644
--- a/auto_gptq/modeling/llama.py
+++ b/auto_gptq/modeling/llama.py
@@ -12,10 +12,5 @@ class LlamaGPTQForCausalLM(BaseGPTQForCausalLM):
         ["mlp.down_proj"]
     ]
 
-    @staticmethod
-    def _resize_attention_mask(attention_mask):
-        attention_mask = [each.unsqueeze(1) for each in attention_mask]
-        return attention_mask
-
 
 __all__ = ["LlamaGPTQForCausalLM"]
diff --git a/auto_gptq/modeling/opt.py b/auto_gptq/modeling/opt.py
index 720765f..89066c2 100644
--- a/auto_gptq/modeling/opt.py
+++ b/auto_gptq/modeling/opt.py
@@ -15,10 +15,5 @@ class OPTGPTQForCausalLM(BaseGPTQForCausalLM):
         ["fc2"]
     ]
 
-    @staticmethod
-    def _resize_attention_mask(attention_mask):
-        attention_mask = [each.unsqueeze(1) for each in attention_mask]
-        return attention_mask
-
 
 __all__ = ["OPTGPTQForCausalLM"]
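
For context, below is a minimal standalone sketch of what the removed _resize_attention_mask overrides did. The function body is lifted verbatim from the deleted lines; the torch import, the boolean mask, and the (batch, seq, seq) input shape are illustrative assumptions, not part of the patch.

# Sketch of the removed helper; the mask shape here is an assumption
# chosen to show the effect of unsqueeze(1), not taken from the patch.
import torch

def _resize_attention_mask(attention_mask):
    # Insert a singleton dimension at axis 1 of each cached mask,
    # e.g. (batch, seq, seq) -> (batch, 1, seq, seq).
    return [each.unsqueeze(1) for each in attention_mask]

masks = [torch.ones(2, 8, 8, dtype=torch.bool)]
print(_resize_attention_mask(masks)[0].shape)  # torch.Size([2, 1, 8, 8])

Since the llama and opt copies were identical, deleting both per-model overrides leaves whatever mask handling BaseGPTQForCausalLM provides as the single shared code path.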