AutoGPTQ/auto_gptq/modeling_auto.py

from transformers import AutoConfig

from .modeling import BaseQuantizeConfig, GPTQ_CAUSAL_LM_MODEL_MAP
from .modeling._const import SUPPORTED_MODELS


def check_and_get_model_type(model_dir):
    """Read the model type from the checkpoint's config and verify it is supported."""
    config = AutoConfig.from_pretrained(model_dir)
    if config.model_type not in SUPPORTED_MODELS:
        raise TypeError(f"{config.model_type} isn't supported yet.")
    return config.model_type
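

# Illustrative note: for a checkpoint whose config.json declares
# "model_type": "llama", this helper returns "llama", provided that type is
# listed in SUPPORTED_MODELS (the supported set is defined in ._const).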


class AutoGPTQModelForCausalLM:
    """Dispatcher that routes to the model-specific GPTQ class for the detected model type."""

    def __init__(self):
        raise EnvironmentError(
            "AutoGPTQModelForCausalLM is designed to be instantiated using\n"
            "`AutoGPTQModelForCausalLM.from_pretrained` if you want to quantize a pretrained model, or\n"
            "`AutoGPTQModelForCausalLM.from_quantized` if you want to run inference with a quantized model."
        )

    @classmethod
    def from_pretrained(
        cls,
        pretrained_model_name_or_path: str,
        quantize_config: BaseQuantizeConfig,
        bf16: bool = False,
        **model_init_kwargs
    ):
        # Detect the model type, then delegate to the matching GPTQ model class
        # to load the full-precision weights ready for quantization.
        model_type = check_and_get_model_type(pretrained_model_name_or_path)
        return GPTQ_CAUSAL_LM_MODEL_MAP[model_type].from_pretrained(
            pretrained_model_name_or_path=pretrained_model_name_or_path,
            quantize_config=quantize_config,
            bf16=bf16,
            **model_init_kwargs
        )

    @classmethod
    def from_quantized(
        cls,
        save_dir: str,
        device: str = "cpu",
        use_safetensors: bool = False
    ):
        # Detect the model type, then delegate to the matching GPTQ model class
        # to load already-quantized weights for inference.
        model_type = check_and_get_model_type(save_dir)
        return GPTQ_CAUSAL_LM_MODEL_MAP[model_type].from_quantized(
            save_dir=save_dir,
            device=device,
            use_safetensors=use_safetensors
        )


__all__ = ["AutoGPTQModelForCausalLM"]
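

# --- Usage sketch (illustrative, not part of the module) ---
# A minimal sketch of the intended flow, assuming the model-specific classes in
# GPTQ_CAUSAL_LM_MODEL_MAP expose `quantize`, `save_quantized`, and `generate`
# methods (GPTQ needs tokenized calibration examples to quantize). The
# checkpoint name, bit width, group size, and save directory are placeholders.
#
#   from transformers import AutoTokenizer
#   from auto_gptq import AutoGPTQModelForCausalLM, BaseQuantizeConfig
#
#   pretrained = "facebook/opt-125m"  # placeholder checkpoint
#   tokenizer = AutoTokenizer.from_pretrained(pretrained)
#
#   # 1) Quantize: load full-precision weights, calibrate, save.
#   model = AutoGPTQModelForCausalLM.from_pretrained(
#       pretrained, quantize_config=BaseQuantizeConfig(bits=4, group_size=128)
#   )
#   examples = [tokenizer("auto-gptq is an easy-to-use quantization package.", return_tensors="pt")]
#   model.quantize(examples)
#   model.save_quantized("opt-125m-4bit")
#
#   # 2) Inference: reload the quantized weights on the target device.
#   model = AutoGPTQModelForCausalLM.from_quantized("opt-125m-4bit", device="cuda:0")
#   inputs = tokenizer("auto-gptq is", return_tensors="pt").to("cuda:0")
#   print(tokenizer.decode(model.generate(**inputs)[0]))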