diff --git a/examples/quantization/quant_with_alpaca.py b/examples/quantization/quant_with_alpaca.py
index de7d8bc..603f171 100644
--- a/examples/quantization/quant_with_alpaca.py
+++ b/examples/quantization/quant_with_alpaca.py
@@ -77,6 +77,7 @@ def main():
     parser.add_argument("--quantized_model_dir", type=str, default=None)
     parser.add_argument("--bits", type=int, default=4, choices=[2, 3, 4, 8])
     parser.add_argument("--group_size", type=int, default=128)
+    parser.add_argument("--desc_act", action="store_true", help="whether to quantize with desc_act")
     parser.add_argument("--num_samples", type=int, default=128, help="how many samples will be used to quantize model")
     parser.add_argument("--save_and_reload", action="store_true", help="whether save quantized model to disk and reload back")
     parser.add_argument("--fast_tokenizer", action="store_true", help="whether use fast tokenizer")
@@ -104,7 +105,7 @@ def main():
     )
     model = AutoGPTQForCausalLM.from_pretrained(
         args.pretrained_model_dir,
-        quantize_config=BaseQuantizeConfig(bits=args.bits, group_size=args.group_size),
+        quantize_config=BaseQuantizeConfig(bits=args.bits, group_size=args.group_size, desc_act=args.desc_act),
         max_memory=max_memory
     )
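
This change adds a `--desc_act` CLI flag and threads it through to `BaseQuantizeConfig`. In GPTQ, `desc_act` (also known as act-order) quantizes weight columns in order of decreasing activation magnitude, which typically improves quantization accuracy at some cost to inference speed. Below is a minimal sketch of the configuration this flag controls, assuming the `auto_gptq` package; the model path is a hypothetical placeholder.

```python
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

quantize_config = BaseQuantizeConfig(
    bits=4,          # weight precision, matching the script's --bits default
    group_size=128,  # one set of quantization params per 128 weight columns
    desc_act=True,   # act-order: quantize columns by decreasing activation magnitude
)

model = AutoGPTQForCausalLM.from_pretrained(
    "/path/to/model",  # hypothetical stand-in for args.pretrained_model_dir
    quantize_config=quantize_config,
)
```

With the flag wired in, the example script can be invoked as, e.g., `python quant_with_alpaca.py --pretrained_model_dir /path/to/model --bits 4 --group_size 128 --desc_act`. Because the argument uses `action="store_true"`, `desc_act` defaults to `False` when the flag is omitted, preserving the script's previous behavior.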