update basic_usage.py
This commit is contained in:
parent df8672ce75
commit e826d89dbc
2 changed files with 20 additions and 1 deletion
@@ -11,6 +11,8 @@ To execute `basic_usage.py`, use a command like this:
```shell
python basic_usage.py
```

This script also showcases how to download/upload a quantized model from/to the 🤗 Hub; to enable those features, uncomment the commented code.

To execute `basic_usage_with_wikitext2.py`, use a command like this:
```shell
python basic_usage_with_wikitext2.py
```
@@ -31,11 +31,28 @@ def main():
    # save quantized model
    model.save_quantized(quantized_model_dir)

    # push quantized model to Hugging Face Hub.
    # to use use_auth_token=True, log in first via huggingface-cli login.
    # or pass an explicit token with: use_auth_token="hf_xxxxxxx"
    # (uncomment the following three lines to enable this feature)
    # repo_id = f"YourUserName/{quantized_model_dir}"
    # commit_message = f"AutoGPTQ model for {pretrained_model_dir}: {quantize_config.bits}bits, gr{quantize_config.group_size}, desc_act={quantize_config.desc_act}"
    # model.push_to_hub(repo_id, commit_message=commit_message, use_auth_token=True)

    # alternatively you can save and push at the same time
    # (uncomment the following three lines to enable this feature)
    # repo_id = f"YourUserName/{quantized_model_dir}"
    # commit_message = f"AutoGPTQ model for {pretrained_model_dir}: {quantize_config.bits}bits, gr{quantize_config.group_size}, desc_act={quantize_config.desc_act}"
    # model.push_to_hub(repo_id, save_dir=quantized_model_dir, use_safetensors=True, commit_message=commit_message, use_auth_token=True)

    # save quantized model using safetensors
    model.save_quantized(quantized_model_dir, use_safetensors=True)

    # load quantized model to the first GPU
    model = AutoGPTQForCausalLM.from_quantized(quantized_model_dir)
    model = AutoGPTQForCausalLM.from_quantized(quantized_model_dir, device="cuda:0")

    # download quantized model from Hugging Face Hub and load to the first GPU
    # model = AutoGPTQForCausalLM.from_quantized(repo_id, device="cuda:0", use_safetensors=True, use_triton=False)

    # inference with model.generate
    print(tokenizer.decode(model.generate(**tokenizer("auto_gptq is", return_tensors="pt").to(model.device))[0]))
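For context, the hunk above sits near the end of `basic_usage.py`'s `main()`, after the model has already been quantized. Below is a minimal sketch of the quantization steps that typically precede these lines; the model name, example text, and config values here are illustrative placeholders, not part of this commit.

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

# illustrative values; the actual script defines its own directories
pretrained_model_dir = "facebook/opt-125m"
quantized_model_dir = "opt-125m-4bit-128g"

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)

# calibration examples used by the GPTQ algorithm
examples = [
    tokenizer(
        "auto-gptq is an easy-to-use model quantization library "
        "with user-friendly apis, based on the GPTQ algorithm."
    )
]

quantize_config = BaseQuantizeConfig(
    bits=4,          # quantize weights to 4-bit
    group_size=128,  # quantization group size
    desc_act=False,  # skip activation-order reordering for faster inference
)

# load the full-precision model and run GPTQ on the calibration examples
model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, quantize_config)
model.quantize(examples)

# ...followed by the save / push / load / generate lines shown in the diff above
```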