Fix exllamav3_hf models failing to unload (closes #7031)

commit 219f0a7731
parent 298d4719c6
Author: oobabooga
Date:   2025-05-30 12:05:49 -07:00

2 changed files with 20 additions and 0 deletions

diff --git a/modules/exllamav3_hf.py b/modules/exllamav3_hf.py

@@ -245,3 +245,20 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
         pretrained_model_name_or_path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path)
 
         return Exllamav3HF(pretrained_model_name_or_path)
+
+    def unload(self):
+        """Properly unload the ExllamaV3 model and free GPU memory."""
+        if hasattr(self, 'ex_model') and self.ex_model is not None:
+            self.ex_model.unload()
+            self.ex_model = None
+
+        if hasattr(self, 'ex_cache') and self.ex_cache is not None:
+            self.ex_cache = None
+
+        # Clean up any additional ExllamaV3 resources
+        if hasattr(self, 'past_seq'):
+            self.past_seq = None
+        if hasattr(self, 'past_seq_negative'):
+            self.past_seq_negative = None
+        if hasattr(self, 'ex_cache_negative'):
+            self.ex_cache_negative = None

diff --git a/modules/models.py b/modules/models.py

@@ -116,10 +116,13 @@ def unload_model(keep_model_name=False):
         return
 
     is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer')
 
+    if shared.args.loader == 'ExLlamav3_HF':
+        shared.model.unload()
+
     shared.model = shared.tokenizer = None
     shared.lora_names = []
     shared.model_dirty_from_training = False
     if not is_llamacpp:
         from modules.torch_utils import clear_torch_cache
         clear_torch_cache()
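
Note the ordering here: shared.model.unload() runs before shared.model is set to None, since the hook needs the live object to reach ex_model and the caches. The clear_torch_cache helper imported from modules.torch_utils then follows the usual collect-then-empty recipe; a minimal sketch of that pattern (an assumption about the helper's behavior, not the file's verbatim contents):

import gc

import torch


def clear_torch_cache():
    # Collect first so dead tensors are actually released by the allocator...
    gc.collect()
    # ...then return the allocator's cached CUDA blocks to the driver.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()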