Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-06-07 06:06:20 -04:00)
Fix exllamav3_hf models failing to unload (closes #7031)
This commit is contained in:
parent 298d4719c6
commit 219f0a7731

2 changed files with 20 additions and 0 deletions
@@ -245,3 +245,20 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
         pretrained_model_name_or_path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path)
 
         return Exllamav3HF(pretrained_model_name_or_path)
+
+    def unload(self):
+        """Properly unload the ExllamaV3 model and free GPU memory."""
+        if hasattr(self, 'ex_model') and self.ex_model is not None:
+            self.ex_model.unload()
+            self.ex_model = None
+
+        if hasattr(self, 'ex_cache') and self.ex_cache is not None:
+            self.ex_cache = None
+
+        # Clean up any additional ExllamaV3 resources
+        if hasattr(self, 'past_seq'):
+            self.past_seq = None
+        if hasattr(self, 'past_seq_negative'):
+            self.past_seq_negative = None
+        if hasattr(self, 'ex_cache_negative'):
+            self.ex_cache_negative = None
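The new unload() method follows the standard pattern for releasing VRAM held by a loader: drop every strong reference to the weights and caches so the allocator can reclaim the blocks. Below is a hedged usage sketch, not part of the commit; it assumes the context lines above sit inside the usual HF-style from_pretrained classmethod, that the class lives in a module importable as modules.exllamav3_hf (the file name is not shown in this diff), and 'MyModel-exl3' is a hypothetical folder under shared.args.model_dir.

# Sketch: load, use, and fully release an ExLlamav3_HF model.
import gc

import torch

from modules.exllamav3_hf import Exllamav3HF  # module path assumed

model = Exllamav3HF.from_pretrained('MyModel-exl3')  # hypothetical model folder
# ... run generation with the model ...
model.unload()            # new in this commit: frees ex_model and the caches
del model                 # drop the last Python reference
gc.collect()              # collect any cycles still pinning tensors
torch.cuda.empty_cache()  # return freed blocks from PyTorch's allocator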
@@ -116,10 +116,13 @@ def unload_model(keep_model_name=False):
         return
 
     is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer')
+    if shared.args.loader == 'ExLlamav3_HF':
+        shared.model.unload()
+
     shared.model = shared.tokenizer = None
     shared.lora_names = []
     shared.model_dirty_from_training = False
 
     if not is_llamacpp:
         from modules.torch_utils import clear_torch_cache
         clear_torch_cache()
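The hunk above dispatches on the loader name. For illustration only, a duck-typed variant could call a loader-specific unload() hook whenever one exists; this is a sketch, not the commit's code, unload_model_sketch is a hypothetical name, and the shared attributes mirror the ones used in the diff.

# Hypothetical variant of unload_model(): dispatch on the presence of an
# unload() method instead of checking shared.args.loader.
import modules.shared as shared
from modules.torch_utils import clear_torch_cache


def unload_model_sketch():
    if shared.model is None:
        return

    is_llamacpp = shared.model.__class__.__name__ == 'LlamaServer'

    # getattr + callable covers loaders with and without an unload() method.
    unload = getattr(shared.model, 'unload', None)
    if callable(unload):
        unload()

    shared.model = shared.tokenizer = None
    shared.lora_names = []
    shared.model_dirty_from_training = False

    if not is_llamacpp:
        clear_torch_cache()

The tradeoff: name-based dispatch keeps the cleanup explicit per loader, while the duck-typed form would silently pick up any future loader that grows an unload() method.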