Merge pull request #6797 from oobabooga/dev

Merge dev branch
This commit is contained in:
oobabooga 2025-03-15 00:11:25 -03:00 committed by GitHub
commit 80cdbe4e09
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 676 additions and 228 deletions

View file

@ -249,8 +249,8 @@ button {
}
.pretty_scrollbar::-webkit-scrollbar {
width: 7px;
height: 7px;
width: 8px;
height: 8px;
}
.pretty_scrollbar::-webkit-scrollbar-track {
@ -295,7 +295,7 @@ audio {
width: 0;
text-align: left;
direction: rtl;
right: 5px;
right: 13px;
}
#default-token-counter {
@ -1163,7 +1163,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
}
.header_bar button.selected {
background: white;
background: #E0E0E0;
}
#chat-controls,

View file

@ -21,6 +21,7 @@ class GenerationOptions(BaseModel):
eta_cutoff: float = 0
tfs: float = 1
top_a: float = 0
top_n_sigma: float = 0
dry_multiplier: float = 0
dry_allowed_length: int = 2
dry_base: float = 1.75

View file

@ -1,9 +1,14 @@
import time
import html
import functools
import re
import gradio
import numpy as np
import torch
from transformers import LogitsProcessor
import colorsys
from modules import html_generator, shared
@ -28,7 +33,7 @@ class PerplexityLogits(LogitsProcessor):
self.verbose = verbose
def __call__(self, input_ids, scores):
# t0 = time.time()
#t0 = time.time()
probs = torch.softmax(scores, dim=-1, dtype=torch.float)
log_probs = torch.nan_to_num(torch.log(probs)) # Note: This is to convert log(0) nan to 0, but probs*log_probs makes this 0 not affect the perplexity.
entropy = -torch.sum(probs * log_probs)
@ -42,9 +47,8 @@ class PerplexityLogits(LogitsProcessor):
if len(self.selected_probs) > 0:
# Is the selected token in the top tokens?
if self.verbose:
print('Probs: Token after', shared.tokenizer.decode(last_token_id))
print('Probs:', [shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1][0]])
print('Probs:', [round(float(prob), 4) for prob in self.top_probs_list[-1][0]])
print(shared.tokenizer.decode(last_token_id), [shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1][0]],
[round(float(prob), 4) for prob in self.top_probs_list[-1][0]])
if last_token_id in self.top_token_ids_list[-1][0]:
idx = self.top_token_ids_list[-1][0].index(last_token_id)
self.selected_probs.append(self.top_probs_list[-1][0][idx])
@ -60,7 +64,7 @@ class PerplexityLogits(LogitsProcessor):
pplbar = "-"
if not np.isnan(perplexity):
pplbar = "*" * round(perplexity)
print(f"PPL: Token after {shared.tokenizer.decode(last_token_id)}\t{perplexity:.2f}\t{pplbar}")
print(f"PPL for token after {shared.tokenizer.decode(last_token_id)}: {perplexity:.2f} {pplbar}")
# Get top 5 probabilities
top_tokens_and_probs = torch.topk(probs, 5)
@ -73,14 +77,15 @@ class PerplexityLogits(LogitsProcessor):
probs = probs.cpu().numpy().flatten()
self.last_probs = probs # Need to keep this as a reference for top probs
# t1 = time.time()
# print(f"PPL Processor: {(t1-t0):.3f} s")
#t1 = time.time()
#print(f"PPL Processor: {(t1-t0):.3f} s")
# About 1 ms, though occasionally up to around 100 ms, not sure why...
# Doesn't actually modify the logits!
return scores
# Stores the perplexity and top probabilities
# global ppl_logits_processor
ppl_logits_processor = None
@ -91,130 +96,192 @@ def logits_processor_modifier(logits_processor_list, input_ids):
logits_processor_list.append(ppl_logits_processor)
def get_last_token(text, tokens_list, token_ids_list, token_probs_list):
for token, token_id, prob in zip(tokens_list, token_ids_list, token_probs_list):
if text.strip().endswith(token.strip()): # Whitespace could be a problem
return token, token_id, prob
# Unknown?
print("Last token not found in list:", tokens_list)
return '', -1, 0.0
def output_modifier(text):
global ppl_logits_processor
# t0 = time.time()
#t0 = time.time()
original_text = text
if not params['active']:
if not params['active'] or ppl_logits_processor is None:
return text
# Space at the beginning to account for tokenization spaces...
text = ' ' + html.unescape(text)
# TODO: It's probably more efficient to do this above rather than modifying all these lists
# Remove last element of perplexities_list, top_token_ids_list, top_tokens_list, top_probs_list since everything is off by one because this extension runs before generation
perplexities = ppl_logits_processor.perplexities_list[:-1]
top_token_ids_list = ppl_logits_processor.top_token_ids_list[:-1]
perplexities = ppl_logits_processor.perplexities_list
top_token_ids_list = ppl_logits_processor.top_token_ids_list
top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids[0]] for top_token_ids in top_token_ids_list]
top_probs_list = ppl_logits_processor.top_probs_list[:-1]
top_probs_list = ppl_logits_processor.top_probs_list
# Remove first element of generated_token_ids, generated_tokens, selected_probs because they are for the last token of the prompt
gen_token_ids = ppl_logits_processor.generated_token_ids[1:]
# Add last sampled token, if possible (it could be past the end of the top 5 list)
last_token, last_token_id, last_prob = get_last_token(text, top_tokens_list[-1], top_token_ids_list[-1][0], top_probs_list[-1][0])
if last_token_id != -1:
gen_token_ids.append(last_token_id)
gen_tokens = [shared.tokenizer.decode(token_id) for token_id in gen_token_ids]
sel_probs = ppl_logits_processor.selected_probs[1:]
if last_token_id != -1:
sel_probs.append(last_prob)
end_part = '</div></div>' if params['probability_dropdown'] else '</span>' # Helps with finding the index after replacing part of the text.
i = 0
for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
# Initial space added to deal with some tokenizers...
# Used to find where the message started generating, for working with "continue" generations
# Doesn't work for longer messages... Not sure how I should handle this
full_msg = shared.tokenizer.decode([token_id for token_id in gen_token_ids[:-1]]).strip()
# There was an issue with tab lengths being off by one...
# Seems like it might be model-dependent...
#text = re.sub(r'( {3,})', r'\1 ', text)
# Subtracting 2 to hopefully help with the tokenization spaces and continue issues,
# Though it's possible it could overwrite the previous token if it's the same in the last 2 chars
i = text.find(full_msg) - 2
if i < 0:
# Backup, try removing the extra whitespace (needed for continue)
i = text.find(full_msg.strip()) - 2
if i < 0:
i = 0
#i = 0
# Add token index for ability to regenerate from there
nonwhitespace_token_found = False
missing_token_count = 0
for index, token, prob, ppl, top_tokens, top_probs in zip(range(len(gen_tokens)), gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
# Somehow this works without issues, but not sure how...
if not nonwhitespace_token_found and token.strip() == '':
#print('Ignoring initial whitespace token...')
continue
nonwhitespace_token_found = True
max_prob = top_probs[0][0]
color = 'ffffff'
if params['color_by_probability'] and params['color_by_perplexity']:
color = probability_perplexity_color_scale(prob, ppl)
color = probability_perplexity_color_scale(prob, max_prob, ppl)
elif params['color_by_perplexity']:
color = perplexity_color_scale(ppl)
elif params['color_by_probability']:
color = probability_color_scale(prob)
if token in text[i:]:
if token.strip() in text[i:]:
if params['probability_dropdown']:
text = text[:i] + text[i:].replace(token, add_dropdown_html(token, color, top_tokens, top_probs[0], ppl), 1)
text = text[:i] + text[i:].replace(token.replace('\n', ''), add_dropdown_html(token, index, i, color, top_tokens, top_probs[0], ppl), 1)
else:
text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
text = text[:i] + text[i:].replace(token.replace('\n', ''), add_color_html(token, color), 1)
# This might be slightly inefficient
i += text[i:].find(end_part) + len(end_part)
else:
missing_token_count += 1
print('Missing token:', token, '...', text[i:i+20])
# If there are any missing tokens, then either the tokenization was off, or this is the start of a conversation, or something else went wrong
if missing_token_count > 5:
print("Canceling token coloring...")
return original_text
# Use full perplexity list for calculating the average here.
print('Average perplexity:', round(np.mean(ppl_logits_processor.perplexities_list[:-1]), 4))
# t1 = time.time()
# print(f"Modifier: {(t1-t0):.3f} s")
# Fix issue with mean of empty slice
if len(ppl_logits_processor.perplexities_list) > 1:
print('Average perplexity:', round(np.mean(ppl_logits_processor.perplexities_list[:-1]), 4))
#t1 = time.time()
#print(f"Output modifier: {(t1-t0):.3f} s")
# About 50 ms
return text
return text.strip() # Remove extra beginning whitespace that some tokenizers add
def probability_color_scale(prob):
'''
Green-yellow-red color scale
'''
# hue (0.0 = red, 0.33 = green)
# saturation (0.0 = gray / white, 1.0 = normal, just leave at 1.0)
# brightness (0.0 = black, 1.0 = brightest, use something in between for better readability if you want...)
hue = prob * 0.33
rv, gv, bv = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
# to hex
hex_col = f"{int(rv*255):02x}{int(gv*255):02x}{int(bv*255):02x}"
rv = 0
gv = 0
if prob <= 0.5:
rv = 'ff'
gv = hex(int(255 * prob * 2))[2:]
if len(gv) < 2:
gv = '0' * (2 - len(gv)) + gv
else:
rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
gv = 'ff'
if len(rv) < 2:
rv = '0' * (2 - len(rv)) + rv
return rv + gv + '00'
return hex_col
def perplexity_color_scale(ppl):
'''
Red component only, white for 0 perplexity (sorry if you're not in dark mode)
'''
value = hex(max(int(255.0 - params['ppl_scale'] * (float(ppl) - 1.0)), 0))[2:]
if len(value) < 2:
value = '0' * (2 - len(value)) + value
# hue (0.0 = red)
# saturation (1.0 = red)
# brightness (0.0 = black, 1.0 = red)
# scale saturation from white to red the higher the perplexity
return 'ff' + value + value
ppl = min(ppl, params['ppl_scale']) # clip ppl to 0-params['ppl_scale'] for color scaling. 15 should be fine for clipping and scaling
sat = ppl / params['ppl_scale']
rv, gv, bv = colorsys.hsv_to_rgb(0.0, sat, 1.0)
# to hex
hex_col = f"{int(rv*255):02x}{int(gv*255):02x}{int(bv*255):02x}"
return hex_col
def probability_perplexity_color_scale(prob, ppl):
def probability_perplexity_color_scale(prob, max_prob, ppl):
'''
Green-yellow-red for probability and blue component for perplexity
Green-yellow-red for relative probability compared to maximum for the current token, and blue component for perplexity
'''
rv = 0
gv = 0
bv = hex(min(max(int(params['ppl_scale'] * (float(ppl) - 1.0)), 0), 255))[2:]
if len(bv) < 2:
bv = '0' * (2 - len(bv)) + bv
if prob <= 0.5:
rv = 'ff'
gv = hex(int(255 * prob * 2))[2:]
if len(gv) < 2:
gv = '0' * (2 - len(gv)) + gv
else:
rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
gv = 'ff'
if len(rv) < 2:
rv = '0' * (2 - len(rv)) + rv
return rv + gv + bv
hue = prob/max_prob * 0.33
rv, gv, _ = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
ppl = min(ppl, params['ppl_scale']) # clip ppl to 0-params['ppl_scale'] for color scaling. 15 should be fine for clipping and scaling
bv = ppl / params['ppl_scale']
# to hex
hex_col = f"{int(rv*255):02x}{int(gv*255):02x}{int(bv*255):02x}"
return hex_col
def add_color_html(token, color):
return f'<span style="color: #{color}">{token}</span>'
output = ''
output += f'<span style="color: #{color}">{html.escape(repr(token)[1:-1])}</span>'
#if '\n' in token or '\r' in token: #token.isspace():
# output += '<br>'
return output
# TODO: Major issue: Applying this to too many tokens will cause a permanent slowdown in generation speed until the messages are removed from the history.
# TODO: Might also need message index for the click-to-regenerate feature to work... For now it only works in the last message, which I think is fine.
# TODO: Major issue: Applying this to too many tokens will cause a permanent slowdown in generation speed until the messages are removed from the history. The slowdown seems to be mostly resolved in the current version though
# I think the issue is from HTML elements taking up space in the visible history, and things like history deepcopy add latency proportional to the size of the history.
# Potential solution is maybe to modify the main generation code to send just the internal text and not the visible history, to avoid moving too much around.
# I wonder if we can also avoid using deepcopy here.
def add_dropdown_html(token, color, top_tokens, top_probs, perplexity=0):
html = f'<div class="hoverable"><span style="color: #{color}">{token}</span><div class="dropdown"><table class="dropdown-content"><tbody>'
for token_option, prob in zip(top_tokens, top_probs):
def add_dropdown_html(token, index, msg_position, color, top_tokens, top_probs, perplexity=0):
#print("Token:", token, token.isspace(), '\n' in token or '\r' in token)
output = ''
# Use the repr to get characters like \n visible. Exclude the quotes around it
output += f'<div class="hoverable" name="tok_{index}_{msg_position}"><span style="color: #{color}">{html.escape(repr(token)[1:-1])}</span><div class="dropdown"><table class="dropdown-content"><tbody>'
for i, token_option, prob in zip(range(len(top_tokens)), top_tokens, top_probs):
# TODO: Bold for selected token?
# Using divs prevented the problem of divs inside spans causing issues.
# Now the problem is that divs show the same whitespace of one space between every token.
# There is probably some way to fix this in CSS that I don't know about.
row_color = probability_color_scale(prob)
row_class = ' class="selected"' if token_option == token else ''
html += f'<tr{row_class}><td style="color: #{row_color}">{token_option}</td><td style="color: #{row_color}">{prob:.4f}</td></tr>'
# This time we want to include the quotes around it so that we can see where the spaces are.
output += f'<tr{row_class}><td name="opt_{index}_{i}_{msg_position}" style="color: #{row_color}">{html.escape(repr(token_option))}</td><td style="color: #{row_color}">{prob:.4f}</td></tr>'
if perplexity != 0:
ppl_color = perplexity_color_scale(perplexity)
html += f'<tr><td>Perplexity:</td><td style="color: #{ppl_color}">{perplexity:.4f}</td></tr>'
html += '</tbody></table></div></div>'
return html # About 750 characters per token...
output += f'<tr><td>Perplexity:</td><td style="color: #{ppl_color}">{perplexity:.4f}</td></tr>'
output += '</tbody></table></div></div>'
#if '\n' in token or '\r' in token: #token.isspace():
# output += '<br>' # I imagine this will cause problems sometimes
return output # About 750 characters per token...
def custom_css():
@ -223,8 +290,8 @@ def custom_css():
display: none;
position: absolute;
z-index: 50;
background-color: var(--block-background-fill);
box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
background-color: var(--background-fill-secondary);
box-shadow: 0px 8px 16px 0px rgba(0,0,0,1.0);
width: max-content;
overflow: visible;
padding: 5px;
@ -238,7 +305,7 @@ def custom_css():
}
.dropdown-content tr.selected {
background-color: var(--block-label-background-fill);
background-color: var(--background-fill-primary);
}
.dropdown-content td {
@ -267,21 +334,111 @@ def custom_css():
# TODO: This makes the hover menus extend outside the bounds of the chat area, which is good.
# However, it also makes the scrollbar disappear, which is bad.
# The scroll bar needs to still be present. So for now, we can't see dropdowns that extend past the edge of the chat area.
#.chat {
# overflow-y: auto;
#}
.chat {
overflow-y: auto;
}
"""
def custom_js():
return """
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
// Note that this will only work as intended on the last agent message
document.addEventListener("click", async function(event) {
//console.log(event.target);
const name = event.target.getAttribute("name");
if (name != null && name.includes("opt_")) {
const name_parts = name.split("_");
const token_index = name_parts[1];
const option_index = name_parts[2];
const msg_pos = name_parts[3];
// Exclude the quotes and convert newlines... Not sure about the newlines though
// TODO: Seems like continuing generation from a newline causes problems whether you add it or not!
const token_string = event.target.innerHTML.substring(1, event.target.innerHTML.length-1).replace(new RegExp(String.fromCharCode(92)+String.fromCharCode(92)+"r", "g"), '').replace(new RegExp(String.fromCharCode(92)+String.fromCharCode(92)+"n", "g"), '');
//console.log(token_index + ", " + option_index + ", " + token_string);
// Get all the previous text (I'm sure there is a more efficient way to do this)
var msg_text = ""
const msg_html = event.target.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement;
var msg_parts = msg_html.childNodes;
for (var i = 0; i < msg_parts.length; i++) {
var msg_part = msg_parts[i];
if (msg_part.nodeType === Node.ELEMENT_NODE) {
if (msg_part.nodeName == "DIV") {
msg_part_name = msg_part.getAttribute("name")
if (msg_part_name != null) {
var current_token_index = msg_part_name.split("_")[1];
var current_message_pos = msg_part_name.split("_")[2];
if (current_token_index == token_index && current_message_pos == msg_pos) {
// Use the replacement token
// TODO: Don't have access to the tokenizer here, and sometimes there needs to be a space added before this token
msg_text += token_string //.replace(new RegExp(String.fromCharCode(92)+String.fromCharCode(92)+"r", "g"), '').replace(new RegExp(String.fromCharCode(92)+String.fromCharCode(92)+"n", "g"), '');
break;
}
else {
// Replace here or at the end?
var text = msg_part.firstChild.innerHTML.replace(new RegExp(String.fromCharCode(92)+String.fromCharCode(92)+"r", "g"), '').replace(new RegExp(String.fromCharCode(92)+String.fromCharCode(92)+"n", "g"), '')
msg_text += text;
}
}
}
else {
// Break tag (hacky workaround because the newline literal can't be parsed here)
//msg_text += String.fromCharCode(10);
// Do nothing???
}
}
else if (msg_part.nodeType === Node.TEXT_NODE) {
msg_text += msg_part.textContent;
}
}
var textbox = document.querySelector("#chat-input textarea");
textbox.focus();
textbox.value = msg_text.trimStart() // Fix initial tokenization spaces
//console.log(textbox.value);
// Add some delays to make sure it's processed correctly. Without these, there's a chance the events don't go through correctly and it doesn't work
// It's unknown how long this will take, and probably depends on the size of the message...
// It would be better to somehow wait for gradio to update instead of waiting a fixed amount of time.
// Hopefully 1 second of delay before starting generation isn't unacceptable.
var inputEvent = new Event('input', {
bubbles: true,
cancelable: true,
});
textbox.dispatchEvent(inputEvent);
var changeEvent = new Event('change', {
bubbles: true,
cancelable: true,
});
textbox.dispatchEvent(changeEvent);
await sleep(250);
document.getElementById("Replace-last").click();
// This can take a while to execute
await sleep(750);
document.getElementById("Continue").click();
}
});
console.log("Custom JS for perplexity_colors loaded");
"""
# Monkeypatch applied to html_generator.py
# We simply don't render markdown into HTML. We wrap everything in <pre> tags to preserve whitespace
# formatting. If you're coloring tokens by perplexity or probability, or especially if you're using
# the probability dropdown, you probably care more about seeing the tokens the model actually outputted
# rather than rendering ```code blocks``` or *italics*.
@functools.lru_cache(maxsize=4096)
def convert_to_markdown(string):
return '<pre>' + string + '</pre>'
def convert_to_markdown_wrapped(string, use_cache=True):
if use_cache:
return convert_to_markdown(string)
return convert_to_markdown.__wrapped__(string)
# This is still necessary for formatting to work correctly
html_generator.convert_to_markdown = convert_to_markdown
@ -298,7 +455,7 @@ def ui():
def update_prob_dropdown_check(x):
params.update({'probability_dropdown': x})
active_check = gradio.Checkbox(value=True, label="Compute probabilities and perplexity scores", info="Activate this extension. Note that this extension currently does not work with exllama or llama.cpp.")
active_check = gradio.Checkbox(value=True, label="Compute probabilities and perplexity scores", info="Activate this extension. Note that this extension currently does not work with llama.cpp, but it does work with ExLlamav2_HF and llamacpp_HF when set up correctly")
color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.")
color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown", info="Hover over a token to show a dropdown of top token probabilities. Currently slightly buggy with whitespace between tokens.")

View file

@ -1,5 +1,41 @@
# superboogav2
# SuperboogaV2
For a description, please see the comments in this Pull Request:
Enhance your LLM with additional information from text, URLs, and files for more accurate and context-aware responses.
https://github.com/oobabooga/text-generation-webui/pull/3272
---
## Installation and Activation
1. Start the conda environment by running `cmd_windows.bat` or the equivalent for your system in the root directory of `text-generation-webui`.
2. Install the necessary packages:
```
pip install -r extensions/superboogav2/requirements.txt
```
3. Activate the extension in the `Session` tab of the web UI.
4. Click on `Apply flags/extensions and restart`. Optionally save the configuration by clicking on `Save UI defaults to settings.yaml`.
## Usage and Features
After activation, you can scroll further down in the chat UI to reveal the SuperboogaV2 interface. Here, you can add extra information to your chats through text input, multiple URLs, or by providing multiple files subject to the context window limit of your model.
The extra information and the current date and time are provided to the model as embeddings that persist across conversations. To clear them, click the `Clear Data` button and start a new chat. You can adjust the text extraction parameters and other options in the `Settings`.
## Supported File Formats
SuperboogaV2 utilizes MuPDF, pandas, python-docx, and python-pptx to extract text from various file formats, including:
- TXT
- PDF
- EPUB
- HTML
- CSV
- ODT/ODS/ODP
- DOCX/PPTX/XLSX
## Additional Information
SuperboogaV2 processes your data into context-aware chunks, applies cleaning techniques, and stores them as embeddings to minimize redundant computations. Relevance is determined using distance calculations and prioritization of recent information.
For a detailed description and more information, refer to the comments in this pull request: [https://github.com/oobabooga/text-generation-webui/pull/3272](https://github.com/oobabooga/text-generation-webui/pull/3272)

View file

@ -1,7 +1,7 @@
import math
import random
import threading
import torch
import chromadb
import numpy as np
import posthog
@ -16,9 +16,6 @@ from modules.text_generation import decode, encode
posthog.capture = lambda *args, **kwargs: None
embedder = embedding_functions.SentenceTransformerEmbeddingFunction("sentence-transformers/all-mpnet-base-v2")
class Info:
def __init__(self, start_index, text_with_context, distance, id):
self.text_with_context = text_with_context
@ -77,11 +74,23 @@ class Info:
class ChromaCollector():
def __init__(self):
name = ''.join(random.choice('ab') for _ in range(10))
name = "".join(random.choice("ab") for _ in range(10))
self.name = name
self.chroma_client = chromadb.Client(Settings(anonymized_telemetry=False))
self.collection = self.chroma_client.create_collection(name=name, embedding_function=embedder)
self.embedder = embedding_functions.SentenceTransformerEmbeddingFunction(
"sentence-transformers/all-mpnet-base-v2",
device=("cuda" if torch.cuda.is_available() else "cpu"),
)
chroma_client = chromadb.Client(Settings(anonymized_telemetry=False))
self.collection = chroma_client.create_collection(
name=self.name,
embedding_function=self.embedder,
metadata={
"hnsw:search_ef": 200,
"hnsw:construction_ef": 200,
"hnsw:M": 64,
},
)
self.ids = []
self.id_to_info = {}
@ -110,7 +119,7 @@ class ChromaCollector():
# If there are any non-existing texts, compute their embeddings all at once. Each call to embed has significant overhead.
if non_existing_texts:
non_existing_embeddings = embedder(non_existing_texts)
non_existing_embeddings = self.embedder(non_existing_texts)
for text, embedding in zip(non_existing_texts, non_existing_embeddings):
self.embeddings_cache[text] = embedding
@ -139,7 +148,7 @@ class ChromaCollector():
id_ = new_ids[i]
metadata = metadatas[i] if metadatas is not None else None
embedding = self.embeddings_cache.get(text)
if embedding:
if embedding is not None and embedding.any():
existing_texts.append(text)
existing_embeddings.append(embedding)
existing_ids.append(id_)
@ -323,6 +332,8 @@ class ChromaCollector():
def delete(self, ids_to_delete: list[str], where: dict):
with self.lock:
ids_to_delete = self.collection.get(ids=ids_to_delete, where=where)['ids']
if not ids_to_delete:
return
self.collection.delete(ids=ids_to_delete, where=where)
# Remove the deleted ids from self.ids and self.id_to_info
@ -335,12 +346,7 @@ class ChromaCollector():
def clear(self):
with self.lock:
self.chroma_client.reset()
self.ids = []
self.chroma_client.delete_collection(name=self.name)
self.collection = self.chroma_client.create_collection(name=self.name, embedding_function=embedder)
self.__init__() # reinitialize the collector
logger.info('Successfully cleared all records and reset chromaDB.')

View file

@ -127,6 +127,9 @@
"default": "\n\n<<document end>>\n\n"
},
"manual": {
"default": false
},
"add_date_time": {
"default": true
},
"add_chat_to_data": {

View file

@ -6,6 +6,7 @@ It will only include full words.
import bisect
import re
from datetime import datetime
import extensions.superboogav2.parameters as parameters
@ -154,6 +155,13 @@ def process_and_add_to_collector(corpus: str, collector: ChromaCollector, clear_
data_chunks_with_context = []
data_chunk_starting_indices = []
if parameters.get_add_date_time():
now = datetime.now()
date_time_chunk = f"Current time is {now.strftime('%H:%M:%S')}. Today is {now.strftime('%A')}. The current date is {now.strftime('%Y-%m-%d')}."
data_chunks.append(date_time_chunk)
data_chunks_with_context.append(date_time_chunk)
data_chunk_starting_indices.append(0)
# Handling chunk_regex
if parameters.get_chunk_regex():
if parameters.get_chunk_separator():

View file

@ -39,11 +39,11 @@ def _markdown_hyperparams():
# Convert numpy types to python types.
def _convert_np_types(params):
for key in params:
if type(params[key]) == np.bool_:
if isinstance(params[key], np.bool_):
params[key] = bool(params[key])
elif type(params[key]) == np.int64:
elif isinstance(params[key], np.int64):
params[key] = int(params[key])
elif type(params[key]) == np.float64:
elif isinstance(params[key], np.float64):
params[key] = float(params[key])
return params

View file

@ -251,6 +251,10 @@ def get_is_manual() -> bool:
return bool(Parameters.getInstance().hyperparameters['manual']['default'])
def get_add_date_time() -> bool:
return bool(Parameters.getInstance().hyperparameters['add_date_time']['default'])
def get_add_chat_to_data() -> bool:
return bool(Parameters.getInstance().hyperparameters['add_chat_to_data']['default'])
@ -331,6 +335,10 @@ def set_manual(value: bool):
Parameters.getInstance().hyperparameters['manual']['default'] = value
def set_add_date_time(value: bool):
Parameters.getInstance().hyperparameters['add_date_time']['default'] = value
def set_add_chat_to_data(value: bool):
Parameters.getInstance().hyperparameters['add_chat_to_data']['default'] = value

View file

@ -1,10 +1,16 @@
beautifulsoup4==4.12.2
chromadb==0.4.24
beautifulsoup4==4.13.3
chromadb==0.6.3
lxml
nltk
optuna
pandas==2.0.3
posthog==2.4.2
sentence_transformers==2.2.2
pandas
posthog==3.13.0
sentence_transformers==3.3.1
spacy
pytextrank
num2words
PyMuPDF
python-docx
python-pptx
openpyxl
odfpy

View file

@ -9,6 +9,13 @@ os.environ['NLTK_DATA'] = str(Path("extensions/superboogav2/nltk_data").resolve(
import codecs
import textwrap
import docx
import pptx
import fitz
fitz.TOOLS.mupdf_display_errors(False)
import pandas as pd
from odf.opendocument import load
from odf.draw import Page
import gradio as gr
@ -46,11 +53,123 @@ def _feed_data_into_collector(corpus):
yield '### Done.'
def _feed_file_into_collector(file):
yield '### Reading and processing the input dataset...'
text = file.decode('utf-8')
process_and_add_to_collector(text, collector, False, create_metadata_source('file'))
yield '### Done.'
def _feed_file_into_collector(files):
if not files:
logger.warning("No files selected.")
return
def read_binary_file(file_path):
try:
with open(file_path, 'rb') as f:
return f.read()
except Exception:
logger.error(f"Failed to read {file_path}.")
return None
def extract_with_utf8(text):
try:
return text.decode('utf-8')
except Exception:
return ""
def extract_with_fitz(file_content):
try:
with fitz.open(stream=file_content, filetype=None) as doc:
num_pages = doc.page_count
text = "\n".join(block[4] for page in doc for block in page.get_text("blocks") if block[6] == 0)
logger.info(f"Extracted text from {num_pages} pages with fitz.")
return text
except Exception:
return ""
def extract_with_docx(file_path):
try:
paragraphs = docx.Document(file_path).paragraphs
text = "\n".join(para.text for para in paragraphs)
logger.info(f"Extracted text from {len(paragraphs)} paragraphs with docx.")
return text
except Exception:
return ""
def extract_with_pptx(file_path):
try:
slides = pptx.Presentation(file_path).slides
text = "\n".join(
shape.text for slide in slides for shape in slide.shapes if hasattr(shape, "text")
)
logger.info(f"Extracted text from {len(slides)} slides with pptx.")
return text
except Exception:
return ""
def extract_with_odf(file_path):
if not file_path.endswith(".odp"):
return ""
try:
doc = load(file_path)
text_content = []
def extract_text(element):
parts = []
if hasattr(element, "childNodes"):
for node in element.childNodes:
if node.nodeType == node.TEXT_NODE:
parts.append(node.data)
else:
parts.append(extract_text(node))
return "".join(parts)
for slide in doc.getElementsByType(Page):
slide_text = extract_text(slide)
if slide_text.strip():
text_content.append(slide_text.strip())
text = "\n".join(text_content)
logger.info(f"Extracted text from {len(doc.getElementsByType(Page))} slides with odf.")
return text
except Exception as e:
logger.error(f"Failed to extract text from {file_path}: {str(e)}")
return ""
def extract_with_pandas(file_path):
try:
df = pd.read_excel(file_path)
text = "\n".join(str(cell) for col in df.columns for cell in df[col])
logger.info(f"Extracted text from {df.shape[0]}x{df.shape[1]} cells with pandas.")
return text
except Exception:
return ""
for index, file in enumerate(files, start=1):
file_name = os.path.basename(file)
logger.info(f"Processing {file_name}...")
file_content = read_binary_file(file)
if not file_content:
continue
text_extractors = [
lambda: extract_with_utf8(file_content),
lambda: extract_with_fitz(file_content),
lambda: extract_with_docx(file),
lambda: extract_with_pptx(file),
lambda: extract_with_odf(file),
lambda: extract_with_pandas(file),
]
for extractor in text_extractors:
text = extractor()
if text:
break
if not text:
logger.error(f"Failed to extract text from {file_name}, unsupported format.")
continue
process_and_add_to_collector(text, collector, False, create_metadata_source(f"file-{index}"))
logger.info("Done.")
yield "### Done."
def _feed_url_into_collector(urls):
@ -107,7 +226,7 @@ def _get_optimizable_settings() -> list:
def _apply_settings(optimization_steps, time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, postfix, data_separator, prefix, max_token_count,
preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, add_date_time, postfix, data_separator, prefix, max_token_count,
chunk_count, chunk_sep, context_len, chunk_regex, chunk_len, threads, strong_cleanup):
logger.debug('Applying settings.')
@ -124,6 +243,7 @@ def _apply_settings(optimization_steps, time_power, time_steepness, significant_
parameters.set_injection_strategy(injection_strategy)
parameters.set_add_chat_to_data(add_chat_to_data)
parameters.set_manual(manual)
parameters.set_add_date_time(add_date_time)
parameters.set_postfix(codecs.decode(postfix, 'unicode_escape'))
parameters.set_data_separator(codecs.decode(data_separator, 'unicode_escape'))
parameters.set_prefix(codecs.decode(prefix, 'unicode_escape'))
@ -237,11 +357,11 @@ def ui():
url_input = gr.Textbox(lines=10, label='Input URLs', info='Enter one or more URLs separated by newline characters.')
strong_cleanup = gr.Checkbox(value=parameters.get_is_strong_cleanup(), label='Strong cleanup', info='Only keeps html elements that look like long-form text.')
threads = gr.Number(value=parameters.get_num_threads(), label='Threads', info='The number of threads to use while downloading the URLs.', precision=0)
update_url = gr.Button('Load data')
update_urls = gr.Button('Load data')
with gr.Tab("File input"):
file_input = gr.File(label='Input file', type='binary')
update_file = gr.Button('Load data')
file_input = gr.File(label="Input file", type="filepath", file_count="multiple")
update_files = gr.Button('Load data')
with gr.Tab("Settings"):
with gr.Accordion("Processing settings", open=True):
@ -258,6 +378,7 @@ def ui():
postfix = gr.Textbox(value=codecs.encode(parameters.get_postfix(), 'unicode_escape').decode(), label='Postfix', info='What to put after the injection point.')
with gr.Row():
manual = gr.Checkbox(value=parameters.get_is_manual(), label="Is Manual", info="Manually specify when to use ChromaDB. Insert `!c` at the start or end of the message to trigger a query.", visible=shared.is_chat())
add_date_time = gr.Checkbox(value=parameters.get_add_date_time(), label="Add date and time to Data", info="Make the current date and time available to the model.", visible=shared.is_chat())
add_chat_to_data = gr.Checkbox(value=parameters.get_add_chat_to_data(), label="Add Chat to Data", info="Automatically feed the chat history as you chat.", visible=shared.is_chat())
injection_strategy = gr.Radio(choices=[parameters.PREPEND_TO_LAST, parameters.APPEND_TO_LAST, parameters.HIJACK_LAST_IN_CONTEXT], value=parameters.get_injection_strategy(), label='Injection Strategy', info='Where to inject the messages in chat or instruct mode.', visible=shared.is_chat())
with gr.Row():
@ -313,14 +434,14 @@ def ui():
last_updated = gr.Markdown()
all_params = [optimization_steps, time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, postfix, data_separator, prefix, max_token_count,
preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, add_date_time, postfix, data_separator, prefix, max_token_count,
chunk_count, chunk_sep, context_len, chunk_regex, chunk_len, threads, strong_cleanup]
optimizable_params = [time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
preprocess_pipeline, chunk_count, context_len, chunk_len]
update_data.click(_feed_data_into_collector, [data_input], last_updated, show_progress=False)
update_url.click(_feed_url_into_collector, [url_input], last_updated, show_progress=False)
update_file.click(_feed_file_into_collector, [file_input], last_updated, show_progress=False)
update_urls.click(_feed_url_into_collector, [url_input], last_updated, show_progress=False)
update_files.click(_feed_file_into_collector, [file_input], last_updated, show_progress=False)
benchmark_button.click(_begin_benchmark, [], last_updated, show_progress=True)
optimize_button.click(_begin_optimization, [], [last_updated] + optimizable_params, show_progress=True)
clear_button.click(_clear_data, [], last_updated, show_progress=False)
@ -339,6 +460,7 @@ def ui():
api_on.input(fn=_apply_settings, inputs=all_params, show_progress=False)
injection_strategy.input(fn=_apply_settings, inputs=all_params, show_progress=False)
add_chat_to_data.input(fn=_apply_settings, inputs=all_params, show_progress=False)
add_date_time.input(fn=_apply_settings, inputs=all_params, show_progress=False)
manual.input(fn=_apply_settings, inputs=all_params, show_progress=False)
postfix.input(fn=_apply_settings, inputs=all_params, show_progress=False)
data_separator.input(fn=_apply_settings, inputs=all_params, show_progress=False)

View file

@ -11,6 +11,7 @@ from pathlib import Path
import gradio as gr
import yaml
from jinja2.ext import loopcontrols
from jinja2.sandbox import ImmutableSandboxedEnvironment
from PIL import Image
@ -35,7 +36,11 @@ def strftime_now(format):
return datetime.now().strftime(format)
jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
jinja_env = ImmutableSandboxedEnvironment(
trim_blocks=True,
lstrip_blocks=True,
extensions=[loopcontrols]
)
jinja_env.globals["strftime_now"] = strftime_now

View file

@ -121,5 +121,45 @@ def monkey_patch_llama_cpp_python(lib):
lib.Llama.original_generate = lib.Llama.generate
lib.Llama.generate = my_generate
# Also patch Jinja2ChatFormatter to handle loop controls
if hasattr(lib, 'llama_chat_format') and hasattr(lib.llama_chat_format, 'Jinja2ChatFormatter'):
Formatter = lib.llama_chat_format.Jinja2ChatFormatter
if not getattr(Formatter, '_is_patched', False):
def patched_init(self, *args, **kwargs):
    """Drop-in replacement for Jinja2ChatFormatter.__init__ that builds the
    chat-template environment with the `loopcontrols` extension enabled,
    so templates using {% break %} / {% continue %} render instead of erroring.
    """
    # Accept both positional and keyword calling conventions, mirroring the
    # original signature: (template, eos_token, bos_token,
    # add_generation_prompt=True, stop_token_ids=None).
    if args:
        self.template = args[0]
        self.eos_token = args[1] if len(args) > 1 else kwargs.get('eos_token')
        self.bos_token = args[2] if len(args) > 2 else kwargs.get('bos_token')
        self.add_generation_prompt = args[3] if len(args) > 3 else kwargs.get('add_generation_prompt', True)
        self.stop_token_ids = args[4] if len(args) > 4 else kwargs.get('stop_token_ids')
    else:
        self.template = kwargs.get('template')
        self.eos_token = kwargs.get('eos_token')
        self.bos_token = kwargs.get('bos_token')
        self.add_generation_prompt = kwargs.get('add_generation_prompt', True)
        self.stop_token_ids = kwargs.get('stop_token_ids')

    # Normalize stop tokens to a set, as the original __init__ does.
    self.stop_token_ids = (
        set(self.stop_token_ids) if self.stop_token_ids is not None else None
    )

    # Import the sandbox submodule explicitly: `import jinja2` alone does not
    # guarantee the `jinja2.sandbox` attribute exists — it is only set on the
    # package once the submodule has been imported somewhere.
    import jinja2
    import jinja2.sandbox
    from jinja2.ext import loopcontrols

    # Recreate the template environment with loopcontrols enabled.
    self._environment = jinja2.sandbox.ImmutableSandboxedEnvironment(
        loader=jinja2.BaseLoader(),
        trim_blocks=True,
        lstrip_blocks=True,
        extensions=[loopcontrols]
    ).from_string(self.template)
# Replace the original __init__ with our patched version
Formatter.__init__ = patched_init
Formatter._is_patched = True
# Set the flag to indicate that the patch has been applied
lib.Llama._is_patched = True

View file

@ -137,6 +137,7 @@ def transformers_samplers():
'eta_cutoff',
'tfs',
'top_a',
'top_n_sigma',
'dry_multiplier',
'dry_allowed_length',
'dry_base',
@ -224,6 +225,7 @@ loaders_samplers = {
'eta_cutoff',
'tfs',
'top_a',
'top_n_sigma',
'dry_multiplier',
'dry_allowed_length',
'dry_base',
@ -288,6 +290,7 @@ loaders_samplers = {
'eta_cutoff',
'tfs',
'top_a',
'top_n_sigma',
'dry_multiplier',
'dry_allowed_length',
'dry_base',

View file

@ -28,6 +28,7 @@ def default_preset():
'eta_cutoff': 0,
'tfs': 1,
'top_a': 0,
'top_n_sigma': 0,
'dry_multiplier': 0,
'dry_allowed_length': 2,
'dry_base': 1.75,
@ -45,7 +46,7 @@ def default_preset():
'do_sample': True,
'dynamic_temperature': False,
'temperature_last': False,
'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram',
'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_n_sigma\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram',
'dry_sequence_breakers': '"\\n", ":", "\\"", "*"',
}

View file

@ -5,7 +5,6 @@ import random
import torch
import transformers
from transformers import LogitsWarper
from transformers.generation.logits_process import (
LogitNormalization,
LogitsProcessor,
@ -19,7 +18,7 @@ from modules.models import get_device
global_scores = None
class TemperatureLogitsWarperCustom(LogitsWarper):
class TemperatureLogitsWarperCustom(LogitsProcessor):
'''
A copy of the original Transformers temperature logits warper.
'''
@ -42,7 +41,7 @@ class TemperatureLogitsWarperCustom(LogitsWarper):
return scores
class DynamicTemperatureLogitsWarper(LogitsWarper):
class DynamicTemperatureLogitsWarper(LogitsProcessor):
'''
Dynamic temperature.
'''
@ -100,7 +99,7 @@ class DynamicTemperatureLogitsWarper(LogitsWarper):
return scores
class QuadraticSamplingLogitsWarper(LogitsWarper):
class QuadraticSamplingLogitsWarper(LogitsProcessor):
'''
Quadratic sampling with smoothing factor and smoothing curve parameters.
'''
@ -127,7 +126,7 @@ class QuadraticSamplingLogitsWarper(LogitsWarper):
return transformed_logits
class TailFreeLogitsWarper(LogitsWarper):
class TailFreeLogitsWarper(LogitsProcessor):
def __init__(self, tfs: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
tfs = float(tfs)
if tfs < 0 or tfs > 1.0:
@ -167,7 +166,7 @@ class TailFreeLogitsWarper(LogitsWarper):
return scores
class TopALogitsWarper(LogitsWarper):
class TopALogitsWarper(LogitsProcessor):
def __init__(self, top_a: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
top_a = float(top_a)
if top_a < 0 or top_a > 1.0:
@ -193,8 +192,48 @@ class TopALogitsWarper(LogitsWarper):
return scores
class TopNSigmaLogitsWarper(LogitsProcessor):
    """Top-nσ sampling: keep only the tokens whose logits lie within
    `n_sigma` standard deviations of the per-row maximum logit; everything
    below that threshold is set to `filter_value`.
    """

    def __init__(self, n_sigma: float = 2.0, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
        # n_sigma: threshold multiplier for the standard deviation.
        # filter_value: logit value assigned to filtered-out tokens.
        # min_tokens_to_keep: lower bound on the number of surviving tokens.
        if n_sigma < 0:
            raise ValueError(f"`n_sigma` must be a non-negative float, but is {n_sigma}")

        self.n_sigma = n_sigma
        self.filter_value = filter_value
        self.min_tokens_to_keep = min_tokens_to_keep

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        # Reference point: the largest logit in each row.
        peak = torch.max(scores, dim=-1, keepdim=True)[0]

        # Standard deviation over finite entries only: non-finite logits
        # (e.g. values already filtered to -inf) are zeroed before the
        # reduction so they cannot turn the statistic into NaN/inf.
        zeroed = torch.where(torch.isfinite(scores), scores, torch.zeros_like(scores))
        spread = torch.std(zeroed, dim=-1, keepdim=True)

        # Tokens with logits below max - n_sigma * std get filtered.
        cutoff = peak - self.n_sigma * spread
        drop = scores < cutoff

        # Guarantee at least min_tokens_to_keep survivors by un-marking the
        # top-k scoring positions.
        if self.min_tokens_to_keep > 1:
            keep_idx = torch.topk(scores, self.min_tokens_to_keep, dim=-1)[1]
            drop.scatter_(-1, keep_idx, False)

        return scores.masked_fill(drop, self.filter_value)
# Exclude Top Choices (XTC)
class XTCLogitsWarper(LogitsWarper):
class XTCLogitsWarper(LogitsProcessor):
def __init__(self, threshold: float, probability: float, filter_value: float = -float("Inf")):
self.threshold = threshold
self.probability = probability
@ -312,7 +351,7 @@ class DRYLogitsProcessor(LogitsProcessor):
return scores
class MirostatLogitsWarper(LogitsWarper):
class MirostatLogitsWarper(LogitsProcessor):
def __init__(self, mirostat_mode: int, mirostat_tau: float, mirostat_eta: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
if mirostat_mode not in [2]:
raise ValueError(f"`mirostat` has to be a an integer 2, but is {mirostat_mode}")
@ -361,7 +400,7 @@ class MirostatLogitsWarper(LogitsWarper):
return scores
class SpyLogitsWarper(LogitsWarper):
class SpyLogitsWarper(LogitsProcessor):
def __init__(self):
pass
@ -525,6 +564,14 @@ def get_logits_processor_patch(self, **kwargs):
)
)
if generation_config.top_n_sigma is not None and generation_config.top_n_sigma > 0.0:
warpers_to_add.append(
TopNSigmaLogitsWarper(
n_sigma=generation_config.top_n_sigma,
min_tokens_to_keep=min_tokens_to_keep
)
)
if generation_config.xtc_probability is not None and generation_config.xtc_probability > 0:
warpers_to_add.append(
XTCLogitsWarper(
@ -589,6 +636,7 @@ def get_logits_processor_patch(self, **kwargs):
'TailFreeLogitsWarper': 'tfs',
'TemperatureLogitsWarperCustom': 'temperature',
'TopALogitsWarper': 'top_a',
'TopNSigmaLogitsWarper': 'top_n_sigma',
'TopKLogitsWarper': 'top_k',
'TopPLogitsWarper': 'top_p',
'TypicalLogitsWarper': 'typical_p',
@ -636,6 +684,7 @@ def generation_config_init_patch(self, **kwargs):
self.smoothing_curve = kwargs.pop("smoothing_curve", 1.0)
self.tfs = kwargs.pop("tfs", 1.0)
self.top_a = kwargs.pop("top_a", 0.0)
self.top_n_sigma = kwargs.pop("top_n_sigma", 0.0)
self.mirostat_mode = kwargs.pop("mirostat_mode", 0)
self.mirostat_eta = kwargs.pop("mirostat_eta", 0.1)
self.mirostat_tau = kwargs.pop("mirostat_tau", 5)
@ -649,7 +698,7 @@ def generation_config_init_patch(self, **kwargs):
self.xtc_threshold = kwargs.pop("xtc_threshold", 0.1)
self.xtc_probability = kwargs.pop("xtc_probability", 0)
self.temperature_last = kwargs.pop("temperature_last", False)
self.sampler_priority = kwargs.pop("sampler_priority", ['repetition_penalty', 'presence_penalty', 'frequency_penalty', 'dry', 'temperature', 'dynamic_temperature', 'quadratic_sampling', 'top_k', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', 'min_p', 'mirostat', 'xtc', 'encoder_repetition_penalty', 'no_repeat_ngram'])
self.sampler_priority = kwargs.pop("sampler_priority", ['repetition_penalty', 'presence_penalty', 'frequency_penalty', 'dry', 'temperature', 'dynamic_temperature', 'quadratic_sampling', 'top_n_sigma', 'top_k', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', 'min_p', 'mirostat', 'xtc', 'encoder_repetition_penalty', 'no_repeat_ngram'])
def hijack_samplers():

View file

@ -302,6 +302,7 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
'xtc_probability',
'tfs',
'top_a',
'top_n_sigma',
'dry_multiplier',
'dry_allowed_length',
'dry_base',

View file

@ -183,6 +183,7 @@ def list_interface_input_elements():
'eta_cutoff',
'tfs',
'top_a',
'top_n_sigma',
'dry_multiplier',
'dry_allowed_length',
'dry_base',

View file

@ -37,6 +37,7 @@ def create_ui(default_preset):
gr.Markdown('## Curve cutoff')
shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p')
shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=generate_params['top_n_sigma'], step=0.01, label='top_n_sigma')
shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p')
shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k')
shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p')

View file

@ -1,11 +1,11 @@
accelerate==1.3.*
accelerate==1.4.*
bitsandbytes==0.45.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.5
jinja2==3.1.6
markdown
numba==0.59.*
numpy==1.26.*
@ -21,7 +21,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.48.*
transformers==4.49.*
tqdm
wandb
@ -32,29 +32,29 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

View file

@ -1,10 +1,10 @@
accelerate==1.3.*
accelerate==1.4.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.5
jinja2==3.1.6
markdown
numba==0.59.*
numpy==1.26.*
@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.48.*
transformers==4.49.*
tqdm
wandb
@ -31,14 +31,14 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.7+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.7+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.8+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.8+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View file

@ -1,10 +1,10 @@
accelerate==1.3.*
accelerate==1.4.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.5
jinja2==3.1.6
markdown
numba==0.59.*
numpy==1.26.*
@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.48.*
transformers==4.49.*
tqdm
wandb
@ -31,12 +31,12 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View file

@ -1,10 +1,10 @@
accelerate==1.3.*
accelerate==1.4.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.5
jinja2==3.1.6
markdown
numba==0.59.*
numpy==1.26.*
@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.48.*
transformers==4.49.*
tqdm
wandb
@ -31,8 +31,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl

View file

@ -1,10 +1,10 @@
accelerate==1.3.*
accelerate==1.4.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.5
jinja2==3.1.6
markdown
numba==0.59.*
numpy==1.26.*
@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.48.*
transformers==4.49.*
tqdm
wandb
@ -31,10 +31,10 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl

View file

@ -1,10 +1,10 @@
accelerate==1.3.*
accelerate==1.4.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.5
jinja2==3.1.6
markdown
numba==0.59.*
numpy==1.26.*
@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.48.*
transformers==4.49.*
tqdm
wandb
@ -31,7 +31,7 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

View file

@ -1,10 +1,10 @@
accelerate==1.3.*
accelerate==1.4.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.5
jinja2==3.1.6
markdown
numba==0.59.*
numpy==1.26.*
@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.48.*
transformers==4.49.*
tqdm
wandb
@ -31,7 +31,7 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

View file

@ -1,11 +1,11 @@
accelerate==1.3.*
accelerate==1.4.*
bitsandbytes==0.45.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.5
jinja2==3.1.6
markdown
numba==0.59.*
numpy==1.26.*
@ -21,7 +21,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.48.*
transformers==4.49.*
tqdm
wandb
@ -32,29 +32,29 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

View file

@ -1,10 +1,10 @@
accelerate==1.3.*
accelerate==1.4.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.5
jinja2==3.1.6
markdown
numba==0.59.*
numpy==1.26.*
@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.48.*
transformers==4.49.*
tqdm
wandb