update tokenizers

This commit is contained in:
Nader Arbabian
2025-06-10 17:01:28 -07:00
committed by Nader Arbabian
parent d99adcfb36
commit 4bac805093
4 changed files with 15 additions and 15 deletions
+2 -2
View File
@@ -56,7 +56,7 @@ import dataclasses
import random
from typing import Dict, Any
from transformers import AutoTokenizer # used to count tokens in a prompt
from transformers import OpenAIGPTTokenizer # used to count tokens in a prompt
import nltk # used to download a list of all words to generate a random prompt and benchmark the LLM model
from lib.data_types import ApiPayload
@@ -65,7 +65,7 @@ nltk.download("words")
# Words from the NLTK "words" corpus (downloaded above); used to generate random prompts for benchmarking the LLM.
WORD_LIST = nltk.corpus.words.words()
# Any tokenizer that fits your LLM can be used here; it is only used to count the tokens in a prompt.
# `openai-gpt` is free to use and is a good fit for most LLMs.
# NOTE(review): the two assignments below bind the same name, so only the second would take effect if both ran.
# They look like the before/after versions of a single line in this diff view -- confirm which one is current.
tokenizer = AutoTokenizer.from_pretrained("openai-community/openai-gpt")
tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
@dataclasses.dataclass
class InputData(ApiPayload):