update tokenizers
This commit is contained in:
committed by
Nader Arbabian
parent
d99adcfb36
commit
4bac805093
+9
-9
@@ -1,9 +1,9 @@
|
||||
aiohttp==3.11.16
|
||||
anyio==4.4.0
|
||||
lib==4.0.0
|
||||
nltk==3.9.1
|
||||
psutil==6.0.0
|
||||
pycryptodome==3.20.0
|
||||
Requests==2.32.4
|
||||
transformers==4.50.0
|
||||
utils==1.0.2
|
||||
aiohttp~=3.11
|
||||
anyio~=4.4
|
||||
lib~=4.0
|
||||
nltk~=3.9
|
||||
psutil~=6.0
|
||||
pycryptodome~=3.20
|
||||
Requests~=2.32
|
||||
transformers~=4.52
|
||||
utils~=1.0
|
||||
|
||||
@@ -56,7 +56,7 @@ import dataclasses
|
||||
import random
|
||||
from typing import Dict, Any
|
||||
|
||||
from transformers import AutoTokenizer # used to count tokens in a prompt
|
||||
from transformers import OpenAIGPTTokenizer # used to count tokens in a prompt
|
||||
import nltk # used to download a list of all words to generate a random prompt and benchmark the LLM model
|
||||
|
||||
from lib.data_types import ApiPayload
|
||||
@@ -65,7 +65,7 @@ nltk.download("words")
|
||||
WORD_LIST = nltk.corpus.words.words()
|
||||
|
||||
# you can use any tokenizer that fits your LLM. `openai-gpt` is free to use and is a good fit for most LLMs
|
||||
tokenizer = AutoTokenizer.from_pretrained("openai-community/openai-gpt")
|
||||
tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
|
||||
|
||||
@dataclasses.dataclass
|
||||
class InputData(ApiPayload):
|
||||
|
||||
@@ -3,7 +3,7 @@ import random
|
||||
import inspect
|
||||
from typing import Dict, Any
|
||||
|
||||
from transformers import AutoTokenizer
|
||||
from transformers import OpenAIGPTTokenizer
|
||||
import nltk
|
||||
|
||||
from lib.data_types import ApiPayload, JsonDataException
|
||||
@@ -12,7 +12,7 @@ nltk.download("words")
|
||||
WORD_LIST = nltk.corpus.words.words()
|
||||
|
||||
# used to count tokens and workload for LLM
|
||||
tokenizer = AutoTokenizer.from_pretrained("openai-community/openai-gpt")
|
||||
tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
|
||||
@@ -3,7 +3,7 @@ import random
|
||||
import inspect
|
||||
from typing import Dict, Any
|
||||
|
||||
from transformers import AutoTokenizer
|
||||
from transformers import OpenAIGPTTokenizer
|
||||
import nltk
|
||||
|
||||
from lib.data_types import ApiPayload, JsonDataException
|
||||
@@ -11,7 +11,7 @@ from lib.data_types import ApiPayload, JsonDataException
|
||||
nltk.download("words")
|
||||
WORD_LIST = nltk.corpus.words.words()
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("openai-community/openai-gpt")
|
||||
tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
|
||||
Reference in New Issue
Block a user