update tokenizers
This commit is contained in:
committed by
Nader Arbabian
parent
d99adcfb36
commit
4bac805093
+9
-9
@@ -1,9 +1,9 @@
|
|||||||
aiohttp==3.11.16
|
aiohttp~=3.11
|
||||||
anyio==4.4.0
|
anyio~=4.4
|
||||||
lib==4.0.0
|
lib~=4.0
|
||||||
nltk==3.9.1
|
nltk~=3.9
|
||||||
psutil==6.0.0
|
psutil~=6.0
|
||||||
pycryptodome==3.20.0
|
pycryptodome~=3.20
|
||||||
Requests==2.32.4
|
Requests~=2.32
|
||||||
transformers==4.50.0
|
transformers~=4.52
|
||||||
utils==1.0.2
|
utils~=1.0
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ import dataclasses
|
|||||||
import random
|
import random
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
from transformers import AutoTokenizer # used to count tokens in a prompt
|
from transformers import OpenAIGPTTokenizer # used to count tokens in a prompt
|
||||||
import nltk # used to download a list of all words to generate a random prompt and benchmark the LLM model
|
import nltk # used to download a list of all words to generate a random prompt and benchmark the LLM model
|
||||||
|
|
||||||
from lib.data_types import ApiPayload
|
from lib.data_types import ApiPayload
|
||||||
@@ -65,7 +65,7 @@ nltk.download("words")
|
|||||||
WORD_LIST = nltk.corpus.words.words()
|
WORD_LIST = nltk.corpus.words.words()
|
||||||
|
|
||||||
# you can use any tokenizer that fits your LLM. `openai-gpt` is free to use and is a good fit for most LLMs
|
# you can use any tokenizer that fits your LLM. `openai-gpt` is free to use and is a good fit for most LLMs
|
||||||
tokenizer = AutoTokenizer.from_pretrained("openai-community/openai-gpt")
|
tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class InputData(ApiPayload):
|
class InputData(ApiPayload):
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import random
|
|||||||
import inspect
|
import inspect
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
from transformers import AutoTokenizer
|
from transformers import OpenAIGPTTokenizer
|
||||||
import nltk
|
import nltk
|
||||||
|
|
||||||
from lib.data_types import ApiPayload, JsonDataException
|
from lib.data_types import ApiPayload, JsonDataException
|
||||||
@@ -12,7 +12,7 @@ nltk.download("words")
|
|||||||
WORD_LIST = nltk.corpus.words.words()
|
WORD_LIST = nltk.corpus.words.words()
|
||||||
|
|
||||||
# used to count tokens and workload for LLM
|
# used to count tokens and workload for LLM
|
||||||
tokenizer = AutoTokenizer.from_pretrained("openai-community/openai-gpt")
|
tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import random
|
|||||||
import inspect
|
import inspect
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
from transformers import AutoTokenizer
|
from transformers import OpenAIGPTTokenizer
|
||||||
import nltk
|
import nltk
|
||||||
|
|
||||||
from lib.data_types import ApiPayload, JsonDataException
|
from lib.data_types import ApiPayload, JsonDataException
|
||||||
@@ -11,7 +11,7 @@ from lib.data_types import ApiPayload, JsonDataException
|
|||||||
nltk.download("words")
|
nltk.download("words")
|
||||||
WORD_LIST = nltk.corpus.words.words()
|
WORD_LIST = nltk.corpus.words.words()
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained("openai-community/openai-gpt")
|
tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
|
|||||||
Reference in New Issue
Block a user