diff --git a/requirements.txt b/requirements.txt index 0eb3fed..d9cca4c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ -aiohttp==3.11.16 -anyio==4.4.0 -lib==4.0.0 -nltk==3.9.1 -psutil==6.0.0 -pycryptodome==3.20.0 -Requests==2.32.4 -transformers==4.50.0 -utils==1.0.2 +aiohttp~=3.11 +anyio~=4.4 +lib~=4.0 +nltk~=3.9 +psutil~=6.0 +pycryptodome~=3.20 +Requests~=2.32 +transformers~=4.52 +utils~=1.0 diff --git a/workers/hello_world/README.md b/workers/hello_world/README.md index 8ae5012..a523e10 100644 --- a/workers/hello_world/README.md +++ b/workers/hello_world/README.md @@ -56,7 +56,7 @@ import dataclasses import random from typing import Dict, Any -from transformers import AutoTokenizer # used to count tokens in a prompt +from transformers import OpenAIGPTTokenizer # used to count tokens in a prompt import nltk # used to download a list of all words to generate a random prompt and benchmark the LLM model from lib.data_types import ApiPayload @@ -65,7 +65,7 @@ nltk.download("words") WORD_LIST = nltk.corpus.words.words() # you can use any tokenizer that fits your LLM. `openai-gpt` is free to use and is a good fit for most LLMs -tokenizer = AutoTokenizer.from_pretrained("openai-community/openai-gpt") +tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt") @dataclasses.dataclass class InputData(ApiPayload): diff --git a/workers/hello_world/data_types.py b/workers/hello_world/data_types.py index 50ce3bc..0c2a296 100644 --- a/workers/hello_world/data_types.py +++ b/workers/hello_world/data_types.py @@ -3,7 +3,7 @@ import random import inspect from typing import Dict, Any -from transformers import AutoTokenizer +from transformers import OpenAIGPTTokenizer import nltk from lib.data_types import ApiPayload, JsonDataException @@ -12,7 +12,7 @@ nltk.download("words") WORD_LIST = nltk.corpus.words.words() # used to count to count tokens and workload for LLM -tokenizer = AutoTokenizer.from_pretrained("openai-community/openai-gpt") +tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt") @dataclasses.dataclass diff --git a/workers/tgi/data_types.py b/workers/tgi/data_types.py index 699837d..56e0b5b 100644 --- a/workers/tgi/data_types.py +++ b/workers/tgi/data_types.py @@ -3,7 +3,7 @@ import random import inspect from typing import Dict, Any -from transformers import AutoTokenizer +from transformers import OpenAIGPTTokenizer import nltk from lib.data_types import ApiPayload, JsonDataException @@ -11,7 +11,7 @@ from lib.data_types import ApiPayload, JsonDataException nltk.download("words") WORD_LIST = nltk.corpus.words.words() -tokenizer = AutoTokenizer.from_pretrained("openai-community/openai-gpt") +tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt") @dataclasses.dataclass