Conrad Lippert-Zajaczkowski committed on
Commit ·
80ef686
1
Parent(s): 48f8e5d
test
Browse files
handler.py
CHANGED
|
@@ -17,15 +17,16 @@ class EndpointHandler:
|
|
| 17 |
def __init__(self, path=""):
|
| 18 |
# load the model
|
| 19 |
print('starting to load tokenizer')
|
| 20 |
-
tokenizer = LlamaTokenizer.from_pretrained("
|
| 21 |
print('loaded tokenizer')
|
| 22 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 23 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
| 24 |
model = LlamaForCausalLM.from_pretrained(
|
| 25 |
-
"
|
| 26 |
device_map="auto",
|
| 27 |
torch_dtype=dtype,
|
| 28 |
-
offload_folder="offload"
|
|
|
|
| 29 |
)
|
| 30 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 31 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
|
|
|
| 17 |
def __init__(self, path=""):
|
| 18 |
# load the model
|
| 19 |
print('starting to load tokenizer')
|
| 20 |
+
tokenizer = LlamaTokenizer.from_pretrained("/repository/orca_tokenizer", local_files_only=True)
|
| 21 |
print('loaded tokenizer')
|
| 22 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 23 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
| 24 |
model = LlamaForCausalLM.from_pretrained(
|
| 25 |
+
"/repository/pytorch_model",
|
| 26 |
device_map="auto",
|
| 27 |
torch_dtype=dtype,
|
| 28 |
+
offload_folder="offload",
|
| 29 |
+
local_files_only=True
|
| 30 |
)
|
| 31 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 32 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
added_tokens.json → orca_tokenizer/added_tokens.json
RENAMED
|
File without changes
|
special_tokens_map.json → orca_tokenizer/special_tokens_map.json
RENAMED
|
File without changes
|
tokenizer.model → orca_tokenizer/tokenizer.model
RENAMED
|
File without changes
|
tokenizer_config.json → orca_tokenizer/tokenizer_config.json
RENAMED
|
File without changes
|