Conrad Lippert-Zajaczkowski committed on
Commit ·
48f8e5d
1
Parent(s): e31fd99
test specific
Browse files- handler.py +3 -2
handler.py
CHANGED
|
@@ -17,14 +17,15 @@ class EndpointHandler:
|
|
| 17 |
def __init__(self, path=""):
|
| 18 |
# load the model
|
| 19 |
print('starting to load tokenizer')
|
| 20 |
-
tokenizer = LlamaTokenizer.from_pretrained("
|
| 21 |
print('loaded tokenizer')
|
| 22 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 23 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
| 24 |
model = LlamaForCausalLM.from_pretrained(
|
| 25 |
-
"
|
| 26 |
device_map="auto",
|
| 27 |
torch_dtype=dtype,
|
|
|
|
| 28 |
)
|
| 29 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 30 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
|
|
|
| 17 |
def __init__(self, path=""):
|
| 18 |
# load the model
|
| 19 |
print('starting to load tokenizer')
|
| 20 |
+
tokenizer = LlamaTokenizer.from_pretrained(".", local_files_only=True)
|
| 21 |
print('loaded tokenizer')
|
| 22 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 23 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
| 24 |
model = LlamaForCausalLM.from_pretrained(
|
| 25 |
+
".",
|
| 26 |
device_map="auto",
|
| 27 |
torch_dtype=dtype,
|
| 28 |
+
offload_folder="offload"
|
| 29 |
)
|
| 30 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 31 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|