Conrad Lippert-Zajaczkowski committed on
Commit ·
80ef686
1
Parent(s): 48f8e5d
test
Browse files
handler.py
CHANGED
|
@@ -17,15 +17,16 @@ class EndpointHandler:
|
|
| 17 |
def __init__(self, path=""):
|
| 18 |
# load the model
|
| 19 |
print('starting to load tokenizer')
|
| 20 |
-
tokenizer = LlamaTokenizer.from_pretrained("
|
| 21 |
print('loaded tokenizer')
|
| 22 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 23 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
| 24 |
model = LlamaForCausalLM.from_pretrained(
|
| 25 |
-
"
|
| 26 |
device_map="auto",
|
| 27 |
torch_dtype=dtype,
|
| 28 |
-
offload_folder="offload"
|
|
|
|
| 29 |
)
|
| 30 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 31 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
|
|
|
| 17 |
def __init__(self, path=""):
|
| 18 |
# load the model
|
| 19 |
print('starting to load tokenizer')
|
| 20 |
+
tokenizer = LlamaTokenizer.from_pretrained("/repository/orca_tokenizer", local_files_only=True)
|
| 21 |
print('loaded tokenizer')
|
| 22 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 23 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
| 24 |
model = LlamaForCausalLM.from_pretrained(
|
| 25 |
+
"/repository/pytorch_model",
|
| 26 |
device_map="auto",
|
| 27 |
torch_dtype=dtype,
|
| 28 |
+
offload_folder="offload",
|
| 29 |
+
local_files_only=True
|
| 30 |
)
|
| 31 |
gpu_info1 = nvmlDeviceGetMemoryInfo(gpu_h1)
|
| 32 |
print(f'vram {gpu_info1.total} used {gpu_info1.used} free {gpu_info1.free}')
|
added_tokens.json → orca_tokenizer/added_tokens.json
RENAMED
|
File without changes
|
special_tokens_map.json → orca_tokenizer/special_tokens_map.json
RENAMED
|
File without changes
|
tokenizer.model → orca_tokenizer/tokenizer.model
RENAMED
|
File without changes
|
tokenizer_config.json → orca_tokenizer/tokenizer_config.json
RENAMED
|
File without changes
|