Instructions to use Efficient-Large-Model/VILA15-40b-hf-preview with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Efficient-Large-Model/VILA15-40b-hf-preview with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Efficient-Large-Model/VILA15-40b-hf-preview", trust_remote_code=True)
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Efficient-Large-Model/VILA15-40b-hf-preview", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use Efficient-Large-Model/VILA15-40b-hf-preview with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "Efficient-Large-Model/VILA15-40b-hf-preview"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Efficient-Large-Model/VILA15-40b-hf-preview",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/Efficient-Large-Model/VILA15-40b-hf-preview
- SGLang
How to use Efficient-Large-Model/VILA15-40b-hf-preview with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Efficient-Large-Model/VILA15-40b-hf-preview" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Efficient-Large-Model/VILA15-40b-hf-preview",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Efficient-Large-Model/VILA15-40b-hf-preview" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Efficient-Large-Model/VILA15-40b-hf-preview",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use Efficient-Large-Model/VILA15-40b-hf-preview with Docker Model Runner:
docker model run hf.co/Efficient-Large-Model/VILA15-40b-hf-preview
| import glob | |
| import os | |
| from collections import defaultdict | |
| from typing import Any, Dict, List, Optional, Union | |
| import cv2 | |
| import numpy as np | |
| import PIL | |
| import PIL.Image | |
| import requests | |
| from transformers import PretrainedConfig | |
| # from llava.constants import MEDIA_TOKENS | |
| # from llava.media import Image, Video | |
| # from llava.utils import make_list | |
| # from llava.utils.logging import logger | |
# Placeholder tokens, keyed by media kind, that stand in for media inside
# prompt text.
MEDIA_TOKENS = dict(
    image="<image>",
    video="<vila/video>",
)
class Media:
    """Base class of the media wrapper types defined in this file."""
class File(Media):
    """Media item identified by a path string."""

    def __init__(self, path: str) -> None:
        # The path is stored exactly as given; nothing is opened or checked here.
        self.path = path
class Image(File):
    """Image referenced by path.

    ``_extract_image`` treats a path starting with ``http://`` or
    ``https://`` as a URL and anything else as a local file path.
    """
class Video(File):
    """Video referenced by path.

    ``_load_video`` accepts either a directory of frame files or a path that
    is handed to ``cv2.VideoCapture``.
    """
def make_list(obj: Any) -> List:
    """Return ``obj`` itself if it is a list, otherwise a one-element list holding it."""
    if isinstance(obj, list):
        return obj
    return [obj]
def _extract_image(image: Union[Image, PIL.Image.Image]) -> PIL.Image.Image:
    """Resolve an image prompt part to a ``PIL.Image.Image``.

    Args:
        image: Either an already-loaded PIL image, which is returned
            unchanged, or an ``Image`` wrapper whose ``path`` is a local file
            path or an ``http://`` / ``https://`` URL.

    Returns:
        The PIL image.

    Raises:
        requests.RequestException: If the remote download fails, times out,
            or the server answers with an HTTP error status.
        OSError: If PIL cannot open the local file or the downloaded bytes.
    """
    if not isinstance(image, Image):
        return image

    if image.path.startswith(("http://", "https://")):
        import io  # local import: only the remote branch needs it

        # Without a timeout, requests can block forever on a stalled server.
        response = requests.get(image.path, timeout=30)
        # Fail on 4xx/5xx instead of handing an error page to PIL.
        response.raise_for_status()
        # `.content` is the fully downloaded body with any gzip/deflate
        # content-encoding already decoded, which `.raw` does not guarantee.
        return PIL.Image.open(io.BytesIO(response.content))

    return PIL.Image.open(image.path)
def _load_video(video_path: str, *, num_frames: int) -> List[PIL.Image.Image]:
    """Sample ``num_frames`` evenly spaced frames from a video.

    Args:
        video_path: Either a directory whose entries (in sorted order) are the
            frames, or a path that is opened with ``cv2.VideoCapture``.
        num_frames: Number of sample positions, spread uniformly from the
            first to the last frame.

    Returns:
        The sampled frames as PIL images. For a video file the frames are
        converted from BGR to RGB; positions that fail to decode are skipped,
        so the list may be shorter than ``num_frames``.

    Raises:
        ValueError: If the directory is empty or no frame of the video file
            can be grabbed.
    """
    # Load video frames from a directory
    if os.path.isdir(video_path):
        frame_paths = sorted(glob.glob(os.path.join(video_path, "*")))
        if not frame_paths:
            # Same error as the video-file branch, instead of an IndexError
            # from indexing an empty list below.
            raise ValueError(f"Video '{video_path}' has no frames.")
        indices = np.round(np.linspace(0, len(frame_paths) - 1, num_frames)).astype(int)
        return [PIL.Image.open(frame_paths[index]) for index in indices]

    # Load video frames from a video file
    vidcap = cv2.VideoCapture(video_path)
    try:
        # Find the last frame as frame count might not be accurate
        frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        while frame_count > 0:
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, frame_count - 1)
            if vidcap.grab():
                break
            frame_count -= 1
        else:
            # Reached only when the loop ends without `break`, i.e. no
            # position could be grabbed.
            raise ValueError(f"Video '{video_path}' has no frames.")

        # Extract frames uniformly
        indices = np.round(np.linspace(0, frame_count - 1, num_frames)).astype(int)

        # Rounding can repeat a position; decode each distinct position once.
        frames = {}
        for index in indices:
            if index in frames:
                continue
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, index)
            success, frame = vidcap.read()
            if not success:
                print(f"Failed to read frame {index} from video '{video_path}'. Skipped.")
                continue
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames[index] = PIL.Image.fromarray(frame)
        return [frames[index] for index in indices if index in frames]
    finally:
        # Always free the capture handle, on success and on error alike.
        vidcap.release()
def _extract_video(video: Video, config: PretrainedConfig) -> List[PIL.Image.Image]:
    """Load the frames of ``video`` according to ``config``.

    Args:
        video: Video wrapper whose ``path`` is passed to ``_load_video``.
        config: Configuration providing ``num_video_frames`` and, optionally,
            ``fps``.

    Returns:
        The frames returned by ``_load_video``.
    """
    num_frames = config.num_video_frames
    # A missing, None or zero `fps` means no rate was requested, so a config
    # without the attribute no longer raises AttributeError here.
    if getattr(config, "fps", 0):
        print("Extracting frames from video with specified FPS is not supported yet. Ignored.")
    return _load_video(video.path, num_frames=num_frames)
def extract_media(
    messages: List[Dict[str, Any]],
    config: Optional[PretrainedConfig] = None,
    draft: bool = False,
) -> Dict[str, List[Any]]:
    """Collect the media in ``messages`` and flatten each message to text.

    Each message's ``"value"`` is a single part or a list of parts. String
    parts are appended to the text after any literal media token in them is
    removed (with a printed notice). Image and video parts are collected and
    replaced in the text by the matching ``MEDIA_TOKENS`` entry.

    Note:
        ``messages`` is modified in place: every ``message["value"]`` is
        overwritten with the flattened text.

    Args:
        messages: Message dicts, each with a ``"value"`` key.
        config: Configuration forwarded to ``_extract_video``. Required when
            a ``Video`` part is processed with ``draft=False``.
        draft: If true, media parts are stored as given instead of being
            loaded.

    Returns:
        A mapping with an ``"image"`` and/or ``"video"`` list, in order of
        appearance. With ``draft=False`` the image entries are PIL images and
        each video entry is a list of frames.

    Raises:
        ValueError: If a part has an unsupported type, or a video must be
            loaded but ``config`` is ``None``.
    """
    media = defaultdict(list)
    for message in messages:
        pieces = []
        for part in make_list(message["value"]):
            if isinstance(part, str):
                for token in MEDIA_TOKENS.values():
                    if token in part:
                        print(f"Media token '{token}' found in text: '{part}'. Removed.")
                        part = part.replace(token, "").strip()
                pieces.append(part)
            elif isinstance(part, (Image, PIL.Image.Image)):
                media["image"].append(part if draft else _extract_image(part))
                pieces.append(MEDIA_TOKENS["image"])
            elif isinstance(part, Video):
                if draft:
                    media["video"].append(part)
                else:
                    if config is None:
                        # Fail clearly rather than with an AttributeError on
                        # None inside _extract_video.
                        raise ValueError("A config is required to extract frames from a video.")
                    media["video"].append(_extract_video(part, config))
                pieces.append(MEDIA_TOKENS["video"])
            else:
                raise ValueError(f"Unsupported prompt part type: {type(part)}")
        message["value"] = "".join(pieces)
    return media