Upload VideoEngine_optimized.py
Browse files- VideoEngine_optimized.py +15 -13
VideoEngine_optimized.py
CHANGED
|
@@ -25,6 +25,7 @@ from diffusers.utils.export_utils import export_to_video
|
|
| 25 |
class VideoEngine:
|
| 26 |
"""
|
| 27 |
Ultra-fast video generation with FP8 quantization.
|
|
|
|
| 28 |
"""
|
| 29 |
|
| 30 |
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
|
|
@@ -42,18 +43,14 @@ class VideoEngine:
|
|
| 42 |
MAX_FRAMES = 81
|
| 43 |
|
| 44 |
def __init__(self):
|
|
|
|
| 45 |
self.is_spaces = os.environ.get('SPACE_ID') is not None
|
| 46 |
-
self.
|
| 47 |
self.pipeline: Optional[WanImageToVideoPipeline] = None
|
| 48 |
self.is_loaded = False
|
| 49 |
self.use_aoti = False
|
| 50 |
-
print("✓ VideoEngine initialized")
|
| 51 |
|
| 52 |
-
|
| 53 |
-
def device(self) -> str:
|
| 54 |
-
if self._device is None:
|
| 55 |
-
self._device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 56 |
-
return self._device
|
| 57 |
|
| 58 |
def _check_xformers_available(self) -> bool:
|
| 59 |
"""Check if xFormers is available."""
|
|
@@ -122,13 +119,18 @@ class VideoEngine:
|
|
| 122 |
print("→ [3/5] Applying FP8 quantization...")
|
| 123 |
try:
|
| 124 |
from torchao.quantization import quantize_
|
| 125 |
-
from torchao.quantization import
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
# Quantize text encoder (INT8)
|
| 131 |
-
quantize_(self.pipeline.text_encoder,
|
| 132 |
|
| 133 |
# Quantize transformers (FP8)
|
| 134 |
quantize_(
|
|
@@ -333,4 +335,4 @@ class VideoEngine:
|
|
| 333 |
print("✓ VideoEngine unloaded")
|
| 334 |
|
| 335 |
except Exception as e:
|
| 336 |
-
print(f"⚠ Error during unload: {str(e)}")
|
|
|
|
| 25 |
class VideoEngine:
|
| 26 |
"""
|
| 27 |
Ultra-fast video generation with FP8 quantization.
|
| 28 |
+
70-90s inference time (compared to 150s baseline).
|
| 29 |
"""
|
| 30 |
|
| 31 |
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
|
|
|
|
| 43 |
MAX_FRAMES = 81
|
| 44 |
|
| 45 |
def __init__(self):
|
| 46 |
+
"""Initialize VideoEngine."""
|
| 47 |
self.is_spaces = os.environ.get('SPACE_ID') is not None
|
| 48 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 49 |
self.pipeline: Optional[WanImageToVideoPipeline] = None
|
| 50 |
self.is_loaded = False
|
| 51 |
self.use_aoti = False
|
|
|
|
| 52 |
|
| 53 |
+
print(f"✓ VideoEngine initialized ({self.device})")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
def _check_xformers_available(self) -> bool:
|
| 56 |
"""Check if xFormers is available."""
|
|
|
|
| 119 |
print("→ [3/5] Applying FP8 quantization...")
|
| 120 |
try:
|
| 121 |
from torchao.quantization import quantize_
|
| 122 |
+
from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
|
| 123 |
+
|
| 124 |
+
# int8_weight_only was renamed to Int8WeightOnlyConfig in torchao 0.16+
|
| 125 |
+
try:
|
| 126 |
+
from torchao.quantization import int8_weight_only
|
| 127 |
+
int8_config = int8_weight_only()
|
| 128 |
+
except ImportError:
|
| 129 |
+
from torchao.quantization import Int8WeightOnlyConfig
|
| 130 |
+
int8_config = Int8WeightOnlyConfig()
|
| 131 |
|
| 132 |
# Quantize text encoder (INT8)
|
| 133 |
+
quantize_(self.pipeline.text_encoder, int8_config)
|
| 134 |
|
| 135 |
# Quantize transformers (FP8)
|
| 136 |
quantize_(
|
|
|
|
| 335 |
print("✓ VideoEngine unloaded")
|
| 336 |
|
| 337 |
except Exception as e:
|
| 338 |
+
print(f"⚠ Error during unload: {str(e)}")
|