Spaces:

DawnC
/

VividFlow

Running on Zero

App Files Files Community

DawnC committed on Apr 27

Commit

eb66bbc

verified ·

1 Parent(s): 2b1e668

Upload VideoEngine_optimized.py

Browse files

Files changed (1) hide show

VideoEngine_optimized.py +15 -13

VideoEngine_optimized.py CHANGED Viewed

@@ -25,6 +25,7 @@ from diffusers.utils.export_utils import export_to_video
 class VideoEngine:
     """
     Ultra-fast video generation with FP8 quantization.
     """
     MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
@@ -42,18 +43,14 @@ class VideoEngine:
     MAX_FRAMES = 81
     def __init__(self):
         self.is_spaces = os.environ.get('SPACE_ID') is not None
-        self._device: Optional[str] = None  # defer CUDA check until inside @spaces.GPU
         self.pipeline: Optional[WanImageToVideoPipeline] = None
         self.is_loaded = False
         self.use_aoti = False
-        print("✓ VideoEngine initialized")
-    @property
-    def device(self) -> str:
-        if self._device is None:
-            self._device = "cuda" if torch.cuda.is_available() else "cpu"
-        return self._device
     def _check_xformers_available(self) -> bool:
         """Check if xFormers is available."""
@@ -122,13 +119,18 @@ class VideoEngine:
             print("→ [3/5] Applying FP8 quantization...")
             try:
                 from torchao.quantization import quantize_
-                from torchao.quantization import (
-                    Float8DynamicActivationFloat8WeightConfig,
-                    int8_weight_only
-                )
                 # Quantize text encoder (INT8)
-                quantize_(self.pipeline.text_encoder, int8_weight_only())
                 # Quantize transformers (FP8)
                 quantize_(
@@ -333,4 +335,4 @@ class VideoEngine:
             print("✓ VideoEngine unloaded")
         except Exception as e:
-            print(f"⚠ Error during unload: {str(e)}")

 class VideoEngine:
     """
     Ultra-fast video generation with FP8 quantization.
+    70-90s inference time (compared to 150s baseline).
     """
     MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
     MAX_FRAMES = 81
     def __init__(self):
+        """Initialize VideoEngine."""
         self.is_spaces = os.environ.get('SPACE_ID') is not None
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.pipeline: Optional[WanImageToVideoPipeline] = None
         self.is_loaded = False
         self.use_aoti = False
+        print(f"✓ VideoEngine initialized ({self.device})")
     def _check_xformers_available(self) -> bool:
         """Check if xFormers is available."""
             print("→ [3/5] Applying FP8 quantization...")
             try:
                 from torchao.quantization import quantize_
+                from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
+                # int8_weight_only was renamed to Int8WeightOnlyConfig in torchao 0.16+
+                try:
+                    from torchao.quantization import int8_weight_only
+                    int8_config = int8_weight_only()
+                except ImportError:
+                    from torchao.quantization import Int8WeightOnlyConfig
+                    int8_config = Int8WeightOnlyConfig()
                 # Quantize text encoder (INT8)
+                quantize_(self.pipeline.text_encoder, int8_config)
                 # Quantize transformers (FP8)
                 quantize_(
             print("✓ VideoEngine unloaded")
         except Exception as e:
+            print(f"⚠ Error during unload: {str(e)}")