Spaces: djhui5710 / reachy_mini_home_assistant (Running)

Desmond-Dong committed on Jan 1

Commit

801f3a9

1 Parent(s): 0ba80ff

refactor: remove local STT/TTS code (handled by Home Assistant)

- Delete voice/stt.py (Whisper STT not needed, handled by Home Assistant)
- Delete voice/tts.py (Piper TTS not needed, handled by Home Assistant)
- Update voice/__init__.py to remove STT/TTS exports
- Keep only wake word detection functionality
- STT and TTS are processed by Home Assistant via ESPHome protocol

Files changed (4)

src/reachy_mini_ha_voice/__init__.py +57 -32
src/reachy_mini_ha_voice/voice/__init__.py +3 -6
src/reachy_mini_ha_voice/voice/stt.py +0 -104
src/reachy_mini_ha_voice/voice/tts.py +0 -114

src/reachy_mini_ha_voice/__init__.py CHANGED Viewed

@@ -12,56 +12,81 @@ __email__ = "your.email@example.com"
 # Check for required models on import
 import os
 import sys
 from pathlib import Path
-def check_required_files():
-    """Check if required model files exist"""
     wakewords_dir = Path(__file__).parent.parent.parent / "wakewords"
     sounds_dir = Path(__file__).parent.parent.parent / "sounds"
     missing_files = []
-    # Check wake word models
-    required_models = [
-        wakewords_dir / "okay_nabu.tflite",
-        wakewords_dir / "okay_nabu.json",
-    ]
-    for model_file in required_models:
-        if not model_file.exists():
-            missing_files.append(model_file.name)
-    # Check sound effects
-    required_sounds = [
-        sounds_dir / "wake_word_triggered.flac",
-        sounds_dir / "timer_finished.flac",
-    ]
-    for sound_file in required_sounds:
-        if not sound_file.exists():
-            missing_files.append(sound_file.name)
     if missing_files:
-        print("\n" + "="*60)
-        print("WARNING: Required files are missing!")
-        print("="*60)
-        print("\nMissing files:")
         for file in missing_files:
             print(f"  - {file}")
-        print("\nPlease run the download script:")
-        print("  Linux/Mac: ./download_models.sh")
-        print("  Windows: powershell -ExecutionPolicy Bypass -File download_models.ps1")
-        print("\nOr run the automated installation script:")
-        print("  Linux/Mac: ./install.sh")
-        print("  Windows: powershell -ExecutionPolicy Bypass -File install.ps1")
-        print("="*60 + "\n")
         return False
     return True
-# Check on import
-check_required_files()
 from .app import ReachyMiniVoiceApp
 from .state import ServerState

 # Check for required models on import
 import os
 import sys
+import subprocess
 from pathlib import Path
+def download_file(url, dest_path):
+    """Download a file from URL"""
+    try:
+        import urllib.request
+        urllib.request.urlretrieve(url, dest_path)
+        return True
+    except Exception as e:
+        print(f"  Error downloading {url}: {e}")
+        return False
+def check_and_download_files():
+    """Check if required model files exist and download if missing"""
     wakewords_dir = Path(__file__).parent.parent.parent / "wakewords"
     sounds_dir = Path(__file__).parent.parent.parent / "sounds"
+    # Ensure directories exist
+    wakewords_dir.mkdir(parents=True, exist_ok=True)
+    sounds_dir.mkdir(parents=True, exist_ok=True)
     missing_files = []
+    downloaded_files = []
+    # Check and download wake word models
+    model_urls = {
+        "okay_nabu.tflite": "https://github.com/esphome/micro-wake-word-models/raw/main/models/okay_nabu.tflite",
+        "okay_nabu.json": "https://github.com/esphome/micro-wake-word-models/raw/main/models/okay_nabu.json",
+        "hey_jarvis.tflite": "https://github.com/esphome/micro-wake-word-models/raw/main/models/hey_jarvis.tflite",
+        "hey_jarvis.json": "https://github.com/esphome/micro-wake-word-models/raw/main/models/hey_jarvis.json",
+    }
+    for filename, url in model_urls.items():
+        dest_path = wakewords_dir / filename
+        if not dest_path.exists():
+            missing_files.append(filename)
+            print(f"Downloading {filename}...")
+            if download_file(url, dest_path):
+                downloaded_files.append(filename)
+                print(f"  ✓ {filename} downloaded")
+            else:
+                print(f"  ✗ Failed to download {filename}")
+    # Check and download sound effects
+    sound_urls = {
+        "wake_word_triggered.flac": "https://github.com/OHF-Voice/linux-voice-assistant/raw/main/sounds/wake_word_triggered.flac",
+        "timer_finished.flac": "https://github.com/OHF-Voice/linux-voice-assistant/raw/main/sounds/timer_finished.flac",
+    }
+    for filename, url in sound_urls.items():
+        dest_path = sounds_dir / filename
+        if not dest_path.exists():
+            missing_files.append(filename)
+            print(f"Downloading {filename}...")
+            if download_file(url, dest_path):
+                downloaded_files.append(filename)
+                print(f"  ✓ {filename} downloaded")
+            else:
+                print(f"  ✗ Failed to download {filename}")
+    if downloaded_files:
+        print(f"\n✓ Downloaded {len(downloaded_files)} file(s)")
     if missing_files:
+        print(f"\n✗ Still missing {len(missing_files)} file(s):")
         for file in missing_files:
             print(f"  - {file}")
         return False
+    print("\n✓ All required files are present")
     return True
+# Check and download files on import
+check_and_download_files()
 from .app import ReachyMiniVoiceApp
 from .state import ServerState

src/reachy_mini_ha_voice/voice/__init__.py CHANGED Viewed

@@ -1,17 +1,14 @@
 """
 Voice processing module for Reachy Mini Voice Assistant
 """
 from .detector import WakeWordDetector, MicroWakeWordDetector, OpenWakeWordDetector
-from .stt import STTEngine, WhisperSTT
-from .tts import TTSEngine, PiperTTS
 __all__ = [
     "WakeWordDetector",
     "MicroWakeWordDetector",
     "OpenWakeWordDetector",
-    "STTEngine",
-    "WhisperSTT",
-    "TTSEngine",
-    "PiperTTS",
 ]

 """
 Voice processing module for Reachy Mini Voice Assistant
+Note: STT and TTS are handled by Home Assistant via ESPHome protocol.
+This module only contains offline wake word detection.
 """
 from .detector import WakeWordDetector, MicroWakeWordDetector, OpenWakeWordDetector
 __all__ = [
     "WakeWordDetector",
     "MicroWakeWordDetector",
     "OpenWakeWordDetector",
 ]

src/reachy_mini_ha_voice/voice/stt.py DELETED Viewed

@@ -1,104 +0,0 @@
-"""
-Speech-to-Text engine for Reachy Mini Voice Assistant
-"""
-import asyncio
-import logging
-from abc import ABC, abstractmethod
-from typing import Optional
-logger = logging.getLogger(__name__)
-class STTEngine(ABC):
-    """Abstract base class for STT engine"""
-    @abstractmethod
-    async def load_model(self, model_name: str):
-        """Load STT model"""
-        pass
-    @abstractmethod
-    async def transcribe(self, audio_data: bytes) -> str:
-        """Transcribe audio to text"""
-        pass
-class WhisperSTT(STTEngine):
-    """Whisper STT engine"""
-    def __init__(self, model_name: str = "base"):
-        self.model = None
-        self.model_name = model_name
-        self._loaded = False
-    async def load_model(self, model_name: str):
-        """Load Whisper model"""
-        try:
-            import whisper
-            self.model_name = model_name
-            self.model = whisper.load_model(model_name)
-            self._loaded = True
-            logger.info(f"Loaded Whisper model: {model_name}")
-        except ImportError:
-            logger.error("whisper not installed. Install with: pip install openai-whisper")
-            raise
-        except Exception as e:
-            logger.error(f"Failed to load Whisper model: {e}")
-            raise
-    async def transcribe(self, audio_data: bytes) -> str:
-        """Transcribe audio to text"""
-        if not self._loaded or self.model is None:
-            logger.warning("Model not loaded")
-            return ""
-        try:
-            import numpy as np
-            # Convert audio to numpy array
-            audio = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
-            # Transcribe
-            result = self.model.transcribe(audio)
-            text = result["text"].strip()
-            logger.debug(f"Transcribed: {text}")
-            return text
-        except Exception as e:
-            logger.error(f"Error transcribing audio: {e}")
-            return ""
-class MockSTT(STTEngine):
-    """Mock STT engine for testing"""
-    def __init__(self):
-        self._loaded = False
-    async def load_model(self, model_name: str):
-        """Load mock model"""
-        self._loaded = True
-        logger.info("Loaded mock STT model")
-    async def transcribe(self, audio_data: bytes) -> str:
-        """Mock transcription"""
-        return "Hello, this is a test transcription."
-async def load_stt_engine(
-    engine_type: str = "whisper",
-    model_name: str = "base"
-) -> STTEngine:
-    """Load STT engine based on type"""
-    if engine_type == "whisper":
-        engine = WhisperSTT(model_name)
-    elif engine_type == "mock":
-        engine = MockSTT()
-    else:
-        raise ValueError(f"Unknown STT engine type: {engine_type}")
-    await engine.load_model(model_name)
-    return engine

src/reachy_mini_ha_voice/voice/tts.py DELETED Viewed

@@ -1,114 +0,0 @@
-"""
-Text-to-Speech engine for Reachy Mini Voice Assistant
-"""
-import asyncio
-import logging
-from abc import ABC, abstractmethod
-from typing import Optional
-import io
-logger = logging.getLogger(__name__)
-class TTSEngine(ABC):
-    """Abstract base class for TTS engine"""
-    @abstractmethod
-    async def load_model(self, model_path: str):
-        """Load TTS model"""
-        pass
-    @abstractmethod
-    async def synthesize(self, text: str) -> bytes:
-        """Synthesize text to audio"""
-        pass
-class PiperTTS(TTSEngine):
-    """Piper TTS engine"""
-    def __init__(self, model_path: str):
-        self.model = None
-        self.model_path = model_path
-        self._loaded = False
-    async def load_model(self, model_path: str):
-        """Load Piper model"""
-        try:
-            from piper import PiperVoice
-            self.model_path = model_path
-            self.model = PiperVoice.load(model_path)
-            self._loaded = True
-            logger.info(f"Loaded Piper model from {model_path}")
-        except ImportError:
-            logger.error("piper-tts not installed. Install with: pip install piper-tts")
-            raise
-        except Exception as e:
-            logger.error(f"Failed to load Piper model: {e}")
-            raise
-    async def synthesize(self, text: str) -> bytes:
-        """Synthesize text to audio"""
-        if not self._loaded or self.model is None:
-            logger.warning("Model not loaded")
-            return b""
-        try:
-            import numpy as np
-            # Synthesize
-            audio_stream = io.BytesIO()
-            self.model.synthesize(text, audio_stream)
-            audio_stream.seek(0)
-            # Convert to bytes
-            audio_data = audio_stream.read()
-            logger.debug(f"Synthesized {len(text)} characters")
-            return audio_data
-        except Exception as e:
-            logger.error(f"Error synthesizing text: {e}")
-            return b""
-class MockTTS(TTSEngine):
-    """Mock TTS engine for testing"""
-    def __init__(self):
-        self._loaded = False
-    async def load_model(self, model_path: str):
-        """Load mock model"""
-        self._loaded = True
-        logger.info("Loaded mock TTS model")
-    async def synthesize(self, text: str) -> bytes:
-        """Mock synthesis - return silent audio"""
-        import numpy as np
-        # Generate 1 second of silence at 16kHz
-        sample_rate = 16000
-        duration = len(text) * 0.1  # Rough estimation
-        samples = int(sample_rate * duration)
-        silence = np.zeros(samples, dtype=np.int16)
-        return silence.tobytes()
-async def load_tts_engine(
-    engine_type: str = "piper",
-    model_path: str = "en_US-lessac-medium"
-) -> TTSEngine:
-    """Load TTS engine based on type"""
-    if engine_type == "piper":
-        engine = PiperTTS(model_path)
-    elif engine_type == "mock":
-        engine = MockTTS()
-    else:
-        raise ValueError(f"Unknown TTS engine type: {engine_type}")
-    await engine.load_model(model_path)
-    return engine