Commit ·
801f3a9
1
Parent(s): 0ba80ff
refactor: remove local STT/TTS code (handled by Home Assistant)
Browse files
- Delete voice/stt.py (Whisper STT not needed, handled by Home Assistant)
- Delete voice/tts.py (Piper TTS not needed, handled by Home Assistant)
- Update voice/__init__.py to remove STT/TTS exports
- Keep only wake word detection functionality
- STT and TTS are processed by Home Assistant via ESPHome protocol
src/reachy_mini_ha_voice/__init__.py
CHANGED
|
@@ -12,56 +12,81 @@ __email__ = "your.email@example.com"
|
|
| 12 |
# Check for required models on import
|
| 13 |
import os
|
| 14 |
import sys
|
|
|
|
| 15 |
from pathlib import Path
|
| 16 |
|
| 17 |
-
def
|
| 18 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
wakewords_dir = Path(__file__).parent.parent.parent / "wakewords"
|
| 20 |
sounds_dir = Path(__file__).parent.parent.parent / "sounds"
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
missing_files = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
missing_files.append(sound_file.name)
|
| 43 |
|
| 44 |
if missing_files:
|
| 45 |
-
print("\n
|
| 46 |
-
print("WARNING: Required files are missing!")
|
| 47 |
-
print("="*60)
|
| 48 |
-
print("\nMissing files:")
|
| 49 |
for file in missing_files:
|
| 50 |
print(f" - {file}")
|
| 51 |
-
|
| 52 |
-
print("\nPlease run the download script:")
|
| 53 |
-
print(" Linux/Mac: ./download_models.sh")
|
| 54 |
-
print(" Windows: powershell -ExecutionPolicy Bypass -File download_models.ps1")
|
| 55 |
-
print("\nOr run the automated installation script:")
|
| 56 |
-
print(" Linux/Mac: ./install.sh")
|
| 57 |
-
print(" Windows: powershell -ExecutionPolicy Bypass -File install.ps1")
|
| 58 |
-
print("="*60 + "\n")
|
| 59 |
return False
|
| 60 |
|
|
|
|
| 61 |
return True
|
| 62 |
|
| 63 |
-
# Check on import
|
| 64 |
-
|
| 65 |
|
| 66 |
from .app import ReachyMiniVoiceApp
|
| 67 |
from .state import ServerState
|
|
|
|
| 12 |
# Check for required models on import
|
| 13 |
import os
|
| 14 |
import sys
|
| 15 |
+
import subprocess
|
| 16 |
from pathlib import Path
|
| 17 |
|
| 18 |
+
def download_file(url, dest_path, timeout=30):
    """Download ``url`` to ``dest_path`` without ever leaving a partial file.

    The payload is streamed to a ``<dest_path>.part`` sibling and moved into
    place only after the transfer completes. Callers decide whether a file is
    "present" by testing ``dest_path.exists()``, so a truncated file at the
    final path would be mistaken for a valid one and never re-downloaded.

    Args:
        url: Location of the file to fetch.
        dest_path: Final path of the downloaded file (``str`` or path-like).
        timeout: Seconds to wait on blocking network operations before
            giving up, so a stalled connection cannot hang the caller.

    Returns:
        True if the file was downloaded and moved into place, False otherwise.
        Errors are reported on stdout and never raised.
    """
    import contextlib
    import os
    import shutil
    import urllib.request

    dest_path = os.fspath(dest_path)
    tmp_path = dest_path + ".part"
    try:
        # Open the URL first so that an unreachable/invalid URL fails before
        # any file is created on disk.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            with open(tmp_path, "wb") as tmp_file:
                shutil.copyfileobj(response, tmp_file)
        # Same-directory rename: the final path appears only when complete.
        os.replace(tmp_path, dest_path)
        return True
    except Exception as e:
        # Deliberately broad: this is a best-effort helper whose contract is
        # "report and return False", whatever the failure was.
        print(f" Error downloading {url}: {e}")
        with contextlib.suppress(OSError):
            os.remove(tmp_path)
        return False
|
| 27 |
+
|
| 28 |
+
def check_and_download_files():
    """Ensure the wake word models and sound effects exist, fetching absent ones.

    Each required file is looked up in the ``wakewords`` / ``sounds``
    directories located three levels above this module. A file that is not
    there is downloaded with ``download_file``; it is reported as missing
    only if that download fails.

    Returns:
        True when every required file is present once the check finishes,
        False when at least one file could not be downloaded.
    """
    base_dir = Path(__file__).parent.parent.parent
    wakewords_dir = base_dir / "wakewords"
    sounds_dir = base_dir / "sounds"

    # Wake word models
    model_urls = {
        "okay_nabu.tflite": "https://github.com/esphome/micro-wake-word-models/raw/main/models/okay_nabu.tflite",
        "okay_nabu.json": "https://github.com/esphome/micro-wake-word-models/raw/main/models/okay_nabu.json",
        "hey_jarvis.tflite": "https://github.com/esphome/micro-wake-word-models/raw/main/models/hey_jarvis.tflite",
        "hey_jarvis.json": "https://github.com/esphome/micro-wake-word-models/raw/main/models/hey_jarvis.json",
    }

    # Sound effects
    sound_urls = {
        "wake_word_triggered.flac": "https://github.com/OHF-Voice/linux-voice-assistant/raw/main/sounds/wake_word_triggered.flac",
        "timer_finished.flac": "https://github.com/OHF-Voice/linux-voice-assistant/raw/main/sounds/timer_finished.flac",
    }

    missing_files = []
    downloaded_files = []

    for target_dir, urls in ((wakewords_dir, model_urls), (sounds_dir, sound_urls)):
        # Ensure the directory exists. This function runs at import time, so
        # an unwritable location must not abort the import; the downloads
        # below will then fail and be reported as missing instead.
        try:
            target_dir.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            print(f" Error creating {target_dir}: {e}")

        for filename, url in urls.items():
            dest_path = target_dir / filename
            if dest_path.exists():
                continue

            print(f"Downloading {filename}...")
            if download_file(url, dest_path):
                downloaded_files.append(filename)
                print(f" ✓ {filename} downloaded")
            else:
                # Only a failed download leaves the file missing; a file that
                # was just fetched must not be reported as missing.
                missing_files.append(filename)
                print(f" ✗ Failed to download {filename}")

    if downloaded_files:
        print(f"\n✓ Downloaded {len(downloaded_files)} file(s)")

    if missing_files:
        print(f"\n✗ Still missing {len(missing_files)} file(s):")
        for file in missing_files:
            print(f" - {file}")
        return False

    print("\n✓ All required files are present")
    return True
|
| 87 |
|
| 88 |
+
# Check and download files on import
|
| 89 |
+
check_and_download_files()
|
| 90 |
|
| 91 |
from .app import ReachyMiniVoiceApp
|
| 92 |
from .state import ServerState
|
src/reachy_mini_ha_voice/voice/__init__.py
CHANGED
|
@@ -1,17 +1,14 @@
|
|
| 1 |
"""
|
| 2 |
Voice processing module for Reachy Mini Voice Assistant
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
|
| 5 |
from .detector import WakeWordDetector, MicroWakeWordDetector, OpenWakeWordDetector
|
| 6 |
-
from .stt import STTEngine, WhisperSTT
|
| 7 |
-
from .tts import TTSEngine, PiperTTS
|
| 8 |
|
| 9 |
__all__ = [
|
| 10 |
"WakeWordDetector",
|
| 11 |
"MicroWakeWordDetector",
|
| 12 |
"OpenWakeWordDetector",
|
| 13 |
-
"STTEngine",
|
| 14 |
-
"WhisperSTT",
|
| 15 |
-
"TTSEngine",
|
| 16 |
-
"PiperTTS",
|
| 17 |
]
|
|
|
|
| 1 |
"""
|
| 2 |
Voice processing module for Reachy Mini Voice Assistant
|
| 3 |
+
|
| 4 |
+
Note: STT and TTS are handled by Home Assistant via ESPHome protocol.
|
| 5 |
+
This module only contains offline wake word detection.
|
| 6 |
"""
|
| 7 |
|
| 8 |
from .detector import WakeWordDetector, MicroWakeWordDetector, OpenWakeWordDetector
|
|
|
|
|
|
|
| 9 |
|
| 10 |
__all__ = [
|
| 11 |
"WakeWordDetector",
|
| 12 |
"MicroWakeWordDetector",
|
| 13 |
"OpenWakeWordDetector",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
]
|
src/reachy_mini_ha_voice/voice/stt.py
DELETED
|
@@ -1,104 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Speech-to-Text engine for Reachy Mini Voice Assistant
|
| 3 |
-
"""
|
| 4 |
-
|
| 5 |
-
import asyncio
|
| 6 |
-
import logging
|
| 7 |
-
from abc import ABC, abstractmethod
|
| 8 |
-
from typing import Optional
|
| 9 |
-
|
| 10 |
-
logger = logging.getLogger(__name__)
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
class STTEngine(ABC):
|
| 14 |
-
"""Abstract base class for STT engine"""
|
| 15 |
-
|
| 16 |
-
@abstractmethod
|
| 17 |
-
async def load_model(self, model_name: str):
|
| 18 |
-
"""Load STT model"""
|
| 19 |
-
pass
|
| 20 |
-
|
| 21 |
-
@abstractmethod
|
| 22 |
-
async def transcribe(self, audio_data: bytes) -> str:
|
| 23 |
-
"""Transcribe audio to text"""
|
| 24 |
-
pass
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
class WhisperSTT(STTEngine):
|
| 28 |
-
"""Whisper STT engine"""
|
| 29 |
-
|
| 30 |
-
def __init__(self, model_name: str = "base"):
|
| 31 |
-
self.model = None
|
| 32 |
-
self.model_name = model_name
|
| 33 |
-
self._loaded = False
|
| 34 |
-
|
| 35 |
-
async def load_model(self, model_name: str):
|
| 36 |
-
"""Load Whisper model"""
|
| 37 |
-
try:
|
| 38 |
-
import whisper
|
| 39 |
-
|
| 40 |
-
self.model_name = model_name
|
| 41 |
-
self.model = whisper.load_model(model_name)
|
| 42 |
-
self._loaded = True
|
| 43 |
-
|
| 44 |
-
logger.info(f"Loaded Whisper model: {model_name}")
|
| 45 |
-
except ImportError:
|
| 46 |
-
logger.error("whisper not installed. Install with: pip install openai-whisper")
|
| 47 |
-
raise
|
| 48 |
-
except Exception as e:
|
| 49 |
-
logger.error(f"Failed to load Whisper model: {e}")
|
| 50 |
-
raise
|
| 51 |
-
|
| 52 |
-
async def transcribe(self, audio_data: bytes) -> str:
|
| 53 |
-
"""Transcribe audio to text"""
|
| 54 |
-
if not self._loaded or self.model is None:
|
| 55 |
-
logger.warning("Model not loaded")
|
| 56 |
-
return ""
|
| 57 |
-
|
| 58 |
-
try:
|
| 59 |
-
import numpy as np
|
| 60 |
-
|
| 61 |
-
# Convert audio to numpy array
|
| 62 |
-
audio = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
|
| 63 |
-
|
| 64 |
-
# Transcribe
|
| 65 |
-
result = self.model.transcribe(audio)
|
| 66 |
-
text = result["text"].strip()
|
| 67 |
-
|
| 68 |
-
logger.debug(f"Transcribed: {text}")
|
| 69 |
-
return text
|
| 70 |
-
except Exception as e:
|
| 71 |
-
logger.error(f"Error transcribing audio: {e}")
|
| 72 |
-
return ""
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
class MockSTT(STTEngine):
|
| 76 |
-
"""Mock STT engine for testing"""
|
| 77 |
-
|
| 78 |
-
def __init__(self):
|
| 79 |
-
self._loaded = False
|
| 80 |
-
|
| 81 |
-
async def load_model(self, model_name: str):
|
| 82 |
-
"""Load mock model"""
|
| 83 |
-
self._loaded = True
|
| 84 |
-
logger.info("Loaded mock STT model")
|
| 85 |
-
|
| 86 |
-
async def transcribe(self, audio_data: bytes) -> str:
|
| 87 |
-
"""Mock transcription"""
|
| 88 |
-
return "Hello, this is a test transcription."
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
async def load_stt_engine(
|
| 92 |
-
engine_type: str = "whisper",
|
| 93 |
-
model_name: str = "base"
|
| 94 |
-
) -> STTEngine:
|
| 95 |
-
"""Load STT engine based on type"""
|
| 96 |
-
if engine_type == "whisper":
|
| 97 |
-
engine = WhisperSTT(model_name)
|
| 98 |
-
elif engine_type == "mock":
|
| 99 |
-
engine = MockSTT()
|
| 100 |
-
else:
|
| 101 |
-
raise ValueError(f"Unknown STT engine type: {engine_type}")
|
| 102 |
-
|
| 103 |
-
await engine.load_model(model_name)
|
| 104 |
-
return engine
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/reachy_mini_ha_voice/voice/tts.py
DELETED
|
@@ -1,114 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Text-to-Speech engine for Reachy Mini Voice Assistant
|
| 3 |
-
"""
|
| 4 |
-
|
| 5 |
-
import asyncio
|
| 6 |
-
import logging
|
| 7 |
-
from abc import ABC, abstractmethod
|
| 8 |
-
from typing import Optional
|
| 9 |
-
import io
|
| 10 |
-
|
| 11 |
-
logger = logging.getLogger(__name__)
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
class TTSEngine(ABC):
|
| 15 |
-
"""Abstract base class for TTS engine"""
|
| 16 |
-
|
| 17 |
-
@abstractmethod
|
| 18 |
-
async def load_model(self, model_path: str):
|
| 19 |
-
"""Load TTS model"""
|
| 20 |
-
pass
|
| 21 |
-
|
| 22 |
-
@abstractmethod
|
| 23 |
-
async def synthesize(self, text: str) -> bytes:
|
| 24 |
-
"""Synthesize text to audio"""
|
| 25 |
-
pass
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
class PiperTTS(TTSEngine):
|
| 29 |
-
"""Piper TTS engine"""
|
| 30 |
-
|
| 31 |
-
def __init__(self, model_path: str):
|
| 32 |
-
self.model = None
|
| 33 |
-
self.model_path = model_path
|
| 34 |
-
self._loaded = False
|
| 35 |
-
|
| 36 |
-
async def load_model(self, model_path: str):
|
| 37 |
-
"""Load Piper model"""
|
| 38 |
-
try:
|
| 39 |
-
from piper import PiperVoice
|
| 40 |
-
|
| 41 |
-
self.model_path = model_path
|
| 42 |
-
self.model = PiperVoice.load(model_path)
|
| 43 |
-
self._loaded = True
|
| 44 |
-
|
| 45 |
-
logger.info(f"Loaded Piper model from {model_path}")
|
| 46 |
-
except ImportError:
|
| 47 |
-
logger.error("piper-tts not installed. Install with: pip install piper-tts")
|
| 48 |
-
raise
|
| 49 |
-
except Exception as e:
|
| 50 |
-
logger.error(f"Failed to load Piper model: {e}")
|
| 51 |
-
raise
|
| 52 |
-
|
| 53 |
-
async def synthesize(self, text: str) -> bytes:
|
| 54 |
-
"""Synthesize text to audio"""
|
| 55 |
-
if not self._loaded or self.model is None:
|
| 56 |
-
logger.warning("Model not loaded")
|
| 57 |
-
return b""
|
| 58 |
-
|
| 59 |
-
try:
|
| 60 |
-
import numpy as np
|
| 61 |
-
|
| 62 |
-
# Synthesize
|
| 63 |
-
audio_stream = io.BytesIO()
|
| 64 |
-
self.model.synthesize(text, audio_stream)
|
| 65 |
-
audio_stream.seek(0)
|
| 66 |
-
|
| 67 |
-
# Convert to bytes
|
| 68 |
-
audio_data = audio_stream.read()
|
| 69 |
-
|
| 70 |
-
logger.debug(f"Synthesized {len(text)} characters")
|
| 71 |
-
return audio_data
|
| 72 |
-
except Exception as e:
|
| 73 |
-
logger.error(f"Error synthesizing text: {e}")
|
| 74 |
-
return b""
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
class MockTTS(TTSEngine):
|
| 78 |
-
"""Mock TTS engine for testing"""
|
| 79 |
-
|
| 80 |
-
def __init__(self):
|
| 81 |
-
self._loaded = False
|
| 82 |
-
|
| 83 |
-
async def load_model(self, model_path: str):
|
| 84 |
-
"""Load mock model"""
|
| 85 |
-
self._loaded = True
|
| 86 |
-
logger.info("Loaded mock TTS model")
|
| 87 |
-
|
| 88 |
-
async def synthesize(self, text: str) -> bytes:
|
| 89 |
-
"""Mock synthesis - return silent audio"""
|
| 90 |
-
import numpy as np
|
| 91 |
-
|
| 92 |
-
# Generate 1 second of silence at 16kHz
|
| 93 |
-
sample_rate = 16000
|
| 94 |
-
duration = len(text) * 0.1 # Rough estimation
|
| 95 |
-
samples = int(sample_rate * duration)
|
| 96 |
-
silence = np.zeros(samples, dtype=np.int16)
|
| 97 |
-
|
| 98 |
-
return silence.tobytes()
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
async def load_tts_engine(
|
| 102 |
-
engine_type: str = "piper",
|
| 103 |
-
model_path: str = "en_US-lessac-medium"
|
| 104 |
-
) -> TTSEngine:
|
| 105 |
-
"""Load TTS engine based on type"""
|
| 106 |
-
if engine_type == "piper":
|
| 107 |
-
engine = PiperTTS(model_path)
|
| 108 |
-
elif engine_type == "mock":
|
| 109 |
-
engine = MockTTS()
|
| 110 |
-
else:
|
| 111 |
-
raise ValueError(f"Unknown TTS engine type: {engine_type}")
|
| 112 |
-
|
| 113 |
-
await engine.load_model(model_path)
|
| 114 |
-
return engine
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|