Desmond-Dong committed on
Commit
801f3a9
·
1 Parent(s): 0ba80ff

refactor: remove local STT/TTS code (handled by Home Assistant)

Browse files

- Delete voice/stt.py (Whisper STT not needed, handled by Home Assistant)
- Delete voice/tts.py (Piper TTS not needed, handled by Home Assistant)
- Update voice/__init__.py to remove STT/TTS exports
- Keep only wake word detection functionality
- STT and TTS are processed by Home Assistant via the ESPHome protocol

src/reachy_mini_ha_voice/__init__.py CHANGED
@@ -12,56 +12,81 @@ __email__ = "your.email@example.com"
12
  # Check for required models on import
13
  import os
14
  import sys
 
15
  from pathlib import Path
16
 
17
- def check_required_files():
18
- """Check if required model files exist"""
 
 
 
 
 
 
 
 
 
 
19
  wakewords_dir = Path(__file__).parent.parent.parent / "wakewords"
20
  sounds_dir = Path(__file__).parent.parent.parent / "sounds"
21
 
 
 
 
 
22
  missing_files = []
 
 
 
 
 
 
 
 
 
23
 
24
- # Check wake word models
25
- required_models = [
26
- wakewords_dir / "okay_nabu.tflite",
27
- wakewords_dir / "okay_nabu.json",
28
- ]
 
 
 
 
 
29
 
30
- for model_file in required_models:
31
- if not model_file.exists():
32
- missing_files.append(model_file.name)
 
 
33
 
34
- # Check sound effects
35
- required_sounds = [
36
- sounds_dir / "wake_word_triggered.flac",
37
- sounds_dir / "timer_finished.flac",
38
- ]
 
 
 
 
 
39
 
40
- for sound_file in required_sounds:
41
- if not sound_file.exists():
42
- missing_files.append(sound_file.name)
43
 
44
  if missing_files:
45
- print("\n" + "="*60)
46
- print("WARNING: Required files are missing!")
47
- print("="*60)
48
- print("\nMissing files:")
49
  for file in missing_files:
50
  print(f" - {file}")
51
-
52
- print("\nPlease run the download script:")
53
- print(" Linux/Mac: ./download_models.sh")
54
- print(" Windows: powershell -ExecutionPolicy Bypass -File download_models.ps1")
55
- print("\nOr run the automated installation script:")
56
- print(" Linux/Mac: ./install.sh")
57
- print(" Windows: powershell -ExecutionPolicy Bypass -File install.ps1")
58
- print("="*60 + "\n")
59
  return False
60
 
 
61
  return True
62
 
63
- # Check on import
64
- check_required_files()
65
 
66
  from .app import ReachyMiniVoiceApp
67
  from .state import ServerState
 
12
  # Check for required models on import
13
  import os
14
  import sys
15
+ import subprocess
16
  from pathlib import Path
17
 
18
+ def download_file(url, dest_path):
19
+ """Download a file from URL"""
20
+ try:
21
+ import urllib.request
22
+ urllib.request.urlretrieve(url, dest_path)
23
+ return True
24
+ except Exception as e:
25
+ print(f" Error downloading {url}: {e}")
26
+ return False
27
+
28
+ def check_and_download_files():
29
+ """Check if required model files exist and download if missing"""
30
  wakewords_dir = Path(__file__).parent.parent.parent / "wakewords"
31
  sounds_dir = Path(__file__).parent.parent.parent / "sounds"
32
 
33
+ # Ensure directories exist
34
+ wakewords_dir.mkdir(parents=True, exist_ok=True)
35
+ sounds_dir.mkdir(parents=True, exist_ok=True)
36
+
37
  missing_files = []
38
+ downloaded_files = []
39
+
40
+ # Check and download wake word models
41
+ model_urls = {
42
+ "okay_nabu.tflite": "https://github.com/esphome/micro-wake-word-models/raw/main/models/okay_nabu.tflite",
43
+ "okay_nabu.json": "https://github.com/esphome/micro-wake-word-models/raw/main/models/okay_nabu.json",
44
+ "hey_jarvis.tflite": "https://github.com/esphome/micro-wake-word-models/raw/main/models/hey_jarvis.tflite",
45
+ "hey_jarvis.json": "https://github.com/esphome/micro-wake-word-models/raw/main/models/hey_jarvis.json",
46
+ }
47
 
48
+ for filename, url in model_urls.items():
49
+ dest_path = wakewords_dir / filename
50
+ if not dest_path.exists():
51
+ missing_files.append(filename)
52
+ print(f"Downloading {filename}...")
53
+ if download_file(url, dest_path):
54
+ downloaded_files.append(filename)
55
+ print(f" ✓ {filename} downloaded")
56
+ else:
57
+ print(f" ✗ Failed to download {filename}")
58
 
59
+ # Check and download sound effects
60
+ sound_urls = {
61
+ "wake_word_triggered.flac": "https://github.com/OHF-Voice/linux-voice-assistant/raw/main/sounds/wake_word_triggered.flac",
62
+ "timer_finished.flac": "https://github.com/OHF-Voice/linux-voice-assistant/raw/main/sounds/timer_finished.flac",
63
+ }
64
 
65
+ for filename, url in sound_urls.items():
66
+ dest_path = sounds_dir / filename
67
+ if not dest_path.exists():
68
+ missing_files.append(filename)
69
+ print(f"Downloading {filename}...")
70
+ if download_file(url, dest_path):
71
+ downloaded_files.append(filename)
72
+ print(f" ✓ {filename} downloaded")
73
+ else:
74
+ print(f" ✗ Failed to download {filename}")
75
 
76
+ if downloaded_files:
77
+ print(f"\n✓ Downloaded {len(downloaded_files)} file(s)")
 
78
 
79
  if missing_files:
80
+ print(f"\n Still missing {len(missing_files)} file(s):")
 
 
 
81
  for file in missing_files:
82
  print(f" - {file}")
 
 
 
 
 
 
 
 
83
  return False
84
 
85
+ print("\n✓ All required files are present")
86
  return True
87
 
88
+ # Check and download files on import
89
+ check_and_download_files()
90
 
91
  from .app import ReachyMiniVoiceApp
92
  from .state import ServerState
src/reachy_mini_ha_voice/voice/__init__.py CHANGED
@@ -1,17 +1,14 @@
1
  """
2
  Voice processing module for Reachy Mini Voice Assistant
 
 
 
3
  """
4
 
5
  from .detector import WakeWordDetector, MicroWakeWordDetector, OpenWakeWordDetector
6
- from .stt import STTEngine, WhisperSTT
7
- from .tts import TTSEngine, PiperTTS
8
 
9
  __all__ = [
10
  "WakeWordDetector",
11
  "MicroWakeWordDetector",
12
  "OpenWakeWordDetector",
13
- "STTEngine",
14
- "WhisperSTT",
15
- "TTSEngine",
16
- "PiperTTS",
17
  ]
 
1
  """
2
  Voice processing module for Reachy Mini Voice Assistant
3
+
4
+ Note: STT and TTS are handled by Home Assistant via ESPHome protocol.
5
+ This module only contains offline wake word detection.
6
  """
7
 
8
  from .detector import WakeWordDetector, MicroWakeWordDetector, OpenWakeWordDetector
 
 
9
 
10
  __all__ = [
11
  "WakeWordDetector",
12
  "MicroWakeWordDetector",
13
  "OpenWakeWordDetector",
 
 
 
 
14
  ]
src/reachy_mini_ha_voice/voice/stt.py DELETED
@@ -1,104 +0,0 @@
1
- """
2
- Speech-to-Text engine for Reachy Mini Voice Assistant
3
- """
4
-
5
- import asyncio
6
- import logging
7
- from abc import ABC, abstractmethod
8
- from typing import Optional
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- class STTEngine(ABC):
14
- """Abstract base class for STT engine"""
15
-
16
- @abstractmethod
17
- async def load_model(self, model_name: str):
18
- """Load STT model"""
19
- pass
20
-
21
- @abstractmethod
22
- async def transcribe(self, audio_data: bytes) -> str:
23
- """Transcribe audio to text"""
24
- pass
25
-
26
-
27
- class WhisperSTT(STTEngine):
28
- """Whisper STT engine"""
29
-
30
- def __init__(self, model_name: str = "base"):
31
- self.model = None
32
- self.model_name = model_name
33
- self._loaded = False
34
-
35
- async def load_model(self, model_name: str):
36
- """Load Whisper model"""
37
- try:
38
- import whisper
39
-
40
- self.model_name = model_name
41
- self.model = whisper.load_model(model_name)
42
- self._loaded = True
43
-
44
- logger.info(f"Loaded Whisper model: {model_name}")
45
- except ImportError:
46
- logger.error("whisper not installed. Install with: pip install openai-whisper")
47
- raise
48
- except Exception as e:
49
- logger.error(f"Failed to load Whisper model: {e}")
50
- raise
51
-
52
- async def transcribe(self, audio_data: bytes) -> str:
53
- """Transcribe audio to text"""
54
- if not self._loaded or self.model is None:
55
- logger.warning("Model not loaded")
56
- return ""
57
-
58
- try:
59
- import numpy as np
60
-
61
- # Convert audio to numpy array
62
- audio = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
63
-
64
- # Transcribe
65
- result = self.model.transcribe(audio)
66
- text = result["text"].strip()
67
-
68
- logger.debug(f"Transcribed: {text}")
69
- return text
70
- except Exception as e:
71
- logger.error(f"Error transcribing audio: {e}")
72
- return ""
73
-
74
-
75
- class MockSTT(STTEngine):
76
- """Mock STT engine for testing"""
77
-
78
- def __init__(self):
79
- self._loaded = False
80
-
81
- async def load_model(self, model_name: str):
82
- """Load mock model"""
83
- self._loaded = True
84
- logger.info("Loaded mock STT model")
85
-
86
- async def transcribe(self, audio_data: bytes) -> str:
87
- """Mock transcription"""
88
- return "Hello, this is a test transcription."
89
-
90
-
91
- async def load_stt_engine(
92
- engine_type: str = "whisper",
93
- model_name: str = "base"
94
- ) -> STTEngine:
95
- """Load STT engine based on type"""
96
- if engine_type == "whisper":
97
- engine = WhisperSTT(model_name)
98
- elif engine_type == "mock":
99
- engine = MockSTT()
100
- else:
101
- raise ValueError(f"Unknown STT engine type: {engine_type}")
102
-
103
- await engine.load_model(model_name)
104
- return engine
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/reachy_mini_ha_voice/voice/tts.py DELETED
@@ -1,114 +0,0 @@
1
- """
2
- Text-to-Speech engine for Reachy Mini Voice Assistant
3
- """
4
-
5
- import asyncio
6
- import logging
7
- from abc import ABC, abstractmethod
8
- from typing import Optional
9
- import io
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
-
14
- class TTSEngine(ABC):
15
- """Abstract base class for TTS engine"""
16
-
17
- @abstractmethod
18
- async def load_model(self, model_path: str):
19
- """Load TTS model"""
20
- pass
21
-
22
- @abstractmethod
23
- async def synthesize(self, text: str) -> bytes:
24
- """Synthesize text to audio"""
25
- pass
26
-
27
-
28
- class PiperTTS(TTSEngine):
29
- """Piper TTS engine"""
30
-
31
- def __init__(self, model_path: str):
32
- self.model = None
33
- self.model_path = model_path
34
- self._loaded = False
35
-
36
- async def load_model(self, model_path: str):
37
- """Load Piper model"""
38
- try:
39
- from piper import PiperVoice
40
-
41
- self.model_path = model_path
42
- self.model = PiperVoice.load(model_path)
43
- self._loaded = True
44
-
45
- logger.info(f"Loaded Piper model from {model_path}")
46
- except ImportError:
47
- logger.error("piper-tts not installed. Install with: pip install piper-tts")
48
- raise
49
- except Exception as e:
50
- logger.error(f"Failed to load Piper model: {e}")
51
- raise
52
-
53
- async def synthesize(self, text: str) -> bytes:
54
- """Synthesize text to audio"""
55
- if not self._loaded or self.model is None:
56
- logger.warning("Model not loaded")
57
- return b""
58
-
59
- try:
60
- import numpy as np
61
-
62
- # Synthesize
63
- audio_stream = io.BytesIO()
64
- self.model.synthesize(text, audio_stream)
65
- audio_stream.seek(0)
66
-
67
- # Convert to bytes
68
- audio_data = audio_stream.read()
69
-
70
- logger.debug(f"Synthesized {len(text)} characters")
71
- return audio_data
72
- except Exception as e:
73
- logger.error(f"Error synthesizing text: {e}")
74
- return b""
75
-
76
-
77
- class MockTTS(TTSEngine):
78
- """Mock TTS engine for testing"""
79
-
80
- def __init__(self):
81
- self._loaded = False
82
-
83
- async def load_model(self, model_path: str):
84
- """Load mock model"""
85
- self._loaded = True
86
- logger.info("Loaded mock TTS model")
87
-
88
- async def synthesize(self, text: str) -> bytes:
89
- """Mock synthesis - return silent audio"""
90
- import numpy as np
91
-
92
- # Generate 1 second of silence at 16kHz
93
- sample_rate = 16000
94
- duration = len(text) * 0.1 # Rough estimation
95
- samples = int(sample_rate * duration)
96
- silence = np.zeros(samples, dtype=np.int16)
97
-
98
- return silence.tobytes()
99
-
100
-
101
- async def load_tts_engine(
102
- engine_type: str = "piper",
103
- model_path: str = "en_US-lessac-medium"
104
- ) -> TTSEngine:
105
- """Load TTS engine based on type"""
106
- if engine_type == "piper":
107
- engine = PiperTTS(model_path)
108
- elif engine_type == "mock":
109
- engine = MockTTS()
110
- else:
111
- raise ValueError(f"Unknown TTS engine type: {engine_type}")
112
-
113
- await engine.load_model(model_path)
114
- return engine