Desmond-Dong committed on
Commit
6a3e3dc
·
1 Parent(s): 58fd2a5

Remove __main__.py to avoid conflict with Reachy Mini app framework

Browse files

The app.py with ReachyMiniHAVoiceApp class is the correct entry point
for Reachy Mini's app framework. __main__.py is only needed for
standalone command-line usage.

Files changed (1) hide show
  1. reachy_mini_ha_voice/__main__.py +0 -449
reachy_mini_ha_voice/__main__.py DELETED
@@ -1,449 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Main entry point for Reachy Mini Home Assistant Voice Assistant."""
3
-
4
- import argparse
5
- import asyncio
6
- import json
7
- import logging
8
- import sys
9
- import threading
10
- import time
11
- from pathlib import Path
12
- from queue import Queue
13
- from typing import Dict, List, Optional, Set, Union
14
-
15
- import numpy as np
16
- import pyaudio
17
- from pymicro_wakeword import MicroWakeWord, MicroWakeWordFeatures
18
- from pyopen_wakeword import OpenWakeWord, OpenWakeWordFeatures
19
-
20
- from .models import AvailableWakeWord, Preferences, ServerState, WakeWordType, AudioPlayer
21
- from .satellite import VoiceSatelliteProtocol
22
- from .util import get_mac
23
- from .zeroconf import HomeAssistantZeroconf
24
- from .reachy_integration import ReachyMiniIntegration
25
-
26
- _LOGGER = logging.getLogger(__name__)
27
- _MODULE_DIR = Path(__file__).parent
28
- _REPO_DIR = _MODULE_DIR.parent
29
- _WAKEWORDS_DIR = _REPO_DIR / "wakewords"
30
- _SOUNDS_DIR = _REPO_DIR / "sounds"
31
-
32
-
33
async def main() -> None:
    """Standalone command-line entry point.

    Parses CLI arguments, loads wake/stop word models and saved preferences,
    starts the microphone-processing thread, serves the ESPHome protocol for
    Home Assistant, and advertises the server over mDNS until shutdown.
    """
    parser = argparse.ArgumentParser(
        description="Reachy Mini Voice Assistant for Home Assistant"
    )
    parser.add_argument("--name", required=True, help="Device name")
    parser.add_argument(
        "--audio-input-device",
        type=int,
        help="Audio input device index (see --list-input-devices)",
    )
    parser.add_argument(
        "--list-input-devices",
        action="store_true",
        help="List audio input devices and exit",
    )
    parser.add_argument(
        "--audio-output-device",
        type=int,
        help="Audio output device index (see --list-output-devices)",
    )
    parser.add_argument(
        "--list-output-devices",
        action="store_true",
        help="List audio output devices and exit",
    )
    # NOTE(review): with action="append" and a list default, user-supplied
    # directories are appended AFTER the built-in default, so the bundled
    # wakewords dir is always searched — confirm this is intentional
    # (argparse does not replace a list default on append).
    parser.add_argument(
        "--wake-word-dir",
        default=[_WAKEWORDS_DIR],
        action="append",
        help="Directory with wake word models (.tflite) and configs (.json)",
    )
    parser.add_argument(
        "--wake-model", default="okay_nabu", help="Id of active wake model"
    )
    parser.add_argument("--stop-model", default="stop", help="Id of stop model")
    parser.add_argument(
        "--download-dir",
        default=_REPO_DIR / "local",
        help="Directory to download custom wake word models, etc.",
    )
    parser.add_argument(
        "--refractory-seconds",
        default=2.0,
        type=float,
        help="Seconds before wake word can be activated again",
    )
    parser.add_argument(
        "--wakeup-sound", default=str(_SOUNDS_DIR / "wake_word_triggered.flac")
    )
    parser.add_argument(
        "--timer-finished-sound", default=str(_SOUNDS_DIR / "timer_finished.flac")
    )
    parser.add_argument("--preferences-file", default=_REPO_DIR / "preferences.json")
    parser.add_argument(
        "--host",
        default="0.0.0.0",
        help="Address for ESPHome server (default: 0.0.0.0)",
    )
    parser.add_argument(
        "--port", type=int, default=6053, help="Port for ESPHome server (default: 6053)"
    )
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    parser.add_argument(
        "--enable-reachy",
        action="store_true",
        help="Enable Reachy Mini integration",
    )
    args = parser.parse_args()

    # List devices and exit (diagnostic modes; no server is started).
    if args.list_input_devices:
        p = pyaudio.PyAudio()
        print("Input devices")
        print("=" * 13)
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info["maxInputChannels"] > 0:
                print(f"[{i}] {info['name']}")
        p.terminate()
        return

    if args.list_output_devices:
        p = pyaudio.PyAudio()
        print("Output devices")
        print("=" * 14)
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info["maxOutputChannels"] > 0:
                print(f"[{i}] {info['name']}")
        p.terminate()
        return

    # Setup logging
    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    _LOGGER.debug(args)

    # Create directories
    args.download_dir = Path(args.download_dir)
    args.download_dir.mkdir(parents=True, exist_ok=True)

    # Initialize Reachy Mini integration (hardware hooks are optional).
    reachy_integration = ReachyMiniIntegration()
    if args.enable_reachy:
        reachy_integration.connect()
    else:
        _LOGGER.info("Reachy Mini integration disabled")

    # Load available wake words from every configured directory plus the
    # download location for externally fetched models.
    wake_word_dirs = [Path(ww_dir) for ww_dir in args.wake_word_dir]
    wake_word_dirs.append(args.download_dir / "external_wake_words")
    available_wake_words: Dict[str, AvailableWakeWord] = {}

    for wake_word_dir in wake_word_dirs:
        if not wake_word_dir.exists():
            continue

        for model_config_path in wake_word_dir.glob("*.json"):
            model_id = model_config_path.stem
            # The stop model is loaded separately below; skip it here.
            if model_id == args.stop_model:
                continue

            try:
                with open(model_config_path, "r", encoding="utf-8") as model_config_file:
                    model_config = json.load(model_config_file)
                    model_type = WakeWordType(model_config.get("type", "microWakeWord"))
                    if model_type == WakeWordType.OPEN_WAKE_WORD:
                        # openWakeWord configs point at a separate model file.
                        wake_word_path = model_config_path.parent / model_config["model"]
                    else:
                        wake_word_path = model_config_path

                    available_wake_words[model_id] = AvailableWakeWord(
                        id=model_id,
                        # NOTE(review): model_type is already a WakeWordType;
                        # WakeWordType(model_type) is a redundant no-op wrap.
                        type=WakeWordType(model_type),
                        wake_word=model_config["wake_word"],
                        trained_languages=model_config.get("trained_languages", []),
                        wake_word_path=wake_word_path,
                    )
            except Exception as e:
                _LOGGER.error("Error loading wake word config %s: %s", model_config_path, e)

    _LOGGER.debug("Available wake words: %s", list(sorted(available_wake_words.keys())))

    # Load preferences; fall back to defaults on any read/parse error.
    preferences_path = Path(args.preferences_file)
    if preferences_path.exists():
        try:
            with open(preferences_path, "r", encoding="utf-8") as preferences_file:
                preferences_dict = json.load(preferences_file)
                preferences = Preferences(**preferences_dict)
        except Exception as e:
            _LOGGER.error("Error loading preferences: %s", e)
            preferences = Preferences()
    else:
        preferences = Preferences()

    # Load wake/stop models: preferences first, then the --wake-model default
    # only if nothing from preferences could be loaded.
    active_wake_words: Set[str] = set()
    wake_models: Dict[str, Union[MicroWakeWord, OpenWakeWord]] = {}

    if preferences.active_wake_words:
        for wake_word_id in preferences.active_wake_words:
            wake_word = available_wake_words.get(wake_word_id)
            if wake_word is None:
                _LOGGER.warning("Unrecognized wake word id: %s", wake_word_id)
                continue

            try:
                _LOGGER.debug("Loading wake model: %s", wake_word_id)
                wake_models[wake_word_id] = wake_word.load()
                active_wake_words.add(wake_word_id)
            except Exception as e:
                _LOGGER.error("Error loading wake model %s: %s", wake_word_id, e)

    if not wake_models:
        wake_word_id = args.wake_model
        if wake_word_id in available_wake_words:
            try:
                wake_word = available_wake_words[wake_word_id]
                _LOGGER.debug("Loading wake model: %s", wake_word_id)
                wake_models[wake_word_id] = wake_word.load()
                active_wake_words.add(wake_word_id)
            except Exception as e:
                _LOGGER.error("Error loading default wake model: %s", e)
        else:
            _LOGGER.error("Default wake word not found: %s", wake_word_id)

    # Load stop model (first directory that has a matching config wins).
    stop_model: Optional[MicroWakeWord] = None
    for wake_word_dir in wake_word_dirs:
        stop_config_path = wake_word_dir / f"{args.stop_model}.json"
        if not stop_config_path.exists():
            continue

        try:
            _LOGGER.debug("Loading stop model: %s", stop_config_path)
            stop_model = MicroWakeWord.from_config(stop_config_path)
            break
        except Exception as e:
            _LOGGER.error("Error loading stop model: %s", e)

    if stop_model is None:
        _LOGGER.warning("Stop model not loaded")

    # Create audio players (separate players so music and TTS don't contend).
    music_player = AudioPlayer(device=args.audio_output_device)
    tts_player = AudioPlayer(device=args.audio_output_device)

    # Create server state shared between the asyncio server and audio thread.
    state = ServerState(
        name=args.name,
        mac_address=get_mac(),
        audio_queue=Queue(),
        entities=[],
        available_wake_words=available_wake_words,
        wake_words=wake_models,
        active_wake_words=active_wake_words,
        stop_word=stop_model,
        music_player=music_player,
        tts_player=tts_player,
        wakeup_sound=args.wakeup_sound,
        timer_finished_sound=args.timer_finished_sound,
        preferences=preferences,
        preferences_path=preferences_path,
        refractory_seconds=args.refractory_seconds,
        download_dir=args.download_dir,
        reachy_integration=reachy_integration,
    )

    # Start audio processing thread (daemon: exits with the process).
    process_audio_thread = threading.Thread(
        target=process_audio,
        args=(state, args.audio_input_device),
        daemon=True,
    )
    process_audio_thread.start()

    # Start ESPHome server
    loop = asyncio.get_running_loop()
    server = await loop.create_server(
        lambda: VoiceSatelliteProtocol(state), host=args.host, port=args.port
    )

    # Auto discovery (zeroconf, mDNS)
    discovery = HomeAssistantZeroconf(port=args.port, name=args.name)
    await discovery.register_server()

    try:
        async with server:
            _LOGGER.info("Server started (host=%s, port=%s)", args.host, args.port)
            if reachy_integration.is_connected():
                _LOGGER.info("Reachy Mini integration enabled")
            await server.serve_forever()
    # NOTE(review): under asyncio.run(), KeyboardInterrupt usually surfaces
    # in the runner rather than inside this coroutine — verify this handler
    # is ever reached; the `finally` block below covers cleanup either way.
    except KeyboardInterrupt:
        _LOGGER.info("Shutting down...")
    finally:
        # None is the audio thread's sentinel to stop consuming the queue.
        state.audio_queue.put_nowait(None)
        process_audio_thread.join(timeout=5)
        if reachy_integration.is_connected():
            reachy_integration.disconnect()
        music_player.close()
        tts_player.close()
        await discovery.unregister_server()

    _LOGGER.debug("Server stopped")
303
-
304
-
305
def process_audio(state: ServerState, input_device: Optional[int]) -> None:
    """Read microphone audio and run wake-word / stop-word detection.

    Runs in a dedicated thread. Each 16-bit mono 16 kHz chunk is forwarded
    to the connected satellite (if any) and fed to the active wake word
    models and the stop word model.

    Args:
        state: Shared server state (wake words, satellite, Reachy hooks).
        input_device: PyAudio input device index, or None to use the
            system default input device.
    """
    p = pyaudio.PyAudio()

    # Resolve the input device. Fix: the original scanned per-device info
    # for a non-existent "isDefaultInputDevice" key (a KeyError before any
    # audio was read); PyAudio exposes the default input device directly.
    if input_device is not None:
        device_index = input_device
    else:
        try:
            device_index = int(p.get_default_input_device_info()["index"])
        except (OSError, KeyError):
            # PyAudio raises OSError ("no default input device") when the
            # host API has no usable default.
            device_index = None

        if device_index is None:
            _LOGGER.error("No default input device found")
            p.terminate()  # fix: original early return leaked the PyAudio instance
            return

    device_info = p.get_device_info_by_index(device_index)
    _LOGGER.info(
        "Using audio input device: %s (index: %s)", device_info["name"], device_index
    )

    # Audio parameters expected by the wake word feature extractors.
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000

    stream = None  # fix: guard cleanup when p.open() itself raises
    try:
        stream = p.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            input_device_index=device_index,
            frames_per_buffer=CHUNK,
        )

        wake_words: List[Union[MicroWakeWord, OpenWakeWord]] = []
        micro_features: Optional[MicroWakeWordFeatures] = None
        micro_inputs: List[np.ndarray] = []

        oww_features: Optional[OpenWakeWordFeatures] = None
        oww_inputs: List[np.ndarray] = []
        has_oww = False

        last_active: Optional[float] = None

        _LOGGER.info("Audio processing started")

        while True:
            try:
                # Read one raw PCM chunk (blocking).
                data = stream.read(CHUNK, exception_on_overflow=False)

                # Forward raw audio to Home Assistant while connected.
                if state.satellite is not None:
                    state.satellite.handle_audio(data)

                # (Re)build the active wake word model list on change.
                if (not wake_words) or (state.wake_words_changed and state.wake_words):
                    state.wake_words_changed = False
                    wake_words = [
                        ww
                        for ww in state.wake_words.values()
                        if ww.id in state.active_wake_words
                    ]

                    has_oww = any(isinstance(ww, OpenWakeWord) for ww in wake_words)

                    if micro_features is None:
                        micro_features = MicroWakeWordFeatures()

                    if has_oww and (oww_features is None):
                        oww_features = OpenWakeWordFeatures.from_builtin()

                # Fix: compute micro features whenever anything consumes
                # them. The original only refreshed micro_inputs when wake
                # words were active, leaving the stop word below reading
                # stale (or never-updated) features.
                micro_inputs.clear()
                if (micro_features is not None) and (
                    wake_words or (state.stop_word is not None)
                ):
                    micro_inputs.extend(micro_features.process_streaming(data))

                # Process wake words
                if wake_words:
                    if has_oww:
                        assert oww_features is not None
                        oww_inputs.clear()
                        oww_inputs.extend(oww_features.process_streaming(data))

                    for wake_word in wake_words:
                        activated = False
                        if isinstance(wake_word, MicroWakeWord):
                            # Feed every feature window to keep model state
                            # advancing even after an activation.
                            for micro_input in micro_inputs:
                                if wake_word.process_streaming(micro_input):
                                    activated = True
                        elif isinstance(wake_word, OpenWakeWord):
                            for oww_input in oww_inputs:
                                for prob in wake_word.process_streaming(oww_input):
                                    if prob > 0.5:
                                        activated = True

                        if activated:
                            now = time.monotonic()
                            # Refractory period suppresses rapid re-triggers.
                            if (last_active is None) or (
                                (now - last_active) > state.refractory_seconds
                            ):
                                # Fix: satellite may not be connected yet;
                                # original dereferenced it unconditionally.
                                if state.satellite is not None:
                                    state.satellite.wakeup(wake_word)
                                if state.reachy_integration.is_connected():
                                    state.reachy_integration.on_wake_word_detected()
                                last_active = now

                # Process stop word ("stop" spoken during TTS/timers).
                if state.stop_word is not None:
                    stopped = False
                    # Deliberately no short-circuit: every window must pass
                    # through the model to keep its streaming state correct.
                    for micro_input in micro_inputs:
                        if state.stop_word.process_streaming(micro_input):
                            stopped = True

                    if stopped and (state.stop_word.id in state.active_wake_words):
                        if state.satellite is not None:
                            state.satellite.stop()
                        if state.reachy_integration.is_connected():
                            state.reachy_integration.on_stop()

            except Exception as e:
                # Keep the capture loop alive on transient per-chunk errors.
                _LOGGER.error("Error processing audio: %s", e)
                time.sleep(0.1)

    except Exception as e:
        _LOGGER.error("Error opening audio stream: %s", e)
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
        _LOGGER.info("Audio processing stopped")
446
-
447
-
448
# Standalone command-line usage: `python -m reachy_mini_ha_voice`.
if __name__ == "__main__":
    asyncio.run(main())