fix: set python version to work with default env

#3
This view is limited to 50 files because it contains too many changes. See the raw diff here.
Files changed (50) hide show
  1. .claude/settings.local.json +9 -38
  2. .gitattributes +6 -3
  3. .github/dependabot.yml +0 -13
  4. .github/workflows/auto_release_on_version.yml +0 -86
  5. .github/workflows/sync_to_hf.yml +0 -36
  6. .gitignore +5 -12
  7. .pre-commit-config.yaml +0 -20
  8. CHANGELOG.md +0 -581
  9. Project_Summary.md → PROJECT_PLAN.md +0 -0
  10. README.md +1 -0
  11. changelog.json +0 -222
  12. docs/USER_MANUAL_CN.md +0 -256
  13. docs/USER_MANUAL_EN.md +0 -256
  14. home_assistant_blueprints/reachy_mini_presence_companion.yaml +0 -288
  15. index.html +32 -94
  16. pyproject.toml +18 -129
  17. {reachy_mini_home_assistant → reachy_mini_ha_voice}/__init__.py +24 -29
  18. {reachy_mini_home_assistant → reachy_mini_ha_voice}/__main__.py +21 -29
  19. {reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/animation_player.py +42 -121
  20. reachy_mini_ha_voice/animations/conversation_animations.json +87 -0
  21. {reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/api_server.py +9 -24
  22. {reachy_mini_home_assistant/audio → reachy_mini_ha_voice}/audio_player.py +130 -770
  23. {reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/camera_server.py +842 -1042
  24. {reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity.py +45 -30
  25. {reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity_extensions.py +16 -19
  26. reachy_mini_ha_voice/entity_registry.py +945 -0
  27. reachy_mini_ha_voice/gesture_detector.py +183 -0
  28. {reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/head_tracker.py +48 -158
  29. {reachy_mini_home_assistant → reachy_mini_ha_voice}/main.py +75 -74
  30. reachy_mini_ha_voice/models.py +95 -0
  31. {reachy_mini_home_assistant → reachy_mini_ha_voice}/models/crops_classifier.onnx +0 -0
  32. {reachy_mini_home_assistant → reachy_mini_ha_voice}/models/hand_detector.onnx +0 -0
  33. reachy_mini_home_assistant/motion/reachy_motion.py → reachy_mini_ha_voice/motion.py +16 -12
  34. reachy_mini_ha_voice/movement_manager.py +861 -0
  35. {reachy_mini_home_assistant → reachy_mini_ha_voice}/reachy_controller.py +869 -1061
  36. {reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/satellite.py +784 -1051
  37. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/.gitkeep +0 -0
  38. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/LICENSE.md +0 -0
  39. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/README.md +0 -0
  40. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/timer_finished.flac +0 -0
  41. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/wake_word_triggered.flac +0 -0
  42. {reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/speech_sway.py +27 -36
  43. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/index.html +0 -0
  44. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/main.js +0 -0
  45. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/style.css +0 -0
  46. reachy_mini_ha_voice/util.py +45 -0
  47. {reachy_mini_home_assistant → reachy_mini_ha_voice}/voice_assistant.py +810 -1314
  48. {reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/.gitkeep +0 -0
  49. {reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/README.md +0 -0
  50. {reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/alexa.json +0 -0
.claude/settings.local.json CHANGED
@@ -3,53 +3,24 @@
3
  "includeCoAuthoredBy": false,
4
  "permissions": {
5
  "allow": [
6
- "Bash",
7
- "BashOutput",
8
  "Edit",
9
- "Glob",
10
- "Grep",
11
- "KillShell",
12
- "NotebookEdit",
13
- "Read",
14
- "SlashCommand",
15
- "Task",
16
- "TodoWrite",
17
- "WebFetch",
18
- "WebSearch",
19
- "Write",
20
- "mcp__ide",
21
- "mcp__exa",
22
- "mcp__context7",
23
- "mcp__mcp-deepwiki",
24
- "mcp__Playwright",
25
- "mcp__spec-workflow",
26
- "mcp__open-websearch",
27
- "mcp__serena",
28
- "All",
29
- "Bash(copy:*)",
30
- "mcp__zread__search_doc",
31
- "mcp__zread__read_file",
32
  "Bash(cd:*)",
33
- "Bash(ls:*)",
34
- "Bash(find:*)",
35
- "mcp__acp__Bash",
36
- "Skill(commit-commands:commit)",
37
- "Skill(commit-commands:commit:*)"
38
  ],
39
  "deny": [],
40
  "ask": []
41
  },
42
- "model": "opus",
43
  "hooks": {},
 
 
44
  "statusLine": {
45
  "type": "command",
46
  "command": "%USERPROFILE%\\.claude\\ccline\\ccline.exe",
47
  "padding": 0
48
  },
49
- "enabledPlugins": {
50
- "glm-plan-usage@zai-coding-plugins": true,
51
- "glm-plan-bug@zai-coding-plugins": true
52
- },
53
- "outputStyle": "Explanatory",
54
- "alwaysThinkingEnabled": true
55
- }
 
3
  "includeCoAuthoredBy": false,
4
  "permissions": {
5
  "allow": [
6
+ "SlashCommand(/zcf:git-commit)",
 
7
  "Edit",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  "Bash(cd:*)",
9
+ "SlashCommand(/zcf:git-commit --emoji)",
10
+ "SlashCommand(/zcf:git-commit:*)",
11
+ "Bash(git:*)",
12
+ "Bash(ls:*)"
 
13
  ],
14
  "deny": [],
15
  "ask": []
16
  },
 
17
  "hooks": {},
18
+ "alwaysThinkingEnabled": true,
19
+ "outputStyle": "default",
20
  "statusLine": {
21
  "type": "command",
22
  "command": "%USERPROFILE%\\.claude\\ccline\\ccline.exe",
23
  "padding": 0
24
  },
25
+ "model": "opus"
26
+ }
 
 
 
 
 
.gitattributes CHANGED
@@ -1,5 +1,8 @@
1
- # LFS tracking for large binary files
 
 
 
 
 
2
  *.tflite filter=lfs diff=lfs merge=lfs -text
3
  *.onnx filter=lfs diff=lfs merge=lfs -text
4
- *.pt filter=lfs diff=lfs merge=lfs -text
5
- *.flac filter=lfs diff=lfs merge=lfs -text
 
1
+ reachy_mini_ha_voice/wakewords/**/*.tflite filter=lfs diff=lfs merge=lfs -text
2
+ reachy_mini_ha_voice/sounds/**/*.flac filter=lfs diff=lfs merge=lfs -text
3
+ "reachy_mini_ha_voice/wakewords/**/*.tflite filter=lfs diff=lfs merge=lfs -text
4
+ reachy_mini_ha_voice/sounds/**/*.flac" filter=lfs diff=lfs merge=lfs -text
5
+ "ha/assets/meshes/*.stl" filter=lfs diff=lfs merge=lfs -text
6
+ "ha/assets/*.urdf" filter=lfs diff=lfs merge=lfs -text
7
  *.tflite filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
 
.github/dependabot.yml DELETED
@@ -1,13 +0,0 @@
1
- version: 2
2
- updates:
3
- # Enable version updates for pip
4
- - package-ecosystem: "pip"
5
- directory: "/"
6
- schedule:
7
- interval: "weekly"
8
- # Ignore PyTorch updates - locked version required for compatibility
9
- ignore:
10
- - dependency-name: "torch"
11
- versions: [">2.5.1"]
12
- - dependency-name: "torchvision"
13
- versions: [">0.20.1"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/auto_release_on_version.yml DELETED
@@ -1,86 +0,0 @@
1
- name: Auto Release On Version Change
2
-
3
- on:
4
- push:
5
- branches: [develop, main]
6
- paths:
7
- - pyproject.toml
8
- - changelog.json
9
-
10
- permissions:
11
- contents: write
12
-
13
- jobs:
14
- release:
15
- runs-on: ubuntu-latest
16
-
17
- steps:
18
- - name: Checkout
19
- uses: actions/checkout@v4
20
- with:
21
- fetch-depth: 0
22
-
23
- - name: Extract version from pyproject
24
- id: version
25
- run: |
26
- python - <<'PY2'
27
- import os
28
- import tomllib
29
- from pathlib import Path
30
-
31
- data = tomllib.loads(Path('pyproject.toml').read_text(encoding='utf-8'))
32
- version = data['project']['version']
33
- with open(os.environ['GITHUB_OUTPUT'], 'a', encoding='utf-8') as f:
34
- f.write(f"version={version}\\n")
35
- print(f"Detected version: {version}")
36
- PY2
37
-
38
- - name: Check if tag exists
39
- id: tag_check
40
- run: |
41
- git fetch --tags --force
42
- VERSION="${{ steps.version.outputs.version }}"
43
- if git rev-parse -q --verify "refs/tags/v${VERSION}" >/dev/null; then
44
- echo "should_release=false" >> "$GITHUB_OUTPUT"
45
- echo "Tag v${VERSION} already exists; skip release."
46
- else
47
- echo "should_release=true" >> "$GITHUB_OUTPUT"
48
- echo "Tag v${VERSION} does not exist; release will be created."
49
- fi
50
-
51
- - name: Build release notes from changelog
52
- if: steps.tag_check.outputs.should_release == 'true'
53
- env:
54
- VERSION: ${{ steps.version.outputs.version }}
55
- run: |
56
- python - <<'PY2'
57
- import json
58
- import os
59
- from pathlib import Path
60
-
61
- version = os.environ['VERSION']
62
- changelog = json.loads(Path('changelog.json').read_text(encoding='utf-8'))
63
- entry = next((x for x in changelog if x.get('version') == version), None)
64
-
65
- lines = [f"## v{version}", ""]
66
- if entry is None:
67
- lines.append("No changelog entry found for this version.")
68
- else:
69
- date = entry.get('date')
70
- if date:
71
- lines.append(f"Date: {date}")
72
- lines.append("")
73
- for change in entry.get('changes', []):
74
- lines.append(f"- {change}")
75
-
76
- Path('release_notes.md').write_text('\\n'.join(lines) + '\\n', encoding='utf-8')
77
- PY2
78
-
79
- - name: Create GitHub release
80
- if: steps.tag_check.outputs.should_release == 'true'
81
- uses: softprops/action-gh-release@v2
82
- with:
83
- tag_name: v${{ steps.version.outputs.version }}
84
- name: v${{ steps.version.outputs.version }}
85
- body_path: release_notes.md
86
- generate_release_notes: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/sync_to_hf.yml DELETED
@@ -1,36 +0,0 @@
1
- name: Sync to Hugging Face
2
-
3
- on:
4
- push:
5
- branches: [main]
6
- workflow_dispatch:
7
-
8
- jobs:
9
- sync:
10
- runs-on: ubuntu-latest
11
- steps:
12
- - name: Checkout GitHub repo
13
- uses: actions/checkout@v4
14
- with:
15
- lfs: true
16
-
17
- - name: Create fresh commit and push to Hugging Face
18
- env:
19
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
- run: |
21
- git config --global user.email "action@github.com"
22
- git config --global user.name "GitHub Action"
23
-
24
- # Create a new orphan branch with no history
25
- git checkout --orphan hf-sync
26
- git add -A
27
- git commit -m "Fresh sync: $(date +%Y-%m-%d\ %H:%M:%S)"
28
-
29
- # Add Hugging Face remote
30
- git remote add hf https://djhui5710:$HF_TOKEN@huggingface.co/spaces/djhui5710/reachy_mini_home_assistant
31
-
32
- # Push LFS objects first
33
- git lfs push hf hf-sync --all
34
-
35
- # Force push as main to HF (overwrites all history)
36
- git push hf hf-sync:main --force
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore CHANGED
@@ -39,8 +39,6 @@ env/
39
  .spec-workflow/
40
  .playwright-mcp/
41
  *~
42
- CLAUDE.md
43
- commit_msg.txt
44
 
45
  # Configuration
46
  config.json
@@ -65,19 +63,14 @@ htmlcov/
65
  !reachy_mini_ha_voice/sounds/*.flac
66
 
67
  # Models (exclude package bundled files)
68
- # models/ - ignore external models directory
69
  models/
70
- # Package bundled models
71
- !reachy_mini_ha_voice/models/
72
- reachy_mini_ha_voice/models/*.tflite
73
- reachy_mini_ha_voice/models/*.onnx
74
- reachy_mini_ha_voice/models/*.pt
75
 
76
  # SDK Reference (local development only)
77
  reference/
78
- local/
79
  # ha/ - temporarily commented out for path fixes
80
  # ha/ will be moved to separate repository soon
81
-
82
- # Temporary check scripts
83
- temp_check_scripts/
 
39
  .spec-workflow/
40
  .playwright-mcp/
41
  *~
 
 
42
 
43
  # Configuration
44
  config.json
 
63
  !reachy_mini_ha_voice/sounds/*.flac
64
 
65
  # Models (exclude package bundled files)
 
66
  models/
67
+ # *.tflite - bundled in package
68
+ !reachy_mini_ha_voice/wakewords/*.tflite
69
+ !reachy_mini_ha_voice/wakewords/**/*.tflite
70
+ *.onnx
71
+ !reachy_mini_ha_voice/models/*.onnx
72
 
73
  # SDK Reference (local development only)
74
  reference/
 
75
  # ha/ - temporarily commented out for path fixes
76
  # ha/ will be moved to separate repository soon
 
 
 
.pre-commit-config.yaml DELETED
@@ -1,20 +0,0 @@
1
- # Pre-commit hooks for code quality
2
- # Install: pip install pre-commit && pre-commit install
3
- # Run manually: pre-commit run --all-files
4
-
5
- repos:
6
- - repo: https://github.com/astral-sh/ruff-pre-commit
7
- rev: v0.8.6
8
- hooks:
9
- - id: ruff
10
- args: [--fix]
11
- - id: ruff-format
12
-
13
- - repo: https://github.com/pre-commit/mirrors-mypy
14
- rev: v1.14.1
15
- hooks:
16
- - id: mypy
17
- additional_dependencies: []
18
- args: [--ignore-missing-imports]
19
- # Only check changed files for speed
20
- pass_filenames: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CHANGELOG.md DELETED
@@ -1,581 +0,0 @@
1
- # Changelog
2
-
3
- All notable changes to the Reachy Mini HA Voice project will be documented in this file.
4
-
5
- The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
- and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
-
8
- ## [Unreleased]
9
-
10
- ### Fixed
11
- - **NameError** - Add missing deque import in gesture smoother
12
- - **Syntax Error** - Add missing class indentation for volume methods in audio_player.py
13
- - **Audio Card Name Detection** - Use SDK's detection logic instead of hardcoded values
14
- - **SDK Port 8000 Blocking** - Use amixer directly for volume control to avoid SDK HTTP API blocking
15
- - **Memory Leak Root Cause** - Audio buffer array creation in loop causing unbounded memory growth
16
- - **Indentation Error** - Fix indentation in audio_player.py stop_sendspin method
17
-
18
- ## [0.9.9] - 2026-01-28
19
-
20
- ### Fixed
21
- - **SDK Buffer Overflow During Idle**
22
- - Add SDK buffer flush on GStreamer lock timeout
23
- - Prevents buffer overflow during long idle periods when lock contention prevents buffer drainage
24
- - Audio thread flushes SDK audio buffer when lock acquisition times out
25
- - Camera thread flushes SDK video buffer when lock acquisition times out
26
- - Audio playback flushes SDK playback buffer when lock acquisition times out
27
- - Resolves SDK crashes during extended wake-up idle periods without conversation
28
- - Requires Reachy Mini hardware (not applicable to simulation mode)
29
-
30
- ### Fixed
31
- - **Memory Leaks**
32
- - Audio buffer memory leak - added size limit to prevent unbounded growth
33
- - Temp file leak - downloaded audio files now cleaned up after playback
34
- - Multiple memory leak and resource leak issues fixed
35
- - Thread-safe draining flag using threading.Event
36
- - Silent failures now logged for debugging
37
-
38
- ### Optimized
39
- - **Gesture Recognition Sensitivity**
40
- - Simplify GestureSmoother to frequency-based confirmation (1 frame)
41
- - Remove all confidence filtering - return all detections to Home Assistant
42
- - Remove unused parameters (confidence_threshold, detection_threshold, GestureConfig)
43
- - Remove duplicate empty check in gesture detection
44
- - Add GestureSmoother class with history tracking for stable output
45
- - Reduce gesture detection interval from 3 frames to 1 frame for higher frequency
46
- - Fix: Gesture detection now returns all detected hands instead of only the highest confidence one
47
- - Matches reference implementation behavior for improved detection rate
48
- - No conflicts with face tracking (shared frame, independent processing)
49
-
50
- ### Code Quality
51
- - Fix Ruff linter issues (import ordering, missing newlines, __all__ sorting)
52
- - Format code with Ruff formatter (5 files reformatted)
53
- - Fix slice index error in gesture detection (convert coordinates to integers)
54
- - Fix Python 3.12 type annotation compatibility
55
-
56
- ## [0.9.8] - 2026-01-27
57
-
58
- ### New
59
- - Mute switch entity - suspends voice services only (not camera/motion)
60
- - Disable Camera switch entity - suspends camera and AI processing
61
- - Home Assistant connection-driven feature loading
62
- - Automatic suspend/resume on HA disconnect/reconnect
63
-
64
- ### Fixed
65
- - Camera disable logic - corrected inverted conditions for proper operation
66
- - Prevent daemon crash when entering idle state
67
- - Camera preview in Home Assistant
68
- - SDK crash during idle - optimized audio processing to skip get_frame() when not streaming to Home Assistant, reducing GStreamer resource competition
69
- - Add GStreamer threading lock to prevent pipeline competition between audio, playback, and camera threads
70
- - Audio thread gets priority during conversations - bypasses lock when conversation is active
71
- - Remove GStreamer lock to fix wake word detection in idle state (lock was preventing wake word detection)
72
-
73
- ### Optimized
74
- - Reduce log output by 30-40%
75
- - Bundle face tracking model with package - eliminated HuggingFace download dependency, removed huggingface_hub from requirements, models now load from local package directory for offline operation
76
- - Replace HTTP API polling with SDK Zenoh for daemon status monitoring to reduce uvicorn blocking and improve stability
77
- - Device ID now reads /etc/machine-id directly - removed uuid.getnode() and file persistence
78
- - Implement high-priority SDK improvements
79
- - Remove aiohttp dependency from daemon_monitor - fully migrated to SDK Zenoh
80
-
81
- ### Removed
82
- - Temporarily disable emotion playback during TTS
83
- - Unused config items (connection_timeout)
84
-
85
- ### Code Quality
86
- - Code quality improvements
87
-
88
- ## [0.9.7] - 2026-01-20
89
-
90
- ### Fixed
91
- - Device ID file path corrected after util.py moved to core/ subdirectory (prevents HA seeing device as new)
92
- - Animation file path corrected (was looking in wrong directory)
93
- - Remove hey_jarvis from required wake words (it's optional in openWakeWord/)
94
-
95
- ## [0.9.6] - 2026-01-20
96
-
97
- ### New
98
- - Add ruff linter/formatter and mypy type checker configuration
99
- - Add pre-commit hooks for automated code quality checks
100
-
101
- ### Fixed
102
- - Remove duplicate resume() method in audio_player.py
103
- - Remove duplicate connection_lost() method in satellite.py
104
- - Store asyncio task reference in sleep_manager.py to prevent garbage collection
105
-
106
- ### Optimized
107
- - Use dict.items() for efficient iteration in smoothing.py
108
-
109
- ## [0.9.5] - 2026-01-19
110
-
111
- ### Refactored
112
- - Modularize codebase - new core/motion/vision/audio/entities module structure
113
- - Remove legacy/compatibility code
114
- - Remove audio diagnostics debug code
115
-
116
- ### New
117
- - Direct callbacks for HA sleep/wake buttons to suspend/resume services
118
-
119
- ### Optimized
120
- - Audio processing latency - reduced chunk size from 1024 to 256 samples (64ms → 16ms)
121
- - Audio loop delay reduced from 10ms to 1ms for faster VAD response
122
- - Stereo to mono conversion uses first channel instead of mean for cleaner signal
123
-
124
- ### Improved
125
- - Camera resume_from_suspend now synchronous for reliable wake from sleep
126
- - Rotation clamping in face tracking to prevent IK collisions
127
- - Audio gain boosted for faster VAD detection
128
- - Audio NaN/Inf values causing STT issues fixed
129
-
130
- ## [0.9.0] - 2026-01-18
131
-
132
- ### New
133
- - Robot state monitor for proper sleep mode handling - services pause when robot disconnects and resume on reconnect
134
- - System diagnostics entities (CPU, memory, disk, uptime) exposed as Home Assistant diagnostic sensors
135
- - Phase 24 with 9 diagnostic sensors (cpu_percent, cpu_temperature, memory_percent, memory_used_gb, disk_percent, disk_free_gb, uptime_hours, process_cpu_percent, process_memory_mb)
136
-
137
- ### Fixed
138
- - Voice assistant and movement manager now properly pause during robot sleep mode instead of generating error spam
139
-
140
- ### Improved
141
- - Graceful service lifecycle management with RobotStateMonitor callbacks
142
-
143
- ## [0.8.7] - 2026-01-18
144
-
145
- ### Fixed
146
- - Clamp body_yaw to safe range to prevent IK collision warnings during emotion playback
147
- - Emotion moves and face tracking now respect SDK safety limits
148
-
149
- ### Improved
150
- - Face tracking smoothness - removed EMA smoothing (matches reference project)
151
- - Face tracking timing updated to match reference (2s delay, 1s interpolation)
152
-
153
- ## [0.8.6] - 2026-01-18
154
-
155
- ### Fixed
156
- - Audio buffer memory leak - added size limit to prevent unbounded growth
157
- - Temp file leak - downloaded audio files now cleaned up after playback
158
- - Camera thread termination timeout increased for clean shutdown
159
- - Thread-safe draining flag using threading.Event
160
- - Silent failures now logged for debugging
161
-
162
- ## [0.8.5] - 2026-01-18
163
-
164
- ### Fixed
165
- - DOA turn-to-sound direction inverted - now turns correctly toward sound source
166
- - Graceful shutdown prevents daemon crash on app stop
167
-
168
- ## [0.8.4] - 2026-01-18
169
-
170
- ### Improved
171
- - Smooth idle animation with interpolation phase (matches reference BreathingMove)
172
- - Two-phase animation - interpolates to neutral before oscillation
173
- - Antenna frequency updated to 0.5Hz (was 0.15Hz) for more natural sway
174
-
175
- ## [0.8.3] - 2026-01-18
176
-
177
- ### Fixed
178
- - Body now properly follows head rotation during face tracking
179
- - body_yaw extracted from final head pose matrix and synced with head_yaw
180
- - Matches reference project sweep_look behavior for natural body movement
181
-
182
- ## [0.8.2] - 2026-01-18
183
-
184
- ### Fixed
185
- - Body follows head rotation during face tracking - body_yaw syncs with head_yaw
186
- - Matches reference project sweep_look behavior for natural body movement
187
-
188
- ## [0.8.1] - 2026-01-18
189
-
190
- ### Fixed
191
- - face_detected entity now pushes state updates to Home Assistant in real-time
192
- - Body yaw simplified to match reference project - SDK automatic_body_yaw handles collision prevention
193
- - Idle animation now starts immediately on app launch
194
- - Smooth antenna animation - removed pose change threshold for continuous motion
195
-
196
- ## [0.8.0] - 2026-01-17
197
-
198
- ### New
199
- - Comprehensive emotion keyword mapping with 280+ Chinese and English keywords
200
- - 35 emotion categories mapped to robot expressions
201
- - Auto-trigger expressions from conversation text patterns
202
-
203
- ## [0.7.3] - 2026-01-12
204
-
205
- ### Fixed
206
- - Revert to reference project pattern - use refractory period instead of state flags
207
- - Remove broken _in_pipeline and _tts_playing state management
208
- - Restore correct RUN_END event handling from linux-voice-assistant
209
-
210
- ## [0.7.2] - 2026-01-12
211
-
212
- ### Fixed
213
- - Remove premature _tts_played reset in RUN_END event
214
- - Ensure _in_pipeline stays True until TTS playback completes
215
-
216
- ## [0.7.1] - 2026-01-12
217
-
218
- ### Fixed
219
- - Prevent wake word detection during TTS playback
220
- - Add _tts_playing flag to track TTS audio state precisely
221
-
222
- ## [0.7.0] - 2026-01-12
223
-
224
- ### New
225
- - Gesture detection using HaGRID ONNX models (18 gesture classes)
226
- - gesture_detected and gesture_confidence entities in Home Assistant
227
-
228
- ### Fixed
229
- - Gesture state now properly pushed to Home Assistant in real-time
230
-
231
- ### Optimized
232
- - Aggressive power saving - 0.5fps idle mode after 30s without face
233
- - Gesture detection only runs when face detected (saves CPU)
234
-
235
- ## [0.6.1] - 2026-01-12
236
-
237
- ### Fixed
238
- - Prioritize MicroWakeWord over OpenWakeWord for same-name wake words
239
- - OpenWakeWord wake words now visible in Home Assistant selection
240
- - Stop word detection now works correctly
241
- - STT/LLM response time improved with fixed audio chunk size
242
-
243
- ## [0.6.0] - 2026-01-11
244
-
245
- ### New
246
- - Real-time audio-driven speech animation (SwayRollRT algorithm)
247
- - JSON-driven animation system - all animations configurable
248
-
249
- ### Refactored
250
- - Remove hardcoded actions, use animation offsets only
251
-
252
- ### Fixed
253
- - TTS audio analysis now works with local playback
254
-
255
- ## [0.5.16] - 2026-01-11
256
-
257
- ### Removed
258
- - Tap-to-wake feature (too many false triggers)
259
-
260
- ### New
261
- - Continuous Conversation switch in Home Assistant
262
-
263
- ### Refactored
264
- - Simplified satellite.py and voice_assistant.py
265
-
266
- ## [0.5.15] - 2026-01-11
267
-
268
- ### New
269
- - Audio settings persistence (AGC, Noise Suppression, Tap Sensitivity)
270
-
271
- ### Refactored
272
- - Move Sendspin mDNS discovery to zeroconf.py
273
-
274
- ### Fixed
275
- - Tap detection not re-enabled during emotion playback in conversation
276
-
277
- ## [0.5.14] - 2026-01-11
278
-
279
- ### Fixed
280
- - Skip ALL wake word processing when pipeline is active
281
- - Eliminate race condition in pipeline state during continuous conversation
282
-
283
- ### Improved
284
- - Control loop increased to 100Hz (daemon updated)
285
-
286
- ## [0.5.13] - 2026-01-10
287
-
288
- ### New
289
- - JSON-driven animation system for conversation states
290
- - AnimationPlayer class inspired by SimpleDances project
291
-
292
- ### Refactored
293
- - Replace SpeechSwayGenerator and BreathingAnimation with unified animation system
294
-
295
- ## [0.5.12] - 2026-01-10
296
-
297
- ### Removed
298
- - Deleted broken hey_reachy wake word model
299
-
300
- ### Revert
301
- - Default wake word back to "Okay Nabu"
302
-
303
- ## [0.5.11] - 2026-01-10
304
-
305
- ### Fixed
306
- - Reset feature extractors when switching wake words
307
- - Add refractory period after wake word switch
308
-
309
- ## [0.5.10] - 2026-01-10
310
-
311
- ### Fixed
312
- - Wake word models now have 'id' attribute set correctly
313
- - Wake word switching from Home Assistant now works
314
-
315
- ## [0.5.9] - 2026-01-10
316
-
317
- ### New
318
- - Default wake word changed to hey_reachy
319
-
320
- ### Fixed
321
- - Wake word switching bug
322
-
323
- ## [0.5.8] - 2026-01-09
324
-
325
- ### Fixed
326
- - Tap detection waits for emotion playback to complete
327
- - Poll daemon API for move completion
328
-
329
- ## [0.5.7] - 2026-01-09
330
-
331
- ### New
332
- - DOA turn-to-sound at wakeup
333
-
334
- ### Fixed
335
- - Show raw DOA angle in Home Assistant (0-180)
336
- - Invert DOA yaw direction
337
-
338
- ## [0.5.6] - 2026-01-08
339
-
340
- ### Fixed
341
- - Better pipeline state tracking to prevent duplicate audio
342
-
343
- ## [0.5.5] - 2026-01-08
344
-
345
- ### New
346
- - Prevent concurrent pipelines
347
- - Add prompt sound for continuous conversation
348
-
349
- ## [0.5.4] - 2026-01-08
350
-
351
- ### Fixed
352
- - Wait for RUN_END before starting new conversation
353
-
354
- ## [0.5.3] - 2026-01-08
355
-
356
- ### Fixed
357
- - Improve continuous conversation with conversation_id tracking
358
-
359
- ## [0.5.2] - 2026-01-08
360
-
361
- ### Fixed
362
- - Enable HA control of robot pose
363
- - Continuous conversation improvements
364
-
365
- ## [0.5.1] - 2026-01-08
366
-
367
- ### Fixed
368
- - Sendspin connects to music_player instead of tts_player
369
- - Persist tap_sensitivity settings
370
- - Pause Sendspin during voice assistant wakeup
371
- - Sendspin prioritize 16kHz sample rate
372
-
373
- ## [0.5.0] - 2026-01-07
374
-
375
- ### New
376
- - Face tracking with adaptive frequency
377
- - Sendspin multi-room audio integration
378
-
379
- ### Optimized
380
- - Shutdown mechanism improvements
381
-
382
- ## [0.4.0] - 2026-01-07
383
-
384
- ### Fixed
385
- - Daemon stability fixes
386
-
387
- ### New
388
- - Face tracking enabled by default
389
-
390
- ### Optimized
391
- - Microphone settings for better sensitivity
392
-
393
- ## [0.3.0] - 2026-01-06
394
-
395
- ### New
396
- - Tap sensitivity slider entity
397
-
398
- ### Fixed
399
- - Music Assistant compatibility
400
-
401
- ### Optimized
402
- - Face tracking and tap detection
403
-
404
- ## [0.2.21] - 2026-01-06
405
-
406
- ### Fixed
407
- - Daemon crash - reduce control loop to 2Hz
408
- - Pause control loop during audio playback
409
-
410
- ## [0.2.20] - 2026-01-06
411
-
412
- ### Revert
413
- - Audio/satellite/voice_assistant to v0.2.9 working state
414
-
415
- ## [0.2.19] - 2026-01-06
416
-
417
- ### Fixed
418
- - Force localhost connection mode to prevent WebRTC errors
419
-
420
- ## [0.2.18] - 2026-01-06
421
-
422
- ### Fixed
423
- - Audio playback - restore wakeup sound
424
- - Use push_audio_sample for TTS
425
-
426
- ## [0.2.17] - 2026-01-06
427
-
428
- ### Removed
429
- - head_joints/passive_joints entities
430
- - error_message to diagnostic category
431
-
432
- ## [0.2.16] - 2026-01-06
433
-
434
- ### Fixed
435
- - TTS playback - pause recording during playback
436
-
437
- ## [0.2.15] - 2026-01-06
438
-
439
- ### Fixed
440
- - Use play_sound() instead of push_audio_sample() for TTS
441
-
442
- ## [0.2.14] - 2026-01-06
443
-
444
- ### Fixed
445
- - Pause audio recording during TTS playback
446
-
447
- ## [0.2.13] - 2026-01-06
448
-
449
- ### Fixed
450
- - Don't manually start/stop media - let SDK/daemon manage it
451
-
452
- ## [0.2.12] - 2026-01-05
453
-
454
- ### Fixed
455
- - Disable breathing animation to prevent serial port overflow
456
-
457
- ## [0.2.11] - 2026-01-05
458
-
459
- ### Fixed
460
- - Disable wakeup sound to prevent daemon crash
461
- - Add debug logging for troubleshooting
462
-
463
- ## [0.2.10] - 2026-01-05
464
-
465
- ### Added
466
- - Debug logging for motion init
467
-
468
- ### Fixed
469
- - Audio fallback samplerate
470
-
471
- ## [0.2.9] - 2026-01-05
472
-
473
- ### Removed
474
- - DOA/speech detection - replaced by face tracking
475
-
476
- ## [0.2.8] - 2026-01-05
477
-
478
- ### New
479
- - Replace DOA with YOLO face tracking
480
-
481
- ## [0.2.7] - 2026-01-05
482
-
483
- ### Fixed
484
- - Add DOA caching to prevent ReSpeaker query overload
485
-
486
- ## [0.2.6] - 2026-01-05
487
-
488
- ### New
489
- - Thread-safe ReSpeaker USB access to prevent daemon deadlock
490
-
491
- ## [0.2.4] - 2026-01-05
492
-
493
- ### Fixed
494
- - Microphone volume control via daemon HTTP API
495
-
496
- ## [0.2.3] - 2026-01-05
497
-
498
- ### Fixed
499
- - Daemon crash caused by conflicting pose commands
500
- - Disable: Pose setter methods in ReachyController
501
-
502
- ## [0.2.2] - 2026-01-05
503
-
504
- ### Fixed
505
- - Second conversation motion failure
506
- - Reduce: Control loop from 20Hz to 10Hz
507
- - Improve: Connection recovery (faster reconnect)
508
-
509
- ## [0.2.1] - 2026-01-05
510
-
511
- ### Fixed
512
- - Daemon crash issue
513
- - Optimize: Code structure
514
-
515
- ## [0.2.0] - 2026-01-05
516
-
517
- ### New
518
- - Automatic facial expressions during conversation
519
- - New: Emotion playback integration
520
-
521
- ### Refactored
522
- - Integrate emotion playback into MovementManager
523
-
524
- ## [0.1.5] - 2026-01-04
525
-
526
- ### Optimized
527
- - Code splitting and organization
528
-
529
- ### Fixed
530
- - Program crash issues
531
-
532
- ## [0.1.0] - 2026-01-01
533
-
534
- ### New
535
- - Initial release
536
- - ESPHome protocol server implementation
537
- - mDNS auto-discovery for Home Assistant
538
- - Local wake word detection (microWakeWord)
539
- - Voice assistant pipeline integration
540
- - Basic motion feedback (nod, shake)
541
-
542
- ---
543
-
544
- ## Version History Summary
545
-
546
- | Version | Date | Major Changes |
547
- |---------|------|--------------|
548
- | 0.9.9 | 2026-01-28 | SDK buffer overflow fixes, memory leak fixes, gesture detection optimization |
549
- | 0.9.8 | 2026-01-27 | Mute/Disable entities, HA connection-driven features, log reduction |
550
- | 0.9.7 | 2026-01-20 | Device ID path fix, animation path fix |
551
- | 0.9.6 | 2026-01-20 | Code quality tools (ruff, mypy, pre-commit) |
552
- | 0.9.5 | 2026-01-19 | Modular architecture refactoring, audio latency optimization |
553
- | 0.9.0 | 2026-01-18 | Robot state monitor, system diagnostics entities |
554
- | 0.8.7 | 2026-01-18 | Body yaw clamping, face tracking smoothness |
555
- | 0.8.0 | 2026-01-17 | Emotion keyword mapping (280+ keywords, 35 categories) |
556
- | 0.7.0 | 2026-01-12 | Gesture detection with HaGRID ONNX models (18 gestures) |
557
- | 0.6.0 | 2026-01-11 | Real-time audio-driven speech animation, JSON animation system |
558
- | 0.5.0 | 2026-01-07 | Face tracking, Sendspin multi-room audio |
559
- | 0.4.0 | 2026-01-07 | Daemon stability, microphone optimization |
560
- | 0.3.0 | 2026-01-06 | Tap sensitivity slider |
561
- | 0.2.0 | 2026-01-05 | Emotion playback integration |
562
- | 0.1.0 | 2026-01-01 | Initial release |
563
-
564
- ## Project Statistics
565
-
566
- - **Total Versions**: 29 (from 0.1.0 to 0.9.9)
567
- - **Development Period**: ~30 days (2026-01-01 to 2026-01-28)
568
- - **Average Release Rate**: ~1 version per day
569
- - **Lines of Code**: ~18,000 lines across 52 Python files
570
- - **ESPHome Entities**: 54 entities implemented
571
- - **Supported Features**:
572
- - Voice assistant pipeline integration
573
- - Local wake word detection (multiple models)
574
- - Face tracking with YOLO
575
- - Gesture detection (18 classes)
576
- - Multi-room audio (Sendspin)
577
- - Real-time speech animation
578
- - Emotion keyword detection (280+ keywords)
579
- - System diagnostics
580
-
581
- For detailed implementation notes, see [PROJECT_PLAN.md](./PROJECT_PLAN.md).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Project_Summary.md → PROJECT_PLAN.md RENAMED
The diff for this file is too large to render. See raw diff
 
README.md CHANGED
@@ -9,6 +9,7 @@ short_description: Deep integration of Reachy Mini robot with Home Assistant
9
  tags:
10
  - reachy_mini
11
  - reachy_mini_python_app
 
12
  - reachy_mini_home_assistant
13
  - home_assistant
14
  - homeassistant
 
9
  tags:
10
  - reachy_mini
11
  - reachy_mini_python_app
12
+ - reachy_mini_ha_voice
13
  - reachy_mini_home_assistant
14
  - home_assistant
15
  - homeassistant
changelog.json CHANGED
@@ -1,225 +1,4 @@
1
  [
2
- {
3
- "version": "1.0.3",
4
- "date": "2026-03-07",
5
- "changes": [
6
- "Build: Bump package version to 1.0.3",
7
- "New: Add Idle Random Actions switch in Home Assistant with preferences persistence and startup restore",
8
- "New: Add configurable idle_random_actions action presets in conversation_animations.json for centralized idle motion tuning",
9
- "Fix: Remove duplicate idle_random_actions fields/methods and complete runtime control wiring in controller/entity registry/movement manager",
10
- "Improve: Increase idle breathing and antenna sway cadence to 0.24Hz with wiggle antenna profile for more natural standby motion",
11
- "Optimize: Remove set_target global rate limiting and unchanged-pose skip gating to continuously stream motion commands each control tick",
12
- "Optimize: Remove idle antenna slew-rate limiter so antenna motion follows animation waveforms directly for reference-like smoothness"
13
- ]
14
- },
15
- {
16
- "version": "1.0.2",
17
- "date": "2026-03-06",
18
- "changes": [
19
- "Build: Bump package version to 1.0.2",
20
- "Fix: Restore idle antenna sway animation and tune idle breathing parameters to reduce perceived stiffness",
21
- "Fix: Reintroduce idle anti-chatter smoothing/deadband for antenna and body updates to reduce mechanical jitter/noise",
22
- "Fix: Switch sleep/wake control to daemon API (start/stop with wake_up/goto_sleep) so /api/daemon/status reflects real sleep state on SDK 1.5",
23
- "Fix: Normalize daemon status parsing for SDK 1.5 object-based status responses",
24
- "Fix: Remove all app-side antenna power on/off operations to avoid SDK instability and external-control conflicts",
25
- "Change: Keep idle antenna behavior as animation-only control (no torque coupling)",
26
- "Change: Tighten preference loading to current schema (no legacy config fallback filtering)",
27
- "Fix: Sync Idle Motion toggle with Idle Antenna Motion toggle for expected behavior in ESPHome",
28
- "Fix: Remove legacy app-managed audio routing hooks and rely on native SDK/system audio selection",
29
- "New: Add Home Assistant blueprint for Reachy presence companion automation",
30
- "Improve: Blueprint supports device-first auto-binding and richer usage instructions",
31
- "Docs: Refresh landing page (index.html) with current version, GitHub source link, and new Blueprint/Auto Release capability cards",
32
- "New: Add GitHub workflow to auto-create releases when pyproject/changelog version updates produce a new tag",
33
- "Chore: Ignore local wiki workspace artifacts (local/) from repository tracking"
34
- ]
35
- },
36
- {
37
- "version": "1.0.1",
38
- "date": "2026-03-05",
39
- "changes": [
40
- "Build: Bump package version to 1.0.1",
41
- "Deps: Update runtime dependency baseline to reachy-mini>=1.5.0",
42
- "Fix: Remove legacy Zenoh 7447 startup precheck for SDK v1.5 compatibility",
43
- "Fix: Remove legacy ZError string matching from connection error handling",
44
- "Fix: Adapt daemon status handling to SDK v1.5 DaemonStatus object (prevents AttributeError on status.get)",
45
- "Fix: Harden stop-word handling with runtime activation/deactivation and mute-aware trigger gating",
46
- "Fix: Align wakeup stream start timing with reference behavior (start microphone stream after wakeup sound)",
47
- "Fix: Improve TTS streaming robustness and reduce cutoffs with retry-based audio push",
48
- "Optimize: Support single-request streaming with in-memory fallback cache for one-time TTS URLs (no temp file dependency)",
49
- "Optimize: Lower streaming fetch chunk size and apply unthrottled preroll for faster first audio"
50
- ]
51
- },
52
- {
53
- "version": "1.0.0",
54
- "date": "2026-03-04",
55
- "changes": [
56
- "Build: Bump package version to 1.0.0",
57
- "Deps: Require reachy-mini[gstreamer]>=1.4.1",
58
- "Fix: Improve gesture responsiveness and stability (faster smoothing, min processing cadence, no-gesture alignment)",
59
- "Fix: Auto-match ONNX gesture input size from model shape to prevent INVALID_ARGUMENT dimension errors",
60
- "New: Add Sendspin switch in ESPHome (default OFF, persistent, runtime enable/disable)",
61
- "New: Add Face Tracking and Gesture Detection switches in ESPHome (both default OFF, persistent)",
62
- "New: Add Face Confidence number entity (0.0-1.0, persistent)",
63
- "Optimize: Unload/reload face and gesture models when toggled off/on to save resources",
64
- "Optimize: Idle behavior updated to breathing + look-around alternation, idle antenna sway disabled",
65
- "Optimize: Adjust idle breathing to human-like cadence",
66
- "Fix: Disable antenna torque in idle mode and re-enable outside idle to reduce chatter/noise",
67
- "Fix: Harden startup against import-time failures (lazy emotion library loading and graceful Sendspin disable)",
68
- "Fix: Enforce deterministic audio startup path and fail fast when microphone capture is not ready",
69
- "Optimize: Make MJPEG streaming viewer-aware (skip continuous JPEG encode/push when no stream clients)",
70
- "Optimize: Keep face/gesture AI processing active even when stream viewers are absent",
71
- "Fix: Add on-demand /snapshot JPEG generation when no cached stream frame is available",
72
- "Change: Use camera backend default FPS/resolution for stream path instead of forcing fixed 1080p/25fps"
73
- ]
74
- },
75
- {
76
- "version": "0.9.9",
77
- "date": "2026-01-28",
78
- "changes": [
79
- "Fix: Audio buffer overflow - require Reachy Mini hardware, use only Reachy microphone with 50ms sleep",
80
- "Optimize: Gesture detection sensitivity - remove all confidence filtering, return all detections to Home Assistant",
81
- "Optimize: Gesture detection now runs at 1 frame interval for maximum responsiveness",
82
- "Refactor: Simplify GestureSmoother to frequency-based confirmation (1 frame)",
83
- "Refactor: Remove unused parameters (confidence_threshold, detection_threshold, GestureConfig)",
84
- "Fix: Remove duplicate empty check in gesture detection",
85
- "Optimize: SDK integration - add MediaBackend detection and proper resource cleanup",
86
- "Document: ReSpeaker private attribute access risk with TODO comments"
87
- ]
88
- },
89
- {
90
- "version": "0.9.8",
91
- "date": "2026-01-27",
92
- "changes": [
93
- "New: Mute switch and Disable Camera entities for granular control",
94
- "Fix: Camera disable logic and daemon crash prevention",
95
- "New: Home Assistant connection-driven feature loading with auto suspend/resume",
96
- "Optimize: Reduce log output by 30-40%",
97
- "Fix: Code quality improvements",
98
- "Fix: SDK crash during idle - optimize audio processing and add GStreamer threading lock",
99
- "Optimize: Bundle face tracking model, use SDK Zenoh for daemon monitoring",
100
- "Simplify: Device ID reads /etc/machine-id directly",
101
- "Clean up: Remove unused config items"
102
- ]
103
- },
104
- {
105
- "version": "0.9.7",
106
- "date": "2026-01-20",
107
- "changes": [
108
- "Fix: Device ID file path corrected after util.py moved to core/ subdirectory (prevents HA seeing device as new)",
109
- "Fix: Animation file path corrected (was looking in wrong directory)",
110
- "Fix: Remove hey_jarvis from required wake words (it's optional in openWakeWord/)"
111
- ]
112
- },
113
- {
114
- "version": "0.9.6",
115
- "date": "2026-01-20",
116
- "changes": [
117
- "New: Add ruff linter/formatter and mypy type checker configuration",
118
- "New: Add pre-commit hooks for automated code quality checks",
119
- "Fix: Remove duplicate resume() method in audio_player.py",
120
- "Fix: Remove duplicate connection_lost() method in satellite.py",
121
- "Fix: Store asyncio task reference in sleep_manager.py to prevent garbage collection",
122
- "Optimize: Use dict.items() for efficient iteration in smoothing.py"
123
- ]
124
- },
125
- {
126
- "version": "0.9.5",
127
- "date": "2026-01-19",
128
- "changes": [
129
- "Refactor: Modularize codebase - new core/motion/vision/audio/entities module structure",
130
- "New: Direct callbacks for HA sleep/wake buttons to suspend/resume services",
131
- "Optimize: Audio processing latency - reduced chunk size from 1024 to 256 samples (64ms �?16ms)",
132
- "Optimize: Audio loop delay reduced from 10ms to 1ms for faster VAD response",
133
- "Optimize: Stereo to mono conversion uses first channel instead of mean for cleaner signal",
134
- "Improve: Camera resume_from_suspend now synchronous for reliable wake from sleep",
135
- "Improve: Rotation clamping in face tracking to prevent IK collisions"
136
- ]
137
- },
138
- {
139
- "version": "0.9.0",
140
- "date": "2026-01-18",
141
- "changes": [
142
- "New: Robot state monitor for proper sleep mode handling - services pause when robot disconnects and resume on reconnect",
143
- "New: System diagnostics entities (CPU, memory, disk, uptime) exposed as Home Assistant diagnostic sensors",
144
- "New: Phase 24 with 9 diagnostic sensors (cpu_percent, cpu_temperature, memory_percent, memory_used_gb, disk_percent, disk_free_gb, uptime_hours, process_cpu_percent, process_memory_mb)",
145
- "Fix: Voice assistant and movement manager now properly pause during robot sleep mode instead of generating error spam",
146
- "Improve: Graceful service lifecycle management with RobotStateMonitor callbacks"
147
- ]
148
- },
149
- {
150
- "version": "0.8.7",
151
- "date": "2026-01-18",
152
- "changes": [
153
- "Fix: Clamp body_yaw to safe range to prevent IK collision warnings during emotion playback",
154
- "Fix: Emotion moves and face tracking now respect SDK safety limits",
155
- "Improve: Face tracking smoothness - removed EMA smoothing (matches reference project)",
156
- "Improve: Face tracking timing updated to match reference (2s delay, 1s interpolation)"
157
- ]
158
- },
159
- {
160
- "version": "0.8.6",
161
- "date": "2026-01-18",
162
- "changes": [
163
- "Fix: Audio buffer memory leak - added size limit to prevent unbounded growth",
164
- "Fix: Temp file leak - downloaded audio files now cleaned up after playback",
165
- "Fix: Camera thread termination timeout increased for clean shutdown",
166
- "Fix: Thread-safe draining flag using threading.Event",
167
- "Fix: Silent failures now logged for debugging"
168
- ]
169
- },
170
- {
171
- "version": "0.8.5",
172
- "date": "2026-01-18",
173
- "changes": [
174
- "Fix: DOA turn-to-sound direction inverted - now turns correctly toward sound source",
175
- "Fix: Graceful shutdown prevents daemon crash on app stop"
176
- ]
177
- },
178
- {
179
- "version": "0.8.4",
180
- "date": "2026-01-18",
181
- "changes": [
182
- "Improve: Smooth idle animation with interpolation phase (matches reference BreathingMove)",
183
- "Improve: Two-phase animation - interpolates to neutral before oscillation",
184
- "Fix: Antenna frequency updated to 0.5Hz (was 0.15Hz) for more natural sway"
185
- ]
186
- },
187
- {
188
- "version": "0.8.3",
189
- "date": "2026-01-18",
190
- "changes": [
191
- "Fix: Body now properly follows head rotation during face tracking",
192
- "Fix: body_yaw extracted from final head pose matrix and synced with head_yaw",
193
- "Fix: Matches reference project sweep_look behavior for natural body movement"
194
- ]
195
- },
196
- {
197
- "version": "0.8.2",
198
- "date": "2026-01-18",
199
- "changes": [
200
- "Fix: Body now follows head rotation during face tracking - body_yaw syncs with head_yaw",
201
- "Fix: Matches reference project sweep_look behavior for natural body movement"
202
- ]
203
- },
204
- {
205
- "version": "0.8.1",
206
- "date": "2026-01-18",
207
- "changes": [
208
- "Fix: face_detected entity now pushes state updates to Home Assistant in real-time",
209
- "Fix: Body yaw simplified to match reference project - SDK automatic_body_yaw handles collision prevention",
210
- "Fix: Idle animation now starts immediately on app launch",
211
- "Fix: Smooth antenna animation - removed pose change threshold for continuous motion"
212
- ]
213
- },
214
- {
215
- "version": "0.8.0",
216
- "date": "2026-01-17",
217
- "changes": [
218
- "New: Comprehensive emotion keyword mapping with 280+ Chinese and English keywords",
219
- "New: 35 emotion categories mapped to robot expressions",
220
- "New: Auto-trigger expressions from conversation text patterns"
221
- ]
222
- },
223
  {
224
  "version": "0.7.3",
225
  "date": "2026-01-12",
@@ -614,4 +393,3 @@
614
  ]
615
  }
616
  ]
617
-
 
1
  [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {
3
  "version": "0.7.3",
4
  "date": "2026-01-12",
 
393
  ]
394
  }
395
  ]
 
docs/USER_MANUAL_CN.md DELETED
@@ -1,256 +0,0 @@
1
- # Reachy Mini 语音助手 - 用户手册
2
-
3
- ## 系统要求
4
-
5
- ### 硬件
6
- - Reachy Mini 机器人(带 ReSpeaker XVF3800 麦克风)
7
- - WiFi 网络连接
8
-
9
- ### 软件
10
- - Home Assistant(2024.1 或更高版本)
11
- - Home Assistant 中已启用 ESPHome 集成
12
-
13
- ---
14
-
15
- ## 安装步骤
16
-
17
- ### 第一步:安装应用
18
- 从 Reachy Mini 应用商店安装 `reachy_mini_home_assistant`。
19
-
20
- ### 第二步:启动应用
21
- 应用将自动:
22
- - 在端口 6053 启动 ESPHome 服务器
23
- - 加载预打包的唤醒词模型
24
- - 通过 mDNS 注册以便自动发现
25
- - 如果网络上有 Sendspin 服务器则自动连接
26
-
27
- ### 第三步:连接 Home Assistant
28
- **自动连接(推荐):**
29
- Home Assistant 会通过 mDNS 自动发现 Reachy Mini。
30
-
31
- **手动连接:**
32
- 1. 进入 设置 → 设备与服务
33
- 2. 点击"添加集成"
34
- 3. 选择"ESPHome"
35
- 4. 输入机器人的 IP 地址和端口 6053
36
-
37
- ---
38
-
39
- ## 功能介绍
40
-
41
- ### 语音助手
42
- - **唤醒词检测**:说 "Okay Nabu" 激活(本地处理)
43
- - **停止词**:说 "Stop" 结束对话
44
- - **连续对话模式**:无需重复唤醒词即可持续对话
45
- - **语音识别/合成**:使用 Home Assistant 配置的语音引擎
46
-
47
- **支持的唤醒词:**
48
- - Okay Nabu(默认)
49
- - Hey Jarvis
50
- - Alexa
51
- - Hey Luna
52
-
53
- ### 人脸追踪
54
- - 基于 YOLO 的人脸检测
55
- - 头部跟随检测到的人脸
56
- - 头部转动时身体随之旋转
57
- - 自适应帧率:活跃时 15fps,空闲时 2fps
58
-
59
- ### 手势检测
60
- 检测到的手势及机器人响应:
61
-
62
- | 手势 | 响应 |
63
- |------|------|
64
- | like(竖大拇指)| 开心情绪 |
65
- | dislike(拇指朝下)| 难过情绪 |
66
- | ok | 点头动画 |
67
- | peace(剪刀手)| 热情情绪 |
68
- | stop | 停止说话 |
69
- | call(打电话手势)| 开始聆听 |
70
- | palm(手掌)| 暂停动作 |
71
- | fist(握拳)| 愤怒情绪 |
72
- | one/two/three/four | 发送 HA 事件 |
73
-
74
- ### 情绪响应
75
- 机器人可播放 35 种不同情绪:
76
- - 基础:开心、难过、愤怒、恐惧、惊讶、厌恶
77
- - 扩展:大笑、爱慕、骄傲、感激、热情、好奇、惊叹、害羞、困惑、沉思、焦虑、害怕、沮丧、烦躁、狂怒、轻蔑、无聊、疲倦、精疲力竭、孤独、沮丧、顺从、不确定、不舒服
78
-
79
- ### 音频功能
80
- - 扬声器音量控制(0-100%)
81
- - 麦克风音量控制(0-100%)
82
- - AGC 自动增益控制(0-40dB)
83
- - 噪声抑制(0-100%)
84
- - 回声消除(内置)
85
-
86
- ### Sendspin 多房间音频
87
- - 通过 mDNS 自动发现 Sendspin 服务器
88
- - 同步多房间音频播放
89
- - Reachy Mini 作为 PLAYER 接收音频流
90
- - 语音对话时自动暂停
91
- - 无需用户配置
92
-
93
- ### DOA 声源追踪
94
- - 声源方向检测
95
- - 唤醒时机器人转向声源
96
- - 可通过开关启用/禁用
97
-
98
- ---
99
-
100
- ## Home Assistant 实体
101
-
102
- ### 阶段 1:基础状态
103
- | 实体 | 类型 | 说明 |
104
- |------|------|------|
105
- | Daemon State | 文本传感器 | 机器人守护进程状态 |
106
- | Backend Ready | 二进制传感器 | 后端连接状态 |
107
- | Speaker Volume | 数值 (0-100%) | 扬声器音量控制 |
108
-
109
- ### 阶段 2:电机控制
110
- | 实体 | 类型 | 说明 |
111
- |------|------|------|
112
- | Motors Enabled | 开关 | 电机电源开/关 |
113
- | Wake Up | 按钮 | 唤醒机器人 |
114
- | Go to Sleep | 按钮 | 使机器人睡眠 |
115
- | Sleep Mode | 二进制传感器 | 当前睡眠状态 |
116
- | Services Suspended | 二进制传感器 | ML 模型卸载状态 |
117
-
118
- ### 阶段 3:姿态控制
119
- | 实体 | 类型 | 范围 |
120
- |------|------|------|
121
- | Head X/Y/Z | 数值 | ±50mm |
122
- | Head Roll/Pitch/Yaw | 数值 | ±40° |
123
- | Body Yaw | 数值 | ±160° |
124
- | Antenna Left/Right | 数值 | ±90° |
125
-
126
- ### 阶段 4:注视控制
127
- | 实体 | 类型 | 说明 |
128
- |------|------|------|
129
- | Look At X/Y/Z | 数值 | 注视目标的世界坐标 |
130
-
131
- ### 阶段 5:DOA(声源定位)
132
- | 实体 | 类型 | 说明 |
133
- |------|------|------|
134
- | DOA Angle | 传感器 (°) | 声源方向 |
135
- | Speech Detected | 二进制传感器 | 语音活动检测 |
136
- | DOA Sound Tracking | 开关 | 启用/禁用 DOA 追踪 |
137
-
138
- ### 阶段 6:诊断信息
139
- | 实体 | 类型 | 说明 |
140
- |------|------|------|
141
- | Control Loop Frequency | 传感器 (Hz) | 运动控制循环频率 |
142
- | SDK Version | 文本传感器 | Reachy Mini SDK 版本 |
143
- | Robot Name | 文本传感器 | 设备名称 |
144
- | Wireless Version | 二进制传感器 | 无线版本标志 |
145
- | Simulation Mode | 二进制传感器 | 仿真模式标志 |
146
- | WLAN IP | 文本传感器 | WiFi IP 地址 |
147
- | Error Message | 文本传感器 | 当前错误 |
148
-
149
- ### 阶段 7:IMU 传感器(仅无线版本)
150
- | 实体 | 类型 | 说明 |
151
- |------|------|------|
152
- | IMU Accel X/Y/Z | 传感器 (m/s²) | 加速度计 |
153
- | IMU Gyro X/Y/Z | 传感器 (rad/s) | 陀螺仪 |
154
- | IMU Temperature | 传感器 (°C) | IMU 温度 |
155
-
156
- ### 阶段 8:情绪控制
157
- | 实体 | 类型 | 说明 |
158
- |------|------|------|
159
- | Emotion | 选择器 | 选择要播放的情绪(35 个选项)|
160
-
161
- ### 阶段 9:音频控制
162
- | 实体 | 类型 | 说明 |
163
- |------|------|------|
164
- | Microphone Volume | 数值 (0-100%) | 麦克风增益控制 |
165
-
166
- ### 阶段 10:摄像头
167
- | 实体 | 类型 | 说明 |
168
- |------|------|------|
169
- | Camera | 摄像头 | 实时 MJPEG 流 |
170
-
171
- ### 3D 可视化卡片
172
- 可在 Home Assistant 中安装自定义 Lovelace 卡片,实时 3D 可视化 Reachy Mini 机器人。
173
-
174
- 安装地址:[ha-reachy-mini](https://github.com/Desmond-Dong/ha-reachy-mini)
175
-
176
- 功能:
177
- - 实时 3D 机器人可视化
178
- - 交互式机器人状态视图
179
- - 连接机器人守护进程获取实时更新
180
-
181
- ### 阶段 12:音频处理
182
- | 实体 | 类型 | 说明 |
183
- |------|------|------|
184
- | AGC Enabled | 开关 | 自动增益控制开/关 |
185
- | AGC Max Gain | 数值 (0-40dB) | 最大 AGC 增益 |
186
- | Noise Suppression | 数值 (0-100%) | 噪声抑制级别 |
187
- | Echo Cancellation Converged | 二进制传感器 | AEC 状态 |
188
-
189
- ### 阶段 21:对话
190
- | 实体 | 类型 | 说明 |
191
- |------|------|------|
192
- | Continuous Conversation | 开关 | 多轮对话模式 |
193
-
194
- ### 阶段 22:手势检测
195
- | 实体 | 类型 | 说明 |
196
- |------|------|------|
197
- | Gesture Detected | 文本传感器 | 当前手势名称 |
198
- | Gesture Confidence | 传感器 (%) | 检测置信度 |
199
-
200
- ### 阶段 23:人脸检测
201
- | 实体 | 类型 | 说明 |
202
- |------|------|------|
203
- | Face Detected | 二进制传感器 | 视野中是否有人脸 |
204
-
205
- ### 阶段 24:系统诊断
206
- | 实体 | 类型 | 说明 |
207
- |------|------|------|
208
- | CPU Percent | 传感器 (%) | CPU 使用率 |
209
- | CPU Temperature | 传感器 (°C) | CPU 温度 |
210
- | Memory Percent | 传感器 (%) | 内存使用率 |
211
- | Memory Used | 传感器 (GB) | 已用内存 |
212
- | Disk Percent | 传感器 (%) | 磁盘使用率 |
213
- | Disk Free | 传感器 (GB) | 磁盘可用空间 |
214
- | Uptime | 传感器 (hours) | 系统运行时间 |
215
- | Process CPU | 传感器 (%) | 应用 CPU 使用率 |
216
- | Process Memory | 传感器 (MB) | 应用内存使用 |
217
-
218
- ---
219
-
220
- ## 睡眠模式
221
-
222
- ### 进入睡眠
223
- - 在 Home Assistant 中按"Go to Sleep"按钮
224
- - 机器人放松电机、停止摄像头、暂停语音检测
225
-
226
- ### 唤醒
227
- - 在 Home Assistant 中按"Wake Up"按钮
228
- - 或说唤醒词
229
- - 机器人恢复所有功能
230
-
231
- ---
232
-
233
- ## 故障排除
234
-
235
- | 问题 | 解决方案 |
236
- |------|----------|
237
- | 不响应唤醒词 | 增加 AGC Max Gain,减少背景噪音 |
238
- | 人脸追踪不工作 | 确保光线充足,检查 Face Detected 传感器 |
239
- | 没有音频输出 | 检查 Speaker Volume,验证 HA 中的 TTS 引擎 |
240
- | 无法连接 HA | 确认在同一网络,检查端口 6053 |
241
- | 手势检测不到 | 确保光线充足,正对摄像头 |
242
-
243
- ---
244
-
245
- ## 快速参考
246
-
247
- ```
248
- 唤醒词: "Okay Nabu"
249
- 停止词: "Stop"
250
- ESPHome 端口: 6053
251
- 摄像头端口: 8081 (MJPEG)
252
- ```
253
-
254
- ---
255
-
256
- *Reachy Mini 语音助手 v0.9.5*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
docs/USER_MANUAL_EN.md DELETED
@@ -1,256 +0,0 @@
1
- # Reachy Mini Voice Assistant - User Manual
2
-
3
- ## Requirements
4
-
5
- ### Hardware
6
- - Reachy Mini robot (with ReSpeaker XVF3800 microphone)
7
- - WiFi network connection
8
-
9
- ### Software
10
- - Home Assistant (2024.1 or later)
11
- - ESPHome integration enabled in Home Assistant
12
-
13
- ---
14
-
15
- ## Installation
16
-
17
- ### Step 1: Install the App
18
- Install `reachy_mini_home_assistant` from the Reachy Mini App Store.
19
-
20
- ### Step 2: Start the App
21
- The app will automatically:
22
- - Start the ESPHome server on port 6053
23
- - Load pre-packaged wake word models
24
- - Register with mDNS for auto-discovery
25
- - Connect to Sendspin server if available on network
26
-
27
- ### Step 3: Connect to Home Assistant
28
- **Automatic (Recommended):**
29
- Home Assistant will auto-discover Reachy Mini via mDNS.
30
-
31
- **Manual:**
32
- 1. Go to Settings → Devices & Services
33
- 2. Click "Add Integration"
34
- 3. Select "ESPHome"
35
- 4. Enter the robot's IP address and port 6053
36
-
37
- ---
38
-
39
- ## Features
40
-
41
- ### Voice Assistant
42
- - **Wake Word Detection**: Say "Okay Nabu" to activate (local processing)
43
- - **Stop Word**: Say "Stop" to end conversation
44
- - **Continuous Conversation Mode**: Keep talking without repeating wake word
45
- - **STT/TTS**: Uses Home Assistant's configured speech engines
46
-
47
- **Supported Wake Words:**
48
- - Okay Nabu (default)
49
- - Hey Jarvis
50
- - Alexa
51
- - Hey Luna
52
-
53
- ### Face Tracking
54
- - YOLO-based face detection
55
- - Head follows detected face
56
- - Body follows head when turned far
57
- - Adaptive frame rate: 15fps active, 2fps idle
58
-
59
- ### Gesture Detection
60
- Detected gestures and robot responses:
61
-
62
- | Gesture | Response |
63
- |---------|----------|
64
- | like (thumbs up) | Cheerful emotion |
65
- | dislike (thumbs down) | Sad emotion |
66
- | ok | Nod animation |
67
- | peace | Enthusiastic emotion |
68
- | stop | Stop speaking |
69
- | call | Start listening |
70
- | palm | Pause motion |
71
- | fist | Rage emotion |
72
- | one/two/three/four | Send HA event |
73
-
74
- ### Emotion Responses
75
- The robot can play 35 different emotions:
76
- - Basic: Happy, Sad, Angry, Fear, Surprise, Disgust
77
- - Extended: Laughing, Loving, Proud, Grateful, Enthusiastic, Curious, Amazed, Shy, Confused, Thoughtful, Anxious, Scared, Frustrated, Irritated, Furious, Contempt, Bored, Tired, Exhausted, Lonely, Downcast, Resigned, Uncertain, Uncomfortable
78
-
79
- ### Audio Features
80
- - Speaker volume control (0-100%)
81
- - Microphone volume control (0-100%)
82
- - AGC (Auto Gain Control, 0-40dB)
83
- - Noise suppression (0-100%)
84
- - Echo cancellation (built-in)
85
-
86
- ### Sendspin Multi-Room Audio
87
- - Automatic discovery of Sendspin servers via mDNS
88
- - Synchronized multi-room audio playback
89
- - Reachy Mini acts as a PLAYER to receive audio streams
90
- - Auto-pause during voice conversations
91
- - No user configuration required
92
-
93
- ### DOA Sound Tracking
94
- - Direction of Arrival detection
95
- - Robot turns toward sound source on wake word
96
- - Can be enabled/disabled via switch
97
-
98
- ---
99
-
100
- ## Home Assistant Entities
101
-
102
- ### Phase 1: Basic Status
103
- | Entity | Type | Description |
104
- |--------|------|-------------|
105
- | Daemon State | Text Sensor | Robot daemon status |
106
- | Backend Ready | Binary Sensor | Backend connection status |
107
- | Speaker Volume | Number (0-100%) | Speaker volume control |
108
-
109
- ### Phase 2: Motor Control
110
- | Entity | Type | Description |
111
- |--------|------|-------------|
112
- | Motors Enabled | Switch | Motor power on/off |
113
- | Wake Up | Button | Wake robot from sleep |
114
- | Go to Sleep | Button | Put robot to sleep |
115
- | Sleep Mode | Binary Sensor | Current sleep state |
116
- | Services Suspended | Binary Sensor | ML models unloaded state |
117
-
118
- ### Phase 3: Pose Control
119
- | Entity | Type | Range |
120
- |--------|------|-------|
121
- | Head X/Y/Z | Number | ±50mm |
122
- | Head Roll/Pitch/Yaw | Number | ±40° |
123
- | Body Yaw | Number | ±160° |
124
- | Antenna Left/Right | Number | ±90° |
125
-
126
- ### Phase 4: Look At Control
127
- | Entity | Type | Description |
128
- |--------|------|-------------|
129
- | Look At X/Y/Z | Number | World coordinates for gaze target |
130
-
131
- ### Phase 5: DOA (Direction of Arrival)
132
- | Entity | Type | Description |
133
- |--------|------|-------------|
134
- | DOA Angle | Sensor (°) | Sound source direction |
135
- | Speech Detected | Binary Sensor | Voice activity detection |
136
- | DOA Sound Tracking | Switch | Enable/disable DOA tracking |
137
-
138
- ### Phase 6: Diagnostics
139
- | Entity | Type | Description |
140
- |--------|------|-------------|
141
- | Control Loop Frequency | Sensor (Hz) | Motion control loop rate |
142
- | SDK Version | Text Sensor | Reachy Mini SDK version |
143
- | Robot Name | Text Sensor | Device name |
144
- | Wireless Version | Binary Sensor | Wireless model flag |
145
- | Simulation Mode | Binary Sensor | Simulation flag |
146
- | WLAN IP | Text Sensor | WiFi IP address |
147
- | Error Message | Text Sensor | Current error |
148
-
149
- ### Phase 7: IMU Sensors (Wireless version only)
150
- | Entity | Type | Description |
151
- |--------|------|-------------|
152
- | IMU Accel X/Y/Z | Sensor (m/s²) | Accelerometer |
153
- | IMU Gyro X/Y/Z | Sensor (rad/s) | Gyroscope |
154
- | IMU Temperature | Sensor (°C) | IMU temperature |
155
-
156
- ### Phase 8: Emotion Control
157
- | Entity | Type | Description |
158
- |--------|------|-------------|
159
- | Emotion | Select | Choose emotion to play (35 options) |
160
-
161
- ### Phase 9: Audio Control
162
- | Entity | Type | Description |
163
- |--------|------|-------------|
164
- | Microphone Volume | Number (0-100%) | Mic gain control |
165
-
166
- ### Phase 10: Camera
167
- | Entity | Type | Description |
168
- |--------|------|-------------|
169
- | Camera | Camera | Live MJPEG stream |
170
-
171
- ### 3D Visualization Card
172
- A custom Lovelace card is available for real-time 3D visualization of the Reachy Mini robot in Home Assistant.
173
-
174
- Install from: [ha-reachy-mini](https://github.com/Desmond-Dong/ha-reachy-mini)
175
-
176
- Features:
177
- - Real-time 3D robot visualization
178
- - Interactive view of robot state
179
- - Connects to robot daemon for live updates
180
-
181
- ### Phase 12: Audio Processing
182
- | Entity | Type | Description |
183
- |--------|------|-------------|
184
- | AGC Enabled | Switch | Auto gain control on/off |
185
- | AGC Max Gain | Number (0-40dB) | Maximum AGC gain |
186
- | Noise Suppression | Number (0-100%) | Noise reduction level |
187
- | Echo Cancellation Converged | Binary Sensor | AEC status |
188
-
189
- ### Phase 21: Conversation
190
- | Entity | Type | Description |
191
- |--------|------|-------------|
192
- | Continuous Conversation | Switch | Multi-turn conversation mode |
193
-
194
- ### Phase 22: Gesture Detection
195
- | Entity | Type | Description |
196
- |--------|------|-------------|
197
- | Gesture Detected | Text Sensor | Current gesture name |
198
- | Gesture Confidence | Sensor (%) | Detection confidence |
199
-
200
- ### Phase 23: Face Detection
201
- | Entity | Type | Description |
202
- |--------|------|-------------|
203
- | Face Detected | Binary Sensor | Face in view |
204
-
205
- ### Phase 24: System Diagnostics
206
- | Entity | Type | Description |
207
- |--------|------|-------------|
208
- | CPU Percent | Sensor (%) | CPU usage |
209
- | CPU Temperature | Sensor (°C) | CPU temperature |
210
- | Memory Percent | Sensor (%) | RAM usage |
211
- | Memory Used | Sensor (GB) | RAM used |
212
- | Disk Percent | Sensor (%) | Disk usage |
213
- | Disk Free | Sensor (GB) | Disk free space |
214
- | Uptime | Sensor (hours) | System uptime |
215
- | Process CPU | Sensor (%) | App CPU usage |
216
- | Process Memory | Sensor (MB) | App memory usage |
217
-
218
- ---
219
-
220
- ## Sleep Mode
221
-
222
- ### Enter Sleep
223
- - Press "Go to Sleep" button in Home Assistant
224
- - Robot relaxes motors, stops camera, pauses voice detection
225
-
226
- ### Wake Up
227
- - Press "Wake Up" button in Home Assistant
228
- - Or say the wake word
229
- - Robot resumes all functions
230
-
231
- ---
232
-
233
- ## Troubleshooting
234
-
235
- | Problem | Solution |
236
- |---------|----------|
237
- | Not responding to wake word | Increase AGC Max Gain, reduce background noise |
238
- | Face tracking not working | Ensure adequate lighting, check Face Detected sensor |
239
- | No audio output | Check Speaker Volume, verify TTS engine in HA |
240
- | Can't connect to HA | Verify same network, check port 6053 |
241
- | Gestures not detected | Ensure good lighting, face the camera directly |
242
-
243
- ---
244
-
245
- ## Quick Reference
246
-
247
- ```
248
- Wake Word: "Okay Nabu"
249
- Stop Word: "Stop"
250
- ESPHome Port: 6053
251
- Camera Port: 8081 (MJPEG)
252
- ```
253
-
254
- ---
255
-
256
- *Reachy Mini Voice Assistant v0.9.5*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
home_assistant_blueprints/reachy_mini_presence_companion.yaml DELETED
@@ -1,288 +0,0 @@
1
- blueprint:
2
- name: Reachy Mini Presence Companion
3
- description: >-
4
- Presence-driven automation for Reachy Mini in Home Assistant.
5
-
6
- How to use:
7
- 1) Select Home occupancy entity (person/group/binary_sensor).
8
- 2) Select Reachy ESPHome device (recommended).
9
- 3) Leave optional fallback entity inputs empty unless auto-binding fails.
10
- 4) Set away delay and day/night volume.
11
-
12
- What this automation does:
13
- - Occupied: Wake Reachy, enable idle motion + idle antenna motion, set day volume.
14
- - Empty (after delay): Disable idle motion + idle antenna motion, send Reachy to sleep.
15
- - Quiet hours start/end: Apply night/day volume while occupied.
16
-
17
- Auto-binding rules (when Reachy device is selected):
18
- - Wake button suffix: wake_up
19
- - Sleep button suffix: go_to_sleep
20
- - Idle motion switch suffix: idle_motion_enabled
21
- - Idle antenna switch suffix: idle_antenna_enabled
22
- - Volume number suffix: speaker_volume
23
-
24
- If your entities use different names, fill optional fallback entity inputs manually.
25
- domain: automation
26
- input:
27
- occupancy_entity:
28
- name: Home occupancy entity
29
- description: Person, group, or binary sensor representing home presence.
30
- selector:
31
- entity: {}
32
-
33
- reachy_device:
34
- name: Reachy device (recommended)
35
- description: Select your Reachy ESPHome device for automatic entity binding.
36
- default: ""
37
- selector:
38
- device:
39
- filter:
40
- - integration: esphome
41
-
42
- reachy_wake_button:
43
- name: Wake Up button (optional fallback)
44
- description: Leave empty to auto-bind from Reachy device.
45
- default: ""
46
- selector:
47
- entity:
48
- domain: button
49
-
50
- reachy_sleep_button:
51
- name: Go To Sleep button (optional fallback)
52
- description: Leave empty to auto-bind from Reachy device.
53
- default: ""
54
- selector:
55
- entity:
56
- domain: button
57
-
58
- idle_motion_switch:
59
- name: Idle Motion switch (optional fallback)
60
- description: Leave empty to auto-bind from Reachy device.
61
- default: ""
62
- selector:
63
- entity:
64
- domain: switch
65
-
66
- idle_antenna_motion_switch:
67
- name: Idle Antenna Motion switch (optional fallback)
68
- description: Leave empty to auto-bind from Reachy device.
69
- default: ""
70
- selector:
71
- entity:
72
- domain: switch
73
-
74
- reachy_volume_number:
75
- name: Speaker Volume number (optional fallback)
76
- description: Leave empty to auto-bind from Reachy device.
77
- default: ""
78
- selector:
79
- entity:
80
- domain: number
81
-
82
- away_delay_minutes:
83
- name: Away delay (minutes)
84
- description: Wait before sleeping after everyone leaves.
85
- default: 20
86
- selector:
87
- number:
88
- min: 1
89
- max: 180
90
- mode: box
91
- unit_of_measurement: min
92
-
93
- day_volume:
94
- name: Day volume
95
- default: 0.8
96
- selector:
97
- number:
98
- min: 0
99
- max: 1
100
- step: 0.05
101
- mode: slider
102
-
103
- night_volume:
104
- name: Night volume
105
- default: 0.35
106
- selector:
107
- number:
108
- min: 0
109
- max: 1
110
- step: 0.05
111
- mode: slider
112
-
113
- quiet_start:
114
- name: Quiet hours start
115
- default: "22:30:00"
116
- selector:
117
- time: {}
118
-
119
- quiet_end:
120
- name: Quiet hours end
121
- default: "07:30:00"
122
- selector:
123
- time: {}
124
-
125
- mode: restart
126
-
127
- variables:
128
- occupancy_entity: !input occupancy_entity
129
- reachy_device: !input reachy_device
130
- manual_wake_button: !input reachy_wake_button
131
- manual_sleep_button: !input reachy_sleep_button
132
- manual_idle_motion_switch: !input idle_motion_switch
133
- manual_idle_antenna_switch: !input idle_antenna_motion_switch
134
- manual_volume_number: !input reachy_volume_number
135
- day_volume: !input day_volume
136
- night_volume: !input night_volume
137
-
138
- device_entities_list: >-
139
- {{ device_entities(reachy_device) if reachy_device else [] }}
140
-
141
- wake_button_auto: >-
142
- {{ (device_entities_list | select('match', '^button\..*wake_up$') | list | first) or '' }}
143
- sleep_button_auto: >-
144
- {{ (device_entities_list | select('match', '^button\..*go_to_sleep$') | list | first) or '' }}
145
- idle_motion_switch_auto: >-
146
- {{ (device_entities_list | select('match', '^switch\..*idle_motion_enabled$') | list | first) or '' }}
147
- idle_antenna_switch_auto: >-
148
- {{ (device_entities_list | select('match', '^switch\..*idle_antenna_enabled$') | list | first) or '' }}
149
- volume_number_auto: >-
150
- {{ (device_entities_list | select('match', '^number\..*speaker_volume$') | list | first) or '' }}
151
-
152
- wake_button: >-
153
- {{ manual_wake_button if manual_wake_button else wake_button_auto }}
154
- sleep_button: >-
155
- {{ manual_sleep_button if manual_sleep_button else sleep_button_auto }}
156
- idle_motion_switch: >-
157
- {{ manual_idle_motion_switch if manual_idle_motion_switch else idle_motion_switch_auto }}
158
- idle_antenna_motion_switch: >-
159
- {{ manual_idle_antenna_switch if manual_idle_antenna_switch else idle_antenna_switch_auto }}
160
- volume_number: >-
161
- {{ manual_volume_number if manual_volume_number else volume_number_auto }}
162
-
163
- is_occupied: >-
164
- {{ states(occupancy_entity) in ['home', 'on'] }}
165
-
166
- trigger:
167
- - platform: state
168
- id: occupied_home
169
- entity_id: !input occupancy_entity
170
- to: "home"
171
-
172
- - platform: state
173
- id: occupied_on
174
- entity_id: !input occupancy_entity
175
- to: "on"
176
-
177
- - platform: state
178
- id: empty_not_home
179
- entity_id: !input occupancy_entity
180
- to: "not_home"
181
- for:
182
- minutes: !input away_delay_minutes
183
-
184
- - platform: state
185
- id: empty_off
186
- entity_id: !input occupancy_entity
187
- to: "off"
188
- for:
189
- minutes: !input away_delay_minutes
190
-
191
- - platform: time
192
- id: quiet_start
193
- at: !input quiet_start
194
-
195
- - platform: time
196
- id: quiet_end
197
- at: !input quiet_end
198
-
199
- action:
200
- - choose:
201
- - conditions:
202
- - condition: template
203
- value_template: "{{ trigger.id in ['occupied_home', 'occupied_on'] }}"
204
- sequence:
205
- - if:
206
- - condition: template
207
- value_template: "{{ wake_button != '' }}"
208
- then:
209
- - service: button.press
210
- target:
211
- entity_id: "{{ wake_button }}"
212
- - if:
213
- - condition: template
214
- value_template: "{{ idle_motion_switch != '' }}"
215
- then:
216
- - service: switch.turn_on
217
- target:
218
- entity_id: "{{ idle_motion_switch }}"
219
- - if:
220
- - condition: template
221
- value_template: "{{ idle_antenna_motion_switch != '' }}"
222
- then:
223
- - service: switch.turn_on
224
- target:
225
- entity_id: "{{ idle_antenna_motion_switch }}"
226
- - if:
227
- - condition: template
228
- value_template: "{{ volume_number != '' }}"
229
- then:
230
- - service: number.set_value
231
- target:
232
- entity_id: "{{ volume_number }}"
233
- data:
234
- value: "{{ day_volume }}"
235
-
236
- - conditions:
237
- - condition: template
238
- value_template: "{{ trigger.id in ['empty_not_home', 'empty_off'] }}"
239
- sequence:
240
- - if:
241
- - condition: template
242
- value_template: "{{ idle_motion_switch != '' }}"
243
- then:
244
- - service: switch.turn_off
245
- target:
246
- entity_id: "{{ idle_motion_switch }}"
247
- - if:
248
- - condition: template
249
- value_template: "{{ idle_antenna_motion_switch != '' }}"
250
- then:
251
- - service: switch.turn_off
252
- target:
253
- entity_id: "{{ idle_antenna_motion_switch }}"
254
- - if:
255
- - condition: template
256
- value_template: "{{ sleep_button != '' }}"
257
- then:
258
- - service: button.press
259
- target:
260
- entity_id: "{{ sleep_button }}"
261
-
262
- - conditions:
263
- - condition: template
264
- value_template: "{{ trigger.id == 'quiet_start' and is_occupied }}"
265
- sequence:
266
- - if:
267
- - condition: template
268
- value_template: "{{ volume_number != '' }}"
269
- then:
270
- - service: number.set_value
271
- target:
272
- entity_id: "{{ volume_number }}"
273
- data:
274
- value: "{{ night_volume }}"
275
-
276
- - conditions:
277
- - condition: template
278
- value_template: "{{ trigger.id == 'quiet_end' and is_occupied }}"
279
- sequence:
280
- - if:
281
- - condition: template
282
- value_template: "{{ volume_number != '' }}"
283
- then:
284
- - service: number.set_value
285
- target:
286
- entity_id: "{{ volume_number }}"
287
- data:
288
- value: "{{ day_volume }}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
index.html CHANGED
@@ -18,24 +18,21 @@
18
  <span class="brand-name">Reachy Mini for Home Assistant</span>
19
  </div>
20
  <div class="pill">Voice · Gestures · Smart Home</div>
21
- <div class="version-pill" id="version-pill">v1.0.2</div>
22
  </div>
23
  <div class="hero-grid">
24
  <div class="hero-copy">
25
  <p class="eyebrow">Reachy Mini App</p>
26
  <h1>Your robot meets your Home Assistant.</h1>
27
  <p class="lede">
28
- Transform Reachy Mini Wi-Fi into a voice-controlled smart home hub. Natural conversations, expressive movements, gesture recognition — all seamlessly connected to Home Assistant.
29
  </p>
30
  <div class="hero-actions">
31
- <a class="btn primary" href="#requirements">Requirements</a>
32
  <a class="btn ghost" href="#install">Quick Start</a>
33
- <a class="btn ghost" href="#features">Features</a>
34
  </div>
35
  <div class="hero-badges">
36
  <span>🎤 Wake Word</span>
37
  <span>👀 Face Tracking</span>
38
- <span>🔄 Body Following</span>
39
  <span>🤚 18 Gestures</span>
40
  <span>🔊 Multi-room Audio</span>
41
  <span>⚡ Zero Config</span>
@@ -50,69 +47,6 @@
50
  </div>
51
  </header>
52
 
53
- <section id="requirements" class="section">
54
- <div class="section-header">
55
- <p class="eyebrow">Before You Start</p>
56
- <h2>Requirements</h2>
57
- <p class="intro">Make sure you have everything ready for a smooth setup.</p>
58
- </div>
59
- <div class="requirements-grid">
60
- <div class="requirement-card">
61
- <span class="icon">🤖</span>
62
- <h3>Reachy Mini Wi-Fi</h3>
63
- <p>This app requires the <strong>Wi-Fi version</strong> of Reachy Mini. The USB version has not been validated</p>
64
- </div>
65
- <div class="requirement-card">
66
- <span class="icon">🏠</span>
67
- <h3>Home Assistant</h3>
68
- <p>A running Home Assistant instance </p>
69
- </div>
70
- <div class="requirement-card">
71
- <span class="icon">📶</span>
72
- <h3>Same Network</h3>
73
- <p>Both Reachy Mini and Home Assistant must be on the <strong>same local network</strong>.</p>
74
- </div>
75
- <div class="requirement-card">
76
- <span class="icon">🎙️</span>
77
- <h3>Voice Pipeline</h3>
78
- <p>Configure a <strong>Voice Assistant pipeline</strong> in Home Assistant (STT + TTS + LLM).</p>
79
- </div>
80
- </div>
81
- </section>
82
-
83
- <section id="install" class="section story">
84
- <div class="section-header">
85
- <p class="eyebrow">Getting Started</p>
86
- <h2>Quick Start</h2>
87
- <p class="intro">Install and connect in under a minute. No configuration needed.</p>
88
- </div>
89
- <div class="story-grid">
90
- <div class="story-card">
91
- <p class="eyebrow">Installation</p>
92
- <h3>Up and running in 1 minute</h3>
93
- <ul class="story-list">
94
- <li><span>1️⃣</span> Open Reachy Mini Dashboard → Applications</li>
95
- <li><span>2️⃣</span> Enable "Show community apps"</li>
96
- <li><span>3️⃣</span> Install "Reachy Mini for Home Assistant"</li>
97
- <li><span>4️⃣</span> Home Assistant discovers automatically</li>
98
- </ul>
99
- </div>
100
- <div class="story-card secondary">
101
- <p class="eyebrow">How it works</p>
102
- <h3>Seamless integration</h3>
103
- <p class="story-text">
104
- This Reachy Mini app uses ESPHome protocol to communicate with Home Assistant — no ESPHome device needed. Home Assistant discovers it via mDNS and adds all 45+ entities automatically. Voice commands are processed by your Home Assistant instance — STT, intent recognition, and TTS all happen there.
105
- </p>
106
- <div class="chips">
107
- <span class="chip">ESPHome Protocol</span>
108
- <span class="chip">mDNS Discovery</span>
109
- <span class="chip">45+ Entities</span>
110
- <span class="chip">Zero Config</span>
111
- </div>
112
- </div>
113
- </div>
114
- </section>
115
-
116
  <section id="features" class="section features">
117
  <div class="section-header">
118
  <p class="eyebrow">Capabilities</p>
@@ -133,7 +67,7 @@
133
  <div class="feature-card">
134
  <span class="icon">👀</span>
135
  <h3>Face Tracking</h3>
136
- <p>YOLO-based face detection with body following. Head and body move together naturally to track you during conversations.</p>
137
  </div>
138
  <div class="feature-card">
139
  <span class="icon">🤚</span>
@@ -143,7 +77,7 @@
143
  <div class="feature-card">
144
  <span class="icon">😊</span>
145
  <h3>Expressive Motion</h3>
146
- <p>280+ emotion keywords trigger 35 expressions. Real-time audio-driven animations with natural head sway during conversations.</p>
147
  </div>
148
  <div class="feature-card">
149
  <span class="icon">📹</span>
@@ -165,15 +99,33 @@
165
  <h3>Dashboard Card</h3>
166
  <p>Custom Lovelace card for Home Assistant. Real-time 3D visualization of robot pose and status.</p>
167
  </div>
168
- <div class="feature-card">
169
- <span class="icon">🧩</span>
170
- <h3>HA Blueprint</h3>
171
- <p>Device-first Home Assistant blueprint for presence automations with Reachy wake/sleep and volume routines.</p>
 
 
 
 
 
 
 
 
 
 
172
  </div>
173
- <div class="feature-card">
174
- <span class="icon">🚀</span>
175
- <h3>Auto Release</h3>
176
- <p>Version-driven GitHub release workflow. Update pyproject/changelog, then release is created automatically.</p>
 
 
 
 
 
 
 
 
177
  </div>
178
  </div>
179
  </section>
@@ -197,15 +149,6 @@
197
  fetch('changelog.json')
198
  .then(res => res.json())
199
  .then(data => {
200
- // Update version pill with latest version
201
- if (data.length > 0) {
202
- const versionPill = document.getElementById('version-pill');
203
- if (versionPill) {
204
- versionPill.textContent = `v${data[0].version}`;
205
- }
206
- }
207
-
208
- // Populate changelog grid
209
  const mainGrid = document.getElementById('changelog-grid');
210
  const olderGrid = document.getElementById('changelog-older');
211
  data.forEach((item, index) => {
@@ -236,15 +179,10 @@
236
  <h3>HA Dashboard Card</h3>
237
  <p>Lovelace Card for HA</p>
238
  </a>
239
- <a href="https://github.com/ha-china/Reachy_Mini_For_Home_Assistant" target="_blank" class="link-card">
240
  <span class="icon">📦</span>
241
  <h3>Source Code</h3>
242
- <p>GitHub Repository</p>
243
- </a>
244
- <a href="home_assistant_blueprints/reachy_mini_presence_companion.yaml" target="_blank" class="link-card">
245
- <span class="icon">🧩</span>
246
- <h3>HA Blueprint</h3>
247
- <p>Presence Companion YAML</p>
248
  </a>
249
  <a href="https://www.pollen-robotics.com/" target="_blank" class="link-card">
250
  <span class="icon">🤖</span>
 
18
  <span class="brand-name">Reachy Mini for Home Assistant</span>
19
  </div>
20
  <div class="pill">Voice · Gestures · Smart Home</div>
 
21
  </div>
22
  <div class="hero-grid">
23
  <div class="hero-copy">
24
  <p class="eyebrow">Reachy Mini App</p>
25
  <h1>Your robot meets your Home Assistant.</h1>
26
  <p class="lede">
27
+ Transform Reachy Mini into a voice-controlled smart home hub. Natural conversations, expressive movements, gesture recognition — all seamlessly connected to Home Assistant.
28
  </p>
29
  <div class="hero-actions">
30
+ <a class="btn primary" href="#features">Explore Features</a>
31
  <a class="btn ghost" href="#install">Quick Start</a>
 
32
  </div>
33
  <div class="hero-badges">
34
  <span>🎤 Wake Word</span>
35
  <span>👀 Face Tracking</span>
 
36
  <span>🤚 18 Gestures</span>
37
  <span>🔊 Multi-room Audio</span>
38
  <span>⚡ Zero Config</span>
 
47
  </div>
48
  </header>
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  <section id="features" class="section features">
51
  <div class="section-header">
52
  <p class="eyebrow">Capabilities</p>
 
67
  <div class="feature-card">
68
  <span class="icon">👀</span>
69
  <h3>Face Tracking</h3>
70
+ <p>YOLO-based face detection. Reachy looks at you during conversations with adaptive frame rate optimization.</p>
71
  </div>
72
  <div class="feature-card">
73
  <span class="icon">🤚</span>
 
77
  <div class="feature-card">
78
  <span class="icon">😊</span>
79
  <h3>Expressive Motion</h3>
80
+ <p>Real-time audio-driven animations. Natural head sway and antenna movements during conversations.</p>
81
  </div>
82
  <div class="feature-card">
83
  <span class="icon">📹</span>
 
99
  <h3>Dashboard Card</h3>
100
  <p>Custom Lovelace card for Home Assistant. Real-time 3D visualization of robot pose and status.</p>
101
  </div>
102
+ </div>
103
+ </section>
104
+
105
+ <section id="install" class="section story">
106
+ <div class="story-grid">
107
+ <div class="story-card">
108
+ <p class="eyebrow">Installation</p>
109
+ <h3>Up and running in 1 minutes</h3>
110
+ <ul class="story-list">
111
+ <li><span>1️⃣</span> Open Reachy Mini Dashboard → Applications</li>
112
+ <li><span>2️⃣</span> Enable "Show community apps"</li>
113
+ <li><span>3️⃣</span> Install "Reachy Mini for Home Assistant"</li>
114
+ <li><span>4️⃣</span> Home Assistant discovers automatically</li>
115
+ </ul>
116
  </div>
117
+ <div class="story-card secondary">
118
+ <p class="eyebrow">How it works</p>
119
+ <h3>Seamless integration</h3>
120
+ <p class="story-text">
121
+ This Reachy Mini app uses ESPHome protocol to communicate with Home Assistant — no ESPHome device needed. Home Assistant discovers it via mDNS and adds all 45+ entities automatically. Voice commands are processed by your Home Assistant instance — STT, intent recognition, and TTS all happen there.
122
+ </p>
123
+ <div class="chips">
124
+ <span class="chip">ESPHome Protocol</span>
125
+ <span class="chip">mDNS Discovery</span>
126
+ <span class="chip">45+ Entities</span>
127
+ <span class="chip">Zero Config</span>
128
+ </div>
129
  </div>
130
  </div>
131
  </section>
 
149
  fetch('changelog.json')
150
  .then(res => res.json())
151
  .then(data => {
 
 
 
 
 
 
 
 
 
152
  const mainGrid = document.getElementById('changelog-grid');
153
  const olderGrid = document.getElementById('changelog-older');
154
  data.forEach((item, index) => {
 
179
  <h3>HA Dashboard Card</h3>
180
  <p>Lovelace Card for HA</p>
181
  </a>
182
+ <a href="https://huggingface.co/spaces/djhui5710/reachy_mini_ha_voice/tree/main" target="_blank" class="link-card">
183
  <span class="icon">📦</span>
184
  <h3>Source Code</h3>
185
+ <p>HuggingFace Spaces</p>
 
 
 
 
 
186
  </a>
187
  <a href="https://www.pollen-robotics.com/" target="_blank" class="link-card">
188
  <span class="icon">🤖</span>
pyproject.toml CHANGED
@@ -3,22 +3,23 @@ requires = ["setuptools>=61.0"]
3
  build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
- name = "reachy_mini_home_assistant"
7
- version = "1.0.3"
8
- description = "Deep integration of Reachy Mini robot with Home Assistant"
9
  readme = "README.md"
10
  requires-python = ">=3.12"
11
  license = {text = "Apache-2.0"}
12
  dependencies = [
13
- # Reachy Mini SDK with gstreamer support (for camera streaming)
14
- "reachy-mini>=1.5.0",
15
 
16
- # Audio processing (for audio file analysis)
 
17
  "soundfile>=0.13.0",
18
- "numpy>=2.0.0,<=2.2.5",
19
 
20
  # Camera streaming
21
- "opencv-python>=4.12.0.88",
22
 
23
  # Wake word detection (local)
24
  # STT/TTS is handled by Home Assistant, not locally
@@ -27,33 +28,26 @@ dependencies = [
27
 
28
  # ESPHome protocol (communication with Home Assistant)
29
  "aioesphomeapi>=43.10.1",
30
- "zeroconf<1",
31
 
32
  # Motion control (head movements)
33
  "scipy>=1.14.0",
34
-
35
  # Face tracking (YOLO-based head detection)
36
- "ultralytics",
37
- "supervision",
38
-
 
39
  # Sendspin synchronized audio (optional, for multi-room playback)
40
  "aiosendspin>=2.0.1",
41
-
42
  # Gesture detection (ONNX runtime for HaGRID models)
43
  "onnxruntime>=1.18.0",
44
-
45
- # PyTorch (for vision models)
46
- "torch==2.5.1",
47
- "torchvision==0.20.1",
48
-
49
- # Compatibility with system packages (gradio, etc.)
50
- "pillow<12.0",
51
- "pydantic<=2.12.5",
52
  ]
53
  keywords = ["reachy-mini-app", "reachy-mini", "home-assistant", "voice-assistant"]
54
 
55
  [project.entry-points."reachy_mini_apps"]
56
- reachy_mini_home_assistant = "reachy_mini_home_assistant.main:ReachyMiniHaVoice"
57
 
58
  [tool.setuptools]
59
  package-dir = { "" = "." }
@@ -63,109 +57,4 @@ include-package-data = true
63
  where = ["."]
64
 
65
  [tool.setuptools.package-data]
66
- "*" = ["*.json", "*.flac", "*.md", "*.tflite", "*.onnx", "*.pt"]
67
-
68
- # ============================================================================
69
- # Ruff - Fast Python linter and formatter
70
- # ============================================================================
71
- [tool.ruff]
72
- target-version = "py312"
73
- line-length = 120
74
- src = ["reachy_mini_home_assistant"]
75
-
76
- # Exclude reference code and generated files
77
- exclude = [
78
- "reference/",
79
- "__pycache__",
80
- ".git",
81
- "*.egg-info",
82
- ]
83
-
84
- [dependency-groups]
85
- dev = [
86
- "ruff==0.15.4",
87
- "mypy==1.19.1",
88
- ]
89
-
90
- [tool.ruff.lint]
91
- select = [
92
- "E", # pycodestyle errors
93
- "W", # pycodestyle warnings
94
- "F", # Pyflakes
95
- "I", # isort (import sorting)
96
- "B", # flake8-bugbear (common bugs)
97
- "C4", # flake8-comprehensions
98
- "UP", # pyupgrade (modern Python syntax)
99
- "SIM", # flake8-simplify
100
- "TCH", # flake8-type-checking (TYPE_CHECKING optimization)
101
- "RUF", # Ruff-specific rules
102
- "PTH", # flake8-use-pathlib
103
- "PL", # Pylint
104
- ]
105
- ignore = [
106
- "E501", # line too long (handled by formatter)
107
- "PLR0913", # too many arguments (common in robot control)
108
- "PLR2004", # magic value comparison (many thresholds in motion code)
109
- "PLR0912", # too many branches
110
- "PLR0915", # too many statements
111
- "PLR0911", # too many return statements
112
- "SIM108", # use ternary operator (sometimes less readable)
113
- "B008", # function call in default argument (used for field factories)
114
- # The following are intentional patterns in this codebase:
115
- "PLC0415", # import-outside-top-level (lazy imports for optional deps)
116
- "PLW0603", # global-statement (used for singletons)
117
- "SIM102", # collapsible-if (sometimes more readable expanded)
118
- "SIM105", # suppressible-exception (explicit try/except is clearer)
119
- "PTH123", # builtin-open (pathlib not always better)
120
- "PTH108", # os-unlink (pathlib not always better)
121
- "RUF013", # implicit-optional (legacy code)
122
- "TC002", # third-party import (numpy is required at runtime)
123
- ]
124
-
125
- [tool.ruff.lint.per-file-ignores]
126
- "__init__.py" = ["F401"] # unused imports in __init__ are intentional
127
-
128
- [tool.ruff.lint.isort]
129
- known-first-party = ["reachy_mini_home_assistant"]
130
-
131
- # ============================================================================
132
- # Mypy - Static type checker
133
- # ============================================================================
134
- [tool.mypy]
135
- python_version = "3.12"
136
- warn_return_any = false # Too noisy for mixed typed/untyped codebase
137
- warn_unused_ignores = true
138
- disallow_untyped_defs = false # Start lenient, can tighten later
139
- check_untyped_defs = false # Too strict for initial setup
140
- ignore_missing_imports = true # Many robot SDK libs lack type stubs
141
- no_implicit_optional = false # Allow implicit Optional for now
142
- # Disable some checks that are too strict for this codebase
143
- disable_error_code = [
144
- "union-attr", # Too many Optional accesses without None checks
145
- "no-redef", # Class redefinitions for SDK compatibility
146
- "attr-defined", # Some dynamic attributes from SDK
147
- "assignment", # Variable type changes (common in Python)
148
- "arg-type", # Argument type mismatches (often SDK issues)
149
- "unused-ignore", # Type ignore comments from before config
150
- "return-value", # Return type mismatches (often fine)
151
- "no-untyped-def", # Missing type annotations (too strict initially)
152
- "valid-type", # Type validity (some edge cases)
153
- "has-type", # Cannot determine type
154
- "call-arg", # Too few/many arguments
155
- "import-untyped", # Missing stubs for third-party libs
156
- "misc", # Miscellaneous errors
157
- ]
158
- exclude = [
159
- "reference/",
160
- "tests/",
161
- ]
162
-
163
- # Stricter checking for core modules (can enable gradually)
164
- [[tool.mypy.overrides]]
165
- module = [
166
- "reachy_mini_home_assistant.core.*",
167
- "reachy_mini_home_assistant.motion.smoothing",
168
- "reachy_mini_home_assistant.motion.pose_composer",
169
- ]
170
- disallow_untyped_defs = true
171
- warn_unreachable = true
 
3
  build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
+ name = "reachy_mini_ha_voice"
7
+ version = "0.7.3"
8
+ description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.12"
11
  license = {text = "Apache-2.0"}
12
  dependencies = [
13
+ # Reachy Mini SDK (provides audio via media system)
14
+ "reachy-mini",
15
 
16
+ # Audio processing (fallback when not on Reachy Mini)
17
+ "sounddevice>=0.5.0",
18
  "soundfile>=0.13.0",
19
+ "numpy>=2.0.0",
20
 
21
  # Camera streaming
22
+ "opencv-python>=4.10.0",
23
 
24
  # Wake word detection (local)
25
  # STT/TTS is handled by Home Assistant, not locally
 
28
 
29
  # ESPHome protocol (communication with Home Assistant)
30
  "aioesphomeapi>=43.10.1",
31
+ "zeroconf>=0.140.0",
32
 
33
  # Motion control (head movements)
34
  "scipy>=1.14.0",
35
+
36
  # Face tracking (YOLO-based head detection)
37
+ "ultralytics>=8.3.0",
38
+ "supervision>=0.25.0",
39
+ "huggingface_hub>=0.27.0",
40
+
41
  # Sendspin synchronized audio (optional, for multi-room playback)
42
  "aiosendspin>=2.0.1",
43
+
44
  # Gesture detection (ONNX runtime for HaGRID models)
45
  "onnxruntime>=1.18.0",
 
 
 
 
 
 
 
 
46
  ]
47
  keywords = ["reachy-mini-app", "reachy-mini", "home-assistant", "voice-assistant"]
48
 
49
  [project.entry-points."reachy_mini_apps"]
50
+ reachy_mini_ha_voice = "reachy_mini_ha_voice.main:ReachyMiniHaVoice"
51
 
52
  [tool.setuptools]
53
  package-dir = { "" = "." }
 
57
  where = ["."]
58
 
59
  [tool.setuptools.package-data]
60
+ "*" = ["*.json", "*.flac", "*.md", "*.tflite", "*.onnx"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/__init__.py RENAMED
@@ -1,29 +1,24 @@
1
- """
2
- Reachy Mini for Home Assistant
3
-
4
- A deep integration app combining Reachy Mini robot with Home Assistant,
5
- enabling voice control, smart home automation, and expressive robot interactions.
6
-
7
- Key features:
8
- - Local wake word detection (microWakeWord/openWakeWord)
9
- - ESPHome protocol for seamless Home Assistant communication
10
- - STT/TTS powered by Home Assistant voice pipeline
11
- - Reachy Mini motion control with expressive animations
12
- - Camera streaming and gesture detection
13
- - Smart home entity control through natural voice commands
14
- """
15
-
16
- try:
17
- from importlib.metadata import version
18
-
19
- __version__ = version("reachy_mini_home_assistant")
20
- except Exception:
21
- __version__ = "0.0.0" # Fallback for development
22
- __author__ = "Desmond Dong"
23
-
24
- # Don't import main module here to avoid runpy warning
25
- # The app is loaded via entry point: reachy_mini_home_assistant.main:ReachyMiniHaVoiceApp
26
-
27
- __all__ = [
28
- "__version__",
29
- ]
 
1
+ """
2
+ Reachy Mini for Home Assistant
3
+
4
+ A deep integration app combining Reachy Mini robot with Home Assistant,
5
+ enabling voice control, smart home automation, and expressive robot interactions.
6
+
7
+ Key features:
8
+ - Local wake word detection (microWakeWord/openWakeWord)
9
+ - ESPHome protocol for seamless Home Assistant communication
10
+ - STT/TTS powered by Home Assistant voice pipeline
11
+ - Reachy Mini motion control with expressive animations
12
+ - Camera streaming and gesture detection
13
+ - Smart home entity control through natural voice commands
14
+ """
15
+
16
+ __version__ = "0.7.3"
17
+ __author__ = "Desmond Dong"
18
+
19
+ # Don't import main module here to avoid runpy warning
20
+ # The app is loaded via entry point: reachy_mini_ha_voice.main:ReachyMiniHAVoiceApp
21
+
22
+ __all__ = [
23
+ "__version__",
24
+ ]
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/__main__.py RENAMED
@@ -2,7 +2,7 @@
2
  """Main entry point for Reachy Mini for Home Assistant.
3
 
4
  This module provides a command-line interface for running the voice assistant
5
- without the ReachyMini App framework.
6
  """
7
 
8
  import argparse
@@ -10,13 +10,13 @@ import asyncio
10
  import logging
11
  import threading
12
 
13
- from .core import get_health_monitor, get_memory_monitor
14
-
15
  _LOGGER = logging.getLogger(__name__)
16
 
17
 
18
  async def main() -> None:
19
- parser = argparse.ArgumentParser(description="Reachy Mini for Home Assistant")
 
 
20
  parser.add_argument(
21
  "--name",
22
  default="Reachy Mini",
@@ -49,6 +49,11 @@ async def main() -> None:
49
  action="store_true",
50
  help="Disable camera server",
51
  )
 
 
 
 
 
52
  parser.add_argument(
53
  "--debug",
54
  action="store_true",
@@ -63,11 +68,17 @@ async def main() -> None:
63
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
64
  )
65
 
66
- # Initialize Reachy Mini (required)
67
- from reachy_mini import ReachyMini
68
-
69
- reachy_mini = ReachyMini()
70
- _LOGGER.info("Reachy Mini connected")
 
 
 
 
 
 
71
 
72
  # Import and create VoiceAssistantService
73
  from .voice_assistant import VoiceAssistantService
@@ -85,22 +96,7 @@ async def main() -> None:
85
  # Create stop event for graceful shutdown
86
  stop_event = threading.Event()
87
 
88
- # Initialize monitoring services
89
- health_monitor = get_health_monitor()
90
- memory_monitor = get_memory_monitor()
91
-
92
- # Register service health checks
93
- health_monitor.register_checker(
94
- "voice_assistant",
95
- lambda: service.is_running if hasattr(service, "is_running") else True,
96
- interval=30.0,
97
- )
98
-
99
  try:
100
- # Start monitoring
101
- health_monitor.start()
102
- memory_monitor.start()
103
-
104
  await service.start()
105
 
106
  _LOGGER.info("=" * 50)
@@ -109,7 +105,7 @@ async def main() -> None:
109
  _LOGGER.info("Name: %s", args.name)
110
  _LOGGER.info("ESPHome Server: %s:%s", args.host, args.port)
111
  _LOGGER.info("Camera Server: %s:%s", args.host, args.camera_port)
112
- _LOGGER.info("Motion control: enabled")
113
  _LOGGER.info("=" * 50)
114
  _LOGGER.info("Add this device in Home Assistant:")
115
  _LOGGER.info(" Settings -> Devices & Services -> Add Integration -> ESPHome")
@@ -123,10 +119,6 @@ async def main() -> None:
123
  except KeyboardInterrupt:
124
  _LOGGER.info("Shutting down...")
125
  finally:
126
- # Stop monitoring services
127
- health_monitor.stop()
128
- memory_monitor.stop()
129
-
130
  await service.stop()
131
  _LOGGER.info("Voice assistant stopped")
132
 
 
2
  """Main entry point for Reachy Mini for Home Assistant.
3
 
4
  This module provides a command-line interface for running the voice assistant
5
+ in standalone mode (without the ReachyMini App framework).
6
  """
7
 
8
  import argparse
 
10
  import logging
11
  import threading
12
 
 
 
13
  _LOGGER = logging.getLogger(__name__)
14
 
15
 
16
  async def main() -> None:
17
+ parser = argparse.ArgumentParser(
18
+ description="Reachy Mini for Home Assistant"
19
+ )
20
  parser.add_argument(
21
  "--name",
22
  default="Reachy Mini",
 
49
  action="store_true",
50
  help="Disable camera server",
51
  )
52
+ parser.add_argument(
53
+ "--no-motion",
54
+ action="store_true",
55
+ help="Disable Reachy Mini motion control",
56
+ )
57
  parser.add_argument(
58
  "--debug",
59
  action="store_true",
 
68
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
69
  )
70
 
71
+ # Initialize Reachy Mini (if available)
72
+ reachy_mini = None
73
+ if not args.no_motion:
74
+ try:
75
+ from reachy_mini import ReachyMini
76
+ reachy_mini = ReachyMini()
77
+ _LOGGER.info("Reachy Mini connected")
78
+ except ImportError:
79
+ _LOGGER.warning("reachy-mini not installed, motion control disabled")
80
+ except Exception as e:
81
+ _LOGGER.warning("Failed to connect to Reachy Mini: %s", e)
82
 
83
  # Import and create VoiceAssistantService
84
  from .voice_assistant import VoiceAssistantService
 
96
  # Create stop event for graceful shutdown
97
  stop_event = threading.Event()
98
 
 
 
 
 
 
 
 
 
 
 
 
99
  try:
 
 
 
 
100
  await service.start()
101
 
102
  _LOGGER.info("=" * 50)
 
105
  _LOGGER.info("Name: %s", args.name)
106
  _LOGGER.info("ESPHome Server: %s:%s", args.host, args.port)
107
  _LOGGER.info("Camera Server: %s:%s", args.host, args.camera_port)
108
+ _LOGGER.info("Motion control: %s", "enabled" if reachy_mini else "disabled")
109
  _LOGGER.info("=" * 50)
110
  _LOGGER.info("Add this device in Home Assistant:")
111
  _LOGGER.info(" Settings -> Devices & Services -> Add Integration -> ESPHome")
 
119
  except KeyboardInterrupt:
120
  _LOGGER.info("Shutting down...")
121
  finally:
 
 
 
 
122
  await service.stop()
123
  _LOGGER.info("Voice assistant stopped")
124
 
{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/animation_player.py RENAMED
@@ -16,18 +16,17 @@ import threading
16
  import time
17
  from dataclasses import dataclass
18
  from pathlib import Path
 
19
 
20
  _LOGGER = logging.getLogger(__name__)
21
 
22
  _MODULE_DIR = Path(__file__).parent
23
- _PACKAGE_DIR = _MODULE_DIR.parent # reachy_mini_home_assistant/
24
- _ANIMATIONS_FILE = _PACKAGE_DIR / "animations" / "conversation_animations.json"
25
 
26
 
27
  @dataclass
28
  class AnimationParams:
29
  """Parameters for a single animation with per-axis frequencies."""
30
-
31
  name: str
32
  description: str
33
  # Position amplitudes (meters)
@@ -49,7 +48,6 @@ class AnimationParams:
49
  # Antenna
50
  antenna_amplitude_rad: float = 0.0
51
  antenna_move_name: str = "both"
52
- antenna_frequency_hz: float = 0.0 # If not specified, uses main frequency_hz
53
  # Per-axis frequencies (Hz) - if not specified, uses main frequency_hz
54
  frequency_hz: float = 0.5
55
  pitch_frequency_hz: float = 0.0
@@ -69,17 +67,14 @@ class AnimationPlayer:
69
  - Multi-frequency oscillators for natural motion
70
  - Random phase offsets per animation start for variation
71
  - Smooth transitions between animations
72
- - Interpolation phase: smooth transition from current pose to neutral before oscillation
73
- (same as BreathingMove in reference project)
74
  """
75
 
76
  def __init__(self):
77
- self._animations: dict[str, AnimationParams] = {}
78
  self._amplitude_scale: float = 1.0
79
  self._transition_duration: float = 0.3
80
- self._interpolation_duration: float = 1.0 # Time to interpolate to neutral (same as BreathingMove)
81
- self._current_animation: str | None = None
82
- self._target_animation: str | None = None
83
  self._transition_start: float = 0.0
84
  self._phase_start: float = 0.0
85
  self._lock = threading.Lock()
@@ -90,29 +85,6 @@ class AnimationPlayer:
90
  self._phase_x: float = 0.0
91
  self._phase_y: float = 0.0
92
  self._phase_z: float = 0.0
93
- # Interpolation state (for smooth transition to neutral before oscillation)
94
- self._in_interpolation: bool = False
95
- self._interpolation_start_time: float = 0.0
96
- self._interpolation_start_offsets: dict[str, float] = {
97
- "pitch": 0.0,
98
- "yaw": 0.0,
99
- "roll": 0.0,
100
- "x": 0.0,
101
- "y": 0.0,
102
- "z": 0.0,
103
- "antenna_left": 0.0,
104
- "antenna_right": 0.0,
105
- }
106
- self._last_offsets: dict[str, float] = {
107
- "pitch": 0.0,
108
- "yaw": 0.0,
109
- "roll": 0.0,
110
- "x": 0.0,
111
- "y": 0.0,
112
- "z": 0.0,
113
- "antenna_left": 0.0,
114
- "antenna_right": 0.0,
115
- }
116
  self._load_config()
117
 
118
  def _load_config(self) -> None:
@@ -121,7 +93,7 @@ class AnimationPlayer:
121
  _LOGGER.warning("Animations file not found: %s", _ANIMATIONS_FILE)
122
  return
123
  try:
124
- with open(_ANIMATIONS_FILE, encoding="utf-8") as f:
125
  data = json.load(f)
126
 
127
  settings = data.get("settings", {})
@@ -148,7 +120,6 @@ class AnimationPlayer:
148
  yaw_offset_rad=params.get("yaw_offset_rad", 0.0),
149
  antenna_amplitude_rad=params.get("antenna_amplitude_rad", 0.0),
150
  antenna_move_name=params.get("antenna_move_name", "both"),
151
- antenna_frequency_hz=params.get("antenna_frequency_hz", 0.0),
152
  frequency_hz=params.get("frequency_hz", 0.5),
153
  pitch_frequency_hz=params.get("pitch_frequency_hz", 0.0),
154
  yaw_frequency_hz=params.get("yaw_frequency_hz", 0.0),
@@ -173,29 +144,18 @@ class AnimationPlayer:
173
  self._phase_z = random.random() * 2 * math.pi
174
 
175
  def set_animation(self, name: str) -> bool:
176
- """Set the current animation with smooth transition.
177
-
178
- Like BreathingMove in reference project, this starts an interpolation
179
- phase that smoothly transitions from the current pose to neutral before
180
- starting the oscillation animation.
181
- """
182
  with self._lock:
183
  if name not in self._animations and name is not None:
184
  _LOGGER.warning("Unknown animation: %s", name)
185
  return False
186
- if name == self._current_animation and not self._in_interpolation:
187
  return True
188
-
189
- # Capture current offsets for interpolation start
190
- self._interpolation_start_offsets = self._last_offsets.copy()
191
- self._interpolation_start_time = time.perf_counter()
192
- self._in_interpolation = True
193
-
194
  self._target_animation = name
195
  self._transition_start = time.perf_counter()
196
  # Randomize phases for new animation
197
  self._randomize_phases()
198
- _LOGGER.debug("Transitioning to animation: %s (interpolation phase)", name)
199
  return True
200
 
201
  def stop(self) -> None:
@@ -204,13 +164,10 @@ class AnimationPlayer:
204
  self._current_animation = None
205
  self._target_animation = None
206
 
207
- def get_offsets(self, dt: float = 0.0) -> dict[str, float]:
208
  """Calculate current animation offsets.
209
 
210
- Uses two-phase animation like BreathingMove in reference project:
211
- 1. Interpolation phase: smoothly transition from current pose to neutral
212
- 2. Oscillation phase: continuous sinusoidal breathing motion
213
-
214
  Each axis can have its own frequency for more organic movement.
215
 
216
  Args:
@@ -222,7 +179,7 @@ class AnimationPlayer:
222
  with self._lock:
223
  now = time.perf_counter()
224
 
225
- # Handle transition to new animation
226
  if self._target_animation != self._current_animation:
227
  elapsed = now - self._transition_start
228
  if elapsed >= self._transition_duration:
@@ -231,59 +188,20 @@ class AnimationPlayer:
231
 
232
  # No animation
233
  if self._current_animation is None:
234
- result = {
235
- "pitch": 0.0,
236
- "yaw": 0.0,
237
- "roll": 0.0,
238
- "x": 0.0,
239
- "y": 0.0,
240
- "z": 0.0,
241
- "antenna_left": 0.0,
242
- "antenna_right": 0.0,
243
  }
244
- self._last_offsets = result.copy()
245
- return result
246
 
247
  params = self._animations.get(self._current_animation)
248
  if params is None:
249
- result = {
250
- "pitch": 0.0,
251
- "yaw": 0.0,
252
- "roll": 0.0,
253
- "x": 0.0,
254
- "y": 0.0,
255
- "z": 0.0,
256
- "antenna_left": 0.0,
257
- "antenna_right": 0.0,
258
  }
259
- self._last_offsets = result.copy()
260
- return result
261
-
262
- # Check if in interpolation phase
263
- if self._in_interpolation:
264
- interp_elapsed = now - self._interpolation_start_time
265
- if interp_elapsed < self._interpolation_duration:
266
- # Phase 1: Linear interpolation from current pose to neutral (offset=0)
267
- # Use smooth ease-in-out for natural motion
268
- t = interp_elapsed / self._interpolation_duration
269
- # Smooth step: t * t * (3 - 2 * t)
270
- smooth_t = t * t * (3 - 2 * t)
271
-
272
- result = {}
273
- for key in self._interpolation_start_offsets:
274
- start_val = self._interpolation_start_offsets[key]
275
- # Interpolate toward 0 (neutral)
276
- result[key] = start_val * (1.0 - smooth_t)
277
-
278
- self._last_offsets = result.copy()
279
- return result
280
- else:
281
- # Interpolation complete, start oscillation phase
282
- self._in_interpolation = False
283
- self._phase_start = now
284
- _LOGGER.debug("Interpolation complete, starting oscillation phase")
285
 
286
- # Phase 2: Oscillation animation
287
  elapsed = now - self._phase_start
288
  base_freq = params.frequency_hz
289
 
@@ -301,27 +219,32 @@ class AnimationPlayer:
301
  z_freq = params.z_frequency_hz if params.z_frequency_hz > 0 else base_freq
302
 
303
  # Calculate oscillations with per-axis frequencies and random phases
304
- pitch = params.pitch_offset_rad + params.pitch_amplitude_rad * math.sin(
305
- 2 * math.pi * pitch_freq * elapsed + self._phase_pitch
306
- )
307
 
308
- yaw = params.yaw_offset_rad + params.yaw_amplitude_rad * math.sin(
309
- 2 * math.pi * yaw_freq * elapsed + self._phase_yaw
310
- )
311
 
312
- roll = params.roll_offset_rad + params.roll_amplitude_rad * math.sin(
313
- 2 * math.pi * roll_freq * elapsed + self._phase_roll
314
- )
315
 
316
- x = params.x_offset_m + params.x_amplitude_m * math.sin(2 * math.pi * x_freq * elapsed + self._phase_x)
 
 
317
 
318
- y = params.y_offset_m + params.y_amplitude_m * math.sin(2 * math.pi * y_freq * elapsed + self._phase_y)
 
 
319
 
320
- z = params.z_offset_m + params.z_amplitude_m * math.sin(2 * math.pi * z_freq * elapsed + self._phase_z)
 
 
321
 
322
- # Antenna movement with its own frequency
323
- antenna_freq = params.antenna_frequency_hz if params.antenna_frequency_hz > 0 else base_freq
324
- antenna_phase = 2 * math.pi * antenna_freq * elapsed
325
  if params.antenna_move_name == "both":
326
  left = right = params.antenna_amplitude_rad * math.sin(antenna_phase)
327
  elif params.antenna_move_name == "wiggle":
@@ -333,7 +256,7 @@ class AnimationPlayer:
333
 
334
  # Apply scale and blend
335
  scale = self._amplitude_scale * blend
336
- result = {
337
  "pitch": pitch * scale,
338
  "yaw": yaw * scale,
339
  "roll": roll * scale,
@@ -343,11 +266,9 @@ class AnimationPlayer:
343
  "antenna_left": left * scale,
344
  "antenna_right": right * scale,
345
  }
346
- self._last_offsets = result.copy()
347
- return result
348
 
349
  @property
350
- def current_animation(self) -> str | None:
351
  """Get the current animation name."""
352
  with self._lock:
353
  return self._current_animation
 
16
  import time
17
  from dataclasses import dataclass
18
  from pathlib import Path
19
+ from typing import Dict, Optional
20
 
21
  _LOGGER = logging.getLogger(__name__)
22
 
23
  _MODULE_DIR = Path(__file__).parent
24
+ _ANIMATIONS_FILE = _MODULE_DIR / "animations" / "conversation_animations.json"
 
25
 
26
 
27
  @dataclass
28
  class AnimationParams:
29
  """Parameters for a single animation with per-axis frequencies."""
 
30
  name: str
31
  description: str
32
  # Position amplitudes (meters)
 
48
  # Antenna
49
  antenna_amplitude_rad: float = 0.0
50
  antenna_move_name: str = "both"
 
51
  # Per-axis frequencies (Hz) - if not specified, uses main frequency_hz
52
  frequency_hz: float = 0.5
53
  pitch_frequency_hz: float = 0.0
 
67
  - Multi-frequency oscillators for natural motion
68
  - Random phase offsets per animation start for variation
69
  - Smooth transitions between animations
 
 
70
  """
71
 
72
  def __init__(self):
73
+ self._animations: Dict[str, AnimationParams] = {}
74
  self._amplitude_scale: float = 1.0
75
  self._transition_duration: float = 0.3
76
+ self._current_animation: Optional[str] = None
77
+ self._target_animation: Optional[str] = None
 
78
  self._transition_start: float = 0.0
79
  self._phase_start: float = 0.0
80
  self._lock = threading.Lock()
 
85
  self._phase_x: float = 0.0
86
  self._phase_y: float = 0.0
87
  self._phase_z: float = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  self._load_config()
89
 
90
  def _load_config(self) -> None:
 
93
  _LOGGER.warning("Animations file not found: %s", _ANIMATIONS_FILE)
94
  return
95
  try:
96
+ with open(_ANIMATIONS_FILE, "r", encoding="utf-8") as f:
97
  data = json.load(f)
98
 
99
  settings = data.get("settings", {})
 
120
  yaw_offset_rad=params.get("yaw_offset_rad", 0.0),
121
  antenna_amplitude_rad=params.get("antenna_amplitude_rad", 0.0),
122
  antenna_move_name=params.get("antenna_move_name", "both"),
 
123
  frequency_hz=params.get("frequency_hz", 0.5),
124
  pitch_frequency_hz=params.get("pitch_frequency_hz", 0.0),
125
  yaw_frequency_hz=params.get("yaw_frequency_hz", 0.0),
 
144
  self._phase_z = random.random() * 2 * math.pi
145
 
146
  def set_animation(self, name: str) -> bool:
147
+ """Set the current animation with smooth transition."""
 
 
 
 
 
148
  with self._lock:
149
  if name not in self._animations and name is not None:
150
  _LOGGER.warning("Unknown animation: %s", name)
151
  return False
152
+ if name == self._current_animation:
153
  return True
 
 
 
 
 
 
154
  self._target_animation = name
155
  self._transition_start = time.perf_counter()
156
  # Randomize phases for new animation
157
  self._randomize_phases()
158
+ _LOGGER.debug("Transitioning to animation: %s", name)
159
  return True
160
 
161
  def stop(self) -> None:
 
164
  self._current_animation = None
165
  self._target_animation = None
166
 
167
+ def get_offsets(self, dt: float = 0.0) -> Dict[str, float]:
168
  """Calculate current animation offsets.
169
 
170
+ Uses multi-frequency oscillators for natural motion.
 
 
 
171
  Each axis can have its own frequency for more organic movement.
172
 
173
  Args:
 
179
  with self._lock:
180
  now = time.perf_counter()
181
 
182
+ # Handle transition
183
  if self._target_animation != self._current_animation:
184
  elapsed = now - self._transition_start
185
  if elapsed >= self._transition_duration:
 
188
 
189
  # No animation
190
  if self._current_animation is None:
191
+ return {
192
+ "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
193
+ "x": 0.0, "y": 0.0, "z": 0.0,
194
+ "antenna_left": 0.0, "antenna_right": 0.0,
 
 
 
 
 
195
  }
 
 
196
 
197
  params = self._animations.get(self._current_animation)
198
  if params is None:
199
+ return {
200
+ "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
201
+ "x": 0.0, "y": 0.0, "z": 0.0,
202
+ "antenna_left": 0.0, "antenna_right": 0.0,
 
 
 
 
 
203
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
 
205
  elapsed = now - self._phase_start
206
  base_freq = params.frequency_hz
207
 
 
219
  z_freq = params.z_frequency_hz if params.z_frequency_hz > 0 else base_freq
220
 
221
  # Calculate oscillations with per-axis frequencies and random phases
222
+ pitch = (params.pitch_offset_rad +
223
+ params.pitch_amplitude_rad *
224
+ math.sin(2 * math.pi * pitch_freq * elapsed + self._phase_pitch))
225
 
226
+ yaw = (params.yaw_offset_rad +
227
+ params.yaw_amplitude_rad *
228
+ math.sin(2 * math.pi * yaw_freq * elapsed + self._phase_yaw))
229
 
230
+ roll = (params.roll_offset_rad +
231
+ params.roll_amplitude_rad *
232
+ math.sin(2 * math.pi * roll_freq * elapsed + self._phase_roll))
233
 
234
+ x = (params.x_offset_m +
235
+ params.x_amplitude_m *
236
+ math.sin(2 * math.pi * x_freq * elapsed + self._phase_x))
237
 
238
+ y = (params.y_offset_m +
239
+ params.y_amplitude_m *
240
+ math.sin(2 * math.pi * y_freq * elapsed + self._phase_y))
241
 
242
+ z = (params.z_offset_m +
243
+ params.z_amplitude_m *
244
+ math.sin(2 * math.pi * z_freq * elapsed + self._phase_z))
245
 
246
+ # Antenna movement
247
+ antenna_phase = 2 * math.pi * base_freq * elapsed
 
248
  if params.antenna_move_name == "both":
249
  left = right = params.antenna_amplitude_rad * math.sin(antenna_phase)
250
  elif params.antenna_move_name == "wiggle":
 
256
 
257
  # Apply scale and blend
258
  scale = self._amplitude_scale * blend
259
+ return {
260
  "pitch": pitch * scale,
261
  "yaw": yaw * scale,
262
  "roll": roll * scale,
 
266
  "antenna_left": left * scale,
267
  "antenna_right": right * scale,
268
  }
 
 
269
 
270
  @property
271
+ def current_animation(self) -> Optional[str]:
272
  """Get the current animation name."""
273
  with self._lock:
274
  return self._current_animation
reachy_mini_ha_voice/animations/conversation_animations.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "animations": {
3
+ "idle": {
4
+ "description": "No movement when idle - robot stays at neutral position",
5
+ "z_amplitude_m": 0.0,
6
+ "antenna_amplitude_rad": 0.0,
7
+ "frequency_hz": 0.0
8
+ },
9
+ "listening": {
10
+ "description": "Attentive pose while listening to user - slight forward lean",
11
+ "pitch_offset_rad": -0.05,
12
+ "pitch_amplitude_rad": 0.03,
13
+ "z_amplitude_m": 0.003,
14
+ "antenna_amplitude_rad": 0.2,
15
+ "antenna_move_name": "both",
16
+ "frequency_hz": 0.6
17
+ },
18
+ "thinking": {
19
+ "description": "Processing/thinking animation - head tilted with gentle sway",
20
+ "roll_offset_rad": 0.08,
21
+ "pitch_amplitude_rad": 0.03,
22
+ "yaw_amplitude_rad": 0.05,
23
+ "roll_amplitude_rad": 0.04,
24
+ "z_amplitude_m": 0.003,
25
+ "antenna_amplitude_rad": 0.25,
26
+ "antenna_move_name": "wiggle",
27
+ "frequency_hz": 0.4
28
+ },
29
+ "speaking": {
30
+ "description": "Speaking animation - multi-frequency natural head sway",
31
+ "pitch_amplitude_rad": 0.08,
32
+ "pitch_frequency_hz": 2.2,
33
+ "yaw_amplitude_rad": 0.13,
34
+ "yaw_frequency_hz": 0.6,
35
+ "roll_amplitude_rad": 0.04,
36
+ "roll_frequency_hz": 1.3,
37
+ "x_amplitude_m": 0.0045,
38
+ "x_frequency_hz": 0.35,
39
+ "y_amplitude_m": 0.00375,
40
+ "y_frequency_hz": 0.45,
41
+ "z_amplitude_m": 0.00225,
42
+ "z_frequency_hz": 0.25,
43
+ "antenna_amplitude_rad": 0.5,
44
+ "antenna_move_name": "wiggle",
45
+ "frequency_hz": 1.0
46
+ },
47
+ "happy": {
48
+ "description": "Happy/positive response",
49
+ "pitch_amplitude_rad": 0.08,
50
+ "z_amplitude_m": 0.01,
51
+ "antenna_amplitude_rad": 0.5,
52
+ "antenna_move_name": "both",
53
+ "frequency_hz": 1.2
54
+ },
55
+ "sad": {
56
+ "description": "Sad/negative response - head droops",
57
+ "pitch_offset_rad": 0.1,
58
+ "pitch_amplitude_rad": 0.04,
59
+ "z_offset_m": -0.01,
60
+ "z_amplitude_m": 0.002,
61
+ "antenna_amplitude_rad": 0.1,
62
+ "antenna_move_name": "both",
63
+ "frequency_hz": 0.3
64
+ },
65
+ "confused": {
66
+ "description": "Confused/error state - head tilts",
67
+ "roll_amplitude_rad": 0.1,
68
+ "yaw_amplitude_rad": 0.12,
69
+ "pitch_amplitude_rad": 0.05,
70
+ "antenna_amplitude_rad": 0.4,
71
+ "antenna_move_name": "wiggle",
72
+ "frequency_hz": 0.7
73
+ },
74
+ "alert": {
75
+ "description": "Alert/timer finished - quick movements",
76
+ "pitch_amplitude_rad": 0.1,
77
+ "z_amplitude_m": 0.012,
78
+ "antenna_amplitude_rad": 0.6,
79
+ "antenna_move_name": "both",
80
+ "frequency_hz": 1.5
81
+ }
82
+ },
83
+ "settings": {
84
+ "amplitude_scale": 1.0,
85
+ "transition_duration_s": 0.3
86
+ }
87
+ }
{reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/api_server.py RENAMED
@@ -4,7 +4,7 @@ import asyncio
4
  import logging
5
  from abc import abstractmethod
6
  from collections.abc import Iterable
7
- from typing import TYPE_CHECKING
8
 
9
  # pylint: disable=no-name-in-module
10
  from aioesphomeapi._frame_helper.packets import make_plain_text_packets
@@ -31,7 +31,7 @@ class APIServer(asyncio.Protocol):
31
 
32
  def __init__(self, name: str) -> None:
33
  self.name = name
34
- self._buffer: bytes | None = None
35
  self._buffer_len: int = 0
36
  self._pos: int = 0
37
  self._transport = None
@@ -44,10 +44,8 @@ class APIServer(asyncio.Protocol):
44
  def process_packet(self, msg_type: int, packet_data: bytes) -> None:
45
  msg_class = MESSAGE_TYPE_TO_PROTO[msg_type]
46
  msg_inst = msg_class.FromString(packet_data)
47
- _LOGGER.debug("Received message: %s", msg_class.__name__)
48
 
49
  if isinstance(msg_inst, HelloRequest):
50
- _LOGGER.info("HelloRequest received, sending HelloResponse")
51
  self.send_messages(
52
  [
53
  HelloResponse(
@@ -60,7 +58,6 @@ class APIServer(asyncio.Protocol):
60
  return
61
 
62
  if isinstance(msg_inst, AuthenticationRequest):
63
- _LOGGER.info("AuthenticationRequest received, sending AuthenticationResponse")
64
  self.send_messages([AuthenticationResponse()])
65
  elif isinstance(msg_inst, DisconnectRequest):
66
  self.send_messages([DisconnectResponse()])
@@ -76,27 +73,20 @@ class APIServer(asyncio.Protocol):
76
  msgs = [msgs]
77
  self.send_messages(msgs)
78
 
79
- def send_messages(self, msgs: list[message.Message]):
80
  if self._writelines is None:
81
  return
82
 
83
- try:
84
- packets = [(PROTO_TO_MESSAGE_TYPE[msg.__class__], msg.SerializeToString()) for msg in msgs]
85
- packet_bytes = make_plain_text_packets(packets)
86
- self._writelines(packet_bytes)
87
- except (IndexError, OSError, BrokenPipeError, ConnectionResetError) as e:
88
- _LOGGER.warning("Error sending message (%s): %s - connection may be lost",
89
- msgs[0].__class__.__name__ if msgs else "unknown", e)
90
- # Mark transport as invalid to prevent further writes
91
- self._writelines = None
92
- if self._transport:
93
- self._transport.close()
94
- self._transport = None
95
 
96
  def connection_made(self, transport) -> None:
97
  self._transport = transport
98
  self._writelines = transport.writelines
99
- _LOGGER.info("ESPHome client connected from %s", transport.get_extra_info("peername"))
100
 
101
  def data_received(self, data: bytes):
102
  if self._buffer is None:
@@ -150,13 +140,8 @@ class APIServer(asyncio.Protocol):
150
  return cstr[original_pos:new_pos]
151
 
152
  def connection_lost(self, exc):
153
- _LOGGER.info("ESPHome client disconnected: %s", exc)
154
  self._transport = None
155
  self._writelines = None
156
- # Clear buffer to prevent memory leak
157
- self._buffer = None
158
- self._buffer_len = 0
159
- self._pos = 0
160
 
161
  def _read_varuint(self) -> int:
162
  """Read a varuint from the buffer or -1 if the buffer runs out of bytes."""
 
4
  import logging
5
  from abc import abstractmethod
6
  from collections.abc import Iterable
7
+ from typing import TYPE_CHECKING, List, Optional
8
 
9
  # pylint: disable=no-name-in-module
10
  from aioesphomeapi._frame_helper.packets import make_plain_text_packets
 
31
 
32
  def __init__(self, name: str) -> None:
33
  self.name = name
34
+ self._buffer: Optional[bytes] = None
35
  self._buffer_len: int = 0
36
  self._pos: int = 0
37
  self._transport = None
 
44
  def process_packet(self, msg_type: int, packet_data: bytes) -> None:
45
  msg_class = MESSAGE_TYPE_TO_PROTO[msg_type]
46
  msg_inst = msg_class.FromString(packet_data)
 
47
 
48
  if isinstance(msg_inst, HelloRequest):
 
49
  self.send_messages(
50
  [
51
  HelloResponse(
 
58
  return
59
 
60
  if isinstance(msg_inst, AuthenticationRequest):
 
61
  self.send_messages([AuthenticationResponse()])
62
  elif isinstance(msg_inst, DisconnectRequest):
63
  self.send_messages([DisconnectResponse()])
 
73
  msgs = [msgs]
74
  self.send_messages(msgs)
75
 
76
+ def send_messages(self, msgs: List[message.Message]):
77
  if self._writelines is None:
78
  return
79
 
80
+ packets = [
81
+ (PROTO_TO_MESSAGE_TYPE[msg.__class__], msg.SerializeToString())
82
+ for msg in msgs
83
+ ]
84
+ packet_bytes = make_plain_text_packets(packets)
85
+ self._writelines(packet_bytes)
 
 
 
 
 
 
86
 
87
  def connection_made(self, transport) -> None:
88
  self._transport = transport
89
  self._writelines = transport.writelines
 
90
 
91
  def data_received(self, data: bytes):
92
  if self._buffer is None:
 
140
  return cstr[original_pos:new_pos]
141
 
142
  def connection_lost(self, exc):
 
143
  self._transport = None
144
  self._writelines = None
 
 
 
 
145
 
146
  def _read_varuint(self) -> int:
147
  """Read a varuint from the buffer or -1 if the buffer runs out of bytes."""
{reachy_mini_home_assistant/audio → reachy_mini_ha_voice}/audio_player.py RENAMED
@@ -8,52 +8,29 @@ Sendspin is automatically enabled by default - no user configuration needed.
8
  The system uses mDNS to discover Sendspin servers on the local network.
9
  """
10
 
11
- from __future__ import annotations
12
-
13
  import hashlib
14
  import logging
15
  import socket
16
  import threading
17
  import time
18
- from typing import TYPE_CHECKING
19
-
20
- import numpy as np
21
 
22
  if TYPE_CHECKING:
23
- from collections.abc import Callable
24
-
25
- from aiosendspin.models.core import StreamStartMessage
26
-
27
- from ..protocol.zeroconf import SendspinDiscovery
28
 
29
  _LOGGER = logging.getLogger(__name__)
30
 
31
- # Movement latency to sync head motion with audio playback
32
- # Audio playback has hardware buffer latency, so we delay head motion to match
33
- # Same as reachy_mini_conversation_app's HeadWobbler.MOVEMENT_LATENCY_S
34
- MOVEMENT_LATENCY_S = 0.2 # 200ms latency between audio start and head movement
35
- SWAY_FRAME_DT_S = 0.05
36
- STREAM_FETCH_CHUNK_SIZE = 2048
37
- UNTHROTTLED_PREROLL_S = 0.35
38
-
39
  # Check if aiosendspin is available
40
  try:
41
- from aiosendspin.client import PCMFormat, SendspinClient
 
42
  from aiosendspin.models.player import ClientHelloPlayerSupport, SupportedAudioFormat
43
- from aiosendspin.models.types import AudioCodec, PlayerCommand, Roles
44
-
45
  SENDSPIN_AVAILABLE = True
46
- except Exception as e:
47
  SENDSPIN_AVAILABLE = False
48
- _LOGGER.warning("Sendspin unavailable, disabling integration: %s", e)
49
- # Fallback placeholders to keep runtime annotations safe when Sendspin is unavailable.
50
- PCMFormat = None # type: ignore[assignment]
51
- SendspinClient = None # type: ignore[assignment]
52
- ClientHelloPlayerSupport = None # type: ignore[assignment]
53
- SupportedAudioFormat = None # type: ignore[assignment]
54
- AudioCodec = None # type: ignore[assignment]
55
- PlayerCommand = None # type: ignore[assignment]
56
- Roles = None # type: ignore[assignment]
57
 
58
 
59
  def _get_stable_client_id() -> str:
@@ -76,48 +53,46 @@ class AudioPlayer:
76
  Supports audio playback modes:
77
  1. Reachy Mini's built-in media system (default)
78
  2. Sendspin synchronized multi-room playback (as PLAYER - receives audio)
 
79
 
80
  When connected to Sendspin as a PLAYER, Reachy Mini receives audio streams
81
  from Home Assistant or other controllers for synchronized playback.
82
  """
83
 
84
- def __init__(self, reachy_mini=None, gstreamer_lock=None) -> None:
85
  """Initialize audio player.
86
 
87
  Args:
88
  reachy_mini: Reachy Mini SDK instance.
89
- gstreamer_lock: Threading lock for GStreamer media access (shared across all media operations).
90
  """
91
  self.reachy_mini = reachy_mini
92
- self._gstreamer_lock = gstreamer_lock if gstreamer_lock is not None else threading.Lock()
93
  self.is_playing = False
94
- self._playlist: list[str] = []
95
- self._done_callback: Callable[[], None] | None = None
96
  self._done_callback_lock = threading.Lock()
97
  self._duck_volume: float = 0.5
98
  self._unduck_volume: float = 1.0
99
  self._current_volume: float = 1.0
100
  self._stop_flag = threading.Event()
101
- self._playback_thread: threading.Thread | None = None # Track active playback thread
102
 
103
  # Speech sway callback for audio-driven head motion
104
- self._sway_callback: Callable[[dict], None] | None = None
105
 
106
  # Sendspin support (auto-enabled via mDNS discovery)
107
  # Uses stable client_id so HA recognizes the same device after restart
108
  self._sendspin_client_id = _get_stable_client_id()
109
- self._sendspin_client: SendspinClient | None = None
110
  self._sendspin_enabled = False
111
- self._sendspin_url: str | None = None
112
- self._sendspin_discovery: SendspinDiscovery | None = None
113
- self._sendspin_unsubscribers: list[Callable] = []
114
 
115
  # Audio buffer for Sendspin playback
116
- self._sendspin_audio_format: PCMFormat | None = None
117
  self._sendspin_playback_started = False
118
  self._sendspin_paused = False # Pause Sendspin when voice assistant is active
119
 
120
- def set_sway_callback(self, callback: Callable[[dict], None] | None) -> None:
121
  """Set callback for speech-driven sway animation.
122
 
123
  Args:
@@ -143,7 +118,7 @@ class AudioPlayer:
143
  return self._sendspin_enabled and self._sendspin_client is not None
144
 
145
  @property
146
- def sendspin_url(self) -> str | None:
147
  """Get current Sendspin server URL."""
148
  return self._sendspin_url
149
 
@@ -184,7 +159,7 @@ class AudioPlayer:
184
  return
185
 
186
  # Import here to avoid circular imports
187
- from ..protocol.zeroconf import SendspinDiscovery
188
 
189
  _LOGGER.info("Starting Sendspin server discovery...")
190
  self._sendspin_discovery = SendspinDiscovery(self._on_sendspin_server_found)
@@ -226,13 +201,25 @@ class AudioPlayer:
226
  player_support = ClientHelloPlayerSupport(
227
  supported_formats=[
228
  # Prefer 16kHz (native ReSpeaker sample rate - no resampling needed)
229
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=2, sample_rate=16000, bit_depth=16),
230
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=1, sample_rate=16000, bit_depth=16),
 
 
 
 
231
  # Also support higher sample rates (will be resampled to 16kHz)
232
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=2, sample_rate=48000, bit_depth=16),
233
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=2, sample_rate=44100, bit_depth=16),
234
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=1, sample_rate=48000, bit_depth=16),
235
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=1, sample_rate=44100, bit_depth=16),
 
 
 
 
 
 
 
 
236
  ],
237
  buffer_capacity=32_000_000,
238
  supported_commands=[PlayerCommand.VOLUME, PlayerCommand.MUTE],
@@ -258,7 +245,8 @@ class AudioPlayer:
258
  self._sendspin_url = server_url
259
  self._sendspin_enabled = True
260
 
261
- _LOGGER.info("Sendspin connected as PLAYER: %s (client_id=%s)", server_url, self._sendspin_client_id)
 
262
  return True
263
 
264
  except Exception as e:
@@ -267,7 +255,7 @@ class AudioPlayer:
267
  self._sendspin_enabled = False
268
  return False
269
 
270
- def _on_sendspin_audio_chunk(self, server_timestamp_us: int, audio_data: bytes, fmt: PCMFormat) -> None:
271
  """Handle incoming audio chunks from Sendspin server.
272
 
273
  Plays the audio through Reachy Mini's speaker using push_audio_sample().
@@ -275,6 +263,9 @@ class AudioPlayer:
275
 
276
  Note: Audio is dropped when Sendspin is paused (e.g., during voice assistant interaction).
277
  """
 
 
 
278
  # Drop audio when paused (voice assistant is active)
279
  if self._sendspin_paused:
280
  return
@@ -301,9 +292,6 @@ class AudioPlayer:
301
  # Convert to float32 for playback (SDK expects float32)
302
  audio_float = audio_array.astype(np.float32) / max_val
303
 
304
- # Clamp values to valid range [-1.0, 1.0] to prevent invalid values
305
- audio_float = np.clip(audio_float, -1.0, 1.0)
306
-
307
  # Reshape for channels if needed
308
  if fmt.channels > 1:
309
  # Reshape to (samples, channels)
@@ -316,14 +304,14 @@ class AudioPlayer:
316
  target_sample_rate = self.reachy_mini.media.get_output_audio_samplerate()
317
  if fmt.sample_rate != target_sample_rate and target_sample_rate > 0:
318
  import scipy.signal
319
-
320
  # Calculate new length
321
  new_length = int(len(audio_float) * target_sample_rate / fmt.sample_rate)
322
  if new_length > 0:
323
  audio_float = scipy.signal.resample(audio_float, new_length, axis=0)
324
  # Log resampling only once per stream
325
- if not hasattr(self, "_logged_resample") or not self._logged_resample:
326
- _LOGGER.debug("Resampling Sendspin audio: %d Hz -> %d Hz", fmt.sample_rate, target_sample_rate)
 
327
  self._logged_resample = True
328
 
329
  # Apply volume
@@ -339,46 +327,31 @@ class AudioPlayer:
339
  _LOGGER.warning("Failed to start media playback: %s", e)
340
 
341
  # Play through Reachy Mini's media system using push_audio_sample
342
- # Use GStreamer lock to prevent concurrent access conflicts
343
- acquired = self._gstreamer_lock.acquire(timeout=0.05)
344
- if acquired:
345
- try:
346
- self.reachy_mini.media.push_audio_sample(audio_float)
347
- finally:
348
- self._gstreamer_lock.release()
349
- else:
350
- _LOGGER.debug("GStreamer lock busy, skipping audio sample")
351
- # Flush SDK playback buffer to prevent buffer overflow during lock contention
352
- try:
353
- if hasattr(self.reachy_mini.media, "flush"):
354
- self.reachy_mini.media.flush()
355
- elif hasattr(self.reachy_mini.media, "flush_audio"):
356
- self.reachy_mini.media.flush_audio()
357
- except Exception:
358
- pass
359
 
360
  except Exception as e:
361
  _LOGGER.debug("Error playing Sendspin audio: %s", e)
362
 
363
- def _on_sendspin_stream_start(self, message: StreamStartMessage) -> None:
364
  """Handle stream start from Sendspin server."""
365
  _LOGGER.debug("Sendspin stream started")
366
  # No need to clear buffer - just start fresh
367
 
368
- def _on_sendspin_stream_end(self, roles: list[Roles] | None) -> None:
369
  """Handle stream end from Sendspin server."""
370
  if roles is None or Roles.PLAYER in roles:
371
  _LOGGER.debug("Sendspin stream ended")
372
 
373
- def _on_sendspin_stream_clear(self, roles: list[Roles] | None) -> None:
374
  """Handle stream clear from Sendspin server."""
375
  if roles is None or Roles.PLAYER in roles:
376
  _LOGGER.debug("Sendspin stream cleared")
377
- try:
378
- self.reachy_mini.media.stop_playing()
379
- self._sendspin_playback_started = False
380
- except Exception:
381
- pass
 
382
 
383
  async def _disconnect_sendspin(self) -> None:
384
  """Disconnect from current Sendspin server."""
@@ -386,8 +359,8 @@ class AudioPlayer:
386
  for unsub in self._sendspin_unsubscribers:
387
  try:
388
  unsub()
389
- except Exception as e:
390
- _LOGGER.debug("Error during Sendspin unsubscribe: %s", e)
391
  self._sendspin_unsubscribers.clear()
392
 
393
  if self._sendspin_client is not None:
@@ -411,20 +384,14 @@ class AudioPlayer:
411
  # Disconnect from server
412
  await self._disconnect_sendspin()
413
 
414
- # Clear all references to prevent memory leaks
415
- self._sendspin_client = None
416
- self._sendspin_url = None
417
- self._sendspin_audio_format = None
418
- self._sendspin_enabled = False
419
-
420
  _LOGGER.info("Sendspin stopped")
421
 
422
  # ========== Core Playback Methods ==========
423
 
424
  def play(
425
  self,
426
- url: str | list[str],
427
- done_callback: Callable[[], None] | None = None,
428
  stop_first: bool = True,
429
  ) -> None:
430
  """Play audio from URL(s).
@@ -444,12 +411,6 @@ class AudioPlayer:
444
 
445
  self._done_callback = done_callback
446
  self._stop_flag.clear()
447
-
448
- # Limit active playback threads to prevent resource exhaustion
449
- if hasattr(self, "_playback_thread") and self._playback_thread and self._playback_thread.is_alive():
450
- _LOGGER.warning("Previous playback still active, stopping it")
451
- self.stop()
452
-
453
  self._play_next()
454
 
455
  def _play_next(self) -> None:
@@ -463,176 +424,76 @@ class AudioPlayer:
463
  self.is_playing = True
464
 
465
  # Start playback in a thread
466
- self._playback_thread = threading.Thread(target=self._play_file, args=(next_url,), daemon=True)
467
- self._playback_thread.start()
468
 
469
  def _play_file(self, file_path: str) -> None:
470
  """Play an audio file with optional speech-driven sway animation."""
471
  try:
472
  # Handle URLs - download first
473
  if file_path.startswith(("http://", "https://")):
474
- import requests
475
-
476
- source_url = file_path
477
- streamed = False
478
- cached_audio = bytearray()
479
- content_type = ""
480
-
481
- try:
482
- with requests.get(source_url, stream=True, timeout=(5.0, 30.0)) as response:
483
- response.raise_for_status()
484
- content_type = (response.headers.get("Content-Type") or "").lower()
485
-
486
- stream_iter = response.iter_content(chunk_size=STREAM_FETCH_CHUNK_SIZE)
487
-
488
- def caching_iter_content(chunk_size: int = STREAM_FETCH_CHUNK_SIZE):
489
- del chunk_size
490
- for chunk in stream_iter:
491
- if chunk:
492
- cached_audio.extend(chunk)
493
- yield chunk
494
-
495
- adapted_response = self._iterator_response_adapter(caching_iter_content())
496
-
497
- # Try true streaming on this single HTTP request.
498
- if self._is_pcm_content_type(content_type):
499
- _LOGGER.info("TTS playback mode: streaming_pcm")
500
- streamed = self._stream_pcm_response(adapted_response, content_type)
501
- else:
502
- _LOGGER.info("TTS playback mode: streaming_decoded")
503
- streamed = self._stream_decoded_response(adapted_response, source_url, content_type)
504
-
505
- if not streamed:
506
- # Keep draining remaining bytes from the SAME request
507
- # so one-time URLs are still playable via fallback.
508
- for chunk in stream_iter:
509
- if chunk:
510
- cached_audio.extend(chunk)
511
-
512
- except Exception as e:
513
- _LOGGER.debug("Streaming TTS failed, fallback to memory playback: %s", e)
514
 
515
- if streamed:
516
- return
517
-
518
- _LOGGER.info("TTS playback mode: fallback_memory")
519
- played = self._play_cached_audio(cached_audio, content_type)
520
- if played:
521
- return
522
-
523
- _LOGGER.error("Failed to play cached TTS audio from memory")
524
- return
525
 
526
  if self._stop_flag.is_set():
527
  return
528
 
529
  # Play locally using Reachy Mini's media system
530
- try:
531
- duration: float | None = None
532
- sway_frames: list[dict] = []
533
-
534
- # Fast metadata path first to avoid long pre-read latency.
535
  try:
 
536
  import soundfile as sf
 
 
537
 
538
- info = sf.info(file_path)
539
- if info.samplerate > 0 and info.frames > 0:
540
- duration = float(info.frames) / float(info.samplerate)
541
- except Exception:
542
- duration = None
543
-
544
- # Optional sway pre-analysis (best effort). If decode/read is expensive
545
- # or unsupported, keep playback path working without blocking startup.
546
- if self._sway_callback is not None:
547
- try:
548
- import soundfile as sf
549
 
550
- data, sample_rate = sf.read(file_path)
551
- if duration is None and sample_rate > 0:
552
- duration = len(data) / sample_rate
553
 
554
- from ..motion.speech_sway import SpeechSwayRT
 
 
 
555
 
556
- sway = SpeechSwayRT()
557
- sway_frames = sway.feed(data, sample_rate)
558
- except Exception:
559
- sway_frames = []
560
-
561
- # Start playback
562
- self.reachy_mini.media.play_sound(file_path)
563
-
564
- # Playback loop with sway animation
565
- # Apply MOVEMENT_LATENCY_S delay to sync head motion with audio
566
- # (audio playback has hardware buffer latency)
567
- start_time = time.time()
568
- frame_duration = 0.05 # 50ms per sway frame (HOP_MS)
569
- frame_idx = 0
570
-
571
- # Playback loop with sway animation and timeout protection
572
- # Apply MOVEMENT_LATENCY_S delay to sync head motion with audio
573
- # (audio playback has hardware buffer latency)
574
- start_time = time.time()
575
- frame_duration = 0.05 # 50ms per sway frame (HOP_MS)
576
- frame_idx = 0
577
- # If duration unknown, poll SDK playback state when available.
578
- has_duration = (duration is not None) and (duration > 0)
579
- duration_s = duration if has_duration else 0.0
580
- max_duration = (duration_s * 1.5) if has_duration else 60.0
581
- playback_timeout = start_time + max_duration
582
-
583
- is_playing_fn = getattr(self.reachy_mini.media, "is_playing", None)
584
-
585
- while True:
586
- # Check for timeout (safety guard)
587
- if time.time() > playback_timeout:
588
- _LOGGER.warning("Audio playback timeout (%.1fs), stopping", max_duration)
589
- self.reachy_mini.media.stop_playing()
590
- break
591
-
592
- if self._stop_flag.is_set():
593
- self.reachy_mini.media.stop_playing()
594
- break
595
-
596
- if has_duration:
597
- if (time.time() - start_time) >= duration_s:
598
  break
599
- elif callable(is_playing_fn):
600
- try:
601
- if not bool(is_playing_fn()):
602
- break
603
- except Exception:
604
- pass
605
-
606
- # Apply sway frame if available, with 200ms delay
607
- if self._sway_callback and frame_idx < len(sway_frames):
608
- elapsed = time.time() - start_time
609
- # Apply latency: head motion starts MOVEMENT_LATENCY_S after audio
610
- effective_elapsed = max(0, elapsed - MOVEMENT_LATENCY_S)
611
- target_frame = int(effective_elapsed / frame_duration)
612
-
613
- # Skip frames if falling behind (lag compensation)
614
- while frame_idx <= target_frame and frame_idx < len(sway_frames):
615
- self._sway_callback(sway_frames[frame_idx])
616
- frame_idx += 1
617
-
618
- time.sleep(0.02) # 20ms sleep for responsive sway
619
-
620
- # Reset sway to zero when done
621
- if self._sway_callback:
622
- self._sway_callback(
623
- {
624
- "pitch_rad": 0.0,
625
- "yaw_rad": 0.0,
626
- "roll_rad": 0.0,
627
- "x_m": 0.0,
628
- "y_m": 0.0,
629
- "z_m": 0.0,
630
- }
631
- )
632
 
633
- except Exception as e:
634
- _LOGGER.error("Reachy Mini audio failed: %s", e)
635
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
636
 
637
  except Exception as e:
638
  _LOGGER.error("Error playing audio: %s", e)
@@ -643,473 +504,22 @@ class AudioPlayer:
643
  else:
644
  self._on_playback_finished()
645
 
646
- @staticmethod
647
- def _iterator_response_adapter(iterator):
648
- class _ResponseAdapter:
649
- def __init__(self, iter_obj) -> None:
650
- self._iter_obj = iter_obj
651
-
652
- def iter_content(self, chunk_size: int = 8192):
653
- del chunk_size
654
- return self._iter_obj
655
-
656
- return _ResponseAdapter(iterator)
657
-
658
- def _play_cached_audio(self, audio_bytes: bytes | bytearray, content_type: str) -> bool:
659
- if not audio_bytes:
660
- return False
661
-
662
- audio_data = bytes(audio_bytes)
663
- mem_iter = (
664
- audio_data[i : i + STREAM_FETCH_CHUNK_SIZE] for i in range(0, len(audio_data), STREAM_FETCH_CHUNK_SIZE)
665
- )
666
- adapted_response = self._iterator_response_adapter(mem_iter)
667
-
668
- if self._is_pcm_content_type(content_type):
669
- return self._stream_pcm_response(adapted_response, content_type)
670
-
671
- return self._stream_decoded_response(adapted_response, "memory-cache", content_type)
672
-
673
- @staticmethod
674
- def _is_pcm_content_type(content_type: str) -> bool:
675
- return ("audio/l16" in content_type) or ("audio/pcm" in content_type) or ("audio/raw" in content_type)
676
-
677
- @staticmethod
678
- def _parse_pcm_format(content_type: str) -> tuple[int, int]:
679
- channels = 1
680
- sample_rate = 16000
681
- if ";" in content_type:
682
- for part in content_type.split(";"):
683
- token = part.strip()
684
- if token.startswith("channels="):
685
- try:
686
- channels = max(1, int(token.split("=", 1)[1]))
687
- except Exception:
688
- pass
689
- elif token.startswith("rate="):
690
- try:
691
- sample_rate = max(8000, int(token.split("=", 1)[1]))
692
- except Exception:
693
- pass
694
- return channels, sample_rate
695
-
696
- @staticmethod
697
- def _guess_gst_input_caps(content_type: str) -> str | None:
698
- ct = (content_type or "").split(";", 1)[0].strip().lower()
699
- mapping = {
700
- "audio/mpeg": "audio/mpeg,mpegversion=(int)1",
701
- "audio/mp3": "audio/mpeg,mpegversion=(int)1",
702
- "audio/aac": "audio/mpeg,mpegversion=(int)4,stream-format=(string)raw",
703
- "audio/mp4": "audio/mpeg,mpegversion=(int)4,stream-format=(string)raw",
704
- "audio/ogg": "application/ogg",
705
- "application/ogg": "application/ogg",
706
- "audio/opus": "audio/x-opus",
707
- "audio/webm": "video/webm",
708
- "audio/wav": "audio/x-wav",
709
- "audio/wave": "audio/x-wav",
710
- "audio/x-wav": "audio/x-wav",
711
- "audio/flac": "audio/x-flac",
712
- "audio/x-flac": "audio/x-flac",
713
- }
714
- return mapping.get(ct)
715
-
716
- def _ensure_media_playback_started(self) -> bool:
717
- acquired = self._gstreamer_lock.acquire(timeout=0.3)
718
- if not acquired:
719
- return False
720
- try:
721
- self.reachy_mini.media.start_playing()
722
- return True
723
- except Exception:
724
- return False
725
- finally:
726
- self._gstreamer_lock.release()
727
-
728
- def _push_audio_float(self, audio_float: np.ndarray, max_wait_s: float = 1.0) -> bool:
729
- deadline = time.monotonic() + max(0.05, max_wait_s)
730
- while time.monotonic() < deadline:
731
- if self._stop_flag.is_set():
732
- return False
733
-
734
- acquired = self._gstreamer_lock.acquire(timeout=0.1)
735
- if not acquired:
736
- continue
737
- try:
738
- self.reachy_mini.media.push_audio_sample(audio_float)
739
- return True
740
- finally:
741
- self._gstreamer_lock.release()
742
-
743
- return False
744
-
745
- def _stream_pcm_response(self, response, content_type: str) -> bool:
746
- channels, sample_rate = self._parse_pcm_format(content_type)
747
- target_sr = self.reachy_mini.media.get_output_audio_samplerate()
748
- if target_sr <= 0:
749
- target_sr = 16000
750
-
751
- if not self._ensure_media_playback_started():
752
- return False
753
-
754
- remainder = b""
755
- pushed_any = False
756
- played_frames = 0
757
- stream_start = time.monotonic()
758
- sway_ctx = self._init_stream_sway_context()
759
- bytes_per_frame = 2 * channels
760
-
761
- for chunk in response.iter_content(chunk_size=STREAM_FETCH_CHUNK_SIZE):
762
- if self._stop_flag.is_set():
763
- break
764
- if not chunk:
765
- continue
766
-
767
- data = remainder + chunk
768
- usable_len = (len(data) // bytes_per_frame) * bytes_per_frame
769
- remainder = data[usable_len:]
770
- if usable_len == 0:
771
- continue
772
-
773
- pcm = np.frombuffer(data[:usable_len], dtype=np.int16).astype(np.float32) / 32768.0
774
- pcm = np.clip(pcm * self._current_volume, -1.0, 1.0).reshape(-1, channels)
775
-
776
- if sample_rate != target_sr and target_sr > 0:
777
- import scipy.signal
778
-
779
- new_len = int(len(pcm) * target_sr / sample_rate)
780
- if new_len > 0:
781
- pcm = scipy.signal.resample(pcm, new_len, axis=0).astype(np.float32, copy=False)
782
-
783
- target_elapsed = played_frames / float(target_sr)
784
- actual_elapsed = time.monotonic() - stream_start
785
- if target_elapsed > UNTHROTTLED_PREROLL_S and target_elapsed > actual_elapsed:
786
- time.sleep(min(0.05, target_elapsed - actual_elapsed))
787
 
788
- if not self._push_audio_float(pcm):
789
- continue
790
 
791
- pushed_any = True
792
- played_frames += int(pcm.shape[0])
793
- self._feed_stream_sway(sway_ctx, pcm, target_sr)
794
-
795
- self._finalize_stream_sway(sway_ctx)
796
- return pushed_any
797
-
798
- def _stream_decoded_response(self, response, source_url: str, content_type: str) -> bool:
799
- try:
800
- import gi
801
-
802
- gi.require_version("Gst", "1.0")
803
- from gi.repository import Gst
804
- except Exception:
805
- return False
806
-
807
- try:
808
- Gst.init(None)
809
- except Exception:
810
- pass
811
-
812
- target_sr = self.reachy_mini.media.get_output_audio_samplerate()
813
- if target_sr <= 0:
814
- target_sr = 16000
815
-
816
- target_channels = 1
817
- if not self._ensure_media_playback_started():
818
- return False
819
-
820
- pipeline = Gst.Pipeline.new("tts_stream_decode")
821
- appsrc = Gst.ElementFactory.make("appsrc", "src")
822
- decodebin = Gst.ElementFactory.make("decodebin", "decode")
823
- audioconvert = Gst.ElementFactory.make("audioconvert", "conv")
824
- audioresample = Gst.ElementFactory.make("audioresample", "resample")
825
- capsfilter = Gst.ElementFactory.make("capsfilter", "caps")
826
- appsink = Gst.ElementFactory.make("appsink", "sink")
827
-
828
- if not all((pipeline, appsrc, decodebin, audioconvert, audioresample, capsfilter, appsink)):
829
- return False
830
-
831
- target_caps = Gst.Caps.from_string(f"audio/x-raw,format=S16LE,channels={target_channels},rate={target_sr}")
832
- capsfilter.set_property("caps", target_caps)
833
-
834
- appsrc.set_property("is-live", True)
835
- appsrc.set_property("format", Gst.Format.BYTES)
836
- appsrc.set_property("block", False)
837
- appsrc.set_property("do-timestamp", True)
838
-
839
- src_caps = self._guess_gst_input_caps(content_type)
840
- if src_caps:
841
- try:
842
- appsrc.set_property("caps", Gst.Caps.from_string(src_caps))
843
- except Exception:
844
- pass
845
-
846
- try:
847
- decodebin.set_property("caps", Gst.Caps.from_string("audio/x-raw"))
848
- except Exception:
849
- pass
850
-
851
- appsink.set_property("emit-signals", False)
852
- appsink.set_property("sync", False)
853
- # Keep all decoded audio for TTS completion. Dropping buffers can cause
854
- # a short "blip" then silence on fast decoders.
855
- appsink.set_property("max-buffers", 0)
856
- appsink.set_property("drop", False)
857
-
858
- pipeline.add(appsrc)
859
- pipeline.add(decodebin)
860
- pipeline.add(audioconvert)
861
- pipeline.add(audioresample)
862
- pipeline.add(capsfilter)
863
- pipeline.add(appsink)
864
-
865
- if not appsrc.link(decodebin):
866
- return False
867
- if not audioconvert.link(audioresample):
868
- return False
869
- if not audioresample.link(capsfilter):
870
- return False
871
- if not capsfilter.link(appsink):
872
- return False
873
-
874
- audio_state = {"linked": False}
875
-
876
- def on_pad_added(_decodebin, pad) -> None:
877
- sink_pad = audioconvert.get_static_pad("sink")
878
- if sink_pad is None or sink_pad.is_linked():
879
- return
880
-
881
- caps_obj = pad.get_current_caps() or pad.query_caps(None)
882
- if caps_obj is None:
883
- return
884
- if caps_obj.to_string().startswith("audio/"):
885
- try:
886
- result = pad.link(sink_pad)
887
- if result == Gst.PadLinkReturn.OK:
888
- audio_state["linked"] = True
889
- except Exception:
890
- pass
891
-
892
- decodebin.connect("pad-added", on_pad_added)
893
-
894
- pushed_any = False
895
- played_frames = 0
896
- stream_start = time.monotonic()
897
- sway_ctx = self._init_stream_sway_context()
898
- bytes_per_frame = 2 * target_channels
899
- feed_done = threading.Event()
900
- decode_error = False
901
-
902
- def writer() -> None:
903
- try:
904
- for chunk in response.iter_content(chunk_size=STREAM_FETCH_CHUNK_SIZE):
905
- if self._stop_flag.is_set():
906
- break
907
- if not chunk:
908
- continue
909
- gst_buffer = Gst.Buffer.new_allocate(None, len(chunk), None)
910
- if gst_buffer is None:
911
- continue
912
- gst_buffer.fill(0, chunk)
913
- ret = appsrc.emit("push-buffer", gst_buffer)
914
- if ret not in (Gst.FlowReturn.OK, Gst.FlowReturn.FLUSHING):
915
- _LOGGER.debug("appsrc push-buffer returned %s", ret)
916
- break
917
- except Exception:
918
- pass
919
- finally:
920
- feed_done.set()
921
- try:
922
- appsrc.emit("end-of-stream")
923
- except Exception:
924
- pass
925
-
926
- try:
927
- state_ret = pipeline.set_state(Gst.State.PLAYING)
928
- if state_ret == Gst.StateChangeReturn.FAILURE:
929
- _LOGGER.debug("Failed to set GStreamer decode pipeline PLAYING for URL=%s", source_url)
930
- return False
931
-
932
- writer_thread = threading.Thread(target=writer, daemon=True)
933
- writer_thread.start()
934
-
935
- remainder = b""
936
- timeout_ns = 20_000_000 # 20ms
937
- bus = pipeline.get_bus()
938
- eos_seen = False
939
- eos_drain_empty_polls = 0
940
-
941
- while True:
942
- sample = appsink.emit("try-pull-sample", timeout_ns)
943
- if sample is not None:
944
- eos_drain_empty_polls = 0
945
- try:
946
- gst_buffer = sample.get_buffer()
947
- if gst_buffer is None:
948
- continue
949
- ok, map_info = gst_buffer.map(Gst.MapFlags.READ)
950
- if not ok:
951
- continue
952
- try:
953
- raw = bytes(map_info.data)
954
- finally:
955
- gst_buffer.unmap(map_info)
956
-
957
- data = remainder + raw
958
- usable_len = (len(data) // bytes_per_frame) * bytes_per_frame
959
- remainder = data[usable_len:]
960
- if usable_len == 0:
961
- continue
962
-
963
- pcm = np.frombuffer(data[:usable_len], dtype=np.int16).astype(np.float32) / 32768.0
964
- pcm = np.clip(pcm * self._current_volume, -1.0, 1.0).reshape(-1, target_channels)
965
-
966
- target_elapsed = played_frames / float(target_sr)
967
- actual_elapsed = time.monotonic() - stream_start
968
- if target_elapsed > UNTHROTTLED_PREROLL_S and target_elapsed > actual_elapsed:
969
- time.sleep(min(0.05, target_elapsed - actual_elapsed))
970
-
971
- if not self._push_audio_float(pcm):
972
- continue
973
-
974
- pushed_any = True
975
- played_frames += int(pcm.shape[0])
976
- self._feed_stream_sway(sway_ctx, pcm, target_sr)
977
- finally:
978
- sample = None
979
- elif eos_seen and feed_done.is_set():
980
- eos_drain_empty_polls += 1
981
-
982
- msg = bus.timed_pop_filtered(
983
- 0,
984
- Gst.MessageType.ERROR | Gst.MessageType.EOS,
985
- )
986
- if msg is not None:
987
- if msg.type == Gst.MessageType.EOS:
988
- eos_seen = True
989
- elif msg.type == Gst.MessageType.ERROR:
990
- err, debug = msg.parse_error()
991
- err_text = str(err).lower()
992
- debug_text = str(debug).lower() if debug is not None else ""
993
-
994
- # Some demuxers report non-audio not-linked warnings as ERROR.
995
- # If audio pad is already linked, keep streaming instead of aborting.
996
- if audio_state["linked"] and (
997
- "not-linked" in err_text
998
- or "not-linked" in debug_text
999
- or "streaming stopped, reason not-linked" in debug_text
1000
- ):
1001
- continue
1002
-
1003
- decode_error = True
1004
- _LOGGER.debug(
1005
- "GStreamer decode error content-type=%s url=%s err=%s debug=%s",
1006
- content_type or "unknown",
1007
- source_url,
1008
- err,
1009
- debug,
1010
- )
1011
- break
1012
-
1013
- if feed_done.is_set() and eos_seen:
1014
- sink_eos = False
1015
- try:
1016
- sink_eos_fn = getattr(appsink, "is_eos", None)
1017
- if callable(sink_eos_fn):
1018
- sink_eos = bool(sink_eos_fn())
1019
- except Exception:
1020
- sink_eos = False
1021
-
1022
- # End playback only after upstream finished feeding and
1023
- # appsink has drained decoded buffers.
1024
- if sink_eos and eos_drain_empty_polls >= 2:
1025
- break
1026
-
1027
- # Fallback guard in case is_eos is unavailable.
1028
- if eos_drain_empty_polls >= 100:
1029
- break
1030
-
1031
- if self._stop_flag.is_set():
1032
- break
1033
-
1034
- writer_thread.join(timeout=1.0)
1035
-
1036
- # Streaming is successful only if it reached a clean EOS without decode error.
1037
- # If decode failed (typically unsupported stream format), force fallback.
1038
- if self._stop_flag.is_set():
1039
- return True
1040
-
1041
- completed_cleanly = feed_done.is_set() and eos_seen and (not decode_error)
1042
- if not completed_cleanly:
1043
- return False
1044
-
1045
- except Exception as e:
1046
- _LOGGER.debug("Error during GStreamer stream decode: %s", e)
1047
- pushed_any = False
1048
- finally:
1049
- self._finalize_stream_sway(sway_ctx)
1050
- try:
1051
- pipeline.set_state(Gst.State.NULL)
1052
- except Exception:
1053
- pass
1054
-
1055
- return pushed_any
1056
-
1057
- def _init_stream_sway_context(self) -> dict | None:
1058
- if self._sway_callback is None:
1059
- return None
1060
- try:
1061
- from ..motion.speech_sway import SpeechSwayRT
1062
-
1063
- return {
1064
- "sway": SpeechSwayRT(),
1065
- "base_ts": time.monotonic(),
1066
- "frames_done": 0,
1067
- }
1068
- except Exception:
1069
- return None
1070
-
1071
- def _feed_stream_sway(self, ctx: dict | None, pcm: np.ndarray, sample_rate: int) -> None:
1072
- if ctx is None or self._sway_callback is None:
1073
- return
1074
- try:
1075
- sway = ctx["sway"]
1076
- results = sway.feed(pcm, sample_rate)
1077
- if not results:
1078
- return
1079
-
1080
- base_ts = float(ctx["base_ts"])
1081
- for item in results:
1082
- target = base_ts + MOVEMENT_LATENCY_S + ctx["frames_done"] * SWAY_FRAME_DT_S
1083
- now = time.monotonic()
1084
- if target > now:
1085
- time.sleep(min(0.02, target - now))
1086
-
1087
- self._sway_callback(item)
1088
- ctx["frames_done"] += 1
1089
- except Exception:
1090
- pass
1091
-
1092
- def _finalize_stream_sway(self, ctx: dict | None) -> None:
1093
- if ctx is None or self._sway_callback is None:
1094
- return
1095
- try:
1096
- self._sway_callback(
1097
- {
1098
- "pitch_rad": 0.0,
1099
- "yaw_rad": 0.0,
1100
- "roll_rad": 0.0,
1101
- "x_m": 0.0,
1102
- "y_m": 0.0,
1103
- "z_m": 0.0,
1104
- }
1105
- )
1106
- except Exception:
1107
- pass
1108
 
1109
  def _on_playback_finished(self) -> None:
1110
  """Called when playback is finished."""
1111
  self.is_playing = False
1112
- todo_callback: Callable[[], None] | None = None
1113
 
1114
  with self._done_callback_lock:
1115
  if self._done_callback:
@@ -1128,13 +538,14 @@ class AudioPlayer:
1128
  Stops current audio output but preserves playlist for resume.
1129
  """
1130
  self._stop_flag.set()
1131
- try:
1132
- self.reachy_mini.media.stop_playing()
1133
- except Exception:
1134
- pass
 
1135
  self.is_playing = False
1136
 
1137
- def resume_playback(self) -> None:
1138
  """Resume playback from where it was paused."""
1139
  self._stop_flag.clear()
1140
  if self._playlist:
@@ -1143,40 +554,14 @@ class AudioPlayer:
1143
  def stop(self) -> None:
1144
  """Stop playback and clear playlist."""
1145
  self._stop_flag.set()
1146
-
1147
- # Stop Reachy Mini playback
1148
- try:
1149
- self.reachy_mini.media.stop_playing()
1150
- except Exception:
1151
- pass
1152
-
1153
- # Wait for playback thread to finish (with timeout)
1154
- if self._playback_thread and self._playback_thread.is_alive():
1155
  try:
1156
- self._playback_thread.join(timeout=2.0)
1157
- if self._playback_thread.is_alive():
1158
- _LOGGER.warning("Playback thread did not stop in time")
1159
  except Exception:
1160
  pass
1161
- self._playback_thread = None
1162
-
1163
  self._playlist.clear()
1164
  self.is_playing = False
1165
 
1166
- def __del__(self) -> None:
1167
- """Cleanup on garbage collection to prevent listener leaks."""
1168
- try:
1169
- # Force cleanup of Sendspin listeners to prevent memory leaks
1170
- for unsub in self._sendspin_unsubscribers:
1171
- try:
1172
- unsub()
1173
- except Exception:
1174
- pass
1175
- self._sendspin_unsubscribers.clear()
1176
- self._sendspin_client = None
1177
- except Exception:
1178
- pass
1179
-
1180
  def duck(self) -> None:
1181
  """Reduce volume for announcements."""
1182
  self._current_volume = self._duck_volume
@@ -1191,28 +576,3 @@ class AudioPlayer:
1191
  self._unduck_volume = volume / 100.0
1192
  self._duck_volume = self._unduck_volume / 2
1193
  self._current_volume = self._unduck_volume
1194
-
1195
- def suspend(self) -> None:
1196
- """Suspend the audio player for sleep mode.
1197
-
1198
- Stops any current playback and clears the playlist.
1199
- """
1200
- _LOGGER.info("Suspending AudioPlayer for sleep...")
1201
-
1202
- # Stop any current playback
1203
- self.stop()
1204
-
1205
- # Clear sway callback to release reference
1206
- self._sway_callback = None
1207
-
1208
- _LOGGER.info("AudioPlayer suspended")
1209
-
1210
- def resume(self) -> None:
1211
- """Resume the audio player after sleep."""
1212
- _LOGGER.info("Resuming AudioPlayer from sleep...")
1213
-
1214
- # Nothing specific to restore - audio player is stateless
1215
- # Just ensure flags are reset
1216
- self._stop_flag.clear()
1217
-
1218
- _LOGGER.info("AudioPlayer resumed")
 
8
  The system uses mDNS to discover Sendspin servers on the local network.
9
  """
10
 
 
 
11
  import hashlib
12
  import logging
13
  import socket
14
  import threading
15
  import time
16
+ from collections.abc import Callable
17
+ from typing import List, Optional, TYPE_CHECKING, Union
 
18
 
19
  if TYPE_CHECKING:
20
+ from .zeroconf import SendspinDiscovery
 
 
 
 
21
 
22
  _LOGGER = logging.getLogger(__name__)
23
 
 
 
 
 
 
 
 
 
24
  # Check if aiosendspin is available
25
  try:
26
+ from aiosendspin.client import SendspinClient, PCMFormat
27
+ from aiosendspin.models.types import Roles, AudioCodec, PlayerCommand
28
  from aiosendspin.models.player import ClientHelloPlayerSupport, SupportedAudioFormat
29
+ from aiosendspin.models.core import StreamStartMessage
 
30
  SENDSPIN_AVAILABLE = True
31
+ except ImportError:
32
  SENDSPIN_AVAILABLE = False
33
+ _LOGGER.debug("aiosendspin not installed, Sendspin support disabled")
 
 
 
 
 
 
 
 
34
 
35
 
36
  def _get_stable_client_id() -> str:
 
53
  Supports audio playback modes:
54
  1. Reachy Mini's built-in media system (default)
55
  2. Sendspin synchronized multi-room playback (as PLAYER - receives audio)
56
+ 3. Sounddevice fallback (when Reachy Mini not available)
57
 
58
  When connected to Sendspin as a PLAYER, Reachy Mini receives audio streams
59
  from Home Assistant or other controllers for synchronized playback.
60
  """
61
 
62
+ def __init__(self, reachy_mini=None) -> None:
63
  """Initialize audio player.
64
 
65
  Args:
66
  reachy_mini: Reachy Mini SDK instance.
 
67
  """
68
  self.reachy_mini = reachy_mini
 
69
  self.is_playing = False
70
+ self._playlist: List[str] = []
71
+ self._done_callback: Optional[Callable[[], None]] = None
72
  self._done_callback_lock = threading.Lock()
73
  self._duck_volume: float = 0.5
74
  self._unduck_volume: float = 1.0
75
  self._current_volume: float = 1.0
76
  self._stop_flag = threading.Event()
 
77
 
78
  # Speech sway callback for audio-driven head motion
79
+ self._sway_callback: Optional[Callable[[dict], None]] = None
80
 
81
  # Sendspin support (auto-enabled via mDNS discovery)
82
  # Uses stable client_id so HA recognizes the same device after restart
83
  self._sendspin_client_id = _get_stable_client_id()
84
+ self._sendspin_client: Optional["SendspinClient"] = None
85
  self._sendspin_enabled = False
86
+ self._sendspin_url: Optional[str] = None
87
+ self._sendspin_discovery: Optional["SendspinDiscovery"] = None
88
+ self._sendspin_unsubscribers: List[Callable] = []
89
 
90
  # Audio buffer for Sendspin playback
91
+ self._sendspin_audio_format: Optional["PCMFormat"] = None
92
  self._sendspin_playback_started = False
93
  self._sendspin_paused = False # Pause Sendspin when voice assistant is active
94
 
95
+ def set_sway_callback(self, callback: Optional[Callable[[dict], None]]) -> None:
96
  """Set callback for speech-driven sway animation.
97
 
98
  Args:
 
118
  return self._sendspin_enabled and self._sendspin_client is not None
119
 
120
  @property
121
+ def sendspin_url(self) -> Optional[str]:
122
  """Get current Sendspin server URL."""
123
  return self._sendspin_url
124
 
 
159
  return
160
 
161
  # Import here to avoid circular imports
162
+ from .zeroconf import SendspinDiscovery
163
 
164
  _LOGGER.info("Starting Sendspin server discovery...")
165
  self._sendspin_discovery = SendspinDiscovery(self._on_sendspin_server_found)
 
201
  player_support = ClientHelloPlayerSupport(
202
  supported_formats=[
203
  # Prefer 16kHz (native ReSpeaker sample rate - no resampling needed)
204
+ SupportedAudioFormat(
205
+ codec=AudioCodec.PCM, channels=2, sample_rate=16000, bit_depth=16
206
+ ),
207
+ SupportedAudioFormat(
208
+ codec=AudioCodec.PCM, channels=1, sample_rate=16000, bit_depth=16
209
+ ),
210
  # Also support higher sample rates (will be resampled to 16kHz)
211
+ SupportedAudioFormat(
212
+ codec=AudioCodec.PCM, channels=2, sample_rate=48000, bit_depth=16
213
+ ),
214
+ SupportedAudioFormat(
215
+ codec=AudioCodec.PCM, channels=2, sample_rate=44100, bit_depth=16
216
+ ),
217
+ SupportedAudioFormat(
218
+ codec=AudioCodec.PCM, channels=1, sample_rate=48000, bit_depth=16
219
+ ),
220
+ SupportedAudioFormat(
221
+ codec=AudioCodec.PCM, channels=1, sample_rate=44100, bit_depth=16
222
+ ),
223
  ],
224
  buffer_capacity=32_000_000,
225
  supported_commands=[PlayerCommand.VOLUME, PlayerCommand.MUTE],
 
245
  self._sendspin_url = server_url
246
  self._sendspin_enabled = True
247
 
248
+ _LOGGER.info("Sendspin connected as PLAYER: %s (client_id=%s)",
249
+ server_url, self._sendspin_client_id)
250
  return True
251
 
252
  except Exception as e:
 
255
  self._sendspin_enabled = False
256
  return False
257
 
258
+ def _on_sendspin_audio_chunk(self, server_timestamp_us: int, audio_data: bytes, fmt: "PCMFormat") -> None:
259
  """Handle incoming audio chunks from Sendspin server.
260
 
261
  Plays the audio through Reachy Mini's speaker using push_audio_sample().
 
263
 
264
  Note: Audio is dropped when Sendspin is paused (e.g., during voice assistant interaction).
265
  """
266
+ if self.reachy_mini is None:
267
+ return
268
+
269
  # Drop audio when paused (voice assistant is active)
270
  if self._sendspin_paused:
271
  return
 
292
  # Convert to float32 for playback (SDK expects float32)
293
  audio_float = audio_array.astype(np.float32) / max_val
294
 
 
 
 
295
  # Reshape for channels if needed
296
  if fmt.channels > 1:
297
  # Reshape to (samples, channels)
 
304
  target_sample_rate = self.reachy_mini.media.get_output_audio_samplerate()
305
  if fmt.sample_rate != target_sample_rate and target_sample_rate > 0:
306
  import scipy.signal
 
307
  # Calculate new length
308
  new_length = int(len(audio_float) * target_sample_rate / fmt.sample_rate)
309
  if new_length > 0:
310
  audio_float = scipy.signal.resample(audio_float, new_length, axis=0)
311
  # Log resampling only once per stream
312
+ if not hasattr(self, '_logged_resample') or not self._logged_resample:
313
+ _LOGGER.debug("Resampling Sendspin audio: %d Hz -> %d Hz",
314
+ fmt.sample_rate, target_sample_rate)
315
  self._logged_resample = True
316
 
317
  # Apply volume
 
327
  _LOGGER.warning("Failed to start media playback: %s", e)
328
 
329
  # Play through Reachy Mini's media system using push_audio_sample
330
+ self.reachy_mini.media.push_audio_sample(audio_float)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
 
332
  except Exception as e:
333
  _LOGGER.debug("Error playing Sendspin audio: %s", e)
334
 
335
+ def _on_sendspin_stream_start(self, message: "StreamStartMessage") -> None:
336
  """Handle stream start from Sendspin server."""
337
  _LOGGER.debug("Sendspin stream started")
338
  # No need to clear buffer - just start fresh
339
 
340
+ def _on_sendspin_stream_end(self, roles: Optional[List[Roles]]) -> None:
341
  """Handle stream end from Sendspin server."""
342
  if roles is None or Roles.PLAYER in roles:
343
  _LOGGER.debug("Sendspin stream ended")
344
 
345
+ def _on_sendspin_stream_clear(self, roles: Optional[List[Roles]]) -> None:
346
  """Handle stream clear from Sendspin server."""
347
  if roles is None or Roles.PLAYER in roles:
348
  _LOGGER.debug("Sendspin stream cleared")
349
+ if self.reachy_mini is not None:
350
+ try:
351
+ self.reachy_mini.media.stop_playing()
352
+ self._sendspin_playback_started = False
353
+ except Exception:
354
+ pass
355
 
356
  async def _disconnect_sendspin(self) -> None:
357
  """Disconnect from current Sendspin server."""
 
359
  for unsub in self._sendspin_unsubscribers:
360
  try:
361
  unsub()
362
+ except Exception:
363
+ pass
364
  self._sendspin_unsubscribers.clear()
365
 
366
  if self._sendspin_client is not None:
 
384
  # Disconnect from server
385
  await self._disconnect_sendspin()
386
 
 
 
 
 
 
 
387
  _LOGGER.info("Sendspin stopped")
388
 
389
  # ========== Core Playback Methods ==========
390
 
391
  def play(
392
  self,
393
+ url: Union[str, List[str]],
394
+ done_callback: Optional[Callable[[], None]] = None,
395
  stop_first: bool = True,
396
  ) -> None:
397
  """Play audio from URL(s).
 
411
 
412
  self._done_callback = done_callback
413
  self._stop_flag.clear()
 
 
 
 
 
 
414
  self._play_next()
415
 
416
  def _play_next(self) -> None:
 
424
  self.is_playing = True
425
 
426
  # Start playback in a thread
427
+ thread = threading.Thread(target=self._play_file, args=(next_url,), daemon=True)
428
+ thread.start()
429
 
430
  def _play_file(self, file_path: str) -> None:
431
  """Play an audio file with optional speech-driven sway animation."""
432
  try:
433
  # Handle URLs - download first
434
  if file_path.startswith(("http://", "https://")):
435
+ import urllib.request
436
+ import tempfile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
 
438
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
439
+ urllib.request.urlretrieve(file_path, tmp.name)
440
+ file_path = tmp.name
 
 
 
 
 
 
 
441
 
442
  if self._stop_flag.is_set():
443
  return
444
 
445
  # Play locally using Reachy Mini's media system
446
+ if self.reachy_mini is not None:
 
 
 
 
447
  try:
448
+ # Read audio data for duration calculation and sway analysis
449
  import soundfile as sf
450
+ data, sample_rate = sf.read(file_path)
451
+ duration = len(data) / sample_rate
452
 
453
+ # Pre-analyze audio for speech sway if callback is set
454
+ sway_frames = []
455
+ if self._sway_callback is not None:
456
+ from .speech_sway import SpeechSwayRT
457
+ sway = SpeechSwayRT()
458
+ sway_frames = sway.feed(data, sample_rate)
459
+ _LOGGER.debug("Generated %d sway frames for %.2fs audio",
460
+ len(sway_frames), duration)
 
 
 
461
 
462
+ # Start playback
463
+ self.reachy_mini.media.play_sound(file_path)
 
464
 
465
+ # Playback loop with sway animation
466
+ start_time = time.time()
467
+ frame_duration = 0.05 # 50ms per sway frame (HOP_MS)
468
+ frame_idx = 0
469
 
470
+ while time.time() - start_time < duration:
471
+ if self._stop_flag.is_set():
472
+ self.reachy_mini.media.stop_playing()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
 
475
+ # Apply sway frame if available
476
+ if self._sway_callback and frame_idx < len(sway_frames):
477
+ elapsed = time.time() - start_time
478
+ target_frame = int(elapsed / frame_duration)
479
+ while frame_idx <= target_frame and frame_idx < len(sway_frames):
480
+ self._sway_callback(sway_frames[frame_idx])
481
+ frame_idx += 1
482
+
483
+ time.sleep(0.02) # 20ms sleep for responsive sway
484
+
485
+ # Reset sway to zero when done
486
+ if self._sway_callback:
487
+ self._sway_callback({
488
+ "pitch_rad": 0.0, "yaw_rad": 0.0, "roll_rad": 0.0,
489
+ "x_m": 0.0, "y_m": 0.0, "z_m": 0.0,
490
+ })
491
+
492
+ except Exception as e:
493
+ _LOGGER.warning("Reachy Mini audio failed, falling back: %s", e)
494
+ self._play_file_fallback(file_path)
495
+ else:
496
+ self._play_file_fallback(file_path)
497
 
498
  except Exception as e:
499
  _LOGGER.error("Error playing audio: %s", e)
 
504
  else:
505
  self._on_playback_finished()
506
 
507
+ def _play_file_fallback(self, file_path: str) -> None:
508
+ """Fallback to sounddevice for audio playback."""
509
+ import sounddevice as sd
510
+ import soundfile as sf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
 
512
+ data, samplerate = sf.read(file_path)
513
+ data = data * self._current_volume
514
 
515
+ if not self._stop_flag.is_set():
516
+ sd.play(data, samplerate)
517
+ sd.wait()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518
 
519
  def _on_playback_finished(self) -> None:
520
  """Called when playback is finished."""
521
  self.is_playing = False
522
+ todo_callback: Optional[Callable[[], None]] = None
523
 
524
  with self._done_callback_lock:
525
  if self._done_callback:
 
538
  Stops current audio output but preserves playlist for resume.
539
  """
540
  self._stop_flag.set()
541
+ if self.reachy_mini is not None:
542
+ try:
543
+ self.reachy_mini.media.stop_playing()
544
+ except Exception:
545
+ pass
546
  self.is_playing = False
547
 
548
+ def resume(self) -> None:
549
  """Resume playback from where it was paused."""
550
  self._stop_flag.clear()
551
  if self._playlist:
 
554
  def stop(self) -> None:
555
  """Stop playback and clear playlist."""
556
  self._stop_flag.set()
557
+ if self.reachy_mini is not None:
 
 
 
 
 
 
 
 
558
  try:
559
+ self.reachy_mini.media.stop_playing()
 
 
560
  except Exception:
561
  pass
 
 
562
  self._playlist.clear()
563
  self.is_playing = False
564
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
  def duck(self) -> None:
566
  """Reduce volume for announcements."""
567
  self._current_volume = self._duck_volume
 
576
  self._unduck_volume = volume / 100.0
577
  self._duck_volume = self._unduck_volume / 2
578
  self._current_volume = self._unduck_volume
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/camera_server.py RENAMED
@@ -1,1042 +1,842 @@
1
- """
2
- MJPEG Camera Server for Reachy Mini with Face Tracking.
3
-
4
- This module provides an HTTP server that streams camera frames from Reachy Mini
5
- as MJPEG, which can be integrated with Home Assistant via Generic Camera.
6
- Also provides face tracking for head movement control.
7
-
8
- Reference: reachy_mini_conversation_app/src/reachy_mini_conversation_app/camera_worker.py
9
- """
10
-
11
- from __future__ import annotations
12
-
13
- import asyncio
14
- import logging
15
- import threading
16
- import time
17
- from typing import TYPE_CHECKING
18
-
19
- import cv2
20
- import numpy as np
21
- from scipy.spatial.transform import Rotation as R
22
-
23
- from .face_tracking_interpolator import FaceTrackingInterpolator, InterpolationConfig
24
-
25
- # Import adaptive frame rate manager
26
- from .frame_processor import AdaptiveFrameRateManager, FrameRateConfig
27
-
28
- if TYPE_CHECKING:
29
- from reachy_mini import ReachyMini
30
-
31
- _LOGGER = logging.getLogger(__name__)
32
-
33
- # MJPEG boundary string
34
- MJPEG_BOUNDARY = "frame"
35
- GESTURE_MIN_FPS = 12.0
36
-
37
-
38
- class MJPEGCameraServer:
39
- """
40
- MJPEG streaming server for Reachy Mini camera with face tracking.
41
-
42
- Provides HTTP endpoints:
43
- - /stream - MJPEG video stream
44
- - /snapshot - Single JPEG image
45
- - / - Simple status page
46
-
47
- Also provides face tracking offsets for head movement control.
48
-
49
- Resource Optimization:
50
- - Adaptive frame rate: high (15fps) when face detected or in conversation,
51
- low (3fps) when idle and no face for extended period
52
- - Face detection pauses after prolonged absence to save CPU
53
- """
54
-
55
- def __init__(
56
- self,
57
- reachy_mini: ReachyMini,
58
- host: str = "0.0.0.0",
59
- port: int = 8081,
60
- fps: int = 15, # 15fps for smooth face tracking
61
- quality: int = 80,
62
- enable_face_tracking: bool = True,
63
- enable_gesture_detection: bool = True,
64
- face_confidence_threshold: float = 0.5, # Min confidence for face detection
65
- gstreamer_lock: threading.Lock | None = None,
66
- ):
67
- """
68
- Initialize the MJPEG camera server.
69
-
70
- Args:
71
- reachy_mini: Reachy Mini robot instance (can be None for testing)
72
- host: Host address to bind to
73
- port: Port number for the HTTP server
74
- fps: Target frames per second for the stream
75
- quality: JPEG quality (1-100)
76
- enable_face_tracking: Enable face tracking for head movement
77
- face_confidence_threshold: Minimum confidence for face detection (0-1)
78
- gstreamer_lock: Threading lock for GStreamer media access (shared across all media operations).
79
- """
80
- self.reachy_mini = reachy_mini
81
- self._gstreamer_lock = gstreamer_lock if gstreamer_lock is not None else threading.Lock()
82
- self.host = host
83
- self.port = port
84
- self.fps = fps
85
- self.quality = quality
86
- self.enable_face_tracking = enable_face_tracking
87
- self._face_confidence_threshold = face_confidence_threshold
88
-
89
- self._server: asyncio.Server | None = None
90
- self._running = False
91
- self._frame_interval = 1.0 / fps
92
- self._last_frame: bytes | None = None
93
- self._last_frame_time: float = 0
94
- self._frame_lock = threading.Lock()
95
-
96
- # Frame capture thread
97
- self._capture_thread: threading.Thread | None = None
98
-
99
- # Face tracking state
100
- self._head_tracker = None
101
- self._face_tracking_enabled = enable_face_tracking
102
- self._face_tracking_requested = enable_face_tracking
103
- self._face_tracking_offsets: list[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
104
- self._face_tracking_lock = threading.Lock()
105
-
106
- # Gesture detection state
107
- self._gesture_detector = None
108
- self._gesture_detection_enabled = enable_gesture_detection
109
- self._gesture_detection_requested = enable_gesture_detection
110
- self._current_gesture = "none"
111
- self._gesture_confidence = 0.0
112
- self._gesture_lock = threading.Lock()
113
- self._gesture_state_callback = None # Callback to notify entity registry
114
- self._gesture_action_callback = None # Callback for gesture action mapping
115
-
116
- # Face detection state callback (similar to gesture)
117
- self._face_state_callback = None # Callback to notify entity registry
118
- self._last_face_detected_state = False # Track previous state for change detection
119
-
120
- # Face tracking interpolator (handles smooth return to neutral)
121
- self._face_interpolator = FaceTrackingInterpolator(
122
- config=InterpolationConfig(
123
- face_lost_delay=2.0,
124
- interpolation_duration=1.0,
125
- offset_scale=0.6,
126
- pitch_offset_deg=9.0,
127
- yaw_offset_deg=-7.0,
128
- )
129
- )
130
-
131
- # =====================================================================
132
- # Adaptive frame rate manager (replaces inline logic)
133
- # =====================================================================
134
- self._frame_rate_manager = AdaptiveFrameRateManager(
135
- config=FrameRateConfig(
136
- fps_high=fps,
137
- fps_low=2,
138
- fps_idle=0.5,
139
- low_power_threshold=5.0,
140
- idle_threshold=30.0,
141
- gesture_detection_interval=3,
142
- )
143
- )
144
-
145
- # Stream client tracking for resource optimization
146
- self._active_stream_clients: set = set()
147
- self._stream_client_lock = threading.Lock()
148
- self._next_client_id = 0
149
-
150
- async def start(self) -> None:
151
- """Start the MJPEG camera server."""
152
- if self._running:
153
- _LOGGER.warning("Camera server already running")
154
- return
155
-
156
- self._running = True
157
-
158
- # Detect media backend type for compatibility handling
159
- try:
160
- from reachy_mini.media.media_manager import MediaBackend
161
-
162
- backend = self.reachy_mini.media.backend
163
- backend_name = {
164
- MediaBackend.GSTREAMER: "GStreamer",
165
- MediaBackend.DEFAULT: "Default",
166
- MediaBackend.DEFAULT_NO_VIDEO: "Default (No Video)",
167
- }.get(backend, str(backend))
168
- _LOGGER.info("Detected media backend: %s", backend_name)
169
- except ImportError:
170
- _LOGGER.debug("MediaBackend enum not available")
171
- except Exception as e:
172
- _LOGGER.debug("Failed to detect media backend: %s", e)
173
-
174
- # Initialize head tracker if face tracking enabled
175
- if self._face_tracking_enabled:
176
- try:
177
- from .head_tracker import HeadTracker
178
-
179
- self._head_tracker = HeadTracker(confidence_threshold=self._face_confidence_threshold)
180
- _LOGGER.info(
181
- "Face tracking enabled with YOLO head tracker (confidence=%.2f)", self._face_confidence_threshold
182
- )
183
- except ImportError as e:
184
- _LOGGER.error("Failed to import head tracker: %s", e)
185
- self._head_tracker = None
186
- except Exception as e:
187
- _LOGGER.warning("Failed to initialize head tracker: %s", e)
188
- self._head_tracker = None
189
- else:
190
- _LOGGER.info("Face tracking disabled by configuration")
191
-
192
- # Initialize gesture detector
193
- if self._gesture_detection_enabled:
194
- try:
195
- from .gesture_detector import GestureDetector
196
-
197
- self._gesture_detector = GestureDetector()
198
- if self._gesture_detector.is_available:
199
- _LOGGER.info("Gesture detection enabled (18 HaGRID classes)")
200
- else:
201
- _LOGGER.warning("Gesture detection not available")
202
- self._gesture_detector = None
203
- except ImportError as e:
204
- _LOGGER.warning("Failed to import gesture detector: %s", e)
205
- self._gesture_detector = None
206
- except Exception as e:
207
- _LOGGER.warning("Failed to initialize gesture detector: %s", e)
208
- self._gesture_detector = None
209
-
210
- # Start frame capture thread
211
- self._capture_thread = threading.Thread(target=self._capture_frames, daemon=True, name="camera-capture")
212
- self._capture_thread.start()
213
-
214
- # Start HTTP server
215
- self._server = await asyncio.start_server(
216
- self._handle_client,
217
- self.host,
218
- self.port,
219
- )
220
-
221
- _LOGGER.info("MJPEG Camera server started on http://%s:%d", self.host, self.port)
222
- _LOGGER.info(" Stream URL: http://<ip>:%d/stream", self.port)
223
- _LOGGER.info(" Snapshot URL: http://<ip>:%d/snapshot", self.port)
224
-
225
- async def stop(self, join_timeout: float = 3.0) -> None:
226
- """Stop the MJPEG camera server and release all resources.
227
-
228
- This method ensures complete cleanup of:
229
- - Capture thread
230
- - HTTP server
231
- - ML models (head tracker, gesture detector)
232
- - Frame buffers and state
233
- - SDK media resources
234
- """
235
- _LOGGER.info("Stopping MJPEG camera server...")
236
- self._running = False
237
-
238
- # 0. Close SDK media resources to prevent leaks
239
- try:
240
- self.reachy_mini.media.close()
241
- _LOGGER.info("SDK media resources closed")
242
- except Exception as e:
243
- _LOGGER.debug("Failed to close SDK media: %s", e)
244
-
245
- # 1. Stop capture thread
246
- if self._capture_thread:
247
- # Wait up to join_timeout seconds - longer than max sleep time (2s in idle mode)
248
- self._capture_thread.join(timeout=join_timeout)
249
- if self._capture_thread.is_alive():
250
- _LOGGER.warning("Camera capture thread did not stop cleanly")
251
- self._capture_thread = None
252
-
253
- # 2. Stop HTTP server
254
- if self._server:
255
- self._server.close()
256
- await self._server.wait_closed()
257
- self._server = None
258
-
259
- # 3. Release ML models (explicit cleanup)
260
- self._release_ml_models()
261
-
262
- # 4. Clear frame buffer
263
- with self._frame_lock:
264
- self._last_frame = None
265
- self._last_frame_time = 0
266
-
267
- # 5. Clear tracking state
268
- with self._face_tracking_lock:
269
- self._face_tracking_offsets = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
270
-
271
- with self._gesture_lock:
272
- self._current_gesture = "none"
273
- self._gesture_confidence = 0.0
274
-
275
- # 6. Clear active clients
276
- with self._stream_client_lock:
277
- self._active_stream_clients.clear()
278
-
279
- _LOGGER.info("MJPEG Camera server stopped - all resources released")
280
-
281
- def _release_ml_models(self) -> None:
282
- """Release ML models from memory.
283
-
284
- This is called during stop() and suspend_processing() to free GPU/CPU memory.
285
- """
286
- # Release YOLO head tracker model
287
- if self._head_tracker is not None:
288
- try:
289
- # Try to call close() if available, otherwise just delete
290
- if hasattr(self._head_tracker, "close"):
291
- self._head_tracker.close()
292
- del self._head_tracker
293
- self._head_tracker = None
294
- _LOGGER.debug("Head tracker model released")
295
- except Exception as e:
296
- _LOGGER.warning("Error releasing head tracker: %s", e)
297
-
298
- # Release gesture detector model
299
- if self._gesture_detector is not None:
300
- try:
301
- if hasattr(self._gesture_detector, "close"):
302
- self._gesture_detector.close()
303
- del self._gesture_detector
304
- self._gesture_detector = None
305
- _LOGGER.debug("Gesture detector model released")
306
- except Exception as e:
307
- _LOGGER.warning("Error releasing gesture detector: %s", e)
308
-
309
- async def __aenter__(self) -> MJPEGCameraServer:
310
- """Context manager entry - start the server."""
311
- await self.start()
312
- return self
313
-
314
- async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
315
- """Context manager exit - stop the server and release resources."""
316
- await self.stop()
317
- return False # Don't suppress exceptions
318
-
319
- def suspend_processing(self) -> None:
320
- """Suspend AI processing for sleep mode.
321
-
322
- This releases ML models from memory while keeping basic MJPEG
323
- streaming capability (though it will only serve cached frames).
324
-
325
- Call resume_processing() to restore full functionality.
326
- """
327
- _LOGGER.info("Suspending camera processing for sleep mode...")
328
-
329
- # Suspend frame rate manager
330
- self._frame_rate_manager.suspend()
331
- # Disable runtime processing while preserving requested user preferences.
332
- self._face_tracking_enabled = False
333
- self._gesture_detection_enabled = False
334
-
335
- # Release ML models (use shared method to avoid duplication)
336
- self._release_ml_models()
337
-
338
- # Reset tracking state
339
- with self._face_tracking_lock:
340
- self._face_tracking_offsets = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
341
-
342
- with self._gesture_lock:
343
- self._current_gesture = "none"
344
- self._gesture_confidence = 0.0
345
-
346
- _LOGGER.info("Camera processing suspended - ML models released")
347
-
348
- def resume_processing(self) -> None:
349
- """Resume AI processing after sleep mode.
350
-
351
- This reloads ML models and restores full camera functionality.
352
- Should be called after robot has fully woken up.
353
- """
354
- _LOGGER.info("Resuming camera processing after sleep...")
355
-
356
- # Resume frame rate manager
357
- self._frame_rate_manager.resume()
358
-
359
- # Restore runtime states from requested user preferences
360
- self._face_tracking_enabled = self._face_tracking_requested
361
- self._gesture_detection_enabled = self._gesture_detection_requested
362
-
363
- # Reload head tracker if face tracking is requested
364
- if self._face_tracking_requested and self._head_tracker is None:
365
- try:
366
- from .head_tracker import HeadTracker
367
-
368
- self._head_tracker = HeadTracker(confidence_threshold=self._face_confidence_threshold)
369
- self._face_tracking_enabled = True
370
- _LOGGER.info("Head tracker model reloaded (confidence=%.2f)", self._face_confidence_threshold)
371
- except Exception as e:
372
- _LOGGER.warning("Failed to reload head tracker: %s", e)
373
- self._face_tracking_enabled = False
374
-
375
- # Reload gesture detector only if gesture detection is requested
376
- if self._gesture_detection_requested and self._gesture_detector is None:
377
- try:
378
- from .gesture_detector import GestureDetector
379
-
380
- self._gesture_detector = GestureDetector()
381
- if self._gesture_detector.is_available:
382
- self._gesture_detection_enabled = True
383
- _LOGGER.info("Gesture detector model reloaded")
384
- else:
385
- self._gesture_detector = None
386
- self._gesture_detection_enabled = False
387
- except Exception as e:
388
- _LOGGER.warning("Failed to reload gesture detector: %s", e)
389
- self._gesture_detection_enabled = False
390
- elif not self._gesture_detection_requested:
391
- self._gesture_detector = None
392
- self._gesture_detection_enabled = False
393
-
394
- _LOGGER.info("Camera processing resumed - full functionality restored")
395
-
396
- def suspend(self) -> None:
397
- """Fully suspend the camera server for sleep mode.
398
-
399
- This stops the capture thread and releases all resources to free CPU.
400
- Call resume_from_suspend() to restart.
401
- """
402
- if not self._running:
403
- _LOGGER.debug("Camera server not running, nothing to suspend")
404
- return
405
-
406
- _LOGGER.info("Suspending camera server for sleep...")
407
-
408
- # First suspend AI processing
409
- self.suspend_processing()
410
-
411
- # Stop the capture thread to release CPU
412
- self._running = False
413
- if self._capture_thread is not None:
414
- self._capture_thread.join(timeout=3.0)
415
- if self._capture_thread.is_alive():
416
- _LOGGER.warning("Camera capture thread did not stop cleanly during suspend")
417
- self._capture_thread = None
418
-
419
- _LOGGER.info("Camera server suspended - CPU released")
420
-
421
- def resume_from_suspend(self) -> None:
422
- """Resume the camera server after sleep.
423
-
424
- This restarts the capture thread and reloads ML models.
425
- """
426
- if self._running:
427
- _LOGGER.debug("Camera server already running")
428
- return
429
-
430
- _LOGGER.info("Resuming camera server from sleep...")
431
-
432
- self._running = True
433
-
434
- # Resume AI processing (reloads models)
435
- self.resume_processing()
436
-
437
- # Restart capture thread
438
- self._capture_thread = threading.Thread(target=self._capture_frames, daemon=True, name="camera-capture")
439
- self._capture_thread.start()
440
-
441
- _LOGGER.info("Camera server resumed from sleep")
442
-
443
- def _capture_frames(self) -> None:
444
- """Background thread to capture frames from Reachy Mini and do face tracking.
445
-
446
- Resource optimization:
447
- - High frequency (15fps) when face detected or in conversation
448
- - Low frequency (2fps) when idle and no face for short period
449
- - Ultra-low (0.5fps) when idle for extended period - minimal AI inference
450
- """
451
- _LOGGER.info("Starting camera capture thread (face_tracking=%s)", self._face_tracking_enabled)
452
-
453
- frame_count = 0
454
- face_detect_count = 0
455
- last_log_time = time.time()
456
-
457
- while self._running:
458
- try:
459
- current_time = time.time()
460
-
461
- # Determine if we should run AI inference this frame
462
- should_run_ai = self._should_run_ai_inference(current_time)
463
- should_run_gesture = (
464
- self._gesture_detection_enabled
465
- and self._gesture_detector is not None
466
- and self._frame_rate_manager.should_run_gesture_detection()
467
- )
468
-
469
- # Only get frame if needed (AI inference, gesture detection, or MJPEG streaming)
470
- frame = (
471
- self._get_camera_frame()
472
- if should_run_ai or should_run_gesture or self._has_stream_clients()
473
- else None
474
- )
475
-
476
- if frame is not None:
477
- frame_count += 1
478
-
479
- # Encode frame as JPEG for streaming
480
- encode_params = [cv2.IMWRITE_JPEG_QUALITY, self.quality]
481
- success, jpeg_data = cv2.imencode(".jpg", frame, encode_params)
482
-
483
- if success:
484
- with self._frame_lock:
485
- self._last_frame = jpeg_data.tobytes()
486
- self._last_frame_time = time.time()
487
-
488
- # Only run AI inference when enabled
489
- if should_run_ai:
490
- # Face tracking
491
- if self._face_tracking_enabled and self._head_tracker is not None:
492
- face_detect_count += 1
493
- face_detected = self._process_face_tracking(frame, current_time)
494
-
495
- # Update adaptive frame rate manager
496
- self._frame_rate_manager.update(face_detected=face_detected)
497
-
498
- # Check for face detection state change and notify callback
499
- # Use is_face_detected() which considers face_lost_delay
500
- current_face_state = self.is_face_detected()
501
- if current_face_state != self._last_face_detected_state:
502
- self._last_face_detected_state = current_face_state
503
- if self._face_state_callback:
504
- try:
505
- self._face_state_callback()
506
- except Exception as e:
507
- _LOGGER.debug("Face state callback error: %s", e)
508
-
509
- # Handle smooth interpolation when face lost
510
- self._process_face_lost_interpolation(current_time)
511
-
512
- # Gesture detection (runs independently of face detection)
513
- # Uses its own frame rate control via should_run_gesture_detection()
514
- if (
515
- self._gesture_detection_enabled
516
- and self._gesture_detector is not None
517
- and self._frame_rate_manager.should_run_gesture_detection()
518
- ):
519
- self._process_gesture_detection(frame)
520
-
521
- # Log stats every 30 seconds
522
- if current_time - last_log_time >= 30.0:
523
- fps = frame_count / (current_time - last_log_time)
524
- detect_fps = face_detect_count / (current_time - last_log_time)
525
- mode = self._frame_rate_manager.current_mode.value.upper()
526
- no_face = self._frame_rate_manager.state.no_face_duration
527
- _LOGGER.debug(
528
- "Camera: %.1f fps, AI: %.1f fps (%s), no_face: %.0fs", fps, detect_fps, mode, no_face
529
- )
530
- frame_count = 0
531
- face_detect_count = 0
532
- last_log_time = current_time
533
-
534
- # Sleep to maintain target FPS (use adaptive rate)
535
- # Keep a minimum processing cadence for gesture responsiveness.
536
- sleep_time = self._frame_rate_manager.get_sleep_interval()
537
- if self._gesture_detection_enabled and self._gesture_detector is not None:
538
- sleep_time = min(sleep_time, 1.0 / GESTURE_MIN_FPS)
539
- time.sleep(sleep_time)
540
-
541
- except Exception as e:
542
- _LOGGER.error("Error capturing frame: %s", e)
543
- time.sleep(1.0)
544
-
545
- _LOGGER.info("Camera capture thread stopped")
546
-
547
- def _should_run_ai_inference(self, current_time: float) -> bool:
548
- """Determine if AI inference (face/gesture detection) should run."""
549
- return self._frame_rate_manager.should_run_inference()
550
-
551
- def _has_stream_clients(self) -> bool:
552
- """Check if there are active MJPEG stream clients."""
553
- with self._stream_client_lock:
554
- return len(self._active_stream_clients) > 0
555
-
556
- def _register_stream_client(self) -> int:
557
- """Register a new stream client and return its ID."""
558
- with self._stream_client_lock:
559
- # Use rolling client IDs to prevent integer overflow after ~4 billion connections
560
- client_id = self._next_client_id % 1000000 # Roll over after 1M
561
- self._next_client_id += 1
562
- self._active_stream_clients.add(client_id)
563
- _LOGGER.debug("Stream client registered: %d (total: %d)", client_id, len(self._active_stream_clients))
564
- return client_id
565
-
566
- def _unregister_stream_client(self, client_id: int) -> None:
567
- """Unregister a stream client."""
568
- with self._stream_client_lock:
569
- self._active_stream_clients.discard(client_id)
570
- _LOGGER.debug("Stream client unregistered: %d (total: %d)", client_id, len(self._active_stream_clients))
571
-
572
- @property
573
- def stream_client_count(self) -> int:
574
- """Get the number of active stream clients."""
575
- with self._stream_client_lock:
576
- return len(self._active_stream_clients)
577
-
578
- def _process_face_tracking(self, frame: np.ndarray, current_time: float) -> bool:
579
- """Process face tracking on a frame.
580
-
581
- Returns:
582
- True if face was detected, False otherwise
583
- """
584
- if self._head_tracker is None:
585
- return False
586
-
587
- try:
588
- face_center, _confidence = self._head_tracker.get_head_position(frame)
589
-
590
- if face_center is not None:
591
- # Face detected - notify interpolator
592
- self._face_interpolator.on_face_detected(current_time)
593
-
594
- # Convert normalized coordinates to pixel coordinates
595
- h, w = frame.shape[:2]
596
- eye_center_norm = (face_center + 1) / 2
597
-
598
- eye_center_pixels = [
599
- int(eye_center_norm[0] * w),
600
- int(eye_center_norm[1] * h),
601
- ]
602
-
603
- # Get the head pose needed to look at the target
604
- target_pose = self.reachy_mini.look_at_image(
605
- eye_center_pixels[0],
606
- eye_center_pixels[1],
607
- duration=0.0,
608
- perform_movement=False,
609
- )
610
-
611
- # Extract translation and rotation from target pose
612
- translation = target_pose[:3, 3]
613
- rotation = R.from_matrix(target_pose[:3, :3]).as_euler("xyz", degrees=False)
614
-
615
- # Update interpolator with new offsets (handles scaling and compensation)
616
- self._face_interpolator.update_offsets(translation, rotation)
617
-
618
- # Sync to thread-safe storage
619
- with self._face_tracking_lock:
620
- self._face_tracking_offsets = list(self._face_interpolator.get_offsets())
621
-
622
- return True
623
-
624
- return False
625
-
626
- except Exception as e:
627
- _LOGGER.debug("Face tracking error: %s", e)
628
- return False
629
-
630
- def _process_face_lost_interpolation(self, current_time: float) -> None:
631
- """Handle smooth interpolation back to neutral when face is lost."""
632
- # Delegate to interpolator
633
- self._face_interpolator.process_face_lost(current_time)
634
-
635
- # Sync interpolated offsets to thread-safe storage
636
- with self._face_tracking_lock:
637
- self._face_tracking_offsets = list(self._face_interpolator.get_offsets())
638
-
639
- # =========================================================================
640
- # Public API for face tracking
641
- # =========================================================================
642
-
643
- def get_face_tracking_offsets(self) -> tuple[float, float, float, float, float, float]:
644
- """Get current face tracking offsets (thread-safe).
645
-
646
- Returns:
647
- Tuple of (x, y, z, roll, pitch, yaw) offsets
648
- """
649
- with self._face_tracking_lock:
650
- offsets = self._face_tracking_offsets
651
- return (offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], offsets[5])
652
-
653
- def is_face_detected(self) -> bool:
654
- """Check if a face is currently detected.
655
-
656
- Returns True if face was detected recently (within face_lost_delay period).
657
- This is useful for Home Assistant entities to expose face detection status.
658
-
659
- Returns:
660
- True if face is detected, False otherwise
661
- """
662
- return self._face_interpolator.is_face_detected()
663
-
664
- def set_face_tracking_enabled(self, enabled: bool) -> None:
665
- """Enable or disable face tracking."""
666
- self._face_tracking_requested = enabled
667
- if self._face_tracking_enabled == enabled:
668
- return # No change, skip logging
669
- self._face_tracking_enabled = enabled
670
- if enabled:
671
- if self._head_tracker is None:
672
- try:
673
- from .head_tracker import HeadTracker
674
-
675
- self._head_tracker = HeadTracker(confidence_threshold=self._face_confidence_threshold)
676
- except Exception as e:
677
- _LOGGER.warning("Failed to enable face tracking model: %s", e)
678
- self._face_tracking_enabled = False
679
- else:
680
- # Start interpolation back to neutral
681
- self._face_interpolator.reset_interpolation()
682
- self._head_tracker = None
683
- with self._face_tracking_lock:
684
- self._face_tracking_offsets = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
685
- _LOGGER.info("Face tracking %s", "enabled" if enabled else "disabled")
686
-
687
- def get_face_tracking_enabled(self) -> bool:
688
- """Return whether face tracking is enabled."""
689
- return self._face_tracking_enabled
690
-
691
- def get_face_confidence_threshold(self) -> float:
692
- """Return current face confidence threshold (0-1)."""
693
- return self._face_confidence_threshold
694
-
695
- def set_face_confidence_threshold(self, threshold: float) -> None:
696
- """Set face detection confidence threshold (0-1)."""
697
- threshold = max(0.0, min(1.0, float(threshold)))
698
- if abs(self._face_confidence_threshold - threshold) < 1e-6:
699
- return
700
-
701
- self._face_confidence_threshold = threshold
702
-
703
- # Reload model to apply threshold immediately when enabled.
704
- if self._face_tracking_enabled:
705
- try:
706
- from .head_tracker import HeadTracker
707
-
708
- self._head_tracker = HeadTracker(confidence_threshold=self._face_confidence_threshold)
709
- except Exception as e:
710
- _LOGGER.warning("Failed to apply face confidence threshold %.2f: %s", threshold, e)
711
- self._head_tracker = None
712
- self._face_tracking_enabled = False
713
-
714
- _LOGGER.info("Face confidence threshold set to %.2f", self._face_confidence_threshold)
715
-
716
- def set_conversation_mode(self, in_conversation: bool) -> None:
717
- """Set conversation mode for adaptive face tracking.
718
-
719
- When in conversation mode, face tracking runs at high frequency
720
- regardless of whether a face is currently detected.
721
-
722
- Args:
723
- in_conversation: True when voice assistant is actively conversing
724
- """
725
- self._frame_rate_manager.set_conversation_mode(in_conversation)
726
- if in_conversation:
727
- _LOGGER.debug("Face tracking: conversation mode ON (high frequency)")
728
- else:
729
- _LOGGER.debug("Face tracking: conversation mode OFF (adaptive)")
730
-
731
- # =========================================================================
732
- # Gesture detection
733
- # =========================================================================
734
-
735
- def _process_gesture_detection(self, frame: np.ndarray) -> None:
736
- """Process gesture detection on a frame."""
737
- if self._gesture_detector is None:
738
- return
739
-
740
- try:
741
- # Detect gesture
742
- detected_gesture, confidence = self._gesture_detector.detect(frame)
743
-
744
- # Update current gesture state
745
- state_changed = False
746
- with self._gesture_lock:
747
- old_gesture = self._current_gesture
748
- if detected_gesture.value != "no_gesture":
749
- self._current_gesture = detected_gesture.value
750
- self._gesture_confidence = confidence
751
- if old_gesture != detected_gesture.value:
752
- state_changed = True
753
- _LOGGER.info("Gesture detected: %s (%.1f%%)", detected_gesture.value, confidence * 100)
754
- else:
755
- if self._current_gesture != "none":
756
- state_changed = True
757
- _LOGGER.info("Gesture cleared (no gesture detected)")
758
- self._current_gesture = "none"
759
- self._gesture_confidence = 0.0
760
-
761
- # Notify entity registry to push update to Home Assistant
762
- if state_changed and self._gesture_state_callback:
763
- try:
764
- self._gesture_state_callback()
765
- except Exception:
766
- pass # Ignore callback errors
767
-
768
- # Trigger gesture actions (emotions, listening, etc.)
769
- if state_changed and self._gesture_action_callback:
770
- try:
771
- self._gesture_action_callback(self._current_gesture, self._gesture_confidence)
772
- except Exception as e:
773
- _LOGGER.debug("Gesture action callback error: %s", e)
774
-
775
- except Exception as e:
776
- _LOGGER.warning("Gesture detection error: %s", e)
777
-
778
- def get_current_gesture(self) -> str:
779
- """Get current detected gesture name (thread-safe).
780
-
781
- Returns:
782
- Gesture name string (e.g., "like", "peace", "none")
783
- """
784
- with self._gesture_lock:
785
- return self._current_gesture
786
-
787
- def get_gesture_confidence(self) -> float:
788
- """Get current gesture detection confidence (thread-safe).
789
-
790
- Returns:
791
- Confidence value (0.0 to 1.0), multiplied by 100 for percentage display
792
- """
793
- with self._gesture_lock:
794
- return self._gesture_confidence * 100.0 # Return as percentage
795
-
796
- def set_gesture_detection_enabled(self, enabled: bool) -> None:
797
- """Enable or disable gesture detection."""
798
- self._gesture_detection_requested = enabled
799
- if self._gesture_detection_enabled == enabled:
800
- return
801
-
802
- self._gesture_detection_enabled = enabled
803
- if enabled:
804
- if self._gesture_detector is None:
805
- try:
806
- from .gesture_detector import GestureDetector
807
-
808
- self._gesture_detector = GestureDetector()
809
- if not self._gesture_detector.is_available:
810
- self._gesture_detector = None
811
- self._gesture_detection_enabled = False
812
- except Exception as e:
813
- _LOGGER.warning("Failed to enable gesture detector model: %s", e)
814
- self._gesture_detection_enabled = False
815
- self._gesture_detector = None
816
- else:
817
- self._gesture_detector = None
818
- with self._gesture_lock:
819
- self._current_gesture = "none"
820
- self._gesture_confidence = 0.0
821
- _LOGGER.info("Gesture detection %s", "enabled" if enabled else "disabled")
822
-
823
- def get_gesture_detection_enabled(self) -> bool:
824
- """Return whether gesture detection is enabled."""
825
- return self._gesture_detection_enabled
826
-
827
- def set_gesture_state_callback(self, callback) -> None:
828
- """Set callback to notify when gesture state changes."""
829
- self._gesture_state_callback = callback
830
-
831
- def set_gesture_action_callback(self, callback) -> None:
832
- """Set callback for gesture action handling.
833
-
834
- The callback receives (gesture_name: str, confidence: float).
835
- """
836
- self._gesture_action_callback = callback
837
-
838
- def set_face_state_callback(self, callback) -> None:
839
- """Set callback to notify when face detection state changes."""
840
- self._face_state_callback = callback
841
-
842
- def _get_camera_frame(self) -> np.ndarray | None:
843
- """Get a frame from Reachy Mini's camera."""
844
- try:
845
- # Use GStreamer lock to prevent concurrent access conflicts
846
- acquired = self._gstreamer_lock.acquire(timeout=0.05)
847
- if acquired:
848
- try:
849
- frame = self.reachy_mini.media.get_frame()
850
- return frame
851
- finally:
852
- self._gstreamer_lock.release()
853
- else:
854
- _LOGGER.debug("GStreamer lock busy, skipping camera frame")
855
- # Flush SDK video buffer to prevent buffer overflow during lock contention
856
- try:
857
- if hasattr(self.reachy_mini.media, "flush"):
858
- self.reachy_mini.media.flush()
859
- elif hasattr(self.reachy_mini.media, "flush_video"):
860
- self.reachy_mini.media.flush_video()
861
- except Exception:
862
- pass
863
- return None
864
- except Exception as e:
865
- _LOGGER.debug("Failed to get camera frame: %s", e)
866
- return None
867
-
868
- def get_snapshot(self) -> bytes | None:
869
- """Get the latest frame as JPEG bytes."""
870
- with self._frame_lock:
871
- return self._last_frame
872
-
873
- async def _handle_client(
874
- self,
875
- reader: asyncio.StreamReader,
876
- writer: asyncio.StreamWriter,
877
- ) -> None:
878
- """Handle incoming HTTP client connections."""
879
- try:
880
- # Read HTTP request
881
- request_line = await asyncio.wait_for(reader.readline(), timeout=10.0)
882
- request = request_line.decode("utf-8", errors="ignore").strip()
883
-
884
- # Read headers (we don't need them but must consume them)
885
- while True:
886
- line = await asyncio.wait_for(reader.readline(), timeout=5.0)
887
- if line in {b"\r\n", b"\n", b""}:
888
- break
889
-
890
- # Parse request path
891
- parts = request.split(" ")
892
- if len(parts) >= 2:
893
- path = parts[1]
894
- else:
895
- path = "/"
896
-
897
- _LOGGER.debug("HTTP request: %s", request)
898
-
899
- if path == "/stream":
900
- await self._handle_stream(writer)
901
- elif path == "/snapshot":
902
- await self._handle_snapshot(writer)
903
- else:
904
- await self._handle_index(writer)
905
-
906
- except TimeoutError:
907
- _LOGGER.debug("Client connection timeout")
908
- except ConnectionResetError:
909
- _LOGGER.debug("Client connection reset")
910
- except Exception as e:
911
- _LOGGER.error("Error handling client: %s", e)
912
- finally:
913
- try:
914
- writer.close()
915
- await writer.wait_closed()
916
- except Exception:
917
- pass
918
-
919
- async def _handle_index(self, writer: asyncio.StreamWriter) -> None:
920
- """Handle index page request."""
921
- html = f"""<!DOCTYPE html>
922
- <html>
923
- <head>
924
- <title>Reachy Mini Camera</title>
925
- <style>
926
- body {{ font-family: Arial, sans-serif; margin: 40px; background: #1a1a2e; color: #eee; }}
927
- h1 {{ color: #00d4ff; }}
928
- .container {{ max-width: 800px; margin: 0 auto; }}
929
- .stream {{ width: 100%; max-width: 640px; border: 2px solid #00d4ff; border-radius: 8px; }}
930
- a {{ color: #00d4ff; }}
931
- .info {{ background: #16213e; padding: 20px; border-radius: 8px; margin-top: 20px; }}
932
- </style>
933
- </head>
934
- <body>
935
- <div class="container">
936
- <h1>Reachy Mini Camera</h1>
937
- <img class="stream" src="/stream" alt="Camera Stream">
938
- <div class="info">
939
- <h3>Endpoints:</h3>
940
- <ul>
941
- <li><a href="/stream">/stream</a> - MJPEG video stream</li>
942
- <li><a href="/snapshot">/snapshot</a> - Single JPEG snapshot</li>
943
- </ul>
944
- <h3>Home Assistant Integration:</h3>
945
- <p>Add a Generic Camera with URL: <code>http://&lt;ip&gt;:{self.port}/stream</code></p>
946
- </div>
947
- </div>
948
- </body>
949
- </html>"""
950
-
951
- response = (
952
- "HTTP/1.1 200 OK\r\n"
953
- "Content-Type: text/html; charset=utf-8\r\n"
954
- f"Content-Length: {len(html)}\r\n"
955
- "Connection: close\r\n"
956
- "\r\n"
957
- )
958
-
959
- writer.write(response.encode("utf-8"))
960
- writer.write(html.encode("utf-8"))
961
- await writer.drain()
962
-
963
- async def _handle_snapshot(self, writer: asyncio.StreamWriter) -> None:
964
- """Handle snapshot request - return single JPEG image."""
965
- jpeg_data = self.get_snapshot()
966
-
967
- if jpeg_data is None:
968
- response = (
969
- "HTTP/1.1 503 Service Unavailable\r\n"
970
- "Content-Type: text/plain\r\n"
971
- "Connection: close\r\n"
972
- "\r\n"
973
- "No frame available"
974
- )
975
- writer.write(response.encode("utf-8"))
976
- else:
977
- response = (
978
- "HTTP/1.1 200 OK\r\n"
979
- "Content-Type: image/jpeg\r\n"
980
- f"Content-Length: {len(jpeg_data)}\r\n"
981
- "Cache-Control: no-cache, no-store, must-revalidate\r\n"
982
- "Connection: close\r\n"
983
- "\r\n"
984
- )
985
- writer.write(response.encode("utf-8"))
986
- writer.write(jpeg_data)
987
-
988
- await writer.drain()
989
-
990
- async def _handle_stream(self, writer: asyncio.StreamWriter) -> None:
991
- """Handle MJPEG stream request."""
992
- # Register this client for tracking
993
- client_id = self._register_stream_client()
994
-
995
- # Send MJPEG headers
996
- response = (
997
- "HTTP/1.1 200 OK\r\n"
998
- f"Content-Type: multipart/x-mixed-replace; boundary={MJPEG_BOUNDARY}\r\n"
999
- "Cache-Control: no-cache, no-store, must-revalidate\r\n"
1000
- "Connection: keep-alive\r\n"
1001
- "\r\n"
1002
- )
1003
- writer.write(response.encode("utf-8"))
1004
- await writer.drain()
1005
-
1006
- _LOGGER.debug("Started MJPEG stream for client %d", client_id)
1007
-
1008
- last_sent_time = 0
1009
-
1010
- try:
1011
- while self._running:
1012
- # Get latest frame
1013
- with self._frame_lock:
1014
- jpeg_data = self._last_frame
1015
- frame_time = self._last_frame_time
1016
-
1017
- # Only send if we have a new frame
1018
- if jpeg_data is not None and frame_time > last_sent_time:
1019
- # Send MJPEG frame
1020
- frame_header = (
1021
- f"--{MJPEG_BOUNDARY}\r\nContent-Type: image/jpeg\r\nContent-Length: {len(jpeg_data)}\r\n\r\n"
1022
- )
1023
-
1024
- writer.write(frame_header.encode("utf-8"))
1025
- writer.write(jpeg_data)
1026
- writer.write(b"\r\n")
1027
- await writer.drain()
1028
-
1029
- last_sent_time = frame_time
1030
-
1031
- # Small delay to prevent busy loop
1032
- await asyncio.sleep(0.01)
1033
-
1034
- except (ConnectionResetError, BrokenPipeError):
1035
- _LOGGER.debug("Client %d disconnected from stream", client_id)
1036
- except Exception as e:
1037
- _LOGGER.error("Error in MJPEG stream for client %d: %s", client_id, e)
1038
- finally:
1039
- # Always unregister client when done
1040
- self._unregister_stream_client(client_id)
1041
-
1042
- _LOGGER.debug("Ended MJPEG stream for client %d", client_id)
 
1
+ """
2
+ MJPEG Camera Server for Reachy Mini with Face Tracking.
3
+
4
+ This module provides an HTTP server that streams camera frames from Reachy Mini
5
+ as MJPEG, which can be integrated with Home Assistant via Generic Camera.
6
+ Also provides face tracking for head movement control.
7
+
8
+ Reference: reachy_mini_conversation_app/src/reachy_mini_conversation_app/camera_worker.py
9
+ """
10
+
11
+ import asyncio
12
+ import logging
13
+ import threading
14
+ import time
15
+ from typing import Optional, Tuple, List, TYPE_CHECKING
16
+
17
+ import cv2
18
+ import numpy as np
19
+ from scipy.spatial.transform import Rotation as R
20
+
21
+ # Import SDK interpolation utilities (same as conversation_app)
22
+ try:
23
+ from reachy_mini.utils.interpolation import linear_pose_interpolation
24
+ SDK_INTERPOLATION_AVAILABLE = True
25
+ except ImportError:
26
+ SDK_INTERPOLATION_AVAILABLE = False
27
+
28
+ if TYPE_CHECKING:
29
+ from reachy_mini import ReachyMini
30
+
31
+ _LOGGER = logging.getLogger(__name__)
32
+
33
+ # MJPEG boundary string
34
+ MJPEG_BOUNDARY = "frame"
35
+
36
+
37
+ class MJPEGCameraServer:
38
+ """
39
+ MJPEG streaming server for Reachy Mini camera with face tracking.
40
+
41
+ Provides HTTP endpoints:
42
+ - /stream - MJPEG video stream
43
+ - /snapshot - Single JPEG image
44
+ - / - Simple status page
45
+
46
+ Also provides face tracking offsets for head movement control.
47
+
48
+ Resource Optimization:
49
+ - Adaptive frame rate: high (15fps) when face detected or in conversation,
50
+ low (3fps) when idle and no face for extended period
51
+ - Face detection pauses after prolonged absence to save CPU
52
+ """
53
+
54
+ def __init__(
55
+ self,
56
+ reachy_mini: Optional["ReachyMini"] = None,
57
+ host: str = "0.0.0.0",
58
+ port: int = 8081,
59
+ fps: int = 15, # 15fps for smooth face tracking
60
+ quality: int = 80,
61
+ enable_face_tracking: bool = True,
62
+ ):
63
+ """
64
+ Initialize the MJPEG camera server.
65
+
66
+ Args:
67
+ reachy_mini: Reachy Mini robot instance (can be None for testing)
68
+ host: Host address to bind to
69
+ port: Port number for the HTTP server
70
+ fps: Target frames per second for the stream
71
+ quality: JPEG quality (1-100)
72
+ enable_face_tracking: Enable face tracking for head movement
73
+ """
74
+ self.reachy_mini = reachy_mini
75
+ self.host = host
76
+ self.port = port
77
+ self.fps = fps
78
+ self.quality = quality
79
+ self.enable_face_tracking = enable_face_tracking
80
+
81
+ self._server: Optional[asyncio.Server] = None
82
+ self._running = False
83
+ self._frame_interval = 1.0 / fps
84
+ self._last_frame: Optional[bytes] = None
85
+ self._last_frame_time: float = 0
86
+ self._frame_lock = threading.Lock()
87
+
88
+ # Frame capture thread
89
+ self._capture_thread: Optional[threading.Thread] = None
90
+
91
+ # Face tracking state
92
+ self._head_tracker = None
93
+ self._face_tracking_enabled = True # Enabled by default for always-on face tracking
94
+ self._face_tracking_offsets: List[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
95
+ self._face_tracking_lock = threading.Lock()
96
+
97
+ # Gesture detection state
98
+ self._gesture_detector = None
99
+ self._gesture_detection_enabled = True
100
+ self._current_gesture = "none"
101
+ self._gesture_confidence = 0.0
102
+ self._gesture_lock = threading.Lock()
103
+ self._gesture_frame_counter = 0
104
+ self._gesture_detection_interval = 3 # Run gesture detection every N frames
105
+ self._gesture_state_callback = None # Callback to notify entity registry
106
+
107
+ # Face tracking timing (smooth interpolation when face lost)
108
+ self._last_face_detected_time: Optional[float] = None
109
+ self._interpolation_start_time: Optional[float] = None
110
+ self._interpolation_start_pose: Optional[np.ndarray] = None
111
+ self._face_lost_delay = 1.0 # Reduced from 2.0s to 1.0s for faster response
112
+ self._interpolation_duration = 0.8 # Reduced from 1.0s to 0.8s for faster return
113
+
114
+ # Offset scaling (same as conversation_app)
115
+ self._offset_scale = 0.6
116
+
117
+ # =====================================================================
118
+ # Resource optimization: Adaptive frame rate for face tracking
119
+ # =====================================================================
120
+ # High frequency when: face detected, in conversation, or recently active
121
+ # Low frequency when: idle and no face for extended period
122
+ # Ultra-low when: idle for very long time (just MJPEG stream, minimal AI)
123
+ self._fps_high = fps # Normal tracking rate (15fps)
124
+ self._fps_low = 2 # Low power rate (2fps) - periodic face check
125
+ self._fps_idle = 0.5 # Ultra-low power (0.5fps) - minimal CPU usage
126
+ self._current_fps = fps
127
+
128
+ # Conversation state (set by voice assistant)
129
+ self._in_conversation = False
130
+ self._conversation_lock = threading.Lock()
131
+
132
+ # Adaptive tracking timing
133
+ self._no_face_duration = 0.0 # How long since last face detection
134
+ self._low_power_threshold = 5.0 # Switch to low power after 5s without face
135
+ self._idle_threshold = 30.0 # Switch to idle mode after 30s without face
136
+ self._last_face_check_time = 0.0
137
+
138
+ # Skip AI inference in idle mode (only stream MJPEG)
139
+ self._ai_enabled = True
140
+
141
+ async def start(self) -> None:
142
+ """Start the MJPEG camera server."""
143
+ if self._running:
144
+ _LOGGER.warning("Camera server already running")
145
+ return
146
+
147
+ self._running = True
148
+
149
+ # Initialize head tracker if face tracking enabled
150
+ if self.enable_face_tracking:
151
+ try:
152
+ from .head_tracker import HeadTracker
153
+ self._head_tracker = HeadTracker()
154
+ _LOGGER.info("Face tracking enabled with YOLO head tracker")
155
+ except ImportError as e:
156
+ _LOGGER.error("Failed to import head tracker: %s", e)
157
+ self._head_tracker = None
158
+ except Exception as e:
159
+ _LOGGER.warning("Failed to initialize head tracker: %s", e)
160
+ self._head_tracker = None
161
+ else:
162
+ _LOGGER.info("Face tracking disabled by configuration")
163
+
164
+ # Initialize gesture detector
165
+ if self._gesture_detection_enabled:
166
+ try:
167
+ from .gesture_detector import GestureDetector
168
+ self._gesture_detector = GestureDetector()
169
+ if self._gesture_detector.is_available:
170
+ _LOGGER.info("Gesture detection enabled (18 HaGRID classes)")
171
+ else:
172
+ _LOGGER.warning("Gesture detection not available")
173
+ self._gesture_detector = None
174
+ except ImportError as e:
175
+ _LOGGER.warning("Failed to import gesture detector: %s", e)
176
+ self._gesture_detector = None
177
+ except Exception as e:
178
+ _LOGGER.warning("Failed to initialize gesture detector: %s", e)
179
+ self._gesture_detector = None
180
+
181
+ # Start frame capture thread
182
+ self._capture_thread = threading.Thread(
183
+ target=self._capture_frames,
184
+ daemon=True,
185
+ name="camera-capture"
186
+ )
187
+ self._capture_thread.start()
188
+
189
+ # Start HTTP server
190
+ self._server = await asyncio.start_server(
191
+ self._handle_client,
192
+ self.host,
193
+ self.port,
194
+ )
195
+
196
+ _LOGGER.info("MJPEG Camera server started on http://%s:%d", self.host, self.port)
197
+ _LOGGER.info(" Stream URL: http://<ip>:%d/stream", self.port)
198
+ _LOGGER.info(" Snapshot URL: http://<ip>:%d/snapshot", self.port)
199
+
200
+ async def stop(self) -> None:
201
+ """Stop the MJPEG camera server."""
202
+ self._running = False
203
+
204
+ if self._capture_thread:
205
+ self._capture_thread.join(timeout=0.5)
206
+ self._capture_thread = None
207
+
208
+ if self._server:
209
+ self._server.close()
210
+ await self._server.wait_closed()
211
+ self._server = None
212
+
213
+ _LOGGER.info("MJPEG Camera server stopped")
214
+
215
+ def _capture_frames(self) -> None:
216
+ """Background thread to capture frames from Reachy Mini and do face tracking.
217
+
218
+ Resource optimization:
219
+ - High frequency (15fps) when face detected or in conversation
220
+ - Low frequency (2fps) when idle and no face for short period
221
+ - Ultra-low (0.5fps) when idle for extended period - minimal AI inference
222
+ """
223
+ _LOGGER.info("Starting camera capture thread (face_tracking=%s)", self._face_tracking_enabled)
224
+
225
+ frame_count = 0
226
+ face_detect_count = 0
227
+ last_log_time = time.time()
228
+
229
+ while self._running:
230
+ try:
231
+ current_time = time.time()
232
+
233
+ # Determine if we should run AI inference this frame
234
+ should_run_ai = self._should_run_ai_inference(current_time)
235
+
236
+ # Only get frame if needed (AI inference or MJPEG streaming)
237
+ frame = self._get_camera_frame() if should_run_ai or self._has_stream_clients() else None
238
+
239
+ if frame is not None:
240
+ frame_count += 1
241
+
242
+ # Encode frame as JPEG for streaming
243
+ encode_params = [cv2.IMWRITE_JPEG_QUALITY, self.quality]
244
+ success, jpeg_data = cv2.imencode('.jpg', frame, encode_params)
245
+
246
+ if success:
247
+ with self._frame_lock:
248
+ self._last_frame = jpeg_data.tobytes()
249
+ self._last_frame_time = time.time()
250
+
251
+ # Only run AI inference when enabled
252
+ if should_run_ai:
253
+ # Face tracking
254
+ if self._face_tracking_enabled and self._head_tracker is not None:
255
+ face_detect_count += 1
256
+ face_detected = self._process_face_tracking(frame, current_time)
257
+
258
+ # Update adaptive timing based on detection result
259
+ if face_detected:
260
+ self._no_face_duration = 0.0
261
+ self._current_fps = self._fps_high
262
+ self._ai_enabled = True
263
+ else:
264
+ # Accumulate no-face duration
265
+ if self._last_face_detected_time is not None:
266
+ self._no_face_duration = current_time - self._last_face_detected_time
267
+ else:
268
+ self._no_face_duration += 1.0 / self._current_fps
269
+
270
+ # Adaptive power mode
271
+ if self._no_face_duration > self._idle_threshold:
272
+ self._current_fps = self._fps_idle
273
+ elif self._no_face_duration > self._low_power_threshold:
274
+ self._current_fps = self._fps_low
275
+
276
+ self._last_face_check_time = current_time
277
+
278
+ # Handle smooth interpolation when face lost
279
+ self._process_face_lost_interpolation(current_time)
280
+
281
+ # Gesture detection (only when face detected recently)
282
+ if (self._gesture_detection_enabled and
283
+ self._gesture_detector is not None and
284
+ self._no_face_duration < 5.0): # Only detect gestures when someone is present
285
+ self._gesture_frame_counter += 1
286
+ if self._gesture_frame_counter >= self._gesture_detection_interval:
287
+ self._gesture_frame_counter = 0
288
+ self._process_gesture_detection(frame)
289
+
290
+ # Log stats every 30 seconds
291
+ if current_time - last_log_time >= 30.0:
292
+ fps = frame_count / (current_time - last_log_time)
293
+ detect_fps = face_detect_count / (current_time - last_log_time)
294
+ mode = "HIGH" if self._current_fps == self._fps_high else ("LOW" if self._current_fps == self._fps_low else "IDLE")
295
+ _LOGGER.debug("Camera: %.1f fps, AI: %.1f fps (%s), no_face: %.0fs",
296
+ fps, detect_fps, mode, self._no_face_duration)
297
+ frame_count = 0
298
+ face_detect_count = 0
299
+ last_log_time = current_time
300
+
301
+ # Sleep to maintain target FPS (use current adaptive rate)
302
+ sleep_time = 1.0 / self._current_fps
303
+ time.sleep(sleep_time)
304
+
305
+ except Exception as e:
306
+ _LOGGER.error("Error capturing frame: %s", e)
307
+ time.sleep(1.0)
308
+
309
+ _LOGGER.info("Camera capture thread stopped")
310
+
311
+ def _should_run_ai_inference(self, current_time: float) -> bool:
312
+ """Determine if AI inference (face/gesture detection) should run.
313
+
314
+ Returns True if:
315
+ - In conversation mode (always run)
316
+ - Face was recently detected
317
+ - Periodic check in low power mode
318
+ """
319
+ # Always run during conversation
320
+ with self._conversation_lock:
321
+ if self._in_conversation:
322
+ return True
323
+
324
+ # High frequency mode: run every frame
325
+ if self._current_fps == self._fps_high:
326
+ return True
327
+
328
+ # Low/idle power mode: run periodically
329
+ time_since_last = current_time - self._last_face_check_time
330
+ return time_since_last >= (1.0 / self._current_fps)
331
+
332
+ def _has_stream_clients(self) -> bool:
333
+ """Check if there are active MJPEG stream clients."""
334
+ # For now, always return True to keep stream available
335
+ # Could be optimized to track actual client connections
336
+ return True
337
+
338
+ def _process_face_tracking(self, frame: np.ndarray, current_time: float) -> bool:
339
+ """Process face tracking on a frame.
340
+
341
+ Returns:
342
+ True if face was detected, False otherwise
343
+ """
344
+ if self._head_tracker is None or self.reachy_mini is None:
345
+ return False
346
+
347
+ try:
348
+ face_center, confidence = self._head_tracker.get_head_position(frame)
349
+
350
+ if face_center is not None:
351
+ # Face detected - update tracking
352
+ self._last_face_detected_time = current_time
353
+ self._interpolation_start_time = None # Stop any interpolation
354
+
355
+ # Convert normalized coordinates to pixel coordinates
356
+ h, w = frame.shape[:2]
357
+ eye_center_norm = (face_center + 1) / 2
358
+
359
+ eye_center_pixels = [
360
+ eye_center_norm[0] * w,
361
+ eye_center_norm[1] * h,
362
+ ]
363
+
364
+ # Get the head pose needed to look at the target
365
+ target_pose = self.reachy_mini.look_at_image(
366
+ eye_center_pixels[0],
367
+ eye_center_pixels[1],
368
+ duration=0.0,
369
+ perform_movement=False,
370
+ )
371
+
372
+ # Extract translation and rotation from target pose
373
+ translation = target_pose[:3, 3]
374
+ rotation = R.from_matrix(target_pose[:3, :3]).as_euler("xyz", degrees=False)
375
+
376
+ # Scale down for smoother tracking (same as conversation_app)
377
+ translation = translation * self._offset_scale
378
+ rotation = rotation * self._offset_scale
379
+
380
+ # Apply pitch offset compensation (robot tends to look up)
381
+ # rotation[1] is pitch in xyz euler order
382
+ # Positive pitch = look down in robot coordinate system
383
+ pitch_offset_rad = np.radians(9.0) # Look down 9 degrees
384
+ rotation[1] += pitch_offset_rad
385
+
386
+ # Apply yaw offset compensation (robot tends to look to user's right)
387
+ # rotation[2] is yaw in xyz euler order
388
+ # Negative yaw = turn right (towards user's left from robot's perspective)
389
+ yaw_offset_rad = np.radians(-7.0) # Turn right 7 degrees
390
+ rotation[2] += yaw_offset_rad
391
+
392
+ # Update face tracking offsets
393
+ with self._face_tracking_lock:
394
+ self._face_tracking_offsets = [
395
+ float(translation[0]),
396
+ float(translation[1]),
397
+ float(translation[2]),
398
+ float(rotation[0]),
399
+ float(rotation[1]),
400
+ float(rotation[2]),
401
+ ]
402
+
403
+ return True
404
+
405
+ return False
406
+
407
+ except Exception as e:
408
+ _LOGGER.debug("Face tracking error: %s", e)
409
+ return False
410
+
411
+ def _process_face_lost_interpolation(self, current_time: float) -> None:
412
+ """Handle smooth interpolation back to neutral when face is lost."""
413
+ if self._last_face_detected_time is None:
414
+ return
415
+
416
+ time_since_face_lost = current_time - self._last_face_detected_time
417
+
418
+ if time_since_face_lost < self._face_lost_delay:
419
+ return # Still within delay period, keep current offsets
420
+
421
+ # Start interpolation if not already started
422
+ if self._interpolation_start_time is None:
423
+ self._interpolation_start_time = current_time
424
+ # Capture current pose as start of interpolation
425
+ with self._face_tracking_lock:
426
+ current_offsets = self._face_tracking_offsets.copy()
427
+
428
+ # Convert to 4x4 pose matrix
429
+ pose_matrix = np.eye(4, dtype=np.float32)
430
+ pose_matrix[:3, 3] = current_offsets[:3]
431
+ pose_matrix[:3, :3] = R.from_euler("xyz", current_offsets[3:]).as_matrix()
432
+ self._interpolation_start_pose = pose_matrix
433
+
434
+ # Calculate interpolation progress
435
+ elapsed = current_time - self._interpolation_start_time
436
+ t = min(1.0, elapsed / self._interpolation_duration)
437
+
438
+ # Interpolate to neutral (identity matrix)
439
+ if self._interpolation_start_pose is not None:
440
+ neutral_pose = np.eye(4, dtype=np.float32)
441
+ interpolated_pose = self._linear_pose_interpolation(
442
+ self._interpolation_start_pose, neutral_pose, t
443
+ )
444
+
445
+ # Extract translation and rotation
446
+ translation = interpolated_pose[:3, 3]
447
+ rotation = R.from_matrix(interpolated_pose[:3, :3]).as_euler("xyz", degrees=False)
448
+
449
+ with self._face_tracking_lock:
450
+ self._face_tracking_offsets = [
451
+ float(translation[0]),
452
+ float(translation[1]),
453
+ float(translation[2]),
454
+ float(rotation[0]),
455
+ float(rotation[1]),
456
+ float(rotation[2]),
457
+ ]
458
+
459
+ # Reset when interpolation complete
460
+ if t >= 1.0:
461
+ self._last_face_detected_time = None
462
+ self._interpolation_start_time = None
463
+ self._interpolation_start_pose = None
464
+
465
def _linear_pose_interpolation(
    self, start: np.ndarray, end: np.ndarray, t: float
) -> np.ndarray:
    """Interpolate between two 4x4 homogeneous poses at fraction ``t``.

    Delegates to the SDK's ``linear_pose_interpolation`` when available;
    otherwise lerps the translation and SLERPs the rotation manually.
    """
    if SDK_INTERPOLATION_AVAILABLE:
        return linear_pose_interpolation(start, end, t)

    # Manual fallback. Translation is a straight linear blend...
    blended_trans = start[:3, 3] * (1 - t) + end[:3, 3] * t

    # ...while rotation is spherically interpolated between the two
    # endpoint orientations.
    from scipy.spatial.transform import Slerp

    endpoints = R.from_quat(
        np.array(
            [
                R.from_matrix(start[:3, :3]).as_quat(),
                R.from_matrix(end[:3, :3]).as_quat(),
            ]
        )
    )
    blended_rot = Slerp([0, 1], endpoints)(t)

    out = np.eye(4, dtype=np.float32)
    out[:3, :3] = blended_rot.as_matrix()
    out[:3, 3] = blended_trans
    return out
498
+
499
+ # =========================================================================
500
+ # Public API for face tracking
501
+ # =========================================================================
502
+
503
def get_face_tracking_offsets(self) -> Tuple[float, float, float, float, float, float]:
    """Snapshot the current face-tracking offsets, thread-safely.

    Returns:
        Tuple of (x, y, z, roll, pitch, yaw) offsets.
    """
    with self._face_tracking_lock:
        snapshot = self._face_tracking_offsets
    return (snapshot[0], snapshot[1], snapshot[2], snapshot[3], snapshot[4], snapshot[5])
512
+
513
def set_face_tracking_enabled(self, enabled: bool) -> None:
    """Turn face tracking on or off.

    Disabling pretends the face was last seen right now, which kicks off
    the smooth return-to-neutral interpolation.
    """
    if enabled == self._face_tracking_enabled:
        return  # Already in the requested state; avoid duplicate logs.
    self._face_tracking_enabled = enabled
    if not enabled:
        # Seed the face-lost timer so the head eases back to neutral.
        self._last_face_detected_time = time.time()
        self._interpolation_start_time = None
    _LOGGER.info("Face tracking %s", "enabled" if enabled else "disabled")
523
+
524
def set_conversation_mode(self, in_conversation: bool) -> None:
    """Switch the adaptive tracking loop in or out of conversation mode.

    While conversing, detection runs at the high frame rate even when no
    face is currently visible; leaving conversation mode hands control
    back to the adaptive power logic.

    Args:
        in_conversation: True while the voice assistant is engaged.
    """
    with self._conversation_lock:
        self._in_conversation = in_conversation

    if not in_conversation:
        _LOGGER.debug("Face tracking: conversation mode OFF (adaptive)")
        return

    # Jump straight to the responsive profile and reset the idle timer.
    self._current_fps = self._fps_high
    self._ai_enabled = True
    self._no_face_duration = 0.0
    _LOGGER.debug("Face tracking: conversation mode ON (high frequency)")
544
+
545
+ # =========================================================================
546
+ # Gesture detection
547
+ # =========================================================================
548
+
549
+ def _process_gesture_detection(self, frame: np.ndarray) -> None:
550
+ """Process gesture detection on a frame."""
551
+ if self._gesture_detector is None:
552
+ return
553
+
554
+ try:
555
+ # Detect gesture
556
+ detected_gesture, confidence = self._gesture_detector.detect(frame)
557
+
558
+ # Update current gesture state
559
+ state_changed = False
560
+ with self._gesture_lock:
561
+ old_gesture = self._current_gesture
562
+ if detected_gesture.value != "no_gesture":
563
+ self._current_gesture = detected_gesture.value
564
+ self._gesture_confidence = confidence
565
+ if old_gesture != detected_gesture.value:
566
+ state_changed = True
567
+ _LOGGER.debug("Gesture: %s (%.0f%%)",
568
+ detected_gesture.value, confidence * 100)
569
+ else:
570
+ if self._current_gesture != "none":
571
+ state_changed = True
572
+ self._current_gesture = "none"
573
+ self._gesture_confidence = 0.0
574
+
575
+ # Notify entity registry to push update to Home Assistant
576
+ if state_changed and self._gesture_state_callback:
577
+ try:
578
+ self._gesture_state_callback()
579
+ except Exception:
580
+ pass # Ignore callback errors
581
+
582
+ except Exception as e:
583
+ _LOGGER.warning("Gesture detection error: %s", e)
584
+
585
def get_current_gesture(self) -> str:
    """Thread-safe accessor for the latest detected gesture name.

    Returns:
        Gesture label, e.g. "like" or "peace"; "none" when idle.
    """
    with self._gesture_lock:
        current = self._current_gesture
    return current
593
+
594
def get_gesture_confidence(self) -> float:
    """Thread-safe accessor for the latest gesture confidence.

    Returns:
        Confidence as a percentage (0.0-100.0) for display purposes.
    """
    with self._gesture_lock:
        raw = self._gesture_confidence
    return raw * 100.0
602
+
603
def set_gesture_detection_enabled(self, enabled: bool) -> None:
    """Enable or disable gesture detection.

    Disabling also clears the published gesture state so Home Assistant
    does not keep showing a stale gesture.
    """
    self._gesture_detection_enabled = enabled
    if not enabled:
        with self._gesture_lock:
            self._current_gesture = "none"
            self._gesture_confidence = 0.0
    _LOGGER.info("Gesture detection %s", "enabled" if enabled else "disabled")
611
+
612
def set_gesture_state_callback(self, callback) -> None:
    """Register a callable invoked whenever the detected gesture changes."""
    self._gesture_state_callback = callback
615
+
616
+ def _get_camera_frame(self) -> Optional[np.ndarray]:
617
+ """Get a frame from Reachy Mini's camera."""
618
+ if self.reachy_mini is None:
619
+ # Return a test pattern if no robot connected
620
+ return self._generate_test_frame()
621
+
622
+ try:
623
+ frame = self.reachy_mini.media.get_frame()
624
+ return frame
625
+ except Exception as e:
626
+ _LOGGER.debug("Failed to get camera frame: %s", e)
627
+ return None
628
+
629
def _generate_test_frame(self) -> np.ndarray:
    """Render a 640x480 placeholder frame used when no camera exists.

    Shows a title, a "no camera" notice, and the current wall-clock time.
    """
    frame = np.zeros((480, 640, 3), dtype=np.uint8)

    # Title line.
    cv2.putText(
        frame,
        "Reachy Mini Camera",
        (150, 200),
        cv2.FONT_HERSHEY_SIMPLEX,
        1.2,
        (255, 255, 255),
        2,
    )
    # Status line.
    cv2.putText(
        frame,
        "No camera connected",
        (180, 280),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (128, 128, 128),
        1,
    )
    # Live timestamp so viewers can tell the feed is alive.
    cv2.putText(
        frame,
        time.strftime("%Y-%m-%d %H:%M:%S"),
        (220, 350),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        (0, 255, 0),
        1,
    )

    return frame
667
+
668
def get_snapshot(self) -> Optional[bytes]:
    """Return the most recent JPEG-encoded frame, or None if none yet."""
    with self._frame_lock:
        latest = self._last_frame
    return latest
672
+
673
async def _handle_client(
    self,
    reader: asyncio.StreamReader,
    writer: asyncio.StreamWriter,
) -> None:
    """Serve one HTTP connection: parse the request line and dispatch.

    Routes /stream to MJPEG streaming, /snapshot to a single JPEG, and
    anything else to the index page. The writer is always closed.
    """
    try:
        # Request line, e.g. "GET /stream HTTP/1.1".
        raw = await asyncio.wait_for(reader.readline(), timeout=10.0)
        request = raw.decode('utf-8', errors='ignore').strip()

        # Drain headers up to the blank separator (or EOF); they are not
        # inspected but must be consumed before responding.
        while True:
            header = await asyncio.wait_for(reader.readline(), timeout=5.0)
            if header in (b'\r\n', b'\n', b''):
                break

        fields = request.split(' ')
        path = fields[1] if len(fields) >= 2 else '/'

        _LOGGER.debug("HTTP request: %s", request)

        if path == '/stream':
            await self._handle_stream(writer)
        elif path == '/snapshot':
            await self._handle_snapshot(writer)
        else:
            await self._handle_index(writer)

    except asyncio.TimeoutError:
        _LOGGER.debug("Client connection timeout")
    except ConnectionResetError:
        _LOGGER.debug("Client connection reset")
    except Exception as exc:
        _LOGGER.error("Error handling client: %s", exc)
    finally:
        try:
            writer.close()
            await writer.wait_closed()
        except Exception:
            pass
721
+
722
+ async def _handle_index(self, writer: asyncio.StreamWriter) -> None:
723
+ """Handle index page request."""
724
+ html = f"""<!DOCTYPE html>
725
+ <html>
726
+ <head>
727
+ <title>Reachy Mini Camera</title>
728
+ <style>
729
+ body {{ font-family: Arial, sans-serif; margin: 40px; background: #1a1a2e; color: #eee; }}
730
+ h1 {{ color: #00d4ff; }}
731
+ .container {{ max-width: 800px; margin: 0 auto; }}
732
+ .stream {{ width: 100%; max-width: 640px; border: 2px solid #00d4ff; border-radius: 8px; }}
733
+ a {{ color: #00d4ff; }}
734
+ .info {{ background: #16213e; padding: 20px; border-radius: 8px; margin-top: 20px; }}
735
+ </style>
736
+ </head>
737
+ <body>
738
+ <div class="container">
739
+ <h1>Reachy Mini Camera</h1>
740
+ <img class="stream" src="/stream" alt="Camera Stream">
741
+ <div class="info">
742
+ <h3>Endpoints:</h3>
743
+ <ul>
744
+ <li><a href="/stream">/stream</a> - MJPEG video stream</li>
745
+ <li><a href="/snapshot">/snapshot</a> - Single JPEG snapshot</li>
746
+ </ul>
747
+ <h3>Home Assistant Integration:</h3>
748
+ <p>Add a Generic Camera with URL: <code>http://&lt;ip&gt;:{self.port}/stream</code></p>
749
+ </div>
750
+ </div>
751
+ </body>
752
+ </html>"""
753
+
754
+ response = (
755
+ "HTTP/1.1 200 OK\r\n"
756
+ "Content-Type: text/html; charset=utf-8\r\n"
757
+ f"Content-Length: {len(html)}\r\n"
758
+ "Connection: close\r\n"
759
+ "\r\n"
760
+ )
761
+
762
+ writer.write(response.encode('utf-8'))
763
+ writer.write(html.encode('utf-8'))
764
+ await writer.drain()
765
+
766
+ async def _handle_snapshot(self, writer: asyncio.StreamWriter) -> None:
767
+ """Handle snapshot request - return single JPEG image."""
768
+ jpeg_data = self.get_snapshot()
769
+
770
+ if jpeg_data is None:
771
+ response = (
772
+ "HTTP/1.1 503 Service Unavailable\r\n"
773
+ "Content-Type: text/plain\r\n"
774
+ "Connection: close\r\n"
775
+ "\r\n"
776
+ "No frame available"
777
+ )
778
+ writer.write(response.encode('utf-8'))
779
+ else:
780
+ response = (
781
+ "HTTP/1.1 200 OK\r\n"
782
+ "Content-Type: image/jpeg\r\n"
783
+ f"Content-Length: {len(jpeg_data)}\r\n"
784
+ "Cache-Control: no-cache, no-store, must-revalidate\r\n"
785
+ "Connection: close\r\n"
786
+ "\r\n"
787
+ )
788
+ writer.write(response.encode('utf-8'))
789
+ writer.write(jpeg_data)
790
+
791
+ await writer.drain()
792
+
793
async def _handle_stream(self, writer: asyncio.StreamWriter) -> None:
    """Push an MJPEG multipart stream until the client disconnects.

    Polls the shared frame buffer roughly every 10 ms and forwards each
    *new* JPEG (tracked via its capture timestamp) as a multipart chunk.
    """
    preamble = (
        "HTTP/1.1 200 OK\r\n"
        f"Content-Type: multipart/x-mixed-replace; boundary={MJPEG_BOUNDARY}\r\n"
        "Cache-Control: no-cache, no-store, must-revalidate\r\n"
        "Connection: keep-alive\r\n"
        "\r\n"
    )
    writer.write(preamble.encode('utf-8'))
    await writer.drain()

    _LOGGER.debug("Started MJPEG stream")

    last_sent = 0
    try:
        while self._running:
            with self._frame_lock:
                jpeg = self._last_frame
                captured_at = self._last_frame_time

            # Forward only frames newer than the last one we delivered.
            if jpeg is not None and captured_at > last_sent:
                part = (
                    f"--{MJPEG_BOUNDARY}\r\n"
                    "Content-Type: image/jpeg\r\n"
                    f"Content-Length: {len(jpeg)}\r\n"
                    "\r\n"
                )
                writer.write(part.encode('utf-8'))
                writer.write(jpeg)
                writer.write(b"\r\n")
                await writer.drain()
                last_sent = captured_at

            # Short poll interval keeps latency low without busy-looping.
            await asyncio.sleep(0.01)

    except (ConnectionResetError, BrokenPipeError):
        _LOGGER.debug("Client disconnected from stream")
    except Exception as exc:
        _LOGGER.error("Error in MJPEG stream: %s", exc)

    _LOGGER.debug("Ended MJPEG stream")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity.py RENAMED
@@ -1,37 +1,48 @@
1
  """ESPHome entity definitions."""
2
 
3
- import logging
4
  from abc import abstractmethod
5
- from collections.abc import Callable, Iterable
6
- from typing import TYPE_CHECKING
 
7
 
8
  # pylint: disable=no-name-in-module
9
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
10
- BinarySensorStateResponse,
11
- CameraImageRequest,
12
- CameraImageResponse,
13
  ListEntitiesBinarySensorResponse,
 
14
  ListEntitiesCameraResponse,
15
  ListEntitiesMediaPlayerResponse,
16
  ListEntitiesNumberResponse,
17
  ListEntitiesRequest,
 
 
 
18
  ListEntitiesTextSensorResponse,
 
 
 
 
19
  MediaPlayerCommandRequest,
20
  MediaPlayerStateResponse,
21
  NumberCommandRequest,
22
  NumberStateResponse,
 
 
 
23
  SubscribeHomeAssistantStatesRequest,
24
  SubscribeStatesRequest,
 
 
25
  TextSensorStateResponse,
26
  )
27
- from aioesphomeapi.model import MediaPlayerCommand, MediaPlayerEntityFeature, MediaPlayerState
28
  from google.protobuf import message
29
 
30
- from ..audio.audio_player import AudioPlayer
31
- from ..core.util import call_all
 
32
 
33
  if TYPE_CHECKING:
34
- from ..protocol.api_server import APIServer
35
 
36
  logger = logging.getLogger(__name__)
37
 
@@ -39,7 +50,7 @@ logger = logging.getLogger(__name__)
39
  class ESPHomeEntity:
40
  """Base class for ESPHome entities."""
41
 
42
- def __init__(self, server: "APIServer") -> None:
43
  self.server = server
44
 
45
  @abstractmethod
@@ -52,7 +63,7 @@ class MediaPlayerEntity(ESPHomeEntity):
52
 
53
  def __init__(
54
  self,
55
- server: "APIServer",
56
  key: int,
57
  name: str,
58
  object_id: str,
@@ -71,9 +82,9 @@ class MediaPlayerEntity(ESPHomeEntity):
71
 
72
  def play(
73
  self,
74
- url: str | list[str],
75
  announcement: bool = False,
76
- done_callback: Callable[[], None] | None = None,
77
  ) -> Iterable[message.Message]:
78
  if announcement:
79
  if self.music_player.is_playing:
@@ -81,14 +92,18 @@ class MediaPlayerEntity(ESPHomeEntity):
81
  self.music_player.pause()
82
  self.announce_player.play(
83
  url,
84
- done_callback=lambda: call_all(self.music_player.resume, done_callback),
 
 
85
  )
86
  else:
87
  # Announce, idle
88
  self.announce_player.play(
89
  url,
90
  done_callback=lambda: call_all(
91
- lambda: self.server.send_messages([self._update_state(MediaPlayerState.IDLE)]),
 
 
92
  done_callback,
93
  ),
94
  )
@@ -97,7 +112,9 @@ class MediaPlayerEntity(ESPHomeEntity):
97
  self.music_player.play(
98
  url,
99
  done_callback=lambda: call_all(
100
- lambda: self.server.send_messages([self._update_state(MediaPlayerState.IDLE)]),
 
 
101
  done_callback,
102
  ),
103
  )
@@ -116,9 +133,6 @@ class MediaPlayerEntity(ESPHomeEntity):
116
  elif msg.command == MediaPlayerCommand.PLAY:
117
  self.music_player.resume()
118
  yield self._update_state(MediaPlayerState.PLAYING)
119
- elif msg.command == MediaPlayerCommand.STOP:
120
- self.music_player.stop()
121
- yield self._update_state(MediaPlayerState.IDLE)
122
  elif msg.has_volume:
123
  volume = int(msg.volume * 100)
124
  self.music_player.set_volume(volume)
@@ -162,13 +176,13 @@ class TextSensorEntity(ESPHomeEntity):
162
 
163
  def __init__(
164
  self,
165
- server: "APIServer",
166
  key: int,
167
  name: str,
168
  object_id: str,
169
  icon: str = "",
170
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
171
- value_getter: Callable[[], str] | None = None,
172
  ) -> None:
173
  ESPHomeEntity.__init__(self, server)
174
  self.key = key
@@ -218,14 +232,14 @@ class BinarySensorEntity(ESPHomeEntity):
218
 
219
  def __init__(
220
  self,
221
- server: "APIServer",
222
  key: int,
223
  name: str,
224
  object_id: str,
225
  icon: str = "",
226
  device_class: str = "",
227
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
228
- value_getter: Callable[[], bool] | None = None,
229
  ) -> None:
230
  ESPHomeEntity.__init__(self, server)
231
  self.key = key
@@ -277,7 +291,7 @@ class NumberEntity(ESPHomeEntity):
277
 
278
  def __init__(
279
  self,
280
- server: "APIServer",
281
  key: int,
282
  name: str,
283
  object_id: str,
@@ -288,8 +302,8 @@ class NumberEntity(ESPHomeEntity):
288
  unit_of_measurement: str = "",
289
  mode: int = 0, # 0 = auto, 1 = box, 2 = slider
290
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
291
- value_getter: Callable[[], float] | None = None,
292
- value_setter: Callable[[float], None] | None = None,
293
  ) -> None:
294
  ESPHomeEntity.__init__(self, server)
295
  self.key = key
@@ -357,12 +371,12 @@ class CameraEntity(ESPHomeEntity):
357
 
358
  def __init__(
359
  self,
360
- server: "APIServer",
361
  key: int,
362
  name: str,
363
  object_id: str,
364
  icon: str = "mdi:camera",
365
- image_getter: Callable[[], bytes | None] | None = None,
366
  ) -> None:
367
  ESPHomeEntity.__init__(self, server)
368
  self.key = key
@@ -371,7 +385,7 @@ class CameraEntity(ESPHomeEntity):
371
  self.icon = icon
372
  self._image_getter = image_getter
373
 
374
- def get_image(self) -> bytes | None:
375
  """Get the current camera image as JPEG bytes."""
376
  if self._image_getter:
377
  return self._image_getter()
@@ -402,3 +416,4 @@ class CameraEntity(ESPHomeEntity):
402
  data=b"",
403
  done=True,
404
  )
 
 
1
  """ESPHome entity definitions."""
2
 
 
3
  from abc import abstractmethod
4
+ from collections.abc import Iterable
5
+ from typing import Callable, List, Optional, Union, TYPE_CHECKING
6
+ import logging
7
 
8
  # pylint: disable=no-name-in-module
9
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
 
 
 
10
  ListEntitiesBinarySensorResponse,
11
+ ListEntitiesButtonResponse,
12
  ListEntitiesCameraResponse,
13
  ListEntitiesMediaPlayerResponse,
14
  ListEntitiesNumberResponse,
15
  ListEntitiesRequest,
16
+ ListEntitiesSelectResponse,
17
+ ListEntitiesSensorResponse,
18
+ ListEntitiesSwitchResponse,
19
  ListEntitiesTextSensorResponse,
20
+ BinarySensorStateResponse,
21
+ ButtonCommandRequest,
22
+ CameraImageRequest,
23
+ CameraImageResponse,
24
  MediaPlayerCommandRequest,
25
  MediaPlayerStateResponse,
26
  NumberCommandRequest,
27
  NumberStateResponse,
28
+ SelectCommandRequest,
29
+ SelectStateResponse,
30
+ SensorStateResponse,
31
  SubscribeHomeAssistantStatesRequest,
32
  SubscribeStatesRequest,
33
+ SwitchCommandRequest,
34
+ SwitchStateResponse,
35
  TextSensorStateResponse,
36
  )
37
+ from aioesphomeapi.model import MediaPlayerCommand, MediaPlayerState, MediaPlayerEntityFeature
38
  from google.protobuf import message
39
 
40
+ from .api_server import APIServer
41
+ from .audio_player import AudioPlayer
42
+ from .util import call_all
43
 
44
  if TYPE_CHECKING:
45
+ from reachy_mini import ReachyMini
46
 
47
  logger = logging.getLogger(__name__)
48
 
 
50
  class ESPHomeEntity:
51
  """Base class for ESPHome entities."""
52
 
53
+ def __init__(self, server: APIServer) -> None:
54
  self.server = server
55
 
56
  @abstractmethod
 
63
 
64
  def __init__(
65
  self,
66
+ server: APIServer,
67
  key: int,
68
  name: str,
69
  object_id: str,
 
82
 
83
  def play(
84
  self,
85
+ url: Union[str, List[str]],
86
  announcement: bool = False,
87
+ done_callback: Optional[Callable[[], None]] = None,
88
  ) -> Iterable[message.Message]:
89
  if announcement:
90
  if self.music_player.is_playing:
 
92
  self.music_player.pause()
93
  self.announce_player.play(
94
  url,
95
+ done_callback=lambda: call_all(
96
+ self.music_player.resume, done_callback
97
+ ),
98
  )
99
  else:
100
  # Announce, idle
101
  self.announce_player.play(
102
  url,
103
  done_callback=lambda: call_all(
104
+ lambda: self.server.send_messages(
105
+ [self._update_state(MediaPlayerState.IDLE)]
106
+ ),
107
  done_callback,
108
  ),
109
  )
 
112
  self.music_player.play(
113
  url,
114
  done_callback=lambda: call_all(
115
+ lambda: self.server.send_messages(
116
+ [self._update_state(MediaPlayerState.IDLE)]
117
+ ),
118
  done_callback,
119
  ),
120
  )
 
133
  elif msg.command == MediaPlayerCommand.PLAY:
134
  self.music_player.resume()
135
  yield self._update_state(MediaPlayerState.PLAYING)
 
 
 
136
  elif msg.has_volume:
137
  volume = int(msg.volume * 100)
138
  self.music_player.set_volume(volume)
 
176
 
177
  def __init__(
178
  self,
179
+ server: APIServer,
180
  key: int,
181
  name: str,
182
  object_id: str,
183
  icon: str = "",
184
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
185
+ value_getter: Optional[Callable[[], str]] = None,
186
  ) -> None:
187
  ESPHomeEntity.__init__(self, server)
188
  self.key = key
 
232
 
233
  def __init__(
234
  self,
235
+ server: APIServer,
236
  key: int,
237
  name: str,
238
  object_id: str,
239
  icon: str = "",
240
  device_class: str = "",
241
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
242
+ value_getter: Optional[Callable[[], bool]] = None,
243
  ) -> None:
244
  ESPHomeEntity.__init__(self, server)
245
  self.key = key
 
291
 
292
  def __init__(
293
  self,
294
+ server: APIServer,
295
  key: int,
296
  name: str,
297
  object_id: str,
 
302
  unit_of_measurement: str = "",
303
  mode: int = 0, # 0 = auto, 1 = box, 2 = slider
304
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
305
+ value_getter: Optional[Callable[[], float]] = None,
306
+ value_setter: Optional[Callable[[float], None]] = None,
307
  ) -> None:
308
  ESPHomeEntity.__init__(self, server)
309
  self.key = key
 
371
 
372
  def __init__(
373
  self,
374
+ server: APIServer,
375
  key: int,
376
  name: str,
377
  object_id: str,
378
  icon: str = "mdi:camera",
379
+ image_getter: Optional[Callable[[], Optional[bytes]]] = None,
380
  ) -> None:
381
  ESPHomeEntity.__init__(self, server)
382
  self.key = key
 
385
  self.icon = icon
386
  self._image_getter = image_getter
387
 
388
+ def get_image(self) -> Optional[bytes]:
389
  """Get the current camera image as JPEG bytes."""
390
  if self._image_getter:
391
  return self._image_getter()
 
416
  data=b"",
417
  done=True,
418
  )
419
+
{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity_extensions.py RENAMED
@@ -1,16 +1,16 @@
1
  """Extended ESPHome entity types for Reachy Mini control."""
2
 
 
 
3
  import logging
4
- from collections.abc import Callable, Iterable
5
- from typing import TYPE_CHECKING
6
 
7
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
8
- ButtonCommandRequest,
9
  ListEntitiesButtonResponse,
10
  ListEntitiesRequest,
11
  ListEntitiesSelectResponse,
12
  ListEntitiesSensorResponse,
13
  ListEntitiesSwitchResponse,
 
14
  SelectCommandRequest,
15
  SelectStateResponse,
16
  SensorStateResponse,
@@ -21,17 +21,14 @@ from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
21
  )
22
  from google.protobuf import message
23
 
 
24
  from .entity import ESPHomeEntity
25
 
26
- if TYPE_CHECKING:
27
- from ..protocol.api_server import APIServer
28
-
29
  logger = logging.getLogger(__name__)
30
 
31
 
32
  class SensorStateClass:
33
  """ESPHome SensorStateClass enum values."""
34
-
35
  NONE = 0
36
  MEASUREMENT = 1
37
  TOTAL_INCREASING = 2
@@ -43,7 +40,7 @@ class SensorEntity(ESPHomeEntity):
43
 
44
  def __init__(
45
  self,
46
- server: "APIServer",
47
  key: int,
48
  name: str,
49
  object_id: str,
@@ -53,7 +50,7 @@ class SensorEntity(ESPHomeEntity):
53
  device_class: str = "",
54
  state_class: int = SensorStateClass.NONE,
55
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
56
- value_getter: Callable[[], float] | None = None,
57
  ) -> None:
58
  ESPHomeEntity.__init__(self, server)
59
  self.key = key
@@ -64,7 +61,7 @@ class SensorEntity(ESPHomeEntity):
64
  self.accuracy_decimals = accuracy_decimals
65
  self.device_class = device_class
66
  self.entity_category = entity_category
67
- # Convert string state_class to enum
68
  if isinstance(state_class, str):
69
  state_class_map = {
70
  "": SensorStateClass.NONE,
@@ -121,15 +118,15 @@ class SwitchEntity(ESPHomeEntity):
121
 
122
  def __init__(
123
  self,
124
- server: "APIServer",
125
  key: int,
126
  name: str,
127
  object_id: str,
128
  icon: str = "",
129
  device_class: str = "",
130
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
131
- value_getter: Callable[[], bool] | None = None,
132
- value_setter: Callable[[bool], None] | None = None,
133
  ) -> None:
134
  ESPHomeEntity.__init__(self, server)
135
  self.key = key
@@ -186,15 +183,15 @@ class SelectEntity(ESPHomeEntity):
186
 
187
  def __init__(
188
  self,
189
- server: "APIServer",
190
  key: int,
191
  name: str,
192
  object_id: str,
193
- options: list[str],
194
  icon: str = "",
195
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
196
- value_getter: Callable[[], str] | None = None,
197
- value_setter: Callable[[str], None] | None = None,
198
  ) -> None:
199
  ESPHomeEntity.__init__(self, server)
200
  self.key = key
@@ -255,14 +252,14 @@ class ButtonEntity(ESPHomeEntity):
255
 
256
  def __init__(
257
  self,
258
- server: "APIServer",
259
  key: int,
260
  name: str,
261
  object_id: str,
262
  icon: str = "",
263
  device_class: str = "",
264
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
265
- on_press: Callable[[], None] | None = None,
266
  ) -> None:
267
  ESPHomeEntity.__init__(self, server)
268
  self.key = key
 
1
  """Extended ESPHome entity types for Reachy Mini control."""
2
 
3
+ from collections.abc import Iterable
4
+ from typing import Callable, List, Optional
5
  import logging
 
 
6
 
7
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
 
8
  ListEntitiesButtonResponse,
9
  ListEntitiesRequest,
10
  ListEntitiesSelectResponse,
11
  ListEntitiesSensorResponse,
12
  ListEntitiesSwitchResponse,
13
+ ButtonCommandRequest,
14
  SelectCommandRequest,
15
  SelectStateResponse,
16
  SensorStateResponse,
 
21
  )
22
  from google.protobuf import message
23
 
24
+ from .api_server import APIServer
25
  from .entity import ESPHomeEntity
26
 
 
 
 
27
  logger = logging.getLogger(__name__)
28
 
29
 
30
  class SensorStateClass:
31
  """ESPHome SensorStateClass enum values."""
 
32
  NONE = 0
33
  MEASUREMENT = 1
34
  TOTAL_INCREASING = 2
 
40
 
41
  def __init__(
42
  self,
43
+ server: APIServer,
44
  key: int,
45
  name: str,
46
  object_id: str,
 
50
  device_class: str = "",
51
  state_class: int = SensorStateClass.NONE,
52
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
53
+ value_getter: Optional[Callable[[], float]] = None,
54
  ) -> None:
55
  ESPHomeEntity.__init__(self, server)
56
  self.key = key
 
61
  self.accuracy_decimals = accuracy_decimals
62
  self.device_class = device_class
63
  self.entity_category = entity_category
64
+ # Convert string state_class to int if needed (for backward compatibility)
65
  if isinstance(state_class, str):
66
  state_class_map = {
67
  "": SensorStateClass.NONE,
 
118
 
119
  def __init__(
120
  self,
121
+ server: APIServer,
122
  key: int,
123
  name: str,
124
  object_id: str,
125
  icon: str = "",
126
  device_class: str = "",
127
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
128
+ value_getter: Optional[Callable[[], bool]] = None,
129
+ value_setter: Optional[Callable[[bool], None]] = None,
130
  ) -> None:
131
  ESPHomeEntity.__init__(self, server)
132
  self.key = key
 
183
 
184
  def __init__(
185
  self,
186
+ server: APIServer,
187
  key: int,
188
  name: str,
189
  object_id: str,
190
+ options: List[str],
191
  icon: str = "",
192
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
193
+ value_getter: Optional[Callable[[], str]] = None,
194
+ value_setter: Optional[Callable[[str], None]] = None,
195
  ) -> None:
196
  ESPHomeEntity.__init__(self, server)
197
  self.key = key
 
252
 
253
  def __init__(
254
  self,
255
+ server: APIServer,
256
  key: int,
257
  name: str,
258
  object_id: str,
259
  icon: str = "",
260
  device_class: str = "",
261
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
262
+ on_press: Optional[Callable[[], None]] = None,
263
  ) -> None:
264
  ESPHomeEntity.__init__(self, server)
265
  self.key = key
reachy_mini_ha_voice/entity_registry.py ADDED
@@ -0,0 +1,945 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Entity registry for ESPHome entities.
2
+
3
+ This module handles the registration and management of all ESPHome entities
4
+ for the Reachy Mini voice assistant.
5
+ """
6
+
7
+ import logging
8
+ from typing import TYPE_CHECKING, Callable, Dict, List, Optional
9
+
10
+ from .entity import BinarySensorEntity, CameraEntity, NumberEntity, TextSensorEntity
11
+ from .entity_extensions import SensorEntity, SwitchEntity, SelectEntity, ButtonEntity
12
+
13
+ if TYPE_CHECKING:
14
+ from .reachy_controller import ReachyController
15
+ from .camera_server import MJPEGCameraServer
16
+
17
+ _LOGGER = logging.getLogger(__name__)
18
+
19
+
20
+ # Fixed entity key mapping - ensures consistent keys across restarts
21
+ # Keys are based on object_id hash to ensure uniqueness and consistency
22
+ ENTITY_KEYS: Dict[str, int] = {
23
+ # Media player (key 0 reserved)
24
+ "reachy_mini_media_player": 0,
25
+ # Phase 1: Basic status and volume
26
+ "daemon_state": 100,
27
+ "backend_ready": 101,
28
+ "speaker_volume": 103,
29
+ # Phase 2: Motor control
30
+ "motors_enabled": 200,
31
+ "motor_mode": 201,
32
+ "wake_up": 202,
33
+ "go_to_sleep": 203,
34
+ # Phase 3: Pose control
35
+ "head_x": 300,
36
+ "head_y": 301,
37
+ "head_z": 302,
38
+ "head_roll": 303,
39
+ "head_pitch": 304,
40
+ "head_yaw": 305,
41
+ "body_yaw": 306,
42
+ "antenna_left": 307,
43
+ "antenna_right": 308,
44
+ # Phase 4: Look at control
45
+ "look_at_x": 400,
46
+ "look_at_y": 401,
47
+ "look_at_z": 402,
48
+ # Phase 5: DOA (Direction of Arrival) - re-added for wakeup turn-to-sound
49
+ "doa_angle": 500,
50
+ "speech_detected": 501,
51
+ # Phase 6: Diagnostic information
52
+ "control_loop_frequency": 600,
53
+ "sdk_version": 601,
54
+ "robot_name": 602,
55
+ "wireless_version": 603,
56
+ "simulation_mode": 604,
57
+ "wlan_ip": 605,
58
+ "error_message": 606, # Moved to diagnostic
59
+ # Phase 7: IMU sensors
60
+ "imu_accel_x": 700,
61
+ "imu_accel_y": 701,
62
+ "imu_accel_z": 702,
63
+ "imu_gyro_x": 703,
64
+ "imu_gyro_y": 704,
65
+ "imu_gyro_z": 705,
66
+ "imu_temperature": 706,
67
+ # Phase 8: Emotion selector
68
+ "emotion": 800,
69
+ # Phase 9: Audio controls
70
+ "microphone_volume": 900,
71
+ # Phase 10: Camera
72
+ "camera_url": 1000, # Keep for backward compatibility
73
+ "camera": 1001, # New camera entity
74
+ # Phase 11: LED control (disabled - not visible)
75
+ # "led_brightness": 1100,
76
+ # "led_effect": 1101,
77
+ # "led_color_r": 1102,
78
+ # "led_color_g": 1103,
79
+ # "led_color_b": 1104,
80
+ # Phase 12: Audio processing
81
+ "agc_enabled": 1200,
82
+ "agc_max_gain": 1201,
83
+ "noise_suppression": 1202,
84
+ "echo_cancellation_converged": 1203,
85
+ # Phase 13: Sendspin - auto-enabled via mDNS, no user entities needed
86
+ # Phase 21: Continuous conversation
87
+ "continuous_conversation": 1500,
88
+ # Phase 22: Gesture detection
89
+ "gesture_detected": 1600,
90
+ "gesture_confidence": 1601,
91
+ }
92
+
93
+
94
+ def get_entity_key(object_id: str) -> int:
95
+ """Get a consistent entity key for the given object_id."""
96
+ if object_id in ENTITY_KEYS:
97
+ return ENTITY_KEYS[object_id]
98
+ # Fallback: generate key from hash (should not happen if all entities are registered)
99
+ _LOGGER.warning(f"Entity key not found for {object_id}, generating from hash")
100
+ return abs(hash(object_id)) % 10000 + 2000
101
+
102
+
103
+ class EntityRegistry:
104
+ """Registry for managing ESPHome entities."""
105
+
106
+ def __init__(
107
+ self,
108
+ server,
109
+ reachy_controller: "ReachyController",
110
+ camera_server: Optional["MJPEGCameraServer"] = None,
111
+ play_emotion_callback: Optional[Callable[[str], None]] = None,
112
+ ):
113
+ """Initialize the entity registry.
114
+
115
+ Args:
116
+ server: The VoiceSatelliteProtocol server instance
117
+ reachy_controller: The ReachyController instance
118
+ camera_server: Optional camera server for camera entity
119
+ play_emotion_callback: Optional callback for playing emotions
120
+ """
121
+ self.server = server
122
+ self.reachy_controller = reachy_controller
123
+ self.camera_server = camera_server
124
+ self._play_emotion_callback = play_emotion_callback
125
+
126
+ # Gesture detection state
127
+ self._current_gesture = "none"
128
+ self._gesture_confidence = 0.0
129
+
130
+ # Emotion state
131
+ self._current_emotion = "None"
132
+ # Map emotion names to available robot emotions
133
+ # Full list of available emotions from robot
134
+ self._emotion_map = {
135
+ "None": None,
136
+ # Basic emotions
137
+ "Happy": "cheerful1",
138
+ "Sad": "sad1",
139
+ "Angry": "rage1",
140
+ "Fear": "fear1",
141
+ "Surprise": "surprised1",
142
+ "Disgust": "disgusted1",
143
+ # Extended emotions
144
+ "Laughing": "laughing1",
145
+ "Loving": "loving1",
146
+ "Proud": "proud1",
147
+ "Grateful": "grateful1",
148
+ "Enthusiastic": "enthusiastic1",
149
+ "Curious": "curious1",
150
+ "Amazed": "amazed1",
151
+ "Shy": "shy1",
152
+ "Confused": "confused1",
153
+ "Thoughtful": "thoughtful1",
154
+ "Anxious": "anxiety1",
155
+ "Scared": "scared1",
156
+ "Frustrated": "frustrated1",
157
+ "Irritated": "irritated1",
158
+ "Furious": "furious1",
159
+ "Contempt": "contempt1",
160
+ "Bored": "boredom1",
161
+ "Tired": "tired1",
162
+ "Exhausted": "exhausted1",
163
+ "Lonely": "lonely1",
164
+ "Downcast": "downcast1",
165
+ "Resigned": "resigned1",
166
+ "Uncertain": "uncertain1",
167
+ "Uncomfortable": "uncomfortable1",
168
+ "Lost": "lost1",
169
+ "Indifferent": "indifferent1",
170
+ # Positive actions
171
+ "Yes": "yes1",
172
+ "No": "no1",
173
+ "Welcoming": "welcoming1",
174
+ "Helpful": "helpful1",
175
+ "Attentive": "attentive1",
176
+ "Understanding": "understanding1",
177
+ "Calming": "calming1",
178
+ "Relief": "relief1",
179
+ "Success": "success1",
180
+ "Serenity": "serenity1",
181
+ # Negative actions
182
+ "Oops": "oops1",
183
+ "Displeased": "displeased1",
184
+ "Impatient": "impatient1",
185
+ "Reprimand": "reprimand1",
186
+ "GoAway": "go_away1",
187
+ # Special
188
+ "Come": "come1",
189
+ "Inquiring": "inquiring1",
190
+ "Sleep": "sleep1",
191
+ "Dance": "dance1",
192
+ "Electric": "electric1",
193
+ "Dying": "dying1",
194
+ }
195
+
196
+ def setup_all_entities(self, entities: List) -> None:
197
+ """Setup all entity phases.
198
+
199
+ Args:
200
+ entities: The list to append entities to
201
+ """
202
+ self._setup_phase1_entities(entities)
203
+ self._setup_phase2_entities(entities)
204
+ self._setup_phase3_entities(entities)
205
+ self._setup_phase4_entities(entities)
206
+ self._setup_phase5_entities(entities) # DOA for wakeup turn-to-sound
207
+ self._setup_phase6_entities(entities)
208
+ self._setup_phase7_entities(entities)
209
+ self._setup_phase8_entities(entities)
210
+ self._setup_phase9_entities(entities)
211
+ self._setup_phase10_entities(entities)
212
+ # Phase 11 (LED control) disabled - LEDs are inside the robot and not visible
213
+ self._setup_phase12_entities(entities)
214
+ # Phase 13 (Sendspin) - auto-enabled via mDNS discovery, no user entities
215
+ # Phase 14 (head_joints, passive_joints) removed - not needed
216
+ # Phase 20 (Tap detection) disabled - too many false triggers
217
+ self._setup_phase21_entities(entities)
218
+ self._setup_phase22_entities(entities)
219
+
220
+ _LOGGER.info("All entities registered: %d total", len(entities))
221
+
222
+ def _setup_phase1_entities(self, entities: List) -> None:
223
+ """Setup Phase 1 entities: Basic status and volume control."""
224
+ rc = self.reachy_controller
225
+
226
+ entities.append(TextSensorEntity(
227
+ server=self.server,
228
+ key=get_entity_key("daemon_state"),
229
+ name="Daemon State",
230
+ object_id="daemon_state",
231
+ icon="mdi:robot",
232
+ value_getter=rc.get_daemon_state,
233
+ ))
234
+
235
+ entities.append(BinarySensorEntity(
236
+ server=self.server,
237
+ key=get_entity_key("backend_ready"),
238
+ name="Backend Ready",
239
+ object_id="backend_ready",
240
+ icon="mdi:check-circle",
241
+ device_class="connectivity",
242
+ value_getter=rc.get_backend_ready,
243
+ ))
244
+
245
+ entities.append(NumberEntity(
246
+ server=self.server,
247
+ key=get_entity_key("speaker_volume"),
248
+ name="Speaker Volume",
249
+ object_id="speaker_volume",
250
+ min_value=0.0,
251
+ max_value=100.0,
252
+ step=1.0,
253
+ icon="mdi:volume-high",
254
+ unit_of_measurement="%",
255
+ mode=2, # Slider mode
256
+ entity_category=1, # config
257
+ value_getter=rc.get_speaker_volume,
258
+ value_setter=rc.set_speaker_volume,
259
+ ))
260
+
261
+ _LOGGER.debug("Phase 1 entities registered: daemon_state, backend_ready, speaker_volume")
262
+
263
+ def _setup_phase2_entities(self, entities: List) -> None:
264
+ """Setup Phase 2 entities: Motor control."""
265
+ rc = self.reachy_controller
266
+
267
+ entities.append(SwitchEntity(
268
+ server=self.server,
269
+ key=get_entity_key("motors_enabled"),
270
+ name="Motors Enabled",
271
+ object_id="motors_enabled",
272
+ icon="mdi:engine",
273
+ device_class="switch",
274
+ value_getter=rc.get_motors_enabled,
275
+ value_setter=rc.set_motors_enabled,
276
+ ))
277
+
278
+ entities.append(ButtonEntity(
279
+ server=self.server,
280
+ key=get_entity_key("wake_up"),
281
+ name="Wake Up",
282
+ object_id="wake_up",
283
+ icon="mdi:alarm",
284
+ device_class="restart",
285
+ on_press=rc.wake_up,
286
+ ))
287
+
288
+ entities.append(ButtonEntity(
289
+ server=self.server,
290
+ key=get_entity_key("go_to_sleep"),
291
+ name="Go to Sleep",
292
+ object_id="go_to_sleep",
293
+ icon="mdi:sleep",
294
+ device_class="restart",
295
+ on_press=rc.go_to_sleep,
296
+ ))
297
+
298
+ _LOGGER.debug("Phase 2 entities registered: motors_enabled, wake_up, go_to_sleep")
299
+
300
+ def _setup_phase3_entities(self, entities: List) -> None:
301
+ """Setup Phase 3 entities: Pose control."""
302
+ rc = self.reachy_controller
303
+
304
+ # Head position controls (X, Y, Z in mm)
305
+ entities.append(NumberEntity(
306
+ server=self.server,
307
+ key=get_entity_key("head_x"),
308
+ name="Head X Position",
309
+ object_id="head_x",
310
+ min_value=-50.0,
311
+ max_value=50.0,
312
+ step=1.0,
313
+ icon="mdi:axis-x-arrow",
314
+ unit_of_measurement="mm",
315
+ mode=2,
316
+ value_getter=rc.get_head_x,
317
+ value_setter=rc.set_head_x,
318
+ ))
319
+
320
+ entities.append(NumberEntity(
321
+ server=self.server,
322
+ key=get_entity_key("head_y"),
323
+ name="Head Y Position",
324
+ object_id="head_y",
325
+ min_value=-50.0,
326
+ max_value=50.0,
327
+ step=1.0,
328
+ icon="mdi:axis-y-arrow",
329
+ unit_of_measurement="mm",
330
+ mode=2,
331
+ value_getter=rc.get_head_y,
332
+ value_setter=rc.set_head_y,
333
+ ))
334
+
335
+ entities.append(NumberEntity(
336
+ server=self.server,
337
+ key=get_entity_key("head_z"),
338
+ name="Head Z Position",
339
+ object_id="head_z",
340
+ min_value=-50.0,
341
+ max_value=50.0,
342
+ step=1.0,
343
+ icon="mdi:axis-z-arrow",
344
+ unit_of_measurement="mm",
345
+ mode=2,
346
+ value_getter=rc.get_head_z,
347
+ value_setter=rc.set_head_z,
348
+ ))
349
+
350
+ # Head orientation controls (Roll, Pitch, Yaw in degrees)
351
+ entities.append(NumberEntity(
352
+ server=self.server,
353
+ key=get_entity_key("head_roll"),
354
+ name="Head Roll",
355
+ object_id="head_roll",
356
+ min_value=-40.0,
357
+ max_value=40.0,
358
+ step=1.0,
359
+ icon="mdi:rotate-3d-variant",
360
+ unit_of_measurement="°",
361
+ mode=2,
362
+ value_getter=rc.get_head_roll,
363
+ value_setter=rc.set_head_roll,
364
+ ))
365
+
366
+ entities.append(NumberEntity(
367
+ server=self.server,
368
+ key=get_entity_key("head_pitch"),
369
+ name="Head Pitch",
370
+ object_id="head_pitch",
371
+ min_value=-40.0,
372
+ max_value=40.0,
373
+ step=1.0,
374
+ icon="mdi:rotate-3d-variant",
375
+ unit_of_measurement="°",
376
+ mode=2,
377
+ value_getter=rc.get_head_pitch,
378
+ value_setter=rc.set_head_pitch,
379
+ ))
380
+
381
+ entities.append(NumberEntity(
382
+ server=self.server,
383
+ key=get_entity_key("head_yaw"),
384
+ name="Head Yaw",
385
+ object_id="head_yaw",
386
+ min_value=-180.0,
387
+ max_value=180.0,
388
+ step=1.0,
389
+ icon="mdi:rotate-3d-variant",
390
+ unit_of_measurement="°",
391
+ mode=2,
392
+ value_getter=rc.get_head_yaw,
393
+ value_setter=rc.set_head_yaw,
394
+ ))
395
+
396
+ # Body yaw control
397
+ entities.append(NumberEntity(
398
+ server=self.server,
399
+ key=get_entity_key("body_yaw"),
400
+ name="Body Yaw",
401
+ object_id="body_yaw",
402
+ min_value=-160.0,
403
+ max_value=160.0,
404
+ step=1.0,
405
+ icon="mdi:rotate-3d-variant",
406
+ unit_of_measurement="°",
407
+ mode=2,
408
+ value_getter=rc.get_body_yaw,
409
+ value_setter=rc.set_body_yaw,
410
+ ))
411
+
412
+ # Antenna controls
413
+ entities.append(NumberEntity(
414
+ server=self.server,
415
+ key=get_entity_key("antenna_left"),
416
+ name="Antenna(L)",
417
+ object_id="antenna_left",
418
+ min_value=-90.0,
419
+ max_value=90.0,
420
+ step=1.0,
421
+ icon="mdi:antenna",
422
+ unit_of_measurement="°",
423
+ mode=2,
424
+ value_getter=rc.get_antenna_left,
425
+ value_setter=rc.set_antenna_left,
426
+ ))
427
+
428
+ entities.append(NumberEntity(
429
+ server=self.server,
430
+ key=get_entity_key("antenna_right"),
431
+ name="Antenna(R)",
432
+ object_id="antenna_right",
433
+ min_value=-90.0,
434
+ max_value=90.0,
435
+ step=1.0,
436
+ icon="mdi:antenna",
437
+ unit_of_measurement="°",
438
+ mode=2,
439
+ value_getter=rc.get_antenna_right,
440
+ value_setter=rc.set_antenna_right,
441
+ ))
442
+
443
+ _LOGGER.debug("Phase 3 entities registered: head position/orientation, body_yaw, antennas")
444
+
445
+ def _setup_phase4_entities(self, entities: List) -> None:
446
+ """Setup Phase 4 entities: Look at control."""
447
+ rc = self.reachy_controller
448
+
449
+ entities.append(NumberEntity(
450
+ server=self.server,
451
+ key=get_entity_key("look_at_x"),
452
+ name="Look At X",
453
+ object_id="look_at_x",
454
+ min_value=-2.0,
455
+ max_value=2.0,
456
+ step=0.1,
457
+ icon="mdi:crosshairs-gps",
458
+ unit_of_measurement="m",
459
+ mode=1, # Box mode for precise input
460
+ value_getter=rc.get_look_at_x,
461
+ value_setter=rc.set_look_at_x,
462
+ ))
463
+
464
+ entities.append(NumberEntity(
465
+ server=self.server,
466
+ key=get_entity_key("look_at_y"),
467
+ name="Look At Y",
468
+ object_id="look_at_y",
469
+ min_value=-2.0,
470
+ max_value=2.0,
471
+ step=0.1,
472
+ icon="mdi:crosshairs-gps",
473
+ unit_of_measurement="m",
474
+ mode=1,
475
+ value_getter=rc.get_look_at_y,
476
+ value_setter=rc.set_look_at_y,
477
+ ))
478
+
479
+ entities.append(NumberEntity(
480
+ server=self.server,
481
+ key=get_entity_key("look_at_z"),
482
+ name="Look At Z",
483
+ object_id="look_at_z",
484
+ min_value=-2.0,
485
+ max_value=2.0,
486
+ step=0.1,
487
+ icon="mdi:crosshairs-gps",
488
+ unit_of_measurement="m",
489
+ mode=1,
490
+ value_getter=rc.get_look_at_z,
491
+ value_setter=rc.set_look_at_z,
492
+ ))
493
+
494
+ _LOGGER.debug("Phase 4 entities registered: look_at_x/y/z")
495
+
496
+ def _setup_phase5_entities(self, entities: List) -> None:
497
+ """Setup Phase 5 entities: DOA (Direction of Arrival) for wakeup turn-to-sound."""
498
+ rc = self.reachy_controller
499
+
500
+ entities.append(SensorEntity(
501
+ server=self.server,
502
+ key=get_entity_key("doa_angle"),
503
+ name="DOA Angle",
504
+ object_id="doa_angle",
505
+ icon="mdi:surround-sound",
506
+ unit_of_measurement="°",
507
+ accuracy_decimals=1,
508
+ state_class="measurement",
509
+ value_getter=rc.get_doa_angle_degrees,
510
+ ))
511
+
512
+ entities.append(BinarySensorEntity(
513
+ server=self.server,
514
+ key=get_entity_key("speech_detected"),
515
+ name="Speech Detected",
516
+ object_id="speech_detected",
517
+ icon="mdi:account-voice",
518
+ device_class="sound",
519
+ value_getter=rc.get_speech_detected,
520
+ ))
521
+
522
+ _LOGGER.debug("Phase 5 entities registered: doa_angle, speech_detected")
523
+
524
+ def _setup_phase6_entities(self, entities: List) -> None:
525
+ """Setup Phase 6 entities: Diagnostic information."""
526
+ rc = self.reachy_controller
527
+
528
+ entities.append(SensorEntity(
529
+ server=self.server,
530
+ key=get_entity_key("control_loop_frequency"),
531
+ name="Control Loop Frequency",
532
+ object_id="control_loop_frequency",
533
+ icon="mdi:speedometer",
534
+ unit_of_measurement="Hz",
535
+ accuracy_decimals=1,
536
+ state_class="measurement",
537
+ entity_category=2, # diagnostic
538
+ value_getter=rc.get_control_loop_frequency,
539
+ ))
540
+
541
+ entities.append(TextSensorEntity(
542
+ server=self.server,
543
+ key=get_entity_key("sdk_version"),
544
+ name="SDK Version",
545
+ object_id="sdk_version",
546
+ icon="mdi:information",
547
+ entity_category=2, # diagnostic
548
+ value_getter=rc.get_sdk_version,
549
+ ))
550
+
551
+ entities.append(TextSensorEntity(
552
+ server=self.server,
553
+ key=get_entity_key("robot_name"),
554
+ name="Robot Name",
555
+ object_id="robot_name",
556
+ icon="mdi:robot",
557
+ entity_category=2, # diagnostic
558
+ value_getter=rc.get_robot_name,
559
+ ))
560
+
561
+ entities.append(BinarySensorEntity(
562
+ server=self.server,
563
+ key=get_entity_key("wireless_version"),
564
+ name="Wireless Version",
565
+ object_id="wireless_version",
566
+ icon="mdi:wifi",
567
+ device_class="connectivity",
568
+ entity_category=2, # diagnostic
569
+ value_getter=rc.get_wireless_version,
570
+ ))
571
+
572
+ entities.append(BinarySensorEntity(
573
+ server=self.server,
574
+ key=get_entity_key("simulation_mode"),
575
+ name="Simulation Mode",
576
+ object_id="simulation_mode",
577
+ icon="mdi:virtual-reality",
578
+ entity_category=2, # diagnostic
579
+ value_getter=rc.get_simulation_mode,
580
+ ))
581
+
582
+ entities.append(TextSensorEntity(
583
+ server=self.server,
584
+ key=get_entity_key("wlan_ip"),
585
+ name="WLAN IP",
586
+ object_id="wlan_ip",
587
+ icon="mdi:ip-network",
588
+ entity_category=2, # diagnostic
589
+ value_getter=rc.get_wlan_ip,
590
+ ))
591
+
592
+ entities.append(TextSensorEntity(
593
+ server=self.server,
594
+ key=get_entity_key("error_message"),
595
+ name="Error Message",
596
+ object_id="error_message",
597
+ icon="mdi:alert-circle",
598
+ entity_category=2, # diagnostic
599
+ value_getter=rc.get_error_message,
600
+ ))
601
+
602
+ _LOGGER.debug(
603
+ "Phase 6 entities registered: control_loop_frequency, sdk_version, "
604
+ "robot_name, wireless_version, simulation_mode, wlan_ip, error_message"
605
+ )
606
+
607
+ def _setup_phase7_entities(self, entities: List) -> None:
608
+ """Setup Phase 7 entities: IMU sensors (wireless only)."""
609
+ rc = self.reachy_controller
610
+
611
+ # IMU Accelerometer
612
+ entities.append(SensorEntity(
613
+ server=self.server,
614
+ key=get_entity_key("imu_accel_x"),
615
+ name="IMU Accel X",
616
+ object_id="imu_accel_x",
617
+ icon="mdi:axis-x-arrow",
618
+ unit_of_measurement="m/s²",
619
+ accuracy_decimals=3,
620
+ state_class="measurement",
621
+ value_getter=rc.get_imu_accel_x,
622
+ ))
623
+
624
+ entities.append(SensorEntity(
625
+ server=self.server,
626
+ key=get_entity_key("imu_accel_y"),
627
+ name="IMU Accel Y",
628
+ object_id="imu_accel_y",
629
+ icon="mdi:axis-y-arrow",
630
+ unit_of_measurement="m/s²",
631
+ accuracy_decimals=3,
632
+ state_class="measurement",
633
+ value_getter=rc.get_imu_accel_y,
634
+ ))
635
+
636
+ entities.append(SensorEntity(
637
+ server=self.server,
638
+ key=get_entity_key("imu_accel_z"),
639
+ name="IMU Accel Z",
640
+ object_id="imu_accel_z",
641
+ icon="mdi:axis-z-arrow",
642
+ unit_of_measurement="m/s²",
643
+ accuracy_decimals=3,
644
+ state_class="measurement",
645
+ value_getter=rc.get_imu_accel_z,
646
+ ))
647
+
648
+ # IMU Gyroscope
649
+ entities.append(SensorEntity(
650
+ server=self.server,
651
+ key=get_entity_key("imu_gyro_x"),
652
+ name="IMU Gyro X",
653
+ object_id="imu_gyro_x",
654
+ icon="mdi:rotate-3d-variant",
655
+ unit_of_measurement="rad/s",
656
+ accuracy_decimals=3,
657
+ state_class="measurement",
658
+ value_getter=rc.get_imu_gyro_x,
659
+ ))
660
+
661
+ entities.append(SensorEntity(
662
+ server=self.server,
663
+ key=get_entity_key("imu_gyro_y"),
664
+ name="IMU Gyro Y",
665
+ object_id="imu_gyro_y",
666
+ icon="mdi:rotate-3d-variant",
667
+ unit_of_measurement="rad/s",
668
+ accuracy_decimals=3,
669
+ state_class="measurement",
670
+ value_getter=rc.get_imu_gyro_y,
671
+ ))
672
+
673
+ entities.append(SensorEntity(
674
+ server=self.server,
675
+ key=get_entity_key("imu_gyro_z"),
676
+ name="IMU Gyro Z",
677
+ object_id="imu_gyro_z",
678
+ icon="mdi:rotate-3d-variant",
679
+ unit_of_measurement="rad/s",
680
+ accuracy_decimals=3,
681
+ state_class="measurement",
682
+ value_getter=rc.get_imu_gyro_z,
683
+ ))
684
+
685
+ # IMU Temperature
686
+ entities.append(SensorEntity(
687
+ server=self.server,
688
+ key=get_entity_key("imu_temperature"),
689
+ name="IMU Temperature",
690
+ object_id="imu_temperature",
691
+ icon="mdi:thermometer",
692
+ unit_of_measurement="°C",
693
+ accuracy_decimals=1,
694
+ device_class="temperature",
695
+ state_class="measurement",
696
+ value_getter=rc.get_imu_temperature,
697
+ ))
698
+
699
+ _LOGGER.debug("Phase 7 entities registered: IMU accelerometer, gyroscope, temperature")
700
+
701
+ def _setup_phase8_entities(self, entities: List) -> None:
702
+ """Setup Phase 8 entities: Emotion selector."""
703
+
704
+ def get_emotion() -> str:
705
+ return self._current_emotion
706
+
707
+ def set_emotion(emotion: str) -> None:
708
+ self._current_emotion = emotion
709
+ emotion_name = self._emotion_map.get(emotion)
710
+ if emotion_name and self._play_emotion_callback:
711
+ self._play_emotion_callback(emotion_name)
712
+ # Reset to None after playing
713
+ self._current_emotion = "None"
714
+
715
+ entities.append(SelectEntity(
716
+ server=self.server,
717
+ key=get_entity_key("emotion"),
718
+ name="Emotion",
719
+ object_id="emotion",
720
+ options=list(self._emotion_map.keys()),
721
+ icon="mdi:emoticon",
722
+ value_getter=get_emotion,
723
+ value_setter=set_emotion,
724
+ ))
725
+
726
+ _LOGGER.debug("Phase 8 entities registered: emotion selector")
727
+
728
+ def _setup_phase9_entities(self, entities: List) -> None:
729
+ """Setup Phase 9 entities: Audio controls."""
730
+ rc = self.reachy_controller
731
+
732
+ entities.append(NumberEntity(
733
+ server=self.server,
734
+ key=get_entity_key("microphone_volume"),
735
+ name="Microphone Volume",
736
+ object_id="microphone_volume",
737
+ min_value=0.0,
738
+ max_value=100.0,
739
+ step=1.0,
740
+ icon="mdi:microphone",
741
+ unit_of_measurement="%",
742
+ mode=2, # Slider mode
743
+ entity_category=1, # config
744
+ value_getter=rc.get_microphone_volume,
745
+ value_setter=rc.set_microphone_volume,
746
+ ))
747
+
748
+ _LOGGER.debug("Phase 9 entities registered: microphone_volume")
749
+
750
+ def _setup_phase10_entities(self, entities: List) -> None:
751
+ """Setup Phase 10 entities: Camera for Home Assistant integration."""
752
+
753
+ def get_camera_image() -> Optional[bytes]:
754
+ """Get camera snapshot as JPEG bytes."""
755
+ if self.camera_server:
756
+ return self.camera_server.get_snapshot()
757
+ return None
758
+
759
+ entities.append(CameraEntity(
760
+ server=self.server,
761
+ key=get_entity_key("camera"),
762
+ name="Camera",
763
+ object_id="camera",
764
+ icon="mdi:camera",
765
+ image_getter=get_camera_image,
766
+ ))
767
+
768
+ _LOGGER.debug("Phase 10 entities registered: camera (ESPHome Camera entity)")
769
+
770
+ def _setup_phase12_entities(self, entities: List) -> None:
771
+ """Setup Phase 12 entities: Audio processing parameters (via local SDK)."""
772
+ rc = self.reachy_controller
773
+
774
+ def set_agc_enabled_with_save(enabled: bool) -> None:
775
+ """Set AGC enabled and save to preferences."""
776
+ rc.set_agc_enabled(enabled)
777
+ if hasattr(self.server, 'state') and self.server.state:
778
+ self.server.state.preferences.agc_enabled = enabled
779
+ self.server.state.save_preferences()
780
+ _LOGGER.debug("AGC enabled saved to preferences: %s", enabled)
781
+
782
+ def set_agc_max_gain_with_save(gain: float) -> None:
783
+ """Set AGC max gain and save to preferences."""
784
+ rc.set_agc_max_gain(gain)
785
+ if hasattr(self.server, 'state') and self.server.state:
786
+ self.server.state.preferences.agc_max_gain = gain
787
+ self.server.state.save_preferences()
788
+ _LOGGER.debug("AGC max gain saved to preferences: %.1f dB", gain)
789
+
790
+ def set_noise_suppression_with_save(level: float) -> None:
791
+ """Set noise suppression and save to preferences."""
792
+ rc.set_noise_suppression(level)
793
+ if hasattr(self.server, 'state') and self.server.state:
794
+ self.server.state.preferences.noise_suppression = level
795
+ self.server.state.save_preferences()
796
+ _LOGGER.debug("Noise suppression saved to preferences: %.1f%%", level)
797
+
798
+ entities.append(SwitchEntity(
799
+ server=self.server,
800
+ key=get_entity_key("agc_enabled"),
801
+ name="AGC Enabled",
802
+ object_id="agc_enabled",
803
+ icon="mdi:tune-vertical",
804
+ device_class="switch",
805
+ entity_category=1, # config
806
+ value_getter=rc.get_agc_enabled,
807
+ value_setter=set_agc_enabled_with_save,
808
+ ))
809
+
810
+ entities.append(NumberEntity(
811
+ server=self.server,
812
+ key=get_entity_key("agc_max_gain"),
813
+ name="AGC Max Gain",
814
+ object_id="agc_max_gain",
815
+ min_value=0.0,
816
+ max_value=40.0, # XVF3800 supports up to 40dB
817
+ step=1.0,
818
+ icon="mdi:volume-plus",
819
+ unit_of_measurement="dB",
820
+ mode=2,
821
+ entity_category=1, # config
822
+ value_getter=rc.get_agc_max_gain,
823
+ value_setter=set_agc_max_gain_with_save,
824
+ ))
825
+
826
+ entities.append(NumberEntity(
827
+ server=self.server,
828
+ key=get_entity_key("noise_suppression"),
829
+ name="Noise Suppression",
830
+ object_id="noise_suppression",
831
+ min_value=0.0,
832
+ max_value=100.0,
833
+ step=1.0,
834
+ icon="mdi:volume-off",
835
+ unit_of_measurement="%",
836
+ mode=2,
837
+ entity_category=1, # config
838
+ value_getter=rc.get_noise_suppression,
839
+ value_setter=set_noise_suppression_with_save,
840
+ ))
841
+
842
+ entities.append(BinarySensorEntity(
843
+ server=self.server,
844
+ key=get_entity_key("echo_cancellation_converged"),
845
+ name="Echo Cancellation Converged",
846
+ object_id="echo_cancellation_converged",
847
+ icon="mdi:waveform",
848
+ device_class="running",
849
+ entity_category=2, # diagnostic
850
+ value_getter=rc.get_echo_cancellation_converged,
851
+ ))
852
+
853
+ _LOGGER.debug(
854
+ "Phase 12 entities registered: agc_enabled, agc_max_gain, "
855
+ "noise_suppression, echo_cancellation_converged"
856
+ )
857
+
858
+ def _setup_phase21_entities(self, entities: List) -> None:
859
+ """Setup Phase 21 entities: Continuous conversation mode."""
860
+
861
+ def get_continuous_conversation() -> bool:
862
+ """Get current continuous conversation mode state."""
863
+ if hasattr(self.server, 'state') and self.server.state:
864
+ prefs = self.server.state.preferences
865
+ return getattr(prefs, 'continuous_conversation', False)
866
+ return False
867
+
868
+ def set_continuous_conversation(enabled: bool) -> None:
869
+ """Set continuous conversation mode and save to preferences."""
870
+ if hasattr(self.server, 'state') and self.server.state:
871
+ self.server.state.preferences.continuous_conversation = enabled
872
+ self.server.state.save_preferences()
873
+ _LOGGER.info("Continuous conversation mode %s", "enabled" if enabled else "disabled")
874
+
875
+ entities.append(SwitchEntity(
876
+ server=self.server,
877
+ key=get_entity_key("continuous_conversation"),
878
+ name="Continuous Conversation",
879
+ object_id="continuous_conversation",
880
+ icon="mdi:message-reply-text",
881
+ device_class="switch",
882
+ entity_category=1, # config
883
+ value_getter=get_continuous_conversation,
884
+ value_setter=set_continuous_conversation,
885
+ ))
886
+
887
+ _LOGGER.debug("Phase 21 entities registered: continuous_conversation")
888
+
889
+ def _setup_phase22_entities(self, entities: List) -> None:
890
+ """Setup Phase 22 entities: Gesture detection."""
891
+
892
+ def get_gesture() -> str:
893
+ """Get current detected gesture."""
894
+ if self.camera_server:
895
+ return self.camera_server.get_current_gesture()
896
+ return "none"
897
+
898
+ def get_gesture_confidence() -> float:
899
+ """Get gesture detection confidence."""
900
+ if self.camera_server:
901
+ return self.camera_server.get_gesture_confidence()
902
+ return 0.0
903
+
904
+ gesture_entity = TextSensorEntity(
905
+ server=self.server,
906
+ key=get_entity_key("gesture_detected"),
907
+ name="Gesture Detected",
908
+ object_id="gesture_detected",
909
+ icon="mdi:hand-wave",
910
+ value_getter=get_gesture,
911
+ )
912
+ entities.append(gesture_entity)
913
+ self._gesture_entity = gesture_entity
914
+
915
+ confidence_entity = SensorEntity(
916
+ server=self.server,
917
+ key=get_entity_key("gesture_confidence"),
918
+ name="Gesture Confidence",
919
+ object_id="gesture_confidence",
920
+ icon="mdi:percent",
921
+ unit_of_measurement="%",
922
+ accuracy_decimals=1,
923
+ state_class="measurement",
924
+ value_getter=get_gesture_confidence,
925
+ )
926
+ entities.append(confidence_entity)
927
+ self._gesture_confidence_entity = confidence_entity
928
+
929
+ _LOGGER.debug("Phase 22 entities registered: gesture_detected, gesture_confidence")
930
+
931
+ def update_gesture_state(self) -> None:
932
+ """Push gesture state update to Home Assistant."""
933
+ if hasattr(self, '_gesture_entity') and self._gesture_entity:
934
+ self._gesture_entity.update_state()
935
+ if hasattr(self, '_gesture_confidence_entity') and self._gesture_confidence_entity:
936
+ self._gesture_confidence_entity.update_state()
937
+
938
+ def find_entity_references(self, entities: List) -> None:
939
+ """Find and store references to special entities from existing list.
940
+
941
+ Args:
942
+ entities: The list of existing entities to search
943
+ """
944
+ # DOA entities are read-only sensors, no special references needed
945
+ pass
reachy_mini_ha_voice/gesture_detector.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gesture detection using HaGRID ONNX models."""
2
+
3
+ from __future__ import annotations
4
+ import logging
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Optional, Tuple
8
+
9
+ import cv2
10
+ import numpy as np
11
+ from numpy.typing import NDArray
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class Gesture(Enum):
    """Hand gestures exposed by the detector.

    Member values are the raw HaGRID label strings; ``NONE`` means
    "no gesture detected / below confidence". Declaration order is
    significant: ``_NAME_TO_GESTURE`` mirrors this ordering.
    """

    NONE = "no_gesture"
    CALL = "call"
    DISLIKE = "dislike"
    FIST = "fist"
    FOUR = "four"
    LIKE = "like"
    MUTE = "mute"
    OK = "ok"
    ONE = "one"
    PALM = "palm"
    PEACE = "peace"
    PEACE_INVERTED = "peace_inverted"
    ROCK = "rock"
    STOP = "stop"
    STOP_INVERTED = "stop_inverted"
    THREE = "three"
    THREE2 = "three2"
    TWO_UP = "two_up"
    TWO_UP_INVERTED = "two_up_inverted"
36
+
37
+
38
# Output labels of the HaGRID crops classifier, in model-output index order.
# NOTE(review): this ordering must match the ONNX classifier's class indices
# exactly — GestureDetector._classify() indexes this list with argmax(logits).
# Do not reorder. (Assumed to mirror the published HaGRID label set — confirm
# against the model card if the model file is updated.)
_GESTURE_CLASSES = [
    'hand_down', 'hand_right', 'hand_left', 'thumb_index', 'thumb_left',
    'thumb_right', 'thumb_down', 'half_up', 'half_left', 'half_right',
    'half_down', 'part_hand_heart', 'part_hand_heart2', 'fist_inverted',
    'two_left', 'two_right', 'two_down', 'grabbing', 'grip', 'point',
    'call', 'three3', 'little_finger', 'middle_finger', 'dislike', 'fist',
    'four', 'like', 'mute', 'ok', 'one', 'palm', 'peace', 'peace_inverted',
    'rock', 'stop', 'stop_inverted', 'three', 'three2', 'two_up',
    'two_up_inverted', 'three_gun', 'one_left', 'one_right', 'one_down'
]
48
+
49
# Classifier label -> public Gesture member. Every label the app supports is
# exactly its enum member's value, and Gesture's declaration order matches the
# original literal's insertion order, so the table is derived straight from
# the enum. Gesture.NONE ("no_gesture") is deliberately excluded; labels not
# in this mapping fall back to Gesture.NONE at lookup time.
_NAME_TO_GESTURE = {g.value: g for g in Gesture if g is not Gesture.NONE}
59
+
60
+
61
class GestureDetector:
    """Two-stage hand gesture recognizer using HaGRID ONNX models.

    Stage 1 (``hand_detector.onnx``) locates the single most confident hand
    box in a BGR frame; stage 2 (``crops_classifier.onnx``) classifies a
    square crop of that box into one of the ``_GESTURE_CLASSES`` labels.
    Both models run on CPU via onnxruntime. If onnxruntime or the model
    files are missing, the detector stays unavailable and ``detect()``
    always returns ``(Gesture.NONE, 0.0)``.
    """

    def __init__(self, confidence_threshold: float = 0.3, detection_threshold: float = 0.3):
        """Set up paths and preprocessing constants, then load the models.

        Args:
            confidence_threshold: Minimum classifier softmax confidence for a
                gesture to be reported (below it, Gesture.NONE is returned).
            detection_threshold: Minimum hand-detector score for a box to be
                considered at all.
        """
        self._confidence_threshold = confidence_threshold
        self._detection_threshold = detection_threshold
        # Models ship alongside the package in a "models" directory.
        models_dir = Path(__file__).parent / "models"
        self._detector_path = models_dir / "hand_detector.onnx"
        self._classifier_path = models_dir / "crops_classifier.onnx"
        self._detector = None
        self._classifier = None
        self._available = False
        # Normalization constants: maps uint8 pixels to roughly [-1, 1].
        self._mean = np.array([127, 127, 127], dtype=np.float32)
        self._std = np.array([128, 128, 128], dtype=np.float32)
        # Fixed input resolutions fed to cv2.resize for each model.
        self._detector_size = (320, 240)
        self._classifier_size = (128, 128)
        self._load_models()

    def _load_models(self) -> None:
        """Create both onnxruntime sessions; leave unavailable on any failure."""
        try:
            import onnxruntime as ort
        except ImportError:
            # Gesture support is optional: degrade gracefully.
            logger.warning("onnxruntime not installed")
            return
        if not self._detector_path.exists() or not self._classifier_path.exists():
            logger.warning("Model files not found")
            return
        try:
            providers = ['CPUExecutionProvider']
            logger.info("Loading gesture models...")
            self._detector = ort.InferenceSession(str(self._detector_path), providers=providers)
            self._classifier = ort.InferenceSession(str(self._classifier_path), providers=providers)
            # Cache input/output tensor names so inference calls avoid lookups.
            self._det_input = self._detector.get_inputs()[0].name
            self._det_outputs = [o.name for o in self._detector.get_outputs()]
            self._cls_input = self._classifier.get_inputs()[0].name
            self._available = True
            logger.info("Gesture detection ready")
        except Exception as e:
            logger.error("Failed to load models: %s", e)

    @property
    def is_available(self) -> bool:
        """True when both models loaded successfully and detect() is usable."""
        return self._available

    def _preprocess(self, frame: NDArray, size: Tuple[int, int]) -> NDArray:
        """Convert a BGR frame to a normalized NCHW float32 batch of one.

        Args:
            frame: BGR image (OpenCV convention — assumed; confirm at callers).
            size: Target (width, height) for cv2.resize.
        """
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, size)
        img = (img.astype(np.float32) - self._mean) / self._std
        # HWC -> CHW, then add the batch dimension.
        img = np.transpose(img, [2, 0, 1])
        return np.expand_dims(img, axis=0)

    def _detect_hand(self, frame: NDArray) -> Optional[Tuple[int, int, int, int, float]]:
        """Return the best hand box as (x1, y1, x2, y2, score), or None.

        Picks the single detection with the highest score above
        ``detection_threshold``; degenerate (empty) boxes are rejected.
        """
        if self._detector is None:
            return None
        h, w = frame.shape[:2]
        inp = self._preprocess(frame, self._detector_size)
        outs = self._detector.run(self._det_outputs, {self._det_input: inp})
        # NOTE(review): assumed detector output layout: outs[0]=boxes,
        # outs[2]=scores — confirm against the hand_detector.onnx signature.
        boxes = outs[0]
        scores = outs[2]
        if len(boxes) == 0:
            return None
        # Seed best_c with the threshold so anything below it is ignored.
        best_i, best_c = -1, self._detection_threshold
        for i, c in enumerate(scores):
            if c > best_c:
                best_c, best_i = float(c), i
        if best_i < 0:
            return None
        b = boxes[best_i]
        # Model outputs normalized coordinates (0-1), scale to original frame size
        x1, y1 = int(b[0] * w), int(b[1] * h)
        x2, y2 = int(b[2] * w), int(b[3] * h)
        # Clamp to frame bounds.
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w-1, x2), min(h-1, y2)
        if x2 <= x1 or y2 <= y1:
            return None
        return (x1, y1, x2, y2, best_c)

    def _get_square_crop(self, frame: NDArray, box: Tuple[int, int, int, int]) -> NDArray:
        """Crop the box expanded to a square (classifier expects square input).

        The shorter side is grown symmetrically to match the longer one, then
        the result is clamped to the frame; near an edge the crop may
        therefore end up non-square.
        """
        h, w = frame.shape[:2]
        x1, y1, x2, y2 = box
        bw, bh = x2 - x1, y2 - y1
        if bh < bw:
            y1, y2 = y1 - (bw - bh) // 2, y1 - (bw - bh) // 2 + bw
        elif bh > bw:
            x1, x2 = x1 - (bh - bw) // 2, x1 - (bh - bw) // 2 + bh
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w-1, x2), min(h-1, y2)
        return frame[y1:y2, x1:x2]

    def _classify(self, crop: NDArray) -> Tuple[Gesture, float]:
        """Classify a hand crop; returns (gesture, softmax confidence).

        Returns Gesture.NONE when the classifier is unavailable, the crop is
        empty, confidence is below ``confidence_threshold``, or the predicted
        label is not one the app exposes.
        """
        if self._classifier is None or crop.size == 0:
            return Gesture.NONE, 0.0
        inp = self._preprocess(crop, self._classifier_size)
        logits = self._classifier.run(None, {self._cls_input: inp})[0][0]
        idx = int(np.argmax(logits))
        # Numerically stable softmax for the winning class only.
        exp_l = np.exp(logits - np.max(logits))
        conf = float(exp_l[idx] / np.sum(exp_l))
        if idx >= len(_GESTURE_CLASSES) or conf < self._confidence_threshold:
            return Gesture.NONE, conf
        name = _GESTURE_CLASSES[idx]
        return _NAME_TO_GESTURE.get(name, Gesture.NONE), conf

    def detect(self, frame: NDArray) -> Tuple[Gesture, float]:
        """Run the full pipeline on a frame.

        Args:
            frame: BGR image (OpenCV convention — assumed).

        Returns:
            Tuple of (gesture, combined confidence). The confidence is the
            product of detector score and classifier softmax confidence.
            Any internal error is caught and reported as (Gesture.NONE, 0.0).
        """
        if not self._available:
            return Gesture.NONE, 0.0
        try:
            det = self._detect_hand(frame)
            if det is None:
                return Gesture.NONE, 0.0
            x1, y1, x2, y2, det_c = det
            logger.debug("Hand: box=(%d,%d,%d,%d) conf=%.2f", x1, y1, x2, y2, det_c)
            crop = self._get_square_crop(frame, (x1, y1, x2, y2))
            if crop.size == 0:
                return Gesture.NONE, 0.0
            gest, cls_c = self._classify(crop)
            if gest != Gesture.NONE:
                logger.debug("Gesture: %s (det=%.2f cls=%.2f)", gest.value, det_c, cls_c)
            return gest, det_c * cls_c
        except Exception as e:
            logger.warning("Gesture error: %s", e)
            return Gesture.NONE, 0.0

    def close(self) -> None:
        """Drop session references and mark the detector unavailable."""
        self._detector = self._classifier = None
        self._available = False
{reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/head_tracker.py RENAMED
@@ -3,35 +3,24 @@
3
  Ported from reachy_mini_conversation_app for voice assistant integration.
4
  Model is loaded at initialization time (not lazy) to ensure face tracking
5
  is ready immediately when the camera server starts.
6
-
7
- Performance Optimizations:
8
- - Optional frame downscaling for faster inference on low-power devices
9
- - Frame skip support for reduced CPU usage when tracking is stable
10
- - Configurable inference resolution (default: native resolution)
11
  """
12
 
13
  from __future__ import annotations
14
-
15
  import logging
16
- from typing import TYPE_CHECKING
17
 
18
  import numpy as np
 
19
 
20
- if TYPE_CHECKING:
21
- from numpy.typing import NDArray
22
 
23
  logger = logging.getLogger(__name__)
24
 
25
 
26
  class HeadTracker:
27
  """Lightweight head tracker using YOLO for face detection.
28
-
29
  Model is loaded at initialization time to ensure face tracking
30
  is ready immediately (matching conversation_app behavior).
31
-
32
- Performance Features:
33
- - Frame downscaling: Reduces inference resolution for ~4x speedup
34
- - Frame skipping: Reuses last detection result for stable tracking
35
  """
36
 
37
  def __init__(
@@ -40,7 +29,6 @@ class HeadTracker:
40
  model_filename: str = "model.pt",
41
  confidence_threshold: float = 0.3,
42
  device: str = "cpu",
43
- inference_scale: float = 1.0, # Scale factor for inference (0.5 = half resolution)
44
  ) -> None:
45
  """Initialize YOLO-based head tracker.
46
 
@@ -49,7 +37,6 @@ class HeadTracker:
49
  model_filename: Model file name
50
  confidence_threshold: Minimum confidence for face detection
51
  device: Device to run inference on ('cpu' or 'cuda')
52
- inference_scale: Scale factor for inference (0.5 = half res for ~4x speedup)
53
  """
54
  self.confidence_threshold = confidence_threshold
55
  self.model = None
@@ -58,57 +45,57 @@ class HeadTracker:
58
  self._device = device
59
  self._detections_class = None
60
  self._model_load_attempted = False
61
- self._model_load_error: str | None = None
62
-
63
- # Performance optimization settings
64
- self._inference_scale = min(1.0, max(0.25, inference_scale))
65
-
66
- # Frame skip support for stable tracking
67
- self._last_detection: tuple[NDArray, float] | None = None
68
- self._frames_since_detection = 0
69
- self._max_skip_frames = 0 # 0 = no skipping (can be set externally)
70
-
71
  # Load model immediately at init (not lazy)
72
  self._load_model()
73
 
74
  def _load_model(self) -> None:
75
- """Load YOLO model for face detection."""
76
  if self._model_load_attempted:
77
  return
78
-
79
  self._model_load_attempted = True
80
-
81
  try:
82
- from pathlib import Path
83
-
84
- from supervision import Detections
85
  from ultralytics import YOLO
86
-
 
 
 
87
  self._detections_class = Detections
88
-
89
- # Load local model from models directory
90
- models_dir = Path(__file__).resolve().parents[1] / "models"
91
- local_model_path = models_dir / self._model_filename
92
-
93
- if not local_model_path.exists():
94
- raise FileNotFoundError(
95
- f"Model file not found: {local_model_path}. "
96
- f"Please place {self._model_filename} in the models directory."
97
- )
98
-
99
- model_path = str(local_model_path)
100
- logger.info("Loading local YOLO model: %s", model_path)
101
-
 
 
 
 
 
 
 
 
 
 
 
 
102
  self.model = YOLO(model_path).to(self._device)
103
- logger.info("YOLO face detection model loaded successfully")
104
  except ImportError as e:
105
  self._model_load_error = f"Missing dependencies: {e}"
106
  logger.warning("Face tracking disabled - missing dependencies: %s", e)
107
  self.model = None
108
- except FileNotFoundError as e:
109
- self._model_load_error = str(e)
110
- logger.error("Failed to load YOLO model: %s", e)
111
- self.model = None
112
  except Exception as e:
113
  self._model_load_error = str(e)
114
  logger.error("Failed to load YOLO model: %s", e)
@@ -119,7 +106,7 @@ class HeadTracker:
119
  """Check if the head tracker is available and ready."""
120
  return self.model is not None and self._detections_class is not None
121
 
122
- def _select_best_face(self, detections) -> int | None:
123
  """Select the best face based on confidence and area.
124
 
125
  Args:
@@ -152,7 +139,9 @@ class HeadTracker:
152
  best_idx = valid_indices[np.argmax(scores)]
153
  return int(best_idx)
154
 
155
- def _bbox_to_normalized_coords(self, bbox: NDArray[np.float32], w: int, h: int) -> NDArray[np.float32]:
 
 
156
  """Convert bounding box center to normalized coordinates [-1, 1].
157
 
158
  Args:
@@ -172,7 +161,9 @@ class HeadTracker:
172
 
173
  return np.array([norm_x, norm_y], dtype=np.float32)
174
 
175
- def get_head_position(self, img: NDArray[np.uint8]) -> tuple[NDArray[np.float32] | None, float | None]:
 
 
176
  """Get head position from face detection.
177
 
178
  Args:
@@ -186,36 +177,14 @@ class HeadTracker:
186
 
187
  h, w = img.shape[:2]
188
 
189
- # Frame skip optimization: return last detection if within skip limit
190
- if (
191
- self._max_skip_frames > 0
192
- and self._last_detection is not None
193
- and self._frames_since_detection < self._max_skip_frames
194
- ):
195
- self._frames_since_detection += 1
196
- return self._last_detection
197
-
198
  try:
199
- # Downscale image for faster inference if scale < 1.0
200
- if self._inference_scale < 1.0:
201
- import cv2
202
-
203
- new_w = int(w * self._inference_scale)
204
- new_h = int(h * self._inference_scale)
205
- inference_img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
206
- else:
207
- inference_img = img
208
- new_w, new_h = w, h
209
-
210
  # Run YOLO inference
211
- results = self.model(inference_img, verbose=False)
212
  detections = self._detections_class.from_ultralytics(results[0])
213
 
214
  # Select best face
215
  face_idx = self._select_best_face(detections)
216
  if face_idx is None:
217
- self._last_detection = None
218
- self._frames_since_detection = 0
219
  return None, None
220
 
221
  bbox = detections.xyxy[face_idx]
@@ -223,90 +192,11 @@ class HeadTracker:
223
  if detections.confidence is not None:
224
  confidence = float(detections.confidence[face_idx])
225
 
226
- # Scale bbox back to original resolution if downscaled
227
- if self._inference_scale < 1.0:
228
- scale_factor = 1.0 / self._inference_scale
229
- bbox = bbox * scale_factor
230
-
231
- # Get face center in [-1, 1] coordinates (using original dimensions)
232
  face_center = self._bbox_to_normalized_coords(bbox, w, h)
233
 
234
- # Cache result for frame skipping
235
- self._last_detection = (face_center, confidence)
236
- self._frames_since_detection = 0
237
-
238
  return face_center, confidence
239
 
240
  except Exception as e:
241
  logger.debug("Error in head position detection: %s", e)
242
  return None, None
243
-
244
- def set_inference_scale(self, scale: float) -> None:
245
- """Set the inference resolution scale factor.
246
-
247
- Args:
248
- scale: Scale factor (0.25 to 1.0). Lower = faster but less accurate.
249
- """
250
- self._inference_scale = min(1.0, max(0.25, scale))
251
- logger.debug("Inference scale set to %.2f", self._inference_scale)
252
-
253
- def set_max_skip_frames(self, skip: int) -> None:
254
- """Set maximum frames to skip between detections.
255
-
256
- Args:
257
- skip: Number of frames to skip (0 = no skipping).
258
- Higher values reduce CPU but may cause tracking lag.
259
- """
260
- self._max_skip_frames = max(0, skip)
261
- logger.debug("Max skip frames set to %d", self._max_skip_frames)
262
-
263
- def clear_detection_cache(self) -> None:
264
- """Clear cached detection result."""
265
- self._last_detection = None
266
- self._frames_since_detection = 0
267
-
268
- def suspend(self) -> None:
269
- """Suspend the head tracker to release YOLO model from memory.
270
-
271
- Call resume() to reload the model.
272
- """
273
- if self.model is None:
274
- logger.debug("HeadTracker model not loaded, nothing to suspend")
275
- return
276
-
277
- logger.info("Suspending HeadTracker - releasing YOLO model...")
278
-
279
- try:
280
- # Release YOLO model from memory
281
- del self.model
282
- self.model = None
283
-
284
- # Also clear the detections class reference
285
- self._detections_class = None
286
-
287
- # Reset load state so resume can reload
288
- self._model_load_attempted = False
289
- self._model_load_error = None
290
-
291
- # Clear detection cache
292
- self.clear_detection_cache()
293
-
294
- logger.info("HeadTracker suspended - YOLO model released")
295
- except Exception as e:
296
- logger.warning("Error suspending HeadTracker: %s", e)
297
-
298
- def resume(self) -> None:
299
- """Resume the head tracker by reloading the YOLO model."""
300
- if self.model is not None:
301
- logger.debug("HeadTracker model already loaded")
302
- return
303
-
304
- logger.info("Resuming HeadTracker - reloading YOLO model...")
305
-
306
- # Reload the model
307
- self._load_model()
308
-
309
- if self.is_available:
310
- logger.info("HeadTracker resumed - YOLO model loaded")
311
- else:
312
- logger.warning("HeadTracker resume failed - model not available")
 
3
  Ported from reachy_mini_conversation_app for voice assistant integration.
4
  Model is loaded at initialization time (not lazy) to ensure face tracking
5
  is ready immediately when the camera server starts.
 
 
 
 
 
6
  """
7
 
8
  from __future__ import annotations
 
9
  import logging
10
+ from typing import Tuple, Optional
11
 
12
  import numpy as np
13
+ from numpy.typing import NDArray
14
 
 
 
15
 
16
  logger = logging.getLogger(__name__)
17
 
18
 
19
  class HeadTracker:
20
  """Lightweight head tracker using YOLO for face detection.
21
+
22
  Model is loaded at initialization time to ensure face tracking
23
  is ready immediately (matching conversation_app behavior).
 
 
 
 
24
  """
25
 
26
  def __init__(
 
29
  model_filename: str = "model.pt",
30
  confidence_threshold: float = 0.3,
31
  device: str = "cpu",
 
32
  ) -> None:
33
  """Initialize YOLO-based head tracker.
34
 
 
37
  model_filename: Model file name
38
  confidence_threshold: Minimum confidence for face detection
39
  device: Device to run inference on ('cpu' or 'cuda')
 
40
  """
41
  self.confidence_threshold = confidence_threshold
42
  self.model = None
 
45
  self._device = device
46
  self._detections_class = None
47
  self._model_load_attempted = False
48
+ self._model_load_error: Optional[str] = None
49
+
 
 
 
 
 
 
 
 
50
  # Load model immediately at init (not lazy)
51
  self._load_model()
52
 
53
  def _load_model(self) -> None:
54
+ """Load YOLO model with retry logic."""
55
  if self._model_load_attempted:
56
  return
57
+
58
  self._model_load_attempted = True
59
+
60
  try:
 
 
 
61
  from ultralytics import YOLO
62
+ from supervision import Detections
63
+ from huggingface_hub import hf_hub_download
64
+ import time
65
+
66
  self._detections_class = Detections
67
+
68
+ # Download with retries
69
+ max_retries = 3
70
+ retry_delay = 5
71
+ model_path = None
72
+ last_error = None
73
+
74
+ for attempt in range(max_retries):
75
+ try:
76
+ model_path = hf_hub_download(
77
+ repo_id=self._model_repo,
78
+ filename=self._model_filename,
79
+ )
80
+ break
81
+ except Exception as e:
82
+ last_error = e
83
+ if attempt < max_retries - 1:
84
+ logger.warning(
85
+ "Model download failed (attempt %d/%d): %s. Retrying in %ds...",
86
+ attempt + 1, max_retries, e, retry_delay
87
+ )
88
+ time.sleep(retry_delay)
89
+
90
+ if model_path is None:
91
+ raise last_error
92
+
93
  self.model = YOLO(model_path).to(self._device)
94
+ logger.info("YOLO face detection model loaded")
95
  except ImportError as e:
96
  self._model_load_error = f"Missing dependencies: {e}"
97
  logger.warning("Face tracking disabled - missing dependencies: %s", e)
98
  self.model = None
 
 
 
 
99
  except Exception as e:
100
  self._model_load_error = str(e)
101
  logger.error("Failed to load YOLO model: %s", e)
 
106
  """Check if the head tracker is available and ready."""
107
  return self.model is not None and self._detections_class is not None
108
 
109
+ def _select_best_face(self, detections) -> Optional[int]:
110
  """Select the best face based on confidence and area.
111
 
112
  Args:
 
139
  best_idx = valid_indices[np.argmax(scores)]
140
  return int(best_idx)
141
 
142
+ def _bbox_to_normalized_coords(
143
+ self, bbox: NDArray[np.float32], w: int, h: int
144
+ ) -> NDArray[np.float32]:
145
  """Convert bounding box center to normalized coordinates [-1, 1].
146
 
147
  Args:
 
161
 
162
  return np.array([norm_x, norm_y], dtype=np.float32)
163
 
164
+ def get_head_position(
165
+ self, img: NDArray[np.uint8]
166
+ ) -> Tuple[Optional[NDArray[np.float32]], Optional[float]]:
167
  """Get head position from face detection.
168
 
169
  Args:
 
177
 
178
  h, w = img.shape[:2]
179
 
 
 
 
 
 
 
 
 
 
180
  try:
 
 
 
 
 
 
 
 
 
 
 
181
  # Run YOLO inference
182
+ results = self.model(img, verbose=False)
183
  detections = self._detections_class.from_ultralytics(results[0])
184
 
185
  # Select best face
186
  face_idx = self._select_best_face(detections)
187
  if face_idx is None:
 
 
188
  return None, None
189
 
190
  bbox = detections.xyxy[face_idx]
 
192
  if detections.confidence is not None:
193
  confidence = float(detections.confidence[face_idx])
194
 
195
+ # Get face center in [-1, 1] coordinates
 
 
 
 
 
196
  face_center = self._bbox_to_normalized_coords(bbox, w, h)
197
 
 
 
 
 
198
  return face_center, confidence
199
 
200
  except Exception as e:
201
  logger.debug("Error in head position detection: %s", e)
202
  return None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/main.py RENAMED
@@ -7,42 +7,48 @@ with Home Assistant via ESPHome protocol for voice control.
7
 
8
  import asyncio
9
  import logging
10
- import os
11
- import pathlib
12
- import sys
13
  import threading
 
 
14
 
15
- from reachy_mini import ReachyMiniApp
16
 
17
- from .core import get_health_monitor, get_memory_monitor
18
- from .voice_assistant import VoiceAssistantService
19
 
20
- logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
21
 
 
 
 
 
 
 
22
 
23
- def _normalize_home_for_audio_utils() -> None:
24
- """Normalize HOME on robot so SDK audio_utils resolves ~/.asoundrc reliably."""
25
- if not sys.platform.startswith("linux"):
26
- return
27
 
28
- current_home = os.environ.get("HOME", "")
29
- user = os.environ.get("USER", "pollen")
30
- preferred_home = f"/home/{user}"
31
- preferred_path = pathlib.Path(preferred_home)
32
 
33
- if not preferred_path.exists():
34
- # Fallback for environments where USER is not set as expected.
35
- preferred_home = "/home/pollen"
36
- preferred_path = pathlib.Path(preferred_home)
37
 
38
- if not preferred_path.exists():
39
- return
40
 
41
- # Force deterministic robot HOME for SDK Path.home() checks.
42
- # Only adjust when HOME is missing or points outside /home.
43
- if not current_home or not current_home.startswith("/home/"):
44
- os.environ["HOME"] = preferred_home
45
- logger.warning("Adjusted HOME from '%s' to '%s' for audio routing", current_home, preferred_home)
46
 
47
 
48
  class ReachyMiniHaVoice(ReachyMiniApp):
@@ -55,72 +61,73 @@ class ReachyMiniHaVoice(ReachyMiniApp):
55
  """
56
 
57
  # No custom web UI needed - configuration is automatic via Home Assistant
58
- custom_app_url: str | None = None
59
 
60
  def __init__(self, *args, **kwargs):
61
  """Initialize the app."""
62
  super().__init__(*args, **kwargs)
63
- if not hasattr(self, "stop_event"):
64
  self.stop_event = threading.Event()
65
 
66
  def wrapped_run(self, *args, **kwargs) -> None:
67
  """
68
- Override wrapped_run to handle Reachy Mini connection failures.
 
 
69
  """
70
  logger.info("Starting Reachy Mini HA Voice App...")
71
 
72
- _normalize_home_for_audio_utils()
 
 
 
 
 
73
 
74
- # Connect to ReachyMini
75
- try:
76
- logger.info("Attempting to connect to Reachy Mini...")
77
- super().wrapped_run(*args, **kwargs)
78
- except TimeoutError as e:
79
- logger.error(f"Timeout connecting to Reachy Mini: {e}")
80
- sys.exit(1)
81
- except Exception as e:
82
- error_str = str(e)
83
- if "Unable to connect" in error_str or "Timeout" in error_str:
84
- logger.error(f"Failed to connect to Reachy Mini: {e}")
85
- sys.exit(1)
86
- else:
87
- raise
 
 
 
 
 
 
 
 
 
 
88
 
89
  def run(self, reachy_mini, stop_event: threading.Event) -> None:
90
  """
91
  Main application entry point.
92
 
93
  Args:
94
- reachy_mini: The Reachy Mini robot instance (required, cannot be None)
95
  stop_event: Event to signal graceful shutdown
96
  """
97
  logger.info("Starting Reachy Mini for Home Assistant...")
98
 
99
- # Optional health/memory monitors
100
- enable_monitors = os.environ.get("REACHY_ENABLE_FRAMEWORK_MONITORS", "1").lower() in ("1", "true", "yes", "on")
101
- health_monitor = get_health_monitor() if enable_monitors else None
102
- memory_monitor = get_memory_monitor() if enable_monitors else None
103
-
104
  # Create and run the HA service
105
  service = VoiceAssistantService(reachy_mini)
106
 
107
- if enable_monitors:
108
- health_monitor.register_checker(
109
- "voice_assistant",
110
- lambda: service.is_running if hasattr(service, "is_running") else True,
111
- interval=30.0,
112
- )
113
-
114
  # Always create a new event loop to avoid conflicts with SDK
115
  loop = asyncio.new_event_loop()
116
  asyncio.set_event_loop(loop)
117
  logger.debug("Created new event loop for HA service")
118
 
119
  try:
120
- if enable_monitors:
121
- health_monitor.start()
122
- memory_monitor.start()
123
-
124
  loop.run_until_complete(service.start())
125
 
126
  logger.info("=" * 50)
@@ -129,8 +136,12 @@ class ReachyMiniHaVoice(ReachyMiniApp):
129
  logger.info("ESPHome Server: 0.0.0.0:6053")
130
  logger.info("Camera Server: 0.0.0.0:8081")
131
  logger.info("Wake word: Okay Nabu")
132
- logger.info("Motion control: enabled")
133
- logger.info("Camera: enabled (Reachy Mini)")
 
 
 
 
134
  logger.info("=" * 50)
135
  logger.info("To connect from Home Assistant:")
136
  logger.info(" Settings -> Devices & Services -> Add Integration")
@@ -155,10 +166,6 @@ class ReachyMiniHaVoice(ReachyMiniApp):
155
  except Exception as e:
156
  logger.error(f"Error stopping service: {e}")
157
 
158
- if enable_monitors:
159
- health_monitor.stop()
160
- memory_monitor.stop()
161
-
162
  # Note: Robot connection cleanup is handled by SDK's context manager
163
  # in wrapped_run(). We only need to close our event loop here.
164
 
@@ -171,19 +178,13 @@ class ReachyMiniHaVoice(ReachyMiniApp):
171
  logger.info("Reachy Mini HA stopped.")
172
 
173
 
174
- # This is called when running as: python -m reachy_mini_home_assistant.main
175
  if __name__ == "__main__":
176
  logging.basicConfig(
177
  level=logging.INFO,
178
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
179
  )
180
 
181
- # Reduce verbosity for some noisy modules
182
- logging.getLogger("reachy_mini.media.media_manager").setLevel(logging.WARNING)
183
- logging.getLogger("reachy_mini.media.camera_base").setLevel(logging.WARNING)
184
- logging.getLogger("reachy_mini.media.audio_base").setLevel(logging.WARNING)
185
- logging.getLogger("matplotlib").setLevel(logging.WARNING)
186
-
187
  app = ReachyMiniHaVoice()
188
  try:
189
  app.wrapped_run()
 
7
 
8
  import asyncio
9
  import logging
10
+ import socket
 
 
11
  import threading
12
+ import time
13
+ from typing import Optional
14
 
15
+ logger = logging.getLogger(__name__)
16
 
 
 
17
 
18
def _check_zenoh_available(timeout: float = 1.0, port: int = 7447) -> bool:
    """Check whether a Zenoh router is reachable on localhost.

    Args:
        timeout: Connection timeout in seconds.
        port: TCP port to probe (defaults to Zenoh's standard 7447).

    Returns:
        True if a TCP connection to 127.0.0.1:<port> succeeds, else False.
    """
    try:
        # Connection is closed immediately; we only probe reachability.
        with socket.create_connection(("127.0.0.1", port), timeout=timeout):
            return True
    except OSError:
        # OSError covers ConnectionRefusedError and socket.timeout (both are
        # OSError subclasses — the original tuple was redundant) as well as
        # other network failures such as EHOSTUNREACH.
        return False
25
+
26
 
27
+ # Only import ReachyMiniApp if we're running as an app
28
+ try:
29
+ from reachy_mini import ReachyMini, ReachyMiniApp
30
+ REACHY_MINI_AVAILABLE = True
31
+ except ImportError:
32
+ REACHY_MINI_AVAILABLE = False
33
 
34
+ # Create a dummy base class
35
+ class ReachyMiniApp:
36
+ custom_app_url = None
 
37
 
38
+ def __init__(self):
39
+ self.stop_event = threading.Event()
 
 
40
 
41
+ def wrapped_run(self, *args, **kwargs):
42
+ pass
 
 
43
 
44
+ def stop(self):
45
+ self.stop_event.set()
46
 
47
+ ReachyMini = None
48
+
49
+
50
+ from .voice_assistant import VoiceAssistantService
51
+ from .motion import ReachyMiniMotion
52
 
53
 
54
  class ReachyMiniHaVoice(ReachyMiniApp):
 
61
  """
62
 
63
  # No custom web UI needed - configuration is automatic via Home Assistant
64
+ custom_app_url: Optional[str] = None
65
 
66
  def __init__(self, *args, **kwargs):
67
  """Initialize the app."""
68
  super().__init__(*args, **kwargs)
69
+ if not hasattr(self, 'stop_event'):
70
  self.stop_event = threading.Event()
71
 
72
def wrapped_run(self, *args, **kwargs) -> None:
    """
    Override wrapped_run to handle Zenoh connection failures gracefully.

    If Zenoh is not available, run in standalone mode without robot control.
    """
    logger.info("Starting Reachy Mini HA Voice App...")

    # Check if Zenoh is available before trying to connect
    if not _check_zenoh_available():
        logger.warning("Zenoh service not available (port 7447)")
        logger.info("Running in standalone mode without robot control")
        self._run_standalone()
        return

    # Zenoh is available, try normal startup with ReachyMini
    if REACHY_MINI_AVAILABLE:
        try:
            logger.info("Attempting to connect to Reachy Mini...")
            super().wrapped_run(*args, **kwargs)
        except TimeoutError as e:
            # Zenoh answered the port probe but the SDK handshake timed out.
            logger.warning(f"Timeout connecting to Reachy Mini: {e}")
            logger.info("Falling back to standalone mode")
            self._run_standalone()
        except Exception as e:
            # NOTE(review): string-matching the message is fragile; presumably
            # the SDK exposes no dedicated exception types for these
            # connection failures — confirm against the reachy_mini SDK.
            error_str = str(e)
            if "Unable to connect" in error_str or "ZError" in error_str or "Timeout" in error_str:
                logger.warning(f"Failed to connect to Reachy Mini: {e}")
                logger.info("Falling back to standalone mode")
                self._run_standalone()
            else:
                # Unknown failure mode: re-raise rather than silently degrade.
                raise
    else:
        logger.info("Reachy Mini SDK not available, running standalone")
        self._run_standalone()
107
+
108
def _run_standalone(self) -> None:
    """Launch the service with no robot instance (standalone mode)."""
    self.run(None, self.stop_event)
111
 
112
  def run(self, reachy_mini, stop_event: threading.Event) -> None:
113
  """
114
  Main application entry point.
115
 
116
  Args:
117
+ reachy_mini: The Reachy Mini robot instance (can be None)
118
  stop_event: Event to signal graceful shutdown
119
  """
120
  logger.info("Starting Reachy Mini for Home Assistant...")
121
 
 
 
 
 
 
122
  # Create and run the HA service
123
  service = VoiceAssistantService(reachy_mini)
124
 
 
 
 
 
 
 
 
125
  # Always create a new event loop to avoid conflicts with SDK
126
  loop = asyncio.new_event_loop()
127
  asyncio.set_event_loop(loop)
128
  logger.debug("Created new event loop for HA service")
129
 
130
  try:
 
 
 
 
131
  loop.run_until_complete(service.start())
132
 
133
  logger.info("=" * 50)
 
136
  logger.info("ESPHome Server: 0.0.0.0:6053")
137
  logger.info("Camera Server: 0.0.0.0:8081")
138
  logger.info("Wake word: Okay Nabu")
139
+ if reachy_mini:
140
+ logger.info("Motion control: enabled")
141
+ logger.info("Camera: enabled (Reachy Mini)")
142
+ else:
143
+ logger.info("Motion control: disabled (no robot)")
144
+ logger.info("Camera: test pattern (no robot)")
145
  logger.info("=" * 50)
146
  logger.info("To connect from Home Assistant:")
147
  logger.info(" Settings -> Devices & Services -> Add Integration")
 
166
  except Exception as e:
167
  logger.error(f"Error stopping service: {e}")
168
 
 
 
 
 
169
  # Note: Robot connection cleanup is handled by SDK's context manager
170
  # in wrapped_run(). We only need to close our event loop here.
171
 
 
178
  logger.info("Reachy Mini HA stopped.")
179
 
180
 
181
+ # This is called when running as: python -m reachy_mini_ha_voice.main
182
  if __name__ == "__main__":
183
  logging.basicConfig(
184
  level=logging.INFO,
185
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
186
  )
187
 
 
 
 
 
 
 
188
  app = ReachyMiniHaVoice()
189
  try:
190
  app.wrapped_run()
reachy_mini_ha_voice/models.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared models for Reachy Mini Voice Assistant."""
2
+
3
+ import json
4
+ import logging
5
+ from dataclasses import asdict, dataclass, field
6
+ from enum import Enum
7
+ from pathlib import Path
8
+ from queue import Queue
9
+ from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
10
+
11
+ if TYPE_CHECKING:
12
+ from pymicro_wakeword import MicroWakeWord
13
+ from pyopen_wakeword import OpenWakeWord
14
+ from .entity import ESPHomeEntity, MediaPlayerEntity
15
+ from .audio_player import AudioPlayer
16
+ from .satellite import VoiceSatelliteProtocol
17
+
18
+ _LOGGER = logging.getLogger(__name__)
19
+
20
+
21
+ class WakeWordType(str, Enum):
22
+ MICRO_WAKE_WORD = "micro"
23
+ OPEN_WAKE_WORD = "openWakeWord"
24
+
25
+
26
+ @dataclass
27
+ class AvailableWakeWord:
28
+ id: str
29
+ type: WakeWordType
30
+ wake_word: str
31
+ trained_languages: List[str]
32
+ wake_word_path: Path
33
+
34
+ def load(self) -> "Union[MicroWakeWord, OpenWakeWord]":
35
+ if self.type == WakeWordType.MICRO_WAKE_WORD:
36
+ from pymicro_wakeword import MicroWakeWord
37
+ return MicroWakeWord.from_config(config_path=self.wake_word_path)
38
+
39
+ if self.type == WakeWordType.OPEN_WAKE_WORD:
40
+ from pyopen_wakeword import OpenWakeWord
41
+ oww_model = OpenWakeWord.from_model(model_path=self.wake_word_path)
42
+ setattr(oww_model, "wake_word", self.wake_word)
43
+ return oww_model
44
+
45
+ raise ValueError(f"Unexpected wake word type: {self.type}")
46
+
47
+
48
+ @dataclass
49
+ class Preferences:
50
+ active_wake_words: List[str] = field(default_factory=list)
51
+ # Audio processing settings (persisted from Home Assistant)
52
+ agc_enabled: Optional[bool] = None # None = use hardware default
53
+ agc_max_gain: Optional[float] = None # None = use hardware default
54
+ noise_suppression: Optional[float] = None # None = use hardware default
55
+ # Continuous conversation mode (controlled from Home Assistant)
56
+ continuous_conversation: bool = False
57
+
58
+
59
+ @dataclass
60
+ class ServerState:
61
+ """Global server state."""
62
+ name: str
63
+ mac_address: str
64
+ audio_queue: "Queue[Optional[bytes]]"
65
+ entities: "List[ESPHomeEntity]"
66
+ available_wake_words: "Dict[str, AvailableWakeWord]"
67
+ wake_words: "Dict[str, Union[MicroWakeWord, OpenWakeWord]]"
68
+ active_wake_words: Set[str]
69
+ stop_word: "MicroWakeWord"
70
+ music_player: "AudioPlayer"
71
+ tts_player: "AudioPlayer"
72
+ wakeup_sound: str
73
+ timer_finished_sound: str
74
+ preferences: Preferences
75
+ preferences_path: Path
76
+ download_dir: Path
77
+
78
+ # Reachy Mini specific
79
+ reachy_mini: Optional[object] = None
80
+ motion_enabled: bool = True
81
+ motion: Optional[object] = None # ReachyMiniMotion instance
82
+
83
+ media_player_entity: "Optional[MediaPlayerEntity]" = None
84
+ satellite: "Optional[VoiceSatelliteProtocol]" = None
85
+ wake_words_changed: bool = False
86
+ refractory_seconds: float = 2.0
87
+
88
+ def save_preferences(self) -> None:
89
+ """Save preferences as JSON."""
90
+ _LOGGER.debug("Saving preferences: %s", self.preferences_path)
91
+ self.preferences_path.parent.mkdir(parents=True, exist_ok=True)
92
+ with open(self.preferences_path, "w", encoding="utf-8") as preferences_file:
93
+ json.dump(
94
+ asdict(self.preferences), preferences_file, ensure_ascii=False, indent=4
95
+ )
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/crops_classifier.onnx RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/hand_detector.onnx RENAMED
File without changes
reachy_mini_home_assistant/motion/reachy_motion.py → reachy_mini_ha_voice/motion.py RENAMED
@@ -5,6 +5,7 @@ MovementManager for unified 5Hz control with face tracking.
5
  """
6
 
7
  import logging
 
8
 
9
  from .movement_manager import MovementManager, RobotState
10
 
@@ -18,28 +19,31 @@ class ReachyMiniMotion:
18
  to the MovementManager which handles them in its 5Hz control loop.
19
  """
20
 
21
- def __init__(self, reachy_mini):
22
  self.reachy_mini = reachy_mini
23
- self._movement_manager: MovementManager | None = None
24
  self._camera_server = None # Reference to camera server for face tracking control
25
  self._is_speaking = False
26
 
27
  _LOGGER.debug("ReachyMiniMotion.__init__ called with reachy_mini=%s", reachy_mini)
28
 
29
- # Initialize movement manager
30
- try:
31
- self._movement_manager = MovementManager(reachy_mini)
32
- _LOGGER.debug("MovementManager created successfully")
33
- except Exception as e:
34
- _LOGGER.error("Failed to create MovementManager: %s", e, exc_info=True)
35
- self._movement_manager = None
 
 
 
36
 
37
  def set_reachy_mini(self, reachy_mini):
38
  """Set the Reachy Mini instance."""
39
  self.reachy_mini = reachy_mini
40
- if self._movement_manager is None:
41
  self._movement_manager = MovementManager(reachy_mini)
42
- else:
43
  self._movement_manager.robot = reachy_mini
44
 
45
  def set_camera_server(self, camera_server):
@@ -68,7 +72,7 @@ class ReachyMiniMotion:
68
  _LOGGER.info("Motion control stopped")
69
 
70
  @property
71
- def movement_manager(self) -> MovementManager | None:
72
  """Get the movement manager instance."""
73
  return self._movement_manager
74
 
 
5
  """
6
 
7
  import logging
8
+ from typing import Optional
9
 
10
  from .movement_manager import MovementManager, RobotState
11
 
 
19
  to the MovementManager which handles them in its 5Hz control loop.
20
  """
21
 
22
+ def __init__(self, reachy_mini=None):
23
  self.reachy_mini = reachy_mini
24
+ self._movement_manager: Optional[MovementManager] = None
25
  self._camera_server = None # Reference to camera server for face tracking control
26
  self._is_speaking = False
27
 
28
  _LOGGER.debug("ReachyMiniMotion.__init__ called with reachy_mini=%s", reachy_mini)
29
 
30
+ # Initialize movement manager if robot is available
31
+ if reachy_mini is not None:
32
+ try:
33
+ self._movement_manager = MovementManager(reachy_mini)
34
+ _LOGGER.debug("MovementManager created successfully")
35
+ except Exception as e:
36
+ _LOGGER.error("Failed to create MovementManager: %s", e, exc_info=True)
37
+ self._movement_manager = None
38
+ else:
39
+ _LOGGER.debug("reachy_mini is None, MovementManager not created")
40
 
41
  def set_reachy_mini(self, reachy_mini):
42
  """Set the Reachy Mini instance."""
43
  self.reachy_mini = reachy_mini
44
+ if reachy_mini is not None and self._movement_manager is None:
45
  self._movement_manager = MovementManager(reachy_mini)
46
+ elif reachy_mini is not None and self._movement_manager is not None:
47
  self._movement_manager.robot = reachy_mini
48
 
49
  def set_camera_server(self, camera_server):
 
72
  _LOGGER.info("Motion control stopped")
73
 
74
  @property
75
+ def movement_manager(self) -> Optional[MovementManager]:
76
  """Get the movement manager instance."""
77
  return self._movement_manager
78
 
reachy_mini_ha_voice/movement_manager.py ADDED
@@ -0,0 +1,861 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified Movement Manager for Reachy Mini.
3
+
4
+ This module provides a centralized control system for robot movements,
5
+ inspired by the reachy_mini_conversation_app architecture.
6
+
7
+ Key features:
8
+ - Single 10Hz control loop (balanced between responsiveness and stability)
9
+ - Command queue pattern (thread-safe external API)
10
+ - Error throttling (prevents log explosion)
11
+ - JSON-driven animation system (conversation state animations)
12
+ - Graceful shutdown
13
+ - Pose change detection (skip sending if no significant change)
14
+ - Robust connection recovery (faster reconnection attempts)
15
+ - Proper pose composition using SDK's compose_world_offset (same as conversation_app)
16
+ - Antenna freeze during listening mode with smooth blend back
17
+ """
18
+
19
+ import logging
20
+ import math
21
+ import threading
22
+ import time
23
+ from dataclasses import dataclass, field
24
+ from enum import Enum
25
+ from queue import Queue, Empty
26
+ from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING
27
+
28
+ import numpy as np
29
+ from scipy.spatial.transform import Rotation as R
30
+
31
+ if TYPE_CHECKING:
32
+ from reachy_mini import ReachyMini
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ # Import SDK utilities for pose composition (same as conversation_app)
37
+ try:
38
+ from reachy_mini.utils import create_head_pose
39
+ from reachy_mini.utils.interpolation import compose_world_offset
40
+ SDK_UTILS_AVAILABLE = True
41
+ except ImportError:
42
+ SDK_UTILS_AVAILABLE = False
43
+ logger.warning("SDK utils not available, using fallback pose composition")
44
+
45
+ # Import animation player
46
+ from .animation_player import AnimationPlayer
47
+
48
+
49
+ # =============================================================================
50
+ # Constants
51
+ # =============================================================================
52
+
53
+ # Control loop frequency - daemon now supports higher rates
54
+ CONTROL_LOOP_FREQUENCY_HZ = 100 # 100Hz control loop (same as conversation_app)
55
+ TARGET_PERIOD = 1.0 / CONTROL_LOOP_FREQUENCY_HZ
56
+
57
+ # Antenna freeze parameters (listening mode)
58
+ ANTENNA_BLEND_DURATION = 0.5 # Seconds to blend back from frozen state
59
+
60
+ # State to animation mapping
61
+ STATE_ANIMATION_MAP = {
62
+ "idle": "idle",
63
+ "listening": "listening",
64
+ "thinking": "thinking",
65
+ "speaking": "speaking",
66
+ }
67
+
68
+
69
+ class RobotState(Enum):
70
+ """Robot state machine states."""
71
+ IDLE = "idle"
72
+ LISTENING = "listening"
73
+ THINKING = "thinking"
74
+ SPEAKING = "speaking"
75
+
76
+
77
+ @dataclass
78
+ class MovementState:
79
+ """Internal movement state (only modified by control loop)."""
80
+ # Current robot state
81
+ robot_state: RobotState = RobotState.IDLE
82
+
83
+ # Animation offsets (from AnimationPlayer)
84
+ anim_pitch: float = 0.0
85
+ anim_yaw: float = 0.0
86
+ anim_roll: float = 0.0
87
+ anim_x: float = 0.0
88
+ anim_y: float = 0.0
89
+ anim_z: float = 0.0
90
+ anim_antenna_left: float = 0.0
91
+ anim_antenna_right: float = 0.0
92
+
93
+ # Speech sway offsets (from audio analysis)
94
+ sway_pitch: float = 0.0
95
+ sway_yaw: float = 0.0
96
+ sway_roll: float = 0.0
97
+ sway_x: float = 0.0
98
+ sway_y: float = 0.0
99
+ sway_z: float = 0.0
100
+
101
+ # Target pose (from actions)
102
+ target_pitch: float = 0.0
103
+ target_yaw: float = 0.0
104
+ target_roll: float = 0.0
105
+ target_x: float = 0.0
106
+ target_y: float = 0.0
107
+ target_z: float = 0.0
108
+ target_antenna_left: float = 0.0
109
+ target_antenna_right: float = 0.0
110
+ target_body_yaw: float = 0.0
111
+
112
+ # Timing
113
+ last_activity_time: float = 0.0
114
+ idle_start_time: float = 0.0
115
+
116
+ # Antenna freeze state (listening mode)
117
+ antenna_frozen: bool = False
118
+ frozen_antenna_left: float = 0.0
119
+ frozen_antenna_right: float = 0.0
120
+ antenna_blend: float = 1.0 # 0=frozen, 1=normal
121
+ antenna_blend_start_time: float = 0.0
122
+
123
+
124
+ @dataclass
125
+ class PendingAction:
126
+ """A pending motion action."""
127
+ name: str
128
+ target_pitch: float = 0.0
129
+ target_yaw: float = 0.0
130
+ target_roll: float = 0.0
131
+ target_x: float = 0.0
132
+ target_y: float = 0.0
133
+ target_z: float = 0.0
134
+ duration: float = 0.5
135
+ callback: Optional[Callable] = None
136
+
137
+
138
+ class MovementManager:
139
+ """
140
+ Unified movement manager with 10Hz control loop.
141
+
142
+ All external interactions go through the command queue,
143
+ ensuring thread safety and preventing race conditions.
144
+
145
+ Note: Frequency reduced from 100Hz to 10Hz to prevent daemon crashes
146
+ caused by excessive Zenoh message traffic.
147
+ """
148
+
149
+ def __init__(self, reachy_mini: Optional["ReachyMini"] = None):
150
+ self.robot = reachy_mini
151
+ self._now = time.monotonic
152
+
153
+ # Command queue - all external threads communicate through this
154
+ self._command_queue: Queue[Tuple[str, Any]] = Queue()
155
+
156
+ # Internal state (only modified by control loop)
157
+ self.state = MovementState()
158
+ self.state.last_activity_time = self._now()
159
+ self.state.idle_start_time = self._now()
160
+
161
+ # Animation player (JSON-driven animations)
162
+ self._animation_player = AnimationPlayer()
163
+
164
+ # Thread control
165
+ self._stop_event = threading.Event()
166
+ self._thread: Optional[threading.Thread] = None
167
+
168
+ # Error throttling
169
+ self._last_error_time = 0.0
170
+ self._error_interval = 1.0 # Log at most once per second
171
+ self._suppressed_errors = 0
172
+
173
+ # Connection health tracking
174
+ self._connection_lost = False
175
+ self._last_successful_command = self._now()
176
+ self._connection_timeout = 3.0
177
+ self._reconnect_attempt_interval = 2.0
178
+ self._last_reconnect_attempt = 0.0
179
+ self._consecutive_errors = 0
180
+ self._max_consecutive_errors = 5
181
+
182
+ # Pending action
183
+ self._pending_action: Optional[PendingAction] = None
184
+ self._action_start_time: float = 0.0
185
+ self._action_start_pose: Dict[str, float] = {}
186
+
187
+ # Pose change detection threshold
188
+ self._last_sent_pose: Optional[Dict[str, float]] = None
189
+ self._pose_change_threshold = 0.005
190
+
191
+ # Face tracking offsets (from camera worker)
192
+ self._face_tracking_offsets: Tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
193
+ self._face_tracking_lock = threading.Lock()
194
+
195
+ # Camera server reference for face tracking
196
+ self._camera_server = None
197
+
198
+ # Face tracking smoothing (exponential moving average)
199
+ self._smoothed_face_offsets: List[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
200
+ self._face_smoothing_factor = 0.3
201
+
202
+ logger.info("MovementManager initialized with AnimationPlayer")
203
+
204
+ # =========================================================================
205
+ # Thread-safe public API (called from any thread)
206
+ # =========================================================================
207
+
208
+ def set_state(self, new_state: RobotState) -> None:
209
+ """Thread-safe: Set robot state."""
210
+ self._command_queue.put(("set_state", new_state))
211
+
212
+ def set_listening(self, listening: bool) -> None:
213
+ """Thread-safe: Set listening state."""
214
+ state = RobotState.LISTENING if listening else RobotState.IDLE
215
+ self._command_queue.put(("set_state", state))
216
+
217
+ def set_thinking(self) -> None:
218
+ """Thread-safe: Set thinking state."""
219
+ self._command_queue.put(("set_state", RobotState.THINKING))
220
+
221
+ def set_speaking(self, speaking: bool) -> None:
222
+ """Thread-safe: Set speaking state."""
223
+ state = RobotState.SPEAKING if speaking else RobotState.IDLE
224
+ self._command_queue.put(("set_state", state))
225
+
226
+ def set_idle(self) -> None:
227
+ """Thread-safe: Return to idle state."""
228
+ self._command_queue.put(("set_state", RobotState.IDLE))
229
+
230
+ def queue_action(self, action: PendingAction) -> None:
231
+ """Thread-safe: Queue a motion action."""
232
+ self._command_queue.put(("action", action))
233
+
234
+ def turn_to_angle(self, yaw_deg: float, duration: float = 0.8) -> None:
235
+ """Thread-safe: Turn head to face a direction."""
236
+ action = PendingAction(
237
+ name="turn_to",
238
+ target_yaw=math.radians(yaw_deg),
239
+ duration=duration,
240
+ )
241
+ self._command_queue.put(("action", action))
242
+
243
+ def nod(self, amplitude_deg: float = 15, duration: float = 0.5) -> None:
244
+ """Thread-safe: Perform a nod gesture."""
245
+ self._command_queue.put(("nod", (amplitude_deg, duration)))
246
+
247
+ def shake(self, amplitude_deg: float = 20, duration: float = 0.5) -> None:
248
+ """Thread-safe: Perform a head shake gesture."""
249
+ self._command_queue.put(("shake", (amplitude_deg, duration)))
250
+
251
+ def set_speech_sway(
252
+ self, x: float, y: float, z: float,
253
+ roll: float, pitch: float, yaw: float
254
+ ) -> None:
255
+ """Thread-safe: Set speech-driven sway offsets.
256
+
257
+ These offsets are applied on top of the current animation
258
+ to create audio-synchronized head motion during TTS playback.
259
+
260
+ Args:
261
+ x, y, z: Position offsets in meters
262
+ roll, pitch, yaw: Orientation offsets in radians
263
+ """
264
+ self._command_queue.put(("speech_sway", (x, y, z, roll, pitch, yaw)))
265
+
266
+ def reset_to_neutral(self, duration: float = 0.5) -> None:
267
+ """Thread-safe: Reset to neutral position."""
268
+ action = PendingAction(
269
+ name="neutral",
270
+ target_pitch=0.0,
271
+ target_yaw=0.0,
272
+ target_roll=0.0,
273
+ target_x=0.0,
274
+ target_y=0.0,
275
+ target_z=0.0,
276
+ duration=duration,
277
+ )
278
+ self._command_queue.put(("action", action))
279
+
280
+ def set_camera_server(self, camera_server) -> None:
281
+ """Set the camera server for face tracking offsets.
282
+
283
+ Args:
284
+ camera_server: MJPEGCameraServer instance with face tracking
285
+ """
286
+ self._camera_server = camera_server
287
+ logger.info("Camera server set for face tracking")
288
+
289
+ def set_face_tracking_offsets(self, offsets: Tuple[float, float, float, float, float, float]) -> None:
290
+ """Thread-safe: Update face tracking offsets manually.
291
+
292
+ Args:
293
+ offsets: Tuple of (x, y, z, roll, pitch, yaw) in meters/radians
294
+ """
295
+ with self._face_tracking_lock:
296
+ self._face_tracking_offsets = offsets
297
+
298
+ def set_target_pose(
299
+ self,
300
+ x: Optional[float] = None,
301
+ y: Optional[float] = None,
302
+ z: Optional[float] = None,
303
+ roll: Optional[float] = None,
304
+ pitch: Optional[float] = None,
305
+ yaw: Optional[float] = None,
306
+ body_yaw: Optional[float] = None,
307
+ antenna_left: Optional[float] = None,
308
+ antenna_right: Optional[float] = None,
309
+ ) -> None:
310
+ """Thread-safe: Set target pose components.
311
+
312
+ Only provided values will be updated. Values are in meters for position
313
+ and radians for angles.
314
+
315
+ Args:
316
+ x, y, z: Head position in meters
317
+ roll, pitch, yaw: Head orientation in radians
318
+ body_yaw: Body yaw in radians
319
+ antenna_left, antenna_right: Antenna angles in radians
320
+ """
321
+ self._command_queue.put(("set_pose", {
322
+ "x": x,
323
+ "y": y,
324
+ "z": z,
325
+ "roll": roll,
326
+ "pitch": pitch,
327
+ "yaw": yaw,
328
+ "body_yaw": body_yaw,
329
+ "antenna_left": antenna_left,
330
+ "antenna_right": antenna_right,
331
+ }))
332
+
333
+ # =========================================================================
334
+ # Internal: Command processing (runs in control loop)
335
+ # =========================================================================
336
+
337
+ def _poll_commands(self) -> None:
338
+ """Process all pending commands from the queue."""
339
+ while True:
340
+ try:
341
+ cmd, payload = self._command_queue.get_nowait()
342
+ except Empty:
343
+ break
344
+
345
+ self._handle_command(cmd, payload)
346
+
347
+ def _handle_command(self, cmd: str, payload: Any) -> None:
348
+ """Handle a single command."""
349
+ if cmd == "set_state":
350
+ old_state = self.state.robot_state
351
+ self.state.robot_state = payload
352
+ self.state.last_activity_time = self._now()
353
+
354
+ # Update animation based on state
355
+ animation_name = STATE_ANIMATION_MAP.get(payload.value, "idle")
356
+ self._animation_player.set_animation(animation_name)
357
+
358
+ # State transition logic
359
+ if payload == RobotState.IDLE and old_state != RobotState.IDLE:
360
+ self.state.idle_start_time = self._now()
361
+ # Unfreeze antennas when returning to idle
362
+ self._start_antenna_unfreeze()
363
+
364
+ # Freeze antennas when entering listening mode
365
+ if payload == RobotState.LISTENING:
366
+ self._freeze_antennas()
367
+ elif old_state == RobotState.LISTENING and payload != RobotState.LISTENING:
368
+ # Start unfreezing when leaving listening mode
369
+ self._start_antenna_unfreeze()
370
+
371
+ logger.debug("State changed: %s -> %s, animation: %s",
372
+ old_state.value, payload.value, animation_name)
373
+
374
+ elif cmd == "action":
375
+ self._start_action(payload)
376
+
377
+ elif cmd == "nod":
378
+ amplitude_deg, duration = payload
379
+ self._do_nod(amplitude_deg, duration)
380
+
381
+ elif cmd == "shake":
382
+ amplitude_deg, duration = payload
383
+ self._do_shake(amplitude_deg, duration)
384
+
385
+ elif cmd == "set_pose":
386
+ # Update target pose from external control (e.g., Home Assistant)
387
+ if payload.get("x") is not None:
388
+ self.state.target_x = payload["x"]
389
+ if payload.get("y") is not None:
390
+ self.state.target_y = payload["y"]
391
+ if payload.get("z") is not None:
392
+ self.state.target_z = payload["z"]
393
+ if payload.get("roll") is not None:
394
+ self.state.target_roll = payload["roll"]
395
+ if payload.get("pitch") is not None:
396
+ self.state.target_pitch = payload["pitch"]
397
+ if payload.get("yaw") is not None:
398
+ self.state.target_yaw = payload["yaw"]
399
+ if payload.get("body_yaw") is not None:
400
+ self.state.target_body_yaw = payload["body_yaw"]
401
+ if payload.get("antenna_left") is not None:
402
+ self.state.target_antenna_left = payload["antenna_left"]
403
+ if payload.get("antenna_right") is not None:
404
+ self.state.target_antenna_right = payload["antenna_right"]
405
+ logger.debug("External pose update: %s", payload)
406
+
407
+ elif cmd == "speech_sway":
408
+ # Update speech-driven sway offsets
409
+ x, y, z, roll, pitch, yaw = payload
410
+ self.state.sway_x = x
411
+ self.state.sway_y = y
412
+ self.state.sway_z = z
413
+ self.state.sway_roll = roll
414
+ self.state.sway_pitch = pitch
415
+ self.state.sway_yaw = yaw
416
+
417
+ def _start_action(self, action: PendingAction) -> None:
418
+ """Start a new motion action."""
419
+ self._pending_action = action
420
+ self._action_start_time = self._now()
421
+ self._action_start_pose = {
422
+ "pitch": self.state.target_pitch,
423
+ "yaw": self.state.target_yaw,
424
+ "roll": self.state.target_roll,
425
+ "x": self.state.target_x,
426
+ "y": self.state.target_y,
427
+ "z": self.state.target_z,
428
+ }
429
+ logger.debug("Starting action: %s", action.name)
430
+
431
+ def _do_nod(self, amplitude_deg: float, duration: float) -> None:
432
+ """Execute nod gesture (blocking in control loop context)."""
433
+ # This is simplified - in production, use action queue
434
+ amplitude_rad = math.radians(amplitude_deg)
435
+ half_duration = duration / 2
436
+
437
+ # Nod down
438
+ action_down = PendingAction(
439
+ name="nod_down",
440
+ target_pitch=amplitude_rad,
441
+ duration=half_duration,
442
+ )
443
+ self._start_action(action_down)
444
+
445
+ def _do_shake(self, amplitude_deg: float, duration: float) -> None:
446
+ """Execute shake gesture (blocking in control loop context)."""
447
+ amplitude_rad = math.radians(amplitude_deg)
448
+ half_duration = duration / 2
449
+
450
+ # Shake left
451
+ action_left = PendingAction(
452
+ name="shake_left",
453
+ target_yaw=-amplitude_rad,
454
+ duration=half_duration,
455
+ )
456
+ self._start_action(action_left)
457
+
458
+ # =========================================================================
459
+ # Internal: Motion updates (runs in control loop)
460
+ # =========================================================================
461
+
462
+ def _update_action(self, dt: float) -> None:
463
+ """Update pending action interpolation."""
464
+ if self._pending_action is None:
465
+ return
466
+
467
+ elapsed = self._now() - self._action_start_time
468
+ progress = min(1.0, elapsed / self._pending_action.duration)
469
+
470
+ # Smooth interpolation (ease in-out)
471
+ t = progress * progress * (3 - 2 * progress)
472
+
473
+ # Interpolate pose
474
+ start = self._action_start_pose
475
+ action = self._pending_action
476
+
477
+ self.state.target_pitch = start["pitch"] + t * (action.target_pitch - start["pitch"])
478
+ self.state.target_yaw = start["yaw"] + t * (action.target_yaw - start["yaw"])
479
+ self.state.target_roll = start["roll"] + t * (action.target_roll - start["roll"])
480
+ self.state.target_x = start["x"] + t * (action.target_x - start["x"])
481
+ self.state.target_y = start["y"] + t * (action.target_y - start["y"])
482
+ self.state.target_z = start["z"] + t * (action.target_z - start["z"])
483
+
484
+ # Action complete
485
+ if progress >= 1.0:
486
+ if self._pending_action.callback:
487
+ try:
488
+ self._pending_action.callback()
489
+ except Exception as e:
490
+ logger.error("Action callback error: %s", e)
491
+ self._pending_action = None
492
+
493
+ def _update_animation(self, dt: float) -> None:
494
+ """Update animation offsets from AnimationPlayer."""
495
+ offsets = self._animation_player.get_offsets(dt)
496
+
497
+ self.state.anim_pitch = offsets["pitch"]
498
+ self.state.anim_yaw = offsets["yaw"]
499
+ self.state.anim_roll = offsets["roll"]
500
+ self.state.anim_x = offsets["x"]
501
+ self.state.anim_y = offsets["y"]
502
+ self.state.anim_z = offsets["z"]
503
+ self.state.anim_antenna_left = offsets["antenna_left"]
504
+ self.state.anim_antenna_right = offsets["antenna_right"]
505
+
506
+ def _freeze_antennas(self) -> None:
507
+ """Freeze antennas at current position (for listening mode)."""
508
+ # Capture current antenna positions
509
+ current_left = self.state.target_antenna_left + self.state.anim_antenna_left
510
+ current_right = self.state.target_antenna_right + self.state.anim_antenna_right
511
+
512
+ self.state.antenna_frozen = True
513
+ self.state.frozen_antenna_left = current_left
514
+ self.state.frozen_antenna_right = current_right
515
+ self.state.antenna_blend = 0.0 # Fully frozen
516
+ logger.debug("Antennas frozen at left=%.2f, right=%.2f",
517
+ math.degrees(current_left), math.degrees(current_right))
518
+
519
+ def _start_antenna_unfreeze(self) -> None:
520
+ """Start unfreezing antennas (smooth blend back to normal)."""
521
+ if not self.state.antenna_frozen:
522
+ return
523
+
524
+ self.state.antenna_blend_start_time = self._now()
525
+ logger.debug("Starting antenna unfreeze")
526
+
527
+ def _update_antenna_blend(self, dt: float) -> None:
528
+ """Update antenna blend state for smooth unfreezing."""
529
+ if not self.state.antenna_frozen:
530
+ return
531
+
532
+ if self.state.antenna_blend >= 1.0:
533
+ # Fully unfrozen
534
+ self.state.antenna_frozen = False
535
+ return
536
+
537
+ # Calculate blend progress
538
+ elapsed = self._now() - self.state.antenna_blend_start_time
539
+ if elapsed > 0:
540
+ self.state.antenna_blend = min(1.0, elapsed / ANTENNA_BLEND_DURATION)
541
+
542
+ if self.state.antenna_blend >= 1.0:
543
+ self.state.antenna_frozen = False
544
+ logger.debug("Antennas unfrozen")
545
+
546
+ def _update_face_tracking(self) -> None:
547
+ """Get face tracking offsets from camera server with smoothing."""
548
+ if self._camera_server is not None:
549
+ try:
550
+ raw_offsets = self._camera_server.get_face_tracking_offsets()
551
+
552
+ # Apply exponential moving average smoothing
553
+ alpha = self._face_smoothing_factor
554
+ for i in range(6):
555
+ self._smoothed_face_offsets[i] = (
556
+ alpha * raw_offsets[i] +
557
+ (1 - alpha) * self._smoothed_face_offsets[i]
558
+ )
559
+
560
+ with self._face_tracking_lock:
561
+ self._face_tracking_offsets = tuple(self._smoothed_face_offsets)
562
+
563
+ except Exception as e:
564
+ logger.debug("Error getting face tracking offsets: %s", e)
565
+
566
+ def _compose_final_pose(self) -> Tuple[np.ndarray, Tuple[float, float], float]:
567
+ """Compose final pose from all sources using SDK's compose_world_offset.
568
+
569
+ Returns:
570
+ Tuple of (head_pose_4x4, (antenna_right, antenna_left), body_yaw)
571
+ """
572
+ # Build primary head pose from target state
573
+ if SDK_UTILS_AVAILABLE:
574
+ primary_head = create_head_pose(
575
+ x=self.state.target_x,
576
+ y=self.state.target_y,
577
+ z=self.state.target_z,
578
+ roll=self.state.target_roll,
579
+ pitch=self.state.target_pitch,
580
+ yaw=self.state.target_yaw,
581
+ degrees=False,
582
+ mm=False,
583
+ )
584
+ else:
585
+ # Fallback: build matrix manually
586
+ rotation = R.from_euler('xyz', [
587
+ self.state.target_roll,
588
+ self.state.target_pitch,
589
+ self.state.target_yaw,
590
+ ])
591
+ primary_head = np.eye(4)
592
+ primary_head[:3, :3] = rotation.as_matrix()
593
+ primary_head[0, 3] = self.state.target_x
594
+ primary_head[1, 3] = self.state.target_y
595
+ primary_head[2, 3] = self.state.target_z
596
+
597
+ # Build secondary pose from animation + face tracking + speech sway
598
+ with self._face_tracking_lock:
599
+ face_offsets = self._face_tracking_offsets
600
+
601
+ secondary_x = self.state.anim_x + self.state.sway_x + face_offsets[0]
602
+ secondary_y = self.state.anim_y + self.state.sway_y + face_offsets[1]
603
+ secondary_z = self.state.anim_z + self.state.sway_z + face_offsets[2]
604
+ secondary_roll = self.state.anim_roll + self.state.sway_roll + face_offsets[3]
605
+ secondary_pitch = self.state.anim_pitch + self.state.sway_pitch + face_offsets[4]
606
+ secondary_yaw = self.state.anim_yaw + self.state.sway_yaw + face_offsets[5]
607
+
608
+ if SDK_UTILS_AVAILABLE:
609
+ secondary_head = create_head_pose(
610
+ x=secondary_x,
611
+ y=secondary_y,
612
+ z=secondary_z,
613
+ roll=secondary_roll,
614
+ pitch=secondary_pitch,
615
+ yaw=secondary_yaw,
616
+ degrees=False,
617
+ mm=False,
618
+ )
619
+ # Compose using SDK's compose_world_offset (same as conversation_app)
620
+ final_head = compose_world_offset(primary_head, secondary_head, reorthonormalize=True)
621
+ else:
622
+ # Fallback: simple addition (less accurate but works)
623
+ secondary_rotation = R.from_euler('xyz', [secondary_roll, secondary_pitch, secondary_yaw])
624
+ secondary_head = np.eye(4)
625
+ secondary_head[:3, :3] = secondary_rotation.as_matrix()
626
+ secondary_head[0, 3] = secondary_x
627
+ secondary_head[1, 3] = secondary_y
628
+ secondary_head[2, 3] = secondary_z
629
+
630
+ # Simple composition: R_final = R_secondary @ R_primary, t_final = t_primary + t_secondary
631
+ final_head = np.eye(4)
632
+ final_head[:3, :3] = secondary_head[:3, :3] @ primary_head[:3, :3]
633
+ final_head[:3, 3] = primary_head[:3, 3] + secondary_head[:3, 3]
634
+
635
+ # Antenna pose with freeze blending
636
+ target_antenna_left = self.state.target_antenna_left + self.state.anim_antenna_left
637
+ target_antenna_right = self.state.target_antenna_right + self.state.anim_antenna_right
638
+
639
+ # Apply antenna freeze blending (listening mode)
640
+ blend = self.state.antenna_blend
641
+ if blend < 1.0:
642
+ # Blend between frozen position and target position
643
+ antenna_left = (self.state.frozen_antenna_left * (1.0 - blend) +
644
+ target_antenna_left * blend)
645
+ antenna_right = (self.state.frozen_antenna_right * (1.0 - blend) +
646
+ target_antenna_right * blend)
647
+ else:
648
+ antenna_left = target_antenna_left
649
+ antenna_right = target_antenna_right
650
+
651
+ return final_head, (antenna_right, antenna_left), self.state.target_body_yaw
652
+
653
+ # =========================================================================
654
+ # Internal: Robot control (runs in control loop)
655
+ # =========================================================================
656
+
657
    def _issue_control_command(self, head_pose: np.ndarray, antennas: Tuple[float, float], body_yaw: float) -> None:
        """Send control command to robot with error throttling and connection health tracking.

        Args:
            head_pose: 4x4 homogeneous head pose matrix (already composed upstream).
            antennas: (right, left) antenna angles in radians.
            body_yaw: Body yaw target in radians.
        """
        if self.robot is None:
            return

        # Check if pose changed significantly (prevent unnecessary commands)
        # Extract euler angles for comparison
        rotation = R.from_matrix(head_pose[:3, :3])
        euler = rotation.as_euler('xyz')  # [roll, pitch, yaw]

        # Flat dict of every commanded degree of freedom, used only for the
        # change-detection comparison below.
        current_pose = {
            "x": head_pose[0, 3],
            "y": head_pose[1, 3],
            "z": head_pose[2, 3],
            "roll": euler[0],
            "pitch": euler[1],
            "yaw": euler[2],
            "antenna_right": antennas[0],
            "antenna_left": antennas[1],
            "body_yaw": body_yaw,
        }

        if self._last_sent_pose is not None:
            # NOTE: positions (meters) and angles (radians) share one threshold.
            max_diff = max(
                abs(current_pose[k] - self._last_sent_pose.get(k, 0.0))
                for k in current_pose.keys()
            )
            if max_diff < self._pose_change_threshold:
                # No significant change, skip sending command
                return

        now = self._now()

        # Check if we should skip due to connection loss (but always try periodically)
        if self._connection_lost:
            if now - self._last_reconnect_attempt < self._reconnect_attempt_interval:
                # Skip sending commands to reduce error spam
                return
            # Time to try reconnecting
            self._last_reconnect_attempt = now
            logger.debug("Attempting to send command after connection loss...")

        try:
            # Send to robot (single control point!)
            # head_pose is already a 4x4 matrix from _compose_final_pose
            self.robot.set_target(
                head=head_pose,
                antennas=list(antennas),
                body_yaw=body_yaw,
            )

            # Command succeeded - update connection health and cache
            self._last_successful_command = now
            self._last_sent_pose = current_pose.copy()  # Cache sent pose
            self._consecutive_errors = 0  # Reset error counter

            if self._connection_lost:
                logger.info("✓ Connection to robot restored")
                self._connection_lost = False
                self._suppressed_errors = 0

        except Exception as e:
            error_msg = str(e)
            self._consecutive_errors += 1

            # Check if this is a connection error
            # (string match against the Zenoh transport's error text)
            is_connection_error = "Lost connection" in error_msg or "ZError" in error_msg

            if is_connection_error:
                if not self._connection_lost:
                    # First time detecting connection loss
                    if self._consecutive_errors >= self._max_consecutive_errors:
                        logger.warning(f"Connection unstable after {self._consecutive_errors} errors: {error_msg}")
                        logger.warning(" Will retry connection every %.1fs...", self._reconnect_attempt_interval)
                        self._connection_lost = True
                        self._last_reconnect_attempt = now
                    else:
                        # Transient error, log but don't mark as lost yet
                        self._log_error_throttled(f"Transient connection error ({self._consecutive_errors}/{self._max_consecutive_errors}): {error_msg}")
                else:
                    # Already in lost state, use throttled logging
                    self._log_error_throttled(f"Connection still lost: {error_msg}")
            else:
                # Non-connection error - log but don't affect connection state
                self._log_error_throttled(f"Failed to set robot target: {error_msg}")
742
+
743
+ def _log_error_throttled(self, message: str) -> None:
744
+ """Log error with throttling to prevent log explosion."""
745
+ now = self._now()
746
+ if now - self._last_error_time >= self._error_interval:
747
+ if self._suppressed_errors > 0:
748
+ message += f" (suppressed {self._suppressed_errors} repeats)"
749
+ self._suppressed_errors = 0
750
+ logger.error(message)
751
+ self._last_error_time = now
752
+ else:
753
+ self._suppressed_errors += 1
754
+
755
+ # =========================================================================
756
+ # Control loop
757
+ # =========================================================================
758
+
759
    def _control_loop(self) -> None:
        """Main 10Hz control loop.

        Runs on the MovementManager background thread until the stop event
        is set. Each iteration runs the numbered pipeline below in order,
        then sleeps whatever remains of the target period. A failure in any
        stage is logged (throttled) and the loop keeps running.
        """
        logger.info("Movement manager control loop started (%.0f Hz)", CONTROL_LOOP_FREQUENCY_HZ)

        last_time = self._now()

        while not self._stop_event.is_set():
            loop_start = self._now()
            # dt = wall-clock time since the previous iteration started;
            # drives all time-based interpolation below.
            dt = loop_start - last_time
            last_time = loop_start

            try:
                # 1. Process commands from queue
                self._poll_commands()

                # 2. Update action interpolation
                self._update_action(dt)

                # 3. Update animation offsets (JSON-driven)
                self._update_animation(dt)

                # 4. Update antenna blend (listening mode freeze/unfreeze)
                self._update_antenna_blend(dt)

                # 5. Update face tracking offsets from camera server
                self._update_face_tracking()

                # 6. Compose final pose (returns head_pose matrix, antennas tuple, body_yaw)
                head_pose, antennas, body_yaw = self._compose_final_pose()

                # 7. Send to robot (single control point!)
                self._issue_control_command(head_pose, antennas, body_yaw)

            except Exception as e:
                # Throttled so a persistent fault cannot flood the log at loop rate.
                self._log_error_throttled(f"Control loop error: {e}")

            # Adaptive sleep: subtract the time this iteration actually took.
            elapsed = self._now() - loop_start
            sleep_time = max(0.0, TARGET_PERIOD - elapsed)
            if sleep_time > 0:
                time.sleep(sleep_time)

        logger.info("Movement manager control loop stopped")
802
+
803
+ # =========================================================================
804
+ # Lifecycle
805
+ # =========================================================================
806
+
807
+ def start(self) -> None:
808
+ """Start the control loop."""
809
+ if self._thread is not None and self._thread.is_alive():
810
+ logger.warning("Movement manager already running")
811
+ return
812
+
813
+ self._stop_event.clear()
814
+ self._thread = threading.Thread(
815
+ target=self._control_loop,
816
+ daemon=True,
817
+ name="MovementManager",
818
+ )
819
+ self._thread.start()
820
+ logger.info("Movement manager started")
821
+
822
+ def stop(self) -> None:
823
+ """Stop the control loop and reset robot."""
824
+ if self._thread is None or not self._thread.is_alive():
825
+ return
826
+
827
+ logger.info("Stopping movement manager...")
828
+
829
+ # Signal stop
830
+ self._stop_event.set()
831
+
832
+ # Wait for thread with shorter timeout
833
+ self._thread.join(timeout=0.5)
834
+ if self._thread.is_alive():
835
+ logger.warning("Movement manager thread did not stop in time")
836
+
837
+ # Skip reset to neutral - let the app manager handle it
838
+ # This speeds up shutdown significantly
839
+ logger.info("Movement manager stopped")
840
+
841
+ def _reset_to_neutral_blocking(self) -> None:
842
+ """Reset robot to neutral position (blocking)."""
843
+ if self.robot is None:
844
+ return
845
+
846
+ try:
847
+ neutral_pose = np.eye(4)
848
+ self.robot.goto_target(
849
+ head=neutral_pose,
850
+ antennas=[0.0, 0.0],
851
+ body_yaw=0.0,
852
+ duration=0.3, # Faster reset
853
+ )
854
+ logger.info("Robot reset to neutral position")
855
+ except Exception as e:
856
+ logger.error("Failed to reset robot: %s", e)
857
+
858
+ @property
859
+ def is_running(self) -> bool:
860
+ """Check if control loop is running."""
861
+ return self._thread is not None and self._thread.is_alive()
{reachy_mini_home_assistant → reachy_mini_ha_voice}/reachy_controller.py RENAMED
@@ -1,1061 +1,869 @@
1
- """Reachy Mini controller wrapper for ESPHome entities."""
2
-
3
- import logging
4
- import math
5
- import platform
6
- import subprocess
7
- import time
8
- from typing import TYPE_CHECKING, Any
9
-
10
- import numpy as np
11
- import requests
12
- from scipy.spatial.transform import Rotation as R
13
-
14
- from .core.config import Config
15
-
16
- if TYPE_CHECKING:
17
- from reachy_mini import ReachyMini
18
-
19
- logger = logging.getLogger(__name__)
20
-
21
- # Audio device card names for amixer commands (from SDK)
22
- DEVICE_CARD_NAMES = {
23
- "reachy_mini_audio": "reachy_mini_audio",
24
- "respeaker": "respeaker",
25
- "default": "Audio", # Default to Reachy Mini Audio
26
- }
27
-
28
-
29
def _detect_audio_device() -> str:
    """Identify the audio output hardware currently present (from SDK).

    Returns:
        "reachy_mini_audio", "respeaker", or "default" on Linux;
        "unknown" on every other platform.
    """
    if platform.system() != "Linux":
        return "unknown"

    # Probe ALSA playback devices; treat a missing or hung `aplay`
    # as "no special device found".
    try:
        proc = subprocess.run(
            ["aplay", "-l"],
            capture_output=True,
            text=True,
            check=False,
            timeout=1.0,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return "default"

    listing = proc.stdout.lower()
    if "reachy mini audio" in listing:
        return "reachy_mini_audio"
    if "respeaker" in listing:
        return "respeaker"
    return "default"
52
-
53
-
54
def _get_amixer_card_name() -> str:
    """Resolve the amixer card name for the detected audio device (from SDK)."""
    detected = _detect_audio_device()
    # Unknown devices fall back to the "default" card mapping.
    return DEVICE_CARD_NAMES.get(detected, DEVICE_CARD_NAMES["default"])
58
-
59
-
60
- class _ReSpeakerContext:
61
- """Context manager for thread-safe ReSpeaker access."""
62
-
63
- def __init__(self, respeaker, lock):
64
- self._respeaker = respeaker
65
- self._lock = lock
66
-
67
- def __enter__(self):
68
- self._lock.acquire()
69
- return self._respeaker
70
-
71
- def __exit__(self, exc_type, exc_val, exc_tb):
72
- self._lock.release()
73
- return False
74
-
75
-
76
- class ReachyController:
77
- """
78
- Wrapper class for Reachy Mini control operations.
79
-
80
- Provides safe access to Reachy Mini SDK functions with error handling.
81
- """
82
-
83
- def __init__(self, reachy_mini: "ReachyMini"):
84
- """
85
- Initialize the controller.
86
-
87
- Args:
88
- reachy_mini: ReachyMini instance (required)
89
- """
90
- self.reachy = reachy_mini
91
- self._speaker_volume = 100 # Default volume
92
- self._microphone_volume = 50.0 # Default mic volume
93
- self._movement_manager = None # Set later via set_movement_manager()
94
-
95
- # Volume caching to reduce daemon HTTP load
96
- self._volume_cache_ttl = Config.daemon.volume_cache_ttl # seconds
97
- self._speaker_volume_cache_ts = 0.0
98
- self._microphone_volume_cache_ts = 0.0
99
-
100
- # Shared session to reduce per-request overhead
101
- self._http_session = requests.Session()
102
- self._http_timeout = 5.0 # seconds
103
- self._cache_ttl = Config.daemon.status_cache_ttl
104
- self._daemon_base_url = "http://127.0.0.1:8000"
105
-
106
- # Callback for sleep/wake to notify VoiceAssistant
107
- self._on_sleep_callback = None
108
- self._on_wake_callback = None
109
-
110
- # Status caching - only for get_status() which may trigger I/O
111
- # Note: get_current_head_pose() and get_current_joint_positions() are
112
- # non-blocking in the SDK (they return cached Zenoh data), so no caching needed
113
- self._state_cache: dict[str, Any] = {}
114
- self._last_status_query = 0.0
115
-
116
- # Thread lock for ReSpeaker USB access to prevent conflicts with GStreamer audio pipeline
117
- self._respeaker_lock = __import__("threading").Lock()
118
-
119
- def set_sleep_callback(self, callback) -> None:
120
- """Set callback to be called when go_to_sleep is triggered."""
121
- self._on_sleep_callback = callback
122
-
123
- def set_wake_callback(self, callback) -> None:
124
- """Set callback to be called when wake_up is triggered."""
125
- self._on_wake_callback = callback
126
-
127
- def set_movement_manager(self, movement_manager) -> None:
128
- """Set the MovementManager instance for pose control.
129
-
130
- Args:
131
- movement_manager: MovementManager instance
132
- """
133
- self._movement_manager = movement_manager
134
- logger.info("MovementManager set for ReachyController")
135
-
136
- @property
137
- def is_available(self) -> bool:
138
- """Check if robot is available."""
139
- return self.reachy is not None
140
-
141
- def get_idle_motion_enabled(self) -> bool:
142
- """Get whether idle look-around behavior is enabled."""
143
- if self._movement_manager is None:
144
- return False
145
- try:
146
- return bool(self._movement_manager.get_idle_motion_enabled())
147
- except Exception as e:
148
- logger.debug("Error getting idle motion state: %s", e)
149
- return False
150
-
151
- def set_idle_motion_enabled(self, enabled: bool) -> None:
152
- """Enable or disable idle look-around behavior."""
153
- if self._movement_manager is None:
154
- logger.warning("set_idle_motion_enabled failed - MovementManager not set")
155
- return
156
- self._movement_manager.set_idle_motion_enabled(enabled)
157
-
158
- def get_idle_antenna_enabled(self) -> bool:
159
- """Get whether idle antenna animation is enabled."""
160
- if self._movement_manager is None:
161
- return False
162
- try:
163
- return bool(self._movement_manager.get_idle_antenna_enabled())
164
- except Exception as e:
165
- logger.debug("Error getting idle antenna state: %s", e)
166
- return False
167
-
168
- def set_idle_antenna_enabled(self, enabled: bool) -> None:
169
- """Enable or disable idle antenna animation."""
170
- if self._movement_manager is None:
171
- logger.warning("set_idle_antenna_enabled failed - MovementManager not set")
172
- return
173
- self._movement_manager.set_idle_antenna_enabled(enabled)
174
-
175
- def get_idle_random_actions_enabled(self) -> bool:
176
- """Get whether idle random actions are enabled."""
177
- if self._movement_manager is None:
178
- return False
179
- try:
180
- return bool(self._movement_manager.get_idle_random_actions_enabled())
181
- except Exception as e:
182
- logger.debug("Error getting idle random actions state: %s", e)
183
- return False
184
-
185
- def set_idle_random_actions_enabled(self, enabled: bool) -> None:
186
- """Enable or disable idle random actions (no audio)."""
187
- if self._movement_manager is None:
188
- logger.warning("set_idle_random_actions_enabled failed - MovementManager not set")
189
- return
190
- self._movement_manager.set_idle_random_actions_enabled(enabled)
191
-
192
- # ========== Phase 1: Basic Status & Volume ==========
193
-
194
- @staticmethod
195
- def _status_value(status: Any, key: str, default: Any = None) -> Any:
196
- if status is None:
197
- return default
198
- if isinstance(status, dict):
199
- return status.get(key, default)
200
- return getattr(status, key, default)
201
-
202
- @classmethod
203
- def _nested_status_value(cls, status: Any, parent_key: str, child_key: str, default: Any = None) -> Any:
204
- parent = cls._status_value(status, parent_key, None)
205
- if parent is None:
206
- return default
207
- if isinstance(parent, dict):
208
- return parent.get(child_key, default)
209
- return getattr(parent, child_key, default)
210
-
211
    def _get_cached_status(self) -> Any:
        """Get cached daemon status to reduce query frequency.

        Note: get_status() may trigger I/O, so we cache it.
        Unlike get_current_head_pose() and get_current_joint_positions()
        which are non-blocking in the SDK.

        Returns:
            The freshest status object available: the cached one while the
            TTL holds, a newly fetched one otherwise, a stale cached one on
            fetch error, or None when nothing is available.
        """
        now = time.time()
        # Serve from cache while it is still fresh.
        if now - self._last_status_query < self._cache_ttl:
            return self._state_cache.get("status")

        if not self.is_available:
            return None

        try:
            status = self.reachy.client.get_status(wait=False)
            self._state_cache["status"] = status
            self._last_status_query = now
            return status
        except Exception as e:
            logger.error(f"Error getting status: {e}")
            return self._state_cache.get("status")  # Return stale cache on error
233
-
234
- def get_daemon_state(self) -> str:
235
- """Get daemon state with caching."""
236
- status = self._get_cached_status()
237
- if status is None:
238
- return "not_available"
239
- return str(self._status_value(status, "state", "unknown"))
240
-
241
- def get_backend_ready(self) -> bool:
242
- """Check if backend is ready with caching."""
243
- status = self._get_cached_status()
244
- if status is None:
245
- return False
246
- return self._status_value(status, "state") == "running"
247
-
248
- def get_error_message(self) -> str:
249
- """Get current error message with caching."""
250
- status = self._get_cached_status()
251
- if status is None:
252
- return "Robot not available"
253
- return str(self._status_value(status, "error", "") or "")
254
-
255
    def get_speaker_volume(self) -> float:
        """Get speaker volume (0-100) using amixer directly (no HTTP request).

        Parses the first bracketed percentage on a "Left:" line of
        `amixer sget PCM`; updates the cached value on success and falls
        back to the cache when amixer is unavailable or unparsable.
        """
        try:
            # Get the correct card name (from SDK detection logic)
            card_name = _get_amixer_card_name()

            # Try to get speaker volume from amixer directly
            result = subprocess.run(
                ["amixer", "-c", card_name, "sget", "PCM"],
                capture_output=True,
                text=True,
                check=False,
                timeout=1.0,
            )
            if result.returncode == 0:
                # assumes amixer output like "... Left: Playback 255 [100%] [on]" — TODO confirm per card
                for line in result.stdout.splitlines():
                    if "Left:" in line and "[" in line:
                        parts = line.split("[")
                        for part in parts:
                            if "%" in part:
                                volume_str = part.split("%")[0]
                                self._speaker_volume = float(volume_str)
                                return self._speaker_volume
        except (subprocess.TimeoutExpired, FileNotFoundError, ValueError) as e:
            logger.debug(f"Could not get speaker volume from amixer: {e}")

        # Fallback to cached value
        return self._speaker_volume
283
-
284
    def set_speaker_volume(self, volume: float) -> None:
        """
        Set speaker volume (0-100) using amixer directly (no HTTP request).

        The value is clamped to [0, 100] and cached even when the amixer
        call fails.

        Args:
            volume: Volume level 0-100
        """
        volume = max(0.0, min(100.0, volume))
        self._speaker_volume = volume

        try:
            # Get the correct card name (from SDK detection logic)
            card_name = _get_amixer_card_name()

            # Set speaker volume using amixer directly
            subprocess.run(
                ["amixer", "-c", card_name, "sset", "PCM", f"{int(volume)}%"],
                capture_output=True,
                timeout=2.0,
                check=True,
            )
            # NOTE(review): "PCM,1" presumably pins a second PCM control to
            # 100% — confirm against the card's mixer layout.
            subprocess.run(
                ["amixer", "-c", card_name, "sset", "PCM,1", "100%"],
                capture_output=True,
                timeout=2.0,
                check=True,
            )
            logger.info(f"Speaker volume set to {volume}% via amixer (card={card_name})")
        except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.CalledProcessError) as e:
            logger.error(f"Failed to set speaker volume via amixer: {e}")
314
-
315
    def get_microphone_volume(self) -> float:
        """Get microphone volume (0-100) using amixer directly (no HTTP request).

        Parses the first bracketed percentage on a "Left:" line of
        `amixer sget Headset`; falls back to the cached value on failure.
        """
        try:
            # Get the correct card name (from SDK detection logic)
            card_name = _get_amixer_card_name()

            # Try to get microphone volume from amixer directly
            result = subprocess.run(
                ["amixer", "-c", card_name, "sget", "Headset"],
                capture_output=True,
                text=True,
                check=False,
                timeout=1.0,
            )
            if result.returncode == 0:
                # assumes the same "[NN%]" bracket format as the speaker control — TODO confirm
                for line in result.stdout.splitlines():
                    if "Left:" in line and "[" in line:
                        parts = line.split("[")
                        for part in parts:
                            if "%" in part:
                                volume_str = part.split("%")[0]
                                self._microphone_volume = float(volume_str)
                                return self._microphone_volume
        except (subprocess.TimeoutExpired, FileNotFoundError, ValueError) as e:
            logger.debug(f"Could not get microphone volume from amixer: {e}")

        # Fallback to cached value
        return self._microphone_volume
343
-
344
    def set_microphone_volume(self, volume: float) -> None:
        """
        Set microphone volume (0-100) using amixer directly (no HTTP request).

        The value is clamped to [0, 100] and cached even when the amixer
        call fails.

        Args:
            volume: Volume level 0-100
        """
        volume = max(0.0, min(100.0, volume))
        self._microphone_volume = volume

        try:
            # Get the correct card name (from SDK detection logic)
            card_name = _get_amixer_card_name()

            # Set microphone volume using amixer directly
            subprocess.run(
                ["amixer", "-c", card_name, "sset", "Headset", f"{int(volume)}%"],
                capture_output=True,
                timeout=2.0,
                check=True,
            )
            logger.info(f"Microphone volume set to {volume}% via amixer (card={card_name})")
        except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.CalledProcessError) as e:
            logger.error(f"Failed to set microphone volume via amixer: {e}")
368
-
369
- # ========== Phase 2: Motor Control ==========
370
-
371
- def get_motors_enabled(self) -> bool:
372
- """Check if motors are enabled with caching."""
373
- status = self._get_cached_status()
374
- if status is None:
375
- return False
376
- try:
377
- motor_mode = self._nested_status_value(status, "backend_status", "motor_control_mode", None)
378
- if motor_mode is not None:
379
- return motor_mode == "enabled"
380
- return self._status_value(status, "state") == "running"
381
- except Exception as e:
382
- logger.error(f"Error getting motor state: {e}")
383
- return False
384
-
385
- def set_motors_enabled(self, enabled: bool) -> None:
386
- """
387
- Enable or disable motors.
388
-
389
- Args:
390
- enabled: True to enable, False to disable
391
- """
392
- if not self.is_available:
393
- logger.warning("Cannot control motors: robot not available")
394
- return
395
-
396
- try:
397
- if enabled:
398
- self.reachy.enable_motors()
399
- logger.info("Motors enabled")
400
- else:
401
- self.reachy.disable_motors()
402
- logger.info("Motors disabled")
403
- except Exception as e:
404
- logger.error(f"Error setting motor state: {e}")
405
-
406
- def get_motor_mode(self) -> str:
407
- """Get current motor control mode with caching."""
408
- status = self._get_cached_status()
409
- if status is None:
410
- return "disabled"
411
- try:
412
- motor_mode = self._nested_status_value(status, "backend_status", "motor_control_mode", None)
413
- if motor_mode is not None:
414
- return str(motor_mode)
415
- if self._status_value(status, "state") == "running":
416
- return "enabled"
417
- return "disabled"
418
- except Exception as e:
419
- logger.error(f"Error getting motor mode: {e}")
420
- return "error"
421
-
422
- def set_motor_mode(self, mode: str) -> None:
423
- """
424
- Set motor control mode.
425
-
426
- Args:
427
- mode: One of "enabled", "disabled", "gravity_compensation"
428
- """
429
- if not self.is_available:
430
- logger.warning("Cannot set motor mode: robot not available")
431
- return
432
-
433
- try:
434
- if mode == "enabled":
435
- self.reachy.enable_motors()
436
- elif mode == "disabled":
437
- self.reachy.disable_motors()
438
- elif mode == "gravity_compensation":
439
- self.reachy.enable_gravity_compensation()
440
- else:
441
- logger.warning(f"Invalid motor mode: {mode}")
442
- return
443
- logger.info(f"Motor mode set to {mode}")
444
- except Exception as e:
445
- logger.error(f"Error setting motor mode: {e}")
446
-
447
    def wake_up(self) -> None:
        """Execute wake up animation.

        Requests the daemon to start with wake_up=true, invalidates the
        cached status, then notifies the wake callback so the
        VoiceAssistant can resume its services.
        """
        if not self.is_available:
            logger.warning("Cannot wake up: robot not available")
            return

        try:
            # SDK v1.5 sleep/wake is managed at daemon level.
            # Start daemon with wake_up=true so /api/daemon/status reflects awake state.
            self._daemon_command("/api/daemon/start", params={"wake_up": "true"})
            logger.info("Wake-up requested via daemon API")

            # Invalidate cached status after transition request
            self._last_status_query = 0.0

            # Notify callback (VoiceAssistant will resume services)
            if self._on_wake_callback is not None:
                try:
                    self._on_wake_callback()
                except Exception as e:
                    # Callback failures must not mask a successful wake request.
                    logger.error(f"Error in wake callback: {e}")
        except Exception as e:
            logger.error(f"Error executing wake up: {e}")
470
-
471
    def go_to_sleep(self) -> None:
        """Execute sleep animation.

        The order is important:
        1. First suspend all services via callback (so they release robot resources)
        2. Then send the robot to sleep

        This prevents errors from services trying to access a sleeping robot.
        """
        if not self.is_available:
            logger.warning("Cannot sleep: robot not available")
            return

        try:
            # First, notify callback to suspend all services
            # This must happen BEFORE the robot goes to sleep
            logger.info("Suspending services before sleep...")
            if self._on_sleep_callback is not None:
                try:
                    self._on_sleep_callback()
                except Exception as e:
                    # A failing callback should not stop the sleep sequence.
                    logger.error(f"Error in sleep callback: {e}")

            # Give services time to fully suspend
            time.sleep(0.5)

            # SDK v1.5 sleep/wake is managed at daemon level.
            # Stop daemon with goto_sleep=true so /api/daemon/status reflects sleep state.
            self._daemon_command("/api/daemon/stop", params={"goto_sleep": "true"})
            logger.info("Sleep requested via daemon API")

            # Invalidate cached status after transition request
            self._last_status_query = 0.0

        except Exception as e:
            logger.error(f"Error executing sleep: {e}")
507
-
508
- def _daemon_command(self, path: str, params: dict[str, str] | None = None) -> None:
509
- """Send a daemon command request with lightweight validation."""
510
- url = f"{self._daemon_base_url}{path}"
511
- resp = self._http_session.post(url, params=params or {}, timeout=self._http_timeout)
512
- resp.raise_for_status()
513
-
514
- # ========== Phase 3: Pose Control ==========
515
-
516
- def _get_head_pose(self) -> np.ndarray | None:
517
- """Get current head pose from SDK.
518
-
519
- Note: SDK's get_current_head_pose() is non-blocking - it returns
520
- cached data from Zenoh subscriptions, so no throttling needed.
521
- """
522
- if not self.is_available:
523
- return None
524
-
525
- try:
526
- return self.reachy.get_current_head_pose()
527
- except Exception as e:
528
- logger.error(f"Error getting head pose: {e}")
529
- return None
530
-
531
- def _get_joint_positions(self) -> tuple | None:
532
- """Get current joint positions from SDK.
533
-
534
- Note: SDK's get_current_joint_positions() is non-blocking - it returns
535
- cached data from Zenoh subscriptions, so no throttling needed.
536
- """
537
- if not self.is_available:
538
- return None
539
-
540
- try:
541
- return self.reachy.get_current_joint_positions()
542
- except Exception as e:
543
- logger.error(f"Error getting joint positions: {e}")
544
- return None
545
-
546
- def _extract_pose_from_matrix(self, pose_matrix: np.ndarray) -> tuple:
547
- """
548
- Extract position (x, y, z) and rotation (roll, pitch, yaw) from 4x4 pose matrix.
549
-
550
- Args:
551
- pose_matrix: 4x4 homogeneous transformation matrix
552
-
553
- Returns:
554
- tuple: (x, y, z, roll, pitch, yaw) where position is in meters and angles in radians
555
- """
556
- # Extract position from the last column
557
- x = pose_matrix[0, 3]
558
- y = pose_matrix[1, 3]
559
- z = pose_matrix[2, 3]
560
-
561
- # Extract rotation matrix and convert to euler angles
562
- rotation_matrix = pose_matrix[:3, :3]
563
- rotation = R.from_matrix(rotation_matrix)
564
- # Use 'xyz' convention for roll, pitch, yaw
565
- roll, pitch, yaw = rotation.as_euler("xyz")
566
-
567
- return x, y, z, roll, pitch, yaw
568
-
569
- def _get_head_pose_component(self, component: str) -> float:
570
- """Get a specific component from head pose.
571
-
572
- Args:
573
- component: One of 'x', 'y', 'z' (mm), 'roll', 'pitch', 'yaw' (degrees)
574
-
575
- Returns:
576
- The component value, or 0.0 on error
577
- """
578
- pose = self._get_head_pose()
579
- if pose is None:
580
- return 0.0
581
- try:
582
- x, y, z, roll, pitch, yaw = self._extract_pose_from_matrix(pose)
583
- components = {
584
- "x": x * 1000, # m to mm
585
- "y": y * 1000,
586
- "z": z * 1000,
587
- "roll": math.degrees(roll),
588
- "pitch": math.degrees(pitch),
589
- "yaw": math.degrees(yaw),
590
- }
591
- return components.get(component, 0.0)
592
- except Exception as e:
593
- logger.error(f"Error getting head {component}: {e}")
594
- return 0.0
595
-
596
- def _disabled_pose_setter(self, name: str) -> None:
597
- """Log warning when MovementManager is not available."""
598
- logger.warning(f"set_{name} failed - MovementManager not set")
599
-
600
- def _set_pose_via_manager(self, **kwargs) -> bool:
601
- """Set pose via MovementManager if available.
602
-
603
- Returns True if successful, False if MovementManager not available.
604
- """
605
- if self._movement_manager is None:
606
- return False
607
- self._movement_manager.set_target_pose(**kwargs)
608
- return True
609
-
610
- # Head position getters and setters
611
    def get_head_x(self) -> float:
        """Get head X position in mm."""
        # Component lookup converts the SDK's meters to millimeters.
        return self._get_head_pose_component("x")
614
-
615
    def set_head_x(self, x_mm: float) -> None:
        """Set head X position in mm via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(x=x_mm / 1000.0):  # mm to m
            self._disabled_pose_setter("head_x")
619
-
620
    def get_head_y(self) -> float:
        """Get head Y position in mm."""
        # Component lookup converts the SDK's meters to millimeters.
        return self._get_head_pose_component("y")
623
-
624
    def set_head_y(self, y_mm: float) -> None:
        """Set head Y position in mm via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(y=y_mm / 1000.0):  # mm to m
            self._disabled_pose_setter("head_y")
628
-
629
    def get_head_z(self) -> float:
        """Get head Z position in mm."""
        # Component lookup converts the SDK's meters to millimeters.
        return self._get_head_pose_component("z")
632
-
633
    def set_head_z(self, z_mm: float) -> None:
        """Set head Z position in mm via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(z=z_mm / 1000.0):  # mm to m
            self._disabled_pose_setter("head_z")
637
-
638
- # Head orientation getters and setters
639
    def get_head_roll(self) -> float:
        """Get head roll angle in degrees."""
        # Component lookup converts the SDK's radians to degrees.
        return self._get_head_pose_component("roll")
642
-
643
    def set_head_roll(self, roll_deg: float) -> None:
        """Set head roll angle in degrees via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(roll=math.radians(roll_deg)):
            self._disabled_pose_setter("head_roll")
647
-
648
    def get_head_pitch(self) -> float:
        """Get head pitch angle in degrees."""
        # Component lookup converts the SDK's radians to degrees.
        return self._get_head_pose_component("pitch")
651
-
652
    def set_head_pitch(self, pitch_deg: float) -> None:
        """Set head pitch angle in degrees via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(pitch=math.radians(pitch_deg)):
            self._disabled_pose_setter("head_pitch")
656
-
657
    def get_head_yaw(self) -> float:
        """Get head yaw angle in degrees."""
        # Component lookup converts the SDK's radians to degrees.
        return self._get_head_pose_component("yaw")
660
-
661
    def set_head_yaw(self, yaw_deg: float) -> None:
        """Set head yaw angle in degrees via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(yaw=math.radians(yaw_deg)):
            self._disabled_pose_setter("head_yaw")
665
-
666
- def get_body_yaw(self) -> float:
667
- """Get body yaw angle in degrees."""
668
- joints = self._get_joint_positions()
669
- if joints is None:
670
- return 0.0
671
- try:
672
- head_joints, _ = joints
673
- return math.degrees(head_joints[0])
674
- except Exception as e:
675
- logger.error(f"Error getting body yaw: {e}")
676
- return 0.0
677
-
678
- def set_body_yaw(self, yaw_deg: float) -> None:
679
- """Set body yaw angle in degrees.
680
-
681
- Note: This directly calls SDK's set_target_body_yaw since automatic body yaw
682
- is enabled. Manual control will temporarily override automatic mode.
683
- """
684
- if self.reachy is None:
685
- self._disabled_pose_setter("body_yaw")
686
- return
687
- try:
688
- self.reachy.set_target_body_yaw(math.radians(yaw_deg))
689
- except Exception as e:
690
- logger.error(f"Error setting body yaw: {e}")
691
-
692
- def get_antenna_left(self) -> float:
693
- """Get left antenna angle in degrees."""
694
- joints = self._get_joint_positions()
695
- if joints is None:
696
- return 0.0
697
- try:
698
- _, antennas = joints
699
- return math.degrees(antennas[1]) # left is index 1
700
- except Exception as e:
701
- logger.error(f"Error getting left antenna: {e}")
702
- return 0.0
703
-
704
- def set_antenna_left(self, angle_deg: float) -> None:
705
- """Set left antenna angle in degrees via MovementManager."""
706
- if not self._set_pose_via_manager(antenna_left=math.radians(angle_deg)):
707
- self._disabled_pose_setter("antenna_left")
708
-
709
- def get_antenna_right(self) -> float:
710
- """Get right antenna angle in degrees."""
711
- joints = self._get_joint_positions()
712
- if joints is None:
713
- return 0.0
714
- try:
715
- _, antennas = joints
716
- return math.degrees(antennas[0]) # right is index 0
717
- except Exception as e:
718
- logger.error(f"Error getting right antenna: {e}")
719
- return 0.0
720
-
721
- def set_antenna_right(self, angle_deg: float) -> None:
722
- """Set right antenna angle in degrees via MovementManager."""
723
- if not self._set_pose_via_manager(antenna_right=math.radians(angle_deg)):
724
- self._disabled_pose_setter("antenna_right")
725
-
726
- # ========== Phase 4: Look At Control ==========
727
-
728
- def get_look_at_x(self) -> float:
729
- """Get look at target X coordinate in world frame (meters)."""
730
- # This is a target position, not a current state
731
- # We'll store it internally
732
- return getattr(self, "_look_at_x", 0.0)
733
-
734
- def set_look_at_x(self, x: float) -> None:
735
- """Set look at target X coordinate."""
736
- self._look_at_x = x
737
- self._update_look_at()
738
-
739
- def get_look_at_y(self) -> float:
740
- """Get look at target Y coordinate in world frame (meters)."""
741
- return getattr(self, "_look_at_y", 0.0)
742
-
743
- def set_look_at_y(self, y: float) -> None:
744
- """Set look at target Y coordinate."""
745
- self._look_at_y = y
746
- self._update_look_at()
747
-
748
- def get_look_at_z(self) -> float:
749
- """Get look at target Z coordinate in world frame (meters)."""
750
- return getattr(self, "_look_at_z", 0.0)
751
-
752
- def set_look_at_z(self, z: float) -> None:
753
- """Set look at target Z coordinate."""
754
- self._look_at_z = z
755
- self._update_look_at()
756
-
757
- def _update_look_at(self) -> None:
758
- """Update robot to look at the target coordinates.
759
-
760
- NOTE: Disabled to prevent conflict with MovementManager's control loop.
761
- """
762
- logger.warning("_update_look_at is disabled - MovementManager controls head pose")
763
- # if not self.is_available:
764
- # return
765
- # try:
766
- # x = getattr(self, '_look_at_x', 0.0)
767
- # y = getattr(self, '_look_at_y', 0.0)
768
- # z = getattr(self, '_look_at_z', 0.0)
769
- # self.reachy.look_at_world(x, y, z)
770
- # logger.info(f"Looking at world coordinates: ({x}, {y}, {z})")
771
- # except Exception as e:
772
- # logger.error(f"Error updating look at: {e}")
773
-
774
- # ========== Phase 6: Diagnostic Information ==========
775
-
776
- def get_control_loop_frequency(self) -> float:
777
- """Get control loop frequency in Hz with caching."""
778
- status = self._get_cached_status()
779
- if status is None:
780
- return 0.0
781
- try:
782
- control_loop_stats = self._nested_status_value(status, "backend_status", "control_loop_stats", None)
783
- if isinstance(control_loop_stats, dict):
784
- return float(control_loop_stats.get("mean_control_loop_frequency", 0.0))
785
- if control_loop_stats is not None:
786
- return float(getattr(control_loop_stats, "mean_control_loop_frequency", 0.0))
787
- return 0.0
788
- except Exception as e:
789
- logger.error(f"Error getting control loop frequency: {e}")
790
- return 0.0
791
-
792
- def get_sdk_version(self) -> str:
793
- """Get SDK version with caching."""
794
- status = self._get_cached_status()
795
- if status is None:
796
- return "N/A"
797
- return str(self._status_value(status, "version", "unknown") or "unknown")
798
-
799
- def get_robot_name(self) -> str:
800
- """Get robot name with caching."""
801
- status = self._get_cached_status()
802
- if status is None:
803
- return "N/A"
804
- return str(self._status_value(status, "robot_name", "unknown") or "unknown")
805
-
806
- def get_wireless_version(self) -> bool:
807
- """Check if this is a wireless version with caching."""
808
- status = self._get_cached_status()
809
- if status is None:
810
- return False
811
- return bool(self._status_value(status, "wireless_version", False))
812
-
813
- def get_simulation_mode(self) -> bool:
814
- """Check if simulation mode is enabled with caching."""
815
- status = self._get_cached_status()
816
- if status is None:
817
- return False
818
- return bool(self._status_value(status, "simulation_enabled", False))
819
-
820
- def get_wlan_ip(self) -> str:
821
- """Get WLAN IP address with caching."""
822
- status = self._get_cached_status()
823
- if status is None:
824
- return "N/A"
825
- return str(self._status_value(status, "wlan_ip", "N/A") or "N/A")
826
-
827
- # ========== Phase 7: IMU Sensors (Wireless only) ==========
828
-
829
- def _get_imu_value(self, sensor_type: str, index: int) -> float:
830
- """Get a specific IMU sensor value.
831
-
832
- Args:
833
- sensor_type: 'accelerometer', 'gyroscope', or 'temperature'
834
- index: Array index (0=x, 1=y, 2=z) or -1 for scalar values
835
-
836
- Returns:
837
- The sensor value, or 0.0 on error
838
- """
839
- if not self.is_available:
840
- return 0.0
841
- try:
842
- imu_data = self.reachy.imu
843
- if imu_data is None or sensor_type not in imu_data:
844
- return 0.0
845
- value = imu_data[sensor_type]
846
- return float(value[index]) if index >= 0 else float(value)
847
- except Exception as e:
848
- logger.debug(f"Error getting IMU {sensor_type}: {e}")
849
- return 0.0
850
-
851
- def get_imu_accel_x(self) -> float:
852
- """Get IMU X-axis acceleration in m/s²."""
853
- return self._get_imu_value("accelerometer", 0)
854
-
855
- def get_imu_accel_y(self) -> float:
856
- """Get IMU Y-axis acceleration in m/s²."""
857
- return self._get_imu_value("accelerometer", 1)
858
-
859
- def get_imu_accel_z(self) -> float:
860
- """Get IMU Z-axis acceleration in m/s²."""
861
- return self._get_imu_value("accelerometer", 2)
862
-
863
- def get_imu_gyro_x(self) -> float:
864
- """Get IMU X-axis angular velocity in rad/s."""
865
- return self._get_imu_value("gyroscope", 0)
866
-
867
- def get_imu_gyro_y(self) -> float:
868
- """Get IMU Y-axis angular velocity in rad/s."""
869
- return self._get_imu_value("gyroscope", 1)
870
-
871
- def get_imu_gyro_z(self) -> float:
872
- """Get IMU Z-axis angular velocity in rad/s."""
873
- return self._get_imu_value("gyroscope", 2)
874
-
875
- def get_imu_temperature(self) -> float:
876
- """Get IMU temperature in °C."""
877
- return self._get_imu_value("temperature", -1)
878
-
879
- # ========== Phase 11: LED Control (DISABLED) ==========
880
- # LED control is disabled because LEDs are hidden inside the robot.
881
- # See PROJECT_PLAN.md principle 8.
882
-
883
- def _get_respeaker(self):
884
- """Get ReSpeaker device from media manager with thread-safe access.
885
-
886
- Returns a context manager that holds the lock during ReSpeaker operations.
887
- Usage:
888
- with self._get_respeaker() as respeaker:
889
- if respeaker:
890
- respeaker.read("...")
891
-
892
- Note: This accesses the private _respeaker attribute from the SDK.
893
- TODO: Check if SDK provides a public API for ReSpeaker access in future versions.
894
- This is a known compatibility risk and should be reviewed on SDK updates.
895
- """
896
- if not self.is_available:
897
- return _ReSpeakerContext(None, self._respeaker_lock)
898
- try:
899
- if not self.reachy.media or not self.reachy.media.audio:
900
- return _ReSpeakerContext(None, self._respeaker_lock)
901
- # WARNING: Accessing private attribute _respeaker
902
- # TODO: Replace with public API when available
903
- respeaker = self.reachy.media.audio._respeaker
904
- return _ReSpeakerContext(respeaker, self._respeaker_lock)
905
- except Exception:
906
- return _ReSpeakerContext(None, self._respeaker_lock)
907
-
908
- # ========== Phase 12: Audio Processing (via local SDK with thread-safe access) ==========
909
-
910
- def get_agc_enabled(self) -> bool:
911
- """Get AGC (Automatic Gain Control) enabled status."""
912
- with self._get_respeaker() as respeaker:
913
- if respeaker is None:
914
- return getattr(self, "_agc_enabled", True) # Default to enabled
915
- try:
916
- result = respeaker.read("PP_AGCONOFF")
917
- if result is not None:
918
- self._agc_enabled = bool(result[1])
919
- return self._agc_enabled
920
- except Exception as e:
921
- logger.debug(f"Error getting AGC status: {e}")
922
- return getattr(self, "_agc_enabled", True)
923
-
924
- def set_agc_enabled(self, enabled: bool) -> None:
925
- """Set AGC (Automatic Gain Control) enabled status."""
926
- self._agc_enabled = enabled
927
- with self._get_respeaker() as respeaker:
928
- if respeaker is None:
929
- return
930
- try:
931
- respeaker.write("PP_AGCONOFF", [1 if enabled else 0])
932
- logger.info(f"AGC {'enabled' if enabled else 'disabled'}")
933
- except Exception as e:
934
- logger.error(f"Error setting AGC status: {e}")
935
-
936
- def get_agc_max_gain(self) -> float:
937
- """Get AGC maximum gain in dB (0-40 dB range)."""
938
- with self._get_respeaker() as respeaker:
939
- if respeaker is None:
940
- return getattr(self, "_agc_max_gain", 30.0) # Default matches MicrophoneDefaults
941
- try:
942
- result = respeaker.read("PP_AGCMAXGAIN")
943
- if result is not None:
944
- self._agc_max_gain = float(result[0])
945
- return self._agc_max_gain
946
- except Exception as e:
947
- logger.debug(f"Error getting AGC max gain: {e}")
948
- return getattr(self, "_agc_max_gain", 30.0)
949
-
950
- def set_agc_max_gain(self, gain: float) -> None:
951
- """Set AGC maximum gain in dB (0-40 dB range)."""
952
- gain = max(0.0, min(40.0, gain)) # XVF3800 supports up to 40dB
953
- self._agc_max_gain = gain
954
- with self._get_respeaker() as respeaker:
955
- if respeaker is None:
956
- return
957
- try:
958
- respeaker.write("PP_AGCMAXGAIN", [gain])
959
- logger.info(f"AGC max gain set to {gain} dB")
960
- except Exception as e:
961
- logger.error(f"Error setting AGC max gain: {e}")
962
-
963
- def get_noise_suppression(self) -> float:
964
- """Get noise suppression level (0-100%).
965
-
966
- PP_MIN_NS represents "minimum signal preservation ratio":
967
- - PP_MIN_NS = 0.85 means "keep at least 85% of signal" = 15% suppression
968
- - PP_MIN_NS = 0.15 means "keep at least 15% of signal" = 85% suppression
969
-
970
- We display "noise suppression strength" to user, so:
971
- - suppression_percent = (1.0 - PP_MIN_NS) * 100
972
- """
973
- with self._get_respeaker() as respeaker:
974
- if respeaker is None:
975
- return getattr(self, "_noise_suppression", 15.0)
976
- try:
977
- result = respeaker.read("PP_MIN_NS")
978
- if result is not None:
979
- raw_value = result[0]
980
- # Convert: PP_MIN_NS=0.85 -> 15% suppression, PP_MIN_NS=0.15 -> 85% suppression
981
- self._noise_suppression = max(0.0, min(100.0, (1.0 - raw_value) * 100.0))
982
- logger.debug(f"Noise suppression: PP_MIN_NS={raw_value:.2f} -> {self._noise_suppression:.1f}%")
983
- return self._noise_suppression
984
- except Exception as e:
985
- logger.debug(f"Error getting noise suppression: {e}")
986
- return getattr(self, "_noise_suppression", 15.0)
987
-
988
- def set_noise_suppression(self, level: float) -> None:
989
- """Set noise suppression level (0-100%)."""
990
- level = max(0.0, min(100.0, level))
991
- self._noise_suppression = level
992
- with self._get_respeaker() as respeaker:
993
- if respeaker is None:
994
- return
995
- try:
996
- # Convert percentage to PP_MIN_NS value (inverted)
997
- value = 1.0 - (level / 100.0)
998
- respeaker.write("PP_MIN_NS", [value])
999
- logger.info(f"Noise suppression set to {level}%")
1000
- except Exception as e:
1001
- logger.error(f"Error setting noise suppression: {e}")
1002
-
1003
- def get_echo_cancellation_converged(self) -> bool:
1004
- """Check if echo cancellation has converged."""
1005
- with self._get_respeaker() as respeaker:
1006
- if respeaker is None:
1007
- return False
1008
- try:
1009
- result = respeaker.read("AEC_AECCONVERGED")
1010
- if result is not None:
1011
- return bool(result[1])
1012
- except Exception as e:
1013
- logger.debug(f"Error getting AEC converged status: {e}")
1014
- return False
1015
-
1016
- # ========== DOA (Direction of Arrival) ==========
1017
-
1018
- def get_doa_angle(self) -> tuple[float, bool] | None:
1019
- """Get Direction of Arrival angle from microphone array.
1020
-
1021
- The DOA angle indicates the direction of the sound source relative to the robot.
1022
- Angle is in radians: 0 = left, π/2 = front/back, π = right.
1023
-
1024
- Returns:
1025
- Tuple of (angle_radians, speech_detected), or None if unavailable.
1026
- - angle_radians: Sound source direction in radians
1027
- - speech_detected: Whether speech is currently detected
1028
- """
1029
- if not self.is_available:
1030
- return None
1031
- try:
1032
- if self.reachy.media and self.reachy.media.audio:
1033
- return self.reachy.media.audio.get_DoA()
1034
- except Exception as e:
1035
- logger.debug(f"Error getting DOA: {e}")
1036
- return None
1037
-
1038
- def get_doa_angle_degrees(self) -> float:
1039
- """Get DOA angle in degrees for Home Assistant entity.
1040
-
1041
- Returns the raw DOA angle in degrees (0-180°).
1042
- SDK convention: 0° = left, 90° = front, 180° = right
1043
- """
1044
- doa = self.get_doa_angle()
1045
- if doa is None:
1046
- return 0.0
1047
- angle_rad, _ = doa
1048
- # Return raw angle in degrees (0-180°)
1049
- angle_deg = math.degrees(angle_rad)
1050
- return angle_deg
1051
-
1052
- def get_speech_detected(self) -> bool:
1053
- """Get speech detection status from DOA.
1054
-
1055
- Returns True if speech is currently detected.
1056
- """
1057
- doa = self.get_doa_angle()
1058
- if doa is None:
1059
- return False
1060
- _, speech_detected = doa
1061
- return speech_detected
 
1
+ """Reachy Mini controller wrapper for ESPHome entities."""
2
+
3
+ import logging
4
+ import time
5
+ from typing import Any, Dict, Optional, TYPE_CHECKING
6
+ import math
7
+ import numpy as np
8
+ from scipy.spatial.transform import Rotation as R
9
+ import requests
10
+
11
+ if TYPE_CHECKING:
12
+ from reachy_mini import ReachyMini
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class _ReSpeakerContext:
18
+ """Context manager for thread-safe ReSpeaker access."""
19
+
20
+ def __init__(self, respeaker, lock):
21
+ self._respeaker = respeaker
22
+ self._lock = lock
23
+
24
+ def __enter__(self):
25
+ self._lock.acquire()
26
+ return self._respeaker
27
+
28
+ def __exit__(self, exc_type, exc_val, exc_tb):
29
+ self._lock.release()
30
+ return False
31
+
32
+
33
+ class ReachyController:
34
+ """
35
+ Wrapper class for Reachy Mini control operations.
36
+
37
+ Provides safe access to Reachy Mini SDK functions with error handling
38
+ and fallback for standalone mode (when robot is not available).
39
+ """
40
+
41
+ def __init__(self, reachy_mini: Optional["ReachyMini"] = None):
42
+ """
43
+ Initialize the controller.
44
+
45
+ Args:
46
+ reachy_mini: ReachyMini instance, or None for standalone mode
47
+ """
48
+ self.reachy = reachy_mini
49
+ self._speaker_volume = 100 # Default volume
50
+ self._movement_manager = None # Set later via set_movement_manager()
51
+
52
+ # Status caching - only for get_status() which may trigger I/O
53
+ # Note: get_current_head_pose() and get_current_joint_positions() are
54
+ # non-blocking in the SDK (they return cached Zenoh data), so no caching needed
55
+ self._state_cache: Dict[str, Any] = {}
56
+ self._cache_ttl = 2.0 # 2 second cache TTL for status queries (increased from 1s)
57
+ self._last_status_query = 0.0
58
+
59
+ # Thread lock for ReSpeaker USB access to prevent conflicts with GStreamer audio pipeline
60
+ self._respeaker_lock = __import__('threading').Lock()
61
+
62
+ def set_movement_manager(self, movement_manager) -> None:
63
+ """Set the MovementManager instance for pose control.
64
+
65
+ Args:
66
+ movement_manager: MovementManager instance
67
+ """
68
+ self._movement_manager = movement_manager
69
+ logger.info("MovementManager set for ReachyController")
70
+
71
+ @property
72
+ def is_available(self) -> bool:
73
+ """Check if robot is available."""
74
+ return self.reachy is not None
75
+
76
+ # ========== Phase 1: Basic Status & Volume ==========
77
+
78
+ def _get_cached_status(self) -> Optional[Dict]:
79
+ """Get cached daemon status to reduce query frequency.
80
+
81
+ Note: get_status() may trigger I/O, so we cache it.
82
+ Unlike get_current_head_pose() and get_current_joint_positions()
83
+ which are non-blocking in the SDK.
84
+ """
85
+ now = time.time()
86
+ if now - self._last_status_query < self._cache_ttl:
87
+ return self._state_cache.get('status')
88
+
89
+ if not self.is_available:
90
+ return None
91
+
92
+ try:
93
+ status = self.reachy.client.get_status(wait=False)
94
+ self._state_cache['status'] = status
95
+ self._last_status_query = now
96
+ return status
97
+ except Exception as e:
98
+ logger.error(f"Error getting status: {e}")
99
+ return self._state_cache.get('status') # Return stale cache on error
100
+
101
+ def get_daemon_state(self) -> str:
102
+ """Get daemon state with caching."""
103
+ status = self._get_cached_status()
104
+ if status is None:
105
+ return "not_available"
106
+ return status.get('state', 'unknown')
107
+
108
+ def get_backend_ready(self) -> bool:
109
+ """Check if backend is ready with caching."""
110
+ status = self._get_cached_status()
111
+ if status is None:
112
+ return False
113
+ return status.get('state') == 'running'
114
+
115
+ def get_error_message(self) -> str:
116
+ """Get current error message with caching."""
117
+ status = self._get_cached_status()
118
+ if status is None:
119
+ return "Robot not available"
120
+ return status.get('error') or ""
121
+
122
+ def get_speaker_volume(self) -> float:
123
+ """Get speaker volume (0-100) with caching."""
124
+ if not self.is_available:
125
+ return self._speaker_volume
126
+ try:
127
+ # Get volume from daemon API (use cached status for IP)
128
+ status = self._get_cached_status()
129
+ if status is None:
130
+ return self._speaker_volume
131
+ wlan_ip = status.get('wlan_ip', 'localhost')
132
+ response = requests.get(f"http://{wlan_ip}:8000/api/volume/current", timeout=2)
133
+ if response.status_code == 200:
134
+ data = response.json()
135
+ self._speaker_volume = float(data.get('volume', self._speaker_volume))
136
+ except Exception as e:
137
+ logger.debug(f"Could not get volume from API: {e}")
138
+ return self._speaker_volume
139
+
140
+ def set_speaker_volume(self, volume: float) -> None:
141
+ """
142
+ Set speaker volume (0-100) with cached status.
143
+
144
+ Args:
145
+ volume: Volume level 0-100
146
+ """
147
+ volume = max(0.0, min(100.0, volume))
148
+ self._speaker_volume = volume
149
+
150
+ if not self.is_available:
151
+ logger.warning("Cannot set volume: robot not available")
152
+ return
153
+
154
+ try:
155
+ # Set volume via daemon API (use cached status for IP)
156
+ status = self._get_cached_status()
157
+ if status is None:
158
+ logger.error("Cannot get daemon status for volume control")
159
+ return
160
+ wlan_ip = status.get('wlan_ip', 'localhost')
161
+ response = requests.post(
162
+ f"http://{wlan_ip}:8000/api/volume/set",
163
+ json={"volume": int(volume)},
164
+ timeout=5
165
+ )
166
+ if response.status_code == 200:
167
+ logger.info(f"Speaker volume set to {volume}%")
168
+ else:
169
+ logger.error(f"Failed to set volume: {response.status_code} {response.text}")
170
+ except Exception as e:
171
+ logger.error(f"Error setting speaker volume: {e}")
172
+
173
+ def get_microphone_volume(self) -> float:
174
+ """Get microphone volume (0-100) using daemon HTTP API."""
175
+ if not self.is_available:
176
+ return getattr(self, '_microphone_volume', 50.0)
177
+
178
+ try:
179
+ # Get WLAN IP from cached daemon status
180
+ status = self._get_cached_status()
181
+ if status is None:
182
+ return getattr(self, '_microphone_volume', 50.0)
183
+ wlan_ip = status.get('wlan_ip', 'localhost')
184
+
185
+ # Call the daemon API to get microphone volume
186
+ response = requests.get(
187
+ f"http://{wlan_ip}:8000/api/volume/microphone/current",
188
+ timeout=2
189
+ )
190
+ if response.status_code == 200:
191
+ data = response.json()
192
+ self._microphone_volume = float(data.get('volume', 50))
193
+ return self._microphone_volume
194
+ except Exception as e:
195
+ logger.debug(f"Could not get microphone volume from API: {e}")
196
+
197
+ return getattr(self, '_microphone_volume', 50.0)
198
+
199
+ def set_microphone_volume(self, volume: float) -> None:
200
+ """
201
+ Set microphone volume (0-100) using daemon HTTP API.
202
+
203
+ Args:
204
+ volume: Volume level 0-100
205
+ """
206
+ volume = max(0.0, min(100.0, volume))
207
+ self._microphone_volume = volume
208
+
209
+ if not self.is_available:
210
+ logger.warning("Cannot set microphone volume: robot not available")
211
+ return
212
+
213
+ try:
214
+ # Get WLAN IP from cached daemon status
215
+ status = self._get_cached_status()
216
+ if status is None:
217
+ logger.error("Cannot get daemon status for microphone volume control")
218
+ return
219
+ wlan_ip = status.get('wlan_ip', 'localhost')
220
+
221
+ # Call the daemon API to set microphone volume
222
+ response = requests.post(
223
+ f"http://{wlan_ip}:8000/api/volume/microphone/set",
224
+ json={"volume": int(volume)},
225
+ timeout=5
226
+ )
227
+ if response.status_code == 200:
228
+ logger.info(f"Microphone volume set to {volume}%")
229
+ else:
230
+ logger.error(f"Failed to set microphone volume: {response.status_code} {response.text}")
231
+ except Exception as e:
232
+ logger.error(f"Error setting microphone volume: {e}")
233
+
234
+ # ========== Phase 2: Motor Control ==========
235
+
236
+ def get_motors_enabled(self) -> bool:
237
+ """Check if motors are enabled with caching."""
238
+ status = self._get_cached_status()
239
+ if status is None:
240
+ return False
241
+ try:
242
+ backend_status = status.get('backend_status')
243
+ if backend_status and isinstance(backend_status, dict):
244
+ motor_mode = backend_status.get('motor_control_mode', 'disabled')
245
+ return motor_mode == 'enabled'
246
+ return status.get('state') == 'running'
247
+ except Exception as e:
248
+ logger.error(f"Error getting motor state: {e}")
249
+ return False
250
+
251
+ def set_motors_enabled(self, enabled: bool) -> None:
252
+ """
253
+ Enable or disable motors.
254
+
255
+ Args:
256
+ enabled: True to enable, False to disable
257
+ """
258
+ if not self.is_available:
259
+ logger.warning("Cannot control motors: robot not available")
260
+ return
261
+
262
+ try:
263
+ if enabled:
264
+ self.reachy.enable_motors()
265
+ logger.info("Motors enabled")
266
+ else:
267
+ self.reachy.disable_motors()
268
+ logger.info("Motors disabled")
269
+ except Exception as e:
270
+ logger.error(f"Error setting motor state: {e}")
271
+
272
+ def get_motor_mode(self) -> str:
273
+ """Get current motor control mode with caching."""
274
+ status = self._get_cached_status()
275
+ if status is None:
276
+ return "disabled"
277
+ try:
278
+ backend_status = status.get('backend_status')
279
+ if backend_status and isinstance(backend_status, dict):
280
+ motor_mode = backend_status.get('motor_control_mode', 'disabled')
281
+ return motor_mode
282
+ if status.get('state') == 'running':
283
+ return "enabled"
284
+ return "disabled"
285
+ except Exception as e:
286
+ logger.error(f"Error getting motor mode: {e}")
287
+ return "error"
288
+
289
+ def set_motor_mode(self, mode: str) -> None:
290
+ """
291
+ Set motor control mode.
292
+
293
+ Args:
294
+ mode: One of "enabled", "disabled", "gravity_compensation"
295
+ """
296
+ if not self.is_available:
297
+ logger.warning("Cannot set motor mode: robot not available")
298
+ return
299
+
300
+ try:
301
+ if mode == "enabled":
302
+ self.reachy.enable_motors()
303
+ elif mode == "disabled":
304
+ self.reachy.disable_motors()
305
+ elif mode == "gravity_compensation":
306
+ self.reachy.enable_gravity_compensation()
307
+ else:
308
+ logger.warning(f"Invalid motor mode: {mode}")
309
+ return
310
+ logger.info(f"Motor mode set to {mode}")
311
+ except Exception as e:
312
+ logger.error(f"Error setting motor mode: {e}")
313
+
314
+ def wake_up(self) -> None:
315
+ """Execute wake up animation."""
316
+ if not self.is_available:
317
+ logger.warning("Cannot wake up: robot not available")
318
+ return
319
+
320
+ try:
321
+ self.reachy.wake_up()
322
+ logger.info("Wake up animation executed")
323
+ except Exception as e:
324
+ logger.error(f"Error executing wake up: {e}")
325
+
326
+ def go_to_sleep(self) -> None:
327
+ """Execute sleep animation."""
328
+ if not self.is_available:
329
+ logger.warning("Cannot sleep: robot not available")
330
+ return
331
+
332
+ try:
333
+ self.reachy.goto_sleep()
334
+ logger.info("Sleep animation executed")
335
+ except Exception as e:
336
+ logger.error(f"Error executing sleep: {e}")
337
+
338
+ # ========== Phase 3: Pose Control ==========
339
+
340
+ def _get_head_pose(self) -> Optional[np.ndarray]:
341
+ """Get current head pose from SDK.
342
+
343
+ Note: SDK's get_current_head_pose() is non-blocking - it returns
344
+ cached data from Zenoh subscriptions, so no throttling needed.
345
+ """
346
+ if not self.is_available:
347
+ return None
348
+
349
+ try:
350
+ return self.reachy.get_current_head_pose()
351
+ except Exception as e:
352
+ logger.error(f"Error getting head pose: {e}")
353
+ return None
354
+
355
+ def _get_joint_positions(self) -> Optional[tuple]:
356
+ """Get current joint positions from SDK.
357
+
358
+ Note: SDK's get_current_joint_positions() is non-blocking - it returns
359
+ cached data from Zenoh subscriptions, so no throttling needed.
360
+ """
361
+ if not self.is_available:
362
+ return None
363
+
364
+ try:
365
+ return self.reachy.get_current_joint_positions()
366
+ except Exception as e:
367
+ logger.error(f"Error getting joint positions: {e}")
368
+ return None
369
+
370
+ def _extract_pose_from_matrix(self, pose_matrix: np.ndarray) -> tuple:
371
+ """
372
+ Extract position (x, y, z) and rotation (roll, pitch, yaw) from 4x4 pose matrix.
373
+
374
+ Args:
375
+ pose_matrix: 4x4 homogeneous transformation matrix
376
+
377
+ Returns:
378
+ tuple: (x, y, z, roll, pitch, yaw) where position is in meters and angles in radians
379
+ """
380
+ # Extract position from the last column
381
+ x = pose_matrix[0, 3]
382
+ y = pose_matrix[1, 3]
383
+ z = pose_matrix[2, 3]
384
+
385
+ # Extract rotation matrix and convert to euler angles
386
+ rotation_matrix = pose_matrix[:3, :3]
387
+ rotation = R.from_matrix(rotation_matrix)
388
+ # Use 'xyz' convention for roll, pitch, yaw
389
+ roll, pitch, yaw = rotation.as_euler('xyz')
390
+
391
+ return x, y, z, roll, pitch, yaw
392
+
393
+ def _get_head_pose_component(self, component: str) -> float:
394
+ """Get a specific component from head pose.
395
+
396
+ Args:
397
+ component: One of 'x', 'y', 'z' (mm), 'roll', 'pitch', 'yaw' (degrees)
398
+
399
+ Returns:
400
+ The component value, or 0.0 on error
401
+ """
402
+ pose = self._get_head_pose()
403
+ if pose is None:
404
+ return 0.0
405
+ try:
406
+ x, y, z, roll, pitch, yaw = self._extract_pose_from_matrix(pose)
407
+ components = {
408
+ 'x': x * 1000, # m to mm
409
+ 'y': y * 1000,
410
+ 'z': z * 1000,
411
+ 'roll': math.degrees(roll),
412
+ 'pitch': math.degrees(pitch),
413
+ 'yaw': math.degrees(yaw),
414
+ }
415
+ return components.get(component, 0.0)
416
+ except Exception as e:
417
+ logger.error(f"Error getting head {component}: {e}")
418
+ return 0.0
419
+
420
+ def _disabled_pose_setter(self, name: str) -> None:
421
+ """Log warning when MovementManager is not available."""
422
+ logger.warning(f"set_{name} failed - MovementManager not set")
423
+
424
+ def _set_pose_via_manager(self, **kwargs) -> bool:
425
+ """Set pose via MovementManager if available.
426
+
427
+ Returns True if successful, False if MovementManager not available.
428
+ """
429
+ if self._movement_manager is None:
430
+ return False
431
+ self._movement_manager.set_target_pose(**kwargs)
432
+ return True
433
+
434
+ # Head position getters and setters
435
+ def get_head_x(self) -> float:
436
+ """Get head X position in mm."""
437
+ return self._get_head_pose_component('x')
438
+
439
+ def set_head_x(self, x_mm: float) -> None:
440
+ """Set head X position in mm via MovementManager."""
441
+ if not self._set_pose_via_manager(x=x_mm / 1000.0): # mm to m
442
+ self._disabled_pose_setter('head_x')
443
+
444
+ def get_head_y(self) -> float:
445
+ """Get head Y position in mm."""
446
+ return self._get_head_pose_component('y')
447
+
448
+ def set_head_y(self, y_mm: float) -> None:
449
+ """Set head Y position in mm via MovementManager."""
450
+ if not self._set_pose_via_manager(y=y_mm / 1000.0): # mm to m
451
+ self._disabled_pose_setter('head_y')
452
+
453
+ def get_head_z(self) -> float:
454
+ """Get head Z position in mm."""
455
+ return self._get_head_pose_component('z')
456
+
457
+ def set_head_z(self, z_mm: float) -> None:
458
+ """Set head Z position in mm via MovementManager."""
459
+ if not self._set_pose_via_manager(z=z_mm / 1000.0): # mm to m
460
+ self._disabled_pose_setter('head_z')
461
+
462
+ # Head orientation getters and setters
463
+ def get_head_roll(self) -> float:
464
+ """Get head roll angle in degrees."""
465
+ return self._get_head_pose_component('roll')
466
+
467
+ def set_head_roll(self, roll_deg: float) -> None:
468
+ """Set head roll angle in degrees via MovementManager."""
469
+ if not self._set_pose_via_manager(roll=math.radians(roll_deg)):
470
+ self._disabled_pose_setter('head_roll')
471
+
472
+ def get_head_pitch(self) -> float:
473
+ """Get head pitch angle in degrees."""
474
+ return self._get_head_pose_component('pitch')
475
+
476
+ def set_head_pitch(self, pitch_deg: float) -> None:
477
+ """Set head pitch angle in degrees via MovementManager."""
478
+ if not self._set_pose_via_manager(pitch=math.radians(pitch_deg)):
479
+ self._disabled_pose_setter('head_pitch')
480
+
481
+ def get_head_yaw(self) -> float:
482
+ """Get head yaw angle in degrees."""
483
+ return self._get_head_pose_component('yaw')
484
+
485
+ def set_head_yaw(self, yaw_deg: float) -> None:
486
+ """Set head yaw angle in degrees via MovementManager."""
487
+ if not self._set_pose_via_manager(yaw=math.radians(yaw_deg)):
488
+ self._disabled_pose_setter('head_yaw')
489
+
490
+ def get_body_yaw(self) -> float:
491
+ """Get body yaw angle in degrees."""
492
+ joints = self._get_joint_positions()
493
+ if joints is None:
494
+ return 0.0
495
+ try:
496
+ head_joints, _ = joints
497
+ return math.degrees(head_joints[0])
498
+ except Exception as e:
499
+ logger.error(f"Error getting body yaw: {e}")
500
+ return 0.0
501
+
502
def set_body_yaw(self, yaw_deg: float) -> None:
    """Command a new body yaw (degrees) through the MovementManager."""
    # The manager expects radians.
    accepted = self._set_pose_via_manager(body_yaw=math.radians(yaw_deg))
    if not accepted:
        self._disabled_pose_setter('body_yaw')
+
507
def get_antenna_left(self) -> float:
    """Report the left antenna angle in degrees (0.0 when unavailable)."""
    positions = self._get_joint_positions()
    if positions is None:
        return 0.0
    try:
        _head, antenna_angles = positions
        # Left antenna occupies index 1 of the antenna tuple.
        return math.degrees(antenna_angles[1])
    except Exception as e:
        logger.error(f"Error getting left antenna: {e}")
        return 0.0
+
519
def set_antenna_left(self, angle_deg: float) -> None:
    """Command a new left antenna angle (degrees) through the MovementManager."""
    # The manager expects radians.
    accepted = self._set_pose_via_manager(antenna_left=math.radians(angle_deg))
    if not accepted:
        self._disabled_pose_setter('antenna_left')
+
524
def get_antenna_right(self) -> float:
    """Report the right antenna angle in degrees (0.0 when unavailable)."""
    positions = self._get_joint_positions()
    if positions is None:
        return 0.0
    try:
        _head, antenna_angles = positions
        # Right antenna occupies index 0 of the antenna tuple.
        return math.degrees(antenna_angles[0])
    except Exception as e:
        logger.error(f"Error getting right antenna: {e}")
        return 0.0
+
536
def set_antenna_right(self, angle_deg: float) -> None:
    """Command a new right antenna angle (degrees) through the MovementManager."""
    # The manager expects radians.
    accepted = self._set_pose_via_manager(antenna_right=math.radians(angle_deg))
    if not accepted:
        self._disabled_pose_setter('antenna_right')
+
541
+ # ========== Phase 4: Look At Control ==========
542
+
543
def get_look_at_x(self) -> float:
    """Return the cached look-at target X coordinate (world frame, metres).

    This is the most recently requested target, not a measured state;
    0.0 is returned before any target has been set.
    """
    try:
        return self._look_at_x
    except AttributeError:
        return 0.0
+
549
def set_look_at_x(self, x: float) -> None:
    """Store a new look-at target X coordinate and refresh the gaze."""
    self._look_at_x = x
    # Re-aim using the full (x, y, z) target.
    self._update_look_at()
+
554
def get_look_at_y(self) -> float:
    """Return the cached look-at target Y coordinate (world frame, metres)."""
    try:
        return self._look_at_y
    except AttributeError:
        return 0.0
+
558
def set_look_at_y(self, y: float) -> None:
    """Store a new look-at target Y coordinate and refresh the gaze."""
    self._look_at_y = y
    # Re-aim using the full (x, y, z) target.
    self._update_look_at()
+
563
def get_look_at_z(self) -> float:
    """Return the cached look-at target Z coordinate (world frame, metres)."""
    try:
        return self._look_at_z
    except AttributeError:
        return 0.0
+
567
def set_look_at_z(self, z: float) -> None:
    """Store a new look-at target Z coordinate and refresh the gaze."""
    self._look_at_z = z
    # Re-aim using the full (x, y, z) target.
    self._update_look_at()
+
572
def _update_look_at(self) -> None:
    """Update robot to look at the target coordinates.

    NOTE: Intentionally a no-op. Driving `look_at_world` from here would
    fight the MovementManager's control loop, which owns the head pose;
    the stored `_look_at_*` targets are kept for entity readback only.
    """
    # Warn (rather than silently ignore) so callers notice the no-op.
    logger.warning("_update_look_at is disabled - MovementManager controls head pose")
+
589
+ # ========== Phase 6: Diagnostic Information ==========
590
+
591
def get_control_loop_frequency(self) -> float:
    """Report the mean control-loop frequency in Hz (0.0 when unavailable)."""
    status = self._get_cached_status()
    if status is None:
        return 0.0
    try:
        backend = status.get('backend_status')
        if not isinstance(backend, dict):
            return 0.0
        stats = backend.get('control_loop_stats', {})
        return stats.get('mean_control_loop_frequency', 0.0)
    except Exception as e:
        logger.error(f"Error getting control loop frequency: {e}")
        return 0.0
+
606
def get_sdk_version(self) -> str:
    """Report the SDK version string ("N/A" without status, "unknown" if unset)."""
    status = self._get_cached_status()
    if status is None:
        return "N/A"
    version = status.get('version')
    return version if version else "unknown"
+
613
def get_robot_name(self) -> str:
    """Report the robot name ("N/A" without status, "unknown" if unset)."""
    status = self._get_cached_status()
    if status is None:
        return "N/A"
    name = status.get('robot_name')
    return name if name else "unknown"
+
620
def get_wireless_version(self) -> bool:
    """Report whether the robot is the wireless variant (False without status)."""
    status = self._get_cached_status()
    return False if status is None else status.get('wireless_version', False)
+
627
def get_simulation_mode(self) -> bool:
    """Report whether simulation mode is enabled (False without status)."""
    status = self._get_cached_status()
    return False if status is None else status.get('simulation_enabled', False)
+
634
def get_wlan_ip(self) -> str:
    """Report the robot's WLAN IP address ("N/A" when unknown)."""
    status = self._get_cached_status()
    if status is None:
        return "N/A"
    ip = status.get('wlan_ip')
    return ip if ip else "N/A"
+
641
+ # ========== Phase 7: IMU Sensors (Wireless only) ==========
642
+
643
+ def _get_imu_value(self, sensor_type: str, index: int) -> float:
644
+ """Get a specific IMU sensor value.
645
+
646
+ Args:
647
+ sensor_type: 'accelerometer', 'gyroscope', or 'temperature'
648
+ index: Array index (0=x, 1=y, 2=z) or -1 for scalar values
649
+
650
+ Returns:
651
+ The sensor value, or 0.0 on error
652
+ """
653
+ if not self.is_available:
654
+ return 0.0
655
+ try:
656
+ imu_data = self.reachy.imu
657
+ if imu_data is None or sensor_type not in imu_data:
658
+ return 0.0
659
+ value = imu_data[sensor_type]
660
+ return float(value[index]) if index >= 0 else float(value)
661
+ except Exception as e:
662
+ logger.debug(f"Error getting IMU {sensor_type}: {e}")
663
+ return 0.0
664
+
665
def get_imu_accel_x(self) -> float:
    """Report the IMU's acceleration along X, in m/s²."""
    return self._get_imu_value('accelerometer', 0)
+
669
def get_imu_accel_y(self) -> float:
    """Report the IMU's acceleration along Y, in m/s²."""
    return self._get_imu_value('accelerometer', 1)
+
673
def get_imu_accel_z(self) -> float:
    """Report the IMU's acceleration along Z, in m/s²."""
    return self._get_imu_value('accelerometer', 2)
+
677
def get_imu_gyro_x(self) -> float:
    """Report the IMU's angular velocity about X, in rad/s."""
    return self._get_imu_value('gyroscope', 0)
+
681
def get_imu_gyro_y(self) -> float:
    """Report the IMU's angular velocity about Y, in rad/s."""
    return self._get_imu_value('gyroscope', 1)
+
685
def get_imu_gyro_z(self) -> float:
    """Report the IMU's angular velocity about Z, in rad/s."""
    return self._get_imu_value('gyroscope', 2)
+
689
def get_imu_temperature(self) -> float:
    """Report the IMU die temperature, in °C."""
    # -1 selects the scalar (non-indexed) reading.
    return self._get_imu_value('temperature', -1)
+
693
+ # ========== Phase 11: LED Control (DISABLED) ==========
694
+ # LED control is disabled because LEDs are hidden inside the robot.
695
+ # See PROJECT_PLAN.md principle 8.
696
+
697
def _get_respeaker(self):
    """Get ReSpeaker device from media manager with thread-safe access.

    Returns a context manager that holds the lock during ReSpeaker operations.
    Usage:
        with self._get_respeaker() as respeaker:
            if respeaker:
                respeaker.read("...")
    """
    # Robot offline: hand back an empty context that still honours the lock.
    if not self.is_available:
        return _ReSpeakerContext(None, self._respeaker_lock)
    try:
        # The media stack (or its audio subsystem) may not be initialised yet.
        if not self.reachy.media or not self.reachy.media.audio:
            return _ReSpeakerContext(None, self._respeaker_lock)
        # NOTE(review): reaches into the SDK's private `_respeaker` attribute —
        # fragile across SDK upgrades; confirm no public accessor exists.
        respeaker = self.reachy.media.audio._respeaker
        return _ReSpeakerContext(respeaker, self._respeaker_lock)
    except Exception:
        # Best-effort: any SDK hiccup degrades to a None device.
        return _ReSpeakerContext(None, self._respeaker_lock)
+
716
+ # ========== Phase 12: Audio Processing (via local SDK with thread-safe access) ==========
717
+
718
def get_agc_enabled(self) -> bool:
    """Report whether the ReSpeaker's Automatic Gain Control is enabled."""
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            # No device: fall back to the cached value (enabled by default).
            return getattr(self, '_agc_enabled', True)
        try:
            raw = respeaker.read("PP_AGCONOFF")
            if raw is not None:
                self._agc_enabled = bool(raw[1])
                return self._agc_enabled
        except Exception as e:
            logger.debug(f"Error getting AGC status: {e}")
        return getattr(self, '_agc_enabled', True)
+
732
def set_agc_enabled(self, enabled: bool) -> None:
    """Switch the ReSpeaker's Automatic Gain Control on or off."""
    # Remember the requested state even if the hardware is unreachable.
    self._agc_enabled = enabled
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            return
        try:
            payload = [1] if enabled else [0]
            respeaker.write("PP_AGCONOFF", payload)
            logger.info(f"AGC {'enabled' if enabled else 'disabled'}")
        except Exception as e:
            logger.error(f"Error setting AGC status: {e}")
+
744
def get_agc_max_gain(self) -> float:
    """Report the AGC maximum gain in dB (0-40 dB range)."""
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            # No device: fall back to the cached/optimised default.
            return getattr(self, '_agc_max_gain', 30.0)
        try:
            raw = respeaker.read("PP_AGCMAXGAIN")
            if raw is not None:
                self._agc_max_gain = float(raw[0])
                return self._agc_max_gain
        except Exception as e:
            logger.debug(f"Error getting AGC max gain: {e}")
        return getattr(self, '_agc_max_gain', 30.0)
+
758
def set_agc_max_gain(self, gain: float) -> None:
    """Set the AGC maximum gain in dB, clamped to the XVF3800's 0-40 dB range."""
    clamped = max(0.0, min(40.0, gain))
    self._agc_max_gain = clamped
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            return
        try:
            respeaker.write("PP_AGCMAXGAIN", [clamped])
            logger.info(f"AGC max gain set to {clamped} dB")
        except Exception as e:
            logger.error(f"Error setting AGC max gain: {e}")
+
771
def get_noise_suppression(self) -> float:
    """Report the noise-suppression strength as a percentage (0-100%).

    The XVF3800 exposes PP_MIN_NS as a "minimum signal preservation ratio",
    so the strength shown to the user is its complement:
        suppression% = (1.0 - PP_MIN_NS) * 100
    e.g. PP_MIN_NS=0.85 keeps at least 85% of the signal -> 15% suppression,
         PP_MIN_NS=0.15 keeps at least 15% of the signal -> 85% suppression.
    """
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            return getattr(self, '_noise_suppression', 15.0)
        try:
            raw = respeaker.read("PP_MIN_NS")
            if raw is not None:
                preserved = raw[0]
                # Invert the preservation ratio into a suppression percentage.
                self._noise_suppression = max(0.0, min(100.0, (1.0 - preserved) * 100.0))
                logger.debug(f"Noise suppression: PP_MIN_NS={preserved:.2f} -> {self._noise_suppression:.1f}%")
                return self._noise_suppression
        except Exception as e:
            logger.debug(f"Error getting noise suppression: {e}")
        return getattr(self, '_noise_suppression', 15.0)
+
796
def set_noise_suppression(self, level: float) -> None:
    """Set the noise-suppression strength (0-100%), clamped."""
    clamped = max(0.0, min(100.0, level))
    self._noise_suppression = clamped
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            return
        try:
            # Invert the percentage back into PP_MIN_NS's preservation ratio.
            preserved = 1.0 - (clamped / 100.0)
            respeaker.write("PP_MIN_NS", [preserved])
            logger.info(f"Noise suppression set to {clamped}%")
        except Exception as e:
            logger.error(f"Error setting noise suppression: {e}")
+
811
def get_echo_cancellation_converged(self) -> bool:
    """Report whether the acoustic echo canceller has converged."""
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            return False
        try:
            raw = respeaker.read("AEC_AECCONVERGED")
            if raw is not None:
                return bool(raw[1])
        except Exception as e:
            logger.debug(f"Error getting AEC converged status: {e}")
    return False
+
824
+ # ========== DOA (Direction of Arrival) ==========
825
+
826
+ def get_doa_angle(self) -> tuple[float, bool] | None:
827
+ """Get Direction of Arrival angle from microphone array.
828
+
829
+ The DOA angle indicates the direction of the sound source relative to the robot.
830
+ Angle is in radians: 0 = left, π/2 = front/back, π = right.
831
+
832
+ Returns:
833
+ Tuple of (angle_radians, speech_detected), or None if unavailable.
834
+ - angle_radians: Sound source direction in radians
835
+ - speech_detected: Whether speech is currently detected
836
+ """
837
+ if not self.is_available:
838
+ return None
839
+ try:
840
+ if self.reachy.media and self.reachy.media.audio:
841
+ return self.reachy.media.audio.get_DoA()
842
+ except Exception as e:
843
+ logger.debug(f"Error getting DOA: {e}")
844
+ return None
845
+
846
def get_doa_angle_degrees(self) -> float:
    """Report the raw DOA angle in degrees for the Home Assistant entity.

    SDK convention: 0° = left, 90° = front, 180° = right.
    Returns 0.0 when no DOA reading is available.
    """
    reading = self.get_doa_angle()
    if reading is None:
        return 0.0
    angle_rad = reading[0]
    return math.degrees(angle_rad)
+
860
def get_speech_detected(self) -> bool:
    """Report whether the DOA pipeline currently detects speech."""
    reading = self.get_doa_angle()
    return False if reading is None else reading[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/satellite.py RENAMED
@@ -1,1051 +1,784 @@
1
- """Voice satellite protocol for Reachy Mini."""
2
-
3
- import hashlib
4
- import logging
5
- import math
6
- import posixpath
7
- import shutil
8
- import time
9
- from collections.abc import Iterable
10
- from typing import TYPE_CHECKING, Optional
11
- from urllib.parse import urlparse, urlunparse
12
- from urllib.request import urlopen
13
-
14
- if TYPE_CHECKING:
15
- from ..vision.camera_server import MJPEGCameraServer
16
-
17
- # pylint: disable=no-name-in-module
18
- from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
19
- ButtonCommandRequest,
20
- CameraImageRequest,
21
- DeviceInfoRequest,
22
- DeviceInfoResponse,
23
- HomeAssistantStateResponse,
24
- ListEntitiesDoneResponse,
25
- ListEntitiesRequest,
26
- MediaPlayerCommandRequest,
27
- NumberCommandRequest,
28
- SelectCommandRequest,
29
- SubscribeHomeAssistantStatesRequest,
30
- SubscribeStatesRequest,
31
- SwitchCommandRequest,
32
- VoiceAssistantAnnounceFinished,
33
- VoiceAssistantAnnounceRequest,
34
- VoiceAssistantAudio,
35
- VoiceAssistantConfigurationRequest,
36
- VoiceAssistantConfigurationResponse,
37
- VoiceAssistantEventResponse,
38
- VoiceAssistantExternalWakeWord,
39
- VoiceAssistantRequest,
40
- VoiceAssistantSetConfiguration,
41
- VoiceAssistantTimerEventResponse,
42
- VoiceAssistantWakeWord,
43
- )
44
- from aioesphomeapi.model import (
45
- VoiceAssistantEventType,
46
- VoiceAssistantFeature,
47
- VoiceAssistantTimerEventType,
48
- )
49
- from google.protobuf import message
50
- from pymicro_wakeword import MicroWakeWord
51
- from pyopen_wakeword import OpenWakeWord
52
-
53
- from ..core.util import call_all
54
-
55
- # DISABLED: Emotion detection moved to Home Assistant blueprint
56
- # from ..entities.emotion_detector import EmotionKeywordDetector
57
- from ..entities.entity import MediaPlayerEntity
58
- from ..entities.entity_registry import EntityRegistry, get_entity_key
59
- from ..entities.event_emotion_mapper import EventEmotionMapper
60
- from ..models import AvailableWakeWord, ServerState, WakeWordType
61
- from ..motion.gesture_actions import GestureActionMapper
62
- from ..reachy_controller import ReachyController
63
- from .api_server import APIServer
64
-
65
- _LOGGER = logging.getLogger(__name__)
66
-
67
-
68
- class VoiceSatelliteProtocol(APIServer):
69
- """Voice satellite protocol handler for ESPHome."""
70
-
71
- def __init__(
72
- self, state: ServerState, camera_server: Optional["MJPEGCameraServer"] = None, voice_assistant_service=None
73
- ) -> None:
74
- _LOGGER.info("VoiceSatelliteProtocol.__init__ called - new connection")
75
- super().__init__(state.name)
76
- self.state = state
77
- self.state.satellite = self
78
- self.camera_server = camera_server
79
- self._voice_assistant_service = voice_assistant_service # Store reference for mute functionality
80
-
81
- # Home Assistant connection callbacks
82
- self._on_ha_connected_callback = None
83
- self._on_ha_disconnected_callback = None
84
-
85
- # Initialize streaming state early (before entity setup)
86
- self._is_streaming_audio = False
87
- self._tts_url: str | None = None
88
- self._tts_played = False
89
- self._continue_conversation = False
90
- self._timer_finished = False
91
- self._external_wake_words: dict[str, VoiceAssistantExternalWakeWord] = {}
92
-
93
- # Conversation tracking for continuous conversation
94
- self._conversation_id: str | None = None
95
- self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
96
- self._last_conversation_time = 0.0
97
-
98
- # Track Home Assistant entity states for change detection
99
- self._ha_entity_states: dict[str, str] = {}
100
-
101
- # Initialize Reachy controller
102
- self.reachy_controller = ReachyController(state.reachy_mini)
103
-
104
- # Connect sleep/wake callbacks to ServerState callbacks
105
- def on_sleep_from_ha():
106
- if state.on_ha_sleep is not None:
107
- try:
108
- state.on_ha_sleep()
109
- except Exception as e:
110
- _LOGGER.error("Error in on_ha_sleep callback: %s", e)
111
-
112
- def on_wake_from_ha():
113
- if state.on_ha_wake is not None:
114
- try:
115
- state.on_ha_wake()
116
- except Exception as e:
117
- _LOGGER.error("Error in on_ha_wake callback: %s", e)
118
-
119
- self.reachy_controller.set_sleep_callback(on_sleep_from_ha)
120
- self.reachy_controller.set_wake_callback(on_wake_from_ha)
121
-
122
- # Connect MovementManager to ReachyController for pose control from HA
123
- if state.motion is not None and state.motion.movement_manager is not None:
124
- self.reachy_controller.set_movement_manager(state.motion.movement_manager)
125
-
126
- # Setup speech sway callback for audio-driven head motion
127
- def sway_callback(sway: dict) -> None:
128
- mm = state.motion.movement_manager
129
- if mm is not None:
130
- mm.set_speech_sway(
131
- sway.get("x_m", 0.0),
132
- sway.get("y_m", 0.0),
133
- sway.get("z_m", 0.0),
134
- sway.get("roll_rad", 0.0),
135
- sway.get("pitch_rad", 0.0),
136
- sway.get("yaw_rad", 0.0),
137
- )
138
-
139
- state.tts_player.set_sway_callback(sway_callback)
140
- _LOGGER.info("Speech sway callback configured for TTS player")
141
-
142
- # Initialize entity registry
143
- self._entity_registry = EntityRegistry(
144
- server=self,
145
- reachy_controller=self.reachy_controller,
146
- camera_server=camera_server,
147
- play_emotion_callback=self._play_emotion,
148
- )
149
-
150
- # Connect gesture state callback
151
- if camera_server:
152
- camera_server.set_gesture_state_callback(self._entity_registry.update_gesture_state)
153
- camera_server.set_face_state_callback(self._entity_registry.update_face_detected_state)
154
- camera_server.set_gesture_action_callback(self.handle_detected_gesture)
155
-
156
- # Initialize gesture action mapper for local gesture → action handling
157
- self._gesture_action_mapper = GestureActionMapper()
158
- self._gesture_action_mapper.set_emotion_callback(self._play_emotion)
159
- self._gesture_action_mapper.set_start_listening_callback(self._trigger_wake_word)
160
- self._gesture_action_mapper.set_stop_speaking_callback(self._stop_current_tts)
161
- self._gesture_action_mapper.set_ha_event_callback(self._send_gesture_event_to_ha)
162
- _LOGGER.info("Gesture action mapper initialized")
163
-
164
- # Initialize event-emotion mapper for HA state change reactions
165
- self._event_emotion_mapper = EventEmotionMapper()
166
- self._event_emotion_mapper.set_emotion_callback(self._play_emotion)
167
- # Load custom mappings from JSON if available
168
- from pathlib import Path
169
-
170
- mappings_file = Path(__file__).parent / "animations" / "event_mappings.json"
171
- if mappings_file.exists():
172
- self._event_emotion_mapper.load_from_json(mappings_file)
173
- _LOGGER.info("Event emotion mapper initialized")
174
-
175
- # Only setup entities once (check if already initialized)
176
- # This prevents duplicate entity registration on reconnection
177
- try:
178
- _LOGGER.info("Checking entity initialization state...")
179
- if not getattr(self.state, "_entities_initialized", False):
180
- _LOGGER.info("Setting up entities for first time...")
181
- if self.state.media_player_entity is None:
182
- _LOGGER.info("Creating MediaPlayerEntity...")
183
- self.state.media_player_entity = MediaPlayerEntity(
184
- server=self,
185
- key=get_entity_key("reachy_mini_media_player"),
186
- name="Media Player",
187
- object_id="reachy_mini_media_player",
188
- music_player=state.music_player,
189
- announce_player=state.tts_player,
190
- )
191
- self.state.entities.append(self.state.media_player_entity)
192
- _LOGGER.info("MediaPlayerEntity created")
193
-
194
- # Setup all entities using the registry
195
- _LOGGER.info("Setting up all entities via registry...")
196
- self._entity_registry.setup_all_entities(self.state.entities)
197
-
198
- # Mark entities as initialized
199
- self.state._entities_initialized = True
200
- _LOGGER.info("Entities initialized: %d total", len(self.state.entities))
201
- else:
202
- _LOGGER.info("Entities already initialized, updating server references")
203
- # Update server reference in existing entities
204
- for entity in self.state.entities:
205
- entity.server = self
206
- _LOGGER.info("Server references updated for %d entities", len(self.state.entities))
207
- except Exception as e:
208
- _LOGGER.error("Error during entity setup: %s", e, exc_info=True)
209
- raise
210
-
211
- # Initialize emotion keyword detector for auto-triggering emotions from LLM responses
212
- # DISABLED: Emotion detection moved to Home Assistant blueprint
213
- # self._emotion_detector = EmotionKeywordDetector(play_emotion_callback=self._play_emotion)
214
- _LOGGER.info("VoiceSatelliteProtocol.__init__ completed")
215
-
216
- def set_ha_connection_callbacks(self, on_connected, on_disconnected):
217
- """Set callbacks for Home Assistant connection/disconnection."""
218
- self._on_ha_connected_callback = on_connected
219
- self._on_ha_disconnected_callback = on_disconnected
220
-
221
- def connection_made(self, transport) -> None:
222
- """Called when a client connects."""
223
- peer = transport.get_extra_info("peername")
224
- _LOGGER.info("ESPHome client connected from %s", peer)
225
- super().connection_made(transport)
226
-
227
- def update_camera_server(self, camera_server):
228
- """Update the camera server reference in entity registry.
229
-
230
- Called when camera server is started after Home Assistant connection.
231
- """
232
- self._entity_registry.camera_server = camera_server
233
- if camera_server:
234
- camera_server.set_gesture_state_callback(self._entity_registry.update_gesture_state)
235
- camera_server.set_face_state_callback(self._entity_registry.update_face_detected_state)
236
- camera_server.set_gesture_action_callback(self.handle_detected_gesture)
237
- _LOGGER.debug("Camera server reference updated in entity registry")
238
-
239
- # Note: connection_lost is defined later in the class with full cleanup logic
240
-
241
- def handle_voice_event(self, event_type: VoiceAssistantEventType, data: dict[str, str]) -> None:
242
- _LOGGER.debug("Voice event: type=%s, data=%s", event_type.name, data)
243
-
244
- if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_START:
245
- self._tts_url = data.get("url")
246
- self._tts_played = False
247
- self._continue_conversation = False
248
- # Reachy Mini: Start listening animation
249
- self._reachy_on_listening()
250
-
251
- # Note: TTS URL requires HA authentication, cannot pre-download
252
- # Speaking animation uses JSON-defined multi-frequency sway instead
253
-
254
- elif event_type in (
255
- VoiceAssistantEventType.VOICE_ASSISTANT_STT_VAD_END,
256
- VoiceAssistantEventType.VOICE_ASSISTANT_STT_END,
257
- ):
258
- self._is_streaming_audio = False
259
- # Reachy Mini: Stop listening, start thinking
260
- self._reachy_on_thinking()
261
-
262
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_INTENT_PROGRESS:
263
- if data.get("tts_start_streaming") == "1":
264
- # Start streaming early
265
- self.play_tts()
266
-
267
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_INTENT_END:
268
- if data.get("continue_conversation") == "1":
269
- self._continue_conversation = True
270
-
271
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
272
- # Reachy Mini: Start speaking animation (JSON-defined multi-frequency sway)
273
- _LOGGER.debug("TTS_START event received, triggering speaking animation")
274
- self._reachy_on_speaking()
275
-
276
- # Auto-trigger emotion based on response text
277
- # TTS_START may contain the text to be spoken
278
- # DISABLED: Emotion detection moved to Home Assistant blueprint
279
- # tts_text = data.get("tts_output") or data.get("text") or ""
280
- # if tts_text:
281
- # self._emotion_detector.detect_and_play(tts_text)
282
-
283
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
284
- self._tts_url = data.get("url")
285
- self.play_tts()
286
-
287
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_END:
288
- # Pipeline run ended
289
- self._is_streaming_audio = False
290
-
291
- # Following reference project pattern
292
- if not self._tts_played:
293
- self._tts_finished()
294
-
295
- self._tts_played = False
296
-
297
- def handle_timer_event(
298
- self,
299
- event_type: VoiceAssistantTimerEventType,
300
- msg: VoiceAssistantTimerEventResponse,
301
- ) -> None:
302
- _LOGGER.debug("Timer event: type=%s", event_type.name)
303
-
304
- if event_type == VoiceAssistantTimerEventType.VOICE_ASSISTANT_TIMER_FINISHED:
305
- if not self._timer_finished:
306
- self.state.active_wake_words.add(self.state.stop_word.id)
307
- self._timer_finished = True
308
- self.duck()
309
- self._play_timer_finished()
310
- # Reachy Mini: Timer finished animation
311
- self._reachy_on_timer_finished()
312
-
313
- def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
314
- if isinstance(msg, VoiceAssistantEventResponse):
315
- # Pipeline event
316
- data: dict[str, str] = {}
317
- for arg in msg.data:
318
- data[arg.name] = arg.value
319
- self.handle_voice_event(VoiceAssistantEventType(msg.event_type), data)
320
-
321
- elif isinstance(msg, VoiceAssistantAnnounceRequest):
322
- _LOGGER.debug("Announcing: %s", msg.text)
323
- assert self.state.media_player_entity is not None
324
-
325
- urls = []
326
- if msg.preannounce_media_id:
327
- urls.append(msg.preannounce_media_id)
328
- urls.append(msg.media_id)
329
-
330
- self.state.active_wake_words.add(self.state.stop_word.id)
331
- self._set_stop_word_active(True)
332
- self._continue_conversation = msg.start_conversation
333
- self.duck()
334
-
335
- yield from self.state.media_player_entity.play(urls, announcement=True, done_callback=self._tts_finished)
336
-
337
- elif isinstance(msg, VoiceAssistantTimerEventResponse):
338
- self.handle_timer_event(VoiceAssistantTimerEventType(msg.event_type), msg)
339
-
340
- elif isinstance(msg, HomeAssistantStateResponse):
341
- # Handle Home Assistant state changes for emotion mapping
342
- self._handle_ha_state_change(msg)
343
-
344
- elif isinstance(msg, DeviceInfoRequest):
345
- _LOGGER.info("DeviceInfoRequest received, sending DeviceInfoResponse")
346
- yield DeviceInfoResponse(
347
- uses_password=False,
348
- name=self.state.name,
349
- mac_address=self.state.mac_address,
350
- voice_assistant_feature_flags=(
351
- VoiceAssistantFeature.VOICE_ASSISTANT
352
- | VoiceAssistantFeature.API_AUDIO
353
- | VoiceAssistantFeature.ANNOUNCE
354
- | VoiceAssistantFeature.START_CONVERSATION
355
- | VoiceAssistantFeature.TIMERS
356
- ),
357
- )
358
-
359
- elif isinstance(
360
- msg,
361
- (
362
- ListEntitiesRequest,
363
- SubscribeHomeAssistantStatesRequest,
364
- SubscribeStatesRequest,
365
- MediaPlayerCommandRequest,
366
- NumberCommandRequest,
367
- SwitchCommandRequest,
368
- SelectCommandRequest,
369
- ButtonCommandRequest,
370
- CameraImageRequest,
371
- ),
372
- ):
373
- for entity in self.state.entities:
374
- yield from entity.handle_message(msg)
375
-
376
- if isinstance(msg, ListEntitiesRequest):
377
- yield ListEntitiesDoneResponse()
378
-
379
- elif isinstance(msg, VoiceAssistantConfigurationRequest):
380
- available_wake_words = [
381
- VoiceAssistantWakeWord(
382
- id=ww.id,
383
- wake_word=ww.wake_word,
384
- trained_languages=ww.trained_languages,
385
- )
386
- for ww in self.state.available_wake_words.values()
387
- ]
388
-
389
- for eww in msg.external_wake_words:
390
- if eww.model_type != "micro":
391
- continue
392
-
393
- available_wake_words.append(
394
- VoiceAssistantWakeWord(
395
- id=eww.id,
396
- wake_word=eww.wake_word,
397
- trained_languages=eww.trained_languages,
398
- )
399
- )
400
- self._external_wake_words[eww.id] = eww
401
-
402
- yield VoiceAssistantConfigurationResponse(
403
- available_wake_words=available_wake_words,
404
- active_wake_words=[
405
- ww.id for ww in self.state.wake_words.values() if ww.id in self.state.active_wake_words
406
- ],
407
- max_active_wake_words=2,
408
- )
409
-
410
- _LOGGER.info("Connected to Home Assistant")
411
-
412
- # Trigger HA connected callback (async)
413
- if self._on_ha_connected_callback:
414
- try:
415
- import asyncio
416
-
417
- loop = asyncio.get_running_loop()
418
- task = loop.create_task(self._on_ha_connected_callback())
419
- _ = task # Prevent RUF006 warning
420
- except Exception as e:
421
- _LOGGER.error("Error in HA connected callback: %s", e)
422
-
423
- elif isinstance(msg, VoiceAssistantSetConfiguration):
424
- # Change active wake words
425
- active_wake_words: set[str] = set()
426
-
427
- for wake_word_id in msg.active_wake_words:
428
- if wake_word_id in self.state.wake_words:
429
- # Already loaded, just add to active set
430
- active_wake_words.add(wake_word_id)
431
- continue
432
-
433
- model_info = self.state.available_wake_words.get(wake_word_id)
434
- if not model_info:
435
- # Check external wake words (may require download)
436
- external_wake_word = self._external_wake_words.get(wake_word_id)
437
- if not external_wake_word:
438
- _LOGGER.warning("Wake word not found: %s", wake_word_id)
439
- continue
440
-
441
- model_info = self._download_external_wake_word(external_wake_word)
442
- if not model_info:
443
- continue
444
-
445
- self.state.available_wake_words[wake_word_id] = model_info
446
-
447
- _LOGGER.debug("Loading wake word: %s", model_info.wake_word_path)
448
- loaded_model = model_info.load()
449
- # Set id attribute on the model for later identification
450
- loaded_model.id = wake_word_id
451
- self.state.wake_words[wake_word_id] = loaded_model
452
- _LOGGER.info("Wake word loaded: %s", wake_word_id)
453
- active_wake_words.add(wake_word_id)
454
- # Don't break - load ALL requested wake words, not just the first one
455
-
456
- self.state.active_wake_words = active_wake_words
457
- _LOGGER.debug("Active wake words: %s", active_wake_words)
458
-
459
- self.state.preferences.active_wake_words = list(active_wake_words)
460
- self.state.save_preferences()
461
- self.state.wake_words_changed = True
462
-
463
- @property
464
- def is_streaming_audio(self) -> bool:
465
- return self._is_streaming_audio
466
-
467
- def handle_audio(self, audio_chunk: bytes) -> None:
468
- if not self._is_streaming_audio:
469
- return
470
- # Check if transport is still valid before sending
471
- if self._writelines is None:
472
- _LOGGER.warning("Cannot send audio: transport not available, stopping stream")
473
- self._is_streaming_audio = False
474
- return
475
- self.send_messages([VoiceAssistantAudio(data=audio_chunk)])
476
-
477
- def _get_or_create_conversation_id(self) -> str:
478
- """Get existing conversation_id or create a new one.
479
-
480
- Reuses conversation_id if within timeout period, otherwise creates new one.
481
- """
482
- now = time.time()
483
- if self._conversation_id is None or now - self._last_conversation_time > self._conversation_timeout:
484
- # Create new conversation_id
485
- import uuid
486
-
487
- self._conversation_id = str(uuid.uuid4())
488
- _LOGGER.debug("Created new conversation_id: %s", self._conversation_id)
489
-
490
- self._last_conversation_time = now
491
- return self._conversation_id
492
-
493
- def _clear_conversation(self) -> None:
494
- """Clear conversation state when exiting conversation mode."""
495
- self._conversation_id = None
496
- self._continue_conversation = False
497
-
498
- def wakeup(self, wake_word: MicroWakeWord | OpenWakeWord) -> None:
499
- """Handle wake word detection - start voice pipeline."""
500
- if self._timer_finished:
501
- # Stop timer instead
502
- self._timer_finished = False
503
- self.state.tts_player.stop()
504
- _LOGGER.debug("Stopping timer finished sound")
505
- return
506
-
507
- wake_word_phrase = wake_word.wake_word
508
- _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
509
-
510
- # Turn toward sound source using DOA (Direction of Arrival)
511
- self._turn_to_sound_source()
512
-
513
- # Get or create conversation_id for context tracking
514
- conv_id = self._get_or_create_conversation_id()
515
-
516
- self.send_messages(
517
- [
518
- VoiceAssistantRequest(
519
- start=True,
520
- wake_word_phrase=wake_word_phrase,
521
- conversation_id=conv_id,
522
- )
523
- ]
524
- )
525
- self.duck()
526
- self.state.tts_player.play(self.state.wakeup_sound, done_callback=self._on_wakeup_sound_finished)
527
-
528
- def _on_wakeup_sound_finished(self) -> None:
529
- """Start microphone streaming after wakeup sound finishes."""
530
- self._is_streaming_audio = True
531
-
532
- def stop(self) -> None:
533
- """Stop current TTS playback (e.g., user said stop word)."""
534
- # Ensure pipeline does not re-arm itself after manual stop
535
- self._is_streaming_audio = False
536
- self._continue_conversation = False
537
- self.state.active_wake_words.discard(self.state.stop_word.id)
538
- self._set_stop_word_active(False)
539
- self.state.tts_player.stop()
540
-
541
- if self._timer_finished:
542
- self._timer_finished = False
543
- _LOGGER.debug("Stopping timer finished sound")
544
- else:
545
- _LOGGER.debug("TTS response stopped manually")
546
- # Reset TTS state to prevent double-finished
547
- self._tts_url = None
548
- self._tts_played = True
549
- self._tts_finished()
550
-
551
- def play_tts(self) -> None:
552
- if (not self._tts_url) or self._tts_played:
553
- return
554
-
555
- self._tts_played = True
556
- _LOGGER.debug("Playing TTS response: %s", self._tts_url)
557
-
558
- self.state.active_wake_words.add(self.state.stop_word.id)
559
- self._set_stop_word_active(True)
560
- self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
561
-
562
- def duck(self) -> None:
563
- _LOGGER.debug("Ducking music")
564
- self.state.music_player.duck()
565
- # Pause Sendspin to prevent audio conflicts during voice interaction
566
- self.state.music_player.pause_sendspin()
567
-
568
- def unduck(self) -> None:
569
- _LOGGER.debug("Unducking music")
570
- self.state.music_player.unduck()
571
- # Resume Sendspin audio
572
- self.state.music_player.resume_sendspin()
573
-
574
- def _tts_finished(self) -> None:
575
- """Called when TTS audio playback finishes.
576
-
577
- Following reference project pattern: handle continue conversation here.
578
- """
579
- self.state.active_wake_words.discard(self.state.stop_word.id)
580
- self._set_stop_word_active(False)
581
- self.send_messages([VoiceAssistantAnnounceFinished()])
582
-
583
- # Check if should continue conversation
584
- # 1. Our switch is ON: Always continue (unconditional)
585
- # 2. Our switch is OFF: Follow HA's continue_conversation request
586
- continuous_mode = self.state.preferences.continuous_conversation
587
- should_continue = continuous_mode or self._continue_conversation
588
-
589
- if should_continue:
590
- _LOGGER.debug(
591
- "Continuing conversation (our_switch=%s, ha_request=%s)", continuous_mode, self._continue_conversation
592
- )
593
-
594
- # Play prompt sound to indicate ready for next input
595
- self.state.tts_player.play(self.state.wakeup_sound)
596
-
597
- # Use same conversation_id for context continuity
598
- conv_id = self._get_or_create_conversation_id()
599
- self.send_messages(
600
- [
601
- VoiceAssistantRequest(
602
- start=True,
603
- conversation_id=conv_id,
604
- )
605
- ]
606
- )
607
- self._is_streaming_audio = True
608
-
609
- # Stay in listening mode
610
- self._reachy_on_listening()
611
- else:
612
- self._clear_conversation()
613
- self.unduck()
614
- self._is_streaming_audio = False
615
- _LOGGER.debug("Conversation finished")
616
-
617
- # Reachy Mini: Return to idle
618
- self._reachy_on_idle()
619
-
620
- def _set_stop_word_active(self, active: bool) -> None:
621
- """Toggle stop word detector when model supports runtime activation."""
622
- try:
623
- if hasattr(self.state.stop_word, "is_active"):
624
- self.state.stop_word.is_active = active
625
- except Exception:
626
- pass
627
-
628
- def _play_timer_finished(self) -> None:
629
- if not self._timer_finished:
630
- self.unduck()
631
- return
632
-
633
- self.state.tts_player.play(
634
- self.state.timer_finished_sound,
635
- done_callback=lambda: call_all(lambda: time.sleep(1.0), self._play_timer_finished),
636
- )
637
-
638
- def connection_lost(self, exc):
639
- super().connection_lost(exc)
640
- _LOGGER.info("Disconnected from Home Assistant")
641
- # Clear streaming state on disconnect
642
- self._is_streaming_audio = False
643
- self._tts_url = None
644
- self._tts_played = False
645
- self._continue_conversation = False
646
- self._set_stop_word_active(False)
647
-
648
- # Trigger HA disconnected callback
649
- if self._on_ha_disconnected_callback:
650
- try:
651
- self._on_ha_disconnected_callback()
652
- except Exception as e:
653
- _LOGGER.error("Error in HA disconnected callback: %s", e)
654
-
655
- def _download_external_wake_word(
656
- self, external_wake_word: VoiceAssistantExternalWakeWord
657
- ) -> AvailableWakeWord | None:
658
- eww_dir = self.state.download_dir / "external_wake_words"
659
- eww_dir.mkdir(parents=True, exist_ok=True)
660
-
661
- config_path = eww_dir / f"{external_wake_word.id}.json"
662
- should_download_config = not config_path.exists()
663
-
664
- # Check if we need to download the model file
665
- model_path = eww_dir / f"{external_wake_word.id}.tflite"
666
- should_download_model = True
667
-
668
- if model_path.exists():
669
- model_size = model_path.stat().st_size
670
- if model_size == external_wake_word.model_size:
671
- with open(model_path, "rb") as model_file:
672
- model_hash = hashlib.sha256(model_file.read()).hexdigest()
673
-
674
- if model_hash == external_wake_word.model_hash:
675
- should_download_model = False
676
- _LOGGER.debug(
677
- "Model size and hash match for %s. Skipping download.",
678
- external_wake_word.id,
679
- )
680
-
681
- if should_download_config or should_download_model:
682
- # Download config
683
- _LOGGER.debug("Downloading %s to %s", external_wake_word.url, config_path)
684
- with urlopen(external_wake_word.url) as request:
685
- if request.status != 200:
686
- _LOGGER.warning(
687
- "Failed to download: %s, status=%s",
688
- external_wake_word.url,
689
- request.status,
690
- )
691
- return None
692
-
693
- with open(config_path, "wb") as model_file:
694
- shutil.copyfileobj(request, model_file)
695
-
696
- if should_download_model:
697
- # Download model file
698
- parsed_url = urlparse(external_wake_word.url)
699
- parsed_url = parsed_url._replace(path=posixpath.join(posixpath.dirname(parsed_url.path), model_path.name))
700
- model_url = urlunparse(parsed_url)
701
-
702
- _LOGGER.debug("Downloading %s to %s", model_url, model_path)
703
- with urlopen(model_url) as request:
704
- if request.status != 200:
705
- _LOGGER.warning("Failed to download: %s, status=%s", model_url, request.status)
706
- return None
707
-
708
- with open(model_path, "wb") as model_file:
709
- shutil.copyfileobj(request, model_file)
710
-
711
- return AvailableWakeWord(
712
- id=external_wake_word.id,
713
- type=WakeWordType.MICRO_WAKE_WORD,
714
- wake_word=external_wake_word.wake_word,
715
- trained_languages=external_wake_word.trained_languages,
716
- wake_word_path=config_path,
717
- )
718
-
719
- # -------------------------------------------------------------------------
720
- # Reachy Mini Motion Control
721
- # -------------------------------------------------------------------------
722
-
723
- def _turn_to_sound_source(self) -> None:
724
- """Turn robot head toward sound source using DOA at wakeup.
725
-
726
- This is called once at wakeup to orient the robot toward the speaker.
727
- Face tracking will take over after the initial turn.
728
-
729
- DOA angle convention (from SDK):
730
- - 0 radians = left (Y+ direction in head frame)
731
- - π/2 radians = front (X+ direction in head frame)
732
- - π radians = right (Y- direction in head frame)
733
-
734
- The SDK uses: p_head = [sin(doa), cos(doa), 0]
735
- So we need to convert this to yaw angle.
736
-
737
- Note: We don't check speech_detected because by the time wake word
738
- detection completes, the user may have stopped speaking.
739
- """
740
- if not self.state.motion_enabled:
741
- _LOGGER.info("DOA turn-to-sound: motion disabled")
742
- return
743
-
744
- try:
745
- # Get DOA from reachy_controller (only read once)
746
- doa = self.reachy_controller.get_doa_angle()
747
- if doa is None:
748
- _LOGGER.info("DOA not available, skipping turn-to-sound")
749
- return
750
-
751
- angle_rad, speech_detected = doa
752
- _LOGGER.debug(
753
- "DOA raw: angle=%.3f rad (%.1f°), speech=%s", angle_rad, math.degrees(angle_rad), speech_detected
754
- )
755
-
756
- # Convert DOA to direction vector in head frame
757
- # SDK convention: p_head = [sin(doa), cos(doa), 0]
758
- # where X+ is front, Y+ is left
759
- dir_x = math.sin(angle_rad) # Front component
760
- dir_y = math.cos(angle_rad) # Left component
761
-
762
- # Calculate yaw angle from direction vector
763
- # DOA convention: 0 = left, π/2 = front, π = right
764
- # Robot yaw: positive = turn right, negative = turn left
765
- # Invert the sign: left(0) → +90° (turn right toward left sound)
766
- # right(π) → -90° (turn left toward right sound)
767
- yaw_rad = -(angle_rad - math.pi / 2)
768
- yaw_deg = math.degrees(yaw_rad)
769
-
770
- _LOGGER.debug("DOA direction: x=%.2f, y=%.2f, yaw=%.1f°", dir_x, dir_y, yaw_deg)
771
-
772
- # Only turn if angle is significant (> 10°) to avoid noise
773
- DOA_THRESHOLD_DEG = 10.0
774
- if abs(yaw_deg) < DOA_THRESHOLD_DEG:
775
- _LOGGER.debug("DOA angle %.1f° below threshold (%.1f°), skipping turn", yaw_deg, DOA_THRESHOLD_DEG)
776
- return
777
-
778
- # Apply 80% of DOA angle as conservative strategy
779
- # This accounts for potential DOA inaccuracy
780
- DOA_SCALE = 0.8
781
- target_yaw_deg = yaw_deg * DOA_SCALE
782
-
783
- _LOGGER.info("Turning toward sound source: DOA=%.1f°, target=%.1f°", yaw_deg, target_yaw_deg)
784
-
785
- # Use MovementManager to turn (non-blocking)
786
- if self.state.motion and self.state.motion.movement_manager:
787
- self.state.motion.movement_manager.turn_to_angle(
788
- target_yaw_deg,
789
- duration=0.5, # Quick turn
790
- )
791
- except Exception as e:
792
- _LOGGER.error("Error in turn-to-sound: %s", e)
793
-
794
- def _reachy_on_listening(self) -> None:
795
- """Called when listening for speech (HA state: Listening)."""
796
- # Enable high-frequency face tracking during listening
797
- self._set_conversation_mode(True)
798
-
799
- # Resume face tracking according to user preference (may have been paused during speaking)
800
- if self.camera_server is not None:
801
- try:
802
- enabled = bool(getattr(self.state.preferences, "face_tracking_enabled", False))
803
- self.camera_server.set_face_tracking_enabled(enabled)
804
- except Exception as e:
805
- _LOGGER.debug("Failed to resume face tracking: %s", e)
806
-
807
- if not self.state.motion_enabled:
808
- return
809
- try:
810
- _LOGGER.debug("Reachy Mini: Listening animation")
811
- if self.state.motion:
812
- self.state.motion.on_listening()
813
- except Exception as e:
814
- _LOGGER.error("Reachy Mini motion error: %s", e)
815
-
816
- def _reachy_on_thinking(self) -> None:
817
- """Called when processing speech (HA state: Processing)."""
818
- # Resume face tracking according to user preference (may have been paused during speaking)
819
- if self.camera_server is not None:
820
- try:
821
- enabled = bool(getattr(self.state.preferences, "face_tracking_enabled", False))
822
- self.camera_server.set_face_tracking_enabled(enabled)
823
- except Exception as e:
824
- _LOGGER.debug("Failed to resume face tracking: %s", e)
825
-
826
- if not self.state.motion_enabled or not self.state.reachy_mini:
827
- return
828
- try:
829
- _LOGGER.debug("Reachy Mini: Thinking animation")
830
- if self.state.motion:
831
- self.state.motion.on_thinking()
832
- except Exception as e:
833
- _LOGGER.error("Reachy Mini motion error: %s", e)
834
-
835
- def _reachy_on_speaking(self) -> None:
836
- """Called when TTS is playing (HA state: Responding)."""
837
- # Pause face tracking during speaking - robot will use speaking animation instead
838
- if self.camera_server is not None:
839
- try:
840
- self.camera_server.set_face_tracking_enabled(False)
841
- _LOGGER.debug("Face tracking paused during speaking")
842
- except Exception as e:
843
- _LOGGER.debug("Failed to pause face tracking: %s", e)
844
-
845
- if not self.state.motion_enabled:
846
- _LOGGER.warning("Motion disabled, skipping speaking animation")
847
- return
848
- if not self.state.motion:
849
- _LOGGER.warning("No motion controller, skipping speaking animation")
850
- return
851
-
852
- try:
853
- _LOGGER.debug("Reachy Mini: Starting speaking animation")
854
- self.state.motion.on_speaking_start()
855
- except Exception as e:
856
- _LOGGER.error("Reachy Mini motion error: %s", e)
857
-
858
- def _reachy_on_idle(self) -> None:
859
- """Called when returning to idle state (HA state: Idle)."""
860
- # Disable high-frequency face tracking, switch to adaptive mode
861
- self._set_conversation_mode(False)
862
-
863
- # Resume face tracking according to user preference (may have been paused during speaking)
864
- if self.camera_server is not None:
865
- try:
866
- enabled = bool(getattr(self.state.preferences, "face_tracking_enabled", False))
867
- self.camera_server.set_face_tracking_enabled(enabled)
868
- except Exception as e:
869
- _LOGGER.debug("Failed to resume face tracking: %s", e)
870
-
871
- if not self.state.motion_enabled or not self.state.reachy_mini:
872
- return
873
- try:
874
- _LOGGER.debug("Reachy Mini: Idle animation")
875
- if self.state.motion:
876
- self.state.motion.on_idle()
877
- except Exception as e:
878
- _LOGGER.error("Reachy Mini motion error: %s", e)
879
-
880
- def _set_conversation_mode(self, in_conversation: bool) -> None:
881
- """Set conversation mode for adaptive face tracking.
882
-
883
- When in conversation, face tracking runs at high frequency.
884
- When idle, face tracking uses adaptive rate to save CPU.
885
- """
886
- if self.camera_server is not None:
887
- try:
888
- self.camera_server.set_conversation_mode(in_conversation)
889
- except Exception as e:
890
- _LOGGER.debug("Failed to set conversation mode: %s", e)
891
-
892
- def _reachy_on_timer_finished(self) -> None:
893
- """Called when a timer finishes."""
894
- if not self.state.motion_enabled or not self.state.reachy_mini:
895
- return
896
- try:
897
- _LOGGER.debug("Reachy Mini: Timer finished animation")
898
- if self.state.motion:
899
- self.state.motion.on_timer_finished()
900
- except Exception as e:
901
- _LOGGER.error("Reachy Mini motion error: %s", e)
902
-
903
- def _play_emotion(self, emotion_name: str) -> None:
904
- """Play an emotion/expression from the emotions library.
905
-
906
- Uses the MovementManager's queue_emotion_move() method which samples
907
- poses via RecordedMoves.evaluate(t) in the control loop. This avoids
908
- "a move is currently running" warnings from the SDK daemon.
909
-
910
- Args:
911
- emotion_name: Name of the emotion (e.g., "happy1", "sad1", etc.)
912
- """
913
- try:
914
- # Use MovementManager to play emotion (non-blocking, integrated with control loop)
915
- if self.state.motion and self.state.motion.movement_manager:
916
- movement_manager = self.state.motion.movement_manager
917
- if movement_manager.queue_emotion_move(emotion_name):
918
- _LOGGER.info(f"Queued emotion move: {emotion_name}")
919
- else:
920
- _LOGGER.warning(f"Failed to queue emotion: {emotion_name}")
921
- else:
922
- _LOGGER.warning("Cannot play emotion: no movement manager available")
923
-
924
- except Exception as e:
925
- _LOGGER.error(f"Error playing emotion {emotion_name}: {e}")
926
-
927
- def _trigger_wake_word(self) -> None:
928
- """Trigger wake word detection (simulate hearing the wake word).
929
-
930
- This is called by GestureActionMapper when a "call" gesture is detected,
931
- allowing users to activate the voice assistant with a hand gesture.
932
- """
933
- try:
934
- # The wake word detected event triggers the voice pipeline
935
- _LOGGER.info("Gesture triggered wake word - starting voice assistant")
936
- # Set the wake word event to simulate detection
937
- if hasattr(self.state, "last_wake_word"):
938
- self.state.last_wake_word = "gesture"
939
- # Trigger the run_voice_assistant logic
940
- self.start_voice_assistant()
941
- except Exception as e:
942
- _LOGGER.error(f"Error triggering wake word from gesture: {e}")
943
-
944
- def _stop_current_tts(self) -> None:
945
- """Stop current TTS playback.
946
-
947
- Called by GestureActionMapper when a "stop" gesture is detected,
948
- allowing users to interrupt the robot's speech.
949
- """
950
- try:
951
- _LOGGER.info("Gesture triggered TTS stop")
952
- if self.state.tts_player:
953
- self.state.tts_player.stop()
954
- if self.state.music_player:
955
- self.state.music_player.stop()
956
- except Exception as e:
957
- _LOGGER.error(f"Error stopping TTS from gesture: {e}")
958
-
959
- def _send_gesture_event_to_ha(self, event_name: str) -> None:
960
- """Send a gesture event to Home Assistant.
961
-
962
- This allows HA automations to react to gestures like "one", "two", etc.
963
-
964
- Args:
965
- event_name: Name of the gesture event (e.g., "gesture_one")
966
- """
967
- try:
968
- _LOGGER.info(f"Sending gesture event to HA: {event_name}")
969
- # Fire an event to Home Assistant via the satellite protocol
970
- # This uses the VoiceAssistantEventResponse mechanism
971
- # For now, we can use the timer event mechanism or a custom event
972
- # Home Assistant can subscribe to these events via ESPHome integration
973
- except Exception as e:
974
- _LOGGER.error(f"Error sending gesture event to HA: {e}")
975
-
976
- def _handle_ha_state_change(self, msg: HomeAssistantStateResponse) -> None:
977
- """Handle Home Assistant state change via ESPHome bidirectional communication.
978
-
979
- This method is called when Home Assistant sends state updates through
980
- the ESPHome protocol. It uses EventEmotionMapper to trigger robot
981
- emotions based on configured entity state changes.
982
-
983
- Args:
984
- msg: HomeAssistantStateResponse containing entity_id and state
985
- """
986
- try:
987
- entity_id = msg.entity_id
988
- new_state = msg.state
989
-
990
- # Track old state for proper event handling
991
- old_state = self._ha_entity_states.get(entity_id, "unknown")
992
- self._ha_entity_states[entity_id] = new_state
993
-
994
- _LOGGER.debug("HA state change: %s: %s -> %s", entity_id, old_state, new_state)
995
-
996
- # Let EventEmotionMapper handle the state change
997
- emotion = self._event_emotion_mapper.handle_state_change(entity_id, old_state, new_state)
998
- if emotion:
999
- _LOGGER.info("HA event triggered emotion: %s from %s", emotion, entity_id)
1000
-
1001
- except Exception as e:
1002
- _LOGGER.error("Error handling HA state change: %s", e)
1003
-
1004
- def handle_detected_gesture(self, gesture_name: str, confidence: float) -> bool:
1005
- """Handle a detected gesture by triggering mapped actions.
1006
-
1007
- This should be called when a gesture is detected to trigger local actions
1008
- (emotions, TTS control, HA events) based on the gesture mappings.
1009
-
1010
- Args:
1011
- gesture_name: Name of the detected gesture
1012
- confidence: Detection confidence (0-1)
1013
-
1014
- Returns:
1015
- True if an action was triggered, False otherwise
1016
- """
1017
- return self._gesture_action_mapper.handle_gesture(gesture_name, confidence)
1018
-
1019
- def suspend(self) -> None:
1020
- """Suspend the satellite for sleep mode.
1021
-
1022
- Stops any current playback and releases resources.
1023
- """
1024
- _LOGGER.info("Suspending VoiceSatellite for sleep...")
1025
-
1026
- # Stop any current TTS/music
1027
- if self.state.tts_player:
1028
- self.state.tts_player.stop()
1029
- if self.state.music_player:
1030
- self.state.music_player.stop()
1031
-
1032
- # Keep configured wake words intact.
1033
- # Audio processing is paused by sleep/mute lifecycle, so clearing wake words here
1034
- # can cause Home Assistant UI to temporarily show an empty wake word selection.
1035
-
1036
- # Reset conversation state
1037
- self._tts_url = None
1038
- self._tts_played = True
1039
- self._continue_conversation = False
1040
- self._is_streaming_audio = False
1041
-
1042
- _LOGGER.info("VoiceSatellite suspended")
1043
-
1044
- def resume(self) -> None:
1045
- """Resume the satellite after sleep."""
1046
- _LOGGER.info("Resuming VoiceSatellite from sleep...")
1047
-
1048
- # Ensure wake word processing context is refreshed after resume.
1049
- self.state.wake_words_changed = True
1050
-
1051
- _LOGGER.info("VoiceSatellite resumed")
 
1
+ """Voice satellite protocol for Reachy Mini."""
2
+
3
+ import hashlib
4
+ import logging
5
+ import math
6
+ import posixpath
7
+ import shutil
8
+ import time
9
+ from collections.abc import Iterable
10
+ from typing import Dict, Optional, Set, Union, TYPE_CHECKING
11
+ from urllib.parse import urlparse, urlunparse
12
+ from urllib.request import urlopen
13
+
14
+ if TYPE_CHECKING:
15
+ from .camera_server import MJPEGCameraServer
16
+
17
+ # pylint: disable=no-name-in-module
18
+ from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
19
+ ButtonCommandRequest,
20
+ CameraImageRequest,
21
+ DeviceInfoRequest,
22
+ DeviceInfoResponse,
23
+ ListEntitiesDoneResponse,
24
+ ListEntitiesRequest,
25
+ MediaPlayerCommandRequest,
26
+ NumberCommandRequest,
27
+ SelectCommandRequest,
28
+ SubscribeHomeAssistantStatesRequest,
29
+ SubscribeStatesRequest,
30
+ SwitchCommandRequest,
31
+ VoiceAssistantAnnounceFinished,
32
+ VoiceAssistantAnnounceRequest,
33
+ VoiceAssistantAudio,
34
+ VoiceAssistantConfigurationRequest,
35
+ VoiceAssistantConfigurationResponse,
36
+ VoiceAssistantEventResponse,
37
+ VoiceAssistantExternalWakeWord,
38
+ VoiceAssistantRequest,
39
+ VoiceAssistantSetConfiguration,
40
+ VoiceAssistantTimerEventResponse,
41
+ VoiceAssistantWakeWord,
42
+ )
43
+ from aioesphomeapi.model import (
44
+ VoiceAssistantEventType,
45
+ VoiceAssistantFeature,
46
+ VoiceAssistantTimerEventType,
47
+ )
48
+ from google.protobuf import message
49
+ from pymicro_wakeword import MicroWakeWord
50
+ from pyopen_wakeword import OpenWakeWord
51
+
52
+ from .api_server import APIServer
53
+ from .entity import MediaPlayerEntity
54
+ from .entity_registry import EntityRegistry, get_entity_key
55
+ from .models import AvailableWakeWord, ServerState, WakeWordType
56
+ from .util import call_all
57
+ from .reachy_controller import ReachyController
58
+
59
+ _LOGGER = logging.getLogger(__name__)
60
+
61
+
62
+ class VoiceSatelliteProtocol(APIServer):
63
+ """Voice satellite protocol handler for ESPHome."""
64
+
65
+ def __init__(self, state: ServerState, camera_server: Optional["MJPEGCameraServer"] = None) -> None:
66
+ super().__init__(state.name)
67
+ self.state = state
68
+ self.state.satellite = self
69
+ self.camera_server = camera_server
70
+
71
+ # Initialize streaming state early (before entity setup)
72
+ self._is_streaming_audio = False
73
+ self._tts_url: Optional[str] = None
74
+ self._tts_played = False
75
+ self._continue_conversation = False
76
+ self._timer_finished = False
77
+ self._external_wake_words: Dict[str, VoiceAssistantExternalWakeWord] = {}
78
+
79
+ # Conversation tracking for continuous conversation
80
+ self._conversation_id: Optional[str] = None
81
+ self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
82
+ self._last_conversation_time = 0.0
83
+
84
+ # Initialize Reachy controller
85
+ self.reachy_controller = ReachyController(state.reachy_mini)
86
+
87
+ # Connect MovementManager to ReachyController for pose control from HA
88
+ if state.motion is not None and state.motion.movement_manager is not None:
89
+ self.reachy_controller.set_movement_manager(state.motion.movement_manager)
90
+
91
+ # Setup speech sway callback for audio-driven head motion
92
+ def sway_callback(sway: dict) -> None:
93
+ mm = state.motion.movement_manager
94
+ if mm is not None:
95
+ mm.set_speech_sway(
96
+ sway.get("x_m", 0.0),
97
+ sway.get("y_m", 0.0),
98
+ sway.get("z_m", 0.0),
99
+ sway.get("roll_rad", 0.0),
100
+ sway.get("pitch_rad", 0.0),
101
+ sway.get("yaw_rad", 0.0),
102
+ )
103
+
104
+ state.tts_player.set_sway_callback(sway_callback)
105
+ _LOGGER.info("Speech sway callback configured for TTS player")
106
+
107
+ # Initialize entity registry
108
+ self._entity_registry = EntityRegistry(
109
+ server=self,
110
+ reachy_controller=self.reachy_controller,
111
+ camera_server=camera_server,
112
+ play_emotion_callback=self._play_emotion,
113
+ )
114
+
115
+ # Connect gesture state callback
116
+ if camera_server:
117
+ camera_server.set_gesture_state_callback(self._entity_registry.update_gesture_state)
118
+
119
+ # Only setup entities once (check if already initialized)
120
+ # This prevents duplicate entity registration on reconnection
121
+ if not getattr(self.state, '_entities_initialized', False):
122
+ if self.state.media_player_entity is None:
123
+ self.state.media_player_entity = MediaPlayerEntity(
124
+ server=self,
125
+ key=get_entity_key("reachy_mini_media_player"),
126
+ name="Media Player",
127
+ object_id="reachy_mini_media_player",
128
+ music_player=state.music_player,
129
+ announce_player=state.tts_player,
130
+ )
131
+ self.state.entities.append(self.state.media_player_entity)
132
+
133
+ # Setup all entities using the registry
134
+ self._entity_registry.setup_all_entities(self.state.entities)
135
+
136
+ # Mark entities as initialized
137
+ self.state._entities_initialized = True
138
+ _LOGGER.info("Entities initialized: %d total", len(self.state.entities))
139
+ else:
140
+ _LOGGER.debug("Entities already initialized, skipping setup")
141
+ # Update server reference in existing entities
142
+ for entity in self.state.entities:
143
+ entity.server = self
144
+
145
+ def handle_voice_event(
146
+ self, event_type: VoiceAssistantEventType, data: Dict[str, str]
147
+ ) -> None:
148
+ _LOGGER.debug("Voice event: type=%s, data=%s", event_type.name, data)
149
+
150
+ if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_START:
151
+ self._tts_url = data.get("url")
152
+ self._tts_played = False
153
+ self._continue_conversation = False
154
+ # Reachy Mini: Start listening animation
155
+ self._reachy_on_listening()
156
+
157
+ # Note: TTS URL requires HA authentication, cannot pre-download
158
+ # Speaking animation uses JSON-defined multi-frequency sway instead
159
+
160
+ elif event_type in (
161
+ VoiceAssistantEventType.VOICE_ASSISTANT_STT_VAD_END,
162
+ VoiceAssistantEventType.VOICE_ASSISTANT_STT_END,
163
+ ):
164
+ self._is_streaming_audio = False
165
+ # Reachy Mini: Stop listening, start thinking
166
+ self._reachy_on_thinking()
167
+
168
+ elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_INTENT_PROGRESS:
169
+ if data.get("tts_start_streaming") == "1":
170
+ # Start streaming early
171
+ self.play_tts()
172
+
173
+ elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_INTENT_END:
174
+ if data.get("continue_conversation") == "1":
175
+ self._continue_conversation = True
176
+
177
+ elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
178
+ # Reachy Mini: Start speaking animation (JSON-defined multi-frequency sway)
179
+ _LOGGER.debug("TTS_START event received, triggering speaking animation")
180
+ self._reachy_on_speaking()
181
+
182
+ elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
183
+ self._tts_url = data.get("url")
184
+ self.play_tts()
185
+
186
+ elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_END:
187
+ # Pipeline run ended
188
+ self._is_streaming_audio = False
189
+
190
+ # Following reference project pattern
191
+ if not self._tts_played:
192
+ self._tts_finished()
193
+
194
+ self._tts_played = False
195
+
196
+ def handle_timer_event(
197
+ self,
198
+ event_type: VoiceAssistantTimerEventType,
199
+ msg: VoiceAssistantTimerEventResponse,
200
+ ) -> None:
201
+ _LOGGER.debug("Timer event: type=%s", event_type.name)
202
+
203
+ if event_type == VoiceAssistantTimerEventType.VOICE_ASSISTANT_TIMER_FINISHED:
204
+ if not self._timer_finished:
205
+ self.state.active_wake_words.add(self.state.stop_word.id)
206
+ self._timer_finished = True
207
+ self.duck()
208
+ self._play_timer_finished()
209
+ # Reachy Mini: Timer finished animation
210
+ self._reachy_on_timer_finished()
211
+
212
def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
    """Dispatch one protobuf message received from Home Assistant.

    Yields zero or more response messages to send back over the
    ESPHome native API connection.
    """
    if isinstance(msg, VoiceAssistantEventResponse):
        # Voice pipeline event: flatten key/value args and forward.
        event_data: Dict[str, str] = {arg.name: arg.value for arg in msg.data}
        self.handle_voice_event(VoiceAssistantEventType(msg.event_type), event_data)

    elif isinstance(msg, VoiceAssistantAnnounceRequest):
        _LOGGER.debug("Announcing: %s", msg.text)
        assert self.state.media_player_entity is not None

        # Optional pre-announce chime, then the announcement itself.
        play_urls = []
        if msg.preannounce_media_id:
            play_urls.append(msg.preannounce_media_id)
        play_urls.append(msg.media_id)

        # Arm the stop word so the announcement can be interrupted.
        self.state.active_wake_words.add(self.state.stop_word.id)
        self._continue_conversation = msg.start_conversation
        self.duck()

        yield from self.state.media_player_entity.play(
            play_urls, announcement=True, done_callback=self._tts_finished
        )

    elif isinstance(msg, VoiceAssistantTimerEventResponse):
        self.handle_timer_event(VoiceAssistantTimerEventType(msg.event_type), msg)

    elif isinstance(msg, DeviceInfoRequest):
        yield DeviceInfoResponse(
            uses_password=False,
            name=self.state.name,
            mac_address=self.state.mac_address,
            voice_assistant_feature_flags=(
                VoiceAssistantFeature.VOICE_ASSISTANT
                | VoiceAssistantFeature.API_AUDIO
                | VoiceAssistantFeature.ANNOUNCE
                | VoiceAssistantFeature.START_CONVERSATION
                | VoiceAssistantFeature.TIMERS
            ),
        )

    elif isinstance(
        msg,
        (
            ListEntitiesRequest,
            SubscribeHomeAssistantStatesRequest,
            SubscribeStatesRequest,
            MediaPlayerCommandRequest,
            NumberCommandRequest,
            SwitchCommandRequest,
            SelectCommandRequest,
            ButtonCommandRequest,
            CameraImageRequest,
        ),
    ):
        # Entity-scoped requests: give every entity a chance to answer.
        for entity in self.state.entities:
            yield from entity.handle_message(msg)

        if isinstance(msg, ListEntitiesRequest):
            yield ListEntitiesDoneResponse()

    elif isinstance(msg, VoiceAssistantConfigurationRequest):
        available = [
            VoiceAssistantWakeWord(
                id=word.id,
                wake_word=word.wake_word,
                trained_languages=word.trained_languages,
            )
            for word in self.state.available_wake_words.values()
        ]

        # Offer HA's external micro wake words as downloadable options.
        for external in msg.external_wake_words:
            if external.model_type != "micro":
                continue

            available.append(
                VoiceAssistantWakeWord(
                    id=external.id,
                    wake_word=external.wake_word,
                    trained_languages=external.trained_languages,
                )
            )
            self._external_wake_words[external.id] = external

        yield VoiceAssistantConfigurationResponse(
            available_wake_words=available,
            active_wake_words=[
                word.id
                for word in self.state.wake_words.values()
                if word.id in self.state.active_wake_words
            ],
            max_active_wake_words=2,
        )

        _LOGGER.info("Connected to Home Assistant")

    elif isinstance(msg, VoiceAssistantSetConfiguration):
        # Change the set of active wake words, loading models as needed.
        active_wake_words: Set[str] = set()

        for wake_word_id in msg.active_wake_words:
            if wake_word_id in self.state.wake_words:
                # Model already loaded; just mark it active.
                active_wake_words.add(wake_word_id)
                continue

            model_info = self.state.available_wake_words.get(wake_word_id)
            if not model_info:
                # Unknown locally: may be an external model to download.
                external_wake_word = self._external_wake_words.get(wake_word_id)
                if not external_wake_word:
                    _LOGGER.warning("Wake word not found: %s", wake_word_id)
                    continue

                model_info = self._download_external_wake_word(external_wake_word)
                if not model_info:
                    continue

                self.state.available_wake_words[wake_word_id] = model_info

            _LOGGER.debug("Loading wake word: %s", model_info.wake_word_path)
            loaded_model = model_info.load()
            # Tag the model with its id for later identification.
            setattr(loaded_model, 'id', wake_word_id)
            self.state.wake_words[wake_word_id] = loaded_model
            _LOGGER.info("Wake word loaded: %s", wake_word_id)
            active_wake_words.add(wake_word_id)
            # Keep looping: load ALL requested wake words, not only the first.

        self.state.active_wake_words = active_wake_words
        _LOGGER.debug("Active wake words: %s", active_wake_words)

        # Persist the selection so it survives restarts.
        self.state.preferences.active_wake_words = list(active_wake_words)
        self.state.save_preferences()
        self.state.wake_words_changed = True
349
def handle_audio(self, audio_chunk: bytes) -> None:
    """Forward one microphone chunk to Home Assistant while streaming."""
    if self._is_streaming_audio:
        self.send_messages([VoiceAssistantAudio(data=audio_chunk)])
354
def _get_or_create_conversation_id(self) -> str:
    """Return the current conversation id, minting a new one if expired.

    The same id is reused while turns arrive within the conversation
    timeout so Home Assistant preserves conversational context.
    """
    import uuid

    now = time.time()
    expired = (
        self._conversation_id is None
        or now - self._last_conversation_time > self._conversation_timeout
    )
    if expired:
        self._conversation_id = str(uuid.uuid4())
        _LOGGER.debug("Created new conversation_id: %s", self._conversation_id)

    # Any activity refreshes the timeout window.
    self._last_conversation_time = now
    return self._conversation_id
370
def _clear_conversation(self) -> None:
    """Reset conversation tracking when leaving conversation mode."""
    self._continue_conversation = False
    self._conversation_id = None
375
def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
    """Handle a wake word detection by starting the voice pipeline.

    If a timer alarm is currently ringing, the wake word silences the
    alarm instead of starting a new pipeline run.
    """
    if self._timer_finished:
        # Wake word doubles as "stop the alarm".
        self._timer_finished = False
        self.state.tts_player.stop()
        _LOGGER.debug("Stopping timer finished sound")
        return

    phrase = wake_word.wake_word
    _LOGGER.debug("Detected wake word: %s", phrase)

    # Orient the head toward the speaker using direction-of-arrival.
    self._turn_to_sound_source()

    # Reuse or mint a conversation id so HA keeps context across turns.
    conversation_id = self._get_or_create_conversation_id()

    self.send_messages(
        [
            VoiceAssistantRequest(
                start=True,
                wake_word_phrase=phrase,
                conversation_id=conversation_id,
            )
        ]
    )
    self.duck()
    self._is_streaming_audio = True
    self.state.tts_player.play(self.state.wakeup_sound)
404
def stop(self) -> None:
    """Stop current playback (e.g. the user said the stop word)."""
    self.state.active_wake_words.discard(self.state.stop_word.id)
    self.state.tts_player.stop()

    if not self._timer_finished:
        # Normal TTS was interrupted; run the usual end-of-TTS path.
        _LOGGER.debug("TTS response stopped manually")
        self._tts_finished()
    else:
        # The ringing alarm was stopped instead.
        self._timer_finished = False
        _LOGGER.debug("Stopping timer finished sound")
416
def play_tts(self) -> None:
    """Start playback of the pending TTS response exactly once."""
    if self._tts_played or not self._tts_url:
        return

    self._tts_played = True
    _LOGGER.debug("Playing TTS response: %s", self._tts_url)

    # Arm the stop word so playback can be interrupted by voice.
    self.state.active_wake_words.add(self.state.stop_word.id)
    self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
426
def duck(self) -> None:
    """Lower background music while voice interaction is active."""
    _LOGGER.debug("Ducking music")
    self.state.music_player.duck()
    # Also pause Sendspin so it cannot fight over the audio device.
    self.state.music_player.pause_sendspin()
432
def unduck(self) -> None:
    """Restore background music after voice interaction ends."""
    _LOGGER.debug("Unducking music")
    self.state.music_player.unduck()
    # Resume Sendspin audio as well.
    self.state.music_player.resume_sendspin()
438
def _tts_finished(self) -> None:
    """Handle the end of TTS audio playback.

    Decides whether to immediately re-open the microphone for a
    follow-up turn (continuous conversation) or return to idle.
    """
    self.state.active_wake_words.discard(self.state.stop_word.id)
    self.send_messages([VoiceAssistantAnnounceFinished()])

    # Continue when either the local "continuous conversation" switch is
    # on (unconditional) or Home Assistant asked to keep the turn open.
    continuous_mode = self.state.preferences.continuous_conversation
    if continuous_mode or self._continue_conversation:
        _LOGGER.debug(
            "Continuing conversation (our_switch=%s, ha_request=%s)",
            continuous_mode,
            self._continue_conversation,
        )

        # Prompt sound tells the user we are listening again.
        self.state.tts_player.play(self.state.wakeup_sound)

        # Same conversation id keeps context on the HA side.
        self.send_messages(
            [
                VoiceAssistantRequest(
                    start=True,
                    conversation_id=self._get_or_create_conversation_id(),
                )
            ]
        )
        self._is_streaming_audio = True

        # Keep the robot in its listening pose.
        self._reachy_on_listening()
    else:
        self._clear_conversation()
        self.unduck()
        _LOGGER.debug("Conversation finished")

        # Reachy Mini: return to the idle animation.
        self._reachy_on_idle()
477
def _play_timer_finished(self) -> None:
    """Loop the alarm sound until the timer is acknowledged."""
    if not self._timer_finished:
        # Alarm was dismissed; restore music volume and end the loop.
        self.unduck()
        return

    # Re-trigger ourselves with a one-second gap after each playback.
    self.state.tts_player.play(
        self.state.timer_finished_sound,
        done_callback=lambda: call_all(
            lambda: time.sleep(1.0), self._play_timer_finished
        ),
    )
489
def connection_lost(self, exc):
    """Reset per-connection state when Home Assistant disconnects."""
    super().connection_lost(exc)
    _LOGGER.info("Disconnected from Home Assistant")
    # Drop any in-flight streaming / TTS / conversation state.
    self._is_streaming_audio = False
    self._tts_url = None
    self._tts_played = False
    self._continue_conversation = False
498
def _download_external_wake_word(
    self, external_wake_word: VoiceAssistantExternalWakeWord
) -> Optional[AvailableWakeWord]:
    """Download an external micro wake word (config + model) if needed.

    The model download is skipped when a local copy already matches the
    advertised size and SHA-256 hash.

    Args:
        external_wake_word: Wake word descriptor advertised by HA.

    Returns:
        An AvailableWakeWord pointing at the local config file, or None
        on any download failure (bad status, network error, timeout).
    """
    eww_dir = self.state.download_dir / "external_wake_words"
    eww_dir.mkdir(parents=True, exist_ok=True)

    config_path = eww_dir / f"{external_wake_word.id}.json"
    should_download_config = not config_path.exists()

    # Decide whether the model file needs (re-)downloading.
    model_path = eww_dir / f"{external_wake_word.id}.tflite"
    should_download_model = True

    if model_path.exists():
        model_size = model_path.stat().st_size
        if model_size == external_wake_word.model_size:
            with open(model_path, "rb") as model_file:
                model_hash = hashlib.sha256(model_file.read()).hexdigest()

            if model_hash == external_wake_word.model_hash:
                should_download_model = False
                _LOGGER.debug(
                    "Model size and hash match for %s. Skipping download.",
                    external_wake_word.id,
                )

    try:
        if should_download_config or should_download_model:
            # Download config.
            # Timeout keeps a dead server from hanging the connection
            # handler; HTTPError/URLError are caught below.
            _LOGGER.debug(
                "Downloading %s to %s", external_wake_word.url, config_path
            )
            with urlopen(external_wake_word.url, timeout=30) as request:
                if request.status != 200:
                    _LOGGER.warning(
                        "Failed to download: %s, status=%s",
                        external_wake_word.url,
                        request.status,
                    )
                    return None

                with open(config_path, "wb") as config_file:
                    shutil.copyfileobj(request, config_file)

        if should_download_model:
            # Model lives next to the config: swap the URL's file name.
            parsed_url = urlparse(external_wake_word.url)
            parsed_url = parsed_url._replace(
                path=posixpath.join(
                    posixpath.dirname(parsed_url.path), model_path.name
                )
            )
            model_url = urlunparse(parsed_url)

            _LOGGER.debug("Downloading %s to %s", model_url, model_path)
            with urlopen(model_url, timeout=30) as request:
                if request.status != 200:
                    _LOGGER.warning(
                        "Failed to download: %s, status=%s", model_url, request.status
                    )
                    return None

                with open(model_path, "wb") as model_file:
                    shutil.copyfileobj(request, model_file)
    except OSError as err:
        # urllib raises URLError/HTTPError (both OSError subclasses) on
        # network failures; honor the "return None on failure" contract
        # instead of letting the exception kill the API connection.
        _LOGGER.warning(
            "Wake word download failed for %s: %s", external_wake_word.id, err
        )
        return None

    return AvailableWakeWord(
        id=external_wake_word.id,
        type=WakeWordType.MICRO_WAKE_WORD,
        wake_word=external_wake_word.wake_word,
        trained_languages=external_wake_word.trained_languages,
        wake_word_path=config_path,
    )
566
+ # -------------------------------------------------------------------------
567
+ # Reachy Mini Motion Control
568
+ # -------------------------------------------------------------------------
569
+
570
def _turn_to_sound_source(self) -> None:
    """Orient the head toward the detected speaker at wakeup.

    Reads the microphone array's direction of arrival (DOA) once and
    commands a quick yaw turn; face tracking takes over afterwards.

    DOA convention (from the SDK, p_head = [sin(doa), cos(doa), 0]):
    0 rad = left (Y+), pi/2 rad = front (X+), pi rad = right (Y-).

    Note: speech_detected is not checked because by the time wake word
    detection completes, the user may have stopped speaking.
    """
    if not self.state.motion_enabled or not self.state.reachy_mini:
        _LOGGER.info("DOA turn-to-sound: motion disabled or no robot")
        return

    try:
        # Read the DOA a single time (it is a momentary estimate).
        doa = self.reachy_controller.get_doa_angle()
        if doa is None:
            _LOGGER.info("DOA not available, skipping turn-to-sound")
            return

        angle_rad, speech_detected = doa
        _LOGGER.debug(
            "DOA raw: angle=%.3f rad (%.1f°), speech=%s",
            angle_rad,
            math.degrees(angle_rad),
            speech_detected,
        )

        # Direction vector in the head frame (X+ = front, Y+ = left);
        # computed for diagnostics only.
        dir_x = math.sin(angle_rad)
        dir_y = math.cos(angle_rad)

        # Map DOA to robot yaw: left(0) -> -90°, front(pi/2) -> 0°,
        # right(pi) -> +90°.
        yaw_rad = angle_rad - math.pi / 2
        yaw_deg = math.degrees(yaw_rad)

        _LOGGER.debug(
            "DOA direction: x=%.2f, y=%.2f, yaw=%.1f°", dir_x, dir_y, yaw_deg
        )

        # Ignore small angles to avoid jitter from DOA noise.
        DOA_THRESHOLD_DEG = 10.0
        if abs(yaw_deg) < DOA_THRESHOLD_DEG:
            _LOGGER.debug(
                "DOA angle %.1f° below threshold (%.1f°), skipping turn",
                yaw_deg,
                DOA_THRESHOLD_DEG,
            )
            return

        # Turn only 80% of the estimated angle — conservative strategy
        # that absorbs DOA inaccuracy.
        DOA_SCALE = 0.8
        target_yaw_deg = yaw_deg * DOA_SCALE

        _LOGGER.info(
            "Turning toward sound source: DOA=%.1f°, target=%.1f°",
            yaw_deg,
            target_yaw_deg,
        )

        # Quick, non-blocking turn via the movement manager.
        if self.state.motion and self.state.motion.movement_manager:
            self.state.motion.movement_manager.turn_to_angle(
                target_yaw_deg, duration=0.5
            )
    except Exception as e:
        _LOGGER.error("Error in turn-to-sound: %s", e)
642
def _reachy_on_listening(self) -> None:
    """Enter the listening state (HA state: Listening)."""
    # High-frequency face tracking while actively listening.
    self._set_conversation_mode(True)

    # Re-enable face tracking (speaking may have paused it).
    if self.camera_server is not None:
        try:
            self.camera_server.set_face_tracking_enabled(True)
        except Exception as err:
            _LOGGER.debug("Failed to resume face tracking: %s", err)

    if not self.state.motion_enabled or not self.state.reachy_mini:
        return
    try:
        _LOGGER.debug("Reachy Mini: Listening animation")
        if self.state.motion:
            self.state.motion.on_listening()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
663
def _reachy_on_thinking(self) -> None:
    """Enter the thinking state (HA state: Processing)."""
    # Re-enable face tracking (speaking may have paused it).
    if self.camera_server is not None:
        try:
            self.camera_server.set_face_tracking_enabled(True)
        except Exception as err:
            _LOGGER.debug("Failed to resume face tracking: %s", err)

    if not self.state.motion_enabled or not self.state.reachy_mini:
        return
    try:
        _LOGGER.debug("Reachy Mini: Thinking animation")
        if self.state.motion:
            self.state.motion.on_thinking()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
681
def _reachy_on_speaking(self) -> None:
    """Enter the speaking state (HA state: Responding).

    Face tracking is paused; the speaking animation drives the head.
    """
    if self.camera_server is not None:
        try:
            self.camera_server.set_face_tracking_enabled(False)
            _LOGGER.debug("Face tracking paused during speaking")
        except Exception as err:
            _LOGGER.debug("Failed to pause face tracking: %s", err)

    # Each precondition logs its own reason to ease debugging.
    if not self.state.motion_enabled:
        _LOGGER.warning("Motion disabled, skipping speaking animation")
        return
    if not self.state.reachy_mini:
        _LOGGER.warning("No reachy_mini instance, skipping speaking animation")
        return
    if not self.state.motion:
        _LOGGER.warning("No motion controller, skipping speaking animation")
        return

    try:
        _LOGGER.debug("Reachy Mini: Starting speaking animation")
        self.state.motion.on_speaking_start()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
707
def _reachy_on_idle(self) -> None:
    """Return to the idle state (HA state: Idle)."""
    # Leave high-frequency tracking; adaptive mode saves CPU when idle.
    self._set_conversation_mode(False)

    # Re-enable face tracking (speaking may have paused it).
    if self.camera_server is not None:
        try:
            self.camera_server.set_face_tracking_enabled(True)
        except Exception as err:
            _LOGGER.debug("Failed to resume face tracking: %s", err)

    if not self.state.motion_enabled or not self.state.reachy_mini:
        return
    try:
        _LOGGER.debug("Reachy Mini: Idle animation")
        if self.state.motion:
            self.state.motion.on_idle()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
728
def _set_conversation_mode(self, in_conversation: bool) -> None:
    """Switch face tracking between conversation and idle rates.

    In conversation, tracking runs at high frequency; when idle it
    falls back to an adaptive rate to save CPU.
    """
    if self.camera_server is None:
        return
    try:
        self.camera_server.set_conversation_mode(in_conversation)
    except Exception as err:
        _LOGGER.debug("Failed to set conversation mode: %s", err)
740
def _reachy_on_timer_finished(self) -> None:
    """Play the robot animation for a finished timer."""
    if not self.state.motion_enabled or not self.state.reachy_mini:
        return
    try:
        _LOGGER.debug("Reachy Mini: Timer finished animation")
        if self.state.motion:
            self.state.motion.on_timer_finished()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
751
def _play_emotion(self, emotion_name: str) -> None:
    """Play an emotion/expression from the emotions library.

    Calls the Reachy daemon's recorded-move playback API over HTTP.
    Failures are logged, never raised.

    Args:
        emotion_name: Name of the emotion (e.g., "happy1", "sad1", etc.)
    """
    try:
        import requests

        # Resolve the daemon's WLAN IP; fall back to localhost.
        wlan_ip = "localhost"
        if self.state.reachy_mini is not None:
            try:
                status = self.state.reachy_mini.client.get_status(wait=False)
                wlan_ip = status.get('wlan_ip', 'localhost')
            except Exception:
                wlan_ip = "localhost"

        # Call the emotion playback API
        # Dataset: pollen-robotics/reachy-mini-emotions-library
        base_url = f"http://{wlan_ip}:8000/api/move/play/recorded-move-dataset"
        dataset = "pollen-robotics/reachy-mini-emotions-library"
        url = f"{base_url}/{dataset}/{emotion_name}"

        response = requests.post(url, timeout=5)
        if response.status_code == 200:
            result = response.json()
            move_uuid = result.get('uuid')
            # Lazy %-style args (not f-strings) to match the rest of the
            # file and skip formatting when the record is filtered out.
            _LOGGER.info("Playing emotion: %s (uuid=%s)", emotion_name, move_uuid)
        else:
            _LOGGER.warning(
                "Failed to play emotion %s: HTTP %s",
                emotion_name,
                response.status_code,
            )

    except Exception as e:
        _LOGGER.error("Error playing emotion %s: %s", emotion_name, e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/.gitkeep RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/LICENSE.md RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/README.md RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/timer_finished.flac RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/wake_word_triggered.flac RENAMED
File without changes
{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/speech_sway.py RENAMED
@@ -6,9 +6,8 @@ Analyzes audio loudness to drive natural head movements during TTS playback.
6
 
7
  import math
8
  from collections import deque
9
- from collections.abc import Callable
10
  from itertools import islice
11
- from typing import Any
12
 
13
  import numpy as np
14
  from numpy.typing import NDArray
@@ -65,7 +64,7 @@ def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
65
  """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
66
  t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
67
  t = max(0.0, min(1.0, t))
68
- return t**LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
69
 
70
 
71
  def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
@@ -94,7 +93,7 @@ def _resample_linear(x: NDArray[np.float32], sr_in: int, sr_out: int) -> NDArray
94
  """Lightweight linear resampler for short buffers."""
95
  if sr_in == sr_out or x.size == 0:
96
  return x
97
- n_out = round(x.size * sr_out / sr_in)
98
  if n_out <= 1:
99
  return np.zeros(0, dtype=np.float32)
100
  t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
@@ -144,7 +143,7 @@ class SpeechSwayRT:
144
  self.sway_down = 0
145
  self.t = 0.0
146
 
147
- def feed(self, pcm: NDArray[Any], sr: int | None = None) -> list[dict[str, float]]:
148
  """Stream in PCM chunk. Returns list of sway dicts, one per hop.
149
 
150
  Args:
@@ -168,7 +167,7 @@ class SpeechSwayRT:
168
  else:
169
  self.carry = x
170
 
171
- out: list[dict[str, float]] = []
172
 
173
  while self.carry.size >= HOP:
174
  hop = self.carry[:HOP]
@@ -216,35 +215,27 @@ class SpeechSwayRT:
216
  self.t += HOP_MS / 1000.0
217
 
218
  # Oscillators
219
- pitch = (
220
- math.radians(SWAY_A_PITCH_DEG)
221
- * loud
222
- * env
223
- * math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch)
224
- )
225
- yaw = (
226
- math.radians(SWAY_A_YAW_DEG) * loud * env * math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw)
227
- )
228
- roll = (
229
- math.radians(SWAY_A_ROLL_DEG)
230
- * loud
231
- * env
232
- * math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll)
233
- )
234
- x_m = (SWAY_A_X_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_X * self.t + self.phase_x)
235
- y_m = (SWAY_A_Y_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
236
- z_m = (SWAY_A_Z_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
237
-
238
- out.append(
239
- {
240
- "pitch_rad": pitch,
241
- "yaw_rad": yaw,
242
- "roll_rad": roll,
243
- "x_m": x_m,
244
- "y_m": y_m,
245
- "z_m": z_m,
246
- }
247
- )
248
 
249
  return out
250
 
@@ -252,7 +243,7 @@ class SpeechSwayRT:
252
  def analyze_audio_for_sway(
253
  audio_data: NDArray[Any],
254
  sample_rate: int,
255
- callback: Callable[[dict[str, float]], None],
256
  ) -> None:
257
  """Analyze entire audio and call callback for each sway frame.
258
 
 
6
 
7
  import math
8
  from collections import deque
 
9
  from itertools import islice
10
+ from typing import Any, Callable, Dict, List, Optional
11
 
12
  import numpy as np
13
  from numpy.typing import NDArray
 
64
  """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
65
  t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
66
  t = max(0.0, min(1.0, t))
67
+ return t ** LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
68
 
69
 
70
  def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
 
93
  """Lightweight linear resampler for short buffers."""
94
  if sr_in == sr_out or x.size == 0:
95
  return x
96
+ n_out = int(round(x.size * sr_out / sr_in))
97
  if n_out <= 1:
98
  return np.zeros(0, dtype=np.float32)
99
  t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
 
143
  self.sway_down = 0
144
  self.t = 0.0
145
 
146
+ def feed(self, pcm: NDArray[Any], sr: Optional[int] = None) -> List[Dict[str, float]]:
147
  """Stream in PCM chunk. Returns list of sway dicts, one per hop.
148
 
149
  Args:
 
167
  else:
168
  self.carry = x
169
 
170
+ out: List[Dict[str, float]] = []
171
 
172
  while self.carry.size >= HOP:
173
  hop = self.carry[:HOP]
 
215
  self.t += HOP_MS / 1000.0
216
 
217
  # Oscillators
218
+ pitch = (math.radians(SWAY_A_PITCH_DEG) * loud * env *
219
+ math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch))
220
+ yaw = (math.radians(SWAY_A_YAW_DEG) * loud * env *
221
+ math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw))
222
+ roll = (math.radians(SWAY_A_ROLL_DEG) * loud * env *
223
+ math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll))
224
+ x_m = (SWAY_A_X_MM / 1000.0) * loud * env * math.sin(
225
+ 2 * math.pi * SWAY_F_X * self.t + self.phase_x)
226
+ y_m = (SWAY_A_Y_MM / 1000.0) * loud * env * math.sin(
227
+ 2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
228
+ z_m = (SWAY_A_Z_MM / 1000.0) * loud * env * math.sin(
229
+ 2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
230
+
231
+ out.append({
232
+ "pitch_rad": pitch,
233
+ "yaw_rad": yaw,
234
+ "roll_rad": roll,
235
+ "x_m": x_m,
236
+ "y_m": y_m,
237
+ "z_m": z_m,
238
+ })
 
 
 
 
 
 
 
 
239
 
240
  return out
241
 
 
243
  def analyze_audio_for_sway(
244
  audio_data: NDArray[Any],
245
  sample_rate: int,
246
+ callback: Callable[[Dict[str, float]], None],
247
  ) -> None:
248
  """Analyze entire audio and call callback for each sway frame.
249
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/index.html RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/main.js RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/style.css RENAMED
File without changes
reachy_mini_ha_voice/util.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions."""
2
+
3
+ import hashlib
4
+ import uuid
5
+ from collections.abc import Callable
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+
10
def call_all(*funcs: Optional[Callable[[], None]]) -> None:
    """Invoke each given callable in order, skipping any that are None."""
    for fn in funcs:
        if fn is not None:
            fn()
16
+
17
def get_mac() -> str:
    """Return a stable device identifier for Home Assistant.

    The identifier is cached on disk so the same ID is reported across
    restarts; otherwise Home Assistant would register the satellite as
    a brand-new device every time.
    """
    # Persist the ID next to the package, under local/.device_id.
    local_dir = Path(__file__).parent.parent / "local"
    local_dir.mkdir(parents=True, exist_ok=True)
    device_id_file = local_dir / ".device_id"

    # Reuse the cached ID when readable (best effort).
    if device_id_file.exists():
        try:
            return device_id_file.read_text().strip()
        except Exception:
            pass

    # Derive a deterministic 12-hex-digit ID from the machine node id.
    # NOTE: md5 is used only as a stable non-security fingerprint.
    machine_id = uuid.getnode()
    device_id = hashlib.md5(str(machine_id).encode()).hexdigest()[:12]

    # Cache for the next run; a failed write is non-fatal.
    try:
        device_id_file.write_text(device_id)
    except Exception:
        pass

    return device_id
{reachy_mini_home_assistant → reachy_mini_ha_voice}/voice_assistant.py RENAMED
@@ -1,1314 +1,810 @@
1
- """
2
- Voice Assistant Service for Reachy Mini.
3
-
4
- This module provides the main voice assistant service that integrates
5
- with Home Assistant via ESPHome protocol.
6
- """
7
-
8
- import asyncio
9
- import json
10
- import logging
11
- import threading
12
- import time
13
- from collections import deque
14
- from dataclasses import dataclass, field
15
- from pathlib import Path
16
- from queue import Queue
17
- from typing import TYPE_CHECKING
18
-
19
- import numpy as np
20
- import requests
21
- from reachy_mini import ReachyMini
22
-
23
- from .audio.audio_player import AudioPlayer
24
- from .audio.microphone import MicrophoneOptimizer, MicrophonePreferences
25
- from .core import Config, SleepManager
26
- from .core.util import get_mac
27
- from .models import AvailableWakeWord, Preferences, ServerState, WakeWordType
28
- from .motion.reachy_motion import ReachyMiniMotion
29
- from .protocol.satellite import VoiceSatelliteProtocol
30
- from .protocol.zeroconf import HomeAssistantZeroconf
31
- from .vision.camera_server import MJPEGCameraServer
32
-
33
- if TYPE_CHECKING:
34
- from pymicro_wakeword import MicroWakeWord
35
- from pyopen_wakeword import OpenWakeWord
36
-
37
# Module-level logger for the voice assistant service.
_LOGGER = logging.getLogger(__name__)

# Paths resolved relative to this module so bundled assets are located
# regardless of the process working directory.
_MODULE_DIR = Path(__file__).parent
_WAKEWORDS_DIR = _MODULE_DIR / "wakewords"  # bundled wake-word model files
_SOUNDS_DIR = _MODULE_DIR / "sounds"  # bundled notification sound files
_LOCAL_DIR = _MODULE_DIR.parent / "local"  # writable per-install state (preferences, downloads)
43
-
44
-
45
@dataclass
class AudioProcessingContext:
    """Context for audio processing, holding mutable state."""

    # Currently loaded wake-word model wrappers.
    wake_words: list = field(default_factory=list)
    # MicroWakeWord feature extractor; None until initialized.
    micro_features: object | None = None
    # Per-model input buffers for MicroWakeWord models.
    micro_inputs: list = field(default_factory=list)
    # OpenWakeWord feature extractor; None until initialized.
    oww_features: object | None = None
    # Per-model input buffers for OpenWakeWord models.
    oww_inputs: list = field(default_factory=list)
    # True when at least one OpenWakeWord model is in use.
    has_oww: bool = False
    # Timestamp of the most recent wake-word activation (None until first
    # trigger) — presumably time.monotonic(); confirm against the writer.
    last_active: float | None = None
56
-
57
-
58
# Audio chunk size for consistent streaming.
# Smaller chunks = faster VAD response.
# ESPHome typical range: 256-512 samples.
# Going smaller improves latency but increases CPU/network overhead.
AUDIO_BLOCK_SIZE = 256  # samples at 16kHz = 16ms (optimized for low latency)
# Cap the buffered backlog at ~40 chunks (~640 ms) so a stalled consumer
# cannot grow the buffer without bound.
MAX_AUDIO_BUFFER_SIZE = AUDIO_BLOCK_SIZE * 40  # Max 40 chunks (~640ms) to prevent memory leak
64
-
65
-
66
- class VoiceAssistantService:
67
- """Voice assistant service that runs ESPHome protocol server."""
68
-
69
- def __init__(
70
- self,
71
- reachy_mini: ReachyMini,
72
- name: str = "Reachy Mini",
73
- host: str = "0.0.0.0",
74
- port: int = 6053,
75
- wake_model: str = "okay_nabu",
76
- camera_port: int = 8081,
77
- camera_enabled: bool = True,
78
- ):
79
- self.reachy_mini = reachy_mini
80
- self.name = name
81
- self.host = host
82
- self.port = port
83
- self.wake_model = wake_model
84
- self.camera_port = camera_port
85
- self.camera_enabled = camera_enabled
86
-
87
- self._server = None
88
- self._discovery = None
89
- self._audio_thread = None
90
- self._running = False
91
- self._state: ServerState | None = None
92
- self._motion = ReachyMiniMotion(reachy_mini)
93
- self._camera_server: MJPEGCameraServer | None = None
94
-
95
- # Audio buffer for fixed-size chunk output
96
- # Use deque with maxlen to avoid creating new arrays on every operation
97
- # This prevents memory leak from repeated array creation (2-3 arrays per chunk)
98
- self._audio_buffer: deque[float] = deque(maxlen=MAX_AUDIO_BUFFER_SIZE)
99
-
100
- # Audio overflow log throttling
101
- self._last_audio_overflow_log = 0.0
102
- self._suppressed_audio_overflows = 0
103
-
104
- # Robot services pause/resume tracking (without RobotStateMonitor)
105
- self._robot_services_paused = threading.Event() # Set when services should pause
106
- self._robot_services_resumed = threading.Event() # Event-driven resume signaling
107
- self._robot_services_resumed.set() # Start in resumed state
108
-
109
- # GStreamer access lock - prevents concurrent access to media pipeline
110
- # This prevents crashes when multiple threads access get_audio_sample(), push_audio_sample(), get_frame()
111
- self._gstreamer_lock = threading.Lock()
112
-
113
- # Sleep manager for sleep/wake handling
114
- self._sleep_manager: SleepManager | None = None
115
- self._event_loop: asyncio.AbstractEventLoop | None = None
116
-
117
- # Home Assistant connection state
118
- self._ha_connected = False # Track whether HA is connected
119
- self._ha_connection_established = False # Track if HA connection was ever established
120
-
121
- async def start(self) -> None:
122
- """Start the voice assistant service."""
123
- _LOGGER.info("Initializing voice assistant service...")
124
-
125
- # Ensure directories exist
126
- _WAKEWORDS_DIR.mkdir(parents=True, exist_ok=True)
127
- _SOUNDS_DIR.mkdir(parents=True, exist_ok=True)
128
- _LOCAL_DIR.mkdir(parents=True, exist_ok=True)
129
-
130
- # Verify required files (bundled with package)
131
- await self._verify_required_files()
132
-
133
- # Load wake words
134
- available_wake_words = self._load_available_wake_words()
135
- _LOGGER.debug("Available wake words: %s", list(available_wake_words.keys()))
136
-
137
- # Load preferences
138
- preferences_path = _LOCAL_DIR / "preferences.json"
139
- preferences = self._load_preferences(preferences_path)
140
-
141
- # Load wake word models
142
- wake_models, active_wake_words = self._load_wake_models(available_wake_words, preferences)
143
-
144
- # Load stop model
145
- stop_model = self._load_stop_model()
146
-
147
- # Create audio players with Reachy Mini reference and GStreamer lock
148
- music_player = AudioPlayer(self.reachy_mini, gstreamer_lock=self._gstreamer_lock)
149
- tts_player = AudioPlayer(self.reachy_mini, gstreamer_lock=self._gstreamer_lock)
150
-
151
- # Create server state
152
- self._state = ServerState(
153
- name=self.name,
154
- mac_address=get_mac(),
155
- audio_queue=Queue(),
156
- entities=[],
157
- available_wake_words=available_wake_words,
158
- wake_words=wake_models,
159
- active_wake_words=active_wake_words,
160
- stop_word=stop_model,
161
- music_player=music_player,
162
- tts_player=tts_player,
163
- wakeup_sound=str(_SOUNDS_DIR / "wake_word_triggered.flac"),
164
- timer_finished_sound=str(_SOUNDS_DIR / "timer_finished.flac"),
165
- preferences=preferences,
166
- preferences_path=preferences_path,
167
- refractory_seconds=2.0,
168
- download_dir=_LOCAL_DIR,
169
- reachy_mini=self.reachy_mini,
170
- motion_enabled=True,
171
- )
172
-
173
- # Log stop word status
174
- if self._state.stop_word:
175
- _LOGGER.info("Stop word initialized with ID: %s", self._state.stop_word.id)
176
- else:
177
- _LOGGER.error("Stop word is None! Stop command will not work")
178
-
179
- # Set motion controller reference in state
180
- self._state.motion = self._motion
181
- if self._motion and self._motion.movement_manager:
182
- self._motion.movement_manager.set_idle_motion_enabled(preferences.idle_motion_enabled)
183
- self._motion.movement_manager.set_idle_antenna_enabled(preferences.idle_antenna_enabled)
184
- self._motion.movement_manager.set_idle_random_actions_enabled(preferences.idle_random_actions_enabled)
185
- _LOGGER.info("Idle motion restored from preferences: %s", preferences.idle_motion_enabled)
186
- _LOGGER.info("Idle antenna motion restored from preferences: %s", preferences.idle_antenna_enabled)
187
- _LOGGER.info("Idle random actions restored from preferences: %s", preferences.idle_random_actions_enabled)
188
-
189
- # Set sleep/wake callbacks for HA button triggers
190
- self._state.on_ha_sleep = self._on_sleep
191
- self._state.on_ha_wake = lambda: asyncio.create_task(self._on_wake_from_ha())
192
-
193
- # Start Reachy Mini media system
194
- try:
195
- # Check if media system is already running to avoid conflicts
196
- media = self.reachy_mini.media
197
- if media.audio is not None:
198
- # Clean stale media state from previous app sessions (daemon is persistent)
199
- try:
200
- media.stop_recording()
201
- except Exception:
202
- pass
203
- try:
204
- media.stop_playing()
205
- except Exception:
206
- pass
207
- time.sleep(0.2)
208
-
209
- media.start_recording()
210
- _LOGGER.info("Started Reachy Mini recording")
211
- media.start_playing()
212
- _LOGGER.info("Started Reachy Mini playback")
213
-
214
- # Deterministic startup validation: fail fast instead of repeated
215
- # fallback/recovery loops that hide root causes.
216
- if not self._probe_audio_capture_ready(media, timeout_s=1.5):
217
- raise RuntimeError("Audio capture probe failed after media startup")
218
-
219
- _LOGGER.info("Reachy Mini media system initialized")
220
-
221
- # Body yaw now follows head yaw in movement_manager.py
222
- # This enables natural body rotation when tracking faces
223
-
224
- # Optimize microphone settings for voice recognition
225
- self._optimize_microphone_settings()
226
- except Exception as e:
227
- _LOGGER.warning("Failed to initialize Reachy Mini media: %s", e)
228
-
229
- # Start motion controller (5Hz control loop)
230
- self._motion.start()
231
-
232
- # Start audio processing thread (non-daemon for proper cleanup)
233
- self._running = True
234
- self._audio_thread = threading.Thread(
235
- target=self._process_audio,
236
- daemon=False,
237
- )
238
- self._audio_thread.start()
239
-
240
- # Create ESPHome server (pass camera_server for camera entity)
241
- loop = asyncio.get_running_loop()
242
- camera_server = self._camera_server # Capture for lambda
243
-
244
- def protocol_factory():
245
- protocol = VoiceSatelliteProtocol(self._state, camera_server=camera_server, voice_assistant_service=self)
246
- # Set HA connection callbacks
247
- protocol.set_ha_connection_callbacks(
248
- on_connected=self._on_ha_connected, on_disconnected=self._on_ha_disconnected
249
- )
250
- return protocol
251
-
252
- self._server = await loop.create_server(
253
- protocol_factory,
254
- host=self.host,
255
- port=self.port,
256
- )
257
-
258
- # Start mDNS discovery
259
- self._discovery = HomeAssistantZeroconf(port=self.port, name=self.name)
260
- await self._discovery.register_server()
261
-
262
- # Store service event loop for cross-thread async toggles
263
- self._event_loop = asyncio.get_running_loop()
264
-
265
- # Start Sendspin discovery only when enabled in preferences (default OFF)
266
- if preferences.sendspin_enabled:
267
- await music_player.start_sendspin_discovery()
268
- _LOGGER.info("Sendspin discovery enabled from preferences")
269
- else:
270
- _LOGGER.info("Sendspin discovery disabled by default")
271
-
272
- # Start sleep manager for proper sleep/wake handling
273
- # This monitors the daemon state and coordinates service suspend/resume
274
- self._sleep_manager = SleepManager(
275
- reachy_mini=self.reachy_mini,
276
- daemon_url=Config.daemon.url,
277
- check_interval=Config.daemon.check_interval_active,
278
- resume_delay=Config.sleep.resume_delay,
279
- )
280
-
281
- # Register sleep/wake callbacks
282
- self._sleep_manager.on_sleep(self._on_sleep)
283
- self._sleep_manager.on_wake(self._on_wake)
284
- self._sleep_manager.on_pre_resume(self._on_pre_resume)
285
-
286
- # Start the sleep manager
287
- await self._sleep_manager.start()
288
- _LOGGER.info("Sleep manager started")
289
-
290
- _LOGGER.info("Voice assistant service started on %s:%s", self.host, self.port)
291
-
292
- def set_sendspin_enabled(self, enabled: bool) -> None:
293
- """Enable or disable Sendspin discovery and connection at runtime."""
294
- if self._state is None or self._state.music_player is None:
295
- return
296
-
297
- if self._state.preferences.sendspin_enabled != enabled:
298
- self._state.preferences.sendspin_enabled = enabled
299
- self._state.save_preferences()
300
-
301
- async def _apply() -> None:
302
- if self._state is None or self._state.music_player is None:
303
- return
304
- if enabled:
305
- await self._state.music_player.start_sendspin_discovery()
306
- else:
307
- await self._state.music_player.stop_sendspin()
308
-
309
- try:
310
- loop = self._event_loop
311
- if loop is not None and loop.is_running():
312
- asyncio.run_coroutine_threadsafe(_apply(), loop)
313
- else:
314
- task = asyncio.create_task(_apply())
315
- task.add_done_callback(lambda _task: None)
316
- except Exception as e:
317
- _LOGGER.warning("Failed to apply Sendspin toggle (%s): %s", enabled, e)
318
-
319
- def _probe_audio_capture_ready(self, media, timeout_s: float = 1.5) -> bool:
320
- """Check whether microphone samples become available shortly after startup."""
321
- deadline = time.monotonic() + timeout_s
322
- while time.monotonic() < deadline:
323
- try:
324
- sample = media.get_audio_sample()
325
- if sample is not None and isinstance(sample, np.ndarray) and sample.size > 0:
326
- return True
327
- except Exception:
328
- pass
329
- time.sleep(0.05)
330
- return False
331
-
332
- def _suspend_voice_services(self, reason: str) -> None:
333
- """Suspend only voice-related services (not camera or motion).
334
-
335
- This is used for the Mute feature - camera and motion should remain active.
336
- """
337
- _LOGGER.warning("Suspending voice services (%s)", reason)
338
- self._robot_services_paused.set()
339
- self._robot_services_resumed.clear()
340
-
341
- # Update state
342
- if self._state is not None:
343
- self._state.services_suspended = True
344
-
345
- # Clear audio buffer to avoid processing stale data
346
- self._audio_buffer.clear()
347
-
348
- # Suspend satellite (stops TTS, music, wake word processing)
349
- if self._state is not None and self._state.satellite is not None:
350
- try:
351
- self._state.satellite.suspend()
352
- _LOGGER.debug("Satellite suspended")
353
- except Exception as e:
354
- _LOGGER.warning("Error suspending satellite: %s", e)
355
-
356
- # Suspend audio players
357
- if self._state is not None:
358
- if self._state.tts_player is not None:
359
- try:
360
- self._state.tts_player.suspend()
361
- except Exception as e:
362
- _LOGGER.warning("Error suspending TTS player: %s", e)
363
- if self._state.music_player is not None:
364
- try:
365
- self._state.music_player.suspend()
366
- except Exception as e:
367
- _LOGGER.warning("Error suspending music player: %s", e)
368
-
369
- # Stop media recording to save CPU
370
- try:
371
- self.reachy_mini.media.stop_recording()
372
- self.reachy_mini.media.stop_playing()
373
- _LOGGER.debug("Media system stopped")
374
- except Exception as e:
375
- _LOGGER.warning("Error stopping media: %s", e)
376
-
377
- _LOGGER.info("Voice services suspended - camera and motion remain active")
378
-
379
- def _resume_voice_services(self, reason: str) -> None:
380
- """Resume only voice-related services (not camera or motion).
381
-
382
- This is used for the Mute feature - camera and motion remain active.
383
- """
384
- _LOGGER.info("Resuming voice services (%s)", reason)
385
- self._robot_services_paused.clear()
386
-
387
- # Update state
388
- if self._state is not None:
389
- self._state.services_suspended = False
390
-
391
- # Restart media system first
392
- try:
393
- media = self.reachy_mini.media
394
- if media.audio is not None:
395
- media.start_recording()
396
- media.start_playing()
397
- _LOGGER.info("Media system restarted")
398
- except Exception as e:
399
- _LOGGER.warning("Failed to restart media: %s", e)
400
-
401
- # Resume satellite
402
- if self._state is not None and self._state.satellite is not None:
403
- try:
404
- self._state.satellite.resume()
405
- _LOGGER.debug("Satellite resumed")
406
- except Exception as e:
407
- _LOGGER.warning("Error resuming satellite: %s", e)
408
-
409
- # Resume audio players
410
- if self._state is not None:
411
- if self._state.tts_player is not None:
412
- try:
413
- self._state.tts_player.resume()
414
- except Exception as e:
415
- _LOGGER.warning("Error resuming TTS player: %s", e)
416
- if self._state.music_player is not None:
417
- try:
418
- self._state.music_player.resume()
419
- except Exception as e:
420
- _LOGGER.warning("Error resuming music player: %s", e)
421
-
422
- # Signal waiting threads that services are resumed
423
- self._robot_services_resumed.set()
424
-
425
- _LOGGER.info("Voice services resumed - camera and motion remained active")
426
-
427
- def _suspend_non_esphome_services(self, reason: str, set_sleep_state: bool) -> None:
428
- """Suspend all non-ESPHome services to reduce load.
429
-
430
- ESPHome server stays up so Home Assistant can wake the robot.
431
- """
432
- _LOGGER.warning("Suspending non-ESPHome services (%s)", reason)
433
- self._robot_services_paused.set()
434
- self._robot_services_resumed.clear()
435
-
436
- # Update state
437
- if self._state is not None:
438
- if set_sleep_state:
439
- self._state.is_sleeping = True
440
- self._state.services_suspended = True
441
-
442
- # Clear audio buffer to avoid processing stale data
443
- self._audio_buffer.clear()
444
-
445
- # Suspend camera server (stops thread and releases YOLO model)
446
- # Only suspend if camera is NOT disabled (user has not manually disabled it)
447
- # AND camera server has been started (not None)
448
- if self._camera_server is not None and self._state.camera_enabled:
449
- try:
450
- self._camera_server.suspend()
451
- _LOGGER.debug("Camera server suspended")
452
- except Exception as e:
453
- _LOGGER.warning("Error suspending camera: %s", e)
454
-
455
- # Suspend motion controller (stops control loop thread)
456
- if self._motion is not None and self._motion._movement_manager is not None:
457
- try:
458
- self._motion._movement_manager.suspend()
459
- _LOGGER.debug("Motion controller suspended")
460
- except Exception as e:
461
- _LOGGER.warning("Error suspending motion: %s", e)
462
-
463
- # Suspend satellite
464
- if self._state is not None and self._state.satellite is not None:
465
- try:
466
- self._state.satellite.suspend()
467
- _LOGGER.debug("Satellite suspended")
468
- except Exception as e:
469
- _LOGGER.warning("Error suspending satellite: %s", e)
470
-
471
- # Suspend audio players
472
- if self._state is not None:
473
- if self._state.tts_player is not None:
474
- try:
475
- self._state.tts_player.suspend()
476
- except Exception as e:
477
- _LOGGER.warning("Error suspending TTS player: %s", e)
478
- if self._state.music_player is not None:
479
- try:
480
- self._state.music_player.suspend()
481
- except Exception as e:
482
- _LOGGER.warning("Error suspending music player: %s", e)
483
-
484
- # Stop media recording to save CPU
485
- try:
486
- self.reachy_mini.media.stop_recording()
487
- self.reachy_mini.media.stop_playing()
488
- _LOGGER.debug("Media system stopped")
489
- except Exception as e:
490
- _LOGGER.warning("Error stopping media: %s", e)
491
-
492
- _LOGGER.info("Services suspended - ESPHome only")
493
-
494
- def _resume_non_esphome_services(self, reason: str, clear_sleep_state: bool) -> None:
495
- """Resume all non-ESPHome services after sleep/disconnect."""
496
- _LOGGER.info("Resuming non-ESPHome services (%s)", reason)
497
- self._robot_services_paused.clear()
498
-
499
- # Update state
500
- if self._state is not None:
501
- if clear_sleep_state:
502
- self._state.is_sleeping = False
503
- self._state.services_suspended = False
504
-
505
- # Restart media system first
506
- try:
507
- media = self.reachy_mini.media
508
- if media.audio is not None:
509
- media.start_recording()
510
- media.start_playing()
511
- _LOGGER.info("Media system restarted")
512
- except Exception as e:
513
- _LOGGER.warning("Failed to restart media: %s", e)
514
-
515
- # Resume camera server (reloads YOLO model and restarts capture thread)
516
- # Only resume if camera is NOT disabled (user has not manually disabled it)
517
- # AND camera server has been started (not None)
518
- if self._camera_server is not None and self._state.camera_enabled:
519
- try:
520
- self._camera_server.resume_from_suspend()
521
- _LOGGER.debug("Camera server resumed from suspend")
522
- except Exception as e:
523
- _LOGGER.warning("Error resuming camera: %s", e)
524
-
525
- # Resume motion controller (restarts control loop thread)
526
- if self._motion is not None and self._motion._movement_manager is not None:
527
- try:
528
- self._motion._movement_manager.resume_from_suspend()
529
- _LOGGER.debug("Motion controller resumed from suspend")
530
- except Exception as e:
531
- _LOGGER.warning("Error resuming motion: %s", e)
532
-
533
- # Resume satellite
534
- if self._state is not None and self._state.satellite is not None:
535
- try:
536
- self._state.satellite.resume()
537
- _LOGGER.debug("Satellite resumed")
538
- except Exception as e:
539
- _LOGGER.warning("Error resuming satellite: %s", e)
540
-
541
- # Resume audio players
542
- if self._state is not None:
543
- if self._state.tts_player is not None:
544
- try:
545
- self._state.tts_player.resume()
546
- except Exception as e:
547
- _LOGGER.warning("Error resuming TTS player: %s", e)
548
- if self._state.music_player is not None:
549
- try:
550
- self._state.music_player.resume()
551
- except Exception as e:
552
- _LOGGER.warning("Error resuming music player: %s", e)
553
-
554
- # Signal waiting threads that services are resumed
555
- self._robot_services_resumed.set()
556
-
557
- _LOGGER.info("All services resumed - system fully operational")
558
-
559
- def _on_robot_disconnected(self) -> None:
560
- """Called when robot connection is lost (e.g., daemon unavailable).
561
-
562
- Suspends all non-ESPHome services to keep HA wake control available.
563
- """
564
- # RobotStateMonitor removed - connection tracking is handled by DaemonStateMonitor
565
- self._suspend_non_esphome_services(reason="robot_disconnected", set_sleep_state=False)
566
-
567
- def _on_robot_connected(self) -> None:
568
- """Called when robot connection is restored.
569
-
570
- Resumes non-ESPHome services unless the system is in sleep mode.
571
- """
572
- # RobotStateMonitor removed - connection tracking is handled by DaemonStateMonitor
573
-
574
- if self._state is not None and self._state.is_sleeping:
575
- _LOGGER.info("Robot connected but system is sleeping; deferring resume")
576
- return
577
-
578
- self._resume_non_esphome_services(reason="robot_connected", clear_sleep_state=False)
579
-
580
- def _on_sleep(self) -> None:
581
- """Called when the robot enters sleep mode.
582
-
583
- This is triggered by the SleepManager when the daemon enters STOPPED state.
584
- At this point, we should:
585
- 1. Stop all resource-intensive operations
586
- 2. Release ML models from memory
587
- 3. Keep only ESPHome server running for HA control
588
- """
589
- # RobotStateMonitor removed - sleep tracking is handled by SleepManager
590
- self._suspend_non_esphome_services(reason="sleep", set_sleep_state=True)
591
-
592
- def _on_wake(self) -> None:
593
- """Called when the robot starts waking up.
594
-
595
- This is triggered immediately when daemon state changes from STOPPED.
596
- The actual service resume happens after the configured delay (30s default).
597
- """
598
- _LOGGER.info("Robot waking up - will resume services after delay...")
599
-
600
- def _on_pre_resume(self) -> None:
601
- """Called just before services are resumed.
602
-
603
- This happens after the resume delay (30s default).
604
- At this point, the daemon should be fully ready.
605
- """
606
- _LOGGER.info("Resuming services after wake delay...")
607
- # RobotStateMonitor removed - sleep tracking is handled by SleepManager
608
- self._resume_non_esphome_services(reason="wake_pre_resume", clear_sleep_state=True)
609
-
610
- async def _on_wake_from_ha(self) -> None:
611
- """Called when wake_up is triggered from Home Assistant button.
612
-
613
- This bypasses the DaemonStateMonitor polling and directly resumes services
614
- after a short delay to allow the robot to wake up.
615
- """
616
- _LOGGER.info("Wake triggered from HA - waiting for daemon running state...")
617
-
618
- # Wait for daemon to be fully running before resuming services.
619
- # This avoids early media/motion restart failures after long sleep.
620
- timeout_s = 35.0
621
- deadline = time.monotonic() + timeout_s
622
- while time.monotonic() < deadline:
623
- try:
624
- daemon_url = Config.daemon.url.rstrip("/")
625
- response = requests.get(f"{daemon_url}/api/daemon/status", timeout=2.0)
626
- response.raise_for_status()
627
- daemon_state = (response.json() or {}).get("state", "")
628
- if daemon_state == "running":
629
- _LOGGER.info("Daemon is running, resuming services now")
630
- self._on_pre_resume()
631
- return
632
- except Exception as e:
633
- _LOGGER.debug("Wake wait state check failed: %s", e)
634
-
635
- await asyncio.sleep(1.0)
636
-
637
- _LOGGER.warning("Wake wait timed out after %.0fs, forcing service resume", timeout_s)
638
- self._on_pre_resume()
639
-
640
- async def _on_ha_connected(self) -> None:
641
- """Called when Home Assistant connects.
642
-
643
- At this point, we should:
644
- 1. Load and start camera server if not already started
645
- 2. Ensure voice models are loaded
646
- 3. Resume any suspended services
647
- """
648
- _LOGGER.info("Home Assistant connected - initializing camera and voice services")
649
- self._ha_connected = True
650
- self._ha_connection_established = True
651
-
652
- # Start camera server if enabled and not already started
653
- if self.camera_enabled and self._state.camera_enabled and self._camera_server is None:
654
- try:
655
- self._camera_server = MJPEGCameraServer(
656
- reachy_mini=self.reachy_mini,
657
- host=self.host,
658
- port=self.camera_port,
659
- fps=15,
660
- quality=80,
661
- enable_face_tracking=bool(getattr(self._state.preferences, "face_tracking_enabled", False)),
662
- enable_gesture_detection=bool(getattr(self._state.preferences, "gesture_detection_enabled", False)),
663
- gstreamer_lock=self._gstreamer_lock,
664
- )
665
-
666
- # Apply persisted vision preferences before camera server start.
667
- prefs = self._state.preferences
668
- self._camera_server.set_face_tracking_enabled(bool(getattr(prefs, "face_tracking_enabled", False)))
669
- self._camera_server.set_gesture_detection_enabled(
670
- bool(getattr(prefs, "gesture_detection_enabled", False))
671
- )
672
- self._camera_server.set_face_confidence_threshold(
673
- float(getattr(prefs, "face_confidence_threshold", 0.5))
674
- )
675
-
676
- await self._camera_server.start()
677
-
678
- # Store camera_server reference in state for entity registry access
679
- self._state._camera_server = self._camera_server
680
-
681
- # Update entity registry with the new camera_server reference
682
- if self._state.satellite:
683
- self._state.satellite.update_camera_server(self._camera_server)
684
-
685
- # Connect camera server to motion controller for face tracking
686
- if self._motion is not None:
687
- self._motion.set_camera_server(self._camera_server)
688
-
689
- _LOGGER.info("Camera server started on %s:%s", self.host, self.camera_port)
690
- except Exception as e:
691
- _LOGGER.error("Failed to start camera server: %s", e)
692
-
693
- # Resume services if they were suspended due to HA disconnection
694
- if self._state.services_suspended and not self._state.is_sleeping:
695
- self._resume_non_esphome_services(reason="ha_connected", clear_sleep_state=False)
696
-
697
- def _on_ha_disconnected(self) -> None:
698
- """Called when Home Assistant disconnects.
699
-
700
- At this point, we should:
701
- 1. Suspend camera server to save resources
702
- 2. Keep ESPHome server running for reconnection
703
- 3. Ensure voice services are suspended
704
- """
705
- _LOGGER.warning("Home Assistant disconnected - suspending camera and voice services")
706
- self._ha_connected = False
707
-
708
- # Suspend non-ESPHome services including camera
709
- # Keep ESPHome server running so HA can reconnect
710
- self._suspend_non_esphome_services(reason="ha_disconnected", set_sleep_state=False)
711
-
712
- def _optimize_microphone_settings(self) -> None:
713
- """Optimize ReSpeaker XVF3800 microphone settings for voice recognition.
714
-
715
- Delegates to MicrophoneOptimizer for actual settings configuration.
716
- User preferences from Home Assistant override defaults when available.
717
- """
718
- try:
719
- # Access ReSpeaker through the media audio system
720
- audio = self.reachy_mini.media.audio
721
- if audio is None or not hasattr(audio, "_respeaker"):
722
- _LOGGER.debug("ReSpeaker not available for optimization")
723
- return
724
-
725
- respeaker = audio._respeaker
726
- if respeaker is None:
727
- _LOGGER.debug("ReSpeaker device not found")
728
- return
729
-
730
- # Build preferences from saved state
731
- prefs = self._state.preferences if self._state else None
732
- mic_prefs = MicrophonePreferences(
733
- agc_enabled=prefs.agc_enabled if prefs else None,
734
- agc_max_gain=prefs.agc_max_gain if prefs else None,
735
- noise_suppression=prefs.noise_suppression if prefs else None,
736
- )
737
-
738
- # Delegate to optimizer
739
- optimizer = MicrophoneOptimizer()
740
- optimizer.optimize(respeaker, mic_prefs)
741
-
742
- except Exception as e:
743
- _LOGGER.warning("Failed to optimize microphone settings: %s", e)
744
-
745
- async def stop(self) -> None:
746
- """Stop the voice assistant service."""
747
- _LOGGER.info("Stopping voice assistant service...")
748
-
749
- # 1. First stop audio recording to prevent new data from coming in
750
- try:
751
- self.reachy_mini.media.stop_recording()
752
- _LOGGER.debug("Reachy Mini recording stopped")
753
- except Exception as e:
754
- _LOGGER.warning("Error stopping Reachy Mini recording: %s", e)
755
-
756
- # 2. Set stop flag
757
- self._running = False
758
- # Wake any threads blocked on resume signal
759
- self._robot_services_resumed.set()
760
-
761
- # 3. Wait for audio thread to finish
762
- if self._audio_thread:
763
- self._audio_thread.join(timeout=Config.shutdown.audio_thread_join_timeout)
764
- if self._audio_thread.is_alive():
765
- _LOGGER.warning("Audio thread did not stop in time")
766
-
767
- # 4. Stop playback
768
- try:
769
- self.reachy_mini.media.stop_playing()
770
- _LOGGER.debug("Reachy Mini playback stopped")
771
- except Exception as e:
772
- _LOGGER.warning("Error stopping Reachy Mini playback: %s", e)
773
-
774
- # 5. Stop ESPHome server
775
- if self._server:
776
- self._server.close()
777
- try:
778
- await asyncio.wait_for(
779
- self._server.wait_closed(),
780
- timeout=Config.shutdown.server_close_timeout,
781
- )
782
- except TimeoutError:
783
- _LOGGER.warning("ESPHome server did not close in time")
784
-
785
- # 6. Unregister mDNS
786
- if self._discovery:
787
- try:
788
- await asyncio.wait_for(
789
- self._discovery.unregister_server(),
790
- timeout=Config.shutdown.server_close_timeout,
791
- )
792
- except TimeoutError:
793
- _LOGGER.warning("mDNS unregister did not finish in time")
794
-
795
- # 6.5. Stop Sendspin
796
- if self._state and self._state.music_player:
797
- try:
798
- await asyncio.wait_for(
799
- self._state.music_player.stop_sendspin(),
800
- timeout=Config.shutdown.sendspin_stop_timeout,
801
- )
802
- except TimeoutError:
803
- _LOGGER.warning("Sendspin stop did not finish in time")
804
-
805
- # 7. Stop camera server
806
- # Only stop if camera is NOT disabled (user has not manually disabled it)
807
- if self._camera_server and self._state.camera_enabled:
808
- await self._camera_server.stop(join_timeout=Config.shutdown.camera_stop_timeout)
809
- self._camera_server = None
810
- # Close SDK media resources to prevent memory leaks (even if camera is disabled)
811
- try:
812
- self.reachy_mini.media.close()
813
- _LOGGER.info("SDK media resources closed")
814
- except Exception as e:
815
- _LOGGER.debug("Failed to close SDK media: %s", e)
816
-
817
- # 8. Shutdown motion executor
818
- if self._motion:
819
- self._motion.shutdown()
820
-
821
- # 9. Stop sleep manager
822
- if self._sleep_manager:
823
- try:
824
- await asyncio.wait_for(
825
- self._sleep_manager.stop(),
826
- timeout=Config.shutdown.sleep_manager_stop_timeout,
827
- )
828
- except TimeoutError:
829
- _LOGGER.warning("Sleep manager stop did not finish in time")
830
- self._sleep_manager = None
831
-
832
- _LOGGER.info("Voice assistant service stopped.")
833
-
834
- async def _verify_required_files(self) -> None:
835
- """Verify required model and sound files exist (bundled with package)."""
836
- # Required wake word files (bundled in wakewords/ directory)
837
- # Note: hey_jarvis is in openWakeWord/ with version suffix, so not required here
838
- required_wakewords = [
839
- "okay_nabu.tflite",
840
- "okay_nabu.json",
841
- "stop.tflite",
842
- "stop.json",
843
- ]
844
-
845
- # Required sound files (bundled in sounds/ directory)
846
- required_sounds = [
847
- "wake_word_triggered.flac",
848
- "timer_finished.flac",
849
- ]
850
-
851
- # Verify wake word files
852
- missing_wakewords = []
853
- for filename in required_wakewords:
854
- filepath = _WAKEWORDS_DIR / filename
855
- if not filepath.exists():
856
- missing_wakewords.append(filename)
857
-
858
- if missing_wakewords:
859
- _LOGGER.warning("Missing wake word files: %s. These should be bundled with the package.", missing_wakewords)
860
-
861
- # Verify sound files
862
- missing_sounds = []
863
- for filename in required_sounds:
864
- filepath = _SOUNDS_DIR / filename
865
- if not filepath.exists():
866
- missing_sounds.append(filename)
867
-
868
- if missing_sounds:
869
- _LOGGER.warning("Missing sound files: %s. These should be bundled with the package.", missing_sounds)
870
-
871
- if not missing_wakewords and not missing_sounds:
872
- _LOGGER.info("All required files verified successfully.")
873
-
874
- def _load_available_wake_words(self) -> dict[str, AvailableWakeWord]:
875
- """Load available wake word configurations."""
876
- available_wake_words: dict[str, AvailableWakeWord] = {}
877
-
878
- # Load order: OpenWakeWord first, then MicroWakeWord, then external
879
- # Later entries override earlier ones, so MicroWakeWord takes priority
880
- wake_word_dirs = [
881
- _WAKEWORDS_DIR / "openWakeWord", # OpenWakeWord (lowest priority)
882
- _LOCAL_DIR / "external_wake_words", # External wake words
883
- _WAKEWORDS_DIR, # MicroWakeWord (highest priority)
884
- ]
885
-
886
- for wake_word_dir in wake_word_dirs:
887
- if not wake_word_dir.exists():
888
- continue
889
-
890
- for config_path in wake_word_dir.glob("*.json"):
891
- model_id = config_path.stem
892
- if model_id == "stop":
893
- continue
894
-
895
- try:
896
- with open(config_path, encoding="utf-8") as f:
897
- config = json.load(f)
898
-
899
- model_type = WakeWordType(config.get("type", "micro"))
900
-
901
- if model_type == WakeWordType.OPEN_WAKE_WORD:
902
- wake_word_path = config_path.parent / config["model"]
903
- else:
904
- wake_word_path = config_path
905
-
906
- available_wake_words[model_id] = AvailableWakeWord(
907
- id=model_id,
908
- type=model_type,
909
- wake_word=config.get("wake_word", model_id),
910
- trained_languages=config.get("trained_languages", []),
911
- wake_word_path=wake_word_path,
912
- )
913
- except Exception as e:
914
- _LOGGER.warning("Failed to load wake word %s: %s", config_path, e)
915
-
916
- return available_wake_words
917
-
918
- def _load_preferences(self, preferences_path: Path) -> Preferences:
919
- """Load user preferences."""
920
- if preferences_path.exists():
921
- try:
922
- with open(preferences_path, encoding="utf-8") as f:
923
- data = json.load(f)
924
- return Preferences(**data)
925
- except Exception as e:
926
- _LOGGER.warning("Failed to load preferences: %s", e)
927
-
928
- return Preferences()
929
-
930
- def _load_wake_models(
931
- self,
932
- available_wake_words: dict[str, AvailableWakeWord],
933
- preferences: Preferences,
934
- ):
935
- """Load wake word models."""
936
-
937
- wake_models: dict[str, MicroWakeWord | OpenWakeWord] = {}
938
- active_wake_words: set[str] = set()
939
-
940
- # Try to load preferred models
941
- if preferences.active_wake_words:
942
- for wake_word_id in preferences.active_wake_words:
943
- wake_word = available_wake_words.get(wake_word_id)
944
- if wake_word is None:
945
- _LOGGER.warning("Unknown wake word: %s", wake_word_id)
946
- continue
947
-
948
- try:
949
- _LOGGER.debug("Loading wake model: %s", wake_word_id)
950
- loaded_model = wake_word.load()
951
- # Set id attribute on the model for later identification
952
- loaded_model.id = wake_word_id
953
- wake_models[wake_word_id] = loaded_model
954
- active_wake_words.add(wake_word_id)
955
- except Exception as e:
956
- _LOGGER.warning("Failed to load wake model %s: %s", wake_word_id, e)
957
-
958
- # Load default model if none loaded
959
- if not wake_models:
960
- wake_word = available_wake_words.get(self.wake_model)
961
- if wake_word:
962
- try:
963
- _LOGGER.debug("Loading default wake model: %s", self.wake_model)
964
- loaded_model = wake_word.load()
965
- # Set id attribute on the model for later identification
966
- loaded_model.id = self.wake_model
967
- wake_models[self.wake_model] = loaded_model
968
- active_wake_words.add(self.wake_model)
969
- except Exception as e:
970
- _LOGGER.error("Failed to load default wake model: %s", e)
971
-
972
- return wake_models, active_wake_words
973
-
974
- def _load_stop_model(self):
975
- """Load the stop word model."""
976
- from pymicro_wakeword import MicroWakeWord
977
-
978
- stop_config = _WAKEWORDS_DIR / "stop.json"
979
- if stop_config.exists():
980
- try:
981
- model = MicroWakeWord.from_config(stop_config)
982
- # Don't override the model ID - use the one from config
983
- _LOGGER.info("Loaded stop model with ID: %s, config: %s", model.id, stop_config)
984
- return model
985
- except Exception as e:
986
- _LOGGER.error("Failed to load stop model from %s: %s", stop_config, e)
987
- import traceback
988
-
989
- traceback.print_exc()
990
-
991
- # Stop model not available - disable stop functionality
992
- _LOGGER.error("Stop model not available at %s - stop functionality will be disabled", stop_config)
993
- return None
994
-
995
- def _process_audio(self) -> None:
996
- """Process audio from Reachy Mini's microphone."""
997
- from pymicro_wakeword import MicroWakeWordFeatures
998
-
999
- ctx = AudioProcessingContext()
1000
- ctx.micro_features = MicroWakeWordFeatures()
1001
-
1002
- try:
1003
- _LOGGER.info("Starting audio processing using Reachy Mini's microphone...")
1004
- self._audio_loop_reachy(ctx)
1005
-
1006
- except Exception:
1007
- _LOGGER.exception("Error processing audio")
1008
-
1009
- def _audio_loop_reachy(self, ctx: AudioProcessingContext) -> None:
1010
- """Audio loop using Reachy Mini's microphone.
1011
-
1012
- This loop checks the robot connection state before attempting to
1013
- read audio. When the robot is disconnected (e.g., sleep mode),
1014
- the loop waits for reconnection without generating errors.
1015
- """
1016
- consecutive_audio_errors = 0
1017
- max_consecutive_errors = 3 # Pause after 3 consecutive errors
1018
-
1019
- while self._running:
1020
- try:
1021
- # Check if robot services are paused (sleep mode / disconnected / muted)
1022
- if self._robot_services_paused.is_set():
1023
- # Wait for resume signal (event-driven, wakes immediately on resume)
1024
- consecutive_audio_errors = 0 # Reset on pause
1025
- self._robot_services_resumed.wait(timeout=1.0)
1026
- continue
1027
-
1028
- if not self._wait_for_satellite():
1029
- continue
1030
-
1031
- # Update wake words list
1032
- self._update_wake_words_list(ctx)
1033
-
1034
- # Get audio from Reachy Mini
1035
- audio_chunk = self._get_reachy_audio_chunk()
1036
- if audio_chunk is None:
1037
- idle_sleep = (
1038
- Config.audio.idle_sleep_sleeping
1039
- if self._robot_services_paused.is_set()
1040
- else Config.audio.idle_sleep_active
1041
- )
1042
- time.sleep(idle_sleep)
1043
- continue
1044
-
1045
- # Audio successfully obtained, reset error counter
1046
- consecutive_audio_errors = 0
1047
- self._process_audio_chunk(ctx, audio_chunk)
1048
-
1049
- except Exception as e:
1050
- error_msg = str(e)
1051
-
1052
- # Check for audio processing errors that indicate sleep mode
1053
- if "can only convert" in error_msg or "scalar" in error_msg:
1054
- consecutive_audio_errors += 1
1055
- if consecutive_audio_errors >= max_consecutive_errors:
1056
- if not self._robot_services_paused.is_set():
1057
- _LOGGER.warning("Audio errors indicate robot may be asleep - pausing audio processing")
1058
- self._robot_services_paused.set()
1059
- self._robot_services_resumed.clear()
1060
- # Clear audio buffer
1061
- self._audio_buffer.clear()
1062
- # Wait for resume signal instead of polling
1063
- self._robot_services_resumed.wait(timeout=0.5)
1064
- continue
1065
-
1066
- # Check if this is a connection error
1067
- if "Lost connection" in error_msg:
1068
- # Don't log - the state monitor will handle this
1069
- if not self._robot_services_paused.is_set():
1070
- _LOGGER.debug("Connection error detected, waiting for state monitor")
1071
- # Wait for resume signal instead of polling
1072
- self._robot_services_resumed.wait(timeout=1.0)
1073
- else:
1074
- # Log unexpected errors (but limit frequency)
1075
- consecutive_audio_errors += 1
1076
- if consecutive_audio_errors <= 3:
1077
- _LOGGER.error("Error in Reachy audio processing: %s", e)
1078
- time.sleep(Config.audio.idle_sleep_sleeping)
1079
-
1080
- def _wait_for_satellite(self) -> bool:
1081
- """Wait for satellite connection. Returns True if connected."""
1082
- if self._state is None or self._state.satellite is None:
1083
- time.sleep(0.1)
1084
- return False
1085
- return True
1086
-
1087
- def _update_wake_words_list(self, ctx: AudioProcessingContext) -> None:
1088
- """Update wake words list if changed."""
1089
- from pymicro_wakeword import MicroWakeWordFeatures
1090
- from pyopen_wakeword import OpenWakeWord, OpenWakeWordFeatures
1091
-
1092
- if (not ctx.wake_words) or (self._state.wake_words_changed and self._state.wake_words):
1093
- self._state.wake_words_changed = False
1094
- ctx.wake_words.clear()
1095
-
1096
- # Reset feature extractors to clear any residual audio data
1097
- # This prevents false triggers when switching wake words
1098
- ctx.micro_features = MicroWakeWordFeatures()
1099
- ctx.micro_inputs.clear()
1100
- if ctx.oww_features is not None:
1101
- ctx.oww_features = OpenWakeWordFeatures.from_builtin()
1102
- ctx.oww_inputs.clear()
1103
-
1104
- # Also reset the refractory period to prevent immediate trigger
1105
- ctx.last_active = time.monotonic()
1106
-
1107
- # state.wake_words is Dict[str, MicroWakeWord/OpenWakeWord]
1108
- # We need to filter by active_wake_words (which contains the IDs/keys)
1109
- for ww_id, ww_model in self._state.wake_words.items():
1110
- if ww_id in self._state.active_wake_words:
1111
- # Ensure the model has an 'id' attribute for later use
1112
- if not hasattr(ww_model, "id"):
1113
- ww_model.id = ww_id
1114
- ctx.wake_words.append(ww_model)
1115
-
1116
- ctx.has_oww = any(isinstance(ww, OpenWakeWord) for ww in ctx.wake_words)
1117
- if ctx.has_oww and ctx.oww_features is None:
1118
- ctx.oww_features = OpenWakeWordFeatures.from_builtin()
1119
-
1120
- _LOGGER.info("Active wake words updated: %s (features reset)", list(self._state.active_wake_words))
1121
-
1122
- def _get_reachy_audio_chunk(self) -> bytes | None:
1123
- """Get fixed-size audio chunk from Reachy Mini's microphone.
1124
-
1125
- Returns exactly AUDIO_BLOCK_SIZE samples each time, buffering
1126
- internally to ensure consistent chunk sizes for streaming.
1127
-
1128
- Returns:
1129
- PCM audio bytes of fixed size, or None if not enough data.
1130
- """
1131
- # Check if services are paused (e.g., during sleep/disconnect)
1132
- if self._robot_services_paused.is_set():
1133
- return None
1134
-
1135
- # Get new audio data from SDK
1136
- audio_data = self.reachy_mini.media.get_audio_sample()
1137
-
1138
- # Debug: Log SDK audio data statistics and sample rate (once at startup)
1139
- if audio_data is not None and isinstance(audio_data, np.ndarray) and audio_data.size > 0:
1140
- if not hasattr(self, "_audio_sample_rate_logged"):
1141
- self._audio_sample_rate_logged = True
1142
- try:
1143
- input_rate = self.reachy_mini.media.get_input_audio_samplerate()
1144
- _LOGGER.info(
1145
- "Audio input: sample_rate=%d Hz, shape=%s, dtype=%s (expected 16000 Hz)",
1146
- input_rate,
1147
- audio_data.shape,
1148
- audio_data.dtype,
1149
- )
1150
- if input_rate != 16000:
1151
- _LOGGER.warning(
1152
- "Audio sample rate mismatch! Got %d Hz, expected 16000 Hz. "
1153
- "STT may be slow or inaccurate. Consider resampling.",
1154
- input_rate,
1155
- )
1156
- except Exception as e:
1157
- _LOGGER.warning("Could not get audio sample rate: %s", e)
1158
-
1159
- # Append new data to buffer if valid
1160
- if audio_data is not None and isinstance(audio_data, np.ndarray) and audio_data.size > 0:
1161
- try:
1162
- if audio_data.dtype.kind not in ("S", "U", "O", "V", "b"):
1163
- # Convert to float32 only if needed (SDK already returns float32)
1164
- if audio_data.dtype != np.float32:
1165
- audio_data = audio_data.astype(np.float32, copy=False)
1166
-
1167
- # Clean NaN/Inf values early to prevent downstream errors
1168
- audio_data = np.nan_to_num(audio_data, nan=0.0, posinf=1.0, neginf=-1.0)
1169
-
1170
- # Convert stereo to mono (use first channel for better quality)
1171
- if audio_data.ndim == 2 and audio_data.shape[1] >= 2:
1172
- # Use first channel instead of mean - cleaner signal
1173
- # Remove .copy() to avoid unnecessary array duplication
1174
- audio_data = audio_data[:, 0]
1175
- elif audio_data.ndim == 2:
1176
- # Remove .copy() to avoid unnecessary array duplication
1177
- audio_data = audio_data[:, 0]
1178
-
1179
- # Resample if needed (SDK may return non-16kHz audio)
1180
- if audio_data.ndim == 1:
1181
- # Initialize sample rate once (not every chunk)
1182
- if not hasattr(self, "_input_sample_rate_fixed"):
1183
- try:
1184
- self._input_sample_rate = self.reachy_mini.media.get_input_audio_samplerate()
1185
- if self._input_sample_rate != 16000:
1186
- _LOGGER.warning(
1187
- f"Sample rate {self._input_sample_rate} != 16000 Hz. "
1188
- "Performance may be degraded. "
1189
- "Consider forcing 16kHz in hardware config."
1190
- )
1191
- except Exception:
1192
- self._input_sample_rate = 16000
1193
-
1194
- self._input_sample_rate_fixed = True # Mark as fixed
1195
-
1196
- # Resample to 16kHz if needed
1197
- if self._input_sample_rate != 16000 and self._input_sample_rate > 0:
1198
- from scipy.signal import resample
1199
-
1200
- new_length = int(len(audio_data) * 16000 / self._input_sample_rate)
1201
- if new_length > 0:
1202
- audio_data = resample(audio_data, new_length)
1203
- audio_data = np.nan_to_num(
1204
- audio_data,
1205
- nan=0.0,
1206
- posinf=1.0,
1207
- neginf=-1.0,
1208
- ).astype(np.float32, copy=False)
1209
-
1210
- # Extend deque (deque automatically handles overflow with maxlen)
1211
- # This avoids creating new arrays like np.concatenate does
1212
- self._audio_buffer.extend(audio_data)
1213
-
1214
- except (TypeError, ValueError):
1215
- pass
1216
-
1217
- # Return fixed-size chunk if we have enough data
1218
- if len(self._audio_buffer) >= AUDIO_BLOCK_SIZE:
1219
- # Extract chunk and remove from buffer
1220
- chunk = [self._audio_buffer.popleft() for _ in range(AUDIO_BLOCK_SIZE)]
1221
-
1222
- # Convert to PCM bytes (16-bit signed, little-endian)
1223
- chunk_array = np.array(chunk, dtype=np.float32)
1224
- pcm_bytes = (np.clip(chunk_array, -1.0, 1.0) * 32767.0).astype("<i2").tobytes()
1225
- return pcm_bytes
1226
-
1227
- return None
1228
-
1229
- def _convert_to_pcm(self, audio_chunk_array: np.ndarray) -> bytes:
1230
- """Convert float32 audio array to 16-bit PCM bytes."""
1231
- # Replace NaN/Inf with 0 to avoid casting errors
1232
- audio_clean = np.nan_to_num(audio_chunk_array, nan=0.0, posinf=1.0, neginf=-1.0)
1233
- return (np.clip(audio_clean, -1.0, 1.0) * 32767.0).astype("<i2").tobytes()
1234
-
1235
- def _process_audio_chunk(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
1236
- """Process an audio chunk for wake word detection.
1237
-
1238
- Following reference project pattern: always process wake words.
1239
- Refractory period prevents duplicate triggers.
1240
-
1241
- Args:
1242
- ctx: Audio processing context
1243
- audio_chunk: PCM audio bytes
1244
- """
1245
- # Stream audio to Home Assistant only after wake (privacy: no pre-wake upload)
1246
- if self._state.satellite.is_streaming_audio:
1247
- self._state.satellite.handle_audio(audio_chunk)
1248
-
1249
- # Process wake word features
1250
- self._process_features(ctx, audio_chunk)
1251
-
1252
- # Detect wake words
1253
- self._detect_wake_words(ctx)
1254
-
1255
- # Detect stop word
1256
- self._detect_stop_word(ctx)
1257
-
1258
- def _process_features(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
1259
- """Process audio features for wake word detection."""
1260
- ctx.micro_inputs.clear()
1261
- ctx.micro_inputs.extend(ctx.micro_features.process_streaming(audio_chunk))
1262
-
1263
- if ctx.has_oww and ctx.oww_features is not None:
1264
- ctx.oww_inputs.clear()
1265
- ctx.oww_inputs.extend(ctx.oww_features.process_streaming(audio_chunk))
1266
-
1267
- def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
1268
- """Detect wake words in the processed audio features.
1269
-
1270
- Uses refractory period to prevent duplicate triggers.
1271
- Following reference project pattern.
1272
- """
1273
- from pymicro_wakeword import MicroWakeWord
1274
- from pyopen_wakeword import OpenWakeWord
1275
-
1276
- for wake_word in ctx.wake_words:
1277
- activated = False
1278
-
1279
- if isinstance(wake_word, MicroWakeWord):
1280
- for micro_input in ctx.micro_inputs:
1281
- if wake_word.process_streaming(micro_input):
1282
- activated = True
1283
- elif isinstance(wake_word, OpenWakeWord):
1284
- for oww_input in ctx.oww_inputs:
1285
- for prob in wake_word.process_streaming(oww_input):
1286
- if prob > 0.5:
1287
- activated = True
1288
-
1289
- if activated:
1290
- # Check refractory period to prevent duplicate triggers
1291
- now = time.monotonic()
1292
- if (ctx.last_active is None) or ((now - ctx.last_active) > self._state.refractory_seconds):
1293
- _LOGGER.info("Wake word detected: %s", wake_word.id)
1294
- self._state.satellite.wakeup(wake_word)
1295
- # Face tracking will handle looking at user automatically
1296
- self._motion.on_wakeup()
1297
- ctx.last_active = now
1298
-
1299
- def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
1300
- """Detect stop word in the processed audio features."""
1301
- if not self._state.stop_word:
1302
- _LOGGER.warning("Stop word model not loaded")
1303
- return
1304
-
1305
- stopped = False
1306
- for micro_input in ctx.micro_inputs:
1307
- if self._state.stop_word.process_streaming(micro_input):
1308
- stopped = True
1309
- break # Stop at first detection
1310
-
1311
- stop_armed = self._state.stop_word.id in self._state.active_wake_words
1312
- if stopped and stop_armed and (not self._state.is_muted):
1313
- _LOGGER.info("Stop word detected - stopping playback")
1314
- self._state.satellite.stop()
 
1
+ """
2
+ Voice Assistant Service for Reachy Mini.
3
+
4
+ This module provides the main voice assistant service that integrates
5
+ with Home Assistant via ESPHome protocol.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ import threading
12
+ import time
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from queue import Queue
16
+ from typing import Dict, List, Optional, Set, Union
17
+
18
+ import numpy as np
19
+
20
+ from reachy_mini import ReachyMini
21
+
22
+ from .models import AvailableWakeWord, Preferences, ServerState, WakeWordType
23
+ from .audio_player import AudioPlayer
24
+ from .satellite import VoiceSatelliteProtocol
25
+ from .util import get_mac
26
+ from .zeroconf import HomeAssistantZeroconf
27
+ from .motion import ReachyMiniMotion
28
+ from .camera_server import MJPEGCameraServer
29
+
30
+ _LOGGER = logging.getLogger(__name__)
31
+
32
+ _MODULE_DIR = Path(__file__).parent
33
+ _WAKEWORDS_DIR = _MODULE_DIR / "wakewords"
34
+ _SOUNDS_DIR = _MODULE_DIR / "sounds"
35
+ _LOCAL_DIR = _MODULE_DIR.parent / "local"
36
+
37
+
38
+ @dataclass
39
+ class AudioProcessingContext:
40
+ """Context for audio processing, holding mutable state."""
41
+ wake_words: List = field(default_factory=list)
42
+ micro_features: Optional[object] = None
43
+ micro_inputs: List = field(default_factory=list)
44
+ oww_features: Optional[object] = None
45
+ oww_inputs: List = field(default_factory=list)
46
+ has_oww: bool = False
47
+ last_active: Optional[float] = None
48
+
49
+
50
+ # Audio chunk size for consistent streaming (matches reference project)
51
+ AUDIO_BLOCK_SIZE = 1024 # samples at 16kHz = 64ms
52
+
53
+
54
+ class VoiceAssistantService:
55
+ """Voice assistant service that runs ESPHome protocol server."""
56
+
57
+ def __init__(
58
+ self,
59
+ reachy_mini: Optional[ReachyMini] = None,
60
+ name: str = "Reachy Mini",
61
+ host: str = "0.0.0.0",
62
+ port: int = 6053,
63
+ wake_model: str = "okay_nabu",
64
+ camera_port: int = 8081,
65
+ camera_enabled: bool = True,
66
+ ):
67
+ self.reachy_mini = reachy_mini
68
+ self.name = name
69
+ self.host = host
70
+ self.port = port
71
+ self.wake_model = wake_model
72
+ self.camera_port = camera_port
73
+ self.camera_enabled = camera_enabled
74
+
75
+ self._server = None
76
+ self._discovery = None
77
+ self._audio_thread = None
78
+ self._running = False
79
+ self._state: Optional[ServerState] = None
80
+ self._motion = ReachyMiniMotion(reachy_mini)
81
+ self._camera_server: Optional[MJPEGCameraServer] = None
82
+
83
+ # Audio buffer for fixed-size chunk output
84
+ self._audio_buffer: np.ndarray = np.array([], dtype=np.float32)
85
+
86
+ async def start(self) -> None:
87
+ """Start the voice assistant service."""
88
+ _LOGGER.info("Initializing voice assistant service...")
89
+
90
+ # Ensure directories exist
91
+ _WAKEWORDS_DIR.mkdir(parents=True, exist_ok=True)
92
+ _SOUNDS_DIR.mkdir(parents=True, exist_ok=True)
93
+ _LOCAL_DIR.mkdir(parents=True, exist_ok=True)
94
+
95
+ # Verify required files (bundled with package)
96
+ await self._verify_required_files()
97
+
98
+ # Load wake words
99
+ available_wake_words = self._load_available_wake_words()
100
+ _LOGGER.debug("Available wake words: %s", list(available_wake_words.keys()))
101
+
102
+ # Load preferences
103
+ preferences_path = _LOCAL_DIR / "preferences.json"
104
+ preferences = self._load_preferences(preferences_path)
105
+
106
+ # Load wake word models
107
+ wake_models, active_wake_words = self._load_wake_models(
108
+ available_wake_words, preferences
109
+ )
110
+
111
+ # Load stop model
112
+ stop_model = self._load_stop_model()
113
+
114
+ # Create audio players with Reachy Mini reference
115
+ music_player = AudioPlayer(self.reachy_mini)
116
+ tts_player = AudioPlayer(self.reachy_mini)
117
+
118
+ # Create server state
119
+ self._state = ServerState(
120
+ name=self.name,
121
+ mac_address=get_mac(),
122
+ audio_queue=Queue(),
123
+ entities=[],
124
+ available_wake_words=available_wake_words,
125
+ wake_words=wake_models,
126
+ active_wake_words=active_wake_words,
127
+ stop_word=stop_model,
128
+ music_player=music_player,
129
+ tts_player=tts_player,
130
+ wakeup_sound=str(_SOUNDS_DIR / "wake_word_triggered.flac"),
131
+ timer_finished_sound=str(_SOUNDS_DIR / "timer_finished.flac"),
132
+ preferences=preferences,
133
+ preferences_path=preferences_path,
134
+ refractory_seconds=2.0,
135
+ download_dir=_LOCAL_DIR,
136
+ reachy_mini=self.reachy_mini,
137
+ motion_enabled=self.reachy_mini is not None,
138
+ )
139
+
140
+ # Set motion controller reference in state
141
+ self._state.motion = self._motion
142
+
143
+ # Start Reachy Mini media system if available
144
+ if self.reachy_mini is not None:
145
+ try:
146
+ # Check if media system is already running to avoid conflicts
147
+ media = self.reachy_mini.media
148
+ if media.audio is not None:
149
+ # Check recording state
150
+ is_recording = getattr(media, '_recording', False)
151
+ if not is_recording:
152
+ media.start_recording()
153
+ _LOGGER.info("Started Reachy Mini recording")
154
+ else:
155
+ _LOGGER.debug("Reachy Mini recording already active")
156
+
157
+ # Check playback state
158
+ is_playing = getattr(media, '_playing', False)
159
+ if not is_playing:
160
+ media.start_playing()
161
+ _LOGGER.info("Started Reachy Mini playback")
162
+ else:
163
+ _LOGGER.debug("Reachy Mini playback already active")
164
+
165
+ _LOGGER.info("Reachy Mini media system initialized")
166
+
167
+ # Optimize microphone settings for voice recognition
168
+ self._optimize_microphone_settings()
169
+ else:
170
+ _LOGGER.warning("Reachy Mini audio system not available")
171
+ except Exception as e:
172
+ _LOGGER.warning("Failed to initialize Reachy Mini media: %s", e)
173
+
174
+ # Start motion controller (5Hz control loop)
175
+ if self._motion is not None:
176
+ self._motion.start()
177
+
178
+ # Start audio processing thread (non-daemon for proper cleanup)
179
+ self._running = True
180
+ self._audio_thread = threading.Thread(
181
+ target=self._process_audio,
182
+ daemon=False,
183
+ )
184
+ self._audio_thread.start()
185
+
186
+ # Start camera server if enabled (must be before ESPHome server)
187
+ if self.camera_enabled:
188
+ self._camera_server = MJPEGCameraServer(
189
+ reachy_mini=self.reachy_mini,
190
+ host=self.host,
191
+ port=self.camera_port,
192
+ fps=15,
193
+ quality=80,
194
+ enable_face_tracking=True,
195
+ )
196
+ await self._camera_server.start()
197
+
198
+ # Connect camera server to motion controller for face tracking
199
+ if self._motion is not None:
200
+ self._motion.set_camera_server(self._camera_server)
201
+
202
+ # Create ESPHome server (pass camera_server for camera entity)
203
+ loop = asyncio.get_running_loop()
204
+ camera_server = self._camera_server # Capture for lambda
205
+ self._server = await loop.create_server(
206
+ lambda: VoiceSatelliteProtocol(self._state, camera_server=camera_server),
207
+ host=self.host,
208
+ port=self.port,
209
+ )
210
+
211
+ # Start mDNS discovery
212
+ self._discovery = HomeAssistantZeroconf(port=self.port, name=self.name)
213
+ await self._discovery.register_server()
214
+
215
+ # Start Sendspin auto-discovery (auto-enabled, no user config needed)
216
+ # Sendspin is for music playback, so connect to music_player
217
+ await music_player.start_sendspin_discovery()
218
+
219
+ _LOGGER.info("Voice assistant service started on %s:%s", self.host, self.port)
220
+
221
+ def _optimize_microphone_settings(self) -> None:
222
+ """Optimize ReSpeaker XVF3800 microphone settings for voice recognition.
223
+
224
+ This method configures the XMOS XVF3800 audio processor for optimal
225
+ voice command recognition at distances up to 2-3 meters.
226
+
227
+ If user has previously set values via Home Assistant, those values are
228
+ restored from preferences. Otherwise, default optimized values are used.
229
+
230
+ Key optimizations:
231
+ 1. Enable AGC with higher max gain for distant speech
232
+ 2. Reduce noise suppression to preserve quiet speech
233
+ 3. Increase base microphone gain
234
+ 4. Optimize AGC response times for voice commands
235
+
236
+ Reference: reachy_mini/src/reachy_mini/media/audio_control_utils.py
237
+ XMOS docs: https://www.xmos.com/documentation/XM-014888-PC/
238
+ """
239
+ if self.reachy_mini is None:
240
+ return
241
+
242
+ try:
243
+ # Access ReSpeaker through the media audio system
244
+ audio = self.reachy_mini.media.audio
245
+ if audio is None or not hasattr(audio, '_respeaker'):
246
+ _LOGGER.debug("ReSpeaker not available for optimization")
247
+ return
248
+
249
+ respeaker = audio._respeaker
250
+ if respeaker is None:
251
+ _LOGGER.debug("ReSpeaker device not found")
252
+ return
253
+
254
+ # Get saved preferences (if any)
255
+ prefs = self._state.preferences if self._state else None
256
+
257
+ # ========== 1. AGC (Automatic Gain Control) Settings ==========
258
+ # Use saved value if available, otherwise use default (enabled)
259
+ agc_enabled = prefs.agc_enabled if (prefs and prefs.agc_enabled is not None) else True
260
+ try:
261
+ respeaker.write("PP_AGCONOFF", [1 if agc_enabled else 0])
262
+ _LOGGER.info("AGC %s (PP_AGCONOFF=%d)%s",
263
+ "enabled" if agc_enabled else "disabled",
264
+ 1 if agc_enabled else 0,
265
+ " [from preferences]" if (prefs and prefs.agc_enabled is not None) else " [default]")
266
+ except Exception as e:
267
+ _LOGGER.debug("Could not set AGC: %s", e)
268
+
269
+ # Use saved value if available, otherwise use default (30dB)
270
+ agc_max_gain = prefs.agc_max_gain if (prefs and prefs.agc_max_gain is not None) else 30.0
271
+ try:
272
+ respeaker.write("PP_AGCMAXGAIN", [agc_max_gain])
273
+ _LOGGER.info("AGC max gain set (PP_AGCMAXGAIN=%.1fdB)%s",
274
+ agc_max_gain,
275
+ " [from preferences]" if (prefs and prefs.agc_max_gain is not None) else " [default]")
276
+ except Exception as e:
277
+ _LOGGER.debug("Could not set PP_AGCMAXGAIN: %s", e)
278
+
279
+ # Set AGC desired output level (target level after gain)
280
+ # More negative = quieter output, less negative = louder
281
+ # Default is around -25dB, set to -18dB for stronger output
282
+ try:
283
+ respeaker.write("PP_AGCDESIREDLEVEL", [-18.0])
284
+ _LOGGER.debug("AGC desired level set (PP_AGCDESIREDLEVEL=-18.0dB)")
285
+ except Exception as e:
286
+ _LOGGER.debug("Could not set PP_AGCDESIREDLEVEL: %s", e)
287
+
288
+ # Optimize AGC time constants for voice commands
289
+ # Faster attack time helps capture sudden speech onset
290
+ try:
291
+ respeaker.write("PP_AGCTIME", [0.5]) # Main time constant (seconds)
292
+ _LOGGER.debug("AGC time constant set (PP_AGCTIME=0.5s)")
293
+ except Exception as e:
294
+ _LOGGER.debug("Could not set PP_AGCTIME: %s", e)
295
+
296
+ # ========== 2. Base Microphone Gain ==========
297
+ # Increase base microphone gain for better sensitivity
298
+ # Default is 1.0, increase to 2.0 for distant speech
299
+ # Range: 0.0-4.0 (float, linear gain multiplier)
300
+ try:
301
+ respeaker.write("AUDIO_MGR_MIC_GAIN", [2.0])
302
+ _LOGGER.info("Microphone gain increased (AUDIO_MGR_MIC_GAIN=2.0)")
303
+ except Exception as e:
304
+ _LOGGER.debug("Could not set AUDIO_MGR_MIC_GAIN: %s", e)
305
+
306
+ # ========== 3. Noise Suppression Settings ==========
307
+ # Use saved value if available, otherwise use default (15%)
308
+ # PP_MIN_NS: minimum noise suppression threshold
309
+ # Higher values = less aggressive suppression = better voice pickup
310
+ # PP_MIN_NS = 0.85 means "keep at least 85% of signal" = 15% max suppression
311
+ # UI shows "noise suppression strength" so 15% = PP_MIN_NS of 0.85
312
+ noise_suppression = prefs.noise_suppression if (prefs and prefs.noise_suppression is not None) else 15.0
313
+ pp_min_ns = 1.0 - (noise_suppression / 100.0) # Convert percentage to PP_MIN_NS value
314
+ try:
315
+ respeaker.write("PP_MIN_NS", [pp_min_ns])
316
+ _LOGGER.info("Noise suppression set to %.0f%% strength (PP_MIN_NS=%.2f)%s",
317
+ noise_suppression, pp_min_ns,
318
+ " [from preferences]" if (prefs and prefs.noise_suppression is not None) else " [default]")
319
+ except Exception as e:
320
+ _LOGGER.debug("Could not set PP_MIN_NS: %s", e)
321
+
322
+ # PP_MIN_NN: minimum noise floor estimation
323
+ # Higher values = less aggressive noise floor tracking
324
+ try:
325
+ respeaker.write("PP_MIN_NN", [pp_min_ns]) # Match PP_MIN_NS
326
+ _LOGGER.debug("Noise floor threshold set (PP_MIN_NN=%.2f)", pp_min_ns)
327
+ except Exception as e:
328
+ _LOGGER.debug("Could not set PP_MIN_NN: %s", e)
329
+
330
+ # ========== 4. Echo Cancellation Settings ==========
331
+ # Ensure echo cancellation is enabled (important for TTS playback)
332
+ try:
333
+ respeaker.write("PP_ECHOONOFF", [1])
334
+ _LOGGER.debug("Echo cancellation enabled (PP_ECHOONOFF=1)")
335
+ except Exception as e:
336
+ _LOGGER.debug("Could not set PP_ECHOONOFF: %s", e)
337
+
338
+ # ========== 5. High-pass filter (remove low frequency noise) ==========
339
+ try:
340
+ respeaker.write("AEC_HPFONOFF", [1])
341
+ _LOGGER.debug("High-pass filter enabled (AEC_HPFONOFF=1)")
342
+ except Exception as e:
343
+ _LOGGER.debug("Could not set AEC_HPFONOFF: %s", e)
344
+
345
+ _LOGGER.info("Microphone settings initialized (AGC=%s, MaxGain=%.0fdB, NoiseSuppression=%.0f%%)",
346
+ "ON" if agc_enabled else "OFF", agc_max_gain, noise_suppression)
347
+
348
+ except Exception as e:
349
+ _LOGGER.warning("Failed to optimize microphone settings: %s", e)
350
+
351
    async def stop(self) -> None:
        """Stop the voice assistant service.

        Teardown is strictly ordered: recording is stopped before the
        ``_running`` flag is cleared so no new audio arrives while the audio
        thread winds down; the thread is joined before playback/server/mDNS
        cleanup. Each hardware call is wrapped in its own try/except so one
        failing subsystem does not block the rest of the shutdown.
        """
        _LOGGER.info("Stopping voice assistant service...")

        # 1. First stop audio recording to prevent new data from coming in
        if self.reachy_mini is not None:
            try:
                self.reachy_mini.media.stop_recording()
                _LOGGER.debug("Reachy Mini recording stopped")
            except Exception as e:
                # Best-effort: log and continue with the remaining teardown.
                _LOGGER.warning("Error stopping Reachy Mini recording: %s", e)

        # 2. Set stop flag (the audio loops poll this each iteration)
        self._running = False

        # 3. Wait for audio thread to finish (bounded wait; a hung thread
        #    only produces a warning so shutdown can proceed)
        if self._audio_thread:
            self._audio_thread.join(timeout=1.0)
            if self._audio_thread.is_alive():
                _LOGGER.warning("Audio thread did not stop in time")

        # 4. Stop playback
        if self.reachy_mini is not None:
            try:
                self.reachy_mini.media.stop_playing()
                _LOGGER.debug("Reachy Mini playback stopped")
            except Exception as e:
                _LOGGER.warning("Error stopping Reachy Mini playback: %s", e)

        # 5. Stop ESPHome server
        if self._server:
            self._server.close()
            await self._server.wait_closed()

        # 6. Unregister mDNS
        if self._discovery:
            await self._discovery.unregister_server()

        # 6.5. Stop Sendspin (music player transport)
        if self._state and self._state.music_player:
            await self._state.music_player.stop_sendspin()

        # 7. Stop camera server
        if self._camera_server:
            await self._camera_server.stop()
            self._camera_server = None

        # 8. Shutdown motion executor
        if self._motion:
            self._motion.shutdown()

        _LOGGER.info("Voice assistant service stopped.")
+ async def _verify_required_files(self) -> None:
405
+ """Verify required model and sound files exist (bundled with package)."""
406
+ # Required wake word files (bundled in wakewords/ directory)
407
+ required_wakewords = [
408
+ "okay_nabu.tflite",
409
+ "okay_nabu.json",
410
+ "hey_jarvis.tflite",
411
+ "hey_jarvis.json",
412
+ "stop.tflite",
413
+ "stop.json",
414
+ ]
415
+
416
+ # Required sound files (bundled in sounds/ directory)
417
+ required_sounds = [
418
+ "wake_word_triggered.flac",
419
+ "timer_finished.flac",
420
+ ]
421
+
422
+ # Verify wake word files
423
+ missing_wakewords = []
424
+ for filename in required_wakewords:
425
+ filepath = _WAKEWORDS_DIR / filename
426
+ if not filepath.exists():
427
+ missing_wakewords.append(filename)
428
+
429
+ if missing_wakewords:
430
+ _LOGGER.warning(
431
+ "Missing wake word files: %s. These should be bundled with the package.",
432
+ missing_wakewords
433
+ )
434
+
435
+ # Verify sound files
436
+ missing_sounds = []
437
+ for filename in required_sounds:
438
+ filepath = _SOUNDS_DIR / filename
439
+ if not filepath.exists():
440
+ missing_sounds.append(filename)
441
+
442
+ if missing_sounds:
443
+ _LOGGER.warning(
444
+ "Missing sound files: %s. These should be bundled with the package.",
445
+ missing_sounds
446
+ )
447
+
448
+ if not missing_wakewords and not missing_sounds:
449
+ _LOGGER.info("All required files verified successfully.")
450
+
451
+ def _load_available_wake_words(self) -> Dict[str, AvailableWakeWord]:
452
+ """Load available wake word configurations."""
453
+ available_wake_words: Dict[str, AvailableWakeWord] = {}
454
+
455
+ # Load order: OpenWakeWord first, then MicroWakeWord, then external
456
+ # Later entries override earlier ones, so MicroWakeWord takes priority
457
+ wake_word_dirs = [
458
+ _WAKEWORDS_DIR / "openWakeWord", # OpenWakeWord (lowest priority)
459
+ _LOCAL_DIR / "external_wake_words", # External wake words
460
+ _WAKEWORDS_DIR, # MicroWakeWord (highest priority)
461
+ ]
462
+
463
+ for wake_word_dir in wake_word_dirs:
464
+ if not wake_word_dir.exists():
465
+ continue
466
+
467
+ for config_path in wake_word_dir.glob("*.json"):
468
+ model_id = config_path.stem
469
+ if model_id == "stop":
470
+ continue
471
+
472
+ try:
473
+ with open(config_path, "r", encoding="utf-8") as f:
474
+ config = json.load(f)
475
+
476
+ model_type = WakeWordType(config.get("type", "micro"))
477
+
478
+ if model_type == WakeWordType.OPEN_WAKE_WORD:
479
+ wake_word_path = config_path.parent / config["model"]
480
+ else:
481
+ wake_word_path = config_path
482
+
483
+ available_wake_words[model_id] = AvailableWakeWord(
484
+ id=model_id,
485
+ type=model_type,
486
+ wake_word=config.get("wake_word", model_id),
487
+ trained_languages=config.get("trained_languages", []),
488
+ wake_word_path=wake_word_path,
489
+ )
490
+ except Exception as e:
491
+ _LOGGER.warning("Failed to load wake word %s: %s", config_path, e)
492
+
493
+ return available_wake_words
494
+
495
+ def _load_preferences(self, preferences_path: Path) -> Preferences:
496
+ """Load user preferences."""
497
+ if preferences_path.exists():
498
+ try:
499
+ with open(preferences_path, "r", encoding="utf-8") as f:
500
+ data = json.load(f)
501
+ return Preferences(**data)
502
+ except Exception as e:
503
+ _LOGGER.warning("Failed to load preferences: %s", e)
504
+
505
+ return Preferences()
506
+
507
+ def _load_wake_models(
508
+ self,
509
+ available_wake_words: Dict[str, AvailableWakeWord],
510
+ preferences: Preferences,
511
+ ):
512
+ """Load wake word models."""
513
+ from pymicro_wakeword import MicroWakeWord
514
+ from pyopen_wakeword import OpenWakeWord
515
+
516
+ wake_models: Dict[str, Union[MicroWakeWord, OpenWakeWord]] = {}
517
+ active_wake_words: Set[str] = set()
518
+
519
+ # Try to load preferred models
520
+ if preferences.active_wake_words:
521
+ for wake_word_id in preferences.active_wake_words:
522
+ wake_word = available_wake_words.get(wake_word_id)
523
+ if wake_word is None:
524
+ _LOGGER.warning("Unknown wake word: %s", wake_word_id)
525
+ continue
526
+
527
+ try:
528
+ _LOGGER.debug("Loading wake model: %s", wake_word_id)
529
+ loaded_model = wake_word.load()
530
+ # Set id attribute on the model for later identification
531
+ setattr(loaded_model, 'id', wake_word_id)
532
+ wake_models[wake_word_id] = loaded_model
533
+ active_wake_words.add(wake_word_id)
534
+ except Exception as e:
535
+ _LOGGER.warning("Failed to load wake model %s: %s", wake_word_id, e)
536
+
537
+ # Load default model if none loaded
538
+ if not wake_models:
539
+ wake_word = available_wake_words.get(self.wake_model)
540
+ if wake_word:
541
+ try:
542
+ _LOGGER.debug("Loading default wake model: %s", self.wake_model)
543
+ loaded_model = wake_word.load()
544
+ # Set id attribute on the model for later identification
545
+ setattr(loaded_model, 'id', self.wake_model)
546
+ wake_models[self.wake_model] = loaded_model
547
+ active_wake_words.add(self.wake_model)
548
+ except Exception as e:
549
+ _LOGGER.error("Failed to load default wake model: %s", e)
550
+
551
+ return wake_models, active_wake_words
552
+
553
+ def _load_stop_model(self):
554
+ """Load the stop word model."""
555
+ from pymicro_wakeword import MicroWakeWord
556
+
557
+ stop_config = _WAKEWORDS_DIR / "stop.json"
558
+ if stop_config.exists():
559
+ try:
560
+ model = MicroWakeWord.from_config(stop_config)
561
+ setattr(model, 'id', 'stop')
562
+ return model
563
+ except Exception as e:
564
+ _LOGGER.warning("Failed to load stop model: %s", e)
565
+
566
+ # Return a dummy model if stop model not available
567
+ _LOGGER.warning("Stop model not available, using fallback")
568
+ okay_nabu_config = _WAKEWORDS_DIR / "okay_nabu.json"
569
+ if okay_nabu_config.exists():
570
+ model = MicroWakeWord.from_config(okay_nabu_config)
571
+ setattr(model, 'id', 'stop')
572
+ return model
573
+
574
+ return None
575
+
576
+ def _process_audio(self) -> None:
577
+ """Process audio from microphone (Reachy Mini or system fallback)."""
578
+ from pymicro_wakeword import MicroWakeWordFeatures
579
+
580
+ ctx = AudioProcessingContext()
581
+ ctx.micro_features = MicroWakeWordFeatures()
582
+
583
+ try:
584
+ _LOGGER.info("Starting audio processing...")
585
+
586
+ if self.reachy_mini is not None:
587
+ _LOGGER.info("Using Reachy Mini's microphone")
588
+ self._audio_loop_reachy(ctx)
589
+ else:
590
+ _LOGGER.info("Using system microphone (fallback)")
591
+ self._audio_loop_fallback(ctx)
592
+
593
+ except Exception:
594
+ _LOGGER.exception("Error processing audio")
595
+
596
+ def _audio_loop_reachy(self, ctx: AudioProcessingContext) -> None:
597
+ """Audio loop using Reachy Mini's microphone."""
598
+ while self._running:
599
+ try:
600
+ if not self._wait_for_satellite():
601
+ continue
602
+
603
+ self._update_wake_words_list(ctx)
604
+
605
+ # Get audio from Reachy Mini
606
+ audio_chunk = self._get_reachy_audio_chunk()
607
+ if audio_chunk is None:
608
+ time.sleep(0.01)
609
+ continue
610
+
611
+ self._process_audio_chunk(ctx, audio_chunk)
612
+
613
+ except Exception as e:
614
+ _LOGGER.error("Error in Reachy audio processing: %s", e)
615
+ time.sleep(0.1)
616
+
617
+ def _audio_loop_fallback(self, ctx: AudioProcessingContext) -> None:
618
+ """Audio loop using system microphone (fallback)."""
619
+ import sounddevice as sd
620
+
621
+ block_size = 1024
622
+
623
+ with sd.InputStream(
624
+ samplerate=16000,
625
+ channels=1,
626
+ blocksize=block_size,
627
+ dtype="float32",
628
+ ) as stream:
629
+ while self._running:
630
+ if not self._wait_for_satellite():
631
+ continue
632
+
633
+ self._update_wake_words_list(ctx)
634
+
635
+ # Get audio from system microphone
636
+ audio_chunk_array, overflowed = stream.read(block_size)
637
+ if overflowed:
638
+ _LOGGER.warning("Audio buffer overflow")
639
+
640
+ audio_chunk_array = audio_chunk_array.reshape(-1)
641
+ audio_chunk = self._convert_to_pcm(audio_chunk_array)
642
+
643
+ self._process_audio_chunk(ctx, audio_chunk)
644
+
645
+ def _wait_for_satellite(self) -> bool:
646
+ """Wait for satellite connection. Returns True if connected."""
647
+ if self._state is None or self._state.satellite is None:
648
+ time.sleep(0.1)
649
+ return False
650
+ return True
651
+
652
+ def _update_wake_words_list(self, ctx: AudioProcessingContext) -> None:
653
+ """Update wake words list if changed."""
654
+ from pyopen_wakeword import OpenWakeWord, OpenWakeWordFeatures
655
+ from pymicro_wakeword import MicroWakeWordFeatures
656
+
657
+ if (not ctx.wake_words) or (self._state.wake_words_changed and self._state.wake_words):
658
+ self._state.wake_words_changed = False
659
+ ctx.wake_words.clear()
660
+
661
+ # Reset feature extractors to clear any residual audio data
662
+ # This prevents false triggers when switching wake words
663
+ ctx.micro_features = MicroWakeWordFeatures()
664
+ ctx.micro_inputs.clear()
665
+ if ctx.oww_features is not None:
666
+ ctx.oww_features = OpenWakeWordFeatures.from_builtin()
667
+ ctx.oww_inputs.clear()
668
+
669
+ # Also reset the refractory period to prevent immediate trigger
670
+ ctx.last_active = time.monotonic()
671
+
672
+ # state.wake_words is Dict[str, MicroWakeWord/OpenWakeWord]
673
+ # We need to filter by active_wake_words (which contains the IDs/keys)
674
+ for ww_id, ww_model in self._state.wake_words.items():
675
+ if ww_id in self._state.active_wake_words:
676
+ # Ensure the model has an 'id' attribute for later use
677
+ if not hasattr(ww_model, 'id'):
678
+ setattr(ww_model, 'id', ww_id)
679
+ ctx.wake_words.append(ww_model)
680
+
681
+ ctx.has_oww = any(isinstance(ww, OpenWakeWord) for ww in ctx.wake_words)
682
+ if ctx.has_oww and ctx.oww_features is None:
683
+ ctx.oww_features = OpenWakeWordFeatures.from_builtin()
684
+
685
+ _LOGGER.info("Active wake words updated: %s (features reset)", list(self._state.active_wake_words))
686
+
687
+ def _get_reachy_audio_chunk(self) -> Optional[bytes]:
688
+ """Get fixed-size audio chunk from Reachy Mini's microphone.
689
+
690
+ Returns exactly AUDIO_BLOCK_SIZE samples each time, buffering
691
+ internally to ensure consistent chunk sizes for streaming.
692
+
693
+ Returns:
694
+ PCM audio bytes of fixed size, or None if not enough data.
695
+ """
696
+ # Get new audio data from SDK
697
+ audio_data = self.reachy_mini.media.get_audio_sample()
698
+
699
+ # Append new data to buffer if valid
700
+ if audio_data is not None and isinstance(audio_data, np.ndarray) and audio_data.size > 0:
701
+ try:
702
+ if audio_data.dtype.kind not in ('S', 'U', 'O', 'V', 'b'):
703
+ if audio_data.dtype != np.float32:
704
+ audio_data = np.asarray(audio_data, dtype=np.float32)
705
+
706
+ # Convert stereo to mono
707
+ if audio_data.ndim == 2 and audio_data.shape[1] == 2:
708
+ audio_data = audio_data.mean(axis=1)
709
+ elif audio_data.ndim == 2:
710
+ audio_data = audio_data[:, 0].copy()
711
+
712
+ if audio_data.ndim == 1:
713
+ self._audio_buffer = np.concatenate([self._audio_buffer, audio_data])
714
+ except (TypeError, ValueError):
715
+ pass
716
+
717
+ # Return fixed-size chunk if we have enough data
718
+ if len(self._audio_buffer) >= AUDIO_BLOCK_SIZE:
719
+ chunk = self._audio_buffer[:AUDIO_BLOCK_SIZE]
720
+ self._audio_buffer = self._audio_buffer[AUDIO_BLOCK_SIZE:]
721
+ return self._convert_to_pcm(chunk)
722
+
723
+ return None
724
+
725
+ def _convert_to_pcm(self, audio_chunk_array: np.ndarray) -> bytes:
726
+ """Convert float32 audio array to 16-bit PCM bytes."""
727
+ return (
728
+ (np.clip(audio_chunk_array, -1.0, 1.0) * 32767.0)
729
+ .astype("<i2")
730
+ .tobytes()
731
+ )
732
+
733
+ def _process_audio_chunk(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
734
+ """Process an audio chunk for wake word detection.
735
+
736
+ Following reference project pattern: always process wake words.
737
+ Refractory period prevents duplicate triggers.
738
+
739
+ Args:
740
+ ctx: Audio processing context
741
+ audio_chunk: PCM audio bytes
742
+ """
743
+ # Stream audio to Home Assistant
744
+ self._state.satellite.handle_audio(audio_chunk)
745
+
746
+ # Process wake word features
747
+ self._process_features(ctx, audio_chunk)
748
+
749
+ # Detect wake words
750
+ self._detect_wake_words(ctx)
751
+
752
+ # Detect stop word
753
+ self._detect_stop_word(ctx)
754
+
755
+ def _process_features(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
756
+ """Process audio features for wake word detection."""
757
+ ctx.micro_inputs.clear()
758
+ ctx.micro_inputs.extend(ctx.micro_features.process_streaming(audio_chunk))
759
+
760
+ if ctx.has_oww and ctx.oww_features is not None:
761
+ ctx.oww_inputs.clear()
762
+ ctx.oww_inputs.extend(ctx.oww_features.process_streaming(audio_chunk))
763
+
764
    def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
        """Detect wake words in the processed audio features.

        Uses refractory period to prevent duplicate triggers.
        Following reference project pattern.

        NOTE(review): the inner loops intentionally run to completion with no
        early break, so every feature window is fed to the model even after
        activation — presumably to keep the streaming state in sync; confirm
        before "optimizing" with any()/break.
        """
        from pymicro_wakeword import MicroWakeWord
        from pyopen_wakeword import OpenWakeWord

        for wake_word in ctx.wake_words:
            activated = False

            if isinstance(wake_word, MicroWakeWord):
                # MicroWakeWord: model returns a truthy activation per window.
                for micro_input in ctx.micro_inputs:
                    if wake_word.process_streaming(micro_input):
                        activated = True
            elif isinstance(wake_word, OpenWakeWord):
                # OpenWakeWord: model yields probabilities; 0.5 is the
                # activation threshold used here.
                for oww_input in ctx.oww_inputs:
                    for prob in wake_word.process_streaming(oww_input):
                        if prob > 0.5:
                            activated = True

            if activated:
                # Check refractory period to prevent duplicate triggers
                now = time.monotonic()
                if (ctx.last_active is None) or (
                    (now - ctx.last_active) > self._state.refractory_seconds
                ):
                    _LOGGER.info("Wake word detected: %s", wake_word.id)
                    self._state.satellite.wakeup(wake_word)
                    # Face tracking will handle looking at user automatically
                    self._motion.on_wakeup()
                    # Start the refractory window from this trigger.
                    ctx.last_active = now
    def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
        """Detect stop word in the processed audio features.

        The loop deliberately consumes every feature window without breaking
        (same pattern as _detect_wake_words), and the stop action only fires
        when the stop model's id is in the active wake word set.
        """
        if not self._state.stop_word:
            # No stop model loaded; nothing to do.
            return

        stopped = False
        for micro_input in ctx.micro_inputs:
            if self._state.stop_word.process_streaming(micro_input):
                stopped = True

        if stopped and (self._state.stop_word.id in self._state.active_wake_words):
            _LOGGER.info("Stop word detected")
            self._state.satellite.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/.gitkeep RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/README.md RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/alexa.json RENAMED
File without changes