fix: set python version to work with default env

#3
This view is limited to 50 files because it contains too many changes. See the raw diff here.
Files changed (50) hide show
  1. .claude/settings.local.json +9 -38
  2. .gitattributes +6 -3
  3. .github/dependabot.yml +0 -13
  4. .github/workflows/auto_release_on_version.yml +0 -86
  5. .github/workflows/sync_to_hf.yml +0 -36
  6. .gitignore +5 -12
  7. .pre-commit-config.yaml +0 -20
  8. CHANGELOG.md +0 -581
  9. Project_Summary.md → PROJECT_PLAN.md +0 -0
  10. README.md +1 -0
  11. changelog.json +0 -222
  12. docs/USER_MANUAL_CN.md +0 -256
  13. docs/USER_MANUAL_EN.md +0 -256
  14. home_assistant_blueprints/reachy_mini_presence_companion.yaml +0 -288
  15. index.html +32 -94
  16. pyproject.toml +18 -129
  17. {reachy_mini_home_assistant → reachy_mini_ha_voice}/__init__.py +24 -29
  18. {reachy_mini_home_assistant → reachy_mini_ha_voice}/__main__.py +21 -29
  19. {reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/animation_player.py +42 -121
  20. reachy_mini_ha_voice/animations/conversation_animations.json +87 -0
  21. {reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/api_server.py +9 -24
  22. {reachy_mini_home_assistant/audio → reachy_mini_ha_voice}/audio_player.py +130 -770
  23. {reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/camera_server.py +842 -1042
  24. {reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity.py +45 -30
  25. {reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity_extensions.py +16 -19
  26. reachy_mini_ha_voice/entity_registry.py +945 -0
  27. reachy_mini_ha_voice/gesture_detector.py +183 -0
  28. {reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/head_tracker.py +48 -158
  29. {reachy_mini_home_assistant → reachy_mini_ha_voice}/main.py +75 -74
  30. reachy_mini_ha_voice/models.py +95 -0
  31. {reachy_mini_home_assistant → reachy_mini_ha_voice}/models/crops_classifier.onnx +0 -0
  32. {reachy_mini_home_assistant → reachy_mini_ha_voice}/models/hand_detector.onnx +0 -0
  33. reachy_mini_home_assistant/motion/reachy_motion.py → reachy_mini_ha_voice/motion.py +16 -12
  34. reachy_mini_ha_voice/movement_manager.py +861 -0
  35. {reachy_mini_home_assistant → reachy_mini_ha_voice}/reachy_controller.py +869 -1061
  36. {reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/satellite.py +784 -1051
  37. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/.gitkeep +0 -0
  38. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/LICENSE.md +0 -0
  39. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/README.md +0 -0
  40. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/timer_finished.flac +0 -0
  41. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/wake_word_triggered.flac +0 -0
  42. {reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/speech_sway.py +27 -36
  43. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/index.html +0 -0
  44. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/main.js +0 -0
  45. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/style.css +0 -0
  46. reachy_mini_ha_voice/util.py +45 -0
  47. {reachy_mini_home_assistant → reachy_mini_ha_voice}/voice_assistant.py +810 -1314
  48. {reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/.gitkeep +0 -0
  49. {reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/README.md +0 -0
  50. {reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/alexa.json +0 -0
.claude/settings.local.json CHANGED
@@ -3,53 +3,24 @@
3
  "includeCoAuthoredBy": false,
4
  "permissions": {
5
  "allow": [
6
- "Bash",
7
- "BashOutput",
8
  "Edit",
9
- "Glob",
10
- "Grep",
11
- "KillShell",
12
- "NotebookEdit",
13
- "Read",
14
- "SlashCommand",
15
- "Task",
16
- "TodoWrite",
17
- "WebFetch",
18
- "WebSearch",
19
- "Write",
20
- "mcp__ide",
21
- "mcp__exa",
22
- "mcp__context7",
23
- "mcp__mcp-deepwiki",
24
- "mcp__Playwright",
25
- "mcp__spec-workflow",
26
- "mcp__open-websearch",
27
- "mcp__serena",
28
- "All",
29
- "Bash(copy:*)",
30
- "mcp__zread__search_doc",
31
- "mcp__zread__read_file",
32
  "Bash(cd:*)",
33
- "Bash(ls:*)",
34
- "Bash(find:*)",
35
- "mcp__acp__Bash",
36
- "Skill(commit-commands:commit)",
37
- "Skill(commit-commands:commit:*)"
38
  ],
39
  "deny": [],
40
  "ask": []
41
  },
42
- "model": "opus",
43
  "hooks": {},
 
 
44
  "statusLine": {
45
  "type": "command",
46
  "command": "%USERPROFILE%\\.claude\\ccline\\ccline.exe",
47
  "padding": 0
48
  },
49
- "enabledPlugins": {
50
- "glm-plan-usage@zai-coding-plugins": true,
51
- "glm-plan-bug@zai-coding-plugins": true
52
- },
53
- "outputStyle": "Explanatory",
54
- "alwaysThinkingEnabled": true
55
- }
 
3
  "includeCoAuthoredBy": false,
4
  "permissions": {
5
  "allow": [
6
+ "SlashCommand(/zcf:git-commit)",
 
7
  "Edit",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  "Bash(cd:*)",
9
+ "SlashCommand(/zcf:git-commit --emoji)",
10
+ "SlashCommand(/zcf:git-commit:*)",
11
+ "Bash(git:*)",
12
+ "Bash(ls:*)"
 
13
  ],
14
  "deny": [],
15
  "ask": []
16
  },
 
17
  "hooks": {},
18
+ "alwaysThinkingEnabled": true,
19
+ "outputStyle": "default",
20
  "statusLine": {
21
  "type": "command",
22
  "command": "%USERPROFILE%\\.claude\\ccline\\ccline.exe",
23
  "padding": 0
24
  },
25
+ "model": "opus"
26
+ }
 
 
 
 
 
.gitattributes CHANGED
@@ -1,5 +1,8 @@
1
- # LFS tracking for large binary files
 
 
 
 
 
2
  *.tflite filter=lfs diff=lfs merge=lfs -text
3
  *.onnx filter=lfs diff=lfs merge=lfs -text
4
- *.pt filter=lfs diff=lfs merge=lfs -text
5
- *.flac filter=lfs diff=lfs merge=lfs -text
 
1
+ reachy_mini_ha_voice/wakewords/**/*.tflite filter=lfs diff=lfs merge=lfs -text
2
+ reachy_mini_ha_voice/sounds/**/*.flac filter=lfs diff=lfs merge=lfs -text
3
+ "reachy_mini_ha_voice/wakewords/**/*.tflite filter=lfs diff=lfs merge=lfs -text
4
+ reachy_mini_ha_voice/sounds/**/*.flac" filter=lfs diff=lfs merge=lfs -text
5
+ "ha/assets/meshes/*.stl" filter=lfs diff=lfs merge=lfs -text
6
+ "ha/assets/*.urdf" filter=lfs diff=lfs merge=lfs -text
7
  *.tflite filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
 
.github/dependabot.yml DELETED
@@ -1,13 +0,0 @@
1
- version: 2
2
- updates:
3
- # Enable version updates for pip
4
- - package-ecosystem: "pip"
5
- directory: "/"
6
- schedule:
7
- interval: "weekly"
8
- # Ignore PyTorch updates - locked version required for compatibility
9
- ignore:
10
- - dependency-name: "torch"
11
- versions: [">2.5.1"]
12
- - dependency-name: "torchvision"
13
- versions: [">0.20.1"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/auto_release_on_version.yml DELETED
@@ -1,86 +0,0 @@
1
- name: Auto Release On Version Change
2
-
3
- on:
4
- push:
5
- branches: [develop, main]
6
- paths:
7
- - pyproject.toml
8
- - changelog.json
9
-
10
- permissions:
11
- contents: write
12
-
13
- jobs:
14
- release:
15
- runs-on: ubuntu-latest
16
-
17
- steps:
18
- - name: Checkout
19
- uses: actions/checkout@v4
20
- with:
21
- fetch-depth: 0
22
-
23
- - name: Extract version from pyproject
24
- id: version
25
- run: |
26
- python - <<'PY2'
27
- import os
28
- import tomllib
29
- from pathlib import Path
30
-
31
- data = tomllib.loads(Path('pyproject.toml').read_text(encoding='utf-8'))
32
- version = data['project']['version']
33
- with open(os.environ['GITHUB_OUTPUT'], 'a', encoding='utf-8') as f:
34
- f.write(f"version={version}\\n")
35
- print(f"Detected version: {version}")
36
- PY2
37
-
38
- - name: Check if tag exists
39
- id: tag_check
40
- run: |
41
- git fetch --tags --force
42
- VERSION="${{ steps.version.outputs.version }}"
43
- if git rev-parse -q --verify "refs/tags/v${VERSION}" >/dev/null; then
44
- echo "should_release=false" >> "$GITHUB_OUTPUT"
45
- echo "Tag v${VERSION} already exists; skip release."
46
- else
47
- echo "should_release=true" >> "$GITHUB_OUTPUT"
48
- echo "Tag v${VERSION} does not exist; release will be created."
49
- fi
50
-
51
- - name: Build release notes from changelog
52
- if: steps.tag_check.outputs.should_release == 'true'
53
- env:
54
- VERSION: ${{ steps.version.outputs.version }}
55
- run: |
56
- python - <<'PY2'
57
- import json
58
- import os
59
- from pathlib import Path
60
-
61
- version = os.environ['VERSION']
62
- changelog = json.loads(Path('changelog.json').read_text(encoding='utf-8'))
63
- entry = next((x for x in changelog if x.get('version') == version), None)
64
-
65
- lines = [f"## v{version}", ""]
66
- if entry is None:
67
- lines.append("No changelog entry found for this version.")
68
- else:
69
- date = entry.get('date')
70
- if date:
71
- lines.append(f"Date: {date}")
72
- lines.append("")
73
- for change in entry.get('changes', []):
74
- lines.append(f"- {change}")
75
-
76
- Path('release_notes.md').write_text('\\n'.join(lines) + '\\n', encoding='utf-8')
77
- PY2
78
-
79
- - name: Create GitHub release
80
- if: steps.tag_check.outputs.should_release == 'true'
81
- uses: softprops/action-gh-release@v2
82
- with:
83
- tag_name: v${{ steps.version.outputs.version }}
84
- name: v${{ steps.version.outputs.version }}
85
- body_path: release_notes.md
86
- generate_release_notes: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/sync_to_hf.yml DELETED
@@ -1,36 +0,0 @@
1
- name: Sync to Hugging Face
2
-
3
- on:
4
- push:
5
- branches: [main]
6
- workflow_dispatch:
7
-
8
- jobs:
9
- sync:
10
- runs-on: ubuntu-latest
11
- steps:
12
- - name: Checkout GitHub repo
13
- uses: actions/checkout@v4
14
- with:
15
- lfs: true
16
-
17
- - name: Create fresh commit and push to Hugging Face
18
- env:
19
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
- run: |
21
- git config --global user.email "action@github.com"
22
- git config --global user.name "GitHub Action"
23
-
24
- # Create a new orphan branch with no history
25
- git checkout --orphan hf-sync
26
- git add -A
27
- git commit -m "Fresh sync: $(date +%Y-%m-%d\ %H:%M:%S)"
28
-
29
- # Add Hugging Face remote
30
- git remote add hf https://djhui5710:$HF_TOKEN@huggingface.co/spaces/djhui5710/reachy_mini_home_assistant
31
-
32
- # Push LFS objects first
33
- git lfs push hf hf-sync --all
34
-
35
- # Force push as main to HF (overwrites all history)
36
- git push hf hf-sync:main --force
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore CHANGED
@@ -39,8 +39,6 @@ env/
39
  .spec-workflow/
40
  .playwright-mcp/
41
  *~
42
- CLAUDE.md
43
- commit_msg.txt
44
 
45
  # Configuration
46
  config.json
@@ -65,19 +63,14 @@ htmlcov/
65
  !reachy_mini_ha_voice/sounds/*.flac
66
 
67
  # Models (exclude package bundled files)
68
- # models/ - ignore external models directory
69
  models/
70
- # Package bundled models
71
- !reachy_mini_ha_voice/models/
72
- reachy_mini_ha_voice/models/*.tflite
73
- reachy_mini_ha_voice/models/*.onnx
74
- reachy_mini_ha_voice/models/*.pt
75
 
76
  # SDK Reference (local development only)
77
  reference/
78
- local/
79
  # ha/ - temporarily commented out for path fixes
80
  # ha/ will be moved to separate repository soon
81
-
82
- # Temporary check scripts
83
- temp_check_scripts/
 
39
  .spec-workflow/
40
  .playwright-mcp/
41
  *~
 
 
42
 
43
  # Configuration
44
  config.json
 
63
  !reachy_mini_ha_voice/sounds/*.flac
64
 
65
  # Models (exclude package bundled files)
 
66
  models/
67
+ # *.tflite - bundled in package
68
+ !reachy_mini_ha_voice/wakewords/*.tflite
69
+ !reachy_mini_ha_voice/wakewords/**/*.tflite
70
+ *.onnx
71
+ !reachy_mini_ha_voice/models/*.onnx
72
 
73
  # SDK Reference (local development only)
74
  reference/
 
75
  # ha/ - temporarily commented out for path fixes
76
  # ha/ will be moved to separate repository soon
 
 
 
.pre-commit-config.yaml DELETED
@@ -1,20 +0,0 @@
1
- # Pre-commit hooks for code quality
2
- # Install: pip install pre-commit && pre-commit install
3
- # Run manually: pre-commit run --all-files
4
-
5
- repos:
6
- - repo: https://github.com/astral-sh/ruff-pre-commit
7
- rev: v0.8.6
8
- hooks:
9
- - id: ruff
10
- args: [--fix]
11
- - id: ruff-format
12
-
13
- - repo: https://github.com/pre-commit/mirrors-mypy
14
- rev: v1.14.1
15
- hooks:
16
- - id: mypy
17
- additional_dependencies: []
18
- args: [--ignore-missing-imports]
19
- # Only check changed files for speed
20
- pass_filenames: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CHANGELOG.md DELETED
@@ -1,581 +0,0 @@
1
- # Changelog
2
-
3
- All notable changes to the Reachy Mini HA Voice project will be documented in this file.
4
-
5
- The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
- and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
-
8
- ## [Unreleased]
9
-
10
- ### Fixed
11
- - **NameError** - Add missing deque import in gesture smoother
12
- - **Syntax Error** - Add missing class indentation for volume methods in audio_player.py
13
- - **Audio Card Name Detection** - Use SDK's detection logic instead of hardcoded values
14
- - **SDK Port 8000 Blocking** - Use amixer directly for volume control to avoid SDK HTTP API blocking
15
- - **Memory Leak Root Cause** - Audio buffer array creation in loop causing unbounded memory growth
16
- - **Indentation Error** - Fix indentation in audio_player.py stop_sendspin method
17
-
18
- ## [0.9.9] - 2026-01-28
19
-
20
- ### Fixed
21
- - **SDK Buffer Overflow During Idle**
22
- - Add SDK buffer flush on GStreamer lock timeout
23
- - Prevents buffer overflow during long idle periods when lock contention prevents buffer drainage
24
- - Audio thread flushes SDK audio buffer when lock acquisition times out
25
- - Camera thread flushes SDK video buffer when lock acquisition times out
26
- - Audio playback flushes SDK playback buffer when lock acquisition times out
27
- - Resolves SDK crashes during extended wake-up idle periods without conversation
28
- - Requires Reachy Mini hardware (not applicable to simulation mode)
29
-
30
- ### Fixed
31
- - **Memory Leaks**
32
- - Audio buffer memory leak - added size limit to prevent unbounded growth
33
- - Temp file leak - downloaded audio files now cleaned up after playback
34
- - Multiple memory leak and resource leak issues fixed
35
- - Thread-safe draining flag using threading.Event
36
- - Silent failures now logged for debugging
37
-
38
- ### Optimized
39
- - **Gesture Recognition Sensitivity**
40
- - Simplify GestureSmoother to frequency-based confirmation (1 frame)
41
- - Remove all confidence filtering - return all detections to Home Assistant
42
- - Remove unused parameters (confidence_threshold, detection_threshold, GestureConfig)
43
- - Remove duplicate empty check in gesture detection
44
- - Add GestureSmoother class with history tracking for stable output
45
- - Reduce gesture detection interval from 3 frames to 1 frame for higher frequency
46
- - Fix: Gesture detection now returns all detected hands instead of only the highest confidence one
47
- - Matches reference implementation behavior for improved detection rate
48
- - No conflicts with face tracking (shared frame, independent processing)
49
-
50
- ### Code Quality
51
- - Fix Ruff linter issues (import ordering, missing newlines, __all__ sorting)
52
- - Format code with Ruff formatter (5 files reformatted)
53
- - Fix slice index error in gesture detection (convert coordinates to integers)
54
- - Fix Python 3.12 type annotation compatibility
55
-
56
- ## [0.9.8] - 2026-01-27
57
-
58
- ### New
59
- - Mute switch entity - suspends voice services only (not camera/motion)
60
- - Disable Camera switch entity - suspends camera and AI processing
61
- - Home Assistant connection-driven feature loading
62
- - Automatic suspend/resume on HA disconnect/reconnect
63
-
64
- ### Fixed
65
- - Camera disable logic - corrected inverted conditions for proper operation
66
- - Prevent daemon crash when entering idle state
67
- - Camera preview in Home Assistant
68
- - SDK crash during idle - optimized audio processing to skip get_frame() when not streaming to Home Assistant, reducing GStreamer resource competition
69
- - Add GStreamer threading lock to prevent pipeline competition between audio, playback, and camera threads
70
- - Audio thread gets priority during conversations - bypasses lock when conversation is active
71
- - Remove GStreamer lock to fix wake word detection in idle state (lock was preventing wake word detection)
72
-
73
- ### Optimized
74
- - Reduce log output by 30-40%
75
- - Bundle face tracking model with package - eliminated HuggingFace download dependency, removed huggingface_hub from requirements, models now load from local package directory for offline operation
76
- - Replace HTTP API polling with SDK Zenoh for daemon status monitoring to reduce uvicorn blocking and improve stability
77
- - Device ID now reads /etc/machine-id directly - removed uuid.getnode() and file persistence
78
- - Implement high-priority SDK improvements
79
- - Remove aiohttp dependency from daemon_monitor - fully migrated to SDK Zenoh
80
-
81
- ### Removed
82
- - Temporarily disable emotion playback during TTS
83
- - Unused config items (connection_timeout)
84
-
85
- ### Code Quality
86
- - Code quality improvements
87
-
88
- ## [0.9.7] - 2026-01-20
89
-
90
- ### Fixed
91
- - Device ID file path corrected after util.py moved to core/ subdirectory (prevents HA seeing device as new)
92
- - Animation file path corrected (was looking in wrong directory)
93
- - Remove hey_jarvis from required wake words (it's optional in openWakeWord/)
94
-
95
- ## [0.9.6] - 2026-01-20
96
-
97
- ### New
98
- - Add ruff linter/formatter and mypy type checker configuration
99
- - Add pre-commit hooks for automated code quality checks
100
-
101
- ### Fixed
102
- - Remove duplicate resume() method in audio_player.py
103
- - Remove duplicate connection_lost() method in satellite.py
104
- - Store asyncio task reference in sleep_manager.py to prevent garbage collection
105
-
106
- ### Optimized
107
- - Use dict.items() for efficient iteration in smoothing.py
108
-
109
- ## [0.9.5] - 2026-01-19
110
-
111
- ### Refactored
112
- - Modularize codebase - new core/motion/vision/audio/entities module structure
113
- - Remove legacy/compatibility code
114
- - Remove audio diagnostics debug code
115
-
116
- ### New
117
- - Direct callbacks for HA sleep/wake buttons to suspend/resume services
118
-
119
- ### Optimized
120
- - Audio processing latency - reduced chunk size from 1024 to 256 samples (64ms → 16ms)
121
- - Audio loop delay reduced from 10ms to 1ms for faster VAD response
122
- - Stereo to mono conversion uses first channel instead of mean for cleaner signal
123
-
124
- ### Improved
125
- - Camera resume_from_suspend now synchronous for reliable wake from sleep
126
- - Rotation clamping in face tracking to prevent IK collisions
127
- - Audio gain boosted for faster VAD detection
128
- - Audio NaN/Inf values causing STT issues fixed
129
-
130
- ## [0.9.0] - 2026-01-18
131
-
132
- ### New
133
- - Robot state monitor for proper sleep mode handling - services pause when robot disconnects and resume on reconnect
134
- - System diagnostics entities (CPU, memory, disk, uptime) exposed as Home Assistant diagnostic sensors
135
- - Phase 24 with 9 diagnostic sensors (cpu_percent, cpu_temperature, memory_percent, memory_used_gb, disk_percent, disk_free_gb, uptime_hours, process_cpu_percent, process_memory_mb)
136
-
137
- ### Fixed
138
- - Voice assistant and movement manager now properly pause during robot sleep mode instead of generating error spam
139
-
140
- ### Improved
141
- - Graceful service lifecycle management with RobotStateMonitor callbacks
142
-
143
- ## [0.8.7] - 2026-01-18
144
-
145
- ### Fixed
146
- - Clamp body_yaw to safe range to prevent IK collision warnings during emotion playback
147
- - Emotion moves and face tracking now respect SDK safety limits
148
-
149
- ### Improved
150
- - Face tracking smoothness - removed EMA smoothing (matches reference project)
151
- - Face tracking timing updated to match reference (2s delay, 1s interpolation)
152
-
153
- ## [0.8.6] - 2026-01-18
154
-
155
- ### Fixed
156
- - Audio buffer memory leak - added size limit to prevent unbounded growth
157
- - Temp file leak - downloaded audio files now cleaned up after playback
158
- - Camera thread termination timeout increased for clean shutdown
159
- - Thread-safe draining flag using threading.Event
160
- - Silent failures now logged for debugging
161
-
162
- ## [0.8.5] - 2026-01-18
163
-
164
- ### Fixed
165
- - DOA turn-to-sound direction inverted - now turns correctly toward sound source
166
- - Graceful shutdown prevents daemon crash on app stop
167
-
168
- ## [0.8.4] - 2026-01-18
169
-
170
- ### Improved
171
- - Smooth idle animation with interpolation phase (matches reference BreathingMove)
172
- - Two-phase animation - interpolates to neutral before oscillation
173
- - Antenna frequency updated to 0.5Hz (was 0.15Hz) for more natural sway
174
-
175
- ## [0.8.3] - 2026-01-18
176
-
177
- ### Fixed
178
- - Body now properly follows head rotation during face tracking
179
- - body_yaw extracted from final head pose matrix and synced with head_yaw
180
- - Matches reference project sweep_look behavior for natural body movement
181
-
182
- ## [0.8.2] - 2026-01-18
183
-
184
- ### Fixed
185
- - Body follows head rotation during face tracking - body_yaw syncs with head_yaw
186
- - Matches reference project sweep_look behavior for natural body movement
187
-
188
- ## [0.8.1] - 2026-01-18
189
-
190
- ### Fixed
191
- - face_detected entity now pushes state updates to Home Assistant in real-time
192
- - Body yaw simplified to match reference project - SDK automatic_body_yaw handles collision prevention
193
- - Idle animation now starts immediately on app launch
194
- - Smooth antenna animation - removed pose change threshold for continuous motion
195
-
196
- ## [0.8.0] - 2026-01-17
197
-
198
- ### New
199
- - Comprehensive emotion keyword mapping with 280+ Chinese and English keywords
200
- - 35 emotion categories mapped to robot expressions
201
- - Auto-trigger expressions from conversation text patterns
202
-
203
- ## [0.7.3] - 2026-01-12
204
-
205
- ### Fixed
206
- - Revert to reference project pattern - use refractory period instead of state flags
207
- - Remove broken _in_pipeline and _tts_playing state management
208
- - Restore correct RUN_END event handling from linux-voice-assistant
209
-
210
- ## [0.7.2] - 2026-01-12
211
-
212
- ### Fixed
213
- - Remove premature _tts_played reset in RUN_END event
214
- - Ensure _in_pipeline stays True until TTS playback completes
215
-
216
- ## [0.7.1] - 2026-01-12
217
-
218
- ### Fixed
219
- - Prevent wake word detection during TTS playback
220
- - Add _tts_playing flag to track TTS audio state precisely
221
-
222
- ## [0.7.0] - 2026-01-12
223
-
224
- ### New
225
- - Gesture detection using HaGRID ONNX models (18 gesture classes)
226
- - gesture_detected and gesture_confidence entities in Home Assistant
227
-
228
- ### Fixed
229
- - Gesture state now properly pushed to Home Assistant in real-time
230
-
231
- ### Optimized
232
- - Aggressive power saving - 0.5fps idle mode after 30s without face
233
- - Gesture detection only runs when face detected (saves CPU)
234
-
235
- ## [0.6.1] - 2026-01-12
236
-
237
- ### Fixed
238
- - Prioritize MicroWakeWord over OpenWakeWord for same-name wake words
239
- - OpenWakeWord wake words now visible in Home Assistant selection
240
- - Stop word detection now works correctly
241
- - STT/LLM response time improved with fixed audio chunk size
242
-
243
- ## [0.6.0] - 2026-01-11
244
-
245
- ### New
246
- - Real-time audio-driven speech animation (SwayRollRT algorithm)
247
- - JSON-driven animation system - all animations configurable
248
-
249
- ### Refactored
250
- - Remove hardcoded actions, use animation offsets only
251
-
252
- ### Fixed
253
- - TTS audio analysis now works with local playback
254
-
255
- ## [0.5.16] - 2026-01-11
256
-
257
- ### Removed
258
- - Tap-to-wake feature (too many false triggers)
259
-
260
- ### New
261
- - Continuous Conversation switch in Home Assistant
262
-
263
- ### Refactored
264
- - Simplified satellite.py and voice_assistant.py
265
-
266
- ## [0.5.15] - 2026-01-11
267
-
268
- ### New
269
- - Audio settings persistence (AGC, Noise Suppression, Tap Sensitivity)
270
-
271
- ### Refactored
272
- - Move Sendspin mDNS discovery to zeroconf.py
273
-
274
- ### Fixed
275
- - Tap detection not re-enabled during emotion playback in conversation
276
-
277
- ## [0.5.14] - 2026-01-11
278
-
279
- ### Fixed
280
- - Skip ALL wake word processing when pipeline is active
281
- - Eliminate race condition in pipeline state during continuous conversation
282
-
283
- ### Improved
284
- - Control loop increased to 100Hz (daemon updated)
285
-
286
- ## [0.5.13] - 2026-01-10
287
-
288
- ### New
289
- - JSON-driven animation system for conversation states
290
- - AnimationPlayer class inspired by SimpleDances project
291
-
292
- ### Refactored
293
- - Replace SpeechSwayGenerator and BreathingAnimation with unified animation system
294
-
295
- ## [0.5.12] - 2026-01-10
296
-
297
- ### Removed
298
- - Deleted broken hey_reachy wake word model
299
-
300
- ### Revert
301
- - Default wake word back to "Okay Nabu"
302
-
303
- ## [0.5.11] - 2026-01-10
304
-
305
- ### Fixed
306
- - Reset feature extractors when switching wake words
307
- - Add refractory period after wake word switch
308
-
309
- ## [0.5.10] - 2026-01-10
310
-
311
- ### Fixed
312
- - Wake word models now have 'id' attribute set correctly
313
- - Wake word switching from Home Assistant now works
314
-
315
- ## [0.5.9] - 2026-01-10
316
-
317
- ### New
318
- - Default wake word changed to hey_reachy
319
-
320
- ### Fixed
321
- - Wake word switching bug
322
-
323
- ## [0.5.8] - 2026-01-09
324
-
325
- ### Fixed
326
- - Tap detection waits for emotion playback to complete
327
- - Poll daemon API for move completion
328
-
329
- ## [0.5.7] - 2026-01-09
330
-
331
- ### New
332
- - DOA turn-to-sound at wakeup
333
-
334
- ### Fixed
335
- - Show raw DOA angle in Home Assistant (0-180)
336
- - Invert DOA yaw direction
337
-
338
- ## [0.5.6] - 2026-01-08
339
-
340
- ### Fixed
341
- - Better pipeline state tracking to prevent duplicate audio
342
-
343
- ## [0.5.5] - 2026-01-08
344
-
345
- ### New
346
- - Prevent concurrent pipelines
347
- - Add prompt sound for continuous conversation
348
-
349
- ## [0.5.4] - 2026-01-08
350
-
351
- ### Fixed
352
- - Wait for RUN_END before starting new conversation
353
-
354
- ## [0.5.3] - 2026-01-08
355
-
356
- ### Fixed
357
- - Improve continuous conversation with conversation_id tracking
358
-
359
- ## [0.5.2] - 2026-01-08
360
-
361
- ### Fixed
362
- - Enable HA control of robot pose
363
- - Continuous conversation improvements
364
-
365
- ## [0.5.1] - 2026-01-08
366
-
367
- ### Fixed
368
- - Sendspin connects to music_player instead of tts_player
369
- - Persist tap_sensitivity settings
370
- - Pause Sendspin during voice assistant wakeup
371
- - Sendspin prioritize 16kHz sample rate
372
-
373
- ## [0.5.0] - 2026-01-07
374
-
375
- ### New
376
- - Face tracking with adaptive frequency
377
- - Sendspin multi-room audio integration
378
-
379
- ### Optimized
380
- - Shutdown mechanism improvements
381
-
382
- ## [0.4.0] - 2026-01-07
383
-
384
- ### Fixed
385
- - Daemon stability fixes
386
-
387
- ### New
388
- - Face tracking enabled by default
389
-
390
- ### Optimized
391
- - Microphone settings for better sensitivity
392
-
393
- ## [0.3.0] - 2026-01-06
394
-
395
- ### New
396
- - Tap sensitivity slider entity
397
-
398
- ### Fixed
399
- - Music Assistant compatibility
400
-
401
- ### Optimized
402
- - Face tracking and tap detection
403
-
404
- ## [0.2.21] - 2026-01-06
405
-
406
- ### Fixed
407
- - Daemon crash - reduce control loop to 2Hz
408
- - Pause control loop during audio playback
409
-
410
- ## [0.2.20] - 2026-01-06
411
-
412
- ### Revert
413
- - Audio/satellite/voice_assistant to v0.2.9 working state
414
-
415
- ## [0.2.19] - 2026-01-06
416
-
417
- ### Fixed
418
- - Force localhost connection mode to prevent WebRTC errors
419
-
420
- ## [0.2.18] - 2026-01-06
421
-
422
- ### Fixed
423
- - Audio playback - restore wakeup sound
424
- - Use push_audio_sample for TTS
425
-
426
- ## [0.2.17] - 2026-01-06
427
-
428
- ### Removed
429
- - head_joints/passive_joints entities
430
- - error_message to diagnostic category
431
-
432
- ## [0.2.16] - 2026-01-06
433
-
434
- ### Fixed
435
- - TTS playback - pause recording during playback
436
-
437
- ## [0.2.15] - 2026-01-06
438
-
439
- ### Fixed
440
- - Use play_sound() instead of push_audio_sample() for TTS
441
-
442
- ## [0.2.14] - 2026-01-06
443
-
444
- ### Fixed
445
- - Pause audio recording during TTS playback
446
-
447
- ## [0.2.13] - 2026-01-06
448
-
449
- ### Fixed
450
- - Don't manually start/stop media - let SDK/daemon manage it
451
-
452
- ## [0.2.12] - 2026-01-05
453
-
454
- ### Fixed
455
- - Disable breathing animation to prevent serial port overflow
456
-
457
- ## [0.2.11] - 2026-01-05
458
-
459
- ### Fixed
460
- - Disable wakeup sound to prevent daemon crash
461
- - Add debug logging for troubleshooting
462
-
463
- ## [0.2.10] - 2026-01-05
464
-
465
- ### Added
466
- - Debug logging for motion init
467
-
468
- ### Fixed
469
- - Audio fallback samplerate
470
-
471
- ## [0.2.9] - 2026-01-05
472
-
473
- ### Removed
474
- - DOA/speech detection - replaced by face tracking
475
-
476
- ## [0.2.8] - 2026-01-05
477
-
478
- ### New
479
- - Replace DOA with YOLO face tracking
480
-
481
- ## [0.2.7] - 2026-01-05
482
-
483
- ### Fixed
484
- - Add DOA caching to prevent ReSpeaker query overload
485
-
486
- ## [0.2.6] - 2026-01-05
487
-
488
- ### New
489
- - Thread-safe ReSpeaker USB access to prevent daemon deadlock
490
-
491
- ## [0.2.4] - 2026-01-05
492
-
493
- ### Fixed
494
- - Microphone volume control via daemon HTTP API
495
-
496
- ## [0.2.3] - 2026-01-05
497
-
498
- ### Fixed
499
- - Daemon crash caused by conflicting pose commands
500
- - Disable: Pose setter methods in ReachyController
501
-
502
- ## [0.2.2] - 2026-01-05
503
-
504
- ### Fixed
505
- - Second conversation motion failure
506
- - Reduce: Control loop from 20Hz to 10Hz
507
- - Improve: Connection recovery (faster reconnect)
508
-
509
- ## [0.2.1] - 2026-01-05
510
-
511
- ### Fixed
512
- - Daemon crash issue
513
- - Optimize: Code structure
514
-
515
- ## [0.2.0] - 2026-01-05
516
-
517
- ### New
518
- - Automatic facial expressions during conversation
519
- - New: Emotion playback integration
520
-
521
- ### Refactored
522
- - Integrate emotion playback into MovementManager
523
-
524
- ## [0.1.5] - 2026-01-04
525
-
526
- ### Optimized
527
- - Code splitting and organization
528
-
529
- ### Fixed
530
- - Program crash issues
531
-
532
- ## [0.1.0] - 2026-01-01
533
-
534
- ### New
535
- - Initial release
536
- - ESPHome protocol server implementation
537
- - mDNS auto-discovery for Home Assistant
538
- - Local wake word detection (microWakeWord)
539
- - Voice assistant pipeline integration
540
- - Basic motion feedback (nod, shake)
541
-
542
- ---
543
-
544
- ## Version History Summary
545
-
546
- | Version | Date | Major Changes |
547
- |---------|------|--------------|
548
- | 0.9.9 | 2026-01-28 | SDK buffer overflow fixes, memory leak fixes, gesture detection optimization |
549
- | 0.9.8 | 2026-01-27 | Mute/Disable entities, HA connection-driven features, log reduction |
550
- | 0.9.7 | 2026-01-20 | Device ID path fix, animation path fix |
551
- | 0.9.6 | 2026-01-20 | Code quality tools (ruff, mypy, pre-commit) |
552
- | 0.9.5 | 2026-01-19 | Modular architecture refactoring, audio latency optimization |
553
- | 0.9.0 | 2026-01-18 | Robot state monitor, system diagnostics entities |
554
- | 0.8.7 | 2026-01-18 | Body yaw clamping, face tracking smoothness |
555
- | 0.8.0 | 2026-01-17 | Emotion keyword mapping (280+ keywords, 35 categories) |
556
- | 0.7.0 | 2026-01-12 | Gesture detection with HaGRID ONNX models (18 gestures) |
557
- | 0.6.0 | 2026-01-11 | Real-time audio-driven speech animation, JSON animation system |
558
- | 0.5.0 | 2026-01-07 | Face tracking, Sendspin multi-room audio |
559
- | 0.4.0 | 2026-01-07 | Daemon stability, microphone optimization |
560
- | 0.3.0 | 2026-01-06 | Tap sensitivity slider |
561
- | 0.2.0 | 2026-01-05 | Emotion playback integration |
562
- | 0.1.0 | 2026-01-01 | Initial release |
563
-
564
- ## Project Statistics
565
-
566
- - **Total Versions**: 29 (from 0.1.0 to 0.9.9)
567
- - **Development Period**: ~30 days (2026-01-01 to 2026-01-28)
568
- - **Average Release Rate**: ~1 version per day
569
- - **Lines of Code**: ~18,000 lines across 52 Python files
570
- - **ESPHome Entities**: 54 entities implemented
571
- - **Supported Features**:
572
- - Voice assistant pipeline integration
573
- - Local wake word detection (multiple models)
574
- - Face tracking with YOLO
575
- - Gesture detection (18 classes)
576
- - Multi-room audio (Sendspin)
577
- - Real-time speech animation
578
- - Emotion keyword detection (280+ keywords)
579
- - System diagnostics
580
-
581
- For detailed implementation notes, see [PROJECT_PLAN.md](./PROJECT_PLAN.md).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Project_Summary.md → PROJECT_PLAN.md RENAMED
The diff for this file is too large to render. See raw diff
 
README.md CHANGED
@@ -9,6 +9,7 @@ short_description: Deep integration of Reachy Mini robot with Home Assistant
9
  tags:
10
  - reachy_mini
11
  - reachy_mini_python_app
 
12
  - reachy_mini_home_assistant
13
  - home_assistant
14
  - homeassistant
 
9
  tags:
10
  - reachy_mini
11
  - reachy_mini_python_app
12
+ - reachy_mini_ha_voice
13
  - reachy_mini_home_assistant
14
  - home_assistant
15
  - homeassistant
changelog.json CHANGED
@@ -1,225 +1,4 @@
1
  [
2
- {
3
- "version": "1.0.3",
4
- "date": "2026-03-07",
5
- "changes": [
6
- "Build: Bump package version to 1.0.3",
7
- "New: Add Idle Random Actions switch in Home Assistant with preferences persistence and startup restore",
8
- "New: Add configurable idle_random_actions action presets in conversation_animations.json for centralized idle motion tuning",
9
- "Fix: Remove duplicate idle_random_actions fields/methods and complete runtime control wiring in controller/entity registry/movement manager",
10
- "Improve: Increase idle breathing and antenna sway cadence to 0.24Hz with wiggle antenna profile for more natural standby motion",
11
- "Optimize: Remove set_target global rate limiting and unchanged-pose skip gating to continuously stream motion commands each control tick",
12
- "Optimize: Remove idle antenna slew-rate limiter so antenna motion follows animation waveforms directly for reference-like smoothness"
13
- ]
14
- },
15
- {
16
- "version": "1.0.2",
17
- "date": "2026-03-06",
18
- "changes": [
19
- "Build: Bump package version to 1.0.2",
20
- "Fix: Restore idle antenna sway animation and tune idle breathing parameters to reduce perceived stiffness",
21
- "Fix: Reintroduce idle anti-chatter smoothing/deadband for antenna and body updates to reduce mechanical jitter/noise",
22
- "Fix: Switch sleep/wake control to daemon API (start/stop with wake_up/goto_sleep) so /api/daemon/status reflects real sleep state on SDK 1.5",
23
- "Fix: Normalize daemon status parsing for SDK 1.5 object-based status responses",
24
- "Fix: Remove all app-side antenna power on/off operations to avoid SDK instability and external-control conflicts",
25
- "Change: Keep idle antenna behavior as animation-only control (no torque coupling)",
26
- "Change: Tighten preference loading to current schema (no legacy config fallback filtering)",
27
- "Fix: Sync Idle Motion toggle with Idle Antenna Motion toggle for expected behavior in ESPHome",
28
- "Fix: Remove legacy app-managed audio routing hooks and rely on native SDK/system audio selection",
29
- "New: Add Home Assistant blueprint for Reachy presence companion automation",
30
- "Improve: Blueprint supports device-first auto-binding and richer usage instructions",
31
- "Docs: Refresh landing page (index.html) with current version, GitHub source link, and new Blueprint/Auto Release capability cards",
32
- "New: Add GitHub workflow to auto-create releases when pyproject/changelog version updates produce a new tag",
33
- "Chore: Ignore local wiki workspace artifacts (local/) from repository tracking"
34
- ]
35
- },
36
- {
37
- "version": "1.0.1",
38
- "date": "2026-03-05",
39
- "changes": [
40
- "Build: Bump package version to 1.0.1",
41
- "Deps: Update runtime dependency baseline to reachy-mini>=1.5.0",
42
- "Fix: Remove legacy Zenoh 7447 startup precheck for SDK v1.5 compatibility",
43
- "Fix: Remove legacy ZError string matching from connection error handling",
44
- "Fix: Adapt daemon status handling to SDK v1.5 DaemonStatus object (prevents AttributeError on status.get)",
45
- "Fix: Harden stop-word handling with runtime activation/deactivation and mute-aware trigger gating",
46
- "Fix: Align wakeup stream start timing with reference behavior (start microphone stream after wakeup sound)",
47
- "Fix: Improve TTS streaming robustness and reduce cutoffs with retry-based audio push",
48
- "Optimize: Support single-request streaming with in-memory fallback cache for one-time TTS URLs (no temp file dependency)",
49
- "Optimize: Lower streaming fetch chunk size and apply unthrottled preroll for faster first audio"
50
- ]
51
- },
52
- {
53
- "version": "1.0.0",
54
- "date": "2026-03-04",
55
- "changes": [
56
- "Build: Bump package version to 1.0.0",
57
- "Deps: Require reachy-mini[gstreamer]>=1.4.1",
58
- "Fix: Improve gesture responsiveness and stability (faster smoothing, min processing cadence, no-gesture alignment)",
59
- "Fix: Auto-match ONNX gesture input size from model shape to prevent INVALID_ARGUMENT dimension errors",
60
- "New: Add Sendspin switch in ESPHome (default OFF, persistent, runtime enable/disable)",
61
- "New: Add Face Tracking and Gesture Detection switches in ESPHome (both default OFF, persistent)",
62
- "New: Add Face Confidence number entity (0.0-1.0, persistent)",
63
- "Optimize: Unload/reload face and gesture models when toggled off/on to save resources",
64
- "Optimize: Idle behavior updated to breathing + look-around alternation, idle antenna sway disabled",
65
- "Optimize: Adjust idle breathing to human-like cadence",
66
- "Fix: Disable antenna torque in idle mode and re-enable outside idle to reduce chatter/noise",
67
- "Fix: Harden startup against import-time failures (lazy emotion library loading and graceful Sendspin disable)",
68
- "Fix: Enforce deterministic audio startup path and fail fast when microphone capture is not ready",
69
- "Optimize: Make MJPEG streaming viewer-aware (skip continuous JPEG encode/push when no stream clients)",
70
- "Optimize: Keep face/gesture AI processing active even when stream viewers are absent",
71
- "Fix: Add on-demand /snapshot JPEG generation when no cached stream frame is available",
72
- "Change: Use camera backend default FPS/resolution for stream path instead of forcing fixed 1080p/25fps"
73
- ]
74
- },
75
- {
76
- "version": "0.9.9",
77
- "date": "2026-01-28",
78
- "changes": [
79
- "Fix: Audio buffer overflow - require Reachy Mini hardware, use only Reachy microphone with 50ms sleep",
80
- "Optimize: Gesture detection sensitivity - remove all confidence filtering, return all detections to Home Assistant",
81
- "Optimize: Gesture detection now runs at 1 frame interval for maximum responsiveness",
82
- "Refactor: Simplify GestureSmoother to frequency-based confirmation (1 frame)",
83
- "Refactor: Remove unused parameters (confidence_threshold, detection_threshold, GestureConfig)",
84
- "Fix: Remove duplicate empty check in gesture detection",
85
- "Optimize: SDK integration - add MediaBackend detection and proper resource cleanup",
86
- "Document: ReSpeaker private attribute access risk with TODO comments"
87
- ]
88
- },
89
- {
90
- "version": "0.9.8",
91
- "date": "2026-01-27",
92
- "changes": [
93
- "New: Mute switch and Disable Camera entities for granular control",
94
- "Fix: Camera disable logic and daemon crash prevention",
95
- "New: Home Assistant connection-driven feature loading with auto suspend/resume",
96
- "Optimize: Reduce log output by 30-40%",
97
- "Fix: Code quality improvements",
98
- "Fix: SDK crash during idle - optimize audio processing and add GStreamer threading lock",
99
- "Optimize: Bundle face tracking model, use SDK Zenoh for daemon monitoring",
100
- "Simplify: Device ID reads /etc/machine-id directly",
101
- "Clean up: Remove unused config items"
102
- ]
103
- },
104
- {
105
- "version": "0.9.7",
106
- "date": "2026-01-20",
107
- "changes": [
108
- "Fix: Device ID file path corrected after util.py moved to core/ subdirectory (prevents HA seeing device as new)",
109
- "Fix: Animation file path corrected (was looking in wrong directory)",
110
- "Fix: Remove hey_jarvis from required wake words (it's optional in openWakeWord/)"
111
- ]
112
- },
113
- {
114
- "version": "0.9.6",
115
- "date": "2026-01-20",
116
- "changes": [
117
- "New: Add ruff linter/formatter and mypy type checker configuration",
118
- "New: Add pre-commit hooks for automated code quality checks",
119
- "Fix: Remove duplicate resume() method in audio_player.py",
120
- "Fix: Remove duplicate connection_lost() method in satellite.py",
121
- "Fix: Store asyncio task reference in sleep_manager.py to prevent garbage collection",
122
- "Optimize: Use dict.items() for efficient iteration in smoothing.py"
123
- ]
124
- },
125
- {
126
- "version": "0.9.5",
127
- "date": "2026-01-19",
128
- "changes": [
129
- "Refactor: Modularize codebase - new core/motion/vision/audio/entities module structure",
130
- "New: Direct callbacks for HA sleep/wake buttons to suspend/resume services",
131
- "Optimize: Audio processing latency - reduced chunk size from 1024 to 256 samples (64ms �?16ms)",
132
- "Optimize: Audio loop delay reduced from 10ms to 1ms for faster VAD response",
133
- "Optimize: Stereo to mono conversion uses first channel instead of mean for cleaner signal",
134
- "Improve: Camera resume_from_suspend now synchronous for reliable wake from sleep",
135
- "Improve: Rotation clamping in face tracking to prevent IK collisions"
136
- ]
137
- },
138
- {
139
- "version": "0.9.0",
140
- "date": "2026-01-18",
141
- "changes": [
142
- "New: Robot state monitor for proper sleep mode handling - services pause when robot disconnects and resume on reconnect",
143
- "New: System diagnostics entities (CPU, memory, disk, uptime) exposed as Home Assistant diagnostic sensors",
144
- "New: Phase 24 with 9 diagnostic sensors (cpu_percent, cpu_temperature, memory_percent, memory_used_gb, disk_percent, disk_free_gb, uptime_hours, process_cpu_percent, process_memory_mb)",
145
- "Fix: Voice assistant and movement manager now properly pause during robot sleep mode instead of generating error spam",
146
- "Improve: Graceful service lifecycle management with RobotStateMonitor callbacks"
147
- ]
148
- },
149
- {
150
- "version": "0.8.7",
151
- "date": "2026-01-18",
152
- "changes": [
153
- "Fix: Clamp body_yaw to safe range to prevent IK collision warnings during emotion playback",
154
- "Fix: Emotion moves and face tracking now respect SDK safety limits",
155
- "Improve: Face tracking smoothness - removed EMA smoothing (matches reference project)",
156
- "Improve: Face tracking timing updated to match reference (2s delay, 1s interpolation)"
157
- ]
158
- },
159
- {
160
- "version": "0.8.6",
161
- "date": "2026-01-18",
162
- "changes": [
163
- "Fix: Audio buffer memory leak - added size limit to prevent unbounded growth",
164
- "Fix: Temp file leak - downloaded audio files now cleaned up after playback",
165
- "Fix: Camera thread termination timeout increased for clean shutdown",
166
- "Fix: Thread-safe draining flag using threading.Event",
167
- "Fix: Silent failures now logged for debugging"
168
- ]
169
- },
170
- {
171
- "version": "0.8.5",
172
- "date": "2026-01-18",
173
- "changes": [
174
- "Fix: DOA turn-to-sound direction inverted - now turns correctly toward sound source",
175
- "Fix: Graceful shutdown prevents daemon crash on app stop"
176
- ]
177
- },
178
- {
179
- "version": "0.8.4",
180
- "date": "2026-01-18",
181
- "changes": [
182
- "Improve: Smooth idle animation with interpolation phase (matches reference BreathingMove)",
183
- "Improve: Two-phase animation - interpolates to neutral before oscillation",
184
- "Fix: Antenna frequency updated to 0.5Hz (was 0.15Hz) for more natural sway"
185
- ]
186
- },
187
- {
188
- "version": "0.8.3",
189
- "date": "2026-01-18",
190
- "changes": [
191
- "Fix: Body now properly follows head rotation during face tracking",
192
- "Fix: body_yaw extracted from final head pose matrix and synced with head_yaw",
193
- "Fix: Matches reference project sweep_look behavior for natural body movement"
194
- ]
195
- },
196
- {
197
- "version": "0.8.2",
198
- "date": "2026-01-18",
199
- "changes": [
200
- "Fix: Body now follows head rotation during face tracking - body_yaw syncs with head_yaw",
201
- "Fix: Matches reference project sweep_look behavior for natural body movement"
202
- ]
203
- },
204
- {
205
- "version": "0.8.1",
206
- "date": "2026-01-18",
207
- "changes": [
208
- "Fix: face_detected entity now pushes state updates to Home Assistant in real-time",
209
- "Fix: Body yaw simplified to match reference project - SDK automatic_body_yaw handles collision prevention",
210
- "Fix: Idle animation now starts immediately on app launch",
211
- "Fix: Smooth antenna animation - removed pose change threshold for continuous motion"
212
- ]
213
- },
214
- {
215
- "version": "0.8.0",
216
- "date": "2026-01-17",
217
- "changes": [
218
- "New: Comprehensive emotion keyword mapping with 280+ Chinese and English keywords",
219
- "New: 35 emotion categories mapped to robot expressions",
220
- "New: Auto-trigger expressions from conversation text patterns"
221
- ]
222
- },
223
  {
224
  "version": "0.7.3",
225
  "date": "2026-01-12",
@@ -614,4 +393,3 @@
614
  ]
615
  }
616
  ]
617
-
 
1
  [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {
3
  "version": "0.7.3",
4
  "date": "2026-01-12",
 
393
  ]
394
  }
395
  ]
 
docs/USER_MANUAL_CN.md DELETED
@@ -1,256 +0,0 @@
1
- # Reachy Mini 语音助手 - 用户手册
2
-
3
- ## 系统要求
4
-
5
- ### 硬件
6
- - Reachy Mini 机器人(带 ReSpeaker XVF3800 麦克风)
7
- - WiFi 网络连接
8
-
9
- ### 软件
10
- - Home Assistant(2024.1 或更高版本)
11
- - Home Assistant 中已启用 ESPHome 集成
12
-
13
- ---
14
-
15
- ## 安装步骤
16
-
17
- ### 第一步:安装应用
18
- 从 Reachy Mini 应用商店安装 `reachy_mini_home_assistant`。
19
-
20
- ### 第二步:启动应用
21
- 应用将自动:
22
- - 在端口 6053 启动 ESPHome 服务器
23
- - 加载预打包的唤醒词模型
24
- - 通过 mDNS 注册以便自动发现
25
- - 如果网络上有 Sendspin 服务器则自动连接
26
-
27
- ### 第三步:连接 Home Assistant
28
- **自动连接(推荐):**
29
- Home Assistant 会通过 mDNS 自动发现 Reachy Mini。
30
-
31
- **手动连接:**
32
- 1. 进入 设置 → 设备与服务
33
- 2. 点击"添加集成"
34
- 3. 选择"ESPHome"
35
- 4. 输入机器人的 IP 地址和端口 6053
36
-
37
- ---
38
-
39
- ## 功能介绍
40
-
41
- ### 语音助手
42
- - **唤醒词检测**:说 "Okay Nabu" 激活(本地处理)
43
- - **停止词**:说 "Stop" 结束对话
44
- - **连续对话模式**:无需重复唤醒词即可持续对话
45
- - **语音识别/合成**:使用 Home Assistant 配置的语音引擎
46
-
47
- **支持的唤醒词:**
48
- - Okay Nabu(默认)
49
- - Hey Jarvis
50
- - Alexa
51
- - Hey Luna
52
-
53
- ### 人脸追踪
54
- - 基于 YOLO 的人脸检测
55
- - 头部跟随检测到的人脸
56
- - 头部转动时身体随之旋转
57
- - 自适应帧率:活跃时 15fps,空闲时 2fps
58
-
59
- ### 手势检测
60
- 检测到的手势及机器人响应:
61
-
62
- | 手势 | 响应 |
63
- |------|------|
64
- | like(竖大拇指)| 开心情绪 |
65
- | dislike(拇指朝下)| 难过情绪 |
66
- | ok | 点头动画 |
67
- | peace(剪刀手)| 热情情绪 |
68
- | stop | 停止说话 |
69
- | call(打电话手势)| 开始聆听 |
70
- | palm(手掌)| 暂停动作 |
71
- | fist(握拳)| 愤怒情绪 |
72
- | one/two/three/four | 发送 HA 事件 |
73
-
74
- ### 情绪响应
75
- 机器人可播放 35 种不同情绪:
76
- - 基础:开心、难过、愤怒、恐惧、惊讶、厌恶
77
- - 扩展:大笑、爱慕、骄傲、感激、热情、好奇、惊叹、害羞、困惑、沉思、焦虑、害怕、沮丧、烦躁、狂怒、轻蔑、无聊、疲倦、精疲力竭、孤独、沮丧、顺从、不确定、不舒服
78
-
79
- ### 音频功能
80
- - 扬声器音量控制(0-100%)
81
- - 麦克风音量控制(0-100%)
82
- - AGC 自动增益控制(0-40dB)
83
- - 噪声抑制(0-100%)
84
- - 回声消除(内置)
85
-
86
- ### Sendspin 多房间音频
87
- - 通过 mDNS 自动发现 Sendspin 服务器
88
- - 同步多房间音频播放
89
- - Reachy Mini 作为 PLAYER 接收音频流
90
- - 语音对话时自动暂停
91
- - 无需用户配置
92
-
93
- ### DOA 声源追踪
94
- - 声源方向检测
95
- - 唤醒时机器人转向声源
96
- - 可通过开关启用/禁用
97
-
98
- ---
99
-
100
- ## Home Assistant 实体
101
-
102
- ### 阶段 1:基础状态
103
- | 实体 | 类型 | 说明 |
104
- |------|------|------|
105
- | Daemon State | 文本传感器 | 机器人守护进程状态 |
106
- | Backend Ready | 二进制传感器 | 后端连接状态 |
107
- | Speaker Volume | 数值 (0-100%) | 扬声器音量控制 |
108
-
109
- ### 阶段 2:电机控制
110
- | 实体 | 类型 | 说明 |
111
- |------|------|------|
112
- | Motors Enabled | 开关 | 电机电源开/关 |
113
- | Wake Up | 按钮 | 唤醒机器人 |
114
- | Go to Sleep | 按钮 | 使机器人睡眠 |
115
- | Sleep Mode | 二进制传感器 | 当前睡眠状态 |
116
- | Services Suspended | 二进制传感器 | ML 模型卸载状态 |
117
-
118
- ### 阶段 3:姿态控制
119
- | 实体 | 类型 | 范围 |
120
- |------|------|------|
121
- | Head X/Y/Z | 数值 | ±50mm |
122
- | Head Roll/Pitch/Yaw | 数值 | ±40° |
123
- | Body Yaw | 数值 | ±160° |
124
- | Antenna Left/Right | 数值 | ±90° |
125
-
126
- ### 阶段 4:注视控制
127
- | 实体 | 类型 | 说明 |
128
- |------|------|------|
129
- | Look At X/Y/Z | 数值 | 注视目标的世界坐标 |
130
-
131
- ### 阶段 5:DOA(声源定位)
132
- | 实体 | 类型 | 说明 |
133
- |------|------|------|
134
- | DOA Angle | 传感器 (°) | 声源方向 |
135
- | Speech Detected | 二进制传感器 | 语音活动检测 |
136
- | DOA Sound Tracking | 开关 | 启用/禁用 DOA 追踪 |
137
-
138
- ### 阶段 6:诊断信息
139
- | 实体 | 类型 | 说明 |
140
- |------|------|------|
141
- | Control Loop Frequency | 传感器 (Hz) | 运动控制循环频率 |
142
- | SDK Version | 文本传感器 | Reachy Mini SDK 版本 |
143
- | Robot Name | 文本传感器 | 设备名称 |
144
- | Wireless Version | 二进制传感器 | 无线版本标志 |
145
- | Simulation Mode | 二进制传感器 | 仿真模式标志 |
146
- | WLAN IP | 文本传感器 | WiFi IP 地址 |
147
- | Error Message | 文本传感器 | 当前错误 |
148
-
149
- ### 阶段 7:IMU 传感器(仅无线版本)
150
- | 实体 | 类型 | 说明 |
151
- |------|------|------|
152
- | IMU Accel X/Y/Z | 传感器 (m/s²) | 加速度计 |
153
- | IMU Gyro X/Y/Z | 传感器 (rad/s) | 陀螺仪 |
154
- | IMU Temperature | 传感器 (°C) | IMU 温度 |
155
-
156
- ### 阶段 8:情绪控制
157
- | 实体 | 类型 | 说明 |
158
- |------|------|------|
159
- | Emotion | 选择器 | 选择要播放的情绪(35 个选项)|
160
-
161
- ### 阶段 9:音频控制
162
- | 实体 | 类型 | 说明 |
163
- |------|------|------|
164
- | Microphone Volume | 数值 (0-100%) | 麦克风增益控制 |
165
-
166
- ### 阶段 10:摄像头
167
- | 实体 | 类型 | 说明 |
168
- |------|------|------|
169
- | Camera | 摄像头 | 实时 MJPEG 流 |
170
-
171
- ### 3D 可视化卡片
172
- 可在 Home Assistant 中安装自定义 Lovelace 卡片,实时 3D 可视化 Reachy Mini 机器人。
173
-
174
- 安装地址:[ha-reachy-mini](https://github.com/Desmond-Dong/ha-reachy-mini)
175
-
176
- 功能:
177
- - 实时 3D 机器人可视化
178
- - 交互式机器人状态视图
179
- - 连接机器人守护进程获取实时更新
180
-
181
- ### 阶段 12:音频处理
182
- | 实体 | 类型 | 说明 |
183
- |------|------|------|
184
- | AGC Enabled | 开关 | 自动增益控制开/关 |
185
- | AGC Max Gain | 数值 (0-40dB) | 最大 AGC 增益 |
186
- | Noise Suppression | 数值 (0-100%) | 噪声抑制级别 |
187
- | Echo Cancellation Converged | 二进制传感器 | AEC 状态 |
188
-
189
- ### 阶段 21:对话
190
- | 实体 | 类型 | 说明 |
191
- |------|------|------|
192
- | Continuous Conversation | 开关 | 多轮对话模式 |
193
-
194
- ### 阶段 22:手势检测
195
- | 实体 | 类型 | 说明 |
196
- |------|------|------|
197
- | Gesture Detected | 文本传感器 | 当前手势名称 |
198
- | Gesture Confidence | 传感器 (%) | 检测置信度 |
199
-
200
- ### 阶段 23:人脸检测
201
- | 实体 | 类型 | 说明 |
202
- |------|------|------|
203
- | Face Detected | 二进制传感器 | 视野中是否有人脸 |
204
-
205
- ### 阶段 24:系统诊断
206
- | 实体 | 类型 | 说明 |
207
- |------|------|------|
208
- | CPU Percent | 传感器 (%) | CPU 使用率 |
209
- | CPU Temperature | 传感器 (°C) | CPU 温度 |
210
- | Memory Percent | 传感器 (%) | 内存使用率 |
211
- | Memory Used | 传感器 (GB) | 已用内存 |
212
- | Disk Percent | 传感器 (%) | 磁盘使用率 |
213
- | Disk Free | 传感器 (GB) | 磁盘可用空间 |
214
- | Uptime | 传感器 (hours) | 系统运行时间 |
215
- | Process CPU | 传感器 (%) | 应用 CPU 使用率 |
216
- | Process Memory | 传感器 (MB) | 应用内存使用 |
217
-
218
- ---
219
-
220
- ## 睡眠模式
221
-
222
- ### 进入睡眠
223
- - 在 Home Assistant 中按"Go to Sleep"按钮
224
- - 机器人放松电机、停止摄像头、暂停语音检测
225
-
226
- ### 唤醒
227
- - 在 Home Assistant 中按"Wake Up"按钮
228
- - 或说唤醒词
229
- - 机器人恢复所有功能
230
-
231
- ---
232
-
233
- ## 故障排除
234
-
235
- | 问题 | 解决方案 |
236
- |------|----------|
237
- | 不响应唤醒词 | 增加 AGC Max Gain,减少背景噪音 |
238
- | 人脸追踪不工作 | 确保光线充足,检查 Face Detected 传感器 |
239
- | 没有音频输出 | 检查 Speaker Volume,验证 HA 中的 TTS 引擎 |
240
- | 无法连接 HA | 确认在同一网络,检查端口 6053 |
241
- | 手势检测不到 | 确保光线充足,正对摄像头 |
242
-
243
- ---
244
-
245
- ## 快速参考
246
-
247
- ```
248
- 唤醒词: "Okay Nabu"
249
- 停止词: "Stop"
250
- ESPHome 端口: 6053
251
- 摄像头端口: 8081 (MJPEG)
252
- ```
253
-
254
- ---
255
-
256
- *Reachy Mini 语音助手 v0.9.5*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
docs/USER_MANUAL_EN.md DELETED
@@ -1,256 +0,0 @@
1
- # Reachy Mini Voice Assistant - User Manual
2
-
3
- ## Requirements
4
-
5
- ### Hardware
6
- - Reachy Mini robot (with ReSpeaker XVF3800 microphone)
7
- - WiFi network connection
8
-
9
- ### Software
10
- - Home Assistant (2024.1 or later)
11
- - ESPHome integration enabled in Home Assistant
12
-
13
- ---
14
-
15
- ## Installation
16
-
17
- ### Step 1: Install the App
18
- Install `reachy_mini_home_assistant` from the Reachy Mini App Store.
19
-
20
- ### Step 2: Start the App
21
- The app will automatically:
22
- - Start the ESPHome server on port 6053
23
- - Load pre-packaged wake word models
24
- - Register with mDNS for auto-discovery
25
- - Connect to Sendspin server if available on network
26
-
27
- ### Step 3: Connect to Home Assistant
28
- **Automatic (Recommended):**
29
- Home Assistant will auto-discover Reachy Mini via mDNS.
30
-
31
- **Manual:**
32
- 1. Go to Settings → Devices & Services
33
- 2. Click "Add Integration"
34
- 3. Select "ESPHome"
35
- 4. Enter the robot's IP address and port 6053
36
-
37
- ---
38
-
39
- ## Features
40
-
41
- ### Voice Assistant
42
- - **Wake Word Detection**: Say "Okay Nabu" to activate (local processing)
43
- - **Stop Word**: Say "Stop" to end conversation
44
- - **Continuous Conversation Mode**: Keep talking without repeating wake word
45
- - **STT/TTS**: Uses Home Assistant's configured speech engines
46
-
47
- **Supported Wake Words:**
48
- - Okay Nabu (default)
49
- - Hey Jarvis
50
- - Alexa
51
- - Hey Luna
52
-
53
- ### Face Tracking
54
- - YOLO-based face detection
55
- - Head follows detected face
56
- - Body follows head when turned far
57
- - Adaptive frame rate: 15fps active, 2fps idle
58
-
59
- ### Gesture Detection
60
- Detected gestures and robot responses:
61
-
62
- | Gesture | Response |
63
- |---------|----------|
64
- | like (thumbs up) | Cheerful emotion |
65
- | dislike (thumbs down) | Sad emotion |
66
- | ok | Nod animation |
67
- | peace | Enthusiastic emotion |
68
- | stop | Stop speaking |
69
- | call | Start listening |
70
- | palm | Pause motion |
71
- | fist | Rage emotion |
72
- | one/two/three/four | Send HA event |
73
-
74
- ### Emotion Responses
75
- The robot can play 35 different emotions:
76
- - Basic: Happy, Sad, Angry, Fear, Surprise, Disgust
77
- - Extended: Laughing, Loving, Proud, Grateful, Enthusiastic, Curious, Amazed, Shy, Confused, Thoughtful, Anxious, Scared, Frustrated, Irritated, Furious, Contempt, Bored, Tired, Exhausted, Lonely, Downcast, Resigned, Uncertain, Uncomfortable
78
-
79
- ### Audio Features
80
- - Speaker volume control (0-100%)
81
- - Microphone volume control (0-100%)
82
- - AGC (Auto Gain Control, 0-40dB)
83
- - Noise suppression (0-100%)
84
- - Echo cancellation (built-in)
85
-
86
- ### Sendspin Multi-Room Audio
87
- - Automatic discovery of Sendspin servers via mDNS
88
- - Synchronized multi-room audio playback
89
- - Reachy Mini acts as a PLAYER to receive audio streams
90
- - Auto-pause during voice conversations
91
- - No user configuration required
92
-
93
- ### DOA Sound Tracking
94
- - Direction of Arrival detection
95
- - Robot turns toward sound source on wake word
96
- - Can be enabled/disabled via switch
97
-
98
- ---
99
-
100
- ## Home Assistant Entities
101
-
102
- ### Phase 1: Basic Status
103
- | Entity | Type | Description |
104
- |--------|------|-------------|
105
- | Daemon State | Text Sensor | Robot daemon status |
106
- | Backend Ready | Binary Sensor | Backend connection status |
107
- | Speaker Volume | Number (0-100%) | Speaker volume control |
108
-
109
- ### Phase 2: Motor Control
110
- | Entity | Type | Description |
111
- |--------|------|-------------|
112
- | Motors Enabled | Switch | Motor power on/off |
113
- | Wake Up | Button | Wake robot from sleep |
114
- | Go to Sleep | Button | Put robot to sleep |
115
- | Sleep Mode | Binary Sensor | Current sleep state |
116
- | Services Suspended | Binary Sensor | ML models unloaded state |
117
-
118
- ### Phase 3: Pose Control
119
- | Entity | Type | Range |
120
- |--------|------|-------|
121
- | Head X/Y/Z | Number | ±50mm |
122
- | Head Roll/Pitch/Yaw | Number | ±40° |
123
- | Body Yaw | Number | ±160° |
124
- | Antenna Left/Right | Number | ±90° |
125
-
126
- ### Phase 4: Look At Control
127
- | Entity | Type | Description |
128
- |--------|------|-------------|
129
- | Look At X/Y/Z | Number | World coordinates for gaze target |
130
-
131
- ### Phase 5: DOA (Direction of Arrival)
132
- | Entity | Type | Description |
133
- |--------|------|-------------|
134
- | DOA Angle | Sensor (°) | Sound source direction |
135
- | Speech Detected | Binary Sensor | Voice activity detection |
136
- | DOA Sound Tracking | Switch | Enable/disable DOA tracking |
137
-
138
- ### Phase 6: Diagnostics
139
- | Entity | Type | Description |
140
- |--------|------|-------------|
141
- | Control Loop Frequency | Sensor (Hz) | Motion control loop rate |
142
- | SDK Version | Text Sensor | Reachy Mini SDK version |
143
- | Robot Name | Text Sensor | Device name |
144
- | Wireless Version | Binary Sensor | Wireless model flag |
145
- | Simulation Mode | Binary Sensor | Simulation flag |
146
- | WLAN IP | Text Sensor | WiFi IP address |
147
- | Error Message | Text Sensor | Current error |
148
-
149
- ### Phase 7: IMU Sensors (Wireless version only)
150
- | Entity | Type | Description |
151
- |--------|------|-------------|
152
- | IMU Accel X/Y/Z | Sensor (m/s²) | Accelerometer |
153
- | IMU Gyro X/Y/Z | Sensor (rad/s) | Gyroscope |
154
- | IMU Temperature | Sensor (°C) | IMU temperature |
155
-
156
- ### Phase 8: Emotion Control
157
- | Entity | Type | Description |
158
- |--------|------|-------------|
159
- | Emotion | Select | Choose emotion to play (35 options) |
160
-
161
- ### Phase 9: Audio Control
162
- | Entity | Type | Description |
163
- |--------|------|-------------|
164
- | Microphone Volume | Number (0-100%) | Mic gain control |
165
-
166
- ### Phase 10: Camera
167
- | Entity | Type | Description |
168
- |--------|------|-------------|
169
- | Camera | Camera | Live MJPEG stream |
170
-
171
- ### 3D Visualization Card
172
- A custom Lovelace card is available for real-time 3D visualization of the Reachy Mini robot in Home Assistant.
173
-
174
- Install from: [ha-reachy-mini](https://github.com/Desmond-Dong/ha-reachy-mini)
175
-
176
- Features:
177
- - Real-time 3D robot visualization
178
- - Interactive view of robot state
179
- - Connects to robot daemon for live updates
180
-
181
- ### Phase 12: Audio Processing
182
- | Entity | Type | Description |
183
- |--------|------|-------------|
184
- | AGC Enabled | Switch | Auto gain control on/off |
185
- | AGC Max Gain | Number (0-40dB) | Maximum AGC gain |
186
- | Noise Suppression | Number (0-100%) | Noise reduction level |
187
- | Echo Cancellation Converged | Binary Sensor | AEC status |
188
-
189
- ### Phase 21: Conversation
190
- | Entity | Type | Description |
191
- |--------|------|-------------|
192
- | Continuous Conversation | Switch | Multi-turn conversation mode |
193
-
194
- ### Phase 22: Gesture Detection
195
- | Entity | Type | Description |
196
- |--------|------|-------------|
197
- | Gesture Detected | Text Sensor | Current gesture name |
198
- | Gesture Confidence | Sensor (%) | Detection confidence |
199
-
200
- ### Phase 23: Face Detection
201
- | Entity | Type | Description |
202
- |--------|------|-------------|
203
- | Face Detected | Binary Sensor | Face in view |
204
-
205
- ### Phase 24: System Diagnostics
206
- | Entity | Type | Description |
207
- |--------|------|-------------|
208
- | CPU Percent | Sensor (%) | CPU usage |
209
- | CPU Temperature | Sensor (°C) | CPU temperature |
210
- | Memory Percent | Sensor (%) | RAM usage |
211
- | Memory Used | Sensor (GB) | RAM used |
212
- | Disk Percent | Sensor (%) | Disk usage |
213
- | Disk Free | Sensor (GB) | Disk free space |
214
- | Uptime | Sensor (hours) | System uptime |
215
- | Process CPU | Sensor (%) | App CPU usage |
216
- | Process Memory | Sensor (MB) | App memory usage |
217
-
218
- ---
219
-
220
- ## Sleep Mode
221
-
222
- ### Enter Sleep
223
- - Press "Go to Sleep" button in Home Assistant
224
- - Robot relaxes motors, stops camera, pauses voice detection
225
-
226
- ### Wake Up
227
- - Press "Wake Up" button in Home Assistant
228
- - Or say the wake word
229
- - Robot resumes all functions
230
-
231
- ---
232
-
233
- ## Troubleshooting
234
-
235
- | Problem | Solution |
236
- |---------|----------|
237
- | Not responding to wake word | Increase AGC Max Gain, reduce background noise |
238
- | Face tracking not working | Ensure adequate lighting, check Face Detected sensor |
239
- | No audio output | Check Speaker Volume, verify TTS engine in HA |
240
- | Can't connect to HA | Verify same network, check port 6053 |
241
- | Gestures not detected | Ensure good lighting, face the camera directly |
242
-
243
- ---
244
-
245
- ## Quick Reference
246
-
247
- ```
248
- Wake Word: "Okay Nabu"
249
- Stop Word: "Stop"
250
- ESPHome Port: 6053
251
- Camera Port: 8081 (MJPEG)
252
- ```
253
-
254
- ---
255
-
256
- *Reachy Mini Voice Assistant v0.9.5*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
home_assistant_blueprints/reachy_mini_presence_companion.yaml DELETED
@@ -1,288 +0,0 @@
1
- blueprint:
2
- name: Reachy Mini Presence Companion
3
- description: >-
4
- Presence-driven automation for Reachy Mini in Home Assistant.
5
-
6
- How to use:
7
- 1) Select Home occupancy entity (person/group/binary_sensor).
8
- 2) Select Reachy ESPHome device (recommended).
9
- 3) Leave optional fallback entity inputs empty unless auto-binding fails.
10
- 4) Set away delay and day/night volume.
11
-
12
- What this automation does:
13
- - Occupied: Wake Reachy, enable idle motion + idle antenna motion, set day volume.
14
- - Empty (after delay): Disable idle motion + idle antenna motion, send Reachy to sleep.
15
- - Quiet hours start/end: Apply night/day volume while occupied.
16
-
17
- Auto-binding rules (when Reachy device is selected):
18
- - Wake button suffix: wake_up
19
- - Sleep button suffix: go_to_sleep
20
- - Idle motion switch suffix: idle_motion_enabled
21
- - Idle antenna switch suffix: idle_antenna_enabled
22
- - Volume number suffix: speaker_volume
23
-
24
- If your entities use different names, fill optional fallback entity inputs manually.
25
- domain: automation
26
- input:
27
- occupancy_entity:
28
- name: Home occupancy entity
29
- description: Person, group, or binary sensor representing home presence.
30
- selector:
31
- entity: {}
32
-
33
- reachy_device:
34
- name: Reachy device (recommended)
35
- description: Select your Reachy ESPHome device for automatic entity binding.
36
- default: ""
37
- selector:
38
- device:
39
- filter:
40
- - integration: esphome
41
-
42
- reachy_wake_button:
43
- name: Wake Up button (optional fallback)
44
- description: Leave empty to auto-bind from Reachy device.
45
- default: ""
46
- selector:
47
- entity:
48
- domain: button
49
-
50
- reachy_sleep_button:
51
- name: Go To Sleep button (optional fallback)
52
- description: Leave empty to auto-bind from Reachy device.
53
- default: ""
54
- selector:
55
- entity:
56
- domain: button
57
-
58
- idle_motion_switch:
59
- name: Idle Motion switch (optional fallback)
60
- description: Leave empty to auto-bind from Reachy device.
61
- default: ""
62
- selector:
63
- entity:
64
- domain: switch
65
-
66
- idle_antenna_motion_switch:
67
- name: Idle Antenna Motion switch (optional fallback)
68
- description: Leave empty to auto-bind from Reachy device.
69
- default: ""
70
- selector:
71
- entity:
72
- domain: switch
73
-
74
- reachy_volume_number:
75
- name: Speaker Volume number (optional fallback)
76
- description: Leave empty to auto-bind from Reachy device.
77
- default: ""
78
- selector:
79
- entity:
80
- domain: number
81
-
82
- away_delay_minutes:
83
- name: Away delay (minutes)
84
- description: Wait before sleeping after everyone leaves.
85
- default: 20
86
- selector:
87
- number:
88
- min: 1
89
- max: 180
90
- mode: box
91
- unit_of_measurement: min
92
-
93
- day_volume:
94
- name: Day volume
95
- default: 0.8
96
- selector:
97
- number:
98
- min: 0
99
- max: 1
100
- step: 0.05
101
- mode: slider
102
-
103
- night_volume:
104
- name: Night volume
105
- default: 0.35
106
- selector:
107
- number:
108
- min: 0
109
- max: 1
110
- step: 0.05
111
- mode: slider
112
-
113
- quiet_start:
114
- name: Quiet hours start
115
- default: "22:30:00"
116
- selector:
117
- time: {}
118
-
119
- quiet_end:
120
- name: Quiet hours end
121
- default: "07:30:00"
122
- selector:
123
- time: {}
124
-
125
- mode: restart
126
-
127
- variables:
128
- occupancy_entity: !input occupancy_entity
129
- reachy_device: !input reachy_device
130
- manual_wake_button: !input reachy_wake_button
131
- manual_sleep_button: !input reachy_sleep_button
132
- manual_idle_motion_switch: !input idle_motion_switch
133
- manual_idle_antenna_switch: !input idle_antenna_motion_switch
134
- manual_volume_number: !input reachy_volume_number
135
- day_volume: !input day_volume
136
- night_volume: !input night_volume
137
-
138
- device_entities_list: >-
139
- {{ device_entities(reachy_device) if reachy_device else [] }}
140
-
141
- wake_button_auto: >-
142
- {{ (device_entities_list | select('match', '^button\..*wake_up$') | list | first) or '' }}
143
- sleep_button_auto: >-
144
- {{ (device_entities_list | select('match', '^button\..*go_to_sleep$') | list | first) or '' }}
145
- idle_motion_switch_auto: >-
146
- {{ (device_entities_list | select('match', '^switch\..*idle_motion_enabled$') | list | first) or '' }}
147
- idle_antenna_switch_auto: >-
148
- {{ (device_entities_list | select('match', '^switch\..*idle_antenna_enabled$') | list | first) or '' }}
149
- volume_number_auto: >-
150
- {{ (device_entities_list | select('match', '^number\..*speaker_volume$') | list | first) or '' }}
151
-
152
- wake_button: >-
153
- {{ manual_wake_button if manual_wake_button else wake_button_auto }}
154
- sleep_button: >-
155
- {{ manual_sleep_button if manual_sleep_button else sleep_button_auto }}
156
- idle_motion_switch: >-
157
- {{ manual_idle_motion_switch if manual_idle_motion_switch else idle_motion_switch_auto }}
158
- idle_antenna_motion_switch: >-
159
- {{ manual_idle_antenna_switch if manual_idle_antenna_switch else idle_antenna_switch_auto }}
160
- volume_number: >-
161
- {{ manual_volume_number if manual_volume_number else volume_number_auto }}
162
-
163
- is_occupied: >-
164
- {{ states(occupancy_entity) in ['home', 'on'] }}
165
-
166
- trigger:
167
- - platform: state
168
- id: occupied_home
169
- entity_id: !input occupancy_entity
170
- to: "home"
171
-
172
- - platform: state
173
- id: occupied_on
174
- entity_id: !input occupancy_entity
175
- to: "on"
176
-
177
- - platform: state
178
- id: empty_not_home
179
- entity_id: !input occupancy_entity
180
- to: "not_home"
181
- for:
182
- minutes: !input away_delay_minutes
183
-
184
- - platform: state
185
- id: empty_off
186
- entity_id: !input occupancy_entity
187
- to: "off"
188
- for:
189
- minutes: !input away_delay_minutes
190
-
191
- - platform: time
192
- id: quiet_start
193
- at: !input quiet_start
194
-
195
- - platform: time
196
- id: quiet_end
197
- at: !input quiet_end
198
-
199
- action:
200
- - choose:
201
- - conditions:
202
- - condition: template
203
- value_template: "{{ trigger.id in ['occupied_home', 'occupied_on'] }}"
204
- sequence:
205
- - if:
206
- - condition: template
207
- value_template: "{{ wake_button != '' }}"
208
- then:
209
- - service: button.press
210
- target:
211
- entity_id: "{{ wake_button }}"
212
- - if:
213
- - condition: template
214
- value_template: "{{ idle_motion_switch != '' }}"
215
- then:
216
- - service: switch.turn_on
217
- target:
218
- entity_id: "{{ idle_motion_switch }}"
219
- - if:
220
- - condition: template
221
- value_template: "{{ idle_antenna_motion_switch != '' }}"
222
- then:
223
- - service: switch.turn_on
224
- target:
225
- entity_id: "{{ idle_antenna_motion_switch }}"
226
- - if:
227
- - condition: template
228
- value_template: "{{ volume_number != '' }}"
229
- then:
230
- - service: number.set_value
231
- target:
232
- entity_id: "{{ volume_number }}"
233
- data:
234
- value: "{{ day_volume }}"
235
-
236
- - conditions:
237
- - condition: template
238
- value_template: "{{ trigger.id in ['empty_not_home', 'empty_off'] }}"
239
- sequence:
240
- - if:
241
- - condition: template
242
- value_template: "{{ idle_motion_switch != '' }}"
243
- then:
244
- - service: switch.turn_off
245
- target:
246
- entity_id: "{{ idle_motion_switch }}"
247
- - if:
248
- - condition: template
249
- value_template: "{{ idle_antenna_motion_switch != '' }}"
250
- then:
251
- - service: switch.turn_off
252
- target:
253
- entity_id: "{{ idle_antenna_motion_switch }}"
254
- - if:
255
- - condition: template
256
- value_template: "{{ sleep_button != '' }}"
257
- then:
258
- - service: button.press
259
- target:
260
- entity_id: "{{ sleep_button }}"
261
-
262
- - conditions:
263
- - condition: template
264
- value_template: "{{ trigger.id == 'quiet_start' and is_occupied }}"
265
- sequence:
266
- - if:
267
- - condition: template
268
- value_template: "{{ volume_number != '' }}"
269
- then:
270
- - service: number.set_value
271
- target:
272
- entity_id: "{{ volume_number }}"
273
- data:
274
- value: "{{ night_volume }}"
275
-
276
- - conditions:
277
- - condition: template
278
- value_template: "{{ trigger.id == 'quiet_end' and is_occupied }}"
279
- sequence:
280
- - if:
281
- - condition: template
282
- value_template: "{{ volume_number != '' }}"
283
- then:
284
- - service: number.set_value
285
- target:
286
- entity_id: "{{ volume_number }}"
287
- data:
288
- value: "{{ day_volume }}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
index.html CHANGED
@@ -18,24 +18,21 @@
18
  <span class="brand-name">Reachy Mini for Home Assistant</span>
19
  </div>
20
  <div class="pill">Voice · Gestures · Smart Home</div>
21
- <div class="version-pill" id="version-pill">v1.0.2</div>
22
  </div>
23
  <div class="hero-grid">
24
  <div class="hero-copy">
25
  <p class="eyebrow">Reachy Mini App</p>
26
  <h1>Your robot meets your Home Assistant.</h1>
27
  <p class="lede">
28
- Transform Reachy Mini Wi-Fi into a voice-controlled smart home hub. Natural conversations, expressive movements, gesture recognition — all seamlessly connected to Home Assistant.
29
  </p>
30
  <div class="hero-actions">
31
- <a class="btn primary" href="#requirements">Requirements</a>
32
  <a class="btn ghost" href="#install">Quick Start</a>
33
- <a class="btn ghost" href="#features">Features</a>
34
  </div>
35
  <div class="hero-badges">
36
  <span>🎤 Wake Word</span>
37
  <span>👀 Face Tracking</span>
38
- <span>🔄 Body Following</span>
39
  <span>🤚 18 Gestures</span>
40
  <span>🔊 Multi-room Audio</span>
41
  <span>⚡ Zero Config</span>
@@ -50,69 +47,6 @@
50
  </div>
51
  </header>
52
 
53
- <section id="requirements" class="section">
54
- <div class="section-header">
55
- <p class="eyebrow">Before You Start</p>
56
- <h2>Requirements</h2>
57
- <p class="intro">Make sure you have everything ready for a smooth setup.</p>
58
- </div>
59
- <div class="requirements-grid">
60
- <div class="requirement-card">
61
- <span class="icon">🤖</span>
62
- <h3>Reachy Mini Wi-Fi</h3>
63
- <p>This app requires the <strong>Wi-Fi version</strong> of Reachy Mini. The USB version has not been validated</p>
64
- </div>
65
- <div class="requirement-card">
66
- <span class="icon">🏠</span>
67
- <h3>Home Assistant</h3>
68
- <p>A running Home Assistant instance </p>
69
- </div>
70
- <div class="requirement-card">
71
- <span class="icon">📶</span>
72
- <h3>Same Network</h3>
73
- <p>Both Reachy Mini and Home Assistant must be on the <strong>same local network</strong>.</p>
74
- </div>
75
- <div class="requirement-card">
76
- <span class="icon">🎙️</span>
77
- <h3>Voice Pipeline</h3>
78
- <p>Configure a <strong>Voice Assistant pipeline</strong> in Home Assistant (STT + TTS + LLM).</p>
79
- </div>
80
- </div>
81
- </section>
82
-
83
- <section id="install" class="section story">
84
- <div class="section-header">
85
- <p class="eyebrow">Getting Started</p>
86
- <h2>Quick Start</h2>
87
- <p class="intro">Install and connect in under a minute. No configuration needed.</p>
88
- </div>
89
- <div class="story-grid">
90
- <div class="story-card">
91
- <p class="eyebrow">Installation</p>
92
- <h3>Up and running in 1 minute</h3>
93
- <ul class="story-list">
94
- <li><span>1️⃣</span> Open Reachy Mini Dashboard → Applications</li>
95
- <li><span>2️⃣</span> Enable "Show community apps"</li>
96
- <li><span>3️⃣</span> Install "Reachy Mini for Home Assistant"</li>
97
- <li><span>4️⃣</span> Home Assistant discovers automatically</li>
98
- </ul>
99
- </div>
100
- <div class="story-card secondary">
101
- <p class="eyebrow">How it works</p>
102
- <h3>Seamless integration</h3>
103
- <p class="story-text">
104
- This Reachy Mini app uses ESPHome protocol to communicate with Home Assistant — no ESPHome device needed. Home Assistant discovers it via mDNS and adds all 45+ entities automatically. Voice commands are processed by your Home Assistant instance — STT, intent recognition, and TTS all happen there.
105
- </p>
106
- <div class="chips">
107
- <span class="chip">ESPHome Protocol</span>
108
- <span class="chip">mDNS Discovery</span>
109
- <span class="chip">45+ Entities</span>
110
- <span class="chip">Zero Config</span>
111
- </div>
112
- </div>
113
- </div>
114
- </section>
115
-
116
  <section id="features" class="section features">
117
  <div class="section-header">
118
  <p class="eyebrow">Capabilities</p>
@@ -133,7 +67,7 @@
133
  <div class="feature-card">
134
  <span class="icon">👀</span>
135
  <h3>Face Tracking</h3>
136
- <p>YOLO-based face detection with body following. Head and body move together naturally to track you during conversations.</p>
137
  </div>
138
  <div class="feature-card">
139
  <span class="icon">🤚</span>
@@ -143,7 +77,7 @@
143
  <div class="feature-card">
144
  <span class="icon">😊</span>
145
  <h3>Expressive Motion</h3>
146
- <p>280+ emotion keywords trigger 35 expressions. Real-time audio-driven animations with natural head sway during conversations.</p>
147
  </div>
148
  <div class="feature-card">
149
  <span class="icon">📹</span>
@@ -165,15 +99,33 @@
165
  <h3>Dashboard Card</h3>
166
  <p>Custom Lovelace card for Home Assistant. Real-time 3D visualization of robot pose and status.</p>
167
  </div>
168
- <div class="feature-card">
169
- <span class="icon">🧩</span>
170
- <h3>HA Blueprint</h3>
171
- <p>Device-first Home Assistant blueprint for presence automations with Reachy wake/sleep and volume routines.</p>
 
 
 
 
 
 
 
 
 
 
172
  </div>
173
- <div class="feature-card">
174
- <span class="icon">🚀</span>
175
- <h3>Auto Release</h3>
176
- <p>Version-driven GitHub release workflow. Update pyproject/changelog, then release is created automatically.</p>
 
 
 
 
 
 
 
 
177
  </div>
178
  </div>
179
  </section>
@@ -197,15 +149,6 @@
197
  fetch('changelog.json')
198
  .then(res => res.json())
199
  .then(data => {
200
- // Update version pill with latest version
201
- if (data.length > 0) {
202
- const versionPill = document.getElementById('version-pill');
203
- if (versionPill) {
204
- versionPill.textContent = `v${data[0].version}`;
205
- }
206
- }
207
-
208
- // Populate changelog grid
209
  const mainGrid = document.getElementById('changelog-grid');
210
  const olderGrid = document.getElementById('changelog-older');
211
  data.forEach((item, index) => {
@@ -236,15 +179,10 @@
236
  <h3>HA Dashboard Card</h3>
237
  <p>Lovelace Card for HA</p>
238
  </a>
239
- <a href="https://github.com/ha-china/Reachy_Mini_For_Home_Assistant" target="_blank" class="link-card">
240
  <span class="icon">📦</span>
241
  <h3>Source Code</h3>
242
- <p>GitHub Repository</p>
243
- </a>
244
- <a href="home_assistant_blueprints/reachy_mini_presence_companion.yaml" target="_blank" class="link-card">
245
- <span class="icon">🧩</span>
246
- <h3>HA Blueprint</h3>
247
- <p>Presence Companion YAML</p>
248
  </a>
249
  <a href="https://www.pollen-robotics.com/" target="_blank" class="link-card">
250
  <span class="icon">🤖</span>
 
18
  <span class="brand-name">Reachy Mini for Home Assistant</span>
19
  </div>
20
  <div class="pill">Voice · Gestures · Smart Home</div>
 
21
  </div>
22
  <div class="hero-grid">
23
  <div class="hero-copy">
24
  <p class="eyebrow">Reachy Mini App</p>
25
  <h1>Your robot meets your Home Assistant.</h1>
26
  <p class="lede">
27
+ Transform Reachy Mini into a voice-controlled smart home hub. Natural conversations, expressive movements, gesture recognition — all seamlessly connected to Home Assistant.
28
  </p>
29
  <div class="hero-actions">
30
+ <a class="btn primary" href="#features">Explore Features</a>
31
  <a class="btn ghost" href="#install">Quick Start</a>
 
32
  </div>
33
  <div class="hero-badges">
34
  <span>🎤 Wake Word</span>
35
  <span>👀 Face Tracking</span>
 
36
  <span>🤚 18 Gestures</span>
37
  <span>🔊 Multi-room Audio</span>
38
  <span>⚡ Zero Config</span>
 
47
  </div>
48
  </header>
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  <section id="features" class="section features">
51
  <div class="section-header">
52
  <p class="eyebrow">Capabilities</p>
 
67
  <div class="feature-card">
68
  <span class="icon">👀</span>
69
  <h3>Face Tracking</h3>
70
+ <p>YOLO-based face detection. Reachy looks at you during conversations with adaptive frame rate optimization.</p>
71
  </div>
72
  <div class="feature-card">
73
  <span class="icon">🤚</span>
 
77
  <div class="feature-card">
78
  <span class="icon">😊</span>
79
  <h3>Expressive Motion</h3>
80
+ <p>Real-time audio-driven animations. Natural head sway and antenna movements during conversations.</p>
81
  </div>
82
  <div class="feature-card">
83
  <span class="icon">📹</span>
 
99
  <h3>Dashboard Card</h3>
100
  <p>Custom Lovelace card for Home Assistant. Real-time 3D visualization of robot pose and status.</p>
101
  </div>
102
+ </div>
103
+ </section>
104
+
105
+ <section id="install" class="section story">
106
+ <div class="story-grid">
107
+ <div class="story-card">
108
+ <p class="eyebrow">Installation</p>
109
+ <h3>Up and running in 1 minutes</h3>
110
+ <ul class="story-list">
111
+ <li><span>1️⃣</span> Open Reachy Mini Dashboard → Applications</li>
112
+ <li><span>2️⃣</span> Enable "Show community apps"</li>
113
+ <li><span>3️⃣</span> Install "Reachy Mini for Home Assistant"</li>
114
+ <li><span>4️⃣</span> Home Assistant discovers automatically</li>
115
+ </ul>
116
  </div>
117
+ <div class="story-card secondary">
118
+ <p class="eyebrow">How it works</p>
119
+ <h3>Seamless integration</h3>
120
+ <p class="story-text">
121
+ This Reachy Mini app uses ESPHome protocol to communicate with Home Assistant — no ESPHome device needed. Home Assistant discovers it via mDNS and adds all 45+ entities automatically. Voice commands are processed by your Home Assistant instance — STT, intent recognition, and TTS all happen there.
122
+ </p>
123
+ <div class="chips">
124
+ <span class="chip">ESPHome Protocol</span>
125
+ <span class="chip">mDNS Discovery</span>
126
+ <span class="chip">45+ Entities</span>
127
+ <span class="chip">Zero Config</span>
128
+ </div>
129
  </div>
130
  </div>
131
  </section>
 
149
  fetch('changelog.json')
150
  .then(res => res.json())
151
  .then(data => {
 
 
 
 
 
 
 
 
 
152
  const mainGrid = document.getElementById('changelog-grid');
153
  const olderGrid = document.getElementById('changelog-older');
154
  data.forEach((item, index) => {
 
179
  <h3>HA Dashboard Card</h3>
180
  <p>Lovelace Card for HA</p>
181
  </a>
182
+ <a href="https://huggingface.co/spaces/djhui5710/reachy_mini_ha_voice/tree/main" target="_blank" class="link-card">
183
  <span class="icon">📦</span>
184
  <h3>Source Code</h3>
185
+ <p>HuggingFace Spaces</p>
 
 
 
 
 
186
  </a>
187
  <a href="https://www.pollen-robotics.com/" target="_blank" class="link-card">
188
  <span class="icon">🤖</span>
pyproject.toml CHANGED
@@ -3,22 +3,23 @@ requires = ["setuptools>=61.0"]
3
  build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
- name = "reachy_mini_home_assistant"
7
- version = "1.0.3"
8
- description = "Deep integration of Reachy Mini robot with Home Assistant"
9
  readme = "README.md"
10
  requires-python = ">=3.12"
11
  license = {text = "Apache-2.0"}
12
  dependencies = [
13
- # Reachy Mini SDK with gstreamer support (for camera streaming)
14
- "reachy-mini>=1.5.0",
15
 
16
- # Audio processing (for audio file analysis)
 
17
  "soundfile>=0.13.0",
18
- "numpy>=2.0.0,<=2.2.5",
19
 
20
  # Camera streaming
21
- "opencv-python>=4.12.0.88",
22
 
23
  # Wake word detection (local)
24
  # STT/TTS is handled by Home Assistant, not locally
@@ -27,33 +28,26 @@ dependencies = [
27
 
28
  # ESPHome protocol (communication with Home Assistant)
29
  "aioesphomeapi>=43.10.1",
30
- "zeroconf<1",
31
 
32
  # Motion control (head movements)
33
  "scipy>=1.14.0",
34
-
35
  # Face tracking (YOLO-based head detection)
36
- "ultralytics",
37
- "supervision",
38
-
 
39
  # Sendspin synchronized audio (optional, for multi-room playback)
40
  "aiosendspin>=2.0.1",
41
-
42
  # Gesture detection (ONNX runtime for HaGRID models)
43
  "onnxruntime>=1.18.0",
44
-
45
- # PyTorch (for vision models)
46
- "torch==2.5.1",
47
- "torchvision==0.20.1",
48
-
49
- # Compatibility with system packages (gradio, etc.)
50
- "pillow<12.0",
51
- "pydantic<=2.12.5",
52
  ]
53
  keywords = ["reachy-mini-app", "reachy-mini", "home-assistant", "voice-assistant"]
54
 
55
  [project.entry-points."reachy_mini_apps"]
56
- reachy_mini_home_assistant = "reachy_mini_home_assistant.main:ReachyMiniHaVoice"
57
 
58
  [tool.setuptools]
59
  package-dir = { "" = "." }
@@ -63,109 +57,4 @@ include-package-data = true
63
  where = ["."]
64
 
65
  [tool.setuptools.package-data]
66
- "*" = ["*.json", "*.flac", "*.md", "*.tflite", "*.onnx", "*.pt"]
67
-
68
- # ============================================================================
69
- # Ruff - Fast Python linter and formatter
70
- # ============================================================================
71
- [tool.ruff]
72
- target-version = "py312"
73
- line-length = 120
74
- src = ["reachy_mini_home_assistant"]
75
-
76
- # Exclude reference code and generated files
77
- exclude = [
78
- "reference/",
79
- "__pycache__",
80
- ".git",
81
- "*.egg-info",
82
- ]
83
-
84
- [dependency-groups]
85
- dev = [
86
- "ruff==0.15.4",
87
- "mypy==1.19.1",
88
- ]
89
-
90
- [tool.ruff.lint]
91
- select = [
92
- "E", # pycodestyle errors
93
- "W", # pycodestyle warnings
94
- "F", # Pyflakes
95
- "I", # isort (import sorting)
96
- "B", # flake8-bugbear (common bugs)
97
- "C4", # flake8-comprehensions
98
- "UP", # pyupgrade (modern Python syntax)
99
- "SIM", # flake8-simplify
100
- "TCH", # flake8-type-checking (TYPE_CHECKING optimization)
101
- "RUF", # Ruff-specific rules
102
- "PTH", # flake8-use-pathlib
103
- "PL", # Pylint
104
- ]
105
- ignore = [
106
- "E501", # line too long (handled by formatter)
107
- "PLR0913", # too many arguments (common in robot control)
108
- "PLR2004", # magic value comparison (many thresholds in motion code)
109
- "PLR0912", # too many branches
110
- "PLR0915", # too many statements
111
- "PLR0911", # too many return statements
112
- "SIM108", # use ternary operator (sometimes less readable)
113
- "B008", # function call in default argument (used for field factories)
114
- # The following are intentional patterns in this codebase:
115
- "PLC0415", # import-outside-top-level (lazy imports for optional deps)
116
- "PLW0603", # global-statement (used for singletons)
117
- "SIM102", # collapsible-if (sometimes more readable expanded)
118
- "SIM105", # suppressible-exception (explicit try/except is clearer)
119
- "PTH123", # builtin-open (pathlib not always better)
120
- "PTH108", # os-unlink (pathlib not always better)
121
- "RUF013", # implicit-optional (legacy code)
122
- "TC002", # third-party import (numpy is required at runtime)
123
- ]
124
-
125
- [tool.ruff.lint.per-file-ignores]
126
- "__init__.py" = ["F401"] # unused imports in __init__ are intentional
127
-
128
- [tool.ruff.lint.isort]
129
- known-first-party = ["reachy_mini_home_assistant"]
130
-
131
- # ============================================================================
132
- # Mypy - Static type checker
133
- # ============================================================================
134
- [tool.mypy]
135
- python_version = "3.12"
136
- warn_return_any = false # Too noisy for mixed typed/untyped codebase
137
- warn_unused_ignores = true
138
- disallow_untyped_defs = false # Start lenient, can tighten later
139
- check_untyped_defs = false # Too strict for initial setup
140
- ignore_missing_imports = true # Many robot SDK libs lack type stubs
141
- no_implicit_optional = false # Allow implicit Optional for now
142
- # Disable some checks that are too strict for this codebase
143
- disable_error_code = [
144
- "union-attr", # Too many Optional accesses without None checks
145
- "no-redef", # Class redefinitions for SDK compatibility
146
- "attr-defined", # Some dynamic attributes from SDK
147
- "assignment", # Variable type changes (common in Python)
148
- "arg-type", # Argument type mismatches (often SDK issues)
149
- "unused-ignore", # Type ignore comments from before config
150
- "return-value", # Return type mismatches (often fine)
151
- "no-untyped-def", # Missing type annotations (too strict initially)
152
- "valid-type", # Type validity (some edge cases)
153
- "has-type", # Cannot determine type
154
- "call-arg", # Too few/many arguments
155
- "import-untyped", # Missing stubs for third-party libs
156
- "misc", # Miscellaneous errors
157
- ]
158
- exclude = [
159
- "reference/",
160
- "tests/",
161
- ]
162
-
163
- # Stricter checking for core modules (can enable gradually)
164
- [[tool.mypy.overrides]]
165
- module = [
166
- "reachy_mini_home_assistant.core.*",
167
- "reachy_mini_home_assistant.motion.smoothing",
168
- "reachy_mini_home_assistant.motion.pose_composer",
169
- ]
170
- disallow_untyped_defs = true
171
- warn_unreachable = true
 
3
  build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
+ name = "reachy_mini_ha_voice"
7
+ version = "0.7.3"
8
+ description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.12"
11
  license = {text = "Apache-2.0"}
12
  dependencies = [
13
+ # Reachy Mini SDK (provides audio via media system)
14
+ "reachy-mini",
15
 
16
+ # Audio processing (fallback when not on Reachy Mini)
17
+ "sounddevice>=0.5.0",
18
  "soundfile>=0.13.0",
19
+ "numpy>=2.0.0",
20
 
21
  # Camera streaming
22
+ "opencv-python>=4.10.0",
23
 
24
  # Wake word detection (local)
25
  # STT/TTS is handled by Home Assistant, not locally
 
28
 
29
  # ESPHome protocol (communication with Home Assistant)
30
  "aioesphomeapi>=43.10.1",
31
+ "zeroconf>=0.140.0",
32
 
33
  # Motion control (head movements)
34
  "scipy>=1.14.0",
35
+
36
  # Face tracking (YOLO-based head detection)
37
+ "ultralytics>=8.3.0",
38
+ "supervision>=0.25.0",
39
+ "huggingface_hub>=0.27.0",
40
+
41
  # Sendspin synchronized audio (optional, for multi-room playback)
42
  "aiosendspin>=2.0.1",
43
+
44
  # Gesture detection (ONNX runtime for HaGRID models)
45
  "onnxruntime>=1.18.0",
 
 
 
 
 
 
 
 
46
  ]
47
  keywords = ["reachy-mini-app", "reachy-mini", "home-assistant", "voice-assistant"]
48
 
49
  [project.entry-points."reachy_mini_apps"]
50
+ reachy_mini_ha_voice = "reachy_mini_ha_voice.main:ReachyMiniHaVoice"
51
 
52
  [tool.setuptools]
53
  package-dir = { "" = "." }
 
57
  where = ["."]
58
 
59
  [tool.setuptools.package-data]
60
+ "*" = ["*.json", "*.flac", "*.md", "*.tflite", "*.onnx"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/__init__.py RENAMED
@@ -1,29 +1,24 @@
1
- """
2
- Reachy Mini for Home Assistant
3
-
4
- A deep integration app combining Reachy Mini robot with Home Assistant,
5
- enabling voice control, smart home automation, and expressive robot interactions.
6
-
7
- Key features:
8
- - Local wake word detection (microWakeWord/openWakeWord)
9
- - ESPHome protocol for seamless Home Assistant communication
10
- - STT/TTS powered by Home Assistant voice pipeline
11
- - Reachy Mini motion control with expressive animations
12
- - Camera streaming and gesture detection
13
- - Smart home entity control through natural voice commands
14
- """
15
-
16
- try:
17
- from importlib.metadata import version
18
-
19
- __version__ = version("reachy_mini_home_assistant")
20
- except Exception:
21
- __version__ = "0.0.0" # Fallback for development
22
- __author__ = "Desmond Dong"
23
-
24
- # Don't import main module here to avoid runpy warning
25
- # The app is loaded via entry point: reachy_mini_home_assistant.main:ReachyMiniHaVoiceApp
26
-
27
- __all__ = [
28
- "__version__",
29
- ]
 
1
+ """
2
+ Reachy Mini for Home Assistant
3
+
4
+ A deep integration app combining Reachy Mini robot with Home Assistant,
5
+ enabling voice control, smart home automation, and expressive robot interactions.
6
+
7
+ Key features:
8
+ - Local wake word detection (microWakeWord/openWakeWord)
9
+ - ESPHome protocol for seamless Home Assistant communication
10
+ - STT/TTS powered by Home Assistant voice pipeline
11
+ - Reachy Mini motion control with expressive animations
12
+ - Camera streaming and gesture detection
13
+ - Smart home entity control through natural voice commands
14
+ """
15
+
16
+ __version__ = "0.7.3"
17
+ __author__ = "Desmond Dong"
18
+
19
+ # Don't import main module here to avoid runpy warning
20
+ # The app is loaded via entry point: reachy_mini_ha_voice.main:ReachyMiniHAVoiceApp
21
+
22
+ __all__ = [
23
+ "__version__",
24
+ ]
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/__main__.py RENAMED
@@ -2,7 +2,7 @@
2
  """Main entry point for Reachy Mini for Home Assistant.
3
 
4
  This module provides a command-line interface for running the voice assistant
5
- without the ReachyMini App framework.
6
  """
7
 
8
  import argparse
@@ -10,13 +10,13 @@ import asyncio
10
  import logging
11
  import threading
12
 
13
- from .core import get_health_monitor, get_memory_monitor
14
-
15
  _LOGGER = logging.getLogger(__name__)
16
 
17
 
18
  async def main() -> None:
19
- parser = argparse.ArgumentParser(description="Reachy Mini for Home Assistant")
 
 
20
  parser.add_argument(
21
  "--name",
22
  default="Reachy Mini",
@@ -49,6 +49,11 @@ async def main() -> None:
49
  action="store_true",
50
  help="Disable camera server",
51
  )
 
 
 
 
 
52
  parser.add_argument(
53
  "--debug",
54
  action="store_true",
@@ -63,11 +68,17 @@ async def main() -> None:
63
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
64
  )
65
 
66
- # Initialize Reachy Mini (required)
67
- from reachy_mini import ReachyMini
68
-
69
- reachy_mini = ReachyMini()
70
- _LOGGER.info("Reachy Mini connected")
 
 
 
 
 
 
71
 
72
  # Import and create VoiceAssistantService
73
  from .voice_assistant import VoiceAssistantService
@@ -85,22 +96,7 @@ async def main() -> None:
85
  # Create stop event for graceful shutdown
86
  stop_event = threading.Event()
87
 
88
- # Initialize monitoring services
89
- health_monitor = get_health_monitor()
90
- memory_monitor = get_memory_monitor()
91
-
92
- # Register service health checks
93
- health_monitor.register_checker(
94
- "voice_assistant",
95
- lambda: service.is_running if hasattr(service, "is_running") else True,
96
- interval=30.0,
97
- )
98
-
99
  try:
100
- # Start monitoring
101
- health_monitor.start()
102
- memory_monitor.start()
103
-
104
  await service.start()
105
 
106
  _LOGGER.info("=" * 50)
@@ -109,7 +105,7 @@ async def main() -> None:
109
  _LOGGER.info("Name: %s", args.name)
110
  _LOGGER.info("ESPHome Server: %s:%s", args.host, args.port)
111
  _LOGGER.info("Camera Server: %s:%s", args.host, args.camera_port)
112
- _LOGGER.info("Motion control: enabled")
113
  _LOGGER.info("=" * 50)
114
  _LOGGER.info("Add this device in Home Assistant:")
115
  _LOGGER.info(" Settings -> Devices & Services -> Add Integration -> ESPHome")
@@ -123,10 +119,6 @@ async def main() -> None:
123
  except KeyboardInterrupt:
124
  _LOGGER.info("Shutting down...")
125
  finally:
126
- # Stop monitoring services
127
- health_monitor.stop()
128
- memory_monitor.stop()
129
-
130
  await service.stop()
131
  _LOGGER.info("Voice assistant stopped")
132
 
 
2
  """Main entry point for Reachy Mini for Home Assistant.
3
 
4
  This module provides a command-line interface for running the voice assistant
5
+ in standalone mode (without the ReachyMini App framework).
6
  """
7
 
8
  import argparse
 
10
  import logging
11
  import threading
12
 
 
 
13
  _LOGGER = logging.getLogger(__name__)
14
 
15
 
16
  async def main() -> None:
17
+ parser = argparse.ArgumentParser(
18
+ description="Reachy Mini for Home Assistant"
19
+ )
20
  parser.add_argument(
21
  "--name",
22
  default="Reachy Mini",
 
49
  action="store_true",
50
  help="Disable camera server",
51
  )
52
+ parser.add_argument(
53
+ "--no-motion",
54
+ action="store_true",
55
+ help="Disable Reachy Mini motion control",
56
+ )
57
  parser.add_argument(
58
  "--debug",
59
  action="store_true",
 
68
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
69
  )
70
 
71
+ # Initialize Reachy Mini (if available)
72
+ reachy_mini = None
73
+ if not args.no_motion:
74
+ try:
75
+ from reachy_mini import ReachyMini
76
+ reachy_mini = ReachyMini()
77
+ _LOGGER.info("Reachy Mini connected")
78
+ except ImportError:
79
+ _LOGGER.warning("reachy-mini not installed, motion control disabled")
80
+ except Exception as e:
81
+ _LOGGER.warning("Failed to connect to Reachy Mini: %s", e)
82
 
83
  # Import and create VoiceAssistantService
84
  from .voice_assistant import VoiceAssistantService
 
96
  # Create stop event for graceful shutdown
97
  stop_event = threading.Event()
98
 
 
 
 
 
 
 
 
 
 
 
 
99
  try:
 
 
 
 
100
  await service.start()
101
 
102
  _LOGGER.info("=" * 50)
 
105
  _LOGGER.info("Name: %s", args.name)
106
  _LOGGER.info("ESPHome Server: %s:%s", args.host, args.port)
107
  _LOGGER.info("Camera Server: %s:%s", args.host, args.camera_port)
108
+ _LOGGER.info("Motion control: %s", "enabled" if reachy_mini else "disabled")
109
  _LOGGER.info("=" * 50)
110
  _LOGGER.info("Add this device in Home Assistant:")
111
  _LOGGER.info(" Settings -> Devices & Services -> Add Integration -> ESPHome")
 
119
  except KeyboardInterrupt:
120
  _LOGGER.info("Shutting down...")
121
  finally:
 
 
 
 
122
  await service.stop()
123
  _LOGGER.info("Voice assistant stopped")
124
 
{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/animation_player.py RENAMED
@@ -16,18 +16,17 @@ import threading
16
  import time
17
  from dataclasses import dataclass
18
  from pathlib import Path
 
19
 
20
  _LOGGER = logging.getLogger(__name__)
21
 
22
  _MODULE_DIR = Path(__file__).parent
23
- _PACKAGE_DIR = _MODULE_DIR.parent # reachy_mini_home_assistant/
24
- _ANIMATIONS_FILE = _PACKAGE_DIR / "animations" / "conversation_animations.json"
25
 
26
 
27
  @dataclass
28
  class AnimationParams:
29
  """Parameters for a single animation with per-axis frequencies."""
30
-
31
  name: str
32
  description: str
33
  # Position amplitudes (meters)
@@ -49,7 +48,6 @@ class AnimationParams:
49
  # Antenna
50
  antenna_amplitude_rad: float = 0.0
51
  antenna_move_name: str = "both"
52
- antenna_frequency_hz: float = 0.0 # If not specified, uses main frequency_hz
53
  # Per-axis frequencies (Hz) - if not specified, uses main frequency_hz
54
  frequency_hz: float = 0.5
55
  pitch_frequency_hz: float = 0.0
@@ -69,17 +67,14 @@ class AnimationPlayer:
69
  - Multi-frequency oscillators for natural motion
70
  - Random phase offsets per animation start for variation
71
  - Smooth transitions between animations
72
- - Interpolation phase: smooth transition from current pose to neutral before oscillation
73
- (same as BreathingMove in reference project)
74
  """
75
 
76
  def __init__(self):
77
- self._animations: dict[str, AnimationParams] = {}
78
  self._amplitude_scale: float = 1.0
79
  self._transition_duration: float = 0.3
80
- self._interpolation_duration: float = 1.0 # Time to interpolate to neutral (same as BreathingMove)
81
- self._current_animation: str | None = None
82
- self._target_animation: str | None = None
83
  self._transition_start: float = 0.0
84
  self._phase_start: float = 0.0
85
  self._lock = threading.Lock()
@@ -90,29 +85,6 @@ class AnimationPlayer:
90
  self._phase_x: float = 0.0
91
  self._phase_y: float = 0.0
92
  self._phase_z: float = 0.0
93
- # Interpolation state (for smooth transition to neutral before oscillation)
94
- self._in_interpolation: bool = False
95
- self._interpolation_start_time: float = 0.0
96
- self._interpolation_start_offsets: dict[str, float] = {
97
- "pitch": 0.0,
98
- "yaw": 0.0,
99
- "roll": 0.0,
100
- "x": 0.0,
101
- "y": 0.0,
102
- "z": 0.0,
103
- "antenna_left": 0.0,
104
- "antenna_right": 0.0,
105
- }
106
- self._last_offsets: dict[str, float] = {
107
- "pitch": 0.0,
108
- "yaw": 0.0,
109
- "roll": 0.0,
110
- "x": 0.0,
111
- "y": 0.0,
112
- "z": 0.0,
113
- "antenna_left": 0.0,
114
- "antenna_right": 0.0,
115
- }
116
  self._load_config()
117
 
118
  def _load_config(self) -> None:
@@ -121,7 +93,7 @@ class AnimationPlayer:
121
  _LOGGER.warning("Animations file not found: %s", _ANIMATIONS_FILE)
122
  return
123
  try:
124
- with open(_ANIMATIONS_FILE, encoding="utf-8") as f:
125
  data = json.load(f)
126
 
127
  settings = data.get("settings", {})
@@ -148,7 +120,6 @@ class AnimationPlayer:
148
  yaw_offset_rad=params.get("yaw_offset_rad", 0.0),
149
  antenna_amplitude_rad=params.get("antenna_amplitude_rad", 0.0),
150
  antenna_move_name=params.get("antenna_move_name", "both"),
151
- antenna_frequency_hz=params.get("antenna_frequency_hz", 0.0),
152
  frequency_hz=params.get("frequency_hz", 0.5),
153
  pitch_frequency_hz=params.get("pitch_frequency_hz", 0.0),
154
  yaw_frequency_hz=params.get("yaw_frequency_hz", 0.0),
@@ -173,29 +144,18 @@ class AnimationPlayer:
173
  self._phase_z = random.random() * 2 * math.pi
174
 
175
  def set_animation(self, name: str) -> bool:
176
- """Set the current animation with smooth transition.
177
-
178
- Like BreathingMove in reference project, this starts an interpolation
179
- phase that smoothly transitions from the current pose to neutral before
180
- starting the oscillation animation.
181
- """
182
  with self._lock:
183
  if name not in self._animations and name is not None:
184
  _LOGGER.warning("Unknown animation: %s", name)
185
  return False
186
- if name == self._current_animation and not self._in_interpolation:
187
  return True
188
-
189
- # Capture current offsets for interpolation start
190
- self._interpolation_start_offsets = self._last_offsets.copy()
191
- self._interpolation_start_time = time.perf_counter()
192
- self._in_interpolation = True
193
-
194
  self._target_animation = name
195
  self._transition_start = time.perf_counter()
196
  # Randomize phases for new animation
197
  self._randomize_phases()
198
- _LOGGER.debug("Transitioning to animation: %s (interpolation phase)", name)
199
  return True
200
 
201
  def stop(self) -> None:
@@ -204,13 +164,10 @@ class AnimationPlayer:
204
  self._current_animation = None
205
  self._target_animation = None
206
 
207
- def get_offsets(self, dt: float = 0.0) -> dict[str, float]:
208
  """Calculate current animation offsets.
209
 
210
- Uses two-phase animation like BreathingMove in reference project:
211
- 1. Interpolation phase: smoothly transition from current pose to neutral
212
- 2. Oscillation phase: continuous sinusoidal breathing motion
213
-
214
  Each axis can have its own frequency for more organic movement.
215
 
216
  Args:
@@ -222,7 +179,7 @@ class AnimationPlayer:
222
  with self._lock:
223
  now = time.perf_counter()
224
 
225
- # Handle transition to new animation
226
  if self._target_animation != self._current_animation:
227
  elapsed = now - self._transition_start
228
  if elapsed >= self._transition_duration:
@@ -231,59 +188,20 @@ class AnimationPlayer:
231
 
232
  # No animation
233
  if self._current_animation is None:
234
- result = {
235
- "pitch": 0.0,
236
- "yaw": 0.0,
237
- "roll": 0.0,
238
- "x": 0.0,
239
- "y": 0.0,
240
- "z": 0.0,
241
- "antenna_left": 0.0,
242
- "antenna_right": 0.0,
243
  }
244
- self._last_offsets = result.copy()
245
- return result
246
 
247
  params = self._animations.get(self._current_animation)
248
  if params is None:
249
- result = {
250
- "pitch": 0.0,
251
- "yaw": 0.0,
252
- "roll": 0.0,
253
- "x": 0.0,
254
- "y": 0.0,
255
- "z": 0.0,
256
- "antenna_left": 0.0,
257
- "antenna_right": 0.0,
258
  }
259
- self._last_offsets = result.copy()
260
- return result
261
-
262
- # Check if in interpolation phase
263
- if self._in_interpolation:
264
- interp_elapsed = now - self._interpolation_start_time
265
- if interp_elapsed < self._interpolation_duration:
266
- # Phase 1: Linear interpolation from current pose to neutral (offset=0)
267
- # Use smooth ease-in-out for natural motion
268
- t = interp_elapsed / self._interpolation_duration
269
- # Smooth step: t * t * (3 - 2 * t)
270
- smooth_t = t * t * (3 - 2 * t)
271
-
272
- result = {}
273
- for key in self._interpolation_start_offsets:
274
- start_val = self._interpolation_start_offsets[key]
275
- # Interpolate toward 0 (neutral)
276
- result[key] = start_val * (1.0 - smooth_t)
277
-
278
- self._last_offsets = result.copy()
279
- return result
280
- else:
281
- # Interpolation complete, start oscillation phase
282
- self._in_interpolation = False
283
- self._phase_start = now
284
- _LOGGER.debug("Interpolation complete, starting oscillation phase")
285
 
286
- # Phase 2: Oscillation animation
287
  elapsed = now - self._phase_start
288
  base_freq = params.frequency_hz
289
 
@@ -301,27 +219,32 @@ class AnimationPlayer:
301
  z_freq = params.z_frequency_hz if params.z_frequency_hz > 0 else base_freq
302
 
303
  # Calculate oscillations with per-axis frequencies and random phases
304
- pitch = params.pitch_offset_rad + params.pitch_amplitude_rad * math.sin(
305
- 2 * math.pi * pitch_freq * elapsed + self._phase_pitch
306
- )
307
 
308
- yaw = params.yaw_offset_rad + params.yaw_amplitude_rad * math.sin(
309
- 2 * math.pi * yaw_freq * elapsed + self._phase_yaw
310
- )
311
 
312
- roll = params.roll_offset_rad + params.roll_amplitude_rad * math.sin(
313
- 2 * math.pi * roll_freq * elapsed + self._phase_roll
314
- )
315
 
316
- x = params.x_offset_m + params.x_amplitude_m * math.sin(2 * math.pi * x_freq * elapsed + self._phase_x)
 
 
317
 
318
- y = params.y_offset_m + params.y_amplitude_m * math.sin(2 * math.pi * y_freq * elapsed + self._phase_y)
 
 
319
 
320
- z = params.z_offset_m + params.z_amplitude_m * math.sin(2 * math.pi * z_freq * elapsed + self._phase_z)
 
 
321
 
322
- # Antenna movement with its own frequency
323
- antenna_freq = params.antenna_frequency_hz if params.antenna_frequency_hz > 0 else base_freq
324
- antenna_phase = 2 * math.pi * antenna_freq * elapsed
325
  if params.antenna_move_name == "both":
326
  left = right = params.antenna_amplitude_rad * math.sin(antenna_phase)
327
  elif params.antenna_move_name == "wiggle":
@@ -333,7 +256,7 @@ class AnimationPlayer:
333
 
334
  # Apply scale and blend
335
  scale = self._amplitude_scale * blend
336
- result = {
337
  "pitch": pitch * scale,
338
  "yaw": yaw * scale,
339
  "roll": roll * scale,
@@ -343,11 +266,9 @@ class AnimationPlayer:
343
  "antenna_left": left * scale,
344
  "antenna_right": right * scale,
345
  }
346
- self._last_offsets = result.copy()
347
- return result
348
 
349
  @property
350
- def current_animation(self) -> str | None:
351
  """Get the current animation name."""
352
  with self._lock:
353
  return self._current_animation
 
16
  import time
17
  from dataclasses import dataclass
18
  from pathlib import Path
19
+ from typing import Dict, Optional
20
 
21
  _LOGGER = logging.getLogger(__name__)
22
 
23
  _MODULE_DIR = Path(__file__).parent
24
+ _ANIMATIONS_FILE = _MODULE_DIR / "animations" / "conversation_animations.json"
 
25
 
26
 
27
  @dataclass
28
  class AnimationParams:
29
  """Parameters for a single animation with per-axis frequencies."""
 
30
  name: str
31
  description: str
32
  # Position amplitudes (meters)
 
48
  # Antenna
49
  antenna_amplitude_rad: float = 0.0
50
  antenna_move_name: str = "both"
 
51
  # Per-axis frequencies (Hz) - if not specified, uses main frequency_hz
52
  frequency_hz: float = 0.5
53
  pitch_frequency_hz: float = 0.0
 
67
  - Multi-frequency oscillators for natural motion
68
  - Random phase offsets per animation start for variation
69
  - Smooth transitions between animations
 
 
70
  """
71
 
72
  def __init__(self):
73
+ self._animations: Dict[str, AnimationParams] = {}
74
  self._amplitude_scale: float = 1.0
75
  self._transition_duration: float = 0.3
76
+ self._current_animation: Optional[str] = None
77
+ self._target_animation: Optional[str] = None
 
78
  self._transition_start: float = 0.0
79
  self._phase_start: float = 0.0
80
  self._lock = threading.Lock()
 
85
  self._phase_x: float = 0.0
86
  self._phase_y: float = 0.0
87
  self._phase_z: float = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  self._load_config()
89
 
90
  def _load_config(self) -> None:
 
93
  _LOGGER.warning("Animations file not found: %s", _ANIMATIONS_FILE)
94
  return
95
  try:
96
+ with open(_ANIMATIONS_FILE, "r", encoding="utf-8") as f:
97
  data = json.load(f)
98
 
99
  settings = data.get("settings", {})
 
120
  yaw_offset_rad=params.get("yaw_offset_rad", 0.0),
121
  antenna_amplitude_rad=params.get("antenna_amplitude_rad", 0.0),
122
  antenna_move_name=params.get("antenna_move_name", "both"),
 
123
  frequency_hz=params.get("frequency_hz", 0.5),
124
  pitch_frequency_hz=params.get("pitch_frequency_hz", 0.0),
125
  yaw_frequency_hz=params.get("yaw_frequency_hz", 0.0),
 
144
  self._phase_z = random.random() * 2 * math.pi
145
 
146
  def set_animation(self, name: str) -> bool:
147
+ """Set the current animation with smooth transition."""
 
 
 
 
 
148
  with self._lock:
149
  if name not in self._animations and name is not None:
150
  _LOGGER.warning("Unknown animation: %s", name)
151
  return False
152
+ if name == self._current_animation:
153
  return True
 
 
 
 
 
 
154
  self._target_animation = name
155
  self._transition_start = time.perf_counter()
156
  # Randomize phases for new animation
157
  self._randomize_phases()
158
+ _LOGGER.debug("Transitioning to animation: %s", name)
159
  return True
160
 
161
  def stop(self) -> None:
 
164
  self._current_animation = None
165
  self._target_animation = None
166
 
167
+ def get_offsets(self, dt: float = 0.0) -> Dict[str, float]:
168
  """Calculate current animation offsets.
169
 
170
+ Uses multi-frequency oscillators for natural motion.
 
 
 
171
  Each axis can have its own frequency for more organic movement.
172
 
173
  Args:
 
179
  with self._lock:
180
  now = time.perf_counter()
181
 
182
+ # Handle transition
183
  if self._target_animation != self._current_animation:
184
  elapsed = now - self._transition_start
185
  if elapsed >= self._transition_duration:
 
188
 
189
  # No animation
190
  if self._current_animation is None:
191
+ return {
192
+ "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
193
+ "x": 0.0, "y": 0.0, "z": 0.0,
194
+ "antenna_left": 0.0, "antenna_right": 0.0,
 
 
 
 
 
195
  }
 
 
196
 
197
  params = self._animations.get(self._current_animation)
198
  if params is None:
199
+ return {
200
+ "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
201
+ "x": 0.0, "y": 0.0, "z": 0.0,
202
+ "antenna_left": 0.0, "antenna_right": 0.0,
 
 
 
 
 
203
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
 
205
  elapsed = now - self._phase_start
206
  base_freq = params.frequency_hz
207
 
 
219
  z_freq = params.z_frequency_hz if params.z_frequency_hz > 0 else base_freq
220
 
221
  # Calculate oscillations with per-axis frequencies and random phases
222
+ pitch = (params.pitch_offset_rad +
223
+ params.pitch_amplitude_rad *
224
+ math.sin(2 * math.pi * pitch_freq * elapsed + self._phase_pitch))
225
 
226
+ yaw = (params.yaw_offset_rad +
227
+ params.yaw_amplitude_rad *
228
+ math.sin(2 * math.pi * yaw_freq * elapsed + self._phase_yaw))
229
 
230
+ roll = (params.roll_offset_rad +
231
+ params.roll_amplitude_rad *
232
+ math.sin(2 * math.pi * roll_freq * elapsed + self._phase_roll))
233
 
234
+ x = (params.x_offset_m +
235
+ params.x_amplitude_m *
236
+ math.sin(2 * math.pi * x_freq * elapsed + self._phase_x))
237
 
238
+ y = (params.y_offset_m +
239
+ params.y_amplitude_m *
240
+ math.sin(2 * math.pi * y_freq * elapsed + self._phase_y))
241
 
242
+ z = (params.z_offset_m +
243
+ params.z_amplitude_m *
244
+ math.sin(2 * math.pi * z_freq * elapsed + self._phase_z))
245
 
246
+ # Antenna movement
247
+ antenna_phase = 2 * math.pi * base_freq * elapsed
 
248
  if params.antenna_move_name == "both":
249
  left = right = params.antenna_amplitude_rad * math.sin(antenna_phase)
250
  elif params.antenna_move_name == "wiggle":
 
256
 
257
  # Apply scale and blend
258
  scale = self._amplitude_scale * blend
259
+ return {
260
  "pitch": pitch * scale,
261
  "yaw": yaw * scale,
262
  "roll": roll * scale,
 
266
  "antenna_left": left * scale,
267
  "antenna_right": right * scale,
268
  }
 
 
269
 
270
  @property
271
+ def current_animation(self) -> Optional[str]:
272
  """Get the current animation name."""
273
  with self._lock:
274
  return self._current_animation
reachy_mini_ha_voice/animations/conversation_animations.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "animations": {
3
+ "idle": {
4
+ "description": "No movement when idle - robot stays at neutral position",
5
+ "z_amplitude_m": 0.0,
6
+ "antenna_amplitude_rad": 0.0,
7
+ "frequency_hz": 0.0
8
+ },
9
+ "listening": {
10
+ "description": "Attentive pose while listening to user - slight forward lean",
11
+ "pitch_offset_rad": -0.05,
12
+ "pitch_amplitude_rad": 0.03,
13
+ "z_amplitude_m": 0.003,
14
+ "antenna_amplitude_rad": 0.2,
15
+ "antenna_move_name": "both",
16
+ "frequency_hz": 0.6
17
+ },
18
+ "thinking": {
19
+ "description": "Processing/thinking animation - head tilted with gentle sway",
20
+ "roll_offset_rad": 0.08,
21
+ "pitch_amplitude_rad": 0.03,
22
+ "yaw_amplitude_rad": 0.05,
23
+ "roll_amplitude_rad": 0.04,
24
+ "z_amplitude_m": 0.003,
25
+ "antenna_amplitude_rad": 0.25,
26
+ "antenna_move_name": "wiggle",
27
+ "frequency_hz": 0.4
28
+ },
29
+ "speaking": {
30
+ "description": "Speaking animation - multi-frequency natural head sway",
31
+ "pitch_amplitude_rad": 0.08,
32
+ "pitch_frequency_hz": 2.2,
33
+ "yaw_amplitude_rad": 0.13,
34
+ "yaw_frequency_hz": 0.6,
35
+ "roll_amplitude_rad": 0.04,
36
+ "roll_frequency_hz": 1.3,
37
+ "x_amplitude_m": 0.0045,
38
+ "x_frequency_hz": 0.35,
39
+ "y_amplitude_m": 0.00375,
40
+ "y_frequency_hz": 0.45,
41
+ "z_amplitude_m": 0.00225,
42
+ "z_frequency_hz": 0.25,
43
+ "antenna_amplitude_rad": 0.5,
44
+ "antenna_move_name": "wiggle",
45
+ "frequency_hz": 1.0
46
+ },
47
+ "happy": {
48
+ "description": "Happy/positive response",
49
+ "pitch_amplitude_rad": 0.08,
50
+ "z_amplitude_m": 0.01,
51
+ "antenna_amplitude_rad": 0.5,
52
+ "antenna_move_name": "both",
53
+ "frequency_hz": 1.2
54
+ },
55
+ "sad": {
56
+ "description": "Sad/negative response - head droops",
57
+ "pitch_offset_rad": 0.1,
58
+ "pitch_amplitude_rad": 0.04,
59
+ "z_offset_m": -0.01,
60
+ "z_amplitude_m": 0.002,
61
+ "antenna_amplitude_rad": 0.1,
62
+ "antenna_move_name": "both",
63
+ "frequency_hz": 0.3
64
+ },
65
+ "confused": {
66
+ "description": "Confused/error state - head tilts",
67
+ "roll_amplitude_rad": 0.1,
68
+ "yaw_amplitude_rad": 0.12,
69
+ "pitch_amplitude_rad": 0.05,
70
+ "antenna_amplitude_rad": 0.4,
71
+ "antenna_move_name": "wiggle",
72
+ "frequency_hz": 0.7
73
+ },
74
+ "alert": {
75
+ "description": "Alert/timer finished - quick movements",
76
+ "pitch_amplitude_rad": 0.1,
77
+ "z_amplitude_m": 0.012,
78
+ "antenna_amplitude_rad": 0.6,
79
+ "antenna_move_name": "both",
80
+ "frequency_hz": 1.5
81
+ }
82
+ },
83
+ "settings": {
84
+ "amplitude_scale": 1.0,
85
+ "transition_duration_s": 0.3
86
+ }
87
+ }
{reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/api_server.py RENAMED
@@ -4,7 +4,7 @@ import asyncio
4
  import logging
5
  from abc import abstractmethod
6
  from collections.abc import Iterable
7
- from typing import TYPE_CHECKING
8
 
9
  # pylint: disable=no-name-in-module
10
  from aioesphomeapi._frame_helper.packets import make_plain_text_packets
@@ -31,7 +31,7 @@ class APIServer(asyncio.Protocol):
31
 
32
  def __init__(self, name: str) -> None:
33
  self.name = name
34
- self._buffer: bytes | None = None
35
  self._buffer_len: int = 0
36
  self._pos: int = 0
37
  self._transport = None
@@ -44,10 +44,8 @@ class APIServer(asyncio.Protocol):
44
  def process_packet(self, msg_type: int, packet_data: bytes) -> None:
45
  msg_class = MESSAGE_TYPE_TO_PROTO[msg_type]
46
  msg_inst = msg_class.FromString(packet_data)
47
- _LOGGER.debug("Received message: %s", msg_class.__name__)
48
 
49
  if isinstance(msg_inst, HelloRequest):
50
- _LOGGER.info("HelloRequest received, sending HelloResponse")
51
  self.send_messages(
52
  [
53
  HelloResponse(
@@ -60,7 +58,6 @@ class APIServer(asyncio.Protocol):
60
  return
61
 
62
  if isinstance(msg_inst, AuthenticationRequest):
63
- _LOGGER.info("AuthenticationRequest received, sending AuthenticationResponse")
64
  self.send_messages([AuthenticationResponse()])
65
  elif isinstance(msg_inst, DisconnectRequest):
66
  self.send_messages([DisconnectResponse()])
@@ -76,27 +73,20 @@ class APIServer(asyncio.Protocol):
76
  msgs = [msgs]
77
  self.send_messages(msgs)
78
 
79
- def send_messages(self, msgs: list[message.Message]):
80
  if self._writelines is None:
81
  return
82
 
83
- try:
84
- packets = [(PROTO_TO_MESSAGE_TYPE[msg.__class__], msg.SerializeToString()) for msg in msgs]
85
- packet_bytes = make_plain_text_packets(packets)
86
- self._writelines(packet_bytes)
87
- except (IndexError, OSError, BrokenPipeError, ConnectionResetError) as e:
88
- _LOGGER.warning("Error sending message (%s): %s - connection may be lost",
89
- msgs[0].__class__.__name__ if msgs else "unknown", e)
90
- # Mark transport as invalid to prevent further writes
91
- self._writelines = None
92
- if self._transport:
93
- self._transport.close()
94
- self._transport = None
95
 
96
  def connection_made(self, transport) -> None:
97
  self._transport = transport
98
  self._writelines = transport.writelines
99
- _LOGGER.info("ESPHome client connected from %s", transport.get_extra_info("peername"))
100
 
101
  def data_received(self, data: bytes):
102
  if self._buffer is None:
@@ -150,13 +140,8 @@ class APIServer(asyncio.Protocol):
150
  return cstr[original_pos:new_pos]
151
 
152
  def connection_lost(self, exc):
153
- _LOGGER.info("ESPHome client disconnected: %s", exc)
154
  self._transport = None
155
  self._writelines = None
156
- # Clear buffer to prevent memory leak
157
- self._buffer = None
158
- self._buffer_len = 0
159
- self._pos = 0
160
 
161
  def _read_varuint(self) -> int:
162
  """Read a varuint from the buffer or -1 if the buffer runs out of bytes."""
 
4
  import logging
5
  from abc import abstractmethod
6
  from collections.abc import Iterable
7
+ from typing import TYPE_CHECKING, List, Optional
8
 
9
  # pylint: disable=no-name-in-module
10
  from aioesphomeapi._frame_helper.packets import make_plain_text_packets
 
31
 
32
  def __init__(self, name: str) -> None:
33
  self.name = name
34
+ self._buffer: Optional[bytes] = None
35
  self._buffer_len: int = 0
36
  self._pos: int = 0
37
  self._transport = None
 
44
  def process_packet(self, msg_type: int, packet_data: bytes) -> None:
45
  msg_class = MESSAGE_TYPE_TO_PROTO[msg_type]
46
  msg_inst = msg_class.FromString(packet_data)
 
47
 
48
  if isinstance(msg_inst, HelloRequest):
 
49
  self.send_messages(
50
  [
51
  HelloResponse(
 
58
  return
59
 
60
  if isinstance(msg_inst, AuthenticationRequest):
 
61
  self.send_messages([AuthenticationResponse()])
62
  elif isinstance(msg_inst, DisconnectRequest):
63
  self.send_messages([DisconnectResponse()])
 
73
  msgs = [msgs]
74
  self.send_messages(msgs)
75
 
76
+ def send_messages(self, msgs: List[message.Message]):
77
  if self._writelines is None:
78
  return
79
 
80
+ packets = [
81
+ (PROTO_TO_MESSAGE_TYPE[msg.__class__], msg.SerializeToString())
82
+ for msg in msgs
83
+ ]
84
+ packet_bytes = make_plain_text_packets(packets)
85
+ self._writelines(packet_bytes)
 
 
 
 
 
 
86
 
87
  def connection_made(self, transport) -> None:
88
  self._transport = transport
89
  self._writelines = transport.writelines
 
90
 
91
  def data_received(self, data: bytes):
92
  if self._buffer is None:
 
140
  return cstr[original_pos:new_pos]
141
 
142
  def connection_lost(self, exc):
 
143
  self._transport = None
144
  self._writelines = None
 
 
 
 
145
 
146
  def _read_varuint(self) -> int:
147
  """Read a varuint from the buffer or -1 if the buffer runs out of bytes."""
{reachy_mini_home_assistant/audio → reachy_mini_ha_voice}/audio_player.py RENAMED
@@ -8,52 +8,29 @@ Sendspin is automatically enabled by default - no user configuration needed.
8
  The system uses mDNS to discover Sendspin servers on the local network.
9
  """
10
 
11
- from __future__ import annotations
12
-
13
  import hashlib
14
  import logging
15
  import socket
16
  import threading
17
  import time
18
- from typing import TYPE_CHECKING
19
-
20
- import numpy as np
21
 
22
  if TYPE_CHECKING:
23
- from collections.abc import Callable
24
-
25
- from aiosendspin.models.core import StreamStartMessage
26
-
27
- from ..protocol.zeroconf import SendspinDiscovery
28
 
29
  _LOGGER = logging.getLogger(__name__)
30
 
31
- # Movement latency to sync head motion with audio playback
32
- # Audio playback has hardware buffer latency, so we delay head motion to match
33
- # Same as reachy_mini_conversation_app's HeadWobbler.MOVEMENT_LATENCY_S
34
- MOVEMENT_LATENCY_S = 0.2 # 200ms latency between audio start and head movement
35
- SWAY_FRAME_DT_S = 0.05
36
- STREAM_FETCH_CHUNK_SIZE = 2048
37
- UNTHROTTLED_PREROLL_S = 0.35
38
-
39
  # Check if aiosendspin is available
40
  try:
41
- from aiosendspin.client import PCMFormat, SendspinClient
 
42
  from aiosendspin.models.player import ClientHelloPlayerSupport, SupportedAudioFormat
43
- from aiosendspin.models.types import AudioCodec, PlayerCommand, Roles
44
-
45
  SENDSPIN_AVAILABLE = True
46
- except Exception as e:
47
  SENDSPIN_AVAILABLE = False
48
- _LOGGER.warning("Sendspin unavailable, disabling integration: %s", e)
49
- # Fallback placeholders to keep runtime annotations safe when Sendspin is unavailable.
50
- PCMFormat = None # type: ignore[assignment]
51
- SendspinClient = None # type: ignore[assignment]
52
- ClientHelloPlayerSupport = None # type: ignore[assignment]
53
- SupportedAudioFormat = None # type: ignore[assignment]
54
- AudioCodec = None # type: ignore[assignment]
55
- PlayerCommand = None # type: ignore[assignment]
56
- Roles = None # type: ignore[assignment]
57
 
58
 
59
  def _get_stable_client_id() -> str:
@@ -76,48 +53,46 @@ class AudioPlayer:
76
  Supports audio playback modes:
77
  1. Reachy Mini's built-in media system (default)
78
  2. Sendspin synchronized multi-room playback (as PLAYER - receives audio)
 
79
 
80
  When connected to Sendspin as a PLAYER, Reachy Mini receives audio streams
81
  from Home Assistant or other controllers for synchronized playback.
82
  """
83
 
84
- def __init__(self, reachy_mini=None, gstreamer_lock=None) -> None:
85
  """Initialize audio player.
86
 
87
  Args:
88
  reachy_mini: Reachy Mini SDK instance.
89
- gstreamer_lock: Threading lock for GStreamer media access (shared across all media operations).
90
  """
91
  self.reachy_mini = reachy_mini
92
- self._gstreamer_lock = gstreamer_lock if gstreamer_lock is not None else threading.Lock()
93
  self.is_playing = False
94
- self._playlist: list[str] = []
95
- self._done_callback: Callable[[], None] | None = None
96
  self._done_callback_lock = threading.Lock()
97
  self._duck_volume: float = 0.5
98
  self._unduck_volume: float = 1.0
99
  self._current_volume: float = 1.0
100
  self._stop_flag = threading.Event()
101
- self._playback_thread: threading.Thread | None = None # Track active playback thread
102
 
103
  # Speech sway callback for audio-driven head motion
104
- self._sway_callback: Callable[[dict], None] | None = None
105
 
106
  # Sendspin support (auto-enabled via mDNS discovery)
107
  # Uses stable client_id so HA recognizes the same device after restart
108
  self._sendspin_client_id = _get_stable_client_id()
109
- self._sendspin_client: SendspinClient | None = None
110
  self._sendspin_enabled = False
111
- self._sendspin_url: str | None = None
112
- self._sendspin_discovery: SendspinDiscovery | None = None
113
- self._sendspin_unsubscribers: list[Callable] = []
114
 
115
  # Audio buffer for Sendspin playback
116
- self._sendspin_audio_format: PCMFormat | None = None
117
  self._sendspin_playback_started = False
118
  self._sendspin_paused = False # Pause Sendspin when voice assistant is active
119
 
120
- def set_sway_callback(self, callback: Callable[[dict], None] | None) -> None:
121
  """Set callback for speech-driven sway animation.
122
 
123
  Args:
@@ -143,7 +118,7 @@ class AudioPlayer:
143
  return self._sendspin_enabled and self._sendspin_client is not None
144
 
145
  @property
146
- def sendspin_url(self) -> str | None:
147
  """Get current Sendspin server URL."""
148
  return self._sendspin_url
149
 
@@ -184,7 +159,7 @@ class AudioPlayer:
184
  return
185
 
186
  # Import here to avoid circular imports
187
- from ..protocol.zeroconf import SendspinDiscovery
188
 
189
  _LOGGER.info("Starting Sendspin server discovery...")
190
  self._sendspin_discovery = SendspinDiscovery(self._on_sendspin_server_found)
@@ -226,13 +201,25 @@ class AudioPlayer:
226
  player_support = ClientHelloPlayerSupport(
227
  supported_formats=[
228
  # Prefer 16kHz (native ReSpeaker sample rate - no resampling needed)
229
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=2, sample_rate=16000, bit_depth=16),
230
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=1, sample_rate=16000, bit_depth=16),
 
 
 
 
231
  # Also support higher sample rates (will be resampled to 16kHz)
232
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=2, sample_rate=48000, bit_depth=16),
233
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=2, sample_rate=44100, bit_depth=16),
234
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=1, sample_rate=48000, bit_depth=16),
235
- SupportedAudioFormat(codec=AudioCodec.PCM, channels=1, sample_rate=44100, bit_depth=16),
 
 
 
 
 
 
 
 
236
  ],
237
  buffer_capacity=32_000_000,
238
  supported_commands=[PlayerCommand.VOLUME, PlayerCommand.MUTE],
@@ -258,7 +245,8 @@ class AudioPlayer:
258
  self._sendspin_url = server_url
259
  self._sendspin_enabled = True
260
 
261
- _LOGGER.info("Sendspin connected as PLAYER: %s (client_id=%s)", server_url, self._sendspin_client_id)
 
262
  return True
263
 
264
  except Exception as e:
@@ -267,7 +255,7 @@ class AudioPlayer:
267
  self._sendspin_enabled = False
268
  return False
269
 
270
- def _on_sendspin_audio_chunk(self, server_timestamp_us: int, audio_data: bytes, fmt: PCMFormat) -> None:
271
  """Handle incoming audio chunks from Sendspin server.
272
 
273
  Plays the audio through Reachy Mini's speaker using push_audio_sample().
@@ -275,6 +263,9 @@ class AudioPlayer:
275
 
276
  Note: Audio is dropped when Sendspin is paused (e.g., during voice assistant interaction).
277
  """
 
 
 
278
  # Drop audio when paused (voice assistant is active)
279
  if self._sendspin_paused:
280
  return
@@ -301,9 +292,6 @@ class AudioPlayer:
301
  # Convert to float32 for playback (SDK expects float32)
302
  audio_float = audio_array.astype(np.float32) / max_val
303
 
304
- # Clamp values to valid range [-1.0, 1.0] to prevent invalid values
305
- audio_float = np.clip(audio_float, -1.0, 1.0)
306
-
307
  # Reshape for channels if needed
308
  if fmt.channels > 1:
309
  # Reshape to (samples, channels)
@@ -316,14 +304,14 @@ class AudioPlayer:
316
  target_sample_rate = self.reachy_mini.media.get_output_audio_samplerate()
317
  if fmt.sample_rate != target_sample_rate and target_sample_rate > 0:
318
  import scipy.signal
319
-
320
  # Calculate new length
321
  new_length = int(len(audio_float) * target_sample_rate / fmt.sample_rate)
322
  if new_length > 0:
323
  audio_float = scipy.signal.resample(audio_float, new_length, axis=0)
324
  # Log resampling only once per stream
325
- if not hasattr(self, "_logged_resample") or not self._logged_resample:
326
- _LOGGER.debug("Resampling Sendspin audio: %d Hz -> %d Hz", fmt.sample_rate, target_sample_rate)
 
327
  self._logged_resample = True
328
 
329
  # Apply volume
@@ -339,46 +327,31 @@ class AudioPlayer:
339
  _LOGGER.warning("Failed to start media playback: %s", e)
340
 
341
  # Play through Reachy Mini's media system using push_audio_sample
342
- # Use GStreamer lock to prevent concurrent access conflicts
343
- acquired = self._gstreamer_lock.acquire(timeout=0.05)
344
- if acquired:
345
- try:
346
- self.reachy_mini.media.push_audio_sample(audio_float)
347
- finally:
348
- self._gstreamer_lock.release()
349
- else:
350
- _LOGGER.debug("GStreamer lock busy, skipping audio sample")
351
- # Flush SDK playback buffer to prevent buffer overflow during lock contention
352
- try:
353
- if hasattr(self.reachy_mini.media, "flush"):
354
- self.reachy_mini.media.flush()
355
- elif hasattr(self.reachy_mini.media, "flush_audio"):
356
- self.reachy_mini.media.flush_audio()
357
- except Exception:
358
- pass
359
 
360
  except Exception as e:
361
  _LOGGER.debug("Error playing Sendspin audio: %s", e)
362
 
363
- def _on_sendspin_stream_start(self, message: StreamStartMessage) -> None:
364
  """Handle stream start from Sendspin server."""
365
  _LOGGER.debug("Sendspin stream started")
366
  # No need to clear buffer - just start fresh
367
 
368
- def _on_sendspin_stream_end(self, roles: list[Roles] | None) -> None:
369
  """Handle stream end from Sendspin server."""
370
  if roles is None or Roles.PLAYER in roles:
371
  _LOGGER.debug("Sendspin stream ended")
372
 
373
- def _on_sendspin_stream_clear(self, roles: list[Roles] | None) -> None:
374
  """Handle stream clear from Sendspin server."""
375
  if roles is None or Roles.PLAYER in roles:
376
  _LOGGER.debug("Sendspin stream cleared")
377
- try:
378
- self.reachy_mini.media.stop_playing()
379
- self._sendspin_playback_started = False
380
- except Exception:
381
- pass
 
382
 
383
  async def _disconnect_sendspin(self) -> None:
384
  """Disconnect from current Sendspin server."""
@@ -386,8 +359,8 @@ class AudioPlayer:
386
  for unsub in self._sendspin_unsubscribers:
387
  try:
388
  unsub()
389
- except Exception as e:
390
- _LOGGER.debug("Error during Sendspin unsubscribe: %s", e)
391
  self._sendspin_unsubscribers.clear()
392
 
393
  if self._sendspin_client is not None:
@@ -411,20 +384,14 @@ class AudioPlayer:
411
  # Disconnect from server
412
  await self._disconnect_sendspin()
413
 
414
- # Clear all references to prevent memory leaks
415
- self._sendspin_client = None
416
- self._sendspin_url = None
417
- self._sendspin_audio_format = None
418
- self._sendspin_enabled = False
419
-
420
  _LOGGER.info("Sendspin stopped")
421
 
422
  # ========== Core Playback Methods ==========
423
 
424
  def play(
425
  self,
426
- url: str | list[str],
427
- done_callback: Callable[[], None] | None = None,
428
  stop_first: bool = True,
429
  ) -> None:
430
  """Play audio from URL(s).
@@ -444,12 +411,6 @@ class AudioPlayer:
444
 
445
  self._done_callback = done_callback
446
  self._stop_flag.clear()
447
-
448
- # Limit active playback threads to prevent resource exhaustion
449
- if hasattr(self, "_playback_thread") and self._playback_thread and self._playback_thread.is_alive():
450
- _LOGGER.warning("Previous playback still active, stopping it")
451
- self.stop()
452
-
453
  self._play_next()
454
 
455
  def _play_next(self) -> None:
@@ -463,176 +424,76 @@ class AudioPlayer:
463
  self.is_playing = True
464
 
465
  # Start playback in a thread
466
- self._playback_thread = threading.Thread(target=self._play_file, args=(next_url,), daemon=True)
467
- self._playback_thread.start()
468
 
469
  def _play_file(self, file_path: str) -> None:
470
  """Play an audio file with optional speech-driven sway animation."""
471
  try:
472
  # Handle URLs - download first
473
  if file_path.startswith(("http://", "https://")):
474
- import requests
475
-
476
- source_url = file_path
477
- streamed = False
478
- cached_audio = bytearray()
479
- content_type = ""
480
-
481
- try:
482
- with requests.get(source_url, stream=True, timeout=(5.0, 30.0)) as response:
483
- response.raise_for_status()
484
- content_type = (response.headers.get("Content-Type") or "").lower()
485
-
486
- stream_iter = response.iter_content(chunk_size=STREAM_FETCH_CHUNK_SIZE)
487
-
488
- def caching_iter_content(chunk_size: int = STREAM_FETCH_CHUNK_SIZE):
489
- del chunk_size
490
- for chunk in stream_iter:
491
- if chunk:
492
- cached_audio.extend(chunk)
493
- yield chunk
494
-
495
- adapted_response = self._iterator_response_adapter(caching_iter_content())
496
-
497
- # Try true streaming on this single HTTP request.
498
- if self._is_pcm_content_type(content_type):
499
- _LOGGER.info("TTS playback mode: streaming_pcm")
500
- streamed = self._stream_pcm_response(adapted_response, content_type)
501
- else:
502
- _LOGGER.info("TTS playback mode: streaming_decoded")
503
- streamed = self._stream_decoded_response(adapted_response, source_url, content_type)
504
-
505
- if not streamed:
506
- # Keep draining remaining bytes from the SAME request
507
- # so one-time URLs are still playable via fallback.
508
- for chunk in stream_iter:
509
- if chunk:
510
- cached_audio.extend(chunk)
511
-
512
- except Exception as e:
513
- _LOGGER.debug("Streaming TTS failed, fallback to memory playback: %s", e)
514
 
515
- if streamed:
516
- return
517
-
518
- _LOGGER.info("TTS playback mode: fallback_memory")
519
- played = self._play_cached_audio(cached_audio, content_type)
520
- if played:
521
- return
522
-
523
- _LOGGER.error("Failed to play cached TTS audio from memory")
524
- return
525
 
526
  if self._stop_flag.is_set():
527
  return
528
 
529
  # Play locally using Reachy Mini's media system
530
- try:
531
- duration: float | None = None
532
- sway_frames: list[dict] = []
533
-
534
- # Fast metadata path first to avoid long pre-read latency.
535
  try:
 
536
  import soundfile as sf
 
 
537
 
538
- info = sf.info(file_path)
539
- if info.samplerate > 0 and info.frames > 0:
540
- duration = float(info.frames) / float(info.samplerate)
541
- except Exception:
542
- duration = None
543
-
544
- # Optional sway pre-analysis (best effort). If decode/read is expensive
545
- # or unsupported, keep playback path working without blocking startup.
546
- if self._sway_callback is not None:
547
- try:
548
- import soundfile as sf
549
 
550
- data, sample_rate = sf.read(file_path)
551
- if duration is None and sample_rate > 0:
552
- duration = len(data) / sample_rate
553
 
554
- from ..motion.speech_sway import SpeechSwayRT
 
 
 
555
 
556
- sway = SpeechSwayRT()
557
- sway_frames = sway.feed(data, sample_rate)
558
- except Exception:
559
- sway_frames = []
560
-
561
- # Start playback
562
- self.reachy_mini.media.play_sound(file_path)
563
-
564
- # Playback loop with sway animation
565
- # Apply MOVEMENT_LATENCY_S delay to sync head motion with audio
566
- # (audio playback has hardware buffer latency)
567
- start_time = time.time()
568
- frame_duration = 0.05 # 50ms per sway frame (HOP_MS)
569
- frame_idx = 0
570
-
571
- # Playback loop with sway animation and timeout protection
572
- # Apply MOVEMENT_LATENCY_S delay to sync head motion with audio
573
- # (audio playback has hardware buffer latency)
574
- start_time = time.time()
575
- frame_duration = 0.05 # 50ms per sway frame (HOP_MS)
576
- frame_idx = 0
577
- # If duration unknown, poll SDK playback state when available.
578
- has_duration = (duration is not None) and (duration > 0)
579
- duration_s = duration if has_duration else 0.0
580
- max_duration = (duration_s * 1.5) if has_duration else 60.0
581
- playback_timeout = start_time + max_duration
582
-
583
- is_playing_fn = getattr(self.reachy_mini.media, "is_playing", None)
584
-
585
- while True:
586
- # Check for timeout (safety guard)
587
- if time.time() > playback_timeout:
588
- _LOGGER.warning("Audio playback timeout (%.1fs), stopping", max_duration)
589
- self.reachy_mini.media.stop_playing()
590
- break
591
-
592
- if self._stop_flag.is_set():
593
- self.reachy_mini.media.stop_playing()
594
- break
595
-
596
- if has_duration:
597
- if (time.time() - start_time) >= duration_s:
598
  break
599
- elif callable(is_playing_fn):
600
- try:
601
- if not bool(is_playing_fn()):
602
- break
603
- except Exception:
604
- pass
605
-
606
- # Apply sway frame if available, with 200ms delay
607
- if self._sway_callback and frame_idx < len(sway_frames):
608
- elapsed = time.time() - start_time
609
- # Apply latency: head motion starts MOVEMENT_LATENCY_S after audio
610
- effective_elapsed = max(0, elapsed - MOVEMENT_LATENCY_S)
611
- target_frame = int(effective_elapsed / frame_duration)
612
-
613
- # Skip frames if falling behind (lag compensation)
614
- while frame_idx <= target_frame and frame_idx < len(sway_frames):
615
- self._sway_callback(sway_frames[frame_idx])
616
- frame_idx += 1
617
-
618
- time.sleep(0.02) # 20ms sleep for responsive sway
619
-
620
- # Reset sway to zero when done
621
- if self._sway_callback:
622
- self._sway_callback(
623
- {
624
- "pitch_rad": 0.0,
625
- "yaw_rad": 0.0,
626
- "roll_rad": 0.0,
627
- "x_m": 0.0,
628
- "y_m": 0.0,
629
- "z_m": 0.0,
630
- }
631
- )
632
 
633
- except Exception as e:
634
- _LOGGER.error("Reachy Mini audio failed: %s", e)
635
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
636
 
637
  except Exception as e:
638
  _LOGGER.error("Error playing audio: %s", e)
@@ -643,473 +504,22 @@ class AudioPlayer:
643
  else:
644
  self._on_playback_finished()
645
 
646
- @staticmethod
647
- def _iterator_response_adapter(iterator):
648
- class _ResponseAdapter:
649
- def __init__(self, iter_obj) -> None:
650
- self._iter_obj = iter_obj
651
-
652
- def iter_content(self, chunk_size: int = 8192):
653
- del chunk_size
654
- return self._iter_obj
655
-
656
- return _ResponseAdapter(iterator)
657
-
658
- def _play_cached_audio(self, audio_bytes: bytes | bytearray, content_type: str) -> bool:
659
- if not audio_bytes:
660
- return False
661
-
662
- audio_data = bytes(audio_bytes)
663
- mem_iter = (
664
- audio_data[i : i + STREAM_FETCH_CHUNK_SIZE] for i in range(0, len(audio_data), STREAM_FETCH_CHUNK_SIZE)
665
- )
666
- adapted_response = self._iterator_response_adapter(mem_iter)
667
-
668
- if self._is_pcm_content_type(content_type):
669
- return self._stream_pcm_response(adapted_response, content_type)
670
-
671
- return self._stream_decoded_response(adapted_response, "memory-cache", content_type)
672
-
673
- @staticmethod
674
- def _is_pcm_content_type(content_type: str) -> bool:
675
- return ("audio/l16" in content_type) or ("audio/pcm" in content_type) or ("audio/raw" in content_type)
676
-
677
- @staticmethod
678
- def _parse_pcm_format(content_type: str) -> tuple[int, int]:
679
- channels = 1
680
- sample_rate = 16000
681
- if ";" in content_type:
682
- for part in content_type.split(";"):
683
- token = part.strip()
684
- if token.startswith("channels="):
685
- try:
686
- channels = max(1, int(token.split("=", 1)[1]))
687
- except Exception:
688
- pass
689
- elif token.startswith("rate="):
690
- try:
691
- sample_rate = max(8000, int(token.split("=", 1)[1]))
692
- except Exception:
693
- pass
694
- return channels, sample_rate
695
-
696
- @staticmethod
697
- def _guess_gst_input_caps(content_type: str) -> str | None:
698
- ct = (content_type or "").split(";", 1)[0].strip().lower()
699
- mapping = {
700
- "audio/mpeg": "audio/mpeg,mpegversion=(int)1",
701
- "audio/mp3": "audio/mpeg,mpegversion=(int)1",
702
- "audio/aac": "audio/mpeg,mpegversion=(int)4,stream-format=(string)raw",
703
- "audio/mp4": "audio/mpeg,mpegversion=(int)4,stream-format=(string)raw",
704
- "audio/ogg": "application/ogg",
705
- "application/ogg": "application/ogg",
706
- "audio/opus": "audio/x-opus",
707
- "audio/webm": "video/webm",
708
- "audio/wav": "audio/x-wav",
709
- "audio/wave": "audio/x-wav",
710
- "audio/x-wav": "audio/x-wav",
711
- "audio/flac": "audio/x-flac",
712
- "audio/x-flac": "audio/x-flac",
713
- }
714
- return mapping.get(ct)
715
-
716
- def _ensure_media_playback_started(self) -> bool:
717
- acquired = self._gstreamer_lock.acquire(timeout=0.3)
718
- if not acquired:
719
- return False
720
- try:
721
- self.reachy_mini.media.start_playing()
722
- return True
723
- except Exception:
724
- return False
725
- finally:
726
- self._gstreamer_lock.release()
727
-
728
- def _push_audio_float(self, audio_float: np.ndarray, max_wait_s: float = 1.0) -> bool:
729
- deadline = time.monotonic() + max(0.05, max_wait_s)
730
- while time.monotonic() < deadline:
731
- if self._stop_flag.is_set():
732
- return False
733
-
734
- acquired = self._gstreamer_lock.acquire(timeout=0.1)
735
- if not acquired:
736
- continue
737
- try:
738
- self.reachy_mini.media.push_audio_sample(audio_float)
739
- return True
740
- finally:
741
- self._gstreamer_lock.release()
742
-
743
- return False
744
-
745
- def _stream_pcm_response(self, response, content_type: str) -> bool:
746
- channels, sample_rate = self._parse_pcm_format(content_type)
747
- target_sr = self.reachy_mini.media.get_output_audio_samplerate()
748
- if target_sr <= 0:
749
- target_sr = 16000
750
-
751
- if not self._ensure_media_playback_started():
752
- return False
753
-
754
- remainder = b""
755
- pushed_any = False
756
- played_frames = 0
757
- stream_start = time.monotonic()
758
- sway_ctx = self._init_stream_sway_context()
759
- bytes_per_frame = 2 * channels
760
-
761
- for chunk in response.iter_content(chunk_size=STREAM_FETCH_CHUNK_SIZE):
762
- if self._stop_flag.is_set():
763
- break
764
- if not chunk:
765
- continue
766
-
767
- data = remainder + chunk
768
- usable_len = (len(data) // bytes_per_frame) * bytes_per_frame
769
- remainder = data[usable_len:]
770
- if usable_len == 0:
771
- continue
772
-
773
- pcm = np.frombuffer(data[:usable_len], dtype=np.int16).astype(np.float32) / 32768.0
774
- pcm = np.clip(pcm * self._current_volume, -1.0, 1.0).reshape(-1, channels)
775
-
776
- if sample_rate != target_sr and target_sr > 0:
777
- import scipy.signal
778
-
779
- new_len = int(len(pcm) * target_sr / sample_rate)
780
- if new_len > 0:
781
- pcm = scipy.signal.resample(pcm, new_len, axis=0).astype(np.float32, copy=False)
782
-
783
- target_elapsed = played_frames / float(target_sr)
784
- actual_elapsed = time.monotonic() - stream_start
785
- if target_elapsed > UNTHROTTLED_PREROLL_S and target_elapsed > actual_elapsed:
786
- time.sleep(min(0.05, target_elapsed - actual_elapsed))
787
 
788
- if not self._push_audio_float(pcm):
789
- continue
790
 
791
- pushed_any = True
792
- played_frames += int(pcm.shape[0])
793
- self._feed_stream_sway(sway_ctx, pcm, target_sr)
794
-
795
- self._finalize_stream_sway(sway_ctx)
796
- return pushed_any
797
-
798
- def _stream_decoded_response(self, response, source_url: str, content_type: str) -> bool:
799
- try:
800
- import gi
801
-
802
- gi.require_version("Gst", "1.0")
803
- from gi.repository import Gst
804
- except Exception:
805
- return False
806
-
807
- try:
808
- Gst.init(None)
809
- except Exception:
810
- pass
811
-
812
- target_sr = self.reachy_mini.media.get_output_audio_samplerate()
813
- if target_sr <= 0:
814
- target_sr = 16000
815
-
816
- target_channels = 1
817
- if not self._ensure_media_playback_started():
818
- return False
819
-
820
- pipeline = Gst.Pipeline.new("tts_stream_decode")
821
- appsrc = Gst.ElementFactory.make("appsrc", "src")
822
- decodebin = Gst.ElementFactory.make("decodebin", "decode")
823
- audioconvert = Gst.ElementFactory.make("audioconvert", "conv")
824
- audioresample = Gst.ElementFactory.make("audioresample", "resample")
825
- capsfilter = Gst.ElementFactory.make("capsfilter", "caps")
826
- appsink = Gst.ElementFactory.make("appsink", "sink")
827
-
828
- if not all((pipeline, appsrc, decodebin, audioconvert, audioresample, capsfilter, appsink)):
829
- return False
830
-
831
- target_caps = Gst.Caps.from_string(f"audio/x-raw,format=S16LE,channels={target_channels},rate={target_sr}")
832
- capsfilter.set_property("caps", target_caps)
833
-
834
- appsrc.set_property("is-live", True)
835
- appsrc.set_property("format", Gst.Format.BYTES)
836
- appsrc.set_property("block", False)
837
- appsrc.set_property("do-timestamp", True)
838
-
839
- src_caps = self._guess_gst_input_caps(content_type)
840
- if src_caps:
841
- try:
842
- appsrc.set_property("caps", Gst.Caps.from_string(src_caps))
843
- except Exception:
844
- pass
845
-
846
- try:
847
- decodebin.set_property("caps", Gst.Caps.from_string("audio/x-raw"))
848
- except Exception:
849
- pass
850
-
851
- appsink.set_property("emit-signals", False)
852
- appsink.set_property("sync", False)
853
- # Keep all decoded audio for TTS completion. Dropping buffers can cause
854
- # a short "blip" then silence on fast decoders.
855
- appsink.set_property("max-buffers", 0)
856
- appsink.set_property("drop", False)
857
-
858
- pipeline.add(appsrc)
859
- pipeline.add(decodebin)
860
- pipeline.add(audioconvert)
861
- pipeline.add(audioresample)
862
- pipeline.add(capsfilter)
863
- pipeline.add(appsink)
864
-
865
- if not appsrc.link(decodebin):
866
- return False
867
- if not audioconvert.link(audioresample):
868
- return False
869
- if not audioresample.link(capsfilter):
870
- return False
871
- if not capsfilter.link(appsink):
872
- return False
873
-
874
- audio_state = {"linked": False}
875
-
876
- def on_pad_added(_decodebin, pad) -> None:
877
- sink_pad = audioconvert.get_static_pad("sink")
878
- if sink_pad is None or sink_pad.is_linked():
879
- return
880
-
881
- caps_obj = pad.get_current_caps() or pad.query_caps(None)
882
- if caps_obj is None:
883
- return
884
- if caps_obj.to_string().startswith("audio/"):
885
- try:
886
- result = pad.link(sink_pad)
887
- if result == Gst.PadLinkReturn.OK:
888
- audio_state["linked"] = True
889
- except Exception:
890
- pass
891
-
892
- decodebin.connect("pad-added", on_pad_added)
893
-
894
- pushed_any = False
895
- played_frames = 0
896
- stream_start = time.monotonic()
897
- sway_ctx = self._init_stream_sway_context()
898
- bytes_per_frame = 2 * target_channels
899
- feed_done = threading.Event()
900
- decode_error = False
901
-
902
- def writer() -> None:
903
- try:
904
- for chunk in response.iter_content(chunk_size=STREAM_FETCH_CHUNK_SIZE):
905
- if self._stop_flag.is_set():
906
- break
907
- if not chunk:
908
- continue
909
- gst_buffer = Gst.Buffer.new_allocate(None, len(chunk), None)
910
- if gst_buffer is None:
911
- continue
912
- gst_buffer.fill(0, chunk)
913
- ret = appsrc.emit("push-buffer", gst_buffer)
914
- if ret not in (Gst.FlowReturn.OK, Gst.FlowReturn.FLUSHING):
915
- _LOGGER.debug("appsrc push-buffer returned %s", ret)
916
- break
917
- except Exception:
918
- pass
919
- finally:
920
- feed_done.set()
921
- try:
922
- appsrc.emit("end-of-stream")
923
- except Exception:
924
- pass
925
-
926
- try:
927
- state_ret = pipeline.set_state(Gst.State.PLAYING)
928
- if state_ret == Gst.StateChangeReturn.FAILURE:
929
- _LOGGER.debug("Failed to set GStreamer decode pipeline PLAYING for URL=%s", source_url)
930
- return False
931
-
932
- writer_thread = threading.Thread(target=writer, daemon=True)
933
- writer_thread.start()
934
-
935
- remainder = b""
936
- timeout_ns = 20_000_000 # 20ms
937
- bus = pipeline.get_bus()
938
- eos_seen = False
939
- eos_drain_empty_polls = 0
940
-
941
- while True:
942
- sample = appsink.emit("try-pull-sample", timeout_ns)
943
- if sample is not None:
944
- eos_drain_empty_polls = 0
945
- try:
946
- gst_buffer = sample.get_buffer()
947
- if gst_buffer is None:
948
- continue
949
- ok, map_info = gst_buffer.map(Gst.MapFlags.READ)
950
- if not ok:
951
- continue
952
- try:
953
- raw = bytes(map_info.data)
954
- finally:
955
- gst_buffer.unmap(map_info)
956
-
957
- data = remainder + raw
958
- usable_len = (len(data) // bytes_per_frame) * bytes_per_frame
959
- remainder = data[usable_len:]
960
- if usable_len == 0:
961
- continue
962
-
963
- pcm = np.frombuffer(data[:usable_len], dtype=np.int16).astype(np.float32) / 32768.0
964
- pcm = np.clip(pcm * self._current_volume, -1.0, 1.0).reshape(-1, target_channels)
965
-
966
- target_elapsed = played_frames / float(target_sr)
967
- actual_elapsed = time.monotonic() - stream_start
968
- if target_elapsed > UNTHROTTLED_PREROLL_S and target_elapsed > actual_elapsed:
969
- time.sleep(min(0.05, target_elapsed - actual_elapsed))
970
-
971
- if not self._push_audio_float(pcm):
972
- continue
973
-
974
- pushed_any = True
975
- played_frames += int(pcm.shape[0])
976
- self._feed_stream_sway(sway_ctx, pcm, target_sr)
977
- finally:
978
- sample = None
979
- elif eos_seen and feed_done.is_set():
980
- eos_drain_empty_polls += 1
981
-
982
- msg = bus.timed_pop_filtered(
983
- 0,
984
- Gst.MessageType.ERROR | Gst.MessageType.EOS,
985
- )
986
- if msg is not None:
987
- if msg.type == Gst.MessageType.EOS:
988
- eos_seen = True
989
- elif msg.type == Gst.MessageType.ERROR:
990
- err, debug = msg.parse_error()
991
- err_text = str(err).lower()
992
- debug_text = str(debug).lower() if debug is not None else ""
993
-
994
- # Some demuxers report non-audio not-linked warnings as ERROR.
995
- # If audio pad is already linked, keep streaming instead of aborting.
996
- if audio_state["linked"] and (
997
- "not-linked" in err_text
998
- or "not-linked" in debug_text
999
- or "streaming stopped, reason not-linked" in debug_text
1000
- ):
1001
- continue
1002
-
1003
- decode_error = True
1004
- _LOGGER.debug(
1005
- "GStreamer decode error content-type=%s url=%s err=%s debug=%s",
1006
- content_type or "unknown",
1007
- source_url,
1008
- err,
1009
- debug,
1010
- )
1011
- break
1012
-
1013
- if feed_done.is_set() and eos_seen:
1014
- sink_eos = False
1015
- try:
1016
- sink_eos_fn = getattr(appsink, "is_eos", None)
1017
- if callable(sink_eos_fn):
1018
- sink_eos = bool(sink_eos_fn())
1019
- except Exception:
1020
- sink_eos = False
1021
-
1022
- # End playback only after upstream finished feeding and
1023
- # appsink has drained decoded buffers.
1024
- if sink_eos and eos_drain_empty_polls >= 2:
1025
- break
1026
-
1027
- # Fallback guard in case is_eos is unavailable.
1028
- if eos_drain_empty_polls >= 100:
1029
- break
1030
-
1031
- if self._stop_flag.is_set():
1032
- break
1033
-
1034
- writer_thread.join(timeout=1.0)
1035
-
1036
- # Streaming is successful only if it reached a clean EOS without decode error.
1037
- # If decode failed (typically unsupported stream format), force fallback.
1038
- if self._stop_flag.is_set():
1039
- return True
1040
-
1041
- completed_cleanly = feed_done.is_set() and eos_seen and (not decode_error)
1042
- if not completed_cleanly:
1043
- return False
1044
-
1045
- except Exception as e:
1046
- _LOGGER.debug("Error during GStreamer stream decode: %s", e)
1047
- pushed_any = False
1048
- finally:
1049
- self._finalize_stream_sway(sway_ctx)
1050
- try:
1051
- pipeline.set_state(Gst.State.NULL)
1052
- except Exception:
1053
- pass
1054
-
1055
- return pushed_any
1056
-
1057
- def _init_stream_sway_context(self) -> dict | None:
1058
- if self._sway_callback is None:
1059
- return None
1060
- try:
1061
- from ..motion.speech_sway import SpeechSwayRT
1062
-
1063
- return {
1064
- "sway": SpeechSwayRT(),
1065
- "base_ts": time.monotonic(),
1066
- "frames_done": 0,
1067
- }
1068
- except Exception:
1069
- return None
1070
-
1071
- def _feed_stream_sway(self, ctx: dict | None, pcm: np.ndarray, sample_rate: int) -> None:
1072
- if ctx is None or self._sway_callback is None:
1073
- return
1074
- try:
1075
- sway = ctx["sway"]
1076
- results = sway.feed(pcm, sample_rate)
1077
- if not results:
1078
- return
1079
-
1080
- base_ts = float(ctx["base_ts"])
1081
- for item in results:
1082
- target = base_ts + MOVEMENT_LATENCY_S + ctx["frames_done"] * SWAY_FRAME_DT_S
1083
- now = time.monotonic()
1084
- if target > now:
1085
- time.sleep(min(0.02, target - now))
1086
-
1087
- self._sway_callback(item)
1088
- ctx["frames_done"] += 1
1089
- except Exception:
1090
- pass
1091
-
1092
- def _finalize_stream_sway(self, ctx: dict | None) -> None:
1093
- if ctx is None or self._sway_callback is None:
1094
- return
1095
- try:
1096
- self._sway_callback(
1097
- {
1098
- "pitch_rad": 0.0,
1099
- "yaw_rad": 0.0,
1100
- "roll_rad": 0.0,
1101
- "x_m": 0.0,
1102
- "y_m": 0.0,
1103
- "z_m": 0.0,
1104
- }
1105
- )
1106
- except Exception:
1107
- pass
1108
 
1109
  def _on_playback_finished(self) -> None:
1110
  """Called when playback is finished."""
1111
  self.is_playing = False
1112
- todo_callback: Callable[[], None] | None = None
1113
 
1114
  with self._done_callback_lock:
1115
  if self._done_callback:
@@ -1128,13 +538,14 @@ class AudioPlayer:
1128
  Stops current audio output but preserves playlist for resume.
1129
  """
1130
  self._stop_flag.set()
1131
- try:
1132
- self.reachy_mini.media.stop_playing()
1133
- except Exception:
1134
- pass
 
1135
  self.is_playing = False
1136
 
1137
- def resume_playback(self) -> None:
1138
  """Resume playback from where it was paused."""
1139
  self._stop_flag.clear()
1140
  if self._playlist:
@@ -1143,40 +554,14 @@ class AudioPlayer:
1143
  def stop(self) -> None:
1144
  """Stop playback and clear playlist."""
1145
  self._stop_flag.set()
1146
-
1147
- # Stop Reachy Mini playback
1148
- try:
1149
- self.reachy_mini.media.stop_playing()
1150
- except Exception:
1151
- pass
1152
-
1153
- # Wait for playback thread to finish (with timeout)
1154
- if self._playback_thread and self._playback_thread.is_alive():
1155
  try:
1156
- self._playback_thread.join(timeout=2.0)
1157
- if self._playback_thread.is_alive():
1158
- _LOGGER.warning("Playback thread did not stop in time")
1159
  except Exception:
1160
  pass
1161
- self._playback_thread = None
1162
-
1163
  self._playlist.clear()
1164
  self.is_playing = False
1165
 
1166
- def __del__(self) -> None:
1167
- """Cleanup on garbage collection to prevent listener leaks."""
1168
- try:
1169
- # Force cleanup of Sendspin listeners to prevent memory leaks
1170
- for unsub in self._sendspin_unsubscribers:
1171
- try:
1172
- unsub()
1173
- except Exception:
1174
- pass
1175
- self._sendspin_unsubscribers.clear()
1176
- self._sendspin_client = None
1177
- except Exception:
1178
- pass
1179
-
1180
  def duck(self) -> None:
1181
  """Reduce volume for announcements."""
1182
  self._current_volume = self._duck_volume
@@ -1191,28 +576,3 @@ class AudioPlayer:
1191
  self._unduck_volume = volume / 100.0
1192
  self._duck_volume = self._unduck_volume / 2
1193
  self._current_volume = self._unduck_volume
1194
-
1195
- def suspend(self) -> None:
1196
- """Suspend the audio player for sleep mode.
1197
-
1198
- Stops any current playback and clears the playlist.
1199
- """
1200
- _LOGGER.info("Suspending AudioPlayer for sleep...")
1201
-
1202
- # Stop any current playback
1203
- self.stop()
1204
-
1205
- # Clear sway callback to release reference
1206
- self._sway_callback = None
1207
-
1208
- _LOGGER.info("AudioPlayer suspended")
1209
-
1210
- def resume(self) -> None:
1211
- """Resume the audio player after sleep."""
1212
- _LOGGER.info("Resuming AudioPlayer from sleep...")
1213
-
1214
- # Nothing specific to restore - audio player is stateless
1215
- # Just ensure flags are reset
1216
- self._stop_flag.clear()
1217
-
1218
- _LOGGER.info("AudioPlayer resumed")
 
8
  The system uses mDNS to discover Sendspin servers on the local network.
9
  """
10
 
 
 
11
  import hashlib
12
  import logging
13
  import socket
14
  import threading
15
  import time
16
+ from collections.abc import Callable
17
+ from typing import List, Optional, TYPE_CHECKING, Union
 
18
 
19
  if TYPE_CHECKING:
20
+ from .zeroconf import SendspinDiscovery
 
 
 
 
21
 
22
  _LOGGER = logging.getLogger(__name__)
23
 
 
 
 
 
 
 
 
 
24
  # Check if aiosendspin is available
25
  try:
26
+ from aiosendspin.client import SendspinClient, PCMFormat
27
+ from aiosendspin.models.types import Roles, AudioCodec, PlayerCommand
28
  from aiosendspin.models.player import ClientHelloPlayerSupport, SupportedAudioFormat
29
+ from aiosendspin.models.core import StreamStartMessage
 
30
  SENDSPIN_AVAILABLE = True
31
+ except ImportError:
32
  SENDSPIN_AVAILABLE = False
33
+ _LOGGER.debug("aiosendspin not installed, Sendspin support disabled")
 
 
 
 
 
 
 
 
34
 
35
 
36
  def _get_stable_client_id() -> str:
 
53
  Supports audio playback modes:
54
  1. Reachy Mini's built-in media system (default)
55
  2. Sendspin synchronized multi-room playback (as PLAYER - receives audio)
56
+ 3. Sounddevice fallback (when Reachy Mini not available)
57
 
58
  When connected to Sendspin as a PLAYER, Reachy Mini receives audio streams
59
  from Home Assistant or other controllers for synchronized playback.
60
  """
61
 
62
+ def __init__(self, reachy_mini=None) -> None:
63
  """Initialize audio player.
64
 
65
  Args:
66
  reachy_mini: Reachy Mini SDK instance.
 
67
  """
68
  self.reachy_mini = reachy_mini
 
69
  self.is_playing = False
70
+ self._playlist: List[str] = []
71
+ self._done_callback: Optional[Callable[[], None]] = None
72
  self._done_callback_lock = threading.Lock()
73
  self._duck_volume: float = 0.5
74
  self._unduck_volume: float = 1.0
75
  self._current_volume: float = 1.0
76
  self._stop_flag = threading.Event()
 
77
 
78
  # Speech sway callback for audio-driven head motion
79
+ self._sway_callback: Optional[Callable[[dict], None]] = None
80
 
81
  # Sendspin support (auto-enabled via mDNS discovery)
82
  # Uses stable client_id so HA recognizes the same device after restart
83
  self._sendspin_client_id = _get_stable_client_id()
84
+ self._sendspin_client: Optional["SendspinClient"] = None
85
  self._sendspin_enabled = False
86
+ self._sendspin_url: Optional[str] = None
87
+ self._sendspin_discovery: Optional["SendspinDiscovery"] = None
88
+ self._sendspin_unsubscribers: List[Callable] = []
89
 
90
  # Audio buffer for Sendspin playback
91
+ self._sendspin_audio_format: Optional["PCMFormat"] = None
92
  self._sendspin_playback_started = False
93
  self._sendspin_paused = False # Pause Sendspin when voice assistant is active
94
 
95
+ def set_sway_callback(self, callback: Optional[Callable[[dict], None]]) -> None:
96
  """Set callback for speech-driven sway animation.
97
 
98
  Args:
 
118
  return self._sendspin_enabled and self._sendspin_client is not None
119
 
120
  @property
121
+ def sendspin_url(self) -> Optional[str]:
122
  """Get current Sendspin server URL."""
123
  return self._sendspin_url
124
 
 
159
  return
160
 
161
  # Import here to avoid circular imports
162
+ from .zeroconf import SendspinDiscovery
163
 
164
  _LOGGER.info("Starting Sendspin server discovery...")
165
  self._sendspin_discovery = SendspinDiscovery(self._on_sendspin_server_found)
 
201
  player_support = ClientHelloPlayerSupport(
202
  supported_formats=[
203
  # Prefer 16kHz (native ReSpeaker sample rate - no resampling needed)
204
+ SupportedAudioFormat(
205
+ codec=AudioCodec.PCM, channels=2, sample_rate=16000, bit_depth=16
206
+ ),
207
+ SupportedAudioFormat(
208
+ codec=AudioCodec.PCM, channels=1, sample_rate=16000, bit_depth=16
209
+ ),
210
  # Also support higher sample rates (will be resampled to 16kHz)
211
+ SupportedAudioFormat(
212
+ codec=AudioCodec.PCM, channels=2, sample_rate=48000, bit_depth=16
213
+ ),
214
+ SupportedAudioFormat(
215
+ codec=AudioCodec.PCM, channels=2, sample_rate=44100, bit_depth=16
216
+ ),
217
+ SupportedAudioFormat(
218
+ codec=AudioCodec.PCM, channels=1, sample_rate=48000, bit_depth=16
219
+ ),
220
+ SupportedAudioFormat(
221
+ codec=AudioCodec.PCM, channels=1, sample_rate=44100, bit_depth=16
222
+ ),
223
  ],
224
  buffer_capacity=32_000_000,
225
  supported_commands=[PlayerCommand.VOLUME, PlayerCommand.MUTE],
 
245
  self._sendspin_url = server_url
246
  self._sendspin_enabled = True
247
 
248
+ _LOGGER.info("Sendspin connected as PLAYER: %s (client_id=%s)",
249
+ server_url, self._sendspin_client_id)
250
  return True
251
 
252
  except Exception as e:
 
255
  self._sendspin_enabled = False
256
  return False
257
 
258
+ def _on_sendspin_audio_chunk(self, server_timestamp_us: int, audio_data: bytes, fmt: "PCMFormat") -> None:
259
  """Handle incoming audio chunks from Sendspin server.
260
 
261
  Plays the audio through Reachy Mini's speaker using push_audio_sample().
 
263
 
264
  Note: Audio is dropped when Sendspin is paused (e.g., during voice assistant interaction).
265
  """
266
+ if self.reachy_mini is None:
267
+ return
268
+
269
  # Drop audio when paused (voice assistant is active)
270
  if self._sendspin_paused:
271
  return
 
292
  # Convert to float32 for playback (SDK expects float32)
293
  audio_float = audio_array.astype(np.float32) / max_val
294
 
 
 
 
295
  # Reshape for channels if needed
296
  if fmt.channels > 1:
297
  # Reshape to (samples, channels)
 
304
  target_sample_rate = self.reachy_mini.media.get_output_audio_samplerate()
305
  if fmt.sample_rate != target_sample_rate and target_sample_rate > 0:
306
  import scipy.signal
 
307
  # Calculate new length
308
  new_length = int(len(audio_float) * target_sample_rate / fmt.sample_rate)
309
  if new_length > 0:
310
  audio_float = scipy.signal.resample(audio_float, new_length, axis=0)
311
  # Log resampling only once per stream
312
+ if not hasattr(self, '_logged_resample') or not self._logged_resample:
313
+ _LOGGER.debug("Resampling Sendspin audio: %d Hz -> %d Hz",
314
+ fmt.sample_rate, target_sample_rate)
315
  self._logged_resample = True
316
 
317
  # Apply volume
 
327
  _LOGGER.warning("Failed to start media playback: %s", e)
328
 
329
  # Play through Reachy Mini's media system using push_audio_sample
330
+ self.reachy_mini.media.push_audio_sample(audio_float)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
 
332
  except Exception as e:
333
  _LOGGER.debug("Error playing Sendspin audio: %s", e)
334
 
335
+ def _on_sendspin_stream_start(self, message: "StreamStartMessage") -> None:
336
  """Handle stream start from Sendspin server."""
337
  _LOGGER.debug("Sendspin stream started")
338
  # No need to clear buffer - just start fresh
339
 
340
+ def _on_sendspin_stream_end(self, roles: Optional[List[Roles]]) -> None:
341
  """Handle stream end from Sendspin server."""
342
  if roles is None or Roles.PLAYER in roles:
343
  _LOGGER.debug("Sendspin stream ended")
344
 
345
+ def _on_sendspin_stream_clear(self, roles: Optional[List[Roles]]) -> None:
346
  """Handle stream clear from Sendspin server."""
347
  if roles is None or Roles.PLAYER in roles:
348
  _LOGGER.debug("Sendspin stream cleared")
349
+ if self.reachy_mini is not None:
350
+ try:
351
+ self.reachy_mini.media.stop_playing()
352
+ self._sendspin_playback_started = False
353
+ except Exception:
354
+ pass
355
 
356
  async def _disconnect_sendspin(self) -> None:
357
  """Disconnect from current Sendspin server."""
 
359
  for unsub in self._sendspin_unsubscribers:
360
  try:
361
  unsub()
362
+ except Exception:
363
+ pass
364
  self._sendspin_unsubscribers.clear()
365
 
366
  if self._sendspin_client is not None:
 
384
  # Disconnect from server
385
  await self._disconnect_sendspin()
386
 
 
 
 
 
 
 
387
  _LOGGER.info("Sendspin stopped")
388
 
389
  # ========== Core Playback Methods ==========
390
 
391
  def play(
392
  self,
393
+ url: Union[str, List[str]],
394
+ done_callback: Optional[Callable[[], None]] = None,
395
  stop_first: bool = True,
396
  ) -> None:
397
  """Play audio from URL(s).
 
411
 
412
  self._done_callback = done_callback
413
  self._stop_flag.clear()
 
 
 
 
 
 
414
  self._play_next()
415
 
416
  def _play_next(self) -> None:
 
424
  self.is_playing = True
425
 
426
  # Start playback in a thread
427
+ thread = threading.Thread(target=self._play_file, args=(next_url,), daemon=True)
428
+ thread.start()
429
 
430
  def _play_file(self, file_path: str) -> None:
431
  """Play an audio file with optional speech-driven sway animation."""
432
  try:
433
  # Handle URLs - download first
434
  if file_path.startswith(("http://", "https://")):
435
+ import urllib.request
436
+ import tempfile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
 
438
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
439
+ urllib.request.urlretrieve(file_path, tmp.name)
440
+ file_path = tmp.name
 
 
 
 
 
 
 
441
 
442
  if self._stop_flag.is_set():
443
  return
444
 
445
  # Play locally using Reachy Mini's media system
446
+ if self.reachy_mini is not None:
 
 
 
 
447
  try:
448
+ # Read audio data for duration calculation and sway analysis
449
  import soundfile as sf
450
+ data, sample_rate = sf.read(file_path)
451
+ duration = len(data) / sample_rate
452
 
453
+ # Pre-analyze audio for speech sway if callback is set
454
+ sway_frames = []
455
+ if self._sway_callback is not None:
456
+ from .speech_sway import SpeechSwayRT
457
+ sway = SpeechSwayRT()
458
+ sway_frames = sway.feed(data, sample_rate)
459
+ _LOGGER.debug("Generated %d sway frames for %.2fs audio",
460
+ len(sway_frames), duration)
 
 
 
461
 
462
+ # Start playback
463
+ self.reachy_mini.media.play_sound(file_path)
 
464
 
465
+ # Playback loop with sway animation
466
+ start_time = time.time()
467
+ frame_duration = 0.05 # 50ms per sway frame (HOP_MS)
468
+ frame_idx = 0
469
 
470
+ while time.time() - start_time < duration:
471
+ if self._stop_flag.is_set():
472
+ self.reachy_mini.media.stop_playing()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
 
475
+ # Apply sway frame if available
476
+ if self._sway_callback and frame_idx < len(sway_frames):
477
+ elapsed = time.time() - start_time
478
+ target_frame = int(elapsed / frame_duration)
479
+ while frame_idx <= target_frame and frame_idx < len(sway_frames):
480
+ self._sway_callback(sway_frames[frame_idx])
481
+ frame_idx += 1
482
+
483
+ time.sleep(0.02) # 20ms sleep for responsive sway
484
+
485
+ # Reset sway to zero when done
486
+ if self._sway_callback:
487
+ self._sway_callback({
488
+ "pitch_rad": 0.0, "yaw_rad": 0.0, "roll_rad": 0.0,
489
+ "x_m": 0.0, "y_m": 0.0, "z_m": 0.0,
490
+ })
491
+
492
+ except Exception as e:
493
+ _LOGGER.warning("Reachy Mini audio failed, falling back: %s", e)
494
+ self._play_file_fallback(file_path)
495
+ else:
496
+ self._play_file_fallback(file_path)
497
 
498
  except Exception as e:
499
  _LOGGER.error("Error playing audio: %s", e)
 
504
  else:
505
  self._on_playback_finished()
506
 
507
+ def _play_file_fallback(self, file_path: str) -> None:
508
+ """Fallback to sounddevice for audio playback."""
509
+ import sounddevice as sd
510
+ import soundfile as sf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
 
512
+ data, samplerate = sf.read(file_path)
513
+ data = data * self._current_volume
514
 
515
+ if not self._stop_flag.is_set():
516
+ sd.play(data, samplerate)
517
+ sd.wait()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518
 
519
  def _on_playback_finished(self) -> None:
520
  """Called when playback is finished."""
521
  self.is_playing = False
522
+ todo_callback: Optional[Callable[[], None]] = None
523
 
524
  with self._done_callback_lock:
525
  if self._done_callback:
 
538
  Stops current audio output but preserves playlist for resume.
539
  """
540
  self._stop_flag.set()
541
+ if self.reachy_mini is not None:
542
+ try:
543
+ self.reachy_mini.media.stop_playing()
544
+ except Exception:
545
+ pass
546
  self.is_playing = False
547
 
548
+ def resume(self) -> None:
549
  """Resume playback from where it was paused."""
550
  self._stop_flag.clear()
551
  if self._playlist:
 
554
  def stop(self) -> None:
555
  """Stop playback and clear playlist."""
556
  self._stop_flag.set()
557
+ if self.reachy_mini is not None:
 
 
 
 
 
 
 
 
558
  try:
559
+ self.reachy_mini.media.stop_playing()
 
 
560
  except Exception:
561
  pass
 
 
562
  self._playlist.clear()
563
  self.is_playing = False
564
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
  def duck(self) -> None:
566
  """Reduce volume for announcements."""
567
  self._current_volume = self._duck_volume
 
576
  self._unduck_volume = volume / 100.0
577
  self._duck_volume = self._unduck_volume / 2
578
  self._current_volume = self._unduck_volume
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/camera_server.py RENAMED
@@ -1,1042 +1,842 @@
1
- """
2
- MJPEG Camera Server for Reachy Mini with Face Tracking.
3
-
4
- This module provides an HTTP server that streams camera frames from Reachy Mini
5
- as MJPEG, which can be integrated with Home Assistant via Generic Camera.
6
- Also provides face tracking for head movement control.
7
-
8
- Reference: reachy_mini_conversation_app/src/reachy_mini_conversation_app/camera_worker.py
9
- """
10
-
11
- from __future__ import annotations
12
-
13
- import asyncio
14
- import logging
15
- import threading
16
- import time
17
- from typing import TYPE_CHECKING
18
-
19
- import cv2
20
- import numpy as np
21
- from scipy.spatial.transform import Rotation as R
22
-
23
- from .face_tracking_interpolator import FaceTrackingInterpolator, InterpolationConfig
24
-
25
- # Import adaptive frame rate manager
26
- from .frame_processor import AdaptiveFrameRateManager, FrameRateConfig
27
-
28
- if TYPE_CHECKING:
29
- from reachy_mini import ReachyMini
30
-
31
- _LOGGER = logging.getLogger(__name__)
32
-
33
- # MJPEG boundary string
34
- MJPEG_BOUNDARY = "frame"
35
- GESTURE_MIN_FPS = 12.0
36
-
37
-
38
- class MJPEGCameraServer:
39
- """
40
- MJPEG streaming server for Reachy Mini camera with face tracking.
41
-
42
- Provides HTTP endpoints:
43
- - /stream - MJPEG video stream
44
- - /snapshot - Single JPEG image
45
- - / - Simple status page
46
-
47
- Also provides face tracking offsets for head movement control.
48
-
49
- Resource Optimization:
50
- - Adaptive frame rate: high (15fps) when face detected or in conversation,
51
- low (3fps) when idle and no face for extended period
52
- - Face detection pauses after prolonged absence to save CPU
53
- """
54
-
55
- def __init__(
56
- self,
57
- reachy_mini: ReachyMini,
58
- host: str = "0.0.0.0",
59
- port: int = 8081,
60
- fps: int = 15, # 15fps for smooth face tracking
61
- quality: int = 80,
62
- enable_face_tracking: bool = True,
63
- enable_gesture_detection: bool = True,
64
- face_confidence_threshold: float = 0.5, # Min confidence for face detection
65
- gstreamer_lock: threading.Lock | None = None,
66
- ):
67
- """
68
- Initialize the MJPEG camera server.
69
-
70
- Args:
71
- reachy_mini: Reachy Mini robot instance (can be None for testing)
72
- host: Host address to bind to
73
- port: Port number for the HTTP server
74
- fps: Target frames per second for the stream
75
- quality: JPEG quality (1-100)
76
- enable_face_tracking: Enable face tracking for head movement
77
- face_confidence_threshold: Minimum confidence for face detection (0-1)
78
- gstreamer_lock: Threading lock for GStreamer media access (shared across all media operations).
79
- """
80
- self.reachy_mini = reachy_mini
81
- self._gstreamer_lock = gstreamer_lock if gstreamer_lock is not None else threading.Lock()
82
- self.host = host
83
- self.port = port
84
- self.fps = fps
85
- self.quality = quality
86
- self.enable_face_tracking = enable_face_tracking
87
- self._face_confidence_threshold = face_confidence_threshold
88
-
89
- self._server: asyncio.Server | None = None
90
- self._running = False
91
- self._frame_interval = 1.0 / fps
92
- self._last_frame: bytes | None = None
93
- self._last_frame_time: float = 0
94
- self._frame_lock = threading.Lock()
95
-
96
- # Frame capture thread
97
- self._capture_thread: threading.Thread | None = None
98
-
99
- # Face tracking state
100
- self._head_tracker = None
101
- self._face_tracking_enabled = enable_face_tracking
102
- self._face_tracking_requested = enable_face_tracking
103
- self._face_tracking_offsets: list[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
104
- self._face_tracking_lock = threading.Lock()
105
-
106
- # Gesture detection state
107
- self._gesture_detector = None
108
- self._gesture_detection_enabled = enable_gesture_detection
109
- self._gesture_detection_requested = enable_gesture_detection
110
- self._current_gesture = "none"
111
- self._gesture_confidence = 0.0
112
- self._gesture_lock = threading.Lock()
113
- self._gesture_state_callback = None # Callback to notify entity registry
114
- self._gesture_action_callback = None # Callback for gesture action mapping
115
-
116
- # Face detection state callback (similar to gesture)
117
- self._face_state_callback = None # Callback to notify entity registry
118
- self._last_face_detected_state = False # Track previous state for change detection
119
-
120
- # Face tracking interpolator (handles smooth return to neutral)
121
- self._face_interpolator = FaceTrackingInterpolator(
122
- config=InterpolationConfig(
123
- face_lost_delay=2.0,
124
- interpolation_duration=1.0,
125
- offset_scale=0.6,
126
- pitch_offset_deg=9.0,
127
- yaw_offset_deg=-7.0,
128
- )
129
- )
130
-
131
- # =====================================================================
132
- # Adaptive frame rate manager (replaces inline logic)
133
- # =====================================================================
134
- self._frame_rate_manager = AdaptiveFrameRateManager(
135
- config=FrameRateConfig(
136
- fps_high=fps,
137
- fps_low=2,
138
- fps_idle=0.5,
139
- low_power_threshold=5.0,
140
- idle_threshold=30.0,
141
- gesture_detection_interval=3,
142
- )
143
- )
144
-
145
- # Stream client tracking for resource optimization
146
- self._active_stream_clients: set = set()
147
- self._stream_client_lock = threading.Lock()
148
- self._next_client_id = 0
149
-
150
- async def start(self) -> None:
151
- """Start the MJPEG camera server."""
152
- if self._running:
153
- _LOGGER.warning("Camera server already running")
154
- return
155
-
156
- self._running = True
157
-
158
- # Detect media backend type for compatibility handling
159
- try:
160
- from reachy_mini.media.media_manager import MediaBackend
161
-
162
- backend = self.reachy_mini.media.backend
163
- backend_name = {
164
- MediaBackend.GSTREAMER: "GStreamer",
165
- MediaBackend.DEFAULT: "Default",
166
- MediaBackend.DEFAULT_NO_VIDEO: "Default (No Video)",
167
- }.get(backend, str(backend))
168
- _LOGGER.info("Detected media backend: %s", backend_name)
169
- except ImportError:
170
- _LOGGER.debug("MediaBackend enum not available")
171
- except Exception as e:
172
- _LOGGER.debug("Failed to detect media backend: %s", e)
173
-
174
- # Initialize head tracker if face tracking enabled
175
- if self._face_tracking_enabled:
176
- try:
177
- from .head_tracker import HeadTracker
178
-
179
- self._head_tracker = HeadTracker(confidence_threshold=self._face_confidence_threshold)
180
- _LOGGER.info(
181
- "Face tracking enabled with YOLO head tracker (confidence=%.2f)", self._face_confidence_threshold
182
- )
183
- except ImportError as e:
184
- _LOGGER.error("Failed to import head tracker: %s", e)
185
- self._head_tracker = None
186
- except Exception as e:
187
- _LOGGER.warning("Failed to initialize head tracker: %s", e)
188
- self._head_tracker = None
189
- else:
190
- _LOGGER.info("Face tracking disabled by configuration")
191
-
192
- # Initialize gesture detector
193
- if self._gesture_detection_enabled:
194
- try:
195
- from .gesture_detector import GestureDetector
196
-
197
- self._gesture_detector = GestureDetector()
198
- if self._gesture_detector.is_available:
199
- _LOGGER.info("Gesture detection enabled (18 HaGRID classes)")
200
- else:
201
- _LOGGER.warning("Gesture detection not available")
202
- self._gesture_detector = None
203
- except ImportError as e:
204
- _LOGGER.warning("Failed to import gesture detector: %s", e)
205
- self._gesture_detector = None
206
- except Exception as e:
207
- _LOGGER.warning("Failed to initialize gesture detector: %s", e)
208
- self._gesture_detector = None
209
-
210
- # Start frame capture thread
211
- self._capture_thread = threading.Thread(target=self._capture_frames, daemon=True, name="camera-capture")
212
- self._capture_thread.start()
213
-
214
- # Start HTTP server
215
- self._server = await asyncio.start_server(
216
- self._handle_client,
217
- self.host,
218
- self.port,
219
- )
220
-
221
- _LOGGER.info("MJPEG Camera server started on http://%s:%d", self.host, self.port)
222
- _LOGGER.info(" Stream URL: http://<ip>:%d/stream", self.port)
223
- _LOGGER.info(" Snapshot URL: http://<ip>:%d/snapshot", self.port)
224
-
225
- async def stop(self, join_timeout: float = 3.0) -> None:
226
- """Stop the MJPEG camera server and release all resources.
227
-
228
- This method ensures complete cleanup of:
229
- - Capture thread
230
- - HTTP server
231
- - ML models (head tracker, gesture detector)
232
- - Frame buffers and state
233
- - SDK media resources
234
- """
235
- _LOGGER.info("Stopping MJPEG camera server...")
236
- self._running = False
237
-
238
- # 0. Close SDK media resources to prevent leaks
239
- try:
240
- self.reachy_mini.media.close()
241
- _LOGGER.info("SDK media resources closed")
242
- except Exception as e:
243
- _LOGGER.debug("Failed to close SDK media: %s", e)
244
-
245
- # 1. Stop capture thread
246
- if self._capture_thread:
247
- # Wait up to join_timeout seconds - longer than max sleep time (2s in idle mode)
248
- self._capture_thread.join(timeout=join_timeout)
249
- if self._capture_thread.is_alive():
250
- _LOGGER.warning("Camera capture thread did not stop cleanly")
251
- self._capture_thread = None
252
-
253
- # 2. Stop HTTP server
254
- if self._server:
255
- self._server.close()
256
- await self._server.wait_closed()
257
- self._server = None
258
-
259
- # 3. Release ML models (explicit cleanup)
260
- self._release_ml_models()
261
-
262
- # 4. Clear frame buffer
263
- with self._frame_lock:
264
- self._last_frame = None
265
- self._last_frame_time = 0
266
-
267
- # 5. Clear tracking state
268
- with self._face_tracking_lock:
269
- self._face_tracking_offsets = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
270
-
271
- with self._gesture_lock:
272
- self._current_gesture = "none"
273
- self._gesture_confidence = 0.0
274
-
275
- # 6. Clear active clients
276
- with self._stream_client_lock:
277
- self._active_stream_clients.clear()
278
-
279
- _LOGGER.info("MJPEG Camera server stopped - all resources released")
280
-
281
- def _release_ml_models(self) -> None:
282
- """Release ML models from memory.
283
-
284
- This is called during stop() and suspend_processing() to free GPU/CPU memory.
285
- """
286
- # Release YOLO head tracker model
287
- if self._head_tracker is not None:
288
- try:
289
- # Try to call close() if available, otherwise just delete
290
- if hasattr(self._head_tracker, "close"):
291
- self._head_tracker.close()
292
- del self._head_tracker
293
- self._head_tracker = None
294
- _LOGGER.debug("Head tracker model released")
295
- except Exception as e:
296
- _LOGGER.warning("Error releasing head tracker: %s", e)
297
-
298
- # Release gesture detector model
299
- if self._gesture_detector is not None:
300
- try:
301
- if hasattr(self._gesture_detector, "close"):
302
- self._gesture_detector.close()
303
- del self._gesture_detector
304
- self._gesture_detector = None
305
- _LOGGER.debug("Gesture detector model released")
306
- except Exception as e:
307
- _LOGGER.warning("Error releasing gesture detector: %s", e)
308
-
309
- async def __aenter__(self) -> MJPEGCameraServer:
310
- """Context manager entry - start the server."""
311
- await self.start()
312
- return self
313
-
314
- async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
315
- """Context manager exit - stop the server and release resources."""
316
- await self.stop()
317
- return False # Don't suppress exceptions
318
-
319
- def suspend_processing(self) -> None:
320
- """Suspend AI processing for sleep mode.
321
-
322
- This releases ML models from memory while keeping basic MJPEG
323
- streaming capability (though it will only serve cached frames).
324
-
325
- Call resume_processing() to restore full functionality.
326
- """
327
- _LOGGER.info("Suspending camera processing for sleep mode...")
328
-
329
- # Suspend frame rate manager
330
- self._frame_rate_manager.suspend()
331
- # Disable runtime processing while preserving requested user preferences.
332
- self._face_tracking_enabled = False
333
- self._gesture_detection_enabled = False
334
-
335
- # Release ML models (use shared method to avoid duplication)
336
- self._release_ml_models()
337
-
338
- # Reset tracking state
339
- with self._face_tracking_lock:
340
- self._face_tracking_offsets = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
341
-
342
- with self._gesture_lock:
343
- self._current_gesture = "none"
344
- self._gesture_confidence = 0.0
345
-
346
- _LOGGER.info("Camera processing suspended - ML models released")
347
-
348
- def resume_processing(self) -> None:
349
- """Resume AI processing after sleep mode.
350
-
351
- This reloads ML models and restores full camera functionality.
352
- Should be called after robot has fully woken up.
353
- """
354
- _LOGGER.info("Resuming camera processing after sleep...")
355
-
356
- # Resume frame rate manager
357
- self._frame_rate_manager.resume()
358
-
359
- # Restore runtime states from requested user preferences
360
- self._face_tracking_enabled = self._face_tracking_requested
361
- self._gesture_detection_enabled = self._gesture_detection_requested
362
-
363
- # Reload head tracker if face tracking is requested
364
- if self._face_tracking_requested and self._head_tracker is None:
365
- try:
366
- from .head_tracker import HeadTracker
367
-
368
- self._head_tracker = HeadTracker(confidence_threshold=self._face_confidence_threshold)
369
- self._face_tracking_enabled = True
370
- _LOGGER.info("Head tracker model reloaded (confidence=%.2f)", self._face_confidence_threshold)
371
- except Exception as e:
372
- _LOGGER.warning("Failed to reload head tracker: %s", e)
373
- self._face_tracking_enabled = False
374
-
375
- # Reload gesture detector only if gesture detection is requested
376
- if self._gesture_detection_requested and self._gesture_detector is None:
377
- try:
378
- from .gesture_detector import GestureDetector
379
-
380
- self._gesture_detector = GestureDetector()
381
- if self._gesture_detector.is_available:
382
- self._gesture_detection_enabled = True
383
- _LOGGER.info("Gesture detector model reloaded")
384
- else:
385
- self._gesture_detector = None
386
- self._gesture_detection_enabled = False
387
- except Exception as e:
388
- _LOGGER.warning("Failed to reload gesture detector: %s", e)
389
- self._gesture_detection_enabled = False
390
- elif not self._gesture_detection_requested:
391
- self._gesture_detector = None
392
- self._gesture_detection_enabled = False
393
-
394
- _LOGGER.info("Camera processing resumed - full functionality restored")
395
-
396
- def suspend(self) -> None:
397
- """Fully suspend the camera server for sleep mode.
398
-
399
- This stops the capture thread and releases all resources to free CPU.
400
- Call resume_from_suspend() to restart.
401
- """
402
- if not self._running:
403
- _LOGGER.debug("Camera server not running, nothing to suspend")
404
- return
405
-
406
- _LOGGER.info("Suspending camera server for sleep...")
407
-
408
- # First suspend AI processing
409
- self.suspend_processing()
410
-
411
- # Stop the capture thread to release CPU
412
- self._running = False
413
- if self._capture_thread is not None:
414
- self._capture_thread.join(timeout=3.0)
415
- if self._capture_thread.is_alive():
416
- _LOGGER.warning("Camera capture thread did not stop cleanly during suspend")
417
- self._capture_thread = None
418
-
419
- _LOGGER.info("Camera server suspended - CPU released")
420
-
421
- def resume_from_suspend(self) -> None:
422
- """Resume the camera server after sleep.
423
-
424
- This restarts the capture thread and reloads ML models.
425
- """
426
- if self._running:
427
- _LOGGER.debug("Camera server already running")
428
- return
429
-
430
- _LOGGER.info("Resuming camera server from sleep...")
431
-
432
- self._running = True
433
-
434
- # Resume AI processing (reloads models)
435
- self.resume_processing()
436
-
437
- # Restart capture thread
438
- self._capture_thread = threading.Thread(target=self._capture_frames, daemon=True, name="camera-capture")
439
- self._capture_thread.start()
440
-
441
- _LOGGER.info("Camera server resumed from sleep")
442
-
443
- def _capture_frames(self) -> None:
444
- """Background thread to capture frames from Reachy Mini and do face tracking.
445
-
446
- Resource optimization:
447
- - High frequency (15fps) when face detected or in conversation
448
- - Low frequency (2fps) when idle and no face for short period
449
- - Ultra-low (0.5fps) when idle for extended period - minimal AI inference
450
- """
451
- _LOGGER.info("Starting camera capture thread (face_tracking=%s)", self._face_tracking_enabled)
452
-
453
- frame_count = 0
454
- face_detect_count = 0
455
- last_log_time = time.time()
456
-
457
- while self._running:
458
- try:
459
- current_time = time.time()
460
-
461
- # Determine if we should run AI inference this frame
462
- should_run_ai = self._should_run_ai_inference(current_time)
463
- should_run_gesture = (
464
- self._gesture_detection_enabled
465
- and self._gesture_detector is not None
466
- and self._frame_rate_manager.should_run_gesture_detection()
467
- )
468
-
469
- # Only get frame if needed (AI inference, gesture detection, or MJPEG streaming)
470
- frame = (
471
- self._get_camera_frame()
472
- if should_run_ai or should_run_gesture or self._has_stream_clients()
473
- else None
474
- )
475
-
476
- if frame is not None:
477
- frame_count += 1
478
-
479
- # Encode frame as JPEG for streaming
480
- encode_params = [cv2.IMWRITE_JPEG_QUALITY, self.quality]
481
- success, jpeg_data = cv2.imencode(".jpg", frame, encode_params)
482
-
483
- if success:
484
- with self._frame_lock:
485
- self._last_frame = jpeg_data.tobytes()
486
- self._last_frame_time = time.time()
487
-
488
- # Only run AI inference when enabled
489
- if should_run_ai:
490
- # Face tracking
491
- if self._face_tracking_enabled and self._head_tracker is not None:
492
- face_detect_count += 1
493
- face_detected = self._process_face_tracking(frame, current_time)
494
-
495
- # Update adaptive frame rate manager
496
- self._frame_rate_manager.update(face_detected=face_detected)
497
-
498
- # Check for face detection state change and notify callback
499
- # Use is_face_detected() which considers face_lost_delay
500
- current_face_state = self.is_face_detected()
501
- if current_face_state != self._last_face_detected_state:
502
- self._last_face_detected_state = current_face_state
503
- if self._face_state_callback:
504
- try:
505
- self._face_state_callback()
506
- except Exception as e:
507
- _LOGGER.debug("Face state callback error: %s", e)
508
-
509
- # Handle smooth interpolation when face lost
510
- self._process_face_lost_interpolation(current_time)
511
-
512
- # Gesture detection (runs independently of face detection)
513
- # Uses its own frame rate control via should_run_gesture_detection()
514
- if (
515
- self._gesture_detection_enabled
516
- and self._gesture_detector is not None
517
- and self._frame_rate_manager.should_run_gesture_detection()
518
- ):
519
- self._process_gesture_detection(frame)
520
-
521
- # Log stats every 30 seconds
522
- if current_time - last_log_time >= 30.0:
523
- fps = frame_count / (current_time - last_log_time)
524
- detect_fps = face_detect_count / (current_time - last_log_time)
525
- mode = self._frame_rate_manager.current_mode.value.upper()
526
- no_face = self._frame_rate_manager.state.no_face_duration
527
- _LOGGER.debug(
528
- "Camera: %.1f fps, AI: %.1f fps (%s), no_face: %.0fs", fps, detect_fps, mode, no_face
529
- )
530
- frame_count = 0
531
- face_detect_count = 0
532
- last_log_time = current_time
533
-
534
- # Sleep to maintain target FPS (use adaptive rate)
535
- # Keep a minimum processing cadence for gesture responsiveness.
536
- sleep_time = self._frame_rate_manager.get_sleep_interval()
537
- if self._gesture_detection_enabled and self._gesture_detector is not None:
538
- sleep_time = min(sleep_time, 1.0 / GESTURE_MIN_FPS)
539
- time.sleep(sleep_time)
540
-
541
- except Exception as e:
542
- _LOGGER.error("Error capturing frame: %s", e)
543
- time.sleep(1.0)
544
-
545
- _LOGGER.info("Camera capture thread stopped")
546
-
547
- def _should_run_ai_inference(self, current_time: float) -> bool:
548
- """Determine if AI inference (face/gesture detection) should run."""
549
- return self._frame_rate_manager.should_run_inference()
550
-
551
- def _has_stream_clients(self) -> bool:
552
- """Check if there are active MJPEG stream clients."""
553
- with self._stream_client_lock:
554
- return len(self._active_stream_clients) > 0
555
-
556
- def _register_stream_client(self) -> int:
557
- """Register a new stream client and return its ID."""
558
- with self._stream_client_lock:
559
- # Use rolling client IDs to prevent integer overflow after ~4 billion connections
560
- client_id = self._next_client_id % 1000000 # Roll over after 1M
561
- self._next_client_id += 1
562
- self._active_stream_clients.add(client_id)
563
- _LOGGER.debug("Stream client registered: %d (total: %d)", client_id, len(self._active_stream_clients))
564
- return client_id
565
-
566
- def _unregister_stream_client(self, client_id: int) -> None:
567
- """Unregister a stream client."""
568
- with self._stream_client_lock:
569
- self._active_stream_clients.discard(client_id)
570
- _LOGGER.debug("Stream client unregistered: %d (total: %d)", client_id, len(self._active_stream_clients))
571
-
572
- @property
573
- def stream_client_count(self) -> int:
574
- """Get the number of active stream clients."""
575
- with self._stream_client_lock:
576
- return len(self._active_stream_clients)
577
-
578
- def _process_face_tracking(self, frame: np.ndarray, current_time: float) -> bool:
579
- """Process face tracking on a frame.
580
-
581
- Returns:
582
- True if face was detected, False otherwise
583
- """
584
- if self._head_tracker is None:
585
- return False
586
-
587
- try:
588
- face_center, _confidence = self._head_tracker.get_head_position(frame)
589
-
590
- if face_center is not None:
591
- # Face detected - notify interpolator
592
- self._face_interpolator.on_face_detected(current_time)
593
-
594
- # Convert normalized coordinates to pixel coordinates
595
- h, w = frame.shape[:2]
596
- eye_center_norm = (face_center + 1) / 2
597
-
598
- eye_center_pixels = [
599
- int(eye_center_norm[0] * w),
600
- int(eye_center_norm[1] * h),
601
- ]
602
-
603
- # Get the head pose needed to look at the target
604
- target_pose = self.reachy_mini.look_at_image(
605
- eye_center_pixels[0],
606
- eye_center_pixels[1],
607
- duration=0.0,
608
- perform_movement=False,
609
- )
610
-
611
- # Extract translation and rotation from target pose
612
- translation = target_pose[:3, 3]
613
- rotation = R.from_matrix(target_pose[:3, :3]).as_euler("xyz", degrees=False)
614
-
615
- # Update interpolator with new offsets (handles scaling and compensation)
616
- self._face_interpolator.update_offsets(translation, rotation)
617
-
618
- # Sync to thread-safe storage
619
- with self._face_tracking_lock:
620
- self._face_tracking_offsets = list(self._face_interpolator.get_offsets())
621
-
622
- return True
623
-
624
- return False
625
-
626
- except Exception as e:
627
- _LOGGER.debug("Face tracking error: %s", e)
628
- return False
629
-
630
- def _process_face_lost_interpolation(self, current_time: float) -> None:
631
- """Handle smooth interpolation back to neutral when face is lost."""
632
- # Delegate to interpolator
633
- self._face_interpolator.process_face_lost(current_time)
634
-
635
- # Sync interpolated offsets to thread-safe storage
636
- with self._face_tracking_lock:
637
- self._face_tracking_offsets = list(self._face_interpolator.get_offsets())
638
-
639
- # =========================================================================
640
- # Public API for face tracking
641
- # =========================================================================
642
-
643
- def get_face_tracking_offsets(self) -> tuple[float, float, float, float, float, float]:
644
- """Get current face tracking offsets (thread-safe).
645
-
646
- Returns:
647
- Tuple of (x, y, z, roll, pitch, yaw) offsets
648
- """
649
- with self._face_tracking_lock:
650
- offsets = self._face_tracking_offsets
651
- return (offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], offsets[5])
652
-
653
- def is_face_detected(self) -> bool:
654
- """Check if a face is currently detected.
655
-
656
- Returns True if face was detected recently (within face_lost_delay period).
657
- This is useful for Home Assistant entities to expose face detection status.
658
-
659
- Returns:
660
- True if face is detected, False otherwise
661
- """
662
- return self._face_interpolator.is_face_detected()
663
-
664
- def set_face_tracking_enabled(self, enabled: bool) -> None:
665
- """Enable or disable face tracking."""
666
- self._face_tracking_requested = enabled
667
- if self._face_tracking_enabled == enabled:
668
- return # No change, skip logging
669
- self._face_tracking_enabled = enabled
670
- if enabled:
671
- if self._head_tracker is None:
672
- try:
673
- from .head_tracker import HeadTracker
674
-
675
- self._head_tracker = HeadTracker(confidence_threshold=self._face_confidence_threshold)
676
- except Exception as e:
677
- _LOGGER.warning("Failed to enable face tracking model: %s", e)
678
- self._face_tracking_enabled = False
679
- else:
680
- # Start interpolation back to neutral
681
- self._face_interpolator.reset_interpolation()
682
- self._head_tracker = None
683
- with self._face_tracking_lock:
684
- self._face_tracking_offsets = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
685
- _LOGGER.info("Face tracking %s", "enabled" if enabled else "disabled")
686
-
687
- def get_face_tracking_enabled(self) -> bool:
688
- """Return whether face tracking is enabled."""
689
- return self._face_tracking_enabled
690
-
691
- def get_face_confidence_threshold(self) -> float:
692
- """Return current face confidence threshold (0-1)."""
693
- return self._face_confidence_threshold
694
-
695
- def set_face_confidence_threshold(self, threshold: float) -> None:
696
- """Set face detection confidence threshold (0-1)."""
697
- threshold = max(0.0, min(1.0, float(threshold)))
698
- if abs(self._face_confidence_threshold - threshold) < 1e-6:
699
- return
700
-
701
- self._face_confidence_threshold = threshold
702
-
703
- # Reload model to apply threshold immediately when enabled.
704
- if self._face_tracking_enabled:
705
- try:
706
- from .head_tracker import HeadTracker
707
-
708
- self._head_tracker = HeadTracker(confidence_threshold=self._face_confidence_threshold)
709
- except Exception as e:
710
- _LOGGER.warning("Failed to apply face confidence threshold %.2f: %s", threshold, e)
711
- self._head_tracker = None
712
- self._face_tracking_enabled = False
713
-
714
- _LOGGER.info("Face confidence threshold set to %.2f", self._face_confidence_threshold)
715
-
716
- def set_conversation_mode(self, in_conversation: bool) -> None:
717
- """Set conversation mode for adaptive face tracking.
718
-
719
- When in conversation mode, face tracking runs at high frequency
720
- regardless of whether a face is currently detected.
721
-
722
- Args:
723
- in_conversation: True when voice assistant is actively conversing
724
- """
725
- self._frame_rate_manager.set_conversation_mode(in_conversation)
726
- if in_conversation:
727
- _LOGGER.debug("Face tracking: conversation mode ON (high frequency)")
728
- else:
729
- _LOGGER.debug("Face tracking: conversation mode OFF (adaptive)")
730
-
731
- # =========================================================================
732
- # Gesture detection
733
- # =========================================================================
734
-
735
- def _process_gesture_detection(self, frame: np.ndarray) -> None:
736
- """Process gesture detection on a frame."""
737
- if self._gesture_detector is None:
738
- return
739
-
740
- try:
741
- # Detect gesture
742
- detected_gesture, confidence = self._gesture_detector.detect(frame)
743
-
744
- # Update current gesture state
745
- state_changed = False
746
- with self._gesture_lock:
747
- old_gesture = self._current_gesture
748
- if detected_gesture.value != "no_gesture":
749
- self._current_gesture = detected_gesture.value
750
- self._gesture_confidence = confidence
751
- if old_gesture != detected_gesture.value:
752
- state_changed = True
753
- _LOGGER.info("Gesture detected: %s (%.1f%%)", detected_gesture.value, confidence * 100)
754
- else:
755
- if self._current_gesture != "none":
756
- state_changed = True
757
- _LOGGER.info("Gesture cleared (no gesture detected)")
758
- self._current_gesture = "none"
759
- self._gesture_confidence = 0.0
760
-
761
- # Notify entity registry to push update to Home Assistant
762
- if state_changed and self._gesture_state_callback:
763
- try:
764
- self._gesture_state_callback()
765
- except Exception:
766
- pass # Ignore callback errors
767
-
768
- # Trigger gesture actions (emotions, listening, etc.)
769
- if state_changed and self._gesture_action_callback:
770
- try:
771
- self._gesture_action_callback(self._current_gesture, self._gesture_confidence)
772
- except Exception as e:
773
- _LOGGER.debug("Gesture action callback error: %s", e)
774
-
775
- except Exception as e:
776
- _LOGGER.warning("Gesture detection error: %s", e)
777
-
778
- def get_current_gesture(self) -> str:
779
- """Get current detected gesture name (thread-safe).
780
-
781
- Returns:
782
- Gesture name string (e.g., "like", "peace", "none")
783
- """
784
- with self._gesture_lock:
785
- return self._current_gesture
786
-
787
- def get_gesture_confidence(self) -> float:
788
- """Get current gesture detection confidence (thread-safe).
789
-
790
- Returns:
791
- Confidence value (0.0 to 1.0), multiplied by 100 for percentage display
792
- """
793
- with self._gesture_lock:
794
- return self._gesture_confidence * 100.0 # Return as percentage
795
-
796
- def set_gesture_detection_enabled(self, enabled: bool) -> None:
797
- """Enable or disable gesture detection."""
798
- self._gesture_detection_requested = enabled
799
- if self._gesture_detection_enabled == enabled:
800
- return
801
-
802
- self._gesture_detection_enabled = enabled
803
- if enabled:
804
- if self._gesture_detector is None:
805
- try:
806
- from .gesture_detector import GestureDetector
807
-
808
- self._gesture_detector = GestureDetector()
809
- if not self._gesture_detector.is_available:
810
- self._gesture_detector = None
811
- self._gesture_detection_enabled = False
812
- except Exception as e:
813
- _LOGGER.warning("Failed to enable gesture detector model: %s", e)
814
- self._gesture_detection_enabled = False
815
- self._gesture_detector = None
816
- else:
817
- self._gesture_detector = None
818
- with self._gesture_lock:
819
- self._current_gesture = "none"
820
- self._gesture_confidence = 0.0
821
- _LOGGER.info("Gesture detection %s", "enabled" if enabled else "disabled")
822
-
823
- def get_gesture_detection_enabled(self) -> bool:
824
- """Return whether gesture detection is enabled."""
825
- return self._gesture_detection_enabled
826
-
827
- def set_gesture_state_callback(self, callback) -> None:
828
- """Set callback to notify when gesture state changes."""
829
- self._gesture_state_callback = callback
830
-
831
- def set_gesture_action_callback(self, callback) -> None:
832
- """Set callback for gesture action handling.
833
-
834
- The callback receives (gesture_name: str, confidence: float).
835
- """
836
- self._gesture_action_callback = callback
837
-
838
- def set_face_state_callback(self, callback) -> None:
839
- """Set callback to notify when face detection state changes."""
840
- self._face_state_callback = callback
841
-
842
- def _get_camera_frame(self) -> np.ndarray | None:
843
- """Get a frame from Reachy Mini's camera."""
844
- try:
845
- # Use GStreamer lock to prevent concurrent access conflicts
846
- acquired = self._gstreamer_lock.acquire(timeout=0.05)
847
- if acquired:
848
- try:
849
- frame = self.reachy_mini.media.get_frame()
850
- return frame
851
- finally:
852
- self._gstreamer_lock.release()
853
- else:
854
- _LOGGER.debug("GStreamer lock busy, skipping camera frame")
855
- # Flush SDK video buffer to prevent buffer overflow during lock contention
856
- try:
857
- if hasattr(self.reachy_mini.media, "flush"):
858
- self.reachy_mini.media.flush()
859
- elif hasattr(self.reachy_mini.media, "flush_video"):
860
- self.reachy_mini.media.flush_video()
861
- except Exception:
862
- pass
863
- return None
864
- except Exception as e:
865
- _LOGGER.debug("Failed to get camera frame: %s", e)
866
- return None
867
-
868
- def get_snapshot(self) -> bytes | None:
869
- """Get the latest frame as JPEG bytes."""
870
- with self._frame_lock:
871
- return self._last_frame
872
-
873
- async def _handle_client(
874
- self,
875
- reader: asyncio.StreamReader,
876
- writer: asyncio.StreamWriter,
877
- ) -> None:
878
- """Handle incoming HTTP client connections."""
879
- try:
880
- # Read HTTP request
881
- request_line = await asyncio.wait_for(reader.readline(), timeout=10.0)
882
- request = request_line.decode("utf-8", errors="ignore").strip()
883
-
884
- # Read headers (we don't need them but must consume them)
885
- while True:
886
- line = await asyncio.wait_for(reader.readline(), timeout=5.0)
887
- if line in {b"\r\n", b"\n", b""}:
888
- break
889
-
890
- # Parse request path
891
- parts = request.split(" ")
892
- if len(parts) >= 2:
893
- path = parts[1]
894
- else:
895
- path = "/"
896
-
897
- _LOGGER.debug("HTTP request: %s", request)
898
-
899
- if path == "/stream":
900
- await self._handle_stream(writer)
901
- elif path == "/snapshot":
902
- await self._handle_snapshot(writer)
903
- else:
904
- await self._handle_index(writer)
905
-
906
- except TimeoutError:
907
- _LOGGER.debug("Client connection timeout")
908
- except ConnectionResetError:
909
- _LOGGER.debug("Client connection reset")
910
- except Exception as e:
911
- _LOGGER.error("Error handling client: %s", e)
912
- finally:
913
- try:
914
- writer.close()
915
- await writer.wait_closed()
916
- except Exception:
917
- pass
918
-
919
- async def _handle_index(self, writer: asyncio.StreamWriter) -> None:
920
- """Handle index page request."""
921
- html = f"""<!DOCTYPE html>
922
- <html>
923
- <head>
924
- <title>Reachy Mini Camera</title>
925
- <style>
926
- body {{ font-family: Arial, sans-serif; margin: 40px; background: #1a1a2e; color: #eee; }}
927
- h1 {{ color: #00d4ff; }}
928
- .container {{ max-width: 800px; margin: 0 auto; }}
929
- .stream {{ width: 100%; max-width: 640px; border: 2px solid #00d4ff; border-radius: 8px; }}
930
- a {{ color: #00d4ff; }}
931
- .info {{ background: #16213e; padding: 20px; border-radius: 8px; margin-top: 20px; }}
932
- </style>
933
- </head>
934
- <body>
935
- <div class="container">
936
- <h1>Reachy Mini Camera</h1>
937
- <img class="stream" src="/stream" alt="Camera Stream">
938
- <div class="info">
939
- <h3>Endpoints:</h3>
940
- <ul>
941
- <li><a href="/stream">/stream</a> - MJPEG video stream</li>
942
- <li><a href="/snapshot">/snapshot</a> - Single JPEG snapshot</li>
943
- </ul>
944
- <h3>Home Assistant Integration:</h3>
945
- <p>Add a Generic Camera with URL: <code>http://&lt;ip&gt;:{self.port}/stream</code></p>
946
- </div>
947
- </div>
948
- </body>
949
- </html>"""
950
-
951
- response = (
952
- "HTTP/1.1 200 OK\r\n"
953
- "Content-Type: text/html; charset=utf-8\r\n"
954
- f"Content-Length: {len(html)}\r\n"
955
- "Connection: close\r\n"
956
- "\r\n"
957
- )
958
-
959
- writer.write(response.encode("utf-8"))
960
- writer.write(html.encode("utf-8"))
961
- await writer.drain()
962
-
963
- async def _handle_snapshot(self, writer: asyncio.StreamWriter) -> None:
964
- """Handle snapshot request - return single JPEG image."""
965
- jpeg_data = self.get_snapshot()
966
-
967
- if jpeg_data is None:
968
- response = (
969
- "HTTP/1.1 503 Service Unavailable\r\n"
970
- "Content-Type: text/plain\r\n"
971
- "Connection: close\r\n"
972
- "\r\n"
973
- "No frame available"
974
- )
975
- writer.write(response.encode("utf-8"))
976
- else:
977
- response = (
978
- "HTTP/1.1 200 OK\r\n"
979
- "Content-Type: image/jpeg\r\n"
980
- f"Content-Length: {len(jpeg_data)}\r\n"
981
- "Cache-Control: no-cache, no-store, must-revalidate\r\n"
982
- "Connection: close\r\n"
983
- "\r\n"
984
- )
985
- writer.write(response.encode("utf-8"))
986
- writer.write(jpeg_data)
987
-
988
- await writer.drain()
989
-
990
- async def _handle_stream(self, writer: asyncio.StreamWriter) -> None:
991
- """Handle MJPEG stream request."""
992
- # Register this client for tracking
993
- client_id = self._register_stream_client()
994
-
995
- # Send MJPEG headers
996
- response = (
997
- "HTTP/1.1 200 OK\r\n"
998
- f"Content-Type: multipart/x-mixed-replace; boundary={MJPEG_BOUNDARY}\r\n"
999
- "Cache-Control: no-cache, no-store, must-revalidate\r\n"
1000
- "Connection: keep-alive\r\n"
1001
- "\r\n"
1002
- )
1003
- writer.write(response.encode("utf-8"))
1004
- await writer.drain()
1005
-
1006
- _LOGGER.debug("Started MJPEG stream for client %d", client_id)
1007
-
1008
- last_sent_time = 0
1009
-
1010
- try:
1011
- while self._running:
1012
- # Get latest frame
1013
- with self._frame_lock:
1014
- jpeg_data = self._last_frame
1015
- frame_time = self._last_frame_time
1016
-
1017
- # Only send if we have a new frame
1018
- if jpeg_data is not None and frame_time > last_sent_time:
1019
- # Send MJPEG frame
1020
- frame_header = (
1021
- f"--{MJPEG_BOUNDARY}\r\nContent-Type: image/jpeg\r\nContent-Length: {len(jpeg_data)}\r\n\r\n"
1022
- )
1023
-
1024
- writer.write(frame_header.encode("utf-8"))
1025
- writer.write(jpeg_data)
1026
- writer.write(b"\r\n")
1027
- await writer.drain()
1028
-
1029
- last_sent_time = frame_time
1030
-
1031
- # Small delay to prevent busy loop
1032
- await asyncio.sleep(0.01)
1033
-
1034
- except (ConnectionResetError, BrokenPipeError):
1035
- _LOGGER.debug("Client %d disconnected from stream", client_id)
1036
- except Exception as e:
1037
- _LOGGER.error("Error in MJPEG stream for client %d: %s", client_id, e)
1038
- finally:
1039
- # Always unregister client when done
1040
- self._unregister_stream_client(client_id)
1041
-
1042
- _LOGGER.debug("Ended MJPEG stream for client %d", client_id)
 
1
+ """
2
+ MJPEG Camera Server for Reachy Mini with Face Tracking.
3
+
4
+ This module provides an HTTP server that streams camera frames from Reachy Mini
5
+ as MJPEG, which can be integrated with Home Assistant via Generic Camera.
6
+ Also provides face tracking for head movement control.
7
+
8
+ Reference: reachy_mini_conversation_app/src/reachy_mini_conversation_app/camera_worker.py
9
+ """
10
+
11
+ import asyncio
12
+ import logging
13
+ import threading
14
+ import time
15
+ from typing import Optional, Tuple, List, TYPE_CHECKING
16
+
17
+ import cv2
18
+ import numpy as np
19
+ from scipy.spatial.transform import Rotation as R
20
+
21
+ # Import SDK interpolation utilities (same as conversation_app)
22
+ try:
23
+ from reachy_mini.utils.interpolation import linear_pose_interpolation
24
+ SDK_INTERPOLATION_AVAILABLE = True
25
+ except ImportError:
26
+ SDK_INTERPOLATION_AVAILABLE = False
27
+
28
+ if TYPE_CHECKING:
29
+ from reachy_mini import ReachyMini
30
+
31
+ _LOGGER = logging.getLogger(__name__)
32
+
33
+ # MJPEG boundary string
34
+ MJPEG_BOUNDARY = "frame"
35
+
36
+
37
+ class MJPEGCameraServer:
38
+ """
39
+ MJPEG streaming server for Reachy Mini camera with face tracking.
40
+
41
+ Provides HTTP endpoints:
42
+ - /stream - MJPEG video stream
43
+ - /snapshot - Single JPEG image
44
+ - / - Simple status page
45
+
46
+ Also provides face tracking offsets for head movement control.
47
+
48
+ Resource Optimization:
49
+ - Adaptive frame rate: high (15fps) when face detected or in conversation,
50
+ low (3fps) when idle and no face for extended period
51
+ - Face detection pauses after prolonged absence to save CPU
52
+ """
53
+
54
+ def __init__(
55
+ self,
56
+ reachy_mini: Optional["ReachyMini"] = None,
57
+ host: str = "0.0.0.0",
58
+ port: int = 8081,
59
+ fps: int = 15, # 15fps for smooth face tracking
60
+ quality: int = 80,
61
+ enable_face_tracking: bool = True,
62
+ ):
63
+ """
64
+ Initialize the MJPEG camera server.
65
+
66
+ Args:
67
+ reachy_mini: Reachy Mini robot instance (can be None for testing)
68
+ host: Host address to bind to
69
+ port: Port number for the HTTP server
70
+ fps: Target frames per second for the stream
71
+ quality: JPEG quality (1-100)
72
+ enable_face_tracking: Enable face tracking for head movement
73
+ """
74
+ self.reachy_mini = reachy_mini
75
+ self.host = host
76
+ self.port = port
77
+ self.fps = fps
78
+ self.quality = quality
79
+ self.enable_face_tracking = enable_face_tracking
80
+
81
+ self._server: Optional[asyncio.Server] = None
82
+ self._running = False
83
+ self._frame_interval = 1.0 / fps
84
+ self._last_frame: Optional[bytes] = None
85
+ self._last_frame_time: float = 0
86
+ self._frame_lock = threading.Lock()
87
+
88
+ # Frame capture thread
89
+ self._capture_thread: Optional[threading.Thread] = None
90
+
91
+ # Face tracking state
92
+ self._head_tracker = None
93
+ self._face_tracking_enabled = True # Enabled by default for always-on face tracking
94
+ self._face_tracking_offsets: List[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
95
+ self._face_tracking_lock = threading.Lock()
96
+
97
+ # Gesture detection state
98
+ self._gesture_detector = None
99
+ self._gesture_detection_enabled = True
100
+ self._current_gesture = "none"
101
+ self._gesture_confidence = 0.0
102
+ self._gesture_lock = threading.Lock()
103
+ self._gesture_frame_counter = 0
104
+ self._gesture_detection_interval = 3 # Run gesture detection every N frames
105
+ self._gesture_state_callback = None # Callback to notify entity registry
106
+
107
+ # Face tracking timing (smooth interpolation when face lost)
108
+ self._last_face_detected_time: Optional[float] = None
109
+ self._interpolation_start_time: Optional[float] = None
110
+ self._interpolation_start_pose: Optional[np.ndarray] = None
111
+ self._face_lost_delay = 1.0 # Reduced from 2.0s to 1.0s for faster response
112
+ self._interpolation_duration = 0.8 # Reduced from 1.0s to 0.8s for faster return
113
+
114
+ # Offset scaling (same as conversation_app)
115
+ self._offset_scale = 0.6
116
+
117
+ # =====================================================================
118
+ # Resource optimization: Adaptive frame rate for face tracking
119
+ # =====================================================================
120
+ # High frequency when: face detected, in conversation, or recently active
121
+ # Low frequency when: idle and no face for extended period
122
+ # Ultra-low when: idle for very long time (just MJPEG stream, minimal AI)
123
+ self._fps_high = fps # Normal tracking rate (15fps)
124
+ self._fps_low = 2 # Low power rate (2fps) - periodic face check
125
+ self._fps_idle = 0.5 # Ultra-low power (0.5fps) - minimal CPU usage
126
+ self._current_fps = fps
127
+
128
+ # Conversation state (set by voice assistant)
129
+ self._in_conversation = False
130
+ self._conversation_lock = threading.Lock()
131
+
132
+ # Adaptive tracking timing
133
+ self._no_face_duration = 0.0 # How long since last face detection
134
+ self._low_power_threshold = 5.0 # Switch to low power after 5s without face
135
+ self._idle_threshold = 30.0 # Switch to idle mode after 30s without face
136
+ self._last_face_check_time = 0.0
137
+
138
+ # Skip AI inference in idle mode (only stream MJPEG)
139
+ self._ai_enabled = True
140
+
141
+ async def start(self) -> None:
142
+ """Start the MJPEG camera server."""
143
+ if self._running:
144
+ _LOGGER.warning("Camera server already running")
145
+ return
146
+
147
+ self._running = True
148
+
149
+ # Initialize head tracker if face tracking enabled
150
+ if self.enable_face_tracking:
151
+ try:
152
+ from .head_tracker import HeadTracker
153
+ self._head_tracker = HeadTracker()
154
+ _LOGGER.info("Face tracking enabled with YOLO head tracker")
155
+ except ImportError as e:
156
+ _LOGGER.error("Failed to import head tracker: %s", e)
157
+ self._head_tracker = None
158
+ except Exception as e:
159
+ _LOGGER.warning("Failed to initialize head tracker: %s", e)
160
+ self._head_tracker = None
161
+ else:
162
+ _LOGGER.info("Face tracking disabled by configuration")
163
+
164
+ # Initialize gesture detector
165
+ if self._gesture_detection_enabled:
166
+ try:
167
+ from .gesture_detector import GestureDetector
168
+ self._gesture_detector = GestureDetector()
169
+ if self._gesture_detector.is_available:
170
+ _LOGGER.info("Gesture detection enabled (18 HaGRID classes)")
171
+ else:
172
+ _LOGGER.warning("Gesture detection not available")
173
+ self._gesture_detector = None
174
+ except ImportError as e:
175
+ _LOGGER.warning("Failed to import gesture detector: %s", e)
176
+ self._gesture_detector = None
177
+ except Exception as e:
178
+ _LOGGER.warning("Failed to initialize gesture detector: %s", e)
179
+ self._gesture_detector = None
180
+
181
+ # Start frame capture thread
182
+ self._capture_thread = threading.Thread(
183
+ target=self._capture_frames,
184
+ daemon=True,
185
+ name="camera-capture"
186
+ )
187
+ self._capture_thread.start()
188
+
189
+ # Start HTTP server
190
+ self._server = await asyncio.start_server(
191
+ self._handle_client,
192
+ self.host,
193
+ self.port,
194
+ )
195
+
196
+ _LOGGER.info("MJPEG Camera server started on http://%s:%d", self.host, self.port)
197
+ _LOGGER.info(" Stream URL: http://<ip>:%d/stream", self.port)
198
+ _LOGGER.info(" Snapshot URL: http://<ip>:%d/snapshot", self.port)
199
+
200
+ async def stop(self) -> None:
201
+ """Stop the MJPEG camera server."""
202
+ self._running = False
203
+
204
+ if self._capture_thread:
205
+ self._capture_thread.join(timeout=0.5)
206
+ self._capture_thread = None
207
+
208
+ if self._server:
209
+ self._server.close()
210
+ await self._server.wait_closed()
211
+ self._server = None
212
+
213
+ _LOGGER.info("MJPEG Camera server stopped")
214
+
215
+ def _capture_frames(self) -> None:
216
+ """Background thread to capture frames from Reachy Mini and do face tracking.
217
+
218
+ Resource optimization:
219
+ - High frequency (15fps) when face detected or in conversation
220
+ - Low frequency (2fps) when idle and no face for short period
221
+ - Ultra-low (0.5fps) when idle for extended period - minimal AI inference
222
+ """
223
+ _LOGGER.info("Starting camera capture thread (face_tracking=%s)", self._face_tracking_enabled)
224
+
225
+ frame_count = 0
226
+ face_detect_count = 0
227
+ last_log_time = time.time()
228
+
229
+ while self._running:
230
+ try:
231
+ current_time = time.time()
232
+
233
+ # Determine if we should run AI inference this frame
234
+ should_run_ai = self._should_run_ai_inference(current_time)
235
+
236
+ # Only get frame if needed (AI inference or MJPEG streaming)
237
+ frame = self._get_camera_frame() if should_run_ai or self._has_stream_clients() else None
238
+
239
+ if frame is not None:
240
+ frame_count += 1
241
+
242
+ # Encode frame as JPEG for streaming
243
+ encode_params = [cv2.IMWRITE_JPEG_QUALITY, self.quality]
244
+ success, jpeg_data = cv2.imencode('.jpg', frame, encode_params)
245
+
246
+ if success:
247
+ with self._frame_lock:
248
+ self._last_frame = jpeg_data.tobytes()
249
+ self._last_frame_time = time.time()
250
+
251
+ # Only run AI inference when enabled
252
+ if should_run_ai:
253
+ # Face tracking
254
+ if self._face_tracking_enabled and self._head_tracker is not None:
255
+ face_detect_count += 1
256
+ face_detected = self._process_face_tracking(frame, current_time)
257
+
258
+ # Update adaptive timing based on detection result
259
+ if face_detected:
260
+ self._no_face_duration = 0.0
261
+ self._current_fps = self._fps_high
262
+ self._ai_enabled = True
263
+ else:
264
+ # Accumulate no-face duration
265
+ if self._last_face_detected_time is not None:
266
+ self._no_face_duration = current_time - self._last_face_detected_time
267
+ else:
268
+ self._no_face_duration += 1.0 / self._current_fps
269
+
270
+ # Adaptive power mode
271
+ if self._no_face_duration > self._idle_threshold:
272
+ self._current_fps = self._fps_idle
273
+ elif self._no_face_duration > self._low_power_threshold:
274
+ self._current_fps = self._fps_low
275
+
276
+ self._last_face_check_time = current_time
277
+
278
+ # Handle smooth interpolation when face lost
279
+ self._process_face_lost_interpolation(current_time)
280
+
281
+ # Gesture detection (only when face detected recently)
282
+ if (self._gesture_detection_enabled and
283
+ self._gesture_detector is not None and
284
+ self._no_face_duration < 5.0): # Only detect gestures when someone is present
285
+ self._gesture_frame_counter += 1
286
+ if self._gesture_frame_counter >= self._gesture_detection_interval:
287
+ self._gesture_frame_counter = 0
288
+ self._process_gesture_detection(frame)
289
+
290
+ # Log stats every 30 seconds
291
+ if current_time - last_log_time >= 30.0:
292
+ fps = frame_count / (current_time - last_log_time)
293
+ detect_fps = face_detect_count / (current_time - last_log_time)
294
+ mode = "HIGH" if self._current_fps == self._fps_high else ("LOW" if self._current_fps == self._fps_low else "IDLE")
295
+ _LOGGER.debug("Camera: %.1f fps, AI: %.1f fps (%s), no_face: %.0fs",
296
+ fps, detect_fps, mode, self._no_face_duration)
297
+ frame_count = 0
298
+ face_detect_count = 0
299
+ last_log_time = current_time
300
+
301
+ # Sleep to maintain target FPS (use current adaptive rate)
302
+ sleep_time = 1.0 / self._current_fps
303
+ time.sleep(sleep_time)
304
+
305
+ except Exception as e:
306
+ _LOGGER.error("Error capturing frame: %s", e)
307
+ time.sleep(1.0)
308
+
309
+ _LOGGER.info("Camera capture thread stopped")
310
+
311
+ def _should_run_ai_inference(self, current_time: float) -> bool:
312
+ """Determine if AI inference (face/gesture detection) should run.
313
+
314
+ Returns True if:
315
+ - In conversation mode (always run)
316
+ - Face was recently detected
317
+ - Periodic check in low power mode
318
+ """
319
+ # Always run during conversation
320
+ with self._conversation_lock:
321
+ if self._in_conversation:
322
+ return True
323
+
324
+ # High frequency mode: run every frame
325
+ if self._current_fps == self._fps_high:
326
+ return True
327
+
328
+ # Low/idle power mode: run periodically
329
+ time_since_last = current_time - self._last_face_check_time
330
+ return time_since_last >= (1.0 / self._current_fps)
331
+
332
+ def _has_stream_clients(self) -> bool:
333
+ """Check if there are active MJPEG stream clients."""
334
+ # For now, always return True to keep stream available
335
+ # Could be optimized to track actual client connections
336
+ return True
337
+
338
+ def _process_face_tracking(self, frame: np.ndarray, current_time: float) -> bool:
339
+ """Process face tracking on a frame.
340
+
341
+ Returns:
342
+ True if face was detected, False otherwise
343
+ """
344
+ if self._head_tracker is None or self.reachy_mini is None:
345
+ return False
346
+
347
+ try:
348
+ face_center, confidence = self._head_tracker.get_head_position(frame)
349
+
350
+ if face_center is not None:
351
+ # Face detected - update tracking
352
+ self._last_face_detected_time = current_time
353
+ self._interpolation_start_time = None # Stop any interpolation
354
+
355
+ # Convert normalized coordinates to pixel coordinates
356
+ h, w = frame.shape[:2]
357
+ eye_center_norm = (face_center + 1) / 2
358
+
359
+ eye_center_pixels = [
360
+ eye_center_norm[0] * w,
361
+ eye_center_norm[1] * h,
362
+ ]
363
+
364
+ # Get the head pose needed to look at the target
365
+ target_pose = self.reachy_mini.look_at_image(
366
+ eye_center_pixels[0],
367
+ eye_center_pixels[1],
368
+ duration=0.0,
369
+ perform_movement=False,
370
+ )
371
+
372
+ # Extract translation and rotation from target pose
373
+ translation = target_pose[:3, 3]
374
+ rotation = R.from_matrix(target_pose[:3, :3]).as_euler("xyz", degrees=False)
375
+
376
+ # Scale down for smoother tracking (same as conversation_app)
377
+ translation = translation * self._offset_scale
378
+ rotation = rotation * self._offset_scale
379
+
380
+ # Apply pitch offset compensation (robot tends to look up)
381
+ # rotation[1] is pitch in xyz euler order
382
+ # Positive pitch = look down in robot coordinate system
383
+ pitch_offset_rad = np.radians(9.0) # Look down 9 degrees
384
+ rotation[1] += pitch_offset_rad
385
+
386
+ # Apply yaw offset compensation (robot tends to look to user's right)
387
+ # rotation[2] is yaw in xyz euler order
388
+ # Negative yaw = turn right (towards user's left from robot's perspective)
389
+ yaw_offset_rad = np.radians(-7.0) # Turn right 7 degrees
390
+ rotation[2] += yaw_offset_rad
391
+
392
+ # Update face tracking offsets
393
+ with self._face_tracking_lock:
394
+ self._face_tracking_offsets = [
395
+ float(translation[0]),
396
+ float(translation[1]),
397
+ float(translation[2]),
398
+ float(rotation[0]),
399
+ float(rotation[1]),
400
+ float(rotation[2]),
401
+ ]
402
+
403
+ return True
404
+
405
+ return False
406
+
407
+ except Exception as e:
408
+ _LOGGER.debug("Face tracking error: %s", e)
409
+ return False
410
+
411
+ def _process_face_lost_interpolation(self, current_time: float) -> None:
412
+ """Handle smooth interpolation back to neutral when face is lost."""
413
+ if self._last_face_detected_time is None:
414
+ return
415
+
416
+ time_since_face_lost = current_time - self._last_face_detected_time
417
+
418
+ if time_since_face_lost < self._face_lost_delay:
419
+ return # Still within delay period, keep current offsets
420
+
421
+ # Start interpolation if not already started
422
+ if self._interpolation_start_time is None:
423
+ self._interpolation_start_time = current_time
424
+ # Capture current pose as start of interpolation
425
+ with self._face_tracking_lock:
426
+ current_offsets = self._face_tracking_offsets.copy()
427
+
428
+ # Convert to 4x4 pose matrix
429
+ pose_matrix = np.eye(4, dtype=np.float32)
430
+ pose_matrix[:3, 3] = current_offsets[:3]
431
+ pose_matrix[:3, :3] = R.from_euler("xyz", current_offsets[3:]).as_matrix()
432
+ self._interpolation_start_pose = pose_matrix
433
+
434
+ # Calculate interpolation progress
435
+ elapsed = current_time - self._interpolation_start_time
436
+ t = min(1.0, elapsed / self._interpolation_duration)
437
+
438
+ # Interpolate to neutral (identity matrix)
439
+ if self._interpolation_start_pose is not None:
440
+ neutral_pose = np.eye(4, dtype=np.float32)
441
+ interpolated_pose = self._linear_pose_interpolation(
442
+ self._interpolation_start_pose, neutral_pose, t
443
+ )
444
+
445
+ # Extract translation and rotation
446
+ translation = interpolated_pose[:3, 3]
447
+ rotation = R.from_matrix(interpolated_pose[:3, :3]).as_euler("xyz", degrees=False)
448
+
449
+ with self._face_tracking_lock:
450
+ self._face_tracking_offsets = [
451
+ float(translation[0]),
452
+ float(translation[1]),
453
+ float(translation[2]),
454
+ float(rotation[0]),
455
+ float(rotation[1]),
456
+ float(rotation[2]),
457
+ ]
458
+
459
+ # Reset when interpolation complete
460
+ if t >= 1.0:
461
+ self._last_face_detected_time = None
462
+ self._interpolation_start_time = None
463
+ self._interpolation_start_pose = None
464
+
465
def _linear_pose_interpolation(
    self, start: np.ndarray, end: np.ndarray, t: float
) -> np.ndarray:
    """Interpolate between two 4x4 homogeneous poses at fraction ``t``.

    Delegates to the SDK's ``linear_pose_interpolation`` when available;
    otherwise lerps the translation and SLERPs the rotation manually.
    """
    if SDK_INTERPOLATION_AVAILABLE:
        return linear_pose_interpolation(start, end, t)

    # Manual fallback. Translation is a straight linear blend...
    blended_trans = start[:3, 3] * (1 - t) + end[:3, 3] * t

    # ...while rotation is spherically interpolated between the two
    # endpoint orientations.
    from scipy.spatial.transform import Slerp

    endpoints = R.from_quat(
        np.array(
            [
                R.from_matrix(start[:3, :3]).as_quat(),
                R.from_matrix(end[:3, :3]).as_quat(),
            ]
        )
    )
    blended_rot = Slerp([0, 1], endpoints)(t)

    out = np.eye(4, dtype=np.float32)
    out[:3, :3] = blended_rot.as_matrix()
    out[:3, 3] = blended_trans
    return out
498
+
499
+ # =========================================================================
500
+ # Public API for face tracking
501
+ # =========================================================================
502
+
503
def get_face_tracking_offsets(self) -> Tuple[float, float, float, float, float, float]:
    """Snapshot the current face-tracking offsets, thread-safely.

    Returns:
        Tuple of (x, y, z, roll, pitch, yaw) offsets.
    """
    with self._face_tracking_lock:
        snapshot = self._face_tracking_offsets
    return (snapshot[0], snapshot[1], snapshot[2], snapshot[3], snapshot[4], snapshot[5])
512
+
513
def set_face_tracking_enabled(self, enabled: bool) -> None:
    """Turn face tracking on or off.

    Disabling pretends the face was last seen right now, which kicks off
    the smooth return-to-neutral interpolation.
    """
    if enabled == self._face_tracking_enabled:
        return  # Already in the requested state; avoid duplicate logs.
    self._face_tracking_enabled = enabled
    if not enabled:
        # Seed the face-lost timer so the head eases back to neutral.
        self._last_face_detected_time = time.time()
        self._interpolation_start_time = None
    _LOGGER.info("Face tracking %s", "enabled" if enabled else "disabled")
523
+
524
def set_conversation_mode(self, in_conversation: bool) -> None:
    """Switch the adaptive tracking loop in or out of conversation mode.

    While conversing, detection runs at the high frame rate even when no
    face is currently visible; leaving conversation mode hands control
    back to the adaptive power logic.

    Args:
        in_conversation: True while the voice assistant is engaged.
    """
    with self._conversation_lock:
        self._in_conversation = in_conversation

    if not in_conversation:
        _LOGGER.debug("Face tracking: conversation mode OFF (adaptive)")
        return

    # Jump straight to the responsive profile and reset the idle timer.
    self._current_fps = self._fps_high
    self._ai_enabled = True
    self._no_face_duration = 0.0
    _LOGGER.debug("Face tracking: conversation mode ON (high frequency)")
544
+
545
+ # =========================================================================
546
+ # Gesture detection
547
+ # =========================================================================
548
+
549
+ def _process_gesture_detection(self, frame: np.ndarray) -> None:
550
+ """Process gesture detection on a frame."""
551
+ if self._gesture_detector is None:
552
+ return
553
+
554
+ try:
555
+ # Detect gesture
556
+ detected_gesture, confidence = self._gesture_detector.detect(frame)
557
+
558
+ # Update current gesture state
559
+ state_changed = False
560
+ with self._gesture_lock:
561
+ old_gesture = self._current_gesture
562
+ if detected_gesture.value != "no_gesture":
563
+ self._current_gesture = detected_gesture.value
564
+ self._gesture_confidence = confidence
565
+ if old_gesture != detected_gesture.value:
566
+ state_changed = True
567
+ _LOGGER.debug("Gesture: %s (%.0f%%)",
568
+ detected_gesture.value, confidence * 100)
569
+ else:
570
+ if self._current_gesture != "none":
571
+ state_changed = True
572
+ self._current_gesture = "none"
573
+ self._gesture_confidence = 0.0
574
+
575
+ # Notify entity registry to push update to Home Assistant
576
+ if state_changed and self._gesture_state_callback:
577
+ try:
578
+ self._gesture_state_callback()
579
+ except Exception:
580
+ pass # Ignore callback errors
581
+
582
+ except Exception as e:
583
+ _LOGGER.warning("Gesture detection error: %s", e)
584
+
585
def get_current_gesture(self) -> str:
    """Thread-safe accessor for the latest detected gesture name.

    Returns:
        Gesture label, e.g. "like" or "peace"; "none" when idle.
    """
    with self._gesture_lock:
        current = self._current_gesture
    return current
593
+
594
def get_gesture_confidence(self) -> float:
    """Thread-safe accessor for the latest gesture confidence.

    Returns:
        Confidence as a percentage (0.0-100.0) for display purposes.
    """
    with self._gesture_lock:
        raw = self._gesture_confidence
    return raw * 100.0
602
+
603
def set_gesture_detection_enabled(self, enabled: bool) -> None:
    """Enable or disable gesture detection.

    Disabling also clears the published gesture state so Home Assistant
    does not keep showing a stale gesture.
    """
    self._gesture_detection_enabled = enabled
    if not enabled:
        with self._gesture_lock:
            self._current_gesture = "none"
            self._gesture_confidence = 0.0
    _LOGGER.info("Gesture detection %s", "enabled" if enabled else "disabled")
611
+
612
def set_gesture_state_callback(self, callback) -> None:
    """Register a callable invoked whenever the detected gesture changes."""
    self._gesture_state_callback = callback
615
+
616
+ def _get_camera_frame(self) -> Optional[np.ndarray]:
617
+ """Get a frame from Reachy Mini's camera."""
618
+ if self.reachy_mini is None:
619
+ # Return a test pattern if no robot connected
620
+ return self._generate_test_frame()
621
+
622
+ try:
623
+ frame = self.reachy_mini.media.get_frame()
624
+ return frame
625
+ except Exception as e:
626
+ _LOGGER.debug("Failed to get camera frame: %s", e)
627
+ return None
628
+
629
def _generate_test_frame(self) -> np.ndarray:
    """Render a 640x480 placeholder frame used when no camera exists.

    Shows a title, a "no camera" notice, and the current wall-clock time.
    """
    frame = np.zeros((480, 640, 3), dtype=np.uint8)

    # Title line.
    cv2.putText(
        frame,
        "Reachy Mini Camera",
        (150, 200),
        cv2.FONT_HERSHEY_SIMPLEX,
        1.2,
        (255, 255, 255),
        2,
    )
    # Status line.
    cv2.putText(
        frame,
        "No camera connected",
        (180, 280),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (128, 128, 128),
        1,
    )
    # Live timestamp so viewers can tell the feed is alive.
    cv2.putText(
        frame,
        time.strftime("%Y-%m-%d %H:%M:%S"),
        (220, 350),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        (0, 255, 0),
        1,
    )

    return frame
667
+
668
def get_snapshot(self) -> Optional[bytes]:
    """Return the most recent JPEG-encoded frame, or None if none yet."""
    with self._frame_lock:
        latest = self._last_frame
    return latest
672
+
673
async def _handle_client(
    self,
    reader: asyncio.StreamReader,
    writer: asyncio.StreamWriter,
) -> None:
    """Serve one HTTP connection: parse the request line and dispatch.

    Routes /stream to MJPEG streaming, /snapshot to a single JPEG, and
    anything else to the index page. The writer is always closed.
    """
    try:
        # Request line, e.g. "GET /stream HTTP/1.1".
        raw = await asyncio.wait_for(reader.readline(), timeout=10.0)
        request = raw.decode('utf-8', errors='ignore').strip()

        # Drain headers up to the blank separator (or EOF); they are not
        # inspected but must be consumed before responding.
        while True:
            header = await asyncio.wait_for(reader.readline(), timeout=5.0)
            if header in (b'\r\n', b'\n', b''):
                break

        fields = request.split(' ')
        path = fields[1] if len(fields) >= 2 else '/'

        _LOGGER.debug("HTTP request: %s", request)

        if path == '/stream':
            await self._handle_stream(writer)
        elif path == '/snapshot':
            await self._handle_snapshot(writer)
        else:
            await self._handle_index(writer)

    except asyncio.TimeoutError:
        _LOGGER.debug("Client connection timeout")
    except ConnectionResetError:
        _LOGGER.debug("Client connection reset")
    except Exception as exc:
        _LOGGER.error("Error handling client: %s", exc)
    finally:
        try:
            writer.close()
            await writer.wait_closed()
        except Exception:
            pass
721
+
722
+ async def _handle_index(self, writer: asyncio.StreamWriter) -> None:
723
+ """Handle index page request."""
724
+ html = f"""<!DOCTYPE html>
725
+ <html>
726
+ <head>
727
+ <title>Reachy Mini Camera</title>
728
+ <style>
729
+ body {{ font-family: Arial, sans-serif; margin: 40px; background: #1a1a2e; color: #eee; }}
730
+ h1 {{ color: #00d4ff; }}
731
+ .container {{ max-width: 800px; margin: 0 auto; }}
732
+ .stream {{ width: 100%; max-width: 640px; border: 2px solid #00d4ff; border-radius: 8px; }}
733
+ a {{ color: #00d4ff; }}
734
+ .info {{ background: #16213e; padding: 20px; border-radius: 8px; margin-top: 20px; }}
735
+ </style>
736
+ </head>
737
+ <body>
738
+ <div class="container">
739
+ <h1>Reachy Mini Camera</h1>
740
+ <img class="stream" src="/stream" alt="Camera Stream">
741
+ <div class="info">
742
+ <h3>Endpoints:</h3>
743
+ <ul>
744
+ <li><a href="/stream">/stream</a> - MJPEG video stream</li>
745
+ <li><a href="/snapshot">/snapshot</a> - Single JPEG snapshot</li>
746
+ </ul>
747
+ <h3>Home Assistant Integration:</h3>
748
+ <p>Add a Generic Camera with URL: <code>http://&lt;ip&gt;:{self.port}/stream</code></p>
749
+ </div>
750
+ </div>
751
+ </body>
752
+ </html>"""
753
+
754
+ response = (
755
+ "HTTP/1.1 200 OK\r\n"
756
+ "Content-Type: text/html; charset=utf-8\r\n"
757
+ f"Content-Length: {len(html)}\r\n"
758
+ "Connection: close\r\n"
759
+ "\r\n"
760
+ )
761
+
762
+ writer.write(response.encode('utf-8'))
763
+ writer.write(html.encode('utf-8'))
764
+ await writer.drain()
765
+
766
+ async def _handle_snapshot(self, writer: asyncio.StreamWriter) -> None:
767
+ """Handle snapshot request - return single JPEG image."""
768
+ jpeg_data = self.get_snapshot()
769
+
770
+ if jpeg_data is None:
771
+ response = (
772
+ "HTTP/1.1 503 Service Unavailable\r\n"
773
+ "Content-Type: text/plain\r\n"
774
+ "Connection: close\r\n"
775
+ "\r\n"
776
+ "No frame available"
777
+ )
778
+ writer.write(response.encode('utf-8'))
779
+ else:
780
+ response = (
781
+ "HTTP/1.1 200 OK\r\n"
782
+ "Content-Type: image/jpeg\r\n"
783
+ f"Content-Length: {len(jpeg_data)}\r\n"
784
+ "Cache-Control: no-cache, no-store, must-revalidate\r\n"
785
+ "Connection: close\r\n"
786
+ "\r\n"
787
+ )
788
+ writer.write(response.encode('utf-8'))
789
+ writer.write(jpeg_data)
790
+
791
+ await writer.drain()
792
+
793
async def _handle_stream(self, writer: asyncio.StreamWriter) -> None:
    """Push an MJPEG multipart stream until the client disconnects.

    Polls the shared frame buffer roughly every 10 ms and forwards each
    *new* JPEG (tracked via its capture timestamp) as a multipart chunk.
    """
    preamble = (
        "HTTP/1.1 200 OK\r\n"
        f"Content-Type: multipart/x-mixed-replace; boundary={MJPEG_BOUNDARY}\r\n"
        "Cache-Control: no-cache, no-store, must-revalidate\r\n"
        "Connection: keep-alive\r\n"
        "\r\n"
    )
    writer.write(preamble.encode('utf-8'))
    await writer.drain()

    _LOGGER.debug("Started MJPEG stream")

    last_sent = 0
    try:
        while self._running:
            with self._frame_lock:
                jpeg = self._last_frame
                captured_at = self._last_frame_time

            # Forward only frames newer than the last one we delivered.
            if jpeg is not None and captured_at > last_sent:
                part = (
                    f"--{MJPEG_BOUNDARY}\r\n"
                    "Content-Type: image/jpeg\r\n"
                    f"Content-Length: {len(jpeg)}\r\n"
                    "\r\n"
                )
                writer.write(part.encode('utf-8'))
                writer.write(jpeg)
                writer.write(b"\r\n")
                await writer.drain()
                last_sent = captured_at

            # Short poll interval keeps latency low without busy-looping.
            await asyncio.sleep(0.01)

    except (ConnectionResetError, BrokenPipeError):
        _LOGGER.debug("Client disconnected from stream")
    except Exception as exc:
        _LOGGER.error("Error in MJPEG stream: %s", exc)

    _LOGGER.debug("Ended MJPEG stream")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity.py RENAMED
@@ -1,37 +1,48 @@
1
  """ESPHome entity definitions."""
2
 
3
- import logging
4
  from abc import abstractmethod
5
- from collections.abc import Callable, Iterable
6
- from typing import TYPE_CHECKING
 
7
 
8
  # pylint: disable=no-name-in-module
9
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
10
- BinarySensorStateResponse,
11
- CameraImageRequest,
12
- CameraImageResponse,
13
  ListEntitiesBinarySensorResponse,
 
14
  ListEntitiesCameraResponse,
15
  ListEntitiesMediaPlayerResponse,
16
  ListEntitiesNumberResponse,
17
  ListEntitiesRequest,
 
 
 
18
  ListEntitiesTextSensorResponse,
 
 
 
 
19
  MediaPlayerCommandRequest,
20
  MediaPlayerStateResponse,
21
  NumberCommandRequest,
22
  NumberStateResponse,
 
 
 
23
  SubscribeHomeAssistantStatesRequest,
24
  SubscribeStatesRequest,
 
 
25
  TextSensorStateResponse,
26
  )
27
- from aioesphomeapi.model import MediaPlayerCommand, MediaPlayerEntityFeature, MediaPlayerState
28
  from google.protobuf import message
29
 
30
- from ..audio.audio_player import AudioPlayer
31
- from ..core.util import call_all
 
32
 
33
  if TYPE_CHECKING:
34
- from ..protocol.api_server import APIServer
35
 
36
  logger = logging.getLogger(__name__)
37
 
@@ -39,7 +50,7 @@ logger = logging.getLogger(__name__)
39
  class ESPHomeEntity:
40
  """Base class for ESPHome entities."""
41
 
42
- def __init__(self, server: "APIServer") -> None:
43
  self.server = server
44
 
45
  @abstractmethod
@@ -52,7 +63,7 @@ class MediaPlayerEntity(ESPHomeEntity):
52
 
53
  def __init__(
54
  self,
55
- server: "APIServer",
56
  key: int,
57
  name: str,
58
  object_id: str,
@@ -71,9 +82,9 @@ class MediaPlayerEntity(ESPHomeEntity):
71
 
72
  def play(
73
  self,
74
- url: str | list[str],
75
  announcement: bool = False,
76
- done_callback: Callable[[], None] | None = None,
77
  ) -> Iterable[message.Message]:
78
  if announcement:
79
  if self.music_player.is_playing:
@@ -81,14 +92,18 @@ class MediaPlayerEntity(ESPHomeEntity):
81
  self.music_player.pause()
82
  self.announce_player.play(
83
  url,
84
- done_callback=lambda: call_all(self.music_player.resume, done_callback),
 
 
85
  )
86
  else:
87
  # Announce, idle
88
  self.announce_player.play(
89
  url,
90
  done_callback=lambda: call_all(
91
- lambda: self.server.send_messages([self._update_state(MediaPlayerState.IDLE)]),
 
 
92
  done_callback,
93
  ),
94
  )
@@ -97,7 +112,9 @@ class MediaPlayerEntity(ESPHomeEntity):
97
  self.music_player.play(
98
  url,
99
  done_callback=lambda: call_all(
100
- lambda: self.server.send_messages([self._update_state(MediaPlayerState.IDLE)]),
 
 
101
  done_callback,
102
  ),
103
  )
@@ -116,9 +133,6 @@ class MediaPlayerEntity(ESPHomeEntity):
116
  elif msg.command == MediaPlayerCommand.PLAY:
117
  self.music_player.resume()
118
  yield self._update_state(MediaPlayerState.PLAYING)
119
- elif msg.command == MediaPlayerCommand.STOP:
120
- self.music_player.stop()
121
- yield self._update_state(MediaPlayerState.IDLE)
122
  elif msg.has_volume:
123
  volume = int(msg.volume * 100)
124
  self.music_player.set_volume(volume)
@@ -162,13 +176,13 @@ class TextSensorEntity(ESPHomeEntity):
162
 
163
  def __init__(
164
  self,
165
- server: "APIServer",
166
  key: int,
167
  name: str,
168
  object_id: str,
169
  icon: str = "",
170
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
171
- value_getter: Callable[[], str] | None = None,
172
  ) -> None:
173
  ESPHomeEntity.__init__(self, server)
174
  self.key = key
@@ -218,14 +232,14 @@ class BinarySensorEntity(ESPHomeEntity):
218
 
219
  def __init__(
220
  self,
221
- server: "APIServer",
222
  key: int,
223
  name: str,
224
  object_id: str,
225
  icon: str = "",
226
  device_class: str = "",
227
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
228
- value_getter: Callable[[], bool] | None = None,
229
  ) -> None:
230
  ESPHomeEntity.__init__(self, server)
231
  self.key = key
@@ -277,7 +291,7 @@ class NumberEntity(ESPHomeEntity):
277
 
278
  def __init__(
279
  self,
280
- server: "APIServer",
281
  key: int,
282
  name: str,
283
  object_id: str,
@@ -288,8 +302,8 @@ class NumberEntity(ESPHomeEntity):
288
  unit_of_measurement: str = "",
289
  mode: int = 0, # 0 = auto, 1 = box, 2 = slider
290
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
291
- value_getter: Callable[[], float] | None = None,
292
- value_setter: Callable[[float], None] | None = None,
293
  ) -> None:
294
  ESPHomeEntity.__init__(self, server)
295
  self.key = key
@@ -357,12 +371,12 @@ class CameraEntity(ESPHomeEntity):
357
 
358
  def __init__(
359
  self,
360
- server: "APIServer",
361
  key: int,
362
  name: str,
363
  object_id: str,
364
  icon: str = "mdi:camera",
365
- image_getter: Callable[[], bytes | None] | None = None,
366
  ) -> None:
367
  ESPHomeEntity.__init__(self, server)
368
  self.key = key
@@ -371,7 +385,7 @@ class CameraEntity(ESPHomeEntity):
371
  self.icon = icon
372
  self._image_getter = image_getter
373
 
374
- def get_image(self) -> bytes | None:
375
  """Get the current camera image as JPEG bytes."""
376
  if self._image_getter:
377
  return self._image_getter()
@@ -402,3 +416,4 @@ class CameraEntity(ESPHomeEntity):
402
  data=b"",
403
  done=True,
404
  )
 
 
1
  """ESPHome entity definitions."""
2
 
 
3
  from abc import abstractmethod
4
+ from collections.abc import Iterable
5
+ from typing import Callable, List, Optional, Union, TYPE_CHECKING
6
+ import logging
7
 
8
  # pylint: disable=no-name-in-module
9
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
 
 
 
10
  ListEntitiesBinarySensorResponse,
11
+ ListEntitiesButtonResponse,
12
  ListEntitiesCameraResponse,
13
  ListEntitiesMediaPlayerResponse,
14
  ListEntitiesNumberResponse,
15
  ListEntitiesRequest,
16
+ ListEntitiesSelectResponse,
17
+ ListEntitiesSensorResponse,
18
+ ListEntitiesSwitchResponse,
19
  ListEntitiesTextSensorResponse,
20
+ BinarySensorStateResponse,
21
+ ButtonCommandRequest,
22
+ CameraImageRequest,
23
+ CameraImageResponse,
24
  MediaPlayerCommandRequest,
25
  MediaPlayerStateResponse,
26
  NumberCommandRequest,
27
  NumberStateResponse,
28
+ SelectCommandRequest,
29
+ SelectStateResponse,
30
+ SensorStateResponse,
31
  SubscribeHomeAssistantStatesRequest,
32
  SubscribeStatesRequest,
33
+ SwitchCommandRequest,
34
+ SwitchStateResponse,
35
  TextSensorStateResponse,
36
  )
37
+ from aioesphomeapi.model import MediaPlayerCommand, MediaPlayerState, MediaPlayerEntityFeature
38
  from google.protobuf import message
39
 
40
+ from .api_server import APIServer
41
+ from .audio_player import AudioPlayer
42
+ from .util import call_all
43
 
44
  if TYPE_CHECKING:
45
+ from reachy_mini import ReachyMini
46
 
47
  logger = logging.getLogger(__name__)
48
 
 
50
  class ESPHomeEntity:
51
  """Base class for ESPHome entities."""
52
 
53
+ def __init__(self, server: APIServer) -> None:
54
  self.server = server
55
 
56
  @abstractmethod
 
63
 
64
  def __init__(
65
  self,
66
+ server: APIServer,
67
  key: int,
68
  name: str,
69
  object_id: str,
 
82
 
83
  def play(
84
  self,
85
+ url: Union[str, List[str]],
86
  announcement: bool = False,
87
+ done_callback: Optional[Callable[[], None]] = None,
88
  ) -> Iterable[message.Message]:
89
  if announcement:
90
  if self.music_player.is_playing:
 
92
  self.music_player.pause()
93
  self.announce_player.play(
94
  url,
95
+ done_callback=lambda: call_all(
96
+ self.music_player.resume, done_callback
97
+ ),
98
  )
99
  else:
100
  # Announce, idle
101
  self.announce_player.play(
102
  url,
103
  done_callback=lambda: call_all(
104
+ lambda: self.server.send_messages(
105
+ [self._update_state(MediaPlayerState.IDLE)]
106
+ ),
107
  done_callback,
108
  ),
109
  )
 
112
  self.music_player.play(
113
  url,
114
  done_callback=lambda: call_all(
115
+ lambda: self.server.send_messages(
116
+ [self._update_state(MediaPlayerState.IDLE)]
117
+ ),
118
  done_callback,
119
  ),
120
  )
 
133
  elif msg.command == MediaPlayerCommand.PLAY:
134
  self.music_player.resume()
135
  yield self._update_state(MediaPlayerState.PLAYING)
 
 
 
136
  elif msg.has_volume:
137
  volume = int(msg.volume * 100)
138
  self.music_player.set_volume(volume)
 
176
 
177
  def __init__(
178
  self,
179
+ server: APIServer,
180
  key: int,
181
  name: str,
182
  object_id: str,
183
  icon: str = "",
184
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
185
+ value_getter: Optional[Callable[[], str]] = None,
186
  ) -> None:
187
  ESPHomeEntity.__init__(self, server)
188
  self.key = key
 
232
 
233
  def __init__(
234
  self,
235
+ server: APIServer,
236
  key: int,
237
  name: str,
238
  object_id: str,
239
  icon: str = "",
240
  device_class: str = "",
241
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
242
+ value_getter: Optional[Callable[[], bool]] = None,
243
  ) -> None:
244
  ESPHomeEntity.__init__(self, server)
245
  self.key = key
 
291
 
292
  def __init__(
293
  self,
294
+ server: APIServer,
295
  key: int,
296
  name: str,
297
  object_id: str,
 
302
  unit_of_measurement: str = "",
303
  mode: int = 0, # 0 = auto, 1 = box, 2 = slider
304
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
305
+ value_getter: Optional[Callable[[], float]] = None,
306
+ value_setter: Optional[Callable[[float], None]] = None,
307
  ) -> None:
308
  ESPHomeEntity.__init__(self, server)
309
  self.key = key
 
371
 
372
  def __init__(
373
  self,
374
+ server: APIServer,
375
  key: int,
376
  name: str,
377
  object_id: str,
378
  icon: str = "mdi:camera",
379
+ image_getter: Optional[Callable[[], Optional[bytes]]] = None,
380
  ) -> None:
381
  ESPHomeEntity.__init__(self, server)
382
  self.key = key
 
385
  self.icon = icon
386
  self._image_getter = image_getter
387
 
388
+ def get_image(self) -> Optional[bytes]:
389
  """Get the current camera image as JPEG bytes."""
390
  if self._image_getter:
391
  return self._image_getter()
 
416
  data=b"",
417
  done=True,
418
  )
419
+
{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity_extensions.py RENAMED
@@ -1,16 +1,16 @@
1
  """Extended ESPHome entity types for Reachy Mini control."""
2
 
 
 
3
  import logging
4
- from collections.abc import Callable, Iterable
5
- from typing import TYPE_CHECKING
6
 
7
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
8
- ButtonCommandRequest,
9
  ListEntitiesButtonResponse,
10
  ListEntitiesRequest,
11
  ListEntitiesSelectResponse,
12
  ListEntitiesSensorResponse,
13
  ListEntitiesSwitchResponse,
 
14
  SelectCommandRequest,
15
  SelectStateResponse,
16
  SensorStateResponse,
@@ -21,17 +21,14 @@ from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
21
  )
22
  from google.protobuf import message
23
 
 
24
  from .entity import ESPHomeEntity
25
 
26
- if TYPE_CHECKING:
27
- from ..protocol.api_server import APIServer
28
-
29
  logger = logging.getLogger(__name__)
30
 
31
 
32
  class SensorStateClass:
33
  """ESPHome SensorStateClass enum values."""
34
-
35
  NONE = 0
36
  MEASUREMENT = 1
37
  TOTAL_INCREASING = 2
@@ -43,7 +40,7 @@ class SensorEntity(ESPHomeEntity):
43
 
44
  def __init__(
45
  self,
46
- server: "APIServer",
47
  key: int,
48
  name: str,
49
  object_id: str,
@@ -53,7 +50,7 @@ class SensorEntity(ESPHomeEntity):
53
  device_class: str = "",
54
  state_class: int = SensorStateClass.NONE,
55
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
56
- value_getter: Callable[[], float] | None = None,
57
  ) -> None:
58
  ESPHomeEntity.__init__(self, server)
59
  self.key = key
@@ -64,7 +61,7 @@ class SensorEntity(ESPHomeEntity):
64
  self.accuracy_decimals = accuracy_decimals
65
  self.device_class = device_class
66
  self.entity_category = entity_category
67
- # Convert string state_class to enum
68
  if isinstance(state_class, str):
69
  state_class_map = {
70
  "": SensorStateClass.NONE,
@@ -121,15 +118,15 @@ class SwitchEntity(ESPHomeEntity):
121
 
122
  def __init__(
123
  self,
124
- server: "APIServer",
125
  key: int,
126
  name: str,
127
  object_id: str,
128
  icon: str = "",
129
  device_class: str = "",
130
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
131
- value_getter: Callable[[], bool] | None = None,
132
- value_setter: Callable[[bool], None] | None = None,
133
  ) -> None:
134
  ESPHomeEntity.__init__(self, server)
135
  self.key = key
@@ -186,15 +183,15 @@ class SelectEntity(ESPHomeEntity):
186
 
187
  def __init__(
188
  self,
189
- server: "APIServer",
190
  key: int,
191
  name: str,
192
  object_id: str,
193
- options: list[str],
194
  icon: str = "",
195
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
196
- value_getter: Callable[[], str] | None = None,
197
- value_setter: Callable[[str], None] | None = None,
198
  ) -> None:
199
  ESPHomeEntity.__init__(self, server)
200
  self.key = key
@@ -255,14 +252,14 @@ class ButtonEntity(ESPHomeEntity):
255
 
256
  def __init__(
257
  self,
258
- server: "APIServer",
259
  key: int,
260
  name: str,
261
  object_id: str,
262
  icon: str = "",
263
  device_class: str = "",
264
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
265
- on_press: Callable[[], None] | None = None,
266
  ) -> None:
267
  ESPHomeEntity.__init__(self, server)
268
  self.key = key
 
1
  """Extended ESPHome entity types for Reachy Mini control."""
2
 
3
+ from collections.abc import Iterable
4
+ from typing import Callable, List, Optional
5
  import logging
 
 
6
 
7
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
 
8
  ListEntitiesButtonResponse,
9
  ListEntitiesRequest,
10
  ListEntitiesSelectResponse,
11
  ListEntitiesSensorResponse,
12
  ListEntitiesSwitchResponse,
13
+ ButtonCommandRequest,
14
  SelectCommandRequest,
15
  SelectStateResponse,
16
  SensorStateResponse,
 
21
  )
22
  from google.protobuf import message
23
 
24
+ from .api_server import APIServer
25
  from .entity import ESPHomeEntity
26
 
 
 
 
27
  logger = logging.getLogger(__name__)
28
 
29
 
30
  class SensorStateClass:
31
  """ESPHome SensorStateClass enum values."""
 
32
  NONE = 0
33
  MEASUREMENT = 1
34
  TOTAL_INCREASING = 2
 
40
 
41
  def __init__(
42
  self,
43
+ server: APIServer,
44
  key: int,
45
  name: str,
46
  object_id: str,
 
50
  device_class: str = "",
51
  state_class: int = SensorStateClass.NONE,
52
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
53
+ value_getter: Optional[Callable[[], float]] = None,
54
  ) -> None:
55
  ESPHomeEntity.__init__(self, server)
56
  self.key = key
 
61
  self.accuracy_decimals = accuracy_decimals
62
  self.device_class = device_class
63
  self.entity_category = entity_category
64
+ # Convert string state_class to int if needed (for backward compatibility)
65
  if isinstance(state_class, str):
66
  state_class_map = {
67
  "": SensorStateClass.NONE,
 
118
 
119
  def __init__(
120
  self,
121
+ server: APIServer,
122
  key: int,
123
  name: str,
124
  object_id: str,
125
  icon: str = "",
126
  device_class: str = "",
127
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
128
+ value_getter: Optional[Callable[[], bool]] = None,
129
+ value_setter: Optional[Callable[[bool], None]] = None,
130
  ) -> None:
131
  ESPHomeEntity.__init__(self, server)
132
  self.key = key
 
183
 
184
  def __init__(
185
  self,
186
+ server: APIServer,
187
  key: int,
188
  name: str,
189
  object_id: str,
190
+ options: List[str],
191
  icon: str = "",
192
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
193
+ value_getter: Optional[Callable[[], str]] = None,
194
+ value_setter: Optional[Callable[[str], None]] = None,
195
  ) -> None:
196
  ESPHomeEntity.__init__(self, server)
197
  self.key = key
 
252
 
253
  def __init__(
254
  self,
255
+ server: APIServer,
256
  key: int,
257
  name: str,
258
  object_id: str,
259
  icon: str = "",
260
  device_class: str = "",
261
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
262
+ on_press: Optional[Callable[[], None]] = None,
263
  ) -> None:
264
  ESPHomeEntity.__init__(self, server)
265
  self.key = key
reachy_mini_ha_voice/entity_registry.py ADDED
@@ -0,0 +1,945 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Entity registry for ESPHome entities.
2
+
3
+ This module handles the registration and management of all ESPHome entities
4
+ for the Reachy Mini voice assistant.
5
+ """
6
+
7
+ import logging
8
+ from typing import TYPE_CHECKING, Callable, Dict, List, Optional
9
+
10
+ from .entity import BinarySensorEntity, CameraEntity, NumberEntity, TextSensorEntity
11
+ from .entity_extensions import SensorEntity, SwitchEntity, SelectEntity, ButtonEntity
12
+
13
+ if TYPE_CHECKING:
14
+ from .reachy_controller import ReachyController
15
+ from .camera_server import MJPEGCameraServer
16
+
17
+ _LOGGER = logging.getLogger(__name__)
18
+
19
+
20
+ # Fixed entity key mapping - ensures consistent keys across restarts
21
+ # Keys are based on object_id hash to ensure uniqueness and consistency
22
+ ENTITY_KEYS: Dict[str, int] = {
23
+ # Media player (key 0 reserved)
24
+ "reachy_mini_media_player": 0,
25
+ # Phase 1: Basic status and volume
26
+ "daemon_state": 100,
27
+ "backend_ready": 101,
28
+ "speaker_volume": 103,
29
+ # Phase 2: Motor control
30
+ "motors_enabled": 200,
31
+ "motor_mode": 201,
32
+ "wake_up": 202,
33
+ "go_to_sleep": 203,
34
+ # Phase 3: Pose control
35
+ "head_x": 300,
36
+ "head_y": 301,
37
+ "head_z": 302,
38
+ "head_roll": 303,
39
+ "head_pitch": 304,
40
+ "head_yaw": 305,
41
+ "body_yaw": 306,
42
+ "antenna_left": 307,
43
+ "antenna_right": 308,
44
+ # Phase 4: Look at control
45
+ "look_at_x": 400,
46
+ "look_at_y": 401,
47
+ "look_at_z": 402,
48
+ # Phase 5: DOA (Direction of Arrival) - re-added for wakeup turn-to-sound
49
+ "doa_angle": 500,
50
+ "speech_detected": 501,
51
+ # Phase 6: Diagnostic information
52
+ "control_loop_frequency": 600,
53
+ "sdk_version": 601,
54
+ "robot_name": 602,
55
+ "wireless_version": 603,
56
+ "simulation_mode": 604,
57
+ "wlan_ip": 605,
58
+ "error_message": 606, # Moved to diagnostic
59
+ # Phase 7: IMU sensors
60
+ "imu_accel_x": 700,
61
+ "imu_accel_y": 701,
62
+ "imu_accel_z": 702,
63
+ "imu_gyro_x": 703,
64
+ "imu_gyro_y": 704,
65
+ "imu_gyro_z": 705,
66
+ "imu_temperature": 706,
67
+ # Phase 8: Emotion selector
68
+ "emotion": 800,
69
+ # Phase 9: Audio controls
70
+ "microphone_volume": 900,
71
+ # Phase 10: Camera
72
+ "camera_url": 1000, # Keep for backward compatibility
73
+ "camera": 1001, # New camera entity
74
+ # Phase 11: LED control (disabled - not visible)
75
+ # "led_brightness": 1100,
76
+ # "led_effect": 1101,
77
+ # "led_color_r": 1102,
78
+ # "led_color_g": 1103,
79
+ # "led_color_b": 1104,
80
+ # Phase 12: Audio processing
81
+ "agc_enabled": 1200,
82
+ "agc_max_gain": 1201,
83
+ "noise_suppression": 1202,
84
+ "echo_cancellation_converged": 1203,
85
+ # Phase 13: Sendspin - auto-enabled via mDNS, no user entities needed
86
+ # Phase 21: Continuous conversation
87
+ "continuous_conversation": 1500,
88
+ # Phase 22: Gesture detection
89
+ "gesture_detected": 1600,
90
+ "gesture_confidence": 1601,
91
+ }
92
+
93
+
94
+ def get_entity_key(object_id: str) -> int:
95
+ """Get a consistent entity key for the given object_id."""
96
+ if object_id in ENTITY_KEYS:
97
+ return ENTITY_KEYS[object_id]
98
+ # Fallback: generate key from hash (should not happen if all entities are registered)
99
+ _LOGGER.warning(f"Entity key not found for {object_id}, generating from hash")
100
+ return abs(hash(object_id)) % 10000 + 2000
101
+
102
+
103
+ class EntityRegistry:
104
+ """Registry for managing ESPHome entities."""
105
+
106
+ def __init__(
107
+ self,
108
+ server,
109
+ reachy_controller: "ReachyController",
110
+ camera_server: Optional["MJPEGCameraServer"] = None,
111
+ play_emotion_callback: Optional[Callable[[str], None]] = None,
112
+ ):
113
+ """Initialize the entity registry.
114
+
115
+ Args:
116
+ server: The VoiceSatelliteProtocol server instance
117
+ reachy_controller: The ReachyController instance
118
+ camera_server: Optional camera server for camera entity
119
+ play_emotion_callback: Optional callback for playing emotions
120
+ """
121
+ self.server = server
122
+ self.reachy_controller = reachy_controller
123
+ self.camera_server = camera_server
124
+ self._play_emotion_callback = play_emotion_callback
125
+
126
+ # Gesture detection state
127
+ self._current_gesture = "none"
128
+ self._gesture_confidence = 0.0
129
+
130
+ # Emotion state
131
+ self._current_emotion = "None"
132
+ # Map emotion names to available robot emotions
133
+ # Full list of available emotions from robot
134
+ self._emotion_map = {
135
+ "None": None,
136
+ # Basic emotions
137
+ "Happy": "cheerful1",
138
+ "Sad": "sad1",
139
+ "Angry": "rage1",
140
+ "Fear": "fear1",
141
+ "Surprise": "surprised1",
142
+ "Disgust": "disgusted1",
143
+ # Extended emotions
144
+ "Laughing": "laughing1",
145
+ "Loving": "loving1",
146
+ "Proud": "proud1",
147
+ "Grateful": "grateful1",
148
+ "Enthusiastic": "enthusiastic1",
149
+ "Curious": "curious1",
150
+ "Amazed": "amazed1",
151
+ "Shy": "shy1",
152
+ "Confused": "confused1",
153
+ "Thoughtful": "thoughtful1",
154
+ "Anxious": "anxiety1",
155
+ "Scared": "scared1",
156
+ "Frustrated": "frustrated1",
157
+ "Irritated": "irritated1",
158
+ "Furious": "furious1",
159
+ "Contempt": "contempt1",
160
+ "Bored": "boredom1",
161
+ "Tired": "tired1",
162
+ "Exhausted": "exhausted1",
163
+ "Lonely": "lonely1",
164
+ "Downcast": "downcast1",
165
+ "Resigned": "resigned1",
166
+ "Uncertain": "uncertain1",
167
+ "Uncomfortable": "uncomfortable1",
168
+ "Lost": "lost1",
169
+ "Indifferent": "indifferent1",
170
+ # Positive actions
171
+ "Yes": "yes1",
172
+ "No": "no1",
173
+ "Welcoming": "welcoming1",
174
+ "Helpful": "helpful1",
175
+ "Attentive": "attentive1",
176
+ "Understanding": "understanding1",
177
+ "Calming": "calming1",
178
+ "Relief": "relief1",
179
+ "Success": "success1",
180
+ "Serenity": "serenity1",
181
+ # Negative actions
182
+ "Oops": "oops1",
183
+ "Displeased": "displeased1",
184
+ "Impatient": "impatient1",
185
+ "Reprimand": "reprimand1",
186
+ "GoAway": "go_away1",
187
+ # Special
188
+ "Come": "come1",
189
+ "Inquiring": "inquiring1",
190
+ "Sleep": "sleep1",
191
+ "Dance": "dance1",
192
+ "Electric": "electric1",
193
+ "Dying": "dying1",
194
+ }
195
+
196
+ def setup_all_entities(self, entities: List) -> None:
197
+ """Setup all entity phases.
198
+
199
+ Args:
200
+ entities: The list to append entities to
201
+ """
202
+ self._setup_phase1_entities(entities)
203
+ self._setup_phase2_entities(entities)
204
+ self._setup_phase3_entities(entities)
205
+ self._setup_phase4_entities(entities)
206
+ self._setup_phase5_entities(entities) # DOA for wakeup turn-to-sound
207
+ self._setup_phase6_entities(entities)
208
+ self._setup_phase7_entities(entities)
209
+ self._setup_phase8_entities(entities)
210
+ self._setup_phase9_entities(entities)
211
+ self._setup_phase10_entities(entities)
212
+ # Phase 11 (LED control) disabled - LEDs are inside the robot and not visible
213
+ self._setup_phase12_entities(entities)
214
+ # Phase 13 (Sendspin) - auto-enabled via mDNS discovery, no user entities
215
+ # Phase 14 (head_joints, passive_joints) removed - not needed
216
+ # Phase 20 (Tap detection) disabled - too many false triggers
217
+ self._setup_phase21_entities(entities)
218
+ self._setup_phase22_entities(entities)
219
+
220
+ _LOGGER.info("All entities registered: %d total", len(entities))
221
+
222
+ def _setup_phase1_entities(self, entities: List) -> None:
223
+ """Setup Phase 1 entities: Basic status and volume control."""
224
+ rc = self.reachy_controller
225
+
226
+ entities.append(TextSensorEntity(
227
+ server=self.server,
228
+ key=get_entity_key("daemon_state"),
229
+ name="Daemon State",
230
+ object_id="daemon_state",
231
+ icon="mdi:robot",
232
+ value_getter=rc.get_daemon_state,
233
+ ))
234
+
235
+ entities.append(BinarySensorEntity(
236
+ server=self.server,
237
+ key=get_entity_key("backend_ready"),
238
+ name="Backend Ready",
239
+ object_id="backend_ready",
240
+ icon="mdi:check-circle",
241
+ device_class="connectivity",
242
+ value_getter=rc.get_backend_ready,
243
+ ))
244
+
245
+ entities.append(NumberEntity(
246
+ server=self.server,
247
+ key=get_entity_key("speaker_volume"),
248
+ name="Speaker Volume",
249
+ object_id="speaker_volume",
250
+ min_value=0.0,
251
+ max_value=100.0,
252
+ step=1.0,
253
+ icon="mdi:volume-high",
254
+ unit_of_measurement="%",
255
+ mode=2, # Slider mode
256
+ entity_category=1, # config
257
+ value_getter=rc.get_speaker_volume,
258
+ value_setter=rc.set_speaker_volume,
259
+ ))
260
+
261
+ _LOGGER.debug("Phase 1 entities registered: daemon_state, backend_ready, speaker_volume")
262
+
263
+ def _setup_phase2_entities(self, entities: List) -> None:
264
+ """Setup Phase 2 entities: Motor control."""
265
+ rc = self.reachy_controller
266
+
267
+ entities.append(SwitchEntity(
268
+ server=self.server,
269
+ key=get_entity_key("motors_enabled"),
270
+ name="Motors Enabled",
271
+ object_id="motors_enabled",
272
+ icon="mdi:engine",
273
+ device_class="switch",
274
+ value_getter=rc.get_motors_enabled,
275
+ value_setter=rc.set_motors_enabled,
276
+ ))
277
+
278
+ entities.append(ButtonEntity(
279
+ server=self.server,
280
+ key=get_entity_key("wake_up"),
281
+ name="Wake Up",
282
+ object_id="wake_up",
283
+ icon="mdi:alarm",
284
+ device_class="restart",
285
+ on_press=rc.wake_up,
286
+ ))
287
+
288
+ entities.append(ButtonEntity(
289
+ server=self.server,
290
+ key=get_entity_key("go_to_sleep"),
291
+ name="Go to Sleep",
292
+ object_id="go_to_sleep",
293
+ icon="mdi:sleep",
294
+ device_class="restart",
295
+ on_press=rc.go_to_sleep,
296
+ ))
297
+
298
+ _LOGGER.debug("Phase 2 entities registered: motors_enabled, wake_up, go_to_sleep")
299
+
300
+ def _setup_phase3_entities(self, entities: List) -> None:
301
+ """Setup Phase 3 entities: Pose control."""
302
+ rc = self.reachy_controller
303
+
304
+ # Head position controls (X, Y, Z in mm)
305
+ entities.append(NumberEntity(
306
+ server=self.server,
307
+ key=get_entity_key("head_x"),
308
+ name="Head X Position",
309
+ object_id="head_x",
310
+ min_value=-50.0,
311
+ max_value=50.0,
312
+ step=1.0,
313
+ icon="mdi:axis-x-arrow",
314
+ unit_of_measurement="mm",
315
+ mode=2,
316
+ value_getter=rc.get_head_x,
317
+ value_setter=rc.set_head_x,
318
+ ))
319
+
320
+ entities.append(NumberEntity(
321
+ server=self.server,
322
+ key=get_entity_key("head_y"),
323
+ name="Head Y Position",
324
+ object_id="head_y",
325
+ min_value=-50.0,
326
+ max_value=50.0,
327
+ step=1.0,
328
+ icon="mdi:axis-y-arrow",
329
+ unit_of_measurement="mm",
330
+ mode=2,
331
+ value_getter=rc.get_head_y,
332
+ value_setter=rc.set_head_y,
333
+ ))
334
+
335
+ entities.append(NumberEntity(
336
+ server=self.server,
337
+ key=get_entity_key("head_z"),
338
+ name="Head Z Position",
339
+ object_id="head_z",
340
+ min_value=-50.0,
341
+ max_value=50.0,
342
+ step=1.0,
343
+ icon="mdi:axis-z-arrow",
344
+ unit_of_measurement="mm",
345
+ mode=2,
346
+ value_getter=rc.get_head_z,
347
+ value_setter=rc.set_head_z,
348
+ ))
349
+
350
+ # Head orientation controls (Roll, Pitch, Yaw in degrees)
351
+ entities.append(NumberEntity(
352
+ server=self.server,
353
+ key=get_entity_key("head_roll"),
354
+ name="Head Roll",
355
+ object_id="head_roll",
356
+ min_value=-40.0,
357
+ max_value=40.0,
358
+ step=1.0,
359
+ icon="mdi:rotate-3d-variant",
360
+ unit_of_measurement="°",
361
+ mode=2,
362
+ value_getter=rc.get_head_roll,
363
+ value_setter=rc.set_head_roll,
364
+ ))
365
+
366
+ entities.append(NumberEntity(
367
+ server=self.server,
368
+ key=get_entity_key("head_pitch"),
369
+ name="Head Pitch",
370
+ object_id="head_pitch",
371
+ min_value=-40.0,
372
+ max_value=40.0,
373
+ step=1.0,
374
+ icon="mdi:rotate-3d-variant",
375
+ unit_of_measurement="°",
376
+ mode=2,
377
+ value_getter=rc.get_head_pitch,
378
+ value_setter=rc.set_head_pitch,
379
+ ))
380
+
381
+ entities.append(NumberEntity(
382
+ server=self.server,
383
+ key=get_entity_key("head_yaw"),
384
+ name="Head Yaw",
385
+ object_id="head_yaw",
386
+ min_value=-180.0,
387
+ max_value=180.0,
388
+ step=1.0,
389
+ icon="mdi:rotate-3d-variant",
390
+ unit_of_measurement="°",
391
+ mode=2,
392
+ value_getter=rc.get_head_yaw,
393
+ value_setter=rc.set_head_yaw,
394
+ ))
395
+
396
+ # Body yaw control
397
+ entities.append(NumberEntity(
398
+ server=self.server,
399
+ key=get_entity_key("body_yaw"),
400
+ name="Body Yaw",
401
+ object_id="body_yaw",
402
+ min_value=-160.0,
403
+ max_value=160.0,
404
+ step=1.0,
405
+ icon="mdi:rotate-3d-variant",
406
+ unit_of_measurement="°",
407
+ mode=2,
408
+ value_getter=rc.get_body_yaw,
409
+ value_setter=rc.set_body_yaw,
410
+ ))
411
+
412
+ # Antenna controls
413
+ entities.append(NumberEntity(
414
+ server=self.server,
415
+ key=get_entity_key("antenna_left"),
416
+ name="Antenna(L)",
417
+ object_id="antenna_left",
418
+ min_value=-90.0,
419
+ max_value=90.0,
420
+ step=1.0,
421
+ icon="mdi:antenna",
422
+ unit_of_measurement="°",
423
+ mode=2,
424
+ value_getter=rc.get_antenna_left,
425
+ value_setter=rc.set_antenna_left,
426
+ ))
427
+
428
+ entities.append(NumberEntity(
429
+ server=self.server,
430
+ key=get_entity_key("antenna_right"),
431
+ name="Antenna(R)",
432
+ object_id="antenna_right",
433
+ min_value=-90.0,
434
+ max_value=90.0,
435
+ step=1.0,
436
+ icon="mdi:antenna",
437
+ unit_of_measurement="°",
438
+ mode=2,
439
+ value_getter=rc.get_antenna_right,
440
+ value_setter=rc.set_antenna_right,
441
+ ))
442
+
443
+ _LOGGER.debug("Phase 3 entities registered: head position/orientation, body_yaw, antennas")
444
+
445
+ def _setup_phase4_entities(self, entities: List) -> None:
446
+ """Setup Phase 4 entities: Look at control."""
447
+ rc = self.reachy_controller
448
+
449
+ entities.append(NumberEntity(
450
+ server=self.server,
451
+ key=get_entity_key("look_at_x"),
452
+ name="Look At X",
453
+ object_id="look_at_x",
454
+ min_value=-2.0,
455
+ max_value=2.0,
456
+ step=0.1,
457
+ icon="mdi:crosshairs-gps",
458
+ unit_of_measurement="m",
459
+ mode=1, # Box mode for precise input
460
+ value_getter=rc.get_look_at_x,
461
+ value_setter=rc.set_look_at_x,
462
+ ))
463
+
464
+ entities.append(NumberEntity(
465
+ server=self.server,
466
+ key=get_entity_key("look_at_y"),
467
+ name="Look At Y",
468
+ object_id="look_at_y",
469
+ min_value=-2.0,
470
+ max_value=2.0,
471
+ step=0.1,
472
+ icon="mdi:crosshairs-gps",
473
+ unit_of_measurement="m",
474
+ mode=1,
475
+ value_getter=rc.get_look_at_y,
476
+ value_setter=rc.set_look_at_y,
477
+ ))
478
+
479
+ entities.append(NumberEntity(
480
+ server=self.server,
481
+ key=get_entity_key("look_at_z"),
482
+ name="Look At Z",
483
+ object_id="look_at_z",
484
+ min_value=-2.0,
485
+ max_value=2.0,
486
+ step=0.1,
487
+ icon="mdi:crosshairs-gps",
488
+ unit_of_measurement="m",
489
+ mode=1,
490
+ value_getter=rc.get_look_at_z,
491
+ value_setter=rc.set_look_at_z,
492
+ ))
493
+
494
+ _LOGGER.debug("Phase 4 entities registered: look_at_x/y/z")
495
+
496
+ def _setup_phase5_entities(self, entities: List) -> None:
497
+ """Setup Phase 5 entities: DOA (Direction of Arrival) for wakeup turn-to-sound."""
498
+ rc = self.reachy_controller
499
+
500
+ entities.append(SensorEntity(
501
+ server=self.server,
502
+ key=get_entity_key("doa_angle"),
503
+ name="DOA Angle",
504
+ object_id="doa_angle",
505
+ icon="mdi:surround-sound",
506
+ unit_of_measurement="°",
507
+ accuracy_decimals=1,
508
+ state_class="measurement",
509
+ value_getter=rc.get_doa_angle_degrees,
510
+ ))
511
+
512
+ entities.append(BinarySensorEntity(
513
+ server=self.server,
514
+ key=get_entity_key("speech_detected"),
515
+ name="Speech Detected",
516
+ object_id="speech_detected",
517
+ icon="mdi:account-voice",
518
+ device_class="sound",
519
+ value_getter=rc.get_speech_detected,
520
+ ))
521
+
522
+ _LOGGER.debug("Phase 5 entities registered: doa_angle, speech_detected")
523
+
524
+ def _setup_phase6_entities(self, entities: List) -> None:
525
+ """Setup Phase 6 entities: Diagnostic information."""
526
+ rc = self.reachy_controller
527
+
528
+ entities.append(SensorEntity(
529
+ server=self.server,
530
+ key=get_entity_key("control_loop_frequency"),
531
+ name="Control Loop Frequency",
532
+ object_id="control_loop_frequency",
533
+ icon="mdi:speedometer",
534
+ unit_of_measurement="Hz",
535
+ accuracy_decimals=1,
536
+ state_class="measurement",
537
+ entity_category=2, # diagnostic
538
+ value_getter=rc.get_control_loop_frequency,
539
+ ))
540
+
541
+ entities.append(TextSensorEntity(
542
+ server=self.server,
543
+ key=get_entity_key("sdk_version"),
544
+ name="SDK Version",
545
+ object_id="sdk_version",
546
+ icon="mdi:information",
547
+ entity_category=2, # diagnostic
548
+ value_getter=rc.get_sdk_version,
549
+ ))
550
+
551
+ entities.append(TextSensorEntity(
552
+ server=self.server,
553
+ key=get_entity_key("robot_name"),
554
+ name="Robot Name",
555
+ object_id="robot_name",
556
+ icon="mdi:robot",
557
+ entity_category=2, # diagnostic
558
+ value_getter=rc.get_robot_name,
559
+ ))
560
+
561
+ entities.append(BinarySensorEntity(
562
+ server=self.server,
563
+ key=get_entity_key("wireless_version"),
564
+ name="Wireless Version",
565
+ object_id="wireless_version",
566
+ icon="mdi:wifi",
567
+ device_class="connectivity",
568
+ entity_category=2, # diagnostic
569
+ value_getter=rc.get_wireless_version,
570
+ ))
571
+
572
+ entities.append(BinarySensorEntity(
573
+ server=self.server,
574
+ key=get_entity_key("simulation_mode"),
575
+ name="Simulation Mode",
576
+ object_id="simulation_mode",
577
+ icon="mdi:virtual-reality",
578
+ entity_category=2, # diagnostic
579
+ value_getter=rc.get_simulation_mode,
580
+ ))
581
+
582
+ entities.append(TextSensorEntity(
583
+ server=self.server,
584
+ key=get_entity_key("wlan_ip"),
585
+ name="WLAN IP",
586
+ object_id="wlan_ip",
587
+ icon="mdi:ip-network",
588
+ entity_category=2, # diagnostic
589
+ value_getter=rc.get_wlan_ip,
590
+ ))
591
+
592
+ entities.append(TextSensorEntity(
593
+ server=self.server,
594
+ key=get_entity_key("error_message"),
595
+ name="Error Message",
596
+ object_id="error_message",
597
+ icon="mdi:alert-circle",
598
+ entity_category=2, # diagnostic
599
+ value_getter=rc.get_error_message,
600
+ ))
601
+
602
+ _LOGGER.debug(
603
+ "Phase 6 entities registered: control_loop_frequency, sdk_version, "
604
+ "robot_name, wireless_version, simulation_mode, wlan_ip, error_message"
605
+ )
606
+
607
+ def _setup_phase7_entities(self, entities: List) -> None:
608
+ """Setup Phase 7 entities: IMU sensors (wireless only)."""
609
+ rc = self.reachy_controller
610
+
611
+ # IMU Accelerometer
612
+ entities.append(SensorEntity(
613
+ server=self.server,
614
+ key=get_entity_key("imu_accel_x"),
615
+ name="IMU Accel X",
616
+ object_id="imu_accel_x",
617
+ icon="mdi:axis-x-arrow",
618
+ unit_of_measurement="m/s²",
619
+ accuracy_decimals=3,
620
+ state_class="measurement",
621
+ value_getter=rc.get_imu_accel_x,
622
+ ))
623
+
624
+ entities.append(SensorEntity(
625
+ server=self.server,
626
+ key=get_entity_key("imu_accel_y"),
627
+ name="IMU Accel Y",
628
+ object_id="imu_accel_y",
629
+ icon="mdi:axis-y-arrow",
630
+ unit_of_measurement="m/s²",
631
+ accuracy_decimals=3,
632
+ state_class="measurement",
633
+ value_getter=rc.get_imu_accel_y,
634
+ ))
635
+
636
+ entities.append(SensorEntity(
637
+ server=self.server,
638
+ key=get_entity_key("imu_accel_z"),
639
+ name="IMU Accel Z",
640
+ object_id="imu_accel_z",
641
+ icon="mdi:axis-z-arrow",
642
+ unit_of_measurement="m/s²",
643
+ accuracy_decimals=3,
644
+ state_class="measurement",
645
+ value_getter=rc.get_imu_accel_z,
646
+ ))
647
+
648
+ # IMU Gyroscope
649
+ entities.append(SensorEntity(
650
+ server=self.server,
651
+ key=get_entity_key("imu_gyro_x"),
652
+ name="IMU Gyro X",
653
+ object_id="imu_gyro_x",
654
+ icon="mdi:rotate-3d-variant",
655
+ unit_of_measurement="rad/s",
656
+ accuracy_decimals=3,
657
+ state_class="measurement",
658
+ value_getter=rc.get_imu_gyro_x,
659
+ ))
660
+
661
+ entities.append(SensorEntity(
662
+ server=self.server,
663
+ key=get_entity_key("imu_gyro_y"),
664
+ name="IMU Gyro Y",
665
+ object_id="imu_gyro_y",
666
+ icon="mdi:rotate-3d-variant",
667
+ unit_of_measurement="rad/s",
668
+ accuracy_decimals=3,
669
+ state_class="measurement",
670
+ value_getter=rc.get_imu_gyro_y,
671
+ ))
672
+
673
+ entities.append(SensorEntity(
674
+ server=self.server,
675
+ key=get_entity_key("imu_gyro_z"),
676
+ name="IMU Gyro Z",
677
+ object_id="imu_gyro_z",
678
+ icon="mdi:rotate-3d-variant",
679
+ unit_of_measurement="rad/s",
680
+ accuracy_decimals=3,
681
+ state_class="measurement",
682
+ value_getter=rc.get_imu_gyro_z,
683
+ ))
684
+
685
+ # IMU Temperature
686
+ entities.append(SensorEntity(
687
+ server=self.server,
688
+ key=get_entity_key("imu_temperature"),
689
+ name="IMU Temperature",
690
+ object_id="imu_temperature",
691
+ icon="mdi:thermometer",
692
+ unit_of_measurement="°C",
693
+ accuracy_decimals=1,
694
+ device_class="temperature",
695
+ state_class="measurement",
696
+ value_getter=rc.get_imu_temperature,
697
+ ))
698
+
699
+ _LOGGER.debug("Phase 7 entities registered: IMU accelerometer, gyroscope, temperature")
700
+
701
+ def _setup_phase8_entities(self, entities: List) -> None:
702
+ """Setup Phase 8 entities: Emotion selector."""
703
+
704
+ def get_emotion() -> str:
705
+ return self._current_emotion
706
+
707
+ def set_emotion(emotion: str) -> None:
708
+ self._current_emotion = emotion
709
+ emotion_name = self._emotion_map.get(emotion)
710
+ if emotion_name and self._play_emotion_callback:
711
+ self._play_emotion_callback(emotion_name)
712
+ # Reset to None after playing
713
+ self._current_emotion = "None"
714
+
715
+ entities.append(SelectEntity(
716
+ server=self.server,
717
+ key=get_entity_key("emotion"),
718
+ name="Emotion",
719
+ object_id="emotion",
720
+ options=list(self._emotion_map.keys()),
721
+ icon="mdi:emoticon",
722
+ value_getter=get_emotion,
723
+ value_setter=set_emotion,
724
+ ))
725
+
726
+ _LOGGER.debug("Phase 8 entities registered: emotion selector")
727
+
728
+ def _setup_phase9_entities(self, entities: List) -> None:
729
+ """Setup Phase 9 entities: Audio controls."""
730
+ rc = self.reachy_controller
731
+
732
+ entities.append(NumberEntity(
733
+ server=self.server,
734
+ key=get_entity_key("microphone_volume"),
735
+ name="Microphone Volume",
736
+ object_id="microphone_volume",
737
+ min_value=0.0,
738
+ max_value=100.0,
739
+ step=1.0,
740
+ icon="mdi:microphone",
741
+ unit_of_measurement="%",
742
+ mode=2, # Slider mode
743
+ entity_category=1, # config
744
+ value_getter=rc.get_microphone_volume,
745
+ value_setter=rc.set_microphone_volume,
746
+ ))
747
+
748
+ _LOGGER.debug("Phase 9 entities registered: microphone_volume")
749
+
750
+ def _setup_phase10_entities(self, entities: List) -> None:
751
+ """Setup Phase 10 entities: Camera for Home Assistant integration."""
752
+
753
+ def get_camera_image() -> Optional[bytes]:
754
+ """Get camera snapshot as JPEG bytes."""
755
+ if self.camera_server:
756
+ return self.camera_server.get_snapshot()
757
+ return None
758
+
759
+ entities.append(CameraEntity(
760
+ server=self.server,
761
+ key=get_entity_key("camera"),
762
+ name="Camera",
763
+ object_id="camera",
764
+ icon="mdi:camera",
765
+ image_getter=get_camera_image,
766
+ ))
767
+
768
+ _LOGGER.debug("Phase 10 entities registered: camera (ESPHome Camera entity)")
769
+
770
+ def _setup_phase12_entities(self, entities: List) -> None:
771
+ """Setup Phase 12 entities: Audio processing parameters (via local SDK)."""
772
+ rc = self.reachy_controller
773
+
774
+ def set_agc_enabled_with_save(enabled: bool) -> None:
775
+ """Set AGC enabled and save to preferences."""
776
+ rc.set_agc_enabled(enabled)
777
+ if hasattr(self.server, 'state') and self.server.state:
778
+ self.server.state.preferences.agc_enabled = enabled
779
+ self.server.state.save_preferences()
780
+ _LOGGER.debug("AGC enabled saved to preferences: %s", enabled)
781
+
782
+ def set_agc_max_gain_with_save(gain: float) -> None:
783
+ """Set AGC max gain and save to preferences."""
784
+ rc.set_agc_max_gain(gain)
785
+ if hasattr(self.server, 'state') and self.server.state:
786
+ self.server.state.preferences.agc_max_gain = gain
787
+ self.server.state.save_preferences()
788
+ _LOGGER.debug("AGC max gain saved to preferences: %.1f dB", gain)
789
+
790
+ def set_noise_suppression_with_save(level: float) -> None:
791
+ """Set noise suppression and save to preferences."""
792
+ rc.set_noise_suppression(level)
793
+ if hasattr(self.server, 'state') and self.server.state:
794
+ self.server.state.preferences.noise_suppression = level
795
+ self.server.state.save_preferences()
796
+ _LOGGER.debug("Noise suppression saved to preferences: %.1f%%", level)
797
+
798
+ entities.append(SwitchEntity(
799
+ server=self.server,
800
+ key=get_entity_key("agc_enabled"),
801
+ name="AGC Enabled",
802
+ object_id="agc_enabled",
803
+ icon="mdi:tune-vertical",
804
+ device_class="switch",
805
+ entity_category=1, # config
806
+ value_getter=rc.get_agc_enabled,
807
+ value_setter=set_agc_enabled_with_save,
808
+ ))
809
+
810
+ entities.append(NumberEntity(
811
+ server=self.server,
812
+ key=get_entity_key("agc_max_gain"),
813
+ name="AGC Max Gain",
814
+ object_id="agc_max_gain",
815
+ min_value=0.0,
816
+ max_value=40.0, # XVF3800 supports up to 40dB
817
+ step=1.0,
818
+ icon="mdi:volume-plus",
819
+ unit_of_measurement="dB",
820
+ mode=2,
821
+ entity_category=1, # config
822
+ value_getter=rc.get_agc_max_gain,
823
+ value_setter=set_agc_max_gain_with_save,
824
+ ))
825
+
826
+ entities.append(NumberEntity(
827
+ server=self.server,
828
+ key=get_entity_key("noise_suppression"),
829
+ name="Noise Suppression",
830
+ object_id="noise_suppression",
831
+ min_value=0.0,
832
+ max_value=100.0,
833
+ step=1.0,
834
+ icon="mdi:volume-off",
835
+ unit_of_measurement="%",
836
+ mode=2,
837
+ entity_category=1, # config
838
+ value_getter=rc.get_noise_suppression,
839
+ value_setter=set_noise_suppression_with_save,
840
+ ))
841
+
842
+ entities.append(BinarySensorEntity(
843
+ server=self.server,
844
+ key=get_entity_key("echo_cancellation_converged"),
845
+ name="Echo Cancellation Converged",
846
+ object_id="echo_cancellation_converged",
847
+ icon="mdi:waveform",
848
+ device_class="running",
849
+ entity_category=2, # diagnostic
850
+ value_getter=rc.get_echo_cancellation_converged,
851
+ ))
852
+
853
+ _LOGGER.debug(
854
+ "Phase 12 entities registered: agc_enabled, agc_max_gain, "
855
+ "noise_suppression, echo_cancellation_converged"
856
+ )
857
+
858
+ def _setup_phase21_entities(self, entities: List) -> None:
859
+ """Setup Phase 21 entities: Continuous conversation mode."""
860
+
861
+ def get_continuous_conversation() -> bool:
862
+ """Get current continuous conversation mode state."""
863
+ if hasattr(self.server, 'state') and self.server.state:
864
+ prefs = self.server.state.preferences
865
+ return getattr(prefs, 'continuous_conversation', False)
866
+ return False
867
+
868
+ def set_continuous_conversation(enabled: bool) -> None:
869
+ """Set continuous conversation mode and save to preferences."""
870
+ if hasattr(self.server, 'state') and self.server.state:
871
+ self.server.state.preferences.continuous_conversation = enabled
872
+ self.server.state.save_preferences()
873
+ _LOGGER.info("Continuous conversation mode %s", "enabled" if enabled else "disabled")
874
+
875
+ entities.append(SwitchEntity(
876
+ server=self.server,
877
+ key=get_entity_key("continuous_conversation"),
878
+ name="Continuous Conversation",
879
+ object_id="continuous_conversation",
880
+ icon="mdi:message-reply-text",
881
+ device_class="switch",
882
+ entity_category=1, # config
883
+ value_getter=get_continuous_conversation,
884
+ value_setter=set_continuous_conversation,
885
+ ))
886
+
887
+ _LOGGER.debug("Phase 21 entities registered: continuous_conversation")
888
+
889
+ def _setup_phase22_entities(self, entities: List) -> None:
890
+ """Setup Phase 22 entities: Gesture detection."""
891
+
892
+ def get_gesture() -> str:
893
+ """Get current detected gesture."""
894
+ if self.camera_server:
895
+ return self.camera_server.get_current_gesture()
896
+ return "none"
897
+
898
+ def get_gesture_confidence() -> float:
899
+ """Get gesture detection confidence."""
900
+ if self.camera_server:
901
+ return self.camera_server.get_gesture_confidence()
902
+ return 0.0
903
+
904
+ gesture_entity = TextSensorEntity(
905
+ server=self.server,
906
+ key=get_entity_key("gesture_detected"),
907
+ name="Gesture Detected",
908
+ object_id="gesture_detected",
909
+ icon="mdi:hand-wave",
910
+ value_getter=get_gesture,
911
+ )
912
+ entities.append(gesture_entity)
913
+ self._gesture_entity = gesture_entity
914
+
915
+ confidence_entity = SensorEntity(
916
+ server=self.server,
917
+ key=get_entity_key("gesture_confidence"),
918
+ name="Gesture Confidence",
919
+ object_id="gesture_confidence",
920
+ icon="mdi:percent",
921
+ unit_of_measurement="%",
922
+ accuracy_decimals=1,
923
+ state_class="measurement",
924
+ value_getter=get_gesture_confidence,
925
+ )
926
+ entities.append(confidence_entity)
927
+ self._gesture_confidence_entity = confidence_entity
928
+
929
+ _LOGGER.debug("Phase 22 entities registered: gesture_detected, gesture_confidence")
930
+
931
+ def update_gesture_state(self) -> None:
932
+ """Push gesture state update to Home Assistant."""
933
+ if hasattr(self, '_gesture_entity') and self._gesture_entity:
934
+ self._gesture_entity.update_state()
935
+ if hasattr(self, '_gesture_confidence_entity') and self._gesture_confidence_entity:
936
+ self._gesture_confidence_entity.update_state()
937
+
938
+ def find_entity_references(self, entities: List) -> None:
939
+ """Find and store references to special entities from existing list.
940
+
941
+ Args:
942
+ entities: The list of existing entities to search
943
+ """
944
+ # DOA entities are read-only sensors, no special references needed
945
+ pass
reachy_mini_ha_voice/gesture_detector.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gesture detection using HaGRID ONNX models."""
2
+
3
+ from __future__ import annotations
4
+ import logging
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Optional, Tuple
8
+
9
+ import cv2
10
+ import numpy as np
11
+ from numpy.typing import NDArray
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class Gesture(Enum):
    """Hand gestures exposed by the detector.

    Member values are the raw HaGRID label strings; ``NONE`` means
    "no gesture detected / below confidence". Declaration order is
    significant: ``_NAME_TO_GESTURE`` mirrors this ordering.
    """

    NONE = "no_gesture"
    CALL = "call"
    DISLIKE = "dislike"
    FIST = "fist"
    FOUR = "four"
    LIKE = "like"
    MUTE = "mute"
    OK = "ok"
    ONE = "one"
    PALM = "palm"
    PEACE = "peace"
    PEACE_INVERTED = "peace_inverted"
    ROCK = "rock"
    STOP = "stop"
    STOP_INVERTED = "stop_inverted"
    THREE = "three"
    THREE2 = "three2"
    TWO_UP = "two_up"
    TWO_UP_INVERTED = "two_up_inverted"
36
+
37
+
38
# Output labels of the HaGRID crops classifier, in model-output index order.
# NOTE(review): this ordering must match the ONNX classifier's class indices
# exactly — GestureDetector._classify() indexes this list with argmax(logits).
# Do not reorder. (Assumed to mirror the published HaGRID label set — confirm
# against the model card if the model file is updated.)
_GESTURE_CLASSES = [
    'hand_down', 'hand_right', 'hand_left', 'thumb_index', 'thumb_left',
    'thumb_right', 'thumb_down', 'half_up', 'half_left', 'half_right',
    'half_down', 'part_hand_heart', 'part_hand_heart2', 'fist_inverted',
    'two_left', 'two_right', 'two_down', 'grabbing', 'grip', 'point',
    'call', 'three3', 'little_finger', 'middle_finger', 'dislike', 'fist',
    'four', 'like', 'mute', 'ok', 'one', 'palm', 'peace', 'peace_inverted',
    'rock', 'stop', 'stop_inverted', 'three', 'three2', 'two_up',
    'two_up_inverted', 'three_gun', 'one_left', 'one_right', 'one_down'
]
48
+
49
# Classifier label -> public Gesture member. Every label the app supports is
# exactly its enum member's value, and Gesture's declaration order matches the
# original literal's insertion order, so the table is derived straight from
# the enum. Gesture.NONE ("no_gesture") is deliberately excluded; labels not
# in this mapping fall back to Gesture.NONE at lookup time.
_NAME_TO_GESTURE = {g.value: g for g in Gesture if g is not Gesture.NONE}
59
+
60
+
61
class GestureDetector:
    """Two-stage hand gesture recognizer using HaGRID ONNX models.

    Stage 1 (``hand_detector.onnx``) locates the single most confident hand
    box in a BGR frame; stage 2 (``crops_classifier.onnx``) classifies a
    square crop of that box into one of the ``_GESTURE_CLASSES`` labels.
    Both models run on CPU via onnxruntime. If onnxruntime or the model
    files are missing, the detector stays unavailable and ``detect()``
    always returns ``(Gesture.NONE, 0.0)``.
    """

    def __init__(self, confidence_threshold: float = 0.3, detection_threshold: float = 0.3):
        """Set up paths and preprocessing constants, then load the models.

        Args:
            confidence_threshold: Minimum classifier softmax confidence for a
                gesture to be reported (below it, Gesture.NONE is returned).
            detection_threshold: Minimum hand-detector score for a box to be
                considered at all.
        """
        self._confidence_threshold = confidence_threshold
        self._detection_threshold = detection_threshold
        # Models ship alongside the package in a "models" directory.
        models_dir = Path(__file__).parent / "models"
        self._detector_path = models_dir / "hand_detector.onnx"
        self._classifier_path = models_dir / "crops_classifier.onnx"
        self._detector = None
        self._classifier = None
        self._available = False
        # Normalization constants: maps uint8 pixels to roughly [-1, 1].
        self._mean = np.array([127, 127, 127], dtype=np.float32)
        self._std = np.array([128, 128, 128], dtype=np.float32)
        # Fixed input resolutions fed to cv2.resize for each model.
        self._detector_size = (320, 240)
        self._classifier_size = (128, 128)
        self._load_models()

    def _load_models(self) -> None:
        """Create both onnxruntime sessions; leave unavailable on any failure."""
        try:
            import onnxruntime as ort
        except ImportError:
            # Gesture support is optional: degrade gracefully.
            logger.warning("onnxruntime not installed")
            return
        if not self._detector_path.exists() or not self._classifier_path.exists():
            logger.warning("Model files not found")
            return
        try:
            providers = ['CPUExecutionProvider']
            logger.info("Loading gesture models...")
            self._detector = ort.InferenceSession(str(self._detector_path), providers=providers)
            self._classifier = ort.InferenceSession(str(self._classifier_path), providers=providers)
            # Cache input/output tensor names so inference calls avoid lookups.
            self._det_input = self._detector.get_inputs()[0].name
            self._det_outputs = [o.name for o in self._detector.get_outputs()]
            self._cls_input = self._classifier.get_inputs()[0].name
            self._available = True
            logger.info("Gesture detection ready")
        except Exception as e:
            logger.error("Failed to load models: %s", e)

    @property
    def is_available(self) -> bool:
        """True when both models loaded successfully and detect() is usable."""
        return self._available

    def _preprocess(self, frame: NDArray, size: Tuple[int, int]) -> NDArray:
        """Convert a BGR frame to a normalized NCHW float32 batch of one.

        Args:
            frame: BGR image (OpenCV convention — assumed; confirm at callers).
            size: Target (width, height) for cv2.resize.
        """
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, size)
        img = (img.astype(np.float32) - self._mean) / self._std
        # HWC -> CHW, then add the batch dimension.
        img = np.transpose(img, [2, 0, 1])
        return np.expand_dims(img, axis=0)

    def _detect_hand(self, frame: NDArray) -> Optional[Tuple[int, int, int, int, float]]:
        """Return the best hand box as (x1, y1, x2, y2, score), or None.

        Picks the single detection with the highest score above
        ``detection_threshold``; degenerate (empty) boxes are rejected.
        """
        if self._detector is None:
            return None
        h, w = frame.shape[:2]
        inp = self._preprocess(frame, self._detector_size)
        outs = self._detector.run(self._det_outputs, {self._det_input: inp})
        # NOTE(review): assumed detector output layout: outs[0]=boxes,
        # outs[2]=scores — confirm against the hand_detector.onnx signature.
        boxes = outs[0]
        scores = outs[2]
        if len(boxes) == 0:
            return None
        # Seed best_c with the threshold so anything below it is ignored.
        best_i, best_c = -1, self._detection_threshold
        for i, c in enumerate(scores):
            if c > best_c:
                best_c, best_i = float(c), i
        if best_i < 0:
            return None
        b = boxes[best_i]
        # Model outputs normalized coordinates (0-1), scale to original frame size
        x1, y1 = int(b[0] * w), int(b[1] * h)
        x2, y2 = int(b[2] * w), int(b[3] * h)
        # Clamp to frame bounds.
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w-1, x2), min(h-1, y2)
        if x2 <= x1 or y2 <= y1:
            return None
        return (x1, y1, x2, y2, best_c)

    def _get_square_crop(self, frame: NDArray, box: Tuple[int, int, int, int]) -> NDArray:
        """Crop the box expanded to a square (classifier expects square input).

        The shorter side is grown symmetrically to match the longer one, then
        the result is clamped to the frame; near an edge the crop may
        therefore end up non-square.
        """
        h, w = frame.shape[:2]
        x1, y1, x2, y2 = box
        bw, bh = x2 - x1, y2 - y1
        if bh < bw:
            y1, y2 = y1 - (bw - bh) // 2, y1 - (bw - bh) // 2 + bw
        elif bh > bw:
            x1, x2 = x1 - (bh - bw) // 2, x1 - (bh - bw) // 2 + bh
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w-1, x2), min(h-1, y2)
        return frame[y1:y2, x1:x2]

    def _classify(self, crop: NDArray) -> Tuple[Gesture, float]:
        """Classify a hand crop; returns (gesture, softmax confidence).

        Returns Gesture.NONE when the classifier is unavailable, the crop is
        empty, confidence is below ``confidence_threshold``, or the predicted
        label is not one the app exposes.
        """
        if self._classifier is None or crop.size == 0:
            return Gesture.NONE, 0.0
        inp = self._preprocess(crop, self._classifier_size)
        logits = self._classifier.run(None, {self._cls_input: inp})[0][0]
        idx = int(np.argmax(logits))
        # Numerically stable softmax for the winning class only.
        exp_l = np.exp(logits - np.max(logits))
        conf = float(exp_l[idx] / np.sum(exp_l))
        if idx >= len(_GESTURE_CLASSES) or conf < self._confidence_threshold:
            return Gesture.NONE, conf
        name = _GESTURE_CLASSES[idx]
        return _NAME_TO_GESTURE.get(name, Gesture.NONE), conf

    def detect(self, frame: NDArray) -> Tuple[Gesture, float]:
        """Run the full pipeline on a frame.

        Args:
            frame: BGR image (OpenCV convention — assumed).

        Returns:
            Tuple of (gesture, combined confidence). The confidence is the
            product of detector score and classifier softmax confidence.
            Any internal error is caught and reported as (Gesture.NONE, 0.0).
        """
        if not self._available:
            return Gesture.NONE, 0.0
        try:
            det = self._detect_hand(frame)
            if det is None:
                return Gesture.NONE, 0.0
            x1, y1, x2, y2, det_c = det
            logger.debug("Hand: box=(%d,%d,%d,%d) conf=%.2f", x1, y1, x2, y2, det_c)
            crop = self._get_square_crop(frame, (x1, y1, x2, y2))
            if crop.size == 0:
                return Gesture.NONE, 0.0
            gest, cls_c = self._classify(crop)
            if gest != Gesture.NONE:
                logger.debug("Gesture: %s (det=%.2f cls=%.2f)", gest.value, det_c, cls_c)
            return gest, det_c * cls_c
        except Exception as e:
            logger.warning("Gesture error: %s", e)
            return Gesture.NONE, 0.0

    def close(self) -> None:
        """Drop session references and mark the detector unavailable."""
        self._detector = self._classifier = None
        self._available = False
{reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/head_tracker.py RENAMED
@@ -3,35 +3,24 @@
3
  Ported from reachy_mini_conversation_app for voice assistant integration.
4
  Model is loaded at initialization time (not lazy) to ensure face tracking
5
  is ready immediately when the camera server starts.
6
-
7
- Performance Optimizations:
8
- - Optional frame downscaling for faster inference on low-power devices
9
- - Frame skip support for reduced CPU usage when tracking is stable
10
- - Configurable inference resolution (default: native resolution)
11
  """
12
 
13
  from __future__ import annotations
14
-
15
  import logging
16
- from typing import TYPE_CHECKING
17
 
18
  import numpy as np
 
19
 
20
- if TYPE_CHECKING:
21
- from numpy.typing import NDArray
22
 
23
  logger = logging.getLogger(__name__)
24
 
25
 
26
  class HeadTracker:
27
  """Lightweight head tracker using YOLO for face detection.
28
-
29
  Model is loaded at initialization time to ensure face tracking
30
  is ready immediately (matching conversation_app behavior).
31
-
32
- Performance Features:
33
- - Frame downscaling: Reduces inference resolution for ~4x speedup
34
- - Frame skipping: Reuses last detection result for stable tracking
35
  """
36
 
37
  def __init__(
@@ -40,7 +29,6 @@ class HeadTracker:
40
  model_filename: str = "model.pt",
41
  confidence_threshold: float = 0.3,
42
  device: str = "cpu",
43
- inference_scale: float = 1.0, # Scale factor for inference (0.5 = half resolution)
44
  ) -> None:
45
  """Initialize YOLO-based head tracker.
46
 
@@ -49,7 +37,6 @@ class HeadTracker:
49
  model_filename: Model file name
50
  confidence_threshold: Minimum confidence for face detection
51
  device: Device to run inference on ('cpu' or 'cuda')
52
- inference_scale: Scale factor for inference (0.5 = half res for ~4x speedup)
53
  """
54
  self.confidence_threshold = confidence_threshold
55
  self.model = None
@@ -58,57 +45,57 @@ class HeadTracker:
58
  self._device = device
59
  self._detections_class = None
60
  self._model_load_attempted = False
61
- self._model_load_error: str | None = None
62
-
63
- # Performance optimization settings
64
- self._inference_scale = min(1.0, max(0.25, inference_scale))
65
-
66
- # Frame skip support for stable tracking
67
- self._last_detection: tuple[NDArray, float] | None = None
68
- self._frames_since_detection = 0
69
- self._max_skip_frames = 0 # 0 = no skipping (can be set externally)
70
-
71
  # Load model immediately at init (not lazy)
72
  self._load_model()
73
 
74
  def _load_model(self) -> None:
75
- """Load YOLO model for face detection."""
76
  if self._model_load_attempted:
77
  return
78
-
79
  self._model_load_attempted = True
80
-
81
  try:
82
- from pathlib import Path
83
-
84
- from supervision import Detections
85
  from ultralytics import YOLO
86
-
 
 
 
87
  self._detections_class = Detections
88
-
89
- # Load local model from models directory
90
- models_dir = Path(__file__).resolve().parents[1] / "models"
91
- local_model_path = models_dir / self._model_filename
92
-
93
- if not local_model_path.exists():
94
- raise FileNotFoundError(
95
- f"Model file not found: {local_model_path}. "
96
- f"Please place {self._model_filename} in the models directory."
97
- )
98
-
99
- model_path = str(local_model_path)
100
- logger.info("Loading local YOLO model: %s", model_path)
101
-
 
 
 
 
 
 
 
 
 
 
 
 
102
  self.model = YOLO(model_path).to(self._device)
103
- logger.info("YOLO face detection model loaded successfully")
104
  except ImportError as e:
105
  self._model_load_error = f"Missing dependencies: {e}"
106
  logger.warning("Face tracking disabled - missing dependencies: %s", e)
107
  self.model = None
108
- except FileNotFoundError as e:
109
- self._model_load_error = str(e)
110
- logger.error("Failed to load YOLO model: %s", e)
111
- self.model = None
112
  except Exception as e:
113
  self._model_load_error = str(e)
114
  logger.error("Failed to load YOLO model: %s", e)
@@ -119,7 +106,7 @@ class HeadTracker:
119
  """Check if the head tracker is available and ready."""
120
  return self.model is not None and self._detections_class is not None
121
 
122
- def _select_best_face(self, detections) -> int | None:
123
  """Select the best face based on confidence and area.
124
 
125
  Args:
@@ -152,7 +139,9 @@ class HeadTracker:
152
  best_idx = valid_indices[np.argmax(scores)]
153
  return int(best_idx)
154
 
155
- def _bbox_to_normalized_coords(self, bbox: NDArray[np.float32], w: int, h: int) -> NDArray[np.float32]:
 
 
156
  """Convert bounding box center to normalized coordinates [-1, 1].
157
 
158
  Args:
@@ -172,7 +161,9 @@ class HeadTracker:
172
 
173
  return np.array([norm_x, norm_y], dtype=np.float32)
174
 
175
- def get_head_position(self, img: NDArray[np.uint8]) -> tuple[NDArray[np.float32] | None, float | None]:
 
 
176
  """Get head position from face detection.
177
 
178
  Args:
@@ -186,36 +177,14 @@ class HeadTracker:
186
 
187
  h, w = img.shape[:2]
188
 
189
- # Frame skip optimization: return last detection if within skip limit
190
- if (
191
- self._max_skip_frames > 0
192
- and self._last_detection is not None
193
- and self._frames_since_detection < self._max_skip_frames
194
- ):
195
- self._frames_since_detection += 1
196
- return self._last_detection
197
-
198
  try:
199
- # Downscale image for faster inference if scale < 1.0
200
- if self._inference_scale < 1.0:
201
- import cv2
202
-
203
- new_w = int(w * self._inference_scale)
204
- new_h = int(h * self._inference_scale)
205
- inference_img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
206
- else:
207
- inference_img = img
208
- new_w, new_h = w, h
209
-
210
  # Run YOLO inference
211
- results = self.model(inference_img, verbose=False)
212
  detections = self._detections_class.from_ultralytics(results[0])
213
 
214
  # Select best face
215
  face_idx = self._select_best_face(detections)
216
  if face_idx is None:
217
- self._last_detection = None
218
- self._frames_since_detection = 0
219
  return None, None
220
 
221
  bbox = detections.xyxy[face_idx]
@@ -223,90 +192,11 @@ class HeadTracker:
223
  if detections.confidence is not None:
224
  confidence = float(detections.confidence[face_idx])
225
 
226
- # Scale bbox back to original resolution if downscaled
227
- if self._inference_scale < 1.0:
228
- scale_factor = 1.0 / self._inference_scale
229
- bbox = bbox * scale_factor
230
-
231
- # Get face center in [-1, 1] coordinates (using original dimensions)
232
  face_center = self._bbox_to_normalized_coords(bbox, w, h)
233
 
234
- # Cache result for frame skipping
235
- self._last_detection = (face_center, confidence)
236
- self._frames_since_detection = 0
237
-
238
  return face_center, confidence
239
 
240
  except Exception as e:
241
  logger.debug("Error in head position detection: %s", e)
242
  return None, None
243
-
244
- def set_inference_scale(self, scale: float) -> None:
245
- """Set the inference resolution scale factor.
246
-
247
- Args:
248
- scale: Scale factor (0.25 to 1.0). Lower = faster but less accurate.
249
- """
250
- self._inference_scale = min(1.0, max(0.25, scale))
251
- logger.debug("Inference scale set to %.2f", self._inference_scale)
252
-
253
- def set_max_skip_frames(self, skip: int) -> None:
254
- """Set maximum frames to skip between detections.
255
-
256
- Args:
257
- skip: Number of frames to skip (0 = no skipping).
258
- Higher values reduce CPU but may cause tracking lag.
259
- """
260
- self._max_skip_frames = max(0, skip)
261
- logger.debug("Max skip frames set to %d", self._max_skip_frames)
262
-
263
- def clear_detection_cache(self) -> None:
264
- """Clear cached detection result."""
265
- self._last_detection = None
266
- self._frames_since_detection = 0
267
-
268
- def suspend(self) -> None:
269
- """Suspend the head tracker to release YOLO model from memory.
270
-
271
- Call resume() to reload the model.
272
- """
273
- if self.model is None:
274
- logger.debug("HeadTracker model not loaded, nothing to suspend")
275
- return
276
-
277
- logger.info("Suspending HeadTracker - releasing YOLO model...")
278
-
279
- try:
280
- # Release YOLO model from memory
281
- del self.model
282
- self.model = None
283
-
284
- # Also clear the detections class reference
285
- self._detections_class = None
286
-
287
- # Reset load state so resume can reload
288
- self._model_load_attempted = False
289
- self._model_load_error = None
290
-
291
- # Clear detection cache
292
- self.clear_detection_cache()
293
-
294
- logger.info("HeadTracker suspended - YOLO model released")
295
- except Exception as e:
296
- logger.warning("Error suspending HeadTracker: %s", e)
297
-
298
- def resume(self) -> None:
299
- """Resume the head tracker by reloading the YOLO model."""
300
- if self.model is not None:
301
- logger.debug("HeadTracker model already loaded")
302
- return
303
-
304
- logger.info("Resuming HeadTracker - reloading YOLO model...")
305
-
306
- # Reload the model
307
- self._load_model()
308
-
309
- if self.is_available:
310
- logger.info("HeadTracker resumed - YOLO model loaded")
311
- else:
312
- logger.warning("HeadTracker resume failed - model not available")
 
3
  Ported from reachy_mini_conversation_app for voice assistant integration.
4
  Model is loaded at initialization time (not lazy) to ensure face tracking
5
  is ready immediately when the camera server starts.
 
 
 
 
 
6
  """
7
 
8
  from __future__ import annotations
 
9
  import logging
10
+ from typing import Tuple, Optional
11
 
12
  import numpy as np
13
+ from numpy.typing import NDArray
14
 
 
 
15
 
16
  logger = logging.getLogger(__name__)
17
 
18
 
19
  class HeadTracker:
20
  """Lightweight head tracker using YOLO for face detection.
21
+
22
  Model is loaded at initialization time to ensure face tracking
23
  is ready immediately (matching conversation_app behavior).
 
 
 
 
24
  """
25
 
26
  def __init__(
 
29
  model_filename: str = "model.pt",
30
  confidence_threshold: float = 0.3,
31
  device: str = "cpu",
 
32
  ) -> None:
33
  """Initialize YOLO-based head tracker.
34
 
 
37
  model_filename: Model file name
38
  confidence_threshold: Minimum confidence for face detection
39
  device: Device to run inference on ('cpu' or 'cuda')
 
40
  """
41
  self.confidence_threshold = confidence_threshold
42
  self.model = None
 
45
  self._device = device
46
  self._detections_class = None
47
  self._model_load_attempted = False
48
+ self._model_load_error: Optional[str] = None
49
+
 
 
 
 
 
 
 
 
50
  # Load model immediately at init (not lazy)
51
  self._load_model()
52
 
53
  def _load_model(self) -> None:
54
+ """Load YOLO model with retry logic."""
55
  if self._model_load_attempted:
56
  return
57
+
58
  self._model_load_attempted = True
59
+
60
  try:
 
 
 
61
  from ultralytics import YOLO
62
+ from supervision import Detections
63
+ from huggingface_hub import hf_hub_download
64
+ import time
65
+
66
  self._detections_class = Detections
67
+
68
+ # Download with retries
69
+ max_retries = 3
70
+ retry_delay = 5
71
+ model_path = None
72
+ last_error = None
73
+
74
+ for attempt in range(max_retries):
75
+ try:
76
+ model_path = hf_hub_download(
77
+ repo_id=self._model_repo,
78
+ filename=self._model_filename,
79
+ )
80
+ break
81
+ except Exception as e:
82
+ last_error = e
83
+ if attempt < max_retries - 1:
84
+ logger.warning(
85
+ "Model download failed (attempt %d/%d): %s. Retrying in %ds...",
86
+ attempt + 1, max_retries, e, retry_delay
87
+ )
88
+ time.sleep(retry_delay)
89
+
90
+ if model_path is None:
91
+ raise last_error
92
+
93
  self.model = YOLO(model_path).to(self._device)
94
+ logger.info("YOLO face detection model loaded")
95
  except ImportError as e:
96
  self._model_load_error = f"Missing dependencies: {e}"
97
  logger.warning("Face tracking disabled - missing dependencies: %s", e)
98
  self.model = None
 
 
 
 
99
  except Exception as e:
100
  self._model_load_error = str(e)
101
  logger.error("Failed to load YOLO model: %s", e)
 
106
  """Check if the head tracker is available and ready."""
107
  return self.model is not None and self._detections_class is not None
108
 
109
+ def _select_best_face(self, detections) -> Optional[int]:
110
  """Select the best face based on confidence and area.
111
 
112
  Args:
 
139
  best_idx = valid_indices[np.argmax(scores)]
140
  return int(best_idx)
141
 
142
+ def _bbox_to_normalized_coords(
143
+ self, bbox: NDArray[np.float32], w: int, h: int
144
+ ) -> NDArray[np.float32]:
145
  """Convert bounding box center to normalized coordinates [-1, 1].
146
 
147
  Args:
 
161
 
162
  return np.array([norm_x, norm_y], dtype=np.float32)
163
 
164
+ def get_head_position(
165
+ self, img: NDArray[np.uint8]
166
+ ) -> Tuple[Optional[NDArray[np.float32]], Optional[float]]:
167
  """Get head position from face detection.
168
 
169
  Args:
 
177
 
178
  h, w = img.shape[:2]
179
 
 
 
 
 
 
 
 
 
 
180
  try:
 
 
 
 
 
 
 
 
 
 
 
181
  # Run YOLO inference
182
+ results = self.model(img, verbose=False)
183
  detections = self._detections_class.from_ultralytics(results[0])
184
 
185
  # Select best face
186
  face_idx = self._select_best_face(detections)
187
  if face_idx is None:
 
 
188
  return None, None
189
 
190
  bbox = detections.xyxy[face_idx]
 
192
  if detections.confidence is not None:
193
  confidence = float(detections.confidence[face_idx])
194
 
195
+ # Get face center in [-1, 1] coordinates
 
 
 
 
 
196
  face_center = self._bbox_to_normalized_coords(bbox, w, h)
197
 
 
 
 
 
198
  return face_center, confidence
199
 
200
  except Exception as e:
201
  logger.debug("Error in head position detection: %s", e)
202
  return None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/main.py RENAMED
@@ -7,42 +7,48 @@ with Home Assistant via ESPHome protocol for voice control.
7
 
8
  import asyncio
9
  import logging
10
- import os
11
- import pathlib
12
- import sys
13
  import threading
 
 
14
 
15
- from reachy_mini import ReachyMiniApp
16
 
17
- from .core import get_health_monitor, get_memory_monitor
18
- from .voice_assistant import VoiceAssistantService
19
 
20
- logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
21
 
 
 
 
 
 
 
22
 
23
- def _normalize_home_for_audio_utils() -> None:
24
- """Normalize HOME on robot so SDK audio_utils resolves ~/.asoundrc reliably."""
25
- if not sys.platform.startswith("linux"):
26
- return
27
 
28
- current_home = os.environ.get("HOME", "")
29
- user = os.environ.get("USER", "pollen")
30
- preferred_home = f"/home/{user}"
31
- preferred_path = pathlib.Path(preferred_home)
32
 
33
- if not preferred_path.exists():
34
- # Fallback for environments where USER is not set as expected.
35
- preferred_home = "/home/pollen"
36
- preferred_path = pathlib.Path(preferred_home)
37
 
38
- if not preferred_path.exists():
39
- return
40
 
41
- # Force deterministic robot HOME for SDK Path.home() checks.
42
- # Only adjust when HOME is missing or points outside /home.
43
- if not current_home or not current_home.startswith("/home/"):
44
- os.environ["HOME"] = preferred_home
45
- logger.warning("Adjusted HOME from '%s' to '%s' for audio routing", current_home, preferred_home)
46
 
47
 
48
  class ReachyMiniHaVoice(ReachyMiniApp):
@@ -55,72 +61,73 @@ class ReachyMiniHaVoice(ReachyMiniApp):
55
  """
56
 
57
  # No custom web UI needed - configuration is automatic via Home Assistant
58
- custom_app_url: str | None = None
59
 
60
  def __init__(self, *args, **kwargs):
61
  """Initialize the app."""
62
  super().__init__(*args, **kwargs)
63
- if not hasattr(self, "stop_event"):
64
  self.stop_event = threading.Event()
65
 
66
  def wrapped_run(self, *args, **kwargs) -> None:
67
  """
68
- Override wrapped_run to handle Reachy Mini connection failures.
 
 
69
  """
70
  logger.info("Starting Reachy Mini HA Voice App...")
71
 
72
- _normalize_home_for_audio_utils()
 
 
 
 
 
73
 
74
- # Connect to ReachyMini
75
- try:
76
- logger.info("Attempting to connect to Reachy Mini...")
77
- super().wrapped_run(*args, **kwargs)
78
- except TimeoutError as e:
79
- logger.error(f"Timeout connecting to Reachy Mini: {e}")
80
- sys.exit(1)
81
- except Exception as e:
82
- error_str = str(e)
83
- if "Unable to connect" in error_str or "Timeout" in error_str:
84
- logger.error(f"Failed to connect to Reachy Mini: {e}")
85
- sys.exit(1)
86
- else:
87
- raise
 
 
 
 
 
 
 
 
 
 
88
 
89
  def run(self, reachy_mini, stop_event: threading.Event) -> None:
90
  """
91
  Main application entry point.
92
 
93
  Args:
94
- reachy_mini: The Reachy Mini robot instance (required, cannot be None)
95
  stop_event: Event to signal graceful shutdown
96
  """
97
  logger.info("Starting Reachy Mini for Home Assistant...")
98
 
99
- # Optional health/memory monitors
100
- enable_monitors = os.environ.get("REACHY_ENABLE_FRAMEWORK_MONITORS", "1").lower() in ("1", "true", "yes", "on")
101
- health_monitor = get_health_monitor() if enable_monitors else None
102
- memory_monitor = get_memory_monitor() if enable_monitors else None
103
-
104
  # Create and run the HA service
105
  service = VoiceAssistantService(reachy_mini)
106
 
107
- if enable_monitors:
108
- health_monitor.register_checker(
109
- "voice_assistant",
110
- lambda: service.is_running if hasattr(service, "is_running") else True,
111
- interval=30.0,
112
- )
113
-
114
  # Always create a new event loop to avoid conflicts with SDK
115
  loop = asyncio.new_event_loop()
116
  asyncio.set_event_loop(loop)
117
  logger.debug("Created new event loop for HA service")
118
 
119
  try:
120
- if enable_monitors:
121
- health_monitor.start()
122
- memory_monitor.start()
123
-
124
  loop.run_until_complete(service.start())
125
 
126
  logger.info("=" * 50)
@@ -129,8 +136,12 @@ class ReachyMiniHaVoice(ReachyMiniApp):
129
  logger.info("ESPHome Server: 0.0.0.0:6053")
130
  logger.info("Camera Server: 0.0.0.0:8081")
131
  logger.info("Wake word: Okay Nabu")
132
- logger.info("Motion control: enabled")
133
- logger.info("Camera: enabled (Reachy Mini)")
 
 
 
 
134
  logger.info("=" * 50)
135
  logger.info("To connect from Home Assistant:")
136
  logger.info(" Settings -> Devices & Services -> Add Integration")
@@ -155,10 +166,6 @@ class ReachyMiniHaVoice(ReachyMiniApp):
155
  except Exception as e:
156
  logger.error(f"Error stopping service: {e}")
157
 
158
- if enable_monitors:
159
- health_monitor.stop()
160
- memory_monitor.stop()
161
-
162
  # Note: Robot connection cleanup is handled by SDK's context manager
163
  # in wrapped_run(). We only need to close our event loop here.
164
 
@@ -171,19 +178,13 @@ class ReachyMiniHaVoice(ReachyMiniApp):
171
  logger.info("Reachy Mini HA stopped.")
172
 
173
 
174
- # This is called when running as: python -m reachy_mini_home_assistant.main
175
  if __name__ == "__main__":
176
  logging.basicConfig(
177
  level=logging.INFO,
178
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
179
  )
180
 
181
- # Reduce verbosity for some noisy modules
182
- logging.getLogger("reachy_mini.media.media_manager").setLevel(logging.WARNING)
183
- logging.getLogger("reachy_mini.media.camera_base").setLevel(logging.WARNING)
184
- logging.getLogger("reachy_mini.media.audio_base").setLevel(logging.WARNING)
185
- logging.getLogger("matplotlib").setLevel(logging.WARNING)
186
-
187
  app = ReachyMiniHaVoice()
188
  try:
189
  app.wrapped_run()
 
7
 
8
  import asyncio
9
  import logging
10
+ import socket
 
 
11
  import threading
12
+ import time
13
+ from typing import Optional
14
 
15
+ logger = logging.getLogger(__name__)
16
 
 
 
17
 
18
def _check_zenoh_available(timeout: float = 1.0, port: int = 7447) -> bool:
    """Check whether a Zenoh router is reachable on localhost.

    Args:
        timeout: Connection timeout in seconds.
        port: TCP port to probe (defaults to Zenoh's standard 7447).

    Returns:
        True if a TCP connection to 127.0.0.1:<port> succeeds, else False.
    """
    try:
        # Connection is closed immediately; we only probe reachability.
        with socket.create_connection(("127.0.0.1", port), timeout=timeout):
            return True
    except OSError:
        # OSError covers ConnectionRefusedError and socket.timeout (both are
        # OSError subclasses — the original tuple was redundant) as well as
        # other network failures such as EHOSTUNREACH.
        return False
25
+
26
 
27
+ # Only import ReachyMiniApp if we're running as an app
28
+ try:
29
+ from reachy_mini import ReachyMini, ReachyMiniApp
30
+ REACHY_MINI_AVAILABLE = True
31
+ except ImportError:
32
+ REACHY_MINI_AVAILABLE = False
33
 
34
+ # Create a dummy base class
35
+ class ReachyMiniApp:
36
+ custom_app_url = None
 
37
 
38
+ def __init__(self):
39
+ self.stop_event = threading.Event()
 
 
40
 
41
+ def wrapped_run(self, *args, **kwargs):
42
+ pass
 
 
43
 
44
+ def stop(self):
45
+ self.stop_event.set()
46
 
47
+ ReachyMini = None
48
+
49
+
50
+ from .voice_assistant import VoiceAssistantService
51
+ from .motion import ReachyMiniMotion
52
 
53
 
54
  class ReachyMiniHaVoice(ReachyMiniApp):
 
61
  """
62
 
63
  # No custom web UI needed - configuration is automatic via Home Assistant
64
+ custom_app_url: Optional[str] = None
65
 
66
  def __init__(self, *args, **kwargs):
67
  """Initialize the app."""
68
  super().__init__(*args, **kwargs)
69
+ if not hasattr(self, 'stop_event'):
70
  self.stop_event = threading.Event()
71
 
72
def wrapped_run(self, *args, **kwargs) -> None:
    """
    Override wrapped_run to handle Zenoh connection failures gracefully.

    If Zenoh is not available, run in standalone mode without robot control.
    """
    logger.info("Starting Reachy Mini HA Voice App...")

    # Check if Zenoh is available before trying to connect
    if not _check_zenoh_available():
        logger.warning("Zenoh service not available (port 7447)")
        logger.info("Running in standalone mode without robot control")
        self._run_standalone()
        return

    # Zenoh is available, try normal startup with ReachyMini
    if REACHY_MINI_AVAILABLE:
        try:
            logger.info("Attempting to connect to Reachy Mini...")
            super().wrapped_run(*args, **kwargs)
        except TimeoutError as e:
            # Zenoh answered the port probe but the SDK handshake timed out.
            logger.warning(f"Timeout connecting to Reachy Mini: {e}")
            logger.info("Falling back to standalone mode")
            self._run_standalone()
        except Exception as e:
            # NOTE(review): string-matching the message is fragile; presumably
            # the SDK exposes no dedicated exception types for these
            # connection failures — confirm against the reachy_mini SDK.
            error_str = str(e)
            if "Unable to connect" in error_str or "ZError" in error_str or "Timeout" in error_str:
                logger.warning(f"Failed to connect to Reachy Mini: {e}")
                logger.info("Falling back to standalone mode")
                self._run_standalone()
            else:
                # Unknown failure mode: re-raise rather than silently degrade.
                raise
    else:
        logger.info("Reachy Mini SDK not available, running standalone")
        self._run_standalone()
107
+
108
def _run_standalone(self) -> None:
    """Launch the service with no robot instance (standalone mode)."""
    self.run(None, self.stop_event)
111
 
112
  def run(self, reachy_mini, stop_event: threading.Event) -> None:
113
  """
114
  Main application entry point.
115
 
116
  Args:
117
+ reachy_mini: The Reachy Mini robot instance (can be None)
118
  stop_event: Event to signal graceful shutdown
119
  """
120
  logger.info("Starting Reachy Mini for Home Assistant...")
121
 
 
 
 
 
 
122
  # Create and run the HA service
123
  service = VoiceAssistantService(reachy_mini)
124
 
 
 
 
 
 
 
 
125
  # Always create a new event loop to avoid conflicts with SDK
126
  loop = asyncio.new_event_loop()
127
  asyncio.set_event_loop(loop)
128
  logger.debug("Created new event loop for HA service")
129
 
130
  try:
 
 
 
 
131
  loop.run_until_complete(service.start())
132
 
133
  logger.info("=" * 50)
 
136
  logger.info("ESPHome Server: 0.0.0.0:6053")
137
  logger.info("Camera Server: 0.0.0.0:8081")
138
  logger.info("Wake word: Okay Nabu")
139
+ if reachy_mini:
140
+ logger.info("Motion control: enabled")
141
+ logger.info("Camera: enabled (Reachy Mini)")
142
+ else:
143
+ logger.info("Motion control: disabled (no robot)")
144
+ logger.info("Camera: test pattern (no robot)")
145
  logger.info("=" * 50)
146
  logger.info("To connect from Home Assistant:")
147
  logger.info(" Settings -> Devices & Services -> Add Integration")
 
166
  except Exception as e:
167
  logger.error(f"Error stopping service: {e}")
168
 
 
 
 
 
169
  # Note: Robot connection cleanup is handled by SDK's context manager
170
  # in wrapped_run(). We only need to close our event loop here.
171
 
 
178
  logger.info("Reachy Mini HA stopped.")
179
 
180
 
181
+ # This is called when running as: python -m reachy_mini_ha_voice.main
182
  if __name__ == "__main__":
183
  logging.basicConfig(
184
  level=logging.INFO,
185
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
186
  )
187
 
 
 
 
 
 
 
188
  app = ReachyMiniHaVoice()
189
  try:
190
  app.wrapped_run()
reachy_mini_ha_voice/models.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared models for Reachy Mini Voice Assistant."""
2
+
3
+ import json
4
+ import logging
5
+ from dataclasses import asdict, dataclass, field
6
+ from enum import Enum
7
+ from pathlib import Path
8
+ from queue import Queue
9
+ from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
10
+
11
+ if TYPE_CHECKING:
12
+ from pymicro_wakeword import MicroWakeWord
13
+ from pyopen_wakeword import OpenWakeWord
14
+ from .entity import ESPHomeEntity, MediaPlayerEntity
15
+ from .audio_player import AudioPlayer
16
+ from .satellite import VoiceSatelliteProtocol
17
+
18
+ _LOGGER = logging.getLogger(__name__)
19
+
20
+
21
+ class WakeWordType(str, Enum):
22
+ MICRO_WAKE_WORD = "micro"
23
+ OPEN_WAKE_WORD = "openWakeWord"
24
+
25
+
26
+ @dataclass
27
+ class AvailableWakeWord:
28
+ id: str
29
+ type: WakeWordType
30
+ wake_word: str
31
+ trained_languages: List[str]
32
+ wake_word_path: Path
33
+
34
+ def load(self) -> "Union[MicroWakeWord, OpenWakeWord]":
35
+ if self.type == WakeWordType.MICRO_WAKE_WORD:
36
+ from pymicro_wakeword import MicroWakeWord
37
+ return MicroWakeWord.from_config(config_path=self.wake_word_path)
38
+
39
+ if self.type == WakeWordType.OPEN_WAKE_WORD:
40
+ from pyopen_wakeword import OpenWakeWord
41
+ oww_model = OpenWakeWord.from_model(model_path=self.wake_word_path)
42
+ setattr(oww_model, "wake_word", self.wake_word)
43
+ return oww_model
44
+
45
+ raise ValueError(f"Unexpected wake word type: {self.type}")
46
+
47
+
48
+ @dataclass
49
+ class Preferences:
50
+ active_wake_words: List[str] = field(default_factory=list)
51
+ # Audio processing settings (persisted from Home Assistant)
52
+ agc_enabled: Optional[bool] = None # None = use hardware default
53
+ agc_max_gain: Optional[float] = None # None = use hardware default
54
+ noise_suppression: Optional[float] = None # None = use hardware default
55
+ # Continuous conversation mode (controlled from Home Assistant)
56
+ continuous_conversation: bool = False
57
+
58
+
59
+ @dataclass
60
+ class ServerState:
61
+ """Global server state."""
62
+ name: str
63
+ mac_address: str
64
+ audio_queue: "Queue[Optional[bytes]]"
65
+ entities: "List[ESPHomeEntity]"
66
+ available_wake_words: "Dict[str, AvailableWakeWord]"
67
+ wake_words: "Dict[str, Union[MicroWakeWord, OpenWakeWord]]"
68
+ active_wake_words: Set[str]
69
+ stop_word: "MicroWakeWord"
70
+ music_player: "AudioPlayer"
71
+ tts_player: "AudioPlayer"
72
+ wakeup_sound: str
73
+ timer_finished_sound: str
74
+ preferences: Preferences
75
+ preferences_path: Path
76
+ download_dir: Path
77
+
78
+ # Reachy Mini specific
79
+ reachy_mini: Optional[object] = None
80
+ motion_enabled: bool = True
81
+ motion: Optional[object] = None # ReachyMiniMotion instance
82
+
83
+ media_player_entity: "Optional[MediaPlayerEntity]" = None
84
+ satellite: "Optional[VoiceSatelliteProtocol]" = None
85
+ wake_words_changed: bool = False
86
+ refractory_seconds: float = 2.0
87
+
88
+ def save_preferences(self) -> None:
89
+ """Save preferences as JSON."""
90
+ _LOGGER.debug("Saving preferences: %s", self.preferences_path)
91
+ self.preferences_path.parent.mkdir(parents=True, exist_ok=True)
92
+ with open(self.preferences_path, "w", encoding="utf-8") as preferences_file:
93
+ json.dump(
94
+ asdict(self.preferences), preferences_file, ensure_ascii=False, indent=4
95
+ )
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/crops_classifier.onnx RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/hand_detector.onnx RENAMED
File without changes
reachy_mini_home_assistant/motion/reachy_motion.py → reachy_mini_ha_voice/motion.py RENAMED
@@ -5,6 +5,7 @@ MovementManager for unified 5Hz control with face tracking.
5
  """
6
 
7
  import logging
 
8
 
9
  from .movement_manager import MovementManager, RobotState
10
 
@@ -18,28 +19,31 @@ class ReachyMiniMotion:
18
  to the MovementManager which handles them in its 5Hz control loop.
19
  """
20
 
21
- def __init__(self, reachy_mini):
22
  self.reachy_mini = reachy_mini
23
- self._movement_manager: MovementManager | None = None
24
  self._camera_server = None # Reference to camera server for face tracking control
25
  self._is_speaking = False
26
 
27
  _LOGGER.debug("ReachyMiniMotion.__init__ called with reachy_mini=%s", reachy_mini)
28
 
29
- # Initialize movement manager
30
- try:
31
- self._movement_manager = MovementManager(reachy_mini)
32
- _LOGGER.debug("MovementManager created successfully")
33
- except Exception as e:
34
- _LOGGER.error("Failed to create MovementManager: %s", e, exc_info=True)
35
- self._movement_manager = None
 
 
 
36
 
37
  def set_reachy_mini(self, reachy_mini):
38
  """Set the Reachy Mini instance."""
39
  self.reachy_mini = reachy_mini
40
- if self._movement_manager is None:
41
  self._movement_manager = MovementManager(reachy_mini)
42
- else:
43
  self._movement_manager.robot = reachy_mini
44
 
45
  def set_camera_server(self, camera_server):
@@ -68,7 +72,7 @@ class ReachyMiniMotion:
68
  _LOGGER.info("Motion control stopped")
69
 
70
  @property
71
- def movement_manager(self) -> MovementManager | None:
72
  """Get the movement manager instance."""
73
  return self._movement_manager
74
 
 
5
  """
6
 
7
  import logging
8
+ from typing import Optional
9
 
10
  from .movement_manager import MovementManager, RobotState
11
 
 
19
  to the MovementManager which handles them in its 5Hz control loop.
20
  """
21
 
22
+ def __init__(self, reachy_mini=None):
23
  self.reachy_mini = reachy_mini
24
+ self._movement_manager: Optional[MovementManager] = None
25
  self._camera_server = None # Reference to camera server for face tracking control
26
  self._is_speaking = False
27
 
28
  _LOGGER.debug("ReachyMiniMotion.__init__ called with reachy_mini=%s", reachy_mini)
29
 
30
+ # Initialize movement manager if robot is available
31
+ if reachy_mini is not None:
32
+ try:
33
+ self._movement_manager = MovementManager(reachy_mini)
34
+ _LOGGER.debug("MovementManager created successfully")
35
+ except Exception as e:
36
+ _LOGGER.error("Failed to create MovementManager: %s", e, exc_info=True)
37
+ self._movement_manager = None
38
+ else:
39
+ _LOGGER.debug("reachy_mini is None, MovementManager not created")
40
 
41
  def set_reachy_mini(self, reachy_mini):
42
  """Set the Reachy Mini instance."""
43
  self.reachy_mini = reachy_mini
44
+ if reachy_mini is not None and self._movement_manager is None:
45
  self._movement_manager = MovementManager(reachy_mini)
46
+ elif reachy_mini is not None and self._movement_manager is not None:
47
  self._movement_manager.robot = reachy_mini
48
 
49
  def set_camera_server(self, camera_server):
 
72
  _LOGGER.info("Motion control stopped")
73
 
74
  @property
75
+ def movement_manager(self) -> Optional[MovementManager]:
76
  """Get the movement manager instance."""
77
  return self._movement_manager
78
 
reachy_mini_ha_voice/movement_manager.py ADDED
@@ -0,0 +1,861 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified Movement Manager for Reachy Mini.
3
+
4
+ This module provides a centralized control system for robot movements,
5
+ inspired by the reachy_mini_conversation_app architecture.
6
+
7
+ Key features:
8
+ - Single 10Hz control loop (balanced between responsiveness and stability)
9
+ - Command queue pattern (thread-safe external API)
10
+ - Error throttling (prevents log explosion)
11
+ - JSON-driven animation system (conversation state animations)
12
+ - Graceful shutdown
13
+ - Pose change detection (skip sending if no significant change)
14
+ - Robust connection recovery (faster reconnection attempts)
15
+ - Proper pose composition using SDK's compose_world_offset (same as conversation_app)
16
+ - Antenna freeze during listening mode with smooth blend back
17
+ """
18
+
19
+ import logging
20
+ import math
21
+ import threading
22
+ import time
23
+ from dataclasses import dataclass, field
24
+ from enum import Enum
25
+ from queue import Queue, Empty
26
+ from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING
27
+
28
+ import numpy as np
29
+ from scipy.spatial.transform import Rotation as R
30
+
31
+ if TYPE_CHECKING:
32
+ from reachy_mini import ReachyMini
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ # Import SDK utilities for pose composition (same as conversation_app)
37
+ try:
38
+ from reachy_mini.utils import create_head_pose
39
+ from reachy_mini.utils.interpolation import compose_world_offset
40
+ SDK_UTILS_AVAILABLE = True
41
+ except ImportError:
42
+ SDK_UTILS_AVAILABLE = False
43
+ logger.warning("SDK utils not available, using fallback pose composition")
44
+
45
+ # Import animation player
46
+ from .animation_player import AnimationPlayer
47
+
48
+
49
+ # =============================================================================
50
+ # Constants
51
+ # =============================================================================
52
+
53
+ # Control loop frequency - daemon now supports higher rates
54
+ CONTROL_LOOP_FREQUENCY_HZ = 100 # 100Hz control loop (same as conversation_app)
55
+ TARGET_PERIOD = 1.0 / CONTROL_LOOP_FREQUENCY_HZ
56
+
57
+ # Antenna freeze parameters (listening mode)
58
+ ANTENNA_BLEND_DURATION = 0.5 # Seconds to blend back from frozen state
59
+
60
+ # State to animation mapping
61
+ STATE_ANIMATION_MAP = {
62
+ "idle": "idle",
63
+ "listening": "listening",
64
+ "thinking": "thinking",
65
+ "speaking": "speaking",
66
+ }
67
+
68
+
69
+ class RobotState(Enum):
70
+ """Robot state machine states."""
71
+ IDLE = "idle"
72
+ LISTENING = "listening"
73
+ THINKING = "thinking"
74
+ SPEAKING = "speaking"
75
+
76
+
77
+ @dataclass
78
+ class MovementState:
79
+ """Internal movement state (only modified by control loop)."""
80
+ # Current robot state
81
+ robot_state: RobotState = RobotState.IDLE
82
+
83
+ # Animation offsets (from AnimationPlayer)
84
+ anim_pitch: float = 0.0
85
+ anim_yaw: float = 0.0
86
+ anim_roll: float = 0.0
87
+ anim_x: float = 0.0
88
+ anim_y: float = 0.0
89
+ anim_z: float = 0.0
90
+ anim_antenna_left: float = 0.0
91
+ anim_antenna_right: float = 0.0
92
+
93
+ # Speech sway offsets (from audio analysis)
94
+ sway_pitch: float = 0.0
95
+ sway_yaw: float = 0.0
96
+ sway_roll: float = 0.0
97
+ sway_x: float = 0.0
98
+ sway_y: float = 0.0
99
+ sway_z: float = 0.0
100
+
101
+ # Target pose (from actions)
102
+ target_pitch: float = 0.0
103
+ target_yaw: float = 0.0
104
+ target_roll: float = 0.0
105
+ target_x: float = 0.0
106
+ target_y: float = 0.0
107
+ target_z: float = 0.0
108
+ target_antenna_left: float = 0.0
109
+ target_antenna_right: float = 0.0
110
+ target_body_yaw: float = 0.0
111
+
112
+ # Timing
113
+ last_activity_time: float = 0.0
114
+ idle_start_time: float = 0.0
115
+
116
+ # Antenna freeze state (listening mode)
117
+ antenna_frozen: bool = False
118
+ frozen_antenna_left: float = 0.0
119
+ frozen_antenna_right: float = 0.0
120
+ antenna_blend: float = 1.0 # 0=frozen, 1=normal
121
+ antenna_blend_start_time: float = 0.0
122
+
123
+
124
+ @dataclass
125
+ class PendingAction:
126
+ """A pending motion action."""
127
+ name: str
128
+ target_pitch: float = 0.0
129
+ target_yaw: float = 0.0
130
+ target_roll: float = 0.0
131
+ target_x: float = 0.0
132
+ target_y: float = 0.0
133
+ target_z: float = 0.0
134
+ duration: float = 0.5
135
+ callback: Optional[Callable] = None
136
+
137
+
138
+ class MovementManager:
139
+ """
140
+ Unified movement manager with 10Hz control loop.
141
+
142
+ All external interactions go through the command queue,
143
+ ensuring thread safety and preventing race conditions.
144
+
145
+ Note: Frequency reduced from 100Hz to 10Hz to prevent daemon crashes
146
+ caused by excessive Zenoh message traffic.
147
+ """
148
+
149
+ def __init__(self, reachy_mini: Optional["ReachyMini"] = None):
150
+ self.robot = reachy_mini
151
+ self._now = time.monotonic
152
+
153
+ # Command queue - all external threads communicate through this
154
+ self._command_queue: Queue[Tuple[str, Any]] = Queue()
155
+
156
+ # Internal state (only modified by control loop)
157
+ self.state = MovementState()
158
+ self.state.last_activity_time = self._now()
159
+ self.state.idle_start_time = self._now()
160
+
161
+ # Animation player (JSON-driven animations)
162
+ self._animation_player = AnimationPlayer()
163
+
164
+ # Thread control
165
+ self._stop_event = threading.Event()
166
+ self._thread: Optional[threading.Thread] = None
167
+
168
+ # Error throttling
169
+ self._last_error_time = 0.0
170
+ self._error_interval = 1.0 # Log at most once per second
171
+ self._suppressed_errors = 0
172
+
173
+ # Connection health tracking
174
+ self._connection_lost = False
175
+ self._last_successful_command = self._now()
176
+ self._connection_timeout = 3.0
177
+ self._reconnect_attempt_interval = 2.0
178
+ self._last_reconnect_attempt = 0.0
179
+ self._consecutive_errors = 0
180
+ self._max_consecutive_errors = 5
181
+
182
+ # Pending action
183
+ self._pending_action: Optional[PendingAction] = None
184
+ self._action_start_time: float = 0.0
185
+ self._action_start_pose: Dict[str, float] = {}
186
+
187
+ # Pose change detection threshold
188
+ self._last_sent_pose: Optional[Dict[str, float]] = None
189
+ self._pose_change_threshold = 0.005
190
+
191
+ # Face tracking offsets (from camera worker)
192
+ self._face_tracking_offsets: Tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
193
+ self._face_tracking_lock = threading.Lock()
194
+
195
+ # Camera server reference for face tracking
196
+ self._camera_server = None
197
+
198
+ # Face tracking smoothing (exponential moving average)
199
+ self._smoothed_face_offsets: List[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
200
+ self._face_smoothing_factor = 0.3
201
+
202
+ logger.info("MovementManager initialized with AnimationPlayer")
203
+
204
+ # =========================================================================
205
+ # Thread-safe public API (called from any thread)
206
+ # =========================================================================
207
+
208
+ def set_state(self, new_state: RobotState) -> None:
209
+ """Thread-safe: Set robot state."""
210
+ self._command_queue.put(("set_state", new_state))
211
+
212
+ def set_listening(self, listening: bool) -> None:
213
+ """Thread-safe: Set listening state."""
214
+ state = RobotState.LISTENING if listening else RobotState.IDLE
215
+ self._command_queue.put(("set_state", state))
216
+
217
+ def set_thinking(self) -> None:
218
+ """Thread-safe: Set thinking state."""
219
+ self._command_queue.put(("set_state", RobotState.THINKING))
220
+
221
+ def set_speaking(self, speaking: bool) -> None:
222
+ """Thread-safe: Set speaking state."""
223
+ state = RobotState.SPEAKING if speaking else RobotState.IDLE
224
+ self._command_queue.put(("set_state", state))
225
+
226
+ def set_idle(self) -> None:
227
+ """Thread-safe: Return to idle state."""
228
+ self._command_queue.put(("set_state", RobotState.IDLE))
229
+
230
+ def queue_action(self, action: PendingAction) -> None:
231
+ """Thread-safe: Queue a motion action."""
232
+ self._command_queue.put(("action", action))
233
+
234
+ def turn_to_angle(self, yaw_deg: float, duration: float = 0.8) -> None:
235
+ """Thread-safe: Turn head to face a direction."""
236
+ action = PendingAction(
237
+ name="turn_to",
238
+ target_yaw=math.radians(yaw_deg),
239
+ duration=duration,
240
+ )
241
+ self._command_queue.put(("action", action))
242
+
243
+ def nod(self, amplitude_deg: float = 15, duration: float = 0.5) -> None:
244
+ """Thread-safe: Perform a nod gesture."""
245
+ self._command_queue.put(("nod", (amplitude_deg, duration)))
246
+
247
+ def shake(self, amplitude_deg: float = 20, duration: float = 0.5) -> None:
248
+ """Thread-safe: Perform a head shake gesture."""
249
+ self._command_queue.put(("shake", (amplitude_deg, duration)))
250
+
251
+ def set_speech_sway(
252
+ self, x: float, y: float, z: float,
253
+ roll: float, pitch: float, yaw: float
254
+ ) -> None:
255
+ """Thread-safe: Set speech-driven sway offsets.
256
+
257
+ These offsets are applied on top of the current animation
258
+ to create audio-synchronized head motion during TTS playback.
259
+
260
+ Args:
261
+ x, y, z: Position offsets in meters
262
+ roll, pitch, yaw: Orientation offsets in radians
263
+ """
264
+ self._command_queue.put(("speech_sway", (x, y, z, roll, pitch, yaw)))
265
+
266
+ def reset_to_neutral(self, duration: float = 0.5) -> None:
267
+ """Thread-safe: Reset to neutral position."""
268
+ action = PendingAction(
269
+ name="neutral",
270
+ target_pitch=0.0,
271
+ target_yaw=0.0,
272
+ target_roll=0.0,
273
+ target_x=0.0,
274
+ target_y=0.0,
275
+ target_z=0.0,
276
+ duration=duration,
277
+ )
278
+ self._command_queue.put(("action", action))
279
+
280
+ def set_camera_server(self, camera_server) -> None:
281
+ """Set the camera server for face tracking offsets.
282
+
283
+ Args:
284
+ camera_server: MJPEGCameraServer instance with face tracking
285
+ """
286
+ self._camera_server = camera_server
287
+ logger.info("Camera server set for face tracking")
288
+
289
+ def set_face_tracking_offsets(self, offsets: Tuple[float, float, float, float, float, float]) -> None:
290
+ """Thread-safe: Update face tracking offsets manually.
291
+
292
+ Args:
293
+ offsets: Tuple of (x, y, z, roll, pitch, yaw) in meters/radians
294
+ """
295
+ with self._face_tracking_lock:
296
+ self._face_tracking_offsets = offsets
297
+
298
+ def set_target_pose(
299
+ self,
300
+ x: Optional[float] = None,
301
+ y: Optional[float] = None,
302
+ z: Optional[float] = None,
303
+ roll: Optional[float] = None,
304
+ pitch: Optional[float] = None,
305
+ yaw: Optional[float] = None,
306
+ body_yaw: Optional[float] = None,
307
+ antenna_left: Optional[float] = None,
308
+ antenna_right: Optional[float] = None,
309
+ ) -> None:
310
+ """Thread-safe: Set target pose components.
311
+
312
+ Only provided values will be updated. Values are in meters for position
313
+ and radians for angles.
314
+
315
+ Args:
316
+ x, y, z: Head position in meters
317
+ roll, pitch, yaw: Head orientation in radians
318
+ body_yaw: Body yaw in radians
319
+ antenna_left, antenna_right: Antenna angles in radians
320
+ """
321
+ self._command_queue.put(("set_pose", {
322
+ "x": x,
323
+ "y": y,
324
+ "z": z,
325
+ "roll": roll,
326
+ "pitch": pitch,
327
+ "yaw": yaw,
328
+ "body_yaw": body_yaw,
329
+ "antenna_left": antenna_left,
330
+ "antenna_right": antenna_right,
331
+ }))
332
+
333
+ # =========================================================================
334
+ # Internal: Command processing (runs in control loop)
335
+ # =========================================================================
336
+
337
+ def _poll_commands(self) -> None:
338
+ """Process all pending commands from the queue."""
339
+ while True:
340
+ try:
341
+ cmd, payload = self._command_queue.get_nowait()
342
+ except Empty:
343
+ break
344
+
345
+ self._handle_command(cmd, payload)
346
+
347
+ def _handle_command(self, cmd: str, payload: Any) -> None:
348
+ """Handle a single command."""
349
+ if cmd == "set_state":
350
+ old_state = self.state.robot_state
351
+ self.state.robot_state = payload
352
+ self.state.last_activity_time = self._now()
353
+
354
+ # Update animation based on state
355
+ animation_name = STATE_ANIMATION_MAP.get(payload.value, "idle")
356
+ self._animation_player.set_animation(animation_name)
357
+
358
+ # State transition logic
359
+ if payload == RobotState.IDLE and old_state != RobotState.IDLE:
360
+ self.state.idle_start_time = self._now()
361
+ # Unfreeze antennas when returning to idle
362
+ self._start_antenna_unfreeze()
363
+
364
+ # Freeze antennas when entering listening mode
365
+ if payload == RobotState.LISTENING:
366
+ self._freeze_antennas()
367
+ elif old_state == RobotState.LISTENING and payload != RobotState.LISTENING:
368
+ # Start unfreezing when leaving listening mode
369
+ self._start_antenna_unfreeze()
370
+
371
+ logger.debug("State changed: %s -> %s, animation: %s",
372
+ old_state.value, payload.value, animation_name)
373
+
374
+ elif cmd == "action":
375
+ self._start_action(payload)
376
+
377
+ elif cmd == "nod":
378
+ amplitude_deg, duration = payload
379
+ self._do_nod(amplitude_deg, duration)
380
+
381
+ elif cmd == "shake":
382
+ amplitude_deg, duration = payload
383
+ self._do_shake(amplitude_deg, duration)
384
+
385
+ elif cmd == "set_pose":
386
+ # Update target pose from external control (e.g., Home Assistant)
387
+ if payload.get("x") is not None:
388
+ self.state.target_x = payload["x"]
389
+ if payload.get("y") is not None:
390
+ self.state.target_y = payload["y"]
391
+ if payload.get("z") is not None:
392
+ self.state.target_z = payload["z"]
393
+ if payload.get("roll") is not None:
394
+ self.state.target_roll = payload["roll"]
395
+ if payload.get("pitch") is not None:
396
+ self.state.target_pitch = payload["pitch"]
397
+ if payload.get("yaw") is not None:
398
+ self.state.target_yaw = payload["yaw"]
399
+ if payload.get("body_yaw") is not None:
400
+ self.state.target_body_yaw = payload["body_yaw"]
401
+ if payload.get("antenna_left") is not None:
402
+ self.state.target_antenna_left = payload["antenna_left"]
403
+ if payload.get("antenna_right") is not None:
404
+ self.state.target_antenna_right = payload["antenna_right"]
405
+ logger.debug("External pose update: %s", payload)
406
+
407
+ elif cmd == "speech_sway":
408
+ # Update speech-driven sway offsets
409
+ x, y, z, roll, pitch, yaw = payload
410
+ self.state.sway_x = x
411
+ self.state.sway_y = y
412
+ self.state.sway_z = z
413
+ self.state.sway_roll = roll
414
+ self.state.sway_pitch = pitch
415
+ self.state.sway_yaw = yaw
416
+
417
+ def _start_action(self, action: PendingAction) -> None:
418
+ """Start a new motion action."""
419
+ self._pending_action = action
420
+ self._action_start_time = self._now()
421
+ self._action_start_pose = {
422
+ "pitch": self.state.target_pitch,
423
+ "yaw": self.state.target_yaw,
424
+ "roll": self.state.target_roll,
425
+ "x": self.state.target_x,
426
+ "y": self.state.target_y,
427
+ "z": self.state.target_z,
428
+ }
429
+ logger.debug("Starting action: %s", action.name)
430
+
431
+ def _do_nod(self, amplitude_deg: float, duration: float) -> None:
432
+ """Execute nod gesture (blocking in control loop context)."""
433
+ # This is simplified - in production, use action queue
434
+ amplitude_rad = math.radians(amplitude_deg)
435
+ half_duration = duration / 2
436
+
437
+ # Nod down
438
+ action_down = PendingAction(
439
+ name="nod_down",
440
+ target_pitch=amplitude_rad,
441
+ duration=half_duration,
442
+ )
443
+ self._start_action(action_down)
444
+
445
+ def _do_shake(self, amplitude_deg: float, duration: float) -> None:
446
+ """Execute shake gesture (blocking in control loop context)."""
447
+ amplitude_rad = math.radians(amplitude_deg)
448
+ half_duration = duration / 2
449
+
450
+ # Shake left
451
+ action_left = PendingAction(
452
+ name="shake_left",
453
+ target_yaw=-amplitude_rad,
454
+ duration=half_duration,
455
+ )
456
+ self._start_action(action_left)
457
+
458
+ # =========================================================================
459
+ # Internal: Motion updates (runs in control loop)
460
+ # =========================================================================
461
+
462
+ def _update_action(self, dt: float) -> None:
463
+ """Update pending action interpolation."""
464
+ if self._pending_action is None:
465
+ return
466
+
467
+ elapsed = self._now() - self._action_start_time
468
+ progress = min(1.0, elapsed / self._pending_action.duration)
469
+
470
+ # Smooth interpolation (ease in-out)
471
+ t = progress * progress * (3 - 2 * progress)
472
+
473
+ # Interpolate pose
474
+ start = self._action_start_pose
475
+ action = self._pending_action
476
+
477
+ self.state.target_pitch = start["pitch"] + t * (action.target_pitch - start["pitch"])
478
+ self.state.target_yaw = start["yaw"] + t * (action.target_yaw - start["yaw"])
479
+ self.state.target_roll = start["roll"] + t * (action.target_roll - start["roll"])
480
+ self.state.target_x = start["x"] + t * (action.target_x - start["x"])
481
+ self.state.target_y = start["y"] + t * (action.target_y - start["y"])
482
+ self.state.target_z = start["z"] + t * (action.target_z - start["z"])
483
+
484
+ # Action complete
485
+ if progress >= 1.0:
486
+ if self._pending_action.callback:
487
+ try:
488
+ self._pending_action.callback()
489
+ except Exception as e:
490
+ logger.error("Action callback error: %s", e)
491
+ self._pending_action = None
492
+
493
+ def _update_animation(self, dt: float) -> None:
494
+ """Update animation offsets from AnimationPlayer."""
495
+ offsets = self._animation_player.get_offsets(dt)
496
+
497
+ self.state.anim_pitch = offsets["pitch"]
498
+ self.state.anim_yaw = offsets["yaw"]
499
+ self.state.anim_roll = offsets["roll"]
500
+ self.state.anim_x = offsets["x"]
501
+ self.state.anim_y = offsets["y"]
502
+ self.state.anim_z = offsets["z"]
503
+ self.state.anim_antenna_left = offsets["antenna_left"]
504
+ self.state.anim_antenna_right = offsets["antenna_right"]
505
+
506
+ def _freeze_antennas(self) -> None:
507
+ """Freeze antennas at current position (for listening mode)."""
508
+ # Capture current antenna positions
509
+ current_left = self.state.target_antenna_left + self.state.anim_antenna_left
510
+ current_right = self.state.target_antenna_right + self.state.anim_antenna_right
511
+
512
+ self.state.antenna_frozen = True
513
+ self.state.frozen_antenna_left = current_left
514
+ self.state.frozen_antenna_right = current_right
515
+ self.state.antenna_blend = 0.0 # Fully frozen
516
+ logger.debug("Antennas frozen at left=%.2f, right=%.2f",
517
+ math.degrees(current_left), math.degrees(current_right))
518
+
519
+ def _start_antenna_unfreeze(self) -> None:
520
+ """Start unfreezing antennas (smooth blend back to normal)."""
521
+ if not self.state.antenna_frozen:
522
+ return
523
+
524
+ self.state.antenna_blend_start_time = self._now()
525
+ logger.debug("Starting antenna unfreeze")
526
+
527
+ def _update_antenna_blend(self, dt: float) -> None:
528
+ """Update antenna blend state for smooth unfreezing."""
529
+ if not self.state.antenna_frozen:
530
+ return
531
+
532
+ if self.state.antenna_blend >= 1.0:
533
+ # Fully unfrozen
534
+ self.state.antenna_frozen = False
535
+ return
536
+
537
+ # Calculate blend progress
538
+ elapsed = self._now() - self.state.antenna_blend_start_time
539
+ if elapsed > 0:
540
+ self.state.antenna_blend = min(1.0, elapsed / ANTENNA_BLEND_DURATION)
541
+
542
+ if self.state.antenna_blend >= 1.0:
543
+ self.state.antenna_frozen = False
544
+ logger.debug("Antennas unfrozen")
545
+
546
+ def _update_face_tracking(self) -> None:
547
+ """Get face tracking offsets from camera server with smoothing."""
548
+ if self._camera_server is not None:
549
+ try:
550
+ raw_offsets = self._camera_server.get_face_tracking_offsets()
551
+
552
+ # Apply exponential moving average smoothing
553
+ alpha = self._face_smoothing_factor
554
+ for i in range(6):
555
+ self._smoothed_face_offsets[i] = (
556
+ alpha * raw_offsets[i] +
557
+ (1 - alpha) * self._smoothed_face_offsets[i]
558
+ )
559
+
560
+ with self._face_tracking_lock:
561
+ self._face_tracking_offsets = tuple(self._smoothed_face_offsets)
562
+
563
+ except Exception as e:
564
+ logger.debug("Error getting face tracking offsets: %s", e)
565
+
566
+ def _compose_final_pose(self) -> Tuple[np.ndarray, Tuple[float, float], float]:
567
+ """Compose final pose from all sources using SDK's compose_world_offset.
568
+
569
+ Returns:
570
+ Tuple of (head_pose_4x4, (antenna_right, antenna_left), body_yaw)
571
+ """
572
+ # Build primary head pose from target state
573
+ if SDK_UTILS_AVAILABLE:
574
+ primary_head = create_head_pose(
575
+ x=self.state.target_x,
576
+ y=self.state.target_y,
577
+ z=self.state.target_z,
578
+ roll=self.state.target_roll,
579
+ pitch=self.state.target_pitch,
580
+ yaw=self.state.target_yaw,
581
+ degrees=False,
582
+ mm=False,
583
+ )
584
+ else:
585
+ # Fallback: build matrix manually
586
+ rotation = R.from_euler('xyz', [
587
+ self.state.target_roll,
588
+ self.state.target_pitch,
589
+ self.state.target_yaw,
590
+ ])
591
+ primary_head = np.eye(4)
592
+ primary_head[:3, :3] = rotation.as_matrix()
593
+ primary_head[0, 3] = self.state.target_x
594
+ primary_head[1, 3] = self.state.target_y
595
+ primary_head[2, 3] = self.state.target_z
596
+
597
+ # Build secondary pose from animation + face tracking + speech sway
598
+ with self._face_tracking_lock:
599
+ face_offsets = self._face_tracking_offsets
600
+
601
+ secondary_x = self.state.anim_x + self.state.sway_x + face_offsets[0]
602
+ secondary_y = self.state.anim_y + self.state.sway_y + face_offsets[1]
603
+ secondary_z = self.state.anim_z + self.state.sway_z + face_offsets[2]
604
+ secondary_roll = self.state.anim_roll + self.state.sway_roll + face_offsets[3]
605
+ secondary_pitch = self.state.anim_pitch + self.state.sway_pitch + face_offsets[4]
606
+ secondary_yaw = self.state.anim_yaw + self.state.sway_yaw + face_offsets[5]
607
+
608
+ if SDK_UTILS_AVAILABLE:
609
+ secondary_head = create_head_pose(
610
+ x=secondary_x,
611
+ y=secondary_y,
612
+ z=secondary_z,
613
+ roll=secondary_roll,
614
+ pitch=secondary_pitch,
615
+ yaw=secondary_yaw,
616
+ degrees=False,
617
+ mm=False,
618
+ )
619
+ # Compose using SDK's compose_world_offset (same as conversation_app)
620
+ final_head = compose_world_offset(primary_head, secondary_head, reorthonormalize=True)
621
+ else:
622
+ # Fallback: simple addition (less accurate but works)
623
+ secondary_rotation = R.from_euler('xyz', [secondary_roll, secondary_pitch, secondary_yaw])
624
+ secondary_head = np.eye(4)
625
+ secondary_head[:3, :3] = secondary_rotation.as_matrix()
626
+ secondary_head[0, 3] = secondary_x
627
+ secondary_head[1, 3] = secondary_y
628
+ secondary_head[2, 3] = secondary_z
629
+
630
+ # Simple composition: R_final = R_secondary @ R_primary, t_final = t_primary + t_secondary
631
+ final_head = np.eye(4)
632
+ final_head[:3, :3] = secondary_head[:3, :3] @ primary_head[:3, :3]
633
+ final_head[:3, 3] = primary_head[:3, 3] + secondary_head[:3, 3]
634
+
635
+ # Antenna pose with freeze blending
636
+ target_antenna_left = self.state.target_antenna_left + self.state.anim_antenna_left
637
+ target_antenna_right = self.state.target_antenna_right + self.state.anim_antenna_right
638
+
639
+ # Apply antenna freeze blending (listening mode)
640
+ blend = self.state.antenna_blend
641
+ if blend < 1.0:
642
+ # Blend between frozen position and target position
643
+ antenna_left = (self.state.frozen_antenna_left * (1.0 - blend) +
644
+ target_antenna_left * blend)
645
+ antenna_right = (self.state.frozen_antenna_right * (1.0 - blend) +
646
+ target_antenna_right * blend)
647
+ else:
648
+ antenna_left = target_antenna_left
649
+ antenna_right = target_antenna_right
650
+
651
+ return final_head, (antenna_right, antenna_left), self.state.target_body_yaw
652
+
653
+ # =========================================================================
654
+ # Internal: Robot control (runs in control loop)
655
+ # =========================================================================
656
+
657
    def _issue_control_command(self, head_pose: np.ndarray, antennas: Tuple[float, float], body_yaw: float) -> None:
        """Send control command to robot with error throttling and connection health tracking.

        Args:
            head_pose: 4x4 homogeneous head pose matrix (already composed upstream).
            antennas: (right, left) antenna angles in radians.
            body_yaw: Body yaw target in radians.
        """
        if self.robot is None:
            return

        # Check if pose changed significantly (prevent unnecessary commands)
        # Extract euler angles for comparison
        rotation = R.from_matrix(head_pose[:3, :3])
        euler = rotation.as_euler('xyz')  # [roll, pitch, yaw]

        # Flat dict of every commanded degree of freedom, used only for the
        # change-detection comparison below.
        current_pose = {
            "x": head_pose[0, 3],
            "y": head_pose[1, 3],
            "z": head_pose[2, 3],
            "roll": euler[0],
            "pitch": euler[1],
            "yaw": euler[2],
            "antenna_right": antennas[0],
            "antenna_left": antennas[1],
            "body_yaw": body_yaw,
        }

        if self._last_sent_pose is not None:
            # NOTE: positions (meters) and angles (radians) share one threshold.
            max_diff = max(
                abs(current_pose[k] - self._last_sent_pose.get(k, 0.0))
                for k in current_pose.keys()
            )
            if max_diff < self._pose_change_threshold:
                # No significant change, skip sending command
                return

        now = self._now()

        # Check if we should skip due to connection loss (but always try periodically)
        if self._connection_lost:
            if now - self._last_reconnect_attempt < self._reconnect_attempt_interval:
                # Skip sending commands to reduce error spam
                return
            # Time to try reconnecting
            self._last_reconnect_attempt = now
            logger.debug("Attempting to send command after connection loss...")

        try:
            # Send to robot (single control point!)
            # head_pose is already a 4x4 matrix from _compose_final_pose
            self.robot.set_target(
                head=head_pose,
                antennas=list(antennas),
                body_yaw=body_yaw,
            )

            # Command succeeded - update connection health and cache
            self._last_successful_command = now
            self._last_sent_pose = current_pose.copy()  # Cache sent pose
            self._consecutive_errors = 0  # Reset error counter

            if self._connection_lost:
                logger.info("✓ Connection to robot restored")
                self._connection_lost = False
                self._suppressed_errors = 0

        except Exception as e:
            error_msg = str(e)
            self._consecutive_errors += 1

            # Check if this is a connection error
            # (string match against the Zenoh transport's error text)
            is_connection_error = "Lost connection" in error_msg or "ZError" in error_msg

            if is_connection_error:
                if not self._connection_lost:
                    # First time detecting connection loss
                    if self._consecutive_errors >= self._max_consecutive_errors:
                        logger.warning(f"Connection unstable after {self._consecutive_errors} errors: {error_msg}")
                        logger.warning(" Will retry connection every %.1fs...", self._reconnect_attempt_interval)
                        self._connection_lost = True
                        self._last_reconnect_attempt = now
                    else:
                        # Transient error, log but don't mark as lost yet
                        self._log_error_throttled(f"Transient connection error ({self._consecutive_errors}/{self._max_consecutive_errors}): {error_msg}")
                else:
                    # Already in lost state, use throttled logging
                    self._log_error_throttled(f"Connection still lost: {error_msg}")
            else:
                # Non-connection error - log but don't affect connection state
                self._log_error_throttled(f"Failed to set robot target: {error_msg}")
742
+
743
+ def _log_error_throttled(self, message: str) -> None:
744
+ """Log error with throttling to prevent log explosion."""
745
+ now = self._now()
746
+ if now - self._last_error_time >= self._error_interval:
747
+ if self._suppressed_errors > 0:
748
+ message += f" (suppressed {self._suppressed_errors} repeats)"
749
+ self._suppressed_errors = 0
750
+ logger.error(message)
751
+ self._last_error_time = now
752
+ else:
753
+ self._suppressed_errors += 1
754
+
755
+ # =========================================================================
756
+ # Control loop
757
+ # =========================================================================
758
+
759
    def _control_loop(self) -> None:
        """Main 10Hz control loop.

        Runs on the MovementManager background thread until the stop event
        is set. Each iteration runs the numbered pipeline below in order,
        then sleeps whatever remains of the target period. A failure in any
        stage is logged (throttled) and the loop keeps running.
        """
        logger.info("Movement manager control loop started (%.0f Hz)", CONTROL_LOOP_FREQUENCY_HZ)

        last_time = self._now()

        while not self._stop_event.is_set():
            loop_start = self._now()
            # dt = wall-clock time since the previous iteration started;
            # drives all time-based interpolation below.
            dt = loop_start - last_time
            last_time = loop_start

            try:
                # 1. Process commands from queue
                self._poll_commands()

                # 2. Update action interpolation
                self._update_action(dt)

                # 3. Update animation offsets (JSON-driven)
                self._update_animation(dt)

                # 4. Update antenna blend (listening mode freeze/unfreeze)
                self._update_antenna_blend(dt)

                # 5. Update face tracking offsets from camera server
                self._update_face_tracking()

                # 6. Compose final pose (returns head_pose matrix, antennas tuple, body_yaw)
                head_pose, antennas, body_yaw = self._compose_final_pose()

                # 7. Send to robot (single control point!)
                self._issue_control_command(head_pose, antennas, body_yaw)

            except Exception as e:
                # Throttled so a persistent fault cannot flood the log at loop rate.
                self._log_error_throttled(f"Control loop error: {e}")

            # Adaptive sleep: subtract the time this iteration actually took.
            elapsed = self._now() - loop_start
            sleep_time = max(0.0, TARGET_PERIOD - elapsed)
            if sleep_time > 0:
                time.sleep(sleep_time)

        logger.info("Movement manager control loop stopped")
802
+
803
+ # =========================================================================
804
+ # Lifecycle
805
+ # =========================================================================
806
+
807
+ def start(self) -> None:
808
+ """Start the control loop."""
809
+ if self._thread is not None and self._thread.is_alive():
810
+ logger.warning("Movement manager already running")
811
+ return
812
+
813
+ self._stop_event.clear()
814
+ self._thread = threading.Thread(
815
+ target=self._control_loop,
816
+ daemon=True,
817
+ name="MovementManager",
818
+ )
819
+ self._thread.start()
820
+ logger.info("Movement manager started")
821
+
822
+ def stop(self) -> None:
823
+ """Stop the control loop and reset robot."""
824
+ if self._thread is None or not self._thread.is_alive():
825
+ return
826
+
827
+ logger.info("Stopping movement manager...")
828
+
829
+ # Signal stop
830
+ self._stop_event.set()
831
+
832
+ # Wait for thread with shorter timeout
833
+ self._thread.join(timeout=0.5)
834
+ if self._thread.is_alive():
835
+ logger.warning("Movement manager thread did not stop in time")
836
+
837
+ # Skip reset to neutral - let the app manager handle it
838
+ # This speeds up shutdown significantly
839
+ logger.info("Movement manager stopped")
840
+
841
+ def _reset_to_neutral_blocking(self) -> None:
842
+ """Reset robot to neutral position (blocking)."""
843
+ if self.robot is None:
844
+ return
845
+
846
+ try:
847
+ neutral_pose = np.eye(4)
848
+ self.robot.goto_target(
849
+ head=neutral_pose,
850
+ antennas=[0.0, 0.0],
851
+ body_yaw=0.0,
852
+ duration=0.3, # Faster reset
853
+ )
854
+ logger.info("Robot reset to neutral position")
855
+ except Exception as e:
856
+ logger.error("Failed to reset robot: %s", e)
857
+
858
+ @property
859
+ def is_running(self) -> bool:
860
+ """Check if control loop is running."""
861
+ return self._thread is not None and self._thread.is_alive()
{reachy_mini_home_assistant → reachy_mini_ha_voice}/reachy_controller.py RENAMED
@@ -1,1061 +1,869 @@
1
- """Reachy Mini controller wrapper for ESPHome entities."""
2
-
3
- import logging
4
- import math
5
- import platform
6
- import subprocess
7
- import time
8
- from typing import TYPE_CHECKING, Any
9
-
10
- import numpy as np
11
- import requests
12
- from scipy.spatial.transform import Rotation as R
13
-
14
- from .core.config import Config
15
-
16
- if TYPE_CHECKING:
17
- from reachy_mini import ReachyMini
18
-
19
- logger = logging.getLogger(__name__)
20
-
21
- # Audio device card names for amixer commands (from SDK)
22
- DEVICE_CARD_NAMES = {
23
- "reachy_mini_audio": "reachy_mini_audio",
24
- "respeaker": "respeaker",
25
- "default": "Audio", # Default to Reachy Mini Audio
26
- }
27
-
28
-
29
def _detect_audio_device() -> str:
    """Identify the audio output hardware currently present (from SDK).

    Returns:
        "reachy_mini_audio", "respeaker", or "default" on Linux;
        "unknown" on every other platform.
    """
    if platform.system() != "Linux":
        return "unknown"

    # Probe ALSA playback devices; treat a missing or hung `aplay`
    # as "no special device found".
    try:
        proc = subprocess.run(
            ["aplay", "-l"],
            capture_output=True,
            text=True,
            check=False,
            timeout=1.0,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return "default"

    listing = proc.stdout.lower()
    if "reachy mini audio" in listing:
        return "reachy_mini_audio"
    if "respeaker" in listing:
        return "respeaker"
    return "default"
52
-
53
-
54
def _get_amixer_card_name() -> str:
    """Resolve the amixer card name for the detected audio device (from SDK)."""
    detected = _detect_audio_device()
    # Unknown devices fall back to the "default" card mapping.
    return DEVICE_CARD_NAMES.get(detected, DEVICE_CARD_NAMES["default"])
58
-
59
-
60
- class _ReSpeakerContext:
61
- """Context manager for thread-safe ReSpeaker access."""
62
-
63
- def __init__(self, respeaker, lock):
64
- self._respeaker = respeaker
65
- self._lock = lock
66
-
67
- def __enter__(self):
68
- self._lock.acquire()
69
- return self._respeaker
70
-
71
- def __exit__(self, exc_type, exc_val, exc_tb):
72
- self._lock.release()
73
- return False
74
-
75
-
76
- class ReachyController:
77
- """
78
- Wrapper class for Reachy Mini control operations.
79
-
80
- Provides safe access to Reachy Mini SDK functions with error handling.
81
- """
82
-
83
- def __init__(self, reachy_mini: "ReachyMini"):
84
- """
85
- Initialize the controller.
86
-
87
- Args:
88
- reachy_mini: ReachyMini instance (required)
89
- """
90
- self.reachy = reachy_mini
91
- self._speaker_volume = 100 # Default volume
92
- self._microphone_volume = 50.0 # Default mic volume
93
- self._movement_manager = None # Set later via set_movement_manager()
94
-
95
- # Volume caching to reduce daemon HTTP load
96
- self._volume_cache_ttl = Config.daemon.volume_cache_ttl # seconds
97
- self._speaker_volume_cache_ts = 0.0
98
- self._microphone_volume_cache_ts = 0.0
99
-
100
- # Shared session to reduce per-request overhead
101
- self._http_session = requests.Session()
102
- self._http_timeout = 5.0 # seconds
103
- self._cache_ttl = Config.daemon.status_cache_ttl
104
- self._daemon_base_url = "http://127.0.0.1:8000"
105
-
106
- # Callback for sleep/wake to notify VoiceAssistant
107
- self._on_sleep_callback = None
108
- self._on_wake_callback = None
109
-
110
- # Status caching - only for get_status() which may trigger I/O
111
- # Note: get_current_head_pose() and get_current_joint_positions() are
112
- # non-blocking in the SDK (they return cached Zenoh data), so no caching needed
113
- self._state_cache: dict[str, Any] = {}
114
- self._last_status_query = 0.0
115
-
116
- # Thread lock for ReSpeaker USB access to prevent conflicts with GStreamer audio pipeline
117
- self._respeaker_lock = __import__("threading").Lock()
118
-
119
- def set_sleep_callback(self, callback) -> None:
120
- """Set callback to be called when go_to_sleep is triggered."""
121
- self._on_sleep_callback = callback
122
-
123
- def set_wake_callback(self, callback) -> None:
124
- """Set callback to be called when wake_up is triggered."""
125
- self._on_wake_callback = callback
126
-
127
- def set_movement_manager(self, movement_manager) -> None:
128
- """Set the MovementManager instance for pose control.
129
-
130
- Args:
131
- movement_manager: MovementManager instance
132
- """
133
- self._movement_manager = movement_manager
134
- logger.info("MovementManager set for ReachyController")
135
-
136
- @property
137
- def is_available(self) -> bool:
138
- """Check if robot is available."""
139
- return self.reachy is not None
140
-
141
- def get_idle_motion_enabled(self) -> bool:
142
- """Get whether idle look-around behavior is enabled."""
143
- if self._movement_manager is None:
144
- return False
145
- try:
146
- return bool(self._movement_manager.get_idle_motion_enabled())
147
- except Exception as e:
148
- logger.debug("Error getting idle motion state: %s", e)
149
- return False
150
-
151
- def set_idle_motion_enabled(self, enabled: bool) -> None:
152
- """Enable or disable idle look-around behavior."""
153
- if self._movement_manager is None:
154
- logger.warning("set_idle_motion_enabled failed - MovementManager not set")
155
- return
156
- self._movement_manager.set_idle_motion_enabled(enabled)
157
-
158
- def get_idle_antenna_enabled(self) -> bool:
159
- """Get whether idle antenna animation is enabled."""
160
- if self._movement_manager is None:
161
- return False
162
- try:
163
- return bool(self._movement_manager.get_idle_antenna_enabled())
164
- except Exception as e:
165
- logger.debug("Error getting idle antenna state: %s", e)
166
- return False
167
-
168
- def set_idle_antenna_enabled(self, enabled: bool) -> None:
169
- """Enable or disable idle antenna animation."""
170
- if self._movement_manager is None:
171
- logger.warning("set_idle_antenna_enabled failed - MovementManager not set")
172
- return
173
- self._movement_manager.set_idle_antenna_enabled(enabled)
174
-
175
- def get_idle_random_actions_enabled(self) -> bool:
176
- """Get whether idle random actions are enabled."""
177
- if self._movement_manager is None:
178
- return False
179
- try:
180
- return bool(self._movement_manager.get_idle_random_actions_enabled())
181
- except Exception as e:
182
- logger.debug("Error getting idle random actions state: %s", e)
183
- return False
184
-
185
- def set_idle_random_actions_enabled(self, enabled: bool) -> None:
186
- """Enable or disable idle random actions (no audio)."""
187
- if self._movement_manager is None:
188
- logger.warning("set_idle_random_actions_enabled failed - MovementManager not set")
189
- return
190
- self._movement_manager.set_idle_random_actions_enabled(enabled)
191
-
192
- # ========== Phase 1: Basic Status & Volume ==========
193
-
194
- @staticmethod
195
- def _status_value(status: Any, key: str, default: Any = None) -> Any:
196
- if status is None:
197
- return default
198
- if isinstance(status, dict):
199
- return status.get(key, default)
200
- return getattr(status, key, default)
201
-
202
- @classmethod
203
- def _nested_status_value(cls, status: Any, parent_key: str, child_key: str, default: Any = None) -> Any:
204
- parent = cls._status_value(status, parent_key, None)
205
- if parent is None:
206
- return default
207
- if isinstance(parent, dict):
208
- return parent.get(child_key, default)
209
- return getattr(parent, child_key, default)
210
-
211
    def _get_cached_status(self) -> Any:
        """Get cached daemon status to reduce query frequency.

        Note: get_status() may trigger I/O, so we cache it.
        Unlike get_current_head_pose() and get_current_joint_positions()
        which are non-blocking in the SDK.

        Returns:
            The freshest status object available: the cached one while the
            TTL holds, a newly fetched one otherwise, a stale cached one on
            fetch error, or None when nothing is available.
        """
        now = time.time()
        # Serve from cache while it is still fresh.
        if now - self._last_status_query < self._cache_ttl:
            return self._state_cache.get("status")

        if not self.is_available:
            return None

        try:
            status = self.reachy.client.get_status(wait=False)
            self._state_cache["status"] = status
            self._last_status_query = now
            return status
        except Exception as e:
            logger.error(f"Error getting status: {e}")
            return self._state_cache.get("status")  # Return stale cache on error
233
-
234
- def get_daemon_state(self) -> str:
235
- """Get daemon state with caching."""
236
- status = self._get_cached_status()
237
- if status is None:
238
- return "not_available"
239
- return str(self._status_value(status, "state", "unknown"))
240
-
241
- def get_backend_ready(self) -> bool:
242
- """Check if backend is ready with caching."""
243
- status = self._get_cached_status()
244
- if status is None:
245
- return False
246
- return self._status_value(status, "state") == "running"
247
-
248
- def get_error_message(self) -> str:
249
- """Get current error message with caching."""
250
- status = self._get_cached_status()
251
- if status is None:
252
- return "Robot not available"
253
- return str(self._status_value(status, "error", "") or "")
254
-
255
    def get_speaker_volume(self) -> float:
        """Get speaker volume (0-100) using amixer directly (no HTTP request).

        Parses the first bracketed percentage on a "Left:" line of
        `amixer sget PCM`; updates the cached value on success and falls
        back to the cache when amixer is unavailable or unparsable.
        """
        try:
            # Get the correct card name (from SDK detection logic)
            card_name = _get_amixer_card_name()

            # Try to get speaker volume from amixer directly
            result = subprocess.run(
                ["amixer", "-c", card_name, "sget", "PCM"],
                capture_output=True,
                text=True,
                check=False,
                timeout=1.0,
            )
            if result.returncode == 0:
                # assumes amixer output like "... Left: Playback 255 [100%] [on]" — TODO confirm per card
                for line in result.stdout.splitlines():
                    if "Left:" in line and "[" in line:
                        parts = line.split("[")
                        for part in parts:
                            if "%" in part:
                                volume_str = part.split("%")[0]
                                self._speaker_volume = float(volume_str)
                                return self._speaker_volume
        except (subprocess.TimeoutExpired, FileNotFoundError, ValueError) as e:
            logger.debug(f"Could not get speaker volume from amixer: {e}")

        # Fallback to cached value
        return self._speaker_volume
283
-
284
    def set_speaker_volume(self, volume: float) -> None:
        """
        Set speaker volume (0-100) using amixer directly (no HTTP request).

        The value is clamped to [0, 100] and cached even when the amixer
        call fails.

        Args:
            volume: Volume level 0-100
        """
        volume = max(0.0, min(100.0, volume))
        self._speaker_volume = volume

        try:
            # Get the correct card name (from SDK detection logic)
            card_name = _get_amixer_card_name()

            # Set speaker volume using amixer directly
            subprocess.run(
                ["amixer", "-c", card_name, "sset", "PCM", f"{int(volume)}%"],
                capture_output=True,
                timeout=2.0,
                check=True,
            )
            # NOTE(review): "PCM,1" presumably pins a second PCM control to
            # 100% — confirm against the card's mixer layout.
            subprocess.run(
                ["amixer", "-c", card_name, "sset", "PCM,1", "100%"],
                capture_output=True,
                timeout=2.0,
                check=True,
            )
            logger.info(f"Speaker volume set to {volume}% via amixer (card={card_name})")
        except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.CalledProcessError) as e:
            logger.error(f"Failed to set speaker volume via amixer: {e}")
314
-
315
    def get_microphone_volume(self) -> float:
        """Get microphone volume (0-100) using amixer directly (no HTTP request).

        Parses the first bracketed percentage on a "Left:" line of
        `amixer sget Headset`; falls back to the cached value on failure.
        """
        try:
            # Get the correct card name (from SDK detection logic)
            card_name = _get_amixer_card_name()

            # Try to get microphone volume from amixer directly
            result = subprocess.run(
                ["amixer", "-c", card_name, "sget", "Headset"],
                capture_output=True,
                text=True,
                check=False,
                timeout=1.0,
            )
            if result.returncode == 0:
                # assumes the same "[NN%]" bracket format as the speaker control — TODO confirm
                for line in result.stdout.splitlines():
                    if "Left:" in line and "[" in line:
                        parts = line.split("[")
                        for part in parts:
                            if "%" in part:
                                volume_str = part.split("%")[0]
                                self._microphone_volume = float(volume_str)
                                return self._microphone_volume
        except (subprocess.TimeoutExpired, FileNotFoundError, ValueError) as e:
            logger.debug(f"Could not get microphone volume from amixer: {e}")

        # Fallback to cached value
        return self._microphone_volume
343
-
344
    def set_microphone_volume(self, volume: float) -> None:
        """
        Set microphone volume (0-100) using amixer directly (no HTTP request).

        The value is clamped to [0, 100] and cached even when the amixer
        call fails.

        Args:
            volume: Volume level 0-100
        """
        volume = max(0.0, min(100.0, volume))
        self._microphone_volume = volume

        try:
            # Get the correct card name (from SDK detection logic)
            card_name = _get_amixer_card_name()

            # Set microphone volume using amixer directly
            subprocess.run(
                ["amixer", "-c", card_name, "sset", "Headset", f"{int(volume)}%"],
                capture_output=True,
                timeout=2.0,
                check=True,
            )
            logger.info(f"Microphone volume set to {volume}% via amixer (card={card_name})")
        except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.CalledProcessError) as e:
            logger.error(f"Failed to set microphone volume via amixer: {e}")
368
-
369
- # ========== Phase 2: Motor Control ==========
370
-
371
- def get_motors_enabled(self) -> bool:
372
- """Check if motors are enabled with caching."""
373
- status = self._get_cached_status()
374
- if status is None:
375
- return False
376
- try:
377
- motor_mode = self._nested_status_value(status, "backend_status", "motor_control_mode", None)
378
- if motor_mode is not None:
379
- return motor_mode == "enabled"
380
- return self._status_value(status, "state") == "running"
381
- except Exception as e:
382
- logger.error(f"Error getting motor state: {e}")
383
- return False
384
-
385
- def set_motors_enabled(self, enabled: bool) -> None:
386
- """
387
- Enable or disable motors.
388
-
389
- Args:
390
- enabled: True to enable, False to disable
391
- """
392
- if not self.is_available:
393
- logger.warning("Cannot control motors: robot not available")
394
- return
395
-
396
- try:
397
- if enabled:
398
- self.reachy.enable_motors()
399
- logger.info("Motors enabled")
400
- else:
401
- self.reachy.disable_motors()
402
- logger.info("Motors disabled")
403
- except Exception as e:
404
- logger.error(f"Error setting motor state: {e}")
405
-
406
- def get_motor_mode(self) -> str:
407
- """Get current motor control mode with caching."""
408
- status = self._get_cached_status()
409
- if status is None:
410
- return "disabled"
411
- try:
412
- motor_mode = self._nested_status_value(status, "backend_status", "motor_control_mode", None)
413
- if motor_mode is not None:
414
- return str(motor_mode)
415
- if self._status_value(status, "state") == "running":
416
- return "enabled"
417
- return "disabled"
418
- except Exception as e:
419
- logger.error(f"Error getting motor mode: {e}")
420
- return "error"
421
-
422
- def set_motor_mode(self, mode: str) -> None:
423
- """
424
- Set motor control mode.
425
-
426
- Args:
427
- mode: One of "enabled", "disabled", "gravity_compensation"
428
- """
429
- if not self.is_available:
430
- logger.warning("Cannot set motor mode: robot not available")
431
- return
432
-
433
- try:
434
- if mode == "enabled":
435
- self.reachy.enable_motors()
436
- elif mode == "disabled":
437
- self.reachy.disable_motors()
438
- elif mode == "gravity_compensation":
439
- self.reachy.enable_gravity_compensation()
440
- else:
441
- logger.warning(f"Invalid motor mode: {mode}")
442
- return
443
- logger.info(f"Motor mode set to {mode}")
444
- except Exception as e:
445
- logger.error(f"Error setting motor mode: {e}")
446
-
447
    def wake_up(self) -> None:
        """Execute wake up animation.

        Requests the daemon to start with wake_up=true, invalidates the
        cached status, then notifies the wake callback so the
        VoiceAssistant can resume its services.
        """
        if not self.is_available:
            logger.warning("Cannot wake up: robot not available")
            return

        try:
            # SDK v1.5 sleep/wake is managed at daemon level.
            # Start daemon with wake_up=true so /api/daemon/status reflects awake state.
            self._daemon_command("/api/daemon/start", params={"wake_up": "true"})
            logger.info("Wake-up requested via daemon API")

            # Invalidate cached status after transition request
            self._last_status_query = 0.0

            # Notify callback (VoiceAssistant will resume services)
            if self._on_wake_callback is not None:
                try:
                    self._on_wake_callback()
                except Exception as e:
                    # Callback failures must not mask a successful wake request.
                    logger.error(f"Error in wake callback: {e}")
        except Exception as e:
            logger.error(f"Error executing wake up: {e}")
470
-
471
    def go_to_sleep(self) -> None:
        """Execute sleep animation.

        The order is important:
        1. First suspend all services via callback (so they release robot resources)
        2. Then send the robot to sleep

        This prevents errors from services trying to access a sleeping robot.
        """
        if not self.is_available:
            logger.warning("Cannot sleep: robot not available")
            return

        try:
            # First, notify callback to suspend all services
            # This must happen BEFORE the robot goes to sleep
            logger.info("Suspending services before sleep...")
            if self._on_sleep_callback is not None:
                try:
                    self._on_sleep_callback()
                except Exception as e:
                    # A failing callback should not stop the sleep sequence.
                    logger.error(f"Error in sleep callback: {e}")

            # Give services time to fully suspend
            time.sleep(0.5)

            # SDK v1.5 sleep/wake is managed at daemon level.
            # Stop daemon with goto_sleep=true so /api/daemon/status reflects sleep state.
            self._daemon_command("/api/daemon/stop", params={"goto_sleep": "true"})
            logger.info("Sleep requested via daemon API")

            # Invalidate cached status after transition request
            self._last_status_query = 0.0

        except Exception as e:
            logger.error(f"Error executing sleep: {e}")
507
-
508
- def _daemon_command(self, path: str, params: dict[str, str] | None = None) -> None:
509
- """Send a daemon command request with lightweight validation."""
510
- url = f"{self._daemon_base_url}{path}"
511
- resp = self._http_session.post(url, params=params or {}, timeout=self._http_timeout)
512
- resp.raise_for_status()
513
-
514
- # ========== Phase 3: Pose Control ==========
515
-
516
- def _get_head_pose(self) -> np.ndarray | None:
517
- """Get current head pose from SDK.
518
-
519
- Note: SDK's get_current_head_pose() is non-blocking - it returns
520
- cached data from Zenoh subscriptions, so no throttling needed.
521
- """
522
- if not self.is_available:
523
- return None
524
-
525
- try:
526
- return self.reachy.get_current_head_pose()
527
- except Exception as e:
528
- logger.error(f"Error getting head pose: {e}")
529
- return None
530
-
531
- def _get_joint_positions(self) -> tuple | None:
532
- """Get current joint positions from SDK.
533
-
534
- Note: SDK's get_current_joint_positions() is non-blocking - it returns
535
- cached data from Zenoh subscriptions, so no throttling needed.
536
- """
537
- if not self.is_available:
538
- return None
539
-
540
- try:
541
- return self.reachy.get_current_joint_positions()
542
- except Exception as e:
543
- logger.error(f"Error getting joint positions: {e}")
544
- return None
545
-
546
- def _extract_pose_from_matrix(self, pose_matrix: np.ndarray) -> tuple:
547
- """
548
- Extract position (x, y, z) and rotation (roll, pitch, yaw) from 4x4 pose matrix.
549
-
550
- Args:
551
- pose_matrix: 4x4 homogeneous transformation matrix
552
-
553
- Returns:
554
- tuple: (x, y, z, roll, pitch, yaw) where position is in meters and angles in radians
555
- """
556
- # Extract position from the last column
557
- x = pose_matrix[0, 3]
558
- y = pose_matrix[1, 3]
559
- z = pose_matrix[2, 3]
560
-
561
- # Extract rotation matrix and convert to euler angles
562
- rotation_matrix = pose_matrix[:3, :3]
563
- rotation = R.from_matrix(rotation_matrix)
564
- # Use 'xyz' convention for roll, pitch, yaw
565
- roll, pitch, yaw = rotation.as_euler("xyz")
566
-
567
- return x, y, z, roll, pitch, yaw
568
-
569
- def _get_head_pose_component(self, component: str) -> float:
570
- """Get a specific component from head pose.
571
-
572
- Args:
573
- component: One of 'x', 'y', 'z' (mm), 'roll', 'pitch', 'yaw' (degrees)
574
-
575
- Returns:
576
- The component value, or 0.0 on error
577
- """
578
- pose = self._get_head_pose()
579
- if pose is None:
580
- return 0.0
581
- try:
582
- x, y, z, roll, pitch, yaw = self._extract_pose_from_matrix(pose)
583
- components = {
584
- "x": x * 1000, # m to mm
585
- "y": y * 1000,
586
- "z": z * 1000,
587
- "roll": math.degrees(roll),
588
- "pitch": math.degrees(pitch),
589
- "yaw": math.degrees(yaw),
590
- }
591
- return components.get(component, 0.0)
592
- except Exception as e:
593
- logger.error(f"Error getting head {component}: {e}")
594
- return 0.0
595
-
596
- def _disabled_pose_setter(self, name: str) -> None:
597
- """Log warning when MovementManager is not available."""
598
- logger.warning(f"set_{name} failed - MovementManager not set")
599
-
600
- def _set_pose_via_manager(self, **kwargs) -> bool:
601
- """Set pose via MovementManager if available.
602
-
603
- Returns True if successful, False if MovementManager not available.
604
- """
605
- if self._movement_manager is None:
606
- return False
607
- self._movement_manager.set_target_pose(**kwargs)
608
- return True
609
-
610
- # Head position getters and setters
611
    def get_head_x(self) -> float:
        """Get head X position in mm."""
        # Component lookup converts the SDK's meters to millimeters.
        return self._get_head_pose_component("x")
614
-
615
    def set_head_x(self, x_mm: float) -> None:
        """Set head X position in mm via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(x=x_mm / 1000.0):  # mm to m
            self._disabled_pose_setter("head_x")
619
-
620
    def get_head_y(self) -> float:
        """Get head Y position in mm."""
        # Component lookup converts the SDK's meters to millimeters.
        return self._get_head_pose_component("y")
623
-
624
    def set_head_y(self, y_mm: float) -> None:
        """Set head Y position in mm via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(y=y_mm / 1000.0):  # mm to m
            self._disabled_pose_setter("head_y")
628
-
629
    def get_head_z(self) -> float:
        """Get head Z position in mm."""
        # Component lookup converts the SDK's meters to millimeters.
        return self._get_head_pose_component("z")
632
-
633
    def set_head_z(self, z_mm: float) -> None:
        """Set head Z position in mm via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(z=z_mm / 1000.0):  # mm to m
            self._disabled_pose_setter("head_z")
637
-
638
- # Head orientation getters and setters
639
    def get_head_roll(self) -> float:
        """Get head roll angle in degrees."""
        # Component lookup converts the SDK's radians to degrees.
        return self._get_head_pose_component("roll")
642
-
643
    def set_head_roll(self, roll_deg: float) -> None:
        """Set head roll angle in degrees via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(roll=math.radians(roll_deg)):
            self._disabled_pose_setter("head_roll")
647
-
648
    def get_head_pitch(self) -> float:
        """Get head pitch angle in degrees."""
        # Component lookup converts the SDK's radians to degrees.
        return self._get_head_pose_component("pitch")
651
-
652
    def set_head_pitch(self, pitch_deg: float) -> None:
        """Set head pitch angle in degrees via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(pitch=math.radians(pitch_deg)):
            self._disabled_pose_setter("head_pitch")
656
-
657
    def get_head_yaw(self) -> float:
        """Get head yaw angle in degrees."""
        # Component lookup converts the SDK's radians to degrees.
        return self._get_head_pose_component("yaw")
660
-
661
    def set_head_yaw(self, yaw_deg: float) -> None:
        """Set head yaw angle in degrees via MovementManager.

        Falls back to a warning when no MovementManager is attached.
        """
        if not self._set_pose_via_manager(yaw=math.radians(yaw_deg)):
            self._disabled_pose_setter("head_yaw")
665
-
666
- def get_body_yaw(self) -> float:
667
- """Get body yaw angle in degrees."""
668
- joints = self._get_joint_positions()
669
- if joints is None:
670
- return 0.0
671
- try:
672
- head_joints, _ = joints
673
- return math.degrees(head_joints[0])
674
- except Exception as e:
675
- logger.error(f"Error getting body yaw: {e}")
676
- return 0.0
677
-
678
- def set_body_yaw(self, yaw_deg: float) -> None:
679
- """Set body yaw angle in degrees.
680
-
681
- Note: This directly calls SDK's set_target_body_yaw since automatic body yaw
682
- is enabled. Manual control will temporarily override automatic mode.
683
- """
684
- if self.reachy is None:
685
- self._disabled_pose_setter("body_yaw")
686
- return
687
- try:
688
- self.reachy.set_target_body_yaw(math.radians(yaw_deg))
689
- except Exception as e:
690
- logger.error(f"Error setting body yaw: {e}")
691
-
692
- def get_antenna_left(self) -> float:
693
- """Get left antenna angle in degrees."""
694
- joints = self._get_joint_positions()
695
- if joints is None:
696
- return 0.0
697
- try:
698
- _, antennas = joints
699
- return math.degrees(antennas[1]) # left is index 1
700
- except Exception as e:
701
- logger.error(f"Error getting left antenna: {e}")
702
- return 0.0
703
-
704
- def set_antenna_left(self, angle_deg: float) -> None:
705
- """Set left antenna angle in degrees via MovementManager."""
706
- if not self._set_pose_via_manager(antenna_left=math.radians(angle_deg)):
707
- self._disabled_pose_setter("antenna_left")
708
-
709
- def get_antenna_right(self) -> float:
710
- """Get right antenna angle in degrees."""
711
- joints = self._get_joint_positions()
712
- if joints is None:
713
- return 0.0
714
- try:
715
- _, antennas = joints
716
- return math.degrees(antennas[0]) # right is index 0
717
- except Exception as e:
718
- logger.error(f"Error getting right antenna: {e}")
719
- return 0.0
720
-
721
- def set_antenna_right(self, angle_deg: float) -> None:
722
- """Set right antenna angle in degrees via MovementManager."""
723
- if not self._set_pose_via_manager(antenna_right=math.radians(angle_deg)):
724
- self._disabled_pose_setter("antenna_right")
725
-
726
- # ========== Phase 4: Look At Control ==========
727
-
728
- def get_look_at_x(self) -> float:
729
- """Get look at target X coordinate in world frame (meters)."""
730
- # This is a target position, not a current state
731
- # We'll store it internally
732
- return getattr(self, "_look_at_x", 0.0)
733
-
734
- def set_look_at_x(self, x: float) -> None:
735
- """Set look at target X coordinate."""
736
- self._look_at_x = x
737
- self._update_look_at()
738
-
739
- def get_look_at_y(self) -> float:
740
- """Get look at target Y coordinate in world frame (meters)."""
741
- return getattr(self, "_look_at_y", 0.0)
742
-
743
- def set_look_at_y(self, y: float) -> None:
744
- """Set look at target Y coordinate."""
745
- self._look_at_y = y
746
- self._update_look_at()
747
-
748
- def get_look_at_z(self) -> float:
749
- """Get look at target Z coordinate in world frame (meters)."""
750
- return getattr(self, "_look_at_z", 0.0)
751
-
752
- def set_look_at_z(self, z: float) -> None:
753
- """Set look at target Z coordinate."""
754
- self._look_at_z = z
755
- self._update_look_at()
756
-
757
- def _update_look_at(self) -> None:
758
- """Update robot to look at the target coordinates.
759
-
760
- NOTE: Disabled to prevent conflict with MovementManager's control loop.
761
- """
762
- logger.warning("_update_look_at is disabled - MovementManager controls head pose")
763
- # if not self.is_available:
764
- # return
765
- # try:
766
- # x = getattr(self, '_look_at_x', 0.0)
767
- # y = getattr(self, '_look_at_y', 0.0)
768
- # z = getattr(self, '_look_at_z', 0.0)
769
- # self.reachy.look_at_world(x, y, z)
770
- # logger.info(f"Looking at world coordinates: ({x}, {y}, {z})")
771
- # except Exception as e:
772
- # logger.error(f"Error updating look at: {e}")
773
-
774
- # ========== Phase 6: Diagnostic Information ==========
775
-
776
- def get_control_loop_frequency(self) -> float:
777
- """Get control loop frequency in Hz with caching."""
778
- status = self._get_cached_status()
779
- if status is None:
780
- return 0.0
781
- try:
782
- control_loop_stats = self._nested_status_value(status, "backend_status", "control_loop_stats", None)
783
- if isinstance(control_loop_stats, dict):
784
- return float(control_loop_stats.get("mean_control_loop_frequency", 0.0))
785
- if control_loop_stats is not None:
786
- return float(getattr(control_loop_stats, "mean_control_loop_frequency", 0.0))
787
- return 0.0
788
- except Exception as e:
789
- logger.error(f"Error getting control loop frequency: {e}")
790
- return 0.0
791
-
792
- def get_sdk_version(self) -> str:
793
- """Get SDK version with caching."""
794
- status = self._get_cached_status()
795
- if status is None:
796
- return "N/A"
797
- return str(self._status_value(status, "version", "unknown") or "unknown")
798
-
799
- def get_robot_name(self) -> str:
800
- """Get robot name with caching."""
801
- status = self._get_cached_status()
802
- if status is None:
803
- return "N/A"
804
- return str(self._status_value(status, "robot_name", "unknown") or "unknown")
805
-
806
- def get_wireless_version(self) -> bool:
807
- """Check if this is a wireless version with caching."""
808
- status = self._get_cached_status()
809
- if status is None:
810
- return False
811
- return bool(self._status_value(status, "wireless_version", False))
812
-
813
- def get_simulation_mode(self) -> bool:
814
- """Check if simulation mode is enabled with caching."""
815
- status = self._get_cached_status()
816
- if status is None:
817
- return False
818
- return bool(self._status_value(status, "simulation_enabled", False))
819
-
820
- def get_wlan_ip(self) -> str:
821
- """Get WLAN IP address with caching."""
822
- status = self._get_cached_status()
823
- if status is None:
824
- return "N/A"
825
- return str(self._status_value(status, "wlan_ip", "N/A") or "N/A")
826
-
827
- # ========== Phase 7: IMU Sensors (Wireless only) ==========
828
-
829
- def _get_imu_value(self, sensor_type: str, index: int) -> float:
830
- """Get a specific IMU sensor value.
831
-
832
- Args:
833
- sensor_type: 'accelerometer', 'gyroscope', or 'temperature'
834
- index: Array index (0=x, 1=y, 2=z) or -1 for scalar values
835
-
836
- Returns:
837
- The sensor value, or 0.0 on error
838
- """
839
- if not self.is_available:
840
- return 0.0
841
- try:
842
- imu_data = self.reachy.imu
843
- if imu_data is None or sensor_type not in imu_data:
844
- return 0.0
845
- value = imu_data[sensor_type]
846
- return float(value[index]) if index >= 0 else float(value)
847
- except Exception as e:
848
- logger.debug(f"Error getting IMU {sensor_type}: {e}")
849
- return 0.0
850
-
851
- def get_imu_accel_x(self) -> float:
852
- """Get IMU X-axis acceleration in m/s²."""
853
- return self._get_imu_value("accelerometer", 0)
854
-
855
- def get_imu_accel_y(self) -> float:
856
- """Get IMU Y-axis acceleration in m/s²."""
857
- return self._get_imu_value("accelerometer", 1)
858
-
859
- def get_imu_accel_z(self) -> float:
860
- """Get IMU Z-axis acceleration in m/s²."""
861
- return self._get_imu_value("accelerometer", 2)
862
-
863
- def get_imu_gyro_x(self) -> float:
864
- """Get IMU X-axis angular velocity in rad/s."""
865
- return self._get_imu_value("gyroscope", 0)
866
-
867
- def get_imu_gyro_y(self) -> float:
868
- """Get IMU Y-axis angular velocity in rad/s."""
869
- return self._get_imu_value("gyroscope", 1)
870
-
871
- def get_imu_gyro_z(self) -> float:
872
- """Get IMU Z-axis angular velocity in rad/s."""
873
- return self._get_imu_value("gyroscope", 2)
874
-
875
- def get_imu_temperature(self) -> float:
876
- """Get IMU temperature in °C."""
877
- return self._get_imu_value("temperature", -1)
878
-
879
- # ========== Phase 11: LED Control (DISABLED) ==========
880
- # LED control is disabled because LEDs are hidden inside the robot.
881
- # See PROJECT_PLAN.md principle 8.
882
-
883
- def _get_respeaker(self):
884
- """Get ReSpeaker device from media manager with thread-safe access.
885
-
886
- Returns a context manager that holds the lock during ReSpeaker operations.
887
- Usage:
888
- with self._get_respeaker() as respeaker:
889
- if respeaker:
890
- respeaker.read("...")
891
-
892
- Note: This accesses the private _respeaker attribute from the SDK.
893
- TODO: Check if SDK provides a public API for ReSpeaker access in future versions.
894
- This is a known compatibility risk and should be reviewed on SDK updates.
895
- """
896
- if not self.is_available:
897
- return _ReSpeakerContext(None, self._respeaker_lock)
898
- try:
899
- if not self.reachy.media or not self.reachy.media.audio:
900
- return _ReSpeakerContext(None, self._respeaker_lock)
901
- # WARNING: Accessing private attribute _respeaker
902
- # TODO: Replace with public API when available
903
- respeaker = self.reachy.media.audio._respeaker
904
- return _ReSpeakerContext(respeaker, self._respeaker_lock)
905
- except Exception:
906
- return _ReSpeakerContext(None, self._respeaker_lock)
907
-
908
- # ========== Phase 12: Audio Processing (via local SDK with thread-safe access) ==========
909
-
910
- def get_agc_enabled(self) -> bool:
911
- """Get AGC (Automatic Gain Control) enabled status."""
912
- with self._get_respeaker() as respeaker:
913
- if respeaker is None:
914
- return getattr(self, "_agc_enabled", True) # Default to enabled
915
- try:
916
- result = respeaker.read("PP_AGCONOFF")
917
- if result is not None:
918
- self._agc_enabled = bool(result[1])
919
- return self._agc_enabled
920
- except Exception as e:
921
- logger.debug(f"Error getting AGC status: {e}")
922
- return getattr(self, "_agc_enabled", True)
923
-
924
- def set_agc_enabled(self, enabled: bool) -> None:
925
- """Set AGC (Automatic Gain Control) enabled status."""
926
- self._agc_enabled = enabled
927
- with self._get_respeaker() as respeaker:
928
- if respeaker is None:
929
- return
930
- try:
931
- respeaker.write("PP_AGCONOFF", [1 if enabled else 0])
932
- logger.info(f"AGC {'enabled' if enabled else 'disabled'}")
933
- except Exception as e:
934
- logger.error(f"Error setting AGC status: {e}")
935
-
936
- def get_agc_max_gain(self) -> float:
937
- """Get AGC maximum gain in dB (0-40 dB range)."""
938
- with self._get_respeaker() as respeaker:
939
- if respeaker is None:
940
- return getattr(self, "_agc_max_gain", 30.0) # Default matches MicrophoneDefaults
941
- try:
942
- result = respeaker.read("PP_AGCMAXGAIN")
943
- if result is not None:
944
- self._agc_max_gain = float(result[0])
945
- return self._agc_max_gain
946
- except Exception as e:
947
- logger.debug(f"Error getting AGC max gain: {e}")
948
- return getattr(self, "_agc_max_gain", 30.0)
949
-
950
- def set_agc_max_gain(self, gain: float) -> None:
951
- """Set AGC maximum gain in dB (0-40 dB range)."""
952
- gain = max(0.0, min(40.0, gain)) # XVF3800 supports up to 40dB
953
- self._agc_max_gain = gain
954
- with self._get_respeaker() as respeaker:
955
- if respeaker is None:
956
- return
957
- try:
958
- respeaker.write("PP_AGCMAXGAIN", [gain])
959
- logger.info(f"AGC max gain set to {gain} dB")
960
- except Exception as e:
961
- logger.error(f"Error setting AGC max gain: {e}")
962
-
963
- def get_noise_suppression(self) -> float:
964
- """Get noise suppression level (0-100%).
965
-
966
- PP_MIN_NS represents "minimum signal preservation ratio":
967
- - PP_MIN_NS = 0.85 means "keep at least 85% of signal" = 15% suppression
968
- - PP_MIN_NS = 0.15 means "keep at least 15% of signal" = 85% suppression
969
-
970
- We display "noise suppression strength" to user, so:
971
- - suppression_percent = (1.0 - PP_MIN_NS) * 100
972
- """
973
- with self._get_respeaker() as respeaker:
974
- if respeaker is None:
975
- return getattr(self, "_noise_suppression", 15.0)
976
- try:
977
- result = respeaker.read("PP_MIN_NS")
978
- if result is not None:
979
- raw_value = result[0]
980
- # Convert: PP_MIN_NS=0.85 -> 15% suppression, PP_MIN_NS=0.15 -> 85% suppression
981
- self._noise_suppression = max(0.0, min(100.0, (1.0 - raw_value) * 100.0))
982
- logger.debug(f"Noise suppression: PP_MIN_NS={raw_value:.2f} -> {self._noise_suppression:.1f}%")
983
- return self._noise_suppression
984
- except Exception as e:
985
- logger.debug(f"Error getting noise suppression: {e}")
986
- return getattr(self, "_noise_suppression", 15.0)
987
-
988
- def set_noise_suppression(self, level: float) -> None:
989
- """Set noise suppression level (0-100%)."""
990
- level = max(0.0, min(100.0, level))
991
- self._noise_suppression = level
992
- with self._get_respeaker() as respeaker:
993
- if respeaker is None:
994
- return
995
- try:
996
- # Convert percentage to PP_MIN_NS value (inverted)
997
- value = 1.0 - (level / 100.0)
998
- respeaker.write("PP_MIN_NS", [value])
999
- logger.info(f"Noise suppression set to {level}%")
1000
- except Exception as e:
1001
- logger.error(f"Error setting noise suppression: {e}")
1002
-
1003
- def get_echo_cancellation_converged(self) -> bool:
1004
- """Check if echo cancellation has converged."""
1005
- with self._get_respeaker() as respeaker:
1006
- if respeaker is None:
1007
- return False
1008
- try:
1009
- result = respeaker.read("AEC_AECCONVERGED")
1010
- if result is not None:
1011
- return bool(result[1])
1012
- except Exception as e:
1013
- logger.debug(f"Error getting AEC converged status: {e}")
1014
- return False
1015
-
1016
- # ========== DOA (Direction of Arrival) ==========
1017
-
1018
- def get_doa_angle(self) -> tuple[float, bool] | None:
1019
- """Get Direction of Arrival angle from microphone array.
1020
-
1021
- The DOA angle indicates the direction of the sound source relative to the robot.
1022
- Angle is in radians: 0 = left, π/2 = front/back, π = right.
1023
-
1024
- Returns:
1025
- Tuple of (angle_radians, speech_detected), or None if unavailable.
1026
- - angle_radians: Sound source direction in radians
1027
- - speech_detected: Whether speech is currently detected
1028
- """
1029
- if not self.is_available:
1030
- return None
1031
- try:
1032
- if self.reachy.media and self.reachy.media.audio:
1033
- return self.reachy.media.audio.get_DoA()
1034
- except Exception as e:
1035
- logger.debug(f"Error getting DOA: {e}")
1036
- return None
1037
-
1038
- def get_doa_angle_degrees(self) -> float:
1039
- """Get DOA angle in degrees for Home Assistant entity.
1040
-
1041
- Returns the raw DOA angle in degrees (0-180°).
1042
- SDK convention: 0° = left, 90° = front, 180° = right
1043
- """
1044
- doa = self.get_doa_angle()
1045
- if doa is None:
1046
- return 0.0
1047
- angle_rad, _ = doa
1048
- # Return raw angle in degrees (0-180°)
1049
- angle_deg = math.degrees(angle_rad)
1050
- return angle_deg
1051
-
1052
- def get_speech_detected(self) -> bool:
1053
- """Get speech detection status from DOA.
1054
-
1055
- Returns True if speech is currently detected.
1056
- """
1057
- doa = self.get_doa_angle()
1058
- if doa is None:
1059
- return False
1060
- _, speech_detected = doa
1061
- return speech_detected
 
1
+ """Reachy Mini controller wrapper for ESPHome entities."""
2
+
3
+ import logging
4
+ import time
5
+ from typing import Any, Dict, Optional, TYPE_CHECKING
6
+ import math
7
+ import numpy as np
8
+ from scipy.spatial.transform import Rotation as R
9
+ import requests
10
+
11
+ if TYPE_CHECKING:
12
+ from reachy_mini import ReachyMini
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class _ReSpeakerContext:
18
+ """Context manager for thread-safe ReSpeaker access."""
19
+
20
+ def __init__(self, respeaker, lock):
21
+ self._respeaker = respeaker
22
+ self._lock = lock
23
+
24
+ def __enter__(self):
25
+ self._lock.acquire()
26
+ return self._respeaker
27
+
28
+ def __exit__(self, exc_type, exc_val, exc_tb):
29
+ self._lock.release()
30
+ return False
31
+
32
+
33
+ class ReachyController:
34
+ """
35
+ Wrapper class for Reachy Mini control operations.
36
+
37
+ Provides safe access to Reachy Mini SDK functions with error handling
38
+ and fallback for standalone mode (when robot is not available).
39
+ """
40
+
41
+ def __init__(self, reachy_mini: Optional["ReachyMini"] = None):
42
+ """
43
+ Initialize the controller.
44
+
45
+ Args:
46
+ reachy_mini: ReachyMini instance, or None for standalone mode
47
+ """
48
+ self.reachy = reachy_mini
49
+ self._speaker_volume = 100 # Default volume
50
+ self._movement_manager = None # Set later via set_movement_manager()
51
+
52
+ # Status caching - only for get_status() which may trigger I/O
53
+ # Note: get_current_head_pose() and get_current_joint_positions() are
54
+ # non-blocking in the SDK (they return cached Zenoh data), so no caching needed
55
+ self._state_cache: Dict[str, Any] = {}
56
+ self._cache_ttl = 2.0 # 2 second cache TTL for status queries (increased from 1s)
57
+ self._last_status_query = 0.0
58
+
59
+ # Thread lock for ReSpeaker USB access to prevent conflicts with GStreamer audio pipeline
60
+ self._respeaker_lock = __import__('threading').Lock()
61
+
62
+ def set_movement_manager(self, movement_manager) -> None:
63
+ """Set the MovementManager instance for pose control.
64
+
65
+ Args:
66
+ movement_manager: MovementManager instance
67
+ """
68
+ self._movement_manager = movement_manager
69
+ logger.info("MovementManager set for ReachyController")
70
+
71
+ @property
72
+ def is_available(self) -> bool:
73
+ """Check if robot is available."""
74
+ return self.reachy is not None
75
+
76
+ # ========== Phase 1: Basic Status & Volume ==========
77
+
78
+ def _get_cached_status(self) -> Optional[Dict]:
79
+ """Get cached daemon status to reduce query frequency.
80
+
81
+ Note: get_status() may trigger I/O, so we cache it.
82
+ Unlike get_current_head_pose() and get_current_joint_positions()
83
+ which are non-blocking in the SDK.
84
+ """
85
+ now = time.time()
86
+ if now - self._last_status_query < self._cache_ttl:
87
+ return self._state_cache.get('status')
88
+
89
+ if not self.is_available:
90
+ return None
91
+
92
+ try:
93
+ status = self.reachy.client.get_status(wait=False)
94
+ self._state_cache['status'] = status
95
+ self._last_status_query = now
96
+ return status
97
+ except Exception as e:
98
+ logger.error(f"Error getting status: {e}")
99
+ return self._state_cache.get('status') # Return stale cache on error
100
+
101
+ def get_daemon_state(self) -> str:
102
+ """Get daemon state with caching."""
103
+ status = self._get_cached_status()
104
+ if status is None:
105
+ return "not_available"
106
+ return status.get('state', 'unknown')
107
+
108
+ def get_backend_ready(self) -> bool:
109
+ """Check if backend is ready with caching."""
110
+ status = self._get_cached_status()
111
+ if status is None:
112
+ return False
113
+ return status.get('state') == 'running'
114
+
115
+ def get_error_message(self) -> str:
116
+ """Get current error message with caching."""
117
+ status = self._get_cached_status()
118
+ if status is None:
119
+ return "Robot not available"
120
+ return status.get('error') or ""
121
+
122
+ def get_speaker_volume(self) -> float:
123
+ """Get speaker volume (0-100) with caching."""
124
+ if not self.is_available:
125
+ return self._speaker_volume
126
+ try:
127
+ # Get volume from daemon API (use cached status for IP)
128
+ status = self._get_cached_status()
129
+ if status is None:
130
+ return self._speaker_volume
131
+ wlan_ip = status.get('wlan_ip', 'localhost')
132
+ response = requests.get(f"http://{wlan_ip}:8000/api/volume/current", timeout=2)
133
+ if response.status_code == 200:
134
+ data = response.json()
135
+ self._speaker_volume = float(data.get('volume', self._speaker_volume))
136
+ except Exception as e:
137
+ logger.debug(f"Could not get volume from API: {e}")
138
+ return self._speaker_volume
139
+
140
+ def set_speaker_volume(self, volume: float) -> None:
141
+ """
142
+ Set speaker volume (0-100) with cached status.
143
+
144
+ Args:
145
+ volume: Volume level 0-100
146
+ """
147
+ volume = max(0.0, min(100.0, volume))
148
+ self._speaker_volume = volume
149
+
150
+ if not self.is_available:
151
+ logger.warning("Cannot set volume: robot not available")
152
+ return
153
+
154
+ try:
155
+ # Set volume via daemon API (use cached status for IP)
156
+ status = self._get_cached_status()
157
+ if status is None:
158
+ logger.error("Cannot get daemon status for volume control")
159
+ return
160
+ wlan_ip = status.get('wlan_ip', 'localhost')
161
+ response = requests.post(
162
+ f"http://{wlan_ip}:8000/api/volume/set",
163
+ json={"volume": int(volume)},
164
+ timeout=5
165
+ )
166
+ if response.status_code == 200:
167
+ logger.info(f"Speaker volume set to {volume}%")
168
+ else:
169
+ logger.error(f"Failed to set volume: {response.status_code} {response.text}")
170
+ except Exception as e:
171
+ logger.error(f"Error setting speaker volume: {e}")
172
+
173
+ def get_microphone_volume(self) -> float:
174
+ """Get microphone volume (0-100) using daemon HTTP API."""
175
+ if not self.is_available:
176
+ return getattr(self, '_microphone_volume', 50.0)
177
+
178
+ try:
179
+ # Get WLAN IP from cached daemon status
180
+ status = self._get_cached_status()
181
+ if status is None:
182
+ return getattr(self, '_microphone_volume', 50.0)
183
+ wlan_ip = status.get('wlan_ip', 'localhost')
184
+
185
+ # Call the daemon API to get microphone volume
186
+ response = requests.get(
187
+ f"http://{wlan_ip}:8000/api/volume/microphone/current",
188
+ timeout=2
189
+ )
190
+ if response.status_code == 200:
191
+ data = response.json()
192
+ self._microphone_volume = float(data.get('volume', 50))
193
+ return self._microphone_volume
194
+ except Exception as e:
195
+ logger.debug(f"Could not get microphone volume from API: {e}")
196
+
197
+ return getattr(self, '_microphone_volume', 50.0)
198
+
199
+ def set_microphone_volume(self, volume: float) -> None:
200
+ """
201
+ Set microphone volume (0-100) using daemon HTTP API.
202
+
203
+ Args:
204
+ volume: Volume level 0-100
205
+ """
206
+ volume = max(0.0, min(100.0, volume))
207
+ self._microphone_volume = volume
208
+
209
+ if not self.is_available:
210
+ logger.warning("Cannot set microphone volume: robot not available")
211
+ return
212
+
213
+ try:
214
+ # Get WLAN IP from cached daemon status
215
+ status = self._get_cached_status()
216
+ if status is None:
217
+ logger.error("Cannot get daemon status for microphone volume control")
218
+ return
219
+ wlan_ip = status.get('wlan_ip', 'localhost')
220
+
221
+ # Call the daemon API to set microphone volume
222
+ response = requests.post(
223
+ f"http://{wlan_ip}:8000/api/volume/microphone/set",
224
+ json={"volume": int(volume)},
225
+ timeout=5
226
+ )
227
+ if response.status_code == 200:
228
+ logger.info(f"Microphone volume set to {volume}%")
229
+ else:
230
+ logger.error(f"Failed to set microphone volume: {response.status_code} {response.text}")
231
+ except Exception as e:
232
+ logger.error(f"Error setting microphone volume: {e}")
233
+
234
+ # ========== Phase 2: Motor Control ==========
235
+
236
+ def get_motors_enabled(self) -> bool:
237
+ """Check if motors are enabled with caching."""
238
+ status = self._get_cached_status()
239
+ if status is None:
240
+ return False
241
+ try:
242
+ backend_status = status.get('backend_status')
243
+ if backend_status and isinstance(backend_status, dict):
244
+ motor_mode = backend_status.get('motor_control_mode', 'disabled')
245
+ return motor_mode == 'enabled'
246
+ return status.get('state') == 'running'
247
+ except Exception as e:
248
+ logger.error(f"Error getting motor state: {e}")
249
+ return False
250
+
251
+ def set_motors_enabled(self, enabled: bool) -> None:
252
+ """
253
+ Enable or disable motors.
254
+
255
+ Args:
256
+ enabled: True to enable, False to disable
257
+ """
258
+ if not self.is_available:
259
+ logger.warning("Cannot control motors: robot not available")
260
+ return
261
+
262
+ try:
263
+ if enabled:
264
+ self.reachy.enable_motors()
265
+ logger.info("Motors enabled")
266
+ else:
267
+ self.reachy.disable_motors()
268
+ logger.info("Motors disabled")
269
+ except Exception as e:
270
+ logger.error(f"Error setting motor state: {e}")
271
+
272
+ def get_motor_mode(self) -> str:
273
+ """Get current motor control mode with caching."""
274
+ status = self._get_cached_status()
275
+ if status is None:
276
+ return "disabled"
277
+ try:
278
+ backend_status = status.get('backend_status')
279
+ if backend_status and isinstance(backend_status, dict):
280
+ motor_mode = backend_status.get('motor_control_mode', 'disabled')
281
+ return motor_mode
282
+ if status.get('state') == 'running':
283
+ return "enabled"
284
+ return "disabled"
285
+ except Exception as e:
286
+ logger.error(f"Error getting motor mode: {e}")
287
+ return "error"
288
+
289
+ def set_motor_mode(self, mode: str) -> None:
290
+ """
291
+ Set motor control mode.
292
+
293
+ Args:
294
+ mode: One of "enabled", "disabled", "gravity_compensation"
295
+ """
296
+ if not self.is_available:
297
+ logger.warning("Cannot set motor mode: robot not available")
298
+ return
299
+
300
+ try:
301
+ if mode == "enabled":
302
+ self.reachy.enable_motors()
303
+ elif mode == "disabled":
304
+ self.reachy.disable_motors()
305
+ elif mode == "gravity_compensation":
306
+ self.reachy.enable_gravity_compensation()
307
+ else:
308
+ logger.warning(f"Invalid motor mode: {mode}")
309
+ return
310
+ logger.info(f"Motor mode set to {mode}")
311
+ except Exception as e:
312
+ logger.error(f"Error setting motor mode: {e}")
313
+
314
+ def wake_up(self) -> None:
315
+ """Execute wake up animation."""
316
+ if not self.is_available:
317
+ logger.warning("Cannot wake up: robot not available")
318
+ return
319
+
320
+ try:
321
+ self.reachy.wake_up()
322
+ logger.info("Wake up animation executed")
323
+ except Exception as e:
324
+ logger.error(f"Error executing wake up: {e}")
325
+
326
+ def go_to_sleep(self) -> None:
327
+ """Execute sleep animation."""
328
+ if not self.is_available:
329
+ logger.warning("Cannot sleep: robot not available")
330
+ return
331
+
332
+ try:
333
+ self.reachy.goto_sleep()
334
+ logger.info("Sleep animation executed")
335
+ except Exception as e:
336
+ logger.error(f"Error executing sleep: {e}")
337
+
338
+ # ========== Phase 3: Pose Control ==========
339
+
340
+ def _get_head_pose(self) -> Optional[np.ndarray]:
341
+ """Get current head pose from SDK.
342
+
343
+ Note: SDK's get_current_head_pose() is non-blocking - it returns
344
+ cached data from Zenoh subscriptions, so no throttling needed.
345
+ """
346
+ if not self.is_available:
347
+ return None
348
+
349
+ try:
350
+ return self.reachy.get_current_head_pose()
351
+ except Exception as e:
352
+ logger.error(f"Error getting head pose: {e}")
353
+ return None
354
+
355
+ def _get_joint_positions(self) -> Optional[tuple]:
356
+ """Get current joint positions from SDK.
357
+
358
+ Note: SDK's get_current_joint_positions() is non-blocking - it returns
359
+ cached data from Zenoh subscriptions, so no throttling needed.
360
+ """
361
+ if not self.is_available:
362
+ return None
363
+
364
+ try:
365
+ return self.reachy.get_current_joint_positions()
366
+ except Exception as e:
367
+ logger.error(f"Error getting joint positions: {e}")
368
+ return None
369
+
370
+ def _extract_pose_from_matrix(self, pose_matrix: np.ndarray) -> tuple:
371
+ """
372
+ Extract position (x, y, z) and rotation (roll, pitch, yaw) from 4x4 pose matrix.
373
+
374
+ Args:
375
+ pose_matrix: 4x4 homogeneous transformation matrix
376
+
377
+ Returns:
378
+ tuple: (x, y, z, roll, pitch, yaw) where position is in meters and angles in radians
379
+ """
380
+ # Extract position from the last column
381
+ x = pose_matrix[0, 3]
382
+ y = pose_matrix[1, 3]
383
+ z = pose_matrix[2, 3]
384
+
385
+ # Extract rotation matrix and convert to euler angles
386
+ rotation_matrix = pose_matrix[:3, :3]
387
+ rotation = R.from_matrix(rotation_matrix)
388
+ # Use 'xyz' convention for roll, pitch, yaw
389
+ roll, pitch, yaw = rotation.as_euler('xyz')
390
+
391
+ return x, y, z, roll, pitch, yaw
392
+
393
+ def _get_head_pose_component(self, component: str) -> float:
394
+ """Get a specific component from head pose.
395
+
396
+ Args:
397
+ component: One of 'x', 'y', 'z' (mm), 'roll', 'pitch', 'yaw' (degrees)
398
+
399
+ Returns:
400
+ The component value, or 0.0 on error
401
+ """
402
+ pose = self._get_head_pose()
403
+ if pose is None:
404
+ return 0.0
405
+ try:
406
+ x, y, z, roll, pitch, yaw = self._extract_pose_from_matrix(pose)
407
+ components = {
408
+ 'x': x * 1000, # m to mm
409
+ 'y': y * 1000,
410
+ 'z': z * 1000,
411
+ 'roll': math.degrees(roll),
412
+ 'pitch': math.degrees(pitch),
413
+ 'yaw': math.degrees(yaw),
414
+ }
415
+ return components.get(component, 0.0)
416
+ except Exception as e:
417
+ logger.error(f"Error getting head {component}: {e}")
418
+ return 0.0
419
+
420
+ def _disabled_pose_setter(self, name: str) -> None:
421
+ """Log warning when MovementManager is not available."""
422
+ logger.warning(f"set_{name} failed - MovementManager not set")
423
+
424
+ def _set_pose_via_manager(self, **kwargs) -> bool:
425
+ """Set pose via MovementManager if available.
426
+
427
+ Returns True if successful, False if MovementManager not available.
428
+ """
429
+ if self._movement_manager is None:
430
+ return False
431
+ self._movement_manager.set_target_pose(**kwargs)
432
+ return True
433
+
434
+ # Head position getters and setters
435
+ def get_head_x(self) -> float:
436
+ """Get head X position in mm."""
437
+ return self._get_head_pose_component('x')
438
+
439
+ def set_head_x(self, x_mm: float) -> None:
440
+ """Set head X position in mm via MovementManager."""
441
+ if not self._set_pose_via_manager(x=x_mm / 1000.0): # mm to m
442
+ self._disabled_pose_setter('head_x')
443
+
444
+ def get_head_y(self) -> float:
445
+ """Get head Y position in mm."""
446
+ return self._get_head_pose_component('y')
447
+
448
+ def set_head_y(self, y_mm: float) -> None:
449
+ """Set head Y position in mm via MovementManager."""
450
+ if not self._set_pose_via_manager(y=y_mm / 1000.0): # mm to m
451
+ self._disabled_pose_setter('head_y')
452
+
453
+ def get_head_z(self) -> float:
454
+ """Get head Z position in mm."""
455
+ return self._get_head_pose_component('z')
456
+
457
+ def set_head_z(self, z_mm: float) -> None:
458
+ """Set head Z position in mm via MovementManager."""
459
+ if not self._set_pose_via_manager(z=z_mm / 1000.0): # mm to m
460
+ self._disabled_pose_setter('head_z')
461
+
462
+ # Head orientation getters and setters
463
+ def get_head_roll(self) -> float:
464
+ """Get head roll angle in degrees."""
465
+ return self._get_head_pose_component('roll')
466
+
467
+ def set_head_roll(self, roll_deg: float) -> None:
468
+ """Set head roll angle in degrees via MovementManager."""
469
+ if not self._set_pose_via_manager(roll=math.radians(roll_deg)):
470
+ self._disabled_pose_setter('head_roll')
471
+
472
+ def get_head_pitch(self) -> float:
473
+ """Get head pitch angle in degrees."""
474
+ return self._get_head_pose_component('pitch')
475
+
476
+ def set_head_pitch(self, pitch_deg: float) -> None:
477
+ """Set head pitch angle in degrees via MovementManager."""
478
+ if not self._set_pose_via_manager(pitch=math.radians(pitch_deg)):
479
+ self._disabled_pose_setter('head_pitch')
480
+
481
+ def get_head_yaw(self) -> float:
482
+ """Get head yaw angle in degrees."""
483
+ return self._get_head_pose_component('yaw')
484
+
485
+ def set_head_yaw(self, yaw_deg: float) -> None:
486
+ """Set head yaw angle in degrees via MovementManager."""
487
+ if not self._set_pose_via_manager(yaw=math.radians(yaw_deg)):
488
+ self._disabled_pose_setter('head_yaw')
489
+
490
+ def get_body_yaw(self) -> float:
491
+ """Get body yaw angle in degrees."""
492
+ joints = self._get_joint_positions()
493
+ if joints is None:
494
+ return 0.0
495
+ try:
496
+ head_joints, _ = joints
497
+ return math.degrees(head_joints[0])
498
+ except Exception as e:
499
+ logger.error(f"Error getting body yaw: {e}")
500
+ return 0.0
501
+
502
def set_body_yaw(self, yaw_deg: float) -> None:
    """Command a new body yaw (degrees) through the MovementManager."""
    # The manager expects radians.
    accepted = self._set_pose_via_manager(body_yaw=math.radians(yaw_deg))
    if not accepted:
        self._disabled_pose_setter('body_yaw')
+
507
def get_antenna_left(self) -> float:
    """Report the left antenna angle in degrees (0.0 when unavailable)."""
    positions = self._get_joint_positions()
    if positions is None:
        return 0.0
    try:
        _head, antenna_angles = positions
        # Left antenna occupies index 1 of the antenna tuple.
        return math.degrees(antenna_angles[1])
    except Exception as e:
        logger.error(f"Error getting left antenna: {e}")
        return 0.0
+
519
def set_antenna_left(self, angle_deg: float) -> None:
    """Command a new left antenna angle (degrees) through the MovementManager."""
    # The manager expects radians.
    accepted = self._set_pose_via_manager(antenna_left=math.radians(angle_deg))
    if not accepted:
        self._disabled_pose_setter('antenna_left')
+
524
def get_antenna_right(self) -> float:
    """Report the right antenna angle in degrees (0.0 when unavailable)."""
    positions = self._get_joint_positions()
    if positions is None:
        return 0.0
    try:
        _head, antenna_angles = positions
        # Right antenna occupies index 0 of the antenna tuple.
        return math.degrees(antenna_angles[0])
    except Exception as e:
        logger.error(f"Error getting right antenna: {e}")
        return 0.0
+
536
def set_antenna_right(self, angle_deg: float) -> None:
    """Command a new right antenna angle (degrees) through the MovementManager."""
    # The manager expects radians.
    accepted = self._set_pose_via_manager(antenna_right=math.radians(angle_deg))
    if not accepted:
        self._disabled_pose_setter('antenna_right')
+
541
+ # ========== Phase 4: Look At Control ==========
542
+
543
def get_look_at_x(self) -> float:
    """Return the cached look-at target X coordinate (world frame, metres).

    This is the most recently requested target, not a measured state;
    0.0 is returned before any target has been set.
    """
    try:
        return self._look_at_x
    except AttributeError:
        return 0.0
+
549
def set_look_at_x(self, x: float) -> None:
    """Store a new look-at target X coordinate and refresh the gaze."""
    self._look_at_x = x
    # Re-aim using the full (x, y, z) target.
    self._update_look_at()
+
554
def get_look_at_y(self) -> float:
    """Return the cached look-at target Y coordinate (world frame, metres)."""
    try:
        return self._look_at_y
    except AttributeError:
        return 0.0
+
558
def set_look_at_y(self, y: float) -> None:
    """Store a new look-at target Y coordinate and refresh the gaze."""
    self._look_at_y = y
    # Re-aim using the full (x, y, z) target.
    self._update_look_at()
+
563
def get_look_at_z(self) -> float:
    """Return the cached look-at target Z coordinate (world frame, metres)."""
    try:
        return self._look_at_z
    except AttributeError:
        return 0.0
+
567
def set_look_at_z(self, z: float) -> None:
    """Store a new look-at target Z coordinate and refresh the gaze."""
    self._look_at_z = z
    # Re-aim using the full (x, y, z) target.
    self._update_look_at()
+
572
def _update_look_at(self) -> None:
    """Update robot to look at the target coordinates.

    NOTE: Intentionally a no-op. Driving `look_at_world` from here would
    fight the MovementManager's control loop, which owns the head pose;
    the stored `_look_at_*` targets are kept for entity readback only.
    """
    # Warn (rather than silently ignore) so callers notice the no-op.
    logger.warning("_update_look_at is disabled - MovementManager controls head pose")
+
589
+ # ========== Phase 6: Diagnostic Information ==========
590
+
591
def get_control_loop_frequency(self) -> float:
    """Report the mean control-loop frequency in Hz (0.0 when unavailable)."""
    status = self._get_cached_status()
    if status is None:
        return 0.0
    try:
        backend = status.get('backend_status')
        if not isinstance(backend, dict):
            return 0.0
        stats = backend.get('control_loop_stats', {})
        return stats.get('mean_control_loop_frequency', 0.0)
    except Exception as e:
        logger.error(f"Error getting control loop frequency: {e}")
        return 0.0
+
606
def get_sdk_version(self) -> str:
    """Report the SDK version string ("N/A" without status, "unknown" if unset)."""
    status = self._get_cached_status()
    if status is None:
        return "N/A"
    version = status.get('version')
    return version if version else "unknown"
+
613
def get_robot_name(self) -> str:
    """Report the robot name ("N/A" without status, "unknown" if unset)."""
    status = self._get_cached_status()
    if status is None:
        return "N/A"
    name = status.get('robot_name')
    return name if name else "unknown"
+
620
def get_wireless_version(self) -> bool:
    """Report whether the robot is the wireless variant (False without status)."""
    status = self._get_cached_status()
    return False if status is None else status.get('wireless_version', False)
+
627
def get_simulation_mode(self) -> bool:
    """Report whether simulation mode is enabled (False without status)."""
    status = self._get_cached_status()
    return False if status is None else status.get('simulation_enabled', False)
+
634
def get_wlan_ip(self) -> str:
    """Report the robot's WLAN IP address ("N/A" when unknown)."""
    status = self._get_cached_status()
    if status is None:
        return "N/A"
    ip = status.get('wlan_ip')
    return ip if ip else "N/A"
+
641
+ # ========== Phase 7: IMU Sensors (Wireless only) ==========
642
+
643
+ def _get_imu_value(self, sensor_type: str, index: int) -> float:
644
+ """Get a specific IMU sensor value.
645
+
646
+ Args:
647
+ sensor_type: 'accelerometer', 'gyroscope', or 'temperature'
648
+ index: Array index (0=x, 1=y, 2=z) or -1 for scalar values
649
+
650
+ Returns:
651
+ The sensor value, or 0.0 on error
652
+ """
653
+ if not self.is_available:
654
+ return 0.0
655
+ try:
656
+ imu_data = self.reachy.imu
657
+ if imu_data is None or sensor_type not in imu_data:
658
+ return 0.0
659
+ value = imu_data[sensor_type]
660
+ return float(value[index]) if index >= 0 else float(value)
661
+ except Exception as e:
662
+ logger.debug(f"Error getting IMU {sensor_type}: {e}")
663
+ return 0.0
664
+
665
def get_imu_accel_x(self) -> float:
    """Report the IMU's acceleration along X, in m/s²."""
    return self._get_imu_value('accelerometer', 0)
+
669
def get_imu_accel_y(self) -> float:
    """Report the IMU's acceleration along Y, in m/s²."""
    return self._get_imu_value('accelerometer', 1)
+
673
def get_imu_accel_z(self) -> float:
    """Report the IMU's acceleration along Z, in m/s²."""
    return self._get_imu_value('accelerometer', 2)
+
677
def get_imu_gyro_x(self) -> float:
    """Report the IMU's angular velocity about X, in rad/s."""
    return self._get_imu_value('gyroscope', 0)
+
681
def get_imu_gyro_y(self) -> float:
    """Report the IMU's angular velocity about Y, in rad/s."""
    return self._get_imu_value('gyroscope', 1)
+
685
def get_imu_gyro_z(self) -> float:
    """Report the IMU's angular velocity about Z, in rad/s."""
    return self._get_imu_value('gyroscope', 2)
+
689
def get_imu_temperature(self) -> float:
    """Report the IMU die temperature, in °C."""
    # -1 selects the scalar (non-indexed) reading.
    return self._get_imu_value('temperature', -1)
+
693
+ # ========== Phase 11: LED Control (DISABLED) ==========
694
+ # LED control is disabled because LEDs are hidden inside the robot.
695
+ # See PROJECT_PLAN.md principle 8.
696
+
697
def _get_respeaker(self):
    """Get ReSpeaker device from media manager with thread-safe access.

    Returns a context manager that holds the lock during ReSpeaker operations.
    Usage:
        with self._get_respeaker() as respeaker:
            if respeaker:
                respeaker.read("...")
    """
    # Robot offline: hand back an empty context that still honours the lock.
    if not self.is_available:
        return _ReSpeakerContext(None, self._respeaker_lock)
    try:
        # The media stack (or its audio subsystem) may not be initialised yet.
        if not self.reachy.media or not self.reachy.media.audio:
            return _ReSpeakerContext(None, self._respeaker_lock)
        # NOTE(review): reaches into the SDK's private `_respeaker` attribute —
        # fragile across SDK upgrades; confirm no public accessor exists.
        respeaker = self.reachy.media.audio._respeaker
        return _ReSpeakerContext(respeaker, self._respeaker_lock)
    except Exception:
        # Best-effort: any SDK hiccup degrades to a None device.
        return _ReSpeakerContext(None, self._respeaker_lock)
+
716
+ # ========== Phase 12: Audio Processing (via local SDK with thread-safe access) ==========
717
+
718
def get_agc_enabled(self) -> bool:
    """Report whether the ReSpeaker's Automatic Gain Control is enabled."""
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            # No device: fall back to the cached value (enabled by default).
            return getattr(self, '_agc_enabled', True)
        try:
            raw = respeaker.read("PP_AGCONOFF")
            if raw is not None:
                self._agc_enabled = bool(raw[1])
                return self._agc_enabled
        except Exception as e:
            logger.debug(f"Error getting AGC status: {e}")
        return getattr(self, '_agc_enabled', True)
+
732
def set_agc_enabled(self, enabled: bool) -> None:
    """Switch the ReSpeaker's Automatic Gain Control on or off."""
    # Remember the requested state even if the hardware is unreachable.
    self._agc_enabled = enabled
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            return
        try:
            payload = [1] if enabled else [0]
            respeaker.write("PP_AGCONOFF", payload)
            logger.info(f"AGC {'enabled' if enabled else 'disabled'}")
        except Exception as e:
            logger.error(f"Error setting AGC status: {e}")
+
744
def get_agc_max_gain(self) -> float:
    """Report the AGC maximum gain in dB (0-40 dB range)."""
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            # No device: fall back to the cached/optimised default.
            return getattr(self, '_agc_max_gain', 30.0)
        try:
            raw = respeaker.read("PP_AGCMAXGAIN")
            if raw is not None:
                self._agc_max_gain = float(raw[0])
                return self._agc_max_gain
        except Exception as e:
            logger.debug(f"Error getting AGC max gain: {e}")
        return getattr(self, '_agc_max_gain', 30.0)
+
758
def set_agc_max_gain(self, gain: float) -> None:
    """Set the AGC maximum gain in dB, clamped to the XVF3800's 0-40 dB range."""
    clamped = max(0.0, min(40.0, gain))
    self._agc_max_gain = clamped
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            return
        try:
            respeaker.write("PP_AGCMAXGAIN", [clamped])
            logger.info(f"AGC max gain set to {clamped} dB")
        except Exception as e:
            logger.error(f"Error setting AGC max gain: {e}")
+
771
def get_noise_suppression(self) -> float:
    """Report the noise-suppression strength as a percentage (0-100%).

    The XVF3800 exposes PP_MIN_NS as a "minimum signal preservation ratio",
    so the strength shown to the user is its complement:
        suppression% = (1.0 - PP_MIN_NS) * 100
    e.g. PP_MIN_NS=0.85 keeps at least 85% of the signal -> 15% suppression,
         PP_MIN_NS=0.15 keeps at least 15% of the signal -> 85% suppression.
    """
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            return getattr(self, '_noise_suppression', 15.0)
        try:
            raw = respeaker.read("PP_MIN_NS")
            if raw is not None:
                preserved = raw[0]
                # Invert the preservation ratio into a suppression percentage.
                self._noise_suppression = max(0.0, min(100.0, (1.0 - preserved) * 100.0))
                logger.debug(f"Noise suppression: PP_MIN_NS={preserved:.2f} -> {self._noise_suppression:.1f}%")
                return self._noise_suppression
        except Exception as e:
            logger.debug(f"Error getting noise suppression: {e}")
        return getattr(self, '_noise_suppression', 15.0)
+
796
def set_noise_suppression(self, level: float) -> None:
    """Set the noise-suppression strength (0-100%), clamped."""
    clamped = max(0.0, min(100.0, level))
    self._noise_suppression = clamped
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            return
        try:
            # Invert the percentage back into PP_MIN_NS's preservation ratio.
            preserved = 1.0 - (clamped / 100.0)
            respeaker.write("PP_MIN_NS", [preserved])
            logger.info(f"Noise suppression set to {clamped}%")
        except Exception as e:
            logger.error(f"Error setting noise suppression: {e}")
+
811
def get_echo_cancellation_converged(self) -> bool:
    """Report whether the acoustic echo canceller has converged."""
    with self._get_respeaker() as respeaker:
        if respeaker is None:
            return False
        try:
            raw = respeaker.read("AEC_AECCONVERGED")
            if raw is not None:
                return bool(raw[1])
        except Exception as e:
            logger.debug(f"Error getting AEC converged status: {e}")
    return False
+
824
+ # ========== DOA (Direction of Arrival) ==========
825
+
826
+ def get_doa_angle(self) -> tuple[float, bool] | None:
827
+ """Get Direction of Arrival angle from microphone array.
828
+
829
+ The DOA angle indicates the direction of the sound source relative to the robot.
830
+ Angle is in radians: 0 = left, π/2 = front/back, π = right.
831
+
832
+ Returns:
833
+ Tuple of (angle_radians, speech_detected), or None if unavailable.
834
+ - angle_radians: Sound source direction in radians
835
+ - speech_detected: Whether speech is currently detected
836
+ """
837
+ if not self.is_available:
838
+ return None
839
+ try:
840
+ if self.reachy.media and self.reachy.media.audio:
841
+ return self.reachy.media.audio.get_DoA()
842
+ except Exception as e:
843
+ logger.debug(f"Error getting DOA: {e}")
844
+ return None
845
+
846
def get_doa_angle_degrees(self) -> float:
    """Report the raw DOA angle in degrees for the Home Assistant entity.

    SDK convention: 0° = left, 90° = front, 180° = right.
    Returns 0.0 when no DOA reading is available.
    """
    reading = self.get_doa_angle()
    if reading is None:
        return 0.0
    angle_rad = reading[0]
    return math.degrees(angle_rad)
+
860
def get_speech_detected(self) -> bool:
    """Report whether the DOA pipeline currently detects speech."""
    reading = self.get_doa_angle()
    return False if reading is None else reading[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/satellite.py RENAMED
@@ -1,1051 +1,784 @@
1
- """Voice satellite protocol for Reachy Mini."""
2
-
3
- import hashlib
4
- import logging
5
- import math
6
- import posixpath
7
- import shutil
8
- import time
9
- from collections.abc import Iterable
10
- from typing import TYPE_CHECKING, Optional
11
- from urllib.parse import urlparse, urlunparse
12
- from urllib.request import urlopen
13
-
14
- if TYPE_CHECKING:
15
- from ..vision.camera_server import MJPEGCameraServer
16
-
17
- # pylint: disable=no-name-in-module
18
- from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
19
- ButtonCommandRequest,
20
- CameraImageRequest,
21
- DeviceInfoRequest,
22
- DeviceInfoResponse,
23
- HomeAssistantStateResponse,
24
- ListEntitiesDoneResponse,
25
- ListEntitiesRequest,
26
- MediaPlayerCommandRequest,
27
- NumberCommandRequest,
28
- SelectCommandRequest,
29
- SubscribeHomeAssistantStatesRequest,
30
- SubscribeStatesRequest,
31
- SwitchCommandRequest,
32
- VoiceAssistantAnnounceFinished,
33
- VoiceAssistantAnnounceRequest,
34
- VoiceAssistantAudio,
35
- VoiceAssistantConfigurationRequest,
36
- VoiceAssistantConfigurationResponse,
37
- VoiceAssistantEventResponse,
38
- VoiceAssistantExternalWakeWord,
39
- VoiceAssistantRequest,
40
- VoiceAssistantSetConfiguration,
41
- VoiceAssistantTimerEventResponse,
42
- VoiceAssistantWakeWord,
43
- )
44
- from aioesphomeapi.model import (
45
- VoiceAssistantEventType,
46
- VoiceAssistantFeature,
47
- VoiceAssistantTimerEventType,
48
- )
49
- from google.protobuf import message
50
- from pymicro_wakeword import MicroWakeWord
51
- from pyopen_wakeword import OpenWakeWord
52
-
53
- from ..core.util import call_all
54
-
55
- # DISABLED: Emotion detection moved to Home Assistant blueprint
56
- # from ..entities.emotion_detector import EmotionKeywordDetector
57
- from ..entities.entity import MediaPlayerEntity
58
- from ..entities.entity_registry import EntityRegistry, get_entity_key
59
- from ..entities.event_emotion_mapper import EventEmotionMapper
60
- from ..models import AvailableWakeWord, ServerState, WakeWordType
61
- from ..motion.gesture_actions import GestureActionMapper
62
- from ..reachy_controller import ReachyController
63
- from .api_server import APIServer
64
-
65
- _LOGGER = logging.getLogger(__name__)
66
-
67
-
68
- class VoiceSatelliteProtocol(APIServer):
69
- """Voice satellite protocol handler for ESPHome."""
70
-
71
- def __init__(
72
- self, state: ServerState, camera_server: Optional["MJPEGCameraServer"] = None, voice_assistant_service=None
73
- ) -> None:
74
- _LOGGER.info("VoiceSatelliteProtocol.__init__ called - new connection")
75
- super().__init__(state.name)
76
- self.state = state
77
- self.state.satellite = self
78
- self.camera_server = camera_server
79
- self._voice_assistant_service = voice_assistant_service # Store reference for mute functionality
80
-
81
- # Home Assistant connection callbacks
82
- self._on_ha_connected_callback = None
83
- self._on_ha_disconnected_callback = None
84
-
85
- # Initialize streaming state early (before entity setup)
86
- self._is_streaming_audio = False
87
- self._tts_url: str | None = None
88
- self._tts_played = False
89
- self._continue_conversation = False
90
- self._timer_finished = False
91
- self._external_wake_words: dict[str, VoiceAssistantExternalWakeWord] = {}
92
-
93
- # Conversation tracking for continuous conversation
94
- self._conversation_id: str | None = None
95
- self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
96
- self._last_conversation_time = 0.0
97
-
98
- # Track Home Assistant entity states for change detection
99
- self._ha_entity_states: dict[str, str] = {}
100
-
101
- # Initialize Reachy controller
102
- self.reachy_controller = ReachyController(state.reachy_mini)
103
-
104
- # Connect sleep/wake callbacks to ServerState callbacks
105
- def on_sleep_from_ha():
106
- if state.on_ha_sleep is not None:
107
- try:
108
- state.on_ha_sleep()
109
- except Exception as e:
110
- _LOGGER.error("Error in on_ha_sleep callback: %s", e)
111
-
112
- def on_wake_from_ha():
113
- if state.on_ha_wake is not None:
114
- try:
115
- state.on_ha_wake()
116
- except Exception as e:
117
- _LOGGER.error("Error in on_ha_wake callback: %s", e)
118
-
119
- self.reachy_controller.set_sleep_callback(on_sleep_from_ha)
120
- self.reachy_controller.set_wake_callback(on_wake_from_ha)
121
-
122
- # Connect MovementManager to ReachyController for pose control from HA
123
- if state.motion is not None and state.motion.movement_manager is not None:
124
- self.reachy_controller.set_movement_manager(state.motion.movement_manager)
125
-
126
- # Setup speech sway callback for audio-driven head motion
127
- def sway_callback(sway: dict) -> None:
128
- mm = state.motion.movement_manager
129
- if mm is not None:
130
- mm.set_speech_sway(
131
- sway.get("x_m", 0.0),
132
- sway.get("y_m", 0.0),
133
- sway.get("z_m", 0.0),
134
- sway.get("roll_rad", 0.0),
135
- sway.get("pitch_rad", 0.0),
136
- sway.get("yaw_rad", 0.0),
137
- )
138
-
139
- state.tts_player.set_sway_callback(sway_callback)
140
- _LOGGER.info("Speech sway callback configured for TTS player")
141
-
142
- # Initialize entity registry
143
- self._entity_registry = EntityRegistry(
144
- server=self,
145
- reachy_controller=self.reachy_controller,
146
- camera_server=camera_server,
147
- play_emotion_callback=self._play_emotion,
148
- )
149
-
150
- # Connect gesture state callback
151
- if camera_server:
152
- camera_server.set_gesture_state_callback(self._entity_registry.update_gesture_state)
153
- camera_server.set_face_state_callback(self._entity_registry.update_face_detected_state)
154
- camera_server.set_gesture_action_callback(self.handle_detected_gesture)
155
-
156
- # Initialize gesture action mapper for local gesture → action handling
157
- self._gesture_action_mapper = GestureActionMapper()
158
- self._gesture_action_mapper.set_emotion_callback(self._play_emotion)
159
- self._gesture_action_mapper.set_start_listening_callback(self._trigger_wake_word)
160
- self._gesture_action_mapper.set_stop_speaking_callback(self._stop_current_tts)
161
- self._gesture_action_mapper.set_ha_event_callback(self._send_gesture_event_to_ha)
162
- _LOGGER.info("Gesture action mapper initialized")
163
-
164
- # Initialize event-emotion mapper for HA state change reactions
165
- self._event_emotion_mapper = EventEmotionMapper()
166
- self._event_emotion_mapper.set_emotion_callback(self._play_emotion)
167
- # Load custom mappings from JSON if available
168
- from pathlib import Path
169
-
170
- mappings_file = Path(__file__).parent / "animations" / "event_mappings.json"
171
- if mappings_file.exists():
172
- self._event_emotion_mapper.load_from_json(mappings_file)
173
- _LOGGER.info("Event emotion mapper initialized")
174
-
175
- # Only setup entities once (check if already initialized)
176
- # This prevents duplicate entity registration on reconnection
177
- try:
178
- _LOGGER.info("Checking entity initialization state...")
179
- if not getattr(self.state, "_entities_initialized", False):
180
- _LOGGER.info("Setting up entities for first time...")
181
- if self.state.media_player_entity is None:
182
- _LOGGER.info("Creating MediaPlayerEntity...")
183
- self.state.media_player_entity = MediaPlayerEntity(
184
- server=self,
185
- key=get_entity_key("reachy_mini_media_player"),
186
- name="Media Player",
187
- object_id="reachy_mini_media_player",
188
- music_player=state.music_player,
189
- announce_player=state.tts_player,
190
- )
191
- self.state.entities.append(self.state.media_player_entity)
192
- _LOGGER.info("MediaPlayerEntity created")
193
-
194
- # Setup all entities using the registry
195
- _LOGGER.info("Setting up all entities via registry...")
196
- self._entity_registry.setup_all_entities(self.state.entities)
197
-
198
- # Mark entities as initialized
199
- self.state._entities_initialized = True
200
- _LOGGER.info("Entities initialized: %d total", len(self.state.entities))
201
- else:
202
- _LOGGER.info("Entities already initialized, updating server references")
203
- # Update server reference in existing entities
204
- for entity in self.state.entities:
205
- entity.server = self
206
- _LOGGER.info("Server references updated for %d entities", len(self.state.entities))
207
- except Exception as e:
208
- _LOGGER.error("Error during entity setup: %s", e, exc_info=True)
209
- raise
210
-
211
- # Initialize emotion keyword detector for auto-triggering emotions from LLM responses
212
- # DISABLED: Emotion detection moved to Home Assistant blueprint
213
- # self._emotion_detector = EmotionKeywordDetector(play_emotion_callback=self._play_emotion)
214
- _LOGGER.info("VoiceSatelliteProtocol.__init__ completed")
215
-
216
- def set_ha_connection_callbacks(self, on_connected, on_disconnected):
217
- """Set callbacks for Home Assistant connection/disconnection."""
218
- self._on_ha_connected_callback = on_connected
219
- self._on_ha_disconnected_callback = on_disconnected
220
-
221
- def connection_made(self, transport) -> None:
222
- """Called when a client connects."""
223
- peer = transport.get_extra_info("peername")
224
- _LOGGER.info("ESPHome client connected from %s", peer)
225
- super().connection_made(transport)
226
-
227
- def update_camera_server(self, camera_server):
228
- """Update the camera server reference in entity registry.
229
-
230
- Called when camera server is started after Home Assistant connection.
231
- """
232
- self._entity_registry.camera_server = camera_server
233
- if camera_server:
234
- camera_server.set_gesture_state_callback(self._entity_registry.update_gesture_state)
235
- camera_server.set_face_state_callback(self._entity_registry.update_face_detected_state)
236
- camera_server.set_gesture_action_callback(self.handle_detected_gesture)
237
- _LOGGER.debug("Camera server reference updated in entity registry")
238
-
239
- # Note: connection_lost is defined later in the class with full cleanup logic
240
-
241
- def handle_voice_event(self, event_type: VoiceAssistantEventType, data: dict[str, str]) -> None:
242
- _LOGGER.debug("Voice event: type=%s, data=%s", event_type.name, data)
243
-
244
- if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_START:
245
- self._tts_url = data.get("url")
246
- self._tts_played = False
247
- self._continue_conversation = False
248
- # Reachy Mini: Start listening animation
249
- self._reachy_on_listening()
250
-
251
- # Note: TTS URL requires HA authentication, cannot pre-download
252
- # Speaking animation uses JSON-defined multi-frequency sway instead
253
-
254
- elif event_type in (
255
- VoiceAssistantEventType.VOICE_ASSISTANT_STT_VAD_END,
256
- VoiceAssistantEventType.VOICE_ASSISTANT_STT_END,
257
- ):
258
- self._is_streaming_audio = False
259
- # Reachy Mini: Stop listening, start thinking
260
- self._reachy_on_thinking()
261
-
262
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_INTENT_PROGRESS:
263
- if data.get("tts_start_streaming") == "1":
264
- # Start streaming early
265
- self.play_tts()
266
-
267
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_INTENT_END:
268
- if data.get("continue_conversation") == "1":
269
- self._continue_conversation = True
270
-
271
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
272
- # Reachy Mini: Start speaking animation (JSON-defined multi-frequency sway)
273
- _LOGGER.debug("TTS_START event received, triggering speaking animation")
274
- self._reachy_on_speaking()
275
-
276
- # Auto-trigger emotion based on response text
277
- # TTS_START may contain the text to be spoken
278
- # DISABLED: Emotion detection moved to Home Assistant blueprint
279
- # tts_text = data.get("tts_output") or data.get("text") or ""
280
- # if tts_text:
281
- # self._emotion_detector.detect_and_play(tts_text)
282
-
283
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
284
- self._tts_url = data.get("url")
285
- self.play_tts()
286
-
287
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_END:
288
- # Pipeline run ended
289
- self._is_streaming_audio = False
290
-
291
- # Following reference project pattern
292
- if not self._tts_played:
293
- self._tts_finished()
294
-
295
- self._tts_played = False
296
-
297
- def handle_timer_event(
298
- self,
299
- event_type: VoiceAssistantTimerEventType,
300
- msg: VoiceAssistantTimerEventResponse,
301
- ) -> None:
302
- _LOGGER.debug("Timer event: type=%s", event_type.name)
303
-
304
- if event_type == VoiceAssistantTimerEventType.VOICE_ASSISTANT_TIMER_FINISHED:
305
- if not self._timer_finished:
306
- self.state.active_wake_words.add(self.state.stop_word.id)
307
- self._timer_finished = True
308
- self.duck()
309
- self._play_timer_finished()
310
- # Reachy Mini: Timer finished animation
311
- self._reachy_on_timer_finished()
312
-
313
- def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
314
- if isinstance(msg, VoiceAssistantEventResponse):
315
- # Pipeline event
316
- data: dict[str, str] = {}
317
- for arg in msg.data:
318
- data[arg.name] = arg.value
319
- self.handle_voice_event(VoiceAssistantEventType(msg.event_type), data)
320
-
321
- elif isinstance(msg, VoiceAssistantAnnounceRequest):
322
- _LOGGER.debug("Announcing: %s", msg.text)
323
- assert self.state.media_player_entity is not None
324
-
325
- urls = []
326
- if msg.preannounce_media_id:
327
- urls.append(msg.preannounce_media_id)
328
- urls.append(msg.media_id)
329
-
330
- self.state.active_wake_words.add(self.state.stop_word.id)
331
- self._set_stop_word_active(True)
332
- self._continue_conversation = msg.start_conversation
333
- self.duck()
334
-
335
- yield from self.state.media_player_entity.play(urls, announcement=True, done_callback=self._tts_finished)
336
-
337
- elif isinstance(msg, VoiceAssistantTimerEventResponse):
338
- self.handle_timer_event(VoiceAssistantTimerEventType(msg.event_type), msg)
339
-
340
- elif isinstance(msg, HomeAssistantStateResponse):
341
- # Handle Home Assistant state changes for emotion mapping
342
- self._handle_ha_state_change(msg)
343
-
344
- elif isinstance(msg, DeviceInfoRequest):
345
- _LOGGER.info("DeviceInfoRequest received, sending DeviceInfoResponse")
346
- yield DeviceInfoResponse(
347
- uses_password=False,
348
- name=self.state.name,
349
- mac_address=self.state.mac_address,
350
- voice_assistant_feature_flags=(
351
- VoiceAssistantFeature.VOICE_ASSISTANT
352
- | VoiceAssistantFeature.API_AUDIO
353
- | VoiceAssistantFeature.ANNOUNCE
354
- | VoiceAssistantFeature.START_CONVERSATION
355
- | VoiceAssistantFeature.TIMERS
356
- ),
357
- )
358
-
359
- elif isinstance(
360
- msg,
361
- (
362
- ListEntitiesRequest,
363
- SubscribeHomeAssistantStatesRequest,
364
- SubscribeStatesRequest,
365
- MediaPlayerCommandRequest,
366
- NumberCommandRequest,
367
- SwitchCommandRequest,
368
- SelectCommandRequest,
369
- ButtonCommandRequest,
370
- CameraImageRequest,
371
- ),
372
- ):
373
- for entity in self.state.entities:
374
- yield from entity.handle_message(msg)
375
-
376
- if isinstance(msg, ListEntitiesRequest):
377
- yield ListEntitiesDoneResponse()
378
-
379
- elif isinstance(msg, VoiceAssistantConfigurationRequest):
380
- available_wake_words = [
381
- VoiceAssistantWakeWord(
382
- id=ww.id,
383
- wake_word=ww.wake_word,
384
- trained_languages=ww.trained_languages,
385
- )
386
- for ww in self.state.available_wake_words.values()
387
- ]
388
-
389
- for eww in msg.external_wake_words:
390
- if eww.model_type != "micro":
391
- continue
392
-
393
- available_wake_words.append(
394
- VoiceAssistantWakeWord(
395
- id=eww.id,
396
- wake_word=eww.wake_word,
397
- trained_languages=eww.trained_languages,
398
- )
399
- )
400
- self._external_wake_words[eww.id] = eww
401
-
402
- yield VoiceAssistantConfigurationResponse(
403
- available_wake_words=available_wake_words,
404
- active_wake_words=[
405
- ww.id for ww in self.state.wake_words.values() if ww.id in self.state.active_wake_words
406
- ],
407
- max_active_wake_words=2,
408
- )
409
-
410
- _LOGGER.info("Connected to Home Assistant")
411
-
412
- # Trigger HA connected callback (async)
413
- if self._on_ha_connected_callback:
414
- try:
415
- import asyncio
416
-
417
- loop = asyncio.get_running_loop()
418
- task = loop.create_task(self._on_ha_connected_callback())
419
- _ = task # Prevent RUF006 warning
420
- except Exception as e:
421
- _LOGGER.error("Error in HA connected callback: %s", e)
422
-
423
- elif isinstance(msg, VoiceAssistantSetConfiguration):
424
- # Change active wake words
425
- active_wake_words: set[str] = set()
426
-
427
- for wake_word_id in msg.active_wake_words:
428
- if wake_word_id in self.state.wake_words:
429
- # Already loaded, just add to active set
430
- active_wake_words.add(wake_word_id)
431
- continue
432
-
433
- model_info = self.state.available_wake_words.get(wake_word_id)
434
- if not model_info:
435
- # Check external wake words (may require download)
436
- external_wake_word = self._external_wake_words.get(wake_word_id)
437
- if not external_wake_word:
438
- _LOGGER.warning("Wake word not found: %s", wake_word_id)
439
- continue
440
-
441
- model_info = self._download_external_wake_word(external_wake_word)
442
- if not model_info:
443
- continue
444
-
445
- self.state.available_wake_words[wake_word_id] = model_info
446
-
447
- _LOGGER.debug("Loading wake word: %s", model_info.wake_word_path)
448
- loaded_model = model_info.load()
449
- # Set id attribute on the model for later identification
450
- loaded_model.id = wake_word_id
451
- self.state.wake_words[wake_word_id] = loaded_model
452
- _LOGGER.info("Wake word loaded: %s", wake_word_id)
453
- active_wake_words.add(wake_word_id)
454
- # Don't break - load ALL requested wake words, not just the first one
455
-
456
- self.state.active_wake_words = active_wake_words
457
- _LOGGER.debug("Active wake words: %s", active_wake_words)
458
-
459
- self.state.preferences.active_wake_words = list(active_wake_words)
460
- self.state.save_preferences()
461
- self.state.wake_words_changed = True
462
-
463
- @property
464
- def is_streaming_audio(self) -> bool:
465
- return self._is_streaming_audio
466
-
467
- def handle_audio(self, audio_chunk: bytes) -> None:
468
- if not self._is_streaming_audio:
469
- return
470
- # Check if transport is still valid before sending
471
- if self._writelines is None:
472
- _LOGGER.warning("Cannot send audio: transport not available, stopping stream")
473
- self._is_streaming_audio = False
474
- return
475
- self.send_messages([VoiceAssistantAudio(data=audio_chunk)])
476
-
477
- def _get_or_create_conversation_id(self) -> str:
478
- """Get existing conversation_id or create a new one.
479
-
480
- Reuses conversation_id if within timeout period, otherwise creates new one.
481
- """
482
- now = time.time()
483
- if self._conversation_id is None or now - self._last_conversation_time > self._conversation_timeout:
484
- # Create new conversation_id
485
- import uuid
486
-
487
- self._conversation_id = str(uuid.uuid4())
488
- _LOGGER.debug("Created new conversation_id: %s", self._conversation_id)
489
-
490
- self._last_conversation_time = now
491
- return self._conversation_id
492
-
493
- def _clear_conversation(self) -> None:
494
- """Clear conversation state when exiting conversation mode."""
495
- self._conversation_id = None
496
- self._continue_conversation = False
497
-
498
- def wakeup(self, wake_word: MicroWakeWord | OpenWakeWord) -> None:
499
- """Handle wake word detection - start voice pipeline."""
500
- if self._timer_finished:
501
- # Stop timer instead
502
- self._timer_finished = False
503
- self.state.tts_player.stop()
504
- _LOGGER.debug("Stopping timer finished sound")
505
- return
506
-
507
- wake_word_phrase = wake_word.wake_word
508
- _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
509
-
510
- # Turn toward sound source using DOA (Direction of Arrival)
511
- self._turn_to_sound_source()
512
-
513
- # Get or create conversation_id for context tracking
514
- conv_id = self._get_or_create_conversation_id()
515
-
516
- self.send_messages(
517
- [
518
- VoiceAssistantRequest(
519
- start=True,
520
- wake_word_phrase=wake_word_phrase,
521
- conversation_id=conv_id,
522
- )
523
- ]
524
- )
525
- self.duck()
526
- self.state.tts_player.play(self.state.wakeup_sound, done_callback=self._on_wakeup_sound_finished)
527
-
528
- def _on_wakeup_sound_finished(self) -> None:
529
- """Start microphone streaming after wakeup sound finishes."""
530
- self._is_streaming_audio = True
531
-
532
- def stop(self) -> None:
533
- """Stop current TTS playback (e.g., user said stop word)."""
534
- # Ensure pipeline does not re-arm itself after manual stop
535
- self._is_streaming_audio = False
536
- self._continue_conversation = False
537
- self.state.active_wake_words.discard(self.state.stop_word.id)
538
- self._set_stop_word_active(False)
539
- self.state.tts_player.stop()
540
-
541
- if self._timer_finished:
542
- self._timer_finished = False
543
- _LOGGER.debug("Stopping timer finished sound")
544
- else:
545
- _LOGGER.debug("TTS response stopped manually")
546
- # Reset TTS state to prevent double-finished
547
- self._tts_url = None
548
- self._tts_played = True
549
- self._tts_finished()
550
-
551
- def play_tts(self) -> None:
552
- if (not self._tts_url) or self._tts_played:
553
- return
554
-
555
- self._tts_played = True
556
- _LOGGER.debug("Playing TTS response: %s", self._tts_url)
557
-
558
- self.state.active_wake_words.add(self.state.stop_word.id)
559
- self._set_stop_word_active(True)
560
- self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
561
-
562
- def duck(self) -> None:
563
- _LOGGER.debug("Ducking music")
564
- self.state.music_player.duck()
565
- # Pause Sendspin to prevent audio conflicts during voice interaction
566
- self.state.music_player.pause_sendspin()
567
-
568
- def unduck(self) -> None:
569
- _LOGGER.debug("Unducking music")
570
- self.state.music_player.unduck()
571
- # Resume Sendspin audio
572
- self.state.music_player.resume_sendspin()
573
-
574
- def _tts_finished(self) -> None:
575
- """Called when TTS audio playback finishes.
576
-
577
- Following reference project pattern: handle continue conversation here.
578
- """
579
- self.state.active_wake_words.discard(self.state.stop_word.id)
580
- self._set_stop_word_active(False)
581
- self.send_messages([VoiceAssistantAnnounceFinished()])
582
-
583
- # Check if should continue conversation
584
- # 1. Our switch is ON: Always continue (unconditional)
585
- # 2. Our switch is OFF: Follow HA's continue_conversation request
586
- continuous_mode = self.state.preferences.continuous_conversation
587
- should_continue = continuous_mode or self._continue_conversation
588
-
589
- if should_continue:
590
- _LOGGER.debug(
591
- "Continuing conversation (our_switch=%s, ha_request=%s)", continuous_mode, self._continue_conversation
592
- )
593
-
594
- # Play prompt sound to indicate ready for next input
595
- self.state.tts_player.play(self.state.wakeup_sound)
596
-
597
- # Use same conversation_id for context continuity
598
- conv_id = self._get_or_create_conversation_id()
599
- self.send_messages(
600
- [
601
- VoiceAssistantRequest(
602
- start=True,
603
- conversation_id=conv_id,
604
- )
605
- ]
606
- )
607
- self._is_streaming_audio = True
608
-
609
- # Stay in listening mode
610
- self._reachy_on_listening()
611
- else:
612
- self._clear_conversation()
613
- self.unduck()
614
- self._is_streaming_audio = False
615
- _LOGGER.debug("Conversation finished")
616
-
617
- # Reachy Mini: Return to idle
618
- self._reachy_on_idle()
619
-
620
- def _set_stop_word_active(self, active: bool) -> None:
621
- """Toggle stop word detector when model supports runtime activation."""
622
- try:
623
- if hasattr(self.state.stop_word, "is_active"):
624
- self.state.stop_word.is_active = active
625
- except Exception:
626
- pass
627
-
628
- def _play_timer_finished(self) -> None:
629
- if not self._timer_finished:
630
- self.unduck()
631
- return
632
-
633
- self.state.tts_player.play(
634
- self.state.timer_finished_sound,
635
- done_callback=lambda: call_all(lambda: time.sleep(1.0), self._play_timer_finished),
636
- )
637
-
638
- def connection_lost(self, exc):
639
- super().connection_lost(exc)
640
- _LOGGER.info("Disconnected from Home Assistant")
641
- # Clear streaming state on disconnect
642
- self._is_streaming_audio = False
643
- self._tts_url = None
644
- self._tts_played = False
645
- self._continue_conversation = False
646
- self._set_stop_word_active(False)
647
-
648
- # Trigger HA disconnected callback
649
- if self._on_ha_disconnected_callback:
650
- try:
651
- self._on_ha_disconnected_callback()
652
- except Exception as e:
653
- _LOGGER.error("Error in HA disconnected callback: %s", e)
654
-
655
- def _download_external_wake_word(
656
- self, external_wake_word: VoiceAssistantExternalWakeWord
657
- ) -> AvailableWakeWord | None:
658
- eww_dir = self.state.download_dir / "external_wake_words"
659
- eww_dir.mkdir(parents=True, exist_ok=True)
660
-
661
- config_path = eww_dir / f"{external_wake_word.id}.json"
662
- should_download_config = not config_path.exists()
663
-
664
- # Check if we need to download the model file
665
- model_path = eww_dir / f"{external_wake_word.id}.tflite"
666
- should_download_model = True
667
-
668
- if model_path.exists():
669
- model_size = model_path.stat().st_size
670
- if model_size == external_wake_word.model_size:
671
- with open(model_path, "rb") as model_file:
672
- model_hash = hashlib.sha256(model_file.read()).hexdigest()
673
-
674
- if model_hash == external_wake_word.model_hash:
675
- should_download_model = False
676
- _LOGGER.debug(
677
- "Model size and hash match for %s. Skipping download.",
678
- external_wake_word.id,
679
- )
680
-
681
- if should_download_config or should_download_model:
682
- # Download config
683
- _LOGGER.debug("Downloading %s to %s", external_wake_word.url, config_path)
684
- with urlopen(external_wake_word.url) as request:
685
- if request.status != 200:
686
- _LOGGER.warning(
687
- "Failed to download: %s, status=%s",
688
- external_wake_word.url,
689
- request.status,
690
- )
691
- return None
692
-
693
- with open(config_path, "wb") as model_file:
694
- shutil.copyfileobj(request, model_file)
695
-
696
- if should_download_model:
697
- # Download model file
698
- parsed_url = urlparse(external_wake_word.url)
699
- parsed_url = parsed_url._replace(path=posixpath.join(posixpath.dirname(parsed_url.path), model_path.name))
700
- model_url = urlunparse(parsed_url)
701
-
702
- _LOGGER.debug("Downloading %s to %s", model_url, model_path)
703
- with urlopen(model_url) as request:
704
- if request.status != 200:
705
- _LOGGER.warning("Failed to download: %s, status=%s", model_url, request.status)
706
- return None
707
-
708
- with open(model_path, "wb") as model_file:
709
- shutil.copyfileobj(request, model_file)
710
-
711
- return AvailableWakeWord(
712
- id=external_wake_word.id,
713
- type=WakeWordType.MICRO_WAKE_WORD,
714
- wake_word=external_wake_word.wake_word,
715
- trained_languages=external_wake_word.trained_languages,
716
- wake_word_path=config_path,
717
- )
718
-
719
- # -------------------------------------------------------------------------
720
- # Reachy Mini Motion Control
721
- # -------------------------------------------------------------------------
722
-
723
- def _turn_to_sound_source(self) -> None:
724
- """Turn robot head toward sound source using DOA at wakeup.
725
-
726
- This is called once at wakeup to orient the robot toward the speaker.
727
- Face tracking will take over after the initial turn.
728
-
729
- DOA angle convention (from SDK):
730
- - 0 radians = left (Y+ direction in head frame)
731
- - π/2 radians = front (X+ direction in head frame)
732
- - π radians = right (Y- direction in head frame)
733
-
734
- The SDK uses: p_head = [sin(doa), cos(doa), 0]
735
- So we need to convert this to yaw angle.
736
-
737
- Note: We don't check speech_detected because by the time wake word
738
- detection completes, the user may have stopped speaking.
739
- """
740
- if not self.state.motion_enabled:
741
- _LOGGER.info("DOA turn-to-sound: motion disabled")
742
- return
743
-
744
- try:
745
- # Get DOA from reachy_controller (only read once)
746
- doa = self.reachy_controller.get_doa_angle()
747
- if doa is None:
748
- _LOGGER.info("DOA not available, skipping turn-to-sound")
749
- return
750
-
751
- angle_rad, speech_detected = doa
752
- _LOGGER.debug(
753
- "DOA raw: angle=%.3f rad (%.1f°), speech=%s", angle_rad, math.degrees(angle_rad), speech_detected
754
- )
755
-
756
- # Convert DOA to direction vector in head frame
757
- # SDK convention: p_head = [sin(doa), cos(doa), 0]
758
- # where X+ is front, Y+ is left
759
- dir_x = math.sin(angle_rad) # Front component
760
- dir_y = math.cos(angle_rad) # Left component
761
-
762
- # Calculate yaw angle from direction vector
763
- # DOA convention: 0 = left, π/2 = front, π = right
764
- # Robot yaw: positive = turn right, negative = turn left
765
- # Invert the sign: left(0) → +90° (turn right toward left sound)
766
- # right(π) → -90° (turn left toward right sound)
767
- yaw_rad = -(angle_rad - math.pi / 2)
768
- yaw_deg = math.degrees(yaw_rad)
769
-
770
- _LOGGER.debug("DOA direction: x=%.2f, y=%.2f, yaw=%.1f°", dir_x, dir_y, yaw_deg)
771
-
772
- # Only turn if angle is significant (> 10°) to avoid noise
773
- DOA_THRESHOLD_DEG = 10.0
774
- if abs(yaw_deg) < DOA_THRESHOLD_DEG:
775
- _LOGGER.debug("DOA angle %.1f° below threshold (%.1f°), skipping turn", yaw_deg, DOA_THRESHOLD_DEG)
776
- return
777
-
778
- # Apply 80% of DOA angle as conservative strategy
779
- # This accounts for potential DOA inaccuracy
780
- DOA_SCALE = 0.8
781
- target_yaw_deg = yaw_deg * DOA_SCALE
782
-
783
- _LOGGER.info("Turning toward sound source: DOA=%.1f°, target=%.1f°", yaw_deg, target_yaw_deg)
784
-
785
- # Use MovementManager to turn (non-blocking)
786
- if self.state.motion and self.state.motion.movement_manager:
787
- self.state.motion.movement_manager.turn_to_angle(
788
- target_yaw_deg,
789
- duration=0.5, # Quick turn
790
- )
791
- except Exception as e:
792
- _LOGGER.error("Error in turn-to-sound: %s", e)
793
-
794
- def _reachy_on_listening(self) -> None:
795
- """Called when listening for speech (HA state: Listening)."""
796
- # Enable high-frequency face tracking during listening
797
- self._set_conversation_mode(True)
798
-
799
- # Resume face tracking according to user preference (may have been paused during speaking)
800
- if self.camera_server is not None:
801
- try:
802
- enabled = bool(getattr(self.state.preferences, "face_tracking_enabled", False))
803
- self.camera_server.set_face_tracking_enabled(enabled)
804
- except Exception as e:
805
- _LOGGER.debug("Failed to resume face tracking: %s", e)
806
-
807
- if not self.state.motion_enabled:
808
- return
809
- try:
810
- _LOGGER.debug("Reachy Mini: Listening animation")
811
- if self.state.motion:
812
- self.state.motion.on_listening()
813
- except Exception as e:
814
- _LOGGER.error("Reachy Mini motion error: %s", e)
815
-
816
- def _reachy_on_thinking(self) -> None:
817
- """Called when processing speech (HA state: Processing)."""
818
- # Resume face tracking according to user preference (may have been paused during speaking)
819
- if self.camera_server is not None:
820
- try:
821
- enabled = bool(getattr(self.state.preferences, "face_tracking_enabled", False))
822
- self.camera_server.set_face_tracking_enabled(enabled)
823
- except Exception as e:
824
- _LOGGER.debug("Failed to resume face tracking: %s", e)
825
-
826
- if not self.state.motion_enabled or not self.state.reachy_mini:
827
- return
828
- try:
829
- _LOGGER.debug("Reachy Mini: Thinking animation")
830
- if self.state.motion:
831
- self.state.motion.on_thinking()
832
- except Exception as e:
833
- _LOGGER.error("Reachy Mini motion error: %s", e)
834
-
835
- def _reachy_on_speaking(self) -> None:
836
- """Called when TTS is playing (HA state: Responding)."""
837
- # Pause face tracking during speaking - robot will use speaking animation instead
838
- if self.camera_server is not None:
839
- try:
840
- self.camera_server.set_face_tracking_enabled(False)
841
- _LOGGER.debug("Face tracking paused during speaking")
842
- except Exception as e:
843
- _LOGGER.debug("Failed to pause face tracking: %s", e)
844
-
845
- if not self.state.motion_enabled:
846
- _LOGGER.warning("Motion disabled, skipping speaking animation")
847
- return
848
- if not self.state.motion:
849
- _LOGGER.warning("No motion controller, skipping speaking animation")
850
- return
851
-
852
- try:
853
- _LOGGER.debug("Reachy Mini: Starting speaking animation")
854
- self.state.motion.on_speaking_start()
855
- except Exception as e:
856
- _LOGGER.error("Reachy Mini motion error: %s", e)
857
-
858
- def _reachy_on_idle(self) -> None:
859
- """Called when returning to idle state (HA state: Idle)."""
860
- # Disable high-frequency face tracking, switch to adaptive mode
861
- self._set_conversation_mode(False)
862
-
863
- # Resume face tracking according to user preference (may have been paused during speaking)
864
- if self.camera_server is not None:
865
- try:
866
- enabled = bool(getattr(self.state.preferences, "face_tracking_enabled", False))
867
- self.camera_server.set_face_tracking_enabled(enabled)
868
- except Exception as e:
869
- _LOGGER.debug("Failed to resume face tracking: %s", e)
870
-
871
- if not self.state.motion_enabled or not self.state.reachy_mini:
872
- return
873
- try:
874
- _LOGGER.debug("Reachy Mini: Idle animation")
875
- if self.state.motion:
876
- self.state.motion.on_idle()
877
- except Exception as e:
878
- _LOGGER.error("Reachy Mini motion error: %s", e)
879
-
880
- def _set_conversation_mode(self, in_conversation: bool) -> None:
881
- """Set conversation mode for adaptive face tracking.
882
-
883
- When in conversation, face tracking runs at high frequency.
884
- When idle, face tracking uses adaptive rate to save CPU.
885
- """
886
- if self.camera_server is not None:
887
- try:
888
- self.camera_server.set_conversation_mode(in_conversation)
889
- except Exception as e:
890
- _LOGGER.debug("Failed to set conversation mode: %s", e)
891
-
892
- def _reachy_on_timer_finished(self) -> None:
893
- """Called when a timer finishes."""
894
- if not self.state.motion_enabled or not self.state.reachy_mini:
895
- return
896
- try:
897
- _LOGGER.debug("Reachy Mini: Timer finished animation")
898
- if self.state.motion:
899
- self.state.motion.on_timer_finished()
900
- except Exception as e:
901
- _LOGGER.error("Reachy Mini motion error: %s", e)
902
-
903
- def _play_emotion(self, emotion_name: str) -> None:
904
- """Play an emotion/expression from the emotions library.
905
-
906
- Uses the MovementManager's queue_emotion_move() method which samples
907
- poses via RecordedMoves.evaluate(t) in the control loop. This avoids
908
- "a move is currently running" warnings from the SDK daemon.
909
-
910
- Args:
911
- emotion_name: Name of the emotion (e.g., "happy1", "sad1", etc.)
912
- """
913
- try:
914
- # Use MovementManager to play emotion (non-blocking, integrated with control loop)
915
- if self.state.motion and self.state.motion.movement_manager:
916
- movement_manager = self.state.motion.movement_manager
917
- if movement_manager.queue_emotion_move(emotion_name):
918
- _LOGGER.info(f"Queued emotion move: {emotion_name}")
919
- else:
920
- _LOGGER.warning(f"Failed to queue emotion: {emotion_name}")
921
- else:
922
- _LOGGER.warning("Cannot play emotion: no movement manager available")
923
-
924
- except Exception as e:
925
- _LOGGER.error(f"Error playing emotion {emotion_name}: {e}")
926
-
927
- def _trigger_wake_word(self) -> None:
928
- """Trigger wake word detection (simulate hearing the wake word).
929
-
930
- This is called by GestureActionMapper when a "call" gesture is detected,
931
- allowing users to activate the voice assistant with a hand gesture.
932
- """
933
- try:
934
- # The wake word detected event triggers the voice pipeline
935
- _LOGGER.info("Gesture triggered wake word - starting voice assistant")
936
- # Set the wake word event to simulate detection
937
- if hasattr(self.state, "last_wake_word"):
938
- self.state.last_wake_word = "gesture"
939
- # Trigger the run_voice_assistant logic
940
- self.start_voice_assistant()
941
- except Exception as e:
942
- _LOGGER.error(f"Error triggering wake word from gesture: {e}")
943
-
944
- def _stop_current_tts(self) -> None:
945
- """Stop current TTS playback.
946
-
947
- Called by GestureActionMapper when a "stop" gesture is detected,
948
- allowing users to interrupt the robot's speech.
949
- """
950
- try:
951
- _LOGGER.info("Gesture triggered TTS stop")
952
- if self.state.tts_player:
953
- self.state.tts_player.stop()
954
- if self.state.music_player:
955
- self.state.music_player.stop()
956
- except Exception as e:
957
- _LOGGER.error(f"Error stopping TTS from gesture: {e}")
958
-
959
- def _send_gesture_event_to_ha(self, event_name: str) -> None:
960
- """Send a gesture event to Home Assistant.
961
-
962
- This allows HA automations to react to gestures like "one", "two", etc.
963
-
964
- Args:
965
- event_name: Name of the gesture event (e.g., "gesture_one")
966
- """
967
- try:
968
- _LOGGER.info(f"Sending gesture event to HA: {event_name}")
969
- # Fire an event to Home Assistant via the satellite protocol
970
- # This uses the VoiceAssistantEventResponse mechanism
971
- # For now, we can use the timer event mechanism or a custom event
972
- # Home Assistant can subscribe to these events via ESPHome integration
973
- except Exception as e:
974
- _LOGGER.error(f"Error sending gesture event to HA: {e}")
975
-
976
- def _handle_ha_state_change(self, msg: HomeAssistantStateResponse) -> None:
977
- """Handle Home Assistant state change via ESPHome bidirectional communication.
978
-
979
- This method is called when Home Assistant sends state updates through
980
- the ESPHome protocol. It uses EventEmotionMapper to trigger robot
981
- emotions based on configured entity state changes.
982
-
983
- Args:
984
- msg: HomeAssistantStateResponse containing entity_id and state
985
- """
986
- try:
987
- entity_id = msg.entity_id
988
- new_state = msg.state
989
-
990
- # Track old state for proper event handling
991
- old_state = self._ha_entity_states.get(entity_id, "unknown")
992
- self._ha_entity_states[entity_id] = new_state
993
-
994
- _LOGGER.debug("HA state change: %s: %s -> %s", entity_id, old_state, new_state)
995
-
996
- # Let EventEmotionMapper handle the state change
997
- emotion = self._event_emotion_mapper.handle_state_change(entity_id, old_state, new_state)
998
- if emotion:
999
- _LOGGER.info("HA event triggered emotion: %s from %s", emotion, entity_id)
1000
-
1001
- except Exception as e:
1002
- _LOGGER.error("Error handling HA state change: %s", e)
1003
-
1004
- def handle_detected_gesture(self, gesture_name: str, confidence: float) -> bool:
1005
- """Handle a detected gesture by triggering mapped actions.
1006
-
1007
- This should be called when a gesture is detected to trigger local actions
1008
- (emotions, TTS control, HA events) based on the gesture mappings.
1009
-
1010
- Args:
1011
- gesture_name: Name of the detected gesture
1012
- confidence: Detection confidence (0-1)
1013
-
1014
- Returns:
1015
- True if an action was triggered, False otherwise
1016
- """
1017
- return self._gesture_action_mapper.handle_gesture(gesture_name, confidence)
1018
-
1019
- def suspend(self) -> None:
1020
- """Suspend the satellite for sleep mode.
1021
-
1022
- Stops any current playback and releases resources.
1023
- """
1024
- _LOGGER.info("Suspending VoiceSatellite for sleep...")
1025
-
1026
- # Stop any current TTS/music
1027
- if self.state.tts_player:
1028
- self.state.tts_player.stop()
1029
- if self.state.music_player:
1030
- self.state.music_player.stop()
1031
-
1032
- # Keep configured wake words intact.
1033
- # Audio processing is paused by sleep/mute lifecycle, so clearing wake words here
1034
- # can cause Home Assistant UI to temporarily show an empty wake word selection.
1035
-
1036
- # Reset conversation state
1037
- self._tts_url = None
1038
- self._tts_played = True
1039
- self._continue_conversation = False
1040
- self._is_streaming_audio = False
1041
-
1042
- _LOGGER.info("VoiceSatellite suspended")
1043
-
1044
- def resume(self) -> None:
1045
- """Resume the satellite after sleep."""
1046
- _LOGGER.info("Resuming VoiceSatellite from sleep...")
1047
-
1048
- # Ensure wake word processing context is refreshed after resume.
1049
- self.state.wake_words_changed = True
1050
-
1051
- _LOGGER.info("VoiceSatellite resumed")
 
1
+ """Voice satellite protocol for Reachy Mini."""
2
+
3
+ import hashlib
4
+ import logging
5
+ import math
6
+ import posixpath
7
+ import shutil
8
+ import time
9
+ from collections.abc import Iterable
10
+ from typing import Dict, Optional, Set, Union, TYPE_CHECKING
11
+ from urllib.parse import urlparse, urlunparse
12
+ from urllib.request import urlopen
13
+
14
+ if TYPE_CHECKING:
15
+ from .camera_server import MJPEGCameraServer
16
+
17
+ # pylint: disable=no-name-in-module
18
+ from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
19
+ ButtonCommandRequest,
20
+ CameraImageRequest,
21
+ DeviceInfoRequest,
22
+ DeviceInfoResponse,
23
+ ListEntitiesDoneResponse,
24
+ ListEntitiesRequest,
25
+ MediaPlayerCommandRequest,
26
+ NumberCommandRequest,
27
+ SelectCommandRequest,
28
+ SubscribeHomeAssistantStatesRequest,
29
+ SubscribeStatesRequest,
30
+ SwitchCommandRequest,
31
+ VoiceAssistantAnnounceFinished,
32
+ VoiceAssistantAnnounceRequest,
33
+ VoiceAssistantAudio,
34
+ VoiceAssistantConfigurationRequest,
35
+ VoiceAssistantConfigurationResponse,
36
+ VoiceAssistantEventResponse,
37
+ VoiceAssistantExternalWakeWord,
38
+ VoiceAssistantRequest,
39
+ VoiceAssistantSetConfiguration,
40
+ VoiceAssistantTimerEventResponse,
41
+ VoiceAssistantWakeWord,
42
+ )
43
+ from aioesphomeapi.model import (
44
+ VoiceAssistantEventType,
45
+ VoiceAssistantFeature,
46
+ VoiceAssistantTimerEventType,
47
+ )
48
+ from google.protobuf import message
49
+ from pymicro_wakeword import MicroWakeWord
50
+ from pyopen_wakeword import OpenWakeWord
51
+
52
+ from .api_server import APIServer
53
+ from .entity import MediaPlayerEntity
54
+ from .entity_registry import EntityRegistry, get_entity_key
55
+ from .models import AvailableWakeWord, ServerState, WakeWordType
56
+ from .util import call_all
57
+ from .reachy_controller import ReachyController
58
+
59
+ _LOGGER = logging.getLogger(__name__)
60
+
61
+
62
+ class VoiceSatelliteProtocol(APIServer):
63
+ """Voice satellite protocol handler for ESPHome."""
64
+
65
+ def __init__(self, state: ServerState, camera_server: Optional["MJPEGCameraServer"] = None) -> None:
66
+ super().__init__(state.name)
67
+ self.state = state
68
+ self.state.satellite = self
69
+ self.camera_server = camera_server
70
+
71
+ # Initialize streaming state early (before entity setup)
72
+ self._is_streaming_audio = False
73
+ self._tts_url: Optional[str] = None
74
+ self._tts_played = False
75
+ self._continue_conversation = False
76
+ self._timer_finished = False
77
+ self._external_wake_words: Dict[str, VoiceAssistantExternalWakeWord] = {}
78
+
79
+ # Conversation tracking for continuous conversation
80
+ self._conversation_id: Optional[str] = None
81
+ self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
82
+ self._last_conversation_time = 0.0
83
+
84
+ # Initialize Reachy controller
85
+ self.reachy_controller = ReachyController(state.reachy_mini)
86
+
87
+ # Connect MovementManager to ReachyController for pose control from HA
88
+ if state.motion is not None and state.motion.movement_manager is not None:
89
+ self.reachy_controller.set_movement_manager(state.motion.movement_manager)
90
+
91
+ # Setup speech sway callback for audio-driven head motion
92
+ def sway_callback(sway: dict) -> None:
93
+ mm = state.motion.movement_manager
94
+ if mm is not None:
95
+ mm.set_speech_sway(
96
+ sway.get("x_m", 0.0),
97
+ sway.get("y_m", 0.0),
98
+ sway.get("z_m", 0.0),
99
+ sway.get("roll_rad", 0.0),
100
+ sway.get("pitch_rad", 0.0),
101
+ sway.get("yaw_rad", 0.0),
102
+ )
103
+
104
+ state.tts_player.set_sway_callback(sway_callback)
105
+ _LOGGER.info("Speech sway callback configured for TTS player")
106
+
107
+ # Initialize entity registry
108
+ self._entity_registry = EntityRegistry(
109
+ server=self,
110
+ reachy_controller=self.reachy_controller,
111
+ camera_server=camera_server,
112
+ play_emotion_callback=self._play_emotion,
113
+ )
114
+
115
+ # Connect gesture state callback
116
+ if camera_server:
117
+ camera_server.set_gesture_state_callback(self._entity_registry.update_gesture_state)
118
+
119
+ # Only setup entities once (check if already initialized)
120
+ # This prevents duplicate entity registration on reconnection
121
+ if not getattr(self.state, '_entities_initialized', False):
122
+ if self.state.media_player_entity is None:
123
+ self.state.media_player_entity = MediaPlayerEntity(
124
+ server=self,
125
+ key=get_entity_key("reachy_mini_media_player"),
126
+ name="Media Player",
127
+ object_id="reachy_mini_media_player",
128
+ music_player=state.music_player,
129
+ announce_player=state.tts_player,
130
+ )
131
+ self.state.entities.append(self.state.media_player_entity)
132
+
133
+ # Setup all entities using the registry
134
+ self._entity_registry.setup_all_entities(self.state.entities)
135
+
136
+ # Mark entities as initialized
137
+ self.state._entities_initialized = True
138
+ _LOGGER.info("Entities initialized: %d total", len(self.state.entities))
139
+ else:
140
+ _LOGGER.debug("Entities already initialized, skipping setup")
141
+ # Update server reference in existing entities
142
+ for entity in self.state.entities:
143
+ entity.server = self
144
+
145
+ def handle_voice_event(
146
+ self, event_type: VoiceAssistantEventType, data: Dict[str, str]
147
+ ) -> None:
148
+ _LOGGER.debug("Voice event: type=%s, data=%s", event_type.name, data)
149
+
150
+ if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_START:
151
+ self._tts_url = data.get("url")
152
+ self._tts_played = False
153
+ self._continue_conversation = False
154
+ # Reachy Mini: Start listening animation
155
+ self._reachy_on_listening()
156
+
157
+ # Note: TTS URL requires HA authentication, cannot pre-download
158
+ # Speaking animation uses JSON-defined multi-frequency sway instead
159
+
160
+ elif event_type in (
161
+ VoiceAssistantEventType.VOICE_ASSISTANT_STT_VAD_END,
162
+ VoiceAssistantEventType.VOICE_ASSISTANT_STT_END,
163
+ ):
164
+ self._is_streaming_audio = False
165
+ # Reachy Mini: Stop listening, start thinking
166
+ self._reachy_on_thinking()
167
+
168
+ elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_INTENT_PROGRESS:
169
+ if data.get("tts_start_streaming") == "1":
170
+ # Start streaming early
171
+ self.play_tts()
172
+
173
+ elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_INTENT_END:
174
+ if data.get("continue_conversation") == "1":
175
+ self._continue_conversation = True
176
+
177
+ elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
178
+ # Reachy Mini: Start speaking animation (JSON-defined multi-frequency sway)
179
+ _LOGGER.debug("TTS_START event received, triggering speaking animation")
180
+ self._reachy_on_speaking()
181
+
182
+ elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
183
+ self._tts_url = data.get("url")
184
+ self.play_tts()
185
+
186
+ elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_END:
187
+ # Pipeline run ended
188
+ self._is_streaming_audio = False
189
+
190
+ # Following reference project pattern
191
+ if not self._tts_played:
192
+ self._tts_finished()
193
+
194
+ self._tts_played = False
195
+
196
+ def handle_timer_event(
197
+ self,
198
+ event_type: VoiceAssistantTimerEventType,
199
+ msg: VoiceAssistantTimerEventResponse,
200
+ ) -> None:
201
+ _LOGGER.debug("Timer event: type=%s", event_type.name)
202
+
203
+ if event_type == VoiceAssistantTimerEventType.VOICE_ASSISTANT_TIMER_FINISHED:
204
+ if not self._timer_finished:
205
+ self.state.active_wake_words.add(self.state.stop_word.id)
206
+ self._timer_finished = True
207
+ self.duck()
208
+ self._play_timer_finished()
209
+ # Reachy Mini: Timer finished animation
210
+ self._reachy_on_timer_finished()
211
+
212
def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
    """Dispatch one protobuf message received from Home Assistant.

    Yields zero or more response messages to send back over the
    ESPHome native API connection.
    """
    if isinstance(msg, VoiceAssistantEventResponse):
        # Voice pipeline event: flatten key/value args and forward.
        event_data: Dict[str, str] = {arg.name: arg.value for arg in msg.data}
        self.handle_voice_event(VoiceAssistantEventType(msg.event_type), event_data)

    elif isinstance(msg, VoiceAssistantAnnounceRequest):
        _LOGGER.debug("Announcing: %s", msg.text)
        assert self.state.media_player_entity is not None

        # Optional pre-announce chime, then the announcement itself.
        play_urls = []
        if msg.preannounce_media_id:
            play_urls.append(msg.preannounce_media_id)
        play_urls.append(msg.media_id)

        # Arm the stop word so the announcement can be interrupted.
        self.state.active_wake_words.add(self.state.stop_word.id)
        self._continue_conversation = msg.start_conversation
        self.duck()

        yield from self.state.media_player_entity.play(
            play_urls, announcement=True, done_callback=self._tts_finished
        )

    elif isinstance(msg, VoiceAssistantTimerEventResponse):
        self.handle_timer_event(VoiceAssistantTimerEventType(msg.event_type), msg)

    elif isinstance(msg, DeviceInfoRequest):
        yield DeviceInfoResponse(
            uses_password=False,
            name=self.state.name,
            mac_address=self.state.mac_address,
            voice_assistant_feature_flags=(
                VoiceAssistantFeature.VOICE_ASSISTANT
                | VoiceAssistantFeature.API_AUDIO
                | VoiceAssistantFeature.ANNOUNCE
                | VoiceAssistantFeature.START_CONVERSATION
                | VoiceAssistantFeature.TIMERS
            ),
        )

    elif isinstance(
        msg,
        (
            ListEntitiesRequest,
            SubscribeHomeAssistantStatesRequest,
            SubscribeStatesRequest,
            MediaPlayerCommandRequest,
            NumberCommandRequest,
            SwitchCommandRequest,
            SelectCommandRequest,
            ButtonCommandRequest,
            CameraImageRequest,
        ),
    ):
        # Entity-scoped requests: give every entity a chance to answer.
        for entity in self.state.entities:
            yield from entity.handle_message(msg)

        if isinstance(msg, ListEntitiesRequest):
            yield ListEntitiesDoneResponse()

    elif isinstance(msg, VoiceAssistantConfigurationRequest):
        available = [
            VoiceAssistantWakeWord(
                id=word.id,
                wake_word=word.wake_word,
                trained_languages=word.trained_languages,
            )
            for word in self.state.available_wake_words.values()
        ]

        # Offer HA's external micro wake words as downloadable options.
        for external in msg.external_wake_words:
            if external.model_type != "micro":
                continue

            available.append(
                VoiceAssistantWakeWord(
                    id=external.id,
                    wake_word=external.wake_word,
                    trained_languages=external.trained_languages,
                )
            )
            self._external_wake_words[external.id] = external

        yield VoiceAssistantConfigurationResponse(
            available_wake_words=available,
            active_wake_words=[
                word.id
                for word in self.state.wake_words.values()
                if word.id in self.state.active_wake_words
            ],
            max_active_wake_words=2,
        )

        _LOGGER.info("Connected to Home Assistant")

    elif isinstance(msg, VoiceAssistantSetConfiguration):
        # Change the set of active wake words, loading models as needed.
        active_wake_words: Set[str] = set()

        for wake_word_id in msg.active_wake_words:
            if wake_word_id in self.state.wake_words:
                # Model already loaded; just mark it active.
                active_wake_words.add(wake_word_id)
                continue

            model_info = self.state.available_wake_words.get(wake_word_id)
            if not model_info:
                # Unknown locally: may be an external model to download.
                external_wake_word = self._external_wake_words.get(wake_word_id)
                if not external_wake_word:
                    _LOGGER.warning("Wake word not found: %s", wake_word_id)
                    continue

                model_info = self._download_external_wake_word(external_wake_word)
                if not model_info:
                    continue

                self.state.available_wake_words[wake_word_id] = model_info

            _LOGGER.debug("Loading wake word: %s", model_info.wake_word_path)
            loaded_model = model_info.load()
            # Tag the model with its id for later identification.
            setattr(loaded_model, 'id', wake_word_id)
            self.state.wake_words[wake_word_id] = loaded_model
            _LOGGER.info("Wake word loaded: %s", wake_word_id)
            active_wake_words.add(wake_word_id)
            # Keep looping: load ALL requested wake words, not only the first.

        self.state.active_wake_words = active_wake_words
        _LOGGER.debug("Active wake words: %s", active_wake_words)

        # Persist the selection so it survives restarts.
        self.state.preferences.active_wake_words = list(active_wake_words)
        self.state.save_preferences()
        self.state.wake_words_changed = True
349
def handle_audio(self, audio_chunk: bytes) -> None:
    """Forward one microphone chunk to Home Assistant while streaming."""
    if self._is_streaming_audio:
        self.send_messages([VoiceAssistantAudio(data=audio_chunk)])
354
def _get_or_create_conversation_id(self) -> str:
    """Return the current conversation id, minting a new one if expired.

    The same id is reused while turns arrive within the conversation
    timeout so Home Assistant preserves conversational context.
    """
    import uuid

    now = time.time()
    expired = (
        self._conversation_id is None
        or now - self._last_conversation_time > self._conversation_timeout
    )
    if expired:
        self._conversation_id = str(uuid.uuid4())
        _LOGGER.debug("Created new conversation_id: %s", self._conversation_id)

    # Any activity refreshes the timeout window.
    self._last_conversation_time = now
    return self._conversation_id
370
def _clear_conversation(self) -> None:
    """Reset conversation tracking when leaving conversation mode."""
    self._continue_conversation = False
    self._conversation_id = None
375
def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
    """Handle a wake word detection by starting the voice pipeline.

    If a timer alarm is currently ringing, the wake word silences the
    alarm instead of starting a new pipeline run.
    """
    if self._timer_finished:
        # Wake word doubles as "stop the alarm".
        self._timer_finished = False
        self.state.tts_player.stop()
        _LOGGER.debug("Stopping timer finished sound")
        return

    phrase = wake_word.wake_word
    _LOGGER.debug("Detected wake word: %s", phrase)

    # Orient the head toward the speaker using direction-of-arrival.
    self._turn_to_sound_source()

    # Reuse or mint a conversation id so HA keeps context across turns.
    conversation_id = self._get_or_create_conversation_id()

    self.send_messages(
        [
            VoiceAssistantRequest(
                start=True,
                wake_word_phrase=phrase,
                conversation_id=conversation_id,
            )
        ]
    )
    self.duck()
    self._is_streaming_audio = True
    self.state.tts_player.play(self.state.wakeup_sound)
404
def stop(self) -> None:
    """Stop current playback (e.g. the user said the stop word)."""
    self.state.active_wake_words.discard(self.state.stop_word.id)
    self.state.tts_player.stop()

    if not self._timer_finished:
        # Normal TTS was interrupted; run the usual end-of-TTS path.
        _LOGGER.debug("TTS response stopped manually")
        self._tts_finished()
    else:
        # The ringing alarm was stopped instead.
        self._timer_finished = False
        _LOGGER.debug("Stopping timer finished sound")
416
def play_tts(self) -> None:
    """Start playback of the pending TTS response exactly once."""
    if self._tts_played or not self._tts_url:
        return

    self._tts_played = True
    _LOGGER.debug("Playing TTS response: %s", self._tts_url)

    # Arm the stop word so playback can be interrupted by voice.
    self.state.active_wake_words.add(self.state.stop_word.id)
    self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
426
def duck(self) -> None:
    """Lower background music while voice interaction is active."""
    _LOGGER.debug("Ducking music")
    self.state.music_player.duck()
    # Also pause Sendspin so it cannot fight over the audio device.
    self.state.music_player.pause_sendspin()
432
def unduck(self) -> None:
    """Restore background music after voice interaction ends."""
    _LOGGER.debug("Unducking music")
    self.state.music_player.unduck()
    # Resume Sendspin audio as well.
    self.state.music_player.resume_sendspin()
438
def _tts_finished(self) -> None:
    """Handle the end of TTS audio playback.

    Decides whether to immediately re-open the microphone for a
    follow-up turn (continuous conversation) or return to idle.
    """
    self.state.active_wake_words.discard(self.state.stop_word.id)
    self.send_messages([VoiceAssistantAnnounceFinished()])

    # Continue when either the local "continuous conversation" switch is
    # on (unconditional) or Home Assistant asked to keep the turn open.
    continuous_mode = self.state.preferences.continuous_conversation
    if continuous_mode or self._continue_conversation:
        _LOGGER.debug(
            "Continuing conversation (our_switch=%s, ha_request=%s)",
            continuous_mode,
            self._continue_conversation,
        )

        # Prompt sound tells the user we are listening again.
        self.state.tts_player.play(self.state.wakeup_sound)

        # Same conversation id keeps context on the HA side.
        self.send_messages(
            [
                VoiceAssistantRequest(
                    start=True,
                    conversation_id=self._get_or_create_conversation_id(),
                )
            ]
        )
        self._is_streaming_audio = True

        # Keep the robot in its listening pose.
        self._reachy_on_listening()
    else:
        self._clear_conversation()
        self.unduck()
        _LOGGER.debug("Conversation finished")

        # Reachy Mini: return to the idle animation.
        self._reachy_on_idle()
477
def _play_timer_finished(self) -> None:
    """Loop the alarm sound until the timer is acknowledged."""
    if not self._timer_finished:
        # Alarm was dismissed; restore music volume and end the loop.
        self.unduck()
        return

    # Re-trigger ourselves with a one-second gap after each playback.
    self.state.tts_player.play(
        self.state.timer_finished_sound,
        done_callback=lambda: call_all(
            lambda: time.sleep(1.0), self._play_timer_finished
        ),
    )
489
def connection_lost(self, exc):
    """Reset per-connection state when Home Assistant disconnects."""
    super().connection_lost(exc)
    _LOGGER.info("Disconnected from Home Assistant")
    # Drop any in-flight streaming / TTS / conversation state.
    self._is_streaming_audio = False
    self._tts_url = None
    self._tts_played = False
    self._continue_conversation = False
498
def _download_external_wake_word(
    self, external_wake_word: VoiceAssistantExternalWakeWord
) -> Optional[AvailableWakeWord]:
    """Download an external micro wake word (config + model) if needed.

    The model download is skipped when a local copy already matches the
    advertised size and SHA-256 hash.

    Args:
        external_wake_word: Wake word descriptor advertised by HA.

    Returns:
        An AvailableWakeWord pointing at the local config file, or None
        on any download failure (bad status, network error, timeout).
    """
    eww_dir = self.state.download_dir / "external_wake_words"
    eww_dir.mkdir(parents=True, exist_ok=True)

    config_path = eww_dir / f"{external_wake_word.id}.json"
    should_download_config = not config_path.exists()

    # Decide whether the model file needs (re-)downloading.
    model_path = eww_dir / f"{external_wake_word.id}.tflite"
    should_download_model = True

    if model_path.exists():
        model_size = model_path.stat().st_size
        if model_size == external_wake_word.model_size:
            with open(model_path, "rb") as model_file:
                model_hash = hashlib.sha256(model_file.read()).hexdigest()

            if model_hash == external_wake_word.model_hash:
                should_download_model = False
                _LOGGER.debug(
                    "Model size and hash match for %s. Skipping download.",
                    external_wake_word.id,
                )

    try:
        if should_download_config or should_download_model:
            # Download config.
            # Timeout keeps a dead server from hanging the connection
            # handler; HTTPError/URLError are caught below.
            _LOGGER.debug(
                "Downloading %s to %s", external_wake_word.url, config_path
            )
            with urlopen(external_wake_word.url, timeout=30) as request:
                if request.status != 200:
                    _LOGGER.warning(
                        "Failed to download: %s, status=%s",
                        external_wake_word.url,
                        request.status,
                    )
                    return None

                with open(config_path, "wb") as config_file:
                    shutil.copyfileobj(request, config_file)

        if should_download_model:
            # Model lives next to the config: swap the URL's file name.
            parsed_url = urlparse(external_wake_word.url)
            parsed_url = parsed_url._replace(
                path=posixpath.join(
                    posixpath.dirname(parsed_url.path), model_path.name
                )
            )
            model_url = urlunparse(parsed_url)

            _LOGGER.debug("Downloading %s to %s", model_url, model_path)
            with urlopen(model_url, timeout=30) as request:
                if request.status != 200:
                    _LOGGER.warning(
                        "Failed to download: %s, status=%s", model_url, request.status
                    )
                    return None

                with open(model_path, "wb") as model_file:
                    shutil.copyfileobj(request, model_file)
    except OSError as err:
        # urllib raises URLError/HTTPError (both OSError subclasses) on
        # network failures; honor the "return None on failure" contract
        # instead of letting the exception kill the API connection.
        _LOGGER.warning(
            "Wake word download failed for %s: %s", external_wake_word.id, err
        )
        return None

    return AvailableWakeWord(
        id=external_wake_word.id,
        type=WakeWordType.MICRO_WAKE_WORD,
        wake_word=external_wake_word.wake_word,
        trained_languages=external_wake_word.trained_languages,
        wake_word_path=config_path,
    )
566
+ # -------------------------------------------------------------------------
567
+ # Reachy Mini Motion Control
568
+ # -------------------------------------------------------------------------
569
+
570
def _turn_to_sound_source(self) -> None:
    """Orient the head toward the detected speaker at wakeup.

    Reads the microphone array's direction of arrival (DOA) once and
    commands a quick yaw turn; face tracking takes over afterwards.

    DOA convention (from the SDK, p_head = [sin(doa), cos(doa), 0]):
    0 rad = left (Y+), pi/2 rad = front (X+), pi rad = right (Y-).

    Note: speech_detected is not checked because by the time wake word
    detection completes, the user may have stopped speaking.
    """
    if not self.state.motion_enabled or not self.state.reachy_mini:
        _LOGGER.info("DOA turn-to-sound: motion disabled or no robot")
        return

    try:
        # Read the DOA a single time (it is a momentary estimate).
        doa = self.reachy_controller.get_doa_angle()
        if doa is None:
            _LOGGER.info("DOA not available, skipping turn-to-sound")
            return

        angle_rad, speech_detected = doa
        _LOGGER.debug(
            "DOA raw: angle=%.3f rad (%.1f°), speech=%s",
            angle_rad,
            math.degrees(angle_rad),
            speech_detected,
        )

        # Direction vector in the head frame (X+ = front, Y+ = left);
        # computed for diagnostics only.
        dir_x = math.sin(angle_rad)
        dir_y = math.cos(angle_rad)

        # Map DOA to robot yaw: left(0) -> -90°, front(pi/2) -> 0°,
        # right(pi) -> +90°.
        yaw_rad = angle_rad - math.pi / 2
        yaw_deg = math.degrees(yaw_rad)

        _LOGGER.debug(
            "DOA direction: x=%.2f, y=%.2f, yaw=%.1f°", dir_x, dir_y, yaw_deg
        )

        # Ignore small angles to avoid jitter from DOA noise.
        DOA_THRESHOLD_DEG = 10.0
        if abs(yaw_deg) < DOA_THRESHOLD_DEG:
            _LOGGER.debug(
                "DOA angle %.1f° below threshold (%.1f°), skipping turn",
                yaw_deg,
                DOA_THRESHOLD_DEG,
            )
            return

        # Turn only 80% of the estimated angle — conservative strategy
        # that absorbs DOA inaccuracy.
        DOA_SCALE = 0.8
        target_yaw_deg = yaw_deg * DOA_SCALE

        _LOGGER.info(
            "Turning toward sound source: DOA=%.1f°, target=%.1f°",
            yaw_deg,
            target_yaw_deg,
        )

        # Quick, non-blocking turn via the movement manager.
        if self.state.motion and self.state.motion.movement_manager:
            self.state.motion.movement_manager.turn_to_angle(
                target_yaw_deg, duration=0.5
            )
    except Exception as e:
        _LOGGER.error("Error in turn-to-sound: %s", e)
642
def _reachy_on_listening(self) -> None:
    """Enter the listening state (HA state: Listening)."""
    # High-frequency face tracking while actively listening.
    self._set_conversation_mode(True)

    # Re-enable face tracking (speaking may have paused it).
    if self.camera_server is not None:
        try:
            self.camera_server.set_face_tracking_enabled(True)
        except Exception as err:
            _LOGGER.debug("Failed to resume face tracking: %s", err)

    if not self.state.motion_enabled or not self.state.reachy_mini:
        return
    try:
        _LOGGER.debug("Reachy Mini: Listening animation")
        if self.state.motion:
            self.state.motion.on_listening()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
663
def _reachy_on_thinking(self) -> None:
    """Enter the thinking state (HA state: Processing)."""
    # Re-enable face tracking (speaking may have paused it).
    if self.camera_server is not None:
        try:
            self.camera_server.set_face_tracking_enabled(True)
        except Exception as err:
            _LOGGER.debug("Failed to resume face tracking: %s", err)

    if not self.state.motion_enabled or not self.state.reachy_mini:
        return
    try:
        _LOGGER.debug("Reachy Mini: Thinking animation")
        if self.state.motion:
            self.state.motion.on_thinking()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
681
def _reachy_on_speaking(self) -> None:
    """Enter the speaking state (HA state: Responding).

    Face tracking is paused; the speaking animation drives the head.
    """
    if self.camera_server is not None:
        try:
            self.camera_server.set_face_tracking_enabled(False)
            _LOGGER.debug("Face tracking paused during speaking")
        except Exception as err:
            _LOGGER.debug("Failed to pause face tracking: %s", err)

    # Each precondition logs its own reason to ease debugging.
    if not self.state.motion_enabled:
        _LOGGER.warning("Motion disabled, skipping speaking animation")
        return
    if not self.state.reachy_mini:
        _LOGGER.warning("No reachy_mini instance, skipping speaking animation")
        return
    if not self.state.motion:
        _LOGGER.warning("No motion controller, skipping speaking animation")
        return

    try:
        _LOGGER.debug("Reachy Mini: Starting speaking animation")
        self.state.motion.on_speaking_start()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
707
def _reachy_on_idle(self) -> None:
    """Return to the idle state (HA state: Idle)."""
    # Leave high-frequency tracking; adaptive mode saves CPU when idle.
    self._set_conversation_mode(False)

    # Re-enable face tracking (speaking may have paused it).
    if self.camera_server is not None:
        try:
            self.camera_server.set_face_tracking_enabled(True)
        except Exception as err:
            _LOGGER.debug("Failed to resume face tracking: %s", err)

    if not self.state.motion_enabled or not self.state.reachy_mini:
        return
    try:
        _LOGGER.debug("Reachy Mini: Idle animation")
        if self.state.motion:
            self.state.motion.on_idle()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
728
def _set_conversation_mode(self, in_conversation: bool) -> None:
    """Switch face tracking between conversation and idle rates.

    In conversation, tracking runs at high frequency; when idle it
    falls back to an adaptive rate to save CPU.
    """
    if self.camera_server is None:
        return
    try:
        self.camera_server.set_conversation_mode(in_conversation)
    except Exception as err:
        _LOGGER.debug("Failed to set conversation mode: %s", err)
740
def _reachy_on_timer_finished(self) -> None:
    """Play the robot animation for a finished timer."""
    if not self.state.motion_enabled or not self.state.reachy_mini:
        return
    try:
        _LOGGER.debug("Reachy Mini: Timer finished animation")
        if self.state.motion:
            self.state.motion.on_timer_finished()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
751
def _play_emotion(self, emotion_name: str) -> None:
    """Play an emotion/expression from the emotions library.

    Calls the Reachy daemon's recorded-move playback API over HTTP.
    Failures are logged, never raised.

    Args:
        emotion_name: Name of the emotion (e.g., "happy1", "sad1", etc.)
    """
    try:
        import requests

        # Resolve the daemon's WLAN IP; fall back to localhost.
        wlan_ip = "localhost"
        if self.state.reachy_mini is not None:
            try:
                status = self.state.reachy_mini.client.get_status(wait=False)
                wlan_ip = status.get('wlan_ip', 'localhost')
            except Exception:
                wlan_ip = "localhost"

        # Call the emotion playback API
        # Dataset: pollen-robotics/reachy-mini-emotions-library
        base_url = f"http://{wlan_ip}:8000/api/move/play/recorded-move-dataset"
        dataset = "pollen-robotics/reachy-mini-emotions-library"
        url = f"{base_url}/{dataset}/{emotion_name}"

        response = requests.post(url, timeout=5)
        if response.status_code == 200:
            result = response.json()
            move_uuid = result.get('uuid')
            # Lazy %-style args (not f-strings) to match the rest of the
            # file and skip formatting when the record is filtered out.
            _LOGGER.info("Playing emotion: %s (uuid=%s)", emotion_name, move_uuid)
        else:
            _LOGGER.warning(
                "Failed to play emotion %s: HTTP %s",
                emotion_name,
                response.status_code,
            )

    except Exception as e:
        _LOGGER.error("Error playing emotion %s: %s", emotion_name, e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/.gitkeep RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/LICENSE.md RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/README.md RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/timer_finished.flac RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/wake_word_triggered.flac RENAMED
File without changes
{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/speech_sway.py RENAMED
@@ -6,9 +6,8 @@ Analyzes audio loudness to drive natural head movements during TTS playback.
6
 
7
  import math
8
  from collections import deque
9
- from collections.abc import Callable
10
  from itertools import islice
11
- from typing import Any
12
 
13
  import numpy as np
14
  from numpy.typing import NDArray
@@ -65,7 +64,7 @@ def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
65
  """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
66
  t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
67
  t = max(0.0, min(1.0, t))
68
- return t**LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
69
 
70
 
71
  def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
@@ -94,7 +93,7 @@ def _resample_linear(x: NDArray[np.float32], sr_in: int, sr_out: int) -> NDArray
94
  """Lightweight linear resampler for short buffers."""
95
  if sr_in == sr_out or x.size == 0:
96
  return x
97
- n_out = round(x.size * sr_out / sr_in)
98
  if n_out <= 1:
99
  return np.zeros(0, dtype=np.float32)
100
  t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
@@ -144,7 +143,7 @@ class SpeechSwayRT:
144
  self.sway_down = 0
145
  self.t = 0.0
146
 
147
- def feed(self, pcm: NDArray[Any], sr: int | None = None) -> list[dict[str, float]]:
148
  """Stream in PCM chunk. Returns list of sway dicts, one per hop.
149
 
150
  Args:
@@ -168,7 +167,7 @@ class SpeechSwayRT:
168
  else:
169
  self.carry = x
170
 
171
- out: list[dict[str, float]] = []
172
 
173
  while self.carry.size >= HOP:
174
  hop = self.carry[:HOP]
@@ -216,35 +215,27 @@ class SpeechSwayRT:
216
  self.t += HOP_MS / 1000.0
217
 
218
  # Oscillators
219
- pitch = (
220
- math.radians(SWAY_A_PITCH_DEG)
221
- * loud
222
- * env
223
- * math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch)
224
- )
225
- yaw = (
226
- math.radians(SWAY_A_YAW_DEG) * loud * env * math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw)
227
- )
228
- roll = (
229
- math.radians(SWAY_A_ROLL_DEG)
230
- * loud
231
- * env
232
- * math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll)
233
- )
234
- x_m = (SWAY_A_X_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_X * self.t + self.phase_x)
235
- y_m = (SWAY_A_Y_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
236
- z_m = (SWAY_A_Z_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
237
-
238
- out.append(
239
- {
240
- "pitch_rad": pitch,
241
- "yaw_rad": yaw,
242
- "roll_rad": roll,
243
- "x_m": x_m,
244
- "y_m": y_m,
245
- "z_m": z_m,
246
- }
247
- )
248
 
249
  return out
250
 
@@ -252,7 +243,7 @@ class SpeechSwayRT:
252
  def analyze_audio_for_sway(
253
  audio_data: NDArray[Any],
254
  sample_rate: int,
255
- callback: Callable[[dict[str, float]], None],
256
  ) -> None:
257
  """Analyze entire audio and call callback for each sway frame.
258
 
 
6
 
7
  import math
8
  from collections import deque
 
9
  from itertools import islice
10
+ from typing import Any, Callable, Dict, List, Optional
11
 
12
  import numpy as np
13
  from numpy.typing import NDArray
 
64
  """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
65
  t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
66
  t = max(0.0, min(1.0, t))
67
+ return t ** LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
68
 
69
 
70
  def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
 
93
  """Lightweight linear resampler for short buffers."""
94
  if sr_in == sr_out or x.size == 0:
95
  return x
96
+ n_out = int(round(x.size * sr_out / sr_in))
97
  if n_out <= 1:
98
  return np.zeros(0, dtype=np.float32)
99
  t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
 
143
  self.sway_down = 0
144
  self.t = 0.0
145
 
146
+ def feed(self, pcm: NDArray[Any], sr: Optional[int] = None) -> List[Dict[str, float]]:
147
  """Stream in PCM chunk. Returns list of sway dicts, one per hop.
148
 
149
  Args:
 
167
  else:
168
  self.carry = x
169
 
170
+ out: List[Dict[str, float]] = []
171
 
172
  while self.carry.size >= HOP:
173
  hop = self.carry[:HOP]
 
215
  self.t += HOP_MS / 1000.0
216
 
217
  # Oscillators
218
+ pitch = (math.radians(SWAY_A_PITCH_DEG) * loud * env *
219
+ math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch))
220
+ yaw = (math.radians(SWAY_A_YAW_DEG) * loud * env *
221
+ math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw))
222
+ roll = (math.radians(SWAY_A_ROLL_DEG) * loud * env *
223
+ math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll))
224
+ x_m = (SWAY_A_X_MM / 1000.0) * loud * env * math.sin(
225
+ 2 * math.pi * SWAY_F_X * self.t + self.phase_x)
226
+ y_m = (SWAY_A_Y_MM / 1000.0) * loud * env * math.sin(
227
+ 2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
228
+ z_m = (SWAY_A_Z_MM / 1000.0) * loud * env * math.sin(
229
+ 2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
230
+
231
+ out.append({
232
+ "pitch_rad": pitch,
233
+ "yaw_rad": yaw,
234
+ "roll_rad": roll,
235
+ "x_m": x_m,
236
+ "y_m": y_m,
237
+ "z_m": z_m,
238
+ })
 
 
 
 
 
 
 
 
239
 
240
  return out
241
 
 
243
  def analyze_audio_for_sway(
244
  audio_data: NDArray[Any],
245
  sample_rate: int,
246
+ callback: Callable[[Dict[str, float]], None],
247
  ) -> None:
248
  """Analyze entire audio and call callback for each sway frame.
249
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/index.html RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/main.js RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/style.css RENAMED
File without changes
reachy_mini_ha_voice/util.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions."""
2
+
3
+ import hashlib
4
+ import uuid
5
+ from collections.abc import Callable
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+
10
def call_all(*funcs: Optional[Callable[[], None]]) -> None:
    """Invoke each given callable in order, skipping any that are None."""
    for fn in funcs:
        if fn is not None:
            fn()
16
+
17
def get_mac() -> str:
    """Return a stable device identifier for Home Assistant.

    The identifier is cached on disk so the same ID is reported across
    restarts; otherwise Home Assistant would register the satellite as
    a brand-new device every time.
    """
    # Persist the ID next to the package, under local/.device_id.
    local_dir = Path(__file__).parent.parent / "local"
    local_dir.mkdir(parents=True, exist_ok=True)
    device_id_file = local_dir / ".device_id"

    # Reuse the cached ID when readable (best effort).
    if device_id_file.exists():
        try:
            return device_id_file.read_text().strip()
        except Exception:
            pass

    # Derive a deterministic 12-hex-digit ID from the machine node id.
    # NOTE: md5 is used only as a stable non-security fingerprint.
    machine_id = uuid.getnode()
    device_id = hashlib.md5(str(machine_id).encode()).hexdigest()[:12]

    # Cache for the next run; a failed write is non-fatal.
    try:
        device_id_file.write_text(device_id)
    except Exception:
        pass

    return device_id
{reachy_mini_home_assistant → reachy_mini_ha_voice}/voice_assistant.py RENAMED
@@ -1,1314 +1,810 @@
1
- """
2
- Voice Assistant Service for Reachy Mini.
3
-
4
- This module provides the main voice assistant service that integrates
5
- with Home Assistant via ESPHome protocol.
6
- """
7
-
8
- import asyncio
9
- import json
10
- import logging
11
- import threading
12
- import time
13
- from collections import deque
14
- from dataclasses import dataclass, field
15
- from pathlib import Path
16
- from queue import Queue
17
- from typing import TYPE_CHECKING
18
-
19
- import numpy as np
20
- import requests
21
- from reachy_mini import ReachyMini
22
-
23
- from .audio.audio_player import AudioPlayer
24
- from .audio.microphone import MicrophoneOptimizer, MicrophonePreferences
25
- from .core import Config, SleepManager
26
- from .core.util import get_mac
27
- from .models import AvailableWakeWord, Preferences, ServerState, WakeWordType
28
- from .motion.reachy_motion import ReachyMiniMotion
29
- from .protocol.satellite import VoiceSatelliteProtocol
30
- from .protocol.zeroconf import HomeAssistantZeroconf
31
- from .vision.camera_server import MJPEGCameraServer
32
-
33
- if TYPE_CHECKING:
34
- from pymicro_wakeword import MicroWakeWord
35
- from pyopen_wakeword import OpenWakeWord
36
-
37
# Module-level logger for the voice assistant service.
_LOGGER = logging.getLogger(__name__)

# Paths resolved relative to this module so bundled assets are located
# regardless of the process working directory.
_MODULE_DIR = Path(__file__).parent
_WAKEWORDS_DIR = _MODULE_DIR / "wakewords"  # bundled wake-word model files
_SOUNDS_DIR = _MODULE_DIR / "sounds"  # bundled notification sound files
_LOCAL_DIR = _MODULE_DIR.parent / "local"  # writable per-install state (preferences, downloads)
43
-
44
-
45
@dataclass
class AudioProcessingContext:
    """Context for audio processing, holding mutable state."""

    # Currently loaded wake-word model wrappers.
    wake_words: list = field(default_factory=list)
    # MicroWakeWord feature extractor; None until initialized.
    micro_features: object | None = None
    # Per-model input buffers for MicroWakeWord models.
    micro_inputs: list = field(default_factory=list)
    # OpenWakeWord feature extractor; None until initialized.
    oww_features: object | None = None
    # Per-model input buffers for OpenWakeWord models.
    oww_inputs: list = field(default_factory=list)
    # True when at least one OpenWakeWord model is in use.
    has_oww: bool = False
    # Timestamp of the most recent wake-word activation (None until first
    # trigger) — presumably time.monotonic(); confirm against the writer.
    last_active: float | None = None
56
-
57
-
58
# Audio chunk size for consistent streaming.
# Smaller chunks = faster VAD response.
# ESPHome typical range: 256-512 samples.
# Going smaller improves latency but increases CPU/network overhead.
AUDIO_BLOCK_SIZE = 256  # samples at 16kHz = 16ms (optimized for low latency)
# Cap the buffered backlog at ~40 chunks (~640 ms) so a stalled consumer
# cannot grow the buffer without bound.
MAX_AUDIO_BUFFER_SIZE = AUDIO_BLOCK_SIZE * 40  # Max 40 chunks (~640ms) to prevent memory leak
64
-
65
-
66
- class VoiceAssistantService:
67
- """Voice assistant service that runs ESPHome protocol server."""
68
-
69
- def __init__(
70
- self,
71
- reachy_mini: ReachyMini,
72
- name: str = "Reachy Mini",
73
- host: str = "0.0.0.0",
74
- port: int = 6053,
75
- wake_model: str = "okay_nabu",
76
- camera_port: int = 8081,
77
- camera_enabled: bool = True,
78
- ):
79
- self.reachy_mini = reachy_mini
80
- self.name = name
81
- self.host = host
82
- self.port = port
83
- self.wake_model = wake_model
84
- self.camera_port = camera_port
85
- self.camera_enabled = camera_enabled
86
-
87
- self._server = None
88
- self._discovery = None
89
- self._audio_thread = None
90
- self._running = False
91
- self._state: ServerState | None = None
92
- self._motion = ReachyMiniMotion(reachy_mini)
93
- self._camera_server: MJPEGCameraServer | None = None
94
-
95
- # Audio buffer for fixed-size chunk output
96
- # Use deque with maxlen to avoid creating new arrays on every operation
97
- # This prevents memory leak from repeated array creation (2-3 arrays per chunk)
98
- self._audio_buffer: deque[float] = deque(maxlen=MAX_AUDIO_BUFFER_SIZE)
99
-
100
- # Audio overflow log throttling
101
- self._last_audio_overflow_log = 0.0
102
- self._suppressed_audio_overflows = 0
103
-
104
- # Robot services pause/resume tracking (without RobotStateMonitor)
105
- self._robot_services_paused = threading.Event() # Set when services should pause
106
- self._robot_services_resumed = threading.Event() # Event-driven resume signaling
107
- self._robot_services_resumed.set() # Start in resumed state
108
-
109
- # GStreamer access lock - prevents concurrent access to media pipeline
110
- # This prevents crashes when multiple threads access get_audio_sample(), push_audio_sample(), get_frame()
111
- self._gstreamer_lock = threading.Lock()
112
-
113
- # Sleep manager for sleep/wake handling
114
- self._sleep_manager: SleepManager | None = None
115
- self._event_loop: asyncio.AbstractEventLoop | None = None
116
-
117
- # Home Assistant connection state
118
- self._ha_connected = False # Track whether HA is connected
119
- self._ha_connection_established = False # Track if HA connection was ever established
120
-
121
- async def start(self) -> None:
122
- """Start the voice assistant service."""
123
- _LOGGER.info("Initializing voice assistant service...")
124
-
125
- # Ensure directories exist
126
- _WAKEWORDS_DIR.mkdir(parents=True, exist_ok=True)
127
- _SOUNDS_DIR.mkdir(parents=True, exist_ok=True)
128
- _LOCAL_DIR.mkdir(parents=True, exist_ok=True)
129
-
130
- # Verify required files (bundled with package)
131
- await self._verify_required_files()
132
-
133
- # Load wake words
134
- available_wake_words = self._load_available_wake_words()
135
- _LOGGER.debug("Available wake words: %s", list(available_wake_words.keys()))
136
-
137
- # Load preferences
138
- preferences_path = _LOCAL_DIR / "preferences.json"
139
- preferences = self._load_preferences(preferences_path)
140
-
141
- # Load wake word models
142
- wake_models, active_wake_words = self._load_wake_models(available_wake_words, preferences)
143
-
144
- # Load stop model
145
- stop_model = self._load_stop_model()
146
-
147
- # Create audio players with Reachy Mini reference and GStreamer lock
148
- music_player = AudioPlayer(self.reachy_mini, gstreamer_lock=self._gstreamer_lock)
149
- tts_player = AudioPlayer(self.reachy_mini, gstreamer_lock=self._gstreamer_lock)
150
-
151
- # Create server state
152
- self._state = ServerState(
153
- name=self.name,
154
- mac_address=get_mac(),
155
- audio_queue=Queue(),
156
- entities=[],
157
- available_wake_words=available_wake_words,
158
- wake_words=wake_models,
159
- active_wake_words=active_wake_words,
160
- stop_word=stop_model,
161
- music_player=music_player,
162
- tts_player=tts_player,
163
- wakeup_sound=str(_SOUNDS_DIR / "wake_word_triggered.flac"),
164
- timer_finished_sound=str(_SOUNDS_DIR / "timer_finished.flac"),
165
- preferences=preferences,
166
- preferences_path=preferences_path,
167
- refractory_seconds=2.0,
168
- download_dir=_LOCAL_DIR,
169
- reachy_mini=self.reachy_mini,
170
- motion_enabled=True,
171
- )
172
-
173
- # Log stop word status
174
- if self._state.stop_word:
175
- _LOGGER.info("Stop word initialized with ID: %s", self._state.stop_word.id)
176
- else:
177
- _LOGGER.error("Stop word is None! Stop command will not work")
178
-
179
- # Set motion controller reference in state
180
- self._state.motion = self._motion
181
- if self._motion and self._motion.movement_manager:
182
- self._motion.movement_manager.set_idle_motion_enabled(preferences.idle_motion_enabled)
183
- self._motion.movement_manager.set_idle_antenna_enabled(preferences.idle_antenna_enabled)
184
- self._motion.movement_manager.set_idle_random_actions_enabled(preferences.idle_random_actions_enabled)
185
- _LOGGER.info("Idle motion restored from preferences: %s", preferences.idle_motion_enabled)
186
- _LOGGER.info("Idle antenna motion restored from preferences: %s", preferences.idle_antenna_enabled)
187
- _LOGGER.info("Idle random actions restored from preferences: %s", preferences.idle_random_actions_enabled)
188
-
189
- # Set sleep/wake callbacks for HA button triggers
190
- self._state.on_ha_sleep = self._on_sleep
191
- self._state.on_ha_wake = lambda: asyncio.create_task(self._on_wake_from_ha())
192
-
193
- # Start Reachy Mini media system
194
- try:
195
- # Check if media system is already running to avoid conflicts
196
- media = self.reachy_mini.media
197
- if media.audio is not None:
198
- # Clean stale media state from previous app sessions (daemon is persistent)
199
- try:
200
- media.stop_recording()
201
- except Exception:
202
- pass
203
- try:
204
- media.stop_playing()
205
- except Exception:
206
- pass
207
- time.sleep(0.2)
208
-
209
- media.start_recording()
210
- _LOGGER.info("Started Reachy Mini recording")
211
- media.start_playing()
212
- _LOGGER.info("Started Reachy Mini playback")
213
-
214
- # Deterministic startup validation: fail fast instead of repeated
215
- # fallback/recovery loops that hide root causes.
216
- if not self._probe_audio_capture_ready(media, timeout_s=1.5):
217
- raise RuntimeError("Audio capture probe failed after media startup")
218
-
219
- _LOGGER.info("Reachy Mini media system initialized")
220
-
221
- # Body yaw now follows head yaw in movement_manager.py
222
- # This enables natural body rotation when tracking faces
223
-
224
- # Optimize microphone settings for voice recognition
225
- self._optimize_microphone_settings()
226
- except Exception as e:
227
- _LOGGER.warning("Failed to initialize Reachy Mini media: %s", e)
228
-
229
- # Start motion controller (5Hz control loop)
230
- self._motion.start()
231
-
232
- # Start audio processing thread (non-daemon for proper cleanup)
233
- self._running = True
234
- self._audio_thread = threading.Thread(
235
- target=self._process_audio,
236
- daemon=False,
237
- )
238
- self._audio_thread.start()
239
-
240
- # Create ESPHome server (pass camera_server for camera entity)
241
- loop = asyncio.get_running_loop()
242
- camera_server = self._camera_server # Capture for lambda
243
-
244
- def protocol_factory():
245
- protocol = VoiceSatelliteProtocol(self._state, camera_server=camera_server, voice_assistant_service=self)
246
- # Set HA connection callbacks
247
- protocol.set_ha_connection_callbacks(
248
- on_connected=self._on_ha_connected, on_disconnected=self._on_ha_disconnected
249
- )
250
- return protocol
251
-
252
- self._server = await loop.create_server(
253
- protocol_factory,
254
- host=self.host,
255
- port=self.port,
256
- )
257
-
258
- # Start mDNS discovery
259
- self._discovery = HomeAssistantZeroconf(port=self.port, name=self.name)
260
- await self._discovery.register_server()
261
-
262
- # Store service event loop for cross-thread async toggles
263
- self._event_loop = asyncio.get_running_loop()
264
-
265
- # Start Sendspin discovery only when enabled in preferences (default OFF)
266
- if preferences.sendspin_enabled:
267
- await music_player.start_sendspin_discovery()
268
- _LOGGER.info("Sendspin discovery enabled from preferences")
269
- else:
270
- _LOGGER.info("Sendspin discovery disabled by default")
271
-
272
- # Start sleep manager for proper sleep/wake handling
273
- # This monitors the daemon state and coordinates service suspend/resume
274
- self._sleep_manager = SleepManager(
275
- reachy_mini=self.reachy_mini,
276
- daemon_url=Config.daemon.url,
277
- check_interval=Config.daemon.check_interval_active,
278
- resume_delay=Config.sleep.resume_delay,
279
- )
280
-
281
- # Register sleep/wake callbacks
282
- self._sleep_manager.on_sleep(self._on_sleep)
283
- self._sleep_manager.on_wake(self._on_wake)
284
- self._sleep_manager.on_pre_resume(self._on_pre_resume)
285
-
286
- # Start the sleep manager
287
- await self._sleep_manager.start()
288
- _LOGGER.info("Sleep manager started")
289
-
290
- _LOGGER.info("Voice assistant service started on %s:%s", self.host, self.port)
291
-
292
- def set_sendspin_enabled(self, enabled: bool) -> None:
293
- """Enable or disable Sendspin discovery and connection at runtime."""
294
- if self._state is None or self._state.music_player is None:
295
- return
296
-
297
- if self._state.preferences.sendspin_enabled != enabled:
298
- self._state.preferences.sendspin_enabled = enabled
299
- self._state.save_preferences()
300
-
301
- async def _apply() -> None:
302
- if self._state is None or self._state.music_player is None:
303
- return
304
- if enabled:
305
- await self._state.music_player.start_sendspin_discovery()
306
- else:
307
- await self._state.music_player.stop_sendspin()
308
-
309
- try:
310
- loop = self._event_loop
311
- if loop is not None and loop.is_running():
312
- asyncio.run_coroutine_threadsafe(_apply(), loop)
313
- else:
314
- task = asyncio.create_task(_apply())
315
- task.add_done_callback(lambda _task: None)
316
- except Exception as e:
317
- _LOGGER.warning("Failed to apply Sendspin toggle (%s): %s", enabled, e)
318
-
319
- def _probe_audio_capture_ready(self, media, timeout_s: float = 1.5) -> bool:
320
- """Check whether microphone samples become available shortly after startup."""
321
- deadline = time.monotonic() + timeout_s
322
- while time.monotonic() < deadline:
323
- try:
324
- sample = media.get_audio_sample()
325
- if sample is not None and isinstance(sample, np.ndarray) and sample.size > 0:
326
- return True
327
- except Exception:
328
- pass
329
- time.sleep(0.05)
330
- return False
331
-
332
- def _suspend_voice_services(self, reason: str) -> None:
333
- """Suspend only voice-related services (not camera or motion).
334
-
335
- This is used for the Mute feature - camera and motion should remain active.
336
- """
337
- _LOGGER.warning("Suspending voice services (%s)", reason)
338
- self._robot_services_paused.set()
339
- self._robot_services_resumed.clear()
340
-
341
- # Update state
342
- if self._state is not None:
343
- self._state.services_suspended = True
344
-
345
- # Clear audio buffer to avoid processing stale data
346
- self._audio_buffer.clear()
347
-
348
- # Suspend satellite (stops TTS, music, wake word processing)
349
- if self._state is not None and self._state.satellite is not None:
350
- try:
351
- self._state.satellite.suspend()
352
- _LOGGER.debug("Satellite suspended")
353
- except Exception as e:
354
- _LOGGER.warning("Error suspending satellite: %s", e)
355
-
356
- # Suspend audio players
357
- if self._state is not None:
358
- if self._state.tts_player is not None:
359
- try:
360
- self._state.tts_player.suspend()
361
- except Exception as e:
362
- _LOGGER.warning("Error suspending TTS player: %s", e)
363
- if self._state.music_player is not None:
364
- try:
365
- self._state.music_player.suspend()
366
- except Exception as e:
367
- _LOGGER.warning("Error suspending music player: %s", e)
368
-
369
- # Stop media recording to save CPU
370
- try:
371
- self.reachy_mini.media.stop_recording()
372
- self.reachy_mini.media.stop_playing()
373
- _LOGGER.debug("Media system stopped")
374
- except Exception as e:
375
- _LOGGER.warning("Error stopping media: %s", e)
376
-
377
- _LOGGER.info("Voice services suspended - camera and motion remain active")
378
-
379
- def _resume_voice_services(self, reason: str) -> None:
380
- """Resume only voice-related services (not camera or motion).
381
-
382
- This is used for the Mute feature - camera and motion remain active.
383
- """
384
- _LOGGER.info("Resuming voice services (%s)", reason)
385
- self._robot_services_paused.clear()
386
-
387
- # Update state
388
- if self._state is not None:
389
- self._state.services_suspended = False
390
-
391
- # Restart media system first
392
- try:
393
- media = self.reachy_mini.media
394
- if media.audio is not None:
395
- media.start_recording()
396
- media.start_playing()
397
- _LOGGER.info("Media system restarted")
398
- except Exception as e:
399
- _LOGGER.warning("Failed to restart media: %s", e)
400
-
401
- # Resume satellite
402
- if self._state is not None and self._state.satellite is not None:
403
- try:
404
- self._state.satellite.resume()
405
- _LOGGER.debug("Satellite resumed")
406
- except Exception as e:
407
- _LOGGER.warning("Error resuming satellite: %s", e)
408
-
409
- # Resume audio players
410
- if self._state is not None:
411
- if self._state.tts_player is not None:
412
- try:
413
- self._state.tts_player.resume()
414
- except Exception as e:
415
- _LOGGER.warning("Error resuming TTS player: %s", e)
416
- if self._state.music_player is not None:
417
- try:
418
- self._state.music_player.resume()
419
- except Exception as e:
420
- _LOGGER.warning("Error resuming music player: %s", e)
421
-
422
- # Signal waiting threads that services are resumed
423
- self._robot_services_resumed.set()
424
-
425
- _LOGGER.info("Voice services resumed - camera and motion remained active")
426
-
427
- def _suspend_non_esphome_services(self, reason: str, set_sleep_state: bool) -> None:
428
- """Suspend all non-ESPHome services to reduce load.
429
-
430
- ESPHome server stays up so Home Assistant can wake the robot.
431
- """
432
- _LOGGER.warning("Suspending non-ESPHome services (%s)", reason)
433
- self._robot_services_paused.set()
434
- self._robot_services_resumed.clear()
435
-
436
- # Update state
437
- if self._state is not None:
438
- if set_sleep_state:
439
- self._state.is_sleeping = True
440
- self._state.services_suspended = True
441
-
442
- # Clear audio buffer to avoid processing stale data
443
- self._audio_buffer.clear()
444
-
445
- # Suspend camera server (stops thread and releases YOLO model)
446
- # Only suspend if camera is NOT disabled (user has not manually disabled it)
447
- # AND camera server has been started (not None)
448
- if self._camera_server is not None and self._state.camera_enabled:
449
- try:
450
- self._camera_server.suspend()
451
- _LOGGER.debug("Camera server suspended")
452
- except Exception as e:
453
- _LOGGER.warning("Error suspending camera: %s", e)
454
-
455
- # Suspend motion controller (stops control loop thread)
456
- if self._motion is not None and self._motion._movement_manager is not None:
457
- try:
458
- self._motion._movement_manager.suspend()
459
- _LOGGER.debug("Motion controller suspended")
460
- except Exception as e:
461
- _LOGGER.warning("Error suspending motion: %s", e)
462
-
463
- # Suspend satellite
464
- if self._state is not None and self._state.satellite is not None:
465
- try:
466
- self._state.satellite.suspend()
467
- _LOGGER.debug("Satellite suspended")
468
- except Exception as e:
469
- _LOGGER.warning("Error suspending satellite: %s", e)
470
-
471
- # Suspend audio players
472
- if self._state is not None:
473
- if self._state.tts_player is not None:
474
- try:
475
- self._state.tts_player.suspend()
476
- except Exception as e:
477
- _LOGGER.warning("Error suspending TTS player: %s", e)
478
- if self._state.music_player is not None:
479
- try:
480
- self._state.music_player.suspend()
481
- except Exception as e:
482
- _LOGGER.warning("Error suspending music player: %s", e)
483
-
484
- # Stop media recording to save CPU
485
- try:
486
- self.reachy_mini.media.stop_recording()
487
- self.reachy_mini.media.stop_playing()
488
- _LOGGER.debug("Media system stopped")
489
- except Exception as e:
490
- _LOGGER.warning("Error stopping media: %s", e)
491
-
492
- _LOGGER.info("Services suspended - ESPHome only")
493
-
494
- def _resume_non_esphome_services(self, reason: str, clear_sleep_state: bool) -> None:
495
- """Resume all non-ESPHome services after sleep/disconnect."""
496
- _LOGGER.info("Resuming non-ESPHome services (%s)", reason)
497
- self._robot_services_paused.clear()
498
-
499
- # Update state
500
- if self._state is not None:
501
- if clear_sleep_state:
502
- self._state.is_sleeping = False
503
- self._state.services_suspended = False
504
-
505
- # Restart media system first
506
- try:
507
- media = self.reachy_mini.media
508
- if media.audio is not None:
509
- media.start_recording()
510
- media.start_playing()
511
- _LOGGER.info("Media system restarted")
512
- except Exception as e:
513
- _LOGGER.warning("Failed to restart media: %s", e)
514
-
515
- # Resume camera server (reloads YOLO model and restarts capture thread)
516
- # Only resume if camera is NOT disabled (user has not manually disabled it)
517
- # AND camera server has been started (not None)
518
- if self._camera_server is not None and self._state.camera_enabled:
519
- try:
520
- self._camera_server.resume_from_suspend()
521
- _LOGGER.debug("Camera server resumed from suspend")
522
- except Exception as e:
523
- _LOGGER.warning("Error resuming camera: %s", e)
524
-
525
- # Resume motion controller (restarts control loop thread)
526
- if self._motion is not None and self._motion._movement_manager is not None:
527
- try:
528
- self._motion._movement_manager.resume_from_suspend()
529
- _LOGGER.debug("Motion controller resumed from suspend")
530
- except Exception as e:
531
- _LOGGER.warning("Error resuming motion: %s", e)
532
-
533
- # Resume satellite
534
- if self._state is not None and self._state.satellite is not None:
535
- try:
536
- self._state.satellite.resume()
537
- _LOGGER.debug("Satellite resumed")
538
- except Exception as e:
539
- _LOGGER.warning("Error resuming satellite: %s", e)
540
-
541
- # Resume audio players
542
- if self._state is not None:
543
- if self._state.tts_player is not None:
544
- try:
545
- self._state.tts_player.resume()
546
- except Exception as e:
547
- _LOGGER.warning("Error resuming TTS player: %s", e)
548
- if self._state.music_player is not None:
549
- try:
550
- self._state.music_player.resume()
551
- except Exception as e:
552
- _LOGGER.warning("Error resuming music player: %s", e)
553
-
554
- # Signal waiting threads that services are resumed
555
- self._robot_services_resumed.set()
556
-
557
- _LOGGER.info("All services resumed - system fully operational")
558
-
559
- def _on_robot_disconnected(self) -> None:
560
- """Called when robot connection is lost (e.g., daemon unavailable).
561
-
562
- Suspends all non-ESPHome services to keep HA wake control available.
563
- """
564
- # RobotStateMonitor removed - connection tracking is handled by DaemonStateMonitor
565
- self._suspend_non_esphome_services(reason="robot_disconnected", set_sleep_state=False)
566
-
567
- def _on_robot_connected(self) -> None:
568
- """Called when robot connection is restored.
569
-
570
- Resumes non-ESPHome services unless the system is in sleep mode.
571
- """
572
- # RobotStateMonitor removed - connection tracking is handled by DaemonStateMonitor
573
-
574
- if self._state is not None and self._state.is_sleeping:
575
- _LOGGER.info("Robot connected but system is sleeping; deferring resume")
576
- return
577
-
578
- self._resume_non_esphome_services(reason="robot_connected", clear_sleep_state=False)
579
-
580
- def _on_sleep(self) -> None:
581
- """Called when the robot enters sleep mode.
582
-
583
- This is triggered by the SleepManager when the daemon enters STOPPED state.
584
- At this point, we should:
585
- 1. Stop all resource-intensive operations
586
- 2. Release ML models from memory
587
- 3. Keep only ESPHome server running for HA control
588
- """
589
- # RobotStateMonitor removed - sleep tracking is handled by SleepManager
590
- self._suspend_non_esphome_services(reason="sleep", set_sleep_state=True)
591
-
592
- def _on_wake(self) -> None:
593
- """Called when the robot starts waking up.
594
-
595
- This is triggered immediately when daemon state changes from STOPPED.
596
- The actual service resume happens after the configured delay (30s default).
597
- """
598
- _LOGGER.info("Robot waking up - will resume services after delay...")
599
-
600
- def _on_pre_resume(self) -> None:
601
- """Called just before services are resumed.
602
-
603
- This happens after the resume delay (30s default).
604
- At this point, the daemon should be fully ready.
605
- """
606
- _LOGGER.info("Resuming services after wake delay...")
607
- # RobotStateMonitor removed - sleep tracking is handled by SleepManager
608
- self._resume_non_esphome_services(reason="wake_pre_resume", clear_sleep_state=True)
609
-
610
- async def _on_wake_from_ha(self) -> None:
611
- """Called when wake_up is triggered from Home Assistant button.
612
-
613
- This bypasses the DaemonStateMonitor polling and directly resumes services
614
- after a short delay to allow the robot to wake up.
615
- """
616
- _LOGGER.info("Wake triggered from HA - waiting for daemon running state...")
617
-
618
- # Wait for daemon to be fully running before resuming services.
619
- # This avoids early media/motion restart failures after long sleep.
620
- timeout_s = 35.0
621
- deadline = time.monotonic() + timeout_s
622
- while time.monotonic() < deadline:
623
- try:
624
- daemon_url = Config.daemon.url.rstrip("/")
625
- response = requests.get(f"{daemon_url}/api/daemon/status", timeout=2.0)
626
- response.raise_for_status()
627
- daemon_state = (response.json() or {}).get("state", "")
628
- if daemon_state == "running":
629
- _LOGGER.info("Daemon is running, resuming services now")
630
- self._on_pre_resume()
631
- return
632
- except Exception as e:
633
- _LOGGER.debug("Wake wait state check failed: %s", e)
634
-
635
- await asyncio.sleep(1.0)
636
-
637
- _LOGGER.warning("Wake wait timed out after %.0fs, forcing service resume", timeout_s)
638
- self._on_pre_resume()
639
-
640
- async def _on_ha_connected(self) -> None:
641
- """Called when Home Assistant connects.
642
-
643
- At this point, we should:
644
- 1. Load and start camera server if not already started
645
- 2. Ensure voice models are loaded
646
- 3. Resume any suspended services
647
- """
648
- _LOGGER.info("Home Assistant connected - initializing camera and voice services")
649
- self._ha_connected = True
650
- self._ha_connection_established = True
651
-
652
- # Start camera server if enabled and not already started
653
- if self.camera_enabled and self._state.camera_enabled and self._camera_server is None:
654
- try:
655
- self._camera_server = MJPEGCameraServer(
656
- reachy_mini=self.reachy_mini,
657
- host=self.host,
658
- port=self.camera_port,
659
- fps=15,
660
- quality=80,
661
- enable_face_tracking=bool(getattr(self._state.preferences, "face_tracking_enabled", False)),
662
- enable_gesture_detection=bool(getattr(self._state.preferences, "gesture_detection_enabled", False)),
663
- gstreamer_lock=self._gstreamer_lock,
664
- )
665
-
666
- # Apply persisted vision preferences before camera server start.
667
- prefs = self._state.preferences
668
- self._camera_server.set_face_tracking_enabled(bool(getattr(prefs, "face_tracking_enabled", False)))
669
- self._camera_server.set_gesture_detection_enabled(
670
- bool(getattr(prefs, "gesture_detection_enabled", False))
671
- )
672
- self._camera_server.set_face_confidence_threshold(
673
- float(getattr(prefs, "face_confidence_threshold", 0.5))
674
- )
675
-
676
- await self._camera_server.start()
677
-
678
- # Store camera_server reference in state for entity registry access
679
- self._state._camera_server = self._camera_server
680
-
681
- # Update entity registry with the new camera_server reference
682
- if self._state.satellite:
683
- self._state.satellite.update_camera_server(self._camera_server)
684
-
685
- # Connect camera server to motion controller for face tracking
686
- if self._motion is not None:
687
- self._motion.set_camera_server(self._camera_server)
688
-
689
- _LOGGER.info("Camera server started on %s:%s", self.host, self.camera_port)
690
- except Exception as e:
691
- _LOGGER.error("Failed to start camera server: %s", e)
692
-
693
- # Resume services if they were suspended due to HA disconnection
694
- if self._state.services_suspended and not self._state.is_sleeping:
695
- self._resume_non_esphome_services(reason="ha_connected", clear_sleep_state=False)
696
-
697
- def _on_ha_disconnected(self) -> None:
698
- """Called when Home Assistant disconnects.
699
-
700
- At this point, we should:
701
- 1. Suspend camera server to save resources
702
- 2. Keep ESPHome server running for reconnection
703
- 3. Ensure voice services are suspended
704
- """
705
- _LOGGER.warning("Home Assistant disconnected - suspending camera and voice services")
706
- self._ha_connected = False
707
-
708
- # Suspend non-ESPHome services including camera
709
- # Keep ESPHome server running so HA can reconnect
710
- self._suspend_non_esphome_services(reason="ha_disconnected", set_sleep_state=False)
711
-
712
- def _optimize_microphone_settings(self) -> None:
713
- """Optimize ReSpeaker XVF3800 microphone settings for voice recognition.
714
-
715
- Delegates to MicrophoneOptimizer for actual settings configuration.
716
- User preferences from Home Assistant override defaults when available.
717
- """
718
- try:
719
- # Access ReSpeaker through the media audio system
720
- audio = self.reachy_mini.media.audio
721
- if audio is None or not hasattr(audio, "_respeaker"):
722
- _LOGGER.debug("ReSpeaker not available for optimization")
723
- return
724
-
725
- respeaker = audio._respeaker
726
- if respeaker is None:
727
- _LOGGER.debug("ReSpeaker device not found")
728
- return
729
-
730
- # Build preferences from saved state
731
- prefs = self._state.preferences if self._state else None
732
- mic_prefs = MicrophonePreferences(
733
- agc_enabled=prefs.agc_enabled if prefs else None,
734
- agc_max_gain=prefs.agc_max_gain if prefs else None,
735
- noise_suppression=prefs.noise_suppression if prefs else None,
736
- )
737
-
738
- # Delegate to optimizer
739
- optimizer = MicrophoneOptimizer()
740
- optimizer.optimize(respeaker, mic_prefs)
741
-
742
- except Exception as e:
743
- _LOGGER.warning("Failed to optimize microphone settings: %s", e)
744
-
745
- async def stop(self) -> None:
746
- """Stop the voice assistant service."""
747
- _LOGGER.info("Stopping voice assistant service...")
748
-
749
- # 1. First stop audio recording to prevent new data from coming in
750
- try:
751
- self.reachy_mini.media.stop_recording()
752
- _LOGGER.debug("Reachy Mini recording stopped")
753
- except Exception as e:
754
- _LOGGER.warning("Error stopping Reachy Mini recording: %s", e)
755
-
756
- # 2. Set stop flag
757
- self._running = False
758
- # Wake any threads blocked on resume signal
759
- self._robot_services_resumed.set()
760
-
761
- # 3. Wait for audio thread to finish
762
- if self._audio_thread:
763
- self._audio_thread.join(timeout=Config.shutdown.audio_thread_join_timeout)
764
- if self._audio_thread.is_alive():
765
- _LOGGER.warning("Audio thread did not stop in time")
766
-
767
- # 4. Stop playback
768
- try:
769
- self.reachy_mini.media.stop_playing()
770
- _LOGGER.debug("Reachy Mini playback stopped")
771
- except Exception as e:
772
- _LOGGER.warning("Error stopping Reachy Mini playback: %s", e)
773
-
774
- # 5. Stop ESPHome server
775
- if self._server:
776
- self._server.close()
777
- try:
778
- await asyncio.wait_for(
779
- self._server.wait_closed(),
780
- timeout=Config.shutdown.server_close_timeout,
781
- )
782
- except TimeoutError:
783
- _LOGGER.warning("ESPHome server did not close in time")
784
-
785
- # 6. Unregister mDNS
786
- if self._discovery:
787
- try:
788
- await asyncio.wait_for(
789
- self._discovery.unregister_server(),
790
- timeout=Config.shutdown.server_close_timeout,
791
- )
792
- except TimeoutError:
793
- _LOGGER.warning("mDNS unregister did not finish in time")
794
-
795
- # 6.5. Stop Sendspin
796
- if self._state and self._state.music_player:
797
- try:
798
- await asyncio.wait_for(
799
- self._state.music_player.stop_sendspin(),
800
- timeout=Config.shutdown.sendspin_stop_timeout,
801
- )
802
- except TimeoutError:
803
- _LOGGER.warning("Sendspin stop did not finish in time")
804
-
805
- # 7. Stop camera server
806
- # Only stop if camera is NOT disabled (user has not manually disabled it)
807
- if self._camera_server and self._state.camera_enabled:
808
- await self._camera_server.stop(join_timeout=Config.shutdown.camera_stop_timeout)
809
- self._camera_server = None
810
- # Close SDK media resources to prevent memory leaks (even if camera is disabled)
811
- try:
812
- self.reachy_mini.media.close()
813
- _LOGGER.info("SDK media resources closed")
814
- except Exception as e:
815
- _LOGGER.debug("Failed to close SDK media: %s", e)
816
-
817
- # 8. Shutdown motion executor
818
- if self._motion:
819
- self._motion.shutdown()
820
-
821
- # 9. Stop sleep manager
822
- if self._sleep_manager:
823
- try:
824
- await asyncio.wait_for(
825
- self._sleep_manager.stop(),
826
- timeout=Config.shutdown.sleep_manager_stop_timeout,
827
- )
828
- except TimeoutError:
829
- _LOGGER.warning("Sleep manager stop did not finish in time")
830
- self._sleep_manager = None
831
-
832
- _LOGGER.info("Voice assistant service stopped.")
833
-
834
- async def _verify_required_files(self) -> None:
835
- """Verify required model and sound files exist (bundled with package)."""
836
- # Required wake word files (bundled in wakewords/ directory)
837
- # Note: hey_jarvis is in openWakeWord/ with version suffix, so not required here
838
- required_wakewords = [
839
- "okay_nabu.tflite",
840
- "okay_nabu.json",
841
- "stop.tflite",
842
- "stop.json",
843
- ]
844
-
845
- # Required sound files (bundled in sounds/ directory)
846
- required_sounds = [
847
- "wake_word_triggered.flac",
848
- "timer_finished.flac",
849
- ]
850
-
851
- # Verify wake word files
852
- missing_wakewords = []
853
- for filename in required_wakewords:
854
- filepath = _WAKEWORDS_DIR / filename
855
- if not filepath.exists():
856
- missing_wakewords.append(filename)
857
-
858
- if missing_wakewords:
859
- _LOGGER.warning("Missing wake word files: %s. These should be bundled with the package.", missing_wakewords)
860
-
861
- # Verify sound files
862
- missing_sounds = []
863
- for filename in required_sounds:
864
- filepath = _SOUNDS_DIR / filename
865
- if not filepath.exists():
866
- missing_sounds.append(filename)
867
-
868
- if missing_sounds:
869
- _LOGGER.warning("Missing sound files: %s. These should be bundled with the package.", missing_sounds)
870
-
871
- if not missing_wakewords and not missing_sounds:
872
- _LOGGER.info("All required files verified successfully.")
873
-
874
- def _load_available_wake_words(self) -> dict[str, AvailableWakeWord]:
875
- """Load available wake word configurations."""
876
- available_wake_words: dict[str, AvailableWakeWord] = {}
877
-
878
- # Load order: OpenWakeWord first, then MicroWakeWord, then external
879
- # Later entries override earlier ones, so MicroWakeWord takes priority
880
- wake_word_dirs = [
881
- _WAKEWORDS_DIR / "openWakeWord", # OpenWakeWord (lowest priority)
882
- _LOCAL_DIR / "external_wake_words", # External wake words
883
- _WAKEWORDS_DIR, # MicroWakeWord (highest priority)
884
- ]
885
-
886
- for wake_word_dir in wake_word_dirs:
887
- if not wake_word_dir.exists():
888
- continue
889
-
890
- for config_path in wake_word_dir.glob("*.json"):
891
- model_id = config_path.stem
892
- if model_id == "stop":
893
- continue
894
-
895
- try:
896
- with open(config_path, encoding="utf-8") as f:
897
- config = json.load(f)
898
-
899
- model_type = WakeWordType(config.get("type", "micro"))
900
-
901
- if model_type == WakeWordType.OPEN_WAKE_WORD:
902
- wake_word_path = config_path.parent / config["model"]
903
- else:
904
- wake_word_path = config_path
905
-
906
- available_wake_words[model_id] = AvailableWakeWord(
907
- id=model_id,
908
- type=model_type,
909
- wake_word=config.get("wake_word", model_id),
910
- trained_languages=config.get("trained_languages", []),
911
- wake_word_path=wake_word_path,
912
- )
913
- except Exception as e:
914
- _LOGGER.warning("Failed to load wake word %s: %s", config_path, e)
915
-
916
- return available_wake_words
917
-
918
- def _load_preferences(self, preferences_path: Path) -> Preferences:
919
- """Load user preferences."""
920
- if preferences_path.exists():
921
- try:
922
- with open(preferences_path, encoding="utf-8") as f:
923
- data = json.load(f)
924
- return Preferences(**data)
925
- except Exception as e:
926
- _LOGGER.warning("Failed to load preferences: %s", e)
927
-
928
- return Preferences()
929
-
930
- def _load_wake_models(
931
- self,
932
- available_wake_words: dict[str, AvailableWakeWord],
933
- preferences: Preferences,
934
- ):
935
- """Load wake word models."""
936
-
937
- wake_models: dict[str, MicroWakeWord | OpenWakeWord] = {}
938
- active_wake_words: set[str] = set()
939
-
940
- # Try to load preferred models
941
- if preferences.active_wake_words:
942
- for wake_word_id in preferences.active_wake_words:
943
- wake_word = available_wake_words.get(wake_word_id)
944
- if wake_word is None:
945
- _LOGGER.warning("Unknown wake word: %s", wake_word_id)
946
- continue
947
-
948
- try:
949
- _LOGGER.debug("Loading wake model: %s", wake_word_id)
950
- loaded_model = wake_word.load()
951
- # Set id attribute on the model for later identification
952
- loaded_model.id = wake_word_id
953
- wake_models[wake_word_id] = loaded_model
954
- active_wake_words.add(wake_word_id)
955
- except Exception as e:
956
- _LOGGER.warning("Failed to load wake model %s: %s", wake_word_id, e)
957
-
958
- # Load default model if none loaded
959
- if not wake_models:
960
- wake_word = available_wake_words.get(self.wake_model)
961
- if wake_word:
962
- try:
963
- _LOGGER.debug("Loading default wake model: %s", self.wake_model)
964
- loaded_model = wake_word.load()
965
- # Set id attribute on the model for later identification
966
- loaded_model.id = self.wake_model
967
- wake_models[self.wake_model] = loaded_model
968
- active_wake_words.add(self.wake_model)
969
- except Exception as e:
970
- _LOGGER.error("Failed to load default wake model: %s", e)
971
-
972
- return wake_models, active_wake_words
973
-
974
- def _load_stop_model(self):
975
- """Load the stop word model."""
976
- from pymicro_wakeword import MicroWakeWord
977
-
978
- stop_config = _WAKEWORDS_DIR / "stop.json"
979
- if stop_config.exists():
980
- try:
981
- model = MicroWakeWord.from_config(stop_config)
982
- # Don't override the model ID - use the one from config
983
- _LOGGER.info("Loaded stop model with ID: %s, config: %s", model.id, stop_config)
984
- return model
985
- except Exception as e:
986
- _LOGGER.error("Failed to load stop model from %s: %s", stop_config, e)
987
- import traceback
988
-
989
- traceback.print_exc()
990
-
991
- # Stop model not available - disable stop functionality
992
- _LOGGER.error("Stop model not available at %s - stop functionality will be disabled", stop_config)
993
- return None
994
-
995
- def _process_audio(self) -> None:
996
- """Process audio from Reachy Mini's microphone."""
997
- from pymicro_wakeword import MicroWakeWordFeatures
998
-
999
- ctx = AudioProcessingContext()
1000
- ctx.micro_features = MicroWakeWordFeatures()
1001
-
1002
- try:
1003
- _LOGGER.info("Starting audio processing using Reachy Mini's microphone...")
1004
- self._audio_loop_reachy(ctx)
1005
-
1006
- except Exception:
1007
- _LOGGER.exception("Error processing audio")
1008
-
1009
- def _audio_loop_reachy(self, ctx: AudioProcessingContext) -> None:
1010
- """Audio loop using Reachy Mini's microphone.
1011
-
1012
- This loop checks the robot connection state before attempting to
1013
- read audio. When the robot is disconnected (e.g., sleep mode),
1014
- the loop waits for reconnection without generating errors.
1015
- """
1016
- consecutive_audio_errors = 0
1017
- max_consecutive_errors = 3 # Pause after 3 consecutive errors
1018
-
1019
- while self._running:
1020
- try:
1021
- # Check if robot services are paused (sleep mode / disconnected / muted)
1022
- if self._robot_services_paused.is_set():
1023
- # Wait for resume signal (event-driven, wakes immediately on resume)
1024
- consecutive_audio_errors = 0 # Reset on pause
1025
- self._robot_services_resumed.wait(timeout=1.0)
1026
- continue
1027
-
1028
- if not self._wait_for_satellite():
1029
- continue
1030
-
1031
- # Update wake words list
1032
- self._update_wake_words_list(ctx)
1033
-
1034
- # Get audio from Reachy Mini
1035
- audio_chunk = self._get_reachy_audio_chunk()
1036
- if audio_chunk is None:
1037
- idle_sleep = (
1038
- Config.audio.idle_sleep_sleeping
1039
- if self._robot_services_paused.is_set()
1040
- else Config.audio.idle_sleep_active
1041
- )
1042
- time.sleep(idle_sleep)
1043
- continue
1044
-
1045
- # Audio successfully obtained, reset error counter
1046
- consecutive_audio_errors = 0
1047
- self._process_audio_chunk(ctx, audio_chunk)
1048
-
1049
- except Exception as e:
1050
- error_msg = str(e)
1051
-
1052
- # Check for audio processing errors that indicate sleep mode
1053
- if "can only convert" in error_msg or "scalar" in error_msg:
1054
- consecutive_audio_errors += 1
1055
- if consecutive_audio_errors >= max_consecutive_errors:
1056
- if not self._robot_services_paused.is_set():
1057
- _LOGGER.warning("Audio errors indicate robot may be asleep - pausing audio processing")
1058
- self._robot_services_paused.set()
1059
- self._robot_services_resumed.clear()
1060
- # Clear audio buffer
1061
- self._audio_buffer.clear()
1062
- # Wait for resume signal instead of polling
1063
- self._robot_services_resumed.wait(timeout=0.5)
1064
- continue
1065
-
1066
- # Check if this is a connection error
1067
- if "Lost connection" in error_msg:
1068
- # Don't log - the state monitor will handle this
1069
- if not self._robot_services_paused.is_set():
1070
- _LOGGER.debug("Connection error detected, waiting for state monitor")
1071
- # Wait for resume signal instead of polling
1072
- self._robot_services_resumed.wait(timeout=1.0)
1073
- else:
1074
- # Log unexpected errors (but limit frequency)
1075
- consecutive_audio_errors += 1
1076
- if consecutive_audio_errors <= 3:
1077
- _LOGGER.error("Error in Reachy audio processing: %s", e)
1078
- time.sleep(Config.audio.idle_sleep_sleeping)
1079
-
1080
- def _wait_for_satellite(self) -> bool:
1081
- """Wait for satellite connection. Returns True if connected."""
1082
- if self._state is None or self._state.satellite is None:
1083
- time.sleep(0.1)
1084
- return False
1085
- return True
1086
-
1087
- def _update_wake_words_list(self, ctx: AudioProcessingContext) -> None:
1088
- """Update wake words list if changed."""
1089
- from pymicro_wakeword import MicroWakeWordFeatures
1090
- from pyopen_wakeword import OpenWakeWord, OpenWakeWordFeatures
1091
-
1092
- if (not ctx.wake_words) or (self._state.wake_words_changed and self._state.wake_words):
1093
- self._state.wake_words_changed = False
1094
- ctx.wake_words.clear()
1095
-
1096
- # Reset feature extractors to clear any residual audio data
1097
- # This prevents false triggers when switching wake words
1098
- ctx.micro_features = MicroWakeWordFeatures()
1099
- ctx.micro_inputs.clear()
1100
- if ctx.oww_features is not None:
1101
- ctx.oww_features = OpenWakeWordFeatures.from_builtin()
1102
- ctx.oww_inputs.clear()
1103
-
1104
- # Also reset the refractory period to prevent immediate trigger
1105
- ctx.last_active = time.monotonic()
1106
-
1107
- # state.wake_words is Dict[str, MicroWakeWord/OpenWakeWord]
1108
- # We need to filter by active_wake_words (which contains the IDs/keys)
1109
- for ww_id, ww_model in self._state.wake_words.items():
1110
- if ww_id in self._state.active_wake_words:
1111
- # Ensure the model has an 'id' attribute for later use
1112
- if not hasattr(ww_model, "id"):
1113
- ww_model.id = ww_id
1114
- ctx.wake_words.append(ww_model)
1115
-
1116
- ctx.has_oww = any(isinstance(ww, OpenWakeWord) for ww in ctx.wake_words)
1117
- if ctx.has_oww and ctx.oww_features is None:
1118
- ctx.oww_features = OpenWakeWordFeatures.from_builtin()
1119
-
1120
- _LOGGER.info("Active wake words updated: %s (features reset)", list(self._state.active_wake_words))
1121
-
1122
- def _get_reachy_audio_chunk(self) -> bytes | None:
1123
- """Get fixed-size audio chunk from Reachy Mini's microphone.
1124
-
1125
- Returns exactly AUDIO_BLOCK_SIZE samples each time, buffering
1126
- internally to ensure consistent chunk sizes for streaming.
1127
-
1128
- Returns:
1129
- PCM audio bytes of fixed size, or None if not enough data.
1130
- """
1131
- # Check if services are paused (e.g., during sleep/disconnect)
1132
- if self._robot_services_paused.is_set():
1133
- return None
1134
-
1135
- # Get new audio data from SDK
1136
- audio_data = self.reachy_mini.media.get_audio_sample()
1137
-
1138
- # Debug: Log SDK audio data statistics and sample rate (once at startup)
1139
- if audio_data is not None and isinstance(audio_data, np.ndarray) and audio_data.size > 0:
1140
- if not hasattr(self, "_audio_sample_rate_logged"):
1141
- self._audio_sample_rate_logged = True
1142
- try:
1143
- input_rate = self.reachy_mini.media.get_input_audio_samplerate()
1144
- _LOGGER.info(
1145
- "Audio input: sample_rate=%d Hz, shape=%s, dtype=%s (expected 16000 Hz)",
1146
- input_rate,
1147
- audio_data.shape,
1148
- audio_data.dtype,
1149
- )
1150
- if input_rate != 16000:
1151
- _LOGGER.warning(
1152
- "Audio sample rate mismatch! Got %d Hz, expected 16000 Hz. "
1153
- "STT may be slow or inaccurate. Consider resampling.",
1154
- input_rate,
1155
- )
1156
- except Exception as e:
1157
- _LOGGER.warning("Could not get audio sample rate: %s", e)
1158
-
1159
- # Append new data to buffer if valid
1160
- if audio_data is not None and isinstance(audio_data, np.ndarray) and audio_data.size > 0:
1161
- try:
1162
- if audio_data.dtype.kind not in ("S", "U", "O", "V", "b"):
1163
- # Convert to float32 only if needed (SDK already returns float32)
1164
- if audio_data.dtype != np.float32:
1165
- audio_data = audio_data.astype(np.float32, copy=False)
1166
-
1167
- # Clean NaN/Inf values early to prevent downstream errors
1168
- audio_data = np.nan_to_num(audio_data, nan=0.0, posinf=1.0, neginf=-1.0)
1169
-
1170
- # Convert stereo to mono (use first channel for better quality)
1171
- if audio_data.ndim == 2 and audio_data.shape[1] >= 2:
1172
- # Use first channel instead of mean - cleaner signal
1173
- # Remove .copy() to avoid unnecessary array duplication
1174
- audio_data = audio_data[:, 0]
1175
- elif audio_data.ndim == 2:
1176
- # Remove .copy() to avoid unnecessary array duplication
1177
- audio_data = audio_data[:, 0]
1178
-
1179
- # Resample if needed (SDK may return non-16kHz audio)
1180
- if audio_data.ndim == 1:
1181
- # Initialize sample rate once (not every chunk)
1182
- if not hasattr(self, "_input_sample_rate_fixed"):
1183
- try:
1184
- self._input_sample_rate = self.reachy_mini.media.get_input_audio_samplerate()
1185
- if self._input_sample_rate != 16000:
1186
- _LOGGER.warning(
1187
- f"Sample rate {self._input_sample_rate} != 16000 Hz. "
1188
- "Performance may be degraded. "
1189
- "Consider forcing 16kHz in hardware config."
1190
- )
1191
- except Exception:
1192
- self._input_sample_rate = 16000
1193
-
1194
- self._input_sample_rate_fixed = True # Mark as fixed
1195
-
1196
- # Resample to 16kHz if needed
1197
- if self._input_sample_rate != 16000 and self._input_sample_rate > 0:
1198
- from scipy.signal import resample
1199
-
1200
- new_length = int(len(audio_data) * 16000 / self._input_sample_rate)
1201
- if new_length > 0:
1202
- audio_data = resample(audio_data, new_length)
1203
- audio_data = np.nan_to_num(
1204
- audio_data,
1205
- nan=0.0,
1206
- posinf=1.0,
1207
- neginf=-1.0,
1208
- ).astype(np.float32, copy=False)
1209
-
1210
- # Extend deque (deque automatically handles overflow with maxlen)
1211
- # This avoids creating new arrays like np.concatenate does
1212
- self._audio_buffer.extend(audio_data)
1213
-
1214
- except (TypeError, ValueError):
1215
- pass
1216
-
1217
- # Return fixed-size chunk if we have enough data
1218
- if len(self._audio_buffer) >= AUDIO_BLOCK_SIZE:
1219
- # Extract chunk and remove from buffer
1220
- chunk = [self._audio_buffer.popleft() for _ in range(AUDIO_BLOCK_SIZE)]
1221
-
1222
- # Convert to PCM bytes (16-bit signed, little-endian)
1223
- chunk_array = np.array(chunk, dtype=np.float32)
1224
- pcm_bytes = (np.clip(chunk_array, -1.0, 1.0) * 32767.0).astype("<i2").tobytes()
1225
- return pcm_bytes
1226
-
1227
- return None
1228
-
1229
- def _convert_to_pcm(self, audio_chunk_array: np.ndarray) -> bytes:
1230
- """Convert float32 audio array to 16-bit PCM bytes."""
1231
- # Replace NaN/Inf with 0 to avoid casting errors
1232
- audio_clean = np.nan_to_num(audio_chunk_array, nan=0.0, posinf=1.0, neginf=-1.0)
1233
- return (np.clip(audio_clean, -1.0, 1.0) * 32767.0).astype("<i2").tobytes()
1234
-
1235
- def _process_audio_chunk(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
1236
- """Process an audio chunk for wake word detection.
1237
-
1238
- Following reference project pattern: always process wake words.
1239
- Refractory period prevents duplicate triggers.
1240
-
1241
- Args:
1242
- ctx: Audio processing context
1243
- audio_chunk: PCM audio bytes
1244
- """
1245
- # Stream audio to Home Assistant only after wake (privacy: no pre-wake upload)
1246
- if self._state.satellite.is_streaming_audio:
1247
- self._state.satellite.handle_audio(audio_chunk)
1248
-
1249
- # Process wake word features
1250
- self._process_features(ctx, audio_chunk)
1251
-
1252
- # Detect wake words
1253
- self._detect_wake_words(ctx)
1254
-
1255
- # Detect stop word
1256
- self._detect_stop_word(ctx)
1257
-
1258
- def _process_features(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
1259
- """Process audio features for wake word detection."""
1260
- ctx.micro_inputs.clear()
1261
- ctx.micro_inputs.extend(ctx.micro_features.process_streaming(audio_chunk))
1262
-
1263
- if ctx.has_oww and ctx.oww_features is not None:
1264
- ctx.oww_inputs.clear()
1265
- ctx.oww_inputs.extend(ctx.oww_features.process_streaming(audio_chunk))
1266
-
1267
- def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
1268
- """Detect wake words in the processed audio features.
1269
-
1270
- Uses refractory period to prevent duplicate triggers.
1271
- Following reference project pattern.
1272
- """
1273
- from pymicro_wakeword import MicroWakeWord
1274
- from pyopen_wakeword import OpenWakeWord
1275
-
1276
- for wake_word in ctx.wake_words:
1277
- activated = False
1278
-
1279
- if isinstance(wake_word, MicroWakeWord):
1280
- for micro_input in ctx.micro_inputs:
1281
- if wake_word.process_streaming(micro_input):
1282
- activated = True
1283
- elif isinstance(wake_word, OpenWakeWord):
1284
- for oww_input in ctx.oww_inputs:
1285
- for prob in wake_word.process_streaming(oww_input):
1286
- if prob > 0.5:
1287
- activated = True
1288
-
1289
- if activated:
1290
- # Check refractory period to prevent duplicate triggers
1291
- now = time.monotonic()
1292
- if (ctx.last_active is None) or ((now - ctx.last_active) > self._state.refractory_seconds):
1293
- _LOGGER.info("Wake word detected: %s", wake_word.id)
1294
- self._state.satellite.wakeup(wake_word)
1295
- # Face tracking will handle looking at user automatically
1296
- self._motion.on_wakeup()
1297
- ctx.last_active = now
1298
-
1299
- def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
1300
- """Detect stop word in the processed audio features."""
1301
- if not self._state.stop_word:
1302
- _LOGGER.warning("Stop word model not loaded")
1303
- return
1304
-
1305
- stopped = False
1306
- for micro_input in ctx.micro_inputs:
1307
- if self._state.stop_word.process_streaming(micro_input):
1308
- stopped = True
1309
- break # Stop at first detection
1310
-
1311
- stop_armed = self._state.stop_word.id in self._state.active_wake_words
1312
- if stopped and stop_armed and (not self._state.is_muted):
1313
- _LOGGER.info("Stop word detected - stopping playback")
1314
- self._state.satellite.stop()
 
1
+ """
2
+ Voice Assistant Service for Reachy Mini.
3
+
4
+ This module provides the main voice assistant service that integrates
5
+ with Home Assistant via ESPHome protocol.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ import threading
12
+ import time
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from queue import Queue
16
+ from typing import Dict, List, Optional, Set, Union
17
+
18
+ import numpy as np
19
+
20
+ from reachy_mini import ReachyMini
21
+
22
+ from .models import AvailableWakeWord, Preferences, ServerState, WakeWordType
23
+ from .audio_player import AudioPlayer
24
+ from .satellite import VoiceSatelliteProtocol
25
+ from .util import get_mac
26
+ from .zeroconf import HomeAssistantZeroconf
27
+ from .motion import ReachyMiniMotion
28
+ from .camera_server import MJPEGCameraServer
29
+
30
+ _LOGGER = logging.getLogger(__name__)
31
+
32
+ _MODULE_DIR = Path(__file__).parent
33
+ _WAKEWORDS_DIR = _MODULE_DIR / "wakewords"
34
+ _SOUNDS_DIR = _MODULE_DIR / "sounds"
35
+ _LOCAL_DIR = _MODULE_DIR.parent / "local"
36
+
37
+
38
+ @dataclass
39
+ class AudioProcessingContext:
40
+ """Context for audio processing, holding mutable state."""
41
+ wake_words: List = field(default_factory=list)
42
+ micro_features: Optional[object] = None
43
+ micro_inputs: List = field(default_factory=list)
44
+ oww_features: Optional[object] = None
45
+ oww_inputs: List = field(default_factory=list)
46
+ has_oww: bool = False
47
+ last_active: Optional[float] = None
48
+
49
+
50
+ # Audio chunk size for consistent streaming (matches reference project)
51
+ AUDIO_BLOCK_SIZE = 1024 # samples at 16kHz = 64ms
52
+
53
+
54
+ class VoiceAssistantService:
55
+ """Voice assistant service that runs ESPHome protocol server."""
56
+
57
+ def __init__(
58
+ self,
59
+ reachy_mini: Optional[ReachyMini] = None,
60
+ name: str = "Reachy Mini",
61
+ host: str = "0.0.0.0",
62
+ port: int = 6053,
63
+ wake_model: str = "okay_nabu",
64
+ camera_port: int = 8081,
65
+ camera_enabled: bool = True,
66
+ ):
67
+ self.reachy_mini = reachy_mini
68
+ self.name = name
69
+ self.host = host
70
+ self.port = port
71
+ self.wake_model = wake_model
72
+ self.camera_port = camera_port
73
+ self.camera_enabled = camera_enabled
74
+
75
+ self._server = None
76
+ self._discovery = None
77
+ self._audio_thread = None
78
+ self._running = False
79
+ self._state: Optional[ServerState] = None
80
+ self._motion = ReachyMiniMotion(reachy_mini)
81
+ self._camera_server: Optional[MJPEGCameraServer] = None
82
+
83
+ # Audio buffer for fixed-size chunk output
84
+ self._audio_buffer: np.ndarray = np.array([], dtype=np.float32)
85
+
86
+ async def start(self) -> None:
87
+ """Start the voice assistant service."""
88
+ _LOGGER.info("Initializing voice assistant service...")
89
+
90
+ # Ensure directories exist
91
+ _WAKEWORDS_DIR.mkdir(parents=True, exist_ok=True)
92
+ _SOUNDS_DIR.mkdir(parents=True, exist_ok=True)
93
+ _LOCAL_DIR.mkdir(parents=True, exist_ok=True)
94
+
95
+ # Verify required files (bundled with package)
96
+ await self._verify_required_files()
97
+
98
+ # Load wake words
99
+ available_wake_words = self._load_available_wake_words()
100
+ _LOGGER.debug("Available wake words: %s", list(available_wake_words.keys()))
101
+
102
+ # Load preferences
103
+ preferences_path = _LOCAL_DIR / "preferences.json"
104
+ preferences = self._load_preferences(preferences_path)
105
+
106
+ # Load wake word models
107
+ wake_models, active_wake_words = self._load_wake_models(
108
+ available_wake_words, preferences
109
+ )
110
+
111
+ # Load stop model
112
+ stop_model = self._load_stop_model()
113
+
114
+ # Create audio players with Reachy Mini reference
115
+ music_player = AudioPlayer(self.reachy_mini)
116
+ tts_player = AudioPlayer(self.reachy_mini)
117
+
118
+ # Create server state
119
+ self._state = ServerState(
120
+ name=self.name,
121
+ mac_address=get_mac(),
122
+ audio_queue=Queue(),
123
+ entities=[],
124
+ available_wake_words=available_wake_words,
125
+ wake_words=wake_models,
126
+ active_wake_words=active_wake_words,
127
+ stop_word=stop_model,
128
+ music_player=music_player,
129
+ tts_player=tts_player,
130
+ wakeup_sound=str(_SOUNDS_DIR / "wake_word_triggered.flac"),
131
+ timer_finished_sound=str(_SOUNDS_DIR / "timer_finished.flac"),
132
+ preferences=preferences,
133
+ preferences_path=preferences_path,
134
+ refractory_seconds=2.0,
135
+ download_dir=_LOCAL_DIR,
136
+ reachy_mini=self.reachy_mini,
137
+ motion_enabled=self.reachy_mini is not None,
138
+ )
139
+
140
+ # Set motion controller reference in state
141
+ self._state.motion = self._motion
142
+
143
+ # Start Reachy Mini media system if available
144
+ if self.reachy_mini is not None:
145
+ try:
146
+ # Check if media system is already running to avoid conflicts
147
+ media = self.reachy_mini.media
148
+ if media.audio is not None:
149
+ # Check recording state
150
+ is_recording = getattr(media, '_recording', False)
151
+ if not is_recording:
152
+ media.start_recording()
153
+ _LOGGER.info("Started Reachy Mini recording")
154
+ else:
155
+ _LOGGER.debug("Reachy Mini recording already active")
156
+
157
+ # Check playback state
158
+ is_playing = getattr(media, '_playing', False)
159
+ if not is_playing:
160
+ media.start_playing()
161
+ _LOGGER.info("Started Reachy Mini playback")
162
+ else:
163
+ _LOGGER.debug("Reachy Mini playback already active")
164
+
165
+ _LOGGER.info("Reachy Mini media system initialized")
166
+
167
+ # Optimize microphone settings for voice recognition
168
+ self._optimize_microphone_settings()
169
+ else:
170
+ _LOGGER.warning("Reachy Mini audio system not available")
171
+ except Exception as e:
172
+ _LOGGER.warning("Failed to initialize Reachy Mini media: %s", e)
173
+
174
+ # Start motion controller (5Hz control loop)
175
+ if self._motion is not None:
176
+ self._motion.start()
177
+
178
+ # Start audio processing thread (non-daemon for proper cleanup)
179
+ self._running = True
180
+ self._audio_thread = threading.Thread(
181
+ target=self._process_audio,
182
+ daemon=False,
183
+ )
184
+ self._audio_thread.start()
185
+
186
+ # Start camera server if enabled (must be before ESPHome server)
187
+ if self.camera_enabled:
188
+ self._camera_server = MJPEGCameraServer(
189
+ reachy_mini=self.reachy_mini,
190
+ host=self.host,
191
+ port=self.camera_port,
192
+ fps=15,
193
+ quality=80,
194
+ enable_face_tracking=True,
195
+ )
196
+ await self._camera_server.start()
197
+
198
+ # Connect camera server to motion controller for face tracking
199
+ if self._motion is not None:
200
+ self._motion.set_camera_server(self._camera_server)
201
+
202
+ # Create ESPHome server (pass camera_server for camera entity)
203
+ loop = asyncio.get_running_loop()
204
+ camera_server = self._camera_server # Capture for lambda
205
+ self._server = await loop.create_server(
206
+ lambda: VoiceSatelliteProtocol(self._state, camera_server=camera_server),
207
+ host=self.host,
208
+ port=self.port,
209
+ )
210
+
211
+ # Start mDNS discovery
212
+ self._discovery = HomeAssistantZeroconf(port=self.port, name=self.name)
213
+ await self._discovery.register_server()
214
+
215
+ # Start Sendspin auto-discovery (auto-enabled, no user config needed)
216
+ # Sendspin is for music playback, so connect to music_player
217
+ await music_player.start_sendspin_discovery()
218
+
219
+ _LOGGER.info("Voice assistant service started on %s:%s", self.host, self.port)
220
+
221
+ def _optimize_microphone_settings(self) -> None:
222
+ """Optimize ReSpeaker XVF3800 microphone settings for voice recognition.
223
+
224
+ This method configures the XMOS XVF3800 audio processor for optimal
225
+ voice command recognition at distances up to 2-3 meters.
226
+
227
+ If user has previously set values via Home Assistant, those values are
228
+ restored from preferences. Otherwise, default optimized values are used.
229
+
230
+ Key optimizations:
231
+ 1. Enable AGC with higher max gain for distant speech
232
+ 2. Reduce noise suppression to preserve quiet speech
233
+ 3. Increase base microphone gain
234
+ 4. Optimize AGC response times for voice commands
235
+
236
+ Reference: reachy_mini/src/reachy_mini/media/audio_control_utils.py
237
+ XMOS docs: https://www.xmos.com/documentation/XM-014888-PC/
238
+ """
239
+ if self.reachy_mini is None:
240
+ return
241
+
242
+ try:
243
+ # Access ReSpeaker through the media audio system
244
+ audio = self.reachy_mini.media.audio
245
+ if audio is None or not hasattr(audio, '_respeaker'):
246
+ _LOGGER.debug("ReSpeaker not available for optimization")
247
+ return
248
+
249
+ respeaker = audio._respeaker
250
+ if respeaker is None:
251
+ _LOGGER.debug("ReSpeaker device not found")
252
+ return
253
+
254
+ # Get saved preferences (if any)
255
+ prefs = self._state.preferences if self._state else None
256
+
257
+ # ========== 1. AGC (Automatic Gain Control) Settings ==========
258
+ # Use saved value if available, otherwise use default (enabled)
259
+ agc_enabled = prefs.agc_enabled if (prefs and prefs.agc_enabled is not None) else True
260
+ try:
261
+ respeaker.write("PP_AGCONOFF", [1 if agc_enabled else 0])
262
+ _LOGGER.info("AGC %s (PP_AGCONOFF=%d)%s",
263
+ "enabled" if agc_enabled else "disabled",
264
+ 1 if agc_enabled else 0,
265
+ " [from preferences]" if (prefs and prefs.agc_enabled is not None) else " [default]")
266
+ except Exception as e:
267
+ _LOGGER.debug("Could not set AGC: %s", e)
268
+
269
+ # Use saved value if available, otherwise use default (30dB)
270
+ agc_max_gain = prefs.agc_max_gain if (prefs and prefs.agc_max_gain is not None) else 30.0
271
+ try:
272
+ respeaker.write("PP_AGCMAXGAIN", [agc_max_gain])
273
+ _LOGGER.info("AGC max gain set (PP_AGCMAXGAIN=%.1fdB)%s",
274
+ agc_max_gain,
275
+ " [from preferences]" if (prefs and prefs.agc_max_gain is not None) else " [default]")
276
+ except Exception as e:
277
+ _LOGGER.debug("Could not set PP_AGCMAXGAIN: %s", e)
278
+
279
+ # Set AGC desired output level (target level after gain)
280
+ # More negative = quieter output, less negative = louder
281
+ # Default is around -25dB, set to -18dB for stronger output
282
+ try:
283
+ respeaker.write("PP_AGCDESIREDLEVEL", [-18.0])
284
+ _LOGGER.debug("AGC desired level set (PP_AGCDESIREDLEVEL=-18.0dB)")
285
+ except Exception as e:
286
+ _LOGGER.debug("Could not set PP_AGCDESIREDLEVEL: %s", e)
287
+
288
+ # Optimize AGC time constants for voice commands
289
+ # Faster attack time helps capture sudden speech onset
290
+ try:
291
+ respeaker.write("PP_AGCTIME", [0.5]) # Main time constant (seconds)
292
+ _LOGGER.debug("AGC time constant set (PP_AGCTIME=0.5s)")
293
+ except Exception as e:
294
+ _LOGGER.debug("Could not set PP_AGCTIME: %s", e)
295
+
296
+ # ========== 2. Base Microphone Gain ==========
297
+ # Increase base microphone gain for better sensitivity
298
+ # Default is 1.0, increase to 2.0 for distant speech
299
+ # Range: 0.0-4.0 (float, linear gain multiplier)
300
+ try:
301
+ respeaker.write("AUDIO_MGR_MIC_GAIN", [2.0])
302
+ _LOGGER.info("Microphone gain increased (AUDIO_MGR_MIC_GAIN=2.0)")
303
+ except Exception as e:
304
+ _LOGGER.debug("Could not set AUDIO_MGR_MIC_GAIN: %s", e)
305
+
306
+ # ========== 3. Noise Suppression Settings ==========
307
+ # Use saved value if available, otherwise use default (15%)
308
+ # PP_MIN_NS: minimum noise suppression threshold
309
+ # Higher values = less aggressive suppression = better voice pickup
310
+ # PP_MIN_NS = 0.85 means "keep at least 85% of signal" = 15% max suppression
311
+ # UI shows "noise suppression strength" so 15% = PP_MIN_NS of 0.85
312
+ noise_suppression = prefs.noise_suppression if (prefs and prefs.noise_suppression is not None) else 15.0
313
+ pp_min_ns = 1.0 - (noise_suppression / 100.0) # Convert percentage to PP_MIN_NS value
314
+ try:
315
+ respeaker.write("PP_MIN_NS", [pp_min_ns])
316
+ _LOGGER.info("Noise suppression set to %.0f%% strength (PP_MIN_NS=%.2f)%s",
317
+ noise_suppression, pp_min_ns,
318
+ " [from preferences]" if (prefs and prefs.noise_suppression is not None) else " [default]")
319
+ except Exception as e:
320
+ _LOGGER.debug("Could not set PP_MIN_NS: %s", e)
321
+
322
+ # PP_MIN_NN: minimum noise floor estimation
323
+ # Higher values = less aggressive noise floor tracking
324
+ try:
325
+ respeaker.write("PP_MIN_NN", [pp_min_ns]) # Match PP_MIN_NS
326
+ _LOGGER.debug("Noise floor threshold set (PP_MIN_NN=%.2f)", pp_min_ns)
327
+ except Exception as e:
328
+ _LOGGER.debug("Could not set PP_MIN_NN: %s", e)
329
+
330
+ # ========== 4. Echo Cancellation Settings ==========
331
+ # Ensure echo cancellation is enabled (important for TTS playback)
332
+ try:
333
+ respeaker.write("PP_ECHOONOFF", [1])
334
+ _LOGGER.debug("Echo cancellation enabled (PP_ECHOONOFF=1)")
335
+ except Exception as e:
336
+ _LOGGER.debug("Could not set PP_ECHOONOFF: %s", e)
337
+
338
+ # ========== 5. High-pass filter (remove low frequency noise) ==========
339
+ try:
340
+ respeaker.write("AEC_HPFONOFF", [1])
341
+ _LOGGER.debug("High-pass filter enabled (AEC_HPFONOFF=1)")
342
+ except Exception as e:
343
+ _LOGGER.debug("Could not set AEC_HPFONOFF: %s", e)
344
+
345
+ _LOGGER.info("Microphone settings initialized (AGC=%s, MaxGain=%.0fdB, NoiseSuppression=%.0f%%)",
346
+ "ON" if agc_enabled else "OFF", agc_max_gain, noise_suppression)
347
+
348
+ except Exception as e:
349
+ _LOGGER.warning("Failed to optimize microphone settings: %s", e)
350
+
351
    async def stop(self) -> None:
        """Stop the voice assistant service.

        Teardown is strictly ordered: recording is stopped before the
        ``_running`` flag is cleared so no new audio arrives while the audio
        thread winds down; the thread is joined before playback/server/mDNS
        cleanup. Each hardware call is wrapped in its own try/except so one
        failing subsystem does not block the rest of the shutdown.
        """
        _LOGGER.info("Stopping voice assistant service...")

        # 1. First stop audio recording to prevent new data from coming in
        if self.reachy_mini is not None:
            try:
                self.reachy_mini.media.stop_recording()
                _LOGGER.debug("Reachy Mini recording stopped")
            except Exception as e:
                # Best-effort: log and continue with the remaining teardown.
                _LOGGER.warning("Error stopping Reachy Mini recording: %s", e)

        # 2. Set stop flag (the audio loops poll this each iteration)
        self._running = False

        # 3. Wait for audio thread to finish (bounded wait; a hung thread
        #    only produces a warning so shutdown can proceed)
        if self._audio_thread:
            self._audio_thread.join(timeout=1.0)
            if self._audio_thread.is_alive():
                _LOGGER.warning("Audio thread did not stop in time")

        # 4. Stop playback
        if self.reachy_mini is not None:
            try:
                self.reachy_mini.media.stop_playing()
                _LOGGER.debug("Reachy Mini playback stopped")
            except Exception as e:
                _LOGGER.warning("Error stopping Reachy Mini playback: %s", e)

        # 5. Stop ESPHome server
        if self._server:
            self._server.close()
            await self._server.wait_closed()

        # 6. Unregister mDNS
        if self._discovery:
            await self._discovery.unregister_server()

        # 6.5. Stop Sendspin (music player transport)
        if self._state and self._state.music_player:
            await self._state.music_player.stop_sendspin()

        # 7. Stop camera server
        if self._camera_server:
            await self._camera_server.stop()
            self._camera_server = None

        # 8. Shutdown motion executor
        if self._motion:
            self._motion.shutdown()

        _LOGGER.info("Voice assistant service stopped.")
+ async def _verify_required_files(self) -> None:
405
+ """Verify required model and sound files exist (bundled with package)."""
406
+ # Required wake word files (bundled in wakewords/ directory)
407
+ required_wakewords = [
408
+ "okay_nabu.tflite",
409
+ "okay_nabu.json",
410
+ "hey_jarvis.tflite",
411
+ "hey_jarvis.json",
412
+ "stop.tflite",
413
+ "stop.json",
414
+ ]
415
+
416
+ # Required sound files (bundled in sounds/ directory)
417
+ required_sounds = [
418
+ "wake_word_triggered.flac",
419
+ "timer_finished.flac",
420
+ ]
421
+
422
+ # Verify wake word files
423
+ missing_wakewords = []
424
+ for filename in required_wakewords:
425
+ filepath = _WAKEWORDS_DIR / filename
426
+ if not filepath.exists():
427
+ missing_wakewords.append(filename)
428
+
429
+ if missing_wakewords:
430
+ _LOGGER.warning(
431
+ "Missing wake word files: %s. These should be bundled with the package.",
432
+ missing_wakewords
433
+ )
434
+
435
+ # Verify sound files
436
+ missing_sounds = []
437
+ for filename in required_sounds:
438
+ filepath = _SOUNDS_DIR / filename
439
+ if not filepath.exists():
440
+ missing_sounds.append(filename)
441
+
442
+ if missing_sounds:
443
+ _LOGGER.warning(
444
+ "Missing sound files: %s. These should be bundled with the package.",
445
+ missing_sounds
446
+ )
447
+
448
+ if not missing_wakewords and not missing_sounds:
449
+ _LOGGER.info("All required files verified successfully.")
450
+
451
+ def _load_available_wake_words(self) -> Dict[str, AvailableWakeWord]:
452
+ """Load available wake word configurations."""
453
+ available_wake_words: Dict[str, AvailableWakeWord] = {}
454
+
455
+ # Load order: OpenWakeWord first, then MicroWakeWord, then external
456
+ # Later entries override earlier ones, so MicroWakeWord takes priority
457
+ wake_word_dirs = [
458
+ _WAKEWORDS_DIR / "openWakeWord", # OpenWakeWord (lowest priority)
459
+ _LOCAL_DIR / "external_wake_words", # External wake words
460
+ _WAKEWORDS_DIR, # MicroWakeWord (highest priority)
461
+ ]
462
+
463
+ for wake_word_dir in wake_word_dirs:
464
+ if not wake_word_dir.exists():
465
+ continue
466
+
467
+ for config_path in wake_word_dir.glob("*.json"):
468
+ model_id = config_path.stem
469
+ if model_id == "stop":
470
+ continue
471
+
472
+ try:
473
+ with open(config_path, "r", encoding="utf-8") as f:
474
+ config = json.load(f)
475
+
476
+ model_type = WakeWordType(config.get("type", "micro"))
477
+
478
+ if model_type == WakeWordType.OPEN_WAKE_WORD:
479
+ wake_word_path = config_path.parent / config["model"]
480
+ else:
481
+ wake_word_path = config_path
482
+
483
+ available_wake_words[model_id] = AvailableWakeWord(
484
+ id=model_id,
485
+ type=model_type,
486
+ wake_word=config.get("wake_word", model_id),
487
+ trained_languages=config.get("trained_languages", []),
488
+ wake_word_path=wake_word_path,
489
+ )
490
+ except Exception as e:
491
+ _LOGGER.warning("Failed to load wake word %s: %s", config_path, e)
492
+
493
+ return available_wake_words
494
+
495
+ def _load_preferences(self, preferences_path: Path) -> Preferences:
496
+ """Load user preferences."""
497
+ if preferences_path.exists():
498
+ try:
499
+ with open(preferences_path, "r", encoding="utf-8") as f:
500
+ data = json.load(f)
501
+ return Preferences(**data)
502
+ except Exception as e:
503
+ _LOGGER.warning("Failed to load preferences: %s", e)
504
+
505
+ return Preferences()
506
+
507
+ def _load_wake_models(
508
+ self,
509
+ available_wake_words: Dict[str, AvailableWakeWord],
510
+ preferences: Preferences,
511
+ ):
512
+ """Load wake word models."""
513
+ from pymicro_wakeword import MicroWakeWord
514
+ from pyopen_wakeword import OpenWakeWord
515
+
516
+ wake_models: Dict[str, Union[MicroWakeWord, OpenWakeWord]] = {}
517
+ active_wake_words: Set[str] = set()
518
+
519
+ # Try to load preferred models
520
+ if preferences.active_wake_words:
521
+ for wake_word_id in preferences.active_wake_words:
522
+ wake_word = available_wake_words.get(wake_word_id)
523
+ if wake_word is None:
524
+ _LOGGER.warning("Unknown wake word: %s", wake_word_id)
525
+ continue
526
+
527
+ try:
528
+ _LOGGER.debug("Loading wake model: %s", wake_word_id)
529
+ loaded_model = wake_word.load()
530
+ # Set id attribute on the model for later identification
531
+ setattr(loaded_model, 'id', wake_word_id)
532
+ wake_models[wake_word_id] = loaded_model
533
+ active_wake_words.add(wake_word_id)
534
+ except Exception as e:
535
+ _LOGGER.warning("Failed to load wake model %s: %s", wake_word_id, e)
536
+
537
+ # Load default model if none loaded
538
+ if not wake_models:
539
+ wake_word = available_wake_words.get(self.wake_model)
540
+ if wake_word:
541
+ try:
542
+ _LOGGER.debug("Loading default wake model: %s", self.wake_model)
543
+ loaded_model = wake_word.load()
544
+ # Set id attribute on the model for later identification
545
+ setattr(loaded_model, 'id', self.wake_model)
546
+ wake_models[self.wake_model] = loaded_model
547
+ active_wake_words.add(self.wake_model)
548
+ except Exception as e:
549
+ _LOGGER.error("Failed to load default wake model: %s", e)
550
+
551
+ return wake_models, active_wake_words
552
+
553
+ def _load_stop_model(self):
554
+ """Load the stop word model."""
555
+ from pymicro_wakeword import MicroWakeWord
556
+
557
+ stop_config = _WAKEWORDS_DIR / "stop.json"
558
+ if stop_config.exists():
559
+ try:
560
+ model = MicroWakeWord.from_config(stop_config)
561
+ setattr(model, 'id', 'stop')
562
+ return model
563
+ except Exception as e:
564
+ _LOGGER.warning("Failed to load stop model: %s", e)
565
+
566
+ # Return a dummy model if stop model not available
567
+ _LOGGER.warning("Stop model not available, using fallback")
568
+ okay_nabu_config = _WAKEWORDS_DIR / "okay_nabu.json"
569
+ if okay_nabu_config.exists():
570
+ model = MicroWakeWord.from_config(okay_nabu_config)
571
+ setattr(model, 'id', 'stop')
572
+ return model
573
+
574
+ return None
575
+
576
+ def _process_audio(self) -> None:
577
+ """Process audio from microphone (Reachy Mini or system fallback)."""
578
+ from pymicro_wakeword import MicroWakeWordFeatures
579
+
580
+ ctx = AudioProcessingContext()
581
+ ctx.micro_features = MicroWakeWordFeatures()
582
+
583
+ try:
584
+ _LOGGER.info("Starting audio processing...")
585
+
586
+ if self.reachy_mini is not None:
587
+ _LOGGER.info("Using Reachy Mini's microphone")
588
+ self._audio_loop_reachy(ctx)
589
+ else:
590
+ _LOGGER.info("Using system microphone (fallback)")
591
+ self._audio_loop_fallback(ctx)
592
+
593
+ except Exception:
594
+ _LOGGER.exception("Error processing audio")
595
+
596
+ def _audio_loop_reachy(self, ctx: AudioProcessingContext) -> None:
597
+ """Audio loop using Reachy Mini's microphone."""
598
+ while self._running:
599
+ try:
600
+ if not self._wait_for_satellite():
601
+ continue
602
+
603
+ self._update_wake_words_list(ctx)
604
+
605
+ # Get audio from Reachy Mini
606
+ audio_chunk = self._get_reachy_audio_chunk()
607
+ if audio_chunk is None:
608
+ time.sleep(0.01)
609
+ continue
610
+
611
+ self._process_audio_chunk(ctx, audio_chunk)
612
+
613
+ except Exception as e:
614
+ _LOGGER.error("Error in Reachy audio processing: %s", e)
615
+ time.sleep(0.1)
616
+
617
+ def _audio_loop_fallback(self, ctx: AudioProcessingContext) -> None:
618
+ """Audio loop using system microphone (fallback)."""
619
+ import sounddevice as sd
620
+
621
+ block_size = 1024
622
+
623
+ with sd.InputStream(
624
+ samplerate=16000,
625
+ channels=1,
626
+ blocksize=block_size,
627
+ dtype="float32",
628
+ ) as stream:
629
+ while self._running:
630
+ if not self._wait_for_satellite():
631
+ continue
632
+
633
+ self._update_wake_words_list(ctx)
634
+
635
+ # Get audio from system microphone
636
+ audio_chunk_array, overflowed = stream.read(block_size)
637
+ if overflowed:
638
+ _LOGGER.warning("Audio buffer overflow")
639
+
640
+ audio_chunk_array = audio_chunk_array.reshape(-1)
641
+ audio_chunk = self._convert_to_pcm(audio_chunk_array)
642
+
643
+ self._process_audio_chunk(ctx, audio_chunk)
644
+
645
+ def _wait_for_satellite(self) -> bool:
646
+ """Wait for satellite connection. Returns True if connected."""
647
+ if self._state is None or self._state.satellite is None:
648
+ time.sleep(0.1)
649
+ return False
650
+ return True
651
+
652
+ def _update_wake_words_list(self, ctx: AudioProcessingContext) -> None:
653
+ """Update wake words list if changed."""
654
+ from pyopen_wakeword import OpenWakeWord, OpenWakeWordFeatures
655
+ from pymicro_wakeword import MicroWakeWordFeatures
656
+
657
+ if (not ctx.wake_words) or (self._state.wake_words_changed and self._state.wake_words):
658
+ self._state.wake_words_changed = False
659
+ ctx.wake_words.clear()
660
+
661
+ # Reset feature extractors to clear any residual audio data
662
+ # This prevents false triggers when switching wake words
663
+ ctx.micro_features = MicroWakeWordFeatures()
664
+ ctx.micro_inputs.clear()
665
+ if ctx.oww_features is not None:
666
+ ctx.oww_features = OpenWakeWordFeatures.from_builtin()
667
+ ctx.oww_inputs.clear()
668
+
669
+ # Also reset the refractory period to prevent immediate trigger
670
+ ctx.last_active = time.monotonic()
671
+
672
+ # state.wake_words is Dict[str, MicroWakeWord/OpenWakeWord]
673
+ # We need to filter by active_wake_words (which contains the IDs/keys)
674
+ for ww_id, ww_model in self._state.wake_words.items():
675
+ if ww_id in self._state.active_wake_words:
676
+ # Ensure the model has an 'id' attribute for later use
677
+ if not hasattr(ww_model, 'id'):
678
+ setattr(ww_model, 'id', ww_id)
679
+ ctx.wake_words.append(ww_model)
680
+
681
+ ctx.has_oww = any(isinstance(ww, OpenWakeWord) for ww in ctx.wake_words)
682
+ if ctx.has_oww and ctx.oww_features is None:
683
+ ctx.oww_features = OpenWakeWordFeatures.from_builtin()
684
+
685
+ _LOGGER.info("Active wake words updated: %s (features reset)", list(self._state.active_wake_words))
686
+
687
+ def _get_reachy_audio_chunk(self) -> Optional[bytes]:
688
+ """Get fixed-size audio chunk from Reachy Mini's microphone.
689
+
690
+ Returns exactly AUDIO_BLOCK_SIZE samples each time, buffering
691
+ internally to ensure consistent chunk sizes for streaming.
692
+
693
+ Returns:
694
+ PCM audio bytes of fixed size, or None if not enough data.
695
+ """
696
+ # Get new audio data from SDK
697
+ audio_data = self.reachy_mini.media.get_audio_sample()
698
+
699
+ # Append new data to buffer if valid
700
+ if audio_data is not None and isinstance(audio_data, np.ndarray) and audio_data.size > 0:
701
+ try:
702
+ if audio_data.dtype.kind not in ('S', 'U', 'O', 'V', 'b'):
703
+ if audio_data.dtype != np.float32:
704
+ audio_data = np.asarray(audio_data, dtype=np.float32)
705
+
706
+ # Convert stereo to mono
707
+ if audio_data.ndim == 2 and audio_data.shape[1] == 2:
708
+ audio_data = audio_data.mean(axis=1)
709
+ elif audio_data.ndim == 2:
710
+ audio_data = audio_data[:, 0].copy()
711
+
712
+ if audio_data.ndim == 1:
713
+ self._audio_buffer = np.concatenate([self._audio_buffer, audio_data])
714
+ except (TypeError, ValueError):
715
+ pass
716
+
717
+ # Return fixed-size chunk if we have enough data
718
+ if len(self._audio_buffer) >= AUDIO_BLOCK_SIZE:
719
+ chunk = self._audio_buffer[:AUDIO_BLOCK_SIZE]
720
+ self._audio_buffer = self._audio_buffer[AUDIO_BLOCK_SIZE:]
721
+ return self._convert_to_pcm(chunk)
722
+
723
+ return None
724
+
725
+ def _convert_to_pcm(self, audio_chunk_array: np.ndarray) -> bytes:
726
+ """Convert float32 audio array to 16-bit PCM bytes."""
727
+ return (
728
+ (np.clip(audio_chunk_array, -1.0, 1.0) * 32767.0)
729
+ .astype("<i2")
730
+ .tobytes()
731
+ )
732
+
733
+ def _process_audio_chunk(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
734
+ """Process an audio chunk for wake word detection.
735
+
736
+ Following reference project pattern: always process wake words.
737
+ Refractory period prevents duplicate triggers.
738
+
739
+ Args:
740
+ ctx: Audio processing context
741
+ audio_chunk: PCM audio bytes
742
+ """
743
+ # Stream audio to Home Assistant
744
+ self._state.satellite.handle_audio(audio_chunk)
745
+
746
+ # Process wake word features
747
+ self._process_features(ctx, audio_chunk)
748
+
749
+ # Detect wake words
750
+ self._detect_wake_words(ctx)
751
+
752
+ # Detect stop word
753
+ self._detect_stop_word(ctx)
754
+
755
+ def _process_features(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
756
+ """Process audio features for wake word detection."""
757
+ ctx.micro_inputs.clear()
758
+ ctx.micro_inputs.extend(ctx.micro_features.process_streaming(audio_chunk))
759
+
760
+ if ctx.has_oww and ctx.oww_features is not None:
761
+ ctx.oww_inputs.clear()
762
+ ctx.oww_inputs.extend(ctx.oww_features.process_streaming(audio_chunk))
763
+
764
    def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
        """Detect wake words in the processed audio features.

        Uses refractory period to prevent duplicate triggers.
        Following reference project pattern.

        NOTE(review): the inner loops intentionally run to completion with no
        early break, so every feature window is fed to the model even after
        activation — presumably to keep the streaming state in sync; confirm
        before "optimizing" with any()/break.
        """
        from pymicro_wakeword import MicroWakeWord
        from pyopen_wakeword import OpenWakeWord

        for wake_word in ctx.wake_words:
            activated = False

            if isinstance(wake_word, MicroWakeWord):
                # MicroWakeWord: model returns a truthy activation per window.
                for micro_input in ctx.micro_inputs:
                    if wake_word.process_streaming(micro_input):
                        activated = True
            elif isinstance(wake_word, OpenWakeWord):
                # OpenWakeWord: model yields probabilities; 0.5 is the
                # activation threshold used here.
                for oww_input in ctx.oww_inputs:
                    for prob in wake_word.process_streaming(oww_input):
                        if prob > 0.5:
                            activated = True

            if activated:
                # Check refractory period to prevent duplicate triggers
                now = time.monotonic()
                if (ctx.last_active is None) or (
                    (now - ctx.last_active) > self._state.refractory_seconds
                ):
                    _LOGGER.info("Wake word detected: %s", wake_word.id)
                    self._state.satellite.wakeup(wake_word)
                    # Face tracking will handle looking at user automatically
                    self._motion.on_wakeup()
                    # Start the refractory window from this trigger.
                    ctx.last_active = now
    def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
        """Detect stop word in the processed audio features.

        The loop deliberately consumes every feature window without breaking
        (same pattern as _detect_wake_words), and the stop action only fires
        when the stop model's id is in the active wake word set.
        """
        if not self._state.stop_word:
            # No stop model loaded; nothing to do.
            return

        stopped = False
        for micro_input in ctx.micro_inputs:
            if self._state.stop_word.process_streaming(micro_input):
                stopped = True

        if stopped and (self._state.stop_word.id in self._state.active_wake_words):
            _LOGGER.info("Stop word detected")
            self._state.satellite.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/.gitkeep RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/README.md RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/alexa.json RENAMED
File without changes