Desmond-Dong committed on
Commit
9949417
·
1 Parent(s): f3abae3

v0.2.2: Fix motion failure on the second conversation. Changes: (1) reduce the control loop from 20Hz to 10Hz; (2) improve connection recovery (faster reconnect, 2s retry interval); (3) add consecutive-error tracking before marking the connection as lost; (4) handle transient connection issues more robustly.

Browse files
pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
  name = "reachy_mini_ha_voice"
7
- version = "0.2.1"
8
  description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.10"
 
4
 
5
  [project]
6
  name = "reachy_mini_ha_voice"
7
+ version = "0.2.2"
8
  description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.10"
reachy_mini_ha_voice/motion.py CHANGED
@@ -1,7 +1,7 @@
1
  """Reachy Mini motion control integration.
2
 
3
  This module provides a high-level motion API that delegates to the
4
- MovementManager for unified 20Hz control.
5
  """
6
 
7
  import logging
@@ -17,7 +17,7 @@ class ReachyMiniMotion:
17
  """Reachy Mini motion controller for voice assistant.
18
 
19
  All public motion methods (on_*) are non-blocking. They send commands
20
- to the MovementManager which handles them in its 20Hz control loop.
21
  """
22
 
23
  def __init__(self, reachy_mini=None):
 
1
  """Reachy Mini motion control integration.
2
 
3
  This module provides a high-level motion API that delegates to the
4
+ MovementManager for unified 10Hz control.
5
  """
6
 
7
  import logging
 
17
  """Reachy Mini motion controller for voice assistant.
18
 
19
  All public motion methods (on_*) are non-blocking. They send commands
20
+ to the MovementManager which handles them in its 10Hz control loop.
21
  """
22
 
23
  def __init__(self, reachy_mini=None):
reachy_mini_ha_voice/movement_manager.py CHANGED
@@ -5,13 +5,14 @@ This module provides a centralized control system for robot movements,
5
  inspired by the reachy_mini_conversation_app architecture.
6
 
7
  Key features:
8
- - Single 20Hz control loop (reduced from 100Hz to prevent daemon crashes)
9
  - Command queue pattern (thread-safe external API)
10
  - Error throttling (prevents log explosion)
11
  - Speech-driven head sway
12
  - Breathing animation during idle
13
  - Graceful shutdown
14
  - Pose change detection (skip sending if no significant change)
 
15
  """
16
 
17
  import logging
@@ -36,7 +37,7 @@ logger = logging.getLogger(__name__)
36
  # Constants (borrowed from conversation_app)
37
  # =============================================================================
38
 
39
- CONTROL_LOOP_FREQUENCY_HZ = 20 # 20Hz control loop (reduced from 100Hz to prevent daemon crashes)
40
  TARGET_PERIOD = 1.0 / CONTROL_LOOP_FREQUENCY_HZ
41
 
42
  # Speech sway parameters (from conversation_app SwayRollRT)
@@ -289,12 +290,12 @@ class BreathingAnimation:
289
 
290
  class MovementManager:
291
  """
292
- Unified movement manager with 20Hz control loop.
293
 
294
  All external interactions go through the command queue,
295
  ensuring thread safety and preventing race conditions.
296
 
297
- Note: Frequency reduced from 100Hz to 20Hz to prevent daemon crashes
298
  caused by excessive Zenoh message traffic.
299
  """
300
 
@@ -332,9 +333,11 @@ class MovementManager:
332
  # Connection health tracking
333
  self._connection_lost = False
334
  self._last_successful_command = self._now()
335
- self._connection_timeout = 5.0 # 5 seconds without success = connection lost
336
- self._reconnect_attempt_interval = 10.0 # Try reconnecting every 10 seconds
337
  self._last_reconnect_attempt = 0.0
 
 
338
 
339
  # Pending action
340
  self._pending_action: Optional[PendingAction] = None
@@ -689,18 +692,16 @@ class MovementManager:
689
  # No significant change, skip sending command
690
  return
691
 
692
- # Check if connection is lost and we should skip sending commands
693
  now = self._now()
 
 
694
  if self._connection_lost:
695
- time_since_last_success = now - self._last_successful_command
696
-
697
- # Only attempt reconnection every N seconds
698
- if now - self._last_reconnect_attempt >= self._reconnect_attempt_interval:
699
- self._last_reconnect_attempt = now
700
- logger.info("Attempting to send command after connection loss...")
701
- else:
702
  # Skip sending commands to reduce error spam
703
  return
 
 
 
704
 
705
  try:
706
  # Build head pose matrix
@@ -726,37 +727,36 @@ class MovementManager:
726
  # Command succeeded - update connection health and cache
727
  self._last_successful_command = now
728
  self._last_sent_pose = pose.copy() # Cache sent pose
 
 
729
  if self._connection_lost:
730
  logger.info("✓ Connection to robot restored")
731
  self._connection_lost = False
732
- self._suppressed_errors = 0 # Reset error counter
733
 
734
  except Exception as e:
735
  error_msg = str(e)
 
736
 
737
  # Check if this is a connection error
738
- if "Lost connection" in error_msg or "ZError" in error_msg:
739
- time_since_last_success = now - self._last_successful_command
740
-
741
- if not self._connection_lost and time_since_last_success > self._connection_timeout:
742
  # First time detecting connection loss
743
- logger.error(f"✗ Lost connection to robot daemon: {error_msg}")
744
- logger.error(" Troubleshooting steps:")
745
- logger.error(" 1. Check if Reachy Mini Daemon is running: sudo systemctl status reachy-mini-daemon")
746
- logger.error(" 2. Verify Zenoh service on port 7447: netstat -tlnp | grep 7447")
747
- logger.error(" 3. Check robot hardware connections")
748
- logger.error(" 4. Review daemon logs: sudo journalctl -u reachy-mini-daemon -n 50")
749
- logger.error(f" Will retry connection every {self._reconnect_attempt_interval}s...")
750
- self._connection_lost = True
751
- self._last_reconnect_attempt = now
752
- elif self._connection_lost:
753
  # Already in lost state, use throttled logging
754
  self._log_error_throttled(f"Connection still lost: {error_msg}")
755
- else:
756
- # Transient error, not yet considered lost
757
- self._log_error_throttled(f"Failed to set robot target: {error_msg}")
758
  else:
759
- # Non-connection error
760
  self._log_error_throttled(f"Failed to set robot target: {error_msg}")
761
 
762
  def _log_error_throttled(self, message: str) -> None:
@@ -776,7 +776,7 @@ class MovementManager:
776
  # =========================================================================
777
 
778
  def _control_loop(self) -> None:
779
- """Main 20Hz control loop."""
780
  logger.info("Movement manager control loop started (%.0f Hz)", CONTROL_LOOP_FREQUENCY_HZ)
781
 
782
  last_time = self._now()
 
5
  inspired by the reachy_mini_conversation_app architecture.
6
 
7
  Key features:
8
+ - Single 10Hz control loop (reduced from 100Hz to prevent daemon crashes)
9
  - Command queue pattern (thread-safe external API)
10
  - Error throttling (prevents log explosion)
11
  - Speech-driven head sway
12
  - Breathing animation during idle
13
  - Graceful shutdown
14
  - Pose change detection (skip sending if no significant change)
15
+ - Robust connection recovery (faster reconnection attempts)
16
  """
17
 
18
  import logging
 
37
  # Constants (borrowed from conversation_app)
38
  # =============================================================================
39
 
40
+ CONTROL_LOOP_FREQUENCY_HZ = 10 # 10Hz control loop (reduced from 20Hz to further reduce daemon load)
41
  TARGET_PERIOD = 1.0 / CONTROL_LOOP_FREQUENCY_HZ
42
 
43
  # Speech sway parameters (from conversation_app SwayRollRT)
 
290
 
291
  class MovementManager:
292
  """
293
+ Unified movement manager with 10Hz control loop.
294
 
295
  All external interactions go through the command queue,
296
  ensuring thread safety and preventing race conditions.
297
 
298
+ Note: Frequency reduced from 100Hz to 10Hz to prevent daemon crashes
299
  caused by excessive Zenoh message traffic.
300
  """
301
 
 
333
  # Connection health tracking
334
  self._connection_lost = False
335
  self._last_successful_command = self._now()
336
+ self._connection_timeout = 3.0 # 3 seconds without success = connection lost
337
+ self._reconnect_attempt_interval = 2.0 # Try reconnecting every 2 seconds (faster recovery)
338
  self._last_reconnect_attempt = 0.0
339
+ self._consecutive_errors = 0
340
+ self._max_consecutive_errors = 5 # Mark connection as lost once 5 consecutive errors have accumulated
341
 
342
  # Pending action
343
  self._pending_action: Optional[PendingAction] = None
 
692
  # No significant change, skip sending command
693
  return
694
 
 
695
  now = self._now()
696
+
697
+ # Check if we should skip due to connection loss (but always try periodically)
698
  if self._connection_lost:
699
+ if now - self._last_reconnect_attempt < self._reconnect_attempt_interval:
 
 
 
 
 
 
700
  # Skip sending commands to reduce error spam
701
  return
702
+ # Time to try reconnecting
703
+ self._last_reconnect_attempt = now
704
+ logger.debug("Attempting to send command after connection loss...")
705
 
706
  try:
707
  # Build head pose matrix
 
727
  # Command succeeded - update connection health and cache
728
  self._last_successful_command = now
729
  self._last_sent_pose = pose.copy() # Cache sent pose
730
+ self._consecutive_errors = 0 # Reset error counter
731
+
732
  if self._connection_lost:
733
  logger.info("✓ Connection to robot restored")
734
  self._connection_lost = False
735
+ self._suppressed_errors = 0
736
 
737
  except Exception as e:
738
  error_msg = str(e)
739
+ self._consecutive_errors += 1
740
 
741
  # Check if this is a connection error
742
+ is_connection_error = "Lost connection" in error_msg or "ZError" in error_msg
743
+
744
+ if is_connection_error:
745
+ if not self._connection_lost:
746
  # First time detecting connection loss
747
+ if self._consecutive_errors >= self._max_consecutive_errors:
748
+ logger.warning(f"Connection unstable after {self._consecutive_errors} errors: {error_msg}")
749
+ logger.warning(" Will retry connection every %.1fs...", self._reconnect_attempt_interval)
750
+ self._connection_lost = True
751
+ self._last_reconnect_attempt = now
752
+ else:
753
+ # Transient error, log but don't mark as lost yet
754
+ self._log_error_throttled(f"Transient connection error ({self._consecutive_errors}/{self._max_consecutive_errors}): {error_msg}")
755
+ else:
 
756
  # Already in lost state, use throttled logging
757
  self._log_error_throttled(f"Connection still lost: {error_msg}")
 
 
 
758
  else:
759
+ # Non-connection error - log but don't affect connection state
760
  self._log_error_throttled(f"Failed to set robot target: {error_msg}")
761
 
762
  def _log_error_throttled(self, message: str) -> None:
 
776
  # =========================================================================
777
 
778
  def _control_loop(self) -> None:
779
+ """Main 10Hz control loop."""
780
  logger.info("Movement manager control loop started (%.0f Hz)", CONTROL_LOOP_FREQUENCY_HZ)
781
 
782
  last_time = self._now()
reachy_mini_ha_voice/voice_assistant.py CHANGED
@@ -161,7 +161,7 @@ class VoiceAssistantService:
161
  except Exception as e:
162
  _LOGGER.warning("Failed to initialize Reachy Mini media: %s", e)
163
 
164
- # Start motion controller (20Hz control loop)
165
  if self._motion is not None:
166
  self._motion.start()
167
 
 
161
  except Exception as e:
162
  _LOGGER.warning("Failed to initialize Reachy Mini media: %s", e)
163
 
164
+ # Start motion controller (10Hz control loop)
165
  if self._motion is not None:
166
  self._motion.start()
167