"""
SeqMaster Runtime - Watchdog Service
Layer: RUNTIME/CORE

Provides fail-safe mechanisms including:
- Watchdog timer for detecting hangs
- Power-loss recovery
- State persistence for resume after crash
"""

import asyncio
import json
import os
import time
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any

import structlog

from src.core.config import settings

logger = structlog.get_logger(__name__)


class WatchdogService:
    """
    Watchdog service for fail-safe operation.

    Features:
    - Periodic heartbeat to detect hangs
    - Persists execution state for power-loss recovery
    - Auto-recovery of interrupted test sessions

    The persisted state lives in ``settings.DATA_DIR / "watchdog_state.json"``
    and is replaced atomically on every save so that an interruption during
    a write cannot corrupt the last good snapshot.
    """

    def __init__(self):
        self.timeout = settings.WATCHDOG_TIMEOUT_SECONDS
        self.state_file = settings.DATA_DIR / "watchdog_state.json"
        self._running = False
        self._task: Optional[asyncio.Task] = None
        # Wall-clock time of the last heartbeat (informational only).
        self._last_heartbeat: Optional[datetime] = None
        # Monotonic timestamp of the last heartbeat; used for the timeout
        # arithmetic because it is immune to DST and system clock changes.
        self._last_heartbeat_monotonic: Optional[float] = None
        self._execution_state: Optional[Dict[str, Any]] = None

    async def start(self) -> None:
        """
        Start the watchdog service.

        Spawns the monitoring task and, when
        ``settings.POWER_LOSS_RECOVERY_ENABLED`` is set, loads any state file
        left behind by a previous run. Calling ``start`` while the monitoring
        task is still alive is a no-op, so the existing task is never leaked.
        """
        if self._task is not None and not self._task.done():
            logger.warning("Watchdog service already running")
            return

        self._running = True
        self._task = asyncio.create_task(self._watchdog_loop())

        # Check for recovery state
        if settings.POWER_LOSS_RECOVERY_ENABLED:
            await self._check_recovery()

        logger.info("Watchdog service started", timeout=self.timeout)

    async def stop(self) -> None:
        """
        Stop the watchdog service.

        Cancels the monitoring task, waits for it to finish and removes the
        state file. The in-memory execution state is left untouched.
        """
        self._running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            self._task = None

        # Clear state on clean shutdown
        await self._clear_state()
        logger.info("Watchdog service stopped")

    async def heartbeat(self) -> None:
        """Record a heartbeat to indicate system is alive."""
        self._last_heartbeat = datetime.now()
        self._last_heartbeat_monotonic = time.monotonic()

    async def save_execution_state(self, state: Dict[str, Any]) -> None:
        """
        Save execution state for power-loss recovery.

        The given mapping is copied, stamped with ``saved_at`` and
        ``hostname``, kept in memory and written to the state file. Write
        failures are logged and swallowed (best effort); the in-memory copy
        is updated regardless.

        Args:
            state: Current execution state including:
                - session_id
                - sequence_id
                - current_step_index
                - dut_id
                - operator
                - timestamp
        """
        self._execution_state = {
            **state,
            "saved_at": datetime.now().isoformat(),
            "hostname": settings.HOSTNAME
        }

        try:
            self._write_state_file(self._execution_state)
            logger.debug("Execution state saved", session_id=state.get("session_id"))
        except Exception as e:
            logger.error("Failed to save execution state", error=str(e))

    async def clear_execution_state(self) -> None:
        """
        Clear saved execution state (after successful completion).

        Also forgets the last heartbeat: a heartbeat belonging to a finished
        execution must not be compared against the timeout once a later
        execution saves its state, otherwise the loop reports a false timeout.
        """
        self._execution_state = None
        self._last_heartbeat = None
        self._last_heartbeat_monotonic = None
        await self._clear_state()

    async def get_recovery_state(self) -> Optional[Dict[str, Any]]:
        """Return the execution state currently held in memory, or None."""
        return self._execution_state

    def _write_state_file(self, payload: Dict[str, Any]) -> None:
        """
        Atomically replace the state file with ``payload`` encoded as JSON.

        The data is written to a sibling ``*.tmp`` file, flushed and fsynced,
        and only then renamed over the real file. If anything fails, the
        temporary file is removed, the previous state file is left as it was
        and the exception is re-raised.

        Args:
            payload: JSON-serializable mapping to persist.

        Raises:
            OSError: If the directory or file cannot be written.
            TypeError, ValueError: If ``payload`` is not JSON-serializable.
        """
        self.state_file.parent.mkdir(parents=True, exist_ok=True)
        tmp_file = self.state_file.with_name(self.state_file.name + ".tmp")
        try:
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(payload, f, indent=2)
                f.flush()
                # Push the bytes to disk before the rename makes them visible.
                os.fsync(f.fileno())
            os.replace(tmp_file, self.state_file)
        except Exception:
            try:
                tmp_file.unlink()
            except OSError:
                pass
            raise

    def _heartbeat_age(self) -> Optional[float]:
        """Return seconds since the last heartbeat, or None if there is none."""
        if self._last_heartbeat_monotonic is None:
            return None
        return time.monotonic() - self._last_heartbeat_monotonic

    async def _check_recovery(self) -> None:
        """
        Load the state file, if one exists, into the in-memory state.

        A file that cannot be read, is not valid JSON, or does not contain a
        JSON object is logged as an error and ignored.
        """
        if not self.state_file.exists():
            return

        try:
            with open(self.state_file, "r", encoding="utf-8") as f:
                state = json.load(f)
            # Everything downstream calls .get() on the state, so anything
            # other than a JSON object is treated as a corrupt file.
            if not isinstance(state, dict):
                raise ValueError(
                    f"expected a JSON object, got {type(state).__name__}"
                )
        except Exception as e:
            logger.error("Failed to load recovery state", error=str(e))
            return

        self._execution_state = state
        logger.warning(
            "Found recovery state from previous session",
            session_id=state.get("session_id"),
            sequence_id=state.get("sequence_id"),
            saved_at=state.get("saved_at")
        )

    async def _clear_state(self) -> None:
        """Remove the state file; a missing file is not an error."""
        try:
            self.state_file.unlink()
        except FileNotFoundError:
            pass
        except Exception as e:
            logger.error("Failed to clear state file", error=str(e))

    async def _watchdog_loop(self) -> None:
        """
        Main watchdog monitoring loop.

        Wakes up every ``timeout / 2`` seconds and logs an error when an
        execution state is present and the last heartbeat is older than
        ``timeout`` seconds. Runs until ``_running`` is cleared or the task
        is cancelled.
        """
        while self._running:
            try:
                await asyncio.sleep(self.timeout / 2)

                # Only check for timeout when we have an active execution state
                # (heartbeats are only sent during test execution)
                state = self._execution_state
                elapsed = self._heartbeat_age()
                if state and elapsed is not None and elapsed > self.timeout:
                    logger.error(
                        "Watchdog timeout detected!",
                        elapsed_seconds=elapsed,
                        timeout=self.timeout,
                        session_id=state.get("session_id")
                    )
                    # Could trigger recovery actions here

            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error("Watchdog loop error", error=str(e))


class ExecutionGuard:
    """
    Async context manager that brackets a test run with watchdog state.

    On entry the supplied state is handed to the watchdog's
    ``save_execution_state``; on a clean exit the watchdog's
    ``clear_execution_state`` is called. When the body raises, nothing is
    cleared and the exception propagates to the caller.

    Usage:
        async with ExecutionGuard(watchdog, session_state):
            await execute_test()
    """

    def __init__(self, watchdog: WatchdogService, state: Dict[str, Any]):
        self.watchdog, self.state = watchdog, state

    async def __aenter__(self):
        service = self.watchdog
        await service.save_execution_state(self.state)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        body_failed = exc_type is not None
        if not body_failed:
            # Body finished without raising: the saved state is obsolete.
            await self.watchdog.clear_execution_state()
        # On failure the saved state is deliberately kept for recovery.
        # Returning False never suppresses the exception.
        return False
