"""
SeqMaster Runtime - Sequence Executor
Layer: EXECUTOR

Main execution engine for test sequences.
Handles flow control, conditionals, loops, retries, and operator interaction.

Features:
- TestStand-style variable system (Locals, Parameters, Station, RunState)
- Expression evaluation for dynamic values
- Plugin adapters (Python, Exec) for step execution
- Typed inputs/outputs with variable binding
"""

import asyncio
import re
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
from enum import Enum

import structlog

from src.core.constants import (
    OPERATOR_RESPONSE_SEC, OPERATOR_POLL_SEC, 
    WATCHDOG_THROTTLE_MS, MAX_NESTING_DEPTH,
    FLOW_CONTROL_TYPES
)
from src.executor.models import (
    TestSequence, TestStep, TestGroup, StepType,
    PropertySet, StepLimit, ComparisonOperator
)
from src.executor.variables import (
    VariableStore, ExpressionResolver, VariableScope,
    PropertyType, VariableDef, ContainerDef
)
from src.executor.adapters import (
    AdapterManager, PythonAdapter, ExecAdapter, CompareAdapter
)
from src.drivers.base import DriverResult
from src.drivers.manager import DriverManager, get_driver_manager
from src.core.watchdog import WatchdogService
from src.core.license import get_license_service, DEMO_LIMITS
from src.database.models import TestStatus, StepStatus

logger = structlog.get_logger(__name__)


class ExecutionState(str, Enum):
    """Lifecycle state of the sequence executor.

    The ``str`` mixin makes every member compare equal to its plain string
    value, so a member can be used wherever the raw string is expected.
    """
    IDLE = "idle"            # Initial state assigned in SequenceExecutor.__init__
    RUNNING = "running"      # Assigned by execute() once the run starts
    PAUSED = "paused"        # presumably assigned while a pause is in effect - set outside the code visible here, verify
    ABORTING = "aborting"    # presumably assigned while an abort is being processed - set outside the code visible here, verify
    COMPLETED = "completed"  # Assigned by execute() when the run finishes (passed, failed or aborted)
    ERROR = "error"          # Assigned by execute() when an exception escapes the run


class GroupRetryRequested(Exception):
    """Control-flow signal: the operator asked for a retry, so the entire group must restart.

    Caught by ``SequenceExecutor._execute_group``, which runs the cleanup and
    retry-setup steps and then executes the group again from its first step.
    """


class StepExecutionResult:
    """Result of executing a single step. Uses __slots__ for memory efficiency.

    Every slot is initialised in ``__init__``; ``to_dict`` relies on that,
    because reading an unset slot raises ``AttributeError``.
    """
    
    __slots__ = (
        'step_id', 'step_name', 'step_type', 'status', 'measured_value',
        'expected_value', 'lower_limit', 'upper_limit', 'passed', 'error_code',
        'error_message', 'applicable_error_codes', 'allow_retry', 'allow_continue',
        'available_actions', 'retry_count', 'loop_iteration', 'loop_total',
        'started_at', 'completed_at', 'duration_ms', 'raw_data', 'metadata',
        'group_id', 'group_name', 'adapter', 'comparison_operator'
    )
    
    def __init__(self, step_id: str, step_name: str, step_type: str = "test"):
        """Create a pending result for one step.

        Args:
            step_id: Identifier of the step this result belongs to
            step_name: Display name of the step
            step_type: Step kind, e.g. 'test', 'action', 'label', 'if'
        """
        self.step_id = step_id
        self.step_name = step_name
        self.step_type = step_type  # 'test', 'action', 'label', 'if', etc.
        self.status = StepStatus.PENDING
        self.measured_value: Optional[Any] = None
        self.expected_value: Optional[Any] = None
        self.lower_limit: Optional[float] = None
        self.upper_limit: Optional[float] = None
        self.passed: Optional[bool] = None
        self.error_code: Optional[str] = None
        self.error_message: Optional[str] = None
        self.applicable_error_codes: List[Dict[str, Any]] = []  # Error codes that apply to this failure
        self.allow_retry: bool = False  # Whether operator can retry this step (group flag)
        self.allow_continue: bool = False  # Whether operator can continue on failure (group flag)
        self.available_actions: List[str] = []  # Available actions: ['retry', 'continue', 'abort']
        self.retry_count: int = 0
        self.loop_iteration: Optional[int] = None  # Current iteration if in loop
        self.loop_total: Optional[int] = None      # Total iterations for loop
        self.started_at: Optional[datetime] = None
        self.completed_at: Optional[datetime] = None
        self.duration_ms: Optional[float] = None
        self.raw_data: Optional[bytes] = None
        self.metadata: Dict[str, Any] = {}
        self.group_id: Optional[str] = None
        self.group_name: Optional[str] = None
        self.adapter: Optional[str] = None
        self.comparison_operator: Optional[str] = None
    
    # Slots that need special handling in to_dict
    _DATETIME_SLOTS = ('started_at', 'completed_at')
    _ENUM_SLOTS = ('status',)
    _EXCLUDED_SLOTS = ('raw_data',)  # Exclude binary data from JSON
    
    @staticmethod
    def _format_utc(moment: datetime) -> str:
        """Render a datetime as ISO-8601 with a trailing 'Z'.

        Naive values are taken to be UTC already and are formatted unchanged.
        Aware values are shifted to UTC and stripped of their tzinfo first;
        otherwise isoformat() would emit an offset and the result would end
        in the invalid '+00:00Z'.
        """
        offset = moment.utcoffset()
        if offset is not None:
            moment = (moment - offset).replace(tzinfo=None)
        return moment.isoformat() + "Z"
    
    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization.

        Returns:
            One entry per slot, in ``__slots__`` order, except the slots in
            ``_EXCLUDED_SLOTS``. Datetimes become ISO-8601 UTC strings with a
            'Z' suffix, enum slots are replaced by their ``.value``, and
            ``None`` is passed through untouched.
        """
        result: Dict[str, Any] = {}
        for slot in self.__slots__:
            if slot in self._EXCLUDED_SLOTS:
                continue
            value = getattr(self, slot)
            if value is not None:
                if slot in self._DATETIME_SLOTS:
                    value = self._format_utc(value)
                elif slot in self._ENUM_SLOTS:
                    value = value.value
            result[slot] = value
        return result


class ExecutionContext:
    """
    Per-run state shared by everything that takes part in executing a sequence.
    
    Holds:
    - The variable store with its TestStand-style scopes (Locals, Parameters, Station, RunState, Step)
    - The expression resolver used to evaluate dynamic values
    - The history of step results and per-step outputs
    - The current position plus pass/fail/skip counters and timestamps
    """
    
    # Step types that only steer control flow; they never count as passed/failed
    FLOW_CONTROL_TYPES = FLOW_CONTROL_TYPES
    
    def __init__(self, session_id: str, dut_id: str, operator: str,
                 station_file: Optional[Path] = None):
        """Create an empty context for one run.

        Args:
            session_id: Identifier of this execution session
            dut_id: Device under test identifier
            operator: Operator name/ID
            station_file: Passed on to the variable store as its station file
        """
        # --- Identity of the run ---
        self.session_id = session_id
        self.dut_id = dut_id
        self.operator = operator
        
        # --- Variables and expressions ---
        self.variable_store = VariableStore(station_file=station_file)
        self.expression_resolver = ExpressionResolver()
        
        # --- Collected results ---
        self.step_results: List[StepExecutionResult] = []
        self.step_outputs: Dict[str, Dict[str, Any]] = {}  # keyed by step_id
        
        # --- Position within the sequence ---
        self.current_step_index: int = 0
        self.current_step: Optional[TestStep] = None
        self.current_group: Optional[TestGroup] = None
        
        # --- Counters ---
        self.total_steps: int = 0      # unique step definitions
        self.executed_steps: int = 0   # step executions, loop iterations included
        self.passed_steps: int = 0     # passed test/action steps only
        self.failed_steps: int = 0     # failed test/action steps only
        self.skipped_steps: int = 0    # skipped steps
        self.flow_steps: int = 0       # flow control steps executed
        
        # --- Loop bookkeeping ---
        self.current_loop_iteration: int = 0  # iteration currently running
        self.current_loop_total: int = 0      # iterations planned for the current loop
        
        # --- Timing ---
        self.started_at: Optional[datetime] = None
        self.completed_at: Optional[datetime] = None
    
    @property
    def variables(self) -> Dict[str, Any]:
        """Legacy accessor: the Locals scope of the variable store."""
        store = self.variable_store
        return store.get_scope(VariableScope.LOCALS)
    
    def get_variable(self, reference: str, default: Any = None) -> Any:
        """Look up a variable by dot-notation reference, falling back to `default`."""
        store = self.variable_store
        return store.get(reference, default)
    
    def set_variable(self, reference: str, value: Any) -> bool:
        """Assign `value` to the variable named by a dot-notation reference."""
        store = self.variable_store
        return store.set(reference, value)
    
    def resolve_expression(self, expression: str, default: Any = None) -> Any:
        """Evaluate `expression` against the store's current expression namespace."""
        current_names = self.variable_store.get_expression_namespace()
        resolver = self.expression_resolver
        return resolver.evaluate(expression, current_names, default)
    
    def resolve_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Resolve every expression found in an inputs dictionary."""
        current_names = self.variable_store.get_expression_namespace()
        resolver = self.expression_resolver
        return resolver.resolve_inputs(inputs, current_names)
    
    def update_runstate(self, **kwargs) -> None:
        """Forward the given keyword arguments to the store as RunState updates."""
        store = self.variable_store
        store.update_runstate(kwargs)


class SequenceExecutor:
    """
    Main sequence execution engine.
    
    Features:
    - Executes test sequences with proper flow control
    - Handles conditionals, loops, and retries
    - Supports pause/resume/abort operations
    - Integrates with watchdog for fail-safe operation
    - Publishes live status updates
    - TestStand-style variable system with expressions
    - Plugin adapters (Python, Exec) for step execution
    """
    
    def __init__(self,
                 driver_manager: Optional[DriverManager] = None,
                 watchdog: Optional[WatchdogService] = None,
                 adapter_manager: Optional[AdapterManager] = None,
                 station_file: Optional[Path] = None):
        """
        Build an idle executor.
        
        Args:
            driver_manager: Driver manager instance; the shared one from
                get_driver_manager() is used when omitted
            watchdog: Watchdog service for fail-safe operation (optional)
            adapter_manager: Adapter manager for step execution; a default
                one is created when omitted
            station_file: Path for station globals persistence; defaults to
                data/station_globals.json
        """
        # --- Collaborators (falsy arguments fall back to the defaults) ---
        self._driver_manager = driver_manager or get_driver_manager()
        self._watchdog = watchdog
        self._station_file = station_file or Path("data/station_globals.json")
        self._adapter_manager = adapter_manager or self._create_default_adapter_manager()
        
        # --- Nothing is running yet ---
        self._state = ExecutionState.IDLE
        self._context: Optional[ExecutionContext] = None
        self._sequence: Optional[TestSequence] = None
        # Kept separately so callbacks can reach the sequence being run
        self._current_sequence: Optional[TestSequence] = None
        self._property_set: Optional[PropertySet] = None
        
        # --- Pause/abort control ---
        self._pause_requested = False
        self._abort_requested = False
        # The event is "set" while execution may proceed, so start it set
        pause_gate = asyncio.Event()
        pause_gate.set()
        self._paused_event = pause_gate
        
        # --- Callbacks, installed later through set_callbacks() ---
        self._on_status_update: Optional[Callable] = None
        self._on_step_complete: Optional[Callable] = None
        self._on_operator_input_required: Optional[Callable] = None
        
        # --- Performance bookkeeping ---
        # step_list_id -> {label: idx}
        self._flow_control_cache: Dict[int, Dict[str, int]] = {}
        self._last_watchdog_update: float = 0
        self._watchdog_throttle_ms: int = WATCHDOG_THROTTLE_MS
    
    def _create_default_adapter_manager(self) -> AdapterManager:
        """Build an AdapterManager with the Python, Exec and Compare adapters registered, in that order."""
        manager = AdapterManager()
        for adapter_cls in (PythonAdapter, ExecAdapter, CompareAdapter):
            manager.register(adapter_cls())
        return manager
    
    @property
    def adapter_manager(self) -> AdapterManager:
        """Adapter manager used by this executor.

        This is either the instance handed to the constructor or the default
        one produced by ``_create_default_adapter_manager``.
        """
        return self._adapter_manager
    
    @property
    def state(self) -> ExecutionState:
        """Current execution state.

        Starts as ``IDLE``; ``execute()`` moves it to ``RUNNING`` and then to
        ``COMPLETED``, or to ``ERROR`` when an exception escapes the run.
        """
        return self._state
    
    @property
    def context(self) -> Optional[ExecutionContext]:
        """Execution context of the most recent ``execute()`` call.

        ``None`` until ``execute()`` has been called for the first time.
        """
        return self._context
    
    @property
    def progress(self) -> float:
        """Get execution progress (0-100)."""
        if not self._context or self._context.total_steps == 0:
            return 0.0
        return (self._context.current_step_index / self._context.total_steps) * 100
    
    def set_callbacks(self,
                      on_status_update: Optional[Callable] = None,
                      on_step_complete: Optional[Callable] = None,
                      on_operator_input: Optional[Callable] = None):
        """Set callback functions for status updates."""
        self._on_status_update = on_status_update
        self._on_step_complete = on_step_complete
        self._on_operator_input_required = on_operator_input
    
    async def _run_callback_steps(
        self,
        steps: Optional[List[TestStep]],
        callback_name: str,
        dry_run: bool = False
    ) -> None:
        """Run one sequence-level callback, never letting its failure escape.
        
        Any Exception raised while the steps run is logged and swallowed, so
        a broken callback cannot change the outcome of the sequence.
        
        Args:
            steps: Steps making up the callback; None or empty means nothing to do
            callback_name: Name used in log messages (e.g., 'cleanup', 'passed', 'failed')
            dry_run: If True, simulate without hardware
        """
        if steps:
            logger.info(f"Running sequence {callback_name} callback", step_count=len(steps))
            try:
                await self._execute_steps(steps, dry_run=dry_run)
                logger.info(f"Sequence {callback_name} callback completed successfully")
            except Exception as exc:
                logger.error(f"Sequence {callback_name} callback failed", error=str(exc))
        else:
            logger.debug(f"No {callback_name} callback steps defined")
    
    async def execute(self, 
                      sequence: TestSequence,
                      dut_id: str,
                      operator: str,
                      property_set: Optional[PropertySet] = None,
                      dry_run: bool = False,
                      session_id: Optional[str] = None,
                      scanned_fields: Optional[Dict[str, Any]] = None) -> ExecutionContext:
        """
        Execute a test sequence.
        
        Order of work: setup steps, groups, ungrouped steps, teardown steps,
        the cleanup callback, then the passed or failed callback. Teardown
        also runs after an abort. The callbacks are always executed with
        dry_run=False, whatever the dry_run argument is.
        
        Args:
            sequence: Test sequence to execute
            dut_id: Device under test identifier
            operator: Operator name/ID
            property_set: Property set with limits
            dry_run: If True, simulate without hardware
            session_id: Optional session ID (generated if not provided)
            scanned_fields: Fields extracted from barcode scanner (e.g. serial number, batch)
            
        Returns:
            ExecutionContext with results
        
        Raises:
            RuntimeError: In demo mode, when the sequence has more step
                executions than the demo limit allows.
            Exception: Anything raised while the sequence runs is logged and
                re-raised after the state has been set to ERROR. On that path
                teardown and the callbacks are not run and the watchdog state
                is not cleared.
        """
        if session_id is None:
            session_id = str(uuid.uuid4())
        
        self._sequence = sequence
        self._property_set = property_set
        self._context = ExecutionContext(
            session_id=session_id,
            dut_id=dut_id,
            operator=operator,
            station_file=self._station_file
        )
        
        # Start timing immediately after context creation (UTC for database consistency)
        self._context.started_at = datetime.utcnow()
        
        # Initialize variables from sequence definition
        self._initialize_variables(sequence)
        
        # Initialize scanned fields from operator barcode scan
        if scanned_fields:
            for key, value in scanned_fields.items():
                self._context.variable_store.set(f"Scanned.{key}", value)
            logger.debug("Initialized scanned fields", count=len(scanned_fields), fields=list(scanned_fields.keys()))
        
        # Count total step executions (including loops)
        self._context.total_steps = sequence.get_total_executions()
        
        # ── Demo mode enforcement: max steps per sequence ──
        license_svc = get_license_service()
        if license_svc.is_demo():
            max_steps = DEMO_LIMITS.get("max_steps", 10)
            if self._context.total_steps > max_steps:
                raise RuntimeError(
                    f"Demo mode: sekvensen har {self._context.total_steps} steps "
                    f"(max {max_steps} i demo). Opgrader licens for ubegrænset.")
        
        # Initialize RunState
        self._context.update_runstate(
            SessionId=session_id,
            SequenceId=sequence.id,
            DutId=dut_id,
            Operator=operator,
            TotalSteps=self._context.total_steps,
            StepIndex=0,
            LoopIndex=0,
            PassedSteps=0,
            FailedSteps=0,
            PreviousStepPassed=True
        )
        
        logger.info("Starting sequence execution",
                   session_id=session_id,
                   sequence_id=sequence.id,
                   dut_id=dut_id,
                   total_steps=self._context.total_steps)
        
        self._state = ExecutionState.RUNNING
        self._abort_requested = False
        self._pause_requested = False
        self._current_sequence = sequence  # Store for callback access
        
        try:
            # Save state for recovery if watchdog available
            if self._watchdog:
                await self._watchdog.save_execution_state({
                    "session_id": session_id,
                    "sequence_id": sequence.id,
                    "dut_id": dut_id,
                    "operator": operator,
                    "current_step_index": 0
                })
            
            # Execute setup steps
            if sequence.setup_steps:
                logger.info("Executing setup steps")
                await self._execute_steps(sequence.setup_steps, dry_run)
            
            # Execute main sequence
            if not self._abort_requested:
                # Execute groups
                for group in sequence.groups:
                    if self._abort_requested:
                        break
                    await self._execute_group(group, dry_run)
                
                # Execute ungrouped steps
                if not self._abort_requested:
                    await self._execute_steps(sequence.steps, dry_run)
            
            # Execute teardown (always, even on abort)
            if sequence.teardown_steps:
                logger.info("Executing teardown steps")
                # _execute_steps stops before its first step while the abort
                # flag is set, so the flag has to be lifted for teardown to
                # actually run after an abort.
                abort_before_teardown = self._abort_requested
                self._abort_requested = False
                try:
                    await self._execute_steps(sequence.teardown_steps, dry_run)
                finally:
                    # Keep the earlier abort as well as one raised during teardown
                    self._abort_requested = self._abort_requested or abort_before_teardown
            
            # Determine final status (the executor state is COMPLETED in every case)
            self._state = ExecutionState.COMPLETED
            if self._abort_requested:
                final_status = TestStatus.ABORTED
            elif self._context.failed_steps > 0:
                final_status = TestStatus.FAILED
            else:
                final_status = TestStatus.PASSED
            
            # Clear abort flag so callbacks can run
            saved_abort = self._abort_requested
            self._abort_requested = False
            
            # Run cleanup callback (always runs exactly once, regardless of result)
            await self._run_callback_steps(
                sequence.cleanup_steps, "cleanup", dry_run=False
            )
            
            # Run result-specific callbacks
            if final_status == TestStatus.PASSED:
                await self._run_callback_steps(
                    sequence.passed_steps, "passed", dry_run=False
                )
            elif final_status in (TestStatus.FAILED, TestStatus.ABORTED):
                await self._run_callback_steps(
                    sequence.failed_steps, "failed", dry_run=False
                )
            
            # Restore abort flag
            self._abort_requested = saved_abort
            
            # Set completion time after all callbacks have finished (UTC)
            self._context.completed_at = datetime.utcnow()
            
            logger.info("Sequence execution completed",
                       session_id=session_id,
                       status=final_status.value,
                       passed=self._context.passed_steps,
                       failed=self._context.failed_steps)
            
            # Send final status update to clients
            if self._on_status_update:
                await self._on_status_update(self._get_status())
            
            # Clear watchdog state on success
            if self._watchdog:
                await self._watchdog.clear_execution_state()
            
            return self._context
            
        except Exception as e:
            self._state = ExecutionState.ERROR
            logger.error("Sequence execution error",
                        session_id=session_id,
                        error=str(e))
            raise
    
    def _initialize_variables(self, sequence: TestSequence) -> None:
        """
        Populate the context's variable store from the sequence definition.
        
        Two passes:
        - register every custom container type listed in sequence.types
        - define the variables listed per scope in sequence.variables;
          a scope name that VariableScope rejects is logged and skipped
        """
        store = self._context.variable_store
        
        # Pass 1: custom container types
        for type_name, type_def in sequence.types.items():
            members = {
                prop_name: VariableDef(
                    name=prop_name,
                    # A property without an explicit type is treated as a string
                    type=PropertyType(prop_def.get("type", "string")),
                    default=prop_def.get("default"),
                    container_type=prop_def.get("container_type")
                )
                for prop_name, prop_def in type_def.items()
            }
            store.define_container_type(
                ContainerDef(name=type_name, properties=members)
            )
            logger.debug("Registered container type", name=type_name)
        
        # Pass 2: variables, scope by scope
        for scope_name, var_defs in sequence.variables.items():
            try:
                scope = VariableScope(scope_name)
            except ValueError:
                logger.warning("Unknown scope in sequence", scope=scope_name)
            else:
                store.define_variables_from_dict(scope, var_defs)
                logger.debug("Defined variables for scope", 
                            scope=scope_name, 
                            count=len(var_defs))
    
    async def _execute_group(self, group: TestGroup, dry_run: bool) -> None:
        """Execute a test group with retry and cleanup callback support.

        The group is published as ``context.current_group`` while it is being
        handled and the field is reset to None on every normal exit: disabled,
        skipped by its condition, or completed. If an exception other than
        GroupRetryRequested propagates, the field is left pointing at the
        group.

        When the steps raise GroupRetryRequested, the sequence-level cleanup
        steps and then the group's retry-setup steps are run (both with
        dry_run=False, failures logged and ignored) and the group is started
        again from its first step. There is no limit on the number of
        retries.

        Args:
            group: Group to execute
            dry_run: If True, simulate without hardware
        """
        self._context.current_group = group
        
        logger.info("Executing group", group_id=group.id, group_name=group.name)
        
        # Check if group is disabled
        if group.disabled:
            logger.info("Group skipped (disabled)", group_id=group.id, group_name=group.name)
            self._context.current_group = None
            return
        
        # Check condition
        if group.condition and not self._evaluate_condition(group.condition):
            logger.info("Group skipped due to condition", group_id=group.id)
            # Reset here as on the other exits, so later steps are not
            # associated with a group that never ran.
            self._context.current_group = None
            return
        
        # Retry loop - allows restarting the group when operator requests retry
        while True:
            try:
                # Handle loop
                if group.loop:
                    await self._execute_loop(group.steps, group.loop, dry_run)
                else:
                    await self._execute_steps(group.steps, dry_run)
                
                # Successfully completed - exit retry loop
                break
                
            except GroupRetryRequested:
                logger.info("Group retry requested, running cleanup callbacks", group_id=group.id)
                
                # Run sequence-level cleanup steps (global cleanup)
                if self._current_sequence and self._current_sequence.cleanup_steps:
                    logger.info("Running sequence cleanup steps")
                    try:
                        await self._execute_steps(self._current_sequence.cleanup_steps, dry_run=False)
                    except Exception as e:
                        # Best effort: a failed cleanup must not block the retry
                        logger.error("Sequence cleanup failed", error=str(e))
                
                # Run retry setup steps for this group (prepare for retry)
                if group.retry_setup_steps:
                    logger.info("Running retry setup steps", group_id=group.id)
                    try:
                        await self._execute_steps(group.retry_setup_steps, dry_run=False)
                    except Exception as e:
                        # Best effort: a failed retry setup must not block the retry
                        logger.error("Retry setup failed", error=str(e), group_id=group.id)
                
                logger.info("Restarting group execution", group_id=group.id)
                continue  # Retry the group
        
        self._context.current_group = None
    
    def _precompute_flow_indices(self, steps: List[TestStep]) -> Dict[str, Any]:
        """
        Build the jump tables for one step list in a single pass.
        
        Returns a dict with four lookups:
        - "labels":     label name -> index of the LABEL step (for GOTO);
                        when a name occurs more than once the last one wins
        - "else_endif": IF/ELSE_IF index -> result of _find_else_or_endif
        - "endif":      IF/ELSE_IF/ELSE index -> result of _find_endif
        - "end_loop":   WHILE/FOR_EACH index -> result of _find_end_loop
        
        With these tables the executor does not have to rescan the step list
        every time it needs a jump target.
        """
        labels: Dict[str, int] = {}
        next_branch: Dict[int, int] = {}  # where a false IF/ELSE_IF continues
        block_end: Dict[int, int] = {}    # where the whole IF construct ends
        loop_end: Dict[int, int] = {}     # where a loop body ends
        
        for position, step in enumerate(steps):
            kind = step.type
            
            if kind == StepType.LABEL:
                # Labels without a name cannot be jump targets
                if step.label:
                    labels[step.label] = position
            elif kind in (StepType.IF, StepType.ELSE_IF):
                next_branch[position] = self._find_else_or_endif(steps, position)
                block_end[position] = self._find_endif(steps, position)
            elif kind == StepType.ELSE:
                block_end[position] = self._find_endif(steps, position)
            elif kind in (StepType.WHILE, StepType.FOR_EACH):
                loop_end[position] = self._find_end_loop(steps, position)
        
        return {
            "labels": labels,
            "else_endif": next_branch,
            "endif": block_end,
            "end_loop": loop_end
        }
    
    async def _execute_steps(self, steps: List[TestStep], dry_run: bool) -> None:
        """
        Execute a list of steps with flow control support.
        
        Supports:
        - LABEL: Named jump target
        - GOTO: Jump to a label
        - IF/ELSE_IF/ELSE/END_IF: Conditional execution
        - STATEMENT: Expression evaluation and variable assignment
        - WHILE/END_LOOP: Conditional loops
        - FOR_EACH/END_LOOP: Collection iteration
        - BREAK/CONTINUE: Loop control
        - MESSAGE: Operator notification
        """
        step_index = 0
        
        # Pre-compute flow control indices (cached per step list)
        flow_cache = self._precompute_flow_indices(steps)
        label_index = flow_cache["labels"]
        else_endif_index = flow_cache["else_endif"]
        endif_index_cache = flow_cache["endif"]
        end_loop_index_cache = flow_cache["end_loop"]
        
        # Track conditional and loop state
        if_stack: List[Dict[str, Any]] = []  # Track IF/ELSE nesting
        loop_stack: List[Dict[str, Any]] = []  # Track loop nesting
        
        while step_index < len(steps):
            if self._abort_requested:
                break
            
            # Wait if paused
            await self._paused_event.wait()
            
            step = steps[step_index]
            
            # Skip disabled steps
            if step.disabled:
                logger.info("Step skipped (disabled)", step_id=step.id, step_name=step.name)
                step_index += 1
                continue
            
            # Handle flow control step types
            if step.type == StepType.LABEL:
                # Label is a no-op, just a marker
                await self._log_flow_step(step, info=f"Label: {step.label or step.name}")
                step_index += 1
                continue
            
            elif step.type == StepType.GOTO:
                # Jump to label
                if step.label and step.label in label_index:
                    await self._log_flow_step(step, info=f"→ {step.label}")
                    logger.info("GOTO jump", target=step.label)
                    step_index = label_index[step.label]
                    continue
                else:
                    await self._log_flow_step(step, info=f"→ {step.label} (not found!)")
                    logger.error("GOTO target not found", target=step.label)
                    step_index += 1
                    continue
            
            elif step.type == StepType.STATEMENT:
                # Evaluate expression and assign to variable
                if step.expression and step.target_variable:
                    try:
                        value = self._evaluate_expression(step.expression)
                        self._context.set_variable(step.target_variable, value)
                        await self._log_flow_step(step, info=f"{step.target_variable} = {value}")
                        logger.debug("Statement executed", 
                                   variable=step.target_variable, 
                                   value=value)
                    except Exception as e:
                        await self._log_flow_step(step, info=f"Error: {str(e)}")
                        logger.error("Statement error", 
                                   expression=step.expression, 
                                   error=str(e))
                else:
                    await self._log_flow_step(step, info="(no expression)")
                step_index += 1
                continue
            
            elif step.type == StepType.IF:
                # Evaluate condition and push to if_stack
                condition_result = False
                if step.expression:
                    condition_result = bool(self._evaluate_expression(step.expression))
                
                if_stack.append({
                    "condition_met": condition_result,
                    "branch_taken": condition_result,
                    "start_index": step_index
                })
                
                await self._log_flow_step(step, info=f"if ({step.expression}) → {'TRUE' if condition_result else 'FALSE'}")
                
                if not condition_result:
                    # Skip to ELSE, ELSE_IF, or END_IF (use cached index)
                    step_index = else_endif_index.get(step_index, len(steps))
                else:
                    step_index += 1
                continue
            
            elif step.type == StepType.ELSE_IF:
                if if_stack and not if_stack[-1]["branch_taken"]:
                    # Evaluate this condition
                    condition_result = False
                    if step.expression:
                        condition_result = bool(self._evaluate_expression(step.expression))
                    
                    await self._log_flow_step(step, info=f"else if ({step.expression}) → {'TRUE' if condition_result else 'FALSE'}")
                    
                    if condition_result:
                        if_stack[-1]["branch_taken"] = True
                        step_index += 1
                    else:
                        step_index = else_endif_index.get(step_index, len(steps))
                else:
                    # Skip - previous branch already taken
                    await self._log_flow_step(step, info=f"else if (skipped - previous branch taken)")
                    # Skip to END_IF (use cached index)
                    step_index = endif_index_cache.get(step_index, len(steps))
                continue
            
            elif step.type == StepType.ELSE:
                if if_stack and not if_stack[-1]["branch_taken"]:
                    # Execute else branch
                    await self._log_flow_step(step, info="else → executing")
                    if_stack[-1]["branch_taken"] = True
                    step_index += 1
                else:
                    # Skip to END_IF (use cached index)
                    await self._log_flow_step(step, info="else → skipped")
                    step_index = endif_index_cache.get(step_index, len(steps))
                continue
            
            elif step.type == StepType.END_IF:
                await self._log_flow_step(step, info="end if")
                if if_stack:
                    if_stack.pop()
                step_index += 1
                continue
            
            elif step.type == StepType.WHILE:
                # Evaluate while condition
                condition_result = False
                if step.expression:
                    condition_result = bool(self._evaluate_expression(step.expression))
                
                if condition_result:
                    # Get current iteration count
                    iteration = 1
                    if loop_stack:
                        for ls in loop_stack:
                            if ls.get("start_index") == step_index:
                                iteration = ls.get("iteration", 1)
                                break
                    await self._log_flow_step(step, info=f"while ({step.expression}) → TRUE [iteration {iteration}]")
                    
                    loop_stack.append({
                        "type": "while",
                        "start_index": step_index,
                        "expression": step.expression,
                        "iteration": iteration
                    })
                    step_index += 1
                else:
                    await self._log_flow_step(step, info=f"while ({step.expression}) → FALSE (exit)")
                    # Skip to END_LOOP (use cached index)
                    step_index = end_loop_index_cache.get(step_index, len(steps))
                continue
            
            elif step.type == StepType.FOR_EACH:
                # Initialize or continue for-each loop
                collection_expr = step.for_each_collection or step.expression
                var_name = step.for_each_variable or "item"
                
                if loop_stack and loop_stack[-1].get("step_index") == step_index:
                    # Continue existing loop
                    loop_state = loop_stack[-1]
                    loop_state["current_index"] += 1
                    
                    if loop_state["current_index"] < len(loop_state["collection"]):
                        # Set iterator variable
                        current_val = loop_state["collection"][loop_state["current_index"]]
                        self._context.set_variable(f"Locals.{var_name}", current_val)
                        self._context.update_runstate(LoopIndex=loop_state["current_index"])
                        await self._log_flow_step(step, info=f"for {var_name} = {current_val} [{loop_state['current_index']+1}/{len(loop_state['collection'])}]")
                        step_index += 1
                    else:
                        # Loop finished
                        await self._log_flow_step(step, info=f"for_each complete ({len(loop_state['collection'])} iterations)")
                        loop_stack.pop()
                        step_index = self._find_end_loop(steps, step_index)
                else:
                    # Start new for-each loop
                    collection = self._evaluate_expression(collection_expr) if collection_expr else []
                    if not isinstance(collection, (list, tuple)):
                        collection = list(collection) if hasattr(collection, '__iter__') else []
                    
                    if collection:
                        loop_stack.append({
                            "type": "for_each",
                            "start_index": step_index,
                            "step_index": step_index,
                            "variable": var_name,
                            "collection": collection,
                            "current_index": 0,
                            "end_loop_index": end_loop_index_cache.get(step_index, len(steps))
                        })
                        self._context.set_variable(f"Locals.{var_name}", collection[0])
                        self._context.update_runstate(LoopIndex=0)
                        await self._log_flow_step(step, info=f"for {var_name} = {collection[0]} [1/{len(collection)}]")
                        step_index += 1
                    else:
                        # Empty collection, skip loop (use cached index)
                        await self._log_flow_step(step, info=f"for_each (empty collection - skipped)")
                        step_index = end_loop_index_cache.get(step_index, len(steps))
                continue
            
            elif step.type == StepType.END_LOOP:
                if loop_stack:
                    loop_state = loop_stack[-1]
                    
                    if loop_state["type"] == "while":
                        # Re-evaluate while condition
                        if bool(self._evaluate_expression(loop_state["expression"])):
                            loop_state["iteration"] = loop_state.get("iteration", 1) + 1
                            await self._log_flow_step(step, info=f"end loop → continue [iteration {loop_state['iteration']}]")
                            step_index = loop_state["start_index"] + 1
                        else:
                            await self._log_flow_step(step, info=f"end loop → exit")
                            loop_stack.pop()
                            step_index += 1
                    elif loop_state["type"] == "for_each":
                        # Jump back to FOR_EACH to check next iteration
                        await self._log_flow_step(step, info="end loop → next iteration")
                        step_index = loop_state["start_index"]
                    else:
                        await self._log_flow_step(step, info="end loop")
                        loop_stack.pop()
                        step_index += 1
                else:
                    await self._log_flow_step(step, info="end loop (no active loop)")
                    step_index += 1
                continue
            
            elif step.type == StepType.BREAK:
                # Exit current loop (use cached end_loop_index from loop_state if available)
                if loop_stack:
                    loop_state = loop_stack.pop()
                    await self._log_flow_step(step, info=f"break → exit {loop_state['type']} loop")
                    step_index = loop_state.get("end_loop_index", end_loop_index_cache.get(step_index, len(steps)))
                else:
                    await self._log_flow_step(step, info="break (no active loop)")
                    step_index += 1
                continue
            
            elif step.type == StepType.CONTINUE:
                # Jump to end of loop (will re-evaluate condition)
                if loop_stack:
                    loop_state = loop_stack[-1]
                    await self._log_flow_step(step, info=f"continue → skip to next {loop_state['type']} iteration")
                    step_index = self._find_end_loop(steps, loop_state["start_index"]) - 1
                else:
                    await self._log_flow_step(step, info="continue (no active loop)")
                step_index += 1
                continue
            
            elif step.type == StepType.MESSAGE:
                # Display message to operator
                msg_text = step.message_text or step.name
                await self._log_flow_step(step, info=f'"{msg_text}"')
                await self._show_operator_message(step)
                step_index += 1
                continue
            
            elif step.type == StepType.DELAY:
                # Pause execution for specified time
                import asyncio
                milliseconds = step.inputs.get('milliseconds', 1000) if step.inputs else 1000
                # Resolve if it's a variable reference
                if isinstance(milliseconds, str) and '.' in milliseconds:
                    milliseconds = self._context.resolve_expression(milliseconds, default=1000)
                
                # Log delay with duration info
                delay_sec = float(milliseconds) / 1000.0
                result = await self._log_flow_step(step, info=f"⏱ {milliseconds}ms ({delay_sec:.1f}s)")
                result.started_at = datetime.utcnow()
                
                await asyncio.sleep(delay_sec)
                
                result.completed_at = datetime.utcnow()
                result.duration_ms = float(milliseconds)
                step_index += 1
                continue
            
            # Regular step execution
            await self._execute_step(step, dry_run)
            
            step_index += 1
    
    def _find_matching_flow_step(
        self,
        steps: List[TestStep],
        start_index: int,
        open_types: tuple,
        close_types: tuple,
        intermediate_types: Optional[tuple] = None,
        return_after_close: bool = False
    ) -> int:
        """Locate the flow-control step that pairs with the one at ``start_index``.

        Walks forward from the step after ``start_index`` while tracking a
        nesting depth that starts at 1. Every step whose type is in
        ``open_types`` raises the depth, every step whose type is in
        ``close_types`` lowers it; the search ends when the depth reaches 0.

        The scan is bounded: it never looks past
        ``start_index + MAX_NESTING_DEPTH * 100`` (or the end of ``steps``,
        whichever comes first).

        Args:
            steps: List of steps to search.
            start_index: Index of the opening step; the scan begins just after it.
            open_types: Step types that increase nesting (e.g., IF, WHILE).
            close_types: Step types that decrease nesting (e.g., END_IF, END_LOOP).
            intermediate_types: Optional types that also end the search when
                met at the outermost level (depth 1), e.g. ELSE / ELSE_IF.
            return_after_close: If True, return the index following the
                closing step instead of the closing step itself. Does not
                apply to intermediate matches.

        Returns:
            Index of the matching step (or the one after it), or ``len(steps)``
            when nothing matched inside the scan window.
        """
        total = len(steps)
        scan_end = min(total, start_index + MAX_NESTING_DEPTH * 100)

        depth = 1
        cursor = start_index + 1
        while cursor < scan_end:
            kind = steps[cursor].type
            if kind in open_types:
                depth += 1
            elif kind in close_types:
                depth -= 1
                if depth == 0:
                    if return_after_close:
                        return cursor + 1
                    return cursor
            elif depth == 1 and intermediate_types and kind in intermediate_types:
                # Branch marker belonging to the block we started in
                return cursor
            cursor += 1

        return total
    
    def _find_else_or_endif(self, steps: List[TestStep], start_index: int) -> int:
        """Return the index of the next ELSE, ELSE_IF, or END_IF at the same nesting level.

        Nested IF ... END_IF blocks after ``start_index`` are stepped over.
        Yields ``len(steps)`` when no such step is found.
        """
        branch_markers = (StepType.ELSE, StepType.ELSE_IF)
        return self._find_matching_flow_step(
            steps,
            start_index,
            open_types=(StepType.IF,),
            close_types=(StepType.END_IF,),
            intermediate_types=branch_markers,
        )
    
    def _find_endif(self, steps: List[TestStep], start_index: int) -> int:
        """Return the index of the END_IF closing the block entered at ``start_index``.

        ELSE / ELSE_IF steps are ignored; nested IF blocks are stepped over.
        Yields ``len(steps)`` when no matching END_IF is found.
        """
        openers = (StepType.IF,)
        closers = (StepType.END_IF,)
        return self._find_matching_flow_step(steps, start_index, openers, closers)
    
    def _find_end_loop(self, steps: List[TestStep], start_index: int) -> int:
        """Return the index just past the END_LOOP matching the loop at ``start_index``.

        Note that, unlike the IF helpers, the returned index points at the step
        *after* END_LOOP (``return_after_close=True``). Nested WHILE / FOR_EACH
        loops are stepped over. Yields ``len(steps)`` when no match is found.
        """
        loop_openers = (StepType.WHILE, StepType.FOR_EACH)
        loop_closers = (StepType.END_LOOP,)
        return self._find_matching_flow_step(
            steps,
            start_index,
            loop_openers,
            loop_closers,
            return_after_close=True,
        )
    
    async def _show_operator_message(self, step: TestStep) -> str:
        """Display a message to the operator and wait for acknowledgment.

        The message text is taken from ``step.message_text``, falling back to
        ``step.description`` and then to a generic prompt. ``${...}``
        placeholders are resolved through the execution context; placeholders
        that cannot be resolved (or resolve to ``None``) are left untouched.

        The resolved text is placed on ``step.message_text`` only for the
        duration of the ``_on_operator_input_required`` callback and is always
        restored afterwards, even if the callback raises.

        Args:
            step: The MESSAGE step being shown.

        Returns:
            The response stored for ``step.id`` in the
            ``pending_operator_inputs`` registry entry, ``"ABORT"`` if an abort
            was requested while waiting, or ``"TIMEOUT"`` once
            ``OPERATOR_RESPONSE_SEC`` has elapsed without a response.
        """
        message = step.message_text or step.description or "Operator action required"

        # Resolve any expressions in the message (e.g., ${DUT.Serial_Number})
        if "${" in message or any(scope in message for scope in ["Locals.", "RunState.", "DUT.", "Station.", "Parameters.", "Step."]):
            try:
                # Replace ${var} patterns like ${Locals.counter} or ${DUT.Serial_Number}
                def replace_var(match):
                    var_expr = match.group(1)
                    try:
                        result = self._context.resolve_expression(var_expr)
                        return str(result) if result is not None else match.group(0)
                    except Exception as e:
                        # Best effort: keep the raw placeholder visible to the operator
                        logger.warning("Failed to resolve variable", expr=var_expr, error=str(e))
                        return match.group(0)

                message = re.sub(r'\$\{([^}]+)\}', replace_var, message)
            except Exception as e:
                logger.warning("Failed to resolve message variables", error=str(e))

        logger.debug("Operator message", resolved=message, buttons=step.message_buttons)

        # Broadcast with resolved message
        if self._on_operator_input_required:
            # Temporarily set resolved message on step for broadcast
            original_message = step.message_text
            step.message_text = message
            try:
                await self._on_operator_input_required(step)
            finally:
                # Restore even when the callback fails, so the step definition
                # is never left holding the resolved (run-specific) text
                step.message_text = original_message

        # Wait for operator response via pending_operator_inputs
        from src.core.service_registry import ServiceRegistry
        max_wait_seconds = OPERATOR_RESPONSE_SEC
        poll_interval = OPERATOR_POLL_SEC
        waited = 0

        while waited < max_wait_seconds:
            pending = ServiceRegistry.get("pending_operator_inputs") or {}
            if step.id in pending:
                response = pending.pop(step.id)
                # Write the dict back so the consumed response is removed
                ServiceRegistry.set("pending_operator_inputs", pending)
                logger.info("Operator responded", step_id=step.id, response=response)
                return response

            await asyncio.sleep(poll_interval)
            waited += poll_interval

            # Check if execution was aborted
            if self._abort_requested:
                logger.info("Operator message aborted")
                return "ABORT"

        logger.warning("Operator message timeout", step_id=step.id)
        return "TIMEOUT"
    
    async def _log_flow_step(self, step: TestStep, info: Optional[str] = None, metadata: Optional[dict] = None) -> StepExecutionResult:
        """Log a flow control step execution with EXECUTED status.

        Builds a zero-duration, always-passing result for ``step``, appends it
        to the context's ``step_results``, bumps the ``flow_steps`` and
        ``executed_steps`` counters and fires the step-complete and
        status-update callbacks when they are set.

        Args:
            step: The flow-control step being recorded.
            info: Optional display text, stored under ``metadata["flow_info"]``.
            metadata: Optional extra metadata. It is copied, so the caller's
                dict is never modified.

        Returns:
            The recorded result. Callers may adjust it afterwards (the same
            object is held in ``step_results``).
        """
        result = StepExecutionResult(step.id, step.name, step.type.value)
        result.adapter = step.adapter or ("driver" if step.driver else None)
        if step.limits and step.limits.comparison:
            result.comparison_operator = step.limits.comparison.value

        # A single timestamp keeps start/end consistent with duration_ms == 0
        now = datetime.utcnow()
        result.started_at = now
        result.completed_at = now
        result.duration_ms = 0
        result.status = StepStatus.EXECUTED
        result.passed = True  # Flow control doesn't "fail"

        # Shallow copy: adding "flow_info" below must not leak into the caller's dict
        result.metadata = dict(metadata) if metadata else {}

        # Add info to metadata for display
        if info:
            result.metadata["flow_info"] = info

        # Add group info
        if self._context.current_group:
            result.group_id = self._context.current_group.id
            result.group_name = self._context.current_group.name

        # Add loop iteration info if in a loop
        if self._context.current_loop_total > 0:
            result.loop_iteration = self._context.current_loop_iteration
            result.loop_total = self._context.current_loop_total

        # Update counters
        self._context.flow_steps += 1
        self._context.executed_steps += 1

        # Add to results
        self._context.step_results.append(result)

        # Notify callbacks
        if self._on_step_complete:
            await self._on_step_complete(result)

        if self._on_status_update:
            await self._on_status_update(self._get_status())

        logger.debug("Flow step logged", step_id=step.id, step_type=step.type.value, info=info)
        return result
    
    async def _execute_step(self, step: TestStep, dry_run: bool) -> StepExecutionResult:
        """Execute a single step and record its result.

        Order of operations:
          1. Build the result, advance the context's step index / executed
             counter, refresh the watchdog (throttled) and push a status update.
          2. Evaluate ``skip_if`` and ``condition``; a skipped step is counted,
             appended to ``step_results`` and returned immediately.
          3. Optional operator input, ``delay_before_ms``, RunState update.
          4. Attempt loop (``retry.max_retries + 1`` attempts): dry-run
             simulation, adapter execution, or legacy driver execution,
             followed by limit validation.
          5. ``delay_after_ms``, counters, step-complete callback.
          6. On failure: immediate abort for ``critical_abort`` steps, otherwise
             wait for the operator's retry / continue / abort decision.

        Args:
            step: Step definition to execute.
            dry_run: When True the step is simulated (short sleep, measured
                value 0.0, passed) instead of calling an adapter or driver.

        Returns:
            The populated result, which has also been appended to the context's
            ``step_results``.

        Raises:
            GroupRetryRequested: When the operator answers "retry" after a
                failed step; signals the caller to restart the whole group.
        """
        result = StepExecutionResult(step.id, step.name, step.type.value)
        result.adapter = step.adapter or ("driver" if step.driver else None)
        if step.limits and step.limits.comparison:
            result.comparison_operator = step.limits.comparison.value
        result.started_at = datetime.utcnow()
        result.status = StepStatus.RUNNING

        # Add group info
        if self._context.current_group:
            result.group_id = self._context.current_group.id
            result.group_name = self._context.current_group.name

        # Add loop iteration info if in a loop
        if self._context.current_loop_total > 0:
            result.loop_iteration = self._context.current_loop_iteration
            result.loop_total = self._context.current_loop_total

        self._context.current_step = step
        self._context.current_step_index += 1
        self._context.executed_steps += 1

        # Only test/action steps feed the passed/failed/skipped counters
        is_flow_control = step.type.value in ExecutionContext.FLOW_CONTROL_TYPES

        logger.info("Executing step",
                   step_id=step.id,
                   step_name=step.name,
                   step_type=step.type.value,
                   progress=f"{self._context.current_step_index}/{self._context.total_steps}")

        # Update watchdog state (throttled to reduce I/O)
        import time
        current_time = time.time() * 1000  # ms
        if self._watchdog and (current_time - self._last_watchdog_update) >= self._watchdog_throttle_ms:
            self._last_watchdog_update = current_time
            await self._watchdog.save_execution_state({
                "session_id": self._context.session_id,
                "sequence_id": self._sequence.id,
                "current_step_index": self._context.current_step_index,
                "current_step_id": step.id,
                "dut_id": self._context.dut_id,
                "operator": self._context.operator
            })
            await self._watchdog.heartbeat()

        # Notify status update
        if self._on_status_update:
            await self._on_status_update(self._get_status())

        try:
            # Skip checks: skip_if is evaluated first, condition only if skip_if
            # did not already skip the step
            skip_log = None
            if step.skip_if and self._evaluate_expression(step.skip_if):
                skip_log = "Step skipped"
            elif step.condition and not self._evaluate_condition(step.condition):
                skip_log = "Step skipped due to condition"

            if skip_log is not None:
                result.status = StepStatus.SKIPPED
                logger.info(skip_log, step_id=step.id)
                # Count the skip here: this path returns before the counter
                # update further down, so it would otherwise never be recorded
                if not is_flow_control:
                    self._context.skipped_steps += 1
                self._context.step_results.append(result)
                # NOTE(review): skipped steps do not trigger _on_step_complete;
                # kept as in the existing behavior - confirm this is intended.
                return result

            # Handle operator input
            if step.requires_operator_input:
                await self._request_operator_input(step)

            # Delay before
            if step.delay_before_ms > 0:
                await asyncio.sleep(step.delay_before_ms / 1000)

            # Update RunState before execution
            self._context.update_runstate(
                StepIndex=self._context.current_step_index,
                CurrentStepId=step.id,
                CurrentStepName=step.name,
                PassedSteps=self._context.passed_steps,
                FailedSteps=self._context.failed_steps
            )

            # Execute with retry support
            max_attempts = 1
            if step.retry:
                max_attempts = step.retry.max_retries + 1

            for attempt in range(max_attempts):
                result.retry_count = attempt

                try:
                    if dry_run:
                        # Simulate execution
                        await asyncio.sleep(0.1)
                        result.measured_value = 0.0
                        result.passed = True
                    elif step.adapter:
                        # New adapter-based execution
                        adapter_result = await self._execute_adapter_step(step)
                        result.measured_value = adapter_result.get("return")
                        result.metadata["outputs"] = adapter_result

                        # Store outputs in variables
                        self._bind_outputs(step, adapter_result)

                        # Store in step_outputs for reference
                        self._context.step_outputs[step.id] = adapter_result

                        # Set limits info on result for error display
                        if step.limits:
                            result.lower_limit = step.limits.lower
                            result.upper_limit = step.limits.upper
                            # Use expected_string for string comparisons, else expected
                            result.expected_value = step.limits.expected_string or step.limits.expected
                            logger.debug("Set limits on result",
                                       expected_value=result.expected_value,
                                       lower_limit=result.lower_limit,
                                       upper_limit=result.upper_limit)

                        # For compare adapter, use the passed result from adapter
                        if step.adapter == "compare" and "passed" in adapter_result:
                            result.passed = adapter_result["passed"]
                            # Always set expected_value from adapter result for compare steps
                            if adapter_result.get("expected") is not None:
                                result.expected_value = adapter_result["expected"]
                            logger.debug("Compare adapter result",
                                       passed=result.passed,
                                       measured=result.measured_value,
                                       expected=result.expected_value)
                        # Validate result if limits defined (for other adapters)
                        elif step.limits or step.property_ref:
                            result.passed = self._validate_result(
                                result.measured_value, step
                            )
                        else:
                            result.passed = True
                    else:
                        # Legacy driver-based execution
                        driver_result = await self._execute_driver_command(step)
                        result.measured_value = driver_result.value
                        result.raw_data = driver_result.raw_data

                        # Validate result
                        if step.limits or step.property_ref:
                            result.passed = self._validate_result(
                                result.measured_value, step
                            )
                        else:
                            result.passed = driver_result.success

                    if result.passed:
                        result.status = StepStatus.PASSED
                        break
                    else:
                        if attempt < max_attempts - 1:
                            # More attempts left: back off, then try again
                            result.status = StepStatus.RETRY
                            delay = step.retry.delay_ms * (step.retry.backoff_multiplier ** attempt)
                            await asyncio.sleep(delay / 1000)
                        else:
                            result.status = StepStatus.FAILED

                except asyncio.TimeoutError:
                    result.error_code = "TIMEOUT"
                    result.error_message = f"Step timed out after {step.timeout_ms}ms"
                    # Only the last attempt turns the timeout into a final ERROR
                    if attempt >= max_attempts - 1:
                        result.status = StepStatus.ERROR

                except Exception as e:
                    result.error_code = "ERROR"
                    result.error_message = str(e)
                    # Only the last attempt turns the exception into a final ERROR
                    if attempt >= max_attempts - 1:
                        result.status = StepStatus.ERROR

            # Delay after
            if step.delay_after_ms > 0:
                await asyncio.sleep(step.delay_after_ms / 1000)

        except Exception as e:
            # Anything raised outside the per-attempt handlers (skip evaluation,
            # operator input, delays, RunState update)
            result.status = StepStatus.ERROR
            result.error_code = "EXCEPTION"
            result.error_message = str(e)
            logger.error("Step execution error", step_id=step.id, error=str(e))

        finally:
            # Runs for the early skip return as well
            result.completed_at = datetime.utcnow()
            result.duration_ms = (result.completed_at - result.started_at).total_seconds() * 1000

        # Set allow_retry, allow_continue and find applicable error codes on failure
        if not result.passed and result.status != StepStatus.SKIPPED:
            # Group-level flags for operator interaction
            result.allow_retry = self._context.current_group.allow_retry if self._context.current_group else False
            result.allow_continue = self._context.current_group.allow_continue if self._context.current_group else False
            result.applicable_error_codes = self._find_applicable_error_codes(
                step, result.measured_value
            )

        # Update counters - only count test/action steps, not flow control.
        # Skipped steps were already counted on the early-return path above.
        if not is_flow_control:
            if result.passed:
                self._context.passed_steps += 1
            elif result.status in [StepStatus.FAILED, StepStatus.ERROR]:
                self._context.failed_steps += 1

        self._context.step_results.append(result)

        # Notify step complete
        if self._on_step_complete:
            await self._on_step_complete(result)

        # Handle on_fail behavior (allow_retry/allow_continue is handled via operator interaction)
        if not result.passed and result.status != StepStatus.SKIPPED:
            # Check if this is a critical abort step (immediate abort without user interaction)
            if step.critical_abort:
                logger.warning(
                    "Critical abort triggered - aborting immediately",
                    step_id=step.id,
                    step_name=step.name
                )
                self._abort_requested = True
            else:
                # Normal failure - wait for operator decision based on group flags
                group_allows_retry = self._context.current_group.allow_retry if self._context.current_group else False
                group_allows_continue = self._context.current_group.allow_continue if self._context.current_group else False

                # Build available actions based on group flags (abort is always available).
                # Resulting order is always: retry, continue, abort.
                available_actions = ['abort']
                if group_allows_retry:
                    available_actions.insert(0, 'retry')
                if group_allows_continue:
                    available_actions.insert(1 if group_allows_retry else 0, 'continue')

                # Add available_actions to result for frontend
                result.available_actions = available_actions

                # Wait for operator decision
                operator_decision = await self._wait_for_operator_decision(step, result)

                if operator_decision == "retry":
                    # Signal to restart the entire group (not just this step)
                    logger.info(
                        "Operator requested group retry",
                        step_id=step.id,
                        group_id=self._context.current_group.id if self._context.current_group else None
                    )
                    raise GroupRetryRequested()
                elif operator_decision == "abort":
                    logger.info("Operator requested abort", step_id=step.id)
                    self._abort_requested = True
                # For "continue", just proceed to next step (no action needed)

        logger.info("Step completed",
                   step_id=step.id,
                   status=result.status.value,
                   passed=result.passed,
                   duration_ms=result.duration_ms)

        return result
    
    async def _execute_driver_command(self, step: TestStep) -> DriverResult:
        """Run the legacy driver call described by ``step``.

        Behaviour by ``step.driver``:
          * empty / unset  -> successful result with no value, nothing is called
          * ``"delay"``    -> built-in sleep for the ``delay_ms`` parameter
                              (0 when absent); the delay is returned as value
          * anything else  -> looked up in the driver manager and the method
                              ``step.driver_method`` (default ``"read"``) is
                              awaited with the step parameters as keyword
                              arguments, bounded by ``step.timeout_ms``

        Raises:
            ValueError: If the driver or the method cannot be found.
            asyncio.TimeoutError: If the driver call exceeds the step timeout.
        """
        driver_name = step.driver
        if not driver_name:
            return DriverResult(success=True, value=None)

        # Handle built-in "delay" driver
        if driver_name == "delay":
            # First parameter called "delay_ms" wins; default is no delay
            wait_ms = next(
                (int(p.value) for p in step.parameters if p.name == "delay_ms"),
                0,
            )
            await asyncio.sleep(wait_ms / 1000)
            return DriverResult(success=True, value=wait_ms)

        driver = self._driver_manager.get_driver(driver_name)
        if not driver:
            raise ValueError(f"Driver not found: {step.driver}")

        # Resolve the callable on the driver
        method_name = step.driver_method or "read"
        method = getattr(driver, method_name, None)
        if not method:
            raise ValueError(f"Method not found: {method_name}")

        # Step parameters become keyword arguments of the driver call
        call_args = {}
        for param in step.parameters:
            call_args[param.name] = param.value

        # Execute with timeout
        pending_call = method(**call_args)
        timeout_s = step.timeout_ms / 1000
        return await asyncio.wait_for(pending_call, timeout=timeout_s)
    
    async def _execute_adapter_step(self, step: TestStep) -> Dict[str, Any]:
        """
        Run ``step`` through the adapter named by ``step.adapter``.

        The step inputs are resolved via the execution context, the adapter is
        awaited with (module, method, inputs) under the step timeout, and the
        adapter's outputs are returned.

        For the ``compare`` adapter two inputs are filled in when missing:
        ``value`` from ``step.compare_variable`` and ``expected`` from the step
        limits (``expected_string`` takes precedence over ``expected`` and also
        supplies ``case_sensitive``).

        Raises:
            ValueError: If no adapter is registered under ``step.adapter``.
            RuntimeError: If the adapter reports an unsuccessful execution.
            asyncio.TimeoutError: If the adapter exceeds ``step.timeout_ms``.
        """
        handler = self._adapter_manager.get(step.adapter)
        if not handler:
            raise ValueError(f"Adapter not found: {step.adapter}")

        # Target of the call: plugin module or executable, and the entry point
        target_module = step.plugin or step.executable or ""
        target_method = step.method or "execute"

        # Resolve input expressions
        call_inputs = self._context.resolve_inputs(step.inputs)

        if step.adapter == "compare":
            # Value under test: taken from compare_variable unless given explicitly
            if "value" not in call_inputs and step.compare_variable:
                reference = step.compare_variable
                # Dot-notation reference, so read the variable directly
                current = self._context.get_variable(reference)
                call_inputs["value"] = current
                logger.debug("Compare adapter: resolved variable",
                           var_ref=reference,
                           resolved_value=current)

            # Expected value: taken from the limits unless given explicitly
            if "expected" not in call_inputs and step.limits:
                limits = step.limits
                if limits.expected_string is not None:
                    # String comparison
                    call_inputs["expected"] = limits.expected_string
                    call_inputs["case_sensitive"] = limits.case_sensitive
                elif limits.expected is not None:
                    call_inputs["expected"] = limits.expected

        logger.debug("Executing adapter step",
                    adapter=step.adapter,
                    module=target_module,
                    method=target_method,
                    inputs=call_inputs)

        # Execute with timeout
        pending_call = handler.execute(target_module, target_method, call_inputs)
        outcome = await asyncio.wait_for(pending_call, timeout=step.timeout_ms / 1000)

        if outcome.success:
            return outcome.outputs

        raise RuntimeError(
            f"Adapter execution failed: {outcome.error}"
        )
    
    def _bind_outputs(self, step: TestStep, outputs: Dict[str, Any]) -> None:
        """
        Copy step outputs into the variables they are mapped to.

        ``step.outputs`` maps an output port name to a variable reference.
        Ports that are absent from ``outputs`` are skipped; a failed write is
        logged as a warning and does not stop the remaining bindings.
        """
        for port, target in step.outputs.items():
            if port not in outputs:
                continue

            produced = outputs[port]
            if self._context.set_variable(target, produced):
                logger.debug("Bound output to variable",
                             port=port,
                             variable=target,
                             value=produced)
            else:
                logger.warning("Failed to bind output",
                               port=port,
                               variable=target)
    
    def _validate_result(self, measured_value: Any, step: TestStep) -> bool:
        """
        Validate a measured value against the step's limits.

        Limits come from ``step.limits`` unless ``step.property_ref`` resolves
        in the loaded property set, in which case the property's lower, upper,
        nominal and unit values replace them.

        Args:
            measured_value: The value produced by the step. Numeric values
                (or strings convertible with ``float``) are compared
                numerically; anything else is compared as a string.
            step: The step whose limits apply.

        Returns:
            True when there are no limits or the value satisfies them.
            False when the value is None, is NaN, is a non-numeric value with
            no expected string to compare against, or violates the limits.

        Raises:
            TypeError: An ordering comparison (GT/GE/LT/LE) is configured but
                ``limits.expected`` cannot be ordered against a float
                (for example it is None).
        """
        import math

        limits = step.limits

        # Get limits from property set if referenced
        if step.property_ref and self._property_set:
            prop = self._property_set.get_limits(step.property_ref)
            if prop:
                limits = StepLimit(
                    lower=prop.lower,
                    upper=prop.upper,
                    expected=prop.nominal,
                    unit=prop.unit
                )

        if not limits:
            return True

        if measured_value is None:
            return False

        try:
            value = float(measured_value)
        except (TypeError, ValueError):
            # Not numeric: fall back to a string comparison.
            expected_str = limits.expected_string or limits.expected
            if expected_str is None:
                return False
            if limits.case_sensitive:
                return str(measured_value) == str(expected_str)
            return str(measured_value).lower() == str(expected_str).lower()

        # NaN compares False against every bound, so without this guard an
        # invalid reading would slip through the BETWEEN check as a pass.
        if math.isnan(value):
            return False

        op = limits.comparison

        if op == ComparisonOperator.BETWEEN:
            # A missing bound leaves that side open.
            if limits.lower is not None and value < limits.lower:
                return False
            if limits.upper is not None and value > limits.upper:
                return False
            return True
        elif op == ComparisonOperator.EQ:
            return value == limits.expected
        elif op == ComparisonOperator.NE:
            return value != limits.expected
        elif op == ComparisonOperator.GT:
            return value > limits.expected
        elif op == ComparisonOperator.GE:
            return value >= limits.expected
        elif op == ComparisonOperator.LT:
            return value < limits.expected
        elif op == ComparisonOperator.LE:
            return value <= limits.expected

        # Unrecognised comparison operators are treated as a pass.
        return True
    
    def _evaluate_condition(self, condition) -> bool:
        """
        Evaluate a step condition against a Locals variable.

        The variable is first read as ``Locals.<condition.variable>`` through
        the context; if that yields None, the LOCALS scope of the variable
        store is queried directly. The value is then compared with
        ``condition.value`` using ``condition.operator``. An operator outside
        EQ/NE/GT/GE/LT/LE evaluates to True.
        """
        name = condition.variable
        actual = self._context.get_variable(f"Locals.{name}")

        if actual is None:
            # Legacy lookup straight from the LOCALS scope.
            locals_scope = self._context.variable_store.get_scope(VariableScope.LOCALS)
            actual = locals_scope.get(name)

        # Ordered (operator, comparison) pairs; matched with == so the
        # comparison semantics of the operator type are kept.
        comparisons = (
            (ComparisonOperator.EQ, lambda left, right: left == right),
            (ComparisonOperator.NE, lambda left, right: left != right),
            (ComparisonOperator.GT, lambda left, right: left > right),
            (ComparisonOperator.GE, lambda left, right: left >= right),
            (ComparisonOperator.LT, lambda left, right: left < right),
            (ComparisonOperator.LE, lambda left, right: left <= right),
        )

        requested = condition.operator
        for candidate, compare in comparisons:
            if requested == candidate:
                return compare(actual, condition.value)

        return True
    
    def _evaluate_expression(self, expression: str) -> Any:
        """
        Resolve an expression through the execution context.

        Delegates to the context's ``resolve_expression`` with a default of
        False. Per the executor's expression system, expressions may contain:
        - Variable references: Locals.voltage, RunState.LoopIndex
        - Math: Locals.value * 2 + 0.5
        - Comparisons: Locals.voltage > 12.0
        - Functions: Len(Locals.readings), Avg(Locals.readings)
        """
        resolve = self._context.resolve_expression
        return resolve(expression, default=False)
    
    async def _execute_loop(self, steps: List[TestStep], loop, dry_run: bool) -> None:
        """
        Execute ``steps`` repeatedly as described by ``loop``.

        Two loop forms are supported:
        - Counted: ``loop.count`` iterations; the index is also written to
          ``Locals.loop_index`` for backwards compatibility.
        - Ranged: ``loop.variable`` walks ``range(start, end, step)`` (missing
          start/end default to 0, missing step to 1) and is written to
          ``Locals.<loop.variable>``.

        In both forms ``RunState.LoopIndex`` receives the current loop value,
        and the context's ``current_loop_iteration`` / ``current_loop_total``
        track progress. The loop stops early once an abort is requested.
        If neither ``loop.count`` nor ``loop.variable`` is set, nothing runs.

        Args:
            steps: Steps to run on every iteration.
            loop: Loop definition (count, or variable/start/end/step).
            dry_run: Passed through to the step executor.
        """
        if loop.count:
            values = range(loop.count)
            target_name = "loop_index"  # legacy variable name
        elif loop.variable:
            values = range(loop.start or 0, loop.end or 0, loop.step or 1)
            target_name = loop.variable
        else:
            return

        ctx = self._context

        # Remember the enclosing loop's progress so a nested loop does not
        # wipe it out when it finishes; outside any loop this is 0 / 0.
        outer_iteration = getattr(ctx, "current_loop_iteration", 0) or 0
        outer_total = getattr(ctx, "current_loop_total", 0) or 0

        # len(range(...)) is the exact iteration count, including a partial
        # final stride and negative steps.
        ctx.current_loop_total = len(values)

        try:
            for iteration, value in enumerate(values):
                ctx.current_loop_iteration = iteration
                # Update RunState.LoopIndex
                ctx.update_runstate(LoopIndex=value)
                # Expose the loop value as a Locals variable
                ctx.variable_store.set(f"Locals.{target_name}", value)

                await self._execute_steps(steps, dry_run)
                if self._abort_requested:
                    break
        finally:
            # Restore loop state even when a step raises.
            ctx.current_loop_iteration = outer_iteration
            ctx.current_loop_total = outer_total
    
    def _find_applicable_error_codes(self, step: TestStep, measured_value: Any) -> List[Dict[str, Any]]:
        """Select the step's error codes that match a failed measurement.

        An error code without ``range_min``/``range_max`` always applies.
        A range-restricted code applies when the numeric measured value lies
        within the (inclusive) range, or when the value cannot be compared
        numerically. A measured value of None never matches a
        range-restricted code.

        Returns:
            The matching error codes as dictionaries, in their original order.
        """
        if not step.error_codes:
            return []

        matches = []
        for entry in step.error_codes:
            payload = entry.model_dump() if hasattr(entry, 'model_dump') else entry.dict()

            unrestricted = entry.range_min is None and entry.range_max is None
            if unrestricted:
                matches.append(payload)
                continue

            try:
                if measured_value is None:
                    continue
                number = float(measured_value)
                below = entry.range_min is not None and number < entry.range_min
                above = entry.range_max is not None and number > entry.range_max
                keep = not (below or above)
            except (ValueError, TypeError):
                # Not comparable as a number: keep the error code anyway.
                keep = True

            if keep:
                matches.append(payload)

        return matches
    
    async def _wait_for_operator_decision(self, step: TestStep, result: StepExecutionResult) -> str:
        """Poll for the operator's decision on a failed step.

        Nothing is sent from here; per the executor's design the frontend
        shows the failure popup itself when it receives step_complete with a
        failed status and available_actions. This method only polls the
        "pending_operator_inputs" registry entry for an answer stored under
        ``<step.id>_retry`` or, for backwards compatibility, ``step.id``.

        Returns:
            The lower-cased operator response ('retry', 'continue', or
            'abort'). 'abort' is also returned when an abort is requested
            while waiting or when the wait times out.
        """
        from src.core.service_registry import ServiceRegistry

        timeout_seconds = OPERATOR_RESPONSE_SEC
        poll_seconds = OPERATOR_POLL_SEC

        # Preferred key first, plain step ID second (legacy).
        candidate_keys = (f"{step.id}_retry", step.id)

        elapsed = 0
        while elapsed < timeout_seconds:
            inbox = ServiceRegistry.get("pending_operator_inputs") or {}
            for key in candidate_keys:
                if key in inbox:
                    decision = inbox.pop(key)
                    # Write the registry entry back without the consumed answer.
                    ServiceRegistry.set("pending_operator_inputs", inbox)
                    logger.info("Operator decision", step_id=step.id, response=decision)
                    return decision.lower()  # 'retry', 'continue', or 'abort'

            await asyncio.sleep(poll_seconds)
            elapsed += poll_seconds

            if self._abort_requested:
                return "abort"

        logger.warning("Operator decision timeout", step_id=step.id)
        return "abort"  # Default to abort on timeout
    
    async def _request_operator_input(self, step: TestStep) -> None:
        """Await the operator-input callback for ``step``, if one is registered."""
        callback = self._on_operator_input_required
        if not callback:
            return
        await callback(step)
    
    def _get_status(self) -> Dict[str, Any]:
        """
        Build a snapshot of the current execution status.

        Context-derived fields fall back to None (0 for the counters) when no
        context is set. ``duration_ms`` spans ``started_at`` to
        ``completed_at``, or to the current UTC time while the run has not
        completed; it is None when there is no start time. Timestamps are
        rendered as ISO strings with a trailing "Z".
        """
        ctx = self._context
        active_step = ctx.current_step if ctx else None
        started = ctx.started_at if ctx else None
        finished = ctx.completed_at if ctx else None

        elapsed_ms = None
        if started:
            elapsed = (finished or datetime.utcnow()) - started
            elapsed_ms = elapsed.total_seconds() * 1000

        return {
            "session_id": ctx.session_id if ctx else None,
            "state": self._state.value,
            "current_step_id": active_step.id if active_step else None,
            "current_step_name": active_step.name if active_step else None,
            "current_step_index": ctx.current_step_index if ctx else 0,
            "total_steps": ctx.total_steps if ctx else 0,
            "progress_percent": self.progress,
            "passed_steps": ctx.passed_steps if ctx else 0,
            "failed_steps": ctx.failed_steps if ctx else 0,
            "dut_id": ctx.dut_id if ctx else None,
            "operator": ctx.operator if ctx else None,
            "started_at": (started.isoformat() + "Z") if started else None,
            "completed_at": (finished.isoformat() + "Z") if finished else None,
            "duration_ms": elapsed_ms
        }
    
    # ============================================
    # CONTROL METHODS
    # ============================================
    
    async def pause(self) -> None:
        """Pause execution; does nothing unless the executor is RUNNING."""
        running = self._state == ExecutionState.RUNNING
        if not running:
            return

        self._pause_requested = True
        # Clear the paused event (set again by resume() and abort()).
        self._paused_event.clear()
        self._state = ExecutionState.PAUSED
        logger.info("Execution paused")
    
    async def resume(self) -> None:
        """Resume execution; does nothing unless the executor is PAUSED."""
        paused = self._state == ExecutionState.PAUSED
        if not paused:
            return

        self._pause_requested = False
        # Set the paused event that pause() cleared.
        self._paused_event.set()
        self._state = ExecutionState.RUNNING
        logger.info("Execution resumed")
    
    async def abort(self) -> None:
        """Request an abort and move the executor to the ABORTING state.

        The abort flag is raised unconditionally. If execution is currently
        paused, the paused event is set so a blocked run can observe the flag.
        """
        self._abort_requested = True

        was_paused = self._state == ExecutionState.PAUSED
        if was_paused:
            self._paused_event.set()  # Unblock if paused

        self._state = ExecutionState.ABORTING
        logger.info("Execution abort requested")
    
    def is_running(self) -> bool:
        """Return True while the executor state is RUNNING."""
        current_state = self._state
        return current_state == ExecutionState.RUNNING
    
    def is_paused(self) -> bool:
        """Return True while the executor state is PAUSED."""
        current_state = self._state
        return current_state == ExecutionState.PAUSED
