import pyautogui
import time
import cv2
import numpy as np
from element_detector import find_text_on_screen

def switch_to_chrome():
    """Bring the first window whose title mentions Google Chrome into focus.

    Returns:
        True when a window with "Google Chrome" in its title was found and
        activated, False when no such window is open.
    """
    for candidate in pyautogui.getAllWindows():
        if "Google Chrome" not in candidate.title:
            continue

        candidate.activate()
        time.sleep(1)  # Give the focus change a moment to take effect
        print("Switched to Chrome window")
        return True

    print("Chrome window not found")
    return False

def _bbox_center(bbox):
    """Return the integer center point of an ``(x, y, w, h)`` bounding box."""
    x, y, w, h = bbox
    return x + w // 2, y + h // 2


def _click_at(x, y):
    """Glide the pointer to ``(x, y)`` over half a second, then click there."""
    pyautogui.moveTo(x, y, duration=0.5)
    pyautogui.click()


def _find_element_by_id(elements, target):
    """Return the element whose ``'id'`` equals ``int(target)``, or None."""
    wanted_id = int(target)
    return next((e for e in elements if e['id'] == wanted_id), None)


def _do_click(elements, target):
    """Click the element named by *target*, or the current pointer position."""
    if target is None:
        # No target given: click wherever the pointer already is.
        current_pos = pyautogui.position()
        pyautogui.click()
        print(f"Clicked at current mouse position: {current_pos}")
        return f"Clicked at position {current_pos}"

    element = _find_element_by_id(elements, target)
    if not element:
        print(f"Element {target} not found")
        return f"Failed to click: Element {target} not found"

    click_x, click_y = _bbox_center(element['bbox'])
    print(f"Moving to coordinates: ({click_x}, {click_y})")  # Debug print
    _click_at(click_x, click_y)
    print(f"Clicked on element {target}")
    return f"Clicked on element {target}"


def _do_type(elements, target, value):
    """Optionally focus the element named by *target*, then type *value*."""
    if value is None:
        # Check before clicking so a value-less action has no side effects.
        print("Error: No text specified to type")
        return "Failed to type: No text specified"

    if target is not None:
        element = _find_element_by_id(elements, target)
        if element:
            click_x, click_y = _bbox_center(element['bbox'])
            print(f"Moving to coordinates: ({click_x}, {click_y})")  # Debug print
            _click_at(click_x, click_y)
        # Best effort: when the element is not found the text is still typed
        # into whatever currently has keyboard focus.

    pyautogui.write(value, interval=0.1)
    print(f"Typed '{value}'")
    return f"Typed '{value}'"


def _do_hotkey(value):
    """Press the '+'-separated key combination described by *value*."""
    # Trim each key so "ctrl + c" yields ['ctrl', 'c'] rather than key names
    # carrying stray spaces; drop empty fragments.
    keys = [key.strip() for key in value.split('+')] if value else []
    keys = [key for key in keys if key]
    if not keys:
        print("Error: No hotkey specified")
        return "Failed to press hotkey: No hotkey specified"

    print(f"Pressing hotkey: {keys}")  # Debug print
    pyautogui.hotkey(*keys)
    print(f"Pressed hotkey {value}")
    return f"Pressed hotkey {value}"


def _do_approximate_click(elements, target, screenshot_path):
    """Click text located by OCR, falling back to a matching element."""
    if not target:
        print("Error: No text specified for approximate click")
        return "Failed to perform approximate click: No text specified"

    # First, try to find the text in the screenshot using OCR.
    click_coords = find_text_on_screen(screenshot_path, target)
    if click_coords:
        _click_at(click_coords[0], click_coords[1])
        print(f"Clicked on text '{target}' at position {click_coords}")
        return f"Clicked on text '{target}' at position {click_coords}"

    # Otherwise look for an element whose string form mentions the target.
    needle = target.lower()
    matching_element = next(
        (e for e in elements if needle in str(e).lower()), None
    )
    if matching_element:
        click_x, click_y = _bbox_center(matching_element['bbox'])
        _click_at(click_x, click_y)
        print(f"Clicked on element containing '{target}' at position ({click_x}, {click_y})")
        return f"Clicked on element containing '{target}' at position ({click_x}, {click_y})"

    print(f"Text or element '{target}' not found on screen")
    return f"Failed to click: Text or element '{target}' not found on screen"


def perform_action(actions, elements, screenshot_path):
    """Run a sequence of GUI actions against the Chrome window.

    Args:
        actions: Iterable of dicts read via the keys ``'action'``,
            ``'target'`` and ``'value'``. Recognised actions are ``"click"``,
            ``"type"``, ``"hotkey"`` and ``"approximate_click"``. A missing
            key is treated as ``None``.
        elements: Iterable of dicts providing ``'id'`` and ``'bbox'``, the
            latter unpacked as ``(x, y, w, h)``.
        screenshot_path: Path handed to ``find_text_on_screen`` for the
            ``"approximate_click"`` action.

    Returns:
        A list of human-readable result strings, one per attempted action.
        If the Chrome window cannot be focused the list holds a single
        failure message and no action is attempted.
    """
    print(f"Received actions: {actions}")  # Debug print
    action_results = []

    # Switch to Chrome window before performing actions
    if not switch_to_chrome():
        action_results.append("Failed to switch to Chrome window")
        return action_results

    for action_data in actions:
        # .get() keeps one incomplete entry (e.g. a hotkey with no 'target')
        # from raising KeyError and aborting the rest of the batch.
        action = action_data.get('action')
        target = action_data.get('target')
        value = action_data.get('value')

        print(f"Attempting action: {action} on target: {target} with value: {value}")  # Debug print

        try:
            # Add a small delay before each action
            time.sleep(0.5)

            if action == "click":
                result = _do_click(elements, target)
            elif action == "type":
                result = _do_type(elements, target, value)
            elif action == "hotkey":
                result = _do_hotkey(value)
            elif action == "approximate_click":
                result = _do_approximate_click(elements, target, screenshot_path)
            else:
                print(f"Unknown action: {action}")
                result = f"Unknown action: {action}"
            action_results.append(result)

            # Add a delay after each action to allow for system response
            time.sleep(2)

        except Exception as e:
            # Boundary handler: record the failure and carry on with the
            # remaining actions instead of losing the results so far.
            print(f"Error performing action: {e}")
            action_results.append(f"Error performing action: {e}")

        print(f"Action completed: {action}")  # Debug print

    print("All actions completed")
    return action_results

def _ocr_span_center(data, first, last):
    """Return the center of the box enclosing OCR entries ``first``..``last``."""
    indices = range(first, last + 1)
    left = min(data['left'][i] for i in indices)
    top = min(data['top'][i] for i in indices)
    right = max(data['left'][i] + data['width'][i] for i in indices)
    bottom = max(data['top'][i] + data['height'][i] for i in indices)
    return (left + (right - left) // 2, top + (bottom - top) // 2)


def _locate_text(data, needle):
    """Find *needle* (already lower-cased and stripped) in OCR output *data*.

    A single OCR entry containing the needle wins first. A needle made of
    several whitespace-separated words is then searched for across runs of
    consecutive non-empty entries.
    """
    words = [str(word).strip().lower() for word in data['text']]

    for i, word in enumerate(words):
        if needle in word:
            return _ocr_span_center(data, i, i)

    tokens = needle.split()
    if len(tokens) < 2:
        return None

    # NOTE(review): assumes consecutive non-empty entries are neighbouring
    # words in reading order - confirm against pytesseract's output layout.
    phrase = " ".join(tokens)
    span = len(tokens)
    for start in range(len(words) - span + 1):
        window = words[start:start + span]
        if all(window) and phrase in " ".join(window):
            return _ocr_span_center(data, start, start + span - 1)

    return None


def find_text_on_screen(screenshot_path, text):
    """Locate *text* in a screenshot via OCR and return its center point.

    NOTE: in the visible file this definition rebinds the name that is also
    imported from ``element_detector`` at the top of the module, so calls in
    this module resolve to this function.

    Args:
        screenshot_path: Path of the image file to read with ``cv2.imread``.
        text: Text to look for; matching is case-insensitive. Several
            whitespace-separated words are matched as a phrase.

    Returns:
        ``(x, y)`` center of the first match in image pixel coordinates, or
        None when the text is empty, the image cannot be read, or nothing
        matches.

    Raises:
        ImportError: If ``pytesseract`` cannot be imported.
    """
    # An empty needle is a substring of every OCR entry, so it would always
    # "match" the first one; treat it as not found instead.
    needle = text.strip().lower() if text else ""
    if not needle:
        return None

    try:
        import pytesseract
    except ImportError as err:
        raise ImportError("Tesseract OCR is not available") from err

    img = cv2.imread(screenshot_path)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        print(f"Could not read screenshot: {screenshot_path}")
        return None

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Perform text detection
    data = pytesseract.image_to_data(gray, output_type=pytesseract.Output.DICT)
    return _locate_text(data, needle)