feat: Add installation scripts for Windows and Unix-based systems
- Created `install_and_run.bat` for Windows installation and setup. - Created `install_and_run.sh` for Unix-based systems installation and setup. - Removed `main.py` as it is no longer needed. - Updated `requirements.txt` to specify package versions and added PyQt5. - Deleted `start.bat` as it is redundant. - Added unit tests for core functionality and scraping modes. - Implemented input validation utilities in `utils/validators.py`. - Added support for dual scraping modes in the scraper.
This commit is contained in:
1
core/__init__.py
Normal file
1
core/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Core scraping functionality
|
||||
309
core/credentials.py
Normal file
309
core/credentials.py
Normal file
@@ -0,0 +1,309 @@
|
||||
"""
|
||||
Simple JSON-based credential storage system for EBoek.info scraper.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
import stat
|
||||
|
||||
|
||||
class CredentialManager:
    """
    Manages storage and retrieval of user credentials in a JSON config file.

    Credentials are stored in the user's home directory in a hidden folder
    with appropriate file permissions for basic security.

    NOTE(review): the password is stored as plain-text JSON; the 600 file
    permissions are the only protection. An OS keyring would be stronger —
    confirm whether that trade-off is acceptable for this app.
    """

    def __init__(self, app_name="eboek_scraper"):
        """
        Initialize the credential manager.

        Args:
            app_name (str): Application name used to derive the hidden
                config directory name (``~/.<app_name>``).
        """
        self.app_name = app_name
        self.config_dir = Path.home() / f".{app_name}"
        self.config_file = self.config_dir / "config.json"
        self._ensure_config_dir()

    def _ensure_config_dir(self):
        """
        Ensure the configuration directory exists with user-only permissions.

        Falls back to a hidden file in the current working directory if the
        home-directory location cannot be created or secured.
        """
        try:
            if not self.config_dir.exists():
                # Only the owning user may read/write/traverse the directory.
                self.config_dir.mkdir(mode=0o700, exist_ok=True)

            # mkdir's mode is subject to the umask, so re-assert 700 explicitly.
            # Windows has no POSIX permission bits, so skip chmod there.
            if os.name != 'nt':
                os.chmod(self.config_dir, stat.S_IRWXU)  # 700 permissions

        except OSError:
            # Can't create/secure the home-dir location: fall back to CWD.
            self.config_dir = Path(".")
            self.config_file = self.config_dir / f".{self.app_name}_config.json"

    def _load_config(self):
        """
        Load the configuration file.

        Returns:
            dict: Configuration data; an empty dict if the file is missing,
                unreadable, or contains invalid JSON.
        """
        try:
            if self.config_file.exists():
                with open(self.config_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
        except (json.JSONDecodeError, IOError, PermissionError):
            # A corrupt or unreadable config is treated the same as no config.
            pass

        return {}

    def _save_config(self, config_data):
        """
        Save configuration data to file.

        Args:
            config_data (dict): Configuration data to save.

        Returns:
            bool: True if saved successfully, False otherwise.
        """
        try:
            with open(self.config_file, 'w', encoding='utf-8') as f:
                json.dump(config_data, f, indent=2, ensure_ascii=False)

            # Restrict the file itself to user read/write only (POSIX).
            if os.name != 'nt':
                os.chmod(self.config_file, stat.S_IRUSR | stat.S_IWUSR)  # 600

            return True

        except (IOError, PermissionError):
            return False

    def save_credentials(self, username, password, remember=True):
        """
        Save user credentials to the config file.

        Args:
            username (str): EBoek.info username.
            password (str): EBoek.info password.
            remember (bool): Whether to save credentials for future use.
                When False, any previously stored credentials are removed.

        Returns:
            bool: True if saved (or cleared) successfully, False otherwise.
        """
        if not remember:
            # "Don't remember" means: make sure nothing is stored.
            return self.clear_credentials()

        try:
            config = self._load_config()

            config['credentials'] = {
                'username': username,
                'password': password,
                'saved_at': str(Path.home()),  # records which user's home saved it
            }

            return self._save_config(config)

        except Exception:
            return False

    def load_credentials(self):
        """
        Load stored credentials.

        Returns:
            dict or None: Dictionary with 'username' and 'password' keys if
                both are present in the stored config, None otherwise.
        """
        try:
            config = self._load_config()
            credentials = config.get('credentials')

            if credentials and 'username' in credentials and 'password' in credentials:
                return {
                    'username': credentials['username'],
                    'password': credentials['password']
                }

        except Exception:
            pass

        return None

    def has_saved_credentials(self):
        """
        Check if there are saved credentials available.

        Returns:
            bool: True if credentials are available, False otherwise.
        """
        return self.load_credentials() is not None

    def get_saved_username(self):
        """
        Get the saved username without the password.

        Returns:
            str or None: Saved username if available, None otherwise.
        """
        credentials = self.load_credentials()
        return credentials['username'] if credentials else None

    def clear_credentials(self):
        """
        Remove stored credentials from the config file.

        Returns:
            bool: True if cleared successfully (or nothing was stored),
                False otherwise.
        """
        try:
            config = self._load_config()

            if 'credentials' in config:
                del config['credentials']
                return self._save_config(config)

            return True  # Nothing to clear counts as success.

        except Exception:
            return False

    def validate_credentials(self, username, password):
        """
        Basic validation of credential format (no network check).

        Args:
            username (str): Username to validate.
            password (str): Password to validate.

        Returns:
            dict: Validation result with 'valid' bool and 'errors' list.
        """
        errors = []

        if not username or not username.strip():
            errors.append("Username cannot be empty")
        elif len(username.strip()) < 2:
            errors.append("Username must be at least 2 characters")

        if not password or not password.strip():
            errors.append("Password cannot be empty")
        elif len(password) < 3:
            errors.append("Password must be at least 3 characters")

        return {
            'valid': len(errors) == 0,
            'errors': errors
        }

    def get_config_file_path(self):
        """
        Get the path to the configuration file.

        Returns:
            Path: Path to the config file.
        """
        return self.config_file

    def save_app_settings(self, settings):
        """
        Save application settings (non-credential settings).

        Args:
            settings (dict): Application settings to save.

        Returns:
            bool: True if saved successfully, False otherwise.
        """
        try:
            config = self._load_config()
            config['app_settings'] = settings
            return self._save_config(config)
        except Exception:
            return False

    def load_app_settings(self):
        """
        Load application settings (non-credential settings).

        Returns:
            dict: Application settings, empty dict if none saved.
        """
        try:
            config = self._load_config()
            return config.get('app_settings', {})
        except Exception:
            return {}

    def get_default_settings(self):
        """
        Get default application settings.

        Returns:
            dict: Default settings.
        """
        return {
            'headless_mode': True,
            'verbose_logging': False,
            'auto_save_credentials': True,
            'download_path': str(Path.home() / "Downloads"),
            'default_start_page': 1,
            'default_end_page': 1,
            'scraping_mode': 0  # 0=All Comics, 1=Latest Comics
        }

    def export_settings(self, export_path):
        """
        Export settings (excluding credentials) to a file.

        Args:
            export_path (str or Path): Path to export settings to.

        Returns:
            bool: True if exported successfully, False otherwise.
        """
        try:
            config = self._load_config()
            # Never write credentials into an export file.
            export_config = {k: v for k, v in config.items() if k != 'credentials'}

            with open(export_path, 'w', encoding='utf-8') as f:
                json.dump(export_config, f, indent=2, ensure_ascii=False)

            return True
        except Exception:
            return False

    def import_settings(self, import_path):
        """
        Import settings (excluding credentials) from a file.

        Args:
            import_path (str or Path): Path to import settings from.

        Returns:
            bool: True if imported successfully, False otherwise.
        """
        try:
            with open(import_path, 'r', encoding='utf-8') as f:
                imported_config = json.load(f)

            # Never accept credentials from an imported file.
            if 'credentials' in imported_config:
                del imported_config['credentials']

            # Imported keys override existing ones; untouched keys survive.
            config = self._load_config()
            config.update(imported_config)

            return self._save_config(config)
        except Exception:
            return False
||||
513
core/scraper.py
Normal file
513
core/scraper.py
Normal file
@@ -0,0 +1,513 @@
|
||||
"""
|
||||
Core scraper functionality extracted from main.py with callback support for GUI integration.
|
||||
"""
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
import time
|
||||
import random
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Disable SSL verification warnings and errors
|
||||
import urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
|
||||
class Scraper:
    """
    EBoek.info web scraper with GUI callback support.

    This class handles the core scraping functionality while providing
    callback mechanisms for progress updates to a GUI application.
    """

    def __init__(self, headless=False, progress_callback=None, scraping_mode=0):
        """
        Initialize the scraper with optional GUI callback support.

        Args:
            headless (bool): Whether to run Chrome in headless mode.
            progress_callback (callable): Optional callback function for
                progress updates. Signature: callback(event_type: str, data: dict).
            scraping_mode (int): Scraping mode (0=All Comics, 1=Latest Comics).
        """
        self.progress_callback = progress_callback
        self._stop_requested = False
        self.scraping_mode = scraping_mode

        # Set up Chrome options with anti-detection measures
        chrome_options = Options()
        if headless:
            chrome_options.add_argument('--headless')

        # Fix SSL and certificate issues
        chrome_options.add_argument('--ignore-ssl-errors')
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_argument('--disable-web-security')
        chrome_options.add_argument('--allow-running-insecure-content')
        chrome_options.add_argument('--disable-extensions')

        # Fix DevTools connection issues
        chrome_options.add_argument('--remote-debugging-port=0')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--no-sandbox')

        # Make it look more human.
        # BUG FIX: excludeSwitches was previously set twice (once with
        # 'enable-automation', later with 'enable-logging'); the second call
        # overwrote the first, silently dropping the anti-automation switch.
        # Both switches are now passed in a single call.
        chrome_options.add_argument('--disable-blink-features=AutomationControlled')
        chrome_options.add_experimental_option(
            "excludeSwitches", ["enable-automation", "enable-logging"])
        chrome_options.add_experimental_option('useAutomationExtension', False)
        chrome_options.add_argument('user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')

        # Suppress Chrome's own logging output
        chrome_options.add_argument('--disable-logging')
        chrome_options.add_argument('--log-level=3')

        # Set cross-platform download directory
        downloads_path = str(Path.home() / "Downloads")
        prefs = {
            "download.default_directory": downloads_path,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True
        }
        chrome_options.add_experimental_option("prefs", prefs)

        self.driver = webdriver.Chrome(options=chrome_options)
        # Hide navigator.webdriver so simple bot checks don't flag the session.
        self.driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

        self._emit_progress("scraper_initialized", {"headless": headless, "downloads_path": downloads_path})

    def _emit_progress(self, event_type, data):
        """
        Internal method to emit progress updates via callback.

        Args:
            event_type (str): Type of event (e.g., 'page_started', 'comic_completed').
            data (dict): Event data.
        """
        if self.progress_callback:
            try:
                self.progress_callback(event_type, data)
            except Exception:
                # Don't let callback errors crash the scraper
                pass

    def request_stop(self):
        """Request the scraper to stop gracefully at the next opportunity."""
        self._stop_requested = True
        self._emit_progress("stop_requested", {})

    def human_delay(self, min_sec=0.5, max_sec=2):
        """
        Simulate human-like delay with cancellation support.

        Args:
            min_sec (float): Minimum delay time in seconds.
            max_sec (float): Maximum delay time in seconds.
        """
        if self._stop_requested:
            return
        delay_time = random.uniform(min_sec, max_sec)
        self._emit_progress("delay_started", {"duration": delay_time})
        time.sleep(delay_time)

    def human_type(self, element, text):
        """
        Type text character by character with human-like delays.

        Args:
            element: Selenium web element to type into.
            text (str): Text to type.
        """
        for char in text:
            if self._stop_requested:
                return
            element.send_keys(char)
            time.sleep(random.uniform(0.05, 0.15))

    def navigate(self, url):
        """
        Navigate to a URL with human-like delay.

        Args:
            url (str): URL to navigate to.

        Returns:
            bool: False if a stop was requested before navigating, True otherwise.
        """
        if self._stop_requested:
            return False

        self._emit_progress("navigation_started", {"url": url})
        self.driver.get(url)
        self.human_delay(1, 3)
        self._emit_progress("navigation_completed", {"url": url})
        return True

    def login(self, username, password):
        """
        Login to EBoek.info with provided credentials.

        Args:
            username (str): Username for login.
            password (str): Password for login.

        Returns:
            bool: True if login successful, False otherwise.
        """
        if self._stop_requested:
            return False

        self._emit_progress("login_started", {"username": username})

        try:
            self.driver.get("https://eboek.info/komerin")
            self.human_delay(2, 4)

            if self._stop_requested:
                return False

            # Find and fill username field
            username_field = self.driver.find_element(By.CSS_SELECTOR, "input[type='text']")
            self.human_type(username_field, username)

            self.human_delay(0.5, 1)

            if self._stop_requested:
                return False

            # Find and fill password field
            password_field = self.driver.find_element(By.CSS_SELECTOR, "input[type='password']")
            self.human_type(password_field, password)

            self.human_delay(0.5, 1.5)

            if self._stop_requested:
                return False

            # Submit the form
            submit_button = self.driver.find_element(By.CSS_SELECTOR, "input[type='submit']")
            submit_button.click()

            self.human_delay(2, 4)

            # Heuristic success check: a successful login navigates away from
            # the /komerin login page. TODO(review): confirm by checking for a
            # post-login element instead of the URL.
            current_url = self.driver.current_url
            login_successful = "komerin" not in current_url

            if login_successful:
                self._emit_progress("login_success", {"username": username})
            else:
                self._emit_progress("login_failed", {"username": username, "error": "Login appears to have failed"})

            return login_successful

        except Exception as e:
            self._emit_progress("login_failed", {"username": username, "error": str(e)})
            return False

    def trigger_download(self, url):
        """
        Open URL in new tab to trigger browser download.

        Args:
            url (str): URL of file to download.

        Returns:
            bool: True if download triggered successfully.
        """
        if self._stop_requested:
            return False

        try:
            # Store current window handle
            current_window = self.driver.current_window_handle

            # Use JavaScript to open URL in new tab with same session
            self.driver.execute_script(f"window.open('{url}', '_blank');")

            # Wait for download to complete and tab to auto-close
            self.human_delay(3, 5)

            # Switch back to original window
            self.driver.switch_to.window(current_window)

            self._emit_progress("download_triggered", {"url": url})
            return True

        except Exception as e:
            self._emit_progress("download_failed", {"url": url, "error": str(e)})
            return False

    def scrape(self, start_page=1, end_page=1):
        """
        Scrape comics from specified page range.

        Args:
            start_page (int): Starting page number.
            end_page (int): Ending page number.

        Returns:
            dict: Summary of scraping results (success flag, counts, errors,
                cancelled flag).
        """
        if self._stop_requested:
            return {"success": False, "reason": "Cancelled before starting"}

        # Determine base URL and URL pattern based on scraping mode
        if self.scraping_mode == 1:  # Latest Comics
            base_url = "https://eboek.info/laatste"
            mode_name = "Latest Comics"
        else:  # All Comics (default)
            base_url = "https://eboek.info/stripverhalen-alle"
            mode_name = "All Comics"

        total_pages = end_page - start_page + 1
        total_comics_processed = 0
        total_downloads_triggered = 0
        # Explicit counter replaces the previous fragile "'page_num' in locals()"
        # check when building the summary.
        pages_attempted = 0
        errors = []

        self._emit_progress("scraping_started", {
            "start_page": start_page,
            "end_page": end_page,
            "total_pages": total_pages,
            "mode": mode_name
        })

        for page_num in range(start_page, end_page + 1):
            pages_attempted = page_num - start_page + 1
            if self._stop_requested:
                break

            # Construct page URL based on scraping mode
            if self.scraping_mode == 1:  # Latest Comics
                page_url = f"{base_url}?_page={page_num}&ref=dw"
            else:  # All Comics: page 1 has no /page/ suffix
                if page_num == 1:
                    page_url = base_url
                else:
                    page_url = f"{base_url}/page/{page_num}/"

            current_page_index = page_num - start_page + 1
            self._emit_progress("page_started", {
                "page_number": page_num,
                "page_index": current_page_index,
                "total_pages": total_pages,
                "url": page_url
            })

            # Navigate to the page
            if not self.navigate(page_url):
                continue

            # Scroll down a bit like a human would to see content
            self.driver.execute_script("window.scrollTo(0, 300)")
            self.human_delay(1, 2)

            if self._stop_requested:
                break

            try:
                # Find all comic strip links using mode-specific CSS selectors
                if self.scraping_mode == 1:
                    # "laatste" page: target only title links to avoid duplicates
                    comic_links = self.driver.find_elements(By.CSS_SELECTOR, '.pt-cv-wrapper .pt-cv-ifield h5.pt-cv-title a')
                else:
                    # "stripverhalen-alle" page: original selector
                    comic_links = self.driver.find_elements(By.CSS_SELECTOR, 'h2.post-title a')

                comic_count = len(comic_links)

                self._emit_progress("page_comics_found", {
                    "page_number": page_num,
                    "comic_count": comic_count
                })

                # Store URLs first to avoid stale element issues
                comic_urls = [link.get_attribute('href') for link in comic_links]

                # Take a break between pages (more likely and longer)
                if page_num > start_page:
                    if random.random() < 0.7:  # 70% chance of a long break
                        break_time = random.uniform(15, 45)  # 15-45 seconds
                        self._emit_progress("page_break_started", {
                            "duration": break_time,
                            "page_number": page_num
                        })
                        time.sleep(break_time)
                    else:
                        # Even without a long break, always pause a bit
                        short_break = random.uniform(5, 10)
                        self._emit_progress("short_break", {
                            "duration": short_break,
                            "page_number": page_num
                        })
                        time.sleep(short_break)

                # Process all comics on this page
                for i, url in enumerate(comic_urls, 1):
                    if self._stop_requested:
                        break

                    self._emit_progress("comic_started", {
                        "page_number": page_num,
                        "comic_index": i,
                        "total_comics": comic_count,
                        "url": url
                    })

                    # Random chance to scroll on main page before clicking
                    if random.random() < 0.4:
                        scroll_amount = random.randint(100, 500)
                        self.driver.execute_script(f"window.scrollBy(0, {scroll_amount})")
                        self.human_delay(0.5, 1.5)

                    # Open in new tab to keep main page
                    self.driver.execute_script("window.open('');")
                    self.driver.switch_to.window(self.driver.window_handles[-1])

                    try:
                        self.driver.get(url)
                        self.human_delay(2, 4)

                        if self._stop_requested:
                            break

                        # Sometimes scroll down to see the content
                        if random.random() < 0.6:
                            self.driver.execute_script("window.scrollTo(0, 400)")
                            self.human_delay(0.5, 1.5)

                        # Extract title; fall back to a synthetic one if missing
                        try:
                            title = self.driver.find_element(By.CSS_SELECTOR, 'h1.entry-title').text
                        except Exception:
                            title = f"Comic {i} on page {page_num}"

                        self._emit_progress("comic_title_extracted", {
                            "title": title,
                            "url": url
                        })

                        # Small delay before clicking download
                        self.human_delay(0.8, 2)

                        if self._stop_requested:
                            break

                        # Execute the site's downloadLinks() JavaScript function
                        self.driver.execute_script("downloadLinks()")
                        self.human_delay(1.5, 3)

                        # Find all download links in the table
                        download_links = self.driver.find_elements(By.CSS_SELECTOR, 'table a')
                        download_count = len(download_links)

                        self._emit_progress("download_links_found", {
                            "title": title,
                            "download_count": download_count
                        })

                        # Trigger download for each file
                        for j, link in enumerate(download_links):
                            if self._stop_requested:
                                break

                            file_url = link.get_attribute('href')
                            file_name = link.text.strip()

                            self._emit_progress("download_started", {
                                "file_name": file_name,
                                "url": file_url,
                                "index": j + 1,
                                "total": download_count
                            })

                            if self.trigger_download(file_url):
                                total_downloads_triggered += 1

                            # Human-like delay between downloads (skip after last)
                            if j < len(download_links) - 1:
                                delay_time = random.uniform(2, 5)
                                self._emit_progress("download_delay", {
                                    "duration": delay_time,
                                    "remaining": len(download_links) - j - 1
                                })
                                time.sleep(delay_time)

                        total_comics_processed += 1

                        self._emit_progress("comic_completed", {
                            "title": title,
                            "downloads_triggered": download_count,
                            "page_number": page_num,
                            "comic_index": i
                        })

                        # Take a longer break every 5 comics
                        if i % 5 == 0 and i < len(comic_urls):
                            break_time = random.uniform(3, 7)
                            self._emit_progress("comic_batch_break", {
                                "duration": break_time,
                                "comics_processed": i
                            })
                            time.sleep(break_time)

                    except Exception as e:
                        error_msg = f"Error processing {url}: {e}"
                        errors.append(error_msg)
                        self._emit_progress("comic_error", {
                            "url": url,
                            "error": str(e)
                        })
                        # Human would pause after an error
                        self.human_delay(2, 4)

                    finally:
                        # BUG FIX: tab cleanup now runs in `finally` so the
                        # comic tab is closed even when a stop request breaks
                        # out of the loop mid-comic (previously the tab was
                        # leaked and the driver stayed focused on it).
                        try:
                            self.driver.close()
                            self.driver.switch_to.window(self.driver.window_handles[0])
                        except Exception:
                            # Handle case where tab might have closed itself
                            if len(self.driver.window_handles) > 0:
                                self.driver.switch_to.window(self.driver.window_handles[0])

                    # Vary the delay between comics
                    self.human_delay(1, 3)

                self._emit_progress("page_completed", {
                    "page_number": page_num,
                    "comics_processed": len(comic_urls)
                })

            except Exception as e:
                error_msg = f"Error processing page {page_num}: {e}"
                errors.append(error_msg)
                self._emit_progress("page_error", {
                    "page_number": page_num,
                    "error": str(e)
                })

        # Generate summary
        summary = {
            "success": not self._stop_requested,
            "total_pages_processed": min(pages_attempted, total_pages),
            "total_comics_processed": total_comics_processed,
            "total_downloads_triggered": total_downloads_triggered,
            "errors": errors,
            "cancelled": self._stop_requested
        }

        self._emit_progress("scraping_completed", summary)

        return summary

    def close(self):
        """Close the browser and clean up resources."""
        try:
            self.driver.quit()
            self._emit_progress("scraper_closed", {})
        except Exception as e:
            self._emit_progress("scraper_close_error", {"error": str(e)})
301
core/scraper_thread.py
Normal file
301
core/scraper_thread.py
Normal file
@@ -0,0 +1,301 @@
|
||||
"""
|
||||
QThread wrapper for the Scraper class with PyQt signals for GUI communication.
|
||||
"""
|
||||
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
from .scraper import Scraper
|
||||
import time
|
||||
|
||||
|
||||
class ScraperThread(QThread):
|
||||
"""
|
||||
Thread wrapper for the Scraper class that converts callback events to PyQt signals.
|
||||
|
||||
This class runs the scraper in a separate thread and emits signals that can be
|
||||
connected to GUI components for real-time updates.
|
||||
"""
|
||||
|
||||
# Login-related signals
|
||||
login_started = pyqtSignal(str) # username
|
||||
login_success = pyqtSignal(str) # username
|
||||
login_failed = pyqtSignal(str, str) # username, error_message
|
||||
|
||||
# Scraping progress signals
|
||||
scraping_started = pyqtSignal(int, int, int) # start_page, end_page, total_pages
|
||||
scraping_completed = pyqtSignal(dict) # summary dictionary
|
||||
|
||||
# Page-level progress signals
|
||||
page_started = pyqtSignal(int, int, int, str) # page_number, page_index, total_pages, url
|
||||
page_completed = pyqtSignal(int, int) # page_number, comics_processed
|
||||
page_comics_found = pyqtSignal(int, int) # page_number, comic_count
|
||||
page_error = pyqtSignal(int, str) # page_number, error_message
|
||||
|
||||
# Comic-level progress signals
|
||||
comic_started = pyqtSignal(int, int, int, str) # page_number, comic_index, total_comics, url
|
||||
comic_completed = pyqtSignal(str, int, int, int) # title, downloads_triggered, page_number, comic_index
|
||||
comic_title_extracted = pyqtSignal(str, str) # title, url
|
||||
comic_error = pyqtSignal(str, str) # url, error_message
|
||||
|
||||
# Download-related signals
|
||||
download_links_found = pyqtSignal(str, int) # title, download_count
|
||||
download_started = pyqtSignal(str, str, int, int) # file_name, url, index, total
|
||||
download_triggered = pyqtSignal(str) # url
|
||||
download_failed = pyqtSignal(str, str) # url, error_message
|
||||
|
||||
# General status and control signals
|
||||
status_update = pyqtSignal(str) # general status message
|
||||
error_occurred = pyqtSignal(str) # error message
|
||||
delay_started = pyqtSignal(float) # duration
|
||||
stop_requested = pyqtSignal()
|
||||
|
||||
# Navigation signals
|
||||
navigation_started = pyqtSignal(str) # url
|
||||
navigation_completed = pyqtSignal(str) # url
|
||||
|
||||
# Break and timing signals
|
||||
page_break_started = pyqtSignal(float, int) # duration, page_number
|
||||
short_break = pyqtSignal(float, int) # duration, page_number
|
||||
comic_batch_break = pyqtSignal(float, int) # duration, comics_processed
|
||||
download_delay = pyqtSignal(float, int) # duration, remaining_downloads
|
||||
|
||||
def __init__(self, username, password, start_page, end_page, scraping_mode=0, headless=True):
|
||||
"""
|
||||
Initialize the scraper thread.
|
||||
|
||||
Args:
|
||||
username (str): EBoek.info username
|
||||
password (str): EBoek.info password
|
||||
start_page (int): Starting page number
|
||||
end_page (int): Ending page number
|
||||
scraping_mode (int): Scraping mode (0=All Comics, 1=Latest Comics)
|
||||
headless (bool): Whether to run Chrome in headless mode
|
||||
"""
|
||||
super().__init__()
|
||||
self.username = username
|
||||
self.password = password
|
||||
self.start_page = start_page
|
||||
self.end_page = end_page
|
||||
self.scraping_mode = scraping_mode
|
||||
self.headless = headless
|
||||
self.scraper = None
|
||||
self._is_running = False
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
Main thread execution method.
|
||||
This runs in the separate thread and should not be called directly.
|
||||
"""
|
||||
try:
|
||||
self._is_running = True
|
||||
|
||||
# Initialize scraper with progress callback
|
||||
self.scraper = Scraper(
|
||||
headless=self.headless,
|
||||
progress_callback=self._handle_scraper_progress,
|
||||
scraping_mode=self.scraping_mode
|
||||
)
|
||||
|
||||
# Perform login
|
||||
self.login_started.emit(self.username)
|
||||
login_success = self.scraper.login(self.username, self.password)
|
||||
|
||||
if not login_success:
|
||||
self.login_failed.emit(self.username, "Login failed. Please check your credentials.")
|
||||
return
|
||||
|
||||
# Check if stop was requested during login
|
||||
if self.scraper._stop_requested:
|
||||
return
|
||||
|
||||
# Start scraping
|
||||
summary = self.scraper.scrape(self.start_page, self.end_page)
|
||||
|
||||
# Emit completion signal
|
||||
self.scraping_completed.emit(summary)
|
||||
|
||||
except Exception as e:
|
||||
self.error_occurred.emit(f"Unexpected error: {str(e)}")
|
||||
finally:
|
||||
# Clean up
|
||||
if self.scraper:
|
||||
self.scraper.close()
|
||||
self._is_running = False
|
||||
|
||||
def _handle_scraper_progress(self, event_type, data):
|
||||
"""
|
||||
Handle progress callbacks from the Scraper and convert them to PyQt signals.
|
||||
|
||||
Args:
|
||||
event_type (str): Type of event from the scraper
|
||||
data (dict): Event data
|
||||
"""
|
||||
try:
|
||||
# Login events
|
||||
if event_type == "login_started":
|
||||
# Already handled in run() method
|
||||
pass
|
||||
elif event_type == "login_success":
|
||||
self.login_success.emit(data.get("username", ""))
|
||||
elif event_type == "login_failed":
|
||||
self.login_failed.emit(data.get("username", ""), data.get("error", "Unknown error"))
|
||||
|
||||
# Scraping events
|
||||
elif event_type == "scraping_started":
|
||||
self.scraping_started.emit(
|
||||
data.get("start_page", 1),
|
||||
data.get("end_page", 1),
|
||||
data.get("total_pages", 1)
|
||||
)
|
||||
elif event_type == "scraping_completed":
|
||||
self.scraping_completed.emit(data)
|
||||
|
||||
# Page events
|
||||
elif event_type == "page_started":
|
||||
self.page_started.emit(
|
||||
data.get("page_number", 1),
|
||||
data.get("page_index", 1),
|
||||
data.get("total_pages", 1),
|
||||
data.get("url", "")
|
||||
)
|
||||
elif event_type == "page_completed":
|
||||
self.page_completed.emit(
|
||||
data.get("page_number", 1),
|
||||
data.get("comics_processed", 0)
|
||||
)
|
||||
elif event_type == "page_comics_found":
|
||||
self.page_comics_found.emit(
|
||||
data.get("page_number", 1),
|
||||
data.get("comic_count", 0)
|
||||
)
|
||||
elif event_type == "page_error":
|
||||
self.page_error.emit(
|
||||
data.get("page_number", 1),
|
||||
data.get("error", "Unknown error")
|
||||
)
|
||||
|
||||
# Comic events
|
||||
elif event_type == "comic_started":
|
||||
self.comic_started.emit(
|
||||
data.get("page_number", 1),
|
||||
data.get("comic_index", 1),
|
||||
data.get("total_comics", 1),
|
||||
data.get("url", "")
|
||||
)
|
||||
elif event_type == "comic_completed":
|
||||
self.comic_completed.emit(
|
||||
data.get("title", "Unknown"),
|
||||
data.get("downloads_triggered", 0),
|
||||
data.get("page_number", 1),
|
||||
data.get("comic_index", 1)
|
||||
)
|
||||
elif event_type == "comic_title_extracted":
|
||||
self.comic_title_extracted.emit(
|
||||
data.get("title", "Unknown"),
|
||||
data.get("url", "")
|
||||
)
|
||||
elif event_type == "comic_error":
|
||||
self.comic_error.emit(
|
||||
data.get("url", ""),
|
||||
data.get("error", "Unknown error")
|
||||
)
|
||||
|
||||
# Download events
|
||||
elif event_type == "download_links_found":
|
||||
self.download_links_found.emit(
|
||||
data.get("title", "Unknown"),
|
||||
data.get("download_count", 0)
|
||||
)
|
||||
elif event_type == "download_started":
|
||||
self.download_started.emit(
|
||||
data.get("file_name", ""),
|
||||
data.get("url", ""),
|
||||
data.get("index", 1),
|
||||
data.get("total", 1)
|
||||
)
|
||||
elif event_type == "download_triggered":
|
||||
self.download_triggered.emit(data.get("url", ""))
|
||||
elif event_type == "download_failed":
|
||||
self.download_failed.emit(
|
||||
data.get("url", ""),
|
||||
data.get("error", "Unknown error")
|
||||
)
|
||||
|
||||
# Navigation events
|
||||
elif event_type == "navigation_started":
|
||||
self.navigation_started.emit(data.get("url", ""))
|
||||
elif event_type == "navigation_completed":
|
||||
self.navigation_completed.emit(data.get("url", ""))
|
||||
|
||||
# Timing and break events
|
||||
elif event_type == "delay_started":
|
||||
self.delay_started.emit(data.get("duration", 0.0))
|
||||
elif event_type == "page_break_started":
|
||||
self.page_break_started.emit(
|
||||
data.get("duration", 0.0),
|
||||
data.get("page_number", 1)
|
||||
)
|
||||
elif event_type == "short_break":
|
||||
self.short_break.emit(
|
||||
data.get("duration", 0.0),
|
||||
data.get("page_number", 1)
|
||||
)
|
||||
elif event_type == "comic_batch_break":
|
||||
self.comic_batch_break.emit(
|
||||
data.get("duration", 0.0),
|
||||
data.get("comics_processed", 0)
|
||||
)
|
||||
elif event_type == "download_delay":
|
||||
self.download_delay.emit(
|
||||
data.get("duration", 0.0),
|
||||
data.get("remaining", 0)
|
||||
)
|
||||
|
||||
# Control events
|
||||
elif event_type == "stop_requested":
|
||||
self.stop_requested.emit()
|
||||
|
||||
# General status updates
|
||||
elif event_type in ["scraper_initialized", "scraper_closed", "scraper_close_error"]:
|
||||
self.status_update.emit(f"{event_type}: {data}")
|
||||
|
||||
# Emit a general status update for events we didn't specifically handle
|
||||
else:
|
||||
self.status_update.emit(f"{event_type}: {data}")
|
||||
|
||||
except Exception as e:
|
||||
# Don't let signal emission errors crash the scraper
|
||||
self.error_occurred.emit(f"Signal emission error: {str(e)}")
|
||||
|
||||
def request_stop(self):
    """
    Ask the underlying Scraper to stop gracefully.

    Safe to call from the main (GUI) thread; a no-op when the scraper
    has not been created yet.
    """
    scraper = self.scraper
    if scraper:
        scraper.request_stop()
def is_running(self):
    """
    Check whether the scraper thread is currently active.

    Returns:
        bool: True only when both the internal flag is set and the
        QThread itself reports it is running.
    """
    # Preserve short-circuit semantics: skip the QThread query entirely
    # when the internal flag is already cleared.
    return self.isRunning() if self._is_running else self._is_running
def get_progress_summary(self):
    """
    Build a snapshot of the current progress state.

    Thread-safe enough to be called from the main (GUI) thread.

    Returns:
        dict: Current progress information; {"status": "not_started"}
        before the scraper has been created.
    """
    scraper = self.scraper
    if not scraper:
        return {"status": "not_started"}

    # NOTE(review): reads Scraper's private _stop_requested flag directly.
    return {
        "status": "running" if self._is_running else "stopped",
        "stop_requested": scraper._stop_requested,
        "thread_running": self.isRunning(),
    }
Reference in New Issue
Block a user