"""
QThread wrapper for the Scraper class with PyQt signals for GUI communication.
"""

import time

from PyQt5.QtCore import QThread, pyqtSignal

from .scraper import Scraper


class ScraperThread(QThread):
    """
    Thread wrapper for the Scraper class that converts callback events to PyQt signals.

    This class runs the scraper in a separate thread and emits signals that can be
    connected to GUI components for real-time updates.
    """

    # Login-related signals
    login_started = pyqtSignal(str)  # username
    login_success = pyqtSignal(str)  # username
    login_failed = pyqtSignal(str, str)  # username, error_message

    # Scraping progress signals
    scraping_started = pyqtSignal(int, int, int)  # start_page, end_page, total_pages
    scraping_completed = pyqtSignal(dict)  # summary dictionary

    # Page-level progress signals
    page_started = pyqtSignal(int, int, int, str)  # page_number, page_index, total_pages, url
    page_completed = pyqtSignal(int, int)  # page_number, comics_processed
    page_comics_found = pyqtSignal(int, int)  # page_number, comic_count
    page_error = pyqtSignal(int, str)  # page_number, error_message

    # Comic-level progress signals
    comic_started = pyqtSignal(int, int, int, str)  # page_number, comic_index, total_comics, url
    comic_completed = pyqtSignal(str, int, int, int)  # title, downloads_triggered, page_number, comic_index
    comic_title_extracted = pyqtSignal(str, str)  # title, url
    comic_error = pyqtSignal(str, str)  # url, error_message

    # Download-related signals
    download_links_found = pyqtSignal(str, int)  # title, download_count
    download_started = pyqtSignal(str, str, int, int)  # file_name, url, index, total
    download_triggered = pyqtSignal(str)  # url
    download_failed = pyqtSignal(str, str)  # url, error_message

    # General status and control signals
    status_update = pyqtSignal(str)  # general status message
    error_occurred = pyqtSignal(str)  # error message
    delay_started = pyqtSignal(float)  # duration
    stop_requested = pyqtSignal()

    # Navigation signals
    navigation_started = pyqtSignal(str)  # url
    navigation_completed = pyqtSignal(str)  # url

    # Break and timing signals
    page_break_started = pyqtSignal(float, int)  # duration, page_number
    short_break = pyqtSignal(float, int)  # duration, page_number
    comic_batch_break = pyqtSignal(float, int)  # duration, comics_processed
    download_delay = pyqtSignal(float, int)  # duration, remaining_downloads

    # Dispatch table for _handle_scraper_progress. Every key is both the scraper
    # event name and the name of the signal to emit; the value lists, in signal
    # argument order, the payload key to read and the default used when that key
    # is missing. Events that are not listed here are reported via status_update.
    _EVENT_FIELDS = {
        # Login events ("login_started" is emitted by run() and skipped in the handler)
        "login_success": (("username", ""),),
        "login_failed": (("username", ""), ("error", "Unknown error")),
        # Scraping events ("scraping_completed" forwards the whole payload instead)
        "scraping_started": (("start_page", 1), ("end_page", 1), ("total_pages", 1)),
        # Page events
        "page_started": (
            ("page_number", 1),
            ("page_index", 1),
            ("total_pages", 1),
            ("url", ""),
        ),
        "page_completed": (("page_number", 1), ("comics_processed", 0)),
        "page_comics_found": (("page_number", 1), ("comic_count", 0)),
        "page_error": (("page_number", 1), ("error", "Unknown error")),
        # Comic events
        "comic_started": (
            ("page_number", 1),
            ("comic_index", 1),
            ("total_comics", 1),
            ("url", ""),
        ),
        "comic_completed": (
            ("title", "Unknown"),
            ("downloads_triggered", 0),
            ("page_number", 1),
            ("comic_index", 1),
        ),
        "comic_title_extracted": (("title", "Unknown"), ("url", "")),
        "comic_error": (("url", ""), ("error", "Unknown error")),
        # Download events
        "download_links_found": (("title", "Unknown"), ("download_count", 0)),
        "download_started": (
            ("file_name", ""),
            ("url", ""),
            ("index", 1),
            ("total", 1),
        ),
        "download_triggered": (("url", ""),),
        "download_failed": (("url", ""), ("error", "Unknown error")),
        # Navigation events
        "navigation_started": (("url", ""),),
        "navigation_completed": (("url", ""),),
        # Timing and break events
        "delay_started": (("duration", 0.0),),
        "page_break_started": (("duration", 0.0), ("page_number", 1)),
        "short_break": (("duration", 0.0), ("page_number", 1)),
        "comic_batch_break": (("duration", 0.0), ("comics_processed", 0)),
        "download_delay": (("duration", 0.0), ("remaining", 0)),
        # Control events (signal takes no arguments)
        "stop_requested": (),
    }

    def __init__(
        self,
        username: str,
        password: str,
        start_page: int,
        end_page: int,
        scraping_mode: int = 0,
        headless: bool = True,
    ) -> None:
        """
        Initialize the scraper thread.

        Only stores the configuration; the Scraper itself is created in run().

        Args:
            username (str): EBoek.info username
            password (str): EBoek.info password
            start_page (int): Starting page number
            end_page (int): Ending page number
            scraping_mode (int): Scraping mode (0=All Comics, 1=Latest Comics)
            headless (bool): Whether to run Chrome in headless mode
        """
        super().__init__()
        self.username = username
        self.password = password
        self.start_page = start_page
        self.end_page = end_page
        self.scraping_mode = scraping_mode
        self.headless = headless
        self.scraper = None  # created in run()
        self._is_running = False

    def run(self) -> None:
        """
        Main thread execution method.

        This runs in the separate thread and should not be called directly.
        It creates the Scraper, logs in, scrapes the configured page range and
        emits scraping_completed with the summary returned by Scraper.scrape().
        Any exception is reported through error_occurred instead of being raised.

        NOTE(review): _handle_scraper_progress also forwards "login_failed" and
        "scraping_completed" events. If Scraper reports those through the progress
        callback as well, listeners receive them twice -- confirm against Scraper.
        """
        self._is_running = True
        try:
            # Initialize scraper with progress callback
            self.scraper = Scraper(
                headless=self.headless,
                progress_callback=self._handle_scraper_progress,
                scraping_mode=self.scraping_mode,
            )

            # Perform login
            self.login_started.emit(self.username)
            if not self.scraper.login(self.username, self.password):
                self.login_failed.emit(
                    self.username,
                    "Login failed. Please check your credentials.",
                )
                return

            # Check if stop was requested during login
            if self.scraper._stop_requested:
                return

            # Start scraping and report the final summary
            summary = self.scraper.scrape(self.start_page, self.end_page)
            self.scraping_completed.emit(summary)
        except Exception as e:
            self.error_occurred.emit(f"Unexpected error: {str(e)}")
        finally:
            # Clean up. A failure while closing must neither escape run() nor
            # leave _is_running stuck at True, so it is reported and the flag is
            # reset unconditionally.
            try:
                if self.scraper:
                    self.scraper.close()
            except Exception as e:
                self.error_occurred.emit(f"Error while closing scraper: {e}")
            finally:
                self._is_running = False

    def _handle_scraper_progress(self, event_type, data) -> None:
        """
        Handle progress callbacks from the Scraper and convert them to PyQt signals.

        Known events are looked up in _EVENT_FIELDS and emitted on the signal of
        the same name; unknown events are emitted as a status_update message.
        Errors raised while emitting are reported through error_occurred so that
        they do not propagate back into the scraper.

        Args:
            event_type (str): Type of event from the scraper
            data (dict): Event data; None is treated as an empty payload when
                reading individual fields
        """
        try:
            if event_type == "login_started":
                # Already handled in run() method
                return

            if event_type == "scraping_completed":
                # The summary payload is forwarded unchanged
                self.scraping_completed.emit(data)
                return

            fields = (
                self._EVENT_FIELDS.get(event_type)
                if isinstance(event_type, str)
                else None
            )
            if fields is None:
                # General status update for events without a dedicated signal
                # (e.g. scraper_initialized, scraper_closed, scraper_close_error)
                self.status_update.emit(f"{event_type}: {data}")
                return

            # An event without a payload falls back to the per-field defaults
            # instead of failing on None.get(...)
            payload = data if data is not None else {}
            args = [payload.get(key, default) for key, default in fields]
            getattr(self, event_type).emit(*args)
        except Exception as e:
            # Don't let signal emission errors crash the scraper
            self.error_occurred.emit(f"Signal emission error: {str(e)}")

    def request_stop(self) -> None:
        """
        Request the scraper to stop gracefully.

        Intended to be called from the main (GUI) thread. The request is
        delegated to Scraper.request_stop(); if the scraper has not been created
        yet, the call does nothing.
        """
        if self.scraper:
            self.scraper.request_stop()

    def is_running(self) -> bool:
        """
        Check if the scraper thread is currently running.

        Returns:
            bool: True if run() is in progress and the QThread reports running
        """
        return self._is_running and self.isRunning()

    def get_progress_summary(self) -> dict:
        """
        Get a summary of the current progress.

        Only reads plain attributes of the thread and the scraper; no locking
        is performed.

        Returns:
            dict: {"status": "not_started"} if no scraper has been created yet,
            otherwise the keys "status" ("running" or "stopped"),
            "stop_requested" and "thread_running"
        """
        # Read the reference once so every field below is taken from the same object
        scraper = self.scraper
        if not scraper:
            return {"status": "not_started"}

        return {
            "status": "running" if self._is_running else "stopped",
            "stop_requested": scraper._stop_requested,
            "thread_running": self.isRunning(),
        }