""" Progress dialog for real-time scraping progress monitoring. """ import time from PyQt5.QtWidgets import ( QDialog, QVBoxLayout, QHBoxLayout, QGridLayout, QPushButton, QLabel, QProgressBar, QTextEdit, QGroupBox ) from PyQt5.QtCore import Qt, QTimer, pyqtSignal from PyQt5.QtGui import QFont from pathlib import Path import sys # Add the project root directory to Python path project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) class ProgressDialog(QDialog): """ Dialog for displaying real-time scraping progress. Shows progress bars for pages and comics, current activity status, and a detailed log of operations with the ability to cancel. """ # Signals cancel_requested = pyqtSignal() def __init__(self, parent=None, scraper_thread=None): super().__init__(parent) self.scraper_thread = scraper_thread self.start_time = time.time() # Progress tracking self.total_pages = 0 self.current_page = 0 self.total_comics_on_page = 0 self.current_comic = 0 self.total_comics_processed = 0 self.total_downloads_triggered = 0 # Enhanced time tracking for better estimation self.comic_start_times = [] # Track start time of each comic self.comic_durations = [] # Track how long each comic took self.estimated_total_comics = 0 # Estimated total comics across all pages self.last_comic_start = None self.pages_processed = 0 self.init_ui() self.connect_signals() def init_ui(self): """Initialize the user interface.""" self.setWindowTitle("Scraping Progress") self.setMinimumSize(500, 400) self.resize(600, 500) layout = QVBoxLayout(self) # Overall progress section self.create_overall_progress_section(layout) # Current activity section self.create_activity_section(layout) # Progress details section self.create_details_section(layout) # Log section self.create_log_section(layout) # Control buttons self.create_control_section(layout) def create_overall_progress_section(self, parent_layout): """Create the overall progress section.""" group = QGroupBox("Overall Progress") layout = QVBoxLayout(group) # Page progress self.page_progress_label = QLabel("Initializing...") layout.addWidget(self.page_progress_label) self.page_progress_bar = QProgressBar() self.page_progress_bar.setRange(0, 100) layout.addWidget(self.page_progress_bar) # Comic progress (current page) self.comic_progress_label = QLabel("Waiting for page data...") layout.addWidget(self.comic_progress_label) self.comic_progress_bar = QProgressBar() self.comic_progress_bar.setRange(0, 100) layout.addWidget(self.comic_progress_bar) parent_layout.addWidget(group) def create_activity_section(self, parent_layout): """Create the current activity section.""" group = QGroupBox("Current Activity") layout = QVBoxLayout(group) self.activity_label = QLabel("Starting scraper...") self.activity_label.setStyleSheet("font-weight: bold; color: #2E8B57;") layout.addWidget(self.activity_label) # Current item details self.current_item_label = QLabel("") layout.addWidget(self.current_item_label) parent_layout.addWidget(group) def create_details_section(self, parent_layout): """Create the progress details section.""" group = QGroupBox("Session Details") layout = QGridLayout(group) # Time information layout.addWidget(QLabel("Time Elapsed:"), 0, 0) self.elapsed_time_label = QLabel("00:00:00") layout.addWidget(self.elapsed_time_label, 0, 1) layout.addWidget(QLabel("Estimated Remaining:"), 0, 2) self.remaining_time_label = QLabel("Calculating...") layout.addWidget(self.remaining_time_label, 0, 3) # Progress statistics layout.addWidget(QLabel("Comics Processed:"), 1, 0) self.comics_processed_label = QLabel("0") layout.addWidget(self.comics_processed_label, 1, 1) layout.addWidget(QLabel("Downloads Triggered:"), 1, 2) self.downloads_triggered_label = QLabel("0") layout.addWidget(self.downloads_triggered_label, 1, 3) parent_layout.addWidget(group) # Start timer for elapsed time updates self.timer = QTimer() self.timer.timeout.connect(self.update_elapsed_time) self.timer.start(1000) # Update every second def create_log_section(self, parent_layout): """Create the log display section.""" group = QGroupBox("Activity Log") layout = QVBoxLayout(group) self.log_text = QTextEdit() self.log_text.setReadOnly(True) self.log_text.setMaximumHeight(150) # Set monospace font for logs (cross-platform) font = QFont() font.setFamily("Monaco, Consolas, 'Courier New', monospace") # Cross-platform fallback font.setPointSize(9) font.setStyleHint(QFont.TypeWriter) # Monospace hint self.log_text.setFont(font) layout.addWidget(self.log_text) parent_layout.addWidget(group) def create_control_section(self, parent_layout): """Create the control buttons section.""" layout = QHBoxLayout() layout.addStretch() self.cancel_btn = QPushButton("Cancel Operation") self.cancel_btn.clicked.connect(self.cancel_scraping) self.cancel_btn.setStyleSheet("QPushButton { background-color: #f44336; color: white; font-weight: bold; padding: 8px; }") layout.addWidget(self.cancel_btn) self.close_btn = QPushButton("Close") self.close_btn.clicked.connect(self.accept) self.close_btn.setEnabled(False) # Enabled when scraping completes layout.addWidget(self.close_btn) parent_layout.addLayout(layout) def connect_signals(self): """Connect signals from the scraper thread.""" if not self.scraper_thread: return # Login signals self.scraper_thread.login_started.connect(self.on_login_started) self.scraper_thread.login_success.connect(self.on_login_success) self.scraper_thread.login_failed.connect(self.on_login_failed) # Scraping progress self.scraper_thread.scraping_started.connect(self.on_scraping_started) self.scraper_thread.scraping_completed.connect(self.on_scraping_completed) # Page progress self.scraper_thread.page_started.connect(self.on_page_started) self.scraper_thread.page_completed.connect(self.on_page_completed) self.scraper_thread.page_comics_found.connect(self.on_page_comics_found) # Comic progress self.scraper_thread.comic_started.connect(self.on_comic_started) self.scraper_thread.comic_completed.connect(self.on_comic_completed) self.scraper_thread.comic_title_extracted.connect(self.on_comic_title_extracted) # Download progress self.scraper_thread.download_links_found.connect(self.on_download_links_found) self.scraper_thread.download_started.connect(self.on_download_started) self.scraper_thread.download_triggered.connect(self.on_download_triggered) # General status self.scraper_thread.status_update.connect(self.log_message) self.scraper_thread.error_occurred.connect(self.on_error_occurred) # Timing events self.scraper_thread.page_break_started.connect(self.on_break_started) self.scraper_thread.comic_batch_break.connect(self.on_break_started) def cancel_scraping(self): """Cancel the scraping operation.""" if self.scraper_thread: self.log_message("Cancel requested - stopping after current operation...") self.scraper_thread.request_stop() self.cancel_btn.setEnabled(False) self.activity_label.setText("Cancelling...") self.activity_label.setStyleSheet("font-weight: bold; color: #FF6B35;") def log_message(self, message): """Add a message to the log.""" import datetime timestamp = datetime.datetime.now().strftime("%H:%M:%S") formatted_message = f"[{timestamp}] {message}" self.log_text.append(formatted_message) # Auto-scroll to bottom scrollbar = self.log_text.verticalScrollBar() scrollbar.setValue(scrollbar.maximum()) def update_elapsed_time(self): """Update the elapsed time display with enhanced estimation.""" elapsed = int(time.time() - self.start_time) hours = elapsed // 3600 minutes = (elapsed % 3600) // 60 seconds = elapsed % 60 self.elapsed_time_label.setText(f"{hours:02d}:{minutes:02d}:{seconds:02d}") # Enhanced time estimation based on comic processing rate self.calculate_realistic_time_estimate(elapsed) def calculate_realistic_time_estimate(self, elapsed): """Calculate realistic time estimate based on comic processing data.""" try: # If we have comic duration data, use it for accurate estimation if len(self.comic_durations) >= 2 and self.estimated_total_comics > 0: # Calculate average time per comic from actual data avg_time_per_comic = sum(self.comic_durations) / len(self.comic_durations) comics_remaining = self.estimated_total_comics - self.total_comics_processed if comics_remaining > 0: estimated_remaining = int(comics_remaining * avg_time_per_comic) self.format_remaining_time(estimated_remaining) else: self.remaining_time_label.setText("Almost done!") # Comic-based estimation when we know total comics but don't have enough duration data elif self.estimated_total_comics > 0 and self.total_comics_processed > 0: # Use current processing rate avg_time_per_comic = elapsed / self.total_comics_processed comics_remaining = self.estimated_total_comics - self.total_comics_processed if comics_remaining > 0: estimated_remaining = int(comics_remaining * avg_time_per_comic) self.format_remaining_time(estimated_remaining) else: self.remaining_time_label.setText("Almost done!") # Fallback to combined page + comic estimation elif self.total_pages > 0 and self.total_comics_on_page > 0: # Calculate combined progress: pages completed + current page comic progress pages_completed = self.current_page - 1 current_page_progress = self.current_comic / self.total_comics_on_page total_progress = (pages_completed + current_page_progress) / self.total_pages if total_progress > 0.05: # Only estimate after 5% progress estimated_total = elapsed / total_progress remaining = int(estimated_total - elapsed) if remaining > 0: self.format_remaining_time(remaining) else: self.remaining_time_label.setText("Almost done!") else: self.remaining_time_label.setText("Calculating...") else: self.remaining_time_label.setText("Calculating...") except (ZeroDivisionError, ValueError): self.remaining_time_label.setText("Calculating...") def format_remaining_time(self, remaining_seconds): """Format remaining time into readable format.""" if remaining_seconds <= 0: self.remaining_time_label.setText("Almost done!") return rem_hours = remaining_seconds // 3600 rem_minutes = (remaining_seconds % 3600) // 60 rem_secs = remaining_seconds % 60 # Show different formats based on duration if rem_hours > 0: self.remaining_time_label.setText(f"{rem_hours:02d}:{rem_minutes:02d}:{rem_secs:02d}") elif rem_minutes > 0: self.remaining_time_label.setText(f"{rem_minutes:02d}:{rem_secs:02d}") else: self.remaining_time_label.setText(f"{rem_secs} sec") def update_progress_bars(self): """Update progress bars based on current state.""" # Page progress if self.total_pages > 0: page_progress = int((self.current_page / self.total_pages) * 100) self.page_progress_bar.setValue(page_progress) self.page_progress_label.setText(f"Page {self.current_page} of {self.total_pages} ({page_progress}%)") # Comic progress if self.total_comics_on_page > 0: comic_progress = int((self.current_comic / self.total_comics_on_page) * 100) self.comic_progress_bar.setValue(comic_progress) self.comic_progress_label.setText(f"Comic {self.current_comic} of {self.total_comics_on_page} ({comic_progress}%)") def update_statistics(self): """Update the statistics display.""" self.comics_processed_label.setText(str(self.total_comics_processed)) self.downloads_triggered_label.setText(str(self.total_downloads_triggered)) # Event handlers def on_login_started(self, username): """Handle login started.""" self.activity_label.setText(f"Logging in as {username}...") self.log_message(f"Logging in as {username}") def on_login_success(self, username): """Handle successful login.""" self.activity_label.setText("Login successful - starting scraper...") self.log_message(f"Login successful for {username}") def on_login_failed(self, username, error): """Handle failed login.""" self.activity_label.setText("Login failed") self.activity_label.setStyleSheet("font-weight: bold; color: #f44336;") self.log_message(f"Login failed: {error}") def on_scraping_started(self, start_page, end_page, total_pages): """Handle scraping start.""" self.total_pages = total_pages self.current_page = 0 self.activity_label.setText(f"Starting scraping: pages {start_page} to {end_page}") self.log_message(f"Starting scraping: pages {start_page} to {end_page}") self.update_progress_bars() def on_scraping_completed(self, summary): """Handle scraping completion.""" self.cancel_btn.setEnabled(False) self.close_btn.setEnabled(True) self.timer.stop() if summary.get('cancelled'): self.activity_label.setText("Scraping cancelled by user") self.activity_label.setStyleSheet("font-weight: bold; color: #FF6B35;") elif summary.get('success'): self.activity_label.setText("Scraping completed successfully!") self.activity_label.setStyleSheet("font-weight: bold; color: #2E8B57;") else: self.activity_label.setText("Scraping completed with errors") self.activity_label.setStyleSheet("font-weight: bold; color: #f44336;") # Update final statistics self.total_comics_processed = summary.get('total_comics_processed', 0) self.total_downloads_triggered = summary.get('total_downloads_triggered', 0) self.update_statistics() self.log_message("Scraping operation completed") def on_page_started(self, page_number, page_index, total_pages, url): """Handle page start.""" self.current_page = page_index self.current_comic = 0 self.total_comics_on_page = 0 self.activity_label.setText(f"Processing page {page_number}...") self.current_item_label.setText(f"URL: {url}") self.update_progress_bars() self.log_message(f"Started processing page {page_number}") def on_page_completed(self, page_number, comics_processed): """Handle page completion.""" self.pages_processed = self.current_page self.log_message(f"Completed page {page_number} - {comics_processed} comics processed") def on_page_comics_found(self, page_number, comic_count): """Handle comics found on page.""" self.total_comics_on_page = comic_count self.current_comic = 0 # Update estimated total comics based on current data if self.total_pages > 0 and self.current_page > 0: avg_comics_per_page = (self.total_comics_processed + comic_count) / self.current_page self.estimated_total_comics = int(avg_comics_per_page * self.total_pages) self.log_message(f"Found {comic_count} comics on page {page_number}") self.update_progress_bars() def on_comic_started(self, page_number, comic_index, total_comics, url): """Handle comic start.""" self.current_comic = comic_index self.last_comic_start = time.time() # Track start time for duration calculation self.activity_label.setText(f"Processing comic {comic_index} of {total_comics}...") self.current_item_label.setText(f"URL: {url}") self.update_progress_bars() def on_comic_completed(self, title, downloads_triggered, page_number, comic_index): """Handle comic completion.""" # Track timing data for enhanced estimation if self.last_comic_start is not None: comic_duration = time.time() - self.last_comic_start self.comic_durations.append(comic_duration) # Keep only recent durations for adaptive estimation (last 20 comics) if len(self.comic_durations) > 20: self.comic_durations = self.comic_durations[-20:] # Update live counters self.total_comics_processed += 1 # Note: downloads_triggered counter is now updated in real-time in on_download_triggered self.update_statistics() # This updates the live display self.log_message(f"Completed: {title} ({downloads_triggered} downloads)") def on_comic_title_extracted(self, title, url): """Handle comic title extraction.""" self.current_item_label.setText(f"Processing: {title}") def on_download_links_found(self, title, download_count): """Handle download links found.""" self.log_message(f"Found {download_count} download links for: {title}") def on_download_started(self, file_name, url, index, total): """Handle download start.""" self.activity_label.setText(f"Downloading file {index} of {total}") self.current_item_label.setText(f"File: {file_name}") def on_download_triggered(self, url): """Handle download triggered.""" # Update download counter in real-time self.total_downloads_triggered += 1 self.update_statistics() def on_error_occurred(self, error_message): """Handle error.""" self.log_message(f"ERROR: {error_message}") def on_break_started(self, duration, context=None): """Handle break start.""" self.activity_label.setText(f"Taking a break for {duration:.1f} seconds...") self.current_item_label.setText("Human-like delay in progress...") def closeEvent(self, event): """Handle dialog close.""" if self.scraper_thread and self.scraper_thread.isRunning(): # Don't allow closing while scraping is active event.ignore() else: # Stop timer if hasattr(self, 'timer'): self.timer.stop() event.accept()