Enhance scraper functionality: update README for installation, add SSL handling in main.py, implement credential storage, and create start.bat for easy execution

This commit is contained in:
Louis Mylle
2025-09-10 22:20:09 +02:00
parent 04e1df15ea
commit 5f2fca226b
4 changed files with 67 additions and 5 deletions

View File

@@ -28,7 +28,7 @@ De ChromeDriver wordt automatisch geïnstalleerd bij het eerste gebruik. Zorg da
Open Command Prompt in de projectmap en voer uit:
```
pip install selenium requests
python -m pip install selenium requests urllib3
```
### Stap 4: Login gegevens instellen

64
main.py
View File

@@ -4,6 +4,11 @@ from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import time
import random
import os
# Silence urllib3's InsecureRequestWarning (this only hides the warning; it does not change certificate verification)
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class Scraper:
def __init__(self, headless=False):
@@ -11,12 +16,30 @@ class Scraper:
if headless:
chrome_options.add_argument('--headless')
# Fix SSL and certificate issues
chrome_options.add_argument('--ignore-ssl-errors')
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_argument('--disable-web-security')
chrome_options.add_argument('--allow-running-insecure-content')
chrome_options.add_argument('--disable-extensions')
# Fix DevTools connection issues
chrome_options.add_argument('--remote-debugging-port=0')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--no-sandbox')
# Make it look more human
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
# Suppress logging
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--disable-logging')
chrome_options.add_argument('--log-level=3')
self.driver = webdriver.Chrome(options=chrome_options)
self.driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
@@ -157,9 +180,12 @@ class Scraper:
print(f"Triggering download: {file_name}")
self.trigger_download(file_url)
# Small random delay between downloads
# Human-like delay between downloads
if j < len(download_links) - 1:
self.human_delay(0.5, 1.5)
# Longer delay for multiple downloads (human would wait and check)
delay_time = random.uniform(2, 5)
print(f"Waiting {delay_time:.1f} seconds before next download...")
time.sleep(delay_time)
# Take a longer break every 5 comics
if i % 5 == 0 and i < len(comic_urls):
@@ -182,11 +208,43 @@ class Scraper:
def close(self):
    """Quit the WebDriver held in ``self.driver``."""
    self.driver.quit()
def update_credentials_in_file(username, password, path=None):
    """Persist login credentials by rewriting the placeholder assignments.

    The script's entry point assigns the placeholder strings
    ``"jouw_gebruikersnaam"`` and ``"jouw_wachtwoord"`` to ``username`` and
    ``password``. This function rewrites those two assignment lines so that
    later runs skip the first-time prompt.

    Args:
        username: Username to store.
        password: Password to store.
        path: File to rewrite. Defaults to this module's own source file.

    Returns:
        True when both placeholder assignments were found and replaced,
        False otherwise.

    NOTE: the credentials end up in plain text inside the source file.
    """
    import re

    target = __file__ if path is None else path

    with open(target, 'r', encoding='utf-8') as f:
        content = f.read()

    placeholders = (
        ("username", "jouw_gebruikersnaam", username),
        ("password", "jouw_wachtwoord", password),
    )

    replaced = 0
    for variable, placeholder, value in placeholders:
        # Anchor on a whole assignment line so neither the comparison in the
        # first-run check nor any text inside this function is touched.
        pattern = re.compile(
            r'^([ \t]*' + variable + r'[ \t]*=[ \t]*)"'
            + re.escape(placeholder) + r'"[ \t]*$',
            re.MULTILINE,
        )
        # repr() yields a valid Python literal even when the value contains
        # quotes or backslashes; a callable replacement keeps re from
        # interpreting backslashes in that literal.
        literal = repr(value)
        content, hits = pattern.subn(
            lambda match, literal=literal: match.group(1) + literal,
            content,
            count=1,
        )
        replaced += hits

    # Leave the file alone when nothing matched.
    if replaced:
        with open(target, 'w', encoding='utf-8') as f:
            f.write(content)

    if replaced == len(placeholders):
        print("Credentials saved for future use!")
        return True

    print("Could not find the credential placeholders; credentials were not fully saved.")
    return False
if __name__ == "__main__":
# Check if credentials need to be set
username = "jouw_gebruikersnaam"
password = "jouw_wachtwoord"
if username == "jouw_gebruikersnaam" or password == "jouw_wachtwoord":
print("First time setup: Please enter your EBoek.info credentials")
new_username = input("Username: ")
new_password = input("Password: ")
# Update the file with new credentials
update_credentials_in_file(new_username, new_password)
# Use the new credentials
username = new_username
password = new_password
scraper = Scraper()
# Login first
scraper.login("jouw_gebruikersnaam", "jouw_wachtwoord")
scraper.login(username, password)
# Ask which page(s) to scrape
start = int(input("Enter start page number (1 for first page): "))

View File

@@ -1 +1,2 @@
selenium
selenium
urllib3

3
start.bat Normal file
View File

@@ -0,0 +1,3 @@
@echo off
cd /d %~dp0
start cmd /k "python main.py"