commit 861ab98af2f993f8c350f20c612f7dc892286f4f
Author: Louis Mylle <louis@optimize-it.be>
Date:   Wed Sep 10 21:29:59 2025 +0200

    Add web scraper implementation using Selenium

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..12aee1a
--- /dev/null
+++ b/main.py
@@ -0,0 +1,310 @@
+"""Log in to eboek.info with Selenium and trigger downloads of the listed comics."""
+
+import getpass
+import os
+import random
+import time
+
+from selenium import webdriver
+from selenium.common.exceptions import WebDriverException
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+
+
+class Scraper:
+    """Chrome session that browses eboek.info with randomized, human-like pacing."""
+
+    LOGIN_URL = "https://eboek.info/komerin"
+    LISTING_URL = "https://eboek.info/stripverhalen-alle"
+    # Overrides the automation marker that a WebDriver-controlled browser exposes.
+    HIDE_WEBDRIVER_JS = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
+
+    def __init__(self, headless=False):
+        """Start Chrome with its automation hints switched off.
+
+        Args:
+            headless: When true, run Chrome without a visible window.
+        """
+        chrome_options = Options()
+        if headless:
+            chrome_options.add_argument('--headless')
+
+        # Make it look more human
+        chrome_options.add_argument('--disable-blink-features=AutomationControlled')
+        chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
+        chrome_options.add_experimental_option('useAutomationExtension', False)
+        chrome_options.add_argument('user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
+
+        self.driver = webdriver.Chrome(options=chrome_options)
+        self._hide_webdriver_flag()
+
+    def _hide_webdriver_flag(self):
+        """Register the navigator.webdriver override for the current tab.
+
+        A plain execute_script only patches the document that is loaded at that
+        moment, so the override was lost on the first navigation. Registering
+        it through the DevTools protocol makes Chrome evaluate it before the
+        scripts of every document loaded afterwards.
+        """
+        # NOTE(review): the registration looks to be scoped to the tab that is
+        # current at call time, which is why new comic tabs call this again.
+        self.driver.execute_cdp_cmd(
+            "Page.addScriptToEvaluateOnNewDocument",
+            {"source": self.HIDE_WEBDRIVER_JS},
+        )
+
+    def _open_blank_tab(self):
+        """Open an empty tab, switch to it and return its window handle."""
+        known_handles = set(self.driver.window_handles)
+        self.driver.execute_script("window.open('');")
+        # Pick the handle that just appeared instead of trusting list order.
+        new_handles = set(self.driver.window_handles) - known_handles
+        if not new_handles:
+            raise RuntimeError("The browser did not open a new tab")
+        new_handle = new_handles.pop()
+        self.driver.switch_to.window(new_handle)
+        return new_handle
+
+    def human_delay(self, min_sec=0.5, max_sec=2):
+        """Sleep for a random time between min_sec and max_sec seconds."""
+        time.sleep(random.uniform(min_sec, max_sec))
+
+    def human_type(self, element, text):
+        """Type text into element one character at a time with short pauses."""
+        for char in text:
+            element.send_keys(char)
+            time.sleep(random.uniform(0.05, 0.15))
+
+    def navigate(self, url):
+        """Load url in the current tab, then pause for 1-3 seconds."""
+        self.driver.get(url)
+        self.human_delay(1, 3)
+
+    def login(self, username, password):
+        """Fill in and submit the login form at LOGIN_URL.
+
+        The outcome of the submission is not checked here.
+        """
+        self.driver.get(self.LOGIN_URL)
+        self.human_delay(2, 4)
+
+        # Direct selectors based on what worked
+        username_field = self.driver.find_element(By.CSS_SELECTOR, "input[type='text']")
+        self.human_type(username_field, username)
+        self.human_delay(0.5, 1)
+
+        password_field = self.driver.find_element(By.CSS_SELECTOR, "input[type='password']")
+        self.human_type(password_field, password)
+        self.human_delay(0.5, 1.5)
+
+        self.driver.find_element(By.CSS_SELECTOR, "input[type='submit']").click()
+        self.human_delay(2, 4)
+
+    def trigger_download(self, url):
+        """Open URL in new tab to trigger browser download.
+
+        Focus returns to the window that was current when this was called.
+        """
+        if not url:
+            # A link without an href would only open a useless blank tab.
+            print("Skipping download link without a URL")
+            return
+
+        # Store current window handle
+        current_window = self.driver.current_window_handle
+
+        # Hand the URL over as a script argument instead of pasting it into the
+        # JavaScript source, so quotes or backslashes in it cannot break or
+        # change the script.
+        self.driver.execute_script("window.open(arguments[0], '_blank');", url)
+
+        # Wait for the download to start; the tab is expected to close itself
+        self.human_delay(3, 5)
+
+        # Switch back to original window
+        self.driver.switch_to.window(current_window)
+
+        print(f"Download triggered for: {url}")
+
+    def scrape(self, start_page=1, end_page=1):
+        """Scrape comics from specified page range.
+
+        Args:
+            start_page: First listing page to process (1 is the front page).
+            end_page: Last listing page to process, inclusive.
+        """
+        for page_num in range(start_page, end_page + 1):
+            # Construct page URL
+            if page_num == 1:
+                page_url = self.LISTING_URL
+            else:
+                page_url = f"{self.LISTING_URL}/page/{page_num}/"
+
+            print(f"\n{'='*50}")
+            print(f"Processing page {page_num}: {page_url}")
+            print(f"{'='*50}")
+
+            self.navigate(page_url)
+
+            # Scroll down a bit like a human would to see content
+            self.driver.execute_script("window.scrollTo(0, 300)")
+            self.human_delay(1, 2)
+
+            comic_links = self.driver.find_elements(By.CSS_SELECTOR, 'h2.post-title a')
+            print(f"Found {len(comic_links)} comic strips on page {page_num}")
+
+            # Store URLs first to avoid stale element issues
+            comic_urls = [link.get_attribute('href') for link in comic_links]
+
+            if page_num > start_page:
+                self._pause_between_pages()
+
+            # Remember the listing tab so focus can always be returned to it.
+            listing_window = self.driver.current_window_handle
+
+            for i, url in enumerate(comic_urls, 1):
+                print(f"\nProcessing comic {i}/{len(comic_urls)} on page {page_num}: {url}")
+
+                # Random chance to scroll on main page before clicking
+                if random.random() < 0.4:
+                    scroll_amount = random.randint(100, 500)
+                    self.driver.execute_script(f"window.scrollBy(0, {scroll_amount})")
+                    self.human_delay(0.5, 1.5)
+
+                succeeded = self._process_comic(url, listing_window)
+
+                # Take a longer break every 5 comics
+                if succeeded and i % 5 == 0 and i < len(comic_urls):
+                    break_time = random.uniform(3, 7)
+                    print(f"\nTaking a break for {break_time:.1f} seconds...")
+                    time.sleep(break_time)
+
+                # Vary the delay between comics
+                self.human_delay(1, 3)
+
+    def _pause_between_pages(self):
+        """Sleep 15-45 s (70% of the time) or else 5-10 s between listing pages."""
+        if random.random() < 0.7:
+            break_time = random.uniform(15, 45)
+            print(f"\nTaking a break between pages for {break_time:.1f} seconds...")
+            time.sleep(break_time)
+        else:
+            # Even if no long break, always pause a bit
+            short_break = random.uniform(5, 10)
+            print(f"\nQuick pause for {short_break:.1f} seconds...")
+            time.sleep(short_break)
+
+    def _process_comic(self, url, listing_window):
+        """Open one comic page in a temporary tab and trigger all its downloads.
+
+        Errors are reported and swallowed so that one broken page does not stop
+        the run. The temporary tab is closed and focus returns to
+        listing_window whether or not processing succeeded.
+
+        Returns:
+            True when the page was processed without an error, else False.
+        """
+        comic_window = None
+        try:
+            comic_window = self._open_blank_tab()
+            self._hide_webdriver_flag()
+            self.driver.get(url)
+            self.human_delay(2, 4)
+
+            # Sometimes scroll down to see the content
+            if random.random() < 0.6:
+                self.driver.execute_script("window.scrollTo(0, 400)")
+                self.human_delay(0.5, 1.5)
+
+            title = self.driver.find_element(By.CSS_SELECTOR, 'h1.entry-title').text
+            print(f"Title: {title}")
+
+            # Small delay before clicking download
+            self.human_delay(0.8, 2)
+
+            # downloadLinks() is presumably defined by the comic page itself
+            self.driver.execute_script("downloadLinks()")
+            self.human_delay(1.5, 3)
+
+            download_links = self.driver.find_elements(By.CSS_SELECTOR, 'table a')
+            print(f"Found {len(download_links)} download links")
+
+            # Read href and label up front: trigger_download switches windows,
+            # so the elements are not touched again once downloads start.
+            downloads = [
+                (link.get_attribute('href'), link.text.strip())
+                for link in download_links
+            ]
+            for j, (file_url, file_name) in enumerate(downloads):
+                print(f"Triggering download: {file_name}")
+                self.trigger_download(file_url)
+
+                # Small random delay between downloads
+                if j < len(downloads) - 1:
+                    self.human_delay(0.5, 1.5)
+            return True
+        except Exception as e:
+            # Deliberately broad: whatever failed, carry on with the next comic.
+            print(f"Error processing {url}: {e}")
+            # Human would pause after an error
+            self.human_delay(2, 4)
+            return False
+        finally:
+            if comic_window is not None:
+                try:
+                    self.driver.switch_to.window(comic_window)
+                    self.driver.close()
+                except WebDriverException as e:
+                    print(f"Could not close the tab for {url}: {e}")
+            self.driver.switch_to.window(listing_window)
+
+    def close(self):
+        """Quit the browser and end the WebDriver session."""
+        self.driver.quit()
+
+
+def _ask_page_number(prompt, minimum=1):
+    """Prompt until the user enters a whole number that is at least minimum."""
+    while True:
+        answer = input(prompt)
+        try:
+            page = int(answer)
+        except ValueError:
+            print(f"'{answer}' is not a whole number, please try again.")
+            continue
+        if page >= minimum:
+            return page
+        print(f"Please enter a number of at least {minimum}.")
+
+
+def main():
+    """Log in, ask for a page range, scrape it and always shut the browser down."""
+    # Credentials come from the environment or a prompt so that they never end
+    # up in version control.
+    username = os.environ.get("EBOEK_USERNAME") or input("Username: ")
+    password = os.environ.get("EBOEK_PASSWORD") or getpass.getpass("Password: ")
+
+    scraper = Scraper()
+    try:
+        # Login first
+        scraper.login(username, password)
+
+        # Ask which page(s) to scrape
+        start = _ask_page_number("Enter start page number (1 for first page): ")
+        end = _ask_page_number(
+            "Enter end page number (same as start for single page): ",
+            minimum=start,
+        )
+
+        # Scrape the specified pages
+        scraper.scrape(start_page=start, end_page=end)
+
+        # Keep browser open
+        input("\nDone! Press Enter to close the browser...")
+    finally:
+        scraper.close()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..954f0db
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+selenium
\ No newline at end of file