"""
Input validation utilities for the EBoek.info scraper GUI.
"""

import re
from urllib.parse import urlparse

# Largest number of pages, counted inclusively, accepted for a single run.
_MAX_PAGES_PER_RUN = 100

# Characters rejected in user-supplied paths, checked in this order. Path
# separators and ':' are not in this set, so ordinary Windows and POSIX paths
# pass the check.
_INVALID_PATH_CHARS = ('<', '>', '|', '"', '*', '?')

# Characters replaced by '_' in file names: the usual reserved punctuation
# plus the ASCII control characters (NUL is invalid everywhere, the rest are
# rejected by Windows).
_FILENAME_TRANSLATION = str.maketrans({
    **{chr(code): '_' for code in range(32)},
    **{char: '_' for char in '<>:"/\\|?*'},
})

# Longest file name returned by sanitize_filename().
_MAX_FILENAME_LENGTH = 200

# Boolean settings and the label used for each in error messages.
_BOOLEAN_SETTINGS = (
    ('headless_mode', "Headless mode"),
    ('verbose_logging', "Verbose logging"),
    ('auto_save_credentials', "Auto save credentials"),
)


def _stripped(value):
    """Return *value* without surrounding whitespace, or "" if it is empty or None."""
    return value.strip() if value else ""


def validate_page_range(start_page, end_page):
    """
    Validate page range input.

    Both bounds are converted with int(). The range checks only run when both
    conversions succeed. The range is inclusive, so start == end means a
    single page.

    Args:
        start_page (int or str): Starting page number
        end_page (int or str): Ending page number

    Returns:
        dict: 'valid' (bool), 'errors' (list of str), and the converted
            'start_page' / 'end_page' (int, or None when conversion failed)
    """
    errors = []
    cleaned_start = None
    cleaned_end = None

    try:
        cleaned_start = int(start_page)
    except (ValueError, TypeError):
        errors.append("Start page must be a valid number")

    try:
        cleaned_end = int(end_page)
    except (ValueError, TypeError):
        errors.append("End page must be a valid number")

    if cleaned_start is not None and cleaned_end is not None:
        if cleaned_start < 1:
            errors.append("Start page must be 1 or greater")
        if cleaned_end < 1:
            errors.append("End page must be 1 or greater")
        if cleaned_start > cleaned_end:
            errors.append("Start page cannot be greater than end page")

        # Both ends are included, so 1..100 is exactly 100 pages and 1..101
        # is already over the limit.
        page_count = cleaned_end - cleaned_start + 1
        if page_count > _MAX_PAGES_PER_RUN:
            errors.append(
                f"Page range too large (maximum {_MAX_PAGES_PER_RUN} pages at once)"
            )

    return {
        'valid': not errors,
        'errors': errors,
        'start_page': cleaned_start,
        'end_page': cleaned_end,
    }


def validate_username(username):
    """
    Validate EBoek.info username.

    Surrounding whitespace is ignored. A value that is empty after stripping
    is reported as missing.

    Args:
        username (str): Username to validate

    Returns:
        dict: 'valid' (bool), 'errors' (list of str) and the stripped
            'username' ("" when nothing was supplied)
    """
    errors = []
    cleaned = _stripped(username)

    if not cleaned:
        errors.append("Username is required")
    elif len(cleaned) < 2:
        errors.append("Username must be at least 2 characters long")
    elif len(cleaned) > 50:
        errors.append("Username is too long (maximum 50 characters)")
    elif not re.fullmatch(r'[a-zA-Z0-9_.-]+', cleaned):
        errors.append(
            "Username contains invalid characters (use only letters, numbers, _, ., -)"
        )

    return {
        'valid': not errors,
        'errors': errors,
        'username': cleaned,
    }


def validate_password(password):
    """
    Validate EBoek.info password.

    The password is checked as given: it is not stripped, so leading or
    trailing whitespace counts toward its length.

    Args:
        password (str): Password to validate

    Returns:
        dict: 'valid' (bool) and 'errors' (list of str)
    """
    errors = []

    if not password:
        errors.append("Password is required")
    elif len(password) < 3:
        errors.append("Password must be at least 3 characters long")
    elif len(password) > 128:
        errors.append("Password is too long (maximum 128 characters)")

    return {
        'valid': not errors,
        'errors': errors,
    }


def validate_url(url):
    """
    Validate URL format.

    The URL is stripped before it is parsed, so the verdict always describes
    the value returned under 'url'.

    Args:
        url (str): URL to validate

    Returns:
        dict: 'valid' (bool), 'errors' (list of str) and the stripped 'url'
            ("" when nothing was supplied)
    """
    errors = []
    cleaned = _stripped(url)

    if not cleaned:
        errors.append("URL is required")
    else:
        try:
            parsed = urlparse(cleaned)
        except ValueError:
            # urlparse() rejects malformed input such as unbalanced IPv6
            # brackets with ValueError.
            errors.append("Invalid URL format")
        else:
            if not parsed.scheme:
                errors.append("URL must include protocol (http:// or https://)")
            elif parsed.scheme not in ('http', 'https'):
                errors.append("URL must use http:// or https://")

            if not parsed.netloc:
                errors.append("URL must include domain name")

    return {
        'valid': not errors,
        'errors': errors,
        'url': cleaned,
    }


def validate_file_path(file_path):
    """
    Validate file path format.

    This is a basic, OS-independent check: the path must be non-blank, free
    of a small set of reserved characters, and at most 255 characters long.
    Only the first offending character (in the order of the reserved set) is
    reported.

    Args:
        file_path (str): File path to validate

    Returns:
        dict: 'valid' (bool), 'errors' (list of str) and the stripped 'path'
            ("" when nothing was supplied)
    """
    errors = []
    cleaned = _stripped(file_path)

    if not cleaned:
        # Also covers whitespace-only input, which would otherwise be
        # accepted as a valid but empty path.
        errors.append("File path is required")
    else:
        offending = next(
            (char for char in _INVALID_PATH_CHARS if char in cleaned), None
        )
        if offending is not None:
            errors.append(f"File path contains invalid character: {offending}")

        if len(cleaned) > 255:
            errors.append("File path is too long (maximum 255 characters)")

    return {
        'valid': not errors,
        'errors': errors,
        'path': cleaned,
    }


def sanitize_filename(filename):
    """
    Sanitize a filename for safe storage.

    Reserved punctuation and control characters become underscores,
    surrounding spaces and dots are removed, and the result is limited to
    200 characters. "download" is returned when nothing usable remains.

    Args:
        filename (str): Original filename

    Returns:
        str: Sanitized filename safe for most file systems
    """
    if not filename:
        return "download"

    sanitized = filename.translate(_FILENAME_TRANSLATION).strip(' .')
    if not sanitized:
        return "download"

    if len(sanitized) > _MAX_FILENAME_LENGTH:
        # Cutting can expose a space or dot at the new end, so trim again.
        sanitized = sanitized[:_MAX_FILENAME_LENGTH].rstrip(' .')

    return sanitized


def validate_settings(settings):
    """
    Validate application settings dictionary.

    Only keys present in *settings* are checked. Values that pass are copied
    (cleaned) into the returned settings; unknown keys are ignored.

    Args:
        settings (dict): Settings to validate

    Returns:
        dict: 'valid' (bool), 'errors' (list of str) and the cleaned
            'settings' (dict)
    """
    errors = []
    cleaned_settings = {}

    for key, label in _BOOLEAN_SETTINGS:
        if key not in settings:
            continue
        if isinstance(settings[key], bool):
            cleaned_settings[key] = settings[key]
        else:
            errors.append(f"{label} must be true or false")

    if 'download_path' in settings:
        path_validation = validate_file_path(settings['download_path'])
        if path_validation['valid']:
            cleaned_settings['download_path'] = path_validation['path']
        else:
            errors.extend(path_validation['errors'])

    for page_key in ('default_start_page', 'default_end_page'):
        if page_key not in settings:
            continue

        label = page_key.replace('_', ' ').title()
        raw_value = settings[page_key]
        try:
            # bool is an int subclass (int(True) == 1), so reject it
            # explicitly instead of storing it as a page number.
            if isinstance(raw_value, bool):
                raise TypeError("boolean is not a page number")
            page_num = int(raw_value)
        except (ValueError, TypeError):
            errors.append(f"{label} must be a valid number")
            continue

        if page_num < 1:
            errors.append(f"{label} must be 1 or greater")
        else:
            cleaned_settings[page_key] = page_num

    return {
        'valid': not errors,
        'errors': errors,
        'settings': cleaned_settings,
    }


def format_error_message(errors):
    """
    Format a list of error messages into a user-friendly string.

    Args:
        errors (list): List of error messages

    Returns:
        str: "" for no errors, the message itself for a single error,
            otherwise a bulleted list under a "Multiple errors:" heading
    """
    if not errors:
        return ""
    if len(errors) == 1:
        return errors[0]
    return "Multiple errors:\n• " + "\n• ".join(errors)


# NOTE(review): the source of is_safe_string() is truncated in this file. It
# stops inside the first string literal of its `dangerous_patterns` list, so
# the function cannot be completed without guessing at the missing patterns
# and the code that uses them. The visible fragment is preserved below,
# commented out; restore the full function from version control.
#
# def is_safe_string(text, max_length=1000):
#     """
#     Check if a string is safe for display/storage (no dangerous content).
#
#     Args:
#         text (str): Text to check
#         max_length (int): Maximum allowed length
#
#     Returns:
#         bool: True if string is safe, False otherwise
#     """
#     if not isinstance(text, str):
#         return False
#     if len(text) > max_length:
#         return False
#     # Check for potential script injection or dangerous content
#     dangerous_patterns = [ '