#!/usr/bin/env python3
"""
Special Pricing Debug Tool for UK Price Tracker

This tool helps debug and monitor special pricing detection on real websites.
It can be used to test URLs and see exactly what pricing information is being
detected.
"""

import sys
import os
import asyncio
import logging
import argparse
import traceback
from typing import Dict, Any
from urllib.parse import urlsplit

# Add the src directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

from uk_scraper import UKCateringScraper
from config import Config

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Registered domain -> site key returned by detect_site_from_url().
_SITE_DOMAINS: Dict[str, str] = {
    'jjfoodservice.com': 'jjfoodservice',
    'atoz-catering.co.uk': 'atoz_catering',
    'amazon.co.uk': 'amazon_uk',
}

# Site key -> name of the scraper method that extracts product data for it.
# A site key with no entry here is treated as unsupported.
_EXTRACTOR_METHODS: Dict[str, str] = {
    'jjfoodservice': '_extract_jjfoodservice_data',
    'atoz_catering': '_extract_atoz_catering_data',
    'amazon_uk': '_extract_amazon_uk_data',
}


def detect_site_from_url(url: str) -> str:
    """Detect which site the URL belongs to.

    The decision is made on the URL's hostname only (exact match or any
    subdomain of a known domain), so a known domain that merely appears in
    the path or query string of another site is not matched.

    Args:
        url: The URL to classify. A missing scheme is tolerated.

    Returns:
        'jjfoodservice', 'atoz_catering', 'amazon_uk', or 'unknown'.
    """
    candidate = url.strip()
    if '://' not in candidate:
        # Without a scheme urlsplit() puts the host into the path; a leading
        # '//' makes it parse as a network location instead.
        candidate = '//' + candidate.lstrip('/')

    try:
        host = urlsplit(candidate).hostname or ''
    except ValueError:
        # urlsplit() rejects some malformed inputs (e.g. a broken IPv6 host).
        return 'unknown'

    host = host.rstrip('.')  # tolerate a fully-qualified trailing dot
    for domain, site_name in _SITE_DOMAINS.items():
        if host == domain or host.endswith('.' + domain):
            return site_name
    return 'unknown'


def _format_price(price: Any) -> str:
    """Return the price with a pound sign, or 'Not found' for a falsy price."""
    return f"£{price}" if price else "Not found"


def _report_special_offers(scraper: Any, soup: Any, site_name: str) -> None:
    """Print every special offer price the scraper finds, and the lowest one."""
    print("\n🔍 Looking for special offer prices...")
    special_prices = scraper._find_special_offer_prices(soup, site_name)

    if not special_prices:
        print("❌ No special offer prices found")
        return

    print(f"✅ Found {len(special_prices)} special offer prices:")
    for price, selector in special_prices:
        print(f" £{price} (found with: {selector})")

    best_special_price = min(price for price, _ in special_prices)
    print(f"🎯 Best special offer price: £{best_special_price}")


def _report_extraction(scraper: Any, soup: Any, site_name: str) -> None:
    """Run the site-specific extraction method and print its result."""
    print(f"\n🔍 Testing {site_name} extraction method...")
    extract = getattr(scraper, _EXTRACTOR_METHODS[site_name])
    result = extract(soup)

    print("✅ Extraction result:")
    print(f" Price: {_format_price(result.get('price'))}")
    print(f" Title: {result.get('title', 'Not found')}")
    print(f" Available: {result.get('availability', 'Unknown')}")
    print(f" Currency: {result.get('currency', 'Unknown')}")


def _report_selectors(config: Any, soup: Any, site_name: str) -> None:
    """Try each configured CSS selector against the page and print the hits."""
    print(f"\n🔍 Verbose debugging for {site_name}...")

    site_config = config.get_site_config(site_name)
    if not site_config or 'selectors' not in site_config:
        return

    for selector_type, selector_list in site_config['selectors'].items():
        print(f"\n Testing {selector_type} selectors:")

        # NOTE(review): the config schema is not visible here; a bare string
        # is wrapped so it is tested as one selector, not one per character.
        if isinstance(selector_list, str):
            selector_list = [selector_list]

        for selector in selector_list:
            try:
                elements = soup.select(selector)
            except Exception as e:
                # Best-effort: one bad selector must not stop the others.
                print(f" ⚠️ {selector} -> Error: {e}")
                continue

            if not elements:
                print(f" ❌ {selector} -> No elements found")
                continue

            print(f" ✅ {selector} -> Found {len(elements)} elements")
            for i, elem in enumerate(elements[:3]):  # Show first 3
                text = elem.get_text(strip=True)[:100]  # Truncate long text
                print(f" [{i+1}] {text}")


async def _report_full_scrape(scraper: Any, url: str, site_name: str) -> None:
    """Run the scraper's scrape_product_price method and print its result."""
    print("\n🔍 Testing full scrape_product_price method...")
    full_result = await scraper.scrape_product_price(url, site_name)

    print("✅ Full scraping result:")
    print(f" Success: {full_result.get('success')}")
    print(f" Price: {_format_price(full_result.get('price'))}")
    print(f" Error: {full_result.get('error', 'None')}")


async def debug_url_pricing(url: str, verbose: bool = False,
                            test_selectors: bool = False) -> None:
    """Debug pricing extraction for a specific URL.

    Fetches the page, then prints in order: the special offer prices found,
    the result of the site-specific extraction method, optionally the result
    of testing every configured selector, and finally the result of the
    scraper's scrape_product_price method.

    Args:
        url: Product page URL to inspect.
        verbose: Also test every configured selector, and print a traceback
            if debugging fails.
        test_selectors: Test every configured selector without enabling the
            traceback output.

    Any exception raised while debugging is caught and printed rather than
    propagated, so the tool always finishes with a readable message.
    """
    config = Config()
    scraper = UKCateringScraper(config)

    site_name = detect_site_from_url(url)

    print(f"Debugging URL: {url}")
    print(f"Detected site: {site_name}")
    print("-" * 60)

    # Covers 'unknown' as well as any site key without an extraction method.
    if site_name not in _EXTRACTOR_METHODS:
        print("❌ Unknown site - cannot process")
        return

    try:
        # Fetch the page content
        print("🌐 Fetching page content...")
        html_content = await scraper._fetch_page(url)

        if not html_content:
            print("❌ Failed to fetch page content")
            return

        print("✅ Page content fetched successfully")

        # Parse with BeautifulSoup (imported here, as in the original script)
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(html_content, 'html.parser')

        _report_special_offers(scraper, soup, site_name)
        _report_extraction(scraper, soup, site_name)

        if verbose or test_selectors:
            _report_selectors(config, soup, site_name)

        await _report_full_scrape(scraper, url, site_name)

    except Exception as e:
        # Top-level boundary of a diagnostic tool: report instead of crashing.
        print(f"❌ Error during debugging: {e}")
        if verbose:
            traceback.print_exc()


def main() -> None:
    """Main function to run the debug tool."""
    parser = argparse.ArgumentParser(
        description='Debug special pricing detection for UK price tracker')
    parser.add_argument('url', help='URL to debug')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Enable verbose output')
    parser.add_argument('--test-selectors', action='store_true',
                        help='Test all selectors from config')

    args = parser.parse_args()

    print("UK Price Tracker - Special Pricing Debug Tool")
    print("=" * 60)

    # Run the debugging
    asyncio.run(debug_url_pricing(args.url, args.verbose,
                                  test_selectors=args.test_selectors))


if __name__ == "__main__":
    main()